Belle II Software  release-05-02-19
Framework.cc
1 /**************************************************************************
2  * BASF2 (Belle Analysis Framework 2) *
3  * Copyright(C) 2010-2011 Belle II Collaboration *
4  * *
5  * Author: The Belle II Collaboration *
6  * Contributors: Andreas Moll, Thomas Kuhr *
7  * R.Itoh, addition of parallel processing function *
8  * *
9  * This software is provided "as is" without any warranty. *
10  **************************************************************************/
11 
12 #include <framework/pybasf2/Framework.h>
13 
14 #include <framework/core/PyObjConvUtils.h>
15 #include <framework/core/Environment.h>
16 #include <framework/core/RandomNumbers.h>
17 #include <framework/core/EventProcessor.h>
18 #include <framework/core/ModuleManager.h>
19 #include <framework/datastore/DataStore.h>
20 #include <framework/database/DBStore.h>
21 #include <framework/database/Database.h>
22 #include <framework/pcore/pEventProcessor.h>
23 #include <framework/pcore/ZMQEventProcessor.h>
24 #include <framework/pcore/zmq/utils/ZMQAddressUtils.h>
25 #include <framework/utilities/FileSystem.h>
26 #include <framework/database/Configuration.h>
27 
28 #include <framework/logging/Logger.h>
29 #include <framework/logging/LogSystem.h>
30 
31 #include <boost/python.hpp>
32 
33 #include <set>
34 
35 using namespace boost::python;
36 using namespace Belle2;
37 
38 
39 Framework::Framework()
40 {
41  DataStore::s_DoCleanup = true;
42  LogSystem::Instance().enableErrorSummary(true);
43 
44  if (!RandomNumbers::isInitialized()) {
45  RandomNumbers::initialize();
46  }
47  Environment::Instance();
48 }
49 
50 
51 Framework::~Framework()
52 {
53  //empty module manager of modules
54  //since modules may contain shared pointers of Path objects created in Python,
55  //these shared pointers have special cleanup hooks that can cause crashes if run
56  //after Py_Finalize(). The framework object is cleaned up before, so this is a good place.
57  ModuleManager::Instance().reset();
58  DataStore::s_DoCleanup = false;
59  //Also the database configuration has things to cleanup before Py_Finalize()
60  Conditions::Configuration::getInstance().reset();
61 }
62 
63 
64 void Framework::addModuleSearchPath(const std::string& path)
65 {
66  ModuleManager::Instance().addModuleSearchPath(path);
67 }
68 
69 
70 void Framework::setExternalsPath(const std::string& path)
71 {
72  Environment::Instance().setExternalsPath(path);
73 }
74 
75 
76 ModulePtr Framework::registerModule(const std::string& moduleName)
77 {
78  return ModuleManager::Instance().registerModule(moduleName);
79 }
80 
81 
82 ModulePtr Framework::registerModule(const std::string& moduleName, const std::string& sharedLibPath)
83 {
84  return ModuleManager::Instance().registerModule(moduleName, sharedLibPath);
85 }
86 
87 
88 void Framework::process(PathPtr startPath, long maxEvent)
89 {
90  if (Environment::Instance().getDryRun()) {
91  Environment::Instance().setJobInformation(startPath);
92  return; //processing disabled!
93  }
94 
95  static bool already_executed = false;
96  static std::set<const Module*> previously_run_modules; //not a shared pointer to not screw up ownership
97  static int errors_from_previous_run = 0;
98  const auto moduleListUnique = startPath->buildModulePathList(true);
99  if (already_executed) {
100  B2WARNING("Calling process() more than once per steering file is still experimental, please check results carefully! Python modules especially should reinitialise their state in initialise() to avoid problems");
101  if (startPath->buildModulePathList(true) != startPath->buildModulePathList(false)) {
102  B2FATAL("Your path contains the same module instance in multiple places. Calling process() multiple times is not implemented for this case.");
103  }
104 
105  //were any modules in moduleListUnique already run?
106  for (const auto& m : moduleListUnique) {
107  if (previously_run_modules.count(m.get()) > 0) {
108  //only clone if modules have been run before
109  startPath = std::static_pointer_cast<Path>(startPath->clone());
110  break;
111  }
112  }
113  }
114  for (const auto& m : moduleListUnique) {
115  previously_run_modules.insert(m.get());
116  }
117 
118  int numLogError = LogSystem::Instance().getMessageCounter(LogConfig::c_Error);
119  if (numLogError != errors_from_previous_run) {
120  B2FATAL(numLogError << " ERROR(S) occurred! The processing of events will not be started.");
121  }
122 
123  try {
124  LogSystem::Instance().resetMessageCounter();
125  DataStore::Instance().reset();
126  DataStore::Instance().setInitializeActive(true);
127 
128  auto& environment = Environment::Instance();
129 
130  already_executed = true;
131  if (environment.getNumberProcesses() == 0) {
132  EventProcessor processor;
133  processor.setProfileModuleName(environment.getProfileModuleName());
134  processor.process(startPath, maxEvent);
135  } else {
136  if (environment.getUseZMQ()) {
137  // If the user has not given any socket address, use a random one.
138  if (environment.getZMQSocketAddress().empty()) {
139  environment.setZMQSocketAddress(ZMQAddressUtils::randomSocketName());
140  }
141  ZMQEventProcessor processor;
142  processor.process(startPath, maxEvent);
143  } else {
144  pEventProcessor processor;
145  processor.process(startPath, maxEvent);
146  }
147  }
148  errors_from_previous_run = LogSystem::Instance().getMessageCounter(LogConfig::c_Error);
149 
150  DBStore::Instance().reset();
151  // Also, reset the Database connection itself. However don't reset the
152  // configuration, just the actual setup. In case the user runs process()
153  // again it will reinitialize correctly with the same settings.
154  Database::Instance().reset(true);
155  } catch (std::exception& e) {
156  B2ERROR("Uncaught exception encountered: " << e.what()); //should show module name
157  DataStore::Instance().reset(); // ensure we are executed before ROOT's exit handlers
158  throw; //and let python's global handler do the rest
159  } catch (...) {
160  B2ERROR("Uncaught exception encountered!"); //should show module name
161  DataStore::Instance().reset(); // ensure we are executed before ROOT's exit handlers
162  throw; //and let python's global handler do the rest
163  //TODO: having a stack trace would be nicer, but somehow a handler I set using std::set_terminate() never gets called
164  }
165 }
166 
167 
168 void Framework::setNumberProcesses(int numProcesses)
169 {
170  Environment::Instance().setNumberProcesses(numProcesses);
171 }
172 
173 
174 int Framework::getNumberProcesses()
175 {
176  return Environment::Instance().getNumberProcesses();
177 }
178 
179 
180 void Framework::setPicklePath(const std::string& path)
181 {
182  Environment::Instance().setPicklePath(path);
183 }
184 
185 
186 std::string Framework::getPicklePath()
187 {
188  return Environment::Instance().getPicklePath();
189 }
190 
191 void Framework::setStreamingObjects(const boost::python::list& streamingObjects)
192 {
193  auto vec = PyObjConvUtils::convertPythonObject(streamingObjects, std::vector<std::string>());
194  Environment::Instance().setStreamingObjects(vec);
195 }
196 
197 std::string Framework::findFile(const std::string& filename, const std::string& type, bool ignore_errors)
198 {
199  std::string result;
200  if (type.empty()) {
201  //behave like FileSystem.findFile by using it
202  result = FileSystem::findFile(filename, ignore_errors);
203  } else {
204  result = FileSystem::findFile(filename, type, ignore_errors);
205  }
206  if (!ignore_errors and result.empty()) {
207  // Still not found ... see if we raise an exception or not.
208  // We want a FileNotFoundError ... so lets fudge the errno to the correct
209  // error value and then create the correct exception in python
210  errno = ENOENT;
211  PyErr_SetFromErrnoWithFilename(PyExc_FileNotFoundError, filename.c_str());
212  boost::python::throw_error_already_set();
213  }
214  return result;
215 }
216 
217 //=====================================================================
218 // Python API
219 //=====================================================================
220 
221 boost::python::list Framework::getModuleSearchPathsPython()
222 {
223  boost::python::list returnList;
224 
225  for (const std::string& path : ModuleManager::Instance().getModuleSearchPaths())
226  returnList.append(boost::python::object(path));
227  return returnList;
228 }
229 
230 
231 boost::python::dict Framework::getAvailableModulesPython()
232 {
233  boost::python::dict returnDict;
234  for (const auto& modulePair : ModuleManager::Instance().getAvailableModules())
235  returnDict[boost::python::object(modulePair.first)] = boost::python::object(modulePair.second);
236  return returnDict;
237 }
238 
239 
240 boost::python::list Framework::getRegisteredModulesPython()
241 {
242  boost::python::list returnList;
243 
244  for (const ModulePtr& mod : ModuleManager::Instance().getCreatedModules())
245  returnList.append(boost::python::object(mod));
246  return returnList;
247 }
248 
249 
250 #if !defined(__GNUG__) || defined(__ICC)
251 #else
252 #pragma GCC diagnostic push
253 #pragma GCC diagnostic ignored "-Wunused-local-typedefs"
254 #endif
255 BOOST_PYTHON_FUNCTION_OVERLOADS(process_overloads, Framework::process, 1, 2)
256 #if !defined(__GNUG__) || defined(__ICC)
257 #else
258 #pragma GCC diagnostic pop
259 #endif
260 
261 namespace {
262  PyObject* PyExc_ModuleNotCreatedError{nullptr};
265  void moduleNotCreatedTranslator(const ModuleManager::ModuleNotCreatedError& e)
266  {
267  PyErr_SetString(PyExc_ModuleNotCreatedError, e.what());
268  }
269 }
270 
// Register the Python bindings of the Framework in the current boost::python
// scope: create the ModuleNotCreatedError exception type and its translator,
// create the Framework instance stored as "__framework", and expose the
// Framework functions as Python functions together with their docstrings.
271 void Framework::exposePythonAPI()
272 {
  // Create a new Python exception type derived from RuntimeError and keep it in
  // the file-local pointer that moduleNotCreatedTranslator() passes to PyErr_SetString().
273  PyExc_ModuleNotCreatedError = PyErr_NewExceptionWithDoc("basf2.ModuleNotCreatedError",
274  "This exception is raised when a basf2 module could not be created for any reason",
275  PyExc_RuntimeError, nullptr);
  // Make the exception type available as an attribute of the current scope and
  // have C++ ModuleNotCreatedError exceptions routed through the translator.
276  scope().attr("ModuleNotCreatedError") = handle<>(borrowed(PyExc_ModuleNotCreatedError));
277  register_exception_translator<ModuleManager::ModuleNotCreatedError>(moduleNotCreatedTranslator);
278  //Overloaded methods
  // Typed function pointers pick one overload of registerModule each.
279  ModulePtr(*registerModule1)(const std::string&) = &Framework::registerModule;
280  ModulePtr(*registerModule2)(const std::string&, const std::string&) = &Framework::registerModule;
281 
282  //don't show c++ signature in python doc to keep it simple
  // docstring_options arguments: user-defined docstrings, Python signatures, C++ signatures
283  docstring_options options(true, true, false);
284 
285  //Expose framework class
  // The class is exposed with no_init; the single instance created here is
  // stored on the scope as "__framework".
  // NOTE(review): presumably this ties the ~Framework() cleanup to the lifetime
  // of the Python module -- confirm.
286  class_<Framework, std::shared_ptr<Framework>, boost::noncopyable>("Framework", "Initialize and Cleanup functions", no_init);
287  std::shared_ptr<Framework> initguard{new Framework()};
288  scope().attr("__framework") = initguard;
289 
290  def("add_module_search_path", &Framework::addModuleSearchPath, R"DOCSTRING(
291 Add a directory in which to search for compiled basf2 C++ `Modules <Module>`.
292 
293 This directory needs to contain the shared libraries containing the compiled
294 modules as well as companion files ending in ``.b2modmap`` which contain a list
295 of the module names contained in each library.
296 
297 Note:
298  The newly added path will not override existing modules
299 
300 Parameters:
301  path (str): directory containing the modules.
302 )DOCSTRING", args("path"));
303  def("set_externals_path", &Framework::setExternalsPath, R"DOCSTRING(
304 Set the path to the externals to be used.
305 
306 Warning:
307  This will not change the library and executable paths but will just change
308  the directory where to look for certain data files like the Evtgen particle
309  definition file. Don't use this unless you really know what you are doing.
310 
311 Parameters:
312  path (str): new top level directory for the externals
313 )DOCSTRING", args("path"));
314  def("list_module_search_paths", &Framework::getModuleSearchPathsPython, R"DOCSTRING(
315 Return a python list containing all the directories included in the module
316 search Path.
317 
318 See:
319  `add_module_search_path`
320 )DOCSTRING");
321  def("list_available_modules", &Framework::getAvailableModulesPython, R"DOCSTRING(
322 Return a dictionary containing the names of all known modules
323 as keys and the name of the shared library containing these modules as values.
324 )DOCSTRING");
325  def("list_registered_modules", &Framework::getRegisteredModulesPython, R"DOCSTRING(
326 Return a list with pointers to all previously created module instances by calling `register_module()`
327 )DOCSTRING");
328  def("get_pickle_path", &Framework::getPicklePath, R"DOCSTRING(
329 Return the filename where the pickled path is or should be stored
330 )DOCSTRING");
331  def("set_pickle_path", &Framework::setPicklePath, R"DOCSTRING(
332 Set the filename where the pickled path should be stored or retrieved from
333 )DOCSTRING", args("path"));
  // NOTE(review): the docstring below names a parameter "nproc", but no
  // args(...) is passed to def() here, unlike set_pickle_path above.
334  def("set_nprocesses", &Framework::setNumberProcesses, R"DOCSTRING(
335 Sets number of worker processes for parallel processing.
336 
337 Can be overridden using the ``-p`` argument to basf2.
338 
339 Note:
340  Setting this to 1 will have one parallel worker job which is almost always
341  slower than just running without parallel processing but is still provided to
342  allow debugging of parallel execution.
343 
344 Parameters:
345  nproc (int): number of worker processes. 0 to disable parallel processing.
346 )DOCSTRING");
347  def("get_nprocesses", &Framework::getNumberProcesses, R"DOCSTRING(
348 Gets number of worker processes for parallel processing. 0 disables parallel processing
349 )DOCSTRING");
350  def("set_streamobjs", &Framework::setStreamingObjects, R"DOCSTRING(
351 Set the names of all DataStore objects which should be sent between the
352 parallel processes. This can be used to improve parallel processing performance
353 by removing objects not required.
354 )DOCSTRING");
355  {
356  // The register_module function is overloaded with different signatures which makes
357  // the boost docstring very useless so we handcraft a docstring
  // Within this scope the generated Python signatures are switched off as well;
  // the signature line is written by hand at the start of each docstring.
358  docstring_options param_options(true, false, false);
  // Both overloads are registered under the same Python name; only the second
  // one carries a docstring.
359  def("_register_module", registerModule1);
360  def("_register_module", registerModule2, R"DOCSTRING(register_module(name, library=None)
361 Register a new Module.
362 
363 This function will try to create a new instance of a module with the given name. If no library is given it will try to find the module by itself from the module search path. Optionally one can specify the name of a shared library containing the module code then this library will be loaded
364 
365 See:
366  `list_module_search_paths()`, `add_module_search_path()`
367 
368 Parameters:
369  name (str): Type of the module to create
370  library (str): Optional, name of a shared library containing the module
371 
372 Returns:
373  An instance of the module if successful.
374 
375 Raises:
376  will raise a `ModuleNotCreatedError` if there is any problem creating the module.
377 )DOCSTRING");
  // NOTE(review): the handwritten signature line below says "num_events" while
  // the rest of the docstring talks about "max_events".
378  def("_process", &Framework::process, process_overloads(R"DOCSTRING(process(path, num_events=0)
379 Processes up to max_events events by starting with the first module in the specified path.
380 
381  This method starts processing events only if there is a module in the path
382  which is capable of specifying the end of the data flow.
383 
384  Parameters:
385  path (Path): The processing starts with the first module of this path.
386  max_events (int): The maximum number of events that will be processed.
387  If the number is smaller than 1, all events will be processed (default).
388 )DOCSTRING"));
  // The lone semicolon below is an empty statement.
389  ;
390  }
391 
  // find_file is exposed with named arguments and defaults: data_type = "" and silent = false.
392  def("find_file", &Framework::findFile, (arg("filename"), arg("data_type") = "", arg("silent") = false), R"DOC(
393  Try to find a file and return its full path
394 
395  If ``data_type`` is empty this function will try to find the file
396 
397  1. in ``$BELLE2_LOCAL_DIR``,
398  2. in ``$BELLE2_RELEASE_DIR``
399  3. relative to the current working directory.
400 
401  Other known ``data_type`` values are
402 
403  ``examples``
404  Example data for examples and tutorials. Will try to find the file
405 
406  1. in ``$BELLE2_EXAMPLES_DATA_DIR``
407  2. in ``$VO_BELLE2_SW_DIR/examples-data``
408  3. relative to the current working directory
409 
410  ``validation``
411  Data for Validation purposes. Will try to find the file in
412 
413  1. in ``$BELLE2_VALIDATION_DATA_DIR``
414  2. in ``$VO_BELLE2_SW_DIR/validation-data``
415  3. relative to the current working directory
416 
417  .. versionadded:: release-03-00-00
418 
419  Arguments:
420  filename (str): relative filename to look for, either in a central place or
421  in the current working directory
422  data_type (str): case insensitive data type to find. Either empty string or
423  one of ``"examples"`` or ``"validation"``
424  silent (bool): If True don't print any errors and just return an empty
425  string if the file cannot be found
426  )DOC");
427 }
Belle2::ZMQEventProcessor
This class provides the core event processing loop for parallel processing with ZMQ.
Definition: ZMQEventProcessor.h:33
Belle2::pEventProcessor
This class provides the core event processing loop for parallel processing.
Definition: pEventProcessor.h:38
Belle2
Abstract base class for different kinds of events.
Definition: MillepedeAlgorithm.h:19
Belle2::PathPtr
std::shared_ptr< Path > PathPtr
Defines a pointer to a path object as a std::shared_ptr.
Definition: Path.h:30
Belle2::ModulePtr
std::shared_ptr< Module > ModulePtr
Defines a pointer to a module object as a std::shared_ptr.
Definition: Module.h:42
Belle2::Framework
This class combines all subsystems of the framework, and exports the main interface to Python.
Definition: Framework.h:39
Belle2::EventProcessor
provides the core event processing loop.
Definition: EventProcessor.h:39