Belle II Software light-2405-quaxo
Framework.cc
1/**************************************************************************
2 * basf2 (Belle II Analysis Software Framework) *
3 * Author: The Belle II Collaboration *
4 * *
5 * See git log for contributors and copyright holders. *
6 * This file is licensed under LGPL-3.0, see LICENSE.md. *
7 **************************************************************************/
8
9#include <framework/pybasf2/Framework.h>
10
11#include <framework/core/PyObjConvUtils.h>
12#include <framework/core/Environment.h>
13#include <framework/core/RandomNumbers.h>
14#include <framework/core/EventProcessor.h>
15#include <framework/core/ModuleManager.h>
16#include <framework/datastore/DataStore.h>
17#include <framework/database/DBStore.h>
18#include <framework/database/Database.h>
19#include <framework/pcore/pEventProcessor.h>
20#include <framework/pcore/ZMQEventProcessor.h>
21#include <framework/pcore/zmq/utils/ZMQAddressUtils.h>
22#include <framework/utilities/FileSystem.h>
23#include <framework/database/Configuration.h>
24
25#include <framework/logging/Logger.h>
26#include <framework/logging/LogSystem.h>
27
28#include <boost/algorithm/string.hpp>
29#include <boost/algorithm/string/join.hpp>
30#include <boost/python.hpp>
31
32#include <set>
33#include <vector>
34
35using namespace boost::python;
36using namespace Belle2;
37
38
40{
43
46 }
48}
49
50
52{
53 //empty module manager of modules
54 //since modules may contain shared pointers of Path objects created in Python,
55 //these shared pointers have special cleanup hooks that can cause crashes if run
56 //after Py_Finalize(). The framework object is cleaned up before, so this is a good place.
59 //Also the database configuration has things to cleanup before Py_Finalize()
61}
62
63
// Adds a new filepath to the list of filepaths which are searched for modules.
//
// path: directory to add to the module search paths.
// NOTE(review): no statement is visible in the body here - confirm the forwarding call in the full source.
64void Framework::addModuleSearchPath(const std::string& path)
65{
67}
68
69
// Sets the path in which the externals of the framework are located.
//
// path: new location of the externals.
// NOTE(review): no statement is visible in the body here - confirm the forwarding call in the full source.
70void Framework::setExternalsPath(const std::string& path)
71{
73}
74
75
76ModulePtr Framework::registerModule(const std::string& moduleName)
77{
78 return ModuleManager::Instance().registerModule(moduleName);
79}
80
81
82ModulePtr Framework::registerModule(const std::string& moduleName, const std::string& sharedLibPath)
83{
84 return ModuleManager::Instance().registerModule(moduleName, sharedLibPath);
85}
86
87
// Processes up to maxEvent events by starting with the first module in the specified path.
//
// startPath: path whose modules are executed; it is replaced by a clone when process()
//            has been called before and one of its modules was already run.
// maxEvent:  event limit handed unchanged to the selected event processor.
//
// Does nothing in dry-run mode. Function-local statics remember whether process() ran
// before, which module instances were part of earlier calls and how many errors had been
// logged after the previous run. Any exception is logged, the DataStore is reset and the
// exception is rethrown to the caller.
88void Framework::process(PathPtr startPath, long maxEvent)
89{
90 if (Environment::Instance().getDryRun()) {
92 return; //processing disabled!
93 }
94
95 static bool already_executed = false;
96 static std::set<const Module*> previously_run_modules; //not a shared pointer to not screw up ownership
97 static int errors_from_previous_run = 0;
// Built from the path as passed in, i.e. before a possible clone below.
98 const auto moduleListUnique = startPath->buildModulePathList(true);
99 if (already_executed) {
100 B2WARNING("Calling process() more than once per steering file is still experimental, please check results carefully! Python modules especially should reinitialise their state in initialise() to avoid problems");
// The two lists differ when the same module instance appears more than once in the path.
// NOTE(review): buildModulePathList(true) is computed again here although moduleListUnique
// already holds that result and startPath has not been modified in between.
101 if (startPath->buildModulePathList(true) != startPath->buildModulePathList(false)) {
102 B2FATAL("Your path contains the same module instance in multiple places. Calling process() multiple times is not implemented for this case.");
103 }
104
105 //were any modules in moduleListUnique already run?
106 for (const auto& m : moduleListUnique) {
107 if (previously_run_modules.count(m.get()) > 0) {
108 //only clone if modules have been run before
109 startPath = std::static_pointer_cast<Path>(startPath->clone());
110 break;
111 }
112 }
113 }
// Remember the module instances of the path as passed in (not those of a clone).
114 for (const auto& m : moduleListUnique) {
115 previously_run_modules.insert(m.get());
116 }
117
// Abort if errors were logged since the previous run finished.
// NOTE(review): the declaration of numLogError is not visible in this listing.
119 if (numLogError != errors_from_previous_run) {
120 B2FATAL(numLogError << " ERROR(S) occurred! The processing of events will not be started.");
121 }
122
123 try {
127
128 auto& environment = Environment::Instance();
129
130 already_executed = true;
// Zero processes: single-process EventProcessor. Otherwise ZMQEventProcessor when ZMQ is
// requested, else pEventProcessor.
131 if (environment.getNumberProcesses() == 0) {
132 EventProcessor processor;
133 processor.setProfileModuleName(environment.getProfileModuleName());
134 processor.process(startPath, maxEvent);
135 } else {
136 if (environment.getUseZMQ()) {
137 // If the user has not given any socket address, use a random one.
138 if (environment.getZMQSocketAddress().empty()) {
139 environment.setZMQSocketAddress(ZMQAddressUtils::randomSocketName());
140 }
141 ZMQEventProcessor processor;
142 processor.process(startPath, maxEvent);
143 } else {
144 pEventProcessor processor;
145 processor.process(startPath, maxEvent);
146 }
147 }
// Store the error count so that the check above only reacts to errors logged after this run.
148 errors_from_previous_run = LogSystem::Instance().getMessageCounter(LogConfig::c_Error);
149
// NOTE(review): the reset statements the following comment refers to are not visible in this listing.
151 // Also, reset the Database connection itself. However don't reset the
152 // configuration, just the actual setup. In case the user runs process()
153 // again it will reinitialize correctly with the same settings.
// NOTE(review): the exception is only read; catching by const reference would be the usual form.
155 } catch (std::exception& e) {
156 B2ERROR("Uncaught exception encountered: " << e.what()); //should show module name
157 DataStore::Instance().reset(); // ensure we are executed before ROOT's exit handlers
158 throw; //and let python's global handler do the rest
159 } catch (...) {
160 B2ERROR("Uncaught exception encountered!"); //should show module name
161 DataStore::Instance().reset(); // ensure we are executed before ROOT's exit handlers
162 throw; //and let python's global handler do the rest
163 //TODO: having a stack trace would be nicer, but somehow a handler I set using std::set_terminate() never gets called
164 }
165}
166
167
// Function to set number of worker processes for parallel processing.
//
// numProcesses: number of worker processes to use.
// NOTE(review): no statement is visible in the body here - confirm the forwarding call in the full source.
168void Framework::setNumberProcesses(int numProcesses)
169{
171}
172
173
175{
177}
178
179
// Function to set the path to the file where the pickled path is stored.
//
// path: file name of the pickled path.
// NOTE(review): no statement is visible in the body here - confirm the forwarding call in the full source.
180void Framework::setPicklePath(const std::string& path)
181{
183}
184
185
187{
189}
190
// Function to set streaming objects for Tx module.
//
// streamingObjects: Python list which is converted to a std::vector<std::string>.
191void Framework::setStreamingObjects(const boost::python::list& streamingObjects)
192{
// The second argument only selects the target type of the conversion.
193 auto vec = PyObjConvUtils::convertPythonObject(streamingObjects, std::vector<std::string>());
// NOTE(review): the statement consuming vec is not visible in this listing - confirm in the full source.
195}
196
// Function to set the execution realm.
//
// realm: realm name; compared case-insensitively with the string form of every
//        LogConfig realm from c_None up to and including c_Production.
//
// Logs a B2ERROR listing the known realm names when nothing matches.
197void Framework::setRealm(const std::string& realm)
198{
// -1 marks "no matching realm found".
199 int irealm = -1;
// Names seen so far; only complete when no realm matched, which is the only case it is used.
200 std::vector<std::string> realms;
201 for (int i = LogConfig::c_None; i <= LogConfig::c_Production; i++) {
// NOTE(review): C-style cast; static_cast<LogConfig::ELogRealm>(i) would be the named equivalent.
202 std::string thisRealm = LogConfig::logRealmToString((LogConfig::ELogRealm)i);
203 realms.push_back(thisRealm);
204 if (boost::iequals(realm, thisRealm)) { //case-insensitive
205 irealm = i;
206 break;
207 }
208 }
209 if (irealm < 0) {
210 B2ERROR("Invalid realm! Needs to be one of " << boost::join(realms, ", "));
211 } else {
// NOTE(review): the statement applying the matched realm is not visible in this listing.
213 }
214}
215
217{
218 B2WARNING("basf2 will write the simulation steps of each event into output csv files. "
219 "This is fine if you are producing events for the Belle II Virtual Reality application, "
220 "otherwise this function should not be used since the execution time will significantly increase.");
222}
223
224std::string Framework::findFile(const std::string& filename, const std::string& type, bool ignore_errors)
225{
226 std::string result;
227 if (type.empty()) {
228 //behave like FileSystem.findFile by using it
229 result = FileSystem::findFile(filename, ignore_errors);
230 } else {
231 result = FileSystem::findFile(filename, type, ignore_errors);
232 }
233 if (!ignore_errors and result.empty()) {
234 // Still not found ... see if we raise an exception or not.
235 // We want a FileNotFoundError ... so lets fudge the errno to the correct
236 // error value and then create the correct exception in python
237 errno = ENOENT;
238 PyErr_SetFromErrnoWithFilename(PyExc_FileNotFoundError, filename.c_str());
239 boost::python::throw_error_already_set();
240 }
241 return result;
242}
243
244//=====================================================================
245// Python API
246//=====================================================================
247
249{
250 boost::python::list returnList;
251
252 for (const std::string& path : ModuleManager::Instance().getModuleSearchPaths())
253 returnList.append(boost::python::object(path));
254 return returnList;
255}
256
257
259{
260 boost::python::dict returnDict;
261 for (const auto& modulePair : ModuleManager::Instance().getAvailableModules())
262 returnDict[boost::python::object(modulePair.first)] = boost::python::object(modulePair.second);
263 return returnDict;
264}
265
266
268{
269 boost::python::list returnList;
270
271 for (const ModulePtr& mod : ModuleManager::Instance().getCreatedModules())
272 returnList.append(boost::python::object(mod));
273 return returnList;
274}
275
276
277#if !defined(__GNUG__) || defined(__ICC)
278#else
279#pragma GCC diagnostic push
280#pragma GCC diagnostic ignored "-Wunused-local-typedefs"
281#endif
282BOOST_PYTHON_FUNCTION_OVERLOADS(process_overloads, Framework::process, 1, 2)
283#if !defined(__GNUG__) || defined(__ICC)
284#else
285#pragma GCC diagnostic pop
286#endif
287
288namespace {
289 PyObject* PyExc_ModuleNotCreatedError{nullptr};
292 void moduleNotCreatedTranslator(const ModuleManager::ModuleNotCreatedError& e)
293 {
294 PyErr_SetString(PyExc_ModuleNotCreatedError, e.what());
295 }
296}
297
299{
300 PyExc_ModuleNotCreatedError = PyErr_NewExceptionWithDoc("basf2.ModuleNotCreatedError",
301 "This exception is raised when a basf2 module could not be created for any reason",
302 PyExc_RuntimeError, nullptr);
303 scope().attr("ModuleNotCreatedError") = handle<>(borrowed(PyExc_ModuleNotCreatedError));
304 register_exception_translator<ModuleManager::ModuleNotCreatedError>(moduleNotCreatedTranslator);
305 //Overloaded methods
306 ModulePtr(*registerModule1)(const std::string&) = &Framework::registerModule;
307 ModulePtr(*registerModule2)(const std::string&, const std::string&) = &Framework::registerModule;
308
309 //don't show c++ signature in python doc to keep it simple
310 docstring_options options(true, true, false);
311
312 //Expose framework class
313 class_<Framework, std::shared_ptr<Framework>, boost::noncopyable>("Framework", "Initialize and Cleanup functions", no_init);
314 std::shared_ptr<Framework> initguard{new Framework()};
315 scope().attr("__framework") = initguard;
316
317 def("add_module_search_path", &Framework::addModuleSearchPath, R"DOCSTRING(
318Add a directory in which to search for compiled basf2 C++ `Modules <Module>`.
319
320This directory needs to contain the shared libraries containing the compiled
321modules as well as companion files ending in ``.b2modmap`` which contain a list
322of the module names contained in each library.
323
324Note:
325 The newly added path will not override existing modules
326
327Parameters:
328 path (str): directory containing the modules.
329)DOCSTRING", args("path"));
330 def("set_externals_path", &Framework::setExternalsPath, R"DOCSTRING(
331Set the path to the externals to be used.
332
333Warning:
334 This will not change the library and executable paths but will just change
335 the directory where to look for certain data files like the Evtgen particle
336 definition file. Don't use this unless you really know what you are doing.
337
338Parameters:
339 path (str): new top level directory for the externals
340)DOCSTRING", args("path"));
341 def("list_module_search_paths", &Framework::getModuleSearchPathsPython, R"DOCSTRING(
342Return a python list containing all the directories included in the module
343search Path.
344
345See:
346 `add_module_search_path`
347)DOCSTRING");
348 def("list_available_modules", &Framework::getAvailableModulesPython, R"DOCSTRING(
349Return a dictionary containing the names of all known modules
350as keys and the name of the shared library containing these modules as values.
351)DOCSTRING");
352 def("list_registered_modules", &Framework::getRegisteredModulesPython, R"DOCSTRING(
353Return a list with pointers to all previously created module instances by calling `register_module()`
354)DOCSTRING");
355 def("get_pickle_path", &Framework::getPicklePath, R"DOCSTRING(
356Return the filename where the pickled path is or should be stored
357)DOCSTRING");
358 def("set_pickle_path", &Framework::setPicklePath, R"DOCSTRING(
359Set the filename where the pickled path should be stored or retrieved from
360)DOCSTRING", args("path"));
361 def("set_nprocesses", &Framework::setNumberProcesses, R"DOCSTRING(
362Sets number of worker processes for parallel processing.
363
364Can be overridden using the ``-p`` argument to basf2.
365
366Note:
367 Setting this to 1 will have one parallel worker job which is almost always
368 slower than just running without parallel processing but is still provided to
369 allow debugging of parallel execution.
370
371Parameters:
372 nproc (int): number of worker processes. 0 to disable parallel processing.
373)DOCSTRING");
374 def("get_nprocesses", &Framework::getNumberProcesses, R"DOCSTRING(
375Gets number of worker processes for parallel processing. 0 disables parallel processing
376)DOCSTRING");
377 def("set_streamobjs", &Framework::setStreamingObjects, R"DOCSTRING(
378Set the names of all DataStore objects which should be sent between the
379parallel processes. This can be used to improve parallel processing performance
380by removing objects not required.
381)DOCSTRING");
382 {
383 // The register_module function is overloaded with different signatures which makes
384 // the boost docstring very useless so we handcraft a docstring
385 docstring_options param_options(true, false, false);
386 def("_register_module", registerModule1);
387 def("_register_module", registerModule2, R"DOCSTRING(register_module(name, library=None)
388Register a new Module.
389
390This function will try to create a new instance of a module with the given name. If no library is given it will try to find the module by itself from the module search path. Optionally one can specify the name of a shared library containing the module code then this library will be loaded
391
392See:
393 `list_module_search_paths()`, `add_module_search_path()`
394
395Parameters:
396 name (str): Type of the module to create
397 library (str): Optional, name of a shared library containing the module
398
399Returns:
400 An instance of the module if successful.
401
402Raises:
403 will raise a `ModuleNotCreatedError` if there is any problem creating the module.
404)DOCSTRING");
405 def("set_realm", &Framework::setRealm, R"DOCSTRING(
406Set the basf2 execution realm.
407
408The severity of log messages sometimes depends on where basf2 runs. This is controlled by the execution realm.
409
410Usually the realm does not have to be set explicitly. On the HLT or express reco it should be set to 'online' and for official productions to 'production'.
411)DOCSTRING", args("realm"));
412 def("write_simulation_steps", &Framework::writeSimulationSteps, R"DOCSTRING(
413Allow basf2 to write the simulation steps of each event into csv files.
414
415This function should not be used in production jobs because the execution time will significantly increase.
416)DOCSTRING");
417 def("_process", &Framework::process, process_overloads(R"DOCSTRING(process(path, num_events=0)
418Processes up to max_events events by starting with the first module in the specified path.
419
420 This method starts processing events only if there is a module in the path
421 which is capable of specifying the end of the data flow.
422
423 Parameters:
424 path (Path): The processing starts with the first module of this path.
425 max_events (int): The maximum number of events that will be processed.
426 If the number is smaller than 1, all events will be processed (default).
427)DOCSTRING"));
428 ;
429 }
430
431 def("find_file", &Framework::findFile, (arg("filename"), arg("data_type") = "", arg("silent") = false), R"DOC(
432 Try to find a file and return its full path
433
434 If ``data_type`` is empty this function will try to find the file
435
436 1. in ``$BELLE2_LOCAL_DIR``,
437 2. in ``$BELLE2_RELEASE_DIR``
438 3. relative to the current working directory.
439
440 Other known ``data_type`` values are
441
442 ``examples``
443 Example data for examples and tutorials. Will try to find the file
444
445 1. in ``$BELLE2_EXAMPLES_DATA_DIR``
446 2. relative to the current working directory
447
448 ``validation``
449 Data for Validation purposes. Will try to find the file in
450
451 1. in ``$BELLE2_VALIDATION_DATA_DIR``
452 2. relative to the current working directory
453
454 .. versionadded:: release-03-00-00
455
456 Arguments:
457 filename (str): relative filename to look for, either in a central place or
458 in the current working directory
459 data_type (str): case insensitive data type to find. Either empty string or
460 one of ``"examples"`` or ``"validation"``
461 silent (bool): If True don't print any errors and just return an empty
462 string if the file cannot be found
463 )DOC");
464}
static Configuration & getInstance()
Get a reference to the instance which will be used when the Database is initialized.
void reset()
Reset to default values.
static DataStore & Instance()
Instance of singleton Store.
Definition: DataStore.cc:54
void setInitializeActive(bool active)
Setter for m_initializeActive.
Definition: DataStore.cc:94
static bool s_DoCleanup
Global flag to decide if we can do normal cleanup.
Definition: DataStore.h:100
void reset(EDurability durability)
Frees memory occupied by data store items and removes all objects from the map.
Definition: DataStore.cc:86
void setNumberProcesses(int number)
Sets the number of processes which should be used for the parallel processing.
Definition: Environment.h:140
void setRealm(LogConfig::ELogRealm realm)
Set the basf2 execution realm.
Definition: Environment.cc:61
void setJobInformation(const std::shared_ptr< Path > &path)
Set info from path executed by the framework.
Definition: Environment.cc:160
int getNumberProcesses() const
Returns the number of worker processes which should be used for the parallel processing.
Definition: Environment.h:145
void setExternalsPath(const std::string &externalsPath)
Sets the path which points to the externals directory of the framework.
Definition: Environment.h:53
void setStreamingObjects(const std::vector< std::string > &strobjs)
Set list of streaming objects.
Definition: Environment.h:229
void setWriteSimSteps(const bool writeSimSteps)
Set the flag for writing the simulation steps into an output csv file.
Definition: Environment.h:205
static Environment & Instance()
Static method to get a reference to the Environment instance.
Definition: Environment.cc:28
void setPicklePath(const std::string &path)
Sets the path to the file where the pickled path is stored.
Definition: Environment.h:158
std::string getPicklePath() const
Returns the path to the file where the pickled path is stored.
Definition: Environment.h:165
provides the core event processing loop.
void setProfileModuleName(const std::string &name)
Set the name of the module we want to profile.
void process(const PathPtr &startPath, long maxEvent=0)
Processes the full module chain, starting with the first module in the given path.
static std::string findFile(const std::string &path, bool silent=false)
Search for given file or directory in local or central release directory, and return absolute path if...
Definition: FileSystem.cc:151
static boost::python::list getRegisteredModulesPython()
Returns a list of all registered modules.
Definition: Framework.cc:267
static void writeSimulationSteps()
Function for writing the simulation steps of each event into csv files.
Definition: Framework.cc:216
static std::string findFile(const std::string &filename, const std::string &type, bool ignore_errors=false)
Find a file.
Definition: Framework.cc:224
static boost::python::list getModuleSearchPathsPython()
Returns a list of all module search paths known to the framework.
Definition: Framework.cc:248
static void setNumberProcesses(int numProcesses)
Function to set number of worker processes for parallel processing.
Definition: Framework.cc:168
static void exposePythonAPI()
Exposes methods of the Framework class to Python.
Definition: Framework.cc:298
static void setRealm(const std::string &realm)
Function to set the execution realm.
Definition: Framework.cc:197
static boost::python::dict getAvailableModulesPython()
Returns a dictionary containing the found modules and the filenames of the shared libraries in which ...
Definition: Framework.cc:258
static void setStreamingObjects(const boost::python::list &streamingObjects)
Function to set streaming objects for Tx module.
Definition: Framework.cc:191
static std::string getPicklePath()
Function to get the path to the file where the pickled path is stored.
Definition: Framework.cc:186
static ModulePtr registerModule(const std::string &moduleName)
Registers a new module to the framework and returns a shared pointer.
Definition: Framework.cc:76
static void setPicklePath(const std::string &path)
Function to set the path to the file where the pickled path is stored.
Definition: Framework.cc:180
static void setExternalsPath(const std::string &path)
Sets the path in which the externals of the framework are located.
Definition: Framework.cc:70
virtual ~Framework()
Destructor.
Definition: Framework.cc:51
static int getNumberProcesses()
Function to get number of worker processes for parallel processing.
Definition: Framework.cc:174
Framework()
Constructor.
Definition: Framework.cc:39
static void addModuleSearchPath(const std::string &path)
Adds a new filepath to the list of filepaths which are searched for modules.
Definition: Framework.cc:64
static void process(PathPtr startPath, long maxEvent=0)
Processes up to maxEvent events by starting with the first module in the specified path.
Definition: Framework.cc:88
static const char * logRealmToString(ELogRealm realm)
Converts a log realm type to a string.
Definition: LogConfig.cc:49
@ c_Error
Error: for things that went wrong and have to be fixed.
Definition: LogConfig.h:30
ELogRealm
Definition of the supported execution realms.
Definition: LogConfig.h:48
@ c_None
No specific realm.
Definition: LogConfig.h:48
@ c_Production
Data production jobs.
Definition: LogConfig.h:50
void resetMessageCounter()
Resets the message counter and error log by setting all message counts to 0.
Definition: LogSystem.cc:150
void enableErrorSummary(bool on)
enable/disable error/warning summary after successful execution and B2FATAL.
Definition: LogSystem.h:181
int getMessageCounter(LogConfig::ELogLevel logLevel) const
Returns the number of logging calls per log level.
Definition: LogSystem.cc:161
static LogSystem & Instance()
Static method to get a reference to the LogSystem instance.
Definition: LogSystem.cc:31
std::shared_ptr< Module > registerModule(const std::string &moduleName, std::string sharedLibPath="") noexcept(false)
Creates an instance of a module and registers it to the ModuleManager.
static ModuleManager & Instance()
Exception is thrown if the requested module could not be created by the ModuleManager.
void reset()
Delete all created modules.
void addModuleSearchPath(const std::string &path)
Adds a new filepath to the list of filepaths which are searched for a requested module.
static bool isInitialized()
Truth that the random number generator has been initialized.
static void initialize()
Initialize the random number generator with a unique random seed.
static std::string randomSocketName()
Generate a random socket name in the form ipc:///socketname.
This class provides the core event processing loop for parallel processing with ZMQ.
void process(const PathPtr &spath, long maxEvent)
Processes the full module chain using parallel processing, starting with the first module in the give...
This class provides the core event processing loop for parallel processing.
void process(const PathPtr &spath, long maxEvent)
Processes the full module chain, starting with the first module in the given path.
void reset(bool keepEntries=false)
Invalidate all payloads.
Definition: DBStore.cc:177
std::shared_ptr< Path > PathPtr
Defines a pointer to a path object as a std::shared_ptr.
Definition: Path.h:35
static Database & Instance()
Instance of a singleton Database.
Definition: Database.cc:42
static DBStore & Instance()
Instance of a singleton DBStore.
Definition: DBStore.cc:28
std::shared_ptr< Module > ModulePtr
Defines a pointer to a module object as a std::shared_ptr.
Definition: Module.h:43
static void reset(bool keepConfig=false)
Reset the database instance.
Definition: Database.cc:50
Scalar convertPythonObject(const boost::python::object &pyObject, Scalar)
Convert from Python to given type.
Abstract base class for different kinds of events.
Definition: ClusterUtils.h:24