Belle II Software development
basf2.cc
1/**************************************************************************
2 * basf2 (Belle II Analysis Software Framework) *
3 * Author: The Belle II Collaboration *
4 * *
5 * See git log for contributors and copyright holders. *
6 * This file is licensed under LGPL-3.0, see LICENSE.md. *
7 **************************************************************************/
8
9/*
10 * There are two ways to work with the framework. Either
11 * by executing "basf2" and providing a python steering
12 * file as an argument or by using the framework within
13 * python itself.
14 *
15 * This file implements the main executable "basf2".
16 */
17
18#include <Python.h> //Has to be the first include (restriction due to python)
19
20#include <framework/core/Environment.h>
21#include <framework/core/DataFlowVisualization.h>
22#include <framework/core/MetadataService.h>
23#include <framework/core/Module.h>
24#include <framework/core/ModuleManager.h>
25#include <framework/core/RandomNumbers.h>
26#include <framework/logging/Logger.h>
27#include <framework/logging/LogConfig.h>
28#include <framework/logging/LogSystem.h>
29#include <framework/utilities/FileSystem.h>
30
31
32#include <boost/program_options.hpp>
33#include <boost/algorithm/string/predicate.hpp> //for iequals()
34
35#include <csignal>
36#include <cstdlib>
37#include <iostream>
38#include <algorithm>
39#include <string>
40#include <vector>
41#include <fstream>
42#include <locale>
43#include <codecvt>
44#include <filesystem>
45
46#ifdef HAS_CALLGRIND
47#include <valgrind/valgrind.h>
48#endif
49
50using namespace std;
51using namespace Belle2;
52
53namespace prog = boost::program_options;
54
55namespace {
56
57 void checkPythonStatus(PyConfig& config, PyStatus& status)
58 {
59 if (PyStatus_Exception(status)) {
60 PyConfig_Clear(&config);
61 Py_ExitStatusException(status);
62 }
63 }
64}
65
/**
 * Entry point of the "basf2" executable.
 *
 * Parses the command line with boost::program_options, selects the python
 * file to run (the given steering file, one of the bundled helper scripts, or
 * "interactive.py" when nothing else was requested), then configures an
 * embedded Python interpreter with that file plus the remaining arguments and
 * runs it through Py_RunMain().
 *
 * NOTE(review): in this listing several option handlers read and validate a
 * value but contain no statement that stores it (e.g. --processes, --zmq,
 * --events, --input, --output, --no-stats, --dry-run). Presumably the
 * corresponding setter calls were lost from this copy of the file; confirm
 * against the repository before relying on the code as shown.
 *
 * @param argc Number of command line arguments.
 * @param argv Command line arguments; argv[0] is used as the program name.
 * @return 0 after printing --help, 1 if option handling throws an exception,
 *         errno if re-executing under valgrind fails (HAS_CALLGRIND builds
 *         only), otherwise the value returned by Py_RunMain().
 */
66int main(int argc, char* argv[])
67{
68 //remove SIGPIPE handler set by ROOT which sometimes caused infinite loops
69 //See https://savannah.cern.ch/bugs/?97991
70 //default action is to abort
71 if (signal(SIGPIPE, SIG_DFL) == SIG_ERR) {
72 B2FATAL("Cannot remove SIGPIPE signal handler");
73 }
74
75 //Initialize metadata service
 // NOTE(review): no statement follows the comment above in this listing.
77
78 //Check for Belle2 environment variables (during environment initialisation)
 // NOTE(review): no statement follows the comment above in this listing.
80
81 //Get the lib path (checked for NULL in Environment)
82 const char* belle2SubDir = getenv("BELLE2_SUBDIR");
83 std::filesystem::path libPath = "lib";
84 libPath /= belle2SubDir;
85
86 string runModuleIOVisualization(""); //nothing done if empty
 // Filled by the positional "arg" option; later forwarded to Python as argv[2..].
87 vector<string> arguments;
 // Script handed to the Python interpreter; chosen from the options below.
88 string pythonFile;
89
90 try {
91 //---------------------------------------------------
92 // Handle command line options
93 //---------------------------------------------------
94
95 prog::options_description generic("Generic options (to be used instead of steering file)");
96 generic.add_options()
97 ("help,h", "Print this help")
98 ("version,v", "Print long and verbose version string")
99 ("version-short", "Print short version string")
100 ("info", "Print information about basf2")
101 ("license", "Print the short version of the basf2 license")
102 ("modules,m", prog::value<string>()->implicit_value(""),
103 "Print a list of all available modules (can be limited to a given package), or give detailed information on a specific module given as an argument (case sensitive).")
104 ;
105
106 prog::options_description config("Configuration");
107 config.add_options()
108 ("steering", prog::value<string>(), "The python steering file to run.")
109 ("arg", prog::value<vector<string> >(&arguments), "Additional arguments to be passed to the steering file")
110 ("log_level,l", prog::value<string>(),
111 "Set global log level (one of DEBUG, INFO, RESULT, WARNING, or ERROR). Takes precedence over set_log_level() in steering file.")
112 ("package_log_level", prog::value<vector<string> >(),
113 "Set package log level. Can be specified multiple times to use more than one package. (Examples: 'klm:INFO or cdc:DEBUG:10') ")
114 ("random-seed", prog::value<string>(),
115 "Set the default initial seed for the random number generator. "
116 "This does not take precedence over calls to set_random_seed() in the steering file, but just changes the default. "
117 "If no seed is set via either of these mechanisms, the initial seed will be taken from the system's entropy pool.")
118 ("debug_level,d", prog::value<unsigned int>(), "Set default debug level. Also sets the log level to DEBUG.")
119 ("events,n", prog::value<unsigned int>(), "Override number of events for EventInfoSetter; otherwise set maximum number of events.")
120 ("run", prog::value<int>(), "Override run for EventInfoSetter, must be used with -n and --experiment")
121 ("experiment", prog::value<int>(), "Override experiment for EventInfoSetter, must be used with -n and --run")
122 ("skip-events", prog::value<unsigned int>(),
123 "Override skipNEvents for EventInfoSetter and RootInput. Skips this many events before starting.")
124 ("input,i", prog::value<vector<string> >(),
125 "Override name of input file for (Seq)RootInput. Can be specified multiple times to use more than one file. For RootInput, wildcards (as in *.root or [1-3].root) can be used, but need to be escaped with \\ or by quoting the argument to avoid expansion by the shell.")
126 ("sequence,S", prog::value<vector<string> >(),
127 "Override the number sequence (e.g. 23:42,101) defining the entries (starting from 0) which are processed by RootInput."
128 "Must be specified exactly once for each file to be opened."
129 "This means one sequence per input file AFTER wildcard expansion."
130 "The first event has the number 0.")
131 ("output,o", prog::value<string>(),
132 "Override name of output file for (Seq)RootOutput. In case multiple modules are present in the path, only the first will be affected.")
133 ("processes,p", prog::value<int>(), "Override number of worker processes (>=1 enables, 0 disables parallel processing)");
134
135 prog::options_description advanced("Advanced Options");
136 advanced.add_options()
137 ("module-io", prog::value<string>(),
138 "Create diagram of inputs and outputs for a single module, saved as ModuleName.dot. To create a PostScript file, use e.g. 'dot ModuleName.dot -Tps -o out.ps'.")
139 ("visualize-dataflow", "Generate data flow diagram (dataflow.dot) for the executed steering file.")
140 ("no-stats",
141 "Disable collection of statistics during event processing. Useful for very high-rate applications, but produces empty table with 'print(statistics)'.")
142 ("dry-run",
143 "Read steering file, but do not start any event processing when process(path) is called. Prints information on input/output files that would be used during normal execution.")
144 ("dump-path", prog::value<string>(),
145 "Read steering file, but do not actually start any event processing. The module path the steering file would execute is instead pickled (serialized) into the given file.")
146 ("execute-path", prog::value<string>(),
147 "Do not read any provided steering file, instead execute the pickled (serialized) path from the given file.")
148 ("zmq",
149 "Use ZMQ for multiprocessing instead of a RingBuffer. This has many implications and should only be used by experts.")
150 ("job-information", prog::value<string>(),
151 "Create json file with metadata of output files and basf2 execution status.")
152 ("realm", prog::value<string>(),
153 "Set the realm of the basf2 execution (online or production).")
154 ("secondary-input", prog::value<vector<string>>(),
155 "Override name of input file for the secondary RootInput module used for the event embedding. Can be specified multiple times to use more than one file. Wildcards (as in *.root or [1-3].root) can be used, but need to be escaped with \\ or by quoting the argument to avoid expansion by the shell.")
156#ifdef HAS_CALLGRIND
157 ("profile", prog::value<string>(),
158 "Name of a module to profile using callgrind. If more than one module of that name is registered only the first one will be profiled.")
159#endif
160 ;
161
162 prog::options_description cmdlineOptions;
163 cmdlineOptions.add(generic).add(config).add(advanced);
164
 // The first positional argument is the steering file, all following ones go to "arg".
165 prog::positional_options_description posOptDesc;
166 posOptDesc.add("steering", 1);
167 posOptDesc.add("arg", -1);
168
169 prog::variables_map varMap;
170 prog::store(prog::command_line_parser(argc, argv).
171 options(cmdlineOptions).positional(posOptDesc).run(), varMap);
172 prog::notify(varMap);
173
174 //Check for non-steering file options
 // The chain below is ordered: the first matching option decides which python file runs.
175 if (varMap.count("help")) {
176 cout << "Usage: " << argv[0] << " [OPTIONS] [STEERING_FILE] [-- [STEERING_FILE_OPTIONS]]\n";
177 cout << cmdlineOptions << endl;
178 return 0;
179 } else if (varMap.count("version")) {
180 pythonFile = "basf2/version.py";
181 } else if (varMap.count("version-short")) {
182 pythonFile = "basf2/version_short.py";
183 } else if (varMap.count("info")) {
184 pythonFile = "basf2_cli/print_info.py";
185 } else if (varMap.count("license")) {
186 pythonFile = "basf2_cli/print_license.py";
187 } else if (varMap.count("modules")) {
188 string modArgs = varMap["modules"].as<string>();
189 if (!modArgs.empty()) {
190 arguments.insert(arguments.begin(), modArgs);
191 }
192 // recent boost program_options will not consume extra tokens for
193 // implicit options. In this case the module/package name gets consumed
194 // in the steering file so we just use that.
195 if (varMap.count("steering")) {
196 arguments.insert(arguments.begin(), varMap["steering"].as<string>());
197 }
198 pythonFile = "basf2_cli/modules.py";
199 } else if (varMap.count("module-io")) {
200 runModuleIOVisualization = varMap["module-io"].as<string>();
201 pythonFile = "basf2/core.py"; //make module maps available, visualization will happen later
202 } else if (varMap.count("execute-path")) {
203 Environment::Instance().setPicklePath(varMap["execute-path"].as<string>());
204 pythonFile = "basf2_cli/execute_pickled_path.py";
205 } else if (varMap.count("steering")) {
206 // steering file not misused as module name, so print its name
207 pythonFile = varMap["steering"].as<string>();
208 } else {
209 // launch an interactive python session.
210 pythonFile = "interactive.py";
211 }
212
213 if (!pythonFile.empty()) {
214 //Search in local or central lib/ if this isn't a direct path
215 if (!std::filesystem::exists(pythonFile)) {
216 std::string libFile = FileSystem::findFile((libPath / pythonFile).string(), true);
217 if (!libFile.empty())
218 pythonFile = libFile;
219 }
220 if (varMap.count("steering") and not varMap.count("modules")) {
221 B2INFO("Steering file: " << pythonFile);
222 }
223 }
224
225 // -p
226 // Do now so that we can override if profiling is requested
227 if (varMap.count("processes")) {
228 int nprocesses = varMap["processes"].as<int>();
229 if (nprocesses < 0) {
230 B2FATAL("Invalid number of processes!");
231 }
 // NOTE(review): nprocesses is validated but not used afterwards in this listing.
233 }
234
235 // --zmq
236 if (varMap.count("zmq")) {
 // NOTE(review): empty body in this listing.
238 }
239
240
241#ifdef HAS_CALLGRIND
242 if (varMap.count("profile")) {
243 string profileModule = varMap["profile"].as<string>();
244 //We want to profile a module so check if we are running under valgrind
245 if (!RUNNING_ON_VALGRIND) {
246 //Apparently not. Ok, let's call ourselves using valgrind
247 cout << "Profiling requested, restarting using callgrind" << endl;
248
249 //Sadly calling processes in C++ is very annoying as we have to
250 //build a command line.
251 vector<char*> cmd;
252 //First we add all valgrind arguments.
253 const vector<string> valgrind_argv {
254 "valgrind", "--tool=callgrind", "--instr-atstart=no", "--trace-children=no",
255 "--callgrind-out-file=callgrind." + profileModule + ".%p",
256 };
257 //As execvp wants non-const char* pointers we have to copy the string contents.
 //The strdup() copies are never freed here; the process is either replaced or returns right away.
258 cmd.reserve(valgrind_argv.size());
259 for (const auto& arg : valgrind_argv) { cmd.push_back(strdup(arg.c_str())); }
260 //And now we add our own arguments, including the program name.
261 for (int i = 0; i < argc; ++i) { cmd.push_back(argv[i]); }
262 //Finally, execvp wants a nullptr as last argument
263 cmd.push_back(nullptr);
264 //And call this thing. Execvp will not return if successful as the
265 //current process will be replaced so we do not need to care about what
266 //happens if successful
267 if (execvp(cmd[0], cmd.data()) == -1) {
 //Save errno before perror() so the returned value is the one set by execvp.
268 int errsv = errno;
269 perror("Problem calling valgrind");
270 return errsv;
271 }
272 }
273 //Ok, running under valgrind, set module name we want to profile in
274 //environment.
276 //and make sure there is no multiprocessing when profiling
 // NOTE(review): the statements described by the two comments above are not present in this listing.
278 }
279#endif
280
281 // -n
282 if (varMap.count("events")) {
283 unsigned int nevents = varMap["events"].as<unsigned int>();
 // Both 0 and the maximum unsigned value are rejected.
284 if (nevents == 0 or nevents == std::numeric_limits<unsigned int>::max()) {
285 B2FATAL("Invalid number of events (valid range: 1.." << std::numeric_limits<unsigned int>::max() - 1 << ")!");
286 }
 // NOTE(review): nevents is validated but not used afterwards in this listing.
288 }
289 // --run & --experiment
 // Either option requires the other one and --events/-n.
290 if (varMap.count("experiment") or varMap.count("run")) {
291 if (!varMap.count("events"))
292 B2FATAL("--experiment and --run must be used with --events/-n!");
293 if (!(varMap.count("run") and varMap.count("experiment")))
294 B2FATAL("Both --experiment and --run must be specified!");
295
296 int run = varMap["run"].as<int>();
297 int experiment = varMap["experiment"].as<int>();
298 B2ASSERT("run must be >= 0!", run >= 0);
299 B2ASSERT("experiment must be >= 0!", experiment >= 0);
 // NOTE(review): run and experiment are validated but not used afterwards in this listing.
301 }
302
303 // --skip-events
304 if (varMap.count("skip-events")) {
305 unsigned int skipevents = varMap["skip-events"].as<unsigned int>();
307 }
308
309 // -i
310 if (varMap.count("input")) {
311 const auto& names = varMap["input"].as<vector<string>>();
313 }
314
315 // -S
316 if (varMap.count("sequence")) {
317 const auto& sequences = varMap["sequence"].as<vector<string>>();
319 }
320
321 // -o
322 if (varMap.count("output")) {
323 std::string name = varMap["output"].as<string>();
325 }
326
327 // -l
328 if (varMap.count("log_level")) {
329 std::string levelParam = varMap["log_level"].as<string>();
330 int level = -1;
 // Compare against every level name from c_Debug up to, but excluding, c_Fatal.
331 for (int i = LogConfig::c_Debug; i < LogConfig::c_Fatal; i++) {
332 std::string thisLevel = LogConfig::logLevelToString((LogConfig::ELogLevel)i);
333 if (boost::iequals(levelParam, thisLevel)) { //case-insensitive
334 level = i;
335 break;
336 }
337 }
338 if (level < 0) {
339 B2FATAL("Invalid log level! Needs to be one of DEBUG, INFO, RESULT, WARNING, or ERROR.");
340 }
341
342 //set log level
344 //and make sure it takes precedence over anything in the steering file
 // NOTE(review): the statements described by the two comments above are not present in this listing.
346 }
347
348 // --package_log_level
 // Each entry has the form packageName:LOGLEVEL or packageName:DEBUG:LEVEL.
349 if (varMap.count("package_log_level")) {
350 const auto& packLogList = varMap["package_log_level"].as<vector<string>>();
351 const std::string delimiter = ":";
352 for (const std::string& packLog : packLogList) {
353 if (packLog.find(delimiter) == std::string::npos) {
354 B2FATAL("In --package_log_level input " << packLog << ", no colon detected. ");
 // NOTE(review): this break only matters if B2FATAL returns — presumably it does not; confirm.
355 break;
356 }
357 /* string parsing for packageName:LOGLEVEL or packageName:DEBUG:LEVEL*/
358 auto packageName = packLog.substr(0, packLog.find(delimiter));
359 std::string logName = packLog.substr(packLog.find(delimiter) + delimiter.length(), packLog.length());
 // -1 means that no debug level was given or that it could not be parsed.
360 int debugLevel = -1;
361 if ((logName.find("DEBUG") != std::string::npos) && logName.length() > 5) {
362 try {
363 debugLevel = std::stoi(logName.substr(logName.find(delimiter) + delimiter.length(), logName.length()));
364 } catch (std::exception& e) {
365 B2WARNING("In --package_log_level, issue parsing debugLevel. Still setting log level to DEBUG.");
366 }
367 logName = "DEBUG";
368 }
369
370 int level = -1;
371 /* determine log level for package */
372 for (int i = LogConfig::c_Debug; i < LogConfig::c_Fatal; i++) {
373 std::string thisLevel = LogConfig::logLevelToString((LogConfig::ELogLevel)i);
374 if (boost::iequals(logName, thisLevel)) { //case-insensitive
375 level = i;
376 break;
377 }
378 }
379 if (level < 0) {
380 B2FATAL("Invalid log level! Needs to be one of DEBUG, INFO, RESULT, WARNING, or ERROR.");
381 }
382 /* set package log level*/
383 if ((logName == "DEBUG") && (debugLevel >= 0)) {
384 LogSystem::Instance().getPackageLogConfig(packageName).setDebugLevel(debugLevel);
385 }
 // NOTE(review): level is computed above but not applied to the package in this listing.
387
388 }
389 }
390
391 // -d
392 if (varMap.count("debug_level")) {
393 unsigned int level = varMap["debug_level"].as<unsigned int>();
396 }
397
398 if (varMap.count("visualize-dataflow")) {
400 if (Environment::Instance().getNumberProcesses() > 0) {
401 B2WARNING("--visualize-dataflow cannot be used with parallel processing, no graphs will be saved!");
402 }
403 }
404
405 if (varMap.count("no-stats")) {
407 }
408
409 if (varMap.count("dry-run")) {
411 }
412
413 if (varMap.count("dump-path")) {
414 Environment::Instance().setPicklePath(varMap["dump-path"].as<string>());
415 }
416
417 if (varMap.count("random-seed")) {
418 RandomNumbers::initialize(varMap["random-seed"].as<string>());
419 }
420
421 if (varMap.count("job-information")) {
422 string jobInfoFile = varMap["job-information"].as<string>();
424 B2INFO("Job information file: " << jobInfoFile);
425 }
426
427 if (varMap.count("realm")) {
428 std::string realmParam = varMap["realm"].as<string>();
429 int realm = -1;
 // Unlike the log level loops, this range includes its upper bound c_Production.
430 for (int i = LogConfig::c_Online; i <= LogConfig::c_Production; i++) {
431 std::string thisRealm = LogConfig::logRealmToString((LogConfig::ELogRealm)i);
432 if (boost::iequals(realmParam, thisRealm)) { //case-insensitive
433 realm = i;
434 break;
435 }
436 }
437 if (realm < 0) {
438 B2FATAL("Invalid realm! Needs to be one of online or production.");
439 }
441 }
442
443 if (varMap.count("secondary-input")) {
444 const auto& names = varMap["secondary-input"].as<vector<string>>();
446 }
447
 // Any exception thrown during option handling ends the program with exit code 1.
448 } catch (exception& e) {
449 cerr << "error: " << e.what() << endl;
450 return 1;
451 } catch (...) {
452 cerr << "Exception of unknown type!" << endl;
453 return 1;
454 }
455
456 //---------------------------------------------------
457 // If the python file is set, execute it
458 //---------------------------------------------------
459 PyStatus status;
460 PyConfig config;
461 PyConfig_InitPythonConfig(&config);
462 config.install_signal_handlers = 0;
463 config.safe_path = 0;
464
 // NOTE(review): std::wstring_convert and std::codecvt_utf8 are deprecated since C++17.
465 std::wstring_convert<std::codecvt_utf8<wchar_t>> converter;
466
 // Python argv layout: [0] executable name, [1] python file, [2..] user arguments.
467 std::vector<wstring> pyArgvString(arguments.size() + 2);
468
469
470 pyArgvString[0] = L"python3"; //set the executable
471 pyArgvString[1] = converter.from_bytes(pythonFile);
472 for (size_t i = 0; i < arguments.size(); i++) {
473 pyArgvString[i + 2] = converter.from_bytes(arguments[i]);
474 }
 // pyArgvArray only stores pointers into pyArgvString, which therefore has to stay alive.
475 std::vector<const wchar_t*> pyArgvArray(pyArgvString.size());
476 for (size_t i = 0; i < pyArgvString.size(); ++i) {
477 pyArgvArray[i] = pyArgvString[i].c_str();
478 }
479
480
481 //Pass python filename and additional arguments to python
482 status = PyConfig_SetArgv(&config, pyArgvArray.size(), const_cast<wchar_t**>(pyArgvArray.data()));
483 checkPythonStatus(config, status);
484
485 status = Py_InitializeFromConfig(&config);
486 checkPythonStatus(config, status);
487
488 auto fullPath = std::filesystem::absolute(std::filesystem::path(pythonFile));
489
490 if ((std::filesystem::is_directory(fullPath)) || !(std::filesystem::exists(fullPath))) {
491 B2FATAL("The given filename and/or path is not valid: " + pythonFile);
492 }
493
 // Store the complete text of the python file in the Environment before running it.
 // NOTE(review): the stream state is not checked; a file that cannot be read would presumably store an empty string.
494 std::ifstream file(fullPath.string().c_str());
495 std::stringstream buffer;
496 buffer << file.rdbuf();
497 Environment::Instance().setSteering(buffer.str());
498 int pyReturnValue = Py_RunMain();
499
500 //Finish Python interpreter
501 PyConfig_Clear(&config);
502 Py_Finalize();
503
504 //basf2.py was loaded, now do module I/O visualization
505 if (!runModuleIOVisualization.empty()) {
 // NOTE(review): empty body in this listing.
507 }
508
509 //--dry-run: print gathered information
510 if (Environment::Instance().getDryRun()) {
 // NOTE(review): empty body in this listing.
512 }
513
514 //Report completion in json metadata
515 MetadataService::Instance().addBasf2Status("finished successfully");
517
518 return pyReturnValue;
519}
static void executeModuleAndCreateIOPlot(const std::string &module)
Create independent I/O graph for a single module (without requiring a steering file).
void setOutputFileOverride(const std::string &name)
Override output file name for modules.
Definition: Environment.h:124
void setProfileModuleName(const std::string &name)
Set the name of a module to be profiled.
Definition: Environment.h:229
void setSteering(const std::string &steering)
Sets the steering file content.
Definition: Environment.h:182
void setNumberEventsOverride(unsigned int nevents)
Override the number of events in run 1 for EventInfoSetter module.
Definition: Environment.h:64
void setRealm(LogConfig::ELogRealm realm)
Set the basf2 execution realm.
Definition: Environment.cc:61
void setRunExperimentOverride(int run, int experiment)
Override run and experiment for EventInfoSetter.
Definition: Environment.h:79
void setNumberProcessesOverride(int nproc)
Override number of processes to run in parallel.
Definition: Environment.h:144
void setLogLevelOverride(int level)
Override global log level if != LogConfig::c_Default.
Definition: Environment.h:235
void printJobInformation() const
Print information on input/output files in current steering file, used by --dry-run.
Definition: Environment.cc:181
void setEntrySequencesOverride(const std::vector< std::string > &sequences)
Override the number sequences (e.g. 23:42,101) defining the entries which are processed by RootInput.
Definition: Environment.h:70
static Environment & Instance()
Static method to get a reference to the Environment instance.
Definition: Environment.cc:28
void setSecondaryInputFilesOverride(const std::vector< std::string > &names)
Override secondary input file names for modules.
Definition: Environment.h:118
void setUseZMQ(bool useZMQ)
Set the flag if ZMQ should be used instead of the RingBuffer multiprocessing implementation.
Definition: Environment.h:254
void setPicklePath(const std::string &path)
Sets the path to the file where the pickled path is stored.
Definition: Environment.h:170
void setSkipEventsOverride(unsigned int skipEvents)
Set skipNEvents override.
Definition: Environment.h:87
void setVisualizeDataFlow(bool on)
Whether to generate DOT files with data store inputs/outputs of each module.
Definition: Environment.h:190
void setInputFilesOverride(const std::vector< std::string > &names)
Override input file names for modules.
Definition: Environment.h:112
void setNoStats(bool noStats)
Disable collection of statistics during event processing.
Definition: Environment.h:196
void setDryRun(bool dryRun)
Read steering file, but do not actually start any event processing.
Definition: Environment.h:201
static std::string findFile(const std::string &path, bool silent=false)
Search for given file or directory in local or central release directory, and return absolute path if...
Definition: FileSystem.cc:151
void setDebugLevel(int debugLevel)
Configure the debug messaging level.
Definition: LogConfig.h:98
static const char * logRealmToString(ELogRealm realm)
Converts a log realm type to a string.
Definition: LogConfig.cc:49
ELogLevel
Definition of the supported log levels.
Definition: LogConfig.h:26
@ c_Debug
Debug: for code development.
Definition: LogConfig.h:26
@ c_Fatal
Fatal: for situations where the program execution cannot be continued.
Definition: LogConfig.h:31
ELogRealm
Definition of the supported execution realms.
Definition: LogConfig.h:48
@ c_Online
Online data taking.
Definition: LogConfig.h:49
@ c_Production
Data production jobs.
Definition: LogConfig.h:50
void setLogLevel(ELogLevel logLevel)
Configure the log level.
Definition: LogConfig.cc:25
static const char * logLevelToString(ELogLevel logLevelType)
Converts a log level type to a string.
Definition: LogConfig.cc:42
LogConfig * getLogConfig()
Returns global log system configuration.
Definition: LogSystem.h:78
static LogSystem & Instance()
Static method to get a reference to the LogSystem instance.
Definition: LogSystem.cc:31
LogConfig & getPackageLogConfig(const std::string &package)
Get the log configuration for the package with the given name.
Definition: LogSystem.h:96
void addBasf2Status(const std::string &message="")
Add metadata of basf2 status.
void setJsonFileName(const std::string &fileName)
Set the name of the json metadata file.
static MetadataService & Instance()
Static method to get a reference to the MetadataService instance.
void finishBasf2(bool success=true)
Add metadata for basf2 completion.
static void initialize()
Initialize the random number generator with a unique random seed.
Abstract base class for different kinds of events.
STL namespace.