Belle II Software development
basf2.cc
1/**************************************************************************
2 * basf2 (Belle II Analysis Software Framework) *
3 * Author: The Belle II Collaboration *
4 * *
5 * See git log for contributors and copyright holders. *
6 * This file is licensed under LGPL-3.0, see LICENSE.md. *
7 **************************************************************************/
8
/*
 * There are two ways to work with the framework. Either
 * by executing "basf2" and providing a python steering
 * file as an argument or by using the framework within
 * python itself.
 *
 * This file implements the main executable "basf2".
 */
17
18#include <Python.h> //Has to be the first include (restriction due to python)
19
20#include <framework/core/Environment.h>
21#include <framework/core/DataFlowVisualization.h>
22#include <framework/core/MetadataService.h>
23#include <framework/core/RandomNumbers.h>
24#include <framework/logging/Logger.h>
25#include <framework/logging/LogConfig.h>
26#include <framework/logging/LogSystem.h>
27#include <framework/utilities/FileSystem.h>
28
29
30#include <boost/program_options.hpp>
31#include <boost/algorithm/string/predicate.hpp> //for iequals()
32
33#include <csignal>
34#include <cstdlib>
35#include <iostream>
36#include <string>
37#include <vector>
38#include <fstream>
39#include <locale>
40#include <codecvt>
41#include <filesystem>
42
43#ifdef HAS_CALLGRIND
44#include <valgrind/valgrind.h>
45#endif
46
47using namespace std;
48using namespace Belle2;
49
50namespace prog = boost::program_options;
51
52namespace {
53
54 void checkPythonStatus(PyConfig& config, PyStatus& status)
55 {
56 if (PyStatus_Exception(status)) {
57 PyConfig_Clear(&config);
58 Py_ExitStatusException(status);
59 }
60 }
61}
62
63int main(int argc, char* argv[])
64{
65 //remove SIGPIPE handler set by ROOT which sometimes caused infinite loops
66 //See https://savannah.cern.ch/bugs/?97991
67 //default action is to abort
68 if (signal(SIGPIPE, SIG_DFL) == SIG_ERR) {
69 B2FATAL("Cannot remove SIGPIPE signal handler");
70 }
71
72 //Initialize metadata service
74
75 //Check for Belle2 environment variables (during environment initialisation)
77
78 //Get the lib path (checked for NULL in Environment)
79 const char* belle2SubDir = getenv("BELLE2_SUBDIR");
80 std::filesystem::path libPath = "lib";
81 libPath /= belle2SubDir;
82
83 string runModuleIOVisualization(""); //nothing done if empty
84 vector<string> arguments;
85 string pythonFile;
86
87 try {
88 //---------------------------------------------------
89 // Handle command line options
90 //---------------------------------------------------
91
92 prog::options_description generic("Generic options (to be used instead of steering file)");
93 generic.add_options()
94 ("help,h", "Print this help")
95 ("version,v", "Print long and verbose version string")
96 ("version-short", "Print short version string")
97 ("info", "Print information about basf2")
98 ("license", "Print the short version of the basf2 license")
99 ("modules,m", prog::value<string>()->implicit_value(""),
100 "Print a list of all available modules (can be limited to a given package), or give detailed information on a specific module given as an argument (case sensitive).")
101 ;
102
103 prog::options_description config("Configuration");
104 config.add_options()
105 ("steering", prog::value<string>(), "The python steering file to run.")
106 ("arg", prog::value<vector<string> >(&arguments), "Additional arguments to be passed to the steering file")
107 ("log_level,l", prog::value<string>(),
108 "Set global log level (one of DEBUG, INFO, RESULT, WARNING, or ERROR). Takes precedence over set_log_level() in steering file.")
109 ("package_log_level", prog::value<vector<string> >(),
110 "Set package log level. Can be specified multiple times to use more than one package. (Examples: 'klm:INFO or cdc:DEBUG:10') ")
111 ("random-seed", prog::value<string>(),
112 "Set the default initial seed for the random number generator. "
113 "This does not take precedence over calls to set_random_seed() in the steering file, but just changes the default. "
114 "If no seed is set via either of these mechanisms, the initial seed will be taken from the system's entropy pool.")
115 ("debug_level,d", prog::value<unsigned int>(), "Set default debug level. Also sets the log level to DEBUG.")
116 ("events,n", prog::value<unsigned int>(), "Override number of events for EventInfoSetter; otherwise set maximum number of events.")
117 ("run", prog::value<int>(), "Override run for EventInfoSetter, must be used with -n and --experiment")
118 ("experiment", prog::value<int>(), "Override experiment for EventInfoSetter, must be used with -n and --run")
119 ("skip-events", prog::value<unsigned int>(),
120 "Override skipNEvents for EventInfoSetter and RootInput. Skips this many events before starting.")
121 ("input,i", prog::value<vector<string> >(),
122 "Override name of input file for (Seq)RootInput. Can be specified multiple times to use more than one file. For RootInput, wildcards (as in *.root or [1-3].root) can be used, but need to be escaped with \\ or by quoting the argument to avoid expansion by the shell.")
123 ("sequence,S", prog::value<vector<string> >(),
124 "Override the number sequence (e.g. 23:42,101) defining the entries (starting from 0) which are processed by RootInput."
125 "Must be specified exactly once for each file to be opened."
126 "This means one sequence per input file AFTER wildcard expansion."
127 "The first event has the number 0.")
128 ("output,o", prog::value<string>(),
129 "Override name of output file for (Seq)RootOutput. In case multiple modules are present in the path, only the first will be affected.")
130 ("processes,p", prog::value<int>(), "Override number of worker processes (>=1 enables, 0 disables parallel processing)");
131
132 prog::options_description advanced("Advanced Options");
133 advanced.add_options()
134 ("module-io", prog::value<string>(),
135 "Create diagram of inputs and outputs for a single module, saved as ModuleName.dot. To create a PostScript file, use e.g. 'dot ModuleName.dot -Tps -o out.ps'.")
136 ("visualize-dataflow", "Generate data flow diagram (dataflow.dot) for the executed steering file.")
137 ("no-stats",
138 "Disable collection of statistics during event processing. Useful for very high-rate applications, but produces empty table with 'print(statistics)'.")
139 ("dry-run",
140 "Read steering file, but do not start any event processing when process(path) is called. Prints information on input/output files that would be used during normal execution.")
141 ("dump-path", prog::value<string>(),
142 "Read steering file, but do not actually start any event processing. The module path the steering file would execute is instead pickled (serialized) into the given file.")
143 ("execute-path", prog::value<string>(),
144 "Do not read any provided steering file, instead execute the pickled (serialized) path from the given file.")
145 ("zmq",
146 "Use ZMQ for multiprocessing instead of a RingBuffer. This has many implications and should only be used by experts.")
147 ("job-information", prog::value<string>(),
148 "Create json file with metadata of output files and basf2 execution status.")
149 ("realm", prog::value<string>(),
150 "Set the realm of the basf2 execution (online or production).")
151 ("secondary-input", prog::value<vector<string>>(),
152 "Override name of input file for the secondary RootInput module used for the event embedding. Can be specified multiple times to use more than one file. Wildcards (as in *.root or [1-3].root) can be used, but need to be escaped with \\ or by quoting the argument to avoid expansion by the shell.")
153#ifdef HAS_CALLGRIND
154 ("profile", prog::value<string>(),
155 "Name of a module to profile using callgrind. If more than one module of that name is registered only the first one will be profiled.")
156#endif
157 ;
158
159 prog::options_description cmdlineOptions;
160 cmdlineOptions.add(generic).add(config).add(advanced);
161
162 prog::positional_options_description posOptDesc;
163 posOptDesc.add("steering", 1);
164 posOptDesc.add("arg", -1);
165
166 prog::variables_map varMap;
167 prog::store(prog::command_line_parser(argc, argv).
168 options(cmdlineOptions).positional(posOptDesc).run(), varMap);
169 prog::notify(varMap);
170
171 //Check for non-steering file options
172 if (varMap.count("help")) {
173 cout << "Usage: " << argv[0] << " [OPTIONS] [STEERING_FILE] [-- [STEERING_FILE_OPTIONS]]\n";
174 cout << cmdlineOptions << endl;
175 return 0;
176 } else if (varMap.count("version")) {
177 pythonFile = "basf2/version.py";
178 } else if (varMap.count("version-short")) {
179 pythonFile = "basf2/version_short.py";
180 } else if (varMap.count("info")) {
181 pythonFile = "basf2_cli/print_info.py";
182 } else if (varMap.count("license")) {
183 pythonFile = "basf2_cli/print_license.py";
184 } else if (varMap.count("modules")) {
185 string modArgs = varMap["modules"].as<string>();
186 if (!modArgs.empty()) {
187 arguments.insert(arguments.begin(), modArgs);
188 }
189 // recent boost program_options will not consume extra tokens for
190 // implicit options. In this case the module/package name gets consumed
191 // in the steering file so we just use that.
192 if (varMap.count("steering")) {
193 arguments.insert(arguments.begin(), varMap["steering"].as<string>());
194 }
195 pythonFile = "basf2_cli/modules.py";
196 } else if (varMap.count("module-io")) {
197 runModuleIOVisualization = varMap["module-io"].as<string>();
198 pythonFile = "basf2/core.py"; //make module maps available, visualization will happen later
199 } else if (varMap.count("execute-path")) {
200 Environment::Instance().setPicklePath(varMap["execute-path"].as<string>());
201 pythonFile = "basf2_cli/execute_pickled_path.py";
202 } else if (varMap.count("steering")) {
203 // steering file not misused as module name, so print it's name :D
204 pythonFile = varMap["steering"].as<string>();
205 } else {
206 // launch an interactive python session.
207 pythonFile = "interactive.py";
208 }
209
210 if (!pythonFile.empty()) {
211 //Search in local or central lib/ if this isn't a direct path
212 if (!std::filesystem::exists(pythonFile)) {
213 std::string libFile = FileSystem::findFile((libPath / pythonFile).string(), true);
214 if (!libFile.empty())
215 pythonFile = libFile;
216 }
217 if (varMap.count("steering") and not varMap.count("modules")) {
218 B2INFO("Steering file: " << pythonFile);
219 }
220 }
221
222 // -p
223 // Do now so that we can override if profiling is requested
224 if (varMap.count("processes")) {
225 int nprocesses = varMap["processes"].as<int>();
226 if (nprocesses < 0) {
227 B2FATAL("Invalid number of processes!");
228 }
230 }
231
232 // --zmq
233 if (varMap.count("zmq")) {
235 }
236
237
238#ifdef HAS_CALLGRIND
239 if (varMap.count("profile")) {
240 string profileModule = varMap["profile"].as<string>();
241 //We want to profile a module so check if we are running under valgrind
242 if (!RUNNING_ON_VALGRIND) {
243 //Apparently not. Ok, let's call ourself using valgrind
244 cout << "Profiling requested, restarting using callgrind" << endl;
245
246 //Sadly calling processes in C++ is very annoying as we have to
247 //build a command line.
248 vector<char*> cmd;
249 //First we add all valgrind arguments.
250 const vector<string> valgrind_argv {
251 "valgrind", "--tool=callgrind", "--instr-atstart=no", "--trace-children=no",
252 "--callgrind-out-file=callgrind." + profileModule + ".%p",
253 };
254 //As execvp wants non-const char* pointers we have to copy the string contents.
255 cmd.reserve(valgrind_argv.size());
256 for (const auto& arg : valgrind_argv) { cmd.push_back(strdup(arg.c_str())); }
257 //And now we add our own arguments, including the program name.
258 for (int i = 0; i < argc; ++i) { cmd.push_back(argv[i]); }
259 //Finally, execvp wants a nullptr as last argument
260 cmd.push_back(nullptr);
261 //And call this thing. Execvp will not return if successful as the
262 //current process will be replaced so we do not need to care about what
263 //happens if successful
264 if (execvp(cmd[0], cmd.data()) == -1) {
265 int errsv = errno;
266 perror("Problem calling valgrind");
267 return errsv;
268 }
269 }
270 //Ok, running under valgrind, set module name we want to profile in
271 //environment.
273 //and make sure there is no multiprocessing when profiling
275 }
276#endif
277
278 // -n
279 if (varMap.count("events")) {
280 unsigned int nevents = varMap["events"].as<unsigned int>();
281 if (nevents == 0 or nevents == std::numeric_limits<unsigned int>::max()) {
282 B2FATAL("Invalid number of events (valid range: 1.." << std::numeric_limits<unsigned int>::max() - 1 << ")!");
283 }
285 }
286 // --run & --experiment
287 if (varMap.count("experiment") or varMap.count("run")) {
288 if (!varMap.count("events"))
289 B2FATAL("--experiment and --run must be used with --events/-n!");
290 if (!(varMap.count("run") and varMap.count("experiment")))
291 B2FATAL("Both --experiment and --run must be specified!");
292
293 int run = varMap["run"].as<int>();
294 int experiment = varMap["experiment"].as<int>();
295 B2ASSERT("run must be >= 0!", run >= 0);
296 B2ASSERT("experiment must be >= 0!", experiment >= 0);
298 }
299
300 // --skip-events
301 if (varMap.count("skip-events")) {
302 unsigned int skipevents = varMap["skip-events"].as<unsigned int>();
304 }
305
306 // -i
307 if (varMap.count("input")) {
308 const auto& names = varMap["input"].as<vector<string>>();
310 }
311
312 // -S
313 if (varMap.count("sequence")) {
314 const auto& sequences = varMap["sequence"].as<vector<string>>();
316 }
317
318 // -o
319 if (varMap.count("output")) {
320 std::string name = varMap["output"].as<string>();
322 }
323
324 // -l
325 if (varMap.count("log_level")) {
326 std::string levelParam = varMap["log_level"].as<string>();
327 int level = -1;
328 for (int i = LogConfig::c_Debug; i < LogConfig::c_Fatal; i++) {
329 std::string thisLevel = LogConfig::logLevelToString((LogConfig::ELogLevel)i);
330 if (boost::iequals(levelParam, thisLevel)) { //case-insensitive
331 level = i;
332 break;
333 }
334 }
335 if (level < 0) {
336 B2FATAL("Invalid log level! Needs to be one of DEBUG, INFO, RESULT, WARNING, or ERROR.");
337 }
338
339 //set log level
341 //and make sure it takes precedence over anything in the steering file
343 }
344
345 // --package_log_level
346 if (varMap.count("package_log_level")) {
347 const auto& packLogList = varMap["package_log_level"].as<vector<string>>();
348 const std::string delimiter = ":";
349 for (const std::string& packLog : packLogList) {
350 if (packLog.find(delimiter) == std::string::npos) {
351 B2FATAL("In --package_log_level input " << packLog << ", no colon detected. ");
352 break;
353 }
354 /* string parsing for packageName:LOGLEVEL or packageName:DEBUG:LEVEL*/
355 auto packageName = packLog.substr(0, packLog.find(delimiter));
356 std::string logName = packLog.substr(packLog.find(delimiter) + delimiter.length(), packLog.length());
357 int debugLevel = -1;
358 if ((logName.find("DEBUG") != std::string::npos) && logName.length() > 5) {
359 try {
360 debugLevel = std::stoi(logName.substr(logName.find(delimiter) + delimiter.length(), logName.length()));
361 } catch (std::exception& e) {
362 B2WARNING("In --package_log_level, issue parsing debugLevel. Still setting log level to DEBUG.");
363 }
364 logName = "DEBUG";
365 }
366
367 int level = -1;
368 /* determine log level for package */
369 for (int i = LogConfig::c_Debug; i < LogConfig::c_Fatal; i++) {
370 std::string thisLevel = LogConfig::logLevelToString((LogConfig::ELogLevel)i);
371 if (boost::iequals(logName, thisLevel)) { //case-insensitive
372 level = i;
373 break;
374 }
375 }
376 if (level < 0) {
377 B2FATAL("Invalid log level! Needs to be one of DEBUG, INFO, RESULT, WARNING, or ERROR.");
378 }
379 /* set package log level*/
380 if ((logName == "DEBUG") && (debugLevel >= 0)) {
381 LogSystem::Instance().getPackageLogConfig(packageName).setDebugLevel(debugLevel);
382 }
384
385 }
386 }
387
388 // -d
389 if (varMap.count("debug_level")) {
390 unsigned int level = varMap["debug_level"].as<unsigned int>();
393 }
394
395 if (varMap.count("visualize-dataflow")) {
397 if (Environment::Instance().getNumberProcesses() > 0) {
398 B2WARNING("--visualize-dataflow cannot be used with parallel processing, no graphs will be saved!");
399 }
400 }
401
402 if (varMap.count("no-stats")) {
404 }
405
406 if (varMap.count("dry-run")) {
408 }
409
410 if (varMap.count("dump-path")) {
411 Environment::Instance().setPicklePath(varMap["dump-path"].as<string>());
412 }
413
414 if (varMap.count("random-seed")) {
415 RandomNumbers::initialize(varMap["random-seed"].as<string>());
416 }
417
418 if (varMap.count("job-information")) {
419 string jobInfoFile = varMap["job-information"].as<string>();
421 B2INFO("Job information file: " << jobInfoFile);
422 }
423
424 if (varMap.count("realm")) {
425 std::string realmParam = varMap["realm"].as<string>();
426 int realm = -1;
427 for (int i = LogConfig::c_Online; i <= LogConfig::c_Production; i++) {
428 std::string thisRealm = LogConfig::logRealmToString((LogConfig::ELogRealm)i);
429 if (boost::iequals(realmParam, thisRealm)) { //case-insensitive
430 realm = i;
431 break;
432 }
433 }
434 if (realm < 0) {
435 B2FATAL("Invalid realm! Needs to be one of online or production.");
436 }
438 }
439
440 if (varMap.count("secondary-input")) {
441 const auto& names = varMap["secondary-input"].as<vector<string>>();
443 }
444
445 } catch (exception& e) {
446 cerr << "error: " << e.what() << endl;
447 return 1;
448 } catch (...) {
449 cerr << "Exception of unknown type!" << endl;
450 return 1;
451 }
452
453 //---------------------------------------------------
454 // If the python file is set, execute it
455 //---------------------------------------------------
456 PyStatus status;
457 PyConfig config;
458 PyConfig_InitPythonConfig(&config);
459 config.install_signal_handlers = 0;
460 config.safe_path = 0;
461
462 std::wstring_convert<std::codecvt_utf8<wchar_t>> converter;
463
464 std::vector<wstring> pyArgvString(arguments.size() + 2);
465
466
467 pyArgvString[0] = L"python3"; //set the executable
468 pyArgvString[1] = converter.from_bytes(pythonFile);
469 for (size_t i = 0; i < arguments.size(); i++) {
470 pyArgvString[i + 2] = converter.from_bytes(arguments[i]);
471 }
472 std::vector<const wchar_t*> pyArgvArray(pyArgvString.size());
473 for (size_t i = 0; i < pyArgvString.size(); ++i) {
474 pyArgvArray[i] = pyArgvString[i].c_str();
475 }
476
477
478 //Pass python filename and additional arguments to python
479 status = PyConfig_SetArgv(&config, pyArgvArray.size(), const_cast<wchar_t**>(pyArgvArray.data()));
480 checkPythonStatus(config, status);
481
482 status = Py_InitializeFromConfig(&config);
483 checkPythonStatus(config, status);
484
485 auto fullPath = std::filesystem::absolute(std::filesystem::path(pythonFile));
486
487 if ((std::filesystem::is_directory(fullPath)) || !(std::filesystem::exists(fullPath))) {
488 B2FATAL("The given filename and/or path is not valid: " + pythonFile);
489 }
490
491 std::ifstream file(fullPath.string().c_str());
492 std::stringstream buffer;
493 buffer << file.rdbuf();
494 Environment::Instance().setSteering(buffer.str());
495 int pyReturnValue = Py_RunMain();
496
497 //Finish Python interpreter
498 PyConfig_Clear(&config);
499 Py_Finalize();
500
501 //basf2.py was loaded, now do module I/O visualization
502 if (!runModuleIOVisualization.empty()) {
504 }
505
506 //--dry-run: print gathered information
507 if (Environment::Instance().getDryRun()) {
509 }
510
511 //Report completion in json metadata
512 MetadataService::Instance().addBasf2Status("finished successfully");
514
515 return pyReturnValue;
516}
static void executeModuleAndCreateIOPlot(const std::string &module)
Create independent I/O graph for a single module (without requiring a steering file).
void setOutputFileOverride(const std::string &name)
Override output file name for modules.
Definition: Environment.h:125
void setProfileModuleName(const std::string &name)
Set the name of a module to be profiled.
Definition: Environment.h:230
void setSteering(const std::string &steering)
Sets the steering file content.
Definition: Environment.h:183
void setNumberEventsOverride(unsigned int nevents)
Override the number of events in run 1 for EventInfoSetter module.
Definition: Environment.h:65
void setRealm(LogConfig::ELogRealm realm)
Set the basf2 execution realm.
Definition: Environment.cc:61
void setRunExperimentOverride(int run, int experiment)
Override run and experiment for EventInfoSetter.
Definition: Environment.h:80
void setNumberProcessesOverride(int nproc)
Override number of processes to run in parallel.
Definition: Environment.h:145
void setLogLevelOverride(int level)
Override global log level if != LogConfig::c_Default.
Definition: Environment.h:236
void printJobInformation() const
Print information on input/output files in current steering file, used by --dry-run.
Definition: Environment.cc:177
void setEntrySequencesOverride(const std::vector< std::string > &sequences)
Override the number sequences (e.g. 23:42,101) defining the entries which are processed by RootInput.
Definition: Environment.h:71
static Environment & Instance()
Static method to get a reference to the Environment instance.
Definition: Environment.cc:28
void setSecondaryInputFilesOverride(const std::vector< std::string > &names)
Override secondary input file names for modules.
Definition: Environment.h:119
void setUseZMQ(bool useZMQ)
Set the flag if ZMQ should be used instead of the RingBuffer multiprocessing implementation.
Definition: Environment.h:255
void setPicklePath(const std::string &path)
Sets the path to the file where the pickled path is stored.
Definition: Environment.h:171
void setSkipEventsOverride(unsigned int skipEvents)
Set skipNEvents override.
Definition: Environment.h:88
void setVisualizeDataFlow(bool on)
Whether to generate DOT files with data store inputs/outputs of each module.
Definition: Environment.h:191
void setInputFilesOverride(const std::vector< std::string > &names)
Override input file names for modules.
Definition: Environment.h:113
void setNoStats(bool noStats)
Disable collection of statistics during event processing.
Definition: Environment.h:197
void setDryRun(bool dryRun)
Read steering file, but do not actually start any event processing.
Definition: Environment.h:202
static std::string findFile(const std::string &path, bool silent=false)
Search for given file or directory in local or central release directory, and return absolute path if...
Definition: FileSystem.cc:151
void setDebugLevel(int debugLevel)
Configure the debug messaging level.
Definition: LogConfig.h:98
static const char * logRealmToString(ELogRealm realm)
Converts a log realm type to a string.
Definition: LogConfig.cc:49
ELogLevel
Definition of the supported log levels.
Definition: LogConfig.h:26
@ c_Debug
Debug: for code development.
Definition: LogConfig.h:26
@ c_Fatal
Fatal: for situations where the program execution can not be continued.
Definition: LogConfig.h:31
ELogRealm
Definition of the supported execution realms.
Definition: LogConfig.h:48
@ c_Online
Online data taking.
Definition: LogConfig.h:49
@ c_Production
Data production jobs.
Definition: LogConfig.h:50
void setLogLevel(ELogLevel logLevel)
Configure the log level.
Definition: LogConfig.cc:25
static const char * logLevelToString(ELogLevel logLevelType)
Converts a log level type to a string.
Definition: LogConfig.cc:42
LogConfig * getLogConfig()
Returns global log system configuration.
Definition: LogSystem.h:78
static LogSystem & Instance()
Static method to get a reference to the LogSystem instance.
Definition: LogSystem.cc:28
LogConfig & getPackageLogConfig(const std::string &package)
Get the log configuration for the package with the given name.
Definition: LogSystem.h:96
void addBasf2Status(const std::string &message="")
Add metadata of basf2 status.
void setJsonFileName(const std::string &fileName)
Set the name of the json metadata file.
static MetadataService & Instance()
Static method to get a reference to the MetadataService instance.
void finishBasf2(bool success=true)
Add metadata for basf2 completion.
static void initialize()
Initialize the random number generator with a unique random seed.
Abstract base class for different kinds of events.
STL namespace.