20#include <framework/core/Environment.h>
21#include <framework/core/DataFlowVisualization.h>
22#include <framework/core/MetadataService.h>
23#include <framework/core/RandomNumbers.h>
24#include <framework/logging/Logger.h>
25#include <framework/logging/LogConfig.h>
26#include <framework/logging/LogSystem.h>
27#include <framework/utilities/FileSystem.h>
30#include <boost/program_options.hpp>
31#include <boost/algorithm/string/predicate.hpp>
43#include <valgrind/valgrind.h>
49namespace prog = boost::program_options;
// Abort on a failed Python initialization step.
// If `status` reports an exception (PyStatus_Exception), the given PyConfig is
// released with PyConfig_Clear() and Py_ExitStatusException() is called with
// that status. Otherwise nothing is done.
// @param config Python configuration to clear on failure.
// @param status Status returned by a preceding Python initialization call.
53 void checkPythonStatus(PyConfig& config, PyStatus& status)
55 if (PyStatus_Exception(status)) {
56 PyConfig_Clear(&config);
57 Py_ExitStatusException(status);
// Program entry point. Parses the command line with boost::program_options,
// selects the Python file to execute (a steering file or one of the helper
// scripts), reads and validates the override options, then runs that file in
// the embedded Python interpreter and returns the value of Py_RunMain().
// NOTE(review): several lines of this function are not visible in this listing
// (braces, some declarations and option entries); the comments below describe
// only the visible statements.
62int main(
int argc,
char* argv[])
// Build the relative library directory "lib/<BELLE2_SUBDIR>".
// NOTE(review): getenv() can return nullptr; presumably the variable is
// validated before this point — confirm.
71 const char* belle2SubDir = getenv(
"BELLE2_SUBDIR");
72 std::filesystem::path libPath =
"lib";
73 libPath /= belle2SubDir;
75 string runModuleIOVisualization(
"");
// Filled by the "arg" option: extra arguments handed to the Python script.
76 vector<string> arguments;
// Option group: informational options used instead of a steering file.
84 prog::options_description
generic(
"Generic options (to be used instead of steering file)");
86 (
"help,h",
"Print this help")
87 (
"version,v",
"Print long and verbose version string")
88 (
"version-short",
"Print short version string")
89 (
"info",
"Print information about basf2")
90 (
"license",
"Print the short version of the basf2 license")
91 (
"modules,m", prog::value<string>()->implicit_value(
""),
92 "Print a list of all available modules (can be limited to a given package), or give detailed information on a specific module given as an argument (case sensitive).")
// Option group: settings and overrides applied when running a steering file.
95 prog::options_description config(
"Configuration");
97 (
"steering", prog::value<string>(),
"The python steering file to run.")
98 (
"arg", prog::value<vector<string> >(&arguments),
"Additional arguments to be passed to the steering file")
99 (
"log_level,l", prog::value<string>(),
100 "Set global log level (one of DEBUG, INFO, RESULT, WARNING, or ERROR). Takes precedence over set_log_level() in steering file.")
101 (
"package_log_level", prog::value<vector<string> >(),
102 "Set package log level. Can be specified multiple times to use more than one package. (Examples: 'klm:INFO or cdc:DEBUG:10') ")
103 (
"module_log_level", prog::value<vector<string> >(),
104 "Set module log level. Can be specified multiple times to use more than one package. (Examples: 'EventInfoSetter:INFO or CDCDigitizer:DEBUG:10') ")
105 (
"random-seed", prog::value<string>(),
106 "Set the default initial seed for the random number generator. "
107 "This does not take precedence over calls to set_random_seed() in the steering file, but just changes the default. "
108 "If no seed is set via either of these mechanisms, the initial seed will be taken from the system's entropy pool.")
109 (
"debug_level,d", prog::value<unsigned int>(),
"Set default debug level. Also sets the log level to DEBUG.")
110 (
"events,n", prog::value<unsigned int>(),
"Override number of events for EventInfoSetter; otherwise set maximum number of events.")
111 (
"run", prog::value<int>(),
"Override run for EventInfoSetter, must be used with -n and --experiment")
112 (
"experiment", prog::value<int>(),
"Override experiment for EventInfoSetter, must be used with -n and --run")
113 (
"skip-events", prog::value<unsigned int>(),
114 "Override skipNEvents for EventInfoSetter and RootInput. Skips this many events before starting.")
115 (
"input,i", prog::value<vector<string> >(),
116 "Override name of input file for (Seq)RootInput. Can be specified multiple times to use more than one file. For RootInput, wildcards (as in *.root or [1-3].root) can be used, but need to be escaped with \\ or by quoting the argument to avoid expansion by the shell.")
// NOTE(review): the four adjacent string literals of the "sequence" help text
// are concatenated by the compiler without separating spaces, so the printed
// help reads "...RootInput.Must be specified ... opened.This means ...".
117 (
"sequence,S", prog::value<vector<string> >(),
118 "Override the number sequence (e.g. 23:42,101) defining the entries (starting from 0) which are processed by RootInput."
119 "Must be specified exactly once for each file to be opened."
120 "This means one sequence per input file AFTER wildcard expansion."
121 "The first event has the number 0.")
122 (
"output,o", prog::value<string>(),
123 "Override name of output file for (Seq)RootOutput. In case multiple modules are present in the path, only the first will be affected.")
124 (
"processes,p", prog::value<int>(),
"Override number of worker processes (>=1 enables, 0 disables parallel processing)");
// Option group: expert options (visualization, dry run, pickled paths, profiling, ...).
126 prog::options_description advanced(
"Advanced Options");
127 advanced.add_options()
128 (
"module-io", prog::value<string>(),
129 "Create diagram of inputs and outputs for a single module, saved as ModuleName.dot. To create a PostScript file, use e.g. 'dot ModuleName.dot -Tps -o out.ps'.")
130 (
"visualize-dataflow",
"Generate data flow diagram (dataflow.dot) for the executed steering file.")
// NOTE(review): the help text below opens a single quote before
// print(statistics) but never closes it.
132 "Enable collection of statistics during event processing (fills table called via 'print(statistics)). Useful for debugging, but adds extra processing time.")
134 "Read steering file, but do not start any event processing when process(path) is called. Prints information on input/output files that would be used during normal execution.")
135 (
"dump-path", prog::value<string>(),
136 "Read steering file, but do not actually start any event processing. The module path the steering file would execute is instead pickled (serialized) into the given file.")
137 (
"execute-path", prog::value<string>(),
138 "Do not read any provided steering file, instead execute the pickled (serialized) path from the given file.")
140 "Use ZMQ for multiprocessing instead of a RingBuffer. This has many implications and should only be used by experts.")
141 (
"job-information", prog::value<string>(),
142 "Create json file with metadata of output files and basf2 execution status.")
143 (
"realm", prog::value<string>(),
144 "Set the realm of the basf2 execution (online or production).")
145 (
"secondary-input", prog::value<vector<string>>(),
146 "Override name of input file for the secondary RootInput module used for the event embedding. Can be specified multiple times to use more than one file. Wildcards (as in *.root or [1-3].root) can be used, but need to be escaped with \\ or by quoting the argument to avoid expansion by the shell.")
148 (
"profile", prog::value<string>(),
149 "Name of a module to profile using callgrind. If more than one module of that name is registered only the first one will be profiled.")
// Combine the three groups into the full set of accepted options.
153 prog::options_description cmdlineOptions;
154 cmdlineOptions.add(generic).add(config).add(advanced);
// Positional arguments: the first one is the steering file, all remaining
// ones are collected into "arg".
156 prog::positional_options_description posOptDesc;
157 posOptDesc.add(
"steering", 1);
158 posOptDesc.add(
"arg", -1);
// Parse the command line into varMap.
160 prog::variables_map varMap;
161 prog::store(prog::command_line_parser(argc, argv).
162 options(cmdlineOptions).positional(posOptDesc).run(), varMap);
163 prog::notify(varMap);
// Choose the Python file to run. The first matching option in this
// if/else-if chain wins.
166 if (varMap.count(
"help")) {
167 cout <<
"Usage: " << argv[0] <<
" [OPTIONS] [STEERING_FILE] [-- [STEERING_FILE_OPTIONS]]\n";
168 cout << cmdlineOptions << endl;
170 }
else if (varMap.count(
"version")) {
171 pythonFile =
"basf2/version.py";
172 }
else if (varMap.count(
"version-short")) {
173 pythonFile =
"basf2/version_short.py";
174 }
else if (varMap.count(
"info")) {
175 pythonFile =
"basf2_cli/print_info.py";
176 }
else if (varMap.count(
"license")) {
177 pythonFile =
"basf2_cli/print_license.py";
178 }
else if (varMap.count(
"modules")) {
179 string modArgs = varMap[
"modules"].as<
string>();
// A non-empty --modules value is put at the front of the script arguments.
180 if (!modArgs.empty()) {
181 arguments.insert(arguments.begin(), modArgs);
// With --modules, a positional "steering" value is treated as a script
// argument (inserted at the front) rather than as a file to run.
186 if (varMap.count(
"steering")) {
187 arguments.insert(arguments.begin(), varMap[
"steering"].as<
string>());
189 pythonFile =
"basf2_cli/modules.py";
190 }
else if (varMap.count(
"module-io")) {
191 runModuleIOVisualization = varMap[
"module-io"].as<
string>();
192 pythonFile =
"basf2/core.py";
193 }
else if (varMap.count(
"execute-path")) {
195 pythonFile =
"basf2_cli/execute_pickled_path.py";
196 }
else if (varMap.count(
"steering")) {
198 pythonFile = varMap[
"steering"].as<
string>();
201 pythonFile =
"interactive.py";
// If the selected file does not exist as given, use libFile instead when it
// is non-empty (libFile is assigned in a line not visible here).
204 if (!pythonFile.empty()) {
206 if (!std::filesystem::exists(pythonFile)) {
208 if (!libFile.empty())
209 pythonFile = libFile;
211 if (varMap.count(
"steering") and not varMap.count(
"modules")) {
212 B2INFO(
"Steering file: " << pythonFile);
// --processes: negative values are rejected.
218 if (varMap.count(
"processes")) {
219 int nprocesses = varMap[
"processes"].as<
int>();
220 if (nprocesses < 0) {
221 B2FATAL(
"Invalid number of processes!");
227 if (varMap.count(
"zmq")) {
// --profile: when not already running under valgrind, build the command line
// "valgrind --tool=callgrind ... <original argv>" and replace this process
// with it via execvp().
233 if (varMap.count(
"profile")) {
234 string profileModule = varMap[
"profile"].as<
string>();
236 if (!RUNNING_ON_VALGRIND) {
238 cout <<
"Profiling requested, restarting using callgrind" << endl;
244 const vector<string> valgrind_argv {
245 "valgrind",
"--tool=callgrind",
"--instr-atstart=no",
"--trace-children=no",
246 "--callgrind-out-file=callgrind." + profileModule +
".%p",
249 cmd.reserve(valgrind_argv.size());
250 for (
const auto& arg : valgrind_argv) { cmd.push_back(strdup(arg.c_str())); }
252 for (
int i = 0; i < argc; ++i) { cmd.push_back(argv[i]); }
// execvp() expects a null-terminated argument vector.
254 cmd.push_back(
nullptr);
258 if (execvp(cmd[0], cmd.data()) == -1) {
260 perror(
"Problem calling valgrind");
// --events: 0 and the maximum unsigned int value are rejected.
273 if (varMap.count(
"events")) {
274 unsigned int nevents = varMap[
"events"].as<
unsigned int>();
275 if (nevents == 0 or nevents == std::numeric_limits<unsigned int>::max()) {
276 B2FATAL(
"Invalid number of events (valid range: 1.." << std::numeric_limits<unsigned int>::max() - 1 <<
")!");
// --experiment / --run: both must be given, together with --events, and both
// must be non-negative.
281 if (varMap.count(
"experiment") or varMap.count(
"run")) {
282 if (!varMap.count(
"events"))
283 B2FATAL(
"--experiment and --run must be used with --events/-n!");
284 if (!(varMap.count(
"run") and varMap.count(
"experiment")))
285 B2FATAL(
"Both --experiment and --run must be specified!");
287 int run = varMap[
"run"].as<
int>();
288 int experiment = varMap[
"experiment"].as<
int>();
289 B2ASSERT(
"run must be >= 0!", run >= 0);
290 B2ASSERT(
"experiment must be >= 0!", experiment >= 0);
295 if (varMap.count(
"skip-events")) {
296 unsigned int skipevents = varMap[
"skip-events"].as<
unsigned int>();
301 if (varMap.count(
"input")) {
302 const auto& names = varMap[
"input"].as<vector<string>>();
307 if (varMap.count(
"sequence")) {
308 const auto& sequences = varMap[
"sequence"].as<vector<string>>();
313 if (varMap.count(
"output")) {
314 std::string name = varMap[
"output"].as<
string>();
// --log_level: the given name is compared case-insensitively against the
// known level names; no match is fatal.
319 if (varMap.count(
"log_level")) {
320 std::string levelParam = varMap[
"log_level"].as<
string>();
324 if (boost::iequals(levelParam, thisLevel)) {
330 B2FATAL(
"Invalid log level! Needs to be one of DEBUG, INFO, RESULT, WARNING, or ERROR.");
// --package_log_level: each entry is split at the first ':' into a package
// name and a level specification; an entry without ':' is fatal.
340 if (varMap.count(
"package_log_level")) {
341 const auto& packLogList = varMap[
"package_log_level"].as<vector<string>>();
342 const std::string delimiter =
":";
343 for (
const std::string& packLog : packLogList) {
344 if (packLog.find(delimiter) == std::string::npos) {
345 B2FATAL(
"In --package_log_level input " << packLog <<
", no colon detected. ");
349 auto packageName = packLog.substr(0, packLog.find(delimiter));
350 std::string logName = packLog.substr(packLog.find(delimiter) + delimiter.length(), packLog.length());
// A specification containing "DEBUG" and longer than 5 characters is expected
// to carry a debug level after another ':'; a parse failure only warns.
352 if ((logName.find(
"DEBUG") != std::string::npos) && logName.length() > 5) {
354 debugLevel = std::stoi(logName.substr(logName.find(delimiter) + delimiter.length(), logName.length()));
355 }
catch (std::exception& e) {
356 B2WARNING(
"In --package_log_level, issue parsing debugLevel. Still setting log level to DEBUG.");
365 if (boost::iequals(logName, thisLevel)) {
371 B2FATAL(
"Invalid log level! Needs to be one of DEBUG, INFO, RESULT, WARNING, or ERROR.");
374 if ((logName ==
"DEBUG") && (debugLevel >= 0)) {
// --module_log_level: same "name:LEVEL[:N]" parsing as for packages, applied
// per module.
383 if (varMap.count(
"module_log_level")) {
384 const auto& moduleLogList = varMap[
"module_log_level"].as<vector<string>>();
385 const std::string delimiter =
":";
386 for (
const std::string& moduleLog : moduleLogList) {
387 if (moduleLog.find(delimiter) == std::string::npos) {
388 B2FATAL(
"In --module_log_level input " << moduleLog <<
", no colon detected. ");
392 auto moduleName = moduleLog.substr(0, moduleLog.find(delimiter));
393 std::string moduleLogName = moduleLog.substr(moduleLog.find(delimiter) + delimiter.length(), moduleLog.length());
// -1 means that no debug level was given.
394 int moduleDebugLevel = -1;
395 if ((moduleLogName.find(
"DEBUG") != std::string::npos) && moduleLogName.length() > 5) {
397 moduleDebugLevel = std::stoi(moduleLogName.substr(moduleLogName.find(delimiter) + delimiter.length(), moduleLogName.length()));
398 }
catch (std::exception& e) {
399 B2WARNING(
"In --module_log_level, issue parsing debugLevel. Still setting log level to DEBUG.");
401 moduleLogName =
"DEBUG";
// Stays negative if the name matches no known log level, which is fatal below.
404 int module_level = -1;
408 if (boost::iequals(moduleLogName, moduleThisLevel)) {
413 if (module_level < 0) {
414 B2FATAL(
"Invalid log level! Needs to be one of DEBUG, INFO, RESULT, WARNING, or ERROR.");
417 if ((moduleLogName ==
"DEBUG") && (moduleDebugLevel >= 0)) {
426 if (varMap.count(
"debug_level")) {
427 unsigned int level = varMap[
"debug_level"].as<
unsigned int>();
432 if (varMap.count(
"visualize-dataflow")) {
435 B2WARNING(
"--visualize-dataflow cannot be used with parallel processing, no graphs will be saved!");
439 if (varMap.count(
"stats")) {
443 if (varMap.count(
"dry-run")) {
447 if (varMap.count(
"dump-path")) {
451 if (varMap.count(
"random-seed")) {
455 if (varMap.count(
"job-information")) {
456 string jobInfoFile = varMap[
"job-information"].as<
string>();
458 B2INFO(
"Job information file: " << jobInfoFile);
// --realm: the given name is compared case-insensitively against the known
// realm names; no match is fatal.
461 if (varMap.count(
"realm")) {
462 std::string realmParam = varMap[
"realm"].as<
string>();
466 if (boost::iequals(realmParam, thisRealm)) {
472 B2FATAL(
"Invalid realm! Needs to be one of online or production.");
477 if (varMap.count(
"secondary-input")) {
478 const auto& names = varMap[
"secondary-input"].as<vector<string>>();
// Errors thrown during option handling are reported on stderr.
482 }
catch (exception& e) {
483 cerr <<
"error: " << e.what() << endl;
486 cerr <<
"Exception of unknown type!" << endl;
// Configure the embedded Python interpreter: do not install Python's signal
// handlers, and set safe_path to 0.
495 PyConfig_InitPythonConfig(&config);
496 config.install_signal_handlers = 0;
497 config.safe_path = 0;
// NOTE(review): std::wstring_convert and std::codecvt_utf8 are deprecated
// since C++17.
499 std::wstring_convert<std::codecvt_utf8<wchar_t>> converter;
// Python argv is ["python3", pythonFile, arguments...] as wide strings.
501 std::vector<wstring> pyArgvString(arguments.size() + 2);
504 pyArgvString[0] = L
"python3";
505 pyArgvString[1] = converter.from_bytes(pythonFile);
506 for (
size_t i = 0; i < arguments.size(); i++) {
507 pyArgvString[i + 2] = converter.from_bytes(arguments[i]);
// Pointer view onto pyArgvString, which must stay alive while it is in use.
509 std::vector<const wchar_t*> pyArgvArray(pyArgvString.size());
510 for (
size_t i = 0; i < pyArgvString.size(); ++i) {
511 pyArgvArray[i] = pyArgvString[i].c_str();
// PyConfig_SetArgv() takes a non-const wchar_t**, hence the const_cast.
516 status = PyConfig_SetArgv(&config, pyArgvArray.size(),
const_cast<wchar_t**
>(pyArgvArray.data()));
517 checkPythonStatus(config, status);
519 status = Py_InitializeFromConfig(&config);
520 checkPythonStatus(config, status);
// The file to run must exist and must not be a directory.
522 auto fullPath = std::filesystem::absolute(std::filesystem::path(pythonFile));
524 if ((std::filesystem::is_directory(fullPath)) || !(std::filesystem::exists(fullPath))) {
525 B2FATAL(
"The given filename and/or path is not valid: " + pythonFile);
// Read the whole file into `buffer`.
528 std::ifstream file(fullPath.string().c_str());
529 std::stringstream buffer;
530 buffer << file.rdbuf();
// Run the interpreter; its result becomes the exit code of main().
532 int pyReturnValue = Py_RunMain();
535 PyConfig_Clear(&config);
539 if (!runModuleIOVisualization.empty()) {
552 return pyReturnValue;
static void executeModuleAndCreateIOPlot(const std::string &module)
Create independent I/O graph for a single module (without requiring a steering file).
void setOutputFileOverride(const std::string &name)
Override output file name for modules.
void setProfileModuleName(const std::string &name)
Set the name of a module to be profiled.
void setSteering(const std::string &steering)
Sets the steering file content.
void setStats(bool stats)
Enable collection of statistics during event processing.
void setNumberEventsOverride(unsigned int nevents)
Override the number of events in run 1 for EventInfoSetter module.
void setRealm(LogConfig::ELogRealm realm)
Set the basf2 execution realm.
void setRunExperimentOverride(int run, int experiment)
Override run and experiment for EventInfoSetter.
void setNumberProcessesOverride(int nproc)
Override number of processes to run in parallel.
void setLogLevelOverride(int level)
Override global log level if != LogConfig::c_Default.
void printJobInformation() const
Print information on input/output files in current steering file, used by --dry-run.
void setEntrySequencesOverride(const std::vector< std::string > &sequences)
Override the number sequences (e.g. 23:42,101) defining the entries which are processed by RootInput.
static Environment & Instance()
Static method to get a reference to the Environment instance.
void setSecondaryInputFilesOverride(const std::vector< std::string > &names)
Override secondary input file names for modules.
void setUseZMQ(bool useZMQ)
Set the flag if ZMQ should be used instead of the RingBuffer multiprocessing implementation.
void setPicklePath(const std::string &path)
Sets the path to the file where the pickled path is stored.
void setSkipEventsOverride(unsigned int skipEvents)
Set skipNEvents override.
void setVisualizeDataFlow(bool on)
Whether to generate DOT files with data store inputs/outputs of each module.
void setInputFilesOverride(const std::vector< std::string > &names)
Override input file names for modules.
void setDryRun(bool dryRun)
Read steering file, but do not actually start any event processing.
static std::string findFile(const std::string &path, bool silent=false)
Search for given file or directory in local or central release directory, and return absolute path if...
void setDebugLevel(int debugLevel)
Configure the debug messaging level.
static const char * logRealmToString(ELogRealm realm)
Converts a log realm type to a string.
ELogLevel
Definition of the supported log levels.
@ c_Debug
Debug: for code development.
@ c_Fatal
Fatal: for situations where the program execution cannot be continued.
ELogRealm
Definition of the supported execution realms.
@ c_Online
Online data taking.
@ c_Production
Data production jobs.
void setLogLevel(ELogLevel logLevel)
Configure the log level.
static const char * logLevelToString(ELogLevel logLevelType)
Converts a log level type to a string.
LogConfig * getLogConfig()
Returns global log system configuration.
LogConfig & getModuleLogConfig(const std::string &module)
Get the log configuration for the module with the given name.
static LogSystem & Instance()
Static method to get a reference to the LogSystem instance.
LogConfig & getPackageLogConfig(const std::string &package)
Get the log configuration for the package with the given name.
static void initialize()
Initialize the random number generator with a unique random seed.
Abstract base class for different kinds of events.