Belle II Software  light-2403-persian
basf2.cc
1 /**************************************************************************
2  * basf2 (Belle II Analysis Software Framework) *
3  * Author: The Belle II Collaboration *
4  * *
5  * See git log for contributors and copyright holders. *
6  * This file is licensed under LGPL-3.0, see LICENSE.md. *
7  **************************************************************************/
8 
9 /*
10  * There are two ways to work with the framework. Either
11  * by executing "basf2" and providing a python steering
12  * file as an argument or by using the framework within
13  * python itself.
14  *
15  * This file implements the main executable "basf2".
16  */
17 
18 #include <boost/python.hpp> //Has to be the first include (restriction due to python)
19 
20 #include <framework/core/Environment.h>
21 #include <framework/core/DataFlowVisualization.h>
22 #include <framework/core/MetadataService.h>
23 #include <framework/core/Module.h>
24 #include <framework/core/ModuleManager.h>
25 #include <framework/core/RandomNumbers.h>
26 #include <framework/logging/Logger.h>
27 #include <framework/logging/LogConfig.h>
28 #include <framework/logging/LogSystem.h>
29 #include <framework/utilities/FileSystem.h>
30 
31 
32 #include <boost/program_options.hpp>
33 #include <boost/algorithm/string/predicate.hpp> //for iequals()
34 
35 #include <csignal>
36 #include <cstdlib>
37 #include <iostream>
38 #include <algorithm>
39 #include <string>
40 #include <vector>
41 #include <fstream>
42 #include <locale>
43 #include <codecvt>
44 #include <filesystem>
45 
46 #ifdef HAS_CALLGRIND
47 #include <valgrind/valgrind.h>
48 #endif
49 
50 using namespace std;
51 using namespace Belle2;
52 using namespace boost::python;
53 
54 namespace prog = boost::program_options;
55 
56 namespace {
57  void executePythonFile(const string& pythonFile)
58  {
59  // temporarily disable users' rootlogon
60  // FIXME: remove this line when ROOT-10468 is resolved
61  import("ROOT").attr("PyConfig").attr("DisableRootLogon") = true;
62 
63  object main_module = import("__main__");
64  object main_namespace = main_module.attr("__dict__");
65  if (pythonFile.empty()) {
66  // No steering file given, start an interactive ipython session
67  object interactive = import("interactive");
68  main_namespace["__b2shell_config"] = interactive.attr("basf2_shell_config")();
69  exec("import IPython; "
70  " from basf2 import *; "
71  "IPython.embed(config=__b2shell_config, header=f\"Welcome to {basf2label}\"); ",
72  main_namespace, main_namespace);
73  return;
74  }
75  // otherwise execute the steering file
76  auto fullPath = std::filesystem::absolute(std::filesystem::path(pythonFile));
77  if ((!(std::filesystem::is_directory(fullPath))) && (std::filesystem::exists(fullPath))) {
78 
79  std::ifstream file(fullPath.string().c_str());
80  std::stringstream buffer;
81  buffer << file.rdbuf();
82  Environment::Instance().setSteering(buffer.str());
83  exec_file(boost::python::str(fullPath.string()), main_namespace, main_namespace);
84  } else {
85  B2FATAL("The given filename and/or path is not valid: " + pythonFile);
86  }
87  }
88 }
89 
90 int main(int argc, char* argv[])
91 {
92  //remove SIGPIPE handler set by ROOT which sometimes caused infinite loops
93  //See https://savannah.cern.ch/bugs/?97991
94  //default action is to abort
95  if (signal(SIGPIPE, SIG_DFL) == SIG_ERR) {
96  B2FATAL("Cannot remove SIGPIPE signal handler");
97  }
98 
99  //Initialize metadata service
100  MetadataService::Instance();
101 
102  //Check for Belle2 environment variables (during environment initialisation)
103  Environment::Instance();
104 
105  //Get the lib path (checked for NULL in Environment)
106  const char* belle2SubDir = getenv("BELLE2_SUBDIR");
107  std::filesystem::path libPath = "lib";
108  libPath /= belle2SubDir;
109 
110  string runModuleIOVisualization(""); //nothing done if empty
111  vector<string> arguments;
112  string pythonFile;
113 
114  try {
115  //---------------------------------------------------
116  // Handle command line options
117  //---------------------------------------------------
118 
119  prog::options_description generic("Generic options (to be used instead of steering file)");
120  generic.add_options()
121  ("help,h", "Print this help")
122  ("version,v", "Print long and verbose version string")
123  ("version-short", "Print short version string")
124  ("info", "Print information about basf2")
125  ("license", "Print the short version of the basf2 license")
126  ("modules,m", prog::value<string>()->implicit_value(""),
127  "Print a list of all available modules (can be limited to a given package), or give detailed information on a specific module given as an argument (case sensitive).")
128  ;
129 
130  prog::options_description config("Configuration");
131  config.add_options()
132  ("steering", prog::value<string>(), "The python steering file to run.")
133  ("arg", prog::value<vector<string> >(&arguments), "Additional arguments to be passed to the steering file")
134  ("log_level,l", prog::value<string>(),
135  "Set global log level (one of DEBUG, INFO, RESULT, WARNING, or ERROR). Takes precedence over set_log_level() in steering file.")
136  ("package_log_level", prog::value<vector<string> >(),
137  "Set package log level. Can be specified multiple times to use more than one package. (Examples: 'klm:INFO or cdc:DEBUG:10') ")
138  ("random-seed", prog::value<string>(),
139  "Set the default initial seed for the random number generator. "
140  "This does not take precedence over calls to set_random_seed() in the steering file, but just changes the default. "
141  "If no seed is set via either of these mechanisms, the initial seed will be taken from the system's entropy pool.")
142  ("debug_level,d", prog::value<unsigned int>(), "Set default debug level. Also sets the log level to DEBUG.")
143  ("events,n", prog::value<unsigned int>(), "Override number of events for EventInfoSetter; otherwise set maximum number of events.")
144  ("run", prog::value<int>(), "Override run for EventInfoSetter, must be used with -n and --experiment")
145  ("experiment", prog::value<int>(), "Override experiment for EventInfoSetter, must be used with -n and --run")
146  ("skip-events", prog::value<unsigned int>(),
147  "Override skipNEvents for EventInfoSetter and RootInput. Skips this many events before starting.")
148  ("input,i", prog::value<vector<string> >(),
149  "Override name of input file for (Seq)RootInput. Can be specified multiple times to use more than one file. For RootInput, wildcards (as in *.root or [1-3].root) can be used, but need to be escaped with \\ or by quoting the argument to avoid expansion by the shell.")
150  ("sequence,S", prog::value<vector<string> >(),
151  "Override the number sequence (e.g. 23:42,101) defining the entries (starting from 0) which are processed by RootInput."
152  "Must be specified exactly once for each file to be opened."
153  "This means one sequence per input file AFTER wildcard expansion."
154  "The first event has the number 0.")
155  ("output,o", prog::value<string>(),
156  "Override name of output file for (Seq)RootOutput. In case multiple modules are present in the path, only the first will be affected.")
157  ("processes,p", prog::value<int>(), "Override number of worker processes (>=1 enables, 0 disables parallel processing)");
158 
159  prog::options_description advanced("Advanced Options");
160  advanced.add_options()
161  ("module-io", prog::value<string>(),
162  "Create diagram of inputs and outputs for a single module, saved as ModuleName.dot. To create a PostScript file, use e.g. 'dot ModuleName.dot -Tps -o out.ps'.")
163  ("visualize-dataflow", "Generate data flow diagram (dataflow.dot) for the executed steering file.")
164  ("no-stats",
165  "Disable collection of statistics during event processing. Useful for very high-rate applications, but produces empty table with 'print(statistics)'.")
166  ("dry-run",
167  "Read steering file, but do not start any event processing when process(path) is called. Prints information on input/output files that would be used during normal execution.")
168  ("dump-path", prog::value<string>(),
169  "Read steering file, but do not actually start any event processing. The module path the steering file would execute is instead pickled (serialized) into the given file.")
170  ("execute-path", prog::value<string>(),
171  "Do not read any provided steering file, instead execute the pickled (serialized) path from the given file.")
172  ("zmq",
173  "Use ZMQ for multiprocessing instead of a RingBuffer. This has many implications and should only be used by experts.")
174  ("job-information", prog::value<string>(),
175  "Create json file with metadata of output files and basf2 execution status.")
176  ("realm", prog::value<string>(),
177  "Set the realm of the basf2 execution (online or production).")
178  ("secondary-input", prog::value<vector<string>>(),
179  "Override name of input file for the secondary RootInput module used for the event embedding. Can be specified multiple times to use more than one file. Wildcards (as in *.root or [1-3].root) can be used, but need to be escaped with \\ or by quoting the argument to avoid expansion by the shell.")
180 #ifdef HAS_CALLGRIND
181  ("profile", prog::value<string>(),
182  "Name of a module to profile using callgrind. If more than one module of that name is registered only the first one will be profiled.")
183 #endif
184  ;
185 
186  prog::options_description cmdlineOptions;
187  cmdlineOptions.add(generic).add(config).add(advanced);
188 
189  prog::positional_options_description posOptDesc;
190  posOptDesc.add("steering", 1);
191  posOptDesc.add("arg", -1);
192 
193  prog::variables_map varMap;
194  prog::store(prog::command_line_parser(argc, argv).
195  options(cmdlineOptions).positional(posOptDesc).run(), varMap);
196  prog::notify(varMap);
197 
198  //Check for non-steering file options
199  if (varMap.count("help")) {
200  cout << "Usage: " << argv[0] << " [OPTIONS] [STEERING_FILE] [-- [STEERING_FILE_OPTIONS]]\n";
201  cout << cmdlineOptions << endl;
202  return 0;
203  } else if (varMap.count("version")) {
204  pythonFile = "basf2/version.py";
205  } else if (varMap.count("version-short")) {
206  pythonFile = "basf2/version_short.py";
207  } else if (varMap.count("info")) {
208  pythonFile = "basf2_cli/print_info.py";
209  } else if (varMap.count("license")) {
210  pythonFile = "basf2_cli/print_license.py";
211  } else if (varMap.count("modules")) {
212  string modArgs = varMap["modules"].as<string>();
213  if (!modArgs.empty()) {
214  arguments.insert(arguments.begin(), modArgs);
215  }
216  // recent boost program_options will not consume extra tokens for
217  // implicit options. In this case the module/package name gets consumed
218  // in the steering file so we just use that.
219  if (varMap.count("steering")) {
220  arguments.insert(arguments.begin(), varMap["steering"].as<string>());
221  }
222  pythonFile = "basf2_cli/modules.py";
223  } else if (varMap.count("module-io")) {
224  runModuleIOVisualization = varMap["module-io"].as<string>();
225  pythonFile = "basf2/core.py"; //make module maps available, visualization will happen later
226  } else if (varMap.count("execute-path")) {
227  Environment::Instance().setPicklePath(varMap["execute-path"].as<string>());
228  pythonFile = "basf2_cli/execute_pickled_path.py";
229  } else if (varMap.count("steering")) {
230  // steering file not misused as module name, so print it's name :D
231  pythonFile = varMap["steering"].as<string>();
232  }
233 
234  if (!pythonFile.empty()) {
235  //Search in local or central lib/ if this isn't a direct path
236  if (!std::filesystem::exists(pythonFile)) {
237  std::string libFile = FileSystem::findFile((libPath / pythonFile).string(), true);
238  if (!libFile.empty())
239  pythonFile = libFile;
240  }
241  if (varMap.count("steering") and not varMap.count("modules")) {
242  B2INFO("Steering file: " << pythonFile);
243  }
244  }
245 
246  // -p
247  // Do now so that we can override if profiling is requested
248  if (varMap.count("processes")) {
249  int nprocesses = varMap["processes"].as<int>();
250  if (nprocesses < 0) {
251  B2FATAL("Invalid number of processes!");
252  }
253  Environment::Instance().setNumberProcessesOverride(nprocesses);
254  }
255 
256  // --zmq
257  if (varMap.count("zmq")) {
258  Environment::Instance().setUseZMQ(true);
259  }
260 
261 
262 #ifdef HAS_CALLGRIND
263  if (varMap.count("profile")) {
264  string profileModule = varMap["profile"].as<string>();
265  //We want to profile a module so check if we are running under valgrind
266  if (!RUNNING_ON_VALGRIND) {
267  //Apparently not. Ok, let's call ourself using valgrind
268  cout << "Profiling requested, restarting using callgrind" << endl;
269 
270  //Sadly calling processes in C++ is very annoying as we have to
271  //build a command line.
272  vector<char*> cmd;
273  //First we add all valgrind arguments.
274  const vector<string> valgrind_argv {
275  "valgrind", "--tool=callgrind", "--instr-atstart=no", "--trace-children=no",
276  "--callgrind-out-file=callgrind." + profileModule + ".%p",
277  };
278  //As execvp wants non-const char* pointers we have to copy the string contents.
279  cmd.reserve(valgrind_argv.size());
280  for (const auto& arg : valgrind_argv) { cmd.push_back(strdup(arg.c_str())); }
281  //And now we add our own arguments, including the program name.
282  for (int i = 0; i < argc; ++i) { cmd.push_back(argv[i]); }
283  //Finally, execvp wants a nullptr as last argument
284  cmd.push_back(nullptr);
285  //And call this thing. Execvp will not return if successful as the
286  //current process will be replaced so we do not need to care about what
287  //happens if succesful
288  if (execvp(cmd[0], cmd.data()) == -1) {
289  int errsv = errno;
290  perror("Problem calling valgrind");
291  return errsv;
292  }
293  }
294  //Ok, running under valgrind, set module name we want to profile in
295  //environment.
296  Environment::Instance().setProfileModuleName(profileModule);
297  //and make sure there is no multiprocessing when profiling
298  Environment::Instance().setNumberProcessesOverride(0);
299  }
300 #endif
301 
302  // -n
303  if (varMap.count("events")) {
304  unsigned int nevents = varMap["events"].as<unsigned int>();
305  if (nevents == 0 or nevents == std::numeric_limits<unsigned int>::max()) {
306  B2FATAL("Invalid number of events (valid range: 1.." << std::numeric_limits<unsigned int>::max() - 1 << ")!");
307  }
308  Environment::Instance().setNumberEventsOverride(nevents);
309  }
310  // --run & --experiment
311  if (varMap.count("experiment") or varMap.count("run")) {
312  if (!varMap.count("events"))
313  B2FATAL("--experiment and --run must be used with --events/-n!");
314  if (!(varMap.count("run") and varMap.count("experiment")))
315  B2FATAL("Both --experiment and --run must be specified!");
316 
317  int run = varMap["run"].as<int>();
318  int experiment = varMap["experiment"].as<int>();
319  B2ASSERT("run must be >= 0!", run >= 0);
320  B2ASSERT("experiment must be >= 0!", experiment >= 0);
321  Environment::Instance().setRunExperimentOverride(run, experiment);
322  }
323 
324  // --skip-events
325  if (varMap.count("skip-events")) {
326  unsigned int skipevents = varMap["skip-events"].as<unsigned int>();
327  Environment::Instance().setSkipEventsOverride(skipevents);
328  }
329 
330  // -i
331  if (varMap.count("input")) {
332  const auto& names = varMap["input"].as<vector<string>>();
333  Environment::Instance().setInputFilesOverride(names);
334  }
335 
336  // -S
337  if (varMap.count("sequence")) {
338  const auto& sequences = varMap["sequence"].as<vector<string>>();
339  Environment::Instance().setEntrySequencesOverride(sequences);
340  }
341 
342  // -o
343  if (varMap.count("output")) {
344  std::string name = varMap["output"].as<string>();
345  Environment::Instance().setOutputFileOverride(name);
346  }
347 
348  // -l
349  if (varMap.count("log_level")) {
350  std::string levelParam = varMap["log_level"].as<string>();
351  int level = -1;
352  for (int i = LogConfig::c_Debug; i < LogConfig::c_Fatal; i++) {
353  std::string thisLevel = LogConfig::logLevelToString((LogConfig::ELogLevel)i);
354  if (boost::iequals(levelParam, thisLevel)) { //case-insensitive
355  level = i;
356  break;
357  }
358  }
359  if (level < 0) {
360  B2FATAL("Invalid log level! Needs to be one of DEBUG, INFO, RESULT, WARNING, or ERROR.");
361  }
362 
363  //set log level
364  LogSystem::Instance().getLogConfig()->setLogLevel((LogConfig::ELogLevel)level);
365  //and make sure it takes precedence over anything in the steering file
366  Environment::Instance().setLogLevelOverride(level);
367  }
368 
369  // --package_log_level
370  if (varMap.count("package_log_level")) {
371  const auto& packLogList = varMap["package_log_level"].as<vector<string>>();
372  const std::string delimiter = ":";
373  for (const std::string& packLog : packLogList) {
374  if (packLog.find(delimiter) == std::string::npos) {
375  B2FATAL("In --package_log_level input " << packLog << ", no colon detected. ");
376  break;
377  }
378  /* string parsing for packageName:LOGLEVEL or packageName:DEBUG:LEVEL*/
379  auto packageName = packLog.substr(0, packLog.find(delimiter));
380  std::string logName = packLog.substr(packLog.find(delimiter) + delimiter.length(), packLog.length());
381  int debugLevel = -1;
382  if ((logName.find("DEBUG") != std::string::npos) && logName.length() > 5) {
383  try {
384  debugLevel = std::stoi(logName.substr(logName.find(delimiter) + delimiter.length(), logName.length()));
385  } catch (std::exception& e) {
386  B2WARNING("In --package_log_level, issue parsing debugLevel. Still setting log level to DEBUG.");
387  }
388  logName = "DEBUG";
389  }
390 
391  int level = -1;
392  /* determine log level for package */
393  for (int i = LogConfig::c_Debug; i < LogConfig::c_Fatal; i++) {
394  std::string thisLevel = LogConfig::logLevelToString((LogConfig::ELogLevel)i);
395  if (boost::iequals(logName, thisLevel)) { //case-insensitive
396  level = i;
397  break;
398  }
399  }
400  if (level < 0) {
401  B2FATAL("Invalid log level! Needs to be one of DEBUG, INFO, RESULT, WARNING, or ERROR.");
402  }
403  /* set package log level*/
404  if ((logName == "DEBUG") && (debugLevel >= 0)) {
405  LogSystem::Instance().getPackageLogConfig(packageName).setDebugLevel(debugLevel);
406  }
407  LogSystem::Instance().getPackageLogConfig(packageName).setLogLevel((LogConfig::ELogLevel)level);
408 
409  }
410  }
411 
412  // -d
413  if (varMap.count("debug_level")) {
414  unsigned int level = varMap["debug_level"].as<unsigned int>();
415  LogSystem::Instance().getLogConfig()->setDebugLevel(level);
416  LogSystem::Instance().getLogConfig()->setLogLevel(LogConfig::c_Debug);
417  }
418 
419  if (varMap.count("visualize-dataflow")) {
420  Environment::Instance().setVisualizeDataFlow(true);
421  if (Environment::Instance().getNumberProcesses() > 0) {
422  B2WARNING("--visualize-dataflow cannot be used with parallel processing, no graphs will be saved!");
423  }
424  }
425 
426  if (varMap.count("no-stats")) {
427  Environment::Instance().setNoStats(true);
428  }
429 
430  if (varMap.count("dry-run")) {
431  Environment::Instance().setDryRun(true);
432  }
433 
434  if (varMap.count("dump-path")) {
435  Environment::Instance().setPicklePath(varMap["dump-path"].as<string>());
436  }
437 
438  if (varMap.count("random-seed")) {
439  RandomNumbers::initialize(varMap["random-seed"].as<string>());
440  }
441 
442  if (varMap.count("job-information")) {
443  string jobInfoFile = varMap["job-information"].as<string>();
444  MetadataService::Instance().setJsonFileName(jobInfoFile);
445  B2INFO("Job information file: " << jobInfoFile);
446  }
447 
448  if (varMap.count("realm")) {
449  std::string realmParam = varMap["realm"].as<string>();
450  int realm = -1;
451  for (int i = LogConfig::c_Online; i <= LogConfig::c_Production; i++) {
452  std::string thisRealm = LogConfig::logRealmToString((LogConfig::ELogRealm)i);
453  if (boost::iequals(realmParam, thisRealm)) { //case-insensitive
454  realm = i;
455  break;
456  }
457  }
458  if (realm < 0) {
459  B2FATAL("Invalid realm! Needs to be one of online or production.");
460  }
461  Environment::Instance().setRealm((LogConfig::ELogRealm)realm);
462  }
463 
464  if (varMap.count("secondary-input")) {
465  const auto& names = varMap["secondary-input"].as<vector<string>>();
466  Environment::Instance().setSecondaryInputFilesOverride(names);
467  }
468 
469  } catch (exception& e) {
470  cerr << "error: " << e.what() << endl;
471  return 1;
472  } catch (...) {
473  cerr << "Exception of unknown type!" << endl;
474  return 1;
475  }
476 
477  //---------------------------------------------------
478  // If the python file is set, execute it
479  //---------------------------------------------------
480  try {
481  //Init Python interpreter
482  Py_InitializeEx(0);
483 
484  std::wstring_convert<std::codecvt_utf8<wchar_t>> converter;
485  std::vector<wstring> pyArgvString(arguments.size() + 1);
486  // Set argument 0 to either script name or the basf2 exectuable
487  if (!pythonFile.empty()) {
488  pyArgvString[0] = converter.from_bytes(pythonFile);
489  } else {
490  pyArgvString[0] = converter.from_bytes(argv[0]);
491  }
492  for (size_t i = 0; i < arguments.size(); i++) {
493  pyArgvString[i + 1] = converter.from_bytes(arguments[i]);
494  }
495  std::vector<const wchar_t*> pyArgvArray(pyArgvString.size());
496  for (size_t i = 0; i < pyArgvString.size(); ++i) {
497  pyArgvArray[i] = pyArgvString[i].c_str();
498  }
499  //Pass python filename and additional arguments to python
500  PySys_SetArgv(pyArgvArray.size(), const_cast<wchar_t**>(pyArgvArray.data()));
501 
502  //Execute Python file
503  executePythonFile(pythonFile);
504 
505  //Finish Python interpreter
506  Py_Finalize();
507 
508  //basf2.py was loaded, now do module I/O visualization
509  if (!runModuleIOVisualization.empty()) {
510  DataFlowVisualization::executeModuleAndCreateIOPlot(runModuleIOVisualization);
511  }
512 
513  //--dry-run: print gathered information
514  if (Environment::Instance().getDryRun()) {
515  Environment::Instance().printJobInformation();
516  }
517 
518  //Report completion in json metadata
519  MetadataService::Instance().addBasf2Status("finished successfully");
520  MetadataService::Instance().finishBasf2();
521  } catch (error_already_set&) {
522  //Apparently an exception occured which wasn't handled. So print the traceback
523  PyErr_Print();
524  //And in rare cases, i.e. when redirecting output, the buffers are not
525  //flushed unless we finalize python. So do it now
526  Py_Finalize();
527  return 1;
528  }
529 
530  return 0;
531 }
ELogLevel
Definition of the supported log levels.
Definition: LogConfig.h:26
ELogRealm
Definition of the supported execution realms.
Definition: LogConfig.h:48
Abstract base class for different kinds of events.
Definition: ClusterUtils.h:24
int main(int argc, char **argv)
Run all tests.
Definition: test_main.cc:91