Belle II Software  release-08-01-10
basf2.cc
1 /**************************************************************************
2  * basf2 (Belle II Analysis Software Framework) *
3  * Author: The Belle II Collaboration *
4  * *
5  * See git log for contributors and copyright holders. *
6  * This file is licensed under LGPL-3.0, see LICENSE.md. *
7  **************************************************************************/
8 
9 /*
10  * There are two ways to work with the framework. Either
11  * by executing "basf2" and providing a python steering
12  * file as an argument or by using the framework within
13  * python itself.
14  *
15  * This file implements the main executable "basf2".
16  */
17 
18 #include <boost/python.hpp> //Has to be the first include (restriction due to python)
19 
20 #include <framework/core/Environment.h>
21 #include <framework/core/DataFlowVisualization.h>
22 #include <framework/core/RandomNumbers.h>
23 #include <framework/logging/Logger.h>
24 #include <framework/logging/LogConfig.h>
25 #include <framework/logging/LogSystem.h>
26 #include <framework/utilities/FileSystem.h>
27 #include <framework/core/MetadataService.h>
28 
29 #include <boost/program_options.hpp>
30 #include <boost/algorithm/string/predicate.hpp> //for iequals()
31 
32 #include <csignal>
33 #include <cstdlib>
34 #include <iostream>
35 #include <algorithm>
36 #include <string>
37 #include <vector>
38 #include <fstream>
39 #include <locale>
40 #include <codecvt>
41 #include <filesystem>
42 
43 #ifdef HAS_CALLGRIND
44 #include <valgrind/valgrind.h>
45 #endif
46 
47 using namespace std;
48 using namespace Belle2;
49 using namespace boost::python;
50 
51 namespace prog = boost::program_options;
52 
53 namespace {
54  void executePythonFile(const string& pythonFile)
55  {
56  // temporarily disable users' rootlogon
57  // FIXME: remove this line when ROOT-10468 is resolved
58  import("ROOT").attr("PyConfig").attr("DisableRootLogon") = true;
59 
60  object main_module = import("__main__");
61  object main_namespace = main_module.attr("__dict__");
62  if (pythonFile.empty()) {
63  // No steering file given, start an interactive ipython session
64  object interactive = import("interactive");
65  main_namespace["__b2shell_config"] = interactive.attr("basf2_shell_config")();
66  exec("import IPython; "
67  " from basf2 import *; "
68  "IPython.embed(config=__b2shell_config, header=f\"Welcome to {basf2label}\"); ",
69  main_namespace, main_namespace);
70  return;
71  }
72  // otherwise execute the steering file
73  auto fullPath = std::filesystem::absolute(std::filesystem::path(pythonFile));
74  if ((!(std::filesystem::is_directory(fullPath))) && (std::filesystem::exists(fullPath))) {
75 
76  std::ifstream file(fullPath.string().c_str());
77  std::stringstream buffer;
78  buffer << file.rdbuf();
79  Environment::Instance().setSteering(buffer.str());
80  exec_file(boost::python::str(fullPath.string()), main_namespace, main_namespace);
81  } else {
82  B2FATAL("The given filename and/or path is not valid: " + pythonFile);
83  }
84  }
85 }
86 
/**
 * Entry point of the "basf2" executable.
 *
 * Parses the command line with boost::program_options, stores the requested
 * overrides in the Environment, LogSystem, RandomNumbers and MetadataService
 * singletons, selects the python file to run (the steering file or one of the
 * helper scripts used for --version, --info, --license, --modules, --module-io
 * and --execute-path) and finally executes it in an embedded Python
 * interpreter.
 *
 * @param argc number of command line arguments.
 * @param argv command line arguments; argv[0] is printed in the usage message
 *             and becomes sys.argv[0] when no python file was selected.
 * @return 0 on success (also after printing --help), 1 if option handling
 *         throws or if a Python exception is left unhandled. When built with
 *         HAS_CALLGRIND and re-executing under valgrind fails, the errno value
 *         of execvp is returned.
 */
87 int main(int argc, char* argv[])
88 {
89  //remove SIGPIPE handler set by ROOT which sometimes caused infinite loops
90  //See https://savannah.cern.ch/bugs/?97991
91  //default action is to abort
92  if (signal(SIGPIPE, SIG_DFL) == SIG_ERR) {
93  B2FATAL("Cannot remove SIGPIPE signal handler");
94  }
95 
96  //Initialize metadata service
97  MetadataService::Instance();
98 
99  //Check for Belle2 environment variables (during environment initialisation)
100  Environment::Instance();
101 
102  //Get the lib path (checked for NULL in Environment)
  //libPath becomes the relative path "lib/<BELLE2_SUBDIR>"; it is used further
  //down to look up python files that are not given as a direct path.
103  const char* belle2SubDir = getenv("BELLE2_SUBDIR");
104  std::filesystem::path libPath = "lib";
105  libPath /= belle2SubDir;
106 
107  string runModuleIOVisualization(""); //module name given to --module-io; nothing done if empty
  //positional arguments following the steering file, forwarded to Python as sys.argv[1:]
108  vector<string> arguments;
  //python file to execute; stays empty if neither a steering file nor a helper script was selected
109  string pythonFile;
110 
111  try {
112  //---------------------------------------------------
113  // Handle command line options
114  //---------------------------------------------------
115 
116  prog::options_description generic("Generic options (to be used instead of steering file)");
117  generic.add_options()
118  ("help,h", "Print this help")
119  ("version,v", "Print long and verbose version string")
120  ("version-short", "Print short version string")
121  ("info", "Print information about basf2")
122  ("license", "Print the short version of the basf2 license")
123  ("modules,m", prog::value<string>()->implicit_value(""),
124  "Print a list of all available modules (can be limited to a given package), or give detailed information on a specific module given as an argument (case sensitive).")
125  ;
126 
127  prog::options_description config("Configuration");
128  config.add_options()
129  ("steering", prog::value<string>(), "The python steering file to run.")
130  ("arg", prog::value<vector<string> >(&arguments), "Additional arguments to be passed to the steering file")
131  ("log_level,l", prog::value<string>(),
132  "Set global log level (one of DEBUG, INFO, RESULT, WARNING, or ERROR). Takes precedence over set_log_level() in steering file.")
133  ("random-seed", prog::value<string>(),
134  "Set the default initial seed for the random number generator. "
135  "This does not take precedence over calls to set_random_seed() in the steering file, but just changes the default. "
136  "If no seed is set via either of these mechanisms, the initial seed will be taken from the system's entropy pool.")
137  ("debug_level,d", prog::value<unsigned int>(), "Set default debug level. Also sets the log level to DEBUG.")
138  ("events,n", prog::value<unsigned int>(), "Override number of events for EventInfoSetter; otherwise set maximum number of events.")
139  ("run", prog::value<int>(), "Override run for EventInfoSetter, must be used with -n and --experiment")
140  ("experiment", prog::value<int>(), "Override experiment for EventInfoSetter, must be used with -n and --run")
141  ("skip-events", prog::value<unsigned int>(),
142  "Override skipNEvents for EventInfoSetter and RootInput. Skips this many events before starting.")
143  ("input,i", prog::value<vector<string> >(),
144  "Override name of input file for (Seq)RootInput. Can be specified multiple times to use more than one file. For RootInput, wildcards (as in *.root or [1-3].root) can be used, but need to be escaped with \\ or by quoting the argument to avoid expansion by the shell.")
145  ("sequence,S", prog::value<vector<string> >(),
146  "Override the number sequence (e.g. 23:42,101) defining the entries (starting from 0) which are processed by RootInput."
147  "Must be specified exactly once for each file to be opened."
148  "This means one sequence per input file AFTER wildcard expansion."
149  "The first event has the number 0.")
150  ("output,o", prog::value<string>(),
151  "Override name of output file for (Seq)RootOutput. In case multiple modules are present in the path, only the first will be affected.")
152  ("processes,p", prog::value<int>(), "Override number of worker processes (>=1 enables, 0 disables parallel processing)");
153 
154  prog::options_description advanced("Advanced Options");
155  advanced.add_options()
156  ("module-io", prog::value<string>(),
157  "Create diagram of inputs and outputs for a single module, saved as ModuleName.dot. To create a PostScript file, use e.g. 'dot ModuleName.dot -Tps -o out.ps'.")
158  ("visualize-dataflow", "Generate data flow diagram (dataflow.dot) for the executed steering file.")
159  ("no-stats",
160  "Disable collection of statistics during event processing. Useful for very high-rate applications, but produces empty table with 'print(statistics)'.")
161  ("dry-run",
162  "Read steering file, but do not start any event processing when process(path) is called. Prints information on input/output files that would be used during normal execution.")
163  ("dump-path", prog::value<string>(),
164  "Read steering file, but do not actually start any event processing. The module path the steering file would execute is instead pickled (serialized) into the given file.")
165  ("execute-path", prog::value<string>(),
166  "Do not read any provided steering file, instead execute the pickled (serialized) path from the given file.")
167  ("zmq",
168  "Use ZMQ for multiprocessing instead of a RingBuffer. This has many implications and should only be used by experts.")
169  ("job-information", prog::value<string>(),
170  "Create json file with metadata of output files and basf2 execution status.")
171  ("realm", prog::value<string>(),
172  "Set the realm of the basf2 execution (online or production).")
173  ("secondary-input", prog::value<vector<string>>(),
174  "Override name of input file for the secondary RootInput module used for the event embedding. Can be specified multiple times to use more than one file. Wildcards (as in *.root or [1-3].root) can be used, but need to be escaped with \\ or by quoting the argument to avoid expansion by the shell.")
175 #ifdef HAS_CALLGRIND
176  ("profile", prog::value<string>(),
177  "Name of a module to profile using callgrind. If more than one module of that name is registered only the first one will be profiled.")
178 #endif
179  ;
180 
181  prog::options_description cmdlineOptions;
182  cmdlineOptions.add(generic).add(config).add(advanced);
183 
  //The first positional argument is taken as the steering file, all following
  //ones are collected in "arg" (and thereby in the 'arguments' vector).
184  prog::positional_options_description posOptDesc;
185  posOptDesc.add("steering", 1);
186  posOptDesc.add("arg", -1);
187 
188  prog::variables_map varMap;
189  prog::store(prog::command_line_parser(argc, argv).
190  options(cmdlineOptions).positional(posOptDesc).run(), varMap);
191  prog::notify(varMap);
192 
193  //Check for non-steering file options
  //Only the first matching option of this chain decides which python file is run.
194  if (varMap.count("help")) {
195  cout << "Usage: " << argv[0] << " [OPTIONS] [STEERING_FILE] [-- [STEERING_FILE_OPTIONS]]\n";
196  cout << cmdlineOptions << endl;
197  return 0;
198  } else if (varMap.count("version")) {
199  pythonFile = "basf2/version.py";
200  } else if (varMap.count("version-short")) {
201  pythonFile = "basf2/version_short.py";
202  } else if (varMap.count("info")) {
203  pythonFile = "basf2_cli/print_info.py";
204  } else if (varMap.count("license")) {
205  pythonFile = "basf2_cli/print_license.py";
206  } else if (varMap.count("modules")) {
207  string modArgs = varMap["modules"].as<string>();
208  if (!modArgs.empty()) {
209  arguments.insert(arguments.begin(), modArgs);
210  }
211  // recent boost program_options will not consume extra tokens for
212  // implicit options. In this case the module/package name gets consumed
213  // in the steering file so we just use that.
214  if (varMap.count("steering")) {
215  arguments.insert(arguments.begin(), varMap["steering"].as<string>());
216  }
217  pythonFile = "basf2_cli/modules.py";
218  } else if (varMap.count("module-io")) {
219  runModuleIOVisualization = varMap["module-io"].as<string>();
220  pythonFile = "basf2/core.py"; //make module maps available, visualization will happen later
221  } else if (varMap.count("execute-path")) {
222  Environment::Instance().setPicklePath(varMap["execute-path"].as<string>());
223  pythonFile = "basf2_cli/execute_pickled_path.py";
224  } else if (varMap.count("steering")) {
225  // steering file not misused as module name, so print its name :D
226  pythonFile = varMap["steering"].as<string>();
227  B2INFO("Steering file: " << pythonFile);
228  }
229 
230 
231  // -p
232  // Do now so that we can override if profiling is requested
233  if (varMap.count("processes")) {
234  int nprocesses = varMap["processes"].as<int>();
235  if (nprocesses < 0) {
236  B2FATAL("Invalid number of processes!");
237  }
238  Environment::Instance().setNumberProcessesOverride(nprocesses);
239  }
240 
241  // --zmq
242  if (varMap.count("zmq")) {
243  Environment::Instance().setUseZMQ(true);
244  }
245 
246 
247 #ifdef HAS_CALLGRIND
248  if (varMap.count("profile")) {
249  string profileModule = varMap["profile"].as<string>();
250  //We want to profile a module so check if we are running under valgrind
251  if (!RUNNING_ON_VALGRIND) {
252  //Apparently not. Ok, let's call ourself using valgrind
253  cout << "Profiling requested, restarting using callgrind" << endl;
254 
255  //Sadly calling processes in C++ is very annoying as we have to
256  //build a command line.
257  vector<char*> cmd;
258  //First we add all valgrind arguments.
259  const vector<string> valgrind_argv {
260  "valgrind", "--tool=callgrind", "--instr-atstart=no", "--trace-children=no",
261  "--callgrind-out-file=callgrind." + profileModule + ".%p",
262  };
263  //As execvp wants non-const char* pointers we have to copy the string contents.
264  cmd.reserve(valgrind_argv.size());
265  for (const auto& arg : valgrind_argv) { cmd.push_back(strdup(arg.c_str())); }
266  //And now we add our own arguments, including the program name.
267  for (int i = 0; i < argc; ++i) { cmd.push_back(argv[i]); }
268  //Finally, execvp wants a nullptr as last argument
269  cmd.push_back(nullptr);
270  //And call this thing. Execvp will not return if successful as the
271  //current process will be replaced so we do not need to care about what
272  //happens if successful
273  if (execvp(cmd[0], cmd.data()) == -1) {
  //save errno first so that perror() cannot change the value we return
274  int errsv = errno;
275  perror("Problem calling valgrind");
276  return errsv;
277  }
278  }
279  //Ok, running under valgrind, set module name we want to profile in
280  //environment.
281  Environment::Instance().setProfileModuleName(profileModule);
282  //and make sure there is no multiprocessing when profiling
283  Environment::Instance().setNumberProcessesOverride(0);
284  }
285 #endif
286 
287  // -n
288  if (varMap.count("events")) {
289  unsigned int nevents = varMap["events"].as<unsigned int>();
  //0 and the largest unsigned int value are both rejected
290  if (nevents == 0 or nevents == std::numeric_limits<unsigned int>::max()) {
291  B2FATAL("Invalid number of events (valid range: 1.." << std::numeric_limits<unsigned int>::max() - 1 << ")!");
292  }
293  Environment::Instance().setNumberEventsOverride(nevents);
294  }
295  // --run & --experiment
  // both must be given together, and only in combination with --events/-n
296  if (varMap.count("experiment") or varMap.count("run")) {
297  if (!varMap.count("events"))
298  B2FATAL("--experiment and --run must be used with --events/-n!");
299  if (!(varMap.count("run") and varMap.count("experiment")))
300  B2FATAL("Both --experiment and --run must be specified!");
301 
302  int run = varMap["run"].as<int>();
303  int experiment = varMap["experiment"].as<int>();
304  B2ASSERT("run must be >= 0!", run >= 0);
305  B2ASSERT("experiment must be >= 0!", experiment >= 0);
306  Environment::Instance().setRunExperimentOverride(run, experiment);
307  }
308 
309  // --skip-events
310  if (varMap.count("skip-events")) {
311  unsigned int skipevents = varMap["skip-events"].as<unsigned int>();
312  Environment::Instance().setSkipEventsOverride(skipevents);
313  }
314 
315  // -i
316  if (varMap.count("input")) {
317  const auto& names = varMap["input"].as<vector<string>>();
318  Environment::Instance().setInputFilesOverride(names);
319  }
320 
321  // -S
322  if (varMap.count("sequence")) {
323  const auto& sequences = varMap["sequence"].as<vector<string>>();
324  Environment::Instance().setEntrySequencesOverride(sequences);
325  }
326 
327  // -o
328  if (varMap.count("output")) {
329  std::string name = varMap["output"].as<string>();
330  Environment::Instance().setOutputFileOverride(name);
331  }
332 
333  // -l
334  if (varMap.count("log_level")) {
335  std::string levelParam = varMap["log_level"].as<string>();
  //level stays -1 if the given name matches none of the tested levels;
  //the loop stops before c_Fatal, so FATAL cannot be selected here
336  int level = -1;
337  for (int i = LogConfig::c_Debug; i < LogConfig::c_Fatal; i++) {
338  std::string thisLevel = LogConfig::logLevelToString((LogConfig::ELogLevel)i);
339  if (boost::iequals(levelParam, thisLevel)) { //case-insensitive
340  level = i;
341  break;
342  }
343  }
344  if (level < 0) {
345  B2FATAL("Invalid log level! Needs to be one of DEBUG, INFO, RESULT, WARNING, or ERROR.");
346  }
347 
348  //set log level
349  LogSystem::Instance().getLogConfig()->setLogLevel((LogConfig::ELogLevel)level);
350  //and make sure it takes precedence over anything in the steering file
351  Environment::Instance().setLogLevelOverride(level);
352  }
353 
354  // -d
  // sets the debug level and switches the log level to DEBUG; unlike -l, no
  // log level override is registered in the Environment here
355  if (varMap.count("debug_level")) {
356  unsigned int level = varMap["debug_level"].as<unsigned int>();
357  LogSystem::Instance().getLogConfig()->setDebugLevel(level);
358  LogSystem::Instance().getLogConfig()->setLogLevel(LogConfig::c_Debug);
359  }
360 
  // --visualize-dataflow
361  if (varMap.count("visualize-dataflow")) {
362  Environment::Instance().setVisualizeDataFlow(true);
363  if (Environment::Instance().getNumberProcesses() > 0) {
364  B2WARNING("--visualize-dataflow cannot be used with parallel processing, no graphs will be saved!");
365  }
366  }
367 
  // --no-stats
368  if (varMap.count("no-stats")) {
369  Environment::Instance().setNoStats(true);
370  }
371 
  // --dry-run
372  if (varMap.count("dry-run")) {
373  Environment::Instance().setDryRun(true);
374  }
375 
  // --dump-path (stored via the same setPicklePath() call as --execute-path above)
376  if (varMap.count("dump-path")) {
377  Environment::Instance().setPicklePath(varMap["dump-path"].as<string>());
378  }
379 
  // --random-seed
380  if (varMap.count("random-seed")) {
381  RandomNumbers::initialize(varMap["random-seed"].as<string>());
382  }
383 
  // --job-information
384  if (varMap.count("job-information")) {
385  string jobInfoFile = varMap["job-information"].as<string>();
386  MetadataService::Instance().setJsonFileName(jobInfoFile);
387  B2INFO("Job information file: " << jobInfoFile);
388  }
389 
  // --realm
390  if (varMap.count("realm")) {
391  std::string realmParam = varMap["realm"].as<string>();
  //realm stays -1 if the given name matches none of the realms between
  //c_Online and c_Production (both included)
392  int realm = -1;
393  for (int i = LogConfig::c_Online; i <= LogConfig::c_Production; i++) {
394  std::string thisRealm = LogConfig::logRealmToString((LogConfig::ELogRealm)i);
395  if (boost::iequals(realmParam, thisRealm)) { //case-insensitive
396  realm = i;
397  break;
398  }
399  }
400  if (realm < 0) {
401  B2FATAL("Invalid realm! Needs to be one of online or production.");
402  }
403  Environment::Instance().setRealm((LogConfig::ELogRealm)realm);
404  }
405 
  // --secondary-input
406  if (varMap.count("secondary-input")) {
407  const auto& names = varMap["secondary-input"].as<vector<string>>();
408  Environment::Instance().setSecondaryInputFilesOverride(names);
409  }
410 
  //Any exception thrown while handling the options ends the program with exit code 1
411  } catch (exception& e) {
412  cerr << "error: " << e.what() << endl;
413  return 1;
414  } catch (...) {
415  cerr << "Exception of unknown type!" << endl;
416  return 1;
417  }
418 
419  //---------------------------------------------------
420  // If the python file is set, execute it
421  //---------------------------------------------------
422  if (!pythonFile.empty()) {
423  //Search in local or central lib/ if this isn't a direct path
  //pythonFile is left unchanged if the lookup returns an empty string
424  if (!std::filesystem::exists(pythonFile)) {
425  std::string libFile = FileSystem::findFile((libPath / pythonFile).string(), true);
426  if (!libFile.empty())
427  pythonFile = libFile;
428  }
429  }
430 
431  try {
432  //Init Python interpreter
433  Py_InitializeEx(0);
434 
  //Build sys.argv as wide strings converted from UTF-8: slot 0 holds the
  //script (or executable) name, followed by the additional arguments
435  std::wstring_convert<std::codecvt_utf8<wchar_t>> converter;
436  std::vector<wstring> pyArgvString(arguments.size() + 1);
437  // Set argument 0 to either script name or the basf2 executable
438  if (!pythonFile.empty()) {
439  pyArgvString[0] = converter.from_bytes(pythonFile);
440  } else {
441  pyArgvString[0] = converter.from_bytes(argv[0]);
442  }
443  for (size_t i = 0; i < arguments.size(); i++) {
444  pyArgvString[i + 1] = converter.from_bytes(arguments[i]);
445  }
  //pointer view onto pyArgvString, which owns the characters and has to stay alive
446  std::vector<const wchar_t*> pyArgvArray(pyArgvString.size());
447  for (size_t i = 0; i < pyArgvString.size(); ++i) {
448  pyArgvArray[i] = pyArgvString[i].c_str();
449  }
450  //Pass python filename and additional arguments to python
451  PySys_SetArgv(pyArgvArray.size(), const_cast<wchar_t**>(pyArgvArray.data()));
452 
453  //Execute Python file
454  executePythonFile(pythonFile);
455 
456  //Finish Python interpreter
457  Py_Finalize();
458 
459  //basf2.py was loaded, now do module I/O visualization
460  if (!runModuleIOVisualization.empty()) {
461  DataFlowVisualization::executeModuleAndCreateIOPlot(runModuleIOVisualization);
462  }
463 
464  //--dry-run: print gathered information
465  if (Environment::Instance().getDryRun()) {
466  Environment::Instance().printJobInformation();
467  }
468 
469  //Report completion in json metadata
470  MetadataService::Instance().addBasf2Status("finished successfully");
471  MetadataService::Instance().finishBasf2();
472  } catch (error_already_set&) {
473  //Apparently an exception occurred which wasn't handled. So print the traceback
474  PyErr_Print();
475  //And in rare cases, i.e. when redirecting output, the buffers are not
476  //flushed unless we finalize python. So do it now
477  Py_Finalize();
478  return 1;
479  }
480 
481  return 0;
482 }
ELogLevel
Definition of the supported log levels.
Definition: LogConfig.h:26
ELogRealm
Definition of the supported execution realms.
Definition: LogConfig.h:48
Abstract base class for different kinds of events.
int main(int argc, char **argv)
Run all tests.
Definition: test_main.cc:91