Belle II Software  release-06-01-15
basf2.cc
1 /**************************************************************************
2  * basf2 (Belle II Analysis Software Framework) *
3  * Author: The Belle II Collaboration *
4  * *
5  * See git log for contributors and copyright holders. *
6  * This file is licensed under LGPL-3.0, see LICENSE.md. *
7  **************************************************************************/
8 
9 /*
10  * There are two ways to work with the framework. Either
11  * by executing "basf2" and providing a python steering
12  * file as an argument or by using the framework within
13  * python itself.
14  *
15  * This file implements the main executable "basf2".
16  */
17 
18 #include <boost/python.hpp> //Has to be the first include (restriction due to python)
19 
20 #include <framework/core/Environment.h>
21 #include <framework/core/DataFlowVisualization.h>
22 #include <framework/core/RandomNumbers.h>
23 #include <framework/logging/Logger.h>
24 #include <framework/logging/LogConfig.h>
25 #include <framework/logging/LogSystem.h>
26 #include <framework/utilities/FileSystem.h>
27 #include <framework/core/MetadataService.h>
28 
29 #include <boost/program_options.hpp>
30 #include <boost/filesystem.hpp>
31 #include <boost/algorithm/string/predicate.hpp> //for iequals()
32 
33 #include <csignal>
34 #include <cstdlib>
35 #include <iostream>
36 #include <algorithm>
37 #include <string>
38 #include <vector>
39 #include <fstream>
40 #include <locale>
41 #include <codecvt>
42 
43 #ifdef HAS_CALLGRIND
44 #include <valgrind/valgrind.h>
45 #endif
46 
47 using namespace std;
48 using namespace Belle2;
49 using namespace boost::python;
50 
51 namespace prog = boost::program_options;
52 
53 namespace {
54  void executePythonFile(const string& pythonFile)
55  {
56  // temporarily disable users' rootlogon
57  // FIXME: remove this line when ROOT-10468 is resolved
58  import("ROOT").attr("PyConfig").attr("DisableRootLogon") = true;
59 
60  object main_module = import("__main__");
61  object main_namespace = main_module.attr("__dict__");
62  if (pythonFile.empty()) {
63  // No steering file given, start an interactive ipython session
64  object interactive = import("interactive");
65  main_namespace["__b2shell_config"] = interactive.attr("basf2_shell_config")();
66  exec("import IPython; "
67  " from basf2 import *; "
68  "IPython.embed(config=__b2shell_config, header=f\"Welcome to {basf2label}\"); ",
69  main_namespace, main_namespace);
70  return;
71  }
72  // otherwise execute the steering file
73  auto fullPath = boost::filesystem::system_complete(boost::filesystem::path(pythonFile));
74  if ((!(boost::filesystem::is_directory(fullPath))) && (boost::filesystem::exists(fullPath))) {
75 
76  std::ifstream file(fullPath.string().c_str());
77  std::stringstream buffer;
78  buffer << file.rdbuf();
79  Environment::Instance().setSteering(buffer.str());
80  exec_file(boost::python::str(fullPath.string()), main_namespace, main_namespace);
81  } else {
82  B2FATAL("The given filename and/or path is not valid: " + pythonFile);
83  }
84  }
85 }
86 
87 int main(int argc, char* argv[])
88 {
89  //remove SIGPIPE handler set by ROOT which sometimes caused infinite loops
90  //See https://savannah.cern.ch/bugs/?97991
91  //default action is to abort
92  if (signal(SIGPIPE, SIG_DFL) == SIG_ERR) {
93  B2FATAL("Cannot remove SIGPIPE signal handler");
94  }
95 
96  //Initialize metadata service
97  MetadataService::Instance();
98 
99  //Check for Belle2 environment variables (during environment initialisation)
100  Environment::Instance();
101 
102  //Get the lib path (checked for NULL in Environment)
103  const char* belle2SubDir = getenv("BELLE2_SUBDIR");
104  boost::filesystem::path libPath = "lib";
105  libPath /= belle2SubDir;
106 
107  string runModuleIOVisualization(""); //nothing done if empty
108  vector<string> arguments;
109  string pythonFile;
110 
111  try {
112  //---------------------------------------------------
113  // Handle command line options
114  //---------------------------------------------------
115 
116  prog::options_description generic("Generic options (to be used instead of steering file)");
117  generic.add_options()
118  ("help,h", "Print this help")
119  ("version,v", "Print long and verbose version string")
120  ("version-short", "Print short version string")
121  ("info", "Print information about basf2")
122  ("license", "Print the short version of the basf2 license")
123  ("modules,m", prog::value<string>()->implicit_value(""),
124  "Print a list of all available modules (can be limited to a given package), or give detailed information on a specific module given as an argument (case sensitive).")
125  ;
126 
127  prog::options_description config("Configuration");
128  config.add_options()
129  ("steering", prog::value<string>(), "The python steering file to run.")
130  ("arg", prog::value<vector<string> >(&arguments), "Additional arguments to be passed to the steering file")
131  ("log_level,l", prog::value<string>(),
132  "Set global log level (one of DEBUG, INFO, RESULT, WARNING, or ERROR). Takes precedence over set_log_level() in steering file.")
133  ("random-seed", prog::value<string>(),
134  "Set the default initial seed for the random number generator. "
135  "This does not take precedence over calls to set_random_seed() in the steering file, but just changes the default. "
136  "If no seed is set via either of these mechanisms, the initial seed will be taken from the system's entropy pool.")
137  ("debug_level,d", prog::value<unsigned int>(), "Set default debug level. Also sets the log level to DEBUG.")
138  ("events,n", prog::value<unsigned int>(), "Override number of events for EventInfoSetter; otherwise set maximum number of events.")
139  ("run", prog::value<int>(), "Override run for EventInfoSetter, must be used with -n and --experiment")
140  ("experiment", prog::value<int>(), "Override experiment for EventInfoSetter, must be used with -n and --run")
141  ("skip-events", prog::value<unsigned int>(),
142  "Override skipNEvents for EventInfoSetter and RootInput. Skips this many events before starting.")
143  ("input,i", prog::value<vector<string> >(),
144  "Override name of input file for (Seq)RootInput. Can be specified multiple times to use more than one file. For RootInput, wildcards (as in *.root or [1-3].root) can be used, but need to be escaped with \\ or by quoting the argument to avoid expansion by the shell.")
145  ("sequence,S", prog::value<vector<string> >(),
146  "Override the number sequence (e.g. 23:42,101) defining the entries (starting from 0) which are processed by RootInput."
147  "Must be specified exactly once for each file to be opened."
148  "This means one sequence per input file AFTER wildcard expansion."
149  "The first event has the number 0.")
150  ("output,o", prog::value<string>(),
151  "Override name of output file for (Seq)RootOutput. In case multiple modules are present in the path, only the first will be affected.")
152  ("processes,p", prog::value<int>(), "Override number of worker processes (>=1 enables, 0 disables parallel processing)");
153 
154  prog::options_description advanced("Advanced Options");
155  advanced.add_options()
156  ("module-io", prog::value<string>(),
157  "Create diagram of inputs and outputs for a single module, saved as ModuleName.dot. To create a PostScript file, use e.g. 'dot ModuleName.dot -Tps -o out.ps'.")
158  ("visualize-dataflow", "Generate data flow diagram (dataflow.dot) for the executed steering file.")
159  ("no-stats",
160  "Disable collection of statistics during event processing. Useful for very high-rate applications, but produces empty table with 'print(statistics)'.")
161  ("dry-run",
162  "Read steering file, but do not start any event processing when process(path) is called. Prints information on input/output files that would be used during normal execution.")
163  ("dump-path", prog::value<string>(),
164  "Read steering file, but do not actually start any event processing. The module path the steering file would execute is instead pickled (serialized) into the given file.")
165  ("execute-path", prog::value<string>(),
166  "Do not read any provided steering file, instead execute the pickled (serialized) path from the given file.")
167  ("zmq",
168  "Use ZMQ for multiprocessing instead of a RingBuffer. This has many implications and should only be used by experts.")
169  ("job-information", prog::value<string>(),
170  "Create json file with metadata of output files and basf2 execution status.")
171  ("realm", prog::value<string>(),
172  "Set the realm of the basf2 execution (online or production).")
173 #ifdef HAS_CALLGRIND
174  ("profile", prog::value<string>(),
175  "Name of a module to profile using callgrind. If more than one module of that name is registered only the first one will be profiled.")
176 #endif
177  ;
178 
179  prog::options_description cmdlineOptions;
180  cmdlineOptions.add(generic).add(config).add(advanced);
181 
182  prog::positional_options_description posOptDesc;
183  posOptDesc.add("steering", 1);
184  posOptDesc.add("arg", -1);
185 
186  prog::variables_map varMap;
187  prog::store(prog::command_line_parser(argc, argv).
188  options(cmdlineOptions).positional(posOptDesc).run(), varMap);
189  prog::notify(varMap);
190 
191  //Check for non-steering file options
192  if (varMap.count("help")) {
193  cout << "Usage: " << argv[0] << " [OPTIONS] [STEERING_FILE] [-- [STEERING_FILE_OPTIONS]]\n";
194  cout << cmdlineOptions << endl;
195  return 0;
196  } else if (varMap.count("version")) {
197  pythonFile = "basf2/version.py";
198  } else if (varMap.count("version-short")) {
199  pythonFile = "basf2/version_short.py";
200  } else if (varMap.count("info")) {
201  pythonFile = "basf2_cli/print_info.py";
202  } else if (varMap.count("license")) {
203  pythonFile = "basf2_cli/print_license.py";
204  } else if (varMap.count("modules")) {
205  string modArgs = varMap["modules"].as<string>();
206  if (!modArgs.empty()) {
207  arguments.insert(arguments.begin(), modArgs);
208  }
209  // recent boost program_options will not consume extra tokens for
210  // implicit options. In this case the module/package name gets consumed
211  // in the steering file so we just use that.
212  if (varMap.count("steering")) {
213  arguments.insert(arguments.begin(), varMap["steering"].as<string>());
214  }
215  pythonFile = "basf2_cli/modules.py";
216  } else if (varMap.count("module-io")) {
217  runModuleIOVisualization = varMap["module-io"].as<string>();
218  pythonFile = "basf2/core.py"; //make module maps available, visualization will happen later
219  } else if (varMap.count("execute-path")) {
220  Environment::Instance().setPicklePath(varMap["execute-path"].as<string>());
221  pythonFile = "basf2_cli/execute_pickled_path.py";
222  } else if (varMap.count("steering")) {
223  // steering file not misused as module name, so print it's name :D
224  pythonFile = varMap["steering"].as<string>();
225  B2INFO("Steering file: " << pythonFile);
226  }
227 
228 
229  // -p
230  // Do now so that we can override if profiling is requested
231  if (varMap.count("processes")) {
232  int nprocesses = varMap["processes"].as<int>();
233  if (nprocesses < 0) {
234  B2FATAL("Invalid number of processes!");
235  }
236  Environment::Instance().setNumberProcessesOverride(nprocesses);
237  }
238 
239  // --zmq
240  if (varMap.count("zmq")) {
241  Environment::Instance().setUseZMQ(true);
242  }
243 
244 
245 #ifdef HAS_CALLGRIND
246  if (varMap.count("profile")) {
247  string profileModule = varMap["profile"].as<string>();
248  //We want to profile a module so check if we are running under valgrind
249  if (!RUNNING_ON_VALGRIND) {
250  //Apparently not. Ok, let's call ourself using valgrind
251  cout << "Profiling requested, restarting using callgrind" << endl;
252 
253  //Sadly calling processes in C++ is very annoying as we have to
254  //build a command line.
255  vector<char*> cmd;
256  //First we add all valgrind arguments.
257  const vector<string> valgrind_argv {
258  "valgrind", "--tool=callgrind", "--instr-atstart=no", "--trace-children=no",
259  "--callgrind-out-file=callgrind." + profileModule + ".%p",
260  };
261  //As execvp wants non-const char* pointers we have to copy the string contents.
262  cmd.reserve(valgrind_argv.size());
263  for (const auto& arg : valgrind_argv) { cmd.push_back(strdup(arg.c_str())); }
264  //And now we add our own arguments, including the program name.
265  for (int i = 0; i < argc; ++i) { cmd.push_back(argv[i]); }
266  //Finally, execvp wants a nullptr as last argument
267  cmd.push_back(nullptr);
268  //And call this thing. Execvp will not return if successful as the
269  //current process will be replaced so we do not need to care about what
270  //happens if succesful
271  if (execvp(cmd[0], cmd.data()) == -1) {
272  int errsv = errno;
273  perror("Problem calling valgrind");
274  return errsv;
275  }
276  }
277  //Ok, running under valgrind, set module name we want to profile in
278  //environment.
279  Environment::Instance().setProfileModuleName(profileModule);
280  //and make sure there is no multiprocessing when profiling
281  Environment::Instance().setNumberProcessesOverride(0);
282  }
283 #endif
284 
285  // -n
286  if (varMap.count("events")) {
287  unsigned int nevents = varMap["events"].as<unsigned int>();
288  if (nevents == 0 or nevents == std::numeric_limits<unsigned int>::max()) {
289  B2FATAL("Invalid number of events (valid range: 1.." << std::numeric_limits<unsigned int>::max() - 1 << ")!");
290  }
291  Environment::Instance().setNumberEventsOverride(nevents);
292  }
293  // --run & --experiment
294  if (varMap.count("experiment") or varMap.count("run")) {
295  if (!varMap.count("events"))
296  B2FATAL("--experiment and --run must be used with --events/-n!");
297  if (!(varMap.count("run") and varMap.count("experiment")))
298  B2FATAL("Both --experiment and --run must be specified!");
299 
300  int run = varMap["run"].as<int>();
301  int experiment = varMap["experiment"].as<int>();
302  B2ASSERT("run must be >= 0!", run >= 0);
303  B2ASSERT("experiment must be >= 0!", experiment >= 0);
304  Environment::Instance().setRunExperimentOverride(run, experiment);
305  }
306 
307  // --skip-events
308  if (varMap.count("skip-events")) {
309  unsigned int skipevents = varMap["skip-events"].as<unsigned int>();
310  Environment::Instance().setSkipEventsOverride(skipevents);
311  }
312 
313  // -i
314  if (varMap.count("input")) {
315  const auto& names = varMap["input"].as<vector<string>>();
316  Environment::Instance().setInputFilesOverride(names);
317  }
318 
319  // -S
320  if (varMap.count("sequence")) {
321  const auto& sequences = varMap["sequence"].as<vector<string>>();
322  Environment::Instance().setEntrySequencesOverride(sequences);
323  }
324 
325  // -o
326  if (varMap.count("output")) {
327  std::string name = varMap["output"].as<string>();
328  Environment::Instance().setOutputFileOverride(name);
329  }
330 
331  // -l
332  if (varMap.count("log_level")) {
333  std::string levelParam = varMap["log_level"].as<string>();
334  int level = -1;
335  for (int i = LogConfig::c_Debug; i < LogConfig::c_Fatal; i++) {
336  std::string thisLevel = LogConfig::logLevelToString((LogConfig::ELogLevel)i);
337  if (boost::iequals(levelParam, thisLevel)) { //case-insensitive
338  level = i;
339  break;
340  }
341  }
342  if (level < 0) {
343  B2FATAL("Invalid log level! Needs to be one of DEBUG, INFO, RESULT, WARNING, or ERROR.");
344  }
345 
346  //set log level
347  LogSystem::Instance().getLogConfig()->setLogLevel((LogConfig::ELogLevel)level);
348  //and make sure it takes precedence overy anything in the steeering file
349  Environment::Instance().setLogLevelOverride(level);
350  }
351 
352  // -d
353  if (varMap.count("debug_level")) {
354  unsigned int level = varMap["debug_level"].as<unsigned int>();
355  LogSystem::Instance().getLogConfig()->setDebugLevel(level);
356  LogSystem::Instance().getLogConfig()->setLogLevel(LogConfig::c_Debug);
357  }
358 
359  if (varMap.count("visualize-dataflow")) {
360  Environment::Instance().setVisualizeDataFlow(true);
361  if (Environment::Instance().getNumberProcesses() > 0) {
362  B2WARNING("--visualize-dataflow cannot be used with parallel processing, no graphs will be saved!");
363  }
364  }
365 
366  if (varMap.count("no-stats")) {
367  Environment::Instance().setNoStats(true);
368  }
369 
370  if (varMap.count("dry-run")) {
371  Environment::Instance().setDryRun(true);
372  }
373 
374  if (varMap.count("dump-path")) {
375  Environment::Instance().setPicklePath(varMap["dump-path"].as<string>());
376  }
377 
378  if (varMap.count("random-seed")) {
379  RandomNumbers::initialize(varMap["random-seed"].as<string>());
380  }
381 
382  if (varMap.count("job-information")) {
383  string jobInfoFile = varMap["job-information"].as<string>();
384  MetadataService::Instance().setJsonFileName(jobInfoFile);
385  B2INFO("Job information file: " << jobInfoFile);
386  }
387 
388  if (varMap.count("realm")) {
389  std::string realmParam = varMap["realm"].as<string>();
390  int realm = -1;
391  for (int i = LogConfig::c_Online; i <= LogConfig::c_Production; i++) {
392  std::string thisRealm = LogConfig::logRealmToString((LogConfig::ELogRealm)i);
393  if (boost::iequals(realmParam, thisRealm)) { //case-insensitive
394  realm = i;
395  break;
396  }
397  }
398  if (realm < 0) {
399  B2FATAL("Invalid realm! Needs to be one of online or production.");
400  }
401  Environment::Instance().setRealm((LogConfig::ELogRealm)realm);
402  }
403 
404 
405  } catch (exception& e) {
406  cerr << "error: " << e.what() << endl;
407  return 1;
408  } catch (...) {
409  cerr << "Exception of unknown type!" << endl;
410  return 1;
411  }
412 
413  //---------------------------------------------------
414  // If the python file is set, execute it
415  //---------------------------------------------------
416  if (!pythonFile.empty()) {
417  //Search in local or central lib/ if this isn't a direct path
418  if (!boost::filesystem::exists(pythonFile)) {
419  std::string libFile = FileSystem::findFile((libPath / pythonFile).string(), true);
420  if (!libFile.empty())
421  pythonFile = libFile;
422  }
423  }
424 
425  try {
426  //Init Python interpreter
427  Py_InitializeEx(0);
428 
429  std::wstring_convert<std::codecvt_utf8<wchar_t>> converter;
430  std::vector<wstring> pyArgvString(arguments.size() + 1);
431  // Set argument 0 to either script name or the basf2 exectuable
432  if (!pythonFile.empty()) {
433  pyArgvString[0] = converter.from_bytes(pythonFile);
434  } else {
435  pyArgvString[0] = converter.from_bytes(argv[0]);
436  }
437  for (size_t i = 0; i < arguments.size(); i++) {
438  pyArgvString[i + 1] = converter.from_bytes(arguments[i]);
439  }
440  std::vector<const wchar_t*> pyArgvArray(pyArgvString.size());
441  for (size_t i = 0; i < pyArgvString.size(); ++i) {
442  pyArgvArray[i] = pyArgvString[i].c_str();
443  }
444  //Pass python filename and additional arguments to python
445  PySys_SetArgv(pyArgvArray.size(), const_cast<wchar_t**>(pyArgvArray.data()));
446 
447  //Execute Python file
448  executePythonFile(pythonFile);
449 
450  //Finish Python interpreter
451  Py_Finalize();
452 
453  //basf2.py was loaded, now do module I/O visualization
454  if (!runModuleIOVisualization.empty()) {
455  DataFlowVisualization::executeModuleAndCreateIOPlot(runModuleIOVisualization);
456  }
457 
458  //--dry-run: print gathered information
459  if (Environment::Instance().getDryRun()) {
460  Environment::Instance().printJobInformation();
461  }
462 
463  //Report completion in json metadata
464  MetadataService::Instance().addBasf2Status("finished successfully");
465  MetadataService::Instance().finishBasf2();
466  } catch (error_already_set&) {
467  //Apparently an exception occured which wasn't handled. So print the traceback
468  PyErr_Print();
469  //And in rare cases, i.e. when redirecting output, the buffers are not
470  //flushed unless we finalize python. So do it now
471  Py_Finalize();
472  return 1;
473  }
474 
475  return 0;
476 }
ELogLevel
Definition of the supported log levels.
Definition: LogConfig.h:26
ELogRealm
Definition of the supported execution realms.
Definition: LogConfig.h:48
Abstract base class for different kinds of events.
int main(int argc, char **argv)
Run all tests.
Definition: test_main.cc:75