Belle II Software  release-05-02-19
ProcessStatisticsPython.cc
1 /**************************************************************************
2  * BASF2 (Belle Analysis Framework 2) *
3  * Copyright(C) 2014 - Belle II Collaboration *
4  * *
5  * Author: The Belle II Collaboration *
6  * Contributors: Martin Ritter, Christian Pulvermacher *
7  * *
8  * This software is provided "as is" without any warranty. *
9  **************************************************************************/
10 
11 #include <boost/python.hpp>
12 #include <framework/pybasf2/ProcessStatisticsPython.h>
13 #include <framework/core/Module.h>
14 #include <framework/core/Environment.h>
15 #include <framework/datastore/StoreObjPtr.h>
16 #include <framework/logging/Logger.h>
17 #include <framework/core/PyObjConvUtils.h>
18 
19 using namespace Belle2;
20 using namespace std;
21 using namespace boost::python;
22 
24 {
25  static ProcessStatisticsPython instance;
26  return instance;
27 }
28 
30 {
32  if (!stats) {
33  if (!Environment::Instance().getDryRun()) {
34  B2ERROR("ProcessStatistics data object is not available, you either disabled statistics with --no-stats or didn't run process(path) yet.");
35  }
36  return nullptr;
37  }
38  return &(*stats);
39 }
40 
42  const std::vector<ModuleStatistics>* modules)
43 {
44  if (!getWrapped())
45  return "";
46  return getWrapped()->getStatisticsString(mode, modules);
47 }
48 
49 string ProcessStatisticsPython::getModuleStatistics(const boost::python::list& modulesPyList,
51 {
52  if (!getWrapped())
53  return "";
54 
55  std::vector<ModuleStatistics> moduleStats;
56  auto modules = PyObjConvUtils::convertPythonObject(modulesPyList, std::vector<ModulePtr>());
57  for (const ModulePtr& ptr : modules) {
58  ModuleStatistics& stats = getWrapped()->getStatistics(ptr.get());
59  //Name could be empty if module has never been called
60  if (stats.getName().empty()) stats.setName(ptr->getName());
61  moduleStats.push_back(stats);
62  }
63  return getStatisticsString(mode, &moduleStats);
64 }
65 
66 boost::python::list ProcessStatisticsPython::getAll()
67 {
68  boost::python::list result;
69  if (!getWrapped())
70  return result;
71  for (auto& module : getWrapped()->getAll()) {
72  result.append(module);
73  }
74  return result;
75 }
76 
78 {
79  if (!getWrapped())
80  return nullptr;
81  return &getWrapped()->getStatistics(module.get());
82 }
83 
85 {
86  if (!getWrapped())
87  return nullptr;
88  return &getWrapped()->getGlobal();
89 
90 }
92 {
93  if (!getWrapped())
94  return;
95  getWrapped()->clear();
96 }
97 
98 
99 #if !defined(__GNUG__) || defined(__ICC)
100 #else
101 #pragma GCC diagnostic push
102 #pragma GCC diagnostic ignored "-Wunused-local-typedefs"
103 #endif
104 //used to make python aware of default arguments
105 // cppcheck-suppress unknownMacro
106 BOOST_PYTHON_MEMBER_FUNCTION_OVERLOADS(getStatistics_overloads, getStatisticsString, 0, 1)
107 BOOST_PYTHON_MEMBER_FUNCTION_OVERLOADS(getModuleStatistics_overloads, getModuleStatistics, 1, 2)
108 #if !defined(__GNUG__) || defined(__ICC)
109 #else
110 #pragma GCC diagnostic pop
111 #endif
112 
114 {
115  // to avoid confusion between std::arg and boost::python::arg we want a shorthand namespace as well
116  namespace bp = boost::python;
117 
118  //Reference to global scope
119  scope global;
120 
121  docstring_options options(true, true, false); //userdef, py sigs, c++ sigs
122 
123  //Wrap ProcessStatisticsPython as non-copy and non-instantiable in python
124  class_<ProcessStatisticsPython, boost::noncopyable> stats("ProcessStatistics", R"DOCSTRING(
125 Interface for retrieving statistics about module execution at runtime or after
126 :py:func:`basf2.process()` returns. Should be accessed through a global instance `basf2.statistics`.
127 
128 Statistics for `event() <Module.event()>` calls are available as a string representation of the object:
129 
130 >>> from basf2 import statistics
131 >>> print(statistics)
132 =================================================================================
133 Name | Calls | Memory(MB) | Time(s) | Time(ms)/Call
134 =================================================================================
135 RootInput | 101 | 0 | 0.01 | 0.05 +- 0.02
136 RootOutput | 100 | 0 | 0.02 | 0.20 +- 0.87
137 ProgressBar | 100 | 0 | 0.00 | 0.00 +- 0.00
138 =================================================================================
139 Total | 101 | 0 | 0.03 | 0.26 +- 0.86
140 =================================================================================
141 
142 This provides information on the number of calls, elapsed time, and the average
143 difference in resident memory before and after the `event() <Module.event>` call.
144 
145 .. note::
146 
147  The module responsible for reading (or generating) events usually has one
148  additional event() call which is used to determine whether event processing
149  should stop.
150 
151 .. warning::
152 
153  Memory consumption is reporting the difference in memory usage as reported
154  by the kernel before and after the call. This is not the maximum memory the
155  module has consumed. Negative values indicate that this module has freed
156  memory which was allocated in other modules or function calls.
157 
158 Information on other calls like `initialize() <Module.initialize>`,
159 `terminate() <Module.terminate>`, etc. are also available through the different
160 counters defined in `StatisticCounters`:
161 
162 >>> print(statistics(statistics.INIT))
163 >>> print(statistics(statistics.BEGIN_RUN))
164 >>> print(statistics(statistics.END_RUN))
165 >>> print(statistics(statistics.TERM))
166 )DOCSTRING", no_init);
167 
168  stats
169  .def("set_name", &ProcessStatisticsPython::setModuleName, bp::args("module", "name"),
170  R"DOCSTRING(Set name for module in statistics.
171 
172 Normally, all modules get assigned their default name which is
173 used to register them. If multiple instances of the same module
174 are present at the same time, this can be used to distinguish
175 between them.
176 
177 .. deprecated:: release-01-00-00
178  Use `Module.set_name` instead
179 )DOCSTRING")
180  .def("get", &ProcessStatisticsPython::get, return_value_policy<reference_existing_object>(), bp::arg("module"),
181  "Get `ModuleStatistics` for given Module.")
182  .def("get_global", &ProcessStatisticsPython::getGlobal, return_value_policy<reference_existing_object>(),
183  "Get global `ModuleStatistics` containing total elapsed time etc.")
184  .def("clear", &ProcessStatisticsPython::clear, "Clear collected statistics but keep names of modules")
185  .def_readonly("modules", &ProcessStatisticsPython::getAll, "List of all `ModuleStatistics` objects.")
186  ;
187 
188 
189  {
190  // the overloaded __str__ and __call__ give very confusing signatures so hand-craft doc string.
191  docstring_options custom_options(true, false, false); //userdef, py sigs, c++ sigs
192  stats
194  getStatistics_overloads("Return the event statistics for all modules as string in a human readable form"))
195  .def("__call__", &ProcessStatisticsPython::getStatisticsString, getStatistics_overloads())
196  .def("__call__", &ProcessStatisticsPython::getModuleStatistics, getModuleStatistics_overloads(R"DOCSTRING(
197 __call__(modules=None, counter=StatisticCounters.TOTAL)
198 
199 Calling the statistics object directly like a function will return a string
200 with the execution statistics in human readable form.
201 
202 Parameters:
203  modules (list[Module]): A list of modules to include in the returned string.
204  If omitted the statistics for all modules will be included.
205  counter (StatisticCounters): Which counter to use
206 
207 * print the `beginRun() <Module.beginRun>` statistics for all modules:
208 
209  >>> print(statistics(statistics.BEGIN_RUN))
210 
211 * print the total execution times and memory consumption but only for the
212  modules ``module1`` and ``module2``
213 
214  >>> print(statistics([module1, module2], statistics.TOTAL))
215 )DOCSTRING"))
216  ;
217  }
218 
219  //Set scope to current class
220  scope statistics = stats;
221  //Define enum for all the counter types in scope of class
222  enum_<ModuleStatistics::EStatisticCounters>("StatisticCounters", R"DOCSTRING(
223 Available types of statistic counters (corresponds to Module functions)
224 
225 .. attribute:: INIT
226 
227 Time spent or memory used in the `initialize() <Module.initialize>` function
228 
229 .. attribute:: BEGIN_RUN
230 
231 Time spent or memory used in the `beginRun() <Module.beginRun>` function
232 
233 .. attribute:: EVENT
234 
235 Time spent or memory used in the `event() <Module.event>` function
236 
237 .. attribute:: END_RUN
238 
239 Time spent or memory used in the `endRun() <Module.endRun>` function
240 
241 .. attribute:: TERM
242 
243 Time spent or memory used in the `terminate() <Module.terminate>` function
244 
245 .. attribute:: TOTAL
246 
247 Time spent or memory used in any module function. This is the sum of all of the above.
248 
249 )DOCSTRING")
250  .value("INIT", ModuleStatistics::c_Init)
251  .value("BEGIN_RUN", ModuleStatistics::c_BeginRun)
252  .value("EVENT", ModuleStatistics::c_Event)
253  .value("END_RUN", ModuleStatistics::c_EndRun)
254  .value("TERM", ModuleStatistics::c_Term)
255  .value("TOTAL", ModuleStatistics::c_Total)
256  .export_values()
257  ;
258 
259  //Wrap statistics class. The default boost python docstring signature is way
260  //too noisy for these simple getters so this time we do it ourselves ...
261  docstring_options new_options(true, false, false); //userdef, py sigs, c++ sigs
262  class_<ModuleStatistics>("ModuleStatistics", "Execution statistics for a single module. "
263  "All member functions take exactly one argument to select which "
264  "counter to query which defaults to `StatisticCounters.TOTAL` if omitted.")
265  .add_property("name", make_function(&ModuleStatistics::getName, return_value_policy<copy_const_reference>()),
266  &ModuleStatistics::setName, "property to set or get the name of the module to be displayed in the statistics"
267  "\n\n.. deprecated:: release-01-00-00\n use `Module.set_name` instead")
268  .def("time_sum", &ModuleStatistics::getTimeSum, bp::arg("counter") = ModuleStatistics::c_Total,
269  "time_sum(counter=StatisticCounters.TOTAL)\nReturn the sum of all execution times")
270  .def("time_mean", &ModuleStatistics::getTimeMean, bp::arg("counter") = ModuleStatistics::c_Total,
271  "time_mean(counter=StatisticCounters.TOTAL)\nReturn the mean of all execution times")
272  .def("time_stddev", &ModuleStatistics::getTimeStddev, bp::arg("counter") = ModuleStatistics::c_Total,
273  "time_stddev(counter=StatisticCounters.TOTAL)\nReturn the standard deviation of all execution times")
274  .def("memory_sum", &ModuleStatistics::getMemorySum, bp::arg("counter") = ModuleStatistics::c_Total,
275  "memory_sum(counter=StatisticCounters.TOTAL)\nReturn the sum of the total memory usage")
276  .def("memory_mean", &ModuleStatistics::getMemoryMean, bp::arg("counter") = ModuleStatistics::c_Total,
277  "memory_mean(counter=StatisticCounters.TOTAL)\nReturn the mean of the memory usage")
278  .def("memory_stddev", &ModuleStatistics::getMemoryStddev, bp::arg("counter") = ModuleStatistics::c_Total,
279  "memory_stddev(counter=StatisticCounters.TOTAL)\nReturn the standard deviation of the memory usage")
280  .def("time_memory_corr", &ModuleStatistics::getTimeMemoryCorrelation, bp::arg("counter") = ModuleStatistics::c_Total,
281  "time_memory_corr(counter=StatisticCounters.TOTAL)\nReturn the correlation factor between time and memory consumption")
282  .def("calls", &ModuleStatistics::getCalls, bp::arg("counter") = ModuleStatistics::c_Total,
283  "calls(counter=StatisticCounters.TOTAL)\nReturn the total number of calls")
284  ;
285 
286  //Expose ProcessStatisticsPython instance as "statistics" object in pybasf2 module
287  ProcessStatisticsPython& instance = getInstance();
288  global.attr("statistics") = object(ptr(&instance));
289 }
Belle2::ProcessStatisticsPython::get
const ModuleStatistics * get(const std::shared_ptr< Module > &module)
Get statistics for given module.
Definition: ProcessStatisticsPython.cc:77
Belle2::ProcessStatisticsPython::setModuleName
void setModuleName(Module *module, const std::string &name)
Set name for module in statistics.
Definition: ProcessStatisticsPython.h:74
Belle2::ModuleStatistics::c_Event
@ c_Event
Counting time/calls in event()
Definition: ModuleStatistics.h:45
Belle2::ProcessStatisticsPython::getInstance
static ProcessStatisticsPython & getInstance()
Return singleton instance of the statistics.
Definition: ProcessStatisticsPython.cc:23
Belle2::ModuleStatistics::c_EndRun
@ c_EndRun
Counting time/calls in endRun()
Definition: ModuleStatistics.h:47
Belle2::ModuleStatistics::EStatisticCounters
EStatisticCounters
Enum to define all counter types.
Definition: ModuleStatistics.h:39
Belle2::ModuleStatistics::getMemorySum
value_type getMemorySum(EStatisticCounters type=c_Total) const
return the total used memory for a given counter
Definition: ModuleStatistics.h:112
Belle2::ModuleStatistics::c_Term
@ c_Term
Counting time/calls in terminate()
Definition: ModuleStatistics.h:49
Belle2::ModuleStatistics::c_Total
@ c_Total
Sum of the above.
Definition: ModuleStatistics.h:51
Belle2::ProcessStatisticsPython::clear
void clear()
Clear collected statistics but keep names of modules.
Definition: ProcessStatisticsPython.cc:91
Belle2::ModuleStatistics::getMemoryStddev
value_type getMemoryStddev(EStatisticCounters type=c_Total) const
return the stddev of the memory consumption changes per call
Definition: ModuleStatistics.h:122
Belle2::ModuleStatistics::getName
const std::string & getName() const
Return the previously set name.
Definition: ModuleStatistics.h:86
Belle2::ModuleStatistics::setName
void setName(const std::string &name)
Set the name of the module for display.
Definition: ModuleStatistics.h:81
Belle2::ProcessStatisticsPython::getStatisticsString
std::string getStatisticsString(ModuleStatistics::EStatisticCounters type=ModuleStatistics::c_Event, const std::vector< ModuleStatistics > *modules=nullptr)
Return string with statistics for all modules.
Definition: ProcessStatisticsPython.cc:41
Belle2
Abstract base class for different kinds of events.
Definition: MillepedeAlgorithm.h:19
Belle2::StoreObjPtr
Type-safe access to single objects in the data store.
Definition: ParticleList.h:33
Belle2::ModulePtr
std::shared_ptr< Module > ModulePtr
Defines a pointer to a module object as a shared pointer (std::shared_ptr).
Definition: Module.h:42
Belle2::ModuleStatistics::getMemoryMean
value_type getMemoryMean(EStatisticCounters type=c_Total) const
return the average memory change per call
Definition: ModuleStatistics.h:117
Belle2::ProcessStatisticsPython::getGlobal
const ModuleStatistics * getGlobal()
Get statistics for the framework itself.
Definition: ProcessStatisticsPython.cc:84
Belle2::ProcessStatisticsPython::getModuleStatistics
std::string getModuleStatistics(const boost::python::list &modulesPyList, ModuleStatistics::EStatisticCounters type=ModuleStatistics::c_Event)
Return string with statistics for selected modules.
Definition: ProcessStatisticsPython.cc:49
Belle2::ModuleStatistics::c_BeginRun
@ c_BeginRun
Counting time/calls in beginRun()
Definition: ModuleStatistics.h:43
Belle2::ProcessStatisticsPython::exposePythonAPI
static void exposePythonAPI()
Define python wrappers to make functionality available in python.
Definition: ProcessStatisticsPython.cc:113
Belle2::DataStore::c_Persistent
@ c_Persistent
Object is available during entire execution time.
Definition: DataStore.h:62
Belle2::ModuleStatistics::getTimeMemoryCorrelation
value_type getTimeMemoryCorrelation(EStatisticCounters type=c_Total) const
return the pearson correlation coefficient between execution times and memory consumption changes
Definition: ModuleStatistics.h:128
Belle2::ModuleStatistics::getCalls
value_type getCalls(EStatisticCounters type=c_Total) const
return the number of calls for a given counter type
Definition: ModuleStatistics.h:91
Belle2::ProcessStatisticsPython::getAll
boost::python::list getAll()
Get statistics for all modules as python list.
Definition: ProcessStatisticsPython.cc:66
Belle2::Environment::Instance
static Environment & Instance()
Static method to get a reference to the Environment instance.
Definition: Environment.cc:31
Belle2::ModuleStatistics::getTimeMean
value_type getTimeMean(EStatisticCounters type=c_Total) const
return the mean execution time for a given counter
Definition: ModuleStatistics.h:102
Belle2::ModuleStatistics::getTimeStddev
value_type getTimeStddev(EStatisticCounters type=c_Total) const
return the stddev of the execution times for a given counter
Definition: ModuleStatistics.h:107
Belle2::ProcessStatistics
Class to collect call statistics for all modules.
Definition: ProcessStatistics.h:94
Belle2::ProcessStatisticsPython::getWrapped
ProcessStatistics * getWrapped()
Get wrapped ProcessStatistics object.
Definition: ProcessStatisticsPython.cc:29
Belle2::ModuleStatistics::getTimeSum
value_type getTimeSum(EStatisticCounters type=c_Total) const
return the sum of all execution times for a given counter
Definition: ModuleStatistics.h:97
Belle2::ProcessStatisticsPython
Python interface for ProcessStatistics.
Definition: ProcessStatisticsPython.h:33
Belle2::PyObjConvUtils::convertPythonObject
Scalar convertPythonObject(const boost::python::object &pyObject, Scalar)
Convert from Python to given type.
Definition: PyObjConvUtils.h:510
Belle2::ModuleStatistics
Keep track of time and memory consumption during processing.
Definition: ModuleStatistics.h:36
Belle2::ModuleStatistics::c_Init
@ c_Init
Counting time/calls in initialize()
Definition: ModuleStatistics.h:41