Belle II Software  release-08-01-10
ProcessStatisticsPython.cc
1 /**************************************************************************
2  * basf2 (Belle II Analysis Software Framework) *
3  * Author: The Belle II Collaboration *
4  * *
5  * See git log for contributors and copyright holders. *
6  * This file is licensed under LGPL-3.0, see LICENSE.md. *
7  **************************************************************************/
8 
9 #include <boost/python.hpp>
10 #include <framework/pybasf2/ProcessStatisticsPython.h>
11 #include <framework/core/Module.h>
12 #include <framework/core/Environment.h>
13 #include <framework/datastore/StoreObjPtr.h>
14 #include <framework/logging/Logger.h>
15 #include <framework/core/PyObjConvUtils.h>
16 
17 using namespace Belle2;
18 using namespace std;
19 using namespace boost::python;
20 
22 {
24  if (!stats) {
25  if (!Environment::Instance().getDryRun()) {
26  B2ERROR("ProcessStatistics data object is not available, you either disabled statistics with --no-stats or didn't run process(path) yet.");
27  }
28  return nullptr;
29  }
30  return &(*stats);
31 }
32 
34 {
35  if (!getWrapped())
36  return "";
37  return getWrapped()->getStatisticsString(m_type, m_modules.empty() ? nullptr : &m_modules);
38 }
39 
41 {
42  if (!getWrapped())
43  return "";
44  return getWrapped()->getStatisticsString(m_type, m_modules.empty() ? nullptr : &m_modules, true);
45 }
46 
48  const boost::python::list& modulesPyList)
49 {
50  if (!getWrapped())
51  return ProcessStatisticsPython();
52 
53  std::vector<ModuleStatistics> moduleStats;
54  auto modules = PyObjConvUtils::convertPythonObject(modulesPyList, std::vector<ModulePtr>());
55  for (const ModulePtr& ptr : modules) {
56  ModuleStatistics& stats = getWrapped()->getStatistics(ptr.get());
57  //Name could be empty if module has never been called
58  if (stats.getName().empty()) stats.setName(ptr->getName());
59  moduleStats.push_back(stats);
60  }
61  return ProcessStatisticsPython(type, moduleStats);
62 }
63 
64 boost::python::list ProcessStatisticsPython::getAll()
65 {
66  boost::python::list result;
67  if (!getWrapped())
68  return result;
69  for (auto& module : (m_modules.empty()) ? getWrapped()->getAll() : m_modules) {
70  result.append(module);
71  }
72  return result;
73 }
74 
76 {
77  if (!getWrapped())
78  return nullptr;
79  return &getWrapped()->getStatistics(module.get());
80 }
81 
83 {
84  if (!getWrapped())
85  return nullptr;
86  return &getWrapped()->getGlobal();
87 
88 }
90 {
91  if (!getWrapped())
92  return;
93  getWrapped()->clear();
94 }
95 
96 
98 {
99  // to avoid confusion between std::arg and boost::python::arg we want a shorthand namespace as well
100  namespace bp = boost::python;
101 
102  //Reference to global scope
103  scope global;
104 
105  docstring_options options(true, true, false); //userdef, py sigs, c++ sigs
106 
107  //Wrap ProcessStatisticsPython as non-copy and non-instantiable in python
108  class_<ProcessStatisticsPython> stats("ProcessStatistics", R"DOCSTRING(
109 Interface for retrieving statistics about module execution at runtime or after
110 :py:func:`basf2.process()` returns. Should be accessed through a global instance `basf2.statistics`.
111 
112 Statistics for `event() <Module.event()>` calls are available as a string representation of the object:
113 
114 >>> from basf2 import statistics
115 >>> print(statistics)
116 =================================================================================
117 Name | Calls | Memory(MB) | Time(s) | Time(ms)/Call
118 =================================================================================
119 RootInput | 101 | 0 | 0.01 | 0.05 +- 0.02
120 RootOutput | 100 | 0 | 0.02 | 0.20 +- 0.87
121 ProgressBar | 100 | 0 | 0.00 | 0.00 +- 0.00
122 =================================================================================
123 Total | 101 | 0 | 0.03 | 0.26 +- 0.86
124 =================================================================================
125 
126 This provides information on the number of calls, elapsed time, and the average
127 difference in resident memory before and after the `event() <Module.event>` call.
128 
129 .. note::
130 
131  The module responsible for reading (or generating) events usually has one
132  additional event() call which is used to determine whether event processing
133  should stop.
134 
135 .. warning::
136 
137  Memory consumption is reporting the difference in memory usage as reported
138  by the kernel before and after the call. This is not the maximum memory the
139  module has consumed. Negative values indicate that this module has freed
140  memory which was allocated in other modules or function calls.
141 
142 Information on other calls like `initialize() <Module.initialize>`,
143 `terminate() <Module.terminate>`, etc. are also available through the different
144 counters defined in `StatisticCounters`:
145 
146 >>> print(statistics(statistics.INIT))
147 >>> print(statistics(statistics.BEGIN_RUN))
148 >>> print(statistics(statistics.END_RUN))
149 >>> print(statistics(statistics.TERM))
150 )DOCSTRING", no_init);
151 
152  stats
153  .def("get", &ProcessStatisticsPython::get, return_value_policy<reference_existing_object>(), bp::arg("module"),
154  "Get `ModuleStatistics` for given Module.")
155  .def("get_global", &ProcessStatisticsPython::getGlobal, return_value_policy<reference_existing_object>(),
156  "Get global `ModuleStatistics` containing total elapsed time etc.")
157  .def("clear", &ProcessStatisticsPython::clear, "Clear collected statistics but keep names of modules")
158  .def_readonly("modules", &ProcessStatisticsPython::getAll, "List of all `ModuleStatistics` objects.")
159  ;
160 
161  //Set scope to current class
162  scope statistics{stats};
163  //Define enum for all the counter types in scope of class
164  enum_<ModuleStatistics::EStatisticCounters>("StatisticCounters", R"DOCSTRING(
165 Available types of statistic counters (corresponds to Module functions)
166 
167 .. attribute:: INIT
168 
169 Time spent or memory used in the `initialize() <Module.initialize>` function
170 
171 .. attribute:: BEGIN_RUN
172 
173 Time spent or memory used in the `beginRun() <Module.beginRun>` function
174 
175 .. attribute:: EVENT
176 
177 Time spent or memory used in the `event() <Module.event>` function
178 
179 .. attribute:: END_RUN
180 
181 Time spent or memory used in the `endRun() <Module.endRun>` function
182 
183 .. attribute:: TERM
184 
185 Time spent or memory used in the `terminate() <Module.terminate>` function
186 
187 .. attribute:: TOTAL
188 
189 Time spent or memory used in any module function. This is the sum of all of the above.
190 
191 )DOCSTRING")
192  .value("INIT", ModuleStatistics::c_Init)
193  .value("BEGIN_RUN", ModuleStatistics::c_BeginRun)
194  .value("EVENT", ModuleStatistics::c_Event)
195  .value("END_RUN", ModuleStatistics::c_EndRun)
196  .value("TERM", ModuleStatistics::c_Term)
197  .value("TOTAL", ModuleStatistics::c_Total)
198  .export_values()
199  ;
200 
201  {
202  // the overloaded __str__ and __call__ give very confusing signatures so hand-craft doc string.
203  docstring_options custom_options(true, false, false); //userdef, py sigs, c++ sigs
204  stats
206  "Return the event statistics as a string in a human readable form")
208  "Return an html representation of the statistics (used by ipython/jupyter)")
209  .def("__call__", &ProcessStatisticsPython::getModuleStatistics, (bp::arg("counter") = ModuleStatistics::EStatisticCounters::c_Event, bp::arg("modules") = boost::python::list()),
210  R"DOCSTRING(__call__(counter=StatisticCounters.EVENT, modules=None)
211 
212 Calling the statistics object directly like a function will return a string
213 with the execution statistics in human readable form.
214 
215 Parameters:
216  counter (StatisticCounters): Which counter to use
217  modules (list[Module]): A list of modules to include in the returned string.
218  If omitted the statistics for all modules will be included.
219 
220 * print the `beginRun() <Module.beginRun>` statistics for all modules:
221 
222  >>> print(statistics(statistics.BEGIN_RUN))
223 
224 * print the total execution times and memory consumption but only for the
225  modules ``module1`` and ``module2``
226 
227  >>> print(statistics(statistics.TOTAL, [module1, module2]))
228 
229 * print the event statistics (default) for only two modules
230 
231  >>> print(statistics(modules=[module1, module2]))
232 )DOCSTRING")
233  ;
234  }
235 
236  //Wrap statistics class. The default boost python docstring signature is way
237  //too noisy for these simple getters so this time we do it ourselves ...
238  docstring_options new_options(true, false, false); //userdef, py sigs, c++ sigs
239  class_<ModuleStatistics>("ModuleStatistics", "Execution statistics for a single module. "
240  "All member functions take exactly one argument to select which "
241  "counter to query which defaults to `StatisticCounters.TOTAL` if omitted.")
242  .add_property("name", make_function(&ModuleStatistics::getName, return_value_policy<copy_const_reference>()),
243  "property to get the name of the module to be displayed in the statistics")
244  .def("time_sum", &ModuleStatistics::getTimeSum, bp::arg("counter") = ModuleStatistics::c_Total,
245  "time_sum(counter=StatisticCounters.TOTAL)\nReturn the sum of all execution times")
246  .def("time_mean", &ModuleStatistics::getTimeMean, bp::arg("counter") = ModuleStatistics::c_Total,
247  "time_mean(counter=StatisticCounters.TOTAL)\nReturn the mean of all execution times")
248  .def("time_stddev", &ModuleStatistics::getTimeStddev, bp::arg("counter") = ModuleStatistics::c_Total,
249  "time_stddev(counter=StatisticCounters.TOTAL)\nReturn the standard deviation of all execution times")
250  .def("memory_sum", &ModuleStatistics::getMemorySum, bp::arg("counter") = ModuleStatistics::c_Total,
251  "memory_sum(counter=StatisticCounters.TOTAL)\nReturn the sum of the total memory usage")
252  .def("memory_mean", &ModuleStatistics::getMemoryMean, bp::arg("counter") = ModuleStatistics::c_Total,
253  "memory_mean(counter=StatisticCounters.TOTAL)\nReturn the mean of the memory usage")
254  .def("memory_stddev", &ModuleStatistics::getMemoryStddev, bp::arg("counter") = ModuleStatistics::c_Total,
255  "memory_stddev(counter=StatisticCounters.TOTAL)\nReturn the standard deviation of the memory usage")
256  .def("time_memory_corr", &ModuleStatistics::getTimeMemoryCorrelation, bp::arg("counter") = ModuleStatistics::c_Total,
257  "time_memory_corr(counter=StatisticCounters.TOTAL)\nReturn the correlation factor between time and memory consumption")
258  .def("calls", &ModuleStatistics::getCalls, bp::arg("counter") = ModuleStatistics::c_Total,
259  "calls(counter=StatisticCounters.TOTAL)\nReturn the total number of calls")
260  ;
261 
262  //Expose ProcessStatisticsPython instance as "statistics" object in pybasf2 module
263  global.attr("statistics") = object(ProcessStatisticsPython());
264 }
@ c_Persistent
Object is available during entire execution time.
Definition: DataStore.h:60
static Environment & Instance()
Static method to get a reference to the Environment instance.
Definition: Environment.cc:28
Keep track of time and memory consumption during processing.
value_type getTimeStddev(EStatisticCounters type=c_Total) const
return the stddev of the execution times for a given counter
value_type getCalls(EStatisticCounters type=c_Total) const
return the number of calls for a given counter type
value_type getTimeMemoryCorrelation(EStatisticCounters type=c_Total) const
return the pearson correlation coefficient between execution times and memory consumption changes
value_type getMemoryStddev(EStatisticCounters type=c_Total) const
return the stddev of the memory consumption changes per call
const std::string & getName() const
Return the previously set name.
value_type getMemoryMean(EStatisticCounters type=c_Total) const
return the average memory change per call
EStatisticCounters
Enum to define all counter types.
@ c_Init
Counting time/calls in initialize()
@ c_EndRun
Counting time/calls in endRun()
@ c_Term
Counting time/calls in terminate()
@ c_BeginRun
Counting time/calls in beginRun()
@ c_Event
Counting time/calls in event()
@ c_Total
Sum of the above.
value_type getMemorySum(EStatisticCounters type=c_Total) const
return the total used memory for a given counter
value_type getTimeSum(EStatisticCounters type=c_Total) const
return the sum of all execution times for a given counter
value_type getTimeMean(EStatisticCounters type=c_Total) const
return the mean execution time for a given counter
Python interface for ProcessStatistics.
std::string getStatisticsStringHTML()
Return string with statistics for all selected modules as html table.
ProcessStatisticsPython getModuleStatistics(ModuleStatistics::EStatisticCounters type, const boost::python::list &modulesPyList)
Get a new statistics object for a different counter/different list of modules.
const ModuleStatistics * get(const std::shared_ptr< Module > &module)
Get statistics for given module.
std::string getStatisticsString()
Return string with statistics for all selected modules.
static void exposePythonAPI()
Define python wrappers to make functionality available in python.
ProcessStatistics * getWrapped()
Get wrapped ProcessStatistics object.
void clear()
Clear collected statistics but keep names of modules.
boost::python::list getAll()
Get statistics for all modules as python list.
const ModuleStatistics * getGlobal()
Get statistics for the framework itself.
Class to collect call statistics for all modules.
Type-safe access to single objects in the data store.
Definition: StoreObjPtr.h:96
std::shared_ptr< Module > ModulePtr
Defines a pointer to a module object as a std::shared_ptr.
Definition: Module.h:40
Scalar convertPythonObject(const boost::python::object &pyObject, Scalar)
Convert from Python to given type.
Abstract base class for different kinds of events.