Belle II Software development
ProcessStatisticsPython.cc
1/**************************************************************************
2 * basf2 (Belle II Analysis Software Framework) *
3 * Author: The Belle II Collaboration *
4 * *
5 * See git log for contributors and copyright holders. *
6 * This file is licensed under LGPL-3.0, see LICENSE.md. *
7 **************************************************************************/
8
9#include <boost/python.hpp>
10#include <framework/pybasf2/ProcessStatisticsPython.h>
11#include <framework/core/Module.h>
12#include <framework/core/Environment.h>
13#include <framework/datastore/StoreObjPtr.h>
14#include <framework/logging/Logger.h>
15#include <framework/core/PyObjConvUtils.h>
16
17using namespace Belle2;
18using namespace std;
19using namespace boost::python;
20
22{
24 if (!stats) {
25 if (!Environment::Instance().getDryRun()) {
26 B2ERROR("ProcessStatistics data object is not available, you either disabled statistics with --no-stats or didn't run process(path) yet.");
27 }
28 return nullptr;
29 }
30 return &(*stats);
31}
32
34{
35 if (!getWrapped())
36 return "";
37 return getWrapped()->getStatisticsString(m_type, m_modules.empty() ? nullptr : &m_modules);
38}
39
41{
42 if (!getWrapped())
43 return "";
44 return getWrapped()->getStatisticsString(m_type, m_modules.empty() ? nullptr : &m_modules, true);
45}
46
48 const boost::python::list& modulesPyList)
49{
50 if (!getWrapped())
52
53 std::vector<ModuleStatistics> moduleStats;
54 auto modules = PyObjConvUtils::convertPythonObject(modulesPyList, std::vector<ModulePtr>());
55 for (const ModulePtr& ptr : modules) {
56 ModuleStatistics& stats = getWrapped()->getStatistics(ptr.get());
57 //Name could be empty if module has never been called
58 if (stats.getName().empty()) stats.setName(ptr->getName());
59 moduleStats.push_back(stats);
60 }
61 return ProcessStatisticsPython(type, moduleStats);
62}
63
65{
66 boost::python::list result;
67 if (!getWrapped())
68 return result;
69 for (auto& module : (m_modules.empty()) ? getWrapped()->getAll() : m_modules) {
70 result.append(module);
71 }
72 return result;
73}
74
76{
77 if (!getWrapped())
78 return nullptr;
79 return &getWrapped()->getStatistics(module.get());
80}
81
83{
84 if (!getWrapped())
85 return nullptr;
86 return &getWrapped()->getGlobal();
87
88}
90{
91 if (!getWrapped())
92 return;
93 getWrapped()->clear();
94}
95
96void ProcessStatisticsPython::csv(const char* filename)
97{
98 if (!getWrapped())
99 return;
100 getWrapped()->write_csv(filename);
101}
102
103
105{
106 // to avoid confusion between std::arg and boost::python::arg we want a shorthand namespace as well
107 namespace bp = boost::python;
108
109 //Reference to global scope
110 scope global;
111
112 docstring_options options(true, true, false); //userdef, py sigs, c++ sigs
113
114 //Wrap ProcessStatisticsPython as non-copy and non-instantiable in python
115 class_<ProcessStatisticsPython> stats("ProcessStatistics", R"DOCSTRING(
116Interface for retrieving statistics about module execution at runtime or after
117:py:func:`basf2.process()` returns. Should be accessed through a global instance `basf2.statistics`.
118
119Statistics for `event() <Module.event()>` calls are available as a string representation of the object:
120
121>>> from basf2 import statistics
122>>> print(statistics)
123=================================================================================
124Name | Calls | Memory(MB) | Time(s) | Time(ms)/Call
125=================================================================================
126RootInput | 101 | 0 | 0.01 | 0.05 +- 0.02
127RootOutput | 100 | 0 | 0.02 | 0.20 +- 0.87
128ProgressBar | 100 | 0 | 0.00 | 0.00 +- 0.00
129=================================================================================
130Total | 101 | 0 | 0.03 | 0.26 +- 0.86
131=================================================================================
132
133This provides information on the number of calls, elapsed time, and the average
134difference in resident memory before and after the `event() <Module.event>` call.
135
136.. note::
137
138 The module responsible for reading (or generating) events usually has one
139 additional event() call which is used to determine whether event processing
140 should stop.
141
142.. warning::
143
144 Memory consumption is reporting the difference in memory usage as reported
145 by the kernel before and after the call. This is not the maximum memory the
146 module has consumed. Negative values indicate that this module has freed
147 momemory which was allocated in other modules or function calls.
148
149Information on other calls like `initialize() <Module.initialize>`,
150`terminate() <Module.terminate>`, etc. are also available through the different
151counters defined in `StatisticCounters`:
152
153>>> print(statistics(statistics.INIT))
154>>> print(statistics(statistics.BEGIN_RUN))
155>>> print(statistics(statistics.END_RUN))
156>>> print(statistics(statistics.TERM))
157)DOCSTRING", no_init);
158
159 stats
160 .def("get", &ProcessStatisticsPython::get, return_value_policy<reference_existing_object>(), bp::arg("module"),
161 "Get `ModuleStatistics` for given Module.")
162 .def("get_global", &ProcessStatisticsPython::getGlobal, return_value_policy<reference_existing_object>(),
163 "Get global `ModuleStatistics` containing total elapsed time etc.")
164 .def("clear", &ProcessStatisticsPython::clear, "Clear collected statistics but keep names of modules")
165 .def_readonly("modules", &ProcessStatisticsPython::getAll, "List of all `ModuleStatistics` objects.")
166 .def("csv", &ProcessStatisticsPython::csv, "Write statistics to a csv file")
167 ;
168
169 //Set scope to current class
170 scope statistics{stats};
171 //Define enum for all the counter types in scope of class
172 enum_<ModuleStatistics::EStatisticCounters>("StatisticCounters", R"DOCSTRING(
173Available types of statistic counters (corresponds to Module functions)
174
175.. attribute:: INIT
176
177Time spent or memory used in the `initialize() <Module.initialize>` function
178
179.. attribute:: BEGIN_RUN
180
181Time spent or memory used in the `beginRun() <Module.beginRun>` function
182
183.. attribute:: EVENT
184
185Time spent or memory used in the `event() <Module.event>` function
186
187.. attribute:: END_RUN
188
189Time spent or memory used in the `endRun() <Module.endRun>` function
190
191.. attribute:: TERM
192
193Time spent or memory used in the `terminate() <Module.terminate>` function
194
195.. attribute:: TOTAL
196
197Time spent or memory used in any module function. This is the sum of all of the above.
198
199)DOCSTRING")
200 .value("INIT", ModuleStatistics::c_Init)
201 .value("BEGIN_RUN", ModuleStatistics::c_BeginRun)
202 .value("EVENT", ModuleStatistics::c_Event)
203 .value("END_RUN", ModuleStatistics::c_EndRun)
204 .value("TERM", ModuleStatistics::c_Term)
205 .value("TOTAL", ModuleStatistics::c_Total)
206 .export_values()
207 ;
208
209 {
210 // the overloaded __str__ and __call__ give very confusing signatures so hand-craft doc string.
211 docstring_options custom_options(true, false, false); //userdef, py sigs, c++ sigs
212 stats
214 "Return the event statistics as a string in a human readable form")
216 "Return an html represenation of the statistics (used by ipython/jupyter)")
217 .def("__call__", &ProcessStatisticsPython::getModuleStatistics, (bp::arg("counter") = ModuleStatistics::EStatisticCounters::c_Event, bp::arg("modules") = boost::python::list()),
218 R"DOCSTRING(__call__(counter=StatisticCounters.EVENT, modules=None)
219
220Calling the statistics object directly like a function will return a string
221with the execution statistics in human readable form.
222
223Parameters:
224 counter (StatisticCounters): Which counter to use
225 modules (list[Module]): A list of modules to include in the returned string.
226 If omitted the statistics for all modules will be included.
227
228* print the `beginRun() <Module.beginRun>` statistics for all modules:
229
230 >>> print(statistics(statistics.BEGIN_RUN))
231
232* print the total execution times and memory consumption but only for the
233 modules ``module1`` and ``module2``
234
235 >>> print(statistics(statistics.TOTAL, [module1, module2]))
236
237* print the event statistics (default) for only two modules
238
239 >>> print(statistics(modules=[module1, module2]))
240)DOCSTRING")
241 ;
242 }
243
244 //Wrap statistics class. The default boost python docstring signature is way
245 //to noisy for these simple getters so this time we do it ourselves ...
246 docstring_options new_options(true, false, false); //userdef, py sigs, c++ sigs
247 class_<ModuleStatistics>("ModuleStatistics", "Execution statistics for a single module. "
248 "All member functions take exactly one argument to select which "
249 "counter to query which defaults to `StatisticCounters.TOTAL` if omitted.")
250 .add_property("name", make_function(&ModuleStatistics::getName, return_value_policy<copy_const_reference>()),
251 "property to get the name of the module to be displayed in the statistics")
252 .def("time_sum", &ModuleStatistics::getTimeSum, bp::arg("counter") = ModuleStatistics::c_Total,
253 "time_sum(counter=StatisticCounters.TOTAL)\nReturn the sum of all execution times")
254 .def("time_mean", &ModuleStatistics::getTimeMean, bp::arg("counter") = ModuleStatistics::c_Total,
255 "time_mean(counter=StatisticCounters.TOTAL)\nReturn the mean of all execution times")
256 .def("time_stddev", &ModuleStatistics::getTimeStddev, bp::arg("counter") = ModuleStatistics::c_Total,
257 "time_stddev(counter=StatisticCounters.TOTAL)\nReturn the standard deviation of all execution times")
258 .def("memory_sum", &ModuleStatistics::getMemorySum, bp::arg("counter") = ModuleStatistics::c_Total,
259 "memory_sum(counter=StatisticCounters.TOTAL)\nReturn the sum of the total memory usage")
260 .def("memory_mean", &ModuleStatistics::getMemoryMean, bp::arg("counter") = ModuleStatistics::c_Total,
261 "memory_mean(counter=StatisticCounters.TOTAL)\nReturn the mean of the memory usage")
262 .def("memory_stddev", &ModuleStatistics::getMemoryStddev, bp::arg("counter") = ModuleStatistics::c_Total,
263 "memory_stddev(counter=StatisticCounters.TOTAL)\nReturn the standard deviation of the memory usage")
264 .def("time_memory_corr", &ModuleStatistics::getTimeMemoryCorrelation, bp::arg("counter") = ModuleStatistics::c_Total,
265 "time_memory_corr(counter=StatisticCounters.TOTAL)\nReturn the correlaction factor between time and memory consumption")
266 .def("calls", &ModuleStatistics::getCalls, bp::arg("counter") = ModuleStatistics::c_Total,
267 "calls(counter=StatisticCounters.TOTAL)\nReturn the total number of calls")
268 ;
269
270 //Expose ProcessStatisticsPython instance as "statistics" object in pybasf2 module
271 global.attr("statistics") = object(ProcessStatisticsPython());
272}
@ c_Persistent
Object is available during entire execution time.
Definition: DataStore.h:60
static Environment & Instance()
Static method to get a reference to the Environment instance.
Definition: Environment.cc:28
Keep track of time and memory consumption during processing.
value_type getTimeStddev(EStatisticCounters type=c_Total) const
return the stddev of the execution times for a given counter
value_type getCalls(EStatisticCounters type=c_Total) const
return the number of calls for a given counter type
value_type getTimeMemoryCorrelation(EStatisticCounters type=c_Total) const
return the pearson correlation coefficient between execution times and memory consumption changes
value_type getMemoryStddev(EStatisticCounters type=c_Total) const
return the stddev of the memory consumption changes per call
const std::string & getName() const
Return the previously set name.
value_type getMemoryMean(EStatisticCounters type=c_Total) const
return the average memory change per call
EStatisticCounters
Enum to define all counter types.
@ c_Init
Counting time/calls in initialize()
@ c_EndRun
Counting time/calls in endRun()
@ c_Term
Counting time/calls in terminate()
@ c_BeginRun
Counting time/calls in beginRun()
@ c_Event
Counting time/calls in event()
@ c_Total
Sum of the above.
value_type getMemorySum(EStatisticCounters type=c_Total) const
return the total used memory for a given counter
value_type getTimeSum(EStatisticCounters type=c_Total) const
return the sum of all execution times for a given counter
value_type getTimeMean(EStatisticCounters type=c_Total) const
return the mean execution time for a given counter
Python interface for ProcessStatistics.
std::string getStatisticsStringHTML()
Return string with statistics for all selected modules as html table.
ProcessStatisticsPython getModuleStatistics(ModuleStatistics::EStatisticCounters type, const boost::python::list &modulesPyList)
Get a new statistics object for a different counter/different list of modules.
std::vector< ModuleStatistics > m_modules
Which modules to show.
void csv(const char *filename)
Write statistics to a csv file.
const ModuleStatistics * get(const std::shared_ptr< Module > &module)
Get statistics for given module.
std::string getStatisticsString()
Return string with statistics for all selected modules.
static void exposePythonAPI()
Define python wrappers to make functionality available in python.
ModuleStatistics::EStatisticCounters m_type
Which counter to show when printing the statistics.
ProcessStatistics * getWrapped()
Get wrapped ProcessStatistics object.
void clear()
Clear collected statistics but keep names of modules.
boost::python::list getAll()
Get statistics for all modules as python list.
const ModuleStatistics * getGlobal()
Get statistics for the framework itself.
Class to collect call statistics for all modules.
const ModuleStatistics & getGlobal() const
Get global statistics.
ModuleStatistics & getStatistics(const Module *module)
Get statistics for single module.
std::string getStatisticsString(ModuleStatistics::EStatisticCounters type=ModuleStatistics::c_Event, const std::vector< Belle2::ModuleStatistics > *modules=nullptr, bool html=false) const
Return string with statistics for all modules.
void write_csv(const char *filename="ProcessStatistics.csv") const
Write process statistics to a csv file.
virtual void clear() override
Clear collected statistics but keep names of modules.
Type-safe access to single objects in the data store.
Definition: StoreObjPtr.h:96
std::shared_ptr< Module > ModulePtr
Defines a pointer to a module object as a std::shared_ptr.
Definition: Module.h:43
Scalar convertPythonObject(const boost::python::object &pyObject, Scalar)
Convert from Python to given type.
Abstract base class for different kinds of events.
STL namespace.