Belle II Software development
ProcessStatisticsPython.cc
1/**************************************************************************
2 * basf2 (Belle II Analysis Software Framework) *
3 * Author: The Belle II Collaboration *
4 * *
5 * See git log for contributors and copyright holders. *
6 * This file is licensed under LGPL-3.0, see LICENSE.md. *
7 **************************************************************************/
8
9#include <boost/python.hpp>
10#include <framework/pybasf2/ProcessStatisticsPython.h>
11#include <framework/core/Module.h>
12#include <framework/core/Environment.h>
13#include <framework/datastore/StoreObjPtr.h>
14#include <framework/logging/Logger.h>
15#include <framework/core/PyObjConvUtils.h>
16
17using namespace Belle2;
18using namespace std;
19using namespace boost::python;
20
22{
24 if (!stats) {
25 if (!Environment::Instance().getDryRun()) {
26 B2ERROR("ProcessStatistics data object is not available, you either didn't enable statistics with --stats or didn't run process(path) yet.");
27 }
28 return nullptr;
29 }
30 return &(*stats);
31}
32
34{
35 if (!getWrapped())
36 return "";
37 if (getWrapped()->getStatisticsPrintStatus(m_type))
38 return "";
40 return getWrapped()->getStatisticsString(m_type, m_modules.empty() ? nullptr : &m_modules);
41}
42
44{
45 if (!getWrapped())
46 return "";
47 if (getWrapped()->getStatisticsPrintStatus(m_type))
48 return "";
50 return getWrapped()->getStatisticsString(m_type, m_modules.empty() ? nullptr : &m_modules, true);
51}
52
54 const boost::python::list& modulesPyList)
55{
56 if (!getWrapped())
58
59 std::vector<ModuleStatistics> moduleStats;
60 auto modules = PyObjConvUtils::convertPythonObject(modulesPyList, std::vector<ModulePtr>());
61 for (const ModulePtr& ptr : modules) {
62 ModuleStatistics& stats = getWrapped()->getStatistics(ptr.get());
63 //Name could be empty if module has never been called
64 if (stats.getName().empty()) stats.setName(ptr->getName());
65 moduleStats.push_back(stats);
66 }
67 return ProcessStatisticsPython(type, moduleStats);
68}
69
71{
72 boost::python::list result;
73 if (!getWrapped())
74 return result;
75 for (auto& module : (m_modules.empty()) ? getWrapped()->getAll() : m_modules) {
76 result.append(module);
77 }
78 return result;
79}
80
82{
83 if (!getWrapped())
84 return nullptr;
85 return &getWrapped()->getStatistics(module.get());
86}
87
89{
90 if (!getWrapped())
91 return nullptr;
92 return &getWrapped()->getGlobal();
93
94}
96{
97 if (!getWrapped())
98 return;
99 getWrapped()->clear();
100}
101
102void ProcessStatisticsPython::csv(const char* filename)
103{
104 if (!getWrapped())
105 return;
106 getWrapped()->write_csv(filename);
107}
108
109
111{
112 // to avoid confusion between std::arg and boost::python::arg we want a shorthand namespace as well
113 namespace bp = boost::python;
114
115 //Reference to global scope
116 scope global;
117
118 docstring_options options(true, true, false); //userdef, py sigs, c++ sigs
119
120 //Wrap ProcessStatisticsPython as non-copy and non-instantiable in python
121 class_<ProcessStatisticsPython> stats("ProcessStatistics", R"DOCSTRING(
122Interface for retrieving statistics about module execution at runtime or after
123:py:func:`basf2.process()` returns. Should be accessed through a global instance `basf2.statistics`.
124
125Statistics for `event() <Module.event()>` calls are available as a string representation of the object:
126
127>>> from basf2 import statistics
128>>> print(statistics)
129=================================================================================
130Name | Calls | Memory(MB) | Time(s) | Time(ms)/Call
131=================================================================================
132RootInput | 101 | 0 | 0.01 | 0.05 +- 0.02
133RootOutput | 100 | 0 | 0.02 | 0.20 +- 0.87
134ProgressBar | 100 | 0 | 0.00 | 0.00 +- 0.00
135=================================================================================
136Total | 101 | 0 | 0.03 | 0.26 +- 0.86
137=================================================================================
138
139This provides information on the number of calls, elapsed time, and the average
140difference in resident memory before and after the `event() <Module.event>` call.
141
142.. note::
143
144 The module responsible for reading (or generating) events usually has one
145 additional event() call which is used to determine whether event processing
146 should stop.
147
148.. warning::
149
150 Memory consumption is reporting the difference in memory usage as reported
151 by the kernel before and after the call. This is not the maximum memory the
152 module has consumed. Negative values indicate that this module has freed
153 memory which was allocated in other modules or function calls.
154
155Information on other calls like `initialize() <Module.initialize>`,
156`terminate() <Module.terminate>`, etc. are also available through the different
157counters defined in `StatisticCounters`:
158
159>>> print(statistics(statistics.INIT))
160>>> print(statistics(statistics.BEGIN_RUN))
161>>> print(statistics(statistics.END_RUN))
162>>> print(statistics(statistics.TERM))
163)DOCSTRING", no_init);
164
165 stats
166 .def("get", &ProcessStatisticsPython::get, return_value_policy<reference_existing_object>(), bp::arg("module"),
167 "Get `ModuleStatistics` for given Module.")
168 .def("get_global", &ProcessStatisticsPython::getGlobal, return_value_policy<reference_existing_object>(),
169 "Get global `ModuleStatistics` containing total elapsed time etc.")
170 .def("clear", &ProcessStatisticsPython::clear, "Clear collected statistics but keep names of modules")
171 .def_readonly("modules", &ProcessStatisticsPython::getAll, "List of all `ModuleStatistics` objects.")
172 .def("csv", &ProcessStatisticsPython::csv, "Write statistics to a csv file")
173 ;
174
175 //Set scope to current class
176 scope statistics{stats};
177 //Define enum for all the counter types in scope of class
178 enum_<ModuleStatistics::EStatisticCounters>("StatisticCounters", R"DOCSTRING(
179Available types of statistic counters (corresponds to Module functions)
180
181.. attribute:: INIT
182
183Time spent or memory used in the `initialize() <Module.initialize>` function
184
185.. attribute:: BEGIN_RUN
186
187Time spent or memory used in the `beginRun() <Module.beginRun>` function
188
189.. attribute:: EVENT
190
191Time spent or memory used in the `event() <Module.event>` function
192
193.. attribute:: END_RUN
194
195Time spent or memory used in the `endRun() <Module.endRun>` function
196
197.. attribute:: TERM
198
199Time spent or memory used in the `terminate() <Module.terminate>` function
200
201.. attribute:: TOTAL
202
203Time spent or memory used in any module function. This is the sum of all of the above.
204
205)DOCSTRING")
206 .value("INIT", ModuleStatistics::c_Init)
207 .value("BEGIN_RUN", ModuleStatistics::c_BeginRun)
208 .value("EVENT", ModuleStatistics::c_Event)
209 .value("END_RUN", ModuleStatistics::c_EndRun)
210 .value("TERM", ModuleStatistics::c_Term)
211 .value("TOTAL", ModuleStatistics::c_Total)
212 .export_values()
213 ;
214
215 {
216 // the overloaded __str__ and __call__ give very confusing signatures so hand-craft doc string.
217 docstring_options custom_options(true, false, false); //userdef, py sigs, c++ sigs
218 stats
220 "Return the event statistics as a string in a human readable form")
222 "Return an html representation of the statistics (used by ipython/jupyter)")
223 .def("__call__", &ProcessStatisticsPython::getModuleStatistics, (bp::arg("counter") = ModuleStatistics::EStatisticCounters::c_Event, bp::arg("modules") = boost::python::list()),
224 R"DOCSTRING(__call__(counter=StatisticCounters.EVENT, modules=None)
225
226Calling the statistics object directly like a function will return a string
227with the execution statistics in human readable form.
228
229Parameters:
230 counter (StatisticCounters): Which counter to use
231 modules (list[Module]): A list of modules to include in the returned string.
232 If omitted the statistics for all modules will be included.
233
234* print the `beginRun() <Module.beginRun>` statistics for all modules:
235
236 >>> print(statistics(statistics.BEGIN_RUN))
237
238* print the total execution times and memory consumption but only for the
239 modules ``module1`` and ``module2``
240
241 >>> print(statistics(statistics.TOTAL, [module1, module2]))
242
243* print the event statistics (default) for only two modules
244
245 >>> print(statistics(modules=[module1, module2]))
246)DOCSTRING")
247 ;
248 }
249
250 //Wrap statistics class. The default boost python docstring signature is way
251 //too noisy for these simple getters so this time we do it ourselves ...
252 docstring_options new_options(true, false, false); //userdef, py sigs, c++ sigs
253 class_<ModuleStatistics>("ModuleStatistics", "Execution statistics for a single module. "
254 "All member functions take exactly one argument to select which "
255 "counter to query which defaults to `StatisticCounters.TOTAL` if omitted.")
256 .add_property("name", make_function(&ModuleStatistics::getName, return_value_policy<copy_const_reference>()),
257 "property to get the name of the module to be displayed in the statistics")
258 .def("time_sum", &ModuleStatistics::getTimeSum, bp::arg("counter") = ModuleStatistics::c_Total,
259 "time_sum(counter=StatisticCounters.TOTAL)\nReturn the sum of all execution times")
260 .def("time_mean", &ModuleStatistics::getTimeMean, bp::arg("counter") = ModuleStatistics::c_Total,
261 "time_mean(counter=StatisticCounters.TOTAL)\nReturn the mean of all execution times")
262 .def("time_stddev", &ModuleStatistics::getTimeStddev, bp::arg("counter") = ModuleStatistics::c_Total,
263 "time_stddev(counter=StatisticCounters.TOTAL)\nReturn the standard deviation of all execution times")
264 .def("memory_sum", &ModuleStatistics::getMemorySum, bp::arg("counter") = ModuleStatistics::c_Total,
265 "memory_sum(counter=StatisticCounters.TOTAL)\nReturn the sum of the total memory usage")
266 .def("memory_mean", &ModuleStatistics::getMemoryMean, bp::arg("counter") = ModuleStatistics::c_Total,
267 "memory_mean(counter=StatisticCounters.TOTAL)\nReturn the mean of the memory usage")
268 .def("memory_stddev", &ModuleStatistics::getMemoryStddev, bp::arg("counter") = ModuleStatistics::c_Total,
269 "memory_stddev(counter=StatisticCounters.TOTAL)\nReturn the standard deviation of the memory usage")
270 .def("time_memory_corr", &ModuleStatistics::getTimeMemoryCorrelation, bp::arg("counter") = ModuleStatistics::c_Total,
271 "time_memory_corr(counter=StatisticCounters.TOTAL)\nReturn the correlaction factor between time and memory consumption")
272 .def("calls", &ModuleStatistics::getCalls, bp::arg("counter") = ModuleStatistics::c_Total,
273 "calls(counter=StatisticCounters.TOTAL)\nReturn the total number of calls")
274 ;
275
276 //Expose ProcessStatisticsPython instance as "statistics" object in pybasf2 module
277 global.attr("statistics") = object(ProcessStatisticsPython());
278}
@ c_Persistent
Object is available during entire execution time.
Definition DataStore.h:60
static Environment & Instance()
Static method to get a reference to the Environment instance.
Keep track of time and memory consumption during processing.
value_type getTimeStddev(EStatisticCounters type=c_Total) const
return the stddev of the execution times for a given counter
value_type getCalls(EStatisticCounters type=c_Total) const
return the number of calls for a given counter type
value_type getTimeMemoryCorrelation(EStatisticCounters type=c_Total) const
return the Pearson correlation coefficient between execution times and memory consumption changes
value_type getMemoryStddev(EStatisticCounters type=c_Total) const
return the stddev of the memory consumption changes per call
const std::string & getName() const
Return the previously set name.
value_type getMemoryMean(EStatisticCounters type=c_Total) const
return the average memory change per call
EStatisticCounters
Enum to define all counter types.
@ c_Init
Counting time/calls in initialize()
@ c_EndRun
Counting time/calls in endRun()
@ c_Term
Counting time/calls in terminate()
@ c_BeginRun
Counting time/calls in beginRun()
@ c_Event
Counting time/calls in event()
@ c_Total
Sum of the above.
value_type getMemorySum(EStatisticCounters type=c_Total) const
return the total used memory for a given counter
value_type getTimeSum(EStatisticCounters type=c_Total) const
return the sum of all execution times for a given counter
value_type getTimeMean(EStatisticCounters type=c_Total) const
return the mean execution time for a given counter
std::string getStatisticsStringHTML()
Return string with statistics for all selected modules as html table.
ProcessStatisticsPython getModuleStatistics(ModuleStatistics::EStatisticCounters type, const boost::python::list &modulesPyList)
Get a new statistics object for a different counter/different list of modules.
std::vector< ModuleStatistics > m_modules
Which modules to show.
ProcessStatisticsPython(ModuleStatistics::EStatisticCounters type=ModuleStatistics::c_Event, const std::vector< ModuleStatistics > &modules={})
Construct a new object to show statistics for a given call type.
void csv(const char *filename)
Write statistics to a csv file.
const ModuleStatistics * get(const std::shared_ptr< Module > &module)
Get statistics for given module.
std::string getStatisticsString()
Return string with statistics for all selected modules.
static void exposePythonAPI()
Define python wrappers to make functionality available in python.
ModuleStatistics::EStatisticCounters m_type
Which counter to show when printing the statistics.
ProcessStatistics * getWrapped()
Get wrapped ProcessStatistics object.
void clear()
Clear collected statistics but keep names of modules.
boost::python::list getAll()
Get statistics for all modules as python list.
const ModuleStatistics * getGlobal()
Get statistics for the framework itself.
Class to collect call statistics for all modules.
const ModuleStatistics & getGlobal() const
Get global statistics.
ModuleStatistics & getStatistics(const Module *module)
Get statistics for single module.
std::string getStatisticsString(ModuleStatistics::EStatisticCounters type=ModuleStatistics::c_Event, const std::vector< Belle2::ModuleStatistics > *modules=nullptr, bool html=false) const
Return string with statistics for all modules.
void setStatisticsPrintStatus(ModuleStatistics::EStatisticCounters type, bool printStatus)
Set print status of statistics.
void write_csv(const char *filename="ProcessStatistics.csv") const
Write process statistics to a csv file.
virtual void clear() override
Clear collected statistics but keep names of modules.
Type-safe access to single objects in the data store.
Definition StoreObjPtr.h:96
std::shared_ptr< Module > ModulePtr
Defines a pointer to a module object as a std::shared_ptr.
Definition Module.h:43
Scalar convertPythonObject(const boost::python::object &pyObject, Scalar)
Convert from Python to given type.
Abstract base class for different kinds of events.
STL namespace.