Belle II Software light-2406-ragdoll
ProcessStatisticsPython.cc
1/**************************************************************************
2 * basf2 (Belle II Analysis Software Framework) *
3 * Author: The Belle II Collaboration *
4 * *
5 * See git log for contributors and copyright holders. *
6 * This file is licensed under LGPL-3.0, see LICENSE.md. *
7 **************************************************************************/
8
9#include <boost/python.hpp>
10#include <framework/pybasf2/ProcessStatisticsPython.h>
11#include <framework/core/Module.h>
12#include <framework/core/Environment.h>
13#include <framework/datastore/StoreObjPtr.h>
14#include <framework/logging/Logger.h>
15#include <framework/core/PyObjConvUtils.h>
16
17using namespace Belle2;
18using namespace std;
19using namespace boost::python;
20
22{
24 if (!stats) {
25 if (!Environment::Instance().getDryRun()) {
26 B2ERROR("ProcessStatistics data object is not available, you either disabled statistics with --no-stats or didn't run process(path) yet.");
27 }
28 return nullptr;
29 }
30 return &(*stats);
31}
32
34{
35 if (!getWrapped())
36 return "";
37 return getWrapped()->getStatisticsString(m_type, m_modules.empty() ? nullptr : &m_modules);
38}
39
41{
42 if (!getWrapped())
43 return "";
44 return getWrapped()->getStatisticsString(m_type, m_modules.empty() ? nullptr : &m_modules, true);
45}
46
48 const boost::python::list& modulesPyList)
49{
50 if (!getWrapped())
52
53 std::vector<ModuleStatistics> moduleStats;
54 auto modules = PyObjConvUtils::convertPythonObject(modulesPyList, std::vector<ModulePtr>());
55 for (const ModulePtr& ptr : modules) {
56 ModuleStatistics& stats = getWrapped()->getStatistics(ptr.get());
57 //Name could be empty if module has never been called
58 if (stats.getName().empty()) stats.setName(ptr->getName());
59 moduleStats.push_back(stats);
60 }
61 return ProcessStatisticsPython(type, moduleStats);
62}
63
65{
66 boost::python::list result;
67 if (!getWrapped())
68 return result;
69 for (auto& module : (m_modules.empty()) ? getWrapped()->getAll() : m_modules) {
70 result.append(module);
71 }
72 return result;
73}
74
76{
77 if (!getWrapped())
78 return nullptr;
79 return &getWrapped()->getStatistics(module.get());
80}
81
83{
84 if (!getWrapped())
85 return nullptr;
86 return &getWrapped()->getGlobal();
87
88}
90{
91 if (!getWrapped())
92 return;
93 getWrapped()->clear();
94}
95
96
98{
99 // to avoid confusion between std::arg and boost::python::arg we want a shorthand namespace as well
100 namespace bp = boost::python;
101
102 //Reference to global scope
103 scope global;
104
105 docstring_options options(true, true, false); //userdef, py sigs, c++ sigs
106
107 //Wrap ProcessStatisticsPython as non-copy and non-instantiable in python
108 class_<ProcessStatisticsPython> stats("ProcessStatistics", R"DOCSTRING(
109Interface for retrieving statistics about module execution at runtime or after
110:py:func:`basf2.process()` returns. Should be accessed through a global instance `basf2.statistics`.
111
112Statistics for `event() <Module.event()>` calls are available as a string representation of the object:
113
114>>> from basf2 import statistics
115>>> print(statistics)
116=================================================================================
117Name | Calls | Memory(MB) | Time(s) | Time(ms)/Call
118=================================================================================
119RootInput | 101 | 0 | 0.01 | 0.05 +- 0.02
120RootOutput | 100 | 0 | 0.02 | 0.20 +- 0.87
121ProgressBar | 100 | 0 | 0.00 | 0.00 +- 0.00
122=================================================================================
123Total | 101 | 0 | 0.03 | 0.26 +- 0.86
124=================================================================================
125
126This provides information on the number of calls, elapsed time, and the average
127difference in resident memory before and after the `event() <Module.event>` call.
128
129.. note::
130
131 The module responsible for reading (or generating) events usually has one
132 additional event() call which is used to determine whether event processing
133 should stop.
134
135.. warning::
136
137 Memory consumption is reporting the difference in memory usage as reported
138 by the kernel before and after the call. This is not the maximum memory the
139 module has consumed. Negative values indicate that this module has freed
140 momemory which was allocated in other modules or function calls.
141
142Information on other calls like `initialize() <Module.initialize>`,
143`terminate() <Module.terminate>`, etc. are also available through the different
144counters defined in `StatisticCounters`:
145
146>>> print(statistics(statistics.INIT))
147>>> print(statistics(statistics.BEGIN_RUN))
148>>> print(statistics(statistics.END_RUN))
149>>> print(statistics(statistics.TERM))
150)DOCSTRING", no_init);
151
152 stats
153 .def("get", &ProcessStatisticsPython::get, return_value_policy<reference_existing_object>(), bp::arg("module"),
154 "Get `ModuleStatistics` for given Module.")
155 .def("get_global", &ProcessStatisticsPython::getGlobal, return_value_policy<reference_existing_object>(),
156 "Get global `ModuleStatistics` containing total elapsed time etc.")
157 .def("clear", &ProcessStatisticsPython::clear, "Clear collected statistics but keep names of modules")
158 .def_readonly("modules", &ProcessStatisticsPython::getAll, "List of all `ModuleStatistics` objects.")
159 ;
160
161 //Set scope to current class
162 scope statistics{stats};
163 //Define enum for all the counter types in scope of class
164 enum_<ModuleStatistics::EStatisticCounters>("StatisticCounters", R"DOCSTRING(
165Available types of statistic counters (corresponds to Module functions)
166
167.. attribute:: INIT
168
169Time spent or memory used in the `initialize() <Module.initialize>` function
170
171.. attribute:: BEGIN_RUN
172
173Time spent or memory used in the `beginRun() <Module.beginRun>` function
174
175.. attribute:: EVENT
176
177Time spent or memory used in the `event() <Module.event>` function
178
179.. attribute:: END_RUN
180
181Time spent or memory used in the `endRun() <Module.endRun>` function
182
183.. attribute:: TERM
184
185Time spent or memory used in the `terminate() <Module.terminate>` function
186
187.. attribute:: TOTAL
188
189Time spent or memory used in any module function. This is the sum of all of the above.
190
191)DOCSTRING")
192 .value("INIT", ModuleStatistics::c_Init)
193 .value("BEGIN_RUN", ModuleStatistics::c_BeginRun)
194 .value("EVENT", ModuleStatistics::c_Event)
195 .value("END_RUN", ModuleStatistics::c_EndRun)
196 .value("TERM", ModuleStatistics::c_Term)
197 .value("TOTAL", ModuleStatistics::c_Total)
198 .export_values()
199 ;
200
201 {
202 // the overloaded __str__ and __call__ give very confusing signatures so hand-craft doc string.
203 docstring_options custom_options(true, false, false); //userdef, py sigs, c++ sigs
204 stats
206 "Return the event statistics as a string in a human readable form")
208 "Return an html represenation of the statistics (used by ipython/jupyter)")
209 .def("__call__", &ProcessStatisticsPython::getModuleStatistics, (bp::arg("counter") = ModuleStatistics::EStatisticCounters::c_Event, bp::arg("modules") = boost::python::list()),
210 R"DOCSTRING(__call__(counter=StatisticCounters.EVENT, modules=None)
211
212Calling the statistics object directly like a function will return a string
213with the execution statistics in human readable form.
214
215Parameters:
216 counter (StatisticCounters): Which counter to use
217 modules (list[Module]): A list of modules to include in the returned string.
218 If omitted the statistics for all modules will be included.
219
220* print the `beginRun() <Module.beginRun>` statistics for all modules:
221
222 >>> print(statistics(statistics.BEGIN_RUN))
223
224* print the total execution times and memory consumption but only for the
225 modules ``module1`` and ``module2``
226
227 >>> print(statistics(statistics.TOTAL, [module1, module2]))
228
229* print the event statistics (default) for only two modules
230
231 >>> print(statistics(modules=[module1, module2]))
232)DOCSTRING")
233 ;
234 }
235
236 //Wrap statistics class. The default boost python docstring signature is way
237 //too noisy for these simple getters so this time we do it ourselves ...
238 docstring_options new_options(true, false, false); //userdef, py sigs, c++ sigs
239 class_<ModuleStatistics>("ModuleStatistics", "Execution statistics for a single module. "
240 "All member functions take exactly one argument to select which "
241 "counter to query which defaults to `StatisticCounters.TOTAL` if omitted.")
242 .add_property("name", make_function(&ModuleStatistics::getName, return_value_policy<copy_const_reference>()),
243 "property to get the name of the module to be displayed in the statistics")
244 .def("time_sum", &ModuleStatistics::getTimeSum, bp::arg("counter") = ModuleStatistics::c_Total,
245 "time_sum(counter=StatisticCounters.TOTAL)\nReturn the sum of all execution times")
246 .def("time_mean", &ModuleStatistics::getTimeMean, bp::arg("counter") = ModuleStatistics::c_Total,
247 "time_mean(counter=StatisticCounters.TOTAL)\nReturn the mean of all execution times")
248 .def("time_stddev", &ModuleStatistics::getTimeStddev, bp::arg("counter") = ModuleStatistics::c_Total,
249 "time_stddev(counter=StatisticCounters.TOTAL)\nReturn the standard deviation of all execution times")
250 .def("memory_sum", &ModuleStatistics::getMemorySum, bp::arg("counter") = ModuleStatistics::c_Total,
251 "memory_sum(counter=StatisticCounters.TOTAL)\nReturn the sum of the total memory usage")
252 .def("memory_mean", &ModuleStatistics::getMemoryMean, bp::arg("counter") = ModuleStatistics::c_Total,
253 "memory_mean(counter=StatisticCounters.TOTAL)\nReturn the mean of the memory usage")
254 .def("memory_stddev", &ModuleStatistics::getMemoryStddev, bp::arg("counter") = ModuleStatistics::c_Total,
255 "memory_stddev(counter=StatisticCounters.TOTAL)\nReturn the standard deviation of the memory usage")
256 .def("time_memory_corr", &ModuleStatistics::getTimeMemoryCorrelation, bp::arg("counter") = ModuleStatistics::c_Total,
257 "time_memory_corr(counter=StatisticCounters.TOTAL)\nReturn the correlaction factor between time and memory consumption")
258 .def("calls", &ModuleStatistics::getCalls, bp::arg("counter") = ModuleStatistics::c_Total,
259 "calls(counter=StatisticCounters.TOTAL)\nReturn the total number of calls")
260 ;
261
262 //Expose ProcessStatisticsPython instance as "statistics" object in pybasf2 module
263 global.attr("statistics") = object(ProcessStatisticsPython());
264}
@ c_Persistent
Object is available during entire execution time.
Definition: DataStore.h:60
static Environment & Instance()
Static method to get a reference to the Environment instance.
Definition: Environment.cc:28
Keep track of time and memory consumption during processing.
value_type getTimeStddev(EStatisticCounters type=c_Total) const
return the stddev of the execution times for a given counter
value_type getCalls(EStatisticCounters type=c_Total) const
return the number of calls for a given counter type
value_type getTimeMemoryCorrelation(EStatisticCounters type=c_Total) const
return the pearson correlation coefficient between execution times and memory consumption changes
value_type getMemoryStddev(EStatisticCounters type=c_Total) const
return the stddev of the memory consumption changes per call
const std::string & getName() const
Return the previously set name.
value_type getMemoryMean(EStatisticCounters type=c_Total) const
return the average memory change per call
EStatisticCounters
Enum to define all counter types.
@ c_Init
Counting time/calls in initialize()
@ c_EndRun
Counting time/calls in endRun()
@ c_Term
Counting time/calls in terminate()
@ c_BeginRun
Counting time/calls in beginRun()
@ c_Event
Counting time/calls in event()
@ c_Total
Sum of the above.
value_type getMemorySum(EStatisticCounters type=c_Total) const
return the total used memory for a given counter
value_type getTimeSum(EStatisticCounters type=c_Total) const
return the sum of all execution times for a given counter
value_type getTimeMean(EStatisticCounters type=c_Total) const
return the mean execution time for a given counter
Python interface for ProcessStatistics.
std::string getStatisticsStringHTML()
Return string with statistics for all selected modules as html table.
ProcessStatisticsPython getModuleStatistics(ModuleStatistics::EStatisticCounters type, const boost::python::list &modulesPyList)
Get a new statistics object for a different counter/different list of modules.
std::vector< ModuleStatistics > m_modules
Which modules to show.
const ModuleStatistics * get(const std::shared_ptr< Module > &module)
Get statistics for given module.
std::string getStatisticsString()
Return string with statistics for all selected modules.
static void exposePythonAPI()
Define python wrappers to make functionality available in python.
ModuleStatistics::EStatisticCounters m_type
Which counter to show when printing the statistics.
ProcessStatistics * getWrapped()
Get wrapped ProcessStatistics object.
void clear()
Clear collected statistics but keep names of modules.
boost::python::list getAll()
Get statistics for all modules as python list.
const ModuleStatistics * getGlobal()
Get statistics for the framework itself.
Class to collect call statistics for all modules.
const ModuleStatistics & getGlobal() const
Get global statistics.
ModuleStatistics & getStatistics(const Module *module)
Get statistics for single module.
std::string getStatisticsString(ModuleStatistics::EStatisticCounters type=ModuleStatistics::c_Event, const std::vector< Belle2::ModuleStatistics > *modules=nullptr, bool html=false) const
Return string with statistics for all modules.
virtual void clear() override
Clear collected statistics but keep names of modules.
Type-safe access to single objects in the data store.
Definition: StoreObjPtr.h:96
std::shared_ptr< Module > ModulePtr
Defines a pointer to a module object as a std::shared_ptr.
Definition: Module.h:43
Scalar convertPythonObject(const boost::python::object &pyObject, Scalar)
Convert from Python to given type.
Abstract base class for different kinds of events.
Definition: ClusterUtils.h:24
STL namespace.