Belle II Software development
harvesting.py
1
8
9import copy
10
11import numbers
12import collections
13
14import array
15import numpy as np
16import types
17import functools
18
19import basf2
20import ROOT
21from ROOT import Belle2 # make Belle2 namespace available
22
23from tracking.harvest.refiners import Refiner
24from tracking.root_utils import root_cd, root_browse
25
26import logging
27
28
29def get_logger():
30 """Getter for the logger instance of this file."""
31 return logging.getLogger(__name__)
32
33
34def coroutine(generator_func):
35 """Famous coroutine decorator.
36
37 Starts a receiving generator function to the first yield,
38 such that it can receive a sent call immediately.
39 """
40
41 @functools.wraps(generator_func)
42 def start(*args, **kwargs):
43 cr = generator_func(*args, **kwargs)
44 next(cr)
45 return cr
46 return start
47
48
49def harvest(foreach="", pick=None, name=None, output_file_name=None, show_results=False):
50 """Decorator to turn a function into a HarvestingModule instance.
51
52 The decorated function becomes the peel method of the module.
53 The function is invoked once for each element in the foreach storearray and should return
54 a mapping of names and values extracting the relevant variables from the object in question.
55
56 Parameters
57 ----------
58 foreach : string
59 Name of the StoreArray or StoreObjPtr
60 pick : function(obj) -> bool
61 Function that gets invoked with each object in the foreach StoreArray.
62 It can return a False value, if the object should not be investigated further.
63 name : string
64 Name used in tree abd histrogram names produced in this harvest.
65 output_file_name : string
66 Name of the ROOT output file to be produced.
67
68 Returns
69 -------
70 function decorator
71 A decorator that turns a function into HarvestingModule instance,
72 which peel function is replaced by the decorated function.
73
74 Notes
75 -----
76 An example of the usage pattern can be found at the end of this file
77 """
78
79 def harvest_decorator(peel_func):
80 name_or_default = name or peel_func.__name__
81 output_file_name_or_default = output_file_name or f"{name_or_default}.root"
82 harvesting_module = HarvestingModule(foreach=foreach,
83 output_file_name=output_file_name_or_default,
84 name=name_or_default,
85 show_results=show_results)
86 harvesting_module.peel = peel_func
87 if pick:
88 harvesting_module.pick = pick
89 return harvesting_module
90 return harvest_decorator
91
92
93class HarvestingModule(basf2.Module):
94
95 """Python module to generate summary figures of merits, plots and/or trees
96 from on StoreArray.
97
98 It runs as a proper module in the main path and examines each object in a StoreArray
99 in each event.
100
101
102 Notes
103 -----
104 Methods to be overwritten
105 ``prepare``
106 Method called at the start of each event, that may prepare things
107 (e.g. setup lookup tables or precomputed list) used in the following methods.
108 ``pick``
109 Method called with each object in the StoreArray.
110 Returns a False value if the object should be skipped.
111
112 ``peel``
113 Method called with each object in the StoreArray.
114 Extractes the parts relevant for analysis and
115 returns them as MutableMapping (e.g. a dict) of part_name and values.
116 Currently only float values or values convertible to floats are supported.
117 If requested that can change in the future.
118
119 On termination all the collected values are recasted to numpy arrays and
120 the whole ``crops`` of the harvest are casted to MutableMapping of numpy.array
121 with the same part_name and the same MutableMapping class as returned from peel.
122
123 Also in the termination phase refiners a invoked with the aggregated crops.
124 Refiners can be given in two ways.
125
126 First way is as a class methods marked as refiners like
127
128 @refiners.Refiner
129 def plot(self, crops, tdirectory, **kwds):
130 ...
131
132 where self is the module instance, crops is the MutableMapping of numpy arrays and tdirectory
133 is the current tdirectory to which the current output shall be written.
134 The additional kwds leave room for future additional arguments.
135
136 Second way is to define the refiner method (like plot) out of line and add it to
137 the harvesting module instance refiners list like harvesting_module.refiners.append(plot).
138
139 Other specialised decorators to mark a function as a Refiner such as
140
141 * refiners.filter
142 * refiners.select
143 * refiners.groupby
144
145 exist.
146
147 Predefined refiner functions exist in the refiners python module as well.
148 For instance
149
150 save_tree = refiners.save_tree()
151
152 is a predefined method to output the MutableMapping of numpy arrays as a TTree.
153 """
154
155
156 default_expert_level = 1
157
158 def __init__(self,
159 foreach,
160 output_file_name,
161 name=None,
162 title=None,
163 contact=None,
164 expert_level=None,
165 show_results=False):
166 """Constructor of the harvesting module.
167
168 Parameters
169 ----------
170 foreach : string
171 Name of a StoreArray, which objects should be investigated
172 output_file_name : string
173 Name of the ROOT file to which the results should be written.
174 Giving an opened ROOT file is also allowed.
175 If None is given write to the current ROOT file.
176 name : string, optional
177 Name of the harvest that is used in the names of ROOT plots and trees.
178 Defaults to the class name.
179 title : string, optional
180 Name of the harvest that is used in the title of ROOT plots and trees.
181 Defaults to the name.
182 contact : string, optional
183 Contact email address to be used in the validation plots contact. Defaults to None.
184 expert_level : int, optional
185 Expert level that can be used to switch on more plots.
186 Generally the higher the more detailed to analysis.
187 Meaning depends entirely on the subclass implementing a certain policy.
188 Defaults to default_expert_level.
189 show_results : bool, optional
190 Indicator to show the refined results at termination of the path
191 """
192
193 super().__init__()
194
195
196 self.foreach = foreach
197
198
199 self.output_file_name = output_file_name
200
201 if not isinstance(self.output_file_name, (ROOT.TFile, str)):
202 raise TypeError("output_file_name is allowed to be a string or a ROOT.TFile object")
203
204
205 self.set_name(name or self.__class__.__name__)
206
207
208 self.title = title or self.name()
209
210
211 self.contact = contact
212
213
214 self.expert_level = self.default_expert_level if expert_level is None else expert_level
215
216
218 self.refiners = []
219
220
221 self.show_results = show_results
222
223 @property
224 def id(self):
225 """Working around that name() is a method.
226
227 Exposing the name as a property using a different name
228 """
229 return self.name()
230
231 def initialize(self):
232 """Initialisation method of the module.
233
234 Prepares the receiver stash of objects to be harvestered.
235 """
236
237 self.stash = self.barn()
238
239 def event(self):
240 """Event method of the module
241
242 * Does invoke the prepare method before the iteration starts.
243 * In each event fetch the StoreArray / iterable StoreObjPtr,
244 * Iterate through all instances
245 * Feed each instance to the pick method to decide it the instance is relevant
246 * Forward it to the peel method that should generated a dictionary of values
247 * Store each dictionary of values
248 """
249 self.prepare()
250 stash = self.stash.send
251 pick = self.pick
252 peel = self.peel
253 for crop in self.gather():
254 if pick(crop):
255 crop = peel(crop)
256 if isinstance(crop, types.GeneratorType):
257 many_crops = crop
258 for crop in many_crops:
259 stash(crop)
260 else:
261 stash(crop)
262
263 def terminate(self):
264 """Termination method of the module.
265
266 Finalize the collected crops.
267 Start the refinement.
268 """
269
270 self.stash.close()
271 del self.stash
272
273 try:
274 self.refine(self.crops)
275 except AttributeError:
276 pass
277
278 @staticmethod
280 """Create the storing objects for the crop values
281
282 Currently a numpy.array of doubles is used to store all values in memory.
283 """
284 return array.array("d")
285
286 @coroutine
287 def barn(self):
288 """Coroutine that receives the dictionaries of names and values from peel and store them."""
289 crop = (yield)
290 raw_crops = copy.copy(crop)
291 crops = copy.copy(crop)
292
293 if isinstance(crop, numbers.Number):
294 raw_crops = self.create_crop_part_collection()
295 try:
296 while True:
297 raw_crops.append(crop)
298 # next crop
299 crop = (yield)
300 except GeneratorExit:
301 crops = np.array(raw_crops)
302
303 elif isinstance(crop, collections.abc.MutableMapping):
304 for part_name in crop:
305 raw_crops[part_name] = self.create_crop_part_collection()
306
307 try:
308 while True:
309 for part_name, parts in list(raw_crops.items()):
310 if part_name in crop:
311 parts.append(crop[part_name])
312 else:
313 parts.append(np.NaN)
314 # next crop
315 crop = (yield)
316 except GeneratorExit:
317 for part_name, parts in list(raw_crops.items()):
318 crops[part_name] = np.array(parts)
319
320 else:
321 msg = f"Unrecognised crop {crop} of type {type(crop)}"
322 raise ValueError(msg)
323
324
325 self.raw_crops = raw_crops
326
327 self.crops = crops
328
329 def gather(self):
330 """Iterator that yield the instances form the StoreArray / iterable StoreObj.
331
332 Yields
333 ------
334 Object instances from the StoreArray, iterable StoreObj or the StoreObj itself
335 in case it is not iterable.
336 """
337
338 registered_store_arrays = Belle2.PyStoreArray.list()
339 registered_store_objs = Belle2.PyStoreObj.list()
340
341 foreach = self.foreach
342 foreach_is_store_obj = foreach in registered_store_objs
343 foreach_is_store_array = foreach in registered_store_arrays
344
345 if foreach is not None:
346 if foreach_is_store_array:
347 store_array = Belle2.PyStoreArray(self.foreach)
348 yield from store_array
349
350 elif foreach_is_store_obj:
351 store_obj = Belle2.PyStoreObj(self.foreach)
352 try:
353 yield from self.iter_store_obj(store_obj)
354 except TypeError:
355 # Cannot iter the store object. Yield it instead.
356 yield store_obj.obj()
357
358 else:
359 msg = f"Name {self.foreach} does not refer to a valid object on the data store"
360 raise KeyError(msg)
361 else:
362 yield None
363
364 def prepare(self):
365 """Default implementation of prepare.
366
367 Can be overridden by subclasses.
368 """
369 return
370
371 def peel(self, crop):
372 """Unpack the the instances and return and dictionary of names to values or
373 a generator of those dictionaries to be saved.
374
375 Returns
376 -------
377 dict(str -> float)
378 Unpacked names and values
379 or
380
381 Yields
382 ------
383 dict(str -> float)
384 Unpacked names and values
385
386 """
387 return {"name": np.nan}
388
389 def pick(self, crop):
390 """Indicate whether the instance should be forwarded to the peeling
391
392 Returns
393 -------
394 bool : Indicator if the instance is valuable in the current harvest.
395 """
396 return True
397
398 def refine(self, crops):
399 """Receive the gathered crops and forward them to the refiners."""
400
401 kwds = {}
402 if self.output_file_name:
403 # Save everything to a ROOT file
404 if isinstance(self.output_file_name, ROOT.TFile):
405 output_tdirectory = self.output_file_name
406 else:
407 output_tfile = ROOT.TFile(self.output_file_name, 'recreate')
408 output_tdirectory = output_tfile
409
410 else:
411 output_tdirectory = None
412
413 try:
414 with root_cd(output_tdirectory):
415 for refiner in self.refiners:
416 refiner(self, crops, tdirectory=output_tdirectory, **kwds)
417
418 # Get the methods marked as refiners from the class
419 cls = type(self)
420 for name in dir(cls):
421 if isinstance(getattr(cls, name), Refiner):
422 refiner = getattr(self, name)
423 # Getattr already binds self
424 refiner(crops, tdirectory=output_tdirectory, **kwds)
425
426 finally:
427 # If we opened the TFile ourself, close it again
428 if self.output_file_name:
429 if isinstance(self.output_file_name, str):
430 output_tfile.Close()
431
432 if self.show_results and self.output_file_name:
433 if isinstance(self.output_file_name, str):
434 output_tfile = ROOT.TFile(self.output_file_name)
435 root_browse(output_tfile)
436 input("Press enter to close")
437 output_tfile.Close()
438 else:
439 root_browse(self.output_file_name)
440 input("Press enter to close")
441
442 @staticmethod
443 def iter_store_obj(store_obj):
444 """Obtain a iterator from a StoreObj
445
446 Repeatedly calls iter(store_obj) or store_obj.__iter__()
447 until the final iterator returns itself
448
449 Returns
450 -------
451 iterator of the StoreObj
452 """
453 iterable = store_obj.obj()
454 last_iterable = None
455 while iterable is not last_iterable:
456 if hasattr(iterable, "__iter__"):
457 iterable, last_iterable = iterable.__iter__(), iterable
458 else:
459 iterable, last_iterable = iter(iterable), iterable
460 return iterable
461
462
463def test():
464 """Test a quick analysis of the MCParticles in generic events."""
465 from .refiners import save_histograms, save_tree, save_fom
466
467 def primaries_seen_in_detector(mc_particle):
468 return (mc_particle.hasStatus(Belle2.MCParticle.c_PrimaryParticle) and
469 mc_particle.hasStatus(Belle2.MCParticle.c_StableInGenerator) and
470 not mc_particle.hasStatus(Belle2.MCParticle.c_IsVirtual) and
471 (mc_particle.hasStatus(Belle2.MCParticle.c_LeftDetector) or
472 mc_particle.hasStatus(Belle2.MCParticle.c_StoppedInDetector)))
473
474 # Proposed syntax for quick generation of overview plots
475 @save_fom(aggregation=np.mean, select=["energy", "pt"], name="physics", key="mean_{part_name}")
476 @save_histograms(outlier_z_score=5.0,
477 allow_discrete=True,
478 filter=lambda xs: xs != 0.0,
479 filter_on="is_secondary",
480 select=["pt", "is_secondary"],
481 folder_name="secondary_pt")
482 @save_histograms(outlier_z_score=5.0,
483 allow_discrete=True,
484 groupby="status",
485 select=["is_secondary", "pt"])
486 @save_histograms(outlier_z_score=5.0,
487 allow_discrete=True,
488 select=["is_secondary", "pt"],
489 stackby="is_secondary",
490 folder_name="pt_stackby_is_secondary/nested_test")
491 @save_histograms(outlier_z_score=5.0,
492 allow_discrete=True,
493 select={'pt': '$p_t$'},
494 title="Distribution of p_{t}")
495 @save_tree()
496 @harvest(foreach="MCParticles",
497 pick=primaries_seen_in_detector,
498 output_file_name="MCParticleOverview.root")
499 def MCParticleOverview(mc_particle):
500 momentum = mc_particle.getMomentum()
501 pdg_code = mc_particle.getPDG()
502 secondary_process = mc_particle.getSecondaryPhysicsProcess()
503
504 return dict(
505 # Save divide not throwing an ZeroDivisionError
506 tan_lambda=np.divide(1.0, np.tan(momentum.Theta())),
507 pt=momentum.Rho(),
508 secondary_process=secondary_process,
509 is_secondary=secondary_process != 0,
510 mass=mc_particle.getMass(),
511 status=mc_particle.getStatus(),
512 pdg_mass=ROOT.TDatabasePDG.Instance().GetParticle(pdg_code).Mass(),
513 energy=mc_particle.getEnergy(),
514 pdg_code=pdg_code,
515 )
516
517 from .run import HarvestingRun
518
519 class ExampleHarvestingRun(HarvestingRun):
520 n_events = 100
521
522 def harvesting_module(self):
523 return MCParticleOverview
524
525 ExampleHarvestingRun().configure_and_execute_from_commandline()
526
527
528if __name__ == "__main__":
529 test()
A (simplified) python wrapper for StoreArray.
Definition: PyStoreArray.h:72
static std::vector< std::string > list(DataStore::EDurability durability=DataStore::EDurability::c_Event)
Return list of available arrays for given durability.
Definition: PyStoreArray.cc:28
a (simplified) python wrapper for StoreObjPtr.
Definition: PyStoreObj.h:67
static std::vector< std::string > list(DataStore::EDurability durability=DataStore::EDurability::c_Event)
Return list of available objects for given durability.
Definition: PyStoreObj.cc:28
refiners
A list of additional refiner instances to be executed on top of the refiner methods that are members ...
Definition: harvesting.py:218
foreach
Name of the StoreArray or iterable StoreObjPtr that contains the objects to be harvested.
Definition: harvesting.py:196
stash
stash of the harvested crops (start with those in the barn)
Definition: harvesting.py:237
contact
Contact email address to be displayed on the validation page.
Definition: harvesting.py:211
show_results
Switch to show the result ROOT file in a TBrowser on terminate.
Definition: harvesting.py:221
raw_crops
the dictionaries from peel as a numpy.array of doubles
Definition: harvesting.py:325
output_file_name
Name of the ROOT output file to be generated.
Definition: harvesting.py:199
expert_level
Integer expert level that controls to detail of plots to be generated.
Definition: harvesting.py:214
def __init__(self, foreach, output_file_name, name=None, title=None, contact=None, expert_level=None, show_results=False)
Definition: harvesting.py:165
int default_expert_level
The default value of expert_level if not specified explicitly by the caller.
Definition: harvesting.py:156
Definition: plot.py:1