Belle II Software  release-05-01-25
harvesting.py
1 # -*- coding: utf-8 -*-
2 import copy
3 
4 import numbers
5 import collections
6 
7 import array
8 import numpy as np
9 import types
10 import functools
11 
12 import basf2
13 import ROOT
14 from ROOT import Belle2 # make Belle2 namespace available
15 
16 from .refiners import Refiner
17 from tracking.root_utils import root_cd, root_browse
18 
19 import logging
20 
21 
22 def get_logger():
23  """Getter for the logger instance of this file."""
24  return logging.getLogger(__name__)
25 
26 
27 def coroutine(generator_func):
28  """Famous coroutine decorator.
29 
30  Starts a receiving generator function to the first yield,
31  such that it can receive a send call immediatly.
32  """
33 
34  @functools.wraps(generator_func)
35  def start(*args, **kwargs):
36  cr = generator_func(*args, **kwargs)
37  next(cr)
38  return cr
39  return start
40 
41 
42 def harvest(foreach="", pick=None, name=None, output_file_name=None, show_results=False):
43  """Decorator to turn a function into a HarvestingModule instance.
44 
45  The decorated function becomes the peel method of the module.
46  The function is invoked once for each element in the foreach storearray and should return
47  a mapping of names and values extracting the relevant variables from the object in question.
48 
49  Parameters
50  ----------
51  foreach : string
52  Name of the StoreArray or StoreObjPtr
53  pick : function(obj) -> bool
54  Function that gets invoked with each object in the foreach StoreArray.
55  It can return a False value, if the object should not be investigated further.
56  name : string
57  Name used in tree abd histrogram names produced in this harvest.
58  output_file_name : string
59  Name of the ROOT output file to be produced.
60 
61  Returns
62  -------
63  function decorator
64  A decorator that turns a function into HarvestingModule instance,
65  which peel function is replaced by the decorated function.
66 
67  Notes
68  -----
69  An example of the usage pattern can be found at the end of this file
70  """
71 
72  def harvest_decorator(peel_func):
73  name_or_default = name or peel_func.__name__
74  output_file_name_or_default = output_file_name or "{}.root".format(name_or_default)
75  harvesting_module = HarvestingModule(foreach=foreach,
76  output_file_name=output_file_name_or_default,
77  name=name_or_default,
78  show_results=show_results)
79  harvesting_module.peel = peel_func
80  if pick:
81  harvesting_module.pick = pick
82  return harvesting_module
83  return harvest_decorator
84 
85 
86 class HarvestingModule(basf2.Module):
87 
88  """Python module to generate summary figures of merits, plots and/or trees
89  from on StoreArray.
90 
91  It runs as a proper module in the main path and examines each object in a StoreArray
92  in each event.
93 
94 
95  Notes
96  -----
97  Methods to be overwritten
98  ``prepare``
99  Method called at the start of each event, that may prepare things
100  (e.g. setup lookup tables or precomputed list) used in the following methods.
101  ``pick``
102  Method called with each object in the StoreArray.
103  Returns a False value if the object should be skipped.
104 
105  ``peel``
106  Method called with each object in the StoreArray.
107  Extractes the parts relevant for analysis and
108  returns them as MutableMapping (e.g. a dict) of part_name and values.
109  Currently only float values or values convertable to floats are supported.
110  If requested that can change in the future.
111 
112  On termination all the collected values are recasted to numpy arrays and
113  the whole ``crops`` of the harvest are casted to MutableMapping of numpy.array
114  with the same part_name and the same MutableMapping class as returned from peel.
115 
116  Also in the termination phase refiners a invoked with the aggregated crops.
117  Refiners can be given in two ways.
118 
119  First way is as a class methods marked as refiners like
120 
121  @refiners.Refiner
122  def plot(self, crops, tdirectory, **kwds):
123  ...
124 
125  where self is the module instance, crops is the MutableMapping of numpy arrays and tdirectory
126  is the current tdirectory to which the current output shall be written.
127  The additional kwds leave room for future additional arguments.
128 
129  Second way is to define the refiner method (like plot) out of line and add it to
130  the harvesting module instance refiners list like harvesting_module.refiners.append(plot).
131 
132  Other specialised decorators to mark a function as a Refiner such as
133 
134  * refiners.filter
135  * refiners.select
136  * refiners.groupby
137 
138  exist.
139 
140  Predefined refiner functions exist in the refiners python module as well.
141  For instance
142 
143  save_tree = refiners.save_tree()
144 
145  is a predefined method to output the MutableMapping of numpy arrays as a TTree.
146  """
147 
148 
149  default_expert_level = 1
150 
151  def __init__(self,
152  foreach,
153  output_file_name,
154  name=None,
155  title=None,
156  contact=None,
157  expert_level=None,
158  show_results=False):
159  """Constructor of the harvesting module.
160 
161  Parameters
162  ----------
163  foreach : string
164  Name of a StoreArray, which objects should be investigated
165  output_file_name : string
166  Name of the ROOT file to which the results should be written.
167  Giving an opened ROOT file is also allowed.
168  If None is given write to the current ROOT file.
169  name : string, optional
170  Name of the harvest that is used in the names of ROOT plots and trees.
171  Defaults to the class name.
172  title : string, optional
173  Name of the harvest that is used in the title of ROOT plots and trees.
174  Defaults to the name.
175  contact : string, optional
176  Contact email adress to be used in the validation plots contact. Defaults to None.
177  expert_level : int, optional
178  Expert level that can be used to switch on more plots.
179  Generally the higher the more detailed to analysis.
180  Meaning depends entirely on the subclass implementing a certain policy.
181  Defaults to default_expert_level.
182  show_results : bool, optional
183  Indicator to show the refined results at termination of the path
184  """
185 
186  super().__init__()
187 
188 
189  self.foreach = foreach
190 
191 
192  self.output_file_name = output_file_name
193 
194  if not isinstance(self.output_file_name, (ROOT.TFile, str)):
195  raise TypeError("output_file_name is allowed to be a string or a ROOT.TFile object")
196 
197 
198  self.set_name(name or self.__class__.__name__)
199 
200 
201  self.title = title or self.name()
202 
203 
204  self.contact = contact
205 
206 
207  self.expert_level = self.default_expert_level if expert_level is None else expert_level
208 
209 
211  self.refiners = []
212 
213 
214  self.show_results = show_results
215 
216  @property
217  def id(self):
218  """Working around that name() is a method.
219 
220  Exposing the name as a property using a different name
221  """
222  return self.name()
223 
224  def initialize(self):
225  """Initialisation method of the module.
226 
227  Prepares the receiver stash of objects to be harvestered.
228  """
229 
230  self.stash = self.barn()
231 
232  def event(self):
233  """Event method of the module
234 
235  * Does invoke the prepare method before the iteration starts.
236  * In each event fetch the StoreArray / iterable StoreObjPtr,
237  * Iterate through all instances
238  * Feed each instance to the pick method to deside it the instance is relevant
239  * Forward it to the peel method that should generated a dictionary of values
240  * Store each dictionary of values
241  """
242  self.prepare()
243  stash = self.stash.send
244  pick = self.pick
245  peel = self.peel
246  for crop in self.gather():
247  if pick(crop):
248  crop = peel(crop)
249  if isinstance(crop, types.GeneratorType):
250  many_crops = crop
251  for crop in many_crops:
252  stash(crop)
253  else:
254  stash(crop)
255 
256  def terminate(self):
257  """Termination method of the module.
258 
259  Finalize the collected crops.
260  Start the refinement.
261  """
262 
263  self.stash.close()
264  del self.stash
265 
266  try:
267  self.refine(self.crops)
268  except AttributeError:
269  pass
270 
271  @staticmethod
273  """Create the storing objects for the crop values
274 
275  Currently a numpy.array of doubles is used to store all values in memory.
276  """
277  return array.array("d")
278 
279  @coroutine
280  def barn(self):
281  """Coroutine that receives the dictionaries of names and values from peel and store them."""
282  crop = (yield)
283  raw_crops = copy.copy(crop)
284  crops = copy.copy(crop)
285 
286  if isinstance(crop, numbers.Number):
287  raw_crops = self.create_crop_part_collection()
288  try:
289  while True:
290  raw_crops.append(crop)
291  # next crop
292  crop = (yield)
293  except GeneratorExit:
294  crops = np.array(raw_crops)
295 
296  elif isinstance(crop, collections.MutableMapping):
297  for part_name in crop:
298  raw_crops[part_name] = self.create_crop_part_collection()
299 
300  try:
301  while True:
302  for part_name, parts in list(raw_crops.items()):
303  if part_name in crop:
304  parts.append(crop[part_name])
305  else:
306  parts.append(np.NaN)
307  # next crop
308  crop = (yield)
309  except GeneratorExit:
310  for part_name, parts in list(raw_crops.items()):
311  crops[part_name] = np.array(parts)
312 
313  else:
314  msg = "Unrecognised crop {} of type {}".format(
315  crop,
316  type(crop)
317  )
318  raise ValueError(msg)
319 
320 
321  self.raw_crops = raw_crops
322 
323  self.crops = crops
324 
325  def gather(self):
326  """Iterator that yield the instances form the StoreArray / iterable StoreObj.
327 
328  Yields
329  ------
330  Object instances from the StoreArray, iterable StoreObj or the StoreObj itself
331  in case it is not iterable.
332  """
333 
334  registered_store_arrays = Belle2.PyStoreArray.list()
335  registered_store_objs = Belle2.PyStoreObj.list()
336 
337  foreach = self.foreach
338  foreach_is_store_obj = foreach in registered_store_objs
339  foreach_is_store_array = foreach in registered_store_arrays
340 
341  if foreach is not None:
342  if foreach_is_store_array:
343  store_array = Belle2.PyStoreArray(self.foreach)
344  for crop in store_array:
345  yield crop
346 
347  elif foreach_is_store_obj:
348  store_obj = Belle2.PyStoreObj(self.foreach)
349  try:
350  for crop in self.iter_store_obj(store_obj):
351  yield crop
352  except TypeError:
353  # Cannot iter the store object. Yield it instead.
354  yield store_obj.obj()
355 
356  else:
357  msg = "Name {} does not refer to a valid object on the data store".format(
358  self.foreach
359  )
360  raise KeyError(msg)
361  else:
362  yield None
363 
364  def prepare(self):
365  """Default implementation of prepare.
366 
367  Can be overridden by subclasses.
368  """
369  return
370 
371  def peel(self, crop):
372  """Unpack the the instances and return and dictionary of names to values or
373  a generator of those dictionaries to be saved.
374 
375  Returns
376  -------
377  dict(str -> float)
378  Unpacked names and values
379  or
380 
381  Yields
382  ------
383  dict(str -> float)
384  Unpacked names and values
385 
386  """
387  return {"name": np.nan}
388 
389  def pick(self, crop):
390  """Indicate whether the instance should be forwarded to the peeling
391 
392  Returns
393  -------
394  bool : Indicator if the instance is valueable in the current harverst.
395  """
396  return True
397 
398  def refine(self, crops):
399  """Receive the gathered crops and forward them to the refiners."""
400 
401  kwds = {}
402  if self.output_file_name:
403  # Save everything to a ROOT file
404  if isinstance(self.output_file_name, ROOT.TFile):
405  output_tdirectory = self.output_file_name
406  else:
407  output_tfile = ROOT.TFile(self.output_file_name, 'recreate')
408  output_tdirectory = output_tfile
409 
410  else:
411  output_tdirectory = None
412 
413  try:
414  with root_cd(output_tdirectory) as tdirectory:
415  for refiner in self.refiners:
416  refiner(self, crops, tdirectory=output_tdirectory, **kwds)
417 
418  # Get the methods marked as refiners from the class
419  cls = type(self)
420  for name in dir(cls):
421  if isinstance(getattr(cls, name), Refiner):
422  refiner = getattr(self, name)
423  # Getattr already binds self
424  refiner(crops, tdirectory=output_tdirectory, **kwds)
425 
426  finally:
427  # If we opened the TFile ourself, close it again
428  if self.output_file_name:
429  if isinstance(self.output_file_name, str):
430  output_tfile.Close()
431 
432  if self.show_results and self.output_file_name:
433  if isinstance(self.output_file_name, str):
434  output_tfile = ROOT.TFile(self.output_file_name)
435  root_browse(output_tfile)
436  input("Press enter to close")
437  output_tfile.Close()
438  else:
439  root_browse(self.output_file_name)
440  input("Press enter to close")
441 
442  @staticmethod
443  def iter_store_obj(store_obj):
444  """Obtain a iterator from a StoreObj
445 
446  Repeatly calls iter(store_obj) or store_obj.__iter__()
447  until the final iterator returns itself
448 
449  Returns
450  -------
451  iterator of the StoreObj
452  """
453  iterable = store_obj.obj()
454  last_iterable = None
455  while iterable is not last_iterable:
456  if hasattr(iterable, "__iter__"):
457  iterable, last_iterable = iterable.__iter__(), iterable
458  else:
459  iterable, last_iterable = iter(iterable), iterable
460  return iterable
461 
462 
463 def test():
464  """Test a quick analysis of the MCParticles in generic events."""
465  from tracking.validation.utilities import is_primary, is_stable_in_generator
466  from .refiners import save_histograms, save_tree, save_fom
467 
468  def primaries_seen_in_detector(mc_particle):
469  return (mc_particle.hasStatus(Belle2.MCParticle.c_PrimaryParticle) and
470  mc_particle.hasStatus(Belle2.MCParticle.c_StableInGenerator) and
471  not mc_particle.hasStatus(Belle2.MCParticle.c_IsVirtual) and
472  (mc_particle.hasStatus(Belle2.MCParticle.c_LeftDetector) or
473  mc_particle.hasStatus(Belle2.MCParticle.c_StoppedInDetector)))
474 
475  # Proposed syntax for quick generation of overview plots
476  @save_fom(aggregation=np.mean, select=["energy", "pt"], name="physics", key="mean_{part_name}")
477  @save_histograms(outlier_z_score=5.0,
478  allow_discrete=True,
479  filter=lambda xs: xs != 0.0,
480  filter_on="is_secondary",
481  select=["pt", "is_secondary"],
482  folder_name="secondary_pt")
483  @save_histograms(outlier_z_score=5.0,
484  allow_discrete=True,
485  groupby="status",
486  select=["is_secondary", "pt"])
487  @save_histograms(outlier_z_score=5.0,
488  allow_discrete=True,
489  select=["is_secondary", "pt"],
490  stackby="is_secondary",
491  folder_name="pt_stackby_is_secondary/nested_test")
492  @save_histograms(outlier_z_score=5.0,
493  allow_discrete=True,
494  select={'pt': '$p_t$'},
495  title="Distribution of p_{t}")
496  @save_tree()
497  @harvest(foreach="MCParticles",
498  pick=primaries_seen_in_detector,
499  output_file_name="MCParticleOverview.root")
500  def MCParticleOverview(mc_particle):
501  momentum_tvector3 = mc_particle.getMomentum()
502  pdg_code = mc_particle.getPDG()
503  secondary_process = mc_particle.getSecondaryPhysicsProcess()
504 
505  return dict(
506  # Save divide not throwing an ZeroDivisionError
507  tan_lambda=np.divide(1.0, np.tan(momentum_tvector3.Theta())),
508  pt=momentum_tvector3.Pt(),
509  secondary_process=secondary_process,
510  is_secondary=secondary_process != 0,
511  mass=mc_particle.getMass(),
512  status=mc_particle.getStatus(),
513  pdg_mass=ROOT.TDatabasePDG.Instance().GetParticle(pdg_code).Mass(),
514  energy=mc_particle.getEnergy(),
515  pdg_code=pdg_code,
516  )
517 
518  from .run import HarvestingRun
519 
520  class ExampleHarvestingRun(HarvestingRun):
521  n_events = 100
522 
523  def harvesting_module(self):
524  return MCParticleOverview
525 
526  ExampleHarvestingRun().configure_and_execute_from_commandline()
527 
528 if __name__ == "__main__":
529  test()
tracking.harvest.harvesting.HarvestingModule.output_file_name
output_file_name
Name of the ROOT output file to be generated.
Definition: harvesting.py:185
tracking.harvest.harvesting.HarvestingModule.default_expert_level
int default_expert_level
The default value of expert_level if not specified explicitly by the caller.
Definition: harvesting.py:149
tracking.harvest.harvesting.HarvestingModule.title
title
Name of this harvest.
Definition: harvesting.py:194
tracking.harvest.harvesting.HarvestingModule.crops
crops
the dictionaries from peel
Definition: harvesting.py:323
tracking.harvest.harvesting.HarvestingModule.show_results
show_results
Switch to show the result ROOT file in a TBrowser on terminate.
Definition: harvesting.py:207
tracking.harvest.harvesting.HarvestingModule.foreach
foreach
Name of the StoreArray or iterable StoreObjPtr that contains the objects to be harvested.
Definition: harvesting.py:182
tracking.harvest.harvesting.HarvestingModule.prepare
def prepare(self)
Definition: harvesting.py:364
Belle2::PyStoreObj::list
static std::vector< std::string > list(DataStore::EDurability durability=DataStore::EDurability::c_Event)
Return list of available objects for given durability.
Definition: PyStoreObj.cc:21
Belle2::PyStoreObj
a (simplified) python wrapper for StoreObjPtr.
Definition: PyStoreObj.h:69
Belle2::PyStoreArray::list
static std::vector< std::string > list(DataStore::EDurability durability=DataStore::EDurability::c_Event)
Return list of available arrays for given durability.
Definition: PyStoreArray.cc:21
tracking.harvest.harvesting.HarvestingModule.contact
contact
Contact email address to be displayed on the validation page.
Definition: harvesting.py:197
tracking.harvest.harvesting.HarvestingModule.pick
def pick(self, crop)
Definition: harvesting.py:389
tracking.harvest.harvesting.HarvestingModule.event
def event(self)
Definition: harvesting.py:232
tracking.harvest.harvesting.HarvestingModule.refiners
refiners
A list of additional refiner instances to be executed on top of the refiner methods that are members ...
Definition: harvesting.py:204
tracking.harvest.harvesting.HarvestingModule.barn
def barn(self)
Definition: harvesting.py:280
tracking.harvest.harvesting.HarvestingModule.id
def id(self)
Definition: harvesting.py:217
tracking.harvest.harvesting.HarvestingModule.create_crop_part_collection
def create_crop_part_collection()
Definition: harvesting.py:272
tracking.harvest.harvesting.HarvestingModule.raw_crops
raw_crops
the dictionaries from peel as a numpy.array of doubles
Definition: harvesting.py:321
tracking.harvest.harvesting.HarvestingModule.__init__
def __init__(self, foreach, output_file_name, name=None, title=None, contact=None, expert_level=None, show_results=False)
Definition: harvesting.py:151
tracking.root_utils
Definition: root_utils.py:1
tracking.harvest.harvesting.HarvestingModule.gather
def gather(self)
Definition: harvesting.py:325
tracking.harvest.harvesting.HarvestingModule.terminate
def terminate(self)
Definition: harvesting.py:256
tracking.harvest.harvesting.HarvestingModule.refine
def refine(self, crops)
Definition: harvesting.py:398
tracking.validation.utilities
Definition: utilities.py:1
Belle2::PyStoreArray
a (simplified) python wrapper for StoreArray.
Definition: PyStoreArray.h:58
tracking.harvest.harvesting.HarvestingModule
Definition: harvesting.py:86
tracking.harvest.harvesting.HarvestingModule.expert_level
expert_level
Integer expert level that controlls to detail of plots to be generated.
Definition: harvesting.py:200
tracking.harvest.harvesting.HarvestingModule.iter_store_obj
def iter_store_obj(store_obj)
Definition: harvesting.py:443
tracking.harvest.harvesting.HarvestingModule.initialize
def initialize(self)
Definition: harvesting.py:224
tracking.harvest.harvesting.HarvestingModule.peel
def peel(self, crop)
Definition: harvesting.py:371
tracking.harvest.harvesting.HarvestingModule.stash
stash
stash of the harvested crops (start with those in the barn)
Definition: harvesting.py:230