Belle II Software  release-08-01-10
harvesting.py
1 # -*- coding: utf-8 -*-
2 
3 
10 
11 import copy
12 
13 import numbers
14 import collections
15 
16 import array
17 import numpy as np
18 import types
19 import functools
20 
21 import basf2
22 import ROOT
23 from ROOT import Belle2 # make Belle2 namespace available
24 
25 from tracking.harvest.refiners import Refiner
26 from tracking.root_utils import root_cd, root_browse
27 
28 import logging
29 
30 
31 def get_logger():
32  """Getter for the logger instance of this file."""
33  return logging.getLogger(__name__)
34 
35 
36 def coroutine(generator_func):
37  """Famous coroutine decorator.
38 
39  Starts a receiving generator function to the first yield,
40  such that it can receive a send call immediatly.
41  """
42 
43  @functools.wraps(generator_func)
44  def start(*args, **kwargs):
45  cr = generator_func(*args, **kwargs)
46  next(cr)
47  return cr
48  return start
49 
50 
51 def harvest(foreach="", pick=None, name=None, output_file_name=None, show_results=False):
52  """Decorator to turn a function into a HarvestingModule instance.
53 
54  The decorated function becomes the peel method of the module.
55  The function is invoked once for each element in the foreach storearray and should return
56  a mapping of names and values extracting the relevant variables from the object in question.
57 
58  Parameters
59  ----------
60  foreach : string
61  Name of the StoreArray or StoreObjPtr
62  pick : function(obj) -> bool
63  Function that gets invoked with each object in the foreach StoreArray.
64  It can return a False value, if the object should not be investigated further.
65  name : string
66  Name used in tree abd histrogram names produced in this harvest.
67  output_file_name : string
68  Name of the ROOT output file to be produced.
69 
70  Returns
71  -------
72  function decorator
73  A decorator that turns a function into HarvestingModule instance,
74  which peel function is replaced by the decorated function.
75 
76  Notes
77  -----
78  An example of the usage pattern can be found at the end of this file
79  """
80 
81  def harvest_decorator(peel_func):
82  name_or_default = name or peel_func.__name__
83  output_file_name_or_default = output_file_name or "{}.root".format(name_or_default)
84  harvesting_module = HarvestingModule(foreach=foreach,
85  output_file_name=output_file_name_or_default,
86  name=name_or_default,
87  show_results=show_results)
88  harvesting_module.peel = peel_func
89  if pick:
90  harvesting_module.pick = pick
91  return harvesting_module
92  return harvest_decorator
93 
94 
95 class HarvestingModule(basf2.Module):
96 
97  """Python module to generate summary figures of merits, plots and/or trees
98  from on StoreArray.
99 
100  It runs as a proper module in the main path and examines each object in a StoreArray
101  in each event.
102 
103 
104  Notes
105  -----
106  Methods to be overwritten
107  ``prepare``
108  Method called at the start of each event, that may prepare things
109  (e.g. setup lookup tables or precomputed list) used in the following methods.
110  ``pick``
111  Method called with each object in the StoreArray.
112  Returns a False value if the object should be skipped.
113 
114  ``peel``
115  Method called with each object in the StoreArray.
116  Extractes the parts relevant for analysis and
117  returns them as MutableMapping (e.g. a dict) of part_name and values.
118  Currently only float values or values convertable to floats are supported.
119  If requested that can change in the future.
120 
121  On termination all the collected values are recasted to numpy arrays and
122  the whole ``crops`` of the harvest are casted to MutableMapping of numpy.array
123  with the same part_name and the same MutableMapping class as returned from peel.
124 
125  Also in the termination phase refiners a invoked with the aggregated crops.
126  Refiners can be given in two ways.
127 
128  First way is as a class methods marked as refiners like
129 
130  @refiners.Refiner
131  def plot(self, crops, tdirectory, **kwds):
132  ...
133 
134  where self is the module instance, crops is the MutableMapping of numpy arrays and tdirectory
135  is the current tdirectory to which the current output shall be written.
136  The additional kwds leave room for future additional arguments.
137 
138  Second way is to define the refiner method (like plot) out of line and add it to
139  the harvesting module instance refiners list like harvesting_module.refiners.append(plot).
140 
141  Other specialised decorators to mark a function as a Refiner such as
142 
143  * refiners.filter
144  * refiners.select
145  * refiners.groupby
146 
147  exist.
148 
149  Predefined refiner functions exist in the refiners python module as well.
150  For instance
151 
152  save_tree = refiners.save_tree()
153 
154  is a predefined method to output the MutableMapping of numpy arrays as a TTree.
155  """
156 
157 
158  default_expert_level = 1
159 
160  def __init__(self,
161  foreach,
162  output_file_name,
163  name=None,
164  title=None,
165  contact=None,
166  expert_level=None,
167  show_results=False):
168  """Constructor of the harvesting module.
169 
170  Parameters
171  ----------
172  foreach : string
173  Name of a StoreArray, which objects should be investigated
174  output_file_name : string
175  Name of the ROOT file to which the results should be written.
176  Giving an opened ROOT file is also allowed.
177  If None is given write to the current ROOT file.
178  name : string, optional
179  Name of the harvest that is used in the names of ROOT plots and trees.
180  Defaults to the class name.
181  title : string, optional
182  Name of the harvest that is used in the title of ROOT plots and trees.
183  Defaults to the name.
184  contact : string, optional
185  Contact email adress to be used in the validation plots contact. Defaults to None.
186  expert_level : int, optional
187  Expert level that can be used to switch on more plots.
188  Generally the higher the more detailed to analysis.
189  Meaning depends entirely on the subclass implementing a certain policy.
190  Defaults to default_expert_level.
191  show_results : bool, optional
192  Indicator to show the refined results at termination of the path
193  """
194 
195  super().__init__()
196 
197 
198  self.foreachforeach = foreach
199 
200 
201  self.output_file_nameoutput_file_name = output_file_name
202 
203  if not isinstance(self.output_file_nameoutput_file_name, (ROOT.TFile, str)):
204  raise TypeError("output_file_name is allowed to be a string or a ROOT.TFile object")
205 
206 
207  self.set_name(name or self.__class__.__name__)
208 
209 
210  self.titletitle = title or self.name()
211 
212 
213  self.contactcontact = contact
214 
215 
216  self.expert_levelexpert_level = self.default_expert_leveldefault_expert_level if expert_level is None else expert_level
217 
218 
220  self.refinersrefiners = []
221 
222 
223  self.show_resultsshow_results = show_results
224 
225  @property
226  def id(self):
227  """Working around that name() is a method.
228 
229  Exposing the name as a property using a different name
230  """
231  return self.name()
232 
233  def initialize(self):
234  """Initialisation method of the module.
235 
236  Prepares the receiver stash of objects to be harvestered.
237  """
238 
239  self.stashstash = self.barnbarn()
240 
241  def event(self):
242  """Event method of the module
243 
244  * Does invoke the prepare method before the iteration starts.
245  * In each event fetch the StoreArray / iterable StoreObjPtr,
246  * Iterate through all instances
247  * Feed each instance to the pick method to deside it the instance is relevant
248  * Forward it to the peel method that should generated a dictionary of values
249  * Store each dictionary of values
250  """
251  self.prepareprepare()
252  stash = self.stashstash.send
253  pick = self.pickpick
254  peel = self.peelpeel
255  for crop in self.gathergather():
256  if pick(crop):
257  crop = peel(crop)
258  if isinstance(crop, types.GeneratorType):
259  many_crops = crop
260  for crop in many_crops:
261  stash(crop)
262  else:
263  stash(crop)
264 
265  def terminate(self):
266  """Termination method of the module.
267 
268  Finalize the collected crops.
269  Start the refinement.
270  """
271 
272  self.stashstash.close()
273  del self.stashstash
274 
275  try:
276  self.refinerefine(self.cropscrops)
277  except AttributeError:
278  pass
279 
280  @staticmethod
282  """Create the storing objects for the crop values
283 
284  Currently a numpy.array of doubles is used to store all values in memory.
285  """
286  return array.array("d")
287 
288  @coroutine
289  def barn(self):
290  """Coroutine that receives the dictionaries of names and values from peel and store them."""
291  crop = (yield)
292  raw_crops = copy.copy(crop)
293  crops = copy.copy(crop)
294 
295  if isinstance(crop, numbers.Number):
296  raw_crops = self.create_crop_part_collectioncreate_crop_part_collection()
297  try:
298  while True:
299  raw_crops.append(crop)
300  # next crop
301  crop = (yield)
302  except GeneratorExit:
303  crops = np.array(raw_crops)
304 
305  elif isinstance(crop, collections.MutableMapping):
306  for part_name in crop:
307  raw_crops[part_name] = self.create_crop_part_collectioncreate_crop_part_collection()
308 
309  try:
310  while True:
311  for part_name, parts in list(raw_crops.items()):
312  if part_name in crop:
313  parts.append(crop[part_name])
314  else:
315  parts.append(np.NaN)
316  # next crop
317  crop = (yield)
318  except GeneratorExit:
319  for part_name, parts in list(raw_crops.items()):
320  crops[part_name] = np.array(parts)
321 
322  else:
323  msg = "Unrecognised crop {} of type {}".format(
324  crop,
325  type(crop)
326  )
327  raise ValueError(msg)
328 
329 
330  self.raw_cropsraw_crops = raw_crops
331 
332  self.cropscrops = crops
333 
334  def gather(self):
335  """Iterator that yield the instances form the StoreArray / iterable StoreObj.
336 
337  Yields
338  ------
339  Object instances from the StoreArray, iterable StoreObj or the StoreObj itself
340  in case it is not iterable.
341  """
342 
343  registered_store_arrays = Belle2.PyStoreArray.list()
344  registered_store_objs = Belle2.PyStoreObj.list()
345 
346  foreach = self.foreachforeach
347  foreach_is_store_obj = foreach in registered_store_objs
348  foreach_is_store_array = foreach in registered_store_arrays
349 
350  if foreach is not None:
351  if foreach_is_store_array:
352  store_array = Belle2.PyStoreArray(self.foreachforeach)
353  for crop in store_array:
354  yield crop
355 
356  elif foreach_is_store_obj:
357  store_obj = Belle2.PyStoreObj(self.foreachforeach)
358  try:
359  for crop in self.iter_store_objiter_store_obj(store_obj):
360  yield crop
361  except TypeError:
362  # Cannot iter the store object. Yield it instead.
363  yield store_obj.obj()
364 
365  else:
366  msg = "Name {} does not refer to a valid object on the data store".format(
367  self.foreachforeach
368  )
369  raise KeyError(msg)
370  else:
371  yield None
372 
373  def prepare(self):
374  """Default implementation of prepare.
375 
376  Can be overridden by subclasses.
377  """
378  return
379 
380  def peel(self, crop):
381  """Unpack the the instances and return and dictionary of names to values or
382  a generator of those dictionaries to be saved.
383 
384  Returns
385  -------
386  dict(str -> float)
387  Unpacked names and values
388  or
389 
390  Yields
391  ------
392  dict(str -> float)
393  Unpacked names and values
394 
395  """
396  return {"name": np.nan}
397 
398  def pick(self, crop):
399  """Indicate whether the instance should be forwarded to the peeling
400 
401  Returns
402  -------
403  bool : Indicator if the instance is valueable in the current harverst.
404  """
405  return True
406 
407  def refine(self, crops):
408  """Receive the gathered crops and forward them to the refiners."""
409 
410  kwds = {}
411  if self.output_file_nameoutput_file_name:
412  # Save everything to a ROOT file
413  if isinstance(self.output_file_nameoutput_file_name, ROOT.TFile):
414  output_tdirectory = self.output_file_nameoutput_file_name
415  else:
416  output_tfile = ROOT.TFile(self.output_file_nameoutput_file_name, 'recreate')
417  output_tdirectory = output_tfile
418 
419  else:
420  output_tdirectory = None
421 
422  try:
423  with root_cd(output_tdirectory):
424  for refiner in self.refinersrefiners:
425  refiner(self, crops, tdirectory=output_tdirectory, **kwds)
426 
427  # Get the methods marked as refiners from the class
428  cls = type(self)
429  for name in dir(cls):
430  if isinstance(getattr(cls, name), Refiner):
431  refiner = getattr(self, name)
432  # Getattr already binds self
433  refiner(crops, tdirectory=output_tdirectory, **kwds)
434 
435  finally:
436  # If we opened the TFile ourself, close it again
437  if self.output_file_nameoutput_file_name:
438  if isinstance(self.output_file_nameoutput_file_name, str):
439  output_tfile.Close()
440 
441  if self.show_resultsshow_results and self.output_file_nameoutput_file_name:
442  if isinstance(self.output_file_nameoutput_file_name, str):
443  output_tfile = ROOT.TFile(self.output_file_nameoutput_file_name)
444  root_browse(output_tfile)
445  input("Press enter to close")
446  output_tfile.Close()
447  else:
448  root_browse(self.output_file_nameoutput_file_name)
449  input("Press enter to close")
450 
451  @staticmethod
452  def iter_store_obj(store_obj):
453  """Obtain a iterator from a StoreObj
454 
455  Repeatly calls iter(store_obj) or store_obj.__iter__()
456  until the final iterator returns itself
457 
458  Returns
459  -------
460  iterator of the StoreObj
461  """
462  iterable = store_obj.obj()
463  last_iterable = None
464  while iterable is not last_iterable:
465  if hasattr(iterable, "__iter__"):
466  iterable, last_iterable = iterable.__iter__(), iterable
467  else:
468  iterable, last_iterable = iter(iterable), iterable
469  return iterable
470 
471 
472 def test():
473  """Test a quick analysis of the MCParticles in generic events."""
474  from .refiners import save_histograms, save_tree, save_fom
475 
476  def primaries_seen_in_detector(mc_particle):
477  return (mc_particle.hasStatus(Belle2.MCParticle.c_PrimaryParticle) and
478  mc_particle.hasStatus(Belle2.MCParticle.c_StableInGenerator) and
479  not mc_particle.hasStatus(Belle2.MCParticle.c_IsVirtual) and
480  (mc_particle.hasStatus(Belle2.MCParticle.c_LeftDetector) or
481  mc_particle.hasStatus(Belle2.MCParticle.c_StoppedInDetector)))
482 
483  # Proposed syntax for quick generation of overview plots
484  @save_fom(aggregation=np.mean, select=["energy", "pt"], name="physics", key="mean_{part_name}")
485  @save_histograms(outlier_z_score=5.0, allow_discrete=True, filter=lambda xs: xs != 0.0, filter_on="is_secondary", select=["pt", "is_secondary"], folder_name="secondary_pt")
486  @save_histograms(outlier_z_score=5.0, allow_discrete=True, groupby="status", select=["is_secondary", "pt"])
487  @save_histograms(outlier_z_score=5.0, allow_discrete=True, select=["is_secondary", "pt"], stackby="is_secondary", folder_name="pt_stackby_is_secondary/nested_test")
488  @save_histograms(outlier_z_score=5.0, allow_discrete=True, select={'pt': '$p_t$'}, title="Distribution of p_{t}")
489  @save_tree()
490  @harvest(foreach="MCParticles", pick=primaries_seen_in_detector, output_file_name="MCParticleOverview.root")
491  def MCParticleOverview(mc_particle):
492  momentum = mc_particle.getMomentum()
493  pdg_code = mc_particle.getPDG()
494  secondary_process = mc_particle.getSecondaryPhysicsProcess()
495 
496  return dict(
497  # Save divide not throwing an ZeroDivisionError
498  tan_lambda=np.divide(1.0, np.tan(momentum.Theta())),
499  pt=momentum.Rho(),
500  secondary_process=secondary_process,
501  is_secondary=secondary_process != 0,
502  mass=mc_particle.getMass(),
503  status=mc_particle.getStatus(),
504  pdg_mass=ROOT.TDatabasePDG.Instance().GetParticle(pdg_code).Mass(),
505  energy=mc_particle.getEnergy(),
506  pdg_code=pdg_code,
507  )
508 
509  from .run import HarvestingRun
510 
511  class ExampleHarvestingRun(HarvestingRun):
512  n_events = 100
513 
514  def harvesting_module(self):
515  return MCParticleOverview
516 
517  ExampleHarvestingRun().configure_and_execute_from_commandline()
518 
519 
520 if __name__ == "__main__":
521  test()
522 
A (simplified) python wrapper for StoreArray.
Definition: PyStoreArray.h:72
static std::vector< std::string > list(DataStore::EDurability durability=DataStore::EDurability::c_Event)
Return list of available arrays for given durability.
Definition: PyStoreArray.cc:28
a (simplified) python wrapper for StoreObjPtr.
Definition: PyStoreObj.h:67
static std::vector< std::string > list(DataStore::EDurability durability=DataStore::EDurability::c_Event)
Return list of available objects for given durability.
Definition: PyStoreObj.cc:28
refiners
A list of additional refiner instances to be executed on top of the refiner methods that are members ...
Definition: harvesting.py:220
foreach
Name of the StoreArray or iterable StoreObjPtr that contains the objects to be harvested.
Definition: harvesting.py:198
stash
stash of the harvested crops (start with those in the barn)
Definition: harvesting.py:239
contact
Contact email address to be displayed on the validation page.
Definition: harvesting.py:213
show_results
Switch to show the result ROOT file in a TBrowser on terminate.
Definition: harvesting.py:223
raw_crops
the dictionaries from peel as a numpy.array of doubles
Definition: harvesting.py:330
output_file_name
Name of the ROOT output file to be generated.
Definition: harvesting.py:201
expert_level
Integer expert level that controlls to detail of plots to be generated.
Definition: harvesting.py:216
def __init__(self, foreach, output_file_name, name=None, title=None, contact=None, expert_level=None, show_results=False)
Definition: harvesting.py:167
int default_expert_level
The default value of expert_level if not specified explicitly by the caller.
Definition: harvesting.py:158