Belle II Software  release-06-02-00
b2pandas_utils.py
1 
8 import basf2
9 from ROOT import Belle2
10 import tables
11 import numpy as np
12 import warnings
13 from variables import variables as variable_manager
14 from variables import std_vector
15 
16 """
17 Python utilities to help create or manage ntuples and work with them in pandas
18 """
19 
20 
class VariablesToHDF5(basf2.Module):
    """
    Dump variables directly to HDF5

    This Module is the equivalent of VariablesToNtuple but creates an hdf5 file
    instead of a root file. It is slower as it is implemented in pure python and
    should currently be considered a proof of concept.
    """

    def __init__(self, listname, variables, filename):
        """Constructor to initialize the internal state

        Arguments:
            listname(str): name of the particle list
            variables(list(str)): list of variables to save for each particle
            filename(str): name of the hdf5 file to be created
        """
        super().__init__()
        #: Output filename
        self._filename = filename
        #: Particle list name
        self._listname = listname
        #: List of variables (collections are resolved in initialize())
        self._variables = variables

    def initialize(self):
        """Create the hdf5 file and list of variable objects to be used during
        event processing."""
        #: variable names, with collections resolved into individual variables
        self._varnames = [
            str(varname)
            for varname in variable_manager.resolveCollections(std_vector(*self._variables))
        ]
        #: variable objects for each variable
        self._var_objects = [variable_manager.getVariable(n) for n in self._varnames]

        #: Event metadata
        self._evtmeta = Belle2.PyStoreObj("EventMetaData")
        self._evtmeta.isRequired()
        #: Pointer to the particle list
        self._plist = Belle2.PyStoreObj(self._listname)
        self._plist.isRequired()

        #: The pytables file
        self._hdf5file = tables.open_file(self._filename, mode="w", title="Belle2 Variables to HDF5")
        if not self._hdf5file:
            basf2.B2ERROR("Cannot create output file")
            return

        # bookkeeping columns first, then one column per requested variable
        dtype = [("exp", np.int32), ("run", np.int32), ("evt", np.uint32),
                 ("prod", np.uint32), ("icand", np.uint32), ("ncand", np.uint32)]
        # only float variables for now
        dtype.extend((name, np.float64) for name in self._varnames)

        #: The data type of one table row
        self._dtype = dtype
        filters = tables.Filters(complevel=1, complib='blosc:lz4', fletcher32=False)
        # some variable names are not just A-Za-z0-9 so pytables complains but
        # seems to work. Ignore warning
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            #: The pytables table, named after the particle list
            self._table = self._hdf5file.create_table(
                "/", self._listname, obj=np.zeros(0, dtype), filters=filters)

    def event(self):
        """Create a new row in the hdf5 file for each particle in the list"""
        buf = np.empty(self._plist.getListSize(), dtype=self._dtype)
        # add some extra columns for bookkeeping
        buf["exp"] = self._evtmeta.getExperiment()
        buf["run"] = self._evtmeta.getRun()
        buf["evt"] = self._evtmeta.getEvent()
        buf["prod"] = self._evtmeta.getProduction()
        buf["ncand"] = len(buf)
        buf["icand"] = np.arange(len(buf))

        for row, p in zip(buf, self._plist):
            for name, v in zip(self._varnames, self._var_objects):
                # pyroot proxy not working with callables, we should fix this.
                # For now we need to go back by name and call it.
                # should be `row[v.name] = v.func(p)`
                row[name] = variable_manager.evaluate(v.name, p)

        self._table.append(buf)

    def terminate(self):
        """save and close the output"""
        try:
            self._table.flush()
        finally:
            # close the file even if flushing the table fails
            self._hdf5file.close()
107 
108 
def make_mcerrors_readable(dataframe, column="mcErrors"):
    """
    Take a dataframe containing a column with the output of the :b2:var:`mcErrors`
    variable from :b2:mod:`VariablesToNTuple` and convert it to a readable set
    of columns of the form ``{column}_{name}`` where column is the value of the
    ``column`` argument and ``name`` is one of the :ref:`mcmatching`
    error flags (without the leading 'c_').

    The new columns are added to ``dataframe`` in place; nothing is returned.

    Arguments:
        dataframe(pandas.DataFrame): the pandas dataframe containing an ntuple
            with column containing the output of the mcErrors variable
        column(str): the name containing the values from the mcErrors variable

    Raises:
        KeyError: if ``column`` is not present in ``dataframe``
    """

    if column not in dataframe:
        raise KeyError(f"Cannot find coulumn '{column}'")

    # convert mcErrors to int to be able to logical operate on it
    mcErrors = dataframe[column].astype(int)

    # and loop over all the c_ constants in the Belle2.MCMatching class
    for flag in dir(Belle2.MCMatching):
        if not flag.startswith("c_"):
            continue
        try:
            value = int(getattr(Belle2.MCMatching, flag))
        except (ValueError, TypeError):
            # not an integer flag (probably the extraInfo column name), ignore
            continue

        # and set the column: "c_Name" -> "{column}_Name"
        name = column + flag[1:]
        if value == 0:
            # a zero-valued flag can't be tested with a bit mask, it means
            # that no bit is set at all
            dataframe[name] = mcErrors == 0
        else:
            dataframe[name] = (mcErrors & value) == value
143 
144 
# This is just for testing, no need for doxygen to weirdly document it
# @cond
if __name__ == "__main__":
    import modularAnalysis

    # Generate 100 events and dump a few kinematic variables of all
    # generated pions into an hdf5 file
    path = basf2.create_path()
    path.add_module("EventInfoSetter", evtNumList=100)
    path.add_module("EvtGenInput")
    modularAnalysis.fillParticleListsFromMC([("pi-:gen", "")], path=path)
    hdf5_writer = VariablesToHDF5("pi-:gen", ["M", "E", "px", "py", "pz"], "test.hdf5")
    path.add_module(hdf5_writer)
    # Process the events
    basf2.process(path)
    print(basf2.statistics)
# @endcond
a (simplified) python wrapper for StoreObjPtr.
Definition: PyStoreObj.h:67
_plist
Pointer to the particle list.
_var_objects
variable objects for each variable
_listname
Particle list name.
def __init__(self, listname, variables, filename)
static ExpRun getRun(map< ExpRun, pair< double, double >> runs, double t)
Get exp number + run number from time.
Definition: Splitter.cc:264
def fillParticleListsFromMC(decayStringsWithCuts, addDaughters=False, skipNonPrimaryDaughters=False, writeOut=False, path=None)
Functions to perform Monte Carlo matching for reconstructed Particles.
Definition: MCMatching.h:28