Belle II Software  release-08-01-10
b2pandas_utils.py
1 
8 
9 import basf2
10 import variables
11 import tables
12 import numpy as np
13 import warnings
14 
15 
16 """
17 Python uilities to help create or manage ntuples and work with them in pandas
18 """
19 
20 
21 class VariablesToHDF5(basf2.Module):
22  """
23  Dump variables directly to HDF5
24 
25  This Module is the equivalent of VariablesToNtuple but creates an hdf5 file
26  instead of a root file. It is slower as it is implemented in pure python and
27  should currently be considered a proof of concept.
28  """
29 
30  def __init__(self, listname, variables, filename):
31  """Constructor to initialize the internal state
32 
33  Arguments:
34  listname(str): name of the particle list
35  variables(list(str)): list of variables to save for each particle
36  filename(str): name of the hdf5 file to be created
37  """
38  super().__init__()
39 
40  self._filename_filename = filename
41 
42  self._listname_listname = listname
43 
44  self._variables_variables = variables
45 
46  def initialize(self):
47  """Create the hdf5 file and list of variable objects to be used during
48  event processing."""
49  # Always avoid the top-level 'import ROOT'.
50  import ROOT # noqa
51 
52  self._varnames_varnames = [
53  str(varname) for varname in variables.variables.resolveCollections(
55  *self._variables_variables))]
56 
57  self._var_objects_var_objects = [variables.variables.getVariable(n) for n in self._varnames_varnames]
58 
59 
60  self._evtmeta_evtmeta = ROOT.Belle2.PyStoreObj("EventMetaData")
61  self._evtmeta_evtmeta.isRequired()
62 
63  self._plist_plist = ROOT.Belle2.PyStoreObj(self._listname_listname)
64  self._plist_plist.isRequired()
65 
66 
67  self._hdf5file_hdf5file = tables.open_file(self._filename_filename, mode="w", title="Belle2 Variables to HDF5")
68  if not self._hdf5file_hdf5file:
69  basf2.B2ERROR("Cannot create output file")
70  return
71 
72  dtype = [("exp", np.int32), ("run", np.int32), ("evt", np.uint32),
73  ("prod", np.uint32), ("icand", np.uint32), ("ncand", np.uint32)]
74  for name in self._varnames_varnames:
75  # only float variables for now
76  dtype.append((name, np.float64))
77 
78 
79  self._dtype_dtype = dtype
80  filters = tables.Filters(complevel=1, complib='blosc:lz4', fletcher32=False)
81  # some variable names are not just A-Za-z0-9 so pytables complains but
82  # seems to work. Ignore warning
83  with warnings.catch_warnings():
84  warnings.simplefilter("ignore")
85 
86  self._table_table = self._hdf5file_hdf5file.create_table("/", self._listname_listname, obj=np.zeros(0, dtype), filters=filters)
87 
88  def event(self):
89  """Create a new row in the hdf5 file with for each particle in the list"""
90  buf = np.empty(self._plist_plist.getListSize(), dtype=self._dtype_dtype)
91  # add some extra columns for bookkeeping
92  buf["exp"] = self._evtmeta_evtmeta.getExperiment()
93  buf["run"] = self._evtmeta_evtmeta.getRun()
94  buf["evt"] = self._evtmeta_evtmeta.getEvent()
95  buf["prod"] = self._evtmeta_evtmeta.getProduction()
96  buf["ncand"] = len(buf)
97  buf["icand"] = np.arange(len(buf))
98 
99  for row, p in zip(buf, self._plist_plist):
100  for name, v in zip(self._varnames_varnames, self._var_objects_var_objects):
101  # pyroot proxy not working with callables, we should fix this.
102  # For now we need to go back by name and call it.
103  # should be `row[v.name] = v.func(p)`
104  row[name] = variables.variables.evaluate(v.name, p)
105 
106  self._table_table.append(buf)
107 
108  def terminate(self):
109  """save and close the output"""
110  self._table_table.flush()
111  self._hdf5file_hdf5file.close()
112  import ROOT
113  ROOT.Belle2.MetadataService.Instance().addHDF5File(self._filename_filename)
114 
115 
116 def make_mcerrors_readable(dataframe, column="mcErrors"):
117  """
118  Take a dataframe containing an column with the output of the :b2:var:`mcErrors`
119  variable from :b2:mod:`VariablesToNTuple` and convert it to a readable set
120  of columns of the form ``{column}_{name}`` where column is the value of the
121  ``column`` argument and ``name`` is one of one of the :ref:`mcmatching`
122  error flags (without the leading 'c_').
123 
124  Arguments:
125  dataframe(pandas.DataFrame): the pandas dataframe containing an ntuple
126  with column containing the output of the mcErrors variable
127  column(str): the name containing the values from the mcErrors variable
128  """
129  # Always avoid the top-level 'import ROOT'.
130  import ROOT # noqa
131 
132  if column not in dataframe:
133  raise KeyError(f"Cannot find coulumn '{column}'")
134 
135  # convert mcErrors to int to be able to logical operate on it
136  mcErrors = dataframe[column].astype(int)
137 
138  # and loop over all the c_ constants in the Belle2.MCMatching class
139  for flag in (e for e in dir(ROOT.Belle2.MCMatching) if e.startswith("c_")):
140  try:
141  value = int(getattr(ROOT.Belle2.MCMatching, flag))
142  except ValueError:
143  # probably the extraInfo column name, ignore
144  continue
145 
146  # and set the column
147  name = column + flag[1:]
148  if value == 0:
149  dataframe[name] = mcErrors == 0
150  else:
151  dataframe[name] = (mcErrors & value) == value
152 
153 
154 # This is just for testing, no need for doxygen to weirdly document it
155 # @cond
156 if __name__ == "__main__":
157  import modularAnalysis
158 
159  p = basf2.create_path()
160  p.add_module("EventInfoSetter", evtNumList=100)
161  p.add_module("EvtGenInput")
162  modularAnalysis.fillParticleListsFromMC([("pi-:gen", "")], path=p)
163  a = VariablesToHDF5("pi-:gen", ["M", "E", "px", "py", "pz"], "test.hdf5")
164  p.add_module(a)
165  # Process the events
166  basf2.process(p)
167  print(basf2.statistics)
168 # @endcond
def std_vector(*args)
Definition: __init__.py:134
_plist
Pointer to the particle list.
_var_objects
variable objects for each variable
_listname
Particle list name.
def __init__(self, listname, variables, filename)
static ExpRun getRun(std::map< ExpRun, std::pair< double, double >> runs, double t)
Get exp number + run number from time.
Definition: Splitter.cc:262
def fillParticleListsFromMC(decayStringsWithCuts, addDaughters=False, skipNonPrimaryDaughters=False, writeOut=False, path=None, skipNonPrimary=False, skipInitial=True)