release-06-02-00/doxygen/b2pandas__utils_8py_source.html

 import basf2

 from ROOT import Belle2

 import tables

 import numpy as np

 import warnings

 from variables import variables as variable_manager

 from variables import std_vector


 """

 Python utilities to help create or manage ntuples and work with them in pandas

 """


 class VariablesToHDF5(basf2.Module):
     """
     Dump variables directly to HDF5

     This Module is the equivalent of VariablesToNtuple but creates an hdf5 file
     instead of a root file. It is slower as it is implemented in pure python and
     should currently be considered a proof of concept.

     One table named after the particle list is created in the root group of
     the file. Each candidate becomes one row holding the bookkeeping columns
     ``exp``, ``run``, ``evt``, ``prod``, ``icand`` and ``ncand`` followed by
     one ``float64`` column per requested variable.
     """

     def __init__(self, listname, variables, filename):
         """Constructor to initialize the internal state

         Arguments:
             listname(str): name of the particle list
             variables(list(str)): list of variables to save for each particle
             filename(str): name of the hdf5 file to be created
         """
         super().__init__()
         #: Output filename
         self._filename = filename
         #: Particle list name
         self._listname = listname
         #: List of variables
         self._variables = variables

     def initialize(self):
         """Create the hdf5 file and list of variable objects to be used during
         event processing."""
         # resolve collections so that every entry is a single variable name
         resolved = variable_manager.resolveCollections(std_vector(*self._variables))
         #: variable names
         self._varnames = [str(varname) for varname in resolved]
         #: variable objects for each variable
         self._var_objects = [variable_manager.getVariable(n) for n in self._varnames]

         #: Event metadata
         self._evtmeta = Belle2.PyStoreObj("EventMetaData")
         self._evtmeta.isRequired()

         #: Pointer to the particle list
         self._plist = Belle2.PyStoreObj(self._listname)
         self._plist.isRequired()

         #: The hdf5 file
         self._hdf5file = tables.open_file(
             self._filename, mode="w", title="Belle2 Variables to HDF5")
         if not self._hdf5file:
             basf2.B2ERROR("Cannot create output file")
             return

         # bookkeeping columns first, then one column per variable
         dtype = [("exp", np.int32), ("run", np.int32), ("evt", np.uint32),
                  ("prod", np.uint32), ("icand", np.uint32), ("ncand", np.uint32)]
         # only float variables for now
         dtype.extend((name, np.float64) for name in self._varnames)

         #: The data type
         self._dtype = dtype
         filters = tables.Filters(complevel=1, complib='blosc:lz4', fletcher32=False)
         # some variable names are not just A-Za-z0-9 so pytables complains but
         # seems to work. Ignore warning
         with warnings.catch_warnings():
             warnings.simplefilter("ignore")
             #: The pytable
             self._table = self._hdf5file.create_table(
                 "/", self._listname, obj=np.zeros(0, dtype), filters=filters)

     def event(self):
         """Create a new row in the hdf5 file for each particle in the list"""
         buf = np.empty(self._plist.getListSize(), dtype=self._dtype)
         # add some extra columns for bookkeeping; scalar values are broadcast
         # to every row of this event
         buf["exp"] = self._evtmeta.getExperiment()
         buf["run"] = self._evtmeta.getRun()
         buf["evt"] = self._evtmeta.getEvent()
         buf["prod"] = self._evtmeta.getProduction()
         buf["ncand"] = len(buf)
         buf["icand"] = np.arange(len(buf))

         for row, p in zip(buf, self._plist):
             for name, v in zip(self._varnames, self._var_objects):
                 # pyroot proxy not working with callables, we should fix this.
                 # For now we need to go back by name and call it.
                 # should be `row[v.name] = v.func(p)`
                 row[name] = variable_manager.evaluate(v.name, p)

         self._table.append(buf)

     def terminate(self):
         """save and close the output"""
         try:
             self._table.flush()
         finally:
             # close the file even if flushing the table failed so that the
             # file handle is not leaked
             self._hdf5file.close()


 def make_mcerrors_readable(dataframe, column="mcErrors"):
     """
     Take a dataframe containing a column with the output of the :b2:var:`mcErrors`
     variable from :b2:mod:`VariablesToNTuple` and convert it to a readable set
     of columns of the form ``{column}_{name}`` where column is the value of the
     ``column`` argument and ``name`` is one of the :ref:`mcmatching`
     error flags (without the leading 'c_').

     The new boolean columns are added to ``dataframe`` in place; nothing is
     returned.

     Arguments:
         dataframe(pandas.DataFrame): the pandas dataframe containing an ntuple
                 with column containing the output of the mcErrors variable
         column(str): the name containing the values from the mcErrors variable

     Raises:
         KeyError: if ``column`` is not present in ``dataframe``
     """

     if column not in dataframe:
         raise KeyError(f"Cannot find column '{column}'")

     # convert mcErrors to int to be able to logical operate on it
     mcErrors = dataframe[column].astype(int)

     # and loop over all the c_ constants in the Belle2.MCMatching class
     for flag in (e for e in dir(Belle2.MCMatching) if e.startswith("c_")):
         try:
             value = int(getattr(Belle2.MCMatching, flag))
         except (TypeError, ValueError):
             # not an integer flag, probably the extraInfo column name, ignore
             continue

         # and set the column: flag[1:] keeps the underscore, so "c_Foo"
         # becomes "{column}_Foo"
         name = column + flag[1:]
         if value == 0:
             # a zero flag cannot be tested with a bit mask, it means that
             # no error bit is set at all
             dataframe[name] = mcErrors == 0
         else:
             # require all bits of the flag to be set
             dataframe[name] = (mcErrors & value) == value


 # This is just for testing, no need for doxygen to weirdly document it

 # @cond

 if __name__ == "__main__":
     # only needed for this self-test, so it is imported here
     import modularAnalysis

     # build a small chain that fills the "pi-:gen" list from MC and writes a
     # few kinematic variables of it to test.hdf5
     test_path = basf2.create_path()
     test_path.add_module("EventInfoSetter", evtNumList=100)
     test_path.add_module("EvtGenInput")
     modularAnalysis.fillParticleListsFromMC([("pi-:gen", "")], path=test_path)
     test_path.add_module(
         VariablesToHDF5("pi-:gen", ["M", "E", "px", "py", "pz"], "test.hdf5"))

     # Process the events
     basf2.process(test_path)
     print(basf2.statistics)

 # @endcond

Belle2::PyStoreObj
a (simplified) python wrapper for StoreObjPtr.
Definition: PyStoreObj.h:67

b2pandas_utils.VariablesToHDF5
Definition: b2pandas_utils.py:21

b2pandas_utils.VariablesToHDF5._evtmeta
_evtmeta
Event metadata.
Definition: b2pandas_utils.py:55

b2pandas_utils.VariablesToHDF5.terminate
def terminate(self)
Definition: b2pandas_utils.py:103

b2pandas_utils.VariablesToHDF5._varnames
_varnames
variable names
Definition: b2pandas_utils.py:50

b2pandas_utils.VariablesToHDF5._filename
_filename
Output filename.
Definition: b2pandas_utils.py:40

b2pandas_utils.VariablesToHDF5._variables
_variables
List of variables.
Definition: b2pandas_utils.py:44

b2pandas_utils.VariablesToHDF5._dtype
_dtype
The data type.
Definition: b2pandas_utils.py:74

b2pandas_utils.VariablesToHDF5._plist
_plist
Pointer to the particle list.
Definition: b2pandas_utils.py:58

b2pandas_utils.VariablesToHDF5._hdf5file
_hdf5file
The hdf5 file.
Definition: b2pandas_utils.py:62

b2pandas_utils.VariablesToHDF5._var_objects
_var_objects
variable objects for each variable
Definition: b2pandas_utils.py:52

b2pandas_utils.VariablesToHDF5._table
_table
The pytable.
Definition: b2pandas_utils.py:81

b2pandas_utils.VariablesToHDF5._listname
_listname
Particle list name.
Definition: b2pandas_utils.py:42

b2pandas_utils.VariablesToHDF5.initialize
def initialize(self)
Definition: b2pandas_utils.py:46

b2pandas_utils.VariablesToHDF5.__init__
def __init__(self, listname, variables, filename)
Definition: b2pandas_utils.py:30

b2pandas_utils.VariablesToHDF5.event
def event(self)
Definition: b2pandas_utils.py:83

Belle2::getRun
static ExpRun getRun(map< ExpRun, pair< double, double >> runs, double t)
Get exp number + run number from time.
Definition: Splitter.cc:264

modularAnalysis.fillParticleListsFromMC
def fillParticleListsFromMC(decayStringsWithCuts, addDaughters=False, skipNonPrimaryDaughters=False, writeOut=False, path=None)
Definition: modularAnalysis.py:1153

Belle2::MCMatching
Functions to perform Monte Carlo matching for reconstructed Particles.
Definition: MCMatching.h:28