17 Python utilities to help create or manage ntuples and work with them in pandas
23 Dump variables directly to HDF5
25 This module is the equivalent of VariablesToNtuple but creates an HDF5 file
26 instead of a ROOT file. It is slower as it is implemented in pure Python and
27 should currently be considered a proof of concept.
30 def __init__(self, listname, variables, filename):
31 """Constructor to initialize the internal state
34 listname(str): name of the particle list
35 variables(list(str)): list of variables to save for each particle
36 filename(str): name of the hdf5 file to be created
47 """Create the hdf5 file and list of variable objects to be used during
53 str(varname)
for varname
in variables.variables.resolveCollections(
60 self.
_evtmeta = ROOT.Belle2.PyStoreObj(
"EventMetaData")
69 basf2.B2ERROR(
"Cannot create output file")
72 dtype = [(
"exp", np.int32), (
"run", np.int32), (
"evt", np.uint32),
73 (
"prod", np.uint32), (
"icand", np.uint32), (
"ncand", np.uint32)]
76 dtype.append((name, np.float64))
80 filters = tables.Filters(complevel=1, complib=
'blosc:lz4', fletcher32=
False)
83 with warnings.catch_warnings():
84 warnings.simplefilter(
"ignore")
89 """Create a new row in the hdf5 file for each particle in the list"""
90 buf = np.empty(self.
_plist.getListSize(), dtype=self.
_dtype)
92 buf[
"exp"] = self.
_evtmeta.getExperiment()
94 buf[
"evt"] = self.
_evtmeta.getEvent()
95 buf[
"prod"] = self.
_evtmeta.getProduction()
96 buf[
"ncand"] = len(buf)
97 buf[
"icand"] = np.arange(len(buf))
99 for row, p
in zip(buf, self.
_plist):
104 row[name] = variables.variables.evaluate(v.name, p)
109 """save and close the output"""
113 ROOT.Belle2.MetadataService.Instance().addHDF5File(self.
_filename)
116def make_mcerrors_readable(dataframe, column="mcErrors"):
118 Take a dataframe containing a column with the output of the :b2:var:`mcErrors`
119 variable from :b2:mod:`VariablesToNTuple` and convert it to a readable set
120 of columns of the form ``{column}_{name}`` where column is the value of the
121 ``column`` argument and ``name`` is one of the :ref:`mcmatching`
122 error flags (without the leading 'c_').
125 dataframe(pandas.DataFrame): the pandas dataframe containing an ntuple
126 with a column containing the output of the mcErrors variable
127 column(str): the name of the column containing the values from the mcErrors variable
132 if column
not in dataframe:
133 raise KeyError(f
"Cannot find column '{column}'")
136 mcErrors = dataframe[column].astype(int)
139 for flag
in (e
for e
in dir(ROOT.Belle2.MCMatching)
if e.startswith(
"c_")):
141 value = int(getattr(ROOT.Belle2.MCMatching, flag))
147 name = column + flag[1:]
149 dataframe[name] = mcErrors == 0
151 dataframe[name] = (mcErrors & value) == value
156if __name__ ==
"__main__":
157 import modularAnalysis
159 p = basf2.create_path()
160 p.add_module(
"EventInfoSetter", evtNumList=100)
161 p.add_module(
"EvtGenInput")
163 a =
VariablesToHDF5(
"pi-:gen", [
"M",
"E",
"px",
"py",
"pz"],
"test.hdf5")
167 print(basf2.statistics)
_filename
Output filename.
_variables
List of variables.
_plist
Pointer to the particle list.
_var_objects
Variable objects for each variable.
_listname
Particle list name.
def __init__(self, listname, variables, filename)
def fillParticleListsFromMC(decayStringsWithCuts, addDaughters=False, skipNonPrimaryDaughters=False, writeOut=False, path=None, skipNonPrimary=False, skipInitial=True)