2 from ROOT
import Belle2
6 from variables
import variables
as variable_manager
7 from variables
import std_vector
10 Python utilities to help create or manage ntuples and work with them in pandas
16 Dump variables directly to HDF5
18 This Module is the equivalent of VariablesToNtuple but creates an hdf5 file
19 instead of a root file. It is slower as it is implemented in pure python and
20 should currently be considered a proof of concept.
22 def __init__(self, listname, variables, filename):
23 """Constructor to initialize the internal state
26 listname(str): name of the particle list
27 variables(list(str)): list of variables to save for each particle
28 filename(str): name of the hdf5 file to be created
39 """Create the hdf5 file and list of variable objects to be used during
42 varnames = variable_manager.resolveCollections(std_vector(*self.
_variables))
44 self.
_var_objects = [variable_manager.getVariable(n)
for n
in varnames]
56 basf2.B2ERROR(
"Cannot create output file")
59 dtype = [(
"exp", np.int32), (
"run", np.int32), (
"evt", np.uint32), (
"icand", np.uint32), (
"ncand", np.uint32)]
62 dtype.append((v.name, np.float64))
66 filters = tables.Filters(complevel=1, complib=
'blosc:lz4', fletcher32=
False)
69 with warnings.catch_warnings():
70 warnings.simplefilter(
"ignore")
75 """Create a new row in the hdf5 file for each particle in the list"""
76 buf = np.empty(self.
_plist.getListSize(), dtype=self.
_dtype)
78 buf[
"exp"] = self.
_evtmeta.getExperiment()
80 buf[
"evt"] = self.
_evtmeta.getEvent()
81 buf[
"ncand"] = len(buf)
82 buf[
"icand"] = np.arange(len(buf))
84 for row, p
in zip(buf, self.
_plist):
89 row[v.name] = variable_manager.evaluate(v.name, p)
94 """Save and close the output"""
99 def make_mcerrors_readable(dataframe, column="mcErrors"):
101 Take a dataframe containing a column with the output of the :b2:var:`mcErrors`
102 variable from :b2:mod:`VariablesToNtuple` and convert it to a readable set
103 of columns of the form ``{column}_{name}`` where column is the value of the
104 ``column`` argument and ``name`` is one of the :ref:`mcmatching`
105 error flags (without the leading 'c_').
108 dataframe(pandas.DataFrame): the pandas dataframe containing an ntuple
109 with a column containing the output of the mcErrors variable
110 column(str): the name of the column containing the values from the mcErrors variable
113 if column
not in dataframe:
114 raise KeyError(f
"Cannot find coulumn '{column}'")
117 mcErrors = dataframe[column].astype(int)
128 name = column + flag[1:]
130 dataframe[name] = mcErrors == 0
132 dataframe[name] = (mcErrors & value) == value
137 if __name__ ==
"__main__":
138 import modularAnalysis
140 p = basf2.create_path()
141 p.add_module(
"EventInfoSetter", evtNumList=100)
142 p.add_module(
"EvtGenInput")
144 a =
VariablesToHDF5(
"pi-:gen", [
"M",
"E",
"px",
"py",
"pz"],
"test.hdf5")
148 print(basf2.statistics)