Belle II Software development
VariablesToTable Class Reference
Inheritance diagram for VariablesToTable:
VariablesToHDF5

Public Member Functions

def __init__ (self, str listname, List[str] variables, str filename, str format)
 
def initialize (self)
 
def initialize_parquet_writer (self)
 
def initialize_csv_writer (self)
 
def initialize_hdf5_writer (self)
 
def fill_buffer (self)
 
def event (self)
 
def terminate (self)
 

Protected Attributes

 _filename
 Output filename.
 
 _listname
 Particle list name.
 
 _variables
 List of variables.
 
 _format
 Output format.
 
 _varnames
 variable names
 
 _var_objects
 variable objects for each variable
 
 _evtmeta
 Event metadata.
 
 _plist
 Pointer to the particle list.
 
 _dtypes
 List of (column name, numpy dtype) tuples describing the columns of the output buffer.
 
 _schema
 A list of (name, pa.DataType) tuples used to define the pyarrow schema.
 
 _parquet_writer
 a writer object to write data into a parquet file
 
 _csv_writer
 a writer object to write data into a csv file
 
 _hdf5_writer
 The pytable file.
 
 _table
 The pytable.
 

Detailed Description

Base class to dump ntuples into a non-ROOT format of your choosing ('hdf5', 'parquet' or 'csv').

Definition at line 37 of file b2pandas_utils.py.

Constructor & Destructor Documentation

◆ __init__()

def __init__ (   self,
str  listname,
List[str]  variables,
str  filename,
str  format 
)
Constructor to initialize the internal state

Arguments:
    listname(str): name of the particle list
    variables(list(str)): list of variables to save for each particle
    filename(str): name of the output file to be created
    format(str): format of the output file, one of 'hdf5', 'parquet', 'csv'

Reimplemented in VariablesToHDF5.

Definition at line 42 of file b2pandas_utils.py.

42 def __init__(self, listname: str, variables: List[str], filename: str, format: str):
43 """Constructor to initialize the internal state
44
45 Arguments:
46 listname(str): name of the particle list
47 variables(list(str)): list of variables to save for each particle
48 filename(str): name of the output file to be created
49 format(str): format of the output file, one of 'hdf5', 'parquet', 'csv'
50 """
51 super().__init__()
52
53 self._filename = filename
54
55 self._listname = listname
56
57 self._variables = variables
58
59 self._format = format
60

Member Function Documentation

◆ event()

def event (   self)
Event processing function
executes the fill_buffer function and writes the data to the output file

Definition at line 157 of file b2pandas_utils.py.

157 def event(self):
158 """
159 Event processing function
160 executes the fill_buffer function and writes the data to the output file
161 """
162 buf = self.fill_buffer()
163
164 if self._format == "hdf5":
165 """Create a new row in the hdf5 file with for each particle in the list"""
166 self._table.append(buf)
167 elif self._format == "parquet":
168 table = {name: buf[name] for name, _ in self._dtypes}
169 pa_table = pa.table(table, schema=pa.schema(self._schema))
170 self._parquet_writer.write_table(pa_table)
171 elif self._format == "csv":
172 table = {name: buf[name] for name, _ in self._dtypes}
173 pa_table = pa.table(table, schema=pa.schema(self._schema))
174 self._csv_writer.write(pa_table)
175

◆ fill_buffer()

def fill_buffer (   self)
Collect all variables for each particle in the particle list into a numpy structured array.

Definition at line 134 of file b2pandas_utils.py.

134 def fill_buffer(self):
135 """
136 collect all variables for the particle in a numpy array
137 """
138
139 # create a numpy array with the data
140 buf = np.empty(self._plist.getListSize(), dtype=self._dtypes)
141 # add some extra columns for bookkeeping
142 buf["__experiment__"] = self._evtmeta.getExperiment()
143 buf["__run__"] = self._evtmeta.getRun()
144 buf["__event__"] = self._evtmeta.getEvent()
145 buf["__production__"] = self._evtmeta.getProduction()
146 buf["__ncandidates__"] = len(buf)
147 buf["__candidate__"] = np.arange(len(buf))
148
149 for row, p in zip(buf, self._plist):
150 for name, v in zip(self._varnames, self._var_objects):
151 # pyroot proxy not working with callables, we should fix this.
152 # For now we need to go back by name and call it.
153 # should be `row[v.name] = v.func(p)`
154 row[name] = variables.variables.evaluate(v.name, p)
155 return buf
156

◆ initialize()

def initialize (   self)
Create the output file writer (hdf5, parquet or csv, depending on the chosen format)
and the list of variable objects to be used during event processing.

Definition at line 61 of file b2pandas_utils.py.

61 def initialize(self):
62 """Create the hdf5 file and list of variable objects to be used during
63 event processing."""
64 # Always avoid the top-level 'import ROOT'.
65 import ROOT # noqa
66
67 self._varnames = [
68 str(varname) for varname in variables.variables.resolveCollections(
69 variables.std_vector(
70 *self._variables))]
71
72 self._var_objects = [variables.variables.getVariable(n) for n in self._varnames]
73
74
75 self._evtmeta = ROOT.Belle2.PyStoreObj("EventMetaData")
76 self._evtmeta.isRequired()
77
78 self._plist = ROOT.Belle2.PyStoreObj(self._listname)
79 self._plist.isRequired()
80
81 dtypes = [
82 ("__experiment__", np.int32), ("__run__", np.int32), ("__event__", np.uint32),
83 ("__production__", np.uint32), ("__candidate__", np.uint32), ("__ncandidates__", np.uint32)
84 ]
85 for name in self._varnames:
86 # only float variables for now
87 dtypes.append((name, np.float64))
88
89
90 self._dtypes = dtypes
91
92 if self._format == "hdf5":
93 self.initialize_hdf5_writer()
94 elif self._format == "parquet":
95 self.initialize_parquet_writer()
96 elif self._format == "csv":
97 self.initialize_csv_writer()
98 else:
99 raise ValueError(f"Unknown format {self._format}, supported formats are 'hdf5', 'parquet', 'csv'.")
100
def std_vector(*args)
Definition: __init__.py:135

◆ initialize_csv_writer()

def initialize_csv_writer (   self)
Initialize the csv writer using pyarrow

Definition at line 110 of file b2pandas_utils.py.

110 def initialize_csv_writer(self):
111 """
112 Initialize the csv writer using pyarrow
113 """
114
115 self._schema = [(name, numpy_to_pyarrow_type_map[dt]) for name, dt in self._dtypes]
116
117 self._csv_writer = CSVWriter(self._filename, schema=pa.schema(self._schema))
118

◆ initialize_hdf5_writer()

def initialize_hdf5_writer (   self)
Initialize the hdf5 writer using pytables

Definition at line 119 of file b2pandas_utils.py.

119 def initialize_hdf5_writer(self):
120 """
121 Initialize the hdf5 writer using pytables
122 """
123
124 self._hdf5_writer = tables.open_file(self._filename, mode="w", title="Belle2 Variables to HDF5")
125 filters = tables.Filters(complevel=1, complib='blosc:lz4', fletcher32=False)
126
127 # some variable names are not just A-Za-z0-9 so pytables complains but
128 # seems to work. Ignore warning
129 with warnings.catch_warnings():
130 warnings.simplefilter("ignore")
131
132 self._table = self._hdf5_writer.create_table("/", self._listname, obj=np.zeros(0, self._dtypes), filters=filters)
133

◆ initialize_parquet_writer()

def initialize_parquet_writer (   self)
Initialize the parquet writer using pyarrow

Definition at line 101 of file b2pandas_utils.py.

101 def initialize_parquet_writer(self):
102 """
103 Initialize the parquet writer using pyarrow
104 """
105
106 self._schema = [(name, numpy_to_pyarrow_type_map[dt]) for name, dt in self._dtypes]
107
108 self._parquet_writer = ParquetWriter(self._filename, schema=pa.schema(self._schema))
109

◆ terminate()

def terminate (   self)
save and close the output

Definition at line 176 of file b2pandas_utils.py.

176 def terminate(self):
177 """save and close the output"""
178 import ROOT # noqa
179 if self._format == "hdf5":
180 self._table.flush()
181 self._hdf5_writer.close()
182 elif self._format == "parquet":
183 self._parquet_writer.close()
184 elif self._format == "csv":
185 self._csv_writer.close()
186 ROOT.Belle2.MetadataService.Instance().addNtuple(self._filename)
187
188

Member Data Documentation

◆ _csv_writer

_csv_writer
protected

a writer object to write data into a csv file

Definition at line 117 of file b2pandas_utils.py.

◆ _dtypes

_dtypes
protected

List of (column name, numpy dtype) tuples describing the columns of the output buffer.

Definition at line 90 of file b2pandas_utils.py.

◆ _evtmeta

_evtmeta
protected

Event metadata.

Definition at line 75 of file b2pandas_utils.py.

◆ _filename

_filename
protected

Output filename.

Definition at line 53 of file b2pandas_utils.py.

◆ _format

_format
protected

Output format.

Definition at line 59 of file b2pandas_utils.py.

◆ _hdf5_writer

_hdf5_writer
protected

The pytable file.

Definition at line 124 of file b2pandas_utils.py.

◆ _listname

_listname
protected

Particle list name.

Definition at line 55 of file b2pandas_utils.py.

◆ _parquet_writer

_parquet_writer
protected

a writer object to write data into a parquet file

Definition at line 108 of file b2pandas_utils.py.

◆ _plist

_plist
protected

Pointer to the particle list.

Definition at line 78 of file b2pandas_utils.py.

◆ _schema

_schema
protected

A list of (name, pa.DataType) tuples used to define the pyarrow schema.

Definition at line 106 of file b2pandas_utils.py.

◆ _table

_table
protected

The pytable.

Definition at line 132 of file b2pandas_utils.py.

◆ _var_objects

_var_objects
protected

variable objects for each variable

Definition at line 72 of file b2pandas_utils.py.

◆ _variables

_variables
protected

List of variables.

Definition at line 57 of file b2pandas_utils.py.

◆ _varnames

_varnames
protected

variable names

Definition at line 67 of file b2pandas_utils.py.


The documentation for this class was generated from the following file: