Belle II Software development
VariablesToHDF5 Class Reference
Inheritance diagram for VariablesToHDF5 (inherits from VariablesToTable).

Public Member Functions

 __init__ (self, listname, variables, filename, Optional[str] hdf_table_name=None)
 
 buffer (self)
 
 buffer_full (self)
 
 initialize (self)
 
 event_buffer (self)
 
 clear_buffer (self)
 
 append_buffer (self)
 
 initialize_feather_writer (self)
 
 initialize_parquet_writer (self)
 
 initialize_csv_writer (self)
 
 initialize_hdf5_writer (self)
 
 fill_event_buffer (self)
 
 write_buffer (self)
 
 event (self)
 
 terminate (self)
 

Public Attributes

 buffer = "hdf5":
 
 buffer_full
 

Protected Attributes

 _filename = filename
 Output filename.
 
 _listname = listname
 Particle list name.
 
 _variables = list(set(variables))
 List of variables.
 
str _format = "csv"
 Output format.
 
str _table_name
 Table name in the hdf5 file.
 
 _event_buffer_size = event_buffer_size
 Event buffer size.
 
int _event_buffer_counter = 0
 Event buffer counter.
 
 _writer_kwargs = writer_kwargs
 writer kwargs
 
list _varnames
 variable names
 
 _std_varnames = variables.std_vector(*self._varnames)
 std.vector of variable names
 
 _evtmeta = ROOT.Belle2.PyStoreObj("EventMetaData")
 Event metadata.
 
 _plist = ROOT.Belle2.PyStoreObj(self._listname)
 Pointer to the particle list.
 
 _dtypes = dtypes
 The data type.
 
 _buffer = np.empty(self._event_buffer_size * 10, dtype=self._dtypes)
 event variables buffer (will be automatically grown if necessary)
 
int _buffer_index = 0
 current start index in the event variables buffer
 
list _schema
 A list of (name, pyarrow DataType) tuples defining the pyarrow schema.
 
 _feather_writer
 a writer object to write data into a feather file
 
 _parquet_writer
 a writer object to write data into a parquet file
 
 _csv_writer = CSVWriter(self._filename, schema=pa.schema(self._schema), **self._writer_kwargs)
 a writer object to write data into a csv file
 
 _hdf5_writer
 The pytables file.
 
 _table
 The pytables table.
 

Detailed Description

Legacy class kept so that existing code does not break

Definition at line 330 of file b2pandas_utils.py.
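A minimal usage sketch in a basf2 steering script (a sketch only: it assumes b2pandas_utils is importable in the basf2 environment and that the path already fills a "pi+:all" particle list; the list name, variables and output filename are illustrative):

import basf2
from b2pandas_utils import VariablesToHDF5

path = basf2.Path()
# ... input data, reconstruction and filling of the "pi+:all" list go here ...

# write px, py, pz of every candidate in the list to an HDF5 table;
# the output filename must end in .h5, .hdf or .hdf5 (see __init__ below)
path.add_module(VariablesToHDF5("pi+:all", ["px", "py", "pz"], "variables.h5"))

basf2.process(path)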

Constructor & Destructor Documentation

◆ __init__()

__init__ ( self,
listname,
variables,
filename,
Optional[str] hdf_table_name = None )

Definition at line 335 of file b2pandas_utils.py.

335 def __init__(self, listname, variables, filename, hdf_table_name: Optional[str] = None,):
336 super().__init__(listname, variables, filename, hdf_table_name)
337 assert self._filename.split(".")[-1] in ["h5", "hdf", "hdf5"], (
338 "Filename must end with .h5, .hdf or .hdf5 for HDF5 output. "
339 f"Got {self._filename}"
340 )
341
342

Member Function Documentation

◆ append_buffer()

append_buffer ( self)
inherited
"Append" a new event to the buffer by moving the buffer index forward by particle list size

Automatically replaces the buffer by a larger one if necessary

Definition at line 179 of file b2pandas_utils.py.

179 def append_buffer(self):
180 """
181 "Append" a new event to the buffer by moving the buffer index forward by particle list size
182
183 Automatically replaces the buffer by a larger one if necessary
184 """
185 plist_size = self._plist.getListSize()
186 if (plist_size + self._buffer_index) > len(self._buffer):
187 new_buffer = np.empty(
188 # factor 1.5 larger or at least as large as necessary
189 max(int(len(self._buffer) * 1.5), self._buffer_index + plist_size),
190 dtype=self._dtypes,
191 )
192 new_buffer[:self._buffer_index] = self.buffer
193 self._buffer = new_buffer
194 self._buffer_index += plist_size
195 self._event_buffer_counter += 1
196
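The growth strategy can be tried standalone with plain numpy (a sketch; the dtype, sizes and fill values below are made up and not part of the module):

import numpy as np

dtypes = [("__event__", np.uint32), ("px", np.float64)]
buf = np.empty(100, dtype=dtypes)   # current buffer with room for 100 rows
buffer_index = 95                   # rows already filled
plist_size = 12                     # candidates in the incoming event

if buffer_index + plist_size > len(buf):
    # grow by a factor 1.5, or to exactly the size needed if that is larger
    new_buf = np.empty(max(int(len(buf) * 1.5), buffer_index + plist_size), dtype=dtypes)
    new_buf[:buffer_index] = buf[:buffer_index]   # keep the rows written so far
    buf = new_buf

buffer_index += plist_size          # the new event now owns the last plist_size rows
print(len(buf), buffer_index)       # 150 107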

◆ buffer()

buffer ( self)
inherited
The buffer slice across multiple entries

Definition at line 159 of file b2pandas_utils.py.

159 def buffer(self):
160 """
161 The buffer slice across multiple entries
162 """
163 return self._buffer[:self._buffer_index]
164

◆ buffer_full()

buffer_full ( self)
inherited
check if the buffer is full

Definition at line 276 of file b2pandas_utils.py.

276 def buffer_full(self):
277 """
278 check if the buffer is full
279 """
280 return self._event_buffer_counter == self._event_buffer_size
281

◆ clear_buffer()

clear_buffer ( self)
inherited
Reset the buffer event counter and index

Definition at line 172 of file b2pandas_utils.py.

172 def clear_buffer(self):
173 """
174 Reset the buffer event counter and index
175 """
176 self._event_buffer_counter = 0
177 self._buffer_index = 0
178

◆ event()

event ( self)
inherited
Event processing function

executes the fill_event_buffer function and writes the data to the output file
in chunks of event_buffer_size

Definition at line 299 of file b2pandas_utils.py.

299 def event(self):
300 """
301 Event processing function
302
303 executes the fill_event_buffer function and writes the data to the output file
304 in chunks of event_buffer_size
305 """
306 self.append_buffer()
307 self.fill_event_buffer()
308 if self.buffer_full:
309 self.write_buffer()
310 self.clear_buffer()
311

◆ event_buffer()

event_buffer ( self)
inherited
The buffer slice for the current event

Definition at line 166 of file b2pandas_utils.py.

166 def event_buffer(self):
167 """
168 The buffer slice for the current event
169 """
170 return self._buffer[self._buffer_index - self._plist.getListSize(): self._buffer_index]
171
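Both buffer and event_buffer are plain views into the same numpy array; a standalone sketch of the slicing (the numbers are made up):

import numpy as np

raw = np.arange(10)          # stands in for self._buffer
buffer_index = 7             # rows filled so far (self._buffer_index)
plist_size = 3               # candidates in the current event (getListSize())

full_buffer = raw[:buffer_index]                            # the "buffer" property
event_buffer = raw[buffer_index - plist_size:buffer_index]  # the "event_buffer" property
print(full_buffer)    # [0 1 2 3 4 5 6]
print(event_buffer)   # [4 5 6]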

◆ fill_event_buffer()

fill_event_buffer ( self)
inherited
Assign values for all variables for all particles in the particle list to the current event buffer

Definition at line 255 of file b2pandas_utils.py.

255 def fill_event_buffer(self):
256 """
257 Assign values for all variables for all particles in the particle list to the current event buffer
258 """
259 buf = self.event_buffer
260
261 # add some extra columns for bookkeeping
262 buf["__experiment__"] = self._evtmeta.getExperiment()
263 buf["__run__"] = self._evtmeta.getRun()
264 buf["__event__"] = self._evtmeta.getEvent()
265 buf["__production__"] = self._evtmeta.getProduction()
266 buf["__ncandidates__"] = len(buf)
267 buf["__candidate__"] = np.arange(len(buf))
268
269 # fill variables into buffer
270 vector = variables.variables.evaluateVariables(self._std_varnames, self._plist)
271 values = np.array(vector.data()).reshape(-1, len(self._varnames))
272 for name, col in zip(self._varnames, values.T):
273 buf[name] = col
274
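The reshape at the end can be illustrated in isolation (a sketch with made-up numbers; evaluateVariables returns one flat vector whose layout is implied by the reshape above: all variables of particle 0, then all variables of particle 1, and so on):

import numpy as np

varnames = ["px", "py", "pz"]
flat = np.array([1.0, 2.0, 3.0,    # particle 0: px, py, pz
                 4.0, 5.0, 6.0])   # particle 1: px, py, pz

values = flat.reshape(-1, len(varnames))   # shape (n_particles, n_variables)
for name, col in zip(varnames, values.T):
    print(name, col)   # px [1. 4.]   py [2. 5.]   pz [3. 6.]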

◆ initialize()

initialize ( self)
inherited
Setup variable lists, pointers, buffers and file writers

Definition at line 102 of file b2pandas_utils.py.

102 def initialize(self):
103 """
104 Setup variable lists, pointers, buffers and file writers
105 """
106 # Always avoid the top-level 'import ROOT'.
107 import ROOT # noqa
108
109
110 self._varnames = [
111 str(varname)
112 for varname in variables.variables.resolveCollections(
113 variables.std_vector(*self._variables)
114 )
115 ]
116
117
118 self._std_varnames = variables.std_vector(*self._varnames)
119
120
121 self._evtmeta = ROOT.Belle2.PyStoreObj("EventMetaData")
122 self._evtmeta.isRequired()
123
124
125 self._plist = ROOT.Belle2.PyStoreObj(self._listname)
126 self._plist.isRequired()
127
128 dtypes = [
129 ("__experiment__", np.int32),
130 ("__run__", np.int32),
131 ("__event__", np.uint32),
132 ("__production__", np.uint32),
133 ("__candidate__", np.uint32),
134 ("__ncandidates__", np.uint32),
135 ]
136 for name in self._varnames:
137 # only float variables for now
138 dtypes.append((name, np.float64))
139
140
141 self._dtypes = dtypes
142
143
144 self._buffer = np.empty(self._event_buffer_size * 10, dtype=self._dtypes)
145
146
147 self._buffer_index = 0
148
149 if self._format == "hdf5":
150 self.initialize_hdf5_writer()
151 elif self._format == "parquet":
152 self.initialize_parquet_writer()
153 elif self._format == "csv":
154 self.initialize_csv_writer()
155 elif self._format == "feather":
156 self.initialize_feather_writer()
157
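The buffer set up here is an ordinary numpy structured array; the same construction standalone (the variable names and the factor of 10 candidates per event are illustrative):

import numpy as np

varnames = ["px", "py"]
dtypes = [
    ("__experiment__", np.int32),
    ("__run__", np.int32),
    ("__event__", np.uint32),
    ("__production__", np.uint32),
    ("__candidate__", np.uint32),
    ("__ncandidates__", np.uint32),
] + [(name, np.float64) for name in varnames]   # only float variables for now

event_buffer_size = 100
# room for roughly 10 candidates per buffered event; grown later if needed
buffer = np.empty(event_buffer_size * 10, dtype=dtypes)
print(buffer.dtype.names)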

◆ initialize_csv_writer()

initialize_csv_writer ( self)
inherited
Initialize the csv writer using pyarrow

Definition at line 225 of file b2pandas_utils.py.

225 def initialize_csv_writer(self):
226 """
227 Initialize the csv writer using pyarrow
228 """
229
230 self._schema = [
231 (name, numpy_to_pyarrow_type_map[dt]) for name, dt in self._dtypes
232 ]
233
234 self._csv_writer = CSVWriter(self._filename, schema=pa.schema(self._schema), **self._writer_kwargs)
235

◆ initialize_feather_writer()

initialize_feather_writer ( self)
inherited
Initialize the feather writer using pyarrow

Definition at line 197 of file b2pandas_utils.py.

197 def initialize_feather_writer(self):
198 """
199 Initialize the feather writer using pyarrow
200 """
201
202 self._schema = [
203 (name, numpy_to_pyarrow_type_map[dt]) for name, dt in self._dtypes
204 ]
205
206 self._feather_writer = ipc.RecordBatchFileWriter(
207 sink=self._filename,
208 schema=pa.schema(self._schema),
209 **self._writer_kwargs,
210 )
211
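Standalone, the pyarrow IPC (feather) writer lifecycle looks like this (a sketch; the schema, data and filename are illustrative). The file only becomes readable once the writer is closed, which terminate() takes care of:

import pyarrow as pa
import pyarrow.ipc as ipc

schema = pa.schema([("__event__", pa.uint32()), ("px", pa.float64())])
writer = ipc.RecordBatchFileWriter(sink="example.feather", schema=schema)

# one write per flushed buffer
writer.write_table(pa.table({"__event__": [1, 1], "px": [0.3, 0.5]}, schema=schema))
writer.close()   # finalizes the file footer

print(ipc.open_file("example.feather").read_all())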

◆ initialize_hdf5_writer()

initialize_hdf5_writer ( self)
inherited
Initialize the hdf5 writer using pytables

Definition at line 236 of file b2pandas_utils.py.

236 def initialize_hdf5_writer(self):
237 """
238 Initialize the hdf5 writer using pytables
239 """
240
241 self._hdf5_writer = tables.open_file(
242 self._filename, mode="w", title="Belle2 Variables to HDF5"
243 )
244 filters = tables.Filters(complevel=1, complib="blosc:lz4", fletcher32=False)
245
246 # some variable names are not just A-Za-z0-9 so pytables complains but
247 # seems to work. Ignore warning
248 with warnings.catch_warnings():
249 warnings.simplefilter("ignore")
250
251 self._table = self._hdf5_writer.create_table(
252 "/", self._table_name, obj=np.zeros(0, self._dtypes), filters=filters, **self._writer_kwargs
253 )
254
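Outside of basf2, the same pytables pattern can be exercised like this (a sketch; the filename, table name and dtype are illustrative):

import numpy as np
import tables

dtypes = [("__event__", np.uint32), ("px", np.float64)]

h5file = tables.open_file("example.h5", mode="w", title="Belle2 Variables to HDF5")
filters = tables.Filters(complevel=1, complib="blosc:lz4", fletcher32=False)

# create an empty table with the right column layout ...
table = h5file.create_table("/", "pi_list", obj=np.zeros(0, dtypes), filters=filters)
# ... then append one structured array per buffer flush
table.append(np.ones(5, dtypes))
table.flush()
h5file.close()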

◆ initialize_parquet_writer()

initialize_parquet_writer ( self)
inherited
Initialize the parquet writer using pyarrow

Definition at line 212 of file b2pandas_utils.py.

212 def initialize_parquet_writer(self):
213 """
214 Initialize the parquet writer using pyarrow
215 """
216
217 self._schema = [
218 (name, numpy_to_pyarrow_type_map[dt]) for name, dt in self._dtypes
219 ]
220
221 self._parquet_writer = ParquetWriter(
222 self._filename, schema=pa.schema(self._schema), **self._writer_kwargs
223 )
224

◆ terminate()

terminate ( self)
inherited
save and close the output

Definition at line 312 of file b2pandas_utils.py.

312 def terminate(self):
313 """save and close the output"""
314 import ROOT # noqa
315 if len(self.buffer) > 0:
316 self.write_buffer()
317
318 if self._format == "hdf5":
319 self._table.flush()
320 self._hdf5_writer.close()
321 elif self._format == "parquet":
322 self._parquet_writer.close()
323 elif self._format == "csv":
324 self._csv_writer.close()
325 elif self._format == "feather":
326 self._feather_writer.close()
327 ROOT.Belle2.MetadataService.Instance().addNtuple(self._filename)
328
329

◆ write_buffer()

write_buffer ( self)
inherited
write the buffer to the output file

Definition at line 282 of file b2pandas_utils.py.

282 def write_buffer(self):
283 """
284 write the buffer to the output file
285 """
286 if self._format == "hdf5":
287 """Create a new row in the hdf5 file with for each particle in the list"""
288 self._table.append(self.buffer)
289 else:
290 table = {name: self.buffer[name] for name, _ in self._dtypes}
291 pa_table = pa.table(table, schema=pa.schema(self._schema))
292 if self._format == "parquet":
293 self._parquet_writer.write_table(pa_table)
294 elif self._format == "csv":
295 self._csv_writer.write(pa_table)
296 elif self._format == "feather":
297 self._feather_writer.write_table(pa_table)
298
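For the pyarrow-based formats, the buffer-to-table conversion can be sketched standalone (the dtype, schema and values are illustrative; the module builds the schema from numpy_to_pyarrow_type_map instead of by hand):

import numpy as np
import pyarrow as pa
import pyarrow.parquet as pq

dtypes = [("__event__", np.uint32), ("px", np.float64)]
buffer = np.zeros(3, dtype=dtypes)

schema = pa.schema([("__event__", pa.uint32()), ("px", pa.float64())])
# column-by-column copy of the structured array into an arrow table
pa_table = pa.table({name: buffer[name] for name, _ in dtypes}, schema=schema)

with pq.ParquetWriter("example.parquet", schema=schema) as writer:
    writer.write_table(pa_table)   # one call per buffer flush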

Member Data Documentation

◆ _buffer

_buffer = np.empty(self._event_buffer_size * 10, dtype=self._dtypes)
protectedinherited

event variables buffer (will be automatically grown if necessary)

Definition at line 144 of file b2pandas_utils.py.

◆ _buffer_index

_buffer_index = 0
protectedinherited

current start index in the event variables buffer

Definition at line 147 of file b2pandas_utils.py.

◆ _csv_writer

_csv_writer = CSVWriter(self._filename, schema=pa.schema(self._schema), **self._writer_kwargs)
protectedinherited

a writer object to write data into a csv file

Definition at line 234 of file b2pandas_utils.py.

◆ _dtypes

_dtypes = dtypes
protectedinherited

The data type.

Definition at line 141 of file b2pandas_utils.py.

◆ _event_buffer_counter

int _event_buffer_counter = 0
protectedinherited

Event buffer counter.

Definition at line 98 of file b2pandas_utils.py.

◆ _event_buffer_size

_event_buffer_size = event_buffer_size
protectedinherited

Event buffer size.

Definition at line 96 of file b2pandas_utils.py.

◆ _evtmeta

_evtmeta = ROOT.Belle2.PyStoreObj("EventMetaData")
protectedinherited

Event metadata.

Definition at line 121 of file b2pandas_utils.py.

◆ _feather_writer

_feather_writer
protectedinherited
Initial value:
= ipc.RecordBatchFileWriter(
sink=self._filename,
schema=pa.schema(self._schema),
**self._writer_kwargs,
)

a writer object to write data into a feather file

Definition at line 206 of file b2pandas_utils.py.

◆ _filename

_filename = filename
protectedinherited

Output filename.

Definition at line 70 of file b2pandas_utils.py.

◆ _format

str _format = "csv"
protectedinherited

Output format.

Definition at line 79 of file b2pandas_utils.py.

◆ _hdf5_writer

_hdf5_writer
protectedinherited
Initial value:
= tables.open_file(
self._filename, mode="w", title="Belle2 Variables to HDF5"
)

The pytables file.

Definition at line 241 of file b2pandas_utils.py.

◆ _listname

_listname = listname
protectedinherited

Particle list name.

Definition at line 72 of file b2pandas_utils.py.

◆ _parquet_writer

_parquet_writer
protectedinherited
Initial value:
= ParquetWriter(
self._filename, schema=pa.schema(self._schema), **self._writer_kwargs
)

a writer object to write data into a parquet file

Definition at line 221 of file b2pandas_utils.py.

◆ _plist

_plist = ROOT.Belle2.PyStoreObj(self._listname)
protectedinherited

Pointer to the particle list.

Definition at line 125 of file b2pandas_utils.py.

◆ _schema

list _schema
protectedinherited
Initial value:
= [
(name, numpy_to_pyarrow_type_map[dt]) for name, dt in self._dtypes
]

A list of (name, pyarrow DataType) tuples defining the pyarrow schema.

Definition at line 202 of file b2pandas_utils.py.

◆ _std_varnames

_std_varnames = variables.std_vector(*self._varnames)
protectedinherited

std.vector of variable names

Definition at line 118 of file b2pandas_utils.py.

◆ _table

_table
protectedinherited
Initial value:
= self._hdf5_writer.create_table(
"/", self._table_name, obj=np.zeros(0, self._dtypes), filters=filters, **self._writer_kwargs
)

The pytables table.

Definition at line 251 of file b2pandas_utils.py.

◆ _table_name

str _table_name
protectedinherited
Initial value:
= (
hdf_table_name if hdf_table_name is not None else self._listname
)

Table name in the hdf5 file.

Definition at line 92 of file b2pandas_utils.py.

◆ _variables

_variables = list(set(variables))
protectedinherited

List of variables.

Definition at line 74 of file b2pandas_utils.py.

◆ _varnames

_varnames
protectedinherited
Initial value:
= [
str(varname)
for varname in variables.variables.resolveCollections(
variables.std_vector(*self._variables)
)
]

variable names

Definition at line 110 of file b2pandas_utils.py.

◆ _writer_kwargs

_writer_kwargs = writer_kwargs
protectedinherited

writer kwargs

Definition at line 100 of file b2pandas_utils.py.

◆ buffer

buffer = "hdf5":
inherited

Definition at line 288 of file b2pandas_utils.py.

◆ buffer_full

buffer_full
inherited

Definition at line 308 of file b2pandas_utils.py.


The documentation for this class was generated from the following file:
b2pandas_utils.py