Belle II Software development
b2root.py
1#!/usr/bin/env python3
2
3
10
11import os
12import sys
13import tempfile
14import shutil
15from basf2 import B2ERROR
16
17
#: Byte order of all multi-byte integers stored in a root file
byteorder = 'big'


class RawRootFile:
    """
    Interface to binary root file content

    The file header is parsed on construction. Iterating over the object
    yields the records (keys) of the file in storage order, starting at the
    first data record.
    """

    def __init__(self, filename):
        """
        Open a root file and read its header

        Raises an Exception if the file does not start with the bytes ``root``.
        """

        #: binary root file
        self.rootfile = open(filename, 'rb')
        if self.rootfile.read(4) != b'root':
            # do not leave the handle open until garbage collection
            self.rootfile.close()
            raise Exception(f"{filename} is not a root file")
        #: root/file format version
        self.version = self._read_int(4)
        #: are file location pointers 4 or 8 bytes?
        self.large = (self.version >= 1000000)
        #: size of file location pointers
        self.wordlen = 8 if self.large else 4
        #: pointer to first data record
        self.begin = self._read_int(4)
        #: pointer to first free word at the EOF
        self.end = self._read_int(self.wordlen)
        #: pointer to FREE data record
        self.seekfree = self._read_int(self.wordlen)
        #: number of bytes in FREE data record
        self.nbytesfree = self._read_int(4)
        #: number of free data records
        self.nfree = self._read_int(4)
        #: number of bytes in TNamed at creation time
        self.nbytesname = self._read_int(4)
        #: number of bytes for file pointers
        self.units = self._read_int(1)
        #: compression level and algorithm
        self.compress = self._read_int(4)
        #: pointer to TStreamerInfo record
        self.seekinfo = self._read_int(self.wordlen)
        #: number of bytes in TStreamerInfo record
        # This is a byte count, not a file pointer: it is stored in 4 bytes
        # regardless of the pointer size (see the ROOT TFile header layout).
        self.nbytesinfo = self._read_int(4)
        #: universal unique ID
        self.uuid = self.rootfile.read(18)
        self.rootfile.seek(0)
        #: binary header (everything in front of the first data record)
        self.header = self.rootfile.read(self.begin)
        # make sure the iteration starts at the first data record
        self.rootfile.seek(self.begin)
        #: pointer to KeysList record (filled when the TFile record is read)
        self.seekkeys = 0

    def _read_int(self, nbytes):
        """
        Read an unsigned integer of nbytes bytes at the current file position
        """

        return int.from_bytes(self.rootfile.read(nbytes), byteorder)

    def close(self):
        """
        Close file (safe to call more than once)
        """

        # the attribute is missing if open() failed in the constructor
        rootfile = getattr(self, 'rootfile', None)
        if rootfile is not None:
            rootfile.close()

    def __enter__(self):
        """
        Context manager interface: return the object itself
        """

        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """
        Context manager interface: close the file, do not suppress exceptions
        """

        self.close()
        return False

    def __del__(self):
        """
        Close file
        """

        self.close()

    def normalize(self, end=None, seekfree=None, nbytesfree=None, nbytesname=None, seekinfo=None):
        """
        Set UUID to zero and adjust pointers that are given as arguments

        Arguments that are None leave the corresponding data member untouched.
        The binary header is rebuilt from the data members and padded with
        zeros up to the position of the first data record.
        """

        if end is not None:
            self.end = end
        if seekfree is not None:
            self.seekfree = seekfree
        if nbytesfree is not None:
            self.nbytesfree = nbytesfree
        if nbytesname is not None:
            self.nbytesname = nbytesname
        if seekinfo is not None:
            self.seekinfo = seekinfo
        self.uuid = b'\x00' * 18

        # same field order and field sizes as read in the constructor
        self.header = b''.join([
            b'root',
            self.version.to_bytes(4, byteorder),
            self.begin.to_bytes(4, byteorder),
            self.end.to_bytes(self.wordlen, byteorder),
            self.seekfree.to_bytes(self.wordlen, byteorder),
            self.nbytesfree.to_bytes(4, byteorder),
            self.nfree.to_bytes(4, byteorder),
            self.nbytesname.to_bytes(4, byteorder),
            self.units.to_bytes(1, byteorder),
            self.compress.to_bytes(4, byteorder),
            self.seekinfo.to_bytes(self.wordlen, byteorder),
            self.nbytesinfo.to_bytes(4, byteorder),
            self.uuid,
        ])
        # fill the gap between the header fields and the first data record
        nzero = self.begin - len(self.header)
        self.header += b'\x00' * nzero

    def __iter__(self):
        """
        iterator interface
        """

        return self

    class Key:
        """
        Root file key
        """

        def __init__(self, keydata, large=False):
            """
            Obtain key header and data payload from given data and extract header information

            keydata is the complete record (header followed by payload). large
            forces 8 byte file location pointers; they are also used whenever
            the key version is above 1000.
            """

            pos = 0
            #: total number of bytes of the record (header and data)
            self.nbytes, pos = self.get_int(keydata, pos)
            #: version of the key
            self.version, pos = self.get_int(keydata, pos, 2)
            #: are file location pointers 4 or 8 bytes?
            # The key itself flags 8 byte pointers by a version above 1000,
            # so honour that even if the caller did not request it.
            self.large = large or self.version > 1000
            #: size of file location pointers
            self.wordlen = 8 if self.large else 4
            #: length of the uncompressed object (in bytes)
            self.objlen, pos = self.get_int(keydata, pos)
            #: date and time when object was written to file
            self.datime, pos = keydata[pos:pos+4], pos+4
            #: length of the key header (in bytes)
            self.keylen, pos = self.get_int(keydata, pos, 2)
            #: cycle of the key
            self.cycle, pos = self.get_int(keydata, pos, 2)
            #: pointer to record itself (consistency check)
            self.seekkey, pos = self.get_int(keydata, pos, self.wordlen)
            #: pointer to the directory header
            self.seekpdir, pos = self.get_int(keydata, pos, self.wordlen)
            #: class name of the object
            self.classname, pos = self.get_string(keydata, pos)
            #: name of the object
            self.name, pos = self.get_string(keydata, pos)
            #: title of the object
            self.title, pos = self.get_string(keydata, pos)
            #: binary key header (all bytes in front of the data payload)
            self.header = keydata[:pos]
            #: data bytes associated to the object
            self.data = keydata[pos:]
            #: name to show in the list of keys
            self.showname = self.classname

        def normalize(self, pos=None, offset=None):
            """
            Set the key datime to zero and adjust the pointer to itself if given as argument

            pos is the new absolute position of the record, offset a shift of
            the current position. A pointer of zero is never modified.
            """

            self.datime = b'\x00' * 4
            if self.seekkey > 0:
                if pos:
                    self.seekkey = pos
                elif offset:
                    self.seekkey += offset
            self.recreate_header()

        def recreate_header(self):
            """
            Build the binary header information from the data members
            """

            # NOTE(review): strings are written with a single length byte, so
            # class names, names, or titles of more than 255 bytes raise an
            # OverflowError here.
            self.header = b''.join([
                self.nbytes.to_bytes(4, byteorder),
                self.version.to_bytes(2, byteorder),
                self.objlen.to_bytes(4, byteorder),
                self.datime,
                self.keylen.to_bytes(2, byteorder),
                self.cycle.to_bytes(2, byteorder),
                self.seekkey.to_bytes(self.wordlen, byteorder),
                self.seekpdir.to_bytes(self.wordlen, byteorder),
                len(self.classname).to_bytes(1, byteorder), self.classname,
                len(self.name).to_bytes(1, byteorder), self.name,
                len(self.title).to_bytes(1, byteorder), self.title,
            ])

        def get_int(self, data, pos, wordlen=4):
            """
            Helper function to read an int from binary data

            Returns the value and the position behind the bytes that were read.
            """

            return (int.from_bytes(data[pos:pos+wordlen], byteorder), pos+wordlen)

        def get_string(self, data, pos):
            """
            Helper function to read a string from binary data

            The string is expected as one length byte followed by the
            characters. Returns the bytes of the string and the position
            behind it.
            """

            strlen = data[pos]
            return (data[pos+1:pos+1+strlen], pos+1+strlen)

    def __next__(self):
        """
        Iterate to next key

        Reads the record at the current file position and returns it as Key.
        The iteration stops at the end of the file or at a record length of zero.
        """

        pos = self.rootfile.tell()
        large = pos > 0x80000000
        nbytes = int.from_bytes(self.rootfile.read(4), byteorder)
        if nbytes == 0:
            # stay in front of the terminator so that further calls stop, too
            self.rootfile.seek(pos)
            raise StopIteration()
        # the length field is part of the record, so read it again with the key
        self.rootfile.seek(pos)
        result = self.Key(self.rootfile.read(nbytes), large)

        # records referenced by the file header or the TFile record get a
        # descriptive name instead of their class name
        if pos == self.seekfree:
            result.showname = b'FreeSegments'
        elif pos == self.seekinfo:
            result.showname = b'StreamerInfo'
        elif pos == self.seekkeys:
            result.showname = b'KeysList'

        if result.showname == b'TFile':
            # decode the directory record stored in the data payload
            result.filename, dpos = result.get_string(result.data, 0)
            result.filetitle, dpos = result.get_string(result.data, dpos)
            # NOTE(review): this replaces the version of the key by the version
            # of the directory record, so a later recreate_header() writes the
            # directory version into the key header. normalize_file reads
            # key.version as directory version, therefore it is kept as is.
            result.version = int.from_bytes(result.data[dpos:dpos+2], byteorder)
            # the 8 bytes after the version are the creation and modification datimes
            result.nbyteskeys = int.from_bytes(result.data[dpos+10:dpos+14], byteorder)
            result.nbytesname = int.from_bytes(result.data[dpos+14:dpos+18], byteorder)
            if result.version > 1000:
                result.seekdir = int.from_bytes(result.data[dpos+18:dpos+26], byteorder)
                result.seekparent = int.from_bytes(result.data[dpos+26:dpos+34], byteorder)
                self.seekkeys = int.from_bytes(result.data[dpos+34:dpos+42], byteorder)
            else:
                result.seekdir = int.from_bytes(result.data[dpos+18:dpos+22], byteorder)
                result.seekparent = int.from_bytes(result.data[dpos+22:dpos+26], byteorder)
                self.seekkeys = int.from_bytes(result.data[dpos+26:dpos+30], byteorder)
        return result
244
245
def normalize_file(filename, output=None, in_place=False, name=None, root_version=None):
    """
    Reset the non-reproducible root file metadata: UUID and datimes.
    It can also reset the initial file name stored in the file itself, but
    (WARNING!) this may corrupt the root file.

    Parameters:
      filename: name of the input root file
      output: name of the output file; if given it takes precedence over in_place
      in_place: overwrite the input file with the normalized content
        (only if no output file name is given)
      name: new file name to be stored in the metadata of the file
      root_version: if given, replaces the version number in the file header

    If neither output nor in_place is given the result is written to a file
    with ``_normalized`` appended to the base name of the input file.

    The process is terminated with exit code 1 if a new name of different
    length is requested for a file that contains a TTree. An Exception is
    raised if the file contains no TFile record or no KeysList record.
    """

    # open input file
    rootfile = RawRootFile(filename)

    try:
        # adjust root version number
        if root_version:
            rootfile.version = root_version

        # create output file
        if output:
            newrootfile = open(output, 'wb')
        elif in_place:
            newrootfile = tempfile.TemporaryFile()
        else:
            basename, ext = os.path.splitext(filename)
            newrootfile = open(basename + '_normalized' + ext, 'wb')

        # The input may only be replaced from the readable temporary file. A
        # named output file is opened write-only and cannot be copied back.
        replace_input = bool(in_place) and not output

        with newrootfile:
            # write output file header (placeholder, rewritten at the end)
            newrootfile.write(rootfile.header)

            # file name in the metadata
            if name:
                newname = name.encode()
            else:
                newname = None

            # bookkeeping of offsets, positions, and keys
            offset = 0
            seekfree = rootfile.seekfree
            nbytesfree = rootfile.nbytesfree
            nbytesname = rootfile.nbytesname
            seekinfo = rootfile.seekinfo
            keylist = []
            keyskey = None
            infokey = None
            swap = False
            # filled when the TFile and the KeysList records are encountered
            seekkeyspos = None
            seekkeys = None
            wordlen = None

            for key in rootfile:
                # reset datime and adjust position of key
                key.normalize(pos=newrootfile.tell())

                # Special treatment of key containing the TFile information
                if key.showname == b'TFile':

                    # if a new name is given change the name and determine
                    # the offset caused by the change of name length
                    namelen = len(key.name)
                    if newname:
                        key.name = key.filename = newname
                    offset = len(key.name) - namelen

                    # apply the offset to total (2x), object, and key length
                    key.nbytes += 2*offset
                    key.objlen += offset
                    key.keylen += offset

                    # recreate the header from updated data members
                    key.recreate_header()

                    # recreate the key data with new name and lengths
                    buffer = len(key.name).to_bytes(1, byteorder) + key.name
                    buffer += len(key.title).to_bytes(1, byteorder) + key.title
                    buffer += key.version.to_bytes(2, byteorder)
                    buffer += (0).to_bytes(8, byteorder)  # reset datimeC and datimeM
                    buffer += (key.nbyteskeys + offset).to_bytes(4, byteorder)
                    buffer += (key.nbytesname + 2*offset).to_bytes(4, byteorder)
                    wordlen = 8 if key.version > 1000 else 4
                    buffer += key.seekdir.to_bytes(wordlen, byteorder)
                    buffer += key.seekparent.to_bytes(wordlen, byteorder)
                    # remember where the pointer to the KeysList is stored,
                    # its final value is only known after the loop
                    seekkeyspos = newrootfile.tell() + len(key.header) + len(buffer)
                    buffer += (rootfile.seekkeys + 2*offset).to_bytes(wordlen, byteorder)
                    buffer += (0).to_bytes(18, byteorder)  # reset UUID
                    if key.version <= 1000:
                        buffer += (0).to_bytes(12, byteorder)
                    key.data = buffer

                else:
                    # check whether we break pointers in TTrees
                    if key.classname == b'TTree' and offset != 0:
                        B2ERROR('Changing the name of root files containing a tree is not supported.')
                        if not replace_input:
                            # do not leave a partially written output file behind
                            newrootfile.close()
                            os.remove(newrootfile.name)
                        sys.exit(1)

                    # update key data in KeysList: number of keys and key headers
                    # and remember key position
                    if key.showname == b'KeysList':
                        seekkeys = newrootfile.tell()
                        buffer = len(keylist).to_bytes(4, byteorder)
                        for filekey in keylist:
                            buffer += filekey.header
                        key.data = buffer
                        keyskey = key
                        swap = (infokey is None)

                    # update free segments pointer and remember key position
                    if key.showname == b'FreeSegments':
                        seekfree = newrootfile.tell()
                        pointer = int.from_bytes(key.data[2:6], byteorder) + 4*offset
                        key.data = key.data[:2] + pointer.to_bytes(4, byteorder) + key.data[6:]

                    # update name in KeysList and FreeSegments
                    if key.showname in [b'KeysList', b'FreeSegments'] and newname:
                        key.name = newname
                        key.nbytes += offset
                        key.keylen += offset
                        key.recreate_header()

                    # keep track of all keys for the KeysList
                    elif key.showname not in [b'StreamerInfo', b'']:
                        keylist.append(key)

                    # remember streamer info key and position
                    if key.showname == b'StreamerInfo':
                        seekinfo = newrootfile.tell()
                        infokey = key

                # write the updated key, making sure the KeysList come after the StreamerInfo
                if swap and key.showname == b'KeysList':
                    # postponed until the next record has been read
                    pass
                elif swap:
                    seekinfo = newrootfile.tell()
                    infokey.normalize(pos=seekinfo)
                    newrootfile.write(infokey.header)
                    newrootfile.write(infokey.data)
                    seekkeys = newrootfile.tell()
                    keyskey.normalize(pos=seekkeys)
                    newrootfile.write(keyskey.header)
                    newrootfile.write(keyskey.data)
                    swap = False
                else:
                    newrootfile.write(key.header)
                    newrootfile.write(key.data)

            # without these records the pointer to the KeysList cannot be updated
            if seekkeyspos is None or seekkeys is None:
                raise Exception(f"{filename} does not contain a TFile record and a KeysList record")

            # write the new file header
            rootfile.normalize(end=newrootfile.tell(), seekfree=seekfree, nbytesfree=nbytesfree+offset,
                               nbytesname=nbytesname+2*offset, seekinfo=seekinfo)
            newrootfile.seek(0)
            newrootfile.write(rootfile.header)

            # update pointer to keyslist
            newrootfile.seek(seekkeyspos)
            newrootfile.write(seekkeys.to_bytes(wordlen, byteorder))

            # replace in the input file if the in-place option is used
            if replace_input:
                # release the input before it is overwritten
                rootfile.rootfile.close()
                newrootfile.seek(0)
                with open(filename, 'wb') as target:
                    shutil.copyfileobj(newrootfile, target)
    finally:
        # closing twice is harmless
        rootfile.rootfile.close()
large
are file location pointers 4 or 8 bytes?
Definition: b2root.py:130
wordlen
size of file location pointers
Definition: b2root.py:132
def get_string(self, data, pos)
Definition: b2root.py:202
data
data bytes associated to the object
Definition: b2root.py:159
seekkey
pointer to record itself (consistency check)
Definition: b2root.py:173
showname
name to show in the list of keys
Definition: b2root.py:161
datime
date and time when object was written to file
Definition: b2root.py:169
def get_int(self, data, pos, wordlen=4)
Definition: b2root.py:195
def __init__(self, keydata, large=False)
Definition: b2root.py:124
header
binary key header (all bytes in front of the data payload)
Definition: b2root.py:157
def normalize(self, pos=None, offset=None)
Definition: b2root.py:163
large
are file location pointers 4 or 8 bytes?
Definition: b2root.py:39
wordlen
size of file location pointers
Definition: b2root.py:41
units
number of bytes for file pointers
Definition: b2root.py:55
seekkeys
pointer to KeysList record
Definition: b2root.py:69
nbytesfree
number of bytes in FREE data record
Definition: b2root.py:49
compress
compression level and algorithm
Definition: b2root.py:57
version
root/file format version
Definition: b2root.py:37
nbytesinfo
number of bytes in TStreamerInfo record
Definition: b2root.py:61
nfree
number of free data records
Definition: b2root.py:51
def normalize(self, end=None, seekfree=None, nbytesfree=None, nbytesname=None, seekinfo=None)
Definition: b2root.py:79
uuid
universal unique ID
Definition: b2root.py:63
def __init__(self, filename)
Definition: b2root.py:26
rootfile
binary root file
Definition: b2root.py:32
begin
pointer to first data record
Definition: b2root.py:43
seekinfo
pointer to TStreamerInfo record
Definition: b2root.py:59
header
binary header
Definition: b2root.py:66
seekfree
pointer to FREE data record
Definition: b2root.py:47
nbytesname
number of bytes in TNamed at creation time
Definition: b2root.py:53
end
pointer to first free word at the EOF
Definition: b2root.py:45