Belle II Software  release-05-01-25
b2root.py
1 #!/usr/bin/env python3
2 # -*- coding: utf-8 -*-
3 
4 import os
5 import sys
6 import tempfile
7 import shutil
8 from basf2 import B2ERROR
9 
10 
# Byte order used for every integer that is read from or written to a root file
byteorder = 'big'


class RawRootFile:
    """
    Interface to binary root file content

    The constructor parses the file header. Iterating over the object yields
    one `Key` per record, starting at the current position of the underlying
    file (initially `begin`), until a record length of zero or the end of the
    file is reached.
    """

    def __init__(self, filename):
        """
        Open a root file and read its header

        Raises an Exception if the file does not start with the bytes b'root'.
        """

        #: binary root file
        self.rootfile = open(filename, 'rb')
        root = self.rootfile.read(4)
        if root != b'root':
            # do not leak the handle of a file that is rejected
            self.rootfile.close()
            raise Exception("%s is not a root file" % filename)

        #: root/file format version
        self.version = self._read_int(4)

        #: are file location pointers 8 bytes (True) or 4 bytes (False) wide?
        self.large = (self.version >= 1000000)

        #: size of file location pointers
        self.wordlen = 8 if self.large else 4

        #: pointer to first data record
        self.begin = self._read_int(4)

        #: pointer to first free word at the EOF
        self.end = self._read_int(self.wordlen)

        #: pointer to FREE data record
        self.seekfree = self._read_int(self.wordlen)

        #: number of bytes in FREE data record
        self.nbytesfree = self._read_int(4)

        #: number of free data records
        self.nfree = self._read_int(4)

        #: number of bytes in TNamed at creation time
        self.nbytesname = self._read_int(4)

        #: number of bytes for file pointers
        self.units = self._read_int(1)

        #: compression level and algorithm
        self.compress = self._read_int(4)

        #: pointer to TStreamerInfo record
        self.seekinfo = self._read_int(self.wordlen)

        #: number of bytes in TStreamerInfo record
        #: (a 4-byte integer in the small and in the large file format, only
        #: the pointers grow to 8 bytes)
        self.nbytesinfo = self._read_int(4)

        #: universal unique ID
        self.uuid = self.rootfile.read(18)
        self.rootfile.seek(0)

        #: binary header
        self.header = self.rootfile.read(self.begin)
        self.rootfile.seek(self.begin)

        #: pointer to KeysList record (only known once the TFile key was read by the iteration)
        self.seekkeys = 0

    def _read_int(self, nbytes):
        """
        Helper function to read the next nbytes bytes of the file as unsigned integer
        """

        return int.from_bytes(self.rootfile.read(nbytes), byteorder)

    def __del__(self):
        """
        Close file
        """

        # the attribute is missing if open() failed in the constructor
        if hasattr(self, 'rootfile'):
            self.rootfile.close()

    def normalize(self, end=None, seekfree=None, nbytesfree=None, nbytesname=None, seekinfo=None):
        """
        Set UUID to zero and adjust pointers that are given as arguments

        Arguments that are None leave the corresponding data member untouched.
        The binary `header` is rebuilt from the data members and padded with
        zero bytes up to `begin`.
        """

        if end is not None:
            self.end = end
        if seekfree is not None:
            self.seekfree = seekfree
        if nbytesfree is not None:
            self.nbytesfree = nbytesfree
        if nbytesname is not None:
            self.nbytesname = nbytesname
        if seekinfo is not None:
            self.seekinfo = seekinfo
        self.uuid = b'\x00' * 18

        # same field order and field widths as used for reading in the constructor
        self.header = b''.join([
            b'root',
            self.version.to_bytes(4, byteorder),
            self.begin.to_bytes(4, byteorder),
            self.end.to_bytes(self.wordlen, byteorder),
            self.seekfree.to_bytes(self.wordlen, byteorder),
            self.nbytesfree.to_bytes(4, byteorder),
            self.nfree.to_bytes(4, byteorder),
            self.nbytesname.to_bytes(4, byteorder),
            self.units.to_bytes(1, byteorder),
            self.compress.to_bytes(4, byteorder),
            self.seekinfo.to_bytes(self.wordlen, byteorder),
            self.nbytesinfo.to_bytes(4, byteorder),
            self.uuid,
        ])
        nzero = self.begin - len(self.header)
        self.header += b'\x00' * nzero

    def __iter__(self):
        """
        iterator interface
        """

        return self

    class Key:
        """
        Root file key
        """

        def __init__(self, keydata, large=False):
            """
            Obtain key header and data payload from given data and extract header information

            keydata is the complete binary record. large is a hint that the
            record uses 8-byte file pointers; independent of the hint 8-byte
            pointers are assumed if the key version is above 1000.
            """

            pos = 0

            #: length of the complete record (in bytes)
            self.nbytes, pos = self.get_int(keydata, pos)

            #: version number; the iteration of RawRootFile replaces it by the
            #: directory version for the TFile key
            self.version, pos = self.get_int(keydata, pos, 2)

            #: version of the key as stored in the record, this is what is
            #: written back to the key header
            self.keyversion = self.version

            #: are file location pointers 8 bytes (True) or 4 bytes (False) wide?
            self.large = large or self.version > 1000

            #: size of file location pointers
            self.wordlen = 8 if self.large else 4

            #: object length field of the key
            self.objlen, pos = self.get_int(keydata, pos)

            #: date and time when object was written to file
            self.datime, pos = keydata[pos:pos+4], pos+4

            #: key length field of the key
            self.keylen, pos = self.get_int(keydata, pos, 2)

            #: cycle field of the key
            self.cycle, pos = self.get_int(keydata, pos, 2)

            #: pointer to record itself (consistency check)
            self.seekkey, pos = self.get_int(keydata, pos, self.wordlen)

            #: second file pointer of the key header
            self.seekpdir, pos = self.get_int(keydata, pos, self.wordlen)

            #: class name of the object
            self.classname, pos = self.get_string(keydata, pos)

            #: name of the object
            self.name, pos = self.get_string(keydata, pos)

            #: title of the object
            self.title, pos = self.get_string(keydata, pos)

            #: binary key header
            self.header = keydata[:pos]

            #: data bytes associated to the object
            self.data = keydata[pos:]

            #: name to show in the list of keys
            self.showname = self.classname

        def normalize(self, pos=None, offset=None):
            """
            Set the key datime to zero adjust the pointer to itself if given as argument

            A pointer of zero is never changed. pos replaces the pointer,
            offset (only used if pos is None) is added to it.
            """

            self.datime = b'\x00' * 4
            if self.seekkey > 0:
                if pos is not None:
                    self.seekkey = pos
                elif offset is not None:
                    self.seekkey += offset
            self.recreate_header()

        def recreate_header(self):
            """
            Build the binary header information from the data members
            """

            self.header = b''.join([
                self.nbytes.to_bytes(4, byteorder),
                # not self.version, which may hold the directory version
                self.keyversion.to_bytes(2, byteorder),
                self.objlen.to_bytes(4, byteorder),
                self.datime,
                self.keylen.to_bytes(2, byteorder),
                self.cycle.to_bytes(2, byteorder),
                self.seekkey.to_bytes(self.wordlen, byteorder),
                self.seekpdir.to_bytes(self.wordlen, byteorder),
                self._pack_string(self.classname),
                self._pack_string(self.name),
                self._pack_string(self.title),
            ])

        @staticmethod
        def _pack_string(value):
            """
            Helper function to convert a string to binary data, inverse of get_string
            """

            length = len(value)
            if length < 255:
                return length.to_bytes(1, byteorder) + value
            # long strings: marker byte 255 followed by a 4-byte length
            return b'\xff' + length.to_bytes(4, byteorder) + value

        def get_int(self, data, pos, wordlen=4):
            """
            Helper function to read an int from binary data

            Returns the tuple (value, position behind the int).
            """

            return (int.from_bytes(data[pos:pos+wordlen], byteorder), pos+wordlen)

        def get_string(self, data, pos):
            """
            Helper function to read a string from binary data

            Returns the tuple (string as bytes, position behind the string).
            """

            strlen = data[pos]
            pos += 1
            if strlen == 255:
                # long strings: marker byte 255 followed by a 4-byte length
                strlen = int.from_bytes(data[pos:pos+4], byteorder)
                pos += 4
            return (data[pos:pos+strlen], pos+strlen)

    def __next__(self):
        """
        Iterate to next key

        Raises StopIteration if the length field of the next record is zero
        (this includes the end of the file).
        """

        pos = self.rootfile.tell()
        large = pos > 0x80000000
        nbytes = int.from_bytes(self.rootfile.read(4), byteorder)
        if (nbytes == 0):
            raise StopIteration()
        # go back to the start of the record, the length is part of the key
        self.rootfile.seek(-4, 1)
        result = self.Key(self.rootfile.read(nbytes), large)

        # special records are identified by their position in the file
        if pos == self.seekfree:
            result.showname = b'FreeSegments'
        elif pos == self.seekinfo:
            result.showname = b'StreamerInfo'
        elif pos == self.seekkeys:
            result.showname = b'KeysList'

        if result.showname == b'TFile':
            # the payload starts with file name and title, followed by the directory information
            result.filename, pos = result.get_string(result.data, 0)
            result.filetitle, pos = result.get_string(result.data, pos)
            # from here on version is the one stored in the payload
            result.version = int.from_bytes(result.data[pos:pos+2], byteorder)
            result.nbyteskeys = int.from_bytes(result.data[pos+10:pos+14], byteorder)
            result.nbytesname = int.from_bytes(result.data[pos+14:pos+18], byteorder)
            if result.version > 1000:
                result.seekdir = int.from_bytes(result.data[pos+18:pos+26], byteorder)
                result.seekparent = int.from_bytes(result.data[pos+26:pos+34], byteorder)
                self.seekkeys = int.from_bytes(result.data[pos+34:pos+42], byteorder)
            else:
                result.seekdir = int.from_bytes(result.data[pos+18:pos+22], byteorder)
                result.seekparent = int.from_bytes(result.data[pos+22:pos+26], byteorder)
                self.seekkeys = int.from_bytes(result.data[pos+26:pos+30], byteorder)
        return result
237 
238 
def normalize_file(filename, output=None, in_place=False, name=None, root_version=None):
    """
    Reset the non-reproducible root file metadata: UUID and datimes.
    It can also reset the initial file name stored in the file itself, but
    (WARNING!) this may corrupt the root file.

    Parameters:
      filename: path of the root file to be normalized
      output: path of the normalized file; if not given and in_place is False
        the output is written to <basename>_normalized<ext> next to the input
      in_place: overwrite the input file with the normalized content
      name: new file name to be stored in the file metadata (optional)
      root_version: new root version number for the file header (optional)

    The process is terminated with exit code 1 if a new name of different
    length is requested for a file that contains a TTree.
    """

    # open input file
    rootfile = RawRootFile(filename)

    # adjust root version number
    if root_version:
        rootfile.version = root_version

    # create output file
    # (readable as well because it is copied back to the input file if in_place is set;
    # outname stays None if the output only lives in an anonymous temporary file)
    if output:
        outname = output
        newrootfile = open(outname, 'w+b')
    elif in_place:
        outname = None
        newrootfile = tempfile.TemporaryFile()
    else:
        basename, ext = os.path.splitext(filename)
        outname = basename + '_normalized' + ext
        newrootfile = open(outname, 'w+b')

    try:
        # write output file header
        # (placeholder to reserve the space, the final header is written after all keys)
        newrootfile.write(rootfile.header)

        # file name in the metadata
        if name:
            newname = name.encode()
        else:
            newname = None

        # bookkeeping of offsets, positions, and keys
        offset = 0
        seekfree = rootfile.seekfree
        nbytesfree = rootfile.nbytesfree
        nbytesname = rootfile.nbytesname
        seekinfo = rootfile.seekinfo
        keylist = []
        keyskey = None
        infokey = None
        swap = False

        for key in rootfile:
            # reset datime and adjust position of key
            key.normalize(pos=newrootfile.tell())

            # Special treatment of key containing the TFile information
            if key.showname == b'TFile':

                # if a new name is given change the name and determine the offset caused by the change of name length
                namelen = len(key.name)
                if newname:
                    key.name = key.filename = newname
                offset = len(key.name) - namelen

                # apply the offset to total (2x), object, and key length
                key.nbytes += 2*offset
                key.objlen += offset
                key.keylen += offset

                # recreate the header from updated data members
                key.recreate_header()

                # recreate the key data with new name and lengths
                buffer = len(key.name).to_bytes(1, byteorder) + key.name
                buffer += len(key.title).to_bytes(1, byteorder) + key.title
                buffer += key.version.to_bytes(2, byteorder)
                buffer += (0).to_bytes(8, byteorder)  # reset datimeC and datimeM
                buffer += (key.nbyteskeys + offset).to_bytes(4, byteorder)
                buffer += (key.nbytesname + 2*offset).to_bytes(4, byteorder)
                wordlen = 8 if key.version > 1000 else 4
                buffer += key.seekdir.to_bytes(wordlen, byteorder)
                buffer += key.seekparent.to_bytes(wordlen, byteorder)
                # remember where the pointer to the KeysList is stored, it is updated at the very end
                seekkeyspos = newrootfile.tell() + len(key.header) + len(buffer)
                buffer += (rootfile.seekkeys + 2*offset).to_bytes(wordlen, byteorder)
                buffer += (0).to_bytes(18, byteorder)  # reset UUID
                if key.version <= 1000:
                    buffer += (0).to_bytes(12, byteorder)
                key.data = buffer

            else:
                # check whether we break pointers in TTrees
                if key.classname == b'TTree' and offset != 0:
                    B2ERROR('Changing the name of root files containing a tree is not supported.')
                    # close before removing, an open file cannot be deleted on every platform
                    newrootfile.close()
                    if outname is not None:
                        os.remove(outname)
                    sys.exit(1)

                # update key data in KeysList: number of keys and key headers
                # and remember key position
                if key.showname == b'KeysList':
                    seekkeys = newrootfile.tell()
                    buffer = len(keylist).to_bytes(4, byteorder)
                    for filekey in keylist:
                        buffer += filekey.header
                    key.data = buffer
                    keyskey = key
                    swap = (infokey is None)

                # update free segments pointer and remember key position
                if key.showname == b'FreeSegments':
                    seekfree = newrootfile.tell()
                    pointer = int.from_bytes(key.data[2:6], byteorder) + 4*offset
                    key.data = key.data[:2] + pointer.to_bytes(4, byteorder) + key.data[6:]

                # update name in KeysList and FreeSegments
                if key.showname in [b'KeysList', b'FreeSegments'] and newname:
                    key.name = newname
                    key.nbytes += offset
                    key.keylen += offset
                    key.recreate_header()

                # keep track of all keys for the KeysList
                elif key.showname not in [b'StreamerInfo', b'']:
                    keylist.append(key)

                # remember streamer info key and position
                if key.showname == b'StreamerInfo':
                    seekinfo = newrootfile.tell()
                    infokey = key

            # write the updated key, making sure the KeysList come after the StreamerInfo
            if swap and key.showname == b'KeysList':
                # written together with the StreamerInfo in the next iteration
                pass
            elif swap:
                # NOTE(review): this assumes that the key following the KeysList is the StreamerInfo - confirm
                seekinfo = newrootfile.tell()
                infokey.normalize(pos=seekinfo)
                newrootfile.write(infokey.header)
                newrootfile.write(infokey.data)
                seekkeys = newrootfile.tell()
                keyskey.normalize(pos=seekkeys)
                newrootfile.write(keyskey.header)
                newrootfile.write(keyskey.data)
                swap = False
            else:
                newrootfile.write(key.header)
                newrootfile.write(key.data)

        # write the new file header
        rootfile.normalize(end=newrootfile.tell(), seekfree=seekfree, nbytesfree=nbytesfree+offset,
                           nbytesname=nbytesname+2*offset, seekinfo=seekinfo)
        newrootfile.seek(0)
        newrootfile.write(rootfile.header)

        # update pointer to keyslist
        newrootfile.seek(seekkeyspos)
        newrootfile.write(seekkeys.to_bytes(wordlen, byteorder))

        # replace in the input file if the in-place option is used
        if in_place:
            # release the input file before it is overwritten
            rootfile.rootfile.close()
            newrootfile.seek(0)
            with open(filename, 'wb') as target:
                shutil.copyfileobj(newrootfile, target)

    finally:
        # release both files also in case of errors, closing twice is harmless
        newrootfile.close()
        rootfile.rootfile.close()
B2Tools.b2root.RawRootFile.__iter__
def __iter__(self)
Definition: b2root.py:105
B2Tools.b2root.RawRootFile.compress
compress
compression level and algorithm
Definition: b2root.py:50
B2Tools.b2root.RawRootFile.units
units
number of bytes for file pointers
Definition: b2root.py:48
B2Tools.b2root.RawRootFile.seekinfo
seekinfo
pointer to TStreamerInfo record
Definition: b2root.py:52
B2Tools.b2root.RawRootFile.nbytesname
nbytesname
number of bytes in TNamed at creation time
Definition: b2root.py:46
B2Tools.b2root.RawRootFile.Key.seekkey
seekkey
pointer to record itself (consistency check)
Definition: b2root.py:166
B2Tools.b2root.RawRootFile.Key.data
data
data bytes associated to the object
Definition: b2root.py:152
B2Tools.b2root.RawRootFile.Key.datime
datime
date and time when object was written to file
Definition: b2root.py:162
B2Tools.b2root.RawRootFile
Definition: b2root.py:14
B2Tools.b2root.RawRootFile.seekfree
seekfree
pointer to FREE data record
Definition: b2root.py:40
B2Tools.b2root.RawRootFile.Key.large
large
are file location pointers 4 or 8 bytes?
Definition: b2root.py:123
B2Tools.b2root.RawRootFile.header
header
binary header
Definition: b2root.py:59
B2Tools.b2root.RawRootFile.wordlen
wordlen
size of file location pointers
Definition: b2root.py:34
B2Tools.b2root.RawRootFile.Key.normalize
def normalize(self, pos=None, offset=None)
Definition: b2root.py:156
B2Tools.b2root.RawRootFile.uuid
uuid
universal unique ID
Definition: b2root.py:56
B2Tools.b2root.RawRootFile.nbytesinfo
nbytesinfo
number of bytes in TStreamerInfo record
Definition: b2root.py:54
B2Tools.b2root.RawRootFile.Key.get_int
def get_int(self, data, pos, wordlen=4)
Definition: b2root.py:188
B2Tools.b2root.RawRootFile.__del__
def __del__(self)
Definition: b2root.py:64
B2Tools.b2root.RawRootFile.normalize
def normalize(self, end=None, seekfree=None, nbytesfree=None, nbytesname=None, seekinfo=None)
Definition: b2root.py:72
B2Tools.b2root.RawRootFile.end
end
pointer to first free word at the EOF
Definition: b2root.py:38
B2Tools.b2root.RawRootFile.Key.get_string
def get_string(self, data, pos)
Definition: b2root.py:195
B2Tools.b2root.RawRootFile.Key.recreate_header
def recreate_header(self)
Definition: b2root.py:171
B2Tools.b2root.RawRootFile.__init__
def __init__(self, filename)
Definition: b2root.py:19
B2Tools.b2root.RawRootFile.Key.showname
showname
name to show in the list of keys
Definition: b2root.py:154
B2Tools.b2root.RawRootFile.nbytesfree
nbytesfree
number of bytes in FREE data record
Definition: b2root.py:42
B2Tools.b2root.RawRootFile.large
large
are file location pointers 4 or 8 bytes?
Definition: b2root.py:32
B2Tools.b2root.RawRootFile.Key.header
header
binary key header
Definition: b2root.py:150
B2Tools.b2root.RawRootFile.version
version
root/file format version
Definition: b2root.py:30
B2Tools.b2root.RawRootFile.begin
begin
pointer to first data record
Definition: b2root.py:36
B2Tools.b2root.RawRootFile.nfree
nfree
number of free data records
Definition: b2root.py:44
B2Tools.b2root.RawRootFile.Key
Definition: b2root.py:112
B2Tools.b2root.RawRootFile.seekkeys
seekkeys
pointer to KeysList record
Definition: b2root.py:62
B2Tools.b2root.RawRootFile.Key.__init__
def __init__(self, keydata, large=False)
Definition: b2root.py:117
B2Tools.b2root.RawRootFile.rootfile
rootfile
binary root file
Definition: b2root.py:25
B2Tools.b2root.RawRootFile.__next__
def __next__(self)
Definition: b2root.py:203
B2Tools.b2root.RawRootFile.Key.wordlen
wordlen
size of file location pointers
Definition: b2root.py:125