Belle II Software  release-08-01-10
b2root.py
1 #!/usr/bin/env python3
2 
3 
10 
11 import os
12 import sys
13 import tempfile
14 import shutil
15 from basf2 import B2ERROR
16 
17 
# Byte order passed to every int.from_bytes()/int.to_bytes() call in this
# module: all multi-byte integers of the root file are handled as big-endian.
byteorder = 'big'
19 
20 
class RawRootFile:
    """
    Interface to binary root file content

    The constructor parses the fixed-layout file header. Iterating over the
    object yields one `Key` per record, starting at the first data record and
    stopping at the first record whose length field is zero (or at EOF).
    """

    def __init__(self, filename):
        """
        Open a root file and read its header

        Parameters:
            filename: path of the root file to open

        Raises:
            Exception: if the file does not start with the magic bytes ``root``
        """

        ## binary root file
        self.rootfile = open(filename, 'rb')
        root = self.rootfile.read(4)
        if root != b'root':
            # do not keep a handle on a file we cannot interpret
            self.rootfile.close()
            raise Exception("%s is not a root file" % filename)
        ## root/file format version
        self.version = self._read_int(4)
        ## are file location pointers 4 or 8 bytes?
        self.large = (self.version >= 1000000)
        ## size of file location pointers in bytes
        self.wordlen = 8 if self.large else 4
        ## pointer to first data record
        self.begin = self._read_int(4)
        ## pointer to first free word at the EOF
        self.end = self._read_int(self.wordlen)
        ## pointer to FREE data record
        self.seekfree = self._read_int(self.wordlen)
        ## number of bytes in FREE data record
        self.nbytesfree = self._read_int(4)
        ## number of free data records
        self.nfree = self._read_int(4)
        ## number of bytes in TNamed at creation time
        self.nbytesname = self._read_int(4)
        ## number of bytes for file pointers
        self.units = self._read_int(1)
        ## compression level and algorithm
        self.compress = self._read_int(4)
        ## pointer to TStreamerInfo record
        self.seekinfo = self._read_int(self.wordlen)
        ## number of bytes in TStreamerInfo record; this is a 4 byte integer
        ## in both the small and the large file header layout
        self.nbytesinfo = self._read_int(4)
        ## universal unique ID
        self.uuid = self.rootfile.read(18)

        # keep the raw header bytes and position the file at the first record
        self.rootfile.seek(0)
        ## binary header
        self.header = self.rootfile.read(self.begin)
        self.rootfile.seek(self.begin)
        ## pointer to KeysList record, known once the TFile record was read
        self.seekkeys = 0

    def _read_int(self, size):
        """
        Read the next `size` bytes of the file as an unsigned integer
        """

        return int.from_bytes(self.rootfile.read(size), byteorder)

    def __del__(self):
        """
        Close file
        """

        # the attribute is missing if open() failed in the constructor
        rootfile = getattr(self, 'rootfile', None)
        if rootfile is not None:
            rootfile.close()

    def normalize(self, end=None, seekfree=None, nbytesfree=None, nbytesname=None, seekinfo=None):
        """
        Set UUID to zero and adjust pointers that are given as arguments

        Arguments that are None or 0 leave the corresponding header value
        unchanged. The binary `header` is rebuilt from the data members and
        padded with zero bytes up to `begin`.
        """

        if end:
            self.end = end
        if seekfree:
            self.seekfree = seekfree
        if nbytesfree:
            self.nbytesfree = nbytesfree
        if nbytesname:
            self.nbytesname = nbytesname
        if seekinfo:
            self.seekinfo = seekinfo
        self.uuid = b'\x00' * 18

        # same field order and widths as read by the constructor
        self.header = b''.join([
            b'root',
            self.version.to_bytes(4, byteorder),
            self.begin.to_bytes(4, byteorder),
            self.end.to_bytes(self.wordlen, byteorder),
            self.seekfree.to_bytes(self.wordlen, byteorder),
            self.nbytesfree.to_bytes(4, byteorder),
            self.nfree.to_bytes(4, byteorder),
            self.nbytesname.to_bytes(4, byteorder),
            self.units.to_bytes(1, byteorder),
            self.compress.to_bytes(4, byteorder),
            self.seekinfo.to_bytes(self.wordlen, byteorder),
            self.nbytesinfo.to_bytes(4, byteorder),
            self.uuid,
        ])
        nzero = self.begin - len(self.header)
        self.header += b'\x00' * nzero

    def __iter__(self):
        """
        iterator interface
        """

        return self

    class Key:
        """
        Root file key
        """

        def __init__(self, keydata, large=False):
            """
            Obtain key header and data payload from given data and extract header information

            Parameters:
                keydata: bytes of the complete record (key header followed by payload)
                large: force 8 byte file pointers; they are also used whenever
                    the key version read from the record is larger than 1000
            """

            pos = 0
            ## total number of bytes of the record (header and payload)
            self.nbytes, pos = self.get_int(keydata, pos)
            ## version field of the key
            self.version, pos = self.get_int(keydata, pos, 2)
            ## are file location pointers 4 or 8 bytes?
            # a key version above 1000 marks 8 byte pointers, independent of
            # what the caller guessed from the file position
            self.large = large or self.version > 1000
            ## size of file location pointers in bytes
            self.wordlen = 8 if self.large else 4
            ## object length field of the key
            self.objlen, pos = self.get_int(keydata, pos)
            ## date and time when object was written to file
            self.datime, pos = keydata[pos:pos+4], pos+4
            ## key length field of the key
            self.keylen, pos = self.get_int(keydata, pos, 2)
            ## cycle field of the key
            self.cycle, pos = self.get_int(keydata, pos, 2)
            ## pointer to record itself (consistency check)
            self.seekkey, pos = self.get_int(keydata, pos, self.wordlen)
            ## second file pointer of the key header (presumably the parent directory)
            self.seekpdir, pos = self.get_int(keydata, pos, self.wordlen)
            ## class name of the object (bytes)
            self.classname, pos = self.get_string(keydata, pos)
            ## name of the object (bytes)
            self.name, pos = self.get_string(keydata, pos)
            ## title of the object (bytes)
            self.title, pos = self.get_string(keydata, pos)
            ## binary key header
            self.header = keydata[:pos]
            ## data bytes associated to the object
            self.data = keydata[pos:]
            ## name to show in the list of keys
            self.showname = self.classname

        def normalize(self, pos=None, offset=None):
            """
            Set the key datime to zero and adjust the pointer to itself if given as argument

            Parameters:
                pos: new absolute position of the record; takes precedence over offset
                offset: shift to apply to the current position

            The pointer is only touched if it is currently larger than zero.
            The binary header is always rebuilt.
            """

            self.datime = b'\x00' * 4
            if self.seekkey > 0:
                if pos:
                    self.seekkey = pos
                elif offset:
                    self.seekkey += offset
            self.recreate_header()

        def recreate_header(self):
            """
            Build the binary header information from the data members
            """

            # strings are stored with a single length byte in front
            self.header = b''.join([
                self.nbytes.to_bytes(4, byteorder),
                self.version.to_bytes(2, byteorder),
                self.objlen.to_bytes(4, byteorder),
                self.datime,
                self.keylen.to_bytes(2, byteorder),
                self.cycle.to_bytes(2, byteorder),
                self.seekkey.to_bytes(self.wordlen, byteorder),
                self.seekpdir.to_bytes(self.wordlen, byteorder),
                len(self.classname).to_bytes(1, byteorder), self.classname,
                len(self.name).to_bytes(1, byteorder), self.name,
                len(self.title).to_bytes(1, byteorder), self.title,
            ])

        def get_int(self, data, pos, wordlen=4):
            """
            Helper function to read an int from binary data

            Returns the tuple (value, position after the int).
            """

            return (int.from_bytes(data[pos:pos+wordlen], byteorder), pos+wordlen)

        def get_string(self, data, pos):
            """
            Helper function to read a string from binary data

            The byte at `pos` is the length of the string that follows.
            Returns the tuple (string bytes, position after the string).
            """

            strlen = data[pos]
            return (data[pos+1:pos+1+strlen], pos+1+strlen)

    def __next__(self):
        """
        Iterate to next key

        Raises:
            StopIteration: if the length field of the next record is zero,
                which is also the case at the end of the file
        """

        pos = self.rootfile.tell()
        large = pos > 0x80000000
        nbytes = int.from_bytes(self.rootfile.read(4), byteorder)
        if nbytes == 0:
            raise StopIteration()
        # the length field is part of the record, so go back before reading it
        self.rootfile.seek(-4, 1)
        result = self.Key(self.rootfile.read(nbytes), large)

        # records referenced by the file header get a descriptive name
        if pos == self.seekfree:
            result.showname = b'FreeSegments'
        elif pos == self.seekinfo:
            result.showname = b'StreamerInfo'
        elif pos == self.seekkeys:
            result.showname = b'KeysList'

        # the TFile record contains the directory information in its payload
        if result.showname == b'TFile':
            result.filename, pos = result.get_string(result.data, 0)
            result.filetitle, pos = result.get_string(result.data, pos)
            # NOTE(review): this replaces the key version parsed from the key
            # header by the version stored in the payload; normalize_file
            # relies on it, but recreate_header() then writes this value, too
            result.version = int.from_bytes(result.data[pos:pos+2], byteorder)
            # bytes pos+2 to pos+10 are skipped here
            result.nbyteskeys = int.from_bytes(result.data[pos+10:pos+14], byteorder)
            result.nbytesname = int.from_bytes(result.data[pos+14:pos+18], byteorder)
            if result.version > 1000:
                result.seekdir = int.from_bytes(result.data[pos+18:pos+26], byteorder)
                result.seekparent = int.from_bytes(result.data[pos+26:pos+34], byteorder)
                self.seekkeys = int.from_bytes(result.data[pos+34:pos+42], byteorder)
            else:
                result.seekdir = int.from_bytes(result.data[pos+18:pos+22], byteorder)
                result.seekparent = int.from_bytes(result.data[pos+22:pos+26], byteorder)
                self.seekkeys = int.from_bytes(result.data[pos+26:pos+30], byteorder)
        return result
244 
245 
def normalize_file(filename, output=None, in_place=False, name=None, root_version=None):
    """
    Reset the non-reproducible root file metadata: UUID and datimes.
    It can also reset the initial file name stored in the file itself, but
    (WARNING!) this may corrupt the root file.

    Parameters:
        filename: path of the root file to normalize
        output: path of the file to write the normalized content to
        in_place: if no output is given, write to a temporary file; in any
            case copy the result over the input file at the end
        name: new file name to store in the metadata of the file
        root_version: if given, replaces the version number in the file header

    If neither output nor in_place is given, the result is written next to the
    input file with '_normalized' inserted before the extension.

    If the name length changes and the file contains a TTree, an error is
    reported, the output file is removed (unless in_place is set) and the
    process exits with status 1.
    """

    # open input file
    rootfile = RawRootFile(filename)

    # adjust root version number
    if root_version:
        rootfile.version = root_version

    # create output file
    if output:
        newrootfile = open(output, 'wb')
    elif in_place:
        newrootfile = tempfile.TemporaryFile()
    else:
        basename, ext = os.path.splitext(filename)
        newrootfile = open(basename + '_normalized' + ext, 'wb')

    # make sure the output file is closed on every exit path
    try:
        # write output file header; it only reserves the space here and is
        # overwritten with the final values after all keys are written
        newrootfile.write(rootfile.header)

        # file name in the metadata
        newname = name.encode() if name else None

        # bookkeeping of offsets, positions, and keys
        offset = 0
        seekfree = rootfile.seekfree
        nbytesfree = rootfile.nbytesfree
        nbytesname = rootfile.nbytesname
        seekinfo = rootfile.seekinfo
        keylist = []
        keyskey = None
        infokey = None
        swap = False
        # these are only known once the TFile and KeysList records were seen
        seekkeys = None
        seekkeyspos = None
        wordlen = None

        for key in rootfile:
            # reset datime and adjust position of key
            key.normalize(pos=newrootfile.tell())

            # Special treatment of key containing the TFile information
            if key.showname == b'TFile':

                # if a new name is given change the name and determine the offset caused by the change of name length
                namelen = len(key.name)
                if newname:
                    key.name = key.filename = newname
                offset = len(key.name) - namelen

                # apply the offset to total (2x), object, and key length
                key.nbytes += 2*offset
                key.objlen += offset
                key.keylen += offset

                # recreate the header from updated data members
                key.recreate_header()

                # recreate the key data with new name and lengths
                buffer = len(key.name).to_bytes(1, byteorder) + key.name
                buffer += len(key.title).to_bytes(1, byteorder) + key.title
                buffer += key.version.to_bytes(2, byteorder)
                buffer += (0).to_bytes(8, byteorder)  # reset datimeC and datimeM
                buffer += (key.nbyteskeys + offset).to_bytes(4, byteorder)
                buffer += (key.nbytesname + 2*offset).to_bytes(4, byteorder)
                wordlen = 8 if key.version > 1000 else 4
                buffer += key.seekdir.to_bytes(wordlen, byteorder)
                buffer += key.seekparent.to_bytes(wordlen, byteorder)
                # remember where the KeysList pointer is stored, it is
                # overwritten once the final KeysList position is known
                seekkeyspos = newrootfile.tell() + len(key.header) + len(buffer)
                buffer += (rootfile.seekkeys + 2*offset).to_bytes(wordlen, byteorder)
                buffer += (0).to_bytes(18, byteorder)  # reset UUID
                if key.version <= 1000:
                    buffer += (0).to_bytes(12, byteorder)
                key.data = buffer

            else:
                # check whether we break pointers in TTrees
                if key.classname == b'TTree' and offset != 0:
                    B2ERROR('Changing the name of root files containing a tree is not supported.')
                    if not in_place:
                        # close before removing the incomplete output file
                        newrootfile.close()
                        os.remove(newrootfile.name)
                    sys.exit(1)

                # update key data in KeysList: number of keys and key headers
                # and remember key position
                if key.showname == b'KeysList':
                    seekkeys = newrootfile.tell()
                    buffer = len(keylist).to_bytes(4, byteorder)
                    buffer += b''.join(filekey.header for filekey in keylist)
                    key.data = buffer
                    keyskey = key
                    # postpone writing if the StreamerInfo was not seen yet
                    swap = (infokey is None)

                # update free segments pointer and remember key position
                if key.showname == b'FreeSegments':
                    seekfree = newrootfile.tell()
                    pointer = int.from_bytes(key.data[2:6], byteorder) + 4*offset
                    key.data = key.data[:2] + pointer.to_bytes(4, byteorder) + key.data[6:]

                # update name in KeysList and FreeSegments
                if key.showname in [b'KeysList', b'FreeSegments'] and newname:
                    key.name = newname
                    key.nbytes += offset
                    key.keylen += offset
                    key.recreate_header()

                # keep track of all keys for the KeysList
                elif key.showname not in [b'StreamerInfo', b'']:
                    keylist.append(key)

                # remember streamer info key and position
                if key.showname == b'StreamerInfo':
                    seekinfo = newrootfile.tell()
                    infokey = key

            # write the updated key, making sure the KeysList come after the StreamerInfo
            if swap and key.showname == b'KeysList':
                pass
            elif swap:
                # NOTE(review): this assumes that the key following a postponed
                # KeysList is the StreamerInfo; the current key is not written
                seekinfo = newrootfile.tell()
                infokey.normalize(pos=seekinfo)
                newrootfile.write(infokey.header)
                newrootfile.write(infokey.data)
                seekkeys = newrootfile.tell()
                keyskey.normalize(pos=seekkeys)
                newrootfile.write(keyskey.header)
                newrootfile.write(keyskey.data)
                swap = False
            else:
                newrootfile.write(key.header)
                newrootfile.write(key.data)

        # write the new file header
        rootfile.normalize(end=newrootfile.tell(), seekfree=seekfree, nbytesfree=nbytesfree+offset,
                           nbytesname=nbytesname+2*offset, seekinfo=seekinfo)
        newrootfile.seek(0)
        newrootfile.write(rootfile.header)

        # update pointer to keyslist, if both the TFile and the KeysList
        # records were found
        if seekkeyspos is not None and seekkeys is not None:
            newrootfile.seek(seekkeyspos)
            newrootfile.write(seekkeys.to_bytes(wordlen, byteorder))

        # replace in the input file if the in-place option is used
        if in_place:
            # release the input file before it is overwritten
            del rootfile
            newrootfile.seek(0)
            with open(filename, 'wb') as target:
                shutil.copyfileobj(newrootfile, target)
    finally:
        newrootfile.close()
large
are file location pointers 4 or 8 bytes?
Definition: b2root.py:130
wordlen
size of file location pointers
Definition: b2root.py:132
def get_string(self, data, pos)
Definition: b2root.py:202
data
data bytes associated to the object
Definition: b2root.py:159
seekkey
pointer to record itself (consistency check)
Definition: b2root.py:173
showname
name to show in the list of keys
Definition: b2root.py:161
datime
date and time when object was written to file
Definition: b2root.py:169
def get_int(self, data, pos, wordlen=4)
Definition: b2root.py:195
def __init__(self, keydata, large=False)
Definition: b2root.py:124
header
binary key header (the bytes of the record that precede the data payload)
Definition: b2root.py:157
def normalize(self, pos=None, offset=None)
Definition: b2root.py:163
large
are file location pointers 4 or 8 bytes?
Definition: b2root.py:39
wordlen
size of file location pointers
Definition: b2root.py:41
units
number of bytes for file pointers
Definition: b2root.py:55
seekkeys
pointer to KeysList record
Definition: b2root.py:69
nbytesfree
number of bytes in FREE data record
Definition: b2root.py:49
compress
compression level and algorithm
Definition: b2root.py:57
version
root/file format version
Definition: b2root.py:37
nbytesinfo
number of bytes in TStreamerInfo record
Definition: b2root.py:61
nfree
number of free data records
Definition: b2root.py:51
def normalize(self, end=None, seekfree=None, nbytesfree=None, nbytesname=None, seekinfo=None)
Definition: b2root.py:79
uuid
universal unique ID
Definition: b2root.py:63
def __init__(self, filename)
Definition: b2root.py:26
rootfile
binary root file
Definition: b2root.py:32
begin
pointer to first data record
Definition: b2root.py:43
seekinfo
pointer to TStreamerInfo record
Definition: b2root.py:59
header
binary header
Definition: b2root.py:66
seekfree
pointer to FREE data record
Definition: b2root.py:47
nbytesname
number of bytes in TNamed at creation time
Definition: b2root.py:53
end
pointer to first free word at the EOF
Definition: b2root.py:45