Belle II Software  release-06-00-14
millepede_calibration.py
1 #!/usr/bin/env python3
2 # -*- coding: utf-8 -*-
3 
4 
11 
12 import basf2
13 
14 from caf.framework import Calibration, CentralDatabase, Collection, LocalDatabase
15 from caf import strategies
16 
17 import os
18 
19 import alignment.parameters as parameters # noqa
20 import alignment.constraints # noqa
21 
22 import alignment.collections # noqa
23 from alignment.collections import make_collection # noqa
24 
25 
26 def limit_file_events(calibration, collection_limits):
27  for colname, max_events in collection_limits.items():
28  basf2.set_module_parameters(
29  calibration.collections[colname].pre_collector_path,
30  'RootInput',
31  entrySequences=[f'0:{max_events}'])
32 
33 
34 def collect(calibration, collection, input_files, output_file='CollectorOutput.root', basf2_args=None, bsub=False):
35  """
36  Standalone collection for calibration (without CAF)
37  (experimental)
38 
39  Pickles the reprocessing path and collector of given collection and runs it in separate process
40  to collect calibration data in output_file from input_files.
41 
42  Parameters
43  ----------
44  calibration : caf.framework.Calibration
45  The configured Millepede calibration (see create(...)) to use for collection
46  collection : str
47  Collection name which should be collected (which re-processing path and collector to use)
48  input_files : list(str)
49  List of input files for this collection job
50  output_file : str
51  Name of output collector file with data/histograms (produced by HistoManager)
52  basf2_args : dict
53  Additional arguments to pass to basf2 job
54  """
55  if basf2_args is None:
56  basf2_args = []
57 
58  import pickle
59  import subprocess
60 
61  main = calibration.collections[collection].pre_collector_path
62 
63  tmp = basf2.Path()
64  for m in main.modules():
65  if m.name() == 'RootInput':
66  m.param('inputFileNames', input_files)
67  tmp.add_module('HistoManager', histoFileName=output_file)
68  tmp.add_module(m)
69  main = tmp
70  main.add_module(calibration.collections[collection].collector)
71 
72  path_file_name = calibration.name + '.' + collection + '.' + output_file + '.path'
73  with open(path_file_name, 'bw') as serialized_path_file:
74  pickle.dump(basf2.pickle_path.serialize_path(main), serialized_path_file)
75 
76  if bsub:
77  subprocess.call(["bsub", "-o", output_file + ".txt", "basf2", "--execute-path", path_file_name] + basf2_args)
78  else:
79  subprocess.call(["basf2", "--execute-path", path_file_name] + basf2_args)
80 
81  return os.path.abspath(output_file)
82 
83 
84 def calibrate(calibration, input_files=None, iteration=0):
85  """
86  Execute the algorithm from configured Millepede calibration over collected
87  files in input_files. The pre_algorithm function is run before the algorithm.
88 
89  Parameters
90  ----------
91  calibration : caf.framework.Calibration
92  Configured Millepede calibration (see create(...))
93  input_files : list(str)
94  List of input collected files
95  iteration : int
96  Iteration number to pass to pre_algorithm function
97  """
98  if input_files is None:
99  input_files = ['CollectorOutput.root']
100  """
101  Execute algorithm of the Millepede calibration over
102  """
103  for algo in calibration.algorithms:
104  algo.algorithm.setInputFileNames(input_files)
105  algo.pre_algorithm(algo.algorithm, iteration)
106  algo.algorithm.execute()
107  algo.algorithm.commit()
108 
109 
110 def create_algorithm(dbobjects, min_entries=10, ignore_undetermined=True):
111  """
112  Create Belle2.MillepedeAlgorithm
113 
114  Parameters
115  ----------
116  dbobjects : list(str)
117  List of DB objects to calibrate - has to match collector settings
118  min_entries : int
119  Minimum number of collected entries for calibration. Algorithm will return
120  NotEnoughData is less entries collected.
121  ignore_undetermined : bool
122  Whether undetermined parameters should be ignored or the calibration should fail if any
123  """
124  import ROOT
125  from ROOT.Belle2 import MillepedeAlgorithm
126  algorithm = MillepedeAlgorithm()
127 
128  std_components = ROOT.vector('string')()
129  for component in dbobjects:
130  std_components.push_back(component)
131  algorithm.setComponents(std_components)
132 
133  algorithm.ignoreUndeterminedParams(ignore_undetermined)
134  algorithm.setMinEntries(min_entries)
135 
136  algorithm.invertSign(True)
137 
138  return algorithm
139 
140 
141 def create_commands():
142  """
143  Create default list of commands for Pede
144  """
145  cmds = []
146  cmds.append('method inversion 3 0.1')
147  cmds.append('skipemptycons')
148 
149  import multiprocessing
150  ncpus = multiprocessing.cpu_count()
151 
152  cmds.append(f'threads {ncpus} {ncpus}')
153  cmds.append('printcounts 2')
154  cmds.append('closeandreopen')
155 
156  cmds.append('hugecut 50.')
157  cmds.append('chiscut 30. 6.')
158  cmds.append('outlierdownweighting 3')
159  cmds.append('dwfractioncut 0.1')
160 
161  return cmds
162 
163 
164 def create_collector(dbobjects, **argk):
165  """
166  Create MillepedeCollector module with default configuration
167 
168  Parameters
169  ----------
170  dbobject : list(str)
171  List of database objects to be calibrated (global derivatives of others
172  will be disabled) - has to match algorithm settings
173  argk : dict
174  Dictionary of additional module parameters (can override defaults)
175 
176  Returns
177  -------
178  MillepedeCollectorModule (configured)
179  """
180  import basf2
181  m = basf2.register_module('MillepedeCollector')
182 
183  m.param('granularity', 'all')
184  # Let's enable this always - will be in effect only if BeamSpot is in dbobjects
185  m.param('calibrateVertex', True)
186  # Not yet implemeted -> alwas OFF for now
187  m.param('calibrateKinematics', False)
188  m.param('minUsedCDCHitFraction', 0.8)
189  m.param('minPValue', 0.0)
190  m.param('externalIterations', 0)
191  m.param('tracks', [])
192  m.param('fitTrackT0', True)
193  m.param('components', dbobjects)
194  m.param('useGblTree', False)
195  m.param('absFilePaths', True)
196 
197  # By default, hierarchy is disabled, because you need some constraints
198  # - Adding VXDHierarchyConstraints changes the hierarchy type to the correct one
199  m.param('hierarchyType', 0)
200 
201  m.param(argk)
202 
203  return m
204 
205 
206 def create(name,
207  dbobjects,
208  collections,
209  files=None,
210  tags=None,
211  timedep=None,
212  commands=None,
213  constraints=None,
214  fixed=None,
215  params=None,
216  min_entries=0
217  ):
218  """
219  Create the Millepede Calibration, fully configured in one call
220 
221 
222  Parameters
223  ----------
224  name : str
225  Calibration name
226  dbobjects : list(str)
227  List of database objects to calibrate, e.g. ['BeamSpot', 'VXDAlignment']
228  Note that by default all parameters of the db object are free (exceptions
229  depend on some constraint and collector configuration) and you might need to fix
230  the unwanted ones (typically higher order sensor deformations) using the 'fixed' parameter.
231 
232  collections : list(namedtuple('MillepedeCollection', ['name', 'files', 'path', 'params']))
233  List of collection definitions.
234  - name : str
235  Collection name has to math entry in 'files' dictionary.
236  - files : list(str) | None
237  Optional list of files. Can (should if not set here) be overriden by 'files' parameter
238  - path : basf2.Path
239  The reprocessing path
240  - dict(...) : additional dictionary of parameters passed to the collector.
241  This has to contain the input data sample (tracks / particles / primaryVertices ...) configuration for the collector.
242  Optionally additional arguments can be specified for the collector specific for this collection.
243  (Default arguments for all collections can be set using the 'params' parameter)
244  Use make_collection(str, path=basf2.Path, **argk) for more convenient creation of custom collections.
245  For standard collections to use, see alignment.collections
246 
247  files : dict( str -> list(str) )
248  Dictionary of lists of input file paths, key is collection name, value is list of input files.
249  NOTE: This overrides possible list of files assigned during creation of collections (if set)
250  tags : list(str)
251  List of input global tags. Can include absolute file paths to local databases (added to the chain).
252 
253  timedep : list(tuple(list(int), list(tuple(int, int, int))))
254  Time-depence configuration.
255  Each list item is 2-tuple with list of parameter numbers (use alignment.parameters to get them) and
256  the (event, run, exp) numbers at which values of these parameters can change.
257  Use with caution. Namely the first event of the lowest run in input data has to be included in (some of the)
258  event lists.
259 
260  commands : list(str | tuple(str, None))
261  List of commands for Millepede. Default commands can be overriden be specifing different values for them.
262  A command can be erased completely from the default commands if instead a ('command_name', None) is passed.
263  constraints : list(alignment.Constraints)
264  List of constraints from alignment.constraints to be used.
265  Constraints are generated by the pre-algorithm function by CAF.
266  fixed : list(int)
267  List of fixed parameters (use alignment.parameters to get them)
268 
269  params : dict
270  Dictionary of common parameters to set for collectors of all collections.
271  min_entries : int
272  Minimum entries to required by the algorithm. Returns NotEnoughData if less entries is collected.
273 
274  Returns
275  ----------
276  caf.framework.Calibration object, fully configured and ready to run.
277  You might want to set/override some options to custom values, like 'max_iterations' etc.
278  """
279 
280  print("----------------------------")
281  print(" Calibration: ", name, "")
282  print("----------------------------")
283 
284  print("- DB Objects:")
285  for objname in dbobjects:
286  print(" ", objname)
287 
288  cmds = create_commands()
289 
290  _commands = dict()
291 
292  def set_command(command):
293  spec = ''
294  if isinstance(command, tuple):
295  if not len(command) == 2:
296  raise AttributeError("Commands has to be strings or tuple ('command name', None) to remove the command")
297  spec = None
298  command = command[0]
299  words = command.split(" ")
300  cmd_name = words[0]
301 
302  if spec == '':
303  spec = command
304 
305  if spec is not None:
306  _commands[cmd_name] = spec
307  elif cmd_name in _commands:
308  del _commands[cmd_name]
309 
310  for cmd in cmds:
311  set_command(cmd)
312 
313  if commands is None:
314  commands = []
315 
316  for cmd in commands:
317  set_command(cmd)
318 
319  algo = create_algorithm(dbobjects, min_entries=min_entries)
320 
321  print("- Commands:")
322  for cmd_name, cmd in _commands.items():
323  print(" ", cmd)
324  algo.steering().command(cmd)
325 
326  if constraints is None:
327  constraints = []
328 
329  print("- Constraints:")
330  consts = constraints
331  if len(consts):
332  algo.steering().command('FortranFiles')
333  for const in consts:
334  print(" ", const.filename)
335  algo.steering().command(const.filename)
336 
337  if timedep is None:
338  timedep = []
339 
340  def gen_constraints(algorithm, iteration):
341  import basf2
342  from alignment.constraints import generate_constraints
343 
344  data_iov = algorithm.getRunRangeFromAllData().getIntervalOfValidity()
345  init_event = (0, data_iov.getRunLow(), data_iov.getExperimentLow())
346 
347  # This function runs with DB chain set up
348  # TODO: we ignore the local DBs from CAF etc., that is, the constraints
349  # are always build only from last valid central GT. As constraints are only
350  # linearizations, small alignment changes have numerical neglible effects on
351  # constraint coefficients. But we can do even better -> should be not difficult
352  # but needs much more testing.
353  # NOTE: rversed (highest priority last) order expected here
354  constraint_tags = [tag for tag in reversed(basf2.conditions.globaltags)]
355 
356  if len(consts):
357  generate_constraints(consts, timedep, constraint_tags, init_event)
358 
359  algo.steering().command('Parameters')
360 
361  def fix(labels):
362  for label in labels:
363  algo.steering().command('{} 0.0 -1.'.format(str(label)))
364 
365  if fixed is None:
366  fixed = []
367 
368  print("- Fixed parameters:", len(fixed))
369  fix(fixed)
370 
371  algo.setTimedepConfig(timedep)
372 
373  print("- Tags:")
374 
375  def make_database_chain(tags):
376  import os
377  chain = []
378  for tag in tags:
379  if os.path.exists(tag):
380  print(" Local:", os.path.abspath(tag))
381  chain.append(LocalDatabase(os.path.abspath(tag)))
382  else:
383  print(" Global:", tag)
384  chain.append(CentralDatabase(tag))
385  return chain
386 
387  dbchain = make_database_chain(tags) if tags is not None else None
388 
389  # Note the default collector and path are ignored
390  # The user is supposed to add at least one own collection
391  calibration = Calibration(name,
392  collector=None,
393  algorithms=algo,
394  input_files=None,
395  pre_collector_path=None,
396  database_chain=dbchain,
397  output_patterns=None,
398  backend_args=None
399  )
400 
401  if params is None:
402  params = dict()
403 
404  print("- Overriden common collector parameters:")
405  for parname, parval in params.items():
406  print(" ", parname, ":", parval)
407 
408  if files is None:
409  files = dict()
410 
411  print("- Collections:")
412  for col in collections:
413  colname = col.name
414  colfiles = col.files
415  path = col.path
416  args = col.params
417 
418  filelist = colfiles
419  if colname in files:
420  if colname in files:
421  filelist = files[colname]
422 
423  print(f" - {colname} ({len(filelist)} files)")
424 
425  collector = create_collector(dbobjects)
426  if params is not None:
427  collector.param(params)
428 
429  for const in consts:
430  const.configure_collector(collector)
431 
432  collector.param('timedepConfig', timedep)
433 
434  for argname, argval in args.items():
435  print(" ", argname, " : ", argval)
436 
437  collector.param(args)
438 
439  collection = Collection(collector=collector,
440  input_files=filelist,
441  pre_collector_path=path,
442  database_chain=dbchain)
443 
444  calibration.add_collection(colname, collection)
445 
446  calibration.strategies = strategies.SingleIOV
447  calibration.max_iterations = 1
448 
449  calibration.pre_algorithms = gen_constraints
450 
451  print("----------------------------")
452 
453  return calibration
def serialize_path(path)
Definition: pickle_path.py:94