Belle II Software  release-08-01-10
caf_pxd_datamcgain.py
1 
8 
9 # This steering file computes PXD calibrations for energy loss (gain). The script uses the CAF framework.
10 # This script uses by default the SequentialRunByRun strategy and is designed to try to compute new calibration
11 # constants regularly to follow temporal changes of the PXD.
12 #
13 # There are options to:
14 # * obtain MC charge (typically done only once per given geometry & beam condition)
15 # * obtain Data charge (for studies and cross checks)
16 # * derive gain calibration constants (Data/MC ratio) from Data charge computed on-the-fly and
17 # MC charge from existing DB
18 #
19 # (0) Prerequisite
20 #
21 # * Hot pixel masks should be available for the given iov range, either in GT or in the specified localDB.
22 #
23 # (1) Preparation for Data
24 #
25 # The script allows you to create a list of runs that will be ignored in the calibration.
26 # Use this list for known bad runs.
27 # By default, the "RunListBeam" and "RunListCosmic" files on KEKCC are read in and the runs listed there are skipped.
28 # (Best to avoid these runs for calibration unless there exist corresponding MC samples to model them.)
29 #
30 # Before calibration, you have to put a file to IoV mapping for Data called 'file_iov_map.pkl' in your
31 # working directory. The mapping file should map file locations to IoV ranges and allows to
32 # logically address data by experiment and run numbers. You can create such a map file by using
33 # the tool b2caf-filemap. See also the option --help.
34 #
35 # b2caf-filemap -m raw -p "/hsm/belle2/bdata/Data/Raw/e0008/r*/**/*.root"
36 #
37 # (2) Preparation for MC
38 #
39 # The next step is to prepare mc runs containing PXDSimHits. The setup for the simulation
40 # should mimic the situation of beam data as closely as possible.
41 # In this version of calibration, mc file is created only once for the entire iov range.
42 #
43 # basf2 submit_create_mcruns.py -- --backend='local' --outputdir='pxd_mc_phase3' --runLow=0 --runHigh=0 --expNo=7
44 #
45 # The script submits the creation of mc runs to a CAF backend (here local) for the given run range. Runs in the
46 # specified range but not found in 'file_iov_map.pkl' will be skipped. The simulated runs will be collected in the folder
47 # outputdir. It is best to create another mapping file for the mc data.
48 #
49 # b2caf-filemap -m metadata -p "pxd_mc_phase3/*.root" --output-file "mc_file_iov_map.pkl"
50 #
51 # (3) Run charge calibration (measurement)
52 #
53 # A stand-alone process to collect charge and creating Cluster Charge payloads for MC is needed before
54 # running gain calibration on data. This needs to be done once for a given sample and the DB listing has to be
55 # modified by hand to make it valid for the calibration range for data.
56 #
57 # basf2 caf_pxd_datamcgain.py -- --runLow=0 --runHigh=0 --expNo=7 --mcOnly
58 #
59 # Check the output in a folder 'pxd_calibration_results_e7_range_0_0' and modify outputdb/database.txt
60 # such that the iov covers the entire range, e.g. 7 0 -1 -1
61 #
62 # One can run the above script with --dataOnly to get Cluster Charge payloads for data without computing gain calibration.
63 #
64 # (4) Run gain calibration
65 #
66 # Finally, a CAF script for the calibration needs to be started:
67 #
68 # basf2 caf_pxd_datamcgain.py -- --runLow=4120 --runHigh=4120 --expNo=7 \
69 # --localDB="/hsm/belle2/bdata/group/detector/PXD/calibration/mcdb_median/database.txt"
70 #
71 # The results will be collected in a folder 'pxd_calibration_results_e<expNo>_range_<runLow>_<runHigh>'.
72 # In order to complete the process, check the outputdbs and upload them to a global tag (GT).
73 #
74 # b2conditionsdb upload pxd_calibration ./database.txt
75 #
76 # The option --help provides extensive help for the b2conditionsdb tool.
77 
78 
79 import argparse
80 from tracking import add_tracking_reconstruction
81 from rawdata import add_unpackers
82 from caf.strategies import SequentialRunByRun
83 from caf.utils import CentralDatabase
84 from caf.utils import LocalDatabase
85 from caf.utils import ExpRun, IoV
86 from caf.backends import LSF
87 from caf.framework import Calibration, CAF
88 from ROOT.Belle2 import PXDDataMCGainCalibrationAlgorithm
89 import ROOT
90 import pickle
91 import basf2 as b2
92 b2.set_log_level(b2.LogLevel.INFO)
93 
94 
# Command line interface of this steering file. Options are passed after a
# literal "--" on the basf2 command line, e.g.
#   basf2 caf_pxd_datamcgain.py -- --runLow=0 --runHigh=0 --expNo=7 --mcOnly
parser = argparse.ArgumentParser(
    description="Compute gain correction maps for PXD from beam data")

# Run range (within one experiment) defining the IoV to calibrate.
parser.add_argument(
    '--runLow',
    default=0,
    type=int,
    help='First run of the IoV to calibrate')
parser.add_argument(
    '--runHigh',
    default=-1,
    type=int,
    help='Last run of the IoV to calibrate')
parser.add_argument(
    '--expNo',
    default=3,
    type=int,
    help='Experiment number of the IoV to calibrate')

# Input selection.
parser.add_argument(
    '--maxSubRuns',
    default=-1,
    type=int,
    help='Maximum number of subruns to use')
parser.add_argument(
    '--localDB',
    default="",
    type=str,
    help='path to local DB database.txt')
parser.add_argument(
    '--ignoreRuns',
    default="DefaultFromKEKCC",
    type=str,
    help='Full paths to list of runs to ignore, separate multiple by a comma , ')

# Operation mode: without any of the two switches the full gain calibration
# (data charge against MC charge from the DB) is performed.
parser.add_argument(
    '--mcOnly',
    dest='mcOnly',
    action="store_true",
    help='Run charge calibration for MC only, otherwise specify --dataOnly '
         'or run the full gain calibration on data using MC charge from DB')
parser.add_argument(
    '--dataOnly',
    dest='dataOnly',
    action="store_true",
    help='Run charge calibration for Data only, otherwise specify --mcOnly '
         'or run the full gain calibration on data using MC charge from DB')

# Cluster selection handed to the collector's "matchTrack" parameter.
parser.add_argument(
    '--useTrackClusters',
    default=0,
    type=int,
    help='Flag to use track matched clusters (=1) and apply theta angle projection to cluster charge (=2)')
135 
# Evaluate the command line options.
args = parser.parse_args()

# Switch ROOT to batch mode.
ROOT.gROOT.SetBatch(True)

# Containers filled further below: the files handed to the collector jobs
# and the runs the calibration has to skip.
input_files, pxd_ignore_run_list = [], []

# The IoV range for this calibration: a run range inside one experiment.
iov_to_calibrate = IoV(exp_low=args.expNo, run_low=args.runLow,
                       exp_high=args.expNo, run_high=args.runHigh)
153 
if args.mcOnly:
    # MC charge calibration: use every file listed in the MC file-to-IoV map.
    # NOTE: unpickling executes arbitrary code; only load map files you
    # created yourself (e.g. with b2caf-filemap).
    with open("mc_file_iov_map.pkl", 'br') as map_file:
        files_to_iovs = pickle.load(map_file)
    input_files = list(files_to_iovs)
    print(f'Number selected mc input files: {len(input_files)}')

else:

    # Odd runs to ignore for data
    pxd_ignore_run_list = [
        ExpRun(3, 484), ExpRun(3, 485), ExpRun(3, 486), ExpRun(3, 524),  # from Phase2
        ExpRun(7, 1000),  # 19 modules excluded, and 1 remaining module with high occupancy
        ExpRun(8, 106),  # problem processing one file, anyway a short 'debug' beam run
        ExpRun(8, 676),  # LER beam lost at early point
    ]

    # Load further runs to ignore from the run list files
    ignoreRuns = args.ignoreRuns
    if "DefaultFromKEKCC" in ignoreRuns:
        # Zero-pad the experiment number to four digits so that experiments
        # with two or more digits resolve to the right directory, too.
        dirkekcc = f"/hsm/belle2/bdata/Data/Raw/e{args.expNo:04d}/"
        ignoreRuns = dirkekcc + "RunListBeam," + dirkekcc + "RunListCosmic"
    if ignoreRuns:
        for flist in ignoreRuns.split(","):
            flist = flist.strip()
            if not flist:
                # Tolerate stray commas in the option value
                continue
            # The context manager closes the file even if a line fails to parse
            with open(flist) as fignore:
                for line in fignore:
                    # The run number is the first comma-separated field of a
                    # line; lines without any comma hold just the run number.
                    run = line.split(',', 1)[0].strip()
                    if run:
                        pxd_ignore_run_list.append(ExpRun(args.expNo, int(run)))

    print('List of ignored runs')
    print(pxd_ignore_run_list)

    # Access files_to_iovs for beam runs
    # NOTE: unpickling executes arbitrary code; only load map files you
    # created yourself (e.g. with b2caf-filemap).
    with open("file_iov_map.pkl", 'br') as map_file:
        files_to_iovs = pickle.load(map_file)

    # Group the files of the requested range by their IoV in a single pass
    files_per_iov = {}
    for file_name, file_iov in files_to_iovs.items():
        if iov_to_calibrate.contains(file_iov):
            files_per_iov.setdefault(file_iov, []).append(file_name)

    # A negative --maxSubRuns (the default) means "no limit". It must not be
    # used as a slice bound directly: subruns[:-1] would drop the last file
    # of every run and leave nothing for runs consisting of a single file.
    max_subruns = args.maxSubRuns if args.maxSubRuns >= 0 else None
    for subruns in files_per_iov.values():
        input_files.extend(subruns[:max_subruns])

    print(f'Number selected data input files: {len(input_files)}')
201 
202 
203 
206 
# Collector module for the cluster charge, used for MC as well as for data
charge_collector = b2.register_module("PXDClusterChargeCollector")

# Collect the settings first and hand them to the module in one go
collector_settings = {
    "granularity": "run",
    "minClusterCharge": 8,
    "minClusterSize": 2,
    "maxClusterSize": 6,
    "nBinsU": 4,
    "nBinsV": 6,
}
if not (args.mcOnly or args.dataOnly):
    # Gain calibration mode: charge is collected for data, the MC charge is
    # taken from this payload in the DB
    collector_settings["chargePayloadName"] = "PXDMCClusterChargeMapPar"
collector_settings["fillChargeHistogram"] = True
collector_settings["matchTrack"] = args.useTrackClusters

for setting_name, setting_value in collector_settings.items():
    charge_collector.param(setting_name, setting_value)
223 
# Path executed in front of the collector, for MC as well as for data
pre_charge_collector_path = b2.create_path()
for basic_module in ("Gearbox", "Geometry"):
    pre_charge_collector_path.add_module(basic_module)

# Step 1: provide the PXD digits
if args.mcOnly:
    # only needed when starting from PXDSimHits
    pre_charge_collector_path.add_module("PXDDigitizer")
else:
    # raw data: the tracking additionally needs the SVD and CDC unpacked
    unpacked_detectors = ['PXD', 'SVD', 'CDC'] if args.useTrackClusters else ['PXD']
    add_unpackers(pre_charge_collector_path, unpacked_detectors)

# Step 2: provide the clusters, either via the tracking reconstruction
# or with the stand-alone PXD clusterizer
if args.useTrackClusters:
    add_tracking_reconstruction(pre_charge_collector_path)
else:
    pre_charge_collector_path.add_module("PXDClusterizer")
243 
# Parameter overrides for modules already sitting in the pre collector path,
# keyed by module name: (parameter name, value). Modules that are not part of
# the path (e.g. the unpacker when running on MC) are simply not touched.
# The first key used to be misspelled as 'Geeometry', so the detector
# components were never excluded from the Geometry module.
module_param_overrides = {
    'Geometry': ('excludedComponents', ['ECL', 'KLM', 'TOP']),
    'PXDUnpacker': ('SuppressErrorMask', 0xffffffff),
    'PXDPostErrorChecker': ('CriticalErrorMask', 0),
}
for module in pre_charge_collector_path.modules():
    override = module_param_overrides.get(module.name())
    if override is not None:
        module.param(*override)
251 
252 
255 
# The algorithm turning the collected charge into payloads
datamc_algo = PXDDataMCGainCalibrationAlgorithm()

# Tunable algorithm parameters
datamc_algo.minClusters = 5000  # minimum number of collected clusters for estimating gains
datamc_algo.noiseSigma = 0.0  # artificial noise sigma for smearing cluster charge
datamc_algo.forceContinue = False  # continue instead of c_notEnoughData, set True for Cosmics
datamc_algo.strategy = 0  # 0: medians, 1: landau fit

# With --mcOnly / --dataOnly only the charge MPV is estimated (from median or
# landau fit); otherwise the gain calibration of data against MC from the DB
# is performed.
charge_only = args.mcOnly or args.dataOnly
datamc_algo.doCalibration = not charge_only
if args.mcOnly:
    # payload name to store on DB for MC
    datamc_algo.chargePayloadName = "PXDMCClusterChargeMapPar"
if args.dataOnly:
    # payload name to store on DB for data
    datamc_algo.chargePayloadName = "PXDClusterChargeMapPar"

# Histogram input is used rather than tree input to save time
datamc_algo.useChargeHistogram = True
# Read the output of this specific collector
datamc_algo.setPrefix("PXDClusterChargeCollector")
282 
# Conditions databases made available to the calibration. The local database
# is only added when a path was actually given: --localDB defaults to an empty
# string, which does not name a database file.
calibration_db_chain = [
    CentralDatabase("data_reprocessing_prompt"),
    CentralDatabase("pxd_calibration"),
]
if args.localDB:
    calibration_db_chain.append(LocalDatabase(args.localDB))

# Create the calibration from collector, algorithm, input files and pre collector path
charge_cal = Calibration(
    name="PXDDataMCGainCalibrationAlgorithm",
    collector=charge_collector,
    algorithms=datamc_algo,
    input_files=input_files,
    pre_collector_path=pre_charge_collector_path,
    database_chain=calibration_db_chain)

# Apply the map to this calibration, now the CAF doesn't have to do it
charge_cal.files_to_iovs = files_to_iovs

# Here we set the AlgorithmStrategy
charge_cal.strategies = SequentialRunByRun
# One input file per collector job
charge_cal.max_files_per_collector_job = 1

# IoV handed to the algorithm as "iov_coverage", and the runs to skip
charge_cal.algorithms[0].params["iov_coverage"] = iov_to_calibrate
charge_cal.ignored_runs = pxd_ignore_run_list
305 
306 
309 
# Set up the CAF instance steering the calibration
cal_fw = CAF()

# Results go to a folder named after the experiment and the run range
caf_output_dir = f'pxd_calibration_results_e{args.expNo}_range_{args.runLow}_{args.runHigh}'
cal_fw.output_dir = caf_output_dir

# Jobs are submitted to the KEKCC batch system
cal_fw.backend = LSF()
# cal_fw.backend = backends.Local(max_processes=20) # interactive

# Register the calibration and process the requested IoV
cal_fw.add_calibration(charge_cal)
cal_fw.run(iov=iov_to_calibrate)