Belle II Software  release-05-01-25
caf_pxd_datamcgain.py
1 # This steering file computes PXD calibrations for energy loss (gain). The script uses the CAF framework.
2 # This script uses by default the SequentialRunByRun strategy and is designed to try to compute new calibration
3 # constants regularly to follow temporal changes of the PXD.
4 #
5 # There are options to:
6 # * obtain MC charge (typically done only once per given geometry & beam condition)
7 # * obtain Data charge (for studies and cross checks)
8 # * derive gain calibration constants (Data/MC ratio) from Data charge computed on-the-fly and
9 # MC charge from existing DB
10 #
11 # (0) Prerequisite
12 #
13 # * Hot pixel masks should be available for the given iov range, either in GT or in the specified localDB.
14 #
15 # (1) Preparation for Data
16 #
17 # The script allows you to create a list of runs that will be ignored in the calibration.
18 # Use this list for known bad runs.
19 # By default, "RunListCosmic" and "RunListBeam" files on KEKCC are read in to skip these runs.
20 # (Best to avoid these runs for calibration unless there exist corresponding MC samples to model them.)
21 #
22 # Before calibration, you have to put a file to IoV mapping for Data called 'file_iov_map.pkl' in your
23 # working directory. The mapping file should map file locations to IoV ranges and allows to
24 # logically address data by experiment and run numbers. You can create such a map file by using
25 # the tool b2caf-filemap. See also the option --help.
26 #
27 # b2caf-filemap -m raw -p "/hsm/belle2/bdata/Data/Raw/e0008/r*/**/*.root"
28 #
29 # (2) Preparation for MC
30 #
31 # The next step is to prepare mc runs containing PXDSimHits. The setup for the simulation
32 # should mimic the situation of beam data as closely as possible.
33 # In this version of the calibration, the mc file is created only once for the entire iov range.
34 #
35 # basf2 submit_create_mcruns.py -- --backend='local' --outputdir='pxd_mc_phase3' --runLow=0 --runHigh=0 --expNo=7
36 #
37 # The script submits the creation of mc runs to a CAF.backend (here local) for the given run range. Runs in the
38 # specified range but not found in 'file_iov_map.pkl' will be skipped. The simulated runs will be collected in the folder
39 # outputdir. It is best to create another mapping file for the mc data.
40 #
41 # b2caf-filemap -m metadata -p --output-file "dummy_mc_file_iov_map.pkl" "pxd_mc_phase3/*.root"
42 #
43 # (3) Run charge calibration (measurement)
44 #
45 # A stand-alone process to collect charge and create Cluster Charge payloads for MC is needed before
46 # running gain calibration on data. This needs to be done once for a given sample and the DB listing has to be
47 # modified by hand to make it valid for the calibration range for data.
48 #
49 # basf2 caf_pxd_datamcgain.py -- --runLow=0 --runHigh=0 --expNo=7 --mcOnly
50 #
51 # Check the output in a folder 'pxd_calibration_results_e7_range_0_0' and modify outputdb/database.txt
52 # such that the iov covers the entire range, e.g. 7 0 -1 -1
53 #
54 # One can run the above script with --dataOnly to get Cluster Charge payloads for data without computing gain calibration.
55 #
56 # (4) Run gain calibration
57 #
58 # Finally, a CAF script for the calibration needs to be started:
59 #
60 # basf2 caf_pxd_datamcgain.py -- --runLow=4120 --runHigh=4120 --expNo=7 \
61 # --localDB="/hsm/belle2/bdata/group/detector/PXD/calibration/mcdb_median/database.txt"
62 #
63 # The results will be collected in a folder 'pxd_calibration_results_e<expNo>_range_<runLow>_<runHigh>'.
64 # To complete the process, check the output DBs and upload them to a global tag (GT).
65 #
66 # b2conditionsdb upload pxd_calibration ./database.txt
67 #
68 # The option --help provides extensive help for the b2conditionsdb tool.
69 #
70 # author: benjamin.schwenker@phys.uni-goettingen.de, maiko.takahashi@desy.de
71 
72 
73 from basf2 import *
74 set_log_level(LogLevel.INFO)
75 
76 import pickle
77 import glob
78 import os
79 import ROOT
80 
81 from ROOT.Belle2 import PXDDataMCGainCalibrationAlgorithm
82 from caf.framework import Calibration, CAF
83 from caf import backends
84 from caf.backends import LSF
85 from caf.utils import ExpRun, IoV
86 from caf.utils import get_iov_from_file
87 from caf.utils import find_absolute_file_paths
88 from caf.utils import LocalDatabase
89 from caf.utils import CentralDatabase
90 from caf.strategies import SequentialRunByRun, SingleIOV, SimpleRunByRun
91 from rawdata import add_unpackers
92 from tracking import add_tracking_reconstruction
93 
import argparse

# Command line interface of this steering file.
# Options are given after "--" on the basf2 command line, e.g.
#   basf2 caf_pxd_datamcgain.py -- --runLow=0 --runHigh=0 --expNo=7 --mcOnly
parser = argparse.ArgumentParser(description="Compute gain correction maps for PXD from beam data")
parser.add_argument('--runLow', default=0, type=int, help='First run of the IoV to calibrate')
parser.add_argument('--runHigh', default=-1, type=int, help='Last run of the IoV to calibrate')
parser.add_argument('--expNo', default=3, type=int, help='Experiment number of the IoV to calibrate')
parser.add_argument('--maxSubRuns', default=-1, type=int, help='Maximum number of subruns to use')
parser.add_argument('--localDB', default="", type=str, help='path to local DB database.txt')
parser.add_argument('--ignoreRuns', default="DefaultFromKEKCC", type=str,
                    help='Full paths to list of runs to ignore, separate multiple by a comma , ')

# --mcOnly and --dataOnly select alternative charge-only modes. Passing both would combine
# MC input files with the payload name meant for data, so argparse rejects the combination.
mode_group = parser.add_mutually_exclusive_group()
mode_group.add_argument('--mcOnly', dest='mcOnly', action="store_true",
                        help='Run charge calibration for MC only, otherwise specify --dataOnly '
                             'or run the full gain calibration on data using MC charge from DB')
mode_group.add_argument('--dataOnly', dest='dataOnly', action="store_true",
                        help='Run charge calibration for Data only, otherwise specify --mcOnly '
                             'or run the full gain calibration on data using MC charge from DB')

parser.add_argument('--useTrackClusters', default=0, type=int,
                    help='Flag to use track matched clusters (=1) and apply theta angle projection to cluster charge (=2)')

args = parser.parse_args()

# Run ROOT in batch mode
ROOT.gROOT.SetBatch(True)
115 
116 
119 
# Set the IoV range for this calibration
iov_to_calibrate = IoV(exp_low=args.expNo, run_low=args.runLow, exp_high=args.expNo, run_high=args.runHigh)

input_files = []
pxd_ignore_run_list = []

# NOTE: the file-to-IoV maps are pickle files; only load maps you created yourself
# (e.g. with b2caf-filemap), since unpickling untrusted data can execute arbitrary code.

if args.mcOnly:
    # Access files_to_iovs for MC runs: every file listed in the MC map is used
    with open("mc_file_iov_map.pkl", 'br') as map_file:
        files_to_iovs = pickle.load(map_file)
    input_files = list(files_to_iovs.keys())
    print('Number selected mc input files: {}'.format(len(input_files)))

else:

    # odd runs to ignore for data
    pxd_ignore_run_list = [ExpRun(3, 484), ExpRun(3, 485), ExpRun(3, 486), ExpRun(3, 524),  # from Phase2
                           ExpRun(7, 1000),  # 19 modules excluded, and 1 remaining module with high occupancy
                           ExpRun(8, 106),  # problem processing one file, anyway a short 'debug' beam run
                           ExpRun(8, 676),  # LER beam lost at early point
                           ]

    # load ignore run list
    ignoreRuns = args.ignoreRuns
    if "DefaultFromKEKCC" in ignoreRuns:
        # Zero-pad the experiment number to four digits so that experiments >= 10 map to
        # e.g. 'e0012' instead of the non-existing 'e00012'.
        dirkekcc = "/hsm/belle2/bdata/Data/Raw/e{:04d}/".format(args.expNo)
        ignoreRuns = dirkekcc + "RunListBeam," + dirkekcc + "RunListCosmic"
    if ignoreRuns:
        for flist in ignoreRuns.split(","):
            flist = flist.strip()
            if not flist:
                continue
            with open(flist, 'r') as fignore:
                for line in fignore:
                    # The run number is the first comma separated field of each line.
                    # Blank lines are skipped; a last line without trailing newline is read in full.
                    run = line.split(',', 1)[0].strip()
                    if run:
                        pxd_ignore_run_list.append(ExpRun(args.expNo, int(run)))

    print('List of ignored runs')
    print(pxd_ignore_run_list)

    # Access files_to_iovs for beam runs
    with open("file_iov_map.pkl", 'br') as map_file:
        files_to_iovs = pickle.load(map_file)

    # Group the files (subruns) by their IoV in a single pass over the map
    files_by_iov = {}
    for file_path, file_iov in files_to_iovs.items():
        files_by_iov.setdefault(file_iov, []).append(file_path)

    # A negative --maxSubRuns (the default -1) means "no limit". Using it directly as a slice
    # bound would silently drop the last subrun of every run.
    max_subruns = args.maxSubRuns if args.maxSubRuns >= 0 else None
    for file_iov, subruns in files_by_iov.items():
        if iov_to_calibrate.contains(file_iov):
            input_files.extend(subruns[:max_subruns])

    print('Number selected data input files: {}'.format(len(input_files)))
171 
172 
173 
176 
# Collector module gathering PXD cluster charge, used for both MC and data

collector_settings = [
    ("granularity", "run"),
    ("minClusterCharge", 8),
    ("minClusterSize", 2),
    ("maxClusterSize", 6),
    ("nBinsU", 4),
    ("nBinsV", 6),
]
if not (args.mcOnly or args.dataOnly):
    # Full gain calibration: charge is collected on data, the MC charge is taken from the DB
    collector_settings.append(("chargePayloadName", "PXDMCClusterChargeMapPar"))
collector_settings.append(("fillChargeHistogram", True))
collector_settings.append(("matchTrack", args.useTrackClusters))

charge_collector = register_module("PXDClusterChargeCollector")
for param_name, param_value in collector_settings:
    charge_collector.param(param_name, param_value)
191 
# Path executed before the collector, for MC or data

pre_charge_collector_path = create_path()
for basic_module in ("Gearbox", "Geometry"):
    pre_charge_collector_path.add_module(basic_module)

# Step 1: produce PXD digits, either by digitizing PXDSimHits (MC) or by unpacking raw data
if args.mcOnly:
    pre_charge_collector_path.add_module("PXDDigitizer")  # only needed when starting from PXDSimHits
else:
    # Track matching additionally needs the SVD and CDC raw data
    detectors_to_unpack = ['PXD', 'SVD', 'CDC'] if args.useTrackClusters else ['PXD']
    add_unpackers(pre_charge_collector_path, detectors_to_unpack)

# Step 2: run the tracking reconstruction when track matched clusters are requested,
# otherwise only add the PXD clusterizer
if args.useTrackClusters:
    add_tracking_reconstruction(pre_charge_collector_path)
else:
    pre_charge_collector_path.add_module("PXDClusterizer")
210 
# Adjust parameters of modules that are already on the pre collector path.
# Modules that are not on the path (e.g. the unpacker in MC mode) are simply not touched.
for module in pre_charge_collector_path.modules():
    module_name = module.name()
    if module_name == 'Geometry':
        # The module is added to the path as "Geometry"; the former comparison against the
        # misspelled 'Geeometry' never matched, so these components were never excluded.
        module.param('excludedComponents', ['ECL', 'KLM', 'TOP'])
    elif module_name == 'PXDUnpacker':
        module.param('SuppressErrorMask', 0xffffffff)
    elif module_name == 'PXDPostErrorChecker':
        module.param('CriticalErrorMask', 0)
218 
219 
222 
# Algorithm turning the collected cluster charge into charge or gain payloads
datamc_algo = PXDDataMCGainCalibrationAlgorithm()

# We can play around with algo parameters
datamc_algo.minClusters = 5000  # Minimum number of collected clusters for estimating gains
datamc_algo.noiseSigma = 0.0  # Artificial noise sigma for smearing cluster charge
datamc_algo.forceContinue = False  # Force continue algorithm instead of c_notEnoughData, set True for Cosmics
datamc_algo.strategy = 0  # 0: medians, 1: landau fit
if args.mcOnly:
    # --mcOnly is checked first: the input files and the pre collector path also give it
    # precedence, so the MC payload name must win if both mode flags are set.
    datamc_algo.doCalibration = False  # only estimate charge MPV from median or landau fit
    datamc_algo.chargePayloadName = "PXDMCClusterChargeMapPar"  # payload name to store on DB for MC
elif args.dataOnly:
    datamc_algo.doCalibration = False  # only estimate charge MPV from median or landau fit
    datamc_algo.chargePayloadName = "PXDClusterChargeMapPar"  # payload name to store on DB for data
else:
    datamc_algo.doCalibration = True  # do gain calibration on data against MC from DB
datamc_algo.useChargeHistogram = True  # use histogram rather than tree input to save time
# We want to use a specific collector
datamc_algo.setPrefix("PXDClusterChargeCollector")
241 
# Databases used by the calibration jobs: two central global tags, plus the local DB
# if one was given. --localDB defaults to an empty string, which is not a database file,
# so it is only appended when a path was actually supplied.
database_chain = [CentralDatabase("data_reprocessing_prompt"), CentralDatabase("pxd_calibration")]
if args.localDB:
    database_chain.append(LocalDatabase(args.localDB))

# create calibration
charge_cal = Calibration(
    name="PXDDataMCGainCalibrationAlgorithm",
    collector=charge_collector,
    algorithms=datamc_algo,
    input_files=input_files,
    pre_collector_path=pre_charge_collector_path,
    database_chain=database_chain
)

# Apply the map to this calibration, now the CAF doesn't have to do it
charge_cal.files_to_iovs = files_to_iovs

# Here we set the AlgorithmStrategy
charge_cal.strategies = SequentialRunByRun
charge_cal.max_files_per_collector_job = 1

# Pass the requested IoV as "iov_coverage" to the algorithm and skip the known bad runs
charge_cal.algorithms[0].params["iov_coverage"] = iov_to_calibrate
charge_cal.ignored_runs = pxd_ignore_run_list
261 
262 
265 
# Results are written to a folder named after the experiment number and run range
caf_output_dir = 'pxd_calibration_results_e{}_range_{}_{}'.format(args.expNo, args.runLow, args.runHigh)

# Set up the CAF with the single charge/gain calibration and start it for the requested IoV
cal_fw = CAF()
cal_fw.add_calibration(charge_cal)
# KEKCC batch system; for interactive running use instead:
#   cal_fw.backend = backends.Local(max_processes=20)
cal_fw.backend = LSF()
cal_fw.output_dir = caf_output_dir
cal_fw.run(iov=iov_to_calibrate)
Calibration
Definition: Calibration.py:1