Belle II Software development
cdcdedx_validation.py
1
8
9'''
10Validation plots for CDC dedx calibration.
11'''
12
13import sys
14import os
15import json
16import matplotlib.pyplot as plt
17import pandas as pd
18import numpy as np
19from matplotlib.backends.backend_pdf import PdfPages
20import shutil
21import basf2
22import process_wiregain as pw
23import process_cosgain as pc
24import process_onedcell as oned
25import process_rungain as rg
26import re
27
28from prompt import ValidationSettings
29
30settings = ValidationSettings(name="CDC dedx",
31 description=__doc__,
32 download_files=[],
33 expert_config={
34 "GT": "data_prompt_rel09",
35 })
36
37
38def get_latest_calibration_dir(base_dir, cal_name):
39
40 latest_idx = -1
41 latest_dir = None
42
43 for idir in os.listdir(base_dir):
44
45 match = re.fullmatch(rf"{re.escape(cal_name)}(\d+)", idir)
46
47 if not match:
48 continue
49
50 idx = int(match.group(1))
51
52 if idx > latest_idx:
53 latest_idx = idx
54 latest_dir = idir
55
56 if latest_dir is None:
57 basf2.B2FATAL(f"No calibration directory found for {cal_name}")
58
59 basf2.B2INFO(f"Using latest calibration directory: {latest_dir}")
60
61 return os.path.join(base_dir, latest_dir)
62
63
64def save_to_pdf(pdf, fig):
65 fig.tight_layout()
66 pdf.savefig(fig)
67 plt.close(fig)
68
69
70def read_txt(filepath, columns, sep=r"\s+"):
71 if not os.path.exists(filepath):
72 basf2.B2ERROR(f"File not found: {filepath}")
73 return None
74 return pd.read_csv(filepath, sep=sep, header=None, names=columns)
75
76
77def make_pdf_path(prefix, suffix):
78 pdf_path = os.path.join("plots", "validation", f"{prefix}_{suffix}.pdf")
79 os.makedirs(os.path.dirname(pdf_path), exist_ok=True)
80 return pdf_path
81
82
83def get_positive_minmax(series):
84 positive = series[series > 0]
85 ymin = positive.min() if not positive.empty else series.min()
86 ymax = series.max()
87 return ymin, ymax
88
89
90def rungain_validation(path, suffix):
91 val_path = os.path.join(path, "plots", "run", f"dedx_vs_run_{suffix}.txt")
92 df = read_txt(val_path, ["run", "mean", "mean_err", "reso", "reso_err"])
93 if df is None:
94 return
95
96 df['run'] = df['run'].astype(str)
97
98 pdf_path = make_pdf_path("dedx_vs_run", suffix)
99
100 with PdfPages(pdf_path) as pdf:
101 fig, ax = plt.subplots(1, 2, figsize=(20, 6))
102 n = len(df)
103 space = max(10, min(50, int(200 / max(n, 1))))
104
105 # Mean plot
106 ymin, ymax = get_positive_minmax(df['mean'])
107 pc.hist(y_min=ymin-0.02, y_max=ymax+0.02, xlabel="Run range", ylabel="dE/dx mean", space=space, ax=ax[0])
108 ax[0].errorbar(df['run'], df['mean'], yerr=df['mean_err'], fmt='*', markersize=8, rasterized=True, label='Bhabha mean')
109 ax[0].legend(fontsize=12)
110 ax[0].set_title('dE/dx Mean vs Run', fontsize=14)
111
112 # Reso plot
113 ymin, ymax = get_positive_minmax(df['reso'])
114 pc.hist(y_min=ymin-0.01, y_max=ymax+0.01, xlabel="Run range", ylabel="dE/dx reso", space=space, ax=ax[1])
115 ax[1].errorbar(df['run'], df['reso'], yerr=df['reso_err'], fmt='*', markersize=8, rasterized=True, label='Bhabha reso')
116 ax[1].legend(fontsize=12)
117 ax[1].set_title('dE/dx Resolution vs Run', fontsize=14)
118
119 fig.suptitle("dE/dx vs Run", fontsize=20)
120 save_to_pdf(pdf, fig)
121
122
123def wiregain_validation(path, suffix):
124
125 val_path_gwire = os.path.join(path, "plots", "wire", f"dedx_mean_gwire_{suffix}.txt")
126 val_path_bwire = os.path.join(path, "plots", "wire", f"dedx_mean_badwire_{suffix}.txt")
127 val_path_layer = os.path.join(path, "plots", "wire", f"dedx_mean_layer_{suffix}.txt")
128
129 df_gwire = read_txt(val_path_gwire, ["wire", "mean"])
130 df_bwire = read_txt(val_path_bwire, ["wire", "mean"])
131 df_layer = read_txt(val_path_layer, ["layer", "mean", "gmean"])
132
133 if df_gwire is None or df_bwire is None or df_layer is None:
134 return
135
136 pdf_path = make_pdf_path("dedx_vs_wire_layer", suffix)
137
138 with PdfPages(pdf_path) as pdf:
139 fig, ax = plt.subplots(2, 2, figsize=(20, 12))
140
141 ymin, ymax = get_positive_minmax(df_gwire['mean'])
142
143 pc.hist(y_min=ymin-0.05, y_max=ymax+0.05, xlabel="Wire", ylabel="dE/dx mean", space=1000, ax=ax[0, 0])
144 ax[0, 0].plot(df_gwire['wire'], df_gwire['mean'], '*', markersize=5, rasterized=True)
145 ax[0, 0].set_title('dE/dx Mean vs good Wire', fontsize=14)
146
147 ymin, ymax = get_positive_minmax(df_bwire['mean'])
148
149 pc.hist(y_min=ymin-0.05, y_max=ymax+0.05, xlabel="Wire", ylabel="dE/dx mean", space=1000, ax=ax[1, 0])
150 ax[1, 0].plot(df_bwire['wire'], df_bwire['mean'], '*', markersize=5, rasterized=True)
151 ax[1, 0].set_title('dE/dx Mean vs bad Wire', fontsize=14)
152
153 ymin, ymax = get_positive_minmax(df_layer['mean'])
154
155 pc.hist(x_min=0, x_max=56, y_min=ymin-0.05, y_max=ymax+0.05, xlabel="Layer", ylabel="dE/dx mean", space=3, ax=ax[0, 1])
156 ax[0, 1].plot(df_layer['layer'], df_layer['mean'], '*', markersize=10, rasterized=True)
157 ax[0, 1].set_title('dE/dx Mean vs Layer', fontsize=14)
158
159 ymin, ymax = get_positive_minmax(df_layer['gmean'])
160 pc.hist(x_min=0, x_max=56, y_min=ymin-0.02, y_max=ymax+0.02, xlabel="Layer", ylabel="dE/dx mean", space=3, ax=ax[1, 1])
161 ax[1, 1].plot(df_layer['layer'], df_layer['gmean'], '*', markersize=10, rasterized=True)
162 ax[1, 1].set_title('dE/dx Mean vs Layer (good wires)', fontsize=14)
163
164 fig.suptitle(f"dE/dx vs #wire {suffix}", fontsize=20)
165 save_to_pdf(pdf, fig)
166
167
168def cosgain_validation(path, suffix):
169 val_path_el = os.path.join(path, "plots", "costh", f"dedx_vs_cos_electrons_{suffix}.txt")
170 val_path_po = os.path.join(path, "plots", "costh", f"dedx_vs_cos_positrons_{suffix}.txt")
171
172 df_el = read_txt(val_path_el, ["cos", "mean", "mean_err", "reso", "reso_err"])
173 df_po = read_txt(val_path_po, ["cos", "mean", "mean_err", "reso", "reso_err"])
174
175 if df_el is None or df_po is None:
176 return
177
178 # Ensure both dataframes are sorted by 'cos' so addition is element-wise correct
179 df_el = df_el.sort_values(by='cos').reset_index(drop=True)
180 df_po = df_po.sort_values(by='cos').reset_index(drop=True)
181
182 # New DataFrame with summed means
183 mean_avg = (df_el['mean'] + df_po['mean']) / 2
184 err_avg = 0.5 * np.sqrt(df_el['mean_err']**2 + df_po['mean_err']**2)
185 df_sum = pd.DataFrame({'cos': df_el['cos'], 'mean_sum': mean_avg, 'err_avg': err_avg})
186
187 pdf_path = make_pdf_path("dedx_vs_cosine", suffix)
188
189 with PdfPages(pdf_path) as pdf:
190 fig, ax = plt.subplots(1, 2, figsize=(20, 6)) # Two plots side-by-side
191 # mean
192 pc.hist(x_min=-1.0, x_max=1.0, y_min=0.96, y_max=1.03, xlabel=r"cos#theta", ylabel="dE/dx mean", space=0.1, ax=ax[0])
193 ax[0].errorbar(
194 df_el['cos'],
195 df_el['mean'],
196 yerr=df_el['mean_err'],
197 fmt='*',
198 markersize=10,
199 rasterized=True,
200 label='electron')
201 ax[0].errorbar(
202 df_po['cos'],
203 df_po['mean'],
204 yerr=df_po['mean_err'],
205 fmt='*',
206 markersize=10,
207 rasterized=True,
208 label='positrons')
209 ax[0].errorbar(df_sum['cos'], df_sum['mean_sum'], yerr=df_sum['err_avg'], fmt='*',
210 markersize=10, rasterized=True, label=r'average of e^{+} and e^{-}')
211 ax[0].legend(fontsize=17)
212 ax[0].set_title('dE/dx Mean vs cosine', fontsize=14)
213
214 # reso
215 pc.hist(x_min=-1.0, x_max=1.0, y_min=0.04, y_max=0.13, xlabel=r"cos#theta", ylabel="dE/dx reso", space=0.1, ax=ax[1])
216 ax[1].errorbar(
217 df_el['cos'],
218 df_el['reso'],
219 yerr=df_el['reso_err'],
220 fmt='*',
221 markersize=10,
222 rasterized=True,
223 label='electron')
224 ax[1].errorbar(
225 df_po['cos'],
226 df_po['reso'],
227 yerr=df_po['reso_err'],
228 fmt='*',
229 markersize=10,
230 rasterized=True,
231 label='positrons')
232 ax[1].legend(fontsize=17)
233 ax[1].set_title('dE/dx Resolution vs cosine', fontsize=14)
234
235 fig.suptitle(fr"dE/dx vs cos$\theta$ {suffix}", fontsize=20)
236 save_to_pdf(pdf, fig)
237
238
239def injection_validation(path, suffix):
240
241 cols = ["var", "bin", "mean", "mean_err", "reso", "reso_err"]
242 # corrected files
243 val_path_ler = os.path.join(path, "plots", "injection", f"dedx_vs_inj_ler_{suffix}.txt")
244 val_path_her = os.path.join(path, "plots", "injection", f"dedx_vs_inj_her_{suffix}.txt")
245
246 df_ler = read_txt(val_path_ler, cols)
247 df_her = read_txt(val_path_her, cols)
248
249 # no-correction files
250 val_path_ler_nocor = os.path.join(path, "plots", "injection", f"dedx_vs_inj_nocor_ler_{suffix}.txt")
251 val_path_her_nocor = os.path.join(path, "plots", "injection", f"dedx_vs_inj_nocor_her_{suffix}.txt")
252
253 df_ler_nocor = read_txt(val_path_ler_nocor, cols)
254 df_her_nocor = read_txt(val_path_her_nocor, cols)
255
256 if df_ler is None or df_her is None or df_ler_nocor is None or df_her_nocor is None:
257 return
258
259 for df in [df_ler, df_her, df_ler_nocor, df_her_nocor]:
260 df["bin"] = df["bin"].astype(str)
261
262 pdf_path = make_pdf_path("dedx_mean_inj", suffix)
263
264 with PdfPages(pdf_path) as pdf:
265
266 fig, ax = plt.subplots(2, 1, figsize=(18, 10), sharex=True)
267
268 ymin, ymax = get_positive_minmax(
269 pd.concat([df_ler["mean"], df_her["mean"]])
270 )
271
272 pc.hist(y_min=ymin - 0.01, y_max=ymax + 0.01,
273 xlabel="", ylabel="dE/dx mean",
274 space=3, ax=ax[0])
275
276 ax[0].errorbar(df_ler['bin'], df_ler['mean'], yerr=df_ler['mean_err'],
277 fmt='*', markersize=10, rasterized=True, label='LER')
278 ax[0].errorbar(df_her['bin'], df_her['mean'], yerr=df_her['mean_err'],
279 fmt='*', markersize=10, rasterized=True, label='HER')
280
281 ax[0].legend(fontsize=14)
282 ax[0].set_title("Corrected", fontsize=16)
283
284 all_means = pd.concat([
285 df_ler["mean"], df_her["mean"],
286 df_ler_nocor["mean"], df_her_nocor["mean"]
287 ])
288
289 positive_means = all_means[all_means > 0]
290 ymin2 = positive_means.min() if not positive_means.empty else all_means.min()
291 ymax2 = all_means.max()
292
293 pc.hist(y_min=ymin2 - 0.01, y_max=ymax2 + 0.01,
294 xlabel="injection time", ylabel="dE/dx mean",
295 space=3, ax=ax[1])
296
297 datasets = [
298 ("LER corr", df_ler, "o"),
299 ("HER corr", df_her, "s"),
300 ("LER no corr", df_ler_nocor, "^"),
301 ("HER no corr", df_her_nocor, "D"),
302 ]
303
304 for label, df, marker in datasets:
305 ax[1].errorbar(df['bin'], df['mean'], yerr=df['mean_err'],
306 fmt=marker, markersize=6, rasterized=True, label=label)
307
308 ax[1].legend(fontsize=12)
309 ax[1].set_title("Corrected vs No correction", fontsize=16)
310
311 fig.suptitle(f"dE/dx vs Injection time {suffix}", fontsize=20)
312
313 plt.tight_layout(rect=[0, 0, 1, 0.96]) # important to avoid overlap
314 save_to_pdf(pdf, fig)
315
316
317def mom_validation(path, suffix):
318
319 cos_labels = [
320 "acos",
321 "cos$\\theta > 0.0$",
322 "cos$\\theta < 0.0$",
323 "cos$\\theta \\leq -0.8$",
324 "cos$\\theta > -0.8$ and $\\cos\\theta \\leq -0.6$",
325 "cos$\\theta > -0.6$ and $\\cos\\theta \\leq -0.4$",
326 "cos$\\theta > -0.4$ and $\\cos\\theta \\leq -0.2$",
327 "cos$\\theta > -0.2$ and $\\cos\\theta \\leq 0$",
328 "cos$\\theta > 0$ and $\\cos\\theta \\leq 0.2$",
329 "cos$\\theta > 0.2$ and $\\cos\\theta \\leq 0.4$",
330 "cos$\\theta > 0.4$ and $\\cos\\theta \\leq 0.6$",
331 "cos$\\theta > 0.6$ and $\\cos\\theta \\leq 0.8$",
332 "cos$\\theta > 0.8$"
333 ]
334
335 # Define output PDFs
336 pdf_paths = {
337 "low": make_pdf_path("dedx_vs_mom", suffix),
338 "high": make_pdf_path("dedx_vs_mom", f"{suffix}_cosbins"),
339 }
340
341 with PdfPages(pdf_paths["low"]) as pdf_low, PdfPages(pdf_paths["high"]) as pdf_high:
342 for i in range(13):
343 cols = ["mom", "mean", "mean_err", "reso", "reso_err"]
344 val_path_el = os.path.join(path, "plots", "mom", f"dedx_vs_mom_{i}_elec_{suffix}.txt")
345 val_path_po = os.path.join(path, "plots", "mom", f"dedx_vs_mom_{i}_posi_{suffix}.txt")
346
347 df_el = read_txt(val_path_el, cols)
348 df_po = read_txt(val_path_po, cols)
349
350 if df_el is None or df_po is None:
351 continue
352
353 df_el['mom'] *= -1 # flip electron momentum
354
355 fig, ax = plt.subplots(2, 2, figsize=(20, 12))
356
357 ymin, ymax = get_positive_minmax(df_el['mean'])
358
359 panels = [
360 {"xlim": (-7, 7), "ylim": (ymin-0.01, ymax+0.01),
361 "ylabel": "dE/dx mean", "df_col": "mean", "err_col": "mean_err",
362 "title": "dE/dx Mean vs momentum"},
363 {"xlim": (-7, 7), "ylim": (0.04, 0.1),
364 "ylabel": "dE/dx reso", "df_col": "reso", "err_col": "reso_err",
365 "title": "dE/dx resolution vs momentum"},
366 {"xlim": (-3, 3), "ylim": (ymin-0.01, ymax+0.01),
367 "ylabel": "dE/dx mean", "df_col": "mean", "err_col": "mean_err",
368 "title": "dE/dx Mean vs momentum (zoomed)"},
369 {"xlim": (-3, 3), "ylim": (0.04, 0.1),
370 "ylabel": "dE/dx reso", "df_col": "reso", "err_col": "reso_err",
371 "title": "dE/dx resolution vs momentum (zoomed)"},
372 ]
373
374 for ax_i, panel in zip(ax.flat, panels):
375 pc.hist(x_min=panel["xlim"][0], x_max=panel["xlim"][1],
376 y_min=panel["ylim"][0], y_max=panel["ylim"][1],
377 xlabel="Momentum", ylabel=panel["ylabel"],
378 space=1, ax=ax_i)
379
380 ax_i.errorbar(df_el['mom'], df_el[panel["df_col"]],
381 yerr=df_el[panel["err_col"]],
382 fmt='*', markersize=10, rasterized=True, label='electron')
383 ax_i.errorbar(df_po['mom'], df_po[panel["df_col"]],
384 yerr=df_po[panel["err_col"]],
385 fmt='*', markersize=10, rasterized=True, label='positron')
386 ax_i.legend(fontsize=17)
387 ax_i.set_title(panel["title"], fontsize=14)
388 if i == 3 and panel["df_col"] == "reso":
389 ymin, ymax = ax_i.get_ylim()
390 ax_i.set_ylim(ymin, ymax * 1.5)
391
392 fig.suptitle(f"dE/dx vs Momentum ({cos_labels[i]}) {suffix}", fontsize=20)
393 plt.tight_layout()
394
395 # Save to correct PDF
396 if i <= 2:
397 save_to_pdf(pdf_low, fig)
398 else:
399 save_to_pdf(pdf_high, fig)
400
401
402def oneDcell_validation(path, suffix):
403
404 val_path_il = os.path.join(path, "plots", "oneD", f"dedx_vs_1D_IL_{suffix}.txt")
405 val_path_ol = os.path.join(path, "plots", "oneD", f"dedx_vs_1D_OL_{suffix}.txt")
406
407 df_il = read_txt(val_path_il, ["enta", "mean"])
408 df_ol = read_txt(val_path_ol, ["enta", "mean"])
409
410 if df_il is None or df_ol is None:
411 return
412
413 pdf_path = make_pdf_path("dedx_vs_enta", suffix)
414
415 with PdfPages(pdf_path) as pdf:
416 fig, ax = plt.subplots(2, 2, figsize=(20, 12))
417
418 pc.hist(x_min=-1.5, x_max=1.5, y_min=0.9, y_max=1.07, xlabel=r"entaRS", ylabel="dE/dx mean", space=0.3, ax=ax[0, 0])
419 ax[0, 0].plot(df_il['enta'], df_il['mean'], '-', markersize=10, rasterized=True, label='IL')
420 ax[0, 0].legend(fontsize=17)
421 ax[0, 0].set_title('dE/dx Mean vs entaRS (IL)', fontsize=14)
422
423 pc.hist(x_min=-1.5, x_max=1.5, y_min=0.9, y_max=1.05, xlabel=r"entaRS", ylabel="dE/dx mean", space=0.3, ax=ax[0, 1])
424 ax[0, 1].plot(df_ol['enta'], df_ol['mean'], '-', markersize=10, rasterized=True, label='OL')
425 ax[0, 1].legend(fontsize=17)
426 ax[0, 1].set_title('dE/dx Mean vs entaRS (OL)', fontsize=14)
427
428 pc.hist(x_min=-0.2, x_max=0.2, y_min=0.9, y_max=1.07, xlabel=r"entaRS", ylabel="dE/dx mean", space=0.02, ax=ax[1, 0])
429 ax[1, 0].plot(df_il['enta'], df_il['mean'], '-', markersize=10, rasterized=True, label='IL')
430 ax[1, 0].legend(fontsize=17)
431 ax[1, 0].set_title('dE/dx Mean vs entaRS (IL) zoom', fontsize=14)
432
433 pc.hist(x_min=-0.2, x_max=0.2, y_min=0.9, y_max=1.05, xlabel=r"entaRS", ylabel="dE/dx mean", space=0.02, ax=ax[1, 1])
434 ax[1, 1].plot(df_ol['enta'], df_ol['mean'], '-', markersize=10, rasterized=True, label='OL')
435 ax[1, 1].legend(fontsize=17)
436 ax[1, 1].set_title('dE/dx Mean vs entaRS (OL) zoom', fontsize=14)
437
438 fig.suptitle(f"dE/dx vs entaRS {suffix}", fontsize=20)
439 save_to_pdf(pdf, fig)
440
441
442def run_validation(job_path, input_data_path, requested_iov, expert_config, **kwargs):
443 '''
444 Makes validation plots
445 :job_path: path to cdcdedx calibration output
446 :input_data_path: path to the input files
447 :requested_iov: required argument but not used
448 :expert_config: required argument
449 '''
450 os.makedirs('plots/validation', exist_ok=True)
451 os.makedirs('plots/constant', exist_ok=True)
452
453 expert_config = json.loads(expert_config)
454 GT = expert_config["GT"]
455
456 basf2.B2INFO("Starting validation...")
457
458 payloads = [
459 ("rungain", "run gain", rg.getRunGain),
460 ("coscorr", "coscorr", pc.process_cosgain),
461 ("wiregain", "wire gain", pw.process_wiregain),
462 ("onedcell", "1D gain", oned.process_onedgain),
463 ]
464
465 exp_run_dict = None
466
467 for dirname, label, function in payloads:
468
469 basf2.B2INFO(f"Processing {label} payloads...")
470
471 caldir = get_latest_calibration_dir(job_path, dirname)
472
473 dbpath = os.path.join(caldir, "outputdb")
474
475 result = function(dbpath, GT)
476
477 if dirname == "wiregain":
478 exp_run_dict = result
479
480 basf2.B2INFO("Generating validation plots...")
481 val_path = os.path.join(job_path, 'validation0', '0', 'algorithm_output')
482
483 validators = [
484 ("rungain validation plots", rungain_validation),
485 ("wire gain validation plots", wiregain_validation),
486 ("cosine correction validation plots", cosgain_validation),
487 ("injection time validation plots", injection_validation),
488 ("momentum validation plots", mom_validation),
489 ("1D validation plots", oneDcell_validation),
490 ]
491
492 for exp, run_list in exp_run_dict.items():
493 for run in run_list:
494 suffix = f"e{exp}_r{run}"
495 for msg, func in validators:
496 basf2.B2INFO(f"Processing {msg} for {suffix}...")
497 func(val_path, suffix)
498
499 source_path = os.path.join(job_path, 'validation0', '0', 'algorithm_output', 'plots')
500 shutil.copy(source_path+f"/costh/dedxpeaks_vs_cos_{suffix}.pdf", 'plots/validation/')
501
502 shutil.copy(source_path+f"/mom/dedxpeaks_vs_mom_{suffix}.pdf", 'plots/validation/')
503
504
505if __name__ == "__main__":
506 run_validation(*sys.argv[1:])
Definition plot.py:1