Belle II Software development
dimuon.py
1
8"""
9Dimuon alignment validation: variable definitions, data loading, and validation run.
10"""
11
12from pathlib import Path
13
14
15import ROOT
16import numpy as np
17
18import alignment_validation.plotting as plotting
19from alignment_validation.variables import GlobalVariable, TrackVariable, s, cm, rad, unit, inverse_cm, gev
20from alignment_validation.utils import get_variable_names, pseudomomentum, get_filter
22 plot_histogram, plot_correlations, plot_2D_histogram, draw_map,
23 plot_resolutions_hist, plot_resolution_comparison, plot_resolution,
24)
25
26# ---------------------------------------------------------------------------
27# Variable definitions (CMS frame)
28# ---------------------------------------------------------------------------
29
30
31run = GlobalVariable("__run__", "run", unit, "run")
32
33time = GlobalVariable("eventTimeSeconds", r"t$_{0}$", s, "time")
34
35InvM = GlobalVariable("InvM", r"$M_{inv}$", gev, "InvM")
36
37
38d = TrackVariable(
39 "useCMSFrame__bodaughter__bo0__cm__spd0__bc__bc",
40 "useCMSFrame__bodaughter__bo1__cm__spd0__bc__bc",
41 r"d$_{0}$", cm, "d",
42)
43
44z = TrackVariable(
45 "useCMSFrame__bodaughter__bo0__cm__spz0__bc__bc",
46 "useCMSFrame__bodaughter__bo1__cm__spz0__bc__bc",
47 r"z$_{0}$", cm, "z",
48)
49
50phi = TrackVariable(
51 "useCMSFrame__bodaughter__bo0__cm__spphi0__bc__bc",
52 "useCMSFrame__bodaughter__bo1__cm__spphi0__bc__bc",
53 r"$\Phi_{0}$", rad, "phi",
54)
55
56tanLambda = TrackVariable(
57 "useCMSFrame__bodaughter__bo0__cm__sptanLambda__bc__bc",
58 "useCMSFrame__bodaughter__bo1__cm__sptanLambda__bc__bc",
59 r"tan($\lambda$)", unit, "tanLambda",
60)
61
62omega = TrackVariable(
63 "useCMSFrame__bodaughter__bo0__cm__spomega__bc__bc",
64 "useCMSFrame__bodaughter__bo1__cm__spomega__bc__bc",
65 r"$\omega$", inverse_cm, "omega",
66)
67
68pt = TrackVariable(
69 "useCMSFrame__bodaughter__bo0__cm__sppt__bc__bc",
70 "useCMSFrame__bodaughter__bo1__cm__sppt__bc__bc",
71 r"$P_{t}$", gev, "pt",
72)
73
74
75SELECTION = (
76 "__run__>=0"
77 " && daughter__bo0__cm__spmuonID__bc>=0.8"
78 " && daughter__bo1__cm__spmuonID__bc>=0.8"
79)
80
81# ---------------------------------------------------------------------------
82# IP correction
83# ---------------------------------------------------------------------------
84
85
86def fix_ip_location(data: dict):
87 """Refit helix parameters relative to the per-event IP position for all dimuon events.
88
89 Modifies ``data`` in-place. For each event the helix of both tracks is
90 passively moved to the per-event IP (stored in branches ``IPX``, ``IPY``,
91 ``IPZ``), and the five helix parameters (d0, phi0, omega, z0, tanLambda)
92 are updated accordingly.
93
94 Parameters
95 ----------
96 data : dict
97 ``{filename: {branch_name: numpy_array}}`` as returned by
98 :func:`load_data`. Must contain ``IPX``, ``IPY``, ``IPZ`` arrays.
99 """
100 print("Recalculating IP location.")
101 for file in data:
102 for i, _ in enumerate(data[file][time.name]):
103 helix1 = ROOT.Belle2.Helix(
104 data[file][d.name1][i], data[file][phi.name1][i],
105 data[file][omega.name1][i], data[file][z.name1][i],
106 data[file][tanLambda.name1][i],
107 )
108 helix2 = ROOT.Belle2.Helix(
109 data[file][d.name2][i], data[file][phi.name2][i],
110 data[file][omega.name2][i], data[file][z.name2][i],
111 data[file][tanLambda.name2][i],
112 )
113 helix1.passiveMoveBy(data[file]["IPX"][i], data[file]["IPY"][i], data[file]["IPZ"][i])
114 helix2.passiveMoveBy(data[file]["IPX"][i], data[file]["IPY"][i], data[file]["IPZ"][i])
115
116 data[file][d.name1][i] = helix1.getD0()
117 data[file][d.name2][i] = helix2.getD0()
118 data[file][z.name1][i] = helix1.getZ0()
119 data[file][z.name2][i] = helix2.getZ0()
120 data[file][tanLambda.name1][i] = helix1.getTanLambda()
121 data[file][tanLambda.name2][i] = helix2.getTanLambda()
122 data[file][phi.name1][i] = helix1.getPhi0()
123 data[file][phi.name2][i] = helix2.getPhi0()
124 data[file][omega.name1][i] = helix1.getOmega()
125 data[file][omega.name2][i] = helix2.getOmega()
126
127 if i % 10000 == 0:
128 print(f"Processing event: {i} of {len(data[file][d.name1])}.", end="\r", flush=True)
129 if len(data[file][d.name1]) > 0:
130 print(f"Processing event: {i+1} of {len(data[file][d.name1])}.", flush=True)
131
132
133def fix_ip_location_run_by_run(data: dict):
134 """Refit helix parameters using the per-run mean IP position.
135
136 Modifies ``data`` in-place. For each unique (experiment, run) pair the
137 mean IP position is computed from the per-event ``IPX``, ``IPY``, ``IPZ``
138 values. All events in that run are then refitted using this mean IP,
139 rather than the individual per-event IP. This is more robust when the
140 per-event IP has large statistical fluctuations.
141
142 Parameters
143 ----------
144 data : dict
145 ``{filename: {branch_name: numpy_array}}`` as returned by
146 :func:`load_data`. Must contain ``IPX``, ``IPY``, ``IPZ``, and
147 ``__experiment__`` arrays.
148 """
149 print("Recalculating IP location (run-by-run).")
150 for file in data:
151 experiments = data[file]["__experiment__"]
152 runs = data[file][run.name]
153 unique_pairs = np.unique(np.stack([experiments, runs], axis=1), axis=0)
154 for pair in unique_pairs:
155 exp, run_num = pair[0], pair[1]
156 mask = (experiments == exp) & (runs == run_num)
157 mean_ipx = float(np.mean(data[file]["IPX"][mask]))
158 mean_ipy = float(np.mean(data[file]["IPY"][mask]))
159 mean_ipz = float(np.mean(data[file]["IPZ"][mask]))
160 n_events = int(np.sum(mask))
161 print(
162 f"Experiment {exp}, run {run_num}: {n_events} events, "
163 f"mean IP = ({mean_ipx:.4f}, {mean_ipy:.4f}, {mean_ipz:.4f})"
164 )
165 indices = np.where(mask)[0]
166 for i in indices:
167 helix1 = ROOT.Belle2.Helix(
168 data[file][d.name1][i], data[file][phi.name1][i],
169 data[file][omega.name1][i], data[file][z.name1][i],
170 data[file][tanLambda.name1][i],
171 )
172 helix2 = ROOT.Belle2.Helix(
173 data[file][d.name2][i], data[file][phi.name2][i],
174 data[file][omega.name2][i], data[file][z.name2][i],
175 data[file][tanLambda.name2][i],
176 )
177 helix1.passiveMoveBy(mean_ipx, mean_ipy, mean_ipz)
178 helix2.passiveMoveBy(mean_ipx, mean_ipy, mean_ipz)
179
180 data[file][d.name1][i] = helix1.getD0()
181 data[file][d.name2][i] = helix2.getD0()
182 data[file][z.name1][i] = helix1.getZ0()
183 data[file][z.name2][i] = helix2.getZ0()
184 data[file][tanLambda.name1][i] = helix1.getTanLambda()
185 data[file][tanLambda.name2][i] = helix2.getTanLambda()
186 data[file][phi.name1][i] = helix1.getPhi0()
187 data[file][phi.name2][i] = helix2.getPhi0()
188 data[file][omega.name1][i] = helix1.getOmega()
189 data[file][omega.name2][i] = helix2.getOmega()
190
191# ---------------------------------------------------------------------------
192# Data loading
193# ---------------------------------------------------------------------------
194
195
196def load_data(filenames: list, selection: str = SELECTION) -> dict:
197 """Read dimuon ROOT files and return filtered data arrays keyed by filename.
198
199 No IP correction is applied here; call :func:`fix_ip_location` or
200 :func:`fix_ip_location_run_by_run` afterwards if needed.
201
202 Parameters
203 ----------
204 filenames : list of str
205 Paths to the input ROOT files. Each file must contain a TTree named
206 ``"variables"`` with the branches defined by the module-level variable
207 list, plus ``IPX``, ``IPY``, ``IPZ``, and ``__experiment__``.
208 selection : str, optional
209 ROOT/RDataFrame filter string applied before reading the data.
210 Defaults to :data:`SELECTION`.
211
212 Returns
213 -------
214 dict
215 ``{filename: {branch_name: numpy_array}}`` with one entry per file.
216 Only events passing ``selection`` are included.
217 """
218 print("Loading dimuon data.")
219 all_vars = [run, time, d, z, phi, tanLambda, omega, pt, InvM]
220 branch_names = get_variable_names(all_vars) + ["IPX", "IPY", "IPZ", "__experiment__"]
221 data = {}
222 for file in filenames:
223 print(f"Reading {file}")
224 tfile = ROOT.TFile(file, "OPEN")
225 df = ROOT.RDataFrame("variables", tfile).Filter(selection)
226 data[file] = df.AsNumpy(columns=branch_names)
227 print(
228 f"Number of events after applying selection in {file}"
229 f" is: {len(data[file][branch_names[0]])}"
230 )
231 return data
232
233# ---------------------------------------------------------------------------
234# Validation run
235# ---------------------------------------------------------------------------
236
237
238def run_validation(filenames: list, output_dir: str, file_format: str = "pdf",
239 ip_correction: str = "run_by_run"):
240 """Load dimuon data and produce all validation plots.
241
242 Produces the following sets of plots in ``output_dir``:
243
244 - Per-variable histograms, track1 − track2 difference and track1 + track2
245 sum histograms.
246 - Di-muon invariant mass fit (Crystal Ball + exponential background).
247 - Correlation profiles (median and sigma68 of d0 sum / z0 difference vs
248 phi and tan(lambda)).
249 - 2D detector maps (phi vs tan(lambda)) of median and resolution for d0
250 and z0, using Sigma mode (track1 + track2) for d0 and Delta mode
251 (track1 - track2) for z0.
252 - Resolution histograms per dataset and a multi-dataset comparison.
253 - Resolution vs pseudomomentum for d0 and z0.
254 - Pt resolution vs Pt.
255
256 Parameters
257 ----------
258 filenames : list of str
259 Paths to the input ROOT files.
260 output_dir : str
261 Directory where all plots are saved. Created if it does not exist.
262 file_format : str, optional
263 Image format passed to matplotlib (e.g. ``"pdf"``, ``"png"``).
264 Default is ``"pdf"``.
265 ip_correction : str, optional
266 IP correction strategy to apply after loading the data.
267 ``"run_by_run"`` (default) uses the mean IP per (experiment, run) pair
268 via :func:`fix_ip_location_run_by_run`; ``"per_event"`` uses the
269 individual per-event IP via :func:`fix_ip_location`.
270 """
271 import alignment_validation.fit_dimuon_mass as fit_dimuon_mass # requires Belle II environment
272
273 Path(output_dir).mkdir(parents=True, exist_ok=True)
274 plotting.output_dir = output_dir
275 plotting.file_format = file_format
276
277 labels = [Path(f).stem for f in filenames]
278 data = load_data(filenames)
279
280 if ip_correction == "run_by_run":
281 fix_ip_location_run_by_run(data)
282 elif ip_correction == "per_event":
283 fix_ip_location(data)
284 else:
285 raise ValueError(
286 f"Unknown ip_correction: {ip_correction!r}. Use 'run_by_run' or 'per_event'."
287 )
288
289 # -----------------------------------------------------------------------
290 # Histograms
291 # -----------------------------------------------------------------------
292 print("Making histograms")
293
294 for var in [d, z, omega, pt]:
295 plot_histogram(
296 [[*data[f][var.name1], *data[f][var.name2]] for f in filenames],
297 labels, var.plaintext, f"{var.latex} {var.unit.name}",
298 )
299
300 for var in [phi, tanLambda]:
301 plot_histogram(
302 [[*data[f][var.name1], *data[f][var.name2]] for f in filenames],
303 labels, var.plaintext, f"{var.latex} {var.unit.name}", range=100,
304 )
305
306 for var in [run, time]:
307 plot_histogram(
308 [data[f][var.name] for f in filenames],
309 labels, var.plaintext, f"{var.latex} {var.unit.name}",
310 )
311
312 plot_histogram(
313 [data[f][InvM.name] for f in filenames],
314 labels, InvM.plaintext, f"{InvM.latex} {InvM.unit.name}",
315 )
316
317 print("Fitting dimuon mass")
318 for f in filenames:
319 result = fit_dimuon_mass.fit(
320 data[f][InvM.name],
321 savefig=f"{output_dir}/dimuon_fit_{Path(f).stem}.{file_format}",
322 )
323 print(result)
324
325 print("Making track1 - track2 histograms")
326 for var in [z]:
327 plot_histogram(
328 [(data[f][var.name1] - data[f][var.name2]) / 2**0.5 for f in filenames],
329 labels, "delta-" + var.plaintext, f"$\\Delta${var.latex} {var.unit.name}",
330 )
331
332 print("Making track1 + track2 histograms")
333 for var in [d]:
334 plot_histogram(
335 [(data[f][var.name1] + data[f][var.name2]) / 2**0.5 for f in filenames],
336 labels, "sigma-" + var.plaintext, f"$\\Sigma${var.latex} {var.unit.name}",
337 )
338
339 # -----------------------------------------------------------------------
340 # Correlations
341 # -----------------------------------------------------------------------
342 print("Making correlations")
343
344 xdata = {f: {
345 phi: np.array([*data[f][phi.name1], *data[f][phi.name2]]),
346 tanLambda: np.array([*data[f][tanLambda.name1], *data[f][tanLambda.name2]]),
347 } for f in filenames}
348
349 ydata = {f: {
350 d: np.array([*(data[f][d.name1] + data[f][d.name2]) / 2**0.5 * d.unit.convert,
351 *(data[f][d.name1] + data[f][d.name2]) / 2**0.5 * d.unit.convert]),
352 z: np.array([*(data[f][z.name1] - data[f][z.name2]) / 2**0.5 * z.unit.convert,
353 *(data[f][z.name1] - data[f][z.name2]) / 2**0.5 * z.unit.convert]),
354 } for f in filenames}
355
356 xlabels = {phi: phi.latex + phi.unit.name, tanLambda: tanLambda.latex + tanLambda.unit.name}
357 ylabels = {d: r"$\Sigma$" + d.latex + d.unit.dname, z: r"$\Delta$" + z.latex + z.unit.dname}
358
359 plot_correlations(
360 'median',
361 [xdata[f] for f in filenames],
362 [ydata[f] for f in filenames],
363 [xlabels[var] for var in xlabels],
364 [ylabels[var] for var in ylabels],
365 labels, nbins=15, figsize=(6, 5),
366 )
367 plot_correlations(
368 'resolution',
369 [xdata[f] for f in filenames],
370 [ydata[f] for f in filenames],
371 [xlabels[var] for var in xlabels],
372 [r"$\sigma_{{68}} ({})$".format(ylabels[var].replace(var.unit.dname, "").replace("$", " "))
373 + "\n" + var.unit.dname for var in ylabels],
374 labels, nbins=15, figsize=(6, 5), make_2D_hist=False,
375 )
376
377 # -----------------------------------------------------------------------
378 # Detector maps
379 # -----------------------------------------------------------------------
380 print("Making maps")
381 map_bins = (80, 80)
382
383 for f in filenames:
384 label = Path(f).stem
385 plot_2D_histogram(data[f], label, map_bins, phi, tanLambda)
386 for var in [d, z]:
387 mode = 'sigma' if var is d else 'delta'
388 draw_map('median', data[f], label, var, mode, map_bins, phi, tanLambda)
389 draw_map('resolution', data[f], label, var, mode, map_bins, phi, tanLambda)
390
391 # -----------------------------------------------------------------------
392 # Resolutions
393 # -----------------------------------------------------------------------
394 print("Making resolutions")
395 resolutions_cut = {}
396
397 resolutions_data = {}
398 resolutions_labels = {}
399 for f in filenames:
400 resolutions_data[f] = {}
401 mask = get_filter(data[f], resolutions_cut)
402 resolutions_data[f][d] = (data[f][d.name1][mask] + data[f][d.name2][mask]) / 2**0.5 * d.unit.convert
403 resolutions_labels[d] = r"$\Sigma$" + d.latex + d.unit.dname
404 resolutions_data[f][z] = (data[f][z.name1][mask] - data[f][z.name2][mask]) / 2**0.5 * z.unit.convert
405 resolutions_labels[z] = r"$\Delta$" + z.latex + z.unit.dname
406
407 plot_resolutions_hist(
408 f"Resolutions {Path(f).stem}",
409 resolutions_data[f], resolutions_labels,
410 nbins=40, vars_to_fit=resolutions_data[f].keys(),
411 shape=(1, 2), figsize=(11.0, 5.0),
412 )
413
414 plot_resolution_comparison(
415 "Resolutions",
416 [resolutions_data[f] for f in filenames],
417 labels, resolutions_labels,
418 nbins=40, shape=(1, 2), figsize=(11.0, 5.0),
419 )
420
421 # -----------------------------------------------------------------------
422 # Resolution vs pseudomomentum
423 # -----------------------------------------------------------------------
424 print("Making resolution vs pseudomomentum")
425
426 pseudomom3 = [np.array([
427 *pseudomomentum(data[f][pt.name1], data[f][tanLambda.name1], 3),
428 *pseudomomentum(data[f][pt.name2], data[f][tanLambda.name2], 3),
429 ]) for f in filenames]
430 pseudomom5 = [np.array([
431 *pseudomomentum(data[f][pt.name1], data[f][tanLambda.name1], 5),
432 *pseudomomentum(data[f][pt.name2], data[f][tanLambda.name2], 5),
433 ]) for f in filenames]
434
435 pm_xlimit = [0, 5]
436 pm_ylimits = {d: [0, 50], z: [0, 100]}
437 pm_bins = [2, 2.5, 3, 3.5, 4, 4.2, 4.4, 4.6, 4.8, 5, 6]
438
439 pm_res_data = {}
440 pm_res_labels = {}
441
442 # d uses SUM for dimuon
443 ydata_d = [
444 np.ndarray.flatten(np.array([
445 *(data[f][d.name1] + data[f][d.name2]) / 2**0.5 * d.unit.convert,
446 *(data[f][d.name1] + data[f][d.name2]) / 2**0.5 * d.unit.convert,
447 ]))
448 for f in filenames
449 ]
450 pm_res_data[d] = [[pseudomom5[i], ydata_d[i]] for i in range(len(filenames))]
451 pm_res_labels[d] = [
452 r"p$\beta$" + r"(sin$\theta$)$^{5/2}$ [GeV/c]",
453 r"$\sigma_{{68}}(\frac{{\Sigma {}}}{{\sqrt{{2}}}})$".format(d.latex.replace("$", "")) + f"{d.unit.dname}",
454 ]
455
456 ydata_z = [
457 np.ndarray.flatten(np.array([
458 *(data[f][z.name1] - data[f][z.name2]) / 2**0.5 * z.unit.convert,
459 *(data[f][z.name1] - data[f][z.name2]) / 2**0.5 * z.unit.convert,
460 ]))
461 for f in filenames
462 ]
463 pm_res_data[z] = [[pseudomom3[i], ydata_z[i]] for i in range(len(filenames))]
464 pm_res_labels[z] = [
465 r"p$\beta$" + r"(sin$\theta$)$^{3/2}$ [GeV/c]",
466 r"$\sigma_{{68}}(\frac{{\Delta {}}}{{\sqrt{{2}}}})$".format(z.latex.replace("$", "")) + f"{z.unit.dname}",
467 ]
468
469 def resolutionfit(x, a, b):
470 return (a**2 + (b / x)**2)**0.5
471
472 def resolutionfitlabel(fit, err):
473 return (r"y=$\sqrt{a^{2} +b^{2}/x^{2}} $" + "\n"
474 + f"a = ({fit[0]:.3f}" + r" $\pm$ " + f"{err[0]:.3f})" + r"[$\mu$m]" + "\n"
475 + f"b = ({fit[1]:.3f}" + r" $\pm$ " + f"{err[1]:.3f})" + r"[$\mu$m GeV/c]")
476
477 plot_resolution(
478 "Resolution vs Pseudomomentum",
479 pm_res_data, labels, pm_res_labels,
480 pm_xlimit, pm_ylimits, pm_bins,
481 resolutionfit, resolutionfitlabel, fitrange=[0.01, pm_xlimit[1]],
482 )
483
484 # -----------------------------------------------------------------------
485 # Pt resolution vs pt
486 # -----------------------------------------------------------------------
487 print("Making pt resolution vs pt")
488
489 xdata_pt = [np.array([*data[f][pt.name1], *data[f][pt.name2]]) for f in filenames]
490 ydata_pt = [
491 np.ndarray.flatten(np.array([
492 *(data[f][pt.name1] - data[f][pt.name2]) / data[f][pt.name1] * 100,
493 *(data[f][pt.name1] - data[f][pt.name2]) / data[f][pt.name2] * 100,
494 ]))
495 for f in filenames
496 ]
497 pt_res_data = {pt: [[xdata_pt[i], ydata_pt[i]] for i in range(len(filenames))]}
498 pt_res_labels = {pt: [
499 f"{pt.latex}{pt.unit.name}",
500 r"$\sigma_{{68}}(\frac{{\Delta {}}}{{{}}})$".format(
501 pt.latex.replace("$", ""), pt.latex.replace("$", "")) + "[%]",
502 ]}
503
504 def ptfit(x, a, b):
505 return ((x * a)**2 + b**2)**0.5
506
507 def ptfitlabel(fit, err):
508 return (r"y=$\sqrt{A^{2} x^{2}+B^{2}} $" + "\n"
509 + f"A = ({fit[0]:.3f}" + r" $\pm$ " + f"{err[0]:.3f})" + "[% c/GeV]" + "\n"
510 + f"B = ({fit[1]:.3f}" + r" $\pm$ " + f"{err[1]:.3f})" + "[%]")
511
512 plot_resolution(
513 "Pt resolution vs Pt",
514 pt_res_data, labels, pt_res_labels,
515 xlimit=[2, 5.5], ylimits={pt: [0, 9]},
516 bins=[2, 2.5, 3, 3.5, 4, 4.5, 5, 5.5],
517 fitfunction=ptfit, fitlabel=ptfitlabel,
518 )