Belle II Software development
cosmics.py
1
8"""
9Cosmics alignment validation: variable definitions, data loading, and validation run.
10"""
11
12import pathlib
13from pathlib import Path
14
15import ROOT
16import numpy as np
17
18import alignment_validation.plotting as plotting
19from alignment_validation.variables import GlobalVariable, TrackVariable, s, cm, rad, unit, inverse_cm, gev
20from alignment_validation.utils import get_variable_names, pseudomomentum, get_filter
22 plot_histogram, plot_correlations, plot_2D_histogram, draw_map,
23 plot_resolutions_hist, plot_resolution_comparison, plot_resolution,
24)
25
26# ---------------------------------------------------------------------------
27# Variable definitions
28# ---------------------------------------------------------------------------
29
30
31run = GlobalVariable("run", "run", unit, "run")
32
33time = GlobalVariable("evtT0", r"t$_{0}$", s, "time")
34
35d = TrackVariable("D01", "D02", r"d$_{0}$", cm, "d")
36
37z = TrackVariable("Z01", "Z02", r"z$_{0}$", cm, "z")
38
39phi = TrackVariable("Phi01", "Phi02", r"$\Phi_{0}$", rad, "phi")
40
41tanLambda = TrackVariable("tanLambda1", "tanLambda2", r"$\tan(\lambda$)", unit, "tanLambda")
42
43omega = TrackVariable("Omega1", "Omega2", r"$\omega$", inverse_cm, "omega")
44
45pt = TrackVariable("Pt1", "Pt2", r"$P_{t}$", gev, "pt")
46
47
48SELECTION = (
49 "run>=0"
50 " && abs(D01)<1 && abs(D02)<1"
51 " && Z01>-2 && Z02>-2 && Z01<4 && Z02<4"
52 " && abs(D01-D02)<0.2 && abs(Z01-Z02)<0.2"
53)
54
55# ---------------------------------------------------------------------------
56# Data loading
57# ---------------------------------------------------------------------------
58
59
60def load_data(filenames: list, selection: str = SELECTION) -> dict:
61 """Read cosmic-track ROOT files and return filtered data arrays keyed by filename.
62
63 Parameters
64 ----------
65 filenames : list of str
66 Paths to the input ROOT files. Each file must contain a TTree named
67 ``"tree"`` with the branches defined by the module-level variable list.
68 selection : str, optional
69 ROOT/RDataFrame filter string applied before reading the data.
70 Defaults to :data:`SELECTION`.
71
72 Returns
73 -------
74 dict
75 ``{filename: {branch_name: numpy_array}}`` with one entry per file.
76 Only events passing ``selection`` are included.
77 """
78 print("Loading cosmic data.")
79 all_vars = [run, time, d, z, phi, tanLambda, omega, pt]
80 branch_names = get_variable_names(all_vars)
81 data = {}
82 for file in filenames:
83 print(f"Reading {file}")
84 tfile = ROOT.TFile(file, "OPEN")
85 df = ROOT.RDataFrame("tree", tfile).Filter(selection)
86 data[file] = df.AsNumpy(columns=branch_names)
87 print(
88 f"Number of events after applying selection in {file}"
89 f" is: {len(data[file][branch_names[0]])}"
90 )
91 return data
92
93# ---------------------------------------------------------------------------
94# Validation run
95# ---------------------------------------------------------------------------
96
97
98def run_validation(filenames: list, output_dir: str, file_format: str = "pdf"):
99 """Load cosmics data and produce all validation plots.
100
101 Produces the following sets of plots in ``output_dir``:
102
103 - Per-variable histograms and track1 − track2 difference histograms.
104 - Correlation profiles (median and sigma68 vs each track parameter).
105 - 2D detector maps (phi vs tan(lambda)) of median and resolution for d0 and z0,
106 using Delta mode (track1 - track2) for both variables.
107 - Resolution histograms per dataset and a multi-dataset comparison.
108 - Resolution vs pseudomomentum for d0, z0, phi0, tan(lambda).
109 - Pt resolution vs Pt.
110
111 Parameters
112 ----------
113 filenames : list of str
114 Paths to the input ROOT files.
115 output_dir : str
116 Directory where all plots are saved. Created if it does not exist.
117 file_format : str, optional
118 Image format passed to matplotlib (e.g. ``"pdf"``, ``"png"``).
119 Default is ``"pdf"``.
120 """
121 pathlib.Path(output_dir).mkdir(parents=True, exist_ok=True)
122 plotting.output_dir = output_dir
123 plotting.file_format = file_format
124
125 labels = [Path(f).stem for f in filenames]
126 data = load_data(filenames)
127
128 # -----------------------------------------------------------------------
129 # Histograms
130 # -----------------------------------------------------------------------
131 print("Making histograms")
132
133 for var in [d, z, omega, pt]:
134 plot_histogram(
135 [[*data[f][var.name1], *data[f][var.name2]] for f in filenames],
136 labels, var.plaintext, f"{var.latex} {var.unit.name}",
137 )
138
139 for var in [phi, tanLambda]:
140 plot_histogram(
141 [[*data[f][var.name1], *data[f][var.name2]] for f in filenames],
142 labels, var.plaintext, f"{var.latex} {var.unit.name}", range=100,
143 )
144
145 for var in [run, time]:
146 plot_histogram(
147 [data[f][var.name] for f in filenames],
148 labels, var.plaintext, f"{var.latex} {var.unit.name}",
149 )
150
151 print("Making track1 - track2 histograms")
152 for var in [d, z, phi, tanLambda, omega, pt]:
153 plot_histogram(
154 [(data[f][var.name1] - data[f][var.name2]) / 2**0.5 for f in filenames],
155 labels, "delta-" + var.plaintext, f"$\\Delta${var.latex} {var.unit.name}",
156 )
157
158 # -----------------------------------------------------------------------
159 # Correlations
160 # -----------------------------------------------------------------------
161 print("Making correlations")
162
163 xdata = {f: {var: np.array([*data[f][var.name1], *data[f][var.name2]])
164 for var in [d, z, phi, tanLambda, omega]}
165 for f in filenames}
166 ydata = {f: {var: np.array([*(data[f][var.name1] - data[f][var.name2]) / 2**0.5 * var.unit.convert,
167 *(data[f][var.name1] - data[f][var.name2]) / 2**0.5 * var.unit.convert])
168 for var in [d, z, phi, tanLambda, omega]}
169 for f in filenames}
170 xlabels = {var: var.latex + var.unit.name for var in [d, z, phi, tanLambda, omega]}
171 ylabels = {var: r"$\Delta$" + var.latex + var.unit.dname for var in [d, z, phi, tanLambda, omega]}
172
173 plot_correlations(
174 'median',
175 [xdata[f] for f in filenames],
176 [ydata[f] for f in filenames],
177 [xlabels[var] for var in xlabels],
178 [ylabels[var] for var in ylabels],
179 labels, nbins=15, figsize=(10.0, 7.5),
180 )
181 plot_correlations(
182 'resolution',
183 [xdata[f] for f in filenames],
184 [ydata[f] for f in filenames],
185 [xlabels[var] for var in xlabels],
186 [r"$\sigma_{{68}} ({})$".format(ylabels[var].replace(var.unit.dname, "").replace("$", " "))
187 + "\n" + var.unit.dname for var in ylabels],
188 labels, nbins=15, figsize=(10.0, 7.5), make_2D_hist=False,
189 )
190
191 # -----------------------------------------------------------------------
192 # Detector maps
193 # -----------------------------------------------------------------------
194 print("Making maps")
195 map_bins = (80, 80)
196
197 for f in filenames:
198 label = Path(f).stem
199 plot_2D_histogram(data[f], label, map_bins, phi, tanLambda)
200 for var in [d, z]:
201 draw_map('median', data[f], label, var, 'delta', map_bins, phi, tanLambda)
202 draw_map('resolution', data[f], label, var, 'delta', map_bins, phi, tanLambda)
203
204 # -----------------------------------------------------------------------
205 # Resolutions
206 # -----------------------------------------------------------------------
207 print("Making resolutions")
208 resolutions_cut = {}
209
210 resolutions_data = {}
211 resolutions_labels = {}
212 for f in filenames:
213 resolutions_data[f] = {}
214 mask = get_filter(data[f], resolutions_cut)
215 for var in [d, z, phi, tanLambda, omega]:
216 resolutions_data[f][var] = (
217 (data[f][var.name1][mask] - data[f][var.name2][mask]) / 2**0.5 * var.unit.convert
218 )
219 resolutions_labels[var] = r"$\Delta$" + var.latex + var.unit.dname
220 resolutions_data[f][pt] = (
221 (data[f][pt.name1][mask] - data[f][pt.name2][mask]) /
222 (data[f][pt.name1][mask] + data[f][pt.name2][mask]) / 2**0.5
223 )
224 resolutions_labels[pt] = r"$\frac{\Delta P_{t}}{\overline{P_{t}}}$ [1]"
225
226 plot_resolutions_hist(
227 f"Resolutions {Path(f).stem}",
228 resolutions_data[f], resolutions_labels,
229 nbins=40, vars_to_fit=resolutions_data[f].keys(),
230 )
231
232 plot_resolution_comparison(
233 "Resolutions",
234 [resolutions_data[f] for f in filenames],
235 labels, resolutions_labels,
236 nbins=40, figsize=(13.0, 8.0),
237 )
238
239 # -----------------------------------------------------------------------
240 # Resolution vs pseudomomentum
241 # -----------------------------------------------------------------------
242 print("Making resolution vs pseudomomentum")
243
244 pseudomom3 = [np.array([
245 *pseudomomentum(data[f][pt.name1], data[f][tanLambda.name1], 3),
246 *pseudomomentum(data[f][pt.name2], data[f][tanLambda.name2], 3),
247 ]) for f in filenames]
248 pseudomom5 = [np.array([
249 *pseudomomentum(data[f][pt.name1], data[f][tanLambda.name1], 5),
250 *pseudomomentum(data[f][pt.name2], data[f][tanLambda.name2], 5),
251 ]) for f in filenames]
252
253 pm_xlimit = [0, 10]
254 pm_ylimits = {d: [0, 50], z: [0, 100], phi: [0, 3], tanLambda: [0, 4]}
255 pm_bins = [0.5, 1, 1.5, 2, 2.5, 3, 4, 5, 6, 7, 8, 9, 10]
256
257 pm_res_data = {}
258 pm_res_labels = {}
259
260 for var in [d, phi]:
261 ydata = [
262 np.ndarray.flatten(np.array([
263 *(data[f][var.name1] - data[f][var.name2]) / 2**0.5 * var.unit.convert,
264 *(data[f][var.name1] - data[f][var.name2]) / 2**0.5 * var.unit.convert,
265 ]))
266 for f in filenames
267 ]
268 pm_res_data[var] = [[pseudomom5[i], ydata[i]] for i in range(len(filenames))]
269 pm_res_labels[var] = [
270 r"p$\beta$" + r"(sin$\theta$)$^{5/2}$ [GeV/c]",
271 r"$\sigma_{{68}}(\frac{{\Delta {}}}{{\sqrt{{2}}}})$".format(var.latex.replace("$", "")) + f"{var.unit.dname}",
272 ]
273
274 for var in [z, tanLambda]:
275 ydata = [
276 np.ndarray.flatten(np.array([
277 *(data[f][var.name1] - data[f][var.name2]) / 2**0.5 * var.unit.convert,
278 *(data[f][var.name1] - data[f][var.name2]) / 2**0.5 * var.unit.convert,
279 ]))
280 for f in filenames
281 ]
282 pm_res_data[var] = [[pseudomom3[i], ydata[i]] for i in range(len(filenames))]
283 pm_res_labels[var] = [
284 r"p$\beta$" + r"(sin$\theta$)$^{3/2}$ [GeV/c]",
285 r"$\sigma_{{68}}(\frac{{\Delta {}}}{{\sqrt{{2}}}})$".format(var.latex.replace("$", "")) + f"{var.unit.dname}",
286 ]
287
288 def resolutionfit(x, a, b):
289 return (a**2 + (b / x)**2)**0.5
290
291 def resolutionfitlabel(fit, err):
292 return (r"y=$\sqrt{a^{2} +b^{2}/x^{2}} $" + "\n"
293 + f"a = ({fit[0]:.3f}" + r" $\pm$ " + f"{err[0]:.3f})" + r"[$\mu$m]" + "\n"
294 + f"b = ({fit[1]:.3f}" + r" $\pm$ " + f"{err[1]:.3f})" + r"[$\mu$m GeV/c]")
295
296 plot_resolution(
297 "Resolution vs Pseudomomentum",
298 {var: pm_res_data[var] for var in [d, z]},
299 labels, pm_res_labels, pm_xlimit, pm_ylimits, pm_bins,
300 resolutionfit, resolutionfitlabel, fitrange=[0.01, pm_xlimit[1]],
301 )
302 plot_resolution(
303 "Resolution vs Pseudomomentum",
304 {var: pm_res_data[var] for var in [phi, tanLambda]},
305 labels, pm_res_labels, pm_xlimit, pm_ylimits, pm_bins,
306 resolutionfit, resolutionfitlabel, fitrange=[0.01, pm_xlimit[1]],
307 )
308
309 # -----------------------------------------------------------------------
310 # Pt resolution vs pt
311 # -----------------------------------------------------------------------
312 print("Making pt resolution vs pt")
313
314 xdata_pt = [np.array([*data[f][pt.name1], *data[f][pt.name2]]) for f in filenames]
315 ydata_pt = [
316 np.ndarray.flatten(np.array([
317 *(data[f][pt.name1] - data[f][pt.name2]) / data[f][pt.name1] * 100,
318 *(data[f][pt.name1] - data[f][pt.name2]) / data[f][pt.name2] * 100,
319 ]))
320 for f in filenames
321 ]
322 pt_res_data = {pt: [[xdata_pt[i], ydata_pt[i]] for i in range(len(filenames))]}
323 pt_res_labels = {pt: [
324 f"{pt.latex}{pt.unit.name}",
325 r"$\sigma_{{68}}(\frac{{\Delta {}}}{{{}}})$".format(
326 pt.latex.replace("$", ""), pt.latex.replace("$", "")) + "[%]",
327 ]}
328
329 def ptfit(x, a, b):
330 return ((x * a)**2 + b**2)**0.5
331
332 def ptfitlabel(fit, err):
333 return (r"y=$\sqrt{A^{2} x^{2}+B^{2}} $" + "\n"
334 + f"A = ({fit[0]:.3f}" + r" $\pm$ " + f"{err[0]:.3f})" + "[% c/GeV]" + "\n"
335 + f"B = ({fit[1]:.3f}" + r" $\pm$ " + f"{err[1]:.3f})" + "[%]")
336
337 plot_resolution(
338 "Pt resolution vs Pt",
339 pt_res_data, labels, pt_res_labels,
340 xlimit=[0, 10], ylimits={pt: [0, 2]},
341 bins=[1, 1.5, 2, 2.5, 3, 4, 5, 6, 7, 8, 9, 10],
342 fitfunction=ptfit, fitlabel=ptfitlabel,
343 )