9Pure utility functions shared between cosmics and dimuon validation.
20def get_variable_names(variables: list) -> list:
21 """Flatten a list of Variable objects into a list of ROOT branch-name strings.
25 variables : list of GlobalVariable or TrackVariable
26 Variable metadata objects whose branch names should be collected.
31 All ROOT branch names
in the order they appear
in ``variables``.
32 TrackVariable contributes two names (name1, name2).
35 for variable
in variables:
36 names.extend(variable.getName())
40def auto_range(data_list: list, percent: float, modify: float = 0, symmetric: bool =
False) -> tuple:
41 """Return an axis range covering the central ``percent`` of all datasets.
43 The range is determined by the union of per-dataset percentile intervals,
44 optionally symmetrised around the grand median
and padded by ``modify``.
48 data_list : list of array-like
49 One
or more data arrays to consider simultaneously. The returned range
50 is wide enough to cover all of them.
52 Central percentage of data to include (e.g. 96 keeps the central 96%,
53 clipping 2%
from each tail).
54 modify : float, optional
55 Fractional padding applied to the final range width on each side.
56 E.g. 0.1 expands the range by 10% on each end. Default
is 0.
57 symmetric : bool, optional
58 If
True, the range
is symmetrised around the mean of per-dataset
59 medians before padding. Default
is False.
63 tuple of (float, float)
64 ``(minimum, maximum)`` of the computed range.
66 low_percent = (100 - percent) / 2
67 high_percent = 100 - low_percent
68 low, high, median = (), (), ()
69 for data
in data_list:
70 low += (np.percentile(data, low_percent),)
71 high += (np.percentile(data, high_percent),)
72 median += (np.median(data),)
75 center = np.mean(median)
77 if abs(center - minimum) > abs(center - maximum):
78 maximum = center + center - minimum
79 if abs(center - minimum) < abs(center - maximum):
80 minimum = center + center - maximum
81 minimum = minimum - (maximum - minimum) * modify
82 maximum = maximum + (maximum - minimum) * modify
83 return (minimum, maximum)
86def normal_distribution(x: float, a: float, mu: float, sigma: float) -> float:
87 """Evaluate a normalised Gaussian at ``x``.
91 x : float or array-like
92 Point(s) at which to evaluate the Gaussian.
94 Amplitude (total integral of the Gaussian).
98 Standard deviation of the Gaussian.
103 Value of ``a / (sigma * sqrt(2*pi)) * exp(-0.5 * ((x-mu)/sigma)^2)``.
105 return (a / (sigma * (2 * np.pi) ** 0.5)) * np.exp(-((x - mu) / sigma) ** 2 / 2)
108def to_bins(x, y, bins, x_limits):
109 """Bin (x, y) data and compute per-bin median, sigma68, and their uncertainties.
111 Bins with fewer than 10 entries are filled
with ``nan``.
116 Independent variable used to define bins.
118 Dependent variable whose distribution
is summarised
in each bin.
119 bins : int
or sequence of float
120 Number of equal-width bins
or an explicit sequence of bin edges,
121 passed directly to ``numpy.histogram_bin_edges``.
122 x_limits : tuple of (float, float)
123 ``(min, max)`` range over which to bin.
127 x_vals : list of float
128 Bin-centre x values (``nan``
for empty bins).
129 y_medians : list of float
130 Per-bin median of ``y`` (``nan``
for empty bins).
131 x_halfwidth : list of float
132 Half-width of each bin
in x (``nan``
for empty bins).
133 y_approx_uncert : list of float
134 Approximate uncertainty on the median: ``
std(y) / sqrt(N)``
135 (``nan``
for empty bins).
136 halfwidth_of_sigma68 : list of float
137 Half the 16th–84th percentile range of ``y``, i.e. sigma68
138 (``nan``
for empty bins).
139 sigma68_approx_uncert : list of float
140 Approximate uncertainty on sigma68: ``sigma68 / sqrt(N)``
141 (``nan``
for empty bins).
143 x_edges = np.histogram_bin_edges(x, bins, x_limits)
144 x_bin_halfwidth = [(x_edges[i] - x_edges[i - 1]) / 2 for i
in range(1, len(x_edges))]
145 bin_numbers = np.digitize(x, x_edges) - 1
146 y_medians, y_approx_uncert, halfwidth_of_sigma68, sigma68_approx_uncert, x_halfwidth, x_vals = [], [], [], [], [], []
147 for i
in range(len(x_edges) - 1):
148 if y[bin_numbers == i].size >= 10:
150 print(f
"Events in bin {x_edges[i]} to {x_edges[i+1]}: {y[bin_numbers == i].size}")
151 y_medians.append(float(np.median(y[bin_numbers == i])))
152 y_approx_uncert.append(float(np.std(y[bin_numbers == i]) / len(y[bin_numbers == i]) ** 0.5))
153 halfwidth_of_sigma68.append((np.percentile(y[bin_numbers == i], 84) -
154 np.percentile(y[bin_numbers == i], 16)) / 2)
155 sigma68_approx_uncert.append(halfwidth_of_sigma68[-1] / len(y[bin_numbers == i]) ** 0.5)
156 x_halfwidth.append(x_bin_halfwidth[i])
157 x_vals.append(x_edges[i] + x_bin_halfwidth[i])
159 y_medians.append(np.nan)
160 y_approx_uncert.append(np.nan)
161 halfwidth_of_sigma68.append(np.nan)
162 sigma68_approx_uncert.append(np.nan)
163 x_halfwidth.append(np.nan)
164 x_vals.append(np.nan)
166 print(f
"Warning: bin {x_edges[i]}-{x_edges[i+1]} is empty.")
167 return x_vals, y_medians, x_halfwidth, y_approx_uncert, halfwidth_of_sigma68, sigma68_approx_uncert
170def swap_tracks(data: dict, vars: list) -> dict:
171 """Swap the name1/name2 arrays for each TrackVariable in ``vars`` within the data dict.
173 Modifies ``data`` in-place
and also returns it.
178 Data dictionary mapping ROOT branch names to arrays,
as returned by
180 vars : list of GlobalVariable
or TrackVariable
181 Variables to process. Only TrackVariable instances are swapped;
182 GlobalVariable instances are ignored.
187 The same ``data`` dict
with track-1
and track-2 arrays swapped
for
188 each TrackVariable
in ``vars``.
191 if isinstance(var, TrackVariable):
192 data[var.name1], data[var.name2] = data[var.name2], data[var.name1]
196def get_filter(data: dict, cut: dict) -> np.ndarray:
197 """Return a boolean mask selecting events that pass all (min, max) cuts.
202 Data dictionary mapping branch names to arrays.
204 Selection criteria as ``{branch_name: (min, max)}``. An event passes
205 if ``min < data[branch_name] < max``
for every entry
in ``cut``.
206 An empty dict returns a mask of all
True.
210 numpy.ndarray of bool
211 Boolean mask
with the same length
as the data arrays.
213 mask = np.ones_like(next(iter(data.values())), dtype=bool)
215 mask = np.logical_and(mask, np.logical_and(cut[var][0] < data[var], data[var] < cut[var][1]))
219def pseudomomentum(p, tan, power):
220 """Compute the muon pseudomomentum p*beta*sin(theta)^(power/2).
222 This quantity is used
as the x-axis
for resolution-vs-momentum plots.
223 The muon mass (105.7 MeV/c²)
is used
for the beta factor.
228 Transverse momentum
in GeV/c.
230 tan(
lambda), where
lambda is the dip angle (= pz/pt).
232 Exponent of sin(theta)
in the pseudomomentum definition.
233 Typical values are 3 (
for z0/tanLambda)
and 5 (
for d0/phi).
238 Pseudomomentum values
in GeV/c.
240 return p * (1 + 1 / tan ** 2) ** 0.5 * 1 / (1 + 0.105 ** 2 / (p ** 2 *
241 (1 + tan ** 2))) ** 0.5 * 1 / ((1 + tan ** 2) ** 0.5) ** power