Belle II Software development
fit_dimuon_mass.py
1#!/usr/bin/env python3
2
3
10"""
11Unbinned maximum-likelihood fit of the di-muon invariant mass.
12Model: Crystal Ball (signal peak) + Exponential (ISR / continuum background)
13
14Can be used as a script::
15
16 python3 fit_dimuon_mass.py
17
18or imported and called programmatically::
19
20 from fit_dimuon_mass import fit
21 result = fit(data_array)
22"""
23
24import numpy as np
25import matplotlib.pyplot as plt
26from scipy.optimize import minimize
27from scipy.special import erf
28from scipy.stats import chi2 as chi2_dist
29from numpy.linalg import inv
30
31# ─── Default fit range & binning ──────────────────────────────────────────────
32MASS_MIN = 10.2 # GeV/c^2
33MASS_MAX = 10.8
34NBINS = 100
35
36# ─── PDF components (pure functions, no state) ────────────────────────────────
37
38
39def _cb_norm(mu, sigma, alpha, n, a, b):
40 """Closed-form integral of the Crystal Ball function over [a, b].
41
42 The power-law tail is on the LOW-MASS side: x_break = mu - alpha * sigma.
43
44 Parameters
45 ----------
46 mu, sigma, alpha, n : float
47 Crystal Ball shape parameters (mean, width, tail onset, tail exponent).
48 a, b : float
49 Integration limits.
50
51 Returns
52 -------
53 float
54 Integral of the (unnormalised) Crystal Ball over [a, b].
55 """
56 xb = mu - alpha * sigma
57 A = (n / alpha) ** n * np.exp(-0.5 * alpha**2)
58 B = n / alpha - alpha # > 0 for alpha > 0
59
60 # Gaussian piece: [max(a, xb), b]
61 lo = max(a, xb)
62 if lo < b:
63 s2 = sigma * np.sqrt(2.0)
64 n_g = sigma * np.sqrt(np.pi / 2.0) * (
65 erf((b - mu) / s2) - erf((lo - mu) / s2)
66 )
67 else:
68 n_g = 0.0
69
70 # Power-law piece: [a, min(b, xb)]
71 hi = min(b, xb)
72 if a < hi and n > 1.0:
73 t_lo = (a - mu) / sigma
74 t_hi = (hi - mu) / sigma # = -alpha at the break
75 # ∫ A*(B-t)^{-n} sigma dt = sigma*A/(n-1) * [(B-t)^{1-n}]_{t_lo}^{t_hi}
76 n_p = sigma * A / (n - 1.0) * (
77 (B - t_hi) ** (1.0 - n) - (B - t_lo) ** (1.0 - n)
78 )
79 else:
80 n_p = 0.0
81
82 return n_g + n_p
83
84
85def _exp_norm(slope, a, b, x0):
86 """Integral of exp(slope*(x - x0)) over [a, b].
87
88 Parameters
89 ----------
90 slope : float
91 Exponential slope. If ``abs(slope) < 1e-12`` the function is treated
92 as flat and the result is ``b - a``.
93 a, b : float
94 Integration limits.
95 x0 : float
96 Pivot point of the exponential (reduces numerical sensitivity).
97
98 Returns
99 -------
100 float
101 """
102 if abs(slope) < 1e-12:
103 return b - a
104 return (np.exp(slope * (b - x0)) - np.exp(slope * (a - x0))) / slope
105
106
107def _cb(x, mu, sigma, alpha, n):
108 """Evaluate the unnormalised Crystal Ball function at ``x``.
109
110 Gaussian core for ``(x - mu)/sigma >= -alpha``, power-law tail for lower values.
111
112 Parameters
113 ----------
114 x : array-like
115 mu, sigma, alpha, n : float
116 Shape parameters (mean, width, tail onset, tail exponent).
117
118 Returns
119 -------
120 ndarray
121 """
122 t = (x - mu) / sigma
123 A = (n / alpha) ** n * np.exp(-0.5 * alpha**2)
124 B = n / alpha - alpha
125 # Guard against overflow: np.where pre-evaluates both branches, so
126 # substitute a safe dummy in the Gaussian region before taking the power.
127 power_arg = np.where(t < -alpha, B - t, n / alpha)
128 return np.where(
129 t >= -alpha,
130 np.exp(-0.5 * t**2),
131 A * np.power(power_arg, -n),
132 )
133
134
135def _exp(x, slope, x0):
136 """Evaluate exp(slope * (x - x0)).
137
138 Parameters
139 ----------
140 x : array-like
141 slope : float
142 x0 : float
143 Pivot point.
144
145 Returns
146 -------
147 ndarray
148 """
149 return np.exp(slope * (x - x0))
150
151
152def _hessian(f, x0, eps=1e-5):
153 """Compute the symmetric finite-difference Hessian of ``f`` at ``x0``.
154
155 Uses ``n + n*(n+1)/2 + 1`` function evaluations.
156
157 Parameters
158 ----------
159 f : callable
160 Scalar function of a 1-D array.
161 x0 : array-like
162 Point at which the Hessian is evaluated.
163 eps : float, optional
164 Finite-difference step size. Default is ``1e-5``.
165
166 Returns
167 -------
168 numpy.ndarray of shape (n, n)
169 Symmetric Hessian matrix.
170 """
171 nd = len(x0)
172 f0 = f(x0)
173 fi = [f(x0 + eps * np.eye(nd)[i]) for i in range(nd)]
174 H = np.zeros((nd, nd))
175 for i in range(nd):
176 for j in range(i, nd):
177 xij = x0.copy()
178 xij[i] += eps
179 xij[j] += eps
180 H[i, j] = (f(xij) - fi[i] - fi[j] + f0) / eps**2
181 H[j, i] = H[i, j]
182 return H
183
184
185# ─── Main fitting function ────────────────────────────────────────────────────
186def fit(data, mass_min=MASS_MIN, mass_max=MASS_MAX, nbins=NBINS,
187 savefig="dimuon_fit.pdf", title=r"Di-muon invariant mass — $e^+e^- \to \mu^+\mu^-$ (Belle II data)"):
188 """Fit the di-muon invariant mass with Crystal Ball + exponential background.
189
190 Parameters
191 ----------
192 data : array-like
193 Raw invariant mass values in GeV/c^2 (range selection is applied internally).
194 mass_min, mass_max : float
195 Fit range in GeV/c^2.
196 nbins : int
197 Number of histogram bins used for the chi^2/NDF and the plot.
198 savefig : str or None
199 File name for the output plot. Pass ``None`` to skip saving.
200
201 Returns
202 -------
203 dict with keys:
204 pfit – array [mu, sigma, alpha, n, slope, f_sig]
205 errors – array of 1-sigma uncertainties (from Hessian)
206 chi2 – chi^2 value (binned, bins with expected > 5)
207 ndf – degrees of freedom
208 pval – p-value
209 fig – matplotlib Figure
210 """
211 data = np.asarray(data)
212 data = data[(data >= mass_min) & (data <= mass_max)]
213 N = len(data)
214 bw = (mass_max - mass_min) / nbins
215 x0 = (mass_min + mass_max) / 2.0 # exponential pivot
216
217 print(f"Events in fit range [{mass_min}, {mass_max}] GeV/c^2: {N:,}")
218
219 # ── NLL (closes over data, x0, mass_min, mass_max) ────────────────────────
220 def nll(p):
221 mu, sigma, alpha, n, slope, fsig = p
222 if sigma <= 0 or alpha <= 0 or n <= 1.01 or not (0.0 < fsig < 1.0):
223 return 1e15
224 cb_n = _cb_norm(mu, sigma, alpha, n, mass_min, mass_max)
225 exp_n = _exp_norm(slope, mass_min, mass_max, x0)
226 if cb_n <= 0 or exp_n <= 0:
227 return 1e15
228 pdf = (fsig * _cb(data, mu, sigma, alpha, n) / cb_n +
229 (1.0 - fsig) * _exp(data, slope, x0) / exp_n)
230 if np.any(pdf <= 0):
231 return 1e15
232 return -np.sum(np.log(pdf))
233
234 # ── Minimisation ──────────────────────────────────────────────────────────
235 # mu sigma alpha n slope fsig
236 p0 = [10.575, 0.040, 1.5, 5.0, 3.0, 0.65]
237 bounds = [(10.50, 10.65), (0.005, 0.10), (0.1, 5.0),
238 (1.1, 20.0), (-10.0, 10.0), (0.01, 0.999)]
239
240 print("Minimising NLL ...")
241 res = minimize(nll, p0, method="L-BFGS-B", bounds=bounds,
242 options={"maxiter": 10_000, "ftol": 1e-14, "gtol": 1e-8})
243 if not res.success:
244 print(" L-BFGS-B did not converge; switching to Nelder-Mead ...")
245 res2 = minimize(nll, p0, method="Nelder-Mead",
246 options={"maxiter": 500_000, "xatol": 1e-8,
247 "fatol": 1e-8, "adaptive": True})
248 if res2.fun < res.fun:
249 res = res2
250
251 pfit = res.x
252 mu_f, sig_f, alp_f, n_f, sl_f, fs_f = pfit
253 print(f" NLL = {res.fun:.4f} (converged: {res.success})")
254
255 # ── Errors from numerical Hessian ─────────────────────────────────────────
256 try:
257 H = _hessian(nll, pfit)
258 cov = inv(H)
259 errors = np.sqrt(np.abs(np.diag(cov)))
260 except (np.linalg.LinAlgError, ValueError):
261 errors = np.full(len(pfit), np.nan)
262 print(" Warning: Hessian inversion failed — errors unavailable")
263
264 pnames = ["mu [GeV/c^2]", "sigma [GeV/c^2]", "alpha", "n", "slope", "f_sig"]
265 print("\nFit result:")
266 for name, val, err in zip(pnames, pfit, errors):
267 print(f" {name:16s} {val:+10.6f} +/- {err:.6f}")
268
269 # ── Chi^2 / NDF ────────────────────────────────────────────────────────────
270 counts, edges = np.histogram(data, bins=nbins, range=(mass_min, mass_max))
271 centers = 0.5 * (edges[:-1] + edges[1:])
272
273 cb_n = _cb_norm(mu_f, sig_f, alp_f, n_f, mass_min, mass_max)
274 exp_n = _exp_norm(sl_f, mass_min, mass_max, x0)
275 exp_c = (fs_f * _cb(centers, mu_f, sig_f, alp_f, n_f) / cb_n +
276 (1.0 - fs_f) * _exp(centers, sl_f, x0) / exp_n) * N * bw
277
278 valid = exp_c > 5.0
279 chi2v = np.sum((counts[valid] - exp_c[valid])**2 / exp_c[valid])
280 ndf = int(valid.sum()) - len(pfit)
281 pval = chi2_dist.sf(chi2v, ndf)
282
283 print("\nGoodness of fit (binned chi^2):")
284 print(f" chi^2/NDF = {chi2v:.1f} / {ndf} = {chi2v / ndf:.3f}")
285 print(f" p-value = {pval:.4f}")
286
287 # ── Plot ──────────────────────────────────────────────────────────────────
288 fig, (ax, axp) = plt.subplots(
289 2, 1, figsize=(9, 7),
290 gridspec_kw={"height_ratios": [3, 1]}, sharex=True
291 )
292
293 x_fine = np.linspace(mass_min, mass_max, 2000)
294 cb_plt = _cb(x_fine, mu_f, sig_f, alp_f, n_f) / cb_n
295 exp_plt = _exp(x_fine, sl_f, x0) / exp_n
296 tot_plt = fs_f * cb_plt + (1.0 - fs_f) * exp_plt
297 sc = N * bw # PDF → counts / bin
298
299 ax.errorbar(centers, counts, yerr=np.sqrt(counts),
300 fmt="ko", ms=3, lw=1.0, label="Data", zorder=3)
301 ax.plot(x_fine, tot_plt * sc, "b-", lw=2.0, label="Total fit")
302 ax.plot(x_fine, fs_f * cb_plt * sc, "r--", lw=1.5, label="Crystal Ball (signal)")
303 ax.plot(x_fine, (1 - fs_f) * exp_plt * sc, "g-.", lw=1.5, label="Exp. background")
304
305 ax.set_xlim(mass_min, mass_max)
306 ax.set_ylabel(f"Entries / {bw * 1000:.0f} MeV/$c^2$", fontsize=13)
307 ax.legend(fontsize=11, loc="upper left")
308 ax.set_title(title,
309 fontsize=13)
310
311 info = "\n".join([
312 rf"$\mu$ = {mu_f * 1000:.2f} $\pm$ {errors[0] * 1000:.2f} MeV/$c^2$",
313 rf"$\sigma$ = {sig_f * 1000:.2f} $\pm$ {errors[1] * 1000:.2f} MeV/$c^2$",
314 rf"$\alpha$ = {alp_f:.3f} $\pm$ {errors[2]:.3f}",
315 rf"$n$ = {n_f:.2f} $\pm$ {errors[3]:.2f}",
316 rf"slope = {sl_f:.3f} $\pm$ {errors[4]:.3f}",
317 rf"$f_{{\rm sig}}$ = {fs_f:.3f} $\pm$ {errors[5]:.3f}",
318 rf"$\chi^2$/NDF = {chi2v:.0f}/{ndf} = {chi2v / ndf:.2f}",
319 rf"$p$-value = {pval:.4f}",
320 ])
321 ax.text(0.04, 0.3, info, transform=ax.transAxes, va="bottom", ha="left",
322 fontsize=10, bbox=dict(boxstyle="round", facecolor="lightyellow", alpha=0.85))
323
324 pulls = np.where(exp_c > 0, (counts - exp_c) / np.sqrt(exp_c), 0.0)
325 axp.bar(centers, pulls, width=bw, color="steelblue", alpha=0.7)
326 axp.axhline(0, color="black", lw=0.8)
327 axp.axhline(+2, color="red", lw=0.8, ls="--")
328 axp.axhline(-2, color="red", lw=0.8, ls="--")
329 axp.set_ylabel("pull", fontsize=12)
330 axp.set_xlabel(r"$M(\mu^+\mu^-)$ [GeV/$c^2$]", fontsize=13)
331 axp.set_ylim(-5, 5)
332
333 fig.tight_layout()
334 if savefig:
335 fig.savefig(savefig, dpi=150)
336 print(f"\nSaved {savefig}")
337
338 return dict(pfit=pfit, errors=errors, chi2=chi2v, ndf=ndf, pval=pval, fig=fig)
339
340
341# ─── Script entry point ───────────────────────────────────────────────────────
342if __name__ == "__main__":
343 data_all = np.loadtxt("InvM_data.txt")
344 result = fit(data_all)
345 plt.show()