Belle II Software release-09-00-14
fit_dimuon_mass.py
1#!/usr/bin/env python3
2
3
10"""
11Unbinned maximum-likelihood fit of the di-muon invariant mass.
12Model: Crystal Ball (signal peak) + Exponential (ISR / continuum background)
13
14Can be used as a script::
15
16 python3 fit_dimuon_mass.py
17
18or imported and called programmatically::
19
20 from fit_dimuon_mass import fit
21 result = fit(data_array)
22"""
23
24import numpy as np
25import matplotlib.pyplot as plt
26from scipy.optimize import minimize
27from scipy.special import erf
28from scipy.stats import chi2 as chi2_dist
29from numpy.linalg import inv
30
31# ─── Default fit range & binning ──────────────────────────────────────────────
32MASS_MIN = 10.2 # GeV/c^2
33MASS_MAX = 10.8
34NBINS = 100
35
36# ─── PDF components (pure functions, no state) ────────────────────────────────
37
38
39def _cb_norm(mu, sigma, alpha, n, a, b):
40 """Closed-form integral of the Crystal Ball function over [a, b].
41
42 The power-law tail is on the LOW-MASS side: x_break = mu - alpha * sigma.
43
44 Parameters
45 ----------
46 mu, sigma, alpha, n : float
47 Crystal Ball shape parameters (mean, width, tail onset, tail exponent).
48 a, b : float
49 Integration limits.
50
51 Returns
52 -------
53 float
54 Integral of the (unnormalised) Crystal Ball over [a, b].
55 """
56 xb = mu - alpha * sigma
57 A = (n / alpha) ** n * np.exp(-0.5 * alpha**2)
58 B = n / alpha - alpha # > 0 for alpha > 0
59
60 # Gaussian piece: [max(a, xb), b]
61 lo = max(a, xb)
62 if lo < b:
63 s2 = sigma * np.sqrt(2.0)
64 n_g = sigma * np.sqrt(np.pi / 2.0) * (
65 erf((b - mu) / s2) - erf((lo - mu) / s2)
66 )
67 else:
68 n_g = 0.0
69
70 # Power-law piece: [a, min(b, xb)]
71 hi = min(b, xb)
72 if a < hi and n > 1.0:
73 t_lo = (a - mu) / sigma
74 t_hi = (hi - mu) / sigma # = -alpha at the break
75 # ∫ A*(B-t)^{-n} sigma dt = sigma*A/(n-1) * [(B-t)^{1-n}]_{t_lo}^{t_hi}
76 n_p = sigma * A / (n - 1.0) * (
77 (B - t_hi) ** (1.0 - n) - (B - t_lo) ** (1.0 - n)
78 )
79 else:
80 n_p = 0.0
81
82 return n_g + n_p
83
84
85def _exp_norm(slope, a, b, x0):
86 """Integral of exp(slope*(x - x0)) over [a, b].
87
88 Parameters
89 ----------
90 slope : float
91 Exponential slope. If ``abs(slope) < 1e-12`` the function is treated
92 as flat and the result is ``b - a``.
93 a, b : float
94 Integration limits.
95 x0 : float
96 Pivot point of the exponential (reduces numerical sensitivity).
97
98 Returns
99 -------
100 float
101 """
102 if abs(slope) < 1e-12:
103 return b - a
104 return (np.exp(slope * (b - x0)) - np.exp(slope * (a - x0))) / slope
105
106
107def _cb(x, mu, sigma, alpha, n):
108 """Evaluate the unnormalised Crystal Ball function at ``x``.
109
110 Gaussian core for ``(x - mu)/sigma >= -alpha``, power-law tail for lower values.
111
112 Parameters
113 ----------
114 x : array-like
115 mu, sigma, alpha, n : float
116 Shape parameters (mean, width, tail onset, tail exponent).
117
118 Returns
119 -------
120 ndarray
121 """
122 t = (x - mu) / sigma
123 A = (n / alpha) ** n * np.exp(-0.5 * alpha**2)
124 B = n / alpha - alpha
125 # Guard against overflow: np.where pre-evaluates both branches, so
126 # substitute a safe dummy in the Gaussian region before taking the power.
127 power_arg = np.where(t < -alpha, B - t, n / alpha)
128 return np.where(
129 t >= -alpha,
130 np.exp(-0.5 * t**2),
131 A * np.power(power_arg, -n),
132 )
133
134
135def _exp(x, slope, x0):
136 """Evaluate exp(slope * (x - x0)).
137
138 Parameters
139 ----------
140 x : array-like
141 slope : float
142 x0 : float
143 Pivot point.
144
145 Returns
146 -------
147 ndarray
148 """
149 return np.exp(slope * (x - x0))
150
151
152def _hessian(f, x0, eps=1e-5):
153 """Compute the symmetric finite-difference Hessian of ``f`` at ``x0``.
154
155 Uses ``n + n*(n+1)/2 + 1`` function evaluations.
156
157 Parameters
158 ----------
159 f : callable
160 Scalar function of a 1-D array.
161 x0 : array-like
162 Point at which the Hessian is evaluated.
163 eps : float, optional
164 Finite-difference step size. Default is ``1e-5``.
165
166 Returns
167 -------
168 numpy.ndarray of shape (n, n)
169 Symmetric Hessian matrix.
170 """
171 nd = len(x0)
172 f0 = f(x0)
173 fi = [f(x0 + eps * np.eye(nd)[i]) for i in range(nd)]
174 H = np.zeros((nd, nd))
175 for i in range(nd):
176 for j in range(i, nd):
177 xij = x0.copy()
178 xij[i] += eps
179 xij[j] += eps
180 H[i, j] = (f(xij) - fi[i] - fi[j] + f0) / eps**2
181 H[j, i] = H[i, j]
182 return H
183
184
185# ─── Main fitting function ────────────────────────────────────────────────────
186def fit(data, mass_min=MASS_MIN, mass_max=MASS_MAX, nbins=NBINS,
187 savefig="dimuon_fit.pdf", title=r"Di-muon invariant mass — $e^+e^- \to \mu^+\mu^-$ (Belle II data)"): """Fit the di-muon invariant mass with Crystal Ball + exponential background.
188
189 Parameters
190 ----------
191 data : array-like
192 Raw invariant mass values in GeV/c^2 (range selection is applied internally).
193 mass_min, mass_max : float
194 Fit range in GeV/c^2.
195 nbins : int
196 Number of histogram bins used for the chi^2/NDF and the plot.
197 savefig : str or None
198 File name for the output plot. Pass ``None`` to skip saving.
199
200 Returns
201 -------
202 dict with keys:
203 pfit – array [mu, sigma, alpha, n, slope, f_sig]
204 errors – array of 1-sigma uncertainties (from Hessian)
205 chi2 – chi^2 value (binned, bins with expected > 5)
206 ndf – degrees of freedom
207 pval – p-value
208 fig – matplotlib Figure
209 """
210 data = np.asarray(data)
211 data = data[(data >= mass_min) & (data <= mass_max)]
212 N = len(data)
213 bw = (mass_max - mass_min) / nbins
214 x0 = (mass_min + mass_max) / 2.0 # exponential pivot
215
216 print(f"Events in fit range [{mass_min}, {mass_max}] GeV/c^2: {N:,}")
217
218 # ── NLL (closes over data, x0, mass_min, mass_max) ────────────────────────
219 def nll(p):
220 mu, sigma, alpha, n, slope, fsig = p
221 if sigma <= 0 or alpha <= 0 or n <= 1.01 or not (0.0 < fsig < 1.0):
222 return 1e15
223 cb_n = _cb_norm(mu, sigma, alpha, n, mass_min, mass_max)
224 exp_n = _exp_norm(slope, mass_min, mass_max, x0)
225 if cb_n <= 0 or exp_n <= 0:
226 return 1e15
227 pdf = (fsig * _cb(data, mu, sigma, alpha, n) / cb_n +
228 (1.0 - fsig) * _exp(data, slope, x0) / exp_n)
229 if np.any(pdf <= 0):
230 return 1e15
231 return -np.sum(np.log(pdf))
232
233 # ── Minimisation ──────────────────────────────────────────────────────────
234 # mu sigma alpha n slope fsig
235 p0 = [10.575, 0.040, 1.5, 5.0, 3.0, 0.65]
236 bounds = [(10.50, 10.65), (0.005, 0.10), (0.1, 5.0),
237 (1.1, 20.0), (-10.0, 10.0), (0.01, 0.999)]
238
239 print("Minimising NLL ...")
240 res = minimize(nll, p0, method="L-BFGS-B", bounds=bounds,
241 options={"maxiter": 10_000, "ftol": 1e-14, "gtol": 1e-8})
242 if not res.success:
243 print(" L-BFGS-B did not converge; switching to Nelder-Mead ...")
244 res2 = minimize(nll, p0, method="Nelder-Mead",
245 options={"maxiter": 500_000, "xatol": 1e-8,
246 "fatol": 1e-8, "adaptive": True})
247 if res2.fun < res.fun:
248 res = res2
249
250 pfit = res.x
251 mu_f, sig_f, alp_f, n_f, sl_f, fs_f = pfit
252 print(f" NLL = {res.fun:.4f} (converged: {res.success})")
253
254 # ── Errors from numerical Hessian ─────────────────────────────────────────
255 try:
256 H = _hessian(nll, pfit)
257 cov = inv(H)
258 errors = np.sqrt(np.abs(np.diag(cov)))
259 except (np.linalg.LinAlgError, ValueError):
260 errors = np.full(len(pfit), np.nan)
261 print(" Warning: Hessian inversion failed — errors unavailable")
262
263 pnames = ["mu [GeV/c^2]", "sigma [GeV/c^2]", "alpha", "n", "slope", "f_sig"]
264 print("\nFit result:")
265 for name, val, err in zip(pnames, pfit, errors):
266 print(f" {name:16s} {val:+10.6f} +/- {err:.6f}")
267
268 # ── Chi^2 / NDF ────────────────────────────────────────────────────────────
269 counts, edges = np.histogram(data, bins=nbins, range=(mass_min, mass_max))
270 centers = 0.5 * (edges[:-1] + edges[1:])
271
272 cb_n = _cb_norm(mu_f, sig_f, alp_f, n_f, mass_min, mass_max)
273 exp_n = _exp_norm(sl_f, mass_min, mass_max, x0)
274 exp_c = (fs_f * _cb(centers, mu_f, sig_f, alp_f, n_f) / cb_n +
275 (1.0 - fs_f) * _exp(centers, sl_f, x0) / exp_n) * N * bw
276
277 valid = exp_c > 5.0
278 chi2v = np.sum((counts[valid] - exp_c[valid])**2 / exp_c[valid])
279 ndf = int(valid.sum()) - len(pfit)
280 pval = chi2_dist.sf(chi2v, ndf)
281
282 print("\nGoodness of fit (binned chi^2):")
283 print(f" chi^2/NDF = {chi2v:.1f} / {ndf} = {chi2v / ndf:.3f}")
284 print(f" p-value = {pval:.4f}")
285
286 # ── Plot ──────────────────────────────────────────────────────────────────
287 fig, (ax, axp) = plt.subplots(
288 2, 1, figsize=(9, 7),
289 gridspec_kw={"height_ratios": [3, 1]}, sharex=True
290 )
291
292 x_fine = np.linspace(mass_min, mass_max, 2000)
293 cb_plt = _cb(x_fine, mu_f, sig_f, alp_f, n_f) / cb_n
294 exp_plt = _exp(x_fine, sl_f, x0) / exp_n
295 tot_plt = fs_f * cb_plt + (1.0 - fs_f) * exp_plt
296 sc = N * bw # PDF → counts / bin
297
298 ax.errorbar(centers, counts, yerr=np.sqrt(counts),
299 fmt="ko", ms=3, lw=1.0, label="Data", zorder=3)
300 ax.plot(x_fine, tot_plt * sc, "b-", lw=2.0, label="Total fit")
301 ax.plot(x_fine, fs_f * cb_plt * sc, "r--", lw=1.5, label="Crystal Ball (signal)")
302 ax.plot(x_fine, (1 - fs_f) * exp_plt * sc, "g-.", lw=1.5, label="Exp. background")
303
304 ax.set_xlim(mass_min, mass_max)
305 ax.set_ylabel(f"Entries / {bw * 1000:.0f} MeV/$c^2$", fontsize=13)
306 ax.legend(fontsize=11, loc="upper left")
307 ax.set_title(title,
308 fontsize=13)
309
310 info = "\n".join([
311 rf"$\mu$ = {mu_f * 1000:.2f} $\pm$ {errors[0] * 1000:.2f} MeV/$c^2$",
312 rf"$\sigma$ = {sig_f * 1000:.2f} $\pm$ {errors[1] * 1000:.2f} MeV/$c^2$",
313 rf"$\alpha$ = {alp_f:.3f} $\pm$ {errors[2]:.3f}",
314 rf"$n$ = {n_f:.2f} $\pm$ {errors[3]:.2f}",
315 rf"slope = {sl_f:.3f} $\pm$ {errors[4]:.3f}",
316 rf"$f_{{\rm sig}}$ = {fs_f:.3f} $\pm$ {errors[5]:.3f}",
317 rf"$\chi^2$/NDF = {chi2v:.0f}/{ndf} = {chi2v / ndf:.2f}",
318 rf"$p$-value = {pval:.4f}",
319 ])
320 ax.text(0.04, 0.3, info, transform=ax.transAxes, va="bottom", ha="left",
321 fontsize=10, bbox=dict(boxstyle="round", facecolor="lightyellow", alpha=0.85))
322
323 pulls = np.where(exp_c > 0, (counts - exp_c) / np.sqrt(exp_c), 0.0)
324 axp.bar(centers, pulls, width=bw, color="steelblue", alpha=0.7)
325 axp.axhline(0, color="black", lw=0.8)
326 axp.axhline(+2, color="red", lw=0.8, ls="--")
327 axp.axhline(-2, color="red", lw=0.8, ls="--")
328 axp.set_ylabel("pull", fontsize=12)
329 axp.set_xlabel(r"$M(\mu^+\mu^-)$ [GeV/$c^2$]", fontsize=13)
330 axp.set_ylim(-5, 5)
331
332 fig.tight_layout()
333 if savefig:
334 fig.savefig(savefig, dpi=150)
335 print(f"\nSaved {savefig}")
336
337 return dict(pfit=pfit, errors=errors, chi2=chi2v, ndf=ndf, pval=pval, fig=fig)
338
339
340# ─── Script entry point ───────────────────────────────────────────────────────
341if __name__ == "__main__":
342 data_all = np.loadtxt("InvM_data.txt")
343 result = fit(data_all)
344 plt.show()
345