4 from .
import fit_functions
5 from scipy.optimize
import curve_fit
6 import matplotlib.pyplot
as plt
8 from sklearn
import tree
# NOTE(review): this file is a numbered listing with source lines elided (the
# embedded numbers skip), so the class header, the def lines and any condition
# guarding the raises below are not visible here.
12 """Base trainer for dE/dx-based estimation; the visible method bodies only raise, so this appears to be an interface for subclasses"""
22 """Train on the input data; the visible body raises NotImplementedError"""
24 raise NotImplementedError(
"Use this class as a base class only")
27 """Return the trained estimator's output for test data; the visible body raises ValueError asking for training first"""
# NOTE(review): presumably guarded by a "not trained yet" check on a line not shown — confirm.
29 raise ValueError(
"Train the estimator first!")
35 """Trainer that groups the data into dE/dx bins and momentum bins before delegating to train_function (no neural network is visible in this listing)"""
# Number of points handed to np.linspace for the dE/dx bin edges.
# NOTE(review): np.linspace(a, b, N) returns N edges, i.e. N - 1 intervals, so
# these "number_of_bins" settings produce 19 and 28 bins — confirm that is intended.
38 number_of_bins_in_dedx = 20
# Number of points handed to np.linspace for the momentum bin edges.
40 number_of_bins_in_p = 29
# NOTE(review): not referenced on any visible line; presumably the N used for the
# "highest N values" selection — verify against the elided lines.
42 number_of_head_values_used_to_fit = 20
45 """Cut the dE/dx column into equal-width bins; return (data grouped by bin, bin edges)"""
# The leading linspace arguments are on lines not shown; only the upper edge (column maximum) and the point count are visible.
46 dedx_bins = np.linspace(
49 self.
dedx_column].max(), GroupedDEDXEstimationTrainer.number_of_bins_in_dedx)
50 dedx_cuts = pd.cut(data[self.
dedx_column], dedx_bins)
51 return data.groupby(dedx_cuts), dedx_bins
54 """Cut the momentum p into equal-width bins; return (data grouped by bin, bin edges)"""
55 p_bins = np.linspace(data.p.min(), data.p.max(), GroupedDEDXEstimationTrainer.number_of_bins_in_p)
# NOTE(review): pd.cut defaults to right-closed intervals without include_lowest,
# so rows with p exactly equal to the first edge (the minimum) get no bin — confirm that losing them is acceptable.
56 p_cuts = pd.cut(data.p, p_bins)
57 return data.groupby(p_cuts), p_bins
60 """Keep the number_of_values rows with the largest number_of_p_values and re-sort the selection; the branch taken when number_of_values is None is on lines not shown"""
61 if number_of_values
is None:
# NOTE(review): DataFrame.sort was removed in pandas 0.20. On current pandas the
# first call must become sort_values("number_of_p_values", ascending=False) and
# the trailing argument-less sort() (which ordered by index) must become sort_index().
64 return data.sort(
"number_of_p_values", ascending=
False).head(number_of_values).sort()
67 """Build a table with one row per momentum bin holding the entry count and the bin centre"""
# Per-bin entry counts, taken from the p column of the grouped counts.
70 number_of_p_values = pd.Series(p_binned_data.count().p.values, name=
"number_of_p_values")
# Midpoints of neighbouring bin edges.
71 p_bin_centers = pd.Series(0.5 * (p_bins[:-1] + p_bins[1:]), name=
"p_bin_centers")
# The two named Series become rows of the frame; .T turns them into columns.
73 all_fit_data = pd.DataFrame([number_of_p_values, p_bin_centers]).T
79 """Prepare the momentum fit data for the selected dE/dx bin (on lines not shown) and return self.train_function(fit_data)"""
81 return self.train_function(fit_data)
85 """Trainer that collects a (mu, sigma) pair per dE/dx bin and curve-fits result_function through them (no neural network is visible in this listing)"""
87 def __init__(self, result_function, use_sigma_for_result_fitting):
# The lines that store the two arguments are not shown; later code reads self.result_function.
97 GroupedDEDXEstimationTrainer.__init__(self)
100 """Tabulate mu = fit_parameters[1][1] and sigma = fit_parameters[0] per dE/dx bin centre, skipping bins without a fit; raise ValueError when no row is left"""
# NOTE(review): mu is later plotted on an axis labelled "p in GeV/c", so it appears to be a momentum estimate rather than a mean dE/dx.
101 result_df = pd.DataFrame([{
"dedx_bin_center": dedx_bin_center,
102 "mu": fit_parameters[1][1],
103 "sigma": fit_parameters[0]}
for dedx_bin_center,
105 if fit_parameters
is not None])
107 if len(result_df) == 0:
108 raise ValueError(
"Could not find any fitted parameters!")
# One-sigma band around mu.
111 result_df[
"mu_plus_sigma"] = result_df.mu + result_df.sigma
112 result_df[
"mu_minus_sigma"] = result_df.mu - result_df.sigma
# NOTE(review): DataFrame.sort was removed in pandas 0.20; the current spelling is
# result_df.sort_values("dedx_bin_center", inplace=True).
114 result_df.sort(
"dedx_bin_center", inplace=
True)
119 """Curve-fit result_function to mu versus dE/dx bin centre, starting from fixed initial guesses"""
# Four start values, so result_function is expected to accept four parameters after x.
122 p0 = (7e+08, -4e+04, 0.1, 0)
# Weighted fit: sigma is passed as an absolute uncertainty on mu (presumably the
# use_sigma_for_result_fitting branch — the condition is on a line not shown).
125 popt, pcov = curve_fit(self.
result_function, result_df.dedx_bin_center, result_df.mu, p0=p0,
126 sigma=result_df.sigma, absolute_sigma=
True)
# Unweighted fit (presumably the alternative branch).
128 popt, pcov = curve_fit(self.
result_function, result_df.dedx_bin_center, result_df.mu, p0=p0)
133 """Apply the per-bin fit to every dE/dx group and iterate over the returned {mean dE/dx: fit result} mappings"""
136 def fit_and_save_results(dedx_bin):
# Key the fit result by the mean of the group's dE/dx column; fit_result is computed on a line not shown.
138 return {dedx_bin.mean()[self.
dedx_column]: fit_result}
140 for result
in dedx_binned_data.apply(fit_and_save_results):
147 """Plot the fitted estimator curve together with the per-bin mu values and their sigma error bars"""
151 plt.plot(plot_dedx_data, self.
dedx_estimator_function(plot_dedx_data), color=
"black", label=
"Fitted estimator")
154 plt.errorbar(result_df.dedx_bin_center, result_df.mu, marker=
"o", ls=
"", label=
"Data Points", yerr=result_df.sigma)
157 plt.xlabel(
"dEdX in ADC count/cm")
158 plt.ylabel(
"p in GeV/c")
159 plt.legend(frameon=
True)
162 """Plot entry count versus momentum bin centre, once per distinct dE/dx bin"""
# Remembers which bin centres were already drawn.
166 already_plotted_list = []
168 def plot_fitted_results(dedx_bin):
# NOTE(review): this takes the mean of the first column as the bin centre, whereas
# the training code indexes the mean by self.dedx_column — the two agree only if
# dE/dx is the first column. Confirm.
169 dedx_bin_center = dedx_bin.mean().values[0]
# Skip groups that were already drawn (presumably because groupby.apply may call the function more than once for a group).
171 if dedx_bin_center
not in already_plotted_list:
172 already_plotted_list.append(dedx_bin_center)
# fit_data is prepared on lines not shown.
175 plt.plot(fit_data.p_bin_centers, fit_data.number_of_p_values, ls=
"", marker=
".", color=
"black")
179 plt.xlabel(
"p in GeV/c")
180 plt.ylabel(
"Entries")
182 dedx_binned_data.apply(plot_fitted_results)
186 """Trainer that curve-fits fit_function to each dE/dx bin's momentum histogram (no neural network is visible in this listing)"""
188 def __init__(self, fit_function, dimension_of_fit_function, result_function, use_sigma_for_result_fitting):
# The lines storing fit_function and dimension_of_fit_function are not shown; only the base-class call is visible.
196 FittedGroupedDEDXEstimatorTrainer.__init__(self, result_function, use_sigma_for_result_fitting)
199 """Curve-fit fit_function to entry count versus momentum bin centre; return [standard error of the second parameter, fitted parameters]"""
# Three start values; the selecting condition (presumably on dimension_of_fit_function) and max_value are on lines not shown.
203 p0 = (1e3, max_value, 4e-2)
# Six start values for the alternative branch.
205 p0 = (1e3, max_value, 4e-2, 1, 1, 1)
207 popt, pcov = curve_fit(self.
fit_function, fit_data.p_bin_centers, fit_data.number_of_p_values, p0=p0)
# sqrt of the second diagonal element of pcov, i.e. the standard error of popt[1] (the parameter seeded with max_value).
209 return [np.sqrt(np.diag(pcov)[1]), popt]
215 """Draw the base-class grouped plot, then overlay fit_function evaluated on a fine momentum grid"""
216 FittedGroupedDEDXEstimatorTrainer.plot_grouped_result(self, data)
# 1000 evenly spaced momenta spanning the data range, used as x values for the fitted curve.
220 p_plot_data = np.linspace(data.p.min(), data.p.max(), 1000)
# Remembers which bin centres were already drawn.
223 already_plotted_list = []
225 def plot_fitted_results(dedx_bin):
226 dedx_bin_center = dedx_bin.mean().values[0]
228 if dedx_bin_center
not in already_plotted_list:
230 already_plotted_list.append(dedx_bin_center)
# NOTE(review): fit_options is unpacked but not used on any visible line; the call below reads fitted_results[1] directly.
231 unneeded, fit_options = fitted_results
# NOTE(review): despite its name this holds fit_function evaluated at momenta; the fit above targets entry counts, not dE/dx.
233 dedx_plot_data = self.
fit_function(p_plot_data, *fitted_results[1])
234 plt.plot(p_plot_data, dedx_plot_data)
238 dedx_binned_data.apply(plot_fitted_results)
242 """Function-fitted trainer described as using a Gaussian estimator; fit_functions.inverse_squared is passed on to the base constructor (no neural network is visible in this listing)"""
# Only part of the constructor call is visible: the leading arguments are on lines
# not shown. By position inverse_squared is presumably the result_function — confirm.
246 FunctionFittedGroupedDEDXEstimatorTrainer.__init__(
250 fit_functions.inverse_squared,
251 use_sigma_for_result_fitting=
True)
255 """Function-fitted trainer using fit_functions.landau, with fit_functions.inverse_squared also passed to the base constructor (no neural network is visible in this listing)"""
# Some arguments of this call are on lines not shown. By position landau is presumably
# the per-bin fit_function and inverse_squared the result_function — confirm.
259 FunctionFittedGroupedDEDXEstimatorTrainer.__init__(
261 fit_functions.landau,
263 fit_functions.inverse_squared,
264 use_sigma_for_result_fitting=
True)
268 """Trainer whose per-bin estimate is a single max_value instead of a function fit; fit_functions.inverse_squared is the result function and sigma weighting is switched off"""
272 FittedGroupedDEDXEstimatorTrainer.__init__(self, fit_functions.inverse_squared, use_sigma_for_result_fitting=
False)
275 """Return [None, [None, max_value, None]]: max_value sits at index [1][1] and no uncertainty is provided"""
# max_value is computed on a line not shown. The nesting mirrors the
# [uncertainty, parameters] shape that the result table reads via [0] and [1][1].
278 return [
None, [
None, max_value,
None]]
285 """Trainer whose per-bin estimate is the count-weighted median momentum; fit_functions.inverse_squared is the result function and sigma weighting is switched on"""
289 FittedGroupedDEDXEstimatorTrainer.__init__(self, fit_functions.inverse_squared, use_sigma_for_result_fitting=
True)
292 """Return [spread, [None, median, None]] computed from the count-weighted momentum bin centres"""
# Repeat every bin centre number_of_p_values times and concatenate the per-row
# lists (sum of lists), giving one entry per counted value.
293 weighted_p_values = fit_data.apply(
lambda data: [data.p_bin_centers] * int(data.number_of_p_values), axis=1).sum()
294 median_value = np.median(weighted_p_values)
# NOTE(review): this is the 75th percentile minus the 50th (the median), i.e. only
# the upper half of the interquartile range; a true IQR subtracts the 25th
# percentile — confirm which spread is intended.
295 iqr = np.percentile(weighted_p_values, 75) - np.percentile(weighted_p_values, 50)
297 return [iqr, [
None, median_value,
None]]
304 """Function-fitted trainer described as using a Gaussian estimator; fit_functions.inverse_sqrt is passed on to the base constructor (no neural network is visible in this listing)"""
# Only part of the constructor call is visible: the leading arguments are on lines
# not shown. By position inverse_sqrt is presumably the result_function — confirm.
308 FunctionFittedGroupedDEDXEstimatorTrainer.__init__(
312 fit_functions.inverse_sqrt,
313 use_sigma_for_result_fitting=
True)
317 """Function-fitted trainer using fit_functions.landau, with fit_functions.inverse_sqrt also passed to the base constructor (no neural network is visible in this listing)"""
# Some arguments of this call are on lines not shown. By position landau is presumably
# the per-bin fit_function and inverse_sqrt the result_function — confirm.
321 FunctionFittedGroupedDEDXEstimatorTrainer.__init__(
323 fit_functions.landau,
325 fit_functions.inverse_sqrt,
326 use_sigma_for_result_fitting=
True)
330 """Trainer whose per-bin estimate is a single max_value instead of a function fit; fit_functions.inverse_sqrt is the result function and sigma weighting is switched off"""
334 FittedGroupedDEDXEstimatorTrainer.__init__(self, fit_functions.inverse_sqrt, use_sigma_for_result_fitting=
False)
337 """Return [None, [None, max_value, None]]: max_value sits at index [1][1] and no uncertainty is provided"""
# max_value is computed on a line not shown. The nesting mirrors the
# [uncertainty, parameters] shape that the result table reads via [0] and [1][1].
340 return [
None, [
None, max_value,
None]]
347 """Trainer whose per-bin estimate is the count-weighted median momentum; fit_functions.inverse_sqrt is the result function and sigma weighting is switched on"""
351 FittedGroupedDEDXEstimatorTrainer.__init__(self, fit_functions.inverse_sqrt, use_sigma_for_result_fitting=
True)
354 """Return [spread, [None, median, None]] computed from the count-weighted momentum bin centres"""
# Repeat every bin centre number_of_p_values times and concatenate the per-row
# lists (sum of lists), giving one entry per counted value.
355 weighted_p_values = fit_data.apply(
lambda data: [data.p_bin_centers] * int(data.number_of_p_values), axis=1).sum()
356 median_value = np.median(weighted_p_values)
# NOTE(review): this is the 75th percentile minus the 50th (the median), i.e. only
# the upper half of the interquartile range; a true IQR subtracts the 25th
# percentile — confirm which spread is intended.
357 iqr = np.percentile(weighted_p_values, 75) - np.percentile(weighted_p_values, 50)
359 return [iqr, [
None, median_value,
None]]
366 """Trainer that uses a scikit-learn DecisionTreeRegressor instead of a function fit (the visible code contains no neural network)"""
# NOTE(review): self.tree is the regressor instance while the bare name tree is the imported sklearn module.
372 self.
tree = tree.DecisionTreeRegressor()
373 DEDXEstimationTrainer.__init__(self)
376 """Fit the decision tree on a copy of the data, with p_values as the regression target"""
378 train_data = data.copy()
# p_values, and any column changes applied to train_data, are prepared on lines not shown.
383 self.
tree.fit(train_data.values, p_values.values)
386 """Return the decision tree's predictions for a copy of the test data"""
388 test_data = data.copy()
# Any column changes applied to test_data happen on lines not shown.
391 return self.
tree.predict(test_data.values)