12from .
import fit_functions
13from scipy.optimize
import curve_fit
14import matplotlib.pyplot
as plt
16from sklearn
import tree
20 """Train a neural network for dE/dx-based particle identification"""
30 """Train on the input data"""
32 raise NotImplementedError(
"Use this class as a base class only")
35 """Get the trained neural-network output value for test data"""
37 raise ValueError(
"Train the estimator first!")
43 """Train a neural network for dE/dx-based particle identification"""
46 number_of_bins_in_dedx = 20
48 number_of_bins_in_p = 29
50 number_of_head_values_used_to_fit = 20
53 """Construct the dE/dx bins and then populate them with the data"""
54 dedx_bins = np.linspace(
57 self.
dedx_column].max(), GroupedDEDXEstimationTrainer.number_of_bins_in_dedx)
58 dedx_cuts = pd.cut(data[self.
dedx_column], dedx_bins)
59 return data.groupby(dedx_cuts), dedx_bins
62 """Construct the momentum bins and then populate them with the data"""
63 p_bins = np.linspace(data.p.min(), data.p.max(), GroupedDEDXEstimationTrainer.number_of_bins_in_p)
64 p_cuts = pd.cut(data.p, p_bins)
65 return data.groupby(p_cuts), p_bins
68 """Sort the data then select only the highest N values"""
69 if number_of_values
is None:
72 return data.sort(
"number_of_p_values", ascending=
False).head(number_of_values).sort()
75 """Fit track-momentum values"""
78 number_of_p_values = pd.Series(p_binned_data.count().p.values, name=
"number_of_p_values")
79 p_bin_centers = pd.Series(0.5 * (p_bins[:-1] + p_bins[1:]), name=
"p_bin_centers")
81 all_fit_data = pd.DataFrame([number_of_p_values, p_bin_centers]).T
87 """Fit the track-momentum values in the selected dE/dx bin, then train on the fitted values"""
89 return self.train_function(fit_data)
93 """Train a neural network for dE/dx-based particle identification"""
95 def __init__(self, result_function, use_sigma_for_result_fitting):
105 GroupedDEDXEstimationTrainer.__init__(self)
108 """Fit for the mean dE/dx and standard deviation, return the fit Dataframe"""
109 result_df = pd.DataFrame([{
"dedx_bin_center": dedx_bin_center,
110 "mu": fit_parameters[1][1],
111 "sigma": fit_parameters[0]}
for dedx_bin_center,
113 if fit_parameters
is not None])
115 if len(result_df) == 0:
116 raise ValueError(
"Could not find any fitted parameters!")
119 result_df[
"mu_plus_sigma"] = result_df.mu + result_df.sigma
120 result_df[
"mu_minus_sigma"] = result_df.mu - result_df.sigma
122 result_df.sort(
"dedx_bin_center", inplace=
True)
127 """Define the parameters for the fit, assign initial guesses"""
130 p0 = (7e+08, -4e+04, 0.1, 0)
133 popt, pcov = curve_fit(self.
result_function, result_df.dedx_bin_center, result_df.mu, p0=p0,
134 sigma=result_df.sigma, absolute_sigma=
True)
136 popt, pcov = curve_fit(self.
result_function, result_df.dedx_bin_center, result_df.mu, p0=p0)
141 """Train the neural network using curated data"""
144 def fit_and_save_results(dedx_bin):
146 return {dedx_bin.mean()[self.
dedx_column]: fit_result}
148 for result
in dedx_binned_data.apply(fit_and_save_results):
155 """Plot the fitted results"""
162 plt.errorbar(result_df.dedx_bin_center, result_df.mu, marker=
"o", ls=
"", label=
"Data Points", yerr=result_df.sigma)
165 plt.xlabel(
"dEdX in ADC count/cm")
166 plt.ylabel(
"p in GeV/c")
167 plt.legend(frameon=
True)
170 """Plot the fitted grouped results"""
174 already_plotted_list = []
176 def plot_fitted_results(dedx_bin):
177 dedx_bin_center = dedx_bin.mean().values[0]
179 if dedx_bin_center
not in already_plotted_list:
180 already_plotted_list.append(dedx_bin_center)
183 plt.plot(fit_data.p_bin_centers, fit_data.number_of_p_values, ls=
"", marker=
".", color=
"black")
187 plt.xlabel(
"p in GeV/c")
188 plt.ylabel(
"Entries")
190 dedx_binned_data.apply(plot_fitted_results)
194 """Train a neural network for dE/dx-based particle identification"""
196 def __init__(self, fit_function, dimension_of_fit_function, result_function, use_sigma_for_result_fitting):
204 FittedGroupedDEDXEstimatorTrainer.__init__(self, result_function, use_sigma_for_result_fitting)
207 """Train on the fit to curated-data highest values whose truth value is known"""
211 p0 = (1e3, max_value, 4e-2)
213 p0 = (1e3, max_value, 4e-2, 1, 1, 1)
215 popt, pcov = curve_fit(self.
fit_function, fit_data.p_bin_centers, fit_data.number_of_p_values, p0=p0)
217 return [np.sqrt(np.diag(pcov)[1]), popt]
223 """Plot the fitted grouped results"""
224 FittedGroupedDEDXEstimatorTrainer.plot_grouped_result(self, data)
228 p_plot_data = np.linspace(data.p.min(), data.p.max(), 1000)
231 already_plotted_list = []
233 def plot_fitted_results(dedx_bin):
234 dedx_bin_center = dedx_bin.mean().values[0]
236 if dedx_bin_center
not in already_plotted_list:
238 already_plotted_list.append(dedx_bin_center)
239 unneeded, fit_options = fitted_results
241 dedx_plot_data = self.
fit_function(p_plot_data, *fitted_results[1])
242 plt.plot(p_plot_data, dedx_plot_data)
246 dedx_binned_data.apply(plot_fitted_results)
250 """Train a neural network for dE/dx-based particle identification using a Gaussian estimator"""
254 FunctionFittedGroupedDEDXEstimatorTrainer.__init__(
258 fit_functions.inverse_squared,
259 use_sigma_for_result_fitting=True)
263 """Train a neural network for dE/dx-based particle identification using a Landau estimator"""
267 FunctionFittedGroupedDEDXEstimatorTrainer.__init__(
269 fit_functions.landau,
271 fit_functions.inverse_squared,
272 use_sigma_for_result_fitting=True)
276 """Train a neural network for dE/dx-based particle identification using only the highest values"""
280 FittedGroupedDEDXEstimatorTrainer.__init__(self, fit_functions.inverse_squared, use_sigma_for_result_fitting=False)
283 """Train on the curated-data highest values whose truth value is known"""
286 return [
None, [
None, max_value,
None]]
293 """Train a neural network for dE/dx-based particle identification using only the median values"""
297 FittedGroupedDEDXEstimatorTrainer.__init__(self, fit_functions.inverse_squared, use_sigma_for_result_fitting=True)
300 """Train on the curated-data median values whose truth value is known"""
301 weighted_p_values = fit_data.apply(
lambda data: [data.p_bin_centers] * int(data.number_of_p_values), axis=1).sum()
302 median_value = np.median(weighted_p_values)
303 iqr = np.percentile(weighted_p_values, 75) - np.percentile(weighted_p_values, 50)
305 return [iqr, [
None, median_value,
None]]
312 """Train a neural network for dE/dx-based particle identification using a Gaussian estimator"""
316 FunctionFittedGroupedDEDXEstimatorTrainer.__init__(
320 fit_functions.inverse_sqrt,
321 use_sigma_for_result_fitting=True)
325 """Train a neural network for dE/dx-based particle identification using a Landau estimator"""
329 FunctionFittedGroupedDEDXEstimatorTrainer.__init__(
331 fit_functions.landau,
333 fit_functions.inverse_sqrt,
334 use_sigma_for_result_fitting=True)
338 """Train a neural network for dE/dx-based particle identification using only the highest values"""
342 FittedGroupedDEDXEstimatorTrainer.__init__(self, fit_functions.inverse_sqrt, use_sigma_for_result_fitting=False)
345 """Train on the curated-data highest values whose truth value is known"""
348 return [
None, [
None, max_value,
None]]
355 """Train a neural network for dE/dx-based particle identification using only the median values"""
359 FittedGroupedDEDXEstimatorTrainer.__init__(self, fit_functions.inverse_sqrt, use_sigma_for_result_fitting=True)
362 """Train on the curated-data median values whose truth value is known"""
363 weighted_p_values = fit_data.apply(
lambda data: [data.p_bin_centers] * int(data.number_of_p_values), axis=1).sum()
364 median_value = np.median(weighted_p_values)
365 iqr = np.percentile(weighted_p_values, 75) - np.percentile(weighted_p_values, 50)
367 return [iqr, [
None, median_value,
None]]
374 """Train a neural network for dE/dx-based particle identification using multivariate data analysis"""
380 self.tree = tree.DecisionTreeRegressor()
381 DEDXEstimationTrainer.__init__(self)
384 """Train the neural network using curated data"""
386 train_data = data.copy()
391 self.
tree.fit(train_data.values, p_values.values)
394 """Get the trained neural-network output value for test data"""
396 test_data = data.copy()
399 return self.
tree.predict(test_data.values)
dedx_estimator_function
by default, the dE/dx-particle-identification trainer has not run yet
dedx_column
the default data column is 'dedx'
result_parameters_for_each_dedx_bin
cached copy of the dictionary of fitting parameters for each dE/dx bin
result_function
cached copy of the result function
def plot_fit_result(self, data)
use_sigma_for_result_fitting
cached copy of the flag to add mean+/-sigma values to the output Dataframe
def plot_grouped_result(self, data)
def fit_result_parameters(self)
dedx_estimator_function
cached copies of the fit parameters and estimator function
def create_result_dataframe(self)
def __init__(self, result_function, use_sigma_for_result_fitting)
def plot_grouped_result(self, data)
fit_function
cached copy of the fitting function
def __init__(self, fit_function, dimension_of_fit_function, result_function, use_sigma_for_result_fitting)
dimension_of_fit_function
cached value of the degrees of freedom in the fit
train_function
this class's training function
def create_p_bins(self, data)
def use_only_the_highest_values(self, data, number_of_values=None)
def fit_p_to_dedx_bin(self, dedx_bin)
def create_fit_data(self, dedx_bin)
def create_dedx_bins(self, data)
tree
cached copy of the MVA tool
train_function
this class's training function
train_function
this class's training function