Belle II Software development
MedianEstimatorTrainerSQRT Class Reference
Inheritance diagram for MedianEstimatorTrainerSQRT:
FittedGroupedDEDXEstimatorTrainer GroupedDEDXEstimationTrainer DEDXEstimationTrainer

Public Member Functions

 __init__ (self)
 
 create_result_dataframe (self)
 
 fit_result_parameters (self)
 
 train (self, data)
 
 plot_fit_result (self, data)
 
 plot_grouped_result (self, data)
 
 create_dedx_bins (self, data)
 
 create_p_bins (self, data)
 
 use_only_the_highest_values (self, data, number_of_values=None)
 
 create_fit_data (self, dedx_bin)
 
 fit_p_to_dedx_bin (self, dedx_bin)
 
 test (self, data)
 

Public Attributes

 train_function = train_function
 this class's training function
 
 result_function = result_function
 cached copy of the result function
 
dict result_parameters_for_each_dedx_bin = {}
 cached copy of the dictionary of fitting parameters for each dE/dx bin
 
 use_sigma_for_result_fitting = use_sigma_for_result_fitting
 cached copy of the flag that adds mu ± sigma columns to the result DataFrame and uses sigma as the per-point uncertainty in the result fit
 
 dedx_estimator_parameters
 cached copies of the fit parameters and estimator function
 
 dedx_estimator_function = None
 by default, the dE/dx-particle-identification trainer has not run yet
 
str dedx_column = "dedx"
 the default data column is 'dedx'
 

Static Public Attributes

int number_of_bins_in_dedx = 20
 number of dE/dx bins
 
int number_of_bins_in_p = 29
 number of track-momentum bins
 
int number_of_head_values_used_to_fit = 20
 number of most-populated momentum bins kept for the fit
 

Detailed Description

Train a dE/dx-based particle-identification estimator using only the median track-momentum value in each dE/dx bin; the per-bin medians are then fitted with an inverse-square-root function

Definition at line 354 of file train.py.

Constructor & Destructor Documentation

◆ __init__()

__init__ ( self)
Constructor

Definition at line 357 of file train.py.

357 def __init__(self):
358 """Constructor"""
359 FittedGroupedDEDXEstimatorTrainer.__init__(self, fit_functions.inverse_sqrt, use_sigma_for_result_fitting=True)
360
361 def train_function(fit_data):
362 """Train on the curated-data median values whose truth value is known"""
363 weighted_p_values = fit_data.apply(lambda data: [data.p_bin_centers] * int(data.number_of_p_values), axis=1).sum()
364 median_value = np.median(weighted_p_values)
365 iqr = np.percentile(weighted_p_values, 75) - np.percentile(weighted_p_values, 50)
366
367 return [iqr, [None, median_value, None]]
368
369 ## this class's training function
370 self.train_function = train_function
371
372

Member Function Documentation

◆ create_dedx_bins()

create_dedx_bins ( self,
data )
inherited
Construct the dE/dx bins and then populate them with the data

Definition at line 52 of file train.py.

52 def create_dedx_bins(self, data):
53 """Construct the dE/dx bins and then populate them with the data"""
54 dedx_bins = np.linspace(
55 data[
56 self.dedx_column].min(), data[
57 self.dedx_column].max(), GroupedDEDXEstimationTrainer.number_of_bins_in_dedx)
58 dedx_cuts = pd.cut(data[self.dedx_column], dedx_bins)
59 return data.groupby(dedx_cuts), dedx_bins
60

◆ create_fit_data()

create_fit_data ( self,
dedx_bin )
inherited
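Build the fit data for one dE/dx bin: count the track-momentum values in each momentum bin, pair the counts with the bin centers, and keep only the most populated bins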

Definition at line 74 of file train.py.

74 def create_fit_data(self, dedx_bin):
75 """Fit track-momentum values"""
76 p_binned_data, p_bins = self.create_p_bins(dedx_bin)
77
78 number_of_p_values = pd.Series(p_binned_data.count().p.values, name="number_of_p_values")
79 p_bin_centers = pd.Series(0.5 * (p_bins[:-1] + p_bins[1:]), name="p_bin_centers")
80
81 all_fit_data = pd.DataFrame([number_of_p_values, p_bin_centers]).T
82 fit_data = self.use_only_the_highest_values(all_fit_data, GroupedDEDXEstimationTrainer.number_of_head_values_used_to_fit)
83
84 return fit_data
85

◆ create_p_bins()

create_p_bins ( self,
data )
inherited
Construct the momentum bins and then populate them with the data

Definition at line 61 of file train.py.

61 def create_p_bins(self, data):
62 """Construct the momentum bins and then populate them with the data"""
63 p_bins = np.linspace(data.p.min(), data.p.max(), GroupedDEDXEstimationTrainer.number_of_bins_in_p)
64 p_cuts = pd.cut(data.p, p_bins)
65 return data.groupby(p_cuts), p_bins
66

◆ create_result_dataframe()

create_result_dataframe ( self)
inherited
Collect the fitted central value (mu) and spread (sigma) of every dE/dx bin into a DataFrame sorted by dE/dx bin center; raises ValueError if no bin has fitted parameters

Definition at line 107 of file train.py.

107 def create_result_dataframe(self):
108 """Fit for the mean dE/dx and standard deviation, return the fit Dataframe"""
109 result_df = pd.DataFrame([{"dedx_bin_center": dedx_bin_center,
110 "mu": fit_parameters[1][1],
111 "sigma": fit_parameters[0]} for dedx_bin_center,
112 fit_parameters in self.result_parameters_for_each_dedx_bin.items()
113 if fit_parameters is not None])
114
115 if len(result_df) == 0:
116 raise ValueError("Could not find any fitted parameters!")
117
118 if self.use_sigma_for_result_fitting:
119 result_df["mu_plus_sigma"] = result_df.mu + result_df.sigma
120 result_df["mu_minus_sigma"] = result_df.mu - result_df.sigma
121
122 result_df.sort("dedx_bin_center", inplace=True)
123
124 return result_df
125

◆ fit_p_to_dedx_bin()

fit_p_to_dedx_bin ( self,
dedx_bin )
inherited
Fit the track-momentum values in the selected dE/dx bin, then train on the fitted values

Definition at line 86 of file train.py.

86 def fit_p_to_dedx_bin(self, dedx_bin):
87 """Fit the track-momentum values in the selected dE/dx bin, then train on the fitted values"""
88 fit_data = self.create_fit_data(dedx_bin)
89 return self.train_function(fit_data)
90
91

◆ fit_result_parameters()

fit_result_parameters ( self)
inherited
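Fit the result function to the per-bin values, starting from fixed initial guesses; return the fitted parameters together with the resulting estimator function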

Definition at line 126 of file train.py.

126 def fit_result_parameters(self):
127 """Define the parameters for the fit, assign initial guesses"""
128 result_df = self.create_result_dataframe()
129
130 p0 = (7e+08, -4e+04, 0.1, 0)
131
132 if self.use_sigma_for_result_fitting:
133 popt, pcov = curve_fit(self.result_function, result_df.dedx_bin_center, result_df.mu, p0=p0,
134 sigma=result_df.sigma, absolute_sigma=True)
135 else:
136 popt, pcov = curve_fit(self.result_function, result_df.dedx_bin_center, result_df.mu, p0=p0)
137
138 return popt, lambda dedx: self.result_function(dedx, *popt)
139

◆ plot_fit_result()

plot_fit_result ( self,
data )
inherited
Plot the fitted results

Definition at line 154 of file train.py.

154 def plot_fit_result(self, data):
155 """Plot the fitted results"""
156 plot_dedx_data = np.linspace(data[self.dedx_column].min(), data[self.dedx_column].max(), 100)
157 result_df = self.create_result_dataframe()
158
159 plt.plot(plot_dedx_data, self.dedx_estimator_function(plot_dedx_data), color="black", label="Fitted estimator")
160 if self.use_sigma_for_result_fitting:
161 # color = "black"
162 plt.errorbar(result_df.dedx_bin_center, result_df.mu, marker="o", ls="", label="Data Points", yerr=result_df.sigma)
163
164 plt.ylim(0, 0.14)
165 plt.xlabel("dEdX in ADC count/cm")
166 plt.ylabel("p in GeV/c")
167 plt.legend(frameon=True)
168

◆ plot_grouped_result()

plot_grouped_result ( self,
data )
inherited
Plot the fitted grouped results

Reimplemented in FunctionFittedGroupedDEDXEstimatorTrainer.

Definition at line 169 of file train.py.

169 def plot_grouped_result(self, data):
170 """Plot the fitted grouped results"""
171 dedx_binned_data, dedx_bins = self.create_dedx_bins(data)
172
173 # List to prevent bug in pd.DataFrame.apply
174 already_plotted_list = []
175
176 def plot_fitted_results(dedx_bin):
177 dedx_bin_center = dedx_bin.mean().values[0]
178
179 if dedx_bin_center not in already_plotted_list:
180 already_plotted_list.append(dedx_bin_center)
181
182 fit_data = self.create_fit_data(dedx_bin)
183 plt.plot(fit_data.p_bin_centers, fit_data.number_of_p_values, ls="", marker=".", color="black")
184
185 return True
186
187 plt.xlabel("p in GeV/c")
188 plt.ylabel("Entries")
189
190 dedx_binned_data.apply(plot_fitted_results)
191
192

◆ test()

test ( self,
data )
inherited
Get the trained neural-network output value for test data

Reimplemented in MVADEDXEstimationTrainer.

Definition at line 34 of file train.py.

34 def test(self, data):
35 """Get the trained neural-network output value for test data"""
36 if self.dedx_estimator_function is None:
37 raise ValueError("Train the estimator first!")
38
39 return self.dedx_estimator_function(data[self.dedx_column])
40
41

◆ train()

train ( self,
data )
inherited
Train the neural network using curated data

Reimplemented from DEDXEstimationTrainer.

Definition at line 140 of file train.py.

140 def train(self, data):
141 """Train the neural network using curated data"""
142 dedx_binned_data, dedx_bins = self.create_dedx_bins(data)
143
144 def fit_and_save_results(dedx_bin):
145 fit_result = self.fit_p_to_dedx_bin(dedx_bin)
146 return {dedx_bin.mean()[self.dedx_column]: fit_result}
147
148 for result in dedx_binned_data.apply(fit_and_save_results):
149 self.result_parameters_for_each_dedx_bin.update(result)
150
151
152 self.dedx_estimator_parameters, self.dedx_estimator_function = self.fit_result_parameters()
153
Definition train.py:1

◆ use_only_the_highest_values()

use_only_the_highest_values ( self,
data,
number_of_values = None )
inherited
Sort the data then select only the highest N values

Definition at line 67 of file train.py.

67 def use_only_the_highest_values(self, data, number_of_values=None):
68 """Sort the data then select only the highest N values"""
69 if number_of_values is None:
70 return data
71 else:
72 return data.sort("number_of_p_values", ascending=False).head(number_of_values).sort()
73

Member Data Documentation

◆ dedx_column

str dedx_column = "dedx"
inherited

the default data column is 'dedx'

Definition at line 27 of file train.py.

◆ dedx_estimator_function

dedx_estimator_function = None
inherited

by default, the dE/dx-particle-identification trainer has not run yet

Definition at line 25 of file train.py.

◆ dedx_estimator_parameters

dedx_estimator_parameters
inherited

cached copies of the fit parameters and estimator function

Definition at line 152 of file train.py.

◆ number_of_bins_in_dedx

int number_of_bins_in_dedx = 20
staticinherited

number of dE/dx bins

Definition at line 46 of file train.py.

◆ number_of_bins_in_p

int number_of_bins_in_p = 29
staticinherited

number of track-momentum bins

Definition at line 48 of file train.py.

◆ number_of_head_values_used_to_fit

int number_of_head_values_used_to_fit = 20
staticinherited

number of most-populated momentum bins kept for the fit

Definition at line 50 of file train.py.

◆ result_function

result_function = result_function
inherited

cached copy of the result function

Definition at line 99 of file train.py.

◆ result_parameters_for_each_dedx_bin

dict result_parameters_for_each_dedx_bin = {}
inherited

cached copy of the dictionary of fitting parameters for each dE/dx bin

Definition at line 101 of file train.py.

◆ train_function

train_function = train_function

this class's training function

Definition at line 370 of file train.py.

◆ use_sigma_for_result_fitting

use_sigma_for_result_fitting = use_sigma_for_result_fitting
inherited

cached copy of the flag that adds mu ± sigma columns to the result DataFrame and uses sigma as the per-point uncertainty in the result fit

Definition at line 103 of file train.py.


The documentation for this class was generated from the following file: