import numpy as np
import tensorflow as tf

# NOTE(review): the project imports below were lost in the garbled source and are
# reconstructed from usage (basf2_mva at the __main__ block, State in get_model).
# Confirm the State module path against the project.
import basf2_mva
from basf2_mva_python_interface.tensorflow import State
class Prior:
    """
    Calculates prior from signal and background pdfs of the fit variable
    """

    def __init__(self, z, y):
        """
        Constructor of a new prior distribution
        @param z fit variable
        @param y target variable
        """
        #: signal cdf, pdf and binning
        self.signal_cdf, self.signal_pdf, self.signal_bins = calculate_cdf_and_pdf(z[y == 1])
        #: background cdf, pdf and binning
        self.bckgrd_cdf, self.bckgrd_pdf, self.bckgrd_bins = calculate_cdf_and_pdf(z[y == 0])
        # Avoid zero background pdf in the under/overflow bins, which would
        # produce infinities in the weight ratios below
        self.bckgrd_pdf[0] = self.bckgrd_pdf[-1] = 1

    def get_signal_pdf(self, X):
        """
        Calculate signal pdf for given fit variable value
        @param X nd-array containing fit variable values
        """
        return self.signal_pdf[np.digitize(X, bins=self.signal_bins)]

    def get_bckgrd_pdf(self, X):
        """
        Calculate background pdf for given fit variable value
        @param X nd-array containing fit variable values
        """
        return self.bckgrd_pdf[np.digitize(X, bins=self.bckgrd_bins)]

    def get_signal_cdf(self, X):
        """
        Calculate signal cdf for given fit variable value
        @param X nd-array containing fit variable values
        """
        return self.signal_cdf[np.digitize(X, bins=self.signal_bins)]

    def get_bckgrd_cdf(self, X):
        """
        Calculate background cdf for given fit variable value
        @param X nd-array containing fit variable values
        """
        return self.bckgrd_cdf[np.digitize(X, bins=self.bckgrd_bins)]

    def get_prior(self, X):
        """
        Calculate prior signal probability for given fit variable value
        @param X nd-array containing fit variable values
        """
        # NOTE(review): the ratio expression was lost in the garbled source;
        # reconstructed as pdf_s / (pdf_s + pdf_b) — confirm against upstream.
        prior = self.get_signal_pdf(X) / (self.get_signal_pdf(X) + self.get_bckgrd_pdf(X))
        # Fall back to 0.5 (no separation) where the ratio is undefined
        prior = np.where(np.isfinite(prior), prior, 0.5)
        return prior

    def get_boost_weights(self, X):
        """
        Calculate boost weights used in dplot boost training step
        @param X nd-array containing fit variable values
        """
        signal_weight = self.get_signal_cdf(X) / self.get_bckgrd_pdf(X)
        signal_weight = np.where(np.isfinite(signal_weight), signal_weight, 0)
        bckgrd_weight = (1.0 - self.get_signal_cdf(X)) / self.get_bckgrd_pdf(X)
        bckgrd_weight = np.where(np.isfinite(bckgrd_weight), bckgrd_weight, 0)
        return np.r_[signal_weight, bckgrd_weight]

    def get_uncorrelation_weights(self, X, boost_prediction):
        """
        Calculate uncorrelation weights used in dplot classifier training step
        @param X nd-array containing fit variable values
        @param boost_prediction output of the boost classifier
        """
        # Squeeze the prediction into (0.005, 0.995) to avoid division by zero
        reg_boost_prediction = boost_prediction * 0.99 + 0.005
        weights = (self.get_signal_cdf(X) / reg_boost_prediction +
                   (1.0 - self.get_signal_cdf(X)) / (1.0 - reg_boost_prediction)) / 2
        return weights
def calculate_cdf_and_pdf(X):
    """
    Calculates cdf and pdf of given sample and adds under/overflow bins
    @param X 1-d np.array
    @return tuple (cdf, pdf, bins): cdf and pdf have 202 entries
            (200 histogram bins plus an underflow and an overflow bin),
            bins has the 201 histogram bin edges
    """
    pdf, bins = np.histogram(X, bins=200, density=True)
    # density * bin width, accumulated -> cdf over the histogram range
    cdf = np.cumsum(pdf * (bins - np.roll(bins, 1))[1:])
    # Prepend/append under- and overflow entries: cdf clamps to [0, 1],
    # pdf is zero outside the histogram range
    return np.hstack([0.0, cdf, 1.0]), np.hstack([0.0, pdf, 0.0]), bins
def get_model(number_of_features, number_of_spectators, number_of_events, training_fraction, parameters):
    """
    Build the dplot model: two small sigmoid networks sharing one architecture —
    a 'boost' network trained first and an 'inference' network trained afterwards
    with uncorrelation weights derived from the boost output.
    @return State wrapping the tf.Module model
    """

    def dense(x, W, b, activation_function):
        """One fully connected layer."""
        return activation_function(tf.matmul(x, W) + b)

    class my_model(tf.Module):

        def __init__(self, **kwargs):
            """Create variables and optimizers for both networks."""
            super().__init__(**kwargs)

            # Separate optimizers: the two networks are trained in separate epochs
            self.boost_optimizer = tf.optimizers.Adam(0.01)
            self.inference_optimizer = tf.optimizers.Adam(0.01)

            def create_layer_variables(shape, name, activation_function):
                """Weights (truncated normal, 1/sqrt(fan_in) std), biases (zeros)."""
                weights = tf.Variable(
                    tf.random.truncated_normal(shape, stddev=1.0 / np.sqrt(float(shape[0]))),
                    name=f'{name}_weights')
                biases = tf.Variable(tf.zeros(shape=[shape[1]]), name=f'{name}_biases')
                return weights, biases, activation_function

            self.boost_layer_vars = []
            self.boost_layer_vars.append(
                create_layer_variables([number_of_features, 20], 'boost_input', tf.nn.sigmoid))
            # NOTE(review): the hidden-layer count was lost in the garbled source;
            # 3 matches the upstream dplot example — confirm.
            for i in range(3):
                self.boost_layer_vars.append(
                    create_layer_variables([20, 20], f'boost_hidden{i}', tf.nn.sigmoid))
            self.boost_layer_vars.append(
                create_layer_variables([20, 1], 'boost_output', tf.nn.sigmoid))

            self.inference_layer_vars = []
            self.inference_layer_vars.append(
                create_layer_variables([number_of_features, 20], 'inference_input', tf.nn.sigmoid))
            for i in range(3):
                self.inference_layer_vars.append(
                    create_layer_variables([20, 20], f'inference_hidden{i}', tf.nn.sigmoid))
            self.inference_layer_vars.append(
                create_layer_variables([20, 1], 'inference_output', tf.nn.sigmoid))

            self.n_boost_layers = len(self.boost_layer_vars)
            self.n_inference_layers = len(self.inference_layer_vars)

        @tf.function(input_signature=[tf.TensorSpec(shape=[None, number_of_features], dtype=tf.float32)])
        def __call__(self, x):
            """Forward pass of the inference network."""
            for i in range(self.n_inference_layers):
                x = dense(x, *self.inference_layer_vars[i])
            return x

        @tf.function(input_signature=[tf.TensorSpec(shape=[None, number_of_features], dtype=tf.float32)])
        def boost(self, x):
            """Forward pass of the boost network."""
            for i in range(self.n_boost_layers):
                x = dense(x, *self.boost_layer_vars[i])
            return x

        @tf.function
        def loss(self, predicted_y, target_y, w):
            """Weighted binary cross entropy of predicted_y vs. target_y."""
            epsilon = 1e-5  # keeps log() away from zero
            diff_from_truth = tf.where(target_y == 1., predicted_y, 1. - predicted_y)
            cross_entropy = - tf.reduce_sum(w * tf.math.log(diff_from_truth + epsilon)) / tf.reduce_sum(w)
            return cross_entropy

    state = State(model=my_model())
    return state
def partial_fit(state, X, S, y, w, epoch, batch):
    """
    Pass received data to tensorflow session
    @param state State object returned by get_model
    @param X feature matrix
    @param S spectator matrix; column 0 is the fit variable
    @param y target matrix
    @param w weight matrix
    @param epoch 0 -> boost training, 1 -> dplot (inference) training
    @param batch must always be 0 (mini batches are handled internally)
    """
    prior = Prior(S[:, 0], y[:, 0])
    # NOTE(review): these two constants were lost in the garbled source;
    # values taken from the upstream dplot example — confirm.
    N = 100           # internal epochs per call
    batch_size = 100  # internal mini-batch size

    assert epoch < 2, "There should only be two iterations, one for the boost training,"\
        " one for the dplot training. Check the value of m_nIterations."
    assert batch == 0, "All data should be passed to partial_fit on each call."\
        " The mini batches are handled internally. Check that m_mini_batch_size=0."

    indices = np.arange(len(X))
    for i in range(N):
        np.random.shuffle(indices)
        for pos in range(0, len(indices), batch_size):
            # Skip the trailing incomplete batch
            if pos + batch_size >= len(indices):
                break
            index = indices[pos: pos + batch_size]
            z_batch = S[index, 0]
            x_batch = X[index]

            if epoch == 0:
                # Boost training: every event enters twice, once with signal
                # weights and once with background weights (see get_boost_weights)
                x_batch = np.r_[x_batch, x_batch]
                w_batch = prior.get_boost_weights(z_batch) * np.r_[w[index, 0], w[index, 0]]
                y_batch = np.r_[np.ones(batch_size), np.zeros(batch_size)]
                y_batch = np.reshape(y_batch, (-1, 1))
                optimizer = state.model.boost_optimizer
                name = 'boost'
            else:
                # dplot training: reweight with uncorrelation weights derived
                # from the (already trained) boost network output
                p_batch = state.model.boost(x_batch).numpy()
                w_batch = prior.get_uncorrelation_weights(z_batch, p_batch.flatten()) * w[index, 0]
                y_batch = y[index]
                optimizer = state.model.inference_optimizer
                name = 'inference'

            w_batch = np.reshape(w_batch, (-1, 1)).astype(np.float32)
            x_batch = x_batch.astype(np.float32)

            with tf.GradientTape() as tape:
                if epoch == 0:
                    y_predict_batch = state.model.boost(x_batch)
                else:
                    y_predict_batch = state.model(x_batch)
                avg_cost = state.model.loss(y_predict_batch, y_batch, w_batch)
            # Update only the variables belonging to the network being trained
            trainable_variables = [v for v in state.model.trainable_variables if name in v.name]
            grads = tape.gradient(avg_cost, trainable_variables)
            optimizer.apply_gradients(zip(grads, trainable_variables))

        print("Internal Epoch:", '%04d' % (i), "cost=", "{:.9f}".format(avg_cost))
    return True
if __name__ == "__main__":
    # Configure a basf2_mva teacher run using the dplot method defined above.
    general_options = basf2_mva.GeneralOptions()
    general_options.m_datafiles = basf2_mva.vector("train.root")
    general_options.m_identifier = "TensorflowDPlot"
    general_options.m_treename = "tree"
    variables = ['p', 'pt', 'pz',
                 'daughter(0, p)', 'daughter(0, pz)', 'daughter(0, pt)',
                 'daughter(1, p)', 'daughter(1, pz)', 'daughter(1, pt)',
                 'daughter(2, p)', 'daughter(2, pz)', 'daughter(2, pt)',
                 'chiProb', 'dr', 'dz',
                 'daughter(0, dr)', 'daughter(1, dr)',
                 'daughter(0, dz)', 'daughter(1, dz)',
                 'daughter(0, chiProb)', 'daughter(1, chiProb)', 'daughter(2, chiProb)',
                 'daughter(0, kaonID)', 'daughter(0, pionID)',
                 'daughterInvM(0, 1)', 'daughterInvM(0, 2)', 'daughterInvM(1, 2)']
    general_options.m_variables = basf2_mva.vector(*variables)
    # The fit variable (invariant mass) enters as spectator, not as feature
    general_options.m_spectators = basf2_mva.vector('M')
    general_options.m_target_variable = "isSignal"

    specific_options = basf2_mva.PythonOptions()
    specific_options.m_framework = "tensorflow"
    specific_options.m_steering_file = 'mva/examples/tensorflow/dplot.py'
    # Two passes over the data: boost training, then dplot training
    specific_options.m_nIterations = 2
    # Pass all data at once; partial_fit does its own mini-batching
    specific_options.m_mini_batch_size = 0
    basf2_mva.teacher(general_options, specific_options)
# (Doxygen extraction residue: the fragments that followed here were stray
# duplicates of the Prior class interface — get_bckgrd_cdf(self, X),
# get_bckgrd_pdf(self, X), get_signal_cdf(self, X), get_signal_pdf(self, X),
# get_boost_weights(self, X), get_uncorrelation_weights(self, X, boost_prediction),
# plus the attribute notes "signal_bins: signal cdf, pdf and binning" and
# "bckgrd_bins: background cdf, pdf and binning". All of these are defined
# inside the Prior class above.)