12 import tensorflow 
as tf
 
   20     Calculates prior from signal and background pdfs of the fit variable 
   25         Constructor of a new prior distribution 
   27             @param y target variable 
   30         self.signal_cdf, self.signal_pdf, self.
signal_binssignal_bins = calculate_cdf_and_pdf(z[y == 1])
 
   32         self.bckgrd_cdf, self.bckgrd_pdf, self.
bckgrd_binsbckgrd_bins = calculate_cdf_and_pdf(z[y == 0])
 
   34         self.bckgrd_pdf[0] = self.bckgrd_pdf[-1] = 1
 
   38         Calculate signal pdf for given fit variable value 
   39             @param X nd-array containing fit variable values 
   41         return self.signal_pdf[np.digitize(X, bins=self.
signal_binssignal_bins)]
 
   45         Calculate background pdf for given fit variable value 
   46             @param X nd-array containing fit variable values 
   48         return self.bckgrd_pdf[np.digitize(X, bins=self.
bckgrd_binsbckgrd_bins)]
 
   52         Calculate signal cdf for given fit variable value 
   53             @param X nd-array containing fit variable values 
   55         return self.signal_cdf[np.digitize(X, bins=self.
signal_binssignal_bins)]
 
   59         Calculate background cdf for given fit variable value 
   60             @param X nd-array containing fit variable values 
   62         return self.bckgrd_cdf[np.digitize(X, bins=self.
bckgrd_binsbckgrd_bins)]
 
   66         Calculate prior signal probability for given fit variable value 
   67             @param X nd-array containing fit variable values 
   70         prior = np.where(np.isfinite(prior), prior, 0.5)
 
   75         Calculate boost weights used in dplot boost training step 
   76             @param X nd-array containing fit variable values 
   79         signal_weight = np.where(np.isfinite(signal_weight), signal_weight, 0)
 
   82         bckgrd_weight = np.where(np.isfinite(bckgrd_weight), bckgrd_weight, 0)
 
   83         return np.r_[signal_weight, bckgrd_weight]
 
   87         Calculate uncorrelation weights used in dplot classifier training step 
   88             @param X nd-array containing fit variable values 
   89             @param boost_prediction output of the boost classifier 
   91         reg_boost_prediction = boost_prediction * 0.99 + 0.005
 
   92         weights = (self.
get_signal_cdfget_signal_cdf(X) / reg_boost_prediction +
 
   93                    (1.0 - self.
get_signal_cdfget_signal_cdf(X)) / (1.0 - reg_boost_prediction)) / 2
 
   97 def calculate_cdf_and_pdf(X):
 
   99     Calculates cdf and pdf of given sample and adds under/overflow bins 
  100         @param X 1-d np.array 
  102     pdf, bins = np.histogram(X, bins=200, density=
True)
 
  103     cdf = np.cumsum(pdf * (bins - np.roll(bins, 1))[1:])
 
  104     return np.hstack([0.0, cdf, 1.0]), np.hstack([0.0, pdf, 0.0]), bins
 
  107 def get_model(number_of_features, number_of_spectators, number_of_events, training_fraction, parameters):
 
  110     def dense(x, W, b, activation_function):
 
  111         return activation_function(tf.matmul(x, W) + b)
 
  113     class my_model(tf.Module):
 
  118             self.boost_optimizer = tf.optimizers.Adam(0.01)
 
  119             self.inference_optimizer = tf.optimizers.Adam(0.01)
 
  121             def create_layer_variables(shape, name, activation_function):
 
  122                 weights = tf.Variable(
 
  123                     tf.random.truncated_normal(shape, stddev=1.0 / np.sqrt(float(shape[0]))),
 
  124                     name=f
'{name}_weights')
 
  125                 biases = tf.Variable(tf.zeros(shape=[shape[1]]), name=f
'{name}_biases')
 
  126                 return weights, biases, activation_function
 
  128             self.boost_layer_vars = []
 
  129             self.boost_layer_vars.append(create_layer_variables([number_of_features, 20], 
'boost_input', tf.nn.sigmoid))
 
  131                 self.boost_layer_vars.append(create_layer_variables([20, 20], f
'boost_hidden{i}', tf.nn.sigmoid))
 
  132             self.boost_layer_vars.append(create_layer_variables([20, 1], 
'boost_output', tf.nn.sigmoid))
 
  134             self.inference_layer_vars = []
 
  135             self.inference_layer_vars.append(create_layer_variables([number_of_features, 20], 
'inference_input', tf.nn.sigmoid))
 
  137                 self.inference_layer_vars.append(create_layer_variables([20, 20], f
'inference_hidden{i}', tf.nn.sigmoid))
 
  138             self.inference_layer_vars.append(create_layer_variables([20, 1], 
'inference_output', tf.nn.sigmoid))
 
  140             self.n_boost_layers = len(self.boost_layer_vars)
 
  141             self.n_inference_layers = len(self.inference_layer_vars)
 
  143         @tf.function(input_signature=[tf.TensorSpec(shape=[None, number_of_features], dtype=tf.float32)])
 
  144         def __call__(self, x):
 
  145             for i 
in range(self.n_inference_layers):
 
  146                 x = dense(x, *self.inference_layer_vars[i])
 
  149         @tf.function(input_signature=[tf.TensorSpec(shape=[None, number_of_features], dtype=tf.float32)])
 
  151             for i 
in range(self.n_boost_layers):
 
  152                 x = dense(x, *self.boost_layer_vars[i])
 
  156         def loss(self, predicted_y, target_y, w):
 
  158             diff_from_truth = tf.where(target_y == 1., predicted_y, 1. - predicted_y)
 
  159             cross_entropy = - tf.reduce_sum(w * tf.math.log(diff_from_truth + epsilon)) / tf.reduce_sum(w)
 
  162     state = 
State(model=my_model())
 
  166 def partial_fit(state, X, S, y, w, epoch):
 
  168     Pass received data to tensorflow session 
  170     prior = 
Prior(S[:, 0], y[:, 0])
 
  174     indices = np.arange(len(X))
 
  176         np.random.shuffle(indices)
 
  177         for pos 
in range(0, len(indices), batch_size):
 
  178             if pos + batch_size >= len(indices):
 
  180             index = indices[pos: pos + batch_size]
 
  181             z_batch = S[index, 0]
 
  185                 x_batch = np.r_[x_batch, x_batch]
 
  186                 w_batch = prior.get_boost_weights(z_batch) * np.r_[w[index, 0], w[index, 0]]
 
  187                 y_batch = np.r_[np.ones(batch_size), np.zeros(batch_size)]
 
  188                 y_batch = np.reshape(y_batch, (-1, 1))
 
  189                 optimizer = state.model.boost_optimizer
 
  192                 p_batch = state.model.boost(x_batch).numpy()
 
  193                 w_batch = prior.get_uncorrelation_weights(z_batch, p_batch.flatten()) * w[index, 0]
 
  195                 optimizer = state.model.inference_optimizer
 
  198             w_batch = np.reshape(w_batch, (-1, 1)).astype(np.float32)
 
  200             with tf.GradientTape() 
as tape:
 
  202                     y_predict_batch = state.model.boost(x_batch)
 
  204                     y_predict_batch = state.model(x_batch)
 
  206                 avg_cost = state.model.loss(y_predict_batch, y_batch, w_batch)
 
  207                 trainable_variables = [v 
for v 
in state.model.trainable_variables 
if name 
in v.name]
 
  208                 grads = tape.gradient(avg_cost, trainable_variables)
 
  210             optimizer.apply_gradients(zip(grads, trainable_variables))
 
  212         print(
"Epoch:", 
'%04d' % (i), 
"cost=", 
"{:.9f}".format(avg_cost))
 
  216 if __name__ == 
"__main__":
 
  217     general_options = basf2_mva.GeneralOptions()
 
  218     general_options.m_datafiles = basf2_mva.vector(
"train.root")
 
  219     general_options.m_identifier = 
"TensorflowDPlot" 
  220     general_options.m_treename = 
"tree" 
  221     variables = [
'p', 
'pt', 
'pz',
 
  222                  'daughter(0, p)', 
'daughter(0, pz)', 
'daughter(0, pt)',
 
  223                  'daughter(1, p)', 
'daughter(1, pz)', 
'daughter(1, pt)',
 
  224                  'daughter(2, p)', 
'daughter(2, pz)', 
'daughter(2, pt)',
 
  225                  'chiProb', 
'dr', 
'dz',
 
  226                  'daughter(0, dr)', 
'daughter(1, dr)',
 
  227                  'daughter(0, dz)', 
'daughter(1, dz)',
 
  228                  'daughter(0, chiProb)', 
'daughter(1, chiProb)', 
'daughter(2, chiProb)',
 
  229                  'daughter(0, kaonID)', 
'daughter(0, pionID)',
 
  230                  'daughterInvM(0, 1)', 
'daughterInvM(0, 2)', 
'daughterInvM(1, 2)']
 
  231     general_options.m_variables = basf2_mva.vector(*variables)
 
  232     general_options.m_spectators = basf2_mva.vector(
'M')
 
  233     general_options.m_target_variable = 
"isSignal" 
  235     specific_options = basf2_mva.PythonOptions()
 
  236     specific_options.m_framework = 
"tensorflow" 
  237     specific_options.m_steering_file = 
'mva/examples/tensorflow/dplot.py' 
  238     specific_options.m_nIterations = 2  
 
  239     specific_options.m_mini_batch_size = 0  
 
  240     basf2_mva.teacher(general_options, specific_options)
 
def get_bckgrd_cdf(self, X)
bckgrd_bins
background cdf, pdf and binning
def get_boost_weights(self, X)
def get_bckgrd_pdf(self, X)
signal_bins
signal cdf, pdf and binning
def get_signal_cdf(self, X)
def get_signal_pdf(self, X)
def get_uncorrelation_weights(self, X, boost_prediction)