Belle II Software  light-2205-abys
simple_deep.py
1 #!/usr/bin/env python3
2 
3 
10 
11 import numpy as np
12 import tensorflow as tf
13 import basf2_mva
14 import basf2_mva_util
15 import time
16 
18 
19 
def get_model(number_of_features, number_of_spectators, number_of_events, training_fraction, parameters):
    """
    Build the deep feed-forward tensorflow model and return it wrapped in a State object.

    The layer width is five times number_of_features. The remaining arguments
    (number_of_spectators, number_of_events, training_fraction, parameters) are
    accepted but not used by this function.
    """
    # Every layer is five times wider than the raw feature vector.
    number_of_features *= 5

    # Enable on-demand GPU memory allocation; nothing happens if no GPU is listed.
    for device in tf.config.list_physical_devices('GPU'):
        tf.config.experimental.set_memory_growth(device, True)

    class my_model(tf.Module):
        """Stack of dense layers: nine ReLU layers followed by one sigmoid output layer."""

        def __init__(self, **kwargs):
            super().__init__(**kwargs)

            self.optimizer = tf.optimizers.Adam(0.01)

            def make_layer(dims, label, activation):
                """Create the (weights, biases, activation) triple of one dense layer."""
                fan_in, fan_out = dims
                # Truncated-normal initialisation scaled by 1/sqrt(number of inputs).
                initial_weights = tf.random.truncated_normal(dims, stddev=1.0 / np.sqrt(float(fan_in)))
                layer_weights = tf.Variable(initial_weights, name=f'{label}_weights')
                layer_biases = tf.Variable(tf.zeros(shape=[fan_out]), name=f'{label}_biases')
                return layer_weights, layer_biases, activation

            self.n_layers = 10

            # All hidden layers are square; the last layer maps onto a single output node.
            layer_specs = [
                ([number_of_features, number_of_features], f'inference_hidden{idx}', tf.nn.relu)
                for idx in range(self.n_layers - 1)
            ]
            layer_specs.append(([number_of_features, 1], 'inference_sigmoid', tf.nn.sigmoid))

            self.layer_variables = []
            for spec in layer_specs:
                self.layer_variables.append(make_layer(*spec))

        @tf.function(input_signature=[tf.TensorSpec(shape=[None, number_of_features], dtype=tf.float32)])
        def __call__(self, x):
            """Propagate x through all layers and return the output of the last one."""
            for idx in range(self.n_layers):
                layer_weights, layer_biases, activation = self.layer_variables[idx]
                x = activation(tf.matmul(x, layer_weights) + layer_biases)
            return x

        @tf.function
        def loss(self, predicted_y, target_y, w):
            """Weighted cross entropy plus an L2 penalty on the hidden-layer weights."""
            l2_strength = 1e-15
            # Small offset that keeps the logarithm finite for predictions of exactly 0.
            log_offset = 1e-5

            # Only hidden-layer weight matrices are penalised (no biases, no sigmoid layer).
            penalised = [tf.nn.l2_loss(variable) for variable in self.trainable_variables
                         if '_weights' in variable.name and 'sigmoid' not in variable.name]
            l2_penalty = l2_strength * tf.math.add_n(penalised)

            # Predicted probability assigned to the true class of each event.
            prob_of_truth = tf.where(target_y == 1., predicted_y, 1. - predicted_y)
            weighted_log_sum = tf.reduce_sum(w * tf.math.log(prob_of_truth + log_offset))
            cross_entropy = - weighted_log_sum / tf.reduce_sum(w)
            return cross_entropy + l2_penalty

    # NOTE(review): State is not defined in the visible part of this file;
    # presumably it is imported from the basf2 MVA tensorflow interface - confirm.
    return State(model=my_model())
74 
75 
def partial_fit(state, X, S, y, w, epoch):
    """
    Run one gradient-descent step of state.model on the received batch.

    X is repeated five times along the feature axis before it is fed to the
    model; S is not used. Returns False when epoch equals 100000 and True
    otherwise (presumably the caller stops training on False - confirm).
    """
    model = state.model
    widened = np.repeat(X, 5, axis=1)

    # Record the forward pass so the loss can be differentiated.
    with tf.GradientTape() as tape:
        avg_cost = model.loss(model(widened), y, w)

    gradients = tape.gradient(avg_cost, model.trainable_variables)
    model.optimizer.apply_gradients(zip(gradients, model.trainable_variables))

    # epoch = i_epoch * nBatches + iBatch
    report_now = epoch % 1000 == 0
    if report_now:
        print(f"Epoch: {epoch:04d} cost= {avg_cost:.9f}")

    if epoch == 100000:
        return False
    return True
94 
95 
def apply(state, X):
    """
    Evaluate state.model on X and return the flattened response as float32.

    X is repeated five times along the feature axis before the model is called.
    The returned array is aligned, writeable, C-contiguous and owns its data.
    """
    widened = np.repeat(X, 5, axis=1)
    response = state.model(widened).numpy()
    flat_response = response.flatten()
    return np.require(flat_response, dtype=np.float32, requirements=['A', 'W', 'C', 'O'])
103 
104 
if __name__ == "__main__":
    # Example steering script: train this tensorflow model with the basf2 MVA
    # teacher, apply it to test data and print training time, inference time and AUC.
    from basf2 import conditions
    # NOTE: do not use testing payloads in production! Any results obtained like this WILL NOT BE PUBLISHED
    conditions.testing_payloads = [
        'localdb/database.txt'
    ]

    general_options = basf2_mva.GeneralOptions()
    general_options.m_datafiles = basf2_mva.vector("train.root")
    general_options.m_identifier = "Tensorflow"
    general_options.m_treename = "tree"
    variables = ['M', 'p', 'pt', 'pz',
                 'daughter(0, p)', 'daughter(0, pz)', 'daughter(0, pt)',
                 'daughter(1, p)', 'daughter(1, pz)', 'daughter(1, pt)',
                 'daughter(2, p)', 'daughter(2, pz)', 'daughter(2, pt)',
                 'chiProb', 'dr', 'dz',
                 'daughter(0, dr)', 'daughter(1, dr)',
                 'daughter(0, dz)', 'daughter(1, dz)',
                 'daughter(0, chiProb)', 'daughter(1, chiProb)', 'daughter(2, chiProb)',
                 'daughter(0, kaonID)', 'daughter(0, pionID)',
                 'daughterInvM(0, 1)', 'daughterInvM(0, 2)', 'daughterInvM(1, 2)']
    general_options.m_variables = basf2_mva.vector(*variables)
    general_options.m_target_variable = "isSignal"

    specific_options = basf2_mva.PythonOptions()
    specific_options.m_framework = "tensorflow"
    specific_options.m_steering_file = 'mva/examples/tensorflow/simple_deep.py'
    specific_options.m_normalize = True
    specific_options.m_nIterations = 100
    specific_options.m_mini_batch_size = 500

    # Time the training.
    training_start = time.time()
    basf2_mva.teacher(general_options, specific_options)
    training_stop = time.time()
    training_time = training_stop - training_start

    # Time the application of the trained method to ten copies of the test file.
    method = basf2_mva_util.Method(general_options.m_identifier)
    inference_start = time.time()
    test_data = ["test.root"] * 10
    p, t = method.apply_expert(basf2_mva.vector(*test_data), general_options.m_treename)
    inference_stop = time.time()
    inference_time = inference_stop - inference_start

    # auc was printed below without ever being assigned (NameError); compute it
    # from the expert response p and the truth t returned by apply_expert.
    auc = basf2_mva_util.calculate_auc_efficiency_vs_background_retention(p, t)
    print("Tensorflow", training_time, inference_time, auc)
def calculate_auc_efficiency_vs_background_retention(p, t, w=None)