#!/usr/bin/env python3

import numpy as np
import tensorflow as tf
import basf2_mva
import basf2_mva_util
import uproot3 as uproot

from basf2_mva_python_interface.tensorflow import State

# This example is based on
# 'Machine learning algorithms for the Belle II experiment and their validation
# on Belle data' - Thomas Keck (2017), Section 3.3.2.4
# See also Algorithm 1 in https://arxiv.org/pdf/1611.01046.pdf
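#
# In short (following Algorithm 1 of the paper above): the inference
# (classifier) net f and an adversarial net r are trained against each other
# on the combined objective
#
#     E(theta_f, theta_r) = L_f(theta_f) - lambda * L_r(theta_f, theta_r)
#
# where L_f is the classification loss, L_r is the negative log-likelihood of
# the adversary predicting the spectator variables from the classifier output,
# and lambda sets the trade-off between classification power and independence
# of the output from the spectators. The classifier is updated to minimize E,
# the adversary to minimize L_r.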

def get_model(number_of_features, number_of_spectators, number_of_events, training_fraction, parameters):

    gpus = tf.config.list_physical_devices('GPU')
    if gpus:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)

    # switch off the adversarial net if lambda is not positive
    if parameters.get('lambda', 50) <= 0:
        number_of_spectators = 0

    # a single dense layer: activation_function(x @ W + b)
    def dense(x, W, b, activation_function):
        return activation_function(tf.matmul(x, W) + b)

    class adversarial_model(tf.Module):

        def __init__(self, **kwargs):
            super().__init__(**kwargs)

            self.optimizer = tf.optimizers.Adam(parameters.get('learning_rate', 0.01))
            self.epsilon = 1e-5
            # strength of the adversarial constraint
            self.lam = parameters.get('lambda', 50)
            # number of adversarial batch updates per inference batch update (see partial_fit)
            self.K = parameters.get('adversary_steps', 13)
            # number of batches the inference net is trained before the adversarial networks are switched on
            self.pre_train_epochs = parameters.get('pre_train_epochs', 0)
            self.adversarial = number_of_spectators > 0

            def create_layer_variables(shape, name, activation_function=tf.tanh):
                weights = tf.Variable(
                    tf.random.truncated_normal(shape, stddev=1.0 / np.sqrt(float(shape[0]))),
                    name=f'{name}_weights')
                biases = tf.Variable(tf.zeros(shape=[shape[1]]), name=f'{name}_biases')
                return weights, biases, activation_function

            self.inference_layers = [
                create_layer_variables([number_of_features, number_of_features + 1],
                                       'inference_hidden', tf.sigmoid),
                create_layer_variables([number_of_features + 1, 1],
                                       'inference_sigmoid', tf.sigmoid),
            ]

            # create the layers for the adversarial net.
            # There are 2 * n_spectators sets of layers, one each for signal and background
            # for each spectator variable.
            # The final 3 layers (means, widths, fractions) parametrize the activation
            # function and are not evaluated in sequence.
            self.adversarial_layers = []
            for i in range(number_of_spectators):
                self.adversarial_layers.append([])
                for i_c, c in enumerate(['signal', 'background']):
                    self.adversarial_layers[i].append([
                        create_layer_variables([1, number_of_spectators + 1], f'adversary_hidden_{i}_{c}',
                                               activation_function=tf.tanh),
                        create_layer_variables([number_of_spectators + 1, 4], f'adversary_means_{i}_{c}',
                                               activation_function=tf.identity),
                        create_layer_variables([number_of_spectators + 1, 4], f'adversary_widths_{i}_{c}',
                                               activation_function=tf.exp),
                        create_layer_variables([number_of_spectators + 1, 4], f'adversary_fractions_{i}_{c}',
                                               activation_function=tf.identity)
                    ])
            return

        @tf.function(input_signature=[tf.TensorSpec(shape=[None, number_of_features], dtype=tf.float32)])
        def __call__(self, x):
            for i in range(len(self.inference_layers)):
                x = dense(x, *self.inference_layers[i])
            return x

        @tf.function
        def inference_loss(self, predicted_y, target_y, w):
            # weighted binary cross entropy of the prediction w.r.t. the truth
            diff_from_truth = tf.where(target_y == 1., predicted_y, 1. - predicted_y)
            cross_entropy = - tf.reduce_sum(w * tf.math.log(diff_from_truth + self.epsilon)) / tf.reduce_sum(w)
            return cross_entropy

        @tf.function
        def adversarial_loss(self, predicted_y, s, target_y, w):

            if number_of_spectators == 0:
                return tf.constant(0.)

            adversary_losses = []
            for i_spectator in range(number_of_spectators):
                # get the column of this single spectator variable
                s_single = tf.slice(s, [0, i_spectator], [-1, 1])
                for i_c, c in enumerate(['signal', 'background']):

                    # loop through the hidden layers
                    x = tf.identity(predicted_y)
                    for i_layer in range(len(self.adversarial_layers[i_spectator][i_c]) - 3):
                        x = dense(x, *self.adversarial_layers[i_spectator][i_c][i_layer])

                    # calculate the activation function and loss
                    means = dense(x, *self.adversarial_layers[i_spectator][i_c][-3])
                    widths = dense(x, *self.adversarial_layers[i_spectator][i_c][-2])
                    fractions = dense(x, *self.adversarial_layers[i_spectator][i_c][-1])

                    activation = tf.reduce_sum(tf.nn.softmax(fractions) *
                                               tf.exp(-(means - s_single) * (means - s_single) / (2 * widths)) /
                                               tf.sqrt(2 * np.pi * widths), axis=1)
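                    # "activation" is the likelihood of the spectator value under a
                    # 4-component Gaussian mixture whose means, variances ("widths")
                    # and mixture weights (softmax of "fractions") are predicted from
                    # the classifier output:
                    #     p(s | f(x)) = sum_k pi_k * N(s; mu_k, sigma_k^2)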

                    # negative log-likelihood of the observed spectator values,
                    # evaluated separately on signal and background candidates
                    if c == 'signal':
                        loss = -tf.reduce_sum(target_y * w * tf.math.log(activation + self.epsilon)) / tf.reduce_sum(target_y * w)
                    else:
                        loss = -tf.reduce_sum((1 - target_y) * w * tf.math.log(activation + self.epsilon)) / tf.reduce_sum((1 - target_y) * w)
                    adversary_losses.append(loss)

            return tf.math.add_n(adversary_losses)

        @tf.function
        def loss(self, x, s, y, w):
            # combined objective: classification loss minus lambda times the adversary loss
            predicted_y = self.__call__(x)
            inference_loss = self.inference_loss(predicted_y, y, w)
            adversary_loss = self.adversarial_loss(predicted_y, s, y, w)
            return inference_loss - self.lam * adversary_loss

    state = State(model=adversarial_model())
    return state

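
# For illustration only: a minimal sketch of the update schedule that
# partial_fit below implements. This helper is hypothetical (it is not part
# of the basf2 mva interface and is never called); it just makes the three
# training phases explicit.
def _which_update(epoch, pre_train_epochs, adversary_steps, adversarial):
    """Return which set of weights the given batch counter would update."""
    if epoch < pre_train_epochs:
        return 'inference'  # pre-training: only the classifier is updated
    if epoch % adversary_steps == 0 or not adversarial:
        return 'inference'  # combined loss, gradients w.r.t. the classifier
    return 'adversary'      # adversary loss, gradients w.r.t. the adversary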


def partial_fit(state, X, S, y, w, epoch):
    """
    Pass batches of received data to tensorflow
    """
    with tf.GradientTape() as tape:
        if epoch < state.model.pre_train_epochs:
            cost = state.model.inference_loss(state.model(X), y, w)
            trainable_vars = [v for v in state.model.trainable_variables if 'inference' in v.name]

        elif epoch % state.model.K == 0 or not state.model.adversarial:
            cost = state.model.loss(X, S, y, w)
            trainable_vars = [v for v in state.model.trainable_variables if 'inference' in v.name]

        else:
            cost = state.model.adversarial_loss(state.model(X), S, y, w)
            trainable_vars = [v for v in state.model.trainable_variables if 'adversary' in v.name]

    grads = tape.gradient(cost, trainable_vars)

    state.model.optimizer.apply_gradients(zip(grads, trainable_vars))

    # despite its name, epoch counts batches here: epoch = i_epoch * nBatches + iBatch
    if epoch % 1000 == 0:
        print(f"Epoch: {epoch:04d} cost= {cost:.9f}")
    if epoch == 100000:
        # returning False stops the training
        return False
    return True
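
# Taken together, partial_fit implements the alternating minimization of
# Algorithm 1: after pre_train_epochs pure classifier batches, every K-th
# batch updates the classifier on the combined loss, and the remaining
# batches update only the adversary on its own loss.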


if __name__ == "__main__":
    from basf2 import conditions
    # NOTE: do not use testing payloads in production! Any results obtained like this WILL NOT BE PUBLISHED
    conditions.testing_payloads = [
        'localdb/database.txt'
    ]

    variables = ['p', 'pt', 'pz', 'phi',
                 'daughter(0, p)', 'daughter(0, pz)', 'daughter(0, pt)', 'daughter(0, phi)',
                 'daughter(1, p)', 'daughter(1, pz)', 'daughter(1, pt)', 'daughter(1, phi)',
                 'daughter(2, p)', 'daughter(2, pz)', 'daughter(2, pt)', 'daughter(2, phi)',
                 'chiProb', 'dr', 'dz', 'dphi',
                 'daughter(0, dr)', 'daughter(1, dr)', 'daughter(0, dz)', 'daughter(1, dz)',
                 'daughter(0, dphi)', 'daughter(1, dphi)',
                 'daughter(0, chiProb)', 'daughter(1, chiProb)', 'daughter(2, chiProb)',
                 'daughter(0, kaonID)', 'daughter(0, pionID)', 'daughter(1, kaonID)', 'daughter(1, pionID)',
                 'daughterAngle(0, 1)', 'daughterAngle(0, 2)', 'daughterAngle(1, 2)',
                 'daughter(2, daughter(0, E))', 'daughter(2, daughter(1, E))',
                 'daughter(2, daughter(0, clusterTiming))', 'daughter(2, daughter(1, clusterTiming))',
                 'daughter(2, daughter(0, clusterE9E25))', 'daughter(2, daughter(1, clusterE9E25))',
                 'daughter(2, daughter(0, minC2TDist))', 'daughter(2, daughter(1, minC2TDist))',
                 'M']

    variables2 = ['p', 'pt', 'pz', 'phi',
                  'chiProb', 'dr', 'dz', 'dphi',
                  'daughter(2, chiProb)',
                  'daughter(0, kaonID)', 'daughter(0, pionID)', 'daughter(1, kaonID)', 'daughter(1, pionID)',
                  'daughter(2, daughter(0, E))', 'daughter(2, daughter(1, E))',
                  'daughter(2, daughter(0, clusterTiming))', 'daughter(2, daughter(1, clusterTiming))',
                  'daughter(2, daughter(0, clusterE9E25))', 'daughter(2, daughter(1, clusterE9E25))',
                  'daughter(2, daughter(0, minC2TDist))', 'daughter(2, daughter(1, minC2TDist))']
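
    # variables2 above drops the features most correlated with the spectator
    # masses (the daughter kinematics, the daughter vertex and angle variables,
    # and the candidate mass M); it is used for the "feature drop" benchmark
    # trained below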

    general_options = basf2_mva.GeneralOptions()
    general_options.m_datafiles = basf2_mva.vector("train.root")
    general_options.m_treename = "tree"
    general_options.m_variables = basf2_mva.vector(*variables)
    general_options.m_spectators = basf2_mva.vector('daughterInvM(0, 1)', 'daughterInvM(0, 2)')
    general_options.m_target_variable = "isSignal"
    general_options.m_identifier = "tensorflow"

    specific_options = basf2_mva.PythonOptions()
    specific_options.m_framework = "tensorflow"
    specific_options.m_steering_file = 'mva/examples/orthogonal_discriminators/tensorflow_adversary.py'
    specific_options.m_normalize = True
    specific_options.m_nIterations = 1000
    specific_options.m_mini_batch_size = 400
    specific_options.m_config = '{"pre_train_epochs" : 50, "adversary_steps": 7, '\
                                ' "learning_rate": 0.001, "lambda": 0.01}'
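    # the JSON document in m_config is parsed by the mva framework and handed
    # to get_model as the "parameters" dict, so the keys above correspond to
    # the parameters.get(...) lookups there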
    basf2_mva.teacher(general_options, specific_options)

    method = basf2_mva_util.Method(general_options.m_identifier)
    test_data = ["test.root"]
    p, t = method.apply_expert(basf2_mva.vector(*test_data), general_options.m_treename)
    auc = basf2_mva_util.calculate_auc_efficiency_vs_background_retention(p, t)
    results = {}
    results['adversarial'] = {'p': p, 't': t, 'auc': auc}

    general_options.m_identifier = "tensorflow_baseline"
    specific_options.m_nIterations = 200
    specific_options.m_mini_batch_size = 400
    specific_options.m_config = '{"pre_train_epochs" : 0, "adversary_steps": 1, '\
                                ' "learning_rate": 0.001, "lambda": -1.0}'
    basf2_mva.teacher(general_options, specific_options)

    method = basf2_mva_util.Method(general_options.m_identifier)
    test_data = ["test.root"]
    p, t = method.apply_expert(basf2_mva.vector(*test_data), general_options.m_treename)
    auc = basf2_mva_util.calculate_auc_efficiency_vs_background_retention(p, t)
    results['baseline'] = {'p': p, 't': t, 'auc': auc}

    general_options.m_variables = basf2_mva.vector(*variables2)
    general_options.m_identifier = "tensorflow_feature_drop"
    specific_options.m_nIterations = 200
    specific_options.m_mini_batch_size = 400
    specific_options.m_config = '{"pre_train_epochs" : 0, "adversary_steps": 1, '\
                                ' "learning_rate": 0.001, "lambda": -1.0}'
    basf2_mva.teacher(general_options, specific_options)

    method = basf2_mva_util.Method(general_options.m_identifier)
    test_data = ["test.root"]
    p, t = method.apply_expert(basf2_mva.vector(*test_data), general_options.m_treename)
    auc = basf2_mva_util.calculate_auc_efficiency_vs_background_retention(p, t)
    results['featureDrop'] = {'p': p, 't': t, 'auc': auc}

    # read back the spectator masses; basf2 makes branch names ROOT-compatible,
    # encoding '(' as __bo, ',' as __cm, ' ' as __sp and ')' as __bc, so these
    # branches are daughterInvM(0, 1) and daughterInvM(0, 2)
    test_tree = uproot.open('test.root')['tree']
    invMassDaughter01 = test_tree.array('daughterInvM__bo0__cm__sp1__bc')
    invMassDaughter02 = test_tree.array('daughterInvM__bo0__cm__sp2__bc')
    isSignal = test_tree.array('isSignal').astype(int)

    def print_summary(name, data):

        def get_corr(p, var, isSignal, sig=True):
            # correlation between the classifier output and a spectator variable,
            # restricted to either signal or background candidates
            if not sig:
                isSignal = (1 - isSignal)
            signal_mask = np.where(isSignal)
            p = p[signal_mask]
            var = var[signal_mask]
            return np.corrcoef(p, var)[0, 1]

        print(f'{name}:')
        print(f'auc: {data["auc"]:.3f}')
        print('Correlations between probability and spectator variables:')
        print(
            f'Sig: {get_corr(data["p"], invMassDaughter01, isSignal, True):.3f},'
            f' {get_corr(data["p"], invMassDaughter02, isSignal, True):.3f}')
        print(
            f'Bkg: {get_corr(data["p"], invMassDaughter01, isSignal, False):.3f},'
            f' {get_corr(data["p"], invMassDaughter02, isSignal, False):.3f}\n')

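    # With a well-trained adversary, the correlations between the classifier
    # output and the spectator masses should be close to zero for the
    # adversarial model, typically at the cost of a slightly smaller auc than
    # the unconstrained baseline.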
    print_summary('Baseline', results['baseline'])
    print_summary('Adversarial', results['adversarial'])
    print_summary('Feature Drop', results['featureDrop'])