#!/usr/bin/env python3


import numpy as np
import tensorflow as tf
import basf2_mva
import basf2_mva_util
import uproot3 as uproot

from basf2_mva_python_interface.tensorflow import State


# This example is based on
# 'Machine learning algorithms for the Belle II experiment and their validation on Belle data'
# - Thomas Keck (2017), Section 3.3.2.4
# See also Algorithm 1 in https://arxiv.org/pdf/1611.01046.pdf
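#
# The idea: a classifier (the inference net) is trained to separate signal from background
# while adversary nets try to model the distribution of each spectator variable from its
# output. The classifier minimises its own loss minus lambda times the adversaries' loss,
# so it is rewarded for outputs that carry no information about the spectators.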


def get_model(number_of_features, number_of_spectators, number_of_events, training_fraction, parameters):

    gpus = tf.config.list_physical_devices('GPU')
    if gpus:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)

    # don't run the adversarial net if lambda is not positive
    if parameters.get('lambda', 50) <= 0:
        number_of_spectators = 0

    def dense(x, W, b, activation_function):
        return activation_function(tf.matmul(x, W) + b)

    class adversarial_model(tf.Module):

        def __init__(self, **kwargs):
            super().__init__(**kwargs)

            self.optimizer = tf.optimizers.Adam(parameters.get('learning_rate', 0.01))
            self.epsilon = 1e-5
            # strength of the adversarial constraint
            self.lam = parameters.get('lambda', 50)
            # the inference net is updated every K-th epoch; the adversarial nets train in between
            self.K = parameters.get('adversary_steps', 13)
            # number of epochs training the inference net before switching on the adversarial networks
            self.pre_train_epochs = parameters.get('pre_train_epochs', 0)
            self.adversarial = number_of_spectators > 0

            def create_layer_variables(shape, name, activation_function=tf.tanh):
                weights = tf.Variable(
                    tf.random.truncated_normal(shape, stddev=1.0 / np.sqrt(float(shape[0]))),
                    name=f'{name}_weights')
                biases = tf.Variable(tf.zeros(shape=[shape[1]]), name=f'{name}_biases')
                return weights, biases, activation_function

            self.inference_layers = [
                create_layer_variables([number_of_features, number_of_features + 1],
                                       'inference_hidden', tf.sigmoid),
                create_layer_variables([number_of_features + 1, 1],
                                       'inference_sigmoid', tf.sigmoid),
            ]

            # create the layers for the adversarial nets.
            # There are 2*n_spectators sets of layers, one each for signal and background
            # for every spectator variable. The final 3 layers (means, widths, fractions)
            # parameterise a 4-component Gaussian mixture; they form part of the activation
            # function and are not evaluated in sequence.
            self.adversarial_layers = []
            for i in range(number_of_spectators):
                self.adversarial_layers.append([])
                for i_c, c in enumerate(['signal', 'background']):
                    self.adversarial_layers[i].append([
                        create_layer_variables([1, number_of_spectators + 1], f'adversary_hidden_{i}_{c}',
                                               activation_function=tf.tanh),
                        create_layer_variables([number_of_spectators + 1, 4], f'adversary_means_{i}_{c}',
                                               activation_function=tf.identity),
                        create_layer_variables([number_of_spectators + 1, 4], f'adversary_widths_{i}_{c}',
                                               activation_function=tf.exp),
                        create_layer_variables([number_of_spectators + 1, 4], f'adversary_fractions_{i}_{c}',
                                               activation_function=tf.identity)
                    ])
            return

        @tf.function(input_signature=[tf.TensorSpec(shape=[None, number_of_features], dtype=tf.float32)])
        def __call__(self, x):
            for i in range(len(self.inference_layers)):
                x = dense(x, *self.inference_layers[i])
            return x

        @tf.function
        def inference_loss(self, predicted_y, target_y, w):
            # weighted binary cross entropy between the network output and the truth label
            diff_from_truth = tf.where(target_y == 1., predicted_y, 1. - predicted_y)
            cross_entropy = - tf.reduce_sum(w * tf.math.log(diff_from_truth + self.epsilon)) / tf.reduce_sum(w)
            return cross_entropy

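        # the adversarial loss is the negative log-likelihood of the spectator values under
        # per-class Gaussian mixture models whose parameters are predicted from the classifier
        # output; the smaller this loss, the more the output still knows about the spectators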
        @tf.function
        def adversarial_loss(self, predicted_y, s, target_y, w):

            if number_of_spectators == 0:
                return tf.constant(0.)

            adversary_losses = []
            for i_spectator in range(number_of_spectators):
                # get the single spectator variable column
                s_single = tf.slice(s, [0, i_spectator], [-1, 1])
                for i_c, c in enumerate(['signal', 'background']):

                    # loop through the hidden layers
                    x = tf.identity(predicted_y)
                    for i_layer in range(len(self.adversarial_layers[i_spectator][i_c])-3):
                        x = dense(x, *self.adversarial_layers[i_spectator][i_c][i_layer])

                    # calculate the activation function and loss
                    means = dense(x, *self.adversarial_layers[i_spectator][i_c][-3])
                    widths = dense(x, *self.adversarial_layers[i_spectator][i_c][-2])
                    fractions = dense(x, *self.adversarial_layers[i_spectator][i_c][-1])

                    activation = tf.reduce_sum(tf.nn.softmax(fractions) *
                                               tf.exp(-(means-s_single)*(means-s_single) / (2*widths)) /
                                               tf.sqrt(2 * np.pi * widths), axis=1)
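
                    # 'activation' is the mixture density evaluated at the spectator value:
                    # sum_k softmax(fractions)_k * Gauss(s_single; mean=means_k, variance=widths_k),
                    # i.e. the likelihood of the observed spectator given the classifier output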
                    if c == 'signal':
                        loss = -tf.reduce_sum(target_y*w*tf.math.log(activation + self.epsilon)) / tf.reduce_sum(target_y*w)
                    else:
                        loss = -tf.reduce_sum((1-target_y)*w*tf.math.log(activation + self.epsilon)) / tf.reduce_sum((1-target_y)*w)
                    adversary_losses.append(loss)

            return tf.math.add_n(adversary_losses)

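        # combined loss from Algorithm 1 in arXiv:1611.01046: gradient descent on
        # L_f - lambda * L_r rewards classifier outputs from which the adversaries
        # cannot recover the spectator distributions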
        @tf.function
        def loss(self, x, s, y, w):
            predicted_y = self.__call__(x)
            inference_loss = self.inference_loss(predicted_y, y, w)
            adversary_loss = self.adversarial_loss(predicted_y, s, y, w)
            return inference_loss - self.lam * adversary_loss

    state = State(model=adversarial_model())

    state.epoch = 0
    state.avg_costs = []  # keeps track of the avg costs per batch over an epoch
    return state
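
# The remaining hooks of the basf2_mva python interface (begin_fit, apply, end_fit, ...) are
# not overridden here and fall back to the framework defaults; in particular state.nBatches,
# used in partial_fit below, is expected to be filled by the framework before training starts.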


def partial_fit(state, X, S, y, w, epoch, batch):
    """
    Pass batches of received data to tensorflow, alternating between training the
    inference net and the adversarial nets according to the configured schedule
    """
    with tf.GradientTape() as tape:
        if epoch < state.model.pre_train_epochs:
            # pre-training: only the inference net, ignoring the adversaries
            cost = state.model.inference_loss(state.model(X), y, w)
            trainable_vars = [v for v in state.model.trainable_variables if 'inference' in v.name]

        elif epoch % state.model.K == 0 or not state.model.adversarial:
            # every K-th epoch (or always, if the adversaries are disabled):
            # update the inference net on the combined loss
            cost = state.model.loss(X, S, y, w)
            trainable_vars = [v for v in state.model.trainable_variables if 'inference' in v.name]

        else:
            # remaining epochs: update only the adversary nets
            cost = state.model.adversarial_loss(state.model(X), S, y, w)
            trainable_vars = [v for v in state.model.trainable_variables if 'adversary' in v.name]

        grads = tape.gradient(cost, trainable_vars)

    state.model.optimizer.apply_gradients(zip(grads, trainable_vars))

    if batch == 0 and epoch == 0:
        state.avg_costs = [cost]
    elif batch != state.nBatches - 1:
        state.avg_costs.append(cost)
    else:
        # end of the epoch: print summary results and reset avg_costs
        print(f"Epoch: {epoch:04d} cost= {np.mean(state.avg_costs):.9f}")
        state.avg_costs = [cost]

    # hard upper limit on the number of training epochs
    if epoch == 100000:
        return False
    return True


if __name__ == "__main__":
    from basf2 import conditions
    # NOTE: do not use testing payloads in production! Any results obtained like this WILL NOT BE PUBLISHED
    conditions.testing_payloads = [
        'localdb/database.txt'
    ]

    variables = ['p', 'pt', 'pz', 'phi',
                 'daughter(0, p)', 'daughter(0, pz)', 'daughter(0, pt)', 'daughter(0, phi)',
                 'daughter(1, p)', 'daughter(1, pz)', 'daughter(1, pt)', 'daughter(1, phi)',
                 'daughter(2, p)', 'daughter(2, pz)', 'daughter(2, pt)', 'daughter(2, phi)',
                 'chiProb', 'dr', 'dz', 'dphi',
                 'daughter(0, dr)', 'daughter(1, dr)', 'daughter(0, dz)', 'daughter(1, dz)',
                 'daughter(0, dphi)', 'daughter(1, dphi)',
                 'daughter(0, chiProb)', 'daughter(1, chiProb)', 'daughter(2, chiProb)',
                 'daughter(0, kaonID)', 'daughter(0, pionID)', 'daughter(1, kaonID)', 'daughter(1, pionID)',
                 'daughterAngle(0, 1)', 'daughterAngle(0, 2)', 'daughterAngle(1, 2)',
                 'daughter(2, daughter(0, E))', 'daughter(2, daughter(1, E))',
                 'daughter(2, daughter(0, clusterTiming))', 'daughter(2, daughter(1, clusterTiming))',
                 'daughter(2, daughter(0, clusterE9E25))', 'daughter(2, daughter(1, clusterE9E25))',
                 'daughter(2, daughter(0, minC2TDist))', 'daughter(2, daughter(1, minC2TDist))',
                 'M']

    variables2 = ['p', 'pt', 'pz', 'phi',
                  'chiProb', 'dr', 'dz', 'dphi',
                  'daughter(2, chiProb)',
                  'daughter(0, kaonID)', 'daughter(0, pionID)', 'daughter(1, kaonID)', 'daughter(1, pionID)',
                  'daughter(2, daughter(0, E))', 'daughter(2, daughter(1, E))',
                  'daughter(2, daughter(0, clusterTiming))', 'daughter(2, daughter(1, clusterTiming))',
                  'daughter(2, daughter(0, clusterE9E25))', 'daughter(2, daughter(1, clusterE9E25))',
                  'daughter(2, daughter(0, minC2TDist))', 'daughter(2, daughter(1, minC2TDist))']

    general_options = basf2_mva.GeneralOptions()
    general_options.m_datafiles = basf2_mva.vector("train.root")
    general_options.m_treename = "tree"
    general_options.m_variables = basf2_mva.vector(*variables)
    general_options.m_spectators = basf2_mva.vector('daughterInvM(0, 1)', 'daughterInvM(0, 2)')
    general_options.m_target_variable = "isSignal"
    general_options.m_identifier = "tensorflow"

    specific_options = basf2_mva.PythonOptions()
    specific_options.m_framework = "tensorflow"
    specific_options.m_steering_file = 'mva/examples/orthogonal_discriminators/tensorflow_adversary.py'
    specific_options.m_normalize = True
    specific_options.m_nIterations = 1000
    specific_options.m_mini_batch_size = 400
    specific_options.m_config = '{"pre_train_epochs" : 50, "adversary_steps": 7, '\
                                ' "learning_rate": 0.001, "lambda": 0.01}'
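    # the JSON keys above are read back in get_model via parameters.get(...):
    # "lambda" (adversary strength), "adversary_steps" (K), "pre_train_epochs", "learning_rate"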
    basf2_mva.teacher(general_options, specific_options)

    method = basf2_mva_util.Method(general_options.m_identifier)
    test_data = ["test.root"]
    p, t = method.apply_expert(basf2_mva.vector(*test_data), general_options.m_treename)
    auc = basf2_mva_util.calculate_auc_efficiency_vs_background_retention(p, t)
    results = {}
    results['adversarial'] = {'p': p, 't': t, 'auc': auc}

    # negative lambda disables the adversarial part in get_model: plain classifier baseline
    general_options.m_identifier = "tensorflow_baseline"
    specific_options.m_nIterations = 200
    specific_options.m_mini_batch_size = 400
    specific_options.m_config = '{"pre_train_epochs" : 0, "adversary_steps": 1, '\
                                ' "learning_rate": 0.001, "lambda": -1.0}'
    basf2_mva.teacher(general_options, specific_options)

    method = basf2_mva_util.Method(general_options.m_identifier)
    test_data = ["test.root"]
    p, t = method.apply_expert(basf2_mva.vector(*test_data), general_options.m_treename)
    auc = basf2_mva_util.calculate_auc_efficiency_vs_background_retention(p, t)
    results['baseline'] = {'p': p, 't': t, 'auc': auc}

    # alternative decorrelation approach: train on the reduced variable list (variables2)
    # with the mass-correlated features removed
    general_options.m_variables = basf2_mva.vector(*variables2)
    general_options.m_identifier = "tensorflow_feature_drop"
    specific_options.m_nIterations = 200
    specific_options.m_mini_batch_size = 400
    specific_options.m_config = '{"pre_train_epochs" : 0, "adversary_steps": 1, '\
                                ' "learning_rate": 0.001, "lambda": -1.0}'
    basf2_mva.teacher(general_options, specific_options)

    method = basf2_mva_util.Method(general_options.m_identifier)
    test_data = ["test.root"]
    p, t = method.apply_expert(basf2_mva.vector(*test_data), general_options.m_treename)
    auc = basf2_mva_util.calculate_auc_efficiency_vs_background_retention(p, t)
    results['featureDrop'] = {'p': p, 't': t, 'auc': auc}

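    # read the spectator branches; basf2 stores 'daughterInvM(0, 1)' under the
    # ROOT-compatible branch name 'daughterInvM__bo0__cm__sp1__bc'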
    test_tree = uproot.open('test.root')['tree']
    invMassDaughter01 = test_tree.array('daughterInvM__bo0__cm__sp1__bc')
    invMassDaughter02 = test_tree.array('daughterInvM__bo0__cm__sp2__bc')
    isSignal = test_tree.array('isSignal').astype(int)

    def print_summary(name, data):

        def get_corr(p, var, isSignal, sig=True):
            if not sig:
                isSignal = (1 - isSignal)
            signal_mask = np.where(isSignal)
            p = p[signal_mask]
            var = var[signal_mask]
            return np.corrcoef(p, var)[0, 1]

        print(f'{name}:')
        print(f'auc: {data["auc"]:.3f}')
        print('Correlations between probability and spectator variables:')
        print(
            f'Sig: {get_corr(data["p"], invMassDaughter01, isSignal, True):.3f},'
            f' {get_corr(data["p"], invMassDaughter02, isSignal, True):.3f}')
        print(
            f'Bkg: {get_corr(data["p"], invMassDaughter01, isSignal, False):.3f},'
            f' {get_corr(data["p"], invMassDaughter02, isSignal, False):.3f}\n')

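    # for the adversarial training the correlations between the classifier output and the
    # spectator invariant masses should come out closer to zero than for the baseline,
    # typically at the cost of some AUC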
    print_summary('Baseline', results['baseline'])
    print_summary('Adversarial', results['adversarial'])
    print_summary('Feature Drop', results['featureDrop'])