Belle II Software release-08-01-10
tensorflow_adversary.py
#!/usr/bin/env python3

import numpy as np
import tensorflow as tf
import basf2_mva
import basf2_mva_util
import uproot3 as uproot

# the State class carries the model between the basf2 MVA interface hooks
from basf2_mva_python_interface.tensorflow import State

# This example is based on
# 'Machine learning algorithms for the Belle II experiment and their validation on Belle data' - Thomas Keck (2017), Section 3.3.2.4
# See also Algorithm 1 in https://arxiv.org/pdf/1611.01046.pdf

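# In short: the inference (classifier) net is trained to minimise
#   inference_loss - lambda * adversarial_loss,
# while the adversary is trained to minimise adversarial_loss, i.e. to predict the
# spectator variables from the classifier output. The two updates alternate, so a
# larger lambda pushes the classifier towards outputs that carry no spectator information.
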
def get_model(number_of_features, number_of_spectators, number_of_events, training_fraction, parameters):

    gpus = tf.config.list_physical_devices('GPU')
    if gpus:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)

    # don't run the adversarial net if lambda is negative
    if parameters.get('lambda', 50) <= 0:
        number_of_spectators = 0

    def dense(x, W, b, activation_function):
        return activation_function(tf.matmul(x, W) + b)

    class adversarial_model(tf.Module):

        def __init__(self, **kwargs):
            super().__init__(**kwargs)

            self.optimizer = tf.optimizers.Adam(parameters.get('learning_rate', 0.01))
            self.epsilon = 1e-5
            # strength of the adversarial constraint
            self.lam = parameters.get('lambda', 50)
            # the inference net is updated only on every K-th epoch; the adversary is trained on all others
            self.K = parameters.get('adversary_steps', 13)
            # number of epochs training the inference net before switching on the adversarial networks
            self.pre_train_epochs = parameters.get('pre_train_epochs', 0)
            self.adversarial = number_of_spectators > 0

            def create_layer_variables(shape, name, activation_function=tf.tanh):
                weights = tf.Variable(
                    tf.random.truncated_normal(shape, stddev=1.0 / np.sqrt(float(shape[0]))),
                    name=f'{name}_weights')
                biases = tf.Variable(tf.zeros(shape=[shape[1]]), name=f'{name}_biases')
                return weights, biases, activation_function

            self.inference_layers = [
                create_layer_variables([number_of_features, number_of_features+1],
                                       'inference_hidden', tf.sigmoid),
                create_layer_variables([number_of_features+1, 1],
                                       'inference_sigmoid', tf.sigmoid),
            ]

            # create the layers for the adversarial net.
            # This has 2*n_spectators sets of layers, one each for signal and background for each spectator variable.
            # The final 3 layers (means, widths, fractions) form part of the activation function and are not evaluated in sequence
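            # (each adversary is a small mixture density network: given the classifier
            # output it predicts the parameters of a 4-component Gaussian mixture for
            # its spectator variable)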
            self.adversarial_layers = []
            for i in range(number_of_spectators):
                self.adversarial_layers.append([])
                for i_c, c in enumerate(['signal', 'background']):
                    self.adversarial_layers[i].append([
                        create_layer_variables([1, number_of_spectators+1], f'adversary_hidden_{i}_{c}',
                                               activation_function=tf.tanh),
                        create_layer_variables([number_of_spectators + 1, 4], f'adversary_means_{i}_{c}',
                                               activation_function=tf.identity),
                        create_layer_variables([number_of_spectators + 1, 4], f'adversary_widths_{i}_{c}',
                                               activation_function=tf.exp),
                        create_layer_variables([number_of_spectators + 1, 4], f'adversary_fractions_{i}_{c}',
                                               activation_function=tf.identity)
                    ])
            return

        @tf.function(input_signature=[tf.TensorSpec(shape=[None, number_of_features], dtype=tf.float32)])
        def __call__(self, x):
            for i in range(len(self.inference_layers)):
                x = dense(x, *self.inference_layers[i])
            return x

        @tf.function
        def inference_loss(self, predicted_y, target_y, w):
            diff_from_truth = tf.where(target_y == 1., predicted_y, 1. - predicted_y)
            cross_entropy = - tf.reduce_sum(w * tf.math.log(diff_from_truth + self.epsilon)) / tf.reduce_sum(w)
            return cross_entropy

        @tf.function
        def adversarial_loss(self, predicted_y, s, target_y, w):

            if number_of_spectators == 0:
                return tf.constant(0.)

            adversary_losses = []
            for i_spectator in range(number_of_spectators):
                s_single = tf.slice(s, [0, i_spectator], [-1, 1])  # get the single spectator variable column
                for i_c, c in enumerate(['signal', 'background']):

                    # loop through the hidden layers
                    x = tf.identity(predicted_y)
                    for i_layer in range(len(self.adversarial_layers[i_spectator][i_c])-3):
                        x = dense(x, *self.adversarial_layers[i_spectator][i_c][i_layer])

                    # calculate the activation function and loss
                    means = dense(x, *self.adversarial_layers[i_spectator][i_c][-3])
                    widths = dense(x, *self.adversarial_layers[i_spectator][i_c][-2])
                    fractions = dense(x, *self.adversarial_layers[i_spectator][i_c][-1])

                    activation = tf.reduce_sum(tf.nn.softmax(fractions) *
                                               tf.exp(-(means-s_single)*(means-s_single) / (2*widths)) /
                                               tf.sqrt(2 * np.pi * widths), axis=1)

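                    # 'activation' is the Gaussian-mixture likelihood of the observed
                    # spectator value; the loss below is its weighted negative
                    # log-likelihood, evaluated separately for signal and background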
                    if c == 'signal':
                        loss = -tf.reduce_sum(target_y*w*tf.math.log(activation + self.epsilon)) / tf.reduce_sum(target_y*w)
                    else:
                        loss = -tf.reduce_sum((1-target_y)*w*tf.math.log(activation + self.epsilon)) / tf.reduce_sum((1-target_y)*w)
                    adversary_losses.append(loss)

            return tf.math.add_n(adversary_losses)

        @tf.function
        def loss(self, x, s, y, w):
            predicted_y = self.__call__(x)
            inference_loss = self.inference_loss(predicted_y, y, w)
            adversary_loss = self.adversarial_loss(predicted_y, s, y, w)
            # minimising this w.r.t. the inference net maximises the adversary's loss,
            # i.e. it favours classifier outputs from which the spectators cannot be predicted
            return inference_loss - self.lam * adversary_loss

    state = State(model=adversarial_model())

    state.epoch = 0
    state.avg_costs = []  # keeps track of the avg costs per batch over an epoch
    return state


def partial_fit(state, X, S, y, w, epoch, batch):
    """
    Pass batches of received data to TensorFlow
    """
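    # Alternating schedule: the first pre_train_epochs epochs train only the inference
    # net; afterwards the inference net is updated (with the combined loss) only on
    # every K-th epoch and the adversary on all other epochs. state.nBatches is assumed
    # to be set by the framework before the first call.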
    with tf.GradientTape() as tape:
        if epoch < state.model.pre_train_epochs:
            cost = state.model.inference_loss(state.model(X), y, w)
            trainable_vars = [v for v in state.model.trainable_variables if 'inference' in v.name]

        elif epoch % state.model.K == 0 or not state.model.adversarial:
            cost = state.model.loss(X, S, y, w)
            trainable_vars = [v for v in state.model.trainable_variables if 'inference' in v.name]

        else:
            cost = state.model.adversarial_loss(state.model(X), S, y, w)
            trainable_vars = [v for v in state.model.trainable_variables if 'adversary' in v.name]

        grads = tape.gradient(cost, trainable_vars)

    state.model.optimizer.apply_gradients(zip(grads, trainable_vars))

    if batch == 0 and epoch == 0:
        state.avg_costs = [cost]
    elif batch != state.nBatches - 1:
        state.avg_costs.append(cost)
    else:
        # end of the epoch, print summary results, reset the avg_costs and update the counter
        print(f"Epoch: {epoch:04d} cost= {np.mean(state.avg_costs):.9f}")
        state.avg_costs = [cost]

    if epoch == 100000:
        return False
    return True


if __name__ == "__main__":
    from basf2 import conditions, find_file
    # NOTE: do not use testing payloads in production! Any results obtained like this WILL NOT BE PUBLISHED
    conditions.testing_payloads = [
        'localdb/database.txt'
    ]

    variables = ['p', 'pt', 'pz', 'phi',
                 'daughter(0, p)', 'daughter(0, pz)', 'daughter(0, pt)', 'daughter(0, phi)',
                 'daughter(1, p)', 'daughter(1, pz)', 'daughter(1, pt)', 'daughter(1, phi)',
                 'daughter(2, p)', 'daughter(2, pz)', 'daughter(2, pt)', 'daughter(2, phi)',
                 'chiProb', 'dr', 'dz', 'dphi',
                 'daughter(0, dr)', 'daughter(1, dr)', 'daughter(0, dz)', 'daughter(1, dz)',
                 'daughter(0, dphi)', 'daughter(1, dphi)',
                 'daughter(0, chiProb)', 'daughter(1, chiProb)', 'daughter(2, chiProb)',
                 'daughter(0, kaonID)', 'daughter(0, pionID)', 'daughter(1, kaonID)', 'daughter(1, pionID)',
                 'daughterAngle(0, 1)', 'daughterAngle(0, 2)', 'daughterAngle(1, 2)',
                 'daughter(2, daughter(0, E))', 'daughter(2, daughter(1, E))',
                 'daughter(2, daughter(0, clusterTiming))', 'daughter(2, daughter(1, clusterTiming))',
                 'daughter(2, daughter(0, clusterE9E25))', 'daughter(2, daughter(1, clusterE9E25))',
                 'daughter(2, daughter(0, minC2TDist))', 'daughter(2, daughter(1, minC2TDist))',
                 'M']

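    # reduced variable set for the feature-drop training below: the candidate mass 'M'
    # and most daughter kinematics (the inputs most correlated with the spectator
    # masses) are removed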
    variables2 = ['p', 'pt', 'pz', 'phi',
                  'chiProb', 'dr', 'dz', 'dphi',
                  'daughter(2, chiProb)',
                  'daughter(0, kaonID)', 'daughter(0, pionID)', 'daughter(1, kaonID)', 'daughter(1, pionID)',
                  'daughter(2, daughter(0, E))', 'daughter(2, daughter(1, E))',
                  'daughter(2, daughter(0, clusterTiming))', 'daughter(2, daughter(1, clusterTiming))',
                  'daughter(2, daughter(0, clusterE9E25))', 'daughter(2, daughter(1, clusterE9E25))',
                  'daughter(2, daughter(0, minC2TDist))', 'daughter(2, daughter(1, minC2TDist))']

    train_file = find_file("mva/train_D0toKpipi.root", "examples")
    test_file = find_file("mva/test_D0toKpipi.root", "examples")

    training_data = basf2_mva.vector(train_file)
    testing_data = basf2_mva.vector(test_file)

    general_options = basf2_mva.GeneralOptions()
    general_options.m_datafiles = training_data
    general_options.m_treename = "tree"
    general_options.m_variables = basf2_mva.vector(*variables)
    general_options.m_spectators = basf2_mva.vector('daughterInvM(0, 1)', 'daughterInvM(0, 2)')
    general_options.m_target_variable = "isSignal"
    general_options.m_identifier = "tensorflow"

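    # 1) adversarial training: a positive lambda switches the adversary on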
    specific_options = basf2_mva.PythonOptions()
    specific_options.m_framework = "tensorflow"
    specific_options.m_steering_file = 'mva/examples/orthogonal_discriminators/tensorflow_adversary.py'
    specific_options.m_normalize = True
    specific_options.m_nIterations = 1000
    specific_options.m_mini_batch_size = 400
    specific_options.m_config = '{"pre_train_epochs" : 50, "adversary_steps": 7, '\
                                ' "learning_rate": 0.001, "lambda": 0.01}'
    basf2_mva.teacher(general_options, specific_options)

    method = basf2_mva_util.Method(general_options.m_identifier)
    p, t = method.apply_expert(testing_data, general_options.m_treename)
    auc = basf2_mva_util.calculate_auc_efficiency_vs_background_retention(p, t)
    results = {}
    results['adversarial'] = {'p': p, 't': t, 'auc': auc}

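    # 2) baseline: the same network with the adversary disabled via a negative lambda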
    general_options.m_identifier = "tensorflow_baseline"
    specific_options.m_nIterations = 200
    specific_options.m_mini_batch_size = 400
    specific_options.m_config = '{"pre_train_epochs" : 0, "adversary_steps": 1, '\
                                ' "learning_rate": 0.001, "lambda": -1.0}'
    basf2_mva.teacher(general_options, specific_options)

    method = basf2_mva_util.Method(general_options.m_identifier)
    p, t = method.apply_expert(testing_data, general_options.m_treename)
    auc = basf2_mva_util.calculate_auc_efficiency_vs_background_retention(p, t)
    results['baseline'] = {'p': p, 't': t, 'auc': auc}

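    # 3) feature drop: adversary disabled, correlated input variables removed instead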
    general_options.m_variables = basf2_mva.vector(*variables2)
    general_options.m_identifier = "tensorflow_feature_drop"
    specific_options.m_nIterations = 200
    specific_options.m_mini_batch_size = 400
    specific_options.m_config = '{"pre_train_epochs" : 0, "adversary_steps": 1, '\
                                ' "learning_rate": 0.001, "lambda": -1.0}'
    basf2_mva.teacher(general_options, specific_options)

    method = basf2_mva_util.Method(general_options.m_identifier)
    p, t = method.apply_expert(testing_data, general_options.m_treename)
    auc = basf2_mva_util.calculate_auc_efficiency_vs_background_retention(p, t)
    results['featureDrop'] = {'p': p, 't': t, 'auc': auc}

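    # basf2 variable names are mangled into ROOT-compatible branch names:
    # '__bo' = '(', '__cm' = ',', '__sp' = ' ', '__bc' = ')'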
    test_tree = uproot.open(test_file)['tree']
    invMassDaughter01 = test_tree.array('daughterInvM__bo0__cm__sp1__bc')
    invMassDaughter02 = test_tree.array('daughterInvM__bo0__cm__sp2__bc')
    isSignal = test_tree.array('isSignal').astype(int)

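    # a small |correlation| between the classifier output and the spectator masses, for
    # both signal and background, indicates the discriminator is orthogonal to the spectators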
    def print_summary(name, data):

        def get_corr(p, var, isSignal, sig=True):
            if not sig:
                isSignal = (1 - isSignal)
            signal_mask = np.where(isSignal)
            p = p[signal_mask]
            var = var[signal_mask]
            return np.corrcoef(p, var)[0, 1]

        print(f'{name}:')
        print(f'auc: {data["auc"]:.3f}')
        print('Correlations between probability and spectator variables:')
        print(
            f'Sig: {get_corr(data["p"], invMassDaughter01, isSignal, True):.3f},'
            f' {get_corr(data["p"], invMassDaughter02, isSignal, True):.3f}')
        print(
            f'Bkg: {get_corr(data["p"], invMassDaughter01, isSignal, False):.3f},'
            f' {get_corr(data["p"], invMassDaughter02, isSignal, False):.3f}\n')

    print_summary('Baseline', results['baseline'])
    print_summary('Adversarial', results['adversarial'])
    print_summary('Feature Drop', results['featureDrop'])