tensorflow_adversary.py
#!/usr/bin/env python3

import numpy as np
import tensorflow as tf
import basf2_mva
import basf2_mva_util
import uproot3 as uproot

from basf2_mva_python_interface.tensorflow import State

# This example is based on
# 'Machine learning algorithms for the Belle II experiment and their validation on Belle data'
# - Thomas Keck (2017), Section 3.3.2.4
# See also Algorithm 1 in https://arxiv.org/pdf/1611.01046.pdf
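#
# In short: the inference net f(x) is trained with the usual cross entropy,
# while one adversarial net per spectator variable and per class tries to
# predict that spectator from f(x). The inference net minimises
#     inference_loss - lambda * adversary_loss,
# so it is penalised whenever its output carries information about the
# spectators; the two sets of networks are updated in alternation.
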
def get_model(number_of_features, number_of_spectators, number_of_events, training_fraction, parameters):

    # let tensorflow grow GPU memory on demand instead of claiming it all up front
    gpus = tf.config.list_physical_devices('GPU')
    if gpus:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)

    # don't run the adversarial nets if lambda is not positive
    if parameters.get('lambda', 50) <= 0:
        number_of_spectators = 0

    def dense(x, W, b, activation_function):
        return activation_function(tf.matmul(x, W) + b)

    class adversarial_model(tf.Module):

        def __init__(self, **kwargs):
            super().__init__(**kwargs)

            self.optimizer = tf.optimizers.Adam(parameters.get('learning_rate', 0.01))
            self.epsilon = 1e-5
            # strength of the adversarial constraint
            self.lam = parameters.get('lambda', 50)
            # the inference net is only updated every K-th epoch;
            # the adversarial nets are updated in all other epochs
            self.K = parameters.get('adversary_steps', 13)
            # number of epochs training the inference net before switching on the adversarial networks
            self.pre_train_epochs = parameters.get('pre_train_epochs', 0)
            self.adversarial = number_of_spectators > 0

            def create_layer_variables(shape, name, activation_function=tf.tanh):
                weights = tf.Variable(
                    tf.random.truncated_normal(shape, stddev=1.0 / np.sqrt(float(shape[0]))),
                    name=f'{name}_weights')
                biases = tf.Variable(tf.zeros(shape=[shape[1]]), name=f'{name}_biases')
                return weights, biases, activation_function

            self.inference_layers = [
                create_layer_variables([number_of_features, number_of_features + 1],
                                       'inference_hidden', tf.sigmoid),
                create_layer_variables([number_of_features + 1, 1],
                                       'inference_sigmoid', tf.sigmoid),
            ]

            # create the layers for the adversarial nets.
            # There are 2 * n_spectators sets of layers, one each for signal and background
            # for every spectator variable.
            # The final 3 layers (means, widths, fractions) form part of the activation
            # function and are not evaluated in sequence.
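            # Each adversary is a small mixture density network: from the classifier
            # output it predicts means, widths and fractions of a 4-component
            # Gaussian mixture that models the spectator distribution for its class.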
            self.adversarial_layers = []
            for i in range(number_of_spectators):
                self.adversarial_layers.append([])
                for i_c, c in enumerate(['signal', 'background']):
                    self.adversarial_layers[i].append([
                        create_layer_variables([1, number_of_spectators + 1], f'adversary_hidden_{i}_{c}',
                                               activation_function=tf.tanh),
                        create_layer_variables([number_of_spectators + 1, 4], f'adversary_means_{i}_{c}',
                                               activation_function=tf.identity),
                        create_layer_variables([number_of_spectators + 1, 4], f'adversary_widths_{i}_{c}',
                                               activation_function=tf.exp),
                        create_layer_variables([number_of_spectators + 1, 4], f'adversary_fractions_{i}_{c}',
                                               activation_function=tf.identity)
                    ])

        @tf.function(input_signature=[tf.TensorSpec(shape=[None, number_of_features], dtype=tf.float32)])
        def __call__(self, x):
            for i in range(len(self.inference_layers)):
                x = dense(x, *self.inference_layers[i])
            return x

        @tf.function
        def inference_loss(self, predicted_y, target_y, w):
            # weighted binary cross entropy; epsilon protects against log(0)
            diff_from_truth = tf.where(target_y == 1., predicted_y, 1. - predicted_y)
            cross_entropy = - tf.reduce_sum(w * tf.math.log(diff_from_truth + self.epsilon)) / tf.reduce_sum(w)
            return cross_entropy

        @tf.function
        def adversarial_loss(self, predicted_y, s, target_y, w):

            if number_of_spectators == 0:
                return tf.constant(0.)

            adversary_losses = []
            for i_spectator in range(number_of_spectators):
                # get the single spectator variable column
                s_single = tf.slice(s, [0, i_spectator], [-1, 1])
                for i_c, c in enumerate(['signal', 'background']):

                    # loop through the hidden layers
                    x = tf.identity(predicted_y)
                    for i_layer in range(len(self.adversarial_layers[i_spectator][i_c]) - 3):
                        x = dense(x, *self.adversarial_layers[i_spectator][i_c][i_layer])

                    # calculate the activation function and loss
                    means = dense(x, *self.adversarial_layers[i_spectator][i_c][-3])
                    widths = dense(x, *self.adversarial_layers[i_spectator][i_c][-2])
                    fractions = dense(x, *self.adversarial_layers[i_spectator][i_c][-1])

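                    # likelihood of the spectator value under the predicted Gaussian
                    # mixture; the softmax turns the fractions into normalised weights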
                    activation = tf.reduce_sum(tf.nn.softmax(fractions) *
                                               tf.exp(-(means - s_single) * (means - s_single) / (2 * widths)) /
                                               tf.sqrt(2 * np.pi * widths), axis=1)

                    # negative log-likelihood, weighted and restricted to the given class
                    if c == 'signal':
                        loss = -tf.reduce_sum(target_y * w * tf.math.log(activation + self.epsilon)) \
                            / tf.reduce_sum(target_y * w)
                    else:
                        loss = -tf.reduce_sum((1 - target_y) * w * tf.math.log(activation + self.epsilon)) \
                            / tf.reduce_sum((1 - target_y) * w)
                    adversary_losses.append(loss)

            return tf.math.add_n(adversary_losses)

        @tf.function
        def loss(self, x, s, y, w):
            predicted_y = self.__call__(x)
            inference_loss = self.inference_loss(predicted_y, y, w)
            adversary_loss = self.adversarial_loss(predicted_y, s, y, w)
            # minimising this w.r.t. the inference weights drives the classifier
            # output towards carrying no information about the spectators
            return inference_loss - self.lam * adversary_loss

    state = State(model=adversarial_model())

    state.epoch = 0
    state.avg_costs = []  # keeps track of the average cost per batch over an epoch
    return state


def partial_fit(state, X, S, y, w, epoch, batch):
    """
    Pass batches of received data to tensorflow
    """
    with tf.GradientTape() as tape:
        if epoch < state.model.pre_train_epochs:
            cost = state.model.inference_loss(state.model(X), y, w)
            trainable_vars = [v for v in state.model.trainable_variables if 'inference' in v.name]

        elif epoch % state.model.K == 0 or not state.model.adversarial:
            cost = state.model.loss(X, S, y, w)
            trainable_vars = [v for v in state.model.trainable_variables if 'inference' in v.name]

        else:
            cost = state.model.adversarial_loss(state.model(X), S, y, w)
            trainable_vars = [v for v in state.model.trainable_variables if 'adversary' in v.name]

    grads = tape.gradient(cost, trainable_vars)

    state.model.optimizer.apply_gradients(zip(grads, trainable_vars))

    if batch == 0 and epoch == 0:
        state.avg_costs = [cost]
    elif batch != state.nBatches - 1:
        state.avg_costs.append(cost)
    else:
        # end of the epoch: print summary results and reset avg_costs
        print(f"Epoch: {epoch:04d} cost= {np.mean(state.avg_costs):.9f}")
        state.avg_costs = [cost]

    if epoch == 100000:
        return False
    return True


if __name__ == "__main__":
    from basf2 import conditions, find_file
    # NOTE: do not use testing payloads in production! Any results obtained like this WILL NOT BE PUBLISHED
    conditions.testing_payloads = [
        'localdb/database.txt'
    ]

    variables = ['p', 'pt', 'pz', 'phi',
                 'daughter(0, p)', 'daughter(0, pz)', 'daughter(0, pt)', 'daughter(0, phi)',
                 'daughter(1, p)', 'daughter(1, pz)', 'daughter(1, pt)', 'daughter(1, phi)',
                 'daughter(2, p)', 'daughter(2, pz)', 'daughter(2, pt)', 'daughter(2, phi)',
                 'chiProb', 'dr', 'dz', 'dphi',
                 'daughter(0, dr)', 'daughter(1, dr)', 'daughter(0, dz)', 'daughter(1, dz)',
                 'daughter(0, dphi)', 'daughter(1, dphi)',
                 'daughter(0, chiProb)', 'daughter(1, chiProb)', 'daughter(2, chiProb)',
                 'daughter(0, kaonID)', 'daughter(0, pionID)', 'daughter(1, kaonID)', 'daughter(1, pionID)',
                 'daughterAngle(0, 1)', 'daughterAngle(0, 2)', 'daughterAngle(1, 2)',
                 'daughter(2, daughter(0, E))', 'daughter(2, daughter(1, E))',
                 'daughter(2, daughter(0, clusterTiming))', 'daughter(2, daughter(1, clusterTiming))',
                 'daughter(2, daughter(0, clusterE9E25))', 'daughter(2, daughter(1, clusterE9E25))',
                 'daughter(2, daughter(0, minC2TDist))', 'daughter(2, daughter(1, minC2TDist))',
                 'M']

    # reduced variable set for the feature-drop comparison below
    variables2 = ['p', 'pt', 'pz', 'phi',
                  'chiProb', 'dr', 'dz', 'dphi',
                  'daughter(2, chiProb)',
                  'daughter(0, kaonID)', 'daughter(0, pionID)', 'daughter(1, kaonID)', 'daughter(1, pionID)',
                  'daughter(2, daughter(0, E))', 'daughter(2, daughter(1, E))',
                  'daughter(2, daughter(0, clusterTiming))', 'daughter(2, daughter(1, clusterTiming))',
                  'daughter(2, daughter(0, clusterE9E25))', 'daughter(2, daughter(1, clusterE9E25))',
                  'daughter(2, daughter(0, minC2TDist))', 'daughter(2, daughter(1, minC2TDist))']

    train_file = find_file("mva/train_D0toKpipi.root", "examples")
    test_file = find_file("mva/test_D0toKpipi.root", "examples")

    training_data = basf2_mva.vector(train_file)
    testing_data = basf2_mva.vector(test_file)

    general_options = basf2_mva.GeneralOptions()
    general_options.m_datafiles = training_data
    general_options.m_treename = "tree"
    general_options.m_variables = basf2_mva.vector(*variables)
    general_options.m_spectators = basf2_mva.vector('daughterInvM(0, 1)', 'daughterInvM(0, 2)')
    general_options.m_target_variable = "isSignal"
    general_options.m_identifier = "tensorflow"

    # train with the adversarial constraint switched on
    specific_options = basf2_mva.PythonOptions()
    specific_options.m_framework = "tensorflow"
    specific_options.m_steering_file = 'mva/examples/orthogonal_discriminators/tensorflow_adversary.py'
    specific_options.m_normalize = True
    specific_options.m_nIterations = 1000
    specific_options.m_mini_batch_size = 400
    specific_options.m_config = '{"pre_train_epochs" : 50, "adversary_steps": 7, '\
                                ' "learning_rate": 0.001, "lambda": 0.01}'
    basf2_mva.teacher(general_options, specific_options)

    method = basf2_mva_util.Method(general_options.m_identifier)
    p, t = method.apply_expert(testing_data, general_options.m_treename)
    auc = basf2_mva_util.calculate_auc_efficiency_vs_background_retention(p, t)
    results = {}
    results['adversarial'] = {'p': p, 't': t, 'auc': auc}

    # baseline: identical features but the adversary switched off (lambda < 0)
    general_options.m_identifier = "tensorflow_baseline"
    specific_options.m_nIterations = 200
    specific_options.m_mini_batch_size = 400
    specific_options.m_config = '{"pre_train_epochs" : 0, "adversary_steps": 1, '\
                                ' "learning_rate": 0.001, "lambda": -1.0}'
    basf2_mva.teacher(general_options, specific_options)

    method = basf2_mva_util.Method(general_options.m_identifier)
    p, t = method.apply_expert(testing_data, general_options.m_treename)
    auc = basf2_mva_util.calculate_auc_efficiency_vs_background_retention(p, t)
    results['baseline'] = {'p': p, 't': t, 'auc': auc}

    # feature drop: adversary off and the reduced variable set
    general_options.m_variables = basf2_mva.vector(*variables2)
    general_options.m_identifier = "tensorflow_feature_drop"
    specific_options.m_nIterations = 200
    specific_options.m_mini_batch_size = 400
    specific_options.m_config = '{"pre_train_epochs" : 0, "adversary_steps": 1, '\
                                ' "learning_rate": 0.001, "lambda": -1.0}'
    basf2_mva.teacher(general_options, specific_options)

    method = basf2_mva_util.Method(general_options.m_identifier)
    p, t = method.apply_expert(testing_data, general_options.m_treename)
    auc = basf2_mva_util.calculate_auc_efficiency_vs_background_retention(p, t)
    results['featureDrop'] = {'p': p, 't': t, 'auc': auc}

    # read the spectator variables from the test file; basf2 escapes special
    # characters in branch names: '(' -> '__bo', ')' -> '__bc', ',' -> '__cm', ' ' -> '__sp'
    test_tree = uproot.open(test_file)['tree']
    invMassDaughter01 = test_tree.array('daughterInvM__bo0__cm__sp1__bc')
    invMassDaughter02 = test_tree.array('daughterInvM__bo0__cm__sp2__bc')
    isSignal = test_tree.array('isSignal').astype(np.int32)

    def print_summary(name, data):

        def get_corr(p, var, isSignal, sig=True):
            if not sig:
                isSignal = (1 - isSignal)
            signal_mask = np.where(isSignal)
            p = p[signal_mask]
            var = var[signal_mask]
            return np.corrcoef(p, var)[0, 1]

        print(f'{name}:')
        print(f'auc: {data["auc"]:.3f}')
        print('Correlations between probability and spectator variables:')
        print(
            f'Sig: {get_corr(data["p"], invMassDaughter01, isSignal, True):.3f},'
            f' {get_corr(data["p"], invMassDaughter02, isSignal, True):.3f}')
        print(
            f'Bkg: {get_corr(data["p"], invMassDaughter01, isSignal, False):.3f},'
            f' {get_corr(data["p"], invMassDaughter02, isSignal, False):.3f}\n')

    print_summary('Baseline', results['baseline'])
    print_summary('Adversarial', results['adversarial'])
    print_summary('Feature Drop', results['featureDrop'])
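
    # With the adversary or the feature drop, the correlations between the
    # classifier output and the two invariant masses should be much closer
    # to zero than for the baseline, typically at some cost in AUC.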