Belle II Software  release-05-02-19
preprocessing.py
1 #!/usr/bin/env python3
2 # -*- coding: utf-8 -*-
3 
4 # Dennis Weyland 2017
5 
6 # This example shows how to implement a preprocessing step like equal frequency binning
7 
import time

import numpy as np
import tensorflow as tf
import tensorflow.contrib.keras as keras

from keras.layers import Input, Dense, Dropout
from keras.layers.normalization import BatchNormalization
from keras.models import Model
from keras.optimizers import adam
from keras.losses import binary_crossentropy
from keras.activations import sigmoid, tanh
from keras.callbacks import Callback

import basf2_mva
import basf2_mva_util
from basf2_mva_extensions.preprocessing import fast_equal_frequency_binning
25 
26 old_time = time.time()
27 
28 
29 def get_model(number_of_features, number_of_spectators, number_of_events, training_fraction, parameters):
30  """
31  Build feed forward keras model
32  """
33  input = Input(shape=(number_of_features,))
34 
35  net = Dense(units=number_of_features, activation=tanh)(input)
36  for i in range(7):
37  net = Dense(units=number_of_features, activation=tanh)(net)
38  net = BatchNormalization()(net)
39  for i in range(7):
40  net = Dense(units=number_of_features, activation=tanh)(net)
41  net = Dropout(rate=0.4)(net)
42 
43  output = Dense(units=1, activation=sigmoid)(net)
44 
45  # Pass empty preprocessor state as kwarg in the state class.
46  # The interface is designed to automatically save every kwarg, which is passed in the initializer in end_fit.
47  state = State(Model(input, output), preprocessor_state=None)
48 
49  state.model.compile(optimizer=adam(lr=0.01), loss=binary_crossentropy, metrics=['accuracy'])
50 
51  state.model.summary()
52 
53  return state
54 
55 
56 def begin_fit(state, Xtest, Stest, ytest, wtest):
57  """
58  Returns just the state object
59  """
60 
61  state.Xtest = Xtest
62  state.ytest = ytest
63 
64  return state
65 
66 
67 def partial_fit(state, X, S, y, w, epoch):
68  """
69  Pass received data to tensorflow session
70  """
71  # Fit and Apply preprocessor
72  preprocessor = fast_equal_frequency_binning()
73  preprocessor.fit(X)
74  X = preprocessor.apply(X)
75  state.Xtest = preprocessor.apply(state.Xtest)
76 
77  # save preprocessor state in the State class
78  state.preprocessor_state = preprocessor.export_state()
79 
80  class TestCallback(Callback):
81  def on_epoch_end(self, epoch, logs={}):
82  loss, acc = state.model.evaluate(state.Xtest, state.ytest, verbose=0, batch_size=1000)
83  loss2, acc2 = state.model.evaluate(X[:10000], y[:10000], verbose=0, batch_size=1000)
84  print('\nTesting loss: {}, acc: {}'.format(loss, acc))
85  print('Training loss: {}, acc: {}'.format(loss2, acc2))
86 
87  state.model.fit(X, y, batch_size=500, epochs=10, callbacks=[TestCallback()])
88  return False
89 
90 
91 def apply(state, X):
92  """
93  Apply estimator to passed data.
94  Has to be overwritten, because also the expert has to apply preprocessing.
95  """
96  # The preprocessor state is automatically loaded in the load function
97  preprocessor = fast_equal_frequency_binning(state.preprocessor_state)
98  # Apply preprocessor
99  X = preprocessor.apply(X)
100 
101  r = state.model.predict(X).flatten()
102  return np.require(r, dtype=np.float32, requirements=['A', 'W', 'C', 'O'])
103 
104 
105 if __name__ == "__main__":
106  from basf2 import conditions
107  # NOTE: do not use testing payloads in production! Any results obtained like this WILL NOT BE PUBLISHED
108  conditions.testing_payloads = [
109  'localdb/database.txt'
110  ]
111 
112  general_options = basf2_mva.GeneralOptions()
113  general_options.m_datafiles = basf2_mva.vector("train.root")
114  general_options.m_identifier = "deep_keras"
115  general_options.m_treename = "tree"
116  variables = ['M', 'p', 'pt', 'pz',
117  'daughter(0, p)', 'daughter(0, pz)', 'daughter(0, pt)',
118  'daughter(1, p)', 'daughter(1, pz)', 'daughter(1, pt)',
119  'daughter(2, p)', 'daughter(2, pz)', 'daughter(2, pt)',
120  'chiProb', 'dr', 'dz',
121  'daughter(0, dr)', 'daughter(1, dr)',
122  'daughter(0, dz)', 'daughter(1, dz)',
123  'daughter(0, chiProb)', 'daughter(1, chiProb)', 'daughter(2, chiProb)',
124  'daughter(0, Kid)', 'daughter(0, piid)',
125  'daughterInvariantMass(0, 1)', 'daughterInvariantMass(0, 2)', 'daughterInvariantMass(1, 2)']
126  general_options.m_variables = basf2_mva.vector(*variables)
127  general_options.m_target_variable = "isSignal"
128 
129  specific_options = basf2_mva.PythonOptions()
130  specific_options.m_framework = "contrib_keras"
131  specific_options.m_steering_file = 'mva/examples/keras/preprocessing.py'
132  specific_options.m_normalize = True
133  specific_options.m_training_fraction = 0.9
134 
135  training_start = time.time()
136  basf2_mva.teacher(general_options, specific_options)
137  training_stop = time.time()
138  training_time = training_stop - training_start
139  method = basf2_mva_util.Method(general_options.m_identifier)
140  inference_start = time.time()
141  test_data = ["test.root"] * 10
142  p, t = method.apply_expert(basf2_mva.vector(*test_data), general_options.m_treename)
143  inference_stop = time.time()
144  inference_time = inference_stop - inference_start
146  print("Tensorflow", training_time, inference_time, auc)
basf2_mva_util.calculate_roc_auc
def calculate_roc_auc(p, t)
Definition: basf2_mva_util.py:39
basf2_mva_util.Method
Definition: basf2_mva_util.py:81
basf2_mva_python_interface.contrib_keras
Definition: contrib_keras.py:1