# Belle II Software release-06-02-00
# preprocessing.py
1 #!/usr/bin/env python3
2 
3 
10 
11 # This example shows how to implement a preprocessing step like equal frequency binning
12 
13 import basf2_mva
14 import basf2_mva_util
15 import time
16 
17 import numpy as np
18 
19 
20 from keras.layers import Input, Dense, Dropout
21 from keras.layers.normalization import BatchNormalization
22 from keras.models import Model
23 from keras.optimizers import Adam
24 from keras.losses import binary_crossentropy
25 from keras.activations import sigmoid, tanh
26 from keras.callbacks import Callback
27 
29 from basf2_mva_extensions.preprocessing import fast_equal_frequency_binning
30 
# Wall-clock reference taken at import time; not referenced anywhere in the
# visible part of this file (the __main__ section takes its own timestamps).
old_time = time.time()
33 
def get_model(number_of_features, number_of_spectators, number_of_events, training_fraction, parameters):
    """
    Construct the feed-forward Keras network used by this example.

    After an initial dense layer, two stacks of seven tanh layers are
    chained: the first stack applies batch normalization after every dense
    layer, the second applies dropout.  A single sigmoid unit produces the
    binary classification output.
    """
    inp = Input(shape=(number_of_features,))

    hidden = Dense(units=number_of_features, activation=tanh)(inp)
    # First stack: dense + batch normalization, repeated seven times.
    for _ in range(7):
        hidden = Dense(units=number_of_features, activation=tanh)(hidden)
        hidden = BatchNormalization()(hidden)
    # Second stack: dense + dropout, repeated seven times.
    for _ in range(7):
        hidden = Dense(units=number_of_features, activation=tanh)(hidden)
        hidden = Dropout(rate=0.4)(hidden)

    out = Dense(units=1, activation=sigmoid)(hidden)

    # Pass an empty preprocessor state as kwarg to the State class.
    # The interface is designed to automatically save every kwarg that is
    # passed to the initializer when end_fit is reached.
    state = State(Model(inp, out), preprocessor_state=None)

    state.model.compile(optimizer=Adam(lr=0.01), loss=binary_crossentropy, metrics=['accuracy'])
    state.model.summary()

    return state
59 
60 
def begin_fit(state, Xtest, Stest, ytest, wtest):
    """
    Store the validation sample on the state object and hand it back.

    Only the feature matrix and the targets are kept; spectators and
    weights are not used by this example.
    """
    state.Xtest, state.ytest = Xtest, ytest
    return state
70 
71 
def partial_fit(state, X, S, y, w, epoch):
    """
    Train the Keras model on the received data.

    The features are first passed through an equal-frequency binning
    preprocessor, which is fitted here; its fitted state is stored on the
    state object so the expert can reapply the identical transformation.
    """
    # Fit the preprocessor on the training features, then transform both
    # the training and the validation sample with it.
    binner = fast_equal_frequency_binning()
    binner.fit(X)
    X = binner.apply(X)
    state.Xtest = binner.apply(state.Xtest)

    # Persist the fitted preprocessor so apply() can restore it later.
    state.preprocessor_state = binner.export_state()

    class TestCallback(Callback):
        """Report test and training performance after every epoch."""

        def on_epoch_end(self, epoch, logs=None):
            loss, acc = state.model.evaluate(state.Xtest, state.ytest, verbose=0, batch_size=1000)
            loss2, acc2 = state.model.evaluate(X[:10000], y[:10000], verbose=0, batch_size=1000)
            print('\nTesting loss: {}, acc: {}'.format(loss, acc))
            print('Training loss: {}, acc: {}'.format(loss2, acc2))

    state.model.fit(X, y, batch_size=500, epochs=10, callbacks=[TestCallback()])
    return False
94 
95 
def apply(state, X):
    """
    Apply the trained estimator to the passed data.

    Has to be overwritten because the expert also has to run the same
    preprocessing that was used during training.
    """
    # The preprocessor state is loaded automatically in the load function;
    # reconstruct the binning transformation from it.
    binner = fast_equal_frequency_binning(state.preprocessor_state)
    X = binner.apply(X)

    predictions = state.model.predict(X).flatten()
    # Return a float32 array with the flags the MVA interface expects.
    return np.require(predictions, dtype=np.float32, requirements=['A', 'W', 'C', 'O'])
108 
109 
if __name__ == "__main__":
    from basf2 import conditions
    # NOTE: do not use testing payloads in production! Any results obtained like this WILL NOT BE PUBLISHED
    conditions.testing_payloads = [
        'localdb/database.txt'
    ]

    # General options: training file, method identifier, tree and variables.
    general_options = basf2_mva.GeneralOptions()
    general_options.m_datafiles = basf2_mva.vector("train.root")
    general_options.m_identifier = "deep_keras"
    general_options.m_treename = "tree"
    variables = ['M', 'p', 'pt', 'pz',
                 'daughter(0, p)', 'daughter(0, pz)', 'daughter(0, pt)',
                 'daughter(1, p)', 'daughter(1, pz)', 'daughter(1, pt)',
                 'daughter(2, p)', 'daughter(2, pz)', 'daughter(2, pt)',
                 'chiProb', 'dr', 'dz',
                 'daughter(0, dr)', 'daughter(1, dr)',
                 'daughter(0, dz)', 'daughter(1, dz)',
                 'daughter(0, chiProb)', 'daughter(1, chiProb)', 'daughter(2, chiProb)',
                 'daughter(0, kaonID)', 'daughter(0, pionID)',
                 'daughterInvariantMass(0, 1)', 'daughterInvariantMass(0, 2)', 'daughterInvariantMass(1, 2)']
    general_options.m_variables = basf2_mva.vector(*variables)
    general_options.m_target_variable = "isSignal"

    # Python-specific options: this very file serves as the steering file.
    specific_options = basf2_mva.PythonOptions()
    specific_options.m_framework = "contrib_keras"
    specific_options.m_steering_file = 'mva/examples/keras/preprocessing.py'
    specific_options.m_normalize = True
    specific_options.m_training_fraction = 0.9

    # Train the method and measure the wall-clock training time.
    training_start = time.time()
    basf2_mva.teacher(general_options, specific_options)
    training_stop = time.time()
    training_time = training_stop - training_start

    # Apply the trained expert to the test data and measure inference time.
    method = basf2_mva_util.Method(general_options.m_identifier)
    inference_start = time.time()
    test_data = ["test.root"] * 10
    p, t = method.apply_expert(basf2_mva.vector(*test_data), general_options.m_treename)
    inference_stop = time.time()
    inference_time = inference_stop - inference_start

    # FIX: 'auc' was printed below without ever being computed (the
    # assignment was lost); evaluate the ROC AUC of the expert output first.
    auc = basf2_mva_util.calculate_roc_auc(p, t)
    print("Tensorflow", training_time, inference_time, auc)