Belle II Software light-2212-foldex
preprocessing.py
#!/usr/bin/env python3

##########################################################################
# basf2 (Belle II Analysis Software Framework)                           #
# Author: The Belle II Collaboration                                     #
#                                                                        #
# See git log for contributors and copyright holders.                    #
# This file is licensed under LGPL-3.0, see LICENSE.md.                  #
##########################################################################

# This example shows how to implement a preprocessing step like equal frequency binning

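# Equal frequency binning maps every input feature onto [0, 1] such that each
# bin contains roughly the same number of samples, i.e. the marginal feature
# distributions become approximately uniform. A minimal standalone sketch of
# the idea (illustrative only, not the basf2_mva_extensions implementation):
#
#   def equal_frequency_bin(x, n_bins=100):
#       edges = np.percentile(x, np.linspace(0, 100, n_bins + 1))
#       return np.digitize(x, edges[1:-1]) / n_bins
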
import basf2_mva
import basf2_mva_util
import time

import numpy as np


from tensorflow.keras.layers import Input, Dense, Dropout, BatchNormalization
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import binary_crossentropy
from tensorflow.keras.activations import sigmoid, tanh
from tensorflow.keras.callbacks import Callback

from basf2_mva_python_interface.keras import State
from basf2_mva_extensions.preprocessing import fast_equal_frequency_binning

old_time = time.time()


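# The basf2_mva python interface drives this steering file through hook
# functions: get_model builds the network, begin_fit receives the test
# sample, partial_fit receives the training data, and apply is used by the
# expert. Hooks that are not overwritten here (e.g. load and end_fit) fall
# back to the defaults provided by basf2_mva_python_interface.keras.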
def get_model(number_of_features, number_of_spectators, number_of_events, training_fraction, parameters):
    """
    Build feed forward keras model
    """
    input = Input(shape=(number_of_features,))

    net = Dense(units=number_of_features, activation=tanh)(input)
    for i in range(7):
        net = Dense(units=number_of_features, activation=tanh)(net)
        net = BatchNormalization()(net)
    for i in range(7):
        net = Dense(units=number_of_features, activation=tanh)(net)
        net = Dropout(rate=0.4)(net)

    output = Dense(units=1, activation=sigmoid)(net)

    # Pass an empty preprocessor state as a kwarg to the State class.
    # The interface automatically saves every kwarg that was passed to the
    # initializer when end_fit is called, so the fitted preprocessor state
    # is persisted together with the model.
    state = State(Model(input, output), preprocessor_state=None)

    state.model.compile(optimizer=Adam(learning_rate=0.01), loss=binary_crossentropy, metrics=['accuracy'])

    state.model.summary()

    return state


def begin_fit(state, Xtest, Stest, ytest, wtest, nBatches):
    """
    Store the test sample on the state object so it can be used for
    monitoring in partial_fit, then return the state
    """

    state.Xtest = Xtest
    state.ytest = ytest

    return state


def partial_fit(state, X, S, y, w, epoch, batch):
    """
    Fit the preprocessor on the received data and train the keras model
    """
    # Fit and apply the preprocessor
    preprocessor = fast_equal_frequency_binning()
    preprocessor.fit(X)
    X = preprocessor.apply(X)
    state.Xtest = preprocessor.apply(state.Xtest)

    # Save the preprocessor state in the State class
    state.preprocessor_state = preprocessor.export_state()

    class TestCallback(Callback):
        def on_epoch_end(self, epoch, logs=None):
            loss, acc = state.model.evaluate(state.Xtest, state.ytest, verbose=0, batch_size=1000)
            loss2, acc2 = state.model.evaluate(X[:10000], y[:10000], verbose=0, batch_size=1000)
            print('\nTesting loss: {}, acc: {}'.format(loss, acc))
            print('Training loss: {}, acc: {}'.format(loss2, acc2))

    state.model.fit(X, y, batch_size=500, epochs=10, callbacks=[TestCallback()])
    # All epochs are run in this single call, so return False to signal
    # that no further partial_fit calls are needed
    return False


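# The preprocessor state saved in partial_fit above round-trips as follows
# (illustrative sketch using only the calls that appear in this file):
#
#   pre = fast_equal_frequency_binning()
#   pre.fit(X_train)                               # determine the bin edges
#   saved = pre.export_state()                     # picklable state object
#   restored = fast_equal_frequency_binning(saved) # rebuild without refitting
#   restored.apply(X_new)                          # same binning as pre.apply(X_new)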
def apply(state, X):
    """
    Apply estimator to passed data.
    Has to be overwritten, because the expert also has to apply the preprocessing.
    """
    # The preprocessor state is automatically loaded in the load function
    preprocessor = fast_equal_frequency_binning(state.preprocessor_state)
    # Apply preprocessor
    X = preprocessor.apply(X)

    r = state.model.predict(X).flatten()
    # Return a float32 array that is aligned ('A'), writeable ('W'),
    # C-contiguous ('C') and owns its data ('O'), as the mva framework expects
    return np.require(r, dtype=np.float32, requirements=['A', 'W', 'C', 'O'])


if __name__ == "__main__":
    from basf2 import conditions
    # NOTE: do not use testing payloads in production! Any results obtained like this WILL NOT BE PUBLISHED
    conditions.testing_payloads = [
        'localdb/database.txt'
    ]

    general_options = basf2_mva.GeneralOptions()
    general_options.m_datafiles = basf2_mva.vector("train.root")
    general_options.m_identifier = "preprocessed_deep_keras"
    general_options.m_treename = "tree"
    variables = ['M', 'p', 'pt', 'pz',
                 'daughter(0, p)', 'daughter(0, pz)', 'daughter(0, pt)',
                 'daughter(1, p)', 'daughter(1, pz)', 'daughter(1, pt)',
                 'daughter(2, p)', 'daughter(2, pz)', 'daughter(2, pt)',
                 'chiProb', 'dr', 'dz',
                 'daughter(0, dr)', 'daughter(1, dr)',
                 'daughter(0, dz)', 'daughter(1, dz)',
                 'daughter(0, chiProb)', 'daughter(1, chiProb)', 'daughter(2, chiProb)',
                 'daughter(0, kaonID)', 'daughter(0, pionID)',
                 'daughterInvM(0, 1)', 'daughterInvM(0, 2)', 'daughterInvM(1, 2)']
    general_options.m_variables = basf2_mva.vector(*variables)
    general_options.m_target_variable = "isSignal"

    specific_options = basf2_mva.PythonOptions()
    specific_options.m_framework = "keras"
    specific_options.m_steering_file = 'mva/examples/keras/preprocessing.py'
    specific_options.m_normalize = True
    specific_options.m_training_fraction = 0.9

    training_start = time.time()
    basf2_mva.teacher(general_options, specific_options)
    training_stop = time.time()
    training_time = training_stop - training_start
    method = basf2_mva_util.Method(general_options.m_identifier)
    inference_start = time.time()
    test_data = ["test.root"] * 10
    p, t = method.apply_expert(basf2_mva.vector(*test_data), general_options.m_treename)
    inference_stop = time.time()
    inference_time = inference_stop - inference_start
    auc = basf2_mva_util.calculate_auc_efficiency_vs_background_retention(p, t)

    print("Tensorflow", training_time, inference_time, auc)
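
    # To apply the trained identifier later inside a basf2 job, one would
    # typically use the MVAExpert basf2 module or the basf2_mva.expert tool,
    # e.g. (sketch; the output file name 'expert.root' is arbitrary):
    #
    #   basf2_mva.expert(basf2_mva.vector('preprocessed_deep_keras'),
    #                    basf2_mva.vector('test.root'), 'tree', 'expert.root')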