Belle II Software development
preprocessing.py
#!/usr/bin/env python3

# This example shows how to implement a preprocessing step like equal frequency binning.

import basf2_mva
import basf2_mva_util
import time

import numpy as np

from keras.layers import Input, Dense, Dropout, BatchNormalization
from keras.models import Model
from keras.optimizers import Adam
from keras.losses import binary_crossentropy
from keras.activations import sigmoid, tanh
from keras.callbacks import Callback

from basf2_mva_extensions.preprocessing import fast_equal_frequency_binning
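

# For intuition: equal frequency binning replaces each feature value by the index
# of its quantile bin, so every bin contains roughly the same number of entries.
# A minimal numpy sketch of the idea (illustration only; this helper is made up
# here, and the actual work in this example is done by fast_equal_frequency_binning):
def _equal_frequency_binning_sketch(x, n_bins=100):
    # Bin edges are empirical quantiles, so each bin holds ~len(x)/n_bins entries.
    edges = np.percentile(x, np.linspace(0.0, 100.0, n_bins + 1))
    # Map every value to its bin index and scale the result to [0, 1].
    return np.digitize(x, edges[1:-1]) / n_bins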


old_time = time.time()


def get_model(number_of_features, number_of_spectators, number_of_events, training_fraction, parameters):
    """
    Build a feed-forward keras model
    """
    input = Input(shape=(number_of_features,))

    net = Dense(units=number_of_features, activation=tanh)(input)
    for i in range(7):
        net = Dense(units=number_of_features, activation=tanh)(net)
        net = BatchNormalization()(net)
    for i in range(7):
        net = Dense(units=number_of_features, activation=tanh)(net)
        net = Dropout(rate=0.4)(net)

    output = Dense(units=1, activation=sigmoid)(net)

    # Pass an empty preprocessor state as a kwarg to the State class.
    # The interface automatically saves every kwarg passed to the initializer when end_fit is called.
    state = State(Model(input, output), preprocessor_state=None)

    state.model.compile(optimizer=Adam(learning_rate=0.01), loss=binary_crossentropy, metrics=['accuracy'])

    state.model.summary()

    return state
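

# To illustrate the kwarg-saving behaviour used above: a minimal sketch of such a
# state wrapper (an assumption for illustration, not the actual basf2 implementation):
class _StateSketch:
    def __init__(self, model=None, **kwargs):
        self.model = model
        # Remember the kwarg names so that end_fit can serialize their values
        # alongside the model, and load can restore them for the expert.
        self.collection_keys = list(kwargs.keys())
        for key, value in kwargs.items():
            setattr(self, key, value)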


def begin_fit(state, Xtest, Stest, ytest, wtest, nBatches):
    """
    Save the validation sample on the state object and return it
    """
    state.Xtest = Xtest
    state.ytest = ytest

    return state


def partial_fit(state, X, S, y, w, epoch, batch):
    """
    Fit the preprocessor on the received data, then train the keras model
    """
    # Fit and apply the preprocessor
    preprocessor = fast_equal_frequency_binning()
    preprocessor.fit(X)
    X = preprocessor.apply(X)
    state.Xtest = preprocessor.apply(state.Xtest)

    # Save the preprocessor state in the State class
    state.preprocessor_state = preprocessor.export_state()

    class TestCallback(Callback):
        def on_epoch_end(self, epoch, logs=None):
            loss, acc = state.model.evaluate(state.Xtest, state.ytest, verbose=0, batch_size=1000)
            loss2, acc2 = state.model.evaluate(X[:10000], y[:10000], verbose=0, batch_size=1000)
            print(f'\nTesting loss: {loss}, acc: {acc}')
            print(f'Training loss: {loss2}, acc: {acc2}')

    state.model.fit(X, y, batch_size=500, epochs=10, callbacks=[TestCallback()])
    # Return False, since everything is trained in this single call.
    return False
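

# The preprocessor round trip used by partial_fit and apply, shown in isolation
# (a sketch using only the calls that appear in this example):
def _preprocessor_roundtrip_sketch(X_train, X_new):
    pre = fast_equal_frequency_binning()
    pre.fit(X_train)
    saved = pre.export_state()
    # Later, possibly in another process: rebuild the preprocessor from the
    # saved state and apply the identical binning to new data.
    return fast_equal_frequency_binning(saved).apply(X_new)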


def apply(state, X):
    """
    Apply the estimator to the passed data.
    Has to be overridden because the expert also has to apply the preprocessing.
    """
    # The preprocessor state is automatically loaded in the load function
    preprocessor = fast_equal_frequency_binning(state.preprocessor_state)
    # Apply the preprocessor
    X = preprocessor.apply(X)

    r = state.model.predict(X).flatten()
    # Return an aligned, writeable, C-contiguous float32 array that owns its data.
    return np.require(r, dtype=np.float32, requirements=['A', 'W', 'C', 'O'])
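

# A sketch of how the hooks above are called in sequence (illustration only, with
# made-up shapes and random data; note that the State class is injected by basf2's
# keras interface at runtime, so this only works in that context):
def _smoke_test(n_events=1000, n_features=28):
    rng = np.random.default_rng(0)
    X = rng.normal(size=(n_events, n_features)).astype(np.float32)
    y = rng.integers(0, 2, size=n_events)
    state = get_model(n_features, 0, n_events, 0.9, {})
    state = begin_fit(state, X[:200], None, y[:200], None, 1)
    partial_fit(state, X, None, y, None, 0, 0)
    return apply(state, X[:10])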


if __name__ == "__main__":
    from basf2 import conditions, find_file
    # NOTE: do not use testing payloads in production! Any results obtained like this WILL NOT BE PUBLISHED
    conditions.testing_payloads = [
        'localdb/database.txt'
    ]

    train_file = find_file("mva/train_D0toKpipi.root", "examples")
    test_file = find_file("mva/test_D0toKpipi.root", "examples")

    training_data = basf2_mva.vector(train_file)
    testing_data = basf2_mva.vector(test_file)

    general_options = basf2_mva.GeneralOptions()
    general_options.m_datafiles = training_data
    general_options.m_identifier = "preprocessed_deep_keras"
    general_options.m_treename = "tree"
    variables = ['M', 'p', 'pt', 'pz',
                 'daughter(0, p)', 'daughter(0, pz)', 'daughter(0, pt)',
                 'daughter(1, p)', 'daughter(1, pz)', 'daughter(1, pt)',
                 'daughter(2, p)', 'daughter(2, pz)', 'daughter(2, pt)',
                 'chiProb', 'dr', 'dz',
                 'daughter(0, dr)', 'daughter(1, dr)',
                 'daughter(0, dz)', 'daughter(1, dz)',
                 'daughter(0, chiProb)', 'daughter(1, chiProb)', 'daughter(2, chiProb)',
                 'daughter(0, kaonID)', 'daughter(0, pionID)',
                 'daughterInvM(0, 1)', 'daughterInvM(0, 2)', 'daughterInvM(1, 2)']
    general_options.m_variables = basf2_mva.vector(*variables)
    general_options.m_target_variable = "isSignal"

    specific_options = basf2_mva.PythonOptions()
    specific_options.m_framework = "keras"
    specific_options.m_steering_file = 'mva/examples/keras/preprocessing.py'
    specific_options.m_normalize = True
    specific_options.m_training_fraction = 0.9

    training_start = time.time()
    basf2_mva.teacher(general_options, specific_options)
    training_stop = time.time()
    training_time = training_stop - training_start
    method = basf2_mva_util.Method(general_options.m_identifier)
    inference_start = time.time()
    p, t = method.apply_expert(testing_data, general_options.m_treename)
    inference_stop = time.time()
    inference_time = inference_stop - inference_start
    auc = basf2_mva_util.calculate_auc_efficiency_vs_background_retention(p, t)

    print("Tensorflow", training_time, inference_time, auc)