import time

import numpy as np

from keras.layers import Input, Dense, Dropout, BatchNormalization
from keras.models import Model
from keras.optimizers import Adam
from keras.losses import binary_crossentropy
from keras.activations import sigmoid, tanh
from keras.callbacks import Callback

from basf2_mva_extensions.preprocessing import fast_equal_frequency_binning
def get_model(number_of_features, number_of_spectators, number_of_events, training_fraction, parameters):
    """
    Build a feed-forward keras model for binary classification.

    :param number_of_features: width of the input layer and of each hidden layer
    :param number_of_spectators: unused here, part of the basf2 mva interface
    :param number_of_events: unused here, part of the basf2 mva interface
    :param training_fraction: unused here, part of the basf2 mva interface
    :param parameters: unused here, part of the basf2 mva interface
    :return: a ``State`` object wrapping the compiled model
    """
    # 'input_layer' instead of 'input' to avoid shadowing the builtin
    input_layer = Input(shape=(number_of_features,))

    net = Dense(units=number_of_features, activation=tanh)(input_layer)
    net = Dense(units=number_of_features, activation=tanh)(net)
    net = BatchNormalization()(net)
    net = Dense(units=number_of_features, activation=tanh)(net)
    # dropout to reduce overfitting
    net = Dropout(rate=0.4)(net)

    output = Dense(units=1, activation=sigmoid)(net)

    # preprocessor_state is filled later in partial_fit()
    state = State(Model(input_layer, output), preprocessor_state=None)

    state.model.compile(optimizer=Adam(learning_rate=0.01), loss=binary_crossentropy, metrics=['accuracy'])

    # the state must be returned so begin_fit/partial_fit receive it
    return state
def begin_fit(state, Xtest, Stest, ytest, wtest, nBatches):
    """
    Store the validation data on the state object and return it.

    partial_fit() later preprocesses and evaluates on state.Xtest / state.ytest,
    so they must be attached here.

    :param state: State object created by get_model
    :param Xtest: validation features
    :param Stest: validation spectators (unused)
    :param ytest: validation targets
    :param wtest: validation weights (unused)
    :param nBatches: number of batches (unused)
    :return: the state object
    """
    state.Xtest = Xtest
    state.ytest = ytest
    return state
def partial_fit(state, X, S, y, w, epoch, batch):
    """
    Preprocess the received data and fit the keras model.

    :param state: State object, carries the model and the test sample
    :param X: training features
    :param S: spectators (unused)
    :param y: training targets
    :param w: weights (unused)
    :param epoch: current epoch (unused)
    :param batch: current batch (unused)
    :return: False, to signal that no further batches are needed
    """
    # The binner must be fitted before it can be applied; expert() later
    # rebuilds it from the exported state instead of refitting.
    preprocessor = fast_equal_frequency_binning()
    preprocessor.fit(X)
    X = preprocessor.apply(X)
    state.Xtest = preprocessor.apply(state.Xtest)
    # persist the preprocessor so apply() can reproduce the binning
    state.preprocessor_state = preprocessor.export_state()

    class TestCallback(Callback):
        # report test and training performance after every epoch
        def on_epoch_end(self, epoch, logs=None):
            loss, acc = state.model.evaluate(state.Xtest, state.ytest, verbose=0, batch_size=1000)
            loss2, acc2 = state.model.evaluate(X[:10000], y[:10000], verbose=0, batch_size=1000)
            print(f'\nTesting loss: {loss}, acc: {acc}')
            print(f'Training loss: {loss2}, acc: {acc2}')

    state.model.fit(X, y, batch_size=500, epochs=10, callbacks=[TestCallback()])
    # False: all fitting is done in this single call
    return False
def apply(state, X):
    """
    Apply estimator to passed data.
    Has to be overwritten, because also the expert has to apply preprocessing.

    :param state: State object holding the trained model and preprocessor state
    :param X: features to classify
    :return: flat numpy array of float32 network outputs
    """
    # rebuild the binner from the state exported during training
    preprocessor = fast_equal_frequency_binning(state.preprocessor_state)
    X = preprocessor.apply(X)
    r = state.model.predict(X).flatten()
    # basf2 mva expects an aligned, writeable, C-contiguous float32 array
    return np.require(r, dtype=np.float32, requirements=['A', 'W', 'C', 'O'])
if __name__ == "__main__":
    from basf2 import conditions, find_file
    import basf2_mva_util  # provides Method and the AUC helper used below

    # NOTE(review): do not use testing payloads in production
    conditions.testing_payloads = [
        'localdb/database.txt'
    ]

    train_file = find_file("mva/train_D0toKpipi.root", "examples")
    test_file = find_file("mva/test_D0toKpipi.root", "examples")

    training_data = basf2_mva.vector(train_file)
    testing_data = basf2_mva.vector(test_file)

    general_options = basf2_mva.GeneralOptions()
    general_options.m_datafiles = training_data
    general_options.m_identifier = "preprocessed_deep_keras"
    general_options.m_treename = "tree"
    variables = ['M', 'p', 'pt', 'pz',
                 'daughter(0, p)', 'daughter(0, pz)', 'daughter(0, pt)',
                 'daughter(1, p)', 'daughter(1, pz)', 'daughter(1, pt)',
                 'daughter(2, p)', 'daughter(2, pz)', 'daughter(2, pt)',
                 'chiProb', 'dr', 'dz',
                 'daughter(0, dr)', 'daughter(1, dr)',
                 'daughter(0, dz)', 'daughter(1, dz)',
                 'daughter(0, chiProb)', 'daughter(1, chiProb)', 'daughter(2, chiProb)',
                 'daughter(0, kaonID)', 'daughter(0, pionID)',
                 'daughterInvM(0, 1)', 'daughterInvM(0, 2)', 'daughterInvM(1, 2)']
    general_options.m_variables = basf2_mva.vector(*variables)
    general_options.m_target_variable = "isSignal"

    specific_options = basf2_mva.PythonOptions()
    specific_options.m_framework = "keras"
    specific_options.m_steering_file = 'mva/examples/keras/preprocessing.py'
    specific_options.m_normalize = True
    specific_options.m_training_fraction = 0.9

    training_start = time.time()
    basf2_mva.teacher(general_options, specific_options)
    training_stop = time.time()
    training_time = training_stop - training_start

    # load the trained method back so it can be applied to the test sample
    method = basf2_mva_util.Method(general_options.m_identifier)
    inference_start = time.time()
    p, t = method.apply_expert(testing_data, general_options.m_treename)
    inference_stop = time.time()
    inference_time = inference_stop - inference_start

    auc = basf2_mva_util.calculate_auc_efficiency_vs_background_retention(p, t)
    print("Tensorflow", training_time, inference_time, auc)