import basf2_mva
import basf2_mva_util
import time

import numpy as np

from tensorflow.keras.layers import Input, Dense, Dropout, BatchNormalization
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import binary_crossentropy
from tensorflow.keras.activations import sigmoid, tanh
from tensorflow.keras.callbacks import Callback

# State wrapper provided by the basf2 keras mva interface
from basf2_mva_python_interface.keras import State
from basf2_mva_extensions.preprocessing import fast_equal_frequency_binning

old_time = time.time()
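
# --- Illustration (not part of the original steering file) -----------------
# Conceptually, equal-frequency binning maps each feature through its own
# empirical quantiles, so the transformed values are spread roughly uniformly
# over [0, 1]. The helper below is a hypothetical numpy sketch of that idea;
# the actual transformation used in this example is the
# fast_equal_frequency_binning preprocessor imported above.
def _equal_frequency_binning_sketch(column, number_of_bins=100):
    # Quantile edges split the sample into bins of roughly equal population
    edges = np.quantile(column, np.linspace(0.0, 1.0, number_of_bins + 1))
    # Assign each value to its bin and rescale the bin index to [0, 1)
    return np.digitize(column, edges[1:-1]) / number_of_bins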


def get_model(number_of_features, number_of_spectators, number_of_events, training_fraction, parameters):
    """
    Build a feed forward keras model
    """
    input = Input(shape=(number_of_features,))

    net = Dense(units=number_of_features, activation=tanh)(input)
    net = Dense(units=number_of_features, activation=tanh)(net)
    net = BatchNormalization()(net)
    net = Dense(units=number_of_features, activation=tanh)(net)
    net = Dropout(rate=0.4)(net)

    output = Dense(units=1, activation=sigmoid)(net)

    state = State(Model(input, output), preprocessor_state=None)

    state.model.compile(optimizer=Adam(learning_rate=0.01), loss=binary_crossentropy, metrics=['accuracy'])

    return state
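
# The State object bundles the compiled keras model with the preprocessor_state,
# so everything the expert needs at inference time is kept in one place.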


def begin_fit(state, Xtest, Stest, ytest, wtest, nBatches):
    """
    Returns just the state object
    """
    state.Xtest = Xtest
    state.ytest = ytest

    return state


def partial_fit(state, X, S, y, w, epoch, batch):
    """
    Pass the received data to the keras model and fit it
    """
    # Fit and apply the equal frequency binning preprocessing
    preprocessor = fast_equal_frequency_binning()
    preprocessor.fit(X)
    X = preprocessor.apply(X)
    state.Xtest = preprocessor.apply(state.Xtest)

    # Save the preprocessor state so the expert can reuse it in apply()
    state.preprocessor_state = preprocessor.export_state()

    class TestCallback(Callback):
        def on_epoch_end(self, epoch, logs=None):
            loss, acc = state.model.evaluate(state.Xtest, state.ytest, verbose=0, batch_size=1000)
            loss2, acc2 = state.model.evaluate(X[:10000], y[:10000], verbose=0, batch_size=1000)
            print(f'\nTesting loss: {loss}, acc: {acc}')
            print(f'Training loss: {loss2}, acc: {acc2}')

    state.model.fit(X, y, batch_size=500, epochs=10, callbacks=[TestCallback()])
    return False
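
# Note (not part of the original example): the same held-out monitoring could be
# done without a custom callback by handing the validation sample to fit, e.g.
#   state.model.fit(X, y, batch_size=500, epochs=10,
#                   validation_data=(state.Xtest, state.ytest))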


def apply(state, X):
    """
    Apply the estimator to the passed data.
    Has to be overwritten, because the expert also has to apply the preprocessing.
    """
    # Rebuild the preprocessor from the state saved during training
    preprocessor = fast_equal_frequency_binning(state.preprocessor_state)
    # Apply the same binning to the data before inference
    X = preprocessor.apply(X)

    r = state.model.predict(X).flatten()
    # Return an aligned, writeable, C-contiguous float32 array
    return np.require(r, dtype=np.float32, requirements=['A', 'W', 'C', 'O'])
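
# get_model, begin_fit, partial_fit and apply are the hooks the basf2 mva python
# interface looks up in this steering file; hooks that are not overridden here
# presumably fall back to the defaults of the keras interface.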


if __name__ == "__main__":
    from basf2 import conditions, find_file
    # NOTE: testing payloads are meant for examples only, not for production
    conditions.testing_payloads = [
        'localdb/database.txt'
    ]

    train_file = find_file("mva/train_D0toKpipi.root", "examples")
    test_file = find_file("mva/test_D0toKpipi.root", "examples")

    training_data = basf2_mva.vector(train_file)
    testing_data = basf2_mva.vector(test_file)

    general_options = basf2_mva.GeneralOptions()
    general_options.m_datafiles = training_data
    general_options.m_identifier = "preprocessed_deep_keras"
    general_options.m_treename = "tree"
    variables = ['M', 'p', 'pt', 'pz',
                 'daughter(0, p)', 'daughter(0, pz)', 'daughter(0, pt)',
                 'daughter(1, p)', 'daughter(1, pz)', 'daughter(1, pt)',
                 'daughter(2, p)', 'daughter(2, pz)', 'daughter(2, pt)',
                 'chiProb', 'dr', 'dz',
                 'daughter(0, dr)', 'daughter(1, dr)',
                 'daughter(0, dz)', 'daughter(1, dz)',
                 'daughter(0, chiProb)', 'daughter(1, chiProb)', 'daughter(2, chiProb)',
                 'daughter(0, kaonID)', 'daughter(0, pionID)',
                 'daughterInvM(0, 1)', 'daughterInvM(0, 2)', 'daughterInvM(1, 2)']
    general_options.m_variables = basf2_mva.vector(*variables)
    general_options.m_target_variable = "isSignal"

    specific_options = basf2_mva.PythonOptions()
    specific_options.m_framework = "keras"
    specific_options.m_steering_file = 'mva/examples/keras/preprocessing.py'
    specific_options.m_normalize = True
    specific_options.m_training_fraction = 0.9
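    # With m_training_fraction = 0.9, the remaining events are presumably the
    # held-out sample that the framework hands to begin_fit as Xtest/ytest.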

    training_start = time.time()
    basf2_mva.teacher(general_options, specific_options)
    training_stop = time.time()
    training_time = training_stop - training_start

    # Apply the trained method to the independent test sample
    method = basf2_mva_util.Method(general_options.m_identifier)
    inference_start = time.time()
    p, t = method.apply_expert(testing_data, general_options.m_treename)
    inference_stop = time.time()
    inference_time = inference_stop - inference_start

    auc = basf2_mva_util.calculate_auc_efficiency_vs_background_retention(p, t)
    print("Tensorflow", training_time, inference_time, auc)
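

# --- Illustration (not part of the original steering file) -----------------
# Hypothetical numpy sketch of the metric used above: the area under the
# signal-efficiency vs. background-retention curve (essentially a ROC AUC).
# It is not the basf2_mva_util implementation, only the underlying idea.
def _auc_efficiency_vs_background_retention_sketch(p, t, w=None):
    w = np.ones(len(t)) if w is None else np.asarray(w)
    order = np.argsort(p)[::-1]        # from the tightest to the loosest cut
    t, w = np.asarray(t)[order], w[order]
    # Cumulative signal efficiency and background retention for each threshold
    efficiency = np.concatenate(([0.0], np.cumsum(w * (t == 1)) / np.sum(w * (t == 1))))
    retention = np.concatenate(([0.0], np.cumsum(w * (t == 0)) / np.sum(w * (t == 0))))
    # Integrate the efficiency over the retention axis
    return np.trapz(efficiency, retention)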