19 from tensorflow.keras.layers
import Dense, GlobalAveragePooling1D, Input, Reshape
20 from tensorflow.keras.models
import Model
21 from tensorflow.keras.optimizers
import Adam
22 from tensorflow.keras.losses
import binary_crossentropy
23 from tensorflow.keras.activations
import sigmoid, tanh
24 from tensorflow.keras.callbacks
import Callback, EarlyStopping
27 from basf2_mva_extensions.keras_relational
import Relations
def get_model(number_of_features, number_of_spectators, number_of_events, training_fraction, parameters):
    """
    Build and compile the keras model and wrap it in a State object.

    If parameters['use_relations'] is set, the flat input is reshaped into
    groups of 6 features, passed through a Relations layer configured with
    parameters['number_features'] and reduced with GlobalAveragePooling1D.
    A tanh Dense layer and a single sigmoid output unit follow.

    Only number_of_features and parameters are read here; the remaining
    arguments are part of the hook signature and are not used.

    Returns the State object holding the compiled model.
    """
    # Named `inputs` rather than `input` to avoid shadowing the builtin.
    inputs = Input(shape=(number_of_features,))
    # The layer chain has to start from the input tensor in both configurations.
    net = inputs

    if parameters['use_relations']:
        # Regroup the flat feature vector into blocks of 6 features each.
        net = Reshape((number_of_features // 6, 6))(net)
        net = Relations(number_features=parameters['number_features'])(net)
        net = GlobalAveragePooling1D()(net)

    # NOTE(review): as written this layer is applied on both paths; confirm whether it
    # was meant only for the plain feed-forward configuration (and possibly stacked).
    net = Dense(units=2 * number_of_features, activation=tanh)(net)

    output = Dense(units=1, activation=sigmoid)(net)

    # Relations is a custom layer, so it is registered by name in custom_objects.
    state = State(Model(inputs, output), custom_objects={'Relations': Relations})

    # `learning_rate` replaces the deprecated `lr` keyword of the keras optimizers.
    state.model.compile(optimizer=Adam(learning_rate=0.001), loss=binary_crossentropy, metrics=['accuracy'])

    return state
def begin_fit(state, Xtest, Stest, ytest, wtest, nBatches):
    """
    Attach the test sample to the state and return it.

    Xtest and ytest are stored as state.Xtest and state.ytest because
    partial_fit evaluates the model on them; Stest, wtest and nBatches are
    not used.

    Returns just the state object
    """
    state.Xtest = Xtest
    state.ytest = ytest

    return state
def partial_fit(state, X, S, y, w, epoch, batch):
    """
    Fit the model on the received data, printing test and training metrics after each epoch.

    The whole fit (up to 100 keras epochs, stopped early on 'val_loss') is done
    in this single call, using state.Xtest / state.ytest as validation data.
    S, w, epoch and batch are not used.

    Returns False, since no further call is needed after the fit.
    NOTE(review): assumes the framework treats a falsy return as "stop training" - confirm.
    """
    class TestCallback(Callback):
        """
        Class has to inherit from abstract Callback class
        """

        def on_epoch_end(self, epoch, logs=None):
            """
            Print summary at the end of epoch.
            For other possibilities look at the abstract Callback class.
            """
            loss, acc = state.model.evaluate(state.Xtest, state.ytest, verbose=0, batch_size=1000)
            # Only the first 10000 training events are evaluated for the training metrics.
            loss2, acc2 = state.model.evaluate(X[:10000], y[:10000], verbose=0, batch_size=1000)
            print(f'\nTesting loss: {loss}, acc: {acc}')
            print(f'Training loss: {loss2}, acc: {acc2}')

    state.model.fit(X, y, batch_size=100, epochs=100, validation_data=(state.Xtest, state.ytest),
                    callbacks=[TestCallback(), EarlyStopping(monitor='val_loss')])

    return False
if __name__ == "__main__":
    # Everything below is only needed when the example is run as a script,
    # so the imports are kept local to the guard.
    import json
    import os
    import tempfile

    import numpy as np
    import pandas
    import uproot

    import basf2_mva
    import basf2_mva_util
    from basf2 import conditions

    conditions.testing_payloads = [
        'localdb/database.txt'
    ]

    # Each event consists of number_total_lines lines, each described by 6 values:
    # a direction (px, py, pz) and a point (dx, dy, dz).
    number_total_lines = 5
    variables = []
    for i in range(number_total_lines):
        variables += ['px_' + str(i), 'py_' + str(i), 'pz_' + str(i),
                      'dx_' + str(i), 'dy_' + str(i), 'dz_' + str(i)]

    number_of_events = 1000000

    def build_signal_event():
        """Building two lines which are hitting each other"""
        p_vec1, p_vec2 = np.random.normal(size=3), np.random.normal(size=3)
        # Common point shared by both lines.
        v_cross = np.random.normal(size=3)
        # Shift each line's reference point along its own direction by a factor in [-0.1, 0.1).
        epsilon1, epsilon2 = (np.random.rand() * 2 - 1) / 10, (np.random.rand() * 2 - 1) / 10
        v_vec1 = v_cross + (p_vec1 * epsilon1)
        v_vec2 = v_cross + (p_vec2 * epsilon2)
        return np.concatenate([p_vec1, v_vec1]), np.concatenate([p_vec2, v_vec2])

    with tempfile.TemporaryDirectory() as path:
        for filename in ['train.root', 'test.root']:
            print('Building ' + filename)
            # Background: every line is pure gaussian noise.
            data = np.random.normal(size=[number_of_events, number_total_lines * 6])
            # Builtin bool instead of the np.bool alias, which is missing in NumPy 1.24-1.26.
            target = np.zeros([number_of_events], dtype=bool)

            # Overwrite two lines of roughly half of the events with an intersecting pair.
            for index in range(number_of_events):
                if np.random.rand() > 0.5:
                    target[index] = True
                    i1 = int(np.random.rand() * number_total_lines)
                    # Offset in 1..number_total_lines-1 guarantees i2 != i1, so the
                    # second track never overwrites the first one.
                    offset = 1 + int(np.random.rand() * (number_total_lines - 1))
                    i2 = (i1 + offset) % number_total_lines
                    track1, track2 = build_signal_event()
                    data[index, i1 * 6:(i1 + 1) * 6] = track1
                    data[index, i2 * 6:(i2 + 1) * 6] = track2

            # One column per variable plus the target column.
            dic = {name: data[:, i] for i, name in enumerate(variables)}
            dic.update({'isSignal': target})

            df = pandas.DataFrame(dic)
            with uproot.recreate(os.path.join(path, filename)) as outfile:
                outfile['variables'] = df

        # Options shared by both trainings.
        general_options = basf2_mva.GeneralOptions()
        general_options.m_datafiles = basf2_mva.vector(os.path.join(path, 'train.root'))
        general_options.m_treename = "variables"
        general_options.m_variables = basf2_mva.vector(*variables)
        general_options.m_target_variable = "isSignal"

        specific_options = basf2_mva.PythonOptions()
        specific_options.m_framework = "keras"
        specific_options.m_steering_file = 'mva/examples/keras/relational_network.py'
        specific_options.m_training_fraction = 0.999

        print('Train relational net ')
        general_options.m_identifier = os.path.join(path, 'relation.xml')
        specific_options.m_config = json.dumps({'use_relations': True,
                                                'number_features': 3})
        basf2_mva.teacher(general_options, specific_options)

        print('Train feed forward net')
        general_options.m_identifier = os.path.join(path, 'feed_forward.xml')
        specific_options.m_config = json.dumps({'use_relations': False})
        basf2_mva.teacher(general_options, specific_options)

        # NOTE(review): assumes basf2_mva_util.Method(identifier) loads a trained
        # weightfile and provides apply_expert - confirm against basf2_mva_util.
        method1 = basf2_mva_util.Method(os.path.join(path, 'relation.xml'))
        method2 = basf2_mva_util.Method(os.path.join(path, 'feed_forward.xml'))

        test_data = basf2_mva.vector(os.path.join(path, 'test.root'))

        print('Apply relational net')
        p1, t1 = method1.apply_expert(test_data, general_options.m_treename)
        print('Apply feed forward net')
        p2, t2 = method2.apply_expert(test_data, general_options.m_treename)
def calculate_auc_efficiency_vs_background_retention(p, t, w=None)