import os
import json
import time
import tempfile
import threading

import numpy as np
import tensorflow as tf

import basf2_mva
from basf2_mva_python_interface.tensorflow import State


def execute_train_op(state):
    """
    Do the actual training multithreaded.
    """
    try:
        epoch = 0
        while not COORD.should_stop():
            # Pull one shuffled batch of features, weights and targets from the queue.
            X, w, y = state.queue.dequeue_many(state.dequeue_batch_size)

            with tf.GradientTape() as tape:
                avg_cost = state.model.loss(state.model(X), y, w)
                grads = tape.gradient(avg_cost, state.model.trainable_variables)

            state.model.optimizer.apply_gradients(zip(grads, state.model.trainable_variables))

            if epoch % 100 == 0:
                print('Step %d: Train cost = %.4f' % (epoch, avg_cost))
            epoch += 1

    except tf.errors.OutOfRangeError:
        print('No more items in closed queue, end of training.')
        # Tell the other worker thread(s) to stop as well.
        COORD.request_stop()


def get_model(number_of_features, number_of_spectators, number_of_events, training_fraction, parameters):
    """
    Build the TensorFlow graph, handle parameters and initialise variables.
    """

    param = {'capacity': 1e6, 'min_after_dequeue': 800, 'batch_size': 500}

    if isinstance(parameters, dict):
        param.update(parameters)

    gpus = tf.config.list_physical_devices('GPU')
    if gpus:
        for gpu in gpus:
            # Allocate GPU memory on demand instead of grabbing it all up front.
            tf.config.experimental.set_memory_growth(gpu, True)

    class my_model(tf.Module):

        def __init__(self, **kwargs):
            super().__init__(**kwargs)

            self.optimizer = tf.optimizers.Adam(0.01)
            # Total number of dense layers: hidden layers plus the sigmoid output (assumed value).
            self.n_layers = 3

            def create_layer_variables(shape, name, activation_function):
                weights = tf.Variable(
                    tf.random.truncated_normal(shape, stddev=1.0 / np.sqrt(float(shape[0]))),
                    name=f'{name}_weights')
                biases = tf.Variable(tf.zeros(shape=[shape[1]]), name=f'{name}_biases')
                return weights, biases, activation_function

            self.layer_variables = []

            shape = [number_of_features, number_of_features]
            for i in range(self.n_layers - 1):
                self.layer_variables.append(create_layer_variables(shape, f'inference_hidden{i}', tf.nn.relu))
            self.layer_variables.append(create_layer_variables([number_of_features, 1],
                                                               'inference_sigmoid', tf.nn.sigmoid))

        @tf.function(input_signature=[tf.TensorSpec(shape=[None, number_of_features], dtype=tf.float32)])
        def __call__(self, x):

            def dense(x, W, b, activation_function):
                return activation_function(tf.matmul(x, W) + b)

            for i in range(self.n_layers):
                x = dense(x, *self.layer_variables[i])
            return x

        def loss(self, predicted_y, target_y, w):
            # Weighted binary cross entropy: use p for signal (target 1) and 1 - p for background.
            epsilon = 1e-5  # small constant (assumed value) to keep the log finite
            diff_from_truth = tf.where(target_y == 1., predicted_y, 1. - predicted_y)
            cross_entropy = - tf.reduce_sum(w * tf.math.log(diff_from_truth + epsilon)) / tf.reduce_sum(w)
            return cross_entropy

    state = State(model=my_model())

    # Queue from which the training threads dequeue shuffled batches of (X, w, y).
    state.queue = tf.queue.RandomShuffleQueue(int(param['capacity']), int(param['min_after_dequeue']),
                                              [tf.float32, tf.float32, tf.float32],
                                              shapes=[[number_of_features], [1], [1]])

    state.dequeue_batch_size = int(param['batch_size'])

    # Coordinator and worker threads that will run the training loop.
    global COORD
    COORD = tf.train.Coordinator()
    state.threads = [threading.Thread(target=execute_train_op, args=(state,)) for i in range(2)]
    COORD.join(state.threads)

    return state
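

# Training flow of this example: get_model() builds the model, the
# RandomShuffleQueue and the two worker threads; begin_fit() starts the
# threads; partial_fit() pushes batches into the queue; once the queue is
# closed and runs empty, dequeue_many raises OutOfRangeError (handled above)
# and the workers stop; end_fit() then serialises the trained model.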


def begin_fit(state, Xtest, Stest, ytest, wtest, nBatches):
    """
    Start the training threads asynchronously.
    """
    for t in state.threads:
        t.start()
    return state


def partial_fit(state, X, S, y, w, epoch, batch):
    """
    Put data in the queue.
    """
    state.queue.enqueue_many([X, w, y])
    print("Queue Epoch: %d, Queue Batch: %d, Queue Size: %d" % (epoch, batch, state.queue.size()))
    return True


def end_fit(state):
    """
    Store the TensorFlow model as a SavedModel graph.
    """
    try:
        import tensorflow as tf
    except ImportError:
        print("Please install tensorflow: pip3 install tensorflow")
    with tempfile.TemporaryDirectory() as path:

        tf.saved_model.save(state.model, path)
        # tf.saved_model.save writes into `path`:
        #   saved_model.pb
        #   variables/variables.index
        #   variables/variables.data-00000-of-00001
        #   assets/ (optional extra files)
        file_names = ['saved_model.pb',
                      'variables/variables.index',
                      'variables/variables.data-00000-of-00001']

        # Pick up whatever ended up in assets/, if anything.
        assets_path = os.path.join(path, 'assets/')
        file_names.extend([f'assets/{f.name}' for f in os.scandir(assets_path)
                           if os.path.isfile(os.path.join(assets_path, f))])

        # Read every file back as raw bytes so the serialised model can be returned to the framework.
        files = []
        for file_name in file_names:
            with open(os.path.join(path, file_name), 'rb') as file:
                files.append(file.read())

    return [file_names, files]
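

# The [file_names, files] pair returned by end_fit() contains everything needed
# to rebuild the model. The helper below is a minimal illustrative sketch, not
# part of the original example and not the framework's load hook; the name
# `restore_saved_model` is an assumption for illustration only.
def restore_saved_model(file_names, files):
    with tempfile.TemporaryDirectory() as path:
        # Write the serialised files back to disk, recreating the SavedModel layout.
        for file_name, content in zip(file_names, files):
            full_name = os.path.join(path, file_name)
            os.makedirs(os.path.dirname(full_name), exist_ok=True)
            with open(full_name, 'wb') as out:
                out.write(content)
        # Restore while the temporary directory still exists.
        return tf.saved_model.load(path)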


if __name__ == "__main__":
    from basf2 import conditions
    conditions.testing_payloads = [
        'localdb/database.txt'
    ]

    general_options = basf2_mva.GeneralOptions()
    general_options.m_datafiles = basf2_mva.vector("train.root")
    general_options.m_treename = "tree"
    variables = ['p', 'pt', 'pz',
                 'daughter(0, p)', 'daughter(0, pz)', 'daughter(0, pt)',
                 'daughter(1, p)', 'daughter(1, pz)', 'daughter(1, pt)',
                 'daughter(2, p)', 'daughter(2, pz)', 'daughter(2, pt)',
                 'chiProb', 'dr', 'dz',
                 'daughter(0, dr)', 'daughter(1, dr)',
                 'daughter(0, dz)', 'daughter(1, dz)',
                 'daughter(0, chiProb)', 'daughter(1, chiProb)', 'daughter(2, chiProb)',
                 'daughter(0, kaonID)', 'daughter(0, pionID)',
                 'daughterInvM(0, 1)', 'daughterInvM(0, 2)', 'daughterInvM(1, 2)']

    general_options.m_variables = basf2_mva.vector(*variables)
    general_options.m_spectators = basf2_mva.vector('M')
    general_options.m_target_variable = "isSignal"

    specific_options = basf2_mva.PythonOptions()
    specific_options.m_framework = "tensorflow"
    specific_options.m_steering_file = 'mva/examples/tensorflow/multithreaded.py'
    specific_options.m_nIterations = 100
    specific_options.m_mini_batch_size = 0

    general_options.m_identifier = "tensorflow_multithreaded"
    specific_options.m_config = json.dumps({'capacity': 2e3, 'min_after_dequeue': 500, 'batch_size': 500})
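    # The JSON string in m_config is handed to get_model() as its `parameters`
    # argument and overrides the queue defaults in `param` (capacity,
    # min_after_dequeue, batch_size).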

    import basf2_mva_util
    training_start = time.time()
    basf2_mva.teacher(general_options, specific_options)
    training_stop = time.time()
    training_time = training_stop - training_start

    method = basf2_mva_util.Method(general_options.m_identifier)
    inference_start = time.time()
    test_data = ["test.root"] * 10
    p, t = method.apply_expert(basf2_mva.vector(*test_data), general_options.m_treename)
    inference_stop = time.time()
    inference_time = inference_stop - inference_start

    auc = basf2_mva_util.calculate_roc_auc(p, t)
    print("Tensorflow", training_time, inference_time, auc)