Belle II Software  release-05-01-25
relations.py
1 #!/usr/bin/env python3
2 # -*- coding: utf-8 -*-
3 
4 # Dennis Weyland 2017
5 
# This is a basic example of how to implement relational networks in basf2 with tensorflow.
# As a toy problem, the network tries to tell whether two out of several lines intersect in three-dimensional space.
# Relevant paper: https://arxiv.org/abs/1706.01427
9 
import numpy as np
import tensorflow as tf

from basf2_mva_python_interface.tensorflow import State
13 
14 
class early_stopping(object):
    """Stop training early once the validation cost stops improving.

    The object remembers the lowest cost passed to :meth:`check` and counts
    how many consecutive calls failed to beat it.

    :param patience: number of consecutive non-improving checks after which
        :meth:`check` returns ``False``. Defaults to 20.
    :raises ValueError: if ``patience`` is smaller than 1.
    """

    def __init__(self, patience=20):
        """Initialise the counter, the best result and the patience."""
        if patience < 1:
            raise ValueError("patience must be at least 1, got {}".format(patience))

        # counts how many times in a row the cost did not improve
        self.counter = 0

        # lowest cost seen so far; starts at infinity so the first check always improves
        self.best_result = np.inf

        # number of consecutive non-improving checks tolerated before stopping
        self.patience = patience

    def check(self, cost):
        """
        Check if the validation result is better than the best validation result.

        :param cost: current validation cost (lower is better).
        :return: ``True`` if training should continue, ``False`` once the cost
            failed to improve ``patience`` times in a row.
        """
        if cost < self.best_result:
            # strict improvement: remember it and restart the count
            self.counter = 0
            self.best_result = cost
        else:
            self.counter += 1
            if self.counter >= self.patience:
                return False
        return True


# Module-level stopper instance used by partial_fit.
EARLY_STOPPER = early_stopping()
40 
41 
def get_model(number_of_features, number_of_spectators, number_of_events, training_fraction, parameters):
    """Build the tensorflow graph and return it wrapped in a State object.

    :param number_of_features: width of the feature placeholder ``x``. With
        ``use_relations`` the features are sliced into consecutive groups of 6.
    :param number_of_spectators: width of the spectator placeholder ``z``,
        which is the input of the pre-training net.
    :param number_of_events: not used by this function.
    :param training_fraction: not used by this function.
    :param parameters: dict with the keys ``'use_relations'``,
        ``'use_feed_forward'`` and ``'pre_training_epochs'``.
    :return: State built from ``x``, ``y``, the output activation, the loss,
        the minimize op and the session. ``state.z`` and
        ``state.pre_training_epochs`` are always set; ``state.pre_loss`` and
        ``state.pre_minimize`` only if pre-training is requested.
    :raises ValueError: if pre-training is requested without relations.
    """
    use_relations = parameters['use_relations']
    pre_training_epochs = parameters['pre_training_epochs']

    # The pre-training net is built from the shared relational variables, so it
    # cannot exist without them. Fail here, before a graph and session are created.
    if pre_training_epochs > 0 and not use_relations:
        raise ValueError("'pre_training_epochs' > 0 requires 'use_relations' to be enabled")

    tf.reset_default_graph()
    x = tf.placeholder(tf.float32, [None, number_of_features])
    y = tf.placeholder(tf.float32, [None, 1])
    # Used as input for pre training data set.
    z = tf.placeholder(tf.float32, [None, number_of_spectators])

    def layer(x, shape, name, unit=tf.sigmoid):
        """Build one hidden layer in feed forward net"""
        with tf.name_scope(name):
            weights = tf.Variable(tf.truncated_normal(shape, stddev=1.0 / np.sqrt(float(shape[0]))), name='weights')
            biases = tf.Variable(tf.constant(0.0, shape=[shape[1]]), name='biases')
            return unit(tf.matmul(x, weights) + biases)

    def build_relation_net_variables(shape, name):
        """Build the variables (not the net itself) which will be shared between multiple relations"""
        variables = []
        with tf.name_scope(name), tf.variable_scope(name):
            for i in range(len(shape) - 1):
                # NOTE(review): stddev is derived from shape[0] for every layer,
                # not from the layer's own input width shape[i] - confirm intent.
                weights = tf.get_variable('weights_{}'.format(i),
                                          initializer=tf.truncated_normal(shape[i:i + 2],
                                                                          stddev=1.0 / np.sqrt(float(shape[0]))))
                biases = tf.get_variable('biases_{}'.format(i), initializer=tf.constant(0.0, shape=[shape[i + 1]]))
                variables.append([weights, biases])
        return variables

    def relation_net(x, variables):
        """Build one relation net between 2 objects using pre-built variables"""
        activation = x
        last_index = len(variables) - 1
        for index, (weights, biases) in enumerate(variables):
            pre_activation = tf.matmul(activation, weights) + biases
            if index == last_index:
                # the final layer uses a sigmoid, all layers before it use tanh
                return tf.nn.sigmoid(pre_activation)
            activation = tf.nn.tanh(pre_activation)

    if use_relations:
        # Group input according to relations: one slice of 6 columns per track.
        tracks = [tf.slice(x, [0, i * 6], [-1, 6]) for i in range(number_of_features // 6)]

        # Number of features per relation. Each feature is a net with shared variables across all combinations.
        # Number of features is also the number of different sets of variables for relational nets.
        number_of_features_per_relation = 1
        relations = []
        pre_training_relations = []
        for feature_number in range(number_of_features_per_relation):
            # Build the variables, which will be shared across all combinations
            relational_variables = build_relation_net_variables([12, 50, 50, 1],
                                                                'tracks_relational_{}'.format(feature_number))
            # Loop over every combination of input groups.
            for counter, track1 in enumerate(tracks):
                for track2 in tracks[counter + 1:]:
                    # Build the net with pre-built variables.
                    relations.append(relation_net(tf.concat([track1, track2], 1), relational_variables))

            if pre_training_epochs > 0:
                # build net for pre-training with the same shared variables.
                pre_training_relations.append(relation_net(z, relational_variables))

        new_x = tf.concat(relations, 1)

    else:
        new_x = x

    if parameters['use_feed_forward']:
        print('Number of variables going into feed_forward:', int(new_x.get_shape()[1]))
        inference_hidden1 = layer(new_x, [int(new_x.get_shape()[1]), 50], 'inference_hidden1')
        inference_hidden2 = layer(inference_hidden1, [50, 50], 'inference_hidden2')
        inference_activation = layer(inference_hidden2, [50, 1], 'inference_sigmoid', unit=tf.sigmoid)
    else:
        print('Number of variables going into reduce_max:', int(new_x.get_shape()[1]))
        inference_activation = layer(new_x, [int(new_x.get_shape()[1]), 1], 'inference_sigmoid', unit=tf.sigmoid)
        print(inference_activation.get_shape())

    # epsilon keeps the arguments of the logarithms away from zero
    epsilon = 1e-5
    inference_loss = -tf.reduce_sum(y * tf.log(inference_activation + epsilon) +
                                    (1.0 - y) * tf.log(1 - inference_activation + epsilon))

    inference_optimizer = tf.train.AdamOptimizer(learning_rate=0.01)
    inference_minimize = inference_optimizer.minimize(inference_loss)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    session = tf.Session(config=config)

    state = State(x, y, inference_activation, inference_loss, inference_minimize, session)

    if pre_training_epochs > 0:
        # define training ops for pre-training and save them into state
        new_z = tf.concat(pre_training_relations, 1)
        pre_activation = layer(new_z, [int(new_z.get_shape()[1]), 1], 'pre_output')
        state.pre_loss = -tf.reduce_sum(y * tf.log(pre_activation + epsilon) +
                                        (1.0 - y) * tf.log(1 - pre_activation + epsilon))
        pre_optimizer = tf.train.AdamOptimizer(learning_rate=0.01)
        state.pre_minimize = pre_optimizer.minimize(state.pre_loss)

    # partial_fit reads these two attributes on every call, so they are always set.
    state.pre_training_epochs = pre_training_epochs
    state.z = z
    init = tf.global_variables_initializer()
    session.run(init)

    return state
146 
147 
def begin_fit(state, Xtest, Stest, ytest, wtest):
    """Store the validation sample on the state so it can be used for monitoring.

    The features, targets and spectators are attached as ``val_x``, ``val_y``
    and ``val_z``; ``wtest`` is accepted but not stored. Returns the same
    state object.
    """
    validation_sample = (
        ('val_x', Xtest),
        ('val_y', ytest),
        ('val_z', Stest),
    )
    for attribute, values in validation_sample:
        setattr(state, attribute, values)

    return state
155 
156 
def partial_fit(state, X, S, y, w, epoch):
    """Run one optimisation step on the received batch.

    While ``epoch`` is below ``state.pre_training_epochs`` the pre-training
    op is run and the function always returns True. Afterwards the op of the
    whole network is run. Every 1000th call the matching validation cost is
    printed; in the main training phase that cost is also handed to
    EARLY_STOPPER, whose answer is returned. ``w`` is not used.
    """
    batch = {state.x: X, state.y: y, state.z: S}
    is_report_step = epoch % 1000 == 0

    # Pre-training trains the shared variables on only 2 lines.
    # In this case there is no relation net which has to compare two lines
    # not hitting each other in a signal event.
    if state.pre_training_epochs > epoch:
        state.session.run(state.pre_minimize, feed_dict=batch)
        if is_report_step:
            validation_batch = {state.y: state.val_y, state.z: state.val_z}
            validation_cost = state.session.run(state.pre_loss, feed_dict=validation_batch)
            print("Pre-Training: Epoch:", '%04d' % (epoch), "cost=", "{:.9f}".format(validation_cost))
        return True

    # Training of the whole network.
    state.session.run(state.optimizer, feed_dict=batch)
    if not is_report_step:
        return True

    validation_batch = {state.y: state.val_y, state.x: state.val_x}
    validation_cost = state.session.run(state.cost, feed_dict=validation_batch)
    print("Epoch:", '%04d' % (epoch), "cost=", "{:.9f}".format(validation_cost))
    return EARLY_STOPPER.check(validation_cost)
179 
180 
if __name__ == "__main__":
    import os
    import pandas
    from root_pandas import to_root
    import tempfile
    import json

    import basf2
    import basf2_mva
    import basf2_mva_util
    # ############## Building data samples ###########################
    # This is a dataset for testing relational nets.
    # It consists of number_total_lines lines in 3 dimensional space.
    # Each line has 6 variables.
    # In approx. half of the cases, two lines are hitting each other.
    # This is considered a signal event.
    # Training results depend on the number of total lines.

    # try using 10 lines and see what happens
    number_total_lines = 5
    # Names for the training data set: six variables per line, grouped line by line.
    variables = ['{}_{}'.format(prefix, i)
                 for i in range(number_total_lines)
                 for prefix in ('px', 'py', 'pz', 'dx', 'dy', 'dz')]
    # Names for the spectator variables.
    # Used as input variables for pre-training.
    spectators = ['Spx1', 'Spy1', 'Spz1', 'Sdx1', 'Sdy1', 'Sdz1', 'Spx2', 'Spy2', 'Spz2', 'Sdx2', 'Sdy2', 'Sdz2']
    # Number of events in training and test root file.
    number_of_events = 1000000

    def build_signal_event():
        """Build two lines which are hitting each other.

        Both lines are constructed so that the common point v_cross lies on
        them: each v_vec is v_cross shifted along the line's own p_vec.
        Returns the two lines as arrays of 6 values, p_vec followed by v_vec.
        """
        p_vec1, p_vec2 = np.random.normal(size=3), np.random.normal(size=3)
        v_cross = np.random.normal(size=3)
        epsilon1, epsilon2 = np.random.rand() * 2 - 1, np.random.rand() * 2 - 1
        v_vec1 = v_cross + (p_vec1 * epsilon1)
        v_vec2 = v_cross + (p_vec2 * epsilon2)
        return np.concatenate([p_vec1, v_vec1]), np.concatenate([p_vec2, v_vec2])

    # This path will delete itself with all data in it after end of program.
    with tempfile.TemporaryDirectory() as path:
        for filename in ['train.root', 'test.root']:
            print('Building ' + filename)
            # Use random numbers to build all training and spectator variables.
            data = np.random.normal(size=[number_of_events, number_total_lines * 6 + 12])
            target = np.zeros([number_of_events], dtype=bool)

            # Overwrite some lines for about half of the events so that they are hitting each other.
            # Write them also at the end for the spectators.
            for index in range(number_of_events):
                if np.random.rand() > 0.5:
                    target[index] = True
                    # Pick two DIFFERENT line slots. The offset is drawn from
                    # [1, number_total_lines - 1]; an offset of 0 would make both
                    # indices equal, so the second line would overwrite the first
                    # and the signal event would contain no intersecting pair.
                    i1 = np.random.randint(number_total_lines)
                    i2 = (i1 + 1 + np.random.randint(number_total_lines - 1)) % number_total_lines
                    track1, track2 = build_signal_event()
                    data[index, i1 * 6:(i1 + 1) * 6] = track1
                    data[index, i2 * 6:(i2 + 1) * 6] = track2
                    data[index, number_total_lines * 6:] = np.append(track1, track2)

            # Saving all variables in root files
            dic = {name: data[:, i] for i, name in enumerate(variables + spectators)}
            dic['isSignal'] = target

            df = pandas.DataFrame(dic, dtype=np.float32)
            to_root(df, os.path.join(path, filename), tree_key='variables')

        # ########################## Do Training #################################
        # Do a comparison of different Nets for this task.

        general_options = basf2_mva.GeneralOptions()
        general_options.m_datafiles = basf2_mva.vector(os.path.join(path, 'train.root'))
        general_options.m_treename = "variables"
        general_options.m_variables = basf2_mva.vector(*variables)
        general_options.m_target_variable = "isSignal"
        general_options.m_spectators = basf2_mva.vector(*spectators)

        specific_options = basf2_mva.PythonOptions()
        specific_options.m_framework = "tensorflow"
        # get_model, begin_fit and partial_fit are defined in this very file, so it is
        # used as the steering file regardless of its name or the working directory.
        specific_options.m_steering_file = os.path.abspath(__file__)
        specific_options.m_nIterations = 100
        specific_options.m_mini_batch_size = 100
        specific_options.m_training_fraction = 0.999

        print('Train relational net with pre-training')
        general_options.m_identifier = os.path.join(path, 'relation_2.xml')
        specific_options.m_config = json.dumps({'use_relations': True, 'use_feed_forward': False, 'pre_training_epochs': 30000})
        basf2_mva.teacher(general_options, specific_options)

        # Train normal feed forward Net:
        print('Train feed forward net')
        general_options.m_identifier = os.path.join(path, 'feed_forward.xml')
        specific_options.m_config = json.dumps({'use_relations': False, 'use_feed_forward': True, 'pre_training_epochs': 0})
        basf2_mva.teacher(general_options, specific_options)

        # Train Relation Net:
        print('Train relational net')
        general_options.m_identifier = os.path.join(path, 'relation.xml')
        specific_options.m_config = json.dumps({'use_relations': True, 'use_feed_forward': True, 'pre_training_epochs': 0})
        basf2_mva.teacher(general_options, specific_options)

        # ######################## Compare Results ####################################

        method1 = basf2_mva_util.Method(os.path.join(path, 'feed_forward.xml'))
        method2 = basf2_mva_util.Method(os.path.join(path, 'relation.xml'))
        method3 = basf2_mva_util.Method(os.path.join(path, 'relation_2.xml'))

        test_data = basf2_mva.vector(os.path.join(path, 'test.root'))
        print('Apply feed forward net')
        p1, t1 = method1.apply_expert(test_data, general_options.m_treename)
        print('Apply relational net')
        p2, t2 = method2.apply_expert(test_data, general_options.m_treename)
        print('Aplly special relational net')
        p3, t3 = method3.apply_expert(test_data, general_options.m_treename)

        print('Feed Forward Net AUC: ', basf2_mva_util.calculate_roc_auc(p1, t1))
        print('Relational Net AUC: ', basf2_mva_util.calculate_roc_auc(p2, t2))
        print('Relational Net with pre-training AUC: ', basf2_mva_util.calculate_roc_auc(p3, t3))
basf2_mva_util.calculate_roc_auc
def calculate_roc_auc(p, t)
Definition: basf2_mva_util.py:39
relations.early_stopping.__init__
def __init__(self)
Definition: relations.py:18
basf2_mva_util.Method
Definition: basf2_mva_util.py:81
relations.early_stopping
Definition: relations.py:15
relations.early_stopping.best_result
best_result
saves best training result
Definition: relations.py:23
relations.early_stopping.check
def check(self, cost)
Definition: relations.py:25
basf2_mva_python_interface.tensorflow
Definition: tensorflow.py:1
basf2_mva_python_interface.tensorflow.State
Definition: tensorflow.py:12
relations.early_stopping.counter
counter
counts how many times training is not getting better
Definition: relations.py:21