Belle II Software development
tensorflow.py
1#!/usr/bin/env python3
2
3
10
11import numpy as np
12import os
13import tempfile
14
15import tensorflow as tf
16tf.config.threading.set_intra_op_parallelism_threads(1)
17tf.config.threading.set_inter_op_parallelism_threads(1)
18
19
class State:
    """
    Container handed between the hook functions of this module.
    It only carries the tensorflow model.
    """

    def __init__(self, model=None, **kwargs):
        """
        Create the state object

        @param model the model to wrap, None if there is no model yet
        @param kwargs additional keyword arguments are accepted and not used
        """

        #: tensorflow model inheriting from tf.Module
        self.model = model
30
31
def feature_importance(state):
    """
    Return the feature importances as a list.
    This implementation does not look at the state and always
    returns a new, empty list.
    """
    importances = []
    return importances
37
38
def get_model(number_of_features, number_of_spectators, number_of_events, training_fraction, parameters):
    """
    Build the default tensorflow model and return it wrapped in a State.

    The model computes sigmoid(x * W + b) with W of shape (number_of_features, 1)
    and b of shape (1, 1), both initialised with ones.
    Only number_of_features is used here, the other arguments are not read.
    """
    # enable memory growth on every GPU tensorflow can see (no-op without GPUs)
    for device in tf.config.list_physical_devices('GPU'):
        tf.config.experimental.set_memory_growth(device, True)

    # signature of the inference call: any number of rows, one column per feature
    feature_spec = tf.TensorSpec(shape=[None, number_of_features], dtype=tf.float32)

    class my_model(tf.Module):
        """
        Single sigmoid unit with its own optimizer and loss function
        """

        def __init__(self, **kwargs):
            """ Create the trainable variables and the optimizer """
            super().__init__(**kwargs)

            self.W = tf.Variable(tf.ones(shape=(number_of_features, 1)), name="W")
            self.b = tf.Variable(tf.ones(shape=(1, 1)), name="b")

            self.optimizer = tf.optimizers.SGD(0.01)

        @tf.function(input_signature=[feature_spec])
        def __call__(self, x):
            """ Return the sigmoid response for the rows of x """
            linear_response = tf.matmul(self.clean_nans(x), self.W) + self.b
            return tf.nn.sigmoid(linear_response)

        def clean_nans(self, x):
            """ Return x with every NaN entry replaced by zero """
            nan_mask = tf.math.is_nan(x)
            return tf.where(nan_mask, tf.zeros_like(x), x)

        def loss(self, predicted_y, target_y, w):
            """ Weighted cross entropy, normalised to the sum of the weights """
            # small offset added inside the logarithm
            epsilon = 1e-5
            # prediction where the target is 1, one minus the prediction otherwise
            agreement = tf.where(target_y == 1., predicted_y, 1. - predicted_y)
            weighted_log = w * tf.math.log(agreement + epsilon)
            return - tf.reduce_sum(weighted_log) / tf.reduce_sum(w)

    return State(model=my_model())
74
75
def load(obj):
    """
    Load Tensorflow estimator into state

    obj[0] holds the relative file names and obj[1] the content of each file
    at the same position. The files are written into a temporary directory
    and read back with tf.saved_model.load.
    """
    # enable memory growth on every GPU tensorflow can see (no-op without GPUs)
    for device in tf.config.list_physical_devices('GPU'):
        tf.config.experimental.set_memory_growth(device, True)

    with tempfile.TemporaryDirectory() as path:

        # recreate the expected folder structure
        for subfolder in ('variables', 'assets'):
            os.makedirs(os.path.join(path, subfolder))

        # write every stored file back under its relative name
        for position, relative_name in enumerate(obj[0]):
            with open(f'{path}/{relative_name}', 'w+b') as target:
                target.write(bytes(obj[1][position]))

        return State(model=tf.saved_model.load(path))
100
101
def apply(state, X):
    """
    Run the model stored in the state on X and return the response
    as a float32 numpy array.
    """
    # the model is always called with a two-dimensional float32 tensor
    features = tf.convert_to_tensor(np.atleast_2d(X), dtype=tf.float32)
    response = state.model(features).numpy()

    # Drop the second axis only if it has length one. squeeze is not used
    # because the output might have shape [1, X classes].
    if response.shape[1] == 1:
        response = response[:, 0]

    return np.require(response, dtype=np.float32, requirements=['A', 'W', 'C', 'O'])
110
111
def begin_fit(state, Xtest, Stest, ytest, wtest, nBatches):
    """
    Store the number of batches in the state and return the state.
    The test data (Xtest, Stest, ytest, wtest) is not used.
    """
    setattr(state, 'nBatches', nBatches)
    return state
118
119
def partial_fit(state, X, S, y, w, epoch, batch):
    """
    Pass batches of received data to tensorflow

    Performs one optimizer step on the batch and prints the mean cost of all
    batches of an epoch once its last batch (batch == state.nBatches - 1) has
    been processed.

    @param state state object, state.nBatches has to be set (see begin_fit)
    @param X features of the batch
    @param S spectators of the batch, not used
    @param y targets of the batch
    @param w weights of the batch
    @param epoch index of the current epoch
    @param batch index of the current batch within the epoch
    @return False if epoch equals 100000, True otherwise
    """
    model = state.model

    # record the forward pass so the cost can be differentiated
    with tf.GradientTape() as tape:
        avg_cost = model.loss(model(X), y, w)
    grads = tape.gradient(avg_cost, model.trainable_variables)

    model.optimizer.apply_gradients(zip(grads, model.trainable_variables))

    # start a fresh list at the very first batch of the training
    if batch == 0 and epoch == 0:
        state.avg_costs = []

    # Every batch contributes to the epoch it belongs to. The last batch is
    # part of the summary of its own epoch and is not carried into the next.
    state.avg_costs.append(avg_cost)

    if batch == state.nBatches - 1:
        # end of the epoch, print summary results and reset the avg_costs
        print(f"Epoch: {epoch:04d} cost= {np.mean(state.avg_costs):.9f}")
        state.avg_costs = []

    if epoch == 100000:
        return False
    return True
142
143
def end_fit(state):
    """
    Save the model of the state with tf.saved_model.save and return the
    written files as [file_names, files]: the relative file names and the
    binary content of each file at the same position.
    """
    with tempfile.TemporaryDirectory() as path:

        tf.saved_model.save(state.model, path)
        # tf.saved_model.save creates:
        # path/saved_model.pb
        # path/variables/variables.index
        # path/variables/variables.data-00000-of-00001
        # path/assets/* - This contains additional assets stored in the model.

        file_names = [
            'saved_model.pb',
            'variables/variables.index',
            'variables/variables.data-00000-of-00001',
        ]

        # we dont know what, if anything, is saved in assets/
        # only regular files directly inside assets/ are picked up
        assets_path = os.path.join(path, 'assets/')
        for entry in os.scandir(assets_path):
            if os.path.isfile(entry.path):
                file_names.append(f'assets/{entry.name}')

        # read every listed file as raw bytes, in the order of file_names
        files = []
        for relative_name in file_names:
            with open(os.path.join(path, relative_name), 'rb') as source:
                content = source.read()
            files.append(content)

    # drop the local reference to the state
    del state
    return [file_names, files]
model
tensorflow model inheriting from tf.Module
Definition: tensorflow.py:29
def __init__(self, model=None, **kwargs)
Definition: tensorflow.py:25