import os
import json
import tempfile

import numpy as np
import pandas
import tensorflow as tf

# State container of the basf2 mva python interface (import path assumed)
from basf2_mva_python_interface.tensorflow import State
from dft import binning
from dft import tensorflow_dnn_model as tfm
from dft.TfData import TfDataBasf2, TfDataBasf2Stub
def get_tensorflow_model(number_of_features, parameters):
    """
    generates the tensorflow model
    :param number_of_features: int, number of features is handled separately
    :param parameters: dictionary of hyperparameters, decoded from a json object
    :return: configured tensorflow model
    """
    layers = parameters.get('layers', None)
    wd_coeffs = parameters.get('wd_coeffs', [])

    # the default decay rate evaluates to roughly .976 (about 1 / e**.024)
    lr_dec_rate = parameters.get('lr_dec_rate', 1 / (1 + 2e-7)**1.2e5)
    lr_init = parameters.get('lr_init', .05)
    mom_init = parameters.get('mom_init', .9)
    min_epochs = parameters.get('min_epochs', 300)
    max_epochs = parameters.get('max_epochs', 400)
    stop_epochs = parameters.get('stop_epochs', 10)
    if layers is None:
        layers = [['h0', 'tanh', number_of_features, 300, .0001, 1.0 / np.sqrt(300)],
                  ['h1', 'tanh', 300, 300, .0001, 1.0 / np.sqrt(300)],
                  ['h2', 'tanh', 300, 300, .0001, 1.0 / np.sqrt(300)],
                  ['h3', 'tanh', 300, 300, .0001, 1.0 / np.sqrt(300)],
                  ['h4', 'tanh', 300, 300, .0001, 1.0 / np.sqrt(300)],
                  ['h5', 'tanh', 300, 300, .0001, 1.0 / np.sqrt(300)],
                  ['h6', 'tanh', 300, 300, .0001, 1.0 / np.sqrt(300)],
                  ['h7', 'tanh', 300, 300, .0001, 1.0 / np.sqrt(300)],
                  ['y', 'sigmoid', 300, 1, .0001, 0.002 * 1.0 / np.sqrt(300)]]
    else:
        # make sure the width of the input layer matches the actual feature count
        layers[0][2] = number_of_features
    # an empty list (the default) enables a uniform weight decay coefficient
    # on all layers; None leaves weight decay disabled
    if wd_coeffs is not None and not wd_coeffs:
        wd_coeffs = [2e-5 for _ in layers]
    mlp = tfm.MultilayerPerceptron.from_list(layers)
    model = tfm.DefaultModel(mlp, lr_dec_rate=lr_dec_rate, lr_init=lr_init, mom_init=mom_init, wd_coeffs=wd_coeffs,
                             min_epochs=min_epochs, max_epochs=max_epochs, stop_epochs=stop_epochs)
    return model
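
# A quick sketch (hypothetical values) of overriding the default topology: each
# layer entry is [name, activation, n_in, n_out, ...] followed by two numbers
# that, judging from the defaults above, act as initialization scales (an
# assumption, not confirmed by this module). A smaller network could then be
# requested with:
#
#   custom = {'layers': [['h0', 'tanh', 10, 50, .0001, 1.0 / np.sqrt(50)],
#                        ['y', 'sigmoid', 50, 1, .0001, 0.002 / np.sqrt(50)]]}
#   small_model = get_tensorflow_model(10, custom)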


def get_model(number_of_features, number_of_spectators, number_of_events, training_fraction, parameters):
    """
    specifies and configures the tensorflow model
    :param number_of_features: number of input features
    :param number_of_spectators: number of spectator variables
    :param number_of_events: total number of events
    :param training_fraction: fraction of events to be used for training
    :param parameters: as dictionary encoded json object
    :return: state object holding the session and the configured trainer
    """
    if parameters is None:
        parameters = {}

    if not isinstance(parameters, dict):
        raise TypeError('parameters must be a dictionary')
    cuda_mask = parameters.get('cuda_visible_devices', '3')
    tensorboard_dir = parameters.get('tensorboard_dir', None)

    batch_size = parameters.get('batch_size', 100)
    seed = parameters.get('seed', None)

    transform_to_probability = parameters.get('transform_to_probability', False)
    tf.reset_default_graph()
    gpu_options = tf.GPUOptions(allow_growth=True)

    # seed defaults to None, so only fix the graph-level seed when one is given
    if seed:
        print('Seed: ', seed)
        tf.set_random_seed(seed)

    os.environ['CUDA_VISIBLE_DEVICES'] = cuda_mask
    session = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
    x = tf.placeholder(tf.float32, [None, number_of_features])
    y = tf.placeholder(tf.float32, [None, 1])
    stub_data_set = TfDataBasf2Stub(batch_size, number_of_features, number_of_events, training_fraction)

    save_dir = tempfile.TemporaryDirectory()
    save_name = os.path.join(save_dir.name, 'mymodel')

    model = get_tensorflow_model(number_of_features, parameters)
    training = tfm.Trainer(model, stub_data_set, session, tensorboard_dir, save_name, input_placeholders=[x, y])
    state = State(x, y, session=session)

    state.training = training
    state.batch_size = batch_size
    state.save_dir = save_dir

    state.transform_to_probability = transform_to_probability
    # save the parameters so load() can rebuild an identical model
    saved_parameters = parameters.copy()
    saved_parameters['number_of_features'] = number_of_features
    state.parameters = json.dumps(saved_parameters)
    state.seed = seed

    return state
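
# For orientation, a sketch of a complete parameters dictionary (the keys are
# exactly the ones read in this module, the values are hypothetical):
#
#   parameters = {'cuda_visible_devices': '0', 'tensorboard_dir': None,
#                 'batch_size': 100, 'seed': 1337,
#                 'transform_to_probability': True,
#                 'lr_init': .05, 'mom_init': .9,
#                 'min_epochs': 300, 'max_epochs': 400, 'stop_epochs': 10}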


def apply(state, X):
    """
    modified apply function, evaluates the network on (optionally chunked) input
    """
    binning.transform_ndarray(X, state.binning_parameters)

    # evaluate in chunks to bound the size of a single feed_dict; the concrete
    # limit is an assumption, any sufficiently large bound works
    chunk_size = 1000000
    if len(X) > chunk_size:
        results = []
        for i in range(0, len(X), chunk_size):
            results.append(state.session.run(state.activation, feed_dict={state.x: X[i: i + chunk_size]}))
        r = np.concatenate(results).flatten()
    else:
        r = state.session.run(state.activation, feed_dict={state.x: X}).flatten()

    if state.transform_to_probability:
        binning.transform_array_to_sf(r, state.sig_back_tuple, signal_fraction=.5)

    return np.require(r, dtype=np.float32, requirements=['A', 'W', 'C', 'O'])
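
# Evaluation sketch (assumes a fully initialized state and the trained feature
# count; the shapes and values are hypothetical):
#
#   X = np.random.rand(5000, 40).astype(np.float32)
#   scores = apply(state, X)  # flat np.float32 array, one network output per row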


def load(obj):
    """
    Load Tensorflow estimator into state
    """
    tf.reset_default_graph()
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    session = tf.Session(config=config)
    parameters = json.loads(obj[0])

    number_of_features = parameters.pop('number_of_features')

    x = tf.placeholder(tf.float32, [None, number_of_features])
    y = tf.placeholder(tf.float32, [None, 1])
    class DataStub:
        """
        minimal stub standing in for the data set; only the attribute shown
        here is known to be accessed during initialize
        """
        feature_number = number_of_features

    model = get_tensorflow_model(number_of_features, parameters)
    model.initialize(DataStub(), [x, y])
    saver = tf.train.Saver()
    # write the serialized checkpoint bytes back to disk so a plain Saver can
    # restore them into the freshly built graph
    with tempfile.TemporaryDirectory() as path:
        with open(os.path.join(path, obj[1] + '.data-00000-of-00001'), 'w+b') as file1, \
                open(os.path.join(path, obj[1] + '.index'), 'w+b') as file2:
            file1.write(bytes(obj[2]))
            file2.write(bytes(obj[3]))
        tf.train.update_checkpoint_state(path, obj[1])
        saver.restore(session, os.path.join(path, obj[1]))
    state = State(x, y, session=session)
    state.activation = model.mlp.output

    state.binning_parameters = obj[4]

    state.transform_to_probability = obj[5]
    state.sig_back_tuple = obj[6]
    seed = obj[7]
    print('Deep FlavorTagger loading... Training seed: ', seed)

    return state
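
# The positional layout of obj mirrors the list returned by end_fit below:
# obj[0] json-encoded parameters, obj[1] checkpoint basename, obj[2]/obj[3] raw
# bytes of the .data/.index checkpoint files, obj[4] binning parameters,
# obj[5] transform_to_probability flag, obj[6] (signal pdf, background pdf)
# tuple, obj[7] training seed.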


def begin_fit(state, Xtest, Stest, ytest, wtest):
    """
    use test sets for monitoring
    """
    # split the test set: first half for validation during training, second
    # half for the signal/background pdf estimation in end_fit
    state.Xvalid = Xtest[:len(Xtest) // 2]
    state.yvalid = ytest[:len(ytest) // 2]

    state.Xtest = Xtest[len(Xtest) // 2:]
    state.ytest = ytest[len(ytest) // 2:]

    return state


def partial_fit(state, X, S, y, w, epoch):
    """
    performs the complete training within the first call; epochs and batches
    are handled internally by the trainer
    :param X: unprocessed training dataset
    :param epoch: epoch counter provided by the mva framework
    :return: bool, True == continue, False == stop iterations
    """
    # preprocessing: compute the binning transformation on the training set and
    # apply it to both the training and the validation set
    state.binning_parameters = binning.get_ndarray_binning_parameters(X)

    binning.transform_ndarray(X, state.binning_parameters)
    binning.transform_ndarray(state.Xvalid, state.binning_parameters)

    if np.all(np.isnan(X)):
        raise ValueError('NaN values in Dataset. Preprocessing transformations failed.')

    data_set = TfDataBasf2(X, y, state.Xvalid, state.yvalid, state.batch_size, seed=state.seed)

    state.training.data_set = data_set

    # the trainer loops over epochs and batches itself, so a single call
    # completes the training; returning False stops further iterations
    state.training.train_model()

    return False


def end_fit(state):
    """
    save the trained model
    :param state: the current state object
    :return: list with everything load() needs to rebuild and evaluate the model
    """
    filename = state.training.save_name
    with open(filename + '.data-00000-of-00001', 'rb') as file1, open(filename + '.index', 'rb') as file2:
        data1 = file1.read()
        data2 = file2.read()

    binning_parameters = state.binning_parameters

    # the state object has to return the raw network output, so the
    # transformation to a probability is disabled before evaluating
    transform_to_probability = state.transform_to_probability
    state.transform_to_probability = False
    # estimate the signal/background pdfs on the independent test half
    state.get_from_collection()
    y_hat = apply(state, state.Xtest)
    test_df = pandas.DataFrame.from_dict({'y': state.ytest.reshape(-1), 'y_hat': y_hat.reshape(-1)})
    (sig_pdf, back_pdf) = binning.get_signal_background_pdf(test_df)
    parameters = state.parameters
    seed = state.seed

    return [parameters, os.path.basename(filename), data1, data2, binning_parameters, transform_to_probability,
            (sig_pdf, back_pdf), seed]
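
# Round-trip sketch (hypothetical driver code): the list returned by end_fit is
# what gets serialized into the weightfile and handed back to load():
#
#   serialized = end_fit(state)
#   state = load(serialized)
#   scores = apply(state, X)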