import os
import json
import tempfile

import numpy as np
import pandas

import tensorflow as tf

from dft import binning

from dft import tensorflow_dnn_model as tfm
from dft.TfData import TfDataBasf2, TfDataBasf2Stub

# generic state container of the basf2 mva python interface
from basf2_mva_python_interface.tensorflow import State
 
def get_tensorflow_model(number_of_features, parameters):
    """
    generates the tensorflow model
    :param int number_of_features: number of features, handled separately from the parameter dictionary
    :param dict parameters: additional parameters passed to tensorflow_dnn_model.DefaultModel
    :return: the configured tensorflow_dnn_model.DefaultModel
    """
    layers = parameters.get('layers', None)
    wd_coeffs = parameters.get('wd_coeffs', [])

    lr_dec_rate = parameters.get('lr_dec_rate', 1 / (1 + 2e-7)**1.2e5)
    lr_init = parameters.get('lr_init', .05)
    mom_init = parameters.get('mom_init', .9)
    min_epochs = parameters.get('min_epochs', 300)
    max_epochs = parameters.get('max_epochs', 400)
    stop_epochs = parameters.get('stop_epochs', 10)
 
    # fall back to the default architecture if no layers are given
    if layers is None:
        layers = [['h0', 'tanh', number_of_features, 300, .0001, 1.0 / np.sqrt(300)],
                  ['h1', 'tanh', 300, 300, .0001, 1.0 / np.sqrt(300)],
                  ['h2', 'tanh', 300, 300, .0001, 1.0 / np.sqrt(300)],
                  ['h3', 'tanh', 300, 300, .0001, 1.0 / np.sqrt(300)],
                  ['h4', 'tanh', 300, 300, .0001, 1.0 / np.sqrt(300)],
                  ['h5', 'tanh', 300, 300, .0001, 1.0 / np.sqrt(300)],
                  ['h6', 'tanh', 300, 300, .0001, 1.0 / np.sqrt(300)],
                  ['h7', 'tanh', 300, 300, .0001, 1.0 / np.sqrt(300)],
                  ['y', 'sigmoid', 300, 1, .0001, 0.002 * 1.0 / np.sqrt(300)]]
    else:
        # overwrite the input width of the first layer with the actual feature count
        layers[0][2] = number_of_features
 
    # an empty list (the default) requests a common weight decay coefficient
    # for every layer, while None disables weight decay entirely
    if wd_coeffs is not None and not wd_coeffs:
        wd_coeffs = [2e-5 for _ in layers]
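
    # e.g. parameters={'wd_coeffs': None} switches weight decay off, while
    # parameters={'wd_coeffs': [1e-4] * len(layers)} sets one coefficient per layer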
 
    mlp = tfm.MultilayerPerceptron.from_list(layers)
    model = tfm.DefaultModel(mlp, lr_dec_rate=lr_dec_rate, lr_init=lr_init, mom_init=mom_init, wd_coeffs=wd_coeffs,
                             min_epochs=min_epochs, max_epochs=max_epochs, stop_epochs=stop_epochs)
    return model
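
# Example (sketch): overriding the default architecture via the 'layers' key.
# Each entry is consumed by tfm.MultilayerPerceptron.from_list; the first four
# fields are layer name, activation, input width and output width, the last two
# are assumed to be the bias and weight initialisation scales used above.
#
#   shallow = {'layers': [['h0', 'tanh', -1, 100, .0001, 1.0 / np.sqrt(100)],
#                         ['y', 'sigmoid', 100, 1, .0001, 0.002 * 1.0 / np.sqrt(100)]]}
#   model = get_tensorflow_model(40, shallow)  # input width of 'h0' is overwritten with 40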
 
def get_model(number_of_features, number_of_spectators, number_of_events, training_fraction, parameters):
    """
    specifies and configures the tensorflow model
    :param number_of_features: number of input features
    :param number_of_spectators: number of spectator variables
    :param number_of_events: total number of events
    :param training_fraction: fraction of events to be used for training
    :param parameters: as dictionary encoded json object
    :return: state object
    """
    # read all parameters from the dictionary, fall back to defaults otherwise
    if parameters is None:
        parameters = {}
    else:
        if not isinstance(parameters, dict):
            raise TypeError('parameters must be a dictionary')
 
    cuda_mask = parameters.get('cuda_visible_devices', '3')
    tensorboard_dir = parameters.get('tensorboard_dir', None)

    batch_size = parameters.get('batch_size', 100)
    seed = parameters.get('seed', None)

    transform_to_probability = parameters.get('transform_to_probability', False)
 
    # set the random state for reproducible trainings
    if seed is not None:
        tf.random.set_seed(seed)
 
    # mask the cuda devices visible to tensorflow
    os.environ['CUDA_VISIBLE_DEVICES'] = cuda_mask
    gpus = tf.config.list_physical_devices('GPU')
    if gpus:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
 
    # use a stub data set, since no data is available at this stage
    stub_data_set = TfDataBasf2Stub(batch_size, number_of_features, number_of_events, training_fraction)
 
    # create the model in a temporary directory, the trained weights are read back in end_fit
    save_dir = tempfile.TemporaryDirectory()
    save_name = os.path.join(save_dir.name, 'mymodel')

    model = get_tensorflow_model(number_of_features, parameters)
    training = tfm.Trainer(model, stub_data_set, tensorboard_dir, save_name)
 
    state = State()
    state.model = model

    state.training = training
    state.batch_size = batch_size
    state.save_dir = save_dir

    state.transform_to_probability = transform_to_probability

    # save the parameters, so load() can rebuild an identical network
    saved_parameters = parameters.copy()
    saved_parameters['number_of_features'] = number_of_features
    state.parameters = json.dumps(saved_parameters)
    state.seed = seed

    return state
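
# Example (sketch): a parameter dictionary with hypothetical values; every key
# is optional and falls back to the defaults above.
#
#   example_parameters = {
#       'cuda_visible_devices': '0',
#       'batch_size': 500,
#       'seed': 42,
#       'transform_to_probability': True,
#       'lr_init': .03,
#   }
#   state = get_model(40, 0, int(1e6), .9, example_parameters)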
 
def apply(state, X):
    """
    modified apply function: preprocesses X, evaluates the network and
    optionally transforms the output to a probability
    """

    binning.transform_ndarray(X, state.binning_parameters)

    # evaluate large samples in chunks to limit the memory footprint
    chunk_size = 1000000
    if len(X) > chunk_size:
        results = []
        for i in range(0, len(X), chunk_size):
            results.append(state.model(X[i: i + chunk_size]).numpy().flatten())
        r = np.concatenate(results).flatten()
    else:
        r = state.model(X).numpy().flatten()

    if state.transform_to_probability:
        binning.transform_array_to_sf(r, state.sig_back_tuple, signal_fraction=.5)

    return np.require(r, dtype=np.float32, requirements=['A', 'W', 'C', 'O'])
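
# Example (sketch): applying a state to a hypothetical feature block; note that
# X is binned in place before the evaluation.
#
#   X = np.random.rand(10, 40).astype(np.float32)
#   scores = apply(state, X)  # one flat float32 score per event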
 
def load(obj):
    """
    Load Tensorflow estimator into state
    """

    gpus = tf.config.list_physical_devices('GPU')
    if gpus:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
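
    # layout of the serialised object, as assembled by end_fit below:
    #   obj[0] json encoded parameters        obj[4] binning parameters
    #   obj[1] checkpoint base name           obj[5] transform_to_probability flag
    #   obj[2] checkpoint .data file content  obj[6] (signal pdf, background pdf)
    #   obj[3] checkpoint .index file content obj[7] training seed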
 
    parameters = json.loads(obj[0])

    number_of_features = parameters.pop('number_of_features')

    class DataStub:
        """
        simple stub object, providing the attributes the model
        initialisation is assumed to read from a data set
        """
        feature_number = number_of_features
        batches = 1

    model = get_tensorflow_model(number_of_features, parameters)
    model.initialize(DataStub())
 
    # tensorflow's mid-level file apis change rapidly, therefore the raw
    # checkpoint files are rewritten by hand and restored with the low-level api
    with tempfile.TemporaryDirectory() as path:
        with open(os.path.join(path, obj[1] + '.data-00000-of-00001'), 'w+b') as file1, open(
                os.path.join(path, obj[1] + '.index'), 'w+b') as file2:
            file1.write(bytes(obj[2]))
            file2.write(bytes(obj[3]))

        checkpoint = tf.train.Checkpoint(model)
        checkpoint.restore(os.path.join(path, obj[1]))
 
    state = State()
    state.model = model

    # preprocessing parameters
    state.binning_parameters = obj[4]

    # postprocessing transformation to probability, if pdfs were sampled during training
    state.transform_to_probability = obj[5]
    state.sig_back_tuple = obj[6]

    seed = obj[7]
    print('Deep FlavorTagger loading... Training seed: ', seed)

    return state
 
def begin_fit(state, Xtest, Stest, ytest, wtest):
    """
    use the test sets for monitoring: the first half serves as validation set
    during training, the second half is kept for sampling the pdfs of the
    network output
    """

    state.Xvalid = Xtest[:len(Xtest) // 2]
    state.yvalid = ytest[:len(ytest) // 2]

    state.Xtest = Xtest[len(Xtest) // 2:]
    state.ytest = ytest[len(ytest) // 2:]

    return state
 
def partial_fit(state, X, S, y, w, epoch):
    """
    trains the model on the unprocessed training dataset, the validation set
    prepared in begin_fit is used for monitoring
    :param X: unprocessed training dataset
    :param y: target values
    :return: bool, True == continue, False == stop iterations
    """
    # derive the preprocessing transformation from the training set
    state.binning_parameters = binning.get_ndarray_binning_parameters(X)

    binning.transform_ndarray(X, state.binning_parameters)
    binning.transform_ndarray(state.Xvalid, state.binning_parameters)

    if np.any(np.isnan(X)):
        raise ValueError('NaN values in Dataset. Preprocessing transformations failed.')

    # replace the stub data set from get_model with the actual data
    data_set = TfDataBasf2(X, y, state.Xvalid, state.yvalid, state.batch_size, seed=state.seed)

    state.training.data_set = data_set

    # train_model runs the full training loop including early stopping,
    # so no further partial_fit iterations are needed
    state.training.train_model()

    return False
 
def end_fit(state):
    """
    save the trained model
    :param state: the training state object
    :return: list with everything load() needs to rebuild the trained estimator
    """

    filename = state.training.save_name

    # basf2 mva stores the return value of end_fit in its weight file,
    # so the checkpoint files are read back into memory
    with open(filename + '-2.data-00000-of-00001', 'rb') as file1, open(filename + '-2.index', 'rb') as file2:
        data1 = file1.read()
        data2 = file2.read()

    binning_parameters = state.binning_parameters

    # save the flag for serialisation; the state itself has to keep returning
    # the untransformed network output
    transform_to_probability = state.transform_to_probability
    state.transform_to_probability = False

    # sample the signal and background pdfs of the trained model on the test set
    y_hat = state.model(state.Xtest).numpy().flatten()
    test_df = pandas.DataFrame.from_dict({'y': state.ytest.reshape(-1), 'y_hat': y_hat.reshape(-1)})
    (sig_pdf, back_pdf) = binning.get_signal_background_pdf(test_df)

    seed = state.seed
    parameters = state.parameters

    return [parameters, os.path.basename(filename), data1, data2, binning_parameters, transform_to_probability,
            (sig_pdf, back_pdf), seed]
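
# Example (sketch): round trip through this interface with hypothetical data;
# inside basf2 these hooks are invoked by the mva framework in this order.
#
#   state = get_model(40, 0, int(1e6), .9, {})
#   state = begin_fit(state, Xtest, None, ytest, None)
#   partial_fit(state, X, None, y, None, 0)
#   serialized = end_fit(state)
#   state = load(serialized)
#   scores = apply(state, X)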