import os
import time

import numpy as np
import tensorflow as tf
from tqdm import trange
Definition of a Layer object.

def __init__(self, name, tf_activation_str, dim_input, dim_output, p_bias, p_w, random_seed=None):

:param name: name of the layer
:param tf_activation_str: string, name of an available tensorflow activation function
:param dim_input: dimension of the input
:param dim_output: dimension of the output
:param p_bias: initial bias
:param p_w: stddev of the truncated normal distribution used to initialize the weights
:param random_seed: random seed used in initializing the weights
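For orientation, a minimal construction sketch; the layer name, dimensions and initialization values below are illustrative, not taken from the source:

# Illustrative only: a 64 -> 128 sigmoid layer with zero bias and
# weights drawn with stddev 0.05 (values chosen for this example).
hidden = Layer('hidden1', 'sigmoid', dim_input=64, dim_output=128,
               p_bias=0.0, p_w=0.05, random_seed=42)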
tf_activation_dict = {
    'sigmoid': tf.nn.sigmoid,
    'leaky_relu': tf.nn.leaky_relu,
}

if tf_activation_str not in tf_activation_dict:
    raise ValueError(f'Unknown activation function: {tf_activation_str}')
self.shape = [dim_input, dim_output]
name = self.name + '_b'
initial = tf.constant(init_val, shape=[width], name=name)
return tf.Variable(initial, name=name, trainable=True)
Define the weight variables.

name = self.name + '_w'
initial = tf.random.truncated_normal(shape, stddev=stddev, seed=operation_seed, name=name)
return tf.Variable(initial, name=name, trainable=True)
Passes information about each variable to the summary writer.

with writer.as_default():
    mean = tf.reduce_mean(var)
    stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
    tf.summary.scalar(f'{var.name}_mean', mean, step=step)
    tf.summary.scalar(f'{var.name}_stddev', stddev, step=step)
    tf.summary.scalar(f'{var.name}_max', tf.reduce_max(var), step=step)
    tf.summary.scalar(f'{var.name}_min', tf.reduce_min(var), step=step)
    tf.summary.histogram(f'{var.name}_histogram', var, step=step)
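A usage sketch, assuming a Layer instance named layer and an arbitrary log directory of my choosing:

# Illustrative only: send summaries of a layer's weight variable to an
# event file ('logs/example' is an arbitrary path chosen for this sketch).
writer = tf.summary.create_file_writer('logs/example')
layer.variable_to_summary(layer.w, step=0, writer=writer)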
Passes all layer variables to the tf.summary writer.

Multilayer perceptron class.
Define the layers from a list.

layer_obj.append(Layer(*layer))
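A construction sketch using the from_list classmethod listed at the end of this section; the layer specifications are assumed to be the positional Layer arguments, and the names and dimensions are invented for illustration:

# Illustrative only: tuples of (name, activation, dim_input, dim_output, p_bias, p_w).
mlp = MultilayerPerceptron.from_list([
    ('hidden1', 'leaky_relu', 20, 50, 0.0, 0.05),
    ('hidden2', 'leaky_relu', 50, 50, 0.0, 0.05),
    ('output', 'sigmoid', 50, 1, 0.0, 0.05),
])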
Collect the tunable parameters.

self.w.append(layer.w)
self.b.append(layer.b)
Initialize. Checks that the layer dimensions align.

for _idx in range(len(self.layers) - 1):
    assert self.layers[_idx].shape[1] == self.layers[_idx + 1].shape[0]
Runs the events through all the layers.
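The forward pass itself is not included in this extract; a plausible sketch, assuming each layer exposes tf_activation, w and b as in the member list at the end of this section:

# Sketch, not the verbatim implementation: each layer computes
# activation(x @ w + b) and feeds the result to the next one.
def __call__(self, x):
    for layer in self.layers:
        x = layer.tf_activation(tf.matmul(x, layer.w) + layer.b)
    return x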
Passes all the MLP variables to the tf.summary writer.

layer.all_to_summary(step, writer)
Define the default model.
def __init__(self, mlp, mom_init=.9, mom_max=.99, mom_epochs=200, lr_init=.05, lr_min=1e-6,
             lr_dec_rate=.976, stop_epochs=10, min_epochs=200, max_epochs=1000,
             wd_coeffs=None, change_optimizer=None, staircase=True, smooth_cross_entropy=False):

Initialization function.
:param mlp: network model
:param mom_init: initial momentum
:param mom_max: maximum momentum
:param mom_epochs: momentum epochs
:param lr_init: initial learning rate
:param lr_min: minimum learning rate
:param lr_dec_rate: learning rate decay factor
:param stop_epochs: number of epochs without improvement required for early termination
:param min_epochs: minimum number of epochs for training
:param max_epochs: maximum number of epochs for training
:param wd_coeffs: weight decay coefficients; if not None, must have one per mlp layer
:param change_optimizer: optional list of epochs at which a different optimizer is used
:param staircase: if True, change the learning rate and momentum in discrete per-epoch steps rather than continuously
:param smooth_cross_entropy: True for a small epsilon addition, False for a clipped network output
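A minimal construction sketch, assuming the MultilayerPerceptron instance mlp from the earlier example and the default hyperparameters of the signature above:

# Illustrative only: wrap a built network in the training model.
model = DefaultModel(mlp, lr_init=0.05, lr_min=1e-6, lr_dec_rate=0.976,
                     stop_epochs=10, wd_coeffs=None)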
if wd_coeffs is not None:
    assert len(wd_coeffs) == len(mlp.layers)

self.global_step = tf.Variable(0, trainable=False, name='global_step', dtype=tf.int64)

self.c_lr_min = tf.constant(lr_min, dtype=tf.float32)

if change_optimizer is not None:
Finalises initialization based on data_set specific information (number of batches per epoch).
if not self.mlp.is_initialized:
Early stopping criterion.

:param monitoring_param: the parameter to monitor for early termination
:param epoch: the current epoch

if monitoring_param < self.best_value * (1. - prop_dec):
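For context, a hedged sketch of how such a criterion plausibly fits together; the attributes best_value, step_countdown and c_stop_epochs come from the member list at the end of this section, while the use of min_epochs and the exact control flow are assumptions:

# Sketch under the stated assumptions, not the verbatim implementation:
# reset the countdown when the monitored value improves by a relative
# margin prop_dec, otherwise count down; returning True stops training.
def _default_termination_criterion(self, monitoring_param, epoch, prop_dec=1e-5):
    if monitoring_param < self.best_value * (1. - prop_dec):
        self.best_value = monitoring_param
        self.step_countdown = self.c_stop_epochs
    else:
        self.step_countdown -= 1
    return epoch >= self.min_epochs and self.step_countdown <= 0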
Returns the learning rate at the current global step.

Returns the momentum at the current global step.

global_step = tf.cast(self.global_step, tf.float32)

    tf.floor(global_step / t_batches_per_epoch), self.c_mom_max)

    (global_step / t_batches_per_epoch), self.c_mom_max)
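Only fragments of the two schedules survive above; a hedged plain-Python sketch of what they appear to compute, based on the constants c_lr_init, c_lr_dec_rate, c_lr_min, c_mom_init, c_mom_dec_rate and c_mom_max listed at the end of this section (the exact expressions are assumptions):

# Sketch only, not the verbatim code: staircase counts whole epochs,
# otherwise the schedules change continuously with the step.
def learning_rate(step, batches_per_epoch, lr_init, lr_dec_rate, lr_min, staircase=True):
    epochs = step // batches_per_epoch if staircase else step / batches_per_epoch
    return max(lr_init * lr_dec_rate ** epochs, lr_min)

def momentum(step, batches_per_epoch, mom_init, mom_dec_rate, mom_max, staircase=True):
    epochs = step // batches_per_epoch if staircase else step / batches_per_epoch
    return min(mom_init + mom_dec_rate * epochs, mom_max)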
Get the optimizer. If multiple optimizers are booked, this returns the one appropriate for the epoch.

:param epoch: current epoch
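The optimizer construction is not shown in this extract; a minimal sketch assuming a Keras momentum SGD optimizer driven by the two schedules above (the actual optimizer type is not confirmed by the source, and the handling of multiple booked optimizers is omitted):

def get_optimizer(self, epoch=0):
    # Sketch only: build an SGD optimizer from the current schedule values.
    return tf.keras.optimizers.SGD(learning_rate=self._get_learning_rate(),
                                   momentum=self._get_momentum())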
def loss(self, predict_y, true_y):

:param predict_y: predicted labels
:param true_y: true labels

t_epsilon = tf.constant(epsilon)

with tf.name_scope('cross_entropy'):
    if self.smooth_cross_entropy:
        cross_entropy = -tf.reduce_mean(tf.reduce_sum((true_y * tf.math.log(predict_y + t_epsilon) +
                                                       (1 - true_y) * tf.math.log(1 - predict_y + t_epsilon)), 1))
    else:
        cross_entropy = -tf.reduce_mean(tf.reduce_sum((true_y * tf.math.log(tf.clip_by_value(predict_y, epsilon, 1))) +
                                                      ((1 - true_y) * tf.math.log(tf.clip_by_value(1 - predict_y, epsilon, 1))), 1))

wd = [tf.constant(coeff) * tf.nn.l2_loss(w) for coeff, w in zip(wd_coeffs, weights)]

return loss, cross_entropy
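A small numerical illustration of why the two safeguards exist; plain NumPy, with values invented for the example:

# Without a safeguard, a prediction of exactly 0 or 1 produces 0 * log(0),
# which evaluates to nan in floating point.
y_true = np.array([1.0, 0.0])
y_pred = np.array([1.0, 0.0])
eps = 1e-7

smooth = -np.mean(y_true * np.log(y_pred + eps) + (1 - y_true) * np.log(1 - y_pred + eps))
clipped = -np.mean(y_true * np.log(np.clip(y_pred, eps, 1)) + (1 - y_true) * np.log(np.clip(1 - y_pred, eps, 1)))
# Both stay finite (about -1e-7 and 0.0 respectively) instead of nan.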
Handles the training of the network model.

def __init__(self, model, data_set, log_dir=None, save_name=None, monitoring_size=10000):

Class to train a predefined model.
:param model: DefaultModel obj
:param data_set: TFData obj
:param log_dir: str, directory name for tensorboard logging
:param save_name: str, path and name for saving the weight files
:param monitoring_size: int, number of events of the training fraction used for monitoring
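A minimal construction sketch; data_set stands for a TFData object from the surrounding package, and the public method that runs the full training is not named in this extract, so only the setup is shown:

# Illustrative only: wire model, data and logging together
# ('logs' is an arbitrary directory name chosen for this sketch).
trainer = Trainer(model, data_set, log_dir='logs', save_name=None, monitoring_size=10000)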
self.model.initialize(data_set)

if log_dir is not None:

if save_name is None:
    time_str = time.strftime("%Y%m%d-%H%M%S")
    save_name = os.path.join(os.getcwd(), '_'.join([time_str, 'model']))
Checks the dataset sizes for evaluation. These samples are used after each epoch to collect
summary statistics and test the early stopping criteria.
log_dir_train = os.path.join(log_dir, 'train')
log_dir_valid = os.path.join(log_dir, 'valid')
batch_iter = self.data_set.batch_iterator()

t_range = trange(self.data_set.batches)
t_range.set_description(f'Epoch {current_epoch:4d}')
batch = next(batch_iter)

with tf.GradientTape() as tape:
    loss, _ = self.model.loss(self.model(batch_x), batch_y)
grads = tape.gradient(loss, self.model.trainable_variables)

self.optimizer.apply_gradients(zip(grads, self.model.trainable_variables))
tf.summary.scalar('learning_rate', self.model._get_learning_rate(), step=self.model.global_step)
tf.summary.scalar('momentum', self.model._get_momentum(), step=self.model.global_step)

self.model.global_step.assign_add(1)
train_loss, train_cross_entropy = self.model.loss(self.model(train_x), train_y)
valid_loss, valid_cross_entropy = self.model.loss(self.model(valid_x), valid_y)

tf.summary.scalar('loss', train_loss, step=current_epoch)
tf.summary.scalar('cross_entropy', train_cross_entropy, step=current_epoch)

tf.summary.scalar('epoch_learning_rate', self.model._get_learning_rate(), step=current_epoch)
tf.summary.scalar('epoch_momentum', self.model._get_momentum(), step=current_epoch)

tf.summary.scalar('loss', valid_loss, step=current_epoch)
tf.summary.scalar('cross_entropy', valid_cross_entropy, step=current_epoch)
tf.summary.scalar('best_epoch', self.best_epoch, step=current_epoch)

self._time = time.time()

return valid_cross_entropy
Saves the model as a checkpoint only if a global minimum is reached on the validation sample.

checkpoint = tf.train.Checkpoint(self.model)
checkpoint.save(self.save_name.replace('model', 'model_current'))

if self.model.best_value == np.inf:

if cross_entropy < self.model.best_value:
for epoch in range(self.model.max_epochs):
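The body of this loop is not included in the extract; a hedged sketch of how it plausibly ties the pieces above together. The method names get_optimizer, _train_epoch, _save_best_state and the termination criterion are taken from the member list below, while the exact control flow is an assumption:

# Sketch only, under the stated assumptions.
for epoch in range(self.model.max_epochs):
    self.optimizer = self.model.get_optimizer(epoch)       # pick the optimizer for this epoch
    valid_cross_entropy = self._train_epoch(epoch)         # one pass over the training batches
    self._save_best_state(valid_cross_entropy)             # checkpoint on a new validation minimum
    if self.model.termination_criterion(valid_cross_entropy, epoch):
        break                                              # early stopping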
Member overview (attributes and method signatures):

optimizer_change_epochs : used optimizers
is_initialized : check if initialized
optimizers : define multiple optimizers
def _default_termination_criterion(self, monitoring_param, epoch, prop_dec=1e-5)
c_lr_min : minimum learning rate
best_value : the best value will be set to a default start value, then updated with the termination criterion
c_lr_init : initial learning rate
c_mom_init : initial momentum
step_countdown : step countdown
def __init__(self, mlp, mom_init=.9, mom_max=.99, mom_epochs=200, lr_init=.05, lr_min=1e-6, lr_dec_rate=.976, stop_epochs=10, min_epochs=200, max_epochs=1000, wd_coeffs=None, change_optimizer=None, staircase=True, smooth_cross_entropy=False)
wd_coeffs : weight decay coefficients
c_lr_dec_rate : learning rate decay rate
c_stop_epochs : number of epochs without improvement for early termination
c_mom_max : maximum momentum
batches_per_epoch : batches per epoch (unknown until initialization)
c_mom_dec_rate : momentum decay rate
def _get_learning_rate(self)
def get_optimizer(self, epoch=0)
c_mom_epochs : momentum epochs
def initialize(self, data_set)
def loss(self, predict_y, true_y)
termination_criterion : termination criterion
smooth_cross_entropy : True for a small epsilon addition, False for a clipped network output
recent_params : recent parameters
b : initialization parameters for the bias
def _init_bias(self, width, init_val, name=None)
def _init_weight(self, shape, stddev, operation_seed, name=None)
tf_activation : layer parameters
w : initialization parameters for the weights
def variable_to_summary(self, var, step, writer)
def __init__(self, name, tf_activation_str, dim_input, dim_output, p_bias, p_w, random_seed=None)
def all_to_summary(self, step, writer)
is_initialized : collect all mlp parameters
def variables_to_writer(self, step, writer)
def from_list(cls, layers)
def __init__(self, layers, name='mlp')
def _collect_weights_and_biases(self)
def _prepare_monitoring(self)
def _prepare_tensorboard(self, log_dir)
def _save_best_state(self, cross_entropy)
def __init__(self, model, data_set, log_dir=None, save_name=None, monitoring_size=10000)
optimizer : set optimizer for this epoch
best_epoch : initialise best epoch
valid_monitor : test monitor
valid_writer : tf.summary writer for validation
train_writer : tf.summary writer for training
current_epoch : initialise current epoch
monitoring_size : monitoring size
def _train_epoch(self, current_epoch)
save_name : set the path and name for saving the weight files
termination_criterion : termination criterion
train_monitor : train monitor