import os
import pickle
import time

import numpy
import tensorflow as tf
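
# NOTE: this module targets the TensorFlow 1.x graph API (tf.placeholder,
# tf.Session, tf.train.*). A minimal shim for running it under a TensorFlow 2
# installation would be (an assumption, not part of the original source):
#
#   import tensorflow.compat.v1 as tf
#   tf.disable_v2_behavior()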

class Layer:
    """
    Definition of a layer object.
    """

    def __init__(self, name, tf_activation_str, dim_input, dim_output, p_bias, p_w,
                 operation_seed=None):
        """
        :param name: name of the layer
        :param tf_activation_str: string, name of an available tensorflow activation function
        :param p_bias: initial constant for the biases
        :param p_w: stddev of the truncated normal distribution used to initialize the weights
        """
        tf_activation_dict = {
            'tanh': tf.tanh,
            'sigmoid': tf.sigmoid,
            'relu': tf.nn.relu,
            'leaky_relu': tf.nn.leaky_relu,
        }
        if tf_activation_str not in tf_activation_dict:
            raise ValueError('unknown activation function: %s' % tf_activation_str)
        self.tf_activation = tf_activation_dict[tf_activation_str]

        self.name = name
        self.shape = [dim_input, dim_output]
        self.w = self._init_weight(self.shape, p_w, operation_seed)
        self.b = self._init_bias(self.shape[1], p_bias)
        self.is_initialized = False
    def _init_bias(self, width, init_val, name=None):
        """
        Define bias variables.
        """
        if name is None:
            name = self.name + '_b'
        initial = tf.constant(init_val, shape=[width], name=name)
        return tf.Variable(initial, name=name)
    def _init_weight(self, shape, stddev, operation_seed, name=None):
        """
        Define weight variables.
        """
        if name is None:
            name = self.name + '_w'
        initial = tf.truncated_normal(shape, stddev=stddev, seed=operation_seed, name=name)
        return tf.Variable(initial, name=name)
    def initialize(self, layer_input):
        """
        Initialization, requires layer input.
        """
        if self.is_initialized:
            raise RuntimeError('Layer %s is already initialized.' % self.name)

        self.input = layer_input
        name = self.name + '_output'
        with tf.name_scope('output'):
            # dense layer: activation(input @ w + b); the exact expression is
            # reconstructed, as the original line is not preserved
            self.output = self.tf_activation(tf.matmul(layer_input, self.w) + self.b, name=name)
            tf.add_to_collection('output', self.output)
        self.is_initialized = True
    def _add_var_to_summary(self, var):
        """
        Add variables of this layer to tensorboard.
        :param var: tensorflow variable
        """
        with tf.name_scope('summaries'):
            mean = tf.reduce_mean(var)
            tf.summary.scalar('mean', mean)

            with tf.name_scope('stddev'):
                stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))

            tf.summary.scalar('stddev', stddev)
            tf.summary.scalar('max', tf.reduce_max(var))
            tf.summary.scalar('min', tf.reduce_min(var))

            tf.summary.histogram('histogram', var)
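
    # The scalar and histogram summaries registered above are merged later
    # with tf.summary.merge_all() and written out by a tf.summary.FileWriter
    # (see _prepare_tensorboard in the trainer class below); a minimal sketch
    # of that consumer side, with illustrative names:
    #
    #   merged = tf.summary.merge_all()
    #   writer = tf.summary.FileWriter('/tmp/log', sess.graph)
    #   writer.add_summary(sess.run(merged, feed_dict=feed), global_step=epoch)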
    def _add_all_to_summary(self):
        """
        Adds tunable parameters to the summary.
        """
        with tf.name_scope(self.name):
            with tf.name_scope('weights'):
                self._add_var_to_summary(self.w)
            with tf.name_scope('biases'):
                self._add_var_to_summary(self.b)
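
# A minimal usage sketch for Layer under TF 1.x (names and numbers are
# illustrative assumptions, not taken from the original source): a 10 -> 5
# sigmoid layer fed by a feature placeholder.
#
#   x = tf.placeholder(tf.float32, shape=[None, 10])
#   hidden = Layer('hidden1', 'sigmoid', 10, 5, p_bias=0.0, p_w=0.5)
#   hidden.initialize(x)  # connects input and output, sets is_initialized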

# NOTE: the class name is not preserved in this fragment; MultilayerPerceptron
# is used here to match the 'mlp' argument of DefaultModel below.
class MultilayerPerceptron:
    """
    Class combining layer objects.
    """

    def __init__(self, layers):
        self.layers = layers
        self.is_initialized = False

    @classmethod
    def from_list(cls, layers):
        """
        Define layers from a list of parameter tuples.
        """
        layer_obj = []
        for layer in layers:
            layer_obj.append(Layer(*layer))
        return cls(layer_obj)
    def _set_weights_and_biases(self):
        """
        Collect tunable parameters of all layers.
        """
        self.w = []
        self.b = []
        for layer in self.layers:
            self.w.append(layer.w)
            self.b.append(layer.b)
    def _connect_layers(self, layer_input):
        """
        Connect the layers sequentially: each layer's output feeds the next.
        """
        for layer in self.layers:
            layer.initialize(layer_input)
            layer_input = layer.output
    def initialize(self, layer_input):
        """
        Set all mlp parameters (the surrounding method body is only partially
        preserved in this fragment).
        """
        # consecutive layers have to match in their input/output dimensions
        for _idx in range(len(self.layers) - 1):
            assert self.layers[_idx].shape[1] == self.layers[_idx + 1].shape[0]

        self._connect_layers(layer_input)
        self._set_weights_and_biases()
        self.is_initialized = True
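
# A minimal sketch of building a network with from_list; each tuple mirrors
# Layer.__init__ (name, activation, dim_input, dim_output, p_bias, p_w), and
# the concrete numbers are illustrative assumptions:
#
#   mlp = MultilayerPerceptron.from_list([
#       ('h1', 'tanh', 20, 40, 0.0, 0.5),
#       ('h2', 'tanh', 40, 40, 0.0, 0.5),
#       ('out', 'sigmoid', 40, 1, 0.0, 0.5),
#   ])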

class DefaultModel:
    """
    Define the default model.
    """

    def __init__(self, mlp, mom_init=.9, mom_max=.99, mom_epochs=200, lr_init=.05, lr_min=1e-6,
                 lr_dec_rate=.976, stop_epochs=10, min_epochs=200, max_epochs=1000, wd_coeffs=None,
                 change_optimizer=None, staircase=True, smooth_cross_entropy=False):
        """
        Initialization function.
        """
        if wd_coeffs is not None:
            assert len(wd_coeffs) == len(mlp.layers)

        with tf.name_scope('global_step'):
            # step counter incremented by the optimizer (the original body of
            # this scope is not preserved)
            self.global_step = tf.Variable(0, trainable=False, name='global_step')

        if change_optimizer is not None:
            # epochs at which the used optimizer changes (see
            # optimizer_change_epochs in the member index below)
            self.optimizer_change_epochs = change_optimizer
        t_lr_min = tf.Variable(self.c_lr_min, trainable=False)

        with tf.name_scope('learning_rate'):
            t_limited_lr = tf.maximum(t_learning_rate, t_lr_min)
            tf.summary.scalar('learning rate', t_limited_lr)
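
        # NOTE: t_learning_rate is not defined in the preserved fragment. A
        # plausible definition, assuming the usual TF 1.x exponential decay
        # driven by the constructor parameters (an assumption, not confirmed
        # by the source):
        #
        #   t_learning_rate = tf.train.exponential_decay(
        #       self.c_lr_init, self.global_step, self.batches_per_epoch,
        #       self.c_lr_dec_rate, staircase=staircase)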
        t_mom_dec_rate = tf.Variable(c_mom_dec_rate, trainable=False, dtype=tf.float32)
        t_momentum = tf.Variable(self.c_mom_init, trainable=False, dtype=tf.float32)
        t_mom_max = tf.Variable(self.c_mom_max, trainable=False, dtype=tf.float32)
        t_batches_per_epoch = tf.Variable(self.batches_per_epoch, trainable=False, dtype=tf.float32)

        with tf.name_scope('momentum'):
            global_step = tf.cast(self.global_step, tf.float32)
            # tf.floor gives the staircase behaviour: the momentum grows once
            # per epoch instead of once per batch (the if/else is reconstructed
            # from the 'staircase' constructor parameter)
            if staircase:
                t_limited_mom = tf.minimum(t_momentum + t_mom_dec_rate *
                                           tf.floor(global_step / t_batches_per_epoch), t_mom_max)
            else:
                t_limited_mom = tf.minimum(t_momentum + t_mom_dec_rate *
                                           (global_step / t_batches_per_epoch), t_mom_max)
            tf.summary.scalar('momentum', t_limited_mom)

        self.mon_dict['learning_rate'] = t_limited_lr
        self.mon_dict['momentum'] = t_limited_mom
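
        # Worked example of the momentum ramp, assuming c_mom_dec_rate is the
        # per-epoch increment (mom_max - mom_init) / mom_epochs (its formula is
        # not preserved in the fragment): with the defaults mom_init=.9,
        # mom_max=.99 and mom_epochs=200, the increment is
        # (0.99 - 0.9) / 200 = 4.5e-4 per epoch, so the momentum rises linearly
        # from 0.9 and saturates at 0.99 after 200 epochs.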
        with tf.name_scope('optimizer'):
            self.optimizers.append(tf.train.MomentumOptimizer(t_limited_lr, t_limited_mom))
            self.optimizers.append(tf.train.GradientDescentOptimizer(t_limited_lr))
        _y = self.mlp.output
        # _y_ denotes the target placeholder and epsilon the regularization
        # constant; both are defined in parts of the module not preserved here
        t_epsilon = tf.constant(epsilon)

        with tf.name_scope('cross_entropy'):
            if smooth_cross_entropy:
                # small epsilon addition inside the logarithms
                cross_entropy = -tf.reduce_mean(tf.reduce_sum((_y_ * tf.log(_y + t_epsilon) + (1 - _y_) *
                                                               tf.log(1 - _y + t_epsilon)), 1))
            else:
                # clipped network output instead of the epsilon shift
                cross_entropy = -tf.reduce_mean(tf.reduce_sum((_y_ * tf.log(tf.clip_by_value(_y, epsilon, 1))) +
                                                              ((1 - _y_) * tf.log(tf.clip_by_value(1 - _y, epsilon, 1))), 1))
            tf.summary.scalar('cross entropy', cross_entropy)
        weights = self.mlp.w

        # one l2 penalty per layer, scaled by the weight decay coefficients
        wd = [tf.constant(coeff) * tf.nn.l2_loss(w) for coeff, w in zip(wd_coeffs, weights)]

        with tf.name_scope('loss_function'):
            # cross entropy with weight decay (the combination is reconstructed
            # from the member index entry for 'loss')
            loss = cross_entropy + sum(wd)
            tf.summary.scalar('loss', loss)
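
        # For reference, the loss implemented above is the binary cross
        # entropy, averaged over the batch and summed over the output nodes,
        #
        #   CE = -mean_batch( sum_nodes( y*log(y_hat) + (1-y)*log(1-y_hat) ) )
        #
        # with y_hat either shifted by epsilon ('smooth') or clipped to
        # [epsilon, 1] before the logarithm, plus the l2 weight decay terms.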
    def _set_monitoring_params(self):
        """
        Sets monitoring parameters.
        """
        _y = self.mlp.output
        t_epsilon = tf.constant(epsilon)

        with tf.name_scope('mean_cross_entropy'):
            if self.smooth_cross_entropy:
                mean_cross_entropy = -tf.reduce_mean(tf.reduce_sum((_y_ * tf.log(_y + t_epsilon) + (1 - _y_) *
                                                                    tf.log(1 - _y + t_epsilon)), 1))
            else:
                mean_cross_entropy = -tf.reduce_mean(tf.reduce_sum((_y_ * tf.log(tf.clip_by_value(_y, epsilon, 1))) +
                                                                   ((1 - _y_) * tf.log(tf.clip_by_value(1 - _y, epsilon, 1))), 1))
            tf.summary.scalar('mean cross entropy', mean_cross_entropy)
    def _default_termination_criterion(self, monitoring_params, epoch,
                                       label_name='mean_cross_entropy', prop_dec=1e-5):
        """
        Default early-stopping criterion.
        :param monitoring_params: monitoring parameters of the current epoch
        """
        loss_idx = self.monitoring_labels.index(label_name)
        mon_param = monitoring_params[loss_idx]

        if mon_param < self.best_value * (1. - prop_dec):
            # sufficient relative improvement: reset the countdown and store
            # the new best value (branch bodies are reconstructed)
            self.step_countdown = self.c_stop_epochs
            self.best_value = mon_param
        else:
            self.step_countdown -= 1

        return self.step_countdown <= 0
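
    # Worked example of the criterion above: with prop_dec=1e-5 and a current
    # best_value of 0.50000, only a new value below
    # 0.50000 * (1 - 1e-5) = 0.4999950 counts as an improvement; once
    # c_stop_epochs consecutive epochs pass without such an improvement, the
    # countdown reaches zero and training stops.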
    def initialize(self, data_set, input_placeholders=None):
        """
        Initialize the model for a given data set.
        """
        if input_placeholders is None:
            with tf.name_scope('input'):
                self.x = tf.placeholder(tf.float32, shape=[None, data_set.feature_number], name='x-input')
                self.y_ = tf.placeholder(tf.float32, shape=[None, 1], name='y-input')

                self.weights = tf.placeholder(tf.float32, shape=[None, 1], name='weight-input')
        else:
            self.x = input_placeholders[0]
            self.y_ = input_placeholders[1]

# NOTE: the class name is not preserved in this fragment; Trainer is used here
# as a descriptive stand-in.
class Trainer:
    """
    Handles the training of the network model.
    """

    def __init__(self, model, data_set, sess, log_dir=None, save_name=None, monitoring_size=100000,
                 input_placeholders=None):
        """
        Class to train a predefined model.
        :param model: DefaultModel obj
        :param data_set: TFData obj
        :param sess: tensorflow.Session obj
        :param log_dir: str, directory name for tensorboard logging
        :param save_name: str, path and name for saving the weight files
        :param monitoring_size: int, number of events of the training fraction used for monitoring
        :param input_placeholders: list of tf.placeholders, [features, targets]
        """
        self.model = model
        self.data_set = data_set
        self.sess = sess
        self.log_dir = log_dir

        if input_placeholders is None:
            with tf.name_scope('input'):
                self.x = tf.placeholder(tf.float32, shape=[None, data_set.feature_number], name='x-input')
                self.y_ = tf.placeholder(tf.float32, shape=[None, 1], name='y-input')
        else:
            self.x = input_placeholders[0]
            self.y_ = input_placeholders[1]

        self.model.initialize(data_set, input_placeholders=[self.x, self.y_])
        for label in self.model.monitoring_labels:
            # loop body not preserved in this fragment (presumably sets up the
            # per-label training and validation monitors)
            pass

        for label in self.model.mon_dict.keys():
            # loop body not preserved in this fragment
            pass

        if log_dir is not None:
            self._prepare_tensorboard(log_dir)

        init_op = tf.global_variables_initializer()
        self.sess.run(init_op)

        if save_name is None:
            time_str = time.strftime("%Y%m%d-%H%M%S")
            self.save_name = os.path.join(os.getcwd(),
                                          '_'.join([self.data_set.selection, time_str,
                                                    ...]))  # further name components truncated in the source
    def _prepare_monitoring(self):
        """
        Check the dataset sizes for evaluation.
        """
    def _prepare_tensorboard(self, log_dir):
        """
        Prepare the tensorboard summary writers.
        """
        log_dir_train = os.path.join(log_dir, 'train')
        log_dir_test = os.path.join(log_dir, 'test')

        self.train_writer = tf.summary.FileWriter(log_dir_train, self.sess.graph)
        self.test_writer = tf.summary.FileWriter(log_dir_test, self.sess.graph)

        # merge all registered summaries; merged_summary is consumed in
        # _train_epoch below (this line is reconstructed)
        self.merged_summary = tf.summary.merge_all()
    def _add_to_basf2_collections(self):
        """
        Add the relevant tensors to the basf2 collections.
        """
        tf.add_to_collection('x', self.x)
        tf.add_to_collection('y', self.y_)
        tf.add_to_collection('activation', self.model.mlp.output)
        tf.add_to_collection('cost', self.model.loss)
        tf.add_to_collection('optimizer', self.minimizer)
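
    # A minimal sketch of the consumer side of these collections: after the
    # saved graph is restored, the tensors can be fetched by collection name
    # (standard TF 1.x API; the checkpoint file names are illustrative
    # assumptions):
    #
    #   saver = tf.train.import_meta_graph('model.ckpt.meta')
    #   saver.restore(sess, 'model.ckpt')
    #   x = tf.get_collection('x')[0]
    #   activation = tf.get_collection('activation')[0]
    #   prediction = sess.run(activation, feed_dict={x: features})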
    def _save_best_state(self, monitoring_params, label_name='mean_cross_entropy'):
        """
        Save the model only if a global minimum is reached on the validation set.
        """
        # always keep a snapshot of the current state
        self.saver.save(self.sess, self.save_name.replace('.ckpt', '_current.ckpt'))

        # reconstruction: skip the comparison as long as no best value exists
        if self.model.best_value == numpy.inf:
            return

        loss_idx = self.model.monitoring_labels.index(label_name)
        mon_param = monitoring_params[loss_idx]

        if mon_param < self.model.best_value:
            # new global minimum on the validation set: keep this checkpoint
            self.saver.save(self.sess, self.save_name)

        if self.log_dir is not None:
            with open(os.path.join(self.log_dir, 'training_params.pkl'), 'wb') as f:
                # the dumped object is not preserved in this fragment;
                # presumably the collected training parameters
                pickle.dump(self.train_log_dict, f)
    def _train_epoch(self, current_epoch):
        """
        Train one epoch.
        """
        self.minimizer = self.model.get_minimizer(current_epoch)

        batch_iter = self.data_set.batch_iterator()

        for i in range(self.data_set.batches):
            batch = next(batch_iter)
            feed = {self.x: batch[0], self.y_: batch[1]}
            # run one optimizer step (reconstruction: the sess.run call is not
            # preserved in this fragment)
            self.sess.run(self.minimizer, feed_dict=feed)

            if i % int(.2 * self.data_set.batches) == 0:
                print('Epoch status: %1.2f' % (i / self.data_set.batches))

        # train_mon_dict and valid_dict are feed dicts built from the
        # monitoring subsets; their construction is not preserved in this
        # fragment
        if self.log_dir is not None:
            summary = self.sess.run(self.merged_summary, feed_dict=train_mon_dict)
            self.train_writer.add_summary(summary, current_epoch)

        epoch_parameters = self.sess.run(self.monitoring_params, feed_dict=valid_dict)
        epoch_parameters_train = self.sess.run(self.monitoring_params, feed_dict=train_mon_dict)

        model_mon_params = []
        for key, entry in self.model.mon_dict.items():
            model_mon_params.append((key, self.sess.run(entry)))

        for label, param in zip(self.model.monitoring_labels, epoch_parameters):
            print(f"valid: {label}: {param:1.5f}")
        for label, param in zip(self.model.monitoring_labels, epoch_parameters_train):
            print(f"train: {label}: {param:1.5f}")

        for val in model_mon_params:
            print("%s: %f" % val)

        print('Epoch training time: %.1f' % (time.time() - self._time))
        self._time = time.time()
    def train(self):
        """
        Main training loop (the enclosing method definition is not preserved
        in this fragment; the name is an assumption).
        """
        for epoch in range(self.max_epochs):
            self._train_epoch(epoch)
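
# A minimal end-to-end sketch of how the three classes fit together under
# TF 1.x. 'my_data_set' stands for the TFData object expected by the trainer
# (providing feature_number, batches, batch_iterator() and selection), which
# is not defined in this module; all concrete numbers are illustrative
# assumptions:
#
#   mlp = MultilayerPerceptron.from_list([
#       ('h1', 'tanh', my_data_set.feature_number, 40, 0.0, 0.5),
#       ('out', 'sigmoid', 40, 1, 0.0, 0.5),
#   ])
#   model = DefaultModel(mlp, wd_coeffs=[1e-5, 1e-5])
#   with tf.Session() as sess:
#       trainer = Trainer(model, my_data_set, sess, log_dir='./log')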

Member index:

optimizer_change_epochs: used optimizers
is_initialized: check if initialized
optimizers: define multiple optimizers
c_lr_min: minimal learning rate
best_value: the best value is set to a default start value, then updated by the termination criterion
c_lr_init: initial learning rate
def _set_monitoring_params(self)
monitoring_params: params for monitoring
max_epochs: other variables
monitoring_labels: params for labeling
c_mom_init: initial momentum
step_countdown: step countdown
def _default_termination_criterion(self, monitoring_params, epoch, label_name='mean_cross_entropy', prop_dec=1e-5)
def __init__(self, mlp, mom_init=.9, mom_max=.99, mom_epochs=200, lr_init=.05, lr_min=1e-6, lr_dec_rate=.976, stop_epochs=10, min_epochs=200, max_epochs=1000, wd_coeffs=None, change_optimizer=None, staircase=True, smooth_cross_entropy=False)
wd_coeffs: weight decay coefficients
c_lr_dec_rate: learning rate decay factor
c_stop_epochs: number of epochs until stopping
c_mom_max: maximum momentum
batches_per_epoch: batches per epoch, unknown before initialization
def initialize(self, data_set, input_placeholders=None)
x: variable space must be known
mon_dict: further monitoring variables
loss: cross entropy with weight decay
c_mom_epochs: momentum epochs
termination_criterion: termination criterion
smooth_cross_entropy: True for a small epsilon addition, False for a clipped network output
def get_minimizer(self, epoch=0)
recent_params: recent parameters
is_initialized: check if initialized (input and output are connected)
b: init parameters for the bias
def _add_all_to_summary(self)
def _init_bias(self, width, init_val, name=None)
def __init__(self, name, tf_activation_str, dim_input, dim_output, p_bias, p_w, operation_seed=None)
def _init_weight(self, shape, stddev, operation_seed, name=None)
tf_activation: activation function
def initialize(self, layer_input)
w: init parameters for the truncated normal distribution
def _add_var_to_summary(self, var)
is_initialized: set all mlp parameters
def __init__(self, layers)
def from_list(cls, layers)
def initialize(self, layer_input)
def _set_weights_and_biases(self)
def _connect_layers(self, layer_input)
def _prepare_monitoring(self)
def _save_best_state(self, monitoring_params, label_name='mean_cross_entropy')
train_log_dict: training log dictionary
def _prepare_tensorboard(self, log_dir)
y_: input placeholder targets
monitoring_params: monitoring params for early stopping criterion, loss function, etc.
max_epochs: global training parameters
valid_monitor: validation monitor
x: input placeholder features
current_epoch: current epoch
monitoring_size: monitoring size
def _train_epoch(self, current_epoch)
def __init__(self, model, data_set, sess, log_dir=None, save_name=None, monitoring_size=100000, input_placeholders=None)
epoch_parameters: epoch parameters
termination_criterion: termination criterion
train_monitor: training monitor
def _add_to_basf2_collections(self)