from __future__ import division, print_function, generators
import os
import pickle
import time

import numpy
import tensorflow as tf


class Layer:
    """
    definition of a layer object
    """
    def __init__(self, name, tf_activation_str, dim_input, dim_output, p_bias, p_w,
                 operation_seed=None):
        """
        :param name: name of the layer
        :param tf_activation_str: string, name of an available tensorflow activation function
        :param dim_input: input dimension of the layer
        :param dim_output: output dimension of the layer
        :param p_bias: initial constant for the biases
        :param p_w: stddev of the truncated normal distribution used to initialize the weights
        :param operation_seed: seed for the weight initialization
        """
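        # e.g. Layer('hidden1', 'sigmoid', 10, 20, .05, .5) describes a fully
        # connected 10 -> 20 sigmoid layer with biases initialized to .05 and a
        # weight stddev of .5 (illustrative values)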
        self.name = name

        tf_activation_dict = {
            'sigmoid': tf.sigmoid,
            'leaky_relu': tf.nn.leaky_relu,
        }

        if tf_activation_str not in tf_activation_dict:
            raise ValueError('unknown activation function: %s' % tf_activation_str)

        self.tf_activation = tf_activation_dict[tf_activation_str]
        self.shape = [dim_input, dim_output]
        self.w = self._init_weight(self.shape, p_w, operation_seed)
        self.b = self._init_bias(self.shape[1], p_bias)
        self.input = None
        self.output = None
    def _init_bias(self, width, init_val):
        """
        define bias variables
        """
        name = self.name + '_b'
        initial = tf.constant(init_val, shape=[width], name=name)
        return tf.Variable(initial, name=name)
    def _init_weight(self, shape, stddev, operation_seed):
        """
        define weight variables
        """
        name = self.name + '_w'
        initial = tf.truncated_normal(shape, stddev=stddev, seed=operation_seed, name=name)
        return tf.Variable(initial, name=name)
    def initialize(self, layer_input):
        """
        initialization, requires layer input
        """
        if self.input is not None:
            raise RuntimeError('Layer %s is already initialized.' % self.name)

        self.input = layer_input
        name = self.name + '_output'
        with tf.name_scope('output'):
            # assumed forward pass: activation(input @ W + b)
            self.output = self.tf_activation(tf.matmul(layer_input, self.w) + self.b, name=name)
            tf.add_to_collection('output', self.output)
    def _variable_to_summary(self, var):
        """
        add variables of this layer to tensorboard
        :param var: tensorflow variable
        """
        with tf.name_scope('summaries'):
            mean = tf.reduce_mean(var)
            tf.summary.scalar('mean', mean)

            with tf.name_scope('stddev'):
                stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
            tf.summary.scalar('stddev', stddev)

            tf.summary.scalar('max', tf.reduce_max(var))
            tf.summary.scalar('min', tf.reduce_min(var))
            tf.summary.histogram('histogram', var)
    def all_to_summary(self):
        """
        adds tunable parameters to summary
        """
        with tf.name_scope(self.name):
            with tf.name_scope('weights'):
                self._variable_to_summary(self.w)
            with tf.name_scope('biases'):
                self._variable_to_summary(self.b)


class MLP:
    """
    class combining the layer objects to a multilayer perceptron
    """
    def __init__(self, layers):
        """
        define layers from list
        """
        layer_obj = []
        for layer in layers:
            layer_obj.append(Layer(*layer))
        self.layers = layer_obj

        self._check_layer_dimensions()
        self._collect_weights_and_biases()
    def _collect_weights_and_biases(self):
        """
        collect tunable parameters
        """
        self.w = []
        self.b = []
        for layer in self.layers:
            self.w.append(layer.w)
            self.b.append(layer.b)
    def initialize(self, layer_input):
        """
        initialize the layers in order, feeding each output into the next layer
        """
        for layer in self.layers:
            layer.initialize(layer_input)
            layer_input = layer.output
        self.output = self.layers[-1].output
    def _check_layer_dimensions(self):
        """
        consecutive layers have to share a common dimension
        """
        for _idx in range(len(self.layers) - 1):
            assert self.layers[_idx].shape[1] == self.layers[_idx + 1].shape[0]
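        # e.g. shapes [4, 16] -> [16, 16] -> [16, 1] chain up cleanly, while a
        # [4, 16] layer followed by [8, 1] would trip the assertion above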


class DefaultModel:
    """
    define the default model
    """

    def __init__(self, mlp, mom_init=.9, mom_max=.99, mom_epochs=200, lr_init=.05, lr_min=1e-6,
                 stop_epochs=10, min_epochs=200, max_epochs=1000, wd_coeffs=None, change_optimizer=None,
                 staircase=True, smooth_cross_entropy=False):
        """
        initialization function
        """
        if wd_coeffs is not None:
            assert len(wd_coeffs) == len(mlp.layers)

        self.mlp, self.wd_coeffs = mlp, wd_coeffs
        # constants used by the schedules below (assignments partly reconstructed)
        self.c_mom_init, self.c_mom_max, self.c_mom_epochs = mom_init, mom_max, mom_epochs
        self.c_lr_init, self.c_lr_min = lr_init, lr_min
        self.c_staircase, self.c_smooth_cross_entropy = staircase, smooth_cross_entropy
        self.best_value, self.mon_dict, self.optimizers = numpy.inf, {}, []

        with tf.name_scope('global_step'):
            self.global_step = tf.Variable(0, trainable=False, name='global_step')

        if change_optimizer is not None:
            self.change_optimizer = change_optimizer
    def _set_optimizer(self):
        """
        set the learning rate and momentum schedules and the available optimizers
        """
        # assumed: exponentially decaying learning rate driven by the global step
        # (decay settings illustrative)
        t_learning_rate = tf.train.exponential_decay(self.c_lr_init, self.global_step,
                                                     self.batches_per_epoch, .976,
                                                     staircase=self.c_staircase)
        t_lr_min = tf.Variable(self.c_lr_min, trainable=False)

        with tf.name_scope('learning_rate'):
            # never let the decayed rate drop below the configured minimum
            t_limited_lr = tf.maximum(t_learning_rate, t_lr_min)
            tf.summary.scalar('learning rate', t_limited_lr)
        # linear ramp of the momentum from mom_init to mom_max over mom_epochs
        # (rate reconstructed from those constants)
        c_mom_dec_rate = (self.c_mom_max - self.c_mom_init) / self.c_mom_epochs

        t_mom_dec_rate = tf.Variable(c_mom_dec_rate, trainable=False, dtype=tf.float32)
        t_momentum = tf.Variable(self.c_mom_init, trainable=False, dtype=tf.float32)
        t_mom_max = tf.Variable(self.c_mom_max, trainable=False, dtype=tf.float32)
        t_batches_per_epoch = tf.Variable(self.batches_per_epoch, trainable=False, dtype=tf.float32)

        with tf.name_scope('momentum'):
            global_step = tf.cast(self.global_step, tf.float32)
            if self.c_staircase:
                # stepwise increase, once per finished epoch
                t_limited_mom = tf.minimum(t_momentum + t_mom_dec_rate *
                                           tf.floor(global_step / t_batches_per_epoch), t_mom_max)
            else:
                # smooth increase with every batch
                t_limited_mom = tf.minimum(t_momentum + t_mom_dec_rate *
                                           (global_step / t_batches_per_epoch), t_mom_max)
            tf.summary.scalar('momentum', t_limited_mom)
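        # e.g. with the defaults mom_init=.9, mom_max=.99, mom_epochs=200 the
        # reconstructed rate adds (0.99 - 0.9) / 200 = 4.5e-4 momentum per epoch,
        # saturating at mom_max after 200 epochs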
        self.mon_dict['learning_rate'] = t_limited_lr
        self.mon_dict['momentum'] = t_limited_mom
        with tf.name_scope('optimizer'):
            self.optimizers.append(tf.train.MomentumOptimizer(t_limited_lr, t_limited_mom))
            self.optimizers.append(tf.train.GradientDescentOptimizer(t_limited_lr))
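        # both optimizers are registered up front; change_optimizer (assumed
        # semantics) decides which one drives the updates as training proceeds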
    def _set_loss(self, epsilon=1e-10):  # method name and epsilon default assumed
        """
        set the training loss: binary cross entropy with optional weight decay
        """
        _y, _y_ = self.mlp.output, self.y_  # network output and target placeholder
        t_epsilon = tf.constant(epsilon)

        with tf.name_scope('cross_entropy'):
            if self.c_smooth_cross_entropy:
                cross_entropy = -tf.reduce_mean(tf.reduce_sum((_y_ * tf.log(_y + t_epsilon) + (1 - _y_) *
                                                               tf.log(1 - _y + t_epsilon)), 1))
            else:
                cross_entropy = -tf.reduce_mean(tf.reduce_sum((_y_ * tf.log(tf.clip_by_value(_y, epsilon, 1))) +
                                                              ((1 - _y_) * tf.log(tf.clip_by_value((1 - _y),
                                                                                                   epsilon, 1))), 1))
            tf.summary.scalar('cross entropy', cross_entropy)
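        # both branches implement H(y', y) = -mean( y'*log(y) + (1-y')*log(1-y) );
        # adding epsilon smooths the log near zero, clipping hard-limits its argument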
        if self.wd_coeffs:
            weights = self.mlp.w
            wd = [tf.constant(coeff) * tf.nn.l2_loss(w) for coeff, w in zip(self.wd_coeffs, weights)]
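        # tf.nn.l2_loss(w) is sum(w ** 2) / 2, so each coefficient scales a
        # standard L2 penalty on one layer's weight matrix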
        with tf.name_scope('loss_function'):
            loss = cross_entropy
            if self.wd_coeffs:
                loss += sum(wd)  # assumed: weight decay enters the loss additively
            tf.summary.scalar('loss', loss)

        self.loss = loss
    def _set_monitoring_params(self, epsilon=1e-10):  # epsilon default assumed
        """
        sets monitoring params
        """
        _y, _y_ = self.mlp.output, self.y_
        t_epsilon = tf.constant(epsilon)

        with tf.name_scope('mean_cross_entropy'):
            if self.c_smooth_cross_entropy:
                mean_cross_entropy = -tf.reduce_mean(tf.reduce_sum((_y_ * tf.log(_y + t_epsilon) + (1 - _y_) *
                                                                    tf.log(1 - _y + t_epsilon)), 1))
            else:
                mean_cross_entropy = -tf.reduce_mean(
                    tf.reduce_sum((_y_ * tf.log(tf.clip_by_value(_y, epsilon, 1))) +
                                  ((1 - _y_) * tf.log(tf.clip_by_value((1 - _y), epsilon, 1))), 1))
            tf.summary.scalar('mean cross entropy', mean_cross_entropy)

        # assumed bookkeeping of the monitored quantities
        self.monitoring_labels = ['mean_cross_entropy']
        self.monitoring_params = [mean_cross_entropy]
    def _default_termination_criterion(self, monitoring_params, epoch, label_name='mean_cross_entropy',
                                       prop_dec=1e-5):
        """
        :param monitoring_params: monitoring values of the current epoch
        :param epoch: current epoch number
        :param label_name: label of the quantity used for early stopping
        :param prop_dec: minimal relative decrease still counted as an improvement
        """
        loss_idx = self.monitoring_labels.index(label_name)
        mon_param = monitoring_params[loss_idx]

        # only a sufficiently large relative improvement updates the best value
        if mon_param < self.best_value * (1. - prop_dec):
            self.best_value = mon_param
    def initialize(self, data_set, input_placeholders=None):
        """
        initialize the placeholders, the network, the loss and the monitoring
        """
        if input_placeholders is None:
            with tf.name_scope('input'):
                self.x = tf.placeholder(tf.float32, shape=[None, data_set.feature_number], name='x-input')
                self.y_ = tf.placeholder(tf.float32, shape=[None, 1], name='y-input')
                self.weights = tf.placeholder(tf.float32, shape=[None, 1], name='weight-input')
        else:
            self.x = input_placeholders[0]
            self.y_ = input_placeholders[1]

        # assumed wiring: build the network graph, then schedules, loss and monitoring
        self.batches_per_epoch = data_set.batches
        self.mlp.initialize(self.x)
        self._set_optimizer()
        self._set_loss()
        self._set_monitoring_params()


class Trainer:
    """
    handling the training of the network model
    """

    def __init__(self, model, data_set, sess, log_dir=None, save_name=None, monitoring_size=100000,
                 input_placeholders=None):
        """
        class to train a predefined model
        :param model: DefaultModel obj
        :param data_set: TFData obj
        :param sess: tensorflow.Session obj
        :param log_dir: str, directory name for tensorboard logging
        :param save_name: str, path and name for saving the weight files
        :param monitoring_size: int, number of events of the training fraction used for monitoring
        :param input_placeholders: list of tf.placeholders, [features, targets]
        """
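        # typical call (illustrative): Trainer(model, data_set, sess, log_dir='./logs')
        # inside an open tf.Session; placeholders are created below unless provided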
        self._time = time.time()
        # references used throughout the trainer
        self.model = model
        self.data_set = data_set
        self.sess = sess
        self.log_dir = log_dir
        self.monitoring_size = monitoring_size
        if input_placeholders is None:
            with tf.name_scope('input'):
                self.x = tf.placeholder(tf.float32, shape=[None, data_set.feature_number], name='x-input')
                self.y_ = tf.placeholder(tf.float32, shape=[None, 1], name='y-input')
        else:
            self.x = input_placeholders[0]
            self.y_ = input_placeholders[1]

        self.model.initialize(data_set, input_placeholders=[self.x, self.y_])
        # assumed: the first configured optimizer drives the minimization
        self.minimizer = self.model.optimizers[0].minimize(self.model.loss,
                                                           global_step=self.model.global_step)
        # bookkeeping for everything that is monitored (loop bodies assumed)
        self.monitor_history = {}
        for label in self.model.monitoring_labels:
            self.monitor_history[label] = []
        for label in self.model.mon_dict.keys():
            self.monitor_history[label] = []
        if log_dir is not None:
            self._prepare_tensorboard(log_dir)

        self.saver = tf.train.Saver()

        init_op = tf.global_variables_initializer()
        self.sess.run(init_op)

        if save_name is None:
            time_str = time.strftime("%Y%m%d-%H%M%S")
            self.save_name = os.path.join(os.getcwd(),
                                          '_'.join([self.data_set.selection, time_str,
                                                    'model.ckpt']))  # file name component assumed
        else:
            self.save_name = save_name
        # checking dataset sizes for evaluation: never monitor more events than the
        # training fraction provides (train_events: assumed TFData attribute)
        self.monitoring_size = min(self.monitoring_size, self.data_set.train_events)
    def _prepare_tensorboard(self, log_dir):
        """
        prepare the tensorboard writers (writer setup assumed)
        """
        log_dir_train = os.path.join(log_dir, 'train')
        log_dir_test = os.path.join(log_dir, 'test')
        self.train_writer = tf.summary.FileWriter(log_dir_train, self.sess.graph)
        self.test_writer = tf.summary.FileWriter(log_dir_test)
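        # writing train and test summaries to sibling directories lets tensorboard
        # overlay both curves in one plot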
    def _add_to_basf2_collections(self):
        """
        add the relevant tensors to the graph collections read by basf2
        """
        tf.add_to_collection('x', self.x)
        tf.add_to_collection('y', self.y_)
        tf.add_to_collection('activation', self.model.mlp.output)
        tf.add_to_collection('cost', self.model.loss)
        tf.add_to_collection('optimizer', self.minimizer)
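        # these collection keys are what the basf2 side is assumed to look up when
        # it restores the saved graph for inference and further training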
    def _save_best_state(self, monitoring_params, label_name='mean_cross_entropy'):
        """
        save model only if a global minimum is reached on the validation set
        (method name and saving calls assumed)
        """
        # before the first evaluation there is nothing to compare against
        if self.model.best_value == numpy.inf:
            return

        loss_idx = self.model.monitoring_labels.index(label_name)
        mon_param = monitoring_params[loss_idx]

        if mon_param < self.model.best_value:
            self.saver.save(self.sess, self.save_name)

        with open(os.path.join(self.log_dir, 'training_params.pkl'), 'wb') as f:
            pickle.dump(self.monitor_history, f)  # payload assumed
    def _train_epoch(self):
        """
        run the minimizer over all batches of one epoch
        """
        batch_iter = self.data_set.batch_iterator()

        for i in range(self.data_set.batches):
            batch = next(batch_iter)
            feed = {self.x: batch[0], self.y_: batch[1]}
            self.sess.run(self.minimizer, feed_dict=feed)  # assumed: one update per batch

            # print the progress roughly five times per epoch
            if i % int(.2 * self.data_set.batches) == 0:
                print('Epoch status: %1.2f' % (i / self.data_set.batches))
    def _monitor_epoch(self, valid_dict, train_dict):
        """
        evaluate the monitoring quantities; the feed dicts of the validation and
        training monitoring fractions are assumed to be prepared by the caller
        """
        epoch_parameters = self.sess.run(self.model.monitoring_params,
                                         feed_dict=valid_dict)
        epoch_parameters_train = self.sess.run(self.model.monitoring_params,
                                               feed_dict=train_dict)

        model_mon_params = []
        for key, entry in self.model.mon_dict.items():
            model_mon_params.append((key, self.sess.run(entry)))
        for label, param in zip(self.model.monitoring_labels, epoch_parameters):
            print("valid: %s: %1.5f" % (label, param))
        for label, param in zip(self.model.monitoring_labels, epoch_parameters_train):
            print("train: %s: %1.5f" % (label, param))

        for val in model_mon_params:
            print("%s: %f" % val)
        print('Epoch training time: %.1f' % (time.time() - self._time))
        self._time = time.time()
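

# Minimal usage sketch (illustrative; `data_set` stands for a TFData object with
# feature_number, selection, batches, train_events and batch_iterator()):
#
#   layers = [['h0', 'tanh', data_set.feature_number, 100, .0, .05],
#             ['y', 'sigmoid', 100, 1, .0, .05]]
#   mlp = MLP(layers)
#   model = DefaultModel(mlp)
#   with tf.Session() as sess:
#       trainer = Trainer(model, data_set, sess, log_dir='./logs')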