
consistent parameter names

consistent parameter names in run_pretraining.py and optimization.py
Swetha Mandava 6 years ago
Parent
Commit 281beab09d
1 changed file with 5 additions and 5 deletions
  1. +5 −5
      TensorFlow/LanguageModeling/BERT/optimization.py

+ 5 - 5
TensorFlow/LanguageModeling/BERT/optimization.py

@@ -22,7 +22,7 @@ import re
 import tensorflow as tf
 
 
-def create_optimizer(loss, init_lr, num_train_steps, num_warmup_steps, hvd=None, use_fp16=False, amp=False):
+def create_optimizer(loss, init_lr, num_train_steps, num_warmup_steps, hvd=None, manual_fp16=False, use_fp16=False):
   """Creates an optimizer training op."""
   global_step = tf.train.get_or_create_global_step()
 
@@ -72,7 +72,7 @@ def create_optimizer(loss, init_lr, num_train_steps, num_warmup_steps, hvd=None,
   if hvd is not None:
     from horovod.tensorflow.compression import Compression
     optimizer = hvd.DistributedOptimizer(optimizer, sparse_as_dense=True, compression=Compression.none)
-  if use_fp16 or amp:
+  if manual_fp16 or use_fp16:
     loss_scale_manager = tf.contrib.mixed_precision.ExponentialUpdateLossScaleManager(init_loss_scale=2**32, incr_every_n_steps=1000, decr_every_n_nan_or_inf=2, decr_ratio=0.5)
     optimizer = tf.contrib.mixed_precision.LossScaleOptimizer(optimizer, loss_scale_manager)
 
@@ -80,7 +80,7 @@ def create_optimizer(loss, init_lr, num_train_steps, num_warmup_steps, hvd=None,
   grads_and_vars = optimizer.compute_gradients(loss, tvars)
   grads_and_vars = [(g,v) for g,v in grads_and_vars if g is not None]
   grads, tvars = list(zip(*grads_and_vars))
-  all_are_finite = tf.reduce_all([tf.reduce_all(tf.is_finite(g)) for g in grads]) if use_fp16 or amp else tf.constant(True, dtype=tf.bool)
+  all_are_finite = tf.reduce_all([tf.reduce_all(tf.is_finite(g)) for g in grads]) if manual_fp16 or use_fp16 else tf.constant(True, dtype=tf.bool)
 
   # This is how the model was pre-trained.
   # ensure global norm is a finite number 
@@ -126,7 +126,7 @@ class AdamWeightDecayOptimizer(tf.train.Optimizer):
     self.exclude_from_weight_decay = exclude_from_weight_decay
 
   def apply_gradients(self, grads_and_vars, global_step=None, name=None,
-      use_fp16=False):
+      manual_fp16=False):
     """See base class."""
     assignments = []
     for (grad, param) in grads_and_vars:
@@ -134,7 +134,7 @@ class AdamWeightDecayOptimizer(tf.train.Optimizer):
         continue
 
       param_name = self._get_variable_name(param.name)
-      has_shadow = use_fp16 and param.dtype.base_dtype != tf.float32
+      has_shadow = manual_fp16 and param.dtype.base_dtype != tf.float32
       if has_shadow:
         # create shadow fp32 weights for fp16 variable
         param_fp32 = tf.get_variable(
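
For context, here is a minimal caller sketch (not part of this commit) showing how the renamed keyword arguments line up with the old ones. The module import, toy loss, and hyperparameter values are illustrative assumptions only, not code taken from run_pretraining.py.

import tensorflow as tf
import optimization  # the module changed in this commit

# Toy loss so create_optimizer has something to minimize; purely illustrative.
weights = tf.get_variable("w", shape=[10], initializer=tf.zeros_initializer())
loss = tf.reduce_mean(tf.square(weights))

train_op = optimization.create_optimizer(
    loss=loss,
    init_lr=1e-4,
    num_train_steps=1000,
    num_warmup_steps=100,
    hvd=None,
    manual_fp16=False,  # previously named `use_fp16`: manual loss scaling with fp32 shadow weights
    use_fp16=False,     # previously named `amp`: the automatic mixed-precision path
)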