|
|
@@ -72,7 +72,7 @@ flags.DEFINE_integer("num_train_steps", 100000, "Number of training steps.")
|
|
|
|
|
|
flags.DEFINE_integer("num_warmup_steps", 10000, "Number of warmup steps.")
|
|
|
|
|
|
-flags.DEFINE_integer("save_checkpoint_steps", 1000,
|
|
|
+flags.DEFINE_integer("save_checkpoints_steps", 1000,
|
|
|
"How often to save the model checkpoint.")
|
|
|
|
|
|
flags.DEFINE_integer("iterations_per_loop", 1000,
|
|
|
@@ -477,7 +477,7 @@ def main(_):
|
|
|
run_config = tf.estimator.RunConfig(
|
|
|
model_dir=FLAGS.output_dir,
|
|
|
session_config=config,
|
|
|
- save_checkpoints_steps=FLAGS.save_checkpoint_steps if not FLAGS.horovod or hvd.rank() == 0 else None,
|
|
|
+ save_checkpoints_steps=FLAGS.save_checkpoints_steps if not FLAGS.horovod or hvd.rank() == 0 else None,
|
|
|
# This variable controls how often estimator reports examples/sec.
|
|
|
# Default value is every 100 steps.
|
|
|
# When --report_loss is True, we set to very large value to prevent
|