7 лет назад · 3d90b6dbf0
--- a/TensorFlow/Detection/SSD/configs/ssd320_bench.config
+++ b/TensorFlow/Detection/SSD/configs/ssd320_bench.config
@@ -172,7 +172,7 @@ train_config: {
 
				 
			
 
				 train_input_reader: {
			
 
				   tf_record_input_reader {
			
 
				-    input_path: "/data/coco_train.record*"
			
 
				+    input_path: "/data/*train*"
			
 
				   }
			
 
				   label_map_path: "object_detection/data/mscoco_label_map.pbtxt"
			
 
				 }
			
@@ -185,7 +185,7 @@ eval_config: {
 
				 
			
 
				 eval_input_reader: {
			
 
				   tf_record_input_reader {
			
 
				-    input_path: "/data/coco_val.record*"
			
 
				+    input_path: "/data/*val*"
			
 
				   }
			
 
				   label_map_path: "object_detection/data/mscoco_label_map.pbtxt"
			
 
				   shuffle: false
			
--- a/TensorFlow/Detection/SSD/configs/ssd320_double_1gpus.config
+++ b/TensorFlow/Detection/SSD/configs/ssd320_double_1gpus.config
@@ -1,193 +0,0 @@
 
				-# SSD with Resnet 50 v1 FPN feature extractor, shared box predictor and focal
			
 
				-# loss (a.k.a Retinanet).
			
 
				-# See Lin et al, https://arxiv.org/abs/1708.02002
			
 
				-# Trained on COCO, initialized from Imagenet classification checkpoint
			
 
				-
			
 
				-model {
			
 
				-  ssd {
			
 
				-    inplace_batchnorm_update: true
			
 
				-    freeze_batchnorm: true
			
 
				-    num_classes: 90
			
 
				-    box_coder {
			
 
				-      faster_rcnn_box_coder {
			
 
				-        y_scale: 10.0
			
 
				-        x_scale: 10.0
			
 
				-        height_scale: 5.0
			
 
				-        width_scale: 5.0
			
 
				-      }
			
 
				-    }
			
 
				-    matcher {
			
 
				-      argmax_matcher {
			
 
				-        matched_threshold: 0.5
			
 
				-        unmatched_threshold: 0.5
			
 
				-        ignore_thresholds: false
			
 
				-        negatives_lower_than_unmatched: true
			
 
				-        force_match_for_each_row: true
			
 
				-        use_matmul_gather: true
			
 
				-      }
			
 
				-    }
			
 
				-    similarity_calculator {
			
 
				-      iou_similarity {
			
 
				-      }
			
 
				-    }
			
 
				-    encode_background_as_zeros: true
			
 
				-    anchor_generator {
			
 
				-      multiscale_anchor_generator {
			
 
				-        min_level: 3
			
 
				-        max_level: 7
			
 
				-        anchor_scale: 4.0
			
 
				-        aspect_ratios: [1.0, 2.0, 0.5]
			
 
				-        scales_per_octave: 2
			
 
				-      }
			
 
				-    }
			
 
				-    image_resizer {
			
 
				-      fixed_shape_resizer {
			
 
				-        height: 320
			
 
				-        width: 320
			
 
				-      }
			
 
				-    }
			
 
				-    box_predictor {
			
 
				-      weight_shared_convolutional_box_predictor {
			
 
				-        depth: 256
			
 
				-        class_prediction_bias_init: -4.6
			
 
				-        conv_hyperparams {
			
 
				-          activation: RELU_6,
			
 
				-          regularizer {
			
 
				-            l2_regularizer {
			
 
				-              weight: 0.0004
			
 
				-            }
			
 
				-          }
			
 
				-          initializer {
			
 
				-            random_normal_initializer {
			
 
				-              stddev: 0.01
			
 
				-              mean: 0.0
			
 
				-            }
			
 
				-          }
			
 
				-          batch_norm {
			
 
				-            scale: true,
			
 
				-            decay: 0.997,
			
 
				-            epsilon: 0.001,
			
 
				-          }
			
 
				-        }
			
 
				-        num_layers_before_predictor: 4
			
 
				-        kernel_size: 3
			
 
				-      }
			
 
				-    }
			
 
				-    feature_extractor {
			
 
				-      type: 'ssd_resnet50_v1_fpn'
			
 
				-      fpn {
			
 
				-        min_level: 3
			
 
				-        max_level: 7
			
 
				-      }
			
 
				-      min_depth: 16
			
 
				-      depth_multiplier: 1.0
			
 
				-      conv_hyperparams {
			
 
				-        activation: RELU_6,
			
 
				-        regularizer {
			
 
				-          l2_regularizer {
			
 
				-            weight: 0.0004
			
 
				-          }
			
 
				-        }
			
 
				-        initializer {
			
 
				-          truncated_normal_initializer {
			
 
				-            stddev: 0.03
			
 
				-            mean: 0.0
			
 
				-          }
			
 
				-        }
			
 
				-        batch_norm {
			
 
				-          scale: true,
			
 
				-          decay: 0.997,
			
 
				-          epsilon: 0.001,
			
 
				-        }
			
 
				-      }
			
 
				-      override_base_feature_extractor_hyperparams: true
			
 
				-    }
			
 
				-    loss {
			
 
				-      classification_loss {
			
 
				-        weighted_sigmoid_focal {
			
 
				-          alpha: 0.25
			
 
				-          gamma: 2.0
			
 
				-        }
			
 
				-      }
			
 
				-      localization_loss {
			
 
				-        weighted_smooth_l1 {
			
 
				-        }
			
 
				-      }
			
 
				-      classification_weight: 1.0
			
 
				-      localization_weight: 1.0
			
 
				-    }
			
 
				-    normalize_loss_by_num_matches: true
			
 
				-    normalize_loc_loss_by_codesize: true
			
 
				-    post_processing {
			
 
				-      batch_non_max_suppression {
			
 
				-        score_threshold: 1e-8
			
 
				-        iou_threshold: 0.6
			
 
				-        max_detections_per_class: 100
			
 
				-        max_total_detections: 100
			
 
				-      }
			
 
				-      score_converter: SIGMOID
			
 
				-    }
			
 
				-  }
			
 
				-}
			
 
				-
			
 
				-train_config: {
			
 
				-  fine_tune_checkpoint: "/checkpoints/resnet_v1_50/model.ckpt"
			
 
				-  fine_tune_checkpoint_type: "classification"
			
 
				-  batch_size: 32 
			
 
				-  sync_replicas: true
			
 
				-  startup_delay_steps: 0
			
 
				-  replicas_to_aggregate: 8
			
 
				-  num_steps: 200000
			
 
				-  data_augmentation_options {
			
 
				-    random_horizontal_flip {
			
 
				-    }
			
 
				-  }
			
 
				-  data_augmentation_options {
			
 
				-    random_crop_image {
			
 
				-      min_object_covered: 0.0
			
 
				-      min_aspect_ratio: 0.75
			
 
				-      max_aspect_ratio: 3.0
			
 
				-      min_area: 0.75
			
 
				-      max_area: 1.0
			
 
				-      overlap_thresh: 0.0
			
 
				-    }
			
 
				-  }
			
 
				-  optimizer {
			
 
				-    momentum_optimizer: {
			
 
				-      learning_rate: {
			
 
				-        cosine_decay_learning_rate {
			
 
				-          learning_rate_base: .02000000000000000000
			
 
				-          total_steps: 200000
			
 
				-          warmup_learning_rate: .00866640000000000000
			
 
				-          warmup_steps: 8000
			
 
				-        }
			
 
				-      }
			
 
				-      momentum_optimizer_value: 0.9
			
 
				-    }
			
 
				-    use_moving_average: false
			
 
				-  }
			
 
				-  max_number_of_boxes: 100
			
 
				-  unpad_groundtruth_tensors: false
			
 
				-}
			
 
				-
			
 
				-train_input_reader: {
			
 
				-  tf_record_input_reader {
			
 
				-    input_path: "/data/coco_train.record*"
			
 
				-  }
			
 
				-  label_map_path: "object_detection/data/mscoco_label_map.pbtxt"
			
 
				-}
			
 
				-
			
 
				-eval_config: {
			
 
				-  metrics_set: "coco_detection_metrics"
			
 
				-  use_moving_averages: false
			
 
				-  num_examples: 8000
			
 
				-}
			
 
				-
			
 
				-eval_input_reader: {
			
 
				-  tf_record_input_reader {
			
 
				-    input_path: "/data/coco_val.record*"
			
 
				-  }
			
 
				-  label_map_path: "object_detection/data/mscoco_label_map.pbtxt"
			
 
				-  shuffle: false
			
 
				-  num_readers: 1
			
 
				-}
			
--- a/TensorFlow/Detection/SSD/configs/ssd320_double_4gpus.config
+++ b/TensorFlow/Detection/SSD/configs/ssd320_double_4gpus.config
@@ -1,193 +0,0 @@
 
				-# SSD with Resnet 50 v1 FPN feature extractor, shared box predictor and focal
			
 
				-# loss (a.k.a Retinanet).
			
 
				-# See Lin et al, https://arxiv.org/abs/1708.02002
			
 
				-# Trained on COCO, initialized from Imagenet classification checkpoint
			
 
				-
			
 
				-model {
			
 
				-  ssd {
			
 
				-    inplace_batchnorm_update: true
			
 
				-    freeze_batchnorm: true
			
 
				-    num_classes: 90
			
 
				-    box_coder {
			
 
				-      faster_rcnn_box_coder {
			
 
				-        y_scale: 10.0
			
 
				-        x_scale: 10.0
			
 
				-        height_scale: 5.0
			
 
				-        width_scale: 5.0
			
 
				-      }
			
 
				-    }
			
 
				-    matcher {
			
 
				-      argmax_matcher {
			
 
				-        matched_threshold: 0.5
			
 
				-        unmatched_threshold: 0.5
			
 
				-        ignore_thresholds: false
			
 
				-        negatives_lower_than_unmatched: true
			
 
				-        force_match_for_each_row: true
			
 
				-        use_matmul_gather: true
			
 
				-      }
			
 
				-    }
			
 
				-    similarity_calculator {
			
 
				-      iou_similarity {
			
 
				-      }
			
 
				-    }
			
 
				-    encode_background_as_zeros: true
			
 
				-    anchor_generator {
			
 
				-      multiscale_anchor_generator {
			
 
				-        min_level: 3
			
 
				-        max_level: 7
			
 
				-        anchor_scale: 4.0
			
 
				-        aspect_ratios: [1.0, 2.0, 0.5]
			
 
				-        scales_per_octave: 2
			
 
				-      }
			
 
				-    }
			
 
				-    image_resizer {
			
 
				-      fixed_shape_resizer {
			
 
				-        height: 320
			
 
				-        width: 320
			
 
				-      }
			
 
				-    }
			
 
				-    box_predictor {
			
 
				-      weight_shared_convolutional_box_predictor {
			
 
				-        depth: 256
			
 
				-        class_prediction_bias_init: -4.6
			
 
				-        conv_hyperparams {
			
 
				-          activation: RELU_6,
			
 
				-          regularizer {
			
 
				-            l2_regularizer {
			
 
				-              weight: 0.0004
			
 
				-            }
			
 
				-          }
			
 
				-          initializer {
			
 
				-            random_normal_initializer {
			
 
				-              stddev: 0.01
			
 
				-              mean: 0.0
			
 
				-            }
			
 
				-          }
			
 
				-          batch_norm {
			
 
				-            scale: true,
			
 
				-            decay: 0.997,
			
 
				-            epsilon: 0.001,
			
 
				-          }
			
 
				-        }
			
 
				-        num_layers_before_predictor: 4
			
 
				-        kernel_size: 3
			
 
				-      }
			
 
				-    }
			
 
				-    feature_extractor {
			
 
				-      type: 'ssd_resnet50_v1_fpn'
			
 
				-      fpn {
			
 
				-        min_level: 3
			
 
				-        max_level: 7
			
 
				-      }
			
 
				-      min_depth: 16
			
 
				-      depth_multiplier: 1.0
			
 
				-      conv_hyperparams {
			
 
				-        activation: RELU_6,
			
 
				-        regularizer {
			
 
				-          l2_regularizer {
			
 
				-            weight: 0.0004
			
 
				-          }
			
 
				-        }
			
 
				-        initializer {
			
 
				-          truncated_normal_initializer {
			
 
				-            stddev: 0.03
			
 
				-            mean: 0.0
			
 
				-          }
			
 
				-        }
			
 
				-        batch_norm {
			
 
				-          scale: true,
			
 
				-          decay: 0.997,
			
 
				-          epsilon: 0.001,
			
 
				-        }
			
 
				-      }
			
 
				-      override_base_feature_extractor_hyperparams: true
			
 
				-    }
			
 
				-    loss {
			
 
				-      classification_loss {
			
 
				-        weighted_sigmoid_focal {
			
 
				-          alpha: 0.25
			
 
				-          gamma: 2.0
			
 
				-        }
			
 
				-      }
			
 
				-      localization_loss {
			
 
				-        weighted_smooth_l1 {
			
 
				-        }
			
 
				-      }
			
 
				-      classification_weight: 1.0
			
 
				-      localization_weight: 1.0
			
 
				-    }
			
 
				-    normalize_loss_by_num_matches: true
			
 
				-    normalize_loc_loss_by_codesize: true
			
 
				-    post_processing {
			
 
				-      batch_non_max_suppression {
			
 
				-        score_threshold: 1e-8
			
 
				-        iou_threshold: 0.6
			
 
				-        max_detections_per_class: 100
			
 
				-        max_total_detections: 100
			
 
				-      }
			
 
				-      score_converter: SIGMOID
			
 
				-    }
			
 
				-  }
			
 
				-}
			
 
				-
			
 
				-train_config: {
			
 
				-  fine_tune_checkpoint: "/checkpoints/resnet_v1_50/model.ckpt"
			
 
				-  fine_tune_checkpoint_type: "classification"
			
 
				-  batch_size: 32 
			
 
				-  sync_replicas: true
			
 
				-  startup_delay_steps: 0
			
 
				-  replicas_to_aggregate: 8
			
 
				-  num_steps: 50000
			
 
				-  data_augmentation_options {
			
 
				-    random_horizontal_flip {
			
 
				-    }
			
 
				-  }
			
 
				-  data_augmentation_options {
			
 
				-    random_crop_image {
			
 
				-      min_object_covered: 0.0
			
 
				-      min_aspect_ratio: 0.75
			
 
				-      max_aspect_ratio: 3.0
			
 
				-      min_area: 0.75
			
 
				-      max_area: 1.0
			
 
				-      overlap_thresh: 0.0
			
 
				-    }
			
 
				-  }
			
 
				-  optimizer {
			
 
				-    momentum_optimizer: {
			
 
				-      learning_rate: {
			
 
				-        cosine_decay_learning_rate {
			
 
				-          learning_rate_base: .08000000000000000000
			
 
				-          total_steps: 50000
			
 
				-          warmup_learning_rate: .03466560000000000000
			
 
				-          warmup_steps: 2000
			
 
				-        }
			
 
				-      }
			
 
				-      momentum_optimizer_value: 0.9
			
 
				-    }
			
 
				-    use_moving_average: false
			
 
				-  }
			
 
				-  max_number_of_boxes: 100
			
 
				-  unpad_groundtruth_tensors: false
			
 
				-}
			
 
				-
			
 
				-train_input_reader: {
			
 
				-  tf_record_input_reader {
			
 
				-    input_path: "/data/coco_train.record*"
			
 
				-  }
			
 
				-  label_map_path: "object_detection/data/mscoco_label_map.pbtxt"
			
 
				-}
			
 
				-
			
 
				-eval_config: {
			
 
				-  metrics_set: "coco_detection_metrics"
			
 
				-  use_moving_averages: false
			
 
				-  num_examples: 8000
			
 
				-}
			
 
				-
			
 
				-eval_input_reader: {
			
 
				-  tf_record_input_reader {
			
 
				-    input_path: "/data/coco_val.record*"
			
 
				-  }
			
 
				-  label_map_path: "object_detection/data/mscoco_label_map.pbtxt"
			
 
				-  shuffle: false
			
 
				-  num_readers: 1
			
 
				-}
			
--- a/TensorFlow/Detection/SSD/configs/ssd320_double_8gpus.config
+++ b/TensorFlow/Detection/SSD/configs/ssd320_double_8gpus.config
@@ -1,193 +0,0 @@
 
				-# SSD with Resnet 50 v1 FPN feature extractor, shared box predictor and focal
			
 
				-# loss (a.k.a Retinanet).
			
 
				-# See Lin et al, https://arxiv.org/abs/1708.02002
			
 
				-# Trained on COCO, initialized from Imagenet classification checkpoint
			
 
				-
			
 
				-model {
			
 
				-  ssd {
			
 
				-    inplace_batchnorm_update: true
			
 
				-    freeze_batchnorm: true
			
 
				-    num_classes: 90
			
 
				-    box_coder {
			
 
				-      faster_rcnn_box_coder {
			
 
				-        y_scale: 10.0
			
 
				-        x_scale: 10.0
			
 
				-        height_scale: 5.0
			
 
				-        width_scale: 5.0
			
 
				-      }
			
 
				-    }
			
 
				-    matcher {
			
 
				-      argmax_matcher {
			
 
				-        matched_threshold: 0.5
			
 
				-        unmatched_threshold: 0.5
			
 
				-        ignore_thresholds: false
			
 
				-        negatives_lower_than_unmatched: true
			
 
				-        force_match_for_each_row: true
			
 
				-        use_matmul_gather: true
			
 
				-      }
			
 
				-    }
			
 
				-    similarity_calculator {
			
 
				-      iou_similarity {
			
 
				-      }
			
 
				-    }
			
 
				-    encode_background_as_zeros: true
			
 
				-    anchor_generator {
			
 
				-      multiscale_anchor_generator {
			
 
				-        min_level: 3
			
 
				-        max_level: 7
			
 
				-        anchor_scale: 4.0
			
 
				-        aspect_ratios: [1.0, 2.0, 0.5]
			
 
				-        scales_per_octave: 2
			
 
				-      }
			
 
				-    }
			
 
				-    image_resizer {
			
 
				-      fixed_shape_resizer {
			
 
				-        height: 320
			
 
				-        width: 320
			
 
				-      }
			
 
				-    }
			
 
				-    box_predictor {
			
 
				-      weight_shared_convolutional_box_predictor {
			
 
				-        depth: 256
			
 
				-        class_prediction_bias_init: -4.6
			
 
				-        conv_hyperparams {
			
 
				-          activation: RELU_6,
			
 
				-          regularizer {
			
 
				-            l2_regularizer {
			
 
				-              weight: 0.0004
			
 
				-            }
			
 
				-          }
			
 
				-          initializer {
			
 
				-            random_normal_initializer {
			
 
				-              stddev: 0.01
			
 
				-              mean: 0.0
			
 
				-            }
			
 
				-          }
			
 
				-          batch_norm {
			
 
				-            scale: true,
			
 
				-            decay: 0.997,
			
 
				-            epsilon: 0.001,
			
 
				-          }
			
 
				-        }
			
 
				-        num_layers_before_predictor: 4
			
 
				-        kernel_size: 3
			
 
				-      }
			
 
				-    }
			
 
				-    feature_extractor {
			
 
				-      type: 'ssd_resnet50_v1_fpn'
			
 
				-      fpn {
			
 
				-        min_level: 3
			
 
				-        max_level: 7
			
 
				-      }
			
 
				-      min_depth: 16
			
 
				-      depth_multiplier: 1.0
			
 
				-      conv_hyperparams {
			
 
				-        activation: RELU_6,
			
 
				-        regularizer {
			
 
				-          l2_regularizer {
			
 
				-            weight: 0.0004
			
 
				-          }
			
 
				-        }
			
 
				-        initializer {
			
 
				-          truncated_normal_initializer {
			
 
				-            stddev: 0.03
			
 
				-            mean: 0.0
			
 
				-          }
			
 
				-        }
			
 
				-        batch_norm {
			
 
				-          scale: true,
			
 
				-          decay: 0.997,
			
 
				-          epsilon: 0.001,
			
 
				-        }
			
 
				-      }
			
 
				-      override_base_feature_extractor_hyperparams: true
			
 
				-    }
			
 
				-    loss {
			
 
				-      classification_loss {
			
 
				-        weighted_sigmoid_focal {
			
 
				-          alpha: 0.25
			
 
				-          gamma: 2.0
			
 
				-        }
			
 
				-      }
			
 
				-      localization_loss {
			
 
				-        weighted_smooth_l1 {
			
 
				-        }
			
 
				-      }
			
 
				-      classification_weight: 1.0
			
 
				-      localization_weight: 1.0
			
 
				-    }
			
 
				-    normalize_loss_by_num_matches: true
			
 
				-    normalize_loc_loss_by_codesize: true
			
 
				-    post_processing {
			
 
				-      batch_non_max_suppression {
			
 
				-        score_threshold: 1e-8
			
 
				-        iou_threshold: 0.6
			
 
				-        max_detections_per_class: 100
			
 
				-        max_total_detections: 100
			
 
				-      }
			
 
				-      score_converter: SIGMOID
			
 
				-    }
			
 
				-  }
			
 
				-}
			
 
				-
			
 
				-train_config: {
			
 
				-  fine_tune_checkpoint: "/checkpoints/resnet_v1_50/model.ckpt"
			
 
				-  fine_tune_checkpoint_type: "classification"
			
 
				-  batch_size: 32 
			
 
				-  sync_replicas: true
			
 
				-  startup_delay_steps: 0
			
 
				-  replicas_to_aggregate: 8
			
 
				-  num_steps: 25000
			
 
				-  data_augmentation_options {
			
 
				-    random_horizontal_flip {
			
 
				-    }
			
 
				-  }
			
 
				-  data_augmentation_options {
			
 
				-    random_crop_image {
			
 
				-      min_object_covered: 0.0
			
 
				-      min_aspect_ratio: 0.75
			
 
				-      max_aspect_ratio: 3.0
			
 
				-      min_area: 0.75
			
 
				-      max_area: 1.0
			
 
				-      overlap_thresh: 0.0
			
 
				-    }
			
 
				-  }
			
 
				-  optimizer {
			
 
				-    momentum_optimizer: {
			
 
				-      learning_rate: {
			
 
				-        cosine_decay_learning_rate {
			
 
				-          learning_rate_base: .16000000000000000000
			
 
				-          total_steps: 25000
			
 
				-          warmup_learning_rate: .06933120000000000000
			
 
				-          warmup_steps: 1000
			
 
				-        }
			
 
				-      }
			
 
				-      momentum_optimizer_value: 0.9
			
 
				-    }
			
 
				-    use_moving_average: false
			
 
				-  }
			
 
				-  max_number_of_boxes: 100
			
 
				-  unpad_groundtruth_tensors: false
			
 
				-}
			
 
				-
			
 
				-train_input_reader: {
			
 
				-  tf_record_input_reader {
			
 
				-    input_path: "/data/coco_train.record*"
			
 
				-  }
			
 
				-  label_map_path: "object_detection/data/mscoco_label_map.pbtxt"
			
 
				-}
			
 
				-
			
 
				-eval_config: {
			
 
				-  metrics_set: "coco_detection_metrics"
			
 
				-  use_moving_averages: false
			
 
				-  num_examples: 8000
			
 
				-}
			
 
				-
			
 
				-eval_input_reader: {
			
 
				-  tf_record_input_reader {
			
 
				-    input_path: "/data/coco_val.record*"
			
 
				-  }
			
 
				-  label_map_path: "object_detection/data/mscoco_label_map.pbtxt"
			
 
				-  shuffle: false
			
 
				-  num_readers: 1
			
 
				-}
			
--- a/TensorFlow/Detection/SSD/configs/ssd320_full_1gpus.config
+++ b/TensorFlow/Detection/SSD/configs/ssd320_full_1gpus.config
@@ -172,7 +172,7 @@ train_config: {
 
				 
			
 
				 train_input_reader: {
			
 
				   tf_record_input_reader {
			
 
				-    input_path: "/data/coco_train.record*"
			
 
				+    input_path: "/data/*train*"
			
 
				   }
			
 
				   label_map_path: "object_detection/data/mscoco_label_map.pbtxt"
			
 
				 }
			
@@ -185,7 +185,7 @@ eval_config: {
 
				 
			
 
				 eval_input_reader: {
			
 
				   tf_record_input_reader {
			
 
				-    input_path: "/data/coco_val.record*"
			
 
				+    input_path: "/data/*val*"
			
 
				   }
			
 
				   label_map_path: "object_detection/data/mscoco_label_map.pbtxt"
			
 
				   shuffle: false
			
--- a/TensorFlow/Detection/SSD/configs/ssd320_full_4gpus.config
+++ b/TensorFlow/Detection/SSD/configs/ssd320_full_4gpus.config
@@ -172,7 +172,7 @@ train_config: {
 
				 
			
 
				 train_input_reader: {
			
 
				   tf_record_input_reader {
			
 
				-    input_path: "/data/coco_train.record*"
			
 
				+    input_path: "/data/*train*"
			
 
				   }
			
 
				   label_map_path: "object_detection/data/mscoco_label_map.pbtxt"
			
 
				 }
			
@@ -185,7 +185,7 @@ eval_config: {
 
				 
			
 
				 eval_input_reader: {
			
 
				   tf_record_input_reader {
			
 
				-    input_path: "/data/coco_val.record*"
			
 
				+    input_path: "/data/*val*"
			
 
				   }
			
 
				   label_map_path: "object_detection/data/mscoco_label_map.pbtxt"
			
 
				   shuffle: false
			
--- a/TensorFlow/Detection/SSD/configs/ssd320_full_8gpus.config
+++ b/TensorFlow/Detection/SSD/configs/ssd320_full_8gpus.config
@@ -172,7 +172,7 @@ train_config: {
 
				 
			
 
				 train_input_reader: {
			
 
				   tf_record_input_reader {
			
 
				-    input_path: "/data/coco_train.record*"
			
 
				+    input_path: "/data/*train*"
			
 
				   }
			
 
				   label_map_path: "object_detection/data/mscoco_label_map.pbtxt"
			
 
				 }
			
@@ -185,7 +185,7 @@ eval_config: {
 
				 
			
 
				 eval_input_reader: {
			
 
				   tf_record_input_reader {
			
 
				-    input_path: "/data/coco_val.record*"
			
 
				+    input_path: "/data/*val*"
			
 
				   }
			
 
				   label_map_path: "object_detection/data/mscoco_label_map.pbtxt"
			
 
				   shuffle: false
			
--- a/TensorFlow/LanguageModeling/BERT/NOTICE
+++ b/TensorFlow/LanguageModeling/BERT/NOTICE
@@ -0,0 +1,4 @@
 
				+BERT TensorFlow
			
 
				+
			
 
				+This repository includes software from https://github.com/google-research/bert
			
 
				+licensed under the Apache License, Version 2.0 (the "License")
			
--- a/TensorFlow/LanguageModeling/BERT/README.md
+++ b/TensorFlow/LanguageModeling/BERT/README.md
@@ -24,13 +24,13 @@ This repository provides a script and recipe to train BERT to achieve state of t
 
				   * [Training accuracy results](#training-accuracy-results)
			
 
				   * [Training stability test](#training-stability-test)
			
 
				   * [Training performance results](#training-performance-results)
			
 
				-      * [NVIDIA DGX-1 (8x V100 16G)](#nvidia-dgx-1-8x-v100-16g)
			
 
				-      * [NVIDIA DGX-1 (8x V100 32G)](#nvidia-dgx-1-8x-v100-32g)
			
 
				-      * [NVIDIA DGX-2 (16x V100 32G)](#nvidia-dgx-2-16x-v100-32g)
			
 
				+  * [NVIDIA DGX-1 (8x V100 16G)](#nvidia-dgx-1-8x-v100-16g)
			
 
				+  * [NVIDIA DGX-1 (8x V100 32G)](#nvidia-dgx-1-8x-v100-32g)
			
 
				+  * [NVIDIA DGX-2 (16x V100 32G)](#nvidia-dgx-2-16x-v100-32g)
			
 
				   * [Inference performance results](#inference-performance-results)
			
 
				-      * [NVIDIA DGX-1 16G (1x V100 16G)](#nvidia-dgx-1-16g-1x-v100-16g)
			
 
				-      * [NVIDIA DGX-1 32G (1x V100 32G)](#nvidia-dgx-1-32g-1x-v100-32g)
			
 
				-      * [NVIDIA DGX-2 32G (1x V100 32G)](#nvidia-dgx-2-32g-1x-v100-32g)
			
 
				+  * [NVIDIA DGX-1 16G (1x V100 16G)](#nvidia-dgx-1-16g-1x-v100-16g)
			
 
				+  * [NVIDIA DGX-1 32G (1x V100 32G)](#nvidia-dgx-1-32g-1x-v100-32g)
			
 
				+  * [NVIDIA DGX-2 32G (1x V100 32G)](#nvidia-dgx-2-32g-1x-v100-32g)
			
 
				 * [Changelog](#changelog)
			
 
				 * [Known issues](#known-issues)
			
 
				 
			
@@ -120,7 +120,7 @@ After you build the container image and download the data, you can start an inte
 
				 bash scripts/docker/launch.sh
			
 
				 ```
			
 
				 
			
 
				-The `launch.sh` script assumes that the datasets are in the following locations by default after downloading data. 
			
 
				+The `launch.sh` script assumes that the datasets are in the following locations by default after downloading data. 
			
 
				 - SQuaD v1.1 - `data/squad/v1.1`
			
 
				 - BERT - `data/pretrained_models_google/uncased_L-24_H-1024_A-16`
			
 
				 - Wikipedia - `data/wikipedia_corpus/final_tfrecords_sharded`
			
@@ -194,9 +194,9 @@ Aside from options to set hyperparameters, the relevant options to control the b
 
				   --[no]amp: Whether to enable AMP ops.(default: 'false')
			
 
				   --[no]amp_fastmath: Whether to enable AMP fastmath ops.(default: 'false')
			
 
				   --bert_config_file: The config json file corresponding to the pre-trained BERT model. This specifies the model architecture.
			
 
				-  --[no]do_eval: Whether to run evaluation on the dev set.(default: 'false')
			
 
				+  --[no]do_eval: Whether to run eval on the dev set.(default: 'false')
			
 
				   --[no]do_train: Whether to run training.(default: 'false')
			
 
				-  --eval_batch_size: Total batch size for evaluation.(default: '8')(an integer)
			
 
				+  --eval_batch_size: Total batch size for eval.(default: '8')(an integer)
			
 
				   --[no]fastmath: Whether to enable loss scaler for fastmath ops.(default: 'false')
			
 
				   --[no]horovod: Whether to use Horovod for multi-gpu runs(default: 'false')
			
 
				   --init_checkpoint: Initial checkpoint (usually from a pre-trained BERT model).
			
@@ -207,7 +207,7 @@ Aside from options to set hyperparameters, the relevant options to control the b
 
				 Aside from options to set hyperparameters, some relevant options to control the behaviour of the run_squad.py script are: 
			
 
				 ```bash
			
 
				   --bert_config_file: The config json file corresponding to the pre-trained BERT model. This specifies the model architecture.
			
 
				-  --[no]do_predict: Whether to run evaluation on the dev set. (default: 'false')
			
 
				+  --[no]do_predict: Whether to run eval on the dev set. (default: 'false')
			
 
				   --[no]do_train: Whether to run training. (default: 'false')
			
 
				   --learning_rate: The initial learning rate for Adam.(default: '5e-06')(a number)
			
 
				   --max_answer_length: The maximum length of an answer that can be generated. This is needed because the start and end predictions are not conditioned on one another.(default: '30')(an integer)
			
@@ -234,13 +234,15 @@ Pre-training is performed using the `run_pretraining.py` script along with param
 
				 
			
 
				 
			
 
				 The `run_pretraining.sh` script runs a job on a single node  that trains the BERT-large model from scratch using the Wikipedia and Book corpus datasets as training data. By default, the training script:
			
 
				-- Runs on 8 GPUs with training batch size of 14 and evaluation batch size of 8 per GPU.
			
 
				-- Has FP16 precision enabled.
			
 
				-- Is XLA enabled.
			
 
				-- Trains with default learning rate of 1e-4 for 1144000 steps with 10000 warm-up steps.
			
 
				-- Saves a checkpoint every 5000 iterations.
			
 
				-- Creates a log file containing all the output.
			
 
				-- Evaluates the model at the end of training. To skip evaluation, modify `--do_eval` to `False`.
			
 
				+- Assumes training batch size of 14
			
 
				+- Assumes evaluation batch size of 8
			
 
				+- Assumes learning rate of 1e-4
			
 
				+- Assumes precision of fp16_xla (fp16 math JIT compiled with XLA)
			
 
				+- Assumes you want to run on 8 GPUs
			
 
				+- Assumes 10,000 warmup steps
			
 
				+- Assumes 1144000 training steps
			
 
				+- Assumes checkpoints should be saved every 5000 steps
			
 
				+- Assumes you do want to create a log file for all the output
			
 
				 
			
 
				 These parameters will train Wikipedia + BooksCorpus to reasonable accuracy on a DGX1 with 32GB V100 cards. If you want to match Google’s best results from the BERT paper, you should either train for twice as many steps (2,288,000 steps) on a DGX1, or train on 16 GPUs on a DGX2. With 16 GPUs, the DGX2 can fit a batch size twice as large as a DGX1 (224 vs 112), so it can finish in half as many steps. 
			
 
				 
			
@@ -251,7 +253,7 @@ run_pretraining.sh <node_type> <training_batch_size> <eval_batch_size> <learning
 
				 ```
			
 
				 
			
 
				 Where:
			
 
				-- <training_batch_size> per-gpu batch size used for training. Batch size varies with <precision>, larger batch sizes run more efficiently, but require more memory.
			
 
				+- <training_batch_size> per-gpu batch size used for training. Batch size varies with <precision>; larger batch sizes run more efficiently, but require more memory.
			
 
				 
			
 
				 - <eval_batch_size> per-gpu batch size used for evaluation after training.
				 - <learning_rate> Default rate of 1e-4 is good for global batch size 256.
			
 
				 
			
@@ -295,16 +297,16 @@ Trains BERT-large from scratch on a single DGX-2 using FP16 arithmetic. This wil
 
				 Fine tuning is performed using the `run_squad.py` script along with parameters defined in `scripts/run_squad.sh`.
			
 
				 
			
 
				 The `run_squad.sh` script trains a model and performs evaluation on the SQuaD v1.1 dataset. By default, the training script: 
			
 
				-- Uses 8 GPUs and batch size of 10 on each GPU.
			
 
				-- Has FP16 precision enabled.
			
 
				-- Is XLA enabled.
			
 
				-- Runs for 2 epochs.
			
 
				+- Uses 8 GPUs and batch size of 10 on each GPU
			
 
				+- Has FP16 precision enabled
			
 
				+- Is XLA enabled
			
 
				+- Runs for 2 epochs
			
 
				 - Saves a checkpoint every 1000 iterations (keeps only the latest checkpoint) and at the end of training. All checkpoints, evaluation results and training logs are saved to the `/results` directory (in the container which can be mounted to a local directory).
			
 
				-- Evaluation is done at the end of training. To skip evaluation, modify `--do_predict` to `False`.
			
 
				+- Evaluation is done at the end of training. To skip eval, modify `--do_predict` to `False`.
			
 
				 
			
 
				 This script outputs checkpoints to the `/results` directory, by default, inside the container. Mount point of `/results` can be changed in the `scripts/docker/launch.sh` file. The training log contains information about:
			
 
				-- Loss for the final step
			
 
				-- Training and evaluation performance
			
 
				+- Loss for final step
			
 
				+- Train and eval performance
			
 
				 - F1 and exact match score on the Dev Set of SQuaD after evaluation. 
			
 
				 
			
 
				 The summary after training is printed in the following format:
			
@@ -345,12 +347,12 @@ Inference on a fine tuned Question Answering system is performed using the `run_
 
				 The `run_squad_inference.sh` script runs inference with a fine-tuned model and performs evaluation on the SQuaD v1.1 dataset. By default, the inference script: 
			
 
				 - Has FP16 precision enabled
			
 
				 - Is XLA enabled
			
 
				-- Evaluates the latest checkpoint present in `/results` with a batch size of 8
			
 
				+- Does eval on latest checkpoint present in `/results` with a batch size of 8
			
 
				 
			
 
				 This script outputs a predictions file to `/results/predictions.json` and computes the F1 score and exact match score using SQuaD's `evaluate-v1.1.py`. The mount point of `/results` can be changed in the `scripts/docker/launch.sh` file. 
			
 
				 
			
 
				 The output log contains information about:
			
 
				-- Evaluation performance
			
 
				+- Eval performance
			
 
				 - F1 and exact match score on the Dev Set of SQuaD after evaluation. 
			
 
				 
			
 
				 The summary after inference is printed in the following format:
			
@@ -410,14 +412,14 @@ Our results were obtained by running batch sizes up to 3x GPUs on a 16GB V100 an
 
				 Our results were obtained by running the `scripts/run_squad.sh` training script in the TensorFlow 19.03-py3 NGC container on NVIDIA DGX-1 with 8x V100 16G GPUs. Performance numbers (in sentences per second) were averaged over an entire training epoch.
			
 
				 
			
 
				 
			
 
				-| **Number of GPUs** | **Batch size per GPU** | **FP32 sentences/sec** | **FP16 sentences/sec** | **Speed-up with mixed precision** | **Multi-gpu weak scaling with FP32** | **Multi-gpu weak scaling with FP16** |
			
 
				+| **Number of GPUs** | **Batch size per GPU** | **FP 32 sentences/sec** | **FP16 sentences/sec** | **Speed-up with mixed precision** | **Multi-gpu weak scaling with FP32** | **Multi-gpu weak scaling with FP16** |
			
 
				 |:---:|:---:|:------:|:-----:|:----:|:----:|:----:|
			
 
				 | 1 | 2 | 7.41 |11.86|1.6 |1.0 |1.0 |
			
 
				 | 4 | 2 |23.699|35.34|1.49|3.2 |2.98|
			
 
				 | 8 | 2 |44.29 |64.96|1.47|5.98|5.48|
			
 
				 
			
 
				 
			
 
				-| **Number of GPUs** | **Batch size per GPU** | **FP32 sentences/sec** | **FP16 sentences/sec** | **Speed-up with mixed precision** | **Multi-gpu weak scaling with FP32** | **Multi-gpu weak scaling with FP16** |
			
 
				+| **Number of GPUs** | **Batch size per GPU** | **FP 32 sentences/sec** | **FP16 sentences/sec** | **Speed-up with mixed precision** | **Multi-gpu weak scaling with FP32** | **Multi-gpu weak scaling with FP16** |
			
 
				 |:---:|:---:|:-----:|:-----:|:---:|:---:|:----:|
			
 
				 | 1 | 3 |  -  |14.86| - | - |1.0 |
			
 
				 | 4 | 3 |  -  |44.17| - | - |2.97|
			
@@ -431,14 +433,14 @@ To achieve these same results, follow the [Quick Start Guide](#quick-start-guide
 
				 Our results were obtained by running the `scripts/run_squad.sh` training script in the TensorFlow 19.03-py3 NGC container on NVIDIA DGX-1 with 8x V100 32G GPUs. Performance numbers (in sentences per second) were averaged over an entire training epoch.
			
 
				 
			
 
				 
			
 
				-| **Number of GPUs** | **Batch size per GPU** | **FP32 sentences/sec** | **FP16 sentences/sec** | **Speed-up with mixed precision** | **Multi-gpu weak scaling with FP32** | **Multi-gpu weak scaling with FP16** |
			
 
				+| **Number of GPUs** | **Batch size per GPU** | **FP 32 sentences/sec** | **FP16 sentences/sec** | **Speed-up with mixed precision** | **Multi-gpu weak scaling with FP32** | **Multi-gpu weak scaling with FP16** |
			
 
				 |---|---|-----|-----|----|----|----|
			
 
				 | 1 | 4 | 8.55|18.14|2.12|1.0 |1.0 |
			
 
				 | 4 | 4 |32.13|52.85|1.64|3.76|2.91|
			
 
				 | 8 | 4 |62.83|95.28|1.51|7.35|5.25|
			
 
				 
			
 
				 
			
 
				-| **Number of GPUs** | **Batch size per GPU** | **FP32 sentences/sec** | **FP16 sentences/sec** | **Speed-up with mixed precision** | **Multi-gpu weak scaling with FP32** | **Multi-gpu weak scaling with FP16** |
			
 
				+| **Number of GPUs** | **Batch size per GPU** | **FP 32 sentences/sec** | **FP16 sentences/sec** | **Speed-up with mixed precision** | **Multi-gpu weak scaling with FP32** | **Multi-gpu weak scaling with FP16** |
			
 
				 |---|---|-----|-------|---|---|----|
			
 
				 | 1 | 10|  -  | 27.69 | - | - |1.0 |
			
 
				 | 4 | 10|  -  | 85.193| - | - |3.07|
			
@@ -453,7 +455,7 @@ To achieve these same results, follow the [Quick Start Guide](#quick-start-guide
 
				 Our results were obtained by running the `scripts/run_squad.sh` training script in the TensorFlow 19.03-py3 NGC container on NVIDIA DGX-2 with 16x V100 32G GPUs. Performance numbers (in sentences per second) were averaged over an entire training epoch.
			
 
				 
			
 
				 
			
 
				-| **Number of GPUs** | **Batch size per GPU** | **FP32 sentences/sec** | **FP16 sentences/sec** | **Speed-up with mixed precision** | **Multi-gpu weak scaling with FP32** | **Multi-gpu weak scaling with FP16** |
			
 
				+| **Number of GPUs** | **Batch size per GPU** | **FP 32 sentences/sec** | **FP16 sentences/sec** | **Speed-up with mixed precision** | **Multi-gpu weak scaling with FP32** | **Multi-gpu weak scaling with FP16** |
			
 
				 |---|---|------|------|----|-----|----|
			
 
				 |  1| 4 |  8.80| 17.43|1.98| 1.0 |1.0 |
			
 
				 |  4| 4 | 33.22| 56.87|1.71| 3.78|3.26|
			
@@ -461,7 +463,7 @@ Our results were obtained by running the `scripts/run_squad.sh` training script
 
				 | 16| 4 |117.83|162.29|1.38|13.39|9.31|
			
 
				 
			
 
				 
			
 
				-| **Number of GPUs** | **Batch size per GPU** | **FP32 sentences/sec** | **FP16 sentences/sec** | **Speed-up with mixed precision** | **Multi-gpu weak scaling with FP32** | **Multi-gpu weak scaling with FP16** |
			
 
				+| **Number of GPUs** | **Batch size per GPU** | **FP 32 sentences/sec** | **FP16 sentences/sec** | **Speed-up with mixed precision** | **Multi-gpu weak scaling with FP32** | **Multi-gpu weak scaling with FP16** |
			
 
				 |---|---|---|------|---|---|----|
			
 
				 |  1| 10| - | 28.72| - | - |1.0 |
			
 
				 |  4| 10| - | 92.73| - | - |3.22|
			
@@ -477,7 +479,7 @@ To achieve these same results, follow the [Quick Start Guide](#quick-start-guide
 
				 #### NVIDIA DGX-1 16G (1x V100 16G)
			
 
				 Our results were obtained by running the `scripts/run_squad_inference.sh` inference script in the TensorFlow 19.03-py3 NGC container on NVIDIA DGX-1 with 1x V100 16G GPU. Performance numbers (in sentences per second) were averaged over an entire training epoch.
			
 
				 
			
 
				-| **Number of GPUs** | **Batch size per GPU** | **FP32 sentences/sec** | **FP16 sentences/sec** | **Speedup** |
			
 
				+| **Number of GPUs** | **Batch size per GPU** | **FP 32 sentences/sec** | **FP16 sentences/sec** | **Speedup** |
			
 
				 |---|---|-----|------|----|
			
 
				 | 1 | 8 |41.04|112.55|2.74|
			
 
				 
			
@@ -487,7 +489,7 @@ To achieve these same results, follow the [Quick Start Guide](#quick-start-guide
 
				 #### NVIDIA DGX-1 32G (1x V100 32G)
			
 
				 Our results were obtained by running the `scripts/run_squad_inference.sh` inference script in the TensorFlow 19.03-py3 NGC container on NVIDIA DGX-1 with 1x V100 32G GPU. Performance numbers (in sentences per second) were averaged over an entire training epoch.
			
 
				 
			
 
				-| **Number of GPUs** | **Batch size per GPU** | **FP32 sentences/sec** | **FP16 sentences/sec** | **Speedup** |
			
 
				+| **Number of GPUs** | **Batch size per GPU** | **FP 32 sentences/sec** | **FP16 sentences/sec** | **Speedup** |
			
 
				 |---|---|-----|------|----|
			
 
				 | 1 | 8 |36.78|118.54|3.22|
			
 
				 
			
@@ -496,7 +498,7 @@ To achieve these same results, follow the [Quick Start Guide](#quick-start-guide
 
				 #### NVIDIA DGX-2 32G (1x V100 32G)
			
 
				 Our results were obtained by running the `scripts/run_squad_inference.sh` inference script in the TensorFlow 19.03-py3 NGC container on NVIDIA DGX-2 with 1x V100 32G GPU. Performance numbers (in sentences per second) were averaged over an entire training epoch.
			
 
				 
			
 
				-| **Number of GPUs** | **Batch size per GPU** | **FP32 sentences/sec** | **FP16 sentences/sec** | **Speedup** |
			
 
				+| **Number of GPUs** | **Batch size per GPU** | **FP 32 sentences/sec** | **FP16 sentences/sec** | **Speedup** |
			
 
				 |---|---|-----|------|----|
			
 
				 | 1 | 8 |33.95|108.45|3.19|
			
 
				 
			
--- a/TensorFlow/LanguageModeling/BERT/run_squad.py
+++ b/TensorFlow/LanguageModeling/BERT/run_squad.py
@@ -28,7 +28,7 @@ import optimization
 
				 import tokenization
			
 
				 import six
			
 
				 import tensorflow as tf
			
 
				-
			
 
				+import horovod.tensorflow as hvd
			
 
				 flags = tf.flags
			
 
				 
			
 
				 FLAGS = flags.FLAGS
			
@@ -90,6 +90,7 @@ flags.DEFINE_integer("predict_batch_size", 8,
 
				 
			
 
				 flags.DEFINE_float("learning_rate", 5e-6, "The initial learning rate for Adam.")
			
 
				 
			
 
				+flags.DEFINE_bool("horovod", False, "Whether to use Horovod for multi-gpu runs")
			
 
				 flags.DEFINE_float("num_train_epochs", 3.0,
			
 
				                    "Total number of training epochs to perform.")
			
 
				 
			
@@ -154,7 +155,6 @@ flags.DEFINE_float(
 
				     "If null_score - best_non_null is greater than the threshold predict null.")
			
 
				 
			
 
				 flags.DEFINE_bool("use_fp16", False, "Whether to use fp32 or fp16 arithmetic on GPU.")
			
 
				-
			
 
				 flags.DEFINE_bool("use_xla", False, "Whether to enable XLA JIT compilation.")
			
 
				 
			
 
				 # report samples/sec, total loss and learning rate during training
			
@@ -463,7 +463,7 @@ def convert_examples_to_features(examples, tokenizer, max_seq_length,
 
				         start_position = 0
			
 
				         end_position = 0
			
 
				 
			
 
				-      if example_index < 20:
			
 
				+      if FLAGS.verbose_logging and example_index < 20:
			
 
				         tf.logging.info("*** Example ***")
			
 
				         tf.logging.info("unique_id: %s" % (unique_id))
			
 
				         tf.logging.info("example_index: %s" % (example_index))
			
@@ -593,7 +593,7 @@ def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
 
				       input_mask=input_mask,
			
 
				       token_type_ids=segment_ids,
			
 
				       use_one_hot_embeddings=use_one_hot_embeddings,
			
 
				-      compute_type=tf.float16 if FLAGS.use_fp16 else tf.float32)
			
 
				+      compute_type=tf.float32)
			
 
				 
			
 
				   final_hidden = model.get_sequence_output()
			
 
				 
			
@@ -631,10 +631,10 @@ def model_fn_builder(bert_config, init_checkpoint, learning_rate,
 
				 
			
 
				   def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
			
 
				     """The `model_fn` for TPUEstimator."""
			
 
				-
			
 
				-    tf.logging.info("*** Features ***")
			
 
				-    for name in sorted(features.keys()):
			
 
				-      tf.logging.info("  name = %s, shape = %s" % (name, features[name].shape))
			
 
				+    if FLAGS.verbose_logging:
			
 
				+        tf.logging.info("*** Features ***")
			
 
				+        for name in sorted(features.keys()):
			
 
				+          tf.logging.info("  name = %s, shape = %s" % (name, features[name].shape))
			
 
				 
			
 
				     unique_ids = features["unique_ids"]
			
 
				     input_ids = features["input_ids"]
			
@@ -655,7 +655,7 @@ def model_fn_builder(bert_config, init_checkpoint, learning_rate,
 
				 
			
 
				     initialized_variable_names = {}
			
 
				     scaffold_fn = None
			
 
				-    if init_checkpoint:
			
 
				+    if init_checkpoint and (hvd is None or hvd.rank() == 0):
			
 
				       (assignment_map, initialized_variable_names
			
 
				       ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
			
 
				       if use_tpu:
			
@@ -667,14 +667,16 @@ def model_fn_builder(bert_config, init_checkpoint, learning_rate,
 
				         scaffold_fn = tpu_scaffold
			
 
				       else:
			
 
				         tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
			
 
				+    
			
 
				+    if FLAGS.verbose_logging:
			
 
				+        tf.logging.info("**** Trainable Variables ****")
			
 
				+        for var in tvars:
			
 
				+          init_string = ""
			
 
				+          if var.name in initialized_variable_names:
			
 
				+            init_string = ", *INIT_FROM_CKPT*"
			
 
				+          tf.logging.info(" %d name = %s, shape = %s%s", 0 if hvd is None else hvd.rank(), var.name, var.shape,
			
 
				+                          init_string)
			
 
				 
			
 
				-    tf.logging.info("**** Trainable Variables ****")
			
 
				-    for var in tvars:
			
 
				-      init_string = ""
			
 
				-      if var.name in initialized_variable_names:
			
 
				-        init_string = ", *INIT_FROM_CKPT*"
			
 
				-      tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
			
 
				-                      init_string)
			
 
				 
			
 
				     output_spec = None
			
 
				     if mode == tf.estimator.ModeKeys.TRAIN:
			
@@ -721,7 +723,7 @@ def model_fn_builder(bert_config, init_checkpoint, learning_rate,
 
				   return model_fn
			
 
				 
			
 
				 
			
 
				-def input_fn_builder(input_file, seq_length, is_training, drop_remainder):
			
 
				+def input_fn_builder(input_file, seq_length, is_training, drop_remainder, hvd=None):
			
 
				   """Creates an `input_fn` closure to be passed to TPUEstimator."""
			
 
				 
			
 
				   name_to_features = {
			
@@ -751,14 +753,20 @@ def input_fn_builder(input_file, seq_length, is_training, drop_remainder):
 
				 
			
 
				   def input_fn(params):
			
 
				     """The actual input function."""
			
 
				+
			
 
				     batch_size = params["batch_size"]
			
 
				 
			
 
				     # For training, we want a lot of parallel reading and shuffling.
			
 
				     # For eval, we want no shuffling and parallel reading doesn't matter.
			
 
				-    d = tf.data.TFRecordDataset(input_file)
			
 
				     if is_training:
			
 
				-      d = d.repeat()
			
 
				-      d = d.shuffle(buffer_size=100)
			
 
				+        d = tf.data.TFRecordDataset(input_file, num_parallel_reads=4)
			
 
				+        if hvd is not None: d = d.shard(hvd.size(), hvd.rank())
			
 
				+        d = d.apply(tf.data.experimental.ignore_errors())
			
 
				+        d = d.shuffle(buffer_size=100)
			
 
				+        d = d.repeat()
			
 
				+    else:
			
 
				+        d = tf.data.TFRecordDataset(input_file)
			
 
				+
			
 
				 
			
 
				     d = d.apply(
			
 
				         tf.contrib.data.map_and_batch(
			
@@ -771,6 +779,7 @@ def input_fn_builder(input_file, seq_length, is_training, drop_remainder):
 
				   return input_fn
			
 
				 
			
 
				 
			
 
				+
			
 
				 RawResult = collections.namedtuple("RawResult",
			
 
				                                    ["unique_id", "start_logits", "end_logits"])
			
 
				 
			
@@ -1163,6 +1172,9 @@ def validate_flags_or_throw(bert_config):
 
				 def main(_):
			
 
				   tf.logging.set_verbosity(tf.logging.INFO)
			
 
				 
			
 
				+  if FLAGS.horovod:
			
 
				+    hvd.init()
			
 
				+
			
 
				   bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)
			
 
				 
			
 
				   validate_flags_or_throw(bert_config)
			
@@ -1203,7 +1215,7 @@ def main(_):
 
				   run_config = tf.contrib.tpu.RunConfig(
			
 
				       cluster=tpu_cluster_resolver,
			
 
				       master=FLAGS.master,
			
 
				-      model_dir=FLAGS.output_dir,
			
 
				+      model_dir=FLAGS.output_dir if master_process else None,
			
 
				       session_config=config,
			
 
				       save_checkpoints_steps=FLAGS.save_checkpoints_steps if master_process else None,
			
 
				       keep_checkpoint_max=1,
			
@@ -1221,7 +1233,7 @@ def main(_):
 
				     train_examples = read_squad_examples(
			
 
				         input_file=FLAGS.train_file, is_training=True)
			
 
				     num_train_steps = int(
			
 
				-        len(train_examples) / FLAGS.train_batch_size * FLAGS.num_train_epochs)
			
 
				+        len(train_examples) / global_batch_size * FLAGS.num_train_epochs)
			
 
				     num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)
			
 
				 
			
 
				     # Pre-shuffle the input to avoid having to make a very large shuffle
			
@@ -1248,7 +1260,7 @@ def main(_):
 
				   model_fn = model_fn_builder(
			
 
				       bert_config=bert_config,
			
 
				       init_checkpoint=FLAGS.init_checkpoint,
			
 
				-      learning_rate=FLAGS.learning_rate,
			
 
				+      learning_rate=learning_rate,
			
 
				       num_train_steps=num_train_steps,
			
 
				       num_warmup_steps=num_warmup_steps,
			
 
				       use_tpu=FLAGS.use_tpu,
			
@@ -1273,7 +1285,7 @@ def main(_):
 
				         filename=tmp_filenames[hvd_rank],
			
 
				         is_training=True)
			
 
				     convert_examples_to_features(
			
 
				-        examples=train_examples,
			
 
				+        examples=train_examples[start_index:end_index],
			
 
				         tokenizer=tokenizer,
			
 
				         max_seq_length=FLAGS.max_seq_length,
			
 
				         doc_stride=FLAGS.doc_stride,
			
@@ -1287,10 +1299,15 @@ def main(_):
 
				     tf.logging.info("  Num split examples = %d", train_writer.num_features)
			
 
				     tf.logging.info("  Batch size = %d", FLAGS.train_batch_size)
			
 
				     tf.logging.info("  Num steps = %d", num_train_steps)
			
 
				+    tf.logging.info("  LR = %f", learning_rate)
			
 
				     del train_examples
			
 
				+    if FLAGS.horovod:
			
 
				+        barrier = hvd.allreduce(tf.constant(0))
			
 
				+        with tf.Session(config=config) as sess:
			
 
				+          sess.run(barrier)
			
 
				 
			
 
				     train_input_fn = input_fn_builder(
			
 
				-        input_file=train_writer.filename,
			
 
				+        input_file=tmp_filenames,
			
 
				         seq_length=FLAGS.max_seq_length,
			
 
				         is_training=True,
			
 
				         drop_remainder=True,
			
@@ -1310,7 +1327,7 @@ def main(_):
 
				         tf.logging.info("%d Training Performance = %0.4f sentences/sec", hvd_rank, avg_sentences_per_second)
			
 
				         tf.logging.info("-----------------------------")
			
 
				 
			
 
				-  if FLAGS.do_predict:
			
 
				+  if FLAGS.do_predict and master_process:
			
 
				     eval_examples = read_squad_examples(
			
 
				         input_file=FLAGS.predict_file, is_training=False)
			
 
				 
			
@@ -1338,8 +1355,6 @@ def main(_):
 
				     tf.logging.info("  Num split examples = %d", len(eval_features))
			
 
				     tf.logging.info("  Batch size = %d", FLAGS.predict_batch_size)
			
 
				 
			
 
				-    all_results = []
			
 
				-
			
 
				     predict_input_fn = input_fn_builder(
			
 
				         input_file=eval_writer.filename,
			
 
				         seq_length=FLAGS.max_seq_length,