فهرست منبع

Merge pull request #619 from NVIDIA/pribalta/maskrcnn_checkpoint_update

Update checkpoint MaskRCNN
PrzemekS 5 سال پیش
والد
کامیت
cd7d870749

+ 10 - 10
TensorFlow2/Segmentation/MaskRCNN/download_and_process_pretrained_weights.sh

@@ -14,7 +14,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-#mkdir -p weights/
+mkdir -p /model
 cd /model
 
 # DOWNLOAD CHECKPOINTS
@@ -29,7 +29,7 @@ wget -N ${BASE_URL}/variables/variables.data-00000-of-00001 -P ${DEST_DIR}/varia
 wget -N ${BASE_URL}/variables/variables.index -P ${DEST_DIR}/variables
 
 ## ====================== resnet-nhwc-2018-02-07 ====================== ##
-BASE_URL="https://storage.googleapis.com/cloud-tpu-artifacts/resnet/resnet-nhwc-2018-02-07"
+BASE_URL="https://storage.googleapis.com/cloud-tpu-checkpoints/retinanet/resnet50-checkpoint-2018-02-07"
 DEST_DIR="resnet/resnet-nhwc-2018-02-07"
 
 wget -N ${BASE_URL}/checkpoint -P ${DEST_DIR}
@@ -38,12 +38,12 @@ wget -N ${BASE_URL}/model.ckpt-112603.index  -P ${DEST_DIR}
 wget -N ${BASE_URL}/model.ckpt-112603.meta -P ${DEST_DIR}
 
 ## ====================== resnet-nhwc-2018-10-14 ====================== ##
-BASE_URL="https://storage.googleapis.com/cloud-tpu-artifacts/resnet/resnet-nhwc-2018-10-14"
-DEST_DIR="resnet/resnet-nhwc-2018-10-14"
-
-wget -N ${BASE_URL}/model.ckpt-112602.data-00000-of-00001 -P ${DEST_DIR}
-wget -N ${BASE_URL}/model.ckpt-112602.index -P ${DEST_DIR}
-wget -N ${BASE_URL}/model.ckpt-112602.meta -P ${DEST_DIR}
+#BASE_URL="https://storage.googleapis.com/cloud-tpu-artifacts/resnet/resnet-nhwc-2018-10-14"
+#DEST_DIR="resnet/resnet-nhwc-2018-10-14"
+#
+#wget -N ${BASE_URL}/model.ckpt-112602.data-00000-of-00001 -P ${DEST_DIR}
+#wget -N ${BASE_URL}/model.ckpt-112602.index -P ${DEST_DIR}
+#wget -N ${BASE_URL}/model.ckpt-112602.meta -P ${DEST_DIR}
 
 # VERIFY CHECKPOINTS
 echo "Verifying and Processing Checkpoints..."
@@ -64,8 +64,8 @@ python inspect_checkpoint.py --file_name=mask-rcnn/1555659850/ckpt/model.ckpt \
 python inspect_checkpoint.py --file_name=resnet/resnet-nhwc-2018-02-07/model.ckpt-112603 \
     > resnet/resnet-nhwc-2018-02-07/tensors_and_shape.txt
 
-python inspect_checkpoint.py --file_name=resnet/resnet-nhwc-2018-10-14/model.ckpt-112602 \
-    > resnet/resnet-nhwc-2018-10-14/tensors_and_shape.txt
+#python inspect_checkpoint.py --file_name=resnet/resnet-nhwc-2018-10-14/model.ckpt-112602 \
+#    > resnet/resnet-nhwc-2018-10-14/tensors_and_shape.txt
 
 python inspect_checkpoint.py --file_name=resnet/extracted_from_maskrcnn/resnet50.ckpt \
     > resnet/extracted_from_maskrcnn/tensors_and_shape.txt

+ 1 - 1
TensorFlow2/Segmentation/MaskRCNN/scripts/benchmark_inference.py

@@ -36,7 +36,7 @@ def main():
         f'python {main_path}'
         f' --mode eval'
         f' --model_dir "{flags.model_dir}"'
-        f' --checkpoint "{os.path.join(flags.weights_dir, "resnet/resnet-nhwc-2018-10-14/model.ckpt-112602")}"'
+        f' --checkpoint "{os.path.join(flags.weights_dir, "resnet/resnet-nhwc-2018-02-07/model.ckpt-112603")}"'
         f' --validation_file_pattern "{os.path.join(flags.data_dir, "val*.tfrecord")}"'
         f' --val_json_file "{os.path.join(flags.data_dir, "annotations/instances_val2017.json")}"' 
         f' --num_steps_per_eval 200'

+ 1 - 1
TensorFlow2/Segmentation/MaskRCNN/scripts/benchmark_training.py

@@ -38,7 +38,7 @@ def main():
         f'python {main_path}'
         f' --mode train'
         f' --model_dir "{flags.model_dir}"'
-        f' --checkpoint "{os.path.join(flags.weights_dir, "resnet/resnet-nhwc-2018-10-14/model.ckpt-112602")}"'
+        f' --checkpoint "{os.path.join(flags.weights_dir, "resnet/resnet-nhwc-2018-02-07/model.ckpt-112603")}"'
         f' --training_file_pattern "{os.path.join(flags.data_dir, "train*.tfrecord")}"'
         f' --init_learning_rate 0.04'
         f' --total_steps 200'

+ 1 - 1
TensorFlow2/Segmentation/MaskRCNN/scripts/train_1GPU.sh

@@ -22,7 +22,7 @@ export CUDA_VISIBLE_DEVICES=0
 
 python ${BASEDIR}/../mask_rcnn_main.py \
     --mode="train_and_eval" \
-    --checkpoint="/model/resnet/resnet-nhwc-2018-10-14/model.ckpt-112602" \
+    --checkpoint="/model/resnet/resnet-nhwc-2018-02-07/model.ckpt-112603" \
     --eval_samples=5000 \
     --init_learning_rate=0.005 \
     --learning_rate_steps="240000,320000" \

+ 1 - 1
TensorFlow2/Segmentation/MaskRCNN/scripts/train_4GPU.sh

@@ -32,7 +32,7 @@ mpirun \
     --allow-run-as-root \
     python ${BASEDIR}/../mask_rcnn_main.py \
         --mode="train_and_eval" \
-        --checkpoint="/model/resnet/resnet-nhwc-2018-10-14/model.ckpt-112602" \
+        --checkpoint="/model/resnet/resnet-nhwc-2018-02-07/model.ckpt-112603" \
         --eval_samples=5000 \
         --init_learning_rate=0.02 \
         --learning_rate_steps="60000,80000" \

+ 1 - 1
TensorFlow2/Segmentation/MaskRCNN/scripts/train_8GPU.sh

@@ -30,7 +30,7 @@ mpirun \
     --allow-run-as-root \
     python ${BASEDIR}/../mask_rcnn_main.py \
         --mode="train_and_eval" \
-        --checkpoint="/model/resnet/resnet-nhwc-2018-10-14/model.ckpt-112602" \
+        --checkpoint="/model/resnet/resnet-nhwc-2018-02-07/model.ckpt-112603" \
         --eval_samples=5000 \
         --init_learning_rate=0.04 \
         --learning_rate_steps="30000,40000" \

+ 1 - 1
TensorFlow2/Segmentation/MaskRCNN/scripts/train_AMP_1GPU.sh

@@ -22,7 +22,7 @@ export CUDA_VISIBLE_DEVICES=0
 
 python ${BASEDIR}/../mask_rcnn_main.py \
     --mode="train_and_eval" \
-    --checkpoint="/model/resnet/resnet-nhwc-2018-10-14/model.ckpt-112602" \
+    --checkpoint="/model/resnet/resnet-nhwc-2018-02-07/model.ckpt-112603" \
     --eval_samples=5000 \
     --init_learning_rate=0.005 \
     --learning_rate_steps="240000,320000" \

+ 1 - 1
TensorFlow2/Segmentation/MaskRCNN/scripts/train_AMP_4GPU.sh

@@ -32,7 +32,7 @@ mpirun \
     --allow-run-as-root \
     python ${BASEDIR}/../mask_rcnn_main.py \
         --mode="train_and_eval" \
-        --checkpoint="/model/resnet/resnet-nhwc-2018-10-14/model.ckpt-112602" \
+        --checkpoint="/model/resnet/resnet-nhwc-2018-02-07/model.ckpt-112603" \
         --eval_samples=5000 \
         --init_learning_rate=0.02 \
         --learning_rate_steps="60000,80000" \

+ 1 - 1
TensorFlow2/Segmentation/MaskRCNN/scripts/train_AMP_8GPU.sh

@@ -30,7 +30,7 @@ mpirun \
     --allow-run-as-root \
     python ${BASEDIR}/../mask_rcnn_main.py \
         --mode="train_and_eval" \
-        --checkpoint="/model/resnet/resnet-nhwc-2018-10-14/model.ckpt-112602" \
+        --checkpoint="/model/resnet/resnet-nhwc-2018-02-07/model.ckpt-112603" \
         --eval_samples=5000 \
         --init_learning_rate=0.04 \
         --learning_rate_steps="30000,40000" \

+ 1 - 7
TensorFlow2/Segmentation/MaskRCNN/weights/extract_RN50_weights.py

@@ -20,7 +20,6 @@ import sys
 import getopt
 import logging
 import tensorflow as tf
-from distutils.version import LooseVersion
 
 """
 python weights/extract_RN50_weights.py \
@@ -45,12 +44,7 @@ def rename(checkpoint_dir, save_to, dry_run, verbose):
 
         total_vars_loaded = 0
 
-        if LooseVersion(tf.__version__) < LooseVersion("2.0.0"):
-            file_list = tf.contrib.framework.list_variables(checkpoint_dir)
-        else:
-            file_list = tf.train.list_variables(checkpoint_dir)
-
-        for var_name, _ in file_list:
+        for var_name, _ in tf.train.list_variables(checkpoint_dir):
 
             if "resnet50" in var_name:
                 # Load the variable