
[MaskRCNN/TF2] Adding MaskRCNN for TF1 and TF2

Przemek Strzelczyk 6 years ago
Parent
Commit
72ef5f45cb
100 changed files with 17021 additions and 3 deletions
  1. 5 3
      README.md
  2. 1 0
      TensorFlow/Segmentation/MaskRCNN/README.md
  3. 32 0
      TensorFlow2/Segmentation/MaskRCNN/.dockerignore
  4. 44 0
      TensorFlow2/Segmentation/MaskRCNN/.gitignore
  5. 57 0
      TensorFlow2/Segmentation/MaskRCNN/Dockerfile
  6. 201 0
      TensorFlow2/Segmentation/MaskRCNN/LICENSE
  7. 598 0
      TensorFlow2/Segmentation/MaskRCNN/README.md
  8. 336 0
      TensorFlow2/Segmentation/MaskRCNN/dataset/create_coco_tf_record.py
  9. 129 0
      TensorFlow2/Segmentation/MaskRCNN/dataset/download_and_preprocess_coco.sh
  10. 73 0
      TensorFlow2/Segmentation/MaskRCNN/download_and_process_pretrained_weights.sh
  11. 0 0
      TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/__init__.py
  12. 289 0
      TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/anchors.py
  13. 351 0
      TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/coco_metric.py
  14. 464 0
      TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/dataloader.py
  15. 466 0
      TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/dataloader_utils.py
  16. 564 0
      TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/distributed_executer.py
  17. 452 0
      TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/evaluation.py
  18. 24 0
      TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/hooks/__init__.py
  19. 114 0
      TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/hooks/ckpt_hook.py
  20. 518 0
      TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/hooks/logging_hook.py
  21. 216 0
      TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/hooks/pretrained_restore_hook.py
  22. 0 0
      TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/hyperparameters/__init__.py
  23. 179 0
      TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/hyperparameters/cmdline_utils.py
  24. 85 0
      TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/hyperparameters/flags_to_params.py
  25. 226 0
      TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/hyperparameters/hyperparameters.py
  26. 102 0
      TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/hyperparameters/mask_rcnn_params.py
  27. 398 0
      TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/hyperparameters/params_dict.py
  28. 89 0
      TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/hyperparameters/params_io.py
  29. 520 0
      TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/mask_rcnn_model.py
  30. 0 0
      TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/models/__init__.py
  31. 138 0
      TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/models/fpn.py
  32. 322 0
      TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/models/heads.py
  33. 102 0
      TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/models/keras_utils.py
  34. 582 0
      TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/models/resnet.py
  35. 0 0
      TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/object_detection/__init__.py
  36. 202 0
      TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/object_detection/argmax_matcher.py
  37. 269 0
      TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/object_detection/balanced_positive_negative_sampler.py
  38. 157 0
      TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/object_detection/box_coder.py
  39. 213 0
      TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/object_detection/box_list.py
  40. 125 0
      TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/object_detection/faster_rcnn_box_coder.py
  41. 244 0
      TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/object_detection/matcher.py
  42. 95 0
      TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/object_detection/minibatch_sampler.py
  43. 84 0
      TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/object_detection/ops.py
  44. 444 0
      TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/object_detection/preprocessor.py
  45. 138 0
      TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/object_detection/region_similarity_calculator.py
  46. 86 0
      TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/object_detection/shape_utils.py
  47. 308 0
      TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/object_detection/target_assigner.py
  48. 153 0
      TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/object_detection/tf_example_decoder.py
  49. 417 0
      TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/object_detection/visualization_utils.py
  50. 0 0
      TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/ops/__init__.py
  51. 503 0
      TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/ops/box_utils.py
  52. 209 0
      TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/ops/nms_ops.py
  53. 297 0
      TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/ops/postprocess_ops.py
  54. 206 0
      TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/ops/preprocess_ops.py
  55. 440 0
      TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/ops/roi_ops.py
  56. 348 0
      TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/ops/spatial_transform_ops.py
  57. 356 0
      TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/ops/training_ops.py
  58. 0 0
      TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/training/__init__.py
  59. 55 0
      TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/training/learning_rates.py
  60. 435 0
      TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/training/losses.py
  61. 0 0
      TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/utils/__init__.py
  62. 588 0
      TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/utils/box_utils.py
  63. 256 0
      TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/utils/coco_utils.py
  64. 232 0
      TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/utils/decorators.py
  65. 94 0
      TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/utils/distributed_utils.py
  66. 134 0
      TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/utils/lazy_imports.py
  67. 323 0
      TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/utils/logging_backend.py
  68. 398 0
      TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/utils/logging_formatter.py
  69. 32 0
      TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/utils/metaclasses.py
  70. 178 0
      TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/utils/meters.py
  71. 106 0
      TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/utils/metric_tracking.py
  72. 152 0
      TensorFlow2/Segmentation/MaskRCNN/mask_rcnn_main.py
  73. 33 0
      TensorFlow2/Segmentation/MaskRCNN/scripts/docker/build_tf1.sh
  74. 33 0
      TensorFlow2/Segmentation/MaskRCNN/scripts/docker/build_tf2.sh
  75. 27 0
      TensorFlow2/Segmentation/MaskRCNN/scripts/docker/launch_tf1.sh
  76. 27 0
      TensorFlow2/Segmentation/MaskRCNN/scripts/docker/launch_tf2.sh
  77. 34 0
      TensorFlow2/Segmentation/MaskRCNN/scripts/evaluation_AMP.sh
  78. 34 0
      TensorFlow2/Segmentation/MaskRCNN/scripts/evaluation_FP32.sh
  79. 40 0
      TensorFlow2/Segmentation/MaskRCNN/scripts/train_AMP_1GPU.sh
  80. 40 0
      TensorFlow2/Segmentation/MaskRCNN/scripts/train_AMP_1GPU_XLA.sh
  81. 50 0
      TensorFlow2/Segmentation/MaskRCNN/scripts/train_AMP_4GPU.sh
  82. 50 0
      TensorFlow2/Segmentation/MaskRCNN/scripts/train_AMP_4GPU_XLA.sh
  83. 48 0
      TensorFlow2/Segmentation/MaskRCNN/scripts/train_AMP_8GPU.sh
  84. 48 0
      TensorFlow2/Segmentation/MaskRCNN/scripts/train_AMP_8GPU_XLA.sh
  85. 40 0
      TensorFlow2/Segmentation/MaskRCNN/scripts/train_FP32_1GPU.sh
  86. 40 0
      TensorFlow2/Segmentation/MaskRCNN/scripts/train_FP32_1GPU_XLA.sh
  87. 50 0
      TensorFlow2/Segmentation/MaskRCNN/scripts/train_FP32_4GPU.sh
  88. 50 0
      TensorFlow2/Segmentation/MaskRCNN/scripts/train_FP32_4GPU_XLA.sh
  89. 48 0
      TensorFlow2/Segmentation/MaskRCNN/scripts/train_FP32_8GPU.sh
  90. 48 0
      TensorFlow2/Segmentation/MaskRCNN/scripts/train_FP32_8GPU_XLA.sh
  91. 0 0
      TensorFlow2/Segmentation/MaskRCNN/weights/.gitkeep
  92. 120 0
      TensorFlow2/Segmentation/MaskRCNN/weights/extract_RN50_weights.py
  93. 159 0
      TensorFlow2/Segmentation/MaskRCNN/weights/inspect_checkpoint.py
  94. 0 0
      TensorFlow2/Segmentation/MaskRCNN/weights/mask-rcnn/.gitkeep
  95. 0 0
      TensorFlow2/Segmentation/MaskRCNN/weights/mask-rcnn/1555659850/.gitkeep
  96. 48 0
      TensorFlow2/Segmentation/MaskRCNN/weights/pb_to_ckpt.py
  97. 0 0
      TensorFlow2/Segmentation/MaskRCNN/weights/resnet/.gitkeep
  98. 0 0
      TensorFlow2/Segmentation/MaskRCNN/weights/resnet/extracted_from_maskrcnn/.gitkeep
  99. 0 0
      TensorFlow2/Segmentation/MaskRCNN/weights/resnet/resnet-nhwc-2018-02-07/.gitkeep
  100. 0 0
      TensorFlow2/Segmentation/MaskRCNN/weights/resnet/resnet-nhwc-2018-10-14/.gitkeep

+ 5 - 3
README.md

@@ -19,9 +19,9 @@ The examples are organized first by framework, such as TensorFlow, PyTorch, etc.
 - __ResNext__ [[PyTorch](https://github.com/NVIDIA/DeepLearningExamples/tree/master/PyTorch/Classification/ConvNets)]
 - __SE-ResNext__ [[PyTorch](https://github.com/NVIDIA/DeepLearningExamples/tree/master/PyTorch/Classification/ConvNets)]
 - __SSD__ [[PyTorch](https://github.com/NVIDIA/DeepLearningExamples/tree/master/PyTorch/Detection/SSD)] [[TensorFlow](https://github.com/NVIDIA/DeepLearningExamples/tree/master/TensorFlow/Detection/SSD)]
-- __Mask R-CNN__ [[PyTorch](https://github.com/NVIDIA/DeepLearningExamples/tree/master/PyTorch/Segmentation/MaskRCNN)]
+- __Mask R-CNN__ [[PyTorch](https://github.com/NVIDIA/DeepLearningExamples/tree/master/PyTorch/Segmentation/MaskRCNN)] [[TensorFlow](https://github.com/NVIDIA/DeepLearningExamples/tree/master/TensorFlow2/Segmentation/MaskRCNN)] [[TensorFlow 2](https://github.com/NVIDIA/DeepLearningExamples/tree/master/TensorFlow2/Segmentation/MaskRCNN)] 
 - __U-Net(industrial)__ [[TensorFlow](https://github.com/NVIDIA/DeepLearningExamples/tree/master/TensorFlow/Segmentation/UNet_Industrial)]
-- __U-Net(medical)__ [[TensorFlow](https://github.com/NVIDIA/DeepLearningExamples/tree/master/TensorFlow/Segmentation/UNet_Medical)] [[TensorFlow2](https://github.com/NVIDIA/DeepLearningExamples/tree/master/TensorFlow2/Segmentation/UNet_Medical)]
+- __U-Net(medical)__ [[TensorFlow](https://github.com/NVIDIA/DeepLearningExamples/tree/master/TensorFlow/Segmentation/UNet_Medical)] [[TensorFlow 2](https://github.com/NVIDIA/DeepLearningExamples/tree/master/TensorFlow2/Segmentation/UNet_Medical)]
 - __VNet__ [[TensorFlow](https://github.com/NVIDIA/DeepLearningExamples/tree/master/TensorFlow/Segmentation/VNet)]
 
 ### Natural Language Processing
@@ -82,10 +82,12 @@ The examples are organized first by framework, such as TensorFlow, PyTorch, etc.
 | [Variational Autoencoder Collaborative Filtering](https://github.com/NVIDIA/DeepLearningExamples/tree/master/TensorFlow/Recommendation/VAE-CF) |TensorFlow  | N/A  | Yes  | Yes  | -  | -  |   -  | -  | -  |
 | [U-Net Industrial](https://github.com/NVIDIA/DeepLearningExamples/tree/master/TensorFlow/Segmentation/UNet_Industrial) |TensorFlow  | N/A  | Yes  | Yes  | -  | Yes  |   -  | -  | Yes  |
 | [U-Net Medical](https://github.com/NVIDIA/DeepLearningExamples/tree/master/TensorFlow/Segmentation/UNet_Medical) | TensorFlow  | N/A  | Yes  | Yes  | -  |  Yes  |-  |   -  | Yes  |
-| [U-Net Medical](https://github.com/NVIDIA/DeepLearningExamples/tree/master/TensorFlow2/Segmentation/UNet_Medical) | TensorFlow2  | N/A  | Yes  | Yes  | -  |  Yes  |-  |   -  | Yes  |
 | [V-Net Medical](https://github.com/NVIDIA/DeepLearningExamples/tree/master/TensorFlow/Segmentation/VNet) | TensorFlow  | N/A  | Yes  | Yes  | -  |  Yes  | Yes |   -  | Yes  |
+| [Mask R-CNN](https://github.com/NVIDIA/DeepLearningExamples/tree/master/TensorFlow2/Segmentation/MaskRCNN) |TensorFlow  | N/A  | Yes  | Yes  | -  | -  |   -  | -  | -  |
 | [GNMT v2](https://github.com/NVIDIA/DeepLearningExamples/tree/master/TensorFlow/Translation/GNMT) | TensorFlow  | N/A  | Yes  | Yes  | -  | -  |   -  | -  | -  |
 | [Faster Transformer](https://github.com/NVIDIA/DeepLearningExamples/tree/master/FasterTransformer) | Tensorflow  | N/A  | -  | -  | -  | Yes  |   -  | -  | -  |
+| [U-Net Medical](https://github.com/NVIDIA/DeepLearningExamples/tree/master/TensorFlow2/Segmentation/UNet_Medical) | TensorFlow 2  | N/A  | Yes  | Yes  | -  |  Yes  |-  |   -  | Yes  |
+| [Mask R-CNN](https://github.com/NVIDIA/DeepLearningExamples/tree/master/TensorFlow2/Segmentation/MaskRCNN) |TensorFlow 2  | N/A  | Yes  | Yes  | -  |  -  |-  |   -  | -  |
 | [ResNet50 v1.5](https://github.com/NVIDIA/DeepLearningExamples/tree/master/MxNet/Classification/RN50v1.5) | MXNet  | Yes  | Yes  | Yes  | -  | -  |   -  | -  | -  |
 | [HMM](https://github.com/NVIDIA/DeepLearningExamples/tree/master/Kaldi/SpeechRecognition) | Kaldi  | N/A  | -  | Yes  | -  | -  |   -  | Yes  | -  |
 

+ 1 - 0
TensorFlow/Segmentation/MaskRCNN/README.md

@@ -0,0 +1 @@
+Both TensorFlow 1.x and TensorFlow 2.x versions of Mask-RCNN are located in [TensorFlow2/Segmentation/MaskRCNN folder](https://github.com/NVIDIA/DeepLearningExamples/tree/master/TensorFlow2/Segmentation/MaskRCNN).

+ 32 - 0
TensorFlow2/Segmentation/MaskRCNN/.dockerignore

@@ -0,0 +1,32 @@
+.DS_Store
+.ipynb_checkpoints
+node_modules
+/.bazelrc
+/.tf_configure.bazelrc
+/bazel-*
+/bazel_pip
+/third_party/eigen3/mkl_include
+/third_party/mkl/*
+/tools/python_bin_path.sh
+/tools/git/gen
+/pip_test
+/_python_build
+*.pyc
+__pycache__
+*.swp
+.vscode/
+\.idea/
+\.git/
+core
+core/
+weights/mask-rcnn/
+weights/resnet/
+profiling/
+venv/
+nsys/
+
+# Git Directories
+tf-models/
+
+# Data Directories
+data_dir/

+ 44 - 0
TensorFlow2/Segmentation/MaskRCNN/.gitignore

@@ -0,0 +1,44 @@
+.DS_Store
+.ipynb_checkpoints
+node_modules
+/.bazelrc
+/.tf_configure.bazelrc
+/bazel-*
+/bazel_pip
+/third_party/eigen3/mkl_include
+/third_party/mkl/*
+/tools/python_bin_path.sh
+/tools/git/gen
+/pip_test
+/_python_build
+*.pyc
+__pycache__
+*.swp
+.vscode/
+\.idea/
+venv/
+
+# Git Directories
+tf-models/
+
+# Debug Dirs
+profiling/
+nvtx_plugins/
+nsys/
+
+# Data Directories
+data_dir/
+
+# Pretrained Weights Dir
+weights/mask-rcnn/1555659850/*
+weights/resnet/resnet-nhwc-2018-02-07/*
+weights/resnet/resnet-nhwc-2018-10-14/*
+weights/resnet/extracted_from_maskrcnn/*
+!weights/mask-rcnn/.gitkeep
+!weights/mask-rcnn/1555659850/.gitkeep
+!weights/mask-rcnn/1555659850/ckpt/.gitkeep
+!weights/mask-rcnn/1555659850/variables/.gitkeep
+!weights/resnet/.gitkeep
+!weights/resnet/extracted_from_maskrcnn/.gitkeep
+!weights/resnet/resnet-nhwc-2018-02-07/.gitkeep
+!weights/resnet/resnet-nhwc-2018-10-14/.gitkeep

+ 57 - 0
TensorFlow2/Segmentation/MaskRCNN/Dockerfile

@@ -0,0 +1,57 @@
+#===============================================================================
+#
+# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# ==============================================================================
+
+ARG FROM_IMAGE_NAME=nvcr.io/nvidia/tensorflow:20.02-tf2-py3
+FROM ${FROM_IMAGE_NAME}
+
+ENV DEBIAN_FRONTEND=noninteractive
+
+RUN rm -rf /workspace && mkdir -p /workspace
+ADD . /workspace
+WORKDIR /workspace
+
+RUN apt-get update && \
+    apt-get install -y libsm6 libxext6 libxrender-dev python3-tk cmake && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+# Make sure python and pip point to python3 and pip3
+RUN python -m pip install --upgrade pip && \
+    pip --no-cache-dir install \
+        Cython \
+        matplotlib \
+        opencv-python-headless \
+        mpi4py \
+        Pillow \
+        pytest \
+        pyyaml && \
+    git clone https://github.com/pybind/pybind11 /opt/pybind11 && \
+    cd /opt/pybind11 && cmake . && make install && pip install . && \
+    pip --no-cache-dir install \
+        'git+https://github.com/NVIDIA/cocoapi#egg=pycocotools&subdirectory=PythonAPI' && \
+    pip --no-cache-dir install \
+        'git+https://github.com/NVIDIA/dllogger'
+
+
+# Update protobuf 3 to 3.3.0
+RUN \
+    curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v3.3.0/protoc-3.3.0-linux-x86_64.zip && \
+    unzip -u protoc-3.3.0-linux-x86_64.zip -d protoc3 && \
+    mv protoc3/bin/* /usr/local/bin/ && \
+    mv protoc3/include/* /usr/local/include/
+

+ 201 - 0
TensorFlow2/Segmentation/MaskRCNN/LICENSE

@@ -0,0 +1,201 @@
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+   
+   Copyright 2019 NVIDIA Corporation
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.

+ 598 - 0
TensorFlow2/Segmentation/MaskRCNN/README.md

@@ -0,0 +1,598 @@
+# Mask R-CNN For TensorFlow
+
+This repository provides a script and recipe to train the Mask R-CNN model for TensorFlow to achieve state-of-the-art accuracy, and is tested and maintained by NVIDIA.
+
+## Table Of Contents
+
+- [Model overview](#model-overview)
+    * [Model architecture](#model-architecture)
+    * [Default configuration](#default-configuration)
+    * [Feature support matrix](#feature-support-matrix)
+        * [Features](#features)
+    * [Mixed precision training](#mixed-precision-training)
+        * [Enabling mixed precision](#enabling-mixed-precision)
+- [Setup](#setup)
+    * [Requirements](#requirements)
+- [Quick Start Guide](#quick-start-guide)
+- [Advanced](#advanced)
+    * [Scripts and sample code](#scripts-and-sample-code)
+    * [Parameters](#parameters)
+    * [Command-line options](#command-line-options)
+    * [Getting the data](#getting-the-data)
+        * [Dataset guidelines](#dataset-guidelines)
+    * [Training process](#training-process)
+    * [Inference process](#inference-process)
+- [Performance](#performance)
+    * [Benchmarking](#benchmarking)
+        * [Training performance benchmark](#training-performance-benchmark)
+        * [Inference performance benchmark](#inference-performance-benchmark)
+    * [Results](#results)
+        * [Training accuracy results in TensorFlow 1.1x](#training-accuracy-results-in-tensorflow-11x)
+            * [Training accuracy: NVIDIA DGX-1 (8x V100 16G)](#training-accuracy-nvidia-dgx-1-8x-v100-16g)
+            * [Training stability test](#training-stability-test)
+            * [Training performance results](#training-performance-results)
+            * [Training performance: NVIDIA DGX-1 (8x V100 16G)](#training-performance-nvidia-dgx-1-8x-v100-16g)
+        * [Training accuracy results in TensorFlow 2.x](#training-accuracy-results-in-tensorflow-2x)
+            * [Training accuracy: NVIDIA DGX-1 (8x V100 16G)](#training-accuracy-nvidia-dgx-1-8x-v100-16g_1)
+            * [Training stability test](#training-stability-test_1)
+            * [Training performance results](#training-performance-results_1)
+            * [Training performance: NVIDIA DGX-1 (8x V100 16G)](#training-performance-nvidia-dgx-1-8x-v100-16g_1)
+        * [Inference performance results in TensorFlow 1.1x](#inference-performance-results-in-tensorflow-11x)
+            * [Inference performance: NVIDIA DGX-1 (1x V100 16G)](#inference-performance-nvidia-dgx-1-1x-v100-16g)
+        * [Inference performance results in TensorFlow 2.x](#inference-performance-results-in-tensorflow-2x)
+            * [Inference performance: NVIDIA DGX-1 (1x V100 16G)](#inference-performance-nvidia-dgx-1-1x-v100-16g_1)
+- [Release notes](#release-notes)
+    * [Changelog](#changelog)
+    * [Known issues](#known-issues)
+
+## Model overview
+
+Mask R-CNN is a convolution-based neural network for the task of object instance segmentation. The paper describing the model can be found [here](https://arxiv.org/abs/1703.06870). NVIDIA’s Mask R-CNN 19.12 is an optimized version of [Google's TPU implementation](https://github.com/tensorflow/tpu/tree/master/models/official/mask_rcnn), leveraging mixed precision arithmetic using Tensor Cores on NVIDIA Volta and Turing GPUs while maintaining target accuracy. 
+Because this model trains with mixed precision using Tensor Cores on Volta, researchers can get results much faster than training without Tensor Cores. This model is tested against each NGC monthly container release to ensure consistent 
+accuracy and performance over time.
+
+This repository also contains scripts to interactively launch training, 
+benchmarking and inference routines in a Docker container.
+
+The major differences between the official implementation of the paper and our version of Mask R-CNN are as follows:
+
+- Mixed precision support with [TensorFlow AMP](https://docs.nvidia.com/deeplearning/frameworks/tensorflow-user-guide/index.html#tfamp).
+- Gradient accumulation to simulate larger batches.
+- Custom fused CUDA kernels for faster computations.
+
+Other publicly available implementations of Mask R-CNN include:
+
+- [NVIDIA PyTorch implementation](https://github.com/NVIDIA/DeepLearningExamples/tree/master/PyTorch/Segmentation/MaskRCNN)
+- [Matterport](https://github.com/matterport/Mask_RCNN)
+- [Tensorpack](https://github.com/tensorpack/tensorpack/tree/master/examples/FasterRCNN)
+
+### Model architecture
+
+Mask R-CNN builds on top of Faster R-CNN adding an additional mask head for the task of image segmentation.
+
+The architecture consists of the following:
+
+- ResNet-50 backbone with Feature Pyramid Network (FPN)
+- Region proposal network (RPN) head
+- RoI Align
+- Bounding box and classification head
+- Mask head
+
+### Default configuration
+
+The Mask R-CNN configuration and the hyper-parameters for training and testing purposes are in separate files.
+The default configuration of this model can be found at `mask_rcnn/hyperparameters/mask_rcnn_params.py`. 
+
+The default configuration is as follows:
+
+  - Feature extractor:
+    - Images are resized, preserving the aspect ratio, so that the shorter side length falls within [832, 1344]
+    - Ground truth mask size set to 112
+    - Backbone network weights are frozen after the second epoch
+
+  - RPN:
+    - Anchor stride set to 16
+    - Anchor sizes set to (32, 64, 128, 256, 512)
+    - Foreground IOU Threshold set to 0.7, Background IOU Threshold set to 0.3
+    - RPN target fraction of positive proposals set to 0.5
+    - Train Pre-NMS Top proposals set to 2000 per FPN layer
+    - Train Post-NMS Top proposals set to 1000
+    - Test Pre-NMS Top proposals set to 1000 per FPN layer
+    - Test Post-NMS Top proposals set to 1000
+    - RPN NMS Threshold set to 0.7
+
+  - RoI heads:
+    - Foreground threshold set to 0.5
+    - Batch size per image set to 512
+    - Positive fraction of batch set to 0.25
+
+The default hyper-parameters can be found at `mask_rcnn/hyperparameters/cmdline_utils.py`. 
+These hyperparameters can be overridden through command-line options in the launch scripts.
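+
+As an illustration, individual hyperparameters can be overridden directly on the command line (a sketch; the values shown are examples rather than tuned settings, and the flags are documented under [Parameters](#parameters) below):
+
+```bash
+python mask_rcnn_main.py \
+    --mode=train \
+    --init_learning_rate=0.01 \
+    --train_batch_size=2 \
+    --total_steps=45000
+```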
+
+### Feature support matrix
+
+The following features are supported by this model:
+
+| **Feature** | **Mask R-CNN** |
+|:---------:|:----------:|
+|Horovod Multi-GPU|Yes|
+|Automatic mixed precision (AMP)|Yes|        
+
+#### Features
+
+The following features are supported by this model.
+
+**Horovod**
+
+Horovod is a distributed training framework for TensorFlow, Keras, PyTorch and MXNet. The goal of Horovod is to make distributed deep learning fast and easy to use. For more information about how to get started with Horovod, see the [Horovod: Official repository](https://github.com/horovod/horovod).
+
+**Multi-GPU training with Horovod**
+
+Our model uses Horovod to implement efficient multi-GPU training with NCCL. For details, see example sources in this repository or see the [TensorFlow tutorial](https://github.com/horovod/horovod/#usage).
+
+**Automatic Mixed Precision (AMP)**
+
+Automatic Mixed Precision (TF-AMP) enables mixed precision training without any changes to the codebase by 
+performing automatic graph rewrites and loss scaling, controlled by an environment variable.
+
+### Mixed precision training
+
+Mixed precision is the combined use of different numerical precisions in a computational method. 
+[Mixed precision](https://arxiv.org/abs/1710.03740) training offers significant computational speedup by performing operations in half-precision format while storing minimal information in single-precision to retain as much information as possible in critical parts of the network. Since the introduction of 
+[Tensor Cores](https://developer.nvidia.com/tensor-cores) in the Volta and Turing architecture, significant training speedups are experienced by switching to mixed precision -- up to 3x overall speedup on the most arithmetically intense model architectures. Using mixed precision training requires two steps:
+
+1.  Porting the model to use the FP16 data type where appropriate.
+2.  Adding loss scaling to preserve small gradient values.
+
+The ability to train deep learning networks with lower precision was introduced in the Pascal architecture and first 
+supported in [CUDA 8](https://devblogs.nvidia.com/parallelforall/tag/fp16/) in the NVIDIA Deep Learning SDK.
+
+For information about:
+-   How to train using mixed precision, see the [Mixed Precision Training](https://arxiv.org/abs/1710.03740) paper and 
+[Training With Mixed Precision](https://docs.nvidia.com/deeplearning/sdk/Mixed-Precision-training/index.html) documentation.
+-   Techniques used for mixed precision training, see the 
+[Mixed Precision Training of Deep Neural Networks](https://devblogs.nvidia.com/mixed-precision-training-deep-neural-networks/) blog.
+-   How to access and enable AMP for TensorFlow, see 
+[Using TF-AMP](https://docs.nvidia.com/deeplearning/dgx/tensorflow-user-guide/index.html#tfamp) from the TensorFlow User Guide.
+
+#### Enabling mixed precision
+
+AMP for TensorFlow enables the full 
+[mixed precision methodology](https://docs.nvidia.com/deeplearning/sdk/mixed-precision-training/index.html#tensorflow) 
+in your existing TensorFlow model code.  AMP enables mixed precision training on Volta and Turing GPUs automatically. 
+The TensorFlow framework code makes all necessary model changes internally.
+
+In TF-AMP, the computational graph is optimized to use as few casts as necessary while maximizing the use of FP16, and loss scaling is applied automatically inside supported optimizers. AMP can also be configured to work with the existing experimental loss scaling optimizer, `tf.compat.v1.train.experimental.MixedPrecisionLossScaleOptimizer`, by disabling AMP's own scaling with a single environment variable so that only the automatic mixed precision graph optimization is performed. In either case, AMP rewrites the 
+computation graph with the operations necessary for mixed precision training and automatic loss scaling.
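+
+As a minimal sketch (assuming the NGC TensorFlow container), AMP can be enabled either through this model's `--use_amp` flag, documented under [Parameters](#parameters) below, or through the environment variable that controls the automatic graph rewrite:
+
+```bash
+# Enable the automatic mixed precision graph rewrite for an unmodified script.
+TF_ENABLE_AUTO_MIXED_PRECISION=1 python mask_rcnn_main.py --mode=train
+
+# Or use the flag provided by this repository's main script.
+python mask_rcnn_main.py --mode=train --use_amp
+```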
+
+## Setup
+
+The following section lists the requirements that you need to meet in order to start training the Mask R-CNN model.
+
+### Requirements
+
+This repository contains a Dockerfile that extends the TensorFlow NGC container and encapsulates some dependencies. 
+Aside from these dependencies, ensure you have the following components:
+
+-   [NVIDIA Docker](https://github.com/NVIDIA/nvidia-docker)
+-   [TensorFlow 20.02-tf2-py3 NGC Container](https://ngc.nvidia.com/catalog/containers/nvidia:tensorflow)
+-   [NVIDIA Volta](https://www.nvidia.com/en-us/data-center/volta-gpu-architecture/) or 
+[Turing](https://www.nvidia.com/en-us/geforce/turing/) based GPU
+
+For more information about how to get started with NGC containers, see the following sections from the 
+NVIDIA GPU Cloud Documentation and the Deep Learning Documentation:
+
+-   [Getting Started Using NVIDIA GPU Cloud](https://docs.nvidia.com/ngc/ngc-getting-started-guide/index.html)
+-   [Accessing And Pulling From The NGC Container Registry](https://docs.nvidia.com/deeplearning/frameworks/user-guide/index.html#accessing_registry)
+-   Running [TensorFlow](https://docs.nvidia.com/deeplearning/frameworks/tensorflow-release-notes/running.html#running)
+
+For those unable to use the TensorFlow NGC container, to set up the required environment or create your own 
+container, see the versioned 
+[NVIDIA Container Support Matrix](https://docs.nvidia.com/deeplearning/frameworks/support-matrix/index.html).
+
+## Quick Start Guide
+
+To train your model using mixed precision with Tensor Cores or using FP32, perform the following steps using 
+the default parameters of the Mask R-CNN model on the COCO 2017 dataset.
+
+1. Clone the repository.
+
+    ```bash
+    git clone https://github.com/NVIDIA/DeepLearningExamples.git
+    cd DeepLearningExamples/TensorFlow2/Segmentation/MaskRCNN
+    ```
+
+2.  Build the Mask R-CNN TensorFlow NGC container.
+
+    **For TensorFlow 1.1x:** `bash ./scripts/docker/build_tf1.sh`
+
+    **For TensorFlow 2.x:** `bash ./scripts/docker/build_tf2.sh`
+
+3.  Start an interactive session in the NGC container to run training/inference.
+
+    Run the following command to launch the Docker container; the only argument is the *absolute path* to the 
+    `data directory`, which holds or will hold the `tfrecords` data. If the data has not already been downloaded into the `data directory`, download it in step 4; otherwise, step 4 can be skipped.
+    
+    **For TensorFlow 1.1x:** `bash ./scripts/docker/launch_tf1.sh [data directory]`    
+    
+    **For TensorFlow 2.x:** `bash ./scripts/docker/launch_tf2.sh [data directory]`
+
+4.  Download and preprocess the dataset.
+
+    This repository provides scripts to download and extract the [COCO 2017 dataset](http://cocodataset.org/#download).  
+    If you already have the data, you do not need to run the following script; proceed to downloading the pre-trained weights. 
+    Data will be downloaded to the `data directory` provided in step 3.
+    
+    ```bash
+    cd dataset
+    bash download_and_preprocess_coco.sh /data
+    ```
+
+    By default, the data is organized into the following structure:
+
+    ```bash
+    <data/dir>
+    annotations/
+      instances_train2017.json
+      instances_val2017.json
+    train2017/
+      COCO_train2017_*.jpg
+    val2017/
+      COCO_val2017_*.jpg
+    ```
+
+    This repository also provides a script to download the pre-trained weights of the ResNet-50 backbone. 
+    The script creates a new directory named `weights` in the current directory and 
+    downloads the pre-trained weights into it.
+
+    ```bash
+    ./download_and_process_pretrained_weights.sh
+    ```
+
+    Ensure that the `weights` folder created has a `resnet` folder in it. Inside the `resnet` folder there 
+    should be 3 folders for checkpoints and weights: `extracted_from_maskrcnn`, `resnet-nhwc-2018-02-07` and 
+    `resnet-nhwc-2018-10-14`. Before moving to the next step, ensure the above folders are not empty.
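+
+    A quick sanity check (a sketch) is to list the three folders and confirm each one contains files:
+
+    ```bash
+    ls weights/resnet/extracted_from_maskrcnn \
+       weights/resnet/resnet-nhwc-2018-02-07 \
+       weights/resnet/resnet-nhwc-2018-10-14
+    ```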
+
+
+5. Start training.
+    
+    To run training for a default configuration (on 1/4/8 GPUs, AMP/FP32), run one of the scripts in the 
+    `./scripts` directory called `./scripts/train_{AMP,FP32}_{1,4,8}GPU{_XLA}.sh`. For example: 
+    
+    `bash ./scripts/train_AMP_8GPU.sh`
+
+    The above script trains a model and performs an evaluation on the COCO 2017 dataset. By default, this training script:
+
+    -  Uses 8 GPUs.
+    -  Saves a checkpoint every 3696 iterations and at the end of training. All checkpoints, evaluation results and training logs are saved to the `/results` directory (in the container, which can be mounted to a local directory).
+    -  Trains with mixed precision using Tensor Cores.
+
+6. Start validation/evaluation.
+
+    - For evaluation with AMP precision: `bash ./scripts/evaluation_AMP.sh`
+    - For evaluation with FP32 precision: `bash ./scripts/evaluation_FP32.sh`
+
+## Advanced
+
+The following sections provide greater details of the dataset, running training and inference, and the training results.
+
+### Scripts and sample code
+
+Descriptions of the key scripts and folders are provided below.
+
+-  `mask_rcnn` - Contains the code for the individual components of the model, such as the 
+backbone, FPN, RPN, and the mask and bbox heads.
+-  `download_and_process_pretrained_weights.sh` - Can be used to download backbone pre-trained weights.
+-  `scripts/` - A folder that contains shell scripts to train the model and perform inferences.
+    -   `train_{AMP,FP32}_{1,4,8}GPU{_XLA}.sh` - Training scripts for 1, 4, or 8 GPUs with AMP or FP32 precision, with TensorFlow XLA (Accelerated Linear Algebra) enabled or disabled.
+    -   `evaluation_{AMP,FP32}.sh` - Evaluation scripts with either AMP or FP32 precision.
+-  `dataset/` - A folder that contains shell scripts and Python files to download the dataset.
+-  `mask_rcnn_main.py` - The main script and the entry point for the training and evaluation process.
+-  `scripts/docker/` - A folder that contains scripts to build a Docker image and start an interactive session.
+
+### Parameters
+
+#### `mask_rcnn_main.py` script parameters
+
+You can modify the training behavior through the various flags of the `mask_rcnn_main.py` script and by overriding specific parameters in the config files. Flags in the `mask_rcnn_main.py` script are as follows (an example invocation follows the list):
+
+-   `--mode` - Specifies the action to take: `train`, `train_and_eval`, or `eval`.
+-   `--checkpoint` - The checkpoint of the backbone.
+-   `--eval_samples` - Number of samples to evaluate.
+-   `--init_learning_rate` - Initial learning rate.
+-   `--learning_rate_steps` - Specifies at which steps the learning rate is reduced.
+-   `--num_steps_per_eval` - Specifies the number of training steps between evaluations.
+-   `--total_steps` - Specifies the total number of steps for which training should be run.
+-   `--train_batch_size` - Training batch size per GPU.
+-   `--eval_batch_size` - Evaluation batch size per GPU.
+-   `--use_amp` - Whether to use AMP precision instead of FP32.
+-   `--use_xla` - Whether to enable TensorFlow XLA (Accelerated Linear Algebra).
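+
+Putting these together, a hypothetical full invocation might look as follows (a sketch only; the flag values shown are illustrative, and the provided launch scripts set tuned defaults):
+
+```bash
+python mask_rcnn_main.py \
+    --mode=train_and_eval \
+    --eval_samples=5000 \
+    --init_learning_rate=0.005 \
+    --num_steps_per_eval=3696 \
+    --total_steps=45000 \
+    --train_batch_size=4 \
+    --eval_batch_size=8 \
+    --use_amp \
+    --use_xla
+```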
+
+### Command-line options
+
+To see the full list of available options and their descriptions, use the `--helpfull` command-line option, for example:
+`python mask_rcnn_main.py --helpfull`
+
+### Getting the data
+
+The Mask R-CNN model was trained on the COCO 2017 dataset.  This dataset comes with a training and validation set.
+
+This repository contains the `./dataset/download_and_preprocess_coco.sh` script which automatically downloads and preprocesses the training and validation sets. The helper scripts are also present in the `dataset/` folder.
+
+#### Dataset guidelines
+
+The data should be organized into the following structure:
+
+```bash
+<data/dir>
+annotations/
+  instances_train2017.json
+  instances_val2017.json
+train2017/
+  COCO_train2017_*.jpg
+val2017/
+  COCO_val2017_*.jpg
+```
+
+### Training process
+
+Training is performed using the `mask_rcnn_main.py` script along with parameters defined in the config files. 
+The default config files can be found in 
+`mask_rcnn/hyperparameters/mask_rcnn_params.py` and `mask_rcnn/hyperparameters/cmdline_utils.py`. To specify which GPUs to train on, the `CUDA_VISIBLE_DEVICES` variable can be changed in the training scripts
+provided in the `scripts` folder.
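+
+For illustration, the relevant line inside a training script might look like this (a sketch; the device list depends on your system):
+
+```bash
+# Expose only the first four GPUs to TensorFlow/Horovod for this run.
+export CUDA_VISIBLE_DEVICES=0,1,2,3
+```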
+
+This script outputs results to the `/results` directory by default. The training log will contain information about:
+
+-   Loss, time per iteration, learning rate and memory metrics
+-   Performance values such as throughput per step
+-   Test accuracy and test performance values after evaluation
+
+### Inference process
+
+To run inference, run `mask_rcnn_main.py` with the command-line 
+parameter `--mode=eval`. To run inference with a checkpoint, set the command-line 
+parameter `--model_dir` to the absolute path of the checkpoint folder.
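+
+A minimal sketch of such an invocation (the checkpoint path is a placeholder):
+
+```bash
+python mask_rcnn_main.py \
+    --mode=eval \
+    --model_dir=/results/my_checkpoint_dir \
+    --eval_batch_size=8
+```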
+
+The inference log will contain information about:
+
+-   Inference time per step
+-   Inference throughput per step
+-   Evaluation accuracy and performance values
+
+
+## Performance
+
+### Benchmarking
+
+The following section shows how to run benchmarks measuring the model performance in training and inference modes.
+
+#### Training performance benchmark
+
+Training benchmarking can be performed with the provided training scripts.
+
+To run training on a single GPU with either AMP or FP32 precision, with or without XLA, run the following script:
+
+```bash
+bash scripts/train_{AMP,FP32}_1GPU{_XLA}.sh
+```
+
+To run training on 8 GPUs with either AMP or FP32 precision, with or without XLA, run the following script:
+
+```bash
+bash scripts/train_{AMP,FP32}_8GPU{_XLA}.sh
+```
+
+#### Inference performance benchmark
+
+Inference benchmarking can be performed with the provided evaluation scripts.
+
+To run inference on a single GPU with either AMP or FP32 precision, run the following script:
+
+```bash
+bash scripts/evaluation_{AMP,FP32}.sh
+```
+
+### Results
+
+The following sections provide details on how we achieved our performance and accuracy in training and inference.
+
+#### Training accuracy results in TensorFlow 1.1x
+
+##### Training accuracy: NVIDIA DGX-1 (8x V100 16G)
+
+Our results were obtained by building and launching the TensorFlow 1.1x Docker container with `./scripts/docker/build_tf1.sh` and `bash ./scripts/docker/launch_tf1.sh [data directory]` respectively, and running the `scripts/train_{AMP,FP32}_{1,4,8}GPU{_XLA}.sh` training script on NVIDIA DGX-1 with 8x V100 16G GPUs.
+
+| **Number of GPUs** | **Batch Size** | **Training time with AMP (hours)** | **Training time with FP32 (hours)** |
+| --- | --- | ----- | ----- |
+| 8 | 4 | 9.43 | 13.02 |
+
+
+| **Precision** | **Number of GPUs** | **Batch size/GPU** | **Final AP BBox** | **Final AP Segm** |
+| --- | --- | ----- | ----- | ----- |
+| **AMP** | 8 | 4 | 0.378 | 0.343 |
+| **FP32** | 8 | 4 | 0.377 | 0.343 |
+
+
+##### Training stability test
+
+The following tables compare the mAP scores across 5 different training runs with different seeds, for both AMP and FP32 respectively.  The runs showcase consistent convergence on all 5 seeds with very little deviation.
+
+| **Config** | **Seed #1** | **Seed #2** | **Seed #3** |  **Seed #4** | **Seed #5** | **mean** | **std** |
+| --- | --- | ----- | ----- | --- | --- | ----- | ----- |
+|  8 GPUs, AMP, final AP BBox  | 0.377 | 0.378 | 0.379 | 0.376  | 0.379 | 0.378 | 0.001 |
+| 8 GPUs, AMP, final AP Segm | 0.342 | 0.342 | 0.344 | 0.341  | 0.342 | 0.342 | 0.001 |
+
+
+| **Config** | **Seed #1** | **Seed #2** | **Seed #3** |  **Seed #4** | **Seed #5** | **mean** | **std** |
+| --- | --- | ----- | ----- | --- | --- | ----- | ----- |
+|  8 GPUs, FP32, final AP BBox  | 0.379 | 0.378 | 0.376 | 0.376  | 0.378 | 0.377 | 0.001 |
+| 8 GPUs, FP32, final AP Segm | 0.343 | 0.343 | 0.342 | 0.343  | 0.343 | 0.343 | 0.0004 |
+
+
+##### Training performance results
+
+##### Training performance: NVIDIA DGX-1 (8x V100 16G)
+
+| **Number of GPUs** | **Batch size/GPU** | **FP32 items/sec** | **AMP items/sec** | **Speed-up with mixed precision** |
+| --- | --- | ----- | ----- | --- |
+| 1 | 2 | 3.2 | 4.2 | 1.315 |
+| 4 | 2 | 15.1 | 22.4 | 1.48 |
+| 8 | 2 | 27.8 | 47.3 | 1.701 |
+
+| **Number of GPUs** | **Batch size/GPU** | **FP32 items/sec** | **AMP items/sec** | **Speed-up with mixed precision** |
+| --- | --- | ----- | ----- | --- |
+| 1 | 4 | 3.4 | 4.7 | 1.38 |
+| 4 | 4 | 21.7 | 31.03 | 1.42 |
+| 8 | 4 | 38.4 | 46.5 | 1.21 |
+
+Model performance can be further improved by using XLA. 
+
+Note: this feature is still experimental and can be unstable.
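+
+The XLA results below can be reproduced with the `_XLA` variants of the launch scripts, for example:
+
+```bash
+bash ./scripts/train_AMP_8GPU_XLA.sh
+```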
+
+| **Number of GPUs** | **Batch size/GPU** | **FP32 items/sec** | **AMP items/sec** | **Speed-up with mixed precision** |
+| --- | --- | ----- | ----- | ---- |
+| 8   |   4 |  48.9 |  57.7 | 1.18 |
+
+To achieve these same results, follow the steps in the [Quick Start Guide](#quick-start-guide).
+
+#### Training accuracy results in TensorFlow 2.x
+
+##### Training accuracy: NVIDIA DGX-1 (8x V100 16G)
+
+Our results were obtained by running the `scripts/train_{AMP,FP32}_{1,4,8}GPU{_XLA}.sh` training script in the 
+TensorFlow 20.02-tf2-py3 NGC container on NVIDIA DGX-1 with 8x V100 16G GPUs.
+
+| **Number of GPUs** | **Batch Size** | **Training time with AMP (hours)** | **Training time with FP32 (hours)** |
+| --- | --- | ----- | ----- |
+| 8 | 4 | 9.4 | 13.08 |
+
+
+| **Precision** | **Number of GPUs** | **Batch size/GPU** | **Final AP BBox** | **Final AP Segm** |
+| --- | --- | ----- | ----- | ----- |
+| **AMP** | 8 | 4 | 0.378 | 0.340 |
+| **FP32** | 8 | 4 | 0.378 | 0.341 |
+
+
+##### Training stability test
+
+The following tables compare the mAP scores across 5 different training runs with different seeds, for both 
+AMP and FP32 respectively.  The runs showcase consistent convergence on all 5 seeds with very little deviation.
+
+| **Config** | **Seed #1** | **Seed #2** | **Seed #3** |  **Seed #4** | **Seed #5** | **mean** | **std** |
+| --- | --- | ----- | ----- | --- | --- | ----- | ----- |
+|  8 GPUs, AMP, final AP BBox  | 0.378 | 0.376 | 0.377 | 0.379  | 0.377 | 0.3774 | 0.001 |
+| 8 GPUs, AMP, final AP Segm | 0.341 | 0.339 | 0.339 | 0.342  | 0.341 | 0.3402 | 0.001 |
+
+
+| **Config** | **Seed #1** | **Seed #2** | **Seed #3** |  **Seed #4** | **Seed #5** | **mean** | **std** |
+| --- | --- | ----- | ----- | --- | --- | ----- | ----- |
+|  8 GPUs, FP32, final AP BBox  | 0.378 | 0.379 | 0.377 | 0.379  | 0.378 | 0.3782 | 0.001 |
+| 8 GPUs, FP32, final AP Segm | 0.341 | 0.339 | 0.339 | 0.342  | 0.339 | 0.3401 | 0.002 |
+
+
+##### Training performance results
+
+##### Training performance: NVIDIA DGX-1 (8x V100 16G)
+
+| **Number of GPUs** | **Batch size/GPU** | **FP32 items/sec** | **AMP items/sec** | **Speed-up with mixed precision** |
+| --- | --- | ----- | ----- | --- |
+| 1 | 2 | 4.5 | 6.6 | 1.466 |
+| 4 | 2 | 21.0 | 34.8 | 1.657 |
+| 8 | 2 | 38.3 | 49.8 | 1.300 |
+
+| **Number of GPUs** | **Batch size/GPU** | **FP32 items/sec** | **AMP items/sec** | **Speed-up with mixed precision** |
+| --- | --- | ----- | ----- | --- |
+| 1 | 4 | 4.7 | 7.0 | 1.489 |
+| 4 | 4 | 22.2 | 35.8 | 1.612 |
+| 8 | 4 | 38.9 | 54.1 | 1.390 |
+
+Model performance can be further improved by using XLA. 
+
+Note: this feature is still experimental and can be unstable.
+
+| **Number of GPUs** | **Batch size/GPU** | **FP32 items/sec** | **AMP items/sec** | **Speed-up with mixed precision** |
+| --- | --- | ----- | ----- | ---- |
+| 8   |   4 |  37.4 |  43.5 | 1.16 |
+
+To achieve these same results, follow the steps in the [Quick Start Guide](#quick-start-guide).
+
+#### Inference performance results in TensorFlow 1.1x
+
+##### Inference performance: NVIDIA DGX-1 (1x V100 16G)
+
+Our results were obtained by building and launching the TensorFlow 1.1x Docker container with `./scripts/docker/build_tf1.sh` and `bash ./scripts/docker/launch_tf1.sh [data directory]` respectively, and running the `./scripts/evaluation_{AMP,FP32}.sh` script on NVIDIA DGX-1 with 1x V100 16G GPUs. Performance numbers (in images per second)
+were averaged over an entire evaluation epoch.
+
+| **Number of GPUs** | **Batch size/GPU** | **FP32 items/sec** | **AMP items/sec** | **Speedup** |
+| --- | --- | ----- | ----- | ----- |
+|  1  | 8 | 5.2 | 6.6 | 1.269 |
+
+Latency is computed as the time taken to process a single batch when batches are fed into the model one after another, i.e., with no pipelining.
+
+Precision AMP
+
+| Batch Size (per GPU) | Throughput-Average (images/sec) | Latency-Average (sec) | Latency-90% (sec) | Latency-95% (sec) | Latency-99% (sec) |
+|------------|------------------------------|---------------------|-----------------|-----------------|-----------------|
+| 2 | 5.5 | 0.4407  | 0.4134 | 0.4234 | 0.4510 |
+| 4 | 6.2 | 0.7716  | 0.7241 | 0.7361 | 0.8878 |
+| 8 | 6.6 | 1.3771  | 1.2011 | 1.2340 | 1.6586 |
+
+
+Precision FP32
+
+| Batch Size (per GPU) | Throughput-Average (images/sec) | Latency-Average (sec) | Latency-90% (sec) | Latency-95% (sec) | Latency-99% (sec) |
+|------------|------------------------------|---------------------|-----------------|-----------------|-----------------|
+| 2 | 4.9 | 0.4638  | 0.4062 | 0.4528 | 0.5144 |
+| 4 | 4.7 | 0.9632  | 0.9055 | 0.9293 | 0.9868 |
+| 8 | 6.0 | 2.1580  | 2.0107 | 2.0368 | 2.4152 |
+
+To achieve these same results, follow the steps in the [Quick Start Guide](#quick-start-guide).
+
+#### Inference performance results in TensorFlow 2.x
+
+##### Inference performance: NVIDIA DGX-1 (1x V100 16G)
+
+Our results were obtained by running the `./scripts/evaluation_{AMP,FP32}.sh` evaluation script in the TensorFlow 20.02-tf2-py3 
+NGC container on NVIDIA DGX-1 with 1x V100 16G GPUs. Performance numbers (in images per second)
+were averaged over an entire evaluation epoch.
+
+| **Number of GPUs** | **Batch size/GPU** | **FP32 items/sec** | **AMP items/sec** | **Speedup** |
+| --- | --- | ----- | ----- | ----- |
+|  1  | 8 | 5.4 | 5.5 | 1.018 |
+
+Latency is computed as the time taken to process a single batch when batches are fed into the model one after another, i.e., with no pipelining.
+
+Precision AMP
+
+| Batch Size (per GPU) | Throughput-Average (images/sec) | Latency-Average (sec) | Latency-90% (sec) | Latency-95% (sec) | Latency-99% (sec) |
+|------------|------------------------------|---------------------|-----------------|-----------------|-----------------|
+| 2 | 6.8 | 0.39  | 0.38 | 0.39 | 0.42 |
+| 4 | 7.4 | 0.59  | 0.58 | 0.59 | 0.64 |
+| 8 | 5.5 | 1.71  | 1.69 | 1.7 | 1.83 |
+
+
+Precision FP32
+
+| Batch Size (per GPU) | Throughput-Average (images/sec) | Latency-Average (sec) | Latency-90% (sec) | Latency-95% (sec) | Latency-99% (sec) |
+|------------|------------------------------|---------------------|-----------------|-----------------|-----------------|
+| 2 | 5.4 | 0.46  | 0.41 | 0.42 | 0.43 |
+| 4 | 5.3 | 0.86  | 0.82 | 0.84 | 0.89 |
+| 8 | 5.4 | 1.69  | 1.65 | 1.68 | 1.83 |
+
+To achieve these same results, follow the steps in the [Quick Start Guide](#quick-start-guide).
+
+## Release notes
+
+### Changelog
+
+November 2019
+
+- Initial release
+
+### Known issues
+
+-  The behavior of the model can be unstable when running with TensorFlow XLA enabled.
+
+
+

+ 336 - 0
TensorFlow2/Segmentation/MaskRCNN/dataset/create_coco_tf_record.py

@@ -0,0 +1,336 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+r"""Convert raw COCO dataset to TFRecord for object_detection.
+
+Example usage:
+    python create_coco_tf_record.py --logtostderr \
+      --train_image_dir="${TRAIN_IMAGE_DIR}" \
+      --val_image_dir="${VAL_IMAGE_DIR}" \
+      --test_image_dir="${TEST_IMAGE_DIR}" \
+      --train_annotations_file="${TRAIN_ANNOTATIONS_FILE}" \
+      --val_annotations_file="${VAL_ANNOTATIONS_FILE}" \
+      --testdev_annotations_file="${TESTDEV_ANNOTATIONS_FILE}" \
+      --output_dir="${OUTPUT_DIR}"
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+
+import hashlib
+import io
+import json
+import multiprocessing
+import os
+from absl import app
+from absl import flags
+import numpy as np
+import PIL.Image
+
+from pycocotools import mask
+from research.object_detection.utils import dataset_util
+from research.object_detection.utils import label_map_util
+
+import tensorflow as tf
+
+flags.DEFINE_boolean('include_masks', False,
+                     'Whether to include instance segmentation masks '
+                     '(PNG encoded) in the result. default: False.')
+flags.DEFINE_string('train_image_dir', '', 'Training image directory.')
+flags.DEFINE_string('val_image_dir', '', 'Validation image directory.')
+flags.DEFINE_string('test_image_dir', '', 'Test image directory.')
+flags.DEFINE_string('train_object_annotations_file', '', '')
+flags.DEFINE_string('val_object_annotations_file', '', '')
+flags.DEFINE_string('train_caption_annotations_file', '', '')
+flags.DEFINE_string('val_caption_annotations_file', '', '')
+flags.DEFINE_string('testdev_annotations_file', '',
+                    'Test-dev annotations JSON file.')
+flags.DEFINE_string('output_dir', '/tmp/', 'Output data directory.')
+
+FLAGS = flags.FLAGS
+
+tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO)
+
+
+def create_tf_example(image,
+                      bbox_annotations,
+                      caption_annotations,
+                      image_dir,
+                      category_index,
+                      include_masks=False):
+  """Converts image and annotations to a tf.Example proto.
+
+  Args:
+    image: dict with keys:
+      [u'license', u'file_name', u'coco_url', u'height', u'width',
+      u'date_captured', u'flickr_url', u'id']
+    bbox_annotations:
+      list of dicts with keys:
+      [u'segmentation', u'area', u'iscrowd', u'image_id',
+      u'bbox', u'category_id', u'id']
+      Notice that bounding box coordinates in the official COCO dataset are
+      given as [x, y, width, height] tuples using absolute coordinates where
+      x, y represent the top-left (0-indexed) corner.  This function converts
+      to the format expected by the TensorFlow Object Detection API (which is
+      [ymin, xmin, ymax, xmax] with coordinates normalized relative to image
+      size).
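+      For example (illustrative values): a COCO bbox [x=10, y=20, w=30, h=40]
+      in an image of width 200 and height 100 maps to
+      [ymin=0.2, xmin=0.05, ymax=0.6, xmax=0.2].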
+    image_dir: directory containing the image files.
+    category_index: a dict containing COCO category information keyed
+      by the 'id' field of each category.  See the
+      label_map_util.create_category_index function.
+    include_masks: Whether to include instance segmentation masks
+      (PNG encoded) in the result. default: False.
+  Returns:
+    example: The converted tf.Example
+    num_annotations_skipped: Number of (invalid) annotations that were ignored.
+
+  Raises:
+    ValueError: if the image pointed to by data['filename'] is not a valid JPEG
+  """
+  image_height = image['height']
+  image_width = image['width']
+  filename = image['file_name']
+  image_id = image['id']
+
+  full_path = os.path.join(image_dir, filename)
+  with tf.io.gfile.GFile(full_path, 'rb') as fid:
+    encoded_jpg = fid.read()
+  encoded_jpg_io = io.BytesIO(encoded_jpg)
+  image = PIL.Image.open(encoded_jpg_io)
+  key = hashlib.sha256(encoded_jpg).hexdigest()
+
+  xmin = []
+  xmax = []
+  ymin = []
+  ymax = []
+  is_crowd = []
+  category_names = []
+  category_ids = []
+  area = []
+  encoded_mask_png = []
+  num_annotations_skipped = 0
+  for object_annotations in bbox_annotations:
+    (x, y, width, height) = tuple(object_annotations['bbox'])
+    if width <= 0 or height <= 0:
+      num_annotations_skipped += 1
+      continue
+    if x + width > image_width or y + height > image_height:
+      num_annotations_skipped += 1
+      continue
+    xmin.append(float(x) / image_width)
+    xmax.append(float(x + width) / image_width)
+    ymin.append(float(y) / image_height)
+    ymax.append(float(y + height) / image_height)
+    is_crowd.append(object_annotations['iscrowd'])
+    category_id = int(object_annotations['category_id'])
+    category_ids.append(category_id)
+    category_names.append(category_index[category_id]['name'].encode('utf8'))
+    area.append(object_annotations['area'])
+
+    if include_masks:
+      run_len_encoding = mask.frPyObjects(object_annotations['segmentation'],
+                                          image_height, image_width)
+      binary_mask = mask.decode(run_len_encoding)
+      if not object_annotations['iscrowd']:
+        binary_mask = np.amax(binary_mask, axis=2)
+      pil_image = PIL.Image.fromarray(binary_mask)
+      output_io = io.BytesIO()
+      pil_image.save(output_io, format='PNG')
+      encoded_mask_png.append(output_io.getvalue())
+
+  captions = []
+  for caption_annotation in caption_annotations:
+    captions.append(caption_annotation['caption'].encode('utf8'))
+
+  feature_dict = {
+      'image/height':
+          dataset_util.int64_feature(image_height),
+      'image/width':
+          dataset_util.int64_feature(image_width),
+      'image/filename':
+          dataset_util.bytes_feature(filename.encode('utf8')),
+      'image/source_id':
+          dataset_util.bytes_feature(str(image_id).encode('utf8')),
+      'image/key/sha256':
+          dataset_util.bytes_feature(key.encode('utf8')),
+      'image/encoded':
+          dataset_util.bytes_feature(encoded_jpg),
+      'image/caption':
+        dataset_util.bytes_list_feature(captions),
+      'image/format':
+          dataset_util.bytes_feature('jpeg'.encode('utf8')),
+      'image/object/bbox/xmin':
+          dataset_util.float_list_feature(xmin),
+      'image/object/bbox/xmax':
+          dataset_util.float_list_feature(xmax),
+      'image/object/bbox/ymin':
+          dataset_util.float_list_feature(ymin),
+      'image/object/bbox/ymax':
+          dataset_util.float_list_feature(ymax),
+      'image/object/class/text':
+          dataset_util.bytes_list_feature(category_names),
+      'image/object/class/label':
+          dataset_util.int64_list_feature(category_ids),
+      'image/object/is_crowd':
+          dataset_util.int64_list_feature(is_crowd),
+      'image/object/area':
+          dataset_util.float_list_feature(area),
+  }
+  if include_masks:
+    feature_dict['image/object/mask'] = (
+        dataset_util.bytes_list_feature(encoded_mask_png))
+  example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
+  return key, example, num_annotations_skipped
+
+
+def _pool_create_tf_example(args):
+  return create_tf_example(*args)
+
+
+def _load_object_annotations(object_annotations_file):
+  with tf.io.gfile.GFile(object_annotations_file, 'r') as fid:
+    obj_annotations = json.load(fid)
+
+  images = obj_annotations['images']
+  category_index = label_map_util.create_category_index(
+      obj_annotations['categories'])
+
+  img_to_obj_annotation = collections.defaultdict(list)
+  tf.compat.v1.logging.info('Building bounding box index.')
+  for annotation in obj_annotations['annotations']:
+    image_id = annotation['image_id']
+    img_to_obj_annotation[image_id].append(annotation)
+
+  missing_annotation_count = 0
+  for image in images:
+    image_id = image['id']
+    if image_id not in img_to_obj_annotation:
+      missing_annotation_count += 1
+
+  tf.compat.v1.logging.info('%d images are missing bboxes.', missing_annotation_count)
+
+  return images, img_to_obj_annotation, category_index
+
+
+def _load_caption_annotations(caption_annotations_file):
+  with tf.io.gfile.GFile(caption_annotations_file, 'r') as fid:
+    caption_annotations = json.load(fid)
+
+  img_to_caption_annotation = collections.defaultdict(list)
+  tf.compat.v1.logging.info('Building caption index.')
+  for annotation in caption_annotations['annotations']:
+    image_id = annotation['image_id']
+    img_to_caption_annotation[image_id].append(annotation)
+
+  missing_annotation_count = 0
+  images = caption_annotations['images']
+  for image in images:
+    image_id = image['id']
+    if image_id not in img_to_caption_annotation:
+      missing_annotation_count += 1
+
+  tf.compat.v1.logging.info('%d images are missing captions.', missing_annotation_count)
+
+  return img_to_caption_annotation
+
+
+def _create_tf_record_from_coco_annotations(
+    object_annotations_file,
+    caption_annotations_file,
+    image_dir, output_path, include_masks, num_shards):
+  """Loads COCO annotation json files and converts to tf.Record format.
+
+  Args:
+    object_annotations_file: JSON file containing bounding box annotations.
+    caption_annotations_file: JSON file containing caption annotations.
+    image_dir: Directory containing the image files.
+    output_path: Path to output tf.Record file.
+    include_masks: Whether to include instance segmentation masks
+      (PNG encoded) in the result. default: False.
+    num_shards: Number of output files to create.
+  """
+
+  tf.compat.v1.logging.info('writing to output path: %s', output_path)
+  writers = [
+      tf.io.TFRecordWriter(output_path + '-%05d-of-%05d.tfrecord' %
+                                  (i, num_shards)) for i in range(num_shards)
+  ]
+
+  images, img_to_obj_annotation, category_index = (
+      _load_object_annotations(object_annotations_file))
+  img_to_caption_annotation = (
+      _load_caption_annotations(caption_annotations_file))
+
+  pool = multiprocessing.Pool()
+  total_num_annotations_skipped = 0
+  for idx, (_, tf_example, num_annotations_skipped) in enumerate(
+      pool.imap(_pool_create_tf_example,
+                [(image,
+                  img_to_obj_annotation[image['id']],
+                  img_to_caption_annotation[image['id']],
+                  image_dir,
+                  category_index,
+                  include_masks)
+                 for image in images])):
+    if idx % 100 == 0:
+      tf.compat.v1.logging.info('On image %d of %d', idx, len(images))
+
+    total_num_annotations_skipped += num_annotations_skipped
+    writers[idx % num_shards].write(tf_example.SerializeToString())
+
+  pool.close()
+  pool.join()
+
+  for writer in writers:
+    writer.close()
+
+  tf.compat.v1.logging.info('Finished writing, skipped %d annotations.',
+                  total_num_annotations_skipped)
+
+
+def main(_):
+  assert FLAGS.train_image_dir, '`train_image_dir` missing.'
+  assert FLAGS.val_image_dir, '`val_image_dir` missing.'
+  assert FLAGS.test_image_dir, '`test_image_dir` missing.'
+
+  if not tf.io.gfile.isdir(FLAGS.output_dir):
+    tf.io.gfile.makedirs(FLAGS.output_dir)
+  train_output_path = os.path.join(FLAGS.output_dir, 'train')
+  val_output_path = os.path.join(FLAGS.output_dir, 'val')
+  testdev_output_path = os.path.join(FLAGS.output_dir, 'test-dev')
+
+  _create_tf_record_from_coco_annotations(
+      FLAGS.train_object_annotations_file,
+      FLAGS.train_caption_annotations_file,
+      FLAGS.train_image_dir,
+      train_output_path,
+      FLAGS.include_masks,
+      num_shards=256)
+  _create_tf_record_from_coco_annotations(
+      FLAGS.val_object_annotations_file,
+      FLAGS.val_caption_annotations_file,
+      FLAGS.val_image_dir,
+      val_output_path,
+      FLAGS.include_masks,
+      num_shards=32)
+
+
+if __name__ == '__main__':
+  tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO)
+  app.run(main)

+ 129 - 0
TensorFlow2/Segmentation/MaskRCNN/dataset/download_and_preprocess_coco.sh

@@ -0,0 +1,129 @@
+#!/bin/bash
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+# Script to download and preprocess the COCO data set for detection.
+#
+# The outputs of this script are TFRecord files containing serialized
+# tf.Example protocol buffers. See create_coco_tf_record.py for details of how
+# the tf.Example protocol buffers are constructed and see
+# http://cocodataset.org/#overview for an overview of the dataset.
+#
+# usage:
+#  bash download_and_preprocess_coco.sh /data-dir/coco
+set -e
+set -x
+
+
+if [ -z "$1" ]; then
+  echo "usage download_and_preprocess_coco.sh [data dir]"
+  exit
+fi
+
+#sudo apt install -y protobuf-compiler python-pil python-lxml\
+#  python-pip python-dev git unzip
+
+#pip install Cython git+https://github.com/cocodataset/cocoapi#subdirectory=PythonAPI
+
+echo "Cloning Tensorflow models directory (for conversion utilities)"
+if [ ! -e tf-models ]; then
+  git clone https://github.com/tensorflow/models tf-models
+fi
+
+(cd tf-models/research && protoc object_detection/protos/*.proto --python_out=.)
+
+UNZIP="unzip -nq"
+
+# Create the output directories.
+OUTPUT_DIR="${1%/}"
+SCRATCH_DIR="${OUTPUT_DIR}/raw-data"
+mkdir -p "${OUTPUT_DIR}"
+mkdir -p "${SCRATCH_DIR}"
+CURRENT_DIR=$(pwd)
+
+# Helper function to download and unpack a .zip file.
+function download_and_unzip() {
+  local BASE_URL=${1}
+  local FILENAME=${2}
+
+  if [ ! -f ${FILENAME} ]; then
+    echo "Downloading ${FILENAME} to $(pwd)"
+    wget -nd -c "${BASE_URL}/${FILENAME}"
+  else
+    echo "Skipping download of ${FILENAME}"
+  fi
+  echo "Unzipping ${FILENAME}"
+  ${UNZIP} ${FILENAME}
+}
+
+cd ${SCRATCH_DIR}
+
+# Download the images.
+BASE_IMAGE_URL="http://images.cocodataset.org/zips"
+
+TRAIN_IMAGE_FILE="train2017.zip"
+download_and_unzip ${BASE_IMAGE_URL} ${TRAIN_IMAGE_FILE}
+TRAIN_IMAGE_DIR="${SCRATCH_DIR}/train2017"
+
+VAL_IMAGE_FILE="val2017.zip"
+download_and_unzip ${BASE_IMAGE_URL} ${VAL_IMAGE_FILE}
+VAL_IMAGE_DIR="${SCRATCH_DIR}/val2017"
+
+TEST_IMAGE_FILE="test2017.zip"
+download_and_unzip ${BASE_IMAGE_URL} ${TEST_IMAGE_FILE}
+TEST_IMAGE_DIR="${SCRATCH_DIR}/test2017"
+
+# Download the annotations.
+BASE_INSTANCES_URL="http://images.cocodataset.org/annotations"
+INSTANCES_FILE="annotations_trainval2017.zip"
+download_and_unzip ${BASE_INSTANCES_URL} ${INSTANCES_FILE}
+
+TRAIN_OBJ_ANNOTATIONS_FILE="${SCRATCH_DIR}/annotations/instances_train2017.json"
+VAL_OBJ_ANNOTATIONS_FILE="${SCRATCH_DIR}/annotations/instances_val2017.json"
+
+TRAIN_CAPTION_ANNOTATIONS_FILE="${SCRATCH_DIR}/annotations/captions_train2017.json"
+VAL_CAPTION_ANNOTATIONS_FILE="${SCRATCH_DIR}/annotations/captions_val2017.json"
+
+# Download the test image info.
+BASE_IMAGE_INFO_URL="http://images.cocodataset.org/annotations"
+IMAGE_INFO_FILE="image_info_test2017.zip"
+download_and_unzip ${BASE_IMAGE_INFO_URL} ${IMAGE_INFO_FILE}
+
+TESTDEV_ANNOTATIONS_FILE="${SCRATCH_DIR}/annotations/image_info_test-dev2017.json"
+
+# # Build TFRecords of the image data.
+cd "${CURRENT_DIR}"
+
+# Setup packages
+touch tf-models/__init__.py
+touch tf-models/research/__init__.py
+
+# Run our conversion
+SCRIPT_DIR=$(dirname "$(readlink -f "$0")")
+
+PYTHONPATH="tf-models:tf-models/research" python $SCRIPT_DIR/create_coco_tf_record.py \
+  --logtostderr \
+  --include_masks \
+  --train_image_dir="${TRAIN_IMAGE_DIR}" \
+  --val_image_dir="${VAL_IMAGE_DIR}" \
+  --test_image_dir="${TEST_IMAGE_DIR}" \
+  --train_object_annotations_file="${TRAIN_OBJ_ANNOTATIONS_FILE}" \
+  --val_object_annotations_file="${VAL_OBJ_ANNOTATIONS_FILE}" \
+  --train_caption_annotations_file="${TRAIN_CAPTION_ANNOTATIONS_FILE}" \
+  --val_caption_annotations_file="${VAL_CAPTION_ANNOTATIONS_FILE}" \
+  --testdev_annotations_file="${TESTDEV_ANNOTATIONS_FILE}" \
+  --output_dir="${OUTPUT_DIR}"
+
+mv ${SCRATCH_DIR}/annotations/ ${OUTPUT_DIR}

+ 73 - 0
TensorFlow2/Segmentation/MaskRCNN/download_and_process_pretrained_weights.sh

@@ -0,0 +1,73 @@
+#!/usr/bin/env bash
+
+# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+mkdir -p weights/
+cd weights
+
+# DOWNLOAD CHECKPOINTS
+
+## Mask RCNN
+## ====================== Mask RCNN ====================== ##
+BASE_URL="https://storage.googleapis.com/cloud-tpu-checkpoints/mask-rcnn/1555659850"
+DEST_DIR="mask-rcnn/1555659850"
+
+wget -N ${BASE_URL}/saved_model.pb -P ${DEST_DIR}
+wget -N ${BASE_URL}/variables/variables.data-00000-of-00001 -P ${DEST_DIR}/variables
+wget -N ${BASE_URL}/variables/variables.index -P ${DEST_DIR}/variables
+
+## ====================== resnet-nhwc-2018-02-07 ====================== ##
+BASE_URL="https://storage.googleapis.com/cloud-tpu-artifacts/resnet/resnet-nhwc-2018-02-07"
+DEST_DIR="resnet/resnet-nhwc-2018-02-07"
+
+wget -N ${BASE_URL}/checkpoint -P ${DEST_DIR}
+wget -N ${BASE_URL}/model.ckpt-112603.data-00000-of-00001 -P ${DEST_DIR}
+wget -N ${BASE_URL}/model.ckpt-112603.index  -P ${DEST_DIR}
+wget -N ${BASE_URL}/model.ckpt-112603.meta -P ${DEST_DIR}
+
+## ====================== resnet-nhwc-2018-10-14 ====================== ##
+BASE_URL="https://storage.googleapis.com/cloud-tpu-artifacts/resnet/resnet-nhwc-2018-10-14"
+DEST_DIR="resnet/resnet-nhwc-2018-10-14"
+
+wget -N ${BASE_URL}/model.ckpt-112602.data-00000-of-00001 -P ${DEST_DIR}
+wget -N ${BASE_URL}/model.ckpt-112602.index -P ${DEST_DIR}
+wget -N ${BASE_URL}/model.ckpt-112602.meta -P ${DEST_DIR}
+
+# VERIFY CHECKPOINTS
+echo "Verifying and Processing Checkpoints..."
+
+python pb_to_ckpt.py \
+    --frozen_model_filename=mask-rcnn/1555659850/ \
+    --output_filename=mask-rcnn/1555659850/ckpt/model.ckpt
+
+python extract_RN50_weights.py \
+    --checkpoint_dir=mask-rcnn/1555659850/ckpt/model.ckpt \
+    --save_to=resnet/extracted_from_maskrcnn
+
+echo "Generating list of tensors and their shape..."
+
+python inspect_checkpoint.py --file_name=mask-rcnn/1555659850/ckpt/model.ckpt \
+    > mask-rcnn/1555659850/tensors_and_shape.txt
+
+python inspect_checkpoint.py --file_name=resnet/resnet-nhwc-2018-02-07/model.ckpt-112603 \
+    > resnet/resnet-nhwc-2018-02-07/tensors_and_shape.txt
+
+python inspect_checkpoint.py --file_name=resnet/resnet-nhwc-2018-10-14/model.ckpt-112602 \
+    > resnet/resnet-nhwc-2018-10-14/tensors_and_shape.txt
+
+python inspect_checkpoint.py --file_name=resnet/extracted_from_maskrcnn/resnet50.ckpt \
+    > resnet/extracted_from_maskrcnn/tensors_and_shape.txt
+
+echo "Script Finished with Success"

+ 0 - 0
TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/__init__.py


+ 289 - 0
TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/anchors.py

@@ -0,0 +1,289 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Mask-RCNN anchor definition."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from collections import OrderedDict
+
+import numpy as np
+import tensorflow as tf
+
+from mask_rcnn.object_detection import argmax_matcher
+from mask_rcnn.object_detection import balanced_positive_negative_sampler
+from mask_rcnn.object_detection import box_list
+from mask_rcnn.object_detection import faster_rcnn_box_coder
+from mask_rcnn.object_detection import region_similarity_calculator
+from mask_rcnn.object_detection import target_assigner
+
+
+def _generate_anchor_configs(min_level, max_level, num_scales, aspect_ratios):
+  """Generates mapping from output level to a list of anchor configurations.
+
+  A configuration is a tuple of (stride, scale_octave, aspect_ratio).
+
+  Args:
+      min_level: integer number of minimum level of the output feature pyramid.
+      max_level: integer number of maximum level of the output feature pyramid.
+      num_scales: integer number representing intermediate scales added
+        on each level. For instance, num_scales=2 adds two additional
+        anchor scales [2^0, 2^0.5] on each level.
+      aspect_ratios: list of tuples representing the aspect ratio anchors added
+        on each level. For instance, aspect_ratios =
+        [(1, 1), (1.4, 0.7), (0.7, 1.4)] adds three anchors on each level.
+  Returns:
+    anchor_configs: a dictionary with keys as the levels of anchors and
+      values as a list of anchor configuration.
+  """
+  anchor_configs = {}
+  for level in range(min_level, max_level + 1):
+    anchor_configs[level] = []
+    for scale_octave in range(num_scales):
+      for aspect in aspect_ratios:
+        anchor_configs[level].append(
+            (2**level, scale_octave / float(num_scales), aspect))
+  return anchor_configs
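+
+# For example (illustrative, not part of the original code):
+#   _generate_anchor_configs(min_level=2, max_level=3, num_scales=1,
+#                            aspect_ratios=[(1, 1)])
+#   -> {2: [(4, 0.0, (1, 1))], 3: [(8, 0.0, (1, 1))]}
+# i.e. one (stride=2**level, octave=scale_octave / num_scales, aspect) tuple
+# per anchor at each level.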
+
+
+def _generate_anchor_boxes(image_size, anchor_scale, anchor_configs):
+  """Generates multiscale anchor boxes.
+
+  Args:
+    image_size: the input image size as [height, width]. Each dimension must
+      be divisible by the largest feature stride 2^max_level.
+    anchor_scale: float number representing the scale of size of the base
+      anchor to the feature stride 2^level.
+    anchor_configs: a dictionary with keys as the levels of anchors and
+      values as a list of anchor configuration.
+  Returns:
+    anchor_boxes: a numpy array with shape [N, 4], which stacks anchors on all
+      feature levels.
+  Raises:
+    ValueError: if the input size is not a multiple of the largest feature stride.
+  """
+  boxes_all = []
+  for _, configs in anchor_configs.items():
+    boxes_level = []
+    for config in configs:
+      stride, octave_scale, aspect = config
+      if image_size[0] % stride != 0 or image_size[1] % stride != 0:
+        raise ValueError('input size must be divisible by the stride.')
+      base_anchor_size = anchor_scale * stride * 2**octave_scale
+      anchor_size_x_2 = base_anchor_size * aspect[0] / 2.0
+      anchor_size_y_2 = base_anchor_size * aspect[1] / 2.0
+
+      x = np.arange(stride / 2, image_size[1], stride)
+      y = np.arange(stride / 2, image_size[0], stride)
+      xv, yv = np.meshgrid(x, y)
+      xv = xv.reshape(-1)
+      yv = yv.reshape(-1)
+
+      boxes = np.vstack((yv - anchor_size_y_2, xv - anchor_size_x_2,
+                         yv + anchor_size_y_2, xv + anchor_size_x_2))
+      boxes = np.swapaxes(boxes, 0, 1)
+      boxes_level.append(np.expand_dims(boxes, axis=1))
+    # Concatenate anchors on the same level into shape [N, A, 4].
+    boxes_level = np.concatenate(boxes_level, axis=1)
+    boxes_all.append(boxes_level.reshape([-1, 4]))
+
+  anchor_boxes = np.vstack(boxes_all)
+  return anchor_boxes
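+
+# For example (illustrative): with stride 4, anchor_scale 8 and aspect (1, 1),
+# base_anchor_size = 8 * 4 * 2**0 = 32, so the anchor centered on the first
+# feature cell at (x, y) = (2, 2) is [y - 16, x - 16, y + 16, x + 16] =
+# [-14, -14, 18, 18]; boxes near the border may extend beyond the image and
+# are not clipped here.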
+
+
+class Anchors(object):
+  """Mask-RCNN Anchors class."""
+
+  def __init__(self, min_level, max_level, num_scales, aspect_ratios, anchor_scale, image_size):
+    """Constructs multiscale Mask-RCNN anchors.
+
+    Args:
+      min_level: integer number of minimum level of the output feature pyramid.
+      max_level: integer number of maximum level of the output feature pyramid.
+      num_scales: integer number representing intermediate scales added
+        on each level. For instance, num_scales=2 adds two additional
+        anchor scales [2^0, 2^0.5] on each level.
+      aspect_ratios: list of tuples representing the aspect ratio anchors added
+        on each level. For instance, aspect_ratios =
+        [(1, 1), (1.4, 0.7), (0.7, 1.4)] adds three anchors on each level.
+      anchor_scale: float number representing the scale of size of the base
+        anchor to the feature stride 2^level.
+      image_size: the input image size as [height, width]. Each dimension must
+        be divisible by the largest feature stride 2^max_level.
+    """
+    self.min_level = min_level
+    self.max_level = max_level
+    self.num_scales = num_scales
+    self.aspect_ratios = aspect_ratios
+    self.anchor_scale = anchor_scale
+    self.image_size = image_size
+    self.config = self._generate_configs()
+    self.boxes = self._generate_boxes()
+
+  def _generate_configs(self):
+    """Generate configurations of anchor boxes."""
+    return _generate_anchor_configs(self.min_level, self.max_level,
+                                    self.num_scales, self.aspect_ratios)
+
+  def _generate_boxes(self):
+    """Generates multiscale anchor boxes."""
+    boxes = _generate_anchor_boxes(self.image_size, self.anchor_scale,
+                                   self.config)
+    boxes = tf.convert_to_tensor(value=boxes, dtype=tf.float32)
+    return boxes
+
+  def get_anchors_per_location(self):
+    return self.num_scales * len(self.aspect_ratios)
+
+  def get_unpacked_boxes(self):
+    return self.unpack_labels(self.boxes)
+
+  def unpack_labels(self, labels):
+    """Unpacks an array of labels into multiscales labels."""
+    labels_unpacked = OrderedDict()
+    count = 0
+    for level in range(self.min_level, self.max_level + 1):
+      feat_size0 = int(self.image_size[0] / 2**level)
+      feat_size1 = int(self.image_size[1] / 2**level)
+      steps = feat_size0 * feat_size1 * self.get_anchors_per_location()
+      indices = tf.range(count, count + steps)
+      count += steps
+      labels_unpacked[level] = tf.reshape(
+          tf.gather(labels, indices), [feat_size0, feat_size1, -1])
+    return labels_unpacked
+
+
+class AnchorLabeler(object):
+  """Labeler for multiscale anchor boxes."""
+
+  def __init__(self, anchors, num_classes, match_threshold=0.7,
+               unmatched_threshold=0.3, rpn_batch_size_per_im=256,
+               rpn_fg_fraction=0.5):
+    """Constructs anchor labeler to assign labels to anchors.
+
+    Args:
+      anchors: an instance of class Anchors.
+      num_classes: integer number representing number of classes in the dataset.
+      match_threshold: a float number between 0 and 1 representing the
+        lower-bound threshold to assign positive labels for anchors. An anchor
+        with a score over the threshold is labeled positive.
+      unmatched_threshold: a float number between 0 and 1 representing the
+        upper-bound threshold to assign negative labels for anchors. An anchor
+        with a score below the threshold is labeled negative.
+      rpn_batch_size_per_im: an integer number that represents the number of
+        sampled anchors per image in the first stage (region proposal network).
+      rpn_fg_fraction: a float number between 0 and 1 representing the fraction
+        of positive anchors (foreground) in the first stage.
+    """
+    similarity_calc = region_similarity_calculator.IouSimilarity()
+    matcher = argmax_matcher.ArgMaxMatcher(
+        match_threshold,
+        unmatched_threshold=unmatched_threshold,
+        negatives_lower_than_unmatched=True,
+        force_match_for_each_row=True)
+    box_coder = faster_rcnn_box_coder.FasterRcnnBoxCoder()
+
+    self._target_assigner = target_assigner.TargetAssigner(
+        similarity_calc, matcher, box_coder)
+    self._anchors = anchors
+    self._match_threshold = match_threshold
+    self._unmatched_threshold = unmatched_threshold
+    self._rpn_batch_size_per_im = rpn_batch_size_per_im
+    self._rpn_fg_fraction = rpn_fg_fraction
+    self._num_classes = num_classes
+
+  def _get_rpn_samples(self, match_results):
+    """Computes anchor labels.
+
+    This function performs subsampling for foreground (fg) and background (bg)
+    anchors.
+    Args:
+      match_results: An integer tensor with shape [N] representing the
+        matching results of anchors. (1) match_results[i]>=0,
+        meaning that column i is matched with row match_results[i].
+        (2) match_results[i]=-1, meaning that column i is not matched.
+        (3) match_results[i]=-2, meaning that column i is ignored.
+    Returns:
+      score_targets: an integer tensor with shape [N].
+        (1) score_targets[i]=1, the anchor is a positive sample.
+        (2) score_targets[i]=0, the anchor is a negative sample.
+        (3) score_targets[i]=-1, the anchor is ignored (don't care).
+    """
+    sampler = (
+        balanced_positive_negative_sampler.BalancedPositiveNegativeSampler(
+            positive_fraction=self._rpn_fg_fraction, is_static=False))
+    # indicator includes both positive and negative labels.
+    # labels includes only positive labels.
+    # positives = indicator & labels.
+    # negatives = indicator & !labels.
+    # ignore = !indicator.
+    indicator = tf.greater(match_results, -2)
+    labels = tf.greater(match_results, -1)
+
+    samples = sampler.subsample(
+        indicator, self._rpn_batch_size_per_im, labels)
+    positive_labels = tf.where(
+        tf.logical_and(samples, labels),
+        tf.constant(2, dtype=tf.int32, shape=match_results.shape),
+        tf.constant(0, dtype=tf.int32, shape=match_results.shape))
+    negative_labels = tf.where(
+        tf.logical_and(samples, tf.logical_not(labels)),
+        tf.constant(1, dtype=tf.int32, shape=match_results.shape),
+        tf.constant(0, dtype=tf.int32, shape=match_results.shape))
+    ignore_labels = tf.fill(match_results.shape, -1)
+
+    return (ignore_labels + positive_labels + negative_labels,
+            positive_labels, negative_labels)
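+
+  # For example (illustrative): if match_results = [5, -1, -2] and the sampler
+  # selects anchors 0 and 1, the summed labels are [1, 0, -1]: anchor 0 is a
+  # sampled positive, anchor 1 a sampled negative, and anchor 2 is ignored.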
+
+  def label_anchors(self, gt_boxes, gt_labels):
+    """Labels anchors with ground truth inputs.
+
+    Args:
+      gt_boxes: A float tensor with shape [N, 4] representing groundtruth boxes.
+        For each row, it stores [y0, x0, y1, x1] for four corners of a box.
+      gt_labels: An integer tensor with shape [N, 1] representing groundtruth
+        classes.
+    Returns:
+      score_targets_dict: ordered dictionary with keys
+        [min_level, min_level+1, ..., max_level]. The values are tensors with
+        shape [height_l, width_l, num_anchors]. The height_l and width_l
+        represent the dimension of the class logits at the l-th level.
+      box_targets_dict: ordered dictionary with keys
+        [min_level, min_level+1, ..., max_level]. The values are tensors with
+        shape [height_l, width_l, num_anchors * 4]. The height_l and
+        width_l represent the dimension of the bounding box regression output
+        at the l-th level.
+    """
+    gt_box_list = box_list.BoxList(gt_boxes)
+    anchor_box_list = box_list.BoxList(self._anchors.boxes)
+
+    # cls_targets, cls_weights, box_weights are not used
+    _, _, box_targets, _, matches = self._target_assigner.assign(
+        anchor_box_list, gt_box_list, gt_labels)
+
+    # score_targets contains the subsampled positive and negative anchors.
+    score_targets, _, _ = self._get_rpn_samples(matches.match_results)
+
+    # Unpack labels.
+    score_targets_dict = self._anchors.unpack_labels(score_targets)
+    box_targets_dict = self._anchors.unpack_labels(box_targets)
+
+    return score_targets_dict, box_targets_dict

+ 351 - 0
TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/coco_metric.py

@@ -0,0 +1,351 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""COCO-style evaluation metrics.
+
+Implements the interface of COCO API and metric_fn in tf.TPUEstimator.
+
+COCO API: github.com/cocodataset/cocoapi/
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import atexit
+
+import copy
+import tempfile
+import numpy as np
+
+import tensorflow as tf
+
+from mask_rcnn.utils.logging_formatter import logging
+
+from pycocotools.coco import COCO
+from pycocotools.cocoeval import COCOeval
+import pycocotools.mask as maskUtils
+
+import cv2
+
+
+class MaskCOCO(COCO):
+  """COCO object for mask evaluation.
+  """
+
+  def reset(self, dataset):
+    """Reset the dataset and groundtruth data index in this object.
+
+    Args:
+      dataset: dict of groundtruth data. It should have a similar structure to
+        the COCO groundtruth JSON file. Must contain three keys: {'images',
+          'annotations', 'categories'}.
+        'images': list of image information dictionary. Required keys: 'id',
+          'width' and 'height'.
+        'annotations': list of dict. Bounding boxes and segmentations related
+          information. Required keys: {'id', 'image_id', 'category_id', 'bbox',
+            'iscrowd', 'area', 'segmentation'}.
+        'categories': list of dict of the category information.
+          Required key: 'id'.
+        Refer to http://cocodataset.org/#format-data for more details.
+
+    Raises:
+      AssertionError: If the dataset is empty or not a dict.
+    """
+    assert dataset, 'Groundtruth should not be empty.'
+    assert isinstance(dataset,
+                      dict), 'annotation file format {} not supported'.format(
+                          type(dataset))
+    self.anns, self.cats, self.imgs = dict(), dict(), dict()
+    self.dataset = copy.deepcopy(dataset)
+    self.createIndex()
+
+  def loadRes(self, detection_results, include_mask, is_image_mask=False):
+    """Load result file and return a result api object.
+
+    Args:
+      detection_results: a dictionary containing predictions results.
+      include_mask: a boolean, whether to include mask in detection results.
+      is_image_mask: a boolean indicating whether the predicted mask is a
+        whole-image mask.
+
+    Returns:
+      res: result MaskCOCO api object
+    """
+    res = MaskCOCO()
+    res.dataset['images'] = [img for img in self.dataset['images']]
+    logging.info('Loading and preparing results...')
+    predictions = self.load_predictions(
+        detection_results,
+        include_mask=include_mask,
+        is_image_mask=is_image_mask)
+    assert isinstance(predictions, list), 'results is not an array of objects'
+    if predictions:
+      image_ids = [pred['image_id'] for pred in predictions]
+      assert set(image_ids) == (set(image_ids) & set(self.getImgIds())), \
+             'Results do not correspond to current coco set'
+
+      if (predictions and 'bbox' in predictions[0] and predictions[0]['bbox']):
+        res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
+        for idx, pred in enumerate(predictions):
+          bb = pred['bbox']
+          x1, x2, y1, y2 = [bb[0], bb[0] + bb[2], bb[1], bb[1] + bb[3]]
+          if 'segmentation' not in pred:
+            pred['segmentation'] = [[x1, y1, x1, y2, x2, y2, x2, y1]]
+          pred['area'] = bb[2] * bb[3]
+          pred['id'] = idx + 1
+          pred['iscrowd'] = 0
+      elif 'segmentation' in predictions[0]:
+        res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
+        for idx, pred in enumerate(predictions):
+          # now only support compressed RLE format as segmentation results
+          pred['area'] = maskUtils.area(pred['segmentation'])
+          if 'bbox' not in pred:
+            pred['bbox'] = maskUtils.toBbox(pred['segmentation'])
+          pred['id'] = idx + 1
+          pred['iscrowd'] = 0
+
+      res.dataset['annotations'] = predictions
+
+    res.createIndex()
+    return res
+
+  def load_predictions(self,
+                       detection_results,
+                       include_mask,
+                       is_image_mask=False):
+    """Create prediction dictionary list from detection and mask results.
+
+    Args:
+      detection_results: a dictionary containing numpy arrays which correspond
+        to the prediction results.
+      include_mask: a boolean, whether to include mask in detection results.
+      is_image_mask: a boolean indicating whether the predicted mask is a
+        whole-image mask.
+
+    Returns:
+      a list of dictionaries containing the prediction results from the model
+        in numpy form.
+    """
+    predictions = []
+    num_detections = detection_results['detection_scores'].size
+    current_index = 0
+    for i, image_id in enumerate(detection_results['source_id']):
+
+      if include_mask:
+        box_coordinates_in_image = detection_results['detection_boxes'][i]
+        segments = generate_segmentation_from_masks(
+            detection_results['detection_masks'][i],
+            box_coordinates_in_image,
+            int(detection_results['image_info'][i][3]),
+            int(detection_results['image_info'][i][4]),
+            is_image_mask=is_image_mask
+        )
+
+        # Convert the mask to uint8 and then to fortranarray for RLE encoder.
+        encoded_masks = [
+            maskUtils.encode(np.asfortranarray(instance_mask.astype(np.uint8)))
+            for instance_mask in segments
+        ]
+
+      for box_index in range(int(detection_results['num_detections'][i])):
+        if current_index % 1000 == 0:
+          logging.info('{}/{}'.format(current_index, num_detections))
+
+        current_index += 1
+
+        prediction = {
+            'image_id': int(image_id),
+            'bbox': detection_results['detection_boxes'][i][box_index].tolist(),
+            'score': detection_results['detection_scores'][i][box_index],
+            'category_id': int(
+                detection_results['detection_classes'][i][box_index]),
+        }
+
+        if include_mask:
+          prediction['segmentation'] = encoded_masks[box_index]
+
+        predictions.append(prediction)
+
+    return predictions
+
+
+def generate_segmentation_from_masks(masks,
+                                     detected_boxes,
+                                     image_height,
+                                     image_width,
+                                     is_image_mask=False):
+  """Generates segmentation result from instance masks.
+
+  Args:
+    masks: a numpy array of shape [N, mask_height, mask_width] representing the
+      instance masks w.r.t. the `detected_boxes`.
+    detected_boxes: a numpy array of shape [N, 4] representing the reference
+      bounding boxes.
+    image_height: an integer representing the height of the image.
+    image_width: an integer representing the width of the image.
+    is_image_mask: bool. True: input masks are whole-image masks. False: input
+      masks are bounding-box level masks.
+
+  Returns:
+    segms: a numpy array of shape [N, image_height, image_width] representing
+      the instance masks *pasted* on the image canvas.
+  """
+
+  def expand_boxes(boxes, scale):
+    """Expands an array of boxes by a given scale."""
+    # Reference: https://github.com/facebookresearch/Detectron/blob/master/detectron/utils/boxes.py#L227
+    # The `boxes` in the reference implementation is in [x1, y1, x2, y2] form,
+    # whereas `boxes` here is in [x1, y1, w, h] form
+    w_half = boxes[:, 2] * .5
+    h_half = boxes[:, 3] * .5
+    x_c = boxes[:, 0] + w_half
+    y_c = boxes[:, 1] + h_half
+
+    w_half *= scale
+    h_half *= scale
+
+    boxes_exp = np.zeros(boxes.shape)
+    boxes_exp[:, 0] = x_c - w_half
+    boxes_exp[:, 2] = x_c + w_half
+    boxes_exp[:, 1] = y_c - h_half
+    boxes_exp[:, 3] = y_c + h_half
+
+    return boxes_exp
+
+  # Reference: https://github.com/facebookresearch/Detectron/blob/master/detectron/core/test.py#L812
+  # To work around an issue with cv2.resize (it seems to automatically pad
+  # with repeated border values), we manually zero-pad the masks by 1 pixel
+  # prior to resizing back to the original image resolution. This prevents
+  # "top hat" artifacts. We therefore need to expand the reference boxes by an
+  # appropriate factor.
+
+  _, mask_height, mask_width = masks.shape
+  scale = max((mask_width + 2.0) / mask_width,
+              (mask_height + 2.0) / mask_height)
+
+  ref_boxes = expand_boxes(detected_boxes, scale)
+  ref_boxes = ref_boxes.astype(np.int32)
+  padded_mask = np.zeros((mask_height + 2, mask_width + 2), dtype=np.float32)
+  segms = []
+  for mask_ind, mask in enumerate(masks):
+    im_mask = np.zeros((image_height, image_width), dtype=np.uint8)
+    if is_image_mask:
+      # Process whole-image masks.
+      im_mask[:, :] = mask[:, :]
+    else:
+      # Process mask inside bounding boxes.
+      padded_mask[1:-1, 1:-1] = mask[:, :]
+
+      ref_box = ref_boxes[mask_ind, :]
+      w = ref_box[2] - ref_box[0] + 1
+      h = ref_box[3] - ref_box[1] + 1
+      w = np.maximum(w, 1)
+      h = np.maximum(h, 1)
+
+      mask = cv2.resize(padded_mask, (w, h))
+      mask = np.array(mask > 0.5, dtype=np.uint8)
+
+      x_0 = max(ref_box[0], 0)
+      x_1 = min(ref_box[2] + 1, image_width)
+      y_0 = max(ref_box[1], 0)
+      y_1 = min(ref_box[3] + 1, image_height)
+
+      im_mask[y_0:y_1, x_0:x_1] = mask[(y_0 - ref_box[1]):(y_1 - ref_box[1]), (
+          x_0 - ref_box[0]):(x_1 - ref_box[0])]
+    segms.append(im_mask)
+
+  segms = np.array(segms)
+  assert masks.shape[0] == segms.shape[0]
+  return segms
+
+
+class EvaluationMetric(object):
+  """COCO evaluation metric class."""
+
+  def __init__(self, filename, include_mask):
+    """Constructs COCO evaluation class.
+
+    The class provides the interface to metrics_fn in TPUEstimator. The
+    _evaluate() loads a JSON file in COCO annotation format as the
+    groundtruths and runs COCO evaluation.
+
+    Args:
+      filename: Ground truth JSON file name. If filename is None, use
+        groundtruth data passed from the dataloader for evaluation.
+      include_mask: boolean to indicate whether or not to include mask eval.
+    """
+    if filename:
+      if filename.startswith('gs://'):
+        _, local_val_json = tempfile.mkstemp(suffix='.json')
+        tf.io.gfile.remove(local_val_json)
+
+        tf.io.gfile.copy(filename, local_val_json)
+        atexit.register(tf.io.gfile.remove, local_val_json)
+      else:
+        local_val_json = filename
+      self.coco_gt = MaskCOCO(local_val_json)
+    self.filename = filename
+    self.metric_names = ['AP', 'AP50', 'AP75', 'APs', 'APm', 'APl', 'ARmax1',
+                         'ARmax10', 'ARmax100', 'ARs', 'ARm', 'ARl']
+    self._include_mask = include_mask
+    if self._include_mask:
+      mask_metric_names = ['mask_' + x for x in self.metric_names]
+      self.metric_names.extend(mask_metric_names)
+
+    self._reset()
+
+  def _reset(self):
+    """Reset COCO API object."""
+    if self.filename is None and not hasattr(self, 'coco_gt'):
+      self.coco_gt = MaskCOCO()
+
+  def predict_metric_fn(self,
+                        predictions,
+                        is_predict_image_mask=False,
+                        groundtruth_data=None):
+    """Generates COCO metrics."""
+    image_ids = list(set(predictions['source_id']))
+    if groundtruth_data is not None:
+      self.coco_gt.reset(groundtruth_data)
+    coco_dt = self.coco_gt.loadRes(
+        predictions, self._include_mask, is_image_mask=is_predict_image_mask)
+    coco_eval = COCOeval(self.coco_gt, coco_dt, iouType='bbox')
+    coco_eval.params.imgIds = image_ids
+    coco_eval.evaluate()
+    coco_eval.accumulate()
+    coco_eval.summarize()
+    coco_metrics = coco_eval.stats
+
+    if self._include_mask:
+      # Create another object for instance segmentation metric evaluation.
+      mcoco_eval = COCOeval(self.coco_gt, coco_dt, iouType='segm')
+      mcoco_eval.params.imgIds = image_ids
+      mcoco_eval.evaluate()
+      mcoco_eval.accumulate()
+      mcoco_eval.summarize()
+      mask_coco_metrics = mcoco_eval.stats
+
+    if self._include_mask:
+      metrics = np.hstack((coco_metrics, mask_coco_metrics))
+    else:
+      metrics = coco_metrics
+
+    # clean up after evaluation is done.
+    self._reset()
+    metrics = metrics.astype(np.float32)
+
+    metrics_dict = {}
+    for i, name in enumerate(self.metric_names):
+      metrics_dict[name] = metrics[i]
+    return metrics_dict
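+
+# Example usage (illustrative sketch): evaluate predictions against a local
+# COCO annotation file; `predictions` is the dictionary of numpy arrays
+# described in MaskCOCO.load_predictions.
+#   metric = EvaluationMetric('instances_val2017.json', include_mask=True)
+#   metrics = metric.predict_metric_fn(predictions)
+#   print(metrics['AP'], metrics['mask_AP'])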

+ 464 - 0
TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/dataloader.py

@@ -0,0 +1,464 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Data loader and processing.
+
+Defines input_fn of Mask-RCNN for TF Estimator. The input_fn includes training
+data for category classification, bounding box regression, and number of
+positive examples to normalize the loss during training.
+
+"""
+import functools
+import math
+import multiprocessing
+
+import tensorflow as tf
+
+from mask_rcnn.utils.logging_formatter import logging
+
+from mask_rcnn.utils.distributed_utils import MPI_is_distributed
+from mask_rcnn.utils.distributed_utils import MPI_rank_and_size
+from mask_rcnn.utils.distributed_utils import MPI_rank
+from mask_rcnn.utils.distributed_utils import MPI_size
+
+# common functions
+from mask_rcnn.dataloader_utils import dataset_parser
+
+from distutils.version import LooseVersion
+
+class InputReader(object):
+    """Input reader for dataset."""
+
+    def __init__(
+        self,
+        file_pattern,
+        mode=tf.estimator.ModeKeys.TRAIN,
+        num_examples=0,
+        use_fake_data=False,
+        use_instance_mask=False,
+        seed=None
+    ):
+
+        self._mode = mode
+        self._file_pattern = file_pattern
+        self._num_examples = num_examples
+        self._use_fake_data = use_fake_data
+        self._use_instance_mask = use_instance_mask
+        self._seed = seed
+
+    def _create_dataset_parser_fn(self, params):
+        """Create parser for parsing input data (dictionary)."""
+
+        return functools.partial(
+            dataset_parser,
+            mode=self._mode,
+            params=params,
+            use_instance_mask=self._use_instance_mask,
+            seed=self._seed
+        )
+
+    def __call__(self, params, input_context=None):
+
+        batch_size = params['batch_size'] if 'batch_size' in params else 1
+
+        try:
+            seed = params['seed'] if not MPI_is_distributed() else params['seed'] * MPI_rank()
+        except (KeyError, TypeError):
+            seed = None
+
+        if MPI_is_distributed():
+            n_gpus = MPI_size()
+
+        elif input_context is not None:
+            n_gpus = input_context.num_input_pipelines
+
+        else:
+            n_gpus = 1
+
+        ##################################################
+
+        dataset = tf.data.Dataset.list_files(
+            self._file_pattern,
+            shuffle=False
+        )
+
+        if self._mode == tf.estimator.ModeKeys.TRAIN:
+
+            if input_context is not None:
+                logging.info("Using Dataset Sharding with TF Distributed")
+                _num_shards = input_context.num_input_pipelines
+                _shard_idx = input_context.input_pipeline_id
+
+            elif MPI_is_distributed():
+                logging.info("Using Dataset Sharding with Horovod")
+                _shard_idx, _num_shards = MPI_rank_and_size()
+
+            try:
+                dataset = dataset.shard(
+                    num_shards=_num_shards,
+                    index=_shard_idx
+                )
+                dataset = dataset.shuffle(math.ceil(256 / _num_shards))
+
+            except NameError:  # Not a distributed training setup
+                pass
+
+        def _prefetch_dataset(filename):
+            return tf.data.TFRecordDataset(filename).prefetch(1)
+
+        dataset = dataset.interleave(
+            map_func=_prefetch_dataset,
+            cycle_length=32,
+            block_length=64,
+            num_parallel_calls=tf.data.experimental.AUTOTUNE,
+        )
+
+        if self._num_examples is not None and self._num_examples > 0:
+            logging.info("[*] Limiting the amount of sample to: %d" % self._num_examples)
+            dataset = dataset.take(self._num_examples)
+
+        dataset = dataset.cache()
+
+        if self._mode == tf.estimator.ModeKeys.TRAIN:
+            dataset = dataset.shuffle(
+                buffer_size=4096,
+                reshuffle_each_iteration=True,
+                seed=seed
+            )
+
+            dataset = dataset.repeat()
+
+        # Parse the fetched records to input tensors for model function.
+        dataset = dataset.map(
+            map_func=self._create_dataset_parser_fn(params),
+            num_parallel_calls=16,
+        )
+
+        dataset = dataset.batch(
+            batch_size=batch_size,
+            drop_remainder=True
+        )
+
+        if self._use_fake_data:
+            # Turn this dataset into a semi-fake dataset which always loops over
+            # the first batch. This reduces variance in performance and is useful
+            # in testing.
+            logging.info("Using Fake Dataset Loop...")
+            dataset = dataset.take(1).cache().repeat()
+
+            if self._mode != tf.estimator.ModeKeys.TRAIN:
+                dataset = dataset.take(int(5000 / batch_size))
+
+        dataset = dataset.prefetch(
+            buffer_size=tf.data.experimental.AUTOTUNE,
+        )
+
+        if not tf.distribute.has_strategy():
+            dataset = dataset.apply(
+                tf.data.experimental.prefetch_to_device(
+                    '/gpu:0',  # With Horovod the local GPU is always 0
+                    buffer_size=1,
+                )
+            )
+
+        data_options = tf.data.Options()
+
+        data_options.experimental_deterministic = seed is not None
+        if LooseVersion(tf.__version__) <= LooseVersion("2.0.0"):
+            data_options.experimental_distribute.auto_shard = False
+        else:
+            data_options.experimental_distribute.auto_shard_policy = tf.data.experimental.AutoShardPolicy.OFF
+        # data_options.experimental_distribute.auto_shard = False
+        data_options.experimental_slack = True
+
+        data_options.experimental_threading.max_intra_op_parallelism = 1
+        # data_options.experimental_threading.private_threadpool_size = int(multiprocessing.cpu_count() / n_gpus) * 2
+
+        # ================= experimental_optimization ================= #
+
+        data_options.experimental_optimization.apply_default_optimizations = False
+
+        # data_options.experimental_optimization.autotune = True
+        data_options.experimental_optimization.filter_fusion = True
+        data_options.experimental_optimization.map_and_batch_fusion = True
+        data_options.experimental_optimization.map_and_filter_fusion = True
+        data_options.experimental_optimization.map_fusion = True
+        data_options.experimental_optimization.map_parallelization = True
+
+        map_vectorization_options = tf.data.experimental.MapVectorizationOptions()
+        map_vectorization_options.enabled = True
+        map_vectorization_options.use_choose_fastest = True
+
+        data_options.experimental_optimization.map_vectorization = map_vectorization_options
+
+        data_options.experimental_optimization.noop_elimination = True
+        data_options.experimental_optimization.parallel_batch = True
+        data_options.experimental_optimization.shuffle_and_repeat_fusion = True
+
+        # ========== Stats on TF Data =============
+        # aggregator = tf.data.experimental.StatsAggregator()
+        # data_options.experimental_stats.aggregator = aggregator
+        # data_options.experimental_stats.latency_all_edges = True
+
+        dataset = dataset.with_options(data_options)
+
+        return dataset
+
+
+if __name__ == "__main__":
+    '''
+    Data Loading Benchmark Usage:
+
+    # Real Data - Training
+    python -m mask_rcnn.dataloader \
+        --data_dir="/data/" \
+        --batch_size=2 \
+        --warmup_steps=200 \
+        --benchmark_steps=2000 \
+        --training
+
+    # Real Data - Inference
+    python -m mask_rcnn.dataloader \
+        --data_dir="/data/" \
+        --batch_size=8 \
+        --warmup_steps=200 \
+        --benchmark_steps=2000
+
+    # --------------- #
+
+    # Synthetic Data - Training
+    python -m mask_rcnn.dataloader \
+        --data_dir="/data/" \
+        --batch_size=2 \
+        --warmup_steps=200 \
+        --benchmark_steps=2000 \
+        --training \
+        --use_synthetic_data
+
+    # Synthetic Data - Inference
+    python -m mask_rcnn.dataloader \
+        --data_dir="/data/" \
+        --batch_size=8 \
+        --warmup_steps=200 \
+        --benchmark_steps=2000 \
+        --use_synthetic_data
+
+    # --------------- #
+    '''
+
+    import os
+    import time
+    import argparse
+
+    import numpy as np
+
+    os.environ["CUDA_VISIBLE_DEVICES"] = '0'
+    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
+
+    tf.compat.v1.disable_eager_execution()
+
+    tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
+    logging.set_verbosity(logging.INFO)
+
+    parser = argparse.ArgumentParser(description="MaskRCNN Dataloader Benchmark")
+
+    parser.add_argument(
+        '--data_dir', required=True, type=str, help="Directory path which contains the preprocessed COCO dataset"
+    )
+
+    parser.add_argument(
+        '--batch_size', default=64, type=int, required=True, help="""Batch size used to measure performance."""
+    )
+
+    parser.add_argument(
+        '--warmup_steps',
+        default=200,
+        type=int,
+        required=True,
+        help="""Number of steps considered as warmup and not taken into account for performance measurements."""
+    )
+
+    parser.add_argument(
+        '--benchmark_steps',
+        default=200,
+        type=int,
+        required=True,
+        help="Number of steps used to benchmark dataloading performance. Only used in training"
+    )
+
+    parser.add_argument(
+        '--seed',
+        default=666,
+        type=int,
+        required=False,
+        help="""Reproducibility Seed."""
+    )
+
+    parser.add_argument("--training", default=False, action="store_true", help="Benchmark in training mode")
+
+    parser.add_argument("--use_synthetic_data", default=False, action="store_true", help="Use synthetic dataset")
+
+    FLAGS, unknown_args = parser.parse_known_args()
+
+    if len(unknown_args) > 0:
+
+        for bad_arg in unknown_args:
+            print("ERROR: Unknown command line arg: %s" % bad_arg)
+
+        raise ValueError("Invalid command line arg(s)")
+
+    BURNIN_STEPS = FLAGS.warmup_steps
+
+    if FLAGS.training:
+        TOTAL_STEPS = FLAGS.warmup_steps + FLAGS.benchmark_steps
+    else:
+        TOTAL_STEPS = int(1e6)  # Wait for end of dataset
+
+    if FLAGS.training:
+        input_dataset = InputReader(
+            file_pattern=os.path.join(FLAGS.data_dir, "train*.tfrecord"),
+            mode=tf.estimator.ModeKeys.TRAIN,
+            use_fake_data=FLAGS.use_synthetic_data,
+            use_instance_mask=True,
+            seed=FLAGS.seed
+        )
+
+    else:
+        input_dataset = InputReader(
+            file_pattern=os.path.join(FLAGS.data_dir, "val*.tfrecord"),
+            mode=tf.estimator.ModeKeys.PREDICT,
+            num_examples=5000,
+            use_fake_data=FLAGS.use_synthetic_data,
+            use_instance_mask=True,
+            seed=FLAGS.seed
+        )
+
+    logging.info("[*] Executing Benchmark in %s mode" % ("training" if FLAGS.training else "inference"))
+    logging.info("[*] Benchmark using %s data" % ("synthetic" if FLAGS.use_synthetic_data else "real"))
+
+    time.sleep(1)
+
+    # Build the data input
+    dataset = input_dataset(
+        params={
+            "anchor_scale": 8.0,
+            "aspect_ratios": [[1.0, 1.0], [1.4, 0.7], [0.7, 1.4]],
+            "batch_size": FLAGS.batch_size,
+            "gt_mask_size": 112,
+            "image_size": [1024, 1024],
+            "include_groundtruth_in_features": False,
+            "augment_input_data": True,
+            "max_level": 6,
+            "min_level": 2,
+            "num_classes": 91,
+            "num_scales": 1,
+            "rpn_batch_size_per_im": 256,
+            "rpn_fg_fraction": 0.5,
+            "rpn_min_size": 0.,
+            "rpn_nms_threshold": 0.7,
+            "rpn_negative_overlap": 0.3,
+            "rpn_positive_overlap": 0.7,
+            "rpn_post_nms_topn": 1000,
+            "rpn_pre_nms_topn": 2000,
+            "skip_crowd_during_training": True,
+            "use_category": True,
+            "visualize_images_summary": False,
+        }
+    )
+
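As a worked check of the anchor bookkeeping implied by these parameters: with num_scales=1 and three aspect ratios there are 3 anchors per feature-map cell, which matches the (H, W, 3) score-target and (H, W, 12) box-target shapes documented in dataset_parser below.

    # Anchors per location implied by the benchmark params (sketch):
    num_scales = 1
    aspect_ratios = [[1.0, 1.0], [1.4, 0.7], [0.7, 1.4]]

    anchors_per_location = num_scales * len(aspect_ratios)
    assert anchors_per_location == 3        # score_targets_l: (H_l, W_l, 3)
    assert anchors_per_location * 4 == 12   # box_targets_l:   (H_l, W_l, 12)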
+    dataset_iterator = dataset.make_initializable_iterator()
+
+    if FLAGS.training:
+        X, Y = dataset_iterator.get_next()
+    else:
+        X = dataset_iterator.get_next()
+
+    config = tf.compat.v1.ConfigProto()
+    config.gpu_options.allow_growth = True
+    config.log_device_placement = False
+
+    with tf.device("gpu:0"):
+
+        X_gpu_ops = list()
+        Y_gpu_ops = list()
+
+        if FLAGS.training:
+
+            for _, _x in X.items():
+                X_gpu_ops.append(tf.identity(_x))
+
+            for _, _y in Y.items():
+                Y_gpu_ops.append(tf.identity(_y))
+
+        else:
+
+            for _, _x in X["features"].items():
+                X_gpu_ops.append(tf.identity(_x))
+
+        with tf.control_dependencies(X_gpu_ops + Y_gpu_ops):
+            input_op = tf.constant(1.0)
+
+        with tf.compat.v1.Session(config=config) as sess:
+
+            sess.run(dataset_iterator.initializer)
+
+            sess.run(tf.compat.v1.global_variables_initializer())
+
+            total_files_processed = 0
+
+            img_per_sec_arr = []
+            processing_time_arr = []
+
+            processing_start_time = time.time()
+
+            for step in range(TOTAL_STEPS):
+
+                try:
+
+                    start_time = time.time()
+                    sess.run(input_op)
+                    elapsed_time = (time.time() - start_time) * 1000
+
+                    imgs_per_sec = (FLAGS.batch_size / elapsed_time) * 1000
+                    total_files_processed += FLAGS.batch_size
+
+                    if (step + 1) > BURNIN_STEPS:
+                        processing_time_arr.append(elapsed_time)
+                        img_per_sec_arr.append(imgs_per_sec)
+
+                    if (step + 1) % 20 == 0 or (step + 1) == TOTAL_STEPS:
+                        print(
+                            "[STEP %04d] # Batch Size: %03d - Time: %03d msecs - Speed: %6d img/s" %
+                            (step + 1, FLAGS.batch_size, elapsed_time, imgs_per_sec)
+                        )
+
+                except tf.errors.OutOfRangeError:
+                    break
+
+            processing_time = time.time() - processing_start_time
+
+            avg_processing_speed = np.mean(img_per_sec_arr)
+
+            print("\n###################################################################")
+            print("*** Data Loading Performance Metrics ***\n")
+            print("\t=> Number of Steps: %d" % (step + 1))
+            print("\t=> Batch Size: %d" % FLAGS.batch_size)
+            print("\t=> Files Processed: %d" % total_files_processed)
+            print("\t=> Total Execution Time: %d secs" % processing_time)
+            print("\t=> Median Time per step: %3d msecs" % np.median(processing_time_arr))
+            print("\t=> Median Processing Speed: %d images/secs" % np.median(img_per_sec_arr))
+            print("\t=> Median Processing Time: %.2f msecs/image" % (1 / float(np.median(img_per_sec_arr)) * 1000))

+ 466 - 0
TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/dataloader_utils.py

@@ -0,0 +1,466 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Data loader and processing.
+
+Defines input_fn of Mask-RCNN for TF Estimator. The input_fn includes training
+data for category classification, bounding box regression, and number of
+positive examples to normalize the loss during training.
+
+"""
+
+import tensorflow as tf
+
+from mask_rcnn import anchors
+from mask_rcnn.utils import coco_utils
+from mask_rcnn.ops import preprocess_ops
+
+from mask_rcnn.object_detection import tf_example_decoder
+
+MAX_NUM_INSTANCES = 100
+MAX_NUM_VERTICES_PER_INSTANCE = 1500
+MAX_NUM_POLYGON_LIST_LEN = 2 * MAX_NUM_VERTICES_PER_INSTANCE * MAX_NUM_INSTANCES
+POLYGON_PAD_VALUE = coco_utils.POLYGON_PAD_VALUE
+
+__all__ = [
+    # dataset parser
+    "dataset_parser",
+    # common functions
+    "preprocess_image",
+    "process_groundtruth_is_crowd",
+    "process_source_id",
+    # eval
+    "prepare_labels_for_eval",
+    # training
+    "augment_image",
+    "process_boxes_classes_indices_for_training",
+    "process_gt_masks_for_training",
+    "process_labels_for_training",
+    "process_targets_for_training"
+]
+
+
+###############################################################################################################
+
+def dataset_parser(value, mode, params, use_instance_mask, seed=None, regenerate_source_id=False):
+    """Parse data to a fixed dimension input image and learning targets.
+
+    Args:
+    value: A dictionary contains an image and groundtruth annotations.
+
+    Returns:
+    features: a dictionary that contains the image and auxiliary
+      information. The following describes {key: value} pairs in the
+      dictionary.
+      image: Image tensor that is preproessed to have normalized value and
+        fixed dimension [image_size, image_size, 3]
+      image_info: image information that includes the original height and
+        width, the scale of the proccessed image to the original image, and
+        the scaled height and width.
+      source_ids: Source image id. Default value -1 if the source id is
+        empty in the groundtruth annotation.
+    labels: a dictionary that contains auxiliary information plus (optional)
+      labels. The following describes {key: value} pairs in the dictionary.
+      `labels` is only for training.
+      score_targets_dict: ordered dictionary with keys
+        [min_level, min_level+1, ..., max_level]. The values are tensor with
+        shape [height_l, width_l, num_anchors]. The height_l and width_l
+        represent the dimension of objectiveness score at l-th level.
+      box_targets_dict: ordered dictionary with keys
+        [min_level, min_level+1, ..., max_level]. The values are tensor with
+        shape [height_l, width_l, num_anchors * 4]. The height_l and
+        width_l represent the dimension of bounding box regression output at
+        l-th level.
+      gt_boxes: Groundtruth bounding box annotations. The box is represented
+         in [y1, x1, y2, x2] format. The tennsor is padded with -1 to the
+         fixed dimension [MAX_NUM_INSTANCES, 4].
+      gt_classes: Groundtruth classes annotations. The tennsor is padded
+        with -1 to the fixed dimension [MAX_NUM_INSTANCES].
+      cropped_gt_masks: groundtrugh masks cropped by the bounding box and
+        resized to a fixed size determined by params['gt_mask_size']
+      regenerate_source_id: `bool`, if True TFExampleParser will use hashed
+        value of `image/encoded` for `image/source_id`.
+    """
+    if mode not in [tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.PREDICT, tf.estimator.ModeKeys.EVAL]:
+        raise ValueError("Unknown execution mode received: %s" % mode)
+
+    def create_example_decoder():
+        return tf_example_decoder.TfExampleDecoder(
+            use_instance_mask=use_instance_mask,
+            regenerate_source_id=regenerate_source_id
+        )
+
+    example_decoder = create_example_decoder()
+
+    with tf.xla.experimental.jit_scope(compile_ops=True):
+
+        with tf.name_scope('parser'):
+
+            data = example_decoder.decode(value)
+
+            data['groundtruth_is_crowd'] = process_groundtruth_is_crowd(data)
+
+            image = tf.image.convert_image_dtype(data['image'], dtype=tf.float32)
+
+            source_id = process_source_id(data['source_id'])
+
+            if mode == tf.estimator.ModeKeys.PREDICT:
+
+                features = {
+                    'source_ids': source_id,
+                }
+
+                if params['visualize_images_summary']:
+                    features['orig_images'] = tf.image.resize(image, params['image_size'])
+
+                features["images"], features["image_info"], _, _ = preprocess_image(
+                    image,
+                    boxes=None,
+                    instance_masks=None,
+                    image_size=params['image_size'],
+                    max_level=params['max_level'],
+                    augment_input_data=False,
+                    seed=seed
+                )
+
+                if params['include_groundtruth_in_features']:
+                    labels = prepare_labels_for_eval(
+                        data,
+                        target_num_instances=MAX_NUM_INSTANCES,
+                        target_polygon_list_len=MAX_NUM_POLYGON_LIST_LEN,
+                        use_instance_mask=params['include_mask']
+                    )
+                    return {'features': features, 'labels': labels}
+
+                else:
+                    return {'features': features}
+
+            elif mode == tf.estimator.ModeKeys.TRAIN:
+
+                labels = {}
+                features = {
+                    'source_ids': source_id
+                }
+
+                boxes, classes, indices, instance_masks = process_boxes_classes_indices_for_training(
+                    data,
+                    skip_crowd_during_training=params['skip_crowd_during_training'],
+                    use_category=params['use_category'],
+                    use_instance_mask=use_instance_mask
+                )
+
+                image, image_info, boxes, instance_masks = preprocess_image(
+                    image,
+                    boxes=boxes,
+                    instance_masks=instance_masks,
+                    image_size=params['image_size'],
+                    max_level=params['max_level'],
+                    augment_input_data=params['augment_input_data'],
+                    seed=seed
+                )
+
+                features.update({
+                    'images': image,
+                    'image_info': image_info,
+                })
+
+                padded_image_size = image.get_shape().as_list()[:2]
+
+                # Pads cropped_gt_masks.
+                if use_instance_mask:
+                    labels['cropped_gt_masks'] = process_gt_masks_for_training(
+                        instance_masks,
+                        boxes,
+                        gt_mask_size=params['gt_mask_size'],
+                        padded_image_size=padded_image_size,
+                        max_num_instances=MAX_NUM_INSTANCES
+                    )
+
+                with tf.xla.experimental.jit_scope(compile_ops=False):
+                    # Assign anchors.
+                    (score_targets, box_targets), input_anchor = process_targets_for_training(
+                        padded_image_size=padded_image_size,
+                        boxes=boxes,
+                        classes=classes,
+                        params=params
+                    )
+
+                additional_labels = process_labels_for_training(
+                    image_info, boxes, classes, score_targets, box_targets,
+                    max_num_instances=MAX_NUM_INSTANCES,
+                    min_level=params["min_level"],
+                    max_level=params["max_level"]
+                )
+
+                labels.update(additional_labels)
+                # labels["input_anchor"] = input_anchor
+
+                # Features
+                # {
+                #   'source_ids': <tf.Tensor 'parser/StringToNumber:0' shape=() dtype=float32>,
+                #   'images': <tf.Tensor 'parser/pad_to_bounding_box/Squeeze:0' shape=(1024, 1024, 3) dtype=float32>,
+                #   'image_info': <tf.Tensor 'parser/stack_1:0' shape=(5,) dtype=float32>
+                # }
+
+                FAKE_FEATURES = False
+
+                if FAKE_FEATURES:
+                    labels["source_ids"] = tf.ones(shape=(), dtype=tf.float32)
+                    labels["images"] = tf.ones(shape=(1024, 1024, 3), dtype=tf.float32)
+                    labels["image_info"] = tf.ones(shape=(5,), dtype=tf.float32)
+
+                # Labels
+                # {
+                #   'cropped_gt_masks': <tf.Tensor 'parser/Reshape_4:0' shape=(100, 116, 116) dtype=float32>,
+                #   'score_targets_2': <tf.Tensor 'parser/Reshape_9:0' shape=(256, 256, 3) dtype=int32>,
+                #   'box_targets_2': <tf.Tensor 'parser/Reshape_14:0' shape=(256, 256, 12) dtype=float32>,
+                #   'score_targets_3': <tf.Tensor 'parser/Reshape_10:0' shape=(128, 128, 3) dtype=int32>,
+                #   'box_targets_3': <tf.Tensor 'parser/Reshape_15:0' shape=(128, 128, 12) dtype=float32>,
+                #   'score_targets_4': <tf.Tensor 'parser/Reshape_11:0' shape=(64, 64, 3) dtype=int32>,
+                #   'box_targets_4': <tf.Tensor 'parser/Reshape_16:0' shape=(64, 64, 12) dtype=float32>,
+                #   'score_targets_5': <tf.Tensor 'parser/Reshape_12:0' shape=(32, 32, 3) dtype=int32>,
+                #   'box_targets_5': <tf.Tensor 'parser/Reshape_17:0' shape=(32, 32, 12) dtype=float32>,
+                #   'score_targets_6': <tf.Tensor 'parser/Reshape_13:0' shape=(16, 16, 3) dtype=int32>,
+                #   'box_targets_6': <tf.Tensor 'parser/Reshape_18:0' shape=(16, 16, 12) dtype=float32>,
+                #   'gt_boxes': <tf.Tensor 'parser/Reshape_20:0' shape=(100, 4) dtype=float32>,
+                #   'gt_classes': <tf.Tensor 'parser/Reshape_22:0' shape=(100, 1) dtype=float32>
+                # }
+
+                FAKE_LABELS = False
+
+                if FAKE_LABELS:
+                    labels["cropped_gt_masks"] = tf.ones(shape=(100, 116, 116), dtype=tf.float32)
+                    labels["gt_boxes"] = tf.ones(shape=(100, 4), dtype=tf.float32)
+                    labels["gt_classes"] = tf.ones(shape=(100, 1), dtype=tf.float32)
+
+                    idx = 1
+                    for dim in [256, 128, 64, 32, 16]:
+                        idx += 1  # Starts at 2
+
+                        labels["score_targets_%d" % idx] = tf.ones(shape=(dim, dim, 3), dtype=tf.float32)
+                        labels["box_targets_%d" % idx] = tf.ones(shape=(dim, dim, 12), dtype=tf.float32)
+
+                return features, labels
+
+###############################################################################################################
+
+# common functions
+
+
+def preprocess_image(image, boxes, instance_masks, image_size, max_level, augment_input_data=False, seed=None):
+    image = preprocess_ops.normalize_image(image)
+
+    if augment_input_data:
+        image, boxes, instance_masks = augment_image(image=image, boxes=boxes, instance_masks=instance_masks, seed=seed)
+
+    # Scaling and padding.
+    image, image_info, boxes, instance_masks = preprocess_ops.resize_and_pad(
+        image=image,
+        target_size=image_size,
+        stride=2 ** max_level,
+        boxes=boxes,
+        masks=instance_masks
+    )
+    return image, image_info, boxes, instance_masks
+
+
+def process_groundtruth_is_crowd(data):
+    return tf.cond(
+        pred=tf.greater(tf.size(input=data['groundtruth_is_crowd']), 0),
+        true_fn=lambda: data['groundtruth_is_crowd'],
+        false_fn=lambda: tf.zeros_like(data['groundtruth_classes'], dtype=tf.bool)
+    )
+
+
+# def process_source_id(data):
+#     source_id = tf.where(tf.equal(source_id, tf.constant('')), '-1', source_id)
+#     source_id = tf.strings.to_number(source_id)
+#     return source_id
+
+
+def process_source_id(source_id):
+    """Processes source_id to the right format."""
+    if source_id.dtype == tf.string:
+        source_id = tf.cast(tf.strings.to_number(source_id), tf.int64)
+
+    with tf.control_dependencies([source_id]):
+        source_id = tf.cond(
+            tf.equal(tf.size(source_id), 0),
+            lambda: tf.cast(tf.constant(-1), tf.int64),
+            lambda: tf.identity(source_id)
+        )
+
+    return source_id
+
+
+# eval
+def prepare_labels_for_eval(
+        data,
+        target_num_instances=MAX_NUM_INSTANCES,
+        target_polygon_list_len=MAX_NUM_POLYGON_LIST_LEN,
+        use_instance_mask=False
+):
+
+    """Create labels dict for infeed from data of tf.Example."""
+    image = data['image']
+
+    height, width = tf.shape(input=image)[:2]
+
+    boxes = data['groundtruth_boxes']
+
+    classes = tf.cast(data['groundtruth_classes'], dtype=tf.float32)
+
+    num_labels = tf.shape(input=classes)[0]
+
+    boxes = preprocess_ops.pad_to_fixed_size(boxes, -1, [target_num_instances, 4])
+    classes = preprocess_ops.pad_to_fixed_size(classes, -1, [target_num_instances, 1])
+
+    is_crowd = tf.cast(data['groundtruth_is_crowd'], dtype=tf.float32)
+    is_crowd = preprocess_ops.pad_to_fixed_size(is_crowd, 0, [target_num_instances, 1])
+
+    labels = dict()
+
+    labels['width'] = width
+    labels['height'] = height
+    labels['groundtruth_boxes'] = boxes
+    labels['groundtruth_classes'] = classes
+    labels['num_groundtruth_labels'] = num_labels
+    labels['groundtruth_is_crowd'] = is_crowd
+
+    if use_instance_mask:
+        data['groundtruth_polygons'] = preprocess_ops.pad_to_fixed_size(
+            data=data['groundtruth_polygons'],
+            pad_value=POLYGON_PAD_VALUE,
+            output_shape=[target_polygon_list_len, 1]
+        )
+
+        if 'groundtruth_area' in data:
+            labels['groundtruth_area'] = preprocess_ops.pad_to_fixed_size(
+                data=data['groundtruth_area'],
+                pad_value=0,
+                output_shape=[target_num_instances, 1]
+            )
+
+    return labels
+
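prepare_labels_for_eval leans on preprocess_ops.pad_to_fixed_size for every variable-length field. A NumPy sketch of the padding contract assumed here (pad_to_fixed_size_np is a hypothetical stand-in for the real op):

    import numpy as np

    def pad_to_fixed_size_np(data, pad_value, output_shape):
        # Pad (or truncate) the leading dimension to output_shape[0].
        data = np.asarray(data, dtype=np.float32).reshape(-1, output_shape[1])
        n = min(data.shape[0], output_shape[0])
        out = np.full(output_shape, pad_value, dtype=np.float32)
        out[:n] = data[:n]
        return out

    # e.g. three boxes padded with -1 rows up to MAX_NUM_INSTANCES == 100
    padded = pad_to_fixed_size_np(np.ones((3, 4)), -1, [100, 4])
    assert padded.shape == (100, 4) and (padded[3:] == -1).all()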
+
+# training
+def augment_image(image, boxes, instance_masks, seed):
+    flipped_results = preprocess_ops.random_horizontal_flip(
+        image,
+        boxes=boxes,
+        masks=instance_masks,
+        seed=seed
+    )
+
+    if instance_masks is not None:
+        image, boxes, instance_masks = flipped_results
+
+    else:
+        image, boxes = flipped_results
+
+    # image = tf.image.random_brightness(image, max_delta=0.1, seed=seed)
+    # image = tf.image.random_contrast(image, lower=0.9, upper=1.1, seed=seed)
+    # image = tf.image.random_saturation(image, lower=0.9, upper=1.1, seed=seed)
+    # image = tf.image.random_jpeg_quality(image, min_jpeg_quality=80, max_jpeg_quality=100, seed=seed)
+
+    return image, boxes, instance_masks
+
+
+def process_boxes_classes_indices_for_training(data, skip_crowd_during_training, use_category, use_instance_mask):
+    boxes = data['groundtruth_boxes']
+    classes = data['groundtruth_classes']
+    classes = tf.reshape(tf.cast(classes, dtype=tf.float32), [-1, 1])
+    indices = None
+    instance_masks = None
+
+    if not use_category:
+        classes = tf.cast(tf.greater(classes, 0), dtype=tf.float32)
+
+    if skip_crowd_during_training:
+        indices = tf.where(tf.logical_not(data['groundtruth_is_crowd']))
+        classes = tf.gather_nd(classes, indices)
+        boxes = tf.gather_nd(boxes, indices)
+
+        if use_instance_mask:
+            instance_masks = tf.gather_nd(data['groundtruth_instance_masks'], indices)
+
+    return boxes, classes, indices, instance_masks
+
+
+def process_gt_masks_for_training(instance_masks, boxes, gt_mask_size, padded_image_size, max_num_instances):
+    cropped_gt_masks = preprocess_ops.crop_gt_masks(
+        instance_masks=instance_masks,
+        boxes=boxes,
+        gt_mask_size=gt_mask_size,
+        image_size=padded_image_size
+    )
+
+    # cropped_gt_masks = tf.reshape(cropped_gt_masks, [max_num_instances, -1])
+
+    cropped_gt_masks = preprocess_ops.pad_to_fixed_size(
+        data=cropped_gt_masks,
+        pad_value=-1,
+        output_shape=[max_num_instances, (gt_mask_size + 4) ** 2]
+    )
+
+    return tf.reshape(cropped_gt_masks, [max_num_instances, gt_mask_size + 4, gt_mask_size + 4])
+
+
+def process_labels_for_training(
+    image_info, boxes, classes,
+    score_targets, box_targets,
+    max_num_instances, min_level, max_level
+):
+    labels = {}
+
+    # Pad groundtruth data.
+    # boxes *= image_info[2]
+    boxes = preprocess_ops.pad_to_fixed_size(boxes, -1, [max_num_instances, 4])
+
+    classes = preprocess_ops.pad_to_fixed_size(classes, -1, [max_num_instances, 1])
+
+    for level in range(min_level, max_level + 1):
+        labels['score_targets_%d' % level] = score_targets[level]
+        labels['box_targets_%d' % level] = box_targets[level]
+
+    labels['gt_boxes'] = boxes
+    labels['gt_classes'] = classes
+
+    return labels
+
+
+def process_targets_for_training(padded_image_size, boxes, classes, params):
+    input_anchors = anchors.Anchors(
+        params['min_level'],
+        params['max_level'],
+        params['num_scales'],
+        params['aspect_ratios'],
+        params['anchor_scale'],
+        padded_image_size
+    )
+
+    anchor_labeler = anchors.AnchorLabeler(
+        input_anchors,
+        params['num_classes'],
+        params['rpn_positive_overlap'],
+        params['rpn_negative_overlap'],
+        params['rpn_batch_size_per_im'],
+        params['rpn_fg_fraction']
+    )
+
+    return anchor_labeler.label_anchors(boxes, classes), input_anchors
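As a sanity check on the per-level target shapes listed in the dataset_parser comments above: the feature map at FPN level l has spatial size image_size / 2**l, so a 1024x1024 padded image with min_level=2 and max_level=6 yields 256, 128, 64, 32 and 16.

    # Feature-map sizes per FPN level for a 1024x1024 input (worked check):
    image_size, min_level, max_level = 1024, 2, 6

    for level in range(min_level, max_level + 1):
        dim = image_size // 2 ** level
        print('score_targets_%d: (%d, %d, num_anchors)' % (level, dim, dim))
    # -> 256, 128, 64, 32, 16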

+ 564 - 0
TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/distributed_executer.py

@@ -0,0 +1,564 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Interface to run mask rcnn model in different distributed strategies."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import abc
+import os
+import six
+
+import math
+
+import multiprocessing
+
+import tensorflow as tf
+
+from mask_rcnn.utils.logging_formatter import logging
+
+from mask_rcnn.utils.distributed_utils import MPI_is_distributed
+from mask_rcnn.utils.distributed_utils import MPI_local_rank
+from mask_rcnn.utils.distributed_utils import MPI_rank
+
+from mask_rcnn.hooks.logging_hook import AutoLoggingHook
+
+from mask_rcnn.utils.lazy_imports import LazyImport
+hvd = LazyImport("horovod.tensorflow")
+
+from tensorflow.core.protobuf import rewriter_config_pb2
+
+from mask_rcnn import evaluation
+from mask_rcnn.hyperparameters import params_io
+from mask_rcnn.hooks import CheckpointSaverHook
+from mask_rcnn.hooks import PretrainedWeightsLoadingHook
+
+
+def get_training_hooks(mode, model_dir, checkpoint_path=None, skip_checkpoint_variables=None):
+
+    assert mode in ('train', 'eval')
+
+    training_hooks = [
+        AutoLoggingHook(
+            # log_every_n_steps=RUNNING_CONFIG.display_step,
+            log_every_n_steps=5 if "NGC_JOB_ID" not in os.environ else 100,
+            # warmup_steps=RUNNING_CONFIG.warmup_steps,
+            warmup_steps=100,
+            is_training=True
+        )
+    ]
+
+    if not MPI_is_distributed() or MPI_rank() == 0:
+        training_hooks.append(PretrainedWeightsLoadingHook(
+            prefix="resnet50/",
+            checkpoint_path=checkpoint_path,
+            skip_variables_regex=skip_checkpoint_variables
+        ))
+
+    if MPI_is_distributed() and mode == "train":
+        training_hooks.append(hvd.BroadcastGlobalVariablesHook(root_rank=0))
+
+    if not MPI_is_distributed() or MPI_rank() == 0:
+        training_hooks.append(CheckpointSaverHook(
+            checkpoint_dir=model_dir,
+            checkpoint_basename="model.ckpt"
+        ))
+
+    return training_hooks
+
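The expression `not MPI_is_distributed() or MPI_rank() == 0` recurs throughout this file: checkpoint saving and pretrained-weight loading run only on the chief process so that workers never race on the same files. A sketch of the predicate using the module's own helpers (the is_chief name is hypothetical):

    def is_chief():
        # True on single-process runs, and on MPI rank 0 in distributed runs.
        return not MPI_is_distributed() or MPI_rank() == 0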
+
+@six.add_metaclass(abc.ABCMeta)
+class BaseExecuter(object):
+  """Interface to run Mask RCNN model in TPUs/GPUs.
+
+  Arguments:
+    flags: FLAGS object passed from the user.
+    model_config: Model configuration needed to run distribution strategy.
+    model_fn: Model function to be passed to Estimator.
+  """
+
+  def __init__(self, runtime_config, model_fn):
+
+    self._runtime_config = runtime_config
+    self._model_fn = model_fn
+
+    os.environ['CUDA_CACHE_DISABLE'] = '0'
+
+    os.environ['TF_USE_CUDNN_BATCHNORM_SPATIAL_PERSISTENT'] = '1'
+
+    os.environ['TF_ADJUST_HUE_FUSED'] = '1'
+    os.environ['TF_ADJUST_SATURATION_FUSED'] = '1'
+    os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'
+
+    os.environ['TF_AUTOTUNE_THRESHOLD'] = '2'
+
+  @staticmethod
+  def _get_session_config(mode, use_xla, use_amp, use_tf_distributed=False, allow_xla_at_inference=False):
+
+      assert mode in ('train', 'eval')
+
+      rewrite_options = rewriter_config_pb2.RewriterConfig(
+          # arithmetic_optimization=rewriter_config_pb2.RewriterConfig.OFF,
+          # arithmetic_optimization=rewriter_config_pb2.RewriterConfig.ON,
+
+          # constant_folding=rewriter_config_pb2.RewriterConfig.OFF,
+          # constant_folding=rewriter_config_pb2.RewriterConfig.ON,           # TO TEST
+
+          # debug_stripper=rewriter_config_pb2.RewriterConfig.OFF,
+          # debug_stripper=rewriter_config_pb2.RewriterConfig.ON,           # TO TEST
+
+          # dependency_optimization=rewriter_config_pb2.RewriterConfig.OFF,
+          # dependency_optimization=rewriter_config_pb2.RewriterConfig.ON,           # TO TEST
+
+          # disable_model_pruning=False,           # INCOMPATIBLE with AMP
+          # function_optimization=True,
+          # implementation_selector=True,
+
+          # loop_optimization=rewriter_config_pb2.RewriterConfig.OFF,
+          # loop_optimization=rewriter_config_pb2.RewriterConfig.ON,           # TO TEST
+
+          # The default setting (SCHEDULING and SWAPPING HEURISTICS only)
+          # memory_optimization=rewriter_config_pb2.RewriterConfig.DEFAULT_MEM_OPT,
+
+          # Disabled in the meta-optimizer.
+          # memory_optimization=rewriter_config_pb2.RewriterConfig.NO_MEM_OPT,
+
+          # Driven by manual op-level annotations.
+          # memory_optimization=rewriter_config_pb2.RewriterConfig.MANUAL,
+
+          # Swapping heuristic will move a tensor from the GPU to the CPU and move it
+          # back when needed to reduce peak memory usage.
+          # memory_optimization=rewriter_config_pb2.RewriterConfig.SWAPPING_HEURISTICS,
+
+          # Recomputation heuristics will recompute ops (such as Relu activation)
+          # during backprop instead of storing them, reducing peak memory usage.
+          # memory_optimization=rewriter_config_pb2.RewriterConfig.RECOMPUTATION_HEURISTICS,
+
+          # Scheduling will split big ops such as AddN and try to enforce a schedule of
+          # the new computations that decreases peak memory usage.
+          # memory_optimization=rewriter_config_pb2.RewriterConfig.SCHEDULING_HEURISTICS,
+
+          # Use any combination of swapping and recomputation heuristics.
+          # memory_optimization=rewriter_config_pb2.RewriterConfig.HEURISTICS,
+
+          meta_optimizer_iterations=rewriter_config_pb2.RewriterConfig.TWO,
+          # meta_optimizer_iterations=rewriter_config_pb2.RewriterConfig.ONE,
+          # meta_optimizer_iterations=rewriter_config_pb2.RewriterConfig.DEFAULT_NUM_ITERS,
+
+          # pin_to_host_optimization=rewriter_config_pb2.RewriterConfig.OFF,
+          # pin_to_host_optimization=rewriter_config_pb2.RewriterConfig.ON,         # TO TEST
+          #
+          # remapping=rewriter_config_pb2.RewriterConfig.OFF,
+          # remapping=rewriter_config_pb2.RewriterConfig.ON,                   # TO TEST
+
+          # scoped_allocator_optimization=rewriter_config_pb2.RewriterConfig.OFF,
+          # scoped_allocator_optimization=rewriter_config_pb2.RewriterConfig.ON,  # TO TEST
+
+          # shape_optimization=rewriter_config_pb2.RewriterConfig.OFF,
+          # shape_optimization=rewriter_config_pb2.RewriterConfig.ON,           # TO TEST
+      )
+
+      if use_amp:
+          logging.info("[%s] AMP is activated - Experiment Feature" % mode)
+          rewrite_options.auto_mixed_precision = True
+
+      config = tf.compat.v1.ConfigProto(
+          allow_soft_placement=True,
+          log_device_placement=False,
+          graph_options=tf.compat.v1.GraphOptions(
+              rewrite_options=rewrite_options,
+              # infer_shapes=True  # Heavily drops throughput by 30%
+          )
+      )
+
+      if use_tf_distributed:
+        config.gpu_options.force_gpu_compatible = False
+
+      else:
+        config.gpu_options.force_gpu_compatible = True  # Force pinned memory
+
+        if MPI_is_distributed():
+            config.gpu_options.visible_device_list = str(MPI_local_rank())
+
+      if use_xla and (mode == "train" or allow_xla_at_inference):
+          logging.info("[%s] XLA is activated - Experiment Feature" % mode)
+          config.graph_options.optimizer_options.global_jit_level = tf.compat.v1.OptimizerOptions.ON_1
+          # config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_2
+
+      if mode == 'train':
+          config.intra_op_parallelism_threads = 1  # Avoid pool of Eigen threads
+
+          if MPI_is_distributed():
+              config.inter_op_parallelism_threads = max(2, multiprocessing.cpu_count() // hvd.local_size())
+
+          elif not use_tf_distributed:
+              config.inter_op_parallelism_threads = 4
+
+      return config
+
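To make the thread-pool sizing concrete (hypothetical numbers): intra-op parallelism is pinned to 1 to avoid a per-process pool of Eigen threads, while the inter-op threads are split across the local Horovod processes.

    # Inter-op thread sizing from _get_session_config, worked through:
    cpu_count, local_gpus = 80, 8   # e.g. multiprocessing.cpu_count(), hvd.local_size()
    inter_op_threads = max(2, cpu_count // local_gpus)
    assert inter_op_threads == 10   # 10 inter-op threads per training process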
+  @abc.abstractmethod
+  def build_strategy_configuration(self, mode):
+    """Builds run configuration for distributed train/eval.
+
+    Returns:
+      RunConfig with distribution strategy configurations
+      to pass to the Estimator constructor.
+    """
+
+    raise NotImplementedError('Must be implemented in subclass')
+
+  def build_model_parameters(self, mode):
+    """Builds model parameter."""
+
+    assert mode in ('train', 'eval')
+
+    batch_size = self._runtime_config.train_batch_size if mode == 'train' else self._runtime_config.eval_batch_size
+
+    params = dict(
+        self._runtime_config.values(),
+        mode=mode,
+        batch_size=batch_size,
+        model_dir=self._runtime_config.model_dir,
+    )
+
+    if mode == 'eval':
+      params = dict(
+        params,
+        augment_input_data=False,
+      )
+
+    return params
+
+  def build_mask_rcnn_estimator(self, params, run_config, mode):
+    """Creates TPUEstimator/Estimator instance.
+
+    Arguments:
+      params: A dictionary to pass to Estimator `model_fn`.
+      run_config: RunConfig instance specifying distribution strategy
+        configurations.
+      mode: Mode -- one of 'train` or `eval`.
+
+    Returns:
+      tf.estimator.Estimator instance.
+    """
+    assert mode in ('train', 'eval')
+
+    return tf.estimator.Estimator(
+        model_fn=self._model_fn,
+        model_dir=self._runtime_config.model_dir,
+        config=run_config,
+        params=params
+    )
+
+  def _save_config(self):
+    """Save parameters to config files if model_dir is defined."""
+
+    model_dir = self._runtime_config.model_dir
+
+    if model_dir is not None:
+      if not tf.io.gfile.exists(model_dir):
+        tf.io.gfile.makedirs(model_dir)
+
+      params_io.save_hparams_to_yaml(self._runtime_config, model_dir + '/params.yaml')
+
+  def _write_summary(self, summary_dir, eval_results, predictions, current_step):
+
+    if not self._runtime_config.visualize_images_summary:
+      predictions = None
+
+    evaluation.write_summary(eval_results, summary_dir, current_step, predictions=predictions)
+
+  def train(self, train_input_fn, run_eval_after_train=False, eval_input_fn=None):
+    """Run distributed training on Mask RCNN model."""
+
+    self._save_config()
+    train_run_config = self.build_strategy_configuration('train')
+    train_params = self.build_model_parameters('train')
+    train_estimator = self.build_mask_rcnn_estimator(train_params, train_run_config, 'train')
+
+    train_estimator.train(
+        input_fn=train_input_fn,
+        max_steps=self._runtime_config.total_steps,
+        hooks=get_training_hooks(
+            mode="train",
+            model_dir=self._runtime_config.model_dir,
+            checkpoint_path=self._runtime_config.checkpoint,
+            skip_checkpoint_variables=self._runtime_config.skip_checkpoint_variables
+        )
+    )
+
+    if not run_eval_after_train:
+      return None
+
+    if eval_input_fn is None:
+      raise ValueError('Eval input_fn must be passed to conduct evaluation after training.')
+
+    eval_run_config = self.build_strategy_configuration('eval')
+    eval_params = self.build_model_parameters('eval')
+    eval_estimator = self.build_mask_rcnn_estimator(eval_params, eval_run_config, 'eval')
+
+    last_ckpt = tf.train.latest_checkpoint(self._runtime_config.model_dir, latest_filename=None)
+    logging.info("Restoring parameters from %s\n" % last_ckpt)
+
+    eval_results, predictions = evaluation.evaluate(
+        eval_estimator,
+        eval_input_fn,
+        self._runtime_config.eval_samples,
+        self._runtime_config.eval_batch_size,
+        self._runtime_config.include_mask,
+        self._runtime_config.val_json_file,
+        report_frequency=self._runtime_config.report_frequency
+    )
+
+    output_dir = os.path.join(self._runtime_config.model_dir, 'eval')
+    tf.io.gfile.makedirs(output_dir)
+
+    # Summary writer writes out eval metrics.
+    self._write_summary(output_dir, eval_results, predictions, self._runtime_config.total_steps)
+
+    return eval_results
+
+  def train_and_eval(self, train_input_fn, eval_input_fn):
+    """Run distributed train and eval on Mask RCNN model."""
+
+    self._save_config()
+    output_dir = os.path.join(self._runtime_config.model_dir, 'eval')
+    tf.io.gfile.makedirs(output_dir)
+
+    train_run_config = self.build_strategy_configuration('train')
+    train_params = self.build_model_parameters('train')
+    train_estimator = self.build_mask_rcnn_estimator(train_params, train_run_config, 'train')
+
+    eval_estimator = None
+    eval_results = None
+
+    num_cycles = math.ceil(self._runtime_config.total_steps / self._runtime_config.num_steps_per_eval)
+
+    training_hooks = get_training_hooks(
+        mode="train",
+        model_dir=self._runtime_config.model_dir,
+        checkpoint_path=self._runtime_config.checkpoint,
+        skip_checkpoint_variables=self._runtime_config.skip_checkpoint_variables
+    )
+
+    for cycle in range(1, num_cycles + 1):
+
+      if not MPI_is_distributed() or MPI_rank() == 0:
+
+        print()  # Visual Spacing
+        logging.info("=================================")
+        logging.info('     Start training cycle %02d' % cycle)
+        logging.info("=================================\n")
+
+      max_cycle_step = min(int(cycle * self._runtime_config.num_steps_per_eval), self._runtime_config.total_steps)
+
+      PROFILER_ENABLED = False
+
+      if (not MPI_is_distributed() or MPI_rank() == 0) and PROFILER_ENABLED:
+          profiler_context_manager = tf.contrib.tfprof.ProfileContext
+
+      else:
+          from contextlib import suppress
+          profiler_context_manager = lambda *args, **kwargs: suppress()  # No-Op context manager
+
+      with profiler_context_manager(
+              '/workspace/profiling/',
+              trace_steps=range(100, 200, 3),
+              dump_steps=[200]
+      ) as pctx:
+
+          if (not MPI_is_distributed() or MPI_rank() == 0) and PROFILER_ENABLED:
+            opts = tf.compat.v1.profiler.ProfileOptionBuilder.time_and_memory()
+            pctx.add_auto_profiling('op', opts, [150, 200])
+
+          train_estimator.train(
+              input_fn=train_input_fn,
+              max_steps=max_cycle_step,
+              hooks=training_hooks,
+          )
+
+      if not MPI_is_distributed() or MPI_rank() == 0:
+
+          print()  # Visual Spacing
+          logging.info("=================================")
+          logging.info('    Start evaluation cycle %02d' % cycle)
+          logging.info("=================================\n")
+
+          if eval_estimator is None:
+              eval_run_config = self.build_strategy_configuration('eval')
+              eval_params = self.build_model_parameters('eval')
+              eval_estimator = self.build_mask_rcnn_estimator(eval_params, eval_run_config, 'eval')
+
+          last_ckpt = tf.train.latest_checkpoint(self._runtime_config.model_dir, latest_filename=None)
+          logging.info("Restoring parameters from %s\n" % last_ckpt)
+
+          eval_results, predictions = evaluation.evaluate(
+              eval_estimator,
+              eval_input_fn,
+              self._runtime_config.eval_samples,
+              self._runtime_config.eval_batch_size,
+              self._runtime_config.include_mask,
+              self._runtime_config.val_json_file,
+              report_frequency=self._runtime_config.report_frequency
+          )
+
+          self._write_summary(output_dir, eval_results, predictions, max_cycle_step)
+
+      if MPI_is_distributed():
+          from mpi4py import MPI
+          MPI.COMM_WORLD.Barrier()  # Waiting for all MPI processes to sync
+
+    return eval_results
+
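The cycle bookkeeping above alternates training and evaluation every num_steps_per_eval steps, clamping the last cycle to total_steps. Worked through with hypothetical numbers:

    import math

    total_steps, num_steps_per_eval = 40000, 15000
    num_cycles = math.ceil(total_steps / num_steps_per_eval)  # -> 3
    boundaries = [min(c * num_steps_per_eval, total_steps) for c in range(1, num_cycles + 1)]
    assert boundaries == [15000, 30000, 40000]  # eval runs after each boundary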
+  def eval(self, eval_input_fn):
+    """Run distributed eval on Mask RCNN model."""
+
+    output_dir = os.path.join(self._runtime_config.model_dir, 'eval')
+    tf.io.gfile.makedirs(output_dir)
+
+    # Summary writer writes out eval metrics.
+    run_config = self.build_strategy_configuration('eval')
+    eval_params = self.build_model_parameters('eval')
+    eval_estimator = self.build_mask_rcnn_estimator(eval_params, run_config, 'eval')
+
+    logging.info('Starting to evaluate.')
+
+    last_ckpt = tf.train.latest_checkpoint(self._runtime_config.model_dir, latest_filename=None)
+
+    if last_ckpt is not None:
+      logging.info("Restoring parameters from %s\n" % last_ckpt)
+      current_step = int(os.path.basename(last_ckpt).split('-')[1])
+
+    else:
+      logging.warning(
+          "Could not find trained model in model_dir: `%s`, running initialization to predict\n" %
+          self._runtime_config.model_dir
+      )
+      current_step = 0
+
+    eval_results, predictions = evaluation.evaluate(
+        eval_estimator,
+        eval_input_fn,
+        self._runtime_config.eval_samples,
+        self._runtime_config.eval_batch_size,
+        self._runtime_config.include_mask,
+        self._runtime_config.val_json_file
+    )
+
+    self._write_summary(output_dir, eval_results, predictions, current_step)
+
+    if current_step >= self._runtime_config.total_steps:
+        logging.info('Evaluation finished after training step %d' % current_step)
+
+    return eval_results
+
+
+class EstimatorExecuter(BaseExecuter):
+  """Interface that runs Mask RCNN model using TPUEstimator."""
+
+  def __init__(self, runtime_config, model_fn):
+    super(EstimatorExecuter, self).__init__(runtime_config, model_fn)
+
+    if MPI_is_distributed():
+      os.environ['HOROVOD_GPU_ALLREDUCE'] = 'NCCL'
+      os.environ['HOROVOD_NUM_NCCL_STREAMS'] = '1'
+      # os.environ['HOROVOD_AUTOTUNE'] = '2'
+
+      hvd.init()
+
+      logging.info("Horovod successfully initialized ...")
+
+    os.environ['TF_GPU_THREAD_MODE'] = 'gpu_private'
+    os.environ['TF_GPU_THREAD_COUNT'] = '1' if not MPI_is_distributed() else str(hvd.size())
+
+    os.environ['TF_SYNC_ON_FINISH'] = '0'
+
+  def build_strategy_configuration(self, mode):
+    """Retrieves model configuration for running TF Estimator."""
+
+    run_config = tf.estimator.RunConfig(
+        tf_random_seed=(
+            self._runtime_config.seed
+            if not MPI_is_distributed() or self._runtime_config.seed is None else
+            self._runtime_config.seed + MPI_rank()
+        ),
+        model_dir=self._runtime_config.model_dir,
+        save_summary_steps=None,  # disabled
+        save_checkpoints_steps=None,  # disabled
+        save_checkpoints_secs=None,  # disabled
+        keep_checkpoint_max=20,  # disabled
+        keep_checkpoint_every_n_hours=None,  # disabled
+        log_step_count_steps=None,  # disabled
+        session_config=self._get_session_config(
+            mode=mode,
+            use_xla=self._runtime_config.use_xla,
+            use_amp=self._runtime_config.use_amp,
+            use_tf_distributed=False,
+            allow_xla_at_inference=self._runtime_config.allow_xla_at_inference  # TODO: Remove when XLA at inference fixed
+        ),
+        protocol=None,
+        device_fn=None,
+        train_distribute=None,
+        eval_distribute=None,
+        experimental_distribute=None
+    )
+
+    return run_config
+
+
+class TFDistributedExecuter(BaseExecuter):
+  """Interface that runs Mask RCNN model using MultiWorkerMirroredStrategy."""
+
+  @staticmethod
+  def is_eval_task():
+    return tf.distribute.cluster_resolver.TFConfigClusterResolver().task_type == 'evaluator'
+
+  def build_strategy_configuration(self, mode):
+    """Retrieves model configuration for MultiWorkerMirroredStrategy."""
+
+    distributed_strategy = tf.distribute.MirroredStrategy()
+    # distributed_strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy(
+    #     tf.distribute.experimental.CollectiveCommunication.NCCL
+    # )
+
+    run_config = tf.estimator.RunConfig(
+        tf_random_seed=self._runtime_config.seed,
+        model_dir=self._runtime_config.model_dir,
+        save_summary_steps=None,  # disabled
+        save_checkpoints_steps=None,  # disabled
+        save_checkpoints_secs=None,  # disabled
+        keep_checkpoint_max=20,  # disabled
+        keep_checkpoint_every_n_hours=None,  # disabled
+        log_step_count_steps=None,  # disabled
+        session_config=self._get_session_config(
+            mode=mode,
+            use_xla=self._runtime_config.use_xla,
+            use_amp=self._runtime_config.use_amp,
+            use_tf_distributed=True,
+            # TODO: Remove when XLA at inference fixed
+            allow_xla_at_inference=self._runtime_config.allow_xla_at_inference
+        ),
+        protocol=None,
+        device_fn=None,
+        train_distribute=distributed_strategy if mode == "train" else None,
+        eval_distribute=None,
+        experimental_distribute=None
+    )
+
+    return run_config

+ 452 - 0
TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/evaluation.py

@@ -0,0 +1,452 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Functions to perform COCO evaluation."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import copy
+import operator
+import pprint
+import six
+import time
+
+import io
+from PIL import Image
+
+import numpy as np
+import tensorflow as tf
+
+from mask_rcnn.utils.logging_formatter import logging
+
+from mask_rcnn import coco_metric
+from mask_rcnn.utils import coco_utils
+
+from mask_rcnn.object_detection import visualization_utils
+
+import dllogger
+from dllogger import Verbosity
+
+
+def process_prediction_for_eval(prediction):
+    """Process the model prediction for COCO eval."""
+    image_info = prediction['image_info']
+    box_coordinates = prediction['detection_boxes']
+    processed_box_coordinates = np.zeros_like(box_coordinates)
+
+    for image_id in range(box_coordinates.shape[0]):
+        scale = image_info[image_id][2]
+
+        for box_id in range(box_coordinates.shape[1]):
+            # Map [y1, x1, y2, x2] -> [x1, y1, w, h] and multiply detections
+            # Map [y1, x1, y2, x2] -> [x1, y1, w, h] and multiply detections
+            y1, x1, y2, x2 = box_coordinates[image_id, box_id, :]
+            new_box = scale * np.array([x1, y1, x2 - x1, y2 - y1])
+            processed_box_coordinates[image_id, box_id, :] = new_box
+
+    prediction['detection_boxes'] = processed_box_coordinates
+    return prediction
+
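The per-box loops above can also be written as a vectorized mapping; a NumPy sketch (boxes_to_coco is a hypothetical helper, not part of the patch):

    import numpy as np

    def boxes_to_coco(box_coordinates, image_info):
        # [y1, x1, y2, x2] -> [x1, y1, w, h], scaled back to the original image.
        # box_coordinates: [batch, num_boxes, 4]; image_info[:, 2] holds the scale.
        y1, x1, y2, x2 = np.split(box_coordinates, 4, axis=-1)
        coco_boxes = np.concatenate([x1, y1, x2 - x1, y2 - y1], axis=-1)
        return coco_boxes * image_info[:, 2][:, None, None]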
+
+def compute_coco_eval_metric(predictor,
+                             num_batches=-1,
+                             include_mask=True,
+                             annotation_json_file="",
+                             eval_batch_size=-1,
+                             report_frequency=None):
+    """Compute COCO eval metric given a prediction generator.
+
+    Args:
+    predictor: a generator that iteratively pops a dictionary of predictions
+      with the format compatible with COCO eval tool.
+    num_batches: the number of batches to be aggregated in eval. This is how
+      many times the predictor gets pulled. A negative value iterates until
+      the predictor is exhausted.
+    include_mask: a boolean that indicates whether we include the mask eval.
+    annotation_json_file: the annotation json file of the eval dataset.
+    eval_batch_size: number of samples per predicted batch, used to compute
+      throughput and latency.
+    report_frequency: if set, log intermediate eval results every
+      report_frequency batches.
+
+    Returns:
+    eval_results: the aggregated COCO metric eval results.
+    """
+
+    if annotation_json_file == "":
+        annotation_json_file = None
+
+    use_groundtruth_from_json = (annotation_json_file is not None)
+
+    predictions = dict()
+    batch_idx = 0
+
+    if use_groundtruth_from_json:
+        eval_metric = coco_metric.EvaluationMetric(annotation_json_file, include_mask=include_mask)
+
+    else:
+        eval_metric = coco_metric.EvaluationMetric(filename=None, include_mask=include_mask)
+
+    def evaluation_preds(preds):
+
+        # Essential to avoid modifying the source dict
+        _preds = copy.deepcopy(preds)
+
+        for k, v in six.iteritems(_preds):
+            _preds[k] = np.concatenate(_preds[k], axis=0)
+
+        if 'orig_images' in _preds and _preds['orig_images'].shape[0] > 10:
+            # Only samples a few images for visualization.
+            _preds['orig_images'] = _preds['orig_images'][:10]
+
+        if use_groundtruth_from_json:
+            eval_results = eval_metric.predict_metric_fn(_preds)
+
+        else:
+            images, annotations = coco_utils.extract_coco_groundtruth(_preds, include_mask)
+            coco_dataset = coco_utils.create_coco_format_dataset(images, annotations)
+            eval_results = eval_metric.predict_metric_fn(_preds, groundtruth_data=coco_dataset)
+
+        return eval_results
+
+    # Take into account cuDNN & Tensorflow warmup
+    # Drop N first steps for avg throughput calculation
+    BURNIN_STEPS = 100
+    model_throughput_list = list()
+    inference_time_list = list()
+
+    while num_batches < 0 or batch_idx < num_batches:
+
+        try:
+            step_t0 = time.time()
+            step_predictions = six.next(predictor)
+            batch_time = time.time() - step_t0
+
+            throughput = eval_batch_size / batch_time
+            model_throughput_list.append(throughput)
+            inference_time_list.append(batch_time)
+
+            logging.info('Running inference on batch %03d/%03d... - Step Time: %.4fs - Throughput: %.1f imgs/s' % (
+                batch_idx + 1,
+                num_batches,
+                batch_time,
+                throughput
+            ))
+
+        except StopIteration:
+            logging.info('Get StopIteration at %d batch.' % (batch_idx + 1))
+            break
+
+        step_predictions = process_prediction_for_eval(step_predictions)
+
+        for k, v in step_predictions.items():
+
+            if k not in predictions:
+                predictions[k] = [v]
+
+            else:
+                predictions[k].append(v)
+
+        batch_idx = batch_idx + 1
+
+        # If report_frequency is set, report intermediate results every
+        # report_frequency batches, i.e. every eval_batch_size * report_frequency samples.
+        if report_frequency and batch_idx % report_frequency == 0:
+            eval_results = evaluation_preds(preds=predictions)
+            logging.info('Eval results: %s' % pprint.pformat(eval_results, indent=4))
+
+    inference_time_list.sort()
+    eval_results = evaluation_preds(preds=predictions)
+
+    average_time = np.mean(inference_time_list)
+    latency_50 = max(inference_time_list[:int(len(inference_time_list) * 0.5)])
+    latency_90 = max(inference_time_list[:int(len(inference_time_list) * 0.90)])
+    latency_95 = max(inference_time_list[:int(len(inference_time_list) * 0.95)])
+    latency_99 = max(inference_time_list[:int(len(inference_time_list) * 0.99)])
+    latency_100 = max(inference_time_list[:int(len(inference_time_list) * 1)])
+
+    print()  # Visual Spacing
+    logging.info("# @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ #")
+    logging.info("         Evaluation Performance Summary          ")
+    logging.info("# @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ #")
+
+    total_processing_hours, rem = divmod(np.sum(inference_time_list), 3600)
+    total_processing_minutes, total_processing_seconds = divmod(rem, 60)
+
+    if len(model_throughput_list) > BURNIN_STEPS:
+        # Take into account cuDNN & Tensorflow warmup
+        # Drop N first steps for avg throughput calculation
+        # Also drop last step which may have a different batch size
+        avg_throughput = np.mean(model_throughput_list[BURNIN_STEPS:-1])
+    else:
+        avg_throughput = -1.
+
+    print()  # Visual Spacing
+    logging.info("Average throughput: {throughput:.1f} samples/sec".format(throughput=avg_throughput))
+    logging.info("Inference Latency Average (s) = {avg:.4f}".format(avg=average_time))
+    logging.info("Inference Latency 50% (s) = {cf_50:.4f}".format(cf_50=latency_50))
+    logging.info("Inference Latency 90%  (s) = {cf_90:.4f}".format(cf_90=latency_90))
+    logging.info("Inference Latency 95%  (s) = {cf_95:.4f}".format(cf_95=latency_95))
+    logging.info("Inference Latency 99%  (s) = {cf_99:.4f}".format(cf_99=latency_99))
+    logging.info("Inference Latency 100%  (s) = {cf_100:.4f}".format(cf_100=latency_100))
+    logging.info("Total processed steps: {total_steps}".format(total_steps=len(model_throughput_list)))
+    logging.info(
+        "Total processing time: {hours}h {minutes:02d}m {seconds:02d}s".format(
+            hours=total_processing_hours,
+            minutes=int(total_processing_minutes),
+            seconds=int(total_processing_seconds)
+        )
+    )
+    dllogger.log(step=(), data={"avg_inference_throughput": avg_throughput}, verbosity=Verbosity.DEFAULT)
+    avg_inference_time = float(total_processing_hours * 3600 + int(total_processing_minutes) * 60 +
+        int(total_processing_seconds))
+    dllogger.log(step=(), data={"avg_inference_time": avg_inference_time}, verbosity=Verbosity.DEFAULT)
+    logging.info("==================== Metrics ====================")
+
+    # logging.info('Eval Epoch results: %s' % pprint.pformat(eval_results, indent=4))
+    for key, value in sorted(eval_results.items(), key=operator.itemgetter(0)):
+        logging.info("%s: %.9f" % (key, value))
+    print()  # Visual Spacing
+
+    return eval_results, predictions
+
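Since inference_time_list is sorted ascending, the prefix-max used above reads off the empirical quantile directly; an equivalent (illustrative) NumPy check:

    import numpy as np

    times = np.sort(np.random.rand(1000))  # stand-in for the sorted latency list
    q = 0.90
    assert max(times[:int(len(times) * q)]) == times[int(len(times) * q) - 1]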
+
+def evaluate(eval_estimator,
+             input_fn,
+             num_eval_samples,
+             eval_batch_size,
+             include_mask=True,
+             validation_json_file="",
+             report_frequency=None):
+
+    """Runs COCO evaluation once."""
+    predictor = eval_estimator.predict(
+        input_fn=input_fn,
+        yield_single_examples=False
+    )
+
+    # Every predictor.next() gets a batch of prediction (a dictionary).
+    num_eval_times = num_eval_samples // eval_batch_size
+    assert num_eval_times > 0, 'num_eval_samples must be >= eval_batch_size!'
+
+    eval_results, predictions = compute_coco_eval_metric(
+        predictor,
+        num_eval_times,
+        include_mask,
+        validation_json_file,
+        eval_batch_size=eval_batch_size,
+        report_frequency=report_frequency
+    )
+
+    return eval_results, predictions
+
+
+def write_summary(eval_results, summary_dir, current_step, predictions=None):
+    """Write out eval results for the checkpoint."""
+    with tf.Graph().as_default():
+        summaries = []
+
+        # Summary writer writes out eval metrics.
+        try:
+            # Tensorflow 1.x
+            summary_writer = tf.compat.v1.summary.FileWriter(summary_dir)
+        except AttributeError:
+            # Tensorflow 2.x
+            summary_writer = tf.summary.create_file_writer(summary_dir)
+            summary_writer.as_default()
+
+        eval_results_dict = {}
+        for metric in eval_results:
+            try:
+                summaries.append(tf.compat.v1.Summary.Value(tag=metric, simple_value=eval_results[metric]))
+                eval_results_dict[metric] = float(eval_results[metric])
+
+            except AttributeError:
+                tf.summary.scalar(name=metric, data=eval_results[metric], step=current_step)
+                eval_results_dict[metric] = float(eval_results[metric])
+        dllogger.log(step=(), data=eval_results_dict, verbosity=Verbosity.DEFAULT)
+
+        if isinstance(predictions, dict) and predictions:
+            images_summary = get_image_summary(predictions, current_step)
+
+            try:
+                summaries += images_summary
+            except TypeError:
+                summaries.append(images_summary)
+
+        try:
+            # tf_summaries = tf.compat.v1.Summary(value=list(summaries))
+            tf_summaries = tf.compat.v1.Summary(value=summaries)
+            summary_writer.add_summary(tf_summaries, current_step)
+            summary_writer.flush()
+
+        except AttributeError:
+            tf.summary.flush(summary_writer)
+
+
+def generate_image_preview(image, boxes, scores, classes, gt_boxes=None, segmentations=None):
+    """Creates an image summary given predictions."""
+    max_boxes_to_draw = 100
+    min_score_thresh = 0.1
+
+    # Visualizes the predictions.
+    image_with_detections = visualization_utils.visualize_boxes_and_labels_on_image_array(
+        image,
+        boxes,
+        classes=classes,
+        scores=scores,
+        category_index={},
+        instance_masks=segmentations,
+        use_normalized_coordinates=False,
+        max_boxes_to_draw=max_boxes_to_draw,
+        min_score_thresh=min_score_thresh,
+        agnostic_mode=False
+    )
+
+    if gt_boxes is not None:
+        # Visualizes the groundtruth boxes. They are in black by default.
+        image_with_detections = visualization_utils.visualize_boxes_and_labels_on_image_array(
+            image_with_detections,
+            gt_boxes,
+            classes=None,
+            scores=None,
+            category_index={},
+            use_normalized_coordinates=False,
+            max_boxes_to_draw=max_boxes_to_draw,
+            agnostic_mode=True
+        )
+
+    return image_with_detections
+
+
+def generate_image_buffer(input_image):
+    buf = io.BytesIO()
+    # numpy images are (height, width, ...) while PIL sizes are (width, height).
+    h, w = input_image.shape[:2]
+    ratio = 1024 / h
+    new_size = (int(w * ratio), int(h * ratio))
+
+    image = Image.fromarray(input_image.astype(np.uint8))
+    image.thumbnail(new_size)
+    image.save(buf, format='png')
+
+    return buf.getvalue()
+
+
+def get_image_summary(predictions, current_step, max_images=10):
+    """Write out image and prediction for summary."""
+
+    if 'orig_images' not in predictions:
+        logging.info('Missing orig_images in predictions: %s', predictions.keys())
+        return
+
+    max_images = min(
+        len(predictions['orig_images']) * predictions['orig_images'][0].shape[0],
+        max_images
+    )
+
+    _detection_boxes = np.concatenate(predictions['detection_boxes'], axis=0)
+    _detection_scores = np.concatenate(predictions['detection_scores'], axis=0)
+    _detection_classes = np.concatenate(predictions['detection_classes'], axis=0)
+    _image_info = np.concatenate(predictions['image_info'], axis=0)
+    _num_detections = np.concatenate(predictions['num_detections'], axis=0)
+    _orig_images = np.concatenate(predictions['orig_images'], axis=0)
+
+    if 'detection_masks' in predictions:
+        _detection_masks = np.concatenate(predictions['detection_masks'], axis=0)
+    else:
+        _detection_masks = None
+
+    if 'groundtruth_boxes' in predictions:
+        _groundtruth_boxes = np.concatenate(predictions['groundtruth_boxes'], axis=0)
+    else:
+        _groundtruth_boxes = None
+
+    _orig_images = _orig_images * 255
+    _orig_images = _orig_images.astype(np.uint8)
+
+    image_previews = []
+
+    for i in range(max_images):
+        num_detections = min(len(_detection_boxes[i]), int(_num_detections[i]))
+
+        detection_boxes = _detection_boxes[i][:num_detections]
+        detection_scores = _detection_scores[i][:num_detections]
+        detection_classes = _detection_classes[i][:num_detections]
+
+        image = _orig_images[i]
+        image_height = image.shape[0]
+        image_width = image.shape[1]
+
+        # Rescale the box to fit the visualization image.
+        h, w = _image_info[i][3:5]
+        detection_boxes = detection_boxes / np.array([w, h, w, h])
+        detection_boxes = detection_boxes * np.array([image_width, image_height, image_width, image_height])
+
+        if _groundtruth_boxes is not None:
+            gt_boxes = _groundtruth_boxes[i]
+            gt_boxes = gt_boxes * np.array([image_height, image_width, image_height, image_width])
+        else:
+            gt_boxes = None
+
+        if _detection_masks is not None:
+            instance_masks = _detection_masks[i][0:num_detections]
+            segmentations = coco_metric.generate_segmentation_from_masks(
+                instance_masks,
+                detection_boxes,
+                image_height,
+                image_width
+            )
+        else:
+            segmentations = None
+
+        # Convert from [x, y, w, h] to [x1, y1, x2, y2]. Since
+        # process_prediction_for_eval() sets the boxes to [x, y] format, they
+        # need to be reverted to [y, x] format here.
+        xmin, ymin, w, h = np.split(detection_boxes, 4, axis=-1)
+        xmax = xmin + w
+        ymax = ymin + h
+
+        boxes_to_visualize = np.concatenate([ymin, xmin, ymax, xmax], axis=-1)
+
+        image_preview = generate_image_preview(
+            image,
+            boxes=boxes_to_visualize,
+            scores=detection_scores,
+            classes=detection_classes.astype(np.int32),
+            gt_boxes=gt_boxes,
+            segmentations=segmentations
+        )
+        image_previews.append(image_preview)
+
+    try:
+        summaries = []
+
+        for i, image_preview in enumerate(image_previews):
+            image_buffer = generate_image_buffer(image_preview)
+            image_summary = tf.compat.v1.Summary.Image(encoded_image_string=image_buffer)
+            image_value = tf.compat.v1.Summary.Value(tag='%d_input' % i, image=image_summary)
+
+            summaries.append(image_value)
+
+    except AttributeError:
+        image_previews = np.array(image_previews)
+        summaries = tf.summary.image(
+            name='image_summary',
+            data=image_previews,
+            step=current_step,
+            max_outputs=max_images
+        )
+
+    return summaries
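+
+
+# A short sketch of the box conversion performed above: COCO-style
+# [x, y, width, height] boxes become [ymin, xmin, ymax, xmax] corners, which
+# is the layout the visualization utils expect (illustrative helper only).
+def _example_xywh_to_yxyx(boxes_xywh):
+    xmin, ymin, w, h = np.split(boxes_xywh, 4, axis=-1)
+    return np.concatenate([ymin, xmin, ymin + h, xmin + w], axis=-1)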

+ 24 - 0
TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/hooks/__init__.py

@@ -0,0 +1,24 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from mask_rcnn.hooks.ckpt_hook import CheckpointSaverHook
+from mask_rcnn.hooks.pretrained_restore_hook import PretrainedWeightsLoadingHook
+
+__all__ = [
+    "CheckpointSaverHook",
+    "PretrainedWeightsLoadingHook",
+]

+ 114 - 0
TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/hooks/ckpt_hook.py

@@ -0,0 +1,114 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import os
+import tensorflow as tf
+
+from mask_rcnn.utils.logging_formatter import logging
+
+__all__ = ["CheckpointSaverHook"]
+
+
+class CheckpointSaverHook(tf.estimator.SessionRunHook):
+    """Saves checkpoints every N steps or seconds."""
+
+    def __init__(self, checkpoint_dir, checkpoint_basename="model.ckpt"):
+        """Initializes a `CheckpointSaverHook`.
+        Args:
+          checkpoint_dir: `str`, base directory for the checkpoint files.
+          checkpoint_basename: `str`, base name for the checkpoint files.
+        """
+        logging.info("Create CheckpointSaverHook.")
+
+        self._saver = None
+        self._checkpoint_dir = checkpoint_dir
+        self._save_path = os.path.join(checkpoint_dir, checkpoint_basename)
+
+        self._steps_per_run = 1
+
+        self._is_initialized = False
+
+        self._global_step_tensor = None
+        self._summary_writer = None
+
+    def _set_steps_per_run(self, steps_per_run):
+        self._steps_per_run = steps_per_run
+
+    def begin(self):
+        self._global_step_tensor = tf.compat.v1.train.get_or_create_global_step()
+
+        self._saver = tf.compat.v1.train.Saver()
+
+        from tensorflow.python.training import summary_io
+        self._summary_writer = summary_io.SummaryWriterCache.get(self._checkpoint_dir)
+
+        if self._global_step_tensor is None:
+            raise RuntimeError(
+                "Global step should be created to use CheckpointSaverHook."
+            )
+
+    def after_create_session(self, session, coord):
+
+        if not self._is_initialized:
+            global_step = session.run(self._global_step_tensor)
+            from tensorflow.python.keras.backend import get_graph
+            default_graph = get_graph()
+
+            # We do write graph and saver_def at the first call of before_run.
+            # We cannot do this in begin, since we let other hooks to change graph and
+            # add variables in begin. Graph is finalized after all begin calls.
+            tf.io.write_graph(
+                default_graph.as_graph_def(add_shapes=True),
+                self._checkpoint_dir,
+                "graph.pbtxt"
+            )
+
+            saver_def = self._saver.saver_def
+
+            from tensorflow.python.framework import meta_graph
+
+            meta_graph_def = meta_graph.create_meta_graph_def(
+                graph_def=default_graph.as_graph_def(add_shapes=True),
+                saver_def=saver_def
+            )
+
+            self._summary_writer.add_graph(default_graph)
+            self._summary_writer.add_meta_graph(meta_graph_def)
+
+            # The checkpoint saved here is the state at step "global_step".
+            self._save(session, global_step)
+
+            self._is_initialized = True
+
+    def end(self, session):
+        last_step = session.run(self._global_step_tensor)
+
+        self._save(session, last_step)
+
+    def _save(self, session, step):
+        """Saves the latest checkpoint, returns should_stop."""
+        logging.info("Saving checkpoints for %d into %s.", step, self._save_path)
+
+        self._saver.save(session, self._save_path, global_step=step)
+        self._summary_writer.add_session_log(
+            tf.compat.v1.SessionLog(status=tf.compat.v1.SessionLog.CHECKPOINT, checkpoint_path=self._save_path),
+            step
+        )
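+
+
+# A minimal usage sketch, assuming an `estimator` and `train_input_fn` defined
+# elsewhere (illustrative names): the hook writes the graph and a checkpoint
+# right after session creation, and a final checkpoint when training ends.
+#
+#   hooks = [CheckpointSaverHook(checkpoint_dir="/results/checkpoints")]
+#   estimator.train(input_fn=train_input_fn, max_steps=100, hooks=hooks)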

+ 518 - 0
TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/hooks/logging_hook.py

@@ -0,0 +1,518 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import copy
+import operator
+import time
+
+import numpy as np
+import tensorflow as tf
+
+from distutils.version import LooseVersion
+
+from mask_rcnn.utils.logging_formatter import logging
+
+from mask_rcnn.utils import meters
+
+from mask_rcnn.utils.decorators import atexit_hook
+
+from mask_rcnn.utils.distributed_utils import MPI_is_distributed
+from mask_rcnn.utils.distributed_utils import MPI_rank_and_size
+from mask_rcnn.utils.distributed_utils import MPI_size
+
+from mask_rcnn.utils.logging_backend import LoggingBackend
+from mask_rcnn.utils.logging_backend import RuntimeMode
+
+from mask_rcnn.utils.metric_tracking import clear_registered_metrics
+
+from mask_rcnn.utils.metric_tracking import TF_METRICS
+from mask_rcnn.utils.metric_tracking import KERAS_MODELS
+
+from mask_rcnn.utils.lazy_imports import LazyImport
+hvd = LazyImport("horovod.tensorflow")
+
+__all__ = ["AutoLoggingHook"]
+
+
+@atexit_hook
+class _AutoLoggingHook(tf.estimator.SessionRunHook):
+
+    def __init__(self, log_every_n_steps=200, warmup_steps=500, is_training=True):
+        """
+        AutoLogging Hook for Tensorflow
+
+        :param log_every_n_steps: log will be output to the console every N steps
+        :param warmup_steps: integer, number of steps considered as warmup
+        :param is_training: boolean
+        """
+
+        self._logging_proxy = LoggingBackend()
+
+        self._initialized = False
+        self._metrics = copy.copy(TF_METRICS)
+
+        self._batch_size_tensor = None
+
+        self._AMP_steps_since_last_loss_scale = None
+        self._AMP_loss_scale_tensor = None
+
+        self._current_step = None
+        self._amp_steps_non_skipped = None
+
+        self._warmup_steps = warmup_steps
+
+        self._log_every_n_steps = log_every_n_steps
+
+        self._step_t0 = None
+        self._session_t0 = None
+        self._session_run_times = list()
+
+        self._global_step_tensor = None
+
+        self._is_training = is_training
+        self._runtime_mode = RuntimeMode.TRAIN if is_training else RuntimeMode.VALIDATION
+
+        self._model_throughput = meters.MovingAverageMeter(window_size=1000)
+        self._model_stats = None
+
+        self._n_gpus = None
+
+    def __atexit__(self):
+
+        if self._initialized:
+
+            total_processing_time = int(np.sum(self._session_run_times))
+
+            try:
+                avg_throughput = self._model_throughput.read()
+            except ValueError:
+                avg_throughput = -1
+
+            self._logging_proxy.log_summary(
+                is_train=self._is_training,
+                total_steps=self._current_step,
+                total_processing_time=total_processing_time,
+                avg_throughput=avg_throughput
+            )
+
+            metric_data = dict()
+
+            for key, value in self._metrics.items():
+                try:
+                    metric_data[key] = value["aggregator"].read()
+
+                except ValueError:
+                    pass
+
+            self._logging_proxy.log_final_metrics(metric_data=metric_data, runtime_mode=self._runtime_mode)
+
+    def begin(self):
+        """Called once before using the session.
+        When called, the default graph is the one that will be launched in the
+        session.  The hook can modify the graph by adding new operations to it.
+        After the `begin()` call the graph will be finalized and the other callbacks
+        can not modify the graph anymore. Second call of `begin()` on the same
+        graph, should not change the graph.
+        """
+
+        from tensorflow.python.keras.backend import get_graph
+        _graph = get_graph()
+
+        try:
+            self._batch_size_tensor = None
+
+            for tensor in _graph.as_graph_def().node:
+                if "IteratorGetNext" in tensor.name:
+                    _input_tensor = _graph.get_tensor_by_name(tensor.name + ":0")
+                    try:
+                        self._batch_size_tensor = tf.shape(input=_input_tensor)[0]
+                    except TypeError:  # Ragged Tensor
+                        self._batch_size_tensor = _input_tensor.bounding_shape()[0]
+                    break
+            else:
+                raise RuntimeError(
+                    "Tensor `{}` could not be found. "
+                    "Make sure you are using tf.data API".format("IteratorGetNext")
+                )
+
+        except RuntimeError:
+            raise
+
+        except Exception as e:
+            raise RuntimeError(
+                "Impossible to fetch the tensor: `IteratorGetNext`. Make sure you are using tf.data API."
+            ) from e
+
+        self._global_step_tensor = tf.compat.v1.train.get_or_create_global_step()
+
+        try:
+            self._AMP_loss_scale_tensor = _graph.get_tensor_by_name("current_loss_scale/Read/ReadVariableOp:0")
+            # Assumed name of the loss-scale step counter created by TF-AMP ("good_steps").
+            self._AMP_steps_since_last_loss_scale = _graph.get_tensor_by_name("good_steps/Read/ReadVariableOp:0")
+
+        except RuntimeError:
+            raise
+
+        # TF-AMP is not activated
+        except Exception:
+            pass
+
+        # if self._is_training:
+        #     self.runtime_data["params_count"] = tf.reduce_sum(
+        #         [tf.reduce_prod(v.shape) for v in tf.trainable_variables()]
+        #     )
+
+    def end(self, session):  # pylint: disable=unused-argument
+        """Called at the end of session.
+        The `session` argument can be used in case the hook wants to run final ops,
+        such as saving a last checkpoint.
+        If `session.run()` raises exception other than OutOfRangeError or
+        StopIteration then `end()` is not called.
+        Note the difference between `end()` and `after_run()` behavior when
+        `session.run()` raises OutOfRangeError or StopIteration. In that case
+        `end()` is called but `after_run()` is not called.
+        Args:
+          session: A TensorFlow Session that will be soon closed.
+        """
+
+        self._session_run_times.append(time.time() - self._session_t0)
+
+    def after_create_session(self, session, coord):  # pylint: disable=unused-argument
+        """Called when new TensorFlow session is created.
+        This is called to signal the hooks that a new session has been created. This
+        has two essential differences with the situation in which `begin` is called:
+        * When this is called, the graph is finalized and ops can no longer be added
+            to the graph.
+        * This method will also be called as a result of recovering a wrapped
+            session, not only at the beginning of the overall session.
+        Args:
+          session: A TensorFlow Session that has been created.
+          coord: A Coordinator object which keeps track of all threads.
+        """
+
+        # ========= Collect the number of GPUs ======== #
+        if self._is_training:
+
+            if MPI_is_distributed():
+                self._n_gpus = MPI_size()
+
+            elif tf.distribute.has_strategy():
+                self._n_gpus = tf.distribute.get_strategy().num_replicas_in_sync
+
+            else:
+                self._n_gpus = 1
+
+        else:
+            self._n_gpus = 1
+
+        # =========== TensorFlow Hook Setup =========== #
+        _global_step, _metrics = setup_tensorflow_hook(
+            sess=session,
+            logging_proxy=self._logging_proxy,
+            is_training=self._is_training,
+            is_initialized=self._initialized
+        )
+
+        if _global_step >= 0:
+            self._current_step = self._amp_steps_non_skipped = _global_step
+
+        self._metrics.update(_metrics)
+
+        if not self._is_training:
+
+            for metric_name in self._metrics.keys():
+                self._metrics[metric_name]["aggregator"].reset()
+
+        self._initialized = True
+        # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% #
+
+        self._session_t0 = time.time()
+
+    def before_run(self, run_context):  # pylint: disable=unused-argument
+        """Called before each call to run().
+        You can return from this call a `SessionRunArgs` object indicating ops or
+        tensors to add to the upcoming `run()` call.  These ops/tensors will be run
+        together with the ops/tensors originally passed to the original run() call.
+        The run args you return can also contain feeds to be added to the run()
+        call.
+        The `run_context` argument is a `SessionRunContext` that provides
+        information about the upcoming `run()` call: the originally requested
+        op/tensors, the TensorFlow Session.
+        At this point graph is finalized and you can not add ops.
+        Args:
+          run_context: A `SessionRunContext` object.
+        Returns:
+          None or a `SessionRunArgs` object.
+        """
+
+        self._current_step += 1
+
+        request_fetches = {
+            "global_step": self._global_step_tensor, "metrics": dict(), "batch_size": self._batch_size_tensor
+        }
+
+        if self._is_training and self._AMP_steps_since_last_loss_scale is not None:
+            request_fetches["AMP"] = {
+                "steps_since_last_loss_scale": self._AMP_steps_since_last_loss_scale,
+                "current_loss_scale": self._AMP_loss_scale_tensor,
+            }
+
+        if self._current_step % self._log_every_n_steps == 0:
+            for key, value in self._metrics.items():
+                request_fetches["metrics"][key] = value["tensor"]
+
+        self._step_t0 = time.time()
+
+        return tf.estimator.SessionRunArgs(request_fetches)
+
+    def after_run(self, run_context, run_values):  # pylint: disable=unused-argument
+        """Called after each call to run().
+        The `run_values` argument contains results of requested ops/tensors by
+        `before_run()`.
+        The `run_context` argument is the same one send to `before_run` call.
+        `run_context.request_stop()` can be called to stop the iteration.
+        If `session.run()` raises any exceptions then `after_run()` is not called.
+        Args:
+          run_context: A `SessionRunContext` object.
+          run_values: A SessionRunValues object.
+        """
+
+        batch_time = time.time() - self._step_t0
+
+        _global_step = run_values.results["global_step"]
+
+        if self._is_training and self._AMP_steps_since_last_loss_scale is not None:
+
+            try:
+                AMP_steps_since_last_loss_scale = run_values.results["AMP"]["steps_since_last_loss_scale"]
+                AMP_loss_scale = run_values.results["AMP"]["current_loss_scale"]
+
+            except KeyError:
+                AMP_steps_since_last_loss_scale = None
+                AMP_loss_scale = None
+
+            if AMP_steps_since_last_loss_scale is not None:
+
+                # Step has been skipped
+                if _global_step != (self._amp_steps_non_skipped + 1):
+                    logging.warning(
+                        "AMP - Training iteration `#{step}` has been skipped and loss rescaled. "
+                        "New Loss Scale: {loss_scale}\n".format(step=self._current_step, loss_scale=AMP_loss_scale)
+                    )
+
+                else:
+                    self._amp_steps_non_skipped += 1
+
+                    if AMP_steps_since_last_loss_scale == 0:
+                        logging.warning(
+                            "AMP - Training iteration `#{step}` - Loss scale has been automatically increased. "
+                            "New Loss Scale: {loss_scale}\n".format(step=self._current_step, loss_scale=AMP_loss_scale)
+                        )
+
+        else:
+            AMP_steps_since_last_loss_scale = None
+            AMP_loss_scale = None
+
+        def get_model_throughput():
+            gpu_batch_size = run_values.results["batch_size"]
+            return gpu_batch_size / batch_time * self._n_gpus
+
+        # def get_model_stats():
+        #     return get_tf_model_statistics(batch_size=run_values.results["batch_size"], scope_name=None)
+        #
+        # if self._model_stats is None:
+        #     self._model_stats = get_model_stats()
+
+        is_log_step = self._current_step % self._log_every_n_steps == 0
+
+        if is_log_step:
+
+            if self._current_step > self._warmup_steps:
+
+                try:
+                    model_throughput = self._model_throughput.read()
+                except ValueError:
+                    model_throughput = get_model_throughput()
+
+            else:
+                model_throughput = get_model_throughput()
+
+            self._logging_proxy.log_step(iteration=self._current_step, throughput=model_throughput, gpu_stats=[])
+
+            self._logging_proxy.log_amp_runtime(
+                current_loss_scale=AMP_loss_scale,
+                steps_non_skipped=_global_step,
+                steps_since_last_scale=AMP_steps_since_last_loss_scale,
+            )
+
+            metric_data = dict()
+
+            for name, value in sorted(run_values.results["metrics"].items(), key=operator.itemgetter(0)):
+                self._metrics[name]["aggregator"].record(value)
+
+                metric_data[name] = self._metrics[name]["aggregator"].read()
+
+            self._logging_proxy.log_metrics(
+                metric_data=metric_data, iteration=self._current_step, runtime_mode=self._runtime_mode
+            )
+
+            print()  # Visual Spacing
+
+        elif self._current_step > self._warmup_steps:
+            # Do not store speed for log step due to additional fetches
+            self._model_throughput.record(get_model_throughput())
+
+
+class _SlaveGPUsHook(tf.estimator.SessionRunHook):
+
+    def after_create_session(self, session, coord):
+
+        with logging.temp_verbosity(logging.INFO):  # Do not warn user about metric cleaning
+            clear_registered_metrics()
+
+
+def real_autologging_hook(*args, **kwargs):
+
+    replica_id = tf.distribute.get_replica_context().replica_id_in_sync_group
+
+    # Do not set a logging hook for GPUs != 0
+    if MPI_rank_and_size()[0] != 0 or (isinstance(replica_id, tf.Tensor) and tf.get_static_value(replica_id) != 0):
+        return _SlaveGPUsHook()
+
+    else:
+        _ = LoggingBackend()  # Making sure the backend is defined before any hook due to __atexit__ hook
+        return _AutoLoggingHook(*args, **kwargs)
+
+
+def collect_registered_metrics():
+
+    if TF_METRICS:  # if not empty
+
+        metrics = copy.copy(TF_METRICS)
+
+        # Do not warn user about metric cleaning
+        with logging.temp_verbosity(logging.INFO):
+            clear_registered_metrics()
+
+        return metrics
+
+    else:
+        return dict()
+
+
+def get_model_variables():
+    """Return model variables: global variables without the optimizer's slot variables."""
+
+    optimizer_suffixes = (
+        "/Momentum:0",
+        "/Adadelta:0",
+        "/Adadelta_1:0",
+        "/Adam:0",
+        "/Adam_1:0",
+        "/Adagrad:0",
+        "/RMSProp:0",
+        "/RMSProp_1:0",
+        "/LARSOptimizer:0",
+    )
+
+    return [var for var in tf.compat.v1.global_variables() if not var.name.endswith(optimizer_suffixes)]
+
+
+def get_trainable_variables():
+    """Get a list of trainable TensorFlow variables.
+
+    Returns
+    -------
+    list of Tensor
+        A list of trainable TensorFlow variables.
+    """
+    if KERAS_MODELS or LooseVersion(tf.__version__) >= LooseVersion("2.0.0"):
+        logging.warning(
+            "In TF2.x, only trainable variables created with Keras Models are captured for logging.\n"
+            "In TF1.x, if any Keras model is defined, only variables created inside Keras models will be logged."
+        )
+
+        var_list = list()
+
+        for model in KERAS_MODELS:
+            var_list.extend(model.trainable_variables)
+
+        # Keep only a list of unique variables (remove potential duplicates)
+        var_list = list(set(var_list))
+
+        # clearing the list of Keras Model to avoid memory leaks
+        KERAS_MODELS.clear()
+
+        return [var for var in sorted(var_list, key=lambda v: v.name)]
+
+    else:
+        # return tf.trainable_variables()  # deprecated in TF2.x
+        from tensorflow.python.keras.backend import get_graph
+        return get_graph().get_collection('trainable_variables')
+
+
+def setup_tensorflow_hook(sess, logging_proxy, is_training, is_initialized):
+
+    global_step = -1
+
+    if is_training:
+
+        if not is_initialized:
+
+            _global_step_tensor = tf.compat.v1.train.get_or_create_global_step()
+
+            global_step = sess.run(_global_step_tensor)
+
+            trainable_variables = get_trainable_variables()
+
+            def count_weights_in_varlist(var_list):
+                return np.sum([np.prod(s.get_shape()) for s in var_list])
+
+            logging_proxy.log_git_status()
+
+            logging_proxy.log_model_statistics(
+                model_statistics={
+                    "# Trainable Weights": "{:,}".format(int(count_weights_in_varlist(trainable_variables))),
+                    "# Model Weights": "{:,}".format(int(count_weights_in_varlist(get_model_variables()))),
+                }
+            )
+
+            logging_proxy.log_trainable_variables([(var.name, var.get_shape()) for var in trainable_variables])
+
+    else:
+
+        if not is_initialized:
+            global_step = 0
+
+    metrics = collect_registered_metrics()
+
+    logging_proxy.log_runtime(is_train=is_training)
+
+    return global_step, metrics
+
+
+AutoLoggingHook = real_autologging_hook
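+
+
+# A minimal usage sketch (illustrative names, not part of this module):
+#
+#   hooks = [AutoLoggingHook(log_every_n_steps=100, warmup_steps=500, is_training=True)]
+#   estimator.train(input_fn=train_input_fn, max_steps=1000, hooks=hooks)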

+ 216 - 0
TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/hooks/pretrained_restore_hook.py

@@ -0,0 +1,216 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import sys
+import re
+import tensorflow as tf
+
+from mask_rcnn.utils.logging_formatter import logging
+from mask_rcnn.utils.distributed_utils import MPI_rank
+
+__all__ = ["PretrainedWeightsLoadingHook"]
+
+
+# pylint: disable=protected-access
+# Currently variable_scope doesn't provide very good APIs to access
+# all variables under scope and retrieve and check existing scopes.
+def get_variable_full_name(var):
+    """Returns the full name of a variable.
+    For normal Variables, this is the same as the var.op.name.  For
+    sliced or PartitionedVariables, this name is the same for all the
+    slices/partitions. In both cases, this is normally the name used in
+    a checkpoint file.
+    Args:
+    var: A `Variable` object.
+    Returns:
+    A string that is the full name.
+    """
+    if var._save_slice_info:
+        return var._save_slice_info.full_name
+    else:
+        return var.op.name
+
+
+def assign_from_checkpoint(model_path, var_list, ignore_missing_vars=False):
+    """Creates an operation to assign specific variables from a checkpoint.
+    Args:
+    model_path: The full path to the model checkpoint. To get latest checkpoint
+      use `model_path = tf.train.latest_checkpoint(checkpoint_dir)`
+    var_list: A list of (possibly partitioned) `Variable` objects or a
+      dictionary mapping names in the checkpoint to the corresponding variables
+      or list of variables to initialize from that checkpoint value. For
+      partitioned Variables, the name in the checkpoint must be the full
+      variable, not the name of the partitioned variable, eg. "my_var" rather
+      than "my_var/part_4". If empty, returns no_op(), {}.
+    ignore_missing_vars: Boolean, if True ignore variables missing in the
+      checkpoint with a warning instead of failing.
+    Returns:
+    the restore_op and the feed_dict that need to be run to restore var_list.
+    Raises:
+    ValueError: If `ignore_missing_vars` is False and the checkpoint specified
+        at `model_path` is missing one of the variables in `var_list`.
+  """
+    # Normalize var_list into a dictionary mapping names in the
+    # checkpoint to the list of variables to initialize from that
+    # checkpoint variable. Sliced (including partitioned) variables will
+    # end up under the same key.
+    grouped_vars = {}
+    if isinstance(var_list, (tuple, list)):
+        for var in var_list:
+            ckpt_name = get_variable_full_name(var)
+            if ckpt_name not in grouped_vars:
+                grouped_vars[ckpt_name] = []
+            grouped_vars[ckpt_name].append(var)
+
+    else:
+        for ckpt_name, value in var_list.items():
+            if isinstance(value, (tuple, list)):
+                grouped_vars[ckpt_name] = value
+            else:
+                grouped_vars[ckpt_name] = [value]
+
+    # Read each checkpoint entry. Create a placeholder variable and
+    # add the (possibly sliced) data from the checkpoint to the feed_dict.
+    reader = tf.compat.v1.train.NewCheckpointReader(model_path)
+    feed_dict = {}
+    assign_ops = []
+    for ckpt_name in grouped_vars:
+        if not reader.has_tensor(ckpt_name):
+            log_str = 'Checkpoint is missing variable [%s]' % ckpt_name
+            if ignore_missing_vars:
+                logging.warning(log_str)
+                continue
+            else:
+                raise ValueError(log_str)
+        ckpt_value = reader.get_tensor(ckpt_name)
+
+        for var in grouped_vars[ckpt_name]:
+            placeholder_tensor = tf.compat.v1.placeholder(
+                dtype=var.dtype.base_dtype,
+                shape=var.get_shape(),
+                name='placeholder/' + var.op.name
+            )
+
+            assign_ops.append(var.assign(placeholder_tensor))
+
+            if not var._save_slice_info:
+                if var.get_shape() != ckpt_value.shape:
+                    raise ValueError(
+                        'Total size of new array must be unchanged for %s '
+                        'lh_shape: [%s], rh_shape: [%s]' %
+                        (ckpt_name, str(ckpt_value.shape), str(var.get_shape())))
+
+                feed_dict[placeholder_tensor] = ckpt_value.reshape(ckpt_value.shape)
+
+            else:
+                slice_dims = zip(var._save_slice_info.var_offset,
+                                 var._save_slice_info.var_shape)
+
+                slice_dims = [(start, start + size) for (start, size) in slice_dims]
+                slice_dims = [slice(*x) for x in slice_dims]
+
+                slice_value = ckpt_value[slice_dims]
+                slice_value = slice_value.reshape(var._save_slice_info.var_shape)
+
+                feed_dict[placeholder_tensor] = slice_value
+
+    print_op = tf.print(
+        "[GPU %02d] Restoring pretrained weights (%d Tensors) from: %s" % (
+            MPI_rank(),
+            len(assign_ops),
+            model_path
+        ),
+        output_stream=sys.stdout
+    )
+
+    with tf.control_dependencies([print_op]):
+        assign_op = tf.group(*assign_ops)
+
+    return assign_op, feed_dict
+
+
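+# A minimal usage sketch, assuming a checkpoint directory `ckpt_dir` and a
+# variable list `var_list` (illustrative names):
+#
+#   model_path = tf.train.latest_checkpoint(ckpt_dir)
+#   assign_op, feed_dict = assign_from_checkpoint(model_path, var_list)
+#   with tf.compat.v1.Session() as sess:
+#       sess.run(assign_op, feed_dict=feed_dict)
+
+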
+def build_assigment_map(prefix=None, skip_variables_regex=None):
+    """Generate assigment map for loading checkpoints."""
+
+    all_vars = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.GLOBAL_VARIABLES, scope=prefix)
+
+    if not prefix:
+        prefix = ''
+
+    assignment_map = {}
+
+    for var in all_vars:
+
+        var_name = var.name
+
+        # Skip the optimizer's slot variables (suffix match on the name).
+        if var_name.endswith((
+            "/Momentum:0",
+            "/Adadelta:0",
+            "/Adadelta_1:0",
+            "/Adam:0",
+            "/Adam_1:0",
+            "/Adagrad:0",
+            "/RMSProp:0",
+            "/RMSProp_1:0",
+            "/LARSOptimizer:0",
+        )):
+            continue
+
+        # Trim the index of the variable.
+        if ':' in var_name:
+            var_name = var_name[:var_name.rindex(':')]
+
+        if skip_variables_regex and re.match(skip_variables_regex, var_name[len(prefix):]):
+            continue
+
+        assignment_map[var_name[len(prefix):]] = var
+        # assignment_map[var_name] = var
+
+    return assignment_map
+
+
+class PretrainedWeightsLoadingHook(tf.estimator.SessionRunHook):
+
+    def __init__(self, prefix, checkpoint_path, skip_variables_regex=None):
+        self._prefix = prefix
+        self._checkpoint_path = checkpoint_path
+        self._skip_variables_regex = skip_variables_regex
+
+        self._is_initialized = False
+
+        self._init_op = None
+        self._init_feed_dict = None
+
+    def begin(self):
+        vars_to_load = build_assigment_map(
+            prefix=self._prefix,
+            skip_variables_regex=self._skip_variables_regex
+        )
+
+        self._init_op, self._init_feed_dict = assign_from_checkpoint(
+            model_path=self._checkpoint_path,
+            var_list=vars_to_load,
+            ignore_missing_vars=False
+        )
+
+    def after_create_session(self, session, coord=None):
+        if not self._is_initialized:
+            session.run(self._init_op, feed_dict=self._init_feed_dict)
+            logging.info("Pretrained weights loaded with success...\n")
+
+            self._is_initialized = True
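+
+
+# A minimal usage sketch (illustrative names and paths): load pretrained
+# backbone weights before training starts, skipping the classification head.
+#
+#   hook = PretrainedWeightsLoadingHook(
+#       prefix="resnet50/",
+#       checkpoint_path="/weights/resnet50/model.ckpt",
+#       skip_variables_regex=r".*dense.*",
+#   )
+#   estimator.train(input_fn=train_input_fn, max_steps=100, hooks=[hook])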

+ 0 - 0
TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/hyperparameters/__init__.py


+ 179 - 0
TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/hyperparameters/cmdline_utils.py

@@ -0,0 +1,179 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Defining common model params used across all the models."""
+
+from absl import flags
+
+
+def define_hparams_flags():
+
+    flags.DEFINE_string(
+        'log_path',
+        default="./mrcnn.json",
+        help=(
+            'The path where dllogger json file will be saved. Please include the'
+            ' name of the json file as well.'
+        )
+    )
+
+    flags.DEFINE_string(
+        'data_dir',
+        default=None,
+        help=(
+            'The directory where the input data is stored. Please see the model'
+            ' specific README.md for the expected data format.'
+        )
+    )
+
+    flags.DEFINE_string('checkpoint', default='', help='Checkpoint filepath')
+
+    flags.DEFINE_integer(
+        'eval_batch_size',
+        default=8,
+        help='Batch size for evaluation.'
+    )
+
+    flags.DEFINE_bool(
+        'eval_after_training',
+        default=True,
+        help='Run one eval after the training finishes.'
+    )
+
+    flags.DEFINE_integer('eval_samples', default=5000, help='Number of evaluation samples')
+
+    flags.DEFINE_bool(
+        'include_groundtruth_in_features',
+        default=False,
+        help=(
+            'If `val_json_file` is not provided, one can also read groundtruth'
+            ' from input by setting `include_groundtruth_in_features`=True'
+        )
+    )
+
+    # Gradient clipping is a fairly coarse heuristic to stabilize training.
+    # This model clips the gradient by its L2 norm globally (i.e., across
+    # all variables), using a threshold obtained by multiplying this
+    # parameter with sqrt(number_of_weights), to have a meaningful value
+    # across both training phases and different sizes of imported modules.
+    # Reference value: 0.02; for 25M weights this yields a clip norm of
+    # 0.02 * sqrt(25e6) = 100. Zero or a negative number means no clipping.
+    flags.DEFINE_float("global_gradient_clip_ratio", default=-1.0, help="Global Gradient Clipping Ratio")
+
+    flags.DEFINE_float("init_learning_rate", default=2.5e-3, help="Initial Learning Rate")
+
+    flags.DEFINE_float("warmup_learning_rate", default=0., help="Warmup Learning Rate Decay Factor")
+
+    flags.DEFINE_bool('finetune_bn', False, 'Whether to train (fine-tune) the batch norm layers')
+
+    flags.DEFINE_float("l2_weight_decay", default=1e-4, help="l2 regularization weight")
+
+    flags.DEFINE_string('mode', default='train_and_eval', help='Mode to run: `train`, `eval` or `train_and_eval`')
+
+    flags.DEFINE_string(
+        'model_dir',
+        default=None,
+        help='The directory where the model and training/evaluation summaries are stored.'
+    )
+
+    flags.DEFINE_float("momentum", default=0.9, help="Optimizer Momentum")
+
+    flags.DEFINE_integer('num_steps_per_eval', default=2500, help='Number of steps per evaluation epoch.')
+
+    flags.DEFINE_integer('save_checkpoints_steps', default=2500, help='Save a checkpoint every N steps.')
+
+    flags.DEFINE_integer('seed', default=None, help='Set a debug seed for reproducibility.')
+
+    flags.DEFINE_integer('train_batch_size', default=2, help='Batch size for training.')
+
+    flags.DEFINE_integer(
+        'total_steps',
+        default=938240,
+        help=(
+            'The number of steps to use for training. This flag'
+            ' should be adjusted according to the --train_batch_size flag.'
+        )
+    )
+
+    flags.DEFINE_list(
+        'learning_rate_decay_levels',
+        default=['0.1', '0.01'],
+        help=(
+            'The learning rate decay levels which modify the learning rate using the formula:'
+            ' `lr = decay * init_lr`. Decay factor applied at learning_rate_steps.'
+        )
+    )
+    flags.DEFINE_list(
+        'learning_rate_steps',
+        default=['480000', '640000'],
+        help=(
+            'The steps at which learning rate changes. This flag'
+            ' should be adjusted according to the --train_batch_size flag.'
+        )
+    )
+    flags.DEFINE_integer('warmup_steps', default=1000, help='The number of steps over which to apply the warmup learning rate')
+
+    flags.DEFINE_bool('use_amp', default=False, help='Enable automatic mixed precision')
+
+    flags.DEFINE_bool(
+        'use_batched_nms',
+        default=False,
+        help='Enable Batched NMS at inference.'
+    )
+
+    flags.DEFINE_bool(
+        'use_custom_box_proposals_op',
+        default=False,
+        help='Use GenerateBoundingBoxProposals op.'
+    )
+
+    flags.DEFINE_bool('use_fake_data', False, 'Use fake input.')
+
+    flags.DEFINE_bool(
+        'use_tf_distributed',
+        default=False,
+        help='Use tensorflow distributed API'
+    )
+
+    flags.DEFINE_bool('use_xla', default=False, help='Enable XLA JIT Compiler.')
+
+    flags.DEFINE_string('training_file_pattern', default="", help='TFRecords file pattern for the training files')
+
+    flags.DEFINE_string('validation_file_pattern', default="", help='TFRecords file pattern for the validation files')
+
+    flags.DEFINE_string('val_json_file', default="", help='Filepath for the validation json file')
+
+    ############################# TO BE REMOVED ###################################
+
+    flags.DEFINE_integer(
+        'report_frequency',
+        default=None,
+        help='The amount of batches in between accuracy reports at evaluation time'
+    )
+
+    ############################# TO BE REMOVED ###################################
+
+    ############################### ISSUES TO FIX - FLAGS ###############################
+
+    # TODO: Remove when XLA at inference fixed
+    flags.DEFINE_bool(
+        'allow_xla_at_inference',
+        default=False,
+        help='Enable XLA JIT Compiler at Inference'
+    )
+
+    return flags.FLAGS
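+
+
+# A minimal usage sketch with absl's app runner (illustrative entry point):
+#
+#   from absl import app
+#
+#   FLAGS = define_hparams_flags()
+#
+#   def main(argv):
+#       del argv  # Unused.
+#       print(FLAGS.train_batch_size, FLAGS.total_steps)
+#
+#   if __name__ == '__main__':
+#       app.run(main)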

+ 85 - 0
TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/hyperparameters/flags_to_params.py

@@ -0,0 +1,85 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Functions to override model parameters from command-line flags."""
+
+from mask_rcnn.hyperparameters import params_dict
+
+ESSENTIAL_FLAGS = ['tpu', 'data_dir', 'model_dir']
+
+
+def override_params_from_input_flags(params, input_flags):
+    """Update params dictionary with input flags.
+
+  Args:
+    params: ParamsDict object containing dictionary of model parameters.
+    input_flags: All the flags with non-null value of overridden model
+    parameters.
+
+  Returns:
+    ParamsDict object containing dictionary of model parameters.
+  """
+    if params is None:
+        raise ValueError('Input dictionary is empty. It is expected to be loaded with default values.')
+
+    if not isinstance(params, params_dict.ParamsDict):
+        raise ValueError('The base parameter set must be a ParamsDict, was: {}'.format(type(params)))
+
+    essential_flag_dict = {}
+    for key in ESSENTIAL_FLAGS:
+        flag_value = input_flags.get_flag_value(key, None)
+
+        if flag_value is None:
+            raise ValueError('Flag {} must not be None.'.format(key))
+        else:
+            essential_flag_dict[key] = flag_value
+
+    params_dict.override_params_dict(params, essential_flag_dict, is_strict=False)
+
+    normal_flag_dict = get_dictionary_from_flags(params.as_dict(), input_flags)
+
+    params_dict.override_params_dict(params, normal_flag_dict, is_strict=False)
+
+    return params
+
+
+def get_dictionary_from_flags(params, input_flags):
+    """Generate dictionary from non-null flags.
+
+  Args:
+    params: Python dictionary of model parameters.
+    input_flags: All the flags with non-null value of overridden model
+    parameters.
+
+  Returns:
+    Python dict of overriding model parameters.
+  """
+    flag_dict = {}
+    for k, v in params.items():
+        if isinstance(v, dict):
+            d = get_dictionary_from_flags(v, input_flags)
+            flag_dict[k] = d
+        else:
+            try:
+                flag_value = input_flags.get_flag_value(k, None)
+                if flag_value is not None:
+                    flag_dict[k] = flag_value
+            except AttributeError:
+                flag_dict[k] = v
+
+    return flag_dict
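+
+
+# A minimal sketch of the leaf-level override above, using a stand-in for
+# absl's FlagValues object (illustrative only):
+class _ExampleFlags(object):
+    """Stand-in exposing `get_flag_value` like absl's FlagValues."""
+
+    def __init__(self, values):
+        self._values = values
+
+    def get_flag_value(self, name, default):
+        return self._values.get(name, default)
+
+
+# get_dictionary_from_flags({'train': {'batch_size': 2}}, _ExampleFlags({'batch_size': 4}))
+# yields {'train': {'batch_size': 4}}.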

+ 226 - 0
TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/hyperparameters/hyperparameters.py

@@ -0,0 +1,226 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+import os
+import warnings
+import six
+
+import yaml
+
+import tensorflow as tf
+
+from mask_rcnn.utils.logging_formatter import logging
+
+
+class _Hyperparameters(object):
+    """_Hyperparameters class to generate final hparams from various inputs."""
+
+    def __init__(self, default_hparams_file, specific_hparams_file, input_flags, hparams_overrides):
+        """Initialze and load parameter dictionary with different input sources.
+
+    Args:
+      default_hparams_file: YAML storing default values of all hyperparameters.
+      specific_hparams_file: YAML file storing accelerator specific values of
+      hyperparameters to override the default values.
+      input_flags: Command line flags values for hyperparameters. [This is
+      for backward compatibility, so that users passing hyperparameters as
+      regular flags should not run into trouble].
+      hparams_overrides: A kv string representing which hyperparameters need to
+      be overridden from the command line.
+
+    Raises:
+      ValueError: Raised when 'default_hparams_file' is not readable.
+    """
+        if not tf.io.gfile.exists(default_hparams_file):
+            raise ValueError(
+                'Expected a valid path to a YAML file, which represents the default '
+                'hyperparameters file. {}'.format(default_hparams_file)
+            )
+
+        self._params = {}
+        self._params_source = {}
+        self._default_hparams_file = default_hparams_file
+        self._specific_hparams_file = specific_hparams_file
+        self._input_flags = input_flags
+        self._hparams_overrides = hparams_overrides
+
+    def get_parameters(self, log_params):
+        """Returns the dictionary loaded with final values of all hyperparameters.
+
+    Args:
+      log_params: Bool to specify if the hyperparameters final value need to be
+      logged or not.
+
+    Returns:
+      Python dictionary with all the final hyperparameters.
+    """
+        self._params, self._params_source = load_from_file(
+            self._params, self._params_source, self._default_hparams_file
+        )
+        self._params, self._params_source = load_from_file(
+            self._params, self._params_source, self._specific_hparams_file
+        )
+        self._params, self._params_source = load_from_input_flags(self._params, self._params_source, self._input_flags)
+        self._params, self._params_source = load_from_hparams_overrides(
+            self._params, self._params_source, self._hparams_overrides
+        )
+
+        if log_params:
+            self.log_parameters()
+
+        return self._params
+
+    def log_parameters(self):
+        """Log the hyperparameter values along with the source of those values."""
+        params_log = ''
+
+        for k in self._params:
+            params_log += k + ': \t' + str(self._params[k])
+            params_log += ' \t[' + self._params_source[k] + ']\n'
+
+        logging.info('\nModel hyperparameters [source]:\n%s', params_log)
+
+
+def load_from_file(params, params_source, file_path):
+    """Given a path to a YAML file, read the file and load it to dictionary.
+
+  Args:
+    params: Python dictionary of hyperparameters.
+    params_source: Python dictionary to record source of hyperparameters.
+    file_path: Python string containing path to file.
+
+  Returns:
+    Python dict of hyperparameters.
+  """
+    if file_path is None:
+        return params, params_source
+
+    if not tf.io.gfile.exists(file_path):
+        warnings.warn('Could not read Hyperparameter file : ' + file_path, RuntimeWarning)
+        return params, params_source
+
+    with tf.io.gfile.GFile(file_path, 'r') as f:
+        overrides = yaml.load(f, Loader=yaml.SafeLoader)
+    for key, value in six.iteritems(overrides):
+        params[key] = value
+        params_source[key] = os.path.basename(file_path)
+
+    return params, params_source
+
+
+# TODO(amangu): Once global hyperparameter flags are removed, we won't need
+# this function. Remove it once that is implemented.
+def load_from_input_flags(params, params_source, input_flags):
+    """Update params dictionary with input flags.
+
+  Args:
+    params: Python dictionary of hyperparameters.
+    params_source: Python dictionary to record source of hyperparameters.
+    input_flags: All the flags with non-null value of overridden
+    hyperparameters.
+
+  Returns:
+    Python dict of hyperparameters.
+  """
+    if params is None:
+        raise ValueError('Input dictionary is empty. It is expected to be loaded with default values.')
+
+    if not isinstance(params, dict):
+        raise ValueError('The base parameter set must be a Python dict, was: {}'.format(type(params)))
+
+    for key in params:
+        flag_value = input_flags.get_flag_value(key, None)
+
+        if flag_value is not None:
+            params[key] = flag_value
+            params_source[key] = 'Command-line flags'
+
+    return params, params_source
+
+
+# TODO(amangu): Add tests to verify different dtypes of params.
+def load_from_hparams_overrides(params, params_source, hparams_overrides):
+    """Given a dictionary of hyperparameters and a list of overrides, merge them.
+
+  Args:
+    params: Python dict containing a base hyperparameters set.
+    params_source: Python dictionary to record source of hyperparameters.
+    hparams_overrides: Python list of strings. This is a set of k=v overrides
+    for the hyperparameters in `params`; if `k=v1` in `params` but `k=v2` in
+    `hparams_overrides`, the second value wins and the value for `k` is `v2`.
+
+  Returns:
+    Python dict of hyperparameters.
+  """
+    if params is None:
+        raise ValueError('Input dictionary is empty. It is expected to be loaded with default values.')
+
+    if not isinstance(params, dict):
+        raise ValueError('The base hyperparameters set must be a Python dict, was: {}'.format(type(params)))
+
+    if hparams_overrides is None:
+        return params, params_source
+
+    if isinstance(hparams_overrides, six.string_types):
+        hparams_overrides = [hparams_overrides]
+
+    if not isinstance(hparams_overrides, list):
+        raise ValueError(
+            'Expected that hparams_overrides would be `None`, a single string, or a'
+            ' list of strings, was: {}'.format(type(hparams_overrides))
+        )
+
+    for kv_pair in hparams_overrides:
+        if not isinstance(kv_pair, six.string_types):
+            raise ValueError(
+                'Expected that hparams_overrides would contain Python list of strings,'
+                ' but encountered an item: {}'.format(type(kv_pair))
+            )
+        # Split on the first '=' only, so values may themselves contain '='.
+        key, value = kv_pair.split('=', 1)
+        parser = type(params[key])
+        if parser is bool:
+            params[key] = value not in ('0', 'False', 'false')
+        else:
+            params[key] = parser(value)
+        params_source[key] = 'Command-line `hparams` flag'
+
+    return params, params_source
+
+
+def get_hyperparameters(default_hparams_file, specific_hparams_file, input_flags, hparams_overrides, log_params=True):
+    """Single function to get hparams for any model using different sources.
+
+  Args:
+    default_hparams_file: YAML storing default values of all hyperparameters.
+    specific_hparams_file: YAML file storing accelerator specific values of
+    hyperparameters to override the default values.
+    input_flags: Command-line flag values for hyperparameters. [This is
+    for backward compatibility, so that users passing hyperparameters as
+    regular flags should not run into trouble].
+    hparams_overrides: A k=v string listing the hyperparameters to be
+    overridden from the command-line.
+    log_params: Bool specifying whether the final hyperparameter values should
+    be logged.
+
+  Returns:
+    Python dictionary with all the final hyperparameters.
+  """
+    parameter = _Hyperparameters(default_hparams_file, specific_hparams_file, input_flags, hparams_overrides)
+
+    return parameter.get_parameters(log_params)
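+
+# Typical call (sketch; the file names are placeholders, not shipped defaults):
+#   params = get_hyperparameters('defaults.yaml', 'gpu.yaml', flags.FLAGS,
+#                                'train_batch_size=4,use_amp=True')
+# Values are layered by the loaders above: defaults file, accelerator-specific
+# file, regular command-line flags, then the `hparams` override string.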

+ 102 - 0
TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/hyperparameters/mask_rcnn_params.py

@@ -0,0 +1,102 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Parameters used to build Mask-RCNN model."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from argparse import Namespace
+
+
+class _Namespace(Namespace):
+    def values(self):
+        return self.__dict__
+
+
+def default_config():
+    return _Namespace(**dict(
+        # input pre-processing parameters
+        image_size=(832, 1344),
+        augment_input_data=True,
+        gt_mask_size=112,
+
+        # dataset specific parameters
+        num_classes=91,
+        # num_classes=81,
+        skip_crowd_during_training=True,
+        use_category=True,
+
+        # Region Proposal Network
+        rpn_positive_overlap=0.7,
+        rpn_negative_overlap=0.3,
+        rpn_batch_size_per_im=256,
+        rpn_fg_fraction=0.5,
+        rpn_min_size=0.,
+
+        # Proposal layer.
+        batch_size_per_im=512,
+        fg_fraction=0.25,
+        fg_thresh=0.5,
+        bg_thresh_hi=0.5,
+        bg_thresh_lo=0.,
+
+        # Faster-RCNN heads.
+        fast_rcnn_mlp_head_dim=1024,
+        bbox_reg_weights=(10., 10., 5., 5.),
+
+        # Mask-RCNN heads.
+        include_mask=True,  # whether or not to include mask branch.   # ===== Not existing in MLPerf ===== #
+        mrcnn_resolution=28,
+
+        # training
+        train_rpn_pre_nms_topn=2000,
+        train_rpn_post_nms_topn=1000,
+        train_rpn_nms_threshold=0.7,
+
+        # evaluation
+        test_detections_per_image=100,
+        test_nms=0.5,
+        test_rpn_pre_nms_topn=1000,
+        test_rpn_post_nms_topn=1000,
+        test_rpn_nms_thresh=0.7,
+
+        # model architecture
+        min_level=2,
+        max_level=6,
+        num_scales=1,
+        aspect_ratios=[(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)],
+        anchor_scale=8.0,
+
+        # localization loss
+        rpn_box_loss_weight=1.0,
+        fast_rcnn_box_loss_weight=1.0,
+        mrcnn_weight_loss_mask=1.0,
+
+        # ---------- Training configurations ----------
+
+        # Skips loading variables from the resnet checkpoint. It is used for
+        # skipping nonexistent variables from the constructed graph. The list
+        # of loaded variables is constructed from the scope 'resnetX', where 'X'
+        # is depth of the resnet model. Supports regular expression.
+        skip_checkpoint_variables='^NO_SKIP$',
+
+        # ---------- Eval configurations ----------
+        # Visualizes images and detection boxes on TensorBoard.
+        visualize_images_summary=False,
+    ))
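+
+# Illustrative usage: the returned object behaves like an argparse Namespace
+# whose values() method exposes the underlying dict, e.g.
+#   cfg = default_config()
+#   cfg.num_classes                    # 91
+#   cfg.values()['mrcnn_resolution']   # 28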

+ 398 - 0
TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/hyperparameters/params_dict.py

@@ -0,0 +1,398 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""A parameter dictionary class which supports the nest structure."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+import copy
+import re
+import six
+import yaml
+
+import tensorflow as tf
+
+# regex pattern that matches on key-value pairs in a comma-separated
+# key-value pair string. It splits each k-v pair on the = sign, and
+# matches on values that are within single quotes, double quotes, single
+# values (e.g. floats, ints, etc.), and lists within brackets.
+_PARAM_RE = re.compile(
+    r"""
+  (?P<name>[a-zA-Z][\w\.]*)    # variable name: "var" or "x"
+  \s*=\s*
+  ((?P<val>\'[^\]]*\'          # single quote
+  |
+  \"[^\]]*\"                   # double quote
+  |
+  [^,\[]*                      # single value
+  |
+  \[[^\]]*\]))                 # list of values
+  ($|,\s*)""", re.VERBOSE
+)
+
+
+class ParamsDict(object):
+    """A hyperparameter container class."""
+
+    RESERVED_ATTR = ['_locked', '_restrictions']
+
+    def __init__(self, default_params=None, restrictions=None):
+        """Instantiate a ParamsDict.
+
+    Instantiate a ParamsDict given a set of default parameters and a list of
+    restrictions. Upon initialization, it validates itself by checking all the
+    defined restrictions, and raises an error if it finds an inconsistency.
+
+    Args:
+      default_params: a Python dict or another ParamsDict object including the
+        default parameters to initialize.
+      restrictions: a list of strings, which define a list of restrictions to
+        ensure the consistency of different parameters internally. Each
+        restriction string is defined as a binary relation with a set of
+        operators, including {'==', '!=',  '<', '<=', '>', '>='}.
+    """
+        self._locked = False
+        self._restrictions = []
+        if restrictions:
+            self._restrictions = restrictions
+        if default_params is None:
+            default_params = {}
+        self.override(default_params, is_strict=False)
+        self.validate()
+
+    def _set(self, k, v):
+        if isinstance(v, dict):
+            self.__dict__[k] = ParamsDict(v)
+        else:
+            self.__dict__[k] = copy.deepcopy(v)
+
+    def __setattr__(self, k, v):
+        """Sets the value of the existing key.
+
+    Note that this does not allow directly defining a new key. Use the
+    `override` method with `is_strict=False` instead.
+
+    Args:
+      k: the key string.
+      v: the value to be used to set the key `k`.
+
+    Raises:
+      KeyError: if k is not defined in the ParamsDict.
+    """
+        if k not in ParamsDict.RESERVED_ATTR:
+            if k not in self.__dict__.keys():
+                raise KeyError(
+                    'The key `{}` does not exist. '
+                    'To extend the existing keys, use '
+                    '`override` with `is_strict` = False.'.format(k)
+                )
+            if self._locked:
+                raise ValueError('The ParamsDict has been locked. ' 'No change is allowed.')
+        self._set(k, v)
+
+    def __getattr__(self, k):
+        """Gets the value of the existing key.
+
+    Args:
+      k: the key string.
+
+    Returns:
+      the value of the key.
+
+    Raises:
+      KeyError: if k is not defined in the ParamsDict.
+    """
+        if k not in self.__dict__.keys():
+            raise KeyError('The key `{}` does not exist. '.format(k))
+        return self.__dict__[k]
+
+    def override(self, override_params, is_strict=True):
+        """Override the ParamsDict with a set of given params.
+
+    Args:
+      override_params: a dict or a ParamsDict specifying the parameters to
+        be overridden.
+      is_strict: a boolean specifying whether override is strict or not. If
+        True, keys in `override_params` must be present in the ParamsDict.
+        If False, keys in `override_params` can be different from what is
+        currently defined in the ParamsDict. In this case, the ParamsDict will
+        be extended to include the new keys.
+    """
+        if self._locked:
+            raise ValueError('The ParamsDict has been locked. No change is allowed.')
+        if isinstance(override_params, ParamsDict):
+            override_params = override_params.as_dict()
+        self._override(override_params, is_strict)  # pylint: disable=protected-access
+
+    def _override(self, override_dict, is_strict=True):
+        """The implementation of `override`."""
+        for k, v in six.iteritems(override_dict):
+            if k in ParamsDict.RESERVED_ATTR:
+                raise KeyError('The key `{}` is internally reserved. Cannot be overridden.'.format(k))
+            if k not in self.__dict__.keys():
+                if is_strict:
+                    raise KeyError(
+                        'The key `{}` does not exist. '
+                        'To extend the existing keys, use '
+                        '`override` with `is_strict` = False.'.format(k)
+                    )
+                else:
+                    self._set(k, v)
+            else:
+                if isinstance(v, dict):
+                    self.__dict__[k]._override(v, is_strict)  # pylint: disable=protected-access
+                elif isinstance(v, ParamsDict):
+                    self.__dict__[k]._override(v.as_dict(), is_strict)  # pylint: disable=protected-access
+                else:
+                    self.__dict__[k] = copy.deepcopy(v)
+
+    def lock(self):
+        """Makes the ParamsDict immutable."""
+        self._locked = True
+
+    def as_dict(self):
+        """Returns a dict representation of ParamsDict.
+
+    For the nested ParamsDict, a nested dict will be returned.
+    """
+        params_dict = {}
+        for k, v in six.iteritems(self.__dict__):
+            if k not in ParamsDict.RESERVED_ATTR:
+                if isinstance(v, ParamsDict):
+                    params_dict[k] = v.as_dict()
+                else:
+                    params_dict[k] = copy.deepcopy(v)
+        return params_dict
+
+    def validate(self):
+        """Validate the parameters consistency based on the restrictions.
+
+    This method validates the internal consistency using the pre-defined list of
+    restrictions. A restriction is defined as a string which specifies a binary
+    operation. The supported binary operations are {'==', '!=', '<', '<=', '>',
+    '>='}. Note that the meaning of these operators is consistent with the
+    underlying Python implementation. Users should make sure the defined
+    restrictions make sense for the types being compared.
+
+    For example, for a ParamsDict like the following
+    ```
+    a:
+      a1: 1
+      a2: 2
+    b:
+      bb:
+        bb1: 10
+        bb2: 20
+      ccc:
+        a1: 1
+        a3: 3
+    ```
+    one can define two restrictions like this
+    ['a.a1 == b.ccc.a1', 'a.a2 <= b.bb.bb2']
+
+    What it enforces is:
+     - a.a1 = 1 == b.ccc.a1 = 1
+     - a.a2 = 2 <= b.bb.bb2 = 20
+
+    Raises:
+      KeyError: if any of the following happens
+        (1) any of parameters in any of restrictions is not defined in
+            ParamsDict,
+        (2) any inconsistency violating the restriction is found.
+      ValueError: if the restriction defined in the string is not supported.
+    """
+
+        def _get_kv(dotted_string, params_dict):
+            tokenized_params = dotted_string.split('.')
+            v = params_dict
+            for t in tokenized_params:
+                v = v[t]
+            return tokenized_params[-1], v
+
+        def _get_kvs(tokens, params_dict):
+            if len(tokens) != 2:
+                raise ValueError('Only binary relations are supported in restrictions.')
+            stripped_tokens = [t.strip() for t in tokens]
+            left_k, left_v = _get_kv(stripped_tokens[0], params_dict)
+            right_k, right_v = _get_kv(stripped_tokens[1], params_dict)
+            return left_k, left_v, right_k, right_v
+
+        params_dict = self.as_dict()
+        # Check the two-character operators before their one-character
+        # prefixes ('<=' before '<', '>=' before '>'); otherwise a restriction
+        # such as 'a <= b' would be incorrectly split on '<'.
+        violations = [
+            ('==', lambda left, right: left != right),
+            ('!=', lambda left, right: left == right),
+            ('<=', lambda left, right: left > right),
+            ('>=', lambda left, right: left < right),
+            ('<', lambda left, right: left >= right),
+            ('>', lambda left, right: left <= right),
+        ]
+        for restriction in self._restrictions:
+            for op, is_violated in violations:
+                if op in restriction:
+                    tokens = restriction.split(op)
+                    _, left_v, _, right_v = _get_kvs(tokens, params_dict)
+                    if is_violated(left_v, right_v):
+                        raise KeyError(
+                            'Found inconsistency between key `{}` and key `{}`.'.format(tokens[0], tokens[1])
+                        )
+                    break
+            else:
+                raise ValueError('Unsupported relation in restriction.')
+
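+# Illustrative example of the restriction mechanism (values are made up):
+#   params = ParamsDict(
+#       default_params={'train': {'batch_size': 8}, 'eval': {'batch_size': 8}},
+#       restrictions=['train.batch_size == eval.batch_size'])
+#   params.train.batch_size = 16
+#   params.validate()  # raises KeyError: train vs eval batch sizes differ.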
+
+def read_yaml_to_params_dict(file_path):
+    """Reads a YAML file to a ParamsDict."""
+    with tf.io.gfile.GFile(file_path, 'r') as f:
+        params_dict = yaml.safe_load(f)
+        return ParamsDict(params_dict)
+
+
+def save_params_dict_to_yaml(params, file_path):
+    """Saves the input ParamsDict to a YAML file."""
+    with tf.io.gfile.GFile(file_path, 'w') as f:
+
+        def _my_list_rep(dumper, data):
+            # u'tag:yaml.org,2002:seq' is the YAML internal tag for sequence.
+            return dumper.represent_sequence(u'tag:yaml.org,2002:seq', data, flow_style=True)
+
+        yaml.add_representer(list, _my_list_rep)
+        yaml.dump(params.as_dict(), f, default_flow_style=False)
+
+
+def nested_csv_str_to_json_str(csv_str):
+    """Converts a nested (using '.') comma-separated k=v string to a JSON string.
+
+  Converts a comma-separated string of key/value pairs that supports
+  nesting of keys to a JSON string. Nesting is implemented using
+  '.' between levels for a given key.
+
+  Spacing between commas and = is supported (e.g. there is no difference between
+  "a=1,b=2", "a = 1, b = 2", or "a=1, b=2") but there should be no spaces before
+  keys or after values (e.g. " a=1,b=2" and "a=1,b=2 " are not supported).
+
+  Note that this will only support values supported by CSV, meaning
+  values such as nested lists (e.g. "a=[[1,2,3],[4,5,6]]") are not
+  supported. Strings are supported as well, e.g. "a='hello'".
+
+  An example conversion would be:
+
+  "a=1, b=2, c.a=2, c.b=3, d.a.a=5"
+
+  to
+
+  "{ a: 1, b : 2, c: {a : 2, b : 3}, d: {a: {a : 5}}}"
+
+  Args:
+    csv_str: the comma separated string.
+
+  Returns:
+    the converted JSON string.
+
+  Raises:
+    ValueError: If csv_str is not in a comma separated string or
+      if the string is formatted incorrectly.
+  """
+    if not csv_str:
+        return ''
+
+    formatted_entries = []
+    nested_map = collections.defaultdict(list)
+    pos = 0
+    while pos < len(csv_str):
+        m = _PARAM_RE.match(csv_str, pos)
+        if not m:
+            raise ValueError('Malformed hyperparameter value while parsing ' 'CSV string: %s' % csv_str[pos:])
+        pos = m.end()
+        # Parse the values.
+        m_dict = m.groupdict()
+        name = m_dict['name']
+        v = m_dict['val']
+
+        name_nested = name.split('.')
+        if len(name_nested) > 1:
+            grouping = name_nested[0]
+            value = '.'.join(name_nested[1:]) + '=' + v
+            nested_map[grouping].append(value)
+        else:
+            formatted_entries.append('%s : %s' % (name, v))
+
+    for grouping, value in nested_map.items():
+        value = ','.join(value)
+        value = nested_csv_str_to_json_str(value)
+        formatted_entries.append('%s : %s' % (grouping, value))
+    return '{' + ', '.join(formatted_entries) + '}'
+
+
+def override_params_dict(params, dict_or_string_or_yaml_file, is_strict):
+    """Override a given ParamsDict using a dict, JSON/YAML/CSV string or YAML file.
+
+  The logic of the function is outlined below:
+  1. Test that the input is a dict. If not, proceed to 2.
+  2. Test that the input is a string. If not, raise a ValueError for the
+  unknown input type.
+  2.1. Test if the string is in a CSV format. If so, parse.
+  If not, proceed to 2.2.
+  2.2. Try loading the string as a YAML/JSON. If successful, parse to
+  dict and use it to override. If not, proceed to 2.3.
+  2.3. Try using the string as a file path and load the YAML file.
+
+  Args:
+    params: a ParamsDict object to be overridden.
+    dict_or_string_or_yaml_file: a Python dict, JSON/YAML/CSV string or
+      path to a YAML file specifying the parameters to be overridden.
+    is_strict: a boolean specifying whether override is strict or not.
+
+  Returns:
+    params: the overridden ParamsDict object.
+
+  Raises:
+    ValueError: if failed to override the parameters.
+  """
+    if not dict_or_string_or_yaml_file:
+        return params
+    if isinstance(dict_or_string_or_yaml_file, dict):
+        params.override(dict_or_string_or_yaml_file, is_strict)
+    elif isinstance(dict_or_string_or_yaml_file, six.string_types):
+        try:
+            dict_or_string_or_yaml_file = nested_csv_str_to_json_str(dict_or_string_or_yaml_file)
+        except ValueError:
+            pass
+        params_dict = yaml.safe_load(dict_or_string_or_yaml_file)
+        if isinstance(params_dict, dict):
+            params.override(params_dict, is_strict)
+        else:
+            with tf.io.gfile.GFile(dict_or_string_or_yaml_file) as f:
+                params.override(yaml.safe_load(f), is_strict)
+    else:
+        raise ValueError('Unknown input type to parse.')
+    return params
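+
+# Illustrative calls (the YAML path is a placeholder); all three forms set
+# the same two keys:
+#   override_params_dict(params, {'a': 1, 'b': 2}, is_strict=True)
+#   override_params_dict(params, 'a=1,b=2', is_strict=True)
+#   override_params_dict(params, '/path/to/overrides.yaml', is_strict=True)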

+ 89 - 0
TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/hyperparameters/params_io.py

@@ -0,0 +1,89 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#============================================================================
+
+"""Utils to handle parameters IO."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import six
+import yaml
+
+import tensorflow as tf
+
+
+def save_hparams_to_yaml(hparams, file_path):
+    with tf.io.gfile.GFile(file_path, 'w') as f:
+        try:
+            hparams_val = hparams.values()
+        except AttributeError:
+            hparams_val = hparams.__dict__
+        yaml.dump(hparams_val, f)
+
+
+def override_hparams(hparams, dict_or_string_or_yaml_file):
+    """Override a given hparams using a dict or a string or a JSON file.
+
+  Args:
+    hparams: a HParams object to be overridden.
+    dict_or_string_or_yaml_file: a Python dict, or a comma-separated string,
+      or a path to a YAML file specifying the parameters to be overridden.
+
+  Returns:
+    hparams: the overridden HParams object.
+
+  Raises:
+    ValueError: if failed to override the parameters.
+  """
+    if not dict_or_string_or_yaml_file:
+        return hparams
+
+    if isinstance(dict_or_string_or_yaml_file, dict):
+
+        for key, val in dict_or_string_or_yaml_file.items():
+
+            if key not in hparams:
+                try:  # TF 1.x
+                    hparams.add_hparam(key, val)
+                except AttributeError:  # TF 2.x
+                    try:  # Dict
+                        hparams[key] = val
+                    except TypeError:  # Namespace
+                        setattr(hparams, key, val)
+            else:
+                raise ValueError("Parameter `%s` is already defined" % key)
+
+        # hparams.override_from_dict(dict_or_string_or_yaml_file)
+
+    elif isinstance(dict_or_string_or_yaml_file, six.string_types):
+        try:
+            hparams.parse(dict_or_string_or_yaml_file)
+
+        except ValueError as parse_error:
+            try:
+                with tf.io.gfile.GFile(dict_or_string_or_yaml_file) as f:
+                    hparams.override_from_dict(yaml.safe_load(f))
+
+            except Exception as read_error:
+                parse_message = 'Failed to parse config string: %s\n' % str(parse_error)
+                read_message = 'Failed to parse YAML file provided. %s' % str(read_error)
+                raise ValueError(parse_message + read_message)
+
+    else:
+        raise ValueError('Unknown input type to parse.')
+    return hparams
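+
+# Illustrative usage (the YAML path is a placeholder). Note the asymmetry
+# above: the dict form only *adds* keys and raises ValueError for a key that
+# is already defined, while the string form parses and overrides existing
+# hyperparameters:
+#   hparams = override_hparams(hparams, {'new_param': 4})
+#   hparams = override_hparams(hparams, 'train_batch_size=4')
+#   hparams = override_hparams(hparams, '/path/to/overrides.yaml')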

+ 520 - 0
TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/mask_rcnn_model.py

@@ -0,0 +1,520 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Model definition for the Mask-RCNN Model.
+
+Defines model_fn of Mask-RCNN for TF Estimator. The model_fn includes Mask-RCNN
+model architecture, loss function, learning rate schedule, and evaluation
+procedure.
+
+"""
+
+import itertools
+
+import tensorflow as tf
+
+from mask_rcnn import anchors
+
+from mask_rcnn.models import fpn
+from mask_rcnn.models import heads
+from mask_rcnn.models import resnet
+
+from mask_rcnn.training import losses, learning_rates
+
+from mask_rcnn.ops import postprocess_ops
+from mask_rcnn.ops import roi_ops
+from mask_rcnn.ops import spatial_transform_ops
+from mask_rcnn.ops import training_ops
+
+from mask_rcnn.utils.logging_formatter import logging
+
+from mask_rcnn.utils.distributed_utils import MPI_is_distributed
+from mask_rcnn.utils.distributed_utils import MPI_local_rank
+
+from mask_rcnn.utils.meters import StandardMeter
+from mask_rcnn.utils.metric_tracking import register_metric
+
+from mask_rcnn.utils.lazy_imports import LazyImport
+hvd = LazyImport("horovod.tensorflow")
+
+MODELS = dict()
+
+
+def create_optimizer(learning_rate, params):
+    """Creates optimized based on the specified flags."""
+
+    optimizer = tf.compat.v1.train.MomentumOptimizer(learning_rate, momentum=params['momentum'])
+
+    if MPI_is_distributed():
+        optimizer = hvd.DistributedOptimizer(
+            optimizer,
+            name=None,
+            device_dense='/gpu:0',
+            device_sparse='',
+            # compression=hvd.Compression.fp16,
+            compression=hvd.Compression.none,
+            sparse_as_dense=False
+        )
+
+    if params["use_amp"]:
+        loss_scale = tf.train.experimental.DynamicLossScale(
+            initial_loss_scale=(2 ** 15),
+            increment_period=2000,
+            multiplier=2.0
+        )
+        optimizer = tf.compat.v1.train.experimental.MixedPrecisionLossScaleOptimizer(optimizer, loss_scale=loss_scale)
+
+    return optimizer
+
+
+def compute_model_statistics(batch_size, is_training=True):
+    """Compute number of parameters and FLOPS."""
+    options = tf.compat.v1.profiler.ProfileOptionBuilder.float_operation()
+    options['output'] = 'none'
+
+    from tensorflow.python.keras.backend import get_graph
+    flops = tf.compat.v1.profiler.profile(get_graph(), options=options).total_float_ops
+    flops_per_image = flops / batch_size
+
+    logging.info('[%s Compute Statistics] %.1f GFLOPS/image' % (
+        "Training" if is_training else "Inference",
+        flops_per_image/1e9
+    ))
+
+
+def build_model_graph(features, labels, is_training, params):
+    """Builds the forward model graph."""
+    model_outputs = {}
+    is_gpu_inference = not is_training and params['use_batched_nms']
+
+    batch_size, image_height, image_width, _ = features['images'].get_shape().as_list()
+
+    if 'source_ids' not in features:
+        features['source_ids'] = -1 * tf.ones([batch_size], dtype=tf.float32)
+
+    all_anchors = anchors.Anchors(params['min_level'], params['max_level'],
+                                  params['num_scales'], params['aspect_ratios'],
+                                  params['anchor_scale'],
+                                  (image_height, image_width))
+
+    MODELS["backbone"] = resnet.Resnet_Model(
+        "resnet50",
+        data_format='channels_last',
+        trainable=is_training,
+        finetune_bn=params['finetune_bn']
+    )
+
+    backbone_feats = MODELS["backbone"](
+        features['images'],
+        training=is_training,
+    )
+
+    MODELS["FPN"] = fpn.FPNNetwork(params['min_level'], params['max_level'], trainable=is_training)
+    fpn_feats = MODELS["FPN"](backbone_feats, training=is_training)
+
+    model_outputs.update({'fpn_features': fpn_feats})
+
+    def rpn_head_fn(features, min_level=2, max_level=6, num_anchors=3):
+        """Region Proposal Network (RPN) for Mask-RCNN."""
+        scores_outputs = dict()
+        box_outputs = dict()
+
+        MODELS["RPN_Heads"] = heads.RPN_Head_Model(name="rpn_head", num_anchors=num_anchors, trainable=is_training)
+
+        for level in range(min_level, max_level + 1):
+            scores_outputs[level], box_outputs[level] = MODELS["RPN_Heads"](features[level], training=is_training)
+
+        return scores_outputs, box_outputs
+
+    rpn_score_outputs, rpn_box_outputs = rpn_head_fn(
+        features=fpn_feats,
+        min_level=params['min_level'],
+        max_level=params['max_level'],
+        num_anchors=len(params['aspect_ratios']) * params['num_scales']
+    )
+
+    if is_training:
+        rpn_pre_nms_topn = params['train_rpn_pre_nms_topn']
+        rpn_post_nms_topn = params['train_rpn_post_nms_topn']
+        rpn_nms_threshold = params['train_rpn_nms_threshold']
+
+    else:
+        rpn_pre_nms_topn = params['test_rpn_pre_nms_topn']
+        rpn_post_nms_topn = params['test_rpn_post_nms_topn']
+        rpn_nms_threshold = params['test_rpn_nms_thresh']
+
+    if params['use_custom_box_proposals_op']:
+        rpn_box_scores, rpn_box_rois = roi_ops.custom_multilevel_propose_rois(
+            scores_outputs=rpn_score_outputs,
+            box_outputs=rpn_box_outputs,
+            all_anchors=all_anchors,
+            image_info=features['image_info'],
+            rpn_pre_nms_topn=rpn_pre_nms_topn,
+            rpn_post_nms_topn=rpn_post_nms_topn,
+            rpn_nms_threshold=rpn_nms_threshold,
+            rpn_min_size=params['rpn_min_size']
+        )
+
+    else:
+        rpn_box_scores, rpn_box_rois = roi_ops.multilevel_propose_rois(
+            scores_outputs=rpn_score_outputs,
+            box_outputs=rpn_box_outputs,
+            all_anchors=all_anchors,
+            image_info=features['image_info'],
+            rpn_pre_nms_topn=rpn_pre_nms_topn,
+            rpn_post_nms_topn=rpn_post_nms_topn,
+            rpn_nms_threshold=rpn_nms_threshold,
+            rpn_min_size=params['rpn_min_size'],
+            bbox_reg_weights=None,
+            use_batched_nms=params['use_batched_nms']
+        )
+
+    rpn_box_rois = tf.cast(rpn_box_rois, dtype=tf.float32)
+
+    if is_training:
+        rpn_box_rois = tf.stop_gradient(rpn_box_rois)
+        rpn_box_scores = tf.stop_gradient(rpn_box_scores)  # TODO(jonathan): unused -- should this be kept?
+
+        # Sampling
+        box_targets, class_targets, rpn_box_rois, proposal_to_label_map = training_ops.proposal_label_op(
+            rpn_box_rois,
+            labels['gt_boxes'],
+            labels['gt_classes'],
+            batch_size_per_im=params['batch_size_per_im'],
+            fg_fraction=params['fg_fraction'],
+            fg_thresh=params['fg_thresh'],
+            bg_thresh_hi=params['bg_thresh_hi'],
+            bg_thresh_lo=params['bg_thresh_lo']
+        )
+
+    # Performs multi-level RoIAlign.
+    box_roi_features = spatial_transform_ops.multilevel_crop_and_resize(
+        features=fpn_feats,
+        boxes=rpn_box_rois,
+        output_size=7,
+        is_gpu_inference=is_gpu_inference
+    )
+
+    MODELS["Box_Head"] = heads.Box_Head_Model(
+        num_classes=params['num_classes'],
+        mlp_head_dim=params['fast_rcnn_mlp_head_dim'],
+        trainable=is_training
+    )
+
+    class_outputs, box_outputs, _ = MODELS["Box_Head"](inputs=box_roi_features)
+
+    if not is_training:
+        if params['use_batched_nms']:
+            generate_detections_fn = postprocess_ops.generate_detections_gpu
+
+        else:
+            generate_detections_fn = postprocess_ops.generate_detections_tpu
+
+        detections = generate_detections_fn(
+            class_outputs=class_outputs,
+            box_outputs=box_outputs,
+            anchor_boxes=rpn_box_rois,
+            image_info=features['image_info'],
+            pre_nms_num_detections=params['test_rpn_post_nms_topn'],
+            post_nms_num_detections=params['test_detections_per_image'],
+            nms_threshold=params['test_nms'],
+            bbox_reg_weights=params['bbox_reg_weights']
+        )
+
+        model_outputs.update({
+            'num_detections': detections[0],
+            'detection_boxes': detections[1],
+            'detection_classes': detections[2],
+            'detection_scores': detections[3],
+        })
+
+    else:  # is training
+        encoded_box_targets = training_ops.encode_box_targets(
+            boxes=rpn_box_rois,
+            gt_boxes=box_targets,
+            gt_labels=class_targets,
+            bbox_reg_weights=params['bbox_reg_weights']
+        )
+
+        model_outputs.update({
+            'rpn_score_outputs': rpn_score_outputs,
+            'rpn_box_outputs': rpn_box_outputs,
+            'class_outputs': class_outputs,
+            'box_outputs': box_outputs,
+            'class_targets': class_targets,
+            'box_targets': encoded_box_targets,
+            'box_rois': rpn_box_rois,
+        })
+
+    # Faster-RCNN mode.
+    if not params['include_mask']:
+        return model_outputs
+
+    # Mask sampling
+    if not is_training:
+        selected_box_rois = model_outputs['detection_boxes']
+        class_indices = model_outputs['detection_classes']
+
+        # If using GPU for inference, delay the cast until the Gather ops show
+        # up, since GPU inference handles floating point better.
+        # TODO(laigd): revisit this when newer versions of the GPU libraries
+        # are released.
+        if not params['use_batched_nms']:
+            class_indices = tf.cast(class_indices, dtype=tf.int32)
+
+    else:
+        selected_class_targets, selected_box_targets, \
+        selected_box_rois, proposal_to_label_map = training_ops.select_fg_for_masks(
+            class_targets=class_targets,
+            box_targets=box_targets,
+            boxes=rpn_box_rois,
+            proposal_to_label_map=proposal_to_label_map,
+            max_num_fg=int(params['batch_size_per_im'] * params['fg_fraction'])
+        )
+
+        class_indices = tf.cast(selected_class_targets, dtype=tf.int32)
+
+    mask_roi_features = spatial_transform_ops.multilevel_crop_and_resize(
+        features=fpn_feats,
+        boxes=selected_box_rois,
+        output_size=14,
+        is_gpu_inference=is_gpu_inference
+    )
+
+    MODELS["Mask_Head"] = heads.Mask_Head_Model(
+        class_indices,
+        num_classes=params['num_classes'],
+        mrcnn_resolution=params['mrcnn_resolution'],
+        is_gpu_inference=is_gpu_inference,
+        trainable=is_training,
+        name="mask_head"
+    )
+
+    mask_outputs = MODELS["Mask_Head"](inputs=mask_roi_features)
+
+    if MPI_local_rank() == 0:
+        # Print #FLOPs in model.
+        compute_model_statistics(batch_size, is_training=is_training)
+
+    if is_training:
+        mask_targets = training_ops.get_mask_targets(
+            fg_boxes=selected_box_rois,
+            fg_proposal_to_label_map=proposal_to_label_map,
+            fg_box_targets=selected_box_targets,
+            mask_gt_labels=labels['cropped_gt_masks'],
+            output_size=params['mrcnn_resolution']
+        )
+
+        model_outputs.update({
+            'mask_outputs': mask_outputs,
+            'mask_targets': mask_targets,
+            'selected_class_targets': selected_class_targets,
+        })
+
+    else:
+        model_outputs.update({
+            'detection_masks': tf.nn.sigmoid(mask_outputs),
+        })
+
+    return model_outputs
+
+
+def _model_fn(features, labels, mode, params):
+    """Model defination for the Mask-RCNN model based on ResNet.
+
+    Args:
+    features: the input image tensor and auxiliary information, such as
+      `image_info` and `source_ids`. The image tensor has a shape of
+      [batch_size, height, width, 3]. The height and width are fixed and equal.
+    labels: the input labels in a dictionary. The labels include score targets
+      and box targets which are dense label maps. The labels are generated from
+      get_input_fn function in data/dataloader.py
+    mode: the mode of the Estimator: TRAIN, EVAL, or PREDICT.
+    params: the dictionary defining the hyperparameters of the model. The
+      default settings come from default_config in
+      mask_rcnn/hyperparameters/mask_rcnn_params.py.
+    Returns:
+    spec: the EstimatorSpec to run training, evaluation, or prediction.
+    """
+
+    # Set up training loss and learning rate.
+    global_step = tf.compat.v1.train.get_or_create_global_step()
+
+    if mode == tf.estimator.ModeKeys.PREDICT:
+
+        if params['include_groundtruth_in_features'] and 'labels' in features:
+            # Include groundtruth in features for eval.
+            labels = features['labels']
+
+        else:
+            labels = None
+
+        if 'features' in features:
+            features = features['features']
+            # Otherwise, it is in export mode and the features are passed in directly.
+
+    model_outputs = build_model_graph(features, labels, mode == tf.estimator.ModeKeys.TRAIN, params)
+
+    model_outputs.update({
+        'source_id': features['source_ids'],
+        'image_info': features['image_info'],
+    })
+
+    if mode == tf.estimator.ModeKeys.PREDICT and 'orig_images' in features:
+        model_outputs['orig_images'] = features['orig_images']
+
+    # First check if it is in PREDICT mode or EVAL mode to fill out predictions.
+    # Predictions are used during the eval step to generate metrics.
+    if mode in [tf.estimator.ModeKeys.PREDICT, tf.estimator.ModeKeys.EVAL]:
+        predictions = {}
+
+        try:
+            model_outputs['orig_images'] = features['orig_images']
+        except KeyError:
+            pass
+
+        if labels and params['include_groundtruth_in_features']:
+            # Labels can only be embedded in predictions. The prediction cannot
+            # output a dictionary as a value.
+            predictions.update(labels)
+
+        model_outputs.pop('fpn_features', None)
+        predictions.update(model_outputs)
+
+        if mode == tf.estimator.ModeKeys.PREDICT:
+            # If we are doing PREDICT, we can return here.
+            return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
+
+    # score_loss and box_loss are for logging. only total_loss is optimized.
+    total_rpn_loss, rpn_score_loss, rpn_box_loss = losses.rpn_loss(
+        score_outputs=model_outputs['rpn_score_outputs'],
+        box_outputs=model_outputs['rpn_box_outputs'],
+        labels=labels,
+        params=params
+    )
+
+    total_fast_rcnn_loss, fast_rcnn_class_loss, fast_rcnn_box_loss = losses.fast_rcnn_loss(
+        class_outputs=model_outputs['class_outputs'],
+        box_outputs=model_outputs['box_outputs'],
+        class_targets=model_outputs['class_targets'],
+        box_targets=model_outputs['box_targets'],
+        params=params
+    )
+
+    # Only training has the mask loss.
+    # Reference: https://github.com/facebookresearch/Detectron/blob/master/detectron/modeling/model_builder.py
+    if mode == tf.estimator.ModeKeys.TRAIN and params['include_mask']:
+        mask_loss = losses.mask_rcnn_loss(
+            mask_outputs=model_outputs['mask_outputs'],
+            mask_targets=model_outputs['mask_targets'],
+            select_class_targets=model_outputs['selected_class_targets'],
+            params=params
+        )
+
+    else:
+        mask_loss = 0.
+
+    trainable_variables = list(itertools.chain.from_iterable([model.trainable_variables for model in MODELS.values()]))
+
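+    # Weight decay implemented as an explicit L2 penalty over the trainable
+    # weights. Batch-norm parameters and biases ('batch_normalization',
+    # 'bias', 'beta' in the variable name) are excluded, as is conventional.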
+    l2_regularization_loss = params['l2_weight_decay'] * tf.add_n([
+        tf.nn.l2_loss(v)
+        for v in trainable_variables
+        if not any([pattern in v.name for pattern in ["batch_normalization", "bias", "beta"]])
+    ])
+
+    total_loss = total_rpn_loss + total_fast_rcnn_loss + mask_loss + l2_regularization_loss
+
+    if mode == tf.estimator.ModeKeys.EVAL:
+        # Predictions can only contain a dict of tensors, not a dict of dict of
+        # tensors. These outputs are not used for eval purposes.
+        del predictions['rpn_score_outputs']
+        del predictions['rpn_box_outputs']
+
+        return tf.estimator.EstimatorSpec(
+            mode=mode,
+            predictions=predictions,
+            loss=total_loss
+        )
+
+    if mode == tf.estimator.ModeKeys.TRAIN:
+
+        learning_rate = learning_rates.step_learning_rate_with_linear_warmup(
+            global_step=global_step,
+            init_learning_rate=params['init_learning_rate'],
+            warmup_learning_rate=params['warmup_learning_rate'],
+            warmup_steps=params['warmup_steps'],
+            learning_rate_levels=params['learning_rate_levels'],
+            learning_rate_steps=params['learning_rate_steps']
+        )
+
+        optimizer = create_optimizer(learning_rate, params)
+
+        grads_and_vars = optimizer.compute_gradients(total_loss, trainable_variables, colocate_gradients_with_ops=True)
+
+        gradients, variables = zip(*grads_and_vars)
+        grads_and_vars = []
+
+        # Special treatment for biases (beta is named as bias in reference model)
+        # Reference: https://github.com/ddkang/Detectron/blob/80f3295308/lib/modeling/optimizer.py#L109
+        for grad, var in zip(gradients, variables):
+
+            if grad is not None and any([pattern in var.name for pattern in ["bias", "beta"]]):
+                grad = 2.0 * grad
+
+            grads_and_vars.append((grad, var))
+
+        train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)
+
+    else:
+        train_op = None
+        learning_rate = None
+
+    replica_id = tf.distribute.get_replica_context().replica_id_in_sync_group
+
+    if not isinstance(replica_id, tf.Tensor) or tf.get_static_value(replica_id) == 0:
+
+        register_metric(name="L2 loss", tensor=l2_regularization_loss, aggregator=StandardMeter())
+        register_metric(name="Mask loss", tensor=mask_loss, aggregator=StandardMeter())
+        register_metric(name="Total loss", tensor=total_loss, aggregator=StandardMeter())
+
+        register_metric(name="RPN box loss", tensor=rpn_box_loss, aggregator=StandardMeter())
+        register_metric(name="RPN score loss", tensor=rpn_score_loss, aggregator=StandardMeter())
+        register_metric(name="RPN total loss", tensor=total_rpn_loss, aggregator=StandardMeter())
+
+        register_metric(name="FastRCNN class loss", tensor=fast_rcnn_class_loss, aggregator=StandardMeter())
+        register_metric(name="FastRCNN box loss", tensor=fast_rcnn_box_loss, aggregator=StandardMeter())
+        register_metric(name="FastRCNN total loss", tensor=total_fast_rcnn_loss, aggregator=StandardMeter())
+
+        register_metric(name="Learning rate", tensor=learning_rate, aggregator=StandardMeter())
+
+    return tf.estimator.EstimatorSpec(
+        mode=mode,
+        loss=total_loss,
+        train_op=train_op,
+    )
+
+
+def mask_rcnn_model_fn(features, labels, mode, params):
+    """Mask-RCNN model."""
+
+    return _model_fn(
+        features,
+        labels,
+        mode,
+        params
+    )

+ 0 - 0
TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/models/__init__.py


+ 138 - 0
TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/models/fpn.py

@@ -0,0 +1,138 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Feature Pyramid Network.
+
+Feature Pyramid Networks were proposed in:
+[1] Tsung-Yi Lin, Piotr Dollar, Ross Girshick, Kaiming He, Bharath Hariharan,
+    and Serge Belongie
+    Feature Pyramid Networks for Object Detection. CVPR 2017.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import tensorflow as tf
+
+from mask_rcnn.ops import spatial_transform_ops
+
+
+class FPNNetwork(tf.keras.models.Model):
+    def __init__(self, min_level=3, max_level=7, filters=256, trainable=True):
+        """Generates multiple scale feature pyramid (FPN).
+
+        Args:
+        feats_bottom_up: a dictionary of tensor with level as keys and bottom up
+          feature tensors as values. They are the features to generate FPN features.
+        min_level: the minimum level number to generate FPN features.
+        max_level: the maximum level number to generate FPN features.
+        filters: the FPN filter size.
+
+        Returns:
+        feats: a dictionary of tensor with level as keys and the generated FPN
+          features as values.
+        """
+        super(FPNNetwork, self).__init__(name="fpn", trainable=trainable)
+
+        self._local_layers = dict()
+
+        self._min_level = min_level
+        self._max_level = max_level
+
+        self._filters = filters
+
+        self._backbone_max_level = 5  # max(feats_bottom_up.keys())
+        self._upsample_max_level = (
+            self._backbone_max_level if self._max_level > self._backbone_max_level else self._max_level
+        )
+
+        self._local_layers["stage1"] = dict()
+        for level in range(self._min_level, self._upsample_max_level + 1):
+            self._local_layers["stage1"][level] = tf.keras.layers.Conv2D(
+                filters=self._filters,
+                kernel_size=(1, 1),
+                padding='same',
+                name='l%d' % level,
+                trainable=trainable
+            )
+
+        self._local_layers["stage2"] = dict()
+        # add post-hoc 3x3 convolution kernel
+        for level in range(self._min_level, self._upsample_max_level + 1):
+            self._local_layers["stage2"][level] = tf.keras.layers.Conv2D(
+                filters=self._filters,
+                strides=(1, 1),
+                kernel_size=(3, 3),
+                padding='same',
+                name='post_hoc_d%d' % level,
+                trainable=trainable
+            )
+
+        self._local_layers["stage3_1"] = dict()
+        self._local_layers["stage3_2"] = dict()
+
+        if self._max_level == self._upsample_max_level + 1:
+            self._local_layers["stage3_1"] = tf.keras.layers.MaxPool2D(
+                pool_size=1,
+                strides=2,
+                padding='valid',
+                name='p%d' % self._max_level,
+                trainable=trainable
+            )
+
+        else:
+            for level in range(self._upsample_max_level + 1, self._max_level + 1):
+                self._local_layers["stage3_2"][level] = tf.keras.layers.Conv2D(
+                    filters=self._filters,
+                    strides=(2, 2),
+                    kernel_size=(3, 3),
+                    padding='same',
+                    name='p%d' % level,
+                    trainable=trainable
+                )
+
+    def call(self, inputs, *args, **kwargs):
+
+        feats_bottom_up = inputs
+
+        # lateral connections
+        feats_lateral = {}
+
+        for level in range(self._min_level, self._upsample_max_level + 1):
+            feats_lateral[level] = self._local_layers["stage1"][level](feats_bottom_up[level])
+
+        # add top-down path
+        feats = {self._upsample_max_level: feats_lateral[self._upsample_max_level]}
+
+        for level in range(self._upsample_max_level - 1, self._min_level - 1, -1):
+            feats[level] = spatial_transform_ops.nearest_upsampling(
+                feats[level + 1], 2
+            ) + feats_lateral[level]
+
+        # add post-hoc 3x3 convolution kernel
+        for level in range(self._min_level, self._upsample_max_level + 1):
+            feats[level] = self._local_layers["stage2"][level](feats[level])
+
+        if self._max_level == self._upsample_max_level + 1:
+            feats[self._max_level] = self._local_layers["stage3_1"](feats[self._max_level - 1])
+
+        else:
+            for level in range(self._upsample_max_level + 1, self._max_level + 1):
+                feats[level] = self._local_layers["stage3_2"][level](feats[level - 1])
+
+        return feats
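+
+# Illustrative shapes (assuming the defaults used elsewhere in this model:
+# min_level=2, max_level=6, filters=256, and an 832x1344 input): level l has
+# stride 2**l, so `call` returns {2: [N, 208, 336, 256], ..., 6: [N, 13, 21, 256]}.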

+ 322 - 0
TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/models/heads.py

@@ -0,0 +1,322 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Functions to build various prediction heads in Mask-RCNN."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import tensorflow as tf
+
+__all__ = ["RPN_Head_Model", "Box_Head_Model", "Mask_Head_Model"]
+
+
+class RPN_Head_Model(tf.keras.models.Model):
+
+    def __init__(self, name, num_anchors, trainable, *args, **kwargs):
+        """Shared RPN heads."""
+        super(RPN_Head_Model, self).__init__(name=name, trainable=trainable, *args, **kwargs)
+        self._local_layers = dict()
+
+        # TODO(chiachenc): check the channel depth of the first convolution.
+        self._local_layers["conv1"] = tf.keras.layers.Conv2D(
+            256,
+            kernel_size=(3, 3),
+            strides=(1, 1),
+            activation=tf.nn.relu,
+            bias_initializer=tf.keras.initializers.Zeros(),
+            kernel_initializer=tf.random_normal_initializer(stddev=0.01),
+            padding='same',
+            trainable=trainable,
+            name='rpn'
+        )
+
+        # Proposal classification scores
+        self._local_layers["conv2"] = tf.keras.layers.Conv2D(
+            num_anchors,
+            kernel_size=(1, 1),
+            strides=(1, 1),
+            bias_initializer=tf.keras.initializers.Zeros(),
+            kernel_initializer=tf.random_normal_initializer(stddev=0.01),
+            padding='valid',
+            trainable=trainable,
+            name='rpn-class'
+        )
+
+        # Proposal bbox regression deltas
+        self._local_layers["conv3"] = tf.keras.layers.Conv2D(
+            4 * num_anchors,
+            kernel_size=(1, 1),
+            strides=(1, 1),
+            bias_initializer=tf.keras.initializers.Zeros(),
+            kernel_initializer=tf.random_normal_initializer(stddev=0.01),
+            padding='valid',
+            trainable=trainable,
+            name='rpn-box'
+        )
+
+    def call(self, inputs, *args, **kwargs):
+        net = self._local_layers["conv1"](inputs)
+        scores = self._local_layers["conv2"](net)
+        bboxes = self._local_layers["conv3"](net)
+
+        return scores, bboxes
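+
+# Illustrative shapes: for an FPN feature map of shape [N, H, W, 256] and
+# num_anchors=3, `call` returns scores of shape [N, H, W, 3] and box deltas
+# of shape [N, H, W, 12] (4 regression values per anchor).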
+
+
+class Box_Head_Model(tf.keras.Model):
+
+    def __init__(self, num_classes=91, mlp_head_dim=1024, name="box_head", trainable=True, *args, **kwargs):
+        """Box and class branches for the Mask-RCNN model.
+
+        Args:
+        roi_features: A ROI feature tensor of shape
+          [batch_size, num_rois, height_l, width_l, num_filters].
+        num_classes: a integer for the number of classes.
+        mlp_head_dim: a integer that is the hidden dimension in the fully-connected
+          layers.
+        """
+        super(Box_Head_Model, self).__init__(name=name, trainable=trainable, *args, **kwargs)
+
+        self._num_classes = num_classes
+        self._mlp_head_dim = mlp_head_dim
+
+        self._dense_fc6 = tf.keras.layers.Dense(
+            units=mlp_head_dim,
+            activation=tf.nn.relu,
+            trainable=trainable,
+            name='fc6'
+        )
+
+        self._dense_fc7 = tf.keras.layers.Dense(
+            units=mlp_head_dim,
+            activation=tf.nn.relu,
+            trainable=trainable,
+            name='fc7'
+        )
+
+        self._dense_class = tf.keras.layers.Dense(
+            num_classes,
+            kernel_initializer=tf.random_normal_initializer(stddev=0.01),
+            bias_initializer=tf.keras.initializers.Zeros(),
+            trainable=trainable,
+            name='class-predict'
+        )
+
+        self._dense_box = tf.keras.layers.Dense(
+            num_classes * 4,
+            kernel_initializer=tf.random_normal_initializer(stddev=0.001),
+            bias_initializer=tf.keras.initializers.Zeros(),
+            trainable=trainable,
+            name='box-predict'
+        )
+
+    def call(self, inputs, **kwargs):
+        """
+        Returns:
+        class_outputs: a tensor with a shape of
+          [batch_size, num_rois, num_classes], representing the class predictions.
+        box_outputs: a tensor with a shape of
+          [batch_size, num_rois, num_classes * 4], representing the box predictions.
+        box_features: a tensor with a shape of
+          [batch_size, num_rois, mlp_head_dim], representing the box features.
+        """
+
+        # reshape inputs before FC.
+        batch_size, num_rois, height, width, filters = inputs.get_shape().as_list()
+
+        net = tf.reshape(inputs, [batch_size, num_rois, height * width * filters])
+
+        net = self._dense_fc6(net)
+
+        box_features = self._dense_fc7(net)
+
+        class_outputs = self._dense_class(box_features)
+
+        box_outputs = self._dense_box(box_features)
+
+        return class_outputs, box_outputs, box_features
+
+
+class Mask_Head_Model(tf.keras.Model):
+
+    @staticmethod
+    def _get_stddev_equivalent_to_msra_fill(kernel_size, fan_out):
+        """Returns the stddev of random normal initialization as MSRAFill."""
+        # Reference: https://github.com/pytorch/pytorch/blob/master/caffe2/operators/filler_op.h#L445-L463
+        # For example, kernel size is (3, 3) and fan out is 256, stddev is 0.029.
+        # stddev = (2/(3*3*256))^0.5 = 0.029
+        return (2 / (kernel_size[0] * kernel_size[1] * fan_out)) ** 0.5
+
+    def __init__(
+            self,
+            class_indices,
+            num_classes=91,
+            mrcnn_resolution=28,
+            is_gpu_inference=False,
+            name="mask_head",
+            trainable=True,
+            *args,
+            **kwargs
+    ):
+        """Mask branch for the Mask-RCNN model.
+
+        Args:
+        roi_features: A ROI feature tensor of shape
+          [batch_size, num_rois, height_l, width_l, num_filters].
+        class_indices: a Tensor of shape [batch_size, num_rois], indicating
+          which class the ROI is.
+        num_classes: an integer for the number of classes.
+        mrcnn_resolution: an integer that is the resolution of masks.
+        is_gpu_inference: whether to build the model for GPU inference.
+        """
+        super(Mask_Head_Model, self).__init__(name=name, trainable=trainable, *args, **kwargs)
+
+        self._class_indices = class_indices
+        self._num_classes = num_classes
+        self._mrcnn_resolution = mrcnn_resolution
+        self._is_gpu_inference = is_gpu_inference
+
+        self._conv_stage1 = list()
+        kernel_size = (3, 3)
+        fan_out = 256
+
+        init_stddev = Mask_Head_Model._get_stddev_equivalent_to_msra_fill(kernel_size, fan_out)
+
+        for conv_id in range(4):
+            self._conv_stage1.append(tf.keras.layers.Conv2D(
+                fan_out,
+                kernel_size=kernel_size,
+                strides=(1, 1),
+                padding='same',
+                dilation_rate=(1, 1),
+                activation=tf.nn.relu,
+                kernel_initializer=tf.random_normal_initializer(stddev=init_stddev),
+                bias_initializer=tf.keras.initializers.Zeros(),
+                trainable=trainable,
+                name='mask-conv-l%d' % conv_id
+            ))
+
+        kernel_size = (2, 2)
+        fan_out = 256
+
+        init_stddev = Mask_Head_Model._get_stddev_equivalent_to_msra_fill(kernel_size, fan_out)
+
+        self._conv_stage2 = tf.keras.layers.Conv2DTranspose(
+            fan_out,
+            kernel_size=kernel_size,
+            strides=(2, 2),
+            padding='valid',
+            activation=tf.nn.relu,
+            kernel_initializer=tf.random_normal_initializer(stddev=init_stddev),
+            bias_initializer=tf.keras.initializers.Zeros(),
+            trainable=trainable,
+            name='conv5-mask'
+        )
+
+        kernel_size = (1, 1)
+        fan_out = self._num_classes
+
+        init_stddev = Mask_Head_Model._get_stddev_equivalent_to_msra_fill(kernel_size, fan_out)
+
+        self._conv_stage3 = tf.keras.layers.Conv2D(
+            fan_out,
+            kernel_size=kernel_size,
+            strides=(1, 1),
+            padding='valid',
+            kernel_initializer=tf.random_normal_initializer(stddev=init_stddev),
+            bias_initializer=tf.keras.initializers.Zeros(),
+            trainable=trainable,
+            name='mask_fcn_logits'
+        )
+
+    def call(self, inputs, **kwargs):
+        """
+        Returns:
+        mask_outputs: a tensor with a shape of
+          [batch_size, num_masks, mask_height, mask_width],
+          representing the mask predictions.
+        fg_gather_indices: a tensor with a shape of [batch_size, num_masks, 2],
+          representing the fg mask targets.
+        Raises:
+        ValueError: If boxes is not a rank-3 tensor or the last dimension of
+          boxes is not 4.
+        """
+
+        batch_size, num_rois, height, width, filters = inputs.get_shape().as_list()
+
+        net = tf.reshape(inputs, [-1, height, width, filters])
+
+        for conv_id in range(4):
+            net = self._conv_stage1[conv_id](net)
+
+        net = self._conv_stage2(net)
+
+        mask_outputs = self._conv_stage3(net)
+
+        mask_outputs = tf.reshape(
+            mask_outputs,
+            [-1, num_rois, self._mrcnn_resolution, self._mrcnn_resolution, self._num_classes]
+        )
+
+        with tf.name_scope('masks_post_processing'):
+
+            mask_outputs = tf.transpose(a=mask_outputs, perm=[0, 1, 4, 2, 3])
+
+            indices_dtype = tf.float32 if self._is_gpu_inference else tf.int32
+
+            if batch_size == 1:
+                indices = tf.reshape(
+                    tf.reshape(
+                        tf.range(num_rois, dtype=indices_dtype),
+                        [batch_size, num_rois, 1]
+                    ) * self._num_classes + tf.expand_dims(self._class_indices, axis=-1),
+                    [batch_size, -1]
+                )
+
+                mask_outputs = tf.gather(
+                    tf.reshape(mask_outputs, [batch_size, -1, self._mrcnn_resolution, self._mrcnn_resolution]),
+                    indices,
+                    axis=1
+                )
+
+                mask_outputs = tf.squeeze(mask_outputs, axis=1)
+                mask_outputs = tf.reshape(
+                    mask_outputs,
+                    [batch_size, num_rois, self._mrcnn_resolution, self._mrcnn_resolution])
+
+            else:
+                batch_indices = (
+                        tf.expand_dims(tf.range(batch_size, dtype=indices_dtype), axis=1) *
+                        tf.ones([1, num_rois], dtype=indices_dtype)
+                )
+
+                mask_indices = (
+                        tf.expand_dims(tf.range(num_rois, dtype=indices_dtype), axis=0) *
+                        tf.ones([batch_size, 1], dtype=indices_dtype)
+                )
+
+                gather_indices = tf.stack([batch_indices, mask_indices, self._class_indices], axis=2)
+
+                if self._is_gpu_inference:
+                    gather_indices = tf.cast(gather_indices, dtype=tf.int32)
+
+                mask_outputs = tf.gather_nd(mask_outputs, gather_indices)
+
+        return mask_outputs

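The `masks_post_processing` block above selects, for every ROI, only the mask channel of that ROI's predicted class. A minimal standalone sketch of the batch_size > 1 path (toy shapes and eager TF2 assumed; not part of the upstream file):

import tensorflow as tf

batch_size, num_rois, num_classes, resolution = 2, 3, 5, 4

# [batch, num_rois, num_classes, H, W], i.e. the layout after the transpose above.
mask_outputs = tf.random.normal([batch_size, num_rois, num_classes, resolution, resolution])
class_indices = tf.constant([[1, 4, 0], [2, 2, 3]], dtype=tf.int32)  # per-ROI class ids

# Build (batch, roi, class) index triples and pick one mask per ROI in a single gather.
batch_indices = tf.expand_dims(tf.range(batch_size), axis=1) * tf.ones([1, num_rois], tf.int32)
mask_indices = tf.expand_dims(tf.range(num_rois), axis=0) * tf.ones([batch_size, 1], tf.int32)
gather_indices = tf.stack([batch_indices, mask_indices, class_indices], axis=2)

per_roi_masks = tf.gather_nd(mask_outputs, gather_indices)
print(per_roi_masks.shape)  # (2, 3, 4, 4): one mask per ROI, for its own class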
+ 102 - 0
TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/models/keras_utils.py

@@ -0,0 +1,102 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import itertools
+
+import tensorflow as tf
+
+
+__all__ = ["KerasMockLayer"]
+
+
+class KerasMockLayer(tf.Module):
+    """
+    This class reproduces the important APIs of a Keras Layer without enforcing a variable scope.
+    """
+    def __init__(self, trainable=True, *args, **kwargs):
+        super(KerasMockLayer, self).__init__(*args, **kwargs)
+        self._local_layers = dict()
+        self._trainable = trainable
+
+    @property
+    def trainable(self):
+        return self._trainable
+
+    @trainable.setter
+    def trainable(self, value):
+        self._trainable = value
+        # Propagate to the sublayers registered in _local_layers (possibly nested one dict deep).
+        for layer in self._local_layers.values():
+            if isinstance(layer, dict):
+                for sublayer in layer.values():
+                    sublayer.trainable = value
+            else:
+                layer.trainable = value
+
+    @property
+    def variables(self):
+        """Returns the list of all layer variables/weights.
+        Alias of `self.weights`.
+        Returns:
+          A list of variables.
+        """
+        return self.weights
+
+    @property
+    def trainable_variables(self):
+        return self.trainable_weights
+
+    @property
+    def non_trainable_variables(self):
+        return self.non_trainable_weights
+
+    @property
+    def weights(self):
+        """Returns the list of all layer variables/weights.
+        Returns:
+          A list of variables.
+        """
+        return self.trainable_weights + self.non_trainable_weights
+
+    @property
+    def name(self):
+        return self._name
+
+    @property
+    def trainable_weights(self):
+        layers = list()
+
+        for layer in self._local_layers.values():
+            if not isinstance(layer, dict):
+                layers.append(layer)
+            else:
+                for sublayer in layer.values():
+                    layers.append(sublayer)
+
+        return list(itertools.chain.from_iterable([layer.trainable_variables for layer in layers]))
+
+    @property
+    def non_trainable_weights(self):
+        layers = list()
+
+        for layer in self._local_layers.values():
+            if not isinstance(layer, dict):
+                layers.append(layer)
+            else:
+                for sublayer in layer.values():
+                    layers.append(sublayer)
+
+        return list(itertools.chain.from_iterable([layer.non_trainable_weights for layer in layers]))

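A minimal usage sketch for KerasMockLayer (the `TinyBlock` subclass below is hypothetical, purely for illustration): subclasses register their sublayers in `self._local_layers`, optionally nested one dict deep, and the weight-collection properties above flatten them automatically:

import tensorflow as tf
from mask_rcnn.models.keras_utils import KerasMockLayer

class TinyBlock(KerasMockLayer):
    def __init__(self, trainable=True):
        super(TinyBlock, self).__init__(trainable=trainable)
        self._local_layers["dense"] = tf.keras.layers.Dense(8, name="dense")
        self._local_layers["norm"] = {"bn": tf.keras.layers.BatchNormalization(name="bn")}

    def __call__(self, inputs, training=False):
        net = self._local_layers["dense"](inputs)
        return self._local_layers["norm"]["bn"](net, training=training)

block = TinyBlock()
_ = block(tf.zeros([2, 4]))              # builds the variables
print(len(block.trainable_weights))      # 4: dense kernel/bias + bn gamma/beta
print(len(block.non_trainable_weights))  # 2: bn moving mean/variance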
+ 582 - 0
TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/models/resnet.py

@@ -0,0 +1,582 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Resnet."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import tensorflow as tf
+
+from mask_rcnn.models.keras_utils import KerasMockLayer
+
+_BATCH_NORM_DECAY = 0.997
+_BATCH_NORM_EPSILON = 1e-4
+
+
+class BNReLULayer(KerasMockLayer):
+    def __init__(self, trainable, relu=True, init_zero=False, data_format='channels_last'):
+        """Performs a batch normalization followed by a ReLU.
+
+        Args:
+        trainable: `bool` for whether to finetune the batchnorm layer.
+        relu: `bool` if False, omits the ReLU operation.
+        init_zero: `bool` if True, initializes scale parameter of batch
+            normalization with 0 instead of 1 (default).
+        data_format: `str` either "channels_first" for `[batch, channels, height,
+            width]` or "channels_last" for `[batch, height, width, channels]`.
+        """
+        super(BNReLULayer, self).__init__(trainable=trainable)
+
+        if init_zero:
+            gamma_initializer = tf.keras.initializers.Zeros()
+        else:
+            gamma_initializer = tf.keras.initializers.Ones()
+
+        if data_format == 'channels_first':
+            axis = 1
+        else:
+            axis = 3
+
+        self._local_layers = dict()
+        self._local_layers["batchnorm"] = tf.keras.layers.BatchNormalization(
+            axis=axis,
+            momentum=_BATCH_NORM_DECAY,
+            epsilon=_BATCH_NORM_EPSILON,
+            center=True,
+            scale=True,
+            trainable=self._trainable,
+            gamma_initializer=gamma_initializer,
+            fused=True,
+            name="batch_normalization"
+        )
+
+        if relu:
+            self._local_layers["relu"] = tf.keras.layers.ReLU()
+
+    def __call__(self, inputs, training=False, *args, **kwargs):
+
+        net = self._local_layers["batchnorm"](inputs, training=training and self._trainable)
+
+        try:
+            return self._local_layers["relu"](net)
+        except KeyError:
+            return net
+
+
+class FixedPaddingLayer(KerasMockLayer):
+    def __init__(self, kernel_size, data_format='channels_last', trainable=True):
+        """Pads the input along the spatial dimensions independently of input size.
+
+        Args:
+        kernel_size: `int` kernel size to be used for `conv2d` or max_pool2d`
+            operations. Should be a positive integer.
+        data_format: `str` either "channels_first" for `[batch, channels, height,
+            width]` or "channels_last" for `[batch, height, width, channels]`.
+        """
+        super(FixedPaddingLayer, self).__init__(trainable=trainable)
+
+        pad_total = kernel_size - 1
+        pad_beg = pad_total // 2
+        pad_end = pad_total - pad_beg
+
+        if data_format == 'channels_first':
+            self._paddings = [[0, 0], [0, 0], [pad_beg, pad_end], [pad_beg, pad_end]]
+        else:
+            self._paddings = [[0, 0], [pad_beg, pad_end], [pad_beg, pad_end], [0, 0]]
+
+    def __call__(self, inputs, *args, **kwargs):
+        """
+      Args:
+        inputs: `Tensor` of size `[batch, channels, height, width]` or
+            `[batch, height, width, channels]` depending on `data_format`.
+      Returns:
+        A padded `Tensor` of the same `data_format` with size either intact
+        (if `kernel_size == 1`) or padded (if `kernel_size > 1`).
+        :param **kwargs:
+      """
+
+        return tf.pad(tensor=inputs, paddings=self._paddings)
+
+
+class Conv2dFixedPadding(KerasMockLayer):
+    def __init__(self, filters, kernel_size, strides, data_format='channels_last', trainable=False):
+        """Strided 2-D convolution with explicit padding.
+
+        The padding is consistent and is based only on `kernel_size`, not on the
+        dimensions of `inputs` (as opposed to using `tf.layers.conv2d` alone).
+
+        Args:
+        filters: `int` number of filters in the convolution.
+        kernel_size: `int` size of the kernel to be used in the convolution.
+        strides: `int` strides of the convolution.
+        data_format: `str` either "channels_first" for `[batch, channels, height,
+            width]` or "channels_last" for `[batch, height, width, channels]`.
+
+        Calling the layer on a `Tensor` of size `[batch, channels, height_in, width_in]`
+        returns a `Tensor` of shape `[batch, filters, height_out, width_out]`.
+        """
+        super(Conv2dFixedPadding, self).__init__(trainable=trainable)
+
+        if strides > 1:
+            self._local_layers["fixed_padding"] = FixedPaddingLayer(kernel_size=kernel_size, data_format=data_format)
+
+        self._local_layers["conv2d"] = tf.keras.layers.Conv2D(
+            filters=filters,
+            kernel_size=kernel_size,
+            strides=strides,
+            padding=('SAME' if strides == 1 else 'VALID'),
+            use_bias=False,
+            kernel_initializer=tf.keras.initializers.VarianceScaling(),
+            data_format=data_format,
+            trainable=self._trainable,
+            name="conv2d"
+        )
+
+    def __call__(self, inputs, *args, **kwargs):
+
+        try:
+            net = self._local_layers["fixed_padding"](inputs)
+        except KeyError:
+            net = inputs
+
+        return self._local_layers["conv2d"](net)
+
+
+class ResidualBlock(KerasMockLayer):
+    def __init__(self, filters, trainable, finetune_bn, strides, use_projection=False, data_format='channels_last'):
+        """Standard building block for residual networks with BN after convolutions.
+
+        Args:
+        filters: `int` number of filters for both convolutions.
+        trainable: `bool` for whether the block weights are trainable.
+        finetune_bn: `bool` for whether to finetune the batch normalization layers.
+        strides: `int` block stride. If greater than 1, this block will ultimately downsample the input.
+        use_projection: `bool` for whether this block should use a projection
+            shortcut (versus the default identity shortcut). This is usually `True`
+            for the first block of a block group, which may change the number of
+            filters and the resolution.
+        data_format: `str` either "channels_first" for `[batch, channels, height, width]`
+            or "channels_last" for `[batch, height, width, channels]`.
+        """
+        super(ResidualBlock, self).__init__(trainable=trainable)
+
+        self._finetune_bn = finetune_bn
+
+        if use_projection:
+            self._local_layers["projection"] = dict()
+
+            self._local_layers["projection"]["conv2d"] = Conv2dFixedPadding(
+                filters=filters,
+                kernel_size=1,
+                strides=strides,
+                data_format=data_format,
+                trainable=trainable
+            )
+
+            self._local_layers["projection"]["batchnorm"] = BNReLULayer(
+                trainable=finetune_bn and trainable,
+                relu=False,
+                init_zero=False,
+                data_format=data_format,
+            )
+
+        self._local_layers["conv2d_1"] = Conv2dFixedPadding(
+            trainable=trainable,
+            filters=filters,
+            kernel_size=3,
+            strides=strides,
+            data_format=data_format,
+        )
+
+        self._local_layers["conv2d_2"] = Conv2dFixedPadding(
+            trainable=trainable,
+            filters=filters,
+            kernel_size=3,
+            strides=1,
+            data_format=data_format,
+        )
+
+        self._local_layers["batchnorm_1"] = BNReLULayer(
+            trainable=finetune_bn and trainable,
+            relu=True,
+            init_zero=False,
+            data_format=data_format,
+        )
+
+        self._local_layers["batchnorm_2"] = BNReLULayer(
+            trainable=finetune_bn and trainable,
+            relu=False,
+            init_zero=True,
+            data_format=data_format,
+        )
+
+        self._local_layers["activation"] = tf.keras.layers.ReLU()
+
+    def __call__(self, inputs, training=False):
+        """
+        Args:
+        inputs: `Tensor` of size `[batch, channels, height, width]`.
+
+        Returns:
+        The output `Tensor` of the block.
+        """
+
+        try:
+            # Projection shortcut in first layer to match filters and strides
+            shortcut = self._local_layers["projection"]["conv2d"](inputs=inputs)
+
+            shortcut = self._local_layers["projection"]["batchnorm"](
+                inputs=shortcut,
+                training=training and self._trainable and self._finetune_bn
+            )
+
+        except KeyError:
+            shortcut = inputs
+
+        net = inputs
+
+        for i in range(1, 3):
+            net = self._local_layers["conv2d_%d" % i](inputs=net)
+
+            net = self._local_layers["batchnorm_%d" % i](
+                inputs=net,
+                training=training and self._trainable and self._finetune_bn
+            )
+
+        return self._local_layers["activation"](net + shortcut)
+
+
+class BottleneckBlock(KerasMockLayer):
+    def __init__(self, filters, trainable, finetune_bn, strides, use_projection=False, data_format='channels_last'):
+        """Bottleneck block variant for residual networks with BN after convolutions.
+
+        Args:
+        filters: `int` number of filters for the first two convolutions. Note that
+            the third and final convolution will use 4 times as many filters.
+        trainable: `bool` for whether the block weights are trainable.
+        finetune_bn: `bool` for whether to finetune the batch normalization layers.
+        strides: `int` block stride. If greater than 1, this block will ultimately downsample the input.
+        use_projection: `bool` for whether this block should use a projection
+            shortcut (versus the default identity shortcut). This is usually `True`
+            for the first block of a block group, which may change the number of
+            filters and the resolution.
+        data_format: `str` either "channels_first" for `[batch, channels, height, width]`
+            or "channels_last" for `[batch, height, width, channels]`.
+        """
+        super(BottleneckBlock, self).__init__(trainable=trainable)
+
+        self._finetune_bn = finetune_bn
+
+        if use_projection:
+            # Projection shortcut only in first block within a group. Bottleneck blocks
+            # end with 4 times the number of filters.
+            filters_out = 4 * filters
+
+            self._local_layers["projection"] = dict()
+
+            self._local_layers["projection"]["conv2d"] = Conv2dFixedPadding(
+                filters=filters_out,
+                kernel_size=1,
+                strides=strides,
+                data_format=data_format,
+                trainable=trainable
+            )
+
+            self._local_layers["projection"]["batchnorm"] = BNReLULayer(
+                trainable=finetune_bn and trainable,
+                relu=False,
+                init_zero=False,
+                data_format=data_format,
+            )
+
+        self._local_layers["conv2d_1"] = Conv2dFixedPadding(
+            filters=filters,
+            kernel_size=1,
+            strides=1,
+            data_format=data_format,
+            trainable=trainable
+        )
+
+        self._local_layers["conv2d_2"] = Conv2dFixedPadding(
+            filters=filters,
+            kernel_size=3,
+            strides=strides,
+            data_format=data_format,
+            trainable=trainable
+        )
+
+        self._local_layers["conv2d_3"] = Conv2dFixedPadding(
+            filters=4 * filters,
+            kernel_size=1,
+            strides=1,
+            data_format=data_format,
+            trainable=trainable
+        )
+
+        self._local_layers["batchnorm_1"] = BNReLULayer(
+            trainable=finetune_bn and trainable,
+            relu=True,
+            init_zero=False,
+            data_format=data_format,
+        )
+
+        self._local_layers["batchnorm_2"] = BNReLULayer(
+            trainable=finetune_bn and trainable,
+            relu=True,
+            init_zero=False,
+            data_format=data_format,
+        )
+
+        self._local_layers["batchnorm_3"] = BNReLULayer(
+            trainable=finetune_bn and trainable,
+            relu=False,
+            init_zero=True,
+            data_format=data_format,
+        )
+
+        self._local_layers["activation"] = tf.keras.layers.ReLU()
+
+    def __call__(self, inputs, training=False):
+        """
+        Args:
+        inputs: `Tensor` of size `[batch, channels, height, width]`.
+
+        Returns:
+        The output `Tensor` of the block.
+        """
+
+        try:
+            # Projection shortcut in first layer to match filters and strides
+            shortcut = self._local_layers["projection"]["conv2d"](inputs=inputs)
+
+            shortcut = self._local_layers["projection"]["batchnorm"](
+                inputs=shortcut,
+                training=training and self._trainable and self._finetune_bn
+            )
+
+        except KeyError:
+            shortcut = inputs
+
+        net = inputs
+
+        for i in range(1, 4):
+            net = self._local_layers["conv2d_%d" % i](inputs=net)
+
+            net = self._local_layers["batchnorm_%d" % i](
+                inputs=net,
+                training=training and self._trainable and self._finetune_bn
+            )
+
+        return self._local_layers["activation"](net + shortcut)
+
+
+class BlockGroup(KerasMockLayer):
+    def __init__(self, filters, block_layer, n_blocks, strides, trainable, finetune_bn, data_format='channels_last'):
+        """Creates one group of blocks for the ResNet model.
+
+        Args:
+        filters: `int` number of filters for the first convolution of the layer.
+        block_layer: the block class to use within the group (`ResidualBlock` or `BottleneckBlock`).
+        n_blocks: `int` number of blocks contained in the group.
+        strides: `int` stride to use for the first convolution of the layer. If
+            greater than 1, this layer will downsample the input.
+        trainable: `bool` for whether the group weights are trainable.
+        finetune_bn: `bool` for whether to finetune the batch normalization layers.
+        data_format: `str` either "channels_first" for `[batch, channels, height,
+            width]` or "channels_last" for `[batch, height, width, channels]`.
+        """
+        super(BlockGroup, self).__init__(trainable=trainable)
+
+        self._finetune_bn = finetune_bn
+
+        self._n_blocks = n_blocks
+
+        for block_id in range(self._n_blocks):
+            # Only the first block per block_group uses projection shortcut and strides.
+            self._local_layers["block_%d" % (block_id + 1)] = block_layer(
+                filters=filters,
+                finetune_bn=finetune_bn,
+                trainable=trainable,
+                strides=strides if block_id == 0 else 1,
+                use_projection=block_id == 0,
+                data_format=data_format
+            )
+
+    def __call__(self, inputs, training=False):
+
+        net = inputs
+
+        for block_id in range(self._n_blocks):
+            net = self._local_layers["block_%d" % (block_id + 1)](net, training=training and self._trainable)
+
+        return net
+
+
+class Resnet_Model(KerasMockLayer, tf.keras.models.Model):
+    def __init__(self, resnet_model, data_format='channels_last', trainable=True, finetune_bn=False, *args, **kwargs):
+        """
+        The actual ResNet backbone; we return the outputs of c2, c3, c4 and c5.
+        N.B. batch norm is always run with trained parameters, as we use very small
+        batches when training the object layers.
+
+        Args:
+        resnet_model: model type. Allowed values: (resnet18, resnet34, resnet50, resnet101, resnet152, resnet200)
+        data_format: `str` either "channels_first" for
+          `[batch, channels, height, width]` or "channels_last" for `[batch, height, width, channels]`.
+        trainable: `bool` for whether the weights outside the frozen stem and first block group are trainable.
+        finetune_bn: `bool` for whether to finetune the batch normalization layers.
+        """
+        model_params = {
+            'resnet18': {'block': ResidualBlock, 'layers': [2, 2, 2, 2]},
+            'resnet34': {'block': ResidualBlock, 'layers': [3, 4, 6, 3]},
+            'resnet50': {'block': BottleneckBlock, 'layers': [3, 4, 6, 3]},
+            'resnet101': {'block': BottleneckBlock, 'layers': [3, 4, 23, 3]},
+            'resnet152': {'block': BottleneckBlock, 'layers': [3, 8, 36, 3]},
+            'resnet200': {'block': BottleneckBlock, 'layers': [3, 24, 36, 3]}
+        }
+
+        if resnet_model not in model_params:
+            raise ValueError('Not a valid resnet_model: %s' % resnet_model)
+
+        super(Resnet_Model, self).__init__(trainable=trainable, name=resnet_model, *args, **kwargs)
+
+        self._finetune_bn = finetune_bn
+
+        self._data_format = data_format
+        self._block_layer = model_params[resnet_model]['block']
+        self._n_layers = model_params[resnet_model]['layers']
+
+        self._local_layers["conv2d"] = Conv2dFixedPadding(
+            filters=64,
+            kernel_size=7,
+            strides=2,
+            data_format=self._data_format,
+            # Freeze at conv2d and batchnorm first 11 layers based on reference model.
+            # Reference: https://github.com/facebookresearch/Detectron/blob/master/detectron/core/config.py#L194
+            trainable=False
+        )
+
+        self._local_layers["batchnorm"] = BNReLULayer(
+            relu=True,
+            init_zero=False,
+            data_format=self._data_format,
+            # Freeze at conv2d and batchnorm first 11 layers based on reference model.
+            # Reference: https://github.com/facebookresearch/Detectron/blob/master/detectron/core/config.py#L194
+            trainable=False
+        )
+
+        self._local_layers["maxpool2d"] = tf.keras.layers.MaxPool2D(
+            pool_size=3,
+            strides=2,
+            padding='SAME',
+            data_format=self._data_format
+        )
+
+        self._local_layers["block_1"] = BlockGroup(
+            filters=64,
+            strides=1,
+            n_blocks=self._n_layers[0],
+            block_layer=self._block_layer,
+            data_format=self._data_format,
+            # Freeze at conv2d and batchnorm first 11 layers based on reference model.
+            # Reference: https://github.com/facebookresearch/Detectron/blob/master/detectron/core/config.py#L194
+            trainable=False,
+            finetune_bn=False
+        )
+
+        self._local_layers["block_2"] = BlockGroup(
+            filters=128,
+            strides=2,
+            n_blocks=self._n_layers[1],
+            block_layer=self._block_layer,
+            data_format=self._data_format,
+            # Freeze at conv2d and batchnorm first 11 layers based on reference model.
+            # Reference: https://github.com/facebookresearch/Detectron/blob/master/detectron/core/config.py#L194
+            trainable=self._trainable,
+            finetune_bn=self._finetune_bn
+        )
+
+        self._local_layers["block_3"] = BlockGroup(
+            filters=256,
+            strides=2,
+            n_blocks=self._n_layers[2],
+            block_layer=self._block_layer,
+            data_format=self._data_format,
+            # Freeze at conv2d and batchnorm first 11 layers based on reference model.
+            # Reference: https://github.com/facebookresearch/Detectron/blob/master/detectron/core/config.py#L194
+            trainable=self._trainable,
+            finetune_bn=self._finetune_bn
+        )
+
+        self._local_layers["block_4"] = BlockGroup(
+            filters=512,
+            strides=2,
+            n_blocks=self._n_layers[3],
+            block_layer=self._block_layer,
+            data_format=self._data_format,
+            # Freeze at conv2d and batchnorm first 11 layers based on reference model.
+            # Reference: https://github.com/facebookresearch/Detectron/blob/master/detectron/core/config.py#L194
+            trainable=self._trainable,
+            finetune_bn=self._finetune_bn
+        )
+
+    def call(self, inputs, training=True, *args, **kwargs):
+        """Creation of the model graph."""
+        net = self._local_layers["conv2d"](inputs=inputs)
+
+        net = self._local_layers["batchnorm"](
+            inputs=net,
+            training=False
+        )
+
+        net = self._local_layers["maxpool2d"](net)
+
+        c2 = self._local_layers["block_1"](
+            inputs=net,
+            training=False,
+        )
+
+        c3 = self._local_layers["block_2"](
+            inputs=c2,
+            training=training,
+        )
+
+        c4 = self._local_layers["block_3"](
+            inputs=c3,
+            training=training,
+        )
+
+        c5 = self._local_layers["block_4"](
+            inputs=c4,
+            training=training,
+        )
+
+        return {2: c2, 3: c3, 4: c4, 5: c5}

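A minimal forward-pass sketch for the backbone above (eager TF2 with the package on PYTHONPATH assumed; weights are randomly initialized, no pretrained checkpoint): build a ResNet-50, feed one NHWC batch, and inspect the level-indexed feature dict consumed by the FPN:

import tensorflow as tf
from mask_rcnn.models.resnet import Resnet_Model

backbone = Resnet_Model("resnet50", data_format="channels_last", trainable=True, finetune_bn=False)

images = tf.zeros([1, 224, 224, 3])  # NHWC to match channels_last
features = backbone(images, training=False)

for level, tensor in features.items():
    print(level, tensor.shape)
# Expected strides of 4/8/16/32 relative to the input:
# 2 (1, 56, 56, 256), 3 (1, 28, 28, 512), 4 (1, 14, 14, 1024), 5 (1, 7, 7, 2048)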
+ 0 - 0
TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/object_detection/__init__.py


+ 202 - 0
TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/object_detection/argmax_matcher.py

@@ -0,0 +1,202 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Argmax matcher implementation.
+
+This class takes a similarity matrix and matches columns to rows based on the
+maximum value per column. One can specify a matched_threshold below which a
+column is prevented from matching a row, and an unmatched_threshold below
+which a column becomes a negative training example; similarities between the
+two thresholds are, by default, ignored (generally resulting in neither a
+positive nor a negative training example).
+
+This matcher is used in Fast(er)-RCNN.
+
+Note: matchers are used in TargetAssigners. There is a create_target_assigner
+factory function for popular implementations.
+"""
+import tensorflow as tf
+
+from mask_rcnn.object_detection import matcher
+from mask_rcnn.object_detection import shape_utils
+
+
+class ArgMaxMatcher(matcher.Matcher):
+  """Matcher based on highest value.
+
+  This class computes matches from a similarity matrix. Each column is matched
+  to a single row.
+
+  To support object detection target assignment this class enables setting both
+  matched_threshold (upper threshold) and unmatched_threshold (lower thresholds)
+  defining three categories of similarity which define whether examples are
+  positive, negative, or ignored:
+  (1) similarity >= matched_threshold: Highest similarity. Matched/Positive!
+  (2) matched_threshold > similarity >= unmatched_threshold: Medium similarity.
+          Depending on negatives_lower_than_unmatched, this is either
+          Unmatched/Negative OR Ignore.
+  (3) unmatched_threshold > similarity: Lowest similarity. Depending on flag
+          negatives_lower_than_unmatched, either Unmatched/Negative OR Ignore.
+  For ignored matches this class sets the values in the Match object to -2.
+  """
+
+  def __init__(self,
+               matched_threshold,
+               unmatched_threshold=None,
+               negatives_lower_than_unmatched=True,
+               force_match_for_each_row=False):
+    """Construct ArgMaxMatcher.
+
+    Args:
+      matched_threshold: Threshold for positive matches. Positive if
+        sim >= matched_threshold, where sim is the maximum value of the
+        similarity matrix for a given column. Set to None for no threshold.
+      unmatched_threshold: Threshold for negative matches. Negative if
+        sim < unmatched_threshold. Defaults to matched_threshold
+        when set to None.
+      negatives_lower_than_unmatched: Boolean which defaults to True. If True
+        then negative matches are the ones below the unmatched_threshold,
+        whereas ignored matches are in between the matched and unmatched
+        threshold. If False, then negative matches are in between the matched
+        and unmatched threshold, and everything lower than unmatched is ignored.
+      force_match_for_each_row: If True, ensures that each row is matched to
+        at least one column (which is not guaranteed otherwise if the
+        matched_threshold is high). Defaults to False. See
+        argmax_matcher_test.testMatcherForceMatch() for an example.
+
+    Raises:
+      ValueError: if unmatched_threshold is set but matched_threshold is not set
+        or if unmatched_threshold > matched_threshold.
+    """
+    if (matched_threshold is None) and (unmatched_threshold is not None):
+      raise ValueError('Need to also define matched_threshold when '
+                       'unmatched_threshold is defined')
+    self._matched_threshold = matched_threshold
+    if unmatched_threshold is None:
+      self._unmatched_threshold = matched_threshold
+    else:
+      if unmatched_threshold > matched_threshold:
+        raise ValueError('unmatched_threshold needs to be smaller or equal '
+                         'to matched_threshold')
+      self._unmatched_threshold = unmatched_threshold
+    if not negatives_lower_than_unmatched:
+      if self._unmatched_threshold == self._matched_threshold:
+        raise ValueError('When negatives are in between matched and '
+                         'unmatched thresholds, these cannot be of equal '
+                         'value. matched: %s, unmatched: %s' %
+                         (self._matched_threshold, self._unmatched_threshold))
+    self._force_match_for_each_row = force_match_for_each_row
+    self._negatives_lower_than_unmatched = negatives_lower_than_unmatched
+
+  def _match(self, similarity_matrix):
+    """Tries to match each column of the similarity matrix to a row.
+
+    Args:
+      similarity_matrix: tensor of shape [N, M] representing any similarity
+        metric.
+
+    Returns:
+      Match object with corresponding matches for each of M columns.
+    """
+
+    def _match_when_rows_are_empty():
+      """Performs matching when the rows of similarity matrix are empty.
+
+      When the rows are empty, all detections are false positives. So we return
+      a tensor of -1's to indicate that the columns do not match to any rows.
+
+      Returns:
+        matches:  int32 tensor indicating the row each column matches to.
+      """
+      similarity_matrix_shape = shape_utils.combined_static_and_dynamic_shape(
+          similarity_matrix)
+      return -1 * tf.ones([similarity_matrix_shape[1]], dtype=tf.int32)
+
+    def _match_when_rows_are_non_empty():
+      """Performs matching when the rows of similarity matrix are non empty.
+
+      Returns:
+        matches:  int32 tensor indicating the row each column matches to.
+      """
+      # Matches for each column
+      matches = tf.argmax(input=similarity_matrix, axis=0, output_type=tf.int32)
+
+      # Deal with matched and unmatched threshold
+      if self._matched_threshold is not None:
+        # Get boolean indicators of ignored and unmatched columns
+        matched_vals = tf.reduce_max(input_tensor=similarity_matrix, axis=0)
+        below_unmatched_threshold = tf.greater(self._unmatched_threshold,
+                                               matched_vals)
+        between_thresholds = tf.logical_and(
+            tf.greater_equal(matched_vals, self._unmatched_threshold),
+            tf.greater(self._matched_threshold, matched_vals))
+
+        if self._negatives_lower_than_unmatched:
+          matches = self._set_values_using_indicator(matches,
+                                                     below_unmatched_threshold,
+                                                     -1)
+          matches = self._set_values_using_indicator(matches,
+                                                     between_thresholds,
+                                                     -2)
+        else:
+          matches = self._set_values_using_indicator(matches,
+                                                     below_unmatched_threshold,
+                                                     -2)
+          matches = self._set_values_using_indicator(matches,
+                                                     between_thresholds,
+                                                     -1)
+
+      if self._force_match_for_each_row:
+        similarity_matrix_shape = shape_utils.combined_static_and_dynamic_shape(
+            similarity_matrix)
+        force_match_column_ids = tf.argmax(input=similarity_matrix, axis=1,
+                                           output_type=tf.int32)
+        force_match_column_indicators = tf.one_hot(
+            force_match_column_ids, depth=similarity_matrix_shape[1])
+        force_match_row_ids = tf.argmax(input=force_match_column_indicators, axis=0,
+                                        output_type=tf.int32)
+        force_match_column_mask = tf.cast(
+            tf.reduce_max(input_tensor=force_match_column_indicators, axis=0), tf.bool)
+        final_matches = tf.where(force_match_column_mask,
+                                 force_match_row_ids, matches)
+        return final_matches
+      else:
+        return matches
+
+    if similarity_matrix.shape.is_fully_defined():
+      if similarity_matrix.shape[0] == 0:
+        return _match_when_rows_are_empty()
+      else:
+        return _match_when_rows_are_non_empty()
+    else:
+      return tf.cond(
+          pred=tf.greater(tf.shape(input=similarity_matrix)[0], 0),
+          true_fn=_match_when_rows_are_non_empty, false_fn=_match_when_rows_are_empty)
+
+  def _set_values_using_indicator(self, x, indicator, val):
+    """Set the indicated fields of x to val.
+
+    Args:
+      x: tensor.
+      indicator: boolean with same shape as x.
+      val: scalar with value to set.
+
+    Returns:
+      modified tensor.
+    """
+    indicator = tf.cast(indicator, x.dtype)
+    return tf.add(tf.multiply(x, 1 - indicator), val * indicator)

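A tiny numeric walk-through of the three-way thresholding above, calling the private `_match` directly so the sketch stays self-contained (eager TF2 assumed):

import tensorflow as tf
from mask_rcnn.object_detection.argmax_matcher import ArgMaxMatcher

matcher_fn = ArgMaxMatcher(matched_threshold=0.7, unmatched_threshold=0.3)

# Rows are groundtruth boxes, columns are anchors.
similarity = tf.constant([[0.9, 0.5, 0.1, 0.0],
                          [0.2, 0.6, 0.2, 0.1]])

print(matcher_fn._match(similarity).numpy())
# [ 0 -2 -1 -1]: column 0 matches row 0 (0.9 >= 0.7), column 1 is ignored
# (0.3 <= 0.6 < 0.7 -> -2), columns 2 and 3 are negatives (max < 0.3 -> -1).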
+ 269 - 0
TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/object_detection/balanced_positive_negative_sampler.py

@@ -0,0 +1,269 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Class to subsample minibatches by balancing positives and negatives.
+
+Subsamples minibatches based on a pre-specified positive fraction in range
+[0,1]. The class presumes there are many more negatives than positive examples:
+if the desired batch_size cannot be achieved with the pre-specified positive
+fraction, it fills the rest with negative examples. If this is not sufficient
+for obtaining the desired batch_size, it returns fewer examples.
+
+The main function to call is subsample(self, indicator, batch_size, labels). For convenience
+one can also call SubsampleWeights(self, weights, labels) which is defined in
+the minibatch_sampler base class.
+
+When is_static is True, it implements a method that guarantees static shapes.
+It also ensures that the length of the output of the subsample is always
+batch_size, even when the number of examples set to True in indicator is less
+than batch_size.
+
+This is originally implemented in TensorFlow Object Detection API.
+"""
+
+import tensorflow as tf
+
+from mask_rcnn.object_detection import minibatch_sampler
+from mask_rcnn.object_detection import ops
+
+
+class BalancedPositiveNegativeSampler(minibatch_sampler.MinibatchSampler):
+  """Subsamples minibatches to a desired balance of positives and negatives."""
+
+  def __init__(self, positive_fraction=0.5, is_static=False):
+    """Constructs a minibatch sampler.
+
+    Args:
+      positive_fraction: desired fraction of positive examples (scalar in [0,1])
+        in the batch.
+      is_static: If True, uses an implementation with static shape guarantees.
+
+    Raises:
+      ValueError: if positive_fraction < 0, or positive_fraction > 1
+    """
+    if positive_fraction < 0 or positive_fraction > 1:
+      raise ValueError('positive_fraction should be in range [0,1]. '
+                       'Received: %s.' % positive_fraction)
+    self._positive_fraction = positive_fraction
+    self._is_static = is_static
+
+  def _get_num_pos_neg_samples(self, sorted_indices_tensor, sample_size):
+    """Counts the number of positives and negatives numbers to be sampled.
+
+    Args:
+      sorted_indices_tensor: A sorted int32 tensor of shape [N] which contains
+        the signed indices of the examples where the sign is based on the label
+        value. The examples that cannot be sampled are set to 0. At most
+        sample_size * positive_fraction positive examples are sampled; the
+        remainder is taken from the negative examples.
+      sample_size: Size of subsamples.
+
+    Returns:
+      A tuple containing the number of positive and negative labels in the
+      subsample.
+    """
+    input_length = tf.shape(input=sorted_indices_tensor)[0]
+    valid_positive_index = tf.greater(sorted_indices_tensor,
+                                      tf.zeros(input_length, tf.int32))
+    num_sampled_pos = tf.reduce_sum(input_tensor=tf.cast(valid_positive_index, tf.int32))
+    max_num_positive_samples = tf.constant(
+        int(sample_size * self._positive_fraction), tf.int32)
+    num_positive_samples = tf.minimum(max_num_positive_samples, num_sampled_pos)
+    num_negative_samples = tf.constant(sample_size,
+                                       tf.int32) - num_positive_samples
+
+    return num_positive_samples, num_negative_samples
+
+  def _get_values_from_start_and_end(self, input_tensor, num_start_samples,
+                                     num_end_samples, total_num_samples):
+    """slices num_start_samples and last num_end_samples from input_tensor.
+
+    Args:
+      input_tensor: An int32 tensor of shape [N] to be sliced.
+      num_start_samples: Number of examples to be sliced from the beginning
+        of the input tensor.
+      num_end_samples: Number of examples to be sliced from the end of the
+        input tensor.
+      total_num_samples: Sum of num_start_samples and num_end_samples. This
+        should be a scalar.
+
+    Returns:
+      A tensor containing the first num_start_samples and last num_end_samples
+      from input_tensor.
+
+    """
+    input_length = tf.shape(input=input_tensor)[0]
+    start_positions = tf.less(tf.range(input_length), num_start_samples)
+    end_positions = tf.greater_equal(
+        tf.range(input_length), input_length - num_end_samples)
+    selected_positions = tf.logical_or(start_positions, end_positions)
+    selected_positions = tf.cast(selected_positions, tf.float32)
+    indexed_positions = tf.multiply(tf.cumsum(selected_positions),
+                                    selected_positions)
+    one_hot_selector = tf.one_hot(tf.cast(indexed_positions, tf.int32) - 1,
+                                  total_num_samples,
+                                  dtype=tf.float32)
+    return tf.cast(tf.tensordot(tf.cast(input_tensor, tf.float32),
+                                one_hot_selector, axes=[0, 0]), tf.int32)
+
+  def _static_subsample(self, indicator, batch_size, labels):
+    """Returns subsampled minibatch.
+
+    Args:
+      indicator: boolean tensor of shape [N] whose True entries can be sampled.
+        N should be a compile-time constant.
+      batch_size: desired batch size. This scalar cannot be None.
+      labels: boolean tensor of shape [N] denoting positive(=True) and negative
+        (=False) examples. N should be a compile-time constant.
+
+    Returns:
+      sampled_idx_indicator: boolean tensor of shape [N], True for entries which
+        are sampled. It ensures the length of output of the subsample is always
+        batch_size, even when number of examples set to True in indicator is
+        less than batch_size.
+
+    Raises:
+      ValueError: if labels and indicator are not 1D boolean tensors.
+    """
+    # Check if indicator and labels have a static size.
+    if not indicator.shape.is_fully_defined():
+      raise ValueError('indicator must be static in shape when is_static is '
+                       'True')
+    if not labels.shape.is_fully_defined():
+      raise ValueError('labels must be static in shape when is_static is '
+                       'True')
+    if not isinstance(batch_size, int):
+      raise ValueError('batch_size has to be an integer when is_static is '
+                       'True.')
+
+    input_length = tf.shape(input=indicator)[0]
+
+    # Set the number of examples set True in indicator to be at least
+    # batch_size.
+    num_true_sampled = tf.reduce_sum(input_tensor=tf.cast(indicator, tf.float32))
+    additional_false_sample = tf.less_equal(
+        tf.cumsum(tf.cast(tf.logical_not(indicator), tf.float32)),
+        batch_size - num_true_sampled)
+    indicator = tf.logical_or(indicator, additional_false_sample)
+
+    # Shuffle indicator and label. Need to store the permutation to restore the
+    # order post sampling.
+    permutation = tf.random.shuffle(tf.range(input_length))
+    indicator = ops.matmul_gather_on_zeroth_axis(
+        tf.cast(indicator, tf.float32), permutation)
+    labels = ops.matmul_gather_on_zeroth_axis(
+        tf.cast(labels, tf.float32), permutation)
+
+    # index (starting from 1) when indicator is True, 0 when False
+    indicator_idx = tf.where(
+        tf.cast(indicator, tf.bool), tf.range(1, input_length + 1),
+        tf.zeros(input_length, tf.int32))
+
+    # Replace -1 for negative, +1 for positive labels
+    signed_label = tf.where(
+        tf.cast(labels, tf.bool), tf.ones(input_length, tf.int32),
+        tf.scalar_mul(-1, tf.ones(input_length, tf.int32)))
+    # negative of index for negative label, positive index for positive label,
+    # 0 when indicator is False.
+    signed_indicator_idx = tf.multiply(indicator_idx, signed_label)
+    sorted_signed_indicator_idx = tf.nn.top_k(
+        signed_indicator_idx, input_length, sorted=True).values
+
+    [num_positive_samples,
+     num_negative_samples] = self._get_num_pos_neg_samples(
+         sorted_signed_indicator_idx, batch_size)
+
+    sampled_idx = self._get_values_from_start_and_end(
+        sorted_signed_indicator_idx, num_positive_samples,
+        num_negative_samples, batch_size)
+
+    # Shift the indices to start from 0 and remove any samples that are set as
+    # False.
+    sampled_idx = tf.abs(sampled_idx) - tf.ones(batch_size, tf.int32)
+    sampled_idx = tf.multiply(
+        tf.cast(tf.greater_equal(sampled_idx, tf.constant(0)), tf.int32),
+        sampled_idx)
+
+    sampled_idx_indicator = tf.cast(tf.reduce_sum(
+        input_tensor=tf.one_hot(sampled_idx, depth=input_length),
+        axis=0), tf.bool)
+
+    # project back the order based on stored permutations
+    reprojections = tf.one_hot(permutation, depth=input_length,
+                               dtype=tf.float32)
+    return tf.cast(tf.tensordot(
+        tf.cast(sampled_idx_indicator, tf.float32),
+        reprojections, axes=[0, 0]), tf.bool)
+
+  def subsample(self, indicator, batch_size, labels, scope=None):
+    """Returns subsampled minibatch.
+
+    Args:
+      indicator: boolean tensor of shape [N] whose True entries can be sampled.
+      batch_size: desired batch size. If None, keeps all positive samples and
+        randomly selects negative samples so that the positive sample fraction
+        matches self._positive_fraction. It cannot be None if is_static is True.
+      labels: boolean tensor of shape [N] denoting positive(=True) and negative
+          (=False) examples.
+      scope: name scope.
+
+    Returns:
+      sampled_idx_indicator: boolean tensor of shape [N], True for entries which
+        are sampled.
+
+    Raises:
+      ValueError: if labels and indicator are not 1D boolean tensors.
+    """
+    if len(indicator.get_shape().as_list()) != 1:
+      raise ValueError('indicator must be 1 dimensional, got a tensor of '
+                       'shape %s' % indicator.get_shape())
+    if len(labels.get_shape().as_list()) != 1:
+      raise ValueError('labels must be 1 dimensional, got a tensor of '
+                       'shape %s' % labels.get_shape())
+    if labels.dtype != tf.bool:
+      raise ValueError('labels should be of type bool. Received: %s' %
+                       labels.dtype)
+    if indicator.dtype != tf.bool:
+      raise ValueError('indicator should be of type bool. Received: %s' %
+                       indicator.dtype)
+  
+    if self._is_static:
+      return self._static_subsample(indicator, batch_size, labels)
+
+    else:
+      # Only sample from indicated samples
+      negative_idx = tf.logical_not(labels)
+      positive_idx = tf.logical_and(labels, indicator)
+      negative_idx = tf.logical_and(negative_idx, indicator)
+
+      # Sample positive and negative samples separately
+      if batch_size is None:
+        max_num_pos = tf.reduce_sum(input_tensor=tf.cast(positive_idx, dtype=tf.int32))
+      else:
+        max_num_pos = int(self._positive_fraction * batch_size)
+      sampled_pos_idx = self.subsample_indicator(positive_idx, max_num_pos)
+      num_sampled_pos = tf.reduce_sum(input_tensor=tf.cast(sampled_pos_idx, tf.int32))
+      if batch_size is None:
+        negative_positive_ratio = (
+            1 - self._positive_fraction) / self._positive_fraction
+        max_num_neg = tf.cast(
+            negative_positive_ratio * tf.cast(num_sampled_pos, dtype=tf.float32), dtype=tf.int32)
+      else:
+        max_num_neg = batch_size - num_sampled_pos
+      sampled_neg_idx = self.subsample_indicator(negative_idx, max_num_neg)
+
+      return tf.logical_or(sampled_pos_idx, sampled_neg_idx)

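A minimal sketch of the dynamic (is_static=False) path above: draw a 4-element minibatch at a 50% positive fraction from 10 candidates. It relies on subsample_indicator from the MinibatchSampler base class defined elsewhere in this directory (eager TF2 assumed):

import tensorflow as tf
from mask_rcnn.object_detection.balanced_positive_negative_sampler import BalancedPositiveNegativeSampler

sampler = BalancedPositiveNegativeSampler(positive_fraction=0.5)

indicator = tf.constant([True] * 10)                    # every example may be drawn
labels = tf.constant([True, True, True] + [False] * 7)  # 3 positives, 7 negatives

sampled = sampler.subsample(indicator, batch_size=4, labels=labels)
print(sampled.numpy())  # boolean mask selecting at most 2 positives and 2 negatives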
+ 157 - 0
TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/object_detection/box_coder.py

@@ -0,0 +1,157 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Base box coder.
+
+Box coders convert between coordinate frames, namely image-centric
+(with (0,0) at the top left of the image) and anchor-centric (with (0,0) being
+defined by a specific anchor).
+
+Users of a BoxCoder can call two methods:
+ encode: which encodes a box with respect to a given anchor
+  (or rather, a tensor of boxes wrt a corresponding tensor of anchors) and
+ decode: which inverts this encoding with a decode operation.
+In both cases, the arguments are assumed to be in 1-1 correspondence already;
+it is not the job of a BoxCoder to perform matching.
+"""
+from abc import ABCMeta
+from abc import abstractmethod
+from abc import abstractproperty
+
+import tensorflow as tf
+
+
+# Box coder types.
+FASTER_RCNN = 'faster_rcnn'
+KEYPOINT = 'keypoint'
+MEAN_STDDEV = 'mean_stddev'
+SQUARE = 'square'
+
+
+class BoxCoder(object):
+  """Abstract base class for box coder."""
+  __metaclass__ = ABCMeta
+
+  @abstractproperty
+  def code_size(self):
+    """Return the size of each code.
+
+    This number is a constant and should agree with the output of the `encode`
+    op (e.g. if rel_codes is the output of self.encode(...), then it should have
+    shape [N, code_size()]).  This abstractproperty should be overridden by
+    implementations.
+
+    Returns:
+      an integer constant
+    """
+    pass
+
+  def encode(self, boxes, anchors):
+    """Encode a box list relative to an anchor collection.
+
+    Args:
+      boxes: BoxList holding N boxes to be encoded
+      anchors: BoxList of N anchors
+
+    Returns:
+      a tensor representing N relative-encoded boxes
+    """
+
+    return self._encode(boxes, anchors)
+
+  def decode(self, rel_codes, anchors):
+    """Decode boxes that are encoded relative to an anchor collection.
+
+    Args:
+      rel_codes: a tensor representing N relative-encoded boxes
+      anchors: BoxList of anchors
+
+    Returns:
+      boxlist: BoxList holding N boxes encoded in the ordinary way (i.e.,
+        with corners y_min, x_min, y_max, x_max)
+    """
+
+    return self._decode(rel_codes, anchors)
+
+  @abstractmethod
+  def _encode(self, boxes, anchors):
+    """Method to be overriden by implementations.
+
+    Args:
+      boxes: BoxList holding N boxes to be encoded
+      anchors: BoxList of N anchors
+
+    Returns:
+      a tensor representing N relative-encoded boxes
+    """
+    pass
+
+  @abstractmethod
+  def _decode(self, rel_codes, anchors):
+    """Method to be overriden by implementations.
+
+    Args:
+      rel_codes: a tensor representing N relative-encoded boxes
+      anchors: BoxList of anchors
+
+    Returns:
+      boxlist: BoxList holding N boxes encoded in the ordinary way (i.e.,
+        with corners y_min, x_min, y_max, x_max)
+    """
+    pass
+
+
+def batch_decode(encoded_boxes, box_coder, anchors):
+  """Decode a batch of encoded boxes.
+
+  This op takes a batch of encoded bounding boxes and transforms
+  them to a batch of bounding boxes specified by their corners in
+  the order of [y_min, x_min, y_max, x_max].
+
+  Args:
+    encoded_boxes: a float32 tensor of shape [batch_size, num_anchors,
+      code_size] representing the location of the objects.
+    box_coder: a BoxCoder object.
+    anchors: a BoxList of anchors used to encode `encoded_boxes`.
+
+  Returns:
+    decoded_boxes: a float32 tensor of shape [batch_size, num_anchors,
+      code_size] representing the corners of the objects in the order
+      of [y_min, x_min, y_max, x_max].
+
+  Raises:
+    ValueError: if batch sizes of the inputs are inconsistent, or if
+    the number of anchors inferred from encoded_boxes and anchors are
+    inconsistent.
+  """
+
+  if encoded_boxes.get_shape()[1] != anchors.num_boxes_static():
+    raise ValueError('The number of anchors inferred from encoded_boxes'
+                     ' and anchors are inconsistent: shape[1] of encoded_boxes'
+                     ' %s should be equal to the number of anchors: %s.' %
+                     (
+                        encoded_boxes.get_shape()[1],
+                        anchors.num_boxes_static()
+                    )
+    )
+
+  decoded_boxes = tf.stack([
+      box_coder.decode(boxes, anchors).get()
+      for boxes in tf.unstack(encoded_boxes)
+  ])
+  return decoded_boxes

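A minimal sketch of implementing the abstract interface above; the OffsetBoxCoder below is hypothetical and simply stores corner offsets from the anchor (the real coders in this directory, e.g. faster_rcnn_box_coder.py, use scale-invariant encodings instead):

import tensorflow as tf

from mask_rcnn.object_detection import box_coder
from mask_rcnn.object_detection import box_list

class OffsetBoxCoder(box_coder.BoxCoder):
    """Toy coder: codes are corner offsets relative to the anchor corners."""

    @property
    def code_size(self):
        return 4

    def _encode(self, boxes, anchors):
        return boxes.get() - anchors.get()

    def _decode(self, rel_codes, anchors):
        return box_list.BoxList(rel_codes + anchors.get())

coder = OffsetBoxCoder()
anchors = box_list.BoxList(tf.constant([[0., 0., 10., 10.]]))
boxes = box_list.BoxList(tf.constant([[1., 1., 9., 9.]]))

codes = coder.encode(boxes, anchors)          # [[ 1.  1. -1. -1.]]
decoded = coder.decode(codes, anchors).get()  # recovers [[1. 1. 9. 9.]]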
+ 213 - 0
TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/object_detection/box_list.py

@@ -0,0 +1,213 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Bounding Box List definition.
+
+BoxList represents a list of bounding boxes as tensorflow
+tensors, where each bounding box is represented as a row of 4 numbers,
+[y_min, x_min, y_max, x_max].  It is assumed that all bounding boxes
+within a given list correspond to a single image.  See also
+box_list_ops.py for common box related operations (such as area, iou, etc).
+
+Optionally, users can add additional related fields (such as weights).
+We assume the following things to be true about fields:
+* they correspond to boxes in the box_list along the 0th dimension
+* they have inferrable rank at graph construction time
+* all dimensions except for possibly the 0th can be inferred
+  (i.e., not None) at graph construction time.
+
+Some other notes:
+  * Following tensorflow conventions, we use height, width ordering,
+  and correspondingly, y,x (or ymin, xmin, ymax, xmax) ordering
+  * Tensors are always provided as (flat) [N, 4] tensors.
+"""
+
+import tensorflow as tf
+
+
+class BoxList(object):
+  """Box collection."""
+
+  def __init__(self, boxes):
+    """Constructs box collection.
+
+    Args:
+      boxes: a tensor of shape [N, 4] representing box corners
+
+    Raises:
+      ValueError: if invalid dimensions for bbox data or if bbox data is not in
+          float32 format.
+    """
+    if len(boxes.get_shape()) != 2 or boxes.get_shape()[-1] != 4:
+      raise ValueError('Invalid dimensions for box data.')
+    if boxes.dtype != tf.float32:
+      raise ValueError('Invalid tensor type: should be tf.float32')
+    self.data = {'boxes': boxes}
+
+  def num_boxes(self):
+    """Returns number of boxes held in collection.
+
+    Returns:
+      a tensor representing the number of boxes held in the collection.
+    """
+    return tf.shape(input=self.data['boxes'])[0]
+
+  def num_boxes_static(self):
+    """Returns number of boxes held in collection.
+
+    This number is inferred at graph construction time rather than run-time.
+
+    Returns:
+      Number of boxes held in collection (integer) or None if this is not
+        inferrable at graph construction time.
+    """
+    try:
+      return self.data['boxes'].get_shape()[0].value
+    except AttributeError:
+      return self.data['boxes'].get_shape()[0]
+
+  def get_all_fields(self):
+    """Returns all fields."""
+    return self.data.keys()
+
+  def get_extra_fields(self):
+    """Returns all non-box fields (i.e., everything not named 'boxes')."""
+    return [k for k in self.data.keys() if k != 'boxes']
+
+  def add_field(self, field, field_data):
+    """Add field to box list.
+
+    This method can be used to add related box data such as
+    weights/labels, etc.
+
+    Args:
+      field: a string key to access the data via `get`
+      field_data: a tensor containing the data to store in the BoxList
+    """
+    self.data[field] = field_data
+
+  def has_field(self, field):
+    return field in self.data
+
+  def get(self):
+    """Convenience function for accessing box coordinates.
+
+    Returns:
+      a tensor with shape [N, 4] representing box coordinates.
+    """
+    return self.get_field('boxes')
+
+  def set(self, boxes):
+    """Convenience function for setting box coordinates.
+
+    Args:
+      boxes: a tensor of shape [N, 4] representing box corners
+
+    Raises:
+      ValueError: if invalid dimensions for bbox data
+    """
+    if len(boxes.get_shape()) != 2 or boxes.get_shape()[-1] != 4:
+      raise ValueError('Invalid dimensions for box data.')
+    self.data['boxes'] = boxes
+
+  def get_field(self, field):
+    """Accesses a box collection and associated fields.
+
+    This function returns specified field with object; if no field is specified,
+    it returns the box coordinates.
+
+    Args:
+      field: this optional string parameter can be used to specify
+        a related field to be accessed.
+
+    Returns:
+      a tensor representing the box collection or an associated field.
+
+    Raises:
+      ValueError: if invalid field
+    """
+    if not self.has_field(field):
+      raise ValueError('field ' + str(field) + ' does not exist')
+    return self.data[field]
+
+  def set_field(self, field, value):
+    """Sets the value of a field.
+
+    Updates the field of a box_list with a given value.
+
+    Args:
+      field: (string) name of the field to set value.
+      value: the value to assign to the field.
+
+    Raises:
+      ValueError: if the box_list does not have specified field.
+    """
+    if not self.has_field(field):
+      raise ValueError('field %s does not exist' % field)
+    self.data[field] = value
+
+  def get_center_coordinates_and_sizes(self, scope=None):
+    """Computes the center coordinates, height and width of the boxes.
+
+    Args:
+      scope: name scope of the function.
+
+    Returns:
+      a list of 4 1-D tensors [ycenter, xcenter, height, width].
+    """
+  
+    box_corners = self.get()
+    ymin, xmin, ymax, xmax = tf.unstack(tf.transpose(a=box_corners))
+    width = xmax - xmin
+    height = ymax - ymin
+    ycenter = ymin + height / 2.
+    xcenter = xmin + width / 2.
+    return [ycenter, xcenter, height, width]
+
+  def transpose_coordinates(self, scope=None):
+    """Transpose the coordinate representation in a boxlist.
+
+    Args:
+      scope: name scope of the function.
+    """
+  
+    y_min, x_min, y_max, x_max = tf.split(
+        value=self.get(), num_or_size_splits=4, axis=1)
+    self.set(tf.concat([x_min, y_min, x_max, y_max], 1))
+
+  def as_tensor_dict(self, fields=None):
+    """Retrieves specified fields as a dictionary of tensors.
+
+    Args:
+      fields: (optional) list of fields to return in the dictionary.
+        If None (default), all fields are returned.
+
+    Returns:
+      tensor_dict: A dictionary of tensors specified by fields.
+
+    Raises:
+      ValueError: if specified field is not contained in boxlist.
+    """
+    tensor_dict = {}
+    if fields is None:
+      fields = self.get_all_fields()
+    for field in fields:
+      if not self.has_field(field):
+        raise ValueError('boxlist must contain all specified fields')
+      tensor_dict[field] = self.get_field(field)
+    return tensor_dict
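
A small sketch of the BoxList API above (toy values chosen for illustration):

    import tensorflow as tf

    from mask_rcnn.object_detection import box_list

    boxes = box_list.BoxList(tf.constant([[0.0, 0.0, 2.0, 4.0]], tf.float32))
    boxes.add_field('scores', tf.constant([0.9]))
    ycenter, xcenter, height, width = boxes.get_center_coordinates_and_sizes()
    # ycenter == [1.0], xcenter == [2.0], height == [2.0], width == [4.0]
    boxes.get_extra_fields()  # ['scores']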

+ 125 - 0
TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/object_detection/faster_rcnn_box_coder.py

@@ -0,0 +1,125 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Faster RCNN box coder.
+
+Faster RCNN box coder follows the coding schema described below:
+  ty = (y - ya) / ha
+  tx = (x - xa) / wa
+  th = log(h / ha)
+  tw = log(w / wa)
+  where x, y, w, h denote the box's center coordinates, width and height
+  respectively. Similarly, xa, ya, wa, ha denote the anchor's center
+  coordinates, width and height. tx, ty, tw and th denote the anchor-encoded
+  center, width and height respectively.
+
+  See http://arxiv.org/abs/1506.01497 for details.
+"""
+
+import tensorflow as tf
+
+from mask_rcnn.object_detection import box_coder
+from mask_rcnn.object_detection import box_list
+
+EPSILON = 1e-8
+
+
+class FasterRcnnBoxCoder(box_coder.BoxCoder):
+  """Faster RCNN box coder."""
+
+  def __init__(self, scale_factors=None):
+    """Constructor for FasterRcnnBoxCoder.
+
+    Args:
+      scale_factors: List of 4 positive scalars to scale ty, tx, th and tw.
+        If set to None, does not perform scaling. For Faster RCNN,
+        the open-source implementation recommends using [10.0, 10.0, 5.0, 5.0].
+    """
+
+    if scale_factors is not None:
+
+      assert len(scale_factors) == 4
+      assert all([scalar > 0 for scalar in scale_factors])
+
+    self._scale_factors = scale_factors
+
+  @property
+  def code_size(self):
+    return 4
+
+  def _encode(self, boxes, anchors):
+    """Encode a box collection with respect to anchor collection.
+
+    Args:
+      boxes: BoxList holding N boxes to be encoded.
+      anchors: BoxList of anchors.
+
+    Returns:
+      a tensor representing N anchor-encoded boxes of the format
+      [ty, tx, th, tw].
+    """
+    # Convert anchors to the center coordinate representation.
+    ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()
+    ycenter, xcenter, h, w = boxes.get_center_coordinates_and_sizes()
+    # Avoid NaN in division and log below.
+    ha += EPSILON
+    wa += EPSILON
+    h += EPSILON
+    w += EPSILON
+
+    tx = (xcenter - xcenter_a) / wa
+    ty = (ycenter - ycenter_a) / ha
+    tw = tf.math.log(w / wa)
+    th = tf.math.log(h / ha)
+
+    # Scales location targets as used in paper for joint training.
+    if self._scale_factors:
+      ty *= self._scale_factors[0]
+      tx *= self._scale_factors[1]
+      th *= self._scale_factors[2]
+      tw *= self._scale_factors[3]
+
+    return tf.transpose(a=tf.stack([ty, tx, th, tw]))
+
+  def _decode(self, rel_codes, anchors):
+    """Decode relative codes to boxes.
+
+    Args:
+      rel_codes: a tensor representing N anchor-encoded boxes.
+      anchors: BoxList of anchors.
+
+    Returns:
+      boxes: BoxList holding N bounding boxes.
+    """
+    ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()
+
+    ty, tx, th, tw = tf.unstack(tf.transpose(a=rel_codes))
+    if self._scale_factors:
+      ty /= self._scale_factors[0]
+      tx /= self._scale_factors[1]
+      th /= self._scale_factors[2]
+      tw /= self._scale_factors[3]
+    w = tf.exp(tw) * wa
+    h = tf.exp(th) * ha
+    ycenter = ty * ha + ycenter_a
+    xcenter = tx * wa + xcenter_a
+    ymin = ycenter - h / 2.
+    xmin = xcenter - w / 2.
+    ymax = ycenter + h / 2.
+    xmax = xcenter + w / 2.
+    return box_list.BoxList(tf.transpose(a=tf.stack([ymin, xmin, ymax, xmax])))
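
A hypothetical encode/decode round trip through the coder above; the decoded
corners match the groundtruth box up to EPSILON:

    import tensorflow as tf

    from mask_rcnn.object_detection import box_list
    from mask_rcnn.object_detection import faster_rcnn_box_coder

    coder = faster_rcnn_box_coder.FasterRcnnBoxCoder(
        scale_factors=[10.0, 10.0, 5.0, 5.0])
    anchors = box_list.BoxList(tf.constant([[0.0, 0.0, 1.0, 1.0]], tf.float32))
    groundtruth = box_list.BoxList(
        tf.constant([[0.1, 0.1, 0.9, 0.9]], tf.float32))
    codes = coder.encode(groundtruth, anchors)    # [[ty, tx, th, tw]]
    decoded = coder.decode(codes, anchors).get()  # ~[[0.1, 0.1, 0.9, 0.9]]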

+ 244 - 0
TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/object_detection/matcher.py

@@ -0,0 +1,244 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Matcher interface and Match class.
+
+This module defines the Matcher interface and the Match object. The job of the
+matcher is to match row and column indices based on the similarity matrix and
+other optional parameters. Each column is matched to at most one row. There
+are three possibilities for the matching:
+
+1) match: A column matches a row.
+2) no_match: A column does not match any row.
+3) ignore: A column that is neither 'match' nor 'no_match'.
+
+The ignore case is regularly encountered in object detection: when an anchor
+has only a small overlap with a ground-truth box, one wants to treat it
+neither as a positive example (match) nor as a negative example (no match).
+
+The Match class is used to store the match results and provides simple APIs
+to query them.
+"""
+from abc import ABCMeta
+from abc import abstractmethod
+
+import tensorflow as tf
+
+
+class Match(object):
+  """Class to store results from the matcher.
+
+  This class is used to store the results from the matcher. It provides
+  convenient methods to query the matching results.
+  """
+
+  def __init__(self, match_results):
+    """Constructs a Match object.
+
+    Args:
+      match_results: Integer tensor of shape [N] with (1) match_results[i]>=0,
+        meaning that column i is matched with row match_results[i].
+        (2) match_results[i]=-1, meaning that column i is not matched.
+        (3) match_results[i]=-2, meaning that column i is ignored.
+
+    Raises:
+      ValueError: if match_results does not have rank 1 or is not an
+        int32 tensor
+    """
+    if match_results.shape.ndims != 1:
+      raise ValueError('match_results should have rank 1')
+    if match_results.dtype != tf.int32:
+      raise ValueError('match_results should be an int32 tensor')
+    self._match_results = match_results
+
+  @property
+  def match_results(self):
+    """The accessor for match results.
+
+    Returns:
+      the tensor which encodes the match results.
+    """
+    return self._match_results
+
+  def matched_column_indices(self):
+    """Returns column indices that match to some row.
+
+    The indices returned by this op are always sorted in increasing order.
+
+    Returns:
+      column_indices: int32 tensor of shape [K] with column indices.
+    """
+    return self._reshape_and_cast(tf.where(tf.greater(self._match_results, -1)))
+
+  def matched_column_indicator(self):
+    """Returns column indices that are matched.
+
+    Returns:
+      column_indices: int32 tensor of shape [K] with column indices.
+    """
+    return tf.greater_equal(self._match_results, 0)
+
+  def num_matched_columns(self):
+    """Returns number (int32 scalar tensor) of matched columns."""
+    return tf.size(input=self.matched_column_indices())
+
+  def unmatched_column_indices(self):
+    """Returns column indices that do not match any row.
+
+    The indices returned by this op are always sorted in increasing order.
+
+    Returns:
+      column_indices: int32 tensor of shape [K] with column indices.
+    """
+    return self._reshape_and_cast(tf.where(tf.equal(self._match_results, -1)))
+
+  def unmatched_column_indicator(self):
+    """Returns column indices that are unmatched.
+
+    Returns:
+      column_indices: int32 tensor of shape [K] with column indices.
+    """
+    return tf.equal(self._match_results, -1)
+
+  def num_unmatched_columns(self):
+    """Returns number (int32 scalar tensor) of unmatched columns."""
+    return tf.size(input=self.unmatched_column_indices())
+
+  def ignored_column_indices(self):
+    """Returns column indices that are ignored (neither Matched nor Unmatched).
+
+    The indices returned by this op are always sorted in increasing order.
+
+    Returns:
+      column_indices: int32 tensor of shape [K] with column indices.
+    """
+    return self._reshape_and_cast(tf.where(self.ignored_column_indicator()))
+
+  def ignored_column_indicator(self):
+    """Returns boolean column indicator where True means the colum is ignored.
+
+    Returns:
+      column_indicator: boolean vector which is True for all ignored column
+      indices.
+    """
+    return tf.equal(self._match_results, -2)
+
+  def num_ignored_columns(self):
+    """Returns number (int32 scalar tensor) of matched columns."""
+    return tf.size(input=self.ignored_column_indices())
+
+  def unmatched_or_ignored_column_indices(self):
+    """Returns column indices that are unmatched or ignored.
+
+    The indices returned by this op are always sorted in increasing order.
+
+    Returns:
+      column_indices: int32 tensor of shape [K] with column indices.
+    """
+    return self._reshape_and_cast(tf.where(tf.greater(0, self._match_results)))
+
+  def matched_row_indices(self):
+    """Returns row indices that match some column.
+
+    The indices returned by this op are ordered so as to be in correspondence
+    with the output of matched_column_indices().  For example if
+    self.matched_column_indices() is [0, 2], and self.matched_row_indices() is
+    [7, 3], then we know that column 0 was matched to row 7 and column 2 was
+    matched to row 3.
+
+    Returns:
+      row_indices: int32 tensor of shape [K] with row indices.
+    """
+    return self._reshape_and_cast(
+        tf.gather(self._match_results, self.matched_column_indices()))
+
+  def _reshape_and_cast(self, t):
+    return tf.cast(tf.reshape(t, [-1]), tf.int32)
+
+  def gather_based_on_match(self, input_tensor, unmatched_value,
+                            ignored_value):
+    """Gathers elements from `input_tensor` based on match results.
+
+    For columns that are matched to a row, gathered_tensor[col] is set to
+    input_tensor[match_results[col]]. For columns that are unmatched,
+    gathered_tensor[col] is set to unmatched_value. Finally, for columns that
+    are ignored gathered_tensor[col] is set to ignored_value.
+
+    Note that the input_tensor.shape[1:] must match with unmatched_value.shape
+    and ignored_value.shape
+
+    Args:
+      input_tensor: Tensor to gather values from.
+      unmatched_value: Constant tensor value for unmatched columns.
+      ignored_value: Constant tensor value for ignored columns.
+
+    Returns:
+      gathered_tensor: A tensor containing values gathered from input_tensor.
+        The shape of the gathered tensor is [match_results.shape[0]] +
+        input_tensor.shape[1:].
+    """
+    input_tensor = tf.concat([tf.stack([ignored_value, unmatched_value]),
+                              input_tensor], axis=0)
+    gather_indices = tf.maximum(self.match_results + 2, 0)
+    gathered_tensor = tf.gather(input_tensor, gather_indices)
+    return gathered_tensor
+
+
+class Matcher(object, metaclass=ABCMeta):
+  """Abstract base class for matcher."""
+
+  def match(self, similarity_matrix, scope=None, **params):
+    """Computes matches among row and column indices and returns the result.
+
+    Computes matches among the row and column indices based on the similarity
+    matrix and optional arguments.
+
+    Args:
+      similarity_matrix: Float tensor of shape [N, M] with pairwise similarity
+        where higher value means more similar.
+      scope: Op scope name. Defaults to 'Match' if None.
+      **params: Additional keyword arguments for specific implementations of
+        the Matcher.
+
+    Returns:
+      A Match object with the results of matching.
+    """
+    
+    return Match(self._match(similarity_matrix, **params))
+
+  @abstractmethod
+  def _match(self, similarity_matrix, **params):
+    """Method to be overridden by implementations.
+
+    Args:
+      similarity_matrix: Float tensor of shape [N, M] with pairwise similarity
+        where higher value means more similar.
+      **params: Additional keyword arguments for specific implementations of
+        the Matcher.
+
+    Returns:
+      match_results: Integer tensor of shape [M]: match_results[i]>=0 means
+        that column i is matched to row match_results[i], match_results[i]=-1
+        means that the column is not matched. match_results[i]=-2 means that
+        the column is ignored (usually this happens when there is a very weak
+        match which one neither wants as positive nor negative example).
+    """
+    pass
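
A toy Match object and a few of its queries (values chosen for this sketch
only):

    import tensorflow as tf

    from mask_rcnn.object_detection import matcher

    # Columns 0 and 3 match rows 2 and 0; column 1 is unmatched; column 2 is
    # ignored.
    match = matcher.Match(tf.constant([2, -1, -2, 0], tf.int32))
    match.matched_column_indices()   # [0, 3]
    match.matched_row_indices()      # [2, 0]
    match.num_unmatched_columns()    # 1
    match.ignored_column_indices()   # [2]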

+ 95 - 0
TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/object_detection/minibatch_sampler.py

@@ -0,0 +1,95 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Base minibatch sampler module.
+
+The job of the minibatch_sampler is to subsample a minibatch based on some
+criterion.
+
+The main function call is:
+    subsample(indicator, batch_size, **params).
+Indicator is a 1d boolean tensor where True denotes which examples can be
+sampled. It returns a boolean indicator where True denotes an example has been
+sampled.
+
+Subclasses should implement the `subsample` function and can make use of the
+static method `subsample_indicator`.
+
+This is originally implemented in TensorFlow Object Detection API.
+"""
+
+from abc import ABCMeta
+from abc import abstractmethod
+
+import tensorflow as tf
+
+from mask_rcnn.object_detection import ops
+
+
+class MinibatchSampler(object, metaclass=ABCMeta):
+  """Abstract base class for subsampling minibatches."""
+
+  def __init__(self):
+    """Constructs a minibatch sampler."""
+    pass
+
+  @abstractmethod
+  def subsample(self, indicator, batch_size, **params):
+    """Returns subsample of entries in indicator.
+
+    Args:
+      indicator: boolean tensor of shape [N] whose True entries can be sampled.
+      batch_size: desired batch size.
+      **params: additional keyword arguments for specific implementations of
+          the MinibatchSampler.
+
+    Returns:
+      sample_indicator: boolean tensor of shape [N] whose True entries have been
+      sampled. If sum(indicator) >= batch_size, sum(sample_indicator) == batch_size
+    """
+    pass
+
+  @staticmethod
+  def subsample_indicator(indicator, num_samples):
+    """Subsample indicator vector.
+
+    Given a boolean indicator vector with M elements set to `True`, the function
+    assigns all but `num_samples` of these previously `True` elements to
+    `False`. If `num_samples` is greater than M, the original indicator vector
+    is returned.
+
+    Args:
+      indicator: a 1-dimensional boolean tensor indicating which elements
+        are allowed to be sampled and which are not.
+      num_samples: int32 scalar tensor
+
+    Returns:
+      a boolean tensor with the same shape as input (indicator) tensor
+    """
+    indices = tf.where(indicator)
+    indices = tf.random.shuffle(indices)
+    indices = tf.reshape(indices, [-1])
+
+    num_samples = tf.minimum(tf.size(input=indices), num_samples)
+    selected_indices = tf.slice(indices, [0], tf.reshape(num_samples, [1]))
+
+    selected_indicator = ops.indices_to_dense_vector(selected_indices,
+                                                     tf.shape(input=indicator)[0])
+
+    return tf.equal(selected_indicator, 1)
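
A sketch of subsample_indicator on a toy indicator; the shuffle makes the
surviving positions random, but they are always drawn from the True entries:

    import tensorflow as tf

    from mask_rcnn.object_detection.minibatch_sampler import MinibatchSampler

    indicator = tf.constant([True, True, False, True, True])
    sampled = MinibatchSampler.subsample_indicator(indicator, 2)
    # `sampled` is a boolean [5] tensor with exactly two True entries, both
    # at positions that were True in `indicator`.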

+ 84 - 0
TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/object_detection/ops.py

@@ -0,0 +1,84 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""A module for helper tensorflow ops.
+
+This is originally implemented in TensorFlow Object Detection API.
+"""
+
+import tensorflow as tf
+
+from mask_rcnn.object_detection import shape_utils
+
+
+def indices_to_dense_vector(indices,
+                            size,
+                            indices_value=1.,
+                            default_value=0,
+                            dtype=tf.float32):
+  """Creates dense vector with indices set to specific value and rest to zeros.
+
+  This function exists because it is unclear if it is safe to use
+    tf.sparse_to_dense(indices, [size], 1, validate_indices=False)
+  with indices which are not ordered.
+  This function accepts a dynamic size (e.g. tf.shape(tensor)[0])
+
+  Args:
+    indices: 1d Tensor with integer indices which are to be set to
+        indices_values.
+    size: scalar with size (integer) of output Tensor.
+    indices_value: values of elements specified by indices in the output vector
+    default_value: values of other elements in the output vector.
+    dtype: data type.
+
+  Returns:
+    dense 1D Tensor of shape [size] with indices set to indices_values and the
+        rest set to default_value.
+  """
+  size = tf.cast(size, dtype=tf.int32)
+  zeros = tf.ones([size], dtype=dtype) * default_value
+  values = tf.ones_like(indices, dtype=dtype) * indices_value
+
+  return tf.dynamic_stitch([tf.range(size), tf.cast(indices, dtype=tf.int32)],
+                           [zeros, values])
+
+
+def matmul_gather_on_zeroth_axis(params, indices, scope=None):
+  """Matrix multiplication based implementation of tf.gather on zeroth axis.
+
+  TODO(rathodv, jonathanhuang): enable sparse matmul option.
+
+  Args:
+    params: A float32 Tensor. The tensor from which to gather values.
+      Must be at least rank 1.
+    indices: A Tensor. Must be one of the following types: int32, int64.
+      Must be in range [0, params.shape[0])
+    scope: A name for the operation (optional).
+
+  Returns:
+    A Tensor. Has the same type as params. Values from params gathered
+    from indices given by indices, with shape indices.shape + params.shape[1:].
+  """
+
+  params_shape = shape_utils.combined_static_and_dynamic_shape(params)
+  indices_shape = shape_utils.combined_static_and_dynamic_shape(indices)
+  params2d = tf.reshape(params, [params_shape[0], -1])
+  indicator_matrix = tf.one_hot(indices, params_shape[0])
+  gathered_result_flattened = tf.matmul(indicator_matrix, params2d)
+  return tf.reshape(gathered_result_flattened,
+                    tf.stack(indices_shape + params_shape[1:]))
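
Toy invocations of the two helpers above (values chosen for illustration):

    import tensorflow as tf

    from mask_rcnn.object_detection import ops

    ops.indices_to_dense_vector(tf.constant([1, 3]), 5)
    # -> [0., 1., 0., 1., 0.]

    params = tf.constant([[1., 2.], [3., 4.], [5., 6.]])
    ops.matmul_gather_on_zeroth_axis(params, tf.constant([2, 0]))
    # -> [[5., 6.], [1., 2.]], the same result as tf.gather(params, [2, 0])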

+ 444 - 0
TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/object_detection/preprocessor.py

@@ -0,0 +1,444 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Preprocess images and bounding boxes for detection.
+
+We perform two sets of operations in preprocessing stage:
+(a) operations that are applied to both training and testing data,
+(b) operations that are applied only to training data for the purpose of
+    data augmentation.
+
+A preprocessing function receives a set of inputs,
+e.g. an image and bounding boxes,
+performs an operation on them, and returns them.
+Some examples are: randomly cropping the image, randomly mirroring the image,
+                   randomly changing the brightness, contrast, hue and
+                   randomly jittering the bounding boxes.
+
+The image is a rank 4 tensor: [1, height, width, channels] with
+dtype=tf.float32. The groundtruth_boxes is a rank 2 tensor: [N, 4] where
+in each row there is a box with [ymin xmin ymax xmax].
+Boxes are in normalized coordinates meaning
+their coordinate values range in [0, 1]
+
+Important Note: In tensor_dict, images is a rank 4 tensor, but preprocessing
+functions receive a rank 3 tensor for processing the image. Thus, inside the
+preprocess function we squeeze the image to become a rank 3 tensor and then
+we pass it to the functions. At the end of the preprocess we expand the image
+back to rank 4.
+"""
+
+import tensorflow as tf
+
+from mask_rcnn.object_detection import box_list
+
+
+def _flip_boxes_left_right(boxes):
+  """Left-right flip the boxes.
+
+  Args:
+    boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
+           Boxes are in normalized form meaning their coordinates vary
+           between [0, 1].
+           Each row is in the form of [ymin, xmin, ymax, xmax].
+
+  Returns:
+    Flipped boxes.
+  """
+  ymin, xmin, ymax, xmax = tf.split(value=boxes, num_or_size_splits=4, axis=1)
+  flipped_xmin = tf.subtract(1.0, xmax)
+  flipped_xmax = tf.subtract(1.0, xmin)
+  flipped_boxes = tf.concat([ymin, flipped_xmin, ymax, flipped_xmax], 1)
+  return flipped_boxes
+
+
+def _flip_masks_left_right(masks):
+  """Left-right flip masks.
+
+  Args:
+    masks: rank 3 float32 tensor with shape
+      [num_instances, height, width] representing instance masks.
+
+  Returns:
+    flipped masks: rank 3 float32 tensor with shape
+      [num_instances, height, width] representing instance masks.
+  """
+  return masks[:, :, ::-1]
+
+
+def keypoint_flip_horizontal(keypoints, flip_point, flip_permutation,
+                             scope=None):
+  """Flips the keypoints horizontally around the flip_point.
+
+  This operation flips the x coordinate for each keypoint around the flip_point
+  and also permutes the keypoints in a manner specified by flip_permutation.
+
+  Args:
+    keypoints: a tensor of shape [num_instances, num_keypoints, 2]
+    flip_point:  (float) scalar tensor representing the x coordinate to flip the
+      keypoints around.
+    flip_permutation: rank 1 int32 tensor containing the keypoint flip
+      permutation. This specifies the mapping from original keypoint indices
+      to the flipped keypoint indices. This is used primarily for keypoints
+      that are not reflection invariant. E.g. Suppose there are 3 keypoints
+      representing ['head', 'right_eye', 'left_eye'], then a logical choice for
+      flip_permutation might be [0, 2, 1] since we want to swap the 'left_eye'
+      and 'right_eye' after a horizontal flip.
+    scope: name scope.
+
+  Returns:
+    new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
+  """
+  
+  keypoints = tf.transpose(a=keypoints, perm=[1, 0, 2])
+  keypoints = tf.gather(keypoints, flip_permutation)
+  v, u = tf.split(value=keypoints, num_or_size_splits=2, axis=2)
+  u = flip_point * 2.0 - u
+  new_keypoints = tf.concat([v, u], 2)
+  new_keypoints = tf.transpose(a=new_keypoints, perm=[1, 0, 2])
+  return new_keypoints
+
+
+def random_horizontal_flip(image,
+                           boxes=None,
+                           masks=None,
+                           keypoints=None,
+                           keypoint_flip_permutation=None,
+                           seed=None):
+  """Randomly flips the image and detections horizontally.
+
+  The probability of flipping the image is 50%.
+
+  Args:
+    image: rank 3 float32 tensor with shape [height, width, channels].
+    boxes: (optional) rank 2 float32 tensor with shape [N, 4]
+           containing the bounding boxes.
+           Boxes are in normalized form meaning their coordinates vary
+           between [0, 1].
+           Each row is in the form of [ymin, xmin, ymax, xmax].
+    masks: (optional) rank 3 float32 tensor with shape
+           [num_instances, height, width] containing instance masks. The masks
+           are of the same height, width as the input `image`.
+    keypoints: (optional) rank 3 float32 tensor with shape
+               [num_instances, num_keypoints, 2]. The keypoints are in y-x
+               normalized coordinates.
+    keypoint_flip_permutation: rank 1 int32 tensor containing the keypoint flip
+                               permutation.
+    seed: random seed
+
+  Returns:
+    image: image which is the same shape as input image.
+
+    If boxes, masks, keypoints, and keypoint_flip_permutation are not None,
+    the function also returns the following tensors.
+
+    boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
+           Boxes are in normalized form meaning their coordinates vary
+           between [0, 1].
+    masks: rank 3 float32 tensor with shape [num_instances, height, width]
+           containing instance masks.
+    keypoints: rank 3 float32 tensor with shape
+               [num_instances, num_keypoints, 2]
+
+  Raises:
+    ValueError: if keypoints are provided but keypoint_flip_permutation is not.
+  """
+
+  def _flip_image(image):
+    # flip image
+    image_flipped = tf.image.flip_left_right(image)
+    return image_flipped
+
+  if keypoints is not None and keypoint_flip_permutation is None:
+    raise ValueError(
+        'keypoints are provided but keypoint_flip_permutation is not provided')
+
+  result = []
+  # random variable defining whether to do flip or not
+  do_a_flip_random = tf.greater(tf.random.uniform([], seed=seed), 0.5)
+
+  # flip image
+  image = tf.cond(pred=do_a_flip_random, true_fn=lambda: _flip_image(image), false_fn=lambda: image)
+  result.append(image)
+
+  # flip boxes
+  if boxes is not None:
+    boxes = tf.cond(pred=do_a_flip_random, true_fn=lambda: _flip_boxes_left_right(boxes),
+                    false_fn=lambda: boxes)
+    result.append(boxes)
+
+  # flip masks
+  if masks is not None:
+    masks = tf.cond(pred=do_a_flip_random, true_fn=lambda: _flip_masks_left_right(masks),
+                    false_fn=lambda: masks)
+    result.append(masks)
+
+  # flip keypoints
+  if keypoints is not None and keypoint_flip_permutation is not None:
+    permutation = keypoint_flip_permutation
+    keypoints = tf.cond(
+        pred=do_a_flip_random,
+        true_fn=lambda: keypoint_flip_horizontal(keypoints, 0.5, permutation),
+        false_fn=lambda: keypoints)
+    result.append(keypoints)
+
+  return tuple(result)
+
+
+def _compute_new_static_size(image, min_dimension, max_dimension):
+  """Compute new static shape for resize_to_range method."""
+  image_shape = image.get_shape().as_list()
+  orig_height = image_shape[0]
+  orig_width = image_shape[1]
+  num_channels = image_shape[2]
+  orig_min_dim = min(orig_height, orig_width)
+  # Calculates the larger of the possible sizes
+  large_scale_factor = min_dimension / float(orig_min_dim)
+  # Scaling orig_(height|width) by large_scale_factor will make the smaller
+  # dimension equal to min_dimension, save for floating point rounding errors.
+  # For reasonably-sized images, taking the nearest integer will reliably
+  # eliminate this error.
+  large_height = int(round(orig_height * large_scale_factor))
+  large_width = int(round(orig_width * large_scale_factor))
+  large_size = [large_height, large_width]
+  if max_dimension:
+    # Calculates the smaller of the possible sizes, use that if the larger
+    # is too big.
+    orig_max_dim = max(orig_height, orig_width)
+    small_scale_factor = max_dimension / float(orig_max_dim)
+    # Scaling orig_(height|width) by small_scale_factor will make the larger
+    # dimension equal to max_dimension, save for floating point rounding
+    # errors. For reasonably-sized images, taking the nearest integer will
+    # reliably eliminate this error.
+    small_height = int(round(orig_height * small_scale_factor))
+    small_width = int(round(orig_width * small_scale_factor))
+    small_size = [small_height, small_width]
+    new_size = large_size
+    if max(large_size) > max_dimension:
+      new_size = small_size
+  else:
+    new_size = large_size
+  return tf.constant(new_size + [num_channels])
+
+
+def _compute_new_dynamic_size(image, min_dimension, max_dimension):
+  """Compute new dynamic shape for resize_to_range method."""
+  image_shape = tf.shape(input=image)
+  orig_height = tf.cast(image_shape[0], dtype=tf.float32)
+  orig_width = tf.cast(image_shape[1], dtype=tf.float32)
+  num_channels = image_shape[2]
+  orig_min_dim = tf.minimum(orig_height, orig_width)
+  # Calculates the larger of the possible sizes
+  min_dimension = tf.constant(min_dimension, dtype=tf.float32)
+  large_scale_factor = min_dimension / orig_min_dim
+  # Scaling orig_(height|width) by large_scale_factor will make the smaller
+  # dimension equal to min_dimension, save for floating point rounding errors.
+  # For reasonably-sized images, taking the nearest integer will reliably
+  # eliminate this error.
+  large_height = tf.cast(tf.round(orig_height * large_scale_factor), dtype=tf.int32)
+  large_width = tf.cast(tf.round(orig_width * large_scale_factor), dtype=tf.int32)
+  large_size = tf.stack([large_height, large_width])
+  if max_dimension:
+    # Calculates the smaller of the possible sizes, use that if the larger
+    # is too big.
+    orig_max_dim = tf.maximum(orig_height, orig_width)
+    max_dimension = tf.constant(max_dimension, dtype=tf.float32)
+    small_scale_factor = max_dimension / orig_max_dim
+    # Scaling orig_(height|width) by small_scale_factor will make the larger
+    # dimension equal to max_dimension, save for floating point rounding
+    # errors. For reasonably-sized images, taking the nearest integer will
+    # reliably eliminate this error.
+    small_height = tf.cast(tf.round(orig_height * small_scale_factor), dtype=tf.int32)
+    small_width = tf.cast(tf.round(orig_width * small_scale_factor), dtype=tf.int32)
+    small_size = tf.stack([small_height, small_width])
+    new_size = tf.cond(
+        pred=tf.cast(tf.reduce_max(input_tensor=large_size), dtype=tf.float32) > max_dimension,
+        true_fn=lambda: small_size, false_fn=lambda: large_size)
+  else:
+    new_size = large_size
+  return tf.stack(tf.unstack(new_size) + [num_channels])
+
+
+def resize_to_range(image,
+                    masks=None,
+                    min_dimension=None,
+                    max_dimension=None,
+                    method=tf.image.ResizeMethod.BILINEAR,
+                    align_corners=False,
+                    pad_to_max_dimension=False):
+  """Resizes an image so its dimensions are within the provided value.
+
+  The output size can be described by two cases:
+  1. If the image can be rescaled so its minimum dimension is equal to the
+     provided value without the other dimension exceeding max_dimension,
+     then do so.
+  2. Otherwise, resize so the largest dimension is equal to max_dimension.
+
+  Args:
+    image: A 3D tensor of shape [height, width, channels]
+    masks: (optional) rank 3 float32 tensor with shape
+           [num_instances, height, width] containing instance masks.
+    min_dimension: (optional) (scalar) desired size of the smaller image
+                   dimension.
+    max_dimension: (optional) (scalar) maximum allowed size
+                   of the larger image dimension.
+    method: (optional) interpolation method used in resizing. Defaults to
+            BILINEAR.
+    align_corners: bool. If true, exactly align all 4 corners of the input
+                   and output. Defaults to False.
+    pad_to_max_dimension: Whether to resize the image and pad it with zeros
+      so the resulting image is of the spatial size
+      [max_dimension, max_dimension]. If masks are included they are padded
+      similarly.
+
+  Returns:
+    Note that the position of the resized_image_shape changes based on whether
+    masks are present.
+    resized_image: A 3D tensor of shape [new_height, new_width, channels],
+      where the image has been resized (with bilinear interpolation) so that
+      min(new_height, new_width) == min_dimension or
+      max(new_height, new_width) == max_dimension.
+    resized_masks: If masks is not None, also outputs masks. A 3D tensor of
+      shape [num_instances, new_height, new_width].
+    resized_image_shape: A 1D tensor of shape [3] containing shape of the
+      resized image.
+
+  Raises:
+    ValueError: if the image is not a 3D tensor.
+  """
+  if len(image.get_shape()) != 3:
+    raise ValueError('Image should be 3D tensor')
+
+  
+  if image.get_shape().is_fully_defined():
+    new_size = _compute_new_static_size(image, min_dimension, max_dimension)
+  else:
+    new_size = _compute_new_dynamic_size(image, min_dimension, max_dimension)
+  new_image = tf.image.resize(
+      image, new_size[:-1], method=method)
+
+  if pad_to_max_dimension:
+    new_image = tf.image.pad_to_bounding_box(
+        new_image, 0, 0, max_dimension, max_dimension)
+
+  result = [new_image]
+  if masks is not None:
+    new_masks = tf.expand_dims(masks, 3)
+    new_masks = tf.image.resize(
+        new_masks,
+        new_size[:-1],
+        method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
+    new_masks = tf.squeeze(new_masks, 3)
+    if pad_to_max_dimension:
+      new_masks = tf.image.pad_to_bounding_box(
+          new_masks, 0, 0, max_dimension, max_dimension)
+    result.append(new_masks)
+
+  result.append(new_size)
+  return result
+
+
+def _copy_extra_fields(boxlist_to_copy_to, boxlist_to_copy_from):
+  """Copies the extra fields of boxlist_to_copy_from to boxlist_to_copy_to.
+
+  Args:
+    boxlist_to_copy_to: BoxList to which extra fields are copied.
+    boxlist_to_copy_from: BoxList from which fields are copied.
+
+  Returns:
+    boxlist_to_copy_to with extra fields.
+  """
+  for field in boxlist_to_copy_from.get_extra_fields():
+    boxlist_to_copy_to.add_field(field, boxlist_to_copy_from.get_field(field))
+  return boxlist_to_copy_to
+
+
+def box_list_scale(boxlist, y_scale, x_scale, scope=None):
+  """scale box coordinates in x and y dimensions.
+
+  Args:
+    boxlist: BoxList holding N boxes
+    y_scale: (float) scalar tensor
+    x_scale: (float) scalar tensor
+    scope: name scope.
+
+  Returns:
+    boxlist: BoxList holding N boxes
+  """
+
+  y_scale = tf.cast(y_scale, tf.float32)
+  x_scale = tf.cast(x_scale, tf.float32)
+  y_min, x_min, y_max, x_max = tf.split(
+      value=boxlist.get(), num_or_size_splits=4, axis=1)
+  y_min = y_scale * y_min
+  y_max = y_scale * y_max
+  x_min = x_scale * x_min
+  x_max = x_scale * x_max
+  scaled_boxlist = box_list.BoxList(
+      tf.concat([y_min, x_min, y_max, x_max], 1))
+  return _copy_extra_fields(scaled_boxlist, boxlist)
+
+
+def keypoint_scale(keypoints, y_scale, x_scale, scope=None):
+  """Scales keypoint coordinates in x and y dimensions.
+
+  Args:
+    keypoints: a tensor of shape [num_instances, num_keypoints, 2]
+    y_scale: (float) scalar tensor
+    x_scale: (float) scalar tensor
+    scope: name scope.
+
+  Returns:
+    new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
+  """
+
+  y_scale = tf.cast(y_scale, tf.float32)
+  x_scale = tf.cast(x_scale, tf.float32)
+  new_keypoints = keypoints * [[[y_scale, x_scale]]]
+  return new_keypoints
+
+
+def scale_boxes_to_pixel_coordinates(image, boxes, keypoints=None):
+  """Scales boxes from normalized to pixel coordinates.
+
+  Args:
+    image: A 3D float32 tensor of shape [height, width, channels].
+    boxes: A 2D float32 tensor of shape [num_boxes, 4] containing the bounding
+      boxes in normalized coordinates. Each row is of the form
+      [ymin, xmin, ymax, xmax].
+    keypoints: (optional) rank 3 float32 tensor with shape
+      [num_instances, num_keypoints, 2]. The keypoints are in y-x normalized
+      coordinates.
+
+  Returns:
+    image: unchanged input image.
+    scaled_boxes: a 2D float32 tensor of shape [num_boxes, 4] containing the
+      bounding boxes in pixel coordinates.
+    scaled_keypoints: a 3D float32 tensor with shape
+      [num_instances, num_keypoints, 2] containing the keypoints in pixel
+      coordinates.
+  """
+  boxlist = box_list.BoxList(boxes)
+  image_height = tf.shape(input=image)[0]
+  image_width = tf.shape(input=image)[1]
+  scaled_boxes = box_list_scale(boxlist, image_height, image_width).get()
+  result = [image, scaled_boxes]
+  if keypoints is not None:
+    scaled_keypoints = keypoint_scale(keypoints, image_height, image_width)
+    result.append(scaled_keypoints)
+  return tuple(result)
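
A sketch of the two main entry points above on a toy 600x800 image. Scaling
the short side to 800 puts the long side at 1067, under the 1333 cap, so case
1 of resize_to_range applies:

    import tensorflow as tf

    from mask_rcnn.object_detection import preprocessor

    image = tf.zeros([600, 800, 3], tf.float32)
    boxes = tf.constant([[0.1, 0.1, 0.5, 0.5]], tf.float32)

    resized_image, new_size = preprocessor.resize_to_range(
        image, min_dimension=800, max_dimension=1333)  # new_size: [800, 1067, 3]

    # Image and boxes are flipped together: both or neither.
    flipped_image, flipped_boxes = preprocessor.random_horizontal_flip(
        image, boxes=boxes)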

+ 138 - 0
TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/object_detection/region_similarity_calculator.py

@@ -0,0 +1,138 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Region Similarity Calculators for BoxLists.
+
+Region Similarity Calculators compare a pairwise measure of similarity
+between the boxes in two BoxLists.
+"""
+from abc import ABCMeta
+from abc import abstractmethod
+
+import tensorflow as tf
+
+
+def area(boxlist, scope=None):
+  """Computes area of boxes.
+
+  Args:
+    boxlist: BoxList holding N boxes
+    scope: name scope.
+
+  Returns:
+    a tensor with shape [N] representing box areas.
+  """
+
+  y_min, x_min, y_max, x_max = tf.split(
+      value=boxlist.get(), num_or_size_splits=4, axis=1)
+  return tf.squeeze((y_max - y_min) * (x_max - x_min), [1])
+
+
+def intersection(boxlist1, boxlist2, scope=None):
+  """Compute pairwise intersection areas between boxes.
+
+  Args:
+    boxlist1: BoxList holding N boxes
+    boxlist2: BoxList holding M boxes
+    scope: name scope.
+
+  Returns:
+    a tensor with shape [N, M] representing pairwise intersections
+  """
+  
+  y_min1, x_min1, y_max1, x_max1 = tf.split(
+      value=boxlist1.get(), num_or_size_splits=4, axis=1)
+  y_min2, x_min2, y_max2, x_max2 = tf.split(
+      value=boxlist2.get(), num_or_size_splits=4, axis=1)
+  all_pairs_min_ymax = tf.minimum(y_max1, tf.transpose(a=y_max2))
+  all_pairs_max_ymin = tf.maximum(y_min1, tf.transpose(a=y_min2))
+  intersect_heights = tf.maximum(0.0, all_pairs_min_ymax - all_pairs_max_ymin)
+  all_pairs_min_xmax = tf.minimum(x_max1, tf.transpose(a=x_max2))
+  all_pairs_max_xmin = tf.maximum(x_min1, tf.transpose(a=x_min2))
+  intersect_widths = tf.maximum(0.0, all_pairs_min_xmax - all_pairs_max_xmin)
+  return intersect_heights * intersect_widths
+
+
+def iou(boxlist1, boxlist2, scope=None):
+  """Computes pairwise intersection-over-union between box collections.
+
+  Args:
+    boxlist1: BoxList holding N boxes
+    boxlist2: BoxList holding M boxes
+    scope: name scope.
+
+  Returns:
+    a tensor with shape [N, M] representing pairwise iou scores.
+  """
+
+  intersections = intersection(boxlist1, boxlist2)
+  areas1 = area(boxlist1)
+  areas2 = area(boxlist2)
+  unions = (
+      tf.expand_dims(areas1, 1) + tf.expand_dims(areas2, 0) - intersections)
+  return tf.where(
+      tf.equal(intersections, 0.0),
+      tf.zeros_like(intersections), tf.truediv(intersections, unions))
+
+
+class RegionSimilarityCalculator(object, metaclass=ABCMeta):
+  """Abstract base class for region similarity calculator."""
+
+  def compare(self, boxlist1, boxlist2, scope=None):
+    """Computes matrix of pairwise similarity between BoxLists.
+
+    This op (to be overridden) computes a measure of pairwise similarity between
+    the boxes in the given BoxLists. Higher values indicate more similarity.
+
+    Note that this method simply measures similarity and does not explicitly
+    perform a matching.
+
+    Args:
+      boxlist1: BoxList holding N boxes.
+      boxlist2: BoxList holding M boxes.
+      scope: Op scope name. Defaults to 'Compare' if None.
+
+    Returns:
+      a (float32) tensor of shape [N, M] with pairwise similarity score.
+    """
+    
+    return self._compare(boxlist1, boxlist2)
+
+  @abstractmethod
+  def _compare(self, boxlist1, boxlist2):
+    pass
+
+
+class IouSimilarity(RegionSimilarityCalculator):
+  """Class to compute similarity based on Intersection over Union (IOU) metric.
+
+  This class computes pairwise similarity between two BoxLists based on IOU.
+  """
+
+  def _compare(self, boxlist1, boxlist2):
+    """Compute pairwise IOU similarity between the two BoxLists.
+
+    Args:
+      boxlist1: BoxList holding N boxes.
+      boxlist2: BoxList holding M boxes.
+
+    Returns:
+      A tensor with shape [N, M] representing pairwise iou scores.
+    """
+    return iou(boxlist1, boxlist2)
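
A toy IoU computation with the calculator above; the boxes intersect in an
area of 0.5 and their union is 1.5, so the pairwise score is 1/3:

    import tensorflow as tf

    from mask_rcnn.object_detection import box_list
    from mask_rcnn.object_detection import region_similarity_calculator

    boxes_a = box_list.BoxList(tf.constant([[0.0, 0.0, 1.0, 1.0]], tf.float32))
    boxes_b = box_list.BoxList(tf.constant([[0.0, 0.5, 1.0, 1.5]], tf.float32))
    region_similarity_calculator.IouSimilarity().compare(boxes_a, boxes_b)
    # -> [[0.3333...]]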

+ 86 - 0
TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/object_detection/shape_utils.py

@@ -0,0 +1,86 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Utils used to manipulate tensor shapes."""
+
+import tensorflow as tf
+
+
+def combined_static_and_dynamic_shape(tensor):
+  """Returns a list containing static and dynamic values for the dimensions.
+
+  Returns a list of static and dynamic values for shape dimensions. This is
+  useful to preserve static shapes when available in reshape operation.
+
+  Args:
+    tensor: A tensor of any type.
+
+  Returns:
+    A list of size tensor.shape.ndims containing integers or scalar tensors.
+  """
+  static_tensor_shape = tensor.shape.as_list()
+  dynamic_tensor_shape = tf.shape(input=tensor)
+  combined_shape = []
+  for index, dim in enumerate(static_tensor_shape):
+    if dim is not None:
+      combined_shape.append(dim)
+    else:
+      combined_shape.append(dynamic_tensor_shape[index])
+  return combined_shape
+
+
+def pad_or_clip_nd(tensor, output_shape):
+  """Pad or Clip given tensor to the output shape.
+
+  Args:
+    tensor: Input tensor to pad or clip.
+    output_shape: A list of integers / scalar tensors (or None for dynamic dim)
+      representing the size to pad or clip each dimension of the input tensor.
+
+  Returns:
+    Input tensor padded and clipped to the output shape.
+  """
+  tensor_shape = tf.shape(input=tensor)
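+  # Clip each axis that exceeds output_shape down to it; a size of -1 keeps
+  # everything that remains along that axis.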
+  clip_size = [
+      tf.where(tensor_shape[i] - shape > 0, shape, -1)
+      if shape is not None else -1 for i, shape in enumerate(output_shape)
+  ]
+  clipped_tensor = tf.slice(
+      tensor,
+      begin=tf.zeros(len(clip_size), dtype=tf.int32),
+      size=clip_size)
+
+  # Pad tensor if the shape of clipped tensor is smaller than the expected
+  # shape.
+  clipped_tensor_shape = tf.shape(input=clipped_tensor)
+  trailing_paddings = [
+      shape - clipped_tensor_shape[i] if shape is not None else 0
+      for i, shape in enumerate(output_shape)
+  ]
+  paddings = tf.stack(
+      [
+          tf.zeros(len(trailing_paddings), dtype=tf.int32),
+          trailing_paddings
+      ],
+      axis=1)
+  padded_tensor = tf.pad(tensor=clipped_tensor, paddings=paddings)
+  output_static_shape = [
+      dim if not isinstance(dim, tf.Tensor) else None for dim in output_shape
+  ]
+  padded_tensor.set_shape(output_static_shape)
+  return padded_tensor
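
A sketch of combined_static_and_dynamic_shape inside a tf.function with an
unknown batch dimension (the function name is hypothetical):

    import tensorflow as tf

    from mask_rcnn.object_detection import shape_utils

    @tf.function(input_signature=[tf.TensorSpec([None, 4], tf.float32)])
    def split_corners(boxes):
      shape = shape_utils.combined_static_and_dynamic_shape(boxes)
      # shape[0] is a scalar tensor (batch unknown); shape[1] is the int 4.
      return tf.reshape(boxes, [shape[0], 2, 2])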

+ 308 - 0
TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/object_detection/target_assigner.py

@@ -0,0 +1,308 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Base target assigner module.
+
+The job of a TargetAssigner is, for a given set of anchors (bounding boxes) and
+groundtruth detections (bounding boxes), to assign classification and regression
+targets to each anchor as well as weights to each anchor (specifying, e.g.,
+which anchors should not contribute to training loss).
+
+It assigns classification/regression targets by performing the following steps:
+1) Computing pairwise similarity between anchors and groundtruth boxes using a
+  provided RegionSimilarityCalculator
+2) Computing a matching based on the similarity matrix using a provided Matcher
+3) Assigning regression targets based on the matching and a provided BoxCoder
+4) Assigning classification targets based on the matching and groundtruth labels
+
+Note that TargetAssigners only operate on detections from a single
+image at a time, so any logic for applying a TargetAssigner to multiple
+images must be handled externally.
+"""
+import tensorflow as tf
+
+from mask_rcnn.object_detection import box_list
+from mask_rcnn.object_detection import shape_utils
+
+KEYPOINTS_FIELD_NAME = 'keypoints'
+
+
+class TargetAssigner(object):
+    """Target assigner to compute classification and regression targets."""
+
+    def __init__(self, similarity_calc, matcher, box_coder,
+                 negative_class_weight=1.0, unmatched_cls_target=None):
+        """Construct Object Detection Target Assigner.
+
+        Args:
+          similarity_calc: a RegionSimilarityCalculator
+          matcher: Matcher used to match groundtruth to anchors.
+          box_coder: BoxCoder used to encode matching groundtruth boxes with
+            respect to anchors.
+          negative_class_weight: classification weight to be associated with
+            negative anchors (default: 1.0). The weight must be in [0., 1.].
+          unmatched_cls_target: a float32 tensor with shape [d_1, d_2, ..., d_k]
+            which is consistent with the classification target for each
+            anchor (and can be empty for scalar targets).  This shape must thus be
+            compatible with the groundtruth labels that are passed to the "assign"
+            function (which have shape [num_gt_boxes, d_1, d_2, ..., d_k]).
+            If set to None, unmatched_cls_target is set to be [0] for each anchor.
+        """
+        self._similarity_calc = similarity_calc
+        self._matcher = matcher
+        self._box_coder = box_coder
+        self._negative_class_weight = negative_class_weight
+        if unmatched_cls_target is None:
+            self._unmatched_cls_target = tf.constant([0], tf.float32)
+        else:
+            self._unmatched_cls_target = unmatched_cls_target
+
+    @property
+    def box_coder(self):
+        return self._box_coder
+
+    def assign(self, anchors, groundtruth_boxes, groundtruth_labels=None,
+               groundtruth_weights=None, **params):
+        """Assign classification and regression targets to each anchor.
+
+        For a given set of anchors and groundtruth detections, match anchors
+        to groundtruth_boxes and assign classification and regression targets to
+        each anchor as well as weights based on the resulting match (specifying,
+        e.g., which anchors should not contribute to training loss).
+
+        Anchors that are not matched to anything are given a classification target
+        of self._unmatched_cls_target which can be specified via the constructor.
+
+        Args:
+          anchors: a BoxList representing N anchors
+          groundtruth_boxes: a BoxList representing M groundtruth boxes
+          groundtruth_labels:  a tensor of shape [M, d_1, ... d_k]
+            with labels for each of the ground_truth boxes. The subshape
+            [d_1, ... d_k] can be empty (corresponding to scalar inputs).  When set
+            to None, groundtruth_labels assumes a binary problem where all
+            ground_truth boxes get a positive label (of 1).
+          groundtruth_weights: a float tensor of shape [M] indicating the weight to
+            assign to all anchors matched to a particular groundtruth box. The weights
+            must be in [0., 1.]. If None, all weights are set to 1.
+          **params: Additional keyword arguments for specific implementations of
+                  the Matcher.
+
+        Returns:
+          cls_targets: a float32 tensor with shape [num_anchors, d_1, d_2 ... d_k],
+            where the subshape [d_1, ..., d_k] is compatible with groundtruth_labels
+            which has shape [num_gt_boxes, d_1, d_2, ... d_k].
+          cls_weights: a float32 tensor with shape [num_anchors]
+          reg_targets: a float32 tensor with shape [num_anchors, box_code_dimension]
+          reg_weights: a float32 tensor with shape [num_anchors]
+          match: a matcher.Match object encoding the match between anchors and
+            groundtruth boxes, with rows corresponding to groundtruth boxes
+            and columns corresponding to anchors.
+
+        Raises:
+          ValueError: if anchors or groundtruth_boxes are not of type
+            box_list.BoxList
+        """
+        if not isinstance(anchors, box_list.BoxList):
+            raise ValueError('anchors must be a BoxList')
+
+        if not isinstance(groundtruth_boxes, box_list.BoxList):
+            raise ValueError('groundtruth_boxes must be a BoxList')
+
+        if groundtruth_labels is None:
+            groundtruth_labels = tf.ones(tf.expand_dims(groundtruth_boxes.num_boxes(),
+                                                        0))
+            groundtruth_labels = tf.expand_dims(groundtruth_labels, -1)
+
+        if groundtruth_weights is None:
+            num_gt_boxes = groundtruth_boxes.num_boxes_static()
+
+            if not num_gt_boxes:
+                num_gt_boxes = groundtruth_boxes.num_boxes()
+
+            groundtruth_weights = tf.ones([num_gt_boxes], dtype=tf.float32)
+
+        match_quality_matrix = self._similarity_calc.compare(groundtruth_boxes, anchors)
+        match = self._matcher.match(match_quality_matrix, **params)
+
+        reg_targets = self._create_regression_targets(anchors, groundtruth_boxes, match)
+        cls_targets = self._create_classification_targets(groundtruth_labels, match)
+
+        reg_weights = self._create_regression_weights(match, groundtruth_weights)
+        cls_weights = self._create_classification_weights(match, groundtruth_weights)
+
+        num_anchors = anchors.num_boxes_static()
+
+        if num_anchors is not None:
+            reg_targets = self._reset_target_shape(reg_targets, num_anchors)
+            cls_targets = self._reset_target_shape(cls_targets, num_anchors)
+            reg_weights = self._reset_target_shape(reg_weights, num_anchors)
+            cls_weights = self._reset_target_shape(cls_weights, num_anchors)
+
+        return cls_targets, cls_weights, reg_targets, reg_weights, match
+
+    def _reset_target_shape(self, target, num_anchors):
+        """Sets the static shape of the target.
+
+        Args:
+          target: the target tensor. Its first dimension will be overwritten.
+          num_anchors: the number of anchors, which is used to override the target's
+            first dimension.
+
+        Returns:
+          A tensor with the shape info filled in.
+        """
+        target_shape = target.get_shape().as_list()
+        target_shape[0] = num_anchors
+        target.set_shape(target_shape)
+        return target
+
+    def _create_regression_targets(self, anchors, groundtruth_boxes, match):
+        """Returns a regression target for each anchor.
+
+        Args:
+          anchors: a BoxList representing N anchors
+          groundtruth_boxes: a BoxList representing M groundtruth_boxes
+          match: a matcher.Match object
+
+        Returns:
+          reg_targets: a float32 tensor with shape [N, box_code_dimension]
+        """
+        matched_gt_boxes = match.gather_based_on_match(
+            groundtruth_boxes.get(),
+            unmatched_value=tf.zeros(4),
+            ignored_value=tf.zeros(4)
+        )
+
+        matched_gt_boxlist = box_list.BoxList(matched_gt_boxes)
+
+        if groundtruth_boxes.has_field(KEYPOINTS_FIELD_NAME):
+            groundtruth_keypoints = groundtruth_boxes.get_field(KEYPOINTS_FIELD_NAME)
+            matched_keypoints = match.gather_based_on_match(
+                groundtruth_keypoints,
+                unmatched_value=tf.zeros(groundtruth_keypoints.get_shape()[1:]),
+                ignored_value=tf.zeros(groundtruth_keypoints.get_shape()[1:])
+            )
+            matched_gt_boxlist.add_field(KEYPOINTS_FIELD_NAME, matched_keypoints)
+
+        matched_reg_targets = self._box_coder.encode(matched_gt_boxlist, anchors)
+        match_results_shape = shape_utils.combined_static_and_dynamic_shape(match.match_results)
+
+        # Zero out the unmatched and ignored regression targets.
+        unmatched_ignored_reg_targets = tf.tile(self._default_regression_target(), [match_results_shape[0], 1])
+        matched_anchors_mask = match.matched_column_indicator()
+
+        matched_anchors_mask = tf.expand_dims(matched_anchors_mask, axis=1)
+        matched_anchors_mask = tf.broadcast_to(matched_anchors_mask, shape=matched_reg_targets.get_shape())
+
+        reg_targets = tf.where(matched_anchors_mask, matched_reg_targets, unmatched_ignored_reg_targets)
+        return reg_targets
+
+    def _default_regression_target(self):
+        """Returns the default target for anchors to regress to.
+
+        Default regression targets are set to zero (though in
+        this implementation what these targets are set to should
+        not matter as the regression weight of any box set to
+        regress to the default target is zero).
+
+        Returns:
+          default_target: a float32 tensor with shape [1, box_code_dimension]
+        """
+        return tf.constant([self._box_coder.code_size * [0]], tf.float32)
+
+    def _create_classification_targets(self, groundtruth_labels, match):
+        """Create classification targets for each anchor.
+
+        Assigns each anchor the classification target of the groundtruth label
+        it is matched to, as provided by match.  Anchors that are not matched
+        to anything are given the target self._unmatched_cls_target.
+
+        Args:
+          groundtruth_labels:  a tensor of shape [num_gt_boxes, d_1, ... d_k]
+            with labels for each of the ground_truth boxes. The subshape
+            [d_1, ... d_k] can be empty (corresponding to scalar labels).
+          match: a matcher.Match object that provides a matching between anchors
+            and groundtruth boxes.
+
+        Returns:
+          a float32 tensor with shape [num_anchors, d_1, d_2 ... d_k], where the
+          subshape [d_1, ..., d_k] is compatible with groundtruth_labels which has
+          shape [num_gt_boxes, d_1, d_2, ... d_k].
+        """
+        return match.gather_based_on_match(
+            groundtruth_labels,
+            unmatched_value=self._unmatched_cls_target,
+            ignored_value=self._unmatched_cls_target)
+
+    def _create_regression_weights(self, match, groundtruth_weights):
+        """Set regression weight for each anchor.
+
+        Only positive anchors are set to contribute to the regression loss, so this
+        method returns a weight of 1 for every positive anchor and 0 for every
+        negative anchor.
+
+        Args:
+          match: a matcher.Match object that provides a matching between anchors
+            and groundtruth boxes.
+          groundtruth_weights: a float tensor of shape [M] indicating the weight to
+            assign to all anchors matched to a particular groundtruth box.
+
+        Returns:
+          a float32 tensor with shape [num_anchors] representing regression weights.
+        """
+        return match.gather_based_on_match(
+            groundtruth_weights, ignored_value=0., unmatched_value=0.)
+
+    def _create_classification_weights(self,
+                                       match,
+                                       groundtruth_weights):
+        """Create classification weights for each anchor.
+
+        Positive (matched) anchors are assigned the weight of the groundtruth
+        box they match (1.0 unless groundtruth_weights says otherwise), and
+        negative (unmatched) anchors are assigned negative_class_weight. When
+        anchors are ignored, weights are set to zero. By default both weights
+        are 1.0, but negative_class_weight can be adjusted to handle the class
+        imbalance that is almost always present in object detection.
+
+        Args:
+          match: a matcher.Match object that provides a matching between anchors
+            and groundtruth boxes.
+          groundtruth_weights: a float tensor of shape [M] indicating the weight to
+            assign to all anchors matched to a particular groundtruth box.
+
+        Returns:
+          a float32 tensor with shape [num_anchors] representing classification
+          weights.
+        """
+        return match.gather_based_on_match(
+            groundtruth_weights,
+            ignored_value=0.,
+            unmatched_value=self._negative_class_weight)
+
+    def get_box_coder(self):
+        """Get BoxCoder of this TargetAssigner.
+
+        Returns:
+          BoxCoder object.
+        """
+        return self._box_coder

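The four assignment steps in the module docstring compose directly from the sibling object_detection modules. Below is a minimal usage sketch, not part of the commit: it assumes the upstream API names (an IouSimilarity class in a region_similarity_calculator module, ArgMaxMatcher, FasterRcnnBoxCoder) and illustrative RPN-style thresholds.

```python
import tensorflow as tf

from mask_rcnn.object_detection import argmax_matcher
from mask_rcnn.object_detection import box_list
from mask_rcnn.object_detection import faster_rcnn_box_coder
from mask_rcnn.object_detection import region_similarity_calculator  # assumed module name
from mask_rcnn.object_detection import target_assigner

# Steps 1-2: IoU similarity plus argmax matching (thresholds are illustrative).
similarity_calc = region_similarity_calculator.IouSimilarity()
matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.7,
                                       unmatched_threshold=0.3)
# Steps 3-4: regression targets are encoded with the Faster R-CNN box coder.
box_coder = faster_rcnn_box_coder.FasterRcnnBoxCoder()

assigner = target_assigner.TargetAssigner(similarity_calc, matcher, box_coder)

anchors = box_list.BoxList(tf.constant([[0., 0., 10., 10.],
                                        [20., 20., 30., 30.]]))
groundtruth = box_list.BoxList(tf.constant([[0., 0., 9., 9.]]))

# One image at a time, as the docstring notes; batching is the caller's job.
cls_targets, cls_weights, reg_targets, reg_weights, match = assigner.assign(
    anchors, groundtruth)
```

With groundtruth_labels left as None, assign treats this as a binary problem: matched anchors get the positive label 1, and unmatched anchors fall back to the default unmatched target of [0].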
+ 153 - 0
TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/object_detection/tf_example_decoder.py

@@ -0,0 +1,153 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tensorflow Example proto decoder for object detection.
+
+A decoder to decode string tensors containing serialized tensorflow.Example
+protos for object detection.
+"""
+import tensorflow as tf
+
+
+def _get_source_id_from_encoded_image(parsed_tensors):
+    return tf.strings.as_string(tf.strings.to_hash_bucket_fast(parsed_tensors['image/encoded'], 2 ** 63 - 1))
+
+
+class TfExampleDecoder(object):
+    """Tensorflow Example proto decoder."""
+
+    def __init__(self, use_instance_mask=False, regenerate_source_id=False):
+        self._include_mask = use_instance_mask
+        self._regenerate_source_id = regenerate_source_id
+        self._keys_to_features = {
+            'image/encoded': tf.io.FixedLenFeature((), tf.string),
+            'image/source_id': tf.io.FixedLenFeature((), tf.string),
+            'image/height': tf.io.FixedLenFeature((), tf.int64),
+            'image/width': tf.io.FixedLenFeature((), tf.int64),
+            'image/object/bbox/xmin': tf.io.VarLenFeature(tf.float32),
+            'image/object/bbox/xmax': tf.io.VarLenFeature(tf.float32),
+            'image/object/bbox/ymin': tf.io.VarLenFeature(tf.float32),
+            'image/object/bbox/ymax': tf.io.VarLenFeature(tf.float32),
+            'image/object/class/label': tf.io.VarLenFeature(tf.int64),
+            'image/object/area': tf.io.VarLenFeature(tf.float32),
+            'image/object/is_crowd': tf.io.VarLenFeature(tf.int64),
+        }
+        if use_instance_mask:
+            self._keys_to_features.update({
+                'image/object/mask': tf.io.VarLenFeature(tf.string),
+            })
+
+    def _decode_image(self, parsed_tensors):
+        """Decodes the image and set its static shape."""
+        image = tf.io.decode_image(parsed_tensors['image/encoded'], channels=3)
+        image.set_shape([None, None, 3])
+        return image
+
+    def _decode_boxes(self, parsed_tensors):
+        """Concat box coordinates in the format of [ymin, xmin, ymax, xmax]."""
+        xmin = parsed_tensors['image/object/bbox/xmin']
+        xmax = parsed_tensors['image/object/bbox/xmax']
+        ymin = parsed_tensors['image/object/bbox/ymin']
+        ymax = parsed_tensors['image/object/bbox/ymax']
+        return tf.stack([ymin, xmin, ymax, xmax], axis=-1)
+
+    def _decode_masks(self, parsed_tensors):
+        """Decode a set of PNG masks to the tf.float32 tensors."""
+
+        def _decode_png_mask(png_bytes):
+            mask = tf.squeeze(tf.io.decode_png(png_bytes, channels=1, dtype=tf.uint8), axis=-1)
+            mask = tf.cast(mask, dtype=tf.float32)
+            mask.set_shape([None, None])
+            return mask
+
+        height = parsed_tensors['image/height']
+        width = parsed_tensors['image/width']
+        masks = parsed_tensors['image/object/mask']
+        return tf.cond(
+            tf.greater(tf.size(masks), 0),
+            lambda: tf.map_fn(_decode_png_mask, masks, dtype=tf.float32),
+            lambda: tf.zeros([0, height, width], dtype=tf.float32)
+        )
+
+    def decode(self, serialized_example):
+        """Decode the serialized example.
+
+        Args:
+          serialized_example: a single serialized tf.Example string.
+
+        Returns:
+          decoded_tensors: a dictionary of tensors with the following fields:
+            - image: a uint8 tensor of shape [None, None, 3].
+            - source_id: a string scalar tensor.
+            - height: an integer scalar tensor.
+            - width: an integer scalar tensor.
+            - groundtruth_classes: an int64 tensor of shape [None].
+            - groundtruth_is_crowd: a bool tensor of shape [None].
+            - groundtruth_area: a float32 tensor of shape [None].
+            - groundtruth_boxes: a float32 tensor of shape [None, 4].
+            - groundtruth_instance_masks: a float32 tensor of shape
+                [None, None, None].
+            - groundtruth_instance_masks_png: a string tensor of shape [None].
+        """
+        parsed_tensors = tf.io.parse_single_example(
+            serialized_example, self._keys_to_features)
+        for k in parsed_tensors:
+            if isinstance(parsed_tensors[k], tf.SparseTensor):
+                if parsed_tensors[k].dtype == tf.string:
+                    parsed_tensors[k] = tf.sparse.to_dense(
+                        parsed_tensors[k], default_value='')
+                else:
+                    parsed_tensors[k] = tf.sparse.to_dense(
+                        parsed_tensors[k], default_value=0)
+
+        image = self._decode_image(parsed_tensors)
+        boxes = self._decode_boxes(parsed_tensors)
+        is_crowd = tf.cast(parsed_tensors['image/object/is_crowd'], dtype=tf.bool)
+
+        if self._include_mask:
+            masks = self._decode_masks(parsed_tensors)
+
+        if self._regenerate_source_id:
+            source_id = _get_source_id_from_encoded_image(parsed_tensors)
+
+        else:
+            source_id = tf.cond(
+                tf.greater(tf.strings.length(parsed_tensors['image/source_id']), 0),
+                lambda: parsed_tensors['image/source_id'],
+                lambda: _get_source_id_from_encoded_image(parsed_tensors)
+            )
+
+        decoded_tensors = {
+            'image': image,
+            'source_id': source_id,
+            'height': parsed_tensors['image/height'],
+            'width': parsed_tensors['image/width'],
+            'groundtruth_classes': parsed_tensors['image/object/class/label'],
+            'groundtruth_is_crowd': is_crowd,
+            'groundtruth_area': parsed_tensors['image/object/area'],
+            'groundtruth_boxes': boxes,
+        }
+
+        if self._include_mask:
+            decoded_tensors.update({
+                'groundtruth_instance_masks': masks,
+                'groundtruth_instance_masks_png': parsed_tensors['image/object/mask'],
+            })
+
+        return decoded_tensors

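A minimal sketch of how the decoder is typically wired into a tf.data pipeline, not part of the commit; the TFRecord path is hypothetical (in this repo the records would come from dataset/create_coco_tf_record.py).

```python
import tensorflow as tf

from mask_rcnn.object_detection import tf_example_decoder

decoder = tf_example_decoder.TfExampleDecoder(use_instance_mask=True)

# Hypothetical location of the preprocessed COCO TFRecords.
files = tf.io.gfile.glob('/data/coco/train-*.tfrecord')
dataset = tf.data.TFRecordDataset(files)
# Each record is a serialized tf.Example string; decode() turns it into
# the dictionary of tensors documented above.
dataset = dataset.map(decoder.decode,
                      num_parallel_calls=tf.data.experimental.AUTOTUNE)

for example in dataset.take(1):
    print(example['source_id'].numpy())
    print(example['image'].shape)              # (height, width, 3), uint8
    print(example['groundtruth_boxes'].shape)  # (num_instances, 4)
```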
+ 417 - 0
TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/object_detection/visualization_utils.py

@@ -0,0 +1,417 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""A set of functions that are used for visualization.
+
+These functions receive an image and draw the requested visualization directly
+onto it, modifying the image in place; visualize_boxes_and_labels_on_image_array
+also returns the same (modified) array for convenience.
+
+"""
+import collections
+import functools
+
+import six
+
+import matplotlib
+matplotlib.use('Agg')  # Set the headless-friendly backend before pyplot is imported.
+import matplotlib.pyplot as plt
+import PIL.Image as Image
+import PIL.ImageColor as ImageColor
+import PIL.ImageDraw as ImageDraw
+import PIL.ImageFont as ImageFont
+
+import numpy as np
+import tensorflow as tf
+
+from mask_rcnn.object_detection import shape_utils
+
+
+_TITLE_LEFT_MARGIN = 10
+_TITLE_TOP_MARGIN = 10
+STANDARD_COLORS = [
+    'AliceBlue', 'Chartreuse', 'Aqua', 'Aquamarine', 'Azure', 'Beige', 'Bisque',
+    'BlanchedAlmond', 'BlueViolet', 'BurlyWood', 'CadetBlue', 'AntiqueWhite',
+    'Chocolate', 'Coral', 'CornflowerBlue', 'Cornsilk', 'Crimson', 'Cyan',
+    'DarkCyan', 'DarkGoldenRod', 'DarkGrey', 'DarkKhaki', 'DarkOrange',
+    'DarkOrchid', 'DarkSalmon', 'DarkSeaGreen', 'DarkTurquoise', 'DarkViolet',
+    'DeepPink', 'DeepSkyBlue', 'DodgerBlue', 'FireBrick', 'FloralWhite',
+    'ForestGreen', 'Fuchsia', 'Gainsboro', 'GhostWhite', 'Gold', 'GoldenRod',
+    'Salmon', 'Tan', 'HoneyDew', 'HotPink', 'IndianRed', 'Ivory', 'Khaki',
+    'Lavender', 'LavenderBlush', 'LawnGreen', 'LemonChiffon', 'LightBlue',
+    'LightCoral', 'LightCyan', 'LightGoldenRodYellow', 'LightGray', 'LightGrey',
+    'LightGreen', 'LightPink', 'LightSalmon', 'LightSeaGreen', 'LightSkyBlue',
+    'LightSlateGray', 'LightSlateGrey', 'LightSteelBlue', 'LightYellow', 'Lime',
+    'LimeGreen', 'Linen', 'Magenta', 'MediumAquaMarine', 'MediumOrchid',
+    'MediumPurple', 'MediumSeaGreen', 'MediumSlateBlue', 'MediumSpringGreen',
+    'MediumTurquoise', 'MediumVioletRed', 'MintCream', 'MistyRose', 'Moccasin',
+    'NavajoWhite', 'OldLace', 'Olive', 'OliveDrab', 'Orange', 'OrangeRed',
+    'Orchid', 'PaleGoldenRod', 'PaleGreen', 'PaleTurquoise', 'PaleVioletRed',
+    'PapayaWhip', 'PeachPuff', 'Peru', 'Pink', 'Plum', 'PowderBlue', 'Purple',
+    'Red', 'RosyBrown', 'RoyalBlue', 'SaddleBrown', 'Green', 'SandyBrown',
+    'SeaGreen', 'SeaShell', 'Sienna', 'Silver', 'SkyBlue', 'SlateBlue',
+    'SlateGray', 'SlateGrey', 'Snow', 'SpringGreen', 'SteelBlue', 'GreenYellow',
+    'Teal', 'Thistle', 'Tomato', 'Turquoise', 'Violet', 'Wheat', 'White',
+    'WhiteSmoke', 'Yellow', 'YellowGreen'
+]
+
+
+def draw_bounding_box_on_image_array(image,
+                                     ymin,
+                                     xmin,
+                                     ymax,
+                                     xmax,
+                                     color='red',
+                                     thickness=4,
+                                     display_str_list=(),
+                                     use_normalized_coordinates=True):
+  """Adds a bounding box to an image (numpy array).
+
+  Bounding box coordinates can be specified in either absolute (pixel) or
+  normalized coordinates by setting the use_normalized_coordinates argument.
+
+  Args:
+    image: a numpy array with shape [height, width, 3].
+    ymin: ymin of bounding box.
+    xmin: xmin of bounding box.
+    ymax: ymax of bounding box.
+    xmax: xmax of bounding box.
+    color: color to draw bounding box. Default is red.
+    thickness: line thickness. Default value is 4.
+    display_str_list: list of strings to display in box
+                      (each to be shown on its own line).
+    use_normalized_coordinates: If True (default), treat coordinates
+      ymin, xmin, ymax, xmax as relative to the image.  Otherwise treat
+      coordinates as absolute.
+  """
+  image_pil = Image.fromarray(np.uint8(image)).convert('RGB')
+  draw_bounding_box_on_image(image_pil, ymin, xmin, ymax, xmax, color,
+                             thickness, display_str_list,
+                             use_normalized_coordinates)
+  np.copyto(image, np.array(image_pil))
+
+
+def draw_bounding_box_on_image(image,
+                               ymin,
+                               xmin,
+                               ymax,
+                               xmax,
+                               color='red',
+                               thickness=4,
+                               display_str_list=(),
+                               use_normalized_coordinates=True):
+  """Adds a bounding box to an image.
+
+  Bounding box coordinates can be specified in either absolute (pixel) or
+  normalized coordinates by setting the use_normalized_coordinates argument.
+
+  Each string in display_str_list is displayed on a separate line above the
+  bounding box in black text on a rectangle filled with the input 'color'.
+  If the top of the bounding box extends to the edge of the image, the strings
+  are displayed below the bounding box.
+
+  Args:
+    image: a PIL.Image object.
+    ymin: ymin of bounding box.
+    xmin: xmin of bounding box.
+    ymax: ymax of bounding box.
+    xmax: xmax of bounding box.
+    color: color to draw bounding box. Default is red.
+    thickness: line thickness. Default value is 4.
+    display_str_list: list of strings to display in box
+                      (each to be shown on its own line).
+    use_normalized_coordinates: If True (default), treat coordinates
+      ymin, xmin, ymax, xmax as relative to the image.  Otherwise treat
+      coordinates as absolute.
+  """
+  draw = ImageDraw.Draw(image)
+  im_width, im_height = image.size
+  if use_normalized_coordinates:
+    (left, right, top, bottom) = (xmin * im_width, xmax * im_width,
+                                  ymin * im_height, ymax * im_height)
+  else:
+    (left, right, top, bottom) = (xmin, xmax, ymin, ymax)
+  draw.line([(left, top), (left, bottom), (right, bottom),
+             (right, top), (left, top)], width=thickness, fill=color)
+  try:
+    font = ImageFont.truetype('arial.ttf', 24)
+  except IOError:
+    font = ImageFont.load_default()
+
+  # If the total height of the display strings added to the top of the bounding
+  # box exceeds the top of the image, stack the strings below the bounding box
+  # instead of above.
+  display_str_heights = [font.getsize(ds)[1] for ds in display_str_list]
+  # Each display_str has a top and bottom margin of 0.05x.
+  total_display_str_height = (1 + 2 * 0.05) * sum(display_str_heights)
+
+  if top > total_display_str_height:
+    text_bottom = top
+  else:
+    text_bottom = bottom + total_display_str_height
+  # Reverse list and print from bottom to top.
+  for display_str in display_str_list[::-1]:
+    text_width, text_height = font.getsize(display_str)
+    margin = np.ceil(0.05 * text_height)
+    draw.rectangle(
+        [(left, text_bottom - text_height - 2 * margin), (left + text_width,
+                                                          text_bottom)],
+        fill=color)
+    draw.text(
+        (left + margin, text_bottom - text_height - margin),
+        display_str,
+        fill='black',
+        font=font)
+    text_bottom -= text_height - 2 * margin
+
+
+def draw_keypoints_on_image_array(image,
+                                  keypoints,
+                                  color='red',
+                                  radius=2,
+                                  use_normalized_coordinates=True):
+  """Draws keypoints on an image (numpy array).
+
+  Args:
+    image: a numpy array with shape [height, width, 3].
+    keypoints: a numpy array with shape [num_keypoints, 2].
+    color: color to draw the keypoints with. Default is red.
+    radius: keypoint radius. Default value is 2.
+    use_normalized_coordinates: if True (default), treat keypoint values as
+      relative to the image.  Otherwise treat them as absolute.
+  """
+  image_pil = Image.fromarray(np.uint8(image)).convert('RGB')
+  draw_keypoints_on_image(image_pil, keypoints, color, radius,
+                          use_normalized_coordinates)
+  np.copyto(image, np.array(image_pil))
+
+
+def draw_keypoints_on_image(image,
+                            keypoints,
+                            color='red',
+                            radius=2,
+                            use_normalized_coordinates=True):
+  """Draws keypoints on an image.
+
+  Args:
+    image: a PIL.Image object.
+    keypoints: a numpy array with shape [num_keypoints, 2].
+    color: color to draw the keypoints with. Default is red.
+    radius: keypoint radius. Default value is 2.
+    use_normalized_coordinates: if True (default), treat keypoint values as
+      relative to the image.  Otherwise treat them as absolute.
+  """
+  draw = ImageDraw.Draw(image)
+  im_width, im_height = image.size
+  keypoints_x = [k[1] for k in keypoints]
+  keypoints_y = [k[0] for k in keypoints]
+
+  if use_normalized_coordinates:
+    keypoints_x = tuple([im_width * x for x in keypoints_x])
+    keypoints_y = tuple([im_height * y for y in keypoints_y])
+
+  for keypoint_x, keypoint_y in zip(keypoints_x, keypoints_y):
+    draw.ellipse([(keypoint_x - radius, keypoint_y - radius),
+                  (keypoint_x + radius, keypoint_y + radius)],
+                 outline=color, fill=color)
+
+
+def draw_mask_on_image_array(image, mask, color='red', alpha=0.4):
+  """Draws mask on an image.
+
+  Args:
+    image: uint8 numpy array with shape (img_height, img_width, 3)
+    mask: a uint8 numpy array of shape (img_height, img_width) with
+      values of either 0 or 1.
+    color: color to draw the mask with. Default is red.
+    alpha: transparency value between 0 and 1. (default: 0.4)
+
+  Raises:
+    ValueError: On incorrect data type for image or masks.
+  """
+  if image.dtype != np.uint8:
+    raise ValueError('`image` not of type np.uint8')
+  if mask.dtype != np.uint8:
+    raise ValueError('`mask` not of type np.uint8')
+  if np.any(np.logical_and(mask != 1, mask != 0)):
+    raise ValueError('`mask` elements should be in [0, 1]')
+  if image.shape[:2] != mask.shape:
+    raise ValueError('The image has spatial dimensions %s but the mask has '
+                     'dimensions %s' % (image.shape[:2], mask.shape))
+  rgb = ImageColor.getrgb(color)
+  pil_image = Image.fromarray(image)
+
+  solid_color = np.expand_dims(np.ones_like(mask), axis=2) * np.reshape(list(rgb), [1, 1, 3])
+
+  pil_solid_color = Image.fromarray(np.uint8(solid_color)).convert('RGBA')
+  pil_mask = Image.fromarray(np.uint8(255.0*alpha*mask)).convert('L')
+  pil_image = Image.composite(pil_solid_color, pil_image, pil_mask)
+
+  np.copyto(image, np.array(pil_image.convert('RGB')))
+
+
+def visualize_boxes_and_labels_on_image_array(
+    image,
+    boxes,
+    classes,
+    scores,
+    category_index,
+    instance_masks=None,
+    instance_boundaries=None,
+    keypoints=None,
+    use_normalized_coordinates=False,
+    max_boxes_to_draw=20,
+    min_score_thresh=.5,
+    agnostic_mode=False,
+    line_thickness=4,
+    groundtruth_box_visualization_color='black',
+    skip_scores=False,
+    skip_labels=False):
+  """Overlay labeled boxes on an image with formatted scores and label names.
+
+  This function groups boxes that correspond to the same location
+  and creates a display string for each detection and overlays these
+  on the image. Note that this function modifies the image in place, and returns
+  that same image.
+
+  Args:
+    image: uint8 numpy array with shape (img_height, img_width, 3)
+    boxes: a numpy array of shape [N, 4]
+    classes: a numpy array of shape [N]. Note that class indices are 1-based,
+      and match the keys in the label map.
+    scores: a numpy array of shape [N] or None.  If scores=None, then
+      this function assumes that the boxes to be plotted are groundtruth
+      boxes and plot all boxes as black with no classes or scores.
+    category_index: a dict containing category dictionaries (each holding
+      category index `id` and category name `name`) keyed by category indices.
+    instance_masks: a numpy array of shape [N, image_height, image_width] with
+      values ranging between 0 and 1, can be None.
+    instance_boundaries: a numpy array of shape [N, image_height, image_width]
+      with values ranging between 0 and 1, can be None.
+    keypoints: a numpy array of shape [N, num_keypoints, 2], can
+      be None
+    use_normalized_coordinates: whether boxes is to be interpreted as
+      normalized coordinates or not.
+    max_boxes_to_draw: maximum number of boxes to visualize.  If None, draw
+      all boxes.
+    min_score_thresh: minimum score threshold for a box to be visualized
+    agnostic_mode: boolean (default: False) controlling whether to evaluate in
+      class-agnostic mode or not.  This mode will display scores but ignore
+      classes.
+    line_thickness: integer (default: 4) controlling line width of the boxes.
+    groundtruth_box_visualization_color: box color for visualizing groundtruth
+      boxes
+    skip_scores: whether to skip score when drawing a single detection
+    skip_labels: whether to skip label when drawing a single detection
+
+  Returns:
+    uint8 numpy array with shape (img_height, img_width, 3) with overlaid boxes.
+  """
+  # Create a display string (and color) for every box location, group any boxes
+  # that correspond to the same location.
+  box_to_display_str_map = collections.defaultdict(list)
+  box_to_color_map = collections.defaultdict(str)
+  box_to_instance_masks_map = {}
+  box_to_instance_boundaries_map = {}
+  box_to_keypoints_map = collections.defaultdict(list)
+
+  if not max_boxes_to_draw:
+    max_boxes_to_draw = boxes.shape[0]
+
+  for i in range(min(max_boxes_to_draw, boxes.shape[0])):
+
+    if scores is None or scores[i] > min_score_thresh:
+      box = tuple(boxes[i].tolist())
+
+      if instance_masks is not None:
+        box_to_instance_masks_map[box] = instance_masks[i]
+
+      if instance_boundaries is not None:
+        box_to_instance_boundaries_map[box] = instance_boundaries[i]
+
+      if keypoints is not None:
+        box_to_keypoints_map[box].extend(keypoints[i])
+
+      if scores is None:
+        box_to_color_map[box] = groundtruth_box_visualization_color
+      else:
+        display_str = ''
+
+        if not skip_labels:
+          if not agnostic_mode:
+            if classes[i] in category_index.keys():
+              class_name = category_index[classes[i]]['name']
+            else:
+              class_name = 'N/A'
+
+            display_str = str(class_name)
+
+        if not skip_scores:
+
+          if not display_str:
+            display_str = '{}%'.format(int(100*scores[i]))
+          else:
+            display_str = '{}: {}%'.format(display_str, int(100*scores[i]))
+
+        box_to_display_str_map[box].append(display_str)
+
+        if agnostic_mode:
+          box_to_color_map[box] = 'DarkOrange'
+        else:
+          box_to_color_map[box] = STANDARD_COLORS[classes[i] % len(STANDARD_COLORS)]
+
+  # Draw all boxes onto image.
+  for box, color in box_to_color_map.items():
+    ymin, xmin, ymax, xmax = box
+
+    if instance_masks is not None:
+      draw_mask_on_image_array(
+          image,
+          box_to_instance_masks_map[box],
+          color=color
+      )
+
+    if instance_boundaries is not None:
+      draw_mask_on_image_array(
+          image,
+          box_to_instance_boundaries_map[box],
+          color='red',
+          alpha=1.0
+      )
+
+    draw_bounding_box_on_image_array(
+        image,
+        ymin,
+        xmin,
+        ymax,
+        xmax,
+        color=color,
+        thickness=line_thickness,
+        display_str_list=box_to_display_str_map[box],
+        use_normalized_coordinates=use_normalized_coordinates)
+
+    if keypoints is not None:
+      draw_keypoints_on_image_array(
+          image,
+          box_to_keypoints_map[box],
+          color=color,
+          radius=line_thickness / 2,
+          use_normalized_coordinates=use_normalized_coordinates)
+
+  return image

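A minimal sketch of the main entry point, not part of the commit: the two detections are made up, and category_index follows the {id: {'id', 'name'}} layout the docstring describes.

```python
import numpy as np

from mask_rcnn.object_detection import visualization_utils

image = np.zeros((480, 640, 3), dtype=np.uint8)
boxes = np.array([[0.10, 0.10, 0.50, 0.40],   # [ymin, xmin, ymax, xmax], normalized
                  [0.45, 0.50, 0.90, 0.95]])
classes = np.array([1, 2])                     # 1-based, keyed into category_index
scores = np.array([0.92, 0.71])
category_index = {1: {'id': 1, 'name': 'person'},
                  2: {'id': 2, 'name': 'car'}}

annotated = visualization_utils.visualize_boxes_and_labels_on_image_array(
    image, boxes, classes, scores, category_index,
    use_normalized_coordinates=True,
    min_score_thresh=0.5)
# `annotated` is the same array as `image`: drawing happens in place.
```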
+ 0 - 0
TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/ops/__init__.py


+ 503 - 0
TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/ops/box_utils.py

@@ -0,0 +1,503 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Utility functions for bounding box processing."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+import tensorflow as tf
+
+
+EPSILON = 1e-8
+BBOX_XFORM_CLIP = np.log(1000. / 16.)
+
+
+def jitter_boxes(boxes, noise_scale=0.025):
+  """Jitter the box coordinates by some noise distribution.
+
+  Args:
+    boxes: a tensor whose last dimension is 4 representing the coordinates
+      of boxes in ymin, xmin, ymax, xmax order.
+    noise_scale: a python float which specifies the magnitude of noise. The
+      rule of thumb is to set this between (0, 0.1]. The default value was
+      found empirically to best mimic noisy detections.
+
+  Returns:
+    jittered_boxes: a tensor whose shape is the same as `boxes` representing
+      the jittered boxes.
+
+  Raises:
+    ValueError: If the last dimension of boxes is not 4.
+  """
+  if boxes.shape[-1] != 4:
+    raise ValueError(
+        'boxes.shape[-1] is {:d}, but must be 4.'.format(boxes.shape[-1]))
+
+  with tf.name_scope('jitter_boxes'):
+    bbox_jitters = tf.random.normal(boxes.get_shape(), stddev=noise_scale)
+    ymin = boxes[..., 0:1]
+    xmin = boxes[..., 1:2]
+    ymax = boxes[..., 2:3]
+    xmax = boxes[..., 3:4]
+    width = xmax - xmin
+    height = ymax - ymin
+    new_center_x = (xmin + xmax) / 2.0 + bbox_jitters[..., 0:1] * width
+    new_center_y = (ymin + ymax) / 2.0 + bbox_jitters[..., 1:2] * height
+    new_width = width * tf.exp(bbox_jitters[..., 2:3])
+    new_height = height * tf.exp(bbox_jitters[..., 3:4])
+    jittered_boxes = tf.concat([
+        new_center_y - new_height * 0.5,
+        new_center_x - new_width * 0.5,
+        new_center_y + new_height * 0.5,
+        new_center_x + new_width * 0.5], axis=-1)
+
+    return jittered_boxes
+
+
+def normalize_boxes(boxes, image_shape):
+  """Converts boxes to the normalized coordinates.
+
+  Args:
+    boxes: a tensor whose last dimension is 4 representing the coordinates
+      of boxes in ymin, xmin, ymax, xmax order.
+    image_shape: a list of two integers, a two-element vector or a tensor such
+      that all but the last dimensions are `broadcastable` to `boxes`. The last
+      dimension is 2, which represents [height, width].
+
+  Returns:
+    normalized_boxes: a tensor whose shape is the same as `boxes` representing
+      the normalized boxes.
+
+  Raises:
+    ValueError: If the last dimension of boxes is not 4.
+  """
+  if boxes.shape[-1] != 4:
+    raise ValueError(
+        'boxes.shape[-1] is {:d}, but must be 4.'.format(boxes.shape[-1]))
+
+  with tf.name_scope('normalize_boxes'):
+    if isinstance(image_shape, (list, tuple)):
+      height, width = image_shape
+    else:
+      image_shape = tf.cast(image_shape, dtype=boxes.dtype)
+      height = image_shape[..., 0:1]
+      width = image_shape[..., 1:2]
+
+    ymin = boxes[..., 0:1] / height
+    xmin = boxes[..., 1:2] / width
+    ymax = boxes[..., 2:3] / height
+    xmax = boxes[..., 3:4] / width
+
+    normalized_boxes = tf.concat([ymin, xmin, ymax, xmax], axis=-1)
+    return normalized_boxes
+
+
+def denormalize_boxes(boxes, image_shape):
+  """Converts boxes normalized by [height, width] to pixel coordinates.
+
+  Args:
+    boxes: a tensor whose last dimension is 4 representing the coordinates
+      of boxes in ymin, xmin, ymax, xmax order.
+    image_shape: a list of two integers, a two-element vector or a tensor such
+      that all but the last dimensions are `broadcastable` to `boxes`. The last
+      dimension is 2, which represents [height, width].
+
+  Returns:
+    denormalized_boxes: a tensor whose shape is the same as `boxes` representing
+      the denormalized boxes.
+
+  Raises:
+    ValueError: If the last dimension of boxes is not 4.
+  """
+  with tf.name_scope('denormalize_boxes'):
+    if isinstance(image_shape, (list, tuple)):
+      height, width = image_shape
+    else:
+      image_shape = tf.cast(image_shape, dtype=boxes.dtype)
+      height, width = tf.split(image_shape, 2, axis=-1)
+
+    ymin, xmin, ymax, xmax = tf.split(boxes, 4, axis=-1)
+    ymin = ymin * height
+    xmin = xmin * width
+    ymax = ymax * height
+    xmax = xmax * width
+
+    denormalized_boxes = tf.concat([ymin, xmin, ymax, xmax], axis=-1)
+    return denormalized_boxes
+
+
+def clip_boxes(boxes, image_shape):
+  """Clips boxes to image boundaries.
+
+  Args:
+    boxes: a tensor whose last dimension is 4 representing the coordinates
+      of boxes in ymin, xmin, ymax, xmax order.
+    image_shape: a list of two integers, a two-element vector or a tensor such
+      that all but the last dimensions are `broadcastable` to `boxes`. The last
+      dimension is 2, which represents [height, width].
+
+  Returns:
+    clipped_boxes: a tensor whose shape is the same as `boxes` representing the
+      clipped boxes.
+
+  Raises:
+    ValueError: If the last dimension of boxes is not 4.
+  """
+  if boxes.shape[-1] != 4:
+    raise ValueError(
+        'boxes.shape[-1] is {:d}, but must be 4.'.format(boxes.shape[-1]))
+
+  with tf.name_scope('clip_boxes'):
+    if isinstance(image_shape, (list, tuple)):
+      height, width = image_shape
+    else:
+      image_shape = tf.cast(image_shape, dtype=boxes.dtype)
+      height = image_shape[..., 0:1]
+      width = image_shape[..., 1:2]
+
+    ymin = boxes[..., 0:1]
+    xmin = boxes[..., 1:2]
+    ymax = boxes[..., 2:3]
+    xmax = boxes[..., 3:4]
+
+    clipped_ymin = tf.maximum(tf.minimum(ymin, height - 1.0), 0.0)
+    clipped_ymax = tf.maximum(tf.minimum(ymax, height - 1.0), 0.0)
+    clipped_xmin = tf.maximum(tf.minimum(xmin, width - 1.0), 0.0)
+    clipped_xmax = tf.maximum(tf.minimum(xmax, width - 1.0), 0.0)
+
+    clipped_boxes = tf.concat(
+        [clipped_ymin, clipped_xmin, clipped_ymax, clipped_xmax],
+        axis=-1)
+    return clipped_boxes
+
+
+def compute_outer_boxes(boxes, image_shape, scale=1.0):
+  """Compute outer box encloses an object with a margin.
+
+  Args:
+    boxes: a tensor whose last dimension is 4 representing the coordinates
+      of boxes in ymin, xmin, ymax, xmax order.
+    image_shape: a list of two integers, a two-element vector or a tensor such
+      that all but the last dimensions are `broadcastable` to `boxes`. The last
+      dimension is 2, which represents [height, width].
+    scale: a float number specifying the scale of output outer boxes to input
+      `boxes`.
+
+  Returns:
+    outer_boxes: a tensor whose shape is the same as `boxes` representing the
+      outer boxes.
+  """
+  if scale < 1.0:
+    raise ValueError(
+        'scale is {}, but outer box scale must be greater than 1.0.'.format(
+            scale))
+  centers_y = (boxes[..., 0] + boxes[..., 2]) / 2.0
+  centers_x = (boxes[..., 1] + boxes[..., 3]) / 2.0
+  box_height = (boxes[..., 2] - boxes[..., 0]) * scale
+  box_width = (boxes[..., 3] - boxes[..., 1]) * scale
+  outer_boxes = tf.stack([centers_y - box_height / 2.0,
+                          centers_x - box_width / 2.0,
+                          centers_y + box_height / 2.0,
+                          centers_x + box_width / 2.0], axis=1)
+  outer_boxes = clip_boxes(outer_boxes, image_shape)
+  return outer_boxes
+
+
+def encode_boxes(boxes, anchors, weights=None):
+  """Encode boxes to targets.
+
+  Args:
+    boxes: a tensor whose last dimension is 4 representing the coordinates
+      of boxes in ymin, xmin, ymax, xmax order.
+    anchors: a tensor whose shape is the same as, or `broadcastable` to `boxes`,
+      representing the coordinates of anchors in ymin, xmin, ymax, xmax order.
+    weights: None or a list of four float numbers used to scale coordinates.
+
+  Returns:
+    encoded_boxes: a tensor whose shape is the same as `boxes` representing the
+      encoded box targets.
+
+  Raises:
+    ValueError: If the last dimension of boxes is not 4.
+  """
+  if boxes.shape[-1] != 4:
+    raise ValueError(
+        'boxes.shape[-1] is {:d}, but must be 4.'.format(boxes.shape[-1]))
+
+  with tf.name_scope('encode_boxes'):
+    boxes = tf.cast(boxes, dtype=anchors.dtype)
+    ymin = boxes[..., 0:1]
+    xmin = boxes[..., 1:2]
+    ymax = boxes[..., 2:3]
+    xmax = boxes[..., 3:4]
+    box_h = ymax - ymin + 1.0
+    box_w = xmax - xmin + 1.0
+    box_yc = ymin + 0.5 * box_h
+    box_xc = xmin + 0.5 * box_w
+
+    anchor_ymin = anchors[..., 0:1]
+    anchor_xmin = anchors[..., 1:2]
+    anchor_ymax = anchors[..., 2:3]
+    anchor_xmax = anchors[..., 3:4]
+    anchor_h = anchor_ymax - anchor_ymin + 1.0
+    anchor_w = anchor_xmax - anchor_xmin + 1.0
+    anchor_yc = anchor_ymin + 0.5 * anchor_h
+    anchor_xc = anchor_xmin + 0.5 * anchor_w
+
+    encoded_dy = (box_yc - anchor_yc) / anchor_h
+    encoded_dx = (box_xc - anchor_xc) / anchor_w
+    encoded_dh = tf.math.log(box_h / anchor_h)
+    encoded_dw = tf.math.log(box_w / anchor_w)
+    if weights:
+      encoded_dy *= weights[0]
+      encoded_dx *= weights[1]
+      encoded_dh *= weights[2]
+      encoded_dw *= weights[3]
+
+    encoded_boxes = tf.concat(
+        [encoded_dy, encoded_dx, encoded_dh, encoded_dw],
+        axis=-1)
+    return encoded_boxes
+
+
+def decode_boxes(encoded_boxes, anchors, weights=None):
+  """Decode boxes.
+
+  Args:
+    encoded_boxes: a tensor whose last dimension is 4 representing the
+      coordinates of encoded boxes in ymin, xmin, ymax, xmax order.
+    anchors: a tensor whose shape is the same as, or `broadcastable` to `boxes`,
+      representing the coordinates of anchors in ymin, xmin, ymax, xmax order.
+    weights: None or a list of four float numbers used to scale coordinates.
+
+  Returns:
+    decoded_boxes: a tensor whose shape is the same as `encoded_boxes`
+      representing the decoded box targets.
+
+  Raises:
+    ValueError: If the last dimension of encoded_boxes is not 4.
+  """
+  if encoded_boxes.shape[-1] != 4:
+    raise ValueError(
+        'encoded_boxes.shape[-1] is {:d}, but must be 4.'
+        .format(encoded_boxes.shape[-1]))
+
+  with tf.name_scope('decode_boxes'):
+    encoded_boxes = tf.cast(encoded_boxes, dtype=anchors.dtype)
+    dy = encoded_boxes[..., 0:1]
+    dx = encoded_boxes[..., 1:2]
+    dh = encoded_boxes[..., 2:3]
+    dw = encoded_boxes[..., 3:4]
+    if weights:
+      dy /= weights[0]
+      dx /= weights[1]
+      dh /= weights[2]
+      dw /= weights[3]
+    dh = tf.minimum(dh, BBOX_XFORM_CLIP)
+    dw = tf.minimum(dw, BBOX_XFORM_CLIP)
+
+    anchor_ymin = anchors[..., 0:1]
+    anchor_xmin = anchors[..., 1:2]
+    anchor_ymax = anchors[..., 2:3]
+    anchor_xmax = anchors[..., 3:4]
+    anchor_h = anchor_ymax - anchor_ymin + 1.0
+    anchor_w = anchor_xmax - anchor_xmin + 1.0
+    anchor_yc = anchor_ymin + 0.5 * anchor_h
+    anchor_xc = anchor_xmin + 0.5 * anchor_w
+
+    decoded_boxes_yc = dy * anchor_h + anchor_yc
+    decoded_boxes_xc = dx * anchor_w + anchor_xc
+    decoded_boxes_h = tf.exp(dh) * anchor_h
+    decoded_boxes_w = tf.exp(dw) * anchor_w
+
+    decoded_boxes_ymin = decoded_boxes_yc - 0.5 * decoded_boxes_h
+    decoded_boxes_xmin = decoded_boxes_xc - 0.5 * decoded_boxes_w
+    decoded_boxes_ymax = decoded_boxes_ymin + decoded_boxes_h - 1.0
+    decoded_boxes_xmax = decoded_boxes_xmin + decoded_boxes_w - 1.0
+
+    decoded_boxes = tf.concat(
+        [decoded_boxes_ymin, decoded_boxes_xmin,
+         decoded_boxes_ymax, decoded_boxes_xmax],
+        axis=-1)
+    return decoded_boxes
+
+
+def filter_boxes(boxes, scores, image_shape, min_size_threshold):
+  """Filter and remove boxes that are too small or fall outside the image.
+
+  Args:
+    boxes: a tensor whose last dimension is 4 representing the
+      coordinates of boxes in ymin, xmin, ymax, xmax order.
+    scores: a tensor whose shape is the same as tf.shape(boxes)[:-1]
+      representing the original scores of the boxes.
+    image_shape: a tensor whose shape is the same as, or `broadcastable` to
+      `boxes` except the last dimension, which is 2, representing
+      [height, width] of the scaled image.
+    min_size_threshold: a float representing the minimal box size in each
+      side (w.r.t. the scaled image). Boxes whose sides are smaller than it will
+      be filtered out.
+
+  Returns:
+    filtered_boxes: a tensor whose shape is the same as `boxes` but with
+      the positions of the filtered boxes filled with 0.
+    filtered_scores: a tensor whose shape is the same as `scores` but with
+      the positions of the filtered boxes filled with 0.
+  """
+  if boxes.shape[-1] != 4:
+    raise ValueError(
+        'boxes.shape[-1] is {:d}, but must be 4.'.format(boxes.shape[-1]))
+
+  with tf.name_scope('filter_boxes'):
+    if isinstance(image_shape, (list, tuple)):
+      height, width = image_shape
+    else:
+      image_shape = tf.cast(image_shape, dtype=boxes.dtype)
+      height = image_shape[..., 0]
+      width = image_shape[..., 1]
+
+    ymin = boxes[..., 0]
+    xmin = boxes[..., 1]
+    ymax = boxes[..., 2]
+    xmax = boxes[..., 3]
+
+    h = ymax - ymin + 1.0
+    w = xmax - xmin + 1.0
+    yc = ymin + 0.5 * h
+    xc = xmin + 0.5 * w
+
+    min_size = tf.cast(tf.maximum(min_size_threshold, 1.0), dtype=boxes.dtype)
+
+    filtered_size_mask = tf.logical_and(
+        tf.greater(h, min_size), tf.greater(w, min_size))
+    filtered_center_mask = tf.logical_and(
+        tf.logical_and(tf.greater(yc, 0.0), tf.less(yc, height)),
+        tf.logical_and(tf.greater(xc, 0.0), tf.less(xc, width)))
+    filtered_mask = tf.logical_and(filtered_size_mask, filtered_center_mask)
+
+    filtered_scores = tf.where(filtered_mask, scores, tf.zeros_like(scores))
+    filtered_boxes = tf.cast(
+        tf.expand_dims(filtered_mask, axis=-1), dtype=boxes.dtype) * boxes
+
+    return filtered_boxes, filtered_scores
+
+
+def filter_boxes_by_scores(boxes, scores, min_score_threshold):
+  """Filter and remove boxes whose scores are smaller than the threshold.
+
+  Args:
+    boxes: a tensor whose last dimension is 4 representing the
+      coordinates of boxes in ymin, xmin, ymax, xmax order.
+    scores: a tensor whose shape is the same as tf.shape(boxes)[:-1]
+      representing the original scores of the boxes.
+    min_score_threshold: a float representing the minimal box score threshold.
+      Boxes whose score are smaller than it will be filtered out.
+
+  Returns:
+    filtered_boxes: a tensor whose shape is the same as `boxes` but with
+      the positions of the filtered boxes filled with 0.
+    filtered_scores: a tensor whose shape is the same as `scores` but with
+      the positions of the filtered boxes filled with 0.
+  """
+  if boxes.shape[-1] != 4:
+    raise ValueError(
+        'boxes.shape[-1] is {:d}, but must be 4.'.format(boxes.shape[-1]))
+
+  with tf.name_scope('filter_boxes_by_scores'):
+    filtered_mask = tf.greater(scores, min_score_threshold)
+    filtered_scores = tf.where(filtered_mask, scores, tf.zeros_like(scores))
+    filtered_boxes = tf.cast(
+        tf.expand_dims(filtered_mask, axis=-1), dtype=boxes.dtype) * boxes
+
+    return filtered_boxes, filtered_scores
+
+
+def top_k_boxes(boxes, scores, k):
+  """Sort and select top k boxes according to the scores.
+
+  Args:
+    boxes: a tensor of shape [batch_size, N, 4] representing the coordinates of
+      the boxes. N is the number of boxes per image.
+    scores: a tensor of shape [batch_size, N] representing the score of the
+      boxes.
+    k: an integer or a tensor indicating the top k number.
+
+  Returns:
+    selected_boxes: a tensor of shape [batch_size, k, 4] representing the
+      selected top k box coordinates.
+    selected_scores: a tensor of shape [batch_size, k] representing the selected
+      top k box scores.
+  """
+  with tf.name_scope('top_k_boxes'):
+    selected_scores, top_k_indices = tf.nn.top_k(scores, k=k, sorted=True)
+
+    batch_size, _ = scores.get_shape().as_list()
+    if batch_size == 1:
+      selected_boxes = tf.squeeze(
+          tf.gather(boxes, top_k_indices, axis=1), axis=1)
+    else:
+      top_k_indices_shape = tf.shape(top_k_indices)
+      batch_indices = (
+          tf.expand_dims(tf.range(top_k_indices_shape[0]), axis=-1) *
+          tf.ones([1, top_k_indices_shape[-1]], dtype=tf.int32))
+      gather_nd_indices = tf.stack([batch_indices, top_k_indices], axis=-1)
+      selected_boxes = tf.gather_nd(boxes, gather_nd_indices)
+
+    return selected_boxes, selected_scores
+
+
+def bbox_overlap(boxes, gt_boxes):
+  """Calculates the overlap between proposal and ground truth boxes.
+
+  Some `gt_boxes` may have been padded.  The returned `iou` tensor for these
+  boxes will be -1.
+
+  Args:
+    boxes: a tensor with a shape of [batch_size, N, 4]. N is the number of
+      proposals before groundtruth assignment (e.g., rpn_post_nms_topn). The
+      last dimension is the pixel coordinates in [ymin, xmin, ymax, xmax] form.
+    gt_boxes: a tensor with a shape of [batch_size, MAX_NUM_INSTANCES, 4]. This
+      tensor might have paddings with a negative value.
+  Returns:
+    iou: a tensor with a shape of [batch_size, N, MAX_NUM_INSTANCES].
+  """
+  with tf.name_scope('bbox_overlap'):
+    bb_y_min, bb_x_min, bb_y_max, bb_x_max = tf.split(
+        value=boxes, num_or_size_splits=4, axis=2)
+    gt_y_min, gt_x_min, gt_y_max, gt_x_max = tf.split(
+        value=gt_boxes, num_or_size_splits=4, axis=2)
+
+    # Calculates the intersection area.
+    i_xmin = tf.maximum(bb_x_min, tf.transpose(gt_x_min, [0, 2, 1]))
+    i_xmax = tf.minimum(bb_x_max, tf.transpose(gt_x_max, [0, 2, 1]))
+    i_ymin = tf.maximum(bb_y_min, tf.transpose(gt_y_min, [0, 2, 1]))
+    i_ymax = tf.minimum(bb_y_max, tf.transpose(gt_y_max, [0, 2, 1]))
+    i_area = tf.maximum((i_xmax - i_xmin), 0) * tf.maximum((i_ymax - i_ymin), 0)
+
+    # Calculates the union area.
+    bb_area = (bb_y_max - bb_y_min) * (bb_x_max - bb_x_min)
+    gt_area = (gt_y_max - gt_y_min) * (gt_x_max - gt_x_min)
+    # Adds a small epsilon to avoid divide-by-zero.
+    u_area = bb_area + tf.transpose(gt_area, [0, 2, 1]) - i_area + 1e-8
+
+    # Calculates IoU.
+    iou = i_area / u_area
+
+    # Fills -1 for padded ground truth boxes.
+    padding_mask = tf.less(i_xmin, tf.zeros_like(i_xmin))
+    iou = tf.where(padding_mask, -tf.ones_like(iou), iou)
+
+    return iou

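A minimal sketch showing that decode_boxes inverts encode_boxes for the same anchors and weights, and how clip_boxes bounds the result; it is not part of the commit, and the [10, 10, 5, 5] weights are the usual Faster R-CNN convention, assumed here rather than taken from this file.

```python
import tensorflow as tf

from mask_rcnn.ops import box_utils

anchors = tf.constant([[[0., 0., 10., 10.],
                        [5., 5., 15., 15.]]])   # [batch=1, N=2, 4]
boxes = tf.constant([[[1., 1., 9., 9.],
                      [6., 4., 14., 18.]]])

weights = [10.0, 10.0, 5.0, 5.0]  # common Faster R-CNN scaling (assumed)

encoded = box_utils.encode_boxes(boxes, anchors, weights)
decoded = box_utils.decode_boxes(encoded, anchors, weights)
# decoded matches boxes up to float error, because decode_boxes inverts
# encode_boxes whenever dh/dw stay below BBOX_XFORM_CLIP.

clipped = box_utils.clip_boxes(decoded, image_shape=(12, 12))
# Every coordinate of `clipped` now lies inside [0, 11] x [0, 11].
```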
+ 209 - 0
TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/ops/nms_ops.py

@@ -0,0 +1,209 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tensorflow implementation of non max suppression."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+# Standard Imports
+
+import tensorflow as tf
+
+from mask_rcnn.ops import box_utils
+
+
+NMS_TILE_SIZE = 512
+
+
+def _self_suppression(iou, _, iou_sum):
+    batch_size = tf.shape(iou)[0]
+    can_suppress_others = tf.cast(
+            tf.reshape(tf.reduce_max(iou, 1) <= 0.5, [batch_size, -1, 1]), iou.dtype)
+    iou_suppressed = tf.reshape(
+            tf.cast(tf.reduce_max(can_suppress_others * iou, 1) <= 0.5, iou.dtype),
+            [batch_size, -1, 1]) * iou
+    iou_sum_new = tf.reduce_sum(iou_suppressed, [1, 2])
+    return [
+            iou_suppressed,
+            tf.reduce_any(iou_sum - iou_sum_new > 0.5), iou_sum_new
+    ]
+
+
+def _cross_suppression(boxes, box_slice, iou_threshold, inner_idx):
+    batch_size = tf.shape(boxes)[0]
+    new_slice = tf.slice(boxes, [0, inner_idx * NMS_TILE_SIZE, 0],
+                         [batch_size, NMS_TILE_SIZE, 4])
+    iou = box_utils.bbox_overlap(new_slice, box_slice)
+    ret_slice = tf.expand_dims(
+            tf.cast(tf.reduce_all(iou < iou_threshold, [1]), box_slice.dtype),
+            2) * box_slice
+    return boxes, ret_slice, iou_threshold, inner_idx + 1
+
+
+def _suppression_loop_body(boxes, iou_threshold, output_size, idx):
+    """Process boxes in the range [idx*NMS_TILE_SIZE, (idx+1)*NMS_TILE_SIZE).
+
+    Args:
+        boxes: a tensor with a shape of [batch_size, anchors, 4].
+        iou_threshold: a float representing the threshold for deciding whether boxes
+            overlap too much with respect to IOU.
+        output_size: an int32 tensor of size [batch_size]. Representing the number
+            of selected boxes for each batch.
+        idx: an integer scalar representing induction variable.
+
+    Returns:
+        boxes: updated boxes.
+        iou_threshold: pass down iou_threshold to the next iteration.
+        output_size: the updated output_size.
+        idx: the updated induction variable.
+    """
+    num_tiles = tf.shape(boxes)[1] // NMS_TILE_SIZE
+    batch_size = tf.shape(boxes)[0]
+
+    # Iterates over tiles that can possibly suppress the current tile.
+    box_slice = tf.slice(boxes, [0, idx * NMS_TILE_SIZE, 0],
+                         [batch_size, NMS_TILE_SIZE, 4])
+    _, box_slice, _, _ = tf.while_loop(
+            lambda _boxes, _box_slice, _threshold, inner_idx: inner_idx < idx,
+            _cross_suppression,
+            [boxes, box_slice, iou_threshold, tf.constant(0)])
+
+    # Iterates over the current tile to compute self-suppression.
+    iou = box_utils.bbox_overlap(box_slice, box_slice)
+    mask = tf.expand_dims(
+            tf.reshape(tf.range(NMS_TILE_SIZE), [1, -1]) > tf.reshape(
+                    tf.range(NMS_TILE_SIZE), [-1, 1]), 0)
+    iou *= tf.cast(tf.logical_and(mask, iou >= iou_threshold), iou.dtype)
+    suppressed_iou, _, _ = tf.while_loop(
+            lambda _iou, loop_condition, _iou_sum: loop_condition, _self_suppression,
+            [iou, tf.constant(True),
+             tf.reduce_sum(iou, [1, 2])])
+    suppressed_box = tf.reduce_sum(suppressed_iou, 1) > 0
+    box_slice *= tf.expand_dims(1.0 - tf.cast(suppressed_box, box_slice.dtype), 2)
+
+    # Uses box_slice to update the input boxes.
+    mask = tf.reshape(
+            tf.cast(tf.equal(tf.range(num_tiles), idx), boxes.dtype), [1, -1, 1, 1])
+    boxes = tf.tile(tf.expand_dims(
+            box_slice, [1]), [1, num_tiles, 1, 1]) * mask + tf.reshape(
+                    boxes, [batch_size, num_tiles, NMS_TILE_SIZE, 4]) * (1 - mask)
+    boxes = tf.reshape(boxes, [batch_size, -1, 4])
+
+    # Updates output_size.
+    output_size += tf.reduce_sum(
+            tf.cast(tf.reduce_any(box_slice > 0, [2]), tf.int32), [1])
+    return boxes, iou_threshold, output_size, idx + 1
+
+
+def sorted_non_max_suppression_padded(scores,
+                                      boxes,
+                                      max_output_size,
+                                      iou_threshold):
+    """A wrapper that handles non-maximum suppression.
+
+    Assumptions:
+        * The boxes are sorted by scores unless the box is a dot (all coordinates
+            are zero).
+        * Boxes with higher scores can be used to suppress boxes with lower scores.
+
+    The overall design of the algorithm is to handle boxes tile-by-tile:
+
+    boxes = boxes.pad_to_multiple_of(tile_size)
+    num_tiles = len(boxes) // tile_size
+    output_boxes = []
+    for i in range(num_tiles):
+        box_tile = boxes[i*tile_size : (i+1)*tile_size]
+        for j in range(i):
+            suppressing_tile = boxes[j*tile_size : (j+1)*tile_size]
+            iou = bbox_overlap(box_tile, suppressing_tile)
+            # if the box is suppressed in iou, clear it to a dot
+            box_tile *= _update_boxes(iou)
+        # Iteratively handle the diagonal tile.
+        iou = bbox_overlap(box_tile, box_tile)
+        iou_changed = True
+        while iou_changed:
+            # boxes that are not suppressed by anything else
+            suppressing_boxes = _get_suppressing_boxes(iou)
+            # boxes that are suppressed by suppressing_boxes
+            suppressed_boxes = _get_suppressed_boxes(iou, suppressing_boxes)
+            # clear iou to 0 for boxes that are suppressed, as they cannot be used
+            # to suppress other boxes any more
+            new_iou = _clear_iou(iou, suppressed_boxes)
+            iou_changed = (new_iou != iou)
+            iou = new_iou
+        # remaining boxes that can still suppress others, are selected boxes.
+        output_boxes.append(_get_suppressing_boxes(iou))
+        if len(output_boxes) >= max_output_size:
+            break
+
+    Args:
+        scores: a tensor with a shape of [batch_size, anchors].
+        boxes: a tensor with a shape of [batch_size, anchors, 4].
+        max_output_size: a scalar integer `Tensor` representing the maximum number
+            of boxes to be selected by non max suppression.
+        iou_threshold: a float representing the threshold for deciding whether boxes
+            overlap too much with respect to IOU.
+
+    Returns:
+        nms_scores: a tensor with a shape of [batch_size, max_output_size],
+            cast to tf.float32.
+        nms_proposals: a tensor with a shape of [batch_size, max_output_size, 4],
+            cast to tf.float32.
+    """
+    batch_size = tf.shape(boxes)[0]
+    num_boxes = tf.shape(boxes)[1]
+    pad = tf.cast(
+            tf.math.ceil(tf.cast(num_boxes, tf.float32) / NMS_TILE_SIZE),
+            tf.int32) * NMS_TILE_SIZE - num_boxes
+    boxes = tf.pad(tf.cast(boxes, tf.float32), [[0, 0], [0, pad], [0, 0]])
+    scores = tf.pad(tf.cast(scores, tf.float32), [[0, 0], [0, pad]])
+    num_boxes += pad
+
+    def _loop_cond(unused_boxes, unused_threshold, output_size, idx):
+        return tf.logical_and(
+                tf.reduce_min(output_size) < max_output_size,
+                idx < num_boxes // NMS_TILE_SIZE)
+
+    selected_boxes, _, output_size, _ = tf.while_loop(
+            _loop_cond, _suppression_loop_body, [
+                    boxes, iou_threshold,
+                    tf.zeros([batch_size], tf.int32),
+                    tf.constant(0)
+            ])
+    idx = num_boxes - tf.cast(
+            tf.nn.top_k(
+                    tf.cast(tf.reduce_any(selected_boxes > 0, [2]), tf.int32) *
+                    tf.expand_dims(tf.range(num_boxes, 0, -1), 0), max_output_size)[0],
+            tf.int32)
+    idx = tf.minimum(idx, num_boxes - 1)
+    idx = tf.reshape(
+            idx + tf.reshape(tf.range(batch_size) * num_boxes, [-1, 1]), [-1])
+    boxes = tf.reshape(
+            tf.gather(tf.reshape(boxes, [-1, 4]), idx),
+            [batch_size, max_output_size, 4])
+    boxes = boxes * tf.cast(
+            tf.reshape(tf.range(max_output_size), [1, -1, 1]) < tf.reshape(
+                    output_size, [-1, 1, 1]), boxes.dtype)
+    scores = tf.reshape(
+            tf.gather(tf.reshape(scores, [-1, 1]), idx),
+            [batch_size, max_output_size])
+    scores = scores * tf.cast(
+            tf.reshape(tf.range(max_output_size), [1, -1]) < tf.reshape(
+                    output_size, [-1, 1]), scores.dtype)
+    return scores, boxes
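+
+# Illustrative usage sketch (shapes and values are assumptions for this
+# example, not model defaults). Boxes must already be ordered by score:
+#
+#   raw_scores = tf.random.uniform([2, 1000])
+#   raw_boxes = tf.random.uniform([2, 1000, 4]) * 100.
+#   order = tf.argsort(raw_scores, axis=1, direction='DESCENDING')
+#   scores = tf.gather(raw_scores, order, batch_dims=1)
+#   boxes = tf.gather(raw_boxes, order, batch_dims=1)
+#   nms_scores, nms_boxes = sorted_non_max_suppression_padded(
+#       scores, boxes, max_output_size=100, iou_threshold=0.7)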

+ 297 - 0
TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/ops/postprocess_ops.py

@@ -0,0 +1,297 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Ops used to post-process raw detections."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import tensorflow as tf
+
+from mask_rcnn.utils import box_utils
+
+
+def generate_detections_per_image_tpu(cls_outputs,
+                                      box_outputs,
+                                      anchor_boxes,
+                                      image_info,
+                                      pre_nms_num_detections=1000,
+                                      post_nms_num_detections=100,
+                                      nms_threshold=0.3,
+                                      bbox_reg_weights=(10., 10., 5., 5.)):
+    """Generate the final detections per image given the model outputs.
+
+  Args:
+    cls_outputs: a tensor with shape [N, num_classes], which stacks class
+      logit outputs on all feature levels. The N is the number of total anchors
+      on all levels. The num_classes is the number of classes predicted by the
+      model. Note that the cls_outputs should be the output of softmax().
+    box_outputs: a tensor with shape [N, num_classes*4], which stacks box
+      regression outputs on all feature levels. The N is the number of total
+      anchors on all levels.
+    anchor_boxes: a tensor with shape [N, 4], which stacks anchors on all
+      feature levels. The N is the number of total anchors on all levels.
+    image_info: a tensor of shape [5] which encodes the input image's [height,
+      width, scale, original_height, original_width]
+    pre_nms_num_detections: an integer that specifies the number of candidates
+      before NMS.
+    post_nms_num_detections: an integer that specifies the number of candidates
+      after NMS.
+    nms_threshold: a float number to specify the IOU threshold of NMS.
+    bbox_reg_weights: a list of 4 float scalars, which are default weights on
+      (dx, dy, dw, dh) for normalizing bbox regression targets.
+
+  Returns:
+    detections: a tuple of tensors corresponding to the number of valid boxes,
+    box coordinates, object categories for each box, and box scores,
+    respectively.
+  """
+    num_boxes, num_classes = cls_outputs.get_shape().as_list()
+
+    # Remove background class scores.
+    cls_outputs = cls_outputs[:, 1:num_classes]
+    top_k_scores, top_k_indices_with_classes = tf.nn.top_k(
+        tf.reshape(cls_outputs, [-1]),
+        k=pre_nms_num_detections,
+        sorted=False
+    )
+
+    classes = tf.math.mod(top_k_indices_with_classes, num_classes - 1)
+    top_k_indices = tf.math.floordiv(top_k_indices_with_classes, num_classes - 1)
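+    # For example, with num_classes = 81 there are 80 foreground columns, so a
+    # flattened index of 163 maps to box 163 // 80 = 2 and 0-based foreground
+    # class 163 % 80 = 3 (category 4 after the +1 background offset below).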
+
+    anchor_boxes = tf.gather(anchor_boxes, top_k_indices)
+    box_outputs = tf.reshape(box_outputs, [num_boxes, num_classes, 4])[:, 1:num_classes, :]
+
+    class_indices = classes
+
+    box_outputs = tf.gather_nd(box_outputs, tf.stack([top_k_indices, class_indices], axis=1))
+
+    # apply bounding box regression to anchors
+    boxes = box_utils.decode_boxes(box_outputs, anchor_boxes, bbox_reg_weights)
+    boxes = box_utils.clip_boxes(boxes, image_info[0], image_info[1])
+
+    list_of_all_boxes = []
+    list_of_all_scores = []
+    list_of_all_classes = []
+
+    # Background was already sliced off above, so class_i iterates over the
+    # 0-based foreground class indices (the final iteration matches nothing).
+    for class_i in range(num_classes):
+        # Compute bitmask for the given classes.
+        class_i_bitmask = tf.cast(tf.equal(classes, class_i), top_k_scores.dtype)
+        # This works because score is in [0, 1].
+        class_i_scores = top_k_scores * class_i_bitmask
+
+        # The TPU and CPU have different behaviors for
+        # tf.image.non_max_suppression_padded (b/116754376).
+        class_i_post_nms_indices, class_i_nms_num_valid = tf.image.non_max_suppression_padded(
+            tf.cast(boxes, dtype=tf.float32),
+            tf.cast(class_i_scores, dtype=tf.float32),
+            post_nms_num_detections,
+            iou_threshold=nms_threshold,
+            score_threshold=0.05,
+            pad_to_max_output_size=True,
+            name='nms_detections_' + str(class_i)
+        )
+
+        class_i_post_nms_boxes = tf.gather(boxes, class_i_post_nms_indices)
+        class_i_post_nms_scores = tf.gather(class_i_scores, class_i_post_nms_indices)
+
+        mask = tf.less(tf.range(post_nms_num_detections), [class_i_nms_num_valid])
+
+        class_i_post_nms_scores = tf.where(
+            mask, class_i_post_nms_scores, tf.zeros_like(class_i_post_nms_scores)
+        )
+
+        class_i_classes = tf.fill(tf.shape(input=class_i_post_nms_scores), class_i + 1)
+        list_of_all_boxes.append(class_i_post_nms_boxes)
+        list_of_all_scores.append(class_i_post_nms_scores)
+        list_of_all_classes.append(class_i_classes)
+
+    post_nms_boxes = tf.concat(list_of_all_boxes, axis=0)
+    post_nms_scores = tf.concat(list_of_all_scores, axis=0)
+    post_nms_classes = tf.concat(list_of_all_classes, axis=0)
+
+    # sort all results.
+    post_nms_scores, sorted_indices = tf.nn.top_k(
+        tf.cast(post_nms_scores, dtype=tf.float32),
+        k=post_nms_num_detections,
+        sorted=True
+    )
+
+    post_nms_boxes = tf.gather(post_nms_boxes, sorted_indices)
+    post_nms_classes = tf.gather(post_nms_classes, sorted_indices)
+
+    valid_mask = tf.where(
+        tf.greater(post_nms_scores, 0), tf.ones_like(post_nms_scores),
+        tf.zeros_like(post_nms_scores)
+    )
+
+    num_valid_boxes = tf.reduce_sum(input_tensor=valid_mask, axis=-1)
+    box_classes = tf.cast(post_nms_classes, dtype=tf.float32)
+
+    return num_valid_boxes, post_nms_boxes, box_classes, post_nms_scores
+
+
+def generate_detections_tpu(class_outputs,
+                            box_outputs,
+                            anchor_boxes,
+                            image_info,
+                            pre_nms_num_detections=1000,
+                            post_nms_num_detections=100,
+                            nms_threshold=0.3,
+                            bbox_reg_weights=(10., 10., 5., 5.)
+                            ):
+    """Generate the final detections given the model outputs (TPU version).
+
+    Args:
+    class_outputs: a tensor with shape [batch_size, N, num_classes], which
+      stacks class logit outputs on all feature levels. The N is the number of
+      total anchors on all levels. The num_classes is the number of classes
+      predicted by the model. Note that the class_outputs here is the raw score.
+    box_outputs: a tensor with shape [batch_size, N, num_classes*4], which
+      stacks box regression outputs on all feature levels. The N is the number
+      of total anchors on all levels.
+    anchor_boxes: a tensor with shape [batch_size, N, 4], which stacks anchors
+      on all feature levels. The N is the number of total anchors on all levels.
+    image_info: a tensor of shape [batch_size, 5] which encodes each image's
+      [height, width, scale, original_height, original_width].
+    pre_nms_num_detections: an integer that specifies the number of candidates
+      before NMS.
+    post_nms_num_detections: an integer that specifies the number of candidates
+      after NMS.
+    nms_threshold: a float number to specify the IOU threshold of NMS.
+    bbox_reg_weights: a list of 4 float scalars, which are default weights on
+      (dx, dy, dw, dh) for normalizing bbox regression targets.
+
+    Returns:
+    a tuple of tensors corresponding to the number of valid boxes, box
+    coordinates, object categories for each box, and box scores, stacked
+    along the batch dimension.
+    """
+
+    with tf.name_scope('generate_detections'):
+
+        batch_size, _, _ = class_outputs.get_shape().as_list()
+        softmax_class_outputs = tf.nn.softmax(class_outputs)
+
+        num_valid_boxes, box_coordinates, box_classes, box_scores = ([], [], [], [])
+
+        for i in range(batch_size):
+            result = generate_detections_per_image_tpu(
+                softmax_class_outputs[i], box_outputs[i], anchor_boxes[i],
+                image_info[i], pre_nms_num_detections, post_nms_num_detections,
+                nms_threshold, bbox_reg_weights)
+
+            num_valid_boxes.append(result[0])
+            box_coordinates.append(result[1])
+            box_classes.append(result[2])
+            box_scores.append(result[3])
+
+        num_valid_boxes = tf.stack(num_valid_boxes)
+        box_coordinates = tf.stack(box_coordinates)
+        box_classes = tf.stack(box_classes)
+        box_scores = tf.stack(box_scores)
+
+    return num_valid_boxes, box_coordinates, box_classes, box_scores
+
+
+def generate_detections_gpu(class_outputs,
+                            box_outputs,
+                            anchor_boxes,
+                            image_info,
+                            pre_nms_num_detections=1000,
+                            post_nms_num_detections=100,
+                            nms_threshold=0.3,
+                            bbox_reg_weights=(10., 10., 5., 5.)
+                            ):
+    """Generate the final detections given the model outputs (GPU version).
+
+    Args:
+    class_outputs: a tensor with shape [batch_size, N, num_classes], which
+      stacks class logit outputs on all feature levels. The N is the number of
+      total anchors on all levels. The num_classes is the number of classes
+      predicted by the model. Note that the class_outputs here is the raw score.
+    box_outputs: a tensor with shape [batch_size, N, num_classes*4], which
+      stacks box regression outputs on all feature levels. The N is the number
+      of total anchors on all levels.
+    anchor_boxes: a tensor with shape [batch_size, N, 4], which stacks anchors
+      on all feature levels. The N is the number of total anchors on all levels.
+    image_info: a tensor of shape [batch_size, 5] which encodes each image's
+      [height, width, scale, original_height, original_width].
+    pre_nms_num_detections: an integer that specifies the number of candidates
+      before NMS.
+    post_nms_num_detections: an integer that specifies the number of candidates
+      after NMS.
+    nms_threshold: a float number to specify the IOU threshold of NMS.
+    bbox_reg_weights: a list of 4 float scalars, which are default weights on
+      (dx, dy, dw, dh) for normalizing bbox regression targets.
+
+    Returns:
+    a tuple of tensors corresponding to the number of valid boxes, box
+    coordinates, object categories for each box, and box scores, stacked
+    along the batch dimension.
+    """
+    with tf.name_scope('generate_detections'):
+
+        batch_size, num_boxes, num_classes = class_outputs.get_shape().as_list()
+        softmax_class_outputs = tf.nn.softmax(class_outputs)
+
+        # Remove background
+        scores = tf.slice(softmax_class_outputs, [0, 0, 1], [-1, -1, -1])
+        boxes = tf.slice(
+            tf.reshape(box_outputs, [batch_size, num_boxes, num_classes, 4]),
+            [0, 0, 1, 0], [-1, -1, -1, -1]
+        )
+
+        anchor_boxes = tf.expand_dims(anchor_boxes, axis=2) * tf.ones([1, 1, num_classes - 1, 1])
+
+        num_detections = num_boxes * (num_classes - 1)
+
+        boxes = tf.reshape(boxes, [batch_size, num_detections, 4])
+        scores = tf.reshape(scores, [batch_size, num_detections, 1])
+        anchor_boxes = tf.reshape(anchor_boxes, [batch_size, num_detections, 4])
+
+        # Decode
+        boxes = box_utils.decode_boxes(boxes, anchor_boxes, bbox_reg_weights)
+
+        # Clip boxes
+        height = tf.expand_dims(image_info[:, 0:1], axis=-1)
+        width = tf.expand_dims(image_info[:, 1:2], axis=-1)
+        boxes = box_utils.clip_boxes(boxes, height, width)
+
+        # NMS
+        pre_nms_boxes = box_utils.to_normalized_coordinates(boxes, height, width)
+        pre_nms_boxes = tf.reshape(pre_nms_boxes, [batch_size, num_boxes, num_classes - 1, 4])
+        pre_nms_scores = tf.reshape(scores, [batch_size, num_boxes, num_classes - 1])
+
+        post_nms_boxes, post_nms_scores, post_nms_classes, \
+        post_nms_num_valid_boxes = tf.image.combined_non_max_suppression(
+            pre_nms_boxes,
+            pre_nms_scores,
+            max_output_size_per_class=pre_nms_num_detections,
+            max_total_size=post_nms_num_detections,
+            iou_threshold=nms_threshold,
+            score_threshold=0.0,
+            pad_per_class=False
+        )
+
+        post_nms_classes = post_nms_classes + 1
+
+        post_nms_boxes = box_utils.to_absolute_coordinates(post_nms_boxes, height, width)
+
+    return post_nms_num_valid_boxes, post_nms_boxes, tf.cast(post_nms_classes, dtype=tf.float32), post_nms_scores
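+
+# Illustrative sketch (all shapes and values are assumptions for the example):
+#
+#   class_outputs = tf.random.normal([2, 1000, 91])      # raw logits
+#   box_outputs = tf.random.normal([2, 1000, 91 * 4])
+#   anchor_boxes = tf.random.uniform([2, 1000, 4]) * 800.
+#   image_info = tf.constant([[832., 1344., 1.25, 666., 1075.]] * 2)
+#   num_valid, boxes, classes, scores = generate_detections_gpu(
+#       class_outputs, box_outputs, anchor_boxes, image_info,
+#       post_nms_num_detections=100, nms_threshold=0.5)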

+ 206 - 0
TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/ops/preprocess_ops.py

@@ -0,0 +1,206 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Preprocessing ops."""
+import math
+import tensorflow as tf
+
+from mask_rcnn.object_detection import preprocessor
+
+
+def normalize_image(image):
+    """Normalize the image.
+
+    Args:
+    image: a tensor of shape [height, width, 3] in dtype=tf.float32.
+
+    Returns:
+    normalized_image: a tensor which has the same shape and dtype as image,
+      with pixel values normalized.
+    """
+    offset = tf.constant([0.485, 0.456, 0.406])
+    offset = tf.reshape(offset, shape=(1, 1, 3))
+
+    scale = tf.constant([0.229, 0.224, 0.225])
+    scale = tf.reshape(scale, shape=(1, 1, 3))
+
+    normalized_image = (image - offset) / scale
+
+    return normalized_image
+
+
+def random_horizontal_flip(image, boxes=None, masks=None, seed=None):
+    """Random horizontal flip the image, boxes, and masks.
+
+    Args:
+    image: a tensor of shape [height, width, 3] representing the image.
+    boxes: (Optional) a tensor of shape [num_boxes, 4] representing the box
+      corners in normalized coordinates.
+    masks: (Optional) a tensor of shape [num_masks, height, width]
+      representing the object masks. Note that the size of the mask is the
+      same as the image.
+
+    Returns:
+    image: the processed image tensor after being randomly flipped.
+    boxes: None or the processed box tensor after being randomly flipped.
+    masks: None or the processed mask tensor after being randomly flipped.
+    """
+    return preprocessor.random_horizontal_flip(image, boxes, masks, seed=seed)
+
+
+def resize_and_pad(image, target_size, stride, boxes=None, masks=None):
+    """Resize and pad images, boxes and masks.
+
+    Resize and pad images, (optionally boxes and masks) given the desired output
+    size of the image and stride size.
+
+    Here are the preprocessing steps.
+    1. For a given image, keep its aspect ratio and rescale the image to make it
+     the largest rectangle that fits inside the rectangle specified by
+     `target_size`.
+    2. Pad the rescaled image such that the height and width of the image become
+     the smallest multiple of the stride that is larger than or equal to the
+     desired output dimension.
+
+    Args:
+    image: an image tensor of shape [original_height, original_width, 3].
+    target_size: a tuple of two integers indicating the desired output
+      image size. Note that the actual output size could be different from this.
+    stride: the stride of the backbone network. Each of the output image sides
+      must be the multiple of this.
+    boxes: (Optional) a tensor of shape [num_boxes, 4] representing the box
+      corners in normalized coordinates.
+    masks: (Optional) a tensor of shape [num_masks, height, width]
+      representing the object masks. Note that the size of the mask is the
+      same as the image.
+
+    Returns:
+    image: the processed image tensor after being resized and padded.
+    image_info: a tensor of shape [5] which encodes the height, width before
+      and after resizing and the scaling factor.
+    boxes: None or the processed box tensor after being resized and padded.
+      After the processing, boxes will be in the absolute coordinates w.r.t.
+      the scaled image.
+    masks: None or the processed mask tensor after being resized and padded.
+    """
+
+    input_height, input_width, _ = tf.unstack(
+        tf.cast(tf.shape(input=image), dtype=tf.float32),
+        axis=0
+    )
+
+    target_height, target_width = target_size
+
+    scale_if_resize_height = target_height / input_height
+    scale_if_resize_width = target_width / input_width
+
+    scale = tf.minimum(scale_if_resize_height, scale_if_resize_width)
+
+    scaled_height = tf.cast(scale * input_height, dtype=tf.int32)
+    scaled_width = tf.cast(scale * input_width, dtype=tf.int32)
+
+    image = tf.image.resize(image, [scaled_height, scaled_width], method=tf.image.ResizeMethod.BILINEAR)
+
+    padded_height = int(math.ceil(target_height * 1.0 / stride) * stride)
+    padded_width = int(math.ceil(target_width * 1.0 / stride) * stride)
+
+    image = tf.image.pad_to_bounding_box(image, 0, 0, padded_height, padded_width)
+    image.set_shape([padded_height, padded_width, 3])
+
+    image_info = tf.stack([
+        tf.cast(scaled_height, dtype=tf.float32),
+        tf.cast(scaled_width, dtype=tf.float32),
+        1.0 / scale,
+        input_height,
+        input_width]
+    )
+
+    if boxes is not None:
+        normalized_box_list = preprocessor.box_list.BoxList(boxes)
+        scaled_boxes = preprocessor.box_list_scale(normalized_box_list, scaled_height, scaled_width).get()
+
+    else:
+        scaled_boxes = None
+
+    if masks is not None:
+        scaled_masks = tf.image.resize(
+            tf.expand_dims(masks, -1),
+            [scaled_height, scaled_width],
+            method=tf.image.ResizeMethod.NEAREST_NEIGHBOR
+        )
+        # Check if there is any instance in this image or not.
+        num_masks = tf.shape(input=scaled_masks)[0]
+        scaled_masks = tf.cond(
+            pred=tf.greater(num_masks, 0),
+            true_fn=lambda: tf.image.pad_to_bounding_box(scaled_masks, 0, 0, padded_height, padded_width),
+            false_fn=lambda: tf.zeros([0, padded_height, padded_width, 1])
+        )
+
+    else:
+        scaled_masks = None
+
+    return image, image_info, scaled_boxes, scaled_masks
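+
+# Worked example (assumed values): a 600x800 input with target_size =
+# (1024, 1024) and stride = 32 gives scale = min(1024/600, 1024/800) = 1.28,
+# a rescaled size of 768x1024 and a padded size of 1024x1024, so
+# image_info = [768., 1024., 0.78125, 600., 800.].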
+
+
+def crop_gt_masks(instance_masks, boxes, gt_mask_size, image_size):
+    """Crops the ground truth binary masks and resize to fixed-size masks."""
+    num_masks = tf.shape(input=instance_masks)[0]
+
+    scale_sizes = tf.convert_to_tensor(value=[image_size[0], image_size[1]] * 2, dtype=tf.float32)
+
+    boxes = boxes / scale_sizes
+
+    cropped_gt_masks = tf.image.crop_and_resize(
+        image=instance_masks,
+        boxes=boxes,
+        box_indices=tf.range(num_masks, dtype=tf.int32),
+        crop_size=[gt_mask_size, gt_mask_size],
+        method='bilinear')[:, :, :, 0]
+
+    cropped_gt_masks = tf.pad(
+        tensor=cropped_gt_masks,
+        paddings=tf.constant([[0, 0], [2, 2], [2, 2]]),
+        mode='CONSTANT',
+        constant_values=0.
+    )
+
+    return cropped_gt_masks
+
+
+def pad_to_fixed_size(data, pad_value, output_shape):
+    """Pad data to a fixed length at the first dimension.
+
+    Args:
+    data: Tensor to be padded to output_shape.
+    pad_value: A constant value assigned to the paddings.
+    output_shape: The output shape of a 2D tensor.
+
+    Returns:
+    The padded tensor with output_shape [max_num_instances, dimension].
+    """
+    max_num_instances = output_shape[0]
+    dimension = output_shape[1]
+
+    data = tf.reshape(data, [-1, dimension])
+    num_instances = tf.shape(input=data)[0]
+
+    pad_length = max_num_instances - num_instances
+
+    paddings = pad_value * tf.ones([pad_length, dimension])
+
+    padded_data = tf.reshape(tf.concat([data, paddings], axis=0), output_shape)
+    return padded_data
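+
+# Example (assumed shapes): pad 37 boxes up to 100 instances with -1 markers.
+# Note the implicit assumption that num_instances <= max_num_instances;
+# a negative pad_length would make tf.ones fail.
+#
+#   boxes = tf.random.uniform([37, 4])
+#   padded = pad_to_fixed_size(boxes, pad_value=-1, output_shape=[100, 4])
+#   # padded: [100, 4], with rows 37..99 filled with -1.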

+ 440 - 0
TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/ops/roi_ops.py

@@ -0,0 +1,440 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""ROI-related ops."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import tensorflow as tf
+
+from mask_rcnn.utils.logging_formatter import logging
+# from absl import logging
+
+from mask_rcnn.utils import box_utils
+from mask_rcnn.ops import nms_ops
+
+
+# TODO: Remove when batched NMS stops leading to eval metrics being all 0
+def _propose_rois_tpu(scores,
+                      boxes,
+                      anchor_boxes,
+                      height,
+                      width,
+                      scale,
+                      rpn_pre_nms_topn,
+                      rpn_post_nms_topn,
+                      rpn_nms_threshold,
+                      rpn_min_size,
+                      bbox_reg_weights):
+    """Proposes RoIs giva group of candidates (TPU version).
+
+    Args:
+    scores: a tensor with a shape of [batch_size, num_boxes].
+    boxes: a tensor with a shape of [batch_size, num_boxes, 4],
+      in the encoded form.
+    anchor_boxes: an Anchors object that contains the anchors with a shape of
+      [batch_size, num_boxes, 4].
+    height: a tensor of shape [batch_size, 1, 1] representing the image height.
+    width: a tensor of shape [batch_size, 1, 1] representing the image width.
+    scale: a tensor of shape [batch_size, 1, 1] representing the image scale.
+    rpn_pre_nms_topn: an integer number of top scoring RPN proposals to keep
+      before applying NMS. This is *per FPN level* (not total).
+    rpn_post_nms_topn: an integer number of top scoring RPN proposals to keep
+      after applying NMS. This is the total number of RPN proposals produced.
+    rpn_nms_threshold: a float number between 0 and 1 as the NMS threshold
+      used on RPN proposals.
+    rpn_min_size: an integer number as the minimum proposal height and width;
+      both need to be greater than this number. Note that this number is at the
+      original image scale, not the scale used during training or inference.
+    bbox_reg_weights: None or a list of four integers specifying the weights
+      used when decoding the box.
+
+    Returns:
+    scores: a tensor with a shape of [batch_size, rpn_post_nms_topn, 1]
+      representing the scores of the proposals. It has same dtype as input
+      scores.
+    boxes: a tensor with a shape of [batch_size, rpn_post_nms_topn, 4]
+      representing the boxes of the proposals. The boxes are in normalized
+      coordinates with a form of [ymin, xmin, ymax, xmax]. It has the same
+      dtype as the input boxes.
+
+    """
+    _, num_boxes = scores.get_shape().as_list()
+
+    topk_limit = num_boxes if num_boxes < rpn_pre_nms_topn else rpn_pre_nms_topn
+    scores, boxes_list = box_utils.top_k(scores, k=topk_limit, boxes_list=[boxes, anchor_boxes])
+
+    boxes = boxes_list[0]
+    anchor_boxes = boxes_list[1]
+
+    # Decode boxes w.r.t. anchors and transform to the absolute coordinates.
+    boxes = box_utils.decode_boxes(boxes, anchor_boxes, bbox_reg_weights)
+
+    # Clip boxes that exceed the boundary.
+    boxes = box_utils.clip_boxes(boxes, height, width)
+
+    # Filter boxes that one side is less than rpn_min_size threshold.
+    boxes, scores = box_utils.filter_boxes(
+        boxes,
+        tf.expand_dims(scores, axis=-1),
+        rpn_min_size,
+        height,
+        width,
+        scale
+    )
+
+    scores = tf.squeeze(scores, axis=-1)
+
+    post_nms_topk_limit = topk_limit if topk_limit < rpn_post_nms_topn else rpn_post_nms_topn
+
+    # NMS.
+    if rpn_nms_threshold > 0:
+        scores, boxes = box_utils.sorted_non_max_suppression_padded(
+            scores,
+            boxes,
+            max_output_size=post_nms_topk_limit,
+            iou_threshold=rpn_nms_threshold
+        )
+
+    # Pick top-K post NMS'ed boxes.
+    scores, boxes = box_utils.top_k(scores, k=post_nms_topk_limit, boxes_list=[boxes])
+
+    boxes = boxes[0]
+    return scores, boxes
+
+
+def _propose_rois_gpu(scores,
+                      boxes,
+                      anchor_boxes,
+                      height,
+                      width,
+                      scale,
+                      rpn_pre_nms_topn,
+                      rpn_post_nms_topn,
+                      rpn_nms_threshold,
+                      rpn_min_size,
+                      bbox_reg_weights):
+    """Proposes RoIs giva group of candidates (GPU version).
+
+    Args:
+    scores: a tensor with a shape of [batch_size, num_boxes].
+    boxes: a tensor with a shape of [batch_size, num_boxes, 4],
+      in the encoded form.
+    anchor_boxes: an Anchors object that contains the anchors with a shape of
+      [batch_size, num_boxes, 4].
+    height: a tensor of shape [batch_size, 1, 1] representing the image height.
+    width: a tensor of shape [batch_size, 1, 1] representing the image width.
+    scale: a tensor of shape [batch_size, 1, 1] representing the image scale.
+    rpn_pre_nms_topn: an integer number of top scoring RPN proposals to keep
+      before applying NMS. This is *per FPN level* (not total).
+    rpn_post_nms_topn: an integer number of top scoring RPN proposals to keep
+      after applying NMS. This is the total number of RPN proposals produced.
+    rpn_nms_threshold: a float number between 0 and 1 as the NMS threshold
+      used on RPN proposals.
+    rpn_min_size: an integer number as the minimum proposal height and width;
+      both need to be greater than this number. Note that this number is at the
+      original image scale, not the scale used during training or inference.
+    bbox_reg_weights: None or a list of four integers specifying the weights
+      used when decoding the box.
+
+    Returns:
+    scores: a tensor with a shape of [batch_size, rpn_post_nms_topn, 1]
+      representing the scores of the proposals. It has same dtype as input
+      scores.
+    boxes: a tensor with a shape of [batch_size, rpn_post_nms_topn, 4]
+      representing the boxes of the proposals. The boxes are in normalized
+      coordinates with a form of [ymin, xmin, ymax, xmax]. It has the same
+      dtype as the input boxes.
+    """
+    batch_size, num_boxes = scores.get_shape().as_list()
+
+    topk_limit = min(num_boxes, rpn_pre_nms_topn)
+
+    boxes = box_utils.decode_boxes(boxes, anchor_boxes, bbox_reg_weights)
+
+    boxes = box_utils.clip_boxes(boxes, height, width)
+
+    if rpn_min_size > 0.0:
+        boxes, scores = box_utils.filter_boxes(
+            boxes,
+            tf.expand_dims(scores, axis=-1),
+            rpn_min_size,
+            height,
+            width,
+            scale
+        )
+
+        scores = tf.squeeze(scores, axis=-1)
+
+    post_nms_topk_limit = topk_limit if topk_limit < rpn_post_nms_topn else rpn_post_nms_topn
+
+    if rpn_nms_threshold > 0:
+        # Normalize coordinates as combined_non_max_suppression currently
+        # only support normalized coordinates.
+        pre_nms_boxes = box_utils.to_normalized_coordinates(boxes, height, width)
+        pre_nms_boxes = tf.reshape(pre_nms_boxes, [batch_size, num_boxes, 1, 4])
+        pre_nms_scores = tf.reshape(scores, [batch_size, num_boxes, 1])
+
+        with tf.device('CPU:0'):
+          boxes, scores, _, _ = tf.image.combined_non_max_suppression(
+              pre_nms_boxes,
+              pre_nms_scores,
+              max_output_size_per_class=topk_limit,
+              max_total_size=post_nms_topk_limit,
+              iou_threshold=rpn_nms_threshold,
+              score_threshold=0.0,
+              pad_per_class=False
+          )
+
+        boxes = box_utils.to_absolute_coordinates(boxes, height, width)
+
+    else:
+        scores, boxes = box_utils.top_k(scores, k=post_nms_topk_limit, boxes_list=[boxes])
+        boxes = boxes[0]
+
+    return scores, boxes
+
+
+def multilevel_propose_rois(scores_outputs,
+                            box_outputs,
+                            all_anchors,
+                            image_info,
+                            rpn_pre_nms_topn,
+                            rpn_post_nms_topn,
+                            rpn_nms_threshold,
+                            rpn_min_size,
+                            bbox_reg_weights,
+                            use_batched_nms=False):
+    """Proposes RoIs given a group of candidates from different FPN levels.
+
+    Args:
+    scores_outputs: an OrderedDict with keys representing levels and values
+      representing logits in [batch_size, height, width, num_anchors].
+    box_outputs: an OrderedDict with keys representing levels and values
+      representing box regression targets in
+      [batch_size, height, width, num_anchors * 4].
+    all_anchors: an Anchors object that contains all the anchors.
+    image_info: a tensor of shape [batch_size, 5] whose columns encode the
+      input image's [height, width, scale, original_height, original_width].
+      Height and width are for the input to the network, not the original
+      image; scale is the scale factor used to scale the network input size
+      to the original image size. See dataloader.DetectionInputProcessor for
+      details.
+    rpn_pre_nms_topn: an integer number of top scoring RPN proposals to keep
+      before applying NMS. This is *per FPN level* (not total).
+    rpn_post_nms_topn: an integer number of top scoring RPN proposals to keep
+      after applying NMS. This is the total number of RPN proposals produced.
+    rpn_nms_threshold: a float number between 0 and 1 as the NMS threshold
+      used on RPN proposals.
+    rpn_min_size: an integer number as the minimum proposal height and width;
+      both need to be greater than this number. Note that this number is at the
+      original image scale, not the scale used during training or inference.
+    bbox_reg_weights: None or a list of four integers specifying the weights
+      used when decoding the box.
+    use_batched_nms: whether to use batched NMS. The batched NMS uses
+      tf.image.combined_non_max_suppression, which is only available on
+      CPU/GPU.
+
+    Returns:
+    scores: a tensor with a shape of [batch_size, rpn_post_nms_topn, 1]
+      representing the scores of the proposals.
+    rois: a tensor with a shape of [batch_size, rpn_post_nms_topn, 4]
+      representing the boxes of the proposals. The boxes are in normalized
+      coordinates with a form of [ymin, xmin, ymax, xmax].
+    """
+    with tf.name_scope('multilevel_propose_rois'):
+
+        levels = scores_outputs.keys()
+        scores = []
+        rois = []
+        anchor_boxes = all_anchors.get_unpacked_boxes()
+
+        height = tf.expand_dims(image_info[:, 0:1], axis=-1)
+        width = tf.expand_dims(image_info[:, 1:2], axis=-1)
+        scale = tf.expand_dims(image_info[:, 2:3], axis=-1)
+
+        for level in levels:
+
+            with tf.name_scope('level_%d' % level) as scope:
+
+                batch_size, feature_h, feature_w, num_anchors_per_location = scores_outputs[level].get_shape().as_list()
+                num_boxes = feature_h * feature_w * num_anchors_per_location
+
+                this_level_scores = tf.reshape(scores_outputs[level], [batch_size, num_boxes])
+                this_level_scores = tf.sigmoid(this_level_scores)
+                this_level_boxes = tf.reshape(box_outputs[level], [batch_size, num_boxes, 4])
+
+                this_level_anchors = tf.cast(
+                    tf.reshape(
+                        tf.expand_dims(anchor_boxes[level], axis=0) *
+                        tf.ones([batch_size, 1, 1, 1]),
+                        [batch_size, num_boxes, 4]
+                    ),
+                    dtype=this_level_scores.dtype
+                )
+
+                # TODO: Remove when batched NMS stops making eval metrics all 0.
+                if use_batched_nms:
+                    logging.info("[ROI OPs] Using Batched NMS... Scope: %s" % scope)
+                    propose_rois_fn = _propose_rois_gpu
+
+                else:
+                    logging.debug("[ROI OPs] Not Using Batched NMS... Scope: %s" % scope)
+                    propose_rois_fn = _propose_rois_tpu
+
+                this_level_scores, this_level_boxes = propose_rois_fn(
+                    this_level_scores,
+                    this_level_boxes,
+                    this_level_anchors,
+                    height,
+                    width,
+                    scale,
+                    rpn_pre_nms_topn,
+                    rpn_post_nms_topn,
+                    rpn_nms_threshold,
+                    rpn_min_size,
+                    bbox_reg_weights
+                )
+
+                scores.append(this_level_scores)
+                rois.append(this_level_boxes)
+
+    scores = tf.concat(scores, axis=1)
+    rois = tf.concat(rois, axis=1)
+
+    with tf.name_scope('roi_post_nms_topk'):
+
+        post_nms_num_anchors = scores.shape[1]
+        post_nms_topk_limit = min(post_nms_num_anchors, rpn_post_nms_topn)
+
+        top_k_scores, top_k_rois = box_utils.top_k(
+            scores,
+            k=post_nms_topk_limit,
+            boxes_list=[rois]
+        )
+
+        top_k_rois = top_k_rois[0]
+
+    return top_k_scores, top_k_rois
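+
+# Illustrative input layout (shapes are assumptions): with FPN levels 2..6,
+# 3 anchors per location, and a 1024x1024 network input,
+#   scores_outputs = {2: [B, 256, 256, 3], 3: [B, 128, 128, 3], ..., 6: [B, 16, 16, 3]}
+#   box_outputs    = {2: [B, 256, 256, 12], ..., 6: [B, 16, 16, 12]}
+# Each level contributes up to rpn_post_nms_topn proposals, and the final
+# cross-level top-k above keeps rpn_post_nms_topn of them overall.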
+
+
+def custom_multilevel_propose_rois(scores_outputs, box_outputs, all_anchors,
+                                   image_info, rpn_pre_nms_topn,
+                                   rpn_post_nms_topn, rpn_nms_threshold,
+                                   rpn_min_size):
+    """Proposes RoIs for the second stage nets.
+
+    This proposal op performs the following operations.
+    1. propose rois at each level.
+    2. collect all proposals.
+    3. keep rpn_post_nms_topn proposals by their sorted scores from the highest
+       to the lowest.
+
+    Reference:
+    https://github.com/facebookresearch/Detectron/blob/master/detectron/ops/collect_and_distribute_fpn_rpn_proposals.py
+
+    Args:
+    scores_outputs: an OrderedDict with keys representing levels and values
+      representing logits in [batch_size, height, width, num_anchors].
+    box_outputs: an OrderedDict with keys representing levels and values
+      representing box regression targets in
+      [batch_size, height, width, num_anchors * 4].
+    all_anchors: an Anchors object that contains all the anchors.
+    image_info: a tensor of shape [batch_size, 5] whose columns encode the
+      input image's [height, width, scale, original_height, original_width].
+      Height and width are for the input to the network, not the original
+      image; scale is the scale factor used to scale the network input size
+      to the original image size. See dataloader.DetectionInputProcessor for
+      details.
+    rpn_pre_nms_topn: an integer number of top scoring RPN proposals to keep
+      before applying NMS. This is *per FPN level* (not total).
+    rpn_post_nms_topn: an integer number of top scoring RPN proposals to keep
+      after applying NMS. This is the total number of RPN proposals produced.
+    rpn_nms_threshold: a float number between 0 and 1 as the NMS threshold
+      used on RPN proposals.
+    rpn_min_size: an integer number as the minimum proposal height and width;
+      both need to be greater than this number. Note that this number is at the
+      original image scale, not the scale used during training or inference.
+    Returns:
+    scores: a tensor with a shape of [batch_size, rpn_post_nms_topn, 1]
+      representing the scores of the proposals.
+    rois: a tensor with a shape of [batch_size, rpn_post_nms_topn, 4]
+      representing the boxes of the proposals. The boxes are in normalized
+      coordinates with a form of [ymin, xmin, ymax, xmax].
+    """
+
+    with tf.name_scope('proposal'):
+        levels = scores_outputs.keys()
+        scores = []
+        rois = []
+        anchor_boxes = all_anchors.get_unpacked_boxes()
+        for level in levels:
+            # Expands the batch dimension for anchors as anchors do not have batch
+            # dimension. Note that batch_size is invariant across levels.
+            # batch_size = scores_outputs[level].shape[0]
+            # anchor_boxes_batch = tf.cast(
+            #   tf.tile(tf.expand_dims(anchor_boxes[level], axis=0),
+            #         [batch_size, 1, 1, 1]),
+            #   dtype=scores_outputs[level].dtype)
+            logging.debug("[ROI OPs] Using GenerateBoxProposals op... Scope: proposal_%s" % level)
+
+            boxes_per_level, scores_per_level = tf.generate_bounding_box_proposals(
+                scores=tf.reshape(tf.sigmoid(scores_outputs[level]),
+                                  scores_outputs[level].shape),
+                bbox_deltas=box_outputs[level],
+                image_info=image_info,
+                anchors=anchor_boxes[level],
+                pre_nms_topn=rpn_pre_nms_topn,
+                post_nms_topn=rpn_post_nms_topn,
+                nms_threshold=rpn_nms_threshold,
+                min_size=rpn_min_size,
+                name="proposal_%s" % level
+            )
+
+            scores.append(scores_per_level)
+            rois.append(boxes_per_level)
+
+        scores = tf.concat(scores, axis=1)
+        rois = tf.concat(rois, axis=1)
+
+        with tf.name_scope('post_nms_topk'):
+            # Selects the top-k rois, k being rpn_post_nms_topn or the number of total
+            # anchors after non-max suppression.
+            post_nms_num_anchors = scores.shape[1]
+
+            post_nms_topk_limit = (
+                post_nms_num_anchors if post_nms_num_anchors < rpn_post_nms_topn
+                else rpn_post_nms_topn
+            )
+
+            top_k_scores, top_k_rois = box_utils.top_k(scores, k=post_nms_topk_limit, boxes_list=[rois])
+            top_k_rois = top_k_rois[0]
+
+        top_k_scores = tf.stop_gradient(top_k_scores)
+        top_k_rois = tf.stop_gradient(top_k_rois)
+
+        return top_k_scores, top_k_rois

+ 348 - 0
TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/ops/spatial_transform_ops.py

@@ -0,0 +1,348 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import tensorflow as tf
+
+
+def nearest_upsampling(data, scale):
+  """Nearest neighbor upsampling implementation.
+
+  Args:
+    data: A tensor with a shape of [batch, height_in, width_in, channels].
+    scale: An integer multiple to scale resolution of input data.
+
+  Returns:
+    data_up: A tensor with a shape of
+      [batch, height_in*scale, width_in*scale, channels]. Same dtype as input
+      data.
+  """
+  with tf.name_scope('nearest_upsampling'):
+      bs, h, w, c = tf.unstack(tf.shape(data))
+
+      # Use reshape to quickly upsample the input.
+      # The nearest pixel is selected implicitly via broadcasting.
+      # data = tf.reshape(data, [bs, h, 1, w, 1, c]) * tf.ones([1, 1, scale, 1, scale, 1], dtype=data.dtype)
+
+      # Instead of broadcasting with a 6-d tensor, we're using stacking here
+      # for TfLite compatibility.
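+      # For example, a [1, 2, 2, c] input with scale=2 becomes [1, 4, 4, c],
+      # with every input pixel repeated as a 2x2 block.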
+      output = tf.stack([data] * scale, axis=3)
+      output = tf.stack([output] * scale, axis=2)
+      return tf.reshape(output, [bs, h * scale, w * scale, c])
+
+
+def selective_crop_and_resize(features,
+                              boxes,
+                              box_levels,
+                              boundaries,
+                              output_size=7,
+                              is_gpu_inference=False):
+  """Crop and resize boxes on a set of feature maps.
+
+  Given multiple features maps indexed by different levels, and a set of boxes
+  where each box is mapped to a certain level, it selectively crops and resizes
+  boxes from the corresponding feature maps to generate the box features.
+
+  We follow the ROIAlign technique (see https://arxiv.org/pdf/1703.06870.pdf,
+  figure 3 for reference). Specifically, for each feature map, we select an
+  (output_size, output_size) set of pixels corresponding to the box location,
+  and then use bilinear interpolation to select the feature value for each
+  pixel.
+
+  For performance, we perform the gather and interpolation on all layers as a
+  single operation. To do this, the multi-level features are first stacked and
+  gathered into [2*output_size, 2*output_size] feature points. Then bilinear
+  interpolation is performed on the gathered feature points to generate the
+  [output_size, output_size] RoIAlign feature map.
+
+  Here is the step-by-step algorithm:
+    1. The multi-level features are gathered into a
+       [batch_size, num_boxes, output_size*2, output_size*2, num_filters]
+       Tensor. The Tensor contains four neighboring feature points for each
+       vertex in the output grid.
+    2. Compute the interpolation kernel of shape
+       [batch_size, num_boxes, output_size*2, output_size*2]. The last 2 axes
+       can be seen as stacking 2x2 interpolation kernels for all vertices in the
+       output grid.
+    3. Element-wise multiply the gathered features and interpolation kernel.
+       Then apply 2x2 average pooling to reduce spatial dimension to
+       output_size.
+
+  Args:
+    features: a 5-D tensor of shape
+      [batch_size, num_levels, max_height, max_width, num_filters] on which
+      the cropping and resizing are based.
+    boxes: a 3-D tensor of shape [batch_size, num_boxes, 4] encoding the
+      information of each box w.r.t. the corresponding feature map.
+      boxes[:, :, 0:2] are the grid position in (y, x) (float) of the top-left
+      corner of each box. boxes[:, :, 2:4] are the box sizes in (h, w) (float)
+        in terms of the number of pixels of the corresponding feature map size.
+    box_levels: a 3-D tensor of shape [batch_size, num_boxes, 1] representing
+      the 0-based corresponding feature level index of each box.
+    boundaries: a 3-D tensor of shape [batch_size, num_boxes, 2] representing
+      the boundary (in (y, x)) of the corresponding feature map for each box.
+      Any resampled grid points that go beyond the boundary will be clipped.
+    output_size: a scalar indicating the output crop size.
+    is_gpu_inference: whether to build the model for GPU inference.
+
+  Returns:
+    features_per_box: a 5-D tensor of shape
+      [batch_size, num_boxes, output_size, output_size, num_filters]
+      representing the cropped features.
+  """
+  (batch_size, num_levels, max_feature_height, max_feature_width,
+   num_filters) = features.get_shape().as_list()
+  _, num_boxes, _ = boxes.get_shape().as_list()
+
+  # Compute the grid position w.r.t. the corresponding feature map.
+  box_grid_x = []
+  box_grid_y = []
+  for i in range(output_size):
+    box_grid_x.append(boxes[:, :, 1:2] +
+                      (i + 0.5) * boxes[:, :, 3:4] / output_size)
+    box_grid_y.append(boxes[:, :, 0:1] +
+                      (i + 0.5) * boxes[:, :, 2:3] / output_size)
+  box_grid_x = tf.concat(box_grid_x, axis=-1)
+  box_grid_y = tf.concat(box_grid_y, axis=-1)
+
+  # Compute indices for gather operation.
+  box_grid_y0 = tf.floor(box_grid_y)
+  box_grid_x0 = tf.floor(box_grid_x)
+  box_grid_x0 = tf.maximum(0., box_grid_x0)
+  box_grid_y0 = tf.maximum(0., box_grid_y0)
+  box_gridx0x1 = tf.stack([
+      tf.minimum(box_grid_x0, boundaries[:, :, 1:2]),
+      tf.minimum(box_grid_x0 + 1, boundaries[:, :, 1:2])
+  ],
+                          axis=3)
+  box_gridy0y1 = tf.stack([
+      tf.minimum(box_grid_y0, boundaries[:, :, 0:1]),
+      tf.minimum(box_grid_y0 + 1, boundaries[:, :, 0:1])
+  ],
+                          axis=3)
+
+  x_indices = tf.reshape(box_gridx0x1, [batch_size, num_boxes, output_size * 2])
+  y_indices = tf.reshape(box_gridy0y1, [batch_size, num_boxes, output_size * 2])
+
+  # If using GPU for inference, delay the cast until the Gather ops show up
+  # since GPU inference supports floating point better.
+  # TODO(laigd): revisit this when newer versions of GPU libraries are released.
+  indices_dtype = tf.float32 if is_gpu_inference else tf.int32
+
+  if not is_gpu_inference:
+    x_indices = tf.cast(x_indices, tf.int32)
+    y_indices = tf.cast(y_indices, tf.int32)
+
+  height_dim_offset = max_feature_width
+  level_dim_offset = max_feature_height * height_dim_offset
+  batch_dim_offset = num_levels * level_dim_offset
+
+  batch_dim_indices = (
+      tf.reshape(tf.range(batch_size, dtype=indices_dtype) * batch_dim_offset, [batch_size, 1, 1, 1]) *
+      tf.ones([1, num_boxes, output_size * 2, output_size * 2], dtype=indices_dtype)
+  )
+
+  box_level_indices = (
+      tf.reshape(box_levels * level_dim_offset, [batch_size, num_boxes, 1, 1]) *
+      tf.ones([1, 1, output_size * 2, output_size * 2], dtype=indices_dtype)
+  )
+
+  height_indices = (
+      tf.reshape(y_indices * height_dim_offset, [batch_size, num_boxes, output_size * 2, 1]) *
+      tf.ones([1, 1, 1, output_size * 2], dtype=indices_dtype)
+  )
+
+  width_indices = (
+      tf.reshape(x_indices, [batch_size, num_boxes, 1, output_size * 2]) *
+      tf.ones([1, 1, output_size * 2, 1], dtype=indices_dtype)
+  )
+
+  # TODO(hongjunchoi): Remove the float32 round-trip on the indices below once
+  # int32 gather indices are well supported on GPUs (see the disabled branch).
+
+  if True:
+      batch_dim_indices = tf.cast(batch_dim_indices, tf.float32)
+      box_level_indices = tf.cast(box_level_indices, tf.float32)
+      height_indices = tf.cast(height_indices, tf.float32)
+      width_indices = tf.cast(width_indices, tf.float32)
+
+      indices = tf.add_n([
+          batch_dim_indices,
+          box_level_indices,
+          height_indices,
+          width_indices,
+      ])
+
+      indices = tf.cast(indices, tf.int32)
+
+  else:  # TODO: Restore this path once int32 dtype is supported on GPUs.
+      indices = tf.add_n([
+          batch_dim_indices,
+          box_level_indices,
+          height_indices,
+          width_indices,
+      ])
+
+  if batch_size == 1:
+    # Special handling for single batch input to make it friendly for GPU
+    # inference.
+    indices = tf.reshape(indices, [1, -1])
+
+    if is_gpu_inference:
+      indices = tf.cast(indices, dtype=tf.int32)
+
+    features = tf.reshape(features, [1, -1, num_filters])
+    # The cast should happen last since the GPU has better support for
+    # floating point operations.
+    features_per_box = tf.gather(features, indices, axis=1)
+
+  else:
+    indices = tf.reshape(indices, [-1])
+
+    if is_gpu_inference:
+      indices = tf.cast(indices, dtype=tf.int32)
+
+    features = tf.reshape(features, [-1, num_filters])
+    features_per_box = tf.gather(features, indices)
+
+  features_per_box = tf.reshape(
+      features_per_box,
+      [batch_size, num_boxes, output_size * 2, output_size * 2, num_filters]
+  )
+
+  # The RoIAlign feature f can be computed by bilinear interpolation of four
+  # neighboring feature points f0, f1, f2, and f3.
+  # f(y, x) = [hy, ly] * [[f00, f01], * [hx, lx]^T
+  #                       [f10, f11]]
+  # f(y, x) = (hy*hx)f00 + (hy*lx)f01 + (ly*hx)f10 + (lx*ly)f11
+  # f(y, x) = w00*f00 + w01*f01 + w10*f10 + w11*f11
+  ly = box_grid_y - box_grid_y0
+  lx = box_grid_x - box_grid_x0
+  hy = 1.0 - ly
+  hx = 1.0 - lx
+  kernel_x = tf.reshape(tf.stack([hx, lx], axis=3), [batch_size, num_boxes, 1, output_size * 2])
+  kernel_y = tf.reshape(tf.stack([hy, ly], axis=3), [batch_size, num_boxes, output_size * 2, 1])
+
+  # Use implicit broadcast to generate the interpolation kernel. The
+  # multiplier `4` is for avg pooling.
+  interpolation_kernel = kernel_y * kernel_x * 4
+
+  # Interpolate the gathered features with computed interpolation kernels.
+  features_per_box *= tf.cast(tf.expand_dims(interpolation_kernel, axis=4), dtype=features_per_box.dtype)
+  features_per_box = tf.reshape(
+      features_per_box,
+      [batch_size * num_boxes, output_size * 2, output_size * 2, num_filters]
+  )
+  features_per_box = tf.nn.avg_pool2d(features_per_box, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')
+  features_per_box = tf.reshape(features_per_box, [batch_size, num_boxes, output_size, output_size, num_filters])
+
+  return features_per_box
+
+
+def multilevel_crop_and_resize(features,
+                               boxes,
+                               output_size=7,
+                               is_gpu_inference=False):
+  """Crop and resize on multilevel feature pyramid.
+
+  Generates the (output_size, output_size) set of pixels for each input box
+  by first assigning the box to the correct feature level, and then cropping
+  and resizing it using the corresponding feature map of that level.
+
+  Args:
+    features: A dictionary with key as pyramid level and value as features. The
+      features are in shape of [batch_size, height_l, width_l, num_filters].
+    boxes: A 3-D Tensor of shape [batch_size, num_boxes, 4]. Each row represents
+      a box with [y1, x1, y2, x2] in un-normalized coordinates.
+    output_size: A scalar to indicate the output crop size.
+    is_gpu_inference: whether to build the model for GPU inference.
+
+  Returns:
+    A 5-D tensor representing feature crop of shape
+    [batch_size, num_boxes, output_size, output_size, num_filters].
+  """
+  with tf.name_scope('multilevel_crop_and_resize'):
+      levels = features.keys()
+      min_level = min(levels)
+      max_level = max(levels)
+      _, max_feature_height, max_feature_width, _ = (
+          features[min_level].get_shape().as_list())
+
+      # Stack feature pyramid into a features_all of shape
+      # [batch_size, levels, height, width, num_filters].
+      features_all = []
+      for level in range(min_level, max_level + 1):
+        features_all.append(tf.image.pad_to_bounding_box(features[level], 0, 0, max_feature_height, max_feature_width))
+
+      features_all = tf.stack(features_all, axis=1)
+
+      # Assign boxes to the right level.
+      box_width = tf.squeeze(boxes[:, :, 3:4] - boxes[:, :, 1:2], axis=-1)
+      box_height = tf.squeeze(boxes[:, :, 2:3] - boxes[:, :, 0:1], axis=-1)
+
+      areas_sqrt = tf.sqrt(box_height * box_width)
+
+      levels = tf.math.floordiv(tf.math.log(tf.divide(areas_sqrt, 224.0)), tf.math.log(2.0)) + 4.0
+
+      if not is_gpu_inference:
+        levels = tf.cast(levels, dtype=tf.int32)
+
+      # Map levels between [min_level, max_level].
+      levels = tf.minimum(
+          float(max_level) if is_gpu_inference else max_level,
+          tf.maximum(levels, float(min_level) if is_gpu_inference else min_level)
+      )
+
+      # Project box location and sizes to corresponding feature levels.
+      scale_to_level = tf.cast(
+          tf.pow(tf.constant(2.0), levels if is_gpu_inference else tf.cast(levels, tf.float32)),
+          dtype=boxes.dtype
+      )
+
+      boxes /= tf.expand_dims(scale_to_level, axis=2)
+
+      box_width /= scale_to_level
+      box_height /= scale_to_level
+
+      boxes = tf.concat(
+          [boxes[:, :, 0:2],
+           tf.expand_dims(box_height, -1),
+           tf.expand_dims(box_width, -1)],
+          axis=-1
+      )
+
+      # Map levels to [0, max_level-min_level].
+      levels -= min_level
+      level_strides = tf.pow([[2.0]], levels if is_gpu_inference else tf.cast(levels, tf.float32))
+
+      boundary = tf.cast(
+          tf.concat(
+              [
+                  tf.expand_dims([[tf.cast(max_feature_height, tf.float32)]] / level_strides - 1, axis=-1),
+                  tf.expand_dims([[tf.cast(max_feature_width, tf.float32)]] / level_strides - 1, axis=-1),
+              ],
+              axis=-1
+          ),
+          boxes.dtype
+      )
+
+  return selective_crop_and_resize(features_all, boxes, levels, boundary, output_size, is_gpu_inference)

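The level-assignment rule above is easier to see with concrete numbers. Below is a
minimal standalone sketch of the same formula, level = floor(log2(sqrt(area) / 224)) + 4,
clamped to the pyramid range; the level bounds 2 and 6 and the box sizes are assumed
here for illustration only.

import math

def assign_level(box_height, box_width, min_level=2, max_level=6):
    # level = floor(log2(sqrt(box_area) / 224)) + 4, clamped to [min_level, max_level].
    area_sqrt = math.sqrt(box_height * box_width)
    raw_level = math.floor(math.log2(area_sqrt / 224.0)) + 4
    return min(max_level, max(min_level, raw_level))

print(assign_level(224, 224))  # 4: a 224x224 box maps to P4
print(assign_level(112, 112))  # 3: halving the box scale drops one level
print(assign_level(16, 16))    # 2: small boxes are clamped to min_level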
+ 356 - 0
TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/ops/training_ops.py

@@ -0,0 +1,356 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Training specific ops, including sampling, building targets, etc."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import tensorflow as tf
+
+from mask_rcnn.utils import box_utils
+from mask_rcnn.ops import spatial_transform_ops
+
+from mask_rcnn.object_detection import balanced_positive_negative_sampler
+
+_EPSILON = 1e-8
+
+
+def _add_class_assignments(iou, gt_boxes, gt_labels):
+    """Computes object category assignment for each box.
+
+  Args:
+    iou: a tensor for the iou matrix with a shape of
+      [batch_size, K, MAX_NUM_INSTANCES]. K is the number of post-nms RoIs
+      (i.e., rpn_post_nms_topn).
+    gt_boxes: a tensor with a shape of [batch_size, MAX_NUM_INSTANCES, 4].
+      This tensor might have paddings with negative values. The coordinates
+      of gt_boxes are in the pixel coordinates of the scaled image.
+    gt_labels: a tensor with a shape of [batch_size, MAX_NUM_INSTANCES]. This
+      tensor might have paddings with a value of -1.
+  Returns:
+    max_boxes: a tensor with a shape of [batch_size, K, 4], representing
+      the ground truth coordinates of each roi.
+    max_classes: an int32 tensor with a shape of [batch_size, K], representing
+      the ground truth class of each roi.
+    max_overlap: a tensor with a shape of [batch_size, K], representing
+      the maximum overlap of each roi.
+    argmax_iou: a tensor with a shape of [batch_size, K], representing the iou
+      argmax.
+  """
+    with tf.name_scope('add_class_assignments'):
+        batch_size, _, _ = iou.get_shape().as_list()
+
+        argmax_iou = tf.argmax(input=iou, axis=2, output_type=tf.int32)
+
+        indices = tf.reshape(
+            argmax_iou + tf.expand_dims(tf.range(batch_size) * tf.shape(input=gt_labels)[1], 1),
+            shape=[-1]
+        )
+
+        max_classes = tf.reshape(tf.gather(tf.reshape(gt_labels, [-1, 1]), indices), [batch_size, -1])
+
+        max_overlap = tf.reduce_max(input_tensor=iou, axis=2)
+
+        bg_mask = tf.equal(max_overlap, tf.zeros_like(max_overlap))
+
+        max_classes = tf.where(bg_mask, tf.zeros_like(max_classes), max_classes)
+
+        max_boxes = tf.reshape(
+            tf.gather(tf.reshape(gt_boxes, [-1, 4]), indices),
+            [batch_size, -1, 4]
+        )
+
+        max_boxes = tf.where(
+            tf.tile(tf.expand_dims(bg_mask, axis=2), [1, 1, 4]),
+            tf.zeros_like(max_boxes),
+            max_boxes
+        )
+
+    return max_boxes, max_classes, max_overlap, argmax_iou
+
+
+def encode_box_targets(boxes, gt_boxes, gt_labels, bbox_reg_weights):
+    """Encodes predicted boxes with respect to ground truth boxes."""
+    with tf.name_scope('encode_box_targets'):
+        box_targets = box_utils.encode_boxes(boxes=gt_boxes, anchors=boxes, weights=bbox_reg_weights)
+        # If a target is background, the encoded box target should be zeros.
+        mask = tf.tile(tf.expand_dims(tf.equal(gt_labels, tf.zeros_like(gt_labels)), axis=2), [1, 1, 4])
+        box_targets = tf.where(mask, tf.zeros_like(box_targets), box_targets)
+
+    return box_targets
+
+
+def proposal_label_op(boxes, gt_boxes, gt_labels,
+                      batch_size_per_im=512, fg_fraction=0.25, fg_thresh=0.5,
+                      bg_thresh_hi=0.5, bg_thresh_lo=0.):
+    """Assigns the proposals with ground truth labels and performs subsmpling.
+
+    Given proposal `boxes`, `gt_boxes`, and `gt_labels`, the function uses the
+    following algorithm to generate the final `batch_size_per_im` RoIs.
+    1. Calculates the IoU between each proposal box and each gt_boxes.
+    2. Assigns each proposal box with a ground truth class and box label by
+     choosing the largest overlap.
+    3. Samples `batch_size_per_im` boxes from all proposal boxes, and returns
+     box_targets, class_targets, and RoIs.
+    The reference implementations of #1 and #2 are here:
+    https://github.com/facebookresearch/Detectron/blob/master/detectron/datasets/json_dataset.py
+    The reference implementation of #3 is here:
+    https://github.com/facebookresearch/Detectron/blob/master/detectron/roi_data/fast_rcnn.py
+
+    Args:
+    boxes: a tensor with a shape of [batch_size, N, 4]. N is the number of
+      proposals before groundtruth assignment (e.g., rpn_post_nms_topn). The
+      last dimension is the pixel coordinates of scaled images in
+      [ymin, xmin, ymax, xmax] form.
+    gt_boxes: a tensor with a shape of [batch_size, MAX_NUM_INSTANCES, 4]. This
+      tensor might have paddings with a value of -1. The coordinates of gt_boxes
+      are in the pixel coordinates of the scaled image.
+    gt_labels: a tensor with a shape of [batch_size, MAX_NUM_INSTANCES]. This
+      tensor might have paddings with a value of -1.
+    batch_size_per_im: an integer representing the RoI minibatch size per image.
+    fg_fraction: a float representing the target fraction of the RoI minibatch
+      that is labeled foreground (i.e., class > 0).
+    fg_thresh: a float representing the overlap threshold for an RoI to be
+      considered foreground (if >= fg_thresh).
+    bg_thresh_hi: a float representing the upper overlap threshold for an RoI
+      to be considered background (class = 0 if overlap in [LO, HI)).
+    bg_thresh_lo: a float representing the lower overlap threshold for an RoI
+      to be considered background (class = 0 if overlap in [LO, HI)).
+    Returns:
+    box_targets: a tensor with a shape of [batch_size, K, 4]. The tensor
+      contains the ground truth pixel coordinates of the scaled images for each
+      roi. K is the number of sample RoIs (e.g., batch_size_per_im).
+    class_targets: an integer tensor with a shape of [batch_size, K]. The tensor
+      contains the ground truth class for each roi.
+    rois: a tensor with a shape of [batch_size, K, 4], representing the
+      coordinates of the selected RoI.
+    proposal_to_label_map: a tensor with a shape of [batch_size, K]. This tensor
+      keeps the mapping between proposal to labels. proposal_to_label_map[i]
+      means the index of the ground truth instance for the i-th proposal.
+    """
+    with tf.name_scope('proposal_label'):
+        batch_size = boxes.shape[0]
+
+        # The reference implementation intentionally includes ground truth boxes in
+        # the proposals.
+        # see https://github.com/facebookresearch/Detectron/blob/master/detectron/datasets/json_dataset.py#L359
+        boxes = tf.concat([boxes, gt_boxes], axis=1)
+        iou = box_utils.bbox_overlap(boxes, gt_boxes)
+
+        (pre_sample_box_targets, pre_sample_class_targets, max_overlap,
+         proposal_to_label_map) = _add_class_assignments(iou, gt_boxes, gt_labels)
+
+        # Generates a random sample of RoIs comprising foreground and background
+        # examples.
+        # reference: https://github.com/facebookresearch/Detectron/blob/master/detectron/roi_data/fast_rcnn.py#L132
+        positives = tf.greater(max_overlap,
+                               fg_thresh * tf.ones_like(max_overlap))
+        negatives = tf.logical_and(
+            tf.greater_equal(max_overlap, bg_thresh_lo * tf.ones_like(max_overlap)),
+            tf.less(max_overlap, bg_thresh_hi * tf.ones_like(max_overlap))
+        )
+
+        pre_sample_class_targets = tf.where(
+            negatives,
+            tf.zeros_like(pre_sample_class_targets),
+            pre_sample_class_targets
+        )
+
+        proposal_to_label_map = tf.where(
+            negatives,
+            tf.zeros_like(proposal_to_label_map),
+            proposal_to_label_map
+        )
+
+        # Handles ground truth paddings.
+        ignore_mask = tf.less(tf.reduce_min(input_tensor=iou, axis=2), tf.zeros_like(max_overlap))
+
+        # indicator includes both positive and negative labels.
+        # labels includes only positive labels.
+        # positives = indicator & labels.
+        # negatives = indicator & !labels.
+        # ignore = !indicator.
+        labels = positives
+        pos_or_neg = tf.logical_or(positives, negatives)
+        indicator = tf.logical_and(pos_or_neg, tf.logical_not(ignore_mask))
+
+        all_samples = []
+        sampler = balanced_positive_negative_sampler.BalancedPositiveNegativeSampler(
+            positive_fraction=fg_fraction,
+            is_static=True
+        )
+
+        # Batch-unroll the sub-sampling process.
+        for i in range(batch_size):
+            samples = sampler.subsample(indicator[i], batch_size_per_im, labels[i])
+            all_samples.append(samples)
+
+        all_samples = tf.stack(all_samples, axis=0)
+        # A workaround to get the indices from the boolean tensors.
+        _, samples_indices = tf.nn.top_k(tf.cast(all_samples, dtype=tf.int32), k=batch_size_per_im, sorted=True)
+
+        # Constructs indices for gather.
+        samples_indices = tf.reshape(
+            samples_indices + tf.expand_dims(tf.range(batch_size) * tf.shape(input=boxes)[1], 1),
+            [-1]
+        )
+
+        rois = tf.reshape(
+            tf.gather(tf.reshape(boxes, [-1, 4]), samples_indices),
+            [batch_size, -1, 4]
+        )
+
+        class_targets = tf.reshape(
+            tf.gather(tf.reshape(pre_sample_class_targets, [-1, 1]), samples_indices),
+            [batch_size, -1]
+        )
+
+        sample_box_targets = tf.reshape(
+            tf.gather(tf.reshape(pre_sample_box_targets, [-1, 4]), samples_indices),
+            [batch_size, -1, 4]
+        )
+
+        sample_proposal_to_label_map = tf.reshape(
+            tf.gather(tf.reshape(proposal_to_label_map, [-1, 1]), samples_indices),
+            [batch_size, -1]
+        )
+
+    return sample_box_targets, class_targets, rois, sample_proposal_to_label_map
+
+
+def select_fg_for_masks(class_targets, box_targets, boxes, proposal_to_label_map, max_num_fg=128):
+    """Selects the fore ground objects for mask branch during training.
+
+    Args:
+    class_targets: a tensor of shape [batch_size, num_boxes]  representing the
+      class label for each box.
+    box_targets: a tensor with a shape of [batch_size, num_boxes, 4]. The tensor
+      contains the ground truth pixel coordinates of the scaled images for each
+      roi.
+    boxes: A 3-D Tensor of shape [batch_size, num_boxes, 4]. Each row
+      represents a box with [y1, x1, y2, x2] in un-normalized coordinates.
+    proposal_to_label_map: a tensor with a shape of [batch_size, num_boxes].
+      This tensor keeps the mapping between proposal to labels.
+      proposal_to_label_map[i] means the index of the ground truth instance for
+      the i-th proposal.
+    max_num_fg: an integer representing the number of masks per image.
+    Returns:
+    class_targets, boxes, proposal_to_label_map, box_targets that have
+    foreground objects.
+    """
+
+    # Masks are for positive (fg) objects only.
+    # Reference: https://github.com/facebookresearch/Detectron/blob/master/detectron/roi_data/mask_rcnn.py
+    batch_size = boxes.shape[0]
+    _, fg_indices = tf.nn.top_k(tf.cast(tf.greater(class_targets, 0), dtype=tf.float32), k=max_num_fg)
+
+    # Constructs indices for gather.
+    indices = tf.reshape(fg_indices + tf.expand_dims(tf.range(batch_size) * tf.shape(input=class_targets)[1], 1), [-1])
+
+    fg_class_targets = tf.reshape(
+        tf.gather(tf.reshape(class_targets, [-1, 1]), indices),
+        [batch_size, -1]
+    )
+
+    fg_box_targets = tf.reshape(
+        tf.gather(tf.reshape(box_targets, [-1, 4]), indices),
+        [batch_size, -1, 4]
+    )
+
+    fg_box_rois = tf.reshape(
+        tf.gather(tf.reshape(boxes, [-1, 4]), indices), [batch_size, -1, 4]
+    )
+
+    fg_proposal_to_label_map = tf.reshape(
+        tf.gather(tf.reshape(proposal_to_label_map, [-1, 1]), indices),
+        [batch_size, -1]
+    )
+
+    return (fg_class_targets, fg_box_targets, fg_box_rois,
+            fg_proposal_to_label_map)
+
+
+def get_mask_targets(fg_boxes, fg_proposal_to_label_map, fg_box_targets, mask_gt_labels, output_size=28):
+    """Crop and resize on multilevel feature pyramid.
+
+    Args:
+    fg_boxes: A 3-D tensor of shape [batch_size, num_masks, 4]. Each row
+      represents a box with [y1, x1, y2, x2] in un-normalized coordinates.
+    fg_proposal_to_label_map: A tensor of shape [batch_size, num_masks].
+    fg_box_targets: a float tensor representing the box label for each box
+      with a shape of [batch_size, num_masks, 4].
+    mask_gt_labels: A tensor with a shape of [batch_size, M, H+4, W+4]. M is
+      MAX_NUM_INSTANCES (i.e., 100 in this implementation) in each image, while
+      H and W are ground truth mask size. The `+4` comes from padding of two
+      zeros in both directions of height and width dimension.
+    output_size: A scalar to indicate the output crop size.
+
+    Returns:
+    A 4-D tensor representing feature crop of shape
+    [batch_size, num_boxes, output_size, output_size].
+    """
+
+    _, _, max_feature_height, max_feature_width = mask_gt_labels.get_shape().as_list()
+
+    # proposal_to_label_map might have -1 paddings.
+    levels = tf.maximum(fg_proposal_to_label_map, 0)
+
+    # Projects box location and sizes to corresponding cropped ground truth
+    # mask coordinates.
+    bb_y_min, bb_x_min, bb_y_max, bb_x_max = tf.split(value=fg_boxes, num_or_size_splits=4, axis=2)
+    gt_y_min, gt_x_min, gt_y_max, gt_x_max = tf.split(value=fg_box_targets, num_or_size_splits=4, axis=2)
+
+    valid_feature_width = max_feature_width - 4
+    valid_feature_height = max_feature_height - 4
+
+    y_transform = (bb_y_min - gt_y_min) * valid_feature_height / (gt_y_max - gt_y_min + _EPSILON) + 2
+    x_transform = (bb_x_min - gt_x_min) * valid_feature_width / (gt_x_max - gt_x_min + _EPSILON) + 2
+    h_transform = (bb_y_max - bb_y_min) * valid_feature_height / (gt_y_max - gt_y_min + _EPSILON)
+    w_transform = (bb_x_max - bb_x_min) * valid_feature_width / (gt_x_max - gt_x_min + _EPSILON)
+
+    boundaries = tf.concat(
+        [
+            tf.cast(tf.ones_like(y_transform) * (max_feature_height - 1), dtype=tf.float32),
+            tf.cast(tf.ones_like(x_transform) * (max_feature_width - 1), dtype=tf.float32)
+        ],
+        axis=-1
+    )
+
+    features_per_box = spatial_transform_ops.selective_crop_and_resize(
+        tf.expand_dims(mask_gt_labels, -1),
+        tf.concat([y_transform, x_transform, h_transform, w_transform], -1),
+        tf.expand_dims(levels, -1),
+        boundaries,
+        output_size
+    )
+
+    features_per_box = tf.squeeze(features_per_box, axis=-1)
+
+    # Masks are binary outputs.
+    features_per_box = tf.where(
+        tf.greater_equal(features_per_box, 0.5),
+        tf.ones_like(features_per_box),
+        tf.zeros_like(features_per_box)
+    )
+
+    # mask_targets depend on box RoIs, which have gradients. This stop_gradient
+    # prevents the flow of gradient to box RoIs.
+    features_per_box = tf.stop_gradient(features_per_box)
+
+    return features_per_box

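The foreground/background split inside proposal_label_op above reduces to simple
thresholding on each proposal's maximum IoU with the ground truth. A standalone
sketch with made-up overlaps (the padding and ignore-mask handling of the full op
is left out):

import numpy as np

# Max IoU of each proposal with any ground truth box.
max_overlap = np.array([0.9, 0.55, 0.3, 0.05])
fg_thresh, bg_thresh_hi, bg_thresh_lo = 0.5, 0.5, 0.0

positives = max_overlap > fg_thresh
negatives = (max_overlap >= bg_thresh_lo) & (max_overlap < bg_thresh_hi)

print(positives)  # [ True  True False False]
print(negatives)  # [False False  True  True]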
+ 0 - 0
TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/training/__init__.py


+ 55 - 0
TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/training/learning_rates.py

@@ -0,0 +1,55 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Learning rate schedule."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import tensorflow as tf
+
+
+def step_learning_rate_with_linear_warmup(
+    global_step,
+    init_learning_rate,
+    warmup_learning_rate,
+    warmup_steps,
+    learning_rate_levels,
+    learning_rate_steps
+):
+    """Creates the step learning rate tensor with linear warmup."""
+
+    def warmup_lr_fn():
+        return warmup_learning_rate + \
+            tf.cast(global_step, dtype=tf.float32) / warmup_steps * (init_learning_rate - warmup_learning_rate)
+
+    def learning_rate_fn():
+        return tf.compat.v1.train.piecewise_constant(
+            global_step,
+            boundaries=learning_rate_steps,
+            values=[init_learning_rate] + learning_rate_levels
+        )
+
+    learning_rate = tf.where(
+        global_step < warmup_steps,
+        warmup_lr_fn(),
+        learning_rate_fn()
+    )
+
+    return learning_rate
+

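A worked example of the schedule above, in plain Python with made-up hyperparameters:
a linear warmup for the first warmup_steps, then a piecewise-constant decay (matching
piecewise_constant semantics, values[0] still applies at a step equal to the first
boundary).

def lr_at(step, init_lr=0.01, warmup_lr=0.0001, warmup_steps=100,
          lr_levels=(0.001, 0.0001), lr_steps=(1000, 2000)):
    if step < warmup_steps:
        # Linear ramp from warmup_lr to init_lr.
        return warmup_lr + step / warmup_steps * (init_lr - warmup_lr)
    values = (init_lr,) + tuple(lr_levels)
    return values[sum(step > b for b in lr_steps)]

for s in (0, 50, 100, 1500, 2500):
    print(s, lr_at(s))  # -> 0.0001, ~0.00505, 0.01, 0.001, 0.0001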
+ 435 - 0
TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/training/losses.py

@@ -0,0 +1,435 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Losses used for Mask-RCNN."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from distutils.version import LooseVersion
+
+import tensorflow as tf
+
+DEBUG_LOSS_IMPLEMENTATION = False
+
+
+if LooseVersion(tf.__version__) < LooseVersion("2.0.0"):
+    from tensorflow.python.keras.utils import losses_utils
+    ReductionV2 = losses_utils.ReductionV2
+else:
+    ReductionV2 = tf.keras.losses.Reduction
+
+
+def _huber_loss(y_true, y_pred, weights, delta):
+
+    num_non_zeros = tf.math.count_nonzero(weights, dtype=tf.float32)
+
+    huber_keras_loss = tf.keras.losses.Huber(
+        delta=delta,
+        reduction=ReductionV2.SUM,
+        name='huber_loss'
+    )
+
+    huber_loss = huber_keras_loss(
+        y_true,
+        y_pred,
+        sample_weight=weights
+    )
+
+    assert huber_loss.dtype == tf.float32
+
+    huber_loss = tf.math.divide_no_nan(huber_loss, num_non_zeros, name="huber_loss")
+
+    assert huber_loss.dtype == tf.float32
+
+    if DEBUG_LOSS_IMPLEMENTATION:
+        mlperf_loss = tf.compat.v1.losses.huber_loss(
+            y_true,
+            y_pred,
+            weights=weights,
+            delta=delta,
+            reduction=tf.compat.v1.losses.Reduction.SUM_BY_NONZERO_WEIGHTS
+        )
+
+        print_op = tf.print("Huber Loss - MLPerf:", mlperf_loss, " && Legacy Loss:", huber_loss)
+
+        with tf.control_dependencies([print_op]):
+            huber_loss = tf.identity(huber_loss)
+
+    return huber_loss
+
+
+def _sigmoid_cross_entropy(multi_class_labels, logits, weights, sum_by_non_zeros_weights=False):
+
+    assert weights.dtype == tf.float32
+
+    sigmoid_cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(
+        labels=multi_class_labels,
+        logits=logits,
+        name="x-entropy"
+    )
+
+    assert sigmoid_cross_entropy.dtype == tf.float32
+
+    sigmoid_cross_entropy = tf.math.multiply(sigmoid_cross_entropy, weights)
+    sigmoid_cross_entropy = tf.math.reduce_sum(sigmoid_cross_entropy)
+
+    assert sigmoid_cross_entropy.dtype == tf.float32
+
+    if sum_by_non_zeros_weights:
+        num_non_zeros = tf.math.count_nonzero(weights, dtype=tf.float32)
+        sigmoid_cross_entropy = tf.math.divide_no_nan(
+            sigmoid_cross_entropy,
+            num_non_zeros,
+            name="sum_by_non_zeros_weights"
+        )
+
+    assert sigmoid_cross_entropy.dtype == tf.float32
+
+    if DEBUG_LOSS_IMPLEMENTATION:
+
+        if sum_by_non_zeros_weights:
+            reduction = tf.compat.v1.losses.Reduction.SUM_BY_NONZERO_WEIGHTS
+
+        else:
+            reduction = tf.compat.v1.losses.Reduction.SUM
+
+        mlperf_loss = tf.compat.v1.losses.sigmoid_cross_entropy(
+            multi_class_labels=multi_class_labels,
+            logits=logits,
+            weights=weights,
+            reduction=reduction
+        )
+
+        print_op = tf.print(
+            "Sigmoid X-Entropy Loss (%s) - MLPerf:" % reduction, mlperf_loss, " && Legacy Loss:", sigmoid_cross_entropy
+        )
+
+        with tf.control_dependencies([print_op]):
+            sigmoid_cross_entropy = tf.identity(sigmoid_cross_entropy)
+
+    return sigmoid_cross_entropy
+
+
+def _softmax_cross_entropy(onehot_labels, logits):
+
+    num_non_zeros = tf.math.count_nonzero(onehot_labels, dtype=tf.float32)
+
+    softmax_cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
+        labels=onehot_labels,
+        logits=logits
+    )
+
+    assert softmax_cross_entropy.dtype == tf.float32
+
+    softmax_cross_entropy = tf.math.reduce_sum(softmax_cross_entropy)
+    softmax_cross_entropy = tf.math.divide_no_nan(softmax_cross_entropy, num_non_zeros, name="softmax_cross_entropy")
+
+    assert softmax_cross_entropy.dtype == tf.float32
+
+    if DEBUG_LOSS_IMPLEMENTATION:
+
+        mlperf_loss = tf.compat.v1.losses.softmax_cross_entropy(
+            onehot_labels=onehot_labels,
+            logits=logits,
+            reduction=tf.compat.v1.losses.Reduction.SUM_BY_NONZERO_WEIGHTS
+        )
+
+        print_op = tf.print("Softmax X-Entropy Loss - MLPerf:", mlperf_loss, " && Legacy Loss:", softmax_cross_entropy)
+
+        with tf.control_dependencies([print_op]):
+            softmax_cross_entropy = tf.identity(softmax_cross_entropy)
+
+    return softmax_cross_entropy
+
+
+def _rpn_score_loss(score_outputs, score_targets, normalizer=1.0):
+    """Computes score loss."""
+
+    with tf.name_scope('rpn_score_loss'):
+
+        # score_targets has three values:
+        # * (1) score_targets[i]=1, the anchor is a positive sample.
+        # * (2) score_targets[i]=0, negative.
+        # * (3) score_targets[i]=-1, the anchor is don't care (ignore).
+
+        mask = tf.math.greater_equal(score_targets, 0)
+        mask = tf.cast(mask, dtype=tf.float32)
+
+        score_targets = tf.maximum(score_targets, tf.zeros_like(score_targets))
+        score_targets = tf.cast(score_targets, dtype=tf.float32)
+
+        assert score_outputs.dtype == tf.float32
+        assert score_targets.dtype == tf.float32
+
+        score_loss = _sigmoid_cross_entropy(
+            multi_class_labels=score_targets,
+            logits=score_outputs,
+            weights=mask,
+            sum_by_non_zeros_weights=False
+        )
+
+        assert score_loss.dtype == tf.float32
+
+        if isinstance(normalizer, tf.Tensor) or normalizer != 1.0:
+            score_loss /= normalizer
+
+        assert score_loss.dtype == tf.float32
+
+    return score_loss
+
+
+def _rpn_box_loss(box_outputs, box_targets, normalizer=1.0, delta=1. / 9):
+    """Computes box regression loss."""
+    # delta is typically around the mean value of the regression target.
+    # For instance, the regression targets of a 512x512 input with 6 anchors on
+    # the P2-P6 pyramid are about [0.1, 0.1, 0.2, 0.2].
+
+    with tf.name_scope('rpn_box_loss'):
+        mask = tf.not_equal(box_targets, 0.0)
+        mask = tf.cast(mask, tf.float32)
+
+        assert mask.dtype == tf.float32
+
+        # The loss is normalized by the sum of non-zero weights before additional
+        # normalizer provided by the function caller.
+        box_loss = _huber_loss(y_true=box_targets, y_pred=box_outputs, weights=mask, delta=delta)
+
+        assert box_loss.dtype == tf.float32
+
+        if isinstance(normalizer, tf.Tensor) or normalizer != 1.0:
+            box_loss /= normalizer
+
+        assert box_loss.dtype == tf.float32
+
+    return box_loss
+
+
+def rpn_loss(score_outputs, box_outputs, labels, params):
+    """Computes total RPN detection loss.
+
+    Computes total RPN detection loss including box and score from all levels.
+    Args:
+    score_outputs: an OrderedDict with keys representing levels and values
+      representing scores in [batch_size, height, width, num_anchors].
+    box_outputs: an OrderedDict with keys representing levels and values
+      representing box regression targets in
+      [batch_size, height, width, num_anchors * 4].
+    labels: the dictionary returned from the dataloader that includes
+      groundtruth targets.
+    params: the dictionary including training parameters specified in the
+      default_hparams function in this file.
+    Returns:
+    total_rpn_loss: a float tensor representing total loss reduced from
+      score and box losses from all levels.
+    rpn_score_loss: a float tensor representing total score loss.
+    rpn_box_loss: a float tensor representing total box regression loss.
+    """
+    with tf.name_scope('rpn_loss'):
+
+        score_losses = []
+        box_losses = []
+
+        for level in range(int(params['min_level']), int(params['max_level'] + 1)):
+
+            score_targets_at_level = labels['score_targets_%d' % level]
+            box_targets_at_level = labels['box_targets_%d' % level]
+
+            score_losses.append(
+                _rpn_score_loss(
+                    score_outputs=score_outputs[level],
+                    score_targets=score_targets_at_level,
+                    normalizer=tf.cast(params['train_batch_size'] * params['rpn_batch_size_per_im'], dtype=tf.float32)
+                )
+            )
+
+            box_losses.append(_rpn_box_loss(
+                box_outputs=box_outputs[level],
+                box_targets=box_targets_at_level,
+                normalizer=1.0
+            ))
+
+        # Sum per level losses to total loss.
+        rpn_score_loss = tf.add_n(score_losses)
+        rpn_box_loss = params['rpn_box_loss_weight'] * tf.add_n(box_losses)
+
+        total_rpn_loss = rpn_score_loss + rpn_box_loss
+
+    return total_rpn_loss, rpn_score_loss, rpn_box_loss
+
+
+def _fast_rcnn_class_loss(class_outputs, class_targets_one_hot, normalizer=1.0):
+    """Computes classification loss."""
+
+    with tf.name_scope('fast_rcnn_class_loss'):
+        # The loss is normalized by the sum of non-zero weights before additional
+        # normalizer provided by the function caller.
+
+        class_loss = _softmax_cross_entropy(onehot_labels=class_targets_one_hot, logits=class_outputs)
+
+        if isinstance(normalizer, tf.Tensor) or normalizer != 1.0:
+            class_loss /= normalizer
+
+    return class_loss
+
+
+def _fast_rcnn_box_loss(box_outputs, box_targets, class_targets, normalizer=1.0, delta=1.):
+    """Computes box regression loss."""
+    # delta is typically around the mean value of the regression target.
+    # For instance, the regression targets of a 512x512 input with 6 anchors on
+    # the P2-P6 pyramid are about [0.1, 0.1, 0.2, 0.2].
+
+    with tf.name_scope('fast_rcnn_box_loss'):
+        mask = tf.tile(tf.expand_dims(tf.greater(class_targets, 0), axis=2), [1, 1, 4])
+
+        # The loss is normalized by the sum of non-zero weights before additional
+        # normalizer provided by the function caller.
+        box_loss = _huber_loss(y_true=box_targets, y_pred=box_outputs, weights=mask, delta=delta)
+
+        if isinstance(normalizer, tf.Tensor) or normalizer != 1.0:
+            box_loss /= normalizer
+
+    return box_loss
+
+
+def fast_rcnn_loss(class_outputs, box_outputs, class_targets, box_targets, params):
+    """Computes the box and class loss (Fast-RCNN branch) of Mask-RCNN.
+
+    This function implements the classification and box regression loss of the
+    Fast-RCNN branch in Mask-RCNN. As the `box_outputs` produces `num_classes`
+    boxes for each RoI, the reference model expands `box_targets` to match the
+    shape of `box_outputs` and selects only the target with which the RoI has
+    the maximum overlap. (Reference: https://github.com/facebookresearch/Detectron/blob/master/detectron/roi_data/fast_rcnn.py)
+    Instead, this function selects the `box_outputs` by the `class_targets` so
+    that it doesn't expand `box_targets`.
+
+    The loss computation has two parts: (1) classification loss is softmax on all
+    RoIs. (2) box loss is smooth L1-loss on only positive samples of RoIs.
+    Reference: https://github.com/facebookresearch/Detectron/blob/master/detectron/modeling/fast_rcnn_heads.py
+
+
+    Args:
+    class_outputs: a float tensor representing the class prediction for each box
+      with a shape of [batch_size, num_boxes, num_classes].
+    box_outputs: a float tensor representing the box prediction for each box
+      with a shape of [batch_size, num_boxes, num_classes * 4].
+    class_targets: a float tensor representing the class label for each box
+      with a shape of [batch_size, num_boxes].
+    box_targets: a float tensor representing the box label for each box
+      with a shape of [batch_size, num_boxes, 4].
+    params: the dictionary including training parameters specified in the
+      default_hparams function in this file.
+    Returns:
+    total_loss: a float tensor representing the total loss reduced from
+      class and box losses from all levels.
+    cls_loss: a float tensor representing total class loss.
+    box_loss: a float tensor representing total box regression loss.
+    """
+    with tf.name_scope('fast_rcnn_loss'):
+        class_targets = tf.cast(class_targets, dtype=tf.int32)
+
+        # Selects the box from `box_outputs` based on `class_targets`, with which
+        # the box has the maximum overlap.
+        batch_size, num_rois, _ = box_outputs.get_shape().as_list()
+        box_outputs = tf.reshape(box_outputs, [batch_size, num_rois, params['num_classes'], 4])
+
+        box_indices = tf.reshape(
+            class_targets +
+            tf.tile(tf.expand_dims(tf.range(batch_size) * num_rois * params['num_classes'], 1), [1, num_rois]) +
+            tf.tile(tf.expand_dims(tf.range(num_rois) * params['num_classes'], 0), [batch_size, 1]),
+            [-1]
+        )
+
+        box_outputs = tf.matmul(
+            tf.one_hot(
+                box_indices,
+                batch_size * num_rois * params['num_classes'],
+                dtype=box_outputs.dtype
+            ),
+            tf.reshape(box_outputs, [-1, 4])
+        )
+
+        box_outputs = tf.reshape(box_outputs, [batch_size, -1, 4])
+        box_loss = _fast_rcnn_box_loss(
+            box_outputs=box_outputs,
+            box_targets=box_targets,
+            class_targets=class_targets,
+            normalizer=1.0
+        )
+        box_loss *= params['fast_rcnn_box_loss_weight']
+
+        use_sparse_x_entropy = False
+
+        _class_targets = class_targets if use_sparse_x_entropy else tf.one_hot(class_targets, params['num_classes'])
+
+        class_loss = _fast_rcnn_class_loss(
+            class_outputs=class_outputs,
+            class_targets_one_hot=_class_targets,
+            normalizer=1.0
+        )
+
+        total_loss = class_loss + box_loss
+
+    return total_loss, class_loss, box_loss
+
+
+def mask_rcnn_loss(mask_outputs, mask_targets, select_class_targets, params):
+    """Computes the mask loss of Mask-RCNN.
+
+    This function implements the mask loss of Mask-RCNN. As the `mask_outputs`
+    produces `num_classes` masks for each RoI, the reference model expands
+    `mask_targets` to match the shape of `mask_outputs` and selects only the
+    target with which the RoI has the maximum overlap.
+    (Reference: https://github.com/facebookresearch/Detectron/blob/master/detectron/roi_data/mask_rcnn.py)
+    Instead, this implementation selects the `mask_outputs` by the `class_targets`
+    so that it doesn't expand `mask_targets`. Note that the selection logic is
+    done in the post-processing of mask_rcnn_fn in mask_rcnn_architecture.py.
+
+    Args:
+    mask_outputs: a float tensor representing the prediction for each mask,
+      with a shape of
+      [batch_size, num_masks, mask_height, mask_width].
+    mask_targets: a float tensor representing the binary mask of ground truth
+      labels for each mask with a shape of
+      [batch_size, num_masks, mask_height, mask_width].
+    select_class_targets: a tensor with a shape of [batch_size, num_masks],
+      representing the foreground mask targets.
+    params: the dictionary including training parameters specified in the
+      default_hparams function in this file.
+    Returns:
+    mask_loss: a float tensor representing total mask loss.
+    """
+    with tf.name_scope('mask_loss'):
+        batch_size, num_masks, mask_height, mask_width = mask_outputs.get_shape().as_list()
+
+        weights = tf.tile(
+            tf.reshape(tf.greater(select_class_targets, 0), [batch_size, num_masks, 1, 1]),
+            [1, 1, mask_height, mask_width]
+        )
+        weights = tf.cast(weights, tf.float32)
+
+        loss = _sigmoid_cross_entropy(
+            multi_class_labels=mask_targets,
+            logits=mask_outputs,
+            weights=weights,
+            sum_by_non_zeros_weights=True
+        )
+
+        mrcnn_loss = params['mrcnn_weight_loss_mask'] * loss
+
+        return mrcnn_loss

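The one-hot matmul inside fast_rcnn_loss above is a batched gather: for every RoI it
keeps only the four box outputs of that RoI's target class. A NumPy sketch with tiny
made-up shapes:

import numpy as np

batch_size, num_rois, num_classes = 1, 2, 3
# One row of 4 box coordinates per (batch, roi, class) triple.
box_outputs = np.arange(batch_size * num_rois * num_classes * 4,
                        dtype=np.float32).reshape(-1, 4)
class_targets = np.array([[2, 0]])  # target class of each RoI

# Flat row index of (batch, roi, target_class), as in the loss above.
flat = (class_targets
        + (np.arange(batch_size) * num_rois * num_classes)[:, None]
        + np.arange(num_rois)[None, :] * num_classes).reshape(-1)

selected = np.eye(batch_size * num_rois * num_classes, dtype=np.float32)[flat] @ box_outputs
print(selected)
# [[ 8.  9. 10. 11.]   <- class-2 box of RoI 0
#  [12. 13. 14. 15.]]  <- class-0 box of RoI 1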
+ 0 - 0
TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/utils/__init__.py


+ 588 - 0
TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/utils/box_utils.py

@@ -0,0 +1,588 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Util functions to manipulate boxes."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+# Standard Imports
+
+import numpy as np
+import tensorflow as tf
+
+
+BBOX_XFORM_CLIP = np.log(1000. / 16.)
+NMS_TILE_SIZE = 512
+
+
+def bbox_overlap(boxes, gt_boxes):
+  """Calculates the overlap between proposal and ground truth boxes.
+
+  Some `gt_boxes` may have been padded.  The returned `iou` tensor for these
+  boxes will be -1.
+
+  Args:
+    boxes: a tensor with a shape of [batch_size, N, 4]. N is the number of
+      proposals before groundtruth assignment (e.g., rpn_post_nms_topn). The
+      last dimension is the pixel coordinates in [ymin, xmin, ymax, xmax] form.
+    gt_boxes: a tensor with a shape of [batch_size, MAX_NUM_INSTANCES, 4]. This
+      tensor might have paddings with a negative value.
+  Returns:
+    iou: a tensor with a shape of [batch_size, N, MAX_NUM_INSTANCES].
+  """
+
+  with tf.name_scope('bbox_overlap'):
+
+      bb_y_min, bb_x_min, bb_y_max, bb_x_max = tf.split(value=boxes, num_or_size_splits=4, axis=2)
+      gt_y_min, gt_x_min, gt_y_max, gt_x_max = tf.split(value=gt_boxes, num_or_size_splits=4, axis=2)
+
+      # Calculates the intersection area.
+      i_xmin = tf.maximum(bb_x_min, tf.transpose(a=gt_x_min, perm=[0, 2, 1]))
+      i_xmax = tf.minimum(bb_x_max, tf.transpose(a=gt_x_max, perm=[0, 2, 1]))
+      i_ymin = tf.maximum(bb_y_min, tf.transpose(a=gt_y_min, perm=[0, 2, 1]))
+      i_ymax = tf.minimum(bb_y_max, tf.transpose(a=gt_y_max, perm=[0, 2, 1]))
+      i_area = tf.maximum((i_xmax - i_xmin), 0) * tf.maximum((i_ymax - i_ymin), 0)
+
+      # Calculates the union area.
+      bb_area = (bb_y_max - bb_y_min) * (bb_x_max - bb_x_min)
+      gt_area = (gt_y_max - gt_y_min) * (gt_x_max - gt_x_min)
+      # Adds a small epsilon to avoid divide-by-zero.
+      u_area = bb_area + tf.transpose(a=gt_area, perm=[0, 2, 1]) - i_area + 1e-8
+
+      # Calculates IoU.
+      iou = i_area / u_area
+
+      # Fills -1 for padded ground truth boxes.
+      padding_mask = tf.less(i_xmin, tf.zeros_like(i_xmin))
+      iou = tf.where(padding_mask, -tf.ones_like(iou), iou)
+
+  return iou
+
+
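# A quick numeric check of bbox_overlap with made-up boxes (assumes TF2 eager
# execution and the function above in scope):
_boxes = tf.constant([[[0., 0., 10., 10.],
                       [0., 0., 5., 5.]]])
_gt_boxes = tf.constant([[[0., 0., 10., 10.]]])
# The second proposal intersects the gt in a 5x5 = 25 area; the union is
# 25 + 100 - 25 = 100, hence IoU = 0.25.
print(bbox_overlap(_boxes, _gt_boxes).numpy())  # ~[[[1.0], [0.25]]]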
+def top_k(scores, k, boxes_list):
+  """A wrapper that returns top-k scores and correponding boxes.
+
+  This functions selects the top-k scores and boxes as follows.
+
+  indices = argsort(scores)[:k]
+  scores = scores[indices]
+  outputs = []
+  for boxes in boxes_list:
+    outputs.append(boxes[indices, :])
+  return scores, outputs
+
+  Args:
+    scores: a tensor with a shape of [batch_size, N]. N is the number of scores.
+    k: an integer for selecting the top-k elements.
+    boxes_list: a list containing at least one element. Each element has a shape
+      of [batch_size, N, 4].
+  Returns:
+    scores: the selected top-k scores with a shape of [batch_size, k].
+    outputs: the list containing the corresponding boxes in the order of the
+      input `boxes_list`.
+  """
+  assert isinstance(boxes_list, list)
+  assert boxes_list  # not empty list
+
+  batch_size, _ = scores.get_shape().as_list()
+
+  scores, top_k_indices = tf.nn.top_k(scores, k=k)
+  outputs = []
+  for boxes in boxes_list:
+    if batch_size == 1:
+      boxes = tf.squeeze(tf.gather(boxes, top_k_indices, axis=1), axis=1)
+    else:
+      boxes_index_offsets = tf.range(batch_size) * tf.shape(input=boxes)[1]
+      boxes_indices = tf.reshape(
+          top_k_indices + tf.expand_dims(boxes_index_offsets, 1), [-1])
+      boxes = tf.reshape(
+          tf.gather(tf.reshape(boxes, [-1, 4]), boxes_indices),
+          [batch_size, -1, 4])
+    outputs.append(boxes)
+  return scores, outputs
+
+
+def _self_suppression(iou, _, iou_sum):
+  batch_size = tf.shape(input=iou)[0]
+  can_suppress_others = tf.cast(
+      tf.reshape(tf.reduce_max(input_tensor=iou, axis=1) <= 0.5, [batch_size, -1, 1]), iou.dtype)
+  iou_suppressed = tf.reshape(
+      tf.cast(tf.reduce_max(input_tensor=can_suppress_others * iou, axis=1) <= 0.5, iou.dtype),
+      [batch_size, -1, 1]) * iou
+  iou_sum_new = tf.reduce_sum(input_tensor=iou_suppressed, axis=[1, 2])
+  return [
+      iou_suppressed,
+      tf.reduce_any(input_tensor=iou_sum - iou_sum_new > 0.5), iou_sum_new
+  ]
+
+
+def _cross_suppression(boxes, box_slice, iou_threshold, inner_idx):
+  batch_size = tf.shape(input=boxes)[0]
+  new_slice = tf.slice(boxes, [0, inner_idx * NMS_TILE_SIZE, 0],
+                       [batch_size, NMS_TILE_SIZE, 4])
+  iou = bbox_overlap(new_slice, box_slice)
+  ret_slice = tf.expand_dims(
+      tf.cast(tf.reduce_all(input_tensor=iou < iou_threshold, axis=[1]), box_slice.dtype),
+      2) * box_slice
+  return boxes, ret_slice, iou_threshold, inner_idx + 1
+
+
+def _suppression_loop_body(boxes, iou_threshold, output_size, idx):
+  """Process boxes in the range [idx*NMS_TILE_SIZE, (idx+1)*NMS_TILE_SIZE).
+
+  Args:
+    boxes: a tensor with a shape of [batch_size, anchors, 4].
+    iou_threshold: a float representing the threshold for deciding whether boxes
+      overlap too much with respect to IOU.
+    output_size: an int32 tensor of size [batch_size], representing the number
+      of selected boxes for each batch.
+    idx: an integer scalar representing the induction variable.
+
+  Returns:
+    boxes: updated boxes.
+    iou_threshold: pass down iou_threshold to the next iteration.
+    output_size: the updated output_size.
+    idx: the updated induction variable.
+  """
+  num_tiles = tf.shape(input=boxes)[1] // NMS_TILE_SIZE
+  batch_size = tf.shape(input=boxes)[0]
+
+  # Iterates over tiles that can possibly suppress the current tile.
+  box_slice = tf.slice(boxes, [0, idx * NMS_TILE_SIZE, 0],
+                       [batch_size, NMS_TILE_SIZE, 4])
+  _, box_slice, _, _ = tf.while_loop(
+      cond=lambda _boxes, _box_slice, _threshold, inner_idx: inner_idx < idx,
+      body=_cross_suppression, loop_vars=[boxes, box_slice, iou_threshold,
+                           tf.constant(0)])
+
+  # Iterates over the current tile to compute self-suppression.
+  iou = bbox_overlap(box_slice, box_slice)
+  mask = tf.expand_dims(
+      tf.reshape(tf.range(NMS_TILE_SIZE), [1, -1]) > tf.reshape(
+          tf.range(NMS_TILE_SIZE), [-1, 1]), 0)
+  iou *= tf.cast(tf.logical_and(mask, iou >= iou_threshold), iou.dtype)
+  suppressed_iou, _, _ = tf.while_loop(
+      cond=lambda _iou, loop_condition, _iou_sum: loop_condition, body=_self_suppression,
+      loop_vars=[iou, tf.constant(True),
+       tf.reduce_sum(input_tensor=iou, axis=[1, 2])])
+  suppressed_box = tf.reduce_sum(input_tensor=suppressed_iou, axis=1) > 0
+  box_slice *= tf.expand_dims(1.0 - tf.cast(suppressed_box, box_slice.dtype), 2)
+
+  # Uses box_slice to update the input boxes.
+  mask = tf.reshape(
+      tf.cast(tf.equal(tf.range(num_tiles), idx), boxes.dtype), [1, -1, 1, 1])
+  boxes = tf.tile(tf.expand_dims(
+      box_slice, [1]), [1, num_tiles, 1, 1]) * mask + tf.reshape(
+          boxes, [batch_size, num_tiles, NMS_TILE_SIZE, 4]) * (1 - mask)
+  boxes = tf.reshape(boxes, [batch_size, -1, 4])
+
+  # Updates output_size.
+  output_size += tf.reduce_sum(
+      input_tensor=tf.cast(tf.reduce_any(input_tensor=box_slice > 0, axis=[2]), tf.int32), axis=[1])
+  return boxes, iou_threshold, output_size, idx + 1
+
+
+def sorted_non_max_suppression_padded(scores,
+                                      boxes,
+                                      max_output_size,
+                                      iou_threshold):
+  """A wrapper that handles non-maximum suppression.
+
+  Assumption:
+    * The boxes are sorted by scores unless the box is a dot (all coordinates
+      are zero).
+    * Boxes with higher scores can be used to suppress boxes with lower scores.
+
+  The overall design of the algorithm is to handle boxes tile-by-tile:
+
+  boxes = boxes.pad_to_multiple_of(tile_size)
+  num_tiles = len(boxes) // tile_size
+  output_boxes = []
+  for i in range(num_tiles):
+    box_tile = boxes[i*tile_size : (i+1)*tile_size]
+    for j in range(i):
+      suppressing_tile = boxes[j*tile_size : (j+1)*tile_size]
+      iou = bbox_overlap(box_tile, suppressing_tile)
+      # if the box is suppressed in iou, clear it to a dot
+      box_tile *= _update_boxes(iou)
+    # Iteratively handle the diagonal tile.
+    iou = bbox_overlap(box_tile, box_tile)
+    iou_changed = True
+    while iou_changed:
+      # boxes that are not suppressed by anything else
+      suppressing_boxes = _get_suppressing_boxes(iou)
+      # boxes that are suppressed by suppressing_boxes
+      suppressed_boxes = _get_suppressed_boxes(iou, suppressing_boxes)
+      # clear iou to 0 for boxes that are suppressed, as they cannot be used
+      # to suppress other boxes any more
+      new_iou = _clear_iou(iou, suppressed_boxes)
+      iou_changed = (new_iou != iou)
+      iou = new_iou
+    # remaining boxes that can still suppress others, are selected boxes.
+    output_boxes.append(_get_suppressing_boxes(iou))
+    if len(output_boxes) >= max_output_size:
+      break
+
+  Args:
+    scores: a tensor with a shape of [batch_size, anchors].
+    boxes: a tensor with a shape of [batch_size, anchors, 4].
+    max_output_size: a scalar integer `Tensor` representing the maximum number
+      of boxes to be selected by non max suppression.
+    iou_threshold: a float representing the threshold for deciding whether boxes
+      overlap too much with respect to IOU.
+
+  Returns:
+    nms_scores: a tensor with a shape of [batch_size, anchors]. It has the same
+      dtype as the input scores.
+    nms_proposals: a tensor with a shape of [batch_size, anchors, 4]. It has the
+      same dtype as the input boxes.
+  """
+  batch_size = tf.shape(input=boxes)[0]
+  num_boxes = tf.shape(input=boxes)[1]
+  pad = tf.cast(
+      tf.math.ceil(tf.cast(num_boxes, tf.float32) / NMS_TILE_SIZE),
+      tf.int32) * NMS_TILE_SIZE - num_boxes
+  boxes = tf.pad(tensor=tf.cast(boxes, tf.float32), paddings=[[0, 0], [0, pad], [0, 0]])
+  scores = tf.pad(tensor=tf.cast(scores, tf.float32), paddings=[[0, 0], [0, pad]])
+  num_boxes += pad
+
+  def _loop_cond(unused_boxes, unused_threshold, output_size, idx):
+    return tf.logical_and(
+        tf.reduce_min(input_tensor=output_size) < max_output_size,
+        idx < num_boxes // NMS_TILE_SIZE)
+
+  selected_boxes, _, output_size, _ = tf.while_loop(
+      cond=_loop_cond, body=_suppression_loop_body, loop_vars=[
+          boxes, iou_threshold,
+          tf.zeros([batch_size], tf.int32),
+          tf.constant(0)
+      ])
+  idx = num_boxes - tf.cast(
+      tf.nn.top_k(
+          tf.cast(tf.reduce_any(input_tensor=selected_boxes > 0, axis=[2]), tf.int32) *
+          tf.expand_dims(tf.range(num_boxes, 0, -1), 0), max_output_size)[0],
+      tf.int32)
+  idx = tf.minimum(idx, num_boxes - 1)
+  idx = tf.reshape(
+      idx + tf.reshape(tf.range(batch_size) * num_boxes, [-1, 1]), [-1])
+  boxes = tf.reshape(
+      tf.gather(tf.reshape(boxes, [-1, 4]), idx),
+      [batch_size, max_output_size, 4])
+  boxes = boxes * tf.cast(
+      tf.reshape(tf.range(max_output_size), [1, -1, 1]) < tf.reshape(
+          output_size, [-1, 1, 1]), boxes.dtype)
+  scores = tf.reshape(
+      tf.gather(tf.reshape(scores, [-1, 1]), idx),
+      [batch_size, max_output_size])
+  scores = scores * tf.cast(
+      tf.reshape(tf.range(max_output_size), [1, -1]) < tf.reshape(
+          output_size, [-1, 1]), scores.dtype)
+  return scores, boxes
+
+
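# Toy usage of sorted_non_max_suppression_padded with made-up inputs (scores must
# already be sorted in descending order per batch; assumes TF2 eager execution):
_scores = tf.constant([[0.9, 0.8, 0.7]])
_boxes = tf.constant([[[0., 0., 10., 10.],
                       [1., 1., 11., 11.],       # IoU ~0.68 with the first box
                       [20., 20., 30., 30.]]])
_nms_scores, _nms_boxes = sorted_non_max_suppression_padded(
    _scores, _boxes, max_output_size=2, iou_threshold=0.5)
# The second box is suppressed by the first; the first and third survive.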
+def encode_boxes(boxes, anchors, weights=None):
+  """Encode boxes to targets.
+
+  Args:
+    boxes: a tensor whose last dimension is 4 representing the coordinates
+      of boxes in ymin, xmin, ymax, xmax order.
+    anchors: a tensor whose shape is the same as `boxes` representing the
+      coordinates of anchors in ymin, xmin, ymax, xmax order.
+    weights: None or a list of four float numbers used to scale coordinates.
+
+  Returns:
+    encoded_boxes: a tensor whose shape is the same as `boxes` representing the
+      encoded box targets.
+  """
+  with tf.name_scope('encode_box'):
+      boxes = tf.cast(boxes, dtype=anchors.dtype)
+
+      y_min, x_min, y_max, x_max = tf.split(boxes, 4, axis=-1)
+
+      # y_min = boxes[..., 0:1]
+      # x_min = boxes[..., 1:2]
+      # y_max = boxes[..., 2:3]
+      # x_max = boxes[..., 3:4]
+
+      box_h = y_max - y_min + 1.0
+      box_w = x_max - x_min + 1.0
+      box_yc = y_min + 0.5 * box_h
+      box_xc = x_min + 0.5 * box_w
+
+      anchor_ymin, anchor_xmin, anchor_ymax, anchor_xmax = tf.split(anchors, 4, axis=-1)
+
+      # anchor_ymin = anchors[..., 0:1]
+      # anchor_xmin = anchors[..., 1:2]
+      # anchor_ymax = anchors[..., 2:3]
+      # anchor_xmax = anchors[..., 3:4]
+
+      anchor_h = anchor_ymax - anchor_ymin + 1.0
+      anchor_w = anchor_xmax - anchor_xmin + 1.0
+      anchor_yc = anchor_ymin + 0.5 * anchor_h
+      anchor_xc = anchor_xmin + 0.5 * anchor_w
+
+      encoded_dy = (box_yc - anchor_yc) / anchor_h
+      encoded_dx = (box_xc - anchor_xc) / anchor_w
+      encoded_dh = tf.math.log(box_h / anchor_h)
+      encoded_dw = tf.math.log(box_w / anchor_w)
+
+      if weights:
+        encoded_dy *= weights[0]
+        encoded_dx *= weights[1]
+        encoded_dh *= weights[2]
+        encoded_dw *= weights[3]
+
+      encoded_boxes = tf.concat([encoded_dy, encoded_dx, encoded_dh, encoded_dw], axis=-1)
+  return encoded_boxes
+
+
+def decode_boxes(encoded_boxes, anchors, weights=None):
+  """Decode boxes.
+
+  Args:
+    encoded_boxes: a tensor whose last dimension is 4 representing the
+      coordinates of encoded boxes in ymin, xmin, ymax, xmax order.
+    anchors: a tensor whose shape is the same as `boxes` representing the
+      coordinates of anchors in ymin, xmin, ymax, xmax order.
+    weights: None or a list of four float numbers used to scale coordinates.
+
+  Returns:
+    decoded_boxes: a tensor whose shape is the same as `boxes` representing the
+      decoded box targets.
+  """
+  with tf.name_scope('decode_box'):
+
+      encoded_boxes = tf.cast(encoded_boxes, dtype=anchors.dtype)
+
+      dy, dx, dh, dw = tf.split(encoded_boxes, 4, axis=-1)
+
+      # dy = encoded_boxes[..., 0:1]
+      # dx = encoded_boxes[..., 1:2]
+      # dh = encoded_boxes[..., 2:3]
+      # dw = encoded_boxes[..., 3:4]
+
+      if weights:
+        dy /= weights[0]
+        dx /= weights[1]
+        dh /= weights[2]
+        dw /= weights[3]
+
+      dh = tf.minimum(dh, BBOX_XFORM_CLIP)
+      dw = tf.minimum(dw, BBOX_XFORM_CLIP)
+
+      anchor_ymin, anchor_xmin, anchor_ymax, anchor_xmax = tf.split(anchors, 4, axis=-1)
+
+      # anchor_ymin = anchors[..., 0:1]
+      # anchor_xmin = anchors[..., 1:2]
+      # anchor_ymax = anchors[..., 2:3]
+      # anchor_xmax = anchors[..., 3:4]
+
+      anchor_h = anchor_ymax - anchor_ymin + 1.0
+      anchor_w = anchor_xmax - anchor_xmin + 1.0
+      anchor_yc = anchor_ymin + 0.5 * anchor_h
+      anchor_xc = anchor_xmin + 0.5 * anchor_w
+
+      decoded_boxes_yc = dy * anchor_h + anchor_yc
+      decoded_boxes_xc = dx * anchor_w + anchor_xc
+      decoded_boxes_h = tf.exp(dh) * anchor_h
+      decoded_boxes_w = tf.exp(dw) * anchor_w
+
+      decoded_boxes_ymin = decoded_boxes_yc - 0.5 * decoded_boxes_h
+      decoded_boxes_xmin = decoded_boxes_xc - 0.5 * decoded_boxes_w
+      decoded_boxes_ymax = decoded_boxes_ymin + decoded_boxes_h - 1.0
+      decoded_boxes_xmax = decoded_boxes_xmin + decoded_boxes_w - 1.0
+
+      decoded_boxes = tf.concat(
+          [decoded_boxes_ymin, decoded_boxes_xmin, decoded_boxes_ymax, decoded_boxes_xmax],
+          axis=-1
+      )
+
+  return decoded_boxes
+
+
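# Round-trip sanity check for the pair above: decode(encode(box)) recovers the box.
# All numbers are made up; the weights follow the commonly used [10, 10, 5, 5] choice.
_anchors = tf.constant([[10., 10., 50., 50.]])
_boxes = tf.constant([[12., 8., 48., 56.]])
_weights = [10.0, 10.0, 5.0, 5.0]
_recovered = decode_boxes(encode_boxes(_boxes, _anchors, _weights), _anchors, _weights)
print(_recovered.numpy())  # ~[[12.  8. 48. 56.]]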
+def clip_boxes(boxes, height, width):
+  """Clip boxes.
+
+  Args:
+    boxes: a tensor whose last dimension is 4 representing the coordinates
+      of boxes in ymin, xmin, ymax, xmax order.
+    height: an integer, a scalar, or a tensor such that all dimensions except
+      the last are the same as `boxes`. The last dimension is 1. It represents
+      the height of the image.
+    width: an integer, a scalar, or a tensor such that all dimensions except
+      the last are the same as `boxes`. The last dimension is 1. It represents
+      the width of the image.
+
+  Returns:
+    clipped_boxes: a tensor whose shape is the same as `boxes` representing the
+      clipped boxes.
+  """
+  with tf.name_scope('clip_box'):
+      y_min, x_min, y_max, x_max = tf.split(boxes, 4, axis=-1)
+
+      # y_min = boxes[..., 0:1]
+      # x_min = boxes[..., 1:2]
+      # y_max = boxes[..., 2:3]
+      # x_max = boxes[..., 3:4]
+
+      height = tf.cast(height, dtype=boxes.dtype)
+      width = tf.cast(width, dtype=boxes.dtype)
+
+      clipped_y_min = tf.maximum(tf.minimum(y_min, height - 1.0), 0.0)
+      clipped_y_max = tf.maximum(tf.minimum(y_max, height - 1.0), 0.0)
+      clipped_x_min = tf.maximum(tf.minimum(x_min, width - 1.0), 0.0)
+      clipped_x_max = tf.maximum(tf.minimum(x_max, width - 1.0), 0.0)
+
+      clipped_boxes = tf.concat([clipped_y_min, clipped_x_min, clipped_y_max, clipped_x_max], axis=-1)
+
+  return clipped_boxes
+
+
+def filter_boxes(boxes, scores, min_size, height, width, scale):
+  """Filter out boxes that are too small.
+
+  Args:
+    boxes: a tensor whose last dimension is 4 representing the coordinates
+      of boxes in ymin, xmin, ymax, xmax order.
+    scores: a tensor such that all dimensions except the last are the same as
+      `boxes`. The last dimension is 1. It represents the scores.
+    min_size: an integer specifying the minimal size.
+    height: an integer, a scalar, or a tensor such that all dimensions except
+      the last are the same as `boxes`. The last dimension is 1. It represents
+      the height of the image.
+    width: an integer, a scalar, or a tensor such that all dimensions except
+      the last are the same as `boxes`. The last dimension is 1. It represents
+      the width of the image.
+    scale: an integer, a scalar, or a tensor such that all dimensions except
+      the last are the same as `boxes`. The last dimension is 1. It represents
+      the scale of the image.
+
+  Returns:
+    filtered_boxes: a tensor whose shape is the same as `boxes` representing the
+      filtered boxes.
+    filtered_scores: a tensor whose shape is the same as `scores` representing
+      the filtered scores.
+  """
+  with tf.name_scope('filter_box'):
+      y_min, x_min, y_max, x_max = tf.split(boxes, 4, axis=-1)
+
+      h = y_max - y_min + 1.0
+      w = x_max - x_min + 1.0
+      yc = y_min + h / 2.0
+      xc = x_min + w / 2.0
+
+      height = tf.cast(height, dtype=boxes.dtype)
+      width = tf.cast(width, dtype=boxes.dtype)
+      scale = tf.cast(scale, dtype=boxes.dtype)
+
+      min_size = tf.cast(tf.maximum(min_size, 1), dtype=boxes.dtype)
+
+      size_mask = tf.logical_and(
+          tf.greater_equal(h, min_size * scale),
+          tf.greater_equal(w, min_size * scale)
+      )
+
+      center_mask = tf.logical_and(tf.less(yc, height), tf.less(xc, width))
+      selected_mask = tf.logical_and(size_mask, center_mask)
+
+      filtered_scores = tf.where(selected_mask, scores, tf.zeros_like(scores))
+      filtered_boxes = tf.cast(selected_mask, dtype=boxes.dtype) * boxes
+
+  return filtered_boxes, filtered_scores
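+
+# A minimal usage sketch (illustrative, not part of the original code): with
+# min_size=10 and scale=1, a 5x5 box is suppressed (zeroed) while a 20x20 box
+# whose center lies inside the image is kept.
+#
+#   boxes = tf.constant([[0.0, 0.0, 4.0, 4.0], [0.0, 0.0, 19.0, 19.0]])
+#   scores = tf.constant([[0.9], [0.8]])
+#   filter_boxes(boxes, scores, min_size=10, height=100, width=100, scale=1)
+#   # => boxes [[0., 0., 0., 0.], [0., 0., 19., 19.]], scores [[0.], [0.8]]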
+
+
+def to_normalized_coordinates(boxes, height, width):
+  """Converted absolute box coordinates to normalized ones.
+
+  Args:
+    boxes: a tensor whose last dimension is 4 representing the coordinates
+      of boxes in ymin, xmin, ymax, xmax order.
+    height: an integer, a scalar tensor, or a tensor whose leading dimensions
+      match those of `boxes` and whose last dimension is 1. It represents the
+      height of the image.
+    width: an integer, a scalar tensor, or a tensor whose leading dimensions
+      match those of `boxes` and whose last dimension is 1. It represents the
+      width of the image.
+
+  Returns:
+    normalized_boxes: a tensor whose shape is the same as `boxes` representing
+      the boxes in normalized coordinates.
+  """
+  with tf.name_scope('normalize_box'):
+      height = tf.cast(height, dtype=boxes.dtype)
+      width = tf.cast(width, dtype=boxes.dtype)
+
+      y_min, x_min, y_max, x_max = tf.split(boxes, 4, axis=-1)
+
+      y_min = y_min / height
+      x_min = x_min / width
+      y_max = y_max / height
+      x_max = x_max / width
+
+      normalized_boxes = tf.concat([y_min, x_min, y_max, x_max], axis=-1)
+
+  return normalized_boxes
+
+
+def to_absolute_coordinates(boxes, height, width):
+  """Converted normalized box coordinates to absolute ones.
+
+  Args:
+    boxes: a tensor whose last dimension is 4 representing the coordinates
+      of boxes in ymin, xmin, ymax, xmax order.
+    height: an integer, a scalar tensor, or a tensor whose leading dimensions
+      match those of `boxes` and whose last dimension is 1. It represents the
+      height of the image.
+    width: an integer, a scalar tensor, or a tensor whose leading dimensions
+      match those of `boxes` and whose last dimension is 1. It represents the
+      width of the image.
+
+  Returns:
+    absolute_boxes: a tensor whose shape is the same as `boxes` representing the
+      boxes in absolute coordinates.
+  """
+  with tf.name_scope('denormalize_box'):
+      height = tf.cast(height, dtype=boxes.dtype)
+      width = tf.cast(width, dtype=boxes.dtype)
+
+      y_min, x_min, y_max, x_max = tf.split(boxes, 4, axis=-1)
+
+      y_min = y_min * height
+      x_min = x_min * width
+      y_max = y_max * height
+      x_max = x_max * width
+
+      absolute_boxes = tf.concat([y_min, x_min, y_max, x_max], axis=-1)
+
+  return absolute_boxes
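+
+# A minimal round-trip sketch (illustrative, not part of the original code):
+#
+#   boxes = tf.constant([[10.0, 20.0, 50.0, 80.0]])
+#   normalized = to_normalized_coordinates(boxes, height=100, width=200)
+#   # => [[0.1, 0.1, 0.5, 0.4]]
+#   to_absolute_coordinates(normalized, height=100, width=200)
+#   # => [[10.0, 20.0, 50.0, 80.0]]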

+ 256 - 0
TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/utils/coco_utils.py

@@ -0,0 +1,256 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Util functions to manipulate masks."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+import pycocotools.mask as coco_mask
+
+POLYGON_PAD_VALUE = -3
+POLYGON_SEPARATOR = -1
+MASK_SEPARATOR = -2
+
+
+def _np_array_split(a, v):
+  """Split numpy array by separator value.
+
+  Args:
+    a: 1-D numpy.array.
+    v: number. Separator value, e.g. -1.
+
+  Returns:
+    2-D list of clean separated arrays.
+
+  Example:
+    a = [1, 2, 3, 4, -1, 5, 6, 7, 8]
+    b = _np_array_split(a, -1)
+    # Output: b = [[1, 2, 3, 4], [5, 6, 7, 8]]
+  """
+  a = np.array(a)
+  arrs = np.split(a, np.where(a[:] == v)[0])
+  return [e if (len(e) <= 0 or e[0] != v) else e[1:] for e in arrs]
+
+
+def _unflat_polygons(x):
+  """Unflats/recovers 1-d padded polygons to 3-d polygon list.
+
+  Args:
+    x: numpy.array. shape [num_elements, 1], num_elements = num_obj *
+      num_vertex + padding.
+
+  Returns:
+    A list of three dimensions: [#obj, #polygon, #vertex]
+  """
+  num_segs = _np_array_split(x, MASK_SEPARATOR)
+  polygons = []
+  for s in num_segs:
+    polygons.append(_np_array_split(s, POLYGON_SEPARATOR))
+  polygons = [[polygon.tolist() for polygon in obj] for obj in polygons]  # pylint: disable=g-complex-comprehension
+  return polygons
+
+
+def _denormalize_to_coco_bbox(bbox, height, width):
+  """Denormalize bounding box.
+
+  Args:
+    bbox: numpy.array[float]. Normalized bounding box. Format: ['ymin', 'xmin',
+      'ymax', 'xmax'].
+    height: int. image height.
+    width: int. image width.
+
+  Returns:
+    [x, y, width, height]
+  """
+  y1, x1, y2, x2 = bbox
+  y1 *= height
+  x1 *= width
+  y2 *= height
+  x2 *= width
+  box_height = y2 - y1
+  box_width = x2 - x1
+  return [float(x1), float(y1), float(box_width), float(box_height)]
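+
+# Worked example (illustrative, not part of the original code): a normalized
+# box [0.1, 0.2, 0.5, 0.6] on a 100x200 image maps to y1=10, x1=40, y2=50,
+# x2=120, and is returned in COCO [x, y, width, height] order as
+# [40.0, 10.0, 80.0, 40.0].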
+
+
+def _extract_image_info(prediction, b):
+  return {
+      'id': int(prediction['source_id'][b]),
+      'width': int(prediction['width'][b]),
+      'height': int(prediction['height'][b]),
+  }
+
+
+def _extract_bbox_annotation(prediction, b, obj_i):
+  """Constructs COCO format bounding box annotation."""
+  height = prediction['height'][b]
+  width = prediction['width'][b]
+
+  bbox = _denormalize_to_coco_bbox(
+    prediction['groundtruth_boxes'][b][obj_i, :], height, width)
+
+  if 'groundtruth_area' in prediction:
+    area = float(prediction['groundtruth_area'][b][obj_i])
+
+  else:
+    # Using the box area to replace the polygon area. This value will not affect
+    # real evaluation but may fail the unit test.
+    area = bbox[2] * bbox[3]
+
+  annotation = {
+      'id': b * 1000 + obj_i,  # placeholder annotation id.
+      'image_id': int(prediction['source_id'][b]),  # source_id,
+      'category_id': int(prediction['groundtruth_classes'][b][obj_i]),
+      'bbox': bbox,
+      'iscrowd': int(prediction['groundtruth_is_crowd'][b][obj_i]),
+      'area': area,
+      'segmentation': [],
+  }
+  return annotation
+
+
+def _extract_polygon_info(prediction, polygons, b, obj_i):
+  """Constructs 'area' and 'segmentation' fields.
+
+  Args:
+    prediction: dict[str, numpy.array]. Model outputs. The value dimension is
+      [batch_size, #objects, #features, ...]
+    polygons: list[list[list]]. Dimensions are [#objects, #polygon, #vertex].
+    b: batch index.
+    obj_i: object index.
+
+  Returns:
+    dict[str, numpy.array]. COCO format annotation with 'area' and
+    'segmentation'.
+  """
+  annotation = {}
+  if 'groundtruth_area' in prediction:
+    groundtruth_area = float(prediction['groundtruth_area'][b][obj_i])
+  else:
+    height = prediction['height'][b]
+    width = prediction['width'][b]
+    rles = coco_mask.frPyObjects(polygons[obj_i], height, width)
+    groundtruth_area = coco_mask.area(rles)
+  annotation['area'] = groundtruth_area
+
+  annotation['segmentation'] = polygons[obj_i]
+
+  # Add dummy polygon to is_crowd instance.
+  if not annotation['segmentation'][0]:
+    # Adds a dummy polygon in case there is no segmentation.
+    # Note that this could affect eval number in a very tiny amount since
+    # for the instance without masks, it creates a fake single pixel mask
+    # in the center of the box.
+    height = prediction['height'][b]
+    width = prediction['width'][b]
+    bbox = _denormalize_to_coco_bbox(
+      prediction['groundtruth_boxes'][b][obj_i, :], height, width)
+    xcenter = bbox[0] + bbox[2] / 2.0
+    ycenter = bbox[1] + bbox[3] / 2.0
+
+    annotation['segmentation'] = [[
+      xcenter, ycenter, xcenter, ycenter, xcenter, ycenter, xcenter, ycenter
+    ]]
+
+  return annotation
+
+
+def _extract_categories(annotations):
+  """Extract categories from annotations."""
+  categories = {}
+  for anno in annotations:
+    category_id = int(anno['category_id'])
+    categories[category_id] = {'id': category_id}
+  return list(categories.values())
+
+
+def extract_coco_groundtruth(prediction, include_mask=False):
+  """Extract COCO format groundtruth.
+
+  Args:
+    prediction: dictionary holding a batch of prediction results. The first
+      dimension of each element is the batch size.
+    include_mask: True for including masks in the output annotations.
+
+  Returns:
+    Tuple of (images, annotations).
+    images: list[dict]. Required keys: 'id', 'width' and 'height'. The values
+      are the image id, width and height.
+    annotations: list[dict]. Required keys: {'id', 'source_id', 'category_id',
+      'bbox', 'iscrowd'} when include_mask=False; when include_mask=True,
+      {'area', 'segmentation'} are also required. The 'id' value is the
+      annotation id and can be any **positive** number (>=1).
+      Refer to http://cocodataset.org/#format-data for more details.
+  Raises:
+    ValueError: If any groundtruth field is missing.
+  """
+  required_fields = [
+      'source_id', 'width', 'height', 'num_groundtruth_labels',
+      'groundtruth_boxes', 'groundtruth_classes'
+  ]
+  if include_mask:
+    required_fields += ['groundtruth_polygons', 'groundtruth_area']
+  for key in required_fields:
+    if key not in prediction.keys():
+      raise ValueError('Missing groundtruth field: "{}" keys: {}'.format(
+          key, prediction.keys()))
+
+  images = []
+  annotations = []
+  for b in range(prediction['source_id'].shape[0]):
+    # Constructs image info.
+    image = _extract_image_info(prediction, b)
+    images.append(image)
+
+    if include_mask:
+      flatten_padded_polygons = prediction['groundtruth_polygons'][b]
+      flatten_polygons = np.delete(
+          flatten_padded_polygons,
+          np.where(flatten_padded_polygons[:] == POLYGON_PAD_VALUE)[0])
+      polygons = _unflat_polygons(flatten_polygons)
+
+    # Constructs annotations.
+    num_labels = prediction['num_groundtruth_labels'][b]
+    for obj_i in range(num_labels):
+      annotation = _extract_bbox_annotation(prediction, b, obj_i)
+
+      if include_mask:
+        polygon_info = _extract_polygon_info(prediction, polygons, b, obj_i)
+        annotation.update(polygon_info)
+
+      annotations.append(annotation)
+  return images, annotations
+
+
+def create_coco_format_dataset(images,
+                               annotations,
+                               regenerate_annotation_id=True):
+  """Creates COCO format dataset with COCO format images and annotations."""
+  if regenerate_annotation_id:
+    for i in range(len(annotations)):
+      # WARNING: The annotation id must be positive.
+      annotations[i]['id'] = i + 1
+
+  categories = _extract_categories(annotations)
+  dataset = {
+      'images': images,
+      'annotations': annotations,
+      'categories': categories,
+  }
+  return dataset
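+
+# A minimal usage sketch (illustrative, not part of the original code); the
+# COCO() calls follow the standard pycocotools pattern:
+#
+#   from pycocotools.coco import COCO
+#   images, annotations = extract_coco_groundtruth(prediction)
+#   dataset = create_coco_format_dataset(images, annotations)
+#   coco_gt = COCO()
+#   coco_gt.dataset = dataset
+#   coco_gt.createIndex()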

+ 232 - 0
TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/utils/decorators.py

@@ -0,0 +1,232 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import sys
+
+import atexit
+import functools
+import inspect
+import signal
+import wrapt
+
+__all__ = ["atexit_hook"]
+
+_executed_exit_fns = set()
+_registered_exit_fns = set()
+_registered_objects = set()
+
+
+def register_atexit_fn(fun=None, signals=None, logfun=lambda s: print(s, file=sys.stderr)):
+    """Register a function which will be executed on "normal"
+    interpreter exit or in case one of the `signals` is received
+    by this process (differently from atexit.register()).
+    Also, it makes sure to execute any other function which was
+    previously registered via signal.signal(). If any, it will be
+    executed after our own `fun`.
+
+    Functions which were already registered or executed via this
+    function will be ignored.
+
+    Note: there's no way to escape SIGKILL, SIGSTOP or os._exit(0)
+    so don't bother trying.
+
+    You can use this either as a function or as a decorator:
+
+        @register_atexit_fn
+        def cleanup():
+            pass
+
+        # ...or
+
+        register_atexit_fn(cleanup)
+
+    Note about Windows: this has only been lightly tested there and did not
+    behave exactly as on UNIX, so it may not work on Windows.
+
+    Parameters:
+
+    - fun: a callable
+    - signals: a list of signals for which this function will be
+      executed (default SIGTERM)
+    - logfun: a logging function which is called when a signal is
+      received. Default: print to standard error. May be set to
+      None if no logging is desired.
+    """
+    '''
+    Source: https://github.com/torvalds/linux/blob/master/include/linux/signal.h
+
+    *    +--------------------+-----------------+
+    *    |  POSIX signal      |  default action |
+    *    +--------------------+-----------------+
+    *    |  SIGHUP            |    terminate    |
+    *    |  SIGINT            |    terminate    |
+    *    |  SIGQUIT           |    coredump     |
+    *    |  SIGILL            |    coredump     |
+    *    |  SIGTRAP           |    coredump     |
+    *    |  SIGABRT/SIGIOT    |    coredump     |
+    *    |  SIGBUS            |    coredump     |
+    *    |  SIGFPE            |    coredump     |
+    *    |  SIGKILL           |    terminate(+) |
+    *    |  SIGUSR1           |    terminate    |
+    *    |  SIGSEGV           |    coredump     |
+    *    |  SIGUSR2           |    terminate    |
+    *    |  SIGPIPE           |    terminate    |
+    *    |  SIGALRM           |    terminate    |
+    *    |  SIGTERM           |    terminate    |
+    *    |  SIGCHLD           |    ignore       |
+    *    |  SIGCONT           |    ignore(*)    |
+    *    |  SIGSTOP           |    stop(*)(+)   |
+    *    |  SIGTSTP           |    stop(*)      |
+    *    |  SIGTTIN           |    stop(*)      |
+    *    |  SIGTTOU           |    stop(*)      |
+    *    |  SIGURG            |    ignore       |
+    *    |  SIGXCPU           |    coredump     |
+    *    |  SIGXFSZ           |    coredump     |
+    *    |  SIGVTALRM         |    terminate    |
+    *    |  SIGPROF           |    terminate    |
+    *    |  SIGPOLL/SIGIO     |    terminate    |
+    *    |  SIGSYS/SIGUNUSED  |    coredump     |
+    *    |  SIGSTKFLT         |    terminate    |
+    *    |  SIGWINCH          |    ignore       |
+    *    |  SIGPWR            |    terminate    |
+    *    |  SIGRTMIN-SIGRTMAX |    terminate    |
+    *    +--------------------+-----------------+
+    *    |  non-POSIX signal  |  default action |
+    *    +--------------------+-----------------+
+    *    |  SIGEMT            |    coredump     |
+    *    +--------------------+-----------------+
+    '''
+
+    if signals is None:
+        signals = [signal.SIGTERM]
+
+    def stringify_sig(signum):
+        if sys.version_info < (3, 5):
+            smap = dict([(getattr(signal, x), x) for x in dir(signal) if x.startswith('SIG')])
+            return smap.get(signum, signum)
+        else:
+            return signum
+
+    def fun_wrapper():
+        if fun not in _executed_exit_fns:
+            try:
+                fun()
+            finally:
+                _executed_exit_fns.add(fun)
+
+    def signal_wrapper(signum=None, frame=None):
+        if signum is not None:
+            if logfun is not None:
+                logfun("signal {} received by process with PID {}".format(stringify_sig(signum), os.getpid()))
+        fun_wrapper()
+        # Only return the original signal this process was hit with
+        # in case fun returns with no errors, otherwise process will
+        # return with sig 1.
+        if signum is not None:
+            if signum == signal.SIGINT:
+                raise KeyboardInterrupt
+            # XXX - should we do the same for SIGTERM / SystemExit?
+            sys.exit(signum)
+
+    def register_fun(fun, signals):
+        if not callable(fun):
+            raise TypeError("{!r} is not callable".format(fun))
+        set([fun])  # raises TypeError if fun is not hashable
+
+        signals = set(signals)
+        for sig in signals:
+            # Register function for this signal and pop() the previously
+            # registered one (if any). This can either be a callable,
+            # SIG_IGN (ignore signal) or SIG_DFL (perform default action
+            # for signal).
+            old_handler = signal.signal(sig, signal_wrapper)
+            if old_handler not in (signal.SIG_DFL, signal.SIG_IGN):
+                # ...just for extra safety.
+                if not callable(old_handler):
+                    continue
+                # This is needed otherwise we'll get a KeyboardInterrupt
+                # strace on interpreter exit, even if the process exited
+                # with sig 0.
+                if (sig == signal.SIGINT and old_handler is signal.default_int_handler):
+                    continue
+                # There was a function which was already registered for this
+                # signal. Register it again so it will get executed (after our
+                # new fun).
+                if old_handler not in _registered_exit_fns:
+                    atexit.register(old_handler)
+                    _registered_exit_fns.add(old_handler)
+
+        # This further registration will be executed in case of clean
+        # interpreter exit (no signals received).
+        if fun not in _registered_exit_fns or not signals:
+            atexit.register(fun_wrapper)
+            _registered_exit_fns.add(fun)
+
+    # This piece of machinery handles 3 usage cases. register_atexit_fn()
+    # used as:
+    # - a function
+    # - a decorator without parentheses
+    # - a decorator with parentheses
+    if fun is None:
+
+        def outer(fun):
+            # `register_fun` performs the registration but returns None, so
+            # the decorated function itself must be returned here. Note that a
+            # bare `@functools.wraps` (without arguments) would have replaced
+            # `outer` with a partial and skipped registration entirely.
+            register_fun(fun, signals)
+            return fun
+
+        return outer
+    else:
+        register_fun(fun, signals)
+        return fun
+
+
+def atexit_hook(*args, **kwargs):
+
+    @wrapt.decorator
+    def wrapper(wrapped, instance, args, kwargs):
+
+        if not hasattr(wrapped, "__atexit__"):
+            raise AttributeError("The class `%s` does not have an `__atexit__` method" % wrapped.__name__)
+
+        def _func():
+            if instance is None:
+                if inspect.isclass(wrapped):
+                    # Decorator was applied to a class.
+                    return wrapped(*args, **kwargs)
+                else:
+                    # Decorator was applied to a function or staticmethod.
+                    return wrapped(*args, **kwargs)
+            else:
+                if inspect.isclass(instance):
+                    # Decorator was applied to a classmethod.
+                    return wrapped(*args, **kwargs)
+                else:
+                    # Decorator was applied to an instancemethod.
+                    return wrapped(*args, **kwargs)
+
+        _impl = _func()
+
+        object_id = hex(id(_impl))
+
+        if object_id not in _registered_objects:
+            register_atexit_fn(fun=_impl.__atexit__, signals=[signal.SIGTERM, signal.SIGINT])
+            _registered_objects.add(object_id)
+
+        return _impl
+
+    return wrapper(*args, **kwargs)
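+
+# A minimal usage sketch (illustrative, not part of the original code): the
+# decorated class must define an `__atexit__` method; it is registered for
+# SIGTERM/SIGINT and normal interpreter exit when an instance is created.
+#
+#   @atexit_hook
+#   class Worker(object):
+#
+#       def __atexit__(self):
+#           print("cleaning up")
+#
+#   worker = Worker()  # `worker.__atexit__` will run on exit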

+ 94 - 0
TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/utils/distributed_utils.py

@@ -0,0 +1,94 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import sys
+
+__all__ = ["MPI_local_rank", "MPI_rank", "MPI_size", "MPI_rank_and_size", "MPI_is_distributed"]
+
+
+def MPI_is_distributed():
+    """Return whether a distributed training/inference runtime is being used.
+    :return: bool
+    """
+    return all(var in os.environ for var in ["OMPI_COMM_WORLD_RANK", "OMPI_COMM_WORLD_SIZE"])
+
+
+def MPI_local_rank():
+
+    if "OMPI_COMM_WORLD_LOCAL_RANK" in os.environ:
+        return int(os.environ.get("OMPI_COMM_WORLD_LOCAL_RANK"))
+
+    else:
+        return 0
+
+
+def MPI_rank():
+    return MPI_rank_and_size()[0]
+
+
+def MPI_size():
+    return MPI_rank_and_size()[1]
+
+
+def MPI_rank_and_size():
+
+    if "tensorflow" in sys.modules:
+        return mpi_env_MPI_rank_and_size()
+
+    else:
+        return 0, 1
+
+
+# Source: https://github.com/horovod/horovod/blob/c3626e/test/common.py#L25
+def mpi_env_MPI_rank_and_size():
+    """Get MPI rank and size from environment variables and return them as a
+    tuple of integers.
+    Most MPI implementations have an `mpirun` or `mpiexec` command that will
+    run an MPI executable and set up all communication necessary between the
+    different processors. As part of that set up, they will set environment
+    variables that contain the rank and size of the MPI_COMM_WORLD
+    communicator. We can read those environment variables from Python in order
+    to ensure that `hvd.rank()` and `hvd.size()` return the expected values.
+    Since MPI is just a standard, not an implementation, implementations
+    typically choose their own environment variable names. This function tries
+    to support several different implementations, but really it only needs to
+    support whatever implementation we want to use for the TensorFlow test
+    suite.
+    If this is not running under MPI, then defaults of rank zero and size one
+    are returned. (This is appropriate because when you call MPI_Init in an
+    application not started with mpirun, it will create a new independent
+    communicator with only one process in it.)
+
+    Source: https://github.com/horovod/horovod/blob/c3626e/test/common.py#L25
+    """
+    rank_env = 'PMI_RANK OMPI_COMM_WORLD_RANK'.split()
+    size_env = 'PMI_SIZE OMPI_COMM_WORLD_SIZE'.split()
+
+    for rank_var, size_var in zip(rank_env, size_env):
+        rank = os.environ.get(rank_var)
+        size = os.environ.get(size_var)
+        if rank is not None and size is not None:
+            return int(rank), int(size)
+
+    # Default to rank zero and size one if there are no environment variables
+    return 0, 1
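+
+# A minimal usage sketch (illustrative, not part of the original code), e.g.
+# for a job launched with `mpirun -np 8 python train.py`. Note that
+# MPI_rank_and_size() only reads the MPI environment once `tensorflow` is in
+# sys.modules (see above), and reports rank 0 / size 1 otherwise.
+#
+#   if MPI_is_distributed():
+#       print("worker %d of %d (local rank %d)" % (MPI_rank(), MPI_size(), MPI_local_rank()))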

+ 134 - 0
TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/utils/lazy_imports.py

@@ -0,0 +1,134 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2006-2011, NIPY Developers
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+#     * Redistributions of source code must retain the above copyright
+#        notice, this list of conditions and the following disclaimer.
+#
+#     * Redistributions in binary form must reproduce the above
+#        copyright notice, this list of conditions and the following
+#        disclaimer in the documentation and/or other materials provided
+#        with the distribution.
+#
+#     * Neither the name of the NIPY Developers nor the names of any
+#        contributors may be used to endorse or promote products derived
+#        from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# Source: https://github.com/nipy/nitime/blob/c8eb314/nitime/lazyimports.py
+
+"""This module provides lazy import functionality to improve the import
+performance of nitime. For example, some parts of nitime leverage and import
+matplotlib, which is quite a big package, yet most of the nitime code does not
+depend on matplotlib. By lazily-loading a module, we defer the overhead of
+importing it until the first time it is actually used, thereby speeding up
+nitime imports.
+
+A generic :class:`LazyImport` class is implemented which takes the module name
+as a parameter, and acts as a proxy for that module, importing it only when
+the module is used, but effectively acting as the module in every other way
+(including inside IPython with respect to introspection and tab completion)
+with the *exception* of reload() - reloading a :class:`LazyImport` raises an
+:class:`ImportError`.
+
+Commonly used nitime lazy imports are also defined in :mod:`nitime.lazy`, so
+they can be reused throughout nitime.
+"""
+
+import os
+import sys
+import types
+
+
+class LazyImport(types.ModuleType):
+    """
+    This class takes the module name as a parameter, and acts as a proxy for
+    that module, importing it only when the module is used, but effectively
+    acting as the module in every other way (including inside IPython with
+    respect to introspection and tab completion) with the *exception* of
+    reload()- reloading a :class:`LazyImport` raises an :class:`ImportError`.
+
+    >>> mlab = LazyImport('matplotlib.mlab')
+
+    No import happens on the above line, until we do something like call an
+    ``mlab`` method or try to do tab completion or introspection on ``mlab``
+    in IPython.
+
+    >>> mlab
+    <module 'matplotlib.mlab' will be lazily loaded>
+
+    Now the :class:`LazyImport` will do an actual import, and call the dist
+    function of the imported module.
+
+    >>> mlab.dist(1969,2011)
+    42.0
+    """
+
+    def __getattribute__(self, x):
+        # This method will be called only once, since we'll change
+        # self.__class__ to LoadedLazyImport, and __getattribute__ will point
+        # to module.__getattribute__
+
+        name = object.__getattribute__(self, '__name__')
+        __import__(name)
+
+        # if name above is 'package.foo.bar', package is returned, the docs
+        # recommend that, in order to get back the full thing, we import
+        # and then look up the full name in sys.modules, see:
+        # http://docs.python.org/library/functions.html#__import__
+
+        module = sys.modules[name]
+
+        # Now that we've done the import, cut out the middleman and make self
+        # act as the imported module
+
+        class LoadedLazyImport(types.ModuleType):
+            __getattribute__ = module.__getattribute__
+            __repr__ = module.__repr__
+
+        object.__setattr__(self, '__class__', LoadedLazyImport)
+
+        # The next line will make "reload(l)" a silent no-op
+        return module.__getattribute__(x)
+
+    def __repr__(self):
+        return "<module '%s' will be lazily loaded>" % object.__getattribute__(self, '__name__')
+
+
+if 'READTHEDOCS' in os.environ:
+    lazy_doc = """
+               WARNING: To get Sphinx documentation to build we disable
+               LazyImports, which makes Sphinx incorrectly report this
+               class as having a base class of object. In reality,
+               :class:`LazyImport`'s base class is
+               :class:`types.ModuleType`.
+               """
+
+    lazy_doc += LazyImport.__doc__
+
+    class LazyImport(object):
+        __doc__ = lazy_doc
+
+        def __init__(self, x):
+            __import__(x)
+            self.module = sys.modules[x]
+
+        def __getattr__(self, x):
+            return self.module.__getattribute__(x)

+ 323 - 0
TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/utils/logging_backend.py

@@ -0,0 +1,323 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import sys
+
+import inspect
+import operator
+import six
+import subprocess
+import time
+
+from enum import Enum
+
+from mask_rcnn.utils.logging_formatter import logging
+
+from mask_rcnn.utils.decorators import atexit_hook
+from mask_rcnn.utils.metaclasses import SingletonMetaClass
+
+from mask_rcnn.utils.meters import ACCEPTED_INT_NUMBER_FORMATS
+from mask_rcnn.utils.meters import ACCEPTED_FLOAT_NUMBER_FORMATS
+
+import dllogger
+from dllogger import Verbosity
+
+__all__ = ["LoggingBackend", "LoggingScope", "DistributedStrategy", "RuntimeMode"]
+
+
+class _BaseEnum(Enum):
+
+    @classmethod
+    def __values__(cls):
+        return [getattr(cls, m.name) for m in cls]
+
+
+class LoggingScope(_BaseEnum):
+    ITER = 'Iteration'
+    EPOCH = 'Epoch'
+
+
+class DistributedStrategy(_BaseEnum):
+    REDUCE_SUM = 'AllGather'
+    REDUCE_MEAN = 'AllReduce'
+    NONE = None
+
+
+class RuntimeMode(_BaseEnum):
+    TRAIN = 'train'
+    INFERENCE = 'inference'
+    VALIDATION = 'validation'
+    TEST = 'test'
+
+
+def validate_runtime_mode(requested_mode):
+    cls_attributes = inspect.getmembers(RuntimeMode, lambda a: not (inspect.isroutine(a)))
+    authorized_modes = [a for a in cls_attributes if not (a[0].startswith('__') and a[0].endswith('__'))]
+
+    for _, mode in authorized_modes:
+        if mode == requested_mode:
+            return
+    else:
+        raise ValueError(
+            "Unknown requested mode: `%s` - Authorized: %s" % (requested_mode, [name for name, _ in authorized_modes])
+        )
+
+
+@atexit_hook
+@six.add_metaclass(SingletonMetaClass)
+class LoggingBackend(object):
+
+    SEP_TARGET_LENGTH = 50
+
+    # ================= Logging Methods ================= #
+
+    LOGGING_PREFIX = ""
+
+    def __init__(self):
+        # super(LoggingBackend, self).__init__()
+
+        self.runtime_initialized = {"train": False, "evaluation": False}
+
+    # ================= Constructor/Destructor Methods ================= #
+
+    def __atexit__(self):
+
+        is_success = not (hasattr(sys, "last_traceback") and sys.last_traceback is not None)
+
+        print()  # Visual spacing
+        if is_success:
+            self.log_info("Job finished with status: `SUCCESS`")
+        else:
+            logging.error("Job finished with an uncaught exception: `FAILURE`")
+
+    def log_debug(self, message):
+        logging.debug("%s%s" % (self.LOGGING_PREFIX, message))
+
+    def log_info(self, message):
+        logging.info("%s%s" % (self.LOGGING_PREFIX, message))
+
+    def log_warning(self, message):
+        logging.warning("%s%s" % (self.LOGGING_PREFIX, message))
+
+    def log_error(self, message):
+        logging.error("%s%s" % (self.LOGGING_PREFIX, message))
+
+    def log_critical(self, message):
+        logging.critical("%s%s" % (self.LOGGING_PREFIX, message))
+
+    # ================= Automated Logging Methods ================= #
+    @staticmethod
+    def format_metric_value(value):
+
+        if isinstance(value, ACCEPTED_FLOAT_NUMBER_FORMATS):
+
+            if value != 0 and (abs(value) < 1e-4 or abs(value) > 1e4):
+                print_value = "%.4e" % value
+
+            else:
+                print_value = "{}".format(round(value, 5))
+
+        elif isinstance(value, ACCEPTED_INT_NUMBER_FORMATS):
+            print_value = "%d" % value
+
+        else:
+            print_value = value
+
+        return print_value
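+
+    # Example outputs (illustrative, not part of the original code):
+    #   format_metric_value(1.2e-05)    -> "1.2000e-05"  (magnitude outside [1e-4, 1e4])
+    #   format_metric_value(3.14159265) -> "3.14159"     (rounded to 5 digits)
+    #   format_metric_value(42)         -> "42"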
+
+    # ================= Runtime Logging Method ================= #
+    def log_runtime(self, is_train=False):
+
+        if is_train:
+            if not self.runtime_initialized["train"]:
+                self.runtime_initialized["train"] = True
+                _message = "                 Start Training                  "
+            else:
+                _message = "                Restart Training                 "
+
+        else:
+            if not self.runtime_initialized["evaluation"]:
+                self.runtime_initialized["evaluation"] = True
+                _message = "                Start Evaluation                 "
+            else:
+                _message = "               Restart Evaluation                "
+
+        print()  # Visual Spacing
+        self.log_info("# ============================================= #")
+        self.log_info(_message)
+        self.log_info("# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% #")
+        print()  # Visual Spacing
+
+    # ================= Automated Logging Methods ================= #
+
+    def log_git_status(self):
+
+        git_metadata = dict()
+
+        def get_cmd_result(cmd):
+            return subprocess.check_output(cmd, shell=True).decode("utf-8").strip()
+
+        try:
+            git_metadata["branch_name"] = get_cmd_result("git symbolic-ref -q HEAD | cut -d/ -f3-")  # current branch
+            git_metadata["commit_id"] = get_cmd_result("git rev-parse HEAD")  # current commit ID
+            git_metadata["remote_url"] = get_cmd_result("git remote get-url origin")  # git origin url
+
+            if git_metadata["branch_name"] == "":
+                del git_metadata["branch_name"]
+
+        except subprocess.CalledProcessError:  # Not a git repository
+            pass
+
+        if git_metadata is None:
+            raise ValueError("`git_metadata` value received is `None`")
+
+        self.log_info("===================================== GIT REPOSITORY =====================================")
+        for key, value in sorted(git_metadata.items(), key=operator.itemgetter(0)):
+            self.log_info("%s: %s" % (key.replace("_", " ").upper(), value))
+        self.log_info("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n")
+
+    def log_model_statistics(self, model_statistics=None):
+
+        if model_statistics is None:
+            raise ValueError("`model_statistics` value received is `None`")
+
+        if not isinstance(model_statistics, dict):
+            raise ValueError("`model_statistics` should be a `dict`")
+
+        self.log_info("==================================== MODEL STATISTICS ====================================")
+        for key, value in sorted(model_statistics.items(), key=operator.itemgetter(0)):
+            self.log_info("%s: %s" % (key, value))
+        self.log_info("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n")
+
+    def log_trainable_variables(self, var_list=None):
+
+        if var_list is None:
+            raise ValueError("`var_list` value received is `None`")
+
+        self.log_info("=================================== TRAINABLE VARIABLES ==================================")
+        for idx, (var_name, var_shape) in enumerate(var_list):
+            self.log_info(
+                "[#{idx:04d}] {name:<60s} => {shape}".format(idx=idx + 1, name=var_name, shape=str(var_shape))
+            )
+        self.log_info("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n")
+
+    # ================= Step Logging Method ================= #
+
+    def log_step(self, iteration, throughput, gpu_stats):
+
+        # print()  # Visual Spacing
+        self.log_info("timestamp: %s" % time.time())
+        self.log_info("iteration: %d" % int(iteration))
+
+        if throughput is not None:
+            self.log_info("throughput: %.1f samples/sec" % float(throughput))
+        else:
+            self.log_info("throughput: None")
+
+    def log_amp_runtime(self, current_loss_scale, steps_non_skipped, steps_since_last_scale):
+
+        header_name = " AMP Statistics "
+        reference_len = int((LoggingBackend.SEP_TARGET_LENGTH - len(header_name)) / 2)
+
+        if current_loss_scale is not None or steps_since_last_scale is not None:
+            self.log_info(
+                "%s%s%s" % (
+                    "=" * reference_len, header_name, "=" *
+                    (LoggingBackend.SEP_TARGET_LENGTH - len(header_name) - reference_len)
+                )
+            )
+
+            self.log_info("Steps - Non Skipped: %s" % steps_non_skipped)
+
+            if steps_since_last_scale is not None:
+                self.log_info("Steps - Since last loss scale: %s" % steps_since_last_scale)
+
+            if current_loss_scale is not None:
+                self.log_info("Loss Scale: %s" % current_loss_scale)
+
+    # ================= Metric Logging Methods ================= #
+
+    def log_metrics(self, metric_data, iteration, runtime_mode):
+
+        validate_runtime_mode(runtime_mode)
+
+        if not isinstance(metric_data, dict):
+            raise ValueError("`metric_data` should be a dictionary. Received: %s" % type(metric_data))
+
+        if not isinstance(iteration, ACCEPTED_INT_NUMBER_FORMATS):
+            raise ValueError("`iteration` should be an integer. Received: %s" % type(iteration))
+
+        header_name = " Metrics "
+        reference_len = int((LoggingBackend.SEP_TARGET_LENGTH - len(header_name)) / 2)
+
+        self.log_info(
+            "%s%s%s" % (
+                "=" * reference_len, header_name, "=" *
+                (LoggingBackend.SEP_TARGET_LENGTH - len(header_name) - reference_len)
+            )
+        )
+
+        for key, value in sorted(metric_data.items(), key=operator.itemgetter(0)):
+            print_value = LoggingBackend.format_metric_value(value)
+            self.log_info("%s: %s" % (key, print_value))
+
+    def log_final_metrics(self, metric_data, runtime_mode):
+
+        validate_runtime_mode(runtime_mode)
+
+        for key, value in sorted(metric_data.items(), key=operator.itemgetter(0)):
+            print_value = LoggingBackend.format_metric_value(value)
+            self.log_info("%s: %s" % (key, print_value))
+        dllogger.log(step=(), data=metric_data, verbosity=Verbosity.DEFAULT)
+
+    # ================= Summary Logging Method ================= #
+
+    def log_summary(self, is_train, total_steps, total_processing_time, avg_throughput):
+
+        if is_train:
+            _message = "          Training Performance Summary           "
+
+        else:
+            _message = "         Evaluation Performance Summary          "
+
+        print()  # Visual Spacing
+        self.log_info("# @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ #")
+        self.log_info(_message)
+        self.log_info("# @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ #")
+
+        total_processing_hours, rem = divmod(total_processing_time, 3600)
+        total_processing_minutes, total_processing_seconds = divmod(rem, 60)
+
+        print()  # Visual Spacing
+        total_processing_time = int(total_processing_hours) * 3600 + int(total_processing_minutes) * 60 + int(total_processing_seconds)
+        dllogger.log(
+            step=(),
+            data={
+                "Average_throughput": avg_throughput,
+                "Total processed steps": int(total_steps),
+                "Total_processing_time": total_processing_time,
+            },
+            verbosity=Verbosity.DEFAULT
+        )
+
+        self.log_info("Average throughput: {throughput:.1f} samples/sec".format(throughput=avg_throughput))
+        self.log_info("Total processed steps: {total_steps}".format(total_steps=total_steps))
+        self.log_info(
+            "Total processing time: {hours}h {minutes:02d}m {seconds:02d}s".format(
+                hours=int(total_processing_hours),
+                minutes=int(total_processing_minutes),
+                seconds=int(total_processing_seconds)
+            )
+        )
+
+        self.log_info("==================== Metrics ====================")

+ 398 - 0
TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/utils/logging_formatter.py

@@ -0,0 +1,398 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import sys
+
+import inspect
+
+from contextlib import contextmanager
+
+from six import add_metaclass
+
+import threading
+import logging as _logging
+
+import warnings
+
+from mask_rcnn.utils.distributed_utils import MPI_rank_and_size
+from mask_rcnn.utils.metaclasses import SingletonMetaClass
+
+__all__ = [
+    "logging",
+    "log_cleaning"
+]
+
+MODEL_NAME = "MaskRCNN"
+
+
+class StdOutFormatter(_logging.Formatter):
+    """
+    Log formatter used in Tornado. Key features of this formatter are:
+    * Color support when logging to a terminal that supports it.
+    * Timestamps on every log line.
+    * Robust against str/bytes encoding problems.
+    """
+    DEFAULT_FORMAT = '%(color)s[{model_name}] %(levelname)-8s: %(end_color)s%(message)s'.format(
+        model_name=MODEL_NAME
+    )
+
+    DEFAULT_DATE_FORMAT = '%Y-%m-%d %H:%M:%S'
+
+    def __init__(self, fmt=None, datefmt=None, style='%'):
+        r"""
+        :arg bool color: Enables color support.
+        :arg string fmt: Log message format.
+          It will be applied to the attributes dict of log records. The
+          text between ``%(color)s`` and ``%(end_color)s`` will be colored
+          depending on the level if color support is on.
+        :arg dict colors: color mappings from logging level to terminal color
+          code
+        :arg string datefmt: Datetime format.
+          Used for formatting ``(asctime)`` placeholder in ``prefix_fmt``.
+        .. versionchanged:: 3.2
+           Added ``fmt`` and ``datefmt`` arguments.
+        """
+
+        if fmt is None:
+            fmt = self.DEFAULT_FORMAT
+
+        if datefmt is None:
+            datefmt = self.DEFAULT_DATE_FORMAT
+
+        # _logging.Formatter.__init__(self, datefmt=datefmt)
+        super(StdOutFormatter, self).__init__(fmt=fmt, datefmt=datefmt, style=style)
+
+        self._fmt = fmt
+        self._colors = {}
+        self._normal = ''
+
+    def format(self, record):
+        try:
+            message = record.getMessage()
+            assert isinstance(message, str)  # guaranteed by logging
+            # Encoding notes:  The logging module prefers to work with character
+            # strings, but only enforces that log messages are instances of
+            # basestring.  In python 2, non-ascii bytestrings will make
+            # their way through the logging framework until they blow up with
+            # an unhelpful decoding error (with this formatter it happens
+            # when we attach the prefix, but there are other opportunities for
+            # exceptions further along in the framework).
+            #
+            # If a byte string makes it this far, convert it to unicode to
+            # ensure it will make it out to the logs.  Use repr() as a fallback
+            # to ensure that all byte strings can be converted successfully,
+            # but don't do it by default so we don't add extra quotes to ascii
+            # bytestrings.  This is a bit of a hacky place to do this, but
+            # it's worth it since the encoding errors that would otherwise
+            # result are so useless (and tornado is fond of using utf8-encoded
+            # byte strings wherever possible).
+            record.message = self.to_unicode(message)
+
+        except Exception as e:
+            record.message = "Bad message (%r): %r" % (e, record.__dict__)
+
+        record.asctime = self.formatTime(record, self.datefmt)
+
+        if record.levelno in self._colors:
+            record.color = self._colors[record.levelno]
+            record.end_color = self._normal
+        else:
+            record.color = record.end_color = ''
+
+        formatted = self._fmt % record.__dict__
+
+        if record.exc_info:
+            if not record.exc_text:
+                record.exc_text = self.formatException(record.exc_info)
+
+        if record.exc_text:
+            # exc_text contains multiple lines.  We need to _safe_unicode
+            # each line separately so that non-utf8 bytes don't cause
+            # all the newlines to turn into '\n'.
+            lines = [formatted.rstrip()]
+            lines.extend(self.to_unicode(ln) for ln in record.exc_text.split('\n'))
+
+            formatted = '\n'.join(lines)
+        return formatted.replace("\n", "\n    ")
+
+    @staticmethod
+    def to_unicode(value):
+        """
+        Converts a string argument to a unicode string.
+        If the argument is already a unicode string or None, it is returned
+        unchanged.  Otherwise it must be a byte string and is decoded as utf8.
+        """
+        try:
+            if isinstance(value, (str, type(None))):
+                return value
+
+            if not isinstance(value, bytes):
+                raise TypeError("Expected bytes, unicode, or None; got %r" % type(value))
+
+            return value.decode("utf-8")
+
+        except UnicodeDecodeError:
+            return repr(value)
+
+
+
+@add_metaclass(SingletonMetaClass)
+class _Logger(object):
+
+    # Level 0
+    NOTSET = _logging.NOTSET
+
+    # Level 10
+    DEBUG = _logging.DEBUG
+
+    # Level 20
+    INFO = _logging.INFO
+
+    # Level 30
+    WARNING = _logging.WARNING
+
+    # Level 40
+    ERROR = _logging.ERROR
+
+    # Level 50
+    CRITICAL = _logging.CRITICAL
+
+    _level_names = {
+        0: 'NOTSET',
+        10: 'DEBUG',
+        20: 'INFO',
+        30: 'WARNING',
+        40: 'ERROR',
+        50: 'CRITICAL',
+    }
+
+    def __init__(self, capture_io=True):
+
+        self._logger = None
+        self._logger_lock = threading.Lock()
+
+        self._handlers = dict()
+
+        self.old_warnings_showwarning = None
+
+        if MPI_rank_and_size()[0] == 0:
+            self._define_logger()
+
+    def _define_logger(self):
+
+        # Use double-checked locking to avoid taking lock unnecessarily.
+        if self._logger is not None:
+            return self._logger
+
+        with self._logger_lock:
+
+            try:
+                # Scope the TensorFlow logger to not conflict with users' loggers.
+                self._logger = _logging.getLogger(MODEL_NAME)
+                self.reset_stream_handler()
+
+            finally:
+                self.set_verbosity(verbosity_level=_Logger.INFO)
+
+            self._logger.propagate = False
+
+    def reset_stream_handler(self):
+
+        if self._logger is None:
+            raise RuntimeError("Impossible to set handlers if the Logger is not predefined")
+
+        # ======== Remove Handler if already existing ========
+
+        try:
+            self._logger.removeHandler(self._handlers["stream_stdout"])
+        except KeyError:
+            pass
+
+        try:
+            self._logger.removeHandler(self._handlers["stream_stderr"])
+        except KeyError:
+            pass
+
+        # ================= Streaming Handler =================
+
+        # Add the output handler.
+        self._handlers["stream_stdout"] = _logging.StreamHandler(sys.stdout)
+        self._handlers["stream_stdout"].addFilter(lambda record: record.levelno <= _logging.INFO)
+
+        self._handlers["stream_stderr"] = _logging.StreamHandler(sys.stderr)
+        self._handlers["stream_stderr"].addFilter(lambda record: record.levelno > _logging.INFO)
+
+        Formatter = StdOutFormatter
+
+        self._handlers["stream_stdout"].setFormatter(Formatter())
+        self._logger.addHandler(self._handlers["stream_stdout"])
+
+        try:
+            self._handlers["stream_stderr"].setFormatter(Formatter())
+            self._logger.addHandler(self._handlers["stream_stderr"])
+        except KeyError:
+            pass
+
+    def get_verbosity(self):
+        """Return how much logging output will be produced."""
+        if self._logger is not None:
+            return self._logger.getEffectiveLevel()
+
+    def set_verbosity(self, verbosity_level):
+        """Sets the threshold for what messages will be logged."""
+        if self._logger is not None:
+            self._logger.setLevel(verbosity_level)
+
+            for handler in self._logger.handlers:
+                handler.setLevel(verbosity_level)
+
+    @contextmanager
+    def temp_verbosity(self, verbosity_level):
+        """Sets the a temporary threshold for what messages will be logged."""
+
+        if self._logger is not None:
+
+            old_verbosity = self.get_verbosity()
+
+            try:
+                self.set_verbosity(verbosity_level)
+                yield
+
+            finally:
+                self.set_verbosity(old_verbosity)
+
+        else:
+            yield
+
+    def captureWarnings(self, capture):
+        """
+        If capture is true, redirect all warnings to the logging package.
+        If capture is False, ensure that warnings are not redirected to logging
+        but to their original destinations.
+        """
+
+        if self._logger is not None:
+
+            if capture and self.old_warnings_showwarning is None:
+                self.old_warnings_showwarning = warnings.showwarning  # Backup Method
+                warnings.showwarning = self._showwarning
+
+            elif not capture and self.old_warnings_showwarning is not None:
+                warnings.showwarning = self.old_warnings_showwarning  # Restore Method
+                self.old_warnings_showwarning = None
+
+    def _showwarning(self, message, category, filename, lineno, file=None, line=None):
+        """
+        Implementation of showwarnings which redirects to logging.
+        It will call warnings.formatwarning and will log the resulting string
+        with level logging.WARNING.
+        """
+        s = warnings.formatwarning(message, category, filename, lineno, line)
+        self.warning("%s", s)
+
+    def debug(self, msg, *args, **kwargs):
+        """
+        Log 'msg % args' with severity 'DEBUG'.
+
+        To pass exception information, use the keyword argument exc_info with
+        a true value, e.g.
+
+        logger.debug("Houston, we have a %s", "thorny problem", exc_info=1)
+        """
+        if self._logger is not None and self._logger.isEnabledFor(_Logger.DEBUG):
+            self._logger._log(_Logger.DEBUG, msg, args, **kwargs)
+
+    def info(self, msg, *args, **kwargs):
+        """
+        Log 'msg % args' with severity 'INFO'.
+
+        To pass exception information, use the keyword argument exc_info with
+        a true value, e.g.
+
+        logger.info("Houston, we have a %s", "interesting problem", exc_info=1)
+        """
+        if self._logger is not None and self._logger.isEnabledFor(_Logger.INFO):
+            self._logger._log(_Logger.INFO, msg, args, **kwargs)
+
+    def warning(self, msg, *args, **kwargs):
+        """
+        Log 'msg % args' with severity 'WARNING'.
+
+        To pass exception information, use the keyword argument exc_info with
+        a true value, e.g.
+
+        logger.warning("Houston, we have a %s", "bit of a problem", exc_info=1)
+        """
+        if self._logger is not None and self._logger.isEnabledFor(_Logger.WARNING):
+            self._logger._log(_Logger.WARNING, msg, args, **kwargs)
+
+    def error(self, msg, *args, **kwargs):
+        """
+        Log 'msg % args' with severity 'ERROR'.
+
+        To pass exception information, use the keyword argument exc_info with
+        a true value, e.g.
+
+        logger.error("Houston, we have a %s", "major problem", exc_info=1)
+        """
+        if self._logger is not None and self._logger.isEnabledFor(_Logger.ERROR):
+            self._logger._log(_Logger.ERROR, msg, args, **kwargs)
+
+    def critical(self, msg, *args, **kwargs):
+        """
+        Log 'msg % args' with severity 'CRITICAL'.
+
+        To pass exception information, use the keyword argument exc_info with
+        a true value, e.g.
+
+        logger.critical("Houston, we have a %s", "major disaster", exc_info=1)
+        """
+        if self._logger is not None and self._logger.isEnabledFor(_Logger.CRITICAL):
+            self._logger._log(_Logger.CRITICAL, msg, args, **kwargs)
+
+
+def log_cleaning(hide_deprecation_warnings=False):
+
+    if hide_deprecation_warnings:
+        warnings.simplefilter("ignore")
+
+        from tensorflow.python.util import deprecation
+        from tensorflow.python.util import deprecation_wrapper
+        deprecation._PRINT_DEPRECATION_WARNINGS = False
+        deprecation_wrapper._PER_MODULE_WARNING_LIMIT = 0
+
+    formatter = _logging.Formatter('[%(levelname)s] %(message)s')
+
+    from tensorflow.python.platform import tf_logging
+    tf_logging.get_logger().propagate = False
+
+    _logging.getLogger().propagate = False
+
+    for handler in _logging.getLogger().handlers:
+        handler.setFormatter(formatter)
+
+
+# Necessary to catch the correct caller
+_logging._srcfile = os.path.normcase(inspect.getfile(_Logger))
+
+
+logging = _Logger()

+ 32 - 0
TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/utils/metaclasses.py

@@ -0,0 +1,32 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+__all__ = [
+    "SingletonMetaClass",
+]
+
+
+class SingletonMetaClass(type):
+
+    _instances = {}
+
+    def __call__(cls, *args, **kwargs):
+
+        if cls not in cls._instances:
+            cls._instances[cls] = super(SingletonMetaClass, cls).__call__(*args, **kwargs)
+
+        return cls._instances[cls]
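
A short illustration of the singleton semantics this metaclass provides (the class name below is hypothetical):

    class RuntimeConfig(metaclass=SingletonMetaClass):
        def __init__(self):
            self.values = {}

    a = RuntimeConfig()
    b = RuntimeConfig()
    assert a is b  # __call__ returns the one cached instance per class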

+ 178 - 0
TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/utils/meters.py

@@ -0,0 +1,178 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from abc import ABCMeta
+from abc import abstractmethod
+
+import six
+import collections
+
+from functools import lru_cache
+
+import numpy as np
+
+__all__ = ["MetricMeter", "StandardMeter", "AverageMeter", "MovingAverageMeter", "MemoryLessMovingAverageMeter"]
+
+
+# Supported Numpy DTypes: `np.sctypes`
+ACCEPTED_INT_NUMBER_FORMATS = (
+    int,
+    np.uint8,
+    np.uint16,
+    np.uint32,
+    np.uint64,
+    np.int,
+    np.int8,
+    np.int16,
+    np.int32,
+    np.int64,
+)
+
+ACCEPTED_FLOAT_NUMBER_FORMATS = (
+    float,
+    np.float,
+    np.float16,
+    np.float32,
+    np.float64,
+    np.float128,
+)
+
+ACCEPTED_STR_NUMBER_FORMATS = (
+    str,
+    np.str,
+)
+
+ACCEPTED_NUMBER_FORMATS = \
+    ACCEPTED_INT_NUMBER_FORMATS +  \
+    ACCEPTED_FLOAT_NUMBER_FORMATS +  \
+    ACCEPTED_STR_NUMBER_FORMATS
+
+
+@six.add_metaclass(ABCMeta)
+class AbstractMeterMixin(object):
+
+    @abstractmethod
+    def AUTHORIZED_DTYPES(self):
+        pass
+
+
+@six.add_metaclass(ABCMeta)
+class MetricMeter(AbstractMeterMixin):
+
+    # Supported Numpy DTypes: `np.sctypes`
+    AUTHORIZED_DTYPES = tuple(ACCEPTED_NUMBER_FORMATS)
+
+    @lru_cache(maxsize=128)
+    def __init__(self):
+        self._values = np.array([])
+
+    def reset(self):
+        self._values = np.array([])
+
+    @lru_cache(maxsize=128)
+    def __str__(self):
+        return self.__class__.__name__
+
+    def get_last(self):
+        try:
+            return self._values[-1]
+        except IndexError:
+            raise ValueError("Cannot get the last value: no value has been recorded yet")
+
+    def record(self, val):
+
+        if not isinstance(val, MetricMeter.AUTHORIZED_DTYPES):
+            raise TypeError("Unsupported datatype received: %s" % str(type(val)))
+
+        if not isinstance(val, ACCEPTED_STR_NUMBER_FORMATS) and (np.isnan(val) or np.isinf(val)):
+            raise ValueError("invalid value received: %s" % str(val))
+
+        self._values = np.append(self._values, val)
+
+    @abstractmethod
+    def read(self):
+        raise NotImplementedError()
+
+
+class StandardMeter(MetricMeter):
+
+    def read(self):
+        return self.get_last()
+
+
+class AverageMeter(MetricMeter):
+
+    def read(self):
+        if len(self._values):
+            return np.mean(self._values)
+        else:
+            raise ValueError("Cannot compute the average: no value has been recorded yet")
+
+
+class MovingAverageMeter(MetricMeter):
+
+    def __init__(self, window_size):
+
+        super(MovingAverageMeter, self).__init__()
+
+        if not isinstance(window_size, int):
+            raise ValueError("`window_size` must be an integer")
+
+        if window_size < 1:
+            raise ValueError("`window_size` must be greater than or equal to 1")
+
+        self._window_size = window_size
+
+    @lru_cache(maxsize=128)
+    def __str__(self):
+        return "%s(window_size=%d)" % (super(MovingAverageMeter, self).__str__(), self._window_size)
+
+    def read(self):
+        if len(self._values):
+            return np.mean(self._values[-self._window_size:])
+        else:
+            raise ValueError("Cannot compute the moving average: no value has been recorded yet")
+
+
+class MemoryLessMovingAverageMeter(MetricMeter):
+
+    def __init__(self, window_size):
+
+        super(MemoryLessMovingAverageMeter, self).__init__()
+
+        if not isinstance(window_size, int):
+            raise ValueError("`window_size` must be an integer")
+
+        if window_size < 1:
+            raise ValueError("`window_size` must be greater than or equal to 1")
+
+        self._window_size = window_size
+        self._values = collections.deque(maxlen=window_size)
+
+    def reset(self):
+        self._values = collections.deque(maxlen=self._window_size)
+
+    @lru_cache(maxsize=128)
+    def __str__(self):
+        return "%s(window_size=%d)" % (super(MemoryLessMovingAverageMeter, self).__str__(), self._window_size)
+
+    def read(self):
+        if len(self._values):
+            return np.mean(self._values)
+        else:
+            raise ValueError("Cannot compute the moving average: no value has been recorded yet")
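
A usage sketch for the meters above (the numbers are illustrative):

    meter = MovingAverageMeter(window_size=3)

    for loss in [2.0, 1.5, 1.2, 1.0]:
        meter.record(loss)

    meter.get_last()  # 1.0, the most recently recorded value
    meter.read()      # mean of the last 3 values: (1.5 + 1.2 + 1.0) / 3
    meter.reset()     # clears the recorded history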

+ 106 - 0
TensorFlow2/Segmentation/MaskRCNN/mask_rcnn/utils/metric_tracking.py

@@ -0,0 +1,106 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import weakref
+
+from mask_rcnn.utils.logging_backend import DistributedStrategy
+from mask_rcnn.utils.logging_backend import LoggingScope
+
+from mask_rcnn.utils.logging_formatter import logging
+
+from mask_rcnn.utils import meters
+
+__all__ = ["TF_METRICS", "KERAS_MODELS", "KERAS_OPTIMIZERS", "register_metric", "clear_registered_metrics"]
+
+
+class WeakRefList(object):
+    def __init__(self):
+        self._items = list()
+
+    def _clean_internal_list(self):
+        self._items = [s for s in self._items if s() is not None]
+
+    def __iter__(self):
+        self._clean_internal_list()
+
+        for obj in self._items:
+            if obj() is None:
+                continue
+
+            yield obj()
+
+    def __len__(self):
+        self._clean_internal_list()
+        return len(self._items)
+
+    def clear(self):
+        self._items.clear()
+
+    def append(self, new_item):
+        self._items.append(weakref.ref(new_item))
+        self._clean_internal_list()
+
+
+TF_METRICS = dict()
+KERAS_MODELS = WeakRefList()
+KERAS_OPTIMIZERS = WeakRefList()
+
+
+def register_metric(
+    name,
+    tensor,
+    aggregator=meters.StandardMeter(),
+    metric_scope=LoggingScope.ITER,
+    distributed_strategy=DistributedStrategy.NONE
+):
+
+    if name in TF_METRICS.keys():
+        raise ValueError("A metric with the name `%s` has already been registered" % name)
+
+    if not issubclass(aggregator.__class__, meters.AbstractMeterMixin):
+        raise ValueError("Unknown `aggregator` received: %s" % aggregator.__class__.__name__)
+
+    if metric_scope not in LoggingScope.__values__():
+        raise ValueError(
+            "Unknown `metric_scope` received: %s, authorized: %s" %
+            (metric_scope, LoggingScope.__values__())
+        )
+
+    if distributed_strategy not in DistributedStrategy.__values__():
+        raise ValueError(
+            "Unknown `distributed_strategy` received: %s, authorized: %s" %
+            (distributed_strategy, DistributedStrategy.__values__())
+        )
+
+    TF_METRICS[name] = {
+        "tensor": tensor,
+        "aggregator": aggregator,
+        "distributed_strategy": distributed_strategy,
+        "scope": metric_scope,
+    }
+
+    logging.debug(
+        "New Metric Registered: `{metric_name}`, Aggregator: {aggregator}, "
+        "Scope: {scope}, Distributed Strategy: {distributed_strategy}".format(
+            metric_name=name, aggregator=str(aggregator), distributed_strategy=distributed_strategy, scope=metric_scope
+        )
+    )
+
+
+def clear_registered_metrics():
+    TF_METRICS.clear()
+    logging.debug("All registered metrics have been cleared")
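
A sketch of how a metric would be registered with the machinery above (the loss tensor is a placeholder; `metric_scope` and `distributed_strategy` are left at the defaults shown in the signature):

    import tensorflow as tf

    from mask_rcnn.utils import meters
    from mask_rcnn.utils.metric_tracking import register_metric

    total_loss = tf.constant(0.0)  # stand-in for a real training-loss tensor

    register_metric(
        name="total_loss",
        tensor=total_loss,
        aggregator=meters.MovingAverageMeter(window_size=10),
    )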

+ 152 - 0
TensorFlow2/Segmentation/MaskRCNN/mask_rcnn_main.py

@@ -0,0 +1,152 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Training script for Mask-RCNN."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+
+os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # or any {'0', '1', '2'}
+os.environ["TF_CPP_VMODULE"] = 'non_max_suppression_op=0,generate_box_proposals_op=0,executor=0'
+# os.environ["TF_XLA_FLAGS"] = 'tf_xla_print_cluster_outputs=1'
+
+from absl import app
+
+import tensorflow as tf
+from tensorflow.python.framework.ops import disable_eager_execution
+
+from mask_rcnn.utils.logging_formatter import logging
+from mask_rcnn.utils.distributed_utils import MPI_is_distributed
+
+from mask_rcnn import dataloader
+from mask_rcnn import distributed_executer
+from mask_rcnn import mask_rcnn_model
+
+from mask_rcnn.hyperparameters import mask_rcnn_params
+from mask_rcnn.hyperparameters import params_io
+
+from mask_rcnn.hyperparameters.cmdline_utils import define_hparams_flags
+
+from mask_rcnn.utils.logging_formatter import log_cleaning
+import dllogger
+
+FLAGS = define_hparams_flags()
+
+
+def run_executer(runtime_config, train_input_fn=None, eval_input_fn=None):
+    """Runs Mask RCNN model on distribution strategy defined by the user."""
+
+    if runtime_config.use_tf_distributed:
+        executer = distributed_executer.TFDistributedExecuter(runtime_config, mask_rcnn_model.mask_rcnn_model_fn)
+    else:
+        executer = distributed_executer.EstimatorExecuter(runtime_config, mask_rcnn_model.mask_rcnn_model_fn)
+
+    if runtime_config.mode == 'train':
+        executer.train(
+            train_input_fn=train_input_fn,
+            run_eval_after_train=FLAGS.eval_after_training,
+            eval_input_fn=eval_input_fn
+        )
+
+    elif runtime_config.mode == 'eval':
+        executer.eval(eval_input_fn=eval_input_fn)
+
+    elif runtime_config.mode == 'train_and_eval':
+        executer.train_and_eval(train_input_fn=train_input_fn, eval_input_fn=eval_input_fn)
+
+    else:
+        raise ValueError('Mode must be one of `train`, `eval`, or `train_and_eval`')
+
+
+def main(argv):
+    del argv  # Unused.
+
+    # ============================ Configure parameters ============================ #
+    RUN_CONFIG = mask_rcnn_params.default_config()
+
+    temp_config = FLAGS.flag_values_dict()
+    temp_config['learning_rate_decay_levels'] = [float(decay) for decay in temp_config['learning_rate_decay_levels']]
+    temp_config['learning_rate_levels'] = [
+        decay * temp_config['init_learning_rate'] for decay in temp_config['learning_rate_decay_levels']
+    ]
+    temp_config['learning_rate_steps'] = [int(step) for step in temp_config['learning_rate_steps']]
+
+    RUN_CONFIG = params_io.override_hparams(RUN_CONFIG, temp_config)
+    # ============================ Configure parameters ============================ #
+
+    if RUN_CONFIG.use_tf_distributed and MPI_is_distributed():
+        raise RuntimeError("Incompatible runtime: `--use_tf_distributed` cannot be combined with Horovod (mpirun)")
+
+    if RUN_CONFIG.mode in ('train', 'train_and_eval') and not RUN_CONFIG.training_file_pattern:
+        raise RuntimeError('You must specify `training_file_pattern` for training.')
+
+    if RUN_CONFIG.mode in ('eval', 'train_and_eval'):
+        if not RUN_CONFIG.validation_file_pattern:
+            raise RuntimeError('You must specify `validation_file_pattern` for evaluation.')
+
+        if RUN_CONFIG.val_json_file == "" and not RUN_CONFIG.include_groundtruth_in_features:
+            raise RuntimeError(
+                'You must specify `val_json_file` or include_groundtruth_in_features=True for evaluation.')
+
+        if not RUN_CONFIG.include_groundtruth_in_features and not os.path.isfile(RUN_CONFIG.val_json_file):
+            raise FileNotFoundError("Validation JSON File not found: %s" % RUN_CONFIG.val_json_file)
+
+    dllogger.init(backends=[dllogger.JSONStreamBackend(verbosity=dllogger.Verbosity.VERBOSE,
+                                                       filename=RUN_CONFIG.log_path)])
+
+    if RUN_CONFIG.mode in ('train', 'train_and_eval'):
+
+        train_input_fn = dataloader.InputReader(
+            file_pattern=RUN_CONFIG.training_file_pattern,
+            mode=tf.estimator.ModeKeys.TRAIN,
+            num_examples=None,
+            use_fake_data=RUN_CONFIG.use_fake_data,
+            use_instance_mask=RUN_CONFIG.include_mask,
+            seed=RUN_CONFIG.seed
+        )
+
+    else:
+        train_input_fn = None
+
+    if RUN_CONFIG.mode in ('eval', 'train_and_eval') or (RUN_CONFIG.mode == 'train' and RUN_CONFIG.eval_after_training):
+
+        eval_input_fn = dataloader.InputReader(
+            file_pattern=RUN_CONFIG.validation_file_pattern,
+            mode=tf.estimator.ModeKeys.PREDICT,
+            num_examples=RUN_CONFIG.eval_samples,
+            use_fake_data=False,
+            use_instance_mask=RUN_CONFIG.include_mask,
+            seed=RUN_CONFIG.seed
+        )
+
+    else:
+        eval_input_fn = None
+
+    run_executer(RUN_CONFIG, train_input_fn, eval_input_fn)
+
+
+if __name__ == '__main__':
+    disable_eager_execution()
+    logging.set_verbosity(logging.DEBUG)
+    tf.autograph.set_verbosity(0)
+    log_cleaning(hide_deprecation_warnings=True)
+
+    app.run(main)
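
To make the learning-rate plumbing in `main()` concrete: `learning_rate_levels` is derived by scaling `init_learning_rate` by each decay factor. With the single-GPU values from the training scripts below, and assuming decay levels of 0.1 and 0.01 (the repo default is not visible in this hunk):

    init_learning_rate = 0.005                  # from train_*_1GPU.sh below
    learning_rate_decay_levels = [0.1, 0.01]    # assumed default, not shown in this hunk
    learning_rate_levels = [d * init_learning_rate for d in learning_rate_decay_levels]
    # -> [0.0005, 5e-05], applied at learning_rate_steps = [240000, 320000]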

+ 33 - 0
TensorFlow2/Segmentation/MaskRCNN/scripts/docker/build_tf1.sh

@@ -0,0 +1,33 @@
+#!/bin/bash
+
+# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+CONTAINER_TF1x_BASE="nvcr.io/nvidia/tensorflow"
+CONTAINER_TF1x_TAG="19.11-tf1-py3"
+
+# ======================== Refresh base image ======================== #
+docker pull "${CONTAINER_TF1x_BASE}:${CONTAINER_TF1x_TAG}"
+
+# ========================== Build container ========================= #
+
+echo -e "\n\nBuilding NVIDIA TF 1.x Container\n\n"
+
+sleep 1
+
+docker build -t joc_tensorflow_maskrcnn:tf1.x-py3 \
+    --build-arg BASE_CONTAINER="${CONTAINER_TF1x_BASE}" \
+    --build-arg IMG_TAG="${CONTAINER_TF1x_TAG}" \
+    --build-arg FROM_IMAGE_NAME="${CONTAINER_TF1x_BASE}:${CONTAINER_TF1x_TAG}" \
+    .

+ 33 - 0
TensorFlow2/Segmentation/MaskRCNN/scripts/docker/build_tf2.sh

@@ -0,0 +1,33 @@
+#!/bin/bash
+
+# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+CONTAINER_TF2x_BASE="nvcr.io/nvidia/tensorflow"
+CONTAINER_TF2x_TAG="19.11-tf2-py3"
+
+# ======================== Refresh base image ======================== #
+docker pull "${CONTAINER_TF2x_BASE}:${CONTAINER_TF2x_TAG}"
+
+# ========================== Build container ========================= #
+
+echo -e "\n\nBuilding NVIDIA TF 2.x Container\n\n"
+
+sleep 1
+
+docker build -t joc_tensorflow_maskrcnn:tf2.1-py3 \
+    --build-arg BASE_CONTAINER="${CONTAINER_TF2x_BASE}" \
+    --build-arg IMG_TAG="${CONTAINER_TF2x_TAG}" \
+    --build-arg FROM_IMAGE_NAME="${CONTAINER_TF2x_BASE}:${CONTAINER_TF2x_TAG}" \
+    .

+ 27 - 0
TensorFlow2/Segmentation/MaskRCNN/scripts/docker/launch_tf1.sh

@@ -0,0 +1,27 @@
+#!/bin/bash
+
+# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+if [ -z "$1" ]; then
+  echo "usage: launch_tf1.sh [absolute data dir]"
+  exit 1
+fi
+
+nvidia-docker run -it --rm \
+    --shm-size=2g --ulimit memlock=-1 --ulimit stack=67108864 \
+    -v "$(pwd)/":/workspace/ \
+    -v "$(pwd)/weights/":/model/ \
+    -v "${1}":/data/ \
+    joc_tensorflow_maskrcnn:tf1.x-py3

+ 27 - 0
TensorFlow2/Segmentation/MaskRCNN/scripts/docker/launch_tf2.sh

@@ -0,0 +1,27 @@
+#!/bin/bash
+
+# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+if [ -z "$1" ]; then
+  echo "usage: launch_tf2.sh [absolute data dir]"
+  exit 1
+fi
+
+nvidia-docker run -it --rm \
+    --shm-size=2g --ulimit memlock=-1 --ulimit stack=67108864 \
+    -v "$(pwd)/":/workspace/ \
+    -v "$(pwd)/weights/":/model/ \
+    -v "${1}":/data/ \
+    joc_tensorflow_maskrcnn:tf2.1-py3

+ 34 - 0
TensorFlow2/Segmentation/MaskRCNN/scripts/evaluation_AMP.sh

@@ -0,0 +1,34 @@
+#!/usr/bin/env bash
+
+# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+rm -rf /result_tmp/
+
+BASEDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
+
+export CUDA_VISIBLE_DEVICES=0
+
+python ${BASEDIR}/../mask_rcnn_main.py \
+    --mode="eval" \
+    --eval_batch_size=8 \
+    --eval_samples=5000 \
+    --learning_rate_steps="480000,640000" \
+    --model_dir="/result_tmp/" \
+    --validation_file_pattern="/data/val*.tfrecord" \
+    --val_json_file="/data/annotations/instances_val2017.json" \
+    --use_batched_nms \
+    --use_amp \
+    --nouse_xla \
+    --nouse_custom_box_proposals_op

+ 34 - 0
TensorFlow2/Segmentation/MaskRCNN/scripts/evaluation_FP32.sh

@@ -0,0 +1,34 @@
+#!/usr/bin/env bash
+
+# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+rm -rf /result_tmp/
+
+BASEDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
+
+export CUDA_VISIBLE_DEVICES=0
+
+python ${BASEDIR}/../mask_rcnn_main.py \
+    --mode="eval" \
+    --eval_batch_size=8 \
+    --eval_samples=5000 \
+    --learning_rate_steps="480000,640000" \
+    --model_dir="/result_tmp/" \
+    --validation_file_pattern="/data/val*.tfrecord" \
+    --val_json_file="/data/annotations/instances_val2017.json" \
+    --use_batched_nms \
+    --nouse_amp \
+    --nouse_xla \
+    --nouse_custom_box_proposals_op

+ 40 - 0
TensorFlow2/Segmentation/MaskRCNN/scripts/train_AMP_1GPU.sh

@@ -0,0 +1,40 @@
+#!/usr/bin/env bash
+
+# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+rm -rf /results
+
+BASEDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
+
+export CUDA_VISIBLE_DEVICES=0
+
+python ${BASEDIR}/../mask_rcnn_main.py \
+    --mode="train_and_eval" \
+    --checkpoint="/model/resnet/resnet-nhwc-2018-10-14/model.ckpt-112602" \
+    --eval_samples=5000 \
+    --init_learning_rate=0.005 \
+    --learning_rate_steps="240000,320000" \
+    --model_dir="/results/" \
+    --num_steps_per_eval=29568 \
+    --total_steps=360000 \
+    --train_batch_size=4 \
+    --eval_batch_size=8 \
+    --training_file_pattern="/data/train*.tfrecord" \
+    --validation_file_pattern="/data/val*.tfrecord" \
+    --val_json_file="/data/annotations/instances_val2017.json" \
+    --use_amp \
+    --use_batched_nms \
+    --nouse_xla \
+    --nouse_custom_box_proposals_op

+ 40 - 0
TensorFlow2/Segmentation/MaskRCNN/scripts/train_AMP_1GPU_XLA.sh

@@ -0,0 +1,40 @@
+#!/usr/bin/env bash
+
+# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+rm -rf /results
+
+BASEDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
+
+export CUDA_VISIBLE_DEVICES=0
+
+python ${BASEDIR}/../mask_rcnn_main.py \
+    --mode="train_and_eval" \
+    --checkpoint="/model/resnet/resnet-nhwc-2018-10-14/model.ckpt-112602" \
+    --eval_samples=5000 \
+    --init_learning_rate=0.005 \
+    --learning_rate_steps="240000,320000" \
+    --model_dir="/results/" \
+    --num_steps_per_eval=29568 \
+    --total_steps=360000 \
+    --train_batch_size=4 \
+    --eval_batch_size=8 \
+    --training_file_pattern="/data/train*.tfrecord" \
+    --validation_file_pattern="/data/val*.tfrecord" \
+    --val_json_file="/data/annotations/instances_val2017.json" \
+    --use_amp \
+    --use_batched_nms \
+    --use_xla \
+    --nouse_custom_box_proposals_op

+ 50 - 0
TensorFlow2/Segmentation/MaskRCNN/scripts/train_AMP_4GPU.sh

@@ -0,0 +1,50 @@
+#!/usr/bin/env bash
+
+# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+rm -rf /results
+
+BASEDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
+
+export CUDA_VISIBLE_DEVICES=0,1,2,3
+
+mpirun \
+    -np 4 \
+    -H localhost:4 \
+    -bind-to none \
+    -map-by slot \
+    -x NCCL_DEBUG=VERSION \
+    -x LD_LIBRARY_PATH \
+    -x PATH \
+    -mca pml ob1 -mca btl ^openib \
+    --allow-run-as-root \
+    python ${BASEDIR}/../mask_rcnn_main.py \
+        --mode="train_and_eval" \
+        --checkpoint="/model/resnet/resnet-nhwc-2018-10-14/model.ckpt-112602" \
+        --eval_samples=5000 \
+        --init_learning_rate=0.02 \
+        --learning_rate_steps="60000,80000" \
+        --model_dir="/results/" \
+        --num_steps_per_eval=7392 \
+        --total_steps=90000 \
+        --train_batch_size=4 \
+        --eval_batch_size=8 \
+        --training_file_pattern="/data/train*.tfrecord" \
+        --validation_file_pattern="/data/val*.tfrecord" \
+        --val_json_file="/data/annotations/instances_val2017.json" \
+        --use_amp \
+        --use_batched_nms \
+        --nouse_xla \
+        --nouse_custom_box_proposals_op

+ 50 - 0
TensorFlow2/Segmentation/MaskRCNN/scripts/train_AMP_4GPU_XLA.sh

@@ -0,0 +1,50 @@
+#!/usr/bin/env bash
+
+# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+rm -rf /results
+
+BASEDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
+
+export CUDA_VISIBLE_DEVICES=0,1,2,3
+
+mpirun \
+    -np 4 \
+    -H localhost:4 \
+    -bind-to none \
+    -map-by slot \
+    -x NCCL_DEBUG=VERSION \
+    -x LD_LIBRARY_PATH \
+    -x PATH \
+    -mca pml ob1 -mca btl ^openib \
+    --allow-run-as-root \
+    python ${BASEDIR}/../mask_rcnn_main.py \
+        --mode="train_and_eval" \
+        --checkpoint="/model/resnet/resnet-nhwc-2018-10-14/model.ckpt-112602" \
+        --eval_samples=5000 \
+        --init_learning_rate=0.02 \
+        --learning_rate_steps="60000,80000" \
+        --model_dir="/results/" \
+        --num_steps_per_eval=7392 \
+        --total_steps=90000 \
+        --train_batch_size=4 \
+        --eval_batch_size=8 \
+        --training_file_pattern="/data/train*.tfrecord" \
+        --validation_file_pattern="/data/val*.tfrecord" \
+        --val_json_file="/data/annotations/instances_val2017.json" \
+        --use_amp \
+        --use_batched_nms \
+        --use_xla \
+        --nouse_custom_box_proposals_op

+ 48 - 0
TensorFlow2/Segmentation/MaskRCNN/scripts/train_AMP_8GPU.sh

@@ -0,0 +1,48 @@
+#!/usr/bin/env bash
+
+# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+rm -rf /results
+
+BASEDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
+
+mpirun \
+    -np 8 \
+    -H localhost:8 \
+    -bind-to none \
+    -map-by slot \
+    -x NCCL_DEBUG=VERSION \
+    -x LD_LIBRARY_PATH \
+    -x PATH \
+    -mca pml ob1 -mca btl ^openib \
+    --allow-run-as-root \
+    python ${BASEDIR}/../mask_rcnn_main.py \
+        --mode="train_and_eval" \
+        --checkpoint="/model/resnet/resnet-nhwc-2018-10-14/model.ckpt-112602" \
+        --eval_samples=5000 \
+        --init_learning_rate=0.04 \
+        --learning_rate_steps="30000,40000" \
+        --model_dir="/results/" \
+        --num_steps_per_eval=3696 \
+        --total_steps=45000 \
+        --train_batch_size=4 \
+        --eval_batch_size=8 \
+        --training_file_pattern="/data/train*.tfrecord" \
+        --validation_file_pattern="/data/val*.tfrecord" \
+        --val_json_file="/data/annotations/instances_val2017.json" \
+        --use_amp \
+        --use_batched_nms \
+        --nouse_xla \
+        --nouse_custom_box_proposals_op

+ 48 - 0
TensorFlow2/Segmentation/MaskRCNN/scripts/train_AMP_8GPU_XLA.sh

@@ -0,0 +1,48 @@
+#!/usr/bin/env bash
+
+# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+rm -rf /results
+
+BASEDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
+
+mpirun \
+    -np 8 \
+    -H localhost:8 \
+    -bind-to none \
+    -map-by slot \
+    -x NCCL_DEBUG=VERSION \
+    -x LD_LIBRARY_PATH \
+    -x PATH \
+    -mca pml ob1 -mca btl ^openib \
+    --allow-run-as-root \
+    python ${BASEDIR}/../mask_rcnn_main.py \
+        --mode="train_and_eval" \
+        --checkpoint="/model/resnet/resnet-nhwc-2018-10-14/model.ckpt-112602" \
+        --eval_samples=5000 \
+        --init_learning_rate=0.04 \
+        --learning_rate_steps="30000,40000" \
+        --model_dir="/results/" \
+        --num_steps_per_eval=3696 \
+        --total_steps=45000 \
+        --train_batch_size=4 \
+        --eval_batch_size=8 \
+        --training_file_pattern="/data/train*.tfrecord" \
+        --validation_file_pattern="/data/val*.tfrecord" \
+        --val_json_file="/data/annotations/instances_val2017.json" \
+        --use_amp \
+        --use_batched_nms \
+        --use_xla \
+        --nouse_custom_box_proposals_op

+ 40 - 0
TensorFlow2/Segmentation/MaskRCNN/scripts/train_FP32_1GPU.sh

@@ -0,0 +1,40 @@
+#!/usr/bin/env bash
+
+# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+rm -rf /results
+
+BASEDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
+
+export CUDA_VISIBLE_DEVICES=0
+
+python ${BASEDIR}/../mask_rcnn_main.py \
+    --mode="train_and_eval" \
+    --checkpoint="/model/resnet/resnet-nhwc-2018-10-14/model.ckpt-112602" \
+    --eval_samples=5000 \
+    --init_learning_rate=0.005 \
+    --learning_rate_steps="240000,320000" \
+    --model_dir="/results/" \
+    --num_steps_per_eval=29568 \
+    --total_steps=360000 \
+    --train_batch_size=4 \
+    --eval_batch_size=8 \
+    --training_file_pattern="/data/train*.tfrecord" \
+    --validation_file_pattern="/data/val*.tfrecord" \
+    --val_json_file="/data/annotations/instances_val2017.json" \
+    --nouse_amp \
+    --use_batched_nms \
+    --nouse_xla \
+    --nouse_custom_box_proposals_op

+ 40 - 0
TensorFlow2/Segmentation/MaskRCNN/scripts/train_FP32_1GPU_XLA.sh

@@ -0,0 +1,40 @@
+#!/usr/bin/env bash
+
+# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+rm -rf /results
+
+BASEDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
+
+export CUDA_VISIBLE_DEVICES=0
+
+python ${BASEDIR}/../mask_rcnn_main.py \
+    --mode="train_and_eval" \
+    --checkpoint="/model/resnet/resnet-nhwc-2018-10-14/model.ckpt-112602" \
+    --eval_samples=5000 \
+    --init_learning_rate=0.005 \
+    --learning_rate_steps="240000,320000" \
+    --model_dir="/results/" \
+    --num_steps_per_eval=29568 \
+    --total_steps=360000 \
+    --train_batch_size=4 \
+    --eval_batch_size=8 \
+    --training_file_pattern="/data/train*.tfrecord" \
+    --validation_file_pattern="/data/val*.tfrecord" \
+    --val_json_file="/data/annotations/instances_val2017.json" \
+    --nouse_amp \
+    --use_batched_nms \
+    --use_xla \
+    --nouse_custom_box_proposals_op

+ 50 - 0
TensorFlow2/Segmentation/MaskRCNN/scripts/train_FP32_4GPU.sh

@@ -0,0 +1,50 @@
+#!/usr/bin/env bash
+
+# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+rm -rf /results
+
+BASEDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
+
+export CUDA_VISIBLE_DEVICES=0,1,2,3
+
+mpirun \
+    -np 4 \
+    -H localhost:4 \
+    -bind-to none \
+    -map-by slot \
+    -x NCCL_DEBUG=VERSION \
+    -x LD_LIBRARY_PATH \
+    -x PATH \
+    -mca pml ob1 -mca btl ^openib \
+    --allow-run-as-root \
+    python ${BASEDIR}/../mask_rcnn_main.py \
+        --mode="train_and_eval" \
+        --checkpoint="/model/resnet/resnet-nhwc-2018-10-14/model.ckpt-112602" \
+        --eval_samples=5000 \
+        --init_learning_rate=0.02 \
+        --learning_rate_steps="60000,80000" \
+        --model_dir="/results/" \
+        --num_steps_per_eval=7392 \
+        --total_steps=90000 \
+        --train_batch_size=4 \
+        --eval_batch_size=8 \
+        --training_file_pattern="/data/train*.tfrecord" \
+        --validation_file_pattern="/data/val*.tfrecord" \
+        --val_json_file="/data/annotations/instances_val2017.json" \
+        --nouse_amp \
+        --use_batched_nms \
+        --nouse_xla \
+        --nouse_custom_box_proposals_op

+ 50 - 0
TensorFlow2/Segmentation/MaskRCNN/scripts/train_FP32_4GPU_XLA.sh

@@ -0,0 +1,50 @@
+#!/usr/bin/env bash
+
+# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+rm -rf /results
+
+BASEDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
+
+export CUDA_VISIBLE_DEVICES=0,1,2,3
+
+mpirun \
+    -np 4 \
+    -H localhost:4 \
+    -bind-to none \
+    -map-by slot \
+    -x NCCL_DEBUG=VERSION \
+    -x LD_LIBRARY_PATH \
+    -x PATH \
+    -mca pml ob1 -mca btl ^openib \
+    --allow-run-as-root \
+    python ${BASEDIR}/../mask_rcnn_main.py \
+        --mode="train_and_eval" \
+        --checkpoint="/model/resnet/resnet-nhwc-2018-10-14/model.ckpt-112602" \
+        --eval_samples=5000 \
+        --init_learning_rate=0.02 \
+        --learning_rate_steps="60000,80000" \
+        --model_dir="/results/" \
+        --num_steps_per_eval=7392 \
+        --total_steps=90000 \
+        --train_batch_size=4 \
+        --eval_batch_size=8 \
+        --training_file_pattern="/data/train*.tfrecord" \
+        --validation_file_pattern="/data/val*.tfrecord" \
+        --val_json_file="/data/annotations/instances_val2017.json" \
+        --nouse_amp \
+        --use_batched_nms \
+        --use_xla \
+        --nouse_custom_box_proposals_op

+ 48 - 0
TensorFlow2/Segmentation/MaskRCNN/scripts/train_FP32_8GPU.sh

@@ -0,0 +1,48 @@
+#!/usr/bin/env bash
+
+# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+rm -rf /results
+
+BASEDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
+
+mpirun \
+    -np 8 \
+    -H localhost:8 \
+    -bind-to none \
+    -map-by slot \
+    -x NCCL_DEBUG=VERSION \
+    -x LD_LIBRARY_PATH \
+    -x PATH \
+    -mca pml ob1 -mca btl ^openib \
+    --allow-run-as-root \
+    python ${BASEDIR}/../mask_rcnn_main.py \
+        --mode="train_and_eval" \
+        --checkpoint="/model/resnet/resnet-nhwc-2018-10-14/model.ckpt-112602" \
+        --eval_samples=5000 \
+        --init_learning_rate=0.04 \
+        --learning_rate_steps="30000,40000" \
+        --model_dir="/results/" \
+        --num_steps_per_eval=3696 \
+        --total_steps=45000 \
+        --train_batch_size=4 \
+        --eval_batch_size=8 \
+        --training_file_pattern="/data/train*.tfrecord" \
+        --validation_file_pattern="/data/val*.tfrecord" \
+        --val_json_file="/data/annotations/instances_val2017.json" \
+        --nouse_amp \
+        --use_batched_nms \
+        --nouse_xla \
+        --nouse_custom_box_proposals_op

+ 48 - 0
TensorFlow2/Segmentation/MaskRCNN/scripts/train_FP32_8GPU_XLA.sh

@@ -0,0 +1,48 @@
+#!/usr/bin/env bash
+
+# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+rm -rf /results
+
+BASEDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
+
+mpirun \
+    -np 8 \
+    -H localhost:8 \
+    -bind-to none \
+    -map-by slot \
+    -x NCCL_DEBUG=VERSION \
+    -x LD_LIBRARY_PATH \
+    -x PATH \
+    -mca pml ob1 -mca btl ^openib \
+    --allow-run-as-root \
+    python ${BASEDIR}/../mask_rcnn_main.py \
+        --mode="train_and_eval" \
+        --checkpoint="/model/resnet/resnet-nhwc-2018-10-14/model.ckpt-112602" \
+        --eval_samples=5000 \
+        --init_learning_rate=0.04 \
+        --learning_rate_steps="30000,40000" \
+        --model_dir="/results/" \
+        --num_steps_per_eval=3696 \
+        --total_steps=45000 \
+        --train_batch_size=4 \
+        --eval_batch_size=8 \
+        --training_file_pattern="/data/train*.tfrecord" \
+        --validation_file_pattern="/data/val*.tfrecord" \
+        --val_json_file="/data/annotations/instances_val2017.json" \
+        --nouse_amp \
+        --use_batched_nms \
+        --use_xla \
+        --nouse_custom_box_proposals_op

+ 0 - 0
TensorFlow2/Segmentation/MaskRCNN/weights/.gitkeep


+ 120 - 0
TensorFlow2/Segmentation/MaskRCNN/weights/extract_RN50_weights.py

@@ -0,0 +1,120 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import sys
+import getopt
+import logging
+import tensorflow as tf
+
+"""
+python weights/extract_RN50_weights.py \
+    --checkpoint_dir=weights/mask-rcnn/1555659850/ckpt/model.ckpt \
+    --save_to=weights/resnet/extracted_from_maskrcnn \
+    --dry_run
+
+python weights/extract_RN50_weights.py \
+    --checkpoint_dir=weights/mask-rcnn/1555659850/ckpt/model.ckpt \
+    --save_to=weights/resnet/extracted_from_maskrcnn
+"""
+
+usage_str = 'python extract_RN50_weights.py --checkpoint_dir=weights/mask-rcnn/1555659850/ckpt/model.ckpt ' \
+            '--save_to=weights/resnet/extracted_from_maskrcnn --verbose --dry_run'
+
+
+def rename(checkpoint_dir, save_to, dry_run, verbose):
+
+    _ = tf.train.get_checkpoint_state(checkpoint_dir)
+
+    with tf.compat.v1.Session() as sess:
+
+        total_vars_loaded = 0
+
+        for var_name, _ in tf.contrib.framework.list_variables(checkpoint_dir):
+
+            if "resnet50" in var_name:
+                # Load the variable
+                var = tf.train.load_variable(checkpoint_dir, var_name)
+                total_vars_loaded += 1
+            else:
+                continue
+
+            if not dry_run:
+                _ = tf.Variable(var, name=var_name[9:])  # remove "resnet50/"
+                # _ = tf.Variable(var, name=var_name)
+
+            if verbose:
+                print('Loading Variable: %s.' % var_name)
+
+        print("Total Vars Loaded: %d" % total_vars_loaded)
+
+        if not dry_run:
+
+            if not os.path.isdir(save_to):
+                os.makedirs(save_to)
+
+            save_path = os.path.join(save_to, "resnet50.ckpt")
+            print("Model save location: %s" % save_path)
+
+            # Save the variables
+            saver = tf.compat.v1.train.Saver()
+            sess.run(tf.compat.v1.global_variables_initializer())
+            saver.save(sess, save_path)
+
+
+def main(argv):
+
+    checkpoint_dir = None
+    save_to = None
+    dry_run = False
+    verbose = False
+
+    try:
+        opts, args = getopt.getopt(
+            argv, 'h', ['help=', 'checkpoint_dir=', 'save_to=', 'verbose', 'dry_run']
+        )
+    except getopt.GetoptError:
+        print(usage_str)
+        sys.exit(2)
+
+    for opt, arg in opts:
+        if opt in ('-h', '--help'):
+            print(usage_str)
+            sys.exit()
+        elif opt == '--checkpoint_dir':
+            checkpoint_dir = arg
+        elif opt == '--save_to':
+            save_to = arg
+        elif opt == '--verbose':
+            verbose = True
+        elif opt == '--dry_run':
+            dry_run = True
+
+    if not checkpoint_dir:
+        print('Please specify a checkpoint_dir. Usage:')
+        print(usage_str)
+        sys.exit(2)
+
+    rename(checkpoint_dir, save_to, dry_run, verbose)
+
+
+if __name__ == '__main__':
+
+    logging.disable(logging.WARNING)
+    os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
+
+    main(sys.argv[1:])

+ 159 - 0
TensorFlow2/Segmentation/MaskRCNN/weights/inspect_checkpoint.py

@@ -0,0 +1,159 @@
+#! /usr/bin/python
+# -*- coding: utf-8 -*-
+
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""A simple script for inspect checkpoint files."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import argparse
+import sys
+
+import numpy as np
+
+from tensorflow.python import pywrap_tensorflow
+from tensorflow.python.platform import app
+from tensorflow.python.platform import flags
+
+FLAGS = None
+"""
+Usage: python inspect_checkpoint.py --file_name='weights/vgg16.ckpt'
+Usage: python inspect_checkpoint.py --file_name='weights/reprocessed/mobilenet.ckpt'
+"""
+
+
+def print_tensors_in_checkpoint_file(file_name, tensor_name, all_tensors, all_tensor_names=False):
+    """Prints tensors in a checkpoint file.
+
+  If no `tensor_name` is provided, prints the tensor names and shapes
+  in the checkpoint file.
+
+  If `tensor_name` is provided, prints the content of the tensor.
+
+  Args:
+    file_name: Name of the checkpoint file.
+    tensor_name: Name of the tensor in the checkpoint file to print.
+    all_tensors: Boolean indicating whether to print all tensors.
+    all_tensor_names: Boolean indicating whether to print all tensor names.
+  """
+    try:
+        reader = pywrap_tensorflow.NewCheckpointReader(file_name)
+        if all_tensors or all_tensor_names:
+            var_to_shape_map = reader.get_variable_to_shape_map()
+            for key in sorted(var_to_shape_map):
+                print("tensor_name: ", key)
+                if all_tensors:
+                    print(reader.get_tensor(key))
+        elif not tensor_name:
+            print(reader.debug_string().decode("utf-8"))
+        else:
+            print("tensor_name: ", tensor_name)
+            print(reader.get_tensor(tensor_name))
+    except Exception as e:  # pylint: disable=broad-except
+        print(str(e))
+        if "corrupted compressed block contents" in str(e):
+            print("It's likely that your checkpoint file has been compressed " "with SNAPPY.")
+        if ("Data loss" in str(e) and (any([ext in file_name for ext in [".index", ".meta", ".data"]]))):
+            proposed_file = ".".join(file_name.split(".")[0:-1])
+            v2_file_error_template = """
+It's likely that this is a V2 checkpoint and you need to provide the filename
+*prefix*.  Try removing the '.' and extension.  Try:
+inspect checkpoint --file_name = {}"""
+            print(v2_file_error_template.format(proposed_file))
+
+
+def parse_numpy_printoption(kv_str):
+    """Sets a single numpy printoption from a string of the form 'x=y'.
+
+  See documentation on numpy.set_printoptions() for details about what values
+  x and y can take. x can be any option listed there other than 'formatter'.
+
+  Args:
+    kv_str: A string of the form 'x=y', such as 'threshold=100000'
+
+  Raises:
+    argparse.ArgumentTypeError: If the string couldn't be used to set any
+        numpy printoption.
+  """
+    k_v_str = kv_str.split("=", 1)
+    if len(k_v_str) != 2 or not k_v_str[0]:
+        raise argparse.ArgumentTypeError("'%s' is not in the form k=v." % kv_str)
+    k, v_str = k_v_str
+    printoptions = np.get_printoptions()
+    if k not in printoptions:
+        raise argparse.ArgumentTypeError("'%s' is not a valid printoption." % k)
+    v_type = type(printoptions[k])
+    if v_type is type(None):
+        raise argparse.ArgumentTypeError("Setting '%s' from the command line is not supported." % k)
+    try:
+        v = (v_type(v_str) if v_type is not bool else flags.BooleanParser().parse(v_str))
+    except ValueError as e:
+        raise argparse.ArgumentTypeError(str(e))
+    np.set_printoptions(**{k: v})
+
+
+def main(unused_argv):
+    if not FLAGS.file_name:
+        print(
+            "Usage: inspect_checkpoint --file_name=checkpoint_file_name "
+            "[--tensor_name=tensor_to_print] "
+            "[--all_tensors] "
+            "[--all_tensor_names] "
+            "[--printoptions]"
+        )
+        sys.exit(1)
+    else:
+        print_tensors_in_checkpoint_file(FLAGS.file_name, FLAGS.tensor_name, FLAGS.all_tensors, FLAGS.all_tensor_names)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.register("type", "bool", lambda v: v.lower() == "true")
+    parser.add_argument(
+        "--file_name",
+        type=str,
+        default="",
+        help="Checkpoint filename. "
+        "Note, if using Checkpoint V2 format, file_name is the "
+        "shared prefix between all files in the checkpoint."
+    )
+    parser.add_argument("--tensor_name", type=str, default="", help="Name of the tensor to inspect")
+    parser.add_argument(
+        "--all_tensors",
+        nargs="?",
+        const=True,
+        type="bool",
+        default=False,
+        help="If True, print the names and values of all the tensors."
+    )
+    parser.add_argument(
+        "--all_tensor_names",
+        nargs="?",
+        const=True,
+        type="bool",
+        default=False,
+        help="If True, print the names of all the tensors."
+    )
+    parser.add_argument(
+        "--printoptions",
+        nargs="*",
+        type=parse_numpy_printoption,
+        help="Argument for numpy.set_printoptions(), in the form 'k=v'."
+    )
+    FLAGS, unparsed = parser.parse_known_args()
+    app.run(main=main, argv=[sys.argv[0]] + unparsed)

+ 0 - 0
TensorFlow2/Segmentation/MaskRCNN/weights/mask-rcnn/.gitkeep


+ 0 - 0
TensorFlow2/Segmentation/MaskRCNN/weights/mask-rcnn/1555659850/.gitkeep


+ 48 - 0
TensorFlow2/Segmentation/MaskRCNN/weights/pb_to_ckpt.py

@@ -0,0 +1,48 @@
+#! /usr/bin/python
+# -*- coding: utf-8 -*-
+
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+import os
+import argparse
+import logging
+
+import tensorflow as tf
+
+# Pass the filename as an argument
+parser = argparse.ArgumentParser()
+
+parser.add_argument(
+    "--frozen_model_filename", default="/path-to-pb-file/Binary_Protobuf.pb", type=str, help="Pb model file to import"
+)
+
+parser.add_argument(
+    "--output_filename", default="/path-to-ckpt-file/model.ckpt", type=str, help="Path where the exported checkpoint will be saved"
+)
+
+args = parser.parse_args()
+
+if __name__ == "__main__":
+
+    logging.disable(logging.WARNING)
+    os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
+
+    with tf.compat.v1.Session(graph=tf.Graph()) as sess:
+        tf.compat.v1.saved_model.loader.load(sess, [tf.saved_model.SERVING], args.frozen_model_filename)
+
+        saver = tf.compat.v1.train.Saver()
+        save_path = saver.save(sess, args.output_filename)
+        print("Model saved to ckpt format: %s" % save_path)

+ 0 - 0
TensorFlow2/Segmentation/MaskRCNN/weights/resnet/.gitkeep


+ 0 - 0
TensorFlow2/Segmentation/MaskRCNN/weights/resnet/extracted_from_maskrcnn/.gitkeep


+ 0 - 0
TensorFlow2/Segmentation/MaskRCNN/weights/resnet/resnet-nhwc-2018-02-07/.gitkeep


+ 0 - 0
TensorFlow2/Segmentation/MaskRCNN/weights/resnet/resnet-nhwc-2018-10-14/.gitkeep