Explorar el Código

[UNet_Industrial/TF] Adding Jupyter notebooks and small fixes

Przemek Strzelczyk hace 6 años
padre
commit
882bc26d40

+ 4 - 0
TensorFlow/Segmentation/UNet_Industrial/.gitignore

@@ -100,6 +100,10 @@ venv.bak/
 # mkdocs documentation
 /site
 
+# weights
+/pretrained_weights
+/exported_models
+
 # mypy
 .mypy_cache/
 .idea/

+ 5 - 2
TensorFlow/Segmentation/UNet_Industrial/README.md

@@ -511,8 +511,11 @@ To achieve these same results, follow the [Quick Start Guide](#quick-start-guide
 ## Release notes
 
 ### Changelog
-March 18, 2019
-* Initial release
+
+* October 2019
+  * Jupyter notebooks added
+* March, 2019
+  * Initial release
 
 ### Known issues
 There are no known issues with this model.

+ 92 - 0
TensorFlow/Segmentation/UNet_Industrial/download_and_preprocess_dagm2007_public.sh

@@ -0,0 +1,92 @@
+#!/bin/bash
+
+##############################################################################
+# Copyright (c) Jonathan Dekhtiar - [email protected]
+# All Rights Reserved.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+##############################################################################
+
+# Downloads the public DAGM 2007 dataset zip files and extracts them under
+# <dataset_dir>/{zip_files,raw_images}/public. Also pre-creates the matching
+# `private` directories for the (manually obtained) private dataset.
+#
+# Usage: ./download_and_preprocess_dagm2007_public.sh /path/to/dataset/directory/
+
+# Refuse to run under `sh`: this script relies on bash-only constructs
+# ([[ ]] tests and arrays).
+if [[ ! "$BASH_VERSION" ]] ; then
+    echo "Please do not use sh to run this script ($0), just execute it directly" 1>&2
+    exit 1
+fi
+
+if [[ -z "$1" ]]
+  then
+    echo -e "Error: Argument is missing. No dataset directory received."
+    echo -e "Usage: '$0 /path/to/dataset/directory/'"
+    exit 1
+fi
+
+# Quote "$1" so dataset paths containing spaces or glob characters work.
+DATASET_DIR=$(realpath -s "$1")
+
+ZIP_FILES_DIR="${DATASET_DIR}/zip_files"
+RAW_IMAGES_DIR="${DATASET_DIR}/raw_images"
+
+PUBLIC_ZIP_FILES_DIR="${ZIP_FILES_DIR}/public"
+PUBLIC_RAW_IMAGES_DIR="${RAW_IMAGES_DIR}/public"
+
+if [[ ! -e "${PUBLIC_ZIP_FILES_DIR}" ]]; then
+    echo "creating ${PUBLIC_ZIP_FILES_DIR} ..."
+    mkdir -p "${PUBLIC_ZIP_FILES_DIR}"
+fi
+
+if [[ ! -e "${PUBLIC_RAW_IMAGES_DIR}" ]]; then
+    echo "creating ${PUBLIC_RAW_IMAGES_DIR} ..."
+    mkdir -p "${PUBLIC_RAW_IMAGES_DIR}"
+fi
+
+PRIVATE_ZIP_FILES_DIR="${ZIP_FILES_DIR}/private"
+PRIVATE_RAW_IMAGES_DIR="${RAW_IMAGES_DIR}/private"
+
+if [[ ! -e "${PRIVATE_ZIP_FILES_DIR}" ]]; then
+    echo "creating ${PRIVATE_ZIP_FILES_DIR} ..."
+    mkdir -p "${PRIVATE_ZIP_FILES_DIR}"
+fi
+
+if [[ ! -e "${PRIVATE_RAW_IMAGES_DIR}" ]]; then
+    echo "creating ${PRIVATE_RAW_IMAGES_DIR} ..."
+    mkdir -p "${PRIVATE_RAW_IMAGES_DIR}"
+fi
+
+echo -e "\n################################################"
+echo -e "Processing Public Dataset"
+echo -e "################################################\n"
+
+sleep 2
+
+BASE_PUBLIC_URL="https://resources.mpi-inf.mpg.de/conference/dagm/2007"
+
+declare -a arr=(
+    "Class1.zip"
+    "Class1_def.zip"
+    "Class2.zip"
+    "Class2_def.zip"
+    "Class3.zip"
+    "Class3_def.zip"
+    "Class4.zip"
+    "Class4_def.zip"
+    "Class5.zip"
+    "Class5_def.zip"
+    "Class6.zip"
+    "Class6_def.zip"
+)
+
+for file in "${arr[@]}"
+do
+    if [[ ! -e "${PUBLIC_ZIP_FILES_DIR}/${file}" ]]; then
+        echo -e "Downloading File: $BASE_PUBLIC_URL/$file ..."
+        # On failure, delete the partial file so a re-run retries the download
+        # instead of treating the truncated zip as already present.
+        if ! wget -N "${BASE_PUBLIC_URL}/${file}" -O "${PUBLIC_ZIP_FILES_DIR}/${file}"; then
+            rm -f "${PUBLIC_ZIP_FILES_DIR}/${file}"
+            echo "Download failed: ${file}" 1>&2
+            exit 1
+        fi
+    fi
+
+    # Unzip without overwriting
+    unzip -n "${PUBLIC_ZIP_FILES_DIR}/${file}" -d "${PUBLIC_RAW_IMAGES_DIR}"
+
+done
+
+chmod -R 744 "${PUBLIC_ZIP_FILES_DIR}"
+chmod -R 744 "${PUBLIC_RAW_IMAGES_DIR}"

+ 221 - 0
TensorFlow/Segmentation/UNet_Industrial/export_saved_model.py

@@ -0,0 +1,221 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# ==============================================================================
+#
+# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# ==============================================================================
+
+"""
+Usage:
+
+    python export_saved_model.py \
+        --activation_fn='relu' \
+        --batch_size=16 \
+        --data_format='NCHW' \
+        --input_dtype="fp32" \
+        --export_dir="exported_models" \
+        --model_checkpoint_path="path/to/checkpoint/model.ckpt-2500" \
+        --unet_variant='tinyUNet' \
+        --use_xla \
+        --use_tf_amp
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import argparse
+import pprint
+
+os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
+
+import tensorflow as tf
+
+from dllogger.logger import LOGGER
+
+from model.unet import UNet_v1
+from model.blocks.activation_blck import authorized_activation_fn
+
+from utils.cmdline_helper import _add_bool_argument
+
+
+def get_export_flags():
+    parser = argparse.ArgumentParser(description="JoC-UNet_v1-TF-ExportFlags")
+
+    parser.add_argument('--export_dir', default=None, required=True, type=str, help='The export directory.')
+    parser.add_argument('--model_checkpoint_path', default=None, required=True, help='Checkpoint path.')
+
+    parser.add_argument(
+        '--data_format',
+        choices=['NHWC', 'NCHW'],
+        type=str,
+        default="NCHW",
+        required=False,
+        help="""Which Tensor format is used for computation inside the mode"""
+    )
+
+    parser.add_argument(
+        '--input_dtype',
+        choices=['fp32', 'fp16'],
+        type=str,
+        default="fp32",
+        required=False,
+        help="""Tensorflow dtype of the input tensor"""
+    )
+
+    parser.add_argument(
+        '--unet_variant',
+        default="tinyUNet",
+        choices=UNet_v1.authorized_models_variants,
+        type=str,
+        required=False,
+        help="""Which model size is used. This parameter control directly the size and the number of parameters"""
+    )
+
+    parser.add_argument(
+        '--activation_fn',
+        choices=authorized_activation_fn,
+        type=str,
+        default="relu",
+        required=False,
+        help="""Which activation function is used after the convolution layers"""
+    )
+
+    _add_bool_argument(
+        parser=parser,
+        name="use_tf_amp",
+        default=False,
+        required=False,
+        help="Enable Automatic Mixed Precision Computation to maximise performance."
+    )
+
+    _add_bool_argument(
+        parser=parser,
+        name="use_xla",
+        default=False,
+        required=False,
+        help="Enable Tensorflow XLA to maximise performance."
+    )
+
+    parser.add_argument('--batch_size', default=16, type=int, help='Evaluation batch size.')
+
+    FLAGS, unknown_args = parser.parse_known_args()
+
+    if len(unknown_args) > 0:
+
+        for bad_arg in unknown_args:
+            print("ERROR: Unknown command line arg: %s" % bad_arg)
+
+        raise ValueError("Invalid command line arg(s)")
+
+    return FLAGS
+
+
+def export_model(RUNNING_CONFIG):
+
+    if RUNNING_CONFIG.use_tf_amp:
+        os.environ["TF_ENABLE_AUTO_MIXED_PRECISION_GRAPH_REWRITE"] = "1"
+
+    model = UNet_v1(
+        model_name="UNet_v1",
+        input_format="NHWC",
+        compute_format=RUNNING_CONFIG.data_format,
+        n_output_channels=1,
+        unet_variant=RUNNING_CONFIG.unet_variant,
+        weight_init_method="he_normal",
+        activation_fn=RUNNING_CONFIG.activation_fn
+    )
+
+    config_proto = tf.ConfigProto()
+
+    config_proto.allow_soft_placement = True
+    config_proto.log_device_placement = False
+
+    config_proto.gpu_options.allow_growth = True
+
+    if RUNNING_CONFIG.use_xla:  # Only working on single GPU
+        LOGGER.log("XLA is activated - Experimental Feature")
+        config_proto.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1
+
+    config_proto.gpu_options.force_gpu_compatible = True  # Force pinned memory
+
+    run_config = tf.estimator.RunConfig(
+        model_dir=None,
+        tf_random_seed=None,
+        save_summary_steps=1e9,  # disabled
+        save_checkpoints_steps=None,
+        save_checkpoints_secs=None,
+        session_config=config_proto,
+        keep_checkpoint_max=None,
+        keep_checkpoint_every_n_hours=1e9,  # disabled
+        log_step_count_steps=1e9,
+        train_distribute=None,
+        device_fn=None,
+        protocol=None,
+        eval_distribute=None,
+        experimental_distribute=None
+    )
+
+    estimator = tf.estimator.Estimator(
+        model_fn=model,
+        model_dir=RUNNING_CONFIG.model_checkpoint_path,
+        config=run_config,
+        params={'debug_verbosity': 0}
+    )
+
+    LOGGER.log('[*] Exporting the model ...')
+
+    input_type = tf.float32 if RUNNING_CONFIG.input_dtype else tf.float16
+
+    def get_serving_input_receiver_fn():
+
+        input_shape = [RUNNING_CONFIG.batch_size, 512, 512, 1]
+
+        def serving_input_receiver_fn():
+            features = tf.placeholder(dtype=input_type, shape=input_shape, name='input_tensor')
+
+            return tf.estimator.export.TensorServingInputReceiver(features=features, receiver_tensors=features)
+
+        return serving_input_receiver_fn
+
+    export_path = estimator.export_saved_model(
+        export_dir_base=RUNNING_CONFIG.export_dir,
+        serving_input_receiver_fn=get_serving_input_receiver_fn(),
+        checkpoint_path=RUNNING_CONFIG.model_checkpoint_path
+    )
+
+    LOGGER.log('[*] Done! path: `%s`' % export_path.decode())
+
+
+if __name__ == '__main__':
+
+    # Silence TF logging noise during export.
+    tf.logging.set_verbosity(tf.logging.ERROR)
+    # NOTE(review): in TF 1.x this symbol lives at
+    # `tf.compat.v1.disable_eager_execution` — confirm the target TF version
+    # exposes a top-level `tf.disable_eager_execution`.
+    tf.disable_eager_execution()
+
+    flags = get_export_flags()
+
+    # Validate that the checkpoint file prefix actually points at a checkpoint
+    # (both the .index and .meta companion files must exist).
+    for endpattern in [".index", ".meta"]:
+        file_to_check = flags.model_checkpoint_path + endpattern
+        if not os.path.isfile(file_to_check):
+            raise FileNotFoundError("The checkpoint file `%s` does not exist" % file_to_check)
+
+    print(" ========================= Export Flags =========================\n")
+    pprint.pprint(dict(flags._get_kwargs()))
+    print("\n %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+    export_model(flags)

+ 8 - 4
TensorFlow/Segmentation/UNet_Industrial/model/unet.py

@@ -157,6 +157,14 @@ class UNet_v1(object):
             if "loss_fn_name" not in params.keys():
                 raise RuntimeError("Parameter `loss_fn_name` is missing...")
 
+        if mode == tf.estimator.ModeKeys.PREDICT:
+            y_pred, y_pred_logits = self.build_model(
+                features, training=False, reuse=False, debug_verbosity=params["debug_verbosity"]
+            )
+
+            predictions = {'logits': y_pred}
+            return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
+
         input_image, mask_image = features
 
         with tf.device("/gpu:0"):
@@ -175,10 +183,6 @@ class UNet_v1(object):
             all_trainable_vars = tf.reduce_sum([tf.reduce_prod(v.shape) for v in tf.trainable_variables()])
             tf.identity(all_trainable_vars, name='trainable_parameters_count_ref')
 
-            if mode == tf.estimator.ModeKeys.PREDICT:
-                predictions = {'logits': y_pred}
-                return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
-
             if mode == tf.estimator.ModeKeys.EVAL:
                 eval_metrics = dict()
 

La diferencia del archivo ha sido suprimido porque es demasiado grande
+ 604 - 0
TensorFlow/Segmentation/UNet_Industrial/notebooks/Colab_UNet_Industrial_TF_TFTRT_inference_demo.ipynb


+ 42 - 0
TensorFlow/Segmentation/UNet_Industrial/notebooks/README.md

@@ -0,0 +1,42 @@
+## Jupyter demo notebooks
+This folder contains demo notebooks for the TensorFlow UNet Industrial model.
+
+### 1. TensorFlow_UNet_Industrial_TF_train_and_inference.ipynb: end to end training and inference demo.
+
+The most convenient way to make use of the NVIDIA Tensorflow UNet model is via a docker container, which provides a self-contained, isolated and re-producible environment for all experiments. Refer to the [Quick Start Guide section](https://github.com/vinhngx/DeepLearningExamples/tree/vinhn_unet_industrial_demo/TensorFlow/Segmentation/UNet_Industrial#requirements) of the Readme documentation for a comprehensive guide. We briefly summarize the steps here.
+
+First, clone the repository:
+
+```
+git clone https://github.com/NVIDIA/DeepLearningExamples.git
+cd DeepLearningExamples/TensorFlow/Segmentation/UNet_Industrial
+```
+
+Next, build the NVIDIA UNet_Industrial container:
+
+```
+docker build . --rm -t unet_industrial:latest
+```
+
+Then launch the container with:
+
+```
+nvidia-docker run -it --rm \
+    --shm-size=2g --ulimit memlock=-1 --ulimit stack=67108864 \
+    -v /path/to/dataset:/data/dagm2007/ \
+    -v /path/to/results:/results \
+    unet_industrial:latest
+```
where `/path/to/dataset` is the path on the host machine where the data was/is to be downloaded. More on data set preparation in the next section. `/path/to/results` is where the trained model will be stored.
+
+Within the docker interactive bash session, start Jupyter with
+
+```
+jupyter notebook --ip 0.0.0.0 --port 8888
+```
+
+Then open the Jupyter GUI interface on your host machine at http://localhost:8888. Within the container, this notebook itself is located at `/workspace/unet_industrial/notebooks`.
+
+### 2. Colab_UNet_Industrial_TF_TFTRT_inference_demo.ipynb: inference from a pretrained UNet model with TensorFlow-TensorRT (TF-TRT).
+
+This notebook is designed to run on Google Colab via this [link](https://colab.research.google.com/github/NVIDIA/DeepLearningExamples/blob/master/TensorFlow/Segmentation/UNet_Industrial/notebooks/Colab_UNet_Industrial_TF_TFTRT_inference_demo.ipynb)

La diferencia del archivo ha sido suprimido porque es demasiado grande
+ 535 - 0
TensorFlow/Segmentation/UNet_Industrial/notebooks/TensorFlow_UNet_Industrial_TF_train_and_inference.ipynb


+ 92 - 0
TensorFlow/Segmentation/UNet_Industrial/notebooks/download_and_preprocess_dagm2007_public.sh

@@ -0,0 +1,92 @@
+#!/bin/bash
+
+##############################################################################
+# Copyright (c) Jonathan Dekhtiar - [email protected]
+# All Rights Reserved.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+##############################################################################
+
+# Downloads the public DAGM 2007 dataset zip files and extracts them under
+# <dataset_dir>/{zip_files,raw_images}/public. Also pre-creates the matching
+# `private` directories for the (manually obtained) private dataset.
+#
+# Usage: ./download_and_preprocess_dagm2007_public.sh /path/to/dataset/directory/
+
+# Refuse to run under `sh`: this script relies on bash-only constructs
+# ([[ ]] tests and arrays).
+if [[ ! "$BASH_VERSION" ]] ; then
+    echo "Please do not use sh to run this script ($0), just execute it directly" 1>&2
+    exit 1
+fi
+
+if [[ -z "$1" ]]
+  then
+    echo -e "Error: Argument is missing. No dataset directory received."
+    echo -e "Usage: '$0 /path/to/dataset/directory/'"
+    exit 1
+fi
+
+# Quote "$1" so dataset paths containing spaces or glob characters work.
+DATASET_DIR=$(realpath -s "$1")
+
+ZIP_FILES_DIR="${DATASET_DIR}/zip_files"
+RAW_IMAGES_DIR="${DATASET_DIR}/raw_images"
+
+PUBLIC_ZIP_FILES_DIR="${ZIP_FILES_DIR}/public"
+PUBLIC_RAW_IMAGES_DIR="${RAW_IMAGES_DIR}/public"
+
+if [[ ! -e "${PUBLIC_ZIP_FILES_DIR}" ]]; then
+    echo "creating ${PUBLIC_ZIP_FILES_DIR} ..."
+    mkdir -p "${PUBLIC_ZIP_FILES_DIR}"
+fi
+
+if [[ ! -e "${PUBLIC_RAW_IMAGES_DIR}" ]]; then
+    echo "creating ${PUBLIC_RAW_IMAGES_DIR} ..."
+    mkdir -p "${PUBLIC_RAW_IMAGES_DIR}"
+fi
+
+PRIVATE_ZIP_FILES_DIR="${ZIP_FILES_DIR}/private"
+PRIVATE_RAW_IMAGES_DIR="${RAW_IMAGES_DIR}/private"
+
+if [[ ! -e "${PRIVATE_ZIP_FILES_DIR}" ]]; then
+    echo "creating ${PRIVATE_ZIP_FILES_DIR} ..."
+    mkdir -p "${PRIVATE_ZIP_FILES_DIR}"
+fi
+
+if [[ ! -e "${PRIVATE_RAW_IMAGES_DIR}" ]]; then
+    echo "creating ${PRIVATE_RAW_IMAGES_DIR} ..."
+    mkdir -p "${PRIVATE_RAW_IMAGES_DIR}"
+fi
+
+echo -e "\n################################################"
+echo -e "Processing Public Dataset"
+echo -e "################################################\n"
+
+sleep 2
+
+BASE_PUBLIC_URL="https://resources.mpi-inf.mpg.de/conference/dagm/2007"
+
+declare -a arr=(
+    "Class1.zip"
+    "Class1_def.zip"
+    "Class2.zip"
+    "Class2_def.zip"
+    "Class3.zip"
+    "Class3_def.zip"
+    "Class4.zip"
+    "Class4_def.zip"
+    "Class5.zip"
+    "Class5_def.zip"
+    "Class6.zip"
+    "Class6_def.zip"
+)
+
+for file in "${arr[@]}"
+do
+    if [[ ! -e "${PUBLIC_ZIP_FILES_DIR}/${file}" ]]; then
+        echo -e "Downloading File: $BASE_PUBLIC_URL/$file ..."
+        # On failure, delete the partial file so a re-run retries the download
+        # instead of treating the truncated zip as already present.
+        if ! wget -N "${BASE_PUBLIC_URL}/${file}" -O "${PUBLIC_ZIP_FILES_DIR}/${file}"; then
+            rm -f "${PUBLIC_ZIP_FILES_DIR}/${file}"
+            echo "Download failed: ${file}" 1>&2
+            exit 1
+        fi
+    fi
+
+    # Unzip without overwriting
+    unzip -n "${PUBLIC_ZIP_FILES_DIR}/${file}" -d "${PUBLIC_RAW_IMAGES_DIR}"
+
+done
+
+chmod -R 744 "${PUBLIC_ZIP_FILES_DIR}"
+chmod -R 744 "${PUBLIC_RAW_IMAGES_DIR}"

+ 2 - 2
TensorFlow/Segmentation/UNet_Industrial/runtime/runner.py

@@ -148,7 +148,7 @@ class Runner(object):
 
         os.environ['TF_SYNC_ON_FINISH'] = '0'
         os.environ['TF_AUTOTUNE_THRESHOLD'] = '2'
-        os.environ['TF_DISABLE_NVTX_RANGES'] = '1'
+        # os.environ['TF_DISABLE_NVTX_RANGES'] = '1' 
 
         # =================================================
 
@@ -627,7 +627,7 @@ class Runner(object):
             LOGGER.log('TP', tps)
             LOGGER.log('FN', fns)
             LOGGER.log('TN', tns)
-            LOGGER.log('FP', tps)
+            LOGGER.log('FP', fps)
             LOGGER.log('TPR', tpr)
             LOGGER.log('TNR', tnr)
 

+ 1 - 4
TensorFlow/Segmentation/UNet_Industrial/utils/hooks/profiler_hook.py

@@ -210,10 +210,7 @@ class ProfilerHook(tf.train.SessionRunHook):
             (avg_processing_speed, total_processing_hours, total_processing_minutes, total_processing_seconds)
         )
 
-        perf_dict = {
-            'throughput': str(avg_processing_speed),
-            'processing_time': str(total_processing_time)
-        }
+        perf_dict = {'throughput': str(avg_processing_speed), 'processing_time': str(total_processing_time)}
 
         perf_filename = "performances_%s.json" % ("train" if self._is_training else "eval")
 

Algunos archivos no se mostraron porque demasiados archivos cambiaron en este cambio