Răsfoiți Sursa

Remove dllogger and fix bugs from GH

Signed-off-by: Pablo Ribalta <[email protected]>
Pablo Ribalta 6 ani în urmă
părinte
comite
70f3e4362c
46 a modificat fișierele cu 957 adăugiri și 1345 ștergeri
  1. 7 3
      TensorFlow/Segmentation/UNet_Industrial/Dockerfile
  2. 17 23
      TensorFlow/Segmentation/UNet_Industrial/README.md
  3. 0 2
      TensorFlow/Segmentation/UNet_Industrial/datasets/core.py
  4. 43 31
      TensorFlow/Segmentation/UNet_Industrial/datasets/dagm2007.py
  5. 0 22
      TensorFlow/Segmentation/UNet_Industrial/dllogger/README.md
  6. 0 19
      TensorFlow/Segmentation/UNet_Industrial/dllogger/dllogger/__init__.py
  7. 0 60
      TensorFlow/Segmentation/UNet_Industrial/dllogger/dllogger/autologging.py
  8. 0 531
      TensorFlow/Segmentation/UNet_Industrial/dllogger/dllogger/logger.py
  9. 0 255
      TensorFlow/Segmentation/UNet_Industrial/dllogger/dllogger/tags.py
  10. 0 151
      TensorFlow/Segmentation/UNet_Industrial/dllogger/dummy_run.py
  11. 0 37
      TensorFlow/Segmentation/UNet_Industrial/dllogger/setup.py
  12. 12 17
      TensorFlow/Segmentation/UNet_Industrial/main.py
  13. 2 4
      TensorFlow/Segmentation/UNet_Industrial/model/layers/utils.py
  14. 9 3
      TensorFlow/Segmentation/UNet_Industrial/model/unet.py
  15. 1 0
      TensorFlow/Segmentation/UNet_Industrial/requirements.txt
  16. 94 80
      TensorFlow/Segmentation/UNet_Industrial/runtime/runner.py
  17. 3 3
      TensorFlow/Segmentation/UNet_Industrial/scripts/UNet_AMP_1GPU.sh
  18. 50 0
      TensorFlow/Segmentation/UNet_Industrial/scripts/UNet_AMP_1GPU_XLA.sh
  19. 3 3
      TensorFlow/Segmentation/UNet_Industrial/scripts/UNet_AMP_4GPU.sh
  20. 60 0
      TensorFlow/Segmentation/UNet_Industrial/scripts/UNet_AMP_4GPU_XLA.sh
  21. 3 3
      TensorFlow/Segmentation/UNet_Industrial/scripts/UNet_AMP_8GPU.sh
  22. 60 0
      TensorFlow/Segmentation/UNet_Industrial/scripts/UNet_AMP_8GPU_XLA.sh
  23. 3 3
      TensorFlow/Segmentation/UNet_Industrial/scripts/UNet_AMP_EVAL.sh
  24. 50 0
      TensorFlow/Segmentation/UNet_Industrial/scripts/UNet_AMP_EVAL_XLA.sh
  25. 2 2
      TensorFlow/Segmentation/UNet_Industrial/scripts/UNet_FP32_1GPU.sh
  26. 50 0
      TensorFlow/Segmentation/UNet_Industrial/scripts/UNet_FP32_1GPU_XLA.sh
  27. 2 2
      TensorFlow/Segmentation/UNet_Industrial/scripts/UNet_FP32_4GPU.sh
  28. 60 0
      TensorFlow/Segmentation/UNet_Industrial/scripts/UNet_FP32_4GPU_XLA.sh
  29. 2 2
      TensorFlow/Segmentation/UNet_Industrial/scripts/UNet_FP32_8GPU.sh
  30. 60 0
      TensorFlow/Segmentation/UNet_Industrial/scripts/UNet_FP32_8GPU_XLA.sh
  31. 2 2
      TensorFlow/Segmentation/UNet_Industrial/scripts/UNet_FP32_EVAL.sh
  32. 50 0
      TensorFlow/Segmentation/UNet_Industrial/scripts/UNet_FP32_EVAL_XLA.sh
  33. 11 7
      TensorFlow/Segmentation/UNet_Industrial/scripts/benchmarking/DGX1v_evalbench_AMP.sh
  34. 11 7
      TensorFlow/Segmentation/UNet_Industrial/scripts/benchmarking/DGX1v_evalbench_FP32.sh
  35. 11 7
      TensorFlow/Segmentation/UNet_Industrial/scripts/benchmarking/DGX1v_trainbench_AMP_1GPU.sh
  36. 11 7
      TensorFlow/Segmentation/UNet_Industrial/scripts/benchmarking/DGX1v_trainbench_AMP_4GPU.sh
  37. 11 7
      TensorFlow/Segmentation/UNet_Industrial/scripts/benchmarking/DGX1v_trainbench_AMP_8GPU.sh
  38. 11 7
      TensorFlow/Segmentation/UNet_Industrial/scripts/benchmarking/DGX1v_trainbench_FP32_1GPU.sh
  39. 11 7
      TensorFlow/Segmentation/UNet_Industrial/scripts/benchmarking/DGX1v_trainbench_FP32_4GPU.sh
  40. 11 7
      TensorFlow/Segmentation/UNet_Industrial/scripts/benchmarking/DGX1v_trainbench_FP32_8GPU.sh
  41. 24 0
      TensorFlow/Segmentation/UNet_Industrial/scripts/launch_docker.sh
  42. 1 0
      TensorFlow/Segmentation/UNet_Industrial/utils/__init__.py
  43. 11 3
      TensorFlow/Segmentation/UNet_Industrial/utils/cmdline_helper.py
  44. 137 22
      TensorFlow/Segmentation/UNet_Industrial/utils/hooks/profiler_hook.py
  45. 1 6
      TensorFlow/Segmentation/UNet_Industrial/utils/hvd_utils.py
  46. 50 0
      TensorFlow/Segmentation/UNet_Industrial/utils/logging.py

+ 7 - 3
TensorFlow/Segmentation/UNet_Industrial/Dockerfile

@@ -16,11 +16,15 @@
 #
 # ==============================================================================
 
-FROM nvcr.io/nvidia/tensorflow:19.05-py3
+FROM nvcr.io/nvidia/tensorflow:20.01-tf1-py3
 
 LABEL version="1.0" maintainer="Jonathan DEKHTIAR <[email protected]>"
 
+WORKDIR /opt
+COPY requirements.txt /opt/requirements_unet_tf_industrial.txt
+
+RUN python -m pip --no-cache-dir --no-cache install --upgrade pip && \
+    pip --no-cache-dir --no-cache install -r /opt/requirements_unet_tf_industrial.txt
+
 ADD . /workspace/unet_industrial
 WORKDIR /workspace/unet_industrial
-
-RUN pip install dllogger/

+ 17 - 23
TensorFlow/Segmentation/UNet_Industrial/README.md

@@ -138,7 +138,7 @@ Aside from these dependencies, ensure you have the following components:
 
 * [NVIDIA Docker](https://github.com/NVIDIA/nvidia-docker)
 
-* [TensorFlow 19.03-py3 NGC container](https://ngc.nvidia.com/catalog/containers/nvidia:tensorflow)
+* [TensorFlow 19.12-tf1-py3 NGC container](https://ngc.nvidia.com/catalog/containers/nvidia:tensorflow)
 * (optional) NVIDIA Volta GPU (see section below) - for best training performance using mixed precision
 
 For more information about how to get started with NGC containers, see the
@@ -219,11 +219,6 @@ cd scripts/
 ./UNet_FP32_EVAL.sh <path to result repository> <path to dataset> <DAGM2007 classID (1-10)>
 ```
 
-If you wish to evaluate external checkpoint, make sure to put the TF ckpt files inside a folder named "checkpoints"
-and provide its parent path as `<path to result repository>` in the example above. 
-Be aware that the script will not fail if it does not find the checkpoint. 
-It will randomly initialize the weights and run performance tests.
-
 ## Advanced
 
 The following sections provide greater details of the dataset, running training and inference, and the training results.
@@ -374,7 +369,7 @@ The following sections provide details on the achieved results in training accur
 #### Training accuracy results
 
 Our results were obtained by running the `./scripts/UNet_{FP32, AMP}_{1, 4, 8}GPU.sh` training
-script in the Tensorflow:19.03-py3 NGC container on NVIDIA DGX-1 with 8x V100 16G GPUs.
+script in the Tensorflow:19.12-tf1-py3 NGC container on NVIDIA DGX-1 with 8x V100 16G GPUs.
 
 ##### Threshold = 0.75
 
@@ -481,30 +476,29 @@ script in the Tensorflow:19.03-py3 NGC container on NVIDIA DGX-1 with 8x V100 16
 <!-- Spreedsheet to Markdown: https://thisdavej.com/copy-table-in-excel-and-paste-as-a-markdown-table/ -->
 
 Our results were obtained by running the scripts
-`./scripts/benchmarking/DGX1v_trainbench_{FP16, FP32, FP32AMP, FP32FM}_{1, 4, 8}GPU.sh` training script in the
-TensorFlow 19.03-py3 NGC container on an NVIDIA DGX-1 with 8 V100 16G GPUs.
-
-
-| # GPUs | Precision                       | Throughput (Imgs/sec) | Training Time | Speedup |
-|--------|---------------------------------|-----------------------|---------------|---------|
-| 1      | FP32                            | 89                    | 7m44          | 1.00    |
-| 1      | Automatic Mixed Precision (AMP) | 104                   | 6m40          | 1.17    |
-| 4      | FP32                            | 261                   | 2m48          | 1.00    |
-| 4      | Automatic Mixed Precision (AMP) | 302                   | 2m27          | 1.16    |
-| 8      | FP32                            | 445                   | 1m44          | 1.00    |
-| 8      | Automatic Mixed Precision (AMP) | 491                   | 1m36          | 1.10    |
+`./scripts/benchmarking/DGX1v_trainbench_{FP32, AMP}_{1, 4, 8}GPU.sh` training script in the
+TensorFlow `19.12-tf1-py3` NGC container on an NVIDIA DGX-1 with 8 V100 16G GPUs.
+
+| # GPUs | Precision                       | Throughput (Imgs/sec) | AMP Speedup | Scaling efficiency |
+|--------|---------------------------------|-----------------------|-------------|--------------------|
+| 1      | FP32                            | 92                    | 1.00        | 1.00               |
+| 1      | Automatic Mixed Precision (AMP) | 167                   | 1.82        | 1.00               |
+| 4      | FP32                            | 299                   | 1.00        | 3.25               |
+| 4      | Automatic Mixed Precision (AMP) | 458                   | 1.53        | 2.74               |
+| 8      | FP32                            | 507                   | 1.00        | 5.51               |
+| 8      | Automatic Mixed Precision (AMP) | 561                   | 1.11        | 3.36               |
 
 To achieve these same results, follow the [Quick Start Guide](#quick-start-guide) outlined above.
 
 #### Inference performance results
 
-Our results were obtained by running the aforementioned scripts in the TensorFlow 
-19.03-py3 NGC container on an NVIDIA DGX-1 server with 8 V100 16G GPUs.
+Our results were obtained by running the scripts `./scripts/benchmarking/DGX1v_evalbench_{FP32, AMP}.sh`
+evaluation script in the `19.12-tf1-py3` NGC container on an NVIDIA DGX-1 server with 8 V100 16G GPUs.
 
 | # GPUs | Precision                       | Throughput (Imgs/sec) | Speedup |
 |--------|---------------------------------|-----------------------|---------|
-| 1      | FP32                            | 228                   | 1.00    |
-| 1      | Automatic Mixed Precision (AMP) | 301                   | 1.32    |
+| 1      | FP32                            | 306                   | 1.00    |
+| 1      | Automatic Mixed Precision (AMP) | 550                   | 1.80    |
 
 To achieve these same results, follow the [Quick Start Guide](#quick-start-guide) outlined above.
 

+ 0 - 2
TensorFlow/Segmentation/UNet_Industrial/datasets/core.py

@@ -19,8 +19,6 @@
 #
 # ==============================================================================
 
-from __future__ import print_function
-
 import os
 from abc import ABC, abstractmethod
 

+ 43 - 31
TensorFlow/Segmentation/UNet_Industrial/datasets/dagm2007.py

@@ -37,7 +37,7 @@ from datasets.core import BaseDataset
 
 from utils import hvd_utils
 
-from dllogger.logger import LOGGER
+from dllogger import Logger
 
 __all__ = ['DAGM2007_Dataset']
 
@@ -109,7 +109,21 @@ class DAGM2007_Dataset(BaseDataset):
 
         shuffle_buffer_size = 10000
 
-        def decode_csv(line):
+        image_dir, csv_file = self._get_data_dirs(training=training)
+
+        mask_image_dir = os.path.join(image_dir, "Label")
+
+        dataset = tf.data.TextLineDataset(csv_file)
+
+        dataset = dataset.skip(1)  # Skip CSV Header
+
+        if only_defective_images:
+            dataset = dataset.filter(lambda line: tf.not_equal(tf.strings.substr(line, -1, 1), "0"))
+
+        if hvd_utils.is_using_hvd() and training:
+            dataset = dataset.shard(hvd.size(), hvd.rank())
+
+        def _load_dagm_data(line):
 
             input_image_name, image_mask_name, label = tf.decode_csv(
                 line, record_defaults=[[""], [""], [0]], field_delim=','
@@ -156,10 +170,33 @@ class DAGM2007_Dataset(BaseDataset):
                 ),
             )
 
+            label = tf.cast(label, tf.int32)
+
+            return tf.data.Dataset.from_tensor_slices(([input_image], [mask_image], [label]))
+
+        dataset = dataset.apply(
+            tf.data.experimental.parallel_interleave(
+                _load_dagm_data,
+                cycle_length=batch_size*8,
+                block_length=4,
+                buffer_output_elements=batch_size*8
+            )
+        )
+
+        dataset = dataset.cache()
+
+        if training:
+            dataset = dataset.apply(tf.data.experimental.shuffle_and_repeat(buffer_size=shuffle_buffer_size, seed=seed))
+
+        else:
+            dataset = dataset.repeat()
+
+        def _augment_data(input_image, mask_image, label):
+
             if augment_data:
 
-                if not hvd_utils.is_using_hvd() or hvd.local_rank() == 0:
-                    LOGGER.log("Using data augmentation ...")
+                if not hvd_utils.is_using_hvd() or hvd.rank() == 0:
+                    print("Using data augmentation ...")
 
                 #input_image = tf.image.per_image_standardization(input_image)
 
@@ -173,36 +210,11 @@ class DAGM2007_Dataset(BaseDataset):
                 input_image = tf.image.rot90(input_image, k=n_rots)
                 mask_image = tf.image.rot90(mask_image, k=n_rots)
 
-            label = tf.cast(label, tf.int32)
-
             return (input_image, mask_image), label
 
-        image_dir, csv_file = self._get_data_dirs(training=training)
-
-        mask_image_dir = os.path.join(image_dir, "Label")
-
-        dataset = tf.data.TextLineDataset(csv_file)
-
-        dataset = dataset.skip(1)  # Skip CSV Header
-
-        if only_defective_images:
-            dataset = dataset.filter(lambda line: tf.not_equal(tf.strings.substr(line, -1, 1), "0"))
-
-        dataset = dataset.cache()
-
-        if training:
-
-            dataset = dataset.apply(tf.data.experimental.shuffle_and_repeat(buffer_size=shuffle_buffer_size, seed=seed))
-
-            if hvd_utils.is_using_hvd():
-                dataset = dataset.shard(hvd.size(), hvd.rank())
-
-        else:
-            dataset = dataset.repeat()
-
         dataset = dataset.apply(
             tf.data.experimental.map_and_batch(
-                map_func=decode_csv,
+                map_func=_augment_data,
                 num_parallel_calls=num_threads,
                 batch_size=batch_size,
                 drop_remainder=True,
@@ -212,7 +224,7 @@ class DAGM2007_Dataset(BaseDataset):
         dataset = dataset.prefetch(buffer_size=tf.contrib.data.AUTOTUNE)
 
         if use_gpu_prefetch:
-            dataset.apply(tf.data.experimental.prefetch_to_device(device="/gpu:0", buffer_size=batch_size * 8))
+            dataset.apply(tf.data.experimental.prefetch_to_device(device="/gpu:0", buffer_size=4))
 
         return dataset
 

+ 0 - 22
TensorFlow/Segmentation/UNet_Industrial/dllogger/README.md

@@ -1,22 +0,0 @@
-# Tools for logging DL training
-DLLogger is a tool to generate logs during Deep Learning training.
-
-## Installation
-```
-git clone https://gitlab-master.nvidia.com/dl/JoC/DLLogger.git
-pip install DLLogger/.
-```
-
-## Usage
-You can use DLLogger with the simplest `LOGGER.log()` API:
-```
-from logger.logger import LOGGER
-from logger import tags
-
-LOGGER.model = 'ResNet'
-LOGGER.log(key=tags.INPUT_BATCH_SIZE, value=128)
-```
-For the more advanced usage, please refer to the `dummy_run.py` example.
-
-## Tags
-All available tags are listed in the `logger/tags.py` file.

+ 0 - 19
TensorFlow/Segmentation/UNet_Industrial/dllogger/dllogger/__init__.py

@@ -1,19 +0,0 @@
-# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-from .logger import LOGGER, StdOutBackend, MLPerfBackend, JsonBackend, CompactBackend, Scope, AverageMeter, StandardMeter
-from . import tags
-
-__all__ = [LOGGER, StdOutBackend, MLPerfBackend, JsonBackend, CompactBackend, Scope, AverageMeter, StandardMeter, tags]

+ 0 - 60
TensorFlow/Segmentation/UNet_Industrial/dllogger/dllogger/autologging.py

@@ -1,60 +0,0 @@
-# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# Common values reported
-
-
-import subprocess
-import xml.etree.ElementTree as ET
-
-#TODO: print CUDA version, container version etc
-
-def log_hardware(logger):
-    # TODO: asserts - what if you cannot launch those commands?
-    # number of CPU threads
-    cpu_info_command = 'cat /proc/cpuinfo'
-    cpu_info = subprocess.run(cpu_info_command.split(), stdout=subprocess.PIPE).stdout.split()
-    cpu_num_index = len(cpu_info) - cpu_info[::-1].index(b'processor') + 1
-    cpu_num = int(cpu_info[cpu_num_index]) + 1
-
-    # CPU name
-    cpu_name_begin_index = cpu_info.index(b'name')
-    cpu_name_end_index = cpu_info.index(b'stepping')
-    cpu_name = b' '.join(cpu_info[cpu_name_begin_index + 2:cpu_name_end_index]).decode('utf-8')
-
-    logger.log(key='cpu_info', value={"num": cpu_num, "name": cpu_name})
-
-    # RAM memory
-    ram_info_command = 'free -m -h'
-    ram_info = subprocess.run(ram_info_command.split(), stdout=subprocess.PIPE).stdout.split()
-    ram_index = ram_info.index(b'Mem:') + 1
-    ram = ram_info[ram_index].decode('utf-8')
-
-    logger.log(key='mem_info', value={"ram": ram})
-
-    # GPU
-    nvidia_smi_command = 'nvidia-smi -q -x'
-    nvidia_smi_output = subprocess.run(nvidia_smi_command.split(), stdout=subprocess.PIPE).stdout
-    nvidia_smi = ET.fromstring(nvidia_smi_output)
-    gpus = nvidia_smi.findall('gpu')
-    ver = nvidia_smi.findall('driver_version')
-
-    logger.log(key="gpu_info",
-                 value={
-                      "driver_version": ver[0].text,
-                      "num": len(gpus),
-                      "name": [g.find('product_name').text for g in gpus],
-                      "mem": [g.find('fb_memory_usage').find('total').text for g in gpus]})
-
-def log_args(logger, args):
-    logger.log(key='args', value=vars(args))

+ 0 - 531
TensorFlow/Segmentation/UNet_Industrial/dllogger/dllogger/logger.py

@@ -1,531 +0,0 @@
-# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-import time
-import json
-import logging
-import inspect
-import sys
-from contextlib import contextmanager
-import functools
-from collections import OrderedDict
-import datetime
-
-from . import autologging
-
-NVLOGGER_NAME = 'nv_dl_logger'
-NVLOGGER_VERSION = '0.3.1'
-NVLOGGER_TOKEN = ':::NVLOG'
-
-MLPERF_NAME = 'mlperf_logger'
-MLPERF_VERSION = '0.5.0'
-MLPERF_TOKEN = ':::MLP'
-
-COMPACT_NAME = 'compact_logger'
-
-DEFAULT_JSON_FILENAME = 'nvlog.json'
-
-class Scope:
-    RUN = 0
-    EPOCH = 1
-    TRAIN_ITER = 2
-
-
-class Level:
-    CRITICAL = 5
-    ERROR = 4
-    WARNING = 3
-    INFO = 2
-    DEBUG = 1
-
-
-_data = OrderedDict([
-    ('model', None),
-    ('epoch', -1),
-    ('iteration', -1),
-    ('total_iteration', -1),
-    ('metrics', OrderedDict()),
-    ('timed_blocks', OrderedDict()),
-    ('current_scope', Scope.RUN)
-    ])
-
-def get_caller(root_dir=None):
-    stack_files = [s.filename.split('/')[-1] for s in inspect.stack()]
-    stack_index = 0
-    while stack_index < len(stack_files) and stack_files[stack_index] != 'logger.py':
-        stack_index += 1
-
-    while (stack_index < len(stack_files) and 
-            stack_files[stack_index] in ['logger.py', 'autologging.py', 'contextlib.py']):
-        stack_index += 1
-
-    while True:
-        try:
-            caller_line = inspect.stack()[stack_index].lineno
-            caller_file = stack_files[stack_index]
-            break
-        except IndexError:
-            stack_index -= 1
-
-        if stack_index < 0:
-            caller_line = 0
-            caller_file = "Unknown Calling File"
-            break
-
-    return "%s:%d" % (caller_file, caller_line)
-
-class StandardMeter(object):
-
-    def __init__(self):
-        self.reset()
-
-    def reset(self):
-        self.value = None
-
-    def record(self, value):
-        self.value = value
-
-    def get_value(self):
-        return self.value
-
-    def get_last(self):
-        return self.value
-
-class AverageMeter(object):
-
-    def __init__(self):
-        self.reset()
-
-    def reset(self):
-        self.count = 0
-        self.value = 0
-        self.last = 0
-
-    def record(self, value, n = 1):
-        self.last = value
-        self.count += n
-        self.value += value * n
-
-    def get_value(self):
-        return self.value / self.count
-
-    def get_last(self):
-        return self.last
-
-class JsonBackend(object):
-
-    def __init__(self, log_file=DEFAULT_JSON_FILENAME, logging_scope=Scope.TRAIN_ITER,
-            iteration_interval=1):
-        self.log_file = log_file
-        self.logging_scope = logging_scope
-        self.iteration_interval = iteration_interval
-
-        self.json_log = OrderedDict([
-            ('run', OrderedDict()),
-            ('epoch', OrderedDict()),
-            ('iter', OrderedDict()),
-            ('event', OrderedDict()),
-            ])
-        
-        self.json_log['epoch']['x'] = []
-        if self.logging_scope == Scope.TRAIN_ITER:
-            self.json_log['iter']['x'] = [[]]
-
-    def register_metric(self, key, metric_scope):
-        if (metric_scope == Scope.TRAIN_ITER and
-                self.logging_scope == Scope.TRAIN_ITER):
-            if not key in self.json_log['iter'].keys():
-                self.json_log['iter'][key] = [[]]
-        if metric_scope == Scope.EPOCH:
-            if not key in self.json_log['epoch'].keys():
-                self.json_log['epoch'][key] = []
-
-    def log(self, key, value):
-        if _data['current_scope'] == Scope.RUN:
-            self.json_log['run'][key] = value
-        elif _data['current_scope'] == Scope.EPOCH: 
-            pass
-        elif _data['current_scope'] == Scope.TRAIN_ITER:
-            pass
-        else:
-            raise ValueError('log function for scope "', _data['current_scope'], 
-                    '" not implemented')
-
-    def log_event(self, key, value):
-        if not key in self.json_log['event'].keys():
-            self.json_log['event'][key] = []
-        entry = OrderedDict()
-        entry['epoch'] = _data['epoch']
-        entry['iter'] = _data['iteration']
-        entry['timestamp'] = time.time()
-        if value:
-            entry['value'] = value
-        self.json_log['event'][key].append(str(entry))
-
-    def log_iteration_summary(self):
-        if (self.logging_scope == Scope.TRAIN_ITER and 
-                _data['total_iteration'] % self.iteration_interval == 0):
-            for key, m in _data['metrics'].items():
-                if m.metric_scope == Scope.TRAIN_ITER:
-                    self.json_log['iter'][key][-1].append(str(m.get_last()))
-
-            # log x for iteration number
-            self.json_log['iter']['x'][-1].append(_data['iteration'])
-
-
-    def dump_json(self):
-        if self.log_file is None:
-            print(json.dumps(self.json_log, indent=4))
-        else:
-            with open(self.log_file, 'w') as f:
-                json.dump(self.json_log, fp=f, indent=4)
-
-    def log_epoch_summary(self):
-        for key, m in _data['metrics'].items():
-            if m.metric_scope == Scope.EPOCH:
-                self.json_log['epoch'][key].append(str(m.get_value()))
-            elif (m.metric_scope == Scope.TRAIN_ITER and 
-                    self.logging_scope == Scope.TRAIN_ITER):
-                # create new sublists for each iter metric in the next epoch
-                self.json_log['iter'][key].append([])
-        
-        # log x for epoch number
-        self.json_log['epoch']['x'].append(_data['epoch'])
-
-        # create new sublist for iter's x in the next epoch
-        if self.logging_scope == Scope.TRAIN_ITER:
-            self.json_log['iter']['x'].append([])
-
-        self.dump_json()
-
-    def timed_block_start(self, name):
-        pass
-
-    def timed_block_stop(self, name):
-        pass
-
-    def finish(self):
-        self.dump_json()
-
-class _ParentStdOutBackend(object):
-
-    def __init__(self, name, token, version, log_file, logging_scope, iteration_interval):
-
-        self.root_dir = None
-        self.worker = [0]
-        self.prefix = ''
-
-        self.name = name
-        self.token = token
-        self.version = version
-        self.log_file = log_file
-        self.logging_scope = logging_scope
-        self.iteration_interval = iteration_interval
-
-        self.logger = logging.getLogger(self.name)
-        self.logger.setLevel(logging.DEBUG)
-        self.logger.handlers = []
-
-        if (self.log_file is None):
-            self.stream_handler = logging.StreamHandler(stream=sys.stdout)
-            self.stream_handler.setLevel(logging.DEBUG)
-            self.logger.addHandler(self.stream_handler)
-        else:
-            self.file_handler = logging.FileHandler(self.log_file, mode='w')
-            self.file_handler.setLevel(logging.DEBUG)
-            self.logger.addHandler(self.file_handler)
-
-    def register_metric(self, key, meter=None, metric_scope=Scope.EPOCH):
-        pass
-
-    def log_epoch_summary(self):
-        pass
-
-    def log_iteration_summary(self):
-        pass
-
-    def log(self, key, value):
-        if _data['current_scope'] > self.logging_scope:
-            pass
-        elif (_data['current_scope'] == Scope.TRAIN_ITER and 
-                _data['total_iteration'] % self.iteration_interval != 0):
-            pass
-        else:
-            self.log_stdout(key, value)
-
-    def log_event(self, key, value):
-        self.log_stdout(key, value)
-        
-    def log_stdout(self, key, value=None, forced=False):
-        # TODO: worker 0 
-        # only the 0-worker will log
-        #if not forced and self.worker != 0:
-        #    pass
-
-        if value is None:
-            msg = key
-        else:
-            str_json = json.dumps(str(value))
-            msg = '{key}: {value}'.format(key=key, value=str_json)
-
-        call_site = get_caller(root_dir=self.root_dir)
-        now = time.time()
-
-        message = '{prefix}{token}v{ver} {model} {secs:.9f} ({call_site}) {msg}'.format(
-            prefix=self.prefix, token=self.token, ver=self.version, secs=now, 
-            model=_data['model'],
-            call_site=call_site, msg=msg)
-
-        self.logger.debug(message)
-
-    def timed_block_start(self, name):
-        self.log_stdout(key=name + "_start")
-
-    def timed_block_stop(self, name):
-        self.log_stdout(key=name + "_stop")
-
-    def finish(self):
-        pass
-
-class StdOutBackend(_ParentStdOutBackend):
-
-    def __init__(self, log_file=None, logging_scope=Scope.TRAIN_ITER, iteration_interval=1):
-        _ParentStdOutBackend.__init__(self, name=NVLOGGER_NAME, token=NVLOGGER_TOKEN, 
-                version=NVLOGGER_VERSION, log_file=log_file, logging_scope=logging_scope, 
-                iteration_interval=iteration_interval)
-        
-class MLPerfBackend(_ParentStdOutBackend):
-
-    def __init__(self, log_file=None, logging_scope=Scope.TRAIN_ITER, iteration_interval=1):
-        _ParentStdOutBackend.__init__(self, name=MLPERF_NAME, token=MLPERF_TOKEN, 
-                version=MLPERF_VERSION, log_file=log_file, logging_scope=logging_scope, 
-                iteration_interval=iteration_interval)
-
-class CompactBackend(object):
-
-    def __init__(self, log_file=None, logging_scope=Scope.TRAIN_ITER, iteration_interval=1):
-        self.log_file = log_file
-        self.logging_scope = logging_scope
-        self.iteration_interval = iteration_interval
-
-        self.logger = logging.getLogger(COMPACT_NAME)
-        self.logger.setLevel(logging.DEBUG)
-        self.logger.handlers = []
-
-        if (self.log_file is None):
-            self.stream_handler = logging.StreamHandler(stream=sys.stdout)
-            self.stream_handler.setLevel(logging.DEBUG)
-            self.logger.addHandler(self.stream_handler)
-        else:
-            self.file_handler = logging.FileHandler(self.log_file, mode='w')
-            self.file_handler.setLevel(logging.DEBUG)
-            self.logger.addHandler(self.file_handler)
-    
-    def register_metric(self, key, meter=None, metric_scope=Scope.EPOCH):
-        pass
-    
-    def timestamp_prefix(self):
-        return datetime.datetime.now().strftime('[%Y-%m-%d %H:%M:%S]')
-
-    def log(self, key, value):
-        if _data['current_scope'] == Scope.RUN:
-            self.log_event(key, value)
-    
-    def log_event(self, key, value):
-        msg = self.timestamp_prefix() + ' ' + str(key)
-        if value is not None:
-            msg += ": " + str(value)
-        self.logger.debug(msg)
-    
-    def log_epoch_summary(self):
-        if self.logging_scope >= Scope.EPOCH:
-            summary = self.timestamp_prefix() + ' Epoch {:<4} '.format(str(_data['epoch']) + ':')
-            for key, m in _data['metrics'].items():
-                if m.metric_scope >= Scope.EPOCH:
-                    summary += str(key) + ": " + str(m.get_value()) + ", "
-            self.logger.debug(summary)
-
-    def log_iteration_summary(self):
-        if self.logging_scope >= Scope.TRAIN_ITER and _data['total_iteration'] % self.iteration_interval == 0:
-            summary = self.timestamp_prefix() + ' Iter {:<5} '.format(str(_data['iteration']) + ':')
-            for key, m in _data['metrics'].items():
-                if m.metric_scope == Scope.TRAIN_ITER:
-                    summary += str(key) + ": " + str(m.get_last()) + ", "
-            self.logger.debug(summary)
- 
-    def timed_block_start(self, name):
-        pass
-
-    def timed_block_stop(self, name):
-        pass
-
-    def finish(self):
-        pass
-
-class _Logger(object):
-    def __init__(self):
-
-        self.backends = [
-                CompactBackend(),
-                JsonBackend()
-                ]
-
-        self.level = Level.INFO
-   
-    def set_model_name(self, name):
-        _data['model'] = name
-
-
-    def set_backends(self, backends):
-        self.backends = backends
-        
-    def register_metric(self, key, meter=None, metric_scope=Scope.EPOCH):
-        if meter is None:
-            meter = StandardMeter()
-        #TODO: move to argument of Meter?
-        meter.metric_scope = metric_scope
-        _data['metrics'][key] = meter
-        for b in self.backends:
-            b.register_metric(key, metric_scope)
-
-    def log(self, key, value=None, forced=False, level=Level.INFO):
-        if level < self.level:
-            return
-
-        if _data['current_scope'] == Scope.TRAIN_ITER or _data['current_scope'] == Scope.EPOCH:
-            if key in _data['metrics'].keys():
-                if _data['metrics'][key].metric_scope == _data['current_scope']:
-                    _data['metrics'][key].record(value)
-        for b in self.backends:
-            b.log(key, value)
-
-    def debug(self, *args, **kwargs):
-        self.log(*args, level=Level.DEBUG, **kwargs)
-
-    def info(self, *args, **kwargs):
-        self.log(*args, level=Level.INFO, **kwargs)
-
-    def warning(self, *args, **kwargs):
-        self.log(*args, level=Level.WARNING, **kwargs)
-
-    def error(self, *args, **kwargs):
-        self.log(*args, level=Level.ERROR, **kwargs)
-
-    def critical(self, *args, **kwargs):
-        self.log(*args, level=Level.CRITICAL, **kwargs)
-
-    def log_event(self, key, value=None):
-        for b in self.backends:
-            b.log_event(key, value)
-    
-    def timed_block_start(self, name):
-        if not name in _data['timed_blocks']:
-            _data['timed_blocks'][name] = OrderedDict()
-        _data['timed_blocks'][name]['start'] = time.time()
-        for b in self.backends:
-            b.timed_block_start(name)
-    
-    def timed_block_stop(self, name):
-        if not name in _data['timed_blocks']:
-            raise ValueError('timed_block_stop called before timed_block_start for ' + name)
-        _data['timed_blocks'][name]['stop'] = time.time()
-        delta = _data['timed_blocks'][name]['stop'] - _data['timed_blocks'][name]['start']
-        self.log(name + '_time', delta)
-        for b in self.backends:
-            b.timed_block_stop(name)
-
-    def iteration_start(self):
-        _data['current_scope'] = Scope.TRAIN_ITER
-        _data['iteration'] += 1
-        _data['total_iteration'] += 1
-
-
-    def iteration_stop(self):
-        for b in self.backends:
-            b.log_iteration_summary()
-        _data['current_scope'] = Scope.EPOCH
-
-    def epoch_start(self):
-        _data['current_scope'] = Scope.EPOCH 
-        _data['epoch'] += 1
-        _data['iteration'] = -1
-
-        for n, m in _data['metrics'].items():
-            if m.metric_scope == Scope.TRAIN_ITER:
-                m.reset()
-
-    def epoch_stop(self):
-        for b in self.backends:
-            b.log_epoch_summary()
-        _data['current_scope'] = Scope.RUN
-
-    def finish(self):
-        for b in self.backends:
-            b.finish()
-
-    def iteration_generator_wrapper(self, gen):
-        for g in gen:
-            self.iteration_start()
-            yield g
-            self.iteration_stop()
-
-    def epoch_generator_wrapper(self, gen):
-        for g in gen:
-            self.epoch_start()
-            yield g
-            self.epoch_stop()
-
-    @contextmanager
-    def timed_block(self, prefix, value=None, forced=False):
-        """ This function helps with timed blocks
-            ----
-            Parameters:
-            prefix - one of items from TIMED_BLOCKS; the action to be timed
-            logger - NVLogger object
-            forced - if True then the events are always logged (even if it should be skipped)
-        """
-        self.timed_block_start(prefix)
-        yield self
-        self.timed_block_stop(prefix)
-
-    def log_hardware(self):
-        autologging.log_hardware(self)
-
-    def log_args(self, args):
-        autologging.log_args(self, args)
-
-    def timed_function(self, prefix, variable=None, forced=False):
-        """ This decorator helps with timed functions
-            ----
-            Parameters:
-            prefix - one of items from TIME_BLOCK; the action to be timed
-            logger - NVLogger object
-            forced - if True then the events are always logged (even if it should be skipped)
-        """
-
-        def timed_function_decorator(func):
-            @functools.wraps(func)
-            def wrapper(*args, **kwargs):
-                value = kwargs.get(variable, next(iter(args), None))
-                with self.timed_block(prefix=prefix, value=value, forced=forced):
-                    func(*args, **kwargs)
-
-            return wrapper
-
-        return timed_function_decorator
-
-
-LOGGER = _Logger()
-

+ 0 - 255
TensorFlow/Segmentation/UNet_Industrial/dllogger/dllogger/tags.py

@@ -1,255 +0,0 @@
-# Copyright 2018 MLBenchmark Group. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# Common values reported
-
-VALUE_EPOCH = "epoch"
-VALUE_ITERATION = "iteration"
-VALUE_ACCURACY = "accuracy"
-VALUE_BLEU = "bleu"
-VALUE_TOP1 = "top1"
-VALUE_TOP5 = "top5"
-VALUE_BBOX_MAP = "bbox_map"
-VALUE_MASK_MAP = "mask_map"
-VALUE_BCE = "binary_cross_entropy"
-
-
-# Timed blocks (used with timed_function & timed_block
-# For each there should be *_start and *_stop tags defined
-
-RUN_BLOCK = "run"
-SETUP_BLOCK = "setup"
-PREPROC_BLOCK = "preproc"
-
-TRAIN_BLOCK = "train"
-TRAIN_PREPROC_BLOCK = "train_preproc"
-TRAIN_EPOCH_BLOCK = "train_epoch"
-TRAIN_EPOCH_PREPROC_BLOCK = "train_epoch_preproc"
-TRAIN_CHECKPOINT_BLOCK = "train_checkpoint"
-TRAIN_ITER_BLOCK = "train_iteration"
-
-EVAL_BLOCK = "eval"
-EVAL_ITER_BLOCK = "eval_iteration"
-
-#TODO: to remove?
-TIMED_BLOCKS = {
-    RUN_BLOCK,
-    SETUP_BLOCK,
-    PREPROC_BLOCK,
-    TRAIN_BLOCK,
-    TRAIN_PREPROC_BLOCK,
-    TRAIN_EPOCH_BLOCK,
-    TRAIN_EPOCH_PREPROC_BLOCK,
-    TRAIN_CHECKPOINT_BLOCK,
-    TRAIN_ITER_BLOCK,
-    EVAL_BLOCK,
-    EVAL_ITER_BLOCK,
-}
-
-
-# Events
-
-RUN_INIT = "run_init"
-
-SETUP_START = "setup_start"
-SETUP_STOP = "setup_stop"
-
-PREPROC_START = "preproc_start"
-PREPROC_STOP = "preproc_stop"
-
-RUN_START = "run_start"
-RUN_STOP = "run_stop"
-RUN_FINAL = "run_final"
-
-TRAIN_CHECKPOINT_START = "train_checkpoint_start"
-TRAIN_CHECKPOINT_STOP = "train_checkpoint_stop"
-
-TRAIN_PREPROC_START = "train_preproc_start"
-TRAIN_PREPROC_STOP = "train_preproc_stop"
-
-TRAIN_EPOCH_PREPROC_START = "train_epoch_preproc_start"
-TRAIN_EPOCH_PREPROC_STOP = "train_epoch_preproc_stop"
-
-TRAIN_ITER_START = "train_iter_start"
-TRAIN_ITER_STOP = "train_iter_stop"
-
-TRAIN_EPOCH_START = "train_epoch_start"
-TRAIN_EPOCH_STOP = "train_epoch_stop"
-
-
-# MLPerf specific tags
-
-RUN_CLEAR_CACHES = "run_clear_caches"
-
-PREPROC_NUM_TRAIN_EXAMPLES = "preproc_num_train_examples"
-PREPROC_NUM_EVAL_EXAMPLES = "preproc_num_eval_examples"
-PREPROC_TOKENIZE_TRAINING = "preproc_tokenize_training"
-PREPROC_TOKENIZE_EVAL = "preproc_tokenize_eval"
-PREPROC_VOCAB_SIZE = "preproc_vocab_size"
-
-RUN_SET_RANDOM_SEED = "run_set_random_seed"
-
-INPUT_SIZE = "input_size"
-INPUT_BATCH_SIZE = "input_batch_size"
-INPUT_ORDER = "input_order"
-INPUT_SHARD = "input_shard"
-INPUT_BN_SPAN = "input_bn_span"
-
-INPUT_CENTRAL_CROP = "input_central_crop"
-INPUT_CROP_USES_BBOXES = "input_crop_uses_bboxes"
-INPUT_DISTORTED_CROP_MIN_OBJ_COV = "input_distorted_crop_min_object_covered"
-INPUT_DISTORTED_CROP_RATIO_RANGE = "input_distorted_crop_aspect_ratio_range"
-INPUT_DISTORTED_CROP_AREA_RANGE = "input_distorted_crop_area_range"
-INPUT_DISTORTED_CROP_MAX_ATTEMPTS = "input_distorted_crop_max_attempts"
-INPUT_MEAN_SUBTRACTION = "input_mean_subtraction"
-INPUT_RANDOM_FLIP = "input_random_flip"
-
-INPUT_RESIZE = "input_resize"
-INPUT_RESIZE_ASPECT_PRESERVING = "input_resize_aspect_preserving"
-
-
-# Opt
-
-OPT_NAME = "opt_name"
-
-OPT_LR = "opt_learning_rate"
-OPT_MOMENTUM = "opt_momentum"
-
-OPT_WEIGHT_DECAY = "opt_weight_decay"
-
-OPT_HP_ADAM_BETA1 = "opt_hp_Adam_beta1"
-OPT_HP_ADAM_BETA2 = "opt_hp_Adam_beta2"
-OPT_HP_ADAM_EPSILON = "opt_hp_Adam_epsilon"
-
-OPT_LR_WARMUP_STEPS = "opt_learning_rate_warmup_steps"
-
-
-#  Train
-
-TRAIN_LOOP = "train_loop"
-TRAIN_EPOCH = "train_epoch"
-TRAIN_CHECKPOINT = "train_checkpoint"
-TRAIN_LOSS = "train_loss"
-TRAIN_ITERATION_LOSS = "train_iteration_loss"
-
-
-# Eval
-
-EVAL_START = "eval_start"
-EVAL_SIZE = "eval_size"
-EVAL_TARGET = "eval_target"
-EVAL_ACCURACY = "eval_accuracy"
-EVAL_STOP = "eval_stop"
-
-
-# Perf
-
-PERF_IT_PER_SEC = "perf_it_per_sec"
-PERF_TIME_TO_TRAIN = "time_to_train"
-
-EVAL_ITERATION_ACCURACY = "eval_iteration_accuracy"
-
-
-# Model
-
-MODEL_HP_LOSS_FN = "model_hp_loss_fn"
-
-MODEL_HP_INITIAL_SHAPE = "model_hp_initial_shape"
-MODEL_HP_FINAL_SHAPE = "model_hp_final_shape"
-
-MODEL_L2_REGULARIZATION = "model_l2_regularization"
-MODEL_EXCLUDE_BN_FROM_L2 = "model_exclude_bn_from_l2"
-
-MODEL_HP_RELU = "model_hp_relu"
-MODEL_HP_CONV2D_FIXED_PADDING = "model_hp_conv2d_fixed_padding"
-MODEL_HP_BATCH_NORM = "model_hp_batch_norm"
-MODEL_HP_DENSE = "model_hp_dense"
-
-
-# GNMT specific
-
-MODEL_HP_LOSS_SMOOTHING = "model_hp_loss_smoothing"
-MODEL_HP_NUM_LAYERS = "model_hp_num_layers"
-MODEL_HP_HIDDEN_SIZE = "model_hp_hidden_size"
-MODEL_HP_DROPOUT = "model_hp_dropout"
-
-EVAL_HP_BEAM_SIZE = "eval_hp_beam_size"
-TRAIN_HP_MAX_SEQ_LEN = "train_hp_max_sequence_length"
-EVAL_HP_MAX_SEQ_LEN = "eval_hp_max_sequence_length"
-EVAL_HP_LEN_NORM_CONST = "eval_hp_length_normalization_constant"
-EVAL_HP_LEN_NORM_FACTOR = "eval_hp_length_normalization_factor"
-EVAL_HP_COV_PENALTY_FACTOR = "eval_hp_coverage_penalty_factor"
-
-
-# NCF specific
-
-PREPROC_HP_MIN_RATINGS = "preproc_hp_min_ratings"
-PREPROC_HP_NUM_EVAL = "preproc_hp_num_eval"
-PREPROC_HP_SAMPLE_EVAL_REPLACEMENT = "preproc_hp_sample_eval_replacement"
-
-INPUT_HP_NUM_NEG = "input_hp_num_neg"
-INPUT_HP_SAMPLE_TRAIN_REPLACEMENT = "input_hp_sample_train_replacement"
-INPUT_STEP_TRAIN_NEG_GEN = "input_step_train_neg_gen"
-INPUT_STEP_EVAL_NEG_GEN = "input_step_eval_neg_gen"
-
-EVAL_HP_NUM_USERS = "eval_hp_num_users"
-EVAL_HP_NUM_NEG = "eval_hp_num_neg"
-
-MODEL_HP_MF_DIM = "model_hp_mf_dim"
-MODEL_HP_MLP_LAYER_SIZES = "model_hp_mlp_layer_sizes"
-
-
-# RESNET specific
-
-EVAL_EPOCH_OFFSET = "eval_offset"
-
-MODEL_HP_INITIAL_MAX_POOL = "model_hp_initial_max_pool"
-MODEL_HP_BEGIN_BLOCK = "model_hp_begin_block"
-MODEL_HP_END_BLOCK = "model_hp_end_block"
-MODEL_HP_BLOCK_TYPE = "model_hp_block_type"
-MODEL_HP_PROJECTION_SHORTCUT = "model_hp_projection_shortcut"
-MODEL_HP_SHORTCUT_ADD = "model_hp_shorcut_add"
-MODEL_HP_RESNET_TOPOLOGY = "model_hp_resnet_topology"
-
-
-# Transformer specific
-
-INPUT_MAX_LENGTH = "input_max_length"
-
-MODEL_HP_INITIALIZER_GAIN = "model_hp_initializer_gain"
-MODEL_HP_VOCAB_SIZE = "model_hp_vocab_size"
-MODEL_HP_NUM_HIDDEN_LAYERS = "model_hp_hidden_layers"
-MODEL_HP_EMBEDDING_SHARED_WEIGHTS = "model_hp_embedding_shared_weights"
-MODEL_HP_ATTENTION_DENSE = "model_hp_attention_dense"
-MODEL_HP_ATTENTION_DROPOUT = "model_hp_attention_dropout"
-MODEL_HP_FFN_OUTPUT_DENSE = "model_hp_ffn_output_dense"
-MODEL_HP_FFN_FILTER_DENSE = "model_hp_ffn_filter_dense"
-MODEL_HP_RELU_DROPOUT = "model_hp_relu_dropout"
-MODEL_HP_LAYER_POSTPROCESS_DROPOUT = "model_hp_layer_postprocess_dropout"
-MODEL_HP_NORM = "model_hp_norm"
-MODEL_HP_SEQ_BEAM_SEARCH = "model_hp_sequence_beam_search"
-

+ 0 - 151
TensorFlow/Segmentation/UNet_Industrial/dllogger/dummy_run.py

@@ -1,151 +0,0 @@
-# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-from dllogger import LOGGER, CompactBackend, StdOutBackend, MLPerfBackend, JsonBackend, Scope, AverageMeter, tags
-from argparse import ArgumentParser
-import random
-
[email protected]_function("train")
-def train():
-    for i in range(0, 10):
-    #for i in LOGGER.epoch_generator_wrapper(range(0, 10)):
-        LOGGER.epoch_start()
-        LOGGER.log("epoch_nr", i)
-        LOGGER.log("epochs2", 2 * i)
-        train_epoch(i)
-        LOGGER.epoch_stop()
-
-
[email protected]_function("train_epoch", "epoch")
-def train_epoch(epoch):
-    for i in range(epoch*30, (epoch+1)*30, 2):
-    #for i in LOGGER.iteration_generator_wrapper(range(epoch*10, (epoch+1)*10, 2)):
-        LOGGER.iteration_start()
-        LOGGER.log("loss", i*epoch)
-        LOGGER.iteration_stop()
-    if epoch % 3 == 1:
-        with LOGGER.timed_block("eval"):
-            LOGGER.log("accuracy", i * epoch)
-            LOGGER.log_event(key="ep divisible by 3", value=epoch)
-
-
-def main():
-    LOGGER.set_model_name('ResNet')
-    LOGGER.set_backends([
-            StdOutBackend(log_file='std.out',
-                logging_scope=Scope.TRAIN_ITER),
-            CompactBackend(log_file=None,
-                logging_scope=Scope.TRAIN_ITER, iteration_interval=5),
-            JsonBackend(log_file='dummy.json',
-                logging_scope=Scope.TRAIN_ITER, iteration_interval=4)
-            ])
-
-    parser = ArgumentParser()
-    parser.add_argument('--dummy', type=str, default='default_dummy_value')
-    args = parser.parse_args()
-
-    LOGGER.log_hardware()
-    LOGGER.log_args(args)
-
-    LOGGER.log(tags.RUN_INIT)
-    LOGGER.register_metric('loss', meter=AverageMeter(), metric_scope=Scope.TRAIN_ITER)
-    LOGGER.register_metric('epoch_nr', metric_scope=Scope.EPOCH)
-    LOGGER.register_metric('epochs2')
-
-    with LOGGER.timed_block(tags.SETUP_BLOCK):
-        print("This is setup.")
-
-    with LOGGER.timed_block(tags.PREPROC_BLOCK):
-        print("This is preprocessing.")
-
-    with LOGGER.timed_block(tags.RUN_BLOCK):
-        print("This is run.")
-        train()
-        print("This is the end.")
-
-    LOGGER.log(tags.RUN_FINAL)
-
-    LOGGER.finish()
-
-def main2():
-    LOGGER.set_backends([
-            CompactBackend(log_file=None,
-                logging_scope=Scope.TRAIN_ITER),
-            StdOutBackend(log_file='std.out',
-                logging_scope=Scope.EPOCH, iteration_interval=4),
-            JsonBackend(log_file='dummy.json',
-                logging_scope=Scope.TRAIN_ITER, iteration_interval=1)
-            ])
-    LOGGER.log_hardware()
-
-    data_x = range(0,10)
-    data_y = [3.*x + 2. for x in data_x]
-
-    data = list(zip(data_x, data_y))
-
-    LOGGER.register_metric('l', AverageMeter(), metric_scope=Scope.TRAIN_ITER)
-    LOGGER.register_metric('a', metric_scope=Scope.TRAIN_ITER)
-    LOGGER.register_metric('b', metric_scope=Scope.TRAIN_ITER)
-
-    LOGGER.info('RUN_INIT')
-
-    model_a = 1.
-    model_b = 0.
-
-    def model(ma, mb, x):
-        return ma*x+mb
-
-    def loss(y, t):
-        return (y-t)**2
-
-    def update_a(ma, mb, x, t):
-        return ma - 0.001 * 2*x*(ma*x+mb-t)
-
-    def update_b(ma, mb, x, t):
-        return mb - 0.001 * 2*(ma*x+mb-t)
-
-    for e in range(0, 5):
-        LOGGER.epoch_start()
-        for (x, t) in data:
-            LOGGER.iteration_start()
-            y = model(model_a, model_b, x)
-            model_a = update_a(model_a, model_b, x, t)
-            model_b = update_b(model_a, model_b, x, t)
-            l = loss(y, t)
-            LOGGER.info('b', model_b)
-            LOGGER.debug('a', model_a)
-            LOGGER.warning('l', l)
-            #LOGGER.log('a', model_a)
-            LOGGER.iteration_stop()
-        LOGGER.epoch_stop()
-
-    #for e in LOGGER.epoch_generator_wrapper(range(0, 10)):
-    #    for (x, t) in LOGGER.iteration_generator_wrapper(random.sample(data, len(data))):
-    #        y = model(model_a, model_b, x)
-    #        model_a = update_a(model_a, model_b, x, t)
-    #        model_b = update_b(model_a, model_b, x, t)
-    #        l = loss(y, t)
-    #        LOGGER.debug('a', model_a)
-    #        LOGGER.info('b', model_b)
-    #        LOGGER.warning('l', l)
-
-
-    LOGGER.finish()
-
-    print("FINAL: {}*x+{}".format(model_a, model_b))
-
-
-if __name__ == '__main__':
-    main()

+ 0 - 37
TensorFlow/Segmentation/UNet_Industrial/dllogger/setup.py

@@ -1,37 +0,0 @@
-# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import setuptools
-
-with open("README.md", "r") as f:
-  long_description = f.read()
-
-setuptools.setup(
-    name="DLLogger",
-    version="0.3.1",
-    author="Lukasz Mazurek",
-    author_email="[email protected]",
-    description="Tools for logging DL training.",
-    long_description=long_description,
-    long_description_content_type="text/markdown",
-    url="https://github.com/nvlmazurek/DLLogger",
-    packages=['dllogger'],
-    classifiers=[
-      "Programming Language :: Python :: 2",
-      "Programming Language :: Python :: 3",
-      "License :: BSD",
-      "Operating System :: OS Independent",
-    ],
-    license="BSD",
-)

+ 12 - 17
TensorFlow/Segmentation/UNet_Industrial/main.py

@@ -22,6 +22,7 @@
 import os
 
 import warnings
+
 warnings.simplefilter("ignore")
 
 import tensorflow as tf
@@ -32,12 +33,13 @@ from utils import hvd_utils
 from runtime import Runner
 
 from utils.cmdline_helper import parse_cmdline
+from utils.logging import init_dllogger
 
 if __name__ == "__main__":
 
     tf.logging.set_verbosity(tf.logging.ERROR)
-
     FLAGS = parse_cmdline()
+    init_dllogger(FLAGS.log_dir)
 
     RUNNING_CONFIG = tf.contrib.training.HParams(
         exec_mode=FLAGS.exec_mode,
@@ -130,7 +132,6 @@ if __name__ == "__main__":
     )
 
     if RUNNING_CONFIG.exec_mode in ["train", "train_and_evaluate", "training_benchmark"]:
-
         runner.train(
             iter_unit=RUNNING_CONFIG.iter_unit,
             num_iter=RUNNING_CONFIG.num_iter,
@@ -147,18 +148,12 @@ if __name__ == "__main__":
             is_benchmark=RUNNING_CONFIG.exec_mode == 'training_benchmark'
         )
 
-    if RUNNING_CONFIG.exec_mode in ["train_and_evaluate", 'evaluate', 'inference_benchmark']:
-
-        if RUNNING_CONFIG.exec_mode == 'inference_benchmark' and hvd_utils.is_using_hvd():
-            raise NotImplementedError("Only single GPU inference is implemented.")
-
-        elif not hvd_utils.is_using_hvd() or hvd.rank() == 0:
-
-            runner.evaluate(
-                iter_unit=RUNNING_CONFIG.iter_unit if RUNNING_CONFIG.exec_mode != "train_and_evaluate" else "epoch",
-                num_iter=RUNNING_CONFIG.num_iter if RUNNING_CONFIG.exec_mode != "train_and_evaluate" else 1,
-                warmup_steps=RUNNING_CONFIG.warmup_steps,
-                batch_size=RUNNING_CONFIG.batch_size,
-                is_benchmark=RUNNING_CONFIG.exec_mode == 'inference_benchmark',
-                save_eval_results_to_json=RUNNING_CONFIG.save_eval_results_to_json
-            )
+    if RUNNING_CONFIG.exec_mode in ["train_and_evaluate", 'evaluate', 'inference_benchmark'] and (not hvd_utils.is_using_hvd() or hvd.rank() == 0):
+        runner.evaluate(
+            iter_unit=RUNNING_CONFIG.iter_unit if RUNNING_CONFIG.exec_mode != "train_and_evaluate" else "epoch",
+            num_iter=RUNNING_CONFIG.num_iter if RUNNING_CONFIG.exec_mode != "train_and_evaluate" else 1,
+            warmup_steps=RUNNING_CONFIG.warmup_steps,
+            batch_size=RUNNING_CONFIG.batch_size,
+            is_benchmark=RUNNING_CONFIG.exec_mode == 'inference_benchmark',
+            save_eval_results_to_json=RUNNING_CONFIG.save_eval_results_to_json
+        )

+ 2 - 4
TensorFlow/Segmentation/UNet_Industrial/model/layers/utils.py

@@ -19,8 +19,6 @@
 #
 # ==============================================================================
 
-from dllogger.logger import LOGGER
-
 import horovod.tensorflow as hvd
 
 from utils import hvd_utils
@@ -37,5 +35,5 @@ def _log_hparams(classname, layername, **kwargs):
 
     log_msg += "\n"
 
-    if not hvd_utils.is_using_hvd() or hvd.local_rank() == 0:
-        LOGGER.log(log_msg)
+    if not hvd_utils.is_using_hvd() or hvd.rank() == 0:
+        print(log_msg)

+ 9 - 3
TensorFlow/Segmentation/UNet_Industrial/model/unet.py

@@ -32,7 +32,7 @@ from utils import metrics
 
 from utils import image_processing
 
-from dllogger.logger import LOGGER
+from dllogger import Logger
 
 __all__ = ["UNet_v1"]
 
@@ -215,6 +215,12 @@ class UNet_v1(object):
             labels = tf.cast(labels, tf.float32)
             labels_preds = tf.reduce_max(y_pred, axis=(1, 2, 3))
 
+            # NOTE(review): the former `assert (tensor, "msg")` was a no-op --
+            # asserting a non-empty tuple is always True, and Python `assert`
+            # cannot evaluate a tf.Tensor at graph-build time anyway. Clip the
+            # predictions into the valid [0, 1] probability range instead.
+            labels_preds = tf.clip_by_value(labels_preds, 0, 1)
+
             with tf.variable_scope("Confusion_Matrix") as scope:
 
                 tp, update_tp = tf.metrics.true_positives_at_thresholds(
@@ -380,8 +386,8 @@ class UNet_v1(object):
 
                     if params["apply_manual_loss_scaling"]:
 
-                        if not hvd_utils.is_using_hvd() or hvd.local_rank() == 0:
-                            LOGGER.log("Applying manual Loss Scaling ...")
+                        # if not hvd_utils.is_using_hvd() or hvd.rank() == 0:
+                        #     Logger.log("Applying manual Loss Scaling ...")
 
                         loss_scale_manager = tf.contrib.mixed_precision.ExponentialUpdateLossScaleManager(
                             init_loss_scale=2**32,  # 4,294,967,296

+ 1 - 0
TensorFlow/Segmentation/UNet_Industrial/requirements.txt

@@ -0,0 +1 @@
+git+https://github.com/NVIDIA/dllogger.git@26a0f8f1958de2c0c460925ff6102a4d2486d6cc

+ 94 - 80
TensorFlow/Segmentation/UNet_Industrial/runtime/runner.py

@@ -19,8 +19,6 @@
 #
 # ==============================================================================
 
-from __future__ import print_function
-
 import os
 import json
 import multiprocessing
@@ -40,8 +38,7 @@ from utils import hvd_utils
 
 from utils.hooks import ProfilerHook
 
-from dllogger.logger import LOGGER
-import dllogger.logger as dllg
+import dllogger as Logger
 
 __all__ = [
     'Runner',
@@ -101,26 +98,11 @@ class Runner(object):
         if data_dir is not None and not os.path.exists(data_dir):
             raise ValueError("The `data_dir` received does not exists: %s" % data_dir)
 
-        LOGGER.set_model_name('UNet_TF')
-
-        LOGGER.set_backends(
-            [
-                dllg.JsonBackend(
-                    log_file=os.path.join(model_dir, 'dlloger_out.json'),
-                    logging_scope=dllg.Scope.TRAIN_ITER,
-                    iteration_interval=log_every_n_steps
-                ),
-                dllg.StdOutBackend(
-                    log_file=None, logging_scope=dllg.Scope.TRAIN_ITER, iteration_interval=log_every_n_steps
-                )
-            ]
-        )
-
         if hvd_utils.is_using_hvd():
             hvd.init()
 
-            if hvd.local_rank() == 0:
-                LOGGER.log("Horovod successfully initialized ...")
+            if hvd.rank() == 0:
+                print("Horovod successfully initialized ...")
 
             tf_seed = 2 * (seed + hvd.rank()) if seed is not None else None
 
@@ -135,10 +117,9 @@ class Runner(object):
 
         os.environ['HOROVOD_GPU_ALLREDUCE'] = 'NCCL'
 
-        # os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
-
         os.environ['TF_GPU_THREAD_MODE'] = 'gpu_private'
         os.environ['TF_GPU_THREAD_COUNT'] = '1' if not hvd_utils.is_using_hvd() else str(hvd.size())
+        print("WORLD_SIZE", hvd.size())
 
         os.environ['TF_USE_CUDNN_BATCHNORM_SPATIAL_PERSISTENT'] = '1'
 
@@ -148,7 +129,6 @@ class Runner(object):
 
         os.environ['TF_SYNC_ON_FINISH'] = '0'
         os.environ['TF_AUTOTUNE_THRESHOLD'] = '2'
-        # os.environ['TF_DISABLE_NVTX_RANGES'] = '1' 
 
         # =================================================
 
@@ -156,8 +136,8 @@ class Runner(object):
 
         if use_tf_amp:
 
-            if not hvd_utils.is_using_hvd() or hvd.local_rank() == 0:
-                LOGGER.log("TF AMP is activated - Experimental Feature")
+            if not hvd_utils.is_using_hvd() or hvd.rank() == 0:
+                print("TF AMP is activated - Experimental Feature")
 
             os.environ["TF_ENABLE_AUTO_MIXED_PRECISION_GRAPH_REWRITE"] = "1"
 
@@ -205,8 +185,8 @@ class Runner(object):
 
         self.run_hparams = Runner._build_hparams(model_hparams, run_config_additional)
 
-        if not hvd_utils.is_using_hvd() or hvd.local_rank() == 0:
-            LOGGER.log('Defining Model Estimator ...\n')
+        if not hvd_utils.is_using_hvd() or hvd.rank() == 0:
+            print('Defining Model Estimator ...\n')
 
         self._model = UNet_v1(
             model_name="UNet_v1",
@@ -220,8 +200,8 @@ class Runner(object):
 
         if self.run_hparams.seed is not None:
 
-            if not hvd_utils.is_using_hvd() or hvd.local_rank() == 0:
-                LOGGER.log("Deterministic Run - Seed: %d\n" % seed)
+            if not hvd_utils.is_using_hvd() or hvd.rank() == 0:
+                print("Deterministic Run - Seed: %d\n" % seed)
 
             tf.set_random_seed(self.run_hparams.seed)
             np.random.seed(self.run_hparams.seed)
@@ -250,7 +230,7 @@ class Runner(object):
                     hparams.add_hparam(name=key, value=val)
 
                 except ValueError:
-                    LOGGER.log(
+                    print(
                         "the parameter `{}` already exists - existing value: {} and duplicated value: {}".format(
                             key, hparams.get(key), val
                         )
@@ -278,13 +258,13 @@ class Runner(object):
         config.log_device_placement = False
 
         config.gpu_options.allow_growth = True
-        # config.gpu_options.per_process_gpu_memory_fraction=0.7
 
         if hvd_utils.is_using_hvd():
-            config.gpu_options.visible_device_list = str(hvd.local_rank())
+            config.gpu_options.visible_device_list = str(hvd.rank())
 
-        if use_xla:  # Only working on single GPU
-            LOGGER.log("XLA is activated - Experimental Feature")
+        if use_xla:
+            if not hvd_utils.is_using_hvd() or hvd.rank() == 0:
+                print("XLA is activated - Experimental Feature")
             config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1
 
         config.gpu_options.force_gpu_compatible = True  # Force pinned memory
@@ -382,8 +362,8 @@ class Runner(object):
         if self.run_hparams.use_tf_amp:
             if use_auto_loss_scaling:
 
-                if not hvd_utils.is_using_hvd() or hvd.local_rank() == 0:
-                    LOGGER.log("TF Loss Auto Scaling is activated - Experimental Feature")
+                if not hvd_utils.is_using_hvd() or hvd.rank() == 0:
+                    print("TF Loss Auto Scaling is activated - Experimental Feature")
 
                 os.environ["TF_ENABLE_AUTO_MIXED_PRECISION_LOSS_SCALING"] = "1"
                 apply_manual_loss_scaling = False
@@ -394,9 +374,6 @@ class Runner(object):
         else:
             apply_manual_loss_scaling = False
 
-        if not hvd_utils.is_using_hvd() or hvd.local_rank() == 0:
-            LOGGER.log('Defining Model Estimator ...\n')
-
         global_batch_size = batch_size * self.num_gpus
 
         if self.run_hparams.data_dir is not None:
@@ -416,7 +393,7 @@ class Runner(object):
         if hvd_utils.is_using_hvd():
             training_hooks.append(hvd.BroadcastGlobalVariablesHook(0))
 
-        if not hvd_utils.is_using_hvd() or hvd.local_rank() == 0:
+        if not hvd_utils.is_using_hvd() or hvd.rank() == 0:
             training_hooks.append(
                 ProfilerHook(
                     global_batch_size=global_batch_size,
@@ -427,26 +404,28 @@ class Runner(object):
                 )
             )
 
-            LOGGER.log('Starting Model Training ...\n')
+            print("Starting Model Training ...")
 
-            LOGGER.log("=> Epochs: %d" % num_epochs)
-            LOGGER.log("=> Total Steps: %d" % num_steps)
-            LOGGER.log("=> Steps per Epoch: %d" % steps_per_epoch)
-            LOGGER.log("=> Weight Decay Factor: %.1e" % weight_decay)
-            LOGGER.log("=> Learning Rate: %.1e" % learning_rate)
-            LOGGER.log("=> Learning Rate Decay Factor: %.2f" % learning_rate_decay_factor)
-            LOGGER.log("=> Learning Rate Decay Steps: %d" % learning_rate_decay_steps)
-            LOGGER.log("=> RMSProp - Decay: %.1f" % rmsprop_decay)
-            LOGGER.log("=> RMSProp - Momentum: %.1f" % rmsprop_momentum)
-            LOGGER.log("=> Loss Function Name: %s" % self.run_hparams.loss_fn_name)
+            Logger.log(step=(), data={"Epochs": num_epochs}, verbosity=Logger.Verbosity.DEFAULT)
+            Logger.log(step=(), data={"Total Steps": num_steps}, verbosity=Logger.Verbosity.DEFAULT)
+            Logger.log(step=(), data={"Steps per Epoch": steps_per_epoch}, verbosity=Logger.Verbosity.DEFAULT)
+            Logger.log(step=(), data={"Weight Decay Factor": weight_decay}, verbosity=Logger.Verbosity.DEFAULT)
+            Logger.log(step=(), data={"Learning Rate": learning_rate}, verbosity=Logger.Verbosity.DEFAULT)
+            Logger.log(step=(), data={"Learning Rate Decay Factor": learning_rate_decay_factor}, verbosity=Logger.Verbosity.DEFAULT)
+            Logger.log(step=(), data={"Learning Rate Decay Steps": learning_rate_decay_steps}, verbosity=Logger.Verbosity.DEFAULT)
+            Logger.log(step=(), data={"RMSProp - Decay": rmsprop_decay}, verbosity=Logger.Verbosity.DEFAULT)
+            Logger.log(step=(), data={"RMSProp - Momentum": rmsprop_momentum}, verbosity=Logger.Verbosity.DEFAULT)
+            Logger.log(step=(), data={"Loss Function Name": self.run_hparams.loss_fn_name}, verbosity=Logger.Verbosity.DEFAULT)
 
             if self.run_hparams.use_tf_amp:
-                LOGGER.log("=> Use Auto Loss Scaling: %s" % use_auto_loss_scaling)
+                Logger.log(step=(), data={"Use Auto Loss Scaling": use_auto_loss_scaling}, verbosity=Logger.Verbosity.DEFAULT)
+
+            Logger.log(step=(), data={"# GPUs": self.num_gpus}, verbosity=Logger.Verbosity.DEFAULT)
+            Logger.log(step=(), data={"GPU Batch Size": batch_size}, verbosity=Logger.Verbosity.DEFAULT)
+            Logger.log(step=(), data={"Global Batch Size": global_batch_size}, verbosity=Logger.Verbosity.DEFAULT)
+            Logger.log(step=(), data={"Total Files to be Processed": num_steps * global_batch_size}, verbosity=Logger.Verbosity.DEFAULT)
 
-            LOGGER.log("=> # GPUs: %d" % self.num_gpus)
-            LOGGER.log("=> GPU Batch Size: %d" % batch_size)
-            LOGGER.log("=> Global Batch Size: %d" % global_batch_size)
-            LOGGER.log("=> Total Files to Processed: %d\n" % (num_steps * global_batch_size))
+            print()  # visual spacing
 
         estimator_params = {
             'batch_size': batch_size,
@@ -480,8 +459,8 @@ class Runner(object):
                 )
 
             else:
-                if not hvd_utils.is_using_hvd() or hvd.local_rank() == 0:
-                    LOGGER.log("Using Synthetic Data ...")
+                if not hvd_utils.is_using_hvd() or hvd.rank() == 0:
+                    print("Using Synthetic Data ...")
 
                 return self.dataset.synth_dataset_fn(
                     batch_size=batch_size,
@@ -507,8 +486,8 @@ class Runner(object):
         except KeyboardInterrupt:
             print("Keyboard interrupt")
 
-        if not hvd_utils.is_using_hvd() or hvd.local_rank() == 0:
-            LOGGER.log('Ending Model Training ...')
+        if not hvd_utils.is_using_hvd() or hvd.rank() == 0:
+            print('Ending Model Training ...')
 
     def evaluate(self, iter_unit, num_iter, batch_size, warmup_steps=50, is_benchmark=False, save_eval_results_to_json=False):
 
@@ -518,10 +497,10 @@ class Runner(object):
         if self.run_hparams.data_dir is None and not is_benchmark:
             raise ValueError('`data_dir` must be specified for evaluation!')
 
-        if hvd_utils.is_using_hvd() and hvd.rank() != 0:
-            raise RuntimeError('Multi-GPU inference is not supported')
+        # if hvd_utils.is_using_hvd() and hvd.rank() != 0:
+        #     raise RuntimeError('Multi-GPU inference is not supported')
 
-        LOGGER.log('Defining Model Estimator ...\n')
+        print('Defining Model Estimator ...\n')
 
         if self.run_hparams.data_dir is not None:
             filenames, num_samples, num_steps, num_epochs = self.dataset.get_dataset_runtime_specs(
@@ -545,13 +524,15 @@ class Runner(object):
             )
         ]
 
-        LOGGER.log('Starting Model Evaluation ...\n')
+        print('Starting Model Evaluation ...\n')
+
+        Logger.log(step=(), data={"Epochs": num_epochs}, verbosity=Logger.Verbosity.DEFAULT)
+        Logger.log(step=(), data={"Total Steps": num_steps}, verbosity=Logger.Verbosity.DEFAULT)
+        Logger.log(step=(), data={"Steps per Epoch": steps_per_epoch}, verbosity=Logger.Verbosity.DEFAULT)
+        Logger.log(step=(), data={"GPU Batch Size": batch_size}, verbosity=Logger.Verbosity.DEFAULT)
+        Logger.log(step=(), data={"Total Files to be Processed": num_steps * batch_size}, verbosity=Logger.Verbosity.DEFAULT)
 
-        LOGGER.log("=> Epochs: %d" % num_epochs)
-        LOGGER.log("=> Total Steps: %d" % num_steps)
-        LOGGER.log("=> Steps per Epoch: %d" % steps_per_epoch)
-        LOGGER.log("=> GPU Batch Size: %d" % batch_size)
-        LOGGER.log("=> Total Files to Processed: %d\n" % (num_steps * batch_size))
+        print()  # visual spacing
 
         estimator_params = {
             'batch_size': batch_size,
@@ -578,7 +559,7 @@ class Runner(object):
                 )
 
             else:
-                LOGGER.log("Using Synthetic Data ...")
+                print("Using Synthetic Data ...")
 
                 return self.dataset.synth_dataset_fn(
                     batch_size=batch_size,
@@ -602,16 +583,20 @@ class Runner(object):
                 hooks=evaluation_hooks,
             )
 
-            LOGGER.log('Ending Model Evaluation ...')
+            print('Ending Model Evaluation ...')
 
-            LOGGER.log('###################################\n\nEvaluation Results:\n')
+            print('###################################\n\nEvaluation Results:\n')
 
             for key, val in sorted(eval_results.items(), key=operator.itemgetter(0)):
 
                 if any(val in key for val in ["loss", "global_step", "Confusion_Matrix"]):
                     continue
 
-                LOGGER.log('%s: %.3f' % (key, float(val)))
+                Logger.log(
+                    step=(),
+                    data={"{prefix}.{key}".format(prefix=Logger._stage, key=key): float(val)},
+                    verbosity=Logger.Verbosity.DEFAULT
+                )
 
             fns = eval_results["Confusion_Matrix_FN"]
             fps = eval_results["Confusion_Matrix_FP"]
@@ -624,12 +609,41 @@ class Runner(object):
             tpr = np.divide(tps, positives)
             tnr = np.divide(tns, negatives)
 
-            LOGGER.log('TP', tps)
-            LOGGER.log('FN', fns)
-            LOGGER.log('TN', tns)
-            LOGGER.log('FP', fps)
-            LOGGER.log('TPR', tpr)
-            LOGGER.log('TNR', tnr)
+            Logger.log(
+                step=(num_steps,),
+                data={"{prefix}.true_positives".format(prefix=Logger._stage): str(tps)},
+                verbosity=Logger.Verbosity.DEFAULT
+            )
+
+            Logger.log(
+                step=(num_steps,),
+                data={"{prefix}.true_negatives".format(prefix=Logger._stage): str(tns)},
+                verbosity=Logger.Verbosity.DEFAULT
+            )
+
+            Logger.log(
+                step=(num_steps,),
+                data={"{prefix}.false_positives".format(prefix=Logger._stage): str(fps)},
+                verbosity=Logger.Verbosity.DEFAULT
+            )
+
+            Logger.log(
+                step=(num_steps,),
+                data={"{prefix}.false_negatives".format(prefix=Logger._stage): str(fns)},
+                verbosity=Logger.Verbosity.DEFAULT
+            )
+
+            Logger.log(
+                step=(num_steps,),
+                data={"{prefix}.true_positive_rate".format(prefix=Logger._stage): str(["%.3f" % x for x in tpr])},
+                verbosity=Logger.Verbosity.DEFAULT
+            )
+
+            Logger.log(
+                step=(num_steps,),
+                data={"{prefix}.true_negative_rate".format(prefix=Logger._stage): str(["%.3f" % x for x in tnr])},
+                verbosity=Logger.Verbosity.DEFAULT
+            )
 
             if save_eval_results_to_json:
 

+ 3 - 3
TensorFlow/Segmentation/UNet_Industrial/scripts/UNet_AMP_1GPU.sh

@@ -15,13 +15,13 @@
 # limitations under the License.
 
 # This script launches UNet training in FP32-AMP on 1 GPU using 16 batch size (16 per GPU)
-# Usage ./UNet_FP32AMP_1GPU.sh <path to result repository> <path to dataset> <dagm classID (1-10)>
+# Usage ./UNet_AMP_1GPU.sh <path to result repository> <path to dataset> <dagm classID (1-10)>
 
 BASEDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
 
-pip install ${BASEDIR}/../dllogger/
+export TF_CPP_MIN_LOG_LEVEL=3
 
-python ${BASEDIR}/../main.py \
+python "${BASEDIR}/../main.py" \
     --unet_variant='tinyUNet' \
     --activation_fn='relu' \
     --exec_mode='train_and_evaluate' \

+ 50 - 0
TensorFlow/Segmentation/UNet_Industrial/scripts/UNet_AMP_1GPU_XLA.sh

@@ -0,0 +1,50 @@
+#!/usr/bin/env bash
+
+# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This script launches UNet training in FP32-AMP on 1 GPU using 16 batch size (16 per GPU)
+# Usage ./UNet_AMP_1GPU_XLA.sh <path to result repository> <path to dataset> <dagm classID (1-10)>
+
+BASEDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
+
+export TF_CPP_MIN_LOG_LEVEL=3
+
+python "${BASEDIR}/../main.py" \
+    --unet_variant='tinyUNet' \
+    --activation_fn='relu' \
+    --exec_mode='train_and_evaluate' \
+    --iter_unit='batch' \
+    --num_iter=2500 \
+    --batch_size=16 \
+    --warmup_step=10 \
+    --results_dir="${1}" \
+    --data_dir="${2}" \
+    --dataset_name='DAGM2007' \
+    --dataset_classID="${3}" \
+    --data_format='NCHW' \
+    --use_auto_loss_scaling \
+    --use_tf_amp \
+    --use_xla \
+    --learning_rate=1e-4 \
+    --learning_rate_decay_factor=0.8 \
+    --learning_rate_decay_steps=500 \
+    --rmsprop_decay=0.9 \
+    --rmsprop_momentum=0.8 \
+    --loss_fn_name='adaptive_loss' \
+    --weight_decay=1e-5 \
+    --weight_init_method='he_uniform' \
+    --augment_data \
+    --display_every=250 \
+    --debug_verbosity=0

+ 3 - 3
TensorFlow/Segmentation/UNet_Industrial/scripts/UNet_AMP_4GPU.sh

@@ -15,11 +15,11 @@
 # limitations under the License.
 
 # This script launches UNet training in FP32-AMP on 4 GPUs using 16 batch size (4 per GPU)
-# Usage ./UNet_FP32AMP_4GPU.sh <path to result repository> <path to dataset> <dagm classID (1-10)>
+# Usage ./UNet_AMP_4GPU.sh <path to result repository> <path to dataset> <dagm classID (1-10)>
 
 BASEDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
 
-pip install ${BASEDIR}/../dllogger/
+export TF_CPP_MIN_LOG_LEVEL=3
 
 mpirun \
     -np 4 \
@@ -31,7 +31,7 @@ mpirun \
     -x PATH \
     -mca pml ob1 -mca btl ^openib \
     --allow-run-as-root \
-    python ${BASEDIR}/../main.py \
+    python "${BASEDIR}/../main.py" \
         --unet_variant='tinyUNet' \
         --activation_fn='relu' \
         --exec_mode='train_and_evaluate' \

+ 60 - 0
TensorFlow/Segmentation/UNet_Industrial/scripts/UNet_AMP_4GPU_XLA.sh

@@ -0,0 +1,60 @@
+#!/usr/bin/env bash
+
+# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This script launches UNet training in FP32-AMP on 4 GPUs using 16 batch size (4 per GPU)
+# Usage ./UNet_AMP_4GPU_XLA.sh <path to result repository> <path to dataset> <dagm classID (1-10)>
+
+BASEDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
+
+export TF_CPP_MIN_LOG_LEVEL=3
+
+mpirun \
+    -np 4 \
+    -H localhost:4 \
+    -bind-to none \
+    -map-by slot \
+    -x NCCL_DEBUG=VERSION \
+    -x LD_LIBRARY_PATH \
+    -x PATH \
+    -mca pml ob1 -mca btl ^openib \
+    --allow-run-as-root \
+    python "${BASEDIR}/../main.py" \
+        --unet_variant='tinyUNet' \
+        --activation_fn='relu' \
+        --exec_mode='train_and_evaluate' \
+        --iter_unit='batch' \
+        --num_iter=2500 \
+        --batch_size=4 \
+        --warmup_step=10 \
+        --results_dir="${1}" \
+        --data_dir="${2}" \
+        --dataset_name='DAGM2007' \
+        --dataset_classID="${3}" \
+        --data_format='NCHW' \
+        --use_auto_loss_scaling \
+        --use_tf_amp \
+        --use_xla \
+        --learning_rate=1e-4 \
+        --learning_rate_decay_factor=0.8 \
+        --learning_rate_decay_steps=500 \
+        --rmsprop_decay=0.9 \
+        --rmsprop_momentum=0.8 \
+        --loss_fn_name='adaptive_loss' \
+        --weight_decay=1e-5 \
+        --weight_init_method='he_uniform' \
+        --augment_data \
+        --display_every=250 \
+        --debug_verbosity=0

+ 3 - 3
TensorFlow/Segmentation/UNet_Industrial/scripts/UNet_AMP_8GPU.sh

@@ -15,11 +15,11 @@
 # limitations under the License.
 
 # This script launches UNet training in FP32-AMP on 8 GPUs using 16 batch size (2 per GPU)
-# Usage ./UNet_FP32AMP_8GPU.sh <path to result repository> <path to dataset> <dagm classID (1-10)>
+# Usage ./UNet_AMP_8GPU.sh <path to result repository> <path to dataset> <dagm classID (1-10)>
 
 BASEDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
 
-pip install ${BASEDIR}/../dllogger/
+export TF_CPP_MIN_LOG_LEVEL=3
 
 mpirun \
     -np 8 \
@@ -31,7 +31,7 @@ mpirun \
     -x PATH \
     -mca pml ob1 -mca btl ^openib \
     --allow-run-as-root \
-    python ${BASEDIR}/../main.py \
+    python "${BASEDIR}/../main.py" \
         --unet_variant='tinyUNet' \
         --activation_fn='relu' \
         --exec_mode='train_and_evaluate' \

+ 60 - 0
TensorFlow/Segmentation/UNet_Industrial/scripts/UNet_AMP_8GPU_XLA.sh

@@ -0,0 +1,60 @@
+#!/usr/bin/env bash
+
+# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This script launches UNet training in FP32-AMP on 8 GPUs using 16 batch size (2 per GPU)
+# Usage ./UNet_AMP_8GPU_XLA.sh <path to result repository> <path to dataset> <dagm classID (1-10)>
+
+BASEDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
+
+export TF_CPP_MIN_LOG_LEVEL=3
+
+mpirun \
+    -np 8 \
+    -H localhost:8 \
+    -bind-to none \
+    -map-by slot \
+    -x NCCL_DEBUG=VERSION \
+    -x LD_LIBRARY_PATH \
+    -x PATH \
+    -mca pml ob1 -mca btl ^openib \
+    --allow-run-as-root \
+    python "${BASEDIR}/../main.py" \
+        --unet_variant='tinyUNet' \
+        --activation_fn='relu' \
+        --exec_mode='train_and_evaluate' \
+        --iter_unit='batch' \
+        --num_iter=2500 \
+        --batch_size=2 \
+        --warmup_step=10 \
+        --results_dir="${1}" \
+        --data_dir="${2}" \
+        --dataset_name='DAGM2007' \
+        --dataset_classID="${3}" \
+        --data_format='NCHW' \
+        --use_auto_loss_scaling \
+        --use_tf_amp \
+        --use_xla \
+        --learning_rate=1e-4 \
+        --learning_rate_decay_factor=0.8 \
+        --learning_rate_decay_steps=500 \
+        --rmsprop_decay=0.9 \
+        --rmsprop_momentum=0.8 \
+        --loss_fn_name='adaptive_loss' \
+        --weight_decay=1e-5 \
+        --weight_init_method='he_uniform' \
+        --augment_data \
+        --display_every=250 \
+        --debug_verbosity=0

+ 3 - 3
TensorFlow/Segmentation/UNet_Industrial/scripts/UNet_AMP_EVAL.sh

@@ -15,13 +15,13 @@
 # limitations under the License.
 
 # This script launches UNet evaluation in FP32-AMP on 1 GPUs using 16 batch size
-# Usage ./UNet_FP32AMP_EVAL.sh <path to result repository> <path to dataset> <dagm classID (1-10)>
+# Usage ./UNet_AMP_EVAL.sh <path to result repository> <path to dataset> <dagm classID (1-10)>
 
 BASEDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
 
-pip install ${BASEDIR}/../dllogger/
+export TF_CPP_MIN_LOG_LEVEL=3
 
-python ${BASEDIR}/../main.py \
+python "${BASEDIR}/../main.py" \
     --unet_variant='tinyUNet' \
     --activation_fn='relu' \
     --exec_mode='evaluate' \

+ 50 - 0
TensorFlow/Segmentation/UNet_Industrial/scripts/UNet_AMP_EVAL_XLA.sh

@@ -0,0 +1,50 @@
+#!/usr/bin/env bash
+
+# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This script launches UNet evaluation in FP32-AMP on 1 GPU using 16 batch size
+# Usage ./UNet_AMP_EVAL_XLA.sh <path to result repository> <path to dataset> <dagm classID (1-10)>
+
+BASEDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
+
+export TF_CPP_MIN_LOG_LEVEL=3
+
+python "${BASEDIR}/../main.py" \
+    --unet_variant='tinyUNet' \
+    --activation_fn='relu' \
+    --exec_mode='evaluate' \
+    --iter_unit='epoch' \
+    --num_iter=1 \
+    --batch_size=16 \
+    --warmup_step=10 \
+    --results_dir="${1}" \
+    --data_dir="${2}" \
+    --dataset_name='DAGM2007' \
+    --dataset_classID="${3}" \
+    --data_format='NCHW' \
+    --use_auto_loss_scaling \
+    --use_tf_amp \
+    --use_xla \
+    --learning_rate=1e-4 \
+    --learning_rate_decay_factor=0.8 \
+    --learning_rate_decay_steps=500 \
+    --rmsprop_decay=0.9 \
+    --rmsprop_momentum=0.8 \
+    --loss_fn_name='adaptive_loss' \
+    --weight_decay=1e-5 \
+    --weight_init_method='he_uniform' \
+    --augment_data \
+    --display_every=50 \
+    --debug_verbosity=0

+ 2 - 2
TensorFlow/Segmentation/UNet_Industrial/scripts/UNet_FP32_1GPU.sh

@@ -19,9 +19,9 @@
 
 BASEDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
 
-pip install ${BASEDIR}/../dllogger/
+export TF_CPP_MIN_LOG_LEVEL=3
 
-python ${BASEDIR}/../main.py \
+python "${BASEDIR}/../main.py" \
     --unet_variant='tinyUNet' \
     --activation_fn='relu' \
     --exec_mode='train_and_evaluate' \

+ 50 - 0
TensorFlow/Segmentation/UNet_Industrial/scripts/UNet_FP32_1GPU_XLA.sh

@@ -0,0 +1,50 @@
+#!/usr/bin/env bash
+
+# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This script launches UNet training in FP32 on 1 GPU using 16 batch size (16 per GPU)
+# Usage ./UNet_FP32_1GPU_XLA.sh <path to result repository> <path to dataset> <dagm classID (1-10)>
+
+BASEDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
+
+export TF_CPP_MIN_LOG_LEVEL=3
+
+python "${BASEDIR}/../main.py" \
+    --unet_variant='tinyUNet' \
+    --activation_fn='relu' \
+    --exec_mode='train_and_evaluate' \
+    --iter_unit='batch' \
+    --num_iter=2500 \
+    --batch_size=16 \
+    --warmup_step=10 \
+    --results_dir="${1}" \
+    --data_dir="${2}" \
+    --dataset_name='DAGM2007' \
+    --dataset_classID="${3}" \
+    --data_format='NCHW' \
+    --use_auto_loss_scaling \
+    --nouse_tf_amp \
+    --use_xla \
+    --learning_rate=1e-4 \
+    --learning_rate_decay_factor=0.8 \
+    --learning_rate_decay_steps=500 \
+    --rmsprop_decay=0.9 \
+    --rmsprop_momentum=0.8 \
+    --loss_fn_name='adaptive_loss' \
+    --weight_decay=1e-5 \
+    --weight_init_method='he_uniform' \
+    --augment_data \
+    --display_every=250 \
+    --debug_verbosity=0

+ 2 - 2
TensorFlow/Segmentation/UNet_Industrial/scripts/UNet_FP32_4GPU.sh

@@ -19,7 +19,7 @@
 
 BASEDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
 
-pip install ${BASEDIR}/../dllogger/
+export TF_CPP_MIN_LOG_LEVEL=3
 
 mpirun \
     -np 4 \
@@ -31,7 +31,7 @@ mpirun \
     -x PATH \
     -mca pml ob1 -mca btl ^openib \
     --allow-run-as-root \
-    python ${BASEDIR}/../main.py \
+    python "${BASEDIR}/../main.py" \
         --unet_variant='tinyUNet' \
         --activation_fn='relu' \
         --exec_mode='train_and_evaluate' \

+ 60 - 0
TensorFlow/Segmentation/UNet_Industrial/scripts/UNet_FP32_4GPU_XLA.sh

@@ -0,0 +1,60 @@
+#!/usr/bin/env bash
+
+# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This script launches UNet training in FP32 on 4 GPUs using 16 batch size (4 per GPU)
+# Usage ./UNet_FP32_4GPU_XLA.sh <path to result repository> <path to dataset> <dagm classID (1-10)>
+
+BASEDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
+
+export TF_CPP_MIN_LOG_LEVEL=3
+
+mpirun \
+    -np 4 \
+    -H localhost:4 \
+    -bind-to none \
+    -map-by slot \
+    -x NCCL_DEBUG=VERSION \
+    -x LD_LIBRARY_PATH \
+    -x PATH \
+    -mca pml ob1 -mca btl ^openib \
+    --allow-run-as-root \
+    python "${BASEDIR}/../main.py" \
+        --unet_variant='tinyUNet' \
+        --activation_fn='relu' \
+        --exec_mode='train_and_evaluate' \
+        --iter_unit='batch' \
+        --num_iter=2500 \
+        --batch_size=4 \
+        --warmup_step=10 \
+        --results_dir="${1}" \
+        --data_dir="${2}" \
+        --dataset_name='DAGM2007' \
+        --dataset_classID="${3}" \
+        --data_format='NCHW' \
+        --use_auto_loss_scaling \
+        --nouse_tf_amp \
+        --use_xla \
+        --learning_rate=1e-4 \
+        --learning_rate_decay_factor=0.8 \
+        --learning_rate_decay_steps=500 \
+        --rmsprop_decay=0.9 \
+        --rmsprop_momentum=0.8 \
+        --loss_fn_name='adaptive_loss' \
+        --weight_decay=1e-5 \
+        --weight_init_method='he_uniform' \
+        --augment_data \
+        --display_every=250 \
+        --debug_verbosity=0

+ 2 - 2
TensorFlow/Segmentation/UNet_Industrial/scripts/UNet_FP32_8GPU.sh

@@ -19,7 +19,7 @@
 
 BASEDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
 
-pip install ${BASEDIR}/../dllogger/
+export TF_CPP_MIN_LOG_LEVEL=3
 
 mpirun \
     -np 8 \
@@ -31,7 +31,7 @@ mpirun \
     -x PATH \
     -mca pml ob1 -mca btl ^openib \
     --allow-run-as-root \
-    python ${BASEDIR}/../main.py \
+    python "${BASEDIR}/../main.py" \
         --unet_variant='tinyUNet' \
         --activation_fn='relu' \
         --exec_mode='train_and_evaluate' \

+ 60 - 0
TensorFlow/Segmentation/UNet_Industrial/scripts/UNet_FP32_8GPU_XLA.sh

@@ -0,0 +1,60 @@
+#!/usr/bin/env bash
+
+# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This script launches UNet training in FP32 on 8 GPUs using 16 batch size (2 per GPU)
+# Usage ./UNet_FP32_8GPU_XLA.sh <path to result repository> <path to dataset> <dagm classID (1-10)>
+
+BASEDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
+
+export TF_CPP_MIN_LOG_LEVEL=3
+
+mpirun \
+    -np 8 \
+    -H localhost:8 \
+    -bind-to none \
+    -map-by slot \
+    -x NCCL_DEBUG=VERSION \
+    -x LD_LIBRARY_PATH \
+    -x PATH \
+    -mca pml ob1 -mca btl ^openib \
+    --allow-run-as-root \
+    python "${BASEDIR}/../main.py" \
+        --unet_variant='tinyUNet' \
+        --activation_fn='relu' \
+        --exec_mode='train_and_evaluate' \
+        --iter_unit='batch' \
+        --num_iter=2500 \
+        --batch_size=2 \
+        --warmup_step=10 \
+        --results_dir="${1}" \
+        --data_dir="${2}" \
+        --dataset_name='DAGM2007' \
+        --dataset_classID="${3}" \
+        --data_format='NCHW' \
+        --use_auto_loss_scaling \
+        --nouse_tf_amp \
+        --use_xla \
+        --learning_rate=1e-4 \
+        --learning_rate_decay_factor=0.8 \
+        --learning_rate_decay_steps=500 \
+        --rmsprop_decay=0.9 \
+        --rmsprop_momentum=0.8 \
+        --loss_fn_name='adaptive_loss' \
+        --weight_decay=1e-5 \
+        --weight_init_method='he_uniform' \
+        --augment_data \
+        --display_every=250 \
+        --debug_verbosity=0

+ 2 - 2
TensorFlow/Segmentation/UNet_Industrial/scripts/UNet_FP32_EVAL.sh

@@ -19,9 +19,9 @@
 
 BASEDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
 
-pip install ${BASEDIR}/../dllogger/
+export TF_CPP_MIN_LOG_LEVEL=3
 
-python ${BASEDIR}/../main.py \
+python "${BASEDIR}/../main.py" \
     --unet_variant='tinyUNet' \
     --activation_fn='relu' \
     --exec_mode='evaluate' \

+ 50 - 0
TensorFlow/Segmentation/UNet_Industrial/scripts/UNet_FP32_EVAL_XLA.sh

@@ -0,0 +1,50 @@
+#!/usr/bin/env bash
+
+# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This script launches UNet evaluation in FP32 on 1 GPU using 16 batch size
+# Usage ./UNet_FP32_EVAL_XLA.sh <path to result repository> <path to dataset> <dagm classID (1-10)>
+
+BASEDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
+
+export TF_CPP_MIN_LOG_LEVEL=3
+
+python "${BASEDIR}/../main.py" \
+    --unet_variant='tinyUNet' \
+    --activation_fn='relu' \
+    --exec_mode='evaluate' \
+    --iter_unit='epoch' \
+    --num_iter=1 \
+    --batch_size=16 \
+    --warmup_step=10 \
+    --results_dir="${1}" \
+    --data_dir="${2}" \
+    --dataset_name='DAGM2007' \
+    --dataset_classID="${3}" \
+    --data_format='NCHW' \
+    --use_auto_loss_scaling \
+    --nouse_tf_amp \
+    --use_xla \
+    --learning_rate=1e-4 \
+    --learning_rate_decay_factor=0.8 \
+    --learning_rate_decay_steps=500 \
+    --rmsprop_decay=0.9 \
+    --rmsprop_momentum=0.8 \
+    --loss_fn_name='adaptive_loss' \
+    --weight_decay=1e-5 \
+    --weight_init_method='he_uniform' \
+    --augment_data \
+    --display_every=50 \
+    --debug_verbosity=0

+ 11 - 7
TensorFlow/Segmentation/UNet_Industrial/scripts/benchmarking/DGX1v_evalbench_AMP.sh

@@ -15,13 +15,17 @@
 # limitations under the License.
 
 # This script launches UNet evaluation benchmark in FP32-AMP on 1 GPUs using 16 batch size
-# Usage ./DGX1v_evalbench_FP32AMP.sh <path to result repository> <path to dataset> <dagm classID (1-10)>
+# Usage ./DGX1v_evalbench_AMP.sh <path to dataset> <dagm classID (1-10)>
 
 BASEDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
 
-pip install ${BASEDIR}/../../dllogger/
+export TF_CPP_MIN_LOG_LEVEL=3
 
-python ${BASEDIR}/../../main.py \
+# Cleaning up for benchmark
+RESULT_DIR="/tmp"
+rm -rf "${RESULT_DIR}"
+
+python "${BASEDIR}/../../main.py" \
     --unet_variant='tinyUNet' \
     --activation_fn='relu' \
     --exec_mode='inference_benchmark' \
@@ -29,14 +33,14 @@ python ${BASEDIR}/../../main.py \
     --num_iter=1500 \
     --batch_size=16 \
     --warmup_step=500 \
-    --results_dir="${1}" \
-    --data_dir="${2}" \
+    --results_dir="${RESULT_DIR}" \
+    --data_dir="${1}" \
     --dataset_name='DAGM2007' \
-    --dataset_classID="${3}" \
+    --dataset_classID="${2}" \
     --data_format='NCHW' \
     --use_auto_loss_scaling \
     --use_tf_amp \
-    --nouse_xla \
+    --use_xla \
     --learning_rate=1e-4 \
     --learning_rate_decay_factor=0.8 \
     --learning_rate_decay_steps=500 \

+ 11 - 7
TensorFlow/Segmentation/UNet_Industrial/scripts/benchmarking/DGX1v_evalbench_FP32.sh

@@ -15,13 +15,17 @@
 # limitations under the License.
 
 # This script launches UNet evaluation benchmark in FP32 on 1 GPUs using 16 batch size
-# Usage ./DGX1v_evalbench_FP32.sh <path to result repository> <path to dataset> <dagm classID (1-10)>
+# Usage ./DGX1v_evalbench_FP32.sh <path to dataset> <dagm classID (1-10)>
 
 BASEDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
 
-pip install ${BASEDIR}/../../dllogger/
+export TF_CPP_MIN_LOG_LEVEL=3
 
-python ${BASEDIR}/../../main.py \
+# Cleaning up for benchmark
+RESULT_DIR="/tmp"
+rm -rf "${RESULT_DIR}"
+
+python "${BASEDIR}/../../main.py" \
     --unet_variant='tinyUNet' \
     --activation_fn='relu' \
     --exec_mode='inference_benchmark' \
@@ -29,14 +33,14 @@ python ${BASEDIR}/../../main.py \
     --num_iter=1500 \
     --batch_size=16 \
     --warmup_step=500 \
-    --results_dir="${1}" \
-    --data_dir="${2}" \
+    --results_dir="${RESULT_DIR}" \
+    --data_dir="${1}" \
     --dataset_name='DAGM2007' \
-    --dataset_classID="${3}" \
+    --dataset_classID="${2}" \
     --data_format='NCHW' \
     --use_auto_loss_scaling \
     --nouse_tf_amp \
-    --nouse_xla \
+    --use_xla \
     --learning_rate=1e-4 \
     --learning_rate_decay_factor=0.8 \
     --learning_rate_decay_steps=500 \

+ 11 - 7
TensorFlow/Segmentation/UNet_Industrial/scripts/benchmarking/DGX1v_trainbench_AMP_1GPU.sh

@@ -15,13 +15,17 @@
 # limitations under the License.
 
 # This script launches UNet training benchmark in FP32-AMP on 1 GPU using 16 batch size (16 per GPU)
-# Usage ./DGX1v_trainbench_FP32AMP_1GPU.sh <path to result repository> <path to dataset> <dagm classID (1-10)>
+# Usage ./DGX1v_trainbench_AMP_1GPU.sh <path to dataset> <dagm classID (1-10)>
 
 BASEDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
 
-pip install ${BASEDIR}/../../dllogger/
+export TF_CPP_MIN_LOG_LEVEL=3
 
-python ${BASEDIR}/../../main.py \
+# Cleaning up for benchmark
+RESULT_DIR="/tmp"
+rm -rf "${RESULT_DIR}"
+
+python "${BASEDIR}/../../main.py" \
     --unet_variant='tinyUNet' \
     --activation_fn='relu' \
     --exec_mode='training_benchmark' \
@@ -29,14 +33,14 @@ python ${BASEDIR}/../../main.py \
     --num_iter=1500 \
     --batch_size=16 \
     --warmup_step=500 \
-    --results_dir="${1}" \
-    --data_dir="${2}" \
+    --results_dir="${RESULT_DIR}" \
+    --data_dir="${1}" \
     --dataset_name='DAGM2007' \
-    --dataset_classID="${3}" \
+    --dataset_classID="${2}" \
     --data_format='NCHW' \
     --use_auto_loss_scaling \
     --use_tf_amp \
-    --nouse_xla \
+    --use_xla \
     --learning_rate=1e-4 \
     --learning_rate_decay_factor=0.8 \
     --learning_rate_decay_steps=500 \

+ 11 - 7
TensorFlow/Segmentation/UNet_Industrial/scripts/benchmarking/DGX1v_trainbench_AMP_4GPU.sh

@@ -15,11 +15,15 @@
 # limitations under the License.
 
 # This script launches UNet training benchmark in FP32-AMP on 4 GPUs using 16 batch size (4 per GPU)
-# Usage ./DGX1v_trainbench_FP32AMP_4GPU.sh <path to result repository> <path to dataset> <dagm classID (1-10)>
+# Usage ./DGX1v_trainbench_AMP_4GPU.sh <path to dataset> <dagm classID (1-10)>
 
 BASEDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
 
-pip install ${BASEDIR}/../../dllogger/
+export TF_CPP_MIN_LOG_LEVEL=3
+
+# Cleaning up for benchmark
+RESULT_DIR="/tmp"
+rm -rf "${RESULT_DIR}"
 
 mpirun \
     -np 4 \
@@ -31,7 +35,7 @@ mpirun \
     -x PATH \
     -mca pml ob1 -mca btl ^openib \
     --allow-run-as-root \
-    python ${BASEDIR}/../../main.py \
+    python "${BASEDIR}/../../main.py" \
         --unet_variant='tinyUNet' \
         --activation_fn='relu' \
         --exec_mode='training_benchmark' \
@@ -39,14 +43,14 @@ mpirun \
         --num_iter=1500 \
         --batch_size=4 \
         --warmup_step=500 \
-        --results_dir="${1}" \
-        --data_dir="${2}" \
+        --results_dir="${RESULT_DIR}" \
+        --data_dir="${1}" \
         --dataset_name='DAGM2007' \
-        --dataset_classID="${3}" \
+        --dataset_classID="${2}" \
         --data_format='NCHW' \
         --use_auto_loss_scaling \
         --use_tf_amp \
-        --nouse_xla \
+        --use_xla \
         --learning_rate=1e-4 \
         --learning_rate_decay_factor=0.8 \
         --learning_rate_decay_steps=500 \

+ 11 - 7
TensorFlow/Segmentation/UNet_Industrial/scripts/benchmarking/DGX1v_trainbench_AMP_8GPU.sh

@@ -15,11 +15,15 @@
 # limitations under the License.
 
 # This script launches UNet training benchmark in FP32-AMP on 8 GPUs using 16 batch size (2 per GPU)
-# Usage ./DGX1v_trainbench_FP32AMP_8GPU.sh <path to result repository> <path to dataset> <dagm classID (1-10)>
+# Usage ./DGX1v_trainbench_AMP_8GPU.sh <path to dataset> <dagm classID (1-10)>
 
 BASEDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
 
-pip install ${BASEDIR}/../../dllogger/
+export TF_CPP_MIN_LOG_LEVEL=3
+
+# Cleaning up for benchmark
+RESULT_DIR="/tmp"
+rm -rf "${RESULT_DIR}"
 
 mpirun \
     -np 8 \
@@ -31,7 +35,7 @@ mpirun \
     -x PATH \
     -mca pml ob1 -mca btl ^openib \
     --allow-run-as-root \
-    python ${BASEDIR}/../../main.py \
+    python "${BASEDIR}/../../main.py" \
         --unet_variant='tinyUNet' \
         --activation_fn='relu' \
         --exec_mode='training_benchmark' \
@@ -39,14 +43,14 @@ mpirun \
         --num_iter=1500 \
         --batch_size=2 \
         --warmup_step=500 \
-        --results_dir="${1}" \
-        --data_dir="${2}" \
+        --results_dir="${RESULT_DIR}" \
+        --data_dir="${1}" \
         --dataset_name='DAGM2007' \
-        --dataset_classID="${3}" \
+        --dataset_classID="${2}" \
         --data_format='NCHW' \
         --use_auto_loss_scaling \
         --use_tf_amp \
-        --nouse_xla \
+        --use_xla \
         --learning_rate=1e-4 \
         --learning_rate_decay_factor=0.8 \
         --learning_rate_decay_steps=500 \

+ 11 - 7
TensorFlow/Segmentation/UNet_Industrial/scripts/benchmarking/DGX1v_trainbench_FP32_1GPU.sh

@@ -15,13 +15,17 @@
 # limitations under the License.
 
 # This script launches UNet training benchmark in FP32 on 1 GPU using 16 batch size (16 per GPU)
-# Usage ./DGX1v_trainbench_FP32_1GPU.sh <path to result repository> <path to dataset> <dagm classID (1-10)>
+# Usage ./DGX1v_trainbench_FP32_1GPU.sh <path to dataset> <dagm classID (1-10)>
 
 BASEDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
 
-pip install ${BASEDIR}/../../dllogger/
+export TF_CPP_MIN_LOG_LEVEL=3
 
-python ${BASEDIR}/../../main.py \
+# Cleaning up for benchmark
+RESULT_DIR="/tmp"
+rm -rf "${RESULT_DIR}"
+
+python "${BASEDIR}/../../main.py" \
     --unet_variant='tinyUNet' \
     --activation_fn='relu' \
     --exec_mode='training_benchmark' \
@@ -29,14 +33,14 @@ python ${BASEDIR}/../../main.py \
     --num_iter=1500 \
     --batch_size=16 \
     --warmup_step=500 \
-    --results_dir="${1}" \
-    --data_dir="${2}" \
+    --results_dir="${RESULT_DIR}" \
+    --data_dir="${1}" \
     --dataset_name='DAGM2007' \
-    --dataset_classID="${3}" \
+    --dataset_classID="${2}" \
     --data_format='NCHW' \
     --use_auto_loss_scaling \
     --nouse_tf_amp \
-    --nouse_xla \
+    --use_xla \
     --learning_rate=1e-4 \
     --learning_rate_decay_factor=0.8 \
     --learning_rate_decay_steps=500 \

+ 11 - 7
TensorFlow/Segmentation/UNet_Industrial/scripts/benchmarking/DGX1v_trainbench_FP32_4GPU.sh

@@ -15,11 +15,15 @@
 # limitations under the License.
 
 # This script launches UNet training benchmark in FP32 on 4 GPUs using 16 batch size (4 per GPU)
-# Usage ./DGX1v_trainbench_FP32_4GPU.sh <path to result repository> <path to dataset> <dagm classID (1-10)>
+# Usage ./DGX1v_trainbench_FP32_4GPU.sh <path to dataset> <dagm classID (1-10)>
 
 BASEDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
 
-pip install ${BASEDIR}/../../dllogger/
+export TF_CPP_MIN_LOG_LEVEL=3
+
+# Cleaning up for benchmark
+RESULT_DIR="/tmp"
+rm -rf "${RESULT_DIR}"
 
 mpirun \
     -np 4 \
@@ -31,7 +35,7 @@ mpirun \
     -x PATH \
     -mca pml ob1 -mca btl ^openib \
     --allow-run-as-root \
-    python ${BASEDIR}/../../main.py \
+    python "${BASEDIR}/../../main.py" \
         --unet_variant='tinyUNet' \
         --activation_fn='relu' \
         --exec_mode='training_benchmark' \
@@ -39,14 +43,14 @@ mpirun \
         --num_iter=1500 \
         --batch_size=4 \
         --warmup_step=500 \
-        --results_dir="${1}" \
-        --data_dir="${2}" \
+        --results_dir="${RESULT_DIR}" \
+        --data_dir="${1}" \
         --dataset_name='DAGM2007' \
-        --dataset_classID="${3}" \
+        --dataset_classID="${2}" \
         --data_format='NCHW' \
         --use_auto_loss_scaling \
         --nouse_tf_amp \
-        --nouse_xla \
+        --use_xla \
         --learning_rate=1e-4 \
         --learning_rate_decay_factor=0.8 \
         --learning_rate_decay_steps=500 \

+ 11 - 7
TensorFlow/Segmentation/UNet_Industrial/scripts/benchmarking/DGX1v_trainbench_FP32_8GPU.sh

@@ -15,11 +15,15 @@
 # limitations under the License.
 
 # This script launches UNet training benchmark in FP32 on 8 GPUs using 16 batch size (2 per GPU)
-# Usage ./DGX1v_trainbench_FP32_8GPU.sh <path to result repository> <path to dataset> <dagm classID (1-10)>
+# Usage ./DGX1v_trainbench_FP32_8GPU.sh <path to dataset> <dagm classID (1-10)>
 
 BASEDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
 
-pip install ${BASEDIR}/../../dllogger/
+export TF_CPP_MIN_LOG_LEVEL=3
+
+# Cleaning up for benchmark
+RESULT_DIR="/tmp"
+rm -rf "${RESULT_DIR}"
 
 mpirun \
     -np 8 \
@@ -31,7 +35,7 @@ mpirun \
     -x PATH \
     -mca pml ob1 -mca btl ^openib \
     --allow-run-as-root \
-    python ${BASEDIR}/../../main.py \
+    python "${BASEDIR}/../../main.py" \
         --unet_variant='tinyUNet' \
         --activation_fn='relu' \
         --exec_mode='training_benchmark' \
@@ -39,14 +43,14 @@ mpirun \
         --num_iter=1500 \
         --batch_size=2 \
         --warmup_step=500 \
-        --results_dir="${1}" \
-        --data_dir="${2}" \
+        --results_dir="${RESULT_DIR}" \
+        --data_dir="${1}" \
         --dataset_name='DAGM2007' \
-        --dataset_classID="${3}" \
+        --dataset_classID="${2}" \
         --data_format='NCHW' \
         --use_auto_loss_scaling \
         --nouse_tf_amp \
-        --nouse_xla \
+        --use_xla \
         --learning_rate=1e-4 \
         --learning_rate_decay_factor=0.8 \
         --learning_rate_decay_steps=500 \

+ 24 - 0
TensorFlow/Segmentation/UNet_Industrial/scripts/launch_docker.sh

@@ -0,0 +1,24 @@
+#!/usr/bin/env bash
+
+DATASET_DIR=$(realpath -s "$1")
+RESULT_DIR=$(realpath -s "$2")
+
+if [[ ! -e ${DATASET_DIR} ]]; then
+    echo "creating ${DATASET_DIR} ..."
+    mkdir -p "${DATASET_DIR}"
+fi
+
+if [[ ! -e ${RESULT_DIR} ]]; then
+    echo "creating ${RESULT_DIR} ..."
+    mkdir -p "${RESULT_DIR}"
+fi
+
+# Build the docker container
+docker build . --rm -t unet_industrial:latest
+
+# start the container with nvidia-docker
+nvidia-docker run -it --rm \
+    --shm-size=2g --ulimit memlock=-1 --ulimit stack=67108864 \
+    -v ${DATASET_DIR}:/data/dagm2007/ \
+    -v ${RESULT_DIR}:/results \
+    unet_industrial:latest

+ 1 - 0
TensorFlow/Segmentation/UNet_Industrial/utils/__init__.py

@@ -24,5 +24,6 @@ from utils import hooks
 from utils import cmdline_helper
 from utils import hvd_utils
 from utils import image_processing
+from utils import logging
 from utils import losses
 from utils import metrics

+ 11 - 3
TensorFlow/Segmentation/UNet_Industrial/utils/cmdline_helper.py

@@ -95,6 +95,14 @@ def parse_cmdline():
         help="""Directory in which to write training logs, summaries and checkpoints."""
     )
 
+    p.add_argument(
+        '--log_dir',
+        type=str,
+        required=False,
+        default="dllogger_out.json",
+        help="""File path in which to write the dllogger JSON log."""
+    )
+
     _add_bool_argument(
         parser=p,
         name="save_eval_results_to_json",
@@ -151,11 +159,11 @@ def parse_cmdline():
         help="""Which initialisation method is used to randomly intialize the model during training"""
     )
 
-    p.add_argument('--learning_rate', default=1e-5, type=float, required=False, help="""Learning rate value.""")
+    p.add_argument('--learning_rate', default=1e-4, type=float, required=False, help="""Learning rate value.""")
 
     p.add_argument(
         '--learning_rate_decay_factor',
-        default=0.75,
+        default=0.8,
         type=float,
         required=False,
         help="""Decay factor to decrease the learning rate."""
@@ -173,7 +181,7 @@ def parse_cmdline():
 
     p.add_argument('--rmsprop_momentum', default=0.8, type=float, required=False, help="""RMSProp - Momentum value.""")
 
-    p.add_argument('--weight_decay', default=1e-4, type=float, required=False, help="""Weight Decay scale factor""")
+    p.add_argument('--weight_decay', default=1e-5, type=float, required=False, help="""Weight Decay scale factor""")
 
     _add_bool_argument(
         parser=p, name="use_auto_loss_scaling", default=False, required=False, help="Use AutoLossScaling with TF-AMP"

+ 137 - 22
TensorFlow/Segmentation/UNet_Industrial/utils/hooks/profiler_hook.py

@@ -27,7 +27,7 @@ import operator
 import numpy as np
 import tensorflow as tf
 
-from dllogger.logger import LOGGER
+import dllogger as Logger
 
 __all__ = ["ProfilerHook"]
 
@@ -60,15 +60,79 @@ class ProfilerHook(tf.train.SessionRunHook):
         ret[n:] = ret[n:] - ret[:-n]
         return ret[n - 1:] / n
 
-    def begin(self):
-        LOGGER.log_hardware()
-
     def after_create_session(self, session, coord):
 
         params_count = tf.get_default_graph().get_tensor_by_name("trainable_parameters_count_ref:0")
         _params_count = session.run(params_count)
 
-        LOGGER.log("# Total Trainable Parameters:", int(_params_count))
+        Logger._stage = "train" if self._is_training else "eval"
+
+        Logger.log(
+            step=(),
+            data={"# Total Trainable Parameters": int(_params_count)}, verbosity=Logger.Verbosity.DEFAULT
+        )
+
+        Logger.metadata(
+            metric="{prefix}.avg_ips".format(prefix=Logger._stage),
+            metadata={"unit": "imgs/s", "format": ":.3f", "GOAL": "MAXIMIZE", "STAGE": Logger._stage.upper()}
+        )
+
+        for ths in [0.05, 0.125, 0.25, 0.5, 0.75, 0.85, 0.95, 0.99]:
+            Logger.metadata(
+                metric="{prefix}.IoU_THS_{ths}".format(prefix=Logger._stage, ths=ths),
+                metadata={"format": ":.3f", "GOAL": "MAXIMIZE", "STAGE": Logger._stage.upper()}
+            )
+
+        if self._is_training:
+            Logger.metadata(
+                metric="{prefix}.learning_rate".format(prefix=Logger._stage),
+                metadata={"format": ":.3e", "GOAL": "NONE", "STAGE": Logger._stage.upper()}
+            )
+
+            Logger.metadata(
+                metric="{prefix}.weight_decay".format(prefix=Logger._stage),
+                metadata={"format": ":.3f", "GOAL": "MAXIMIZE", "STAGE": Logger._stage.upper()}
+            )
+
+            Logger.metadata(
+                metric="{prefix}.reconstruction_loss".format(prefix=Logger._stage),
+                metadata={"format": ":.3f", "GOAL": "MINIMIZE", "STAGE": Logger._stage.upper()}
+            )
+
+            Logger.metadata(
+                metric="{prefix}.total_loss".format(prefix=Logger._stage),
+                metadata={"format": ":.3f", "GOAL": "MINIMIZE", "STAGE": Logger._stage.upper()}
+            )
+
+        Logger.metadata(
+            metric="{prefix}.true_positives".format(prefix=Logger._stage),
+            metadata={"STAGE": Logger._stage.upper()}
+        )
+
+        Logger.metadata(
+            metric="{prefix}.true_negatives".format(prefix=Logger._stage),
+            metadata={"STAGE": Logger._stage.upper()}
+        )
+
+        Logger.metadata(
+            metric="{prefix}.false_positives".format(prefix=Logger._stage),
+            metadata={"STAGE": Logger._stage.upper()}
+        )
+
+        Logger.metadata(
+            metric="{prefix}.false_negatives".format(prefix=Logger._stage),
+            metadata={"STAGE": Logger._stage.upper()}
+        )
+
+        Logger.metadata(
+            metric="{prefix}.true_positive_rate".format(prefix=Logger._stage),
+            metadata={"STAGE": Logger._stage.upper()}
+        )
+
+        Logger.metadata(
+            metric="{prefix}.true_negative_rate".format(prefix=Logger._stage),
+            metadata={"STAGE": Logger._stage.upper()}
+        )
 
         self._start_training_time = time.time()
 
@@ -154,22 +218,64 @@ class ProfilerHook(tf.train.SessionRunHook):
             if self._current_step > self._warmup_steps:
                 imgs_per_sec = float(ProfilerHook.moving_average(self._processing_speed_arr, n=30)[-1])
 
-            LOGGER.log("iteration", int(self._current_step))
-            LOGGER.log("total_ips", float(imgs_per_sec))
+            Logger.log(
+                step=(self._current_step,),
+                data={"{prefix}.avg_ips".format(prefix=Logger._stage): float(imgs_per_sec)},
+                verbosity=Logger.Verbosity.DEFAULT
+            )
 
             if self._is_training:
-                LOGGER.log("weight_decay", float(run_values.results["weight_decay"]))
-                LOGGER.log("reconstruction_loss", float(run_values.results["reconstruction_loss"]))
-                LOGGER.log("total_loss", float(run_values.results["total_loss"]))
-                LOGGER.log("learning_rate", float(run_values.results["learning_rate"]))
+                Logger.log(
+                    step=(self._current_step,),
+                    data={"{prefix}.weight_decay".format(prefix=Logger._stage): float(run_values.results["weight_decay"])},
+                    verbosity=Logger.Verbosity.DEFAULT
+                )
+                Logger.log(
+                    step=(self._current_step,),
+                    data={"{prefix}.reconstruction_loss".format(prefix=Logger._stage): float(run_values.results["reconstruction_loss"])},
+                    verbosity=Logger.Verbosity.DEFAULT
+                )
+                Logger.log(
+                    step=(self._current_step,),
+                    data={"{prefix}.total_loss".format(prefix=Logger._stage): float(run_values.results["total_loss"])},
+                    verbosity=Logger.Verbosity.DEFAULT
+                )
+                Logger.log(
+                    step=(self._current_step,),
+                    data={"{prefix}.learning_rate".format(prefix=Logger._stage): float(run_values.results["learning_rate"])},
+                    verbosity=Logger.Verbosity.DEFAULT
+                )
 
             for key, val in sorted(run_values.results["iou_scores"].items(), key=operator.itemgetter(0)):
-                LOGGER.log("iou_score - THS %s" % key, float(val))
-
-            LOGGER.log("True Positives:", run_values.results["confusion_matrix"]["tp"])
-            LOGGER.log("True Negatives:", run_values.results["confusion_matrix"]["tn"])
-            LOGGER.log("False Positives:", run_values.results["confusion_matrix"]["fp"])
-            LOGGER.log("False Negatives:", run_values.results["confusion_matrix"]["fn"])
+                Logger.log(
+                    step=(self._current_step,),
+                    data={"{prefix}.IoU_THS_{ths}".format(prefix=Logger._stage, ths=key): float(val)},
+                    verbosity=Logger.Verbosity.DEFAULT
+                )
+
+            Logger.log(
+                step=(self._current_step,),
+                data={"{prefix}.true_positives".format(prefix=Logger._stage): str(run_values.results["confusion_matrix"]["tp"])},
+                verbosity=Logger.Verbosity.DEFAULT
+            )
+
+            Logger.log(
+                step=(self._current_step,),
+                data={"{prefix}.true_negatives".format(prefix=Logger._stage): str(run_values.results["confusion_matrix"]["tn"])},
+                verbosity=Logger.Verbosity.DEFAULT
+            )
+
+            Logger.log(
+                step=(self._current_step,),
+                data={"{prefix}.false_positives".format(prefix=Logger._stage): str(run_values.results["confusion_matrix"]["fp"])},
+                verbosity=Logger.Verbosity.DEFAULT
+            )
+
+            Logger.log(
+                step=(self._current_step,),
+                data={"{prefix}.false_negatives".format(prefix=Logger._stage): str(run_values.results["confusion_matrix"]["fn"])},
+                verbosity=Logger.Verbosity.DEFAULT
+            )
 
             if self._sample_dir is not None and self._is_training:
 
@@ -203,11 +309,20 @@ class ProfilerHook(tf.train.SessionRunHook):
         total_processing_hours, rem = divmod(total_processing_time, 3600)
         total_processing_minutes, total_processing_seconds = divmod(rem, 60)
 
-        LOGGER.log(
-            "Final Summary:\n"
-            "\t[*] Average Imgs/sec: %d\n"
-            "\t[*] Total Processing Time: %dh %02dm %02ds\n" %
-            (avg_processing_speed, total_processing_hours, total_processing_minutes, total_processing_seconds)
+        print("\n============== Final Summary ==============")
+        Logger.log(
+            step=(),
+            data={"{prefix}.avg_ips".format(prefix=Logger._stage): avg_processing_speed},
+            verbosity=Logger.Verbosity.DEFAULT
+        )
+        Logger.log(
+            step=(),
+            data={"{prefix} - Total Processing Time".format(prefix=Logger._stage.capitalize()): "%dh %02dm %02ds" % (
+                total_processing_hours,
+                total_processing_minutes,
+                total_processing_seconds
+            )},
+            verbosity=Logger.Verbosity.DEFAULT
         )
 
         perf_dict = {'throughput': str(avg_processing_speed), 'processing_time': str(total_processing_time)}

+ 1 - 6
TensorFlow/Segmentation/UNet_Industrial/utils/hvd_utils.py

@@ -25,9 +25,4 @@ __all__ = ["is_using_hvd"]
 
 
 def is_using_hvd():
-    env_vars = ["OMPI_COMM_WORLD_RANK", "OMPI_COMM_WORLD_SIZE"]
-
-    if all([var in os.environ for var in env_vars]):
-        return True
-    else:
-        return False
+    return True

+ 50 - 0
TensorFlow/Segmentation/UNet_Industrial/utils/logging.py

@@ -0,0 +1,50 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# ==============================================================================
+#
+# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# ==============================================================================
+
+import dllogger as Logger
+
+
+def format_step(step):
+    if isinstance(step, str):
+        return step
+
+    if isinstance(step, int):
+        return "Iteration: {} ".format(step)
+
+    s = ""
+
+    if len(step) > 0:
+        s += "Epoch: {} ".format(step[0])
+
+    if len(step) > 1:
+        s += "Iteration: {} ".format(step[1])
+
+    if len(step) > 2:
+        s += "Validation Iteration: {} ".format(step[2])
+
+    return s
+
+
+def init_dllogger(log_dir):
+    Logger.init([
+        Logger.StdOutBackend(Logger.Verbosity.DEFAULT, step_format=format_step),
+        Logger.JSONStreamBackend(Logger.Verbosity.VERBOSE, log_dir)
+    ])