
[Jasper/PyT] Triton update

kkudrynski 5 years ago
parent commit 557f4d01ea
22 changed files with 259 additions and 52 deletions
   1. +2 -0    PyTorch/SpeechRecognition/Jasper/.dockerignore
   2. +4 -4    PyTorch/SpeechRecognition/Jasper/.gitmodules
   3. +95 -0   PyTorch/SpeechRecognition/Jasper/external/Dockerfile.client.patched
   4. +1 -0    PyTorch/SpeechRecognition/Jasper/external/triton-inference-server
   5. +4 -0    PyTorch/SpeechRecognition/Jasper/inference.py
   6. +3 -3    PyTorch/SpeechRecognition/Jasper/model.py
   7. +5 -5    PyTorch/SpeechRecognition/Jasper/optimizers.py
   8. +3 -3    PyTorch/SpeechRecognition/Jasper/scripts/train.sh
   9. +5 -3    PyTorch/SpeechRecognition/Jasper/tensorrt/Dockerfile
  10. +2 -2    PyTorch/SpeechRecognition/Jasper/tensorrt/scripts/docker/build.sh
  11. +1 -1    PyTorch/SpeechRecognition/Jasper/tensorrt/scripts/trt_inference_benchmark.sh
  12. +29 -13  PyTorch/SpeechRecognition/Jasper/triton/Dockerfile
  13. +3 -2    PyTorch/SpeechRecognition/Jasper/triton/scripts/docker/build.sh
  14. +2 -2    PyTorch/SpeechRecognition/Jasper/triton/scripts/docker/launch.sh
  15. +2 -2    PyTorch/SpeechRecognition/Jasper/triton/scripts/execute_all_perf_runs.sh
  16. +3 -3    PyTorch/SpeechRecognition/Jasper/triton/scripts/export_model.sh
  17. +3 -3    PyTorch/SpeechRecognition/Jasper/triton/scripts/export_model_helper.sh
  18. +3 -3    PyTorch/SpeechRecognition/Jasper/triton/scripts/run_client.sh
  19. +2 -2    PyTorch/SpeechRecognition/Jasper/triton/scripts/run_perf_client.sh
  20. +1 -1    PyTorch/SpeechRecognition/Jasper/triton/scripts/run_server.sh
  21. +43 -0   TensorFlow/Translation/GNMT/qa/L1_joc_GNMT_inferbench_fp16.sh
  22. +43 -0   TensorFlow/Translation/GNMT/qa/L1_joc_GNMT_inferbench_fp32.sh

+ 2 - 0
PyTorch/SpeechRecognition/Jasper/.dockerignore

@@ -5,3 +5,5 @@ checkpoints/
 datasets/
 external/tensorrt-inference-server/
 checkpoints/
+triton/model_repo
+triton/deploy

+ 4 - 4
PyTorch/SpeechRecognition/Jasper/.gitmodules

@@ -1,4 +1,4 @@
-[submodule "external/tensorrt-inference-server"]
-	path = external/tensorrt-inference-server
-	url = https://github.com/NVIDIA/tensorrt-inference-server.git
-	branch = r19.06
+[submodule "external/triton-inference-server"]
+	path = external/triton-inference-server
+	url = https://github.com/NVIDIA/triton-inference-server
+	branch = r19.12

+ 95 - 0
PyTorch/SpeechRecognition/Jasper/external/Dockerfile.client.patched

@@ -0,0 +1,95 @@
+# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#  * Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#  * Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#  * Neither the name of NVIDIA CORPORATION nor the names of its
+#    contributors may be used to endorse or promote products derived
+#    from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# Default setting is building on nvidia/cuda:10.1-devel-ubuntu18.04
+ARG BASE_IMAGE=nvidia/cuda:10.1-devel-ubuntu18.04
+
+FROM ${BASE_IMAGE}
+
+# Default to Python 3. Allowed values are "2" and "3".
+ARG PYVER=3
+
+# Ensure apt-get won't prompt for selecting options
+ENV DEBIAN_FRONTEND=noninteractive
+ENV PYVER=$PYVER
+
+RUN PYSFX=`[ "$PYVER" != "2" ] && echo "$PYVER" || echo ""` && \
+    apt-get update && \
+    apt-get install -y --no-install-recommends \
+            software-properties-common \
+            autoconf \
+            automake \
+            build-essential \
+            cmake \
+            curl \
+            git \
+            libopencv-dev \
+            libopencv-core-dev \
+            libssl-dev \
+            libtool \
+            pkg-config \
+            python${PYSFX} \
+            python${PYSFX}-pip \
+            python${PYSFX}-dev && \
+    pip${PYSFX} install --upgrade setuptools wheel
+
+RUN PYSFX=`[ "$PYVER" != "2" ] && echo "$PYVER" || echo ""` && \
+    pip${PYSFX} install --upgrade grpcio-tools
+
+# Build expects "python" executable (not python3).
+RUN rm -f /usr/bin/python && \
+    ln -s /usr/bin/python$PYVER /usr/bin/python
+
+# Build the client library and examples
+WORKDIR /workspace
+COPY VERSION .
+COPY build build
+COPY src/clients src/clients
+COPY src/core src/core
+
+RUN cd build && \
+    cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX:PATH=/workspace/install && \
+    make -j16 trtis-clients
+RUN cd install && \
+    export VERSION=`cat /workspace/VERSION` && \
+    tar zcf /workspace/v$VERSION.clients.tar.gz *
+
+# For CI testing we need to install a test script.
+COPY qa/L0_client_tar/test.sh /tmp/test.sh
+
+# Copy an image needed by the quickstart and other documentation.
+COPY qa/images/mug.jpg images/mug.jpg
+
+# Install the dependencies needed to run the client examples. These
+# are not needed for building but including them allows this image to
+# be used to run the client examples. The special upgrade and handling
+# of pip is needed to get numpy to install correctly with python2 on
+# ubuntu 16.04.
+RUN python -m pip install --user --upgrade pip && \
+    python -m pip install --upgrade install/python/tensorrtserver-*.whl numpy pillow
+
+ENV PATH /workspace/install/bin:${PATH}
+ENV LD_LIBRARY_PATH /workspace/install/lib:${LD_LIBRARY_PATH}

+ 1 - 0
PyTorch/SpeechRecognition/Jasper/external/triton-inference-server

@@ -0,0 +1 @@
+Subproject commit a1f3860ba65c0fd8f2be3adfcab2673efd039348

+ 4 - 0
PyTorch/SpeechRecognition/Jasper/inference.py

@@ -56,6 +56,10 @@ def parse_args():
     parser.add_argument("--wav", type=str, help='absolute path to .wav file (16KHz)')
     parser.add_argument("--cpu", action="store_true", help="Run inference on CPU")
     parser.add_argument("--ema", action="store_true", help="If available, load EMA model weights")
+
+    # FIXME Unused, but passed by Triton helper scripts
+    parser.add_argument("--pyt_fp16", action='store_true', help='use half precision')
+
     return parser.parse_args()
 
 def calc_wer(data_layer, audio_processor,
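
Note on the hunk above: --pyt_fp16 is accepted only so the Triton helper scripts can pass it without inference.py erroring out. An alternative that avoids declaring pass-through flags at all is argparse's parse_known_args; a minimal sketch under that assumption (hypothetical, not part of this commit):

    import argparse

    def parse_args():
        parser = argparse.ArgumentParser()
        parser.add_argument("--wav", type=str, help="absolute path to .wav file (16KHz)")
        # Flags this script does not use (e.g. --pyt_fp16 from the Triton
        # helper scripts) are collected into `unused` instead of raising.
        args, unused = parser.parse_known_args()
        return args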

+ 3 - 3
PyTorch/SpeechRecognition/Jasper/model.py

@@ -100,7 +100,7 @@ class SpecAugment(nn.Module):
     def forward(self, x):
         sh = x.shape
 
-        mask = torch.zeros(x.shape).byte()
+        mask = torch.zeros(x.shape, dtype=torch.bool)
         for idx in range(sh[0]):
             for _ in range(self.cutout_x_regions):
                 cutout_x_left = int(random.uniform(0, sh[1] - self.cutout_x_width))
@@ -130,7 +130,7 @@ class SpecCutoutRegions(nn.Module):
     def forward(self, x):
         sh = x.shape
 
-        mask = torch.zeros(x.shape, dtype=torch.uint8)
+        mask = torch.zeros(x.shape, dtype=torch.bool)
 
         for idx in range(sh[0]):
             for i in range(self.cutout_rect_regions):
@@ -275,7 +275,7 @@ class MaskedConv1d(nn.Conv1d):
 
     def get_seq_len(self, lens):
         return ((lens + 2 * self.padding[0] - self.dilation[0] * (
-            self.kernel_size[0] - 1) - 1) / self.stride[0] + 1)
+            self.kernel_size[0] - 1) - 1) // self.stride[0] + 1)
 
     def forward(self, inp):
         if self.use_conv_mask:
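
Note on the hunks above: uint8 (byte) masks are deprecated in favor of torch.bool since PyTorch 1.2, and true division of integer tensors with / no longer yields an integer result, so the sequence-length arithmetic switches to floor division //. A minimal sketch of both idioms, assuming PyTorch >= 1.2 (illustrative values, not part of this commit):

    import torch

    x = torch.randn(4, 8)
    mask = torch.zeros(x.shape, dtype=torch.bool)  # bool, not .byte()/uint8
    mask[:, 2:5] = True
    x = x.masked_fill(mask, 0)                     # boolean masking

    # Conv1d output length: // keeps the result an integer tensor.
    lens = torch.tensor([100, 37])
    kernel, stride, padding, dilation = 11, 2, 5, 1
    out_lens = (lens + 2 * padding - dilation * (kernel - 1) - 1) // stride + 1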

+ 5 - 5
PyTorch/SpeechRecognition/Jasper/optimizers.py

@@ -98,7 +98,7 @@ class AdamW(Optimizer):
   
                 state['step'] += 1
                 # Decay the first and second moment running average coefficient
-                exp_avg.mul_(beta1).add_(1 - beta1, grad)
+                exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)
                 exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad)
                 if amsgrad:
                     # Maintains the maximum of all 2nd moment running avg. till now
@@ -111,7 +111,7 @@ class AdamW(Optimizer):
                 bias_correction1 = 1 - beta1 ** state['step']
                 bias_correction2 = 1 - beta2 ** state['step']
                 step_size = group['lr'] * math.sqrt(bias_correction2) / bias_correction1
-                p.data.add_(-step_size,  torch.mul(p.data, group['weight_decay']).addcdiv_(1, exp_avg, denom) )
+                p.data.add_(torch.mul(p.data, group['weight_decay']).addcdiv_(1, exp_avg, denom), alpha=-step_size)
   
         return loss
   
@@ -201,7 +201,7 @@ class Novograd(Optimizer):
                 if exp_avg_sq == 0:
                     exp_avg_sq.copy_(norm)
                 else:
-                    exp_avg_sq.mul_(beta2).add_(1 - beta2, norm)
+                    exp_avg_sq.mul_(beta2).add_(norm, alpha=1 - beta2)
 
                 if amsgrad:
                     # Maintains the maximum of all 2nd moment running avg. till now
@@ -213,11 +213,11 @@ class Novograd(Optimizer):
 
                 grad.div_(denom)
                 if group['weight_decay'] != 0:
-                    grad.add_(group['weight_decay'], p.data)
+                    grad.add_(p.data, alpha=group['weight_decay'])
                 if group['grad_averaging']:
                     grad.mul_(1 - beta1)
                 exp_avg.mul_(beta1).add_(grad)
 
-                p.data.add_(-group['lr'], exp_avg)
+                p.data.add_(exp_avg, alpha=-group['lr'])
         
         return loss
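
Note on the hunks above: they migrate the deprecated positional form tensor.add_(scalar, other) to the keyword form tensor.add_(other, alpha=scalar). The addcmul_/addcdiv_ calls left in positional form have analogous keyword variants using value=; a minimal sketch of one Adam-style update with the keyword forms throughout, assuming a recent PyTorch (illustrative values, not part of this commit):

    import torch

    beta1, beta2, lr, eps = 0.9, 0.999, 1e-3, 1e-8
    p, grad = torch.randn(3), torch.randn(3)
    exp_avg, exp_avg_sq = torch.zeros(3), torch.zeros(3)

    exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)               # was add_(1 - beta1, grad)
    exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1 - beta2)  # was addcmul_(1 - beta2, grad, grad)
    denom = exp_avg_sq.sqrt().add_(eps)
    p.addcdiv_(exp_avg, denom, value=-lr)                         # was addcdiv_(-lr, exp_avg, denom)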

+ 3 - 3
PyTorch/SpeechRecognition/Jasper/scripts/train.sh

@@ -47,9 +47,9 @@ CMD+=" --seed=$SEED"
 CMD+=" --optimizer=novograd"
 CMD+=" --dataset_dir=$DATA_DIR"
 CMD+=" --val_manifest=$DATA_DIR/librispeech-dev-clean-wav.json"
-CMD+=" --train_manifest=$DATA_DIR/librispeech-train-clean-100-wav.json,"
-CMD+="$DATA_DIR/librispeech-train-clean-360-wav.json,"
-CMD+="$DATA_DIR/librispeech-train-other-500-wav.json"
+CMD+=" --train_manifest=$DATA_DIR/librispeech-train-clean-100-wav.json"
+CMD+=",$DATA_DIR/librispeech-train-clean-360-wav.json"
+CMD+=",$DATA_DIR/librispeech-train-other-500-wav.json"
 CMD+=" --weight_decay=1e-3"
 CMD+=" --save_freq=$SAVE_FREQUENCY"
 CMD+=" --eval_freq=100"

+ 5 - 3
PyTorch/SpeechRecognition/Jasper/tensorrt/Dockerfile

@@ -1,8 +1,10 @@
-ARG FROM_IMAGE_NAME=nvcr.io/nvidia/pytorch:20.03-py3
+ARG FROM_IMAGE_NAME=nvcr.io/nvidia/pytorch:19.10-py3
 FROM ${FROM_IMAGE_NAME}
 
+RUN apt-get update && apt-get install -y python3
+
 WORKDIR /tmp/onnx-trt
-COPY trt/onnx-trt.patch .
+COPY tensorrt/onnx-trt.patch .
 RUN git clone https://github.com/onnx/onnx-tensorrt.git && cd onnx-tensorrt && git checkout 8716c9b && git submodule update --init --recursive && \
     patch -f < ../onnx-trt.patch && mkdir build && cd build && cmake .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/usr -DGPU_ARCHS="60 70 75" && make -j16 && make install && mv -f /usr/lib/libnvonnx* /usr/lib/x86_64-linux-gnu/ && ldconfig
 
@@ -11,7 +13,7 @@ RUN git clone https://github.com/onnx/onnx-tensorrt.git && cd onnx-tensorrt && g
 # At the same step, also install TRT pip reqs
 WORKDIR /tmp/pipReqs
 COPY requirements.txt /tmp/pipReqs/jocRequirements.txt
-COPY trt/requirements.txt /tmp/pipReqs/trtRequirements.txt
+COPY tensorrt/requirements.txt /tmp/pipReqs/trtRequirements.txt
 RUN pip install --disable-pip-version-check -U -r jocRequirements.txt -r trtRequirements.txt
 
 

+ 2 - 2
PyTorch/SpeechRecognition/Jasper/tensorrt/scripts/docker/build.sh

@@ -1,5 +1,5 @@
 #!/bin/bash
 
 # Constructs a docker image containing dependencies for execution of JASPER through TRT
-echo "docker build . -f ./trt/Dockerfile -t jasper:trt6"
-docker build . -f ./trt/Dockerfile -t jasper:trt6
+echo "docker build . -f ./tensorrt/Dockerfile -t jasper:trt6"
+docker build . -f ./tensorrt/Dockerfile -t jasper:trt6

+ 1 - 1
PyTorch/SpeechRecognition/Jasper/tensorrt/scripts/trt_inference_benchmark.sh

@@ -130,7 +130,7 @@ else
    PYT_PREDICTION_PATH=" --pyt_prediction_path=${PYT_PREDICTION_PATH}"
 fi
 
-CMD="python trt/perf.py"
+CMD="python tensorrt/perf.py"
 CMD+=" --batch_size $BATCH_SIZE"
 CMD+=" --engine_batch_size $BATCH_SIZE"
 CMD+=" --model_toml configs/jasper10x5dr_nomask.toml"

+ 29 - 13
PyTorch/SpeechRecognition/Jasper/triton/Dockerfile

@@ -1,22 +1,38 @@
-ARG FROM_IMAGE_NAME=nvcr.io/nvidian/pytorch:20.03-py3
-ARG TRITON_BASE_IMAGE=nvcr.io/nvidia/tritonserver:20.03.1-py3-clientsdk
-FROM ${TRITON_BASE_IMAGE} as triton
+ARG FROM_IMAGE_NAME=nvcr.io/nvidia/pytorch:19.09-py3
+
+FROM tensorrtserver_client as trtis-client
 FROM ${FROM_IMAGE_NAME}
+RUN apt-get update && apt-get install -y python3
+ARG version=6.0.1-1+cuda10.1
+RUN wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/cuda-repo-ubuntu1804_10.1.243-1_amd64.deb \
+&& dpkg -i cuda-repo-*.deb \
+&& wget https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/nvidia-machine-learning-repo-ubuntu1804_1.0.0-1_amd64.deb \
+&& dpkg -i nvidia-machine-learning-repo-*.deb \
+&& apt-get update \
+&& apt-get install -y --no-install-recommends libnvinfer6=${version} libnvonnxparsers6=${version} libnvparsers6=${version} libnvinfer-plugin6=${version} libnvinfer-dev=${version} libnvonnxparsers-dev=${version} libnvparsers-dev=${version} libnvinfer-plugin-dev=${version} python-libnvinfer=${version} python3-libnvinfer=${version}
+RUN cp -r /usr/lib/python3.6/dist-packages/tensorrt /opt/conda/lib/python3.6/site-packages/tensorrt
 
-ADD requirements.txt .
-RUN pip install -r requirements.txt
-RUN pip install onnxruntime
 
-ADD triton/requirements.txt .
-RUN pip install -r requirements.txt
+ENV PATH=$PATH:/usr/src/tensorrt/bin
+WORKDIR /tmp/onnx-trt
+COPY tensorrt/onnx-trt.patch .
+RUN git clone https://github.com/onnx/onnx-tensorrt.git && cd onnx-tensorrt && git checkout  b677b9cbf19af803fa6f76d05ce558e657e4d8b6  && git submodule update --init --recursive && \
+    patch -f < ../onnx-trt.patch && mkdir build && cd build && cmake .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/usr -DGPU_ARCHS="60 70 75" && make -j16 && make install && mv -f /usr/lib/libnvonnx* /usr/lib/x86_64-linux-gnu/ && ldconfig
 
-ADD tensorrt/requirements.txt .
-RUN pip install -r requirements.txt
 
-COPY --from=triton /opt/tritonserver/qa/pkgs/tensorrtserver-1.13.0-py3-none-linux_x86_64.whl ./tensorrtserver-1.13.0-py3-none-linux_x86_64.whl
+# Here's a good place to install pip reqs from JoC repo.
+# At the same step, also install TRT pip reqs
+WORKDIR /tmp/pipReqs
+COPY requirements.txt /tmp/pipReqs/pytRequirements.txt
+COPY tensorrt/requirements.txt /tmp/pipReqs/trtRequirements.txt
+COPY triton/requirements.txt /tmp/pipReqs/trtisRequirements.txt
+RUN apt-get update && apt-get install -y --no-install-recommends portaudio19-dev && pip install -r pytRequirements.txt && pip install -r trtRequirements.txt && pip install -r trtisRequirements.txt
 
-RUN pip install tensorrtserver-1.13.0-py3-none-linux_x86_64.whl
+# Copy the perf_client over
+COPY --from=trtis-client /workspace/install/bin/perf_client /workspace/install/bin/perf_client
+# Copy the Python wheel and install it with pip
+COPY --from=trtis-client /workspace/install/python/tensorrtserver*.whl /tmp/
+RUN pip install /tmp/tensorrtserver*.whl && rm /tmp/tensorrtserver*.whl
 
 WORKDIR /workspace/jasper
 COPY . .
-RUN pip install --no-cache-dir -e .

+ 3 - 2
PyTorch/SpeechRecognition/Jasper/triton/scripts/docker/build.sh

@@ -4,5 +4,6 @@ SCRIPT_DIR=$(cd $(dirname $0); pwd)
 PROJECT_DIR=${SCRIPT_DIR}/../../../
 docker pull nvcr.io/nvidia/tensorrtserver:19.09-py3
 git submodule update --init --recursive
-docker build -t tensorrtserver_client -f ${PROJECT_DIR}/external/triton-inference-server/Dockerfile.client ${PROJECT_DIR}/external/triton-inference-server
-docker build . --rm -f ${PROJECT_DIR}/trtis/Dockerfile -t jasper:trtis
+docker build -t tensorrtserver_client  \
+             -f ${PROJECT_DIR}/external/Dockerfile.client.patched ${PROJECT_DIR}/external/triton-inference-server
+docker build . --rm -f ${PROJECT_DIR}/triton/Dockerfile -t jasper:triton

+ 2 - 2
PyTorch/SpeechRecognition/Jasper/triton/scripts/docker/launch.sh

@@ -29,11 +29,11 @@ fi
 
 echo $MOUNTS
 docker run -it --rm \
-  --runtime=nvidia \
+  --gpus=all \
   --shm-size=4g \
   --ulimit memlock=-1 \
   --ulimit stack=67108864 \
   ${MOUNTS} \
   -v ${JASPER_REPO}:/jasper \
   ${EXTRA_JASPER_ENV} \
-  jasper:trtis bash $PROGRAM_PATH
+  jasper:triton bash $PROGRAM_PATH

+ 2 - 2
PyTorch/SpeechRecognition/Jasper/triton/scripts/execute_all_perf_runs.sh

@@ -44,7 +44,7 @@ export GPU=${GPU:-}
 
 SCRIPT_DIR=$(cd $(dirname $0); pwd)
 PROJECT_DIR=${SCRIPT_DIR}/../..
-MODEL_REPO=${MODEL_REPO:-"${PROJECT_DIR}/trtis/model_repo"}
+MODEL_REPO=${MODEL_REPO:-"${PROJECT_DIR}/triton/model_repo"}
 
 # We need to make sure TRTIS uses only one GPU, same as export does
 # for TRTIS
@@ -78,7 +78,7 @@ do
   
     if [ "${REGENERATE_ENGINES}" == "yes" ]; then
         ARCH=${ARCH} CHECKPOINT_DIR=${CHECKPOINT_DIR} CHECKPOINT=${CHECKPOINT} PRECISION=${PRECISION} MAX_SEQUENCE_LENGTH_FOR_ENGINE=${MAX_SEQUENCE_LENGTH_FOR_ENGINE} \
-        ${PROJECT_DIR}/trtis/scripts/export_model.sh || exit 1
+        ${PROJECT_DIR}/triton/scripts/export_model.sh || exit 1
     fi
   
     for BATCH_SIZE in 1 2 4 8 16 32 64;

+ 3 - 3
PyTorch/SpeechRecognition/Jasper/triton/scripts/export_model.sh

@@ -25,12 +25,12 @@ GPU=${GPU:-0}
 SCRIPT_DIR=$(cd $(dirname $0); pwd)
 PROJECT_DIR=${SCRIPT_DIR}/../..
 if [ -f /.dockerenv ]; then # inside docker
-	CUDA_VISIBLE_DEVICES=${GPU} CHECKPOINT=${CHECKPOINT} CHECKPOINT_DIR=${CHECKPOINT_DIR} PRECISION=${PRECISION} ARCH=${ARCH} MAX_SEQUENCE_LENGTH_FOR_ENGINE=${MAX_SEQUENCE_LENGTH_FOR_ENGINE} ${PROJECT_DIR}/trtis/scripts/export_model_helper.sh || exit 1
+	CUDA_VISIBLE_DEVICES=${GPU} CHECKPOINT=${CHECKPOINT} CHECKPOINT_DIR=${CHECKPOINT_DIR} PRECISION=${PRECISION} ARCH=${ARCH} MAX_SEQUENCE_LENGTH_FOR_ENGINE=${MAX_SEQUENCE_LENGTH_FOR_ENGINE} ${PROJECT_DIR}/triton/scripts/export_model_helper.sh || exit 1
 else
 	set -x
-	PROGRAM_PATH="/jasper/trtis/scripts/export_model_helper.sh"  \
+	PROGRAM_PATH="/jasper/triton/scripts/export_model_helper.sh"  \
 	EXTRA_JASPER_ENV="-e PRECISION=${PRECISION} -e CHECKPOINT=${CHECKPOINT} -e CHECKPOINT_DIR=/checkpoints -e ARCH=${ARCH} -e MAX_SEQUENCE_LENGTH_FOR_ENGINE=${MAX_SEQUENCE_LENGTH_FOR_ENGINE} -e CUDA_VISIBLE_DEVICES=${GPU}" \
 	CHECKPOINT_DIR=${CHECKPOINT_DIR} DATA_DIR= RESULT_DIR= \
-	${PROJECT_DIR}/trtis/scripts/docker/launch.sh || exit 1
+	${PROJECT_DIR}/triton/scripts/docker/launch.sh || exit 1
 	set +x
 fi

+ 3 - 3
PyTorch/SpeechRecognition/Jasper/triton/scripts/export_model_helper.sh

@@ -66,13 +66,13 @@ echo "export_model.sh: Exporting TRT engine, CUDA ARCH = ${ARCH} ... "
 PREC_FLAGS=""
 if [ "$PRECISION" == "fp16" ]
 then
-	PREC_FLAGS="--trt_fp16"
+ 	PREC_FLAGS="--trt_fp16"
 fi
 
 # remove targets first
 rm -f ${MODEL_REPO}/jasper-trt/1/jasper_${ARCH}.plan ${MODEL_REPO}/jasper-onnx/1/jasper.onnx
 
-python  ${JASPER_REPO}/trt/perf.py \
+python  ${JASPER_REPO}/tensorrt/perf.py \
 	--ckpt_path ${CHECKPOINT_DIR}/${CHECKPOINT} \
 	--wav=${JASPER_REPO}/notebooks/example1.wav \
 	--model_toml=${JASPER_REPO}/configs/${MODEL_CONFIG} \
@@ -85,7 +85,7 @@ if [ "$PRECISION" == "fp16" ]
 then
 	PREC_FLAGS="--trt_fp16 --pyt_fp16"
 fi
-python  ${JASPER_REPO}/trt/perf.py \
+python  ${JASPER_REPO}/tensorrt/perf.py \
 	--ckpt_path ${CHECKPOINT_DIR}/${CHECKPOINT} \
 	--wav=${JASPER_REPO}/notebooks/example1.wav \
 	--model_toml=${JASPER_REPO}/configs/${MODEL_CONFIG} \

+ 3 - 3
PyTorch/SpeechRecognition/Jasper/triton/scripts/run_client.sh

@@ -26,13 +26,13 @@ FILE=${3} # json manifest file, OR single wav file
 JASPER_CONTAINER_TAG=${JASPER_CONTAINER_TAG:-jasper:trtis}
 
 if [ "$#" -ge 1 ] && [ "${FILE: -4}" == ".wav" ]; then 
-  CMD="python /jasper/trtis/jasper-client.py --data_dir /data --audio_filename ${FILE} --model_platform ${MODEL_TYPE}"
+  CMD="python /jasper/triton/jasper-client.py --data_dir /data --audio_filename ${FILE} --model_platform ${MODEL_TYPE}"
   ARGS=""
   ARGS="$ARGS -v $DATA_DIR:/data"
 elif [ "$#" -ge 1 ] && [ "${FILE: -4}" == "json" ]; then
   ARGS=""
   ARGS="$ARGS -v $DATA_DIR:/data"
-  CMD="python /jasper/trtis/jasper-client.py --manifest_filename ${FILE} --model_platform ${MODEL_TYPE} --data_dir /data"
+  CMD="python /jasper/triton/jasper-client.py --manifest_filename ${FILE} --model_platform ${MODEL_TYPE} --data_dir /data"
 else
   ARGS="-it"
   CMD=""
@@ -49,4 +49,4 @@ nvidia-docker run --rm -it \
    -v ${PROJECT_DIR}:/jasper \
    --name=trtis-client \
    ${ARGS} ${JASPER_CONTAINER_TAG} ${CMD}
-set +x
+set +x

+ 2 - 2
PyTorch/SpeechRecognition/Jasper/triton/scripts/run_perf_client.sh

@@ -67,13 +67,13 @@ ARGS="\
 curl -s "http://${SERVER_HOSTNAME}:8000/api/status/${MODEL_NAME}" | grep ready_state | grep SERVER_READY || (echo "Model ${MODEL_NAME} is not ready, perf_client skipped..." && exit 1)
 
 echo "=== STARTING: perf client ${ARGS} --concurrency-range 1:4:1 ==="
-docker run  -e DISPLAY=${DISPLAY}  --runtime nvidia --rm \
+docker run  -e DISPLAY=${DISPLAY}  --gpus all --rm \
 	      --privileged --net=host \
 	      -v ${RESULT_DIR_H}:/results --name jasper-perf-client \
 	      ${TRTIS_CLIENT_CONTAINER_TAG}  perf_client $ARGS -f /results/${OUTPUT_FILE_CSV}_p1 --concurrency-range 1:4:1 2>&1 | tee -a $LOGNAME
 
 echo "=== STARTING: perf client ${ARGS} --concurrency-range 8:${MAX_CONCURRENCY}:8 ==="
-docker run  -e DISPLAY=${DISPLAY}  --runtime nvidia --rm \
+docker run  -e DISPLAY=${DISPLAY}  --gpus all --rm \
 	      --privileged --net=host \
 	      -v ${RESULT_DIR_H}:/results --name jasper-perf-client \
 	      ${TRTIS_CLIENT_CONTAINER_TAG}  perf_client $ARGS -f /results/${OUTPUT_FILE_CSV}_p2 --concurrency-range 8:${MAX_CONCURRENCY}:8 2>&1 | tee -a $LOGNAME
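
Note on the readiness check above: the script curls the server's status API and greps for SERVER_READY before launching perf_client. An equivalent poll in Python, assuming the requests package is available (hypothetical helper, not part of this commit):

    import time
    import requests

    def wait_ready(host, model, timeout=60):
        # Same endpoint the script curls: http://<host>:8000/api/status/<model>
        url = f"http://{host}:8000/api/status/{model}"
        deadline = time.time() + timeout
        while time.time() < deadline:
            try:
                if "SERVER_READY" in requests.get(url).text:
                    return True
            except requests.ConnectionError:
                pass  # server not up yet
            time.sleep(2)
        return False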

+ 1 - 1
PyTorch/SpeechRecognition/Jasper/triton/scripts/run_server.sh

@@ -48,7 +48,7 @@ RM=${RM:-"--rm"}
 
 set -x
 docker run -p 8000:8000 -p 8001:8001 -p 8002:8002 \
-       --runtime nvidia \
+       --gpus all \
        -e NVIDIA_VISIBLE_DEVICES=${NV_VISIBLE_DEVICES} \
        -v ${MODELS_DIR}:/models \
        -v ${TRTIS_DIR}/model_repo:/model_repo \

+ 43 - 0
TensorFlow/Translation/GNMT/qa/L1_joc_GNMT_inferbench_fp16.sh

@@ -0,0 +1,43 @@
+set -o nounset
+set -o errexit
+set -o pipefail
+
+cd ..
+cp -r /data/joc/gnmt_tf/19.08 output_dir
+
+# hack to work with pytorch dataset
+sed -ie 's/    src_vocab_file = hparams.vocab_prefix + "." + hparams.src/    src_vocab_file = hparams.vocab_prefix/g' nmt.py
+sed -ie 's/    tgt_vocab_file = hparams.vocab_prefix + "." + hparams.tgt/    tgt_vocab_file = hparams.vocab_prefix/g' nmt.py
+
+( python nmt.py --amp --data_dir=/data/pytorch/wmt16_de_en --output_dir=output_dir --mode=infer --infer_batch_size=512 2>&1 ) | tee log.log
+python scripts/parse_log.py log.log | tee log.json
+
+python << END
+import json
+import numpy as np
+from pathlib import Path
+
+baseline = 10254
+bleu_baseline = 25.1
+
+log = json.loads(Path('log.json').read_text())
+speed = np.mean(log['eval_tokens_per_sec'])
+bleu = log['bleu'][0]
+
+print('Eval speed    :', speed)
+print('Baseline      :', baseline)
+
+print('Bleu          :', bleu)
+print('Bleu baseline :', bleu_baseline)
+
+if speed < baseline * 0.9:
+    print("FAILED: speed ({}) doesn't match the baseline ({})".format(speed, baseline))
+    exit(1)
+
+if bleu < bleu_baseline - 0.2:
+    print("FAILED: bleu ({}) doesn't match the baseline ({})".format(bleu, bleu_baseline))
+    exit(1)
+
+print('SUCCESS')
+END
+

+ 43 - 0
TensorFlow/Translation/GNMT/qa/L1_joc_GNMT_inferbench_fp32.sh

@@ -0,0 +1,43 @@
+set -o nounset
+set -o errexit
+set -o pipefail
+
+cd .. 
+cp -r /data/joc/gnmt_tf/19.08 output_dir
+
+# hack to work with pytorch dataset
+sed -ie 's/    src_vocab_file = hparams.vocab_prefix + "." + hparams.src/    src_vocab_file = hparams.vocab_prefix/g' nmt.py
+sed -ie 's/    tgt_vocab_file = hparams.vocab_prefix + "." + hparams.tgt/    tgt_vocab_file = hparams.vocab_prefix/g' nmt.py
+
+( python nmt.py --data_dir=/data/pytorch/wmt16_de_en --output_dir=output_dir --mode=infer --infer_batch_size=512 2>&1 ) | tee log.log
+python scripts/parse_log.py log.log | tee log.json
+
+python << END
+import json
+import numpy as np
+from pathlib import Path
+
+baseline = 5374
+bleu_baseline = 25.1
+
+log = json.loads(Path('log.json').read_text())
+speed = np.mean(log['eval_tokens_per_sec'])
+bleu = log['bleu'][0]
+
+print('Eval speed    :', speed)
+print('Baseline      :', baseline)
+
+print('Bleu          :', bleu)
+print('Bleu baseline :', bleu_baseline)
+
+if speed < baseline * 0.9:
+    print("FAILED: speed ({}) doesn't match the baseline ({})".format(speed, baseline))
+    exit(1)
+
+if bleu < bleu_baseline - 0.2:
+    print("FAILED: bleu ({}) doesn't match the baseline ({})".format(bleu, bleu_baseline))
+    exit(1)
+
+print('SUCCESS')
+END
+