
[Jasper/PyT] Triton update

kkudrynski 5 years ago
parent commit 557f4d01ea
22 changed files with 259 additions and 52 deletions
   1. +2 -0    PyTorch/SpeechRecognition/Jasper/.dockerignore
   2. +4 -4    PyTorch/SpeechRecognition/Jasper/.gitmodules
   3. +95 -0   PyTorch/SpeechRecognition/Jasper/external/Dockerfile.client.patched
   4. +1 -0    PyTorch/SpeechRecognition/Jasper/external/triton-inference-server
   5. +4 -0    PyTorch/SpeechRecognition/Jasper/inference.py
   6. +3 -3    PyTorch/SpeechRecognition/Jasper/model.py
   7. +5 -5    PyTorch/SpeechRecognition/Jasper/optimizers.py
   8. +3 -3    PyTorch/SpeechRecognition/Jasper/scripts/train.sh
   9. +5 -3    PyTorch/SpeechRecognition/Jasper/tensorrt/Dockerfile
  10. +2 -2    PyTorch/SpeechRecognition/Jasper/tensorrt/scripts/docker/build.sh
  11. +1 -1    PyTorch/SpeechRecognition/Jasper/tensorrt/scripts/trt_inference_benchmark.sh
  12. +29 -13  PyTorch/SpeechRecognition/Jasper/triton/Dockerfile
  13. +3 -2    PyTorch/SpeechRecognition/Jasper/triton/scripts/docker/build.sh
  14. +2 -2    PyTorch/SpeechRecognition/Jasper/triton/scripts/docker/launch.sh
  15. +2 -2    PyTorch/SpeechRecognition/Jasper/triton/scripts/execute_all_perf_runs.sh
  16. +3 -3    PyTorch/SpeechRecognition/Jasper/triton/scripts/export_model.sh
  17. +3 -3    PyTorch/SpeechRecognition/Jasper/triton/scripts/export_model_helper.sh
  18. +3 -3    PyTorch/SpeechRecognition/Jasper/triton/scripts/run_client.sh
  19. +2 -2    PyTorch/SpeechRecognition/Jasper/triton/scripts/run_perf_client.sh
  20. +1 -1    PyTorch/SpeechRecognition/Jasper/triton/scripts/run_server.sh
  21. +43 -0   TensorFlow/Translation/GNMT/qa/L1_joc_GNMT_inferbench_fp16.sh
  22. +43 -0   TensorFlow/Translation/GNMT/qa/L1_joc_GNMT_inferbench_fp32.sh

+ 2 - 0
PyTorch/SpeechRecognition/Jasper/.dockerignore

@@ -5,3 +5,5 @@ checkpoints/
 datasets/
 external/tensorrt-inference-server/
 checkpoints/
+triton/model_repo
+triton/deploy

+ 4 - 4
PyTorch/SpeechRecognition/Jasper/.gitmodules

@@ -1,4 +1,4 @@
-[submodule "external/tensorrt-inference-server"]
-	path = external/tensorrt-inference-server
-	url = https://github.com/NVIDIA/tensorrt-inference-server.git
-	branch = r19.06
+[submodule "external/triton-inference-server"]
+	path = external/triton-inference-server
+	url = https://github.com/NVIDIA/triton-inference-server
+	branch = r19.12

+ 95 - 0
PyTorch/SpeechRecognition/Jasper/external/Dockerfile.client.patched

@@ -0,0 +1,95 @@
+# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#  * Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#  * Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#  * Neither the name of NVIDIA CORPORATION nor the names of its
+#    contributors may be used to endorse or promote products derived
+#    from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# Default setting is building on nvidia/cuda:10.1-devel-ubuntu18.04
+ARG BASE_IMAGE=nvidia/cuda:10.1-devel-ubuntu18.04
+
+FROM ${BASE_IMAGE}
+
+# Default to Python 3. Allowed values are "2" and "3".
+ARG PYVER=3
+
+# Ensure apt-get won't prompt for selecting options
+ENV DEBIAN_FRONTEND=noninteractive
+ENV PYVER=$PYVER
+
+RUN PYSFX=`[ "$PYVER" != "2" ] && echo "$PYVER" || echo ""` && \
+    apt-get update && \
+    apt-get install -y --no-install-recommends \
+            software-properties-common \
+            autoconf \
+            automake \
+            build-essential \
+            cmake \
+            curl \
+            git \
+            libopencv-dev \
+            libopencv-core-dev \
+            libssl-dev \
+            libtool \
+            pkg-config \
+            python${PYSFX} \
+            python${PYSFX}-pip \
+            python${PYSFX}-dev && \
+    pip${PYSFX} install --upgrade setuptools wheel
+
+RUN PYSFX=`[ "$PYVER" != "2" ] && echo "$PYVER" || echo ""` && \
+    pip${PYSFX} install --upgrade grpcio-tools
+
+# Build expects "python" executable (not python3).
+RUN rm -f /usr/bin/python && \
+    ln -s /usr/bin/python$PYVER /usr/bin/python
+
+# Build the client library and examples
+WORKDIR /workspace
+COPY VERSION .
+COPY build build
+COPY src/clients src/clients
+COPY src/core src/core
+
+RUN cd build && \
+    cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX:PATH=/workspace/install && \
+    make -j16 trtis-clients
+RUN cd install && \
+    export VERSION=`cat /workspace/VERSION` && \
+    tar zcf /workspace/v$VERSION.clients.tar.gz *
+
+# For CI testing we need to install a test script.
+COPY qa/L0_client_tar/test.sh /tmp/test.sh
+
+# Copy an image needed by the quickstart and other documentation.
+COPY qa/images/mug.jpg images/mug.jpg
+
+# Install the dependencies needed to run the client examples. These
+# are not needed for building but including them allows this image to
+# be used to run the client examples. The special upgrade and handling
+# of pip is needed to get numpy to install correctly with python2 on
+# ubuntu 16.04.
+RUN python -m pip install --user --upgrade pip && \
+    python -m pip install --upgrade install/python/tensorrtserver-*.whl numpy pillow
+
+ENV PATH /workspace/install/bin:${PATH}
+ENV LD_LIBRARY_PATH /workspace/install/lib:${LD_LIBRARY_PATH}

+ 1 - 0
PyTorch/SpeechRecognition/Jasper/external/triton-inference-server

@@ -0,0 +1 @@
+Subproject commit a1f3860ba65c0fd8f2be3adfcab2673efd039348

+ 4 - 0
PyTorch/SpeechRecognition/Jasper/inference.py

@@ -56,6 +56,10 @@ def parse_args():
     parser.add_argument("--wav", type=str, help='absolute path to .wav file (16KHz)')
     parser.add_argument("--cpu", action="store_true", help="Run inference on CPU")
     parser.add_argument("--ema", action="store_true", help="If available, load EMA model weights")
+
+    # FIXME Unused, but passed by Triton helper scripts
+    parser.add_argument("--pyt_fp16", action='store_true', help='use half precision')
+
     return parser.parse_args()
 
 def calc_wer(data_layer, audio_processor,
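
Note on the hunk above: --pyt_fp16 is accepted only so the Triton helper scripts can pass it without inference.py erroring out. An alternative that avoids declaring pass-through flags at all is argparse's parse_known_args; a minimal sketch under that assumption (hypothetical, not part of this commit):

    import argparse

    def parse_args():
        parser = argparse.ArgumentParser()
        parser.add_argument("--wav", type=str, help="absolute path to .wav file (16KHz)")
        # Flags this script does not use (e.g. --pyt_fp16 from the Triton
        # helper scripts) are collected into `unused` instead of raising.
        args, unused = parser.parse_known_args()
        return args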

+ 3 - 3
PyTorch/SpeechRecognition/Jasper/model.py

@@ -100,7 +100,7 @@ class SpecAugment(nn.Module):
     def forward(self, x):
         sh = x.shape
 
-        mask = torch.zeros(x.shape).byte()
+        mask = torch.zeros(x.shape, dtype=torch.bool)
         for idx in range(sh[0]):
             for _ in range(self.cutout_x_regions):
                 cutout_x_left = int(random.uniform(0, sh[1] - self.cutout_x_width))
@@ -130,7 +130,7 @@ class SpecCutoutRegions(nn.Module):
     def forward(self, x):
         sh = x.shape
 
-        mask = torch.zeros(x.shape, dtype=torch.uint8)
+        mask = torch.zeros(x.shape, dtype=torch.bool)
 
         for idx in range(sh[0]):
             for i in range(self.cutout_rect_regions):
@@ -275,7 +275,7 @@ class MaskedConv1d(nn.Conv1d):
 
     def get_seq_len(self, lens):
         return ((lens + 2 * self.padding[0] - self.dilation[0] * (
-            self.kernel_size[0] - 1) - 1) / self.stride[0] + 1)
+            self.kernel_size[0] - 1) - 1) // self.stride[0] + 1)
 
     def forward(self, inp):
         if self.use_conv_mask:
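
Note on the hunks above: uint8 (byte) masks are deprecated in favor of torch.bool since PyTorch 1.2, and true division of integer tensors with / no longer yields an integer result, so the sequence-length arithmetic switches to floor division //. A minimal sketch of both idioms, assuming PyTorch >= 1.2 (illustrative values, not part of this commit):

    import torch

    x = torch.randn(4, 8)
    mask = torch.zeros(x.shape, dtype=torch.bool)  # bool, not .byte()/uint8
    mask[:, 2:5] = True
    x = x.masked_fill(mask, 0)                     # boolean masking

    # Conv1d output length: // keeps the result an integer tensor.
    lens = torch.tensor([100, 37])
    kernel, stride, padding, dilation = 11, 2, 5, 1
    out_lens = (lens + 2 * padding - dilation * (kernel - 1) - 1) // stride + 1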

+ 5 - 5
PyTorch/SpeechRecognition/Jasper/optimizers.py

@@ -98,7 +98,7 @@ class AdamW(Optimizer):
   
                 state['step'] += 1
                 # Decay the first and second moment running average coefficient
-                exp_avg.mul_(beta1).add_(1 - beta1, grad)
+                exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)
                 exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad)
                 if amsgrad:
                     # Maintains the maximum of all 2nd moment running avg. till now
@@ -111,7 +111,7 @@ class AdamW(Optimizer):
                 bias_correction1 = 1 - beta1 ** state['step']
                 bias_correction2 = 1 - beta2 ** state['step']
                 step_size = group['lr'] * math.sqrt(bias_correction2) / bias_correction1
-                p.data.add_(-step_size,  torch.mul(p.data, group['weight_decay']).addcdiv_(1, exp_avg, denom) )
+                p.data.add_(torch.mul(p.data, group['weight_decay']).addcdiv_(1, exp_avg, denom), alpha=-step_size)
   
         return loss
   
@@ -201,7 +201,7 @@ class Novograd(Optimizer):
                 if exp_avg_sq == 0:
                     exp_avg_sq.copy_(norm)
                 else:
-                    exp_avg_sq.mul_(beta2).add_(1 - beta2, norm)
+                    exp_avg_sq.mul_(beta2).add_(norm, alpha=1 - beta2)
 
                 if amsgrad:
                     # Maintains the maximum of all 2nd moment running avg. till now
@@ -213,11 +213,11 @@ class Novograd(Optimizer):
 
                 grad.div_(denom)
                 if group['weight_decay'] != 0:
-                    grad.add_(group['weight_decay'], p.data)
+                    grad.add_(p.data, alpha=group['weight_decay'])
                 if group['grad_averaging']:
                     grad.mul_(1 - beta1)
                 exp_avg.mul_(beta1).add_(grad)
 
-                p.data.add_(-group['lr'], exp_avg)
+                p.data.add_(exp_avg, alpha=-group['lr'])
         
         return loss
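
Note on the hunks above: they migrate the deprecated positional form tensor.add_(scalar, other) to the keyword form tensor.add_(other, alpha=scalar). The addcmul_/addcdiv_ calls left in positional form have analogous keyword variants using value=; a minimal sketch of one Adam-style update with the keyword forms throughout, assuming a recent PyTorch (illustrative values, not part of this commit):

    import torch

    beta1, beta2, lr, eps = 0.9, 0.999, 1e-3, 1e-8
    p, grad = torch.randn(3), torch.randn(3)
    exp_avg, exp_avg_sq = torch.zeros(3), torch.zeros(3)

    exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)               # was add_(1 - beta1, grad)
    exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1 - beta2)  # was addcmul_(1 - beta2, grad, grad)
    denom = exp_avg_sq.sqrt().add_(eps)
    p.addcdiv_(exp_avg, denom, value=-lr)                         # was addcdiv_(-lr, exp_avg, denom)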

+ 3 - 3
PyTorch/SpeechRecognition/Jasper/scripts/train.sh

@@ -47,9 +47,9 @@ CMD+=" --seed=$SEED"
 CMD+=" --optimizer=novograd"
 CMD+=" --dataset_dir=$DATA_DIR"
 CMD+=" --val_manifest=$DATA_DIR/librispeech-dev-clean-wav.json"
-CMD+=" --train_manifest=$DATA_DIR/librispeech-train-clean-100-wav.json,"
-CMD+="$DATA_DIR/librispeech-train-clean-360-wav.json,"
-CMD+="$DATA_DIR/librispeech-train-other-500-wav.json"
+CMD+=" --train_manifest=$DATA_DIR/librispeech-train-clean-100-wav.json"
+CMD+=",$DATA_DIR/librispeech-train-clean-360-wav.json"
+CMD+=",$DATA_DIR/librispeech-train-other-500-wav.json"
 CMD+=" --weight_decay=1e-3"
 CMD+=" --save_freq=$SAVE_FREQUENCY"
 CMD+=" --eval_freq=100"

+ 5 - 3
PyTorch/SpeechRecognition/Jasper/tensorrt/Dockerfile

@@ -1,8 +1,10 @@
-ARG FROM_IMAGE_NAME=nvcr.io/nvidia/pytorch:20.03-py3
+ARG FROM_IMAGE_NAME=nvcr.io/nvidia/pytorch:19.10-py3
 FROM ${FROM_IMAGE_NAME}
 
+RUN apt-get update && apt-get install -y python3
+
 WORKDIR /tmp/onnx-trt
-COPY trt/onnx-trt.patch .
+COPY tensorrt/onnx-trt.patch .
 RUN git clone https://github.com/onnx/onnx-tensorrt.git && cd onnx-tensorrt && git checkout 8716c9b && git submodule update --init --recursive && \
     patch -f < ../onnx-trt.patch && mkdir build && cd build && cmake .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/usr -DGPU_ARCHS="60 70 75" && make -j16 && make install && mv -f /usr/lib/libnvonnx* /usr/lib/x86_64-linux-gnu/ && ldconfig
 
@@ -11,7 +13,7 @@ RUN git clone https://github.com/onnx/onnx-tensorrt.git && cd onnx-tensorrt && g
 # At the same step, also install TRT pip reqs
 WORKDIR /tmp/pipReqs
 COPY requirements.txt /tmp/pipReqs/jocRequirements.txt
-COPY trt/requirements.txt /tmp/pipReqs/trtRequirements.txt
+COPY tensorrt/requirements.txt /tmp/pipReqs/trtRequirements.txt
 RUN pip install --disable-pip-version-check -U -r jocRequirements.txt -r trtRequirements.txt
 
 

+ 2 - 2
PyTorch/SpeechRecognition/Jasper/tensorrt/scripts/docker/build.sh

@@ -1,5 +1,5 @@
 #!/bin/bash
 
 # Constructs a docker image containing dependencies for execution of JASPER through TRT
-echo "docker build . -f ./trt/Dockerfile -t jasper:trt6"
-docker build . -f ./trt/Dockerfile -t jasper:trt6
+echo "docker build . -f ./tensorrt/Dockerfile -t jasper:trt6"
+docker build . -f ./tensorrt/Dockerfile -t jasper:trt6

+ 1 - 1
PyTorch/SpeechRecognition/Jasper/tensorrt/scripts/trt_inference_benchmark.sh

@@ -130,7 +130,7 @@ else
    PYT_PREDICTION_PATH=" --pyt_prediction_path=${PYT_PREDICTION_PATH}"
 fi
 
-CMD="python trt/perf.py"
+CMD="python tensorrt/perf.py"
 CMD+=" --batch_size $BATCH_SIZE"
 CMD+=" --engine_batch_size $BATCH_SIZE"
 CMD+=" --model_toml configs/jasper10x5dr_nomask.toml"

+ 29 - 13
PyTorch/SpeechRecognition/Jasper/triton/Dockerfile

@@ -1,22 +1,38 @@
-ARG FROM_IMAGE_NAME=nvcr.io/nvidian/pytorch:20.03-py3
-ARG TRITON_BASE_IMAGE=nvcr.io/nvidia/tritonserver:20.03.1-py3-clientsdk
-FROM ${TRITON_BASE_IMAGE} as triton
+ARG FROM_IMAGE_NAME=nvcr.io/nvidia/pytorch:19.09-py3
+
+FROM tensorrtserver_client as trtis-client
 FROM ${FROM_IMAGE_NAME}
+RUN apt-get update && apt-get install -y python3
+ARG version=6.0.1-1+cuda10.1
+RUN wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/cuda-repo-ubuntu1804_10.1.243-1_amd64.deb \
+&& dpkg -i cuda-repo-*.deb \
+&& wget https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/nvidia-machine-learning-repo-ubuntu1804_1.0.0-1_amd64.deb \
+&& dpkg -i nvidia-machine-learning-repo-*.deb \
+&& apt-get update \
+&& apt-get install -y --no-install-recommends libnvinfer6=${version} libnvonnxparsers6=${version} libnvparsers6=${version} libnvinfer-plugin6=${version} libnvinfer-dev=${version} libnvonnxparsers-dev=${version} libnvparsers-dev=${version} libnvinfer-plugin-dev=${version} python-libnvinfer=${version} python3-libnvinfer=${version}
+RUN cp -r /usr/lib/python3.6/dist-packages/tensorrt /opt/conda/lib/python3.6/site-packages/tensorrt
 
-ADD requirements.txt .
-RUN pip install -r requirements.txt
-RUN pip install onnxruntime
 
-ADD triton/requirements.txt .
-RUN pip install -r requirements.txt
+ENV PATH=$PATH:/usr/src/tensorrt/bin
+WORKDIR /tmp/onnx-trt
+COPY tensorrt/onnx-trt.patch .
+RUN git clone https://github.com/onnx/onnx-tensorrt.git && cd onnx-tensorrt && git checkout  b677b9cbf19af803fa6f76d05ce558e657e4d8b6  && git submodule update --init --recursive && \
+    patch -f < ../onnx-trt.patch && mkdir build && cd build && cmake .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/usr -DGPU_ARCHS="60 70 75" && make -j16 && make install && mv -f /usr/lib/libnvonnx* /usr/lib/x86_64-linux-gnu/ && ldconfig
 
-ADD tensorrt/requirements.txt .
-RUN pip install -r requirements.txt
 
-COPY --from=triton /opt/tritonserver/qa/pkgs/tensorrtserver-1.13.0-py3-none-linux_x86_64.whl ./tensorrtserver-1.13.0-py3-none-linux_x86_64.whl
+# Here's a good place to install pip reqs from JoC repo.
+# At the same step, also install TRT pip reqs
+WORKDIR /tmp/pipReqs
+COPY requirements.txt /tmp/pipReqs/pytRequirements.txt
+COPY tensorrt/requirements.txt /tmp/pipReqs/trtRequirements.txt
+COPY triton/requirements.txt /tmp/pipReqs/trtisRequirements.txt
+RUN apt-get update && apt-get install -y --no-install-recommends portaudio19-dev && pip install -r pytRequirements.txt && pip install -r trtRequirements.txt && pip install -r trtisRequirements.txt
 
-RUN pip install tensorrtserver-1.13.0-py3-none-linux_x86_64.whl
+# Copy the perf_client over
+COPY --from=trtis-client /workspace/install/bin/perf_client /workspace/install/bin/perf_client
+# Copy the Python wheel and install it with pip
+COPY --from=trtis-client /workspace/install/python/tensorrtserver*.whl /tmp/
+RUN pip install /tmp/tensorrtserver*.whl && rm /tmp/tensorrtserver*.whl
 
 WORKDIR /workspace/jasper
 COPY . .
-RUN pip install --no-cache-dir -e .

+ 3 - 2
PyTorch/SpeechRecognition/Jasper/triton/scripts/docker/build.sh

@@ -4,5 +4,6 @@ SCRIPT_DIR=$(cd $(dirname $0); pwd)
 PROJECT_DIR=${SCRIPT_DIR}/../../../
 docker pull nvcr.io/nvidia/tensorrtserver:19.09-py3
 git submodule update --init --recursive
-docker build -t tensorrtserver_client -f ${PROJECT_DIR}/external/triton-inference-server/Dockerfile.client ${PROJECT_DIR}/external/triton-inference-server
-docker build . --rm -f ${PROJECT_DIR}/trtis/Dockerfile -t jasper:trtis
+docker build -t tensorrtserver_client  \
+             -f ${PROJECT_DIR}/external/Dockerfile.client.patched ${PROJECT_DIR}/external/triton-inference-server
+docker build . --rm -f ${PROJECT_DIR}/triton/Dockerfile -t jasper:triton

+ 2 - 2
PyTorch/SpeechRecognition/Jasper/triton/scripts/docker/launch.sh

@@ -29,11 +29,11 @@ fi
 
 echo $MOUNTS
 docker run -it --rm \
-  --runtime=nvidia \
+  --gpus=all \
   --shm-size=4g \
   --ulimit memlock=-1 \
   --ulimit stack=67108864 \
   ${MOUNTS} \
   -v ${JASPER_REPO}:/jasper \
   ${EXTRA_JASPER_ENV} \
-  jasper:trtis bash $PROGRAM_PATH
+  jasper:triton bash $PROGRAM_PATH

+ 2 - 2
PyTorch/SpeechRecognition/Jasper/triton/scripts/execute_all_perf_runs.sh

@@ -44,7 +44,7 @@ export GPU=${GPU:-}
 
 SCRIPT_DIR=$(cd $(dirname $0); pwd)
 PROJECT_DIR=${SCRIPT_DIR}/../..
-MODEL_REPO=${MODEL_REPO:-"${PROJECT_DIR}/trtis/model_repo"}
+MODEL_REPO=${MODEL_REPO:-"${PROJECT_DIR}/triton/model_repo"}
 
 # We need to make sure TRTIS uses only one GPU, same as export does
 # for TRTIS
@@ -78,7 +78,7 @@ do
   
     if [ "${REGENERATE_ENGINES}" == "yes" ]; then
         ARCH=${ARCH} CHECKPOINT_DIR=${CHECKPOINT_DIR} CHECKPOINT=${CHECKPOINT} PRECISION=${PRECISION} MAX_SEQUENCE_LENGTH_FOR_ENGINE=${MAX_SEQUENCE_LENGTH_FOR_ENGINE} \
-        ${PROJECT_DIR}/trtis/scripts/export_model.sh || exit 1
+        ${PROJECT_DIR}/triton/scripts/export_model.sh || exit 1
     fi
   
     for BATCH_SIZE in 1 2 4 8 16 32 64;

+ 3 - 3
PyTorch/SpeechRecognition/Jasper/triton/scripts/export_model.sh

@@ -25,12 +25,12 @@ GPU=${GPU:-0}
 SCRIPT_DIR=$(cd $(dirname $0); pwd)
 PROJECT_DIR=${SCRIPT_DIR}/../..
 if [ -f /.dockerenv ]; then # inside docker
-	CUDA_VISIBLE_DEVICES=${GPU} CHECKPOINT=${CHECKPOINT} CHECKPOINT_DIR=${CHECKPOINT_DIR} PRECISION=${PRECISION} ARCH=${ARCH} MAX_SEQUENCE_LENGTH_FOR_ENGINE=${MAX_SEQUENCE_LENGTH_FOR_ENGINE} ${PROJECT_DIR}/trtis/scripts/export_model_helper.sh || exit 1
+	CUDA_VISIBLE_DEVICES=${GPU} CHECKPOINT=${CHECKPOINT} CHECKPOINT_DIR=${CHECKPOINT_DIR} PRECISION=${PRECISION} ARCH=${ARCH} MAX_SEQUENCE_LENGTH_FOR_ENGINE=${MAX_SEQUENCE_LENGTH_FOR_ENGINE} ${PROJECT_DIR}/triton/scripts/export_model_helper.sh || exit 1
 else
 	set -x
-	PROGRAM_PATH="/jasper/trtis/scripts/export_model_helper.sh"  \
+	PROGRAM_PATH="/jasper/triton/scripts/export_model_helper.sh"  \
 	EXTRA_JASPER_ENV="-e PRECISION=${PRECISION} -e CHECKPOINT=${CHECKPOINT} -e CHECKPOINT_DIR=/checkpoints -e ARCH=${ARCH} -e MAX_SEQUENCE_LENGTH_FOR_ENGINE=${MAX_SEQUENCE_LENGTH_FOR_ENGINE} -e CUDA_VISIBLE_DEVICES=${GPU}" \
 	CHECKPOINT_DIR=${CHECKPOINT_DIR} DATA_DIR= RESULT_DIR= \
-	${PROJECT_DIR}/trtis/scripts/docker/launch.sh || exit 1
+	${PROJECT_DIR}/triton/scripts/docker/launch.sh || exit 1
 	set +x
 fi

+ 3 - 3
PyTorch/SpeechRecognition/Jasper/triton/scripts/export_model_helper.sh

@@ -66,13 +66,13 @@ echo "export_model.sh: Exporting TRT engine, CUDA ARCH = ${ARCH} ... "
 PREC_FLAGS=""
 if [ "$PRECISION" == "fp16" ]
 then
-	PREC_FLAGS="--trt_fp16"
+ 	PREC_FLAGS="--trt_fp16"
 fi
 
 # remove targets first
 rm -f ${MODEL_REPO}/jasper-trt/1/jasper_${ARCH}.plan ${MODEL_REPO}/jasper-onnx/1/jasper.onnx
 
-python  ${JASPER_REPO}/trt/perf.py \
+python  ${JASPER_REPO}/tensorrt/perf.py \
 	--ckpt_path ${CHECKPOINT_DIR}/${CHECKPOINT} \
 	--wav=${JASPER_REPO}/notebooks/example1.wav \
 	--model_toml=${JASPER_REPO}/configs/${MODEL_CONFIG} \
@@ -85,7 +85,7 @@ if [ "$PRECISION" == "fp16" ]
 then
 	PREC_FLAGS="--trt_fp16 --pyt_fp16"
 fi
-python  ${JASPER_REPO}/trt/perf.py \
+python  ${JASPER_REPO}/tensorrt/perf.py \
 	--ckpt_path ${CHECKPOINT_DIR}/${CHECKPOINT} \
 	--wav=${JASPER_REPO}/notebooks/example1.wav \
 	--model_toml=${JASPER_REPO}/configs/${MODEL_CONFIG} \

+ 3 - 3
PyTorch/SpeechRecognition/Jasper/triton/scripts/run_client.sh

@@ -26,13 +26,13 @@ FILE=${3} # json manifest file, OR single wav file
 JASPER_CONTAINER_TAG=${JASPER_CONTAINER_TAG:-jasper:trtis}
 
 if [ "$#" -ge 1 ] && [ "${FILE: -4}" == ".wav" ]; then 
-  CMD="python /jasper/trtis/jasper-client.py --data_dir /data --audio_filename ${FILE} --model_platform ${MODEL_TYPE}"
+  CMD="python /jasper/triton/jasper-client.py --data_dir /data --audio_filename ${FILE} --model_platform ${MODEL_TYPE}"
   ARGS=""
   ARGS="$ARGS -v $DATA_DIR:/data"
 elif [ "$#" -ge 1 ] && [ "${FILE: -4}" == "json" ]; then
   ARGS=""
   ARGS="$ARGS -v $DATA_DIR:/data"
-  CMD="python /jasper/trtis/jasper-client.py --manifest_filename ${FILE} --model_platform ${MODEL_TYPE} --data_dir /data"
+  CMD="python /jasper/triton/jasper-client.py --manifest_filename ${FILE} --model_platform ${MODEL_TYPE} --data_dir /data"
 else
   ARGS="-it"
   CMD=""
@@ -49,4 +49,4 @@ nvidia-docker run --rm -it \
    -v ${PROJECT_DIR}:/jasper \
    --name=trtis-client \
    ${ARGS} ${JASPER_CONTAINER_TAG} ${CMD}
-set +x
+set +x

+ 2 - 2
PyTorch/SpeechRecognition/Jasper/triton/scripts/run_perf_client.sh

@@ -67,13 +67,13 @@ ARGS="\
 curl -s "http://${SERVER_HOSTNAME}:8000/api/status/${MODEL_NAME}" | grep ready_state | grep SERVER_READY || (echo "Model ${MODEL_NAME} is not ready, perf_client skipped..." && exit 1)
 
 echo "=== STARTING: perf client ${ARGS} --concurrency-range 1:4:1 ==="
-docker run  -e DISPLAY=${DISPLAY}  --runtime nvidia --rm \
+docker run  -e DISPLAY=${DISPLAY}  --gpus all --rm \
 	      --privileged --net=host \
 	      -v ${RESULT_DIR_H}:/results --name jasper-perf-client \
 	      ${TRTIS_CLIENT_CONTAINER_TAG}  perf_client $ARGS -f /results/${OUTPUT_FILE_CSV}_p1 --concurrency-range 1:4:1 2>&1 | tee -a $LOGNAME
 
 echo "=== STARTING: perf client ${ARGS} --concurrency-range 8:${MAX_CONCURRENCY}:8 ==="
-docker run  -e DISPLAY=${DISPLAY}  --runtime nvidia --rm \
+docker run  -e DISPLAY=${DISPLAY}  --gpus all --rm \
 	      --privileged --net=host \
 	      -v ${RESULT_DIR_H}:/results --name jasper-perf-client \
 	      ${TRTIS_CLIENT_CONTAINER_TAG}  perf_client $ARGS -f /results/${OUTPUT_FILE_CSV}_p2 --concurrency-range 8:${MAX_CONCURRENCY}:8 2>&1 | tee -a $LOGNAME
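
Note on the readiness check above: the script curls the server's status API and greps for SERVER_READY before launching perf_client. An equivalent poll in Python, assuming the requests package is available (hypothetical helper, not part of this commit):

    import time
    import requests

    def wait_ready(host, model, timeout=60):
        # Same endpoint the script curls: http://<host>:8000/api/status/<model>
        url = f"http://{host}:8000/api/status/{model}"
        deadline = time.time() + timeout
        while time.time() < deadline:
            try:
                if "SERVER_READY" in requests.get(url).text:
                    return True
            except requests.ConnectionError:
                pass  # server not up yet
            time.sleep(2)
        return False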

+ 1 - 1
PyTorch/SpeechRecognition/Jasper/triton/scripts/run_server.sh

@@ -48,7 +48,7 @@ RM=${RM:-"--rm"}
 
 set -x
 docker run -p 8000:8000 -p 8001:8001 -p 8002:8002 \
-       --runtime nvidia \
+       --gpus all \
        -e NVIDIA_VISIBLE_DEVICES=${NV_VISIBLE_DEVICES} \
        -v ${MODELS_DIR}:/models \
        -v ${TRTIS_DIR}/model_repo:/model_repo \

+ 43 - 0
TensorFlow/Translation/GNMT/qa/L1_joc_GNMT_inferbench_fp16.sh

@@ -0,0 +1,43 @@
+set -o nounset
+set -o errexit
+set -o pipefail
+
+cd ..
+cp -r /data/joc/gnmt_tf/19.08 output_dir
+
+# hack to work with pytorch dataset
+sed -ie 's/    src_vocab_file = hparams.vocab_prefix + "." + hparams.src/    src_vocab_file = hparams.vocab_prefix/g' nmt.py
+sed -ie 's/    tgt_vocab_file = hparams.vocab_prefix + "." + hparams.tgt/    tgt_vocab_file = hparams.vocab_prefix/g' nmt.py
+
+( python nmt.py --amp --data_dir=/data/pytorch/wmt16_de_en --output_dir=output_dir --mode=infer --infer_batch_size=512 2>&1 ) | tee log.log
+python scripts/parse_log.py log.log | tee log.json
+
+python << END
+import json
+import numpy as np
+from pathlib import Path
+
+baseline = 10254
+bleu_baseline = 25.1
+
+log = json.loads(Path('log.json').read_text())
+speed = np.mean(log['eval_tokens_per_sec'])
+bleu = log['bleu'][0]
+
+print('Eval speed    :', speed)
+print('Baseline      :', baseline)
+
+print('Bleu          :', bleu)
+print('Bleu baseline :', bleu_baseline)
+
+if speed < baseline * 0.9:
+    print("FAILED: speed ({}) doesn't match the baseline ({})".format(speed, baseline))
+    exit(1)
+
+if bleu < bleu_baseline - 0.2:
+    print("FAILED: bleu ({}) doesn't match the baseline ({})".format(bleu, bleu_baseline))
+    exit(1)
+
+print('SUCCESS')
+END
+

+ 43 - 0
TensorFlow/Translation/GNMT/qa/L1_joc_GNMT_inferbench_fp32.sh

@@ -0,0 +1,43 @@
+set -o nounset
+set -o errexit
+set -o pipefail
+
+cd .. 
+cp -r /data/joc/gnmt_tf/19.08 output_dir
+
+# hack to work with pytorch dataset
+sed -ie 's/    src_vocab_file = hparams.vocab_prefix + "." + hparams.src/    src_vocab_file = hparams.vocab_prefix/g' nmt.py
+sed -ie 's/    tgt_vocab_file = hparams.vocab_prefix + "." + hparams.tgt/    tgt_vocab_file = hparams.vocab_prefix/g' nmt.py
+
+( python nmt.py --data_dir=/data/pytorch/wmt16_de_en --output_dir=output_dir --mode=infer --infer_batch_size=512 2>&1 ) | tee log.log
+python scripts/parse_log.py log.log | tee log.json
+
+python << END
+import json
+import numpy as np
+from pathlib import Path
+
+baseline = 5374
+bleu_baseline = 25.1
+
+log = json.loads(Path('log.json').read_text())
+speed = np.mean(log['eval_tokens_per_sec'])
+bleu = log['bleu'][0]
+
+print('Eval speed    :', speed)
+print('Baseline      :', baseline)
+
+print('Bleu          :', bleu)
+print('Bleu baseline :', bleu_baseline)
+
+if speed < baseline * 0.9:
+    print("FAILED: speed ({}) doesn't match the baseline ({})".format(speed, baseline))
+    exit(1)
+
+if bleu < bleu_baseline - 0.2:
+    print("FAILED: bleu ({}) doesn't match the baseline ({})".format(bleu, bleu_baseline))
+    exit(1)
+
+print('SUCCESS')
+END
+