
updated convai

gkarch, 5 years ago
parent
commit
550123fbbc
40 changed files with 1086 additions and 400 deletions
  1. 6 6
      PyTorch/SpeechSynthesis/Tacotron2/Dockerfile_triton_client
  2. 16 0
      PyTorch/SpeechSynthesis/Tacotron2/common/utils.py
  3. 11 0
      PyTorch/SpeechSynthesis/Tacotron2/config.json
  4. 1 1
      PyTorch/SpeechSynthesis/Tacotron2/data_functions.py
  5. 8 7
      PyTorch/SpeechSynthesis/Tacotron2/inference.py
  6. 1 1
      PyTorch/SpeechSynthesis/Tacotron2/inference_perf.py
  7. 6 6
      PyTorch/SpeechSynthesis/Tacotron2/models.py
  8. 26 35
      PyTorch/SpeechSynthesis/Tacotron2/notebooks/conversationalai/README.md
  9. 2 2
      PyTorch/SpeechSynthesis/Tacotron2/notebooks/conversationalai/client/Dockerfile
  10. 7 18
      PyTorch/SpeechSynthesis/Tacotron2/notebooks/conversationalai/client/speech_ai_demo/speech_ai_demo.ipynb
  11. 5 1
      PyTorch/SpeechSynthesis/Tacotron2/notebooks/conversationalai/client/speech_ai_demo/utils/bert/preprocessing.py
  12. 5 0
      PyTorch/SpeechSynthesis/Tacotron2/notebooks/conversationalai/client/speech_ai_demo/utils/jasper/speech_utils.py
  13. 68 0
      PyTorch/SpeechSynthesis/Tacotron2/notebooks/conversationalai/export_tacotron2_ts.py
  14. 20 29
      PyTorch/SpeechSynthesis/Tacotron2/notebooks/conversationalai/model_preparation/patch_jasper_trt7
  15. 44 0
      PyTorch/SpeechSynthesis/Tacotron2/notebooks/conversationalai/model_repo/bertQA-ts-script/config.pbtxt
  16. 3 2
      PyTorch/SpeechSynthesis/Tacotron2/notebooks/conversationalai/model_repo/jasper-trt-ensemble/config.pbtxt
  17. 4 4
      PyTorch/SpeechSynthesis/Tacotron2/notebooks/conversationalai/model_repo/waveglow-trt/config.pbtxt
  18. 0 0
      PyTorch/SpeechSynthesis/Tacotron2/notebooks/triton/LICENSE
  19. 146 0
      PyTorch/SpeechSynthesis/Tacotron2/notebooks/triton/README.md
  20. 43 67
      PyTorch/SpeechSynthesis/Tacotron2/notebooks/triton/notebook.ipynb
  21. 0 0
      PyTorch/SpeechSynthesis/Tacotron2/notebooks/triton/run_this.sh
  22. 39 0
      PyTorch/SpeechSynthesis/Tacotron2/notebooks/triton/tacotron2_ts-script_config.pbtxt
  23. 22 0
      PyTorch/SpeechSynthesis/Tacotron2/notebooks/triton/waveglow_tensorrt_config.pbtxt
  24. 0 153
      PyTorch/SpeechSynthesis/Tacotron2/notebooks/trtis/README.md
  25. 1 1
      PyTorch/SpeechSynthesis/Tacotron2/tacotron2/arg_parser.py
  26. 13 13
      PyTorch/SpeechSynthesis/Tacotron2/tensorrt/README.md
  27. 5 3
      PyTorch/SpeechSynthesis/Tacotron2/tensorrt/convert_onnx2trt.py
  28. 405 0
      PyTorch/SpeechSynthesis/Tacotron2/tensorrt/convert_tacotron22onnx.py
  29. 104 0
      PyTorch/SpeechSynthesis/Tacotron2/tensorrt/convert_waveglow2onnx.py
  30. 5 4
      PyTorch/SpeechSynthesis/Tacotron2/tensorrt/inference_trt.py
  31. 1 0
      PyTorch/SpeechSynthesis/Tacotron2/tensorrt/run_latency_tests_trt.sh
  32. 8 33
      PyTorch/SpeechSynthesis/Tacotron2/tensorrt/test_infer_trt.py
  33. 0 0
      PyTorch/SpeechSynthesis/Tacotron2/tensorrt/trt_utils.py
  34. 2 1
      PyTorch/SpeechSynthesis/Tacotron2/test_infer.py
  35. 1 1
      PyTorch/SpeechSynthesis/Tacotron2/test_infer.sh
  36. 12 9
      PyTorch/SpeechSynthesis/Tacotron2/train.py
  37. 0 1
      PyTorch/SpeechSynthesis/Tacotron2/trt/run_latency_tests_trt.sh
  38. 1 1
      PyTorch/SpeechSynthesis/Tacotron2/waveglow/arg_parser.py
  39. 1 1
      PyTorch/SpeechSynthesis/Tacotron2/waveglow/denoiser.py
  40. 44 0
      PyTorch/SpeechSynthesis/Tacotron2/waveglow/model.py

+ 6 - 6
PyTorch/SpeechSynthesis/Tacotron2/Dockerfile_trtis_client → PyTorch/SpeechSynthesis/Tacotron2/Dockerfile_triton_client

@@ -11,14 +11,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-FROM nvcr.io/nvidia/tritonserver:20.03-py3-clientsdk AS trt
+FROM nvcr.io/nvidia/tritonserver:20.06-py3-clientsdk AS triton
 FROM continuumio/miniconda3
 RUN apt-get update && apt-get install -y pbzip2 pv bzip2 cabextract mc iputils-ping wget
 
 WORKDIR /workspace/speech_ai_demo_TTS/
 
 # Copy the perf_client over
-COPY --from=trt /workspace/install/ /workspace/install/
+COPY --from=triton /workspace/install/ /workspace/install/
 ENV LD_LIBRARY_PATH /workspace/install/lib:${LD_LIBRARY_PATH}
 
 # set up env variables
@@ -27,14 +27,14 @@ RUN cd /workspace/speech_ai_demo_TTS/
 
 # jupyter lab extensions
 RUN conda install -c conda-forge jupyterlab ipywidgets nodejs python-sounddevice librosa unidecode inflect
-RUN pip install /workspace/install/python/tensorrtserver*.whl
+RUN pip install /workspace/install/python/triton*.whl
 
 # Copy the python wheel and install with pip
-COPY --from=trt /workspace/install/python/tensorrtserver*.whl /tmp/
-RUN pip install /tmp/tensorrtserver*.whl && rm /tmp/tensorrtserver*.whl
+COPY --from=triton /workspace/install/python/triton*.whl /tmp/
+RUN pip install /tmp/triton*.whl && rm /tmp/triton*.whl
 
 RUN cd /workspace/speech_ai_demo_TTS/
-COPY ./notebooks/trtis/ .
+COPY ./notebooks/triton/ .
 RUN mkdir /workspace/speech_ai_demo_TTS/tacotron2/
 COPY ./tacotron2/text /workspace/speech_ai_demo_TTS/tacotron2/text
 RUN chmod a+x /workspace/speech_ai_demo_TTS/run_this.sh

+ 16 - 0
PyTorch/SpeechSynthesis/Tacotron2/common/utils.py

@@ -30,6 +30,22 @@ from scipy.io.wavfile import read
 import torch
 import os
 
+import argparse
+import json
+
+class ParseFromConfigFile(argparse.Action):
+
+    def __init__(self, option_strings, type, dest, help=None, required=False):
+        super(ParseFromConfigFile, self).__init__(option_strings=option_strings, type=type, dest=dest, help=help, required=required)
+
+    def __call__(self, parser, namespace, values, option_string):
+        with open(values, 'r') as f:
+            data = json.load(f)
+
+        for group in data.keys():
+            for k,v in data[group].items():
+                underscore_k = k.replace('-', '_')
+                setattr(namespace, underscore_k, v)
 
 def get_mask_from_lengths(lengths):
     max_len = torch.max(lengths).item()

+ 11 - 0
PyTorch/SpeechSynthesis/Tacotron2/config.json

@@ -0,0 +1,11 @@
+{
+    "audio": {
+	"max-wav-value": 32768.0,
+	"sampling-rate": 22050,
+	"filter-length": 1024,
+	"hop-length": 256,
+	"win-length": 1024,
+	"mel-fmin": 0.0,
+	"mel-fmax": 7000.0
+    }
+}
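
For reference, here is a minimal sketch of how the `ParseFromConfigFile` action added in `common/utils.py` above can consume this `config.json`. The `--config-file` flag name and the standalone parser are assumptions for illustration; the repository wires the action into its own argument parsers.

```python
# Hypothetical wiring of ParseFromConfigFile into argparse (illustration only).
# Keys from each group in config.json are copied onto the namespace with dashes
# converted to underscores, e.g. "sampling-rate" -> args.sampling_rate.
import argparse
from common.utils import ParseFromConfigFile

parser = argparse.ArgumentParser(description='config-file demo')
parser.add_argument('--config-file', type=str, action=ParseFromConfigFile,
                    help='Path to a JSON config such as config.json')
parser.add_argument('--sampling-rate', type=int, default=22050)

args = parser.parse_args(['--config-file', 'config.json'])
print(args.sampling_rate, args.hop_length, args.mel_fmax)
```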

+ 1 - 1
PyTorch/SpeechSynthesis/Tacotron2/data_functions.py

@@ -33,7 +33,7 @@ from tacotron2.data_function import batch_to_gpu as batch_to_gpu_tacotron2
 from waveglow.data_function import batch_to_gpu as batch_to_gpu_waveglow
 
 
-def get_collate_function(model_name, n_frames_per_step):
+def get_collate_function(model_name, n_frames_per_step=1):
     if model_name == 'Tacotron2':
         collate_fn = TextMelCollate(n_frames_per_step)
     elif model_name == 'WaveGlow':

+ 8 - 7
PyTorch/SpeechSynthesis/Tacotron2/inference.py

@@ -29,6 +29,7 @@ from tacotron2.text import text_to_sequence
 import models
 import torch
 import argparse
+import os
 import numpy as np
 from scipy.io.wavfile import write
 import matplotlib
@@ -106,8 +107,9 @@ def unwrap_distributed(state_dict):
 
 
 def load_and_setup_model(model_name, parser, checkpoint, fp16_run, cpu_run, forward_is_infer=False):
-    model_parser = models.parse_model_args(model_name, parser, add_help=False)
+    model_parser = models.model_parser(model_name, parser, add_help=False)
     model_args, _ = model_parser.parse_known_args()
+
     model_config = models.get_model_config(model_name, model_args)
     model = models.get_model(model_name, model_config, cpu_run=cpu_run,
                              forward_is_infer=forward_is_infer)
@@ -195,8 +197,8 @@ def main():
     parser = parse_args(parser)
     args, _ = parser.parse_known_args()
 
-    DLLogger.init(backends=[JSONStreamBackend(Verbosity.DEFAULT,
-                                              args.output+'/'+args.log_file),
+    log_file = os.path.join(args.output, args.log_file)
+    DLLogger.init(backends=[JSONStreamBackend(Verbosity.DEFAULT, log_file),
                             StdOutBackend(Verbosity.VERBOSE)])
     for k,v in vars(args).items():
         DLLogger.log(step="PARAMETER", data={k:v})
@@ -245,8 +247,7 @@ def main():
         audios = denoiser(audios, strength=args.denoising_strength).squeeze(1)
 
     print("Stopping after",mel.size(2),"decoder steps")
-
-    tacotron2_infer_perf = mel.size(0)*mel.size(2)/measurements['tacotron2_time']   
+    tacotron2_infer_perf = mel.size(0)*mel.size(2)/measurements['tacotron2_time']
     waveglow_infer_perf = audios.size(0)*audios.size(1)/measurements['waveglow_time']
 
     DLLogger.log(step=0, data={"tacotron2_items_per_sec": tacotron2_infer_perf})
@@ -259,12 +260,12 @@ def main():
     for i, audio in enumerate(audios):
 
         plt.imshow(alignments[i].float().data.cpu().numpy().T, aspect="auto", origin="lower")
-        figure_path = args.output+"alignment_"+str(i)+"_"+args.suffix+".png"
+        figure_path = os.path.join(args.output,"alignment_"+str(i)+args.suffix+".png")
         plt.savefig(figure_path)
 
         audio = audio[:mel_lengths[i]*args.stft_hop_length]
         audio = audio/torch.max(torch.abs(audio))
-        audio_path = args.output+"audio_"+str(i)+"_"+args.suffix+".wav"
+        audio_path = os.path.join(args.output,"audio_"+str(i)+args.suffix+".wav")
         write(audio_path, args.sampling_rate, audio.cpu().numpy())
 
     DLLogger.flush()
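
As a quick illustration of the two throughput metrics logged above (the numbers below are made up):

```python
# Illustrative arithmetic only: tacotron2_items_per_sec counts generated mel frames
# per second (batch_size * decoder_steps / elapsed seconds), while
# waveglow_items_per_sec counts generated audio samples per second.
batch_size, decoder_steps, tacotron2_time = 1, 620, 0.50   # example values
audio_samples, waveglow_time = 620 * 256, 0.25              # 256 audio samples per mel frame

tacotron2_items_per_sec = batch_size * decoder_steps / tacotron2_time   # 1240.0
waveglow_items_per_sec = batch_size * audio_samples / waveglow_time     # 634880.0
print(tacotron2_items_per_sec, waveglow_items_per_sec)
```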

+ 1 - 1
PyTorch/SpeechSynthesis/Tacotron2/inference_perf.py

@@ -78,7 +78,7 @@ def main():
     DLLogger.log(step="PARAMETER", data={'model_name':'Tacotron2_PyT'})
 
     model = load_and_setup_model(args.model_name, parser, None, args.amp_run,
-                                 forward_is_infer=True)
+                                 cpu_run=False, forward_is_infer=True)
 
     if args.model_name == "Tacotron2":
         model = torch.jit.script(model)

+ 6 - 6
PyTorch/SpeechSynthesis/Tacotron2/models.py

@@ -34,13 +34,13 @@ from waveglow.model import WaveGlow
 import torch
 
 
-def parse_model_args(model_name, parser, add_help=False):
+def model_parser(model_name, parser, add_help=False):
     if model_name == 'Tacotron2':
-        from tacotron2.arg_parser import parse_tacotron2_args
-        return parse_tacotron2_args(parser, add_help)
+        from tacotron2.arg_parser import tacotron2_parser
+        return tacotron2_parser(parser, add_help)
     if model_name == 'WaveGlow':
-        from waveglow.arg_parser import parse_waveglow_args
-        return parse_waveglow_args(parser, add_help)
+        from waveglow.arg_parser import waveglow_parser
+        return waveglow_parser(parser, add_help)
     else:
         raise NotImplementedError(model_name)
 
@@ -88,7 +88,7 @@ def get_model(model_name, model_config, cpu_run,
     if uniform_initialize_bn_weight:
         init_bn(model)
 
-    if cpu_run==False:
+    if not cpu_run:
         model = model.cuda()
     return model
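
A short sketch of how the renamed helpers fit together, following the `load_and_setup_model` flow from `inference.py` above (illustration only; assumes it is run from the Tacotron2 root with default model arguments):

```python
# Hypothetical standalone use of models.model_parser / get_model_config / get_model.
import argparse
import models

parser = argparse.ArgumentParser(add_help=False)
model_parser = models.model_parser('Tacotron2', parser, add_help=False)
model_args, _ = model_parser.parse_known_args([])          # all defaults

model_config = models.get_model_config('Tacotron2', model_args)
model = models.get_model('Tacotron2', model_config, cpu_run=True,
                         forward_is_infer=True)            # forward() behaves like infer()
```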
 

+ 26 - 35
PyTorch/SpeechSynthesis/Tacotron2/notebooks/conversationalai/README.md

@@ -1,4 +1,3 @@
-This Readme accompanies the GTC 2020 talk: "PyTorch from Research to Production" available [here](https://developer.nvidia.com/gtc/2020/video/s21928).
 
 ## Model Preparation
 
@@ -32,15 +31,15 @@ wget https://api.ngc.nvidia.com/v2/models/nvidia/bert_large_pyt_amp_ckpt_squad_q
 ```
 
 
-* [Tacotron 2](https://ngc.nvidia.com/models/nvidia:tacotron2pyt_fp16/files?version=2)
+* [Tacotron 2](https://ngc.nvidia.com/catalog/models/nvidia:tacotron2_pyt_ckpt_amp/files?version=19.12.0)
 ```bash
-wget https://api.ngc.nvidia.com/v2/models/nvidia/tacotron2pyt_fp16/versions/2/files/nvidia_tacotron2pyt_fp16_20190427
+wget https://api.ngc.nvidia.com/v2/models/nvidia/tacotron2_pyt_ckpt_amp/versions/19.12.0/files/nvidia_tacotron2pyt_fp16.pt
 ```
 
 
-* [WaveGlow](https://ngc.nvidia.com/models/nvidia:waveglow256pyt_fp16/files)
+* [WaveGlow](https://ngc.nvidia.com/catalog/models/nvidia:waveglow_ckpt_amp_256/files?version=20.01.0)
 ```bash
-wget https://api.ngc.nvidia.com/v2/models/nvidia/waveglow256pyt_fp16/versions/1/files/nvidia_waveglow256pyt_fp16
+wget https://api.ngc.nvidia.com/v2/models/nvidia/waveglow_ckpt_amp_256/versions/20.01.0/files/nvidia_waveglow256pyt_fp16.pt
 ```
 
 
@@ -48,14 +47,13 @@ Move the downloaded checkpoints to `models` directory:
 
 ```bash
 cd DeepLearningExamples/PyTorch/SpeechSynthesis/Tacotron2/notebooks/conversationalai
-bert_large_qa.pt nvidia_tacotron2pyt_fp16_20190427 nvidia_waveglow256pyt_fp16 models/
 ```
 
 ### Prepare Jasper
 
 First, let's generate a TensorRT engine for Jasper using TensorRT version 7.
 
-Download the Jasper checkpoint from [NGC](https://ngc.nvidia.com/catalog/models/nvidia:jasperpyt_fp16/files) 
+Download the Jasper checkpoint from [NGC](https://ngc.nvidia.com/catalog/models/nvidia:jasperpyt_fp16/files)
 and move it to the `Jasper/checkpoints/` directory:
 
 ```bash
@@ -65,8 +63,8 @@ mv jasper_fp16.pt DeepLearningExamples/PyTorch/SpeechRecognition/Jasper/checkpoi
 
 Apply a patch to enable support of TensorRT 7:
 
-```bash 
-cd DeepLearningExamples/ 
+```bash
+cd DeepLearningExamples/
 git apply --ignore-space-change --reject --whitespace=fix ../patch_jasper_trt7
 ```
 
@@ -74,7 +72,7 @@ Now, build a container for Jasper:
 
 ```bash
 cd DeepLearningExamples/PyTorch/SpeechRecognition/Jasper/
-bash trt/scripts/docker/build.sh
+bash tensorrt/scripts/docker/build.sh
 ```
 
 To run the container, type:
@@ -87,15 +85,16 @@ export CHECKPOINT_DIR=$JASPER_DIR/checkpoints/
 export RESULT_DIR=$JASPER_DIR/results/
 cd $JASPER_DIR
 mkdir -p $DATA_DIR $CHECKPOINT_DIR $RESULT_DIR
-bash trt/scripts/docker/launch.sh $DATA_DIR $CHECKPOINT_DIR $RESULT_DIR
+bash tensorrt/scripts/docker/launch.sh $DATA_DIR $CHECKPOINT_DIR $RESULT_DIR
 ```
 
 Inside the container export Jasper TensorRT engine by executing:
 
 ```bash
+pip install --upgrade onnx
 mkdir -p /results/onnxs/ /results/engines/
 cd /jasper
-python trt/perf.py --batch_size 1 --engine_batch_size 1 --model_toml configs/jasper10x5dr_nomask.toml --ckpt_path /checkpoints/jasper_fp16.pt --trt_fp16 --pyt_fp16 --engine_path /results/engines/fp16_DYNAMIC.engine --onnx_path /results/onnxs/fp32_DYNAMIC.onnx --seq_len 3600 --make_onnx
+python tensorrt/perf.py --batch_size 1 --engine_batch_size 1 --model_toml configs/jasper10x5dr_nomask.toml --ckpt_path /checkpoints/jasper_fp16.pt --trt_fp16 --pyt_fp16 --engine_path /results/engines/jasper_fp16.engine --onnx_path /results/onnxs/fp32_DYNAMIC.onnx --seq_len 3600 --make_onnx
 ```
 
 After successful export, copy the engine to model_repo:
@@ -103,7 +102,7 @@ After successful export, copy the engine to model_repo:
 ```bash
 cd DeepLearningExamples/Pytorch
 mkdir -p SpeechSynthesis/Tacotron2/notebooks/conversationalai/model_repo/jasper-trt/1
-cp SpeechRecognition/Jasper/results/engines/fp16_DYNAMIC.engine SpeechSynthesis/Tacotron2/notebooks/conversationalai/model_repo/jasper-trt/1/jasper_fp16.engine
+cp SpeechRecognition/Jasper/results/engines/jasper_fp16.engine SpeechSynthesis/Tacotron2/notebooks/conversationalai/model_repo/jasper-trt/1/
 ```
 
 You will also need Jasper feature extractor and decoder. Download them from [NGC](https://ngc.nvidia.com/catalog/models/nvidia:jasperpyt_jit_fp16/files) and move to the model_repo:
@@ -121,12 +120,12 @@ wget -P jasper-feature-extractor/1/ https://api.ngc.nvidia.com/v2/models/nvidia/
 
 With the generated Jasper model, we can proceed to BERT.
 
-Download the BERT checkpoint from [NGC](https://ngc.nvidia.com/catalog/models/nvidia:bert_large_pyt_amp_ckpt_squad_qa1_1/files) 
+Download the BERT checkpoint from [NGC](https://ngc.nvidia.com/catalog/models/nvidia:bert_large_pyt_amp_ckpt_squad_qa1_1/files)
 and move it to the `BERT/checkpoints/` directory:
 
 ```bash
 mkdir -p DeepLearningExamples/PyTorch/LanguageModeling/BERT/checkpoints/
-mv bert_large_qa.pt DeepLearningExamples/PyTorch/LanguageModeling/BERT/checkpoints/
+mv bert_large_qa.pt DeepLearningExamples/PyTorch/LanguageModeling/BERT/checkpoints/bert_qa.pt
 ```
 
 Now, build a container for BERT:
@@ -146,14 +145,14 @@ The model will be saved in `results/triton_models/bertQA-onnx`, together with Tr
 
 ```bash
 cd DeepLearningExamples
-cp -r PyTorch/LanguageModeling/BERT/results/triton_models/bertQA-onnx DeepLearningExamples/PyTorch/SpeechSynthesis/Tacotron2/notebooks/conversationalai/model_repo/
+cp -r PyTorch/LanguageModeling/BERT/results/triton_models/bertQA-ts-script DeepLearningExamples/PyTorch/SpeechSynthesis/Tacotron2/notebooks/conversationalai/model_repo/
 ```
 
 ### Prepare Tacotron 2 and WaveGlow
 
 Now to the final part - TTS system.
 
-Download the [Tacotron 2](https://ngc.nvidia.com/models/nvidia:tacotron2pyt_fp16/files?version=2) and [WaveGlow](https://ngc.nvidia.com/models/nvidia:waveglow256pyt_fp16/files) checkpoints from [NGC](https://ngc.nvidia.com/catalog/models/) 
+Download the [Tacotron 2](https://ngc.nvidia.com/models/nvidia:tacotron2pyt_fp16/files?version=2) and [WaveGlow](https://ngc.nvidia.com/models/nvidia:waveglow256pyt_fp16/files) checkpoints from [NGC](https://ngc.nvidia.com/catalog/models/)
 and move them to the `Tacotron2/checkpoints/` directory:
 
 ```bash
@@ -178,29 +177,20 @@ Export Tacotron 2 to TorchScript:
 ```bash
 cd /workspace/tacotron2/
 mkdir -p output
-python exports/export_tacotron2_ts.py --tacotron2 checkpoints/nvidia_tacotron2pyt_fp16_20190427 -o output/model.pt --amp
-```
-
-To export WaveGlow to TensorRT 7, install ONNX-TRT
-
-```bash
-cd /workspace && git clone https://github.com/onnx/onnx-tensorrt.git
-cd /workspace/onnx-tensorrt/ && git submodule update --init --recursive
-cd /workspace/onnx-tensorrt && mkdir -p build
-cd /workspace/onnx-tensorrt/build && cmake .. -DCMAKE_CXX_FLAGS=-isystem\\ /usr/local/cuda/include && make -j12 && make install
-cd /workspace/tacotron2
+python notebooks/conversationalai/export_tacotron2_ts.py --tacotron2 notebooks/conversationalai/nvidia_tacotron2pyt_fp16.pt -o output/tacotron2_fp16.pt --fp16
 ```
 
 Export WaveGlow to ONNX intermediate representation:
 
 ```bash
-python exports/export_waveglow_onnx.py --waveglow checkpoints/nvidia_waveglow256pyt_fp16 --wn-channels 256 --fp16 -o output/
+python tensorrt/convert_waveglow2onnx.py --waveglow notebooks/conversationalai/nvidia_waveglow256pyt_fp16.pt --wn-channels 256 --fp16 -o output/ --config-file config.json
 ```
 
 Use the exported ONNX IR to generate TensorRT engine:
 
 ```bash
-python trt/export_onnx2trt.py --waveglow output/waveglow.onnx -o output/ --fp16
+pip install pycuda
+python tensorrt/convert_onnx2trt.py --waveglow output/waveglow.onnx -o output/ --fp16
 ```
 
 After successful export, exit the container and copy the Tacotron 2 model and the WaveGlow engine to `model_repo`:
@@ -208,8 +198,8 @@ After successful export, exit the container and copy the Tacotron 2 model and th
 ```bash
 cd DeepLearningExamples/PyTorch/SpeechSynthesis/Tacotron2/
 mkdir -p notebooks/conversationalai/model_repo/tacotron2/1/ notebooks/conversationalai/model_repo/waveglow-trt/1/
-cp output/model.pt notebooks/conversationalai/model_repo/tacotron2/1/
-cp output/waveglow_fp16.engine mnotebooks/conversationalai/odel_repo/waveglow-trt/1/
+cp output/tacotron2_fp16.pt notebooks/conversationalai/model_repo/tacotron2/1/
+cp output/waveglow_fp16.engine notebooks/conversationalai/model_repo/waveglow-trt/1/
 ```
 ## Deployment
 
@@ -223,12 +213,13 @@ docker build -f Dockerfile --network=host -t speech_ai_client:demo .
 From terminal start the Triton server:
 
 ```bash
-NV_GPU=1 nvidia-docker run --ipc=host --network=host --rm -p8000:8000 -p8001:8001 \\
--v /home/gkarch/dev/gtc2020/speechai/model_repo/:/models nvcr.io/nvidia/tensorrtserver:20.01-py3 trtserver --model-store=/models --log-verbose 1
+cd DeepLearningExamples/PyTorch/SpeechSynthesis/Tacotron2/notebooks/conversationalai
+NV_GPU=1 nvidia-docker run --ipc=host --network=host --rm -p8000:8000 -p8001:8001 -v ${PWD}/model_repo/:/models nvcr.io/nvidia/tritonserver:20.06-v1-py3 tritonserver --model-store=/models --log-verbose 1
+
 ```
 
 In another terminal, run the client:
 
 ```bash
-docker run -it --rm --network=host --device /dev/snd:/dev/snd --device /dev/usb:/dev/usb speech_ai_client:demo bash /workspace/speech_ai_demo/start_jupyter.sh
+docker run -it --rm --network=host --device /dev/snd:/dev/snd speech_ai_client:demo bash /workspace/speech_ai_demo/start_jupyter.sh
 ```

+ 2 - 2
PyTorch/SpeechSynthesis/Tacotron2/notebooks/conversationalai/client/Dockerfile

@@ -11,7 +11,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-FROM nvcr.io/nvidia/tritonserver:20.03-py3-clientsdk AS trtserver
+FROM nvcr.io/nvidia/tritonserver:20.06-v1-py3-clientsdk AS trtserver
 FROM continuumio/miniconda3
 RUN apt-get update && apt-get install -y pbzip2 pv bzip2 cabextract mc iputils-ping wget
 
@@ -26,7 +26,7 @@ ENV PATH="$PATH:/opt/conda/bin"
 RUN cd /workspace/speech_ai_demo/
 
 # jupyter lab extensions
-RUN conda install -c conda-forge jupyterlab=1.0 ipywidgets=7.5 nodejs python-sounddevice librosa unidecode inflect
+RUN conda install -c conda-forge jupyterlab=1.0 ipywidgets=7.5 nodejs=10.13 python-sounddevice librosa unidecode inflect
 RUN jupyter labextension install @jupyter-widgets/jupyterlab-manager
 RUN pip install /workspace/install/python/tensorrtserver*.whl
 

+ 7 - 18
PyTorch/SpeechSynthesis/Tacotron2/notebooks/conversationalai/client/speech_ai_demo/speech_ai_demo.ipynb

@@ -25,6 +25,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "import sys\n",
     "import os\n",
     "import time\n",
     "import numpy as np\n",
@@ -85,7 +86,7 @@
     "\n",
     "\n",
     "# create the inference context for the models\n",
-    "infer_ctx_bert = InferContext(args.url, args.protocol, 'bertQA-onnx', -1)\n",
+    "infer_ctx_bert = InferContext(args.url, args.protocol, 'bertQA-ts-script', -1)\n",
     "infer_ctx_tacotron2 = InferContext(args.url, args.protocol, 'tacotron2', -1)\n",
     "infer_ctx_waveglow = InferContext(args.url, args.protocol, 'waveglow-trt', -1)\n",
     "infer_jasper = SpeechClient(args.url, args.protocol, 'jasper-trt-ensemble', -1, \n",
@@ -211,24 +212,20 @@
     "        ::mel_lengths:: original length of mel spectrogram\n",
     "        ::returns:: waveform\n",
     "    '''\n",
-    "    # padding/trimming mel to dimension 620\n",
-    "    mel = mel[:,:,None]\n",
     "    # prepare input/output\n",
+    "    mel = np.expand_dims(mel, axis=0)\n",
     "    input_dict = {}\n",
     "    input_dict['mel'] = (mel,)\n",
     "    stride = 256\n",
-    "    kernel_size = 1024\n",
     "    n_group = 8\n",
-    "    z_size = (mel.shape[1]-1)*stride + (kernel_size-1) + 1 - (kernel_size-stride)\n",
-    "    z_size = z_size//n_group\n",
-    "    shape = (n_group,z_size,1)\n",
+    "    z_size = mel.shape[2]*stride//n_group\n",
+    "    shape = (1,n_group,z_size)\n",
     "    input_dict['z'] = np.random.normal(0.0, 1.0, shape).astype(mel.dtype)\n",
     "    input_dict['z'] = (input_dict['z'],)\n",
     "    output_dict = {}\n",
     "    output_dict['audio'] = InferContext.ResultFormat.RAW\n",
-    "    batch_size = 1\n",
     "    # call waveglow\n",
-    "    result = infer_ctx_waveglow.run(input_dict, output_dict, batch_size)\n",
+    "    result = infer_ctx_waveglow.run(input_dict, output_dict)\n",
     "    # get the results\n",
     "    signal = result['audio'][0] # take only the first instance in the output batch\n",
     "    # postprocessing of waveglow: trimming signal to its actual size\n",
@@ -319,7 +316,6 @@
     "    # \n",
     "    result = infer_ctx_bert.run(input_dict, output_dict, batch_size)\n",
     "    # \n",
-    "    print(\"BANGLA\")\n",
     "    start_logits = [float(x) for x in result[\"output__0\"][0].flat]\n",
     "    end_logits = [float(x) for x in result[\"output__1\"][0].flat]\n",
     "    return start_logits, end_logits\n",
@@ -453,13 +449,6 @@
     "question_text.observe(question_text_change, names='value')\n",
     "context.observe(context_change, names='value')"
    ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
   }
  ],
  "metadata": {
@@ -478,7 +467,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.4"
+   "version": "3.7.6"
   }
  },
  "nbformat": 4,

+ 5 - 1
PyTorch/SpeechSynthesis/Tacotron2/notebooks/conversationalai/client/speech_ai_demo/utils/bert/preprocessing.py

@@ -14,8 +14,10 @@
 # limitations under the License.
 
 import math
+import json
+import numpy as np
 import collections
-from utils.bert.tokenization import BasicTokenizer
+from utils.bert.tokenization import (BasicTokenizer, BertTokenizer, whitespace_tokenize)
 
 
 class SquadExample(object):
@@ -143,6 +145,8 @@ def convert_example_to_feature(example, tokenizer, max_seq_length,
                 tok_to_orig_index.append(i)
                 all_doc_tokens.append(sub_token)
 
+        tok_start_position = None
+        tok_end_position = None
 
         # The -3 accounts for [CLS], [SEP] and [SEP]
         max_tokens_for_doc = max_seq_length - len(query_tokens) - 3

+ 5 - 0
PyTorch/SpeechSynthesis/Tacotron2/notebooks/conversationalai/client/speech_ai_demo/utils/jasper/speech_utils.py

@@ -28,10 +28,15 @@
 
 import librosa
 import soundfile as sf
+import math
 from os import system
 import numpy as np
 from tensorrtserver.api import *
 import tensorrtserver.api.model_config_pb2 as model_config
+import grpc
+from tensorrtserver.api import api_pb2
+from tensorrtserver.api import grpc_service_pb2
+from tensorrtserver.api import grpc_service_pb2_grpc
 
 WINDOWS_FNS = {"hanning": np.hanning, "hamming": np.hamming, "none": None}
 

+ 68 - 0
PyTorch/SpeechSynthesis/Tacotron2/notebooks/conversationalai/export_tacotron2_ts.py

@@ -0,0 +1,68 @@
+# *****************************************************************************
+#  Copyright (c) 2019, NVIDIA CORPORATION.  All rights reserved.
+#
+#  Redistribution and use in source and binary forms, with or without
+#  modification, are permitted provided that the following conditions are met:
+#      * Redistributions of source code must retain the above copyright
+#        notice, this list of conditions and the following disclaimer.
+#      * Redistributions in binary form must reproduce the above copyright
+#        notice, this list of conditions and the following disclaimer in the
+#        documentation and/or other materials provided with the distribution.
+#      * Neither the name of the NVIDIA CORPORATION nor the
+#        names of its contributors may be used to endorse or promote products
+#        derived from this software without specific prior written permission.
+#
+#  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+#  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+#  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+#  DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
+#  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+#  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+#  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+#  ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+#  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# *****************************************************************************
+
+import torch
+import argparse
+import sys
+sys.path.append('./')
+from inference import checkpoint_from_distributed, unwrap_distributed, load_and_setup_model
+
+def parse_args(parser):
+    """
+    Parse commandline arguments.
+    """
+    parser.add_argument('--tacotron2', type=str, required=True,
+                        help='full path to the Tacotron2 model checkpoint file')
+
+    parser.add_argument('-o', '--output', type=str, default="trtis_repo/tacotron/1/model.pt",
+                        help='filename for the Tacotron 2 TorchScript model')
+    parser.add_argument('--fp16', action='store_true',
+                        help='inference with mixed precision')
+
+    return parser
+
+
+def main():
+
+    parser = argparse.ArgumentParser(
+        description='PyTorch Tacotron 2 Inference')
+    parser = parse_args(parser)
+    args = parser.parse_args()
+
+    tacotron2 = load_and_setup_model('Tacotron2', parser, args.tacotron2,
+                                     fp16_run=args.fp16, cpu_run=False,
+                                     forward_is_infer=True)
+    
+    jitted_tacotron2 = torch.jit.script(tacotron2)
+
+    torch.jit.save(jitted_tacotron2, args.output)
+    
+
+if __name__ == '__main__':
+    main()
+
+    

+ 20 - 29
PyTorch/SpeechSynthesis/Tacotron2/notebooks/conversationalai/model_preparation/patch_jasper_trt7

@@ -1,26 +1,17 @@
-diff --git a/PyTorch/SpeechRecognition/Jasper/trt/Dockerfile b/PyTorch/SpeechRecognition/Jasper/trt/Dockerfile
+diff --git a/PyTorch/SpeechRecognition/Jasper/trt/Dockerfile b/PyTorch/SpeechRecognition/Jasper/tensorrt/Dockerfile
 index e598a67..562be83 100644
---- a/PyTorch/SpeechRecognition/Jasper/trt/Dockerfile
-+++ b/PyTorch/SpeechRecognition/Jasper/trt/Dockerfile
+--- a/PyTorch/SpeechRecognition/Jasper/tensorrt/Dockerfile
++++ b/PyTorch/SpeechRecognition/Jasper/tensorrt/Dockerfile
 @@ -1,4 +1,4 @@
 -ARG FROM_IMAGE_NAME=nvcr.io/nvidia/pytorch:19.10-py3 
-+ARG FROM_IMAGE_NAME=nvcr.io/nvidia/pytorch:20.01-py3
++ARG FROM_IMAGE_NAME=nvcr.io/nvidia/pytorch:20.08-py3
  FROM ${FROM_IMAGE_NAME}
  
  RUN apt-get update && apt-get install -y python3
-@@ -6,7 +6,7 @@ RUN apt-get update && apt-get install -y python3
- WORKDIR /tmp/onnx-trt
- COPY trt/onnx-trt.patch .
- RUN git clone https://github.com/onnx/onnx-tensorrt.git && cd onnx-tensorrt && git submodule update --init --recursive && \
--    patch -f < ../onnx-trt.patch && mkdir build && cd build && cmake .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/usr -DGPU_ARCHS="60 70 75" && make -j16 && make install && mv -f /usr/lib/libnvonnx* /usr/lib/x86_64-linux-gnu/ && ldconfig
-+    mkdir build && cd build && cmake .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/usr -DGPU_ARCHS="60 70 75" && make -j16 && make install && mv -f /usr/lib/libnvonnx* /usr/lib/x86_64-linux-gnu/ && ldconfig
- 
- 
- # Here's a good place to install pip reqs from JoC repo.
-diff --git a/PyTorch/SpeechRecognition/Jasper/trt/perf.py b/PyTorch/SpeechRecognition/Jasper/trt/perf.py
+diff --git a/PyTorch/SpeechRecognition/Jasper/tensorrt/perf.py b/PyTorch/SpeechRecognition/Jasper/tensorrt/perf.py
 index 426ee66..5917a1f 100755
---- a/PyTorch/SpeechRecognition/Jasper/trt/perf.py
-+++ b/PyTorch/SpeechRecognition/Jasper/trt/perf.py
+--- a/PyTorch/SpeechRecognition/Jasper/tensorrt/perf.py
++++ b/PyTorch/SpeechRecognition/Jasper/tensorrt/perf.py
 @@ -64,6 +64,9 @@ def main(args):
              print("TRANSCRIPT: ", hypotheses)
              return
@@ -31,32 +22,32 @@ index 426ee66..5917a1f 100755
      wer, preds, times = perfprocedures.compare_times_trt_pyt_exhaustive(engine,
                                                                          pyt_components,
                                                                          args)
-diff --git a/PyTorch/SpeechRecognition/Jasper/trt/scripts/docker/build.sh b/PyTorch/SpeechRecognition/Jasper/trt/scripts/docker/build.sh
+diff --git a/PyTorch/SpeechRecognition/Jasper/tensorrt/scripts/docker/build.sh b/PyTorch/SpeechRecognition/Jasper/tensorrt/scripts/docker/build.sh
 index 0e44c7f..62e7446 100755
---- a/PyTorch/SpeechRecognition/Jasper/trt/scripts/docker/build.sh
-+++ b/PyTorch/SpeechRecognition/Jasper/trt/scripts/docker/build.sh
+--- a/PyTorch/SpeechRecognition/Jasper/tensorrt/scripts/docker/build.sh
++++ b/PyTorch/SpeechRecognition/Jasper/tensorrt/scripts/docker/build.sh
 @@ -1,5 +1,5 @@
  #!/bin/bash
  
  # Constructs a docker image containing dependencies for execution of JASPER through TRT
--echo "docker build . -f ./trt/Dockerfile -t jasper:trt6"
--docker build . -f ./trt/Dockerfile -t jasper:trt6
-+echo "docker build . -f ./trt/Dockerfile -t jasper:trt7"
-+docker build . -f ./trt/Dockerfile -t jasper:trt7
-diff --git a/PyTorch/SpeechRecognition/Jasper/trt/scripts/docker/launch.sh b/PyTorch/SpeechRecognition/Jasper/trt/scripts/docker/launch.sh
+-echo "docker build . -f ./tensorrt/Dockerfile -t jasper:trt6"
+-docker build . -f ./tensorrt/Dockerfile -t jasper:trt6
++echo "docker build . -f ./tensorrt/Dockerfile -t jasper:trt7"
++docker build . -f ./tensorrt/Dockerfile -t jasper:trt7
+diff --git a/PyTorch/SpeechRecognition/Jasper/tensorrt/scripts/docker/launch.sh b/PyTorch/SpeechRecognition/Jasper/tensorrt/scripts/docker/launch.sh
 index 9959062..ed5e711 100755
---- a/PyTorch/SpeechRecognition/Jasper/trt/scripts/docker/launch.sh
-+++ b/PyTorch/SpeechRecognition/Jasper/trt/scripts/docker/launch.sh
+--- a/PyTorch/SpeechRecognition/Jasper/tensorrt/scripts/docker/launch.sh
++++ b/PyTorch/SpeechRecognition/Jasper/tensorrt/scripts/docker/launch.sh
 @@ -40,4 +40,4 @@ nvidia-docker run -it --rm \
    -v $RESULT_DIR:/results/ \
    -v ${JASPER_REPO}:/jasper \
    ${EXTRA_JASPER_ENV} \
 -  jasper:trt6 bash $PROGRAM_PATH
 +  jasper:trt7 bash $PROGRAM_PATH
-diff --git a/PyTorch/SpeechRecognition/Jasper/trt/trtutils.py b/PyTorch/SpeechRecognition/Jasper/trt/trtutils.py
+diff --git a/PyTorch/SpeechRecognition/Jasper/tensorrt/trtutils.py b/PyTorch/SpeechRecognition/Jasper/tensorrt/trtutils.py
 index 92460b2..01c8b6a 100644
---- a/PyTorch/SpeechRecognition/Jasper/trt/trtutils.py
-+++ b/PyTorch/SpeechRecognition/Jasper/trt/trtutils.py
+--- a/PyTorch/SpeechRecognition/Jasper/tensorrt/trtutils.py
++++ b/PyTorch/SpeechRecognition/Jasper/tensorrt/trtutils.py
 @@ -40,7 +40,7 @@ def build_engine_from_parser(args):
      '''
      TRT_LOGGER = trt.Logger(trt.Logger.VERBOSE) if args.verbose else trt.Logger(trt.Logger.WARNING)

+ 44 - 0
PyTorch/SpeechSynthesis/Tacotron2/notebooks/conversationalai/model_repo/bertQA-ts-script/config.pbtxt

@@ -0,0 +1,44 @@
+name: "bertQA-ts-script"
+platform: "pytorch_libtorch"
+max_batch_size: 8
+input [
+{
+    name: "input__0"
+    data_type: TYPE_INT64
+    dims: [384]
+},
+{
+    name: "input__1"
+    data_type: TYPE_INT64
+    dims: [384]
+},
+{
+    name: "input__2"
+    data_type: TYPE_INT64
+    dims: [384]
+}
+]
+output [
+{
+    name: "output__0"
+    data_type: TYPE_FP16
+    dims: [384]
+}, 
+{
+    name: "output__1"
+    data_type: TYPE_FP16
+    dims: [384]
+}
+]
+optimization {
+  cuda {
+    graphs: 0
+  }
+}
+instance_group [
+    {
+        count: 1
+        kind: KIND_GPU
+        gpus: [ 0 ]
+    }
+]

+ 3 - 2
PyTorch/SpeechSynthesis/Tacotron2/notebooks/conversationalai/model_repo/jasper-trt-ensemble/config.pbtxt

@@ -1,10 +1,10 @@
 name: "jasper-trt-ensemble"
 platform: "ensemble"
-max_batch_size: 1
+max_batch_size: 1#MAX_BATCH
 input {
   name: "AUDIO_SIGNAL"
   data_type: TYPE_FP32
-  dims: -1
+  dims: -1#AUDIO_LENGTH
 }
 input {
     name: "NUM_SAMPLES"
@@ -58,3 +58,4 @@ ensemble_scheduling {
     }
   }
 }
+

+ 4 - 4
PyTorch/SpeechSynthesis/Tacotron2/notebooks/conversationalai/model_repo/waveglow-trt/config.pbtxt

@@ -2,20 +2,20 @@ name: "waveglow-trt"
 platform: "tensorrt_plan"
 default_model_filename: "waveglow_fp16.engine"
 
-max_batch_size: 1
+max_batch_size: 0
 
 input {
   name: "mel"
   data_type: TYPE_FP16
-  dims: [80, -1, 1]
+  dims: [1, 80, -1]
 }
 input {
   name: "z"
   data_type: TYPE_FP16
-  dims: [8, -1, 1]
+  dims: [1, 8, -1]
 }
 output {
   name: "audio"
   data_type: TYPE_FP16
-  dims: [-1]
+  dims: [1, -1]
 }
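
A quick sanity check (illustrative numbers only) of how the new `[1, 80, -1]`, `[1, 8, -1]` and `[1, -1]` dimensions relate: for `T` mel frames, WaveGlow upsamples by the 256-sample STFT hop and folds the noise `z` into 8 groups.

```python
# Shape bookkeeping for the waveglow-trt config above (example T only).
hop, n_group = 256, 8
T = 620                                          # example number of mel frames
mel_shape   = (1, 80, T)                         # "mel"   dims: [1, 80, -1]
z_shape     = (1, n_group, T * hop // n_group)   # "z"     dims: [1, 8, -1]
audio_shape = (1, T * hop)                       # "audio" dims: [1, -1]
assert z_shape[2] * n_group == audio_shape[1]
print(mel_shape, z_shape, audio_shape)
```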

+ 0 - 0
PyTorch/SpeechSynthesis/Tacotron2/notebooks/trtis/LICENSE → PyTorch/SpeechSynthesis/Tacotron2/notebooks/triton/LICENSE


+ 146 - 0
PyTorch/SpeechSynthesis/Tacotron2/notebooks/triton/README.md

@@ -0,0 +1,146 @@
+# Tacotron 2 and WaveGlow inference on Triton Inference Server
+
+## Setup
+
+### Clone the repository.
+```bash
+git clone https://github.com/NVIDIA/DeepLearningExamples.git
+cd DeepLearningExamples/PyTorch/SpeechSynthesis/Tacotron2
+```
+
+### Obtain models to be loaded in Triton Inference Server.
+
+We have prepared Tacotron 2 and WaveGlow models that are ready to be loaded in
+Triton Inference Server, so you don't need to train and export the models.
+Please follow the instructions below to learn how to train,
+export --- or simply download the pretrained models.
+
+### Obtain Tacotron 2 and WaveGlow checkpoints.
+
+You can either download the pretrained checkpoints or train the models yourself.
+
+#### (Option 1) Download pretrained checkpoints.
+
+If you want to use pretrained checkpoints, download them from [NGC](https://ngc.nvidia.com/catalog/models):
+
+- [Tacotron2 checkpoint](https://ngc.nvidia.com/models/nvidia:tacotron2pyt_fp16)
+- [WaveGlow checkpoint](https://ngc.nvidia.com/models/nvidia:waveglow256pyt_fp16)
+
+
+#### (Option 2) Train Tacotron 2 and WaveGlow models.
+
+In order to train the models, follow the QuickStart section in the `Tacotron2/README.md`
+file by executing points 1-5. You have to train WaveGlow in a different way than described there. Use
+the following command instead of the one given in QuickStart at point 5:
+
+```bash
+python -m multiproc train.py -m WaveGlow -o output/ --amp -lr 1e-4 --epochs 2001 --wn-channels 256 -bs 12 --segment-length 16000 --weight-decay 0 --grad-clip-thresh 65504.0 --cudnn-benchmark --cudnn-enabled --log-file output/nvlog.json
+```
+
+This will train the WaveGlow model with a smaller number of residual connections
+in the coupling layer networks and larger segment length. Training should take
+about 100 hours on DGX-1 (8x V100 16G).
+
+### Setup Tacotron 2 TorchScript.
+
+There are two ways to proceed.
+
+#### (Option 1) Download the Tacotron 2 TorchScript model.
+
+Download the Tacotron 2 TorchScript model from:
+- [Tacotron2 TorchScript](https://ngc.nvidia.com/models/nvidia:tacotron2pyt_jit_fp16)
+
+Next, save it to `triton_models/tacotron2-ts-script/1/` and rename as `model.pt`:
+
+```bash
+wget https://api.ngc.nvidia.com/v2/models/nvidia/tacotron2pyt_jit_fp16/versions/1/files/nvidia_tacotron2pyt_jit_fp16
+mkdir -p triton_models/tacotron2-ts-script/1/
+mv nvidia_tacotron2pyt_jit_fp16 triton_models/tacotron2-ts-script/1/model.pt
+```
+
+Copy the Triton config file for the Tacotron 2 model to the model directory:
+
+```bash
+cp notebooks/triton/tacotron2_ts-script_config.pbtxt triton_models/tacotron2-ts-script/config.pbtxt
+```
+
+#### (Option 2) Export the Tacotron 2 model using TorchScript.
+
+To export the Tacotron 2 model using TorchScript, type:
+```bash
+python exports/export_tacotron2.py --triton-model-name tacotron2-ts-script --export ts-script -- --checkpoint <Tacotron 2 checkpoint> --config-file config.json
+```
+This will create the model as file `model.pt` and save it in folder `triton_models/tacotron2-ts-script/1/`.
+The command will also generate the Triton configuration file `config.pbtxt` for the Tacotron 2 model.
+You can change the folder names using the flags `--triton-models-dir` (default `triton_models`), `--triton-model-name` (default `""`) and `--triton-model-version` (default `1`).
+You can also change model file name with the flag `--export-name <filename>`.
+
+### Setup WaveGlow TensorRT engine.
+
+There are two ways to proceed.
+
+#### (Option 1) Download the WaveGlow TensorRT engine.
+
+Download the WaveGlow TensorRT engine from:
+- [WaveGlow TensorRT engine](https://ngc.nvidia.com/models/nvidia:waveglow256pyt_trt_fp16)
+Next, save it to `triton_models/waveglow-tensorrt/1/` and rename as `model.plan`:
+
+```bash
+wget https://api.ngc.nvidia.com/v2/models/nvidia/waveglow256pyt_trt_fp16/versions/1/files/nvidia_waveglow256pyt_trt_fp16
+mkdir -p triton_models/waveglow-tensorrt/1/
+mv nvidia_waveglow256pyt_trt_fp16 triton_models/waveglow-tensorrt/1/model.plan
+```
+
+Copy the Triton config file for the WaveGlow model to the model directory:
+
+```bash
+cp notebooks/triton/waveglow_tensorrt_config.pbtxt triton_models/waveglow-tensorrt/config.pbtxt
+```
+
+#### (Option 2) Export the WaveGlow model to TensorRT.
+
+In order to export the model into the TensorRT engine, type:
+
+```bash
+python exports/export_waveglow.py --triton-model-name waveglow-tensorrt --export tensorrt --tensorrt-fp16 -- --checkpoint <waveglow_checkpoint> --config-file config.json --wn-channels 256
+```
+
+This will create the model as file `model.plan` and save it in folder `triton_models/waveglow-tensorrt/1/`.
+The command will also generate the Triton configuration file `config.pbtxt` for the WaveGlow model.
+You can change the folder names using the flags `--triton-models-dir` (default `triton_models`), `--triton-model-name` (default `""`) and `--triton-model-version` (default `1`).
+You can also change model file name with the flag `--export-name <filename>`.
+
+### Setup the Triton Inference Server.
+
+Download the Triton Inference Server container by typing:
+```bash
+docker pull nvcr.io/nvidia/tritonserver:20.06-py3
+docker tag nvcr.io/nvidia/tritonserver:20.06-py3 tritonserver:20.06
+```
+
+### Setup the Triton notebook client.
+
+Now go to the root directory of the Tacotron 2 repo, and type:
+
+```bash
+docker build -f Dockerfile_triton_client --network=host -t speech_ai_tts_only:demo .
+```
+
+### Run the Triton Inference Server.
+
+To run the server, type in the root directory of the Tacotron 2 repo:
+```bash
+NV_GPU=1 nvidia-docker run -ti --ipc=host --network=host --rm -p8000:8000 -p8001:8001 -v $PWD/triton_models/:/models tritonserver:20.06 tritonserver --model-store=/models --log-verbose 1
+```
+
+The flag `NV_GPU` selects the GPU the server is going to see. If we want it to see all the available GPUs, then run the above command without this flag.
+By default, the model repository will be in `triton_models/`.
+
+### Run the Triton notebook client.
+
+Leave the server running. In another terminal, type:
+```bash
+docker run -it --rm --network=host --device /dev/snd:/dev/snd speech_ai_tts_only:demo bash ./run_this.sh
+```
+
+Open the URL in a browser, open `notebook.ipynb`, click play, and enjoy.
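
For reference, a minimal standalone client sketch that sends one sentence to `tacotron2-ts-script`, mirroring the calls in `notebook.ipynb` below; it assumes the server above is reachable on `localhost:8000` and is run from the Tacotron2 root so that `tacotron2.text` is importable.

```python
# Minimal Tacotron 2 request via tritonhttpclient (sketch, not the shipped client).
import numpy as np
import tritonhttpclient as thc
from tacotron2.text import text_to_sequence

client = thc.InferenceServerClient('localhost:8000')

sequence = np.array(text_to_sequence('Hello world.', ['english_cleaners']),
                    dtype=np.int64).reshape(1, -1)
lengths = np.array([[sequence.shape[1]]], dtype=np.int64)

inputs = [thc.InferInput('input__0', sequence.shape, 'INT64'),
          thc.InferInput('input__1', lengths.shape, 'INT64')]
inputs[0].set_data_from_numpy(sequence, binary_data=True)
inputs[1].set_data_from_numpy(lengths, binary_data=True)
outputs = [thc.InferRequestedOutput('output__0', binary_data=True),   # mel spectrogram
           thc.InferRequestedOutput('output__1', binary_data=True),   # mel lengths
           thc.InferRequestedOutput('output__2', binary_data=True)]   # alignments

result = client.infer(model_name='tacotron2-ts-script', inputs=inputs, outputs=outputs)
print(result.as_numpy('output__0').shape)   # (1, 80, mel frames)
```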

+ 43 - 67
PyTorch/SpeechSynthesis/Tacotron2/notebooks/trtis/notebook.ipynb → PyTorch/SpeechSynthesis/Tacotron2/notebooks/triton/notebook.ipynb

@@ -14,15 +14,14 @@
     "import matplotlib.pyplot as plt\n",
     "from matplotlib import cm as cm\n",
     "from IPython.display import Audio, display, clear_output, Markdown, Image\n",
-    "import librosa\n",
-    "import librosa.display\n",
+    "#import librosa\n",
+    "#import librosa.display\n",
     "import ipywidgets as widgets\n",
     "# \n",
     "from tacotron2.text import text_to_sequence as text_to_sequence_internal\n",
     "from tacotron2.text.symbols import symbols\n",
     "# \n",
-    "from tensorrtserver.api import *\n",
-    "\n",
+    "import tritonhttpclient as thc\n",
     "\n",
     "defaults = {\n",
     "    # settings\n",
@@ -30,7 +29,6 @@
     "    'sampling_rate': 22050,    # don't touch this\n",
     "    'stft_hop_length': 256,    # don't touch this\n",
     "    'url': 'localhost:8000',   # don't touch this\n",
-    "    'protocol': 0,             # 0: http, 1: grpc \n",
     "    'autoplay': True,          # autoplay\n",
     "    'character_limit_min': 4,  # don't touch this\n",
     "    'character_limit_max': 340 # don't touch this\n",
@@ -42,33 +40,9 @@
     "    def __init__(self, **entries):\n",
     "        self.__dict__.update(entries)\n",
     "\n",
-    "\n",
     "args = Struct(**defaults)\n",
     "\n",
-    "\n",
-    "# create the inference context for the models\n",
-    "infer_ctx_tacotron2 = InferContext(args.url, args.protocol, 'tacotron2', -1)\n",
-    "infer_ctx_waveglow = InferContext(args.url, args.protocol, 'waveglow', -1)\n",
-    "\n",
-    "\n",
-    "def display_heatmap(sequence, title='preprocessed text'):\n",
-    "    ''' displays sequence as a heatmap '''\n",
-    "    clear_output(wait=True)\n",
-    "    sequence = sequence[None, :]\n",
-    "    plt.figure(figsize=(10, 2.5))\n",
-    "    plt.title(title)\n",
-    "    plt.tick_params(\n",
-    "        axis='both',\n",
-    "        which='both',\n",
-    "        bottom=False,\n",
-    "        top=False,\n",
-    "        left=False,\n",
-    "        right=False,\n",
-    "        labelbottom=False,\n",
-    "        labelleft=False)\n",
-    "    plt.imshow(sequence, cmap='BrBG_r', interpolation='nearest')\n",
-    "    plt.show()\n",
-    "\n",
+    "triton_client = thc.InferenceServerClient(args.url)\n",
     "\n",
     "def display_sound(signal, title, color):\n",
     "    ''' displays signal '''\n",
@@ -84,7 +58,13 @@
     "        right=False,\n",
     "        labelbottom=True,\n",
     "        labelleft=False)\n",
-    "    librosa.display.waveplot(signal, color=color)\n",
+    "    # librosa.display.waveplot(signal, color=color)\n",
+    "    sig = signal[0]\n",
+    "    hop = args.stft_hop_length\n",
+    "    smoothed = []\n",
+    "    for i in range(0, len(sig), hop):\n",
+    "        smoothed.append(np.average(sig[i:i+hop]))\n",
+    "    plt.plot(smoothed, color=color)\n",
     "    plt.show()\n",
     "\n",
     "\n",
@@ -105,7 +85,7 @@
     "        labelleft=False)\n",
     "    plt.xlabel('Time')\n",
     "    cmap = cm.get_cmap('jet', 30)\n",
-    "    cax = ax.imshow(mel.astype(np.float32), interpolation=\"nearest\", cmap=cmap)\n",
+    "    cax = ax.imshow(mel[0].astype(np.float32), interpolation=\"nearest\", cmap=cmap)\n",
     "    ax.grid(True)\n",
     "    plt.show()\n",
     "\n",
@@ -128,23 +108,24 @@
     "                     mel_lengths contains the length of the unpadded mel, np.array\n",
     "                     alignments contains attention weigths, np.array\n",
     "    '''\n",
-    "    input_lengths = [len(sequence)]\n",
-    "    input_lengths = np.array(input_lengths, dtype=np.int64)\n",
+    "    sequence = np.reshape(sequence, (1, -1))\n",
+    "    input_lengths = np.array([[len(sequence[0])]], dtype=np.int64)\n",
     "    # prepare input/output\n",
-    "    input_dict = {}\n",
-    "    input_dict['sequence__0'] = (sequence,)\n",
-    "    input_dict['input_lengths__1'] = (input_lengths,)\n",
-    "    output_dict = {}\n",
-    "    output_dict['mel_outputs_postnet__0'] = InferContext.ResultFormat.RAW\n",
-    "    output_dict['mel_lengths__1'] = InferContext.ResultFormat.RAW\n",
-    "    output_dict['alignments__2'] = InferContext.ResultFormat.RAW\n",
-    "    batch_size = 1\n",
+    "    inputs = []\n",
+    "    inputs.append(thc.InferInput('input__0', sequence.shape, 'INT64'))\n",
+    "    inputs.append(thc.InferInput('input__1', input_lengths.shape, 'INT64'))\n",
+    "    inputs[0].set_data_from_numpy(sequence, binary_data=True)\n",
+    "    inputs[1].set_data_from_numpy(input_lengths, binary_data=True)\n",
+    "    outputs = []\n",
+    "    outputs.append(thc.InferRequestedOutput('output__0', binary_data=True))\n",
+    "    outputs.append(thc.InferRequestedOutput('output__1', binary_data=True))\n",
+    "    outputs.append(thc.InferRequestedOutput('output__2', binary_data=True))\n",
     "    # call tacotron2\n",
-    "    result = infer_ctx_tacotron2.run(input_dict, output_dict, batch_size)\n",
+    "    result = triton_client.infer(model_name=\"tacotron2-ts-script\", inputs=inputs, outputs=outputs)\n",
     "    # get results\n",
-    "    mel = result['mel_outputs_postnet__0'][0] # take only the first instance in the output batch\n",
-    "    mel_lengths = result['mel_lengths__1'][0] # take only the first instance in the output batch\n",
-    "    alignments = result['alignments__2'][0] # take only the first instance in the output batch\n",
+    "    mel = result.as_numpy('output__0')\n",
+    "    mel_lengths = result.as_numpy('output__1')\n",
+    "    alignments = result.as_numpy('output__2')\n",
     "    return mel, mel_lengths, alignments\n",
     "\n",
     "\n",
@@ -154,27 +135,27 @@
     "        ::mel_lengths:: original length of mel spectrogram\n",
     "        ::returns:: waveform\n",
     "    '''\n",
-    "    mel = mel[:,:,None]\n",
     "    # prepare input/output\n",
-    "    input_dict = {}\n",
-    "    input_dict['mel'] = (mel,)\n",
+    "    mel = mel[:,:,:,None]\n",
     "    stride = 256\n",
-    "    kernel_size = 1024\n",
     "    n_group = 8\n",
-    "    z_size = (mel.shape[1]-1)*stride + (kernel_size-1) + 1 - (kernel_size-stride)\n",
-    "    z_size = z_size//n_group\n",
-    "    shape = (n_group,z_size,1)\n",
-    "    input_dict['z'] = np.random.normal(0.0, 1.0, shape).astype(mel.dtype)\n",
-    "    input_dict['z'] = (input_dict['z'],)\n",
-    "    output_dict = {}\n",
-    "    output_dict['audio'] = InferContext.ResultFormat.RAW\n",
-    "    batch_size = 1\n",
+    "    z_size =  mel.shape[2]*stride//n_group\n",
+    "    shape = (1, n_group, z_size, 1)\n",
+    "    z = np.random.normal(0.0, 1.0, shape).astype(mel.dtype)\n",
+    "    \n",
+    "    inputs = []\n",
+    "    inputs.append(thc.InferInput('mel', mel.shape, 'FP16'))\n",
+    "    inputs.append(thc.InferInput('z', z.shape, 'FP16'))\n",
+    "    inputs[0].set_data_from_numpy(mel, binary_data=True)\n",
+    "    inputs[1].set_data_from_numpy(z, binary_data=True)\n",
+    "    outputs = []\n",
+    "    outputs.append(thc.InferRequestedOutput('audio', binary_data=True))\n",
     "    # call waveglow\n",
-    "    result = infer_ctx_waveglow.run(input_dict, output_dict, batch_size)\n",
+    "    result = triton_client.infer(model_name=\"waveglow-tensorrt\", inputs=inputs, outputs=outputs)\n",
     "    # get the results\n",
-    "    signal = result['audio'][0] # take only the first instance in the output batch\n",
+    "    signal = result.as_numpy('audio')\n",
     "    # postprocessing of waveglow: trimming signal to its actual size\n",
-    "    trimmed_length = mel_lengths[0] * args.stft_hop_length\n",
+    "    trimmed_length = mel.shape[2]*args.stft_hop_length\n",
     "    signal = signal[:trimmed_length] # trim\n",
     "    signal = signal.astype(np.float32)\n",
     "    return signal\n",
@@ -201,7 +182,6 @@
     ")\n",
     "\n",
     "\n",
-    "plot_text_area_preprocessed = get_output_widget(width='10in',height='1in')\n",
     "plot_spectrogram = get_output_widget(width='10in',height='2.1in')\n",
     "plot_signal = get_output_widget(width='10in',height='2.1in')\n",
     "plot_play = get_output_widget(width='10in',height='1in')\n",
@@ -219,8 +199,6 @@
     "        return\n",
     "    # preprocess tacotron2\n",
     "    sequence = text_to_sequence(text)\n",
-    "    with plot_text_area_preprocessed:\n",
-    "        display_heatmap(sequence)\n",
     "    # run tacotron2\n",
     "    mel, mel_lengths, alignments = sequence_to_mel(sequence)\n",
     "    with plot_spectrogram:\n",
@@ -241,7 +219,6 @@
     "# decorative widgets\n",
     "empty = widgets.VBox([], layout=widgets.Layout(height='1in'))\n",
     "markdown_4 = Markdown('**tacotron2 input**')\n",
-    "markdown_5 = Markdown('**tacotron2 preprocessing**')\n",
     "markdown_6 = Markdown('**tacotron2 output / waveglow input**')\n",
     "markdown_7 = Markdown('**waveglow output**')\n",
     "markdown_8 = Markdown('**play**')\n",
@@ -250,7 +227,6 @@
     "display(\n",
     "    empty, \n",
     "    markdown_4, text_area, \n",
-    "#    markdown_5, plot_text_area_preprocessed, \n",
     "    markdown_6, plot_spectrogram, \n",
     "    markdown_7, plot_signal, \n",
     "    markdown_8, plot_play, \n",
@@ -285,7 +261,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.4"
+   "version": "3.7.6"
   }
  },
  "nbformat": 4,

+ 0 - 0
PyTorch/SpeechSynthesis/Tacotron2/notebooks/trtis/run_this.sh → PyTorch/SpeechSynthesis/Tacotron2/notebooks/triton/run_this.sh


+ 39 - 0
PyTorch/SpeechSynthesis/Tacotron2/notebooks/triton/tacotron2_ts-script_config.pbtxt

@@ -0,0 +1,39 @@
+name: "tacotron2-ts-script"
+platform: "pytorch_libtorch"
+max_batch_size: 1
+input [
+{
+    name: "input__0"
+    data_type: TYPE_INT64
+    dims: [-1]
+},
+{
+    name: "input__1"
+    data_type: TYPE_INT64
+    dims: [1]
+    reshape: { shape: [ ] }
+}
+]
+output [
+{
+    name: "output__0"
+    data_type: TYPE_FP16
+    dims: [80, -1]
+},
+{
+    name: "output__1"
+    data_type: TYPE_INT32
+    dims: [1]
+    reshape: { shape: [ ] }
+},
+{
+    name: "output__2"
+    data_type: TYPE_FP16
+    dims: [-1, -1]
+}
+]
+optimization {
+  cuda {
+    graphs: 1
+  }
+}

+ 22 - 0
PyTorch/SpeechSynthesis/Tacotron2/notebooks/triton/waveglow_tensorrt_config.pbtxt

@@ -0,0 +1,22 @@
+
+name: "waveglow-tensorrt"
+platform: "tensorrt_plan"
+default_model_filename: "model.plan"
+
+max_batch_size: 0
+
+input {
+  name: "mel"
+  data_type: TYPE_FP16
+  dims: [1, 80, -1, 1]
+}
+input {
+  name: "z"
+  data_type: TYPE_FP16
+  dims: [1, 8, -1, 1]
+}
+output {
+  name: "audio"
+  data_type: TYPE_FP16
+  dims: [1, -1]
+}

+ 0 - 153
PyTorch/SpeechSynthesis/Tacotron2/notebooks/trtis/README.md

@@ -1,153 +0,0 @@
-
-# Tacotron 2 and WaveGlow inference on TRTIS
-
-## Setup
-
-### Clone the repository.
-```bash
-git clone https://github.com/NVIDIA/DeepLearningExamples.git
-cd DeepLearningExamples/PyTorch/SpeechSynthesis/Tacotron2
-```
-
-### Obtain models to be loaded in TRTIS.
-
-We have prepared Tacotron 2 and WaveGlow models that are ready to be loaded in TRTIS,
-so you don't need to train and export the models. Please follow the instructions 
-below to learn how to train, export --- or simply download the pretrained models. 
-
-### Obtain Tacotron 2 and WaveGlow checkpoints.
-
-You can either download the pretrained checkpoints or train the models yourself.
-
-#### (Option 1) Download pretrained checkpoints.
-
-If you want to use a pretrained checkpoints, download them from [NGC](https://ngc.nvidia.com/catalog/models):
-
-- [Tacotron2 checkpoint](https://ngc.nvidia.com/models/nvidia:tacotron2pyt_fp16)
-- [WaveGlow checkpoint](https://ngc.nvidia.com/models/nvidia:waveglow256pyt_fp16)
-
-
-#### (Option 2) Train Tacotron 2 and WaveGlow models.
-
-In order to train the models, follow the QuickStart section in the `Tacotron2/README.md`
-file by executing points 1-5. You have to train WaveGlow in a different way than described there. Use
-the following command instead of the one given in QuickStart at point 5:
-
-```bash
-python -m multiproc train.py -m WaveGlow -o output/ --amp -lr 1e-4 --epochs 2001 --wn-channels 256 -bs 12 --segment-length 16000 --weight-decay 0 --grad-clip-thresh 65504.0 --cudnn-benchmark --cudnn-enabled --log-file output/nvlog.json
-```
-
-This will train the WaveGlow model with a smaller number of residual connections
-in the coupling layer networks and larger segment length. Training should take 
-about 100 hours on DGX-1 (8x V100 16G).
-
-### Setup Tacotron 2 TorchScript.
-
-First, you need to create a folder structure for the model to be loaded in TRTIS server.
-Follow the Tacotron 2 Quick Start Guide (points 1-4) to start the container.
-Inside the container, type:
-```bash
-cd /workspace/tacotron2/
-python exports/export_tacotron2_ts_config.py --fp16
-```
-
-This will export the folder structure of the TRTIS repository and the config file of Tacotron 2. 
-By default, it will be found in the `trtis_repo/tacotron2` folder.
-
-Now there are two ways to proceed.
-
-#### (Option 1) Download the Tacotron 2 TorchScript model.
-
-Download the Tacotron 2 TorchScript model from:
-- [Tacotron2 TorchScript](https://ngc.nvidia.com/models/nvidia:tacotron2pyt_jit_fp16)
-
-Move the downloaded model to `trtis_repo/tacotron2/1/model.pt`
-
-#### (Option 2) Export the Tacotron 2 model using TorchScript.
-
-To export the Tacotron 2 model using TorchScript, type:
-```bash
-python exports/export_tacotron2_ts.py --tacotron2 <tacotron2_checkpoint> -o trtis_repo/tacotron2/1/model.pt --fp16
-```
-
-This will save the model as ``trtis_repo/tacotron2/1/model.pt``.
-
-### Setup WaveGlow TRT engine.
-
-For WaveGlow, we also need to create the folder structure that will be used by the TRTIS server. 
-Inside the container, type:
-```bash
-cd /workspace/tacotron2/
-python exports/export_waveglow_trt_config.py --fp16
-```
-
-This will export the folder structure of the TRTIS repository and the config file of Waveglow. 
-By default, it will be found in the `trtis_repo/waveglow` folder.
-
-There are two ways to proceed. 
-
-#### (Option 1) Download the WaveGlow TRT engine.
-
-Download the WaveGlow TRT engine from:
-- [WaveGlow TRT engine](https://ngc.nvidia.com/models/nvidia:waveglow256pyt_trt_fp16)
-
-Move the downloaded model to `trtis_repo/waveglow/1/model.plan`
-
-#### (Option 2) Export the WaveGlow model to TRT.
-
-Before exporting the model, you need to install onnx-tensorrt by typing:
-```bash
-cd /workspace && git clone https://github.com/onnx/onnx-tensorrt.git
-cd /workspace/onnx-tensorrt/ && git submodule update --init --recursive
-cd /workspace/onnx-tensorrt && mkdir -p build
-cd /workspace/onnx-tensorrt/build && cmake .. -DCMAKE_CXX_FLAGS=-isystem\ /usr/local/cuda/include && make -j12 && make install
-```
-
-In order to export the model into the ONNX intermediate representation, type:
-
-```bash
-python exports/export_waveglow_onnx.py --waveglow <waveglow_checkpoint> --wn-channels 256 --fp16 --output ./output
-```
-
-This will save the model as `waveglow.onnx` (you can change its name with the flag `--output <filename>`).
-
-With the model exported to ONNX, type the following to obtain a TRT engine and save it as `trtis_repo/waveglow/1/model.plan`:
-
-```bash
-python trt/export_onnx2trt.py --waveglow  <exported_waveglow_onnx> -o trtis_repo/waveglow/1/ --fp16
-```
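The conversion script wraps the standard ONNX-to-TensorRT flow. A condensed sketch of that flow follows (TensorRT 7.x Python API; the input names and the min/opt/max shapes below are illustrative assumptions, not the script's exact values):

```python
# Build an FP16 TensorRT engine from an ONNX file (TensorRT 7.x API).
import tensorrt as trt

TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

def build_fp16_engine(onnx_path):
    explicit_batch = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
    with trt.Builder(TRT_LOGGER) as builder, \
         builder.create_network(explicit_batch) as network, \
         trt.OnnxParser(network, TRT_LOGGER) as parser:
        with open(onnx_path, "rb") as f:
            if not parser.parse(f.read()):
                raise RuntimeError(parser.get_error(0))
        config = builder.create_builder_config()
        config.set_flag(trt.BuilderFlag.FP16)
        # WaveGlow's mel/z inputs are dynamic, so an optimization profile is required.
        profile = builder.create_optimization_profile()
        profile.set_shape("mel", (1, 80, 32), (1, 80, 768), (1, 80, 1664))
        profile.set_shape("z",   (1, 8, 1024), (1, 8, 24576), (1, 8, 53248))
        config.add_optimization_profile(profile)
        return builder.build_engine(network, config)

engine = build_fp16_engine("output/waveglow.onnx")
with open("trtis_repo/waveglow/1/model.plan", "wb") as f:
    f.write(engine.serialize())
```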
-
-### Setup the TRTIS server.
-
-Download the TRTIS container by typing:
-```bash
-docker pull nvcr.io/nvidia/tritonserver:20.03-py3
-docker tag nvcr.io/nvidia/tritonserver:20.03-py3 tritonserver:20.03
-```
-
-### Setup the TRTIS notebook client.
-
-Now go to the root directory of the Tacotron 2 repo, and type: 
-
-```bash
-docker build -f Dockerfile_trtis_client --network=host -t speech_ai_tts_only:demo .
-```
-
-### Run the TRTIS server.
-
-To run the server, type in the root directory of the Tacotron 2 repo:
-```bash
-NV_GPU=1 nvidia-docker run -ti --ipc=host --network=host --rm -p8000:8000 -p8001:8001 -v $PWD/trtis_repo/:/models tritonserver:20.03 trtserver --model-store=/models --log-verbose 1
-```
-
-The `NV_GPU` environment variable selects which GPU the server can see. To expose all available GPUs, run the above command without it.
-By default, the model repository will be in `trtis_repo/`.
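Once the server is up, a quick way to confirm it is serving is to hit its health endpoint from Python (endpoint path assumed from the server's v1 HTTP API):

```python
# Readiness probe for the inference server started above (HTTP port 8000).
import requests

resp = requests.get("http://localhost:8000/api/health/ready", timeout=5)
print("server ready" if resp.status_code == 200 else f"not ready yet: HTTP {resp.status_code}")
```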
-
-### Run the TRTIS notebook client.
-
-Leave the server running. In another terminal, type:
-```bash
-docker run -it --rm --network=host --device /dev/snd:/dev/snd --device /dev/usb:/dev/usb speech_ai_tts_only:demo bash ./run_this.sh
-```
-
-Open the URL printed in the terminal in a browser, open `notebook.ipynb`, run the cells, and enjoy.

+ 1 - 1
PyTorch/SpeechSynthesis/Tacotron2/tacotron2/arg_parser.py

@@ -30,7 +30,7 @@ import argparse
 from tacotron2.text import symbols
 
 
-def parse_tacotron2_args(parent, add_help=False):
+def tacotron2_parser(parent, add_help=False):
     """
     Parse commandline arguments.
     """

+ 13 - 13
PyTorch/SpeechSynthesis/Tacotron2/trt/README.md → PyTorch/SpeechSynthesis/Tacotron2/tensorrt/README.md

@@ -47,27 +47,27 @@ NVIDIA TensorRT is a platform for high-performance deep learning inference. It i
     dpkg -l | grep TensorRT
     ```
 
-6. Export the models to ONNX intermediate representation (ONNX IR).
-   Export Tacotron 2 to three ONNX parts: Encoder, Decoder, and Postnet:
+6. Convert the models to ONNX intermediate representation (ONNX IR).
+   Convert Tacotron 2 to three ONNX parts: Encoder, Decoder, and Postnet:
 
 	```bash
 	mkdir -p output
-	python exports/export_tacotron2_onnx.py --tacotron2 ./checkpoints/nvidia_tacotron2pyt_fp16_20190427 -o output/ --fp16
+	python tensorrt/convert_tacotron22onnx.py --tacotron2 ./checkpoints/nvidia_tacotron2pyt_fp16_20190427 -o output/ --fp16
 	```
 
-    Export WaveGlow to ONNX IR:
+    Convert WaveGlow to ONNX IR:
 
 	```bash
-	python exports/export_waveglow_onnx.py --waveglow ./checkpoints/nvidia_waveglow256pyt_fp16 --wn-channels 256 -o output/ --fp16
-	```
+	python tensorrt/convert_waveglow2onnx.py --waveglow ./checkpoints/nvidia_waveglow256pyt_fp16 --config-file config.json --wn-channels 256 -o output/ --fp16
+    ```
 
 	After running the above commands, there should be four new ONNX files in `./output/` directory:
     `encoder.onnx`, `decoder_iter.onnx`, `postnet.onnx`, and `waveglow.onnx`.
 
-7. Export the ONNX IRs to TensorRT engines with fp16 mode enabled:
+7. Convert the ONNX IRs to TensorRT engines with fp16 mode enabled:
 
 	```bash
-	python trt/export_onnx2trt.py --encoder output/encoder.onnx --decoder output/decoder_iter.onnx --postnet output/postnet.onnx --waveglow output/waveglow.onnx -o output/ --fp16
+	python tensorrt/convert_onnx2trt.py --encoder output/encoder.onnx --decoder output/decoder_iter.onnx --postnet output/postnet.onnx --waveglow output/waveglow.onnx -o output/ --fp16
 	```
 
 	After running the command, there should be four new engine files in `./output/` directory:
@@ -76,14 +76,14 @@ NVIDIA TensorRT is a platform for high-performance deep learning inference. It i
 8. Run TTS inference pipeline with fp16:
 
 	```bash
-	python trt/inference_trt.py -i phrases/phrase.txt --encoder output/encoder_fp16.engine --decoder output/decoder_iter_fp16.engine --postnet output/postnet_fp16.engine --waveglow output/waveglow_fp16.engine -o output/ --fp16
+	python tensorrt/inference_trt.py -i phrases/phrase.txt --encoder output/encoder_fp16.engine --decoder output/decoder_iter_fp16.engine --postnet output/postnet_fp16.engine --waveglow output/waveglow_fp16.engine -o output/ --fp16
 	```
 
 ## Inference performance: NVIDIA T4
 
-Our results were obtained by running the `./trt/run_latency_tests_trt.sh` script in the PyTorch-19.11-py3 NGC container. Please note that to reproduce the results, you need to provide pretrained checkpoints for Tacotron 2 and WaveGlow. Please edit the script to provide your checkpoint filenames. For all tests in this table, we used WaveGlow with 256 residual channels.
+Our results were obtained by running the `./tensorrt/run_latency_tests_trt.sh` script in the PyTorch-19.11-py3 NGC container. Please note that to reproduce the results, you need to provide pretrained checkpoints for Tacotron 2 and WaveGlow. Please edit the script to provide your checkpoint filenames. For all tests in this table, we used WaveGlow with 256 residual channels.
 
-|Framework|Batch size|Input length|Precision|Avg latency (s)|Latency std (s)|Latency confidence interval 90% (s)|Latency confidence interval 95% (s)|Latency confidence interval 99% (s)|Throughput (samples/sec)|Speed-up PyT+TRT/TRT|Avg mels generated (81 mels=1 sec of speech)|Avg audio length (s)|Avg RTF|
+|Framework|Batch size|Input length|Precision|Avg latency (s)|Latency std (s)|Latency confidence interval 90% (s)|Latency confidence interval 95% (s)|Latency confidence interval 99% (s)|Throughput (samples/sec)|Speed-up PyTorch+TensorRT vs PyTorch|Avg mels generated (81 mels=1 sec of speech)|Avg audio length (s)|Avg RTF|
 |---:|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:|
-|PyT+TRT|1| 128| FP16| 1.02| 0.05| 1.09| 1.10| 1.14| 150,439| 1.59| 602| 6.99| 6.86|
-|PyT    |1| 128| FP16| 1.63| 0.07| 1.71| 1.73| 1.81|  94,758| 1.00| 601| 6.98| 4.30|
+|PyTorch+TensorRT|1| 128| FP16| 1.02| 0.05| 1.09| 1.10| 1.14| 150,439| 1.59| 602| 6.99| 6.86|
+|PyTorch         |1| 128| FP16| 1.63| 0.07| 1.71| 1.73| 1.81|  94,758| 1.00| 601| 6.98| 4.30|
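As a sanity check on the table, the reported RTF is simply the seconds of generated audio per second of latency:

```python
# RTF for the PyTorch+TensorRT row: audio length divided by latency.
avg_audio_s, avg_latency_s = 6.99, 1.02
print(round(avg_audio_s / avg_latency_s, 2))   # ~6.85, matching the 6.86 above within rounding
```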

+ 5 - 3
PyTorch/SpeechSynthesis/Tacotron2/trt/export_onnx2trt.py → PyTorch/SpeechSynthesis/Tacotron2/tensorrt/convert_onnx2trt.py

@@ -27,14 +27,15 @@
 
 import pycuda.driver as cuda
 import pycuda.autoinit
-import tensorrt as trt
 import onnx
 import argparse
+import tensorrt as trt
+import os
 
 import sys
 sys.path.append('./')
 
-from trt.trt_utils import build_engine
+from trt_utils import build_engine
 
 def parse_args(parser):
     """
@@ -119,7 +120,8 @@ def main():
         print("Building WaveGlow ...")
         waveglow_engine = build_engine(args.waveglow, shapes=shapes, fp16=args.fp16)
         if waveglow_engine is not None:
-            with open(args.output+"/"+"waveglow"+engine_prec+".engine", 'wb') as f:
+            engine_path = os.path.join(args.output, "waveglow"+engine_prec+".engine")
+            with open(engine_path, 'wb') as f:
                 f.write(waveglow_engine.serialize())
         else:
             print("Failed to build engine from", args.waveglow)

+ 405 - 0
PyTorch/SpeechSynthesis/Tacotron2/tensorrt/convert_tacotron22onnx.py

@@ -0,0 +1,405 @@
+# *****************************************************************************
+#  Copyright (c) 2018, NVIDIA CORPORATION.  All rights reserved.
+#
+#  Redistribution and use in source and binary forms, with or without
+#  modification, are permitted provided that the following conditions are met:
+#      * Redistributions of source code must retain the above copyright
+#        notice, this list of conditions and the following disclaimer.
+#      * Redistributions in binary form must reproduce the above copyright
+#        notice, this list of conditions and the following disclaimer in the
+#        documentation and/or other materials provided with the distribution.
+#      * Neither the name of the NVIDIA CORPORATION nor the
+#        names of its contributors may be used to endorse or promote products
+#        derived from this software without specific prior written permission.
+#
+#  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+#  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+#  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+#  DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
+#  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+#  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+#  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+#  ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+#  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# *****************************************************************************
+
+import torch
+from torch import nn
+from torch.nn import functional as F
+import argparse
+
+import sys
+sys.path.append('./')
+
+import models
+from inference import checkpoint_from_distributed, unwrap_distributed, load_and_setup_model, prepare_input_sequence
+from common.utils import to_gpu, get_mask_from_lengths
+
+def parse_args(parser):
+    """
+    Parse commandline arguments.
+    """
+    parser.add_argument('--tacotron2', type=str,
+                        help='full path to the Tacotron2 model checkpoint file')
+    parser.add_argument('-o', '--output', type=str, required=True,
+                        help='Directory for the exported Tacotron 2 ONNX model')
+    parser.add_argument('--fp16', action='store_true',
+                        help='Export with half precision to ONNX')
+
+    return parser
+
+
+def encoder_infer(self, x, input_lengths):
+    device = x.device
+    for conv in self.convolutions:
+        x = F.dropout(F.relu(conv(x.to(device))), 0.5, False)
+
+    x = x.transpose(1, 2)
+
+    x = nn.utils.rnn.pack_padded_sequence(
+        x, input_lengths, batch_first=True)
+
+    outputs, _ = self.lstm(x)
+
+    outputs, _ = nn.utils.rnn.pad_packed_sequence(
+        outputs, batch_first=True)
+
+    lens = input_lengths*2
+
+    return outputs, lens
+
+
+class Encoder(torch.nn.Module):
+    def __init__(self, tacotron2):
+        super(Encoder, self).__init__()
+        self.tacotron2 = tacotron2
+        self.tacotron2.encoder.lstm.flatten_parameters()
+        self.infer = encoder_infer
+
+    def forward(self, sequence, sequence_lengths):
+        embedded_inputs = self.tacotron2.embedding(sequence).transpose(1, 2)
+        memory, lens = self.infer(self.tacotron2.encoder, embedded_inputs, sequence_lengths)
+        processed_memory = self.tacotron2.decoder.attention_layer.memory_layer(memory)
+        return memory, processed_memory, lens
+
+class Postnet(torch.nn.Module):
+    def __init__(self, tacotron2):
+        super(Postnet, self).__init__()
+        self.tacotron2 = tacotron2
+
+    def forward(self, mel_outputs):
+        mel_outputs_postnet = self.tacotron2.postnet(mel_outputs)
+        return mel_outputs + mel_outputs_postnet
+
+def lstmcell2lstm_params(lstm_mod, lstmcell_mod):
+    lstm_mod.weight_ih_l0 = torch.nn.Parameter(lstmcell_mod.weight_ih)
+    lstm_mod.weight_hh_l0 = torch.nn.Parameter(lstmcell_mod.weight_hh)
+    lstm_mod.bias_ih_l0 = torch.nn.Parameter(lstmcell_mod.bias_ih)
+    lstm_mod.bias_hh_l0 = torch.nn.Parameter(lstmcell_mod.bias_hh)
+
+
+def prenet_infer(self, x):
+    x1 = x[:]
+    for linear in self.layers:
+        x1 = F.relu(linear(x1))
+        x0 = x1[0].unsqueeze(0)
+        mask = torch.le(torch.rand(256, device='cuda').to(x.dtype), 0.5).to(x.dtype)
+        mask = mask.expand(x1.size(0), x1.size(1))
+        x1 = x1*mask*2.0
+
+    return x1
+
+class DecoderIter(torch.nn.Module):
+    def __init__(self, tacotron2):
+        super(DecoderIter, self).__init__()
+
+        self.tacotron2 = tacotron2
+        dec = tacotron2.decoder
+
+        self.p_attention_dropout = dec.p_attention_dropout
+        self.p_decoder_dropout = dec.p_decoder_dropout
+        self.prenet = dec.prenet
+
+        self.prenet.infer = prenet_infer
+
+        self.attention_rnn = nn.LSTM(dec.prenet_dim + dec.encoder_embedding_dim,
+                                     dec.attention_rnn_dim, 1)
+        lstmcell2lstm_params(self.attention_rnn, dec.attention_rnn)
+        self.attention_rnn.flatten_parameters()
+
+        self.attention_layer = dec.attention_layer
+
+        self.decoder_rnn = nn.LSTM(dec.attention_rnn_dim + dec.encoder_embedding_dim,
+                                   dec.decoder_rnn_dim, 1)
+        lstmcell2lstm_params(self.decoder_rnn, dec.decoder_rnn)
+        self.decoder_rnn.flatten_parameters()
+
+        self.linear_projection = dec.linear_projection
+        self.gate_layer = dec.gate_layer
+
+
+    def decode(self, decoder_input, in_attention_hidden, in_attention_cell,
+               in_decoder_hidden, in_decoder_cell, in_attention_weights,
+               in_attention_weights_cum, in_attention_context, memory,
+               processed_memory, mask):
+
+        cell_input = torch.cat((decoder_input, in_attention_context), -1)
+
+        _, (out_attention_hidden, out_attention_cell) = self.attention_rnn(
+            cell_input.unsqueeze(0), (in_attention_hidden.unsqueeze(0),
+                                      in_attention_cell.unsqueeze(0)))
+        out_attention_hidden = out_attention_hidden.squeeze(0)
+        out_attention_cell = out_attention_cell.squeeze(0)
+
+        out_attention_hidden = F.dropout(
+            out_attention_hidden, self.p_attention_dropout, False)
+
+        attention_weights_cat = torch.cat(
+            (in_attention_weights.unsqueeze(1),
+             in_attention_weights_cum.unsqueeze(1)), dim=1)
+        out_attention_context, out_attention_weights = self.attention_layer(
+            out_attention_hidden, memory, processed_memory,
+            attention_weights_cat, mask)
+
+        out_attention_weights_cum = in_attention_weights_cum + out_attention_weights
+        decoder_input_tmp = torch.cat(
+            (out_attention_hidden, out_attention_context), -1)
+
+        _, (out_decoder_hidden, out_decoder_cell) = self.decoder_rnn(
+            decoder_input_tmp.unsqueeze(0), (in_decoder_hidden.unsqueeze(0),
+                                             in_decoder_cell.unsqueeze(0)))
+        out_decoder_hidden = out_decoder_hidden.squeeze(0)
+        out_decoder_cell = out_decoder_cell.squeeze(0)
+
+        out_decoder_hidden = F.dropout(
+            out_decoder_hidden, self.p_decoder_dropout, False)
+
+        decoder_hidden_attention_context = torch.cat(
+            (out_decoder_hidden, out_attention_context), 1)
+
+        decoder_output = self.linear_projection(
+            decoder_hidden_attention_context)
+
+        gate_prediction = self.gate_layer(decoder_hidden_attention_context)
+
+        return (decoder_output, gate_prediction, out_attention_hidden,
+                out_attention_cell, out_decoder_hidden, out_decoder_cell,
+                out_attention_weights, out_attention_weights_cum, out_attention_context)
+
+    # @torch.jit.script
+    def forward(self,
+                decoder_input,
+                attention_hidden,
+                attention_cell,
+                decoder_hidden,
+                decoder_cell,
+                attention_weights,
+                attention_weights_cum,
+                attention_context,
+                memory,
+                processed_memory,
+                mask):
+        decoder_input1 = self.prenet.infer(self.prenet, decoder_input)
+        outputs = self.decode(decoder_input1,
+                              attention_hidden,
+                              attention_cell,
+                              decoder_hidden,
+                              decoder_cell,
+                              attention_weights,
+                              attention_weights_cum,
+                              attention_context,
+                              memory,
+                              processed_memory,
+                              mask)
+        return outputs
+
+
+def test_inference(encoder, decoder_iter, postnet):
+
+    encoder.eval()
+    decoder_iter.eval()
+    postnet.eval()
+
+    sys.path.append('./tensorrt')
+    from inference_trt import init_decoder_inputs
+
+    texts = ["Hello World, good day."]
+    sequences, sequence_lengths = prepare_input_sequence(texts)
+
+    measurements = {}
+
+    print("Running Tacotron2 Encoder")
+    with torch.no_grad():
+        memory, processed_memory, lens = encoder(sequences, sequence_lengths)
+
+    print("Running Tacotron2 Decoder")
+    device = memory.device
+    dtype = memory.dtype
+    mel_lengths = torch.zeros([memory.size(0)], dtype=torch.int32, device = device)
+    not_finished = torch.ones([memory.size(0)], dtype=torch.int32, device = device)
+    mel_outputs, gate_outputs, alignments = (torch.zeros(1), torch.zeros(1), torch.zeros(1))
+    gate_threshold = 0.6
+    max_decoder_steps = 1000
+    first_iter = True
+
+    (decoder_input, attention_hidden, attention_cell, decoder_hidden,
+     decoder_cell, attention_weights, attention_weights_cum,
+     attention_context, memory, processed_memory,
+     mask) = init_decoder_inputs(memory, processed_memory, sequence_lengths)
+
+    while True:
+        with torch.no_grad():
+            (mel_output, gate_output,
+             attention_hidden, attention_cell,
+             decoder_hidden, decoder_cell,
+             attention_weights, attention_weights_cum,
+             attention_context) = decoder_iter(decoder_input, attention_hidden, attention_cell, decoder_hidden,
+                                               decoder_cell, attention_weights, attention_weights_cum,
+                                               attention_context, memory, processed_memory, mask)
+
+        if first_iter:
+            mel_outputs = torch.unsqueeze(mel_output, 2)
+            gate_outputs = torch.unsqueeze(gate_output, 2)
+            alignments = torch.unsqueeze(attention_weights, 2)
+            first_iter = False
+        else:
+            mel_outputs = torch.cat((mel_outputs, torch.unsqueeze(mel_output, 2)), 2)
+            gate_outputs = torch.cat((gate_outputs, torch.unsqueeze(gate_output, 2)), 2)
+            alignments = torch.cat((alignments, torch.unsqueeze(attention_weights, 2)), 2)
+
+        dec = torch.le(torch.sigmoid(gate_output), gate_threshold).to(torch.int32).squeeze(1)
+        not_finished = not_finished*dec
+        mel_lengths += not_finished
+
+        if torch.sum(not_finished) == 0:
+            print("Stopping after ",mel_outputs.size(2)," decoder steps")
+            break
+        if mel_outputs.size(2) == max_decoder_steps:
+            print("Warning! Reached max decoder steps")
+            break
+
+        decoder_input = mel_output
+
+
+    print("Running Tacotron2 PostNet")
+    with torch.no_grad():
+        mel_outputs_postnet = postnet(mel_outputs)
+
+    return mel_outputs_postnet
+
+def main():
+
+    parser = argparse.ArgumentParser(
+        description='PyTorch Tacotron 2 export to TRT')
+    parser = parse_args(parser)
+    args, _ = parser.parse_known_args()
+
+    tacotron2 = load_and_setup_model('Tacotron2', parser, args.tacotron2,
+                                     fp16_run=args.fp16, cpu_run=False)
+
+    opset_version = 10
+
+    sequences = torch.randint(low=0, high=148, size=(1,50),
+                             dtype=torch.long).cuda()
+    sequence_lengths = torch.IntTensor([sequences.size(1)]).cuda().long()
+    dummy_input = (sequences, sequence_lengths)
+
+    encoder = Encoder(tacotron2)
+    encoder.eval()
+    with torch.no_grad():
+        encoder(*dummy_input)
+
+    torch.onnx.export(encoder, dummy_input, args.output+"/"+"encoder.onnx",
+                      opset_version=opset_version,
+                      do_constant_folding=True,
+                      input_names=["sequences", "sequence_lengths"],
+                      output_names=["memory", "processed_memory", "lens"],
+                      dynamic_axes={"sequences": {1: "text_seq"},
+                                    "memory": {1: "mem_seq"},
+                                    "processed_memory": {1: "mem_seq"}
+                      })
+
+    decoder_iter = DecoderIter(tacotron2)
+    memory = torch.randn((1,sequence_lengths[0],512)).cuda() #encoder_outputs
+    if args.fp16:
+        memory = memory.half()
+    memory_lengths = sequence_lengths
+    # initialize decoder states for dummy_input
+    decoder_input = tacotron2.decoder.get_go_frame(memory)
+    mask = get_mask_from_lengths(memory_lengths)
+    (attention_hidden,
+     attention_cell,
+     decoder_hidden,
+     decoder_cell,
+     attention_weights,
+     attention_weights_cum,
+     attention_context,
+     processed_memory) = tacotron2.decoder.initialize_decoder_states(memory)
+    dummy_input = (decoder_input,
+                   attention_hidden,
+                   attention_cell,
+                   decoder_hidden,
+                   decoder_cell,
+                   attention_weights,
+                   attention_weights_cum,
+                   attention_context,
+                   memory,
+                   processed_memory,
+                   mask)
+
+    decoder_iter = DecoderIter(tacotron2)
+    decoder_iter.eval()
+    with torch.no_grad():
+        decoder_iter(*dummy_input)
+
+    torch.onnx.export(decoder_iter, dummy_input, args.output+"/"+"decoder_iter.onnx",
+                      opset_version=opset_version,
+                      do_constant_folding=True,
+                      input_names=["decoder_input",
+                                   "attention_hidden",
+                                   "attention_cell",
+                                   "decoder_hidden",
+                                   "decoder_cell",
+                                   "attention_weights",
+                                   "attention_weights_cum",
+                                   "attention_context",
+                                   "memory",
+                                   "processed_memory",
+                                   "mask"],
+                      output_names=["decoder_output",
+                                    "gate_prediction",
+                                    "out_attention_hidden",
+                                    "out_attention_cell",
+                                    "out_decoder_hidden",
+                                    "out_decoder_cell",
+                                    "out_attention_weights",
+                                    "out_attention_weights_cum",
+                                    "out_attention_context"],
+                      dynamic_axes={"attention_weights" : {1: "seq_len"},
+                                    "attention_weights_cum" : {1: "seq_len"},
+                                    "memory" : {1: "seq_len"},
+                                    "processed_memory" : {1: "seq_len"},
+                                    "mask" : {1: "seq_len"},
+                                    "out_attention_weights" : {1: "seq_len"},
+                                    "out_attention_weights_cum" : {1: "seq_len"}
+                      })
+
+    postnet = Postnet(tacotron2)
+    dummy_input = torch.randn((1,80,620)).cuda()
+    if args.fp16:
+        dummy_input = dummy_input.half()
+    torch.onnx.export(postnet, dummy_input, args.output+"/"+"postnet.onnx",
+                      opset_version=opset_version,
+                      do_constant_folding=True,
+                      input_names=["mel_outputs"],
+                      output_names=["mel_outputs_postnet"],
+                      dynamic_axes={"mel_outputs": {2: "mel_seq"},
+                                    "mel_outputs_postnet": {2: "mel_seq"}})
+
+    mel = test_inference(encoder, decoder_iter, postnet)
+    torch.save(mel, "mel.pt")
+
+if __name__ == '__main__':
+    main()
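After running the converter, the three exported graphs can be sanity-checked before building engines; a small optional check, assuming the `onnx` package is installed and the default `output/` directory was used:

```python
# Validate the exported Tacotron 2 ONNX graphs and list their input names.
import onnx

for name in ("encoder", "decoder_iter", "postnet"):
    model = onnx.load(f"output/{name}.onnx")
    onnx.checker.check_model(model)              # raises if the graph is malformed
    print(name, [i.name for i in model.graph.input])
```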

+ 104 - 0
PyTorch/SpeechSynthesis/Tacotron2/tensorrt/convert_waveglow2onnx.py

@@ -0,0 +1,104 @@
+# *****************************************************************************
+#  Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
+#
+#  Redistribution and use in source and binary forms, with or without
+#  modification, are permitted provided that the following conditions are met:
+#      * Redistributions of source code must retain the above copyright
+#        notice, this list of conditions and the following disclaimer.
+#      * Redistributions in binary form must reproduce the above copyright
+#        notice, this list of conditions and the following disclaimer in the
+#        documentation and/or other materials provided with the distribution.
+#      * Neither the name of the NVIDIA CORPORATION nor the
+#        names of its contributors may be used to endorse or promote products
+#        derived from this software without specific prior written permission.
+#
+#  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+#  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+#  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+#  DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
+#  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+#  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+#  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+#  ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+#  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# *****************************************************************************
+
+import torch
+import argparse
+import os
+import sys
+sys.path.append('./')
+
+from common.utils import ParseFromConfigFile
+from inference import load_and_setup_model
+
+def parse_args(parser):
+    """
+    Parse commandline arguments.
+    """
+    parser.add_argument('--waveglow', type=str, required=True,
+                        help='full path to the WaveGlow model checkpoint file')
+    parser.add_argument('-o', '--output', type=str, required=True,
+                        help='Directory for the exported WaveGlow ONNX model')
+    parser.add_argument('--fp16', action='store_true',
+                        help='inference with AMP')
+    parser.add_argument('-s', '--sigma-infer', default=0.6, type=float)
+
+    parser.add_argument('--config-file', action=ParseFromConfigFile,
+                         type=str, help='Path to configuration file')
+
+    return parser
+
+
+def export_onnx(parser, args):
+
+    waveglow = load_and_setup_model('WaveGlow', parser, args.waveglow,
+                                    fp16_run=args.fp16, cpu_run=False,
+                                    forward_is_infer=False)
+
+    # 80 mel channels, 620 mel spectrograms ~ 7 seconds of speech
+    mel = torch.randn(1, 80, 620).cuda()
+    stride = 256 # value from waveglow upsample
+    n_group = 8
+    z_size2 = (mel.size(2)*stride)//n_group
+    z = torch.randn(1, n_group, z_size2).cuda()
+
+    if args.fp16:
+        mel = mel.half()
+        z = z.half()
+    with torch.no_grad():
+        # run inference to force calculation of inverses
+        waveglow.infer(mel, sigma=args.sigma_infer)
+
+        # export to ONNX
+        if args.fp16:
+            waveglow = waveglow.half()
+
+        waveglow.forward = waveglow.infer_onnx
+
+        opset_version = 12
+
+        output_path = os.path.join(args.output, "waveglow.onnx")
+        torch.onnx.export(waveglow, (mel, z), output_path,
+                          opset_version=opset_version,
+                          do_constant_folding=True,
+                          input_names=["mel", "z"],
+                          output_names=["audio"],
+                          dynamic_axes={"mel":   {0: "batch_size", 2: "mel_seq"},
+                                        "z":     {0: "batch_size", 2: "z_seq"},
+                                        "audio": {0: "batch_size", 1: "audio_seq"}})
+
+
+def main():
+
+    parser = argparse.ArgumentParser(
+        description='PyTorch Tacotron 2 Inference')
+    parser = parse_args(parser)
+    args, _ = parser.parse_known_args()
+
+    export_onnx(parser, args)
+
+if __name__ == '__main__':
+    main()
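The dummy shapes in `export_onnx` follow directly from WaveGlow's upsampling: each mel frame expands to `stride` audio samples, which are then folded into groups of `n_group`:

```python
# How the z tensor size in export_onnx is derived (values from the script above).
stride, n_group, mel_frames = 256, 8, 620
audio_samples = mel_frames * stride       # 158720 samples, ~7.2 s at 22.05 kHz
z_frames = audio_samples // n_group       # 19840 -> z has shape (1, 8, 19840)
print(audio_samples, z_frames)
```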

+ 5 - 4
PyTorch/SpeechSynthesis/Tacotron2/trt/inference_trt.py → PyTorch/SpeechSynthesis/Tacotron2/tensorrt/inference_trt.py

@@ -31,8 +31,8 @@ from scipy.io.wavfile import write
 import time
 import torch
 import argparse
-import sys
 
+import sys
 sys.path.append('./')
 
 from common.utils import to_gpu, get_mask_from_lengths
@@ -40,7 +40,7 @@ from tacotron2.text import text_to_sequence
 from inference import MeasureTime, prepare_input_sequence, load_and_setup_model
 import dllogger as DLLogger
 from dllogger import StdOutBackend, JSONStreamBackend, Verbosity
-from trt.trt_utils import load_engine, run_trt_engine
+from trt_utils import load_engine, run_trt_engine
 
 from waveglow.denoiser import Denoiser
 
@@ -284,9 +284,9 @@ def infer_waveglow_trt(waveglow, waveglow_context, mel, measurements, fp16):
 
     waveglow_tensors = {
         "inputs" :
-        {'mel': mel, 'z': z},
+        {'input__0': mel, 'input__1': z},
         "outputs" :
-        {'audio': audios}
+        {'output__0': audios}
     }
     print("Running WaveGlow")
     with MeasureTime(measurements, "waveglow_time"):
@@ -343,6 +343,7 @@ def main():
     sequences, sequence_lengths = prepare_input_sequence(texts)
     sequences = sequences.to(torch.int32)
     sequence_lengths = sequence_lengths.to(torch.int32)
+
     with MeasureTime(measurements, "latency"):
         mel, mel_lengths = infer_tacotron2_trt(encoder, decoder_iter, postnet,
                                                encoder_context, decoder_context, postnet_context,

+ 1 - 0
PyTorch/SpeechSynthesis/Tacotron2/tensorrt/run_latency_tests_trt.sh

@@ -0,0 +1 @@
+bash test_infer.sh --test tensorrt/test_infer_trt.py -bs 1 -il 128 --fp16 --num-iters 1003 --encoder ./output/encoder_fp16.engine --decoder ./output/decoder_iter_fp16.engine --postnet ./output/postnet_fp16.engine --waveglow ./output/waveglow_fp16.engine

+ 8 - 33
PyTorch/SpeechSynthesis/Tacotron2/trt/test_infer_trt.py → PyTorch/SpeechSynthesis/Tacotron2/tensorrt/test_infer_trt.py

@@ -34,10 +34,10 @@ import argparse
 import numpy as np
 from scipy.io.wavfile import write
 
-from inference import checkpoint_from_distributed, unwrap_distributed, MeasureTime, prepare_input_sequence
+from inference import checkpoint_from_distributed, unwrap_distributed, MeasureTime, prepare_input_sequence, load_and_setup_model
 from inference_trt import infer_tacotron2_trt, infer_waveglow_trt
 
-from trt.trt_utils import load_engine
+from trt_utils import load_engine
 import tensorrt as trt
 
 import time
@@ -79,34 +79,6 @@ def parse_args(parser):
     return parser
 
 
-def load_and_setup_model(model_name, parser, checkpoint, amp_run, to_cuda=True):
-    model_parser = models.parse_model_args(model_name, parser, add_help=False)
-    model_args, _ = model_parser.parse_known_args()
-
-    model_config = models.get_model_config(model_name, model_args)
-    model = models.get_model(model_name, model_config, to_cuda=to_cuda)
-
-    if checkpoint is not None:
-        if to_cuda:
-            state_dict = torch.load(checkpoint)['state_dict']
-        else:
-            state_dict = torch.load(checkpoint,map_location='cpu')['state_dict']
-        if checkpoint_from_distributed(state_dict):
-            state_dict = unwrap_distributed(state_dict)
-
-        model.load_state_dict(state_dict)
-
-    if model_name == "WaveGlow":
-        model = model.remove_weightnorm(model)
-
-    model.eval()
-
-    if amp_run:
-        model, _ = amp.initialize(model, [], opt_level="O3")
-
-    return model
-
-
 def print_stats(measurements_all):
 
     print(np.mean(measurements_all['latency'][1:]),
@@ -137,7 +109,7 @@ def print_stats(measurements_all):
     print("Throughput average (samples/sec) = {:.4f}".format(np.mean(throughput)))
     print("Preprocessing average (seconds) = {:.4f}".format(np.mean(preprocessing)))
     print("Postprocessing average (seconds) = {:.4f}".format(np.mean(postprocessing)))
-    print("Number of mels per audio average = {}".format(np.mean(num_mels_per_audio)))
+    print("Number of mels per audio average = {}".format(np.mean(num_mels_per_audio))) #
     print("Latency average (seconds) = {:.4f}".format(np.mean(latency)))
     print("Latency std (seconds) = {:.4f}".format(np.std(latency)))
     print("Latency cl 50 (seconds) = {:.4f}".format(cf_50))
@@ -190,8 +162,11 @@ def main():
 
     if args.waveglow_ckpt != "":
         # setup denoiser using WaveGlow PyTorch checkpoint
-        waveglow_ckpt = load_and_setup_model('WaveGlow', parser, args.waveglow_ckpt,
-                                             True, forward_is_infer=True)
+        waveglow_ckpt = load_and_setup_model('WaveGlow', parser,
+                                             args.waveglow_ckpt,
+                                             fp16_run=args.fp16,
+                                             cpu_run=False,
+                                             forward_is_infer=True)
         denoiser = Denoiser(waveglow_ckpt).cuda()
         # after initialization, we don't need WaveGlow PyTorch checkpoint
         # anymore - deleting

+ 0 - 0
PyTorch/SpeechSynthesis/Tacotron2/trt/trt_utils.py → PyTorch/SpeechSynthesis/Tacotron2/tensorrt/trt_utils.py


+ 2 - 1
PyTorch/SpeechSynthesis/Tacotron2/test_infer.py

@@ -73,6 +73,8 @@ def parse_args(parser):
                         help='Input length')
     parser.add_argument('-bs', '--batch-size', type=int, default=1,
                         help='Batch size')
+
+
     return parser
 
 
@@ -177,7 +179,6 @@ def main():
                 num_mels = mel.size(0)*mel.size(2)
                 num_samples = audios.size(0)*audios.size(1)
 
-
                 with MeasureTime(measurements, "type_conversion", args.cpu):
                     audios = audios.float()
 

+ 1 - 1
PyTorch/SpeechSynthesis/Tacotron2/test_infer.sh

@@ -78,7 +78,7 @@ TMP_LOGFILE=tmp_log_${LOG_SUFFIX}.log
 LOGFILE=log_${LOG_SUFFIX}.log
 
 
-if [ "$TEST_PROGRAM" = "trt/test_infer_trt.py" ]
+if [ "$TEST_PROGRAM" = "tensorrt/test_infer_trt.py" ]
 then
     TACOTRON2_PARAMS="--encoder $ENCODER_CKPT --decoder $DECODER_CKPT --postnet $POSTNET_CKPT"
 else

+ 12 - 9
PyTorch/SpeechSynthesis/Tacotron2/train.py

@@ -44,6 +44,7 @@ from apex.parallel import DistributedDataParallel as DDP
 import models
 import loss_functions
 import data_functions
+from common.utils import ParseFromConfigFile
 
 import dllogger as DLLogger
 from dllogger import StdOutBackend, JSONStreamBackend, Verbosity
@@ -73,6 +74,9 @@ def parse_args(parser):
     parser.add_argument('--anneal-factor', type=float, choices=[0.1, 0.3], default=0.1,
                         help='Factor for annealing learning rate')
 
+    parser.add_argument('--config-file', action=ParseFromConfigFile,
+                         type=str, help='Path to configuration file')
+
     # training
     training = parser.add_argument_group('training setup')
     training.add_argument('--epochs', type=int, required=True,
@@ -162,7 +166,7 @@ def parse_args(parser):
 def reduce_tensor(tensor, num_gpus):
     rt = tensor.clone()
     dist.all_reduce(rt, op=dist.reduce_op.SUM)
-    rt /= num_gpus
+    rt = torch.true_divide(rt, num_gpus)
     return rt
 
 
@@ -211,8 +215,7 @@ def save_checkpoint(model, optimizer, epoch, config, amp_run, output_dir, model_
             checkpoint['amp'] = amp.state_dict()
 
         checkpoint_filename = "checkpoint_{}_{}.pt".format(model_name, epoch)
-        checkpoint_path = os.path.join(
-            output_dir, checkpoint_filename)
+        checkpoint_path = os.path.join(output_dir, checkpoint_filename)
         print("Saving model and optimizer state at epoch {} to {}".format(
             epoch, checkpoint_path))
         torch.save(checkpoint, checkpoint_path)
@@ -221,7 +224,7 @@ def save_checkpoint(model, optimizer, epoch, config, amp_run, output_dir, model_
         symlink_dst = os.path.join(
             output_dir, "checkpoint_{}_last.pt".format(model_name))
         if os.path.exists(symlink_dst) and os.path.islink(symlink_dst):
-            print("|||| Updating symlink", symlink_dst, "to point to", symlink_src)
+            print("Updating symlink", symlink_dst, "to point to", symlink_src)
             os.remove(symlink_dst)
 
         os.symlink(symlink_src, symlink_dst)
@@ -230,10 +233,10 @@ def save_checkpoint(model, optimizer, epoch, config, amp_run, output_dir, model_
 def get_last_checkpoint_filename(output_dir, model_name):
     symlink = os.path.join(output_dir, "checkpoint_{}_last.pt".format(model_name))
     if os.path.exists(symlink):
-        print("|||| Loading checkpoint from symlink", symlink)
+        print("Loading checkpoint from symlink", symlink)
         return os.path.join(output_dir, os.readlink(symlink))
     else:
-        print("|||| No last checkpoint available - starting from epoch 0 ")
+        print("No last checkpoint available - starting from epoch 0 ")
         return ""
 
 
@@ -350,8 +353,8 @@ def main():
     distributed_run = world_size > 1
 
     if local_rank == 0:
-        DLLogger.init(backends=[JSONStreamBackend(Verbosity.DEFAULT,
-                                                  args.output+'/'+args.log_file),
+        log_file = os.path.join(args.output, args.log_file)
+        DLLogger.init(backends=[JSONStreamBackend(Verbosity.DEFAULT, log_file),
                                 StdOutBackend(Verbosity.VERBOSE)])
     else:
         DLLogger.init(backends=[])
@@ -361,7 +364,7 @@ def main():
     DLLogger.log(step="PARAMETER", data={'model_name':'Tacotron2_PyT'})
 
     model_name = args.model_name
-    parser = models.parse_model_args(model_name, parser)
+    parser = models.model_parser(model_name, parser)
     args, _ = parser.parse_known_args()
 
     torch.backends.cudnn.enabled = args.cudnn_enabled
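An aside on the `reduce_tensor` change above: switching from in-place `/=` to `torch.true_divide` matters when the reduced tensor is integer-typed (for example an item count), since recent PyTorch refuses true division into an integer tensor; this sketch assumes that is the motivation:

```python
# In-place integer division vs. true_divide (the former raises on recent PyTorch).
import torch

counts = torch.tensor([3])                 # e.g. an item count summed over 2 GPUs
print(torch.true_divide(counts, 2))        # tensor([1.5000]) -- float result, no error
```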

+ 0 - 1
PyTorch/SpeechSynthesis/Tacotron2/trt/run_latency_tests_trt.sh

@@ -1 +0,0 @@
-bash test_infer.sh --test trt/test_infer_trt.py -bs 1 -il 128 -p fp16 --num-iters 1003 --encoder ./output/encoder_fp16.engine --decoder ./output/decoder_iter_fp16.engine --postnet ./output/postnet_fp16.engine --waveglow ./output/waveglow_fp16.engine

+ 1 - 1
PyTorch/SpeechSynthesis/Tacotron2/waveglow/arg_parser.py

@@ -27,7 +27,7 @@
 
 import argparse
 
-def parse_waveglow_args(parent, add_help=False):
+def waveglow_parser(parent, add_help=False):
     """
     Parse commandline arguments.
     """

+ 1 - 1
PyTorch/SpeechSynthesis/Tacotron2/waveglow/denoiser.py

@@ -34,7 +34,7 @@ from common.layers import STFT
 class Denoiser(torch.nn.Module):
     """ Removes model bias from audio produced with waveglow """
 
-    def __init__(self, waveglow, cpu_run=False, filter_length=1024, n_overlap=4,
+    def __init__(self, waveglow, filter_length=1024, n_overlap=4,
                  win_length=1024, mode='zeros'):
         super(Denoiser, self).__init__()
         device = waveglow.upsample.weight.device

+ 44 - 0
PyTorch/SpeechSynthesis/Tacotron2/waveglow/model.py

@@ -58,6 +58,7 @@ class Invertible1x1Conv(torch.nn.Module):
         if torch.det(W) < 0:
             W[:, 0] = -1 * W[:, 0]
         W = W.view(c, c, 1)
+        W = W.contiguous()
         self.conv.weight.data = W
 
     def forward(self, z):
@@ -279,6 +280,49 @@ class WaveGlow(torch.nn.Module):
         return audio
 
 
+    def infer_onnx(self, spect, z, sigma=0.9):
+
+        spect = self.upsample(spect)
+        # trim conv artifacts. maybe pad spec to kernel multiple
+        time_cutoff = self.upsample.kernel_size[0] - self.upsample.stride[0]
+        spect = spect[:, :, :-time_cutoff]
+
+        length_spect_group = spect.size(2)//8
+        mel_dim = 80
+        batch_size = spect.size(0)
+
+        spect = spect.view((batch_size, mel_dim, length_spect_group, self.n_group))
+        spect = spect.permute(0, 2, 1, 3)
+        spect = spect.contiguous()
+        spect = spect.view((batch_size, length_spect_group, self.n_group*mel_dim))
+        spect = spect.permute(0, 2, 1)
+        spect = spect.contiguous()
+
+        audio = z[:, :self.n_remaining_channels, :]
+        z = z[:, self.n_remaining_channels:self.n_group, :]
+        audio = sigma*audio
+
+        for k in reversed(range(self.n_flows)):
+            n_half = int(audio.size(1) // 2)
+            audio_0 = audio[:, :n_half, :]
+            audio_1 = audio[:, n_half:(n_half+n_half), :]
+
+            output = self.WN[k]((audio_0, spect))
+            s = output[:, n_half:(n_half+n_half), :]
+            b = output[:, :n_half, :]
+            audio_1 = (audio_1 - b) / torch.exp(s)
+            audio = torch.cat([audio_0, audio_1], 1)
+            audio = self.convinv[k].infer(audio)
+
+            if k % self.n_early_every == 0 and k > 0:
+                audio = torch.cat((z[:, :self.n_early_size, :], audio), 1)
+                z = z[:, self.n_early_size:self.n_group, :]
+
+        audio = audio.permute(0,2,1).contiguous().view(batch_size, (length_spect_group * self.n_group))
+
+        return audio
+
+
     @staticmethod
     def remove_weightnorm(model):
         waveglow = model
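The inversion inside `infer_onnx` undoes the affine coupling applied during training: the forward pass computes `y = x * exp(s) + b`, so inference recovers `x = (y - b) / exp(s)`. A minimal numeric check of that identity:

```python
# Verify the affine-coupling inversion used in WaveGlow's inference path.
import torch

x = torch.randn(4)
s, b = torch.randn(4), torch.randn(4)
y = x * torch.exp(s) + b                    # training-direction transform
x_rec = (y - b) / torch.exp(s)              # inference-direction inverse
print(torch.allclose(x, x_rec, atol=1e-6))  # True
```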