@@ -1,4 +1,5 @@
 # coding=utf-8
+# Copyright (c) 2019 NVIDIA CORPORATION. All rights reserved.
 # Copyright 2018 The Google AI Language Team Authors.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -12,54 +13,26 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
 """Create masked LM/next sentence masked_lm TF examples for BERT."""

-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
+from __future__ import absolute_import, division, print_function, unicode_literals

-import collections
+import argparse
+import logging
+import os
 import random
-import tokenization
+from io import open
+import h5py
 import tensorflow as tf
+import numpy as np
+from tqdm import tqdm, trange
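+# numpy and h5py are new dependencies, used below for the optional HDF5 output.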

-flags = tf.flags
-
-FLAGS = flags.FLAGS
-
-flags.DEFINE_string("input_file", None,
-                    "Input raw text file (or comma-separated list of files).")
-
-flags.DEFINE_string(
-    "output_file", None,
-    "Output TF example file (or comma-separated list of files).")
-
-flags.DEFINE_string("vocab_file", None,
-                    "The vocabulary file that the BERT model was trained on.")
-
-flags.DEFINE_bool(
-    "do_lower_case", True,
-    "Whether to lower case the input text. Should be True for uncased "
-    "models and False for cased models.")
-
-flags.DEFINE_integer("max_seq_length", 128, "Maximum sequence length.")
-
-flags.DEFINE_integer("max_predictions_per_seq", 20,
-                     "Maximum number of masked LM predictions per sequence.")
-
-flags.DEFINE_integer("random_seed", 12345, "Random seed for data generation.")
-
-flags.DEFINE_integer(
-    "dupe_factor", 10,
-    "Number of times to duplicate the input data (with different masks).")
-
-flags.DEFINE_float("masked_lm_prob", 0.15, "Masked LM probability.")
-
-flags.DEFINE_float(
-    "short_seq_prob", 0.1,
-    "Probability of creating sequences which are shorter than the "
-    "maximum length.")
+from tokenization import BertTokenizer
+import tokenization

+import collections

 class TrainingInstance(object):
   """A single training instance (sentence pair)."""
@@ -90,7 +63,7 @@ class TrainingInstance(object):


 def write_instance_to_example_files(instances, tokenizer, max_seq_length,
-                                    max_predictions_per_seq, output_files):
+                                    max_predictions_per_seq, output_files, output_formats="tfrecord"):
   """Create TF example files from `TrainingInstance`s."""
   writers = []
   for output_file in output_files:
@@ -99,6 +72,16 @@ def write_instance_to_example_files(instances, tokenizer, max_seq_length,
   writer_index = 0

   total_written = 0
+  if 'hdf5' in output_formats:
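+    # Pre-allocate one fixed-shape int32 buffer per feature, sized for all
+    # instances up front, so each HDF5 dataset can be written in one call.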
+    features_hdf5 = collections.OrderedDict()
+    num_instances = len(instances)
+    features_hdf5["input_ids"] = np.zeros([num_instances, max_seq_length], dtype="int32")
+    features_hdf5["input_mask"] = np.zeros([num_instances, max_seq_length], dtype="int32")
+    features_hdf5["segment_ids"] = np.zeros([num_instances, max_seq_length], dtype="int32")
+    features_hdf5["masked_lm_positions"] = np.zeros([num_instances, max_predictions_per_seq], dtype="int32")
+    features_hdf5["masked_lm_ids"] = np.zeros([num_instances, max_predictions_per_seq], dtype="int32")
+    features_hdf5["next_sentence_labels"] = np.zeros(num_instances, dtype="int32")
+
   for (inst_index, instance) in enumerate(instances):
     input_ids = tokenizer.convert_tokens_to_ids(instance.tokens)
     input_mask = [1] * len(input_ids)
@@ -134,9 +117,19 @@ def write_instance_to_example_files(instances, tokenizer, max_seq_length,
     features["masked_lm_weights"] = create_float_feature(masked_lm_weights)
     features["next_sentence_labels"] = create_int_feature([next_sentence_label])

-    tf_example = tf.train.Example(features=tf.train.Features(feature=features))
+    if 'tfrecord' in output_formats:
+      tf_example = tf.train.Example(features=tf.train.Features(feature=features))
+      writers[writer_index].write(tf_example.SerializeToString())
+    if 'hdf5' in output_formats:
+      features_hdf5["input_ids"][inst_index] = input_ids
+      features_hdf5["input_mask"][inst_index] = input_mask
+      features_hdf5["segment_ids"][inst_index] = segment_ids
+      features_hdf5["masked_lm_positions"][inst_index] = masked_lm_positions
+      features_hdf5["masked_lm_ids"][inst_index] = masked_lm_ids
+      features_hdf5["next_sentence_labels"][inst_index] = next_sentence_label
+    if 'tfrecord' not in output_formats and 'hdf5' not in output_formats:
+      raise ValueError("Empty or unsupported output_formats; expected 'tfrecord' and/or 'hdf5'.")

-    writers[writer_index].write(tf_example.SerializeToString())
     writer_index = (writer_index + 1) % len(writers)

     total_written += 1
@@ -159,6 +152,17 @@ def write_instance_to_example_files(instances, tokenizer, max_seq_length,
   for writer in writers:
     writer.close()

+  if 'hdf5' in output_formats:
+    f = h5py.File(output_file, 'w')
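+    # Note: `output_file` is whatever the writer loop above left in scope, so
+    # all instances land in a single HDF5 file named after the last output file.
+    # 'i1' (int8) is wide enough for the 0/1-valued mask, segment and label
+    # arrays; ids and positions keep 'i4' (int32). gzip trades CPU for disk.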
+    f.create_dataset("input_ids", data=features_hdf5["input_ids"], dtype='i4', compression='gzip')
+    f.create_dataset("input_mask", data=features_hdf5["input_mask"], dtype='i1', compression='gzip')
+    f.create_dataset("segment_ids", data=features_hdf5["segment_ids"], dtype='i1', compression='gzip')
+    f.create_dataset("masked_lm_positions", data=features_hdf5["masked_lm_positions"], dtype='i4', compression='gzip')
+    f.create_dataset("masked_lm_ids", data=features_hdf5["masked_lm_ids"], dtype='i4', compression='gzip')
+    f.create_dataset("next_sentence_labels", data=features_hdf5["next_sentence_labels"], dtype='i1', compression='gzip')
+    f.flush()
+    f.close()
+
   tf.logging.info("Wrote %d total instances", total_written)


@@ -175,160 +179,161 @@ def create_float_feature(values):
 def create_training_instances(input_files, tokenizer, max_seq_length,
                               dupe_factor, short_seq_prob, masked_lm_prob,
                               max_predictions_per_seq, rng):
-  """Create `TrainingInstance`s from raw text."""
-  all_documents = [[]]
-
-  # Input file format:
-  # (1) One sentence per line. These should ideally be actual sentences, not
-  # entire paragraphs or arbitrary spans of text. (Because we use the
-  # sentence boundaries for the "next sentence prediction" task).
-  # (2) Blank lines between documents. Document boundaries are needed so
-  # that the "next sentence prediction" task doesn't span between documents.
-  for input_file in input_files:
-    with tf.gfile.GFile(input_file, "r") as reader:
-      while True:
-        line = tokenization.convert_to_unicode(reader.readline())
-        if not line:
-          break
-        line = line.strip()
-
-        # Empty lines are used as document delimiters
-        if not line:
-          all_documents.append([])
-        tokens = tokenizer.tokenize(line)
-        if tokens:
-          all_documents[-1].append(tokens)
-
-  # Remove empty documents
-  all_documents = [x for x in all_documents if x]
-  rng.shuffle(all_documents)
-
-  vocab_words = list(tokenizer.vocab.keys())
-  instances = []
-  for _ in range(dupe_factor):
-    for document_index in range(len(all_documents)):
-      instances.extend(
-          create_instances_from_document(
-              all_documents, document_index, max_seq_length, short_seq_prob,
-              masked_lm_prob, max_predictions_per_seq, vocab_words, rng))
-
-  rng.shuffle(instances)
-  return instances
+    """Create `TrainingInstance`s from raw text."""
+    all_documents = [[]]
+
+    # Input file format:
+    # (1) One sentence per line. These should ideally be actual sentences, not
+    # entire paragraphs or arbitrary spans of text. (Because we use the
+    # sentence boundaries for the "next sentence prediction" task).
+    # (2) Blank lines between documents. Document boundaries are needed so
+    # that the "next sentence prediction" task doesn't span between documents.
+    for input_file in input_files:
+        print("creating instances from {}".format(input_file))
+        with open(input_file, "r", encoding="utf-8") as reader:
+            while True:
+                line = tokenization.convert_to_unicode(reader.readline())
+                if not line:
+                    break
+                line = line.strip()
+
+                # Empty lines are used as document delimiters
+                if not line:
+                    all_documents.append([])
+                tokens = tokenizer.tokenize(line)
+                if tokens:
+                    all_documents[-1].append(tokens)
+
+    # Remove empty documents
+    all_documents = [x for x in all_documents if x]
+    rng.shuffle(all_documents)
+
+    vocab_words = list(tokenizer.vocab.keys())
+    instances = []
+    for _ in range(dupe_factor):
+        for document_index in range(len(all_documents)):
+            instances.extend(
+                create_instances_from_document(
+                    all_documents, document_index, max_seq_length, short_seq_prob,
+                    masked_lm_prob, max_predictions_per_seq, vocab_words, rng))
+
+    rng.shuffle(instances)
+    return instances


 def create_instances_from_document(
-    all_documents, document_index, max_seq_length, short_seq_prob,
-    masked_lm_prob, max_predictions_per_seq, vocab_words, rng):
-  """Creates `TrainingInstance`s for a single document."""
-  document = all_documents[document_index]
-
-  # Account for [CLS], [SEP], [SEP]
-  max_num_tokens = max_seq_length - 3
-
-  # We *usually* want to fill up the entire sequence since we are padding
-  # to `max_seq_length` anyways, so short sequences are generally wasted
-  # computation. However, we *sometimes*
-  # (i.e., short_seq_prob == 0.1 == 10% of the time) want to use shorter
-  # sequences to minimize the mismatch between pre-training and fine-tuning.
-  # The `target_seq_length` is just a rough target however, whereas
-  # `max_seq_length` is a hard limit.
-  target_seq_length = max_num_tokens
-  if rng.random() < short_seq_prob:
-    target_seq_length = rng.randint(2, max_num_tokens)
-
-  # We DON'T just concatenate all of the tokens from a document into a long
-  # sequence and choose an arbitrary split point because this would make the
-  # next sentence prediction task too easy. Instead, we split the input into
-  # segments "A" and "B" based on the actual "sentences" provided by the user
-  # input.
-  instances = []
-  current_chunk = []
-  current_length = 0
-  i = 0
-  while i < len(document):
-    segment = document[i]
-    current_chunk.append(segment)
-    current_length += len(segment)
-    if i == len(document) - 1 or current_length >= target_seq_length:
-      if current_chunk:
-        # `a_end` is how many segments from `current_chunk` go into the `A`
-        # (first) sentence.
-        a_end = 1
-        if len(current_chunk) >= 2:
-          a_end = rng.randint(1, len(current_chunk) - 1)
-
-        tokens_a = []
-        for j in range(a_end):
-          tokens_a.extend(current_chunk[j])
-
-        tokens_b = []
-        # Random next
-        is_random_next = False
-        if len(current_chunk) == 1 or rng.random() < 0.5:
-          is_random_next = True
-          target_b_length = target_seq_length - len(tokens_a)
-
-          # This should rarely go for more than one iteration for large
-          # corpora. However, just to be careful, we try to make sure that
-          # the random document is not the same as the document
-          # we're processing.
-          for _ in range(10):
-            random_document_index = rng.randint(0, len(all_documents) - 1)
-            if random_document_index != document_index:
-              break
-
-          random_document = all_documents[random_document_index]
-          random_start = rng.randint(0, len(random_document) - 1)
-          for j in range(random_start, len(random_document)):
-            tokens_b.extend(random_document[j])
-            if len(tokens_b) >= target_b_length:
-              break
-          # We didn't actually use these segments so we "put them back" so
-          # they don't go to waste.
-          num_unused_segments = len(current_chunk) - a_end
-          i -= num_unused_segments
-        # Actual next
-        else:
-          is_random_next = False
-          for j in range(a_end, len(current_chunk)):
-            tokens_b.extend(current_chunk[j])
-        truncate_seq_pair(tokens_a, tokens_b, max_num_tokens, rng)
-
-        assert len(tokens_a) >= 1
-        assert len(tokens_b) >= 1
-
-        tokens = []
-        segment_ids = []
-        tokens.append("[CLS]")
-        segment_ids.append(0)
-        for token in tokens_a:
-          tokens.append(token)
-          segment_ids.append(0)
-
-        tokens.append("[SEP]")
-        segment_ids.append(0)
-
-        for token in tokens_b:
-          tokens.append(token)
-          segment_ids.append(1)
-        tokens.append("[SEP]")
-        segment_ids.append(1)
-
-        (tokens, masked_lm_positions,
-         masked_lm_labels) = create_masked_lm_predictions(
-             tokens, masked_lm_prob, max_predictions_per_seq, vocab_words, rng)
-        instance = TrainingInstance(
-            tokens=tokens,
-            segment_ids=segment_ids,
-            is_random_next=is_random_next,
-            masked_lm_positions=masked_lm_positions,
-            masked_lm_labels=masked_lm_labels)
-        instances.append(instance)
-      current_chunk = []
-      current_length = 0
-    i += 1
-
-  return instances
+        all_documents, document_index, max_seq_length, short_seq_prob,
+        masked_lm_prob, max_predictions_per_seq, vocab_words, rng):
+    """Creates `TrainingInstance`s for a single document."""
+    document = all_documents[document_index]
+
+    # Account for [CLS], [SEP], [SEP]
+    max_num_tokens = max_seq_length - 3
+
+    # We *usually* want to fill up the entire sequence since we are padding
+    # to `max_seq_length` anyways, so short sequences are generally wasted
+    # computation. However, we *sometimes*
+    # (i.e., short_seq_prob == 0.1 == 10% of the time) want to use shorter
+    # sequences to minimize the mismatch between pre-training and fine-tuning.
+    # The `target_seq_length` is just a rough target however, whereas
+    # `max_seq_length` is a hard limit.
+    target_seq_length = max_num_tokens
+    if rng.random() < short_seq_prob:
+        target_seq_length = rng.randint(2, max_num_tokens)
+
+    # We DON'T just concatenate all of the tokens from a document into a long
+    # sequence and choose an arbitrary split point because this would make the
+    # next sentence prediction task too easy. Instead, we split the input into
+    # segments "A" and "B" based on the actual "sentences" provided by the user
+    # input.
+    instances = []
+    current_chunk = []
+    current_length = 0
+    i = 0
+    while i < len(document):
+        segment = document[i]
+        current_chunk.append(segment)
+        current_length += len(segment)
+        if i == len(document) - 1 or current_length >= target_seq_length:
+            if current_chunk:
+                # `a_end` is how many segments from `current_chunk` go into the `A`
+                # (first) sentence.
+                a_end = 1
+                if len(current_chunk) >= 2:
+                    a_end = rng.randint(1, len(current_chunk) - 1)
+
+                tokens_a = []
+                for j in range(a_end):
+                    tokens_a.extend(current_chunk[j])
+
+                tokens_b = []
+                # Random next
+                is_random_next = False
+                if len(current_chunk) == 1 or rng.random() < 0.5:
+                    is_random_next = True
+                    target_b_length = target_seq_length - len(tokens_a)
+
+                    # This should rarely go for more than one iteration for large
+                    # corpora. However, just to be careful, we try to make sure that
+                    # the random document is not the same as the document
+                    # we're processing.
+                    for _ in range(10):
+                        random_document_index = rng.randint(0, len(all_documents) - 1)
+                        if random_document_index != document_index:
+                            break
+
+                    random_document = all_documents[random_document_index]
+                    random_start = rng.randint(0, len(random_document) - 1)
+                    for j in range(random_start, len(random_document)):
+                        tokens_b.extend(random_document[j])
+                        if len(tokens_b) >= target_b_length:
+                            break
+                    # We didn't actually use these segments so we "put them back" so
+                    # they don't go to waste.
+                    num_unused_segments = len(current_chunk) - a_end
+                    i -= num_unused_segments
+                # Actual next
+                else:
+                    is_random_next = False
+                    for j in range(a_end, len(current_chunk)):
+                        tokens_b.extend(current_chunk[j])
+                truncate_seq_pair(tokens_a, tokens_b, max_num_tokens, rng)
+
+                assert len(tokens_a) >= 1
+                assert len(tokens_b) >= 1
+
+                tokens = []
+                segment_ids = []
+                tokens.append("[CLS]")
+                segment_ids.append(0)
+                for token in tokens_a:
+                    tokens.append(token)
+                    segment_ids.append(0)
+
+                tokens.append("[SEP]")
+                segment_ids.append(0)
+
+                for token in tokens_b:
+                    tokens.append(token)
+                    segment_ids.append(1)
+                tokens.append("[SEP]")
+                segment_ids.append(1)
+
+                (tokens, masked_lm_positions,
+                 masked_lm_labels) = create_masked_lm_predictions(
+                    tokens, masked_lm_prob, max_predictions_per_seq, vocab_words, rng)
+                instance = TrainingInstance(
+                    tokens=tokens,
+                    segment_ids=segment_ids,
+                    is_random_next=is_random_next,
+                    masked_lm_positions=masked_lm_positions,
+                    masked_lm_labels=masked_lm_labels)
+                instances.append(instance)
+            current_chunk = []
+            current_length = 0
+        i += 1
+
+    return instances


 MaskedLmInstance = collections.namedtuple("MaskedLmInstance",
@@ -337,106 +342,160 @@ MaskedLmInstance = collections.namedtuple("MaskedLmInstance",

 def create_masked_lm_predictions(tokens, masked_lm_prob,
                                  max_predictions_per_seq, vocab_words, rng):
-  """Creates the predictions for the masked LM objective."""
-
-  cand_indexes = []
-  for (i, token) in enumerate(tokens):
-    if token == "[CLS]" or token == "[SEP]":
-      continue
-    cand_indexes.append(i)
-
-  rng.shuffle(cand_indexes)
-
-  output_tokens = list(tokens)
-
-  num_to_predict = min(max_predictions_per_seq,
-                       max(1, int(round(len(tokens) * masked_lm_prob))))
-
-  masked_lms = []
-  covered_indexes = set()
-  for index in cand_indexes:
-    if len(masked_lms) >= num_to_predict:
-      break
-    if index in covered_indexes:
-      continue
-    covered_indexes.add(index)
-
-    masked_token = None
-    # 80% of the time, replace with [MASK]
-    if rng.random() < 0.8:
-      masked_token = "[MASK]"
-    else:
-      # 10% of the time, keep original
-      if rng.random() < 0.5:
-        masked_token = tokens[index]
-      # 10% of the time, replace with random word
-      else:
-        masked_token = vocab_words[rng.randint(0, len(vocab_words) - 1)]
+    """Creates the predictions for the masked LM objective."""
+
+    cand_indexes = []
+    for (i, token) in enumerate(tokens):
+        if token == "[CLS]" or token == "[SEP]":
+            continue
+        cand_indexes.append(i)
+
+    rng.shuffle(cand_indexes)
+
+    output_tokens = list(tokens)
+
+    num_to_predict = min(max_predictions_per_seq,
+                         max(1, int(round(len(tokens) * masked_lm_prob))))
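+    # e.g. with masked_lm_prob=0.15, a 128-token sequence yields
+    # round(128 * 0.15) = 19 masked positions, capped at max_predictions_per_seq.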
+
+    masked_lms = []
+    covered_indexes = set()
+    for index in cand_indexes:
+        if len(masked_lms) >= num_to_predict:
+            break
+        if index in covered_indexes:
+            continue
+        covered_indexes.add(index)
+
+        masked_token = None
+        # 80% of the time, replace with [MASK]
+        if rng.random() < 0.8:
+            masked_token = "[MASK]"
+        else:
+            # 10% of the time, keep original
+            if rng.random() < 0.5:
+                masked_token = tokens[index]
+            # 10% of the time, replace with random word
+            else:
+                masked_token = vocab_words[rng.randint(0, len(vocab_words) - 1)]

-    output_tokens[index] = masked_token
+        output_tokens[index] = masked_token

-    masked_lms.append(MaskedLmInstance(index=index, label=tokens[index]))
+        masked_lms.append(MaskedLmInstance(index=index, label=tokens[index]))

-  masked_lms = sorted(masked_lms, key=lambda x: x.index)
+    masked_lms = sorted(masked_lms, key=lambda x: x.index)

-  masked_lm_positions = []
-  masked_lm_labels = []
-  for p in masked_lms:
-    masked_lm_positions.append(p.index)
-    masked_lm_labels.append(p.label)
+    masked_lm_positions = []
+    masked_lm_labels = []
+    for p in masked_lms:
+        masked_lm_positions.append(p.index)
+        masked_lm_labels.append(p.label)

-  return (output_tokens, masked_lm_positions, masked_lm_labels)
+    return (output_tokens, masked_lm_positions, masked_lm_labels)


 def truncate_seq_pair(tokens_a, tokens_b, max_num_tokens, rng):
-  """Truncates a pair of sequences to a maximum sequence length."""
-  while True:
-    total_length = len(tokens_a) + len(tokens_b)
-    if total_length <= max_num_tokens:
-      break
-
-    trunc_tokens = tokens_a if len(tokens_a) > len(tokens_b) else tokens_b
-    assert len(trunc_tokens) >= 1
-
-    # We want to sometimes truncate from the front and sometimes from the
-    # back to add more randomness and avoid biases.
-    if rng.random() < 0.5:
-      del trunc_tokens[0]
+    """Truncates a pair of sequences to a maximum sequence length."""
+    while True:
+        total_length = len(tokens_a) + len(tokens_b)
+        if total_length <= max_num_tokens:
+            break
+
+        trunc_tokens = tokens_a if len(tokens_a) > len(tokens_b) else tokens_b
+        assert len(trunc_tokens) >= 1
+
+        # We want to sometimes truncate from the front and sometimes from the
+        # back to add more randomness and avoid biases.
+        if rng.random() < 0.5:
+            del trunc_tokens[0]
+        else:
+            trunc_tokens.pop()
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    ## Required parameters
+    parser.add_argument("--vocab_file",
+                        default=None,
+                        type=str,
+                        required=True,
+                        help="The vocabulary file the BERT model will train on.")
+    parser.add_argument("--input_file",
+                        default=None,
+                        type=str,
+                        required=True,
+                        help="The input training corpus: a directory of .txt files or the path to a single file.")
+    parser.add_argument("--output_file",
+                        default=None,
+                        type=str,
+                        required=True,
+                        help="The output file(s) the training examples will be written to (comma-separated list).")
+
+    ## Other parameters
+    # int
+    parser.add_argument("--max_seq_length",
+                        default=128,
+                        type=int,
+                        help="The maximum total input sequence length after WordPiece tokenization. \n"
+                             "Sequences longer than this will be truncated, and sequences shorter \n"
+                             "than this will be padded.")
+    parser.add_argument("--dupe_factor",
+                        default=10,
+                        type=int,
+                        help="Number of times to duplicate the input data (with different masks).")
+    parser.add_argument("--max_predictions_per_seq",
+                        default=20,
+                        type=int,
+                        help="Maximum number of masked LM predictions per sequence.")
+
+    # floats
+
+    parser.add_argument("--masked_lm_prob",
+                        default=0.15,
+                        type=float,
+                        help="Masked LM probability.")
+
+    parser.add_argument("--short_seq_prob",
+                        default=0.1,
+                        type=float,
+                        help="Probability of creating sequences shorter than the maximum length.")
+
+    parser.add_argument("--do_lower_case",
+                        action='store_true',
+                        help="Whether to lower case the input text. Pass the flag for uncased "
+                             "models; omit it for cased models (store_true defaults to False).")
+    parser.add_argument('--random_seed',
+                        type=int,
+                        default=12345,
+                        help="Random seed for data generation.")
+
+    args = parser.parse_args()
+
+    tokenizer = BertTokenizer(args.vocab_file, do_lower_case=args.do_lower_case)
+
+    input_files = []
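+    # Unlike the original tf.flags script, comma-separated glob patterns are
+    # not supported for --input_file: pass one file or a directory of .txt files.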
+    if os.path.isfile(args.input_file):
+        input_files.append(args.input_file)
+    elif os.path.isdir(args.input_file):
+        input_files = [os.path.join(args.input_file, f) for f in os.listdir(args.input_file) if
+                       (os.path.isfile(os.path.join(args.input_file, f)) and f.endswith('.txt'))]
     else:
-      trunc_tokens.pop()
-
-
-def main(_):
-  tf.logging.set_verbosity(tf.logging.INFO)
+        raise ValueError("{} is not a valid path".format(args.input_file))

-  tokenizer = tokenization.FullTokenizer(
-      vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case)
+    rng = random.Random(args.random_seed)
+    instances = create_training_instances(
+        input_files, tokenizer, args.max_seq_length, args.dupe_factor,
+        args.short_seq_prob, args.masked_lm_prob, args.max_predictions_per_seq,
+        rng)

-  input_files = []
-  for input_pattern in FLAGS.input_file.split(","):
-    input_files.extend(tf.gfile.Glob(input_pattern))
+    output_files = args.output_file.split(",")
+    print("*** Writing to output files ***")
+    for output_file in output_files:
+        print(output_file)

-  tf.logging.info("*** Reading from input files ***")
-  for input_file in input_files:
-    tf.logging.info("  %s", input_file)
-
-  rng = random.Random(FLAGS.random_seed)
-  instances = create_training_instances(
-      input_files, tokenizer, FLAGS.max_seq_length, FLAGS.dupe_factor,
-      FLAGS.short_seq_prob, FLAGS.masked_lm_prob, FLAGS.max_predictions_per_seq,
-      rng)
-
-  output_files = FLAGS.output_file.split(",")
-  tf.logging.info("*** Writing to output files ***")
-  for output_file in output_files:
-    tf.logging.info("  %s", output_file)

-  write_instance_to_example_files(instances, tokenizer, FLAGS.max_seq_length,
-                                  FLAGS.max_predictions_per_seq, output_files)
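+    # output_formats defaults to "tfrecord"; pass output_formats="tfrecord,hdf5"
+    # (or "hdf5") below to also emit the HDF5 file assembled above.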
+    write_instance_to_example_files(instances, tokenizer, args.max_seq_length,
+                                    args.max_predictions_per_seq, output_files)
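+
+# Example invocation (illustrative only; the paths are placeholders):
+#   python create_pretraining_data.py \
+#     --input_file=./corpus_dir \
+#     --output_file=./train_examples.tfrecord \
+#     --vocab_file=./vocab.txt \
+#     --do_lower_case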


 if __name__ == "__main__":
-  flags.mark_flag_as_required("input_file")
-  flags.mark_flag_as_required("output_file")
-  flags.mark_flag_as_required("vocab_file")
-  tf.app.run()
+    main()