пре 3 година · 959c6773fe
--- a/PyTorch/SpeechRecognition/Jasper/inference.py
+++ b/PyTorch/SpeechRecognition/Jasper/inference.py
@@ -324,7 +324,7 @@ def main():
 
				                 feats, feat_lens = feat_proc(audio, audio_lens)
			
 
				 
			
 
				             sync()
			
 
				-            t1 = time.perf_counter()
			
 
				+            t1 = time.time()
			
 
				 
			
 
				             if args.amp:
			
 
				                 feats = feats.half()
			
@@ -340,7 +340,7 @@ def main():
 
				             preds = greedy_decoder(log_probs)
			
 
				 
			
 
				             sync()
			
 
				-            t2 = time.perf_counter()
			
 
				+            t2 = time.time()
			
 
				 
			
 
				             # burn-in period; wait for a new loader due to num_workers
			
 
				             if it >= 1 and (args.steps == 0 or it >= args.warmup_steps):
			
@@ -358,7 +358,7 @@ def main():
 
				                 break
			
 
				 
			
 
				             sync()
			
 
				-            t0 = time.perf_counter()
			
 
				+            t0 = time.time()
			
 
				 
			
 
				         # communicate the results
			
 
				         if args.transcribe_wav:
			
--- a/PyTorch/SpeechRecognition/Jasper/train.py
+++ b/PyTorch/SpeechRecognition/Jasper/train.py
@@ -142,6 +142,7 @@ def evaluate(epoch, step, val_loader, val_feat_proc, labels, model,
 
				             continue
			
 
				 
			
 
				         model.eval()
			
 
				+        torch.cuda.synchronize()
			
 
				         start_time = time.time()
			
 
				         agg = {'losses': [], 'preds': [], 'txts': []}
			
 
				 
			
@@ -166,6 +167,7 @@ def evaluate(epoch, step, val_loader, val_feat_proc, labels, model,
 
				             agg['txts'] += helpers.gather_transcripts([txt], [txt_lens], labels)
			
 
				 
			
 
				         wer, loss = process_evaluation_epoch(agg)
			
 
				+        torch.cuda.synchronize()
			
 
				         log(() if epoch is None else (epoch,),
			
 
				             step, subset, {'loss': loss, 'wer': 100.0 * wer,
			
 
				                            'took': time.time() - start_time})
			
@@ -379,11 +381,11 @@ def main():
 
				         if multi_gpu and not use_dali:
			
 
				             train_loader.sampler.set_epoch(epoch)
			
 
				 
			
 
				+        torch.cuda.synchronize()
			
 
				+        epoch_start_time = time.time()
			
 
				         epoch_utts = 0
			
 
				         epoch_loss = 0
			
 
				         accumulated_batches = 0
			
 
				-        epoch_start_time = time.time()
			
 
				-        epoch_eval_time = 0
			
 
				 
			
 
				         for batch in train_loader:
			
 
				 
			
@@ -461,7 +463,6 @@ def main():
 
				                 step_start_time = time.time()
			
 
				 
			
 
				                 if step % args.eval_frequency == 0:
			
 
				-                    tik = time.time()
			
 
				                     wer = evaluate(epoch, step, val_loader, val_feat_proc,
			
 
				                                    symbols, model, ema_model, ctc_loss,
			
 
				                                    greedy_decoder, args.amp, use_dali)
			
@@ -470,7 +471,6 @@ def main():
 
				                         checkpointer.save(model, ema_model, optimizer, scaler,
			
 
				                                           epoch, step, best_wer, is_best=True)
			
 
				                         best_wer = wer
			
 
				-                    epoch_eval_time += time.time() - tik
			
 
				 
			
 
				                 step += 1
			
 
				                 accumulated_batches = 0
			
@@ -481,6 +481,7 @@ def main():
 
				             if not use_dali and step > steps_per_epoch * epoch:
			
 
				                 break
			
 
				 
			
 
				+        torch.cuda.synchronize()
			
 
				         epoch_time = time.time() - epoch_start_time
			
 
				         epoch_loss /= steps_per_epoch
			
 
				         log((epoch,), None, 'train_avg', {'throughput': epoch_utts / epoch_time,
			
--- a/PyTorch/SpeechRecognition/QuartzNet/inference.py
+++ b/PyTorch/SpeechRecognition/QuartzNet/inference.py
@@ -334,7 +334,7 @@ def main():
 
				                 feats, feat_lens = feat_proc(audio, audio_lens)
			
 
				 
			
 
				             sync()
			
 
				-            t1 = time.perf_counter()
			
 
				+            t1 = time.time()
			
 
				 
			
 
				             if args.amp:
			
 
				                 feats = feats.half()
			
@@ -347,7 +347,7 @@ def main():
 
				             preds = greedy_decoder(log_probs)
			
 
				 
			
 
				             sync()
			
 
				-            t2 = time.perf_counter()
			
 
				+            t2 = time.time()
			
 
				 
			
 
				             # burn-in period; wait for a new loader due to num_workers
			
 
				             if it >= 1 and (args.steps == 0 or it >= args.warmup_steps):
			
@@ -365,7 +365,7 @@ def main():
 
				                 break
			
 
				 
			
 
				             sync()
			
 
				-            t0 = time.perf_counter()
			
 
				+            t0 = time.time()
			
 
				 
			
 
				         # communicate the results
			
 
				         if args.transcribe_wav:
			
--- a/PyTorch/SpeechRecognition/QuartzNet/train.py
+++ b/PyTorch/SpeechRecognition/QuartzNet/train.py
@@ -163,6 +163,7 @@ def evaluate(epoch, step, val_loader, val_feat_proc, labels, model,
 
				             continue
			
 
				 
			
 
				         model.eval()
			
 
				+        torch.cuda.synchronize()
			
 
				         start_time = time.time()
			
 
				         agg = {'losses': [], 'preds': [], 'txts': []}
			
 
				 
			
@@ -187,6 +188,7 @@ def evaluate(epoch, step, val_loader, val_feat_proc, labels, model,
 
				             agg['txts'] += helpers.gather_transcripts([txt], [txt_lens], labels)
			
 
				 
			
 
				         wer, loss = process_evaluation_epoch(agg)
			
 
				+        torch.cuda.synchronize()
			
 
				         log(() if epoch is None else (epoch,),
			
 
				             step, subset, {'loss': loss, 'wer': 100.0 * wer,
			
 
				                            'took': time.time() - start_time})
			
@@ -410,11 +412,11 @@ def main():
 
				         if multi_gpu and not use_dali:
			
 
				             train_loader.sampler.set_epoch(epoch)
			
 
				 
			
 
				+        torch.cuda.synchronize()
			
 
				+        epoch_start_time = time.time()
			
 
				         epoch_utts = 0
			
 
				         epoch_loss = 0
			
 
				         accumulated_batches = 0
			
 
				-        epoch_start_time = time.time()
			
 
				-        epoch_eval_time = 0
			
 
				 
			
 
				         for batch in train_loader:
			
 
				 
			
@@ -493,7 +495,6 @@ def main():
 
				                 step_start_time = time.time()
			
 
				 
			
 
				                 if step % args.eval_frequency == 0:
			
 
				-                    tik = time.time()
			
 
				                     wer = evaluate(epoch, step, val_loader, val_feat_proc,
			
 
				                                    symbols, model, ema_model, ctc_loss,
			
 
				                                    greedy_decoder, args.amp, use_dali)
			
@@ -502,7 +503,6 @@ def main():
 
				                         checkpointer.save(model, ema_model, optimizer, scaler,
			
 
				                                           epoch, step, best_wer, is_best=True)
			
 
				                         best_wer = wer
			
 
				-                    epoch_eval_time += time.time() - tik
			
 
				 
			
 
				                 step += 1
			
 
				                 accumulated_batches = 0
			
@@ -513,6 +513,7 @@ def main():
 
				             if not use_dali and step > steps_per_epoch * epoch:
			
 
				                 break
			
 
				 
			
 
				+        torch.cuda.synchronize()
			
 
				         epoch_time = time.time() - epoch_start_time
			
 
				         epoch_loss /= steps_per_epoch
			
 
				         log((epoch,), None, 'train_avg', {'throughput': epoch_utts / epoch_time,
			
--- a/PyTorch/SpeechRecognition/wav2vec2/common/metrics.py
+++ b/PyTorch/SpeechRecognition/wav2vec2/common/metrics.py
@@ -75,7 +75,8 @@ class MetricsAggregator:
 
				                  benchmark_epochs=0,
			
 
				                  reduce_mean=(),
			
 
				                  reduce_last=(),
			
 
				-                 group_tb_entries=False):
			
 
				+                 group_tb_entries=False,
			
 
				+                 cuda=True):
			
 
				         """
			
 
				         Args:
			
 
				             scopes: possible scopes of metrics accumulation
			
@@ -100,9 +101,10 @@ class MetricsAggregator:
 
				         self.benchmark_keys = benchmark_keys
			
 
				         self.scopes = scopes
			
 
				         self.group_tb_entries = group_tb_entries
			
 
				+        self.cuda = cuda
			
 
				 
			
 
				     def log_scalar(self, key, val, accum_reduction=None):
			
 
				-        """ Main primitive for logging partial metrics from single batch.
			
 
				+        """Main primitive for logging partial metrics from single batch.
			
 
				 
			
 
				         NOTE: Assumption: `log_scalar` cannot be called with different
			
 
				         `accum_reduction` for the same `key`. This results in undefined behavior
			
@@ -197,9 +199,13 @@ class MetricsAggregator:
 
				         self._start_accumulating(iter, True, 'train')
			
 
				 
			
 
				     def start_epoch(self, epoch):
			
 
				+        if self.cuda:
			
 
				+            torch.cuda.synchronize()
			
 
				         self._start_accumulating(epoch, True, 'train_avg')
			
 
				 
			
 
				     def start_val(self):
			
 
				+        if self.cuda:
			
 
				+            torch.cuda.synchronize()
			
 
				         self._start_accumulating(None, True, 'val')
			
 
				 
			
 
				     def finish_iter(self):
			
@@ -209,6 +215,8 @@ class MetricsAggregator:
 
				         self._finish_accumulating('train')
			
 
				 
			
 
				     def finish_epoch(self):
			
 
				+        if self.cuda:
			
 
				+            torch.cuda.synchronize()
			
 
				         self._accumulate_time('train_avg')
			
 
				         self._finish_accumulating('train_avg')
			
 
				 
			
@@ -220,6 +228,8 @@ class MetricsAggregator:
 
				                 metr[k].pop(0)
			
 
				 
			
 
				     def finish_val(self, scope='val'):
			
 
				+        if self.cuda:
			
 
				+            torch.cuda.synchronize()
			
 
				         self._accumulate_time(scope)
			
 
				         self._finish_accumulating(scope)
			
 
				 
			
--- a/PyTorch/SpeechRecognition/wav2vec2/inference.py
+++ b/PyTorch/SpeechRecognition/wav2vec2/inference.py
@@ -249,7 +249,7 @@ def main():
 
				         batch = utils.move_to_cuda(batch)
			
 
				 
			
 
				         sync()
			
 
				-        t1 = time.perf_counter()
			
 
				+        t1 = time.time()
			
 
				 
			
 
				         if args.fp16:
			
 
				             batch = fp_convert_batch(batch, 'fp16')
			
@@ -266,7 +266,7 @@ def main():
 
				             preds = logp.argmax(dim=-1, keepdim=False).int()
			
 
				 
			
 
				         sync()
			
 
				-        t2 = time.perf_counter()
			
 
				+        t2 = time.time()
			
 
				 
			
 
				         # burn-in period; wait for a new loader due to num_workers
			
 
				         if it >= 1 and (args.steps == 0 or it >= args.warmup_steps):
			
@@ -292,7 +292,7 @@ def main():
 
				             break
			
 
				 
			
 
				         sync()
			
 
				-        t0 = time.perf_counter()
			
 
				+        t0 = time.time()
			
 
				 
			
 
				     tdict = target_dictionary
			
 
				     agg['preds'] = [pred.replace(tdict[tdict.nspecial], ' ')
			
--- a/PyTorch/SpeechRecognition/wav2vec2/train.py
+++ b/PyTorch/SpeechRecognition/wav2vec2/train.py
@@ -150,9 +150,10 @@ def main():
 
				         Metrics = W2v2Metrics
			
 
				         criterion = Wav2vecCriterion(args)
			
 
				 
			
 
				-    metrics = Metrics(args.benchmark_epochs_num)
			
 
				-    val_metrics = Metrics(args.benchmark_epochs_num, scopes=['val'])
			
 
				-    val_ema_metrics = Metrics(args.benchmark_epochs_num, scopes=['val_ema'])
			
 
				+    kw = {'benchmark_epochs': args.benchmark_epochs_num, 'cuda': not args.cpu}
			
 
				+    metrics = Metrics(**kw)
			
 
				+    val_metrics = Metrics(scopes=['val'], **kw)
			
 
				+    val_ema_metrics = Metrics(scopes=['val_ema'], **kw)
			
 
				 
			
 
				     init_logger(args.output_dir, args.log_file, args.ema)
			
 
				     logger.log_parameters(vars(args), tb_subset='train')
			
--- a/PyTorch/SpeechRecognition/wav2vec2/wav2vec2/logging.py
+++ b/PyTorch/SpeechRecognition/wav2vec2/wav2vec2/logging.py
@@ -111,7 +111,7 @@ def init_infer_metadata():
 
				 
			
 
				 class W2v2Metrics(MetricsAggregator):
			
 
				 
			
 
				-    def __init__(self, benchmark_epochs, scopes=('train', 'train_avg')):
			
 
				+    def __init__(self, benchmark_epochs, scopes=('train', 'train_avg'), cuda=True):
			
 
				         super().__init__(
			
 
				             benchmark_epochs=benchmark_epochs,
			
 
				             benchmark_keys=('took', 'accuracy', 'loss', 'ntokens/s'),
			
@@ -120,7 +120,8 @@ class W2v2Metrics(MetricsAggregator):
 
				                            'code_perplexity',
			
 
				                            'took', 'loss_scale', 'lr', 'ntokens/s'),
			
 
				             reduce_mean=('temp', 'prob_perplexity', 'code_perplexity'),
			
 
				-            reduce_last=('lr', 'loss_scale'))
			
 
				+            reduce_last=('lr', 'loss_scale'),
			
 
				+            cuda=cuda)
			
 
				 
			
 
				     def accumulate(self, scopes=None):
			
 
				         if 'ignore' not in self.partials or self.partials['ignore'] == 0.0:
			
@@ -155,11 +156,12 @@ class W2v2FineTuningMetrics(MetricsAggregator):
 
				                            'prob_perplexity', 'took', 'ntokens/s', 'uer',
			
 
				                            'wer', 'raw_wer'),
			
 
				             reduce_mean=('temp', 'prob_perplexity', 'code_perplexity'),
			
 
				-            reduce_last=('lr',)):
			
 
				+            reduce_last=('lr',),
			
 
				+            cuda=True):
			
 
				         super().__init__(
			
 
				             benchmark_epochs=benchmark_epochs, benchmark_keys=benchmark_keys,
			
 
				             scopes=scopes, dllogger_keys=dllogger_keys,
			
 
				-            reduce_mean=reduce_mean, reduce_last=reduce_last)
			
 
				+            reduce_mean=reduce_mean, reduce_last=reduce_last, cuda=cuda)
			
 
				 
			
 
				     def accumulate(self, scopes=None):
			
 
				         if 'ignore' not in self.partials or self.partials['ignore'] == 0.0:
			
--- a/PyTorch/SpeechSynthesis/HiFiGAN/hifigan/logging.py
+++ b/PyTorch/SpeechSynthesis/HiFiGAN/hifigan/logging.py
@@ -123,7 +123,7 @@ class Metrics(dict):
 
				     def __init__(self, scopes=['train', 'train_avg'],
			
 
				                  dll_keys=['loss_gen', 'loss_discrim', 'loss_mel',
			
 
				                            'frames/s', 'took', 'lrate_gen', 'lrate_discrim'],
			
 
				-                 benchmark_epochs=0):
			
 
				+                 benchmark_epochs=0, cuda=True):
			
 
				         super().__init__()
			
 
				 
			
 
				         self.dll_keys = dll_keys
			
@@ -133,6 +133,7 @@ class Metrics(dict):
 
				         self.benchmark_epochs = benchmark_epochs
			
 
				         if benchmark_epochs > 0:
			
 
				             self.metrics['train_benchmark'] = defaultdict(list)
			
 
				+        self.cuda = cuda
			
 
				 
			
 
				     def __setitem__(self, key, val):
			
 
				         if type(val) is dict:
			
@@ -182,15 +183,21 @@ class Metrics(dict):
 
				         self.start_accumulating(iter, start_timer, 'train')
			
 
				 
			
 
				     def start_epoch(self, epoch, start_timer=True):
			
 
				+        if self.cuda:
			
 
				+            torch.cuda.synchronize()
			
 
				         self.start_accumulating(epoch, start_timer, 'train_avg')
			
 
				 
			
 
				     def start_val(self, start_timer=True):
			
 
				+        if self.cuda:
			
 
				+            torch.cuda.synchronize()
			
 
				         self.start_accumulating(None, start_timer, 'val')
			
 
				 
			
 
				     def finish_iter(self, stop_timer=True):
			
 
				         self.finish_accumulating(stop_timer, 'train')
			
 
				 
			
 
				     def finish_epoch(self, stop_timer=True):
			
 
				+        if self.cuda:
			
 
				+            torch.cuda.synchronize()
			
 
				         self.finish_accumulating(stop_timer, 'train_avg')
			
 
				 
			
 
				         metr = self.metrics['train_benchmark']
			
@@ -201,6 +208,8 @@ class Metrics(dict):
 
				                 metr[k].pop(0)
			
 
				 
			
 
				     def finish_val(self, stop_timer=True):
			
 
				+        if self.cuda:
			
 
				+            torch.cuda.synchronize()
			
 
				         self.finish_accumulating(stop_timer, 'val')
			
 
				 
			
 
				     def get_metrics(self, scope='train', target='dll'):
			
--- a/PyTorch/SpeechSynthesis/HiFiGAN/train.py
+++ b/PyTorch/SpeechSynthesis/HiFiGAN/train.py
@@ -237,8 +237,9 @@ def main():
 
				         init_distributed(args, args.world_size, args.local_rank)
			
 
				 
			
 
				     metrics = Metrics(scopes=['train', 'train_avg'],
			
 
				-                      benchmark_epochs=args.benchmark_epochs_num)
			
 
				-    val_metrics = Metrics(scopes=['val'])
			
 
				+                      benchmark_epochs=args.benchmark_epochs_num,
			
 
				+                      cuda=args.cuda)
			
 
				+    val_metrics = Metrics(scopes=['val'], cuda=args.cuda)
			
 
				     init_logger(args.output, args.log_file, args.ema_decay)
			
 
				     logger.parameters(vars(args), tb_subset='train')