2 سال پیش · 8ed53a4581
--- a/PyTorch/SpeechRecognition/Jasper/Dockerfile
+++ b/PyTorch/SpeechRecognition/Jasper/Dockerfile
@@ -24,7 +24,7 @@ COPY requirements.txt .
 
				 RUN if [[ ! -z "$(command -v conda)" ]]; then conda install -y pyyaml==5.4.1; fi
			
 
				 RUN pip install --disable-pip-version-check -U -r requirements.txt
			
 
				 
			
 
				-RUN pip install --force-reinstall --extra-index-url https://developer.download.nvidia.com/compute/redist nvidia-dali-cuda110==1.9.0
			
 
				+RUN pip install --force-reinstall --extra-index-url https://developer.download.nvidia.com/compute/redist nvidia-dali-cuda110==1.27.0
			
 
				 
			
 
				 # Copy rest of files
			
 
				 COPY . .
			
--- a/PyTorch/SpeechRecognition/Jasper/common/audio.py
+++ b/PyTorch/SpeechRecognition/Jasper/common/audio.py
@@ -45,7 +45,7 @@ class AudioSegment(object):
 
				                  duration=0, trim=False, trim_db=60):
			
 
				         """Create audio segment from samples.
			
 
				 
			
 
				-        Samples are convert float32 internally, with int scaled to [-1, 1].
			
 
				+        Samples are converted to float32 internally, with int scaled to [-1, 1].
			
 
				         Load a file supported by librosa and return as an AudioSegment.
			
 
				         :param filename: path of file to load
			
 
				         :param target_sr: the desired sample rate
			
@@ -67,10 +67,11 @@ class AudioSegment(object):
 
				 
			
 
				         samples = self._convert_samples_to_float32(samples)
			
 
				         if target_sr is not None and target_sr != sample_rate:
			
 
				-            samples = librosa.core.resample(samples, sample_rate, target_sr)
			
 
				+            samples = librosa.resample(samples, orig_sr=sample_rate,
			
 
				+                                       target_sr=target_sr)
			
 
				             sample_rate = target_sr
			
 
				         if trim:
			
 
				-            samples, _ = librosa.effects.trim(samples, trim_db)
			
 
				+            samples, _ = librosa.effects.trim(samples, top_db=trim_db)
			
 
				         self._samples = samples
			
 
				         self._sample_rate = sample_rate
			
 
				         if self._samples.ndim >= 2:
			
--- a/PyTorch/SpeechRecognition/Jasper/common/features.py
+++ b/PyTorch/SpeechRecognition/Jasper/common/features.py
@@ -233,7 +233,7 @@ class FilterbankFeatures(BaseFeatures):
 
				         window_tensor = window_fn(self.win_length,
			
 
				                                   periodic=False) if window_fn else None
			
 
				         filterbanks = torch.tensor(
			
 
				-            librosa.filters.mel(sample_rate, self.n_fft, n_mels=n_filt,
			
 
				+            librosa.filters.mel(sr=sample_rate, n_fft=self.n_fft, n_mels=n_filt,
			
 
				                                 fmin=lowfreq, fmax=highfreq),
			
 
				             dtype=torch.float).unsqueeze(0)
			
 
				         # torchscript
			
--- a/PyTorch/SpeechRecognition/Jasper/requirements.txt
+++ b/PyTorch/SpeechRecognition/Jasper/requirements.txt
@@ -1,6 +1,6 @@
 
				 inflect==5.3.0
			
 
				 ipdb
			
 
				-librosa==0.8.0
			
 
				+librosa==0.9.0
			
 
				 pandas==1.5.2
			
 
				 pyyaml>=5.4
			
 
				 soundfile
			
--- a/PyTorch/SpeechRecognition/Jasper/train.py
+++ b/PyTorch/SpeechRecognition/Jasper/train.py
@@ -54,7 +54,7 @@ def parse_args():
 
				     training.add_argument('--amp', '--fp16', action='store_true', default=False,
			
 
				                           help='Use pytorch native mixed precision training')
			
 
				     training.add_argument('--seed', default=42, type=int, help='Random seed')
			
 
				-    training.add_argument('--local_rank', default=os.getenv('LOCAL_RANK', 0),
			
 
				+    training.add_argument('--local_rank', '--local-rank', default=os.getenv('LOCAL_RANK', 0),
			
 
				                           type=int, help='GPU id used for distributed training')
			
 
				     training.add_argument('--pre_allocate_range', default=None, type=int, nargs=2,
			
 
				                           help='Warmup with batches of length [min, max] before training')
			
--- a/PyTorch/SpeechRecognition/Jasper/utils/preprocessing_utils.py
+++ b/PyTorch/SpeechRecognition/Jasper/utils/preprocessing_utils.py
@@ -15,7 +15,6 @@
 
				 #!/usr/bin/env python
			
 
				 import os
			
 
				 import multiprocessing
			
 
				-import librosa
			
 
				 import functools
			
 
				 
			
 
				 import sox
			
--- a/PyTorch/SpeechRecognition/QuartzNet/Dockerfile
+++ b/PyTorch/SpeechRecognition/QuartzNet/Dockerfile
@@ -24,7 +24,7 @@ COPY requirements.txt .
 
				 RUN if [[ ! -z "$(command -v conda)" ]]; then conda install -y pyyaml==5.4.1; fi
			
 
				 RUN pip install --disable-pip-version-check -U -r requirements.txt
			
 
				 
			
 
				-RUN pip install --extra-index-url https://developer.download.nvidia.com/compute/redist nvidia-dali-cuda110==1.9.0
			
 
				+RUN pip install --extra-index-url https://developer.download.nvidia.com/compute/redist nvidia-dali-cuda110==1.27.0
			
 
				 
			
 
				 # Copy rest of files
			
 
				 COPY . .
			
--- a/PyTorch/SpeechRecognition/QuartzNet/train.py
+++ b/PyTorch/SpeechRecognition/QuartzNet/train.py
@@ -56,7 +56,7 @@ def parse_args():
 
				     training.add_argument('--amp', '--fp16', action='store_true', default=False,
			
 
				                           help='Use pytorch native mixed precision training')
			
 
				     training.add_argument('--seed', default=None, type=int, help='Random seed')
			
 
				-    training.add_argument('--local_rank', default=os.getenv('LOCAL_RANK', 0), type=int,
			
 
				+    training.add_argument('--local_rank', '--local-rank', default=os.getenv('LOCAL_RANK', 0), type=int,
			
 
				                           help='GPU id used for distributed training')
			
 
				     training.add_argument('--pre_allocate_range', default=None, type=int, nargs=2,
			
 
				                           help='Warmup with batches of length [min, max] before training')