| 12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364 |
- import argparse
- import torch
- from tacotron2.data_function import TextMelLoader
- from tacotron2_common.utils import load_filepaths_and_text
def parse_args(parser):
    """Register this script's command-line options on ``parser``.

    Adds the dataset location, the two required filelists, the text
    cleaner selection and the numeric audio / mel settings, then returns
    the very same parser object so the caller can run ``parse_args()``.
    """
    # Dataset root (the only option with a short flag).
    parser.add_argument(
        '-d', '--dataset-path',
        type=str,
        default='./',
        help='Path to dataset',
    )

    # The two filelists are mandatory and share the same shape.
    required_filelists = (
        ('--wav-files', 'Path to filelist with audio paths and text'),
        ('--mel-files', 'Path to filelist with mel paths and text'),
    )
    for flag, description in required_filelists:
        parser.add_argument(flag, required=True, type=str, help=description)

    # Zero or more cleaner names; defaults to a single-element list.
    parser.add_argument(
        '--text-cleaners',
        nargs='*',
        default=['english_cleaners'],
        type=str,
        help='Type of text cleaners for input text',
    )

    # Remaining options only differ in flag, default, converter and help,
    # so they are declared as a table and registered in the listed order.
    numeric_options = (
        ('--max-wav-value', 32768.0, float, 'Maximum audiowave value'),
        ('--sampling-rate', 22050, int, 'Sampling rate'),
        ('--filter-length', 1024, int, 'Filter length'),
        ('--hop-length', 256, int, 'Hop (stride) length'),
        ('--win-length', 1024, int, 'Window length'),
        ('--mel-fmin', 0.0, float, 'Minimum mel frequency'),
        ('--mel-fmax', 8000.0, float, 'Maximum mel frequency'),
        ('--n-mel-channels', 80, int, 'Number of bins in mel-spectrograms'),
    )
    for flag, default_value, converter, description in numeric_options:
        parser.add_argument(
            flag, default=default_value, type=converter, help=description
        )

    return parser
def audio2mel(dataset_path, audiopaths_and_text, melpaths_and_text, args):
    """Compute a mel for every listed audio file and save it to disk.

    The two filelists are paired by position: the mel obtained from the
    first field of entry ``i`` in ``audiopaths_and_text`` is written with
    ``torch.save`` to the first field of entry ``i`` in
    ``melpaths_and_text``. Progress is printed every 100 entries.

    Args:
        dataset_path: Forwarded to ``load_filepaths_and_text`` and
            ``TextMelLoader``.
        audiopaths_and_text: Filelist whose entries start with an audio path.
        melpaths_and_text: Filelist whose entries start with the output path.
        args: Options object forwarded to ``TextMelLoader``.

    Raises:
        ValueError: If the mel filelist has more entries than the audio
            filelist, so some outputs would have no source audio.
    """
    mel_entries = load_filepaths_and_text(dataset_path, melpaths_and_text)
    audio_entries = load_filepaths_and_text(dataset_path, audiopaths_and_text)

    # Fail before any work is done instead of hitting an IndexError halfway
    # through, after part of the outputs has already been written.
    # NOTE(review): assumes both loaded filelists support len() -- confirm.
    total = len(mel_entries)
    if total > len(audio_entries):
        raise ValueError(
            f"mel filelist has {total} entries but audio filelist has only "
            f"{len(audio_entries)}; the lists are paired by position"
        )

    data_loader = TextMelLoader(dataset_path, audiopaths_and_text, args)

    # zip stops at the shorter list, which is the mel list after the check
    # above, so the number of processed entries matches the original loop.
    for index, (audio_entry, mel_entry) in enumerate(zip(audio_entries, mel_entries)):
        if index % 100 == 0:
            print("done", index, "/", total)

        mel = data_loader.get_mel(audio_entry[0])
        torch.save(mel, mel_entry[0])
def main():
    """Parse the command line and run the audio-to-mel conversion."""
    # NOTE(review): the description text mentions "Training" although this
    # entry point only calls audio2mel -- kept unchanged here.
    cli = parse_args(
        argparse.ArgumentParser(description='PyTorch Tacotron 2 Training')
    )
    options = cli.parse_args()

    # Not a command-line flag: set directly on the namespace before it is
    # handed to audio2mel.
    options.load_mel_from_disk = False

    audio2mel(
        options.dataset_path,
        options.wav_files,
        options.mel_files,
        options,
    )


# Run the conversion only when executed as a script, not on import.
if __name__ == '__main__':
    main()
|