# preprocess_audio2mel.py (2.6 KB)
  1. import argparse
  2. import torch
  3. from tacotron2.data_function import TextMelLoader
  4. from tacotron2_common.utils import load_filepaths_and_text
  5. def parse_args(parser):
  6. """
  7. Parse commandline arguments.
  8. """
  9. parser.add_argument('-d', '--dataset-path', type=str,
  10. default='./', help='Path to dataset')
  11. parser.add_argument('--wav-files', required=True,
  12. type=str, help='Path to filelist with audio paths and text')
  13. parser.add_argument('--mel-files', required=True,
  14. type=str, help='Path to filelist with mel paths and text')
  15. parser.add_argument('--text-cleaners', nargs='*',
  16. default=['english_cleaners'], type=str,
  17. help='Type of text cleaners for input text')
  18. parser.add_argument('--max-wav-value', default=32768.0, type=float,
  19. help='Maximum audiowave value')
  20. parser.add_argument('--sampling-rate', default=22050, type=int,
  21. help='Sampling rate')
  22. parser.add_argument('--filter-length', default=1024, type=int,
  23. help='Filter length')
  24. parser.add_argument('--hop-length', default=256, type=int,
  25. help='Hop (stride) length')
  26. parser.add_argument('--win-length', default=1024, type=int,
  27. help='Window length')
  28. parser.add_argument('--mel-fmin', default=0.0, type=float,
  29. help='Minimum mel frequency')
  30. parser.add_argument('--mel-fmax', default=8000.0, type=float,
  31. help='Maximum mel frequency')
  32. parser.add_argument('--n-mel-channels', default=80, type=int,
  33. help='Number of bins in mel-spectrograms')
  34. return parser
  35. def audio2mel(dataset_path, audiopaths_and_text, melpaths_and_text, args):
  36. melpaths_and_text_list = load_filepaths_and_text(dataset_path, melpaths_and_text)
  37. audiopaths_and_text_list = load_filepaths_and_text(dataset_path, audiopaths_and_text)
  38. data_loader = TextMelLoader(dataset_path, audiopaths_and_text, args)
  39. for i in range(len(melpaths_and_text_list)):
  40. if i%100 == 0:
  41. print("done", i, "/", len(melpaths_and_text_list))
  42. mel = data_loader.get_mel(audiopaths_and_text_list[i][0])
  43. torch.save(mel, melpaths_and_text_list[i][0])
  44. def main():
  45. parser = argparse.ArgumentParser(description='PyTorch Tacotron 2 Training')
  46. parser = parse_args(parser)
  47. args = parser.parse_args()
  48. args.load_mel_from_disk = False
  49. audio2mel(args.dataset_path, args.wav_files, args.mel_files, args)
  50. if __name__ == '__main__':
  51. main()