hparams.py 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124
# BSD 3-Clause License
# Copyright (c) 2018-2020, NVIDIA Corporation
# All rights reserved.
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# * Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# * Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""Hyper parameter definitions. Reference: https://github.com/NVIDIA/tacotron2"""
  25. from fastspeech.text_norm import symbols
  26. class Hparams:
  27. """ hyper parameters """
  28. def __init__(self):
  29. ################################
  30. # Experiment Parameters #
  31. ################################
  32. self.epochs = 500
  33. self.iters_per_checkpoint = 1000
  34. self.seed = 1234
  35. self.dynamic_loss_scaling = True
  36. self.fp16_run = False
  37. self.distributed_run = False
  38. self.dist_backend = "nccl"
  39. self.dist_url = "tcp://localhost:54321"
  40. self.cudnn_enabled = True
  41. self.cudnn_benchmark = False
  42. self.ignore_layers = ['embedding.weight']
  43. ################################
  44. # Data Parameters #
  45. ################################
  46. self.load_mel_from_disk = False
  47. self.training_files = 'filelists/ljs_audio_text_train_filelist.txt'
  48. self.validation_files = 'filelists/ljs_audio_text_val_filelist.txt'
  49. self.text_cleaners = ['english_cleaners']
  50. ################################
  51. # Audio Parameters #
  52. ################################
  53. self.max_wav_value = 32768.0
  54. self.sampling_rate = 22050
  55. self.filter_length = 1024
  56. self.hop_length = 256
  57. self.win_length = 1024
  58. self.n_mel_channels = 80
  59. self.mel_fmin = 0.0
  60. self.mel_fmax = 8000.0
  61. ################################
  62. # Model Parameters #
  63. ################################
  64. self.n_symbols = len(symbols)
  65. self.symbols_embedding_dim = 512
  66. # Encoder parameters
  67. self.encoder_kernel_size = 5
  68. self.encoder_n_convolutions = 3
  69. self.encoder_embedding_dim = 512
  70. # Decoder parameters
  71. self.n_frames_per_step = 1 # currently only 1 is supported
  72. self.decoder_rnn_dim = 1024
  73. self.prenet_dim = 256
  74. self.max_decoder_steps = 1000
  75. self.gate_threshold = 0.5
  76. self.p_attention_dropout = 0.1
  77. self.p_decoder_dropout = 0.1
  78. # Attention parameters
  79. self.attention_rnn_dim = 1024
  80. self.attention_dim = 128
  81. # Location Layer parameters
  82. self.attention_location_n_filters = 32
  83. self.attention_location_kernel_size = 31
  84. # Mel-post processing network parameters
  85. self.postnet_embedding_dim = 512
  86. self.postnet_kernel_size = 5
  87. self.postnet_n_convolutions = 5
  88. ################################
  89. # Optimization Hyperparameters #
  90. ################################
  91. self.use_saved_learning_rate = False
  92. self.learning_rate = 1e-3
  93. self.weight_decay = 1e-6
  94. self.grad_clip_thresh = 1.0
  95. self.batch_size = 64
  96. self.mask_padding = True # set model's padded outputs to padded values
  97. def return_self(self):
  98. return self
  99. def create_hparams():
  100. hparams = Hparams()
  101. return hparams.return_self()