| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124 |
# BSD 3-Clause License
# Copyright (c) 2018-2020, NVIDIA Corporation
# All rights reserved.
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# * Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# * Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- """https://github.com/NVIDIA/tacotron2"""
- from fastspeech.text_norm import symbols
class Hparams:
    """Hyperparameter container for Tacotron 2 training and inference.

    Every setting is assigned as a plain instance attribute in
    ``__init__``; consumers read them directly (no getters/setters).
    """

    def __init__(self):
        # --- Experiment parameters ------------------------------------
        self.epochs = 500
        self.iters_per_checkpoint = 1000
        self.seed = 1234
        self.dynamic_loss_scaling = True
        self.fp16_run = False
        self.distributed_run = False
        self.dist_backend = "nccl"
        self.dist_url = "tcp://localhost:54321"
        self.cudnn_enabled = True
        self.cudnn_benchmark = False
        # Layers skipped when restoring from a warm-start checkpoint.
        self.ignore_layers = ['embedding.weight']

        # --- Data parameters ------------------------------------------
        self.load_mel_from_disk = False
        self.training_files = 'filelists/ljs_audio_text_train_filelist.txt'
        self.validation_files = 'filelists/ljs_audio_text_val_filelist.txt'
        self.text_cleaners = ['english_cleaners']

        # --- Audio parameters (STFT / mel extraction) -----------------
        self.max_wav_value = 32768.0
        self.sampling_rate = 22050
        self.filter_length = 1024
        self.hop_length = 256
        self.win_length = 1024
        self.n_mel_channels = 80
        self.mel_fmin = 0.0
        self.mel_fmax = 8000.0

        # --- Model parameters -----------------------------------------
        # Vocabulary size comes from the symbol table shipped with the
        # text-normalization module.
        self.n_symbols = len(symbols)
        self.symbols_embedding_dim = 512

        # Encoder
        self.encoder_kernel_size = 5
        self.encoder_n_convolutions = 3
        self.encoder_embedding_dim = 512

        # Decoder
        self.n_frames_per_step = 1  # currently only 1 is supported
        self.decoder_rnn_dim = 1024
        self.prenet_dim = 256
        self.max_decoder_steps = 1000
        self.gate_threshold = 0.5
        self.p_attention_dropout = 0.1
        self.p_decoder_dropout = 0.1

        # Attention
        self.attention_rnn_dim = 1024
        self.attention_dim = 128

        # Location layer
        self.attention_location_n_filters = 32
        self.attention_location_kernel_size = 31

        # Mel post-processing network
        self.postnet_embedding_dim = 512
        self.postnet_kernel_size = 5
        self.postnet_n_convolutions = 5

        # --- Optimization hyperparameters -----------------------------
        self.use_saved_learning_rate = False
        self.learning_rate = 1e-3
        self.weight_decay = 1e-6
        self.grad_clip_thresh = 1.0
        self.batch_size = 64
        self.mask_padding = True  # set model's padded outputs to padded values

    def return_self(self):
        """Return this very instance (kept for backward compatibility)."""
        return self
def create_hparams():
    """Build and return a fully-populated hyperparameter object.

    Returns:
        Hparams: container holding the Tacotron 2 default settings.
    """
    # The original routed the instance through Hparams.return_self(),
    # which simply returns its receiver — constructing and returning
    # directly is equivalent and clearer.
    return Hparams()
|