arg_parser.py 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108
# *****************************************************************************
# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the NVIDIA CORPORATION nor the
# names of its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# *****************************************************************************
  27. import argparse
  28. from tacotron2.text import symbols
  29. def tacotron2_parser(parent, add_help=False):
  30. """
  31. Parse commandline arguments.
  32. """
  33. parser = argparse.ArgumentParser(parents=[parent], add_help=add_help)
  34. # misc parameters
  35. parser.add_argument('--mask-padding', default=False, type=bool,
  36. help='Use mask padding')
  37. parser.add_argument('--n-mel-channels', default=80, type=int,
  38. help='Number of bins in mel-spectrograms')
  39. # symbols parameters
  40. global symbols
  41. len_symbols = len(symbols)
  42. symbols = parser.add_argument_group('symbols parameters')
  43. symbols.add_argument('--n-symbols', default=len_symbols, type=int,
  44. help='Number of symbols in dictionary')
  45. symbols.add_argument('--symbols-embedding-dim', default=512, type=int,
  46. help='Input embedding dimension')
  47. # encoder parameters
  48. encoder = parser.add_argument_group('encoder parameters')
  49. encoder.add_argument('--encoder-kernel-size', default=5, type=int,
  50. help='Encoder kernel size')
  51. encoder.add_argument('--encoder-n-convolutions', default=3, type=int,
  52. help='Number of encoder convolutions')
  53. encoder.add_argument('--encoder-embedding-dim', default=512, type=int,
  54. help='Encoder embedding dimension')
  55. # decoder parameters
  56. decoder = parser.add_argument_group('decoder parameters')
  57. decoder.add_argument('--n-frames-per-step', default=1,
  58. type=int,
  59. help='Number of frames processed per step') # currently only 1 is supported
  60. decoder.add_argument('--decoder-rnn-dim', default=1024, type=int,
  61. help='Number of units in decoder LSTM')
  62. decoder.add_argument('--prenet-dim', default=256, type=int,
  63. help='Number of ReLU units in prenet layers')
  64. decoder.add_argument('--max-decoder-steps', default=2000, type=int,
  65. help='Maximum number of output mel spectrograms')
  66. decoder.add_argument('--gate-threshold', default=0.5, type=float,
  67. help='Probability threshold for stop token')
  68. decoder.add_argument('--p-attention-dropout', default=0.1, type=float,
  69. help='Dropout probability for attention LSTM')
  70. decoder.add_argument('--p-decoder-dropout', default=0.1, type=float,
  71. help='Dropout probability for decoder LSTM')
  72. decoder.add_argument('--decoder-no-early-stopping', action='store_true',
  73. help='Stop decoding once all samples are finished')
  74. # attention parameters
  75. attention = parser.add_argument_group('attention parameters')
  76. attention.add_argument('--attention-rnn-dim', default=1024, type=int,
  77. help='Number of units in attention LSTM')
  78. attention.add_argument('--attention-dim', default=128, type=int,
  79. help='Dimension of attention hidden representation')
  80. # location layer parameters
  81. location = parser.add_argument_group('location parameters')
  82. location.add_argument(
  83. '--attention-location-n-filters', default=32, type=int,
  84. help='Number of filters for location-sensitive attention')
  85. location.add_argument(
  86. '--attention-location-kernel-size', default=31, type=int,
  87. help='Kernel size for location-sensitive attention')
  88. # Mel-post processing network parameters
  89. postnet = parser.add_argument_group('postnet parameters')
  90. postnet.add_argument('--postnet-embedding-dim', default=512, type=int,
  91. help='Postnet embedding dimension')
  92. postnet.add_argument('--postnet-kernel-size', default=5, type=int,
  93. help='Postnet kernel size')
  94. postnet.add_argument('--postnet-n-convolutions', default=5, type=int,
  95. help='Number of postnet convolutions')
  96. return parser