arg_parser.py 6.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112
  1. # *****************************************************************************
  2. # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
  3. #
  4. # Redistribution and use in source and binary forms, with or without
  5. # modification, are permitted provided that the following conditions are met:
  6. # * Redistributions of source code must retain the above copyright
  7. # notice, this list of conditions and the following disclaimer.
  8. # * Redistributions in binary form must reproduce the above copyright
  9. # notice, this list of conditions and the following disclaimer in the
  10. # documentation and/or other materials provided with the distribution.
  11. # * Neither the name of the NVIDIA CORPORATION nor the
  12. # names of its contributors may be used to endorse or promote products
  13. # derived from this software without specific prior written permission.
  14. #
  15. # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
  16. # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  17. # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  18. # DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
  19. # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  20. # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  21. # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  22. # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  23. # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  24. # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  25. #
  26. # *****************************************************************************
  27. import argparse
  28. from common.text import symbols
  29. def parse_fastpitch_args(parent, add_help=False):
  30. """
  31. Parse commandline arguments.
  32. """
  33. parser = argparse.ArgumentParser(parents=[parent], add_help=add_help,
  34. allow_abbrev=False)
  35. io = parser.add_argument_group('io parameters')
  36. io.add_argument('--n-mel-channels', default=80, type=int,
  37. help='Number of bins in mel-spectrograms')
  38. io.add_argument('--max-seq-len', default=2048, type=int,
  39. help='')
  40. global symbols
  41. len_symbols = len(symbols)
  42. symbols = parser.add_argument_group('symbols parameters')
  43. symbols.add_argument('--n-symbols', default=len_symbols, type=int,
  44. help='Number of symbols in dictionary')
  45. symbols.add_argument('--symbols-embedding-dim', default=384, type=int,
  46. help='Input embedding dimension')
  47. in_fft = parser.add_argument_group('input FFT parameters')
  48. in_fft.add_argument('--in-fft-n-layers', default=6, type=int,
  49. help='Number of FFT blocks')
  50. in_fft.add_argument('--in-fft-n-heads', default=1, type=int,
  51. help='Number of attention heads')
  52. in_fft.add_argument('--in-fft-d-head', default=64, type=int,
  53. help='Dim of attention heads')
  54. in_fft.add_argument('--in-fft-conv1d-kernel-size', default=3, type=int,
  55. help='Conv-1D kernel size')
  56. in_fft.add_argument('--in-fft-conv1d-filter-size', default=1536, type=int,
  57. help='Conv-1D filter size')
  58. in_fft.add_argument('--in-fft-output-size', default=384, type=int,
  59. help='Output dim')
  60. in_fft.add_argument('--p-in-fft-dropout', default=0.1, type=float,
  61. help='Dropout probability')
  62. in_fft.add_argument('--p-in-fft-dropatt', default=0.1, type=float,
  63. help='Multi-head attention dropout')
  64. in_fft.add_argument('--p-in-fft-dropemb', default=0.0, type=float,
  65. help='Dropout added to word+positional embeddings')
  66. out_fft = parser.add_argument_group('output FFT parameters')
  67. out_fft.add_argument('--out-fft-n-layers', default=6, type=int,
  68. help='Number of FFT blocks')
  69. out_fft.add_argument('--out-fft-n-heads', default=1, type=int,
  70. help='Number of attention heads')
  71. out_fft.add_argument('--out-fft-d-head', default=64, type=int,
  72. help='Dim of attention head')
  73. out_fft.add_argument('--out-fft-conv1d-kernel-size', default=3, type=int,
  74. help='Conv-1D kernel size')
  75. out_fft.add_argument('--out-fft-conv1d-filter-size', default=1536, type=int,
  76. help='Conv-1D filter size')
  77. out_fft.add_argument('--out-fft-output-size', default=384, type=int,
  78. help='Output dim')
  79. out_fft.add_argument('--p-out-fft-dropout', default=0.1, type=float,
  80. help='Dropout probability for out_fft')
  81. out_fft.add_argument('--p-out-fft-dropatt', default=0.1, type=float,
  82. help='Multi-head attention dropout')
  83. out_fft.add_argument('--p-out-fft-dropemb', default=0.0, type=float,
  84. help='Dropout added to word+positional embeddings')
  85. dur_pred = parser.add_argument_group('duration predictor parameters')
  86. dur_pred.add_argument('--dur-predictor-kernel-size', default=3, type=int,
  87. help='Duration predictor conv-1D kernel size')
  88. dur_pred.add_argument('--dur-predictor-filter-size', default=256, type=int,
  89. help='Duration predictor conv-1D filter size')
  90. dur_pred.add_argument('--p-dur-predictor-dropout', default=0.1, type=float,
  91. help='Dropout probability for duration predictor')
  92. dur_pred.add_argument('--dur-predictor-n-layers', default=2, type=int,
  93. help='Number of conv-1D layers')
  94. pitch_pred = parser.add_argument_group('pitch predictor parameters')
  95. pitch_pred.add_argument('--pitch-predictor-kernel-size', default=3, type=int,
  96. help='Pitch predictor conv-1D kernel size')
  97. pitch_pred.add_argument('--pitch-predictor-filter-size', default=256, type=int,
  98. help='Pitch predictor conv-1D filter size')
  99. pitch_pred.add_argument('--p-pitch-predictor-dropout', default=0.1, type=float,
  100. help='Pitch probability for pitch predictor')
  101. pitch_pred.add_argument('--pitch-predictor-n-layers', default=2, type=int,
  102. help='Number of conv-1D layers')
  103. return parser