layers.py 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106
  1. # *****************************************************************************
  2. # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
  3. #
  4. # Redistribution and use in source and binary forms, with or without
  5. # modification, are permitted provided that the following conditions are met:
  6. # * Redistributions of source code must retain the above copyright
  7. # notice, this list of conditions and the following disclaimer.
  8. # * Redistributions in binary form must reproduce the above copyright
  9. # notice, this list of conditions and the following disclaimer in the
  10. # documentation and/or other materials provided with the distribution.
  11. # * Neither the name of the NVIDIA CORPORATION nor the
  12. # names of its contributors may be used to endorse or promote products
  13. # derived from this software without specific prior written permission.
  14. #
  15. # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
  16. # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  17. # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  18. # DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
  19. # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  20. # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  21. # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  22. # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  23. # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  24. # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  25. #
  26. # *****************************************************************************
  27. import torch
  28. from librosa.filters import mel as librosa_mel_fn
  29. from common.audio_processing import dynamic_range_compression, dynamic_range_decompression
  30. from common.stft import STFT
  31. class LinearNorm(torch.nn.Module):
  32. def __init__(self, in_dim, out_dim, bias=True, w_init_gain='linear'):
  33. super(LinearNorm, self).__init__()
  34. self.linear_layer = torch.nn.Linear(in_dim, out_dim, bias=bias)
  35. torch.nn.init.xavier_uniform_(
  36. self.linear_layer.weight,
  37. gain=torch.nn.init.calculate_gain(w_init_gain))
  38. def forward(self, x):
  39. return self.linear_layer(x)
  40. class ConvNorm(torch.nn.Module):
  41. def __init__(self, in_channels, out_channels, kernel_size=1, stride=1,
  42. padding=None, dilation=1, bias=True, w_init_gain='linear'):
  43. super(ConvNorm, self).__init__()
  44. if padding is None:
  45. assert(kernel_size % 2 == 1)
  46. padding = int(dilation * (kernel_size - 1) / 2)
  47. self.conv = torch.nn.Conv1d(in_channels, out_channels,
  48. kernel_size=kernel_size, stride=stride,
  49. padding=padding, dilation=dilation,
  50. bias=bias)
  51. torch.nn.init.xavier_uniform_(
  52. self.conv.weight,
  53. gain=torch.nn.init.calculate_gain(w_init_gain))
  54. def forward(self, signal):
  55. return self.conv(signal)
  56. class TacotronSTFT(torch.nn.Module):
  57. def __init__(self, filter_length=1024, hop_length=256, win_length=1024,
  58. n_mel_channels=80, sampling_rate=22050, mel_fmin=0.0,
  59. mel_fmax=8000.0):
  60. super(TacotronSTFT, self).__init__()
  61. self.n_mel_channels = n_mel_channels
  62. self.sampling_rate = sampling_rate
  63. self.stft_fn = STFT(filter_length, hop_length, win_length)
  64. mel_basis = librosa_mel_fn(
  65. sampling_rate, filter_length, n_mel_channels, mel_fmin, mel_fmax)
  66. mel_basis = torch.from_numpy(mel_basis).float()
  67. self.register_buffer('mel_basis', mel_basis)
  68. def spectral_normalize(self, magnitudes):
  69. output = dynamic_range_compression(magnitudes)
  70. return output
  71. def spectral_de_normalize(self, magnitudes):
  72. output = dynamic_range_decompression(magnitudes)
  73. return output
  74. def mel_spectrogram(self, y):
  75. """Computes mel-spectrograms from a batch of waves
  76. PARAMS
  77. ------
  78. y: Variable(torch.FloatTensor) with shape (B, T) in range [-1, 1]
  79. RETURNS
  80. -------
  81. mel_output: torch.FloatTensor of shape (B, n_mel_channels, T)
  82. """
  83. assert(torch.min(y.data) >= -1)
  84. assert(torch.max(y.data) <= 1)
  85. magnitudes, phases = self.stft_fn.transform(y)
  86. magnitudes = magnitudes.data
  87. mel_output = torch.matmul(self.mel_basis, magnitudes)
  88. mel_output = self.spectral_normalize(mel_output)
  89. return mel_output