model.py 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565
  1. # BSD 3-Clause License
  2. # Copyright (c) 2018-2020, NVIDIA Corporation
  3. # All rights reserved.
  4. # Redistribution and use in source and binary forms, with or without
  5. # modification, are permitted provided that the following conditions are met:
  6. # * Redistributions of source code must retain the above copyright notice, this
  7. # list of conditions and the following disclaimer.
  8. # * Redistributions in binary form must reproduce the above copyright notice,
  9. # this list of conditions and the following disclaimer in the documentation
  10. # and/or other materials provided with the distribution.
  11. # * Neither the name of the copyright holder nor the names of its
  12. # contributors may be used to endorse or promote products derived from
  13. # this software without specific prior written permission.
  14. # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  15. # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  16. # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  17. # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
  18. # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  19. # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  20. # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  21. # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  22. # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  23. # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  24. """https://github.com/NVIDIA/tacotron2"""
  25. from math import sqrt
  26. import torch
  27. from torch.autograd import Variable
  28. from torch import nn
  29. from torch.nn import functional as F
  30. from tacotron2.layers import ConvNorm, LinearNorm
  31. from tacotron2.utils import to_gpu, get_mask_from_lengths
  32. class LocationLayer(nn.Module):
  33. def __init__(self, attention_n_filters, attention_kernel_size,
  34. attention_dim):
  35. super(LocationLayer, self).__init__()
  36. padding = int((attention_kernel_size - 1) / 2)
  37. self.location_conv = ConvNorm(2, attention_n_filters,
  38. kernel_size=attention_kernel_size,
  39. padding=padding, bias=False, stride=1,
  40. dilation=1)
  41. self.location_dense = LinearNorm(attention_n_filters, attention_dim,
  42. bias=False, w_init_gain='tanh')
  43. def forward(self, attention_weights_cat):
  44. processed_attention = self.location_conv(attention_weights_cat)
  45. processed_attention = processed_attention.transpose(1, 2)
  46. processed_attention = self.location_dense(processed_attention)
  47. return processed_attention
  48. class Attention(nn.Module):
  49. def __init__(self, attention_rnn_dim, embedding_dim, attention_dim,
  50. attention_location_n_filters, attention_location_kernel_size):
  51. super(Attention, self).__init__()
  52. self.query_layer = LinearNorm(attention_rnn_dim, attention_dim,
  53. bias=False, w_init_gain='tanh')
  54. self.memory_layer = LinearNorm(embedding_dim, attention_dim, bias=False,
  55. w_init_gain='tanh')
  56. self.v = LinearNorm(attention_dim, 1, bias=False)
  57. self.location_layer = LocationLayer(attention_location_n_filters,
  58. attention_location_kernel_size,
  59. attention_dim)
  60. self.score_mask_value = -1e9
  61. def get_alignment_energies(self, query, processed_memory,
  62. attention_weights_cat):
  63. """
  64. PARAMS
  65. ------
  66. query: decoder output (batch, n_mel_channels * n_frames_per_step)
  67. processed_memory: processed encoder outputs (B, T_in, attention_dim)
  68. attention_weights_cat: cumulative and prev. att weights (B, 2, max_time)
  69. RETURNS
  70. -------
  71. alignment (batch, max_time)
  72. """
  73. processed_query = self.query_layer(query.unsqueeze(1))
  74. processed_attention_weights = self.location_layer(
  75. attention_weights_cat)
  76. energies = self.v(torch.tanh(
  77. processed_query + processed_attention_weights + processed_memory))
  78. energies = energies.squeeze(-1)
  79. return energies
  80. def forward(self, attention_hidden_state, memory, processed_memory,
  81. attention_weights_cat, mask):
  82. """
  83. PARAMS
  84. ------
  85. attention_hidden_state: attention rnn last output
  86. memory: encoder outputs
  87. processed_memory: processed encoder outputs
  88. attention_weights_cat: previous and cummulative attention weights
  89. mask: binary mask for padded data
  90. """
  91. alignment = self.get_alignment_energies(
  92. attention_hidden_state, processed_memory, attention_weights_cat)
  93. if mask is not None:
  94. alignment.data.masked_fill_(mask, self.score_mask_value)
  95. attention_weights = F.softmax(alignment, dim=1)
  96. attention_context = torch.bmm(attention_weights.unsqueeze(1), memory)
  97. attention_context = attention_context.squeeze(1)
  98. return attention_context, attention_weights
  99. class Prenet(nn.Module):
  100. def __init__(self, in_dim, sizes):
  101. super(Prenet, self).__init__()
  102. in_sizes = [in_dim] + sizes[:-1]
  103. self.layers = nn.ModuleList(
  104. [LinearNorm(in_size, out_size, bias=False)
  105. for (in_size, out_size) in zip(in_sizes, sizes)])
  106. def forward(self, x):
  107. for linear in self.layers:
  108. x = F.dropout(F.relu(linear(x)), p=0.5, training=True)
  109. return x
  110. class Postnet(nn.Module):
  111. """Postnet
  112. - Five 1-d convolution with 512 channels and kernel size 5
  113. """
  114. def __init__(self, hparams):
  115. super(Postnet, self).__init__()
  116. self.convolutions = nn.ModuleList()
  117. self.convolutions.append(
  118. nn.Sequential(
  119. ConvNorm(hparams.n_mel_channels, hparams.postnet_embedding_dim,
  120. kernel_size=hparams.postnet_kernel_size, stride=1,
  121. padding=int((hparams.postnet_kernel_size - 1) / 2),
  122. dilation=1, w_init_gain='tanh'),
  123. nn.BatchNorm1d(hparams.postnet_embedding_dim))
  124. )
  125. for i in range(1, hparams.postnet_n_convolutions - 1):
  126. self.convolutions.append(
  127. nn.Sequential(
  128. ConvNorm(hparams.postnet_embedding_dim,
  129. hparams.postnet_embedding_dim,
  130. kernel_size=hparams.postnet_kernel_size, stride=1,
  131. padding=int(
  132. (hparams.postnet_kernel_size - 1) / 2),
  133. dilation=1, w_init_gain='tanh'),
  134. nn.BatchNorm1d(hparams.postnet_embedding_dim))
  135. )
  136. self.convolutions.append(
  137. nn.Sequential(
  138. ConvNorm(hparams.postnet_embedding_dim, hparams.n_mel_channels,
  139. kernel_size=hparams.postnet_kernel_size, stride=1,
  140. padding=int((hparams.postnet_kernel_size - 1) / 2),
  141. dilation=1, w_init_gain='linear'),
  142. nn.BatchNorm1d(hparams.n_mel_channels))
  143. )
  144. def forward(self, x):
  145. for i in range(len(self.convolutions) - 1):
  146. x = F.dropout(torch.tanh(
  147. self.convolutions[i](x)), 0.5, self.training)
  148. x = F.dropout(self.convolutions[-1](x), 0.5, self.training)
  149. return x
  150. class Encoder(nn.Module):
  151. """Encoder module:
  152. - Three 1-d convolution banks
  153. - Bidirectional LSTM
  154. """
  155. def __init__(self, hparams):
  156. super(Encoder, self).__init__()
  157. convolutions = []
  158. for _ in range(hparams.encoder_n_convolutions):
  159. conv_layer = nn.Sequential(
  160. ConvNorm(hparams.encoder_embedding_dim,
  161. hparams.encoder_embedding_dim,
  162. kernel_size=hparams.encoder_kernel_size, stride=1,
  163. padding=int((hparams.encoder_kernel_size - 1) / 2),
  164. dilation=1, w_init_gain='relu'),
  165. nn.BatchNorm1d(hparams.encoder_embedding_dim))
  166. convolutions.append(conv_layer)
  167. self.convolutions = nn.ModuleList(convolutions)
  168. self.lstm = nn.LSTM(hparams.encoder_embedding_dim,
  169. int(hparams.encoder_embedding_dim / 2), 1,
  170. batch_first=True, bidirectional=True)
  171. def forward(self, x, input_lengths):
  172. for conv in self.convolutions:
  173. x = F.dropout(F.relu(conv(x)), 0.5, self.training)
  174. x = x.transpose(1, 2)
  175. # pytorch tensor are not reversible, hence the conversion
  176. input_lengths = input_lengths.cpu().numpy()
  177. x = nn.utils.rnn.pack_padded_sequence(
  178. x, input_lengths, batch_first=True)
  179. self.lstm.flatten_parameters()
  180. outputs, _ = self.lstm(x)
  181. outputs, _ = nn.utils.rnn.pad_packed_sequence(
  182. outputs, batch_first=True)
  183. return outputs
  184. def inference(self, x):
  185. for conv in self.convolutions:
  186. x = F.dropout(F.relu(conv(x)), 0.5, self.training)
  187. x = x.transpose(1, 2)
  188. self.lstm.flatten_parameters()
  189. outputs, _ = self.lstm(x)
  190. return outputs
class Decoder(nn.Module):
    """Autoregressive attention decoder.

    Each step feeds the previous mel frame through the prenet, attends over
    the encoder memory with location-sensitive attention, and emits the next
    mel frame plus a stop-token ("gate") energy.  Recurrent state is kept on
    ``self`` between calls to ``decode``.
    """

    def __init__(self, hparams):
        super(Decoder, self).__init__()
        self.n_mel_channels = hparams.n_mel_channels
        self.n_frames_per_step = hparams.n_frames_per_step
        self.encoder_embedding_dim = hparams.encoder_embedding_dim
        self.attention_rnn_dim = hparams.attention_rnn_dim
        self.decoder_rnn_dim = hparams.decoder_rnn_dim
        self.prenet_dim = hparams.prenet_dim
        self.max_decoder_steps = hparams.max_decoder_steps
        self.gate_threshold = hparams.gate_threshold
        self.p_attention_dropout = hparams.p_attention_dropout
        self.p_decoder_dropout = hparams.p_decoder_dropout
        # Prenet compresses the previous mel frame before it enters the RNNs.
        self.prenet = Prenet(
            hparams.n_mel_channels * hparams.n_frames_per_step,
            [hparams.prenet_dim, hparams.prenet_dim])
        # Attention RNN input: prenet output concatenated with prev. context.
        self.attention_rnn = nn.LSTMCell(
            hparams.prenet_dim + hparams.encoder_embedding_dim,
            hparams.attention_rnn_dim)
        self.attention_layer = Attention(
            hparams.attention_rnn_dim, hparams.encoder_embedding_dim,
            hparams.attention_dim, hparams.attention_location_n_filters,
            hparams.attention_location_kernel_size)
        # Decoder RNN input: attention hidden state + attention context.
        # (The trailing 1 is LSTMCell's positional `bias` argument.)
        self.decoder_rnn = nn.LSTMCell(
            hparams.attention_rnn_dim + hparams.encoder_embedding_dim,
            hparams.decoder_rnn_dim, 1)
        self.linear_projection = LinearNorm(
            hparams.decoder_rnn_dim + hparams.encoder_embedding_dim,
            hparams.n_mel_channels * hparams.n_frames_per_step)
        self.gate_layer = LinearNorm(
            hparams.decoder_rnn_dim + hparams.encoder_embedding_dim, 1,
            bias=True, w_init_gain='sigmoid')

    def get_go_frame(self, memory):
        """ Gets all zeros frames to use as first decoder input
        PARAMS
        ------
        memory: decoder outputs
        RETURNS
        -------
        decoder_input: all zeros frames
        """
        B = memory.size(0)
        # Same device/dtype as memory via .new(); Variable is a no-op wrapper
        # on modern PyTorch (>=0.4) but is kept for compatibility.
        decoder_input = Variable(memory.data.new(
            B, self.n_mel_channels * self.n_frames_per_step).zero_())
        return decoder_input

    def initialize_decoder_states(self, memory, mask):
        """ Initializes attention rnn states, decoder rnn states, attention
        weights, attention cumulative weights, attention context, stores memory
        and stores processed memory
        PARAMS
        ------
        memory: Encoder outputs
        mask: Mask for padded data if training, expects None for inference
        """
        B = memory.size(0)
        MAX_TIME = memory.size(1)
        self.attention_hidden = Variable(memory.data.new(
            B, self.attention_rnn_dim).zero_())
        self.attention_cell = Variable(memory.data.new(
            B, self.attention_rnn_dim).zero_())
        self.decoder_hidden = Variable(memory.data.new(
            B, self.decoder_rnn_dim).zero_())
        self.decoder_cell = Variable(memory.data.new(
            B, self.decoder_rnn_dim).zero_())
        self.attention_weights = Variable(memory.data.new(
            B, MAX_TIME).zero_())
        self.attention_weights_cum = Variable(memory.data.new(
            B, MAX_TIME).zero_())
        self.attention_context = Variable(memory.data.new(
            B, self.encoder_embedding_dim).zero_())
        self.memory = memory
        # Pre-project the memory once; reused by every attention step.
        self.processed_memory = self.attention_layer.memory_layer(memory)
        self.mask = mask

    def parse_decoder_inputs(self, decoder_inputs):
        """ Prepares decoder inputs, i.e. mel outputs
        PARAMS
        ------
        decoder_inputs: inputs used for teacher-forced training, i.e. mel-specs
        RETURNS
        -------
        inputs: processed decoder inputs
        """
        # (B, n_mel_channels, T_out) -> (B, T_out, n_mel_channels)
        decoder_inputs = decoder_inputs.transpose(1, 2)
        # Group n_frames_per_step frames into a single decoder step.
        decoder_inputs = decoder_inputs.view(
            decoder_inputs.size(0),
            int(decoder_inputs.size(1)/self.n_frames_per_step), -1)
        # (B, T_out, n_mel_channels) -> (T_out, B, n_mel_channels)
        decoder_inputs = decoder_inputs.transpose(0, 1)
        return decoder_inputs

    def parse_decoder_outputs(self, mel_outputs, gate_outputs, alignments):
        """ Prepares decoder outputs for output
        PARAMS
        ------
        mel_outputs: list of per-step mel frames
        gate_outputs: gate output energies
        alignments: list of per-step attention weights
        RETURNS
        -------
        mel_outputs: (B, n_mel_channels, T_out)
        gate_outputs: gate output energies, (B, T_out)
        alignments: (B, T_out, max_time)
        """
        # (T_out, B) -> (B, T_out)
        alignments = torch.stack(alignments).transpose(0, 1)
        # (T_out, B) -> (B, T_out)
        gate_outputs = torch.stack(gate_outputs).transpose(0, 1)
        gate_outputs = gate_outputs.contiguous()
        # (T_out, B, n_mel_channels) -> (B, T_out, n_mel_channels)
        mel_outputs = torch.stack(mel_outputs).transpose(0, 1).contiguous()
        # decouple frames per step
        mel_outputs = mel_outputs.view(
            mel_outputs.size(0), -1, self.n_mel_channels)
        # (B, T_out, n_mel_channels) -> (B, n_mel_channels, T_out)
        mel_outputs = mel_outputs.transpose(1, 2)
        return mel_outputs, gate_outputs, alignments

    def decode(self, decoder_input):
        """ Decoder step using stored states, attention and memory
        PARAMS
        ------
        decoder_input: previous mel output (after prenet)
        RETURNS
        -------
        mel_output:
        gate_output: gate output energies
        attention_weights:
        """
        # Attention RNN consumes prenet output + previous attention context.
        cell_input = torch.cat((decoder_input, self.attention_context), -1)
        self.attention_hidden, self.attention_cell = self.attention_rnn(
            cell_input, (self.attention_hidden, self.attention_cell))
        self.attention_hidden = F.dropout(
            self.attention_hidden, self.p_attention_dropout, self.training)
        # Stack previous and cumulative weights for the location layer.
        attention_weights_cat = torch.cat(
            (self.attention_weights.unsqueeze(1),
             self.attention_weights_cum.unsqueeze(1)), dim=1)
        self.attention_context, self.attention_weights = self.attention_layer(
            self.attention_hidden, self.memory, self.processed_memory,
            attention_weights_cat, self.mask)
        self.attention_weights_cum += self.attention_weights
        # Decoder RNN consumes attention hidden state + new context.
        decoder_input = torch.cat(
            (self.attention_hidden, self.attention_context), -1)
        self.decoder_hidden, self.decoder_cell = self.decoder_rnn(
            decoder_input, (self.decoder_hidden, self.decoder_cell))
        self.decoder_hidden = F.dropout(
            self.decoder_hidden, self.p_decoder_dropout, self.training)
        decoder_hidden_attention_context = torch.cat(
            (self.decoder_hidden, self.attention_context), dim=1)
        decoder_output = self.linear_projection(
            decoder_hidden_attention_context)
        gate_prediction = self.gate_layer(decoder_hidden_attention_context)
        return decoder_output, gate_prediction, self.attention_weights

    def forward(self, memory, decoder_inputs, memory_lengths):
        """ Decoder forward pass for training
        PARAMS
        ------
        memory: Encoder outputs
        decoder_inputs: Decoder inputs for teacher forcing. i.e. mel-specs
        memory_lengths: Encoder output lengths for attention masking.
        RETURNS
        -------
        mel_outputs: mel outputs from the decoder
        gate_outputs: gate outputs from the decoder
        alignments: sequence of attention weights from the decoder
        """
        # Prepend an all-zero <go> frame to the teacher-forcing inputs.
        decoder_input = self.get_go_frame(memory).unsqueeze(0)
        decoder_inputs = self.parse_decoder_inputs(decoder_inputs)
        decoder_inputs = torch.cat((decoder_input, decoder_inputs), dim=0)
        decoder_inputs = self.prenet(decoder_inputs)
        self.initialize_decoder_states(
            memory, mask=~get_mask_from_lengths(memory_lengths))
        mel_outputs, gate_outputs, alignments = [], [], []
        while len(mel_outputs) < decoder_inputs.size(0) - 1:
            # Teacher forcing: next input is the ground-truth frame.
            decoder_input = decoder_inputs[len(mel_outputs)]
            mel_output, gate_output, attention_weights = self.decode(
                decoder_input)
            mel_outputs += [mel_output]
            gate_outputs += [gate_output.squeeze(1)]
            alignments += [attention_weights]
        mel_outputs, gate_outputs, alignments = self.parse_decoder_outputs(
            mel_outputs, gate_outputs, alignments)
        return mel_outputs, gate_outputs, alignments

    def inference(self, memory):
        """ Decoder inference
        PARAMS
        ------
        memory: Encoder outputs
        RETURNS
        -------
        mel_outputs: mel outputs from the decoder
        gate_outputs: gate outputs from the decoder
        alignments: sequence of attention weights from the decoder
        """
        decoder_input = self.get_go_frame(memory)
        self.initialize_decoder_states(memory, mask=None)
        mel_outputs, gate_outputs, alignments = [], [], []
        while True:
            decoder_input = self.prenet(decoder_input)
            mel_output, gate_output, alignment = self.decode(decoder_input)
            mel_outputs += [mel_output.squeeze(1)]
            gate_outputs += [gate_output]
            alignments += [alignment]
            # NOTE(review): this scalar comparison appears to assume batch
            # size 1 at inference — confirm before batched decoding.
            if torch.sigmoid(gate_output.data) > self.gate_threshold:
                break
            elif len(mel_outputs) == self.max_decoder_steps:
                # print("Warning! Reached max decoder steps")
                break
            # Autoregression: next input is the frame just produced.
            decoder_input = mel_output
        mel_outputs, gate_outputs, alignments = self.parse_decoder_outputs(
            mel_outputs, gate_outputs, alignments)
        return mel_outputs, gate_outputs, alignments
  401. class Tacotron2(nn.Module):
  402. def __init__(self, hparams):
  403. super(Tacotron2, self).__init__()
  404. self.mask_padding = hparams.mask_padding
  405. self.fp16_run = hparams.fp16_run
  406. self.n_mel_channels = hparams.n_mel_channels
  407. self.n_frames_per_step = hparams.n_frames_per_step
  408. self.embedding = nn.Embedding(
  409. hparams.n_symbols, hparams.symbols_embedding_dim)
  410. std = sqrt(2.0 / (hparams.n_symbols + hparams.symbols_embedding_dim))
  411. val = sqrt(3.0) * std # uniform bounds for std
  412. self.embedding.weight.data.uniform_(-val, val)
  413. self.encoder = Encoder(hparams)
  414. self.decoder = Decoder(hparams)
  415. self.postnet = Postnet(hparams)
  416. def parse_batch(self, batch):
  417. text_padded, input_lengths, mel_padded, gate_padded, \
  418. output_lengths = batch
  419. text_padded = to_gpu(text_padded).long()
  420. input_lengths = to_gpu(input_lengths).long()
  421. max_len = torch.max(input_lengths.data).item()
  422. mel_padded = to_gpu(mel_padded).float()
  423. gate_padded = to_gpu(gate_padded).float()
  424. output_lengths = to_gpu(output_lengths).long()
  425. return (
  426. (text_padded, input_lengths, mel_padded, max_len, output_lengths),
  427. (mel_padded, gate_padded))
  428. def parse_output(self, outputs, output_lengths=None):
  429. if self.mask_padding and output_lengths is not None:
  430. mask = ~get_mask_from_lengths(output_lengths)
  431. mask = mask.expand(self.n_mel_channels, mask.size(0), mask.size(1))
  432. mask = mask.permute(1, 0, 2)
  433. outputs[0].data.masked_fill_(mask, 0.0)
  434. outputs[1].data.masked_fill_(mask, 0.0)
  435. outputs[2].data.masked_fill_(mask[:, 0, :], 1e3) # gate energies
  436. return outputs
  437. def forward(self, inputs):
  438. text_inputs, text_lengths, mels, max_len, output_lengths = inputs
  439. text_lengths, output_lengths = text_lengths.data, output_lengths.data
  440. embedded_inputs = self.embedding(text_inputs).transpose(1, 2)
  441. encoder_outputs = self.encoder(embedded_inputs, text_lengths)
  442. mel_outputs, gate_outputs, alignments = self.decoder(
  443. encoder_outputs, mels, memory_lengths=text_lengths)
  444. mel_outputs_postnet = self.postnet(mel_outputs)
  445. mel_outputs_postnet = mel_outputs + mel_outputs_postnet
  446. return self.parse_output(
  447. [mel_outputs, mel_outputs_postnet, gate_outputs, alignments],
  448. output_lengths)
  449. def inference(self, inputs):
  450. embedded_inputs = self.embedding(inputs).transpose(1, 2)
  451. encoder_outputs = self.encoder.inference(embedded_inputs)
  452. mel_outputs, gate_outputs, alignments = self.decoder.inference(
  453. encoder_outputs)
  454. mel_outputs_postnet = self.postnet(mel_outputs)
  455. mel_outputs_postnet = mel_outputs + mel_outputs_postnet
  456. outputs = self.parse_output(
  457. [mel_outputs, mel_outputs_postnet, gate_outputs, alignments])
  458. return outputs