| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889 |
- # Copyright (c) 2022 NVIDIA Corporation. All rights reserved.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- import paddle
class CrossEntropyLossForSQuAD(paddle.nn.Layer):
    """Cross-entropy loss for SQuAD-style span prediction.

    Averages the cross-entropy of the start-position prediction and the
    end-position prediction produced by a question-answering head.
    """

    def __init__(self):
        super().__init__()

    def forward(self, y, label):
        """Compute the span-prediction loss.

        Args:
            y: Tuple of ``(start_logits, end_logits)`` tensors.
            label: Tuple of ``(start_position, end_position)`` tensors.

        Returns:
            Tensor: scalar loss, ``(start_loss + end_loss) / 2``.
        """
        start_logits, end_logits = y
        start_position, end_position = label
        # softmax_with_cross_entropy expects labels with a trailing
        # singleton axis, so append one to each position tensor.
        start_position = paddle.unsqueeze(start_position, axis=-1)
        end_position = paddle.unsqueeze(end_position, axis=-1)

        mean_losses = []
        for logits, position in ((start_logits, start_position),
                                 (end_logits, end_position)):
            per_example = paddle.nn.functional.softmax_with_cross_entropy(
                logits=logits, label=position, soft_label=False)
            mean_losses.append(paddle.mean(per_example))

        return (mean_losses[0] + mean_losses[1]) / 2
class BertPretrainingCriterion(paddle.nn.Layer):
    """
    Loss function for BertPretraining.

    Combines the masked-language-modeling (MLM) loss and the
    next-sentence-prediction (NSP) loss, both computed with
    ``CrossEntropyLoss(ignore_index=-1)`` (mean reduction by default).

    Args:
        vocab_size(int):
            Vocabulary size of `inputs_ids` in `BertModel`.
    """

    def __init__(self, vocab_size):
        super().__init__()
        # ignore_index=-1 drops padded / unmasked positions from the loss.
        self.loss_fn = paddle.nn.loss.CrossEntropyLoss(ignore_index=-1)
        self.vocab_size = vocab_size

    def forward(self, prediction_scores, seq_relationship_score,
                masked_lm_labels, next_sentence_labels):
        """
        Args:
            prediction_scores(Tensor):
                The scores of masked token prediction. Its data type should be
                float32. Its last dimension is the vocabulary size, e.g.
                [batch_size * sequence_length, vocab_size] or
                [mask_token_num, vocab_size].
            seq_relationship_score(Tensor):
                The scores of next sentence prediction. Its data type should be
                float32 and its shape is [batch_size, 2].
            masked_lm_labels(Tensor):
                The labels of the masked language modeling. Its data type
                should be int64. It is flattened internally, and positions
                labeled ``-1`` are filtered out before the loss is computed.
            next_sentence_labels(Tensor):
                The labels of the next sentence prediction task. Its data type
                should be int64 and its shape is [batch_size] or
                [batch_size, 1].
        Returns:
            Tensor: The pretraining loss: the mean-reduced masked-LM loss plus
            the mean-reduced next-sentence loss. Its data type is float32 and
            its shape is [1].
        """
        # Run the loss computation under the fp16 guard so AMP keeps it in a
        # numerically safe dtype.
        with paddle.static.amp.fp16_guard():
            # Flatten the MLM labels and keep only the genuinely masked
            # positions (label != -1); prediction_scores is expected to align
            # with the surviving positions.
            masked_lm_labels_flat = masked_lm_labels.reshape([-1])
            mlm_labels = masked_lm_labels_flat[masked_lm_labels_flat != -1]
            masked_lm_loss = self.loss_fn(prediction_scores, mlm_labels)
            # CrossEntropyLoss expects a trailing label axis; add it when the
            # NSP labels arrive as a 1-D tensor.
            if next_sentence_labels.ndim == 1:
                next_sentence_labels = next_sentence_labels.unsqueeze(axis=-1)
            next_sentence_loss = self.loss_fn(seq_relationship_score,
                                              next_sentence_labels)
            return masked_lm_loss + next_sentence_loss
|