#!/usr/bin/env bash

# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Pre-trains a BASE model on LibriSpeech.
#
# This script only fills in default hyper-parameters and then delegates to
# scripts/pretrain_large.sh, forwarding all command-line arguments unchanged.
# Any variable already set (and non-empty) in the caller's environment takes
# precedence over the default given here.
#
# Usage (the relative path below means it must be run from the directory that
# contains scripts/):
#   [VAR=value ...] bash <this script> [args forwarded to pretrain_large.sh]

# Export every variable assigned from here on, so that the child script
# launched at the bottom sees the defaults through its environment.
set -a

# The expansions below are double-quoted so the resulting values are never
# word-split or glob-expanded while being passed to the no-op ':' builtin
# (several values contain spaces or glob characters such as '[' and ']').

# IO
: "${OUTPUT_DIR:=results/pretrain_base}"

# Batching
# To best utilize the hardware, increase the batch size by increasing
# NUM_CONCAT_BATCHES and lowering UPDATE_FREQ.
# Keep NUM_NODES x $NUM_GPUS x $NUM_CONCAT_BATCHES x $UPDATE_FREQ = 64.
# Note that this script does not control NUM_NODES.
: "${NUM_GPUS:=8}"
: "${MAX_TOKENS:=1400000}"
: "${NUM_CONCAT_BATCHES:=8}"
: "${UPDATE_FREQ:=1}"
: "${MAX_SAMPLE_SIZE:=250000}"

# Training
: "${MAX_UPDATE:=400000}"
: "${LOSS_WEIGHTS:=0.1 10.0}"
: "${LEARNING_RATE:=0.0005}"

# Model
: "${NORMALIZE:=false}"
: "${MASK_PROB:=0.65}"
: "${EXTRACTOR_MODE:=default}"
: "${LAYER_NORM_FIRST:=false}"
: "${FINAL_DIM:=256}"
: "${LATENT_TEMP:=2.0 0.5 0.999995}"
: "${ENCODER_LAYERDROP:=0.05}"
: "${DROPOUT_INPUT:=0.1}"
: "${DROPOUT_FEATURES:=0.1}"
: "${DROPOUT:=0.1}"
: "${ATTENTION_DROPOUT:=0.1}"
: "${CONV_BIAS:=false}"
: "${ENCODER_LAYERS:=12}"
: "${ENCODER_EMBED_DIM:=768}"
: "${ENCODER_FFN_EMBED_DIM:=3072}"
: "${ENCODER_ATTENTION_HEADS:=12}"
: "${FEATURE_GRAD_MULT:=0.1}"

# Uses '=' rather than ':=': the default applies only when the variable is
# unset, so an explicitly empty HOURGLASS_CONFIG is kept as-is.
# NOTE(review): presumably an empty value is meaningful to pretrain_large.sh;
# confirm there.
: "${HOURGLASS_CONFIG=[2,(8,4),2]}"

# Hand over to the shared pre-training script; as the last command, its exit
# status becomes the exit status of this script.
bash scripts/pretrain_large.sh "$@"