| 12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788 |
#!/bin/bash
#
# Launches /workspace/bert/run_ner.py on the BC5CDR disease data set with
# --do_prepare, --do_train, --do_eval and --do_predict all enabled.
#
# Usage:
#   <this script> [init_checkpoint] [train_batch_size] [learning_rate] [cased]
#                 [precision] [use_xla] [num_gpu] [seq_length] [bert_model]
#                 [eval_batch_size] [epochs]
#
# Positional arguments (all optional; default shown in brackets):
#   $1  init_checkpoint   value for --init_checkpoint
#                         [/results/biobert_tf_uncased_base/model.ckpt-4340]
#   $2  train_batch_size  per-process batch size, non-negative integer [8]
#   $3  learning_rate     value for --learning_rate [3.125e-6]
#   $4  cased             "true" selects the cased weights directory and
#                         --do_lower_case=False; anything else selects the
#                         uncased directory and --do_lower_case=True [false]
#   $5  precision         "fp16" passes --amp; anything else passes --noamp
#                         [fp16]
#   $6  use_xla           "true" passes --use_xla; anything else passes
#                         --nouse_xla [true]
#   $7  num_gpu           process count, non-negative integer; values above 1
#                         launch through mpirun and add --horovod [16]
#   $8  seq_length        value for --max_seq_length [128]
#   $9  bert_model        "large" selects the L-24_H-1024_A-16 weights
#                         directory; anything else selects L-12_H-768_A-12
#                         [base]
#   $10 eval_batch_size   used for both --eval_batch_size and
#                         --predict_batch_size [8]
#   $11 epochs            value for --num_train_epochs [100.0]
#
# Environment:
#   NVIDIA_BUILD_ID  (read)     echoed at start-up; may be unset.
#   BERT_DIR         (exported) pretrained-weights directory that provides
#                               vocab.txt and bert_config.json.
#   GBS              (exported) train_batch_size * num_gpu.
#
# Output is written to /results/<tag>_<timestamp>, created if missing.
# The exit status is that of the launched command, or 1 on a setup error.

set -euo pipefail

# Print a message on stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Succeed when $1 consists solely of decimal digits.
is_uint() {
  [[ "$1" =~ ^[0-9]+$ ]]
}

echo "Container nvidia build =  ${NVIDIA_BUILD_ID:-}"

init_checkpoint=${1:-"/results/biobert_tf_uncased_base/model.ckpt-4340"}
train_batch_size=${2:-8}
learning_rate=${3:-3.125e-6}
cased=${4:-false}
precision=${5:-"fp16"}
use_xla=${6:-"true"}
num_gpu=${7:-"16"}
seq_length=${8:-128}
bert_model=${9:-"base"}
eval_batch_size=${10:-8} # Eval and predict batch sizes are assumed to be equal.
epochs=${11:-"100.0"}

# Fail early instead of silently falling back to a single process with an
# empty global batch size when a numeric argument is malformed.
is_uint "$train_batch_size" \
  || die "train_batch_size must be a non-negative integer, got '$train_batch_size'"
is_uint "$num_gpu" \
  || die "num_gpu must be a non-negative integer, got '$num_gpu'"

if [[ "$cased" == "true" ]]; then
  CASING_DIR_PREFIX="cased"
  case_flag="--do_lower_case=False"
else
  CASING_DIR_PREFIX="uncased"
  case_flag="--do_lower_case=True"
fi

weights_root=/workspace/bert/data/download/google_pretrained_weights
if [[ "$bert_model" == "large" ]]; then
  export BERT_DIR="${weights_root}/${CASING_DIR_PREFIX}_L-24_H-1024_A-16"
else
  export BERT_DIR="${weights_root}/${CASING_DIR_PREFIX}_L-12_H-768_A-12"
fi

# The 10# prefix forces base 10 so a value such as "08" is not parsed as octal.
export GBS=$(( 10#$train_batch_size * 10#$num_gpu ))

printf -v TAG 'tf_bert_biobert_ner_bc5cdr_disease_%s_%s_gbs%d' \
  "$bert_model" "$precision" "$GBS"
DATESTAMP=$(date +'%y%m%d%H%M%S')
DATASET_DIR=/workspace/bert/data/biobert/BC5CDR/disease
OUTPUT_DIR="/results/${TAG}_${DATESTAMP}"

mkdir -p -- "$OUTPUT_DIR" || die "cannot create output directory $OUTPUT_DIR"

if [[ "$precision" == "fp16" ]]; then
  echo "fp16 activated!"
  use_fp16="--amp"
else
  echo "fp32/tf32 activated!"
  use_fp16="--noamp"
fi

if [[ "$use_xla" == "true" ]]; then
  use_xla_tag="--use_xla"
  echo "XLA activated"
else
  use_xla_tag="--nouse_xla"
fi

# The command line is assembled as an array so every argument keeps its word
# boundaries without relying on unquoted expansion.
cmd=()
if (( 10#$num_gpu > 1 )); then
  cmd+=(
    mpirun -np "$num_gpu" -H "localhost:$num_gpu"
    --allow-run-as-root -bind-to none -map-by slot
    -x NCCL_DEBUG=INFO
    -x LD_LIBRARY_PATH
    -x PATH -mca pml ob1 -mca btl '^openib'
  )
fi

cmd+=(
  python3 /workspace/bert/run_ner.py
  --do_prepare=true
  --do_train=true
  --do_eval=true
  --do_predict=true
  --task_name="bc5cdr"
  --vocab_file="$BERT_DIR/vocab.txt"
  --bert_config_file="$BERT_DIR/bert_config.json"
  --init_checkpoint="$init_checkpoint"
  --num_train_epochs="$epochs"
  --data_dir="$DATASET_DIR"
  --output_dir="$OUTPUT_DIR"
  --learning_rate="$learning_rate"
  --train_batch_size="$train_batch_size"
  --eval_batch_size="$eval_batch_size"
  --predict_batch_size="$eval_batch_size"
  --max_seq_length="$seq_length"
)

# --horovod is appended only for multi-process runs; in the single-process
# case no placeholder is passed, so python3 never receives an empty argument.
if (( 10#$num_gpu > 1 )); then
  cmd+=(--horovod)
fi
cmd+=("$use_fp16" "$use_xla_tag" "$case_flag")

"${cmd[@]}"
|