#!/bin/bash
# ner_bc5cdr-disease.sh — fine-tune BioBERT for NER on the BC5CDR disease corpus.
#
# Usage: ner_bc5cdr-disease.sh [init_checkpoint] [train_batch_size] [learning_rate]
#        [cased] [precision] [use_xla] [num_gpu] [seq_length] [bert_model]
#        [eval_batch_size] [epochs]

echo "Container nvidia build = ${NVIDIA_BUILD_ID:-}"

# Positional parameters, each with a default.
init_checkpoint=${1:-"/results/biobert_tf_uncased_base/model.ckpt-4340"}
train_batch_size=${2:-8}
learning_rate=${3:-3.125e-6}
cased=${4:-false}
precision=${5:-"fp16"}
use_xla=${6:-"true"}
num_gpu=${7:-"16"}
seq_length=${8:-128}
bert_model=${9:-"base"}
eval_batch_size=${10:-8}  # Eval and predict batch size are assumed to be the same.
epochs=${11:-"100.0"}
  14. if [ "$cased" = "true" ] ; then
  15. DO_LOWER_CASE=0
  16. CASING_DIR_PREFIX="cased"
  17. case_flag="--do_lower_case=False"
  18. else
  19. DO_LOWER_CASE=1
  20. CASING_DIR_PREFIX="uncased"
  21. case_flag="--do_lower_case=True"
  22. fi
  23. if [ "$bert_model" = "large" ] ; then
  24. export BERT_DIR=/workspace/bert/data/download/google_pretrained_weights/${CASING_DIR_PREFIX}_L-24_H-1024_A-16
  25. else
  26. export BERT_DIR=/workspace/bert/data/download/google_pretrained_weights/${CASING_DIR_PREFIX}_L-12_H-768_A-12
  27. fi
  28. export GBS=$(expr $train_batch_size \* $num_gpu)
  29. printf -v TAG "tf_bert_biobert_ner_bc5cdr_disease_%s_%s_gbs%d" "$bert_model" "$precision" $GBS
  30. DATESTAMP=`date +'%y%m%d%H%M%S'`
  31. DATASET_DIR=/workspace/bert/data/biobert/BC5CDR/disease
  32. OUTPUT_DIR=/results/${TAG}_${DATESTAMP}
  33. mkdir -p ${OUTPUT_DIR}
  34. use_fp16=""
  35. if [ "$precision" = "fp16" ] ; then
  36. echo "fp16 activated!"
  37. use_fp16="--amp"
  38. else
  39. echo "fp32/tf32 activated!"
  40. use_fp16="--noamp"
  41. fi
  42. if [ "$use_xla" = "true" ] ; then
  43. use_xla_tag="--use_xla"
  44. echo "XLA activated"
  45. else
  46. use_xla_tag="--nouse_xla"
  47. fi
  48. if [ $num_gpu -gt 1 ] ; then
  49. mpi_command="mpirun -np $num_gpu -H localhost:$num_gpu \
  50. --allow-run-as-root -bind-to none -map-by slot \
  51. -x NCCL_DEBUG=INFO \
  52. -x LD_LIBRARY_PATH \
  53. -x PATH -mca pml ob1 -mca btl ^openib"
  54. use_hvd="--horovod"
  55. else
  56. mpi_command=""
  57. use_hvd=""
  58. fi
  59. $mpi_command python3 /workspace/bert/run_ner.py \
  60. --do_prepare=true \
  61. --do_train=true \
  62. --do_eval=true \
  63. --do_predict=true \
  64. --task_name="bc5cdr" \
  65. --vocab_file=$BERT_DIR/vocab.txt \
  66. --bert_config_file=$BERT_DIR/bert_config.json \
  67. --init_checkpoint=$init_checkpoint \
  68. --num_train_epochs=$epochs \
  69. --data_dir=$DATASET_DIR \
  70. --output_dir=$OUTPUT_DIR \
  71. --learning_rate=$learning_rate \
  72. --train_batch_size=$train_batch_size \
  73. --eval_batch_size=$eval_batch_size \
  74. --predict_batch_size=$eval_batch_size \
  75. --max_seq_length=$seq_length \
  76. "$use_hvd" "$use_fp16" $use_xla_tag $case_flag