#!/usr/bin/env bash
# Copyright (c) 2019 NVIDIA CORPORATION. All rights reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
  # Report the container build id (prints an empty value when it is unset).
  echo "Container nvidia build = " "${NVIDIA_BUILD_ID:-}"

  # Positional arguments (all optional):
  #   $1 task_name        forwarded as --task_name; also the sub-directory of
  #                       the GLUE data directory that is read (default: MRPC)
  #   $2 init_checkpoint  forwarded as --init_checkpoint
  #                       (default: $BERT_DIR/model.ckpt, see below)
  #   $3 batch_size       used for both predict and eval batch size (default: 32)
  #   $4 precision        "fp16" turns AMP on, anything else turns it off
  #                       (default: fp16)
  #   $5 use_xla          "true" turns XLA on, anything else turns it off
  #                       (default: true)
  #   $6 seq_length       forwarded as --max_seq_length (default: 128)
  #   $7 doc_stride       forwarded as --doc_stride (default: 64)
  #   $8 bert_model       "large" selects the large model directory, anything
  #                       else selects the base one (default: large)
  task_name=${1:-"MRPC"}
  batch_size=${3:-"32"}
  precision=${4:-"fp16"}
  use_xla=${5:-"true"}
  seq_length=${6:-"128"}
  doc_stride=${7:-"64"}
  bert_model=${8:-"large"}

  # Pick the pretrained model directory for the requested model size.
  if [[ "$bert_model" == "large" ]]; then
    BERT_DIR=data/download/nvidia_pretrained/bert_tf_pretraining_large_lamb
  else
    BERT_DIR=data/download/nvidia_pretrained/bert_tf_squad11_base_128
  fi
  export BERT_DIR

  # The checkpoint default is derived from BERT_DIR, so it must be resolved
  # only after BERT_DIR has been chosen. Resolving it earlier produced
  # "/model.ckpt" (or a path under an unrelated inherited BERT_DIR).
  init_checkpoint=${2:-"$BERT_DIR/model.ckpt"}

  GLUE_DIR=data/download
  echo "GLUE directory set as " "$GLUE_DIR" " BERT directory set as " "$BERT_DIR"
  # Map the requested precision onto the AMP switch handed to
  # run_classifier.py: only the exact value "fp16" enables AMP.
  case "$precision" in
    fp16)
      echo "fp16 activated!"
      use_fp16="--amp"
      ;;
    *)
      echo "fp32/tf32 activated!"
      use_fp16="--noamp"
      ;;
  esac

  # Map the XLA request onto its switch: only the exact value "true"
  # enables XLA; every other value disables it explicitly.
  case "$use_xla" in
    true)
      use_xla_tag="--use_xla"
      echo "XLA activated"
      ;;
    *)
      use_xla_tag="--nouse_xla"
      ;;
  esac
  # GPU count is hard-coded to one in this script.
  num_gpu=1

  # Turn the checkpoint path into a file-name-safe token by replacing
  # every '/' with '-'.
  ckpt_str=${init_checkpoint//\//-}

  # Global batch size = per-GPU batch size * GPU count. Computed with shell
  # arithmetic (no external expr) and exported separately so a failure is
  # not hidden behind the export.
  GBS=$(( batch_size * num_gpu ))
  export GBS

  # Tag identifying this run; it becomes part of the log file name.
  printf -v TAG "tf_bert_finetuning_glue_%s_inf_%s_%s_gbs%d_ckpt_%s" \
    "$task_name" "$bert_model" "$precision" "$GBS" "$ckpt_str"
  DATESTAMP=$(date +'%y%m%d%H%M%S')

  #Edit to save logs & checkpoints in a different directory
  RESULTS_DIR=/results
  LOGFILE=$RESULTS_DIR/$TAG.$DATESTAMP.log
  printf "Logs written to %s\n" "$LOGFILE"
  #Check if all necessary files are available before running inference
  # Each path is quoted so a value containing whitespace is tested as one
  # path instead of being split into several words.
  for DIR_or_file in "$GLUE_DIR" "$RESULTS_DIR" "$BERT_DIR/vocab.txt" "$BERT_DIR/bert_config.json"; do
    # A required path is acceptable as either a directory or a regular file.
    if [[ ! -d "$DIR_or_file" && ! -f "$DIR_or_file" ]]; then
      # Diagnostics go to stderr; exit statuses must lie in 0-255, so use 1
      # rather than the out-of-range -1.
      echo "Error! $DIR_or_file directory missing. Please mount correctly" >&2
      exit 1
    fi
  done
  # Flags for run_classifier.py. They are collected in an array so every
  # value stays a single argument even if it contains whitespace.
  classifier_args=(
    --task_name="$task_name"
    --predict_batch_size="$batch_size"
    --eval_batch_size="$batch_size"
    --do_eval=true
    --data_dir="$GLUE_DIR/$task_name"
    --vocab_file="$BERT_DIR/vocab.txt"
    --bert_config_file="$BERT_DIR/bert_config.json"
    --init_checkpoint="$init_checkpoint"
    --max_seq_length="$seq_length"
    --doc_stride="$doc_stride"
    --output_dir="$RESULTS_DIR"
    --horovod
    "$use_fp16"
    "$use_xla_tag"
  )

  # Without pipefail the pipeline reports tee's status, so a failed python
  # run would still make this script exit 0.
  set -o pipefail

  # mpi_command is not assigned in the visible part of this script; when it
  # is unset or empty it expands to nothing and python runs directly. It is
  # deliberately left unquoted so a multi-word launcher splits into words.
  # shellcheck disable=SC2086
  ${mpi_command:-} python run_classifier.py "${classifier_args[@]}" |& tee "$LOGFILE"