#!/usr/bin/env bash
# Copyright (c) 2022 NVIDIA Corporation. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Launch phase-1 BERT-large pretraining (seq len 128) on 8 local GPUs with
# PaddlePaddle's distributed launcher. Resumes automatically from the latest
# checkpoint under --from-checkpoint (--last-step-of-checkpoint=auto).
#
# Effective global batch size: 256 (per GPU) * 8 GPUs * 32 gradient-merge
# steps = 65536, matching the 7038-step / 28.43%-warmup LAMB phase-1 recipe.

# Abort on any failure: an interrupted multi-hour training run must not be
# silently treated as success.
set -euo pipefail

python3 -m paddle.distributed.launch \
  --gpus="0,1,2,3,4,5,6,7" \
  ./run_pretraining.py \
  --input-dir=pretrain/phase1/unbinned/parquet \
  --vocab-file=vocab/bert-large-uncased-vocab.txt \
  --output-dir=./results/checkpoints \
  --bert-model=bert-large-uncased \
  --from-checkpoint=./results/checkpoints/bert-large-uncased/phase1 \
  --last-step-of-checkpoint=auto \
  --batch-size=256 \
  --max-steps=7038 \
  --num-steps-per-checkpoint=200 \
  --log-freq=1 \
  --max-seq-length=128 \
  --max-predictions-per-seq=20 \
  --gradient-merge-steps=32 \
  --amp \
  --use-dynamic-loss-scaling \
  --optimizer=Lamb \
  --fuse-mha \
  --phase1 \
  --scale-loss=1048576 \
  --learning-rate=6e-3 \
  --warmup-proportion=0.2843 \
  --report-file=./results/dllogger_p1.json