#!/usr/bin/env bash
# squad_config.sh
# Full SQuAD training configs for NVIDIA DGX A100 (8x NVIDIA A100 40GB GPU)
  3. dgxa100_8gpu_fp16 ()
  4. {
  5. batch_size=32
  6. learning_rate=5e-6
  7. precision=fp16
  8. use_xla=true
  9. num_gpu=8
  10. seq_length=384
  11. doc_stride=128
  12. bert_model="large"
  13. echo $batch_size $learning_rate $precision $use_xla $num_gpu $seq_length $doc_stride $bert_model
  14. }
  15. dgxa100_8gpu_tf32 ()
  16. {
  17. batch_size=16
  18. learning_rate=5e-6
  19. precision=tf32
  20. use_xla=true
  21. num_gpu=8
  22. seq_length=384
  23. doc_stride=128
  24. bert_model="large"
  25. echo $batch_size $learning_rate $precision $use_xla $num_gpu $seq_length $doc_stride $bert_model
  26. }
# Full SQuAD training configs for NVIDIA DGX-2H (16x NVIDIA V100 32GB GPU)
  28. dgx2_16gpu_fp16 ()
  29. {
  30. batch_size=24
  31. learning_rate=2.5e-6
  32. precision=fp16
  33. use_xla=true
  34. num_gpu=16
  35. seq_length=384
  36. doc_stride=128
  37. bert_model="large"
  38. echo $batch_size $learning_rate $precision $use_xla $num_gpu $seq_length $doc_stride $bert_model
  39. }
  40. dgx2_16gpu_fp32 ()
  41. {
  42. batch_size=8
  43. learning_rate=2.5e-6
  44. precision=fp32
  45. use_xla=true
  46. num_gpu=16
  47. seq_length=384
  48. doc_stride=128
  49. bert_model="large"
  50. echo $batch_size $learning_rate $precision $use_xla $num_gpu $seq_length $doc_stride $bert_model
  51. }
# Full SQuAD training configs for NVIDIA DGX-1 (8x NVIDIA V100 16GB GPU)
  53. dgx1_8gpu_fp16 ()
  54. {
  55. batch_size=4
  56. learning_rate=5e-6
  57. precision=fp16
  58. use_xla=true
  59. num_gpu=8
  60. seq_length=384
  61. doc_stride=128
  62. bert_model="large"
  63. echo $batch_size $learning_rate $precision $use_xla $num_gpu $seq_length $doc_stride $bert_model
  64. }
  65. dgx1_8gpu_fp32 ()
  66. {
  67. batch_size=2
  68. learning_rate=5e-6
  69. precision=fp32
  70. use_xla=true
  71. num_gpu=8
  72. seq_length=384
  73. doc_stride=128
  74. bert_model="large"
  75. echo $batch_size $learning_rate $precision $use_xla $num_gpu $seq_length $doc_stride $bert_model
  76. }