# config_NVIDIA-T4.yaml
#
# Model-deployment configuration for the TFT model (framework: PyTorch).
# NOTE(review): the file name suggests these settings target an NVIDIA T4;
# confirm against the consuming tooling.
#
# Layout:
#   checkpoints     - downloadable checkpoint archives, one per dataset.
#   configurations  - four entries that differ only in
#                     checkpoint_variant/dataset (electricity_bin, traffic_bin)
#                     and export_format/format (onnx -> trt, ts-trace -> ts-trace).
#   remaining keys  - settings shared by every configuration.
---
# Checkpoint names match the checkpoint_variant values used under
# `configurations`.
checkpoints:
- name: electricity_bin
  url: https://api.ngc.nvidia.com/v2/models/nvidia/dle/tft_base_pyt_ckpt_ds-electricity/versions/22.11.0_amp/zip
- name: traffic_bin
  url: https://api.ngc.nvidia.com/v2/models/nvidia/dle/tft_base_pyt_ckpt_ds-traffic/versions/22.11.0_amp/zip

configurations:
# electricity_bin, exported as onnx and served as trt.
- accelerator: none
  batch_size:
  - 1
  - 2
  - 4
  - 8
  - 16
  - 32
  - 64
  - 128
  - 256
  - 512
  - 1024
  # Same values as batch_size, kept as a single space-separated string.
  batch_sizes: 1 2 4 8 16 32 64 128 256 512 1024
  capture_cuda_graph: 0
  checkpoint_variant: electricity_bin
  dataset: electricity_bin
  device: gpu
  export_format: onnx
  export_precision: fp32
  format: trt
  max_batch_size: 1024
  precision: fp16
  request_count: 500
  triton_gpu_engine_count: 2
  triton_max_queue_delay: 1
  triton_preferred_batch_sizes: 512 1024
# traffic_bin, exported as onnx and served as trt.
- accelerator: none
  batch_size:
  - 1
  - 2
  - 4
  - 8
  - 16
  - 32
  - 64
  - 128
  - 256
  - 512
  - 1024
  batch_sizes: 1 2 4 8 16 32 64 128 256 512 1024
  capture_cuda_graph: 0
  checkpoint_variant: traffic_bin
  dataset: traffic_bin
  device: gpu
  export_format: onnx
  export_precision: fp32
  format: trt
  max_batch_size: 1024
  precision: fp16
  request_count: 500
  triton_gpu_engine_count: 2
  triton_max_queue_delay: 1
  triton_preferred_batch_sizes: 512 1024
# electricity_bin, exported and served as ts-trace.
- accelerator: none
  batch_size:
  - 1
  - 2
  - 4
  - 8
  - 16
  - 32
  - 64
  - 128
  - 256
  - 512
  - 1024
  batch_sizes: 1 2 4 8 16 32 64 128 256 512 1024
  capture_cuda_graph: 0
  checkpoint_variant: electricity_bin
  dataset: electricity_bin
  device: gpu
  export_format: ts-trace
  export_precision: fp32
  format: ts-trace
  max_batch_size: 1024
  precision: fp16
  request_count: 500
  triton_gpu_engine_count: 2
  triton_max_queue_delay: 1
  triton_preferred_batch_sizes: 512 1024
# traffic_bin, exported and served as ts-trace.
- accelerator: none
  batch_size:
  - 1
  - 2
  - 4
  - 8
  - 16
  - 32
  - 64
  - 128
  - 256
  - 512
  - 1024
  batch_sizes: 1 2 4 8 16 32 64 128 256 512 1024
  capture_cuda_graph: 0
  checkpoint_variant: traffic_bin
  dataset: traffic_bin
  device: gpu
  export_format: ts-trace
  export_precision: fp32
  format: ts-trace
  max_batch_size: 1024
  precision: fp16
  request_count: 500
  triton_gpu_engine_count: 2
  triton_max_queue_delay: 1
  triton_preferred_batch_sizes: 512 1024

# Quoted so the version stays a string rather than being read as a float.
container_version: '22.11'
datasets:
- name: electricity_bin
- name: traffic_bin
datasets_dir: datasets
framework: PyTorch
model_name: TFT
triton_container_image: null
triton_custom_operations: null
triton_dockerfile: null
triton_load_model_method: explicit