# inference.py
# Copyright (c) 2022 NVIDIA Corporation. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
  14. import os
  15. import time
  16. import glob
  17. import numpy as np
  18. import dllogger
  19. from paddle.fluid import LoDTensor
  20. from paddle.inference import Config, PrecisionType, create_predictor
  21. from dali import dali_dataloader, dali_synthetic_dataloader
  22. from utils.config import parse_args, print_args
  23. from utils.mode import Mode
  24. from utils.logger import setup_dllogger
  25. def init_predictor(args):
  26. infer_dir = args.inference_dir
  27. assert os.path.isdir(
  28. infer_dir), f'inference_dir = "{infer_dir}" is not a directory'
  29. pdiparams_path = glob.glob(os.path.join(infer_dir, '*.pdiparams'))
  30. pdmodel_path = glob.glob(os.path.join(infer_dir, '*.pdmodel'))
  31. assert len(pdiparams_path) == 1, \
  32. f'There should be only 1 pdiparams in {infer_dir}, but there are {len(pdiparams_path)}'
  33. assert len(pdmodel_path) == 1, \
  34. f'There should be only 1 pdmodel in {infer_dir}, but there are {len(pdmodel_path)}'
  35. predictor_config = Config(pdmodel_path[0], pdiparams_path[0])
  36. predictor_config.enable_memory_optim()
  37. predictor_config.enable_use_gpu(0, args.device)
  38. precision = args.precision
  39. max_batch_size = args.batch_size
  40. assert precision in ['FP32', 'FP16', 'INT8'], \
  41. 'precision should be FP32/FP16/INT8'
  42. if precision == 'INT8':
  43. precision_mode = PrecisionType.Int8
  44. elif precision == 'FP16':
  45. precision_mode = PrecisionType.Half
  46. elif precision == 'FP32':
  47. precision_mode = PrecisionType.Float32
  48. else:
  49. raise NotImplementedError
  50. predictor_config.enable_tensorrt_engine(
  51. workspace_size=args.workspace_size,
  52. max_batch_size=max_batch_size,
  53. min_subgraph_size=args.min_subgraph_size,
  54. precision_mode=precision_mode,
  55. use_static=args.use_static,
  56. use_calib_mode=args.use_calib_mode)
  57. predictor_config.set_trt_dynamic_shape_info(
  58. {"data": (1,) + tuple(args.image_shape)},
  59. {"data": (args.batch_size,) + tuple(args.image_shape)},
  60. {"data": (args.batch_size,) + tuple(args.image_shape)},
  61. )
  62. predictor = create_predictor(predictor_config)
  63. return predictor
  64. def predict(predictor, input_data):
  65. '''
  66. Args:
  67. predictor: Paddle inference predictor
  68. input_data: A list of input
  69. Returns:
  70. output_data: A list of output
  71. '''
  72. # copy image data to input tensor
  73. input_names = predictor.get_input_names()
  74. for i, name in enumerate(input_names):
  75. input_tensor = predictor.get_input_handle(name)
  76. if isinstance(input_data[i], LoDTensor):
  77. input_tensor.share_external_data(input_data[i])
  78. else:
  79. input_tensor.reshape(input_data[i].shape)
  80. input_tensor.copy_from_cpu(input_data[i])
  81. # do the inference
  82. predictor.run()
  83. results = []
  84. # get out data from output tensor
  85. output_names = predictor.get_output_names()
  86. for i, name in enumerate(output_names):
  87. output_tensor = predictor.get_output_handle(name)
  88. output_data = output_tensor.copy_to_cpu()
  89. results.append(output_data)
  90. return results
  91. def benchmark_dataset(args):
  92. """
  93. Benchmark DALI format dataset, which reflects real the pipeline throughput including
  94. 1. Read images
  95. 2. Pre-processing
  96. 3. Inference
  97. 4. H2D, D2H
  98. """
  99. predictor = init_predictor(args)
  100. dali_iter = dali_dataloader(args, Mode.EVAL, 'gpu:' + str(args.device))
  101. # Warmup some samples for the stable performance number
  102. batch_size = args.batch_size
  103. image_shape = args.image_shape
  104. images = np.zeros((batch_size, *image_shape)).astype(np.float32)
  105. for _ in range(args.benchmark_warmup_steps):
  106. predict(predictor, [images])[0]
  107. total_images = 0
  108. correct_predict = 0
  109. latency = []
  110. start = time.perf_counter()
  111. last_time_step = time.perf_counter()
  112. for dali_data in dali_iter:
  113. for data in dali_data:
  114. label = np.asarray(data['label'])
  115. total_images += label.shape[0]
  116. label = label.flatten()
  117. images = data['data']
  118. predict_label = predict(predictor, [images])[0]
  119. correct_predict += (label == predict_label).sum()
  120. batch_end_time_step = time.perf_counter()
  121. batch_latency = batch_end_time_step - last_time_step
  122. latency.append(batch_latency)
  123. last_time_step = time.perf_counter()
  124. end = time.perf_counter()
  125. latency = np.array(latency) * 1000
  126. quantile = np.quantile(latency, [0.9, 0.95, 0.99])
  127. statistics = {
  128. 'precision': args.precision,
  129. 'batch_size': batch_size,
  130. 'throughput': total_images / (end - start),
  131. 'accuracy': correct_predict / total_images,
  132. 'eval_latency_avg': np.mean(latency),
  133. 'eval_latency_p90': quantile[0],
  134. 'eval_latency_p95': quantile[1],
  135. 'eval_latency_p99': quantile[2],
  136. }
  137. return statistics
  138. def benchmark_synthetic(args):
  139. """
  140. Benchmark on the synthetic data and bypass all pre-processing.
  141. The host to device copy is still included.
  142. This used to find the upper throughput bound when tunning the full input pipeline.
  143. """
  144. predictor = init_predictor(args)
  145. dali_iter = dali_synthetic_dataloader(args, 'gpu:' + str(args.device))
  146. batch_size = args.batch_size
  147. image_shape = args.image_shape
  148. images = np.random.random((batch_size, *image_shape)).astype(np.float32)
  149. latency = []
  150. # warmup
  151. for _ in range(args.benchmark_warmup_steps):
  152. predict(predictor, [images])[0]
  153. # benchmark
  154. start = time.perf_counter()
  155. last_time_step = time.perf_counter()
  156. for dali_data in dali_iter:
  157. for data in dali_data:
  158. images = data['data']
  159. predict(predictor, [images])[0]
  160. batch_end_time_step = time.perf_counter()
  161. batch_latency = batch_end_time_step - last_time_step
  162. latency.append(batch_latency)
  163. last_time_step = time.perf_counter()
  164. end = time.perf_counter()
  165. latency = np.array(latency) * 1000
  166. quantile = np.quantile(latency, [0.9, 0.95, 0.99])
  167. statistics = {
  168. 'precision': args.precision,
  169. 'batch_size': batch_size,
  170. 'throughput': args.benchmark_steps * batch_size / (end - start),
  171. 'eval_latency_avg': np.mean(latency),
  172. 'eval_latency_p90': quantile[0],
  173. 'eval_latency_p95': quantile[1],
  174. 'eval_latency_p99': quantile[2],
  175. }
  176. return statistics
  177. def main(args):
  178. setup_dllogger(args.report_file)
  179. if args.show_config:
  180. print_args(args)
  181. if args.use_synthetic:
  182. statistics = benchmark_synthetic(args)
  183. else:
  184. statistics = benchmark_dataset(args)
  185. dllogger.log(step=tuple(), data=statistics)
  186. if __name__ == '__main__':
  187. main(parse_args(script='inference'))