- # Copyright (c) 2022 NVIDIA Corporation. All rights reserved.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- import os
- import time
- import glob
- import numpy as np
- import dllogger
- from paddle.fluid import LoDTensor
- from paddle.inference import Config, PrecisionType, create_predictor
- from dali import dali_dataloader, dali_synthetic_dataloader
- from utils.config import parse_args, print_args
- from utils.mode import Mode
- from utils.logger import setup_dllogger
def init_predictor(args):
    """Build a GPU + TensorRT Paddle-Inference predictor from an exported model.

    ``args.inference_dir`` must contain exactly one ``*.pdmodel`` and one
    ``*.pdiparams`` file. Device id, precision (FP32/FP16/INT8), TensorRT
    options, and the dynamic-shape profile for the ``data`` input are all
    taken from the parsed command-line ``args``.
    """
    model_dir = args.inference_dir
    assert os.path.isdir(
        model_dir), f'inference_dir = "{model_dir}" is not a directory'

    params_files = glob.glob(os.path.join(model_dir, '*.pdiparams'))
    model_files = glob.glob(os.path.join(model_dir, '*.pdmodel'))
    assert len(params_files) == 1, \
        f'There should be only 1 pdiparams in {model_dir}, but there are {len(params_files)}'
    assert len(model_files) == 1, \
        f'There should be only 1 pdmodel in {model_dir}, but there are {len(model_files)}'

    config = Config(model_files[0], params_files[0])
    config.enable_memory_optim()
    # 0 MB initial GPU memory pool on the selected device; Paddle grows it on demand.
    config.enable_use_gpu(0, args.device)

    assert args.precision in ['FP32', 'FP16', 'INT8'], \
        'precision should be FP32/FP16/INT8'
    precision_mode = {
        'INT8': PrecisionType.Int8,
        'FP16': PrecisionType.Half,
        'FP32': PrecisionType.Float32,
    }[args.precision]

    config.enable_tensorrt_engine(
        workspace_size=args.workspace_size,
        max_batch_size=args.batch_size,
        min_subgraph_size=args.min_subgraph_size,
        precision_mode=precision_mode,
        use_static=args.use_static,
        use_calib_mode=args.use_calib_mode)
    # Dynamic-shape profile for the single 'data' input: min / max / optimal.
    image_shape = tuple(args.image_shape)
    config.set_trt_dynamic_shape_info(
        {"data": (1,) + image_shape},
        {"data": (args.batch_size,) + image_shape},
        {"data": (args.batch_size,) + image_shape},
    )
    return create_predictor(config)
def predict(predictor, input_data):
    '''
    Run one forward pass through a Paddle inference predictor.

    Args:
        predictor: Paddle inference predictor
        input_data: A list of input
    Returns:
        output_data: A list of output
    '''
    # Feed every declared input: LoDTensors are shared zero-copy, anything
    # else (numpy arrays) is reshaped and copied from host memory.
    for idx, in_name in enumerate(predictor.get_input_names()):
        handle = predictor.get_input_handle(in_name)
        item = input_data[idx]
        if isinstance(item, LoDTensor):
            handle.share_external_data(item)
        else:
            handle.reshape(item.shape)
            handle.copy_from_cpu(item)

    # Execute the graph.
    predictor.run()

    # Copy every output tensor back to host memory, in declaration order.
    return [
        predictor.get_output_handle(out_name).copy_to_cpu()
        for out_name in predictor.get_output_names()
    ]
def benchmark_dataset(args):
    """
    Benchmark on a real DALI dataset, reflecting full pipeline throughput:
    1. Read images
    2. Pre-processing
    3. Inference
    4. H2D, D2H
    Also accumulates top-1 accuracy. Returns a statistics dict for dllogger.
    """
    predictor = init_predictor(args)
    dali_iter = dali_dataloader(args, Mode.EVAL, 'gpu:' + str(args.device))

    # Warm up on zero-filled host buffers so the timed loop starts stable.
    dummy = np.zeros((args.batch_size, *args.image_shape)).astype(np.float32)
    for _ in range(args.benchmark_warmup_steps):
        predict(predictor, [dummy])[0]

    total_images = 0
    correct_predict = 0
    batch_times = []
    start = time.perf_counter()
    tic = time.perf_counter()
    for dali_data in dali_iter:
        for data in dali_data:
            labels = np.asarray(data['label'])
            total_images += labels.shape[0]
            labels = labels.flatten()
            prediction = predict(predictor, [data['data']])[0]
            correct_predict += (labels == prediction).sum()
            # Per-batch wall time includes the dataloader wait for the next batch.
            toc = time.perf_counter()
            batch_times.append(toc - tic)
            tic = time.perf_counter()
    end = time.perf_counter()

    latency_ms = np.array(batch_times) * 1000
    p90, p95, p99 = np.quantile(latency_ms, [0.9, 0.95, 0.99])
    return {
        'precision': args.precision,
        'batch_size': args.batch_size,
        'throughput': total_images / (end - start),
        'accuracy': correct_predict / total_images,
        'eval_latency_avg': np.mean(latency_ms),
        'eval_latency_p90': p90,
        'eval_latency_p95': p95,
        'eval_latency_p99': p99,
    }
def benchmark_synthetic(args):
    """
    Benchmark on synthetic data, bypassing all pre-processing.
    The host-to-device copy is still included, so this gives the upper
    throughput bound to aim for when tuning the full input pipeline.
    Returns a statistics dict for dllogger.
    """
    predictor = init_predictor(args)
    dali_iter = dali_synthetic_dataloader(args, 'gpu:' + str(args.device))

    dummy = np.random.random(
        (args.batch_size, *args.image_shape)).astype(np.float32)
    # Untimed warm-up iterations so engines/clocks settle before measuring.
    for _ in range(args.benchmark_warmup_steps):
        predict(predictor, [dummy])[0]

    batch_times = []
    start = time.perf_counter()
    tic = time.perf_counter()
    for dali_data in dali_iter:
        for data in dali_data:
            predict(predictor, [data['data']])[0]
            toc = time.perf_counter()
            batch_times.append(toc - tic)
            tic = time.perf_counter()
    end = time.perf_counter()

    latency_ms = np.array(batch_times) * 1000
    p90, p95, p99 = np.quantile(latency_ms, [0.9, 0.95, 0.99])
    return {
        'precision': args.precision,
        'batch_size': args.batch_size,
        'throughput': args.benchmark_steps * args.batch_size / (end - start),
        'eval_latency_avg': np.mean(latency_ms),
        'eval_latency_p90': p90,
        'eval_latency_p95': p95,
        'eval_latency_p99': p99,
    }
def main(args):
    """Entry point: set up dllogger, run the selected benchmark, log results."""
    setup_dllogger(args.report_file)
    if args.show_config:
        print_args(args)
    # Synthetic data isolates inference speed; the dataset path measures the
    # full pipeline (and accuracy).
    benchmark = benchmark_synthetic if args.use_synthetic else benchmark_dataset
    statistics = benchmark(args)
    dllogger.log(step=tuple(), data=statistics)


if __name__ == '__main__':
    main(parse_args(script='inference'))