  1. #!/usr/bin/env python
  2. """ COCO validation script
  3. Hacked together by Ross Wightman (https://github.com/rwightman)
  4. """
  5. # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
  6. #
  7. # Licensed under the Apache License, Version 2.0 (the "License");
  8. # you may not use this file except in compliance with the License.
  9. # You may obtain a copy of the License at
  10. #
  11. # http://www.apache.org/licenses/LICENSE-2.0
  12. #
  13. # Unless required by applicable law or agreed to in writing, software
  14. # distributed under the License is distributed on an "AS IS" BASIS,
  15. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16. # See the License for the specific language governing permissions and
  17. # limitations under the License.
  18. import argparse
  19. import os
  20. import json
  21. import time
  22. import logging
  23. import torch
  24. import torch.nn.parallel
  25. from torch.nn.parallel import DistributedDataParallel as DDP
  26. import ctypes
  27. import dllogger
  28. from effdet.factory import create_model
  29. from effdet.evaluator import COCOEvaluator
  30. from utils.utils import setup_dllogger
  31. from data import create_loader, CocoDetection
  32. from utils.utils import AverageMeter, setup_default_logging
  33. from pycocotools.coco import COCO
  34. from pycocotools.cocoeval import COCOeval
  35. from tabulate import tabulate
  36. import numpy as np
  37. import itertools
# Enable cuDNN autotuner: picks the fastest convolution kernels for the
# (fixed) input sizes seen during validation.
torch.backends.cudnn.benchmark = True
# Raw handle to the CUDA runtime library, used below in validate() to call
# cudaDeviceSetLimit/cudaDeviceGetLimit directly via ctypes.
_libcudart = ctypes.CDLL('libcudart.so')
  40. def add_bool_arg(parser, name, default=False, help=''): # FIXME move to utils
  41. dest_name = name.replace('-', '_')
  42. group = parser.add_mutually_exclusive_group(required=False)
  43. group.add_argument('--' + name, dest=dest_name, action='store_true', help=help)
  44. group.add_argument('--no-' + name, dest=dest_name, action='store_false', help=help)
  45. parser.set_defaults(**{dest_name: default})
# Command-line interface. NOTE(review): description says "ImageNet" but this
# script validates COCO/Waymo detection — likely copied boilerplate.
parser = argparse.ArgumentParser(description='PyTorch ImageNet Validation')

# --- dataset selection ---
parser.add_argument('data', metavar='DIR',
                    help='path to dataset')
parser.add_argument('--waymo', action='store_true', default=False,
                    help='Train on Waymo dataset or COCO dataset. Default: False (COCO dataset)')
parser.add_argument('--anno', default='val2017',
                    help='mscoco annotation set (one of val2017, train2017, test-dev2017)')

# --- model construction ---
parser.add_argument('--model', '-m', metavar='MODEL', default='tf_efficientdet_d1',
                    help='model architecture (default: tf_efficientdet_d1)')
add_bool_arg(parser, 'redundant-bias', default=None,
             help='override model config for redundant bias layers')

# --- data loading ---
parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
                    help='number of data loading workers (default: 4)')
parser.add_argument('-b', '--batch-size', default=128, type=int,
                    metavar='N', help='mini-batch size (default: 128)')
parser.add_argument('--input_size', type=int, default=None, metavar='PCT',
                    help='Image size (default: None) if this is not set default model image size is taken')
parser.add_argument('--num_classes', type=int, default=None, metavar='PCT',
                    help='Number of classes the model needs to be trained for (default: None)')

# --- precision / preprocessing ---
parser.add_argument('--amp', action='store_true', default=False,
                    help='use NVIDIA amp for mixed precision training')
parser.add_argument('--interpolation', default='bilinear', type=str, metavar='NAME',
                    help='Image resize interpolation type (overrides model)')
parser.add_argument('--fill-color', default='mean', type=str, metavar='NAME',
                    help='Image augmentation fill (background) color ("mean" or int)')

# --- logging / checkpointing ---
parser.add_argument('--log-freq', default=10, type=int,
                    metavar='N', help='batch logging frequency (default: 10)')
parser.add_argument('--checkpoint', default='', type=str, metavar='PATH',
                    help='path to latest checkpoint (default: none)')
parser.add_argument('--pretrained', dest='pretrained', action='store_true',
                    help='use pre-trained model')

# --- hardware / runtime ---
parser.add_argument('--num-gpu', type=int, default=1,
                    help='Number of GPUS to use')
parser.add_argument('--no-prefetcher', action='store_true', default=False,
                    help='disable fast prefetcher')
parser.add_argument('--pin-mem', action='store_true', default=False,
                    help='Pin CPU memory in DataLoader for more efficient (sometimes) transfer to GPU.')
parser.add_argument("--memory-format", type=str, default="nchw", choices=["nchw", "nhwc"],
                    help="memory layout, nchw or nhwc")

# --- evaluation behavior ---
parser.add_argument('--use-ema', dest='use_ema', action='store_true',
                    help='use ema version of weights if present')
parser.add_argument('--inference', dest='inference', action='store_true',
                    help='If true then inference else evaluation.')
parser.add_argument('--use-soft-nms', dest='use_soft_nms', action='store_true', default=False,
                    help='use softnms instead of default nms for eval')
parser.add_argument('--torchscript', dest='torchscript', action='store_true',
                    help='convert model torchscript for inference')

# --- output files ---
parser.add_argument('--results', default='./results.json', type=str, metavar='FILENAME',
                    help='JSON filename for evaluation results')
parser.add_argument('--dllogger-file', default='log.json', type=str, metavar='PATH',
                    help='File name of dllogger json file (default: log.json, current dir)')

# --- distributed / reproducibility ---
parser.add_argument("--local_rank", default=os.getenv('LOCAL_RANK', 0), type=int)
parser.add_argument('--seed', type=int, default=42, metavar='S',
                    help='random seed (default: 42)')

# --- Waymo-specific paths ---
parser.add_argument('--waymo-val', default=None, type=str,
                    help='Path to waymo validation images relative to data (default: "None")')
parser.add_argument('--waymo-val-annotation', default=None, type=str,
                    help='Absolute Path to waymo validation annotation (default: "None")')
def validate(args):
    """Evaluate (or run inference with) an EfficientDet model on COCO/Waymo.

    Builds a 'predict' benchmark model, streams the validation set through it,
    accumulates detections in a COCOEvaluator, and logs timing/mAP metrics via
    dllogger.

    Args:
        args: argparse.Namespace from the module-level parser. Mutated in
            place (``pretrained``, ``prefetcher``, ``distributed``, ``device``,
            ``world_size``, ``rank`` are added/overwritten).

    Returns:
        list: always the empty ``results`` list — predictions are held by the
        evaluator and written via ``save_predictions``/``evaluate``.
    """
    setup_dllogger(0, filename=args.dllogger_file)
    # Declare units up front so the dllogger JSON output is self-describing.
    dllogger.metadata('total_inference_time', {'unit': 's'})
    dllogger.metadata('inference_throughput', {'unit': 'images/s'})
    dllogger.metadata('inference_time', {'unit': 's'})
    dllogger.metadata('map', {'unit': None})
    dllogger.metadata('total_eval_time', {'unit': 's'})

    # An explicit checkpoint implies we want weights loaded by create_model.
    if args.checkpoint != '':
        args.pretrained = True
    args.prefetcher = not args.no_prefetcher
    if args.waymo:
        assert args.waymo_val is not None

    memory_format = (
        torch.channels_last if args.memory_format == "nhwc" else torch.contiguous_format
    )

    # Distributed bookkeeping: WORLD_SIZE is set by torch.distributed launchers.
    args.distributed = False
    if 'WORLD_SIZE' in os.environ:
        args.distributed = int(os.environ['WORLD_SIZE']) > 1
    args.device = 'cuda:0'
    args.world_size = 1
    args.rank = 0  # global rank
    if args.distributed:
        torch.cuda.manual_seed_all(args.seed)
        args.device = 'cuda:%d' % args.local_rank
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend='nccl', init_method='env://')
        args.world_size = torch.distributed.get_world_size()
        args.rank = torch.distributed.get_rank()

    # Set device limit on the current device
    # cudaLimitMaxL2FetchGranularity = 0x05
    pValue = ctypes.cast((ctypes.c_int*1)(), ctypes.POINTER(ctypes.c_int))
    _libcudart.cudaDeviceSetLimit(ctypes.c_int(0x05), ctypes.c_int(128))
    _libcudart.cudaDeviceGetLimit(pValue, ctypes.c_int(0x05))
    assert pValue.contents.value == 128
    assert args.rank >= 0

    # create model
    bench = create_model(
        args.model,
        input_size=args.input_size,
        num_classes=args.num_classes,
        bench_task='predict',
        pretrained=args.pretrained,
        redundant_bias=args.redundant_bias,
        checkpoint_path=args.checkpoint,
        checkpoint_ema=args.use_ema,
        soft_nms=args.use_soft_nms,
        strict_load=False
    )
    input_size = bench.config.image_size
    data_config = bench.config

    param_count = sum([m.numel() for m in bench.parameters()])
    print('Model %s created, param count: %d' % (args.model, param_count))

    bench = bench.cuda().to(memory_format=memory_format)

    # NOTE(review): args.distributed is a bool, so `args.distributed > 1` is
    # always False (True == 1); this branch — and the DDP wrap after the
    # raise — is dead code. Presumably kept from a multi-GPU variant; confirm
    # whether the intended check was on world_size/num_gpu.
    if args.distributed > 1:
        raise ValueError("Evaluation is supported only on single GPU. args.num_gpu must be 1")
        bench = DDP(bench, device_ids=[args.device])  # torch.nn.DataParallel(bench, device_ids=list(range(args.num_gpu)))

    # Resolve annotation file and image directory for Waymo vs. COCO layouts.
    if args.waymo:
        annotation_path = args.waymo_val_annotation
        image_dir = args.waymo_val
    else:
        if 'test' in args.anno:
            # test splits ship image metadata only (no instance annotations)
            annotation_path = os.path.join(args.data, 'annotations', f'image_info_{args.anno}.json')
            image_dir = 'test2017'
        else:
            annotation_path = os.path.join(args.data, 'annotations', f'instances_{args.anno}.json')
            image_dir = args.anno
    dataset = CocoDetection(os.path.join(args.data, image_dir), annotation_path, data_config)

    evaluator = COCOEvaluator(dataset.coco, distributed=args.distributed, waymo=args.waymo)

    loader = create_loader(
        dataset,
        input_size=input_size,
        batch_size=args.batch_size,
        use_prefetcher=args.prefetcher,
        interpolation=args.interpolation,
        fill_color=args.fill_color,
        num_workers=args.workers,
        distributed=args.distributed,
        pin_mem=args.pin_mem,
        memory_format=memory_format)

    img_ids = []  # NOTE(review): never used below — candidate for removal
    results = []  # returned empty; detections live inside `evaluator`
    dllogger_metric = {}
    bench.eval()
    batch_time = AverageMeter()
    throughput = AverageMeter()
    end = time.time()
    total_time_start = time.time()
    with torch.no_grad():
        # NOTE: `input` shadows the builtin; kept as-is for byte-identity.
        for i, (input, target) in enumerate(loader):
            with torch.cuda.amp.autocast(enabled=args.amp):
                output = bench(input, target['img_scale'], target['img_size'])
            batch_time.update(time.time() - end)
            throughput.update(input.size(0) / batch_time.val)
            evaluator.add_predictions(output, target)
            torch.cuda.synchronize()

            # measure elapsed time
            # Drop the first 10 batches from the averages (warmup: cudnn
            # autotune, prefetcher fill, kernel compilation).
            if i == 9:
                batch_time.reset()
                throughput.reset()

            if args.rank == 0 and i % args.log_freq == 0:
                print(
                    'Test: [{0:>4d}/{1}] '
                    'Time: {batch_time.val:.3f}s ({batch_time.avg:.3f}s, {rate_avg:>7.2f}/s) '
                    .format(
                        i, len(loader), batch_time=batch_time,
                        rate_avg=input.size(0) / batch_time.avg,
                    )
                )
            end = time.time()

    dllogger_metric['total_inference_time'] = time.time() - total_time_start
    dllogger_metric['inference_throughput'] = throughput.avg
    # NOTE(review): 1000 / (images-per-second) reads as ms-per-image, but the
    # metadata above declares unit 's' — confirm the intended unit.
    dllogger_metric['inference_time'] = 1000 / throughput.avg
    total_time_start = time.time()
    mean_ap = 0.
    if not args.inference:
        if 'test' not in args.anno:
            mean_ap = evaluator.evaluate()
        else:
            # test split has no ground truth: dump predictions instead
            evaluator.save_predictions(args.results)
        dllogger_metric['map'] = mean_ap
        dllogger_metric['total_eval_time'] = time.time() - total_time_start
    else:
        # inference-only mode: skip metric computation entirely
        evaluator.save_predictions(args.results)

    # Only rank 0 logs in distributed runs to avoid duplicate records.
    if not args.distributed or args.rank == 0:
        dllogger.log(step=(), data=dllogger_metric, verbosity=0)

    return results
  230. def main():
  231. args = parser.parse_args()
  232. validate(args)
  233. if __name__ == '__main__':
  234. main()