report.py 2.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657
  1. # Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # Report JSON file structure:
  15. # - "model" : architecture of the model (e.g. "resnet50").
  16. # - "ngpus" : number of gpus on which training was performed.
  17. # - "total_duration" : total duration of training in seconds.
  18. # - "cmd" : list of application arguments.
  19. # - "metrics" : per-epoch metrics for train and validation
  20. # (some of the metrics below may not exist in the report,
  21. # depending on application arguments)
  22. # - "train.top1" : training top1 accuracy in epoch.
  23. # - "train.top5" : training top5 accuracy in epoch.
  24. # - "train.loss" : training loss in epoch.
  25. # - "train.time" : average training time of iteration in seconds.
  26. # - "train.total_ips" : training speed (data and compute time taken into account) for epoch in images/sec.
  27. # - "val.top1", "val.top5", "val.loss", "val.time", "val.total_ips" : the same but for validation.
  28. import json
  29. from collections import defaultdict, OrderedDict
  30. class Report:
  31. def __init__(self, model_name, ngpus, cmd):
  32. self.model_name = model_name
  33. self.ngpus = ngpus
  34. self.cmd = cmd
  35. self.total_duration = 0
  36. self.metrics = defaultdict(lambda: [])
  37. def add_value(self, metric, value):
  38. self.metrics[metric].append(value)
  39. def set_total_duration(self, duration):
  40. self.total_duration = duration
  41. def save(self, filename):
  42. report = OrderedDict([
  43. ('model', self.model_name),
  44. ('ngpus', self.ngpus),
  45. ('total_duration', self.total_duration),
  46. ('cmd', self.cmd),
  47. ('metrics', self.metrics),
  48. ])
  49. with open(filename, 'w') as f:
  50. json.dump(report, f, indent=4)