logger.py 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169
  1. # Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import math
  15. import numpy as np
  16. import dllogger as DLLogger
  17. class EpochMeter:
  18. def __init__(self, name):
  19. self.name = name
  20. self.data = []
  21. def update(self, epoch, val):
  22. self.data.append((epoch, val))
  23. class IterationMeter:
  24. def __init__(self, name):
  25. self.name = name
  26. self.data = []
  27. def update(self, epoch, iteration, val):
  28. self.data.append((epoch, iteration, val))
  29. class IterationAverageMeter:
  30. def __init__(self, name):
  31. self.name = name
  32. self.data = []
  33. self.n = 0
  34. self.sum = 0
  35. def update_iter(self, val):
  36. if math.isfinite(val): # sometimes loss === 'inf'
  37. self.n += 1
  38. self.sum += 0 if math.isinf(val) else val
  39. def update_epoch(self, epoch):
  40. self.data.append((epoch, self.sum / self.n))
  41. self.n = 0
  42. self.sum = 0
  43. class Logger:
  44. def __init__(self, name, json_output=None, log_interval=20):
  45. self.name = name
  46. self.train_loss_logger = IterationAverageMeter("Training loss")
  47. self.train_epoch_time_logger = EpochMeter("Training 1 epoch time")
  48. self.val_acc_logger = EpochMeter("Validation accuracy")
  49. self.log_interval = log_interval
  50. backends = [ DLLogger.StdOutBackend(DLLogger.Verbosity.DEFAULT) ]
  51. if json_output:
  52. backends.append(DLLogger.JSONStreamBackend(DLLogger.Verbosity.VERBOSE, json_output))
  53. DLLogger.init(backends)
  54. DLLogger.metadata("mAP", {"unit": None})
  55. self.epoch = 0
  56. self.train_iter = 0
  57. self.summary = {}
  58. def step(self):
  59. return (
  60. self.epoch,
  61. self.train_iter,
  62. )
  63. def log_params(self, data):
  64. DLLogger.log("PARAMETER", data)
  65. DLLogger.flush()
  66. def log(self, key, value):
  67. DLLogger.log(self.step(), { key: value })
  68. DLLogger.flush()
  69. def add_to_summary(self, data):
  70. for key, value in data.items():
  71. self.summary[key] = value
  72. def log_summary(self):
  73. DLLogger.log((), self.summary)
  74. def update_iter(self, epoch, iteration, loss):
  75. self.epoch = epoch
  76. self.train_iter = iteration
  77. self.train_loss_logger.update_iter(loss)
  78. if iteration % self.log_interval == 0:
  79. self.log('loss', loss)
  80. def update_epoch(self, epoch, acc):
  81. self.epoch = epoch
  82. self.train_loss_logger.update_epoch(epoch)
  83. self.val_acc_logger.update(epoch, acc)
  84. data = { 'mAP': acc }
  85. self.add_to_summary(data)
  86. DLLogger.log((self.epoch,), data)
  87. def update_epoch_time(self, epoch, time):
  88. self.epoch = epoch
  89. self.train_epoch_time_logger.update(epoch, time)
  90. DLLogger.log((self.epoch,), { 'time': time })
  91. def print_results(self):
  92. return self.train_loss_logger.data, self.val_acc_logger.data, self.train_epoch_time_logger
  93. class BenchmarkMeter:
  94. def __init__(self, name):
  95. self.name = name
  96. self.data = []
  97. self.total_images = 0
  98. self.total_time = 0
  99. self.avr_images_per_second = 0
  100. def update(self, bs, time):
  101. self.total_images += bs
  102. self.total_time += time
  103. self.avr_images_per_second = self.total_images / self.total_time
  104. self.data.append(bs / time)
  105. class BenchLogger(Logger):
  106. def __init__(self, *args, **kwargs):
  107. super().__init__(*args, **kwargs)
  108. self.images_per_ses = BenchmarkMeter(self.name)
  109. DLLogger.metadata("avg_img/sec", {"unit": "images/s"})
  110. DLLogger.metadata("med_img/sec", {"unit": "images/s"})
  111. DLLogger.metadata("min_img/sec", {"unit": "images/s"})
  112. DLLogger.metadata("max_img/sec", {"unit": "images/s"})
  113. def update(self, bs, time):
  114. self.images_per_ses.update(bs, time)
  115. def print_result(self):
  116. total_bs = self.images_per_ses.total_images
  117. total_time = self.images_per_ses.total_time
  118. avr = self.images_per_ses.avr_images_per_second
  119. data = np.array(self.images_per_ses.data)
  120. med = np.median(data)
  121. DLLogger.log((), {
  122. 'avg_img/sec': avr,
  123. 'med_img/sec': np.median(data),
  124. 'min_img/sec': np.min(data),
  125. 'max_img/sec': np.max(data),
  126. })
  127. print("Done benchmarking. Total images: {}\ttotal time: {:.3f}\tAverage images/sec: {:.3f}\tMedian images/sec: {:.3f}".format(
  128. total_bs,
  129. total_time,
  130. avr,
  131. med
  132. ))
  133. return med