metrics.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288
  1. # Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import time
  15. from collections import defaultdict
  16. from copy import copy
  17. import numpy as np
  18. import torch
  19. from common.utils import all_reduce_cpu_scalars, print_once
  20. def __levenshtein(a, b):
  21. """Calculates the Levenshtein distance between two sequences."""
  22. n, m = len(a), len(b)
  23. if n > m:
  24. # Make sure n <= m, to use O(min(n,m)) space
  25. a, b = b, a
  26. n, m = m, n
  27. current = list(range(n + 1))
  28. for i in range(1, m + 1):
  29. previous, current = current, [i] + [0] * n
  30. for j in range(1, n + 1):
  31. add, delete = previous[j] + 1, current[j - 1] + 1
  32. change = previous[j - 1]
  33. if a[j - 1] != b[i - 1]:
  34. change = change + 1
  35. current[j] = min(add, delete, change)
  36. return current[n]
  37. def word_error_rate(hypotheses, references):
  38. """Computes average Word Error Rate (WER) between two text lists."""
  39. scores = 0
  40. words = 0
  41. len_diff = len(references) - len(hypotheses)
  42. if len_diff > 0:
  43. raise ValueError("Uneqal number of hypthoses and references: "
  44. "{0} and {1}".format(len(hypotheses), len(references)))
  45. elif len_diff < 0:
  46. hypotheses = hypotheses[:len_diff]
  47. for h, r in zip(hypotheses, references):
  48. h_list = h.split()
  49. r_list = r.split()
  50. words += len(r_list)
  51. scores += __levenshtein(h_list, r_list)
  52. if words != 0:
  53. wer = 1.0*scores/words
  54. else:
  55. wer = float('inf')
  56. return wer, scores, words
class MetricsAggregator:
    """Accumulates scalar metrics across grad-accumulation steps, devices
    and logging intervals.

    Data flow: `log_scalar` writes into `partials` -> optional `all_reduce`
    across devices -> `accumulate` folds partials into per-scope `metrics`
    -> `_finish_accumulating` averages by `metric_counts` -> `get_metrics`
    formats results for dllogger or TensorBoard.
    """

    def __init__(self, scopes=('train', 'train_avg'),
                 dllogger_keys=(),
                 benchmark_keys=(),
                 benchmark_epochs=0,
                 reduce_mean=(),
                 reduce_last=(),
                 group_tb_entries=False,
                 cuda=True):
        """
        Args:
            scopes: possible scopes of metrics accumulation
            dllogger_keys: metrics to log with dllogger
            benchmark_keys: metrics to log as benchmark metrics
            benchmark_epochs: num of last epochs to benchmark
            reduce_mean: keys accumulated with 'mean' reduction
            reduce_last: keys accumulated with 'last' reduction
            group_tb_entries: if True, rename keys ``a_b`` -> ``a/b`` so
                they group nicely in TensorBoard (see `get_metrics`)
            cuda: synchronize CUDA before starting/stopping epoch/val timers
        """
        super().__init__()
        self.dll_keys = dllogger_keys
        # Per-iteration accumulators and the divisor used when averaging.
        self.partials = defaultdict(float)
        self.partial_counts = defaultdict(int)
        # Default reduction is 'sum'; per-key overrides below.
        self.accum_reductions = defaultdict(lambda: 'sum')
        self.accum_reductions.update({k: 'mean' for k in reduce_mean})
        self.accum_reductions.update({k: 'last' for k in reduce_last})
        # Per-scope accumulated metrics and how many partials went into each.
        self.metrics = {scope: defaultdict(float) for scope in scopes}
        self.metric_counts = {scope: defaultdict(int) for scope in scopes}
        self.start_time = {scope: None for scope in scopes}
        self.done_accumulating = {scope: True for scope in scopes}
        self.benchmark_epochs = benchmark_epochs
        # Rolling window of the last `benchmark_epochs` train_avg values.
        self.metrics['train_benchmark'] = defaultdict(list)
        self.benchmark_keys = benchmark_keys
        self.scopes = scopes
        self.group_tb_entries = group_tb_entries
        self.cuda = cuda

    def log_scalar(self, key, val, accum_reduction=None):
        """Main primitive for logging partial metrics from single batch.

        NOTE: Assumption: `log_scalar` cannot be called with different
        `accum_reduction` for the same `key`. This results in undefined
        behavior.

        Args:
            key: metric key
            val: metric value
            accum_reduction: defines how to accumulate given metric:
                - 'sum': sums metrics across grad acc and devices batches
                - 'mean': same as 'sum' but with averaging
                - 'last': overwrites previous accumulated values. Useful for
                  logging metric once in a grad acc batch, e.g. learning rate.
                If None, a default value is fetched from
                self.accum_reductions. If not None, overwrites defaults in
                self.accum_reductions.
        """
        if accum_reduction is None:
            accum_reduction = self.accum_reductions[key]
        else:
            self.accum_reductions[key] = accum_reduction

        if accum_reduction == 'sum':
            self.partials[key] += val
            # Count pinned to 1 so `_finish_accumulating` divides by the
            # number of `accumulate` calls, not by the number of log calls.
            self.partial_counts[key] = 1
        elif accum_reduction == 'mean':
            self.partials[key] += val
            # Count grows with every log call -> true mean over values.
            self.partial_counts[key] += 1
        elif accum_reduction == 'last':
            self.partials[key] = val  # overwrite accumulation
            self.partial_counts[key] = 1
        else:
            raise ValueError(accum_reduction)

    def log_scalars(self, scalars_dict, accum_reduction=None):
        """Log whole dict of metrics at once (same reduction for all)."""
        for k, v in scalars_dict.items():
            self.log_scalar(k, v, accum_reduction)

    def __setitem__(self, key, val):
        """Convenience logging method. Use sparingly (see NOTE below).

        Uses 'last' aggregation and extracts tensors.
        Example:
            >>> metrics['lr'] = optim.param_groups[0]['lr']

        NOTE: `metrics['lr'] = ...` is very different
        from `metrics.partial['lr'] = ...`
        """
        # Unwrap 0-dim tensors to plain Python scalars before logging.
        extract = lambda t: t.item() if type(t) is torch.Tensor else t
        if type(val) is dict:
            # A dict value logs each entry under its own key; `key` is unused.
            for k, v in val.items():
                self.log_scalar(k, extract(v), 'last')
        else:
            self.log_scalar(key, extract(val), 'last')

    def accumulate(self, scopes=None):
        """Accumulates partial metrics in metrics for given scopes.

        Defines boundaries of accum_reduction in `log_scalar` method.
        Intended to run after each gradient accumulation adjusted iteration.
        """
        scopes = scopes if scopes is not None else self.scopes
        for scope in scopes:
            for k, v in self.partials.items():
                self.metrics[scope][k] += v
                # Keys never seen by `log_scalar` default to a count of 1.
                self.metric_counts[scope][k] += self.partial_counts.get(k, 1)

        # Partials are consumed exactly once per accumulate call.
        self.partials.clear()
        self.partial_counts.clear()

    def all_reduce(self, world_size):
        """Reduce metrics across devices.

        Currently assumes that all metrics are float scalars.

        After reducing, `log_scalar` method with accumulation other than
        'last' shouldn't be called prior to calling `accumulate`.
        """
        if world_size == 1:
            return
        # Sum every partial across ranks (CPU-side collective).
        self.partials = defaultdict(float,
                                    all_reduce_cpu_scalars(self.partials))
        for k, v in self.partials.items():
            if self.accum_reductions[k] in ('mean', 'last'):
                # NOTE(review): the 'ignore' partial appears to hold the
                # number of ranks to exclude from averaging (it is
                # subtracted from world_size) -- verify against callers.
                self.partial_counts[k] *= (world_size - self.partials.get('ignore', 0))
                if self.partials.get('ignore', 0) > 0:
                    assert self.accum_reductions[k] == 'mean'
                    print_once(f'reducing with world size {world_size - self.partials.get("ignore", 0)}')

    def start_iter(self, iter):
        # Per-iteration timing for the 'train' scope.
        self._start_accumulating(iter, True, 'train')

    def start_epoch(self, epoch):
        if self.cuda:
            # Ensure pending kernels finish so the timer is accurate.
            torch.cuda.synchronize()
        self._start_accumulating(epoch, True, 'train_avg')

    def start_val(self):
        if self.cuda:
            torch.cuda.synchronize()
        self._start_accumulating(None, True, 'val')

    def finish_iter(self):
        self._accumulate_time('train')

    def finish_logging_interval(self):
        self._finish_accumulating('train')

    def finish_epoch(self):
        if self.cuda:
            torch.cuda.synchronize()
        self._accumulate_time('train_avg')
        self._finish_accumulating('train_avg')

        # Append this epoch's averages to the benchmark window, keeping
        # only the last `benchmark_epochs` entries per key.
        metr = self.metrics['train_benchmark']
        for k in self.benchmark_keys:
            metr[k].append(self.metrics['train_avg'][k])

            if len(metr[k]) > self.benchmark_epochs:
                metr[k].pop(0)

    def finish_val(self, scope='val'):
        if self.cuda:
            torch.cuda.synchronize()
        self._accumulate_time(scope)
        self._finish_accumulating(scope)

    def get_metrics(self, scope='train', target='dll'):
        """Return finished metrics for `scope`, formatted for `target`
        ('dll' filters/prefixes for dllogger; 'tb' optionally regroups
        keys for TensorBoard)."""
        if scope == 'train_benchmark':
            # Benchmark scope: mean over the rolling window of epochs.
            metr = self.metrics[scope]
            ret = {'train_avg_' + k: np.mean(v) for k, v in metr.items()}
            ret['benchmark_epochs_num'] = len(list(metr.values())[0])
            return ret

        assert self.done_accumulating[scope]
        # Shallow copy so renaming below doesn't mutate stored metrics.
        ret = copy(self.metrics[scope])

        if target == 'dll':
            ret = {f'{scope}_{k}': v
                   for k, v in ret.items() if k in self.dll_keys}

        elif target == 'tb' and self.group_tb_entries:
            # Rename keys so they would group nicely inside TensorBoard
            def split_key(k):
                pos = k.rfind('_')
                return k[:pos] + '/' + k[pos + 1:] if pos >= 0 else k
            ret = {split_key(k): v for k, v in ret.items()}

        return ret

    def _start_accumulating(self, step, start_timer=True, scope='train'):
        """Reset `scope` (if its previous window finished) and optionally
        start its wall-clock timer."""
        del step  # unused
        assert not self.partials, 'metrics.accumulate call missed'
        assert not self.partial_counts, 'metrics.accumulate call missed'
        if self.done_accumulating[scope]:
            self.metrics[scope].clear()
            self.metric_counts[scope].clear()
        if start_timer:
            self.start_time[scope] = time.time()
        self.done_accumulating[scope] = False

    def _finish_accumulating(self, scope='train'):
        """Average accumulated sums by their counts and mark `scope` done."""
        assert not self.done_accumulating[scope]
        metr = self.metrics[scope]
        counts = self.metric_counts[scope]

        for k, v in metr.items():
            metr[k] = v / counts[k]

        self.done_accumulating[scope] = True

    def _accumulate_time(self, scope='train'):
        """Add elapsed time since `_start_accumulating` to `scope`'s
        'took' metric (summed, never averaged -- count forced to 1)."""
        assert not self.done_accumulating[scope]
        took = time.time() - self.start_time[scope]
        self.start_time[scope] = None
        self.metrics[scope]['took'] += took
        self.metric_counts[scope]['took'] = 1  # not +=