benchmark_hooks.py 1.8 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
  14. import time
  15. import numpy as np
  16. import tensorflow as tf
  17. __all__ = ['BenchmarkHook']
  18. class BenchmarkHook(tf.train.SessionRunHook):
  19. latencies = ['avg', 50, 90, 95, 99, 100]
  20. def __init__(self, global_batch_size, warmup_steps=10):
  21. self.warmup_steps = warmup_steps
  22. self.global_batch_size = global_batch_size
  23. self.iter_times = []
  24. def before_run(self, run_context):
  25. self.t0 = time.time()
  26. def after_run(self, run_context, run_values):
  27. batch_time = time.time() - self.t0
  28. self.iter_times.append(batch_time)
  29. def get_average_speed_and_latencies(self):
  30. if len(self.iter_times) > self.warmup_steps + 5:
  31. warmup_steps = self.warmup_steps
  32. elif len(self.iter_times) > 15:
  33. warmup_steps = 10
  34. elif len(self.iter_times) > 10:
  35. warmup_steps = 5
  36. elif len(self.iter_times) > 4:
  37. warmup_steps = 2
  38. elif len(self.iter_times) > 1:
  39. warmup_steps = 1
  40. else:
  41. warmup_steps = 0
  42. times = self.iter_times[warmup_steps:]
  43. avg_time = np.mean(times)
  44. speed = self.global_batch_size / avg_time
  45. latencies = {}
  46. for lat in self.latencies:
  47. if lat == 'avg':
  48. val = avg_time
  49. else:
  50. val = np.percentile(times, lat)
  51. latencies[str(lat)] = val
  52. return speed, latencies