# inference.py — benchmark inference latency/throughput of the NCF (NeuMF) model.
#
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import time
from argparse import ArgumentParser

import numpy as np
import torch
import torch.jit

import dllogger
from neumf import NeuMF
  22. def parse_args():
  23. parser = ArgumentParser(description="Benchmark inference performance of the NCF model")
  24. parser.add_argument('--load_checkpoint_path', default=None, type=str,
  25. help='Path to the checkpoint file to be loaded before training/evaluation')
  26. parser.add_argument('--n_users', default=138493, type=int,
  27. help='Number of users. Defaults to the number of users in the ml-20m dataset after preprocessing')
  28. parser.add_argument('--n_items', default=26744, type=int,
  29. help='Number of items. Defaults to the number of users in the ml-20m dataset after preprocessing')
  30. parser.add_argument('-f', '--factors', type=int, default=64,
  31. help='Number of predictive factors')
  32. parser.add_argument('--dropout', type=float, default=0.5,
  33. help='Dropout probability, if equal to 0 will not use dropout at all')
  34. parser.add_argument('--layers', nargs='+', type=int,
  35. default=[256, 256, 128, 64],
  36. help='Sizes of hidden layers for MLP')
  37. parser.add_argument('--batch_sizes', default='1,4,16,64,256,1024,4096,16384,65536,262144,1048576', type=str,
  38. help='A list of comma-separated batch size values to benchmark')
  39. parser.add_argument('--num_batches', default=200, type=int,
  40. help='Number of batches for which to measure latency and throughput')
  41. parser.add_argument('--fp16', action='store_true', help='Cast the model to FP16 precision', default=False)
  42. parser.add_argument('--log_path', default='log.json', type=str,
  43. help='Path for the JSON training log')
  44. return parser.parse_args()
  45. def main():
  46. args = parse_args()
  47. dllogger.init(backends=[dllogger.JSONStreamBackend(verbosity=dllogger.Verbosity.VERBOSE,
  48. filename=args.log_path),
  49. dllogger.StdOutBackend(verbosity=dllogger.Verbosity.VERBOSE)])
  50. dllogger.log(data=vars(args), step='PARAMETER')
  51. model = NeuMF(nb_users=args.n_users, nb_items=args.n_items, mf_dim=args.factors,
  52. mlp_layer_sizes=args.layers, dropout=args.dropout)
  53. model = model.cuda()
  54. if args.load_checkpoint_path:
  55. state_dict = torch.load(args.load_checkpoint_path)
  56. model.load_state_dict(state_dict)
  57. if args.fp16:
  58. model.half()
  59. model.eval()
  60. batch_sizes = args.batch_sizes.split(',')
  61. batch_sizes = [int(s) for s in batch_sizes]
  62. result_data = {}
  63. for batch_size in batch_sizes:
  64. print('benchmarking batch size: ', batch_size)
  65. users = torch.cuda.LongTensor(batch_size).random_(0, args.n_users)
  66. items = torch.cuda.LongTensor(batch_size).random_(0, args.n_items)
  67. latencies = []
  68. for i in range(args.num_batches):
  69. torch.cuda.synchronize()
  70. start = time.time()
  71. _ = model(users, items, sigmoid=True)
  72. torch.cuda.synchronize()
  73. end_time = time.time()
  74. if i < 10: # warmup iterations
  75. continue
  76. latencies.append(end_time - start)
  77. result_data[f'batch_{batch_size}_mean_throughput'] = batch_size / np.mean(latencies)
  78. result_data[f'batch_{batch_size}_mean_latency'] = np.mean(latencies)
  79. result_data[f'batch_{batch_size}_p90_latency'] = np.percentile(latencies, 90)
  80. result_data[f'batch_{batch_size}_p95_latency'] = np.percentile(latencies, 95)
  81. result_data[f'batch_{batch_size}_p99_latency'] = np.percentile(latencies, 99)
  82. for batch_size in batch_sizes:
  83. dllogger.metadata(f'batch_{batch_size}_mean_throughput', {'unit': 'samples/s'})
  84. for p in ['mean', 'p90', 'p95', 'p99']:
  85. dllogger.metadata(f'batch_{batch_size}_{p}_latency', {'unit': 's'})
  86. dllogger.log(data=result_data, step=tuple())
  87. dllogger.flush()
  88. return
  89. if __name__ == '__main__':
  90. main()