# deployer_lib.py
  1. #!/usr/bin/python
  2. # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License");
  5. # you may not use this file except in compliance with the License.
  6. # You may obtain a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. import os
  16. import sys
  17. import shutil
  18. import time
  19. import json
  20. import onnx
  21. import torch
  22. import argparse
  23. import statistics
  24. import onnxruntime
  25. from collections import Counter
# Maps torch scalar dtypes to the data_type strings expected in a Triton
# model configuration (config.pbtxt).
torch_type_to_triton_type = {
    torch.bool: 'TYPE_BOOL',
    torch.int8: 'TYPE_INT8',
    torch.int16: 'TYPE_INT16',
    torch.int32: 'TYPE_INT32',
    torch.int64: 'TYPE_INT64',
    torch.uint8: 'TYPE_UINT8',
    torch.float16: 'TYPE_FP16',
    torch.float32: 'TYPE_FP32',
    torch.float64: 'TYPE_FP64'
}

# Skeleton of the Triton config.pbtxt; placeholders are filled by
# DeployerLibrary.write_config via str.format_map.
CONFIG_TEMPLATE = r"""
name: "{model_name}"
platform: "{platform}"
max_batch_size: {max_batch_size}
input [
{spec_inputs}
]
output [
{spec_outputs}
]
{dynamic_batching}
{model_optimizations}
instance_group [
{{
count: {engine_count}
kind: {kind}
gpus: [ {gpu_list} ]
}}
]
"""

# One entry of the config's `input [...]` list; doubled braces are literal
# braces after str.format_map. The trailing comma of the last entry is
# stripped by write_config.
INPUT_TEMPLATE = r"""
{{
name: "input__{num}"
data_type: {type}
dims: {dims}
{reshape}
}},"""

# One entry of the config's `output [...]` list (same conventions as
# INPUT_TEMPLATE).
OUTPUT_TEMPLATE = r"""
{{
name: "output__{num}"
data_type: {type}
dims: {dims}
{reshape}
}},"""

# Optimization section enabling the TensorRT execution accelerator; used
# only for the onnxruntime_onnx platform. Contains no placeholders (all
# braces are escaped), so format_map({}) returns it with single braces.
MODEL_OPTIMIZATION_TEMPLATE = r"""
optimization {{
execution_accelerators {{
gpu_execution_accelerator: [
{{
name: "tensorrt"
}}
]
}}
}}
"""
  82. def remove_empty_lines(text):
  83. ''' removes empty lines from text, returns the result '''
  84. ret = "".join([s for s in text.strip().splitlines(True) if s.strip()])
  85. return ret
def create_deployer(argv, model_args_parser):
    ''' Build a Deployer from command-line arguments.

    :: argv :: list of command-line tokens (e.g. sys.argv[1:])
    :: model_args_parser :: callable that parses the model-specific
        remainder arguments and returns a namespace
    returns (deployer, model_args): the Deployer object and the parsed
    model-specific arguments
    '''
    parser = argparse.ArgumentParser()
    # required args: exactly one conversion method must be chosen
    method = parser.add_mutually_exclusive_group(required=True)
    method.add_argument('--ts-script',
                        action='store_true',
                        help='convert to torchscript using torch.jit.script')
    method.add_argument('--ts-trace',
                        action='store_true',
                        help='convert to torchscript using torch.jit.trace')
    method.add_argument('--onnx',
                        action='store_true',
                        help='convert to onnx using torch.onnx.export')
    # triton related args
    arguments = parser.add_argument_group('triton related flags')
    arguments.add_argument('--triton-no-cuda',
                           action='store_true',
                           help='Use the CPU for tracing.')
    arguments.add_argument(
        '--triton-model-name',
        type=str,
        default="model",
        help="exports to appropriate directory structure for triton")
    arguments.add_argument(
        "--triton-model-version",
        type=int,
        default=1,
        help="exports to appropriate directory structure for triton")
    arguments.add_argument(
        "--triton-max-batch-size",
        type=int,
        default=8,
        help="Specifies the 'max_batch_size' in the triton model config.\
See the triton documentation for more info.")
    arguments.add_argument(
        "--triton-dyn-batching-delay",
        type=float,
        default=0,
        help=
        "Determines the dynamic_batching queue delay in milliseconds(ms) for\
the triton model config. Use '0' or '-1' to specify static batching.\
See the triton documentation for more info.")
    arguments.add_argument(
        "--triton-engine-count",
        type=int,
        default=1,
        help=
        "Specifies the 'instance_group' count value in the triton model config.\
See the triton documentation for more info.")
    arguments.add_argument('--save-dir',
                           type=str,
                           default='./triton_models',
                           help='Saved model directory')
    # NOTE(review): added on the parser directly, not on a group, so it
    # appears under the default options section of --help
    parser.add_argument("--deploy_cpu", default=False, action="store_true")
    # other args
    arguments = parser.add_argument_group('other flags')
    # remainder args: everything after the deployer's own flags is passed
    # through untouched to model_args_parser
    arguments.add_argument(
        'model_arguments',
        nargs=argparse.REMAINDER,
        help=
        'arguments that will be ignored by deployer lib and will be forwarded to your deployer script'
    )
    args = parser.parse_args(argv)
    # [1:] skips the first remainder token — presumably a '--' separator;
    # TODO confirm against callers
    model_args = model_args_parser(args.model_arguments[1:])
    # keep only the model args that were explicitly given on the command
    # line (arg[2:] strips the leading '--' from each flag token)
    model_args_no_def = {
        k: v
        for k, v in vars(model_args).items()
        if k in [arg[2:] for arg in args.model_arguments[1:]]
    }
    deployer = Deployer(args, model_args_no_def)
    return deployer, model_args
class DeployerLibrary:
    """Utility library used by Deployer.

    Provides: loading sample batches onto a device, inferring
    min/max/opt/dynamic shapes from sample batches, timing model runs,
    comparing outputs between two models, and writing the Triton
    config.pbtxt file.
    """

    def __init__(self, args, model_args):
        # args: parsed deployer command-line namespace (see create_deployer)
        self.args = args
        # model_args: model-specific arguments forwarded by the caller
        self.model_args = model_args
        # Triton platform string; must be set via set_platform() before
        # write_config() is called
        self.platform = None

    def set_platform(self, platform):
        ''' sets the platform
        :: platform :: "pytorch_libtorch" or "onnxruntime_onnx"
        '''
        self.platform = platform

    def prepare_inputs(self, dataloader, device):
        ''' load sample inputs to device

        :: dataloader :: iterable yielding either a single torch.Tensor
            or a sequence of torch.Tensor per batch
        :: device :: target device, or a falsy value to leave tensors
            where they are
        returns a list of tuples of tensors, one tuple per batch
        '''
        inputs = []
        for batch in dataloader:
            if type(batch) is torch.Tensor:
                # single tensor: normalize to a 1-tuple
                # NOTE(review): this branch calls .to(device) even when
                # device is None (a no-op move), unlike the branch below
                batch_d = batch.to(device)
                batch_d = (batch_d, )
                inputs.append(batch_d)
            else:
                batch_d = []
                for x in batch:
                    assert type(x) is torch.Tensor, "input is not a tensor"
                    batch_d.append(x.to(device) if device else x)
                batch_d = tuple(batch_d)
                inputs.append(batch_d)
        return inputs

    def get_list_of_shapes(self, l, fun):
        ''' returns the list of min/max shapes, depending on fun
        :: l :: list of tuples of tensors
        :: fun :: min or max
        '''
        tensor_tuple = l[0]
        # start from the first batch's shapes, then fold fun() over the rest
        shapes = [list(x.shape) for x in tensor_tuple]
        for tensor_tuple in l:
            assert len(tensor_tuple) == len(
                shapes), "tensors with varying shape lengths are not supported"
            for i, x in enumerate(tensor_tuple):
                for j in range(len(x.shape)):
                    shapes[i][j] = fun(shapes[i][j], x.shape[j])
        return shapes  # a list of shapes

    def get_tuple_of_min_shapes(self, l):
        ''' returns the tuple of min shapes
        :: l :: list of tuples of tensors '''
        shapes = self.get_list_of_shapes(l, min)
        # the batch dimension (dim 0) is forced down to 1
        min_batch = 1
        shapes = [[min_batch, *shape[1:]] for shape in shapes]
        shapes = tuple(shapes)
        return shapes  # tuple of min shapes

    def get_tuple_of_max_shapes(self, l):
        ''' returns the tuple of max shapes
        :: l :: list of tuples of tensors '''
        shapes = self.get_list_of_shapes(l, max)
        # batch dimension is at least 2 so the shape range permits batching
        max_batch = max(2, shapes[0][0])
        shapes = [[max_batch, *shape[1:]] for shape in shapes]
        shapes = tuple(shapes)
        return shapes  # tuple of max shapes

    def get_tuple_of_opt_shapes(self, l):
        ''' returns the tuple of opt shapes
        :: l :: list of tuples of tensors '''
        # count full shape-tuples and pick the most frequent one
        counter = Counter()
        for tensor_tuple in l:
            shapes = [x.shape for x in tensor_tuple]
            shapes = tuple(shapes)
            counter[shapes] += 1
        shapes = counter.most_common(1)[0][0]
        return shapes  # tuple of most common occuring shapes

    def get_tuple_of_dynamic_shapes(self, l):
        ''' returns a tuple of dynamic shapes: variable tensor dimensions
        (for ex. batch size) occur as -1 in the tuple
        :: l :: list of tuples of tensors '''
        tensor_tuple = l[0]
        shapes = [list(x.shape) for x in tensor_tuple]
        for tensor_tuple in l:
            err_msg = "tensors with varying shape lengths are not supported"
            assert len(tensor_tuple) == len(shapes), err_msg
            for i, x in enumerate(tensor_tuple):
                for j in range(len(x.shape)):
                    # dim 0 (batch) is always dynamic; other dims become
                    # dynamic only when they vary across batches
                    if shapes[i][j] != x.shape[j] or j == 0:
                        shapes[i][j] = -1
        shapes = tuple(shapes)
        return shapes  # tuple of dynamic shapes

    def run_models(self, models, inputs):
        ''' run the models on inputs, return the outputs and execution times

        returns a flat list interleaving results per model:
        [outputs_0, time_0, outputs_1, time_1, ...]
        '''
        ret = []
        for model in models:
            # synchronize so the wall-clock window covers queued GPU work
            torch.cuda.synchronize()
            time_start = time.time()
            outputs = []
            for input in inputs:
                with torch.no_grad():
                    output = model(*input)
                # normalize a bare tensor result to a list of tensors
                if type(output) is torch.Tensor:
                    output = [output]
                outputs.append(output)
            torch.cuda.synchronize()
            time_end = time.time()
            t = time_end - time_start
            ret.append(outputs)
            ret.append(t)
        return ret

    def compute_errors(self, outputs_A, outputs_B):
        ''' returns the list of L_inf errors computed over every single output tensor '''
        Linf_errors = []
        for output_A, output_B in zip(outputs_A, outputs_B):
            for x, y in zip(output_A, output_B):
                # max absolute elementwise difference for this tensor pair
                error = (x - y).norm(float('inf')).item()
                Linf_errors.append(error)
        return Linf_errors

    def print_errors(self, Linf_errors):
        ''' print various statistics of Linf errors '''
        # NOTE(review): statistics.variance/stdev raise StatisticsError if
        # fewer than two errors are supplied
        print()
        print("conversion correctness test results")
        print("-----------------------------------")
        print("maximal absolute error over dataset (L_inf): ",
              max(Linf_errors))
        print()
        print("average L_inf error over output tensors: ",
              statistics.mean(Linf_errors))
        print("variance of L_inf error over output tensors: ",
              statistics.variance(Linf_errors))
        print("stddev of L_inf error over output tensors: ",
              statistics.stdev(Linf_errors))
        print()

    def write_config(self,
                     config_filename,
                     input_shapes,
                     input_types,
                     output_shapes,
                     output_types):
        ''' writes triton config file
        :: config_filename :: the file to write the config file into
        :: input_shapes :: tuple of dynamic shapes of the input tensors
        :: input_types :: tuple of torch types of the input tensors
        :: output_shapes :: tuple of dynamic shapes of the output tensors
        :: output_types :: tuple of torch types of the output tensors
        '''
        assert self.platform is not None, "error - platform is not set"
        config_template = CONFIG_TEMPLATE
        accelerator_template = MODEL_OPTIMIZATION_TEMPLATE
        input_template = INPUT_TEMPLATE
        spec_inputs = r""""""
        for i, (shape, typ) in enumerate(zip(input_shapes, input_types)):
            d = {
                'num': str(i),
                'type': torch_type_to_triton_type[typ],
                'dims': str([1]) if len(shape) == 1 else str(list(shape)[1:])  # first dimension is the batch size
            }
            # rank-1 tensors are declared as dims [1] plus an empty reshape
            d['reshape'] = 'reshape: { shape: [ ] }' if len(shape) == 1 else ''
            spec_inputs += input_template.format_map(d)
        spec_inputs = spec_inputs[:-1]  # drop trailing comma of last entry
        output_template = OUTPUT_TEMPLATE
        spec_outputs = r""""""
        for i, (shape, typ) in enumerate(zip(output_shapes, output_types)):
            d = {
                'num': str(i),
                'type': torch_type_to_triton_type[typ],
                'dims': str([1]) if len(shape) == 1 else str(list(shape)[1:])  # first dimension is the batch size
            }
            d['reshape'] = 'reshape: { shape: [ ] }' if len(shape) == 1 else ''
            spec_outputs += output_template.format_map(d)
        spec_outputs = spec_outputs[:-1]  # drop trailing comma of last entry
        batching_str = ""
        parameters_str = ""
        max_batch_size = self.args.triton_max_batch_size
        accelerator_str = ""
        if (self.args.triton_dyn_batching_delay > 0):
            # Use only full and half full batches
            pref_batch_size = [int(max_batch_size / 2.0), max_batch_size]
            batching_str = r"""
dynamic_batching {{
preferred_batch_size: [{0}]
max_queue_delay_microseconds: {1}
}}""".format(", ".join([str(x) for x in pref_batch_size]),
             int(self.args.triton_dyn_batching_delay * 1000.0))
        if self.platform == 'onnxruntime_onnx':
            # enable the TensorRT accelerator for ONNX models
            accelerator_str = accelerator_template.format_map({})
        config_values = {
            "model_name": self.args.triton_model_name,
            "platform": self.platform,
            "max_batch_size": max_batch_size,
            "spec_inputs": spec_inputs,
            "spec_outputs": spec_outputs,
            "dynamic_batching": batching_str,
            # NOTE(review): CONFIG_TEMPLATE has no {model_parameters}
            # placeholder; format_map ignores this extra key
            "model_parameters": parameters_str,
            "model_optimizations": accelerator_str,
            "gpu_list": "" if self.args.deploy_cpu else ", ".join([str(x) for x in range(torch.cuda.device_count())]),
            "engine_count": self.args.triton_engine_count,
            "kind": "KIND_CPU" if self.args.deploy_cpu else "KIND_GPU"
        }
        # write config
        with open(config_filename, "w") as file:
            final_config_str = config_template.format_map(config_values)
            final_config_str = remove_empty_lines(final_config_str)
            file.write(final_config_str)
class Deployer:
    """Top-level entry point: converts a model to TorchScript or ONNX and
    lays out the Triton model repository (versioned model file plus
    config.pbtxt) under args.save_dir."""

    def __init__(self, args, model_args):
        self.args = args
        self.lib = DeployerLibrary(args, model_args)

    def deploy(self, dataloader, model):
        ''' deploy the model and test for correctness with dataloader '''
        # dispatch on the (mutually exclusive) conversion method flags
        if self.args.ts_script or self.args.ts_trace:
            self.lib.set_platform("pytorch_libtorch")
            print("deploying model " + self.args.triton_model_name +
                  " in format " + self.lib.platform)
            self.to_triton_torchscript(dataloader, model)
        elif self.args.onnx:
            self.lib.set_platform("onnxruntime_onnx")
            print("deploying model " + self.args.triton_model_name +
                  " in format " + self.lib.platform)
            self.to_triton_onnx(dataloader, model)
        else:
            # unreachable when args come from create_deployer (the method
            # group is required there)
            assert False, "error"
        print("done")

    def to_triton_onnx(self, dataloader, model):
        ''' export the model to onnx and test correctness on dataloader '''
        model.eval()
        assert not model.training, "internal error - model should be in eval() mode! "
        # prepare inputs
        inputs = self.lib.prepare_inputs(dataloader, device=None)
        # generate outputs
        outputs = []
        for input in inputs:
            with torch.no_grad():
                output = model(*input)
            if type(output) is torch.Tensor:
                output = [output]
            outputs.append(output)
        # generate input shapes - dynamic tensor shape support
        input_shapes = self.lib.get_tuple_of_dynamic_shapes(inputs)
        # generate output shapes - dynamic tensor shape support
        output_shapes = self.lib.get_tuple_of_dynamic_shapes(outputs)
        # generate input types
        input_types = [x.dtype for x in inputs[0]]
        # generate output types
        output_types = [x.dtype for x in outputs[0]]
        # get input names (input__0, input__1, ... — matches INPUT_TEMPLATE)
        rng = range(len(input_types))
        input_names = ["input__" + str(num) for num in rng]
        # get output names (output__0, ... — matches OUTPUT_TEMPLATE)
        rng = range(len(output_types))
        output_names = ["output__" + str(num) for num in rng]
        # prepare save path: save_dir/<model_name>/<version>/
        model_folder = os.path.join(self.args.save_dir, self.args.triton_model_name)
        version_folder = os.path.join(model_folder, str(self.args.triton_model_version))
        if not os.path.exists(version_folder):
            os.makedirs(version_folder)
        final_model_path = os.path.join(version_folder, 'model.onnx')
        # a *directory* named model.onnx is created because the export below
        # uses use_external_data_format=True, which writes tensor data as
        # extra files next to the .onnx file
        if not os.path.exists(final_model_path):
            os.makedirs(final_model_path)
        final_model_path = os.path.join(final_model_path, 'model.onnx')
        # get indices of dynamic input and output shapes (-1 dims)
        dynamic_axes = {}
        for input_name, input_shape in zip(input_names, input_shapes):
            dynamic_axes[input_name] = [i for i, x in enumerate(input_shape) if x == -1]
        for output_name, output_shape in zip(output_names, output_shapes):
            dynamic_axes[output_name] = [i for i, x in enumerate(output_shape) if x == -1]
        # export the model
        assert not model.training, "internal error - model should be in eval() mode! "
        with torch.no_grad():
            torch.onnx.export(model, inputs[0], final_model_path, verbose=False,
                              input_names=input_names, output_names=output_names,
                              dynamic_axes=dynamic_axes, opset_version=11,
                              use_external_data_format=True)
        # write the Triton config next to the version folder
        config_filename = os.path.join(model_folder, "config.pbtxt")
        self.lib.write_config(config_filename,
                              input_shapes, input_types,
                              output_shapes, output_types)

    def to_triton_torchscript(self, dataloader, model):
        ''' export the model to torchscript and test correctness on dataloader '''
        model.eval()
        assert not model.training, "internal error - model should be in eval() mode! "
        # prepare inputs
        inputs = self.lib.prepare_inputs(dataloader, device=None)
        # generate input shapes - dynamic tensor shape support
        input_shapes = self.lib.get_tuple_of_dynamic_shapes(inputs)
        # generate input types
        input_types = [x.dtype for x in inputs[0]]
        # prepare save path: save_dir/<model_name>/<version>/model.pt
        model_folder = os.path.join(self.args.save_dir, self.args.triton_model_name)
        version_folder = os.path.join(model_folder, str(self.args.triton_model_version))
        if not os.path.exists(version_folder):
            os.makedirs(version_folder)
        final_model_path = os.path.join(version_folder, 'model.pt')
        # convert the model (deploy() guarantees one of the two flags is set)
        with torch.no_grad():
            if self.args.ts_trace:  # trace it
                model_ts = torch.jit.trace(model, inputs[0])
            if self.args.ts_script:  # script it
                model_ts = torch.jit.script(model)
        # generate outputs (from the original eager model, not model_ts)
        outputs = []
        for input in inputs:
            with torch.no_grad():
                output = model(*input)
            if type(output) is torch.Tensor:
                output = [output]
            outputs.append(output)
        # save the model
        torch.jit.save(model_ts, final_model_path)
        # generate output shapes - dynamic tensor shape support
        output_shapes = self.lib.get_tuple_of_dynamic_shapes(outputs)
        # generate output types
        output_types = [x.dtype for x in outputs[0]]
        # now we build the config for triton
        config_filename = os.path.join(model_folder, "config.pbtxt")
        self.lib.write_config(config_filename,
                              input_shapes, input_types,
                              output_shapes, output_types)