| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157 |
- #!/bin/bash
- # Copyright (c) 2020 NVIDIA CORPORATION. All rights reserved.
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- export TRITON_MODEL_OVERWRITE=True
- NV_VISIBLE_DEVICES=0
- bert_model=${1:-"large"}
- precision=${2:-"fp32"}
- init_checkpoint=${3:-"/workspace/bert/checkpoints/bert_qa.pt"}
- EXPORT_FORMAT=${4:-"ts-script"}
- PROFILING_DATA="triton/profiling_data_int64"
- MODEL_NAME="bert_${bert_model}_${precision}_${EXPORT_FORMAT}"
- BERT_DIR="/workspace/bert"
- # Create common bridge for client and server
- BRIDGE_NAME="tritonnet"
- docker network create ${BRIDGE_NAME}
- # Start Server
- echo Starting server...
- #bash triton/launch_triton_server.sh
- SERVER_ID=$( ./triton/launch_triton_server.sh ${BRIDGE_NAME} --NV_VISIBLE_DEVICES=$NV_VISIBLE_DEVICES )
- SERVER_IP=$( docker inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}' ${SERVER_ID} )
- EXPORT_MODEL_ARGS="${BERT_DIR} ${EXPORT_FORMAT} ${precision} 1 ${MODEL_NAME}"
- PERF_CLIENT_ARGS="50000 10 20"
- # Restart Server
- restart_server() {
- docker kill trt_server_cont
- #bash triton/launch_triton_server.sh
- SERVER_ID=$( ./triton/launch_triton_server.sh ${BRIDGE_NAME} --NV_VISIBLE_DEVICES=$NV_VISIBLE_DEVICES )
- SERVER_IP=$( docker inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}' ${SERVER_ID} )
- }
- # Clean up
- cleanup() {
- docker kill trt_server_cont
- docker network rm ${BRIDGE_NAME}
- }
- trap cleanup EXIT
- ############## Dynamic Batching Comparison ##############
- SERVER_BATCH_SIZE=8
- CLIENT_BATCH_SIZE=1
- TRITON_ENGINE_COUNT=1
- TEST_NAME="DYN_BATCH_"
- # Dynamic batching 10 ms
- TRITON_DYN_BATCHING_DELAY=10
- ./triton/export_model.sh ${NV_VISIBLE_DEVICES} ${BRIDGE_NAME} ${init_checkpoint} ${SERVER_BATCH_SIZE} ${EXPORT_MODEL_ARGS} ${TRITON_DYN_BATCHING_DELAY} ${TRITON_ENGINE_COUNT} ${TRITON_MODEL_OVERWRITE}
- restart_server
- sleep 15
- ./triton/run_perf_client.sh ${MODEL_NAME} 1 ${precision} ${CLIENT_BATCH_SIZE} ${PERF_CLIENT_ARGS} ${SERVER_IP} ${BRIDGE_NAME} ${TEST_NAME}${TRITON_DYN_BATCHING_DELAY} ${PROFILING_DATA} ${NV_VISIBLE_DEVICES}
- # Dynamic batching 5 ms
- TRITON_DYN_BATCHING_DELAY=5
- ./triton/export_model.sh ${NV_VISIBLE_DEVICES} ${BRIDGE_NAME} ${init_checkpoint} ${SERVER_BATCH_SIZE} ${EXPORT_MODEL_ARGS} ${TRITON_DYN_BATCHING_DELAY} ${TRITON_ENGINE_COUNT} ${TRITON_MODEL_OVERWRITE}
- restart_server
- sleep 15
- ./triton/run_perf_client.sh ${MODEL_NAME} 1 ${precision} ${CLIENT_BATCH_SIZE} ${PERF_CLIENT_ARGS} ${SERVER_IP} ${BRIDGE_NAME} ${TEST_NAME}${TRITON_DYN_BATCHING_DELAY} ${PROFILING_DATA} ${NV_VISIBLE_DEVICES}
- # Dynamic batching 2 ms
- TRITON_DYN_BATCHING_DELAY=2
- ./triton/export_model.sh ${NV_VISIBLE_DEVICES} ${BRIDGE_NAME} ${init_checkpoint} ${SERVER_BATCH_SIZE} ${EXPORT_MODEL_ARGS} ${TRITON_DYN_BATCHING_DELAY} ${TRITON_ENGINE_COUNT} ${TRITON_MODEL_OVERWRITE}
- restart_server
- sleep 15
- ./triton/run_perf_client.sh ${MODEL_NAME} 1 ${precision} ${CLIENT_BATCH_SIZE} ${PERF_CLIENT_ARGS} ${SERVER_IP} ${BRIDGE_NAME} ${TEST_NAME}${TRITON_DYN_BATCHING_DELAY} ${PROFILING_DATA} ${NV_VISIBLE_DEVICES}
- # Static Batching (i.e. Dynamic batching 0 ms)
- TRITON_DYN_BATCHING_DELAY=0
- ./triton/export_model.sh ${NV_VISIBLE_DEVICES} ${BRIDGE_NAME} ${init_checkpoint} ${SERVER_BATCH_SIZE} ${EXPORT_MODEL_ARGS} ${TRITON_DYN_BATCHING_DELAY} ${TRITON_ENGINE_COUNT} ${TRITON_MODEL_OVERWRITE}
- restart_server
- sleep 15
- ./triton/run_perf_client.sh ${MODEL_NAME} 1 ${precision} ${CLIENT_BATCH_SIZE} ${PERF_CLIENT_ARGS} ${SERVER_IP} ${BRIDGE_NAME} ${TEST_NAME}${TRITON_DYN_BATCHING_DELAY} ${PROFILING_DATA} ${NV_VISIBLE_DEVICES}
- ############## Engine Count Comparison ##############
- SERVER_BATCH_SIZE=1
- CLIENT_BATCH_SIZE=1
- TRITON_DYN_BATCHING_DELAY=0
- TEST_NAME="ENGINE_C_"
- # Engine Count = 4
- TRITON_ENGINE_COUNT=4
- ./triton/export_model.sh ${NV_VISIBLE_DEVICES} ${BRIDGE_NAME} ${init_checkpoint} ${SERVER_BATCH_SIZE} ${EXPORT_MODEL_ARGS} ${TRITON_DYN_BATCHING_DELAY} ${TRITON_ENGINE_COUNT} ${TRITON_MODEL_OVERWRITE}
- restart_server
- sleep 15
- ./triton/run_perf_client.sh ${MODEL_NAME} 1 ${precision} ${CLIENT_BATCH_SIZE} ${PERF_CLIENT_ARGS} ${SERVER_IP} ${BRIDGE_NAME} ${TEST_NAME}${TRITON_ENGINE_COUNT} ${PROFILING_DATA} ${NV_VISIBLE_DEVICES}
- # Engine Count = 2
- TRITON_ENGINE_COUNT=2
- ./triton/export_model.sh ${NV_VISIBLE_DEVICES} ${BRIDGE_NAME} ${init_checkpoint} ${SERVER_BATCH_SIZE} ${EXPORT_MODEL_ARGS} ${TRITON_DYN_BATCHING_DELAY} ${TRITON_ENGINE_COUNT} ${TRITON_MODEL_OVERWRITE}
- restart_server
- sleep 15
- ./triton/run_perf_client.sh ${MODEL_NAME} 1 ${precision} ${CLIENT_BATCH_SIZE} ${PERF_CLIENT_ARGS} ${SERVER_IP} ${BRIDGE_NAME} ${TEST_NAME}${TRITON_ENGINE_COUNT} ${PROFILING_DATA} ${NV_VISIBLE_DEVICES}
- # Engine Count = 1
- TRITON_ENGINE_COUNT=1
- ./triton/export_model.sh ${NV_VISIBLE_DEVICES} ${BRIDGE_NAME} ${init_checkpoint} ${SERVER_BATCH_SIZE} ${EXPORT_MODEL_ARGS} ${TRITON_DYN_BATCHING_DELAY} ${TRITON_ENGINE_COUNT} ${TRITON_MODEL_OVERWRITE}
- restart_server
- sleep 15
- ./triton/run_perf_client.sh ${MODEL_NAME} 1 ${precision} ${CLIENT_BATCH_SIZE} ${PERF_CLIENT_ARGS} ${SERVER_IP} ${BRIDGE_NAME} ${TEST_NAME}${TRITON_ENGINE_COUNT} ${PROFILING_DATA} ${NV_VISIBLE_DEVICES}
- ############## Batch Size Comparison ##############
- # BATCH=1 Generate model and perf
- SERVER_BATCH_SIZE=1
- CLIENT_BATCH_SIZE=1
- TRITON_ENGINE_COUNT=1
- TRITON_DYN_BATCHING_DELAY=0
- TEST_NAME="BATCH_SIZE_"
- ./triton/export_model.sh ${NV_VISIBLE_DEVICES} ${BRIDGE_NAME} ${init_checkpoint} ${SERVER_BATCH_SIZE} ${EXPORT_MODEL_ARGS} ${TRITON_DYN_BATCHING_DELAY} ${TRITON_ENGINE_COUNT} ${TRITON_MODEL_OVERWRITE}
- restart_server
- sleep 15
- ./triton/run_perf_client.sh ${MODEL_NAME} 1 ${precision} ${CLIENT_BATCH_SIZE} 1000 10 64 ${SERVER_IP} ${BRIDGE_NAME} ${TEST_NAME}${SERVER_BATCH_SIZE} ${PROFILING_DATA} ${NV_VISIBLE_DEVICES}
- # BATCH=2 Generate model and perf
- SERVER_BATCH_SIZE=2
- CLIENT_BATCH_SIZE=2
- ./triton/export_model.sh ${NV_VISIBLE_DEVICES} ${BRIDGE_NAME} ${init_checkpoint} ${SERVER_BATCH_SIZE} ${EXPORT_MODEL_ARGS} ${TRITON_DYN_BATCHING_DELAY} ${TRITON_ENGINE_COUNT} ${TRITON_MODEL_OVERWRITE}
- restart_server
- sleep 15
- ./triton/run_perf_client.sh ${MODEL_NAME} 1 ${precision} ${CLIENT_BATCH_SIZE} 1000 10 32 ${SERVER_IP} ${BRIDGE_NAME} ${TEST_NAME}${SERVER_BATCH_SIZE} ${PROFILING_DATA} ${NV_VISIBLE_DEVICES}
- # BATCH=4 Generate model and perf
- SERVER_BATCH_SIZE=4
- CLIENT_BATCH_SIZE=4
- ./triton/export_model.sh ${NV_VISIBLE_DEVICES} ${BRIDGE_NAME} ${init_checkpoint} ${SERVER_BATCH_SIZE} ${EXPORT_MODEL_ARGS} ${TRITON_DYN_BATCHING_DELAY} ${TRITON_ENGINE_COUNT} ${TRITON_MODEL_OVERWRITE}
- restart_server
- sleep 15
- ./triton/run_perf_client.sh ${MODEL_NAME} 1 ${precision} ${CLIENT_BATCH_SIZE} 1000 10 16 ${SERVER_IP} ${BRIDGE_NAME} ${TEST_NAME}${SERVER_BATCH_SIZE} ${PROFILING_DATA} ${NV_VISIBLE_DEVICES}
- # BATCH=8 Generate model and perf
- SERVER_BATCH_SIZE=8
- CLIENT_BATCH_SIZE=8
- ./triton/export_model.sh ${NV_VISIBLE_DEVICES} ${BRIDGE_NAME} ${init_checkpoint} ${SERVER_BATCH_SIZE} ${EXPORT_MODEL_ARGS} ${TRITON_DYN_BATCHING_DELAY} ${TRITON_ENGINE_COUNT} ${TRITON_MODEL_OVERWRITE}
- restart_server
- sleep 15
- ./triton/run_perf_client.sh ${MODEL_NAME} 1 ${precision} ${CLIENT_BATCH_SIZE} 1000 10 8 ${SERVER_IP} ${BRIDGE_NAME} ${TEST_NAME}${SERVER_BATCH_SIZE} ${PROFILING_DATA} ${NV_VISIBLE_DEVICES}
|