asr_client_imp.h 2.5 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586
  1. // Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. #include <grpc_client.h>
  15. #include <queue>
  16. #include <string>
  17. #include <unordered_map>
  18. #include <vector>
  19. #ifndef TRITON_KALDI_ASR_CLIENT_H_
  20. #define TRITON_KALDI_ASR_CLIENT_H_
  21. namespace ni = nvidia::inferenceserver;
  22. namespace nic = nvidia::inferenceserver::client;
  // Returns the current CLOCK_MONOTONIC reading, in seconds, as a double.
  //
  // The clock's reference point is arbitrary, so a single value carries no
  // meaning on its own; subtract two readings to obtain an elapsed time.
  // Declared inline because the definition lives in a header.
  inline double gettime_monotonic() {
    // Value-initialize so that the fields are never read uninitialized, even
    // if clock_gettime() reports an error (its return value is not checked).
    struct timespec ts{};
    clock_gettime(CLOCK_MONOTONIC, &ts);
    // Whole seconds plus the nanosecond remainder converted to seconds.
    return static_cast<double>(ts.tv_sec) +
           static_cast<double>(ts.tv_nsec) / 1e9;
  }
  31. class TritonASRClient {
  32. struct TritonClient {
  33. std::unique_ptr<nic::InferenceServerGrpcClient> triton_client;
  34. };
  35. std::string url_;
  36. std::string model_name_;
  37. std::vector<TritonClient> clients_;
  38. int nclients_;
  39. std::vector<uint8_t> chunk_buf_;
  40. std::vector<int64_t> shape_;
  41. int max_chunk_byte_size_;
  42. std::atomic<int> n_in_flight_;
  43. double started_at_;
  44. double total_audio_;
  45. bool print_results_;
  46. bool print_partial_results_;
  47. bool ctm_;
  48. std::mutex stdout_m_;
  49. int samps_per_chunk_;
  50. float samp_freq_;
  51. struct Result {
  52. std::string raw_lattice;
  53. double latency;
  54. };
  55. std::unordered_map<uint64_t, double> start_timestamps_;
  56. std::mutex start_timestamps_m_;
  57. std::unordered_map<uint64_t, Result> results_;
  58. std::mutex results_m_;
  59. public:
  60. TritonASRClient(const std::string& url, const std::string& model_name,
  61. const int ncontextes, bool print_results,
  62. bool print_partial_results, bool ctm, float samp_freq);
  63. void CreateClientContext();
  64. void SendChunk(uint64_t corr_id, bool start_of_sequence, bool end_of_sequence,
  65. float* chunk, int chunk_byte_size, uint64_t index);
  66. void WaitForCallbacks();
  67. void PrintStats(bool print_latency_stats, bool print_throughput);
  68. void WriteLatticesToFile(
  69. const std::string& clat_wspecifier,
  70. const std::unordered_map<uint64_t, std::string>& corr_id_and_keys);
  71. };
  72. #endif // TRITON_KALDI_ASR_CLIENT_H_