#!/bin/bash
# compute_wer.sh
#
# Scores a decoded lattice archive against reference transcripts using
# Kaldi command-line tools. Steps, in order:
#   1. Lower-case the model's words.txt and the reference text file.
#   2. Look up the second column of the "<unk>" line in the lower-cased
#      words.txt and pass it to sym2int.pl as the --map-oov value while
#      converting fields 2.. of the reference text.
#   3. Run lattice-best-path on the gzipped lattice archive to produce a
#      text-mode transcript archive.
#   4. Run compute-wer (--mode=present) on the two archives; its stdout is
#      left untouched, so whatever it prints is this script's output.
#
# Files read:
#   $model_path/words.txt
#   $librispeech_path/[$test_set/]text
#   $result_path/lat.cuda-asr.gz
# Files written (all under $result_path):
#   words.txt, text, text_ints, trans.cuda-asr
#
# Environment:
#   test_set  optional sub-directory under $librispeech_path. It is not
#             assigned anywhere in this script; when unset or empty the
#             reference text is read directly from $librispeech_path.

set -euo pipefail

model_path="/data/models/LibriSpeech"
librispeech_path="/data/datasets/LibriSpeech/test_clean"
result_path="/data/results"
kaldi_root="/opt/kaldi"

# Print a message to stderr and abort. Needed because the Kaldi tools below
# have their own stderr discarded, so a failure would otherwise be silent.
die() {
  printf '%s: %s\n' "${0##*/}" "$*" >&2
  exit 1
}

# Resolve the directory holding the reference "text" file. An unset test_set
# used to expand to an empty path component; handle that case explicitly so
# the script also works under `set -u`.
ref_dir="$librispeech_path"
if [[ -n "${test_set:-}" ]]; then
  ref_dir="$librispeech_path/$test_set"
fi

# Correctness
# Lower-case both the word table and the reference text so they are matched
# case-insensitively by sym2int.pl.
tr '[:upper:]' '[:lower:]' < "$model_path/words.txt" > "$result_path/words.txt"
tr '[:upper:]' '[:lower:]' < "$ref_dir/text" > "$result_path/text"

# Second column of the first line mentioning <unk>; used as the replacement
# for reference words that are missing from words.txt.
oovtok=$(awk '/<unk>/ { print $2; exit }' "$result_path/words.txt")
[[ -n "$oovtok" ]] || die "no <unk> entry found in $result_path/words.txt"

# stderr is discarded as in the original script (presumably to hide the
# tool's warnings); drop the redirect when debugging.
"$kaldi_root/egs/wsj/s5/utils/sym2int.pl" --map-oov "$oovtok" -f 2- \
  "$result_path/words.txt" "$result_path/text" \
  > "$result_path/text_ints" 2> /dev/null \
  || die "sym2int.pl failed on $result_path/text"

# convert lattice to transcript
# NOTE: the two specifiers must stay separate words, so keep the space
# before each trailing backslash.
"$kaldi_root/src/latbin/lattice-best-path" \
  "ark:gunzip -c $result_path/lat.cuda-asr.gz |" \
  "ark,t:$result_path/trans.cuda-asr" 2> /dev/null \
  || die "lattice-best-path failed on $result_path/lat.cuda-asr.gz"

# calculate wer
"$kaldi_root/src/bin/compute-wer" --mode=present \
  "ark:$result_path/text_ints" \
  "ark:$result_path/trans.cuda-asr" 2> /dev/null \
  || die "compute-wer failed"