2
0

audio.py 2.5 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667
  1. #!/usr/bin/env python3
  2. # -*- coding: UTF-8 -*-
  3. import os
  4. import logging
  5. logger = logging.getLogger(__name__)
  6. from common.textutil import get_file_b64
  7. from common.procutil import subproc_succ
  8. SILK_DECODER = os.path.join(os.path.dirname(__file__),
  9. '../third-party/silk/decoder')
  10. if not os.path.exists(SILK_DECODER):
  11. logger.error("Silk decoder is not compiled. Please see README.md.")
  12. raise RuntimeError()
  13. def parse_wechat_audio_file(file_name):
  14. try:
  15. return do_parse_wechat_audio_file(file_name)
  16. except Exception as e:
  17. logger.exception("Error when parsing audio file {}".format(file_name))
  18. return "", 0
  19. def do_parse_wechat_audio_file(file_name):
  20. """ return a mp3 stored in base64 unicode string, and the duration"""
  21. if not file_name: return "", 0
  22. mp3_file = os.path.join('/tmp',
  23. os.path.basename(file_name)[:-4] + '.mp3')
  24. with open(file_name, 'rb') as f:
  25. header = f.read(10)
  26. if b'AMR' in header:
  27. raise NotImplementedError("AMR decoding not implemented because it seems deprecated since WeChat6.0+")
  28. # The below is python2 only. It should be equivalent to using sox from command line?
  29. import pysox
  30. infile = pysox.CSoxStream(file_name)
  31. outfile = pysox.CSoxStream(mp3_file, 'w', infile.get_signal())
  32. chain = pysox.CEffectsChain(infile, outfile)
  33. chain.flow_effects()
  34. outfile.close()
  35. signal = infile.get_signal().get_signalinfo()
  36. duration = signal['length'] * 1.0 / signal['rate']
  37. elif b'SILK' in header:
  38. raw_file = os.path.join('/tmp',
  39. os.path.basename(file_name)[:-4] + '.raw')
  40. cmd = '{0} {1} {2}'.format(SILK_DECODER, file_name, raw_file)
  41. out = subproc_succ(cmd)
  42. for line in out.split(b'\n'):
  43. if b'File length' in line:
  44. duration = float(line[13:-3].strip())
  45. break
  46. else:
  47. raise RuntimeError("Error decoding silk audio file!")
  48. # TODO don't know how to do this with python
  49. subproc_succ('sox -r 24000 -e signed -b 16 -c 1 {} {}'.format(raw_file, mp3_file))
  50. os.unlink(raw_file)
  51. else:
  52. raise NotImplementedError("Unsupported Audio Format! This is a bug!")
  53. mp3_string = get_file_b64(mp3_file)
  54. os.unlink(mp3_file)
  55. return mp3_string, duration
  56. if __name__ == '__main__':
  57. import sys
  58. fname = sys.argv[1]
  59. print(parse_wechat_audio_file(fname)[1])