audio.py 2.7 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071
  1. #!/usr/bin/env python3
  2. # -*- coding: UTF-8 -*-
  3. import os
  4. import tempfile
  5. import logging
  6. logger = logging.getLogger(__name__)
  7. from .common.textutil import get_file_b64
  8. from .common.procutil import subproc_succ
  9. SILK_DECODER = os.path.join(os.path.dirname(__file__),
  10. '../third-party/silk/decoder')
  11. def parse_wechat_audio_file(file_name):
  12. try:
  13. return do_parse_wechat_audio_file(file_name)
  14. except Exception as e:
  15. logger.error(f"Error when parsing audio file {file_name}: {str(e)}")
  16. return "", 0
  17. def do_parse_wechat_audio_file(file_name):
  18. """ return a mp3 stored in base64 unicode string, and the duration"""
  19. if not file_name: return "", 0
  20. with tempfile.TemporaryDirectory(prefix="wechatdump_audio") as temp:
  21. mp3_file = os.path.join(temp,
  22. os.path.basename(file_name)[:-4] + '.mp3')
  23. with open(file_name, 'rb') as f:
  24. header = f.read(10)
  25. if b'AMR' in header:
  26. cmd = f"sox -e signed -c 1 {file_name} {mp3_file}"
  27. subproc_succ(cmd)
  28. cmd = f"soxi -D {mp3_file}"
  29. duration = float(subproc_succ(cmd))
  30. # The below is python2 only. It should be equivalent to using sox from command line
  31. # import pysox
  32. # infile = pysox.CSoxStream(file_name)
  33. # outfile = pysox.CSoxStream(mp3_file, 'w', infile.get_signal())
  34. # chain = pysox.CEffectsChain(infile, outfile)
  35. # chain.flow_effects()
  36. # outfile.close()
  37. # signal = infile.get_signal().get_signalinfo()
  38. # duration = signal['length'] * 1.0 / signal['rate']
  39. elif b'SILK' in header:
  40. if not os.path.exists(SILK_DECODER):
  41. raise RuntimeError("Silk decoder is not compiled. Please see README.md.")
  42. raw_file = os.path.join(temp,
  43. os.path.basename(file_name)[:-4] + '.raw')
  44. cmd = '{0} {1} {2}'.format(SILK_DECODER, file_name, raw_file)
  45. out = subproc_succ(cmd)
  46. for line in out.split(b'\n'):
  47. if b'File length' in line:
  48. duration = float(line[13:-3].strip())
  49. break
  50. else:
  51. raise RuntimeError("Error decoding silk audio file!" + out.decode('utf-8'))
  52. # TODO don't know how to do this with python
  53. subproc_succ('sox -r 24000 -e signed -b 16 -c 1 {} {}'.format(raw_file, mp3_file))
  54. else:
  55. raise NotImplementedError("Audio file format cannot be recognized.")
  56. mp3_string = get_file_b64(mp3_file)
  57. return mp3_string, duration
  58. if __name__ == '__main__':
  59. import sys
  60. fname = sys.argv[1]
  61. print(parse_wechat_audio_file(fname)[1])