audio.py 2.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778
  1. #!/usr/bin/env python2
  2. # -*- coding: UTF-8 -*-
  3. # File: audio.py
  4. # Date: Fri Jun 26 10:42:41 2015 +0800
  5. # Author: Yuxin Wu <[email protected]>
  6. import os
  7. from subprocess import PIPE, Popen, call
  8. import logging
  9. logger = logging.getLogger(__name__)
  10. import pysox
  11. from common.textutil import get_file_b64
  12. SILK_DECODER = os.path.join(os.path.dirname(__file__),
  13. '../third-party/silk/decoder')
  14. if not os.path.exists(SILK_DECODER):
  15. logger.error("Silk decoder is not compiled. Please see README.md.")
  16. raise RuntimeError()
  17. def parse_wechat_audio_file(file_name):
  18. try:
  19. return do_parse_wechat_audio_file(file_name)
  20. except Exception as e:
  21. logger.error("Pase audio file {} error!".format(file_name))
  22. logger.error(e)
  23. return "", 0
  24. def do_parse_wechat_audio_file(file_name):
  25. """ return a mp3 base64 string, and the duration"""
  26. if not file_name: return "", 0
  27. mp3_file = os.path.join('/tmp',
  28. os.path.basename(file_name)[:-4] + '.mp3')
  29. with open(file_name) as f:
  30. header = f.read(10)
  31. if 'AMR' in header:
  32. # maybe this is faster than calling sox from command line?
  33. infile = pysox.CSoxStream(file_name)
  34. outfile = pysox.CSoxStream(mp3_file, 'w', infile.get_signal())
  35. chain = pysox.CEffectsChain(infile, outfile)
  36. chain.flow_effects()
  37. outfile.close()
  38. signal = infile.get_signal().get_signalinfo()
  39. duration = signal['length'] * 1.0 / signal['rate']
  40. elif 'SILK' in header:
  41. raw_file = os.path.join('/tmp',
  42. os.path.basename(file_name)[:-4] + '.raw')
  43. proc = Popen('{0} {1} {2}'.format(SILK_DECODER,
  44. file_name, raw_file),
  45. shell=True, stdout=PIPE, stderr=PIPE)
  46. stdout = proc.communicate()[0]
  47. for line in stdout.split('\n'):
  48. if 'File length' in line:
  49. duration = float(line[13:-3].strip())
  50. break
  51. else:
  52. raise RuntimeError("Error decoding silk audio file!")
  53. # I don't know how to do this with pysox
  54. proc = call('sox -r 24000 -e signed -b 16 -c 1 {} {}'.format(
  55. raw_file, mp3_file), shell=True)
  56. os.unlink(raw_file)
  57. else:
  58. raise NotImplementedError("Unsupported Audio Format! This is a bug!")
  59. try:
  60. mp3_string = get_file_b64(mp3_file)
  61. os.unlink(mp3_file)
  62. except:
  63. raise RuntimeError("Failed to decode audio file: {}".format(file_name))
  64. return mp3_string, duration
  65. if __name__ == '__main__':
  66. import sys
  67. fname = sys.argv[1]
  68. print parse_wechat_audio_file(fname)[1]