res.py 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241
  1. #!/usr/bin/env python2
  2. # -*- coding: UTF-8 -*-
  3. # File: res.py
  4. # Date: Thu Jun 18 00:02:21 2015 +0800
  5. # Author: Yuxin Wu <[email protected]>
  6. import glob
  7. import os
  8. import re
  9. # TODO: perhaps we don't need to introduce PIL and numpy. libjpeg might be enough
  10. from PIL import Image
  11. import cStringIO
  12. import base64
  13. import logging
  14. logger = logging.getLogger(__name__)
  15. import imghdr
  16. from multiprocessing import Pool
  17. import atexit
  18. import cPickle as pickle
  19. import requests
  20. from .avatar import AvatarReader
  21. from common.textutil import md5, get_file_b64
  22. from common.timer import timing
  23. from .msg import TYPE_SPEAK
  24. from .audio import parse_wechat_audio_file
  25. LIB_PATH = os.path.dirname(os.path.abspath(__file__))
  26. INTERNAL_EMOJI_DIR = os.path.join(LIB_PATH, 'static', 'internal_emoji')
  27. VOICE_DIRNAME = 'voice2'
  28. IMG_DIRNAME = 'image2'
  29. EMOJI_DIRNAME = 'emoji'
  30. AVATAR_DIRNAME = 'sfs'
  31. JPEG_QUALITY = 50
  32. class EmojiCache(object):
  33. def __init__(self, fname):
  34. self.fname = fname
  35. if os.path.isfile(fname):
  36. self.dic = pickle.load(open(fname))
  37. else:
  38. self.dic = {}
  39. def query(self, md5):
  40. return self.dic.get(md5, (None, None))
  41. def fetch(self, md5, url):
  42. try:
  43. logger.info("Requesting emoji {} from {} ...".format(md5, url))
  44. r = requests.get(url).content
  45. im = Image.open(cStringIO.StringIO(r))
  46. format = im.format.lower()
  47. ret = (base64.b64encode(r), format)
  48. self.dic[md5] = ret
  49. self.flush()
  50. return ret
  51. except Exception as e:
  52. logger.exception("Error processing emoji from {}".format(url))
  53. return None, None
  54. def flush(self):
  55. with open(self.fname, 'wb') as f:
  56. pickle.dump(self.dic, f)
  57. class Resource(object):
  58. """ multimedia resources in chat"""
  59. def __init__(self, parser, res_dir, avt_db,
  60. emoji_cache_file='emoji.cache'):
  61. def check(subdir):
  62. assert os.path.isdir(os.path.join(res_dir, subdir)), \
  63. "No such directory: {}".format(subdir)
  64. [check(k) for k in ['', AVATAR_DIRNAME, IMG_DIRNAME, EMOJI_DIRNAME, VOICE_DIRNAME]]
  65. self.emoji_cache = EmojiCache(emoji_cache_file)
  66. self.res_dir = res_dir
  67. self.parser = parser
  68. self.voice_cache_idx = {}
  69. self.img_dir = os.path.join(res_dir, IMG_DIRNAME)
  70. self.voice_dir = os.path.join(res_dir, VOICE_DIRNAME)
  71. self.emoji_dir = os.path.join(res_dir, EMOJI_DIRNAME)
  72. self.avt_reader = AvatarReader(os.path.join(res_dir, AVATAR_DIRNAME), avt_db)
  73. def get_voice_filename(self, imgpath):
  74. fname = md5(imgpath)
  75. dir1, dir2 = fname[:2], fname[2:4]
  76. ret = os.path.join(self.voice_dir, dir1, dir2,
  77. 'msg_{}.amr'.format(imgpath))
  78. if not os.path.isfile(ret):
  79. logger.error("Voice file not found for {}".format(imgpath))
  80. return ""
  81. return ret
  82. def get_voice_mp3(self, imgpath):
  83. """ return mp3 and duration, or empty string and 0 on failure"""
  84. idx = self.voice_cache_idx.get(imgpath)
  85. if idx is None:
  86. return parse_wechat_audio_file(
  87. self.get_voice_filename(imgpath))
  88. return self.voice_cache[idx].get()
  89. def cache_voice_mp3(self, msgs):
  90. """ for speed.
  91. msgs: a collection of WeChatMsg, to cache for later fetch"""
  92. voice_paths = [msg.imgPath for msg in msgs if msg.type == TYPE_SPEAK]
  93. self.voice_cache_idx = {k: idx for idx, k in enumerate(voice_paths)}
  94. pool = Pool(3)
  95. atexit.register(lambda x: x.terminate(), pool)
  96. self.voice_cache = [pool.apply_async(parse_wechat_audio_file,
  97. (self.get_voice_filename(k),)) for k in voice_paths]
  98. # single-threaded version, for debug
  99. #self.voice_cache = map(parse_wechat_audio_file,
  100. #(self.get_voice_filename(k) for k in voice_paths))
  101. def get_avatar(self, username):
  102. """ return base64 string"""
  103. im = self.avt_reader.get_avatar(username)
  104. if im is None:
  105. return ""
  106. buf = cStringIO.StringIO()
  107. im.save(buf, 'JPEG', quality=JPEG_QUALITY)
  108. jpeg_str = buf.getvalue()
  109. return base64.b64encode(jpeg_str)
  110. def get_contact_avatar(self, nickname):
  111. return self.get_avatar(self.parser.contacts_rev[nickname])
  112. def _get_img_file(self, fnames):
  113. """ fnames: a list of filename to search for
  114. return (filename, filename) of (big, small) image.
  115. could be empty string.
  116. """
  117. cands = []
  118. for fname in fnames:
  119. dir1, dir2 = fname[:2], fname[2:4]
  120. dirname = os.path.join(self.img_dir, dir1, dir2)
  121. if not os.path.isdir(dirname):
  122. logger.warn("Directory not found: {}".format(dirname))
  123. continue
  124. for f in os.listdir(dirname):
  125. if fname in f:
  126. full_name = os.path.join(dirname, f)
  127. size = os.path.getsize(full_name)
  128. if size > 0:
  129. cands.append((full_name, size))
  130. if not cands:
  131. return ("", "")
  132. cands = sorted(cands, key=lambda x: x[1])
  133. def name_is_thumbnail(name):
  134. return os.path.basename(name).startswith('th_') \
  135. and not name.endswith('hd')
  136. if len(cands) == 1:
  137. name = cands[0][0]
  138. if name_is_thumbnail(name):
  139. # thumbnail
  140. return ("", name)
  141. else:
  142. logger.warn("Found big image but not thumbnail: {}".format(fname))
  143. return (name, "")
  144. big = cands[-1]
  145. ths = filter(name_is_thumbnail, [k[0] for k in cands])
  146. if not ths:
  147. return (big[0], "")
  148. return (big[0], ths[0])
  149. def get_img(self, fnames):
  150. """
  151. :params fnames: possible file paths
  152. :returns: two base64 jpg string
  153. """
  154. fnames = [k for k in fnames if k] # filter out empty string
  155. big_file, small_file = self._get_img_file(fnames)
  156. def get_jpg_b64(img_file):
  157. if not img_file:
  158. return None
  159. if not img_file.endswith('jpg') and \
  160. imghdr.what(img_file) != 'jpeg':
  161. im = Image.open(open(img_file, 'rb'))
  162. buf = cStringIO.StringIO()
  163. im.convert('RGB').save(buf, 'JPEG', quality=JPEG_QUALITY)
  164. return base64.b64encode(buf.getvalue())
  165. return get_file_b64(img_file)
  166. big_file = get_jpg_b64(big_file)
  167. if big_file:
  168. return big_file
  169. return get_jpg_b64(small_file)
  170. def _get_res_emoji(self, md5, pack_id):
  171. path = self.emoji_dir
  172. if pack_id:
  173. path = os.path.join(path, pack_id)
  174. candidates = glob.glob(os.path.join(path, '{}*'.format(md5)))
  175. candidates = [k for k in candidates if not k.endswith('_thumb') \
  176. and not re.match('.*_[0-9]+$', k)]
  177. def try_use(f):
  178. if not f: return None
  179. if not imghdr.what(f[0]): # cannot recognize file type
  180. return None
  181. return f[0]
  182. f = try_use([k for k in candidates if not k.endswith('_cover')])
  183. if f:
  184. return get_file_b64(f), imghdr.what(f)
  185. f = try_use([k for k in candidates if k.endswith('_cover')])
  186. if f:
  187. return get_file_b64(f), imghdr.what(f)
  188. return None, None
  189. def _get_internal_emoji(self, fname):
  190. f = os.path.join(INTERNAL_EMOJI_DIR, fname)
  191. return get_file_b64(f), imghdr.what(f)
  192. def get_emoji_by_md5(self, md5):
  193. """ :returns: (b64 img, format)"""
  194. if md5 in self.parser.internal_emojis:
  195. emoji_img, format = self._get_internal_emoji(self.parser.internal_emojis[md5])
  196. logger.warn("Cannot get emoji {}".format(md5))
  197. return None, None
  198. else:
  199. img, format = self.emoji_cache.query(md5)
  200. if format:
  201. return img, format
  202. group = self.parser.emoji_groups.get(md5, None)
  203. emoji_img, format = self._get_res_emoji(md5, group)
  204. if format:
  205. return emoji_img, format
  206. url = self.parser.emoji_url.get(md5, None)
  207. if url:
  208. emoji_img, format = self.emoji_cache.fetch(md5, url)
  209. if format:
  210. return emoji_img, format
  211. logger.warn("Cannot get emoji {} in {}".format(md5, group))
  212. return None, None