res.py 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242
  1. # -*- coding: UTF-8 -*-
  2. import os
  3. from PIL import Image
  4. import requests
  5. import time
  6. import io
  7. import base64
  8. import logging
  9. logger = logging.getLogger(__name__)
  10. from multiprocessing import Pool
  11. import atexit
  12. from .emoji import EmojiReader
  13. from .avatar import AvatarReader
  14. from .common.imgutil import what as img_what
  15. from .common.textutil import md5 as get_md5_hex, get_file_b64
  16. from .msg import TYPE_SPEAK
  17. from .audio import parse_wechat_audio_file
  18. from .wxgf import WxgfDecoder, is_wxgf_file
  19. LIB_PATH = os.path.dirname(os.path.abspath(__file__))
  20. VOICE_DIRNAME = 'voice2'
  21. IMG_DIRNAME = 'image2'
  22. EMOJI_DIRNAME = 'emoji'
  23. VIDEO_DIRNAME = 'video'
  24. JPEG_QUALITY = 50
  25. THUMB_JPEG_QUALITY = 35
  26. class Resource(object):
  27. """ Multimedia resources parser."""
  28. def __init__(self, parser,
  29. res_dir: str,
  30. *,
  31. wxgf_server: str | None = None,
  32. avt_db: str | None = None):
  33. """
  34. Args:
  35. res_dir: path to the resource directory
  36. wxgf_server: "hostname:port" that points to the wxgf converter android app
  37. avt_db: "avatar.index" file that only exists in old versions of wechat
  38. """
  39. def check(subdir):
  40. dir_to_check = os.path.join(res_dir, subdir)
  41. assert os.path.isdir(dir_to_check), f"No such directory: {dir_to_check}"
  42. [check(k) for k in ['', IMG_DIRNAME, EMOJI_DIRNAME, VOICE_DIRNAME]]
  43. self.res_dir = res_dir
  44. self.parser = parser
  45. self.voice_cache_idx = {}
  46. self.img_dir = os.path.join(res_dir, IMG_DIRNAME)
  47. self.voice_dir = os.path.join(res_dir, VOICE_DIRNAME)
  48. self.video_dir = os.path.join(res_dir, VIDEO_DIRNAME)
  49. self.avt_reader = AvatarReader(res_dir, avt_db)
  50. self.wxgf_decoder = WxgfDecoder(wxgf_server)
  51. self.emoji_reader = EmojiReader(res_dir, self.parser, wxgf_decoder=self.wxgf_decoder)
  52. def _get_voice_filename(self, imgpath):
  53. fname = get_md5_hex(imgpath.encode('ascii'))
  54. dir1, dir2 = fname[:2], fname[2:4]
  55. ret = os.path.join(self.voice_dir, dir1, dir2,
  56. 'msg_{}.amr'.format(imgpath))
  57. if not os.path.isfile(ret):
  58. logger.error(f"Cannot find voice file {imgpath}, {fname}")
  59. return ""
  60. return ret
  61. def get_voice_mp3(self, imgpath):
  62. """ return mp3 and duration, or empty string and 0 on failure"""
  63. idx = self.voice_cache_idx.get(imgpath)
  64. if idx is None:
  65. return parse_wechat_audio_file(
  66. self._get_voice_filename(imgpath))
  67. return self.voice_cache[idx].get()
  68. def cache_voice_mp3(self, msgs):
  69. """ for speed.
  70. msgs: a collection of WeChatMsg, to cache for later fetch"""
  71. voice_paths = [msg.imgPath for msg in msgs if msg.type == TYPE_SPEAK]
  72. # NOTE: remove all the caching code to debug serial decoding
  73. self.voice_cache_idx = {k: idx for idx, k in enumerate(voice_paths)}
  74. pool = Pool(3)
  75. atexit.register(lambda x: x.terminate(), pool)
  76. self.voice_cache = [pool.apply_async(parse_wechat_audio_file,
  77. (self._get_voice_filename(k),)) for k in voice_paths]
  78. def get_avatar(self, username) -> str:
  79. """ return base64 unicode string"""
  80. im = self.avt_reader.get_avatar(username)
  81. if im is None:
  82. # Try downloading the avatar directly.
  83. avatar_url = self.parser.avatar_urls.get(username)
  84. if avatar_url is None:
  85. return ""
  86. logger.info(f"Requesting avatar of {username} from {avatar_url} ...")
  87. try:
  88. r = requests.get(avatar_url).content
  89. im = Image.open(io.BytesIO(r))
  90. except Exception:
  91. logger.exception(f"Failed to fetch avatar of {username}.")
  92. return ""
  93. else:
  94. self.avt_reader.save_avatar_to_avtdir(username, im)
  95. buf = io.BytesIO()
  96. try:
  97. im.save(buf, 'JPEG', quality=JPEG_QUALITY)
  98. except IOError:
  99. try:
  100. # sometimes it works the second time...
  101. im.save(buf, 'JPEG', quality=JPEG_QUALITY)
  102. except IOError:
  103. return ""
  104. jpeg_str = buf.getvalue()
  105. return base64.b64encode(jpeg_str).decode('ascii')
  106. def _get_img_file(self, fnames):
  107. """ fnames: a list of filename to search for
  108. return (filename, filename) of (big, small) image.
  109. could be empty string.
  110. """
  111. cands = []
  112. for fname in fnames:
  113. dir1, dir2 = fname[:2], fname[2:4]
  114. dirname = os.path.join(self.img_dir, dir1, dir2)
  115. if not os.path.isdir(dirname):
  116. logger.warn("Directory not found: {}".format(dirname))
  117. continue
  118. for f in os.listdir(dirname):
  119. if fname in f:
  120. full_name = os.path.join(dirname, f)
  121. size = os.path.getsize(full_name)
  122. if size > 0:
  123. cands.append((full_name, size))
  124. if not cands:
  125. return ("", "")
  126. cands = sorted(cands, key=lambda x: x[1])
  127. def name_is_thumbnail(name):
  128. return os.path.basename(name).startswith('th_') \
  129. and not name.endswith('hd')
  130. if len(cands) == 1:
  131. name = cands[0][0]
  132. if name_is_thumbnail(name):
  133. # thumbnail
  134. return ("", name)
  135. else:
  136. logger.warn("Found big image but not thumbnail: {}".format(fname))
  137. return (name, "")
  138. big = cands[-1]
  139. ths = list(filter(name_is_thumbnail, [k[0] for k in cands]))
  140. if not ths:
  141. return (big[0], "")
  142. return (big[0], ths[0])
  143. def get_img(self, fnames):
  144. """
  145. :params fnames: possible file paths
  146. :returns: two base64 jpg string
  147. """
  148. fnames = [k for k in fnames if k] # filter out empty string
  149. big_file, small_file = self._get_img_file(fnames)
  150. big_file = self._img_file_to_jpg_b64(big_file)
  151. if big_file:
  152. return big_file
  153. return self._img_file_to_jpg_b64(small_file)
  154. def _img_file_to_jpg_b64(self, img_file: str, *, max_size: int | None = None, quality: int = JPEG_QUALITY) -> str | None:
  155. if not img_file:
  156. return None
  157. # True jpeg. Simplest case. Avoid re-compressing.
  158. if max_size is None and img_file.endswith('jpg') and img_what(img_file) == 'jpeg':
  159. return get_file_b64(img_file)
  160. if is_wxgf_file(img_file):
  161. start = time.time()
  162. buf = self.wxgf_decoder.decode_with_cache(img_file, None)
  163. if buf is None:
  164. if not self.wxgf_decoder.has_server():
  165. logger.warning("Cannot decode wxgf images. Install ffmpeg+ffprobe or provide a wxgf decoder server with --wxgf-server.")
  166. else:
  167. logger.error("Failed to decode wxgf file: {}".format(img_file))
  168. return None
  169. else:
  170. elapsed = time.time() - start
  171. if elapsed > 0.01 and self.wxgf_decoder.has_server():
  172. logger.info(f"Decoded {img_file} in {elapsed:.2f} seconds")
  173. else:
  174. with open(img_file, "rb") as f:
  175. buf = f.read()
  176. # If we don't need resize/convert and it's already jpeg, avoid re-compressing.
  177. if max_size is None and img_what(file=None, h=buf) == 'jpeg':
  178. return base64.b64encode(buf).decode('ascii')
  179. try:
  180. im = Image.open(io.BytesIO(buf))
  181. except Exception:
  182. return None
  183. try:
  184. im = im.convert("RGB")
  185. if max_size:
  186. im.thumbnail((max_size, max_size))
  187. bufio = io.BytesIO()
  188. im.save(bufio, "JPEG", quality=quality)
  189. return base64.b64encode(bufio.getvalue()).decode("ascii")
  190. except Exception:
  191. return None
  192. def get_img_thumb(self, fnames, *, max_size: int = 64) -> str | None:
  193. """Return a small JPEG thumbnail (b64) for an image message."""
  194. fnames = [k for k in fnames if k]
  195. big_file, small_file = self._get_img_file(fnames)
  196. return (
  197. self._img_file_to_jpg_b64(small_file, max_size=max_size, quality=THUMB_JPEG_QUALITY)
  198. or self._img_file_to_jpg_b64(big_file, max_size=max_size, quality=THUMB_JPEG_QUALITY)
  199. )
  200. def get_video_thumb(self, videoid: str, *, max_size: int = 64) -> str | None:
  201. """Return a small JPEG thumbnail (b64) for a video message, if available."""
  202. if not videoid:
  203. return None
  204. video_thumbnail_file = os.path.join(self.video_dir, videoid + ".jpg")
  205. if not os.path.exists(video_thumbnail_file):
  206. return None
  207. return self._img_file_to_jpg_b64(video_thumbnail_file, max_size=max_size, quality=THUMB_JPEG_QUALITY)
  208. def get_emoji_by_md5(self, md5):
  209. """ Returns: (b64 encoded img string, format) """
  210. return self.emoji_reader.get_emoji(md5)
  211. def get_video(self, videoid) -> str | None:
  212. video_file = os.path.join(self.video_dir, videoid + ".mp4")
  213. video_thumbnail_file = os.path.join(self.video_dir, videoid + ".jpg")
  214. if os.path.exists(video_file):
  215. return video_file
  216. elif os.path.exists(video_thumbnail_file):
  217. return video_thumbnail_file
  218. return None