|
|
@@ -4,7 +4,6 @@ import glob
|
|
|
import os
|
|
|
import re
|
|
|
from PIL import Image
|
|
|
-import tempfile
|
|
|
import io
|
|
|
import base64
|
|
|
import logging
|
|
|
@@ -12,18 +11,15 @@ logger = logging.getLogger(__name__)
|
|
|
import imghdr
|
|
|
from multiprocessing import Pool
|
|
|
import atexit
|
|
|
-import pickle
|
|
|
-import requests
|
|
|
|
|
|
+from .emoji import EmojiReader
|
|
|
from .avatar import AvatarReader
|
|
|
-from .common.textutil import md5 as get_md5_hex, get_file_b64, get_file_md5
|
|
|
-from .common.procutil import subproc_succ
|
|
|
+from .common.textutil import md5 as get_md5_hex, get_file_b64
|
|
|
from .common.timer import timing
|
|
|
from .msg import TYPE_SPEAK
|
|
|
from .audio import parse_wechat_audio_file
|
|
|
|
|
|
LIB_PATH = os.path.dirname(os.path.abspath(__file__))
|
|
|
-INTERNAL_EMOJI_DIR = os.path.join(LIB_PATH, 'static', 'internal_emoji')
|
|
|
VOICE_DIRNAME = 'voice2'
|
|
|
IMG_DIRNAME = 'image2'
|
|
|
EMOJI_DIRNAME = 'emoji'
|
|
|
@@ -31,73 +27,6 @@ VIDEO_DIRNAME = 'video'
|
|
|
|
|
|
JPEG_QUALITY = 50
|
|
|
|
|
|
-class EmojiCache(object):
|
|
|
- def __init__(self, fname):
|
|
|
- self.fname = fname
|
|
|
- if os.path.isfile(fname):
|
|
|
- with open(fname, 'rb') as f:
|
|
|
- self.dic = pickle.load(f)
|
|
|
- else:
|
|
|
- self.dic = {}
|
|
|
-
|
|
|
- self._curr_size = len(self.dic)
|
|
|
-
|
|
|
- def query(self, md5):
|
|
|
- data, format = self.dic.get(md5, (None, None))
|
|
|
- if data is not None and not isinstance(data, str):
|
|
|
- data = data.decode('ascii')
|
|
|
- return data, format
|
|
|
-
|
|
|
- def fetch(self, md5, urls):
|
|
|
- cdnurl, encrypturl, aeskey = urls
|
|
|
- ret = None
|
|
|
- if cdnurl:
|
|
|
- try:
|
|
|
- logger.info("Requesting emoji {} from {} ...".format(md5, cdnurl))
|
|
|
- r = requests.get(cdnurl).content
|
|
|
- emoji_md5 = get_md5_hex(r)
|
|
|
- im = Image.open(io.BytesIO(r))
|
|
|
- ret = (base64.b64encode(r).decode('ascii'), im.format.lower())
|
|
|
- if emoji_md5 == md5:
|
|
|
- self.add(md5, ret)
|
|
|
- return ret
|
|
|
- else:
|
|
|
- raise ValueError("Emoji MD5 from CDNURL does not match")
|
|
|
- except Exception:
|
|
|
- logger.exception("Error processing cdnurl {}".format(cdnurl))
|
|
|
-
|
|
|
- if encrypturl:
|
|
|
- try:
|
|
|
- logger.info("Requesting encrypted emoji {} from {} ...".format(md5, encrypturl))
|
|
|
- buf = requests.get(encrypturl).content
|
|
|
- with tempfile.TemporaryDirectory(prefix="wechat_dump_download") as d:
|
|
|
- fname = os.path.join(d, md5)
|
|
|
- with open(fname, 'wb') as f:
|
|
|
- f.write(buf)
|
|
|
- cmd = f"openssl enc -d -aes-128-cbc -in {fname} -K {aeskey} -iv {aeskey}"
|
|
|
- decrypted_buf = subproc_succ(cmd)
|
|
|
- im = Image.open(io.BytesIO(decrypted_buf))
|
|
|
- ret = (base64.b64encode(decrypted_buf).decode('ascii'), im.format.lower())
|
|
|
- self.add(md5, ret)
|
|
|
- return ret
|
|
|
- except Exception:
|
|
|
- logger.exception("Error processing encrypturl {}".format(encrypturl))
|
|
|
- if ret is not None:
|
|
|
- # ret may become something with wrong md5. Try it anyway, but don't cache.
|
|
|
- return ret
|
|
|
- return None, None
|
|
|
-
|
|
|
- def add(self, md5, values):
|
|
|
- self.dic[md5] = values
|
|
|
- if len(self.dic) >= self._curr_size + 10:
|
|
|
- self.flush()
|
|
|
-
|
|
|
- def flush(self):
|
|
|
- if len(self.dic) > self._curr_size:
|
|
|
- self._curr_size = len(self.dic)
|
|
|
- with open(self.fname, 'wb') as f:
|
|
|
- pickle.dump(self.dic, f)
|
|
|
-
|
|
|
class Resource(object):
|
|
|
""" multimedia resources in chat"""
|
|
|
def __init__(self, parser, res_dir, avt_db):
|
|
|
@@ -106,19 +35,16 @@ class Resource(object):
|
|
|
assert os.path.isdir(dir_to_check), f"No such directory: {dir_to_check}"
|
|
|
[check(k) for k in ['', IMG_DIRNAME, EMOJI_DIRNAME, VOICE_DIRNAME]]
|
|
|
|
|
|
- self.emoji_cache = EmojiCache(
|
|
|
- os.path.join(os.path.dirname(os.path.abspath(__file__)),
|
|
|
- '..', 'emoji.cache'))
|
|
|
self.res_dir = res_dir
|
|
|
self.parser = parser
|
|
|
self.voice_cache_idx = {}
|
|
|
self.img_dir = os.path.join(res_dir, IMG_DIRNAME)
|
|
|
self.voice_dir = os.path.join(res_dir, VOICE_DIRNAME)
|
|
|
- self.emoji_dir = os.path.join(res_dir, EMOJI_DIRNAME)
|
|
|
self.video_dir = os.path.join(res_dir, VIDEO_DIRNAME)
|
|
|
self.avt_reader = AvatarReader(res_dir, avt_db)
|
|
|
+ self.emoji_reader = EmojiReader(res_dir, self.parser)
|
|
|
|
|
|
- def get_voice_filename(self, imgpath):
|
|
|
+ def _get_voice_filename(self, imgpath):
|
|
|
fname = get_md5_hex(imgpath.encode('ascii'))
|
|
|
dir1, dir2 = fname[:2], fname[2:4]
|
|
|
ret = os.path.join(self.voice_dir, dir1, dir2,
|
|
|
@@ -133,7 +59,7 @@ class Resource(object):
|
|
|
idx = self.voice_cache_idx.get(imgpath)
|
|
|
if idx is None:
|
|
|
return parse_wechat_audio_file(
|
|
|
- self.get_voice_filename(imgpath))
|
|
|
+ self._get_voice_filename(imgpath))
|
|
|
return self.voice_cache[idx].get()
|
|
|
|
|
|
def cache_voice_mp3(self, msgs):
|
|
|
@@ -145,7 +71,7 @@ class Resource(object):
|
|
|
pool = Pool(3)
|
|
|
atexit.register(lambda x: x.terminate(), pool)
|
|
|
self.voice_cache = [pool.apply_async(parse_wechat_audio_file,
|
|
|
- (self.get_voice_filename(k),)) for k in voice_paths]
|
|
|
+ (self._get_voice_filename(k),)) for k in voice_paths]
|
|
|
|
|
|
def get_avatar(self, username):
|
|
|
""" return base64 unicode string"""
|
|
|
@@ -228,65 +154,9 @@ class Resource(object):
|
|
|
return big_file
|
|
|
return get_jpg_b64(small_file)
|
|
|
|
|
|
- def _get_res_emoji(self, md5, pack_id, fallback=False):
|
|
|
- """
|
|
|
- pack_id: can be None
|
|
|
- fallback:
|
|
|
- 1. allow cover/thumb that are non-animated.
|
|
|
- 2. allow file to have mismatch md5 (often non-animated cover as well)
|
|
|
- """
|
|
|
- path = os.path.join(self.emoji_dir, pack_id or '')
|
|
|
- candidates = glob.glob(os.path.join(path, '{}*'.format(md5)))
|
|
|
- candidates = [k for k in candidates if not re.match('.*_[0-9]+$', k)]
|
|
|
- candidates = [k for k in candidates if (fallback or (not k.endswith('_cover') and not k.endswith('_thumb')))]
|
|
|
-
|
|
|
- for cand in candidates:
|
|
|
- if imghdr.what(cand): # does not recognize
|
|
|
- if not fallback:
|
|
|
- emoji_md5 = get_file_md5(cand)
|
|
|
- if emoji_md5 != md5:
|
|
|
- continue
|
|
|
- return get_file_b64(cand), imghdr.what(cand)
|
|
|
- return None, None
|
|
|
-
|
|
|
- def _get_internal_emoji(self, fname):
|
|
|
- f = os.path.join(INTERNAL_EMOJI_DIR, fname)
|
|
|
- return get_file_b64(f), imghdr.what(f)
|
|
|
-
|
|
|
def get_emoji_by_md5(self, md5):
|
|
|
- """ :returns: (b64 unicode img, format)"""
|
|
|
- assert md5, md5
|
|
|
- if md5 in self.parser.internal_emojis:
|
|
|
- emoji_img, format = self._get_internal_emoji(self.parser.internal_emojis[md5])
|
|
|
- return emoji_img, format
|
|
|
- else:
|
|
|
- # check cache
|
|
|
- img, format = self.emoji_cache.query(md5)
|
|
|
- if format:
|
|
|
- return img, format
|
|
|
-
|
|
|
- # check resource/emoji/ dir
|
|
|
- group = self.parser.emoji_groups.get(md5, None)
|
|
|
- emoji_img, format = self._get_res_emoji(md5, group)
|
|
|
- if format:
|
|
|
- return emoji_img, format
|
|
|
-
|
|
|
- # check url
|
|
|
- urls = self.parser.emoji_url.get(md5, None)
|
|
|
- if urls:
|
|
|
- emoji_img, format = self.emoji_cache.fetch(md5, urls)
|
|
|
- if format:
|
|
|
- return emoji_img, format
|
|
|
-
|
|
|
- # check resource/emoji dir again, with fallback
|
|
|
- emoji_img, format = self._get_res_emoji(md5, group, fallback=True)
|
|
|
- if format:
|
|
|
- logger.info(f"Using fallback for emoji {md5}")
|
|
|
- return emoji_img, format
|
|
|
-
|
|
|
- # TODO: first 1k in emoji is encrypted
|
|
|
- logger.warning("Cannot get emoji {} in group {}".format(md5, group))
|
|
|
- return None, None
|
|
|
+ """ Returns: (b64 encoded img string, format) """
|
|
|
+ return self.emoji_reader.get_emoji(md5)
|
|
|
|
|
|
def get_video(self, videoid):
|
|
|
video_file = os.path.join(self.video_dir, videoid + ".mp4")
|