Преглед на файлове

request emoji from cdnUrl

Yuxin Wu преди 9 години
родител
ревизия
1344589d8d
променени са 5 файла, в които са добавени 87 реда и са изтрити 34 реда
  1. 1 1
      dump-html.py
  2. 1 9
      wechat/libchathelper.py
  3. 12 8
      wechat/parser.py
  4. 4 11
      wechat/render.py
  5. 69 5
      wechat/res.py

+ 1 - 1
dump-html.py

@@ -29,7 +29,7 @@ if __name__ == '__main__':
     output_file = args.output
 
     parser = WeChatDBParser(args.db)
-    res = Resource(args.res, args.avt)
+    res = Resource(parser, args.res, args.avt)
 
     if name and name in parser.msgs_by_chat:
         msgs = parser.msgs_by_chat[name]

+ 1 - 9
wechat/libchathelper.py

@@ -46,15 +46,7 @@ class LibChatHelper(object):
             return img, 'jpeg'
         elif msg.type == TYPE_EMOJI:
             md5 = msg.imgPath
-            if md5 in self.parser.internal_emojis:
-                emoji_img, format = self.res.get_internal_emoji(
-                    self.parser.internal_emojis[md5])
-            else:
-                if md5 in self.parser.emojis:
-                    group, _ = self.parser.emojis[md5]
-                else:
-                    group = None
-                emoji_img, format = self.res.get_emoji(md5, group)
+            emoji_img, format = self.res.get_emoji_by_md5(md5)
             return emoji_img, format
         elif msg.type == TYPE_CUSTOM_EMOJI:
             pq = PyQuery(msg.content)

+ 12 - 8
wechat/parser.py

@@ -33,7 +33,8 @@ class WeChatDBParser(object):
         self.cc = self.db_conn.cursor()
         self.contacts = {}
         self.msgs_by_chat = defaultdict(list)
-        self.emojis = {}
+        self.emoji_groups = {}
+        self.emoji_url = {}
         self.internal_emojis = {}
         self._parse()
 
@@ -59,7 +60,7 @@ SELECT username,conRemark,nickname FROM rcontact
 SELECT {} FROM message
 """.format(','.join(WeChatDBParser.FIELDS)))
         for row in db_msgs:
-            values = self._parse_row(row)
+            values = self._parse_msg_row(row)
             if not values:
                 continue
             msg = WeChatMsg(values)
@@ -96,16 +97,18 @@ SELECT {} FROM message
     def _parse_emoji(self):
         # wechat provided emojis
         emojiinfo_q = self.cc.execute(
-""" SELECT md5, desc, groupid FROM EmojiInfoDesc """)
+""" SELECT md5, groupid FROM EmojiInfoDesc """)
         for row in emojiinfo_q:
-            md5, desc, group = row
-            self.emojis[md5] = (group, desc)
+            md5, group = row
+            self.emoji_groups[md5] = group
 
         NEEDED_EMOJI_CATALOG = [49, 50, 17]
         emojiinfo_q = self.cc.execute(
-""" SELECT md5, catalog, name FROM EmojiInfo WHERE name <> ''""")
+""" SELECT md5, catalog, name, cdnUrl FROM EmojiInfo""")
         for row in emojiinfo_q:
-            md5, catalog, name = row
+            md5, catalog, name, cdnUrl = row
+            if cdnUrl:
+                self.emoji_url[md5] = cdnUrl
             if catalog not in NEEDED_EMOJI_CATALOG:
                 continue
             self.internal_emojis[md5] = name
@@ -119,7 +122,8 @@ SELECT {} FROM message
         self._parse_emoji()
 
     # process the values in a row
-    def _parse_row(self, row):
+    def _parse_msg_row(self, row):
+        """ parse a record of message into my format"""
         values = dict(zip(WeChatDBParser.FIELDS, row))
         if values['content']:
             values['content'] = ensure_unicode(values['content'])

+ 4 - 11
wechat/render.py

@@ -44,8 +44,8 @@ class HTMLRender(object):
         self.time_html = open(TIME_HTML_FILE).read()
         self.parser = parser
         self.res = res
-        if self.res is None:
-            logger.warn("Resource Directory not given. Images / Voice Message won't be displayed.")
+        assert self.res is not None, \
+            "Resource Directory not given. Cannot render HTML."
         self.smiley = SmileyProvider()
 
         css_files = glob.glob(os.path.join(LIB_PATH, 'static/*.css'))
@@ -124,14 +124,7 @@ class HTMLRender(object):
             return template.format(**format_dict)
         elif msg.type == TYPE_EMOJI:
             md5 = msg.imgPath
-            if md5 in self.parser.internal_emojis:
-                emoji_img, format = self.res.get_internal_emoji(self.parser.internal_emojis[md5])
-            else:
-                if md5 in self.parser.emojis:
-                    group, _ = self.parser.emojis[md5]
-                else:
-                    group = None
-                emoji_img, format = self.res.get_emoji(md5, group)
+            emoji_img, format = self.res.get_emoji_by_md5(md5)
             format_dict['emoji_format'] = format
             format_dict['emoji_img'] = emoji_img
             return template.format(**format_dict)
@@ -184,7 +177,7 @@ class HTMLRender(object):
         css = avatar_tpl.format(name='me', avatar=my_avatar)
 
         for talker in talkers:
-            avatar = self.res.get_avatar(self.parser.contacts_rev[talker])
+            avatar = self.res.get_contact_avatar(talker)
             css += avatar_tpl.format(name=talker, avatar=avatar)
         self.css_string.append(css)
 

+ 69 - 5
wechat/res.py

@@ -15,7 +15,9 @@ import logging
 logger = logging.getLogger(__name__)
 import imghdr
 from multiprocessing import Pool
-
+import atexit
+import cPickle as pickle
+import requests
 
 from .avatar import AvatarReader
 from common.textutil import md5, get_file_b64
@@ -32,15 +34,47 @@ AVATAR_DIRNAME = 'sfs'
 
 JPEG_QUALITY = 50
 
+class EmojiCache(object):
+    """ On-disk cache of emojis downloaded from their cdnUrl.
+
+    Entries map an emoji md5 to a ``(base64 image data, format)`` tuple and
+    are persisted as a pickle in ``fname``.
+    """
+    def __init__(self, fname):
+        """
+        :param fname: path of the pickle file backing the cache; it is
+            loaded if it exists, otherwise the cache starts empty.
+        """
+        self.fname = fname
+        if os.path.isfile(fname):
+            # NOTE: unpickling can execute arbitrary code; only point this
+            # at a cache file written by this program.
+            # Read in binary mode (flush() writes with 'wb') and close the
+            # handle instead of leaking it.
+            with open(fname, 'rb') as f:
+                self.dic = pickle.load(f)
+        else:
+            self.dic = {}
+
+    def query(self, md5):
+        """ :returns: cached (b64 img, format), or (None, None) if not cached"""
+        return self.dic.get(md5, (None, None))
+
+    def fetch(self, md5, url):
+        """ Download the emoji at ``url`` and cache it under ``md5``.
+
+        :returns: (b64 img, format), or (None, None) if the download or
+            the image decoding failed (the error is logged, not raised).
+        """
+        try:
+            logger.info("Requesting emoji {} from {} ...".format(md5, url))
+            r = requests.get(url).content
+            # decode only to learn the image format; the raw bytes are stored
+            im = Image.open(cStringIO.StringIO(r))
+            format = im.format.lower()
+            ret = (base64.b64encode(r), format)
+            self.dic[md5] = ret
+            # write the updated cache to disk immediately
+            self.flush()
+            return ret
+        except Exception:
+            # any failure is logged with its traceback and reported as a miss
+            logger.exception("Error processing emoji from {}".format(url))
+            return None, None
+
+    def flush(self):
+        """ Write the whole cache dictionary to ``self.fname``."""
+        with open(self.fname, 'wb') as f:
+            pickle.dump(self.dic, f)
+
 class Resource(object):
     """ multimedia resources in chat"""
-    def __init__(self, res_dir, avt_db):
+    def __init__(self, parser, res_dir, avt_db,
+            emoji_cache_file='emoji.cache'):
         def check(subdir):
             assert os.path.isdir(os.path.join(res_dir, subdir)), \
                     "No such directory: {}".format(subdir)
         [check(k) for k in ['', AVATAR_DIRNAME, IMG_DIRNAME, EMOJI_DIRNAME, VOICE_DIRNAME]]
 
+        self.emoji_cache = EmojiCache(emoji_cache_file)
         self.res_dir = res_dir
+        self.parser = parser
         self.voice_cache_idx = {}
         self.img_dir = os.path.join(res_dir, IMG_DIRNAME)
         self.voice_dir = os.path.join(res_dir, VOICE_DIRNAME)
@@ -71,6 +105,7 @@ class Resource(object):
         voice_paths = [msg.imgPath for msg in msgs if msg.type == TYPE_SPEAK]
         self.voice_cache_idx = {k: idx for idx, k in enumerate(voice_paths)}
         pool = Pool(3)
+        atexit.register(lambda x: x.terminate(), pool)
         self.voice_cache = [pool.apply_async(parse_wechat_audio_file,
                                              (self.get_voice_filename(k),)) for k in voice_paths]
 # single-threaded version, for debug
@@ -87,6 +122,9 @@ class Resource(object):
         jpeg_str = buf.getvalue()
         return base64.b64encode(jpeg_str)
 
+    def get_contact_avatar(self, nickname):
+        """ Return the avatar of the contact registered under ``nickname``.
+
+        The nickname is resolved through ``self.parser.contacts_rev`` and the
+        result is handed to ``get_avatar``.
+        """
+        contact = self.parser.contacts_rev[nickname]
+        return self.get_avatar(contact)
+
     def _get_img_file(self, fnames):
         """ fnames: a list of filename to search for
             return (filename, filename) of (big, small) image.
@@ -128,7 +166,10 @@ class Resource(object):
 
 
     def get_img(self, fnames):
-        """ return two base64 jpg string"""
+        """
+        :param fnames: possible file paths
+        :returns: two base64 jpg strings
+        """
         fnames = [k for k in fnames if k]   # filter out empty string
         big_file, small_file = self._get_img_file(fnames)
 
@@ -147,7 +188,7 @@ class Resource(object):
             return big_file
         return get_jpg_b64(small_file)
 
-    def get_emoji(self, md5, pack_id):
+    def _get_res_emoji(self, md5, pack_id):
         path = self.emoji_dir
         if pack_id:
             path = os.path.join(path, pack_id)
@@ -170,8 +211,31 @@ class Resource(object):
             return get_file_b64(f), imghdr.what(f)
         return None, None
 
-    def get_internal_emoji(self, fname):
+    def _get_internal_emoji(self, fname):
         f = os.path.join(INTERNAL_EMOJI_DIR, fname)
         return get_file_b64(f), imghdr.what(f)
 
+    def get_emoji_by_md5(self, md5):
+        """ Find the image of an emoji, trying each source in turn.
+
+        Lookup order: internal emojis known to the parser, then the
+        download cache, the resource directory, and finally the emoji's
+        cdnUrl (whose result is stored in the download cache).
+
+        :param md5: md5 identifying the emoji
+        :returns: (b64 img, format), or (None, None) if it cannot be found
+        """
+        if md5 in self.parser.internal_emojis:
+            emoji_img, format = self._get_internal_emoji(self.parser.internal_emojis[md5])
+            # return the image that was just loaded instead of discarding it
+            if format:
+                return emoji_img, format
+            logger.warn("Cannot get emoji {}".format(md5))
+            return None, None
+
+        # previously downloaded emojis
+        img, format = self.emoji_cache.query(md5)
+        if format:
+            return img, format
+
+        # emoji files inside the resource directory
+        group = self.parser.emoji_groups.get(md5, None)
+        emoji_img, format = self._get_res_emoji(md5, group)
+        if format:
+            return emoji_img, format
+
+        # last resort: download from the cdnUrl recorded in the database
+        url = self.parser.emoji_url.get(md5, None)
+        if url:
+            emoji_img, format = self.emoji_cache.fetch(md5, url)
+            if format:
+                return emoji_img, format
+
+        logger.warn("Cannot get emoji {} in {}".format(md5, group))
+        return None, None
+