Selaa lähdekoodia

support downloading avatar

Yuxin Wu 1 vuosi sitten
vanhempi
sitoutus
eedaa9c9e0
5 muutettua tiedostoa jossa 50 lisäystä ja 5 poistoa
  1. 6 0
      dump-html.py
  2. 11 2
      wechat/avatar.py
  3. 6 0
      wechat/msg.py
  4. 11 0
      wechat/parser.py
  5. 16 3
      wechat/res.py

+ 6 - 0
dump-html.py

@@ -3,6 +3,7 @@ import os
 import sys
 import argparse
 import logging
+from datetime import datetime
 
 from wechat.parser import WeChatDBParser
 from wechat.res import Resource
@@ -19,6 +20,8 @@ def get_args():
     parser.add_argument('--res', default='resource', help='the resource directory')
     parser.add_argument('--wxgf-server', help='address of the wxgf image decoder server')
     parser.add_argument('--avt', default='avatar.index', help='path to avatar.index file that only exists in old version of wechat. Ignore for new version of wechat.')
+    parser.add_argument('--start', help='start time in format of YYYY-MM-DD HH:MM:SS',
+                        type=datetime.fromisoformat)
     args = parser.parse_args()
     return args
 
@@ -43,6 +46,9 @@ if __name__ == '__main__':
     msgs = parser.msgs_by_chat[chatid]
     logger.info(f"Number of Messages for chatid {chatid}: {len(msgs)}")
     assert len(msgs) > 0
+    if args.start is not None:
+        msgs = [msg for msg in msgs if msg.createTime > args.start]
+        logger.info(f"Number of Messages after {args.start}: {len(msgs)}")
 
     render = HTMLRender(parser, res)
     htmls = render.render_msgs(msgs)

+ 11 - 2
wechat/avatar.py

@@ -49,7 +49,7 @@ class AvatarReader(object):
         except Exception:
             pass
 
-    def get_avatar_from_avtdir(self, avtid):
+    def get_avatar_from_avtdir(self, avtid) -> Image.Image | None:
         dir1, dir2 = avtid[:2], avtid[2:4]
         candidates = glob.glob(os.path.join(self.avt_dir, dir1, dir2, f"*{avtid}*"))
         candidates = sorted(set(candidates), key=_filename_priority, reverse=True)
@@ -83,7 +83,16 @@ class AvatarReader(object):
             ret = self.get_avatar_from_avtdir(avtid)
             if ret is not None:
                 return ret
-        logger.warning("Avatar for {} not found anywhere.".format(username))
+        logger.warning("Avatar file for {} not found.".format(username))
+
+    def save_avatar_to_avtdir(self, username: str, im: Image.Image):  # write im under self.avt_dir, keyed by md5(username)
+        """Save a downloaded avatar to avtdir so it can be reused next time."""
+        avtid = md5(username.encode('utf-8'))  # md5 is a helper defined outside this hunk; presumably returns a hex string - confirm
+        dir1, dir2 = avtid[:2], avtid[2:4]  # same two-level split that get_avatar_from_avtdir uses for lookup
+        fname = os.path.join(self.avt_dir, dir1, dir2, f"user_{avtid}.png")  # name contains avtid, so the "*{avtid}*" glob in get_avatar_from_avtdir can match it
+        os.makedirs(os.path.dirname(fname), exist_ok=True)  # create missing parent directories; no error if they already exist
+        logger.info(f"Caching downloaded avatar for {username} to {fname}.")
+        im.save(fname, 'PNG')  # always written as PNG, whatever format the image was loaded from
 
     def read_img_from_block(self, filename, pos, size):
         file_idx = pos >> 32

+ 6 - 0
wechat/msg.py

@@ -155,6 +155,12 @@ class WeChatMsg(object):
         else:
             return ret
 
+    def __eq__(self, r):  # two messages are equal when createTime, talker and isSend all match
+        return self.createTime == r.createTime and \
+                self.talker == r.talker and \
+                self.isSend == r.isSend  # NOTE(review): an r lacking these attributes (e.g. None) raises AttributeError here
+        # imgPath is left out of the comparison because it might change after migration. NOTE(review): no __hash__ is visible in this hunk - confirm instances need not be hashable.
+
     def __lt__(self, r):  # ordering uses createTime only; talker and isSend are not consulted
         return self.createTime < r.createTime
 

+ 11 - 0
wechat/parser.py

@@ -16,6 +16,7 @@ addr_upload2
 chatroom
 message
 rcontact
+img_flag
 """
 
 class WeChatDBParser(object):
@@ -36,6 +37,7 @@ class WeChatDBParser(object):
         self.emoji_groups = {}
         self.emoji_info = {}
         self.emoji_encryption_key = None
+        self.avatar_urls = {}
         self._parse()
 
     def _parse_contact(self):
@@ -126,6 +128,14 @@ SELECT {} FROM message
                 if cdnUrl or encrypturl:
                     self.emoji_info[md5] = (catalog, cdnUrl, encrypturl, aeskey)
 
+    def _parse_img_flag(self):  # fills self.avatar_urls with username -> reserved1 for every row that has a value
+        """Parse the img_flag table which stores avatar for each id."""
+        query = self.cc.execute(
+""" SELECT username, reserved1 FROM img_flag """)  # reserved1 is the column stored as the avatar URL below
+        for row in query:
+            username, url = row  # column order matches the SELECT list
+            if url:  # skip rows whose reserved1 is NULL or empty
+                self.avatar_urls[username] = url  # Resource.get_avatar reads this mapping as the download URL
 
     def _parse(self):
         self._parse_contact()
@@ -133,6 +143,7 @@ SELECT {} FROM message
         self._parse_msg()
         self._parse_imginfo()
         self._parse_emoji()
+        self._parse_img_flag()
 
     def get_emoji_encryption_key(self):
         # obtain local encryption key in a special entry in the database

+ 16 - 3
wechat/res.py

@@ -2,6 +2,7 @@
 
 import os
 from PIL import Image
+import requests
 import time
 import io
 import base64
@@ -83,12 +84,24 @@ class Resource(object):
         self.voice_cache = [pool.apply_async(parse_wechat_audio_file,
                                              (self._get_voice_filename(k),)) for k in voice_paths]
 
-    def get_avatar(self, username):
+    def get_avatar(self, username) -> str:
         """ return base64 unicode string"""
         im = self.avt_reader.get_avatar(username)
         if im is None:
-            logger.warning(f"Cannot find avatar for {username}.")
-            return ""
+            # Try downloading the avatar directly.
+            avatar_url = self.parser.avatar_urls.get(username)
+            if avatar_url is None:
+                return ""
+            logger.info(f"Requesting avatar of {username} from {avatar_url} ...")
+            try:
+                r = requests.get(avatar_url).content
+                im = Image.open(io.BytesIO(r))
+            except Exception:
+                logger.exception(f"Failed to fetch avatar of {username}.")
+                return ""
+            else:
+                self.avt_reader.save_avatar_to_avtdir(username, im)
+
         buf = io.BytesIO()
         try:
             im.save(buf, 'JPEG', quality=JPEG_QUALITY)