Bläddra i källkod

compatibility with an old database file

Yuxin Wu 5 år sedan
förälder
incheckning
aa9a1c9129
6 ändrade filer med 78 tillägg och 41 borttagningar
  1. 14 10
      wechat/audio.py
  2. 42 20
      wechat/avatar.py
  3. 1 1
      wechat/msgslice.py
  4. 19 9
      wechat/parser.py
  5. 1 0
      wechat/res.py
  6. 1 1
      wechat/static/TP_SPEAK.html

+ 14 - 10
wechat/audio.py

@@ -30,16 +30,20 @@ def do_parse_wechat_audio_file(file_name):
     with open(file_name, 'rb') as f:
         header = f.read(10)
     if b'AMR' in header:
-        raise NotImplementedError("AMR decoding not implemented because it seems deprecated since WeChat6.0+")
-        # The below is python2 only. It should be equivalent to using sox from command line?
-        import pysox
-        infile = pysox.CSoxStream(file_name)
-        outfile = pysox.CSoxStream(mp3_file, 'w', infile.get_signal())
-        chain = pysox.CEffectsChain(infile, outfile)
-        chain.flow_effects()
-        outfile.close()
-        signal = infile.get_signal().get_signalinfo()
-        duration = signal['length'] * 1.0 / signal['rate']
+        cmd = f"sox -e signed -c 1 {file_name} {mp3_file}"
+        subproc_succ(cmd)
+        cmd = f"soxi -D {mp3_file}"
+        duration = float(subproc_succ(cmd))
+
+        # The below is python2 only. It should be equivalent to using sox from command line
+        # import pysox
+        # infile = pysox.CSoxStream(file_name)
+        # outfile = pysox.CSoxStream(mp3_file, 'w', infile.get_signal())
+        # chain = pysox.CEffectsChain(infile, outfile)
+        # chain.flow_effects()
+        # outfile.close()
+        # signal = infile.get_signal().get_signalinfo()
+        # duration = signal['length'] * 1.0 / signal['rate']
     elif b'SILK' in header:
         raw_file = os.path.join('/tmp',
                                 os.path.basename(file_name)[:-4] + '.raw')

+ 42 - 20
wechat/avatar.py

@@ -1,3 +1,4 @@
+#!/usr/bin/env python3
 # -*- coding: UTF-8 -*-
 
 from PIL import Image
@@ -35,32 +36,36 @@ class AvatarReader(object):
         """ username: `username` field in db.rcontact"""
         if not self._use_avt:
             return None
+        username = ensure_unicode(username)
+        avtid = md5(username.encode('utf-8'))
+        dir1, dir2 = avtid[:2], avtid[2:4]
+        candidates = glob.glob(os.path.join(self.avt_dir, dir1, dir2, f"*{avtid}*"))
+        default_candidate = os.path.join(self.avt_dir, dir1, dir2, f"user_{avtid}.png")
+        candidates.append(default_candidate)
 
-        username = ensure_unicode(username).encode('utf-8')
-        filename = md5(username)
-        dir1, dir2 = filename[:2], filename[2:4]
-        filename = os.path.join(dir1, dir2,
-                                "user_{}.png".format(filename))
+        def priority(s):
+            if "_hd" in s and s.endswith(".png"):
+                return 10
+            else:
+                return 1
 
-        try:
+        candidates = sorted(set(candidates), key=priority, reverse=True)
+
+        for cand in candidates:
             try:
                 if self.avt_use_db:
-                    pos, size = self.query_index(filename)
+                    pos, size = self.query_index(cand)
                     return self.read_img(pos, size)
                 else:
-                    img_file = os.path.join(self.avt_dir, filename)
-                    if os.path.exists(img_file):
-                        return Image.open(img_file)
-                    else:
-                        return None
-            except TypeError:
-                logger.warning("Avatar for {} not found in avatar database.".format(username))
-                return None
-        except Exception as e:
-            raise
-            # logger.exception("Failed to retrieve avatar!")
-            # return None
-
+                    if os.path.exists(cand):
+                        if cand.endswith(".bm"):
+                            return self.read_bm_file(cand)
+                        else:
+                            return Image.open(cand)
+            except Exception:
+                logger.exception("HHH")
+                pass
+        logger.warning("Avatar for {} not found in avatar database.".format(username))
 
     def read_img(self, pos, size):
         file_idx = pos >> 32
@@ -78,8 +83,25 @@ class AvatarReader(object):
             logger.warn("Cannot read avatar from {}: {}".format(fname, str(e)))
             return None
 
+    def read_bm_file(self, fname):  # decode the raw pixel file `fname` into a 96x96 RGB PIL image
+        # history at https://github.com/ppwwyyxx/wechat-dump/pull/14
+        with open(fname, 'rb') as f:
+            # filesize is 36880=96x96x4+16
+            size = (96, 96, 3)  # output array: 96 rows x 96 columns x 3 channels
+            img = np.zeros(size, dtype='uint8')
+            for i in range(96):
+                for j in range(96):
+                    r, g, b, a = f.read(4)  # 4 bytes per pixel, read sequentially from offset 0; a short read makes this unpacking raise ValueError
+                    img[i,j] = (r, g, b)  # the fourth byte of each pixel (a) is discarded
+            return Image.fromarray(img, mode="RGB")  # only 96*96*4 bytes are consumed; the remaining 16 bytes of the file are never read
+
     def query_index(self, filename):
         """Look up where an avatar is stored, using the avatar index database.

         Args:
             filename: value matched against the ``FileName`` column of the
                 ``Index_avatar`` table in ``self.avt_db``.

         Returns:
             The ``(Offset, Size)`` pair of the first matching row.

         Raises:
             TypeError: if no row matches, because ``fetchone()`` then
                 returns ``None``, which cannot be unpacked.
         """
         conn = sqlite3.connect(self.avt_db)
         try:
             # Bind the file name as a query parameter instead of formatting
             # it into the SQL text, so names containing quotes neither break
             # the statement nor inject SQL.
             cursor = conn.execute(
                 "select Offset,Size from Index_avatar where FileName=?",
                 (filename,))
             pos, size = cursor.fetchone()
         finally:
             # Release the database handle on every path; the lookup opens a
             # new connection on each call.
             conn.close()
         return pos, size
+
+if __name__ == '__main__':  # ad-hoc manual check: argv[1] and argv[2] are passed to AvatarReader, argv[3] to get_avatar
+    import sys
+    r = AvatarReader(sys.argv[1], sys.argv[2])
+    print(r.get_avatar(sys.argv[3]))  # prints whatever get_avatar returns for that username

+ 1 - 1
wechat/msgslice.py

@@ -32,7 +32,7 @@ class MessageSlicerBySize(object):
     """ Separate messages into slices by max slice size,
         to avoid too large html.
     """
-    def __init__(self, size=1000):
+    def __init__(self, size=2000):
         """ a slice will have <= 1.5 * cnt messages"""
         self.size = size
         assert self.size > 1

+ 19 - 9
wechat/parser.py

@@ -101,15 +101,25 @@ SELECT {} FROM message
             self.emoji_groups[md5] = group
 
         NEEDED_EMOJI_CATALOG = [49, 50, 17]
-        emojiinfo_q = self.cc.execute(
-""" SELECT md5, catalog, name, cdnUrl FROM EmojiInfo""")
-        for row in emojiinfo_q:
-            md5, catalog, name, cdnUrl = row
-            if cdnUrl:
-                self.emoji_url[md5] = cdnUrl
-            if catalog not in NEEDED_EMOJI_CATALOG:
-                continue
-            self.internal_emojis[md5] = name
+        try:
+            emojiinfo_q = self.cc.execute(
+    """ SELECT md5, catalog, name, cdnUrl FROM EmojiInfo""")
+        except: # old database does not have cdnurl
+            emojiinfo_q = self.cc.execute(
+    """ SELECT md5, catalog, name FROM EmojiInfo""")
+            for row in emojiinfo_q:
+                md5, catalog, name = row
+                if catalog not in NEEDED_EMOJI_CATALOG:
+                    continue
+                self.internal_emojis[md5] = name
+        else:
+            for row in emojiinfo_q:
+                md5, catalog, name, cdnUrl = row
+                if cdnUrl:
+                    self.emoji_url[md5] = cdnUrl
+                if catalog not in NEEDED_EMOJI_CATALOG:
+                    continue
+                self.internal_emojis[md5] = name
 
 
     def _parse(self):

+ 1 - 0
wechat/res.py

@@ -121,6 +121,7 @@ class Resource(object):
         """ return base64 unicode string"""
         im = self.avt_reader.get_avatar(username)
         if im is None:
+            logger.warning(f"Avatar for {username} is missing.")
             return ""
         buf = io.BytesIO()
         try:

+ 1 - 1
wechat/static/TP_SPEAK.html

@@ -4,7 +4,7 @@
     <div class="cloud cloudVoice" onclick="playVoice(event)" style="width:80px">
       <div class="cloudPannel" title="{time}" {nickname}>
         <div class="sendStatus">
-          <span class="second">{voice_duration}"</span>
+          <span class="second">{voice_duration:.1f}"</span>
         </div>
         <div class="cloudBody">
           <div class="cloudContent">