Selaa lähdekoodia

improve rendering of various types

Yuxin Wu 1 vuosi sitten
vanhempi
sitoutus
c782325cfa
7 muutettua tiedostoa jossa 85 lisäystä ja 27 poistoa
  1. 2 6
      WXGFDecoder/.gitignore
  2. 1 1
      wechat/__init__.py
  3. 20 5
      wechat/msg.py
  4. 13 4
      wechat/parser.py
  5. 28 10
      wechat/render.py
  6. 18 0
      wechat/static/TP_QQMUSIC.html
  7. 3 1
      wechat/static/wx.css

+ 2 - 6
WXGFDecoder/.gitignore

@@ -1,12 +1,8 @@
 *.iml
 .gradle
 local.properties
-.idea/caches
-.idea/libraries
-.idea/modules.xml
-.idea/workspace.xml
-.idea/navEditor.xml
-.idea/assetWizardSettings.xml
+.idea
+app/release
 .DS_Store
 build
 captures

+ 1 - 1
wechat/__init__.py

@@ -21,7 +21,7 @@ def set_level_color(lvl, color):
     logger.addHandler(handler)
 
 set_level_color(logging.INFO, '\033[1;32m')
-set_level_color(logging.WARN, '\033[1;31m')
+set_level_color(logging.WARN, '\033[1;33m')
 set_level_color(logging.ERROR, '\033[1;31m')
 
 if __name__ == '__main__':

+ 20 - 5
wechat/msg.py

@@ -15,12 +15,16 @@ TYPE_REDENVELOPE = 436207665
 TYPE_MONEY_TRANSFER = 419430449  # 微信转账
 TYPE_LOCATION_SHARING = -1879048186
 TYPE_REPLY = 822083633  # 回复的消息.
+TYPE_FILE = 1090519089
+TYPE_QQMUSIC = 1040187441
 TYPE_APP_MSG = 16777265
 
-_KNOWN_TYPES = [eval(k) for k in dir() if k.startswith('TYPE_')]
+_KNOWN_TYPES = tuple([eval(k) for k in dir() if k.startswith('TYPE_')])
 
 import re
+import json
 import io
+import html
 from pyquery import PyQuery
 import xml.etree.ElementTree as ET
 import logging
@@ -40,10 +44,7 @@ class WeChatMsg(object):
     def __init__(self, values):
         for k, v in values.items():
             setattr(self, k, v)
-        if self.type not in _KNOWN_TYPES:
-            logger.warn("Unhandled message type: {}".format(self.type))
-            # only to supress repeated warning:
-            _KNOWN_TYPES.append(self.type)
+        self.known_type = self.type in _KNOWN_TYPES
 
     def msg_str(self):
         if self.type == TYPE_LOCATION:
@@ -119,6 +120,20 @@ class WeChatMsg(object):
             msg = titles[0].text
             # TODO parse reply.
             return msg
+        elif self.type == TYPE_FILE:
+            pq = PyQuery(self.content_xml_ready)
+            titles = pq('title')
+            if len(titles) == 0:
+                return self.content_xml_ready
+            return "FILE:" + titles[0].text
+        elif self.type == TYPE_QQMUSIC:
+            pq = PyQuery(self.content_xml_ready)
+            title = pq('title')[0].text
+            singer = pq('des')[0].text
+            url = html.unescape(pq('url')[0].text)
+            return json.dumps(dict(
+                title=title, singer=singer, url=url
+            ))
         else:
             # TODO replace smiley with text
             return self.content

+ 13 - 4
wechat/parser.py

@@ -1,7 +1,7 @@
 # -*- coding: UTF-8 -*-
 
 import sqlite3
-from collections import defaultdict
+from collections import defaultdict, Counter
 import itertools
 from datetime import datetime
 import logging
@@ -61,6 +61,7 @@ SELECT username,conRemark,nickname FROM rcontact
 """
 SELECT {} FROM message
 """.format(','.join(WeChatDBParser.FIELDS)))
+        unknown_type_cnt = Counter()
         for row in db_msgs:
             values = self._parse_msg_row(row)
             if not values:
@@ -69,6 +70,9 @@ SELECT {} FROM message
             # TODO keep system message?
             if not WeChatMsg.filter_type(msg.type):
                 self.msgs_by_chat[msg.chat].append(msg)
+            if not msg.known_type:
+                unknown_type_cnt[msg.type] += 1
+        logger.warning("[Parser] Unhandled messages (type->cnt): {}".format(unknown_type_cnt))
 
         for k, v in self.msgs_by_chat.items():
             self.msgs_by_chat[k] = sorted(v, key=lambda x: x.createTime)
@@ -79,9 +83,14 @@ SELECT {} FROM message
         userinfo_q = self.cc.execute(""" SELECT id, value FROM userinfo """)
         userinfo = dict(userinfo_q)
         self.username = userinfo.get(2, None)
+        if self.username is None:
+            nickname = userinfo.get(4, None)
+            if nickname is not None:
+                self.username = self.contacts_rev.get(nickname, [None])[0]
         if self.username is None:
             logger.error("Cannot find username in userinfo table!")
             self.username = input("Please enter your username:")
+        assert isinstance(self.username, str), self.username
         logger.info("Your username is: {}".format(self.username))
 
     def _parse_imginfo(self):
@@ -120,8 +129,8 @@ SELECT {} FROM message
 
 
     def _parse(self):
-        self._parse_userinfo()
         self._parse_contact()
+        self._parse_userinfo()  # depends on self.contacts
         self._parse_msg()
         self._parse_imginfo()
         self._parse_emoji()
@@ -140,16 +149,16 @@ SELECT {} FROM message
     def _parse_msg_row(self, row):
         """ parse a record of message into my format"""
         values = dict(zip(WeChatDBParser.FIELDS, row))
+        values['createTime'] = datetime.fromtimestamp(values['createTime']/ 1000)
         try:
             values['content'].decode()
         except:
-            logger.warning(f"Invalid byte sequence in message content (type={values['type']})")
+            logger.warning(f"Invalid byte sequence in message content (type={values['type']}, createTime={values['createTime']})")
             values['content'] = 'FAILED TO DECODE'
         if values['content']:
             values['content'] = ensure_unicode(values['content'])
         else:
             values['content'] = ''
-        values['createTime'] = datetime.fromtimestamp(values['createTime']/ 1000)
 
         values['talker'] = values['talker'].decode()
         if values['imgPath']:

+ 28 - 10
wechat/render.py

@@ -2,7 +2,7 @@
 # -*- coding: UTF-8 -*-
 
 import os
-import base64
+from collections import Counter
 import glob
 from pyquery import PyQuery
 import logging
@@ -32,7 +32,8 @@ TEMPLATES_FILES = {TYPE_MSG: "TP_MSG",
                    TYPE_EMOJI: "TP_EMOJI",
                    TYPE_CUSTOM_EMOJI: "TP_EMOJI",
                    TYPE_LINK: "TP_MSG",
-                   TYPE_VIDEO_FILE: "TP_VIDEO_FILE"
+                   TYPE_VIDEO_FILE: "TP_VIDEO_FILE",
+                   TYPE_QQMUSIC: "TP_QQMUSIC",
                   }
 TEMPLATES = {
     k: open(os.path.join(STATIC_PATH, '{}.html'.format(v))).read()
@@ -65,6 +66,8 @@ class HTMLRender(object):
             js = ensure_unicode(open(js).read())
             self.js_string.append(js)
 
+        self.unknown_type_cnt = Counter()
+
     @property
     def all_css(self):
         # call after processing all messages,
@@ -90,12 +93,14 @@ class HTMLRender(object):
         return self.final_js
 
     #@timing(total=True)
-    def render_msg(self, msg):
+    def render_msg(self, msg: WeChatMsg):
         """ render a message, return the html block"""
         # TODO for chatroom, add nickname on avatar
         sender = u'you ' + msg.talker if not msg.isSend else 'me'
         format_dict = {'sender_label': sender,
                        'time': msg.createTime }
+        if not msg.known_type:
+            self.unknown_type_cnt[msg.type] += 1
         if(not msg.isSend and msg.is_chatroom()):
             format_dict['nickname'] = '>\n       <pre align=\'left\'>'+msg.talker_nickname+'</pre'
         else:
@@ -104,8 +109,11 @@ class HTMLRender(object):
         def fallback():
             template = TEMPLATES[TYPE_MSG]
             content = msg.msg_str()
-            format_dict['content'] = self.smiley.replace_smileycode(content)
-            return template.format(**format_dict)
+            content = self.smiley.replace_smileycode(content)
+            if not msg.known_type:
+                # Show raw (usually xml) content if unknown.
+                content = html.escape(content)
+            return template.format(content=content, **format_dict)
 
         template = TEMPLATES.get(msg.type)
         if msg.type == TYPE_SPEAK:
@@ -128,6 +136,15 @@ class HTMLRender(object):
             # TODO do not show fancybox when no bigimg found
             format_dict['img'] = (img, 'jpeg')
             return template.format(**format_dict)
+        elif msg.type == TYPE_QQMUSIC:
+            jobj = json.loads(msg.msg_str())
+            content = f"{jobj['title']} - {jobj['singer']}"
+
+            # imgPath was originally THUMBNAIL_DIRPATH://th_xxxxxxxxx
+            imgpath = msg.imgPath.split('_')[-1]
+            img = self.res.get_img([imgpath])
+            format_dict['img'] = (img, 'jpeg')
+            return template.format(url=jobj['url'], content=content, **format_dict)
         elif msg.type == TYPE_EMOJI or msg.type == TYPE_CUSTOM_EMOJI:
             if 'emoticonmd5' in msg.content:
                 pq = PyQuery(msg.content)
@@ -145,11 +162,11 @@ class HTMLRender(object):
                 import IPython as IP; IP.embed()
             return template.format(**format_dict)
         elif msg.type == TYPE_LINK:
-            content = msg.msg_str()
-            # TODO show a short link with long href, if link too long
-            if content.startswith(u'URL:'):
-                url = content[4:]
-                content = u'URL:<a target="_blank" href="{0}">{0}</a>'.format(url)
+            pq = PyQuery(msg.content_xml_ready)
+            url = pq('url').text()
+            if url:
+                title = pq('title')[0].text
+                content = '<a target="_blank" href="{0}">{1}</a>'.format(url, title)
                 format_dict['content'] = content
                 return template.format(**format_dict)
         elif msg.type == TYPE_VIDEO_FILE:
@@ -224,6 +241,7 @@ class HTMLRender(object):
         slice_by_size = MessageSlicerBySize().slice(msgs)
         ret = [self._render_partial_msgs(s) for s in slice_by_size]
         self.prgs.finish()
+        logger.warning("[HTMLRenderer] Unhandled messages (type->cnt): {}".format(self.unknown_type_cnt))
         return ret
 
 if __name__ == '__main__':

+ 18 - 0
wechat/static/TP_QQMUSIC.html

@@ -0,0 +1,18 @@
+<div class="chatItem {sender_label}">
+  <div class="chatItemContent">
+    <span class="avatar"></span>
+    <div class="cloud cloudText">
+      <div class="cloudPannel" title="{time}" {nickname}>
+        <div class="cloudBody">
+          <div class="cloudContent">
+            <pre style="white-space:pre-wrap">
+QQ Music:<a href="{url}" target="_blank">{content}</a> </pre>
+            <span class="img_wrap">
+              <img class="imageBorder" src="data:image/{img[1]};base64,{img[0]}"/>
+            </span>
+          </div>
+        </div>
+      </div>
+    </div>
+  </div>
+</div>

+ 3 - 1
wechat/static/wx.css

@@ -5141,7 +5141,9 @@ a.btnBlue:active .btnBluePanel {
 }
 .cloudText img {
   vertical-align: text-bottom;
-  margin-bottom: -5px;
+  max-width: 200px;
+  /* *margin-bottom: -5px; */
+  /* qqmusic cover img */
 }
 .cloudImg {
   line-height: 0;