소스 검색

Implement reply quote UI for HTML export (#107)

* render: improve reply quote UI

* render: address PR review comments

* res: avoid masking errors in img decode
CK Zhang 2 달 전
부모
커밋
30eef32ae6
5개의 파일 변경: 288줄 추가, 41줄 삭제
  1. 79 2
      wechat/msg.py
  2. 65 0
      wechat/render.py
  3. 63 39
      wechat/res.py
  4. 20 0
      wechat/static/TP_REPLY.html
  5. 61 0
      wechat/static/wx.css

+ 79 - 2
wechat/msg.py

@@ -45,6 +45,10 @@ class WeChatMsg(object):
         self.known_type = self.type in _KNOWN_TYPES
 
     def msg_str(self):
+        if self.type == TYPE_IMG:
+            return "Image"
+        elif self.type == TYPE_SPEAK:
+            return "Voice"
         if self.type == TYPE_LOCATION:
             try:
                 pq = PyQuery(self.content_xml_ready, parser='xml')
@@ -65,7 +69,7 @@ class WeChatMsg(object):
                 title = pq('title').text()
                 if title:  # may not be correct
                     return "FILE:{}".format(title)
-                return "NOT IMPLEMENTED: " + self.content_xml_ready
+                return "Link"
             return "URL:{}".format(url)
         elif self.type == TYPE_NAMECARD:
             pq = PyQuery(self.content_xml_ready, parser='xml')
@@ -89,7 +93,13 @@ class WeChatMsg(object):
             return "LOCATION SHARING"
         elif self.type == TYPE_EMOJI:
             # TODO add emoji name
+            if self.content.lstrip().startswith("<"):
+                return "Emoji"
+            if not self.content:
+                return "Emoji"
             return self.content
+        elif self.type == TYPE_CUSTOM_EMOJI:
+            return "Emoji"
         elif self.type == TYPE_REDENVELOPE:
             data_to_parse = io.BytesIO(self.content.encode('utf-8'))
             try:
@@ -136,6 +146,74 @@ class WeChatMsg(object):
             # TODO replace smiley with text
             return self.content
 
+    def reply_info(self):
+        """Parse TYPE_REPLY payload.
+
+        Returns: {title, ref_name, ref_content, ref_type, ref_svrid}.
+        """
+        if self.type != TYPE_REPLY:
+            return None
+
+        def _one_line(text: str, *, max_len: int) -> str:
+            text = re.sub(r"\s+", " ", (text or "")).strip()
+            if len(text) > max_len:
+                return text[: max_len - 1] + "…"
+            return text
+
+        xml = self.content_xml_ready
+        idx = xml.find("<msg")
+        if idx != -1:
+            xml = xml[idx:]
+
+        try:
+            pq = PyQuery(xml, parser="xml")
+        except Exception:
+            return None
+
+        title = html.unescape(pq("appmsg > title").text() or pq("title").eq(0).text() or "")
+
+        ref_name_raw = pq("refermsg displayname").text() or pq("refermsg fromusr").text() or ""
+        ref_name = _one_line(html.unescape(ref_name_raw), max_len=80)
+
+        ref_content_raw = pq("refermsg content").text() or ""
+
+        ref_svrid_i = None
+        ref_svrid = pq("refermsg svrid").text() or pq("refermsg svrId").text()
+        if ref_svrid:
+            try:
+                ref_svrid_i = int(ref_svrid)
+            except Exception:
+                ref_svrid_i = None
+
+        ref_type_i = None
+        ref_type = pq("refermsg type").text()
+        if ref_type:
+            try:
+                ref_type_i = int(ref_type)
+            except Exception:
+                ref_type_i = None
+
+        ref_content_fallback = html.unescape(ref_content_raw or "")
+        if ref_type_i is None:
+            ref_content = ref_content_fallback
+        else:
+            try:
+                ref_content = WeChatMsg({"type": ref_type_i, "content": ref_content_fallback}).msg_str()
+            except Exception:
+                ref_content = ref_content_fallback
+        ref_content = _one_line(ref_content, max_len=200)
+
+        if not title and not ref_name and not ref_content:
+            return None
+
+        return {
+            "title": title.strip(),
+            "ref_name": ref_name,
+            "ref_content": ref_content,
+            "ref_type": ref_type_i,
+            "ref_svrid": ref_svrid_i,
+        }
+
     @property
     def content_xml_ready(self):
         # remove xml headers to avoid possible errors it may create
@@ -180,4 +258,3 @@ class WeChatMsg(object):
         if not emoji:
             return None
         return emoji.attrs['productid']
-

+ 65 - 0
wechat/render.py

@@ -33,6 +33,7 @@ TEMPLATES_FILES = {TYPE_MSG: "TP_MSG",
                    TYPE_EMOJI: "TP_EMOJI",
                    TYPE_CUSTOM_EMOJI: "TP_EMOJI",
                    TYPE_LINK: "TP_MSG",
+                   TYPE_REPLY: "TP_REPLY",
                    TYPE_VIDEO_FILE: "TP_VIDEO_FILE",
                    TYPE_QQMUSIC: "TP_QQMUSIC",
                   }
@@ -160,6 +161,68 @@ class HTMLRender(object):
             else:
                 template = get_template("TP_QQMUSIC_NOIMG")
             return template.format(url=jobj['url'], content=content, **format_dict)
+        elif msg.type == TYPE_REPLY:
+            info = msg.reply_info()
+            if not info:
+                return fallback()
+
+            def _escape_fmt(s: str) -> str:
+                return s.replace("{", "{{").replace("}", "}}")
+
+            title = info.get("title") or ""
+            reply_to = info.get("ref_name") or "unknown"
+            reply_quote = info.get("ref_content") or ""
+            ref_svrid = info.get("ref_svrid")
+
+            if not title and not reply_quote:
+                return fallback()
+
+            format_dict["content"] = _escape_fmt(self.smiley.replace_smileycode(title))
+            format_dict["reply_to"] = _escape_fmt(reply_to)
+
+            reply_thumb_html = ""
+            ref_msg = getattr(self, "_msg_by_svrid", {}).get(ref_svrid) if ref_svrid is not None else None
+            if ref_msg is not None:
+                try:
+                    if ref_msg.type == TYPE_IMG and ref_msg.imgPath:
+                        imgpath = ref_msg.imgPath.split("_")[-1]
+                        bigimgpath = self.parser.imginfo.get(ref_msg.msgSvrId)
+                        fnames = [k for k in [imgpath, bigimgpath] if k]
+                        b64 = self.res.get_img_thumb(fnames, max_size=64)
+                        if b64:
+                            reply_thumb_html = f'<img class="replyThumb" src="data:image/jpeg;base64,{b64}" />'
+                    elif ref_msg.type in (TYPE_VIDEO_FILE, TYPE_WX_VIDEO) and ref_msg.imgPath:
+                        b64 = self.res.get_video_thumb(ref_msg.imgPath, max_size=64)
+                        if b64:
+                            reply_thumb_html = f'<img class="replyThumb" src="data:image/jpeg;base64,{b64}" />'
+                    elif ref_msg.type in (TYPE_EMOJI, TYPE_CUSTOM_EMOJI):
+                        if "emoticonmd5" in ref_msg.content:
+                            pq = PyQuery(ref_msg.content)
+                            md5 = pq("emoticonmd5").text()
+                        else:
+                            md5 = ref_msg.imgPath
+                        if md5:
+                            emoji_img, fmt = self.res.get_emoji_by_md5(md5)
+                            if emoji_img and fmt:
+                                fmt = fmt.lower()
+                                if fmt == "jpg":
+                                    fmt = "jpeg"
+                                reply_thumb_html = (
+                                    f'<img class="replyThumb replyThumbEmoji" '
+                                    f'src="data:image/{fmt};base64,{emoji_img}" />'
+                                )
+                except Exception:
+                    logger.exception("Failed to render reply thumbnail (%s).", ref_svrid)
+
+            if reply_thumb_html:
+                reply_quote_html = reply_thumb_html
+            else:
+                quote_text = self.smiley.replace_smileycode(reply_quote)
+                reply_quote_html = f'<span class="replyText">{quote_text}</span>'
+            format_dict["reply_quote_html"] = _escape_fmt(reply_quote_html)
+
+            template = template or get_template(TYPE_MSG)
+            return template.format(**format_dict)
         elif msg.type == TYPE_EMOJI or msg.type == TYPE_CUSTOM_EMOJI:
             if 'emoticonmd5' in msg.content:
                 pq = PyQuery(msg.content)
@@ -247,6 +310,8 @@ class HTMLRender(object):
 
     def render_msgs(self, msgs):
         """ render msgs of one chat, return a list of html"""
+        chatid = msgs[0].chat
+        self._msg_by_svrid = {m.msgSvrId: m for m in self.parser.msgs_by_chat.get(chatid, msgs)}
         if msgs[0].is_chatroom():
             talkers = set([m.talker for m in msgs])
         else:

+ 63 - 39
wechat/res.py

@@ -26,6 +26,7 @@ EMOJI_DIRNAME = 'emoji'
 VIDEO_DIRNAME = 'video'
 
 JPEG_QUALITY = 50
+THUMB_JPEG_QUALITY = 35
 
 class Resource(object):
     """ Multimedia resources parser."""
@@ -160,49 +161,72 @@ class Resource(object):
         """
         fnames = [k for k in fnames if k]   # filter out empty string
         big_file, small_file = self._get_img_file(fnames)
-
-        def get_jpg_b64(img_file):
-            if not img_file:
-                return None
-
-            # True jpeg. Simplest case.
-            if img_file.endswith('jpg') and \
-                   img_what(img_file) == 'jpeg':
-                return get_file_b64(img_file)
-
-            if is_wxgf_file(img_file):
-                start = time.time()
-                buf = self.wxgf_decoder.decode_with_cache(img_file, None)
-                if buf is None:
-                    if not self.wxgf_decoder.has_server():
-                        logger.warning("wxgf decoder server is not provided. Cannot decode wxgf images. Please follow instructions to create wxgf decoder server if these images need to be decoded.")
-                    else:
-                        logger.error("Failed to decode wxgf file: {}".format(img_file))
-                    return None
+        big_file = self._img_file_to_jpg_b64(big_file)
+        if big_file:
+            return big_file
+        return self._img_file_to_jpg_b64(small_file)
+
+    def _img_file_to_jpg_b64(self, img_file: str, *, max_size: int | None = None, quality: int = JPEG_QUALITY) -> str | None:
+        if not img_file:
+            return None
+
+        # True jpeg. Simplest case. Avoid re-compressing.
+        if max_size is None and img_file.endswith('jpg') and img_what(img_file) == 'jpeg':
+            return get_file_b64(img_file)
+
+        if is_wxgf_file(img_file):
+            start = time.time()
+            buf = self.wxgf_decoder.decode_with_cache(img_file, None)
+            if buf is None:
+                if not self.wxgf_decoder.has_server():
+                    logger.warning("wxgf decoder server is not provided. Cannot decode wxgf images. Please follow instructions to create wxgf decoder server if these images need to be decoded.")
                 else:
-                    elapsed = time.time() - start
-                    if elapsed > 0.01 and self.wxgf_decoder.has_server():
-                        logger.info(f"Decoded {img_file} in {elapsed:.2f} seconds")
+                    logger.error("Failed to decode wxgf file: {}".format(img_file))
+                return None
             else:
-                with open(img_file, 'rb') as f:
-                    buf = f.read()
-
-            # File is not actually jpeg. Convert.
-            if img_what(file=None, h=buf) != 'jpeg':
-                try:
-                    im = Image.open(io.BytesIO(buf))
-                except:
-                    return None
-                else:
-                    bufio = io.BytesIO()
-                    im.convert('RGB').save(bufio, 'JPEG', quality=JPEG_QUALITY)
-                    buf = bufio.getvalue()
+                elapsed = time.time() - start
+                if elapsed > 0.01 and self.wxgf_decoder.has_server():
+                    logger.info(f"Decoded {img_file} in {elapsed:.2f} seconds")
+        else:
+            with open(img_file, "rb") as f:
+                buf = f.read()
+
+        # If we don't need resize/convert and it's already jpeg, avoid re-compressing.
+        if max_size is None and img_what(file=None, h=buf) == 'jpeg':
             return base64.b64encode(buf).decode('ascii')
 
-        big_file = get_jpg_b64(big_file)
-        if big_file:
-            return big_file
-        return get_jpg_b64(small_file)
+        try:
+            im = Image.open(io.BytesIO(buf))
+        except Exception:
+            return None
+
+        try:
+            im = im.convert("RGB")
+            if max_size:
+                im.thumbnail((max_size, max_size))
+            bufio = io.BytesIO()
+            im.save(bufio, "JPEG", quality=quality)
+            return base64.b64encode(bufio.getvalue()).decode("ascii")
+        except Exception:
+            return None
+
+    def get_img_thumb(self, fnames, *, max_size: int = 64) -> str | None:
+        """Return a small JPEG thumbnail (b64) for an image message."""
+        fnames = [k for k in fnames if k]
+        big_file, small_file = self._get_img_file(fnames)
+        return (
+            self._img_file_to_jpg_b64(small_file, max_size=max_size, quality=THUMB_JPEG_QUALITY)
+            or self._img_file_to_jpg_b64(big_file, max_size=max_size, quality=THUMB_JPEG_QUALITY)
+        )
+
+    def get_video_thumb(self, videoid: str, *, max_size: int = 64) -> str | None:
+        """Return a small JPEG thumbnail (b64) for a video message, if available."""
+        if not videoid:
+            return None
+        video_thumbnail_file = os.path.join(self.video_dir, videoid + ".jpg")
+        if not os.path.exists(video_thumbnail_file):
+            return None
+        return self._img_file_to_jpg_b64(video_thumbnail_file, max_size=max_size, quality=THUMB_JPEG_QUALITY)
 
     def get_emoji_by_md5(self, md5):
         """ Returns: (b64 encoded img string, format) """

+ 20 - 0
wechat/static/TP_REPLY.html

@@ -0,0 +1,20 @@
+<div class="chatItem {sender_label}">
+  <div class="chatItemContent">
+    <span class="avatar"></span>
+    <div class="cloud cloudText">
+       <div class="cloudPannel" title="{time}" {nickname}>
+        <div class="cloudBody">
+          <div class="cloudContent">
+            <pre style="white-space:pre-wrap">{content}</pre>
+          </div>
+        </div>
+      </div>
+      <div class="replyWrap">
+        <div class="replyQuote">
+          <span class="replyName">{reply_to}:</span>
+          {reply_quote_html}
+        </div>
+      </div>
+    </div>
+  </div>
+</div>

+ 61 - 0
wechat/static/wx.css

@@ -5598,3 +5598,64 @@ a.btnBlue:active .btnBluePanel {
   padding: 0px\9;
   margin: 0px\9;
 }
+
+/* Reply message (TYPE_REPLY) */
+.cloudText .replyWrap {
+  width: 0;
+  display: flex;
+}
+.me .cloudText .replyWrap {
+  margin-left: auto;
+  justify-content: flex-end;
+}
+.cloudText .replyQuote {
+  margin: 4px 0 0 0;
+  padding: 6px 8px;
+  border-left: 3px solid rgba(0, 0, 0, 0.25);
+  background: rgba(0, 0, 0, 0.06);
+  border-radius: 4px;
+  display: inline-flex;
+  align-items: flex-start;
+  text-align: left;
+  flex: 0 0 auto;
+  max-width: 300px;
+}
+.me .cloudText .replyQuote {
+  border-left-color: rgba(0, 0, 0, 0.18);
+  background: rgba(255, 255, 255, 0.18);
+}
+.cloudText .replyName {
+  font-size: 12px;
+  font-weight: 600;
+  line-height: 1.3;
+  margin-right: 6px;
+  flex: 0 0 auto;
+  opacity: 0.95;
+}
+.cloudText .replyThumb {
+  width: 32px;
+  height: 32px;
+  margin-right: 6px;
+  border-radius: 3px;
+  object-fit: cover;
+  flex: 0 0 auto;
+  background: rgba(0, 0, 0, 0.05);
+}
+.cloudText .replyThumbEmoji {
+  object-fit: contain;
+  background: transparent;
+}
+.cloudText .replyText {
+  font-size: 12px;
+  line-height: 1.3;
+  opacity: 0.85;
+  text-align: left;
+  overflow: hidden;
+  min-width: 0;
+  flex: 1 1 auto;
+  display: -webkit-box;
+  -webkit-box-orient: vertical;
+  -webkit-line-clamp: 2;
+  text-overflow: ellipsis;
+  word-break: break-word;
+}