render.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338
  1. #!/usr/bin/env python3
  2. # -*- coding: UTF-8 -*-
import glob
import html
import json
import logging
import os
from collections import Counter
from functools import lru_cache

from pyquery import PyQuery
  9. logger = logging.getLogger(__name__)
  10. LIB_PATH = os.path.dirname(os.path.abspath(__file__))
  11. STATIC_PATH = os.path.join(LIB_PATH, 'static')
  12. HTML_FILE = os.path.join(STATIC_PATH, 'TP_INDEX.html')
  13. TIME_HTML_FILE = os.path.join(STATIC_PATH, 'TP_TIME.html')
  14. FRIEND_AVATAR_CSS_FILE = os.path.join(STATIC_PATH, 'avatar.css.tpl')
  15. try:
  16. from csscompressor import compress as css_compress
  17. except ImportError:
  18. css_compress = lambda x: x
  19. from .msg import *
  20. from .common.textutil import get_file_b64
  21. from .common.progress import ProgressReporter
  22. from .common.timer import timing
  23. from .smiley import SmileyProvider
  24. from .msgslice import MessageSlicerByTime, MessageSlicerBySize
  25. TEMPLATES_FILES = {TYPE_MSG: "TP_MSG",
  26. TYPE_IMG: "TP_IMG",
  27. TYPE_SPEAK: "TP_SPEAK",
  28. TYPE_EMOJI: "TP_EMOJI",
  29. TYPE_CUSTOM_EMOJI: "TP_EMOJI",
  30. TYPE_LINK: "TP_MSG",
  31. TYPE_REPLY: "TP_REPLY",
  32. TYPE_VIDEO_FILE: "TP_VIDEO_FILE",
  33. TYPE_QQMUSIC: "TP_QQMUSIC",
  34. }
  35. @lru_cache()
  36. def get_template(name: str | int) -> str | None:
  37. """Return the html template given a file name or msg type."""
  38. if isinstance(name, int):
  39. name = TEMPLATES_FILES.get(name, None)
  40. if name is None:
  41. return None
  42. html_path = os.path.join(STATIC_PATH, f"{name}.html")
  43. with open(html_path) as f:
  44. return f.read()
  45. class HTMLRender(object):
  46. def __init__(self, parser, res=None):
  47. with open(HTML_FILE) as f:
  48. self.html = f.read()
  49. with open(TIME_HTML_FILE) as f:
  50. self.time_html = f.read()
  51. self.parser = parser
  52. self.res = res
  53. assert self.res is not None, \
  54. "Resource Directory not given. Cannot render HTML."
  55. self.smiley = SmileyProvider()
  56. css_files = glob.glob(os.path.join(LIB_PATH, 'static/*.css'))
  57. self.css_string = [] # css to add
  58. for css in css_files:
  59. logger.info("Loading {}".format(os.path.basename(css)))
  60. with open(css) as f:
  61. self.css_string.append(f.read())
  62. js_files = glob.glob(os.path.join(LIB_PATH, 'static/*.js'))
  63. # to load jquery before other js
  64. js_files = sorted(js_files, key=lambda f: 'jquery-latest' in f, reverse=True)
  65. self.js_string = []
  66. for js in js_files:
  67. logger.info("Loading {}".format(os.path.basename(js)))
  68. with open(js) as f:
  69. self.js_string.append(f.read())
  70. self.unknown_type_cnt = Counter()
  71. @property
  72. def all_css(self):
  73. # call after processing all messages,
  74. # because smiley css need to be included only when necessary
  75. def process(css):
  76. css = css_compress(css)
  77. return u'<style type="text/css">{}</style>'.format(css)
  78. if hasattr(self, 'final_css'):
  79. return self.final_css + process(self.smiley.gen_used_smiley_css())
  80. self.final_css = u"\n".join(map(process, self.css_string))
  81. return self.final_css + process(self.smiley.gen_used_smiley_css())
  82. @property
  83. def all_js(self):
  84. if hasattr(self, 'final_js'):
  85. return self.final_js
  86. def process(js):
  87. # TODO: add js compress
  88. return u'<script type="text/javascript">{}</script>'.format(js)
  89. self.final_js = u"\n".join(map(process, self.js_string))
  90. return self.final_js
  91. #@timing(total=True)
  92. def render_msg(self, msg: WeChatMsg):
  93. """ render a message, return the html block"""
  94. # TODO for chatroom, add nickname on avatar
  95. sender = u'you ' + msg.talker if not msg.isSend else 'me'
  96. format_dict = {'sender_label': sender,
  97. 'time': msg.createTime }
  98. if not msg.known_type:
  99. self.unknown_type_cnt[msg.type] += 1
  100. if(not msg.isSend and msg.is_chatroom()):
  101. format_dict['nickname'] = '>\n <pre align=\'left\'>'+msg.talker_nickname+'</pre'
  102. else:
  103. format_dict['nickname'] = ' '
  104. def fallback():
  105. template = get_template(TYPE_MSG)
  106. content = msg.msg_str()
  107. content = self.smiley.replace_smileycode(content)
  108. if not msg.known_type:
  109. # Show raw (usually xml) content if unknown.
  110. content = html.escape(content)
  111. return template.format(content=content, **format_dict)
  112. template = get_template(msg.type)
  113. if msg.type == TYPE_SPEAK:
  114. audio_str, duration = self.res.get_voice_mp3(msg.imgPath)
  115. format_dict['voice_duration'] = duration
  116. format_dict['voice_str'] = audio_str
  117. return template.format(**format_dict)
  118. elif msg.type == TYPE_IMG:
  119. # imgPath was original THUMBNAIL_DIRPATH://th_xxxxxxxxx
  120. imgpath = msg.imgPath.split('_')[-1]
  121. if not imgpath:
  122. logger.warn('No imgpath in an image message. Perhaps a bug in wechat.')
  123. return fallback()
  124. bigimgpath = self.parser.imginfo.get(msg.msgSvrId)
  125. fnames = [k for k in [imgpath, bigimgpath] if k]
  126. img = self.res.get_img(fnames)
  127. if not img:
  128. logger.warn("No image thumbnail found for {}".format(imgpath))
  129. return fallback()
  130. # TODO do not show fancybox when no bigimg found
  131. format_dict['img'] = (img, 'jpeg')
  132. return template.format(**format_dict)
  133. elif msg.type == TYPE_QQMUSIC:
  134. jobj = json.loads(msg.msg_str())
  135. content = f"{jobj['title']} - {jobj['singer']}"
  136. if msg.imgPath is not None:
  137. # imgPath was original THUMBNAIL_DIRPATH://th_xxxxxxxxx
  138. imgpath = msg.imgPath.split('_')[-1]
  139. img = self.res.get_img([imgpath])
  140. format_dict['img'] = (img, 'jpeg')
  141. else:
  142. template = get_template("TP_QQMUSIC_NOIMG")
  143. return template.format(url=jobj['url'], content=content, **format_dict)
  144. elif msg.type == TYPE_REPLY:
  145. info = msg.reply_info()
  146. if not info:
  147. return fallback()
  148. def _escape_fmt(s: str) -> str:
  149. return s.replace("{", "{{").replace("}", "}}")
  150. title = info.get("title") or ""
  151. reply_to = info.get("ref_name") or "unknown"
  152. reply_quote = info.get("ref_content") or ""
  153. ref_svrid = info.get("ref_svrid")
  154. if not title and not reply_quote:
  155. return fallback()
  156. format_dict["content"] = _escape_fmt(self.smiley.replace_smileycode(title))
  157. format_dict["reply_to"] = _escape_fmt(reply_to)
  158. reply_thumb_html = ""
  159. ref_msg = getattr(self, "_msg_by_svrid", {}).get(ref_svrid) if ref_svrid is not None else None
  160. if ref_msg is not None:
  161. try:
  162. if ref_msg.type == TYPE_IMG and ref_msg.imgPath:
  163. imgpath = ref_msg.imgPath.split("_")[-1]
  164. bigimgpath = self.parser.imginfo.get(ref_msg.msgSvrId)
  165. fnames = [k for k in [imgpath, bigimgpath] if k]
  166. b64 = self.res.get_img_thumb(fnames, max_size=64)
  167. if b64:
  168. reply_thumb_html = f'<img class="replyThumb" src="data:image/jpeg;base64,{b64}" />'
  169. elif ref_msg.type in (TYPE_VIDEO_FILE, TYPE_WX_VIDEO) and ref_msg.imgPath:
  170. b64 = self.res.get_video_thumb(ref_msg.imgPath, max_size=64)
  171. if b64:
  172. reply_thumb_html = f'<img class="replyThumb" src="data:image/jpeg;base64,{b64}" />'
  173. elif ref_msg.type in (TYPE_EMOJI, TYPE_CUSTOM_EMOJI):
  174. if "emoticonmd5" in ref_msg.content:
  175. pq = PyQuery(ref_msg.content)
  176. md5 = pq("emoticonmd5").text()
  177. else:
  178. md5 = ref_msg.imgPath
  179. if md5:
  180. emoji_img, fmt = self.res.get_emoji_by_md5(md5)
  181. if emoji_img and fmt:
  182. fmt = fmt.lower()
  183. if fmt == "jpg":
  184. fmt = "jpeg"
  185. reply_thumb_html = (
  186. f'<img class="replyThumb replyThumbEmoji" '
  187. f'src="data:image/{fmt};base64,{emoji_img}" />'
  188. )
  189. except Exception:
  190. logger.exception("Failed to render reply thumbnail (%s).", ref_svrid)
  191. if reply_thumb_html:
  192. reply_quote_html = reply_thumb_html
  193. else:
  194. quote_text = self.smiley.replace_smileycode(reply_quote)
  195. reply_quote_html = f'<span class="replyText">{quote_text}</span>'
  196. format_dict["reply_quote_html"] = _escape_fmt(reply_quote_html)
  197. template = template or get_template(TYPE_MSG)
  198. return template.format(**format_dict)
  199. elif msg.type == TYPE_EMOJI or msg.type == TYPE_CUSTOM_EMOJI:
  200. if 'emoticonmd5' in msg.content:
  201. pq = PyQuery(msg.content)
  202. md5 = pq('emoticonmd5').text()
  203. else:
  204. md5 = msg.imgPath
  205. # TODO md5 could exist in both.
  206. # first is emoji md5, second is image2/ md5
  207. # can use fallback here.
  208. if md5:
  209. emoji_img, format = self.res.get_emoji_by_md5(md5)
  210. format_dict['emoji_format'] = format
  211. format_dict['emoji_img'] = emoji_img
  212. else:
  213. import IPython as IP; IP.embed()
  214. return template.format(**format_dict)
  215. elif msg.type == TYPE_LINK:
  216. pq = PyQuery(msg.content_xml_ready)
  217. url = pq('url').text()
  218. if url:
  219. try:
  220. title = pq('title')[0].text
  221. except Exception as e:
  222. logger.warning('No title found in LINK message: ' + str(e))
  223. title = url
  224. content = '<a target="_blank" href="{0}">{1}</a>'.format(url, title)
  225. format_dict['content'] = content
  226. return template.format(**format_dict)
  227. elif msg.type == TYPE_VIDEO_FILE:
  228. video = self.res.get_video(msg.imgPath)
  229. if video is None:
  230. logger.warning(f"Cannot find video {msg.imgPath} ({msg.createTime})")
  231. # fallback
  232. format_dict['content'] = f"VIDEO FILE {msg.imgPath}"
  233. return get_template(TYPE_MSG).format(**format_dict)
  234. elif video.endswith(".mp4"):
  235. video_str = get_file_b64(video)
  236. format_dict["video_str"] = video_str
  237. return template.format(**format_dict)
  238. elif video.endswith(".jpg"):
  239. # only has thumbnail
  240. image_str = get_file_b64(video)
  241. format_dict["img"] = (image_str, 'jpeg')
  242. return get_template(TYPE_IMG).format(**format_dict)
  243. elif msg.type == TYPE_WX_VIDEO:
  244. # TODO: fetch video from resource
  245. return fallback()
  246. return fallback()
  247. def _render_partial_msgs(self, msgs):
  248. """ return single html"""
  249. self.smiley.reset()
  250. slicer = MessageSlicerByTime()
  251. slices = slicer.slice(msgs)
  252. blocks = []
  253. for idx, slice in enumerate(slices):
  254. nowtime = slice[0].createTime
  255. if idx == 0 or \
  256. slices[idx - 1][0].createTime.date() != nowtime.date():
  257. timestr = nowtime.strftime("%m/%d %H:%M:%S")
  258. else:
  259. timestr = nowtime.strftime("%H:%M:%S")
  260. blocks.append(self.time_html.format(time=timestr))
  261. blocks.extend([self.render_msg(m) for m in slice])
  262. self.prgs.trigger(len(slice))
  263. # string operation is extremely slow
  264. return self.html.format(extra_css=self.all_css,
  265. extra_js=self.all_js,
  266. chat=msgs[0].chat_nickname,
  267. messages=u''.join(blocks)
  268. )
  269. def prepare_avatar_css(self, talkers):
  270. with open(FRIEND_AVATAR_CSS_FILE) as f:
  271. avatar_tpl = f.read()
  272. my_avatar = self.res.get_avatar(self.parser.username)
  273. css = avatar_tpl.format(name='me', avatar=my_avatar)
  274. for talker in talkers:
  275. avatar = self.res.get_avatar(talker)
  276. css += avatar_tpl.format(name=talker, avatar=avatar)
  277. self.css_string.append(css)
  278. def render_msgs(self, msgs):
  279. """ render msgs of one chat, return a list of html"""
  280. chatid = msgs[0].chat
  281. self._msg_by_svrid = {m.msgSvrId: m for m in self.parser.msgs_by_chat.get(chatid, msgs)}
  282. if msgs[0].is_chatroom():
  283. talkers = set([m.talker for m in msgs])
  284. else:
  285. talkers = set([msgs[0].talker])
  286. self.prepare_avatar_css(talkers)
  287. self.res.cache_voice_mp3(msgs)
  288. chat = msgs[0].chat_nickname
  289. logger.info(u"Rendering {} messages of {}".format(
  290. len(msgs), chat))
  291. self.prgs = ProgressReporter("Render", total=len(msgs))
  292. slice_by_size = MessageSlicerBySize().slice(msgs)
  293. ret = [self._render_partial_msgs(s) for s in slice_by_size]
  294. self.prgs.finish()
  295. logger.warning("[HTMLRenderer] Unhandled messages (type->cnt): {}".format(self.unknown_type_cnt))
  296. return ret
  297. if __name__ == '__main__':
  298. r = HTMLRender()
  299. with open('/tmp/a.html', 'w') as f:
  300. print >> f, r.html.format(style=r.css, talker='talker',
  301. messages='haha')