11 年之前 · 5cc0cefb2b
--- a/libchat/libchat.py
+++ b/libchat/libchat.py
@@ -1,10 +1,12 @@
 
				 #!/usr/bin/env python2
			
 
				 # -*- coding: UTF-8 -*-
			
 
				 # File: libchat.py
			
 
				-# Date: Wed Mar 25 22:46:51 2015 +0800
			
 
				+# Date: Sat Mar 28 00:08:29 2015 +0800
			
 
				 # Author: Yuxin Wu <[email protected]>
			
 
				 import sqlite3
			
 
				 import os
			
 
				+from datetime import datetime
			
 
				+import time
			
 
				 from collections import namedtuple
			
 
				 
			
 
				 SOURCE_ID = {'wechat': 0}
			
@@ -13,7 +15,7 @@ ChatMsgBase = namedtuple('ChatMsgBase',
 
				           ['source', 'time', 'sender', 'chatroom',
			
 
				            'text', 'image', 'sound', 'extra_data'])
			
 
				 class ChatMsg(ChatMsgBase):
			
 
				-    def __repr__(self):
			
 
				+    def __repr__(self): # Python 2 __repr__ must return a byte str, hence text.encode('utf-8') below
			
 
				         return "Msg@{}/{}-{}/{}/{}/{}/{}".format(
			
 
				             self.time, self.sender, self.chatroom,
			
 
				             self.text.encode('utf-8'), 'IMG' if self.image else '',
			
@@ -24,14 +26,19 @@ class SqliteLibChat(object):
 
				 
			
 
				     def __init__(self, db_file):
			
 
				         self.db_file = db_file
			
 
				+        exist = os.path.isfile(db_file)
			
 
				         self.conn = sqlite3.connect(db_file)
			
 
				+        self.conn.text_factory = str    # to allow use of raw-byte string
			
 
				         self.c = self.conn.cursor()
			
 
				 
			
 
				+        if not exist:
			
 
				+            self.create()
			
 
				+
			
 
				     def create(self):
			
 
				         self.c.execute("""
			
 
				           CREATE TABLE message (
			
 
				           source SMALLINT,
			
 
				-          time INTEGER,
			
 
				+          time TEXT,
			
 
				           sender TEXT,
			
 
				           chatroom TEXT,
			
 
				           text TEXT,
			
@@ -52,15 +59,24 @@ class SqliteLibChat(object):
 
				         self.c = self.conn.cursor()
			
 
				         for m in msgs:
			
 
				             self._add_msg(SqliteLibChat.prefilter(m))
			
 
				-            self.conn.commit()
			
 
				+        self.conn.commit()
			
 
				 
			
 
				     @staticmethod
			
 
				     def prefilter(msg):
			
 
				         source = msg.source
			
 
				         if isinstance(source, basestring):
			
 
				             source = SOURCE_ID[source]
			
 
				-        return ChatMsg(source, *msg[1:])
			
 
				+        tm = int(time.mktime(msg[1].timetuple()))
			
 
				+        return ChatMsg(source, tm, *msg[2:])
			
 
				 
			
 
				+    @staticmethod
			
 
				+    def postfilter(msg):
			
 
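				+        # NOTE(review): source id (msg[0]) is passed through as-is, not mapped back to its name via SOURCE_ID -- confirm intended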
			
 
				+        text = msg[4].decode('utf-8')
			
 
				+        time = datetime.fromtimestamp(int(msg[1]))
			
 
				+        return ChatMsg(msg[0], time, msg[2], msg[3],
			
 
				+                       text=text, image=msg[5],
			
 
				+                       sound=msg[6], extra_data=msg[7])
			
 
				 
			
 
				     def iterate_all_msg(self, predicate=None):
			
 
				         """ predicate: a dict used as SELECT filter
			
@@ -73,18 +89,17 @@ class SqliteLibChat(object):
 
				                 ' AND '.join(["{} = {}".format(k, v)
			
 
				                               for k, v in predicate.iteritems()])))
			
 
				         for row in self.c.fetchall():
			
 
				-            yield ChatMsg(
			
 
				-                *row[:5],
			
 
				-                image=str(row[5]),
			
 
				-                sound=str(row[6]),
			
 
				-                extra_data=str(row[7])
			
 
				-            )   # use str to get raw bytes
			
 
				+            yield ChatMsg(*SqliteLibChat.postfilter(row))
			
 
				 
			
 
				 
			
 
				 if __name__ == '__main__':
			
 
				-    msg = ChatMsg(-1, 1000, 'me', 'room', 'hello', '\x01\x02\x03', '', '')
			
 
				-    db = SqliteLibChat('./message.db')
			
 
				+    db = SqliteLibChat(os.path.join(
			
 
				+        os.path.dirname(__file__), './message.db'))
			
 
				+
			
 
				+    #msg = ChatMsg(-1, 1000, 'me', 'room', 'hello', '\x01\x02\x03', '', '')
			
 
				     #db.add_msgs([msg])
			
 
				-    for k in db.get_all_msg():
			
 
				+
			
 
				+    for k in db.iterate_all_msg():
			
 
				+        from IPython import embed; embed()
			
 
				         print k
			
 
				 
			
--- a/wechat/libchathelper.py
+++ b/wechat/libchathelper.py
@@ -1,7 +1,7 @@
 
				 #!/usr/bin/env python2
			
 
				 # -*- coding: UTF-8 -*-
			
 
				 # File: libchathelper.py
			
 
				-# Date: Wed Mar 25 23:01:59 2015 +0800
			
 
				+# Date: Fri Mar 27 22:25:14 2015 +0800
			
 
				 # Author: Yuxin Wu <[email protected]>
			
 
				 
			
 
				 import base64
			
@@ -12,10 +12,13 @@ logger = logging.getLogger(__name__)
 
				 
			
 
				 from libchat.libchat import SqliteLibChat, ChatMsg
			
 
				 from .msg import *
			
 
				+from .utils import timing, ProgressReporter
			
 
				 
			
 
				 class LibChatHelper(object):
			
 
				     """ Build LibChat messages from WeChat Msg"""
			
 
				 
			
 
				+    """ Types of message whose contents are fully parsed.
			
 
				+    No need to save extra data for them. """
			
 
				     FullyParsed = [TYPE_MSG, TYPE_SPEAK, TYPE_EMOJI,
			
 
				                     TYPE_CUSTOM_EMOJI, TYPE_IMG]
			
 
				 
			
@@ -63,7 +66,7 @@ class LibChatHelper(object):
 
				     def _get_sound(self, msg):
			
 
				         if msg.type == TYPE_SPEAK:
			
 
				             audio_str, duration = self.res.get_voice_mp3(msg.imgPath)
			
 
				-            return '{}:{}'.format(duration, audio_str)
			
 
				+            return '{}:{}'.format(duration, base64.b64decode(audio_str))
			
 
				         return ''
			
 
				 
			
 
				     def _get_extra(self, msg):
			
@@ -84,9 +87,14 @@ class LibChatHelper(object):
 
				 # TODO do we need to save format?
			
 
				         sound = self._get_sound(msg)
			
 
				         extra = self._get_extra(msg)
			
 
				+
			
 
				+        self.prgs.trigger()
			
 
				         return ChatMsg(
			
 
				             'wechat', msg.createTime, sender, chatroom,
			
 
				             text, img, sound, extra)
			
 
				 
			
 
				     def convert_msgs(self, msgs):
			
 
				-        return [self._convert_msg(m) for m in msgs]
			
 
				+        self.prgs = ProgressReporter("Convert", total=len(msgs))
			
 
				+        ret = [self._convert_msg(m) for m in msgs]
			
 
				+        self.prgs.finish()
			
 
				+        return ret
			
--- a/wechat/render.py
+++ b/wechat/render.py
@@ -1,7 +1,7 @@
 
				 #!/usr/bin/env python2
			
 
				 # -*- coding: UTF-8 -*-
			
 
				 # File: render.py
			
 
				-# Date: Wed Mar 25 22:24:53 2015 +0800
			
 
				+# Date: Fri Mar 27 23:41:46 2015 +0800
			
 
				 # Author: Yuxin Wu <[email protected]>
			
 
				 
			
 
				 import os
			
@@ -197,8 +197,7 @@ class HTMLRender(object):
 
				             talkers = set([talker_id])
			
 
				         self.prepare_avatar_css(talkers)
			
 
				 
			
 
				-        voice_paths = [msg.imgPath for msg in msgs if msg.type == TYPE_SPEAK]
			
 
				-        self.res.cache_voice_mp3(voice_paths)
			
 
				+        self.res.cache_voice_mp3(msgs)
			
 
				 
			
 
				         logger.info(u"Rendering {} messages of {}({})".format(
			
 
				             len(msgs), self.parser.contacts[talker_id], talker_id))
			
--- a/wechat/res.py
+++ b/wechat/res.py
@@ -1,7 +1,7 @@
 
				 #!/usr/bin/env python2
			
 
				 # -*- coding: UTF-8 -*-
			
 
				 # File: res.py
			
 
				-# Date: Wed Mar 25 22:39:59 2015 +0800
			
 
				+# Date: Fri Mar 27 23:42:16 2015 +0800
			
 
				 # Author: Yuxin Wu <[email protected]>
			
 
				 
			
 
				 import glob
			
@@ -20,6 +20,7 @@ import pysox
 
				 
			
 
				 from .avatar import AvatarReader
			
 
				 from .utils import timing, md5, get_file_b64
			
 
				+from .msg import TYPE_SPEAK
			
 
				 
			
 
				 LIB_PATH = os.path.dirname(os.path.abspath(__file__))
			
 
				 INTERNAL_EMOJI_DIR = os.path.join(LIB_PATH, 'static', 'internal_emoji')
			
@@ -81,8 +82,10 @@ class Resource(object):
 
				                 self.get_voice_filename(imgpath))
			
 
				         return self.voice_cache[idx].get()
			
 
				 
			
 
				-    def cache_voice_mp3(self, voice_paths):
			
 
				-        """ for speed. voice_paths: a collection of imgpath """
			
 
				+    def cache_voice_mp3(self, msgs):
			
 
				+        """ for speed.
			
 
				+        msgs: a collection of WeChatMsg, to cache for later fetch"""
			
 
				+        voice_paths = [msg.imgPath for msg in msgs if msg.type == TYPE_SPEAK]
			
 
				         self.voice_cache_idx = {k: idx for idx, k in enumerate(voice_paths)}
			
 
				         pool = Pool(3)
			
 
				         self.voice_cache = [pool.apply_async(do_get_voice_mp3,