浏览代码

msg get through sqlite

ppwwyyxx 11 年之前
父节点
当前提交
5cc0cefb2b
共有 4 个文件被更改,包括 48 次插入和 23 次删除
  1. +29 -14
      libchat/libchat.py
  2. +11 -3
      wechat/libchathelper.py
  3. +2 -3
      wechat/render.py
  4. +6 -3
      wechat/res.py

+ 29 - 14
libchat/libchat.py

@@ -1,10 +1,12 @@
 #!/usr/bin/env python2
 # -*- coding: UTF-8 -*-
 # File: libchat.py
-# Date: Wed Mar 25 22:46:51 2015 +0800
+# Date: Sat Mar 28 00:08:29 2015 +0800
 # Author: Yuxin Wu <[email protected]>
 import sqlite3
 import os
+from datetime import datetime
+import time
 from collections import namedtuple
 
 SOURCE_ID = {'wechat': 0}
@@ -13,7 +15,7 @@ ChatMsgBase = namedtuple('ChatMsgBase',
           ['source', 'time', 'sender', 'chatroom',
            'text', 'image', 'sound', 'extra_data'])
 class ChatMsg(ChatMsgBase):
-    def __repr__(self):
+    def __repr__(self): # repr must return str?
         return "Msg@{}/{}-{}/{}/{}/{}/{}".format(
             self.time, self.sender, self.chatroom,
             self.text.encode('utf-8'), 'IMG' if self.image else '',
@@ -24,14 +26,19 @@ class SqliteLibChat(object):
 
     def __init__(self, db_file):
         self.db_file = db_file
+        exist = os.path.isfile(db_file)
         self.conn = sqlite3.connect(db_file)
+        self.conn.text_factory = str    # to allow use of raw-byte string
         self.c = self.conn.cursor()
 
+        if not exist:
+            self.create()
+
     def create(self):
         self.c.execute("""
           CREATE TABLE message (
           source SMALLINT,
-          time INTEGER,
+          time TEXT,
           sender TEXT,
           chatroom TEXT,
           text TEXT,
@@ -52,15 +59,24 @@ class SqliteLibChat(object):
         self.c = self.conn.cursor()
         for m in msgs:
             self._add_msg(SqliteLibChat.prefilter(m))
-            self.conn.commit()
+        self.conn.commit()
 
     @staticmethod
     def prefilter(msg):
         source = msg.source
         if isinstance(source, basestring):
             source = SOURCE_ID[source]
-        return ChatMsg(source, *msg[1:])
+        tm = int(time.mktime(msg[1].timetuple()))
+        return ChatMsg(source, tm, *msg[2:])
 
+    @staticmethod
+    def postfilter(msg):
+        # source
+        text = msg[4].decode('utf-8')
+        time = datetime.fromtimestamp(int(msg[1]))
+        return ChatMsg(msg[0], time, msg[2], msg[3],
+                       text=text, image=msg[5],
+                       sound=msg[6], extra_data=msg[7])
 
     def iterate_all_msg(self, predicate=None):
         """ predicate: a dict used as SELECT filter
@@ -73,18 +89,17 @@ class SqliteLibChat(object):
                 ' AND '.join(["{} = {}".format(k, v)
                               for k, v in predicate.iteritems()])))
         for row in self.c.fetchall():
-            yield ChatMsg(
-                *row[:5],
-                image=str(row[5]),
-                sound=str(row[6]),
-                extra_data=str(row[7])
-            )   # use str to get raw bytes
+            yield ChatMsg(*SqliteLibChat.postfilter(row))
 
 
 if __name__ == '__main__':
-    msg = ChatMsg(-1, 1000, 'me', 'room', 'hello', '\x01\x02\x03', '', '')
-    db = SqliteLibChat('./message.db')
+    db = SqliteLibChat(os.path.join(
+        os.path.dirname(__file__), './message.db'))
+
+    #msg = ChatMsg(-1, 1000, 'me', 'room', 'hello', '\x01\x02\x03', '', '')
     #db.add_msgs([msg])
-    for k in db.get_all_msg():
+
+    for k in db.iterate_all_msg():
+        from IPython import embed; embed()
         print k
 

+ 11 - 3
wechat/libchathelper.py

@@ -1,7 +1,7 @@
 #!/usr/bin/env python2
 # -*- coding: UTF-8 -*-
 # File: libchathelper.py
-# Date: Wed Mar 25 23:01:59 2015 +0800
+# Date: Fri Mar 27 22:25:14 2015 +0800
 # Author: Yuxin Wu <[email protected]>
 
 import base64
@@ -12,10 +12,13 @@ logger = logging.getLogger(__name__)
 
 from libchat.libchat import SqliteLibChat, ChatMsg
 from .msg import *
+from .utils import timing, ProgressReporter
 
 class LibChatHelper(object):
     """ Build LibChat messages from WeChat Msg"""
 
+    """ Types of message whose contents are fully parsed.
+    No need to save extra data for them. """
     FullyParsed = [TYPE_MSG, TYPE_SPEAK, TYPE_EMOJI,
                     TYPE_CUSTOM_EMOJI, TYPE_IMG]
 
@@ -63,7 +66,7 @@ class LibChatHelper(object):
     def _get_sound(self, msg):
         if msg.type == TYPE_SPEAK:
             audio_str, duration = self.res.get_voice_mp3(msg.imgPath)
-            return '{}:{}'.format(duration, audio_str)
+            return '{}:{}'.format(duration, base64.b64decode(audio_str))
         return ''
 
     def _get_extra(self, msg):
@@ -84,9 +87,14 @@ class LibChatHelper(object):
 # TODO do we need to save format?
         sound = self._get_sound(msg)
         extra = self._get_extra(msg)
+
+        self.prgs.trigger()
         return ChatMsg(
             'wechat', msg.createTime, sender, chatroom,
             text, img, sound, extra)
 
     def convert_msgs(self, msgs):
-        return [self._convert_msg(m) for m in msgs]
+        self.prgs = ProgressReporter("Convert", total=len(msgs))
+        ret = [self._convert_msg(m) for m in msgs]
+        self.prgs.finish()
+        return ret

+ 2 - 3
wechat/render.py

@@ -1,7 +1,7 @@
 #!/usr/bin/env python2
 # -*- coding: UTF-8 -*-
 # File: render.py
-# Date: Wed Mar 25 22:24:53 2015 +0800
+# Date: Fri Mar 27 23:41:46 2015 +0800
 # Author: Yuxin Wu <[email protected]>
 
 import os
@@ -197,8 +197,7 @@ class HTMLRender(object):
             talkers = set([talker_id])
         self.prepare_avatar_css(talkers)
 
-        voice_paths = [msg.imgPath for msg in msgs if msg.type == TYPE_SPEAK]
-        self.res.cache_voice_mp3(voice_paths)
+        self.res.cache_voice_mp3(msgs)
 
         logger.info(u"Rendering {} messages of {}({})".format(
             len(msgs), self.parser.contacts[talker_id], talker_id))

+ 6 - 3
wechat/res.py

@@ -1,7 +1,7 @@
 #!/usr/bin/env python2
 # -*- coding: UTF-8 -*-
 # File: res.py
-# Date: Wed Mar 25 22:39:59 2015 +0800
+# Date: Fri Mar 27 23:42:16 2015 +0800
 # Author: Yuxin Wu <[email protected]>
 
 import glob
@@ -20,6 +20,7 @@ import pysox
 
 from .avatar import AvatarReader
 from .utils import timing, md5, get_file_b64
+from .msg import TYPE_SPEAK
 
 LIB_PATH = os.path.dirname(os.path.abspath(__file__))
 INTERNAL_EMOJI_DIR = os.path.join(LIB_PATH, 'static', 'internal_emoji')
@@ -81,8 +82,10 @@ class Resource(object):
                 self.get_voice_filename(imgpath))
         return self.voice_cache[idx].get()
 
-    def cache_voice_mp3(self, voice_paths):
-        """ for speed. voice_paths: a collection of imgpath """
+    def cache_voice_mp3(self, msgs):
+        """ for speed.
+        msgs: a collection of WeChatMsg, to cache for later fetch"""
+        voice_paths = [msg.imgPath for msg in msgs if msg.type == TYPE_SPEAK]
         self.voice_cache_idx = {k: idx for idx, k in enumerate(voice_paths)}
         pool = Pool(3)
         self.voice_cache = [pool.apply_async(do_get_voice_mp3,