Jelajahi Sumber

Use userid instead of nickname as the identifier everywhere. (#39)

Yuxin Wu 9 tahun lalu
induk
komit
f8ede162c0
8 berkas diubah dengan 54 penambahan dan 27 penghapusan
  1. 8 6
      dump-html.py
  2. 10 2
      dump-msg.py
  3. 1 1
      list-chats.py
  4. 2 1
      wechat/avatar.py
  5. 1 1
      wechat/msg.py
  6. 27 6
      wechat/parser.py
  7. 5 7
      wechat/render.py
  8. 0 3
      wechat/res.py

+ 8 - 6
dump-html.py

@@ -29,14 +29,16 @@ if __name__ == '__main__':
     output_file = args.output
 
     parser = WeChatDBParser(args.db)
-    res = Resource(parser, args.res, args.avt)
 
-    if name and name in parser.msgs_by_chat:
-        msgs = parser.msgs_by_chat[name]
-    else:
-        sys.stderr.write(u"Valid Contacts: {}\n".format(u'\n'.join(parser.msgs_by_chat.keys())))
-        sys.stderr.write(u"Couldn't find that contact {}.".format(name));
+    try:
+        chatid = parser.get_id_by_nickname(name)
+    except KeyError:
+        sys.stderr.write(u"Valid Contacts: {}\n".format(
+            u'\n'.join(parser.all_chat_nicknames)))
+        sys.stderr.write(u"Couldn't find the chat {}.".format(name));
         sys.exit(1)
+    res = Resource(parser, args.res, args.avt)
+    msgs = parser.msgs_by_chat[chatid]
     print "Number of Messages: ", len(msgs)
     assert len(msgs) > 0
 

+ 10 - 2
dump-msg.py

@@ -22,9 +22,17 @@ if not os.path.isdir(output_dir):
 
 parser = WeChatDBParser(db_file)
 
-for name, msgs in parser.msgs_by_chat.iteritems():
+for chatid, msgs in parser.msgs_by_chat.iteritems():
+    name = parser.contacts[chatid]
+    if len(name) == 0:
+        print u"Chat {} doesn't have a valid display name".format(chatid)
+        name = str(id(chatid))
     print u"Writing msgs for {}".format(name)
     safe_name = safe_filename(name)
-    with open(os.path.join(output_dir, safe_name + '.txt'), 'w') as f:
+    outf = os.path.join(output_dir, safe_name + '.txt')
+    if os.path.isfile(outf):
+        print(u"File {} exists! Skip contact {}".format(outf, name))
+        continue
+    with open(outf, 'w') as f:
         for m in msgs:
             print >> f, m

+ 1 - 1
list-chats.py

@@ -14,4 +14,4 @@ db_file = sys.argv[1]
 parser = WeChatDBParser(db_file)
 chats = parser.msgs_by_chat.keys()
 for k in chats:
-    print k.encode('utf-8'),  '\t', parser.contacts_rev[k].encode('utf-8')
+    print parser.contacts[k],  '\t', k

+ 2 - 1
wechat/avatar.py

@@ -26,7 +26,8 @@ class AvatarReader(object):
 
     def get_avatar(self, username):
         """ username: `username` field in db.rcontact"""
-        if self.avt_db is None: return None
+        if self.avt_db is None:
+            return None
 
         username = ensure_bin_str(username)
         filename = md5(username)

+ 1 - 1
wechat/msg.py

@@ -108,7 +108,7 @@ class WeChatMsg(object):
     def __repr__(self):
         ret = u"{}|{}:{}:{}".format(
             self.type,
-            self.talker if not self.isSend else 'me',
+            self.talker_nickname if not self.isSend else 'me',
             self.createTime,
             ensure_unicode(self.msg_str())).encode('utf-8')
         if self.imgPath:

+ 27 - 6
wechat/parser.py

@@ -31,7 +31,8 @@ class WeChatDBParser(object):
         self.db_fname = db_fname
         self.db_conn = sqlite3.connect(self.db_fname)
         self.cc = self.db_conn.cursor()
-        self.contacts = {}
+        self.contacts = {}      # username -> nickname
+        self.contacts_rev = defaultdict(list)
         self.msgs_by_chat = defaultdict(list)
         self.emoji_groups = {}
         self.emoji_url = {}
@@ -50,7 +51,8 @@ SELECT username,conRemark,nickname FROM rcontact
             else:
                 self.contacts[username] = ensure_unicode(nickname)
 
-        self.contacts_rev = {v: k for k, v in self.contacts.iteritems()}
+        for k, v in self.contacts.iteritems():
+            self.contacts_rev[v].append(k)
         logger.info("Found {} names in `contact` table.".format(len(self.contacts)))
 
     def _parse_msg(self):
@@ -133,7 +135,7 @@ SELECT {} FROM message
         values['chat'] = values['talker']
         try:
             if values['chat'].endswith('@chatroom'):
-                values['chat'] = self.contacts[values['chat']]
+                values['chat_nickname'] = self.contacts[values['chat']]
                 content = values['content']
 
                 if values['isSend'] == 1:
@@ -142,15 +144,34 @@ SELECT {} FROM message
                     values['talker'] = u'SYSTEM'
                 else:
                     talker = content[:content.find(':')]
-                    values['talker'] = self.contacts.get(talker, talker)
+                    values['talker'] = talker
+                    values['talker_nickname'] = self.contacts.get(talker, talker)
 
                 values['content'] = content[content.find('\n') + 1:]
             else:
                 tk_id = values['talker']
-                values['chat'] = self.contacts[tk_id]
-                values['talker'] = self.contacts[tk_id]
+                values['chat'] = tk_id
+                values['chat_nickname'] = self.contacts[tk_id]
+                values['talker'] = tk_id
+                values['talker_nickname'] = self.contacts[tk_id]
         except KeyError:
             # It's possible that messages are kept in database after contacts been deleted
             logger.warn("Unknown contact: {}".format(values.get('talker', '')))
             return None
         return values
+
+    @property
+    def all_chat_ids(self):
+        return self.msgs_by_chat.keys()
+
+    @property
+    def all_chat_nicknames(self):
+        return [self.contacts[k] for k in self.all_chat_ids]
+
+    def get_id_by_nickname(self, nickname):
+        l = self.contacts_rev[nickname]
+        if len(l) == 0:
+            raise KeyError("No contacts have nickname {}".format(nickname))
+        if len(l) > 1:
+            logger.warn("More than one contacts have nickname {}! Using the first contact".format(nickname))
+        return l[0]

+ 5 - 7
wechat/render.py

@@ -167,7 +167,7 @@ class HTMLRender(object):
         # string operation is extremely slow
         return self.html.format(extra_css=self.all_css,
                             extra_js=self.all_js,
-                            chat=msgs[0].chat,
+                            chat=msgs[0].chat_nickname,
                             messages=u''.join(blocks)
                            )
 
@@ -177,23 +177,21 @@ class HTMLRender(object):
         css = avatar_tpl.format(name='me', avatar=my_avatar)
 
         for talker in talkers:
-            avatar = self.res.get_contact_avatar(talker)
+            avatar = self.res.get_avatar(talker)
             css += avatar_tpl.format(name=talker, avatar=avatar)
         self.css_string.append(css)
 
     def render_msgs(self, msgs):
         """ render msgs of one chat, return a list of html"""
-        chat = msgs[0].chat
         if msgs[0].is_chatroom():
-            talkers = set()
-            for msg in msgs:
-                talkers.add(msg.talker)
+            talkers = set([m.talker for m in msgs])
         else:
-            talkers = set([chat])
+            talkers = set([msgs[0].talker])
         self.prepare_avatar_css(talkers)
 
         self.res.cache_voice_mp3(msgs)
 
+        chat = msgs[0].chat_nickname
         logger.info(u"Rendering {} messages of {}".format(
             len(msgs), chat))
 

+ 0 - 3
wechat/res.py

@@ -130,9 +130,6 @@ class Resource(object):
         jpeg_str = buf.getvalue()
         return base64.b64encode(jpeg_str)
 
-    def get_contact_avatar(self, nickname):
-        return self.get_avatar(self.parser.contacts_rev[nickname])
-
     def _get_img_file(self, fnames):
         """ fnames: a list of filename to search for
             return (filename, filename) of (big, small) image.