Yuxin Wu 1 年之前
父節點
當前提交
5320456e80
共有 8 個文件被更改,包括 50 次插入21 次删除
  1. 12 10
      README.md
  2. 2 1
      android-interact.sh
  3. 11 6
      decrypt-db.py
  4. 1 1
      third-party/silk/Makefile
  5. 5 0
      wechat/avatar.py
  6. 6 0
      wechat/msg.py
  7. 4 1
      wechat/parser.py
  8. 9 2
      wechat/res.py

+ 12 - 10
README.md

@@ -8,14 +8,14 @@ We reverse-engineered the storage protocol of WeChat messages, and
 provide this tool to decrypt and parse WeChat messages on a rooted android phone.
 It can also render the messages into self-contained html files including voice messages, images, emojis, videos, etc.
 
+The tool was last verified to work with the latest version of WeChat on 2025/01/01.
 If the tool works for you, please take a moment to add your phone/OS to [the wiki](https://github.com/ppwwyyxx/wechat-dump/wiki).
 
 ## How to use:
 
 #### Dependencies:
 + adb and rooted android phone connected to a Linux/Mac OSX/Win10+Bash.
-  If the phone does not come with adb support, you can try download an app.
-+ Python >= 3.6
++ Python >= 3.8
 + [sqlcipher](https://github.com/sqlcipher/sqlcipher) >= 4.1
 + sox (command line tools)
 + Silk audio decoder (included; build it with `./third-party/compile_silk.sh`)
@@ -26,7 +26,8 @@ If the tool works for you, please take a moment to add your phone/OS to [the wik
 1. Pull database file and (for older wechat versions) avatar index:
   + Automatic: `./android-interact.sh db`. It may use an incorrect userid.
   + Manual:
-    + Figure out your `${userid}` by inspecting the contents of `/data/data/com.tencent.mm/MicroMsg` on the __root__ filesystem of the device. It should be a 32-character-long name consisting of hexadecimal digits.
+    + Figure out your `${userid}` by inspecting the contents of `/data/data/com.tencent.mm/MicroMsg` on the __root__ filesystem of the device.
+      It should be a 32-character-long name consisting of hexadecimal digits.
     + Get `/data/data/com.tencent.mm/MicroMsg/${userid}/EnMicroMsg.db` from the device.
 2. Decrypt database file:
   + Automatic: `./decrypt-db.py decrypt --input EnMicroMsg.db`
@@ -52,11 +53,12 @@ If the tool works for you, please take a moment to add your phone/OS to [the wik
   If the above decryption doesn't work, you can also try the [password cracker](https://github.com/chg-hou/EnMicroMsg.db-Password-Cracker)
   to brute-force the key. The encryption key is not very strong.
 
-3. Copy the WeChat user resource directory `/mnt/sdcard/tencent/MicroMsg/${userid}/{avatar,emoji,image2,sfs,video,voice2}` from the phone to the `resource` directory:
+3. Copy the WeChat user resource directory `/data/data/com.tencent.mm/MicroMsg/${userid}/{avatar,emoji,image2,sfs,video,voice2}` from the phone to the `resource` directory:
 	+ `./android-interact.sh res`
 	+ Change `RES_DIR` in the script if the location of these directories is different on your phone.
-	+ This can take a while. Can be faster to first archive it with `tar` with or without compression, and then copy the archive,
-		`busybox tar` is recommended as the Android system's `tar` may choke on long paths.
+      For older versions of WeChat, the directory may be `/mnt/sdcard/tencent/MicroMsg/`.
+	+ This can take a while. It can be faster to first archive it with `tar` with or without compression, and then copy the archive,
+  	  `busybox tar` is recommended as the Android system's `tar` may choke on long paths.
 	+ In the end, we need a `resource` directory with the following subdir: `avatar,emoji,image2,sfs,video,voice2`.
 
 4. (Optional) Download the emoji cache from [here](https://github.com/ppwwyyxx/wechat-dump/releases/download/0.1/emoji.cache.tar.bz2)
@@ -101,10 +103,10 @@ Screenshots of generated html:
 
 See [here](http://ppwwyyxx.com/static/wechat/example.html) for an example html.
 
-### TODO List
-+ Fix rare unhandled message types: > 10000 and < 0
-+ Better user experiences... see `grep 'TODO' wechat -R`
-
+### TODO List (help needed!)
+* __IMPORTANT__ Some emojis and chat images are stored in a proprietary "wxgf" format. We don't yet know how to decode this format.
+* Fix rare unhandled message types: > 10000 and < 0
+* Better user experiences... see `grep 'TODO' wechat -R`
 
 ### Donate!
 <a href="https://www.paypal.com/cgi-bin/webscr?cmd=_donations&business=7BC299GRDLEDU&lc=US&item_name=wechat%2ddump&item_number=wechat%2ddump&currency_code=USD&bn=PP%2dDonationsBF%3abtn_donate_SM%2egif%3aNonHosted">

+ 2 - 1
android-interact.sh

@@ -6,7 +6,8 @@ PROG_DIR=`dirname "$PROG_NAME"`
 cd "$PROG_DIR"
 
 # Please check that your path is the same, since this might be different among devices
-RES_DIR="/mnt/sdcard/tencent/MicroMsg"
+# RES_DIR="/mnt/sdcard/tencent/MicroMsg"  # older versions of WeChat use this path.
+RES_DIR="/data/data/com.tencent.mm"
 MM_DIR="/data/data/com.tencent.mm"
 
 echo "Starting rooted adb server..."

+ 11 - 6
decrypt-db.py

@@ -2,6 +2,7 @@
 # -*- coding: utf-8 -*-
 
 import os
+import shlex
 import sys
 import re
 import struct
@@ -21,11 +22,15 @@ RES_DIR = "/mnt/sdcard/tencent/MicroMsg"
 MM_DIR = "/data/data/com.tencent.mm"
 
 
+def adb_command(command):
+    return subproc_succ("adb shell su -c " + shlex.quote(command))
+
+
 def get_uin():
     candidates = []
     try:
         uin = None
-        out = subproc_succ(f"adb shell cat {MM_DIR}/shared_prefs/system_config_prefs.xml")
+        out = adb_command(f"cat {MM_DIR}/shared_prefs/system_config_prefs.xml")
         for line in out.decode('utf-8').split("\n"):
             if "default_uin" in line:
                 line = PyQuery(line)
@@ -40,7 +45,7 @@ def get_uin():
 
     try:
         uin = None
-        out = subproc_succ(f"adb shell cat {MM_DIR}/shared_prefs/com.tencent.mm_preferences.xml")
+        out = adb_command(f"cat {MM_DIR}/shared_prefs/com.tencent.mm_preferences.xml")
         for line in out.decode('utf-8').split("\n"):
             if "last_login_uin" in line:
                 line = PyQuery(line)
@@ -55,7 +60,7 @@ def get_uin():
 
     try:
         uin = None
-        out = subproc_succ(f"adb shell cat {MM_DIR}/shared_prefs/auth_info_key_prefs.xml")
+        out = adb_command(f"cat {MM_DIR}/shared_prefs/auth_info_key_prefs.xml")
         for line in out.decode('utf-8').split("\n"):
             if "auth_uin" in line:
                 line = PyQuery(line)
@@ -69,7 +74,7 @@ def get_uin():
         logger.info(f"found uin={uin} in auth_info_key_prefs.xml")
 
     try:
-        out = subproc_succ(f"adb shell cat {MM_DIR}/MicroMsg/systemInfo.cfg")
+        out = adb_command(f"cat {MM_DIR}/MicroMsg/systemInfo.cfg")
         uin = int(javaobj.loads(out).get(1, 0))
     except:
         logger.warning("default uin not found in systemInfo.cfg")
@@ -101,13 +106,13 @@ def get_imei():
         def get_utf16(self, offset=4):
             return (self.data[offset + 4: offset+4+self.get_int(offset) * 2]).decode('utf-16')
 
-    out = subproc_succ("adb shell service call iphonesubinfo 1")
+    out = adb_command(f"service call iphonesubinfo 1")
     imei = Parcel(out.strip()).get_utf16()
     logger.info(f"found imei={imei} from iphonesubinfo")
     candidates.append(imei)
 
     try:
-        out = subproc_succ(f"adb shell cat {MM_DIR}/MicroMsg/CompatibleInfo.cfg")
+        out = adb_command(f"cat {MM_DIR}/MicroMsg/CompatibleInfo.cfg")
         # https://gist.github.com/ChiChou/36556fd412a9e3216abecf06e084e4d9
         jobj = javaobj.loads(out)
         imei = jobj[258]

+ 1 - 1
third-party/silk/Makefile

@@ -47,7 +47,7 @@ ifeq (yes,$(USE_NEON))
 endif
 
 
-CFLAGS	+= -Wall -enable-threads -O3
+CFLAGS	+= -Wall -O3
 
 CFLAGS  += $(call cppflags-from-defines,$(CDEFINES))
 CFLAGS  += $(call cppflags-from-defines,$(ADDED_DEFINES))

+ 5 - 0
wechat/avatar.py

@@ -54,6 +54,11 @@ class AvatarReader(object):
         candidates = glob.glob(os.path.join(self.avt_dir, dir1, dir2, f"*{avtid}*"))
         candidates = sorted(set(candidates), key=_filename_priority, reverse=True)
         for cand in candidates:
+            if os.path.isdir(cand):
+                candidates.extend(os.path.join(cand, x) for x in os.listdir(cand))
+        for cand in candidates:
+            if os.path.isdir(cand):
+                continue
             try:
                 if cand.endswith(".bm"):
                     return self.read_bm_file(cand)

+ 6 - 0
wechat/msg.py

@@ -14,6 +14,7 @@ TYPE_CUSTOM_EMOJI = 1048625
 TYPE_REDENVELOPE = 436207665
 TYPE_MONEY_TRANSFER = 419430449  # 微信转账
 TYPE_LOCATION_SHARING = -1879048186
+TYPE_REPLY = 822083633  # 回复的消息.
 TYPE_APP_MSG = 16777265
 
 _KNOWN_TYPES = [eval(k) for k in dir() if k.startswith('TYPE_')]
@@ -110,6 +111,11 @@ class WeChatMsg(object):
             except:
                 pass
             return "[Money Transfer]"
+        elif self.type == TYPE_REPLY:
+            pq = PyQuery(self.content_xml_ready)
+            msg = pq('title').text()
+            # TODO parse reply.
+            return msg
         else:
             # TODO replace smiley with text
             return self.content

+ 4 - 1
wechat/parser.py

@@ -74,7 +74,10 @@ SELECT {} FROM message
     def _parse_userinfo(self):
         userinfo_q = self.cc.execute(""" SELECT id, value FROM userinfo """)
         userinfo = dict(userinfo_q)
-        self.username = userinfo[2]
+        self.username = userinfo.get(2, None)
+        if self.username is None:
+            logger.error("Cannot find username in userinfo table!")
+            self.username = input("Please enter your username:")
         logger.info("Your username is: {}".format(self.username))
 
     def _parse_imginfo(self):

+ 9 - 2
wechat/res.py

@@ -147,11 +147,18 @@ class Resource(object):
             if not img_file:
                 return None
             if not img_file.endswith('jpg') and \
-               imghdr.what(img_file) != 'jpeg':
-                im = Image.open(open(img_file, 'rb'))
+                   imghdr.what(img_file) != 'jpeg':
+                try:
+                    im = Image.open(open(img_file, 'rb'))
+                except:
+                    return None
                 buf = io.BytesIO()
                 im.convert('RGB').save(buf, 'JPEG', quality=JPEG_QUALITY)
                 return base64.b64encode(buf.getvalue()).decode('ascii')
+            with open(img_file, 'rb') as f:
+                if f.read(4) == b'wxgf':
+                    logger.warning(f"Don't know how to decode wxgf image {img_file}")
+                    return None
             return get_file_b64(img_file)
 
         big_file = get_jpg_b64(big_file)