msg.py 5.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155
  1. # -*- coding: UTF-8 -*-
  2. TYPE_MSG = 1
  3. TYPE_IMG = 3
  4. TYPE_SPEAK = 34
  5. TYPE_NAMECARD = 42
  6. TYPE_VIDEO_FILE = 43
  7. TYPE_EMOJI = 47
  8. TYPE_LOCATION = 48
  9. TYPE_LINK = 49 # link share OR file from web, see https://github.com/ppwwyyxx/wechat-dump/issues/52
  10. TYPE_VOIP = 50
  11. TYPE_WX_VIDEO = 62 # video took by wechat
  12. TYPE_SYSTEM = 10000
  13. TYPE_CUSTOM_EMOJI = 1048625
  14. TYPE_REDENVELOPE = 436207665
  15. TYPE_MONEY_TRANSFER = 419430449 # 微信转账
  16. TYPE_LOCATION_SHARING = -1879048186
  17. TYPE_APP_MSG = 16777265
  18. _KNOWN_TYPES = [eval(k) for k in dir() if k.startswith('TYPE_')]
  19. import re
  20. import io
  21. from pyquery import PyQuery
  22. import xml.etree.ElementTree as ET
  23. import logging
  24. logger = logging.getLogger(__name__)
  25. from common.textutil import ensure_unicode
  26. class WeChatMsg(object):
  27. @staticmethod
  28. def filter_type(tp):
  29. if tp in [TYPE_SYSTEM]:
  30. return True
  31. return False
  32. def __init__(self, values):
  33. for k, v in values.items():
  34. setattr(self, k, v)
  35. if self.type not in _KNOWN_TYPES:
  36. logger.warn("Unhandled message type: {}".format(self.type))
  37. # only to supress repeated warning:
  38. _KNOWN_TYPES.append(self.type)
  39. def msg_str(self):
  40. if self.type == TYPE_LOCATION:
  41. try:
  42. pq = PyQuery(self.content_xml_ready, parser='xml')
  43. loc = pq('location').attr
  44. label = loc['label']
  45. poiname = loc['poiname']
  46. if poiname:
  47. label = poiname
  48. return "LOCATION:" + label + " ({},{})".format(loc['x'], loc['y'])
  49. except:
  50. return "LOCATION: unknown"
  51. elif self.type == TYPE_LINK:
  52. pq = PyQuery(self.content_xml_ready)
  53. url = pq('url').text()
  54. if not url:
  55. # TODO: see https://github.com/ppwwyyxx/wechat-dump/issues/52 for
  56. # more logic to implement
  57. title = pq('title').text()
  58. if title: # may not be correct
  59. return "FILE:{}".format(title)
  60. return "NOT IMPLEMENTED: " + self.content_xml_ready
  61. return "URL:{}".format(url)
  62. elif self.type == TYPE_NAMECARD:
  63. pq = PyQuery(self.content_xml_ready, parser='xml')
  64. msg = pq('msg').attr
  65. name = msg['nickname']
  66. if not name:
  67. name = msg['alias']
  68. if not name:
  69. name = ""
  70. return "NAMECARD: {}".format(self.content_xml_ready)
  71. elif self.type == TYPE_APP_MSG:
  72. pq = PyQuery(self.content_xml_ready, parser='xml')
  73. return pq('title').text()
  74. elif self.type == TYPE_VIDEO_FILE:
  75. return "VIDEO FILE"
  76. elif self.type == TYPE_WX_VIDEO:
  77. return "WeChat VIDEO"
  78. elif self.type == TYPE_VOIP:
  79. return "REQUEST VIDEO CHAT"
  80. elif self.type == TYPE_LOCATION_SHARING:
  81. return "LOCATION SHARING"
  82. elif self.type == TYPE_EMOJI:
  83. # TODO add emoji name
  84. return self.content
  85. elif self.type == TYPE_REDENVELOPE:
  86. data_to_parse = io.BytesIO(self.content.encode('utf-8'))
  87. try:
  88. for event, elem in ET.iterparse(data_to_parse, events=('end',)):
  89. if elem.tag == 'sendertitle':
  90. title = elem.text
  91. return "[RED ENVELOPE]\n{}".format(title)
  92. except:
  93. pass
  94. return "[RED ENVELOPE]"
  95. elif self.type == TYPE_MONEY_TRANSFER:
  96. data_to_parse = io.BytesIO(self.content.encode('utf-8'))
  97. try:
  98. for event, elem in ET.iterparse(data_to_parse, events=('end',)):
  99. if elem.tag == 'des':
  100. title = elem.text
  101. return "[Money Transfer]\n{}".format(title)
  102. except:
  103. pass
  104. return "[Money Transfer]"
  105. else:
  106. # TODO replace smiley with text
  107. return self.content
  108. @property
  109. def content_xml_ready(self):
  110. # remove xml headers to avoid possible errors it may create
  111. header = re.compile(r'<\?.*\?>')
  112. msg = header.sub("", self.content)
  113. return msg
  114. def __repr__(self):
  115. ret = "{}|{}:{}:{}".format(
  116. self.type,
  117. self.talker_nickname if not self.isSend else 'me',
  118. self.createTime,
  119. ensure_unicode(self.msg_str()))
  120. if self.imgPath:
  121. ret = "{}|img:{}".format(ensure_unicode(ret.strip()), self.imgPath)
  122. return ret
  123. else:
  124. return ret
  125. def __lt__(self, r):
  126. return self.createTime < r.createTime
  127. def is_chatroom(self):
  128. return self.talker != self.chat
  129. def get_chatroom(self):
  130. if self.is_chatroom():
  131. return self.chat
  132. else:
  133. return ''
  134. def get_emoji_product_id(self):
  135. assert self.type == TYPE_EMOJI, "Wrong call to get_emoji_product_id()!"
  136. pq = PyQuery(self.content_xml_ready, parser='xml')
  137. emoji = pq('emoji')
  138. if not emoji:
  139. return None
  140. return emoji.attrs['productid']