|
@@ -70,9 +70,18 @@ def generate_dataset(messages, output_path_source, output_path_target):
|
|
|
pbar.update()
|
|
pbar.update()
|
|
|
|
|
|
|
|
if __name__ == "__main__":
|
|
if __name__ == "__main__":
|
|
|
|
|
+ '''
|
|
|
print('read message')
|
|
print('read message')
|
|
|
msg = read_qq_history_file('data/Octoon 开发组.txt')
|
|
msg = read_qq_history_file('data/Octoon 开发组.txt')
|
|
|
print('filter message')
|
|
print('filter message')
|
|
|
filter_msg(msg)
|
|
filter_msg(msg)
|
|
|
print('write to file')
|
|
print('write to file')
|
|
|
- generate_dataset(msg, 'data/octoon_source.txt', 'data/octoon_target.txt')
|
|
|
|
|
|
|
+ generate_dataset(msg, 'data/octoon_source.txt', 'data/octoon_target.txt')
|
|
|
|
|
+ '''
|
|
|
|
|
+
|
|
|
|
|
+ print('read message')
|
|
|
|
|
+ msg = read_qq_history_file('data/ISOIEC C++ China Unofficial.txt')
|
|
|
|
|
+ print('filter message')
|
|
|
|
|
+ filter_msg(msg)
|
|
|
|
|
+ print('write to file')
|
|
|
|
|
+ generate_dataset(msg, 'data/train_source.txt', 'data/train_target.txt')
|