|
|
@@ -20,6 +20,12 @@ import os
|
|
|
import math
|
|
|
import argparse
|
|
|
|
|
|
def compat_splitting(line):
    """Split one input line into whitespace-separated text tokens.

    Behaves the same under Python 2 and Python 3: a byte string is
    decoded as UTF-8 before splitting, while a value that is already
    text is split as-is.

    Args:
        line: One line of input, either a UTF-8 encoded byte string
            (``str`` on Python 2, ``bytes`` on Python 3) or a text string.

    Returns:
        A list of text tokens. Empty when the line is blank or contains
        only whitespace.

    Raises:
        UnicodeDecodeError: If a byte-string line is not valid UTF-8.
    """
    # Dispatch on the value's type instead of comparing ``sys.version``
    # against "3": that string comparison is lexicographic (so "10.0"
    # would sort below "3"), and the type check also copes with lines read
    # from a binary-mode file on Python 3 and with already-decoded text on
    # Python 2. ``bytes`` is an alias of ``str`` on Python 2.
    if isinstance(line, bytes):
        line = line.decode('utf8')
    return line.split()
|
|
|
+
|
|
|
def similarity(v1, v2):
|
|
|
n1 = np.linalg.norm(v1)
|
|
|
n2 = np.linalg.norm(v2)
|
|
|
@@ -34,7 +40,7 @@ vectors = {}
|
|
|
fin = open(args.modelPath, 'r')
|
|
|
for i, line in enumerate(fin):
|
|
|
try:
|
|
|
- tab = line.decode('utf8').split()
|
|
|
+ tab = compat_splitting(line)
|
|
|
vec = np.array(tab[1:], dtype=float)
|
|
|
word = tab[0]
|
|
|
if not word in vectors:
|
|
|
@@ -52,7 +58,7 @@ nwords = 0.0
|
|
|
|
|
|
fin = open(args.dataPath, 'r')
|
|
|
for line in fin:
|
|
|
- tline = line.decode('utf8').split()
|
|
|
+ tline = compat_splitting(line)
|
|
|
word1 = tline[0].lower()
|
|
|
word2 = tline[1].lower()
|
|
|
nwords = nwords + 1.0
|