- # Copyright (c) 2017-present, Facebook, Inc.
- # All rights reserved.
- #
- # This source code is licensed under the MIT license found in the
- # LICENSE file in the root directory of this source tree.
- from __future__ import absolute_import
- from __future__ import division
- from __future__ import print_function
- from __future__ import unicode_literals
- from fasttext import load_model
- from fasttext import tokenize
- import sys
- import time
- import tempfile
- import argparse
def get_word_vector(data, model):
    """Benchmark fastText get_word_vector over every token of a data file.

    Reads the whole of *data*, tokenizes it with fasttext.tokenize, loads
    the model from *model*, then queries a vector for each token, printing
    the elapsed time of the read phase and of the vectorization phase.

    Args:
        data: Path to a text file whose contents are tokenized.
        model: Path to a fastText model file loadable by load_model.
    """
    t1 = time.time()
    print("Reading")
    # Distinct name for the file handle so it is not shadowed by the
    # model object below (the original reused `f` for both).
    with open(data, 'r') as fin:
        tokens = tokenize(fin.read())
    t2 = time.time()
    print("Read TIME: " + str(t2 - t1))
    print("Read NUM : " + str(len(tokens)))
    f = load_model(model)
    # This is not equivalent to piping the data into
    # print-word-vector, because the data is tokenized
    # first.
    t3 = time.time()
    n_tokens = len(tokens)
    # enumerate(..., 1) replaces the original hand-rolled counter
    # (i = 0; i += 1 after each token) with identical values.
    for i, t in enumerate(tokens, 1):
        f.get_word_vector(t)
        if i % 10000 == 0:
            # \r rewrites the same console line with the progress fraction.
            sys.stderr.write("\ri: " + str(float(i / n_tokens)))
            sys.stderr.flush()
    t4 = time.time()
    print("\nVectoring: " + str(t4 - t3))
if __name__ == "__main__":
    # Entry point: two positional paths (model first, data second),
    # then run the benchmark.
    arg_parser = argparse.ArgumentParser(
        description='Simple benchmark for get_word_vector.')
    arg_parser.add_argument('model', help='A model file to use for benchmarking.')
    arg_parser.add_argument('data', help='A data file to use for benchmarking.')
    cli_args = arg_parser.parse_args()
    get_word_vector(cli_args.data, cli_args.model)