bin_to_vec.py 1.1 KB

1234567891011121314151617181920212223242526272829303132333435363738394041
  1. #!/usr/bin/env python
  2. # Copyright (c) 2017-present, Facebook, Inc.
  3. # All rights reserved.
  4. #
  5. # This source code is licensed under the MIT license found in the
  6. # LICENSE file in the root directory of this source tree.
  7. from __future__ import absolute_import
  8. from __future__ import division
  9. from __future__ import print_function
  10. from __future__ import unicode_literals
  11. from __future__ import division, absolute_import, print_function
  12. from fasttext import load_model
  13. import argparse
  14. import errno
  15. if __name__ == "__main__":
  16. parser = argparse.ArgumentParser(
  17. description=("Print fasttext .vec file to stdout from .bin file")
  18. )
  19. parser.add_argument(
  20. "model",
  21. help="Model to use",
  22. )
  23. args = parser.parse_args()
  24. f = load_model(args.model)
  25. words = f.get_words()
  26. print(str(len(words)) + " " + str(f.get_dimension()))
  27. for w in words:
  28. v = f.get_word_vector(w)
  29. vstr = ""
  30. for vi in v:
  31. vstr += " " + str(vi)
  32. try:
  33. print(w + vstr)
  34. except IOError as e:
  35. if e.errno == errno.EPIPE:
  36. pass