get_vocab.py 1.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748
  1. #!/usr/bin/env python
  2. # Copyright (c) 2017-present, Facebook, Inc.
  3. # All rights reserved.
  4. #
  5. # This source code is licensed under the MIT license found in the
  6. # LICENSE file in the root directory of this source tree.
  7. from __future__ import absolute_import
  8. from __future__ import division
  9. from __future__ import print_function
  10. from __future__ import unicode_literals
  11. from __future__ import division, absolute_import, print_function
  12. from fasttext import load_model
  13. import argparse
  14. import errno
  15. if __name__ == "__main__":
  16. parser = argparse.ArgumentParser(
  17. description=(
  18. "Print words or labels and frequency of a model's dictionary"
  19. )
  20. )
  21. parser.add_argument(
  22. "model",
  23. help="Model to use",
  24. )
  25. parser.add_argument(
  26. "-l",
  27. "--labels",
  28. help="Print labels instead of words",
  29. action='store_true',
  30. default=False,
  31. )
  32. args = parser.parse_args()
  33. f = load_model(args.model)
  34. if args.labels:
  35. words, freq = f.get_labels(include_freq=True)
  36. else:
  37. words, freq = f.get_words(include_freq=True)
  38. for w, f in zip(words, freq):
  39. try:
  40. print(w + "\t" + str(f))
  41. except IOError as e:
  42. if e.errno == errno.EPIPE:
  43. pass