hace 8 años · 520b59d722
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -0,0 +1,63 @@
 
															+#
														
 
															+# Copyright (c) 2016-present, Facebook, Inc.
														
 
															+# All rights reserved.
														
 
															+#
														
 
															+# This source code is licensed under the BSD-style license found in the
														
 
															+# LICENSE file in the root directory of this source tree. An additional grant
														
 
															+# of patent rights can be found in the PATENTS file in the same directory.
														
 
															+#
														
 
															+
														
 
															+cmake_minimum_required(VERSION 3.5.2)  # same minimum version the README states for cmake builds
														
 
															+project(fasttext)
														
 
															+
														
 
															+# The version number (major and minor kept in plain variables).
														
 
															+set (fasttext_VERSION_MAJOR 0)
														
 
															+set (fasttext_VERSION_MINOR 1)
														
 
															+
														
 
															+include_directories(fasttext)  # NOTE(review): the sources listed below live under src/ - confirm a fasttext/ directory is intended
														
 
															+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -std=c++11 -funroll-loops -O3")  # append, so flags supplied by the user/toolchain are kept instead of overwritten
														
 
															+
														
 
															+set(HEADER_FILES  # headers attached to fasttext-bin as PUBLIC_HEADER and installed with it
														
 
															+    src/args.h
														
 
															+    src/dictionary.h
														
 
															+    src/fasttext.h
														
 
															+    src/matrix.h
														
 
															+    src/model.h
														
 
															+    src/productquantizer.h
														
 
															+    src/qmatrix.h
														
 
															+    src/real.h
														
 
															+    src/utils.h
														
 
															+    src/vector.h)
														
 
															+
														
 
															+set(SOURCE_FILES  # sources compiled into each of the three library targets below
														
 
															+    src/args.cc
														
 
															+    src/dictionary.cc
														
 
															+    src/fasttext.cc
														
 
															+    src/main.cc  # NOTE(review): also the only source of fasttext-bin, which links fasttext-static - confirm it belongs in the libraries
														
 
															+    src/matrix.cc
														
 
															+    src/model.cc
														
 
															+    src/productquantizer.cc
														
 
															+    src/qmatrix.cc
														
 
															+    src/utils.cc
														
 
															+    src/vector.cc)
														
 
															+
														
 
															+add_library(fasttext-shared SHARED ${SOURCE_FILES} ${HEADER_FILES})  # shared library
														
 
															+add_library(fasttext-static STATIC ${SOURCE_FILES} ${HEADER_FILES})  # static library
														
 
															+add_library(fasttext-static_pic STATIC ${SOURCE_FILES} ${HEADER_FILES})  # static library, made position-independent below
														
 
															+set_target_properties(fasttext-shared PROPERTIES OUTPUT_NAME fasttext)  # shared and static artifacts share the base name "fasttext"
														
 
															+set_target_properties(fasttext-static PROPERTIES OUTPUT_NAME fasttext)
														
 
															+set_target_properties(fasttext-static_pic PROPERTIES OUTPUT_NAME fasttext_pic  # distinct base name for the PIC archive
														
 
															+  POSITION_INDEPENDENT_CODE True)
														
 
															+add_executable(fasttext-bin src/main.cc)  # command-line tool; the target name differs from its output name set below
														
 
															+target_link_libraries(fasttext-bin PRIVATE pthread fasttext-static)  # keyword signature; an executable has no consumers, so PRIVATE
														
 
															+set_target_properties(fasttext-bin PROPERTIES PUBLIC_HEADER "${HEADER_FILES}" OUTPUT_NAME fasttext)  # headers are installed through this target's install rule
														
 
															+install (TARGETS fasttext-shared  # shared library goes to lib under the install prefix
														
 
															+    LIBRARY DESTINATION lib)
														
 
															+install (TARGETS fasttext-static  # static archive goes to lib
														
 
															+    ARCHIVE DESTINATION lib)
														
 
															+install (TARGETS fasttext-static_pic  # PIC static archive goes to lib
														
 
															+    ARCHIVE DESTINATION lib)
														
 
															+install (TARGETS fasttext-bin  # executable goes to bin
														
 
															+    RUNTIME DESTINATION bin
														
 
															+ PUBLIC_HEADER DESTINATION include/fasttext)  # headers from the target's PUBLIC_HEADER property
														
--- a/README.md
+++ b/README.md
@@ -14,9 +14,11 @@ We also provide a [cheatsheet](https://fasttext.cc/docs/en/cheatsheet.html#conte
 
															 Since it uses C++11 features, it requires a compiler with good C++11 support.
														
 
															 These include :
														
 
															-* (gcc-4.6.3 or newer) or (clang-3.3 or newer)
														
 
															+* (gcc-4.8.5 or newer) or (clang-3.3 or newer)
														
 
															 Compilation is carried out using a Makefile, so you will need to have a working **make**.
														
 
															+If you want to use **cmake** you need at least version 3.5.2.
														
 
															+
														
 
															 For the word-similarity evaluation script you will need:
														
 
															 * python 2.6 or newer
														
@@ -28,6 +30,8 @@ For the python bindings (see the subdirectory python) you will need:
 
															 * numpy & scipy
														
 
															 * [pybind11](https://github.com/pybind/pybind11)
														
 
															+If these requirements make it impossible for you to use fastText, please open an issue and we will try to accommodate you.
														
 
															+
														
 
															 ## Building fastText
														
 
															 In order to build `fastText`, use the following:
														
@@ -41,6 +45,19 @@ $ make
 
															 This will produce object files for all the classes as well as the main binary `fasttext`.
														
 
															 If you do not plan on using the default system-wide compiler, update the two macros defined at the beginning of the Makefile (CC and INCLUDES).
														
 
															+## Building fastText using cmake
														
 
															+
														
 
															+You can also use cmake to build fastText:
														
 
															+
														
 
															+```
														
 
															+$ git clone https://github.com/facebookresearch/fastText.git
														
 
															+$ cd fastText
														
 
															+$ mkdir build && cd build && cmake ..
														
 
															+$ make && make install
														
 
															+```
														
 
															+
														
 
															+This will create the fasttext binary and also all relevant libraries (shared, static, PIC).
														
 
															+
														
 
															 ## Building fastText for Python
														
 
															 Alternatively you can also use the Python bindings.
														
--- a/eval.py
+++ b/eval.py
@@ -15,27 +15,43 @@ from __future__ import print_function
 
															 from __future__ import unicode_literals
														
 
															 import numpy as np
														
 
															 from scipy import stats
														
 
															-import sys
														
 
															 import os
														
 
															 import math
														
 
															 import argparse
														
 
															+
														
 
															 def compat_splitting(line):
														
 
															     return line.decode('utf8').split()
														
 
															+
														
 
															 def similarity(v1, v2):
														
 
															     n1 = np.linalg.norm(v1)
														
 
															     n2 = np.linalg.norm(v2)
														
 
															     return np.dot(v1, v2) / n1 / n2
														
 
															+
														
 
															 parser = argparse.ArgumentParser(description='Process some integers.')
														
 
															-parser.add_argument('--model', '-m', dest='modelPath', action='store', required=True, help='path to model')
														
 
															-parser.add_argument('--data', '-d', dest='dataPath', action='store', required=True, help='path to data')
														
 
															+parser.add_argument(
														
 
															+    '--model',
														
 
															+    '-m',
														
 
															+    dest='modelPath',
														
 
															+    action='store',
														
 
															+    required=True,
														
 
															+    help='path to model'
														
 
															+)
														
 
															+parser.add_argument(
														
 
															+    '--data',
														
 
															+    '-d',
														
 
															+    dest='dataPath',
														
 
															+    action='store',
														
 
															+    required=True,
														
 
															+    help='path to data'
														
 
															+)
														
 
															 args = parser.parse_args()
														
 
															 vectors = {}
														
 
															 fin = open(args.modelPath, 'rb')
														
 
															-for i, line in enumerate(fin):
														
 
															+for _, line in enumerate(fin):
														
 
															     try:
														
 
															         tab = compat_splitting(line)
														
 
															         vec = np.array(tab[1:], dtype=float)
														
@@ -74,5 +90,7 @@ fin.close()
 
															 corr = stats.spearmanr(mysim, gold)
														
 
															 dataset = os.path.basename(args.dataPath)
														
 
															-print("{0:20s}: {1:2.0f}  (OOV: {2:2.0f}%)"
														
 
															-      .format(dataset, corr[0] * 100, math.ceil(drop / nwords * 100.0)))
														
 
															+print(
														
 
															+    "{0:20s}: {1:2.0f}  (OOV: {2:2.0f}%)"
														
 
															+    .format(dataset, corr[0] * 100, math.ceil(drop / nwords * 100.0))
														
 
															+)
														
--- a/tutorials/supervised-learning.md
+++ b/tutorials/supervised-learning.md
@@ -32,13 +32,18 @@ usage: fasttext <command> <args>
 
															 The commands supported by fasttext are:
														
 
															-supervised     train a supervised classifier
														
 
															-test           evaluate a supervised classifier
														
 
															-predict        predict most likely labels
														
 
															-predict-prob   predict most likely labels with probabilities
														
 
															-skipgram       train a skipgram model
														
 
															-cbow           train a cbow model
														
 
															-print-vectors  print vectors given a trained model
														
 
															+  supervised              train a supervised classifier
														
 
															+  quantize                quantize a model to reduce the memory usage
														
 
															+  test                    evaluate a supervised classifier
														
 
															+  predict                 predict most likely labels
														
 
															+  predict-prob            predict most likely labels with probabilities
														
 
															+  skipgram                train a skipgram model
														
 
															+  cbow                    train a cbow model
														
 
															+  print-word-vectors      print word vectors given a trained model
														
 
															+  print-sentence-vectors  print sentence vectors given a trained model
														
 
															+  nn                      query for nearest neighbors
														
 
															+  analogies               query for analogies
														
 
															+
														
 
															 ```
														
 
															 In this tutorial, we mainly use the `supervised`, `test` and `predict` subcommands, which corresponds to learning (and using) text classifier. For an introduction to the other functionalities of fastText, please see the [tutorial about learning word vectors](https://github.com/facebookresearch/fastText/blob/master/tutorials/unsupervised-learning.md).
														
--- a/tutorials/unsupervised-learning.md
+++ b/tutorials/unsupervised-learning.md
@@ -29,7 +29,7 @@ $ perl wikifil.pl data/enwik9 > data/fil9
 
															 We can check the file by running the following command:
														
 
															 ```
														
 
															-$ head -c 80 data/text9
														
 
															+$ head -c 80 data/fil9
														
 
															 anarchism originated as a term of abuse first used against early working class
														
 
															 ```