před 8 roky · 520b59d722
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -0,0 +1,63 @@
 
				+#
			
 
				+# Copyright (c) 2016-present, Facebook, Inc.
			
 
				+# All rights reserved.
			
 
				+#
			
 
				+# This source code is licensed under the BSD-style license found in the
			
 
				+# LICENSE file in the root directory of this source tree. An additional grant
			
 
				+# of patent rights can be found in the PATENTS file in the same directory.
			
 
				+#
			
 
				+
			
 
				+cmake_minimum_required(VERSION 3.5.2)
			
 
				+project(fasttext)
			
 
				+
			
 
				+# The version number.
			
 
				+set (fasttext_VERSION_MAJOR 0)
			
 
				+set (fasttext_VERSION_MINOR 1)
			
 
				+
			
 
				+include_directories(fasttext)
			
 
				+
			
 
				+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -std=c++11 -funroll-loops -O3")  # append, so flags passed via -DCMAKE_CXX_FLAGS or CXXFLAGS are kept
			
 
				+
			
 
				+set(HEADER_FILES
			
 
				+    src/args.h
			
 
				+    src/dictionary.h
			
 
				+    src/fasttext.h
			
 
				+    src/matrix.h
			
 
				+    src/model.h
			
 
				+    src/productquantizer.h
			
 
				+    src/qmatrix.h
			
 
				+    src/real.h
			
 
				+    src/utils.h
			
 
				+    src/vector.h)
			
 
				+
			
 
				+set(SOURCE_FILES
			
 
				+    src/args.cc
			
 
				+    src/dictionary.cc
			
 
				+    src/fasttext.cc
			
 
				+    src/main.cc
			
 
				+    src/matrix.cc
			
 
				+    src/model.cc
			
 
				+    src/productquantizer.cc
			
 
				+    src/qmatrix.cc
			
 
				+    src/utils.cc
			
 
				+    src/vector.cc)
			
 
				+
			
 
				+add_library(fasttext-shared SHARED ${SOURCE_FILES} ${HEADER_FILES})
			
 
				+add_library(fasttext-static STATIC ${SOURCE_FILES} ${HEADER_FILES})
			
 
				+add_library(fasttext-static_pic STATIC ${SOURCE_FILES} ${HEADER_FILES})
			
 
				+set_target_properties(fasttext-shared PROPERTIES OUTPUT_NAME fasttext)
			
 
				+set_target_properties(fasttext-static PROPERTIES OUTPUT_NAME fasttext)
			
 
				+set_target_properties(fasttext-static_pic PROPERTIES OUTPUT_NAME fasttext_pic
			
 
				+  POSITION_INDEPENDENT_CODE True)
			
 
				+add_executable(fasttext-bin src/main.cc)
			
 
				+target_link_libraries(fasttext-bin PRIVATE pthread fasttext-static)  # PRIVATE: an executable has no consumers to propagate link deps to
			
 
				+set_target_properties(fasttext-bin PROPERTIES PUBLIC_HEADER "${HEADER_FILES}" OUTPUT_NAME fasttext)
			
 
				+install (TARGETS fasttext-shared
			
 
				+    LIBRARY DESTINATION lib)
			
 
				+install (TARGETS fasttext-static
			
 
				+    ARCHIVE DESTINATION lib)
			
 
				+install (TARGETS fasttext-static_pic
			
 
				+    ARCHIVE DESTINATION lib)
			
 
				+install (TARGETS fasttext-bin
			
 
				+    RUNTIME DESTINATION bin
			
 
				+ PUBLIC_HEADER DESTINATION include/fasttext)
			
--- a/README.md
+++ b/README.md
@@ -14,9 +14,11 @@ We also provide a [cheatsheet](https://fasttext.cc/docs/en/cheatsheet.html#conte
 
				 Since it uses C++11 features, it requires a compiler with good C++11 support.
			
 
				 These include :
			
 
				 
			
 
				-* (gcc-4.6.3 or newer) or (clang-3.3 or newer)
			
 
				+* (gcc-4.8.5 or newer) or (clang-3.3 or newer)
			
 
				 
			
 
				 Compilation is carried out using a Makefile, so you will need to have a working **make**.
			
 
				+If you want to use **cmake** you need at least version 3.5.2.
			
 
				+
			
 
				 For the word-similarity evaluation script you will need:
			
 
				 
			
 
				 * python 2.6 or newer
			
@@ -28,6 +30,8 @@ For the python bindings (see the subdirectory python) you will need:
 
				 * numpy & scipy
			
 
				 * [pybind11](https://github.com/pybind/pybind11)
			
 
				 
			
 
				+If these requirements make it impossible for you to use fastText, please open an issue and we will try to accommodate you.
			
 
				+
			
 
				 ## Building fastText
			
 
				 
			
 
				 In order to build `fastText`, use the following:
			
@@ -41,6 +45,19 @@ $ make
 
				 This will produce object files for all the classes as well as the main binary `fasttext`.
			
 
				 If you do not plan on using the default system-wide compiler, update the two macros defined at the beginning of the Makefile (CC and INCLUDES).
			
 
				 
			
 
				+## Building fastText using cmake
			
 
				+
			
 
				+You can also use cmake to build fastText:
			
 
				+
			
 
				+```
			
 
				+$ git clone https://github.com/facebookresearch/fastText.git
			
 
				+$ cd fastText
			
 
				+$ mkdir build && cd build && cmake ..
			
 
				+$ make && make install
			
 
				+```
			
 
				+
			
 
				+This will create the fasttext binary and also all relevant libraries (shared, static, PIC).
			
 
				+
			
 
				 ## Building fastText for Python
			
 
				 
			
 
				 Alternatively you can also use the Python bindings.
			
--- a/eval.py
+++ b/eval.py
@@ -15,27 +15,43 @@ from __future__ import print_function
 
				 from __future__ import unicode_literals
			
 
				 import numpy as np
			
 
				 from scipy import stats
			
 
				-import sys
			
 
				 import os
			
 
				 import math
			
 
				 import argparse
			
 
				 
			
 
				+
			
 
				 def compat_splitting(line):
			
 
				     return line.decode('utf8').split()
			
 
				 
			
 
				+
			
 
				 def similarity(v1, v2):
			
 
				     n1 = np.linalg.norm(v1)
			
 
				     n2 = np.linalg.norm(v2)
			
 
				     return np.dot(v1, v2) / n1 / n2
			
 
				 
			
 
				+
			
 
				 parser = argparse.ArgumentParser(description='Process some integers.')
			
 
				-parser.add_argument('--model', '-m', dest='modelPath', action='store', required=True, help='path to model')
			
 
				-parser.add_argument('--data', '-d', dest='dataPath', action='store', required=True, help='path to data')
			
 
				+parser.add_argument(
			
 
				+    '--model',
			
 
				+    '-m',
			
 
				+    dest='modelPath',
			
 
				+    action='store',
			
 
				+    required=True,
			
 
				+    help='path to model'
			
 
				+)
			
 
				+parser.add_argument(
			
 
				+    '--data',
			
 
				+    '-d',
			
 
				+    dest='dataPath',
			
 
				+    action='store',
			
 
				+    required=True,
			
 
				+    help='path to data'
			
 
				+)
			
 
				 args = parser.parse_args()
			
 
				 
			
 
				 vectors = {}
			
 
				 fin = open(args.modelPath, 'rb')
			
 
				-for i, line in enumerate(fin):
			
 
				+for _, line in enumerate(fin):
			
 
				     try:
			
 
				         tab = compat_splitting(line)
			
 
				         vec = np.array(tab[1:], dtype=float)
			
@@ -74,5 +90,7 @@ fin.close()
 
				 
			
 
				 corr = stats.spearmanr(mysim, gold)
			
 
				 dataset = os.path.basename(args.dataPath)
			
 
				-print("{0:20s}: {1:2.0f}  (OOV: {2:2.0f}%)"
			
 
				-      .format(dataset, corr[0] * 100, math.ceil(drop / nwords * 100.0)))
			
 
				+print(
			
 
				+    "{0:20s}: {1:2.0f}  (OOV: {2:2.0f}%)"
			
 
				+    .format(dataset, corr[0] * 100, math.ceil(drop / nwords * 100.0))
			
 
				+)
			
--- a/tutorials/supervised-learning.md
+++ b/tutorials/supervised-learning.md
@@ -32,13 +32,18 @@ usage: fasttext <command> <args>
 
				 
			
 
				 The commands supported by fasttext are:
			
 
				 
			
 
				-supervised     train a supervised classifier
			
 
				-test           evaluate a supervised classifier
			
 
				-predict        predict most likely labels
			
 
				-predict-prob   predict most likely labels with probabilities
			
 
				-skipgram       train a skipgram model
			
 
				-cbow           train a cbow model
			
 
				-print-vectors  print vectors given a trained model
			
 
				+  supervised              train a supervised classifier
			
 
				+  quantize                quantize a model to reduce the memory usage
			
 
				+  test                    evaluate a supervised classifier
			
 
				+  predict                 predict most likely labels
			
 
				+  predict-prob            predict most likely labels with probabilities
			
 
				+  skipgram                train a skipgram model
			
 
				+  cbow                    train a cbow model
			
 
				+  print-word-vectors      print word vectors given a trained model
			
 
				+  print-sentence-vectors  print sentence vectors given a trained model
			
 
				+  nn                      query for nearest neighbors
			
 
				+  analogies               query for analogies
			
 
				+
			
 
				 ```
			
 
				 
			
 
				 In this tutorial, we mainly use the `supervised`, `test` and `predict` subcommands, which corresponds to learning (and using) text classifier. For an introduction to the other functionalities of fastText, please see the [tutorial about learning word vectors](https://github.com/facebookresearch/fastText/blob/master/tutorials/unsupervised-learning.md).
			
--- a/tutorials/unsupervised-learning.md
+++ b/tutorials/unsupervised-learning.md
@@ -29,7 +29,7 @@ $ perl wikifil.pl data/enwik9 > data/fil9
 
				 We can check the file by running the following command:
			
 
				 
			
 
				 ```
			
 
				-$ head -c 80 data/text9
			
 
				+$ head -c 80 data/fil9
			
 
				 anarchism originated as a term of abuse first used against early working class
			
 
				 ```