Procházet zdrojové kódy

CMakeLists.txt / eval.py lint / pull requests 236, 347, 371

Summary: See title.

Reviewed By: rhysparry

Differential Revision: D6325293

fbshipit-source-id: 1bf7a6a8c1628fac9434a2210ff59ba02eac5897
Christian Puhrsch před 8 roky
rodič
revize
520b59d722
5 změnil soubory, kde provedl 118 přidání a 15 odebrání
  1. 63 0
      CMakeLists.txt
  2. 18 1
      README.md
  3. 24 6
      eval.py
  4. 12 7
      tutorials/supervised-learning.md
  5. 1 1
      tutorials/unsupervised-learning.md

+ 63 - 0
CMakeLists.txt

@@ -0,0 +1,63 @@
+#
+# Copyright (c) 2016-present, Facebook, Inc.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree. An additional grant
+# of patent rights can be found in the PATENTS file in the same directory.
+#
+
+cmake_minimum_required(VERSION 3.5.2)
+# VERSION also defines fasttext_VERSION_MAJOR (0) and fasttext_VERSION_MINOR (1).
+project(fasttext VERSION 0.1)
+
+# Append to the user/toolchain flags instead of overwriting them.
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -std=c++11 -funroll-loops -O3")
+# Provides the Threads::Threads imported target used when linking the binary.
+find_package(Threads REQUIRED)
+
+set(HEADER_FILES
+    src/args.h
+    src/dictionary.h
+    src/fasttext.h
+    src/matrix.h
+    src/model.h
+    src/productquantizer.h
+    src/qmatrix.h
+    src/real.h
+    src/utils.h
+    src/vector.h)
+
+set(SOURCE_FILES
+    src/args.cc
+    src/dictionary.cc
+    src/fasttext.cc
+    src/main.cc
+    src/matrix.cc
+    src/model.cc
+    src/productquantizer.cc
+    src/qmatrix.cc
+    src/utils.cc
+    src/vector.cc)
+
+# Three flavours of the library: shared, static, and static built as PIC.
+add_library(fasttext-shared SHARED ${SOURCE_FILES} ${HEADER_FILES})
+add_library(fasttext-static STATIC ${SOURCE_FILES} ${HEADER_FILES})
+add_library(fasttext-static_pic STATIC ${SOURCE_FILES} ${HEADER_FILES})
+foreach(lib fasttext-shared fasttext-static fasttext-static_pic)
+  target_include_directories(${lib} PUBLIC src)  # the headers above live in src/
+endforeach()
+set_target_properties(fasttext-shared PROPERTIES OUTPUT_NAME fasttext)
+set_target_properties(fasttext-static PROPERTIES OUTPUT_NAME fasttext)
+set_target_properties(fasttext-static_pic PROPERTIES OUTPUT_NAME fasttext_pic
+  POSITION_INDEPENDENT_CODE True)
+
+# Command-line tool; its PUBLIC_HEADER property drives header installation.
+add_executable(fasttext-bin src/main.cc)
+target_link_libraries(fasttext-bin PRIVATE fasttext-static Threads::Threads)
+set_target_properties(fasttext-bin PROPERTIES PUBLIC_HEADER "${HEADER_FILES}" OUTPUT_NAME fasttext)
+install(TARGETS fasttext-shared fasttext-static fasttext-static_pic fasttext-bin
+    RUNTIME DESTINATION bin
+    LIBRARY DESTINATION lib
+    ARCHIVE DESTINATION lib
+    PUBLIC_HEADER DESTINATION include/fasttext)

+ 18 - 1
README.md

@@ -14,9 +14,11 @@ We also provide a [cheatsheet](https://fasttext.cc/docs/en/cheatsheet.html#conte
 Since it uses C++11 features, it requires a compiler with good C++11 support.
 These include :
 
-* (gcc-4.6.3 or newer) or (clang-3.3 or newer)
+* (gcc-4.8.5 or newer) or (clang-3.3 or newer)
 
 Compilation is carried out using a Makefile, so you will need to have a working **make**.
+If you want to use **cmake** you need at least version 3.5.2.
+
 For the word-similarity evaluation script you will need:
 
 * python 2.6 or newer
@@ -28,6 +30,8 @@ For the python bindings (see the subdirectory python) you will need:
 * numpy & scipy
 * [pybind11](https://github.com/pybind/pybind11)
 
+If these requirements make it impossible for you to use fastText, please open an issue and we will try to accommodate you.
+
 ## Building fastText
 
 In order to build `fastText`, use the following:
@@ -41,6 +45,19 @@ $ make
 This will produce object files for all the classes as well as the main binary `fasttext`.
 If you do not plan on using the default system-wide compiler, update the two macros defined at the beginning of the Makefile (CC and INCLUDES).
 
+## Building fastText using cmake
+
+You can also use cmake to build fastText.
+
+```
+$ git clone https://github.com/facebookresearch/fastText.git
+$ cd fastText
+$ mkdir build && cd build && cmake ..
+$ make && make install
+```
+
+This will create the fasttext binary and also all relevant libraries (shared, static, PIC).
+
 ## Building fastText for Python
 
 Alternatively you can also use the Python bindings.

+ 24 - 6
eval.py

@@ -15,27 +15,43 @@ from __future__ import print_function
 from __future__ import unicode_literals
 import numpy as np
 from scipy import stats
-import sys
 import os
 import math
 import argparse
 
+
 def compat_splitting(line):
     return line.decode('utf8').split()
 
+
 def similarity(v1, v2):
     n1 = np.linalg.norm(v1)
     n2 = np.linalg.norm(v2)
     return np.dot(v1, v2) / n1 / n2
 
+
 parser = argparse.ArgumentParser(description='Process some integers.')
-parser.add_argument('--model', '-m', dest='modelPath', action='store', required=True, help='path to model')
-parser.add_argument('--data', '-d', dest='dataPath', action='store', required=True, help='path to data')
+parser.add_argument(
+    '--model',
+    '-m',
+    dest='modelPath',
+    action='store',
+    required=True,
+    help='path to model'
+)
+parser.add_argument(
+    '--data',
+    '-d',
+    dest='dataPath',
+    action='store',
+    required=True,
+    help='path to data'
+)
 args = parser.parse_args()
 
 vectors = {}
 fin = open(args.modelPath, 'rb')
-for i, line in enumerate(fin):
+for _, line in enumerate(fin):
     try:
         tab = compat_splitting(line)
         vec = np.array(tab[1:], dtype=float)
@@ -74,5 +90,7 @@ fin.close()
 
 corr = stats.spearmanr(mysim, gold)
 dataset = os.path.basename(args.dataPath)
-print("{0:20s}: {1:2.0f}  (OOV: {2:2.0f}%)"
-      .format(dataset, corr[0] * 100, math.ceil(drop / nwords * 100.0)))
+print(
+    "{0:20s}: {1:2.0f}  (OOV: {2:2.0f}%)"
+    .format(dataset, corr[0] * 100, math.ceil(drop / nwords * 100.0))
+)

+ 12 - 7
tutorials/supervised-learning.md

@@ -32,13 +32,18 @@ usage: fasttext <command> <args>
 
 The commands supported by fasttext are:
 
-supervised     train a supervised classifier
-test           evaluate a supervised classifier
-predict        predict most likely labels
-predict-prob   predict most likely labels with probabilities
-skipgram       train a skipgram model
-cbow           train a cbow model
-print-vectors  print vectors given a trained model
+  supervised              train a supervised classifier
+  quantize                quantize a model to reduce the memory usage
+  test                    evaluate a supervised classifier
+  predict                 predict most likely labels
+  predict-prob            predict most likely labels with probabilities
+  skipgram                train a skipgram model
+  cbow                    train a cbow model
+  print-word-vectors      print word vectors given a trained model
+  print-sentence-vectors  print sentence vectors given a trained model
+  nn                      query for nearest neighbors
+  analogies               query for analogies
+
 ```
 
 In this tutorial, we mainly use the `supervised`, `test` and `predict` subcommands, which correspond to learning (and using) a text classifier. For an introduction to the other functionalities of fastText, please see the [tutorial about learning word vectors](https://github.com/facebookresearch/fastText/blob/master/tutorials/unsupervised-learning.md).

+ 1 - 1
tutorials/unsupervised-learning.md

@@ -29,7 +29,7 @@ $ perl wikifil.pl data/enwik9 > data/fil9
 We can check the file by running the following command:
 
 ```
-$ head -c 80 data/text9
+$ head -c 80 data/fil9
 anarchism originated as a term of abuse first used against early working class
 ```