Explorar el Código

setup.py to support packaging and replace exit with throw

Summary: Add setup.py to the main directory to enable packaging, and replace exit with throw to prevent Python from quitting on fastText library errors.

Reviewed By: ebetica

Differential Revision: D6414897

fbshipit-source-id: 0d614f0dec4b603c4083a9ef8d3bc0a657f1cd99
Christian Puhrsch hace 8 años
padre
commit
a0fa139c06
Se han modificado 6 ficheros con 48 adiciones y 28 borrados
  1. 5 0
      MANIFEST.in
  2. 18 0
      README.md
  3. 0 0
      setup.cfg
  4. 6 5
      setup.py
  5. 2 3
      src/dictionary.cc
  6. 17 20
      src/fasttext.cc

+ 5 - 0
MANIFEST.in

@@ -0,0 +1,5 @@
+include LICENSE
+include PATENTS
+
+recursive-include python *.md *.rst
+recursive-include src *.h

+ 18 - 0
README.md

@@ -22,6 +22,12 @@ For the word-similarity evaluation script you will need:
 * python 2.6 or newer
 * numpy & scipy
 
+For the python bindings (see the subdirectory python) you will need:
+
+* python 2.7 or newer
+* numpy & scipy
+* [pybind11](https://github.com/pybind/pybind11)
+
 ## Building fastText
 
 In order to build `fastText`, use the following:
@@ -35,6 +41,18 @@ $ make
 This will produce object files for all the classes as well as the main binary `fasttext`.
 If you do not plan on using the default system-wide compiler, update the two macros defined at the beginning of the Makefile (CC and INCLUDES).
 
+## Building fastText for Python
+
+Alternatively you can also use the Python bindings.
+
+```
+$ git clone https://github.com/facebookresearch/fastText.git
+$ cd fastText
+$ python setup.py install
+```
+
+For further information and introduction see python/README.md
+
 ## Example use cases
 
 This library has two main use cases: word representation learning and text classification.

+ 0 - 0
python/setup.cfg → setup.cfg


+ 6 - 5
python/setup.py → setup.py

@@ -18,8 +18,8 @@ import sys
 import setuptools
 import os
 
-__version__ = '0.0.1'
-FASTTEXT_SRC = "../src"
+__version__ = '0.0.2'
+FASTTEXT_SRC = "src"
 
 # Based on https://github.com/pybind/python_example
 
@@ -37,7 +37,7 @@ class get_pybind_include(object):
         return pybind11.get_include(self.user)
 
 
-fasttext_src_files = os.listdir(FASTTEXT_SRC)
+fasttext_src_files = map(str, os.listdir(FASTTEXT_SRC))
 fasttext_src_cc = list(filter(lambda x: x.endswith('.cc'), fasttext_src_files))
 
 fasttext_src_cc = list(
@@ -48,7 +48,7 @@ ext_modules = [
     Extension(
         str('fasttext_pybind'),
         [
-            str('fastText/pybind/fasttext_pybind.cc'),
+            str('python/fastText/pybind/fasttext_pybind.cc'),
         ] + fasttext_src_cc,
         include_dirs=[
             # Path to pybind11 headers
@@ -131,8 +131,9 @@ setup(
     ext_modules=ext_modules,
     url='https://github.com/facebookresearch/fastText',
     license='BSD',
-    install_requires=['pybind11>=2.2'],
+    install_requires=['pybind11>=2.2', "setuptools >= 0.7.0"],
     cmdclass={'build_ext': BuildExt},
     packages=[str('fastText')],
+    package_dir={str(''): str('python')},
     zip_safe=False
 )

+ 2 - 3
src/dictionary.cc

@@ -236,9 +236,8 @@ void Dictionary::readFromFile(std::istream& in) {
     std::cerr << "Number of labels: " << nlabels_ << std::endl;
   }
   if (size_ == 0) {
-    std::cerr << "Empty vocabulary. Try a smaller -minCount value."
-              << std::endl;
-    exit(EXIT_FAILURE);
+    throw std::invalid_argument(
+        "Empty vocabulary. Try a smaller -minCount value.");
   }
 }
 

+ 17 - 20
src/fasttext.cc

@@ -86,8 +86,8 @@ void FastText::getSubwordVector(Vector& vec, const std::string& subword)
 void FastText::saveVectors() {
   std::ofstream ofs(args_->output + ".vec");
   if (!ofs.is_open()) {
-    std::cerr << "Error opening file for saving vectors." << std::endl;
-    exit(EXIT_FAILURE);
+    throw std::invalid_argument(
+        args_->output + ".vec" + " cannot be opened for saving vectors!");
   }
   ofs << dict_->nwords() << " " << args_->dim << std::endl;
   Vector vec(args_->dim);
@@ -102,13 +102,12 @@ void FastText::saveVectors() {
 void FastText::saveOutput() {
   std::ofstream ofs(args_->output + ".output");
   if (!ofs.is_open()) {
-    std::cerr << "Error opening file for saving vectors." << std::endl;
-    exit(EXIT_FAILURE);
+    throw std::invalid_argument(
+        args_->output + ".output" + " cannot be opened for saving vectors!");
   }
   if (quant_) {
-    std::cerr << "Option -saveOutput is not supported for quantized models."
-              << std::endl;
-    return;
+    throw std::invalid_argument(
+        "Option -saveOutput is not supported for quantized models.");
   }
   int32_t n = (args_->model == model_name::sup) ? dict_->nlabels()
                                                 : dict_->nwords();
@@ -216,10 +215,10 @@ void FastText::loadModel(std::istream& in) {
   }
 
   if (!quant_input && dict_->isPruned()) {
-    std::cerr << "Invalid model file.\n"
-              << "Please download the updated model from www.fasttext.cc.\n"
-              << "See issue #332 on Github for more information.\n";
-    exit(1);
+    throw std::invalid_argument(
+        "Invalid model file.\n"
+        "Please download the updated model from www.fasttext.cc.\n"
+        "See issue #332 on Github for more information.\n");
   }
 
   in.read((char*) &args_->qout, sizeof(bool));
@@ -601,14 +600,13 @@ void FastText::loadVectors(std::string filename) {
   std::shared_ptr<Matrix> mat; // temp. matrix for pretrained vectors
   int64_t n, dim;
   if (!in.is_open()) {
-    std::cerr << "Pretrained vectors file cannot be opened!" << std::endl;
-    exit(EXIT_FAILURE);
+    throw std::invalid_argument(filename + " cannot be opened for loading!");
   }
   in >> n >> dim;
   if (dim != args_->dim) {
-    std::cerr << "Dimension of pretrained vectors does not match -dim option"
-              << std::endl;
-    exit(EXIT_FAILURE);
+    throw std::invalid_argument(
+        "Dimension of pretrained vectors (" + std::to_string(dim) +
+        ") does not match dimension (" + std::to_string(args_->dim) + ")!");
   }
   mat = std::make_shared<Matrix>(n, dim);
   for (size_t i = 0; i < n; i++) {
@@ -640,13 +638,12 @@ void FastText::train(std::shared_ptr<Args> args) {
   dict_ = std::make_shared<Dictionary>(args_);
   if (args_->input == "-") {
     // manage expectations
-    std::cerr << "Cannot use stdin for training!" << std::endl;
-    exit(EXIT_FAILURE);
+    throw std::invalid_argument("Cannot use stdin for training!");
   }
   std::ifstream ifs(args_->input);
   if (!ifs.is_open()) {
-    std::cerr << "Input file cannot be opened!" << std::endl;
-    exit(EXIT_FAILURE);
+    throw std::invalid_argument(
+        args_->input + " cannot be opened for training!");
   }
   dict_->readFromFile(ifs);
   ifs.close();