Browse code

WebAssembly

Summary: This commit introduces WebAssembly module for fastText.

Reviewed By: EdouardGrave

Differential Revision: D19021740

fbshipit-source-id: e378f0bb70c0e1f4d6382e1e45af03d1e6ddb4f1
Onur Çelebi 5 years ago
parent
commit
13096686fc

+ 4 - 1
.gitignore

@@ -2,8 +2,11 @@
 *.o
 *.bin
 *.vec
+*.bc
+.DS_Store
 data
 fasttext
 result
 website/node_modules/
-
+package-lock.json
+node_modules/

+ 60 - 1
Makefile

@@ -20,6 +20,12 @@ coverage: fasttext
 debug: CXXFLAGS += -g -O0 -fno-inline
 debug: fasttext
 
+wasm: webassembly/fasttext_wasm.js
+
+wasmdebug: export EMCC_DEBUG=1
+wasmdebug: webassembly/fasttext_wasm.js
+
+
 args.o: src/args.cc src/args.h
 	$(CXX) $(CXXFLAGS) -c src/args.cc
 
@@ -63,4 +69,57 @@ fasttext: $(OBJS) src/fasttext.cc
 	$(CXX) $(CXXFLAGS) $(OBJS) src/main.cc -o fasttext
 
 clean:
-	rm -rf *.o *.gcno *.gcda fasttext
+	rm -rf *.o *.gcno *.gcda fasttext *.bc webassembly/fasttext_wasm.js webassembly/fasttext_wasm.wasm
+
+
+EMCXX = em++
+EMCXXFLAGS = --bind --std=c++11 -s WASM=1 -s ALLOW_MEMORY_GROWTH=1 -s "EXTRA_EXPORTED_RUNTIME_METHODS=['addOnPostRun', 'FS']" -s "DISABLE_EXCEPTION_CATCHING=0" -s "EXCEPTION_DEBUG=1" -s "FORCE_FILESYSTEM=1" -s "MODULARIZE=1" -s "EXPORT_ES6=1" -s 'EXPORT_NAME="FastTextModule"' -Isrc/
+EMOBJS = args.bc autotune.bc matrix.bc dictionary.bc loss.bc productquantizer.bc densematrix.bc quantmatrix.bc vector.bc model.bc utils.bc meter.bc fasttext.bc main.bc
+
+
+main.bc: webassembly/fasttext_wasm.cc
+	$(EMCXX) $(EMCXXFLAGS)  webassembly/fasttext_wasm.cc -o main.bc
+
+args.bc: src/args.cc src/args.h
+	$(EMCXX) $(EMCXXFLAGS)  src/args.cc -o args.bc
+
+autotune.bc: src/autotune.cc src/autotune.h
+	$(EMCXX) $(EMCXXFLAGS)  src/autotune.cc -o autotune.bc
+
+matrix.bc: src/matrix.cc src/matrix.h
+	$(EMCXX) $(EMCXXFLAGS) src/matrix.cc -o matrix.bc
+
+dictionary.bc: src/dictionary.cc src/dictionary.h src/args.h
+	$(EMCXX) $(EMCXXFLAGS)  src/dictionary.cc -o dictionary.bc
+
+loss.bc: src/loss.cc src/loss.h src/matrix.h src/real.h
+	$(EMCXX) $(EMCXXFLAGS) src/loss.cc -o loss.bc
+
+productquantizer.bc: src/productquantizer.cc src/productquantizer.h src/utils.h
+	$(EMCXX) $(EMCXXFLAGS)  src/productquantizer.cc -o productquantizer.bc
+
+densematrix.bc: src/densematrix.cc src/densematrix.h src/utils.h src/matrix.h
+	$(EMCXX) $(EMCXXFLAGS) src/densematrix.cc -o densematrix.bc
+
+quantmatrix.bc: src/quantmatrix.cc src/quantmatrix.h src/utils.h src/matrix.h
+	$(EMCXX) $(EMCXXFLAGS) src/quantmatrix.cc -o quantmatrix.bc
+
+vector.bc: src/vector.cc src/vector.h src/utils.h
+	$(EMCXX) $(EMCXXFLAGS)  src/vector.cc -o vector.bc
+
+model.bc: src/model.cc src/model.h src/args.h
+	$(EMCXX) $(EMCXXFLAGS)  src/model.cc -o model.bc
+
+utils.bc: src/utils.cc src/utils.h
+	$(EMCXX) $(EMCXXFLAGS)  src/utils.cc -o utils.bc
+
+meter.bc: src/meter.cc src/meter.h
+	$(EMCXX) $(EMCXXFLAGS)  src/meter.cc -o meter.bc
+
+fasttext.bc: src/fasttext.cc src/*.h
+	$(EMCXX) $(EMCXXFLAGS)  src/fasttext.cc -o fasttext.bc
+
+webassembly/fasttext_wasm.js: $(EMOBJS) webassembly/fasttext_wasm.cc Makefile
+	$(EMCXX) $(EMCXXFLAGS) $(EMOBJS) -o webassembly/fasttext_wasm.js
+
+

+ 21 - 15
src/args.cc

@@ -262,7 +262,8 @@ void Args::printTrainingHelp() {
   std::cerr
       << "\nThe following arguments for training are optional:\n"
       << "  -lr                 learning rate [" << lr << "]\n"
-      << "  -lrUpdateRate       change the rate of updates for the learning rate ["
+      << "  -lrUpdateRate       change the rate of updates for the learning "
+         "rate ["
       << lrUpdateRate << "]\n"
       << "  -dim                size of word vectors [" << dim << "]\n"
       << "  -ws                 size of the context window [" << ws << "]\n"
@@ -270,9 +271,11 @@ void Args::printTrainingHelp() {
       << "  -neg                number of negatives sampled [" << neg << "]\n"
       << "  -loss               loss function {ns, hs, softmax, one-vs-all} ["
       << lossToString(loss) << "]\n"
-      << "  -thread             number of threads (set to 1 to ensure reproducible results) ["
+      << "  -thread             number of threads (set to 1 to ensure "
+         "reproducible results) ["
       << thread << "]\n"
-      << "  -pretrainedVectors  pretrained word vectors for supervised learning ["
+      << "  -pretrainedVectors  pretrained word vectors for supervised "
+         "learning ["
       << pretrainedVectors << "]\n"
       << "  -saveOutput         whether output params should be saved ["
       << boolToString(saveOutput) << "]\n"
@@ -280,17 +283,19 @@ void Args::printTrainingHelp() {
 }
 
 void Args::printAutotuneHelp() {
-  std::cerr
-      << "\nThe following arguments are for autotune:\n"
-      << "  -autotune-validation            validation file to be used for evaluation\n"
-      << "  -autotune-metric                metric objective {f1, f1:labelname} ["
-      << autotuneMetric << "]\n"
-      << "  -autotune-predictions           number of predictions used for evaluation  ["
-      << autotunePredictions << "]\n"
-      << "  -autotune-duration              maximum duration in seconds ["
-      << autotuneDuration << "]\n"
-      << "  -autotune-modelsize             constraint model file size ["
-      << autotuneModelSize << "] (empty = do not quantize)\n";
+  std::cerr << "\nThe following arguments are for autotune:\n"
+            << "  -autotune-validation            validation file to be used "
+               "for evaluation\n"
+            << "  -autotune-metric                metric objective {f1, "
+               "f1:labelname} ["
+            << autotuneMetric << "]\n"
+            << "  -autotune-predictions           number of predictions used "
+               "for evaluation  ["
+            << autotunePredictions << "]\n"
+            << "  -autotune-duration              maximum duration in seconds ["
+            << autotuneDuration << "]\n"
+            << "  -autotune-modelsize             constraint model file size ["
+            << autotuneModelSize << "] (empty = do not quantize)\n";
 }
 
 void Args::printQuantizationHelp() {
@@ -298,7 +303,8 @@ void Args::printQuantizationHelp() {
       << "\nThe following arguments for quantization are optional:\n"
       << "  -cutoff             number of words and ngrams to retain ["
       << cutoff << "]\n"
-      << "  -retrain            whether embeddings are finetuned if a cutoff is applied ["
+      << "  -retrain            whether embeddings are finetuned if a cutoff "
+         "is applied ["
       << boolToString(retrain) << "]\n"
       << "  -qnorm              whether the norm is quantized separately ["
       << boolToString(qnorm) << "]\n"

+ 8 - 6
src/autotune.cc

@@ -416,10 +416,10 @@ void Autotune::train(const Args& autotuneArgs) {
         if (!sizeConstraintWarning && trials_ > 10 &&
             sizeConstraintFailed_ > (trials_ / 2)) {
           sizeConstraintWarning = true;
-          std::cerr
-              << std::endl
-              << "Warning : requested model size is probably too small. You may want to increase `autotune-modelsize`."
-              << std::endl;
+          std::cerr << std::endl
+                    << "Warning : requested model size is probably too small. "
+                       "You may want to increase `autotune-modelsize`."
+                    << std::endl;
         }
       }
     } catch (DenseMatrix::EncounteredNaNError&) {
@@ -442,10 +442,12 @@ void Autotune::train(const Args& autotuneArgs) {
     std::string errorMessage;
     if (sizeConstraintWarning) {
       errorMessage =
-          "Couldn't fulfil model size constraint: please increase `autotune-modelsize`.";
+          "Couldn't fulfil model size constraint: please increase "
+          "`autotune-modelsize`.";
     } else {
       errorMessage =
-          "Didn't have enough time to train once: please increase `autotune-duration`.";
+          "Didn't have enough time to train once: please increase "
+          "`autotune-duration`.";
     }
     throw std::runtime_error(errorMessage);
   } else {

+ 11 - 6
src/densematrix.cc

@@ -43,12 +43,17 @@ void DenseMatrix::uniformThread(real a, int block, int32_t seed) {
 }
 
 void DenseMatrix::uniform(real a, unsigned int thread, int32_t seed) {
-  std::vector<std::thread> threads;
-  for (int i = 0; i < thread; i++) {
-    threads.push_back(std::thread([=]() { uniformThread(a, i, seed); }));
-  }
-  for (int32_t i = 0; i < threads.size(); i++) {
-    threads[i].join();
+  if (thread > 1) {
+    std::vector<std::thread> threads;
+    for (int i = 0; i < thread; i++) {
+      threads.push_back(std::thread([=]() { uniformThread(a, i, seed); }));
+    }
+    for (int32_t i = 0; i < threads.size(); i++) {
+      threads[i].join();
+    }
+  } else {
+    // webassembly can't instantiate `std::thread`
+    uniformThread(a, 0, seed);
   }
 }
 

+ 35 - 15
src/fasttext.cc

@@ -263,7 +263,7 @@ void FastText::loadModel(std::istream& in) {
   buildModel();
 }
 
-void FastText::printInfo(real progress, real loss, std::ostream& log_stream) {
+std::tuple<int64_t, double, double> FastText::progressInfo(real progress) {
   double t = utils::getDuration(start_, std::chrono::steady_clock::now());
   double lr = args_->lr * (1.0 - progress);
   double wst = 0;
@@ -271,14 +271,22 @@ void FastText::printInfo(real progress, real loss, std::ostream& log_stream) {
   int64_t eta = 2592000; // Default to one month in seconds (720 * 3600)
 
   if (progress > 0 && t >= 0) {
-    progress = progress * 100;
-    eta = t * (100 - progress) / progress;
+    eta = t * (1 - progress) / progress;
     wst = double(tokenCount_) / t / args_->thread;
   }
 
+  return std::tuple<double, double, int64_t>(wst, lr, eta);
+}
+
+void FastText::printInfo(real progress, real loss, std::ostream& log_stream) {
+  double wst;
+  double lr;
+  int64_t eta;
+  std::tie<double, double, int64_t>(wst, lr, eta) = progressInfo(progress);
+
   log_stream << std::fixed;
   log_stream << "Progress: ";
-  log_stream << std::setprecision(1) << std::setw(5) << progress << "%";
+  log_stream << std::setprecision(1) << std::setw(5) << (progress * 100) << "%";
   log_stream << " words/sec/thread: " << std::setw(7) << int64_t(wst);
   log_stream << " lr: " << std::setw(9) << std::setprecision(6) << lr;
   log_stream << " avg.loss: " << std::setw(9) << std::setprecision(6) << loss;
@@ -304,7 +312,7 @@ std::vector<int32_t> FastText::selectEmbeddings(int32_t cutoff) const {
   return idx;
 }
 
-void FastText::quantize(const Args& qargs) {
+void FastText::quantize(const Args& qargs, const TrainCallback& callback) {
   if (args_->model != model_name::sup) {
     throw std::invalid_argument(
         "For now we only support quantization of supervised models");
@@ -336,10 +344,9 @@ void FastText::quantize(const Args& qargs) {
       args_->verbose = qargs.verbose;
       auto loss = createLoss(output_);
       model_ = std::make_shared<Model>(input, output, loss, normalizeGradient);
-      startThreads();
+      startThreads(callback);
     }
   }
-
   input_ = std::make_shared<QuantMatrix>(
       std::move(*(input.get())), qargs.dsub, qargs.qnorm);
 
@@ -347,7 +354,6 @@ void FastText::quantize(const Args& qargs) {
     output_ = std::make_shared<QuantMatrix>(
         std::move(*(output.get())), 2, qargs.qnorm);
   }
-
   quant_ = true;
   auto loss = createLoss(output_);
   model_ = std::make_shared<Model>(input_, output_, loss, normalizeGradient);
@@ -615,7 +621,7 @@ bool FastText::keepTraining(const int64_t ntokens) const {
   return tokenCount_ < args_->epoch * ntokens && !trainException_;
 }
 
-void FastText::trainThread(int32_t threadId) {
+void FastText::trainThread(int32_t threadId, const TrainCallback& callback) {
   std::ifstream ifs(args_->input);
   utils::seek(ifs, threadId * utils::size(ifs) / args_->thread);
 
@@ -624,9 +630,18 @@ void FastText::trainThread(int32_t threadId) {
   const int64_t ntokens = dict_->ntokens();
   int64_t localTokenCount = 0;
   std::vector<int32_t> line, labels;
+  uint64_t callbackCounter = 0;
   try {
     while (keepTraining(ntokens)) {
       real progress = real(tokenCount_) / (args_->epoch * ntokens);
+      if (callback && ((callbackCounter++ % 64) == 0)) {
+        double wst;
+        double lr;
+        int64_t eta;
+        std::tie<double, double, int64_t>(wst, lr, eta) =
+            progressInfo(progress);
+        callback(progress, loss_, wst, lr, eta);
+      }
       real lr = args_->lr * (1.0 - progress);
       if (args_->model == model_name::sup) {
         localTokenCount += dict_->getLine(ifs, line, labels);
@@ -717,7 +732,7 @@ std::shared_ptr<Matrix> FastText::createTrainOutputMatrix() const {
   return output;
 }
 
-void FastText::train(const Args& args) {
+void FastText::train(const Args& args, const TrainCallback& callback) {
   args_ = std::make_shared<Args>(args);
   dict_ = std::make_shared<Dictionary>(args_);
   if (args_->input == "-") {
@@ -742,7 +757,7 @@ void FastText::train(const Args& args) {
   auto loss = createLoss(output_);
   bool normalizeGradient = (args_->model == model_name::sup);
   model_ = std::make_shared<Model>(input_, output_, loss, normalizeGradient);
-  startThreads();
+  startThreads(callback);
 }
 
 void FastText::abort() {
@@ -753,14 +768,19 @@ void FastText::abort() {
   }
 }
 
-void FastText::startThreads() {
+void FastText::startThreads(const TrainCallback& callback) {
   start_ = std::chrono::steady_clock::now();
   tokenCount_ = 0;
   loss_ = -1;
   trainException_ = nullptr;
   std::vector<std::thread> threads;
-  for (int32_t i = 0; i < args_->thread; i++) {
-    threads.push_back(std::thread([=]() { trainThread(i); }));
+  if (args_->thread > 1) {
+    for (int32_t i = 0; i < args_->thread; i++) {
+      threads.push_back(std::thread([=]() { trainThread(i, callback); }));
+    }
+  } else {
+    // webassembly can't instantiate `std::thread`
+    trainThread(0, callback);
   }
   const int64_t ntokens = dict_->ntokens();
   // Same condition as trainThread
@@ -772,7 +792,7 @@ void FastText::startThreads() {
       printInfo(progress, loss_, std::cerr);
     }
   }
-  for (int32_t i = 0; i < args_->thread; i++) {
+  for (int32_t i = 0; i < threads.size(); i++) {
     threads[i].join();
   }
   if (trainException_) {

+ 10 - 4
src/fasttext.h

@@ -12,6 +12,7 @@
 
 #include <atomic>
 #include <chrono>
+#include <functional>
 #include <iostream>
 #include <memory>
 #include <queue>
@@ -31,6 +32,10 @@
 namespace fasttext {
 
 class FastText {
+ public:
+  using TrainCallback =
+      std::function<void(float, float, double, double, int64_t)>;
+
  protected:
   std::shared_ptr<Args> args_;
   std::shared_ptr<Dictionary> dict_;
@@ -47,9 +52,9 @@ class FastText {
 
   void signModel(std::ostream&);
   bool checkModel(std::istream&);
-  void startThreads();
+  void startThreads(const TrainCallback& callback = {});
   void addInputVector(Vector&, int32_t) const;
-  void trainThread(int32_t);
+  void trainThread(int32_t, const TrainCallback& callback);
   std::vector<std::pair<real, std::string>> getNN(
       const DenseMatrix& wordVectors,
       const Vector& queryVec,
@@ -73,6 +78,7 @@ class FastText {
   void precomputeWordVectors(DenseMatrix& wordVectors);
   bool keepTraining(const int64_t ntokens) const;
   void buildModel();
+  std::tuple<int64_t, double, double> progressInfo(real progress);
 
  public:
   FastText();
@@ -114,7 +120,7 @@ class FastText {
 
   void getSentenceVector(std::istream& in, Vector& vec);
 
-  void quantize(const Args& qargs);
+  void quantize(const Args& qargs, const TrainCallback& callback = {});
 
   std::tuple<int64_t, double, double>
   test(std::istream& in, int32_t k, real threshold = 0.0);
@@ -146,7 +152,7 @@ class FastText {
       const std::string& wordB,
       const std::string& wordC);
 
-  void train(const Args& args);
+  void train(const Args& args, const TrainCallback& callback = {});
 
   void abort();
 

+ 12 - 6
src/main.cc

@@ -21,19 +21,25 @@ void printUsage() {
       << "usage: fasttext <command> <args>\n\n"
       << "The commands supported by fasttext are:\n\n"
       << "  supervised              train a supervised classifier\n"
-      << "  quantize                quantize a model to reduce the memory usage\n"
+      << "  quantize                quantize a model to reduce the memory "
+         "usage\n"
       << "  test                    evaluate a supervised classifier\n"
-      << "  test-label              print labels with precision and recall scores\n"
+      << "  test-label              print labels with precision and recall "
+         "scores\n"
       << "  predict                 predict most likely labels\n"
-      << "  predict-prob            predict most likely labels with probabilities\n"
+      << "  predict-prob            predict most likely labels with "
+         "probabilities\n"
       << "  skipgram                train a skipgram model\n"
       << "  cbow                    train a cbow model\n"
       << "  print-word-vectors      print word vectors given a trained model\n"
-      << "  print-sentence-vectors  print sentence vectors given a trained model\n"
-      << "  print-ngrams            print ngrams given a trained model and word\n"
+      << "  print-sentence-vectors  print sentence vectors given a trained "
+         "model\n"
+      << "  print-ngrams            print ngrams given a trained model and "
+         "word\n"
       << "  nn                      query for nearest neighbors\n"
       << "  analogies               query for analogies\n"
-      << "  dump                    dump arguments,dictionary,input/output vectors\n"
+      << "  dump                    dump arguments,dictionary,input/output "
+         "vectors\n"
       << std::endl;
 }
 

+ 0 - 1
src/real.h

@@ -11,5 +11,4 @@
 namespace fasttext {
 
 typedef float real;
-
 }

+ 37 - 0
webassembly/README.md

@@ -0,0 +1,37 @@
+# fastText [![CircleCI](https://circleci.com/gh/facebookresearch/fastText/tree/master.svg?style=svg)](https://circleci.com/gh/facebookresearch/fastText/tree/master)
+
+[fastText](https://fasttext.cc/) is a library for efficient learning of word representations and sentence classification.
+
+In this document we present how to use fastText in a browser with WebAssembly.
+
+
+# Requirements
+
+[fastText](https://fasttext.cc/) builds on modern Mac OS and Linux distributions.
+Since it uses C\++11 features, it requires a compiler with good C++11 support.
+You will need [emscripten](https://emscripten.org/) and a [browser that supports WebAssembly](https://caniuse.com/#feat=wasm).
+
+
+# Building WebAssembly binaries
+
+First, download and install emscripten sdk as [described here](https://emscripten.org/docs/getting_started/downloads.html#installation-instructions)
+
+
+Make sure you activated the PATH for emscripten:
+```bash
+$ source /path/to/emsdk/emsdk_env.sh
+```
+
+Clone our [repository](https://github.com/facebookresearch/fastText/).
+
+```bash
+$ git clone git@github.com:facebookresearch/fastText.git
+```
+
+Build WebAssembly binaries:
+```bash
+$ cd fastText
+$ make wasm
+```
+
+

+ 62 - 0
webassembly/doc/examples/misc.html

@@ -0,0 +1,62 @@
+<!DOCTYPE html>
+<html>
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1, minimum-scale=1.0, maximum-scale=1.0, user-scalable=no">
+</head>
+<body>
+    <script type="module">
+        const printVector = function(predictions, limit) {
+            limit = limit || Infinity;
+
+            for (let i=0; i<predictions.size() && i<limit; i++){
+                let prediction = predictions.get(i);
+                console.log(predictions.get(i));
+            }
+        }
+
+        import {FastText, addOnPostRun} from "./fasttext.js";
+
+        addOnPostRun(() => {
+            let ft = new FastText();
+
+            const url = "lid.176.ftz";
+            ft.loadModel(url).then(model => {
+                /* isQuant */
+                console.log(model.isQuant());
+
+                /* getDimension */
+                console.log(model.getDimension());
+
+                /* getWordVector */
+                let v = model.getWordVector("Hello");
+                console.log(v);
+
+                /* getSentenceVector */
+                let v1 = model.getSentenceVector("Hello");
+                console.log(v1);
+                let v2 = model.getSentenceVector("Hello this is a sentence");
+                console.log(v2);
+
+                /* getNearestNeighbors */
+                printVector(model.getNearestNeighbors("Hello", 10));
+
+                /* getAnalogies */
+                printVector(model.getAnalogies("paris", "france", "london", 10));
+
+                /* getWordId */
+                console.log(model.getWordId("Hello"));
+
+                /* getSubwords */
+                let subWordInformation = model.getSubwords("désinstitutionnalisation");
+                printVector(subWordInformation[0]);
+
+                /* getInputVector */
+                console.log(model.getInputVector(832));
+            });
+        });
+
+    </script>
+</body>
+
+</html>

+ 42 - 0
webassembly/doc/examples/predict.html

@@ -0,0 +1,42 @@
+<!DOCTYPE html>
+<html>
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1, minimum-scale=1.0, maximum-scale=1.0, user-scalable=no">
+</head>
+<body>
+    <script type="module">
+        const printVector = function(predictions, limit) {
+            limit = limit || Infinity;
+
+            for (let i=0; i<predictions.size() && i<limit; i++){
+                let prediction = predictions.get(i);
+                console.log(predictions.get(i));
+            }
+        }
+
+        import {FastText, addOnPostRun} from "./fasttext.js";
+
+        addOnPostRun(() => {
+            let ft = new FastText();
+
+            const url = "lid.176.ftz";
+            ft.loadModel(url).then(model => {
+                let text = "Bonjour à tous. Ceci est du français";
+                console.log(text);
+                printVector(model.predict(text, 5, 0.0));
+
+                text = "Hello, world. This is english";
+                console.log(text);
+                printVector(model.predict(text, 5, 0.0));
+
+                text = "Merhaba dünya. Bu da türkçe"
+                console.log(text);
+                printVector(model.predict(text, 5, 0.0));
+            });
+        });
+
+    </script>
+</body>
+
+</html>

+ 66 - 0
webassembly/doc/examples/train_supervised.html

@@ -0,0 +1,66 @@
+<!DOCTYPE html>
+<html>
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1, minimum-scale=1.0, maximum-scale=1.0, user-scalable=no">
+</head>
+<body>
+    <script type="module">
+        const printVector = function(predictions, limit) {
+            limit = limit || Infinity;
+
+            for (let i=0; i<predictions.size() && i<limit; i++){
+                let prediction = predictions.get(i);
+                console.log(predictions.get(i));
+            }
+        }
+
+        const trainCallback = (progress, loss, wst, lr, eta) => {
+            console.log([progress, loss, wst, lr, eta]);
+        };
+
+        import {FastText, addOnPostRun} from "./fasttext.js";
+
+        addOnPostRun(() => {
+            let ft = new FastText();
+
+            ft.trainSupervised("cooking.train", {
+                'lr':1.0,
+                'epoch':10,
+                'loss':'hs',
+                'wordNgrams':2,
+                'dim':50,
+                'bucket':200000
+            }, trainCallback).then(model => {
+                console.log('Trained.');
+
+                printVector(model.predict("Which baking dish is best to bake a banana bread ?", 5, 0.0));
+
+                /* getInputMatrix */
+                let inputMatrix = model.getInputMatrix();
+                console.log(inputMatrix.cols());
+                console.log(inputMatrix.rows());
+                console.log(inputMatrix.at(1, 2));
+
+                /* getOutputMatrix */
+                let outputMatrix = model.getOutputMatrix();
+                console.log(outputMatrix.cols());
+                console.log(outputMatrix.rows());
+                console.log(outputMatrix.at(1, 2));
+
+                /* getWords */
+                let wordsInformation = model.getWords();
+                printVector(wordsInformation[0], 30);   // words
+                printVector(wordsInformation[1], 30);   // frequencies
+
+                /* getLabels */
+                let labelsInformation = model.getLabels();
+                printVector(labelsInformation[0], 30);  // labels
+                printVector(labelsInformation[1], 30);  // frequencies
+            });
+        });
+
+    </script>
+</body>
+
+</html>

+ 44 - 0
webassembly/doc/examples/train_unsupervised.html

@@ -0,0 +1,44 @@
+<!DOCTYPE html>
+<html>
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1, minimum-scale=1.0, maximum-scale=1.0, user-scalable=no">
+</head>
+<body>
+    <script type="module">
+        const printVector = function(predictions, limit) {
+            limit = limit || Infinity;
+
+            for (let i=0; i<predictions.size() && i<limit; i++){
+                let prediction = predictions.get(i);
+                console.log(predictions.get(i));
+            }
+        }
+
+        const trainCallback = (progress, loss, wst, lr, eta) => {
+            console.log([progress, loss, wst, lr, eta]);
+        };
+
+        import {FastText, addOnPostRun} from "./fasttext.js";
+
+        addOnPostRun(() => {
+            let ft = new FastText();
+
+            ft.trainUnsupervised("fil9", 'skipgram', {
+                'lr':0.1,
+                'epoch':1,
+                'loss':'ns',
+                'wordNgrams':2,
+                'dim':50,
+                'bucket':200000
+            }, trainCallback).then(model => {
+                let wordsInformation = model.getWords();
+                printVector(wordsInformation[0], 30);   // words
+                printVector(wordsInformation[1], 30);   // frequencies
+            });
+        });
+
+    </script>
+</body>
+
+</html>

+ 520 - 0
webassembly/fasttext.js

@@ -0,0 +1,520 @@
+/**
+ * Copyright (c) 2016-present, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+import fastTextModularized from './fasttext_wasm.js';
+const fastTextModule = fastTextModularized();
+
+// Single user callback to run once the wasm runtime is initialized.
+// NOTE(review): only one callback is stored — a second call to
+// `addOnPostRun` silently replaces the first; confirm that is intended.
+let postRunFunc = null;
+const addOnPostRun = function(func) {
+  postRunFunc = func;
+};
+
+fastTextModule.addOnPostRun(() => {
+  if (postRunFunc) {
+    postRunFunc();
+  }
+});
+
+// NOTE(review): at ES-module top level `this` is undefined, so the
+// `(thisModule && thisModule.fetch) || fetch` guards below always fall back
+// to the global `fetch` — verify this was meant as an environment shim.
+const thisModule = this;
+// Fixed file paths inside the Emscripten in-memory filesystem.
+const trainFileInWasmFs = 'train.txt';
+const testFileInWasmFs = 'test.txt';
+const modelFileInWasmFs = 'model.bin';
+
+// Allocate a `len`-element float32 buffer on the wasm heap and describe it
+// in the {ptr, size} shape consumed by the C++ Float32ArrayBridge.
+// NOTE(review): the buffer is _malloc'd but never freed — each call leaks
+// len * 4 bytes of wasm heap; confirm whether that is acceptable here.
+const getFloat32ArrayFromHeap = (len) => {
+  const dataBytes = len * Float32Array.BYTES_PER_ELEMENT;
+  const dataPtr = fastTextModule._malloc(dataBytes);
+  const dataHeap = new Uint8Array(fastTextModule.HEAPU8.buffer,
+    dataPtr,
+    dataBytes);
+  return {
+    'ptr':dataHeap.byteOffset,
+    'size':len,
+    'buffer':dataHeap.buffer
+  };
+};
+
+// View a heap descriptor from `getFloat32ArrayFromHeap` as a Float32Array.
+const heapToFloat32 = (r) => new Float32Array(r.buffer, r.ptr, r.size);
+
+class FastText {
+  constructor() {
+    this.f = new fastTextModule.FastText();
+  }
+
+  /**
+   * loadModel
+   *
+   * Loads the model file from the specified url, and returns the
+   * corresponding `FastTextModel` object.
+   *
+   * @param {string}     url
+   *     the url of the model file.
+   *
+   * @return {Promise}   promise object that resolves to a `FastTextModel`
+   *
+   */
+  loadModel(url) {
+    const fetchFunc = (thisModule && thisModule.fetch) || fetch;
+
+    const fastTextNative = this.f;
+    return new Promise(function(resolve, reject) {
+      fetchFunc(url).then(response => {
+        return response.arrayBuffer();
+      }).then(bytes => {
+        const byteArray = new Uint8Array(bytes);
+        const FS = fastTextModule.FS;
+        FS.writeFile(modelFileInWasmFs, byteArray);
+      }).then(() =>  {
+        fastTextNative.loadModel(modelFileInWasmFs);
+        resolve(new FastTextModel(fastTextNative));
+      }).catch(error => {
+        reject(error);
+      });
+    });
+  }
+
+  _train(url, modelName, kwargs = {}, callback = null) {
+    const fetchFunc = (thisModule && thisModule.fetch) || fetch;
+    const fastTextNative = this.f;
+
+    return new Promise(function(resolve, reject) {
+      fetchFunc(url).then(response => {
+        return response.arrayBuffer();
+      }).then(bytes => {
+        const byteArray = new Uint8Array(bytes);
+        const FS = fastTextModule.FS;
+        FS.writeFile(trainFileInWasmFs, byteArray);
+      }).then(() =>  {
+        const argsList = ['lr', 'lrUpdateRate', 'dim', 'ws', 'epoch',
+          'minCount', 'minCountLabel', 'neg', 'wordNgrams', 'loss',
+          'model', 'bucket', 'minn', 'maxn', 't', 'label', 'verbose',
+          'pretrainedVectors', 'saveOutput', 'seed', 'qout', 'retrain',
+          'qnorm', 'cutoff', 'dsub', 'qnorm', 'autotuneValidationFile',
+          'autotuneMetric', 'autotunePredictions', 'autotuneDuration',
+          'autotuneModelSize'];
+        const args = new fastTextModule.Args();
+        argsList.forEach(k => {
+          if (k in kwargs) {
+            args[k] = kwargs[k];
+          }
+        });
+        args.model = fastTextModule.ModelName[modelName];
+        args.loss = ('loss' in kwargs) ?
+          fastTextModule.LossName[kwargs['loss']] : 'hs';
+        args.thread = 1;
+        args.input = trainFileInWasmFs;
+
+        fastTextNative.train(args, callback);
+
+        resolve(new FastTextModel(fastTextNative));
+      }).catch(error => {
+        reject(error);
+      });
+    });
+  }
+
+  /**
+   * trainSupervised
+   *
+   * Downloads the input file from the specified url, trains a supervised
+   * model and returns a `FastTextModel` object.
+   *
+   * @param {string}     url
+   *     the url of the input file.
+   *     The input file must must contain at least one label per line. For an
+   *     example consult the example datasets which are part of the fastText
+   *     repository such as the dataset pulled by classification-example.sh.
+   *
+   * @param {dict}       kwargs
+   *     train parameters.
+   *     For example {'lr': 0.5, 'epoch': 5}
+   *
+   * @param {function}   callback
+   *     train callback function
+   *     `callback` function is called regularly from the train loop:
+   *     `callback(progress, loss, wordsPerSec, learningRate, eta)`
+   *
+   * @return {Promise}   promise object that resolves to a `FastTextModel`
+   *
+   */
+  trainSupervised(url, kwargs = {}, callback) {
+    const self = this;
+    return new Promise(function(resolve, reject) {
+      self._train(url, 'supervised', kwargs, callback).then(model => {
+        resolve(model);
+      }).catch(error => {
+        reject(error);
+      });
+    });
+  }
+
+  /**
+   * trainUnsupervised
+   *
+   * Downloads the input file from the specified url, trains an unsupervised
+   * model and returns a `FastTextModel` object.
+   *
+   * @param {string}     url
+   *     the url of the input file.
+   *     The input file must not contain any labels or use the specified label
+   *     prefixunless it is ok for those words to be ignored. For an example
+   *     consult the dataset pulled by the example script word-vector-example.sh
+   *     which is part of the fastText repository.
+   *
+   * @param {string}     modelName
+   *     Model to be used for unsupervised learning. `cbow` or `skipgram`.
+   *
+   * @param {dict}       kwargs
+   *     train parameters.
+   *     For example {'lr': 0.5, 'epoch': 5}
+   *
+   * @param {function}   callback
+   *     train callback function
+   *     `callback` function is called regularly from the train loop:
+   *     `callback(progress, loss, wordsPerSec, learningRate, eta)`
+   *
+   * @return {Promise}   promise object that resolves to a `FastTextModel`
+   *
+   */
+  trainUnsupervised(url, modelName, kwargs = {}, callback) {
+    const self = this;
+    return new Promise(function(resolve, reject) {
+      self._train(url, modelName, kwargs, callback).then(model => {
+        resolve(model);
+      }).catch(error => {
+        reject(error);
+      });
+    });
+  }
+
+}
+
+
+class FastTextModel {
+  /**
+     * `FastTextModel` represents a trained model.
+     *
+     * @constructor
+     *
+     * @param {object}       fastTextNative
+     *     webassembly object that makes the bridge between js and C++
+     */
+  constructor(fastTextNative) {
+    this.f = fastTextNative;
+  }
+
+  /**
+     * isQuant
+     *
+     * @return {bool}   true if the model is quantized
+     *
+     */
+  isQuant() {
+    return this.f.isQuant;
+  }
+
+  /**
+     * getDimension
+     *
+     * @return {int}    the dimension (size) of a lookup vector (hidden layer)
+     *
+     */
+  getDimension() {
+    return this.f.args.dim;
+  }
+
+  /**
+     * getWordVector
+     *
+     * @param {string}          word
+     *
+     * @return {Float32Array}   the vector representation of `word`.
+     *
+     */
+  getWordVector(word) {
+    const b = getFloat32ArrayFromHeap(this.getDimension());
+    this.f.getWordVector(b, word);
+
+    return heapToFloat32(b);
+  }
+
+  /**
+     * getSentenceVector
+     *
+     * @param {string}          text
+     *
+     * @return {Float32Array}   the vector representation of `text`.
+     *
+     */
+  getSentenceVector(text) {
+    if (text.indexOf('\n') != -1) {
+      "sentence vector processes one line at a time (remove '\\n')";
+    }
+    text += '\n';
+    const b = getFloat32ArrayFromHeap(this.getDimension());
+    this.f.getSentenceVector(b, text);
+
+    return heapToFloat32(b);
+  }
+
+  /**
+     * getNearestNeighbors
+     *
+     * returns the nearest `k` neighbors of `word`.
+     *
+     * @param {string}          word
+     * @param {int}             k
+     *
+     * @return {Array.<Pair.<number, string>>}
+     *     words and their corresponding cosine similarities.
+     *
+     */
+  getNearestNeighbors(word, k = 10) {
+    return this.f.getNN(word, k);
+  }
+
+  /**
+     * getAnalogies
+     *
+     * returns the nearest `k` neighbors of the operation
+     * `wordA - wordB + wordC`.
+     *
+     * @param {string}          wordA
+     * @param {string}          wordB
+     * @param {string}          wordC
+     * @param {int}             k
+     *
+     * @return {Array.<Pair.<number, string>>}
+     *     words and their corresponding cosine similarities
+     *
+     */
+  getAnalogies(wordA, wordB, wordC, k) {
+    return this.f.getAnalogies(k, wordA, wordB, wordC);
+  }
+
+  /**
+     * getWordId
+     *
+     * Given a word, get the word id within the dictionary.
+     * Returns -1 if word is not in the dictionary.
+     *
+     * @return {int}    word id
+     *
+     */
+  getWordId(word) {
+    return this.f.getWordId(word);
+  }
+
+  /**
+     * getSubwordId
+     *
+     * Given a subword, return the index (within input matrix) it hashes to.
+     *
+     * @return {int}    subword id
+     *
+     */
+  getSubwordId(subword) {
+    return this.f.getSubwordId(subword);
+  }
+
+  /**
+     * getSubwords
+     *
+     * returns the subwords and their indicies.
+     *
+     * @param {string}          word
+     *
+     * @return {Pair.<Array.<string>, Array.<int>>}
+     *     words and their corresponding indicies
+     *
+     */
+  getSubwords(word) {
+    return this.f.getSubwords(word);
+  }
+
+  /**
+     * getInputVector
+     *
+     * Given an index, get the corresponding vector of the Input Matrix.
+     *
+     * @param {int}             ind
+     *
+     * @return {Float32Array}   the vector of the `ind`'th index
+     *
+     */
+  getInputVector(ind) {
+    const b = getFloat32ArrayFromHeap(this.getDimension());
+    this.f.getInputVector(b, ind);
+
+    return heapToFloat32(b);
+  }
+
+  /**
+     * predict
+     *
+     * Given a string, get a list of labels and a list of corresponding
+     * probabilities. k controls the number of returned labels.
+     *
+     * @param {string}          text
+     * @param {int}             k, the number of predictions to be returned
+     * @param {number}          probability threshold
+     *
+     * @return {Array.<Pair.<number, string>>}
+     *     labels and their probabilities
+     *
+     */
+  predict(text, k = 1, threshold = 0.0) {
+    return this.f.predict(text, k, threshold);
+  }
+
+  /**
+     * getInputMatrix
+     *
+     * Get a reference to the full input matrix of a Model. This only
+     * works if the model is not quantized.
+     *
+     * @return {DenseMatrix}
+     *     densematrix with functions: `rows`, `cols`, `at(i,j)`
+     *
+     * example:
+     *     let inputMatrix = model.getInputMatrix();
+     *     let value = inputMatrix.at(1, 2);
+     */
+  getInputMatrix() {
+    if (this.isQuant()) {
+      throw new Error("Can't get quantized Matrix");
+    }
+    return this.f.getInputMatrix();
+  }
+
+  /**
+     * getOutputMatrix
+     *
+     * Get a reference to the full input matrix of a Model. This only
+     * works if the model is not quantized.
+     *
+     * @return {DenseMatrix}
+     *     densematrix with functions: `rows`, `cols`, `at(i,j)`
+     *
+     * example:
+     *     let outputMatrix = model.getOutputMatrix();
+     *     let value = outputMatrix.at(1, 2);
+     */
+  getOutputMatrix() {
+    if (this.isQuant()) {
+      throw new Error("Can't get quantized Matrix");
+    }
+    return this.f.getOutputMatrix();
+  }
+
+  /**
+     * getWords
+     *
+     * Get the entire list of words of the dictionary including the frequency
+     * of the individual words. This does not include any subwords. For that
+     * please consult the function get_subwords.
+     *
+     * @return {Pair.<Array.<string>, Array.<int>>}
+     *     words and their corresponding frequencies
+     *
+     */
+  getWords() {
+    return this.f.getWords();
+  }
+
+  /**
+     * getLabels
+     *
+     * Get the entire list of labels of the dictionary including the frequency
+     * of the individual labels.
+     *
+     * @return {Pair.<Array.<string>, Array.<int>>}
+     *     labels and their corresponding frequencies
+     *
+     */
+  getLabels() {
+    return this.f.getLabels();
+  }
+
+  /**
+     * getLine
+     *
+     * Split a line of text into words and labels. Labels must start with
+     * the prefix used to create the model (__label__ by default).
+     *
+     * @param {string}          text
+     *
+     * @return {Pair.<Array.<string>, Array.<string>>}
+     *     words and labels
+     *
+     */
+  getLine(text) {
+    return this.f.getLine(text);
+  }
+
+  /**
+     * saveModel
+     *
+     * Saves the model file in web assembly in-memory FS and returns a blob
+     *
+     * @return {Blob}           blob data of the file saved in web assembly FS
+     *
+     */
+  saveModel() {
+    this.f.saveModel(modelFileInWasmFs);
+    const content = fastTextModule.FS.readFile(modelFileInWasmFs,
+      { encoding: 'binary' });
+    return new Blob(
+      [new Uint8Array(content, content.byteOffset, content.length)],
+      { type: ' application/octet-stream' }
+    );
+  }
+
+  /**
+     * test
+     *
+     * Downloads the test file from the specified url, evaluates the supervised
+     * model with it.
+     *
+     * @param {string}          url
+     * @param {int}             k, the number of predictions to be returned
+     * @param {number}          probability threshold
+     *
+     * @return {Promise}   promise object that resolves to a `Meter` object
+     *
+     * example:
+     * model.test("/absolute/url/to/test.txt", 1, 0.0).then((meter) => {
+     *     console.log(meter.precision);
+     *     console.log(meter.recall);
+     *     console.log(meter.f1Score);
+     *     console.log(meter.nexamples());
+     * });
+     *
+     */
+  test(url, k, threshold) {
+    const fetchFunc = (thisModule && thisModule.fetch) || fetch;
+    const fastTextNative = this.f;
+
+    return new Promise(function(resolve, reject) {
+      fetchFunc(url).then(response => {
+        return response.arrayBuffer();
+      }).then(bytes => {
+        const byteArray = new Uint8Array(bytes);
+        const FS = fastTextModule.FS;
+        FS.writeFile(testFileInWasmFs, byteArray);
+      }).then(() =>  {
+        const meter = fastTextNative.test(testFileInWasmFs, k, threshold);
+        resolve(meter);
+      }).catch(error => {
+        reject(error);
+      });
+    });
+  }
+}
+
+
+export {FastText, addOnPostRun};

+ 328 - 0
webassembly/fasttext_wasm.cc

@@ -0,0 +1,328 @@
+/**
+ * Copyright (c) 2016-present, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <emscripten.h>
+#include <emscripten/bind.h>
+#include <fasttext.h>
+#include <cassert>
+#include <cstdint>
+#include <fstream>
+#include <functional>
+#include <memory>
+#include <sstream>
+#include <string>
+#include <utility>
+#include <vector>
+
+using namespace emscripten;
+using namespace fasttext;
+
+// Descriptor of a float32 buffer that lives on the wasm heap and is owned
+// by the JS side (see getFloat32ArrayFromHeap in fasttext.js).
+struct Float32ArrayBridge {
+  uintptr_t ptr; // byte offset into the wasm heap where the floats start
+  int size;      // number of float elements at `ptr`
+};
+
+// Copy the contents of `v` into the JS-owned buffer described by `vecFloat`.
+// The buffer must hold exactly v.size() elements (asserted in debug builds).
+void fillFloat32ArrayFromVector(
+    const Float32ArrayBridge& vecFloat,
+    const Vector& v) {
+  float* buffer = reinterpret_cast<float*>(vecFloat.ptr);
+  assert(vecFloat.size == v.size());
+  for (int i = 0; i < v.size(); i++) {
+    buffer[i] = v[i];
+  }
+}
+
+// Predict labels for one line of text, returning up to `k`
+// (probability, label) pairs whose probability is >= `threshold`.
+std::vector<std::pair<float, std::string>>
+predict(FastText* fasttext, std::string text, int k, double threshold) {
+  std::vector<std::pair<float, std::string>> result;
+  // predictLine consumes a single newline-terminated line from the stream.
+  std::stringstream lineStream(text + "\n");
+  fasttext->predictLine(lineStream, result, k, threshold);
+  return result;
+}
+
+// Look up the embedding of `word` and copy it into the JS-side buffer.
+void getWordVector(
+    FastText* fasttext,
+    const Float32ArrayBridge& vecFloat,
+    std::string word) {
+  assert(fasttext);
+  Vector embedding(fasttext->getDimension());
+  fasttext->getWordVector(embedding, word);
+  fillFloat32ArrayFromVector(vecFloat, embedding);
+}
+
+// Compute the sentence vector of `text` and copy it into the JS-side
+// buffer. The JS wrapper appends the terminating '\n' before calling in.
+void getSentenceVector(
+    FastText* fasttext,
+    const Float32ArrayBridge& vecFloat,
+    std::string text) {
+  assert(fasttext);
+  std::stringstream sentenceStream(text);
+  Vector sentenceVec(fasttext->getDimension());
+  fasttext->getSentenceVector(sentenceStream, sentenceVec);
+  fillFloat32ArrayFromVector(vecFloat, sentenceVec);
+}
+
+// Return the subwords of `word` together with their indices into the
+// input matrix.
+std::pair<std::vector<std::string>, std::vector<int32_t>> getSubwords(
+    FastText* fasttext,
+    std::string word) {
+  assert(fasttext);
+  std::vector<std::string> subwords;
+  std::vector<int32_t> ngramIds;
+  fasttext->getDictionary()->getSubwords(word, ngramIds, subwords);
+  return {subwords, ngramIds};
+}
+
+// Copy the `ind`'th input vector of the model into the JS-side buffer.
+void getInputVector(
+    FastText* fasttext,
+    const Float32ArrayBridge& vecFloat,
+    int32_t ind) {
+  assert(fasttext);
+  Vector inputVec(fasttext->getDimension());
+  fasttext->getInputVector(inputVec, ind);
+  fillFloat32ArrayFromVector(vecFloat, inputVec);
+}
+
+// Run training with the given args; blocks until training completes.
+// `jsCallback` is a JS function captured by value into the progress lambda
+// and invoked with scalar metrics. `eta` is narrowed to int32 because JS
+// numbers cannot represent all int64 values through embind.
+void train(FastText* fasttext, Args* args, emscripten::val jsCallback) {
+  assert(args);
+  assert(fasttext);
+  fasttext->train(
+      *args,
+      [=](float progress, float loss, double wst, double lr, int64_t eta) {
+        jsCallback(progress, loss, wst, lr, static_cast<int32_t>(eta));
+      });
+}
+
+// Expose the model's input matrix to JS as a raw pointer.
+// NOTE(review): the local shared_ptr dies at return — the pointer appears to
+// stay valid only because the FastText instance keeps its own reference to
+// the matrix; the JS side must not use the DenseMatrix after the model is
+// destroyed. Confirm this ownership assumption.
+const DenseMatrix* getInputMatrix(FastText* fasttext) {
+  assert(fasttext);
+  std::shared_ptr<const DenseMatrix> mm = fasttext->getInputMatrix();
+  return mm.get();
+}
+
+// Expose the model's output matrix to JS; same lifetime caveat as above.
+const DenseMatrix* getOutputMatrix(FastText* fasttext) {
+  assert(fasttext);
+  std::shared_ptr<const DenseMatrix> mm = fasttext->getOutputMatrix();
+  return mm.get();
+}
+
+// Collect every token of the given entry type together with its frequency.
+// `getter` maps a dictionary index to its token string (Dictionary::getWord
+// or Dictionary::getLabel). Frequencies are narrowed from int64 to int32
+// because embind cannot expose 64-bit integers to JS.
+std::pair<std::vector<std::string>, std::vector<int32_t>> getTokens(
+    const FastText& fasttext,
+    const std::function<std::string(const Dictionary&, int32_t)> getter,
+    entry_type entryType) {
+  std::vector<std::string> tokens;
+  std::vector<int32_t> retVocabFrequencies;
+  std::shared_ptr<const Dictionary> d = fasttext.getDictionary();
+  std::vector<int64_t> vocabFrequencies = d->getCounts(entryType);
+  // Reserve up front, and use a size_t index to avoid the signed/unsigned
+  // comparison the original int32 loop counter produced.
+  tokens.reserve(vocabFrequencies.size());
+  retVocabFrequencies.reserve(vocabFrequencies.size());
+  for (size_t i = 0; i < vocabFrequencies.size(); i++) {
+    tokens.push_back(getter(*d, static_cast<int32_t>(i)));
+    retVocabFrequencies.push_back(static_cast<int32_t>(vocabFrequencies[i]));
+  }
+  return std::pair<std::vector<std::string>, std::vector<int32_t>>(
+      tokens, retVocabFrequencies);
+}
+
+// Return all dictionary words with their frequencies (no subwords).
+std::pair<std::vector<std::string>, std::vector<int32_t>> getWords(
+    FastText* fasttext) {
+  assert(fasttext);
+  return getTokens(*fasttext, &Dictionary::getWord, entry_type::word);
+}
+
+// Return all dictionary labels with their frequencies.
+std::pair<std::vector<std::string>, std::vector<int32_t>> getLabels(
+    FastText* fasttext) {
+  assert(fasttext);
+  return getTokens(*fasttext, &Dictionary::getLabel, entry_type::label);
+}
+
+// Tokenize one line of `text` into (words, labels) using the model's
+// dictionary. Unknown words are kept; labels are kept only when they exist
+// in the dictionary (wid >= 0). Reading stops at the EOS token.
+std::pair<std::vector<std::string>, std::vector<std::string>> getLine(
+    FastText* fasttext,
+    const std::string text) {
+  assert(fasttext);
+  std::shared_ptr<const Dictionary> d = fasttext->getDictionary();
+  std::stringstream ioss(text);
+  std::string token;
+  std::vector<std::string> words;
+  std::vector<std::string> labels;
+  while (d->readWord(ioss, token)) {
+    uint32_t h = d->hash(token);
+    int32_t wid = d->getId(token, h);
+    // Tokens not in the dictionary are typed from the token string itself;
+    // known tokens from their dictionary entry.
+    entry_type type = wid < 0 ? d->getType(token) : d->getType(wid);
+
+    if (type == entry_type::word) {
+      words.push_back(token);
+    } else if (type == entry_type::label && wid >= 0) {
+      labels.push_back(token);
+    }
+    if (token == Dictionary::EOS)
+      break;
+  }
+  return std::pair<std::vector<std::string>, std::vector<std::string>>(
+      words, labels);
+}
+
+// Evaluate the model on a labeled test file (already staged in the wasm FS)
+// and return the resulting Meter (precision/recall/f1/nexamples).
+Meter test(
+    FastText* fasttext,
+    const std::string& filename,
+    int32_t k,
+    float threshold) {
+  assert(fasttext);
+  std::ifstream input(filename);
+  if (!input.is_open()) {
+    throw std::invalid_argument("Test file cannot be opened!");
+  }
+  Meter meter;
+  fasttext->test(input, k, threshold, meter);
+  input.close();
+  return meter;
+}
+
+// Embind registrations exposing fastText types, enums and helper functions
+// to JavaScript.
+EMSCRIPTEN_BINDINGS(fasttext) {
+  class_<Args>("Args")
+      .constructor<>()
+      .property("input", &Args::input)
+      .property("output", &Args::output)
+      .property("lr", &Args::lr)
+      .property("lrUpdateRate", &Args::lrUpdateRate)
+      .property("dim", &Args::dim)
+      .property("ws", &Args::ws)
+      .property("epoch", &Args::epoch)
+      .property("minCount", &Args::minCount)
+      .property("minCountLabel", &Args::minCountLabel)
+      .property("neg", &Args::neg)
+      .property("wordNgrams", &Args::wordNgrams)
+      .property("loss", &Args::loss)
+      .property("model", &Args::model)
+      .property("bucket", &Args::bucket)
+      .property("minn", &Args::minn)
+      .property("maxn", &Args::maxn)
+      .property("thread", &Args::thread)
+      .property("t", &Args::t)
+      .property("label", &Args::label)
+      .property("verbose", &Args::verbose)
+      .property("pretrainedVectors", &Args::pretrainedVectors)
+      .property("saveOutput", &Args::saveOutput)
+      .property("seed", &Args::seed)
+      .property("qout", &Args::qout)
+      .property("retrain", &Args::retrain)
+      // Fix: "qnorm" was registered twice; the duplicate is removed.
+      .property("qnorm", &Args::qnorm)
+      .property("cutoff", &Args::cutoff)
+      .property("dsub", &Args::dsub)
+      .property("autotuneValidationFile", &Args::autotuneValidationFile)
+      .property("autotuneMetric", &Args::autotuneMetric)
+      .property("autotunePredictions", &Args::autotunePredictions)
+      .property("autotuneDuration", &Args::autotuneDuration)
+      .property("autotuneModelSize", &Args::autotuneModelSize);
+
+  class_<FastText>("FastText")
+      .constructor<>()
+      .function(
+          "loadModel",
+          select_overload<void(const std::string&)>(&FastText::loadModel))
+      .function(
+          "getNN",
+          select_overload<std::vector<std::pair<real, std::string>>(
+              const std::string& word, int32_t k)>(&FastText::getNN))
+      .function("getAnalogies", &FastText::getAnalogies)
+      .function("getWordId", &FastText::getWordId)
+      .function("getSubwordId", &FastText::getSubwordId)
+      .function("getInputMatrix", &getInputMatrix, allow_raw_pointers())
+      .function("getOutputMatrix", &getOutputMatrix, allow_raw_pointers())
+      .function("getWords", &getWords, allow_raw_pointers())
+      .function("getLabels", &getLabels, allow_raw_pointers())
+      .function("getLine", &getLine, allow_raw_pointers())
+      .function("test", &test, allow_raw_pointers())
+      .function("predict", &predict, allow_raw_pointers())
+      .function("getWordVector", &getWordVector, allow_raw_pointers())
+      .function("getSentenceVector", &getSentenceVector, allow_raw_pointers())
+      .function("getSubwords", &getSubwords, allow_raw_pointers())
+      .function("getInputVector", &getInputVector, allow_raw_pointers())
+      .function("train", &train, allow_raw_pointers())
+      .function("saveModel", &FastText::saveModel)
+      .property("isQuant", &FastText::isQuant)
+      .property("args", &FastText::getArgs);
+
+  class_<DenseMatrix>("DenseMatrix")
+      .constructor<>()
+      // we return int32_t because "JS can't represent int64s"
+      .function(
+          "rows",
+          optional_override(
+              [](const DenseMatrix* self) -> int32_t { return self->rows(); }),
+          allow_raw_pointers())
+      .function(
+          "cols",
+          optional_override(
+              [](const DenseMatrix* self) -> int32_t { return self->cols(); }),
+          allow_raw_pointers())
+      .function(
+          "at",
+          optional_override(
+              [](const DenseMatrix* self, int32_t i, int32_t j) -> const float {
+                return self->at(i, j);
+              }),
+          allow_raw_pointers());
+
+  class_<Meter>("Meter")
+      .constructor<>()
+      .property(
+          "precision", select_overload<double(void) const>(&Meter::precision))
+      .property("recall", select_overload<double(void) const>(&Meter::recall))
+      .property("f1Score", select_overload<double(void) const>(&Meter::f1Score))
+      .function(
+          "nexamples",
+          optional_override(
+              [](const Meter* self) -> int32_t { return self->nexamples(); }),
+          allow_raw_pointers());
+
+  enum_<model_name>("ModelName")
+      .value("cbow", model_name::cbow)
+      .value("skipgram", model_name::sg)
+      .value("supervised", model_name::sup);
+
+  enum_<loss_name>("LossName")
+      .value("hs", loss_name::hs)
+      .value("ns", loss_name::ns)
+      .value("softmax", loss_name::softmax)
+      .value("ova", loss_name::ova);
+
+  emscripten::value_object<Float32ArrayBridge>("Float32ArrayBridge")
+      .field("ptr", &Float32ArrayBridge::ptr)
+      .field("size", &Float32ArrayBridge::size);
+
+  emscripten::value_array<std::pair<float, std::string>>(
+      "std::pair<float, std::string>")
+      .element(&std::pair<float, std::string>::first)
+      .element(&std::pair<float, std::string>::second);
+
+  emscripten::register_vector<std::pair<float, std::string>>(
+      "std::vector<std::pair<float, std::string>>");
+
+  emscripten::value_array<
+      std::pair<std::vector<std::string>, std::vector<int32_t>>>(
+      "std::pair<std::vector<std::string>, std::vector<int32_t>>")
+      .element(
+          &std::pair<std::vector<std::string>, std::vector<int32_t>>::first)
+      .element(
+          &std::pair<std::vector<std::string>, std::vector<int32_t>>::second);
+
+  emscripten::value_array<
+      std::pair<std::vector<std::string>, std::vector<std::string>>>(
+      "std::pair<std::vector<std::string>, std::vector<std::string>>")
+      .element(
+          &std::pair<std::vector<std::string>, std::vector<std::string>>::first)
+      .element(&std::pair<std::vector<std::string>, std::vector<std::string>>::
+                   second);
+
+  emscripten::register_vector<float>("std::vector<float>");
+
+  emscripten::register_vector<int32_t>("std::vector<int32_t>");
+
+  emscripten::register_vector<std::string>("std::vector<std::string>");
+}