il y a 5 ans · 13096686fc
--- a/.gitignore
+++ b/.gitignore
@@ -2,8 +2,11 @@
 
															 *.o
														
 
															 *.bin
														
 
															 *.vec
														
 
															+*.bc
														
 
															+.DS_Store
														
 
															 data
														
 
															 fasttext
														
 
															 result
														
 
															 website/node_modules/
														
 
															-
														
 
															+package-lock.json
														
 
															+node_modules/
														
--- a/Makefile
+++ b/Makefile
@@ -20,6 +20,12 @@ coverage: fasttext
 
															 debug: CXXFLAGS += -g -O0 -fno-inline
														
 
															 debug: fasttext
														
 
															+wasm: webassembly/fasttext_wasm.js
														
 
															+
														
 
															+wasmdebug: export EMCC_DEBUG=1
														
 
															+wasmdebug: webassembly/fasttext_wasm.js
														
 
															+
														
 
															+
														
 
															 args.o: src/args.cc src/args.h
														
 
															 	$(CXX) $(CXXFLAGS) -c src/args.cc
														
@@ -63,4 +69,57 @@ fasttext: $(OBJS) src/fasttext.cc
 
															 	$(CXX) $(CXXFLAGS) $(OBJS) src/main.cc -o fasttext
														
 
															 clean:
														
 
															-	rm -rf *.o *.gcno *.gcda fasttext
														
 
															+	rm -rf *.o *.gcno *.gcda fasttext *.bc webassembly/fasttext_wasm.js webassembly/fasttext_wasm.wasm
														
 
															+
														
 
															+
														
 
															+EMCXX = em++
														
 
															+EMCXXFLAGS = --bind --std=c++11 -s WASM=1 -s ALLOW_MEMORY_GROWTH=1 -s "EXTRA_EXPORTED_RUNTIME_METHODS=['addOnPostRun', 'FS']" -s "DISABLE_EXCEPTION_CATCHING=0" -s "EXCEPTION_DEBUG=1" -s "FORCE_FILESYSTEM=1" -s "MODULARIZE=1" -s "EXPORT_ES6=1" -s 'EXPORT_NAME="FastTextModule"' -Isrc/
														
 
															+EMOBJS = args.bc autotune.bc matrix.bc dictionary.bc loss.bc productquantizer.bc densematrix.bc quantmatrix.bc vector.bc model.bc utils.bc meter.bc fasttext.bc main.bc
														
 
															+
														
 
															+
														
 
															+main.bc: webassembly/fasttext_wasm.cc
														
 
															+	$(EMCXX) $(EMCXXFLAGS)  webassembly/fasttext_wasm.cc -o main.bc
														
 
															+
														
 
															+args.bc: src/args.cc src/args.h
														
 
															+	$(EMCXX) $(EMCXXFLAGS)  src/args.cc -o args.bc
														
 
															+
														
 
															+autotune.bc: src/autotune.cc src/autotune.h
														
 
															+	$(EMCXX) $(EMCXXFLAGS)  src/autotune.cc -o autotune.bc
														
 
															+
														
 
															+matrix.bc: src/matrix.cc src/matrix.h
														
 
															+	$(EMCXX) $(EMCXXFLAGS) src/matrix.cc -o matrix.bc
														
 
															+
														
 
															+dictionary.bc: src/dictionary.cc src/dictionary.h src/args.h
														
 
															+	$(EMCXX) $(EMCXXFLAGS)  src/dictionary.cc -o dictionary.bc
														
 
															+
														
 
															+loss.bc: src/loss.cc src/loss.h src/matrix.h src/real.h
														
 
															+	$(EMCXX) $(EMCXXFLAGS) src/loss.cc -o loss.bc
														
 
															+
														
 
															+productquantizer.bc: src/productquantizer.cc src/productquantizer.h src/utils.h
														
 
															+	$(EMCXX) $(EMCXXFLAGS)  src/productquantizer.cc -o productquantizer.bc
														
 
															+
														
 
															+densematrix.bc: src/densematrix.cc src/densematrix.h src/utils.h src/matrix.h
														
 
															+	$(EMCXX) $(EMCXXFLAGS) src/densematrix.cc -o densematrix.bc
														
 
															+
														
 
															+quantmatrix.bc: src/quantmatrix.cc src/quantmatrix.h src/utils.h src/matrix.h
														
 
															+	$(EMCXX) $(EMCXXFLAGS) src/quantmatrix.cc -o quantmatrix.bc
														
 
															+
														
 
															+vector.bc: src/vector.cc src/vector.h src/utils.h
														
 
															+	$(EMCXX) $(EMCXXFLAGS)  src/vector.cc -o vector.bc
														
 
															+
														
 
															+model.bc: src/model.cc src/model.h src/args.h
														
 
															+	$(EMCXX) $(EMCXXFLAGS)  src/model.cc -o model.bc
														
 
															+
														
 
															+utils.bc: src/utils.cc src/utils.h
														
 
															+	$(EMCXX) $(EMCXXFLAGS)  src/utils.cc -o utils.bc
														
 
															+
														
 
															+meter.bc: src/meter.cc src/meter.h
														
 
															+	$(EMCXX) $(EMCXXFLAGS)  src/meter.cc -o meter.bc
														
 
															+
														
 
															+fasttext.bc: src/fasttext.cc src/*.h
														
 
															+	$(EMCXX) $(EMCXXFLAGS)  src/fasttext.cc -o fasttext.bc
														
 
															+
														
 
															+webassembly/fasttext_wasm.js: $(EMOBJS) webassembly/fasttext_wasm.cc Makefile
														
 
															+	$(EMCXX) $(EMCXXFLAGS) $(EMOBJS) -o webassembly/fasttext_wasm.js
														
 
															+
														
 
															+
														
--- a/src/args.cc
+++ b/src/args.cc
@@ -262,7 +262,8 @@ void Args::printTrainingHelp() {
 
															   std::cerr
														
 
															       << "\nThe following arguments for training are optional:\n"
														
 
															       << "  -lr                 learning rate [" << lr << "]\n"
														
 
															-      << "  -lrUpdateRate       change the rate of updates for the learning rate ["
														
 
															+      << "  -lrUpdateRate       change the rate of updates for the learning "
														
 
															+         "rate ["
														
 
															       << lrUpdateRate << "]\n"
														
 
															       << "  -dim                size of word vectors [" << dim << "]\n"
														
 
															       << "  -ws                 size of the context window [" << ws << "]\n"
														
@@ -270,9 +271,11 @@ void Args::printTrainingHelp() {
 
															       << "  -neg                number of negatives sampled [" << neg << "]\n"
														
 
															       << "  -loss               loss function {ns, hs, softmax, one-vs-all} ["
														
 
															       << lossToString(loss) << "]\n"
														
 
															-      << "  -thread             number of threads (set to 1 to ensure reproducible results) ["
														
 
															+      << "  -thread             number of threads (set to 1 to ensure "
														
 
															+         "reproducible results) ["
														
 
															       << thread << "]\n"
														
 
															-      << "  -pretrainedVectors  pretrained word vectors for supervised learning ["
														
 
															+      << "  -pretrainedVectors  pretrained word vectors for supervised "
														
 
															+         "learning ["
														
 
															       << pretrainedVectors << "]\n"
														
 
															       << "  -saveOutput         whether output params should be saved ["
														
 
															       << boolToString(saveOutput) << "]\n"
														
@@ -280,17 +283,19 @@ void Args::printTrainingHelp() {
 
															 }
														
 
															 void Args::printAutotuneHelp() {
														
 
															-  std::cerr
														
 
															-      << "\nThe following arguments are for autotune:\n"
														
 
															-      << "  -autotune-validation            validation file to be used for evaluation\n"
														
 
															-      << "  -autotune-metric                metric objective {f1, f1:labelname} ["
														
 
															-      << autotuneMetric << "]\n"
														
 
															-      << "  -autotune-predictions           number of predictions used for evaluation  ["
														
 
															-      << autotunePredictions << "]\n"
														
 
															-      << "  -autotune-duration              maximum duration in seconds ["
														
 
															-      << autotuneDuration << "]\n"
														
 
															-      << "  -autotune-modelsize             constraint model file size ["
														
 
															-      << autotuneModelSize << "] (empty = do not quantize)\n";
														
 
															+  std::cerr << "\nThe following arguments are for autotune:\n"
														
 
															+            << "  -autotune-validation            validation file to be used "
														
 
															+               "for evaluation\n"
														
 
															+            << "  -autotune-metric                metric objective {f1, "
														
 
															+               "f1:labelname} ["
														
 
															+            << autotuneMetric << "]\n"
														
 
															+            << "  -autotune-predictions           number of predictions used "
														
 
															+               "for evaluation  ["
														
 
															+            << autotunePredictions << "]\n"
														
 
															+            << "  -autotune-duration              maximum duration in seconds ["
														
 
															+            << autotuneDuration << "]\n"
														
 
															+            << "  -autotune-modelsize             constraint model file size ["
														
 
															+            << autotuneModelSize << "] (empty = do not quantize)\n";
														
 
															 }
														
 
															 void Args::printQuantizationHelp() {
														
@@ -298,7 +303,8 @@ void Args::printQuantizationHelp() {
 
															       << "\nThe following arguments for quantization are optional:\n"
														
 
															       << "  -cutoff             number of words and ngrams to retain ["
														
 
															       << cutoff << "]\n"
														
 
															-      << "  -retrain            whether embeddings are finetuned if a cutoff is applied ["
														
 
															+      << "  -retrain            whether embeddings are finetuned if a cutoff "
														
 
															+         "is applied ["
														
 
															       << boolToString(retrain) << "]\n"
														
 
															       << "  -qnorm              whether the norm is quantized separately ["
														
 
															       << boolToString(qnorm) << "]\n"
														
--- a/src/autotune.cc
+++ b/src/autotune.cc
@@ -416,10 +416,10 @@ void Autotune::train(const Args& autotuneArgs) {
 
															         if (!sizeConstraintWarning && trials_ > 10 &&
														
 
															             sizeConstraintFailed_ > (trials_ / 2)) {
														
 
															           sizeConstraintWarning = true;
														
 
															-          std::cerr
														
 
															-              << std::endl
														
 
															-              << "Warning : requested model size is probably too small. You may want to increase `autotune-modelsize`."
														
 
															-              << std::endl;
														
 
															+          std::cerr << std::endl
														
 
															+                    << "Warning : requested model size is probably too small. "
														
 
															+                       "You may want to increase `autotune-modelsize`."
														
 
															+                    << std::endl;
														
 
															         }
														
 
															       }
														
 
															     } catch (DenseMatrix::EncounteredNaNError&) {
														
@@ -442,10 +442,12 @@ void Autotune::train(const Args& autotuneArgs) {
 
															     std::string errorMessage;
														
 
															     if (sizeConstraintWarning) {
														
 
															       errorMessage =
														
 
															-          "Couldn't fulfil model size constraint: please increase `autotune-modelsize`.";
														
 
															+          "Couldn't fulfil model size constraint: please increase "
														
 
															+          "`autotune-modelsize`.";
														
 
															     } else {
														
 
															       errorMessage =
														
 
															-          "Didn't have enough time to train once: please increase `autotune-duration`.";
														
 
															+          "Didn't have enough time to train once: please increase "
														
 
															+          "`autotune-duration`.";
														
 
															     }
														
 
															     throw std::runtime_error(errorMessage);
														
 
															   } else {
														
--- a/src/densematrix.cc
+++ b/src/densematrix.cc
@@ -43,12 +43,17 @@ void DenseMatrix::uniformThread(real a, int block, int32_t seed) {
 
															 }
														
 
															 void DenseMatrix::uniform(real a, unsigned int thread, int32_t seed) {
														
 
															-  std::vector<std::thread> threads;
														
 
															-  for (int i = 0; i < thread; i++) {
														
 
															-    threads.push_back(std::thread([=]() { uniformThread(a, i, seed); }));
														
 
															-  }
														
 
															-  for (int32_t i = 0; i < threads.size(); i++) {
														
 
															-    threads[i].join();
														
 
															+  if (thread > 1) {
														
 
															+    std::vector<std::thread> threads;
														
 
															+    for (int i = 0; i < thread; i++) {
														
 
															+      threads.push_back(std::thread([=]() { uniformThread(a, i, seed); }));
														
 
															+    }
														
 
															+    for (int32_t i = 0; i < threads.size(); i++) {
														
 
															+      threads[i].join();
														
 
															+    }
														
 
															+  } else {
														
 
															+    // webassembly can't instantiate `std::thread`
														
 
															+    uniformThread(a, 0, seed);
														
 
															   }
														
 
															 }
														
--- a/src/fasttext.cc
+++ b/src/fasttext.cc
@@ -263,7 +263,7 @@ void FastText::loadModel(std::istream& in) {
 
															   buildModel();
														
 
															 }
														
 
															-void FastText::printInfo(real progress, real loss, std::ostream& log_stream) {
														
 
															+std::tuple<int64_t, double, double> FastText::progressInfo(real progress) {
														
 
															   double t = utils::getDuration(start_, std::chrono::steady_clock::now());
														
 
															   double lr = args_->lr * (1.0 - progress);
														
 
															   double wst = 0;
														
@@ -271,14 +271,22 @@ void FastText::printInfo(real progress, real loss, std::ostream& log_stream) {
 
															   int64_t eta = 2592000; // Default to one month in seconds (720 * 3600)
														
 
															   if (progress > 0 && t >= 0) {
														
 
															-    progress = progress * 100;
														
 
															-    eta = t * (100 - progress) / progress;
														
 
															+    eta = t * (1 - progress) / progress;
														
 
															     wst = double(tokenCount_) / t / args_->thread;
														
 
															   }
														
 
															+  return std::tuple<double, double, int64_t>(wst, lr, eta);
														
 
															+}
														
 
															+
														
 
															+void FastText::printInfo(real progress, real loss, std::ostream& log_stream) {
														
 
															+  double wst;
														
 
															+  double lr;
														
 
															+  int64_t eta;
														
 
															+  std::tie<double, double, int64_t>(wst, lr, eta) = progressInfo(progress);
														
 
															+
														
 
															   log_stream << std::fixed;
														
 
															   log_stream << "Progress: ";
														
 
															-  log_stream << std::setprecision(1) << std::setw(5) << progress << "%";
														
 
															+  log_stream << std::setprecision(1) << std::setw(5) << (progress * 100) << "%";
														
 
															   log_stream << " words/sec/thread: " << std::setw(7) << int64_t(wst);
														
 
															   log_stream << " lr: " << std::setw(9) << std::setprecision(6) << lr;
														
 
															   log_stream << " avg.loss: " << std::setw(9) << std::setprecision(6) << loss;
														
@@ -304,7 +312,7 @@ std::vector<int32_t> FastText::selectEmbeddings(int32_t cutoff) const {
 
															   return idx;
														
 
															 }
														
 
															-void FastText::quantize(const Args& qargs) {
														
 
															+void FastText::quantize(const Args& qargs, const TrainCallback& callback) {
														
 
															   if (args_->model != model_name::sup) {
														
 
															     throw std::invalid_argument(
														
 
															         "For now we only support quantization of supervised models");
														
@@ -336,10 +344,9 @@ void FastText::quantize(const Args& qargs) {
 
															       args_->verbose = qargs.verbose;
														
 
															       auto loss = createLoss(output_);
														
 
															       model_ = std::make_shared<Model>(input, output, loss, normalizeGradient);
														
 
															-      startThreads();
														
 
															+      startThreads(callback);
														
 
															     }
														
 
															   }
														
 
															-
														
 
															   input_ = std::make_shared<QuantMatrix>(
														
 
															       std::move(*(input.get())), qargs.dsub, qargs.qnorm);
														
@@ -347,7 +354,6 @@ void FastText::quantize(const Args& qargs) {
 
															     output_ = std::make_shared<QuantMatrix>(
														
 
															         std::move(*(output.get())), 2, qargs.qnorm);
														
 
															   }
														
 
															-
														
 
															   quant_ = true;
														
 
															   auto loss = createLoss(output_);
														
 
															   model_ = std::make_shared<Model>(input_, output_, loss, normalizeGradient);
														
@@ -615,7 +621,7 @@ bool FastText::keepTraining(const int64_t ntokens) const {
 
															   return tokenCount_ < args_->epoch * ntokens && !trainException_;
														
 
															 }
														
 
															-void FastText::trainThread(int32_t threadId) {
														
 
															+void FastText::trainThread(int32_t threadId, const TrainCallback& callback) {
														
 
															   std::ifstream ifs(args_->input);
														
 
															   utils::seek(ifs, threadId * utils::size(ifs) / args_->thread);
														
@@ -624,9 +630,18 @@ void FastText::trainThread(int32_t threadId) {
 
															   const int64_t ntokens = dict_->ntokens();
														
 
															   int64_t localTokenCount = 0;
														
 
															   std::vector<int32_t> line, labels;
														
 
															+  uint64_t callbackCounter = 0;
														
 
															   try {
														
 
															     while (keepTraining(ntokens)) {
														
 
															       real progress = real(tokenCount_) / (args_->epoch * ntokens);
														
 
															+      if (callback && ((callbackCounter++ % 64) == 0)) {
														
 
															+        double wst;
														
 
															+        double lr;
														
 
															+        int64_t eta;
														
 
															+        std::tie<double, double, int64_t>(wst, lr, eta) =
														
 
															+            progressInfo(progress);
														
 
															+        callback(progress, loss_, wst, lr, eta);
														
 
															+      }
														
 
															       real lr = args_->lr * (1.0 - progress);
														
 
															       if (args_->model == model_name::sup) {
														
 
															         localTokenCount += dict_->getLine(ifs, line, labels);
														
@@ -717,7 +732,7 @@ std::shared_ptr<Matrix> FastText::createTrainOutputMatrix() const {
 
															   return output;
														
 
															 }
														
 
															-void FastText::train(const Args& args) {
														
 
															+void FastText::train(const Args& args, const TrainCallback& callback) {
														
 
															   args_ = std::make_shared<Args>(args);
														
 
															   dict_ = std::make_shared<Dictionary>(args_);
														
 
															   if (args_->input == "-") {
														
@@ -742,7 +757,7 @@ void FastText::train(const Args& args) {
 
															   auto loss = createLoss(output_);
														
 
															   bool normalizeGradient = (args_->model == model_name::sup);
														
 
															   model_ = std::make_shared<Model>(input_, output_, loss, normalizeGradient);
														
 
															-  startThreads();
														
 
															+  startThreads(callback);
														
 
															 }
														
 
															 void FastText::abort() {
														
@@ -753,14 +768,19 @@ void FastText::abort() {
 
															   }
														
 
															 }
														
 
															-void FastText::startThreads() {
														
 
															+void FastText::startThreads(const TrainCallback& callback) {
														
 
															   start_ = std::chrono::steady_clock::now();
														
 
															   tokenCount_ = 0;
														
 
															   loss_ = -1;
														
 
															   trainException_ = nullptr;
														
 
															   std::vector<std::thread> threads;
														
 
															-  for (int32_t i = 0; i < args_->thread; i++) {
														
 
															-    threads.push_back(std::thread([=]() { trainThread(i); }));
														
 
															+  if (args_->thread > 1) {
														
 
															+    for (int32_t i = 0; i < args_->thread; i++) {
														
 
															+      threads.push_back(std::thread([=]() { trainThread(i, callback); }));
														
 
															+    }
														
 
															+  } else {
														
 
															+    // webassembly can't instantiate `std::thread`
														
 
															+    trainThread(0, callback);
														
 
															   }
														
 
															   const int64_t ntokens = dict_->ntokens();
														
 
															   // Same condition as trainThread
														
@@ -772,7 +792,7 @@ void FastText::startThreads() {
 
															       printInfo(progress, loss_, std::cerr);
														
 
															     }
														
 
															   }
														
 
															-  for (int32_t i = 0; i < args_->thread; i++) {
														
 
															+  for (int32_t i = 0; i < threads.size(); i++) {
														
 
															     threads[i].join();
														
 
															   }
														
 
															   if (trainException_) {
														
--- a/src/fasttext.h
+++ b/src/fasttext.h
@@ -12,6 +12,7 @@
 
															 #include <atomic>
														
 
															 #include <chrono>
														
 
															+#include <functional>
														
 
															 #include <iostream>
														
 
															 #include <memory>
														
 
															 #include <queue>
														
@@ -31,6 +32,10 @@
 
															 namespace fasttext {
														
 
															 class FastText {
														
 
															+ public:
														
 
															+  using TrainCallback =
														
 
															+      std::function<void(float, float, double, double, int64_t)>;
														
 
															+
														
 
															  protected:
														
 
															   std::shared_ptr<Args> args_;
														
 
															   std::shared_ptr<Dictionary> dict_;
														
@@ -47,9 +52,9 @@ class FastText {
 
															   void signModel(std::ostream&);
														
 
															   bool checkModel(std::istream&);
														
 
															-  void startThreads();
														
 
															+  void startThreads(const TrainCallback& callback = {});
														
 
															   void addInputVector(Vector&, int32_t) const;
														
 
															-  void trainThread(int32_t);
														
 
															+  void trainThread(int32_t, const TrainCallback& callback);
														
 
															   std::vector<std::pair<real, std::string>> getNN(
														
 
															       const DenseMatrix& wordVectors,
														
 
															       const Vector& queryVec,
														
@@ -73,6 +78,7 @@ class FastText {
 
															   void precomputeWordVectors(DenseMatrix& wordVectors);
														
 
															   bool keepTraining(const int64_t ntokens) const;
														
 
															   void buildModel();
														
 
															+  std::tuple<int64_t, double, double> progressInfo(real progress);
														
 
															  public:
														
 
															   FastText();
														
@@ -114,7 +120,7 @@ class FastText {
 
															   void getSentenceVector(std::istream& in, Vector& vec);
														
 
															-  void quantize(const Args& qargs);
														
 
															+  void quantize(const Args& qargs, const TrainCallback& callback = {});
														
 
															   std::tuple<int64_t, double, double>
														
 
															   test(std::istream& in, int32_t k, real threshold = 0.0);
														
@@ -146,7 +152,7 @@ class FastText {
 
															       const std::string& wordB,
														
 
															       const std::string& wordC);
														
 
															-  void train(const Args& args);
														
 
															+  void train(const Args& args, const TrainCallback& callback = {});
														
 
															   void abort();
														
--- a/src/main.cc
+++ b/src/main.cc
@@ -21,19 +21,25 @@ void printUsage() {
 
															       << "usage: fasttext <command> <args>\n\n"
														
 
															       << "The commands supported by fasttext are:\n\n"
														
 
															       << "  supervised              train a supervised classifier\n"
														
 
															-      << "  quantize                quantize a model to reduce the memory usage\n"
														
 
															+      << "  quantize                quantize a model to reduce the memory "
														
 
															+         "usage\n"
														
 
															       << "  test                    evaluate a supervised classifier\n"
														
 
															-      << "  test-label              print labels with precision and recall scores\n"
														
 
															+      << "  test-label              print labels with precision and recall "
														
 
															+         "scores\n"
														
 
															       << "  predict                 predict most likely labels\n"
														
 
															-      << "  predict-prob            predict most likely labels with probabilities\n"
														
 
															+      << "  predict-prob            predict most likely labels with "
														
 
															+         "probabilities\n"
														
 
															       << "  skipgram                train a skipgram model\n"
														
 
															       << "  cbow                    train a cbow model\n"
														
 
															       << "  print-word-vectors      print word vectors given a trained model\n"
														
 
															-      << "  print-sentence-vectors  print sentence vectors given a trained model\n"
														
 
															-      << "  print-ngrams            print ngrams given a trained model and word\n"
														
 
															+      << "  print-sentence-vectors  print sentence vectors given a trained "
														
 
															+         "model\n"
														
 
															+      << "  print-ngrams            print ngrams given a trained model and "
														
 
															+         "word\n"
														
 
															       << "  nn                      query for nearest neighbors\n"
														
 
															       << "  analogies               query for analogies\n"
														
 
															-      << "  dump                    dump arguments,dictionary,input/output vectors\n"
														
 
															+      << "  dump                    dump arguments,dictionary,input/output "
														
 
															+         "vectors\n"
														
 
															       << std::endl;
														
 
															 }
														
--- a/src/real.h
+++ b/src/real.h
@@ -11,5 +11,4 @@
 
															 namespace fasttext {
														
 
															 typedef float real;
														
 
															-
														
 
															 }
														
--- a/webassembly/README.md
+++ b/webassembly/README.md
@@ -0,0 +1,37 @@
 
															+# fastText [![CircleCI](https://circleci.com/gh/facebookresearch/fastText/tree/master.svg?style=svg)](https://circleci.com/gh/facebookresearch/fastText/tree/master)
														
 
															+
														
 
															+[fastText](https://fasttext.cc/) is a library for efficient learning of word representations and sentence classification.
														
 
															+
														
 
															+In this document we present how to use fastText in a browser with WebAssembly.
														
 
															+
														
 
															+
														
 
															+# Requirements
														
 
															+
														
 
															+[fastText](https://fasttext.cc/) builds on modern Mac OS and Linux distributions.
														
 
															+Since it uses C\++11 features, it requires a compiler with good C++11 support.
														
 
															+You will need [emscripten](https://emscripten.org/) and a [browser that supports WebAssembly](https://caniuse.com/#feat=wasm).
														
 
															+
														
 
															+
														
 
															+# Building WebAssembly binaries
														
 
															+
														
 
															+First, download and install the emscripten SDK as [described here](https://emscripten.org/docs/getting_started/downloads.html#installation-instructions).
														
 
															+
														
 
															+
														
 
															+Make sure the emscripten environment is activated in your current shell, so that `em++` is on your PATH:
														
 
															+```bash
														
 
															+$ source /path/to/emsdk/emsdk_env.sh
														
 
															+```
														
 
															+
														
 
															+Clone our [repository](https://github.com/facebookresearch/fastText/).
														
 
															+
														
 
															+```bash
														
 
															+$ git clone git@github.com:facebookresearch/fastText.git
														
 
															+```
														
 
															+
														
 
															+Build WebAssembly binaries:
														
 
															+```bash
														
 
															+$ cd fastText
														
 
															+$ make wasm
														
 
															+```
														
 
															+
														
 
															+
														
--- a/webassembly/doc/examples/misc.html
+++ b/webassembly/doc/examples/misc.html
@@ -0,0 +1,62 @@
 
															+<!DOCTYPE html>
															+<html>
															+<head>
															+    <meta charset="UTF-8">
															+    <meta name="viewport" content="width=device-width, initial-scale=1, minimum-scale=1.0, maximum-scale=1.0, user-scalable=no">
															+</head>
															+<body>
															+    <script type="module">
															+        import {FastText, addOnPostRun} from "./fasttext.js";
															+
															+        // Logs the entries of `predictions` (an object exposing `size()` and
															+        // `get(i)`) to the console, stopping after `limit` entries when a
															+        // non-zero limit is given.
															+        const printVector = function(predictions, limit) {
															+            limit = limit || Infinity;
															+
															+            for (let i=0; i<predictions.size() && i<limit; i++){
															+                const prediction = predictions.get(i);
															+                console.log(prediction);
															+            }
															+        };
															+
															+        // Wait for the module's post-run hook before using FastText.
															+        addOnPostRun(() => {
															+            let ft = new FastText();
															+
															+            const url = "lid.176.ftz";
															+            ft.loadModel(url).then(model => {
															+                /* isQuant */
															+                console.log(model.isQuant());
															+
															+                /* getDimension */
															+                console.log(model.getDimension());
															+
															+                /* getWordVector */
															+                let v = model.getWordVector("Hello");
															+                console.log(v);
															+
															+                /* getSentenceVector */
															+                let v1 = model.getSentenceVector("Hello");
															+                console.log(v1);
															+                let v2 = model.getSentenceVector("Hello this is a sentence");
															+                console.log(v2);
															+
															+                /* getNearestNeighbors */
															+                printVector(model.getNearestNeighbors("Hello", 10));
															+
															+                /* getAnalogies */
															+                printVector(model.getAnalogies("paris", "france", "london", 10));
															+
															+                /* getWordId */
															+                console.log(model.getWordId("Hello"));
															+
															+                /* getSubwords */
															+                let subWordInformation = model.getSubwords("désinstitutionnalisation");
															+                printVector(subWordInformation[0]);
															+
															+                /* getInputVector */
															+                console.log(model.getInputVector(832));
															+            });
															+        });
															+
															+    </script>
															+</body>
															+
															+</html>
														
--- a/webassembly/doc/examples/predict.html
+++ b/webassembly/doc/examples/predict.html
@@ -0,0 +1,42 @@
 
															+<!DOCTYPE html>
															+<html>
															+<head>
															+    <meta charset="UTF-8">
															+    <meta name="viewport" content="width=device-width, initial-scale=1, minimum-scale=1.0, maximum-scale=1.0, user-scalable=no">
															+</head>
															+<body>
															+    <script type="module">
															+        import {FastText, addOnPostRun} from "./fasttext.js";
															+
															+        // Logs the entries of `predictions` (an object exposing `size()` and
															+        // `get(i)`) to the console, stopping after `limit` entries when a
															+        // non-zero limit is given.
															+        const printVector = function(predictions, limit) {
															+            limit = limit || Infinity;
															+
															+            for (let i=0; i<predictions.size() && i<limit; i++){
															+                const prediction = predictions.get(i);
															+                console.log(prediction);
															+            }
															+        };
															+
															+        // Wait for the module's post-run hook before using FastText.
															+        addOnPostRun(() => {
															+            let ft = new FastText();
															+
															+            const url = "lid.176.ftz";
															+            ft.loadModel(url).then(model => {
															+                // Each call asks for 5 predictions with a threshold of 0.0.
															+                let text = "Bonjour à tous. Ceci est du français";
															+                console.log(text);
															+                printVector(model.predict(text, 5, 0.0));
															+
															+                text = "Hello, world. This is english";
															+                console.log(text);
															+                printVector(model.predict(text, 5, 0.0));
															+
															+                text = "Merhaba dünya. Bu da türkçe";
															+                console.log(text);
															+                printVector(model.predict(text, 5, 0.0));
															+            });
															+        });
															+
															+    </script>
															+</body>
															+
															+</html>
														
--- a/webassembly/doc/examples/train_supervised.html
+++ b/webassembly/doc/examples/train_supervised.html
@@ -0,0 +1,66 @@
 
															+<!DOCTYPE html>
															+<html>
															+<head>
															+    <meta charset="UTF-8">
															+    <meta name="viewport" content="width=device-width, initial-scale=1, minimum-scale=1.0, maximum-scale=1.0, user-scalable=no">
															+</head>
															+<body>
															+    <script type="module">
															+        import {FastText, addOnPostRun} from "./fasttext.js";
															+
															+        // Logs the entries of `predictions` (an object exposing `size()` and
															+        // `get(i)`) to the console, stopping after `limit` entries when a
															+        // non-zero limit is given.
															+        const printVector = function(predictions, limit) {
															+            limit = limit || Infinity;
															+
															+            for (let i=0; i<predictions.size() && i<limit; i++){
															+                const prediction = predictions.get(i);
															+                console.log(prediction);
															+            }
															+        };
															+
															+        // Passed to the training call; logs the values it is given.
															+        const trainCallback = (progress, loss, wst, lr, eta) => {
															+            console.log([progress, loss, wst, lr, eta]);
															+        };
															+
															+        // Wait for the module's post-run hook before using FastText.
															+        addOnPostRun(() => {
															+            let ft = new FastText();
															+
															+            ft.trainSupervised("cooking.train", {
															+                'lr':1.0,
															+                'epoch':10,
															+                'loss':'hs',
															+                'wordNgrams':2,
															+                'dim':50,
															+                'bucket':200000
															+            }, trainCallback).then(model => {
															+                console.log('Trained.');
															+
															+                printVector(model.predict("Which baking dish is best to bake a banana bread ?", 5, 0.0));
															+
															+                /* getInputMatrix */
															+                let inputMatrix = model.getInputMatrix();
															+                console.log(inputMatrix.cols());
															+                console.log(inputMatrix.rows());
															+                console.log(inputMatrix.at(1, 2));
															+
															+                /* getOutputMatrix */
															+                let outputMatrix = model.getOutputMatrix();
															+                console.log(outputMatrix.cols());
															+                console.log(outputMatrix.rows());
															+                console.log(outputMatrix.at(1, 2));
															+
															+                /* getWords */
															+                let wordsInformation = model.getWords();
															+                printVector(wordsInformation[0], 30);   // words
															+                printVector(wordsInformation[1], 30);   // frequencies
															+
															+                /* getLabels */
															+                let labelsInformation = model.getLabels();
															+                printVector(labelsInformation[0], 30);  // labels
															+                printVector(labelsInformation[1], 30);  // frequencies
															+            });
															+        });
															+
															+    </script>
															+</body>
															+
															+</html>
														
--- a/webassembly/doc/examples/train_unsupervised.html
+++ b/webassembly/doc/examples/train_unsupervised.html
@@ -0,0 +1,44 @@
 
															+<!DOCTYPE html>
															+<html>
															+<head>
															+    <meta charset="UTF-8">
															+    <meta name="viewport" content="width=device-width, initial-scale=1, minimum-scale=1.0, maximum-scale=1.0, user-scalable=no">
															+</head>
															+<body>
															+    <script type="module">
															+        import {FastText, addOnPostRun} from "./fasttext.js";
															+
															+        // Logs the entries of `predictions` (an object exposing `size()` and
															+        // `get(i)`) to the console, stopping after `limit` entries when a
															+        // non-zero limit is given.
															+        const printVector = function(predictions, limit) {
															+            limit = limit || Infinity;
															+
															+            for (let i=0; i<predictions.size() && i<limit; i++){
															+                const prediction = predictions.get(i);
															+                console.log(prediction);
															+            }
															+        };
															+
															+        // Passed to the training call; logs the values it is given.
															+        const trainCallback = (progress, loss, wst, lr, eta) => {
															+            console.log([progress, loss, wst, lr, eta]);
															+        };
															+
															+        // Wait for the module's post-run hook before using FastText.
															+        addOnPostRun(() => {
															+            let ft = new FastText();
															+
															+            ft.trainUnsupervised("fil9", 'skipgram', {
															+                'lr':0.1,
															+                'epoch':1,
															+                'loss':'ns',
															+                'wordNgrams':2,
															+                'dim':50,
															+                'bucket':200000
															+            }, trainCallback).then(model => {
															+                let wordsInformation = model.getWords();
															+                printVector(wordsInformation[0], 30);   // words
															+                printVector(wordsInformation[1], 30);   // frequencies
															+            });
															+        });
															+
															+    </script>
															+</body>
															+
															+</html>
														
--- a/webassembly/fasttext.js
+++ b/webassembly/fasttext.js
@@ -0,0 +1,520 @@
 
															+/**
														
 
															+ * Copyright (c) 2016-present, Facebook, Inc.
														
 
															+ * All rights reserved.
														
 
															+ *
														
 
															+ * This source code is licensed under the MIT license found in the
														
 
															+ * LICENSE file in the root directory of this source tree.
														
 
															+ */
														
 
															+
														
 
															+import fastTextModularized from './fasttext_wasm.js';
														
 
															+const fastTextModule = fastTextModularized();
														
 
															+
														
 
															+let postRunFunc = null;
														
 
															+const addOnPostRun = function(func) {
														
 
															+  postRunFunc = func;
														
 
															+};
														
 
															+
														
 
															+fastTextModule.addOnPostRun(() => {
														
 
															+  if (postRunFunc) {
														
 
															+    postRunFunc();
														
 
															+  }
														
 
															+});
														
 
															+
														
 
															+// Top-level `this`, captured so that a `fetch` attached to it can take
															+// precedence over the global `fetch` (see the `fetchFunc` lookups in the
															+// `FastText` class).
															+// NOTE(review): at the top level of an ES module `this` is undefined, so
															+// this looks like it is always falsy when the file is imported - confirm.
															+const thisModule = this;
															+// Fixed file names used inside the wasm filesystem: the downloaded training
															+// file, (presumably) a test file - not used in this part of the file - and
															+// the downloaded model file.
															+const trainFileInWasmFs = 'train.txt';
															+const testFileInWasmFs = 'test.txt';
															+const modelFileInWasmFs = 'model.bin';
														
 
															+
														
 
															+const getFloat32ArrayFromHeap = (len) => {
														
 
															+  const dataBytes = len * Float32Array.BYTES_PER_ELEMENT;
														
 
															+  const dataPtr = fastTextModule._malloc(dataBytes);
														
 
															+  const dataHeap = new Uint8Array(fastTextModule.HEAPU8.buffer,
														
 
															+    dataPtr,
														
 
															+    dataBytes);
														
 
															+  return {
														
 
															+    'ptr':dataHeap.byteOffset,
														
 
															+    'size':len,
														
 
															+    'buffer':dataHeap.buffer
														
 
															+  };
														
 
															+};
														
 
															+
														
 
															+const heapToFloat32 = (r) => new Float32Array(r.buffer, r.ptr, r.size);
														
 
															+
														
 
															+class FastText {
														
 
															+  constructor() {
														
 
															+    this.f = new fastTextModule.FastText();
														
 
															+  }
														
 
															+
														
 
															+  /**
														
 
															+   * loadModel
														
 
															+   *
														
 
															+   * Loads the model file from the specified url, and returns the
														
 
															+   * corresponding `FastTextModel` object.
														
 
															+   *
														
 
															+   * @param {string}     url
														
 
															+   *     the url of the model file.
														
 
															+   *
														
 
															+   * @return {Promise}   promise object that resolves to a `FastTextModel`
														
 
															+   *
														
 
															+   */
														
 
															+  loadModel(url) {
														
 
															+    const fetchFunc = (thisModule && thisModule.fetch) || fetch;
														
 
															+
														
 
															+    const fastTextNative = this.f;
														
 
															+    return new Promise(function(resolve, reject) {
														
 
															+      fetchFunc(url).then(response => {
														
 
															+        return response.arrayBuffer();
														
 
															+      }).then(bytes => {
														
 
															+        const byteArray = new Uint8Array(bytes);
														
 
															+        const FS = fastTextModule.FS;
														
 
															+        FS.writeFile(modelFileInWasmFs, byteArray);
														
 
															+      }).then(() =>  {
														
 
															+        fastTextNative.loadModel(modelFileInWasmFs);
														
 
															+        resolve(new FastTextModel(fastTextNative));
														
 
															+      }).catch(error => {
														
 
															+        reject(error);
														
 
															+      });
														
 
															+    });
														
 
															+  }
														
 
															+
														
 
															+  _train(url, modelName, kwargs = {}, callback = null) {
														
 
															+    const fetchFunc = (thisModule && thisModule.fetch) || fetch;
														
 
															+    const fastTextNative = this.f;
														
 
															+
														
 
															+    return new Promise(function(resolve, reject) {
														
 
															+      fetchFunc(url).then(response => {
														
 
															+        return response.arrayBuffer();
														
 
															+      }).then(bytes => {
														
 
															+        const byteArray = new Uint8Array(bytes);
														
 
															+        const FS = fastTextModule.FS;
														
 
															+        FS.writeFile(trainFileInWasmFs, byteArray);
														
 
															+      }).then(() =>  {
														
 
															+        const argsList = ['lr', 'lrUpdateRate', 'dim', 'ws', 'epoch',
														
 
															+          'minCount', 'minCountLabel', 'neg', 'wordNgrams', 'loss',
														
 
															+          'model', 'bucket', 'minn', 'maxn', 't', 'label', 'verbose',
														
 
															+          'pretrainedVectors', 'saveOutput', 'seed', 'qout', 'retrain',
														
 
															+          'qnorm', 'cutoff', 'dsub', 'qnorm', 'autotuneValidationFile',
														
 
															+          'autotuneMetric', 'autotunePredictions', 'autotuneDuration',
														
 
															+          'autotuneModelSize'];
														
 
															+        const args = new fastTextModule.Args();
														
 
															+        argsList.forEach(k => {
														
 
															+          if (k in kwargs) {
														
 
															+            args[k] = kwargs[k];
														
 
															+          }
														
 
															+        });
														
 
															+        args.model = fastTextModule.ModelName[modelName];
														
 
															+        args.loss = ('loss' in kwargs) ?
														
 
															+          fastTextModule.LossName[kwargs['loss']] : 'hs';
														
 
															+        args.thread = 1;
														
 
															+        args.input = trainFileInWasmFs;
														
 
															+
														
 
															+        fastTextNative.train(args, callback);
														
 
															+
														
 
															+        resolve(new FastTextModel(fastTextNative));
														
 
															+      }).catch(error => {
														
 
															+        reject(error);
														
 
															+      });
														
 
															+    });
														
 
															+  }
														
 
															+
														
 
															+  /**
														
 
															+   * trainSupervised
														
 
															+   *
														
 
															+   * Downloads the input file from the specified url, trains a supervised
														
 
															+   * model and returns a `FastTextModel` object.
														
 
															+   *
														
 
															+   * @param {string}     url
														
 
															+   *     the url of the input file.
														
 
															+   *     The input file must must contain at least one label per line. For an
														
 
															+   *     example consult the example datasets which are part of the fastText
														
 
															+   *     repository such as the dataset pulled by classification-example.sh.
														
 
															+   *
														
 
															+   * @param {dict}       kwargs
														
 
															+   *     train parameters.
														
 
															+   *     For example {'lr': 0.5, 'epoch': 5}
														
 
															+   *
														
 
															+   * @param {function}   callback
														
 
															+   *     train callback function
														
 
															+   *     `callback` function is called regularly from the train loop:
														
 
															+   *     `callback(progress, loss, wordsPerSec, learningRate, eta)`
														
 
															+   *
														
 
															+   * @return {Promise}   promise object that resolves to a `FastTextModel`
														
 
															+   *
														
 
															+   */
														
 
															+  trainSupervised(url, kwargs = {}, callback) {
														
 
															+    const self = this;
														
 
															+    return new Promise(function(resolve, reject) {
														
 
															+      self._train(url, 'supervised', kwargs, callback).then(model => {
														
 
															+        resolve(model);
														
 
															+      }).catch(error => {
														
 
															+        reject(error);
														
 
															+      });
														
 
															+    });
														
 
															+  }
														
 
															+
														
 
															+  /**
														
 
															+   * trainUnsupervised
														
 
															+   *
														
 
															+   * Downloads the input file from the specified url, trains an unsupervised
														
 
															+   * model and returns a `FastTextModel` object.
														
 
															+   *
														
 
															+   * @param {string}     url
														
 
															+   *     the url of the input file.
														
 
															+   *     The input file must not contain any labels or use the specified label
														
 
															+   *     prefixunless it is ok for those words to be ignored. For an example
														
 
															+   *     consult the dataset pulled by the example script word-vector-example.sh
														
 
															+   *     which is part of the fastText repository.
														
 
															+   *
														
 
															+   * @param {string}     modelName
														
 
															+   *     Model to be used for unsupervised learning. `cbow` or `skipgram`.
														
 
															+   *
														
 
															+   * @param {dict}       kwargs
														
 
															+   *     train parameters.
														
 
															+   *     For example {'lr': 0.5, 'epoch': 5}
														
 
															+   *
														
 
															+   * @param {function}   callback
														
 
															+   *     train callback function
														
 
															+   *     `callback` function is called regularly from the train loop:
														
 
															+   *     `callback(progress, loss, wordsPerSec, learningRate, eta)`
														
 
															+   *
														
 
															+   * @return {Promise}   promise object that resolves to a `FastTextModel`
														
 
															+   *
														
 
															+   */
														
 
															+  trainUnsupervised(url, modelName, kwargs = {}, callback) {
														
 
															+    const self = this;
														
 
															+    return new Promise(function(resolve, reject) {
														
 
															+      self._train(url, modelName, kwargs, callback).then(model => {
														
 
															+        resolve(model);
														
 
															+      }).catch(error => {
														
 
															+        reject(error);
														
 
															+      });
														
 
															+    });
														
 
															+  }
														
 
															+
														
 
															+}
														
 
															+
														
 
															+
														
 
															+class FastTextModel {
														
 
															+  /**
															+   * `FastTextModel` represents a trained model.
															+   *
															+   * The native object is stored as `this.f`; the methods of this class
															+   * forward their work to it.
															+   *
															+   * @constructor
															+   *
															+   * @param {object}       fastTextNative
															+   *     webassembly object that makes the bridge between js and C++
															+   */
															+  constructor(fastTextNative) {
															+    this.f = fastTextNative;
															+  }
														
 
															+
														
 
															+  /**
														
 
															+     * isQuant
														
 
															+     *
														
 
															+     * @return {bool}   true if the model is quantized
														
 
															+     *
														
 
															+     */
														
 
															+  isQuant() {
														
 
															+    return this.f.isQuant;
														
 
															+  }
														
 
															+
														
 
															+  /**
														
 
															+     * getDimension
														
 
															+     *
														
 
															+     * @return {int}    the dimension (size) of a lookup vector (hidden layer)
														
 
															+     *
														
 
															+     */
														
 
															+  getDimension() {
														
 
															+    return this.f.args.dim;
														
 
															+  }
														
 
															+
														
 
															+  /**
														
 
															+     * getWordVector
														
 
															+     *
														
 
															+     * @param {string}          word
														
 
															+     *
														
 
															+     * @return {Float32Array}   the vector representation of `word`.
														
 
															+     *
														
 
															+     */
														
 
															+  getWordVector(word) {
														
 
															+    const b = getFloat32ArrayFromHeap(this.getDimension());
														
 
															+    this.f.getWordVector(b, word);
														
 
															+
														
 
															+    return heapToFloat32(b);
														
 
															+  }
														
 
															+
														
 
															+  /**
														
 
															+     * getSentenceVector
														
 
															+     *
														
 
															+     * @param {string}          text
														
 
															+     *
														
 
															+     * @return {Float32Array}   the vector representation of `text`.
														
 
															+     *
														
 
															+     */
														
 
															+  getSentenceVector(text) {
														
 
															+    if (text.indexOf('\n') != -1) {
														
 
															+      "sentence vector processes one line at a time (remove '\\n')";
														
 
															+    }
														
 
															+    text += '\n';
														
 
															+    const b = getFloat32ArrayFromHeap(this.getDimension());
														
 
															+    this.f.getSentenceVector(b, text);
														
 
															+
														
 
															+    return heapToFloat32(b);
														
 
															+  }
														
 
															+
														
 
															+  /**
														
 
															+     * getNearestNeighbors
														
 
															+     *
														
 
															+     * returns the nearest `k` neighbors of `word`.
														
 
															+     *
														
 
															+     * @param {string}          word
														
 
															+     * @param {int}             k
														
 
															+     *
														
 
															+     * @return {Array.<Pair.<number, string>>}
														
 
															+     *     words and their corresponding cosine similarities.
														
 
															+     *
														
 
															+     */
														
 
															+  getNearestNeighbors(word, k = 10) {
														
 
															+    return this.f.getNN(word, k);
														
 
															+  }
														
 
															+
														
 
															+  /**
														
 
															+     * getAnalogies
														
 
															+     *
														
 
															+     * returns the nearest `k` neighbors of the operation
														
 
															+     * `wordA - wordB + wordC`.
														
 
															+     *
														
 
															+     * @param {string}          wordA
														
 
															+     * @param {string}          wordB
														
 
															+     * @param {string}          wordC
														
 
															+     * @param {int}             k
														
 
															+     *
														
 
															+     * @return {Array.<Pair.<number, string>>}
														
 
															+     *     words and their corresponding cosine similarities
														
 
															+     *
														
 
															+     */
														
 
															+  getAnalogies(wordA, wordB, wordC, k) {
														
 
															+    return this.f.getAnalogies(k, wordA, wordB, wordC);
														
 
															+  }
														
 
															+
														
 
															+  /**
														
 
															+     * getWordId
														
 
															+     *
														
 
															+     * Given a word, get the word id within the dictionary.
														
 
															+     * Returns -1 if word is not in the dictionary.
														
 
															+     *
														
 
															+     * @return {int}    word id
														
 
															+     *
														
 
															+     */
														
 
															+  getWordId(word) {
														
 
															+    return this.f.getWordId(word);
														
 
															+  }
														
 
															+
														
 
															+  /**
														
 
															+     * getSubwordId
														
 
															+     *
														
 
															+     * Given a subword, return the index (within input matrix) it hashes to.
														
 
															+     *
														
 
															+     * @return {int}    subword id
														
 
															+     *
														
 
															+     */
														
 
															+  getSubwordId(subword) {
														
 
															+    return this.f.getSubwordId(subword);
														
 
															+  }
														
 
															+
														
 
															+  /**
														
 
															+     * getSubwords
														
 
															+     *
														
 
															+     * returns the subwords and their indicies.
														
 
															+     *
														
 
															+     * @param {string}          word
														
 
															+     *
														
 
															+     * @return {Pair.<Array.<string>, Array.<int>>}
														
 
															+     *     words and their corresponding indicies
														
 
															+     *
														
 
															+     */
														
 
															+  getSubwords(word) {
														
 
															+    return this.f.getSubwords(word);
														
 
															+  }
														
 
															+
														
 
															+  /**
														
 
															+     * getInputVector
														
 
															+     *
														
 
															+     * Given an index, get the corresponding vector of the Input Matrix.
														
 
															+     *
														
 
															+     * @param {int}             ind
														
 
															+     *
														
 
															+     * @return {Float32Array}   the vector of the `ind`'th index
														
 
															+     *
														
 
															+     */
														
 
															+  getInputVector(ind) {
														
 
															+    const b = getFloat32ArrayFromHeap(this.getDimension());
														
 
															+    this.f.getInputVector(b, ind);
														
 
															+
														
 
															+    return heapToFloat32(b);
														
 
															+  }
														
 
															+
														
 
															+  /**
														
 
															+     * predict
														
 
															+     *
														
 
															+     * Given a string, get a list of labels and a list of corresponding
														
 
															+     * probabilities. k controls the number of returned labels.
														
 
															+     *
														
 
															+     * @param {string}          text
														
 
															+     * @param {int}             k, the number of predictions to be returned
														
 
															+     * @param {number}          probability threshold
														
 
															+     *
														
 
															+     * @return {Array.<Pair.<number, string>>}
														
 
															+     *     labels and their probabilities
														
 
															+     *
														
 
															+     */
														
 
															+  predict(text, k = 1, threshold = 0.0) {
														
 
															+    return this.f.predict(text, k, threshold);
														
 
															+  }
														
 
															+
														
 
															+  /**
														
 
															+     * getInputMatrix
														
 
															+     *
														
 
															+     * Get a reference to the full input matrix of a Model. This only
														
 
															+     * works if the model is not quantized.
														
 
															+     *
														
 
															+     * @return {DenseMatrix}
														
 
															+     *     densematrix with functions: `rows`, `cols`, `at(i,j)`
														
 
															+     *
														
 
															+     * example:
														
 
															+     *     let inputMatrix = model.getInputMatrix();
														
 
															+     *     let value = inputMatrix.at(1, 2);
														
 
															+     */
														
 
															+  getInputMatrix() {
														
 
															+    if (this.isQuant()) {
														
 
															+      throw new Error("Can't get quantized Matrix");
														
 
															+    }
														
 
															+    return this.f.getInputMatrix();
														
 
															+  }
														
 
															+
														
 
															+  /**
														
 
															+     * getOutputMatrix
														
 
															+     *
														
 
															+     * Get a reference to the full input matrix of a Model. This only
														
 
															+     * works if the model is not quantized.
														
 
															+     *
														
 
															+     * @return {DenseMatrix}
														
 
															+     *     densematrix with functions: `rows`, `cols`, `at(i,j)`
														
 
															+     *
														
 
															+     * example:
														
 
															+     *     let outputMatrix = model.getOutputMatrix();
														
 
															+     *     let value = outputMatrix.at(1, 2);
														
 
															+     */
														
 
															+  getOutputMatrix() {
														
 
															+    if (this.isQuant()) {
														
 
															+      throw new Error("Can't get quantized Matrix");
														
 
															+    }
														
 
															+    return this.f.getOutputMatrix();
														
 
															+  }
														
 
															+
														
 
															+  /**
														
 
															+     * getWords
														
 
															+     *
														
 
															+     * Get the entire list of words of the dictionary including the frequency
														
 
															+     * of the individual words. This does not include any subwords. For that
														
 
															+     * please consult the function get_subwords.
														
 
															+     *
														
 
															+     * @return {Pair.<Array.<string>, Array.<int>>}
														
 
															+     *     words and their corresponding frequencies
														
 
															+     *
														
 
															+     */
														
 
															+  getWords() {
														
 
															+    return this.f.getWords();
														
 
															+  }
														
 
															+
														
 
															+  /**
														
 
															+     * getLabels
														
 
															+     *
														
 
															+     * Get the entire list of labels of the dictionary including the frequency
														
 
															+     * of the individual labels.
														
 
															+     *
														
 
															+     * @return {Pair.<Array.<string>, Array.<int>>}
														
 
															+     *     labels and their corresponding frequencies
														
 
															+     *
														
 
															+     */
														
 
															+  getLabels() {
														
 
															+    return this.f.getLabels();
														
 
															+  }
														
 
															+
														
 
															+  /**
														
 
															+     * getLine
														
 
															+     *
														
 
															+     * Split a line of text into words and labels. Labels must start with
														
 
															+     * the prefix used to create the model (__label__ by default).
														
 
															+     *
														
 
															+     * @param {string}          text
														
 
															+     *
														
 
															+     * @return {Pair.<Array.<string>, Array.<string>>}
														
 
															+     *     words and labels
														
 
															+     *
														
 
															+     */
														
 
															+  getLine(text) {
														
 
															+    return this.f.getLine(text);
														
 
															+  }
														
 
															+
														
 
															+  /**
														
 
															+     * saveModel
														
 
															+     *
														
 
															+     * Saves the model file in web assembly in-memory FS and returns a blob
														
 
															+     *
														
 
															+     * @return {Blob}           blob data of the file saved in web assembly FS
														
 
															+     *
														
 
															+     */
														
 
															+  saveModel() {
														
 
															+    this.f.saveModel(modelFileInWasmFs);
														
 
															+    const content = fastTextModule.FS.readFile(modelFileInWasmFs,
														
 
															+      { encoding: 'binary' });
														
 
															+    return new Blob(
														
 
															+      [new Uint8Array(content, content.byteOffset, content.length)],
														
 
															+      { type: ' application/octet-stream' }
														
 
															+    );
														
 
															+  }
														
 
															+
														
 
															+  /**
														
 
															+     * test
														
 
															+     *
														
 
															+     * Downloads the test file from the specified url, evaluates the supervised
														
 
															+     * model with it.
														
 
															+     *
														
 
															+     * @param {string}          url
														
 
															+     * @param {int}             k, the number of predictions to be returned
														
 
															+     * @param {number}          probability threshold
														
 
															+     *
														
 
															+     * @return {Promise}   promise object that resolves to a `Meter` object
														
 
															+     *
														
 
															+     * example:
														
 
															+     * model.test("/absolute/url/to/test.txt", 1, 0.0).then((meter) => {
														
 
															+     *     console.log(meter.precision);
														
 
															+     *     console.log(meter.recall);
														
 
															+     *     console.log(meter.f1Score);
														
 
															+     *     console.log(meter.nexamples());
														
 
															+     * });
														
 
															+     *
														
 
															+     */
														
 
															+  test(url, k, threshold) {
														
 
															+    const fetchFunc = (thisModule && thisModule.fetch) || fetch;
														
 
															+    const fastTextNative = this.f;
														
 
															+
														
 
															+    return new Promise(function(resolve, reject) {
														
 
															+      fetchFunc(url).then(response => {
														
 
															+        return response.arrayBuffer();
														
 
															+      }).then(bytes => {
														
 
															+        const byteArray = new Uint8Array(bytes);
														
 
															+        const FS = fastTextModule.FS;
														
 
															+        FS.writeFile(testFileInWasmFs, byteArray);
														
 
															+      }).then(() =>  {
														
 
															+        const meter = fastTextNative.test(testFileInWasmFs, k, threshold);
														
 
															+        resolve(meter);
														
 
															+      }).catch(error => {
														
 
															+        reject(error);
														
 
															+      });
														
 
															+    });
														
 
															+  }
														
 
															+}
														
 
															+
														
 
															+
														
 
															+export {FastText, addOnPostRun};
														
--- a/webassembly/fasttext_wasm.cc
+++ b/webassembly/fasttext_wasm.cc
@@ -0,0 +1,328 @@
 
															+/**
														
 
															+ * Copyright (c) 2016-present, Facebook, Inc.
														
 
															+ * All rights reserved.
														
 
															+ *
														
 
															+ * This source code is licensed under the MIT license found in the
														
 
															+ * LICENSE file in the root directory of this source tree.
														
 
															+ */
														
 
															+
														
 
#include <emscripten.h>
#include <emscripten/bind.h>
#include <fasttext.h>

#include <algorithm>
#include <functional>
#include <limits>
#include <sstream>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>
														
 
															+
														
 
															+using namespace emscripten;
														
 
															+using namespace fasttext;
														
 
															+
														
 
// Describes a float buffer passed through the bindings as a plain value:
// an address plus an element count.
struct Float32ArrayBridge {
  // Buffer address stored as an integer; fillFloat32ArrayFromVector
  // reinterprets it as a `float*`.
  uintptr_t ptr;
  // Number of floats in the buffer; fillFloat32ArrayFromVector expects it to
  // equal the size of the vector being copied.
  int size;
};
														
 
															+
														
 
															+void fillFloat32ArrayFromVector(
														
 
															+    const Float32ArrayBridge& vecFloat,
														
 
															+    const Vector& v) {
														
 
															+  float* buffer = reinterpret_cast<float*>(vecFloat.ptr);
														
 
															+  assert(vecFloat.size == v.size());
														
 
															+  for (int i = 0; i < v.size(); i++) {
														
 
															+    buffer[i] = v[i];
														
 
															+  }
														
 
															+}
														
 
															+
														
 
															+std::vector<std::pair<float, std::string>>
														
 
															+predict(FastText* fasttext, std::string text, int k, double threshold) {
														
 
															+  std::stringstream ioss(text + std::string("\n"));
														
 
															+
														
 
															+  std::vector<std::pair<float, std::string>> predictions;
														
 
															+  fasttext->predictLine(ioss, predictions, k, threshold);
														
 
															+
														
 
															+  return predictions;
														
 
															+}
														
 
															+
														
 
															+void getWordVector(
														
 
															+    FastText* fasttext,
														
 
															+    const Float32ArrayBridge& vecFloat,
														
 
															+    std::string word) {
														
 
															+  assert(fasttext);
														
 
															+  Vector v(fasttext->getDimension());
														
 
															+  fasttext->getWordVector(v, word);
														
 
															+
														
 
															+  fillFloat32ArrayFromVector(vecFloat, v);
														
 
															+}
														
 
															+
														
 
															+void getSentenceVector(
														
 
															+    FastText* fasttext,
														
 
															+    const Float32ArrayBridge& vecFloat,
														
 
															+    std::string text) {
														
 
															+  assert(fasttext);
														
 
															+  Vector v(fasttext->getDimension());
														
 
															+  std::stringstream ioss(text);
														
 
															+  fasttext->getSentenceVector(ioss, v);
														
 
															+
														
 
															+  fillFloat32ArrayFromVector(vecFloat, v);
														
 
															+}
														
 
															+
														
 
															+std::pair<std::vector<std::string>, std::vector<int32_t>> getSubwords(
														
 
															+    FastText* fasttext,
														
 
															+    std::string word) {
														
 
															+  assert(fasttext);
														
 
															+  std::vector<std::string> subwords;
														
 
															+  std::vector<int32_t> ngrams;
														
 
															+  std::shared_ptr<const Dictionary> d = fasttext->getDictionary();
														
 
															+  d->getSubwords(word, ngrams, subwords);
														
 
															+
														
 
															+  return std::pair<std::vector<std::string>, std::vector<int32_t>>(
														
 
															+      subwords, ngrams);
														
 
															+}
														
 
															+
														
 
															+void getInputVector(
														
 
															+    FastText* fasttext,
														
 
															+    const Float32ArrayBridge& vecFloat,
														
 
															+    int32_t ind) {
														
 
															+  assert(fasttext);
														
 
															+  Vector v(fasttext->getDimension());
														
 
															+  fasttext->getInputVector(v, ind);
														
 
															+
														
 
															+  fillFloat32ArrayFromVector(vecFloat, v);
														
 
															+}
														
 
															+
														
 
															+void train(FastText* fasttext, Args* args, emscripten::val jsCallback) {
														
 
															+  assert(args);
														
 
															+  assert(fasttext);
														
 
															+  fasttext->train(
														
 
															+      *args,
														
 
															+      [=](float progress, float loss, double wst, double lr, int64_t eta) {
														
 
															+        jsCallback(progress, loss, wst, lr, static_cast<int32_t>(eta));
														
 
															+      });
														
 
															+}
														
 
															+
														
 
															+const DenseMatrix* getInputMatrix(FastText* fasttext) {
														
 
															+  assert(fasttext);
														
 
															+  std::shared_ptr<const DenseMatrix> mm = fasttext->getInputMatrix();
														
 
															+  return mm.get();
														
 
															+}
														
 
															+
														
 
															+const DenseMatrix* getOutputMatrix(FastText* fasttext) {
														
 
															+  assert(fasttext);
														
 
															+  std::shared_ptr<const DenseMatrix> mm = fasttext->getOutputMatrix();
														
 
															+  return mm.get();
														
 
															+}
														
 
															+
														
 
															+std::pair<std::vector<std::string>, std::vector<int32_t>> getTokens(
														
 
															+    const FastText& fasttext,
														
 
															+    const std::function<std::string(const Dictionary&, int32_t)> getter,
														
 
															+    entry_type entryType) {
														
 
															+  std::vector<std::string> tokens;
														
 
															+  std::vector<int32_t> retVocabFrequencies;
														
 
															+  std::shared_ptr<const Dictionary> d = fasttext.getDictionary();
														
 
															+  std::vector<int64_t> vocabFrequencies = d->getCounts(entryType);
														
 
															+  for (int32_t i = 0; i < vocabFrequencies.size(); i++) {
														
 
															+    tokens.push_back(getter(*d, i));
														
 
															+    retVocabFrequencies.push_back(vocabFrequencies[i]);
														
 
															+  }
														
 
															+  return std::pair<std::vector<std::string>, std::vector<int32_t>>(
														
 
															+      tokens, retVocabFrequencies);
														
 
															+}
														
 
															+
														
 
															+std::pair<std::vector<std::string>, std::vector<int32_t>> getWords(
														
 
															+    FastText* fasttext) {
														
 
															+  assert(fasttext);
														
 
															+  return getTokens(*fasttext, &Dictionary::getWord, entry_type::word);
														
 
															+}
														
 
															+
														
 
															+std::pair<std::vector<std::string>, std::vector<int32_t>> getLabels(
														
 
															+    FastText* fasttext) {
														
 
															+  assert(fasttext);
														
 
															+  return getTokens(*fasttext, &Dictionary::getLabel, entry_type::label);
														
 
															+}
														
 
															+
														
 
															+std::pair<std::vector<std::string>, std::vector<std::string>> getLine(
														
 
															+    FastText* fasttext,
														
 
															+    const std::string text) {
														
 
															+  assert(fasttext);
														
 
															+  std::shared_ptr<const Dictionary> d = fasttext->getDictionary();
														
 
															+  std::stringstream ioss(text);
														
 
															+  std::string token;
														
 
															+  std::vector<std::string> words;
														
 
															+  std::vector<std::string> labels;
														
 
															+  while (d->readWord(ioss, token)) {
														
 
															+    uint32_t h = d->hash(token);
														
 
															+    int32_t wid = d->getId(token, h);
														
 
															+    entry_type type = wid < 0 ? d->getType(token) : d->getType(wid);
														
 
															+
														
 
															+    if (type == entry_type::word) {
														
 
															+      words.push_back(token);
														
 
															+    } else if (type == entry_type::label && wid >= 0) {
														
 
															+      labels.push_back(token);
														
 
															+    }
														
 
															+    if (token == Dictionary::EOS)
														
 
															+      break;
														
 
															+  }
														
 
															+  return std::pair<std::vector<std::string>, std::vector<std::string>>(
														
 
															+      words, labels);
														
 
															+}
														
 
															+
														
 
															+Meter test(
														
 
															+    FastText* fasttext,
														
 
															+    const std::string& filename,
														
 
															+    int32_t k,
														
 
															+    float threshold) {
														
 
															+  assert(fasttext);
														
 
															+  std::ifstream ifs(filename);
														
 
															+  if (!ifs.is_open()) {
														
 
															+    throw std::invalid_argument("Test file cannot be opened!");
														
 
															+  }
														
 
															+  Meter meter;
														
 
															+  fasttext->test(ifs, k, threshold, meter);
														
 
															+  ifs.close();
														
 
															+
														
 
															+  return meter;
														
 
															+}
														
 
															+
														
 
															+EMSCRIPTEN_BINDINGS(fasttext) {
														
 
															+  class_<Args>("Args")
														
 
															+      .constructor<>()
														
 
															+      .property("input", &Args::input)
														
 
															+      .property("output", &Args::output)
														
 
															+      .property("lr", &Args::lr)
														
 
															+      .property("lrUpdateRate", &Args::lrUpdateRate)
														
 
															+      .property("dim", &Args::dim)
														
 
															+      .property("ws", &Args::ws)
														
 
															+      .property("epoch", &Args::epoch)
														
 
															+      .property("minCount", &Args::minCount)
														
 
															+      .property("minCountLabel", &Args::minCountLabel)
														
 
															+      .property("neg", &Args::neg)
														
 
															+      .property("wordNgrams", &Args::wordNgrams)
														
 
															+      .property("loss", &Args::loss)
														
 
															+      .property("model", &Args::model)
														
 
															+      .property("bucket", &Args::bucket)
														
 
															+      .property("minn", &Args::minn)
														
 
															+      .property("maxn", &Args::maxn)
														
 
															+      .property("thread", &Args::thread)
														
 
															+      .property("t", &Args::t)
														
 
															+      .property("label", &Args::label)
														
 
															+      .property("verbose", &Args::verbose)
														
 
															+      .property("pretrainedVectors", &Args::pretrainedVectors)
														
 
															+      .property("saveOutput", &Args::saveOutput)
														
 
															+      .property("seed", &Args::seed)
														
 
															+      .property("qout", &Args::qout)
														
 
															+      .property("retrain", &Args::retrain)
														
 
															+      .property("qnorm", &Args::qnorm)
														
 
															+      .property("cutoff", &Args::cutoff)
														
 
															+      .property("dsub", &Args::dsub)
														
 
															+      .property("qnorm", &Args::qnorm)
														
 
															+      .property("autotuneValidationFile", &Args::autotuneValidationFile)
														
 
															+      .property("autotuneMetric", &Args::autotuneMetric)
														
 
															+      .property("autotunePredictions", &Args::autotunePredictions)
														
 
															+      .property("autotuneDuration", &Args::autotuneDuration)
														
 
															+      .property("autotuneModelSize", &Args::autotuneModelSize);
														
 
															+
														
 
															+  class_<FastText>("FastText")
														
 
															+      .constructor<>()
														
 
															+      .function(
														
 
															+          "loadModel",
														
 
															+          select_overload<void(const std::string&)>(&FastText::loadModel))
														
 
															+      .function(
														
 
															+          "getNN",
														
 
															+          select_overload<std::vector<std::pair<real, std::string>>(
														
 
															+              const std::string& word, int32_t k)>(&FastText::getNN))
														
 
															+      .function("getAnalogies", &FastText::getAnalogies)
														
 
															+      .function("getWordId", &FastText::getWordId)
														
 
															+      .function("getSubwordId", &FastText::getSubwordId)
														
 
															+      .function("getInputMatrix", &getInputMatrix, allow_raw_pointers())
														
 
															+      .function("getOutputMatrix", &getOutputMatrix, allow_raw_pointers())
														
 
															+      .function("getWords", &getWords, allow_raw_pointers())
														
 
															+      .function("getLabels", &getLabels, allow_raw_pointers())
														
 
															+      .function("getLine", &getLine, allow_raw_pointers())
														
 
															+      .function("test", &test, allow_raw_pointers())
														
 
															+      .function("predict", &predict, allow_raw_pointers())
														
 
															+      .function("getWordVector", &getWordVector, allow_raw_pointers())
														
 
															+      .function("getSentenceVector", &getSentenceVector, allow_raw_pointers())
														
 
															+      .function("getSubwords", &getSubwords, allow_raw_pointers())
														
 
															+      .function("getInputVector", &getInputVector, allow_raw_pointers())
														
 
															+      .function("train", &train, allow_raw_pointers())
														
 
															+      .function("saveModel", &FastText::saveModel)
														
 
															+      .property("isQuant", &FastText::isQuant)
														
 
															+      .property("args", &FastText::getArgs);
														
 
															+
														
 
															+  class_<DenseMatrix>("DenseMatrix")
														
 
															+      .constructor<>()
														
 
															+      // we return int32_t because "JS can't represent int64s"
														
 
															+      .function(
														
 
															+          "rows",
														
 
															+          optional_override(
														
 
															+              [](const DenseMatrix* self) -> int32_t { return self->rows(); }),
														
 
															+          allow_raw_pointers())
														
 
															+      .function(
														
 
															+          "cols",
														
 
															+          optional_override(
														
 
															+              [](const DenseMatrix* self) -> int32_t { return self->cols(); }),
														
 
															+          allow_raw_pointers())
														
 
															+      .function(
														
 
															+          "at",
														
 
															+          optional_override(
														
 
															+              [](const DenseMatrix* self, int32_t i, int32_t j) -> const float {
														
 
															+                return self->at(i, j);
														
 
															+              }),
														
 
															+          allow_raw_pointers());
														
 
															+
														
 
															+  class_<Meter>("Meter")  // Expose Meter to JS under the name "Meter".
														
 
															+      .constructor<>()  // No-argument constructor.
														
 
															+      .property(  // Getter-only; picks the zero-arg const overload returning double.
														
 
															+          "precision", select_overload<double(void) const>(&Meter::precision))
														
 
															+      .property("recall", select_overload<double(void) const>(&Meter::recall))
														
 
															+      .property("f1Score", select_overload<double(void) const>(&Meter::f1Score))
														
 
															+      .function(  // Wrapped in a lambda whose declared return type is int32_t.
														
 
															+          "nexamples",
														
 
															+          optional_override(
														
 
															+              [](const Meter* self) -> int32_t { return self->nexamples(); }),
														
 
															+          allow_raw_pointers());  // The lambda takes a raw Meter pointer.
														
 
															+
														
 
															+  enum_<model_name>("ModelName")  // JS enum "ModelName" for model_name.
														
 
															+      .value("cbow", model_name::cbow)
														
 
															+      .value("skipgram", model_name::sg)  // JS name differs from the enumerator "sg".
														
 
															+      .value("supervised", model_name::sup);  // JS name differs from the enumerator "sup".
														
 
															+
														
 
															+  enum_<loss_name>("LossName")  // JS enum "LossName"; value names match the enumerators.
														
 
															+      .value("hs", loss_name::hs)
														
 
															+      .value("ns", loss_name::ns)
														
 
															+      .value("softmax", loss_name::softmax)
														
 
															+      .value("ova", loss_name::ova);
														
 
															+
														
 
															+  emscripten::value_object<Float32ArrayBridge>("Float32ArrayBridge")  // Value object with fields ptr and size.
														
 
															+      .field("ptr", &Float32ArrayBridge::ptr)
														
 
															+      .field("size", &Float32ArrayBridge::size);  // NOTE(review): presumably the element count of the buffer at ptr - confirm.
														
 
															+
														
 
															+  emscripten::value_array<std::pair<float, std::string>>(  // Pair <-> 2-element JS array.
														
 
															+      "std::pair<float, std::string>")
														
 
															+      .element(&std::pair<float, std::string>::first)  // Array index 0.
														
 
															+      .element(&std::pair<float, std::string>::second);  // Array index 1.
														
 
															+
														
 
															+  emscripten::register_vector<std::pair<float, std::string>>(  // Binds the vector-of-pairs type under this name.
														
 
															+      "std::vector<std::pair<float, std::string>>");
														
 
															+
														
 
															+  emscripten::value_array<  // Pair of (string vector, int32_t vector) <-> 2-element JS array.
														
 
															+      std::pair<std::vector<std::string>, std::vector<int32_t>>>(
														
 
															+      "std::pair<std::vector<std::string>, std::vector<int32_t>>")
														
 
															+      .element(  // Array index 0: the string vector.
														
 
															+          &std::pair<std::vector<std::string>, std::vector<int32_t>>::first)
														
 
															+      .element(  // Array index 1: the int32_t vector.
														
 
															+          &std::pair<std::vector<std::string>, std::vector<int32_t>>::second);
														
 
															+
														
 
															+  emscripten::value_array<  // Pair of two string vectors <-> 2-element JS array.
														
 
															+      std::pair<std::vector<std::string>, std::vector<std::string>>>(
														
 
															+      "std::pair<std::vector<std::string>, std::vector<std::string>>")
														
 
															+      .element(  // Array index 0.
														
 
															+          &std::pair<std::vector<std::string>, std::vector<std::string>>::first)
														
 
															+      .element(&std::pair<std::vector<std::string>, std::vector<std::string>>::
														
 
															+                   second);  // Array index 1.
														
 
															+
														
 
															+  emscripten::register_vector<float>("std::vector<float>");  // Binds std::vector<float> under this name.
														
 
															+
														
 
															+  emscripten::register_vector<int32_t>("std::vector<int32_t>");  // Binds std::vector<int32_t> under this name.
														
 
															+
														
 
															+  emscripten::register_vector<std::string>("std::vector<std::string>");  // Binds std::vector<std::string> under this name.
														
 
															+}