6 년 전 · 02c61efaa6
--- a/docs/crawl-vectors.md
+++ b/docs/crawl-vectors.md
@@ -7,6 +7,73 @@ We distribute pre-trained word vectors for 157 languages, trained on [*Common Cr
 
				 These models were trained using CBOW with position-weights, in dimension 300, with character n-grams of length 5, a window of size 5 and 10 negatives.
			
 
				 We also distribute three new word analogy datasets, for French, Hindi and Polish.
			
 
				 
			
 
				+### Download directly with command line or from python
			
 
				+
			
 
				+In order to download from the command line or from Python code, you must have installed the Python package as [described here](/docs/en/support.html#building-fasttext-python-module).
			
 
				+
			
 
				+<!--DOCUSAURUS_CODE_TABS-->
			
 
				+<!--Command line-->
			
 
				+```bash
			
 
				+$ ./download_model.py en     # English
			
 
				+Downloading https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.en.300.bin.gz
			
 
				+ (19.78%) [=========>                                         ]
			
 
				+```
			
 
				+Once the download is finished, use the model as usual:
			
 
				+```bash
			
 
				+$ ./fasttext nn cc.en.300.bin 10
			
 
				+Query word?
			
 
				+```
			
 
				+<!--Python-->
			
 
				+```py
			
 
				+>>> import fasttext.util
			
 
				+>>> fasttext.util.download_model('en', if_exists='ignore')  # English
			
 
				+>>> ft = fasttext.load_model('cc.en.300.bin')
			
 
				+```
			
 
				+<!--END_DOCUSAURUS_CODE_TABS-->
			
 
				+
			
 
				+### Adapt the dimension
			
 
				+
			
 
				+The pre-trained word vectors we distribute have dimension 300. If you need a smaller size, you can use our dimension reducer.
			
 
				+In order to use that feature, you must have installed the Python package as [described here](/docs/en/support.html#building-fasttext-python-module).
			
 
				+
			
 
				+For example, in order to get vectors of dimension 100:
			
 
				+<!--DOCUSAURUS_CODE_TABS-->
			
 
				+
			
 
				+<!--Command line-->
			
 
				+```bash
			
 
				+$ ./reduce_model.py cc.en.300.bin 100
			
 
				+Loading model
			
 
				+Reducing matrix dimensions
			
 
				+Saving model
			
 
				+cc.en.100.bin saved
			
 
				+```
			
 
				+Then you can use the `cc.en.100.bin` model file as usual.
			
 
				+
			
 
				+<!--Python-->
			
 
				+```py
			
 
				+>>> import fasttext
			
 
				+>>> import fasttext.util
			
 
				+>>> ft = fasttext.load_model('cc.en.300.bin')
			
 
				+>>> ft.get_dimension()
			
 
				+300
			
 
				+>>> fasttext.util.reduce_model(ft, 100)
			
 
				+>>> ft.get_dimension()
			
 
				+100
			
 
				+```
			
 
				+Then you can use the `ft` model object as usual:
			
 
				+```py
			
 
				+>>> ft.get_word_vector('hello').shape
			
 
				+(100,)
			
 
				+>>> ft.get_nearest_neighbors('hello')
			
 
				+[(0.775576114654541, u'heyyyy'), (0.7686290144920349, u'hellow'), (0.7663413286209106, u'hello-'), (0.7579624056816101, u'heyyyyy'), (0.7495524287223816, u'hullo'), (0.7473770380020142, u'.hello'), (0.7407292127609253, u'Hiiiii'), (0.7402616739273071, u'hellooo'), (0.7399682402610779, u'hello.'), (0.7396857738494873, u'Heyyyyy')]
			
 
				+```
			
 
				+or save it for later use:
			
 
				+```py
			
 
				+>>> ft.save_model('cc.en.100.bin')
			
 
				+```
			
 
				+<!--END_DOCUSAURUS_CODE_TABS-->
			
 
				+
			
 
				+
			
 
				 ### Format
			
 
				 
			
 
				 The word vectors are available in both binary and text formats.
			
--- a/docs/faqs.md
+++ b/docs/faqs.md
@@ -61,3 +61,6 @@ If you run fastText multiple times you'll obtain slightly different results each
 
				 
			
 
				 ## Why do I get a probability of 1.00001?
			
 
				 This is a known rounding issue. You can consider it as 1.0.
			
 
				+
			
 
				+## How can I change the dimension of word vectors of a model file?
			
 
				+If you have already trained a model, or downloaded pre-trained word vectors, you can adapt the dimension of the word vectors with the `reduce_model.py` script or by calling `fasttext.util.reduce_model` from Python, as [described here](/docs/en/crawl-vectors.html#adapt-the-dimension).
			
--- a/download_model.py
+++ b/download_model.py
@@ -0,0 +1,48 @@
 
				+#!/usr/bin/env python
			
 
				+# -*- coding: utf-8 -*-
			
 
				+#
			
 
				+# Copyright (c) 2017-present, Facebook, Inc.
			
 
				+# All rights reserved.
			
 
				+#
			
 
				+# This source code is licensed under the MIT license found in the
			
 
				+# LICENSE file in the root directory of this source tree.
			
 
				+
			
 
				+from __future__ import absolute_import
			
 
				+from __future__ import division
			
 
				+from __future__ import print_function
			
 
				+from __future__ import unicode_literals
			
 
				+
			
 
				+import argparse
			
 
				+
			
 
				+import fasttext.util
			
 
				+
			
 
				+
			
 
				+args = None
			
 
				+
			
 
				+
			
 
				def command_download(lang_id, if_exists):
				    """
				    Fetch the pre-trained common-crawl vectors for `lang_id` by delegating
				    to `fasttext.util.download_model`
				    (see https://fasttext.cc/docs/en/crawl-vectors.html).

				    `if_exists` is forwarded unchanged and selects what happens when the
				    file is already present. Nothing is returned.
				    """
				    fasttext.util.download_model(lang_id, if_exists=if_exists)
			
 
				+
			
 
				+
			
 
				def main():
				    """
				    Command-line entry point: parse the language identifier and the
				    `--overwrite` flag, then download the matching pre-trained model.

				    The parsed namespace is also stored in the module-level `args`.
				    """
				    global args

				    parser = argparse.ArgumentParser(
				        description='fastText helper tool to download pre-trained models.')
				    # nargs="?" makes the positional optional so that the declared default
				    # ("en") is actually honoured; argparse ignores `default` on a required
				    # positional argument.
				    parser.add_argument("language", type=str, nargs="?", default="en",
				                        help="language identifier of the pre-trained vectors. For example `en` or `fr`.")
				    parser.add_argument("--overwrite", action="store_true",
				                        help="overwrite if file exists.")

				    args = parser.parse_args()

				    # Without --overwrite an existing file is reported and left untouched.
				    command_download(args.language, if_exists=(
				        'overwrite' if args.overwrite else 'strict'))
			
 
				+
			
 
				+
			
 
				+if __name__ == '__main__':
			
 
				+    main()
			
--- a/python/benchmarks/get_word_vector.py
+++ b/python/benchmarks/get_word_vector.py
@@ -42,7 +42,8 @@ def get_word_vector(data, model):
 
				 
			
 
				 
			
 
				 if __name__ == "__main__":
			
 
				-    parser = argparse.ArgumentParser(description='Simple benchmark for get_word_vector.')
			
 
				+    parser = argparse.ArgumentParser(
			
 
				+        description='Simple benchmark for get_word_vector.')
			
 
				     parser.add_argument('model', help='A model file to use for benchmarking.')
			
 
				     parser.add_argument('data', help='A data file to use for benchmarking.')
			
 
				     args = parser.parse_args()
			
--- a/python/doc/examples/train_supervised.py
+++ b/python/doc/examples/train_supervised.py
@@ -20,6 +20,7 @@ def print_results(N, p, r):
 
				     print("P@{}\t{:.3f}".format(1, p))
			
 
				     print("R@{}\t{:.3f}".format(1, r))
			
 
				 
			
 
				+
			
 
				 if __name__ == "__main__":
			
 
				     train_data = os.path.join(os.getenv("DATADIR", ''), 'cooking.train')
			
 
				     valid_data = os.path.join(os.getenv("DATADIR", ''), 'cooking.valid')
			
--- a/python/fasttext_module/fasttext/FastText.py
+++ b/python/fasttext_module/fasttext/FastText.py
@@ -170,7 +170,7 @@ class _FastText(object):
 
				 
			
 
				     def get_input_matrix(self):
			
 
				         """
			
 
				-        Get a copy of the full input matrix of a Model. This only
			
 
				+        Get a reference to the full input matrix of a Model. This only
			
 
				         works if the model is not quantized.
			
 
				         """
			
 
				         if self.f.isQuant():
			
@@ -179,7 +179,7 @@ class _FastText(object):
 
				 
			
 
				     def get_output_matrix(self):
			
 
				         """
			
 
				-        Get a copy of the full output matrix of a Model. This only
			
 
				+        Get a reference to the full output matrix of a Model. This only
			
 
				         works if the model is not quantized.
			
 
				         """
			
 
				         if self.f.isQuant():
			
@@ -292,6 +292,14 @@ class _FastText(object):
 
				             qnorm
			
 
				         )
			
 
				 
			
 
				+    def set_matrices(self, input_matrix, output_matrix):
			
 
				+        """
			
 
				+        Set input and output matrices. This function assumes you know what you
			
 
				+        are doing.
			
 
				+        """
			
 
				+        self.f.setMatrices(input_matrix.astype(np.float32),
			
 
				+                           output_matrix.astype(np.float32))
			
 
				+
			
 
				     @property
			
 
				     def words(self):
			
 
				         if self._words is None:
			
--- a/python/fasttext_module/fasttext/pybind/fasttext_pybind.cc
+++ b/python/fasttext_module/fasttext/pybind/fasttext_pybind.cc
@@ -180,14 +180,34 @@ PYBIND11_MODULE(fasttext_pybind, m) {
 
				           [](fasttext::FastText& m) {
			
 
				             std::shared_ptr<const fasttext::DenseMatrix> mm =
			
 
				                 m.getInputMatrix();
			
 
				-            return *mm.get();
			
 
				-          })
			
 
				+            return mm.get();
			
 
				+          },
			
 
				+          pybind11::return_value_policy::reference)
			
 
				       .def(
			
 
				           "getOutputMatrix",
			
 
				           [](fasttext::FastText& m) {
			
 
				             std::shared_ptr<const fasttext::DenseMatrix> mm =
			
 
				                 m.getOutputMatrix();
			
 
				-            return *mm.get();
			
 
				+            return mm.get();
			
 
				+          },
			
 
				+          pybind11::return_value_policy::reference)
			
 
				+      .def(
			
 
				+          "setMatrices",
			
 
				+          [](fasttext::FastText& m,
			
 
				+             py::buffer inputMatrixBuffer,
			
 
				+             py::buffer outputMatrixBuffer) {
			
 
				+            py::buffer_info inputMatrixInfo = inputMatrixBuffer.request();
			
 
				+            py::buffer_info outputMatrixInfo = outputMatrixBuffer.request();
			
 
				+
			
 
				+            m.setMatrices(
			
 
				+                std::make_shared<fasttext::DenseMatrix>(
			
 
				+                    inputMatrixInfo.shape[0],
			
 
				+                    inputMatrixInfo.shape[1],
			
 
				+                    static_cast<float*>(inputMatrixInfo.ptr)),
			
 
				+                std::make_shared<fasttext::DenseMatrix>(
			
 
				+                    outputMatrixInfo.shape[0],
			
 
				+                    outputMatrixInfo.shape[1],
			
 
				+                    static_cast<float*>(outputMatrixInfo.ptr)));
			
 
				           })
			
 
				       .def(
			
 
				           "loadModel",
			
--- a/python/fasttext_module/fasttext/util/__init__.py
+++ b/python/fasttext_module/fasttext/util/__init__.py
@@ -11,3 +11,5 @@ from __future__ import unicode_literals
 
				 
			
 
				 from .util import test
			
 
				 from .util import find_nearest_neighbor
			
 
				+from .util import reduce_model
			
 
				+from .util import download_model
			
--- a/python/fasttext_module/fasttext/util/util.py
+++ b/python/fasttext_module/fasttext/util/util.py
@@ -1,3 +1,5 @@
 
				+#!/usr/bin/env python
			
 
				+# -*- coding: utf-8 -*-
			
 
				 # Copyright (c) 2017-present, Facebook, Inc.
			
 
				 # All rights reserved.
			
 
				 #
			
@@ -18,6 +20,35 @@ from __future__ import print_function
 
				 from __future__ import unicode_literals
			
 
				 
			
 
				 import numpy as np
			
 
				+import sys
			
 
				+import shutil
			
 
				+import os
			
 
				+import gzip
			
 
				+
			
 
				+try:
			
 
				+    from urllib.request import urlopen
			
 
				+except ImportError:
			
 
				+    from urllib2 import urlopen
			
 
				+
			
 
				+
			
 
				+valid_lang_ids = {"af", "sq", "als", "am", "ar", "an", "hy", "as", "ast",
			
 
				+                  "az", "ba", "eu", "bar", "be", "bn", "bh", "bpy", "bs",
			
 
				+                  "br", "bg", "my", "ca", "ceb", "bcl", "ce", "zh", "cv",
			
 
				+                  "co", "hr", "cs", "da", "dv", "nl", "pa", "arz", "eml",
			
 
				+                  "en", "myv", "eo", "et", "hif", "fi", "fr", "gl", "ka",
			
 
				+                  "de", "gom", "el", "gu", "ht", "he", "mrj", "hi", "hu",
			
 
				+                  "is", "io", "ilo", "id", "ia", "ga", "it", "ja", "jv",
			
 
				+                  "kn", "pam", "kk", "km", "ky", "ko", "ku", "ckb", "la",
			
 
				+                  "lv", "li", "lt", "lmo", "nds", "lb", "mk", "mai", "mg",
			
 
				+                  "ms", "ml", "mt", "gv", "mr", "mzn", "mhr", "min", "xmf",
			
 
				+                  "mwl", "mn", "nah", "nap", "ne", "new", "frr", "nso",
			
 
				+                  "no", "nn", "oc", "or", "os", "pfl", "ps", "fa", "pms",
			
 
				+                  "pl", "pt", "qu", "ro", "rm", "ru", "sah", "sa", "sc",
			
 
				+                  "sco", "gd", "sr", "sh", "scn", "sd", "si", "sk", "sl",
			
 
				+                  "so", "azb", "es", "su", "sw", "sv", "tl", "tg", "ta",
			
 
				+                  "tt", "te", "th", "bo", "tr", "tk", "uk", "hsb", "ur",
			
 
				+                  "ug", "uz", "vec", "vi", "vo", "wa", "war", "cy", "vls",
			
 
				+                  "fy", "pnb", "yi", "yo", "diq", "zea"}
			
 
				 
			
 
				 
			
 
				 # TODO: Add example on reproducing model.test with util.test and model.get_line
			
@@ -58,3 +89,121 @@ def find_nearest_neighbor(query, vectors, ban_set, cossims=None):
 
				         rank -= 1
			
 
				         result_i = np.argpartition(cossims, rank)[rank]
			
 
				     return result_i
			
 
				+
			
 
				+
			
 
				+def _reduce_matrix(X_orig, dim, eigv):
			
 
				+    """
			
 
				+    Reduces the dimension of a (m × n)   matrix `X_orig` to
			
 
				+                          to a (m × dim) matrix `X_reduced`
			
 
				+    It uses only the first 100000 rows of `X_orig` to do the mapping.
			
 
				+    Matrix types are all `np.float32` in order to avoid unncessary copies.
			
 
				+    """
			
 
				+    if eigv is None:
			
 
				+        mapping_size = 100000
			
 
				+        X = X_orig[:mapping_size]
			
 
				+        X = X - X.mean(axis=0, dtype=np.float32)
			
 
				+        C = np.divide(np.matmul(X.T, X), X.shape[0] - 1, dtype=np.float32)
			
 
				+        _, U = np.linalg.eig(C)
			
 
				+        eigv = U[:, :dim]
			
 
				+
			
 
				+    X_reduced = np.matmul(X_orig, eigv)
			
 
				+
			
 
				+    return (X_reduced, eigv)
			
 
				+
			
 
				+
			
 
				def reduce_model(ft_model, target_dim):
				    """
				    Shrink the vectors of `ft_model` (an instance of the `_FastText`
				    class) to `target_dim` dimensions, in place.

				    The projection is computed from the input matrix and then reused for
				    the output matrix, so both end up in the same reduced space. The
				    reduced matrices are installed with `set_matrices` and the same
				    `ft_model` object is returned.
				    """
				    input_matrix = ft_model.get_input_matrix()
				    reduced_input, projection = _reduce_matrix(input_matrix, target_dim, None)

				    output_matrix = ft_model.get_output_matrix()
				    reduced_output = _reduce_matrix(output_matrix, target_dim, projection)[0]

				    ft_model.set_matrices(reduced_input, reduced_output)
				    return ft_model
			
 
				+
			
 
				+
			
 
				+def _print_progress(downloaded_bytes, total_size):
			
 
				+    percent = float(downloaded_bytes) / total_size
			
 
				+    bar_size = 50
			
 
				+    bar = int(percent * bar_size)
			
 
				+    percent = round(percent * 100, 2)
			
 
				+    sys.stdout.write(" (%0.2f%%) [" % percent)
			
 
				+    sys.stdout.write("=" * bar)
			
 
				+    sys.stdout.write(">")
			
 
				+    sys.stdout.write(" " * (bar_size - bar))
			
 
				+    sys.stdout.write("]\r")
			
 
				+    sys.stdout.flush()
			
 
				+
			
 
				+    if downloaded_bytes >= total_size:
			
 
				+        sys.stdout.write('\n')
			
 
				+
			
 
				+
			
 
				def _download_file(url, write_file_name, chunk_size=2**13):
				    """
				    Download `url` into `write_file_name`, reading `chunk_size` bytes at a
				    time.

				    The data is first written to `<write_file_name>.part` and renamed once
				    the download is complete, so an interrupted download does not leave a
				    truncated file under the final name. A progress bar is printed when
				    the server reports a non-zero `Content-Length`.
				    """
				    print("Downloading %s" % url)
				    response = urlopen(url)
				    try:
				        # Responses from `urllib.request` expose `getheader` directly;
				        # the `urllib2` fallback exposes it through `info()`.
				        if hasattr(response, 'getheader'):
				            content_length = response.getheader('Content-Length')
				        else:
				            content_length = response.info().getheader('Content-Length')
				        # The header is optional: without it the total size is unknown and
				        # no progress bar can be drawn.
				        file_size = int(content_length.strip()) if content_length else None

				        downloaded = 0
				        download_file_name = write_file_name + ".part"
				        with open(download_file_name, 'wb') as f:
				            while True:
				                chunk = response.read(chunk_size)
				                if not chunk:
				                    break
				                downloaded += len(chunk)
				                f.write(chunk)
				                # Skipped for an unknown or zero size, which would otherwise
				                # divide by zero in the progress computation.
				                if file_size:
				                    _print_progress(downloaded, file_size)
				    finally:
				        # Always release the connection, even if reading or writing fails.
				        response.close()

				    os.rename(download_file_name, write_file_name)
			
 
				+
			
 
				+
			
 
				+def _download_gz_model(gz_file_name, if_exists):
			
 
				+    if os.path.isfile(gz_file_name):
			
 
				+        if if_exists == 'ignore':
			
 
				+            return True
			
 
				+        elif if_exists == 'strict':
			
 
				+            print("gzip File exists. Use --overwrite to download anyway.")
			
 
				+            return False
			
 
				+        elif if_exists == 'overwrite':
			
 
				+            pass
			
 
				+
			
 
				+    url = "https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/%s" % gz_file_name
			
 
				+    _download_file(url, gz_file_name)
			
 
				+
			
 
				+    return True
			
 
				+
			
 
				+
			
 
				def download_model(lang_id, if_exists='strict', dimension=None):
				    """
				    Download pre-trained common-crawl vectors from fastText's website
				    https://fasttext.cc/docs/en/crawl-vectors.html

				    `lang_id` must be one of `valid_lang_ids`, otherwise an Exception is
				    raised. The model is stored in the current directory as
				    `cc.<lang_id>.300.bin`.

				    `if_exists` selects what happens when a file is already present:
				    'ignore' reuses it, 'strict' prints a message and does nothing, and
				    'overwrite' downloads it again.

				    `dimension` is accepted but currently unused.

				    Returns the model file name, or None when nothing was downloaded
				    because of the 'strict' policy.
				    """
				    if lang_id not in valid_lang_ids:
				        raise Exception("Invalid lang id. Please select among %s" %
				                        repr(valid_lang_ids))

				    file_name = "cc.%s.300.bin" % lang_id
				    gz_file_name = "%s.gz" % file_name

				    if os.path.isfile(file_name):
				        if if_exists == 'ignore':
				            return file_name
				        elif if_exists == 'strict':
				            print("File exists. Use --overwrite to download anyway.")
				            return
				        elif if_exists == 'overwrite':
				            pass

				    # When the archive download is refused, no model file is produced here,
				    # so do not hand back the name of a file that was never written.
				    if not _download_gz_model(gz_file_name, if_exists):
				        return None

				    with gzip.open(gz_file_name, 'rb') as f:
				        with open(file_name, 'wb') as f_out:
				            shutil.copyfileobj(f, f_out)

				    return file_name
			
--- a/reduce_model.py
+++ b/reduce_model.py
@@ -0,0 +1,98 @@
 
				+#!/usr/bin/env python
			
 
				+# -*- coding: utf-8 -*-
			
 
				+#
			
 
				+# Copyright (c) 2017-present, Facebook, Inc.
			
 
				+# All rights reserved.
			
 
				+#
			
 
				+# This source code is licensed under the MIT license found in the
			
 
				+# LICENSE file in the root directory of this source tree.
			
 
				+
			
 
				+from __future__ import absolute_import
			
 
				+from __future__ import division
			
 
				+from __future__ import print_function
			
 
				+from __future__ import unicode_literals
			
 
				+
			
 
				+import argparse
			
 
				+import os
			
 
				+import re
			
 
				+import sys
			
 
				+
			
 
				+import fasttext
			
 
				+import fasttext.util
			
 
				+
			
 
				+args = None
			
 
				+
			
 
				+
			
 
				def eprint(*values, **print_options):
				    """Same as `print`, but the output goes to standard error."""
				    print(*values, file=sys.stderr, **print_options)
			
 
				+
			
 
				+
			
 
				def guess_target_name(model_file, initial_dim, target_dim):
				    """
				    Given a model name with the convention a.<dim>.b, this function
				    returns the model's name with `target_dim` value.
				    For example model_file name `cc.en.300.bin` with initial dim 300 becomes
				    `cc.en.100.bin` when the `target_dim` is 100.

				    When the name does not contain `.<initial_dim>.`, the target dimension
				    is inserted before the file extension instead, e.g. `model.bin`
				    becomes `model.100.bin`.
				    """
				    # The dots are escaped so that only a literal ".<dim>." component
				    # matches; an unescaped "." would also accept names such as
				    # "model3005.bin" and mangle them.
				    prg = re.compile(r"(.*)\.%s\.(.*)" % re.escape(str(initial_dim)))
				    m = prg.match(model_file)
				    if m:
				        return "%s.%d.%s" % (m.group(1), target_dim, m.group(2))

				    root, extension = os.path.splitext(model_file)
				    return "%s.%d%s" % (root, target_dim, extension)
			
 
				+
			
 
				+
			
 
				def command_reduce(model_file, target_dim, if_exists):
				    """
				    Load `model_file`, reduce its vectors to `target_dim` dimensions and
				    save the result under the name given by `guess_target_name`.

				    Raises an Exception when `target_dim` is not smaller than the model's
				    current dimension. When the output file already exists, `if_exists`
				    decides: 'strict' raises an Exception, 'ignore' returns the existing
				    file name without doing any work, anything else overwrites it.

				    Progress messages go to stderr. Returns the output file name.
				    """
				    eprint("Loading model")
				    ft = fasttext.load_model(model_file)

				    initial_dim = ft.get_dimension()
				    if target_dim >= initial_dim:
				        message = "Target dimension (%d) should be less than initial dimension (%d)." % (
				            target_dim, initial_dim)
				        raise Exception(message)

				    result_filename = guess_target_name(model_file, initial_dim, target_dim)
				    output_exists = os.path.isfile(result_filename)
				    if output_exists and if_exists == 'strict':
				        raise Exception(
				            "File already exists. Use --overwrite to overwrite.")
				    if output_exists and if_exists == 'ignore':
				        return result_filename

				    eprint("Reducing matrix dimensions")
				    fasttext.util.reduce_model(ft, target_dim)

				    eprint("Saving model")
				    ft.save_model(result_filename)
				    eprint("%s saved" % result_filename)

				    return result_filename
			
 
				+
			
 
				+
			
 
				def main():
				    """
				    Command-line entry point: parse the model path, the target dimension
				    and the `--overwrite` flag, then run `command_reduce`.

				    The parsed namespace is also stored in the module-level `args`.
				    """
				    global args

				    parser = argparse.ArgumentParser(
				        description='fastText helper tool to reduce model dimensions.')
				    parser.add_argument(
				        "model", type=str, help="model file to reduce. model.bin")
				    parser.add_argument(
				        "dim", type=int, help="targeted dimension of word vectors.")
				    parser.add_argument(
				        "--overwrite", action="store_true", help="overwrite if file exists.")
				    args = parser.parse_args()

				    # Without --overwrite an existing output file is an error.
				    if args.overwrite:
				        existing_file_policy = 'overwrite'
				    else:
				        existing_file_policy = 'strict'
				    command_reduce(args.model, args.dim, if_exists=existing_file_policy)
			
 
				+
			
 
				+
			
 
				+if __name__ == '__main__':
			
 
				+    main()
			
--- a/src/densematrix.cc
+++ b/src/densematrix.cc
@@ -24,6 +24,9 @@ DenseMatrix::DenseMatrix(int64_t m, int64_t n) : Matrix(m, n), data_(m * n) {}
 
				 DenseMatrix::DenseMatrix(DenseMatrix&& other) noexcept
			
 
				     : Matrix(other.m_, other.n_), data_(std::move(other.data_)) {}
			
 
				 
			
 
				+DenseMatrix::DenseMatrix(int64_t m, int64_t n, real* dataPtr)
			
 
				+    : Matrix(m, n), data_(dataPtr, dataPtr + (m * n)) {}
			
 
				+
			
 
				 void DenseMatrix::zero() {
			
 
				   std::fill(data_.begin(), data_.end(), 0.0);
			
 
				 }
			
--- a/src/densematrix.h
+++ b/src/densematrix.h
@@ -30,6 +30,7 @@ class DenseMatrix : public Matrix {
 
				  public:
			
 
				   DenseMatrix();
			
 
				   explicit DenseMatrix(int64_t, int64_t);
			
 
				+  explicit DenseMatrix(int64_t m, int64_t n, real* dataPtr);
			
 
				   DenseMatrix(const DenseMatrix&) = default;
			
 
				   DenseMatrix(DenseMatrix&&) noexcept;
			
 
				   DenseMatrix& operator=(const DenseMatrix&) = delete;
			
--- a/src/fasttext.cc
+++ b/src/fasttext.cc
@@ -70,6 +70,19 @@ std::shared_ptr<const DenseMatrix> FastText::getInputMatrix() const {
 
				   return std::dynamic_pointer_cast<DenseMatrix>(input_);
			
 
				 }
			
 
				 
			
 
				+void FastText::setMatrices(
			
 
				+    const std::shared_ptr<DenseMatrix>& inputMatrix,
			
 
				+    const std::shared_ptr<DenseMatrix>& outputMatrix) {
			
 
				+  assert(input_->size(1) == output_->size(1));
			
 
				+
			
 
				+  input_ = std::dynamic_pointer_cast<Matrix>(inputMatrix);
			
 
				+  output_ = std::dynamic_pointer_cast<Matrix>(outputMatrix);
			
 
				+  wordVectors_.reset();
			
 
				+  args_->dim = input_->size(1);
			
 
				+
			
 
				+  buildModel();
			
 
				+}
			
 
				+
			
 
				 std::shared_ptr<const DenseMatrix> FastText::getOutputMatrix() const {
			
 
				   if (quant_ && args_->qout) {
			
 
				     throw std::runtime_error("Can't export quantized matrix");
			
@@ -209,6 +222,12 @@ std::vector<int64_t> FastText::getTargetCounts() const {
 
				   }
			
 
				 }
			
 
				 
			
 
				+void FastText::buildModel() {
			
 
				+  auto loss = createLoss(output_);
			
 
				+  bool normalizeGradient = (args_->model == model_name::sup);
			
 
				+  model_ = std::make_shared<Model>(input_, output_, loss, normalizeGradient);
			
 
				+}
			
 
				+
			
 
				 void FastText::loadModel(std::istream& in) {
			
 
				   args_ = std::make_shared<Args>();
			
 
				   input_ = std::make_shared<DenseMatrix>();
			
@@ -241,9 +260,7 @@ void FastText::loadModel(std::istream& in) {
 
				   }
			
 
				   output_->load(in);
			
 
				 
			
 
				-  auto loss = createLoss(output_);
			
 
				-  bool normalizeGradient = (args_->model == model_name::sup);
			
 
				-  model_ = std::make_shared<Model>(input_, output_, loss, normalizeGradient);
			
 
				+  buildModel();
			
 
				 }
			
 
				 
			
 
				 void FastText::printInfo(real progress, real loss, std::ostream& log_stream) {
			
--- a/src/fasttext.h
+++ b/src/fasttext.h
@@ -72,6 +72,7 @@ class FastText {
 
				   std::vector<int32_t> selectEmbeddings(int32_t cutoff) const;
			
 
				   void precomputeWordVectors(DenseMatrix& wordVectors);
			
 
				   bool keepTraining(const int64_t ntokens) const;
			
 
				+  void buildModel();
			
 
				 
			
 
				  public:
			
 
				   FastText();
			
@@ -95,6 +96,10 @@ class FastText {
 
				 
			
 
				   std::shared_ptr<const DenseMatrix> getInputMatrix() const;
			
 
				 
			
 
				+  void setMatrices(
			
 
				+      const std::shared_ptr<DenseMatrix>& inputMatrix,
			
 
				+      const std::shared_ptr<DenseMatrix>& outputMatrix);
			
 
				+
			
 
				   std::shared_ptr<const DenseMatrix> getOutputMatrix() const;
			
 
				 
			
 
				   void saveVectors(const std::string& filename);