index.html 33 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175
  1. <!DOCTYPE html><html lang="en"><head><meta charSet="utf-8"/><meta http-equiv="X-UA-Compatible" content="IE=edge"/><title>Blog · fastText</title><meta name="viewport" content="width=device-width"/><meta name="generator" content="Docusaurus"/><meta name="description" content="Library for efficient text classification and representation learning"/><meta name="docsearch:language" content="en"/><meta property="og:title" content="Blog · fastText"/><meta property="og:type" content="website"/><meta property="og:url" content="https://fasttext.cc/index.html"/><meta property="og:description" content="Library for efficient text classification and representation learning"/><meta property="og:image" content="https://fasttext.cc/img/ogimage.png"/><meta name="twitter:card" content="summary"/><link rel="shortcut icon" href="/img/fasttext-icon-bg-web.png"/><link rel="stylesheet" href="//cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/default.min.css"/><link rel="alternate" type="application/atom+xml" href="https://fasttext.cc/blog/atom.xml" title="fastText Blog ATOM Feed"/><link rel="alternate" type="application/rss+xml" href="https://fasttext.cc/blog/feed.xml" title="fastText Blog RSS Feed"/><script>
  // Google Analytics loader. The immediately-invoked function below stores the
  // name 'ga' in window.GoogleAnalyticsObject, defines window.ga (unless it
  // already exists) as a stub that pushes each call's arguments onto ga.q,
  // stamps ga.l with the current time as a number, and inserts a new async
  // script element pointing at the analytics.js URL in front of the first
  // script element found in the document.
  2. (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
  3. (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
  4. m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
  5. })(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
  // Both calls go through the `ga` function set up above: a 'create' call with
  // the property ID and 'auto', followed by a 'send' of 'pageview'.
  6. ga('create', 'UA-44373548-30', 'auto');
  7. ga('send', 'pageview');
  8. </script><script src="/js/scrollSpy.js"></script><link rel="stylesheet" href="/css/main.css"/><script src="/js/codetabs.js"></script></head><body class="blog"><div class="fixedHeaderContainer"><div class="headerWrapper wrapper"><header><a href="/"><img class="logo" src="/img/fasttext-icon-white-web.png" alt="fastText"/></a><div class="navigationWrapper navigationSlider"><nav class="slidingNav"><ul class="nav-site nav-site-internal"><li class=""><a href="/docs/en/support.html" target="_self">Docs</a></li><li class=""><a href="/docs/en/english-vectors.html" target="_self">Resources</a></li><li class="siteNavGroupActive siteNavItemActive"><a href="/blog/" target="_self">Blog</a></li><li class=""><a href="https://github.com/facebookresearch/fastText/" target="_blank">GitHub</a></li></ul></nav></div></header></div></div><div class="navPusher"><div class="docMainWrapper wrapper"><div class="container docsNavContainer" id="docsNav"><nav class="toc"><div class="toggleNav"><section class="navWrapper wrapper"><div class="navBreadcrumb wrapper"><div class="navToggle" id="navToggler"><div class="hamburger-menu"><div class="line1"></div><div class="line2"></div><div class="line3"></div></div></div><h2><i>›</i><span>Recent Posts</span></h2></div><div class="navGroups"><div class="navGroup"><h3 class="navGroupCategoryTitle">Recent Posts</h3><ul class=""><li class="navListItem"><a class="navItem" href="/blog/2019/06/25/blog-post.html">New release of python module</a></li><li class="navListItem"><a class="navItem" href="/blog/2017/10/02/blog-post.html">Language identification</a></li><li class="navListItem"><a class="navItem" href="/blog/2017/05/02/blog-post.html">fastText on mobile</a></li><li class="navListItem"><a class="navItem" href="/blog/2016/08/18/blog-post.html">Releasing fastText</a></li></ul></div></div></section></div><script>
  // Sidebar category setup: for every element with class 'collapsible', attach
  // a click handler that toggles the 'rotate' class on its second child node
  // and the 'hide' class on its next sibling element. While looping, the first
  // category whose sibling contains a 'navListItemActive' descendant gets the
  // same two toggles applied once up front.
  // NOTE(review): no element with class 'collapsible' appears in the markup
  // visible in this listing, so the loop may not run at all on this page.
  9. var coll = document.getElementsByClassName('collapsible');
  // Stays true until the first category containing an active item is found.
  10. var checkActiveCategory = true;
  11. for (var i = 0; i < coll.length; i++) {
  // Every descendant element of the sibling that follows this category header.
  12. var links = coll[i].nextElementSibling.getElementsByTagName('*');
  13. if (checkActiveCategory){
  14. for (var j = 0; j < links.length; j++) {
  15. if (links[j].classList.contains('navListItemActive')){
  16. coll[i].nextElementSibling.classList.toggle('hide');
  // childNodes[1] is the header's second child node; presumably the arrow icon
  // (the click handler below names it `arrow`) - confirm against the markup.
  17. coll[i].childNodes[1].classList.toggle('rotate');
  // Only the first matching category is toggled; later ones are skipped.
  18. checkActiveCategory = false;
  19. break;
  20. }
  21. }
  22. }
  // Clicking a category header flips the same two classes as above.
  23. coll[i].addEventListener('click', function() {
  24. var arrow = this.childNodes[1];
  25. arrow.classList.toggle('rotate');
  26. var content = this.nextElementSibling;
  27. content.classList.toggle('hide');
  28. });
  29. }
  // Runs once the DOM is parsed: wires up the two class togglers and makes a
  // click on any link inside the '.toc-headings' element remove the 'tocActive'
  // class from the body.
  30. document.addEventListener('DOMContentLoaded', function() {
  // createToggler is a function declaration further down in this callback, so
  // it is hoisted and can be called here before its definition.
  31. createToggler('#navToggler', '#docsNav', 'docsSliderActive');
  32. createToggler('#tocToggler', 'body', 'tocActive');
  33. var headings = document.querySelector('.toc-headings');
  // The `&&` guard skips the listener when no '.toc-headings' element exists.
  34. headings && headings.addEventListener('click', function(event) {
  // Walk up from the clicked node towards `headings`; stop at the first anchor.
  35. var el = event.target;
  36. while(el !== headings){
  37. if (el.tagName === 'A') {
  38. document.body.classList.remove('tocActive');
  39. break;
  40. } else{
  41. el = el.parentNode;
  42. }
  43. }
  44. }, false);
  // createToggler(togglerSelector, targetSelector, className):
  // looks up both selectors with querySelector and, if the toggler element
  // exists, sets its onclick to prevent the default action and toggle
  // `className` on the target element. Returns nothing.
  // NOTE(review): `target` is not null-checked; if targetSelector matched
  // nothing, the click handler would throw - confirm every call site passes a
  // selector that exists on the page.
  45. function createToggler(togglerSelector, targetSelector, className) {
  46. var toggler = document.querySelector(togglerSelector);
  47. var target = document.querySelector(targetSelector);
  // No toggler element on this page: nothing to wire up.
  48. if (!toggler) {
  49. return;
  50. }
  51. toggler.onclick = function(event) {
  52. event.preventDefault();
  53. target.classList.toggle(className);
  54. };
  55. }
  56. });
  57. </script></nav></div><div class="container mainContainer postContainer blogContainer"><div class="wrapper"><div class="posts"><div class="post"><header class="postHeader"><h1 class="postHeaderTitle"><a href="/blog/2019/06/25/blog-post.html">New release of python module</a></h1><p class="post-meta">June 25, 2019</p><div class="authorBlock"><p class="post-authorName"><a href="https://research.fb.com/people/celebi-onur/" target="_blank" rel="noreferrer noopener">Onur Çelebi</a></p><div class="authorPhoto"><a href="https://research.fb.com/people/celebi-onur/" target="_blank" rel="noreferrer noopener"><img src="https://graph.facebook.com/663146146/picture/?height=200&amp;width=200" alt="Onur Çelebi"/></a></div></div></header><article class="post-content"><div><span><p>Today, we are happy to release a new version of the fastText python library. The main goal of this release is to merge two existing python modules: the official <code>fastText</code> module which was available on our github repository and the unofficial <code>fasttext</code> module which was available on pypi.org. We hope that this new version will address the confusion due to the previous existence of two similar, but different, python modules.</p>
  58. <p>The new version of our library is now available on <a href="https://pypi.org/project/fasttext/">pypi.org</a> as well as on our github repository, and you can find <a href="/docs/en/python-module.html">an overview of its API here</a>.</p>
  59. <h2><a class="anchor" aria-hidden="true" id="fasttext-vs-fasttext-what-happened"></a><a href="#fasttext-vs-fasttext-what-happened" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>fastText vs fasttext: what happened?</h2>
  60. <p>There was an ongoing confusion among our user community about the existence of both <code>fastText</code> and <code>fasttext</code> modules.</p>
  61. <p>When fastText was first released in 2016, it was a command line only utility. Very soon, people wanted to use fastText's capabilities from python without having to call a binary for each action. In August 2016, <a href="https://github.com/pyk">Bayu Aldi Yansyah</a>, a developer outside of Facebook, published a python wrapper of fastText. His work was very helpful to a lot of people in our community and he published his unofficial python library on pypi with the pretty straightforward module name <code>fasttext</code> (note the lowercase <code>t</code>).</p>
  62. <p>Later, our team began to work on an official python binding of fastText, that was published under the same github repository as the C++ source code. However, the module name for this official library was <code>fastText</code> (note the uppercase <code>T</code>).</p>
  63. <p>Last year, Bayu Aldi Yansyah gave us admin access to the pypi project so that we could merge the two libraries.</p>
  64. <p>To sum up, we ended up with two libraries that had:</p>
  65. <ul>
  66. <li>almost the same name</li>
  67. <li>different APIs</li>
  68. <li>different versions</li>
  69. <li>different ways to install</li>
  70. </ul>
  71. <p>That was a very confusing situation for the community.</p>
  72. <h2><a class="anchor" aria-hidden="true" id="what-actions-did-we-take"></a><a href="#what-actions-did-we-take" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>What actions did we take?</h2>
  73. <p>Today we are merging the two python libraries. We decided to keep the official API and top level functions such as <code>train_unsupervised</code> and <code>train_supervised</code> as well as returning numpy objects. We remove <code>cbow</code>, <code>skipgram</code> and <code>supervised</code> functions from the unofficial API. However, <a href="#wordvectormodel-and-supervisedmodel-objects">we bring nice ideas</a> from the unofficial API to the official one. In particular, we liked the pythonic approach of <code>WordVectorModel</code>. This new python module is named <code>fasttext</code>, and is available on both <a href="https://pypi.org/project/fasttext/">pypi</a> and our <a href="https://github.com/facebookresearch/fastText">github</a> repository.</p>
  74. <p>From now on, we will refer to the tool as &quot;fastText&quot;; however, the name of the python module is <code>fasttext</code>.</p>
  75. <h2><a class="anchor" aria-hidden="true" id="what-is-the-right-way-to-do-now"></a><a href="#what-is-the-right-way-to-do-now" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>What is the right way to do now?</h2>
  76. <p>Before, you would either use <code>fastText</code> (uppercase <code>T</code>):</p>
  77. <pre><code class="hljs css language-python"><span class="hljs-keyword">import</span> fastText
  78. <span class="hljs-comment"># and call:</span>
  79. fastText.train_supervised
  80. fastText.train_unsupervised
  81. </code></pre>
  82. <p>or use <code>fasttext</code> (lowercase <code>t</code>):</p>
  83. <pre><code class="hljs css language-python"><span class="hljs-keyword">import</span> fasttext
  84. <span class="hljs-comment"># and call:</span>
  85. fasttext.cbow
  86. fasttext.skipgram
  87. fasttext.supervised
  88. </code></pre>
  89. <p>Now, the right way to do it is to
  90. <code>import fasttext</code> (lowercase <code>t</code>)
  91. and use</p>
  92. <pre><code class="hljs css language-python"><span class="hljs-keyword">import</span> fasttext
  93. <span class="hljs-comment"># and call:</span>
  94. fasttext.train_supervised
  95. fasttext.train_unsupervised
  96. </code></pre>
  97. <p>We are keeping the lowercase <code>fasttext</code> module name, while we keep the <code>fastText</code> API.</p>
  98. <p>This is because:</p>
  99. <ul>
  100. <li>the standard way to name python modules is all lowercase</li>
  101. <li>the API from <code>fastText</code> exposes numpy arrays, which are widely used by the machine learning community.</li>
  102. </ul>
  103. <p>You can find a more comprehensive overview of our python API <a href="/docs/en/python-module.html">here</a>.</p>
  104. <h2><a class="anchor" aria-hidden="true" id="should-i-modify-my-existing-code"></a><a href="#should-i-modify-my-existing-code" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Should I modify my existing code?</h2>
  105. <p>Depending on the version of the python module you were using, you might need to make some small modifications to your existing code.</p>
  106. <h3><a class="anchor" aria-hidden="true" id="1-you-were-using-the-official-fasttext-module"></a><a href="#1-you-were-using-the-official-fasttext-module" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>1) You were using the official <code>fastText</code> module:</h3>
  107. <p>You don't have to do much. Just replace your <code>import fastText</code> lines by <code>import fasttext</code> and everything should work as usual.</p>
  108. <h3><a class="anchor" aria-hidden="true" id="2-you-were-using-the-unofficial-fasttext-module"></a><a href="#2-you-were-using-the-unofficial-fasttext-module" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>2) You were using the unofficial <code>fasttext</code> module:</h3>
  109. <p>If you were using the functions <code>cbow</code>, <code>skipgram</code>, <code>supervised</code> and/or <code>WordVectorModel</code>, <code>SupervisedModel</code> objects, you were using the unofficial <code>fasttext</code> module.</p>
  110. <p>Updating your code should be pretty straightforward, but it still requires a few small changes.</p>
  111. <h4><a class="anchor" aria-hidden="true" id="cbow-function-use-train_unsupervised-instead"></a><a href="#cbow-function-use-train_unsupervised-instead" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a><code>cbow</code> function: use <code>train_unsupervised</code> instead.</h4>
  112. <p>For example, replace:</p>
  113. <pre><code class="hljs">fasttext.cbow(<span class="hljs-string">"train.txt"</span>, <span class="hljs-string">"model_file"</span>, <span class="hljs-attribute">lr</span>=0.05, <span class="hljs-attribute">dim</span>=100, <span class="hljs-attribute">ws</span>=5, <span class="hljs-attribute">epoch</span>=5)
  114. </code></pre>
  115. <p>with</p>
  116. <pre><code class="hljs">model = fasttext.train_unsupervised(<span class="hljs-string">"train.txt"</span>, <span class="hljs-attribute">model</span>=<span class="hljs-string">'cbow'</span>, <span class="hljs-attribute">lr</span>=0.05, <span class="hljs-attribute">dim</span>=100, <span class="hljs-attribute">ws</span>=5, <span class="hljs-attribute">epoch</span>=5)
  117. model.save_model(<span class="hljs-string">"model_file.bin"</span>)
  118. </code></pre>
  119. <h4><a class="anchor" aria-hidden="true" id="skipgram-function-use-train_unsupervised-instead"></a><a href="#skipgram-function-use-train_unsupervised-instead" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a><code>skipgram</code> function: use <code>train_unsupervised</code> instead.</h4>
  120. <p>For example, replace:</p>
  121. <pre><code class="hljs">fasttext.skipgram(<span class="hljs-string">"train.txt"</span>, <span class="hljs-string">"model_file"</span>, <span class="hljs-attribute">lr</span>=0.05, <span class="hljs-attribute">dim</span>=100, <span class="hljs-attribute">ws</span>=5, <span class="hljs-attribute">epoch</span>=5)
  122. </code></pre>
  123. <p>with</p>
  124. <pre><code class="hljs">model = fasttext.train_unsupervised(<span class="hljs-string">"train.txt"</span>, <span class="hljs-attribute">model</span>=<span class="hljs-string">'skipgram'</span>, <span class="hljs-attribute">lr</span>=0.05, <span class="hljs-attribute">dim</span>=100, <span class="hljs-attribute">ws</span>=5, <span class="hljs-attribute">epoch</span>=5)
  125. model.save_model(<span class="hljs-string">"model_file.bin"</span>)
  126. </code></pre>
  127. <h4><a class="anchor" aria-hidden="true" id="supervised-function-use-train_supervised-instead"></a><a href="#supervised-function-use-train_supervised-instead" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a><code>supervised</code> function: use <code>train_supervised</code> instead</h4>
  128. <p>For example, replace:</p>
  129. <pre><code class="hljs">fasttext.supervised(<span class="hljs-string">"train.txt"</span>, <span class="hljs-string">"model_file"</span>, <span class="hljs-attribute">lr</span>=0.1, <span class="hljs-attribute">dim</span>=100, <span class="hljs-attribute">epoch</span>=5, <span class="hljs-attribute">word_ngrams</span>=2, <span class="hljs-attribute">loss</span>=<span class="hljs-string">'softmax'</span>)
  130. </code></pre>
  131. <p>with</p>
  132. <pre><code class="hljs">model = fasttext.train_supervised(<span class="hljs-string">"train.txt"</span>, <span class="hljs-attribute">lr</span>=0.1, <span class="hljs-attribute">dim</span>=100, <span class="hljs-attribute">epoch</span>=5, <span class="hljs-attribute">word_ngrams</span>=2, <span class="hljs-attribute">loss</span>=<span class="hljs-string">'softmax'</span>)
  133. model.save_model(<span class="hljs-string">"model_file.bin"</span>)
  134. </code></pre>
  135. <h4><a class="anchor" aria-hidden="true" id="parameters"></a><a href="#parameters" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Parameters</h4>
  136. <ul>
  137. <li>As you can see, you can use either <code>word_ngrams</code> or <code>wordNgrams</code> as the parameter name, because the parameter names from the unofficial API are mapped to the official ones: <code>min_count</code> to <code>minCount</code>, <code>word_ngrams</code> to <code>wordNgrams</code>, <code>lr_update_rate</code> to <code>lrUpdateRate</code>, <code>label_prefix</code> to <code>label</code> and <code>pretrained_vectors</code> to <code>pretrainedVectors</code>.</li>
  138. <li>The <code>silent</code> parameter is not supported. Use the <code>verbose</code> parameter instead.</li>
  139. <li>The <code>encoding</code> parameter is not supported; every input should be encoded in <code>utf-8</code>.</li>
  140. </ul>
  141. <h3><a class="anchor" aria-hidden="true" id="wordvectormodel-and-supervisedmodel-objects"></a><a href="#wordvectormodel-and-supervisedmodel-objects" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a><code>WordVectorModel</code> and <code>SupervisedModel</code> objects</h3>
  142. <p>Instead of <code>WordVectorModel</code> and <code>SupervisedModel</code> objects, we return a model object that mimics some nice ideas from the unofficial API.</p>
  143. <pre><code class="hljs css language-python">model = fasttext.train_unsupervised(<span class="hljs-string">"train.txt"</span>, model=<span class="hljs-string">'skipgram'</span>)
  144. print(model.words) <span class="hljs-comment"># list of words in dictionary</span>
  145. print(model[<span class="hljs-string">'king'</span>]) <span class="hljs-comment"># get the vector of the word 'king'</span>
  146. print(<span class="hljs-string">'king'</span> <span class="hljs-keyword">in</span> model) <span class="hljs-comment"># check if a word is in dictionary</span>
  147. </code></pre>
  148. <pre><code class="hljs css language-python">model = fasttext.train_supervised(<span class="hljs-string">"train.txt"</span>)
  149. print(model.words) <span class="hljs-comment"># list of words in dictionary</span>
  150. print(model.labels) <span class="hljs-comment"># list of labels</span>
  151. </code></pre>
  152. <p>The model object also contains the arguments of the training:</p>
  153. <pre><code class="hljs css language-python">print(model.epoch)
  154. print(model.loss)
  155. print(model.wordNgrams)
  156. </code></pre>
  157. <h2><a class="anchor" aria-hidden="true" id="thank-you"></a><a href="#thank-you" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Thank you!</h2>
  158. <p>We want to thank our incredible community. We truly appreciate your feedback, a big thank you to everyone reporting issues and contributing to the project. In particular we want to express how grateful we are to <a href="https://github.com/pyk">Bayu Aldi Yansyah</a> who did a great job with his python library and for giving us the ownership of the pypi <code>fasttext</code> project.</p>
  159. </span></div></article></div><div class="post"><header class="postHeader"><h1 class="postHeaderTitle"><a href="/blog/2017/10/02/blog-post.html">Language identification</a></h1><p class="post-meta">October 2, 2017</p><div class="authorBlock"><p class="post-authorName"><a href="https://research.fb.com/people/grave-edouard/" target="_blank" rel="noreferrer noopener">Edouard Grave</a></p><div class="authorPhoto"><a href="https://research.fb.com/people/grave-edouard/" target="_blank" rel="noreferrer noopener"><img src="https://graph.facebook.com/534178442/picture/?height=200&amp;width=200" alt="Edouard Grave"/></a></div></div></header><article class="post-content"><div><span><h2><a class="anchor" aria-hidden="true" id="fast-and-accurate-language-identification-using-fasttext"></a><a href="#fast-and-accurate-language-identification-using-fasttext" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Fast and accurate language identification using fastText</h2>
  160. <p>We are excited to announce that we are publishing a fast and accurate tool for text-based language identification. It can recognize more than 170 languages, takes less than 1MB of memory and can classify thousands of documents per second. It is based on the fastText library and is released <a href="https://fasttext.cc/docs/en/language-identification.html">here</a> as open source, free to use by everyone. We are releasing several versions of the model, each optimized for different memory usage, and have compared them to the popular tool <a href="https://github.com/saffsd/langid.py">langid.py</a>.</p>
  161. </span></div><div class="read-more"><a class="button" href="/blog/2017/10/02/blog-post.html">Read More</a></div></article></div><div class="post"><header class="postHeader"><h1 class="postHeaderTitle"><a href="/blog/2017/05/02/blog-post.html">fastText on mobile</a></h1><p class="post-meta">May 2, 2017</p><div class="authorBlock"><p class="post-authorName"><a href="https://research.fb.com/people/joulin-armand/" target="_blank" rel="noreferrer noopener">Armand Joulin</a></p><div class="authorPhoto"><a href="https://research.fb.com/people/joulin-armand/" target="_blank" rel="noreferrer noopener"><img src="https://graph.facebook.com/696297201/picture/?height=200&amp;width=200" alt="Armand Joulin"/></a></div></div></header><article class="post-content"><div><span><p>Today, the Facebook AI Research (FAIR) team released pre-trained vectors in 294 languages, accompanied by two quick-start tutorials, to increase fastText’s accessibility to the large community of students, software developers, and researchers interested in machine learning. fastText’s models now fit on smartphones and small computers like Raspberry Pi devices thanks to a new functionality that reduces memory usage.</p>
  162. <p>First open-sourced last summer, <a href="https://github.com/facebookresearch/fastText">fastText</a> was designed to be accessible to anyone with generic hardware like notebooks and X86 cloud instances, or almost any platform with enough memory. Smartphone and small computer support extends fastText’s accessibility to an even larger community and a greater range of applications.</p>
  163. </span></div><div class="read-more"><a class="button" href="/blog/2017/05/02/blog-post.html">Read More</a></div></article></div><div class="post"><header class="postHeader"><h1 class="postHeaderTitle"><a href="/blog/2016/08/18/blog-post.html">Releasing fastText</a></h1><p class="post-meta">August 18, 2016</p><div class="authorBlock"><p class="post-authorName"><a href="https://research.fb.com/people/grave-edouard/" target="_blank" rel="noreferrer noopener">Edouard Grave</a></p><div class="authorPhoto"><a href="https://research.fb.com/people/grave-edouard/" target="_blank" rel="noreferrer noopener"><img src="https://graph.facebook.com/534178442/picture/?height=200&amp;width=200" alt="Edouard Grave"/></a></div></div></header><article class="post-content"><div><span><h2><a class="anchor" aria-hidden="true" id="faster-better-text-classification"></a><a href="#faster-better-text-classification" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Faster, better text classification!</h2>
  164. <p>Understanding the meaning of words that roll off your tongue as you talk, or your fingertips as you tap out posts is one of the biggest technical challenges facing artificial intelligence researchers. But it is an essential need. Automatic text processing forms a key part of the day-to-day interaction with your computer; it’s a critical component of everything from web search and content ranking to spam filtering, and when it works well, it’s completely invisible to you. With the growing amount of online data, there is a need for more flexible tools to better understand the content of very large datasets, in order to provide more accurate classification results.</p>
  165. <p>To address this need, the <a href="https://research.fb.com/category/facebook-ai-research-fair/">Facebook AI Research (FAIR) lab</a> is open-sourcing <a href="https://github.com/facebookresearch/fastText">fastText</a>, a library designed to help build scalable solutions for text representation and classification. Our ongoing commitment to collaboration and sharing with the community extends beyond just delivering code. We know it’s important to share our learnings to advance the field, so we have also <a href="http://arxiv.org/abs/1607.04606">published</a> <a href="http://arxiv.org/abs/1607.01759">our research</a> relating to fastText.</p>
  166. <p>FastText combines some of the most successful concepts introduced by the natural language processing and machine learning communities in the last few decades. These include representing sentences with bag of words and bag of n-grams, as well as using subword information, and sharing information across classes through a hidden representation. We also employ a hierarchical softmax that takes advantage of the unbalanced distribution of the classes to speed up computation. These different concepts are being used for two different tasks: efficient text classification and learning word vector representations.</p>
  167. </span></div><div class="read-more"><a class="button" href="/blog/2016/08/18/blog-post.html">Read More</a></div></article></div><div class="docs-prevnext"></div></div></div></div></div><footer class="nav-footer" id="footer"><section class="sitemap"><a href="/" class="nav-home"><img src="/img/fasttext-icon-white-web.png" alt="fastText"/></a><div><h5>Support</h5><a href="/docs/en/support.html">Getting Started</a><a href="/docs/en/supervised-tutorial.html">Tutorials</a><a href="/docs/en/faqs.html">FAQs</a><a href="/docs/en/api.html">API</a></div><div><h5>Community</h5><a href="https://www.facebook.com/groups/1174547215919768/" target="_blank">Facebook Group</a><a href="http://stackoverflow.com/questions/tagged/fasttext" target="_blank">Stack Overflow</a><a href="https://groups.google.com/forum/#!forum/fasttext-library" target="_blank">Google Group</a></div><div><h5>More</h5><a href="/blog">Blog</a><a href="https://github.com/facebookresearch/fastText" target="_blank">GitHub</a><a class="github-button" href="https://github.com/facebookresearch/fastText/" data-icon="octicon-star" data-count-href="/fastText/stargazers" data-count-api="/repos/fastText#stargazers_count" data-count-aria-label="# stargazers on GitHub" aria-label="Star this project on GitHub">Star</a></div></section><a href="https://code.facebook.com/projects/" target="_blank" class="fbOpenSource"><img src="/img/oss_logo.png" alt="Facebook Open Source" width="170" height="45"/></a><section class="copyright">Copyright © 2019 Facebook Inc.</section></footer></div></body></html>