blog-post.html 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121
  1. <!DOCTYPE html><html lang="en"><head><meta charSet="utf-8"/><meta http-equiv="X-UA-Compatible" content="IE=edge"/><title>Language identification · fastText</title><meta name="viewport" content="width=device-width"/><meta name="generator" content="Docusaurus"/><meta name="description" content="## Fast and accurate language identification using fastText"/><meta name="docsearch:language" content="en"/><meta property="og:title" content="Language identification · fastText"/><meta property="og:type" content="website"/><meta property="og:url" content="https://fasttext.cc/blog/2017/10/02/blog-post.html"/><meta property="og:description" content="## Fast and accurate language identification using fastText"/><meta property="og:image" content="https://fasttext.cc/img/ogimage.png"/><meta name="twitter:card" content="summary"/><link rel="shortcut icon" href="/img/fasttext-icon-bg-web.png"/><link rel="stylesheet" href="//cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/default.min.css"/><link rel="alternate" type="application/atom+xml" href="https://fasttext.cc/blog/atom.xml" title="fastText Blog ATOM Feed"/><link rel="alternate" type="application/rss+xml" href="https://fasttext.cc/blog/feed.xml" title="fastText Blog RSS Feed"/><script>
  2. (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
  3. (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
  4. m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
  5. })(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
  6. ga('create', 'UA-44373548-30', 'auto');
  7. ga('send', 'pageview');
  8. </script><script type="text/javascript" src="/tabber.js"></script><script src="/js/scrollSpy.js"></script><link rel="stylesheet" href="/css/main.css"/><script src="/js/codetabs.js"></script></head><body class="sideNavVisible"><div class="fixedHeaderContainer"><div class="headerWrapper wrapper"><header><a href="/"><img class="logo" src="/img/fasttext-icon-white-web.png" alt="fastText"/></a><div class="navigationWrapper navigationSlider"><nav class="slidingNav"><ul class="nav-site nav-site-internal"><li class=""><a href="/docs/en/support.html" target="_self">Docs</a></li><li class=""><a href="/docs/en/english-vectors.html" target="_self">Resources</a></li><li class="siteNavGroupActive"><a href="/blog/" target="_self">Blog</a></li><li class=""><a href="https://github.com/facebookresearch/fastText/" target="_blank">GitHub</a></li></ul></nav></div></header></div></div><div class="navPusher"><div class="docMainWrapper wrapper"><div class="docsNavContainer" id="docsNav"><nav class="toc"><div class="toggleNav"><section class="navWrapper wrapper"><div class="navBreadcrumb wrapper"><div class="navToggle" id="navToggler"><div class="hamburger-menu"><div class="line1"></div><div class="line2"></div><div class="line3"></div></div></div><h2><i>›</i><span>Recent Posts</span></h2></div><div class="navGroups"><div class="navGroup"><h3 class="navGroupCategoryTitle">Recent Posts</h3><ul class=""><li class="navListItem"><a class="navItem" href="/blog/2019/06/25/blog-post.html">New release of python module</a></li><li class="navListItem navListItemActive"><a class="navItem" href="/blog/2017/10/02/blog-post.html">Language identification</a></li><li class="navListItem"><a class="navItem" href="/blog/2017/05/02/blog-post.html">fastText on mobile</a></li><li class="navListItem"><a class="navItem" href="/blog/2016/08/18/blog-post.html">Releasing fastText</a></li></ul></div></div></section></div><script>
  9. var coll = document.getElementsByClassName('collapsible');
  10. var checkActiveCategory = true;
  11. for (var i = 0; i < coll.length; i++) {
  12. var links = coll[i].nextElementSibling.getElementsByTagName('*');
  13. if (checkActiveCategory){
  14. for (var j = 0; j < links.length; j++) {
  15. if (links[j].classList.contains('navListItemActive')){
  16. coll[i].nextElementSibling.classList.toggle('hide');
  17. coll[i].childNodes[1].classList.toggle('rotate');
  18. checkActiveCategory = false;
  19. break;
  20. }
  21. }
  22. }
  23. coll[i].addEventListener('click', function() {
  24. var arrow = this.childNodes[1];
  25. arrow.classList.toggle('rotate');
  26. var content = this.nextElementSibling;
  27. content.classList.toggle('hide');
  28. });
  29. }
  30. document.addEventListener('DOMContentLoaded', function() {
  31. createToggler('#navToggler', '#docsNav', 'docsSliderActive');
  32. createToggler('#tocToggler', 'body', 'tocActive');
  33. var headings = document.querySelector('.toc-headings');
  34. headings && headings.addEventListener('click', function(event) {
  35. var el = event.target;
  36. while(el !== headings){
  37. if (el.tagName === 'A') {
  38. document.body.classList.remove('tocActive');
  39. break;
  40. } else{
  41. el = el.parentNode;
  42. }
  43. }
  44. }, false);
  45. function createToggler(togglerSelector, targetSelector, className) {
  46. var toggler = document.querySelector(togglerSelector);
  47. var target = document.querySelector(targetSelector);
  48. if (!toggler) {
  49. return;
  50. }
  51. toggler.onclick = function(event) {
  52. event.preventDefault();
  53. target.classList.toggle(className);
  54. };
  55. }
  56. });
  57. </script></nav></div><div class="container mainContainer postContainer blogContainer"><div class="wrapper"><div class="lonePost"><div class="post"><header class="postHeader"><h1 class="postHeaderTitle"><a href="/blog/2017/10/02/blog-post.html">Language identification</a></h1><p class="post-meta">October 2, 2017</p><div class="authorBlock"><p class="post-authorName"><a href="https://research.fb.com/people/grave-edouard/" target="_blank" rel="noreferrer noopener">Edouard Grave</a></p><div class="authorPhoto"><a href="https://research.fb.com/people/grave-edouard/" target="_blank" rel="noreferrer noopener"><img src="https://graph.facebook.com/534178442/picture/?height=200&amp;width=200" alt="Edouard Grave"/></a></div></div></header><div><span><h2><a class="anchor" aria-hidden="true" id="fast-and-accurate-language-identification-using-fasttext"></a><a href="#fast-and-accurate-language-identification-using-fasttext" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Fast and accurate language identification using fastText</h2>
  58. <p>We are excited to announce that we are publishing a fast and accurate tool for text-based language identification. It can recognize more than 170 languages, takes less than 1MB of memory and can classify thousands of documents per second. It is based on the fastText library and is released <a href="https://fasttext.cc/docs/en/language-identification.html">here</a> as open source, free to use by everyone. We are releasing several versions of the model, each optimized for different memory usage, and have compared them to the popular tool <a href="https://github.com/saffsd/langid.py">langid.py</a>.</p>
  59. <!--truncate-->
  60. <p><img src="../../../../img/blog/2017-10-02-blog-post-img1.png" alt="Evaluation of our models"></p>
  61. <p>Our tool uses various features offered by the fastText library, such as subwords or model compression. In the remainder of this blogpost, we will explain how these work, and how to use them to build a fast and small language detector.</p>
  62. <h2><a class="anchor" aria-hidden="true" id="training-your-own-language-detector"></a><a href="#training-your-own-language-detector" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Training your own language detector</h2>
  63. <p>Building a fast and small language detector with fastText can be done with a few command lines, as we will show below. First, we need a dataset to train our model. Here, we propose to use sentences from the Tatoeba website, which can be downloaded from <a href="https://tatoeba.org/eng/downloads">https://tatoeba.org/eng/downloads</a>. Note that for the sake of simplicity, we use a small quantity of data for this blogpost. If you want to train a state-of-the-art model comparable with our pre-trained model, you will need to use a larger quantity of data.</p>
  64. <h3><a class="anchor" aria-hidden="true" id="training-data"></a><a href="#training-data" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Training data</h3>
  65. <p>First, let's download the training data:</p>
  66. <pre><code class="hljs css language-bash">&gt;&gt; wget http://downloads.tatoeba.org/exports/sentences.tar.bz2
  67. &gt;&gt; bunzip2 sentences.tar.bz2
  68. &gt;&gt; tar xvf sentences.tar
  69. </code></pre>
  70. <p>Then, we need to put our training data into fastText format, which is easily done using:</p>
  71. <pre><code class="hljs css language-bash">&gt;&gt; awk -F<span class="hljs-string">"\t"</span> <span class="hljs-string">'{print"__label__"$2" "$3}'</span> &lt; sentences.csv | shuf &gt; all.txt
  72. </code></pre>
  73. <p>We can then split our training data into training and validation sets:</p>
  74. <pre><code class="hljs css language-bash">&gt;&gt; head -n 10000 all.txt &gt; valid.txt
  75. &gt;&gt; tail -n +10001 all.txt &gt; train.txt
  76. </code></pre>
  77. <h3><a class="anchor" aria-hidden="true" id="first-model"></a><a href="#first-model" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>First model</h3>
  78. <p>We can now train our first model</p>
  79. <pre><code class="hljs css language-bash">&gt;&gt; ./fasttext supervised -input train.txt -output langdetect -dim 16
  80. </code></pre>
  81. <p>and test it on the held out data:</p>
  82. <pre><code class="hljs css language-bash">&gt;&gt; ./fasttext <span class="hljs-built_in">test</span> langdetect.bin valid.txt
  83. </code></pre>
  84. <p>This model should have an accuracy around 96.5%. Let's see if we can do better, by changing the default parameters.</p>
  85. <h3><a class="anchor" aria-hidden="true" id="using-subword-features"></a><a href="#using-subword-features" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Using subword features</h3>
  86. <p>The first way to improve our baseline model is to use subword features, which enhance the classifier by taking into account the structure of words. It uses a simple, yet effective way of incorporating such information: each word is represented by the set of all character ngrams of a given length appearing in that word. As an example, when using subwords of length 3, the word skiing is represented by</p>
  87. <pre><code class="hljs">{ skiing, ski, kii, iin, ing }
  88. </code></pre>
  89. <p>A key advantage of these features is that out-of-vocabulary words, such as misspelled words, can still be represented at test time by their subword representations. This makes text classifiers much more robust, especially for problems with small training sets, or for morphologically rich languages. Users can enable these features by simply specifying the value of the minimum and maximum character ngram size with the command line options -minn and -maxn:</p>
  90. <pre><code class="hljs css language-bash">&gt;&gt; ./fasttext supervised -input train.txt -output langdetect -dim 16 -minn 2 -maxn 4
  91. </code></pre>
  92. <p>In that case, fastText now uses all the character ngrams of length 2, 3 and 4. The accuracy of the classifier should improve, and be above 98.5%. We can also make the training and testing faster, by using the hierarchical softmax:</p>
  93. <pre><code class="hljs css language-bash">&gt;&gt; ./fasttext supervised -input train.txt -output langdetect -dim 16 -minn 2 -maxn 4 -loss hs
  94. </code></pre>
  95. <h3><a class="anchor" aria-hidden="true" id="model-compression"></a><a href="#model-compression" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Model compression</h3>
  96. <p>Finally, we can make the size of the model file much smaller, by using model compression:</p>
  97. <pre><code class="hljs css language-bash">&gt;&gt; ./fasttext quantize -input train.txt -output langdetect -qnorm -cutoff 50000 -retrain
  98. </code></pre>
  99. <p>After running this command line, you should get a new model, langdetect.ftz, with a file size smaller than 1MB (instead of 350MB for the original model).</p>
  100. <p>How does model quantization work? It is quite simple, and relies on two operations: weight quantization and feature selection. We now describe these two operations in more detail.</p>
  101. <p><strong>Weight quantization.</strong> The first operation is to compress the weights of the models using a technique called vector quantization. Quantization is the process of mapping values from a large set (e.g. floating point numbers) to a smaller set (e.g. bytes). Here, we use a variant which is well suited to compress vectors, instead of scalar values. The algorithm, called product quantization, works as follows. First, each vector is split into smaller vectors, for example of dimension 2. Then, we run the k-means algorithm on these sub-vectors, and represent each sub-vector by the closest centroid obtained with k-means. Therefore, each 2-dimension vector is now represented by 1 byte (to store the centroid), instead of 8 bytes (to store the 2 floats), therefore achieving a compression rate of 8. If we instead split the vectors into sub-vectors of dimension 4, we can achieve a compression rate of 16 (but often with a higher distortion rate). This tradeoff between compression and distortion can be controlled using the -dsub command line option, which sets the dimension of the sub-vectors.</p>
  102. <p><strong>Feature selection.</strong> The second operation we apply to compress models is to remove features which do not have a big influence on the decision of the classifier. For this, our goal is to find the model with a given number of features (e.g. 50,000 in the previous example) which is the closest to the original model. The solution of this problem is to keep the features (either words, subwords, or ngrams), which have the vectors with the largest norms.</p>
  103. <h3><a class="anchor" aria-hidden="true" id="references"></a><a href="#references" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>References</h3>
  104. <ul>
  105. <li><a href="https://en.wikipedia.org/wiki/Quantization_%28signal_processing%29">Quantization</a></li>
  106. <li><a href="https://en.wikipedia.org/wiki/Vector_quantization">Vector quantization</a></li>
  107. <li><a href="https://en.wikipedia.org/wiki/K-means_clustering">k-means algorithm</a></li>
  108. <li><a href="https://en.wikipedia.org/wiki/Feature_selection">Feature selection</a></li>
  109. </ul>
  110. <h3><a class="anchor" aria-hidden="true" id="iso-codes-of-languages-supported"></a><a href="#iso-codes-of-languages-supported" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>ISO codes of languages supported</h3>
  111. <pre><code class="hljs">af als <span class="hljs-keyword">am</span> <span class="hljs-keyword">an</span> <span class="hljs-keyword">ar</span> arz <span class="hljs-keyword">as</span> ast av az azb <span class="hljs-keyword">ba</span> bar bcl <span class="hljs-keyword">be</span> bg bh <span class="hljs-keyword">bn</span> <span class="hljs-keyword">bo</span> bpy <span class="hljs-keyword">br</span> bs bxr <span class="hljs-keyword">ca</span> cbk <span class="hljs-keyword">ce</span> ceb ckb <span class="hljs-keyword">co</span> <span class="hljs-keyword">cs</span> cv cy da de diq dsb dty dv <span class="hljs-keyword">el</span> eml <span class="hljs-keyword">en</span> eo es et eu fa fi fr frr fy ga gd gl gn gom <span class="hljs-keyword">gu</span> <span class="hljs-keyword">gv</span> he <span class="hljs-keyword">hi</span> hif hr hsb ht hu hy <span class="hljs-keyword">ia</span> id ie ilo io <span class="hljs-keyword">is</span> it ja jbo jv ka kk km kn ko krc ku kv kw ky <span class="hljs-keyword">la</span> <span class="hljs-keyword">lb</span> lez li lmo <span class="hljs-keyword">lo</span> lrc <span class="hljs-keyword">lt</span> <span class="hljs-keyword">lv</span> mai mg mhr <span class="hljs-built_in">min</span> <span class="hljs-keyword">mk</span> ml mn mr mrj ms mt mwl my myv mzn nah nap nds ne <span class="hljs-keyword">new</span> nl <span class="hljs-keyword">nn</span> <span class="hljs-keyword">no</span> oc <span class="hljs-built_in">or</span> os pa pam pfl pl pms pnb <span class="hljs-keyword">ps</span> <span class="hljs-keyword">pt</span> qu rm ro <span class="hljs-keyword">ru</span> rue <span class="hljs-keyword">sa</span> sah sc scn sco sd <span class="hljs-keyword">sh</span> si sk <span class="hljs-keyword">sl</span> <span class="hljs-keyword">so</span> sq sr su <span class="hljs-keyword">sv</span> <span class="hljs-keyword">sw</span> <span class="hljs-keyword">ta</span> <span class="hljs-keyword">te</span> tg <span class="hljs-keyword">th</span> 
tk <span class="hljs-keyword">tl</span> <span class="hljs-keyword">tr</span> tt tyv ug uk ur uz vec vep <span class="hljs-keyword">vi</span> vls vo <span class="hljs-keyword">wa</span> war wuu xal xmf yi yo yue zh
  112. </code></pre>
  113. </span></div></div><div class="blogSocialSection"></div></div><div class="blog-recent"><a class="button" href="/blog/">Recent Posts</a></div></div></div></div><footer class="nav-footer" id="footer"><section class="sitemap"><a href="/" class="nav-home"><img src="/img/fasttext-icon-white-web.png" alt="fastText"/></a><div><h5>Support</h5><a href="/docs/en/support.html">Getting Started</a><a href="/docs/en/supervised-tutorial.html">Tutorials</a><a href="/docs/en/faqs.html">FAQs</a><a href="/docs/en/api.html">API</a></div><div><h5>Community</h5><a href="https://www.facebook.com/groups/1174547215919768/" target="_blank">Facebook Group</a><a href="http://stackoverflow.com/questions/tagged/fasttext" target="_blank">Stack Overflow</a><a href="https://groups.google.com/forum/#!forum/fasttext-library" target="_blank">Google Group</a></div><div><h5>More</h5><a href="/blog">Blog</a><a href="https://github.com/facebookresearch/fastText" target="_blank">GitHub</a><a class="github-button" href="https://github.com/facebookresearch/fastText/" data-icon="octicon-star" data-count-href="/fastText/stargazers" data-count-api="/repos/fastText#stargazers_count" data-count-aria-label="# stargazers on GitHub" aria-label="Star this project on GitHub">Star</a></div></section><a href="https://code.facebook.com/projects/" target="_blank" class="fbOpenSource"><img src="/img/oss_logo.png" alt="Facebook Open Source" width="170" height="45"/></a><section class="copyright">Copyright © 2020 Facebook Inc.</section></footer></div></body></html>