Explorar o código

merge computeSubwords functions

Summary: In fastText's `Dictionary` class, merge the two overloads of `computeSubwords` into a single method by making the third argument an optional pointer.

Reviewed By: EdouardGrave

Differential Revision: D9766500

fbshipit-source-id: ab12c432b371cf5b924660e12e79a5d7cea708e2
Onur Çelebi hai 7 anos
pai
achega
c0e7a138bf
Modificáronse 2 ficheiros con 6 adicións e 23 borrados
  1. 5 21
      src/dictionary.cc
  2. 1 2
      src/dictionary.h

+ 5 - 21
src/dictionary.cc

@@ -103,7 +103,7 @@ void Dictionary::getSubwords(const std::string& word,
     substrings.push_back(words_[i].word);
   }
   if (word != EOS) {
-    computeSubwords(BOW + word + EOW, ngrams, substrings);
+    computeSubwords(BOW + word + EOW, ngrams, &substrings);
   }
 }
 
@@ -159,26 +159,7 @@ uint32_t Dictionary::hash(const std::string& str) const {
 
 void Dictionary::computeSubwords(const std::string& word,
                                std::vector<int32_t>& ngrams,
-                               std::vector<std::string>& substrings) const {
-  for (size_t i = 0; i < word.size(); i++) {
-    std::string ngram;
-    if ((word[i] & 0xC0) == 0x80) continue;
-    for (size_t j = i, n = 1; j < word.size() && n <= args_->maxn; n++) {
-      ngram.push_back(word[j++]);
-      while (j < word.size() && (word[j] & 0xC0) == 0x80) {
-        ngram.push_back(word[j++]);
-      }
-      if (n >= args_->minn && !(n == 1 && (i == 0 || j == word.size()))) {
-        int32_t h = hash(ngram) % args_->bucket;
-        ngrams.push_back(nwords_ + h);
-        substrings.push_back(ngram);
-      }
-    }
-  }
-}
-
-void Dictionary::computeSubwords(const std::string& word,
-                               std::vector<int32_t>& ngrams) const {
+                               std::vector<std::string>* substrings) const {
   for (size_t i = 0; i < word.size(); i++) {
     std::string ngram;
     if ((word[i] & 0xC0) == 0x80) continue;
@@ -190,6 +171,9 @@ void Dictionary::computeSubwords(const std::string& word,
       if (n >= args_->minn && !(n == 1 && (i == 0 || j == word.size()))) {
         int32_t h = hash(ngram) % args_->bucket;
         pushHash(ngrams, h);
+        if (substrings) {
+          substrings->push_back(ngram);
+        }
       }
     }
   }

+ 1 - 2
src/dictionary.h

@@ -85,11 +85,10 @@ class Dictionary {
         const std::string&,
         std::vector<int32_t>&,
         std::vector<std::string>&) const;
-    void computeSubwords(const std::string&, std::vector<int32_t>&) const;
     void computeSubwords(
         const std::string&,
         std::vector<int32_t>&,
-        std::vector<std::string>&) const;
+        std::vector<std::string>* substrings=nullptr) const;
     uint32_t hash(const std::string& str) const;
     void add(const std::string&);
     bool readWord(std::istream&, std::string&) const;