Prechádzať zdrojové kódy

Remove deprecated FastText functions

Summary: This commit removes functions in FastText class that were marked as deprecated.

Reviewed By: EdouardGrave

Differential Revision: D16089965

fbshipit-source-id: 25f0a1d260ec10fc6c03160eab85923ecfb883eb
Onur Çelebi pred 6 rokmi
rodič
commit
ad95d9bf5c
2 zmenené súbory, 6 pridaní a 114 odobraní
  1. 0 65
      src/fasttext.cc
  2. 6 49
      src/fasttext.h

+ 0 - 65
src/fasttext.cc

@@ -97,10 +97,6 @@ void FastText::getWordVector(Vector& vec, const std::string& word) const {
   }
 }
 
-void FastText::getVector(Vector& vec, const std::string& word) const {
-  getWordVector(vec, word);
-}
-
 void FastText::getSubwordVector(Vector& vec, const std::string& subword) const {
   vec.zero();
   int32_t h = dict_->hash(subword) % args_->bucket;
@@ -124,10 +120,6 @@ void FastText::saveVectors(const std::string& filename) {
   ofs.close();
 }
 
-void FastText::saveVectors() {
-  saveVectors(args_->output + ".vec");
-}
-
 void FastText::saveOutput(const std::string& filename) {
   std::ofstream ofs(filename);
   if (!ofs.is_open()) {
@@ -152,10 +144,6 @@ void FastText::saveOutput(const std::string& filename) {
   ofs.close();
 }
 
-void FastText::saveOutput() {
-  saveOutput(args_->output + ".output");
-}
-
 bool FastText::checkModel(std::istream& in) {
   int32_t magic;
   in.read((char*)&(magic), sizeof(int32_t));
@@ -176,16 +164,6 @@ void FastText::signModel(std::ostream& out) {
   out.write((char*)&(version), sizeof(int32_t));
 }
 
-void FastText::saveModel() {
-  std::string fn(args_->output);
-  if (quant_) {
-    fn += ".ftz";
-  } else {
-    fn += ".bin";
-  }
-  saveModel(fn);
-}
-
 void FastText::saveModel(const std::string& filename) {
   std::ofstream ofs(filename, std::ofstream::binary);
   if (!ofs.is_open()) {
@@ -521,16 +499,6 @@ std::vector<std::pair<std::string, Vector>> FastText::getNgramVectors(
   return result;
 }
 
-// deprecated. use getNgramVectors instead
-void FastText::ngramVectors(std::string word) {
-  std::vector<std::pair<std::string, Vector>> ngramVectors =
-      getNgramVectors(word);
-
-  for (const auto& ngramVector : ngramVectors) {
-    std::cout << ngramVector.first << " " << ngramVector.second << std::endl;
-  }
-}
-
 void FastText::precomputeWordVectors(DenseMatrix& wordVectors) {
   Vector vec(args_->dim);
   wordVectors.zero();
@@ -598,17 +566,6 @@ std::vector<std::pair<real, std::string>> FastText::getNN(
   return heap;
 }
 
-// depracted. use getNN instead
-void FastText::findNN(
-    const DenseMatrix& wordVectors,
-    const Vector& query,
-    int32_t k,
-    const std::set<std::string>& banSet,
-    std::vector<std::pair<real, std::string>>& results) {
-  results.clear();
-  results = getNN(wordVectors, query, k, banSet);
-}
-
 std::vector<std::pair<real, std::string>> FastText::getAnalogies(
     int32_t k,
     const std::string& wordA,
@@ -630,24 +587,6 @@ std::vector<std::pair<real, std::string>> FastText::getAnalogies(
   return getNN(*wordVectors_, query, k, {wordA, wordB, wordC});
 }
 
-// depreacted, use getAnalogies instead
-void FastText::analogies(int32_t k) {
-  std::string prompt("Query triplet (A - B + C)? ");
-  std::string wordA, wordB, wordC;
-  std::cout << prompt;
-  while (true) {
-    std::cin >> wordA;
-    std::cin >> wordB;
-    std::cin >> wordC;
-    auto results = getAnalogies(k, wordA, wordB, wordC);
-
-    for (auto& pair : results) {
-      std::cout << pair.second << " " << pair.first << std::endl;
-    }
-    std::cout << prompt;
-  }
-}
-
 void FastText::trainThread(int32_t threadId) {
   std::ifstream ifs(args_->input);
   utils::seek(ifs, threadId * utils::size(ifs) / args_->thread);
@@ -727,10 +666,6 @@ std::shared_ptr<Matrix> FastText::getInputMatrixFromFile(
   return input;
 }
 
-void FastText::loadVectors(const std::string& filename) {
-  input_ = getInputMatrixFromFile(filename);
-}
-
 std::shared_ptr<Matrix> FastText::createRandomMatrix() const {
   std::shared_ptr<DenseMatrix> input = std::make_shared<DenseMatrix>(
       dict_->nwords() + args_->bucket, args_->dim);

+ 6 - 49
src/fasttext.h

@@ -34,16 +34,16 @@ class FastText {
  protected:
   std::shared_ptr<Args> args_;
   std::shared_ptr<Dictionary> dict_;
-
   std::shared_ptr<Matrix> input_;
   std::shared_ptr<Matrix> output_;
-
   std::shared_ptr<Model> model_;
-
   std::atomic<int64_t> tokenCount_{};
   std::atomic<real> loss_{};
-
   std::chrono::steady_clock::time_point start_;
+  bool quant_;
+  int32_t version;
+  std::unique_ptr<DenseMatrix> wordVectors_;
+
   void signModel(std::ostream&);
   bool checkModel(std::istream&);
   void startThreads();
@@ -68,10 +68,8 @@ class FastText {
       const std::vector<int32_t>& labels);
   void cbow(Model::State& state, real lr, const std::vector<int32_t>& line);
   void skipgram(Model::State& state, real lr, const std::vector<int32_t>& line);
-
-  bool quant_;
-  int32_t version;
-  std::unique_ptr<DenseMatrix> wordVectors_;
+  std::vector<int32_t> selectEmbeddings(int32_t cutoff) const;
+  void precomputeWordVectors(DenseMatrix& wordVectors);
 
  public:
   FastText();
@@ -146,46 +144,5 @@ class FastText {
   int getDimension() const;
 
   bool isQuant() const;
-
-  FASTTEXT_DEPRECATED("loadVectors is being deprecated.")
-  void loadVectors(const std::string& filename);
-
-  FASTTEXT_DEPRECATED(
-      "getVector is being deprecated and replaced by getWordVector.")
-  void getVector(Vector& vec, const std::string& word) const;
-
-  FASTTEXT_DEPRECATED(
-      "ngramVectors is being deprecated and replaced by getNgramVectors.")
-  void ngramVectors(std::string word);
-
-  FASTTEXT_DEPRECATED(
-      "analogies is being deprecated and replaced by getAnalogies.")
-  void analogies(int32_t k);
-
-  FASTTEXT_DEPRECATED("selectEmbeddings is being deprecated.")
-  std::vector<int32_t> selectEmbeddings(int32_t cutoff) const;
-
-  FASTTEXT_DEPRECATED(
-      "saveVectors is being deprecated, please use the other signature.")
-  void saveVectors();
-
-  FASTTEXT_DEPRECATED(
-      "saveOutput is being deprecated, please use the other signature.")
-  void saveOutput();
-
-  FASTTEXT_DEPRECATED(
-      "saveModel is being deprecated, please use the other signature.")
-  void saveModel();
-
-  FASTTEXT_DEPRECATED("precomputeWordVectors is being deprecated.")
-  void precomputeWordVectors(DenseMatrix& wordVectors);
-
-  FASTTEXT_DEPRECATED("findNN is being deprecated and replaced by getNN.")
-  void findNN(
-      const DenseMatrix& wordVectors,
-      const Vector& query,
-      int32_t k,
-      const std::set<std::string>& banSet,
-      std::vector<std::pair<real, std::string>>& results);
 };
 } // namespace fasttext