1
0

data.sh 2.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869
  1. #!/usr/bin/env bash
  2. #
  3. # Copyright (c) 2017-present, Facebook, Inc.
  4. # All rights reserved.
  5. #
  6. # This source code is licensed under the MIT license found in the
  7. # LICENSE file in the root directory of this source tree.
  8. #
  9. set -e
  10. DATADIR=data/
  11. if [ ! -d "$DATADIR" ]; then
  12. mkdir $DATADIR
  13. fi
  14. cd $DATADIR
  15. echo "preparing WN18"
  16. #wget -P . https://everest.hds.utc.fr/lib/exe/fetch.php?media=en:wordnet-mlj12.tar.gz
  17. #mv fetch.php\?media\=en\:wordnet-mlj12.tar.gz wordnet-mlj12.tar.gz
  18. wget -P . https://github.com/mana-ysh/knowledge-graph-embeddings/raw/master/dat/wordnet-mlj12.tar.gz
  19. tar -xzvf wordnet-mlj12.tar.gz
  20. DIR=wordnet-mlj12
  21. for f in ${DIR}/wordnet-ml*.txt;
  22. do
  23. fn=${DIR}/ft_$(basename $f)
  24. awk '{print "__label__"$1,"0_"$2, $3;print $1,"1_"$2," __label__"$3}' < ${f} > ${fn};
  25. done
  26. cat ${DIR}/ft_* > ${DIR}/ft_wordnet-mlj12-full.txt
  27. cat ${DIR}/ft_*train.txt ${DIR}/ft_*valid.txt > ${DIR}/ft_wordnet-mlj12-valid+train.txt
  28. echo "preparing FB15K"
  29. #wget https://everest.hds.utc.fr/lib/exe/fetch.php?media=en:fb15k.tgz
  30. #mv fetch.php\?media\=en\:fb15k.tgz fb15k.tgz
  31. wget https://github.com/mana-ysh/knowledge-graph-embeddings/raw/master/dat/fb15k.tgz
  32. tar -xzvf fb15k.tgz
  33. DIR=FB15k/
  34. for f in ${DIR}/freebase*.txt;
  35. do
  36. fn=${DIR}/ft_$(basename $f)
  37. echo $f " --> " $fn
  38. awk '{print "__label__"$1,"0_"$2, $3;print $1,"1_"$2," __label__"$3}' < ${f} > ${fn};
  39. done
  40. cat ${DIR}/ft_* > ${DIR}/ft_freebase_mtr100_mte100-full.txt
  41. cat ${DIR}/ft_*train.txt ${DIR}/ft_*valid.txt > ${DIR}/ft_freebase_mtr100_mte100-valid+train.txt
  42. echo "preparing FB15K-237"
  43. wget https://download.microsoft.com/download/8/7/0/8700516A-AB3D-4850-B4BB-805C515AECE1/FB15K-237.2.zip
  44. unzip FB15K-237.2.zip
  45. DIR=Release/
  46. for f in train.txt test.txt valid.txt
  47. do
  48. fn=${DIR}/ft_$(basename $f)
  49. echo $f " --> " $fn
  50. awk -F "\t" '{print "__label__"$1,"0_"$2, $3;print $1,"1_"$2," __label__"$3}' < ${DIR}/${f} > ${fn};
  51. done
  52. cat ${DIR}/ft_*.txt > ${DIR}/ft_full.txt
  53. cat ${DIR}/ft_train.txt ${DIR}/ft_valid.txt > ${DIR}/ft_valid+train.txt
  54. echo "preparing SVO"
  55. wget . https://everest.hds.utc.fr/lib/exe/fetch.php?media=en:svo-tensor-dataset.tar.gz
  56. mv fetch.php?media=en:svo-tensor-dataset.tar.gz svo-tensor-dataset.tar.gz
  57. tar -xzvf svo-tensor-dataset.tar.gz
  58. DIR=SVO-tensor-dataset
  59. for f in ${DIR}/svo_data*.dat;
  60. do
  61. fn=${DIR}/ft_$(basename $f)
  62. awk '{print "0_"$1,"1_"$3,"__label__"$2;}' < ${f} > ${fn};
  63. done
  64. cat ${DIR}/ft_*train*.dat ${DIR}/ft_*valid*.dat > ${DIR}/ft_svo_data-valid+train.dat