#!/usr/bin/env bash
#
# Copyright (c) 2017-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
#
# Downloads the WN18, FB15K, FB15K-237 and SVO archives into ./data,
# unpacks them, and writes an "ft_"-prefixed converted copy of every split
# next to the original file.
#
# Conversion of triple files (WN18, FB15K, FB15K-237): each input line with
# fields f1 f2 f3 becomes two output lines:
#     __label__f1 0_f2 f3
#     f1 1_f2  __label__f3
# Conversion of SVO files: each input line with fields f1 f2 f3 becomes:
#     0_f1 1_f3 __label__f2
#
# The script is safe to re-run: archives are overwritten rather than saved
# under ".1" names, and previously generated concatenations are removed
# before the globs that build them are expanded.
#
# Requires: wget, tar, unzip, awk.

set -euo pipefail

readonly DATADIR=data

#######################################
# Convert one triple file into the two-lines-per-triple "ft_" format.
# Arguments:
#   $1 - input file
#   $2 - output file (overwritten)
#   $3 - optional awk field separator; awk's default splitting when omitted
#######################################
triples_to_ft() {
  local src=$1 dst=$2 sep=${3-}
  local prog='{print "__label__"$1,"0_"$2, $3;print $1,"1_"$2," __label__"$3}'

  if [[ -n "$sep" ]]; then
    awk -F "$sep" "$prog" <"$src" >"$dst"
  else
    awk "$prog" <"$src" >"$dst"
  fi
}

#######################################
# Download and convert WN18 (wordnet-mlj12).
#######################################
prepare_wn18() {
  local dir=wordnet-mlj12
  local full="${dir}/ft_wordnet-mlj12-full.txt"
  local valid_train="${dir}/ft_wordnet-mlj12-valid+train.txt"
  local f

  echo "preparing WN18"
  # Former upstream location, kept for reference:
  #   https://everest.hds.utc.fr/lib/exe/fetch.php?media=en:wordnet-mlj12.tar.gz
  wget -O wordnet-mlj12.tar.gz \
    "https://github.com/mana-ysh/knowledge-graph-embeddings/raw/master/dat/wordnet-mlj12.tar.gz"
  tar -xzvf wordnet-mlj12.tar.gz

  for f in "${dir}"/wordnet-ml*.txt; do
    triples_to_ft "$f" "${dir}/ft_$(basename "$f")"
  done

  # Outputs left by an earlier run match the ft_* globs below and would make
  # cat read its own output file, so drop them first.
  rm -f -- "$full" "$valid_train"
  cat "${dir}"/ft_* >"$full"
  cat "${dir}"/ft_*train.txt "${dir}"/ft_*valid.txt >"$valid_train"
}

#######################################
# Download and convert FB15K.
#######################################
prepare_fb15k() {
  local dir=FB15k
  local full="${dir}/ft_freebase_mtr100_mte100-full.txt"
  local valid_train="${dir}/ft_freebase_mtr100_mte100-valid+train.txt"
  local f fn

  echo "preparing FB15K"
  # Former upstream location, kept for reference:
  #   https://everest.hds.utc.fr/lib/exe/fetch.php?media=en:fb15k.tgz
  wget -O fb15k.tgz \
    "https://github.com/mana-ysh/knowledge-graph-embeddings/raw/master/dat/fb15k.tgz"
  tar -xzvf fb15k.tgz

  for f in "${dir}"/freebase*.txt; do
    fn="${dir}/ft_$(basename "$f")"
    printf '%s  -->  %s\n' "$f" "$fn"
    triples_to_ft "$f" "$fn"
  done

  # See prepare_wn18: stale outputs would be picked up by the globs.
  rm -f -- "$full" "$valid_train"
  cat "${dir}"/ft_* >"$full"
  cat "${dir}"/ft_*train.txt "${dir}"/ft_*valid.txt >"$valid_train"
}

#######################################
# Download and convert FB15K-237 (tab-separated splits under Release/).
#######################################
prepare_fb15k_237() {
  local dir=Release
  local f fn

  echo "preparing FB15K-237"
  wget -O FB15K-237.2.zip \
    "https://download.microsoft.com/download/8/7/0/8700516A-AB3D-4850-B4BB-805C515AECE1/FB15K-237.2.zip"
  # -o: overwrite without prompting so a re-run does not block on input.
  unzip -o FB15K-237.2.zip

  for f in train.txt test.txt valid.txt; do
    fn="${dir}/ft_${f}"
    printf '%s  -->  %s\n' "$f" "$fn"
    triples_to_ft "${dir}/${f}" "$fn" '\t'
  done

  # Inputs are named explicitly (same order the ft_*.txt glob produced on a
  # clean directory) so the generated files can never be read as input.
  cat "${dir}/ft_test.txt" "${dir}/ft_train.txt" "${dir}/ft_valid.txt" \
    >"${dir}/ft_full.txt"
  cat "${dir}/ft_train.txt" "${dir}/ft_valid.txt" \
    >"${dir}/ft_valid+train.txt"
}

#######################################
# Download and convert the SVO tensor dataset.
#######################################
prepare_svo() {
  local dir=SVO-tensor-dataset
  local valid_train="${dir}/ft_svo_data-valid+train.dat"
  local f

  echo "preparing SVO"
  # The URL contains '?', so it must be quoted; -O fixes the local file name
  # instead of relying on the server-derived "fetch.php?media=..." name.
  wget -O svo-tensor-dataset.tar.gz \
    "https://everest.hds.utc.fr/lib/exe/fetch.php?media=en:svo-tensor-dataset.tar.gz"
  tar -xzvf svo-tensor-dataset.tar.gz

  for f in "${dir}"/svo_data*.dat; do
    awk '{print "0_"$1,"1_"$3,"__label__"$2;}' <"$f" >"${dir}/ft_$(basename "$f")"
  done

  # A stale valid+train file matches ft_*train*.dat; remove it before globbing.
  rm -f -- "$valid_train"
  cat "${dir}"/ft_*train*.dat "${dir}"/ft_*valid*.dat >"$valid_train"
}

main() {
  mkdir -p -- "$DATADIR"
  cd -- "$DATADIR"

  prepare_wn18
  prepare_fb15k
  prepare_fb15k_237
  prepare_svo
}

main "$@"