Newer
Older
# Assemble a directory of article text files selected by a metadata CSV.
#
# Usage: <script> METADATA_CSV SOURCE_TEXT_ARTICLES OUTPUT
#   $1  CSV file. Its first line is skipped as a header; every following line
#       is read as "ID,RELATIVE_PATH[,...]".
#   $2  Directory holding the source article files.
#   $3  Directory to create. If it already exists it is first renamed to
#       OUTPUT.N, where N is the smallest positive integer not yet taken.
#
# Requires bash (process substitution and arrays are used).
if [ "$#" -lt 3 ]; then
  printf 'Usage: %s METADATA_CSV SOURCE_TEXT_ARTICLES OUTPUT\n' "${0##*/}" >&2
  exit 2
fi

INPUT_PATH="${1}"
# The read loop consumes INPUT_METADATA, which was never assigned from the
# arguments; default it to the first argument unless the caller already set it.
INPUT_METADATA="${INPUT_METADATA:-${INPUT_PATH}}"
SOURCE_TEXT_ARTICLES="${2}"
OUTPUT="${3}"

if [ ! -r "${INPUT_METADATA}" ]; then
  printf 'error: cannot read metadata file: %s\n' "${INPUT_METADATA}" >&2
  exit 1
fi

# Preserve a previous output directory by moving it aside under the first
# free numeric suffix.
if [ -d "${OUTPUT}" ]; then
  N=1
  while [ -d "${OUTPUT}.${N}" ]; do
    N=$((N + 1))
  done
  mv -- "${OUTPUT}" "${OUTPUT}.${N}" || exit 1
fi

# Stage the copies in a scratch directory; it becomes OUTPUT only at the end.
WORKDIR=$(mktemp -d /tmp/parallel-corpus.XXX) || exit 1

# -r keeps backslashes literal; the extra test handles a final line that has
# no trailing newline.
while read -r LINE || [ -n "${LINE}" ]; do
  ID="${LINE%%,*}"             # first CSV field (only used by the legacy cp below)
  LINE="${LINE#*,}"
  RELATIVE_PATH="${LINE%%,*}"  # second CSV field
  [ -n "${RELATIVE_PATH}" ] || continue   # blank or malformed row

  # Source files: RELATIVE_PATH minus its first path component, followed by
  # any extension. The '*' must stay outside the quotes or it is never expanded.
  MATCHES=("${SOURCE_TEXT_ARTICLES}/${RELATIVE_PATH#*/}."*)
  if [ ! -e "${MATCHES[0]}" ]; then
    printf 'warning: no source file matches %s.*\n' \
      "${SOURCE_TEXT_ARTICLES}/${RELATIVE_PATH#*/}" >&2
    continue
  fi

  DEST="${WORKDIR}/${RELATIVE_PATH}"
  # WORKDIR starts out empty, so the parent of DEST has to be created first.
  # NOTE(review): when several files match, cp needs DEST to be a directory
  # and will report an error here -- confirm the intended layout.
  if ! mkdir -p -- "${DEST%/*}" || ! cp -- "${MATCHES[@]}" "${DEST}"; then
    printf 'warning: could not copy into %s\n' "${DEST}" >&2
  fi
  #cp "${SOURCE_TEXT_ARTICLES}/T${T}/ById/${ID}."* "${WORKDIR}/T${T}"
done < <(tail -n +2 -- "${INPUT_METADATA}")

mv -- "${WORKDIR}" "${OUTPUT}"