Skip to content
Snippets Groups Projects
Commit eee52960 authored by Alice Brenon
Browse files

Add source extractor for LGE using the newest syntax for ProcessingLGE and soprano

parent 63c08d18
No related branches found
No related tags found
No related merge requests found
#!/bin/sh
# Source extractor for LGE.
#
# Usage: <this script> SOURCE_DIRECTORY TARGET_DIRECTORY
#
#   SOURCE_DIRECTORY  directory containing one ALTO/LGE/T<T> directory with one
#                     ALTO-XML file per page
#   TARGET_DIRECTORY  existing directory where output will be generated
#
# For each volume number T from 1 to 31, runs LGEprepareVolume.sh and then
# LGEencode.sh (both must be found on the PATH). The stderr of every
# LGEencode.sh run is collected in /tmp/LGE.log.
#
# `die` and PROG_NAME are not defined here; presumably they are provided by
# ../lib.sh -- confirm there.

# Use the POSIX `.` builtin instead of `source`: this script runs under
# /bin/sh, where `source` is not guaranteed to exist.
. "${0%/*}/../lib.sh"

if [ "$#" != 2 ]
then
  die "${PROG_NAME} SOURCE_DIRECTORY TARGET_DIRECTORY"
else
  SOURCE="${1}"
  TARGET="${2}"
  [ -d "${SOURCE}" ] || die "SOURCE_DIRECTORY must be a directory (containing one ALTO/LGE/T<T> directory with one ALTO-XML file per page)"
  [ -d "${TARGET}" ] || die "TARGET_DIRECTORY must be a directory (where output will be generated)"
fi

# NOTE(review): TARGET is validated before the working directory changes; a
# relative TARGET_DIRECTORY will be resolved against SOURCE by the commands
# below. Callers should pass an absolute path -- confirm.
cd "${SOURCE}" || die "Could not enter SOURCE_DIRECTORY ${SOURCE}"

# Start from an empty log once, then append per volume, so that diagnostics
# from every volume are kept rather than only those of the last one.
: > /tmp/LGE.log

# Plain counter loop: brace ranges such as {1..31} are a bash extension and
# are left unexpanded by POSIX shells like dash.
T=1
while [ "${T}" -le 31 ]
do
  LGEprepareVolume.sh "${T}" "${TARGET}" ALTO
  LGEencode.sh "${TARGET}/ALTO/LGE/T${T}" -k Text --metadata --text-root "${TARGET}/Text" 2>> /tmp/LGE.log
  T=$((T + 1))
done
......@@ -10,11 +10,11 @@ then
else
SOURCE="${1}"
TARGET="${2}"
[ -d "${SOURCE}" ] || die "SOURCE_DIRECTORY must be a directory (containing 1 .tei file per tome)"
[ -d "${SOURCE}" ] || die "SOURCE_DIRECTORY must be a directory (containing a folder for EDdA and one for LGE with the structure found in Source.squashfs)"
[ -d "${TARGET}" ] || die "TARGET_DIRECTORY must be a directory (where output will be generated)"
fi
FILES_TSV="${TARGET}/files.tsv"
printf "book tome rank headWord name page\n" > "${FILES_TSV}"
${BASE_DIR}/EDdA/extract-from-source.sh "${SOURCE}/EDdA/ARTFL" ${TARGET} >> "${FILES_TSV}"
#${BASE_DIR}/LGE/extract-from-source.sh "${SOURCE}/LGE/BnF" ${TARGET} >> "${FILES_TSV}"
${BASE_DIR}/LGE/extract-from-source.sh "${SOURCE}/LGE/BnF" ${TARGET} >> "${FILES_TSV}"
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment