From eee529601791c604a7a4b73d8d6005d44c8cf87a Mon Sep 17 00:00:00 2001
From: Alice BRENON <alice.brenon@ens-lyon.fr>
Date: Mon, 4 Sep 2023 20:06:31 +0200
Subject: [PATCH] Add source extractor for LGE using the newest syntax for
 ProcessingLGE and soprano

---
 scripts/LGE/extract-from-source.sh | 29 +++++++++++++++++++++++++++++
 scripts/extract-from-source.sh     |  4 ++--
 2 files changed, 31 insertions(+), 2 deletions(-)
 create mode 100755 scripts/LGE/extract-from-source.sh

diff --git a/scripts/LGE/extract-from-source.sh b/scripts/LGE/extract-from-source.sh
new file mode 100755
index 0000000..e0af410
--- /dev/null
+++ b/scripts/LGE/extract-from-source.sh
@@ -0,0 +1,29 @@
+#!/bin/sh
+# Run LGEprepareVolume.sh then LGEencode.sh for each of the 31 LGE volumes.
+# Usage: extract-from-source.sh SOURCE_DIRECTORY TARGET_DIRECTORY
+
+# "." rather than "source": the latter is a bashism and this runs under /bin/sh.
+. "${0%/*}/../lib.sh"
+
+if [ "$#" != 2 ]
+then
+	die "${PROG_NAME} SOURCE_DIRECTORY TARGET_DIRECTORY"
+else
+	SOURCE="${1}"
+	TARGET="${2}"
+	[ -d "${SOURCE}" ] || die "SOURCE_DIRECTORY must be a directory (containing one ALTO/LGE/T<T> directory with one ALTO-XML file per page)"
+	[ -d "${TARGET}" ] || die "TARGET_DIRECTORY must be a directory (where output will be generated)"
+fi
+
+# NOTE(review): TARGET is validated before this cd but used after it, so a
+# relative TARGET may resolve elsewhere; confirm callers pass absolute paths.
+cd "${SOURCE}" || die "Cannot change directory to ${SOURCE}"
+# Empty the log once, then append, so every volume's errors are kept.
+: > /tmp/LGE.log
+T=1
+while [ "${T}" -le 31 ]
+do
+	LGEprepareVolume.sh "${T}" "${TARGET}" ALTO
+	LGEencode.sh "${TARGET}/ALTO/LGE/T${T}" -k Text --metadata --text-root "${TARGET}/Text" 2>> /tmp/LGE.log
+	T=$((T + 1))
+done
diff --git a/scripts/extract-from-source.sh b/scripts/extract-from-source.sh
index 0275b82..ba74ce0 100755
--- a/scripts/extract-from-source.sh
+++ b/scripts/extract-from-source.sh
@@ -10,11 +10,11 @@ then
 else
 	SOURCE="${1}"
 	TARGET="${2}"
-	[ -d "${SOURCE}" ] || die "SOURCE_DIRECTORY must be a directory (containing 1 .tei file per tome)"
+	[ -d "${SOURCE}" ] || die "SOURCE_DIRECTORY must be a directory (containing a folder for EDdA and one for LGE with the structure found in Source.squashfs)"
 	[ -d "${TARGET}" ] || die "TARGET_DIRECTORY must be a directory (where output will be generated)"
 fi
 
 FILES_TSV="${TARGET}/files.tsv"
 printf "book	tome	rank	headWord	name	page\n" > "${FILES_TSV}"
 ${BASE_DIR}/EDdA/extract-from-source.sh "${SOURCE}/EDdA/ARTFL" ${TARGET} >> "${FILES_TSV}"
-#${BASE_DIR}/LGE/extract-from-source.sh "${SOURCE}/LGE/BnF" ${TARGET} >> "${FILES_TSV}"
+"${BASE_DIR}/LGE/extract-from-source.sh" "${SOURCE}/LGE/BnF" "${TARGET}" >> "${FILES_TSV}"
-- 
GitLab