From 352ebe5c25ab2a26911d84da7e59379210b63385 Mon Sep 17 00:00:00 2001
From: Alice BRENON <alice.brenon@ens-lyon.fr>
Date: Sat, 10 Dec 2022 17:13:53 +0100
Subject: [PATCH] Duplicate extraction script for EDdA; fix shebang because
 the scripts actually use features from bash

---
 scripts/extract-parallel-EDdA.sh | 35 ++++++++++++++++++++++++++++++++
 scripts/extract-parallel-LGE.sh  |  2 +-
 2 files changed, 36 insertions(+), 1 deletion(-)
 create mode 100755 scripts/extract-parallel-EDdA.sh

diff --git a/scripts/extract-parallel-EDdA.sh b/scripts/extract-parallel-EDdA.sh
new file mode 100755
index 0000000..b18305e
--- /dev/null
+++ b/scripts/extract-parallel-EDdA.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+INPUT_METADATA="${1}"
+SOURCE_TEXT_ARTICLES="${2}"
+OUTPUT="${3}"
+if [ -d "${OUTPUT}" ]
+then
+	N=1
+	while [ -d "${OUTPUT}.${N}" ]
+	do
+		N=$((N+1))
+	done
+	mv "${OUTPUT}" "${OUTPUT}.${N}"
+fi
+
+WORKDIR=$(mktemp -d /tmp/parallel-EDdA.XXX)
+
+for T in {1..17}
+do
+	mkdir -p "${WORKDIR}/T${T}"
+done
+
+while read LINE
+do
+	LINE="${LINE#*,}"
+	LINE="${LINE#*,}"
+	LINE="${LINE#*,}"
+	LINE="${LINE#*,}"
+	T="${LINE%%,*}"
+	LINE="${LINE#*,}"
+	RANK="${LINE%%,*}"
+	cp "${SOURCE_TEXT_ARTICLES}/T${T}/article${RANK}."* "${WORKDIR}/T${T}"
+done < <(tail -n +2 ${INPUT_METADATA})
+
+mv ${WORKDIR} ${OUTPUT}
diff --git a/scripts/extract-parallel-LGE.sh b/scripts/extract-parallel-LGE.sh
index 1411ce2..b1c0c3a 100755
--- a/scripts/extract-parallel-LGE.sh
+++ b/scripts/extract-parallel-LGE.sh
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
 
 INPUT_METADATA="${1}"
 SOURCE_TEXT_ARTICLES="${2}"
-- 
GitLab