From 3807b311d5334fe04f2389cc54cc9d0cdf407dd9 Mon Sep 17 00:00:00 2001
From: Alice BRENON <alice.brenon@ens-lyon.fr>
Date: Fri, 9 Feb 2024 11:54:54 +0100
Subject: [PATCH] Convert the linearization script to take TSV as input and
 handle the structure itself, making it simpler to call

---
 scripts/linearize.hs | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/scripts/linearize.hs b/scripts/linearize.hs
index e589ea5..cf0748a 100755
--- a/scripts/linearize.hs
+++ b/scripts/linearize.hs
@@ -1,13 +1,23 @@
 #!/usr/bin/env -S runhaskell --ghc-arg="-Wall" --ghc-arg="-i lib/haskell"
 
+import GEODE.Metadata (ArticleRecord, Document(..), ReadTSV(..), Record(..))
+import System.Directory (createDirectoryIfMissing)
 import System.Environment (getArgs)
+import System.FilePath ((</>), takeDirectory)
 import System.Script (syntax)
-import Text.Filter (xargs)
 import Text.Filter.Linearize (linearize)
 
+to :: FilePath -> FilePath -> Bool -> ArticleRecord -> IO () -- args: source dir, target dir, paragraph flag, article
+to source target inferParagraphs article = do -- linearize one article's file from source into target
+  createDirectoryIfMissing True (takeDirectory output) -- True: also create missing parent directories
+  readFile (source </> path) >>= writeFile output . linearize inferParagraphs -- read, transform, write
+  where
+    path = relativePath article "txt" -- presumably the article's path with a "txt" extension (TODO confirm)
+    output = target </> path -- the same relative path is used under both source and target
+
 main :: IO ()
 main = getArgs >>= cli
   where
-    cli [inferParagraphs, target] =
-      xargs (pure.linearize (inferParagraphs == "Y")) target
-    cli _ = syntax "[Y|N] TARGET_DIR"
+    cli [inferParagraphs, source, target] = -- matches exactly three command-line arguments
+      readTSV () >>= mapM_ (source `to` target $ inferParagraphs == "Y") . rows -- one `to` call per row; flag is on only for "Y"
+    cli _ = syntax "[Y|N] SOURCE_DIR TARGET_DIR" -- any other argument count; presumably reports usage (TODO confirm)
-- 
GitLab