From 3807b311d5334fe04f2389cc54cc9d0cdf407dd9 Mon Sep 17 00:00:00 2001
From: Alice BRENON <alice.brenon@ens-lyon.fr>
Date: Fri, 9 Feb 2024 11:54:54 +0100
Subject: [PATCH] Convert the linearization script to take TSV as input and handle the structure itself, making it simpler to call

---

Notes:
    Reviewer annotations. They sit between the three-dash line and the
    diffstat, so they are commentary only and not part of the change.

    What the change does, as read from the diff below:
      * Adds `to source target inferParagraphs article`, which handles one
        ArticleRecord: it computes `path = relativePath article "txt"`,
        creates the parent directory of `target </> path` (including
        missing parents), reads `source </> path` and writes the result of
        `linearize inferParagraphs` to `target </> path`.
      * `main` now expects exactly three arguments,
        "[Y|N] SOURCE_DIR TARGET_DIR". Paragraph inference is enabled only
        when the first argument is exactly "Y"; any other value means off.
      * The records come from `readTSV ()`, and `to` is run over `rows` of
        the result. Presumably `readTSV ()` reads the TSV from standard
        input -- TODO confirm against GEODE.Metadata.
      * Any other argument list falls through to `syntax` with the usage
        string.
      * The former `xargs`-based driver and its import are removed.

    NOTE(review): `readFile` here is the Prelude one (lazy I/O). It looks
    like passing the same directory as SOURCE_DIR and TARGET_DIR would make
    `writeFile` fail on a file that is still open for reading -- confirm
    before relying on in-place use.

    NOTE(review): this copy of the patch was recovered from a version whose
    line breaks and indentation had been collapsed. The hunk header declares
    13 old / 23 new lines while the visible content accounts for 12 / 22, so
    one blank context line has been restored directly after the shebang as
    the most plausible position. That position and the Haskell indentation
    (2 spaces for `where`/`do` bodies, 4 for bindings) are assumptions --
    verify against the original scripts/linearize.hs before applying.

 scripts/linearize.hs | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/scripts/linearize.hs b/scripts/linearize.hs
index e589ea5..cf0748a 100755
--- a/scripts/linearize.hs
+++ b/scripts/linearize.hs
@@ -1,13 +1,23 @@
 #!/usr/bin/env -S runhaskell --ghc-arg="-Wall" --ghc-arg="-i lib/haskell"
 
+import GEODE.Metadata (ArticleRecord, Document(..), ReadTSV(..), Record(..))
+import System.Directory (createDirectoryIfMissing)
 import System.Environment (getArgs)
+import System.FilePath ((</>), takeDirectory)
 import System.Script (syntax)
-import Text.Filter (xargs)
 import Text.Filter.Linearize (linearize)
 
+to :: FilePath -> FilePath -> Bool -> ArticleRecord -> IO ()
+to source target inferParagraphs article = do
+  createDirectoryIfMissing True (takeDirectory output)
+  readFile (source </> path) >>= writeFile output . linearize inferParagraphs
+  where
+    path = relativePath article "txt"
+    output = target </> path
+
 main :: IO ()
 main = getArgs >>= cli
   where
-    cli [inferParagraphs, target] =
-      xargs (pure.linearize (inferParagraphs == "Y")) target
-    cli _ = syntax "[Y|N] TARGET_DIR"
+    cli [inferParagraphs, source, target] =
+      readTSV () >>= mapM_ (source `to` target $ inferParagraphs == "Y") . rows
+    cli _ = syntax "[Y|N] SOURCE_DIR TARGET_DIR"
-- 
GitLab