diff --git a/scripts/linearize.hs b/scripts/linearize.hs
index e589ea5e8449878048513cafe8c96c30a82dfe8e..cf0748a7773a8b37044cab97f20fea1ef49b0989 100755
--- a/scripts/linearize.hs
+++ b/scripts/linearize.hs
@@ -1,13 +1,33 @@
 #!/usr/bin/env -S runhaskell --ghc-arg="-Wall" --ghc-arg="-i lib/haskell"
 
+import GEODE.Metadata (ArticleRecord, Document(..), ReadTSV(..), Record(..))
+import System.Directory (createDirectoryIfMissing)
 import System.Environment (getArgs)
+import System.FilePath ((</>), takeDirectory)
 import System.Script (syntax)
-import Text.Filter (xargs)
 import Text.Filter.Linearize (linearize)
 
+-- | Process one article: read its "txt" file under @source@, pass the
+-- contents through 'linearize', and write the result to the same relative
+-- path under @target@, creating missing parent directories first.
+to :: FilePath -> FilePath -> Bool -> ArticleRecord -> IO ()
+to source target inferParagraphs article = do
+  createDirectoryIfMissing True (takeDirectory output)
+  contents <- readFile (source </> path)
+  -- 'readFile' is lazy and keeps the input file open until it has been read
+  -- to the end; consume it before opening the output, so the write does not
+  -- hit a locked file when SOURCE_DIR and TARGET_DIR are the same directory.
+  length contents `seq` writeFile output (linearize inferParagraphs contents)
+  where
+    path = relativePath article "txt"
+    output = target </> path
+
+-- | Entry point.  Expects the arguments @[Y|N] SOURCE_DIR TARGET_DIR@ and
+-- runs 'to' on each of the 'rows' of the document read by 'readTSV'; any
+-- other argument list is handed to 'syntax' together with the usage string.
 main :: IO ()
 main = getArgs >>= cli
   where
-    cli [inferParagraphs, target] =
-      xargs (pure.linearize (inferParagraphs == "Y")) target
-    cli _ = syntax "[Y|N] TARGET_DIR"
+    cli [inferParagraphs, source, target] =
+      readTSV () >>= mapM_ (source `to` target $ inferParagraphs == "Y") . rows
+    cli _ = syntax "[Y|N] SOURCE_DIR TARGET_DIR"