-
Alice Brenon authoredf8b70bc4
add-domain-metadata.hs 1.48 KiB
#!/usr/bin/env -S runhaskell --ghc-arg="-Wall" --ghc-arg="-i lib"
{-# LANGUAGE DeriveGeneric, NamedFieldPuns, OverloadedStrings #-}
--import Data.ByteString.Lazy as ByteString (readFile)
import Data.Csv (FromRecord)
import Data.Metadata (Domains(..), readCsv)
--import Data.Metadata (Article, articles)
import Data.Text as Text (Text, intercalate, isPrefixOf)
import GHC.Generics (Generic)
import System.Environment (getArgs)
import System.FilePath ((</>), (<.>))
import System.Exit (die)
import System.Script (syntax)
import Text.Editor (apply)
data Line = Line {
tome :: String
, rank :: String
, domains :: Domains
} deriving (Generic)
instance FromRecord Line
to :: FilePath -> FilePath -> Line -> IO ()
to source target (Line {tome, rank, domains}) =
apply (pure . addMeta domains) target input
where
input = source </> "T" <> tome <> "_article" <> rank <.> "xml"
addMeta :: Domains -> [Text] -> [Text]
addMeta (Domains domains) textLines =
take 6 textLines
++ [("domain\t" <> intercalate " | " domains)
,("class\t" <> getClass domains)]
++ drop 6 textLines
getClass :: [Text] -> Text
getClass domainsList
| all hasGeo domainsList = "geography"
| any hasGeo domainsList = "some_geography"
| otherwise = "other"
where
hasGeo = ("Géographie" `isPrefixOf`)
main :: IO ()
main = getArgs >>= run
where
run [inputCSV, source, target] =
readCsv inputCSV >>= either die (mapM_ (source `to` target))
run _ = syntax "METADATA_CSV_FILE SOURCE TARGET"