Skip to content
Snippets Groups Projects
add-domain-metadata.hs 1.48 KiB
#!/usr/bin/env -S runhaskell --ghc-arg="-Wall" --ghc-arg="-i lib"
{-# LANGUAGE DeriveGeneric, NamedFieldPuns, OverloadedStrings #-}

--import Data.ByteString.Lazy as ByteString (readFile)
import Data.Csv (FromRecord)
import Data.Metadata (Domains(..), readCsv)
--import Data.Metadata (Article, articles)
import Data.Text as Text (Text, intercalate, isPrefixOf)
import GHC.Generics (Generic)
import System.Environment (getArgs)
import System.FilePath ((</>), (<.>))
import System.Exit (die)
import System.Script (syntax)
import Text.Editor (apply)

data Line = Line {
      tome :: String
    , rank :: String
    , domains :: Domains
  } deriving (Generic)

instance FromRecord Line

to :: FilePath -> FilePath -> Line -> IO ()
to source target (Line {tome, rank, domains}) =
  apply (pure . addMeta domains) target input
  where
    input = source </> "T" <> tome <> "_article" <> rank <.> "xml"

addMeta :: Domains -> [Text] -> [Text]
addMeta (Domains domains) textLines =
    take 6 textLines
  ++ [("domain\t" <> intercalate " | " domains)
     ,("class\t" <> getClass domains)]
  ++ drop 6 textLines

getClass :: [Text] -> Text
getClass domainsList
  | all hasGeo domainsList = "geography"
  | any hasGeo domainsList = "some_geography"
  | otherwise = "other"
  where
    hasGeo = ("Géographie" `isPrefixOf`)

main :: IO ()
main = getArgs >>= run
  where
    run [inputCSV, source, target] =
      readCsv inputCSV >>= either die (mapM_ (source `to` target))
    run _ = syntax "METADATA_CSV_FILE SOURCE TARGET"