Newer
Older
#!/usr/bin/env -S runhaskell --ghc-arg="-Wall" --ghc-arg="-i lib"
{-# LANGUAGE DeriveGeneric, ExplicitNamespaces, OverloadedStrings, ScopedTypeVariables #-}
import Control.Monad.Except (ExceptT(..), runExceptT)
import Data.Foldable (toList)
import Data.Map (Map, (!?))
import Data.Maybe (catMaybes)
import Data.Text (Text, pack)
import GEODE.Metadata (Contrastive(..), Entry, Has(..), MultiText(..), PrimaryKey, type(@)(..), indexBy, readNamedTsv, tsvFile)
import System.Environment (getArgs)
import System.Exit (die)
import System.Script (syntax)
leftJoin :: forall a b c t. (Foldable t, Functor t, Has PrimaryKey a, Has PrimaryKey b) =>
(a -> Maybe b -> c) -> t a -> t b -> t c
leftJoin f left right = outputRow <$> left
indexed :: Map PrimaryKey b
indexed = indexBy get right
outputRow leftLine =
f leftLine (indexed !? get leftLine)
rightJoin :: (Foldable t, Functor t, Has PrimaryKey a, Has PrimaryKey b) =>
(Maybe a -> b -> c) -> t a -> t b -> t c
rightJoin f left right = leftJoin (flip f) right left
join :: forall a b c t. (Foldable t, Functor t, Has PrimaryKey a, Has PrimaryKey b) =>
(a -> b -> c) -> t a -> t b -> [c]
join f left right = catMaybes . toList $ outputRow <$> left
where
indexed :: Map PrimaryKey b
indexed = indexBy get right
outputRow leftLine =
f leftLine <$> (indexed !? get leftLine)
merge :: (PrimaryKey @ Entry) -> (PrimaryKey @ Contrastive) -> (PrimaryKey @ Entry @ Contrastive)
merge (pK :@: e) (_ :@: c) = pK :@: e :@: c
tag :: Text -> Maybe PrimaryKey -> (PrimaryKey @ Entry @ Contrastive) -> (PrimaryKey @ Entry @ Contrastive)
tag _ Nothing l = l
tag name (Just _) (pK :@: e :@: contr) =
pK :@: e :@: contr { subCorpus = MultiText (name : subCorpora) }
where
MultiText subCorpora = subCorpus contr
main :: IO ()
main = getArgs >>= run
where
run [left, right, output] =
runExceptT ((join merge)
<$> ExceptT (readNamedTsv left)
<*> ExceptT (readNamedTsv right) )
>>= either die (tsvFile output)
run [name, left, right, output] =
runExceptT ((rightJoin (tag $ pack name))
<$> ExceptT (readNamedTsv left)
<*> ExceptT (readNamedTsv right) )
>>= either die (tsvFile output)
run _ = syntax "PRIMARY_KEY.tsv INPUT_METADATA.tsv OUTPUT_METADATA.tsv"