#!/usr/bin/env -S runhaskell --ghc-arg="-Wall" --ghc-arg="-i lib" {-# LANGUAGE DeriveGeneric, ExplicitNamespaces, OverloadedStrings, ScopedTypeVariables #-} import Control.Monad.Except (ExceptT(..), runExceptT) import Data.Foldable (toList) import Data.Map (Map, (!?)) import Data.Maybe (catMaybes) import Data.Text (Text, pack) import GEODE.Metadata (Contrastive(..), Entry, Has(..), MultiText(..), PrimaryKey, type(@)(..), indexBy, readNamedTsv, tsvFile) import System.Environment (getArgs) import System.Exit (die) import System.Script (syntax) leftJoin :: forall a b c t. (Foldable t, Functor t, Has PrimaryKey a, Has PrimaryKey b) => (a -> Maybe b -> c) -> t a -> t b -> t c leftJoin f left right = outputRow <$> left where indexed :: Map PrimaryKey b indexed = indexBy get right outputRow leftLine = f leftLine (indexed !? get leftLine) rightJoin :: (Foldable t, Functor t, Has PrimaryKey a, Has PrimaryKey b) => (Maybe a -> b -> c) -> t a -> t b -> t c rightJoin f left right = leftJoin (flip f) right left join :: forall a b c t. (Foldable t, Functor t, Has PrimaryKey a, Has PrimaryKey b) => (a -> b -> c) -> t a -> t b -> [c] join f left right = catMaybes . toList $ outputRow <$> left where indexed :: Map PrimaryKey b indexed = indexBy get right outputRow leftLine = f leftLine <$> (indexed !? get leftLine) merge :: (PrimaryKey @ Entry) -> (PrimaryKey @ Contrastive) -> (PrimaryKey @ Entry @ Contrastive) merge (pK :@: e) (_ :@: c) = pK :@: e :@: c tag :: Text -> Maybe PrimaryKey -> (PrimaryKey @ Entry @ Contrastive) -> (PrimaryKey @ Entry @ Contrastive) tag _ Nothing l = l tag name (Just _) (pK :@: e :@: contr) = pK :@: e :@: contr { subCorpus = MultiText (name : subCorpora) } where MultiText subCorpora = subCorpus contr main :: IO () main = getArgs >>= run where run [left, right, output] = runExceptT ((join merge) <$> ExceptT (readNamedTsv left) <*> ExceptT (readNamedTsv right) ) >>= either die (tsvFile output) run [name, left, right, output] = runExceptT ((rightJoin (tag $ pack name)) <$> ExceptT (readNamedTsv left) <*> ExceptT (readNamedTsv right) ) >>= either die (tsvFile output) run _ = syntax "PRIMARY_KEY.tsv INPUT_METADATA.tsv OUTPUT_METADATA.tsv"