Skip to content
Snippets Groups Projects
Commit b1cf10a1 authored by Alice Brenon's avatar Alice Brenon
Browse files

Implement new features to merge script

parent 7b177653
No related branches found
No related tags found
No related merge requests found
#!/usr/bin/env -S runhaskell --ghc-arg="-Wall" --ghc-arg="-i lib" #!/usr/bin/env -S runhaskell --ghc-arg="-Wall" --ghc-arg="-i lib"
{-# LANGUAGE DeriveGeneric, ExplicitNamespaces, OverloadedStrings #-} {-# LANGUAGE DeriveGeneric, ExplicitNamespaces, OverloadedStrings, ScopedTypeVariables #-}
import Control.Monad.Except (ExceptT(..), runExceptT) import Control.Monad.Except (ExceptT(..), runExceptT)
import Data.Map ((!?)) import Data.Foldable (toList)
import Data.Map (Map, (!?))
import Data.Maybe (catMaybes) import Data.Maybe (catMaybes)
import Data.Vector (Vector, toList) import Data.Text (Text, pack)
import GEODE.Metadata (Contrastive, Entry, Has(..), PrimaryKey, type(@)(..), indexBy, readNamedTsv, tsvFile) import GEODE.Metadata (Contrastive(..), Entry, Has(..), MultiText(..), PrimaryKey, type(@)(..), indexBy, readNamedTsv, tsvFile)
import System.Environment (getArgs) import System.Environment (getArgs)
import System.Exit (die) import System.Exit (die)
import System.Script (syntax) import System.Script (syntax)
merge :: Vector (PrimaryKey @ Entry) -> Vector (PrimaryKey @ Contrastive) -> [PrimaryKey @ Entry @ Contrastive] leftJoin :: forall a b c t. (Foldable t, Functor t, Has PrimaryKey a, Has PrimaryKey b) =>
merge left right = catMaybes (mergeRow <$> toList right) (a -> Maybe b -> c) -> t a -> t b -> t c
leftJoin f left right = outputRow <$> left
where where
indexed = get <$> indexBy get left indexed :: Map PrimaryKey b
mergeRow (pK :@: contrastive) = indexed = indexBy get right
(\entry -> (pK :@: entry :@: contrastive)) <$> (indexed !? pK) outputRow leftLine =
f leftLine (indexed !? get leftLine)
rightJoin :: (Foldable t, Functor t, Has PrimaryKey a, Has PrimaryKey b) =>
(Maybe a -> b -> c) -> t a -> t b -> t c
rightJoin f left right = leftJoin (flip f) right left
join :: forall a b c t. (Foldable t, Functor t, Has PrimaryKey a, Has PrimaryKey b) =>
(a -> b -> c) -> t a -> t b -> [c]
join f left right = catMaybes . toList $ outputRow <$> left
where
indexed :: Map PrimaryKey b
indexed = indexBy get right
outputRow leftLine =
f leftLine <$> (indexed !? get leftLine)
merge :: (PrimaryKey @ Entry) -> (PrimaryKey @ Contrastive) -> (PrimaryKey @ Entry @ Contrastive)
merge (pK :@: e) (_ :@: c) = pK :@: e :@: c
tag :: Text -> Maybe PrimaryKey -> (PrimaryKey @ Entry @ Contrastive) -> (PrimaryKey @ Entry @ Contrastive)
tag _ Nothing l = l
tag name (Just _) (pK :@: e :@: contr) =
pK :@: e :@: contr { subCorpus = MultiText (name : subCorpora) }
where
MultiText subCorpora = subCorpus contr
main :: IO () main :: IO ()
main = getArgs >>= run main = getArgs >>= run
where where
run [left, right, output] = run [left, right, output] =
runExceptT (merge runExceptT ((join merge)
<$> ExceptT (readNamedTsv left)
<*> ExceptT (readNamedTsv right) )
>>= either die (tsvFile output)
run [name, left, right, output] =
runExceptT ((rightJoin (tag $ pack name))
<$> ExceptT (readNamedTsv left) <$> ExceptT (readNamedTsv left)
<*> ExceptT (readNamedTsv right) ) <*> ExceptT (readNamedTsv right) )
>>= either die (tsvFile output) >>= either die (tsvFile output)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment