Newer
Older
#!/usr/bin/env -S runhaskell --ghc-arg="-Wall" --ghc-arg="-i lib"
{-# LANGUAGE ExplicitNamespaces #-}
module Main where
import Data.Char as Text (toLower)
import Data.Text as Text (map)
import GEODE.Metadata
( Entry(..), Has(..), PrimaryKey(..), type(@), groupBy, readNamedTsv, tsvFile )
import System.Environment (getArgs)
import System.Exit (die)
import System.Script (syntax)
type Line = PrimaryKey @ Entry
findPairs :: Foldable t => Bool -> t Line -> [Line]
findPairs caseInsensitive =
concatMap snd . filter isPair . groupBy (normalize . headWord . get)
where
isPair = oneInEach . groupBy (book.get) . snd
oneInEach = (&&) <$> bothBooks <*> oneByBook
bothBooks = ((2 ==) . length)
oneByBook = all ((1 ==) . length . snd)
normalize
| caseInsensitive = Text.map toLower
| otherwise = id
main :: IO ()
main = getArgs >>= popCaseInsensitive run
where
run caseInsensitive [input, output] = readNamedTsv input
>>= either
die
(tsvFile output . findPairs caseInsensitive)
run _ _ = syntax "[-i] SOURCE_METADATA.tsv TARGET_METADATA.tsv"
popCaseInsensitive f ("-i":args) = f True args
popCaseInsensitive f args = f False args