Skip to content
Snippets Groups Projects
parallel-links.hs 1.18 KiB
Newer Older
#!/usr/bin/env -S runhaskell --ghc-arg="-Wall" --ghc-arg="-i lib"
{-# LANGUAGE ExplicitNamespaces #-}
module Main where

import Data.Char as Text (toLower)
import Data.Text as Text (map)
import GEODE.Metadata
  ( Entry(..), Has(..), PrimaryKey(..), type(@), groupBy, readNamedTsv, tsvFile )
import System.Environment (getArgs)
import System.Exit (die)
import System.Script (syntax)

type Line = PrimaryKey @ Entry

findPairs :: Foldable t => Bool -> t Line -> [Line]
findPairs caseInsensitive =
  concatMap snd . filter isPair . groupBy (normalize . headWord . get)
  where
    isPair = oneInEach . groupBy (book.get) . snd
    oneInEach = (&&) <$> bothBooks <*> oneByBook
    bothBooks = ((2 ==) . length)
    oneByBook = all ((1 ==) . length . snd)
    normalize
      | caseInsensitive = Text.map toLower
      | otherwise = id

main :: IO ()
main = getArgs >>= popCaseInsensitive run
  where
    run caseInsensitive [input, output] = readNamedTsv input
      >>= either
            die
            (tsvFile output . findPairs caseInsensitive)
    run _ _ = syntax "[-i] SOURCE_METADATA.tsv TARGET_METADATA.tsv"
    popCaseInsensitive f ("-i":args) = f True args
    popCaseInsensitive f args = f False args