-
Alice Brenon authoredf8b70bc4
fix-tsv.hs 910 B
#!/usr/bin/env -S runhaskell --ghc-arg="-Wall" --ghc-arg="-i lib"
{-# LANGUAGE OverloadedStrings #-}
import Data.ByteString as BS (toStrict)
import Data.Csv (EncodeOptions(..), encodeWith, defaultEncodeOptions)
import Data.Text as Text (Text, splitOn)
import Data.Text.Encoding as Text (decodeUtf8)
import System.Environment (getArgs)
import System.Exit (die)
import System.Script (syntax)
import Text.Filter (xargs)
fixTsv :: [Text] -> IO Text
fixTsv = fmap toTsv . mapM fixLine
where
toTsv = decodeUtf8 . BS.toStrict . encodeWith
(defaultEncodeOptions { encDelimiter = toEnum $ fromEnum '\t' })
fixLine = escapeFormLemma . Text.splitOn "\t"
escapeFormLemma [n, form, lemma, pos, ene] = pure (n, form, lemma, pos, ene)
escapeFormLemma l = die $ show l
main :: IO ()
main = getArgs >>= run
where
run [target] = xargs fixTsv target
run _ = syntax "TARGET_DIRECTORY"