Skip to content
Snippets Groups Projects
fix-tsv.hs 910 B
#!/usr/bin/env -S runhaskell --ghc-arg="-Wall" --ghc-arg="-i lib"
{-# LANGUAGE OverloadedStrings #-}

import Data.ByteString as BS (toStrict)
import Data.Csv (EncodeOptions(..), encodeWith, defaultEncodeOptions)
import Data.Text as Text (Text, splitOn)
import Data.Text.Encoding as Text (decodeUtf8)
import System.Environment (getArgs)
import System.Exit (die)
import System.Script (syntax)
import Text.Filter (xargs)

fixTsv :: [Text] -> IO Text
fixTsv = fmap toTsv . mapM fixLine
  where
    toTsv = decodeUtf8 . BS.toStrict . encodeWith
              (defaultEncodeOptions { encDelimiter = toEnum $ fromEnum '\t' })
    fixLine = escapeFormLemma . Text.splitOn "\t"
    escapeFormLemma [n, form, lemma, pos, ene] = pure (n, form, lemma, pos, ene)
    escapeFormLemma l = die $ show l

main :: IO ()
main = getArgs >>= run
  where
    run [target] = xargs fixTsv target
    run _ = syntax "TARGET_DIRECTORY"