{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
module Text.TEIWA.Source (
      Format
    , Origin(..)
    , Source(..)
    , coNLLX
    , csv
    , runTEIWAParser
    , parse
    , tsv
  ) where

import Control.Monad.Except (MonadError(..))
import Control.Monad.Reader (MonadReader(..), ReaderT(..))
import Control.Monad.IO.Class (MonadIO(..))
import Data.Text.Lazy as Text (Text, toLower, unpack)
import Data.Text.Lazy.IO as Text (readFile)
import Text.Parsec (SourceName, runParserT)
import Text.TEIWA.Annotation (
    Annotation(..), Attributes, SentenceAnnotation(..), TokenAnnotation(..)
  )
import Text.TEIWA.Config (Config(..))
import Text.TEIWA.Error (Error(..))
import Text.TEIWA.Source.Common (
    AnnotationContext(..), Field, Row, TEIWAParser, annotationContext
  )
import qualified Text.TEIWA.Source.CoNLLX as CoNLLX (header, sentences)
import qualified Text.TEIWA.Source.SSV as SSV (body, header)

type Format = TEIWAParser Annotation

isTEI :: Text -> Bool
isTEI = (`elem` ["pos", "lemma", "msd"])

filterAttributes :: MonadReader Config m => [(Text, Field)] -> m Attributes
filterAttributes attributes = reader $ (attributes >>=) . filterIf . strictTEI
  where
    filterIf _ ("", _) = []
    filterIf _ (_, Nothing) = []
    filterIf False (k, Just v) = [(k, v)]
    filterIf True (k, Just v) =
      let k' = Text.toLower k in if isTEI k' then [(k', v)] else []

annotateToken :: (MonadError Error m, MonadReader Config m) =>
  AnnotationContext -> Row -> m TokenAnnotation
annotateToken (AnnotationContext {columnIndex, columnName, header}) (atLine, record) =
  case splitAt columnIndex record of
    (before, (Just form):after) ->
      TokenAnnotation form <$> filterAttributes (zip header $ before ++ after)
    _ -> throwError . MissingColumn atLine $ Text.unpack columnName

coNLLX :: Format
coNLLX = do
  context <- annotationContext CoNLLX.header
  SentenceLevel <$> (
      CoNLLX.sentences >>= mapM (
          fmap SentenceAnnotation . mapM (annotateToken context)
        )
    )

ssv :: Char -> Format
ssv separator = do
  context <- annotationContext =<< SSV.header separator
  TokenLevel <$> (SSV.body separator >>= mapM (annotateToken context))

csv :: Format
csv = ssv ','

tsv :: Format
tsv = ssv '\t'

data Origin = File FilePath | Text Text

data Source = Source {
      format :: Format
    , origin :: Origin
  }

runTEIWAParser :: MonadError Error m =>
  Config -> TEIWAParser a -> SourceName -> Text -> m a
runTEIWAParser config p s = flattenErrors . (`runReaderT` config) . runParserT p () s
  where
    flattenErrors = either throwError (either (throwError . ParsingError) pure)

parse :: (MonadIO m, MonadError Error m) =>
  Config -> Source -> m Annotation
parse config (Source {format, origin}) = parseFrom format origin
  where
    parseFrom p (File f) = liftIO (Text.readFile f) >>= runTEIWAParser config p f
    parseFrom p (Text t) = runTEIWAParser config p "" t