Skip to content
Snippets Groups Projects
Commit 09f7f057 authored by Alice Brenon's avatar Alice Brenon
Browse files

Implement strictTEI filtering

parent d786c741
No related branches found
No related tags found
No related merge requests found
{-# LANGUAGE NamedFieldPuns #-} {-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
module Text.TEIWA.Source ( module Text.TEIWA.Source (
Format Format
, Origin(..) , Origin(..)
...@@ -15,7 +16,7 @@ module Text.TEIWA.Source ( ...@@ -15,7 +16,7 @@ module Text.TEIWA.Source (
import Control.Monad.Except (MonadError(..)) import Control.Monad.Except (MonadError(..))
import Control.Monad.Reader (MonadReader(..), ReaderT(..)) import Control.Monad.Reader (MonadReader(..), ReaderT(..))
import Control.Monad.IO.Class (MonadIO(..)) import Control.Monad.IO.Class (MonadIO(..))
import Data.Text.Lazy as Text (Text, unpack) import Data.Text.Lazy as Text (Text, toLower, unpack)
import Data.Text.Lazy.IO as Text (readFile) import Data.Text.Lazy.IO as Text (readFile)
import Text.Parsec (ParsecT, SourceName, runParserT) import Text.Parsec (ParsecT, SourceName, runParserT)
import Text.TEIWA.Annotation ( import Text.TEIWA.Annotation (
...@@ -23,20 +24,30 @@ import Text.TEIWA.Annotation ( ...@@ -23,20 +24,30 @@ import Text.TEIWA.Annotation (
) )
import Text.TEIWA.Config (Config(..)) import Text.TEIWA.Config (Config(..))
import Text.TEIWA.Error (Error(..)) import Text.TEIWA.Error (Error(..))
import Text.TEIWA.Source.Common (AnnotationContext(..), Row) import Text.TEIWA.Source.Common (AnnotationContext(..), Field, Row)
import qualified Text.TEIWA.Source.ConLLX as ConLLX (getContext, sentences) import qualified Text.TEIWA.Source.ConLLX as ConLLX (getContext, sentences)
import qualified Text.TEIWA.Source.SSV as SSV (body, getContext) import qualified Text.TEIWA.Source.SSV as SSV (body, getContext)
type TEIWAParser = ParsecT Text () (ReaderT Config (Either Error)) type TEIWAParser = ParsecT Text () (ReaderT Config (Either Error))
type Format = TEIWAParser Annotation type Format = TEIWAParser Annotation
isTEI :: Field -> Bool
isTEI = (`elem` ["pos", "lemma", "msd"])
filterAttributes :: MonadReader Config m => [(Field, Field)] -> m [(Field, Field)]
filterAttributes attributes = reader $ filterIf . strictTEI
where
filterIf False = attributes
filterIf True =
[(k', v) | (k, v) <- attributes, let k' = Text.toLower k, isTEI k']
annotateToken :: (MonadError Error m, MonadReader Config m) => annotateToken :: (MonadError Error m, MonadReader Config m) =>
AnnotationContext -> Row -> m TokenAnnotation AnnotationContext -> Row -> m TokenAnnotation
annotateToken (AnnotationContext {columnIndex, columnName, header}) (atLine, record) = annotateToken (AnnotationContext {columnIndex, columnName, header}) (atLine, record) =
case splitAt columnIndex record of case splitAt columnIndex record of
(_, []) -> throwError . MissingColumn atLine $ Text.unpack columnName (_, []) -> throwError . MissingColumn atLine $ Text.unpack columnName
(before, form:after) -> (before, form:after) ->
return $ TokenAnnotation {form, annotated = zip header (before ++ after)} TokenAnnotation form <$> filterAttributes (zip header $ before ++ after)
coNLLX :: Format coNLLX :: Format
coNLLX = do coNLLX = do
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment