Skip to content
Snippets Groups Projects
Commit 09f7f057 authored by Alice Brenon's avatar Alice Brenon
Browse files

Implement strictTEI filtering

parent d786c741
No related branches found
No related tags found
No related merge requests found
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
module Text.TEIWA.Source (
Format
, Origin(..)
......@@ -15,7 +16,7 @@ module Text.TEIWA.Source (
import Control.Monad.Except (MonadError(..))
import Control.Monad.Reader (MonadReader(..), ReaderT(..))
import Control.Monad.IO.Class (MonadIO(..))
import Data.Text.Lazy as Text (Text, unpack)
import Data.Text.Lazy as Text (Text, toLower, unpack)
import Data.Text.Lazy.IO as Text (readFile)
import Text.Parsec (ParsecT, SourceName, runParserT)
import Text.TEIWA.Annotation (
......@@ -23,20 +24,30 @@ import Text.TEIWA.Annotation (
)
import Text.TEIWA.Config (Config(..))
import Text.TEIWA.Error (Error(..))
import Text.TEIWA.Source.Common (AnnotationContext(..), Row)
import Text.TEIWA.Source.Common (AnnotationContext(..), Field, Row)
import qualified Text.TEIWA.Source.ConLLX as ConLLX (getContext, sentences)
import qualified Text.TEIWA.Source.SSV as SSV (body, getContext)
type TEIWAParser = ParsecT Text () (ReaderT Config (Either Error))
type Format = TEIWAParser Annotation
isTEI :: Field -> Bool
isTEI = (`elem` ["pos", "lemma", "msd"])
filterAttributes :: MonadReader Config m => [(Field, Field)] -> m [(Field, Field)]
filterAttributes attributes = reader $ filterIf . strictTEI
where
filterIf False = attributes
filterIf True =
[(k', v) | (k, v) <- attributes, let k' = Text.toLower k, isTEI k']
annotateToken :: (MonadError Error m, MonadReader Config m) =>
AnnotationContext -> Row -> m TokenAnnotation
annotateToken (AnnotationContext {columnIndex, columnName, header}) (atLine, record) =
case splitAt columnIndex record of
(_, []) -> throwError . MissingColumn atLine $ Text.unpack columnName
(before, form:after) ->
return $ TokenAnnotation {form, annotated = zip header (before ++ after)}
TokenAnnotation form <$> filterAttributes (zip header $ before ++ after)
coNLLX :: Format
coNLLX = do
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment