From 09f7f0576290aab3ceae2e374de33908e170dd93 Mon Sep 17 00:00:00 2001
From: Alice BRENON <alice.brenon@ens-lyon.fr>
Date: Thu, 18 Mar 2021 10:00:26 +0100
Subject: [PATCH] Implement strictTEI filtering

---
 lib/Text/TEIWA/Source.hs | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/lib/Text/TEIWA/Source.hs b/lib/Text/TEIWA/Source.hs
index b4664db..48b6fb9 100644
--- a/lib/Text/TEIWA/Source.hs
+++ b/lib/Text/TEIWA/Source.hs
@@ -1,5 +1,6 @@
 {-# LANGUAGE NamedFieldPuns #-}
 {-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE OverloadedStrings #-}
 module Text.TEIWA.Source (
       Format
     , Origin(..)
@@ -15,7 +16,7 @@ module Text.TEIWA.Source (
 import Control.Monad.Except (MonadError(..))
 import Control.Monad.Reader (MonadReader(..), ReaderT(..))
 import Control.Monad.IO.Class (MonadIO(..))
-import Data.Text.Lazy as Text (Text, unpack)
+import Data.Text.Lazy as Text (Text, toLower, unpack)
 import Data.Text.Lazy.IO as Text (readFile)
 import Text.Parsec (ParsecT, SourceName, runParserT)
 import Text.TEIWA.Annotation (
@@ -23,20 +24,30 @@ import Text.TEIWA.Annotation (
   )
 import Text.TEIWA.Config (Config(..))
 import Text.TEIWA.Error (Error(..))
-import Text.TEIWA.Source.Common (AnnotationContext(..), Row)
+import Text.TEIWA.Source.Common (AnnotationContext(..), Field, Row)
 import qualified Text.TEIWA.Source.ConLLX as ConLLX (getContext, sentences)
 import qualified Text.TEIWA.Source.SSV as SSV (body, getContext)
 
 type TEIWAParser = ParsecT Text () (ReaderT Config (Either Error))
 type Format = TEIWAParser Annotation
 
+isTEI :: Field -> Bool
+isTEI = (`elem` ["pos", "lemma", "msd"])
+
+filterAttributes :: MonadReader Config m => [(Field, Field)] -> m [(Field, Field)]
+filterAttributes attributes = reader $ filterIf . strictTEI
+  where
+    filterIf False = attributes
+    filterIf True =
+      [(k', v) | (k, v) <- attributes, let k' = Text.toLower k, isTEI k']
+
 annotateToken :: (MonadError Error m, MonadReader Config m) =>
   AnnotationContext -> Row -> m TokenAnnotation
 annotateToken (AnnotationContext {columnIndex, columnName, header}) (atLine, record) =
   case splitAt columnIndex record of
     (_, []) -> throwError . MissingColumn atLine $ Text.unpack columnName
     (before, form:after) ->
-      return $ TokenAnnotation {form, annotated = zip header (before ++ after)}
+      TokenAnnotation form <$> filterAttributes (zip header $ before ++ after)
 
 coNLLX :: Format
 coNLLX = do
-- 
GitLab