From 9081ea7905af7a5080d495655f8c56a8f8fdc3f9 Mon Sep 17 00:00:00 2001
From: Alice BRENON <alice.brenon@ens-lyon.fr>
Date: Fri, 20 Aug 2021 16:37:28 +0200
Subject: [PATCH] A little bit of gardening: splitting a module growing a
 little bit too bushy, regrouping some conceptually linked operations

---
 app/Main.hs                          |   7 +-
 lib/Text/TEIWA.hs                    |  14 +---
 lib/Text/TEIWA/Annotation.hs         | 101 +++++++++------------------
 lib/Text/TEIWA/Annotation/Context.hs |  44 ++++++++++++
 lib/Text/TEIWA/Annotation/Data.hs    |  31 ++++++++
 lib/Text/TEIWA/Annotation/Editor.hs  |  55 +++++++++++++++
 lib/Text/TEIWA/Source.hs             |  67 +++---------------
 lib/Text/TEIWA/Source/CoNLLX.hs      |  30 ++++----
 lib/Text/TEIWA/Source/Common.hs      |  62 +++++++---------
 lib/Text/TEIWA/Source/SSV.hs         |  11 ++-
 lib/Text/TEIWA/Source/WebAnno.hs     |  34 ++++-----
 teiwa.cabal                          |   3 +
 12 files changed, 251 insertions(+), 208 deletions(-)
 create mode 100644 lib/Text/TEIWA/Annotation/Context.hs
 create mode 100644 lib/Text/TEIWA/Annotation/Data.hs
 create mode 100644 lib/Text/TEIWA/Annotation/Editor.hs

diff --git a/app/Main.hs b/app/Main.hs
index 6e78398..e9f5746 100644
--- a/app/Main.hs
+++ b/app/Main.hs
@@ -2,10 +2,13 @@
 module Main where
 
 import CLI (Command(..), getCommand)
+import Control.Applicative ((<|>))
 import Control.Monad.Except (ExceptT(..), runExceptT)
 import Data.Text.Lazy as Text (Text)
 import Data.Text.Lazy.IO as Text (getContents, putStr)
-import Text.TEIWA (Error, Origin(..), Source(..), annotateWith, coNLLX, csv, tsv)
+import Text.TEIWA (
+    Error, Origin(..), Source(..), annotateWith, coNLLX, csv, tsv, webAnno
+  )
 import System.FilePath (takeExtension)
 import System.Exit (die)
 
@@ -16,7 +19,7 @@ annotator (Command {annotationsFile, config}) =
     extension = takeExtension annotationsFile
     format
       | extension == ".csv" = csv
-      | extension == ".tsv" = tsv
+      | extension == ".tsv" = webAnno <|> tsv
       | otherwise = coNLLX
 
 main :: IO ()
diff --git a/lib/Text/TEIWA.hs b/lib/Text/TEIWA.hs
index d4ad823..0a6b212 100644
--- a/lib/Text/TEIWA.hs
+++ b/lib/Text/TEIWA.hs
@@ -6,9 +6,7 @@ module Text.TEIWA (
     , module Source
     , annotate
     , annotateWith
-    , fromCSV
-    , fromCoNLLX
-    , fromWebAnno
+    , from
   ) where
 
 import Control.Monad.Except (MonadError(..))
@@ -30,11 +28,5 @@ annotateWith config source input = do
 annotate :: (MonadError Error m, MonadIO m) => Source -> Filter m
 annotate = annotateWith defaultConfig
 
-fromCSV :: (MonadError Error m, MonadIO m) => Origin -> Filter m
-fromCSV = annotate . Source csv
-
-fromCoNLLX :: (MonadError Error m, MonadIO m) => Origin -> Filter m
-fromCoNLLX = annotate . Source coNLLX
-
-fromWebAnno :: (MonadError Error m, MonadIO m) => Origin -> Filter m
-fromWebAnno = annotate . Source webAnno
+from :: (MonadError Error m, MonadIO m) => Format -> Origin -> Filter m
+from f = annotate . Source f
diff --git a/lib/Text/TEIWA/Annotation.hs b/lib/Text/TEIWA/Annotation.hs
index 8c1d502..8fba39e 100644
--- a/lib/Text/TEIWA/Annotation.hs
+++ b/lib/Text/TEIWA/Annotation.hs
@@ -1,79 +1,46 @@
-{-# LANGUAGE NamedFieldPuns #-}
-{-# LANGUAGE OverloadedStrings #-}
-{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE FlexibleContexts, NamedFieldPuns, OverloadedStrings #-}
 module Text.TEIWA.Annotation (
-      Annotation(..)
-    , Attributes
-    , Node(..)
-    , Tag(..)
-    , apply
+      apply
+    , s_
+    , tagToken
   ) where
 
 import Control.Monad.Except (MonadError(..))
-import Control.Monad.RWS (RWST, evalRWST, gets, modify, tell)
-import Data.Int (Int64)
-import Data.Text.Lazy as Text (Text, breakOn, concat, drop, length, pack, unpack)
-import Text.Printf (printf)
+import Control.Monad.RWS (MonadReader, evalRWST, reader)
+import Data.Text.Lazy as Text (Text, toLower, unpack)
+import Text.TEIWA.Annotation.Context (Context(..))
+import Text.TEIWA.Annotation.Data (Annotation(..), Node(..), Tag(..))
+import Text.TEIWA.Annotation.Editor (annotateWith, start)
 import Text.TEIWA.Config (Config(..))
 import Text.TEIWA.Error (Error(..))
+import Text.TEIWA.Source.Common (Attributes, Field, Row)
 
-type Attributes = [(Text, Text)]
-
-data Tag = Tag {
-      name :: Text
-    , annotated :: Attributes
-  } deriving Show
-data Node = Node {
-      tag :: Tag
-    , inside :: Annotation
-  } deriving Show
-data Annotation = Token Text | Annotations [Node] deriving Show
-
-attribute :: (Text, Text) -> Text
-attribute (k, v) = Text.concat [" ", k, "=\"", v, "\""]
-
-openTag :: Tag -> Editor m
-openTag (Tag {name, annotated}) = tell . Text.pack $
-  printf "<%s%s>" name . Text.concat $ attribute <$> annotated
-
-closeTag :: Tag -> Editor m
-closeTag = tell . Text.pack $ printf "</%s>" . name
-
-data EditorState = EditorState {
-      input :: Text
-    , tagStack :: [Tag]
-  }
-type Editor m = RWST Config Text EditorState m ()
-
-editStack :: MonadState EditorState m => ([Tag] -> (a, [Tag])) -> m a
-editStack f = state $ \editorState ->
-  let (output, newTagStack) = f $ tagStack editorState in
-  (output, editorState {tagStack = newTagStack})
-
-flushTags :: Monad m => Editor m
-flushTags = editStack (\stack -> (stack, [])) >>= mapM_ openTag
+apply :: MonadError Error m => Config -> Annotation -> Text -> m Text
+apply config annotation =
+  fmap snd . evalRWST (annotateWith annotation) config . start
 
-forget :: Monad m => Int64 -> Editor m
-forget count = modify $
-  \editorState -> editorState {input = Text.drop count $ input editorState}
+isTEI :: Text -> Bool
+isTEI = (`elem` ["pos", "lemma", "msd"])
 
-annotateNode :: MonadError Error m => Node -> Editor m
-annotateNode (Node {tag, inside}) =
-  editStack (\s -> ((), tag:s)) *> annotator inside *> closeTag tag
+s_ :: Tag
+s_ = Tag {name = "s", annotated = []}
 
-annotator :: MonadError Error m => Annotation -> Editor m
-annotator (Token t) = gets (Text.breakOn t . input) >>= wrap
+filterAttributes :: MonadReader Config m => [(Text, Field)] -> m Attributes
+filterAttributes attributes = reader $ (attributes >>=) . filterIf . strictTEI
   where
-    tokenLength = Text.length t
-    wrap (before, after) =
-      let totalLength = Text.length before + tokenLength in
-      if Text.length after >= tokenLength
-      then tell before *> flushTags *> tell t *> forget totalLength
-      else throwError (TermNotFound $ Text.unpack t)
-annotator (Annotations l) = mapM_ annotateNode l
+    filterIf _ ("", _) = []
+    filterIf _ (_, Nothing) = []
+    filterIf False (k, Just v) = [(k, v)]
+    filterIf True (k, Just v) =
+      let k' = Text.toLower k in if isTEI k' then [(k', v)] else []
+
+tagToken :: (MonadError Error m, MonadReader Config m) =>
+  Context -> Row -> m Node
+tagToken (Context {columnIndex, columnName, header, tagger}) (atLine, record) =
+  case splitAt columnIndex record of
+    (before, (Just form):after) -> do
+      annotated <- filterAttributes (zip header $ before ++ after)
+      let name = tagger form annotated
+      pure $ Node {tag = Tag {name, annotated}, inside = Token form}
+    _ -> throwError . MissingColumn atLine $ Text.unpack columnName
 
-apply :: MonadError Error m => Config -> Annotation -> Text -> m Text
-apply config annotation = fmap snd . evalRWST annotate config . start
-  where
-    start input = EditorState {input, tagStack = []}
-    annotate = annotator annotation *> gets input >>= tell
diff --git a/lib/Text/TEIWA/Annotation/Context.hs b/lib/Text/TEIWA/Annotation/Context.hs
new file mode 100644
index 0000000..7a6e44c
--- /dev/null
+++ b/lib/Text/TEIWA/Annotation/Context.hs
@@ -0,0 +1,44 @@
+{-# LANGUAGE FlexibleContexts, NamedFieldPuns, OverloadedStrings #-}
+module Text.TEIWA.Annotation.Context (
+      Context(..)
+    , ofHeader
+  ) where
+
+import Control.Applicative ((<|>))
+import Control.Monad.Except (MonadError(..))
+import Control.Monad.Reader (MonadReader(..))
+import Data.List (findIndex)
+import Data.Text.Lazy as Text (Text, toLower, unpack)
+import Text.Parsec (Column)
+import Text.TEIWA.Config (Config(..))
+import Text.TEIWA.Error (Error(..))
+import Text.TEIWA.Source.Common (Attributes, Header, teiTagger)
+
+data Context = Context {
+      columnIndex :: Column
+    , columnName :: Text
+    , header :: Header
+    , tagger :: Text -> Attributes -> Text
+  }
+
+ofHeader :: (MonadError Error m, MonadReader Config m) =>
+  Header -> m Context
+ofHeader defaultFields = do
+  Config {formColumn, headerOverride} <- ask
+  let headerFields = maybe defaultFields id headerOverride
+  column <- catchNothing ((Right <$> formColumn) <|> findForm headerFields)
+  buildContext column headerFields
+  where
+    findForm = fmap Left . findIndex ((==) "form" . Text.toLower)
+    catchNothing = maybe (throwError NoFormColumn) return
+
+buildContext :: MonadError Error m =>
+  Either Int Text -> Header -> m Context
+buildContext column = build . either splitAt (break . (==)) column
+  where
+    build (_, []) =
+      throwError . NoSuchColumn $ either (\_ -> "form") Text.unpack column
+    build (before, columnName:after) =
+      let columnIndex = length before in
+      let header = (before ++ after) in
+      return $ Context {columnIndex, columnName, header, tagger = teiTagger}
diff --git a/lib/Text/TEIWA/Annotation/Data.hs b/lib/Text/TEIWA/Annotation/Data.hs
new file mode 100644
index 0000000..1599c61
--- /dev/null
+++ b/lib/Text/TEIWA/Annotation/Data.hs
@@ -0,0 +1,31 @@
+{-# LANGUAGE NamedFieldPuns #-}
+module Text.TEIWA.Annotation.Data (
+      Annotation(..)
+    , Format
+    , Node(..)
+    , Tag(..)
+    , openTag
+    , closeTag
+  ) where
+
+import Data.Text.Lazy as Text (Text, concat, pack)
+import Text.TEIWA.Source.Common (Attributes, TEIWAParser, attribute)
+import Text.Printf (printf)
+
+data Tag = Tag {
+      name :: Text
+    , annotated :: Attributes
+  } deriving Show
+data Node = Node {
+      tag :: Tag
+    , inside :: Annotation
+  } deriving Show
+data Annotation = Token Text | Annotations [Node] deriving Show
+type Format = TEIWAParser Annotation
+
+openTag :: Tag -> Text
+openTag (Tag {name, annotated}) =
+  Text.pack . printf "<%s%s>" name . Text.concat $ attribute <$> annotated
+
+closeTag :: Tag -> Text
+closeTag = Text.pack . printf "</%s>" . name
diff --git a/lib/Text/TEIWA/Annotation/Editor.hs b/lib/Text/TEIWA/Annotation/Editor.hs
new file mode 100644
index 0000000..78b2417
--- /dev/null
+++ b/lib/Text/TEIWA/Annotation/Editor.hs
@@ -0,0 +1,55 @@
+{-# LANGUAGE FlexibleContexts, NamedFieldPuns #-}
+module Text.TEIWA.Annotation.Editor (
+      annotateWith
+    , start
+  ) where
+
+import Control.Monad.Except (MonadError(..))
+import Control.Monad.RWS (MonadState, RWST, gets, modify, state, tell)
+import Data.Int (Int64)
+import Data.Text.Lazy as Text (Text, breakOn, drop, length, unpack)
+import Text.TEIWA.Annotation.Data (
+    Annotation(..), Node(..), Tag(..), closeTag, openTag
+  )
+import Text.TEIWA.Config (Config(..))
+import Text.TEIWA.Error (Error(..))
+
+data State = State {
+      input :: Text
+    , tagStack :: [Tag]
+  }
+
+start :: Text -> State
+start input = State {input, tagStack = []}
+
+type Editor m = RWST Config Text State m ()
+
+editStack :: MonadState State m => ([Tag] -> (a, [Tag])) -> m a
+editStack f = state $ \editorState ->
+  let (output, newTagStack) = f $ tagStack editorState in
+  (output, editorState {tagStack = newTagStack})
+
+flushTags :: Monad m => Editor m
+flushTags = editStack (\stack -> (reverse stack, [])) >>= mapM_ (tell . openTag)
+
+forget :: Monad m => Int64 -> Editor m
+forget count = modify $
+  \editorState -> editorState {input = Text.drop count $ input editorState}
+
+annotateNode :: MonadError Error m => Node -> Editor m
+annotateNode (Node {tag, inside}) =
+  editStack (\s -> ((), tag:s)) *> annotator inside *> tell (closeTag tag)
+
+annotator :: MonadError Error m => Annotation -> Editor m
+annotator (Token t) = gets (Text.breakOn t . input) >>= wrap
+  where
+    tokenLength = Text.length t
+    wrap (before, after) =
+      let totalLength = Text.length before + tokenLength in
+      if Text.length after >= tokenLength
+      then tell before *> flushTags *> tell t *> forget totalLength
+      else throwError (TermNotFound $ Text.unpack t)
+annotator (Annotations l) = mapM_ annotateNode l
+
+annotateWith :: MonadError Error m => Annotation -> Editor m
+annotateWith annotation = annotator annotation *> gets input >>= tell
diff --git a/lib/Text/TEIWA/Source.hs b/lib/Text/TEIWA/Source.hs
index d6b9f66..4fcb725 100644
--- a/lib/Text/TEIWA/Source.hs
+++ b/lib/Text/TEIWA/Source.hs
@@ -9,79 +9,30 @@ module Text.TEIWA.Source (
     , csv
     , runTEIWAParser
     , parse
+    , ssv
     , tsv
     , webAnno
   ) where
 
-import Control.Applicative ((<|>))
 import Control.Monad.Except (MonadError(..))
-import Control.Monad.Reader (MonadReader(..), ReaderT(..))
+import Control.Monad.Reader (ReaderT(..))
 import Control.Monad.IO.Class (MonadIO(..))
-import Data.Text.Lazy as Text (Text, toLower, unpack)
+import Data.Text.Lazy as Text (Text)
 import Data.Text.Lazy.IO as Text (readFile)
 import Text.Parsec (SourceName, runParserT)
-import Text.TEIWA.Annotation (
-    Annotation(..), Attributes, SentenceAnnotation(..), TokenAnnotation(..)
-  )
 import Text.TEIWA.Config (Config(..))
 import Text.TEIWA.Error (Error(..))
-import Text.TEIWA.Source.Common (
-    AnnotationContext(..), Field, Row, TEIWAParser, annotationContext
-  )
-import qualified Text.TEIWA.Source.CoNLLX as CoNLLX (header, sentences)
-import qualified Text.TEIWA.Source.SSV as SSV (body, header)
-import qualified Text.TEIWA.Source.WebAnno as WebAnno (header, sentences)
-
-type Format = TEIWAParser Annotation
-
-isTEI :: Text -> Bool
-isTEI = (`elem` ["pos", "lemma", "msd"])
-
-filterAttributes :: MonadReader Config m => [(Text, Field)] -> m Attributes
-filterAttributes attributes = reader $ (attributes >>=) . filterIf . strictTEI
-  where
-    filterIf _ ("", _) = []
-    filterIf _ (_, Nothing) = []
-    filterIf False (k, Just v) = [(k, v)]
-    filterIf True (k, Just v) =
-      let k' = Text.toLower k in if isTEI k' then [(k', v)] else []
-
-annotateToken :: (MonadError Error m, MonadReader Config m) =>
-  AnnotationContext -> Row -> m TokenAnnotation
-annotateToken (AnnotationContext {columnIndex, columnName, header}) (atLine, record) =
-  case splitAt columnIndex record of
-    (before, (Just form):after) ->
-      TokenAnnotation form <$> filterAttributes (zip header $ before ++ after)
-    _ -> throwError . MissingColumn atLine $ Text.unpack columnName
-
-coNLLX :: Format
-coNLLX = do
-  context <- annotationContext CoNLLX.header
-  SentenceLevel <$> (
-      CoNLLX.sentences >>= mapM (
-          fmap SentenceAnnotation . mapM (annotateToken context)
-        )
-    )
-
-ssv :: Char -> Format
-ssv separator = do
-  context <- annotationContext =<< SSV.header separator
-  TokenLevel <$> (SSV.body separator >>= mapM (annotateToken context))
+import Text.TEIWA.Annotation.Data (Annotation, Format)
+import Text.TEIWA.Source.Common (TEIWAParser)
+import Text.TEIWA.Source.CoNLLX (coNLLX)
+import Text.TEIWA.Source.SSV (ssv)
+import Text.TEIWA.Source.WebAnno (webAnno)
 
 csv :: Format
 csv = ssv ','
 
 tsv :: Format
-tsv = webAnno <|> ssv '\t'
-
-webAnno :: Format
-webAnno = do
-  context <- annotationContext =<< WebAnno.header
-  SentenceLevel <$> (
-      WebAnno.sentences >>= mapM (
-          fmap SentenceAnnotation . mapM (annotateToken context)
-        )
-    )
+tsv = ssv '\t'
 
 data Origin = File FilePath | Text Text
 
diff --git a/lib/Text/TEIWA/Source/CoNLLX.hs b/lib/Text/TEIWA/Source/CoNLLX.hs
index 0a7f957..28b4b49 100644
--- a/lib/Text/TEIWA/Source/CoNLLX.hs
+++ b/lib/Text/TEIWA/Source/CoNLLX.hs
@@ -1,14 +1,15 @@
-{-# LANGUAGE OverloadedStrings #-}
-{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE FlexibleContexts, OverloadedStrings #-}
 module Text.TEIWA.Source.CoNLLX (
-      header
-    , sentences
+    coNLLX
   ) where
 
 import Control.Applicative ((<|>), many)
 import Data.Text.Lazy as Text (concat, pack)
 import Text.Parsec (ParsecT, Stream, char, digit, many1, noneOf, string, try)
-import Text.TEIWA.Source.Common (Field, Header, Row, eol, recordLine)
+import Text.TEIWA.Annotation (s_, tagToken)
+import Text.TEIWA.Annotation.Data (Annotation(..), Format, Node(..))
+import Text.TEIWA.Annotation.Context as Context (ofHeader)
+import Text.TEIWA.Source.Common (Field, Header, Row, eol, recordLine, sentences)
 
 header :: Header
 header = [
@@ -28,7 +29,7 @@ field :: Stream s m Char => ParsecT s u m Field
 field = build <$> many1 (noneOf "\t\n\r")
   where
     build "_" = Nothing
-    build s = Just $ Text.pack s
+    build other = Just $ Text.pack other
 
 type Range = (Int, Int)
 
@@ -54,12 +55,11 @@ row = recordLine (rowID >>= either range singleLine)
     combineFields f@(Just _) Nothing = f
     combineFields (Just a) (Just b) = Just $ Text.concat [a, "+", b]
 
-type Sentence = [Row]
-
-sentence :: Stream s m Char => ParsecT s u m Sentence
-sentence = many comment *> many1 row
-  where
-    comment = char '#' *> many (noneOf "\r\n") <* eol
-
-sentences :: Stream s m Char => ParsecT s u m [Sentence]
-sentences = many (sentence <* many1 eol)
+coNLLX :: Format
+coNLLX = do
+  context <- Context.ofHeader header
+  Annotations <$> (
+      sentences row >>= mapM (
+          fmap (Node s_ . Annotations) . mapM (tagToken context)
+        )
+    )
diff --git a/lib/Text/TEIWA/Source/Common.hs b/lib/Text/TEIWA/Source/Common.hs
index 122da7e..787ac02 100644
--- a/lib/Text/TEIWA/Source/Common.hs
+++ b/lib/Text/TEIWA/Source/Common.hs
@@ -1,38 +1,33 @@
-{-# LANGUAGE FlexibleContexts #-}
-{-# LANGUAGE NamedFieldPuns #-}
-{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE FlexibleContexts, OverloadedStrings #-}
 module Text.TEIWA.Source.Common (
-      AnnotationContext(..)
+      Attributes
     , Field
     , Header
     , Row
     , TEIWAParser
-    , annotationContext
+    , attribute
     , eol
     , recordLine
+    , sentences
+    , teiTagger
   ) where
 
-import Control.Applicative ((<|>))
-import Control.Monad.Except (MonadError(..))
-import Control.Monad.Reader (MonadReader(..), ReaderT)
-import Data.List (findIndex)
-import Data.Text.Lazy as Text (Text, toLower, unpack)
+import Control.Applicative ((<|>), many)
+import Control.Monad.Reader (ReaderT)
+import Data.Char (isPunctuation)
+import Data.Text.Lazy as Text (Text, concat, head, length)
 import Text.Parsec (
-      Column, Line, ParsecT, Stream, char, endOfLine, getParserState, sourceLine, statePos
-    , try
+      Line, ParsecT, Stream, char, endOfLine, getParserState, many1, noneOf
+    , sourceLine, statePos, try
   )
 import Text.TEIWA.Config (Config(..))
 import Text.TEIWA.Error (Error(..))
 
-data AnnotationContext = AnnotationContext {
-      columnIndex :: Column
-    , columnName :: Text
-    , header :: Header
-  }
-
 type Field = Maybe Text
 type Header = [Text]
 type Row = (Line, [Field])
+type Sentence = [Row]
+type Attributes = [(Text, Text)]
 
 type TEIWAParser = ParsecT Text () (ReaderT Config (Either Error))
 
@@ -44,23 +39,18 @@ recordLine p = (,) <$> currentLine <*> p
   where
     currentLine = sourceLine . statePos <$> getParserState
 
-annotationContext :: Header -> TEIWAParser AnnotationContext
-annotationContext defaultFields = do
-  Config {formColumn, headerOverride} <- ask
-  let headerFields = maybe defaultFields id headerOverride
-  column <- catchNothing ((Right <$> formColumn) <|> findForm headerFields)
-  buildContext column headerFields
-  where
-    findForm = fmap Left . findIndex ((==) "form" . Text.toLower)
-    catchNothing = maybe (throwError NoFormColumn) return
+attribute :: (Text, Text) -> Text
+attribute (k, v) = Text.concat [" ", k, "=\"", v, "\""]
 
-buildContext :: MonadError Error m =>
-  Either Int Text -> Header -> m AnnotationContext
-buildContext column = build . either splitAt (break . (==)) column
+sentence :: Stream s m Char => ParsecT s u m Row -> ParsecT s u m Sentence
+sentence row = many comment *> many1 row
   where
-    build (_, []) =
-      throwError . NoSuchColumn $ either (\_ -> "form") Text.unpack column
-    build (before, columnName:after) =
-      let columnIndex = length before in
-      let header = (before ++ after) in
-      return $ AnnotationContext {columnIndex, columnName, header}
+    comment = char '#' *> many (noneOf "\r\n") <* eol
+
+sentences :: Stream s m Char => ParsecT s u m Row -> ParsecT s u m [Sentence]
+sentences row = many (many1 eol *> sentence row)
+
+teiTagger :: Text -> Attributes -> Text
+teiTagger t _
+  | Text.length t == 1 && isPunctuation (Text.head t) = "pc"
+  | otherwise = "w"
diff --git a/lib/Text/TEIWA/Source/SSV.hs b/lib/Text/TEIWA/Source/SSV.hs
index b973771..ed5d3e0 100644
--- a/lib/Text/TEIWA/Source/SSV.hs
+++ b/lib/Text/TEIWA/Source/SSV.hs
@@ -1,7 +1,6 @@
 {-# LANGUAGE FlexibleContexts #-}
 module Text.TEIWA.Source.SSV (
-      body
-    , header
+    ssv
   ) where
 
 import Control.Applicative ((<|>), many)
@@ -9,6 +8,9 @@ import Control.Monad.Except (MonadError(..))
 import Data.Maybe (catMaybes)
 import Data.Text.Lazy as Text (pack)
 import Text.Parsec (ParsecT, Stream, between, char, noneOf, sepBy, string, try)
+import Text.TEIWA.Annotation (tagToken)
+import Text.TEIWA.Annotation.Context as Context (ofHeader)
+import Text.TEIWA.Annotation.Data (Annotation(..), Format)
 import Text.TEIWA.Source.Common (Field, Header, Row, TEIWAParser, eol, recordLine)
 import Text.TEIWA.Error (Error(..))
 
@@ -33,3 +35,8 @@ header separator = fields separator >>= ensureNotEmpty . catMaybes
 
 body :: Stream s m Char => Char -> ParsecT s u m [Row]
 body = many . recordLine . fields
+
+ssv :: Char -> Format
+ssv separator = do
+  context <- Context.ofHeader =<< header separator
+  Annotations <$> (mapM (tagToken context) =<< body separator)
diff --git a/lib/Text/TEIWA/Source/WebAnno.hs b/lib/Text/TEIWA/Source/WebAnno.hs
index a20a2a9..ed75a3e 100644
--- a/lib/Text/TEIWA/Source/WebAnno.hs
+++ b/lib/Text/TEIWA/Source/WebAnno.hs
@@ -1,8 +1,6 @@
-{-# LANGUAGE FlexibleContexts #-}
-{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE FlexibleContexts, OverloadedStrings #-}
 module Text.TEIWA.Source.WebAnno (
-      header
-    , sentences
+    webAnno
   ) where
 
 import Control.Applicative (many)
@@ -10,8 +8,11 @@ import Data.Text.Lazy as Text (pack)
 import Text.Parsec (
     ParsecT, Stream, char, digit, many1, noneOf, sepBy1, skipMany1, string
   )
+import Text.TEIWA.Annotation (s_, tagToken)
+import Text.TEIWA.Annotation.Context as Context (ofHeader)
+import Text.TEIWA.Annotation.Data (Annotation(..), Format, Node(..))
 import Text.TEIWA.Source.Common (
-    Field, Header, Row, TEIWAParser, eol, recordLine
+    Field, Header, Row, TEIWAParser, eol, recordLine, sentences
   )
 
 header :: TEIWAParser Header
@@ -32,20 +33,19 @@ field :: Stream s m Char => ParsecT s u m Field
 field = build <$> many1 (noneOf "\t\n\r")
   where
     build "_" = Nothing
-    build s = Just . Text.pack $ unescape s
+    build other = Just . Text.pack $ unescape other
     unescape "" = ""
-    unescape ('\\':c:s) = c:unescape s
-    unescape (c:s) = c:unescape s
+    unescape ('\\':c:cs) = c:unescape cs
+    unescape (c:cs) = c:unescape cs
 
 row :: Stream s m Char => ParsecT s u m Row
 row = recordLine (many1 (field <* char '\t') <* eol)
 
-type Sentence = [Row]
-
-sentence :: Stream s m Char => ParsecT s u m Sentence
-sentence = many comment *> many1 row
-  where
-    comment = char '#' *> many (noneOf "\r\n") <* eol
-
-sentences :: Stream s m Char => ParsecT s u m [Sentence]
-sentences = many (many1 eol *> sentence)
+webAnno :: Format
+webAnno = do
+  context <- Context.ofHeader =<< header
+  Annotations <$> (
+      sentences row >>= mapM (
+          fmap (Node s_ . Annotations) . mapM (tagToken context)
+        )
+    )
diff --git a/teiwa.cabal b/teiwa.cabal
index bfd3856..ded3950 100644
--- a/teiwa.cabal
+++ b/teiwa.cabal
@@ -25,6 +25,9 @@ extra-source-files:  CHANGELOG.md
 library
   exposed-modules:     Text.TEIWA
   other-modules:       Text.TEIWA.Annotation
+                     , Text.TEIWA.Annotation.Context
+                     , Text.TEIWA.Annotation.Data
+                     , Text.TEIWA.Annotation.Editor
                      , Text.TEIWA.Config
                      , Text.TEIWA.Error
                      , Text.TEIWA.Source
-- 
GitLab