-
Alice Brenon authored
Generalize Annotations to accomodate arbitrary tags (and not only <s/>, <w/> and <pc/>) on arbitrarily nested structure
97c90f5e
Annotation.hs 2.46 KiB
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE FlexibleContexts #-}
module Text.TEIWA.Annotation (
Annotation(..)
, Attributes
, Node(..)
, Tag(..)
, apply
) where
import Control.Monad.Except (MonadError(..))
import Control.Monad.RWS (RWST, evalRWST, gets, modify, tell)
import Data.Int (Int64)
import Data.Text.Lazy as Text (Text, breakOn, concat, drop, length, pack, unpack)
import Text.Printf (printf)
import Text.TEIWA.Config (Config(..))
import Text.TEIWA.Error (Error(..))
type Attributes = [(Text, Text)]
data Tag = Tag {
name :: Text
, annotated :: Attributes
} deriving Show
data Node = Node {
tag :: Tag
, inside :: Annotation
} deriving Show
data Annotation = Token Text | Annotations [Node] deriving Show
attribute :: (Text, Text) -> Text
attribute (k, v) = Text.concat [" ", k, "=\"", v, "\""]
openTag :: Tag -> String
openTag (Tag {name, annotated}) =
printf "<%s%s>" name . Text.concat $ attribute <$> annotated
closeTag :: Tag -> String
closeTag = printf "</%s>" . name
data EditorState = EditorState {
input :: Text
, tagStack :: [Tag]
}
type Editor m = RWST Config Text EditorState m ()
enter :: Monad m => Tag -> Editor m
enter t = modify $
\editorState -> editorState {tagStack = t:tagStack editorState}
flushTags :: Monad m => Editor m
flushTags = do
tags <- gets tagStack
modify $ \editorState -> editorState {tagStack = []}
mapM_ (tell . Text.pack . openTag) tags
forget :: Monad m => Int64 -> Editor m
forget count = modify $
\editorState -> editorState {input = Text.drop count $ input editorState}
annotateNode :: MonadError Error m => Node -> Editor m
annotateNode (Node {tag, inside}) =
enter tag *> annotator inside *> tell (Text.pack $ closeTag tag)
annotator :: MonadError Error m => Annotation -> Editor m
annotator (Token t) = gets (Text.breakOn t . input) >>= wrap
where
tokenLength = Text.length t
wrap (before, after) =
let totalLength = Text.length before + tokenLength in
if Text.length after >= tokenLength
then tell before *> flushTags *> tell t *> forget totalLength
else throwError (TermNotFound $ Text.unpack t)
annotator (Annotations l) = mapM_ annotateNode l
apply :: MonadError Error m => Config -> Annotation -> Text -> m Text
apply config annotation = fmap snd . evalRWST annotate config . start
where
start input = EditorState {input, tagStack = []}
annotate = annotator annotation *> gets input >>= tell