{-# LANGUAGE NamedFieldPuns #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE FlexibleContexts #-} module Text.TEIWA.Annotation ( Annotation(..) , Attributes , Node(..) , Tag(..) , apply ) where import Control.Monad.Except (MonadError(..)) import Control.Monad.RWS (RWST, evalRWST, gets, modify, tell) import Data.Int (Int64) import Data.Text.Lazy as Text (Text, breakOn, concat, drop, length, pack, unpack) import Text.Printf (printf) import Text.TEIWA.Config (Config(..)) import Text.TEIWA.Error (Error(..)) type Attributes = [(Text, Text)] data Tag = Tag { name :: Text , annotated :: Attributes } deriving Show data Node = Node { tag :: Tag , inside :: Annotation } deriving Show data Annotation = Token Text | Annotations [Node] deriving Show attribute :: (Text, Text) -> Text attribute (k, v) = Text.concat [" ", k, "=\"", v, "\""] openTag :: Tag -> String openTag (Tag {name, annotated}) = printf "<%s%s>" name . Text.concat $ attribute <$> annotated closeTag :: Tag -> String closeTag = printf "</%s>" . name data EditorState = EditorState { input :: Text , tagStack :: [Tag] } type Editor m = RWST Config Text EditorState m () enter :: Monad m => Tag -> Editor m enter t = modify $ \editorState -> editorState {tagStack = t:tagStack editorState} flushTags :: Monad m => Editor m flushTags = do tags <- gets tagStack modify $ \editorState -> editorState {tagStack = []} mapM_ (tell . Text.pack . openTag) tags forget :: Monad m => Int64 -> Editor m forget count = modify $ \editorState -> editorState {input = Text.drop count $ input editorState} annotateNode :: MonadError Error m => Node -> Editor m annotateNode (Node {tag, inside}) = enter tag *> annotator inside *> tell (Text.pack $ closeTag tag) annotator :: MonadError Error m => Annotation -> Editor m annotator (Token t) = gets (Text.breakOn t . input) >>= wrap where tokenLength = Text.length t wrap (before, after) = let totalLength = Text.length before + tokenLength in if Text.length after >= tokenLength then tell before *> flushTags *> tell t *> forget totalLength else throwError (TermNotFound $ Text.unpack t) annotator (Annotations l) = mapM_ annotateNode l apply :: MonadError Error m => Config -> Annotation -> Text -> m Text apply config annotation = fmap snd . evalRWST annotate config . start where start input = EditorState {input, tagStack = []} annotate = annotator annotation *> gets input >>= tell