Skip to content
Snippets Groups Projects
Annotation.hs 2.46 KiB
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE FlexibleContexts #-}
module Text.TEIWA.Annotation (
      Annotation(..)
    , Attributes
    , Node(..)
    , Tag(..)
    , apply
  ) where

import Control.Monad.Except (MonadError(..))
import Control.Monad.RWS (RWST, evalRWST, gets, modify, tell)
import Data.Int (Int64)
import Data.Text.Lazy as Text (Text, breakOn, concat, drop, length, pack, unpack)
import Text.Printf (printf)
import Text.TEIWA.Config (Config(..))
import Text.TEIWA.Error (Error(..))

type Attributes = [(Text, Text)]

data Tag = Tag {
      name :: Text
    , annotated :: Attributes
  } deriving Show
data Node = Node {
      tag :: Tag
    , inside :: Annotation
  } deriving Show
data Annotation = Token Text | Annotations [Node] deriving Show

attribute :: (Text, Text) -> Text
attribute (k, v) = Text.concat [" ", k, "=\"", v, "\""]

openTag :: Tag -> String
openTag (Tag {name, annotated}) =
  printf "<%s%s>" name . Text.concat $ attribute <$> annotated

closeTag :: Tag -> String
closeTag = printf "</%s>" . name

data EditorState = EditorState {
      input :: Text
    , tagStack :: [Tag]
  }
type Editor m = RWST Config Text EditorState m ()

enter :: Monad m => Tag -> Editor m
enter t = modify $
  \editorState -> editorState {tagStack = t:tagStack editorState}

flushTags :: Monad m => Editor m
flushTags = do
  tags <- gets tagStack
  modify $ \editorState -> editorState {tagStack = []}
  mapM_ (tell . Text.pack . openTag) tags

forget :: Monad m => Int64 -> Editor m
forget count = modify $
  \editorState -> editorState {input = Text.drop count $ input editorState}

annotateNode :: MonadError Error m => Node -> Editor m
annotateNode (Node {tag, inside}) =
  enter tag *> annotator inside *> tell (Text.pack $ closeTag tag)

annotator :: MonadError Error m => Annotation -> Editor m
annotator (Token t) = gets (Text.breakOn t . input) >>= wrap
  where
    tokenLength = Text.length t
    wrap (before, after) =
      let totalLength = Text.length before + tokenLength in
      if Text.length after >= tokenLength
      then tell before *> flushTags *> tell t *> forget totalLength
      else throwError (TermNotFound $ Text.unpack t)
annotator (Annotations l) = mapM_ annotateNode l

apply :: MonadError Error m => Config -> Annotation -> Text -> m Text
apply config annotation = fmap snd . evalRWST annotate config . start
  where
    start input = EditorState {input, tagStack = []}
    annotate = annotator annotation *> gets input >>= tell