diff --git a/lib/Data/Map/Lazy/Extra.hs b/lib/Data/Map/Lazy/Extra.hs index 32fecae1f98bc373ffadb4d49cbe27d51c7920ee..b32e0647c74d275ad56e6e98b52c43f6a4443682 100644 --- a/lib/Data/Map/Lazy/Extra.hs +++ b/lib/Data/Map/Lazy/Extra.hs @@ -1,13 +1,30 @@ module Data.Map.Lazy.Extra ( - maxFor + foldMWithKey + , maxFor + , splitOn ) where -import Data.Map as Map (Map, foldrWithKey) +import Control.Monad.State (evalState, get, state) +import Data.Map as Map (Map, foldlWithKey', foldrWithKey, spanAntitone) -maxFor :: Ord b => (k -> a -> b) -> Map k a -> Maybe (k, a) -maxFor weight = fmap fst . foldrWithKey (\k a -> Just . keepMax k a) Nothing +-- | Evaluate a function on each entry contained in a map to find its maximum. +-- It returns not only the key and input value but also the maximum reached +maxFor :: Ord b => (k -> a -> b) -> Map k a -> Maybe (k, a, b) +maxFor weight = foldrWithKey (\k a -> Just . keepMax k a) Nothing where - keepMax k a Nothing = ((k, a), weight k a) - keepMax k a (Just current@(_, w)) = + keepMax k a Nothing = (k, a, weight k a) + keepMax k a (Just current@(_, _, w)) = let w' = weight k a in - if w' > w then ((k, a), w') else current + if w' > w then (k, a, w') else current + +foldMWithKey :: Monad m => (b -> k -> a -> m b) -> b -> Map k a -> m b +foldMWithKey f = foldlWithKey' (\m k v -> m >>= (\a -> f a k v)) . pure + +-- | Given two keys '(from, to)', cuts a map in three: +-- + the submap containing the keys lower than 'from' and their values +-- + the submap of keys between 'from' and 'to' (inclusive) +-- + the submap of keys strictly greater than 'to' +splitOn :: Ord k => (k, k) -> Map k a -> (Map k a, Map k a, Map k a) +splitOn (from, to) = evalState ((,,) <$> pop (< from) <*> pop (<= to) <*> get) + where + pop = state . spanAntitone diff --git a/lib/Data/Tuple/Extra.hs b/lib/Data/Tuple/Extra.hs new file mode 100644 index 0000000000000000000000000000000000000000..afd6740736caca2ffe1aab22dc0434d98586b3b3 --- /dev/null +++ b/lib/Data/Tuple/Extra.hs @@ -0,0 +1,10 @@ +module Data.Tuple.Extra ( + onEach + , zipTriple + ) where + +zipTriple :: (a -> b -> c) -> (a, a, a) -> (b, b, b) -> (c, c, c) +zipTriple f (a0, b0, c0) (a1, b1, c1) = (f a0 a1, f b0 b1, f c0 c1) + +onEach :: Applicative f => (a -> f b) -> (a, a, a) -> f (b, b, b) +onEach f (a, b, c) = (,,) <$> f a <*> f b <*> f c diff --git a/lib/Text/TEIWA/Annotation.hs b/lib/Text/TEIWA/Annotation.hs index ac76d8241bcfddbeb27f3eeb192be01498180054..7a928eef51ac8b1cca7113c8bfd26edeea58eb89 100644 --- a/lib/Text/TEIWA/Annotation.hs +++ b/lib/Text/TEIWA/Annotation.hs @@ -19,6 +19,8 @@ apply :: MonadError Error m => Config -> Annotation -> Text -> m Text apply config annotation = fmap snd . evalRWST (annotateWith annotation) config . start +-- | TEI-C defines a Lightweight Linguistic Annotation framework +-- https://tei-c.org/release/doc/tei-p5-doc/en/html/AI.html#AILALW isTEI :: Text -> Bool isTEI = (`elem` ["pos", "lemma", "msd"]) diff --git a/lib/Text/TEIWA/Annotation/Data.hs b/lib/Text/TEIWA/Annotation/Data.hs index 909350827a40acbc7167b3445b4eff4460c2cc96..eff8242fc76c8d49d923974aae0d5d139a446dd2 100644 --- a/lib/Text/TEIWA/Annotation/Data.hs +++ b/lib/Text/TEIWA/Annotation/Data.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE NamedFieldPuns #-} +{-# LANGUAGE NamedFieldPuns, OverloadedStrings #-} module Text.TEIWA.Annotation.Data ( Annotation(..) , Format @@ -9,7 +9,7 @@ module Text.TEIWA.Annotation.Data ( ) where import Data.Text.Lazy as Text (Text, concat, pack) -import Text.TEIWA.Source.Common (Attributes, TEIWAParser, attribute) +import Text.TEIWA.Source.Common (Attributes, TEIWAParser) import Text.Printf (printf) data Tag = Tag { @@ -29,3 +29,6 @@ openTag (Tag {name, annotated}) = closeTag :: Tag -> Text closeTag = Text.pack . printf "</%s>" . name + +attribute :: (Text, Text) -> Text +attribute (k, v) = Text.concat [" ", k, "=\"", v, "\""] diff --git a/lib/Text/TEIWA/Error.hs b/lib/Text/TEIWA/Error.hs index 6fd86c38288ae06b9dd7c2c91d64898c542e3cc6..972f9b6e6e953c8677e634a0c0d076dc99ef3bb1 100644 --- a/lib/Text/TEIWA/Error.hs +++ b/lib/Text/TEIWA/Error.hs @@ -13,6 +13,8 @@ data Error = | ParsingError ParseError | TermNotFound String | OverlappingAnnotation (Line, String) (Line, String) + | Debug String + deriving (Eq) instance Show Error where show (NoSuchColumn s) = @@ -25,3 +27,5 @@ instance Show Error where show (TermNotFound t) = printf "Annotated term \"%s\" wasn't found in the input" t show (OverlappingAnnotation (l1, s1) (l2, s2)) = printf "Annotations \"%s\" (l.%d) and \"%s\" (l.%d) overlap" s1 l1 s2 l2 + show (Debug s) = + printf "DEBUG: %s" s diff --git a/lib/Text/TEIWA/Source.hs b/lib/Text/TEIWA/Source.hs index 7a807af082f209b2aee0302b1a22018930ec18f0..fa5a30f11a5fc49fe5309fa77e634dac0b3343b4 100644 --- a/lib/Text/TEIWA/Source.hs +++ b/lib/Text/TEIWA/Source.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE FlexibleContexts, NamedFieldPuns, OverloadedStrings #-} +{-# LANGUAGE FlexibleContexts, NamedFieldPuns, OverloadedStrings, ScopedTypeVariables #-} module Text.TEIWA.Source ( Format , Origin(..) diff --git a/lib/Text/TEIWA/Source/CoNLLX.hs b/lib/Text/TEIWA/Source/CoNLLX.hs index 38baa7d2e462ccf0bb3fc99bb7a5d9a662b3e556..6410154d848e729f0e929eaf021d0d9e76dff3c5 100644 --- a/lib/Text/TEIWA/Source/CoNLLX.hs +++ b/lib/Text/TEIWA/Source/CoNLLX.hs @@ -1,15 +1,17 @@ -{-# LANGUAGE FlexibleContexts, OverloadedStrings #-} +{-# LANGUAGE FlexibleContexts, NamedFieldPuns, OverloadedStrings #-} module Text.TEIWA.Source.CoNLLX ( coNLLX ) where import Control.Applicative ((<|>), many) -import Data.Text.Lazy as Text (concat, pack) +import Data.Text.Lazy as Text (Text, concat) import Text.Parsec (ParsecT, Stream, char, string) import Text.TEIWA.Annotation (tagToken, teiSentence) import Text.TEIWA.Annotation.Data (Annotation(..), Format) import Text.TEIWA.Annotation.Context as Context (ofHeader) -import Text.TEIWA.Source.Common (Field, Header, avoid, eol, int, sentences) +import Text.TEIWA.Source.Common ( + Field, Header, Range(..), avoid, eol, int, range, sentences, toField + ) header :: Header header = [ @@ -25,34 +27,29 @@ header = [ , "PDEPREL" ] -field :: Stream s m Char => ParsecT s u m Field -field = build <$> avoid "\t\n\r" +fields :: Stream s m Char => ParsecT s u m [Field] +fields = many (char '\t' *> (build <$> avoid "\t\n\r")) <* eol where build "_" = Nothing build other = Just other -type Range = (Int, Int) - -rowID :: Stream s m Char => ParsecT s u m (Either Range Int) -rowID = int >>= \n -> ((char '-' *> (Left . (,) n <$> int)) <|> pure (Right n)) - -row :: Stream s m Char => ParsecT s u m [Field] -row = rowID >>= either range singleLine +combineSubs :: [Maybe Text] -> [Maybe Text] -> [Maybe Text] +combineSubs = zipWith combineFields where - toText = Text.pack . show - rest = many (char '\t' *> field) <* eol - singleLine n = (Just (toText n):) <$> rest - range (from, to) = - let rangeID = Text.concat [toText from, "-", toText to] in do - main <- rest - subs <- mapM (\k -> string (show k) *> rest) [from .. to] - return $ (Just rangeID):(zipWith (<|>) main $ foldl1 combineSubs subs) - combineSubs = zipWith combineFields combineFields Nothing Nothing = Nothing combineFields Nothing f@(Just _) = f combineFields f@(Just _) Nothing = f combineFields (Just a) (Just b) = Just $ Text.concat [a, "+", b] +row :: Stream s m Char => ParsecT s u m [Field] +row = (range >>= rangeLines) <|> (int >>= singleLine) + where + singleLine n = (toField n:) <$> fields + rangeLines r@(Range {from, to}) = do + main <- fields + subs <- mapM (\k -> string (show k) *> fields) [from .. to] + pure $ (toField r):(zipWith (<|>) main $ foldl1 combineSubs subs) + coNLLX :: Format coNLLX = do context <- Context.ofHeader header diff --git a/lib/Text/TEIWA/Source/Common.hs b/lib/Text/TEIWA/Source/Common.hs index 3f4c0533253d721c36bf8af3c0dc71ede0353953..cf5c264911ba08d9aab18541c9bbaa8021e139f0 100644 --- a/lib/Text/TEIWA/Source/Common.hs +++ b/lib/Text/TEIWA/Source/Common.hs @@ -1,23 +1,28 @@ -{-# LANGUAGE FlexibleContexts, OverloadedStrings #-} +{-# LANGUAGE FlexibleContexts, NamedFieldPuns, OverloadedStrings #-} module Text.TEIWA.Source.Common ( Attributes , Field , Header , Numbered + , Range(..) , TEIWAParser - , attribute , avoid , eol + , hull , int + , range , recordLine , sentences + , size , teiTagger + , toField + , within ) where import Control.Applicative ((<|>), many, optional) import Control.Monad.Reader (ReaderT) import Data.Char (isPunctuation) -import Data.Text.Lazy as Text (Text, concat, head, length, pack) +import Data.Text.Lazy as Text (Text, head, length, pack) import Text.Parsec ( Line, ParsecT, Stream, char, digit, endOfLine, getParserState, many1 , noneOf, sepEndBy, sourceLine, statePos, try @@ -27,6 +32,24 @@ import Text.TEIWA.Error (Error(..)) type Field = Maybe Text type Header = [Text] +data Range = Range { + from :: Int + , to :: Int + } deriving (Eq) + +instance Show Range where + show (Range {from, to}) = show from ++ "-" ++ show to + +within :: Int -> Range -> Bool +within i (Range {from, to}) = from <= i && i <= to + +hull :: Range -> Range -> Range +hull (Range newMin newMax) (Range oldMin oldMax) = + Range (min newMin oldMin) (max newMax oldMax) + +size :: Range -> Int +size (Range {from, to}) = to - from + type Numbered a = (Line, a) type Attributes = [(Text, Text)] @@ -47,9 +70,6 @@ currentLine = sourceLine . statePos <$> getParserState recordLine :: Monad m => ParsecT s u m a -> ParsecT s u m (Numbered a) recordLine p = (,) <$> currentLine <*> p -attribute :: (Text, Text) -> Text -attribute (k, v) = Text.concat [" ", k, "=\"", v, "\""] - sentence :: Stream s m Char => ParsecT s u m p -> ParsecT s u m [Numbered p] sentence row = many comment *> many1 (recordLine row) where @@ -57,9 +77,15 @@ sentence row = many comment *> many1 (recordLine row) sentences :: Stream s m Char => ParsecT s u m p -> ParsecT s u m [[Numbered p]] sentences row = sentence row `sepEndBy` many1 eol ---sentences row = many (many1 eol *> sentence row) + +range :: Stream s m Char => ParsecT s u m Range +range = try (Range <$> int <* char '-' <*> int) + +toField :: Show a => a -> Field +toField = Just . Text.pack . show teiTagger :: Text -> Attributes -> Text teiTagger t _ | Text.length t == 1 && isPunctuation (Text.head t) = "pc" + | Text.length t == 0 = "rs" | otherwise = "w" diff --git a/lib/Text/TEIWA/Source/WebAnno.hs b/lib/Text/TEIWA/Source/WebAnno.hs index df8447e412200fbe3e0a15295cb284e031a768c2..760c45d45dfb82bfeef8a46a4a81e46a20844a42 100644 --- a/lib/Text/TEIWA/Source/WebAnno.hs +++ b/lib/Text/TEIWA/Source/WebAnno.hs @@ -1,27 +1,18 @@ {-# LANGUAGE FlexibleContexts, NamedFieldPuns, OverloadedStrings #-} -module Text.TEIWA.Source.WebAnno {-( +module Text.TEIWA.Source.WebAnno ( webAnno - )-} where + ) where import Control.Applicative ((<|>), many, optional) -import Control.Monad.Except (MonadError) -import Data.Either (partitionEithers) -import Data.Map as Map (Map, empty, insertWith) -import Data.Map.Lazy.Extra as Map (maxFor) -import Data.List (foldl') import Data.Text.Lazy as Text (Text, pack) import Text.Parsec ( - Line, ParsecT, Stream, anyChar, char, count, many1, noneOf, sepBy1, skipMany1 - , string - ) -import Text.TEIWA.Annotation (tagToken, teiSentence) -import Text.TEIWA.Annotation.Context (Context) -import qualified Text.TEIWA.Annotation.Context as Context (Context(..), ofHeader) -import Text.TEIWA.Annotation.Data (Annotation(..), Format, Node(..)) -import Text.TEIWA.Error (Error) -import Text.TEIWA.Source.Common ( - Field, Header, Numbered, TEIWAParser, avoid, eol, int, recordLine, sentences + ParsecT, Stream, anyChar, char, many1, noneOf, sepBy1, string ) +import Text.TEIWA.Annotation.Context as Context (ofHeader) +import Text.TEIWA.Annotation.Data (Format) +import Text.TEIWA.Source.Common (Header, avoid, eol, int, range, sentences) +import Text.TEIWA.Source.WebAnno.Annotator (annotate) +import Text.TEIWA.Source.WebAnno.Data (AnnotationValue(..), Row(..)) pragma :: Stream s m Char => ParsecT s u m Text pragma = char '#' *> avoid "=" <* char '=' @@ -29,8 +20,7 @@ pragma = char '#' *> avoid "=" <* char '=' header :: Stream s m Char => ParsecT s u m Header header = do pragma *> string "WebAnno TSV " *> int `sepBy1` char '.' *> eol - --concat <$> (pragma >>= columns) `sepBy1` eol - ("FORM":) . concat <$> many ((pragma >>= columns) <* eol) + concat . (["ID", "SPAN", "FORM"]:) <$> many ((pragma >>= columns) <* eol) where columns "T_SP" = drop 1 <$> avoid "|\r\n" `sepBy1` char '|' columns _ = avoid "\r\n" *> pure [] @@ -38,57 +28,23 @@ header = do reserved :: [Char] reserved = "\\[]|_-;\t\n*" -type TokenAnnotation = (Text, Int) - -field :: Stream s m Char => ParsecT s u m [TokenAnnotation] +field :: Stream s m Char => ParsecT s u m [AnnotationValue] field = noValue <|> component `sepBy1` (char '|') where noValue = char '_' *> pure [] - uniqueID = char '[' *> int <* char ']' - component = (,) + bracketedID = char '[' *> int <* char ']' + component = AnnotationValue <$> (Text.pack <$> many1 (noneOf reserved <|> (char '\\' *> anyChar))) - <*> (maybe 0 id <$> optional uniqueID) - -data Row = Row { - form :: Text - , fields :: [[TokenAnnotation]] - } + <*> (optional bracketedID) row :: Stream s m Char => ParsecT s u m Row -row = makeRow <$> count 3 column <*> many1 (field <* char '\t') <* eol - where - column = avoid "\t\n" <* char '\t' - makeRow columns fields = Row {form = columns !! 2, fields} - -getSpans :: Context -> [Numbered Row] -> Map (Text, TokenAnnotation) (Line, Line) -getSpans context numbered = - foldl' (\tmpMap (lineNumber, row) -> - foldl' (\tmpMap' (hK, labels) -> - foldr (\label -> - Map.insertWith extendSpan (hK, label) (lineNumber, lineNumber) - ) tmpMap' labels - ) tmpMap row - ) Map.empty byHeader - where - byHeader = fmap (zip (Context.header context) . fields) <$> numbered - extendSpan (_, newMax) (oldMin, _) = (oldMin, newMax) - -extractNodes :: MonadError Error m => Context -> [Numbered Row] -> m [Node] -extractNodes _ [] = pure [] -extractNodes context numbered = undefined - where - spans = getSpans context numbered - maximalSpan = maxFor (\_ (minL, maxL) -> maxL - minL) - -{- -extractNodes context ((lineNumber, Row {fields, spans = []}):others) = - tagToken (lineNumber, fields) : extractNodes context others -extractNode context ((lineNumber, Row {fields, spans}):others) = undefined --} +row = Row + <$> (range <* char '\t') + <*> (range <* char '\t') + <*> (avoid "\t\n" <* char '\t') + <*> many1 (field <* char '\t') <* eol webAnno :: Format webAnno = do context <- Context.ofHeader =<< header - Annotations . fmap teiSentence <$> ( - (many eol *> sentences row) >>= mapM (extractNodes context) - ) + (many eol *> sentences row) >>= annotate context diff --git a/lib/Text/TEIWA/Source/WebAnno/Annotator.hs b/lib/Text/TEIWA/Source/WebAnno/Annotator.hs new file mode 100644 index 0000000000000000000000000000000000000000..5d3c0699629ac547c357fcace8eba467e38c5c76 --- /dev/null +++ b/lib/Text/TEIWA/Source/WebAnno/Annotator.hs @@ -0,0 +1,105 @@ +{-# LANGUAGE FlexibleContexts, NamedFieldPuns, OverloadedStrings #-} +module Text.TEIWA.Source.WebAnno.Annotator ( + AnnotationDB(..) + , UniqueCoordinate(..) + , annotate + , buildTree + , index + , indexCell + , splitDB + ) where + +import Control.Monad.Except (MonadError(..)) +import Control.Monad.RWS (MonadReader) +import Data.Map as Map (Map, delete, empty, insert, insertWith, toList) +import Data.Map.Lazy.Extra as Map (foldMWithKey, maxFor, splitOn) +import Data.List as List (foldl') +import Data.Text.Lazy as Text (Text, intercalate) +import Data.Tuple.Extra (onEach, zipTriple) +import Text.Parsec (Line) +import Text.TEIWA.Annotation (tagToken, teiSentence) +import Text.TEIWA.Annotation.Context (Context) +import qualified Text.TEIWA.Annotation.Context as Context (Context(..)) +import Text.TEIWA.Annotation.Data (Annotation(..), Node(..), Tag(..)) +import Text.TEIWA.Config (Config) +import Text.TEIWA.Error (Error(..)) +import Text.TEIWA.Source.Common ( + Field, Numbered, Range(..), hull , size, toField + ) +import Text.TEIWA.Source.WebAnno.Data ( + AnnotationDB(..), AnnotationValue(..), Row(..), UniqueCoordinate(..) + ) + +-- | Each cell in a WebAnno file may contain several 'AnnotationValue's which +-- are indexed separately: +-- + the multiline ones, with an ID, get pushed to the 'Range' map +-- + the others are simply joined by a "+" and become a 'Field' in the row +indexCell :: Line -> (Map UniqueCoordinate Range, [Field]) -> (Text, [AnnotationValue]) + -> (Map UniqueCoordinate Range, [Field]) +indexCell line (db, lineFields) (tagName, annotationValues) = + let (newDB, tags) = foldr switch (db, []) annotationValues in + (newDB, (fieldOfTags tags):lineFields) + where + switch (AnnotationValue newTag Nothing) (tmpDB, tmpTags) = + (tmpDB, newTag:tmpTags) + switch (AnnotationValue value (Just uniqueID)) (tmpDB, tmpTags) = + let uniqueCoords = UniqueCoordinate {tagName, value, uniqueID} in + (Map.insertWith hull uniqueCoords (Range line line) tmpDB, tmpTags) + fieldOfTags [] = Nothing + fieldOfTags tags = Just $ Text.intercalate "+" tags + +-- | Given a 'Context' c to provide the header fields, we can now index a +-- '[[Numbered Rows]]' into an 'AnnotationDB' by folding twice on the rows, and +-- for each one build the corresponding line by folding the previous function on +-- each cell +index :: Context -> [[Numbered Row]] -> AnnotationDB +index context = foldl' (foldl' indexLine) (AnnotationDB Map.empty Map.empty) + where + indexLine db (line, Row {rowId, charSpan, form, fields}) = + let cells = zip (drop 2 $ Context.header context) fields + -- FORM column will already have been removed by Context.ofHeader so we + -- need to drop only 2 fields (ID and SPAN, which are built-in for this + -- format) + tmpPair = (multiRows db, [Just form, toField charSpan, toField rowId]) + (newMRs, newLine) = foldl' (indexCell line) tmpPair cells in + AnnotationDB newMRs (Map.insert line (reverse newLine) $ lineFields db) + +-- | As the annotation tree will be built recursively, we will need to split the +-- DB at each step into the DBs of lines strictly before the maximal multiline +-- annotation found, the lines contained within it and the lines strictly after +-- it +splitDB :: MonadError Error m => (UniqueCoordinate, Range) -> AnnotationDB -> + m (AnnotationDB, AnnotationDB, AnnotationDB) +splitDB (maxCoords, Range {from, to}) (AnnotationDB {multiRows, lineFields}) = + zipTriple (flip AnnotationDB) + (splitOn (from, to) lineFields) + <$> (foldMWithKey classify emptyTriple $ Map.delete maxCoords multiRows) + where + classify (lower, within, greater) k r@(Range otherFrom otherTo) + | otherTo < from = pure (Map.insert k r lower, within, greater) + | to < otherFrom = pure (lower, within, Map.insert k r greater) + | from <= otherFrom && otherTo <= to = + pure (lower, Map.insert k r within, greater) + | otherwise = throwError $ + OverlappingAnnotation (from, show maxCoords) (otherFrom, show k) + emptyTriple = (Map.empty, Map.empty, Map.empty) + +-- | Once the annotations database is built, an 'Annotation' tree may be +-- retrieved by taking the multiline annotations by decreasing inclusion order, +-- and outputting the simple rows for each interval the delimitate +buildTree :: (MonadError Error m, MonadReader Config m) => + Context -> AnnotationDB -> m [Node] +buildTree context db@(AnnotationDB {multiRows, lineFields}) = + case maxFor (\_ -> size) multiRows of + Just (maxCoords@(UniqueCoordinate {tagName, value}), bounds, _) -> do + shards <- splitDB (maxCoords, bounds) db + (lower, within, greater) <- onEach (buildTree context) shards + let annotated = [(tagName, value)] + let name = (Context.tagger context) "" annotated + let n = Node (Tag {name, annotated}) (Annotations within) + pure (lower ++ (n:greater)) + _ -> mapM (tagToken context) $ Map.toList lineFields + +annotate :: (MonadError Error m, MonadReader Config m) => + Context -> [[Numbered Row]] -> m Annotation +annotate context = fmap Annotations . buildTree context . index context diff --git a/lib/Text/TEIWA/Source/WebAnno/Data.hs b/lib/Text/TEIWA/Source/WebAnno/Data.hs new file mode 100644 index 0000000000000000000000000000000000000000..8cef1a1eb24a8e921a94ab30746a9db858de2eb0 --- /dev/null +++ b/lib/Text/TEIWA/Source/WebAnno/Data.hs @@ -0,0 +1,47 @@ +{-# LANGUAGE NamedFieldPuns #-} +module Text.TEIWA.Source.WebAnno.Data ( + AnnotationDB(..) + , AnnotationValue(..) + , Row(..) + , UniqueCoordinate(..) + ) where + +import Data.Map as Map (Map) +import Data.Text.Lazy (Text, unpack) +import Text.Parsec (Line) +import Text.TEIWA.Source.Common (Field, Range) + +data AnnotationValue = AnnotationValue { + labelTag :: Text + , labelId :: Maybe Int + } deriving (Eq, Ord) + +instance Show AnnotationValue where + show (AnnotationValue {labelTag, labelId}) = + unpack labelTag ++ maybe "" show labelId + +data Row = Row { + rowId :: Range + , charSpan :: Range + , form :: Text + , fields :: [[AnnotationValue]] + } deriving (Eq, Show) + +-- | Since annotation tags can be found on several lines, they are distinguished +-- | with their numeric ID written within brackets. +-- | It entails that a pair containing a tag name and a value isn't unique, so +-- | we need to define a type for unique coordinates: +data UniqueCoordinate = UniqueCoordinate { + tagName :: Text + , value :: Text + , uniqueID :: Int + } deriving (Eq, Ord, Show) + +-- | We can now define a type representing the indexed data for an annotation, +-- separating the annotations spanning on several rows from the single-row +-- ones which are much similar to the types found in the other supported +-- formats. +data AnnotationDB = AnnotationDB { + multiRows :: Map UniqueCoordinate Range + , lineFields :: Map Line [Field] + } deriving (Eq, Show) diff --git a/teiwa.cabal b/teiwa.cabal index 279b75cc92a623cd91557920dc73c8711e739e32..2347d84f4d7921aebc6b2411af28429db48308e9 100644 --- a/teiwa.cabal +++ b/teiwa.cabal @@ -23,19 +23,22 @@ build-type: Simple extra-source-files: CHANGELOG.md library - exposed-modules: Text.TEIWA - , Text.TEIWA.Annotation.Data - other-modules: Data.Map.Lazy.Extra - , Text.TEIWA.Annotation + exposed-modules: Data.Map.Lazy.Extra + , Data.Tuple.Extra + , Text.TEIWA , Text.TEIWA.Annotation.Context + , Text.TEIWA.Annotation.Data + , Text.TEIWA.Source.Common + , Text.TEIWA.Source.WebAnno + , Text.TEIWA.Source.WebAnno.Annotator + , Text.TEIWA.Source.WebAnno.Data + other-modules: Text.TEIWA.Annotation , Text.TEIWA.Annotation.Editor , Text.TEIWA.Config , Text.TEIWA.Error , Text.TEIWA.Source - , Text.TEIWA.Source.Common , Text.TEIWA.Source.CoNLLX , Text.TEIWA.Source.SSV - , Text.TEIWA.Source.WebAnno build-depends: base >=4.12 && <4.15 , bytestring , containers @@ -66,10 +69,30 @@ test-suite regression type: detailed-0.9 test-module: Regression other-modules: Mock.Annotation - , Sources + , Regression.Text.TEIWA.Source + , Utils + build-depends: base + , Cabal + , mtl + , teiwa + , text + hs-source-dirs: test + ghc-options: -Wall + default-language: Haskell2010 + +test-suite unit + type: detailed-0.9 + test-module: Unit + other-modules: Mock.Map + , Mock.Text.TEIWA + , Mock.WebAnno + , Unit.Data.Map.Lazy.Extra + , Unit.Text.TEIWA.Source.WebAnno.Annotator + , Unit.Text.TEIWA.Annotation.Context , Utils build-depends: base , Cabal + , containers , mtl , teiwa , text diff --git a/test/Mock/Annotation.hs b/test/Mock/Annotation.hs index b327d60e23a85cd2cd10793525afb8e1ea5ebc4d..6affa096dec00d416b8c5a98e206744da0a6f918 100644 --- a/test/Mock/Annotation.hs +++ b/test/Mock/Annotation.hs @@ -1,6 +1,7 @@ {-# LANGUAGE OverloadedStrings #-} module Mock.Annotation ( coNLLX + , node , ssv , webAnno ) where @@ -95,6 +96,41 @@ ssv = Annotations $ zipWith (node ["LEMMA", "POS"]) (concat sentences) [ , [".", "PUNCT"] ] +webAnno :: Annotation +webAnno = Annotations $ + (zipWith mkNode (sentences !! 0) [ + ["1-1", "0-3", "you", "PRON"] + , ["1-2", "4-7", "can", "AUX"] + , ["1-3", "8-12", "talk", "VERB"] + , ["1-4", "13-15", "to", "ADP"] + , ["1-5", "16-18", "I", "PRON"] + , ["1-6", "18-19", ".", "PUNCT"] + ]) + ++ [ + syntax "CONDITION" [ + mkNode (sentences !! 1 !! 0) ["2-1", "0-2", "if", "SCONJ"] + , syntax "HYPOTHESIS" $ + zipWith mkNode (take 3 $ drop 1 (sentences !! 1)) [ + ["2-2", "3-6", "you", "PRON"] + , ["2-3", "6-9", "be", "AUX"] + , ["2-4", "10-16", "lonely", "ADJ"] + ] + , syntax "CONCLUSION" $ + zipWith mkNode (take 5 $ drop 4 (sentences !! 1)) [ + ["2-5", "17-20", "you", "PRON"] + , ["2-6", "21-24", "can", "AUX"] + , ["2-7", "25-29", "talk", "VERB"] + , ["2-8", "30-32", "to", "ADP"] + , ["2-9", "32-34", "I", "PRON"] + ] + ] + , mkNode (sentences !! 1 !! 9) ["2-10", "34-35", ".", "PUNCT"] + ] + where + mkNode = node ["ID", "SPAN", "LEMMA", "POS"] + syntax s = Node (Tag "rs" [("SYNTAX", s)]) . Annotations + +{- webAnno :: Annotation webAnno = Annotations $ sentence <$> @@ -121,3 +157,4 @@ webAnno = Annotations $ ) where mkNode = node ["SPAN", "LEMMA", "POS"] +-} diff --git a/test/Mock/Map.hs b/test/Mock/Map.hs new file mode 100644 index 0000000000000000000000000000000000000000..05700c9156dd707119b9d11c1c07638910528b58 --- /dev/null +++ b/test/Mock/Map.hs @@ -0,0 +1,23 @@ +module Mock.Map ( + frenchDigits + , h2G2 + ) where + +import Data.Map.Lazy as Map (Map, fromList) + +h2G2 :: Map Int String +h2G2 = Map.fromList [(42, "answer")] + +frenchDigits :: Map Int String +frenchDigits = Map.fromList [ + (0, "zero") + , (1, "un") + , (2, "deux") + , (3, "trois") + , (4, "quatre") + , (5, "cinq") + , (6, "six") + , (7, "sept") + , (8, "huit") + , (9, "neuf") + ] diff --git a/test/Mock/Text/TEIWA.hs b/test/Mock/Text/TEIWA.hs new file mode 100644 index 0000000000000000000000000000000000000000..f78219cb543babeb3e2fbc35451c9cab59143472 --- /dev/null +++ b/test/Mock/Text/TEIWA.hs @@ -0,0 +1,33 @@ +{-# LANGUAGE OverloadedStrings #-} +module Mock.Text.TEIWA ( + customHeaderConfig + , defaultHeader + , newFormConfig + , otherFormConfig + , otherHeader + ) where + +import Text.TEIWA (Config(..), defaultConfig) +import Text.TEIWA.Source.Common (Header) + +defaultHeader :: Header +defaultHeader = ["form", "lemma", "pos"] + +otherHeader :: Header +otherHeader = ["Test", "lemma", "pos"] + +newFormConfig :: Config +newFormConfig = defaultConfig { + formColumn = Just "Test" + } + +otherFormConfig :: Config +otherFormConfig = defaultConfig { + formColumn = Just "lemma" + } + +customHeaderConfig :: Config +customHeaderConfig = defaultConfig { + formColumn = Just "Test" + , headerOverride = Just otherHeader + } diff --git a/test/Mock/WebAnno.hs b/test/Mock/WebAnno.hs new file mode 100644 index 0000000000000000000000000000000000000000..d9f86c41794bac224a116ea5cfe379251fc22289 --- /dev/null +++ b/test/Mock/WebAnno.hs @@ -0,0 +1,120 @@ +{-# LANGUAGE NamedFieldPuns, OverloadedStrings #-} +module Mock.WebAnno ( + anotherGreen + , blue + , context + , dbStep0 + , dbStep1 + , dbStep2 + , dbStep3 + , emptyDB + , green + , greenUC + , linesDB + , multiRowStep0 + , multiRowStep1 + , multiRowStep2 + , multiRowStep3 + , numberedRows + , red + ) where + +import Data.Map as Map (Map, empty, fromList, restrictKeys) +import Data.Set as Set (fromList) +import Data.Text.Lazy (Text) +import Text.TEIWA.Annotation.Context (Context(..)) +import Text.TEIWA.Source.Common (Field, Numbered, Range(..)) +import Text.TEIWA.Source.WebAnno.Data ( + AnnotationDB(..), AnnotationValue(..), Row(..), UniqueCoordinate(..) + ) + +context :: Context +context = Context { + columnIndex = 2 + , columnName = "FORM" + , header = ["ID", "SPAN", "LEMMA", "enc_tags"] + , tagger = \_ _ -> "w" + } + +green :: AnnotationValue +green = AnnotationValue {labelTag = "GREEN", labelId = Just 0} + +anotherGreen :: AnnotationValue +anotherGreen = AnnotationValue {labelTag = "GREEN", labelId = Just 1} + +blue :: AnnotationValue +blue = AnnotationValue {labelTag = "BLUE", labelId = Nothing} + +red :: AnnotationValue +red = AnnotationValue {labelTag = "RED", labelId = Nothing} + +lemma :: Text -> AnnotationValue +lemma labelTag = AnnotationValue {labelTag, labelId = Nothing} + +greenUC :: Int -> UniqueCoordinate +greenUC = UniqueCoordinate "enc_tags" "GREEN" + +multiRowStep0 :: Map UniqueCoordinate Range +multiRowStep0 = Map.empty + +multiRowStep1 :: Map UniqueCoordinate Range +multiRowStep1 = Map.fromList [(greenUC 0, Range 7 7)] + +multiRowStep2 :: Map UniqueCoordinate Range +multiRowStep2 = Map.fromList [(greenUC 0, Range 7 8)] + +multiRowStep3 :: Map UniqueCoordinate Range +multiRowStep3 = Map.fromList [(greenUC 0, Range 7 8), (greenUC 1, Range 9 9)] + +numberedRows :: [Numbered Row] +numberedRows = [ + (6, Row { + rowId = Range 1 1 + , charSpan = Range 0 4 + , form = "This" + , fields = [[lemma "this"], [blue]] + }) + , (7, Row { + rowId = Range 1 2 + , charSpan = Range 5 7 + , form = "is" + , fields = [[lemma "be"], [green]] + }) + , (8, Row { + rowId = Range 1 3 + , charSpan = Range 10 11 + , form = "a" + , fields = [[lemma "a"], [green, blue]] + }) + , (9, Row { + rowId = Range 1 4 + , charSpan = Range 16 20 + , form = "test" + , fields = [[lemma "test"], []] + }) + ] + +linesDB :: Int -> Int -> Map Int [Field] +linesDB a b = Map.restrictKeys numberedFields $ Set.fromList [a .. b] + where + numberedFields = Map.fromList [ + (6, [Just "1-1", Just "0-4", Just "This", Just "this", Just "BLUE"]) + , (7, [Just "1-2", Just "5-7", Just "is", Just "be", Nothing]) + , (8, [Just "1-3", Just "10-11", Just "a", Just "a", Just "BLUE"]) + , (9, [Just "1-4", Just "16-20", Just "test", Just "test", Nothing]) + ] + +emptyDB :: AnnotationDB +emptyDB = AnnotationDB Map.empty Map.empty + +dbStep0 :: AnnotationDB +dbStep0 = emptyDB {lineFields = linesDB 6 6} + +dbStep1 :: AnnotationDB +dbStep1 = AnnotationDB {multiRows = multiRowStep1, lineFields = linesDB 6 7} + +dbStep2 :: AnnotationDB +dbStep2 = AnnotationDB {multiRows = multiRowStep2, lineFields = linesDB 6 8} + +dbStep3 :: AnnotationDB +dbStep3 = dbStep2 {lineFields = linesDB 6 9} diff --git a/test/Regression.hs b/test/Regression.hs index def8e067611a81351e66aa4be262847765ab4efa..fead11a4e48ca41264eaf8980ec173fcc5f036ad 100644 --- a/test/Regression.hs +++ b/test/Regression.hs @@ -3,7 +3,7 @@ module Regression ( ) where import Distribution.TestSuite (Test) -import Sources (parsing) +import Regression.Text.TEIWA.Source as Sources (parsing) tests :: IO [Test] diff --git a/test/Sources.hs b/test/Regression/Text/TEIWA/Source.hs similarity index 83% rename from test/Sources.hs rename to test/Regression/Text/TEIWA/Source.hs index 9866537cd61cac10a81b9a2bd05ff47bfc71fd46..d93f6f1fafde14c6cbe97c26c6cbe93745dc6b75 100644 --- a/test/Sources.hs +++ b/test/Regression/Text/TEIWA/Source.hs @@ -1,18 +1,17 @@ {-# LANGUAGE NamedFieldPuns #-} -module Sources ( +module Regression.Text.TEIWA.Source ( parsing ) where import Control.Monad.Except (runExceptT) import Distribution.TestSuite (Progress(..), Test(..), Result(..)) import qualified Mock.Annotation as Annotation (coNLLX, ssv, webAnno) -import Text.Printf (printf) import Text.TEIWA ( Format, Origin(..), Source(..), coNLLX, csv, defaultConfig, parse, tsv , webAnno ) import Text.TEIWA.Annotation.Data (Annotation) -import Utils (simpleTest) +import Utils (diff, simpleTest) data TestCase = TestCase { label :: String @@ -37,11 +36,9 @@ testSource (TestCase {label, caseFormat, file, expected}) = simpleTest label $ d case actual of Left reason -> Fail $ show reason Right annotation -> - if annotation == expected then Pass else Fail $ diff annotation - where - expectedS = show expected - diff a = - printf "Result differs from expectations: %s vs %s" (show a) expectedS + if annotation == expected + then Pass + else Fail $ diff annotation expected parsing :: Test parsing = Group { diff --git a/test/Unit.hs b/test/Unit.hs new file mode 100644 index 0000000000000000000000000000000000000000..745afe1717e25e2fbc95f67da40104fb2af88c88 --- /dev/null +++ b/test/Unit.hs @@ -0,0 +1,11 @@ +module Unit ( + tests + ) where + +import Distribution.TestSuite (Test) +import Unit.Data.Map.Lazy.Extra (mapTests) +import Unit.Text.TEIWA.Annotation.Context (ofHeaderTest) +import Unit.Text.TEIWA.Source.WebAnno.Annotator (annotationsTree) + +tests :: IO [Test] +tests = pure [annotationsTree, mapTests, ofHeaderTest] diff --git a/test/Unit/Data/Map/Lazy/Extra.hs b/test/Unit/Data/Map/Lazy/Extra.hs new file mode 100644 index 0000000000000000000000000000000000000000..3d740eb42de1bf5748e80708c715583ccf69009e --- /dev/null +++ b/test/Unit/Data/Map/Lazy/Extra.hs @@ -0,0 +1,56 @@ +module Unit.Data.Map.Lazy.Extra ( + mapTests + ) where + +import Data.Map.Lazy as Map (empty, filterWithKey, restrictKeys) +import Data.Map.Lazy.Extra (maxFor, splitOn) +import Data.Set as Set (fromList) +import Distribution.TestSuite (Test(..)) +import Mock.Map (frenchDigits, h2G2) +import Utils (pureTest) + + +maxForTest :: Test +maxForTest = Group { + groupName = "maxSpan" + , concurrently = True + , groupTests = pureTest (uncurry maxFor) <$> [ + ("empty map", (byKey, Map.empty), Nothing) + , ("singleton", (byKey, h2G2), Just (42, "answer", 42)) + , ("byKey", (byKey, frenchDigits), Just (9, "neuf", 9)) + , ("byLength", (byLength, frenchDigits), Just (4, "quatre", 6)) + , ("odd byLength", + (byLength, Map.filterWithKey (\k _ -> odd k) frenchDigits), + Just (3, "trois", 5)) + ] + } + where + byKey k _ = k + byLength _ = length + +splitOnTest :: Test +splitOnTest = Group { + groupName = "splitOn" + , concurrently = True + , groupTests = pureTest (uncurry splitOn) <$> [ + ("empty map", ((0, 0), Map.empty), (Map.empty, Map.empty, Map.empty)) + , ("singleton (lower)", ((7, 10), h2G2), (Map.empty, Map.empty, h2G2)) + , ("singleton (within)", ((40, 50), h2G2), (Map.empty, h2G2, Map.empty)) + , ("singleton (just on it)", ((42, 42), h2G2), (Map.empty, h2G2, Map.empty)) + , ("singleton (higher)", ((50, 50), h2G2), (h2G2, Map.empty, Map.empty)) + , ("some range", ((3, 5), frenchDigits), (sub 0 2, sub 3 5, sub 6 9)) + , ("single point", ((4, 4), frenchDigits), (sub 0 3, sub 4 4, sub 5 9)) + , ("outside before", ((-3, 6), frenchDigits), (Map.empty, sub 0 6, sub 7 9)) + , ("outside after", ((8, 14), frenchDigits), (sub 0 7, sub 8 9, Map.empty)) + , ("reverse bounds", ((8, 5), frenchDigits), (sub 0 7, Map.empty, sub 8 9)) + ] + } + where + sub a b = Map.restrictKeys frenchDigits $ Set.fromList [a .. b] + +mapTests :: Test +mapTests = Group { + groupName = "mapTests" + , concurrently = True + , groupTests = [maxForTest, splitOnTest] + } diff --git a/test/Unit/Text/TEIWA/Annotation/Context.hs b/test/Unit/Text/TEIWA/Annotation/Context.hs new file mode 100644 index 0000000000000000000000000000000000000000..af3c075173f5c1d7781d34de05ad0343fa0b87b2 --- /dev/null +++ b/test/Unit/Text/TEIWA/Annotation/Context.hs @@ -0,0 +1,91 @@ +{-# LANGUAGE NamedFieldPuns, OverloadedStrings #-} +module Unit.Text.TEIWA.Annotation.Context ( + ofHeaderTest + ) where + +import Control.Monad.Except (runExceptT) +import Control.Monad.Reader (runReaderT) +import Data.Text.Lazy (Text) +import Distribution.TestSuite (Test(..)) +import Mock.Text.TEIWA ( + customHeaderConfig, defaultHeader, newFormConfig, otherFormConfig, otherHeader + ) +import Mock.WebAnno as WebAnno (context) +import Text.TEIWA (Config, Error(..), defaultConfig) +import Text.TEIWA.Annotation.Context (Context(..), ofHeader) +import Text.TEIWA.Source.Common (Header) +import Utils (monadicTest) + +type TestableContext = (Int, Text, Header) +type TestCase = (String, (Header, Config), Either Error TestableContext) + +simpleHeaderDefaultConfig :: TestCase +simpleHeaderDefaultConfig = ( + "simple header with default config" + , (defaultHeader, defaultConfig) + , Right (0, "form", drop 1 defaultHeader) + ) + +replacingForm :: TestCase +replacingForm = ( + "replacing form" + , (otherHeader, newFormConfig) + , Right (0, "Test", drop 1 otherHeader) + ) + +anotherColumn :: TestCase +anotherColumn = ( + "using another column" + , (defaultHeader, otherFormConfig) + , Right (1, "lemma", ["form", "pos"]) + ) + +customHeaderCustomColumn :: TestCase +customHeaderCustomColumn = ( + "custom header with custom column" + , (defaultHeader, customHeaderConfig) + , Right (0, "Test", drop 1 otherHeader) + ) + +noForm :: TestCase +noForm = ( + "no form" + , (otherHeader, defaultConfig) + , Left NoFormColumn + ) + +noSuchColumn :: TestCase +noSuchColumn = ( + "no such column" + , (defaultHeader, newFormConfig) + , Left $ NoSuchColumn "Test" + ) + +webAnno :: TestCase +webAnno = ( + "web anno" + , (["ID", "SPAN", "FORM", "LEMMA", "enc_tags"], defaultConfig) + , Right (columnIndex, columnName, header) + ) + where + Context {columnIndex, columnName, header} = WebAnno.context + +ofHeaderTest :: Test +ofHeaderTest = Group { + groupName = "ofHeader" + , concurrently = True + , groupTests = monadicTest evaluator <$> [ + simpleHeaderDefaultConfig + , replacingForm + , anotherColumn + , customHeaderCustomColumn + , noForm + , noSuchColumn + , webAnno + ] + } + where + evaluator (h, config) = + fmap dropTagger <$> runExceptT (runReaderT (ofHeader h) config) + dropTagger (Context {columnIndex, columnName, header}) = + (columnIndex, columnName, header) diff --git a/test/Unit/Text/TEIWA/Source/WebAnno/Annotator.hs b/test/Unit/Text/TEIWA/Source/WebAnno/Annotator.hs new file mode 100644 index 0000000000000000000000000000000000000000..4e7ce79b5c9b76d5f7001ec56aa734e8252acd7b --- /dev/null +++ b/test/Unit/Text/TEIWA/Source/WebAnno/Annotator.hs @@ -0,0 +1,105 @@ +{-# LANGUAGE OverloadedStrings #-} +module Unit.Text.TEIWA.Source.WebAnno.Annotator ( + annotationsTree + ) where + +import Control.Monad.Except (runExceptT) +import Control.Monad.Reader (runReaderT) +import Distribution.TestSuite (Test(..)) +import Mock.Annotation (node) +import Mock.WebAnno ( + anotherGreen, blue, context, dbStep0, dbStep1, dbStep2, dbStep3, emptyDB + , green, greenUC, linesDB, multiRowStep0, multiRowStep1, multiRowStep2 + , multiRowStep3, numberedRows, red + ) +import Text.TEIWA (Error(..), defaultConfig) +import Text.TEIWA.Annotation.Context (Context(..)) +import Text.TEIWA.Source.Common (Range(..)) +import Text.TEIWA.Source.WebAnno.Annotator (buildTree, index, indexCell, splitDB) +import Text.TEIWA.Source.WebAnno.Data (AnnotationDB(..)) +import Utils (monadicTest, pureTest) + +indexCellTest :: Test +indexCellTest = Group { + groupName = "indexCell" + , concurrently = True + , groupTests = pureTest (uncurry3 indexCell) <$> [ + ("empty cell", + (undefined, (multiRowStep0, []), ("enc_tags", [])), + (multiRowStep0, [Nothing])) + , ("empty cell on non-empty fields", + (undefined, (multiRowStep0, [Just "test"]), ("enc_tags", [])), + (multiRowStep0, [Nothing, Just "test"])) + , ("one single-line annotation", + (undefined, (multiRowStep0, []), (undefined, [blue])), + (multiRowStep0, [Just "BLUE"])) + , ("two single-line annotations", + (undefined, (multiRowStep0, []), (undefined, [blue, red])), + (multiRowStep0, [Just "BLUE+RED"])) + , ("one multiline annotation", + (7, (multiRowStep0, []), ("enc_tags", [green])), + (multiRowStep1, [Nothing])) + , ("both single-line and multiline annotations", + (7, (multiRowStep0, []), ("enc_tags", [green, blue])), + (multiRowStep1, [Just "BLUE"])) + , ("expanding a multiline annotation", + (8, (multiRowStep1, [Just "TEST"]), ("enc_tags", [green])), + (multiRowStep2, [Nothing, Just "TEST"])) + , ("mixing several single-line and multiline annotations", + (9, (multiRowStep2, [Nothing]), ("enc_tags", [red, anotherGreen, blue])), + (multiRowStep3, [Just "RED+BLUE", Nothing])) + ] + } + where + uncurry3 f (a, b, c) = f a b c + +indexTest :: Test +indexTest = Group { + groupName = "index" + , concurrently = True + , groupTests = pureTest (uncurry index) <$> [ + ("no line", (context, []), emptyDB) + , ("after 1st line", (context, [take 1 numberedRows]), dbStep0) + , ("after 2 lines", (context, [take 2 numberedRows]), dbStep1) + , ("after 3 lines", (context, [take 3 numberedRows]), dbStep2) + , ("with all lines", (context, [numberedRows]), dbStep3) + ] + } + +splitDBTest :: Test +splitDBTest = Group { + groupName = "splitDB" + , concurrently = True + , groupTests = monadicTest (runExceptT . uncurry splitDB) <$> [ + ("1st split from whole DB", + ((greenUC 0, Range 7 8), dbStep3), + Right ( + emptyDB {lineFields = linesDB 6 6} + , emptyDB {lineFields = linesDB 7 8} + , emptyDB {lineFields = linesDB 9 9} + )) + , ("overlapping annotations in DB", + ((greenUC 1, Range 6 7), dbStep3), + Left (OverlappingAnnotation (6, show $ greenUC 1) (7, show $ greenUC 0))) + ] + } + +buildTreeTest :: Test +buildTreeTest = Group { + groupName = "buildTree" + , concurrently = True + , groupTests = monadicTest evaluator <$> [ + ("1st line only", + (context, dbStep0), + Right [node (header context) ("This", "w") ["1-1", "0-4", "this", "BLUE"]]) + ] + } + where + evaluator = runExceptT . flip runReaderT defaultConfig . uncurry buildTree + +annotationsTree :: Test +annotationsTree = Group { + groupName = "annotations" + , concurrently = True + , groupTests = [indexCellTest, indexTest, splitDBTest, buildTreeTest] + } diff --git a/test/Utils.hs b/test/Utils.hs index c74d44c10e7bbabc27a05f3faff75b352a94adac..b43844264c1a6cb7d5b5b52bba43ffc26b99128c 100644 --- a/test/Utils.hs +++ b/test/Utils.hs @@ -1,9 +1,13 @@ {-# LANGUAGE NamedFieldPuns #-} module Utils ( - simpleTest + diff + , monadicTest + , pureTest + , simpleTest ) where -import Distribution.TestSuite (Progress, Test(..), TestInstance(..)) +import Distribution.TestSuite (Progress(..), Result(..), Test(..), TestInstance(..)) +import Text.Printf (printf) simpleTest :: String -> IO Progress -> Test simpleTest name run = Test $ TestInstance { @@ -13,3 +17,17 @@ simpleTest name run = Test $ TestInstance { , options = [] , setOption = \_ _ -> Left "Options not supported for simpleTest" } + +diff :: Show a => a -> a -> String +diff a = printf "Result differs from expectations: %s vs %s" (show a) . show + +pureTest :: (Eq b, Show b) => (a -> b) -> (String, a, b) -> Test +pureTest f = monadicTest (pure . f) + +monadicTest :: (Eq b, Show b) => (a -> IO b) -> (String, a, b) -> Test +monadicTest f (name, input, expected) = simpleTest name $ do + actual <- f input + pure . Finished $ + if actual == expected + then Pass + else Fail $ diff actual expected diff --git a/test/source/hey bulldog.webanno.tsv b/test/source/hey bulldog.webanno.tsv index 5db3180b0c93e8b86eab282e88401476bff66231..d4d46728826e429ec2e962abc96a983a975c6f19 100644 --- a/test/source/hey bulldog.webanno.tsv +++ b/test/source/hey bulldog.webanno.tsv @@ -1,23 +1,24 @@ #FORMAT=WebAnno TSV 3.2 #T_SP=webanno.custom.LEMMA|LEMMA #T_SP=webanno.custom.POS|POS +#T_SP=webanno.custom.SYNTAX|SYNTAX #Text=You can talk to me. -1-1 0-3 You you PRON -1-2 4-7 can can AUX -1-3 8-12 talk talk VERB -1-4 13-15 to to ADP -1-5 16-18 me I PRON -1-6 18-19 . . PUNCT +1-1 0-3 You you PRON _ +1-2 4-7 can can AUX _ +1-3 8-12 talk talk VERB _ +1-4 13-15 to to ADP _ +1-5 16-18 me I PRON _ +1-6 18-19 . . PUNCT _ #Text=If you're lonely you can talk to me. -2-1 0-2 If if SCONJ -2-2 3-6 you you PRON -2-3 6-9 're be AUX -2-4 10-16 lonely lonely ADJ -2-5 17-20 you you PRON -2-6 21-24 can can AUX -2-7 25-29 talk talk VERB -2-8 30-32 to to ADP -2-9 32-34 me I PRON -2-10 34-35 . . PUNCT +2-1 0-2 If if SCONJ CONDITION[0] +2-2 3-6 you you PRON CONDITION[0]|HYPOTHESIS[1] +2-3 6-9 're be AUX CONDITION[0]|HYPOTHESIS[1] +2-4 10-16 lonely lonely ADJ CONDITION[0]|HYPOTHESIS[1] +2-5 17-20 you you PRON CONDITION[0]|CONCLUSION[2] +2-6 21-24 can can AUX CONDITION[0]|CONCLUSION[2] +2-7 25-29 talk talk VERB CONDITION[0]|CONCLUSION[2] +2-8 30-32 to to ADP CONDITION[0]|CONCLUSION[2] +2-9 32-34 me I PRON CONDITION[0]|CONCLUSION[2] +2-10 34-35 . . PUNCT _