From 5e2080924755ec1b963d24b09e0929827dfb4879 Mon Sep 17 00:00:00 2001 From: Alice BRENON <alice.brenon@ens-lyon.fr> Date: Fri, 29 Oct 2021 15:24:42 +0200 Subject: [PATCH] Add regression tests --- lib/Text/TEIWA/Annotation/Data.hs | 6 +- teiwa.cabal | 17 +++- test/Mock/Annotation.hs | 123 ++++++++++++++++++++++++++++ test/Regression.hs | 10 +++ test/Sources.hs | 51 ++++++++++++ test/Utils.hs | 15 ++++ test/source/hey bulldog.cnl | 21 +++++ test/source/hey bulldog.csv | 17 ++++ test/source/hey bulldog.tsv | 17 ++++ test/source/hey bulldog.webanno.tsv | 23 ++++++ 10 files changed, 296 insertions(+), 4 deletions(-) create mode 100644 test/Mock/Annotation.hs create mode 100644 test/Regression.hs create mode 100644 test/Sources.hs create mode 100644 test/Utils.hs create mode 100644 test/source/hey bulldog.cnl create mode 100644 test/source/hey bulldog.csv create mode 100644 test/source/hey bulldog.tsv create mode 100644 test/source/hey bulldog.webanno.tsv diff --git a/lib/Text/TEIWA/Annotation/Data.hs b/lib/Text/TEIWA/Annotation/Data.hs index 1599c61..9093508 100644 --- a/lib/Text/TEIWA/Annotation/Data.hs +++ b/lib/Text/TEIWA/Annotation/Data.hs @@ -15,12 +15,12 @@ import Text.Printf (printf) data Tag = Tag { name :: Text , annotated :: Attributes - } deriving Show + } deriving (Eq, Show) data Node = Node { tag :: Tag , inside :: Annotation - } deriving Show -data Annotation = Token Text | Annotations [Node] deriving Show + } deriving (Eq, Show) +data Annotation = Token Text | Annotations [Node] deriving (Eq, Show) type Format = TEIWAParser Annotation openTag :: Tag -> Text diff --git a/teiwa.cabal b/teiwa.cabal index ded3950..e810e7a 100644 --- a/teiwa.cabal +++ b/teiwa.cabal @@ -24,9 +24,9 @@ extra-source-files: CHANGELOG.md library exposed-modules: Text.TEIWA + , Text.TEIWA.Annotation.Data other-modules: Text.TEIWA.Annotation , Text.TEIWA.Annotation.Context - , Text.TEIWA.Annotation.Data , Text.TEIWA.Annotation.Editor , Text.TEIWA.Config , Text.TEIWA.Error @@ -59,3 +59,18 @@ executable teiwa hs-source-dirs: app default-language: Haskell2010 ghc-options: -Wall + +test-suite regression + type: detailed-0.9 + test-module: Regression + other-modules: Mock.Annotation + , Sources + , Utils + build-depends: base + , Cabal + , mtl + , teiwa + , text + hs-source-dirs: test + ghc-options: -Wall + default-language: Haskell2010 diff --git a/test/Mock/Annotation.hs b/test/Mock/Annotation.hs new file mode 100644 index 0000000..b327d60 --- /dev/null +++ b/test/Mock/Annotation.hs @@ -0,0 +1,123 @@ +{-# LANGUAGE OverloadedStrings #-} +module Mock.Annotation ( + coNLLX + , ssv + , webAnno + ) where + +import Data.Text.Lazy (Text, pack) +import Text.TEIWA.Annotation.Data (Annotation(..), Node(..), Tag(..)) + +freshIDs :: [Int] +freshIDs = [1..] + +labelize :: (Text, Text) -> Node -> Node +labelize attribute someNode = someNode { + tag = tagged {annotated = attribute:(annotated tagged)} + } + where + tagged = tag someNode + +setIDs :: Maybe Int -> [Node] -> [Node] +setIDs sentenceID = zipWith labelize ((,) "ID" . prefix <$> freshIDs) + where + prefix = pack . maybe id ((++) . (++ "-") . show) sentenceID . show + +node :: [Text] -> (Text, Text) -> [Text] -> Node +node header (form, teiTag) attributeValues = + Node (Tag teiTag $ zip header attributeValues) (Token form) + +sentence :: [Node] -> Node +sentence = Node (Tag "s" []) . Annotations + +sentences :: [[(Text, Text)]] +sentences = [[ + ("You", "w") + , ("can", "w") + , ("talk", "w") + , ("to", "w") + , ("me", "w") + , (".", "pc") + ], [ + ("If", "w") + , ("you", "w") + , ("'re", "w") + , ("lonely", "w") + , ("you", "w") + , ("can", "w") + , ("talk", "w") + , ("to", "w") + , ("me", "w") + , (".", "pc") + ]] + +coNLLX :: Annotation +coNLLX = Annotations $ + (sentence . setIDs Nothing) <$> zipWith (zipWith mkNode) sentences [[ + ["you", "PRON"] + , ["can", "AUX"] + , ["talk", "VERB"] + , ["to", "ADP"] + , ["I", "PRON"] + , [".", "PUNCT"] + ], [ + ["if", "SCONJ"] + , ["you", "PRON"] + , ["be", "AUX"] + , ["lonely", "ADJ"] + , ["you", "PRON"] + , ["can", "AUX"] + , ["talk", "VERB"] + , ["to", "ADP"] + , ["I", "PRON"] + , [".", "PUNCT"] + ]] + where + mkNode = node ["LEMMA", "CPOSTAG"] + +ssv :: Annotation +ssv = Annotations $ zipWith (node ["LEMMA", "POS"]) (concat sentences) [ + ["you", "PRON"] + , ["can", "AUX"] + , ["talk", "VERB"] + , ["to", "ADP"] + , ["I", "PRON"] + , [".", "PUNCT"] + , ["if", "SCONJ"] + , ["you", "PRON"] + , ["be", "AUX"] + , ["lonely", "ADJ"] + , ["you", "PRON"] + , ["can", "AUX"] + , ["talk", "VERB"] + , ["to", "ADP"] + , ["I", "PRON"] + , [".", "PUNCT"] + ] + +webAnno :: Annotation +webAnno = Annotations $ + sentence <$> + zipWith (setIDs . Just) freshIDs ( + zipWith (zipWith mkNode) sentences [[ + ["0-3", "you", "PRON"] + , ["4-7", "can", "AUX"] + , ["8-12", "talk", "VERB"] + , ["13-15", "to", "ADP"] + , ["16-18", "I", "PRON"] + , ["18-19", ".", "PUNCT"] + ], [ + ["0-2", "if", "SCONJ"] + , ["3-6", "you", "PRON"] + , ["6-9", "be", "AUX"] + , ["10-16", "lonely", "ADJ"] + , ["17-20", "you", "PRON"] + , ["21-24", "can", "AUX"] + , ["25-29", "talk", "VERB"] + , ["30-32", "to", "ADP"] + , ["32-34", "I", "PRON"] + , ["34-35", ".", "PUNCT"] + ]] + ) + where + mkNode = node ["SPAN", "LEMMA", "POS"] diff --git a/test/Regression.hs b/test/Regression.hs new file mode 100644 index 0000000..def8e06 --- /dev/null +++ b/test/Regression.hs @@ -0,0 +1,10 @@ +module Regression ( + tests + ) where + +import Distribution.TestSuite (Test) +import Sources (parsing) + + +tests :: IO [Test] +tests = pure [Sources.parsing] diff --git a/test/Sources.hs b/test/Sources.hs new file mode 100644 index 0000000..9866537 --- /dev/null +++ b/test/Sources.hs @@ -0,0 +1,51 @@ +{-# LANGUAGE NamedFieldPuns #-} +module Sources ( + parsing + ) where + +import Control.Monad.Except (runExceptT) +import Distribution.TestSuite (Progress(..), Test(..), Result(..)) +import qualified Mock.Annotation as Annotation (coNLLX, ssv, webAnno) +import Text.Printf (printf) +import Text.TEIWA ( + Format, Origin(..), Source(..), coNLLX, csv, defaultConfig, parse, tsv + , webAnno + ) +import Text.TEIWA.Annotation.Data (Annotation) +import Utils (simpleTest) + +data TestCase = TestCase { + label :: String + , caseFormat :: Format + , file :: FilePath + , expected :: Annotation + } + +testCases :: [TestCase] +testCases = [ + TestCase "CSV" csv "test/source/hey bulldog.csv" Annotation.ssv + , TestCase "TSV" tsv "test/source/hey bulldog.tsv" Annotation.ssv + , TestCase "CoNLL-X" coNLLX "test/source/hey bulldog.cnl" Annotation.coNLLX + , TestCase "WebAnno" + webAnno "test/source/hey bulldog.webanno.tsv" Annotation.webAnno + ] + +testSource :: TestCase -> Test +testSource (TestCase {label, caseFormat, file, expected}) = simpleTest label $ do + actual <- runExceptT . parse defaultConfig . Source caseFormat $ File file + pure . Finished $ + case actual of + Left reason -> Fail $ show reason + Right annotation -> + if annotation == expected then Pass else Fail $ diff annotation + where + expectedS = show expected + diff a = + printf "Result differs from expectations: %s vs %s" (show a) expectedS + +parsing :: Test +parsing = Group { + groupName = "sources parsing" + , concurrently = True + , groupTests = testSource <$> testCases + } diff --git a/test/Utils.hs b/test/Utils.hs new file mode 100644 index 0000000..c74d44c --- /dev/null +++ b/test/Utils.hs @@ -0,0 +1,15 @@ +{-# LANGUAGE NamedFieldPuns #-} +module Utils ( + simpleTest + ) where + +import Distribution.TestSuite (Progress, Test(..), TestInstance(..)) + +simpleTest :: String -> IO Progress -> Test +simpleTest name run = Test $ TestInstance { + run + , name + , tags = [] + , options = [] + , setOption = \_ _ -> Left "Options not supported for simpleTest" + } diff --git a/test/source/hey bulldog.cnl b/test/source/hey bulldog.cnl new file mode 100644 index 0000000..361cd11 --- /dev/null +++ b/test/source/hey bulldog.cnl @@ -0,0 +1,21 @@ +# sent_id = 1 +# text = You can talk to me. +1 You you PRON _ _ _ _ _ _ +2 can can AUX _ _ _ _ _ _ +3 talk talk VERB _ _ _ _ _ _ +4 to to ADP _ _ _ _ _ _ +5 me I PRON _ _ _ _ _ _ +6 . . PUNCT _ _ _ _ _ _ + +# sent_id = 2 +# text = If you're lonely you can talk to me. +1 If if SCONJ _ _ _ _ _ _ +2 you you PRON _ _ _ _ _ _ +3 're be AUX _ _ _ _ _ _ +4 lonely lonely ADJ _ _ _ _ _ _ +5 you you PRON _ _ _ _ _ _ +6 can can AUX _ _ _ _ _ _ +7 talk talk VERB _ _ _ _ _ _ +8 to to ADP _ _ _ _ _ _ +9 me I PRON _ _ _ _ _ _ +10 . . PUNCT _ _ _ _ _ _ diff --git a/test/source/hey bulldog.csv b/test/source/hey bulldog.csv new file mode 100644 index 0000000..2371522 --- /dev/null +++ b/test/source/hey bulldog.csv @@ -0,0 +1,17 @@ +FORM,LEMMA,POS +You,you,PRON +can,can,AUX +talk,talk,VERB +to,to,ADP +me,I,PRON +.,.,PUNCT +If,if,SCONJ +you,you,PRON +'re,be,AUX +lonely,lonely,ADJ +you,you,PRON +can,can,AUX +talk,talk,VERB +to,to,ADP +me,I,PRON +.,.,PUNCT diff --git a/test/source/hey bulldog.tsv b/test/source/hey bulldog.tsv new file mode 100644 index 0000000..8f3b4cf --- /dev/null +++ b/test/source/hey bulldog.tsv @@ -0,0 +1,17 @@ +FORM LEMMA POS +You you PRON +can can AUX +talk talk VERB +to to ADP +me I PRON +. . PUNCT +If if SCONJ +you you PRON +'re be AUX +lonely lonely ADJ +you you PRON +can can AUX +talk talk VERB +to to ADP +me I PRON +. . PUNCT diff --git a/test/source/hey bulldog.webanno.tsv b/test/source/hey bulldog.webanno.tsv new file mode 100644 index 0000000..5db3180 --- /dev/null +++ b/test/source/hey bulldog.webanno.tsv @@ -0,0 +1,23 @@ +#FORMAT=WebAnno TSV 3.2 +#T_SP=webanno.custom.LEMMA|LEMMA +#T_SP=webanno.custom.POS|POS + +#Text=You can talk to me. +1-1 0-3 You you PRON +1-2 4-7 can can AUX +1-3 8-12 talk talk VERB +1-4 13-15 to to ADP +1-5 16-18 me I PRON +1-6 18-19 . . PUNCT + +#Text=If you're lonely you can talk to me. +2-1 0-2 If if SCONJ +2-2 3-6 you you PRON +2-3 6-9 're be AUX +2-4 10-16 lonely lonely ADJ +2-5 17-20 you you PRON +2-6 21-24 can can AUX +2-7 25-29 talk talk VERB +2-8 30-32 to to ADP +2-9 32-34 me I PRON +2-10 34-35 . . PUNCT -- GitLab