From 5e2080924755ec1b963d24b09e0929827dfb4879 Mon Sep 17 00:00:00 2001
From: Alice BRENON <alice.brenon@ens-lyon.fr>
Date: Fri, 29 Oct 2021 15:24:42 +0200
Subject: [PATCH] Add regression tests

---
 lib/Text/TEIWA/Annotation/Data.hs   |   6 +-
 teiwa.cabal                         |  17 +++-
 test/Mock/Annotation.hs             | 123 ++++++++++++++++++++++++++++
 test/Regression.hs                  |  10 +++
 test/Sources.hs                     |  51 ++++++++++++
 test/Utils.hs                       |  15 ++++
 test/source/hey bulldog.cnl         |  21 +++++
 test/source/hey bulldog.csv         |  17 ++++
 test/source/hey bulldog.tsv         |  17 ++++
 test/source/hey bulldog.webanno.tsv |  23 ++++++
 10 files changed, 296 insertions(+), 4 deletions(-)
 create mode 100644 test/Mock/Annotation.hs
 create mode 100644 test/Regression.hs
 create mode 100644 test/Sources.hs
 create mode 100644 test/Utils.hs
 create mode 100644 test/source/hey bulldog.cnl
 create mode 100644 test/source/hey bulldog.csv
 create mode 100644 test/source/hey bulldog.tsv
 create mode 100644 test/source/hey bulldog.webanno.tsv

diff --git a/lib/Text/TEIWA/Annotation/Data.hs b/lib/Text/TEIWA/Annotation/Data.hs
index 1599c61..9093508 100644
--- a/lib/Text/TEIWA/Annotation/Data.hs
+++ b/lib/Text/TEIWA/Annotation/Data.hs
@@ -15,12 +15,12 @@ import Text.Printf (printf)
 data Tag = Tag {
       name :: Text
     , annotated :: Attributes
-  } deriving Show
+  } deriving (Eq, Show)
 data Node = Node {
       tag :: Tag
     , inside :: Annotation
-  } deriving Show
-data Annotation = Token Text | Annotations [Node] deriving Show
+  } deriving (Eq, Show)
+data Annotation = Token Text | Annotations [Node] deriving (Eq, Show)
 type Format = TEIWAParser Annotation
 
 openTag :: Tag -> Text
diff --git a/teiwa.cabal b/teiwa.cabal
index ded3950..e810e7a 100644
--- a/teiwa.cabal
+++ b/teiwa.cabal
@@ -24,9 +24,9 @@ extra-source-files:  CHANGELOG.md
 
 library
   exposed-modules:     Text.TEIWA
+                     , Text.TEIWA.Annotation.Data
   other-modules:       Text.TEIWA.Annotation
                      , Text.TEIWA.Annotation.Context
-                     , Text.TEIWA.Annotation.Data
                      , Text.TEIWA.Annotation.Editor
                      , Text.TEIWA.Config
                      , Text.TEIWA.Error
@@ -59,3 +59,18 @@ executable teiwa
   hs-source-dirs:      app
   default-language:    Haskell2010
   ghc-options:         -Wall
+
+test-suite regression
+  type:                detailed-0.9
+  test-module:         Regression
+  other-modules:       Mock.Annotation
+                     , Sources
+                     , Utils
+  build-depends:       base
+                     , Cabal
+                     , mtl
+                     , teiwa
+                     , text
+  hs-source-dirs:      test
+  ghc-options:         -Wall
+  default-language:    Haskell2010
diff --git a/test/Mock/Annotation.hs b/test/Mock/Annotation.hs
new file mode 100644
index 0000000..b327d60
--- /dev/null
+++ b/test/Mock/Annotation.hs
@@ -0,0 +1,123 @@
+{-# LANGUAGE OverloadedStrings #-}
+module Mock.Annotation (
+      coNLLX
+    , ssv
+    , webAnno
+  ) where
+
+import Data.Text.Lazy (Text, pack)
+import Text.TEIWA.Annotation.Data (Annotation(..), Node(..), Tag(..))
+
+freshIDs :: [Int]
+freshIDs = [1..]
+
+labelize :: (Text, Text) -> Node -> Node
+labelize attribute someNode = someNode {
+    tag = tagged {annotated = attribute:(annotated tagged)}
+  }
+  where
+    tagged = tag someNode
+
+setIDs :: Maybe Int -> [Node] -> [Node]
+setIDs sentenceID = zipWith labelize ((,) "ID" . prefix <$> freshIDs)
+  where
+    prefix = pack . maybe id ((++) . (++ "-") . show) sentenceID . show
+
+node :: [Text] -> (Text, Text) -> [Text] -> Node
+node header (form, teiTag) attributeValues =
+  Node (Tag teiTag $ zip header attributeValues) (Token form)
+
+sentence :: [Node] -> Node
+sentence = Node (Tag "s" []) . Annotations
+
+sentences :: [[(Text, Text)]]
+sentences = [[
+      ("You", "w")
+    , ("can", "w")
+    , ("talk", "w")
+    , ("to", "w")
+    , ("me", "w")
+    , (".", "pc")
+  ], [
+      ("If", "w")
+    , ("you", "w")
+    , ("'re", "w")
+    , ("lonely", "w")
+    , ("you", "w")
+    , ("can", "w")
+    , ("talk", "w")
+    , ("to", "w")
+    , ("me", "w")
+    , (".", "pc")
+  ]]
+
+coNLLX :: Annotation
+coNLLX = Annotations $
+  (sentence . setIDs Nothing) <$> zipWith (zipWith mkNode) sentences [[
+        ["you", "PRON"]
+      , ["can", "AUX"]
+      , ["talk", "VERB"]
+      , ["to", "ADP"]
+      , ["I", "PRON"]
+      , [".", "PUNCT"]
+    ], [
+        ["if", "SCONJ"]
+      , ["you", "PRON"]
+      , ["be", "AUX"]
+      , ["lonely", "ADJ"]
+      , ["you", "PRON"]
+      , ["can", "AUX"]
+      , ["talk", "VERB"]
+      , ["to", "ADP"]
+      , ["I", "PRON"]
+      , [".", "PUNCT"]
+    ]]
+  where
+    mkNode = node ["LEMMA", "CPOSTAG"]
+
+ssv :: Annotation
+ssv = Annotations $ zipWith (node ["LEMMA", "POS"]) (concat sentences) [
+      ["you", "PRON"]
+    , ["can", "AUX"]
+    , ["talk", "VERB"]
+    , ["to", "ADP"]
+    , ["I", "PRON"]
+    , [".", "PUNCT"]
+    , ["if", "SCONJ"]
+    , ["you", "PRON"]
+    , ["be", "AUX"]
+    , ["lonely", "ADJ"]
+    , ["you", "PRON"]
+    , ["can", "AUX"]
+    , ["talk", "VERB"]
+    , ["to", "ADP"]
+    , ["I", "PRON"]
+    , [".", "PUNCT"]
+  ]
+
+webAnno :: Annotation
+webAnno = Annotations $
+  sentence <$>
+    zipWith (setIDs . Just) freshIDs (
+        zipWith (zipWith mkNode) sentences [[
+              ["0-3", "you", "PRON"]
+            , ["4-7", "can", "AUX"]
+            , ["8-12", "talk", "VERB"]
+            , ["13-15", "to", "ADP"]
+            , ["16-18", "I", "PRON"]
+            , ["18-19", ".", "PUNCT"]
+          ], [
+              ["0-2", "if", "SCONJ"]
+            , ["3-6", "you", "PRON"]
+            , ["6-9", "be", "AUX"]
+            , ["10-16", "lonely", "ADJ"]
+            , ["17-20", "you", "PRON"]
+            , ["21-24", "can", "AUX"]
+            , ["25-29", "talk", "VERB"]
+            , ["30-32", "to", "ADP"]
+            , ["32-34", "I", "PRON"]
+            , ["34-35", ".", "PUNCT"]
+          ]]
+      )
+  where
+    mkNode = node ["SPAN", "LEMMA", "POS"]
diff --git a/test/Regression.hs b/test/Regression.hs
new file mode 100644
index 0000000..def8e06
--- /dev/null
+++ b/test/Regression.hs
@@ -0,0 +1,10 @@
+module Regression (
+    tests
+  ) where
+
+import Distribution.TestSuite (Test)
+import Sources (parsing)
+
+
+tests :: IO [Test]
+tests = pure [Sources.parsing]
diff --git a/test/Sources.hs b/test/Sources.hs
new file mode 100644
index 0000000..9866537
--- /dev/null
+++ b/test/Sources.hs
@@ -0,0 +1,51 @@
+{-# LANGUAGE NamedFieldPuns #-}
+module Sources (
+    parsing
+  ) where
+
+import Control.Monad.Except (runExceptT)
+import Distribution.TestSuite (Progress(..), Test(..), Result(..))
+import qualified Mock.Annotation as Annotation (coNLLX, ssv, webAnno)
+import Text.Printf (printf)
+import Text.TEIWA (
+      Format, Origin(..), Source(..), coNLLX, csv, defaultConfig, parse, tsv
+    , webAnno
+  )
+import Text.TEIWA.Annotation.Data (Annotation)
+import Utils (simpleTest)
+
+data TestCase = TestCase {
+      label :: String
+    , caseFormat :: Format
+    , file :: FilePath
+    , expected :: Annotation
+  }
+
+testCases :: [TestCase]
+testCases = [
+      TestCase "CSV" csv "test/source/hey bulldog.csv" Annotation.ssv
+    , TestCase "TSV" tsv "test/source/hey bulldog.tsv" Annotation.ssv
+    , TestCase "CoNLL-X" coNLLX "test/source/hey bulldog.cnl" Annotation.coNLLX
+    , TestCase "WebAnno"
+        webAnno "test/source/hey bulldog.webanno.tsv" Annotation.webAnno
+  ]
+
+testSource :: TestCase -> Test
+testSource (TestCase {label, caseFormat, file, expected}) = simpleTest label $ do
+  actual <- runExceptT . parse defaultConfig . Source caseFormat $ File file
+  pure . Finished $
+    case actual of
+      Left reason -> Fail $ show reason
+      Right annotation ->
+        if annotation == expected then Pass else Fail $ diff annotation
+  where
+    expectedS = show expected
+    diff a =
+      printf "Result differs from expectations: %s vs %s" (show a) expectedS
+
+parsing :: Test
+parsing = Group {
+      groupName = "sources parsing"
+    , concurrently = True
+    , groupTests = testSource <$> testCases
+  }
diff --git a/test/Utils.hs b/test/Utils.hs
new file mode 100644
index 0000000..c74d44c
--- /dev/null
+++ b/test/Utils.hs
@@ -0,0 +1,15 @@
+{-# LANGUAGE NamedFieldPuns #-}
+module Utils (
+    simpleTest
+  ) where
+
+import Distribution.TestSuite (Progress, Test(..), TestInstance(..))
+
+simpleTest :: String -> IO Progress -> Test
+simpleTest name run = Test $ TestInstance {
+      run
+    , name
+    , tags = []
+    , options = []
+    , setOption = \_ _ -> Left "Options not supported for simpleTest"
+  }
diff --git a/test/source/hey bulldog.cnl b/test/source/hey bulldog.cnl
new file mode 100644
index 0000000..361cd11
--- /dev/null
+++ b/test/source/hey bulldog.cnl	
@@ -0,0 +1,21 @@
+# sent_id = 1
+# text = You can talk to me.
+1	You	you	PRON	_	_	_	_	_	_
+2	can	can	AUX	_	_	_	_	_	_
+3	talk	talk	VERB	_	_	_	_	_	_
+4	to	to	ADP	_	_	_	_	_	_
+5	me	I	PRON	_	_	_	_	_	_
+6	.	.	PUNCT	_	_	_	_	_	_
+
+# sent_id = 2
+# text = If you're lonely you can talk to me.
+1	If	if	SCONJ	_	_	_	_	_	_
+2	you	you	PRON	_	_	_	_	_	_
+3	're	be	AUX	_	_	_	_	_	_
+4	lonely	lonely	ADJ	_	_	_	_	_	_
+5	you	you	PRON	_	_	_	_	_	_
+6	can	can	AUX	_	_	_	_	_	_
+7	talk	talk	VERB	_	_	_	_	_	_
+8	to	to	ADP	_	_	_	_	_	_
+9	me	I	PRON	_	_	_	_	_	_
+10	.	.	PUNCT	_	_	_	_	_	_
diff --git a/test/source/hey bulldog.csv b/test/source/hey bulldog.csv
new file mode 100644
index 0000000..2371522
--- /dev/null
+++ b/test/source/hey bulldog.csv	
@@ -0,0 +1,17 @@
+FORM,LEMMA,POS
+You,you,PRON
+can,can,AUX
+talk,talk,VERB
+to,to,ADP
+me,I,PRON
+.,.,PUNCT
+If,if,SCONJ
+you,you,PRON
+'re,be,AUX
+lonely,lonely,ADJ
+you,you,PRON
+can,can,AUX
+talk,talk,VERB
+to,to,ADP
+me,I,PRON
+.,.,PUNCT
diff --git a/test/source/hey bulldog.tsv b/test/source/hey bulldog.tsv
new file mode 100644
index 0000000..8f3b4cf
--- /dev/null
+++ b/test/source/hey bulldog.tsv	
@@ -0,0 +1,17 @@
+FORM	LEMMA	POS
+You	you	PRON
+can	can	AUX
+talk	talk	VERB
+to	to	ADP
+me	I	PRON
+.	.	PUNCT
+If	if	SCONJ
+you	you	PRON
+'re	be	AUX
+lonely	lonely	ADJ
+you	you	PRON
+can	can	AUX
+talk	talk	VERB
+to	to	ADP
+me	I	PRON
+.	.	PUNCT
diff --git a/test/source/hey bulldog.webanno.tsv b/test/source/hey bulldog.webanno.tsv
new file mode 100644
index 0000000..5db3180
--- /dev/null
+++ b/test/source/hey bulldog.webanno.tsv	
@@ -0,0 +1,23 @@
+#FORMAT=WebAnno TSV 3.2
+#T_SP=webanno.custom.LEMMA|LEMMA
+#T_SP=webanno.custom.POS|POS
+
+#Text=You can talk to me.
+1-1	0-3	You	you	PRON	
+1-2	4-7	can	can	AUX	
+1-3	8-12	talk	talk	VERB	
+1-4	13-15	to	to	ADP	
+1-5	16-18	me	I	PRON	
+1-6	18-19	.	.	PUNCT	
+
+#Text=If you're lonely you can talk to me.
+2-1	0-2	If	if	SCONJ	
+2-2	3-6	you	you	PRON	
+2-3	6-9	're	be	AUX	
+2-4	10-16	lonely	lonely	ADJ	
+2-5	17-20	you	you	PRON	
+2-6	21-24	can	can	AUX	
+2-7	25-29	talk	talk	VERB	
+2-8	30-32	to	to	ADP	
+2-9	32-34	me	I	PRON	
+2-10	34-35	.	.	PUNCT	
-- 
GitLab