Skip to content
Snippets Groups Projects
Commit 7f8047ee authored by Alice Brenon
Browse files

Fix CoNLL and WebAnno formats to pass the regression tests

parent 5e208092
No related branches found
No related tags found
No related merge requests found
...@@ -18,7 +18,7 @@ import Data.Char (isPunctuation) ...@@ -18,7 +18,7 @@ import Data.Char (isPunctuation)
import Data.Text.Lazy as Text (Text, concat, head, length) import Data.Text.Lazy as Text (Text, concat, head, length)
import Text.Parsec ( import Text.Parsec (
Line, ParsecT, Stream, char, endOfLine, getParserState, many1, noneOf Line, ParsecT, Stream, char, endOfLine, getParserState, many1, noneOf
, sourceLine, statePos, try , sepEndBy, sourceLine, statePos, try
) )
import Text.TEIWA.Config (Config(..)) import Text.TEIWA.Config (Config(..))
import Text.TEIWA.Error (Error(..)) import Text.TEIWA.Error (Error(..))
...@@ -48,7 +48,7 @@ sentence row = many comment *> many1 row ...@@ -48,7 +48,7 @@ sentence row = many comment *> many1 row
comment = char '#' *> many (noneOf "\r\n") <* eol comment = char '#' *> many (noneOf "\r\n") <* eol
sentences :: Stream s m Char => ParsecT s u m Row -> ParsecT s u m [Sentence] sentences :: Stream s m Char => ParsecT s u m Row -> ParsecT s u m [Sentence]
sentences row = many (many1 eol *> sentence row) sentences row = sentence row `sepEndBy` many1 eol
teiTagger :: Text -> Attributes -> Text teiTagger :: Text -> Attributes -> Text
teiTagger t _ teiTagger t _
......
...@@ -23,10 +23,11 @@ header = do ...@@ -23,10 +23,11 @@ header = do
"ID" "ID"
, "SPAN" , "SPAN"
, "FORM" , "FORM"
, "LABEL" , "LEMMA"
, "POS"
] ]
where where
version = skipMany1 digit `sepBy1` char '.' *> eol version = skipMany1 digit `sepBy1` char '.' *> eol
comment = char '#' *> many (noneOf "\r\n") <* eol comment = char '#' *> many (noneOf "\r\n") <* eol
field :: Stream s m Char => ParsecT s u m Field field :: Stream s m Char => ParsecT s u m Field
...@@ -45,7 +46,7 @@ webAnno :: Format ...@@ -45,7 +46,7 @@ webAnno :: Format
webAnno = do webAnno = do
context <- Context.ofHeader =<< header context <- Context.ofHeader =<< header
Annotations <$> ( Annotations <$> (
sentences row >>= mapM ( (many eol *> sentences row) >>= mapM (
fmap (Node s_ . Annotations) . mapM (tagToken context) fmap (Node s_ . Annotations) . mapM (tagToken context)
) )
) )
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment