# Patch summary (commentary only; everything from "diff --git" onward is unchanged).
#
# lib/Text/TEIWA/Source/Common.hs
#   * Adds `sepEndBy` to the Text.Parsec import list.
#   * `sentences row` changes from `many (many1 eol *> sentence row)` to
#     `sentence row `sepEndBy` many1 eol`: the old form demanded one or more
#     `eol` in front of every sentence, the first included; the new form parses
#     sentences separated, and optionally terminated, by one or more `eol`
#     and no longer consumes any `eol` ahead of the first sentence.
#
# lib/Text/TEIWA/Source/WebAnno.hs
#   * In `header`, the list entry "LABEL" is replaced by the two entries
#     "LEMMA" and "POS".
#   * The `version` binding is removed and re-added with no visible token
#     difference -- presumably a whitespace-only change; the original spacing
#     cannot be recovered from this text.
#   * `webAnno` now runs `many eol *>` before `sentences row`, skipping any
#     `eol` that precede the first sentence -- presumably to compensate for
#     `sentences` no longer consuming leading `eol` itself; confirm against
#     the WebAnno input format.
#
# NOTE(review): as stored here the patch has all of its line breaks collapsed
# into single spaces, so header lines, hunk headers and hunk bodies share one
# line and the indentation of the Haskell lines is lost. The line structure
# must be restored from the original commit before the patch can be applied.
diff --git a/lib/Text/TEIWA/Source/Common.hs b/lib/Text/TEIWA/Source/Common.hs index 787ac02604d02abcb6e4606b2882ce7f5183204a..ba31db2fbf7ba941144331b7b012fd515d1299a7 100644 --- a/lib/Text/TEIWA/Source/Common.hs +++ b/lib/Text/TEIWA/Source/Common.hs @@ -18,7 +18,7 @@ import Data.Char (isPunctuation) import Data.Text.Lazy as Text (Text, concat, head, length) import Text.Parsec ( Line, ParsecT, Stream, char, endOfLine, getParserState, many1, noneOf - , sourceLine, statePos, try + , sepEndBy, sourceLine, statePos, try ) import Text.TEIWA.Config (Config(..)) import Text.TEIWA.Error (Error(..)) @@ -48,7 +48,7 @@ sentence row = many comment *> many1 row comment = char '#' *> many (noneOf "\r\n") <* eol sentences :: Stream s m Char => ParsecT s u m Row -> ParsecT s u m [Sentence] -sentences row = many (many1 eol *> sentence row) +sentences row = sentence row `sepEndBy` many1 eol teiTagger :: Text -> Attributes -> Text teiTagger t _ diff --git a/lib/Text/TEIWA/Source/WebAnno.hs b/lib/Text/TEIWA/Source/WebAnno.hs index ed75a3eff5c234e5fe6c60d57e8e5a1079210ebb..f2f905b3247846c69f4f38a79f52494eaea619bf 100644 --- a/lib/Text/TEIWA/Source/WebAnno.hs +++ b/lib/Text/TEIWA/Source/WebAnno.hs @@ -23,10 +23,11 @@ header = do "ID" , "SPAN" , "FORM" - , "LABEL" + , "LEMMA" + , "POS" ] where - version = skipMany1 digit `sepBy1` char '.' *> eol + version = skipMany1 digit `sepBy1` char '.' *> eol comment = char '#' *> many (noneOf "\r\n") <* eol field :: Stream s m Char => ParsecT s u m Field @@ -45,7 +46,7 @@ webAnno :: Format webAnno = do context <- Context.ofHeader =<< header Annotations <$> ( - sentences row >>= mapM ( + (many eol *> sentences row) >>= mapM ( fmap (Node s_ . Annotations) . mapM (tagToken context) ) )