Skip to content
Snippets Groups Projects
Commit 4b5d0966 authored by Alice Brenon
Browse files

Make Config accessible from within the TEIWAParser monad

parent 92ca859d
No related branches found
No related tags found
No related merge requests found
......@@ -13,6 +13,7 @@ module Text.TEIWA.Source (
) where
import Control.Monad.Except (MonadError(..))
import Control.Monad.Reader (MonadReader(..), ReaderT(..))
import Control.Monad.IO.Class (MonadIO(..))
import Data.Text.Lazy as Text (Text, unpack)
import Data.Text.Lazy.IO as Text (readFile)
......@@ -26,10 +27,10 @@ import Text.TEIWA.Source.Common (AnnotationContext(..), Row)
import qualified Text.TEIWA.Source.ConLLX as ConLLX (getContext, sentences)
import qualified Text.TEIWA.Source.CSV as CSV (body, getContext)
type TEIWAParser = ParsecT Text () (Either Error)
type Format = Config -> TEIWAParser Annotation
type TEIWAParser = ParsecT Text () (ReaderT Config (Either Error))
type Format = TEIWAParser Annotation
annotateToken :: MonadError Error m =>
annotateToken :: (MonadError Error m, MonadReader Config m) =>
AnnotationContext -> Row -> m TokenAnnotation
annotateToken (AnnotationContext {columnIndex, columnName, header}) (atLine, record) =
case splitAt columnIndex record of
......@@ -38,23 +39,24 @@ annotateToken (AnnotationContext {columnIndex, columnName, header}) (atLine, rec
return $ TokenAnnotation {form, annotated = zip header (before ++ after)}
coNLLX :: Format
coNLLX (Config {formColumn}) = do
context <- ConLLX.getContext formColumn
coNLLX = do
context <- ConLLX.getContext =<< reader formColumn
SentenceLevel <$> (
ConLLX.sentences >>= mapM (
fmap SentenceAnnotation . mapM (annotateToken context)
)
)
-- | Parser for separator-delimited tabular input, parameterised by the
-- separator character; 'csv' and 'tsv' are this parser applied to @','@ and
-- @'\t'@ respectively.
ssv :: Char -> Format
ssv separator = do
-- Read the form column from the 'Config' held in the reader environment and
-- pass it, together with the separator, to 'CSV.getContext'.
context <- CSV.getContext separator =<< reader formColumn
-- Annotate every row produced by 'CSV.body' and wrap the list in 'TokenLevel'.
TokenLevel <$> (CSV.body separator >>= mapM (annotateToken context))
csv :: Format
csv (Config {formColumn}) = do
context <- CSV.getContext ',' formColumn
TokenLevel <$> (CSV.body ',' >>= mapM (annotateToken context))
csv = ssv ','
tsv :: Format
tsv (Config {formColumn}) = do
context <- CSV.getContext '\t' formColumn
TokenLevel <$> (CSV.body '\t' >>= mapM (annotateToken context))
tsv = ssv '\t'
-- | Where the input to parse comes from: 'File' names a path whose contents
-- are read with 'Text.readFile', 'Text' carries the content directly.
data Origin = File FilePath | Text Text
......@@ -64,14 +66,14 @@ data Source = Source {
}
runTEIWAParser :: MonadError Error m =>
TEIWAParser a -> SourceName -> Text -> m a
runTEIWAParser p s = flattenErrors . runParserT p () s
Config -> TEIWAParser a -> SourceName -> Text -> m a
runTEIWAParser config p s = flattenErrors . (`runReaderT` config) . runParserT p () s
where
flattenErrors = either throwError (either (throwError . ParsingError) pure)
parse :: (MonadIO m, MonadError Error m) =>
Config -> Source -> m Annotation
parse config (Source {format, origin}) = parseFrom (format config) origin
parse config (Source {format, origin}) = parseFrom format origin
where
parseFrom p (File f) = liftIO (Text.readFile f) >>= runTEIWAParser p f
parseFrom p (Text t) = runTEIWAParser p "" t
parseFrom p (File f) = liftIO (Text.readFile f) >>= runTEIWAParser config p f
parseFrom p (Text t) = runTEIWAParser config p "" t
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment