diff --git a/lib/Text/TEIWA/Source.hs b/lib/Text/TEIWA/Source.hs index 1a1db4de22685012170b1908692eb14bec279d9d..f81d57e65dc8408f91a225d17a1c601ab298a9b2 100644 --- a/lib/Text/TEIWA/Source.hs +++ b/lib/Text/TEIWA/Source.hs @@ -13,6 +13,7 @@ module Text.TEIWA.Source ( ) where import Control.Monad.Except (MonadError(..)) +import Control.Monad.Reader (MonadReader(..), ReaderT(..)) import Control.Monad.IO.Class (MonadIO(..)) import Data.Text.Lazy as Text (Text, unpack) import Data.Text.Lazy.IO as Text (readFile) @@ -26,10 +27,10 @@ import Text.TEIWA.Source.Common (AnnotationContext(..), Row) import qualified Text.TEIWA.Source.ConLLX as ConLLX (getContext, sentences) import qualified Text.TEIWA.Source.CSV as CSV (body, getContext) -type TEIWAParser = ParsecT Text () (Either Error) -type Format = Config -> TEIWAParser Annotation +type TEIWAParser = ParsecT Text () (ReaderT Config (Either Error)) +type Format = TEIWAParser Annotation -annotateToken :: MonadError Error m => +annotateToken :: (MonadError Error m, MonadReader Config m) => AnnotationContext -> Row -> m TokenAnnotation annotateToken (AnnotationContext {columnIndex, columnName, header}) (atLine, record) = case splitAt columnIndex record of @@ -38,23 +39,24 @@ annotateToken (AnnotationContext {columnIndex, columnName, header}) (atLine, rec return $ TokenAnnotation {form, annotated = zip header (before ++ after)} coNLLX :: Format -coNLLX (Config {formColumn}) = do - context <- ConLLX.getContext formColumn +coNLLX = do + context <- ConLLX.getContext =<< reader formColumn SentenceLevel <$> ( ConLLX.sentences >>= mapM ( fmap SentenceAnnotation . mapM (annotateToken context) ) ) +ssv :: Char -> Format +ssv separator = do + context <- CSV.getContext separator =<< reader formColumn + TokenLevel <$> (CSV.body separator >>= mapM (annotateToken context)) + csv :: Format -csv (Config {formColumn}) = do - context <- CSV.getContext ',' formColumn - TokenLevel <$> (CSV.body ',' >>= mapM (annotateToken context)) +csv = ssv ',' tsv :: Format -tsv (Config {formColumn}) = do - context <- CSV.getContext '\t' formColumn - TokenLevel <$> (CSV.body '\t' >>= mapM (annotateToken context)) +tsv = ssv '\t' data Origin = File FilePath | Text Text @@ -64,14 +66,14 @@ data Source = Source { } runTEIWAParser :: MonadError Error m => - TEIWAParser a -> SourceName -> Text -> m a -runTEIWAParser p s = flattenErrors . runParserT p () s + Config -> TEIWAParser a -> SourceName -> Text -> m a +runTEIWAParser config p s = flattenErrors . (`runReaderT` config) . runParserT p () s where flattenErrors = either throwError (either (throwError . ParsingError) pure) parse :: (MonadIO m, MonadError Error m) => Config -> Source -> m Annotation -parse config (Source {format, origin}) = parseFrom (format config) origin +parse config (Source {format, origin}) = parseFrom format origin where - parseFrom p (File f) = liftIO (Text.readFile f) >>= runTEIWAParser p f - parseFrom p (Text t) = runTEIWAParser p "" t + parseFrom p (File f) = liftIO (Text.readFile f) >>= runTEIWAParser config p f + parseFrom p (Text t) = runTEIWAParser config p "" t