From 5b2ee7c7eefa9f96ffc89ed7638f5e47064df1d7 Mon Sep 17 00:00:00 2001 From: Alice BRENON <alice.brenon@ens-lyon.fr> Date: Tue, 16 Jan 2024 16:33:05 +0100 Subject: [PATCH] Generalize TSV read/write implementation with class types --- lib/GEODE/Metadata/TSV.hs | 143 +++++++++++++++++++++++++++++++------- 1 file changed, 116 insertions(+), 27 deletions(-) diff --git a/lib/GEODE/Metadata/TSV.hs b/lib/GEODE/Metadata/TSV.hs index 26fd950..42e0796 100644 --- a/lib/GEODE/Metadata/TSV.hs +++ b/lib/GEODE/Metadata/TSV.hs @@ -1,41 +1,130 @@ -{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE FlexibleInstances, MultiParamTypeClasses, NamedFieldPuns, TypeSynonymInstances #-} module GEODE.Metadata.TSV - ( fromTsv - , readNamedTsv - , readTsv - , toTsv - , tsvFile - , tsvLines ) where - -import Data.ByteString.Lazy as ByteString (putStr, readFile, writeFile) + ( Document(..) + , ReadTSV(..) + , Result + , WriteTSV(..) + , fromTSV + , toTSV ) where + +import Control.Monad.Except (ExceptT(..)) +import Data.ByteString.Lazy as ByteString + ( ByteString, getContents, putStr, readFile, writeFile ) import Data.Csv ( DecodeOptions(..), EncodeOptions(..), FromNamedRecord(..), FromRecord - , HasHeader(..), ToNamedRecord(..), ToRecord(..), decodeByNameWith + , HasHeader(..), Header, ToNamedRecord(..), ToRecord(..), decodeByNameWith , decodeWith, defaultEncodeOptions, encodeByNameWith, encodeWith ) import Data.Foldable (toList) import Data.Vector (Vector) -import GEODE.Metadata.TSV.Header (HasDefaultHeader, getHeader, for) +import System.Exit (die) + +-- * General definitions +-- +-- | A data type to represent documents, with their Header and rows represented +-- contained in a Vector +data Document a = Document + { header :: Header + , rows :: Vector a } + deriving Show +type Result = ExceptT String IO + +-- * Reading TSV data +-- +-- | Typeclass of contexts from which a TSV file can be read +class ReadTSV s t a where + readTSV :: s -> t a + +-- | The DecodeOptions for the TSV format +fromTSV :: DecodeOptions +fromTSV = DecodeOptions {decDelimiter = fromIntegral (fromEnum '\t')} + +-- ** Reading Vectors of data which can be read from Record +-- +-- | Three functions to factor a snippet common to all instances +noHeader :: FromRecord a => ByteString -> Either String (Vector a) +noHeader = decodeWith fromTSV NoHeader + +parseRecords :: FromRecord a => IO ByteString -> Result (Vector a) +parseRecords = ExceptT . fmap noHeader + +try :: Either String a -> IO a +try = either die pure + +-- *** Reporting errors +-- | from stdin +instance FromRecord a => ReadTSV () Result (Vector a) where + readTSV _ = parseRecords $ ByteString.getContents + +-- | from a file +instance FromRecord a => ReadTSV FilePath Result (Vector a) where + readTSV = parseRecords . ByteString.readFile -fromTsv :: DecodeOptions -fromTsv = DecodeOptions {decDelimiter = fromIntegral (fromEnum '\t')} +-- *** And making any error fatal +-- | from stdin +instance FromRecord a => ReadTSV () IO (Vector a) where + readTSV _ = noHeader <$> ByteString.getContents >>= try -readNamedTsv :: FromNamedRecord a => FilePath -> IO (Either String (Vector a)) -readNamedTsv source = - (fmap snd . decodeByNameWith fromTsv) <$> ByteString.readFile source +-- | from a file +instance FromRecord a => ReadTSV FilePath IO (Vector a) where + readTSV path = noHeader <$> ByteString.readFile path >>= try -readTsv :: FromRecord a => FilePath -> IO (Either String (Vector a)) -readTsv source = decodeWith fromTsv NoHeader <$> ByteString.readFile source +-- ** Now the same instances for data which can be read from NamedRecord +-- +-- | Two functions to factor a snippet common to all instances +named :: FromNamedRecord a => ByteString -> Either String (Document a) +named = fmap (uncurry Document) . decodeByNameWith fromTSV -toTsv :: EncodeOptions -toTsv = defaultEncodeOptions +parseNamedRecords :: FromNamedRecord a => IO ByteString -> Result (Document a) +parseNamedRecords = ExceptT . fmap named + +-- *** Reporting errors +-- | from stdin +instance FromNamedRecord a => ReadTSV () Result (Document a) where + readTSV _ = parseNamedRecords $ ByteString.getContents + +-- | from a file +instance FromNamedRecord a => ReadTSV FilePath Result (Document a) where + readTSV = parseNamedRecords . ByteString.readFile + +-- *** And making any error fatal +-- | from stdin +instance FromNamedRecord a => ReadTSV () IO (Document a) where + readTSV _ = named <$> ByteString.getContents >>= try + +-- | from a file +instance FromNamedRecord a => ReadTSV FilePath IO (Document a) where + readTSV path = named <$> ByteString.readFile path >>= try + +-- * Writing TSV data +-- +-- | A class type to represent processes which output TSV +class WriteTSV d t a where + writeTSV :: d -> a -> t () + +-- | The EncodeOptions for the TSV format +toTSV :: EncodeOptions +toTSV = defaultEncodeOptions { encDelimiter = fromIntegral (fromEnum '\t') , encUseCrLf = False } -tsvFile :: forall a t. (Foldable t, HasDefaultHeader a, ToNamedRecord a) => - FilePath -> t a -> IO () -tsvFile target = ByteString.writeFile target . encode . toList - where - encode = encodeByNameWith toTsv (getHeader (for :: a)) +-- ** We know how to handle data which can be written to a Record +-- +-- | to stdout +instance {-# OVERLAPPABLE #-} (Foldable t, ToRecord a) => WriteTSV () IO (t a) where + writeTSV _ = ByteString.putStr . encodeWith toTSV . toList + +-- | to a file +instance {-# OVERLAPPABLE #-} (Foldable t, ToRecord a) => WriteTSV FilePath IO (t a) where + writeTSV path = ByteString.writeFile path . encodeWith toTSV . toList + +-- ** And we can also handle data which can be written to a NamedRecord +-- +-- | to stdout +instance ToNamedRecord a => WriteTSV () IO (Document a) where + writeTSV _ (Document {header, rows}) = + ByteString.putStr . encodeByNameWith toTSV header $ toList rows -tsvLines :: (Foldable t, ToRecord a) => t a -> IO () -tsvLines = ByteString.putStr . encodeWith toTsv . toList +-- | to a file +instance ToNamedRecord a => WriteTSV FilePath IO (Document a) where + writeTSV path (Document {header, rows}) = + ByteString.writeFile path . encodeByNameWith toTSV header $ toList rows -- GitLab