{-# LANGUAGE FlexibleInstances, MultiParamTypeClasses, NamedFieldPuns, TypeSynonymInstances #-} module GEODE.Metadata.TSV ( Document(..) , ReadTSV(..) , Result , WriteTSV(..) , fromTSV , toTSV ) where import Control.Monad.Except (ExceptT(..)) import Data.ByteString.Lazy as ByteString ( ByteString, getContents, putStr, readFile, writeFile ) import Data.Csv ( DecodeOptions(..), EncodeOptions(..), FromNamedRecord(..), FromRecord , HasHeader(..), Header, ToNamedRecord(..), ToRecord(..), decodeByNameWith , decodeWith, defaultEncodeOptions, encodeByNameWith, encodeWith ) import Data.Foldable (toList) import Data.Vector (Vector) import System.Exit (die) -- * General definitions -- -- | A data type to represent documents, with their Header and rows represented -- contained in a Vector data Document a = Document { header :: Header , rows :: Vector a } deriving Show type Result = ExceptT String IO -- * Reading TSV data -- -- | Typeclass of contexts from which a TSV file can be read class ReadTSV s t a where readTSV :: s -> t a -- | The DecodeOptions for the TSV format fromTSV :: DecodeOptions fromTSV = DecodeOptions {decDelimiter = fromIntegral (fromEnum '\t')} -- ** Reading Vectors of data which can be read from Record -- -- | Three functions to factor a snippet common to all instances noHeader :: FromRecord a => ByteString -> Either String (Vector a) noHeader = decodeWith fromTSV NoHeader parseRecords :: FromRecord a => IO ByteString -> Result (Vector a) parseRecords = ExceptT . fmap noHeader try :: Either String a -> IO a try = either die pure -- *** Reporting errors -- | from stdin instance FromRecord a => ReadTSV () Result (Vector a) where readTSV _ = parseRecords $ ByteString.getContents -- | from a file instance FromRecord a => ReadTSV FilePath Result (Vector a) where readTSV = parseRecords . ByteString.readFile -- *** And making any error fatal -- | from stdin instance FromRecord a => ReadTSV () IO (Vector a) where readTSV _ = noHeader <$> ByteString.getContents >>= try -- | from a file instance FromRecord a => ReadTSV FilePath IO (Vector a) where readTSV path = noHeader <$> ByteString.readFile path >>= try -- ** Now the same instances for data which can be read from NamedRecord -- -- | Two functions to factor a snippet common to all instances named :: FromNamedRecord a => ByteString -> Either String (Document a) named = fmap (uncurry Document) . decodeByNameWith fromTSV parseNamedRecords :: FromNamedRecord a => IO ByteString -> Result (Document a) parseNamedRecords = ExceptT . fmap named -- *** Reporting errors -- | from stdin instance FromNamedRecord a => ReadTSV () Result (Document a) where readTSV _ = parseNamedRecords $ ByteString.getContents -- | from a file instance FromNamedRecord a => ReadTSV FilePath Result (Document a) where readTSV = parseNamedRecords . ByteString.readFile -- *** And making any error fatal -- | from stdin instance FromNamedRecord a => ReadTSV () IO (Document a) where readTSV _ = named <$> ByteString.getContents >>= try -- | from a file instance FromNamedRecord a => ReadTSV FilePath IO (Document a) where readTSV path = named <$> ByteString.readFile path >>= try -- * Writing TSV data -- -- | A class type to represent processes which output TSV class WriteTSV d t a where writeTSV :: d -> a -> t () -- | The EncodeOptions for the TSV format toTSV :: EncodeOptions toTSV = defaultEncodeOptions { encDelimiter = fromIntegral (fromEnum '\t') , encUseCrLf = False } -- ** We know how to handle data which can be written to a Record -- -- | to stdout instance {-# OVERLAPPABLE #-} (Foldable t, ToRecord a) => WriteTSV () IO (t a) where writeTSV _ = ByteString.putStr . encodeWith toTSV . toList -- | to a file instance {-# OVERLAPPABLE #-} (Foldable t, ToRecord a) => WriteTSV FilePath IO (t a) where writeTSV path = ByteString.writeFile path . encodeWith toTSV . toList -- ** And we can also handle data which can be written to a NamedRecord -- -- | to stdout instance ToNamedRecord a => WriteTSV () IO (Document a) where writeTSV _ (Document {header, rows}) = ByteString.putStr . encodeByNameWith toTSV header $ toList rows -- | to a file instance ToNamedRecord a => WriteTSV FilePath IO (Document a) where writeTSV path (Document {header, rows}) = ByteString.writeFile path . encodeByNameWith toTSV header $ toList rows