Skip to content
Snippets Groups Projects
Commit 5b2ee7c7 authored by Alice Brenon's avatar Alice Brenon
Browse files

Generalize TSV read/write implementation with class types

parent ed58c5cf
No related branches found
No related tags found
No related merge requests found
{-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE FlexibleInstances, MultiParamTypeClasses, NamedFieldPuns, TypeSynonymInstances #-}
module GEODE.Metadata.TSV module GEODE.Metadata.TSV
( fromTsv ( Document(..)
, readNamedTsv , ReadTSV(..)
, readTsv , Result
, toTsv , WriteTSV(..)
, tsvFile , fromTSV
, tsvLines ) where , toTSV ) where
import Data.ByteString.Lazy as ByteString (putStr, readFile, writeFile) import Control.Monad.Except (ExceptT(..))
import Data.ByteString.Lazy as ByteString
( ByteString, getContents, putStr, readFile, writeFile )
import Data.Csv import Data.Csv
( DecodeOptions(..), EncodeOptions(..), FromNamedRecord(..), FromRecord ( DecodeOptions(..), EncodeOptions(..), FromNamedRecord(..), FromRecord
, HasHeader(..), ToNamedRecord(..), ToRecord(..), decodeByNameWith , HasHeader(..), Header, ToNamedRecord(..), ToRecord(..), decodeByNameWith
, decodeWith, defaultEncodeOptions, encodeByNameWith, encodeWith ) , decodeWith, defaultEncodeOptions, encodeByNameWith, encodeWith )
import Data.Foldable (toList) import Data.Foldable (toList)
import Data.Vector (Vector) import Data.Vector (Vector)
import GEODE.Metadata.TSV.Header (HasDefaultHeader, getHeader, for) import System.Exit (die)
-- * General definitions
--
-- | A data type to represent documents, with their Header and rows represented
-- contained in a Vector
data Document a = Document
{ header :: Header
, rows :: Vector a }
deriving Show
type Result = ExceptT String IO
-- * Reading TSV data
--
-- | Typeclass of contexts from which a TSV file can be read
class ReadTSV s t a where
readTSV :: s -> t a
-- | The DecodeOptions for the TSV format
fromTSV :: DecodeOptions
fromTSV = DecodeOptions {decDelimiter = fromIntegral (fromEnum '\t')}
-- ** Reading Vectors of data which can be read from Record
--
-- | Three functions to factor a snippet common to all instances
noHeader :: FromRecord a => ByteString -> Either String (Vector a)
noHeader = decodeWith fromTSV NoHeader
parseRecords :: FromRecord a => IO ByteString -> Result (Vector a)
parseRecords = ExceptT . fmap noHeader
try :: Either String a -> IO a
try = either die pure
-- *** Reporting errors
-- | from stdin
instance FromRecord a => ReadTSV () Result (Vector a) where
readTSV _ = parseRecords $ ByteString.getContents
-- | from a file
instance FromRecord a => ReadTSV FilePath Result (Vector a) where
readTSV = parseRecords . ByteString.readFile
fromTsv :: DecodeOptions -- *** And making any error fatal
fromTsv = DecodeOptions {decDelimiter = fromIntegral (fromEnum '\t')} -- | from stdin
instance FromRecord a => ReadTSV () IO (Vector a) where
readTSV _ = noHeader <$> ByteString.getContents >>= try
readNamedTsv :: FromNamedRecord a => FilePath -> IO (Either String (Vector a)) -- | from a file
readNamedTsv source = instance FromRecord a => ReadTSV FilePath IO (Vector a) where
(fmap snd . decodeByNameWith fromTsv) <$> ByteString.readFile source readTSV path = noHeader <$> ByteString.readFile path >>= try
readTsv :: FromRecord a => FilePath -> IO (Either String (Vector a)) -- ** Now the same instances for data which can be read from NamedRecord
readTsv source = decodeWith fromTsv NoHeader <$> ByteString.readFile source --
-- | Two functions to factor a snippet common to all instances
named :: FromNamedRecord a => ByteString -> Either String (Document a)
named = fmap (uncurry Document) . decodeByNameWith fromTSV
toTsv :: EncodeOptions parseNamedRecords :: FromNamedRecord a => IO ByteString -> Result (Document a)
toTsv = defaultEncodeOptions parseNamedRecords = ExceptT . fmap named
-- *** Reporting errors
-- | from stdin
instance FromNamedRecord a => ReadTSV () Result (Document a) where
readTSV _ = parseNamedRecords $ ByteString.getContents
-- | from a file
instance FromNamedRecord a => ReadTSV FilePath Result (Document a) where
readTSV = parseNamedRecords . ByteString.readFile
-- *** And making any error fatal
-- | from stdin
instance FromNamedRecord a => ReadTSV () IO (Document a) where
readTSV _ = named <$> ByteString.getContents >>= try
-- | from a file
instance FromNamedRecord a => ReadTSV FilePath IO (Document a) where
readTSV path = named <$> ByteString.readFile path >>= try
-- * Writing TSV data
--
-- | A class type to represent processes which output TSV
class WriteTSV d t a where
writeTSV :: d -> a -> t ()
-- | The EncodeOptions for the TSV format
toTSV :: EncodeOptions
toTSV = defaultEncodeOptions
{ encDelimiter = fromIntegral (fromEnum '\t') { encDelimiter = fromIntegral (fromEnum '\t')
, encUseCrLf = False } , encUseCrLf = False }
tsvFile :: forall a t. (Foldable t, HasDefaultHeader a, ToNamedRecord a) => -- ** We know how to handle data which can be written to a Record
FilePath -> t a -> IO () --
tsvFile target = ByteString.writeFile target . encode . toList -- | to stdout
where instance {-# OVERLAPPABLE #-} (Foldable t, ToRecord a) => WriteTSV () IO (t a) where
encode = encodeByNameWith toTsv (getHeader (for :: a)) writeTSV _ = ByteString.putStr . encodeWith toTSV . toList
-- | to a file
instance {-# OVERLAPPABLE #-} (Foldable t, ToRecord a) => WriteTSV FilePath IO (t a) where
writeTSV path = ByteString.writeFile path . encodeWith toTSV . toList
-- ** And we can also handle data which can be written to a NamedRecord
--
-- | to stdout
instance ToNamedRecord a => WriteTSV () IO (Document a) where
writeTSV _ (Document {header, rows}) =
ByteString.putStr . encodeByNameWith toTSV header $ toList rows
tsvLines :: (Foldable t, ToRecord a) => t a -> IO () -- | to a file
tsvLines = ByteString.putStr . encodeWith toTsv . toList instance ToNamedRecord a => WriteTSV FilePath IO (Document a) where
writeTSV path (Document {header, rows}) =
ByteString.writeFile path . encodeByNameWith toTSV header $ toList rows
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment