-
Alice Brenon authored5b2ee7c7
TSV.hs 4.30 KiB
{-# LANGUAGE FlexibleInstances, MultiParamTypeClasses, NamedFieldPuns, TypeSynonymInstances #-}
module GEODE.Metadata.TSV
( Document(..)
, ReadTSV(..)
, Result
, WriteTSV(..)
, fromTSV
, toTSV ) where
import Control.Monad.Except (ExceptT(..))
import Data.ByteString.Lazy as ByteString
( ByteString, getContents, putStr, readFile, writeFile )
import Data.Csv
( DecodeOptions(..), EncodeOptions(..), FromNamedRecord(..), FromRecord
, HasHeader(..), Header, ToNamedRecord(..), ToRecord(..), decodeByNameWith
, decodeWith, defaultEncodeOptions, encodeByNameWith, encodeWith )
import Data.Foldable (toList)
import Data.Vector (Vector)
import System.Exit (die)
-- * General definitions
--
-- | A data type to represent documents, with their Header and rows represented
-- contained in a Vector
data Document a = Document
{ header :: Header
, rows :: Vector a }
deriving Show
type Result = ExceptT String IO
-- * Reading TSV data
--
-- | Typeclass of contexts from which a TSV file can be read
class ReadTSV s t a where
readTSV :: s -> t a
-- | The DecodeOptions for the TSV format
fromTSV :: DecodeOptions
fromTSV = DecodeOptions {decDelimiter = fromIntegral (fromEnum '\t')}
-- ** Reading Vectors of data which can be read from Record
--
-- | Three functions to factor a snippet common to all instances
noHeader :: FromRecord a => ByteString -> Either String (Vector a)
noHeader = decodeWith fromTSV NoHeader
parseRecords :: FromRecord a => IO ByteString -> Result (Vector a)
parseRecords = ExceptT . fmap noHeader
try :: Either String a -> IO a
try = either die pure
-- *** Reporting errors
-- | from stdin
instance FromRecord a => ReadTSV () Result (Vector a) where
readTSV _ = parseRecords $ ByteString.getContents
-- | from a file
instance FromRecord a => ReadTSV FilePath Result (Vector a) where
readTSV = parseRecords . ByteString.readFile
-- *** And making any error fatal
-- | from stdin
instance FromRecord a => ReadTSV () IO (Vector a) where
readTSV _ = noHeader <$> ByteString.getContents >>= try
-- | from a file
instance FromRecord a => ReadTSV FilePath IO (Vector a) where
readTSV path = noHeader <$> ByteString.readFile path >>= try
-- ** Now the same instances for data which can be read from NamedRecord
--
-- | Two functions to factor a snippet common to all instances
named :: FromNamedRecord a => ByteString -> Either String (Document a)
named = fmap (uncurry Document) . decodeByNameWith fromTSV
parseNamedRecords :: FromNamedRecord a => IO ByteString -> Result (Document a)
parseNamedRecords = ExceptT . fmap named
-- *** Reporting errors
-- | from stdin
instance FromNamedRecord a => ReadTSV () Result (Document a) where
readTSV _ = parseNamedRecords $ ByteString.getContents
-- | from a file
instance FromNamedRecord a => ReadTSV FilePath Result (Document a) where
readTSV = parseNamedRecords . ByteString.readFile
-- *** And making any error fatal
-- | from stdin
instance FromNamedRecord a => ReadTSV () IO (Document a) where
readTSV _ = named <$> ByteString.getContents >>= try
-- | from a file
instance FromNamedRecord a => ReadTSV FilePath IO (Document a) where
readTSV path = named <$> ByteString.readFile path >>= try
-- * Writing TSV data
--
-- | A class type to represent processes which output TSV
class WriteTSV d t a where
writeTSV :: d -> a -> t ()
-- | The EncodeOptions for the TSV format
toTSV :: EncodeOptions
toTSV = defaultEncodeOptions
{ encDelimiter = fromIntegral (fromEnum '\t')
, encUseCrLf = False }
-- ** We know how to handle data which can be written to a Record
--
-- | to stdout
instance {-# OVERLAPPABLE #-} (Foldable t, ToRecord a) => WriteTSV () IO (t a) where
writeTSV _ = ByteString.putStr . encodeWith toTSV . toList
-- | to a file
instance {-# OVERLAPPABLE #-} (Foldable t, ToRecord a) => WriteTSV FilePath IO (t a) where
writeTSV path = ByteString.writeFile path . encodeWith toTSV . toList
-- ** And we can also handle data which can be written to a NamedRecord
--
-- | to stdout
instance ToNamedRecord a => WriteTSV () IO (Document a) where
writeTSV _ (Document {header, rows}) =
ByteString.putStr . encodeByNameWith toTSV header $ toList rows
-- | to a file
instance ToNamedRecord a => WriteTSV FilePath IO (Document a) where
writeTSV path (Document {header, rows}) =
ByteString.writeFile path . encodeByNameWith toTSV header $ toList rows