Skip to content
Snippets Groups Projects
TSV.hs 4.30 KiB
{-# LANGUAGE FlexibleInstances, MultiParamTypeClasses, NamedFieldPuns, TypeSynonymInstances #-}
module GEODE.Metadata.TSV
  ( Document(..)
  , ReadTSV(..)
  , Result
  , WriteTSV(..)
  , fromTSV
  , toTSV ) where

import Control.Monad.Except (ExceptT(..))
import Data.ByteString.Lazy as ByteString
  ( ByteString, getContents, putStr, readFile, writeFile )
import Data.Csv
  ( DecodeOptions(..), EncodeOptions(..), FromNamedRecord(..), FromRecord
  , HasHeader(..), Header, ToNamedRecord(..), ToRecord(..), decodeByNameWith
  , decodeWith, defaultEncodeOptions, encodeByNameWith, encodeWith )
import Data.Foldable (toList)
import Data.Vector (Vector)
import System.Exit (die)

-- * General definitions
--
-- | A data type to represent documents, with their Header and rows represented
-- contained in a Vector
data Document a = Document
  { header :: Header
  , rows :: Vector a }
  deriving Show
type Result = ExceptT String IO

-- * Reading TSV data
--
-- | Typeclass of contexts from which a TSV file can be read
class ReadTSV s t a where
  readTSV :: s -> t a

-- | The DecodeOptions for the TSV format
fromTSV :: DecodeOptions
fromTSV = DecodeOptions {decDelimiter = fromIntegral (fromEnum '\t')}

-- ** Reading Vectors of data which can be read from Record
--
-- | Three functions to factor a snippet common to all instances
noHeader :: FromRecord a => ByteString -> Either String (Vector a)
noHeader = decodeWith fromTSV NoHeader

parseRecords :: FromRecord a => IO ByteString -> Result (Vector a)
parseRecords = ExceptT . fmap noHeader

try :: Either String a -> IO a
try = either die pure

-- *** Reporting errors
-- | from stdin
instance FromRecord a => ReadTSV () Result (Vector a) where
  readTSV _ = parseRecords $ ByteString.getContents

-- | from a file
instance FromRecord a => ReadTSV FilePath Result (Vector a) where
  readTSV = parseRecords . ByteString.readFile

-- *** And making any error fatal
-- | from stdin
instance FromRecord a => ReadTSV () IO (Vector a) where
  readTSV _ = noHeader <$> ByteString.getContents >>= try

-- | from a file
instance FromRecord a => ReadTSV FilePath IO (Vector a) where
  readTSV path = noHeader <$> ByteString.readFile path >>= try
-- ** Now the same instances for data which can be read from NamedRecord
--
-- | Two functions to factor a snippet common to all instances
named :: FromNamedRecord a => ByteString -> Either String (Document a)
named = fmap (uncurry Document) . decodeByNameWith fromTSV

parseNamedRecords :: FromNamedRecord a => IO ByteString -> Result (Document a)
parseNamedRecords = ExceptT . fmap named

-- *** Reporting errors
-- | from stdin
instance FromNamedRecord a => ReadTSV () Result (Document a) where
  readTSV _ = parseNamedRecords $ ByteString.getContents

-- | from a file
instance FromNamedRecord a => ReadTSV FilePath Result (Document a) where
  readTSV = parseNamedRecords . ByteString.readFile

-- *** And making any error fatal
-- | from stdin
instance FromNamedRecord a => ReadTSV () IO (Document a) where
  readTSV _ = named <$> ByteString.getContents >>= try

-- | from a file
instance FromNamedRecord a => ReadTSV FilePath IO (Document a) where
  readTSV path = named <$> ByteString.readFile path >>= try

-- * Writing TSV data
--
-- | A class type to represent processes which output TSV
class WriteTSV d t a where
  writeTSV :: d -> a -> t ()

-- | The EncodeOptions for the TSV format
toTSV :: EncodeOptions
toTSV = defaultEncodeOptions
        { encDelimiter = fromIntegral (fromEnum '\t')
        , encUseCrLf = False }

-- ** We know how to handle data which can be written to a Record
--
-- | to stdout
instance {-# OVERLAPPABLE #-} (Foldable t, ToRecord a) => WriteTSV () IO (t a) where
  writeTSV _ = ByteString.putStr . encodeWith toTSV . toList

-- | to a file
instance {-# OVERLAPPABLE #-} (Foldable t, ToRecord a) => WriteTSV FilePath IO (t a) where
  writeTSV path = ByteString.writeFile path . encodeWith toTSV . toList

-- ** And we can also handle data which can be written to a NamedRecord
--
-- | to stdout
instance ToNamedRecord a => WriteTSV () IO (Document a) where
  writeTSV _ (Document {header, rows}) =
    ByteString.putStr . encodeByNameWith toTSV header $ toList rows

-- | to a file
instance ToNamedRecord a => WriteTSV FilePath IO (Document a) where
  writeTSV path (Document {header, rows}) =
    ByteString.writeFile path . encodeByNameWith toTSV header $ toList rows