Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • geode/ghc-geode
1 result
Show changes
Commits on Source (4)
......@@ -24,7 +24,6 @@ library
exposed-modules: GEODE.Metadata
, GEODE.Metadata.Entry
, GEODE.Metadata.SplitContext
, GEODE.Metadata.TSV.Header
, GEODE.Options
-- Modules included in this library but not exported.
......@@ -32,6 +31,7 @@ library
, GEODE.Metadata.ArticleRecord
, GEODE.Metadata.Record
, GEODE.Metadata.TSV
, GEODE.Metadata.TSV.Header
, GEODE.Metadata.Types
, GEODE.Metadata.Work
......
......@@ -24,7 +24,6 @@ import GEODE.Metadata.Record as Record
import GEODE.Metadata.SplitContext as SplitContext hiding (get, page, rank)
import GEODE.Metadata.TSV as TSV
import GEODE.Metadata.TSV.Header as TSV_Header
( DefaultFields(..), HasDefaultHeader(..), WithDefaultHeader(..), glue )
import GEODE.Metadata.Types as Types
import GEODE.Metadata.Work as Work
......
......@@ -7,10 +7,9 @@ module GEODE.Metadata.ArticleRecord
import Data.Aeson ((.=), FromJSON(..), ToJSON(..))
import Data.Aeson.KeyMap as KeyMap (fromList)
import Data.Csv (FromNamedRecord(..), ToNamedRecord(..))
import Data.Csv (DefaultOrdered(..), FromNamedRecord(..), ToNamedRecord(..))
import GEODE.Metadata.Record (Record(..))
import GEODE.Metadata.Types (ToJSONObject(..))
import GEODE.Metadata.TSV.Header (DefaultFields(..), HasDefaultHeader(..))
import GEODE.Metadata.Work (Work(..))
import GHC.Generics (Generic)
import System.FilePath ((</>), (<.>))
......@@ -23,6 +22,7 @@ data ArticleRecord = ArticleRecord
instance FromNamedRecord ArticleRecord
instance ToNamedRecord ArticleRecord
instance DefaultOrdered ArticleRecord
instance ToJSONObject ArticleRecord where
toJSONObject (ArticleRecord {work, volume, article}) = KeyMap.fromList
......@@ -42,6 +42,3 @@ instance Record ArticleRecord where
relativePath (ArticleRecord {work, volume, article}) extension =
(show work) </> ("T" <> show volume) </> (show article) <.> extension
instance HasDefaultHeader ArticleRecord where
defaultFields = DefaultFields [ "work", "volume", "article" ]
......@@ -5,14 +5,18 @@ module GEODE.Metadata.Contrastive
, formatList ) where
import Data.Csv
( FromField(..), FromNamedRecord(..), ToNamedRecord(..), ToField(..) )
( DefaultOrdered(..), FromField(..), FromNamedRecord(..), ToNamedRecord(..)
, ToField(..) )
import Data.Text (Text, intercalate, splitOn, uncons, unsnoc)
import GEODE.Metadata.TSV.Header (DefaultFields(..), HasDefaultHeader(..))
import GHC.Generics (Generic)
newtype MultiText = MultiText
{ getList :: [Text] } deriving (Show)
-- | We represent fields with multiple values by ':'-surrounded values
-- (including a leading and trailing ':') to be able to always write the same
-- regex to match for their value (/.*:Géographie:.*/, no matter if it occurs at
-- the begining, the end or in the middle of the field
formatList :: MultiText -> Text
formatList = colonFormat . getList
where
......@@ -39,8 +43,6 @@ data Contrastive = Contrastive
, domains :: MultiText
, subCorpus :: MultiText } deriving (Generic, Show)
instance DefaultOrdered Contrastive
instance FromNamedRecord Contrastive
instance ToNamedRecord Contrastive
instance HasDefaultHeader Contrastive where
defaultFields = DefaultFields [ "authors", "domains", "subCorpus" ]
......@@ -5,11 +5,9 @@ module GEODE.Metadata.Entry
, normalize ) where
import Data.Char (isAlphaNum, isSpace, isUpper, toLower)
import Data.Csv (FromNamedRecord(..), ToNamedRecord(..))
import Data.Csv (DefaultOrdered(..), FromNamedRecord(..), ToNamedRecord(..))
import Data.Text as Text (Text, concat, foldl', pack, snoc)
import GEODE.Metadata.SplitContext (Field(..), SplitContext(..), next)
import GEODE.Metadata.TSV.Header
(DefaultFields(..), HasDefaultHeader(..), HasDefaultHeader(..))
import GHC.Generics (Generic)
data Entry = Entry
......@@ -17,12 +15,10 @@ data Entry = Entry
, name :: Text
, page :: Int } deriving (Generic, Show)
instance DefaultOrdered Entry
instance FromNamedRecord Entry
instance ToNamedRecord Entry
instance HasDefaultHeader Entry where
defaultFields = DefaultFields [ "headword", "name", "page" ]
normalize :: Text -> Text
normalize = Text.foldl' appendIf mempty
where
......
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE FlexibleInstances, MultiParamTypeClasses, NamedFieldPuns, TypeSynonymInstances #-}
module GEODE.Metadata.TSV
( fromTsv
, readNamedTsv
, readTsv
, toTsv
, tsvFile
, tsvLines ) where
import Data.ByteString.Lazy as ByteString (putStr, readFile, writeFile)
( Document(..)
, ReadTSV(..)
, Result
, WriteTSV(..)
, fromTSV
, toTSV ) where
import Control.Monad.Except (ExceptT(..))
import Data.ByteString.Lazy as ByteString
( ByteString, getContents, putStr, readFile, writeFile )
import Data.Csv
( DecodeOptions(..), EncodeOptions(..), FromNamedRecord(..), FromRecord
, HasHeader(..), ToNamedRecord(..), ToRecord(..), decodeByNameWith
, HasHeader(..), Header, ToNamedRecord(..), ToRecord(..), decodeByNameWith
, decodeWith, defaultEncodeOptions, encodeByNameWith, encodeWith )
import Data.Foldable (toList)
import Data.Vector (Vector)
import GEODE.Metadata.TSV.Header (HasDefaultHeader, getHeader, for)
import System.Exit (die)
-- * General definitions
--
-- | A data type to represent documents, with their Header and rows represented
-- contained in a Vector
data Document a = Document
{ header :: Header
, rows :: Vector a }
deriving Show
type Result = ExceptT String IO
-- * Reading TSV data
--
-- | Typeclass of contexts from which a TSV file can be read
class ReadTSV s t a where
readTSV :: s -> t a
-- | The DecodeOptions for the TSV format
fromTSV :: DecodeOptions
fromTSV = DecodeOptions {decDelimiter = fromIntegral (fromEnum '\t')}
-- ** Reading Vectors of data which can be read from Record
--
-- | Three functions to factor a snippet common to all instances
noHeader :: FromRecord a => ByteString -> Either String (Vector a)
noHeader = decodeWith fromTSV NoHeader
parseRecords :: FromRecord a => IO ByteString -> Result (Vector a)
parseRecords = ExceptT . fmap noHeader
try :: Either String a -> IO a
try = either die pure
-- *** Reporting errors
-- | from stdin
instance FromRecord a => ReadTSV () Result (Vector a) where
readTSV _ = parseRecords $ ByteString.getContents
-- | from a file
instance FromRecord a => ReadTSV FilePath Result (Vector a) where
readTSV = parseRecords . ByteString.readFile
fromTsv :: DecodeOptions
fromTsv = DecodeOptions {decDelimiter = fromIntegral (fromEnum '\t')}
-- *** And making any error fatal
-- | from stdin
instance FromRecord a => ReadTSV () IO (Vector a) where
readTSV _ = noHeader <$> ByteString.getContents >>= try
readNamedTsv :: FromNamedRecord a => FilePath -> IO (Either String (Vector a))
readNamedTsv source =
(fmap snd . decodeByNameWith fromTsv) <$> ByteString.readFile source
-- | from a file
instance FromRecord a => ReadTSV FilePath IO (Vector a) where
readTSV path = noHeader <$> ByteString.readFile path >>= try
readTsv :: FromRecord a => FilePath -> IO (Either String (Vector a))
readTsv source = decodeWith fromTsv NoHeader <$> ByteString.readFile source
-- ** Now the same instances for data which can be read from NamedRecord
--
-- | Two functions to factor a snippet common to all instances
named :: FromNamedRecord a => ByteString -> Either String (Document a)
named = fmap (uncurry Document) . decodeByNameWith fromTSV
toTsv :: EncodeOptions
toTsv = defaultEncodeOptions
parseNamedRecords :: FromNamedRecord a => IO ByteString -> Result (Document a)
parseNamedRecords = ExceptT . fmap named
-- *** Reporting errors
-- | from stdin
instance FromNamedRecord a => ReadTSV () Result (Document a) where
readTSV _ = parseNamedRecords $ ByteString.getContents
-- | from a file
instance FromNamedRecord a => ReadTSV FilePath Result (Document a) where
readTSV = parseNamedRecords . ByteString.readFile
-- *** And making any error fatal
-- | from stdin
instance FromNamedRecord a => ReadTSV () IO (Document a) where
readTSV _ = named <$> ByteString.getContents >>= try
-- | from a file
instance FromNamedRecord a => ReadTSV FilePath IO (Document a) where
readTSV path = named <$> ByteString.readFile path >>= try
-- * Writing TSV data
--
-- | A class type to represent processes which output TSV
class WriteTSV d t a where
writeTSV :: d -> a -> t ()
-- | The EncodeOptions for the TSV format
toTSV :: EncodeOptions
toTSV = defaultEncodeOptions
{ encDelimiter = fromIntegral (fromEnum '\t')
, encUseCrLf = False }
tsvFile :: forall a t. (Foldable t, HasDefaultHeader a, ToNamedRecord a) =>
FilePath -> t a -> IO ()
tsvFile target = ByteString.writeFile target . encode . toList
where
encode = encodeByNameWith toTsv (getHeader (for :: a))
-- ** We know how to handle data which can be written to a Record
--
-- | to stdout
instance {-# OVERLAPPABLE #-} (Foldable t, ToRecord a) => WriteTSV () IO (t a) where
writeTSV _ = ByteString.putStr . encodeWith toTSV . toList
-- | to a file
instance {-# OVERLAPPABLE #-} (Foldable t, ToRecord a) => WriteTSV FilePath IO (t a) where
writeTSV path = ByteString.writeFile path . encodeWith toTSV . toList
-- ** And we can also handle data which can be written to a NamedRecord
--
-- | to stdout
instance ToNamedRecord a => WriteTSV () IO (Document a) where
writeTSV _ (Document {header, rows}) =
ByteString.putStr . encodeByNameWith toTSV header $ toList rows
tsvLines :: (Foldable t, ToRecord a) => t a -> IO ()
tsvLines = ByteString.putStr . encodeWith toTsv . toList
-- | to a file
instance ToNamedRecord a => WriteTSV FilePath IO (Document a) where
writeTSV path (Document {header, rows}) =
ByteString.writeFile path . encodeByNameWith toTSV header $ toList rows
{-# LANGUAGE ExplicitNamespaces, ScopedTypeVariables, TypeOperators #-}
module GEODE.Metadata.TSV.Header
( DefaultFields(..)
, HasDefaultHeader(..)
, WithDefaultHeader(..)
( WithDefaultHeader(..)
, for
, getHeader
, glue ) where
import Data.ByteString.Char8 as StrictByteString (pack)
import Data.Csv (Header, ToNamedRecord(..), ToRecord(..))
import Data.Csv (DefaultOrdered(..), Header, ToNamedRecord(..), ToRecord(..))
import Data.HashMap.Strict ((!))
import Data.Vector (fromList)
import GEODE.Metadata.Types (type (@)(..))
newtype WithDefaultHeader a = WithDefaultHeader a
newtype DefaultFields a = DefaultFields [String]
class HasDefaultHeader a where
defaultFields :: DefaultFields a
instance (HasDefaultHeader a, HasDefaultHeader b) => HasDefaultHeader (a @ b) where
defaultFields = DefaultFields (a ++ b)
where
DefaultFields a = (defaultFields :: DefaultFields a)
DefaultFields b = (defaultFields :: DefaultFields b)
-- | An alias to shorten and make more intuitive the magic of
-- the DefaultOrdered class type
getHeader :: DefaultOrdered a => a -> Header
getHeader = headerOrder
getHeader :: forall a. HasDefaultHeader a => a -> Header
getHeader _ = StrictByteString.pack <$> fromList fields
where
DefaultFields fields = (defaultFields :: DefaultFields a)
for :: HasDefaultHeader a => a
-- | This is just `undefined`, but nicer to read and making more sense with the
-- type notation `(for :: SomeTypeOfYours)`
for :: DefaultOrdered a => a
for = undefined
instance (HasDefaultHeader a, ToNamedRecord a) => ToRecord (WithDefaultHeader a) where
instance (DefaultOrdered a, ToNamedRecord a) => ToRecord (WithDefaultHeader a) where
toRecord (WithDefaultHeader a) = (toNamedRecord a !) <$> getHeader a
glue :: a -> b -> WithDefaultHeader (a @ b)
......
{-# LANGUAGE ExplicitNamespaces, FlexibleInstances, MultiParamTypeClasses, TypeOperators, UndecidableInstances #-}
{-# LANGUAGE ExplicitNamespaces, FlexibleInstances, MultiParamTypeClasses, ScopedTypeVariables, TypeOperators, UndecidableInstances #-}
module GEODE.Metadata.Types
( Has(..)
, ToJSONObject(..)
......@@ -6,7 +6,7 @@ module GEODE.Metadata.Types
import Data.Aeson (FromJSON(..), Object, Series, ToJSON(..), Value(..), pairs)
import Data.Aeson.KeyMap as Object (union)
import Data.Csv (FromNamedRecord(..), ToNamedRecord(..))
import Data.Csv (DefaultOrdered(..), FromNamedRecord(..), ToNamedRecord(..))
import Data.HashMap.Strict as Hash (union)
infixl 9 @
......@@ -25,6 +25,15 @@ instance Has a b => Has a (b @ c) where
instance {-# OVERLAPS #-} Has b (a @ b) where
get (_ :@: b) = b
instance (Eq a, Eq b) => Eq (a @ b) where
(a1 :@: b1) == (a2 :@: b2) = a1 == a2 && b1 == b2
instance (Ord a, Ord b) => Ord (a @ b) where
(a1 :@: b1) <= (a2 :@: b2) = a1 < a2 || (a1 == a2 && b1 <= b2)
instance (DefaultOrdered a, DefaultOrdered b) => DefaultOrdered (a @ b) where
headerOrder _ = headerOrder (undefined :: a) <> headerOrder (undefined :: b)
instance (ToNamedRecord a, ToNamedRecord b) => ToNamedRecord (a @ b) where
toNamedRecord (a :@: b) = Hash.union (toNamedRecord a) (toNamedRecord b)
......
......@@ -5,7 +5,7 @@ import Data.Csv (ToNamedRecord(..))
import Data.Foldable (toList)
import Data.HashMap.Strict ((!?))
import GEODE.Metadata (Work(..), ArticleRecord(..))
import GEODE.Metadata.TSV.Header (getHeader, for)
import GEODE.Metadata (getHeader, for)
import Test.HUnit (Test(..), (~?=))
import Test.HUnit.Extra (isJust)
......
......@@ -5,9 +5,8 @@ import Data.Csv (ToNamedRecord(..))
import Data.Foldable (toList)
import Data.HashMap.Strict ((!?))
import Data.Text (dropEnd)
import GEODE.Metadata (evalSplit, newEntry, normalize)
import GEODE.Metadata (evalSplit, for, getHeader, newEntry, normalize)
import GEODE.Metadata.Entry (Entry(..))
import GEODE.Metadata.TSV.Header (getHeader, for)
import Test.HUnit (Test(..), (~?=))
import Test.HUnit.Extra (isJust)
......