Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • geode/ghc-geode
1 result
Show changes
Commits on Source (4)
...@@ -24,7 +24,6 @@ library ...@@ -24,7 +24,6 @@ library
exposed-modules: GEODE.Metadata exposed-modules: GEODE.Metadata
, GEODE.Metadata.Entry , GEODE.Metadata.Entry
, GEODE.Metadata.SplitContext , GEODE.Metadata.SplitContext
, GEODE.Metadata.TSV.Header
, GEODE.Options , GEODE.Options
-- Modules included in this library but not exported. -- Modules included in this library but not exported.
...@@ -32,6 +31,7 @@ library ...@@ -32,6 +31,7 @@ library
, GEODE.Metadata.ArticleRecord , GEODE.Metadata.ArticleRecord
, GEODE.Metadata.Record , GEODE.Metadata.Record
, GEODE.Metadata.TSV , GEODE.Metadata.TSV
, GEODE.Metadata.TSV.Header
, GEODE.Metadata.Types , GEODE.Metadata.Types
, GEODE.Metadata.Work , GEODE.Metadata.Work
......
...@@ -24,7 +24,6 @@ import GEODE.Metadata.Record as Record ...@@ -24,7 +24,6 @@ import GEODE.Metadata.Record as Record
import GEODE.Metadata.SplitContext as SplitContext hiding (get, page, rank) import GEODE.Metadata.SplitContext as SplitContext hiding (get, page, rank)
import GEODE.Metadata.TSV as TSV import GEODE.Metadata.TSV as TSV
import GEODE.Metadata.TSV.Header as TSV_Header import GEODE.Metadata.TSV.Header as TSV_Header
( DefaultFields(..), HasDefaultHeader(..), WithDefaultHeader(..), glue )
import GEODE.Metadata.Types as Types import GEODE.Metadata.Types as Types
import GEODE.Metadata.Work as Work import GEODE.Metadata.Work as Work
......
...@@ -7,10 +7,9 @@ module GEODE.Metadata.ArticleRecord ...@@ -7,10 +7,9 @@ module GEODE.Metadata.ArticleRecord
import Data.Aeson ((.=), FromJSON(..), ToJSON(..)) import Data.Aeson ((.=), FromJSON(..), ToJSON(..))
import Data.Aeson.KeyMap as KeyMap (fromList) import Data.Aeson.KeyMap as KeyMap (fromList)
import Data.Csv (FromNamedRecord(..), ToNamedRecord(..)) import Data.Csv (DefaultOrdered(..), FromNamedRecord(..), ToNamedRecord(..))
import GEODE.Metadata.Record (Record(..)) import GEODE.Metadata.Record (Record(..))
import GEODE.Metadata.Types (ToJSONObject(..)) import GEODE.Metadata.Types (ToJSONObject(..))
import GEODE.Metadata.TSV.Header (DefaultFields(..), HasDefaultHeader(..))
import GEODE.Metadata.Work (Work(..)) import GEODE.Metadata.Work (Work(..))
import GHC.Generics (Generic) import GHC.Generics (Generic)
import System.FilePath ((</>), (<.>)) import System.FilePath ((</>), (<.>))
...@@ -23,6 +22,7 @@ data ArticleRecord = ArticleRecord ...@@ -23,6 +22,7 @@ data ArticleRecord = ArticleRecord
instance FromNamedRecord ArticleRecord instance FromNamedRecord ArticleRecord
instance ToNamedRecord ArticleRecord instance ToNamedRecord ArticleRecord
instance DefaultOrdered ArticleRecord
instance ToJSONObject ArticleRecord where instance ToJSONObject ArticleRecord where
toJSONObject (ArticleRecord {work, volume, article}) = KeyMap.fromList toJSONObject (ArticleRecord {work, volume, article}) = KeyMap.fromList
...@@ -42,6 +42,3 @@ instance Record ArticleRecord where ...@@ -42,6 +42,3 @@ instance Record ArticleRecord where
relativePath (ArticleRecord {work, volume, article}) extension = relativePath (ArticleRecord {work, volume, article}) extension =
(show work) </> ("T" <> show volume) </> (show article) <.> extension (show work) </> ("T" <> show volume) </> (show article) <.> extension
instance HasDefaultHeader ArticleRecord where
defaultFields = DefaultFields [ "work", "volume", "article" ]
...@@ -5,14 +5,18 @@ module GEODE.Metadata.Contrastive ...@@ -5,14 +5,18 @@ module GEODE.Metadata.Contrastive
, formatList ) where , formatList ) where
import Data.Csv import Data.Csv
( FromField(..), FromNamedRecord(..), ToNamedRecord(..), ToField(..) ) ( DefaultOrdered(..), FromField(..), FromNamedRecord(..), ToNamedRecord(..)
, ToField(..) )
import Data.Text (Text, intercalate, splitOn, uncons, unsnoc) import Data.Text (Text, intercalate, splitOn, uncons, unsnoc)
import GEODE.Metadata.TSV.Header (DefaultFields(..), HasDefaultHeader(..))
import GHC.Generics (Generic) import GHC.Generics (Generic)
newtype MultiText = MultiText newtype MultiText = MultiText
{ getList :: [Text] } deriving (Show) { getList :: [Text] } deriving (Show)
-- | We represent fields with multiple values by ':'-surrounded values
-- (including a leading and trailing ':') to be able to always write the same
-- regex to match for their value (/.*:Géographie:.*/, no matter if it occurs at
-- the begining, the end or in the middle of the field
formatList :: MultiText -> Text formatList :: MultiText -> Text
formatList = colonFormat . getList formatList = colonFormat . getList
where where
...@@ -39,8 +43,6 @@ data Contrastive = Contrastive ...@@ -39,8 +43,6 @@ data Contrastive = Contrastive
, domains :: MultiText , domains :: MultiText
, subCorpus :: MultiText } deriving (Generic, Show) , subCorpus :: MultiText } deriving (Generic, Show)
instance DefaultOrdered Contrastive
instance FromNamedRecord Contrastive instance FromNamedRecord Contrastive
instance ToNamedRecord Contrastive instance ToNamedRecord Contrastive
instance HasDefaultHeader Contrastive where
defaultFields = DefaultFields [ "authors", "domains", "subCorpus" ]
...@@ -5,11 +5,9 @@ module GEODE.Metadata.Entry ...@@ -5,11 +5,9 @@ module GEODE.Metadata.Entry
, normalize ) where , normalize ) where
import Data.Char (isAlphaNum, isSpace, isUpper, toLower) import Data.Char (isAlphaNum, isSpace, isUpper, toLower)
import Data.Csv (FromNamedRecord(..), ToNamedRecord(..)) import Data.Csv (DefaultOrdered(..), FromNamedRecord(..), ToNamedRecord(..))
import Data.Text as Text (Text, concat, foldl', pack, snoc) import Data.Text as Text (Text, concat, foldl', pack, snoc)
import GEODE.Metadata.SplitContext (Field(..), SplitContext(..), next) import GEODE.Metadata.SplitContext (Field(..), SplitContext(..), next)
import GEODE.Metadata.TSV.Header
(DefaultFields(..), HasDefaultHeader(..), HasDefaultHeader(..))
import GHC.Generics (Generic) import GHC.Generics (Generic)
data Entry = Entry data Entry = Entry
...@@ -17,12 +15,10 @@ data Entry = Entry ...@@ -17,12 +15,10 @@ data Entry = Entry
, name :: Text , name :: Text
, page :: Int } deriving (Generic, Show) , page :: Int } deriving (Generic, Show)
instance DefaultOrdered Entry
instance FromNamedRecord Entry instance FromNamedRecord Entry
instance ToNamedRecord Entry instance ToNamedRecord Entry
instance HasDefaultHeader Entry where
defaultFields = DefaultFields [ "headword", "name", "page" ]
normalize :: Text -> Text normalize :: Text -> Text
normalize = Text.foldl' appendIf mempty normalize = Text.foldl' appendIf mempty
where where
......
{-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE FlexibleInstances, MultiParamTypeClasses, NamedFieldPuns, TypeSynonymInstances #-}
module GEODE.Metadata.TSV module GEODE.Metadata.TSV
( fromTsv ( Document(..)
, readNamedTsv , ReadTSV(..)
, readTsv , Result
, toTsv , WriteTSV(..)
, tsvFile , fromTSV
, tsvLines ) where , toTSV ) where
import Data.ByteString.Lazy as ByteString (putStr, readFile, writeFile) import Control.Monad.Except (ExceptT(..))
import Data.ByteString.Lazy as ByteString
( ByteString, getContents, putStr, readFile, writeFile )
import Data.Csv import Data.Csv
( DecodeOptions(..), EncodeOptions(..), FromNamedRecord(..), FromRecord ( DecodeOptions(..), EncodeOptions(..), FromNamedRecord(..), FromRecord
, HasHeader(..), ToNamedRecord(..), ToRecord(..), decodeByNameWith , HasHeader(..), Header, ToNamedRecord(..), ToRecord(..), decodeByNameWith
, decodeWith, defaultEncodeOptions, encodeByNameWith, encodeWith ) , decodeWith, defaultEncodeOptions, encodeByNameWith, encodeWith )
import Data.Foldable (toList) import Data.Foldable (toList)
import Data.Vector (Vector) import Data.Vector (Vector)
import GEODE.Metadata.TSV.Header (HasDefaultHeader, getHeader, for) import System.Exit (die)
-- * General definitions
--
-- | A data type to represent documents, with their Header and rows represented
-- contained in a Vector
data Document a = Document
{ header :: Header
, rows :: Vector a }
deriving Show
type Result = ExceptT String IO
-- * Reading TSV data
--
-- | Typeclass of contexts from which a TSV file can be read
class ReadTSV s t a where
readTSV :: s -> t a
-- | The DecodeOptions for the TSV format
fromTSV :: DecodeOptions
fromTSV = DecodeOptions {decDelimiter = fromIntegral (fromEnum '\t')}
-- ** Reading Vectors of data which can be read from Record
--
-- | Three functions to factor a snippet common to all instances
noHeader :: FromRecord a => ByteString -> Either String (Vector a)
noHeader = decodeWith fromTSV NoHeader
parseRecords :: FromRecord a => IO ByteString -> Result (Vector a)
parseRecords = ExceptT . fmap noHeader
try :: Either String a -> IO a
try = either die pure
-- *** Reporting errors
-- | from stdin
instance FromRecord a => ReadTSV () Result (Vector a) where
readTSV _ = parseRecords $ ByteString.getContents
-- | from a file
instance FromRecord a => ReadTSV FilePath Result (Vector a) where
readTSV = parseRecords . ByteString.readFile
fromTsv :: DecodeOptions -- *** And making any error fatal
fromTsv = DecodeOptions {decDelimiter = fromIntegral (fromEnum '\t')} -- | from stdin
instance FromRecord a => ReadTSV () IO (Vector a) where
readTSV _ = noHeader <$> ByteString.getContents >>= try
readNamedTsv :: FromNamedRecord a => FilePath -> IO (Either String (Vector a)) -- | from a file
readNamedTsv source = instance FromRecord a => ReadTSV FilePath IO (Vector a) where
(fmap snd . decodeByNameWith fromTsv) <$> ByteString.readFile source readTSV path = noHeader <$> ByteString.readFile path >>= try
readTsv :: FromRecord a => FilePath -> IO (Either String (Vector a)) -- ** Now the same instances for data which can be read from NamedRecord
readTsv source = decodeWith fromTsv NoHeader <$> ByteString.readFile source --
-- | Two functions to factor a snippet common to all instances
named :: FromNamedRecord a => ByteString -> Either String (Document a)
named = fmap (uncurry Document) . decodeByNameWith fromTSV
toTsv :: EncodeOptions parseNamedRecords :: FromNamedRecord a => IO ByteString -> Result (Document a)
toTsv = defaultEncodeOptions parseNamedRecords = ExceptT . fmap named
-- *** Reporting errors
-- | from stdin
instance FromNamedRecord a => ReadTSV () Result (Document a) where
readTSV _ = parseNamedRecords $ ByteString.getContents
-- | from a file
instance FromNamedRecord a => ReadTSV FilePath Result (Document a) where
readTSV = parseNamedRecords . ByteString.readFile
-- *** And making any error fatal
-- | from stdin
instance FromNamedRecord a => ReadTSV () IO (Document a) where
readTSV _ = named <$> ByteString.getContents >>= try
-- | from a file
instance FromNamedRecord a => ReadTSV FilePath IO (Document a) where
readTSV path = named <$> ByteString.readFile path >>= try
-- * Writing TSV data
--
-- | A class type to represent processes which output TSV
class WriteTSV d t a where
writeTSV :: d -> a -> t ()
-- | The EncodeOptions for the TSV format
toTSV :: EncodeOptions
toTSV = defaultEncodeOptions
{ encDelimiter = fromIntegral (fromEnum '\t') { encDelimiter = fromIntegral (fromEnum '\t')
, encUseCrLf = False } , encUseCrLf = False }
tsvFile :: forall a t. (Foldable t, HasDefaultHeader a, ToNamedRecord a) => -- ** We know how to handle data which can be written to a Record
FilePath -> t a -> IO () --
tsvFile target = ByteString.writeFile target . encode . toList -- | to stdout
where instance {-# OVERLAPPABLE #-} (Foldable t, ToRecord a) => WriteTSV () IO (t a) where
encode = encodeByNameWith toTsv (getHeader (for :: a)) writeTSV _ = ByteString.putStr . encodeWith toTSV . toList
-- | to a file
instance {-# OVERLAPPABLE #-} (Foldable t, ToRecord a) => WriteTSV FilePath IO (t a) where
writeTSV path = ByteString.writeFile path . encodeWith toTSV . toList
-- ** And we can also handle data which can be written to a NamedRecord
--
-- | to stdout
instance ToNamedRecord a => WriteTSV () IO (Document a) where
writeTSV _ (Document {header, rows}) =
ByteString.putStr . encodeByNameWith toTSV header $ toList rows
tsvLines :: (Foldable t, ToRecord a) => t a -> IO () -- | to a file
tsvLines = ByteString.putStr . encodeWith toTsv . toList instance ToNamedRecord a => WriteTSV FilePath IO (Document a) where
writeTSV path (Document {header, rows}) =
ByteString.writeFile path . encodeByNameWith toTSV header $ toList rows
{-# LANGUAGE ExplicitNamespaces, ScopedTypeVariables, TypeOperators #-} {-# LANGUAGE ExplicitNamespaces, ScopedTypeVariables, TypeOperators #-}
module GEODE.Metadata.TSV.Header module GEODE.Metadata.TSV.Header
( DefaultFields(..) ( WithDefaultHeader(..)
, HasDefaultHeader(..)
, WithDefaultHeader(..)
, for , for
, getHeader , getHeader
, glue ) where , glue ) where
import Data.ByteString.Char8 as StrictByteString (pack) import Data.Csv (DefaultOrdered(..), Header, ToNamedRecord(..), ToRecord(..))
import Data.Csv (Header, ToNamedRecord(..), ToRecord(..))
import Data.HashMap.Strict ((!)) import Data.HashMap.Strict ((!))
import Data.Vector (fromList)
import GEODE.Metadata.Types (type (@)(..)) import GEODE.Metadata.Types (type (@)(..))
newtype WithDefaultHeader a = WithDefaultHeader a newtype WithDefaultHeader a = WithDefaultHeader a
newtype DefaultFields a = DefaultFields [String]
class HasDefaultHeader a where
defaultFields :: DefaultFields a
instance (HasDefaultHeader a, HasDefaultHeader b) => HasDefaultHeader (a @ b) where -- | An alias to shorten and make more intuitive the magic of
defaultFields = DefaultFields (a ++ b) -- the DefaultOrdered class type
where getHeader :: DefaultOrdered a => a -> Header
DefaultFields a = (defaultFields :: DefaultFields a) getHeader = headerOrder
DefaultFields b = (defaultFields :: DefaultFields b)
getHeader :: forall a. HasDefaultHeader a => a -> Header -- | This is just `undefined`, but nicer to read and making more sense with the
getHeader _ = StrictByteString.pack <$> fromList fields -- type notation `(for :: SomeTypeOfYours)`
where for :: DefaultOrdered a => a
DefaultFields fields = (defaultFields :: DefaultFields a)
for :: HasDefaultHeader a => a
for = undefined for = undefined
instance (HasDefaultHeader a, ToNamedRecord a) => ToRecord (WithDefaultHeader a) where instance (DefaultOrdered a, ToNamedRecord a) => ToRecord (WithDefaultHeader a) where
toRecord (WithDefaultHeader a) = (toNamedRecord a !) <$> getHeader a toRecord (WithDefaultHeader a) = (toNamedRecord a !) <$> getHeader a
glue :: a -> b -> WithDefaultHeader (a @ b) glue :: a -> b -> WithDefaultHeader (a @ b)
......
{-# LANGUAGE ExplicitNamespaces, FlexibleInstances, MultiParamTypeClasses, TypeOperators, UndecidableInstances #-} {-# LANGUAGE ExplicitNamespaces, FlexibleInstances, MultiParamTypeClasses, ScopedTypeVariables, TypeOperators, UndecidableInstances #-}
module GEODE.Metadata.Types module GEODE.Metadata.Types
( Has(..) ( Has(..)
, ToJSONObject(..) , ToJSONObject(..)
...@@ -6,7 +6,7 @@ module GEODE.Metadata.Types ...@@ -6,7 +6,7 @@ module GEODE.Metadata.Types
import Data.Aeson (FromJSON(..), Object, Series, ToJSON(..), Value(..), pairs) import Data.Aeson (FromJSON(..), Object, Series, ToJSON(..), Value(..), pairs)
import Data.Aeson.KeyMap as Object (union) import Data.Aeson.KeyMap as Object (union)
import Data.Csv (FromNamedRecord(..), ToNamedRecord(..)) import Data.Csv (DefaultOrdered(..), FromNamedRecord(..), ToNamedRecord(..))
import Data.HashMap.Strict as Hash (union) import Data.HashMap.Strict as Hash (union)
infixl 9 @ infixl 9 @
...@@ -25,6 +25,15 @@ instance Has a b => Has a (b @ c) where ...@@ -25,6 +25,15 @@ instance Has a b => Has a (b @ c) where
instance {-# OVERLAPS #-} Has b (a @ b) where instance {-# OVERLAPS #-} Has b (a @ b) where
get (_ :@: b) = b get (_ :@: b) = b
instance (Eq a, Eq b) => Eq (a @ b) where
(a1 :@: b1) == (a2 :@: b2) = a1 == a2 && b1 == b2
instance (Ord a, Ord b) => Ord (a @ b) where
(a1 :@: b1) <= (a2 :@: b2) = a1 < a2 || (a1 == a2 && b1 <= b2)
instance (DefaultOrdered a, DefaultOrdered b) => DefaultOrdered (a @ b) where
headerOrder _ = headerOrder (undefined :: a) <> headerOrder (undefined :: b)
instance (ToNamedRecord a, ToNamedRecord b) => ToNamedRecord (a @ b) where instance (ToNamedRecord a, ToNamedRecord b) => ToNamedRecord (a @ b) where
toNamedRecord (a :@: b) = Hash.union (toNamedRecord a) (toNamedRecord b) toNamedRecord (a :@: b) = Hash.union (toNamedRecord a) (toNamedRecord b)
......
...@@ -5,7 +5,7 @@ import Data.Csv (ToNamedRecord(..)) ...@@ -5,7 +5,7 @@ import Data.Csv (ToNamedRecord(..))
import Data.Foldable (toList) import Data.Foldable (toList)
import Data.HashMap.Strict ((!?)) import Data.HashMap.Strict ((!?))
import GEODE.Metadata (Work(..), ArticleRecord(..)) import GEODE.Metadata (Work(..), ArticleRecord(..))
import GEODE.Metadata.TSV.Header (getHeader, for) import GEODE.Metadata (getHeader, for)
import Test.HUnit (Test(..), (~?=)) import Test.HUnit (Test(..), (~?=))
import Test.HUnit.Extra (isJust) import Test.HUnit.Extra (isJust)
......
...@@ -5,9 +5,8 @@ import Data.Csv (ToNamedRecord(..)) ...@@ -5,9 +5,8 @@ import Data.Csv (ToNamedRecord(..))
import Data.Foldable (toList) import Data.Foldable (toList)
import Data.HashMap.Strict ((!?)) import Data.HashMap.Strict ((!?))
import Data.Text (dropEnd) import Data.Text (dropEnd)
import GEODE.Metadata (evalSplit, newEntry, normalize) import GEODE.Metadata (evalSplit, for, getHeader, newEntry, normalize)
import GEODE.Metadata.Entry (Entry(..)) import GEODE.Metadata.Entry (Entry(..))
import GEODE.Metadata.TSV.Header (getHeader, for)
import Test.HUnit (Test(..), (~?=)) import Test.HUnit (Test(..), (~?=))
import Test.HUnit.Extra (isJust) import Test.HUnit.Extra (isJust)
......