Skip to content
Snippets Groups Projects
Commit 7fca2bd5 authored by Alice Brenon's avatar Alice Brenon
Browse files

Trim Metadata a bit to make it only an inclusion shortcut, put TSV-specific...

Trim Metadata a bit to make it only an inclusion shortcut, put TSV-specific stuff out in separate module and cut Article in two
parent 80fd8c29
Branches main
Tags 0.0.1
No related merge requests found
{-# LANGUAGE NamedFieldPuns, OverloadedStrings #-} {-# LANGUAGE NamedFieldPuns, OverloadedStrings #-}
module GEODE.Metadata ( module GEODE.Metadata
Article(..) ( module ID
, Authors(..) , module Entry
, Book(..) , module TSV
, Domains(..) , module Projector
, FromBook(..) , module Types
, HasAuthors(..) , groupBy
, HasDomains(..) , indexBy
, InFile(..) , list
, TXMText , sortBy ) where
, Unique(..)
, groupBy
, list
, readTsv
, sortBy
, tsvFile
, tsvLines
) where
import Data.ByteString.Lazy as ByteString (putStr, readFile, writeFile)
import Data.ByteString.Char8 as StrictByteString (pack)
import Data.Csv
( DecodeOptions(..), EncodeOptions(..), FromRecord, HasHeader(..)
, ToNamedRecord, ToRecord, decodeWith, defaultEncodeOptions, encodeByNameWith
, encodeWith, header )
import Data.Foldable as Foldable (toList) import Data.Foldable as Foldable (toList)
import Data.List (sortOn) import Data.List (sortOn)
import Data.Map.Strict as Map (alter, empty, toList) import Data.Map.Strict as Map (Map, alter, empty, insert, toList)
import Data.Text as Text (Text, intercalate, unpack) import Data.Text as Text (Text, intercalate, unpack)
import Data.Vector as Vector (Vector) import GEODE.Metadata.ID as ID
import GEODE.Metadata.Article import GEODE.Metadata.Entry as Entry
import GEODE.Metadata.Projector import GEODE.Metadata.Projector as Projector
(FromBook(..), HasAuthors(..), HasDomains(..), InFile(..), TXMText, Unique(..)) import GEODE.Metadata.TSV as TSV
import GEODE.Metadata.Types (Authors(..), Book(..), Domains(..)) import GEODE.Metadata.Types as Types
list :: [Text] -> String list :: [Text] -> String
list ts = Text.unpack $ ":" <> intercalate ":" ts <> ":" list ts = Text.unpack $ ":" <> intercalate ":" ts <> ":"
readTsv :: FromRecord a => FilePath -> IO (Either String (Vector a))
readTsv source = decodeWith fromTsv HasHeader <$> ByteString.readFile source
where
fromTsv = DecodeOptions {decDelimiter = fromIntegral (fromEnum '\t')}
toTsv :: EncodeOptions
toTsv = defaultEncodeOptions
{ encDelimiter = fromIntegral (fromEnum '\t')
, encUseCrLf = False }
tsvFile :: ToNamedRecord a => FilePath -> [String] -> [a] -> IO ()
tsvFile target fields =
ByteString.writeFile target
. encodeByNameWith toTsv (header $ StrictByteString.pack <$> fields)
tsvLines :: ToRecord a => [a] -> IO ()
tsvLines = ByteString.putStr . encodeWith toTsv
sortBy :: (Foldable t, Ord k) => (a -> k) -> t a -> [a] sortBy :: (Foldable t, Ord k) => (a -> k) -> t a -> [a]
sortBy field = sortOn field . Foldable.toList sortBy field = sortOn field . Foldable.toList
...@@ -62,3 +30,6 @@ groupBy :: (Foldable t, Ord k) => (v -> k) -> t v -> [(k, [v])] ...@@ -62,3 +30,6 @@ groupBy :: (Foldable t, Ord k) => (v -> k) -> t v -> [(k, [v])]
groupBy field = Map.toList . foldr group Map.empty groupBy field = Map.toList . foldr group Map.empty
where where
group article = Map.alter (Just . maybe [article] (article:)) (field article) group article = Map.alter (Just . maybe [article] (article:)) (field article)
indexBy :: (Foldable t, Ord k) => (a -> k) -> t a -> Map k a
indexBy f = foldr (\a -> Map.insert (f a) a) Map.empty
module GEODE.Metadata.Entry
( Entry(..)
, headerSection ) where
import Data.Csv (FromNamedRecord(..), ToNamedRecord(..))
import Data.Text (Text)
import GEODE.Metadata.TSV (Default(..), DefaultHeader(..))
import GHC.Generics (Generic)
data Entry = Entry
{ headWord :: Text
, rank :: Int
, page :: Int } deriving (Generic, Show)
instance FromNamedRecord Entry
instance ToNamedRecord Entry
instance DefaultHeader Entry where
headerSection = Default [ "headWord", "rank", "page" ]
module GEODE.Metadata.ID
( HasID(..)
, ID
, book
, tome
, name
, headerSection
, relativePath
, uid ) where
import Data.Csv (FromNamedRecord(..), ToNamedRecord(..))
import Data.Text (Text, unpack)
import GEODE.Metadata.Types (Book)
--import GEODE.Metadata.Projector (InFile(..), Unique(..))
import GEODE.Metadata.TSV (Default(..), DefaultHeader(..))
import GHC.Generics (Generic)
import System.FilePath ((</>), (<.>))
import Text.Printf (printf)
data ID = ID
{ book_ :: Book
, tome_ :: Int
, name_ :: Text } deriving (Eq, Ord, Generic, Show)
class HasID t where
iD :: t -> ID
instance HasID ID where
iD = id
instance HasID (ID, a) where
iD = fst
book :: HasID a => a -> Book
book = book_ . iD
tome :: HasID a => a -> Int
tome = tome_ . iD
name :: HasID a => a -> Text
name = name_ . iD
instance FromNamedRecord ID
instance ToNamedRecord ID
uid :: HasID a => a -> String
uid a = printf "%s_%d_%s" (show $ book a) (tome a) (unpack $ name a)
relativePath :: HasID a => a -> String -> FilePath
relativePath a extension =
(show $ book a) </> (show $ tome a) </> (unpack $ name a) <.> extension
{-
instance Unique ID where
uid (ID {book, tome, name}) =
printf "%s_%d_%s" (show book) tome (unpack name)
instance InFile ID where
relativePath (ID {book, tome, name}) extension =
show book </> show tome </> unpack name <.> extension
-}
instance DefaultHeader ID where
headerSection = Default [ "book", "tome", "name" ]
{-# LANGUAGE ConstraintKinds #-} {-# LANGUAGE ConstraintKinds #-}
module GEODE.Metadata.Projector module GEODE.Metadata.Projector
( FromBook(..) --( FromBook(..)
, HasAuthors(..) --, FromTome(..)
, HasDomains(..) ( HasAuthors(..)
, InFile(..) , HasDomains(..) ) where
, TXMText --, InFile(..)
, Unique(..) ) where --, Named(..)
--, TXMText
import GEODE.Metadata.Types (Authors(..), Book, Domains(..)) --, Unique(..) ) where
import GEODE.Metadata.Types (Authors(..), Domains(..))
--import GEODE.Metadata.Types (Authors(..), Book, Domains(..))
import Data.Text (Text) import Data.Text (Text)
{-
class Unique a where class Unique a where
uid :: a -> String uid :: a -> String
class FromBook a where class FromBook a where
book :: a -> Book book :: a -> Book
class FromTome a where
tome :: a -> Int
class Named a where
name :: a -> Text
-}
class HasAuthors a where class HasAuthors a where
authors_ :: a -> Authors authors_ :: a -> Authors
...@@ -28,7 +39,9 @@ class HasDomains a where ...@@ -28,7 +39,9 @@ class HasDomains a where
domains :: a -> [Text] domains :: a -> [Text]
domains = getDomains . domains_ domains = getDomains . domains_
{-
class InFile a where class InFile a where
relativePath :: a -> FilePath relativePath :: a -> String -> FilePath
-}
type TXMText a = (Unique a, FromBook a, HasAuthors a, HasDomains a) --type TXMText a = (Unique a, FromBook a, HasAuthors a, HasDomains a)
{-# LANGUAGE ScopedTypeVariables, UndecidableInstances #-}
module GEODE.Metadata.TSV
( Default(..)
, DefaultHeader(..)
, readNamedTsv
, readTsv
, toTsv
, tsvFile
, tsvLines ) where
import Data.ByteString.Char8 as StrictByteString (pack)
import Data.ByteString.Lazy as ByteString (putStr, readFile, writeFile)
import Data.Csv
( DecodeOptions(..), EncodeOptions(..), FromNamedRecord, FromRecord
, HasHeader(..), ToNamedRecord(..), ToRecord(..), decodeByNameWith, decodeWith
, defaultEncodeOptions, encodeByNameWith, encodeWith, header )
import Data.Foldable (toList)
import Data.HashMap.Strict ((!))
import Data.Vector (Vector, fromList)
readNamedTsv :: FromNamedRecord a => FilePath -> IO (Either String (Vector a))
readNamedTsv source =
(fmap snd . decodeByNameWith fromTsv) <$> ByteString.readFile source
where
fromTsv = DecodeOptions {decDelimiter = fromIntegral (fromEnum '\t')}
readTsv :: FromRecord a => FilePath -> IO (Either String (Vector a))
readTsv source = decodeWith fromTsv NoHeader <$> ByteString.readFile source
where
fromTsv = DecodeOptions {decDelimiter = fromIntegral (fromEnum '\t')}
newtype Default a = Default { defaultHeader :: [String] }
class DefaultHeader a where
headerSection :: Default a
instance (DefaultHeader a, DefaultHeader b) => DefaultHeader (a, b) where
headerSection = Default (a ++ b)
where
Default a = (headerSection :: Default a)
Default b = (headerSection :: Default b)
instance (DefaultHeader a, ToNamedRecord a) => ToRecord a where
toRecord = fromList . prepare . toNamedRecord
where
Default fields = (headerSection :: Default a)
prepare namedRecord = (namedRecord !) . StrictByteString.pack <$> fields
toTsv :: EncodeOptions
toTsv = defaultEncodeOptions
{ encDelimiter = fromIntegral (fromEnum '\t')
, encUseCrLf = False }
tsvFile :: forall a. (DefaultHeader a, ToNamedRecord a) => FilePath -> [a] -> IO ()
tsvFile target =
ByteString.writeFile target
. encodeByNameWith toTsv (header $ StrictByteString.pack <$> fields)
where
Default fields = (headerSection :: Default a)
tsvLines :: (Foldable t, ToRecord a) => t a -> IO ()
tsvLines = ByteString.putStr . encodeWith toTsv . toList
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment