diff --git a/geode.cabal b/geode.cabal index bb713ea6313d69858da112de7f1050f06f9f9e58..030f41e311130398eb2a706963a5e30bb94ebe4d 100644 --- a/geode.cabal +++ b/geode.cabal @@ -28,8 +28,8 @@ library , GEODE.Options -- Modules included in this library but not exported. - other-modules: GEODE.Metadata.PrimaryKey - , GEODE.Metadata.Projector + other-modules: GEODE.Metadata.Contrastive + , GEODE.Metadata.PrimaryKey , GEODE.Metadata.TSV , GEODE.Metadata.Types diff --git a/lib/GEODE/Metadata.hs b/lib/GEODE/Metadata.hs index 964e1aa1f9311c323e2a8b587747eda04ecd13ae..34515a138e651c2a3aed4096d1aaa193b96d71a9 100644 --- a/lib/GEODE/Metadata.hs +++ b/lib/GEODE/Metadata.hs @@ -1,8 +1,8 @@ -{-# LANGUAGE NamedFieldPuns, OverloadedStrings #-} +{-# LANGUAGE OverloadedStrings #-} module GEODE.Metadata - ( module PrimaryKey + ( module Contrastive , module Entry - , module Projector + , module PrimaryKey , module SplitContext , module TSV , module TSV_Header @@ -16,11 +16,11 @@ import Data.Foldable as Foldable (toList) import Data.List (sortOn) import Data.Map.Strict as Map (Map, alter, empty, insert, toList) import Data.Text as Text (Text, intercalate, unpack) -import GEODE.Metadata.PrimaryKey as PrimaryKey +import GEODE.Metadata.Contrastive as Contrastive import GEODE.Metadata.Entry as Entry ( Entry(headWord, name, page), newEntry, normalize ) -import GEODE.Metadata.Projector as Projector -import GEODE.Metadata.SplitContext as SplitContext hiding (page, rank) +import GEODE.Metadata.PrimaryKey as PrimaryKey +import GEODE.Metadata.SplitContext as SplitContext hiding (get, page, rank) import GEODE.Metadata.TSV as TSV import GEODE.Metadata.TSV.Header as TSV_Header ( DefaultFields(..), HasDefaultHeader(..), WithDefaultHeader(..), glue ) diff --git a/lib/GEODE/Metadata/Contrastive.hs b/lib/GEODE/Metadata/Contrastive.hs new file mode 100644 index 0000000000000000000000000000000000000000..5a437995e0c26dac35f8c3818132a76545fab691 --- /dev/null +++ b/lib/GEODE/Metadata/Contrastive.hs @@ -0,0 +1,25 @@ +{-# LANGUAGE OverloadedStrings #-} +module GEODE.Metadata.Contrastive + ( Authors(..) + , Domains(..) ) where + +import Data.Csv (Field, FromField(..), Parser, ToField(..)) +import Data.Text (Text, intercalate, splitOn) +newtype Authors = Authors + { getAuthors :: [Text] } deriving Show +newtype Domains = Domains + { getDomains :: [Text] } deriving Show + +sepBy :: Text -> Field -> Parser [Text] +sepBy s = fmap (splitOn s) . parseField + +instance FromField Authors where + parseField = fmap Authors . sepBy " & " +instance ToField Authors where + toField = toField . intercalate " & ". getAuthors + +instance FromField Domains where + parseField = fmap Domains . sepBy " | " +instance ToField Domains where + toField = toField . intercalate " | ". getDomains + diff --git a/lib/GEODE/Metadata/Entry.hs b/lib/GEODE/Metadata/Entry.hs index fca0819fc0c39d5159364010da99745f3d895ad4..c67ea4b946d492e99ab6f75c42ce7fcf80e64671 100644 --- a/lib/GEODE/Metadata/Entry.hs +++ b/lib/GEODE/Metadata/Entry.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE DeriveGeneric, FlexibleContexts, NamedFieldPuns, OverloadedStrings #-} +{-# LANGUAGE DeriveGeneric, FlexibleContexts, OverloadedStrings #-} module GEODE.Metadata.Entry ( Entry(..) , newEntry diff --git a/lib/GEODE/Metadata/PrimaryKey.hs b/lib/GEODE/Metadata/PrimaryKey.hs index 385d66de8407b348a046669b399d2bdc59c6772b..2bfffa76c24bf604b7a4b585469885735178d8bc 100644 --- a/lib/GEODE/Metadata/PrimaryKey.hs +++ b/lib/GEODE/Metadata/PrimaryKey.hs @@ -1,45 +1,47 @@ -{-# LANGUAGE DeriveGeneric, FlexibleInstances, NamedFieldPuns #-} +{-# LANGUAGE DeriveGeneric, FlexibleContexts, FlexibleInstances, NamedFieldPuns, OverloadedStrings #-} module GEODE.Metadata.PrimaryKey - ( HasPK(..) + ( Book(..) , PrimaryKey(..) , relativePath , uid ) where -import Data.Csv (FromNamedRecord(..), ToNamedRecord(..)) -import GEODE.Metadata.Types (Book) +import Data.Csv (FromField(..), FromNamedRecord(..), ToNamedRecord(..), ToField(..)) +import GEODE.Metadata.Types (Has(..)) import GEODE.Metadata.TSV.Header (DefaultFields(..), HasDefaultHeader(..), HasDefaultHeader(..)) import GHC.Generics (Generic) import System.FilePath ((</>), (<.>)) import Text.Printf (printf) +data Book = EDdA | LGE | Wikipedia deriving (Eq, Ord, Read, Show) + +instance FromField Book where + parseField "EDdA" = pure EDdA + parseField "LGE" = pure LGE + parseField "Wikipedia" = pure Wikipedia + parseField _ = mempty + +instance ToField Book where + toField = toField . show + data PrimaryKey = PrimaryKey { book :: Book , tome :: Int , rank :: Int } deriving (Eq, Ord, Generic, Show) -class HasPK t where - pKey :: t -> PrimaryKey - -instance HasPK PrimaryKey where - pKey = id - -instance HasPK (PrimaryKey, a) where - pKey = fst - instance FromNamedRecord PrimaryKey instance ToNamedRecord PrimaryKey -uid :: HasPK a => a -> String +uid :: Has PrimaryKey a => a -> String uid a = printf "%s_%d_%d" (show $ book) tome rank where - PrimaryKey {book, tome, rank} = pKey a + PrimaryKey {book, tome, rank} = get a -relativePath :: HasPK a => a -> String -> FilePath +relativePath :: Has PrimaryKey a => a -> String -> FilePath relativePath a extension = (show book) </> (show tome) </> (show rank) <.> extension where - PrimaryKey {book, tome, rank} = pKey a + PrimaryKey {book, tome, rank} = get a instance HasDefaultHeader PrimaryKey where defaultFields = DefaultFields [ "book", "tome", "rank" ] diff --git a/lib/GEODE/Metadata/TSV.hs b/lib/GEODE/Metadata/TSV.hs index 87766c7452ffc494588f6f3dc7c53be7da70e62e..0b5c6719be8252a9c5def25060b80d57607a08ee 100644 --- a/lib/GEODE/Metadata/TSV.hs +++ b/lib/GEODE/Metadata/TSV.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE FlexibleInstances, ScopedTypeVariables #-} +{-# LANGUAGE ScopedTypeVariables #-} module GEODE.Metadata.TSV ( readNamedTsv , readTsv @@ -31,9 +31,11 @@ toTsv = defaultEncodeOptions { encDelimiter = fromIntegral (fromEnum '\t') , encUseCrLf = False } -tsvFile :: forall a. (HasDefaultHeader a, ToNamedRecord a) => FilePath -> [a] -> IO () -tsvFile target = - ByteString.writeFile target . encodeByNameWith toTsv (getHeader (for :: a)) +tsvFile :: forall a t. (Foldable t, HasDefaultHeader a, ToNamedRecord a) => + FilePath -> t a -> IO () +tsvFile target = ByteString.writeFile target . encode . toList + where + encode = encodeByNameWith toTsv (getHeader (for :: a)) tsvLines :: (Foldable t, ToRecord a) => t a -> IO () tsvLines = ByteString.putStr . encodeWith toTsv . toList diff --git a/lib/GEODE/Metadata/TSV/Header.hs b/lib/GEODE/Metadata/TSV/Header.hs index 0defa5eb7dc23d279f50fcfffd09d64ff549300d..6e084e628230f23e007fe6d9d711ab2f0a7d2b04 100644 --- a/lib/GEODE/Metadata/TSV/Header.hs +++ b/lib/GEODE/Metadata/TSV/Header.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE FlexibleInstances, ScopedTypeVariables, UndecidableInstances #-} +{-# LANGUAGE ExplicitNamespaces, ScopedTypeVariables, TypeOperators #-} module GEODE.Metadata.TSV.Header ( DefaultFields(..) , HasDefaultHeader(..) @@ -8,16 +8,17 @@ module GEODE.Metadata.TSV.Header , glue ) where import Data.ByteString.Char8 as StrictByteString (pack) -import Data.Csv (FromNamedRecord(..), Header, ToNamedRecord(..), ToRecord(..)) -import Data.HashMap.Strict ((!), union) +import Data.Csv (Header, ToNamedRecord(..), ToRecord(..)) +import Data.HashMap.Strict ((!)) import Data.Vector (fromList) +import GEODE.Metadata.Types (type (@)(..)) newtype WithDefaultHeader a = WithDefaultHeader a newtype DefaultFields a = DefaultFields [String] class HasDefaultHeader a where defaultFields :: DefaultFields a -instance (HasDefaultHeader a, HasDefaultHeader b) => HasDefaultHeader (a, b) where +instance (HasDefaultHeader a, HasDefaultHeader b) => HasDefaultHeader (a @ b) where defaultFields = DefaultFields (a ++ b) where DefaultFields a = (defaultFields :: DefaultFields a) @@ -31,14 +32,8 @@ getHeader _ = StrictByteString.pack <$> fromList fields for :: HasDefaultHeader a => a for = undefined -glue :: a -> b -> WithDefaultHeader (a, b) -glue a b = WithDefaultHeader (a, b) - -instance (ToNamedRecord a, ToNamedRecord b) => ToNamedRecord (a, b) where - toNamedRecord (a, b) = union (toNamedRecord a) (toNamedRecord b) - -instance (FromNamedRecord a, FromNamedRecord b) => FromNamedRecord (a, b) where - parseNamedRecord nr = (,) <$> parseNamedRecord nr <*> parseNamedRecord nr - instance (HasDefaultHeader a, ToNamedRecord a) => ToRecord (WithDefaultHeader a) where toRecord (WithDefaultHeader a) = (toNamedRecord a !) <$> getHeader a + +glue :: a -> b -> WithDefaultHeader (a @ b) +glue a b = WithDefaultHeader (a :@: b) diff --git a/lib/GEODE/Metadata/Types.hs b/lib/GEODE/Metadata/Types.hs index 5e43d627af505e4947b5bf7f3111ce523f3d0dee..d8ecc47c02e4eb87827eeb7ca304fecf5fcc1965 100644 --- a/lib/GEODE/Metadata/Types.hs +++ b/lib/GEODE/Metadata/Types.hs @@ -1,38 +1,27 @@ -{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ExplicitNamespaces, FlexibleInstances, MultiParamTypeClasses, TypeOperators #-} module GEODE.Metadata.Types - ( Authors(..) - , Book(..) - , Domains(..) ) where - -import Control.Applicative (empty) -import Data.Csv (Field, FromField(..), Parser, ToField(..)) -import Data.Text (Text, intercalate, splitOn) - -data Book = EDdA | LGE | Wikipedia deriving (Eq, Ord, Read, Show) - -newtype Authors = Authors - { getAuthors :: [Text] } deriving Show -newtype Domains = Domains - { getDomains :: [Text] } deriving Show - -instance FromField Book where - parseField "EDdA" = pure EDdA - parseField "LGE" = pure LGE - parseField "Wikipedia" = pure Wikipedia - parseField _ = empty -instance ToField Book where - toField = toField . show - -sepBy :: Text -> Field -> Parser [Text] -sepBy s = fmap (splitOn s) . parseField - -instance FromField Authors where - parseField = fmap Authors . sepBy " & " -instance ToField Authors where - toField = toField . intercalate " & ". getAuthors - -instance FromField Domains where - parseField = fmap Domains . sepBy " | " -instance ToField Domains where - toField = toField . intercalate " | ". getDomains + ( Has(..) + , type (@)(..) ) where +import Data.Csv (FromNamedRecord(..), ToNamedRecord(..)) +import Data.HashMap.Strict (union) + +class Has a b where + get :: b -> a + +instance Has a (a, b) where + get = fst + +instance Has a b => Has a (b, c) where + get = get . fst + +instance {-# OVERLAPS #-} Has a (b, a) where + get = snd + +data a @ b = a :@: b + +instance (ToNamedRecord a, ToNamedRecord b) => ToNamedRecord (a @ b) where + toNamedRecord (a :@: b) = union (toNamedRecord a) (toNamedRecord b) + +instance (FromNamedRecord a, FromNamedRecord b) => FromNamedRecord (a @ b) where + parseNamedRecord nr = (:@:) <$> parseNamedRecord nr <*> parseNamedRecord nr