diff --git a/geode.cabal b/geode.cabal index cab450a8ffde587f953c035c31536412160ed9f1..03309af6c853032fca4a8b70ede85bb069c80933 100644 --- a/geode.cabal +++ b/geode.cabal @@ -29,9 +29,11 @@ library -- Modules included in this library but not exported. other-modules: GEODE.Metadata.Contrastive - , GEODE.Metadata.PrimaryKey + , GEODE.Metadata.ArticleRecord + , GEODE.Metadata.Record , GEODE.Metadata.TSV , GEODE.Metadata.Types + , GEODE.Metadata.Work -- LANGUAGE extensions used by modules in this package. -- other-extensions: @@ -55,7 +57,7 @@ test-suite ghc-geode-test hs-source-dirs: test main-is: Main.hs other-modules: GEODE.Metadata.TestEntry - , GEODE.Metadata.TestPrimaryKey + , GEODE.Metadata.TestArticleRecord , Test.HUnit.Extra build-depends: base , bytestring diff --git a/lib/GEODE/Metadata.hs b/lib/GEODE/Metadata.hs index d07145b197b50f89ae72d6822e94893324ff23d5..4376658769573f94b626c63381fce62e6b70cdef 100644 --- a/lib/GEODE/Metadata.hs +++ b/lib/GEODE/Metadata.hs @@ -1,12 +1,14 @@ {-# LANGUAGE OverloadedStrings #-} module GEODE.Metadata - ( module Contrastive + ( module ArticleRecord + , module Contrastive , module Entry - , module PrimaryKey + , module Record , module SplitContext , module TSV , module TSV_Header , module Types + , module Work , groupBy , indexBy , sortBy ) where @@ -16,13 +18,15 @@ import Data.List (sortOn) import Data.Map.Strict as Map (Map, alter, empty, insert, toList) import GEODE.Metadata.Contrastive as Contrastive import GEODE.Metadata.Entry as Entry - ( Entry(headWord, name, page), newEntry, normalize ) -import GEODE.Metadata.PrimaryKey as PrimaryKey + ( Entry(headword, name, page), newEntry, normalize ) +import GEODE.Metadata.ArticleRecord as ArticleRecord +import GEODE.Metadata.Record as Record import GEODE.Metadata.SplitContext as SplitContext hiding (get, page, rank) import GEODE.Metadata.TSV as TSV import GEODE.Metadata.TSV.Header as TSV_Header ( DefaultFields(..), HasDefaultHeader(..), WithDefaultHeader(..), glue ) import GEODE.Metadata.Types as Types +import GEODE.Metadata.Work as Work sortBy :: (Foldable t, Ord k) => (a -> k) -> t a -> [a] sortBy field = sortOn field . Foldable.toList diff --git a/lib/GEODE/Metadata/ArticleRecord.hs b/lib/GEODE/Metadata/ArticleRecord.hs new file mode 100644 index 0000000000000000000000000000000000000000..fd589ff97a4f409337d7da7dfb0a0eaa990fa376 --- /dev/null +++ b/lib/GEODE/Metadata/ArticleRecord.hs @@ -0,0 +1,53 @@ +{-# LANGUAGE DeriveGeneric, FlexibleContexts, FlexibleInstances, NamedFieldPuns, OverloadedStrings #-} +module GEODE.Metadata.ArticleRecord + ( Work(..) + , ArticleRecord(..) + , relativePath + , uid ) where + +import Data.Aeson ((.=), ToJSON(..)) +import Data.Aeson.KeyMap as KeyMap (fromList) +import Data.Csv (FromField(..), FromNamedRecord(..), ToNamedRecord(..), ToField(..)) +import GEODE.Metadata.Record (Record(..)) +import GEODE.Metadata.Types (Has(..), ToJSONObject(..)) +import GEODE.Metadata.TSV.Header (DefaultFields(..), HasDefaultHeader(..)) +import GEODE.Metadata.Work (Work(..)) +import GHC.Generics (Generic) +import System.FilePath ((</>), (<.>)) +import Text.Printf (printf) + +data ArticleRecord = ArticleRecord + { work :: Work + , volume :: Int + , article :: Int } deriving (Eq, Ord, Generic, Show) + +instance FromNamedRecord ArticleRecord +instance ToNamedRecord ArticleRecord + +instance ToJSONObject ArticleRecord where + toJSONObject (ArticleRecord {work, volume, article}) = KeyMap.fromList + [ ("work", toJSON work) + , ("volume", toJSON volume) + , ("article", toJSON article) ] + toJSONPairs (ArticleRecord {work, volume, article}) = + "work" .= work + <> "volume" .= volume + <> "article" .= article + +instance Record ArticleRecord where + uid (ArticleRecord {work, volume, article}) = + printf "%s_%d_%d" (show $ work) volume article + + relativePath (ArticleRecord {work, volume, article}) extension = + (show work) </> ("T" <> show volume) </> (show article) <.> extension + +{- +relativePath :: Has ArticleRecord a => a -> String -> FilePath +relativePath a extension = + (show work) </> ("T" <> show volume) </> (show article) <.> extension + where + ArticleRecord {work, volume, article} = get a +-} + +instance HasDefaultHeader ArticleRecord where + defaultFields = DefaultFields [ "work", "volume", "article" ] diff --git a/lib/GEODE/Metadata/Entry.hs b/lib/GEODE/Metadata/Entry.hs index c67ea4b946d492e99ab6f75c42ce7fcf80e64671..3a3ae4494325615187235b2661013bbc620b2a25 100644 --- a/lib/GEODE/Metadata/Entry.hs +++ b/lib/GEODE/Metadata/Entry.hs @@ -13,7 +13,7 @@ import GEODE.Metadata.TSV.Header import GHC.Generics (Generic) data Entry = Entry - { headWord :: Text + { headword :: Text , name :: Text , page :: Int } deriving (Generic, Show) @@ -21,7 +21,7 @@ instance FromNamedRecord Entry instance ToNamedRecord Entry instance HasDefaultHeader Entry where - defaultFields = DefaultFields [ "headWord", "name", "page" ] + defaultFields = DefaultFields [ "headword", "name", "page" ] normalize :: Text -> Text normalize = Text.foldl' appendIf mempty @@ -33,8 +33,8 @@ normalize = Text.foldl' appendIf mempty | otherwise = tmpText `snoc` '-' newEntry :: SplitContext m => Text -> m Entry -newEntry headWord = do +newEntry headword = do count <- Text.pack . show <$> next (HeadWord prefix) - Entry headWord (Text.concat [prefix, "-", count]) <$> get Page + Entry headword (Text.concat [prefix, "-", count]) <$> get Page where - prefix = normalize headWord + prefix = normalize headword diff --git a/lib/GEODE/Metadata/PrimaryKey.hs b/lib/GEODE/Metadata/PrimaryKey.hs deleted file mode 100644 index 393db90b8ff2b33cd48499d2cbd2d5f6ca710df2..0000000000000000000000000000000000000000 --- a/lib/GEODE/Metadata/PrimaryKey.hs +++ /dev/null @@ -1,50 +0,0 @@ -{-# LANGUAGE DeriveGeneric, FlexibleContexts, FlexibleInstances, NamedFieldPuns, OverloadedStrings #-} -module GEODE.Metadata.PrimaryKey - ( Book(..) - , PrimaryKey(..) - , relativePath - , uid ) where - -import Data.Aeson (ToJSON(..)) -import Data.Csv (FromField(..), FromNamedRecord(..), ToNamedRecord(..), ToField(..)) -import GEODE.Metadata.Types (Has(..)) -import GEODE.Metadata.TSV.Header (DefaultFields(..), HasDefaultHeader(..)) -import GHC.Generics (Generic) -import System.FilePath ((</>), (<.>)) -import Text.Printf (printf) - -data Book = EDdA | LGE | Wikipedia deriving (Eq, Ord, Read, Show) - -instance FromField Book where - parseField "EDdA" = pure EDdA - parseField "LGE" = pure LGE - parseField "Wikipedia" = pure Wikipedia - parseField _ = mempty - -instance ToField Book where - toField = toField . show - -instance ToJSON Book where - toJSON = toJSON . show - -data PrimaryKey = PrimaryKey - { book :: Book - , tome :: Int - , rank :: Int } deriving (Eq, Ord, Generic, Show) - -instance FromNamedRecord PrimaryKey -instance ToNamedRecord PrimaryKey - -uid :: Has PrimaryKey a => a -> String -uid a = printf "%s_%d_%d" (show $ book) tome rank - where - PrimaryKey {book, tome, rank} = get a - -relativePath :: Has PrimaryKey a => a -> String -> FilePath -relativePath a extension = - (show book) </> ("T" <> show tome) </> (show rank) <.> extension - where - PrimaryKey {book, tome, rank} = get a - -instance HasDefaultHeader PrimaryKey where - defaultFields = DefaultFields [ "book", "tome", "rank" ] diff --git a/lib/GEODE/Metadata/Record.hs b/lib/GEODE/Metadata/Record.hs new file mode 100644 index 0000000000000000000000000000000000000000..094fc1065700778e9eb007a2fb9db8229e16c4d8 --- /dev/null +++ b/lib/GEODE/Metadata/Record.hs @@ -0,0 +1,16 @@ +{-# LANGUAGE ExplicitNamespaces, TypeOperators #-} +module GEODE.Metadata.Record + ( Record(..) ) where + +import GEODE.Metadata.Types (type (@)(..)) +import System.FilePath ((</>)) +import Text.Printf (printf) + +class Record a where + uid :: a -> String + relativePath :: a -> String -> FilePath + +instance (Record a, Record b) => Record (a @ b) where + uid (a :@: b) = printf "%s_%s" (uid a) (uid b) + relativePath (a :@: b) extension = + relativePath a "" </> relativePath b extension diff --git a/lib/GEODE/Metadata/Types.hs b/lib/GEODE/Metadata/Types.hs index 362be18e9626830368a66fe8879a155d5f825747..0999f7dd27b918138673f3a04cb55751d0ecf5da 100644 --- a/lib/GEODE/Metadata/Types.hs +++ b/lib/GEODE/Metadata/Types.hs @@ -1,10 +1,13 @@ -{-# LANGUAGE ExplicitNamespaces, FlexibleInstances, MultiParamTypeClasses, TypeOperators #-} +{-# LANGUAGE ExplicitNamespaces, FlexibleInstances, MultiParamTypeClasses, TypeOperators, UndecidableInstances #-} module GEODE.Metadata.Types ( Has(..) + , ToJSONObject(..) , type (@)(..) ) where +import Data.Aeson (Object, Series, ToJSON(..), Value(..), pairs) +import Data.Aeson.KeyMap as Object (union) import Data.Csv (FromNamedRecord(..), ToNamedRecord(..)) -import Data.HashMap.Strict (union) +import Data.HashMap.Strict as Hash (union) infixr 9 @ infixr 9 :@: @@ -23,7 +26,19 @@ instance {-# OVERLAPS #-} Has a (a @ b) where get (a :@: _) = a instance (ToNamedRecord a, ToNamedRecord b) => ToNamedRecord (a @ b) where - toNamedRecord (a :@: b) = union (toNamedRecord a) (toNamedRecord b) + toNamedRecord (a :@: b) = Hash.union (toNamedRecord a) (toNamedRecord b) instance (FromNamedRecord a, FromNamedRecord b) => FromNamedRecord (a @ b) where parseNamedRecord nr = (:@:) <$> parseNamedRecord nr <*> parseNamedRecord nr + +class ToJSONObject a where + toJSONObject :: a -> Object + toJSONPairs :: a -> Series + +instance {-# OVERLAPPABLE #-} ToJSONObject a => ToJSON a where + toJSON = Object . toJSONObject + toEncoding = pairs . toJSONPairs + +instance (ToJSONObject a, ToJSONObject b) => ToJSONObject (a @ b) where + toJSONObject (a :@: b) = toJSONObject a `Object.union` toJSONObject b + toJSONPairs (a :@: b) = toJSONPairs a <> toJSONPairs b diff --git a/lib/GEODE/Metadata/Work.hs b/lib/GEODE/Metadata/Work.hs new file mode 100644 index 0000000000000000000000000000000000000000..9c0aa93fb41875768f0e9b736fa5d69c37c8e9f3 --- /dev/null +++ b/lib/GEODE/Metadata/Work.hs @@ -0,0 +1,25 @@ +{-# LANGUAGE OverloadedStrings #-} +module GEODE.Metadata.Work + ( Work(..) ) where + +import Data.Aeson (ToJSON(..)) +import Data.Csv (FromField(..), ToField(..)) +import Data.Char (toLower) +import Data.ByteString.Char8 as ByteString (map) + +data Work = EDdA | LGE | Wikipedia deriving (Eq, Ord, Read, Show) + +instance FromField Work where + parseField = recognize . ByteString.map toLower + where + recognize "EDdA" = pure EDdA + recognize "LGE" = pure LGE + recognize "Wikipedia" = pure Wikipedia + recognize _ = mempty + +instance ToField Work where + toField = toField . show + +instance ToJSON Work where + toJSON = toJSON . show + diff --git a/test/GEODE/Metadata/TestPrimaryKey.hs b/test/GEODE/Metadata/TestArticleRecord.hs similarity index 59% rename from test/GEODE/Metadata/TestPrimaryKey.hs rename to test/GEODE/Metadata/TestArticleRecord.hs index 62c9662db9ead2e23e8f4fc9075d63c254c97af5..87ade5597b9b7b2fe59582fd3fedb4fb5a5c061a 100644 --- a/test/GEODE/Metadata/TestPrimaryKey.hs +++ b/test/GEODE/Metadata/TestArticleRecord.hs @@ -1,16 +1,16 @@ {-# LANGUAGE OverloadedStrings #-} -module GEODE.Metadata.TestPrimaryKey (testPrimaryKey) where +module GEODE.Metadata.TestArticleRecord (testArticleRecord) where import Data.Csv (ToNamedRecord(..)) import Data.Foldable (toList) import Data.HashMap.Strict ((!?)) -import GEODE.Metadata (Book(..), PrimaryKey(..)) +import GEODE.Metadata (Work(..), ArticleRecord(..)) import GEODE.Metadata.TSV.Header (getHeader, for) import Test.HUnit (Test(..), (~?=)) import Test.HUnit.Extra (isJust) -testPrimaryKey :: Test -testPrimaryKey = TestLabel "Testing the PrimaryKey data type" $ +testArticleRecord :: Test +testArticleRecord = TestLabel "Testing the ArticleRecord data type" $ TestList [ testToNamedRecord ] testToNamedRecord :: Test @@ -19,8 +19,8 @@ testToNamedRecord = TestLabel "Testing ToNamedRecord instance" $ where has3Keys = length aNamedRecord ~?= 3 validDefaultHeader = TestList . toList $ - (isJust . (aNamedRecord !?)) <$> getHeader (for :: PrimaryKey) + (isJust . (aNamedRecord !?)) <$> getHeader (for :: ArticleRecord) aNamedRecord = toNamedRecord aPrimaryKey -aPrimaryKey :: PrimaryKey -aPrimaryKey = PrimaryKey LGE 1 1212 -- ALCALA DE HÉNARÈS +aPrimaryKey :: ArticleRecord +aPrimaryKey = ArticleRecord LGE 1 1212 -- ALCALA DE HÉNARÈS diff --git a/test/GEODE/Metadata/TestEntry.hs b/test/GEODE/Metadata/TestEntry.hs index bc85c1a676c0d1de59bd5076886678a0fc543850..bf31fc76cca625834bd486d6190a80c28539accc 100644 --- a/test/GEODE/Metadata/TestEntry.hs +++ b/test/GEODE/Metadata/TestEntry.hs @@ -28,7 +28,7 @@ testNormalize :: Test testNormalize = TestLabel "Testing function normalize" . TestList $ check <$> [ ("", "") , ("é", "é") - , (headWord anEntry, dropEnd 2 $ name anEntry) ] + , (headword anEntry, dropEnd 2 $ name anEntry) ] where check (a, b) = normalize a ~?= b diff --git a/test/Main.hs b/test/Main.hs index 5aa6b4dc2c1d8aaefce5c99a6c346ebf47ef6e1c..4b7addc6167caebe05f57fd99fa616d0ffa7379d 100644 --- a/test/Main.hs +++ b/test/Main.hs @@ -1,12 +1,12 @@ module Main (main) where import Test.HUnit (Test(..), runTestTTAndExit) -import GEODE.Metadata.TestPrimaryKey (testPrimaryKey) +import GEODE.Metadata.TestArticleRecord (testArticleRecord) import GEODE.Metadata.TestEntry (testEntry) testMetadata :: Test testMetadata = TestLabel "Metadata suite" $ - TestList [ testPrimaryKey, testEntry ] + TestList [ testArticleRecord, testEntry ] main :: IO () main = runTestTTAndExit testMetadata