From 5c766a5ad2484dfbbdf17cb2170942f245ff799d Mon Sep 17 00:00:00 2001 From: Alice BRENON <alice.brenon@ens-lyon.fr> Date: Mon, 11 Sep 2023 17:46:51 +0200 Subject: [PATCH] Rename poorly named fields, improve type composition --- geode.cabal | 6 ++- lib/GEODE/Metadata.hs | 12 +++-- lib/GEODE/Metadata/ArticleRecord.hs | 53 +++++++++++++++++++ lib/GEODE/Metadata/Entry.hs | 10 ++-- lib/GEODE/Metadata/PrimaryKey.hs | 50 ----------------- lib/GEODE/Metadata/Record.hs | 16 ++++++ lib/GEODE/Metadata/Types.hs | 21 ++++++-- lib/GEODE/Metadata/Work.hs | 25 +++++++++ ...TestPrimaryKey.hs => TestArticleRecord.hs} | 14 ++--- test/GEODE/Metadata/TestEntry.hs | 2 +- test/Main.hs | 4 +- 11 files changed, 139 insertions(+), 74 deletions(-) create mode 100644 lib/GEODE/Metadata/ArticleRecord.hs delete mode 100644 lib/GEODE/Metadata/PrimaryKey.hs create mode 100644 lib/GEODE/Metadata/Record.hs create mode 100644 lib/GEODE/Metadata/Work.hs rename test/GEODE/Metadata/{TestPrimaryKey.hs => TestArticleRecord.hs} (59%) diff --git a/geode.cabal b/geode.cabal index cab450a..03309af 100644 --- a/geode.cabal +++ b/geode.cabal @@ -29,9 +29,11 @@ library -- Modules included in this library but not exported. other-modules: GEODE.Metadata.Contrastive - , GEODE.Metadata.PrimaryKey + , GEODE.Metadata.ArticleRecord + , GEODE.Metadata.Record , GEODE.Metadata.TSV , GEODE.Metadata.Types + , GEODE.Metadata.Work -- LANGUAGE extensions used by modules in this package. -- other-extensions: @@ -55,7 +57,7 @@ test-suite ghc-geode-test hs-source-dirs: test main-is: Main.hs other-modules: GEODE.Metadata.TestEntry - , GEODE.Metadata.TestPrimaryKey + , GEODE.Metadata.TestArticleRecord , Test.HUnit.Extra build-depends: base , bytestring diff --git a/lib/GEODE/Metadata.hs b/lib/GEODE/Metadata.hs index d07145b..4376658 100644 --- a/lib/GEODE/Metadata.hs +++ b/lib/GEODE/Metadata.hs @@ -1,12 +1,14 @@ {-# LANGUAGE OverloadedStrings #-} module GEODE.Metadata - ( module Contrastive + ( module ArticleRecord + , module Contrastive , module Entry - , module PrimaryKey + , module Record , module SplitContext , module TSV , module TSV_Header , module Types + , module Work , groupBy , indexBy , sortBy ) where @@ -16,13 +18,15 @@ import Data.List (sortOn) import Data.Map.Strict as Map (Map, alter, empty, insert, toList) import GEODE.Metadata.Contrastive as Contrastive import GEODE.Metadata.Entry as Entry - ( Entry(headWord, name, page), newEntry, normalize ) -import GEODE.Metadata.PrimaryKey as PrimaryKey + ( Entry(headword, name, page), newEntry, normalize ) +import GEODE.Metadata.ArticleRecord as ArticleRecord +import GEODE.Metadata.Record as Record import GEODE.Metadata.SplitContext as SplitContext hiding (get, page, rank) import GEODE.Metadata.TSV as TSV import GEODE.Metadata.TSV.Header as TSV_Header ( DefaultFields(..), HasDefaultHeader(..), WithDefaultHeader(..), glue ) import GEODE.Metadata.Types as Types +import GEODE.Metadata.Work as Work sortBy :: (Foldable t, Ord k) => (a -> k) -> t a -> [a] sortBy field = sortOn field . Foldable.toList diff --git a/lib/GEODE/Metadata/ArticleRecord.hs b/lib/GEODE/Metadata/ArticleRecord.hs new file mode 100644 index 0000000..fd589ff --- /dev/null +++ b/lib/GEODE/Metadata/ArticleRecord.hs @@ -0,0 +1,53 @@ +{-# LANGUAGE DeriveGeneric, FlexibleContexts, FlexibleInstances, NamedFieldPuns, OverloadedStrings #-} +module GEODE.Metadata.ArticleRecord + ( Work(..) + , ArticleRecord(..) + , relativePath + , uid ) where + +import Data.Aeson ((.=), ToJSON(..)) +import Data.Aeson.KeyMap as KeyMap (fromList) +import Data.Csv (FromField(..), FromNamedRecord(..), ToNamedRecord(..), ToField(..)) +import GEODE.Metadata.Record (Record(..)) +import GEODE.Metadata.Types (Has(..), ToJSONObject(..)) +import GEODE.Metadata.TSV.Header (DefaultFields(..), HasDefaultHeader(..)) +import GEODE.Metadata.Work (Work(..)) +import GHC.Generics (Generic) +import System.FilePath ((</>), (<.>)) +import Text.Printf (printf) + +data ArticleRecord = ArticleRecord + { work :: Work + , volume :: Int + , article :: Int } deriving (Eq, Ord, Generic, Show) + +instance FromNamedRecord ArticleRecord +instance ToNamedRecord ArticleRecord + +instance ToJSONObject ArticleRecord where + toJSONObject (ArticleRecord {work, volume, article}) = KeyMap.fromList + [ ("work", toJSON work) + , ("volume", toJSON volume) + , ("article", toJSON article) ] + toJSONPairs (ArticleRecord {work, volume, article}) = + "work" .= work + <> "volume" .= volume + <> "article" .= article + +instance Record ArticleRecord where + uid (ArticleRecord {work, volume, article}) = + printf "%s_%d_%d" (show $ work) volume article + + relativePath (ArticleRecord {work, volume, article}) extension = + (show work) </> ("T" <> show volume) </> (show article) <.> extension + +{- +relativePath :: Has ArticleRecord a => a -> String -> FilePath +relativePath a extension = + (show work) </> ("T" <> show volume) </> (show article) <.> extension + where + ArticleRecord {work, volume, article} = get a +-} + +instance HasDefaultHeader ArticleRecord where + defaultFields = DefaultFields [ "work", "volume", "article" ] diff --git a/lib/GEODE/Metadata/Entry.hs b/lib/GEODE/Metadata/Entry.hs index c67ea4b..3a3ae44 100644 --- a/lib/GEODE/Metadata/Entry.hs +++ b/lib/GEODE/Metadata/Entry.hs @@ -13,7 +13,7 @@ import GEODE.Metadata.TSV.Header import GHC.Generics (Generic) data Entry = Entry - { headWord :: Text + { headword :: Text , name :: Text , page :: Int } deriving (Generic, Show) @@ -21,7 +21,7 @@ instance FromNamedRecord Entry instance ToNamedRecord Entry instance HasDefaultHeader Entry where - defaultFields = DefaultFields [ "headWord", "name", "page" ] + defaultFields = DefaultFields [ "headword", "name", "page" ] normalize :: Text -> Text normalize = Text.foldl' appendIf mempty @@ -33,8 +33,8 @@ normalize = Text.foldl' appendIf mempty | otherwise = tmpText `snoc` '-' newEntry :: SplitContext m => Text -> m Entry -newEntry headWord = do +newEntry headword = do count <- Text.pack . show <$> next (HeadWord prefix) - Entry headWord (Text.concat [prefix, "-", count]) <$> get Page + Entry headword (Text.concat [prefix, "-", count]) <$> get Page where - prefix = normalize headWord + prefix = normalize headword diff --git a/lib/GEODE/Metadata/PrimaryKey.hs b/lib/GEODE/Metadata/PrimaryKey.hs deleted file mode 100644 index 393db90..0000000 --- a/lib/GEODE/Metadata/PrimaryKey.hs +++ /dev/null @@ -1,50 +0,0 @@ -{-# LANGUAGE DeriveGeneric, FlexibleContexts, FlexibleInstances, NamedFieldPuns, OverloadedStrings #-} -module GEODE.Metadata.PrimaryKey - ( Book(..) - , PrimaryKey(..) - , relativePath - , uid ) where - -import Data.Aeson (ToJSON(..)) -import Data.Csv (FromField(..), FromNamedRecord(..), ToNamedRecord(..), ToField(..)) -import GEODE.Metadata.Types (Has(..)) -import GEODE.Metadata.TSV.Header (DefaultFields(..), HasDefaultHeader(..)) -import GHC.Generics (Generic) -import System.FilePath ((</>), (<.>)) -import Text.Printf (printf) - -data Book = EDdA | LGE | Wikipedia deriving (Eq, Ord, Read, Show) - -instance FromField Book where - parseField "EDdA" = pure EDdA - parseField "LGE" = pure LGE - parseField "Wikipedia" = pure Wikipedia - parseField _ = mempty - -instance ToField Book where - toField = toField . show - -instance ToJSON Book where - toJSON = toJSON . show - -data PrimaryKey = PrimaryKey - { book :: Book - , tome :: Int - , rank :: Int } deriving (Eq, Ord, Generic, Show) - -instance FromNamedRecord PrimaryKey -instance ToNamedRecord PrimaryKey - -uid :: Has PrimaryKey a => a -> String -uid a = printf "%s_%d_%d" (show $ book) tome rank - where - PrimaryKey {book, tome, rank} = get a - -relativePath :: Has PrimaryKey a => a -> String -> FilePath -relativePath a extension = - (show book) </> ("T" <> show tome) </> (show rank) <.> extension - where - PrimaryKey {book, tome, rank} = get a - -instance HasDefaultHeader PrimaryKey where - defaultFields = DefaultFields [ "book", "tome", "rank" ] diff --git a/lib/GEODE/Metadata/Record.hs b/lib/GEODE/Metadata/Record.hs new file mode 100644 index 0000000..094fc10 --- /dev/null +++ b/lib/GEODE/Metadata/Record.hs @@ -0,0 +1,16 @@ +{-# LANGUAGE ExplicitNamespaces, TypeOperators #-} +module GEODE.Metadata.Record + ( Record(..) ) where + +import GEODE.Metadata.Types (type (@)(..)) +import System.FilePath ((</>)) +import Text.Printf (printf) + +class Record a where + uid :: a -> String + relativePath :: a -> String -> FilePath + +instance (Record a, Record b) => Record (a @ b) where + uid (a :@: b) = printf "%s_%s" (uid a) (uid b) + relativePath (a :@: b) extension = + relativePath a "" </> relativePath b extension diff --git a/lib/GEODE/Metadata/Types.hs b/lib/GEODE/Metadata/Types.hs index 362be18..0999f7d 100644 --- a/lib/GEODE/Metadata/Types.hs +++ b/lib/GEODE/Metadata/Types.hs @@ -1,10 +1,13 @@ -{-# LANGUAGE ExplicitNamespaces, FlexibleInstances, MultiParamTypeClasses, TypeOperators #-} +{-# LANGUAGE ExplicitNamespaces, FlexibleInstances, MultiParamTypeClasses, TypeOperators, UndecidableInstances #-} module GEODE.Metadata.Types ( Has(..) + , ToJSONObject(..) , type (@)(..) ) where +import Data.Aeson (Object, Series, ToJSON(..), Value(..), pairs) +import Data.Aeson.KeyMap as Object (union) import Data.Csv (FromNamedRecord(..), ToNamedRecord(..)) -import Data.HashMap.Strict (union) +import Data.HashMap.Strict as Hash (union) infixr 9 @ infixr 9 :@: @@ -23,7 +26,19 @@ instance {-# OVERLAPS #-} Has a (a @ b) where get (a :@: _) = a instance (ToNamedRecord a, ToNamedRecord b) => ToNamedRecord (a @ b) where - toNamedRecord (a :@: b) = union (toNamedRecord a) (toNamedRecord b) + toNamedRecord (a :@: b) = Hash.union (toNamedRecord a) (toNamedRecord b) instance (FromNamedRecord a, FromNamedRecord b) => FromNamedRecord (a @ b) where parseNamedRecord nr = (:@:) <$> parseNamedRecord nr <*> parseNamedRecord nr + +class ToJSONObject a where + toJSONObject :: a -> Object + toJSONPairs :: a -> Series + +instance {-# OVERLAPPABLE #-} ToJSONObject a => ToJSON a where + toJSON = Object . toJSONObject + toEncoding = pairs . toJSONPairs + +instance (ToJSONObject a, ToJSONObject b) => ToJSONObject (a @ b) where + toJSONObject (a :@: b) = toJSONObject a `Object.union` toJSONObject b + toJSONPairs (a :@: b) = toJSONPairs a <> toJSONPairs b diff --git a/lib/GEODE/Metadata/Work.hs b/lib/GEODE/Metadata/Work.hs new file mode 100644 index 0000000..9c0aa93 --- /dev/null +++ b/lib/GEODE/Metadata/Work.hs @@ -0,0 +1,25 @@ +{-# LANGUAGE OverloadedStrings #-} +module GEODE.Metadata.Work + ( Work(..) ) where + +import Data.Aeson (ToJSON(..)) +import Data.Csv (FromField(..), ToField(..)) +import Data.Char (toLower) +import Data.ByteString.Char8 as ByteString (map) + +data Work = EDdA | LGE | Wikipedia deriving (Eq, Ord, Read, Show) + +instance FromField Work where + parseField = recognize . ByteString.map toLower + where + recognize "EDdA" = pure EDdA + recognize "LGE" = pure LGE + recognize "Wikipedia" = pure Wikipedia + recognize _ = mempty + +instance ToField Work where + toField = toField . show + +instance ToJSON Work where + toJSON = toJSON . show + diff --git a/test/GEODE/Metadata/TestPrimaryKey.hs b/test/GEODE/Metadata/TestArticleRecord.hs similarity index 59% rename from test/GEODE/Metadata/TestPrimaryKey.hs rename to test/GEODE/Metadata/TestArticleRecord.hs index 62c9662..87ade55 100644 --- a/test/GEODE/Metadata/TestPrimaryKey.hs +++ b/test/GEODE/Metadata/TestArticleRecord.hs @@ -1,16 +1,16 @@ {-# LANGUAGE OverloadedStrings #-} -module GEODE.Metadata.TestPrimaryKey (testPrimaryKey) where +module GEODE.Metadata.TestArticleRecord (testArticleRecord) where import Data.Csv (ToNamedRecord(..)) import Data.Foldable (toList) import Data.HashMap.Strict ((!?)) -import GEODE.Metadata (Book(..), PrimaryKey(..)) +import GEODE.Metadata (Work(..), ArticleRecord(..)) import GEODE.Metadata.TSV.Header (getHeader, for) import Test.HUnit (Test(..), (~?=)) import Test.HUnit.Extra (isJust) -testPrimaryKey :: Test -testPrimaryKey = TestLabel "Testing the PrimaryKey data type" $ +testArticleRecord :: Test +testArticleRecord = TestLabel "Testing the ArticleRecord data type" $ TestList [ testToNamedRecord ] testToNamedRecord :: Test @@ -19,8 +19,8 @@ testToNamedRecord = TestLabel "Testing ToNamedRecord instance" $ where has3Keys = length aNamedRecord ~?= 3 validDefaultHeader = TestList . toList $ - (isJust . (aNamedRecord !?)) <$> getHeader (for :: PrimaryKey) + (isJust . (aNamedRecord !?)) <$> getHeader (for :: ArticleRecord) aNamedRecord = toNamedRecord aPrimaryKey -aPrimaryKey :: PrimaryKey -aPrimaryKey = PrimaryKey LGE 1 1212 -- ALCALA DE HÉNARÈS +aPrimaryKey :: ArticleRecord +aPrimaryKey = ArticleRecord LGE 1 1212 -- ALCALA DE HÉNARÈS diff --git a/test/GEODE/Metadata/TestEntry.hs b/test/GEODE/Metadata/TestEntry.hs index bc85c1a..bf31fc7 100644 --- a/test/GEODE/Metadata/TestEntry.hs +++ b/test/GEODE/Metadata/TestEntry.hs @@ -28,7 +28,7 @@ testNormalize :: Test testNormalize = TestLabel "Testing function normalize" . TestList $ check <$> [ ("", "") , ("é", "é") - , (headWord anEntry, dropEnd 2 $ name anEntry) ] + , (headword anEntry, dropEnd 2 $ name anEntry) ] where check (a, b) = normalize a ~?= b diff --git a/test/Main.hs b/test/Main.hs index 5aa6b4d..4b7addc 100644 --- a/test/Main.hs +++ b/test/Main.hs @@ -1,12 +1,12 @@ module Main (main) where import Test.HUnit (Test(..), runTestTTAndExit) -import GEODE.Metadata.TestPrimaryKey (testPrimaryKey) +import GEODE.Metadata.TestArticleRecord (testArticleRecord) import GEODE.Metadata.TestEntry (testEntry) testMetadata :: Test testMetadata = TestLabel "Metadata suite" $ - TestList [ testPrimaryKey, testEntry ] + TestList [ testArticleRecord, testEntry ] main :: IO () main = runTestTTAndExit testMetadata -- GitLab