From 7fca2bd55f883e2faa144feec5a9e0639195bdf9 Mon Sep 17 00:00:00 2001
From: Alice BRENON <alice.brenon@ens-lyon.fr>
Date: Wed, 7 Jun 2023 18:25:36 +0200
Subject: [PATCH] Trim Metadata a bit to make it only an inclusion shortcut,
 put TSV-specific stuff out in separate module and cut Article in two

---
 lib/GEODE/Metadata.hs           | 67 ++++++++++-----------------------
 lib/GEODE/Metadata/Entry.hs     | 19 ++++++++++
 lib/GEODE/Metadata/ID.hs        | 64 +++++++++++++++++++++++++++++++
 lib/GEODE/Metadata/Projector.hs | 33 +++++++++++-----
 lib/GEODE/Metadata/TSV.hs       | 63 +++++++++++++++++++++++++++++++
 5 files changed, 188 insertions(+), 58 deletions(-)
 create mode 100644 lib/GEODE/Metadata/Entry.hs
 create mode 100644 lib/GEODE/Metadata/ID.hs
 create mode 100644 lib/GEODE/Metadata/TSV.hs

diff --git a/lib/GEODE/Metadata.hs b/lib/GEODE/Metadata.hs
index f71e970..04b6417 100644
--- a/lib/GEODE/Metadata.hs
+++ b/lib/GEODE/Metadata.hs
@@ -1,60 +1,28 @@
 {-# LANGUAGE NamedFieldPuns, OverloadedStrings #-}
-module GEODE.Metadata (
-      Article(..)
-    , Authors(..)
-    , Book(..)
-    , Domains(..)
-    , FromBook(..)
-    , HasAuthors(..)
-    , HasDomains(..)
-    , InFile(..)
-    , TXMText
-    , Unique(..)
-    , groupBy
-    , list
-    , readTsv
-    , sortBy
-    , tsvFile
-    , tsvLines
-  ) where
+module GEODE.Metadata
+  ( module ID
+  , module Entry
+  , module TSV
+  , module Projector
+  , module Types
+  , groupBy
+  , indexBy
+  , list
+  , sortBy ) where
 
-import Data.ByteString.Lazy as ByteString (putStr, readFile, writeFile)
-import Data.ByteString.Char8 as StrictByteString (pack)
-import Data.Csv
-  ( DecodeOptions(..), EncodeOptions(..), FromRecord, HasHeader(..)
-  , ToNamedRecord, ToRecord, decodeWith, defaultEncodeOptions, encodeByNameWith
-  , encodeWith, header )
 import Data.Foldable as Foldable (toList)
 import Data.List (sortOn)
-import Data.Map.Strict as Map (alter, empty, toList)
+import Data.Map.Strict as Map (Map, alter, empty, insert, toList)
 import Data.Text as Text (Text, intercalate, unpack)
-import Data.Vector as Vector (Vector)
-import GEODE.Metadata.Article
-import GEODE.Metadata.Projector
-  (FromBook(..), HasAuthors(..), HasDomains(..), InFile(..), TXMText, Unique(..))
-import GEODE.Metadata.Types (Authors(..), Book(..), Domains(..))
+import GEODE.Metadata.ID as ID
+import GEODE.Metadata.Entry as Entry
+import GEODE.Metadata.Projector as Projector
+import GEODE.Metadata.TSV as TSV
+import GEODE.Metadata.Types as Types
 
 list :: [Text] -> String
 list ts = Text.unpack $ ":" <> intercalate ":" ts <> ":"
 
-readTsv :: FromRecord a => FilePath -> IO (Either String (Vector a))
-readTsv source = decodeWith fromTsv HasHeader <$> ByteString.readFile source
-  where
-    fromTsv = DecodeOptions {decDelimiter = fromIntegral (fromEnum '\t')}
-
-toTsv :: EncodeOptions
-toTsv = defaultEncodeOptions
-        { encDelimiter = fromIntegral (fromEnum '\t')
-        , encUseCrLf = False }
-
-tsvFile :: ToNamedRecord a => FilePath -> [String] -> [a] -> IO ()
-tsvFile target fields =
-  ByteString.writeFile target
-  . encodeByNameWith toTsv (header $ StrictByteString.pack <$> fields)
-
-tsvLines :: ToRecord a => [a] -> IO ()
-tsvLines = ByteString.putStr . encodeWith toTsv
-
 sortBy :: (Foldable t, Ord k) => (a -> k) -> t a -> [a]
 sortBy field = sortOn field . Foldable.toList
 
@@ -62,3 +30,6 @@ groupBy :: (Foldable t, Ord k) => (v -> k) -> t v -> [(k, [v])]
 groupBy field = Map.toList . foldr group Map.empty
   where
     group article = Map.alter (Just . maybe [article] (article:)) (field article)
+
+indexBy :: (Foldable t, Ord k) => (a -> k) -> t a -> Map k a
+indexBy f = foldr (\a -> Map.insert (f a) a) Map.empty
diff --git a/lib/GEODE/Metadata/Entry.hs b/lib/GEODE/Metadata/Entry.hs
new file mode 100644
index 0000000..6e19a21
--- /dev/null
+++ b/lib/GEODE/Metadata/Entry.hs
@@ -0,0 +1,19 @@
+module GEODE.Metadata.Entry
+  ( Entry(..)
+  , headerSection ) where
+
+import Data.Csv (FromNamedRecord(..), ToNamedRecord(..))
+import Data.Text (Text)
+import GEODE.Metadata.TSV (Default(..), DefaultHeader(..))
+import GHC.Generics (Generic)
+
+data Entry = Entry
+  { headWord :: Text
+  , rank :: Int 
+  , page :: Int } deriving (Generic, Show)
+
+instance FromNamedRecord Entry
+instance ToNamedRecord Entry
+
+instance DefaultHeader Entry where
+  headerSection = Default [ "headWord", "rank", "page" ]
diff --git a/lib/GEODE/Metadata/ID.hs b/lib/GEODE/Metadata/ID.hs
new file mode 100644
index 0000000..2d9f1f8
--- /dev/null
+++ b/lib/GEODE/Metadata/ID.hs
@@ -0,0 +1,64 @@
+module GEODE.Metadata.ID
+  ( HasID(..)
+  , ID
+  , book
+  , tome
+  , name
+  , headerSection
+  , relativePath
+  , uid ) where
+
+import Data.Csv (FromNamedRecord(..), ToNamedRecord(..))
+import Data.Text (Text, unpack)
+import GEODE.Metadata.Types (Book)
+--import GEODE.Metadata.Projector (InFile(..), Unique(..))
+import GEODE.Metadata.TSV (Default(..), DefaultHeader(..))
+import GHC.Generics (Generic)
+import System.FilePath ((</>), (<.>))
+import Text.Printf (printf)
+
+data ID = ID
+  { book_ :: Book
+  , tome_ :: Int
+  , name_ :: Text } deriving (Eq, Ord, Generic, Show)
+
+class HasID t where
+  iD :: t -> ID
+
+instance HasID ID where
+  iD = id
+
+instance HasID (ID, a) where
+  iD = fst
+
+book :: HasID a => a -> Book
+book = book_ . iD
+
+tome :: HasID a => a -> Int
+tome = tome_ . iD
+
+name :: HasID a => a -> Text
+name = name_ . iD
+
+instance FromNamedRecord ID
+instance ToNamedRecord ID
+
+uid :: HasID a => a -> String
+uid a = printf "%s_%d_%s" (show $ book a) (tome a) (unpack $ name a)
+
+relativePath :: HasID a => a -> String -> FilePath
+relativePath a extension =
+  (show $ book a) </> (show $ tome a) </> (unpack $ name a) <.> extension
+
+{-
+instance Unique ID where
+  uid (ID {book, tome, name}) =
+    printf "%s_%d_%s" (show book) tome (unpack name)
+
+instance InFile ID where
+  relativePath (ID {book, tome, name}) extension =
+    show book </> show tome </> unpack name <.> extension
+-}
+
+instance DefaultHeader ID where
+  headerSection = Default [ "book", "tome", "name" ]
diff --git a/lib/GEODE/Metadata/Projector.hs b/lib/GEODE/Metadata/Projector.hs
index f70547c..3ea0b5c 100644
--- a/lib/GEODE/Metadata/Projector.hs
+++ b/lib/GEODE/Metadata/Projector.hs
@@ -1,21 +1,32 @@
 {-# LANGUAGE ConstraintKinds #-}
 module GEODE.Metadata.Projector
-  ( FromBook(..)
-  , HasAuthors(..) 
-  , HasDomains(..)
-  , InFile(..)
-  , TXMText
-  , Unique(..) ) where
-
-import GEODE.Metadata.Types (Authors(..), Book, Domains(..))
+  --( FromBook(..)
+  --, FromTome(..) 
+  ( HasAuthors(..) 
+  , HasDomains(..) ) where
+  --, InFile(..)
+  --, Named(..)
+  --, TXMText
+  --, Unique(..) ) where
+
+import GEODE.Metadata.Types (Authors(..), Domains(..))
+--import GEODE.Metadata.Types (Authors(..), Book, Domains(..))
 import Data.Text (Text)
 
+{-
 class Unique a where
   uid :: a -> String
 
 class FromBook a where
   book :: a -> Book
 
+class FromTome a where
+  tome :: a -> Int
+
+class Named a where
+  name :: a -> Text
+-}
+
 class HasAuthors a where
   authors_ :: a -> Authors
 
@@ -28,7 +39,9 @@ class HasDomains a where
   domains :: a -> [Text]
   domains = getDomains . domains_
 
+{-
 class InFile a where
-  relativePath :: a -> FilePath
+  relativePath :: a -> String -> FilePath
+-}
 
-type TXMText a = (Unique a, FromBook a, HasAuthors a, HasDomains a)
+--type TXMText a = (Unique a, FromBook a, HasAuthors a, HasDomains a)
diff --git a/lib/GEODE/Metadata/TSV.hs b/lib/GEODE/Metadata/TSV.hs
new file mode 100644
index 0000000..cd06047
--- /dev/null
+++ b/lib/GEODE/Metadata/TSV.hs
@@ -0,0 +1,63 @@
+{-# LANGUAGE ScopedTypeVariables, UndecidableInstances #-}
+module GEODE.Metadata.TSV
+  ( Default(..)
+  , DefaultHeader(..)
+  , readNamedTsv
+  , readTsv
+  , toTsv
+  , tsvFile
+  , tsvLines ) where
+
+import Data.ByteString.Char8 as StrictByteString (pack)
+import Data.ByteString.Lazy as ByteString (putStr, readFile, writeFile)
+import Data.Csv
+  ( DecodeOptions(..), EncodeOptions(..), FromNamedRecord, FromRecord
+  , HasHeader(..), ToNamedRecord(..), ToRecord(..), decodeByNameWith, decodeWith
+  , defaultEncodeOptions, encodeByNameWith, encodeWith, header )
+import Data.Foldable (toList)
+import Data.HashMap.Strict ((!))
+import Data.Vector (Vector, fromList)
+
+readNamedTsv :: FromNamedRecord a => FilePath -> IO (Either String (Vector a))
+readNamedTsv source =
+  (fmap snd . decodeByNameWith fromTsv) <$> ByteString.readFile source
+  where
+    fromTsv = DecodeOptions {decDelimiter = fromIntegral (fromEnum '\t')}
+
+readTsv :: FromRecord a => FilePath -> IO (Either String (Vector a))
+readTsv source = decodeWith fromTsv NoHeader <$> ByteString.readFile source
+  where
+    fromTsv = DecodeOptions {decDelimiter = fromIntegral (fromEnum '\t')}
+
+newtype Default a = Default { defaultHeader :: [String] }
+
+class DefaultHeader a where
+  headerSection :: Default a
+
+instance (DefaultHeader a, DefaultHeader b) => DefaultHeader (a, b) where
+  headerSection = Default (a ++ b)
+    where
+      Default a = (headerSection :: Default a)
+      Default b = (headerSection :: Default b)
+
+instance (DefaultHeader a, ToNamedRecord a) => ToRecord a where
+  toRecord = fromList . prepare . toNamedRecord
+    where
+      Default fields = (headerSection :: Default a)
+      prepare namedRecord = (namedRecord !) . StrictByteString.pack <$> fields
+
+toTsv :: EncodeOptions
+toTsv = defaultEncodeOptions
+        { encDelimiter = fromIntegral (fromEnum '\t')
+        , encUseCrLf = False }
+
+tsvFile :: forall a. (DefaultHeader a, ToNamedRecord a) => FilePath -> [a] -> IO ()
+tsvFile target =
+  ByteString.writeFile target
+  . encodeByNameWith toTsv (header $ StrictByteString.pack <$> fields)
+  where
+    Default fields = (headerSection :: Default a)
+
+tsvLines :: (Foldable t, ToRecord a) => t a -> IO ()
+tsvLines = ByteString.putStr . encodeWith toTsv . toList
+
-- 
GitLab