diff --git a/geode.cabal b/geode.cabal index ae999fca87c32e9d2fd0f23e12af88c7ea914263..bb713ea6313d69858da112de7f1050f06f9f9e58 100644 --- a/geode.cabal +++ b/geode.cabal @@ -23,6 +23,8 @@ extra-source-files: CHANGELOG.md library exposed-modules: GEODE.Metadata , GEODE.Metadata.Entry + , GEODE.Metadata.SplitContext + , GEODE.Metadata.TSV.Header , GEODE.Options -- Modules included in this library but not exported. diff --git a/lib/GEODE/Metadata.hs b/lib/GEODE/Metadata.hs index 99027885fff42f948ca834f5fb9d1220cf573ce7..5bdbfc464f96dd66ccc3ebcadaaf6dd26d2dea78 100644 --- a/lib/GEODE/Metadata.hs +++ b/lib/GEODE/Metadata.hs @@ -2,8 +2,10 @@ module GEODE.Metadata ( module PrimaryKey , module Entry - , module TSV , module Projector + , module SplitContext + , module TSV + , module TSV_Header , module Types , groupBy , indexBy @@ -16,9 +18,13 @@ import Data.Map.Strict as Map (Map, alter, empty, insert, toList) import Data.Text as Text (Text, intercalate, unpack) import GEODE.Metadata.PrimaryKey as PrimaryKey import GEODE.Metadata.Entry as Entry - ( Entry(headWord, name, page), entry, normalize ) + ( Entry(headWord, name, page), newEntry, normalize ) import GEODE.Metadata.Projector as Projector +import GEODE.Metadata.SplitContext as SplitContext hiding (page, rank) import GEODE.Metadata.TSV as TSV +import GEODE.Metadata.TSV.Header as TSV_Header + ( Concat(..), DefaultFields(..), Glue(..), HasDefaultHeader(..) + , HasDefaultHeader(..) ) import GEODE.Metadata.Types as Types list :: [Text] -> String diff --git a/lib/GEODE/Metadata/Entry.hs b/lib/GEODE/Metadata/Entry.hs index 3ca2186d527c3a552355a17f9705eb3f0b1f4367..fca0819fc0c39d5159364010da99745f3d895ad4 100644 --- a/lib/GEODE/Metadata/Entry.hs +++ b/lib/GEODE/Metadata/Entry.hs @@ -1,16 +1,15 @@ {-# LANGUAGE DeriveGeneric, FlexibleContexts, NamedFieldPuns, OverloadedStrings #-} module GEODE.Metadata.Entry ( Entry(..) - , HeadWords - , entry + , newEntry , normalize ) where -import Control.Monad.State (MonadState(..)) import Data.Char (isAlphaNum, isSpace, isUpper, toLower) import Data.Csv (FromNamedRecord(..), ToNamedRecord(..)) -import Data.Map as Map (Map, insertLookupWithKey) import Data.Text as Text (Text, concat, foldl', pack, snoc) -import GEODE.Metadata.TSV (Default(..), DefaultHeader(..)) +import GEODE.Metadata.SplitContext (Field(..), SplitContext(..), next) +import GEODE.Metadata.TSV.Header + (DefaultFields(..), HasDefaultHeader(..), HasDefaultHeader(..)) import GHC.Generics (Generic) data Entry = Entry @@ -21,8 +20,8 @@ data Entry = Entry instance FromNamedRecord Entry instance ToNamedRecord Entry -instance DefaultHeader Entry where - headerSection = Default [ "headWord", "name", "page" ] +instance HasDefaultHeader Entry where + defaultFields = DefaultFields [ "headWord", "name", "page" ] normalize :: Text -> Text normalize = Text.foldl' appendIf mempty @@ -33,12 +32,9 @@ normalize = Text.foldl' appendIf mempty | isAlphaNum newChar = tmpText `snoc` newChar | otherwise = tmpText `snoc` '-' -type HeadWords = Map Text Int - -entry :: MonadState HeadWords m => Text -> Int -> m Entry -entry headWord page = do - count <- maybe "0" (Text.pack . show) <$> state nextId - pure $ Entry { headWord, name = Text.concat [prefix, "-", count], page } +newEntry :: SplitContext m => Text -> m Entry +newEntry headWord = do + count <- Text.pack . show <$> next (HeadWord prefix) + Entry headWord (Text.concat [prefix, "-", count]) <$> get Page where - nextId = insertLookupWithKey (\_ _ n -> n+1) prefix 1 prefix = normalize headWord diff --git a/lib/GEODE/Metadata/PrimaryKey.hs b/lib/GEODE/Metadata/PrimaryKey.hs index dde111492f623886bc3db8fa867ce8170a59604d..385d66de8407b348a046669b399d2bdc59c6772b 100644 --- a/lib/GEODE/Metadata/PrimaryKey.hs +++ b/lib/GEODE/Metadata/PrimaryKey.hs @@ -2,14 +2,13 @@ module GEODE.Metadata.PrimaryKey ( HasPK(..) , PrimaryKey(..) - , headerSection , relativePath , uid ) where import Data.Csv (FromNamedRecord(..), ToNamedRecord(..)) -import Data.Text (Text, unpack) import GEODE.Metadata.Types (Book) -import GEODE.Metadata.TSV (Default(..), DefaultHeader(..)) +import GEODE.Metadata.TSV.Header + (DefaultFields(..), HasDefaultHeader(..), HasDefaultHeader(..)) import GHC.Generics (Generic) import System.FilePath ((</>), (<.>)) import Text.Printf (printf) @@ -42,5 +41,5 @@ relativePath a extension = where PrimaryKey {book, tome, rank} = pKey a -instance DefaultHeader PrimaryKey where - headerSection = Default [ "book", "tome", "rank" ] +instance HasDefaultHeader PrimaryKey where + defaultFields = DefaultFields [ "book", "tome", "rank" ] diff --git a/lib/GEODE/Metadata/SplitContext.hs b/lib/GEODE/Metadata/SplitContext.hs new file mode 100644 index 0000000000000000000000000000000000000000..7ea4d6e7089929f1a82d86c9098accb8cc024995 --- /dev/null +++ b/lib/GEODE/Metadata/SplitContext.hs @@ -0,0 +1,87 @@ +{-# LANGUAGE DeriveFunctor, GADTs #-} +module GEODE.Metadata.SplitContext + ( Current(..) + , Field(..) + , HeadWords + , SplitContext(..) + , SplitContextT(..) + , evalSplit + , evalSplitT + , next + , runSplit + , runSplitT ) where + +import Control.Monad.Identity (Identity(..)) +import Data.Map as Map (Map, (!?), delete, empty, insert) +import Data.Text (Text) + +type HeadWords = Map Text Int +data Current = Current { page :: Int, rank :: Int, headWords :: HeadWords } +data Field a where + Page :: Field Int + Rank :: Field Int + HeadWord :: Text -> Field (Maybe Int) + +class Monad m => SplitContext m where + context :: m Current + update :: (Current -> Current) -> m () + + get :: Field a -> m a + get Page = page <$> context + get Rank = rank <$> context + get (HeadWord h) = ((!? h) . headWords) <$> context + + set :: Field a -> a -> m () + set Page p = update $ \c -> c { page = p } + set Rank r = update $ \c -> c { rank = r } + set (HeadWord h) Nothing = update $ \c -> c { headWords = delete h $ headWords c } + set (HeadWord h) (Just n) = update $ \c -> c { headWords = insert h n $ headWords c } + + editLookup :: Field a -> (a -> a) -> m a + editLookup field f = get field >>= (\a -> set field a *> pure a) . f + + lookupEdit :: Field a -> (a -> a) -> m a + lookupEdit field f = get field >>= \a -> set field (f a) *> pure a + + edit :: Field a -> (a -> a) -> m () + edit field = (() <$) . editLookup field + + {-# MINIMAL context, update #-} + +newtype SplitContextT m a = + SplitContextT { runWithContext :: Current -> m (a, Current) } + deriving (Functor) + +instance Monad m => Applicative (SplitContextT m) where + pure a = SplitContextT $ \current -> pure (a, current) + sCF <*> sCA = SplitContextT $ \current0 -> do + (f, current1) <- runWithContext sCF current0 + (a, current2) <- runWithContext sCA current1 + pure (f a, current2) + +instance Monad m => Monad (SplitContextT m) where + sCA >>= f = SplitContextT $ \current0 -> do + (a, current1) <- runWithContext sCA current0 + runWithContext (f a) current1 + +instance Monad m => SplitContext (SplitContextT m) where + context = SplitContextT $ \current -> pure (current, current) + update f = SplitContextT $ \current -> pure ((), f current) + +next :: SplitContext m => Field a -> m Int +next f@(HeadWord _) = lookupEdit f (Just . maybe 1 (+1)) >>= pure . maybe 0 id +next Page = lookupEdit Page (+1) +next Rank = lookupEdit Rank (+1) + +runSplitT :: SplitContextT m a -> m (a, Current) +runSplitT = flip runWithContext $ + Current { page = 1, rank = 1, headWords = Map.empty } + +evalSplitT :: Functor m => SplitContextT m a -> m a +evalSplitT = fmap fst . runSplitT + +runSplit :: SplitContextT Identity a -> (a, Current) +runSplit = runIdentity . runSplitT + +evalSplit :: SplitContextT Identity a -> a +evalSplit = fst . runSplit diff --git a/lib/GEODE/Metadata/TSV.hs b/lib/GEODE/Metadata/TSV.hs index e8e84d2c7fd59407ca396b6e0a225483c7afa089..87766c7452ffc494588f6f3dc7c53be7da70e62e 100644 --- a/lib/GEODE/Metadata/TSV.hs +++ b/lib/GEODE/Metadata/TSV.hs @@ -1,24 +1,19 @@ -{-# LANGUAGE DeriveGeneric, FlexibleInstances, ScopedTypeVariables, UndecidableInstances #-} +{-# LANGUAGE FlexibleInstances, ScopedTypeVariables #-} module GEODE.Metadata.TSV - ( Default(..) - , DefaultHeader(..) - , Glue(..) - , Sections(..) - , readNamedTsv + ( readNamedTsv , readTsv , toTsv , tsvFile , tsvLines ) where -import Data.ByteString.Char8 as StrictByteString (pack) import Data.ByteString.Lazy as ByteString (putStr, readFile, writeFile) import Data.Csv ( DecodeOptions(..), EncodeOptions(..), FromNamedRecord(..), FromRecord - , HasHeader(..), ToNamedRecord(..), ToRecord(..), decodeByNameWith, decodeWith - , defaultEncodeOptions, encodeByNameWith, encodeWith, header ) + , HasHeader(..), ToNamedRecord(..), ToRecord(..), decodeByNameWith + , decodeWith, defaultEncodeOptions, encodeByNameWith, encodeWith ) import Data.Foldable (toList) -import Data.HashMap.Strict ((!), union) -import Data.Vector (Vector, fromList) +import Data.Vector (Vector) +import GEODE.Metadata.TSV.Header (HasDefaultHeader, getHeader, for) readNamedTsv :: FromNamedRecord a => FilePath -> IO (Either String (Vector a)) readNamedTsv source = @@ -31,53 +26,14 @@ readTsv source = decodeWith fromTsv NoHeader <$> ByteString.readFile source where fromTsv = DecodeOptions {decDelimiter = fromIntegral (fromEnum '\t')} -newtype Default a = Default { defaultHeader :: [String] } - -class DefaultHeader a where - headerSection :: Default a - -data Glue a b = Glue a b -newtype Sections a = Sections a - -instance (DefaultHeader a, DefaultHeader b) => DefaultHeader (Glue a b) where - headerSection = Default (a ++ b) - where - Default a = (headerSection :: Default a) - Default b = (headerSection :: Default b) - -instance (DefaultHeader a, DefaultHeader b) => DefaultHeader (Sections (a, b)) where - headerSection = Default (a ++ b) - where - Default a = (headerSection :: Default a) - Default b = (headerSection :: Default b) - -instance (ToNamedRecord a, ToNamedRecord b) => ToNamedRecord (Glue a b) where - toNamedRecord (Glue a b) = union (toNamedRecord a) (toNamedRecord b) - -instance (ToNamedRecord a, ToNamedRecord b) => ToNamedRecord (Sections (a, b)) where - toNamedRecord (Sections (a, b)) = union (toNamedRecord a) (toNamedRecord b) - -instance (FromNamedRecord a, FromNamedRecord b) => FromNamedRecord (Glue a b) where - parseNamedRecord nr = Glue <$> parseNamedRecord nr <*> parseNamedRecord nr - -instance (DefaultHeader a, ToNamedRecord a) => ToRecord a where - toRecord = fromList . prepare . toNamedRecord - where - Default fields = (headerSection :: Default a) - prepare namedRecord = (namedRecord !) . StrictByteString.pack <$> fields - toTsv :: EncodeOptions toTsv = defaultEncodeOptions { encDelimiter = fromIntegral (fromEnum '\t') , encUseCrLf = False } -tsvFile :: forall a. (DefaultHeader a, ToNamedRecord a) => FilePath -> [a] -> IO () +tsvFile :: forall a. (HasDefaultHeader a, ToNamedRecord a) => FilePath -> [a] -> IO () tsvFile target = - ByteString.writeFile target - . encodeByNameWith toTsv (header $ StrictByteString.pack <$> fields) - where - Default fields = (headerSection :: Default a) + ByteString.writeFile target . encodeByNameWith toTsv (getHeader (for :: a)) tsvLines :: (Foldable t, ToRecord a) => t a -> IO () tsvLines = ByteString.putStr . encodeWith toTsv . toList - diff --git a/lib/GEODE/Metadata/TSV/Header.hs b/lib/GEODE/Metadata/TSV/Header.hs new file mode 100644 index 0000000000000000000000000000000000000000..bfb43c770f5ffcab7d06d5fe280526762d31e7ec --- /dev/null +++ b/lib/GEODE/Metadata/TSV/Header.hs @@ -0,0 +1,56 @@ +{-# LANGUAGE FlexibleInstances, ScopedTypeVariables, UndecidableInstances #-} +module GEODE.Metadata.TSV.Header + ( DefaultFields(..) + , Glue(..) + , HasDefaultHeader(..) + , Concat(..) + , WithDefaultHeader(..) + , for + , getHeader ) where + +import Data.ByteString.Char8 as StrictByteString (pack) +import Data.Csv (FromNamedRecord(..), Header, ToNamedRecord(..), ToRecord(..)) +import Data.HashMap.Strict ((!), union) +import Data.Vector (fromList) + +newtype WithDefaultHeader a = WithDefaultHeader a +newtype DefaultFields a = DefaultFields [String] +class HasDefaultHeader a where + defaultFields :: DefaultFields a + +instance (HasDefaultHeader a, HasDefaultHeader b) => + HasDefaultHeader (Glue a b) where + defaultFields = DefaultFields (a ++ b) + where + DefaultFields a = (defaultFields :: DefaultFields a) + DefaultFields b = (defaultFields :: DefaultFields b) + +instance (HasDefaultHeader a, HasDefaultHeader b) => + HasDefaultHeader (Concat (a, b)) where + defaultFields = DefaultFields (a ++ b) + where + DefaultFields a = (defaultFields :: DefaultFields a) + DefaultFields b = (defaultFields :: DefaultFields b) + +getHeader :: forall a. HasDefaultHeader a => a -> Header +getHeader _ = StrictByteString.pack <$> fromList fields + where + DefaultFields fields = (defaultFields :: DefaultFields a) + +for :: HasDefaultHeader a => a +for = undefined + +data Glue a b = Glue a b +newtype Concat a = Concat a + +instance (ToNamedRecord a, ToNamedRecord b) => ToNamedRecord (Glue a b) where + toNamedRecord (Glue a b) = union (toNamedRecord a) (toNamedRecord b) + +instance (ToNamedRecord a, ToNamedRecord b) => ToNamedRecord (Concat (a, b)) where + toNamedRecord (Concat (a, b)) = union (toNamedRecord a) (toNamedRecord b) + +instance (FromNamedRecord a, FromNamedRecord b) => FromNamedRecord (Glue a b) where + parseNamedRecord nr = Glue <$> parseNamedRecord nr <*> parseNamedRecord nr + +instance (HasDefaultHeader a, ToNamedRecord a) => ToRecord (WithDefaultHeader a) where + toRecord (WithDefaultHeader a) = (toNamedRecord a !) <$> getHeader a diff --git a/test/GEODE/Metadata/TestEntry.hs b/test/GEODE/Metadata/TestEntry.hs index 3faadd7616f9e3ef242f06044cbdb86648cb8d91..ec7aa2c047dd48c732ceb9e30af901c956e5513b 100644 --- a/test/GEODE/Metadata/TestEntry.hs +++ b/test/GEODE/Metadata/TestEntry.hs @@ -4,11 +4,13 @@ module GEODE.Metadata.TestEntry (testEntry) where import Control.Monad.State (evalState) import Data.ByteString.Char8 as BS (pack) import Data.Csv (ToNamedRecord(..)) +import Data.Foldable (toList) import Data.HashMap.Strict ((!?)) import Data.Map as Map (empty) import Data.Text (dropEnd) -import GEODE.Metadata (Default(..), DefaultHeader(..), entry, normalize) +import GEODE.Metadata (evalSplit, newEntry, normalize) import GEODE.Metadata.Entry (Entry(..)) +import GEODE.Metadata.TSV.Header (getHeader, for) import Test.HUnit (Test(..), (~?=)) import Test.HUnit.Extra (isJust) @@ -21,10 +23,9 @@ testToNamedRecord = TestLabel "Testing ToNamedRecord instance" $ TestList [ has3Keys, validDefaultHeader ] where has3Keys = length aNamedRecord ~?= 3 - validDefaultHeader = - TestList ((isJust . (aNamedRecord !?) . BS.pack) <$> defaultHeader) + validDefaultHeader = TestList . toList $ + (isJust . (aNamedRecord !?)) <$> getHeader (for :: Entry) aNamedRecord = toNamedRecord anEntry - Default defaultHeader = (headerSection :: Default Entry) testNormalize :: Test testNormalize = TestLabel "Testing function normalize" . TestList $ @@ -40,10 +41,10 @@ testEntryConstructor = TestLabel "Testing the entry constructor" . TestList $ , a1 ~?= "a-1" , b0 ~?= "b-0" ] where - (a0, a1, b0) = flip evalState Map.empty $ (,,) - <$> (name <$> entry "A" 1) - <*> (name <$> entry "A" 1) - <*> (name <$> entry "B" 2) + (a0, a1, b0) = evalSplit $ (,,) + <$> (name <$> newEntry "A") + <*> (name <$> newEntry "A") + <*> (name <$> newEntry "B") anEntry :: Entry anEntry = Entry "ALCALA DE HÉNARÈS" "alcaladehénarès-0" 1212 diff --git a/test/GEODE/Metadata/TestPrimaryKey.hs b/test/GEODE/Metadata/TestPrimaryKey.hs index aa32db01300e10a47253c708395f8911bfe2b0b3..62c9662db9ead2e23e8f4fc9075d63c254c97af5 100644 --- a/test/GEODE/Metadata/TestPrimaryKey.hs +++ b/test/GEODE/Metadata/TestPrimaryKey.hs @@ -1,10 +1,11 @@ {-# LANGUAGE OverloadedStrings #-} module GEODE.Metadata.TestPrimaryKey (testPrimaryKey) where -import Data.ByteString.Char8 as BS (pack) import Data.Csv (ToNamedRecord(..)) +import Data.Foldable (toList) import Data.HashMap.Strict ((!?)) -import GEODE.Metadata (Book(..), Default(..), DefaultHeader(..), PrimaryKey(..)) +import GEODE.Metadata (Book(..), PrimaryKey(..)) +import GEODE.Metadata.TSV.Header (getHeader, for) import Test.HUnit (Test(..), (~?=)) import Test.HUnit.Extra (isJust) @@ -17,10 +18,9 @@ testToNamedRecord = TestLabel "Testing ToNamedRecord instance" $ TestList [ has3Keys, validDefaultHeader ] where has3Keys = length aNamedRecord ~?= 3 - validDefaultHeader = - TestList ((isJust . (aNamedRecord !?) . BS.pack) <$> defaultHeader) + validDefaultHeader = TestList . toList $ + (isJust . (aNamedRecord !?)) <$> getHeader (for :: PrimaryKey) aNamedRecord = toNamedRecord aPrimaryKey - Default defaultHeader = (headerSection :: Default PrimaryKey) aPrimaryKey :: PrimaryKey aPrimaryKey = PrimaryKey LGE 1 1212 -- ALCALA DE HÉNARÈS diff --git a/test/Main b/test/Main new file mode 100755 index 0000000000000000000000000000000000000000..23ac0dc919d62b7ef40a3873297c16a3620c54b3 Binary files /dev/null and b/test/Main differ diff --git a/test/Main.hs b/test/Main.hs index 38a4f8a4a9194b2b9d3883d905cbd10b5cbdc377..39fad5f72f73bf381c724aa55dc00b48331bbe01 100644 --- a/test/Main.hs +++ b/test/Main.hs @@ -1,6 +1,6 @@ module Main (main) where -import Test.HUnit (Counts(..), Test(..), runTestTT, showCounts) +import Test.HUnit (Test(..), runTestTTAndExit) import GEODE.Metadata.TestPrimaryKey (testPrimaryKey) import GEODE.Metadata.TestEntry (testEntry) import System.Exit (exitFailure, exitSuccess) @@ -10,9 +10,4 @@ testMetadata = TestLabel "Metadata suite" $ TestList [ testPrimaryKey, testEntry ] main :: IO () -main = do - result <- runTestTT testMetadata - putStr $ showCounts result - if (errors result == 0) && (failures result == 0) - then exitSuccess - else exitFailure +main = runTestTTAndExit testMetadata