{-# LANGUAGE DeriveGeneric, OverloadedStrings #-} module GEODE.Metadata.Contrastive ( Contrastive(..) , MultiText(..) , formatList ) where import Data.Csv ( DefaultOrdered(..), FromField(..), FromNamedRecord(..), ToNamedRecord(..) , ToField(..) ) import Data.Text (Text, intercalate, splitOn, uncons, unsnoc) import GHC.Generics (Generic) newtype MultiText = MultiText { getList :: [Text] } deriving (Show) -- | We represent fields with multiple values by ':'-surrounded values -- (including a leading and trailing ':') to be able to always write the same -- regex to match for their value (/.*:Géographie:.*/, no matter if it occurs at -- the begining, the end or in the middle of the field formatList :: MultiText -> Text formatList = colonFormat . getList where colonFormat ts = ":" <> intercalate ":" ts <> ":" instance FromField MultiText where parseField f = parseField f >>= checkAndSplit where popBoundaries t0 = do (firstChar,t1) <- uncons t0 (middle,lastChar) <- unsnoc t1 pure (firstChar,middle,lastChar) checkAndSplit t = case popBoundaries t of Just (':',"",':') -> pure $ MultiText [] Just (':',fields,':') -> pure.MultiText $ splitOn ":" fields _ -> mempty instance ToField MultiText where toField = toField . formatList data Contrastive = Contrastive { authors :: MultiText , domains :: MultiText , subCorpus :: MultiText } deriving (Generic, Show) instance DefaultOrdered Contrastive instance FromNamedRecord Contrastive instance ToNamedRecord Contrastive