Skip to content
Snippets Groups Projects
Commit f7b5f829 authored by Alice Brenon's avatar Alice Brenon
Browse files

Exposing some basic stuff required by recent changes in EDdA-Clinic

parents
No related branches found
No related tags found
No related merge requests found
# Revision history for ghc-geode
## 0.1.0.0 -- 2023-05-23
* Exposing some basic stuff required by recent changes in [EDdA-Clinic](https://gitlab.huma-num.fr/alicebrenon/EDdAClinic)
This diff is collapsed.
cabal-version: 2.4
name: ghc-geode
version: 0.1.0.0
synopsis:
Data structures and tooling used in project GEODE
-- A longer description of the package.
-- description:
homepage: https://geode-project.github.io/
-- A URL where users can report bugs.
-- bug-reports:
license: GPL-3.0-or-later
license-file: LICENSE
author: Alice BRENON
maintainer: alice.brenon@ens-lyon.fr
-- A copyright notice.
-- copyright:
-- category:
extra-source-files: CHANGELOG.md
library
exposed-modules: GEODE.Metadata
-- Modules included in this library but not exported.
other-modules: GEODE.Metadata.Article
, GEODE.Metadata.Projector
, GEODE.Metadata.Types
-- LANGUAGE extensions used by modules in this package.
-- other-extensions:
build-depends: base >=4.16 && <4.17
, bytestring >= 0.11.3 && <0.12
, containers >= 0.6.5.1 && <0.7
, cassava >= 0.5.3 && <0.6
, text >= 1.2.5 && <1.3
, vector >= 0.12.3.1 && <0.13
hs-source-dirs: lib
default-language: Haskell2010
guix.scm 0 → 100644
(use-modules ((gnu packages haskell-xyz) #:select (ghc-cassava))
((guix build-system haskell) #:select (haskell-build-system))
((guix git-download) #:select (git-predicate))
((guix gexp) #:select (local-file))
((guix licenses) #:select (gpl3+))
((guix packages) #:select (origin package)))
(let
((%source-dir (dirname (current-filename))))
(package
(name "ghc-geode")
(version "0.1.0.0")
(source
(local-file %source-dir
#:recursive? #t
#:select? (git-predicate %source-dir)))
(build-system haskell-build-system)
(inputs (list ghc-cassava))
(home-page "https://gitlab.liris.cnrs.fr/geode/ghc-geode")
(synopsis "Data structures and tooling used in project GEODE")
(description
"Reads the files as streams of markup events to allow altering the file
content on the fly and fixing broken files which aren't proper XML")
(license gpl3+)))
{-# LANGUAGE NamedFieldPuns, OverloadedStrings #-}
module GEODE.Metadata (
Article(..)
, Authors(..)
, Book(..)
, Domains(..)
, FromBook(..)
, HasAuthors(..)
, HasDomains(..)
, InFile(..)
, TXMText
, Unique(..)
, groupBy
, list
, readTsv
, sortBy
, tsvFile
, tsvLines
) where
import Data.ByteString.Lazy as ByteString (putStr, readFile, writeFile)
import Data.ByteString.Char8 as StrictByteString (pack)
import Data.Csv
( DecodeOptions(..), EncodeOptions(..), FromRecord, HasHeader(..)
, ToNamedRecord, ToRecord, decodeWith, defaultEncodeOptions, encodeByNameWith
, encodeWith, header )
import Data.Foldable as Foldable (toList)
import Data.List (sortOn)
import Data.Map.Strict as Map (alter, empty, toList)
import Data.Text as Text (Text, intercalate, unpack)
import Data.Vector as Vector (Vector)
import GEODE.Metadata.Article
import GEODE.Metadata.Projector
(FromBook(..), HasAuthors(..), HasDomains(..), InFile(..), TXMText, Unique(..))
import GEODE.Metadata.Types (Authors(..), Book(..), Domains(..))
list :: [Text] -> String
list ts = Text.unpack $ ":" <> intercalate ":" ts <> ":"
readTsv :: FromRecord a => FilePath -> IO (Either String (Vector a))
readTsv source = decodeWith fromTsv HasHeader <$> ByteString.readFile source
where
fromTsv = DecodeOptions {decDelimiter = fromIntegral (fromEnum '\t')}
toTsv :: EncodeOptions
toTsv = defaultEncodeOptions
{ encDelimiter = fromIntegral (fromEnum '\t')
, encUseCrLf = False }
tsvFile :: ToNamedRecord a => FilePath -> [String] -> [a] -> IO ()
tsvFile target fields =
ByteString.writeFile target
. encodeByNameWith toTsv (header $ StrictByteString.pack <$> fields)
tsvLines :: ToRecord a => [a] -> IO ()
tsvLines = ByteString.putStr . encodeWith toTsv
sortBy :: (Foldable t, Ord k) => (a -> k) -> t a -> [a]
sortBy field = sortOn field . Foldable.toList
groupBy :: (Foldable t, Ord k) => (v -> k) -> t v -> [(k, [v])]
groupBy field = Map.toList . foldr group Map.empty
where
group article = Map.alter (Just . maybe [article] (article:)) (field article)
{-# LANGUAGE DeriveGeneric, NamedFieldPuns #-}
module GEODE.Metadata.Article
( Article(tome, name, headWord, rank, page) ) where
import Data.Csv (FromRecord(..), ToNamedRecord(..), ToRecord(..))
import GEODE.Metadata.Types (Book)
import GEODE.Metadata.Projector (InFile(..), Unique(..))
import qualified GEODE.Metadata.Projector as Projector (FromBook(..))
import Data.Text (Text, unpack)
import GHC.Generics (Generic)
import Text.Printf (printf)
data Article = Article
{ book :: Book
, tome :: Int
, name :: Text
, headWord :: Text
, rank :: Int
, page :: Int } deriving (Generic, Show)
instance FromRecord Article
instance ToRecord Article
instance ToNamedRecord Article
instance Unique Article where
uid (Article {book, tome, name}) =
printf "%s_%d_%s" (show book) tome (unpack name)
instance Projector.FromBook Article where
book = book
instance InFile Article where
relativePath (Article {book, tome, name}) =
printf "%s/T%d/%s" (show book) tome (unpack name)
{-# LANGUAGE ConstraintKinds #-}
module GEODE.Metadata.Projector
( FromBook(..)
, HasAuthors(..)
, HasDomains(..)
, InFile(..)
, TXMText
, Unique(..) ) where
import GEODE.Metadata.Types (Authors(..), Book, Domains(..))
import Data.Text (Text)
class Unique a where
uid :: a -> String
class FromBook a where
book :: a -> Book
class HasAuthors a where
authors_ :: a -> Authors
authors :: a -> [Text]
authors = getAuthors . authors_
class HasDomains a where
domains_ :: a -> Domains
domains :: a -> [Text]
domains = getDomains . domains_
class InFile a where
relativePath :: a -> FilePath
type TXMText a = (Unique a, FromBook a, HasAuthors a, HasDomains a)
{-# LANGUAGE OverloadedStrings #-}
module GEODE.Metadata.Types
( Authors(..)
, Book(..)
, Domains(..) ) where
import Control.Applicative (empty)
import Data.Csv (Field, FromField(..), Parser, ToField(..))
import Data.Text (Text, intercalate, splitOn)
data Book = EDdA | LGE | Wikipedia deriving (Eq, Ord, Read, Show)
newtype Authors = Authors
{ getAuthors :: [Text] } deriving Show
newtype Domains = Domains
{ getDomains :: [Text] } deriving Show
instance FromField Book where
parseField "EDdA" = pure EDdA
parseField "LGE" = pure LGE
parseField "Wikipedia" = pure Wikipedia
parseField _ = empty
instance ToField Book where
toField = toField . show
sepBy :: Text -> Field -> Parser [Text]
sepBy s = fmap (splitOn s) . parseField
instance FromField Authors where
parseField = fmap Authors . sepBy " & "
instance ToField Authors where
toField = toField . intercalate " & ". getAuthors
instance FromField Domains where
parseField = fmap Domains . sepBy " | "
instance ToField Domains where
toField = toField . intercalate " | ". getDomains
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment