Skip to content
Snippets Groups Projects
Commit d797a0d0 authored by Alice Brenon's avatar Alice Brenon
Browse files

Add module (geode packages encoding) containing various projects used to process input files

parent 334d866d
No related branches found
No related tags found
No related merge requests found
;;; Packages used by the GEODE project to process and re-encode input files.
(define-module (geode packages encoding)
  #:use-module ((gnu packages compression) #:select (unzip zip))
  ;; ghc-roman-numerals is required by the inputs of `soprano'; without it in
  ;; this selection the variable is unbound when that package is built.
  ;; NOTE(review): assumes ghc-roman-numerals is exported by
  ;; (gnu packages haskell-xyz) in the Guix revision in use -- confirm.
  #:use-module ((gnu packages haskell-xyz) #:select (ghc-attoparsec
                                                      ghc-edit-distance
                                                      ghc-optparse-applicative
                                                      ghc-pipes
                                                      ghc-roman-numerals
                                                      ghc-xml))
  #:use-module ((guix build-system gnu) #:select (gnu-build-system))
  #:use-module ((guix build-system haskell) #:select (hackage-uri haskell-build-system))
  #:use-module ((guix download) #:select (url-fetch))
  #:use-module ((guix git-download) #:select (git-fetch git-reference))
  #:use-module ((guix licenses) #:select (bsd-3))
  #:use-module ((guix packages) #:select (package origin base32)))
;; Haskell library for looking up and validating HTML5 entities, fetched from
;; Hackage and built with the Haskell build system.
(define-public ghc-html5-entity
  (package
    (name "ghc-html5-entity")
    (version "0.2.0.3")
    (source (origin
              (method url-fetch)
              (uri (hackage-uri "html5-entity" version))
              (sha256
               (base32
                "0bmmzshxanzw5y2y0hvgzz9yw18jqgv535i1xq2a5lf7w8wpj1if"))))
    (build-system haskell-build-system)
    (home-page "https://github.com/zudov/html5-entity/")
    ;; Guix synopses neither start with an article nor end with a period.
    (synopsis "Library for looking up and validating HTML5 entities")
    ;; Descriptions are Texinfo: the upstream Haddock link markup is replaced
    ;; by @url{} and the module name is wrapped in @code{}.
    (description
     "This package provides a library for looking up and validating HTML5
entities.  The document at
@url{http://html.spec.whatwg.org/multipage/entities.json} is used as an
authoritative source of the valid entity names and their corresponding
codepoints.  You can think of this library as bindings to the data from that
file.  For usage see the @code{Text.Html5.Entity} module.")
    (license bsd-3)))
;; XMLFilter, fetched from its Git repository and built with the Haskell
;; build system.  The repository URL doubles as the package home page, so it
;; is bound once and shared by both fields.
(define-public ghc-xmlfilter
  (let ((repository "https://gitlab.huma-num.fr/alicebrenon/XMLFilter"))
    (package
      (name "ghc-xmlfilter")
      (version "0.3.1.0")
      (source
       (origin
         (method git-fetch)
         ;; The commit reference is the version string itself.
         (uri (git-reference
               (url repository)
               (commit version)))
         (sha256
          (base32 "1prsrhpzqx7wcbvzvmc4845f03bvxfazn3v2kfajnr1fnrcbb8nf"))))
      (build-system haskell-build-system)
      (inputs
       (list ghc-attoparsec))
      (home-page repository)
      (synopsis "SAX-based library to handle XML-like documents")
      (description
       "Reads the files as streams of markup events to allow altering the file
content on the fly and fixing broken files which aren't proper XML")
      (license bsd-3))))
;; Scripts and data of project DISCO for processing La Grande Encyclopédie,
;; built with the GNU build system from the project's Git repository.
(define-public processing-lge
  (package
    ;; NOTE(review): Guix package names are conventionally lowercase
    ;; (e.g. "processing-lge"); the original name is kept so that existing
    ;; references to the package by name keep working.
    (name "ProcessingLGE")
    (version "0.1.0.0")
    (home-page "https://gitlab.huma-num.fr/disco-lge/processinglge")
    (source (origin
              (method git-fetch)
              ;; The commit reference is the version string itself.
              (uri (git-reference (url home-page) (commit version)))
              (sha256
               (base32
                "0hzlcwy2zdzgy5a2il19zk24159lwpgnbv4cqbi8jpxv5jwb0sww"))))
    (build-system gnu-build-system)
    ;; Tests are disabled through the dedicated build-system argument rather
    ;; than by deleting the `check' phase.
    ;; NOTE(review): presumably upstream has no test target -- confirm.
    (arguments
     '(#:tests? #f))
    ;; `soprano' is defined further down in this module.
    ;; NOTE(review): this relies on package input fields being evaluated
    ;; lazily, so the forward reference resolves at use time -- confirm.
    (propagated-inputs (list soprano unzip zip))
    (synopsis "Scripts and data for project DISCO")
    (description
     "Scripts to process La Grande Encyclopédie and suggestions of filters to
clean the ALTO files.")
    (license bsd-3)))
;; Soprano, a tool that extracts textual content from ALTO files, fetched from
;; its Git repository and built with the Haskell build system.
(define-public soprano
  (package
    (name "soprano")
    (version "0.2.0.1")
    (home-page "https://gitlab.huma-num.fr/disco-lge/soprano")
    (source (origin
              (method git-fetch)
              ;; The commit reference is the version string itself.
              (uri (git-reference (url home-page) (commit version)))
              (sha256
               (base32 "17njp16nn2lf7xldkdbkhm7p7vji61pmli3fsdbk9azys1k2yhvy"))))
    (build-system haskell-build-system)
    (inputs (list ghc-edit-distance ghc-pipes ghc-roman-numerals ghc-xml
                  ghc-optparse-applicative))
    ;; Guix synopses do not start with an article.
    (synopsis "Extract textual content from ALTO files")
    ;; Descriptions are Texinfo: @url{} is the command for web links, whereas
    ;; @xref{} refers to a node of a Texinfo manual.
    (description
     "Soprano is a tool developed for project GÉODE
(@url{https://geode-project.github.io/}) which studies the geographic discourse
in encyclopedias.  It was designed to extract encyclopedia articles from OCRed
pages represented by a set of ALTO files.
It lets one apply various filtering in the process, for instance specifying the
type of ALTO blocks to retain or setting a quality threshold on OCR recognition.
Articles can be output in raw text or encoded in XML-TEI.")
    (license bsd-3)))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment