-
Alice Brenon authoredec41aafb
TEI.hs 3.51 KiB
module Text.TEI
( (.=)
, corpusHeader
, publicationStmt
, sourceDesc
, teiHeader
, text
, title ) where
import Data.Text (unpack)
import GEODE.Metadata
(Book(..), Contrastive(..), Has(..), PrimaryKey(..), formatList, uid)
import Text.XML.HXT.Core (ArrowXml, XmlTree, aelem, attr, mkelem, selem, txt)
corpusHeader :: ArrowXml a => Book -> String -> a b XmlTree
corpusHeader EDdA = eddaHeader
corpusHeader LGE = lgeHeader
corpusHeader Wikipedia = wikiHeader
eddaHeader :: ArrowXml a => String -> a b XmlTree
eddaHeader strTome =
teiHeader
(titleStmt strTome ("Digitized" `by` "University of Chicago Library"
++ "Published" `by` "ARTFL"))
(corpusPublicationStmt)
(sourceDesc
[ title ("L'Encyclopédie T" <> strTome)
, selem "author" [ txt "Collective" ]
, selem "creation" [ selem "date" [ txt "1752" ] ] ])
lgeHeader :: ArrowXml a => String -> a b XmlTree
lgeHeader strTome =
teiHeader
(titleStmt strTome ("Digitized" `by` "Bibliothèque Nationale de France"))
(corpusPublicationStmt)
(sourceDesc
[ title ("La Grande Encyclopédie T" <> strTome)
, selem "author" [ txt "Collective" ]
, selem "creation" [ selem "date" [ txt "1885" ] ]
, selem "imprint" [ selem "date" [ txt "1885" ]
, selem "publisher" [ txt "H. Lamirault et Cie," ]
, selem "pubplace" [ txt "Paris" ] ]
, mkelem "biblScope" [ attr "unit" (txt "volume") ] [ txt strTome ] ])
wikiHeader :: ArrowXml a => String -> a b XmlTree
wikiHeader _ =
teiHeader
(titleStmt "Wikipédia, L'encyclopédie libre" [])
(corpusPublicationStmt)
(sourceDesc
[ selem "author" [ selem "orgName" [ txt "Wikimedia Foundation" ] ] ])
teiHeader :: ArrowXml a =>
a b XmlTree -> a b XmlTree -> a b XmlTree -> a b XmlTree
teiHeader aTitle aPublication aSource =
selem "teiHeader"
[ selem "fileDesc" [ aTitle, aPublication, aSource ] ]
titleStmt :: ArrowXml a => String -> [a b XmlTree] -> a b XmlTree
titleStmt tome resps =
selem "titleStmt"
[ title tome
, selem "respStmt" (resps ++ "Annotated and encoded" `by` "ICAR") ]
title :: ArrowXml a => String -> a b XmlTree
title titleContent = selem "title" [txt titleContent]
corpusPublicationStmt :: ArrowXml a => a b XmlTree
corpusPublicationStmt =
(publicationStmt
[selem "distributor"
[selem "orgName" [txt "Project GÉODE"]
,selem "address"
[selem "addrline" [txt "ICAR UMR 5191"]
,selem "addrline" [txt "ENS de Lyon"]]]])
publicationStmt :: ArrowXml a => [a b XmlTree] -> a b XmlTree
publicationStmt = selem "publicationStmt"
sourceDesc :: ArrowXml a => [a b XmlTree] -> a b XmlTree
sourceDesc biblContent =
selem "sourceDesc" [selem "bibl" biblContent]
by :: ArrowXml a => String -> String -> [a b XmlTree]
by resp orgName = [ selem "resp" [ txt (resp <> " by") ]
, selem "orgName" [ txt orgName ] ]
type TXMText a = (Has Contrastive a, Has PrimaryKey a)
text :: (ArrowXml a, TXMText b) => String -> b -> a c XmlTree -> a c XmlTree
text unitName txmText body =
mkelem "text" metadata [ body, aelem "milestone" [ "unit" .= unitName ] ]
where
metadata =
[ "uid" .= uid txmText
, "book" .= (show . book $ get txmText)
, "author" .= (unpack . formatList . authors $ get txmText)
, "domains" .= (unpack . formatList . domains $ get txmText)
, "subCorpus" .= (unpack . formatList . subCorpus $ get txmText) ]
(.=) :: ArrowXml a => String -> String -> a b XmlTree
key .= value = attr key (txt value)