Newer
Older
module Text.TEI
( (.=)
, corpusHeader
, publicationStmt
, sourceDesc
, teiHeader
, text
, title ) where
import Data.Text (unpack)
import GEODE.Metadata
(Book(..), Contrastive(..), Has(..), PrimaryKey(..), formatList, uid)
import Text.XML.HXT.Core (ArrowXml, XmlTree, aelem, attr, mkelem, selem, txt)
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
corpusHeader :: ArrowXml a => Book -> String -> a b XmlTree
corpusHeader EDdA = eddaHeader
corpusHeader LGE = lgeHeader
corpusHeader Wikipedia = wikiHeader
eddaHeader :: ArrowXml a => String -> a b XmlTree
eddaHeader strTome =
teiHeader
(titleStmt strTome ("Digitized" `by` "University of Chicago Library"
++ "Published" `by` "ARTFL"))
(corpusPublicationStmt)
(sourceDesc
[ title ("L'Encyclopédie T" <> strTome)
, selem "author" [ txt "Collective" ]
, selem "creation" [ selem "date" [ txt "1752" ] ] ])
lgeHeader :: ArrowXml a => String -> a b XmlTree
lgeHeader strTome =
teiHeader
(titleStmt strTome ("Digitized" `by` "Bibliothèque Nationale de France"))
(corpusPublicationStmt)
(sourceDesc
[ title ("La Grande Encyclopédie T" <> strTome)
, selem "author" [ txt "Collective" ]
, selem "creation" [ selem "date" [ txt "1885" ] ]
, selem "imprint" [ selem "date" [ txt "1885" ]
, selem "publisher" [ txt "H. Lamirault et Cie," ]
, selem "pubplace" [ txt "Paris" ] ]
, mkelem "biblScope" [ attr "unit" (txt "volume") ] [ txt strTome ] ])
wikiHeader :: ArrowXml a => String -> a b XmlTree
wikiHeader _ =
teiHeader
(titleStmt "Wikipédia, L'encyclopédie libre" [])
(corpusPublicationStmt)
(sourceDesc
[ selem "author" [ selem "orgName" [ txt "Wikimedia Foundation" ] ] ])
teiHeader :: ArrowXml a =>
a b XmlTree -> a b XmlTree -> a b XmlTree -> a b XmlTree
teiHeader aTitle aPublication aSource =
selem "teiHeader"
[ selem "fileDesc" [ aTitle, aPublication, aSource ] ]
titleStmt :: ArrowXml a => String -> [a b XmlTree] -> a b XmlTree
titleStmt tome resps =
selem "titleStmt"
[ title tome
, selem "respStmt" (resps ++ "Annotated and encoded" `by` "ICAR") ]
title :: ArrowXml a => String -> a b XmlTree
title titleContent = selem "title" [txt titleContent]
corpusPublicationStmt :: ArrowXml a => a b XmlTree
corpusPublicationStmt =
(publicationStmt
[selem "distributor"
[selem "orgName" [txt "Project GÉODE"]
,selem "address"
[selem "addrline" [txt "ICAR UMR 5191"]
,selem "addrline" [txt "ENS de Lyon"]]]])
publicationStmt :: ArrowXml a => [a b XmlTree] -> a b XmlTree
publicationStmt = selem "publicationStmt"
sourceDesc :: ArrowXml a => [a b XmlTree] -> a b XmlTree
sourceDesc biblContent =
selem "sourceDesc" [selem "bibl" biblContent]
by :: ArrowXml a => String -> String -> [a b XmlTree]
by resp orgName = [ selem "resp" [ txt (resp <> " by") ]
, selem "orgName" [ txt orgName ] ]
type TXMText a = (Has Contrastive a, Has PrimaryKey a)
text :: (ArrowXml a, TXMText b) => String -> b -> a c XmlTree -> a c XmlTree
text unitName txmText body =
mkelem "text" metadata [ body, aelem "milestone" [ "unit" .= unitName ] ]
where
metadata =
[ "uid" .= uid txmText
, "book" .= (show . book $ get txmText)
, "author" .= (unpack . formatList . authors $ get txmText)
, "domains" .= (unpack . formatList . domains $ get txmText)
, "subCorpus" .= (unpack . formatList . subCorpus $ get txmText) ]
(.=) :: ArrowXml a => String -> String -> a b XmlTree
key .= value = attr key (txt value)