Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
module Text.TEI
( (.=)
, corpusHeader
, publicationStmt
, sourceDesc
, teiHeader
, text
, title ) where
import Data.Metadata
( Book(..), FromBook(..), HasAuthors(..), HasDomains(..), TXMText, Unique(..)
, list )
import Text.XML.HXT.Core ( ArrowXml, XmlTree, aelem, attr, mkelem, selem, txt )
corpusHeader :: ArrowXml a => Book -> String -> a b XmlTree
corpusHeader EDdA = eddaHeader
corpusHeader LGE = lgeHeader
corpusHeader Wikipedia = wikiHeader
eddaHeader :: ArrowXml a => String -> a b XmlTree
eddaHeader strTome =
teiHeader
(titleStmt strTome ("Digitized" `by` "University of Chicago Library"
++ "Published" `by` "ARTFL"))
(corpusPublicationStmt)
(sourceDesc
[ title ("L'Encyclopédie T" <> strTome)
, selem "author" [ txt "Collective" ]
, selem "creation" [ selem "date" [ txt "1752" ] ] ])
lgeHeader :: ArrowXml a => String -> a b XmlTree
lgeHeader strTome =
teiHeader
(titleStmt strTome ("Digitized" `by` "Bibliothèque Nationale de France"))
(corpusPublicationStmt)
(sourceDesc
[ title ("La Grande Encyclopédie T" <> strTome)
, selem "author" [ txt "Collective" ]
, selem "creation" [ selem "date" [ txt "1885" ] ]
, selem "imprint" [ selem "date" [ txt "1885" ]
, selem "publisher" [ txt "H. Lamirault et Cie," ]
, selem "pubplace" [ txt "Paris" ] ]
, mkelem "biblScope" [ attr "unit" (txt "volume") ] [ txt strTome ] ])
wikiHeader :: ArrowXml a => String -> a b XmlTree
wikiHeader _ =
teiHeader
(titleStmt "Wikipédia, L'encyclopédie libre" [])
(corpusPublicationStmt)
(sourceDesc
[ selem "author" [ selem "orgName" [ txt "Wikimedia Foundation" ] ] ])
teiHeader :: ArrowXml a =>
a b XmlTree -> a b XmlTree -> a b XmlTree -> a b XmlTree
teiHeader aTitle aPublication aSource =
selem "teiHeader"
[ selem "fileDesc" [ aTitle, aPublication, aSource ] ]
titleStmt :: ArrowXml a => String -> [a b XmlTree] -> a b XmlTree
titleStmt tome resps =
selem "titleStmt"
[ title tome
, selem "respStmt" (resps ++ "Annotated and encoded" `by` "ICAR") ]
title :: ArrowXml a => String -> a b XmlTree
title titleContent = selem "title" [txt titleContent]
corpusPublicationStmt :: ArrowXml a => a b XmlTree
corpusPublicationStmt =
(publicationStmt
[selem "distributor"
[selem "orgName" [txt "Project GÉODE"]
,selem "address"
[selem "addrline" [txt "ICAR UMR 5191"]
,selem "addrline" [txt "ENS de Lyon"]]]])
publicationStmt :: ArrowXml a => [a b XmlTree] -> a b XmlTree
publicationStmt = selem "publicationStmt"
sourceDesc :: ArrowXml a => [a b XmlTree] -> a b XmlTree
sourceDesc biblContent =
selem "sourceDesc" [selem "bibl" biblContent]
by :: ArrowXml a => String -> String -> [a b XmlTree]
by resp orgName = [ selem "resp" [ txt (resp <> " by") ]
, selem "orgName" [ txt orgName ] ]
text :: (ArrowXml a, TXMText b) => String -> b -> a c XmlTree -> a c XmlTree
text unitName txmText body =
mkelem "text" metadata [ body, aelem "milestone" [ "unit" .= unitName ] ]
where
metadata =
[ "uid" .= uid txmText
, "book" .= show (book txmText)
, "author" .= list (authors txmText)
, "domains" .= list (domains txmText) ]
(.=) :: ArrowXml a => String -> String -> a b XmlTree
key .= value = attr key (txt value)