module Text.TEI ( (.=) , corpusHeader , publicationStmt , sourceDesc , teiHeader , text , title ) where import Data.Text (unpack) import GEODE.Metadata (Book(..), Contrastive(..), Has(..), PrimaryKey(..), formatList, uid) import Text.XML.HXT.Core (ArrowXml, XmlTree, aelem, attr, mkelem, selem, txt) corpusHeader :: ArrowXml a => Book -> String -> a b XmlTree corpusHeader EDdA = eddaHeader corpusHeader LGE = lgeHeader corpusHeader Wikipedia = wikiHeader eddaHeader :: ArrowXml a => String -> a b XmlTree eddaHeader strTome = teiHeader (titleStmt strTome ("Digitized" `by` "University of Chicago Library" ++ "Published" `by` "ARTFL")) (corpusPublicationStmt) (sourceDesc [ title ("L'Encyclopédie T" <> strTome) , selem "author" [ txt "Collective" ] , selem "creation" [ selem "date" [ txt "1752" ] ] ]) lgeHeader :: ArrowXml a => String -> a b XmlTree lgeHeader strTome = teiHeader (titleStmt strTome ("Digitized" `by` "Bibliothèque Nationale de France")) (corpusPublicationStmt) (sourceDesc [ title ("La Grande Encyclopédie T" <> strTome) , selem "author" [ txt "Collective" ] , selem "creation" [ selem "date" [ txt "1885" ] ] , selem "imprint" [ selem "date" [ txt "1885" ] , selem "publisher" [ txt "H. Lamirault et Cie," ] , selem "pubplace" [ txt "Paris" ] ] , mkelem "biblScope" [ attr "unit" (txt "volume") ] [ txt strTome ] ]) wikiHeader :: ArrowXml a => String -> a b XmlTree wikiHeader _ = teiHeader (titleStmt "Wikipédia, L'encyclopédie libre" []) (corpusPublicationStmt) (sourceDesc [ selem "author" [ selem "orgName" [ txt "Wikimedia Foundation" ] ] ]) teiHeader :: ArrowXml a => a b XmlTree -> a b XmlTree -> a b XmlTree -> a b XmlTree teiHeader aTitle aPublication aSource = selem "teiHeader" [ selem "fileDesc" [ aTitle, aPublication, aSource ] ] titleStmt :: ArrowXml a => String -> [a b XmlTree] -> a b XmlTree titleStmt tome resps = selem "titleStmt" [ title tome , selem "respStmt" (resps ++ "Annotated and encoded" `by` "ICAR") ] title :: ArrowXml a => String -> a b XmlTree title titleContent = selem "title" [txt titleContent] corpusPublicationStmt :: ArrowXml a => a b XmlTree corpusPublicationStmt = (publicationStmt [selem "distributor" [selem "orgName" [txt "Project GÉODE"] ,selem "address" [selem "addrline" [txt "ICAR UMR 5191"] ,selem "addrline" [txt "ENS de Lyon"]]]]) publicationStmt :: ArrowXml a => [a b XmlTree] -> a b XmlTree publicationStmt = selem "publicationStmt" sourceDesc :: ArrowXml a => [a b XmlTree] -> a b XmlTree sourceDesc biblContent = selem "sourceDesc" [selem "bibl" biblContent] by :: ArrowXml a => String -> String -> [a b XmlTree] by resp orgName = [ selem "resp" [ txt (resp <> " by") ] , selem "orgName" [ txt orgName ] ] type TXMText a = (Has Contrastive a, Has PrimaryKey a) text :: (ArrowXml a, TXMText b) => String -> b -> a c XmlTree -> a c XmlTree text unitName txmText body = mkelem "text" metadata [ body, aelem "milestone" [ "unit" .= unitName ] ] where metadata = [ "uid" .= uid txmText , "book" .= (show . book $ get txmText) , "author" .= (unpack . formatList . authors $ get txmText) , "domains" .= (unpack . formatList . domains $ get txmText) ] (.=) :: ArrowXml a => String -> String -> a b XmlTree key .= value = attr key (txt value)