title={A {Formal} {Model} of {Dictionary} {Structure} and {Content}},
copyright={Assumed arXiv.org perpetual, non-exclusive license to distribute this article for submissions made before January 2004},
url={https://arxiv.org/abs/0707.3270},
doi={10.48550/ARXIV.0707.3270},
abstract={We show that a general model of lexical information conforms to an abstract model that reflects the hierarchy of information found in a typical dictionary entry. We show that this model can be mapped into a well-formed XML document, and how the XSL transformation language can be used to implement a semantics defined over the abstract model to enable extraction and manipulation of the information in any format.},
urldate={2023-05-31},
author={Romary, Laurent and Ide, Nancy and Kilgarriff, Adam},
year={2007},
keywords={Computation and Language (cs.CL), FOS: Computer and information sciences, FOS: Computer and information sciences},
}
% TEI P5 Guidelines, release 4.6.0, as deposited on Zenodo. Typed as misc because the record
% carries no journal, which the article type requires.
@misc{tei_consortium_tei_2023,
  author     = {{TEI Consortium}},
  title      = {{TEI} {P5}: {Guidelines} for {Electronic} {Text} {Encoding} and {Interchange}},
  shorttitle = {{TEI} {P5}},
  month      = apr,
  year       = {2023},
  url        = {https://zenodo.org/record/3413524},
  doi        = {10.5281/ZENODO.3413524},
  urldate    = {2023-05-31},
  language   = {en},
  copyright  = {Creative Commons Attribution 3.0 Unported, Open Access},
  keywords   = {Text Encoding, Digital Humanities, Text Encoding Initiative},
  abstract   = {Release 4.6.0 is codenamed ‘The Peace Release’. This release introduces new features and resolves a number of issues raised by the TEI community. As always, the majority of these changes and corrections are a consequence of feature requests or bugs reported by the TEI community using the GitHub tracking system. A full list of the issues resolved in the course of this release cycle may be found under the 4.6.0 milestone. The ‘Peace Release’ delivers a new Roma v1.0.0, available at https://roma.tei-c.org/. TEI Roma now has a localized user interfaces for English, Spanish, German, French, Italian, and Japanese, with thanks to translators listed on the home page. The original Roma remains available at http://romaantiqua.tei-c.org/. The following changes are particularly worth highlighting in this release: New encoding features The calendar element is now a member of att.typed, giving it the type and subtype attributes (\#2396). The lg element may now contain any of the members of model.pPart.transcriptional, including supplied, redo, and damage (\#2242). The gender attribute has been added to the elements in model.castItem.part and the sex attribute to the actor element for the encoding of cast lists in performance texts (\#2400). vColl elements may now self-nest, so that the TEI specifications match the Guidelines prose (2034). Changes to content models It is anticippated that in a future relase the content model of the content element will require that it have one and only one child element (\#2381). Thus a Schematron warning for ODD writers about this upcoming change has been added to this release (PR \#2410). Changes to classes The model.glossLike class (which contained altIdent, equiv, and gloss) has been renamed to model.identSynonyms. Improvements of prose and examples The biblStruct element now has a new example featuring the use of a type attribute (\#1773). 
An outdated link in an example in att.global was updated with a new example from a current archive, and the Bibliography was updated to reflect the change (\#2366). A misrepresented source for an example in the Manuscript Description chapter was corrected (\#2372). To improve consistency, many desc elements were converted to gloss elements and vice versa, and underscores were removed from glosses in several element specifications (PR \#2388). The explanation of the attribute ns has been clarified in att.namespaceable to indicate more precisely that it refers to an element being defined (\#2395). In addition, several typos, unclear expressions, and faulty encodings have been corrected (\#2252, \#2403, \#2387, \#2225, PR \#2375, \#2356). Housekeeping The TEI oXygen framework has been updated to accomodate the fact that versions 25+ of oXygen use Saxon 11 (\#2251) The title elements with ref attributes used to refer from the Guidelines to some external standards have been replaced with ref elements with target attributes both for internal consistency of encoding and to ensure their values are properly processed as links (\#2368). In addition, many improvements have been made to the XSLT stylesheets (which provide processing of TEI ODD files for Roma and TEIGarage as well as other TEI conversions). The Stylesheets are maintained separately from the Guidelines at https://github.com/TEIC/Stylesheets. A full list of the issues resolved in the course of this release cycle may be found under the 7.55.0 milestone.},
}

% NOTE(review): the header line of the entry below was missing, so its fields had been absorbed
% into tei_consortium_tei_2023 as duplicate fields. The citation key banski_tei-lex0_2017 and the
% misc entry type are reconstructed from the visible fields and the key scheme used elsewhere in
% this file -- confirm both against the reference manager. A truncated one-letter abstract was dropped.
@misc{banski_tei-lex0_2017,
  author   = {Bański, Piotr and Bowers, Jack and Erjavec, Tomaz},
  title    = {{TEI}-{Lex0} guidelines for the encoding of dictionary information on written and spoken forms},
  month    = sep,
  year     = {2017},
  url      = {https://inria.hal.science/hal-01757108},
  urldate  = {2023-05-31},
  language = {en},
}
% Bowers and Romary (2018): journal article in Dictionaries 39(2); the url points to the HAL deposit.
@article{bowers_bridging_2018,
  author     = {Bowers, Jack and Romary, Laurent},
  title      = {Bridging the {Gaps} between {Digital} {Humanities}, {Lexicography}, and {Linguistics}: {A} {TEI} {Dictionary} for the {Documentation} of {Mixtepec}-{Mixtec}},
  shorttitle = {Bridging the {Gaps} between {Digital} {Humanities}, {Lexicography}, and {Linguistics}},
  journal    = {Dictionaries: Journal of the Dictionary Society of North America},
  volume     = {39},
  number     = {2},
  pages      = {79},
  year       = {2018},
  url        = {https://inria.hal.science/hal-01968871},
  urldate    = {2023-05-31},
  language   = {en},
  abstract   = {This paper discusses the digital dictionary component in an ongoing language documentation project for the Mixtepec-Mixtec language (iso 639-3: mix). Mixtepec-Mixtec (Sa'an Savi 'rain language') is an Oto-monguean language spoken by roughly 9,000-10,000 people in the Juxtlahuaca district of Oaxaca Mexico. Creating a digital dictionary for an under-resourced language entails a number of challenges that require unique and nuanced encoding solutions in which a delicate balance between the linguistic content, data structure, potential linked resources, and editorial metadata must be found. Herein we demonstrate how we use TEI to create a reusable, extensible, and machine readable language resource with an emphasis on how our solutions using a combination of novel and established TEI dictionary structures enable us to address our specific needs for Mixtepec-Mixtec and also provide a relevant roadmap for similar under-resourced language projects.},
}
% NOTE(review): no journal or venue is recorded for this work, so it is typed as misc instead of
% article (which requires a journal field) -- add the venue and retype once it is known.
@misc{ide_background_1998,
  author = {Ide, Nancy and V{\'e}ronis, Jean},
  title  = {Background and context for the development of a {Corpus} {Encoding} {Standard}},
  month  = jun,
  year   = {1998},
}
% NOTE(review): no journal or venue is recorded for this work, so it is typed as misc instead of
% article (which requires a journal field). The year and month look like export metadata rather
% than the original publication date -- verify against the source before relying on them.
@misc{ide_extracting_2000,
  author = {Ide, Nancy and V{\'e}ronis, Jean},
  title  = {Extracting Knowledge Bases From Machine-Readable Dictionaries: {Have} We Wasted Our Time?},
  month  = jun,
  year   = {2000},
}
@article{ide_encoding_1995,
title={Encoding dictionaries},
volume={29},
issn={1572-8412},
url={https://doi.org/10.1007/BF01830710},
doi={10.1007/BF01830710},
abstract={This article describes the major problems in devising a TEI encoding format for dictionaries, which, because of their high degree of structuring and compression of information, are among the most complex text types treated in the TEI. The major problems for this task were (1) the tension between generality of the description, in order to be widely applicable across dictionaries, and descriptive power, that is, the ability to describe with precision the particular structure of any given dictionary; and (2) the need to accommodate different views and uses of the encoded dictionary, for example, as printed object and as a database of information.},