-
Khalleud authoreddb8395ca
TEIFile.py 1.72 KiB
from data_process.data_functions import read_tei
class TEIFile(object):
    """Metadata and body text extracted from a single TEI document.

    The document is parsed with ``read_tei`` and the ``value`` attribute of
    each ``<index type="...">`` element is copied onto a private attribute
    (``_Head``, ``_Objecttype``, ``_attribution``, ``_Class``, ``_normclass``,
    ``_englishclass``, ``_generatedclass``, ``_author``).  An attribute is
    the empty string when its ``<index>`` element is absent or carries no
    ``value``.  ``_text`` holds the text of the ``<p>`` elements joined by
    single spaces.
    """

    def __init__(self, filename, textfilename):
        """Parse *filename* and populate the metadata and text attributes.

        Args:
            filename: Passed unchanged to ``read_tei``; also stored on
                ``self.filename``.
            textfilename: Not used by this constructor; kept in the
                signature so existing callers keep working.
        """
        self.filename = filename
        # NOTE(review): the calls below (find / find_all / getText) suggest
        # read_tei returns a BeautifulSoup object -- confirm against
        # data_process.data_functions.
        self.soup = read_tei(filename)

        self._Head = self._index_value('head')
        self._Objecttype = self._index_value('objecttype')
        self._attribution = self._index_value('attribution')
        self._Class = self._index_value('class')
        self._normclass = self._index_value('normclass')
        self._englishclass = self._index_value('englishclass')
        self._generatedclass = self._index_value('generatedclass')
        self._author = self._index_value('author')

        # The first <p> is skipped on purpose, exactly as before.
        # NOTE(review): presumably it is a heading/front-matter paragraph
        # rather than body text -- confirm against the corpus.
        paragraphs = self.soup.find_all('p')
        self._text = ' '.join(p.getText() for p in paragraphs[1:])

    def _index_value(self, index_type):
        """Return the ``value`` of the first ``<index type=index_type>``.

        Returns ``''`` when no such element exists or when the element has
        no ``value`` attribute, so every metadata field gets the tolerant
        handling that previously only the ``class`` field had.
        """
        tag = self.soup.find('index', type=index_type)
        if tag is None:
            return ''
        return tag.get('value', '')