# Skip to content
# Snippets Groups Projects
# TEIFile.py 1.72 KiB
from data_process.data_functions import read_tei

class TEIFile(object):
    """Metadata and body text extracted from a single TEI document.

    The document is parsed with ``read_tei`` and the resulting tree is kept
    on ``self.soup`` (presumably a BeautifulSoup object -- it is only used
    through ``find``, ``find_all``, ``has_attr`` and ``getText``; confirm
    against ``read_tei``).

    Each metadata field holds the ``value`` attribute of the first
    ``<index type="...">`` element of the matching type, or ``''`` when no
    such element exists or it carries no ``value`` attribute:

    * ``_Head``            -- ``type="head"``
    * ``_Objecttype``      -- ``type="objecttype"``
    * ``_attribution``     -- ``type="attribution"``
    * ``_Class``           -- ``type="class"``
    * ``_normclass``       -- ``type="normclass"``
    * ``_englishclass``    -- ``type="englishclass"``
    * ``_generatedclass``  -- ``type="generatedclass"``
    * ``_author``          -- ``type="author"``

    ``_text`` is the text of every ``<p>`` element except the first, joined
    with single spaces.
    """

    def __init__(self, filename, textfilename):
        """Parse ``filename`` and populate the metadata and text attributes.

        Args:
            filename: Location of the TEI file; passed unchanged to
                ``read_tei`` and stored on ``self.filename``.
            textfilename: Accepted for interface compatibility; it is not
                used by this constructor.
        """
        self.filename = filename
        self.soup = read_tei(filename)

        self._Head = self._index_value('head')
        self._Objecttype = self._index_value('objecttype')
        self._attribution = self._index_value('attribution')
        self._Class = self._index_value('class')
        self._normclass = self._index_value('normclass')
        self._englishclass = self._index_value('englishclass')
        self._generatedclass = self._index_value('generatedclass')
        self._author = self._index_value('author')

        # The first <p> is deliberately left out of the body text.
        # NOTE(review): presumably it is a header/metadata paragraph --
        # confirm against the corpus.
        paragraphs = self.soup.find_all('p')
        self._text = ' '.join(p.getText() for p in paragraphs[1:])

    def _index_value(self, index_type):
        """Return the ``value`` of the first ``<index type=index_type>``.

        Returns ``''`` when the element is absent or has no ``value``
        attribute, so every metadata field is guarded the same way and a
        missing attribute cannot raise ``KeyError``.
        """
        tag = self.soup.find('index', type=index_type)
        if not tag or not tag.has_attr('value'):
            return ''
        return tag['value']