Skip to content
Snippets Groups Projects
Commit cbb53ca3 authored by Alice Brenon
Browse files

Add a script to retrieve a Simple train set from a Multi one

parent 7ccb5f7e
No related branches found
No related tags found
No related merge requests found
...@@ -57,14 +57,15 @@ class TSVIndexed(Corpus): ...@@ -57,14 +57,15 @@ class TSVIndexed(Corpus):
d[self.column_name] = self.content(key, row).strip() + '\n' d[self.column_name] = self.content(key, row).strip() + '\n'
return d return d
def get_all(self, projector=None, where=None):
    """Yield the projection of each row of the corpus, optionally filtered.

    Parameters:
        projector: a callable taking ``(key, row)``, or the name of one of
            the projectors listed in ``self.projectors``. Defaults to
            ``self.full`` when None.
        where: optional predicate taking ``(key, row)``; rows for which it
            returns a falsy value are skipped. None keeps every row.

    Yields:
        ``projector(key, row)`` for each ``(key, row)`` pair produced by
        ``self.data.iterrows()`` that satisfies ``where``.
    """
    if projector is None:
        projector = self.full
    elif isinstance(projector, str) and projector in self.projectors:
        # A projector given by name is resolved to the attribute of that name.
        projector = getattr(self, projector)
    # The data must be loaded before it can be iterated over.
    self.load()
    for key, row in self.data.iterrows():
        if where is None or where(key, row):
            yield projector(key, row)
class SelfContained(TSVIndexed): class SelfContained(TSVIndexed):
""" """
......
#!/usr/bin/env python3
from Corpus import Directory, SelfContained
from GEODE import fromKey, toKey
import GEODE.discursive as discursive
from prodigyAcceptedJSONLToTSV import acceptedToTSV
from sys import argv
def isAccepted(key, row):
    """Tell whether a row's 'answer' field is exactly 'accept'.

    The key is unused; it is part of the signature so the function can be
    passed as a ``(key, row)`` predicate.
    """
    return row['answer'] == 'accept'
def withLabel(corpus, label):
    """Build a projector adding a 'paragraphFunction' label to full rows.

    Parameters:
        corpus: an object whose ``full(key, row)`` returns a mapping.
        label: the value to store under the 'paragraphFunction' key.

    Returns:
        A function of ``(key, row)`` returning a new dict made of the
        content of ``corpus.full(key, row)`` plus the label.
    """
    def project(key, row):
        # A dict display is used rather than dict(**..., paragraphFunction=...)
        # so that a pre-existing 'paragraphFunction' entry is overridden
        # instead of raising a TypeError for a duplicated keyword.
        return {**corpus.full(key, row), 'paragraphFunction': label}
    return project
def simpleTrainOfMulti(multiDirectory, outputTSV):
    """Gather the accepted annotations of a Multi train set into a single TSV.

    For each name in ``discursive.functions``, a ``Directory`` corpus is
    opened on ``multiDirectory`` with that name as ``tsv_filename``; its rows
    whose answer is 'accept' are kept and labelled with that name. The
    collected rows are then sorted with ``toKey`` and saved to ``outputTSV``
    through a ``SelfContained`` corpus.

    Parameters:
        multiDirectory: path handed to ``Directory`` for every class.
        outputTSV: path handed to ``SelfContained`` for the output.
    """
    annotations = []
    for className in discursive.functions:
        corpus = Directory(multiDirectory, tsv_filename=className)
        labelled = withLabel(corpus, className)
        # extend() consumes the generator directly, no intermediate list.
        annotations.extend(corpus.get_all(projector=labelled,
                                          where=isAccepted))
    output = SelfContained(outputTSV)
    output.save(sorted(annotations, key=toKey))
if __name__ == '__main__':
    # Exit with a usage message rather than an IndexError when the two
    # expected command-line arguments are not both provided.
    if len(argv) < 3:
        raise SystemExit(
            'Usage: {} MULTI_DIRECTORY OUTPUT_TSV'.format(argv[0]))
    simpleTrainOfMulti(argv[1], argv[2])
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment