diff --git a/scripts/ML/Corpus.py b/scripts/ML/Corpus.py
index b01a7c8181c49a64ec9ef433b0011b154c7b0e8a..d2ea16d6d55da99723b053afc87cdf7dc12f8d7d 100644
--- a/scripts/ML/Corpus.py
+++ b/scripts/ML/Corpus.py
@@ -1,5 +1,6 @@
 import pandas
-import os
+from os import makedirs
+from os.path import dirname, isdir, isfile
 
 def abstract(f):
     def wrapped(*args, **kwargs):
@@ -128,7 +129,7 @@ class Directory(TSVIndexed):
 
     def write_text(self, primary_key, content):
         path = self.path_to(primary_key)
-        os.makedirs(os.path.dirname(path), exist_ok=True)
+        makedirs(dirname(path), exist_ok=True)
         with open(path, 'w') as file:
             file.write(content)
 
@@ -138,3 +139,19 @@ class Directory(TSVIndexed):
         for _, row in self.data.iterrows():
             self.write_text(row, row[self.column_name])
         self.data[self.keys].to_csv(self.tsv_path, sep='\t', index=False)
+
+def corpus(path):
+    """Return the corpus object matching path.
+
+    The spelling of path is checked first, so a destination that does not
+    exist yet can still be typed: a trailing '/' gives a Directory and a
+    '.tsv' suffix gives a SelfContained. Otherwise an existing directory
+    gives a Directory and an existing file gives a SelfContained.
+    Raises FileNotFoundError when neither the spelling nor the filesystem
+    identifies the kind of corpus.
+    """
+    if path.endswith('/'):
+        return Directory(path)
+    if path.endswith('.tsv'):
+        return SelfContained(path)
+    # Fall back to what is on disk, as the former detect() helper did.
+    if isdir(path):
+        return Directory(path)
+    if isfile(path):
+        return SelfContained(path)
+    raise FileNotFoundError(path)
diff --git a/scripts/ML/convert-corpus.py b/scripts/ML/convert-corpus.py
index 98135d464ab54b81efe9eade5f297b30670788a1..a37fb2c0e623b07542f75a8127aed6cbe43352d9 100755
--- a/scripts/ML/convert-corpus.py
+++ b/scripts/ML/convert-corpus.py
@@ -1,15 +1,8 @@
 #!/usr/bin/env python3
-import Corpus
-from os.path import isdir
+from Corpus import corpus
 import sys
 
-def detect(path):
-    if isdir(path):
-        return Corpus.Directory(path)
-    else:
-        return Corpus.SelfContained(path)
-
 if __name__ == '__main__':
-    source = detect(sys.argv[1])
-    destination = detect(sys.argv[2])
+    source = corpus(sys.argv[1])
+    destination = corpus(sys.argv[2])
     destination.save(source.get_all())