-
Alice Brenon authored f8b70bc4
parallel-metadata.py 1.56 KiB
#!/usr/bin/env python3
import pandas
from sys import argv
def edda_parallel_line(row):
    """Build the EDdA-side metadata tuple for one row of the merged table.

    The tuple holds, in order: a unique id ``edda_<tome>_<article>``, a
    relative path ``EDdA/T<tome>/article<article>``, the literal book name
    ``'EDdA'``, the tome, the article number, the headword, the author and
    the ENCCRE domain.

    ``row`` must expose ``tomeEDdA``, ``article``, ``author`` and
    ``ENCCRE_domain`` as attributes and ``'head'`` through item access.
    """
    tome = row.tomeEDdA
    article = row.article
    # Convert once; both the uid and the path embed the same two strings.
    tome_text = str(tome)
    article_text = str(article)
    uid = '_'.join(('edda', tome_text, article_text))
    path = f"EDdA/T{tome_text}/article{article_text}"
    # 'head' is read with item access rather than attribute access.
    headword = row['head']
    return (uid, path, 'EDdA', tome, article, headword, row.author,
            row.ENCCRE_domain)
def lge_parallel_line(row):
    """Build the LGE-side metadata tuple for one row of the merged table.

    The tuple holds, in order: a unique id ``lge_<tome>_<id>``, a relative
    path ``LGE/T<tome>/<id>``, the literal book name ``'LGE'``, the tome,
    the LGE rank, the headword, an empty string in the authors slot and the
    ENCCRE domain.

    ``row`` must expose ``tomeLGE``, ``id``, ``rankLGE`` and
    ``ENCCRE_domain`` as attributes and ``'head'`` through item access.
    """
    tome = row.tomeLGE
    # Convert once; both the uid and the path embed the same two strings.
    tome_text = str(tome)
    id_text = str(row.id)
    uid = '_'.join(('lge', tome_text, id_text))
    path = f"LGE/T{tome_text}/{id_text}"
    # The uid and path are keyed on `id`, while the rank slot carries `rankLGE`.
    rank = row.rankLGE
    # 'head' is read with item access rather than attribute access.
    headword = row['head']
    # The authors slot is always the empty string on the LGE side.
    return (uid, path, 'LGE', tome, rank, headword, '', row.ENCCRE_domain)
def merge(edda_meta, edda_lge_mapping):
    """Inner-join the two tables on their shared ``head`` column.

    Only rows whose ``head`` value appears in both inputs are kept.
    """
    joined = pandas.merge(
        left=edda_meta,
        right=edda_lge_mapping,
        on='head',
        how='inner',
    )
    return joined
def output_metadata(input_meta, row_mapper):
    """Apply ``row_mapper`` to every row and return the labelled result.

    ``row_mapper`` receives one row at a time and must return eight values,
    which become the columns ``uid``, ``path``, ``book``, ``tome``, ``rank``,
    ``head``, ``authors`` and ``domains`` of the returned DataFrame.

    When ``input_meta`` has no rows, an empty DataFrame carrying those eight
    columns is returned, so a header-only CSV can still be written.
    """
    columns = ['uid', 'path', 'book', 'tome', 'rank', 'head', 'authors', 'domains']
    # With no rows, DataFrame.apply(..., result_type='expand') never calls the
    # mapper and hands back a copy of the input frame; relabelling that copy
    # would either raise a length-mismatch ValueError or silently rename
    # unrelated input columns. Short-circuit instead.
    if len(input_meta.index) == 0:
        return pandas.DataFrame(columns=columns)
    output = input_meta.apply(row_mapper, result_type='expand', axis=1)
    output.columns = columns
    return output
if __name__ == '__main__':
    # Usage: <script> EDdA_metadata EDdA_LGE_mapping OUTPUT_DIRECTORY
    if len(argv) != 4:
        # Raising SystemExit with a string prints it to stderr and exits with
        # status 1, so calling scripts can detect the misuse (the message
        # previously went to stdout with a success status).
        raise SystemExit(
            f"Syntax: {argv[0]} EDdA_metadata EDdA_LGE_mapping OUTPUT_DIRECTORY")
    # Join the two input CSV files on their shared 'head' column.
    parallel_meta = merge(pandas.read_csv(argv[1]), pandas.read_csv(argv[2]))
    edda_output = f"{argv[3]}/edda_parallel_metadata.csv"
    lge_output = f"{argv[3]}/lge_parallel_metadata.csv"
    # Write one CSV per book, both derived from the same merged rows.
    output_metadata(parallel_meta, edda_parallel_line).to_csv(edda_output, index=False)
    output_metadata(parallel_meta, lge_parallel_line).to_csv(lge_output, index=False)