Skip to content
Snippets Groups Projects
Commit 6b612e5e authored by Ludovic Moncla
Browse files

Update Predict_LGE.py

parent 01434873
No related branches found
No related tags found
No related merge requests found
......@@ -111,16 +111,18 @@ def text_folder_to_dataframe(path):
# id,tome,filename,nb_words,content,domain
for tome in sorted(os.listdir(path)):
for article in sorted(os.listdir(path + "/" + tome)):
filename = article[:-4]
id = tome + filename
if article[-4:] == ".txt":
with open(path + "/" + tome + "/" + article) as f:
content = f.read()
data.append([id, tome, filename, content, len(content.split(' '))])
try:
for article in sorted(os.listdir(path + "/" + tome)):
filename = article[:-4]
id = tome + filename
if article[-4:] == ".txt":
with open(path + "/" + tome + "/" + article) as f:
content = f.read()
data.append([id, tome, filename, content, len(content.split(' '))])
except NotADirectoryError:
pass
return pd.DataFrame(data, columns=['id', 'tome', 'filename', 'content', 'nb_words'])
......@@ -148,10 +150,10 @@ if __name__ == '__main__':
## Load data
print("* Load data")
path = "/Users/lmoncla/Documents/Data/Corpus/LGE/Text/"
input_path = "/Users/lmoncla/Documents/Data/Corpus/LGE/Text"
path = "../"
df_LGE = text_folder_to_dataframe(path)
df_LGE = text_folder_to_dataframe(input_path)
#df_LGE = pd.read_csv(path + "data/LGE_withContent.tsv", sep="\t")
data_LGE = df_LGE["content"].values
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment