Skip to content
Snippets Groups Projects
Commit 6b612e5e authored by Ludovic Moncla
Browse files

Update Predict_LGE.py

parent 01434873
No related branches found
No related tags found
No related merge requests found
...@@ -111,16 +111,18 @@ def text_folder_to_dataframe(path): ...@@ -111,16 +111,18 @@ def text_folder_to_dataframe(path):
# id,tome,filename,nb_words,content,domain # id,tome,filename,nb_words,content,domain
for tome in sorted(os.listdir(path)): for tome in sorted(os.listdir(path)):
for article in sorted(os.listdir(path + "/" + tome)): try:
filename = article[:-4] for article in sorted(os.listdir(path + "/" + tome)):
id = tome + filename filename = article[:-4]
id = tome + filename
if article[-4:] == ".txt":
with open(path + "/" + tome + "/" + article) as f: if article[-4:] == ".txt":
content = f.read() with open(path + "/" + tome + "/" + article) as f:
content = f.read()
data.append([id, tome, filename, content, len(content.split(' '))])
data.append([id, tome, filename, content, len(content.split(' '))])
except NotADirectoryError:
pass
return pd.DataFrame(data, columns=['id', 'tome', 'filename', 'content', 'nb_words']) return pd.DataFrame(data, columns=['id', 'tome', 'filename', 'content', 'nb_words'])
...@@ -148,10 +150,10 @@ if __name__ == '__main__': ...@@ -148,10 +150,10 @@ if __name__ == '__main__':
## Load data ## Load data
print("* Load data") print("* Load data")
path = "/Users/lmoncla/Documents/Data/Corpus/LGE/Text/" input_path = "/Users/lmoncla/Documents/Data/Corpus/LGE/Text"
path = "../"
df_LGE = text_folder_to_dataframe(path) df_LGE = text_folder_to_dataframe(input_path)
#df_LGE = pd.read_csv(path + "data/LGE_withContent.tsv", sep="\t") #df_LGE = pd.read_csv(path + "data/LGE_withContent.tsv", sep="\t")
data_LGE = df_LGE["content"].values data_LGE = df_LGE["content"].values
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment