Skip to content
Snippets Groups Projects
Commit c92b911a authored by Jacques Fize
Browse files

Forgot to add helpers.py

parent 34f50041
No related branches found
No related tags found
No related merge requests found
import os
import time
import pandas as pd
import matplotlib.pyplot as plt
def read_geonames(file):
    """
    Return a dataframe that contains Geonames data.

    The file is read as a tab-separated table without a header row; the 19
    positional columns are typed and then renamed to their Geonames names.
    Lines with too many fields are skipped (with a warning) instead of
    aborting the whole load.

    Parameters
    ----------
    file : str
        path of the Geonames Csv file

    Returns
    -------
    pd.DataFrame
        geonames data
    """
    # Positional layout of one line: (column name, dtype), in file order.
    columns = [
        ("geonameid", int),
        ("name", str),
        ("asciiname", str),
        ("alternatenames", str),
        ("latitude", float),
        ("longitude", float),
        ("feature_class", str),
        ("feature_code", str),
        ("country_code", str),
        ("cc2", str),
        ("admin1_code", str),
        ("admin2_code", str),
        ("admin3_code", str),
        ("admin4_code", str),
        ("population", int),
        ("elevation", str),
        ("dem", int),  # digital elevation model
        ("timezone", str),
        ("modification_date", str),  # yyyy-MM-dd
    ]
    dtypes_dict = {idx: dtype for idx, (_, dtype) in enumerate(columns)}
    rename_cols = {idx: name for idx, (name, _) in enumerate(columns)}

    read_options = dict(
        sep="\t",
        header=None,
        quoting=3,  # csv.QUOTE_NONE: quote characters are ordinary text
        dtype=dtypes_dict,
        # Only the empty string means "missing", so literal values such as
        # the country code "NA" are kept as strings.
        na_values="",
        keep_default_na=False,
    )
    try:
        # pandas >= 1.3: `on_bad_lines="warn"` is the replacement for
        # `error_bad_lines=False` (skip the line and report it).
        data = pd.read_csv(file, on_bad_lines="warn", **read_options)
    except TypeError as err:
        # Only fall back when the keyword itself is unknown (pandas < 1.3);
        # any other TypeError is a genuine problem and must propagate.
        if "on_bad_lines" not in str(err):
            raise
        data = pd.read_csv(file, error_bad_lines=False, **read_options)
    data.rename(columns=rename_cols, inplace=True)
    return data
def plot_accuracy_from_history(model_name,history_data,output_layer_name,outpu_filename,parameter_string,output_dirname="outputs",validation=True,show=False):
    """
    Plot the training (and optionally validation) accuracy of one output layer
    and save the figure as a PNG file.

    The image is written to
    ``<output_dirname>/<model_name>_<parameter_string>_<output_layer_name>.png``.

    Parameters
    ----------
    model_name : str
        name of the model, used in the image filename
    history_data : pd.DataFrame
        training history indexed by column name; must provide the column
        ``<output_layer_name>_accuracy`` and, when `validation` is True,
        ``val_<output_layer_name>_accuracy``
        (presumably one row per epoch -- confirm against the caller)
    output_layer_name : str
        name of the output layer whose accuracy is plotted
    outpu_filename : str
        currently unused; kept so existing callers keep working
    parameter_string : str
        description of the run parameters, used in the image filename
    output_dirname : str, optional
        directory where the image is saved (created if missing), by default "outputs"
    validation : bool, optional
        if True, also plot the validation accuracy, by default True
    show : bool, optional
        if True, display the figure after saving it, by default False
    """
    # Draw on the current figure (one is created if none exists).
    fig = plt.gcf()

    plt.plot(history_data['{0}_accuracy'.format(output_layer_name)].values,label="Train Data")
    if validation:
        plt.plot(history_data['val_{0}_accuracy'.format(output_layer_name)].values,label = "Test Data")
    plt.title('Layer {0} accuracy'.format(output_layer_name))
    plt.ylabel('Accuracy')
    plt.xlabel('Epoch')
    plt.ylim((0,1.1)) # headroom so a curve at accuracy = 1 stays visible
    plt.legend()

    # Honour `output_dirname` instead of a hard-coded "outputs/" prefix.
    if output_dirname:
        os.makedirs(output_dirname, exist_ok=True)
    image_name = "{0}_{1}_{2}.png".format(model_name,parameter_string,output_layer_name)
    plt.savefig(os.path.join(output_dirname, image_name))

    if show :
        plt.show()
    # Close the figure so curves from this call do not leak into the next plot.
    plt.close(fig)
def save_embedding(model,tokenizer,layer_idx,fn):
    """
    Write the embedding vector of every word known to the tokenizer to a text file.

    Each output line holds a word followed by the space-separated components
    of its vector.

    Parameters
    ----------
    model : object
        model exposing ``get_weights()``; the first returned array is used as
        the embedding matrix
    tokenizer : object
        tokenizer exposing ``word_index``, a mapping word -> row index in the
        embedding matrix
    layer_idx : int
        currently unused: the first weight array is always exported
        NOTE(review): confirm whether this should select the weights to export
    fn : str
        path of the output file
    """
    embedding_matrix = model.get_weights()[0]
    # Explicit UTF-8 so non-ASCII words do not depend on the platform encoding.
    with open(fn, 'w', encoding="utf-8") as f:
        for word, row_idx in tokenizer.word_index.items():
            # str() rather than repr(): on NumPy >= 2 repr() of a scalar
            # yields e.g. "np.float32(0.5)", which would corrupt the file.
            components = "".join(' ' + str(value) for value in embedding_matrix[row_idx])
            f.write(word + components + '\n')
class Chronometer():
    """
    Measure the duration of named tasks.

    Several tasks may run at the same time, each identified by its name.
    """

    def __init__(self):
        # task name -> clock reading taken when the task was started
        self.__task_begin_timestamp = {}

    def start(self,task_name):
        """
        Start a new task chronometer

        Parameters
        ----------
        task_name : str
            task id

        Raises
        ------
        ValueError
            if a running task already exists with that name
        """
        if task_name in self.__task_begin_timestamp:
            raise ValueError("A running task exists with the name {0}!".format(task_name))
        # perf_counter() never goes backwards, unlike the wall clock, so a
        # system clock adjustment cannot distort the measured duration.
        self.__task_begin_timestamp[task_name] = time.perf_counter()

    def stop(self,task_name):
        """
        Stop and return the duration of the task

        Parameters
        ----------
        task_name : str
            task id

        Returns
        -------
        float
            duration of the task in seconds

        Raises
        ------
        ValueError
            if no task exist with the id `task_name`
        """
        if task_name not in self.__task_begin_timestamp:
            raise ValueError("The {0} task does not exist!".format(task_name))
        # Removing the entry lets the same name be started again later.
        begin = self.__task_begin_timestamp.pop(task_name)
        return time.perf_counter() - begin
if __name__ == "__main__":
    # Manual smoke test: two overlapping tasks are started together and
    # stopped three seconds apart, printing each measured duration.
    demo_tasks = ("test", "test2")
    chrono = Chronometer()
    for demo_task in demo_tasks:
        chrono.start(demo_task)
    for demo_task in demo_tasks:
        time.sleep(3)
        print(chrono.stop(demo_task))
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment