Skip to content
Snippets Groups Projects
Commit 196cc045 authored by Jacques Fize
Browse files

Change files organisation

parent 1576e3e5
No related branches found
No related tags found
No related merge requests found
...@@ -384,4 +384,5 @@ model.save(MODEL_OUTPUT_FN) ...@@ -384,4 +384,5 @@ model.save(MODEL_OUTPUT_FN)
# Erase Model Checkpoint file # Erase Model Checkpoint file
if os.path.exists(MODEL_OUTPUT_FN + ".part"): if os.path.exists(MODEL_OUTPUT_FN + ".part"):
os.remove(MODEL_OUTPUT_FN + ".part") import shutil
\ No newline at end of file shutil.rmtree(MODEL_OUTPUT_FN + ".part")
\ No newline at end of file
...@@ -12,9 +12,8 @@ logging.basicConfig( ...@@ -12,9 +12,8 @@ logging.basicConfig(
) )
from sklearn.model_selection import train_test_split from sklearn.model_selection import train_test_split
from shapely.geometry import Point
from lib.geo import Grid from lib.geo import latlon2healpix
from helpers import read_geonames from helpers import read_geonames
from tqdm import tqdm from tqdm import tqdm
...@@ -32,47 +31,25 @@ FEATURE_CLASSES = args.feature_classes ...@@ -32,47 +31,25 @@ FEATURE_CLASSES = args.feature_classes
logging.info("Load Geonames data...") logging.info("Load Geonames data...")
geoname_data = read_geonames(GEONAME_FN).fillna("") geoname_data = read_geonames(GEONAME_FN).fillna("")
geoname_data["geometry"] = geoname_data["longitude latitude".split()].apply(lambda x: Point(x.longitude,x.latitude),axis=1)
geoname_data = gpd.GeoDataFrame(geoname_data)
logging.info("Geonames data loaded!") logging.info("Geonames data loaded!")
# SELECT ENTRY with class == to A and P (Areas and Populated Places) # SELECT ENTRY with class == to A and P (Areas and Populated Places)
filtered = geoname_data[geoname_data.feature_class.isin(FEATURE_CLASSES.split())].copy() # Only take area and populated places filtered = geoname_data[geoname_data.feature_class.isin(FEATURE_CLASSES.split())].copy() # Only take area and populated places
# World Shape bounds filtered["cat"] = filtered.apply(lambda x:latlon2healpix(x.latitude,x.longitude,64),axis=1)
world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
world["nn"] = 1
dissolved = world.dissolve(by="nn").iloc[0].geometry
#Creating Grid
logging.info("Initializing Grid (360,180)...")
g = Grid(*dissolved.bounds,[360,180])
logging.info("Fit Data to the Grid...")
g.fit_data(filtered)
logging.info("Placing place into the grid...")
[g+(int(row.geonameid),row.latitude,row.longitude) for ix,row in tqdm(filtered.iterrows(),total=len(filtered))]
#ASSOCIATE CELL NUMBER TO EACH PLACE IN THE GEONAME DATAFRAME
logging.info("Associate a cell number to each place in the Geoname Dataframe")
def foo(g,id_):
for ix,cell in enumerate(g.cells):
if id_ in cell.list_object:
return ix
filtered["cat"] = filtered.geonameid.apply(lambda x:foo(g,x))
# TRAIN AND TEST SPLIT # TRAIN AND TEST SPLIT
logging.info("Split Between Train and Test") logging.info("Split Between Train and Test")
# Cell can be empty # Cell can be empty
i=0 cat_unique = filtered.cat.unique()
ci=0
while 1: while 1:
if len(filtered[filtered.cat == i])> 1: if len(filtered[filtered.cat == cat_unique[ci]])> 1:
X_train,X_test = train_test_split(filtered[filtered.cat == i]) X_train,X_test = train_test_split(filtered[filtered.cat == cat_unique[ci]])
break break
i+=1 ci+=1
for i in range(i+1,len(g.cells)): for i in cat_unique[ci:] :
try: try:
x_train,x_test = train_test_split(filtered[filtered.cat == i]) x_train,x_test = train_test_split(filtered[filtered.cat == i])
X_train,X_test = pd.concat((X_train,x_train)),pd.concat((X_test,x_test)) X_train,X_test = pd.concat((X_train,x_train)),pd.concat((X_test,x_test))
...@@ -80,12 +57,9 @@ for i in range(i+1,len(g.cells)): ...@@ -80,12 +57,9 @@ for i in range(i+1,len(g.cells)):
pass #print("Error",len(filtered[filtered.cat == i])) pass #print("Error",len(filtered[filtered.cat == i]))
del X_train["geometry"]
del X_train["nn"]
del X_train["cat"] del X_train["cat"]
del X_test["cat"] del X_test["cat"]
del X_test["geometry"]
del X_test["nn"]
# SAVING THE DATA # SAVING THE DATA
logging.info("Saving Output !") logging.info("Saving Output !")
X_train.to_csv(GEONAME_FN+"_train.csv") X_train.to_csv(GEONAME_FN+"_train.csv")
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment