-
Walid Megherbi authored1d3e60e5
struc2vec_flight.py 1.54 KiB
import numpy as np
from ge.classify import read_node_label,Classifier
from ge import Struc2Vec
from sklearn.linear_model import LogisticRegression
import matplotlib.pyplot as plt
import networkx as nx
from sklearn.manifold import TSNE
def evaluate_embeddings(embeddings):
X, Y = read_node_label('../data/flight/labels-brazil-airports.txt',skip_head=True)
tr_frac = 0.8
print("Training classifier using {:.2f}% nodes...".format(
tr_frac * 100))
clf = Classifier(embeddings=embeddings, clf=LogisticRegression())
clf.split_train_evaluate(X, Y, tr_frac)
def plot_embeddings(embeddings,):
X, Y = read_node_label('../data/flight/labels-brazil-airports.txt',skip_head=True)
emb_list = []
for k in X:
emb_list.append(embeddings[k])
emb_list = np.array(emb_list)
model = TSNE(n_components=2)
node_pos = model.fit_transform(emb_list)
color_idx = {}
for i in range(len(X)):
color_idx.setdefault(Y[i][0], [])
color_idx[Y[i][0]].append(i)
for c, idx in color_idx.items():
plt.scatter(node_pos[idx, 0], node_pos[idx, 1], label=c) # c=node_colors)
plt.legend()
plt.show()
if __name__ == "__main__":
G = nx.read_edgelist('../data/flight/brazil-airports.edgelist', create_using=nx.DiGraph(), nodetype=None,
data=[('weight', int)])
model = Struc2Vec(G, 10, 80, workers=4, verbose=40, )
model.train()
embeddings = model.get_embeddings()
evaluate_embeddings(embeddings)
plot_embeddings(embeddings)