Skip to content
Snippets Groups Projects
Commit feb11af7 authored by Abd Errahmane Kiouche's avatar Abd Errahmane Kiouche :speech_balloon:
Browse files

Update codes/Geometric_graph_similarity.py, codes/Sparsification.py files

parents
No related branches found
No related tags found
No related merge requests found
import networkx as nx
from lxml import etree as ET
import numpy as np
import math
from scipy.spatial import distance
from os import walk
import operator
import sys
import time
from scipy.optimize import linear_sum_assignment
def read_graph_from_xml(file_path):
    """Load an undirected graph with 2-D node coordinates from an XML file.

    Every child of the XML root is scanned for ``node`` and ``edge``
    elements.  Nodes are renumbered with consecutive integers (0, 1, ...)
    in order of appearance and stored with ``x`` and ``y`` attributes.  Each
    edge is stored with a ``weight`` equal to the Euclidean distance between
    its two end points.

    Args:
        file_path: Path of the XML file to parse.

    Returns:
        A ``networkx.Graph`` whose nodes carry ``x``/``y`` and whose edges
        carry ``weight``.

    Raises:
        KeyError: If an edge refers to a node id that was not declared
            earlier in the file, or if a required XML attribute is missing.
    """
    G = nx.Graph()
    index_of = {}  # XML node id (as str) -> integer node id used in G
    next_index = 0
    root = ET.parse(file_path).getroot()
    for graph in root:
        for element in graph:
            if element.tag == "node":
                index_of[str(element.attrib['id'])] = next_index
                # Coordinates are positional: the first nested value is x and
                # every later value overwrites y (attribute names are not read).
                x_v = y_v = 0
                values = [value for attrib in element for value in attrib]
                for position, value in enumerate(values):
                    if position == 0:
                        x_v = float(value.text)
                    else:
                        y_v = float(value.text)
                G.add_node(next_index, x=x_v, y=y_v)
                next_index += 1
            elif element.tag == "edge":
                source = index_of[str(element.attrib['from'])]
                target = index_of[str(element.attrib["to"])]
                # The weight is recomputed from the node coordinates; a weight
                # stored in the file is not read.
                length = math.hypot(
                    G.nodes[source]['x'] - G.nodes[target]['x'],
                    G.nodes[source]['y'] - G.nodes[target]['y'],
                )
                G.add_edge(source, target, weight=length)
    return G
def euclidean_distance(a, b):
    """Return the Euclidean distance between two 2-D points.

    Only the first two coordinates (index 0 and 1) of each point are used.
    ``math.hypot`` is used instead of squaring by hand so that very large
    coordinates do not raise ``OverflowError``.

    Args:
        a: Indexable point ``(x, y)``.
        b: Indexable point ``(x, y)``.

    Returns:
        The distance as a float.
    """
    return math.hypot(a[0] - b[0], a[1] - b[1])
def distance_1(m1, m2, size):
    """Sum the row-wise Euclidean distances between two collections of vectors.

    For each index ``0 .. size-1`` the two rows are copied, the shorter copy
    is padded with zeros up to the length of the longer one, and the
    Euclidean distance between them is added to the total.

    Args:
        m1: Container indexable by ``0 .. size-1`` whose items are lists.
        m2: Container indexable by ``0 .. size-1`` whose items are lists.
        size: Number of rows to compare.

    Returns:
        The accumulated distance (``0.0`` when ``size`` is 0).
    """
    total = 0.0
    for row in range(size):
        left = m1[row].copy()
        right = m2[row].copy()
        gap = len(left) - len(right)
        if gap < 0:
            # left is shorter: zero-pad it
            left.extend([0] * (-gap))
        elif gap > 0:
            # right is shorter: zero-pad it
            right.extend([0] * gap)
        total += distance.euclidean(left, right)
    return total
def compute_distance_to_set(point, X):
    """Return the distance from ``point`` to the nearest member of ``X``.

    Args:
        point: Indexable 2-D point.
        X: Mapping whose values are indexable 2-D points.

    Returns:
        The smallest ``euclidean_distance(point, member)`` over the values of
        ``X``, or ``float('inf')`` when ``X`` is empty.
    """
    # Each distance is evaluated once; the default covers the empty set.
    return min(
        (euclidean_distance(point, member) for member in X.values()),
        default=float('inf'),
    )
def angle_between(G, p1, p2, p3):
    """Return the angle at ``p2`` of the triangle ``p1, p2, p3`` in degrees.

    The angle is obtained with the law of cosines from the node coordinates
    stored in ``G.nodes[...]['x']`` / ``['y']``.  The cosine is rounded to
    four decimals before ``acos`` is applied.

    Args:
        G: Graph-like object exposing ``G.nodes[n]['x']`` and ``['y']``.
        p1: First node.
        p2: Node at which the angle is measured.
        p3: Third node.

    Returns:
        The angle in degrees, in ``[0, 180]``.  When ``p2`` has the same
        coordinates as ``p1`` or ``p3`` the angle is undefined and ``0.0`` is
        returned instead of raising ``ZeroDivisionError``.
    """
    x1, y1 = G.nodes[p1]['x'], G.nodes[p1]['y']
    x2, y2 = G.nodes[p2]['x'], G.nodes[p2]['y']
    x3, y3 = G.nodes[p3]['x'], G.nodes[p3]['y']
    a = math.hypot(x1 - x3, y1 - y3)  # side opposite to p2
    b = math.hypot(x2 - x3, y2 - y3)
    c = math.hypot(x1 - x2, y1 - y2)
    denominator = 2 * b * c
    if denominator == 0:
        # NOTE(review): degenerate triangle; 0.0 is a convention chosen so
        # that the caller's angle sums are unaffected - confirm it suits you.
        return 0.0
    cosine = round((b * b + c * c - a * a) / denominator, 4)
    # Guard acos against values pushed just outside [-1, 1] by rounding.
    cosine = max(-1.0, min(1.0, cosine))
    return math.degrees(math.acos(cosine))
def get_score(G, path):
    """Return the sum of the turning angles along ``path``.

    For every three consecutive nodes ``(u, v, w)`` of the path the value of
    ``angle_between(G, u, v, w)`` is added to the total.  A sliding window is
    used instead of ``-1`` "no predecessor" markers, so a node that is
    genuinely labelled ``-1`` is handled like any other node.

    Args:
        G: Graph passed through to ``angle_between``.
        path: Iterable of node labels.

    Returns:
        The summed angles; ``0`` when the path has fewer than three nodes.
    """
    nodes = list(path)
    return sum(
        angle_between(G, before, middle, after)
        for before, middle, after in zip(nodes, nodes[1:], nodes[2:])
    )
def paths_to_vec(G, paths):
    """Turn each path into a vector of its consecutive turning angles.

    Every vector starts with the constant ``1`` followed by
    ``angle_between(G, u, v, w)`` for each three consecutive nodes of the
    path.  A sliding window replaces the former ``-1`` "no predecessor"
    markers, so a node labelled ``-1`` is treated like any other node.
    Each vector is also printed to standard output.

    Args:
        G: Graph passed through to ``angle_between``.
        paths: Mapping from a key to an iterable of node labels.

    Returns:
        A dict mapping every key of ``paths`` to its vector (a list).
    """
    mat = {}
    for key in paths:
        nodes = list(paths[key])
        vec = [1]
        vec.extend(
            angle_between(G, before, middle, after)
            for before, middle, after in zip(nodes, nodes[1:], nodes[2:])
        )
        mat[key] = vec
        print(vec)
    return mat
def embed_nodes_to_vectors(G, size_neighborhood):
    """Describe every node by a vector built from its nearest neighbours.

    For each node ``n1`` the other nodes are ranked by straight-line
    (Euclidean) distance.  For each of the ``size_neighborhood`` closest
    nodes ``n2`` one entry is appended to the vector of ``n1``::

        (euclidean(n1, n2) / weighted_shortest_path_length(n1, n2))
            * get_score(G, shortest_path(n1, n2))

    The shortest path and its length are obtained from a single Dijkstra run
    per pair.

    Args:
        G: ``networkx`` graph whose nodes carry ``x``/``y`` and whose edges
            carry ``weight``.
        size_neighborhood: Maximum number of neighbours used per node.

    Returns:
        A dict mapping each node to its vector (a list of floats).  A vector
        is shorter than ``size_neighborhood`` when the graph has fewer other
        nodes.

    Raises:
        networkx.NetworkXNoPath: If a selected neighbour is unreachable.
        ZeroDivisionError: If the weighted path length between two distinct
            nodes is zero.
    """
    node_vec = {}
    nodes_list = list(G.nodes())
    limit = max(size_neighborhood, 0)
    for n1 in nodes_list:
        x1, y1 = G.nodes[n1]['x'], G.nodes[n1]['y']
        straight = {
            other: math.hypot(x1 - G.nodes[other]['x'], y1 - G.nodes[other]['y'])
            for other in nodes_list
            if other != n1
        }
        # sorted() is stable, so ties keep the node order of the graph.
        nearest = sorted(straight.items(), key=lambda item: item[1])[:limit]
        vec = []
        for n2, euclid in nearest:
            # With a target, single_source_dijkstra returns (length, path).
            path_length, path = nx.single_source_dijkstra(
                G, n1, target=n2, weight="weight"
            )
            vec.append(float(euclid / path_length) * get_score(G, path))
        node_vec[n1] = vec
    return node_vec
def construct_cost_matrix(vec1, vec2, size):
    """Build the pairwise Euclidean cost matrix between two vector sets.

    Entry ``[r][c]`` is the Euclidean distance between the r-th vector of
    ``vec1`` and the c-th vector of ``vec2``, both taken in iteration order.

    Args:
        vec1: Mapping from key to vector (rows of the matrix).
        vec2: Mapping from key to vector (columns of the matrix).
        size: Not used by this function.

    Returns:
        A ``numpy`` array holding the costs.
    """
    rows = [
        [distance.euclidean(vec1[row_key], vec2[col_key]) for col_key in vec2]
        for row_key in vec1
    ]
    return np.array(rows)
def geometric_graph_distance(mat1, mat2, size):
    """Return the cost of the optimal assignment between two embeddings.

    A cost matrix is built with ``construct_cost_matrix`` and solved with
    ``linear_sum_assignment``; the costs of the selected pairs are summed.

    Args:
        mat1: Node embedding of the first graph (mapping node -> vector).
        mat2: Node embedding of the second graph (mapping node -> vector).
        size: Forwarded unchanged to ``construct_cost_matrix``.

    Returns:
        The total cost of the assignment.
    """
    costs = construct_cost_matrix(mat1, mat2, size)
    chosen_rows, chosen_cols = linear_sum_assignment(costs)
    return costs[chosen_rows, chosen_cols].sum()
if __name__ == "__main__":
    # Command line:
    #   argv[1]  directory containing the XML graph files
    #   argv[2]  path of the result file to write
    #   argv[3]  number of graphs per class (used to derive the class label)
    #   argv[4]  neighbourhood size (parameter K)
    import os  # local import: only `walk` is imported from os at file level

    sys.setrecursionlimit(10000)
    original_graph_path = sys.argv[1]
    output_file_result = sys.argv[2]
    nb_items_per_class = int(sys.argv[3])
    size_n = int(sys.argv[4])  # parameter K

    dict_graphs = {}
    print("Reading graphs")
    for (dirpath, _dirnames, filenames) in walk(original_graph_path):
        for filename in filenames:
            # Join with the directory actually being walked so that the
            # script works on any platform and for nested directories.
            dict_graphs[filename] = read_graph_from_xml(
                os.path.join(dirpath, filename)
            )

    print("Computing embeddings")
    start = time.time()
    list_graphs_names = list(dict_graphs.keys())
    Mats = {
        name: embed_nodes_to_vectors(dict_graphs[name], size_n)
        for name in list_graphs_names
    }
    print("Embedding graphs has took " + str((time.time() - start)) + "sec")

    print("Computing distance matrix")
    start = time.time()
    nb_graphs = len(list_graphs_names)
    # Symmetric matrix with a zero diagonal; only the upper triangle is computed.
    dist_rows = [[0] * nb_graphs for _ in range(nb_graphs)]
    for i in range(nb_graphs):
        print(i)
        for j in range(i + 1, nb_graphs):
            # float() keeps the written text free of numpy scalar reprs.
            dij = float(
                geometric_graph_distance(
                    Mats[list_graphs_names[i]], Mats[list_graphs_names[j]], size_n
                )
            )
            dist_rows[i][j] = dij
            dist_rows[j][i] = dij
    print("Computing distances has took " + str((time.time() - start)) + "sec")

    print("Writing results")
    # NOTE(review): the class label is derived from the position of a graph in
    # the order returned by os.walk, which is not guaranteed to be sorted.
    with open(output_file_result, "w") as result_file:
        for index, row in enumerate(dist_rows):
            # "C1", "C2", ... without an upper limit on the number of classes.
            label = "C" + str(index // nb_items_per_class + 1)
            result_file.write(label + ":" + str(row) + "\n")
import networkx as nx
from lxml import etree as ET
import numpy as np
import math
from scipy.spatial import distance
from os import walk
import operator
import sys
import time
from scipy.optimize import linear_sum_assignment
def read_graph_from_xml(file_path):
    """Load an undirected graph with 2-D node coordinates from an XML file.

    Every child of the XML root is scanned for ``node`` and ``edge``
    elements.  Nodes are renumbered with consecutive integers (0, 1, ...)
    in order of appearance and stored with ``x`` and ``y`` attributes.  Each
    edge is stored with a ``weight`` equal to the Euclidean distance between
    its two end points.

    Args:
        file_path: Path of the XML file to parse.

    Returns:
        A ``networkx.Graph`` whose nodes carry ``x``/``y`` and whose edges
        carry ``weight``.

    Raises:
        KeyError: If an edge refers to a node id that was not declared
            earlier in the file, or if a required XML attribute is missing.
    """
    G = nx.Graph()
    index_of = {}  # XML node id (as str) -> integer node id used in G
    next_index = 0
    root = ET.parse(file_path).getroot()
    for graph in root:
        for element in graph:
            if element.tag == "node":
                index_of[str(element.attrib['id'])] = next_index
                # Coordinates are positional: the first nested value is x and
                # every later value overwrites y (attribute names are not read).
                x_v = y_v = 0
                values = [value for attrib in element for value in attrib]
                for position, value in enumerate(values):
                    if position == 0:
                        x_v = float(value.text)
                    else:
                        y_v = float(value.text)
                G.add_node(next_index, x=x_v, y=y_v)
                next_index += 1
            elif element.tag == "edge":
                source = index_of[str(element.attrib['from'])]
                target = index_of[str(element.attrib["to"])]
                # The weight is recomputed from the node coordinates; a weight
                # stored in the file is not read.
                length = math.hypot(
                    G.nodes[source]['x'] - G.nodes[target]['x'],
                    G.nodes[source]['y'] - G.nodes[target]['y'],
                )
                G.add_edge(source, target, weight=length)
    return G
def graph_to_xml(G, file_path):
    """Write ``G`` to ``file_path`` as ``<gxl><graph>...</graph></gxl>`` XML.

    Each node becomes ``<node id=...>`` with two ``<attr>`` children named
    ``x`` and ``y`` (in that order), each wrapping a ``<float>`` value.  Each
    edge becomes ``<edge id=... from=... to=...>``.  The end points are
    written as ``from``/``to`` because those are the attribute names that
    ``read_graph_from_xml`` looks up; edge weights are not written.

    Args:
        G: Graph whose nodes carry ``x`` and ``y`` attributes.
        file_path: Destination path of the XML file.
    """
    gxl = ET.Element("gxl")
    graph_xml = ET.SubElement(gxl, "graph")
    for n in G.nodes():
        node_xml = ET.SubElement(graph_xml, "node", id=str(n))
        for axis in ("x", "y"):
            attrib = ET.SubElement(node_xml, "attr", name=axis)
            ET.SubElement(attrib, "float").text = str(G.nodes[n][axis])
    for i, (u, v) in enumerate(G.edges()):
        # `from` is a Python keyword, so the attributes are passed as a dict.
        ET.SubElement(
            graph_xml, "edge", {"id": str(i), "from": str(u), "to": str(v)}
        )
    ET.ElementTree(gxl).write(file_path, pretty_print=False)
def euclidean_distance(a, b):
    """Return the Euclidean distance between two 2-D points.

    Only the first two coordinates (index 0 and 1) of each point are used.
    ``math.hypot`` is used instead of squaring by hand so that very large
    coordinates do not raise ``OverflowError``.

    Args:
        a: Indexable point ``(x, y)``.
        b: Indexable point ``(x, y)``.

    Returns:
        The distance as a float.
    """
    return math.hypot(a[0] - b[0], a[1] - b[1])
def compute_distance_to_set(point, X):
    """Return the distance from ``point`` to the nearest member of ``X``.

    Args:
        point: Indexable 2-D point.
        X: Mapping whose values are indexable 2-D points.

    Returns:
        The smallest ``euclidean_distance(point, member)`` over the values of
        ``X``, or ``float('inf')`` when ``X`` is empty.
    """
    # Each distance is evaluated once; the default covers the empty set.
    return min(
        (euclidean_distance(point, member) for member in X.values()),
        default=float('inf'),
    )
def greedy_diversity_selection(S, m):
    """Greedily pick up to ``m`` mutually distant points from ``S``.

    At every step the point whose distance to the already selected set
    (``compute_distance_to_set``) is largest is moved into the selection.
    While the selection is empty every distance is infinite, so the first
    point of ``S`` in iteration order is chosen first; ties are always
    resolved in favour of the earliest candidate.

    The input mapping is not modified: the function works on a shallow copy.

    Args:
        S: Mapping from key to 2-D point.
        m: Maximum number of points to select.

    Returns:
        A dict with the selected keys and their points, in selection order.
    """
    remaining = dict(S)  # shallow copy so the caller's mapping stays intact
    Sel = {}
    while remaining and len(Sel) < m:
        gains = {
            key: compute_distance_to_set(point, Sel)
            for key, point in remaining.items()
        }
        node_to_add = max(gains.items(), key=operator.itemgetter(1))[0]
        Sel[node_to_add] = remaining.pop(node_to_add)
    return Sel
def graph_longest_path(G):
    """Return the longest path found when starting from the end nodes of ``G``.

    Only nodes with at most one neighbour are used as starting points; for
    each of them ``longest_path`` is called with a fresh list of marked
    nodes.  The first path of maximal length is kept.

    Args:
        G: Graph exposing ``nodes()`` and ``neighbors(n)``.

    Returns:
        The longest path as a list of nodes, or ``[]`` when no node has fewer
        than two neighbours.
    """
    best = []
    for start in G.nodes():
        neighbour_count = len(list(G.neighbors(start)))
        if neighbour_count <= 1:
            candidate = longest_path(G, start, [])
            if len(candidate) > len(best):
                best = candidate
    return best
def longest_path(G, n, marked):
    """Return the longest simple path that starts at ``n``.

    The search is a depth-first exploration with backtracking: ``n`` is
    marked while its neighbours are explored and unmarked again before
    returning, so a node used by one branch remains available to the sibling
    branches.  Without the unmarking step the result can be shorter than the
    real longest path on graphs that contain cycles.

    NOTE: the search enumerates simple paths and can therefore be expensive
    on large graphs with many cycles.

    Args:
        G: Graph exposing ``neighbors(n)``.
        n: Node at which the path starts.
        marked: List of nodes that must not be visited.  It is used as a
            stack during the search and holds its original content again
            when the function returns.

    Returns:
        The path as a list of nodes ordered from its far end back to ``n``
        (``n`` is the last element).
    """
    marked.append(n)
    best = []
    for n2 in G.neighbors(n):
        if n2 in marked:
            continue
        candidate = longest_path(G, n2, marked)
        if len(candidate) > len(best):
            best = candidate
    marked.pop()  # backtrack: release n for other branches of the search
    best.append(n)
    return best
def construct_graph_from_path(G, path):
    """Build a chain graph that follows ``path``.

    Every node of the path is copied with its ``x``/``y`` coordinates from
    ``G``, and each pair of consecutive path nodes is linked by an edge whose
    ``weight`` is the Euclidean distance between the two nodes.

    Args:
        G: Graph providing the node coordinates.
        path: List of nodes of ``G``.

    Returns:
        A new ``networkx.Graph``.
    """
    path_graph = nx.Graph()
    for node in path:
        coords = G.nodes[node]
        path_graph.add_node(node, x=coords['x'], y=coords['y'])
    for tail, head in zip(path, path[1:]):
        dx = G.nodes[tail]['x'] - G.nodes[head]['x']
        dy = G.nodes[tail]['y'] - G.nodes[head]['y']
        length = math.sqrt(math.pow(dx, 2) + math.pow(dy, 2))
        path_graph.add_edge(tail, head, weight=length)
    return path_graph
def angle_between(G, p1, p2, p3):
    """Return the angle at ``p2`` of the triangle ``p1, p2, p3`` in degrees.

    The angle is obtained with the law of cosines from the node coordinates
    stored in ``G.nodes[...]['x']`` / ``['y']``.  The cosine is rounded to
    four decimals before ``acos`` is applied.

    Args:
        G: Graph-like object exposing ``G.nodes[n]['x']`` and ``['y']``.
        p1: First node.
        p2: Node at which the angle is measured.
        p3: Third node.

    Returns:
        The angle in degrees, in ``[0, 180]``.  When ``p2`` has the same
        coordinates as ``p1`` or ``p3`` the angle is undefined and ``0.0`` is
        returned instead of raising ``ZeroDivisionError``.
    """
    x1, y1 = G.nodes[p1]['x'], G.nodes[p1]['y']
    x2, y2 = G.nodes[p2]['x'], G.nodes[p2]['y']
    x3, y3 = G.nodes[p3]['x'], G.nodes[p3]['y']
    a = math.hypot(x1 - x3, y1 - y3)  # side opposite to p2
    b = math.hypot(x2 - x3, y2 - y3)
    c = math.hypot(x1 - x2, y1 - y2)
    denominator = 2 * b * c
    if denominator == 0:
        # NOTE(review): degenerate triangle; 0.0 is a convention chosen so
        # that the caller's angle sums are unaffected - confirm it suits you.
        return 0.0
    cosine = round((b * b + c * c - a * a) / denominator, 4)
    # Guard acos against values pushed just outside [-1, 1] by rounding.
    cosine = max(-1.0, min(1.0, cosine))
    return math.degrees(math.acos(cosine))
def Sparsification(G, longest_p, nb_points):
    """Reduce a path to at most ``nb_points`` spatially spread nodes.

    The coordinates of the path nodes are handed to
    ``greedy_diversity_selection``; the selected nodes are then kept in their
    original path order and turned into a chain graph with
    ``construct_graph_from_path``.

    Args:
        G: Graph providing the ``x``/``y`` coordinates of the nodes.
        longest_p: Path (sequence of nodes of ``G``) to sparsify.
        nb_points: Maximum number of nodes to keep.

    Returns:
        The graph built from the retained nodes.
    """
    S = {
        n: np.array([G.nodes[n]['x'], G.nodes[n]['y']])
        for n in longest_p
    }
    Sel = greedy_diversity_selection(S, nb_points)
    # Keep the survivors in the order in which they appear along the path.
    normalized_path = [n for n in longest_p if n in Sel]
    return construct_graph_from_path(G, normalized_path)
def paths_to_vec(G, paths):
    """Turn each path into a vector of its consecutive turning angles.

    Every vector starts with the constant ``1`` followed by
    ``angle_between(G, u, v, w)`` for each three consecutive nodes of the
    path.  A sliding window replaces the former ``-1`` "no predecessor"
    markers, so a node labelled ``-1`` is treated like any other node.
    Each vector is also printed to standard output.

    Args:
        G: Graph passed through to ``angle_between``.
        paths: Mapping from a key to an iterable of node labels.

    Returns:
        A dict mapping every key of ``paths`` to its vector (a list).
    """
    mat = {}
    for key in paths:
        nodes = list(paths[key])
        vec = [1]
        vec.extend(
            angle_between(G, before, middle, after)
            for before, middle, after in zip(nodes, nodes[1:], nodes[2:])
        )
        mat[key] = vec
        print(vec)
    return mat
if __name__ == "__main__":
    # Command line:
    #   argv[1]  directory containing the input XML graphs
    #   argv[2]  directory receiving the longest-path graphs
    #   argv[3]  directory receiving the sparsified graphs
    #   argv[4]  maximum number of points kept by the sparsification
    import os  # local import: only `walk` is imported from os at file level

    sys.setrecursionlimit(10000)
    input_graphs_directory = sys.argv[1]
    output_longest_paths_directory = sys.argv[2]
    output_sparsified_paths_directory = sys.argv[3]
    nb_points = int(sys.argv[4])

    # Make sure both destinations exist before anything is written.
    for out_dir in (output_longest_paths_directory, output_sparsified_paths_directory):
        os.makedirs(out_dir, exist_ok=True)

    print("reading graphs and computing spasified graphs")
    for (dirpath, _dirnames, filenames) in walk(input_graphs_directory):
        for filename in filenames:
            print(filename)
            # Join with the directory actually being walked so that the
            # script works on any platform and for nested directories.
            G = read_graph_from_xml(os.path.join(dirpath, filename))
            longest_p = graph_longest_path(G)
            longest_path_graph = construct_graph_from_path(G, longest_p)
            sparsified_path = Sparsification(G, longest_p, nb_points)
            graph_to_xml(
                longest_path_graph,
                os.path.join(output_longest_paths_directory, filename),
            )
            graph_to_xml(
                sparsified_path,
                os.path.join(output_sparsified_paths_directory, filename),
            )
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment