Skip to content
Snippets Groups Projects
Commit 4be5fd41 authored by Abd Errahmane Kiouche's avatar Abd Errahmane Kiouche :speech_balloon:
Browse files

Update codes/Dwivedi_approach.py, codes/Armiti_approach.py, codes/SED_approach.py files

parent f6493de2
No related branches found
No related tags found
No related merge requests found
import networkx as nx
from lxml import etree as ET
import numpy as np
import math
from scipy.spatial import distance
from os import walk
import operator
import sys
import time
from scipy.optimize import linear_sum_assignment
import collections
""" Input Output functions """
def read_graph_from_xml(file_path):
    """Build an undirected networkx graph from an XML graph description.

    The children of the XML root are treated as graphs; inside each of
    them, elements tagged "node" become vertices and elements tagged
    "edge" become edges (the format is presumably GXL -- not verified here).

    Vertices are renumbered with consecutive integers in order of
    appearance and carry two attributes, ``x`` and ``y``. The first value
    found among the node's attribute children is taken as ``x``, any later
    one as ``y``. Edge weights are the Euclidean distance between the two
    endpoints; a "weight" attribute stored in the file is not used.

    :param file_path: path of the XML file to parse
    :return: the populated ``nx.Graph``
    """
    G = nx.Graph()
    index_of = {}  # XML node id (as string) -> consecutive integer index
    document_root = ET.parse(file_path).getroot()
    next_index = 0
    for graph in document_root:
        for element in graph:
            if element.tag == "node":
                index_of[str(element.attrib['id'])] = next_index
                x_v = y_v = 0
                values_seen = 0
                for attrib in element:
                    for value in attrib:
                        # first value read is x, everything after it is y
                        if values_seen == 0:
                            x_v = float(value.text)
                        else:
                            y_v = float(value.text)
                        values_seen += 1
                G.add_node(next_index, x=x_v, y=y_v)
                next_index += 1
            elif element.tag == "edge":
                source = index_of[str(element.attrib['from'])]
                target = index_of[str(element.attrib["to"])]
                delta_x = G.nodes[source]['x'] - G.nodes[target]['x']
                delta_y = G.nodes[source]['y'] - G.nodes[target]['y']
                # weight is recomputed from the coordinates
                edge_length = math.sqrt(math.pow(delta_x, 2) + math.pow(delta_y, 2))
                G.add_edge(source, target, weight=edge_length)
    return G
""" String edit distance functions """
def edge_subtitution_cost(e1,e2):
    """Cost of substituting edge ``e1`` by edge ``e2``.

    Both edges are ``(length, angle)`` pairs. The cost is the third side of
    the triangle given by the law of cosines with the two lengths and the
    absolute angle difference. A slightly negative radicand (rounding) is
    clamped to zero before the square root.
    """
    (length_a, angle_a), (length_b, angle_b) = e1, e2
    squared = math.pow(length_a,2)+math.pow(length_b,2)-2*length_a*length_b*math.cos(abs(angle_a-angle_b))
    return math.sqrt(max(squared, 0))
def edge_insertion_cost(e1):
    """Cost of inserting edge ``e1``: its first component (the length)."""
    inserted_length = e1[0]
    return inserted_length
def edge_deletion_cost(e1):
    """Cost of deleting edge ``e1``: its first component (the length)."""
    deleted_length = e1[0]
    return deleted_length
""" Geometric vertex distance """
def vertex_features(G):
    """Describe every vertex of ``G`` by the edges around it.

    For each node the neighbours are ordered with
    ``order_neighbors_counter_clockwise`` and turned into a list of
    ``(length, angle)`` tuples, one per neighbour:

    * ``length`` is the Euclidean distance between the node and the neighbour;
    * ``angle`` is ``angle_between`` the vector to that neighbour and the
      vector to the previous neighbour in the ordering (the first neighbour
      is paired with the last one, so the sequence is cyclic).

    Nodes without neighbours get an empty list.

    :param G: graph whose nodes carry ``x`` and ``y`` attributes
    :return: dict mapping each node to its list of ``(length, angle)`` tuples
    """
    F_V = {}
    for n1 in G.nodes():
        x_c = G.nodes[n1]['x']
        y_c = G.nodes[n1]['y']
        ngh = list(G.neighbors(n1))
        ordered_ngh = order_neighbors_counter_clockwise(G, n1, ngh)
        l = []
        for i, current in enumerate(ordered_ngh):
            x1 = G.nodes[current]['x'] - x_c
            y1 = G.nodes[current]['y'] - y_c
            # index -1 wraps around, pairing the first neighbour with the last
            previous = ordered_ngh[i - 1]
            x2 = G.nodes[previous]['x'] - x_c
            # the y component must come from the 'y' attribute
            y2 = G.nodes[previous]['y'] - y_c
            length = math.sqrt(math.pow(x1, 2) + math.pow(y1, 2))
            l.append((length, angle_between(x1, y1, x2, y2)))
        F_V[n1] = l
    return F_V


def order_neighbors_counter_clockwise(G,center,neighbors):
    """Sort ``neighbors`` of ``center`` by angle, starting at the first one.

    The first neighbour is the reference direction (angle 0); every other
    neighbour is keyed by ``angle_between`` the reference vector and its own
    vector, both taken from ``center``. The sort is stable, so neighbours
    with equal angles keep their input order.

    :param G: graph whose nodes carry ``x`` and ``y`` attributes
    :param center: node the neighbours are arranged around
    :param neighbors: list of node identifiers adjacent to ``center``
    :return: list of the same node identifiers in ascending angle order
             (empty when ``neighbors`` is empty)
    """
    if len(neighbors) == 0:
        return []
    center_x = G.nodes[center]['x']
    center_y = G.nodes[center]['y']
    first_point = neighbors[0]
    x1 = G.nodes[first_point]['x'] - center_x
    y1 = G.nodes[first_point]['y'] - center_y
    points = [{'id': first_point, 'angle': 0}]
    for neighbor in neighbors[1:]:
        # look the neighbour itself up, not its position in the list
        x2 = G.nodes[neighbor]['x'] - center_x
        y2 = G.nodes[neighbor]['y'] - center_y
        points.append({'id': neighbor, 'angle': angle_between(x1, y1, x2, y2)})
    sorted_points = sorted(points, key=lambda point: point['angle'])
    return [point['id'] for point in sorted_points]


def angle_between(x1,y1,x2,y2):
    """Angle from vector ``(x1, y1)`` to vector ``(x2, y2)``, in radians.

    Computed with ``atan2(cross, dot)``; negative results are shifted by
    ``2*pi`` so that the value is non-negative.
    """
    angle = math.atan2(x1*y2-y1*x2, x1*x2+y1*y2)
    if angle < 0:
        angle += 2*math.pi
    return angle
def cyclic_string_edit_distance(FV1,FV2):
    """Smallest string edit distance between ``FV1`` and any rotation of ``FV2``.

    Only ``FV2`` is rotated; ``FV1`` is always used as given. The unrotated
    pair is evaluated first, then every rotation by 1 .. len(FV2)-1.
    """
    best = string_edit_distance(FV1, FV2)
    ring = collections.deque(FV2)
    for _ in range(1, len(FV2)):
        ring.rotate(1)  # one more step to the right each pass
        candidate = string_edit_distance(FV1, list(ring))
        if candidate < best:
            best = candidate
    return best
def string_edit_distance(FV1,FV2):
    """ Compute the minimum edit distance
    between two edge sequences using the
    Wagner-Fischer algorithm (dynamic programming).

    Deleting an element of ``FV1`` costs ``edge_deletion_cost``, inserting an
    element of ``FV2`` costs ``edge_insertion_cost`` and replacing one by the
    other costs ``edge_subtitution_cost``."""
    n_rows = len(FV1)
    n_cols = len(FV2)
    # (m+1) x (n+1) table; cell [i][j] = distance between FV1[:i] and FV2[:j]
    table = [[0] * (n_cols + 1) for _ in range(n_rows + 1)]
    # first column: delete every element of the FV1 prefix
    for i in range(1, n_rows + 1):
        table[i][0] = edge_deletion_cost(FV1[i-1]) + table[i-1][0]
    # first row: insert every element of the FV2 prefix
    for j in range(1, n_cols + 1):
        table[0][j] = edge_insertion_cost(FV2[j-1]) + table[0][j-1]
    for i, left in enumerate(FV1, start=1):
        for j, right in enumerate(FV2, start=1):
            table[i][j] = min(
                table[i][j-1] + edge_insertion_cost(right),
                table[i-1][j] + edge_deletion_cost(left),
                table[i-1][j-1] + edge_subtitution_cost(left, right),
            )
    return table[n_rows][n_cols]
""" Geometric graph distance """
def vertex_insertion_deletion_cost(FV):
    """Cost of inserting or deleting a vertex: the sum of the first
    component (the length) of every edge tuple in ``FV``."""
    total = 0
    for edge in FV:
        total += edge[0]
    return total
def vertex_subitution_cost(FV1,FV2):
    """Cost of substituting one vertex by another: the cyclic string edit
    distance between their edge sequences ``FV1`` and ``FV2``."""
    substitution = cyclic_string_edit_distance(FV1, FV2)
    return substitution
def Geometric_graph_distances(FV1,FV2):
    """Distance between two graphs given their vertex features.

    ``FV1`` and ``FV2`` are the outputs of ``vertex_features``; they are
    indexed with the integers ``0 .. len-1``. The smaller graph provides the
    rows of a square cost matrix and the larger one the columns:

    * cell ``[i][j]`` of a real row is the substitution cost between vertex
      ``i`` of the smaller graph and vertex ``j`` of the larger graph;
    * the padding rows (one per vertex the smaller graph lacks) hold the
      insertion/deletion cost of the column's vertex.

    The distance is the cost of the optimal assignment computed by
    ``linear_sum_assignment``. Two empty graphs have distance ``0.0``.

    Every real cell is computed from its own (row vertex, column vertex)
    pair: the matrix is not symmetric, so a value must not be mirrored
    across the diagonal.
    """
    # the smaller graph always provides the rows
    if len(FV1) > len(FV2):
        FV1, FV2 = FV2, FV1
    n_small = len(FV1)
    n_large = len(FV2)
    if n_large == 0:
        # nothing to assign; linear_sum_assignment rejects an empty 1-D array
        return 0.0
    cost_matrix = np.zeros((n_large, n_large))
    for i in range(n_small):
        for j in range(n_large):
            cost_matrix[i, j] = vertex_subitution_cost(FV1[i], FV2[j])
    # padding rows so that the matrix is square
    if n_small < n_large:
        empty_costs = [vertex_insertion_deletion_cost(FV2[j]) for j in range(n_large)]
        cost_matrix[n_small:, :] = empty_costs
    # computing assignment cost using hungarian algorithm
    row_ind, col_ind = linear_sum_assignment(cost_matrix)
    dist = cost_matrix[row_ind, col_ind].sum()
    return dist
if __name__ == "__main__":
    # Usage: <script> <graph directory> <output file> <graphs per class>
    # Reads every graph file below the directory, computes all pairwise
    # geometric graph distances and writes one "label:[row]" line per graph.
    import os.path  # only `walk` is imported from os at file level

    sys.setrecursionlimit(10000)
    original_graph_path = sys.argv[1]
    output_file_result = sys.argv[2]
    nb_items_per_class = int(sys.argv[3])
    class_labels={0:"C1",1:"C2",2:"C3",3:"C4",4:"C5",5:"C6",6:"C7",7:"C8",8:"C9",9:"C10"}

    print("reading graphs")
    dict_graphs = {}
    for (dirpath, dirname, filenames) in walk(original_graph_path):
        for filename in filenames:
            # join with the directory the file was found in, using the
            # separator of the current platform
            dict_graphs[filename] = read_graph_from_xml(os.path.join(dirpath, filename))

    print("computing distance matrix")
    start = time.time()
    list_graphs_names = list(dict_graphs.keys())
    nb_graphs = len(list_graphs_names)
    # vertex features are computed once per graph, not once per pair
    dict_FV = {name: vertex_features(dict_graphs[name]) for name in list_graphs_names}
    dist_map = {i: {} for i in range(nb_graphs)}
    for i in range(nb_graphs):
        print(i)
        dist_map[i][i]=0
        for j in range(i+1, nb_graphs):
            dij = Geometric_graph_distances(dict_FV[list_graphs_names[i]],dict_FV[list_graphs_names[j]])
            dist_map[i][j]=dij
            dist_map[j][i]=dij
    mat_distance = {
        name: [dist_map[i][j] for j in range(nb_graphs)]
        for i, name in enumerate(list_graphs_names)
    }
    print("Computing distances has took " + str((time.time() - start)) + "sec")

    print("printing cost matrix")
    # NOTE(review): the label is derived from the position of the graph in
    # the directory listing, so files are assumed to be listed class by class.
    with open(output_file_result,"w") as file:
        for i, name in enumerate(list_graphs_names):
            class_index = i//nb_items_per_class
            # fall back to the same "C<k>" pattern beyond the ten listed labels
            label = class_labels.get(class_index, "C" + str(class_index + 1))
            file.write(str(label)+":"+str(mat_distance[name])+"\n")
    print("end")
import networkx as nx
from lxml import etree as ET
import numpy as np
import math
from scipy.spatial import distance
from os import walk
import operator
import sys
import time
from scipy.optimize import linear_sum_assignment
def read_graph_from_xml(file_path):
    """Load an XML graph file into an undirected ``nx.Graph``.

    Every child of the XML root is scanned; its "node" elements become
    vertices (numbered 0, 1, 2, ... in reading order) and its "edge"
    elements become edges between the vertices named by their ``from`` and
    ``to`` attributes.

    Each vertex stores ``x`` and ``y``: the first value encountered under
    the node is ``x`` and later values overwrite ``y``. Each edge stores
    ``weight``, the Euclidean distance between its endpoints (a weight
    attribute present in the file is ignored).

    :param file_path: location of the XML file
    :return: the graph that was read
    """
    G = nx.Graph()
    xml_id_to_index = {}
    root = ET.parse(file_path).getroot()
    node_count = 0
    for graph in root:
        for element in graph:
            if element.tag == "node":
                xml_id_to_index[str(element.attrib['id'])] = node_count
                x_v = y_v = 0
                position = 0
                for attrib in element:
                    for value in attrib:
                        number = float(value.text)
                        if position == 0:
                            x_v = number
                        else:
                            y_v = number
                        position += 1
                G.add_node(node_count, x=x_v, y=y_v)
                node_count += 1
            elif element.tag == "edge":
                source = xml_id_to_index[str(element.attrib["from"])]
                target = xml_id_to_index[str(element.attrib["to"])]
                x_gap = G.nodes[source]['x'] - G.nodes[target]['x']
                y_gap = G.nodes[source]['y'] - G.nodes[target]['y']
                G.add_edge(
                    source,
                    target,
                    weight=math.sqrt(math.pow(x_gap, 2) + math.pow(y_gap, 2)),
                )
    return G
def Dwivedi_GDM(G1,G2,w1,w2,w3,w4):
    """Graph distance measure: the vertex distance ``VD`` weighted by ``w1``
    plus the edge distance ``EDM`` weighted by ``w2``, ``w3`` and ``w4``."""
    vertex_part = VD(G1, G2, w1)
    edge_part = EDM(G1, G2, w2, w3, w4)
    return vertex_part + edge_part
def E_A(ai,bi,ap,bp,xj,yj,xq,yq):
    """Absolute difference between the inclinations of two segments.

    The first segment runs from ``(ai, bi)`` to ``(ap, bp)``, the second
    from ``(xj, yj)`` to ``(xq, yq)``. The inclination of a segment is
    ``arcsin(opposite / hypotenuse)`` where the hypotenuse is the segment
    length and the opposite side is the distance between the far endpoint
    and the point with the same x but the start point's y. A zero-length
    segment has inclination 0.
    """
    def _inclination(x_start, y_start, x_end, y_end):
        hypotenuse = math.sqrt(pow(x_end-x_start,2)+pow(y_end-y_start,2))
        if hypotenuse == 0:
            return 0
        # the foot of the perpendicular shares its x with the end point, so
        # the horizontal term is zero and only the vertical offset remains
        opposite = math.sqrt(pow(x_end-x_end,2)+pow(y_end-y_start,2))
        return np.arcsin(opposite/hypotenuse)

    first = _inclination(ai, bi, ap, bp)
    second = _inclination(xj, yj, xq, yq)
    return abs(first-second)
def E_L(ai,bi,ap,bp,xj,yj,xq,yq):
    """Absolute difference between the lengths of the segment
    ``(ai, bi)-(ap, bp)`` and the segment ``(xj, yj)-(xq, yq)``."""
    segments = ((ai, bi, ap, bp), (xj, yj, xq, yq))
    lengths = [
        math.sqrt(pow(x_end-x_start,2)+pow(y_end-y_start,2))
        for (x_start, y_start, x_end, y_end) in segments
    ]
    return abs(lengths[0]-lengths[1])
def E_P(ai,bi,ap,bp,xj,yj,xq,yq):
    """Position difference of two segments: the mean of the distance between
    their start points ``(ai, bi)``/``(xj, yj)`` and the distance between
    their end points ``(ap, bp)``/``(xq, yq)``."""
    start_gap = math.sqrt(pow(ai-xj,2)+pow(bi-yj,2))
    end_gap = math.sqrt(pow(ap-xq,2)+pow(bp-yq,2))
    return (start_gap + end_gap)/2
def EDM(G1,G2,w2,w3,w4):
    """Weighted edge distance between two graphs.

    Every edge of ``G1`` is compared with every edge of ``G2`` using the
    unweighted sum ``E_A + E_L + E_P``; ``linear_sum_assignment`` then picks
    the cheapest pairing. The result is the sum over the paired edges of
    ``w2*E_A + w3*E_L + w4*E_P``. Edges left unpaired (when the graphs have
    different numbers of edges) contribute nothing.

    :param G1, G2: graphs whose nodes carry ``x`` and ``y`` attributes
    :param w2: weight of the angle term ``E_A``
    :param w3: weight of the length term ``E_L``
    :param w4: weight of the position term ``E_P``
    :return: the weighted distance, ``0`` when either graph has no edge
    """
    def _segment(G, edge):
        # (x, y) of the first endpoint followed by (x, y) of the second one
        first, second = edge[0], edge[1]
        return (G.nodes[first]['x'], G.nodes[first]['y'],
                G.nodes[second]['x'], G.nodes[second]['y'])

    segments_1 = [_segment(G1, e1) for e1 in G1.edges()]
    segments_2 = [_segment(G2, e2) for e2 in G2.edges()]
    if not segments_1 or not segments_2:
        # nothing can be paired; also avoids handing an empty array to scipy
        return 0

    cost_matrix = []
    for seg_1 in segments_1:
        row = []
        for seg_2 in segments_2:
            coords = seg_1 + seg_2
            row.append(E_A(*coords) + E_L(*coords) + E_P(*coords))
        cost_matrix.append(row)
    cost_matrix = np.array(cost_matrix)
    row_ind, col_ind = linear_sum_assignment(cost_matrix)

    EDM_distance = 0
    for r, c in zip(row_ind, col_ind):
        # coordinates are taken from the paired edges themselves, never from
        # values left over by the cost-matrix loop
        coords = segments_1[r] + segments_2[c]
        EDM_distance += w2*E_A(*coords) \
                        + w3*E_L(*coords) \
                        + w4*E_P(*coords)
    return EDM_distance
def VD(G1,G2,w1):
    """Weighted vertex distance between two graphs.

    Builds the matrix of Euclidean distances between every node of ``G1``
    (rows) and every node of ``G2`` (columns), solves the assignment problem
    with ``linear_sum_assignment`` and returns ``w1`` times the total cost
    of the chosen pairs.
    """
    points_1 = [(G1.nodes[v1]['x'], G1.nodes[v1]['y']) for v1 in G1.nodes()]
    points_2 = [(G2.nodes[v2]['x'], G2.nodes[v2]['y']) for v2 in G2.nodes()]
    pairwise = np.array([
        [math.sqrt(pow(ai-xj,2)+pow(bi-yj,2)) for (xj, yj) in points_2]
        for (ai, bi) in points_1
    ])
    chosen_rows, chosen_cols = linear_sum_assignment(pairwise)
    return w1*pairwise[chosen_rows, chosen_cols].sum()
if __name__ == "__main__":
    # Usage: <script> <graph directory> <output file> <graphs per class>
    # Reads every graph file below the directory, computes all pairwise
    # Dwivedi distances and writes one "label:[row]" line per graph.
    import os.path  # only `walk` is imported from os at file level

    sys.setrecursionlimit(10000)
    original_graph_path = sys.argv[1]
    output_file_result = sys.argv[2]
    nb_items_per_class = int(sys.argv[3])
    class_labels={0:"C1",1:"C2",2:"C3",3:"C4",4:"C5",5:"C6",6:"C7",7:"C8",8:"C9",9:"C10"}

    print("reading graphs")
    dict_graphs = {}
    for (dirpath, dirname, filenames) in walk(original_graph_path):
        for filename in filenames:
            # join with the directory the file was found in, using the
            # separator of the current platform
            dict_graphs[filename] = read_graph_from_xml(os.path.join(dirpath, filename))

    print("computing distance matrix")
    start = time.time()
    list_graphs_names = list(dict_graphs.keys())
    nb_graphs = len(list_graphs_names)
    dist_map = {i: {} for i in range(nb_graphs)}
    for i in range(nb_graphs):
        print(i)
        dist_map[i][i]=0
        for j in range(i+1, nb_graphs):
            # fixed weights for the vertex, angle, length and position terms
            dij = Dwivedi_GDM(dict_graphs[list_graphs_names[i]],dict_graphs[list_graphs_names[j]],0.35,0.23,0.11,0.31)
            dist_map[i][j]=dij
            dist_map[j][i]=dij
    mat_distance = {
        name: [dist_map[i][j] for j in range(nb_graphs)]
        for i, name in enumerate(list_graphs_names)
    }
    print("Computing distances has took " + str((time.time() - start)) + "sec")

    print("printing cost matrix")
    # NOTE(review): the label is derived from the position of the graph in
    # the directory listing, so files are assumed to be listed class by class.
    with open(output_file_result,"w") as file:
        for i, name in enumerate(list_graphs_names):
            class_index = i//nb_items_per_class
            # fall back to the same "C<k>" pattern beyond the ten listed labels
            label = class_labels.get(class_index, "C" + str(class_index + 1))
            file.write(str(label)+":"+str(mat_distance[name])+"\n")
    print("end")
import networkx as nx
from lxml import etree as ET
import numpy as np
import math
from scipy.spatial import distance
from os import walk
import operator
import sys
import time
from scipy.optimize import linear_sum_assignment
import collections
""" Input Output functions """
def read_graph_from_xml(file_path):
    """Read an XML graph file and return it as a closed ``nx.Graph``.

    "node" elements found under the children of the XML root become
    vertices numbered 0, 1, 2, ... in reading order, with attributes ``x``
    (first value read under the node) and ``y`` (any later value). "edge"
    elements become edges between the vertices referenced by ``from`` and
    ``to``; their ``weight`` is the Euclidean distance between the endpoints
    (a weight stored in the file is not used).

    After parsing, one extra edge is added between vertex 0 and the last
    vertex, weighted the same way. This requires at least one node in the
    file.

    :param file_path: path of the XML file
    :return: the graph including the extra closing edge
    """
    def _euclidean(first, second):
        # distance between two vertices already stored in G
        x_gap = G.nodes[first]['x'] - G.nodes[second]['x']
        y_gap = G.nodes[first]['y'] - G.nodes[second]['y']
        return math.sqrt(math.pow(x_gap, 2) + math.pow(y_gap, 2))

    G = nx.Graph()
    known_ids = {}  # XML node id (as string) -> vertex number
    root = ET.parse(file_path).getroot()
    vertex_number = 0
    for graph in root:
        for element in graph:
            if element.tag == "node":
                known_ids[str(element.attrib['id'])] = vertex_number
                x_v = y_v = 0
                read_so_far = 0
                for attrib in element:
                    for value in attrib:
                        if read_so_far == 0:
                            x_v = float(value.text)
                        else:
                            y_v = float(value.text)
                        read_so_far += 1
                G.add_node(vertex_number, x=x_v, y=y_v)
                vertex_number += 1
            elif element.tag == "edge":
                source = known_ids[str(element.attrib['from'])]
                target = known_ids[str(element.attrib["to"])]
                G.add_edge(source, target, weight=_euclidean(source, target))
    # link the first vertex with the last one
    last_vertex = len(G.nodes()) - 1
    closing_weight = _euclidean(0, last_vertex)
    G.add_edge(0, last_vertex, weight=closing_weight)
    return G
""" String edit distance functions """
def angle_between(x1,y1,x2,y2):
    """Angle from vector ``(x1, y1)`` to vector ``(x2, y2)``, in radians.

    Obtained from ``atan2`` of the cross and dot products; a negative result
    is shifted by ``2*pi`` so the returned value is non-negative.
    """
    cross = x1*y2-y1*x2
    dot = x1*x2+y1*y2
    theta = math.atan2(cross, dot)
    return theta + 2*math.pi if theta < 0 else theta
def path_to_string(G):
    """Turn the edge list of ``G`` into a sequence of angles.

    Consecutive edges are taken in the order returned by ``G.edges()``. For
    each pair, the first edge is read backwards (second endpoint to first)
    and the second edge forwards, and ``angle_between`` those two vectors is
    appended. A graph with fewer than two edges yields an empty list.
    """
    edges_list = list(G.edges())
    path_string = []
    for earlier, later in zip(edges_list, edges_list[1:]):
        tail, head = earlier[0], earlier[1]
        # earlier edge, pointing from its second endpoint to its first
        x1 = G.nodes[tail]['x'] - G.nodes[head]['x']
        y1 = G.nodes[tail]['y'] - G.nodes[head]['y']
        # later edge, pointing from its first endpoint to its second
        x2 = G.nodes[later[1]]['x'] - G.nodes[later[0]]['x']
        y2 = G.nodes[later[1]]['y'] - G.nodes[later[0]]['y']
        path_string.append(angle_between(x1, y1, x2, y2))
    return path_string
def cost_insertion(a):
    """Cost of inserting symbol ``a``: its absolute value."""
    magnitude = abs(a)
    return magnitude
def cost_deletion(a):
    """Cost of deleting symbol ``a``: its absolute value."""
    magnitude = abs(a)
    return magnitude
def cost_subtitution(a,b):
    """Cost of replacing symbol ``a`` by ``b``: the absolute difference."""
    difference = a-b
    return abs(difference)
def cyclic_SED(PS1,PS2):
    """Smallest string edit distance between ``PS1`` and any rotation of ``PS2``.

    Only ``PS2`` is rotated; ``PS1`` is always used as given. The unrotated
    pair is evaluated first, then every rotation by 1 .. len(PS2)-1.
    """
    best = string_edit_distance(PS1, PS2)
    ring = collections.deque(PS2)
    for _ in range(1, len(PS2)):
        ring.rotate(1)  # one more step to the right each pass
        candidate = string_edit_distance(PS1, list(ring))
        if candidate < best:
            best = candidate
    return best
def string_edit_distance(PS1,PS2):
    """ Compute the minimum edit distance
    between two strings using the
    Wagner-Fischer algorithm (dynamic programming).

    Deleting a symbol of ``PS1`` costs ``cost_deletion``, inserting a symbol
    of ``PS2`` costs ``cost_insertion`` and replacing one by the other costs
    ``cost_subtitution``."""
    n_rows = len(PS1)
    n_cols = len(PS2)
    # (m+1) x (n+1) table; cell [i][j] = distance between PS1[:i] and PS2[:j]
    table = [[0] * (n_cols + 1) for _ in range(n_rows + 1)]
    # first column: delete every symbol of the PS1 prefix
    for i in range(1, n_rows + 1):
        table[i][0] = cost_deletion(PS1[i-1]) + table[i-1][0]
    # first row: insert every symbol of the PS2 prefix
    for j in range(1, n_cols + 1):
        table[0][j] = cost_insertion(PS2[j-1]) + table[0][j-1]
    for i, left in enumerate(PS1, start=1):
        for j, right in enumerate(PS2, start=1):
            table[i][j] = min(
                table[i][j-1] + cost_insertion(right),
                table[i-1][j] + cost_deletion(left),
                table[i-1][j-1] + cost_subtitution(left, right),
            )
    return table[n_rows][n_cols]
if __name__ == "__main__":
    # Usage: <script> <graph directory> <output file> <graphs per class>
    # Reads every graph file below the directory, converts each graph into a
    # string of angles, computes all pairwise cyclic string edit distances
    # and writes one "label:[row]" line per graph.
    import os.path  # only `walk` is imported from os at file level

    sys.setrecursionlimit(10000)
    original_graph_path = sys.argv[1]
    output_file_result = sys.argv[2]
    nb_items_per_class = int(sys.argv[3])
    class_labels={0:"C1",1:"C2",2:"C3",3:"C4",4:"C5",5:"C6",6:"C7",7:"C8",8:"C9",9:"C10"}

    print("reading graphs")
    dict_graphs = {}
    for (dirpath, dirname, filenames) in walk(original_graph_path):
        for filename in filenames:
            # join with the directory the file was found in, using the
            # separator of the current platform
            dict_graphs[filename] = read_graph_from_xml(os.path.join(dirpath, filename))

    print("computing distance matrix")
    start = time.time()
    list_graphs_names = list(dict_graphs.keys())
    nb_graphs = len(list_graphs_names)
    # each graph is converted to its angle string once, not once per pair
    dict_strings = {name: path_to_string(dict_graphs[name]) for name in list_graphs_names}
    dist_map = {i: {} for i in range(nb_graphs)}
    for i in range(nb_graphs):
        print(i)
        dist_map[i][i]=0
        for j in range(i+1, nb_graphs):
            dij = cyclic_SED(dict_strings[list_graphs_names[i]],dict_strings[list_graphs_names[j]])
            dist_map[i][j]=dij
            dist_map[j][i]=dij
    mat_distance = {
        name: [dist_map[i][j] for j in range(nb_graphs)]
        for i, name in enumerate(list_graphs_names)
    }
    print("Computing distances has took " + str((time.time() - start)) + "sec")

    print("printing cost matrix")
    # NOTE(review): the label is derived from the position of the graph in
    # the directory listing, so files are assumed to be listed class by class.
    with open(output_file_result,"w") as file:
        for i, name in enumerate(list_graphs_names):
            class_index = i//nb_items_per_class
            # fall back to the same "C<k>" pattern beyond the ten listed labels
            label = class_labels.get(class_index, "C" + str(class_index + 1))
            file.write(str(label)+":"+str(mat_distance[name])+"\n")
    print("end")
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment