# Source: git patch 4be5fd412ecd447e3a36cf6e2dcc940b57e549d7
# From: Abd Errahmane Kiouche <abd-errahmane.kiouche@etu.univ-lyon1.fr>
# Date: Wed, 28 Jul 2021 13:13:22 +0000
# Subject: [PATCH] Update codes/Dwivedi_approach.py, codes/Armiti_approach.py,
#          codes/SED_approach.py files
# The patch creates three new files: codes/Armiti_approach.py,
# codes/Dwivedi_approach.py and codes/SED_approach.py.

# ===== codes/Armiti_approach.py =====
"""Geometric graph distance based on cyclic string edit distances.

Every vertex is described by the sequence of its incident edges, each edge
being a ``(length, angle)`` pair.  Two vertices are compared with a cyclic
string edit distance over those sequences, and two graphs are compared by an
optimal vertex-to-vertex assignment.

Usage: python Armiti_approach.py <graphs_dir> <output_file> <nb_items_per_class>
"""
import collections
import math
import operator
import os
import sys
import time
from os import walk

import numpy as np
from scipy.optimize import linear_sum_assignment
from scipy.spatial import distance


# ---------------------------------------------------------------------------
# Input / output functions
# ---------------------------------------------------------------------------
def read_graph_from_xml(file_path):
    """Load a graph with 2-D vertex coordinates from an XML file.

    The file is walked as root -> graph -> element.  ``node`` elements carry
    an ``id`` attribute and nested value elements: the first value is stored
    as ``x`` and the last remaining value as ``y`` (both default to 0).
    ``edge`` elements reference nodes through their ``from`` / ``to``
    attributes; the edge ``weight`` is the Euclidean distance between the two
    endpoints.  Nodes are renumbered 0, 1, 2, ... in file order.

    Returns a ``networkx.Graph``.
    """
    # Imported here so the distance functions below stay importable without
    # the graph/XML stack; only the loader needs these packages.
    import networkx as nx
    from lxml import etree as ET

    G = nx.Graph()
    index_of = {}  # XML node id -> integer node index
    next_index = 0
    for graph in ET.parse(file_path).getroot():
        for element in graph:
            if element.tag == "node":
                index_of[str(element.attrib["id"])] = next_index
                values = [float(value.text) for attrib in element for value in attrib]
                x_v = values[0] if values else 0
                y_v = values[-1] if len(values) > 1 else 0
                G.add_node(next_index, x=x_v, y=y_v)
                next_index += 1
            elif element.tag == "edge":
                source = index_of[str(element.attrib["from"])]
                target = index_of[str(element.attrib["to"])]
                length = math.hypot(
                    G.nodes[source]["x"] - G.nodes[target]["x"],
                    G.nodes[source]["y"] - G.nodes[target]["y"],
                )
                G.add_edge(source, target, weight=length)
    return G


# ---------------------------------------------------------------------------
# String edit distance functions
# ---------------------------------------------------------------------------
def edge_subtitution_cost(e1, e2):
    """Return the cost of replacing edge ``e1`` by ``e2``.

    Both edges are ``(length, angle)`` pairs.  The cost is the third side of
    the triangle spanned by the two lengths and the angle difference (law of
    cosines); rounding noise below zero is clipped before the square root.
    """
    l1, a1 = e1
    l2, a2 = e2
    r = math.pow(l1, 2) + math.pow(l2, 2) - 2 * l1 * l2 * math.cos(abs(a1 - a2))
    return math.sqrt(max(r, 0))


def edge_insertion_cost(e1):
    """Return the cost of inserting edge ``e1``: its length."""
    return e1[0]


def edge_deletion_cost(e1):
    """Return the cost of deleting edge ``e1``: its length."""
    return e1[0]


# ---------------------------------------------------------------------------
# Geometric vertex distance
# ---------------------------------------------------------------------------
def vertex_features(G):
    """Describe every vertex of ``G`` by its ordered incident edges.

    Returns a dict mapping each node to a list of ``(length, angle)`` pairs,
    one per neighbour, in the order given by
    ``order_neighbors_counter_clockwise``.  ``angle`` is
    ``angle_between(this edge, preceding edge)``, where the first edge is
    paired with the last one.  Isolated vertices get an empty list.
    """
    features = {}
    for node in G.nodes():
        x_c = G.nodes[node]["x"]
        y_c = G.nodes[node]["y"]
        ordered = order_neighbors_counter_clockwise(G, node, list(G.neighbors(node)))
        edges = []
        for i, neighbor in enumerate(ordered):
            previous = ordered[i - 1]  # i == 0 wraps around to the last one
            x1 = G.nodes[neighbor]["x"] - x_c
            y1 = G.nodes[neighbor]["y"] - y_c
            x2 = G.nodes[previous]["x"] - x_c
            # Fixed: the original read the 'x' coordinate here.
            y2 = G.nodes[previous]["y"] - y_c
            edges.append((math.hypot(x1, y1), angle_between(x1, y1, x2, y2)))
        features[node] = edges
    return features


def order_neighbors_counter_clockwise(G, center, neighbors):
    """Sort ``neighbors`` of ``center`` by angle around ``center``.

    The first neighbour is the reference direction (angle 0); the others are
    sorted by ``angle_between(reference, neighbour)``, ties keeping their
    input order.  Returns a list of node ids; an empty input gives ``[]``.
    """
    neighbors = list(neighbors)
    if not neighbors:
        return []
    x_c = G.nodes[center]["x"]
    y_c = G.nodes[center]["y"]
    first = neighbors[0]
    x1 = G.nodes[first]["x"] - x_c
    y1 = G.nodes[first]["y"] - y_c
    keyed = [(0, first)]
    for neighbor in neighbors[1:]:
        # Fixed: look up the neighbour itself, not its position in the list.
        x2 = G.nodes[neighbor]["x"] - x_c
        y2 = G.nodes[neighbor]["y"] - y_c
        keyed.append((angle_between(x1, y1, x2, y2), neighbor))
    keyed.sort(key=operator.itemgetter(0))
    return [neighbor for _, neighbor in keyed]


def angle_between(x1, y1, x2, y2):
    """Return the angle from vector (x1, y1) to (x2, y2) in [0, 2*pi)."""
    angle = math.atan2(x1 * y2 - y1 * x2, x1 * x2 + y1 * y2)
    if angle < 0:
        angle += 2 * math.pi
    return angle


def cyclic_string_edit_distance(FV1, FV2):
    """Return the smallest edit distance between ``FV1`` and any rotation of ``FV2``."""
    best = string_edit_distance(FV1, FV2)
    rotated = collections.deque(FV2)
    for _ in range(1, len(FV2)):
        rotated.rotate(1)
        best = min(best, string_edit_distance(FV1, list(rotated)))
    return best


def string_edit_distance(FV1, FV2):
    """Return the minimum edit cost turning edge sequence ``FV1`` into ``FV2``.

    Wagner-Fischer dynamic programme using the edge insertion, deletion and
    substitution costs defined above.
    """
    rows, cols = len(FV1) + 1, len(FV2) + 1
    cost_matrix = [[0] * cols for _ in range(rows)]
    # First column: delete every edge of the FV1 prefix.
    for i in range(1, rows):
        cost_matrix[i][0] = cost_matrix[i - 1][0] + edge_deletion_cost(FV1[i - 1])
    # First row: insert every edge of the FV2 prefix.
    for j in range(1, cols):
        cost_matrix[0][j] = cost_matrix[0][j - 1] + edge_insertion_cost(FV2[j - 1])
    for i in range(1, rows):
        for j in range(1, cols):
            cost_matrix[i][j] = min(
                cost_matrix[i][j - 1] + edge_insertion_cost(FV2[j - 1]),
                cost_matrix[i - 1][j] + edge_deletion_cost(FV1[i - 1]),
                cost_matrix[i - 1][j - 1]
                + edge_subtitution_cost(FV1[i - 1], FV2[j - 1]),
            )
    return cost_matrix[-1][-1]


# ---------------------------------------------------------------------------
# Geometric graph distance
# ---------------------------------------------------------------------------
def vertex_insertion_deletion_cost(FV):
    """Return the cost of inserting or deleting a vertex: the sum of its edge lengths."""
    return sum(e[0] for e in FV)


def vertex_subitution_cost(FV1, FV2):
    """Return the cost of substituting one vertex for another."""
    return cyclic_string_edit_distance(FV1, FV2)


def _as_feature_list(FV):
    """Return the per-vertex feature lists of ``FV`` as a plain list."""
    return list(FV.values()) if isinstance(FV, dict) else list(FV)


def Geometric_graph_distances(FV1, FV2):
    """Return the assignment-based distance between two embedded graphs.

    ``FV1`` and ``FV2`` are the vertex features of the two graphs, as returned
    by ``vertex_features`` (a dict) or as a sequence of feature lists.  The
    smaller graph provides the first rows of a square cost matrix
    (substitution costs); the remaining rows are padded with the cost of
    leaving a vertex of the larger graph unmatched.  The result is the cost of
    the optimal assignment, as a plain ``float``; two empty graphs give 0.0.
    """
    small = _as_feature_list(FV1)
    large = _as_feature_list(FV2)
    if len(small) > len(large):
        small, large = large, small
    size = len(large)
    if size == 0:
        return 0.0

    cost_matrix = np.zeros((size, size))
    # Fixed: every (row, column) pair gets its own substitution cost; the
    # original mirrored the upper triangle although the matrix is not
    # symmetric (rows and columns belong to different graphs).
    for i, features_i in enumerate(small):
        for j, features_j in enumerate(large):
            cost_matrix[i, j] = vertex_subitution_cost(features_i, features_j)
    # Pad to a square matrix: a padding row matched with column j means that
    # vertex j of the larger graph has no counterpart.
    for j, features_j in enumerate(large):
        cost_matrix[len(small):, j] = vertex_insertion_deletion_cost(features_j)

    # Optimal assignment (Hungarian algorithm).
    row_ind, col_ind = linear_sum_assignment(cost_matrix)
    return float(cost_matrix[row_ind, col_ind].sum())


if __name__ == "__main__":
    sys.setrecursionlimit(10000)
    original_graph_path = sys.argv[1]
    output_file_result = sys.argv[2]
    nb_items_per_class = int(sys.argv[3])

    print("reading graphs")
    dict_graphs = {}
    for dirpath, dirnames, filenames in walk(original_graph_path):
        # Sorted traversal: class labels below are derived from the position
        # of a graph in this order, so it must not depend on the file system.
        dirnames.sort()
        for filename in sorted(filenames):
            dict_graphs[filename] = read_graph_from_xml(os.path.join(dirpath, filename))

    print("computing distance matrix")
    start = time.time()
    list_graphs_names = list(dict_graphs)
    features = [vertex_features(dict_graphs[name]) for name in list_graphs_names]
    count = len(list_graphs_names)
    dist_rows = [[0] * count for _ in range(count)]
    for i in range(count):
        print(i)
        for j in range(i + 1, count):
            dij = Geometric_graph_distances(features[i], features[j])
            dist_rows[i][j] = dij
            dist_rows[j][i] = dij
    print("Computing distances has took " + str((time.time() - start)) + "sec")

    print("printing cost matrix")
    with open(output_file_result, "w") as out:
        for i, row in enumerate(dist_rows):
            # Graph i belongs to class C<k>, k counted from 1.
            out.write("C" + str(i // nb_items_per_class + 1) + ":" + str(row) + "\n")
    print("end")

# ===== codes/Dwivedi_approach.py =====
"""Dwivedi graph distance: weighted vertex distance plus weighted edge distance.

Usage: python Dwivedi_approach.py <graphs_dir> <output_file> <nb_items_per_class>
"""
import math
import operator
import os
import sys
import time
from os import walk

import numpy as np
from scipy.optimize import linear_sum_assignment
from scipy.spatial import distance


def read_graph_from_xml(file_path):
    """Load a graph with 2-D vertex coordinates from an XML file.

    The file is walked as root -> graph -> element.  ``node`` elements carry
    an ``id`` attribute and nested value elements: the first value is stored
    as ``x`` and the last remaining value as ``y`` (both default to 0).
    ``edge`` elements reference nodes through their ``from`` / ``to``
    attributes; the edge ``weight`` is the Euclidean distance between the two
    endpoints.  Nodes are renumbered 0, 1, 2, ... in file order.

    Returns a ``networkx.Graph``.
    """
    # Imported here so the distance functions below stay importable without
    # the graph/XML stack; only the loader needs these packages.
    import networkx as nx
    from lxml import etree as ET

    G = nx.Graph()
    index_of = {}  # XML node id -> integer node index
    next_index = 0
    for graph in ET.parse(file_path).getroot():
        for element in graph:
            if element.tag == "node":
                index_of[str(element.attrib["id"])] = next_index
                values = [float(value.text) for attrib in element for value in attrib]
                x_v = values[0] if values else 0
                y_v = values[-1] if len(values) > 1 else 0
                G.add_node(next_index, x=x_v, y=y_v)
                next_index += 1
            elif element.tag == "edge":
                source = index_of[str(element.attrib["from"])]
                target = index_of[str(element.attrib["to"])]
                length = math.hypot(
                    G.nodes[source]["x"] - G.nodes[target]["x"],
                    G.nodes[source]["y"] - G.nodes[target]["y"],
                )
                G.add_edge(source, target, weight=length)
    return G


def Dwivedi_GDM(G1, G2, w1, w2, w3, w4):
    """Return ``VD(G1, G2, w1) + EDM(G1, G2, w2, w3, w4)``."""
    return VD(G1, G2, w1) + EDM(G1, G2, w2, w3, w4)


def _inclination(x0, y0, x1, y1):
    """Return arcsin(|dy| / length) of the segment, or 0 for a zero-length one."""
    length = math.hypot(x1 - x0, y1 - y0)
    if length == 0:
        return 0
    # The ratio cannot exceed 1 mathematically; the clamp only guards rounding.
    return math.asin(min(1.0, abs(y1 - y0) / length))


def E_A(ai, bi, ap, bp, xj, yj, xq, yq):
    """Return the absolute difference of the two edges' inclination angles.

    The first edge runs from (ai, bi) to (ap, bp), the second from (xj, yj)
    to (xq, yq).
    """
    return abs(_inclination(ai, bi, ap, bp) - _inclination(xj, yj, xq, yq))


def E_L(ai, bi, ap, bp, xj, yj, xq, yq):
    """Return the absolute difference of the two edge lengths."""
    li = math.hypot(ap - ai, bp - bi)
    lj = math.hypot(xq - xj, yq - yj)
    return abs(li - lj)


def E_P(ai, bi, ap, bp, xj, yj, xq, yq):
    """Return the mean distance between corresponding endpoints of the two edges."""
    return (math.hypot(ai - xj, bi - yj) + math.hypot(ap - xq, bp - yq)) / 2


def _edge_endpoints(G, edge):
    """Return (x0, y0, x1, y1) for ``edge`` of ``G``."""
    start, end = G.nodes[edge[0]], G.nodes[edge[1]]
    return (start["x"], start["y"], end["x"], end["y"])


def EDM(G1, G2, w2, w3, w4):
    """Return the weighted edge distance between ``G1`` and ``G2``.

    Edges are matched with an optimal assignment on the unweighted cost
    ``E_A + E_L + E_P``; the result is the sum over matched pairs of
    ``w2*E_A + w3*E_L + w4*E_P``.  Returns 0.0 when either graph has no edge.
    """
    segments1 = [_edge_endpoints(G1, e) for e in G1.edges()]
    segments2 = [_edge_endpoints(G2, e) for e in G2.edges()]
    if not segments1 or not segments2:
        return 0.0

    shape = (len(segments1), len(segments2))
    angle = np.empty(shape)
    length = np.empty(shape)
    position = np.empty(shape)
    for i, s1 in enumerate(segments1):
        for j, s2 in enumerate(segments2):
            angle[i, j] = E_A(*s1, *s2)
            length[i, j] = E_L(*s1, *s2)
            position[i, j] = E_P(*s1, *s2)

    # NOTE(review): as in the original, the matching ignores the weights and
    # only the final sum applies them - confirm this is intended.
    row_ind, col_ind = linear_sum_assignment(angle + length + position)
    # Fixed: the terms of each matched pair are taken from that pair; the
    # original reused a stale y-coordinate left over from the matrix loop.
    return float(
        w2 * angle[row_ind, col_ind].sum()
        + w3 * length[row_ind, col_ind].sum()
        + w4 * position[row_ind, col_ind].sum()
    )


def VD(G1, G2, w1):
    """Return ``w1`` times the cost of the optimal vertex assignment.

    The cost of matching two vertices is their Euclidean distance.  Returns
    0.0 when either graph has no vertex.
    """
    points1 = [(G1.nodes[v]["x"], G1.nodes[v]["y"]) for v in G1.nodes()]
    points2 = [(G2.nodes[v]["x"], G2.nodes[v]["y"]) for v in G2.nodes()]
    if not points1 or not points2:
        return 0.0
    cost_matrix = np.array(
        [[math.hypot(ai - xj, bi - yj) for xj, yj in points2] for ai, bi in points1]
    )
    row_ind, col_ind = linear_sum_assignment(cost_matrix)
    return float(w1 * cost_matrix[row_ind, col_ind].sum())


if __name__ == "__main__":
    sys.setrecursionlimit(10000)
    original_graph_path = sys.argv[1]
    output_file_result = sys.argv[2]
    nb_items_per_class = int(sys.argv[3])

    print("reading graphs")
    dict_graphs = {}
    for dirpath, dirnames, filenames in walk(original_graph_path):
        # Sorted traversal: class labels below are derived from the position
        # of a graph in this order, so it must not depend on the file system.
        dirnames.sort()
        for filename in sorted(filenames):
            dict_graphs[filename] = read_graph_from_xml(os.path.join(dirpath, filename))

    print("computing distance matrix")
    start = time.time()
    graphs = list(dict_graphs.values())
    count = len(graphs)
    dist_rows = [[0] * count for _ in range(count)]
    for i in range(count):
        print(i)
        for j in range(i + 1, count):
            dij = Dwivedi_GDM(graphs[i], graphs[j], 0.35, 0.23, 0.11, 0.31)
            dist_rows[i][j] = dij
            dist_rows[j][i] = dij
    print("Computing distances has took " + str((time.time() - start)) + "sec")

    print("printing cost matrix")
    with open(output_file_result, "w") as out:
        for i, row in enumerate(dist_rows):
            # Graph i belongs to class C<k>, k counted from 1.
            out.write("C" + str(i // nb_items_per_class + 1) + ":" + str(row) + "\n")
    print("end")

# ===== codes/SED_approach.py =====
"""Cyclic string edit distance between closed paths described by turning angles.

Usage: python SED_approach.py <graphs_dir> <output_file> <nb_items_per_class>
"""
import collections
import math
import operator
import os
import sys
import time
from os import walk

import numpy as np
from scipy.optimize import linear_sum_assignment
from scipy.spatial import distance


# ---------------------------------------------------------------------------
# Input / output functions
# ---------------------------------------------------------------------------
def read_graph_from_xml(file_path):
    """Load a path graph from an XML file and close it.

    Parsing is the same as in the other approaches: ``node`` elements give
    ``x`` / ``y`` coordinates, ``edge`` elements connect nodes through their
    ``from`` / ``to`` attributes, and edge weights are Euclidean lengths.
    After parsing, one extra edge joins node 0 and the last node.  A file
    without nodes yields an empty graph.

    Returns a ``networkx.Graph``.
    """
    # Imported here so the distance functions below stay importable without
    # the graph/XML stack; only the loader needs these packages.
    import networkx as nx
    from lxml import etree as ET

    G = nx.Graph()
    index_of = {}  # XML node id -> integer node index
    next_index = 0
    for graph in ET.parse(file_path).getroot():
        for element in graph:
            if element.tag == "node":
                index_of[str(element.attrib["id"])] = next_index
                values = [float(value.text) for attrib in element for value in attrib]
                x_v = values[0] if values else 0
                y_v = values[-1] if len(values) > 1 else 0
                G.add_node(next_index, x=x_v, y=y_v)
                next_index += 1
            elif element.tag == "edge":
                source = index_of[str(element.attrib["from"])]
                target = index_of[str(element.attrib["to"])]
                length = math.hypot(
                    G.nodes[source]["x"] - G.nodes[target]["x"],
                    G.nodes[source]["y"] - G.nodes[target]["y"],
                )
                G.add_edge(source, target, weight=length)
    if G.number_of_nodes() > 0:
        # Close the path: connect the first and the last node.
        last = G.number_of_nodes() - 1
        closing = math.hypot(
            G.nodes[0]["x"] - G.nodes[last]["x"],
            G.nodes[0]["y"] - G.nodes[last]["y"],
        )
        G.add_edge(0, last, weight=closing)
    return G


# ---------------------------------------------------------------------------
# String edit distance functions
# ---------------------------------------------------------------------------
def angle_between(x1, y1, x2, y2):
    """Return the angle from vector (x1, y1) to (x2, y2) in [0, 2*pi)."""
    angle = math.atan2(x1 * y2 - y1 * x2, x1 * x2 + y1 * y2)
    if angle < 0:
        angle += 2 * math.pi
    return angle


def path_to_string(G):
    """Return the list of angles between consecutive edges of ``G``.

    Edges are taken in the order ``G.edges()`` reports them.  For each
    consecutive pair the angle is measured from the first edge reversed
    (end -> start) to the second edge (start -> end).  A graph with fewer
    than two edges gives an empty list.
    """
    # NOTE(review): the result depends on the edge order and endpoint order
    # returned by G.edges() - confirm it follows the path.
    edges_list = list(G.edges())
    path_string = []
    for e_i, e_j in zip(edges_list, edges_list[1:]):
        x1 = G.nodes[e_i[0]]["x"] - G.nodes[e_i[1]]["x"]
        y1 = G.nodes[e_i[0]]["y"] - G.nodes[e_i[1]]["y"]
        x2 = G.nodes[e_j[1]]["x"] - G.nodes[e_j[0]]["x"]
        y2 = G.nodes[e_j[1]]["y"] - G.nodes[e_j[0]]["y"]
        path_string.append(angle_between(x1, y1, x2, y2))
    return path_string


def cost_insertion(a):
    """Return the cost of inserting angle ``a``."""
    return abs(a)


def cost_deletion(a):
    """Return the cost of deleting angle ``a``."""
    return abs(a)


def cost_subtitution(a, b):
    """Return the cost of replacing angle ``a`` by ``b``."""
    return abs(a - b)


def cyclic_SED(PS1, PS2):
    """Return the smallest edit distance between ``PS1`` and any rotation of ``PS2``."""
    best = string_edit_distance(PS1, PS2)
    rotated = collections.deque(PS2)
    for _ in range(1, len(PS2)):
        rotated.rotate(1)
        best = min(best, string_edit_distance(PS1, list(rotated)))
    return best


def string_edit_distance(PS1, PS2):
    """Return the minimum edit cost turning angle sequence ``PS1`` into ``PS2``.

    Wagner-Fischer dynamic programme using the insertion, deletion and
    substitution costs defined above.
    """
    rows, cols = len(PS1) + 1, len(PS2) + 1
    cost_matrix = [[0] * cols for _ in range(rows)]
    # First column: delete every element of the PS1 prefix.
    for i in range(1, rows):
        cost_matrix[i][0] = cost_matrix[i - 1][0] + cost_deletion(PS1[i - 1])
    # First row: insert every element of the PS2 prefix.
    for j in range(1, cols):
        cost_matrix[0][j] = cost_matrix[0][j - 1] + cost_insertion(PS2[j - 1])
    for i in range(1, rows):
        for j in range(1, cols):
            cost_matrix[i][j] = min(
                cost_matrix[i][j - 1] + cost_insertion(PS2[j - 1]),
                cost_matrix[i - 1][j] + cost_deletion(PS1[i - 1]),
                cost_matrix[i - 1][j - 1] + cost_subtitution(PS1[i - 1], PS2[j - 1]),
            )
    return cost_matrix[-1][-1]


if __name__ == "__main__":
    sys.setrecursionlimit(10000)
    original_graph_path = sys.argv[1]
    output_file_result = sys.argv[2]
    nb_items_per_class = int(sys.argv[3])

    print("reading graphs")
    dict_graphs = {}
    for dirpath, dirnames, filenames in walk(original_graph_path):
        # Sorted traversal: class labels below are derived from the position
        # of a graph in this order, so it must not depend on the file system.
        dirnames.sort()
        for filename in sorted(filenames):
            dict_graphs[filename] = read_graph_from_xml(os.path.join(dirpath, filename))

    print("computing distance matrix")
    start = time.time()
    strings = [path_to_string(graph) for graph in dict_graphs.values()]
    count = len(strings)
    dist_rows = [[0] * count for _ in range(count)]
    for i in range(count):
        print(i)
        for j in range(i + 1, count):
            dij = cyclic_SED(strings[i], strings[j])
            dist_rows[i][j] = dij
            dist_rows[j][i] = dij
    print("Computing distances has took " + str((time.time() - start)) + "sec")

    print("printing cost matrix")
    with open(output_file_result, "w") as out:
        for i, row in enumerate(dist_rows):
            # Graph i belongs to class C<k>, k counted from 1.
            out.write("C" + str(i // nb_items_per_class + 1) + ":" + str(row) + "\n")
    print("end")