"""Search a graph for gamma-quasi-cliques, deleting the edges of each one found."""

from collections import deque
from math import ceil

# Significantly slows down the processing; use only when debugging.
debug = False
# Number of recursive search calls made so far (module-wide, never reset).
id_rec = 0


def quick_quasi_cliques(G, gamma, min_size):
    """Return a list of gamma-quasi-cliques of ``G`` with at least ``min_size`` vertices.

    A vertex set ``S`` is accepted when every vertex of ``S`` has at least
    ``ceil(gamma * (len(S) - 1))`` neighbours inside ``S``.

    WARNING: ``G`` is modified in place.  Each time a quasi-clique is
    reported, every edge between two of its vertices is removed from ``G``
    before the search continues, so later results depend on earlier ones.

    Args:
        G: graph object providing ``G[u]`` (iterable of the neighbours of
            ``u``), ``G.nodes`` (iterable of all vertices) and
            ``G.remove_edges_from(pairs)``.  Presumably a ``networkx.Graph``
            -- TODO confirm against the callers.
        gamma: density threshold.  NOTE(review): the bounds look designed for
            ``0 < gamma <= 1``; ``gamma == 0`` reaches a division by gamma
            unless the whole graph is accepted immediately.
        min_size: minimum number of vertices of a reported quasi-clique.

    Returns:
        A list of vertex sets, in the order in which they were found.
    """

    Quasi_Cliques = []
    # Most recently reported set; used to skip some non-maximal quasi-cliques.
    last_clique = set()

    def required_degree(size):
        """Minimum inner degree of a vertex in a quasi-clique of ``size`` vertices."""
        return ceil(gamma * (size - 1))

    def record_clique(clique, ordered):
        """Report ``clique`` (a set) and delete its inner edges from ``G``.

        ``ordered`` lists the same vertices; pairs are emitted as
        ``(ordered[i], ordered[j])`` with ``i > j``.
        """
        nonlocal last_clique
        Quasi_Cliques.append(clique)
        last_clique = clique
        G.remove_edges_from(
            [(ordered[i], ordered[j])
             for i in range(1, len(ordered))
             for j in range(i)]
        )

    def test_indeg_exdeg(X, cand_exts, indeg_X, exdeg_X):
        """Check indeg_X[u] = d_X(u) and exdeg_X[u] = d_cand_exts(u) for all u.

        Used only when ``debug`` is true.  Raises ZeroDivisionError (the
        exception the historical ``1/0`` trap produced) on the first mismatch.
        """
        in_X = set(X)
        in_cand = set(cand_exts)
        for u in in_X | in_cand:
            neighbours = set(G[u])
            true_indeg = len(in_X & neighbours)
            if indeg_X[u] != true_indeg:
                print("Erreur indeg", indeg_X[u], true_indeg)
                print(X, cand_exts)
                raise ZeroDivisionError("indeg bookkeeping is inconsistent")
            true_exdeg = len(in_cand & neighbours)
            if exdeg_X[u] != true_exdeg:
                print("Erreur exdeg", exdeg_X[u], true_exdeg)
                print(X, cand_exts)
                raise ZeroDivisionError("exdeg bookkeeping is inconsistent")

    def is_quasi_clique(X, indeg_X, exdeg_X, bool_all_vertices=False):
        """Tell whether ``X`` is a gamma-quasi-clique of at least ``min_size`` vertices.

        ``bool_all_vertices`` is True when ``X`` holds all current vertices
        (former X plus all candidates); the degree of ``u`` inside ``X`` is
        then ``indeg_X[u] + exdeg_X[u]`` instead of ``indeg_X[u]`` alone.
        """
        if len(X) < min_size:
            return False

        threshold = required_degree(len(X))
        members = set(X) if debug else None
        for u in X:
            d_X_u = indeg_X[u] + (exdeg_X[u] if bool_all_vertices else 0)
            if debug and d_X_u != len(members & set(G[u])):
                print("Erreur")
                raise ZeroDivisionError("degree bookkeeping is inconsistent")
            if d_X_u < threshold:
                return False
        return True

    def neighbourhood(v, k, current_vertices):
        """Return the set of vertices at most ``k`` hops from ``v`` (``v`` included).

        Only paths that stay inside ``current_vertices`` are followed.
        """
        allowed = set(current_vertices)
        dist = {v: 0}
        queue = deque([v])
        while queue:
            u = queue.popleft()
            if dist[u] >= k:
                continue
            for w in G[u]:
                if w in allowed and w not in dist:
                    dist[w] = dist[u] + 1
                    queue.append(w)
        return set(dist)

    def upper_lower_bounds(X, cand_exts, indeg_X, exdeg_X):
        """Compute ``(U_X, L_X)``.

        ``U_X`` is the largest and ``L_X`` the smallest number ``t`` of
        candidates for which the degree-sum test below still passes;
        ``U_X`` is 0 and ``L_X`` is ``len(cand_exts) + 1`` when no ``t`` in the
        scanned range passes.  ``X`` must not be empty.
        """
        if debug:
            test_indeg_exdeg(X, cand_exts, indeg_X, exdeg_X)

        size_X = len(X)
        n_cand = len(cand_exts)

        # best_sum[t] = total indeg of the t candidates with the largest
        # indeg (candidates sorted by decreasing indeg_X).
        best_sum = [0]
        for d in sorted((indeg_X[u] for u in cand_exts), reverse=True):
            best_sum.append(best_sum[-1] + d)
        sum_X_indeg_X = sum(indeg_X[u] for u in X)

        def degree_sum_ok(t):
            # Adding t candidates needs at least this many X-side edge ends.
            needed = size_X * required_degree(size_X + t)
            return sum_X_indeg_X + best_sum[t] >= needed

        deg_min_X = min(indeg_X[u] + exdeg_X[u] for u in X)
        U_X_min = min(int(deg_min_X / gamma) + 1 - size_X, n_cand)
        U_X = 0
        for t in range(U_X_min, 0, -1):  # empty when U_X_min <= 0
            if degree_sum_ok(t):
                U_X = t
                break

        indeg_min_X = min(indeg_X[u] for u in X)
        L_X_min = 0
        while (indeg_min_X + L_X_min < required_degree(size_X + L_X_min)
               and L_X_min <= n_cand):
            L_X_min += 1

        L_X = n_cand + 1
        for t in range(L_X_min, n_cand + 1):
            if degree_sum_ok(t):
                L_X = t
                break

        return U_X, L_X

    def compute_indeg_exdeg(X, cand_exts):
        """Build indeg/exdeg from scratch for every vertex of X and cand_exts.

        ``indeg[u]`` counts the neighbours of ``u`` in ``X`` and ``exdeg[u]``
        those in ``cand_exts``.
        """
        in_X = set(X)
        in_cand = set(cand_exts)
        indeg = {}
        exdeg = {}
        for u in in_X | in_cand:
            neighbours = set(G[u])
            indeg[u] = len(in_X & neighbours)
            exdeg[u] = len(in_cand & neighbours)
        return indeg, exdeg

    def update_indeg_exdeg(u, X, cand_exts, indeg, exdeg, update):
        """Update indeg and exdeg in place after ``u`` has been removed.

        ``update`` is ``'del_X'`` (``u`` left ``X``) or ``'del_cand_exts'``
        (``u`` left ``cand_exts``); any other value leaves both unchanged.
        ``X`` / ``cand_exts`` are the collections as they are after the removal.
        """
        if update == 'del_X':
            counters = indeg
        elif update == 'del_cand_exts':
            counters = exdeg
        else:
            return

        indeg.pop(u)
        exdeg.pop(u)
        remaining = set(X) | set(cand_exts)
        for neigh_u in remaining & set(G[u]):
            counters[neigh_u] -= 1

    def violates_bounds(u, size_Y, indeg_Y, exdeg_Y, U_Y, L_Y):
        """Tell whether ``u`` fails one of the two degree tests.

        The first test assumes ``max(exdeg_Y[u], L_Y)`` more vertices are
        added, the second assumes ``U_Y`` more vertices are added.
        """
        growth = max(exdeg_Y[u], L_Y)
        if indeg_Y[u] + exdeg_Y[u] < required_degree(size_Y + growth):
            return True
        return indeg_Y[u] + U_Y < required_degree(size_Y + U_Y)

    def aux_quick(X, cand_exts, indeg_X, exdeg_X):
        """Recursive search step; return True when a quasi-clique was found.

        ``indeg_X[i]`` is the number of neighbours of ``i`` in ``X`` and
        ``exdeg_X[i]`` the number of neighbours of ``i`` in ``cand_exts``.
        ``cand_exts`` is consumed (vertices are popped from its front).
        """
        global id_rec

        if debug:
            print("id_rec:", id_rec)
        id_rec += 1

        b_has_qclq = False

        while len(X) + len(cand_exts) >= min_size:
            current_vertices = set(X + cand_exts)

            # Everything left is inside the last reported set: nothing new.
            if current_vertices.issubset(last_clique):
                return True

            if is_quasi_clique(current_vertices, indeg_X, exdeg_X, True):
                record_clique(current_vertices, list(current_vertices))
                return True

            if not cand_exts:
                return b_has_qclq

            v = cand_exts.pop(0)
            Y = X + [v]
            # Keep only candidates within two hops of v.  The order of
            # cand_exts is preserved so that the search is reproducible.
            reachable = neighbourhood(v, 2, current_vertices)
            cand_exts_Y = [u for u in cand_exts if u in reachable]

            indeg_Y, exdeg_Y = compute_indeg_exdeg(Y, cand_exts_Y)

            U_Y, L_Y = len(cand_exts_Y), 0
            keep_pruning = bool(cand_exts_Y)

            while keep_pruning:
                U_Y, L_Y = upper_lower_bounds(Y, cand_exts_Y, indeg_Y, exdeg_Y)
                if L_Y > U_Y:
                    break

                # Only the vertices of X (not the newly added v) are tested.
                if any(violates_bounds(u, len(Y), indeg_Y, exdeg_Y, U_Y, L_Y)
                       for u in X):
                    cand_exts_Y = []

                removed_any = False
                # Iterate over a snapshot: cand_exts_Y shrinks in the loop and
                # each removal changes the exdeg seen by the later vertices.
                for u in list(cand_exts_Y):
                    if violates_bounds(u, len(Y), indeg_Y, exdeg_Y, U_Y, L_Y):
                        removed_any = True
                        cand_exts_Y.remove(u)
                        update_indeg_exdeg(u, Y, cand_exts_Y, indeg_Y, exdeg_Y,
                                           'del_cand_exts')
                        if debug:
                            test_indeg_exdeg(Y, cand_exts_Y, indeg_Y, exdeg_Y)

                keep_pruning = (removed_any
                                and len(cand_exts_Y) > 0
                                and len(cand_exts_Y) >= L_Y)

            if (L_Y <= U_Y
                    and len(cand_exts_Y) >= L_Y
                    and len(Y) + len(cand_exts_Y) >= min_size):

                b_has_superqclq = aux_quick(Y, cand_exts_Y, indeg_Y, exdeg_Y)
                b_has_qclq = b_has_qclq or b_has_superqclq

                if (not b_has_superqclq
                        and is_quasi_clique(Y, indeg_Y, exdeg_Y, False)):
                    set_Y = set(Y)
                    if not set_Y.issubset(last_clique):
                        record_clique(set_Y, Y)
                    b_has_qclq = True

            # Edges may have been deleted and v has left cand_exts: recompute.
            indeg_X, exdeg_X = compute_indeg_exdeg(X, cand_exts)

            if debug:
                test_indeg_exdeg(X, cand_exts, indeg_X, exdeg_X)

        return b_has_qclq

    # Initially X is empty: no inner neighbours, every neighbour is a candidate.
    indeg_X = {}
    exdeg_X = {}
    for u in G.nodes:
        indeg_X[u] = 0
        exdeg_X[u] = len(G[u])

    aux_quick([], list(G.nodes), indeg_X, exdeg_X)

    return Quasi_Cliques