from collections import defaultdict
import networkx as nx
import pandas as pd
import warnings
from netbone.backbone import Backbone
from netbone.filters import boolean_filter, threshold_filter, fraction_filter

# algo: high_salience_skeleton.py

warnings.filterwarnings('ignore')


def high_salience_skeleton(data):
    """Compute the High Salience Skeleton (HSS) backbone of a weighted graph.

    For every node, the shortest-path tree rooted at that node is computed
    using edge distance = 1/weight (strong ties are "close"). An edge's
    salience score is the fraction of those trees in which it appears
    (Grady, Thiemann & Brockmann, Nat. Commun. 2012). Edges with salience
    >= 0.8 are flagged as backbone members.

    Parameters
    ----------
    data : pandas.DataFrame or networkx.Graph
        An edge list with 'source', 'target' and 'weight' columns, or a
        graph whose edges carry a 'weight' attribute.

    Returns
    -------
    Backbone
        Wraps an undirected graph whose edges carry 'weight', 'score'
        (salience in [0, 1]) and a boolean 'high_salience_skeleton'
        attribute. Returns None (after printing a message) for
        unsupported input types.
    """
    undirected = True
    return_self_loops = False

    if isinstance(data, pd.DataFrame):
        table = data.copy()
    elif isinstance(data, nx.Graph):
        table = nx.to_pandas_edgelist(data)
    else:
        print("data should be a pandas dataframe or nx graph")
        return

    # Effective proximity: invert weights so heavy edges are short.
    table["distance"] = 1.0 / table["weight"]
    nodes = set(table["source"]) | set(table["target"])
    G = nx.from_pandas_edgelist(
        table, source="source", target="target", edge_attr="distance",
        create_using=nx.DiGraph())

    # cs[(u, v)] = number of shortest-path trees that contain edge (u, v).
    cs = defaultdict(float)
    for s in nodes:
        # Dijkstra from s, keeping *all* equal-cost predecessors.
        pred = defaultdict(list)
        dist = {t: float("inf") for t in nodes}
        dist[s] = 0.0
        # Q maps tentative distance -> FIFO bucket of nodes (bucket queue
        # standing in for a heap).
        Q = defaultdict(list)
        for w in dist:
            Q[dist[w]].append(w)
        S = []
        while len(Q) > 0:
            v = Q[min(Q.keys())].pop(0)
            S.append(v)
            for _, w, l in G.edges(nbunch=[v, ], data=True):
                new_distance = dist[v] + l["distance"]
                if dist[w] > new_distance:
                    # Relax: move w into its new distance bucket and
                    # discard predecessors of the longer path.
                    Q[dist[w]].remove(w)
                    dist[w] = new_distance
                    Q[dist[w]].append(w)
                    pred[w] = []
                if dist[w] == new_distance:
                    pred[w].append(v)
            # Prune emptied buckets so min(Q.keys()) never selects one.
            Q = defaultdict(list, {k: v for k, v in Q.items() if len(v) > 0})
        # pred is final once every node is settled; credit each tree edge.
        while len(S) > 0:
            w = S.pop()
            for v in pred[w]:
                cs[(v, w)] += 1.0

    # Salience = fraction of the |nodes| shortest-path trees using the edge.
    table["score"] = table.apply(
        lambda x: cs[(x["source"], x["target"])] / len(nodes), axis=1)
    if not return_self_loops:
        table = table[table["source"] != table["target"]]
    if undirected:
        # Collapse the two directions of each edge, summing their scores.
        table["edge"] = table.apply(
            lambda x: "%s-%s" % (min(x["source"], x["target"]),
                                 max(x["source"], x["target"])), axis=1)
        table_maxscore = table.groupby(by="edge")["score"].sum().reset_index()
        table = table.merge(table_maxscore, on="edge", suffixes=("_min", ""))
        table = table.drop_duplicates(subset=["edge"])
        # BUGFIX: the positional `axis` argument (`drop("edge", 1)`) was
        # removed in pandas 2.0 and raises TypeError; use the keyword form.
        table = table.drop(columns=["edge", "score_min"])

    G = nx.from_pandas_edgelist(table, edge_attr=['weight', 'score'])
    for u, v in G.edges():
        # 0.8 is the conventional HSS threshold: salience is strongly
        # bimodal, so edges cluster near 0 or near 1.
        G[u][v]['high_salience_skeleton'] = G[u][v]['score'] >= 0.8

    return Backbone(G, name="High Salience Skeleton Filter",
                    column="high_salience_skeleton", ascending=False,
                    filters=[boolean_filter, threshold_filter, fraction_filter])
defaultdict +# import networkx as nx +# import pandas as pd +# import warnings +# from netbone.backbone import Backbone +# from netbone.filters import boolean_filter, threshold_filter, fraction_filter +# +# # algo: high_salience_skeleton.py +# +# def high_salience_skeleton(data): +# graph = data.copy() +# if isinstance(data, pd.DataFrame): +# data = nx.from_pandas_edgelist(data) +# else: +# print("data should be a panads dataframe or nx graph") +# return +# +# wes= nx.get_edge_attributes(graph, 'weight') +# values = {pair:1/wes[pair] for pair in wes} +# nx.set_edge_attributes(graph, values, name='distance') +# +# nx.set_edge_attributes(graph, 0, name='score') +# for source in graph.nodes(): +# tree = nx.single_source_dijkstra(graph, source, cutoff=None, weight='distance')[1] +# paths = list(tree.values())[1:] +# for path in paths: +# pairs = [(path[i], path[i+1]) for i in range(len(path) - 1)] +# for u,v in pairs: +# graph[u][v]['score'] +=1 +# +# scores= nx.get_edge_attributes(graph, 'score') +# N = len(graph) +# values = {pair:scores[pair]/N for pair in scores} +# nx.set_edge_attributes(graph, values, name='score') +# return Backbone(graph, name="High Salience Skeleton Filter", column="high_salience_skeleton", ascending=False, filters=[boolean_filter, threshold_filter, fraction_filter]) +# +# # table["distance"] = 1.0 / table["weight"] +# # nodes = set(table["source"]) | set(table["target"]) +# # G = nx.from_pandas_edgelist( +# # table, source="source", target="target", edge_attr="distance", create_using=nx.DiGraph()) +# # cs = defaultdict(float) +# # for s in nodes: +# # pred = defaultdict(list) +# # dist = {t: float("inf") for t in nodes} +# # dist[s] = 0.0 +# # Q = defaultdict(list) +# # for w in dist: +# # Q[dist[w]].append(w) +# # S = [] +# # while len(Q) > 0: +# # v = Q[min(Q.keys())].pop(0) +# # S.append(v) +# # for _, w, l in G.edges(nbunch=[v, ], data=True): +# # new_distance = dist[v] + l["distance"] +# # if dist[w] > new_distance: +# # 
Q[dist[w]].remove(w) +# # dist[w] = new_distance +# # Q[dist[w]].append(w) +# # pred[w] = [] +# # if dist[w] == new_distance: +# # pred[w].append(v) +# # while len(S) > 0: +# # w = S.pop() +# # for v in pred[w]: +# # cs[(v, w)] += 1.0 +# # Q = defaultdict(list, {k: v for k, v in Q.items() if len(v) > 0}) +# # table["score"] = table.apply( +# # lambda x: cs[(x["source"], x["target"])] / len(nodes), axis=1) +# # if not return_self_loops: +# # table = table[table["source"] != table["target"]] +# # if undirected: +# # table["edge"] = table.apply( +# # lambda x: "%s-%s" % (min(x["source"], x["target"]), max(x["source"], x["target"])), axis=1) +# # table_maxscore = table.groupby(by="edge")["score"].sum().reset_index() +# # table = table.merge(table_maxscore, on="edge", suffixes=("_min", "")) +# # table = table.drop_duplicates(subset=["edge"]) +# # table = table.drop("edge", 1) +# # table = table.drop("score_min", 1) +# # +# # G = nx.from_pandas_edgelist(table, edge_attr=['weight', 'score']) +# # for u,v in G.edges(): +# # if G[u][v]['score']>=0.8: +# # G[u][v]['high_salience_skeleton'] = True +# # else: +# # G[u][v]['high_salience_skeleton'] = False +# # +# # return Backbone(G, name="High Salience Skeleton Filter", column="high_salience_skeleton", ascending=False, filters=[boolean_filter, threshold_filter, fraction_filter]) +# # # return table[["source", "target", "weight", "score"]], "high_salience_skeleton" +# +# -- GitLab