# Reconstructed from the git patch "Updating HSS" (commit 14dbdd2b, author
# Yassin, 2023-05-07) against netbone/structural/high_salience_skeleton.py.
# The patch text had lost its line breaks; this is the post-patch module
# content with the defects of the patched function corrected.  The earlier
# table-based implementation, which the patch left behind as commented-out
# code, is omitted here; see the parent revision (blob 0a9bbdd) for it.
# NOTE(review): the networkx/pandas imports are not visible in the patch hunk
# (it starts at line 7 of the file); they are inferred from the `nx.` / `pd.`
# names used below -- confirm against the real file header.
import networkx as nx
import pandas as pd

from netbone.backbone import Backbone
from netbone.filters import boolean_filter, threshold_filter, fraction_filter

# algo: high_salience_skeleton.py


def high_salience_skeleton(data):
    """Compute high-salience-skeleton scores for every edge of a network.

    For each node a shortest-path tree is built with Dijkstra's algorithm,
    using ``1 / weight`` as the edge length.  The score of an edge is the
    fraction of those trees (one per node) that contain the edge, so it lies
    in ``[0, 1]``.

    Args:
        data: Either a pandas ``DataFrame`` edge list with ``source``,
            ``target`` and ``weight`` columns, or a networkx graph whose
            edges carry a ``weight`` attribute.  Weights must be non-zero
            because the edge length is ``1 / weight``.  A graph argument is
            copied, so the caller's object is not modified.

    Returns:
        A ``Backbone`` wrapping the scored graph, or ``None`` (after printing
        a message) when ``data`` is neither a DataFrame nor a networkx graph.
        Each edge of the wrapped graph carries ``distance``, ``score`` and the
        boolean ``high_salience_skeleton`` (``score >= 0.8``).
    """
    if isinstance(data, pd.DataFrame):
        # Keep the weight column; without edge_attr the weights are dropped.
        graph = nx.from_pandas_edgelist(data, edge_attr='weight')
    elif isinstance(data, nx.Graph):
        graph = data.copy()
    else:
        print("data should be a panads dataframe or nx graph")
        return None

    for _, _, attrs in graph.edges(data=True):
        # Edges without a weight get no distance; Dijkstra then falls back to
        # its default length for them.
        if 'weight' in attrs:
            attrs['distance'] = 1 / attrs['weight']
        attrs['score'] = 0

    for source in graph.nodes():
        _, paths = nx.single_source_dijkstra(graph, source, weight='distance')
        # Collect the edges of this source's shortest-path tree in a set so
        # that an edge shared by several root-to-node paths is counted once
        # per tree, not once per path.
        tree_edges = set()
        for path in paths.values():
            tree_edges.update(zip(path, path[1:]))
        for u, v in tree_edges:
            graph[u][v]['score'] += 1

    node_count = graph.number_of_nodes()
    for _, _, attrs in graph.edges(data=True):
        attrs['score'] = attrs['score'] / node_count
        # The Backbone below is pointed at the 'high_salience_skeleton'
        # column, so provide it, using the same 0.8 cut-off as the
        # implementation this patch replaced.
        attrs['high_salience_skeleton'] = attrs['score'] >= 0.8

    return Backbone(
        graph,
        name="High Salience Skeleton Filter",
        column="high_salience_skeleton",
        ascending=False,
        filters=[boolean_filter, threshold_filter, fraction_filter],
    )