diff --git a/netbone/structural/high_salience_skeleton.py b/netbone/structural/high_salience_skeleton.py
index aed9f976f03ca615833b6344681810b575f308c7..77bb084dd05de67097f142b660fad80ce2aadccc 100644
--- a/netbone/structural/high_salience_skeleton.py
+++ b/netbone/structural/high_salience_skeleton.py
@@ -6,164 +6,35 @@ import warnings
 from netbone.backbone import Backbone
 from netbone.filters import boolean_filter, threshold_filter, fraction_filter
 
-# algo: high_salience_skeleton.py
 warnings.filterwarnings('ignore')
-
+
+# High Salience Skeleton: an edge's salience is the fraction of node-rooted
+# shortest-path trees (computed on distance = 1 / weight) that contain it.
 def high_salience_skeleton(data):
-    # sys.stderr.write("Calculating HSS score...\n")
-    undirected=True
-    return_self_loops=False
-
     if isinstance(data, pd.DataFrame):
-        table = data.copy()
+        # keep the 'weight' column as an edge attribute when building the graph
+        graph = nx.from_pandas_edgelist(data, edge_attr='weight')
     elif isinstance(data, nx.Graph):
-        table = nx.to_pandas_edgelist(data)
+        graph = data.copy()
     else:
         print("data should be a pandas dataframe or nx graph")
         return
-
-    table["distance"] = 1.0 / table["weight"]
-    nodes = set(table["source"]) | set(table["target"])
-    G = nx.from_pandas_edgelist(
-        table, source="source", target="target", edge_attr="distance", create_using=nx.DiGraph())
-    cs = defaultdict(float)
-    for s in nodes:
-        pred = defaultdict(list)
-        dist = {t: float("inf") for t in nodes}
-        dist[s] = 0.0
-        Q = defaultdict(list)
-        for w in dist:
-            Q[dist[w]].append(w)
-        S = []
-        while len(Q) > 0:
-            v = Q[min(Q.keys())].pop(0)
-            S.append(v)
-            for _, w, l in G.edges(nbunch=[v, ], data=True):
-                new_distance = dist[v] + l["distance"]
-                if dist[w] > new_distance:
-                    Q[dist[w]].remove(w)
-                    dist[w] = new_distance
-                    Q[dist[w]].append(w)
-                    pred[w] = []
-                if dist[w] == new_distance:
-                    pred[w].append(v)
-        while len(S) > 0:
-            w = S.pop()
-            for v in pred[w]:
-                cs[(v, w)] += 1.0
-        Q = defaultdict(list, {k: v for k, v in Q.items() if len(v) > 0})
-    table["score"] = table.apply(
-        lambda x: cs[(x["source"], x["target"])] / len(nodes), axis=1)
-    if not return_self_loops:
-        table = table[table["source"] != table["target"]]
-    if undirected:
-        table["edge"] = table.apply(
-            lambda x: "%s-%s" % (min(x["source"], x["target"]), max(x["source"], x["target"])), axis=1)
-        table_maxscore = table.groupby(by="edge")["score"].sum().reset_index()
-        table = table.merge(table_maxscore, on="edge", suffixes=("_min", ""))
-        table = table.drop_duplicates(subset=["edge"])
-        table = table.drop("edge", 1)
-        table = table.drop("score_min", 1)
-    G = nx.from_pandas_edgelist(table, edge_attr=['weight', 'score'])
-    for u,v in G.edges():
-        if G[u][v]['score']>=0.8:
-            G[u][v]['high_salience_skeleton'] = True
-        else:
-            G[u][v]['high_salience_skeleton'] = False
-
-    return Backbone(G, name="High Salience Skeleton Filter", column="high_salience_skeleton", ascending=False, filters=[boolean_filter, threshold_filter, fraction_filter])
-    # return table[["source", "target", "weight", "score"]], "high_salience_skeleton"
+
+    # shortest paths run on distance = 1 / weight, so strong edges are "short"
+    wes = nx.get_edge_attributes(graph, 'weight')
+    values = {pair: 1 / wes[pair] for pair in wes}
+    nx.set_edge_attributes(graph, values, name='distance')
+
+    nx.set_edge_attributes(graph, 0, name='score')
+
+    for source in graph.nodes():
+        # shortest-path tree rooted at `source` (paths dict from single_source_dijkstra)
+        tree = nx.single_source_dijkstra(graph, source, cutoff=None, weight='distance')[1]
+        node_tree_scores = dict()
+
+        # collect every edge of this tree, counting it at most once per root
+        paths = list(tree.values())[1:]
+        for path in paths:
+            for i in range(len(path) - 1):
+                node_tree_scores[(path[i], path[i + 1])] = 1
+
+        for u, v in node_tree_scores:
+            graph[u][v]['score'] += 1
+
+    # normalise by the number of nodes so the salience lies in [0, 1]
+    scores = nx.get_edge_attributes(graph, 'score')
+    N = len(graph)
+    values = {pair: scores[pair] / N for pair in scores}
+    nx.set_edge_attributes(graph, values, name='score')
+
+    # boolean column used by boolean_filter: flag edges at or above the 0.8 salience cut-off
+    for u, v in graph.edges():
+        graph[u][v]['high_salience_skeleton'] = graph[u][v]['score'] >= 0.8
+
+    return Backbone(graph, name="High Salience Skeleton Filter", column="high_salience_skeleton", ascending=False, filters=[boolean_filter, threshold_filter, fraction_filter])