Commit f0dd3092 authored by Yassin

Updating HSS

parent 14dbdd2b
@@ -7,85 +7,163 @@ from netbone.backbone import Backbone
 from netbone.filters import boolean_filter, threshold_filter, fraction_filter
 
 # algo: high_salience_skeleton.py
-warnings.filterwarnings('ignore')
 
 def high_salience_skeleton(data):
-    graph = data.copy()
+    # sys.stderr.write("Calculating HSS score...\n")
+    undirected=True
+    return_self_loops=False
     if isinstance(data, pd.DataFrame):
-        data = nx.from_pandas_edgelist(data)
+        table = data.copy()
+    elif isinstance(data, nx.Graph):
+        table = nx.to_pandas_edgelist(data)
     else:
         print("data should be a panads dataframe or nx graph")
         return
 
-    wes= nx.get_edge_attributes(graph, 'weight')
-    values = {pair:1/wes[pair] for pair in wes}
-    nx.set_edge_attributes(graph, values, name='distance')
-
-    nx.set_edge_attributes(graph, 0, name='score')
-    for source in graph.nodes():
-        tree = nx.single_source_dijkstra(graph, source, cutoff=None, weight='distance')[1]
-        paths = list(tree.values())[1:]
-        for path in paths:
-            pairs = [(path[i], path[i+1]) for i in range(len(path) - 1)]
-            for u,v in pairs:
-                graph[u][v]['score'] +=1
-
-    scores= nx.get_edge_attributes(graph, 'score')
-    N = len(graph)
-    values = {pair:scores[pair]/N for pair in scores}
-    nx.set_edge_attributes(graph, values, name='score')
-    return Backbone(graph, name="High Salience Skeleton Filter", column="high_salience_skeleton", ascending=False, filters=[boolean_filter, threshold_filter, fraction_filter])
-
-    # table["distance"] = 1.0 / table["weight"]
-    # nodes = set(table["source"]) | set(table["target"])
-    # G = nx.from_pandas_edgelist(
-    #     table, source="source", target="target", edge_attr="distance", create_using=nx.DiGraph())
-    # cs = defaultdict(float)
-    # for s in nodes:
-    #     pred = defaultdict(list)
-    #     dist = {t: float("inf") for t in nodes}
-    #     dist[s] = 0.0
-    #     Q = defaultdict(list)
-    #     for w in dist:
-    #         Q[dist[w]].append(w)
-    #     S = []
-    #     while len(Q) > 0:
-    #         v = Q[min(Q.keys())].pop(0)
-    #         S.append(v)
-    #         for _, w, l in G.edges(nbunch=[v, ], data=True):
-    #             new_distance = dist[v] + l["distance"]
-    #             if dist[w] > new_distance:
-    #                 Q[dist[w]].remove(w)
-    #                 dist[w] = new_distance
-    #                 Q[dist[w]].append(w)
-    #                 pred[w] = []
-    #             if dist[w] == new_distance:
-    #                 pred[w].append(v)
-    #         while len(S) > 0:
-    #             w = S.pop()
-    #             for v in pred[w]:
-    #                 cs[(v, w)] += 1.0
-    #         Q = defaultdict(list, {k: v for k, v in Q.items() if len(v) > 0})
-    # table["score"] = table.apply(
-    #     lambda x: cs[(x["source"], x["target"])] / len(nodes), axis=1)
-    # if not return_self_loops:
-    #     table = table[table["source"] != table["target"]]
-    # if undirected:
-    #     table["edge"] = table.apply(
-    #         lambda x: "%s-%s" % (min(x["source"], x["target"]), max(x["source"], x["target"])), axis=1)
-    #     table_maxscore = table.groupby(by="edge")["score"].sum().reset_index()
-    #     table = table.merge(table_maxscore, on="edge", suffixes=("_min", ""))
-    #     table = table.drop_duplicates(subset=["edge"])
-    #     table = table.drop("edge", 1)
-    #     table = table.drop("score_min", 1)
-    #
-    # G = nx.from_pandas_edgelist(table, edge_attr=['weight', 'score'])
-    # for u,v in G.edges():
-    #     if G[u][v]['score']>=0.8:
-    #         G[u][v]['high_salience_skeleton'] = True
-    #     else:
-    #         G[u][v]['high_salience_skeleton'] = False
-    #
-    # return Backbone(G, name="High Salience Skeleton Filter", column="high_salience_skeleton", ascending=False, filters=[boolean_filter, threshold_filter, fraction_filter])
-    # # return table[["source", "target", "weight", "score"]], "high_salience_skeleton"
+    table["distance"] = 1.0 / table["weight"]
+    nodes = set(table["source"]) | set(table["target"])
+    G = nx.from_pandas_edgelist(
+        table, source="source", target="target", edge_attr="distance", create_using=nx.DiGraph())
+    cs = defaultdict(float)
+    for s in nodes:
+        pred = defaultdict(list)
+        dist = {t: float("inf") for t in nodes}
+        dist[s] = 0.0
+        Q = defaultdict(list)
+        for w in dist:
+            Q[dist[w]].append(w)
+        S = []
+        while len(Q) > 0:
+            v = Q[min(Q.keys())].pop(0)
+            S.append(v)
+            for _, w, l in G.edges(nbunch=[v, ], data=True):
+                new_distance = dist[v] + l["distance"]
+                if dist[w] > new_distance:
+                    Q[dist[w]].remove(w)
+                    dist[w] = new_distance
+                    Q[dist[w]].append(w)
+                    pred[w] = []
+                if dist[w] == new_distance:
+                    pred[w].append(v)
+            while len(S) > 0:
+                w = S.pop()
+                for v in pred[w]:
+                    cs[(v, w)] += 1.0
+            Q = defaultdict(list, {k: v for k, v in Q.items() if len(v) > 0})
+    table["score"] = table.apply(
+        lambda x: cs[(x["source"], x["target"])] / len(nodes), axis=1)
+    if not return_self_loops:
+        table = table[table["source"] != table["target"]]
+    if undirected:
+        table["edge"] = table.apply(
+            lambda x: "%s-%s" % (min(x["source"], x["target"]), max(x["source"], x["target"])), axis=1)
+        table_maxscore = table.groupby(by="edge")["score"].sum().reset_index()
+        table = table.merge(table_maxscore, on="edge", suffixes=("_min", ""))
+        table = table.drop_duplicates(subset=["edge"])
+        table = table.drop("edge", 1)
+        table = table.drop("score_min", 1)
+
+    G = nx.from_pandas_edgelist(table, edge_attr=['weight', 'score'])
+    for u,v in G.edges():
+        if G[u][v]['score']>=0.8:
+            G[u][v]['high_salience_skeleton'] = True
+        else:
+            G[u][v]['high_salience_skeleton'] = False
+
+    return Backbone(G, name="High Salience Skeleton Filter", column="high_salience_skeleton", ascending=False, filters=[boolean_filter, threshold_filter, fraction_filter])
+    # return table[["source", "target", "weight", "score"]], "high_salience_skeleton"
+
+
+#
+#
+# from collections import defaultdict
+# import networkx as nx
+# import pandas as pd
+# import warnings
+# from netbone.backbone import Backbone
+# from netbone.filters import boolean_filter, threshold_filter, fraction_filter
+#
+# # algo: high_salience_skeleton.py
+#
+# def high_salience_skeleton(data):
+#     graph = data.copy()
+#     if isinstance(data, pd.DataFrame):
+#         data = nx.from_pandas_edgelist(data)
+#     else:
+#         print("data should be a panads dataframe or nx graph")
+#         return
+#
+#     wes= nx.get_edge_attributes(graph, 'weight')
+#     values = {pair:1/wes[pair] for pair in wes}
+#     nx.set_edge_attributes(graph, values, name='distance')
+#
+#     nx.set_edge_attributes(graph, 0, name='score')
+#     for source in graph.nodes():
+#         tree = nx.single_source_dijkstra(graph, source, cutoff=None, weight='distance')[1]
+#         paths = list(tree.values())[1:]
+#         for path in paths:
+#             pairs = [(path[i], path[i+1]) for i in range(len(path) - 1)]
+#             for u,v in pairs:
+#                 graph[u][v]['score'] +=1
+#
+#     scores= nx.get_edge_attributes(graph, 'score')
+#     N = len(graph)
+#     values = {pair:scores[pair]/N for pair in scores}
+#     nx.set_edge_attributes(graph, values, name='score')
+#     return Backbone(graph, name="High Salience Skeleton Filter", column="high_salience_skeleton", ascending=False, filters=[boolean_filter, threshold_filter, fraction_filter])
+#
+# #     table["distance"] = 1.0 / table["weight"]
+# #     nodes = set(table["source"]) | set(table["target"])
+# #     G = nx.from_pandas_edgelist(
+# #         table, source="source", target="target", edge_attr="distance", create_using=nx.DiGraph())
+# #     cs = defaultdict(float)
+# #     for s in nodes:
+# #         pred = defaultdict(list)
+# #         dist = {t: float("inf") for t in nodes}
+# #         dist[s] = 0.0
+# #         Q = defaultdict(list)
+# #         for w in dist:
+# #             Q[dist[w]].append(w)
+# #         S = []
+# #         while len(Q) > 0:
+# #             v = Q[min(Q.keys())].pop(0)
+# #             S.append(v)
+# #             for _, w, l in G.edges(nbunch=[v, ], data=True):
+# #                 new_distance = dist[v] + l["distance"]
+# #                 if dist[w] > new_distance:
+# #                     Q[dist[w]].remove(w)
+# #                     dist[w] = new_distance
+# #                     Q[dist[w]].append(w)
+# #                     pred[w] = []
+# #                 if dist[w] == new_distance:
+# #                     pred[w].append(v)
+# #             while len(S) > 0:
+# #                 w = S.pop()
+# #                 for v in pred[w]:
+# #                     cs[(v, w)] += 1.0
+# #             Q = defaultdict(list, {k: v for k, v in Q.items() if len(v) > 0})
+# #     table["score"] = table.apply(
+# #         lambda x: cs[(x["source"], x["target"])] / len(nodes), axis=1)
+# #     if not return_self_loops:
+# #         table = table[table["source"] != table["target"]]
+# #     if undirected:
+# #         table["edge"] = table.apply(
+# #             lambda x: "%s-%s" % (min(x["source"], x["target"]), max(x["source"], x["target"])), axis=1)
+# #         table_maxscore = table.groupby(by="edge")["score"].sum().reset_index()
+# #         table = table.merge(table_maxscore, on="edge", suffixes=("_min", ""))
+# #         table = table.drop_duplicates(subset=["edge"])
+# #         table = table.drop("edge", 1)
+# #         table = table.drop("score_min", 1)
+# #
+# #     G = nx.from_pandas_edgelist(table, edge_attr=['weight', 'score'])
+# #     for u,v in G.edges():
+# #         if G[u][v]['score']>=0.8:
+# #             G[u][v]['high_salience_skeleton'] = True
+# #         else:
+# #             G[u][v]['high_salience_skeleton'] = False
+# #
+# #     return Backbone(G, name="High Salience Skeleton Filter", column="high_salience_skeleton", ascending=False, filters=[boolean_filter, threshold_filter, fraction_filter])
+# #     # return table[["source", "target", "weight", "score"]], "high_salience_skeleton"
+#
+#
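For orientation, here is a small, self-contained sketch of the salience score this module computes: invert each edge weight into a distance, build a shortest-path tree rooted at every node, and record the fraction of roots whose tree uses a given edge. The toy graph and its weights are made up for illustration, and the sketch follows the textbook definition (each edge counted at most once per root); it is not a line-for-line reproduction of either implementation in the diff above.

import networkx as nx

# Toy weighted graph (illustrative values only).
edges = [("a", "b", 4.0), ("b", "c", 4.0), ("a", "c", 1.0), ("c", "d", 2.0)]
G = nx.Graph()
G.add_weighted_edges_from(edges)

# Distance = 1 / weight, as in the file above: heavier edges are "closer".
for u, v, d in G.edges(data=True):
    d["distance"] = 1.0 / d["weight"]

# Count, for each edge, how many shortest-path trees contain it.
salience = {tuple(sorted(e)): 0 for e in G.edges()}
for source in G.nodes():
    # Shortest paths from `source`; collect the edges of the resulting tree once each.
    paths = nx.single_source_dijkstra(G, source, weight="distance")[1]
    tree_edges = set()
    for path in paths.values():
        tree_edges.update(tuple(sorted(pair)) for pair in zip(path, path[1:]))
    for e in tree_edges:
        salience[e] += 1

# Normalize by the number of roots: the salience of each edge.
N = G.number_of_nodes()
for e, count in salience.items():
    print(e, count / N)

Edges whose score stays close to 1 across roots make up the high salience skeleton. In the committed code the per-edge value is stored in the 'score' attribute and a boolean 'high_salience_skeleton' flag is set at the 0.8 cutoff before the graph is wrapped in a Backbone for boolean_filter, threshold_filter, and fraction_filter.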