Commit f0dd3092 authored by Yassin

Updating HSS

parent 14dbdd2b
@@ -7,85 +7,163 @@ from netbone.backbone import Backbone
 from netbone.filters import boolean_filter, threshold_filter, fraction_filter
 
 # algo: high_salience_skeleton.py
-warnings.filterwarnings('ignore')
 
 def high_salience_skeleton(data):
-    graph = data.copy()
+    # sys.stderr.write("Calculating HSS score...\n")
+    undirected=True
+    return_self_loops=False
     if isinstance(data, pd.DataFrame):
-        data = nx.from_pandas_edgelist(data)
+        table = data.copy()
+    elif isinstance(data, nx.Graph):
+        table = nx.to_pandas_edgelist(data)
     else:
         print("data should be a panads dataframe or nx graph")
         return
 
-    wes= nx.get_edge_attributes(graph, 'weight')
-    values = {pair:1/wes[pair] for pair in wes}
-    nx.set_edge_attributes(graph, values, name='distance')
-
-    nx.set_edge_attributes(graph, 0, name='score')
-    for source in graph.nodes():
-        tree = nx.single_source_dijkstra(graph, source, cutoff=None, weight='distance')[1]
-        paths = list(tree.values())[1:]
-        for path in paths:
-            pairs = [(path[i], path[i+1]) for i in range(len(path) - 1)]
-            for u,v in pairs:
-                graph[u][v]['score'] +=1
-
-    scores= nx.get_edge_attributes(graph, 'score')
-    N = len(graph)
-    values = {pair:scores[pair]/N for pair in scores}
-    nx.set_edge_attributes(graph, values, name='score')
-    return Backbone(graph, name="High Salience Skeleton Filter", column="high_salience_skeleton", ascending=False, filters=[boolean_filter, threshold_filter, fraction_filter])
-
-    # table["distance"] = 1.0 / table["weight"]
-    # nodes = set(table["source"]) | set(table["target"])
-    # G = nx.from_pandas_edgelist(
-    #     table, source="source", target="target", edge_attr="distance", create_using=nx.DiGraph())
-    # cs = defaultdict(float)
-    # for s in nodes:
-    #     pred = defaultdict(list)
-    #     dist = {t: float("inf") for t in nodes}
-    #     dist[s] = 0.0
-    #     Q = defaultdict(list)
-    #     for w in dist:
-    #         Q[dist[w]].append(w)
-    #     S = []
-    #     while len(Q) > 0:
-    #         v = Q[min(Q.keys())].pop(0)
-    #         S.append(v)
-    #         for _, w, l in G.edges(nbunch=[v, ], data=True):
-    #             new_distance = dist[v] + l["distance"]
-    #             if dist[w] > new_distance:
-    #                 Q[dist[w]].remove(w)
-    #                 dist[w] = new_distance
-    #                 Q[dist[w]].append(w)
-    #                 pred[w] = []
-    #             if dist[w] == new_distance:
-    #                 pred[w].append(v)
-    #         while len(S) > 0:
-    #             w = S.pop()
-    #             for v in pred[w]:
-    #                 cs[(v, w)] += 1.0
-    #         Q = defaultdict(list, {k: v for k, v in Q.items() if len(v) > 0})
-    # table["score"] = table.apply(
-    #     lambda x: cs[(x["source"], x["target"])] / len(nodes), axis=1)
-    # if not return_self_loops:
-    #     table = table[table["source"] != table["target"]]
-    # if undirected:
-    #     table["edge"] = table.apply(
-    #         lambda x: "%s-%s" % (min(x["source"], x["target"]), max(x["source"], x["target"])), axis=1)
-    #     table_maxscore = table.groupby(by="edge")["score"].sum().reset_index()
-    #     table = table.merge(table_maxscore, on="edge", suffixes=("_min", ""))
-    #     table = table.drop_duplicates(subset=["edge"])
-    #     table = table.drop("edge", 1)
-    #     table = table.drop("score_min", 1)
-    #
-    # G = nx.from_pandas_edgelist(table, edge_attr=['weight', 'score'])
-    # for u,v in G.edges():
-    #     if G[u][v]['score']>=0.8:
-    #         G[u][v]['high_salience_skeleton'] = True
-    #     else:
-    #         G[u][v]['high_salience_skeleton'] = False
-    #
-    # return Backbone(G, name="High Salience Skeleton Filter", column="high_salience_skeleton", ascending=False, filters=[boolean_filter, threshold_filter, fraction_filter])
-    # # return table[["source", "target", "weight", "score"]], "high_salience_skeleton"
+    table["distance"] = 1.0 / table["weight"]
+    nodes = set(table["source"]) | set(table["target"])
+    G = nx.from_pandas_edgelist(
+        table, source="source", target="target", edge_attr="distance", create_using=nx.DiGraph())
+    cs = defaultdict(float)
+    for s in nodes:
+        pred = defaultdict(list)
+        dist = {t: float("inf") for t in nodes}
+        dist[s] = 0.0
+        Q = defaultdict(list)
+        for w in dist:
+            Q[dist[w]].append(w)
+        S = []
+        while len(Q) > 0:
+            v = Q[min(Q.keys())].pop(0)
+            S.append(v)
+            for _, w, l in G.edges(nbunch=[v, ], data=True):
+                new_distance = dist[v] + l["distance"]
+                if dist[w] > new_distance:
+                    Q[dist[w]].remove(w)
+                    dist[w] = new_distance
+                    Q[dist[w]].append(w)
+                    pred[w] = []
+                if dist[w] == new_distance:
+                    pred[w].append(v)
+            while len(S) > 0:
+                w = S.pop()
+                for v in pred[w]:
+                    cs[(v, w)] += 1.0
+            Q = defaultdict(list, {k: v for k, v in Q.items() if len(v) > 0})
+    table["score"] = table.apply(
+        lambda x: cs[(x["source"], x["target"])] / len(nodes), axis=1)
+    if not return_self_loops:
+        table = table[table["source"] != table["target"]]
+    if undirected:
+        table["edge"] = table.apply(
+            lambda x: "%s-%s" % (min(x["source"], x["target"]), max(x["source"], x["target"])), axis=1)
+        table_maxscore = table.groupby(by="edge")["score"].sum().reset_index()
+        table = table.merge(table_maxscore, on="edge", suffixes=("_min", ""))
+        table = table.drop_duplicates(subset=["edge"])
+        table = table.drop("edge", 1)
+        table = table.drop("score_min", 1)
+
+    G = nx.from_pandas_edgelist(table, edge_attr=['weight', 'score'])
+    for u,v in G.edges():
+        if G[u][v]['score']>=0.8:
+            G[u][v]['high_salience_skeleton'] = True
+        else:
+            G[u][v]['high_salience_skeleton'] = False
+
+    return Backbone(G, name="High Salience Skeleton Filter", column="high_salience_skeleton", ascending=False, filters=[boolean_filter, threshold_filter, fraction_filter])
+    # return table[["source", "target", "weight", "score"]], "high_salience_skeleton"
+
+
+#
+#
+# from collections import defaultdict
+# import networkx as nx
+# import pandas as pd
+# import warnings
+# from netbone.backbone import Backbone
+# from netbone.filters import boolean_filter, threshold_filter, fraction_filter
+#
+# # algo: high_salience_skeleton.py
+#
+# def high_salience_skeleton(data):
+#     graph = data.copy()
+#     if isinstance(data, pd.DataFrame):
+#         data = nx.from_pandas_edgelist(data)
+#     else:
+#         print("data should be a panads dataframe or nx graph")
+#         return
+#
+#     wes= nx.get_edge_attributes(graph, 'weight')
+#     values = {pair:1/wes[pair] for pair in wes}
+#     nx.set_edge_attributes(graph, values, name='distance')
+#
+#     nx.set_edge_attributes(graph, 0, name='score')
+#     for source in graph.nodes():
+#         tree = nx.single_source_dijkstra(graph, source, cutoff=None, weight='distance')[1]
+#         paths = list(tree.values())[1:]
+#         for path in paths:
+#             pairs = [(path[i], path[i+1]) for i in range(len(path) - 1)]
+#             for u,v in pairs:
+#                 graph[u][v]['score'] +=1
+#
+#     scores= nx.get_edge_attributes(graph, 'score')
+#     N = len(graph)
+#     values = {pair:scores[pair]/N for pair in scores}
+#     nx.set_edge_attributes(graph, values, name='score')
+#     return Backbone(graph, name="High Salience Skeleton Filter", column="high_salience_skeleton", ascending=False, filters=[boolean_filter, threshold_filter, fraction_filter])
+#
+# #     table["distance"] = 1.0 / table["weight"]
+# #     nodes = set(table["source"]) | set(table["target"])
+# #     G = nx.from_pandas_edgelist(
+# #         table, source="source", target="target", edge_attr="distance", create_using=nx.DiGraph())
+# #     cs = defaultdict(float)
+# #     for s in nodes:
+# #         pred = defaultdict(list)
+# #         dist = {t: float("inf") for t in nodes}
+# #         dist[s] = 0.0
+# #         Q = defaultdict(list)
+# #         for w in dist:
+# #             Q[dist[w]].append(w)
+# #         S = []
+# #         while len(Q) > 0:
+# #             v = Q[min(Q.keys())].pop(0)
+# #             S.append(v)
+# #             for _, w, l in G.edges(nbunch=[v, ], data=True):
+# #                 new_distance = dist[v] + l["distance"]
+# #                 if dist[w] > new_distance:
+# #                     Q[dist[w]].remove(w)
+# #                     dist[w] = new_distance
+# #                     Q[dist[w]].append(w)
+# #                     pred[w] = []
+# #                 if dist[w] == new_distance:
+# #                     pred[w].append(v)
+# #             while len(S) > 0:
+# #                 w = S.pop()
+# #                 for v in pred[w]:
+# #                     cs[(v, w)] += 1.0
+# #             Q = defaultdict(list, {k: v for k, v in Q.items() if len(v) > 0})
+# #     table["score"] = table.apply(
+# #         lambda x: cs[(x["source"], x["target"])] / len(nodes), axis=1)
+# #     if not return_self_loops:
+# #         table = table[table["source"] != table["target"]]
+# #     if undirected:
+# #         table["edge"] = table.apply(
+# #             lambda x: "%s-%s" % (min(x["source"], x["target"]), max(x["source"], x["target"])), axis=1)
+# #         table_maxscore = table.groupby(by="edge")["score"].sum().reset_index()
+# #         table = table.merge(table_maxscore, on="edge", suffixes=("_min", ""))
+# #         table = table.drop_duplicates(subset=["edge"])
+# #         table = table.drop("edge", 1)
+# #         table = table.drop("score_min", 1)
+# #
+# #     G = nx.from_pandas_edgelist(table, edge_attr=['weight', 'score'])
+# #     for u,v in G.edges():
+# #         if G[u][v]['score']>=0.8:
+# #             G[u][v]['high_salience_skeleton'] = True
+# #         else:
+# #             G[u][v]['high_salience_skeleton'] = False
+# #
+# #     return Backbone(G, name="High Salience Skeleton Filter", column="high_salience_skeleton", ascending=False, filters=[boolean_filter, threshold_filter, fraction_filter])
+# #     # return table[["source", "target", "weight", "score"]], "high_salience_skeleton"
+#
+#
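For orientation, here is a small, self-contained sketch of the salience score this module computes: invert each edge weight into a distance, build a shortest-path tree rooted at every node, and record the fraction of roots whose tree uses a given edge. The toy graph and its weights are made up for illustration, and the sketch follows the textbook definition (each edge counted at most once per root); it is not a line-for-line reproduction of either implementation in the diff above.

import networkx as nx

# Toy weighted graph (illustrative values only).
edges = [("a", "b", 4.0), ("b", "c", 4.0), ("a", "c", 1.0), ("c", "d", 2.0)]
G = nx.Graph()
G.add_weighted_edges_from(edges)

# Distance = 1 / weight, as in the file above: heavier edges are "closer".
for u, v, d in G.edges(data=True):
    d["distance"] = 1.0 / d["weight"]

# Count, for each edge, how many shortest-path trees contain it.
salience = {tuple(sorted(e)): 0 for e in G.edges()}
for source in G.nodes():
    # Shortest paths from `source`; collect the edges of the resulting tree once each.
    paths = nx.single_source_dijkstra(G, source, weight="distance")[1]
    tree_edges = set()
    for path in paths.values():
        tree_edges.update(tuple(sorted(pair)) for pair in zip(path, path[1:]))
    for e in tree_edges:
        salience[e] += 1

# Normalize by the number of roots: the salience of each edge.
N = G.number_of_nodes()
for e, count in salience.items():
    print(e, count / N)

Edges whose score stays close to 1 across roots make up the high salience skeleton. In the committed code the per-edge value is stored in the 'score' attribute and a boolean 'high_salience_skeleton' flag is set at the 0.8 cutoff before the graph is wrapped in a Backbone for boolean_filter, threshold_filter, and fraction_filter.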