diff --git a/netbone/__init__.py b/netbone/__init__.py index 6cdaf3f6e0d1fd041efd421cdce1bdd39755ab00..f5e20e16bf782d8e5a7ed932268976db653e5f47 100644 --- a/netbone/__init__.py +++ b/netbone/__init__.py @@ -33,7 +33,7 @@ except ImportError: def marginal_likelihood(data): data = data.copy() mlf = MLF(directed=False) - return Backbone(mlf.fit_transform(data), name="Marginal Likelihood Filter", column="p_value", ascending=True, filters=[threshold_filter, fraction_filter]) + return Backbone(mlf.fit_transform(data), method_name="Marginal Likelihood Filter", property_name="p_value", ascending=True, compatible_filters=[threshold_filter, fraction_filter], filter_on='Edges') diff --git a/netbone/backbone.py b/netbone/backbone.py index 598b262fded5c1fda5e7fb79945f0d12bed8d277..ece5d132a67ab81c2e052bebb5db15d875fd8370 100644 --- a/netbone/backbone.py +++ b/netbone/backbone.py @@ -1,64 +1,62 @@ import networkx as nx +import pandas as pd from pandas import DataFrame from netbone.utils.utils import edge_properties + class Backbone: - def __init__(self, graph, name, column, ascending, filters): + def __init__(self, graph, method_name, property_name, ascending, compatible_filters, filter_on): if isinstance(graph, DataFrame): graph = nx.from_pandas_edgelist(graph, edge_attr=edge_properties(graph)) self.graph = graph - self.name = name - self.column = column + self.method_name = method_name + self.property_name = property_name self.ascending = ascending - self.compatible_filters = filters - + self.compatible_filters = compatible_filters + self.filter_on = filter_on def to_dataframe(self): - return nx.to_pandas_edgelist(self.graph) - + if self.filter_on == 'Edges': + return nx.to_pandas_edgelist(self.graph) + else: + node_attrs = {} + for node in self.graph.nodes(): + node_attrs[node] = self.graph.nodes[node] + # Convert the dictionary to a Pandas DataFrame + return pd.DataFrame.from_dict(node_attrs, orient='index') def narrate(self): - match self.name: + match self.method_name: case "Disparity Filter": - print(self.name) + print(self.method_name) case "Enhanced Configuration Model Filter": - print(self.name) + print(self.method_name) case "Marginal Likelihood Filter": - print(self.name) + print(self.method_name) case "Locally Adaptive Network Sparsification Filter": - print(self.name) + print(self.method_name) case "Noise Corrected Filter": - print(self.name) + print(self.method_name) case 'High Salience Skeleton Filter': - print(self.name) + print(self.method_name) case 'Modularity Filter': - print(self.name) + print(self.method_name) case 'Ultrametric Distance Filter': - print(self.name) + print(self.method_name) case 'Maximum Spanning Tree': - print(self.name) + print(self.method_name) case 'Metric Distance Filter': - print(self.name) + print(self.method_name) case 'H-Backbone Filter': - print(self.name) + print(self.method_name) case 'Doubly Stochastic Filter': - print(self.name) + print(self.method_name) case 'Global Threshold Filter': - print(self.name) + print(self.method_name) case _: print("Citation here") - - def filters(self): + def compatible_filters(self): return self.compatible_filters - # match self.name: - # case "Disparity Filter" | 'Noise Corrected Filter' | "Enhanced Configuration Model Filter" | "Marginal Likelihood Filter" | 'Locally Adaptive Network Sparsification Filter' | 'Global Threshold Filter': - # return [fraction_filter, threshold_filter] - # case "H-Backbone Filter" | 'Metric Distance Filter' | 'Maximum Spanning Tree' | 'Ultrametric Distance Filter' | 'Modularity Filter': - # return [boolean_filter] - # case "Doubly Stochastic Filter" | "High Salience Skeleton Filter": - # return [boolean_filter, fraction_filter, threshold_filter] - # case _: - # print("Error " + self.name + " does not exist") diff --git a/netbone/compare.py b/netbone/compare.py index 665f63bdddfd895ea5abf84fe887ba82ec99ecef..16c40a974908c39d713e2c11730535dd474c7467 100644 --- a/netbone/compare.py +++ b/netbone/compare.py @@ -43,7 +43,7 @@ class Compare: if self.filter_values == []: raise Exception('Please enter the filter values.') - results = pd.DataFrame(index=['Original'] + [backbone.name for backbone in self.backbones]) + results = pd.DataFrame(index=['Original'] + [backbone.method_name for backbone in self.backbones]) props_arrays = dict() for property in self.props: @@ -70,7 +70,7 @@ class Compare: raise Exception('Please enter the filter values.') props_res = dict() for property in self.props: - props_res[property] = pd.DataFrame(index=[backbone.name for backbone in self.backbones]) + props_res[property] = pd.DataFrame(index=[backbone.method_name for backbone in self.backbones]) for value in self.filter_values: temp_props = dict() for property in self.props: @@ -97,7 +97,7 @@ class Compare: raise Exception('Please enter the filter values.') dist = dict() - ks_statistics = pd.DataFrame(index=[backbone.name for backbone in self.backbones]) + ks_statistics = pd.DataFrame(index=[backbone.method_name for backbone in self.backbones]) for property in self.props: dist_values = dict() @@ -108,7 +108,7 @@ class Compare: for i, backbone in enumerate(self.backbones): extracted_backbone = self.filter(backbone, value=self.filter_values[i], narrate=False) values1 = self.props[property](extracted_backbone) - dist_values[backbone.name] = cumulative_dist(property, extracted_backbone.name, values1, increasing) + dist_values[backbone.method_name] = cumulative_dist(property, extracted_backbone.method_name, values1, increasing) vals.append(kstest(values0, values1)[0]) # ks_statistics = pd.DataFrame(index=['Original'] + [backbone.name for backbone in self.backbones]) diff --git a/netbone/filters.py b/netbone/filters.py index 4c740beba01aa6b03d287ce58e5e0d990e402ebd..b8e7d5833749aaa91b13a9db7624e0bcfbba653a 100644 --- a/netbone/filters.py +++ b/netbone/filters.py @@ -2,79 +2,57 @@ import math import networkx as nx from netbone.utils.utils import edge_properties + + def boolean_filter(backbone, narrate=True, value=[]): - if boolean_filter in backbone.filters(): + if boolean_filter in backbone.compatible_filters(): data = backbone.graph - column = backbone.column + column = 'in_backbone' if isinstance(data, nx.Graph): data = nx.to_pandas_edgelist(data) if narrate: backbone.narrate() return nx.from_pandas_edgelist(data[data[column] == True], edge_attr=edge_properties(data)) - print("The accepted filters for " + backbone.name + " are: " + ', '.join([fun.__name__ for fun in backbone.filters()])) - -def threshold_filter(backbone, value, narrate=True , secondary_column = 'weight', secondary_column_ascending = False, **kwargs): - data = backbone.graph - column = backbone.column - ascending = backbone.ascending + print("The accepted filters for " + backbone.method_name + " are: " + ', '.join( + [fun.__name__ for fun in backbone.compatible_filters()])) - if isinstance(data, nx.Graph): - data = nx.to_pandas_edgelist(data) - keys = kwargs.keys() - if "value" in keys: - value = kwargs["value"] - if "secondary_column" in keys: - secondary_column = kwargs['secondary_column'] +def threshold_filter(backbone, value, narrate=True, secondary_property='weight', secondary_property_ascending=False, + **kwargs): + data = backbone.to_dataframe() + property_name = backbone.property_name + ascending = backbone.ascending - if threshold_filter in backbone.filters(): - if boolean_filter in backbone.filters(): - column = 'score' - data = data.sort_values(by=[column, secondary_column], ascending=[ascending, secondary_column_ascending]) + if threshold_filter in backbone.compatible_filters(): + data = data.sort_values(by=[property_name, secondary_property], ascending=[ascending, secondary_property_ascending]) if narrate: backbone.narrate() - if column == "p_value": - return nx.from_pandas_edgelist(data[data[column] < value], edge_attr=edge_properties(data)) - elif column == "score": - return nx.from_pandas_edgelist(data[data[column] > value], edge_attr=edge_properties(data)) + if ascending: + return nx.from_pandas_edgelist(data[data[property_name] < value], edge_attr=edge_properties(data)) else: - print("Column name can not be " + column) - - print("The accepted filters for " + backbone.name + " are: " + ', '.join([fun.__name__ for fun in backbone.filters()])) - + return nx.from_pandas_edgelist(data[data[property_name] > value], edge_attr=edge_properties(data)) + print("The accepted filters for " + backbone.method_name + " are: " + ', '.join( + [fun.__name__ for fun in backbone.compatible_filters()])) -def fraction_filter(backbone, value, narrate=True, secondary_column='weight', secondary_column_ascending=False, **kwargs): - data = backbone.graph - column = backbone.column +def fraction_filter(backbone, value, narrate=True, secondary_property='weight', secondary_property_ascending=False, + **kwargs): + data = backbone.to_dataframe() + column = backbone.property_name ascending = backbone.ascending - if isinstance(data, nx.Graph): - data = nx.to_pandas_edgelist(data) - - keys = kwargs.keys() - if "value" in keys: - value = kwargs["value"] - if "secondary_column" in keys: - secondary_column = kwargs['secondary_column'] - value = math.ceil(value * len(data)) - if fraction_filter in backbone.filters(): - if boolean_filter in backbone.filters(): - column = 'score' - data = data.sort_values(by=[column, secondary_column], ascending=[ascending, secondary_column_ascending]) + if fraction_filter in backbone.compatible_filters(): + data = data.sort_values(by=[column, secondary_property], ascending=[ascending, secondary_property_ascending]) if narrate: backbone.narrate() - return nx.from_pandas_edgelist(data[:value], edge_attr=edge_properties(data)) - - print("The accepted filters for " + backbone.name + " are: " + ', '.join([fun.__name__ for fun in backbone.filters()])) - - - + return nx.from_pandas_edgelist(data[:value], edge_attr=edge_properties(data)) + print("The accepted filters for " + backbone.method_name + " are: " + ', '.join( + [fun.__name__ for fun in backbone.compatible_filters()])) diff --git a/netbone/statistical/disparity.py b/netbone/statistical/disparity.py index ddfab323d914493c9bd7c7e83df04ee5ac5f3dc4..c7c11959def8999d4008cd723864df9abd060a0d 100644 --- a/netbone/statistical/disparity.py +++ b/netbone/statistical/disparity.py @@ -28,21 +28,4 @@ def disparity(data, weight='weight'): g[node][neighbour]['p_value'] = alpha_ij else: g[node][neighbour]['p_value'] = alpha_ij - return Backbone(g, name="Disparity Filter", column="p_value", ascending=True, filters=[threshold_filter, fraction_filter]) - - # b = nx.Graph() - # for u in g: - # k = len(g[u]) - # if k > 1: - # sum_w = sum(np.absolute(g[u][v][weight]) for v in g[u]) - # for v in g[u]: - # w = g[u][v][weight] - # p_ij = float(np.absolute(w))/sum_w - # alpha_ij = 1 - \ - # (k-1) * integrate.quad(lambda x: (1-x) - # ** (k-2), 0, p_ij)[0] - # # float('%.4f' % alpha_ij) - # b.add_edge(u, v, weight=w, p_value=float(alpha_ij)) - # return Backbone(b, name="Disparity Filter", column="p_value", ascending=True, filters=[threshold_filter, fraction_filter]) - - + return Backbone(g, method_name="Disparity Filter", property_name="p_value", ascending=True, compatible_filters=[threshold_filter, fraction_filter], filter_on='Edges') diff --git a/netbone/statistical/global_threshold.py b/netbone/statistical/global_threshold.py index 38a609a07ff1cbe5b0d077aa91479fa4ab38a8f2..b282b2d82d2f7ccfe729e01b4e9eded7c1d2c113 100644 --- a/netbone/statistical/global_threshold.py +++ b/netbone/statistical/global_threshold.py @@ -18,11 +18,5 @@ def global_threshold(data): table['score'] = table['weight'] g = nx.from_pandas_edgelist(table, edge_attr=edge_properties(table)) - # average = table['weight'].mean() - # for u,v in g.edges(): - # if g[u][v]['score']>=average: - # g[u][v]['global_threshold'] = True - # else: - # g[u][v]['global_threshold'] = False - # return Backbone(g, name="Global Threshold Filter", column="global_threshold", ascending=False, filters=[boolean_filter, fraction_filter, threshold_filter]) - return Backbone(g, name="Global Threshold Filter", column="score", ascending=False, filters=[fraction_filter, threshold_filter]) \ No newline at end of file + + return Backbone(g, method_name="Global Threshold Filter", property_name="score", ascending=False, compatible_filters=[fraction_filter, threshold_filter], filter_on='Edges') \ No newline at end of file diff --git a/netbone/statistical/lans.py b/netbone/statistical/lans.py index 76d4f33b8bf8f860df49e70c4fa095b714856c30..6c27979bfad6a101febd675e5a10b17821e97956 100644 --- a/netbone/statistical/lans.py +++ b/netbone/statistical/lans.py @@ -14,8 +14,8 @@ def lans(data): return for u, v, w in g.edges(data='weight'): g[u][v]['p_value'] = min(compute_pvalue(g, v, w), compute_pvalue(g, u, w)) - return Backbone(g, name="Locally Adaptive Network Sparsification Filter", column="p_value", ascending=True, - filters=[threshold_filter, fraction_filter]) + return Backbone(g, method_name="Locally Adaptive Network Sparsification Filter", property_name="p_value", ascending=True, + compatible_filters=[threshold_filter, fraction_filter], filter_on='Edges') def compute_pvalue(G, node, w): diff --git a/netbone/statistical/maxent_graph/ecm_main.py b/netbone/statistical/maxent_graph/ecm_main.py index 2a14b3fe01bd5aeb27ffec4eb72a44bb3846009b..fc76c1849dd676c743f6d3cd0eadd0f741d07c90 100644 --- a/netbone/statistical/maxent_graph/ecm_main.py +++ b/netbone/statistical/maxent_graph/ecm_main.py @@ -37,4 +37,4 @@ def ecm(data): nx.set_edge_attributes(data, {(u,v):w for u,v,w in list(g.edges(data='p_value'))}, name='p_value') #subgraph = nx.subgraph_view(g)#, filter_edge=filter_edge) - return Backbone(data, name="Enhanced Configuration Model Filter", column="p_value", ascending=True, filters=[threshold_filter, fraction_filter]) \ No newline at end of file + return Backbone(data, method_name="Enhanced Configuration Model Filter", property_name="p_value", ascending=True, compatible_filters=[threshold_filter, fraction_filter], filter_on='Edges') \ No newline at end of file diff --git a/netbone/statistical/noise_corrected.py b/netbone/statistical/noise_corrected.py index ea33c941e5ffa73ce823e2dc5a7d87b7e89cd6eb..018f053f6cc699d8069902e5aed0ec4224f69ea2 100644 --- a/netbone/statistical/noise_corrected.py +++ b/netbone/statistical/noise_corrected.py @@ -40,5 +40,5 @@ def noise_corrected(data, approximation=True): g[i][j]['nc_sdev'] = sdev_cij g[i][j]['score'] = score - return Backbone(g, name="Noise Corrected Filter", column="p_value", ascending=True, filters=[threshold_filter, fraction_filter]) + return Backbone(g, method_name="Noise Corrected Filter", property_name="p_value", ascending=True, compatible_filters=[threshold_filter, fraction_filter], filter_on='Edges') diff --git a/netbone/structural/doubly_stochastic.py b/netbone/structural/doubly_stochastic.py index f19879b39f0d4bed56dd86ac321401f04441dc3b..75e696ed7fe7442a35206655290df7fc8d0ef3c3 100644 --- a/netbone/structural/doubly_stochastic.py +++ b/netbone/structural/doubly_stochastic.py @@ -4,13 +4,14 @@ import pandas as pd import networkx as nx from netbone.backbone import Backbone from netbone.filters import boolean_filter, threshold_filter, fraction_filter + # algo: doubly_stochastic.py warnings.filterwarnings('ignore') + def doubly_stochastic(data): - - undirected=True - return_self_loops=False + undirected = True + return_self_loops = False if isinstance(data, pd.DataFrame): table = data.copy() @@ -40,7 +41,7 @@ def doubly_stochastic(data): table = table[table["source"] < table["target"]] table = table[table["value"] > 0].sort_values(by="value", ascending=False) table = table.merge(table2[["source", "target", "weight"]], on=[ - "source", "target"]) + "source", "target"]) i = 0 doubly_nodes = len(set(table["source"]) | set(table["target"])) edges = table.shape[0] @@ -52,27 +53,29 @@ def doubly_stochastic(data): edge = table.iloc[i] G.add_edge(edge["source"], edge["target"], weight=edge["value"]) table.loc[table.loc[(table['source'] == edge["source"]) & ( - table['target'] == edge["target"])].index[0], 'ds_backbone'] = True + table['target'] == edge["target"])].index[0], 'in_backbone'] = True i += 1 else: G = nx.DiGraph() while nx.number_weakly_connected_components(G) != 1 or len(G) < doubly_nodes or nx.is_connected(G) == False: - if i== edges: + if i == edges: break edge = table.iloc[i] G.add_edge(edge["source"], edge["target"], weight=edge["value"]) table.loc[table.loc[(table['source'] == edge["source"]) & ( - table['target'] == edge["target"])].index[0], 'ds_backbone'] = True + table['target'] == edge["target"])].index[0], 'in_backbone'] = True i += 1 # table = pd.melt(nx.to_pandas_adjacency(G).reset_index(), id_vars = "index") table = table[table["value"] >= 0] table.rename(columns={"index": "source", - "variable": "target", "value": "score"}, inplace=True) + "variable": "target", "value": "score"}, inplace=True) table = table.fillna(False) if not return_self_loops: table = table[table["source"] != table["target"]] if undirected: table = table[table["source"] <= table["target"]] - return Backbone(nx.from_pandas_edgelist(table, edge_attr=['weight', 'score', 'ds_backbone']), name="Doubly Stochastic Filter", column="ds_backbone", ascending=False, filters=[boolean_filter, threshold_filter, fraction_filter]) + return Backbone(nx.from_pandas_edgelist(table, edge_attr=['weight', 'score', 'in_backbone']), + method_name="Doubly Stochastic Filter", property_name="score", ascending=False, + compatible_filters=[boolean_filter, threshold_filter, fraction_filter], filter_on='Edges') diff --git a/netbone/structural/h_backbone.py b/netbone/structural/h_backbone.py index 057c557e5812e6b1b7183a1e329180a3c43fb936..3d13433fe83f442fc9879d31ff04c01fa6779266 100644 --- a/netbone/structural/h_backbone.py +++ b/netbone/structural/h_backbone.py @@ -2,17 +2,16 @@ import networkx as nx import pandas as pd from netbone.backbone import Backbone from netbone.filters import boolean_filter + + # algo: h_backbone # calculating H-Index def h_backbone(data): - is_graph=False - if isinstance(data, pd.DataFrame): G = nx.from_pandas_edgelist(data, edge_attr='weight', create_using=nx.Graph()) elif isinstance(data, nx.Graph): G = data.copy() - is_graph=True else: print("data should be a panads dataframe or nx graph") return @@ -21,9 +20,9 @@ def h_backbone(data): G, weight='weight', normalized=False) nx.set_edge_attributes(G, {edge: {'h_bridge': round( - betweenness_values[edge]/len(G.nodes()), 3)} for edge in betweenness_values}) -# for u, v in G.edges(): -# G[u][v]['bridge'] = round(betweenness_values[(u,v)]/len(G.nodes()),3) + betweenness_values[edge] / len(G.nodes()), 3)} for edge in betweenness_values}) + # for u, v in G.edges(): + # G[u][v]['bridge'] = round(betweenness_values[(u,v)]/len(G.nodes()),3) weight_values = list(nx.get_edge_attributes(G, 'weight').values()) bridge_values = list(nx.get_edge_attributes(G, 'h_bridge').values()) @@ -59,10 +58,9 @@ def h_backbone(data): for u, v in G.edges(): if G[u][v]['h_bridge'] >= h_bridge or G[u][v]['weight'] >= h_weight: - G[u][v]['h_backbone'] = True + G[u][v]['in_backbone'] = True else: - G[u][v]['h_backbone'] = False - if is_graph: - return Backbone(G, name="H-Backbone Filter", column="h_backbone", ascending=False, filters=[boolean_filter]) #h_bridge, h_weight, G - # return nx.to_pandas_edgelist(G.to_directed()), "h_backbone" - return Backbone(nx.to_pandas_edgelist(G), name="H-Backbone Filter", column="h_backbone", ascending=False, filters=[boolean_filter]) + G[u][v]['in_backbone'] = False + + return Backbone(nx.to_pandas_edgelist(G), method_name="H-Backbone Filter", property_name="h_bridge", + ascending=False, compatible_filters=[boolean_filter], filter_on='Edges') diff --git a/netbone/structural/high_salience_skeleton.py b/netbone/structural/high_salience_skeleton.py index 9309e77a4c24e41c0ee6b68877c8dc634d9f5869..80b2da884eb9ba5e9eb1eda1dedbd7a8f71469d1 100644 --- a/netbone/structural/high_salience_skeleton.py +++ b/netbone/structural/high_salience_skeleton.py @@ -1,12 +1,9 @@ - -from collections import defaultdict import networkx as nx import pandas as pd -import warnings from netbone.backbone import Backbone from netbone.filters import boolean_filter, threshold_filter, fraction_filter -# change distance + def high_salience_skeleton(data): if isinstance(data, pd.DataFrame): graph = nx.from_pandas_edgelist(data, edge_attr='weight', create_using=nx.Graph()) @@ -16,8 +13,8 @@ def high_salience_skeleton(data): print("data should be a panads dataframe or nx graph") return - wes= nx.get_edge_attributes(graph, 'weight') - values = {pair:1/wes[pair] for pair in wes} + wes = nx.get_edge_attributes(graph, 'weight') + values = {pair: 1 / wes[pair] for pair in wes} nx.set_edge_attributes(graph, values, name='distance') nx.set_edge_attributes(graph, 0, name='score') @@ -29,29 +26,26 @@ def high_salience_skeleton(data): paths = list(tree.values())[1:] for path in paths: for i in range(len(path) - 1): - node_tree_scores[(path[i], path[i+1])] = 1 + node_tree_scores[(path[i], path[i + 1])] = 1 - for u,v in node_tree_scores: - graph[u][v]['score'] +=1 + for u, v in node_tree_scores: + graph[u][v]['score'] += 1 - scores= nx.get_edge_attributes(graph, 'score') + scores = nx.get_edge_attributes(graph, 'score') N = len(graph) score_values = dict() backbone_edges = dict() for pair in scores: - score_values[pair] = scores[pair]/N - if scores[pair]/N > 0.8: + score_values[pair] = scores[pair] / N + if scores[pair] / N > 0.8: backbone_edges[pair] = True else: backbone_edges[pair] = False # score_values = {pair:scores[pair]/N for pair in scores} - nx.set_edge_attributes(graph, score_values, name='score') - nx.set_edge_attributes(graph, backbone_edges, name='high_salience_skeleton') - - # for u,v in graph.edges(): - # if graph[u][v]['score']>=0.8: - # graph[u][v]['high_salience_skeleton'] = True - # else: - # graph[u][v]['high_salience_skeleton'] = False - return Backbone(graph, name="High Salience Skeleton Filter", column="high_salience_skeleton", ascending=False, filters=[boolean_filter, threshold_filter, fraction_filter]) + nx.set_edge_attributes(graph, score_values, name='salience') + nx.set_edge_attributes(graph, backbone_edges, name='in_backbone') + + return Backbone(graph, method_name="High Salience Skeleton Filter", property_name="salience", + ascending=False, compatible_filters=[boolean_filter, threshold_filter, fraction_filter], + filter_on='Edges') diff --git a/netbone/structural/maximum_spanning_tree.py b/netbone/structural/maximum_spanning_tree.py index 907cd80e9b0767015d0efe87c3f43f3e7a0eba01..3d68bebfc56c8c5df5e2bc5627cff276e06aef97 100644 --- a/netbone/structural/maximum_spanning_tree.py +++ b/netbone/structural/maximum_spanning_tree.py @@ -20,12 +20,12 @@ def maximum_spanning_tree(data): nx.set_edge_attributes(G, nx.get_edge_attributes(nx.from_pandas_edgelist(df, edge_attr='distance'), 'distance'), name='distance') msp = nx.minimum_spanning_tree(G, weight='distance') - nx.set_edge_attributes(G, True, name='msp_backbone') + nx.set_edge_attributes(G, True, name='in_backbone') - missing_edges = {edge: {"msp_backbone": False} for edge in set(G.edges()).difference(set(msp.edges()))} + missing_edges = {edge: {"in_backbone": False} for edge in set(G.edges()).difference(set(msp.edges()))} nx.set_edge_attributes(G, missing_edges) - return Backbone(G, name="Maximum Spanning Tree", column="msp_backbone", ascending=False, filters=[boolean_filter]) + return Backbone(G, method_name="Maximum Spanning Tree", property_name="distance", ascending=False, compatible_filters=[boolean_filter], filter_on='Edges') diff --git a/netbone/structural/metric_distance_backbone.py b/netbone/structural/metric_distance_backbone.py index 1a640fa1835f5c8d76e214bf2cc593cbab4ae31d..5cd16643cf84fd1927fa88aa897e3ae2b9169dfb 100644 --- a/netbone/structural/metric_distance_backbone.py +++ b/netbone/structural/metric_distance_backbone.py @@ -14,47 +14,9 @@ def metric_distance_backbone(data): G[u][v]['distance'] = 1/G[u][v]['weight'] m_backbone = dc_backbone.metric_backbone(G, weight='distance') - nx.set_edge_attributes(G, True, name='metric_distance_backbone') + nx.set_edge_attributes(G, True, name='in_backbone') - missing_edges = {edge: {"metric_distance_backbone": False} for edge in set(G.edges()).difference(set(m_backbone.edges()))} + missing_edges = {edge: {"in_backbone": False} for edge in set(G.edges()).difference(set(m_backbone.edges()))} nx.set_edge_attributes(G, missing_edges) - return Backbone(G, name="Metric Distance Filter", column="metric_distance_backbone", ascending=False, filters=[boolean_filter]) - -# -# def metric_distance_backbone(data): -# # distance closure -# -# if isinstance(data, pd.DataFrame): -# #create graph from the edge list -# labeled_G = nx.from_pandas_edgelist(data, edge_attr='weight', create_using=nx.Graph()) -# else: -# labeled_G=data -# -# #convert node labels to integers and store the labels as attributes and get the label used for mapping later -# G = nx.convert_node_labels_to_integers(labeled_G, label_attribute='name') -# mapping_lables = nx.get_node_attributes(G, name='name') -# -# #create the adjacency matrix of the graph -# W = nx.adjacency_matrix(G).todense() -# -# #calculate the proximity matrix using the weighted jaccard algorithm -# P = dc_distance.pairwise_proximity(W, metric='jaccard_weighted') -# -# #convert the proximity matrix to a distance matrix -# D = np.vectorize(dc_utils.prox2dist)(P) -# -# #create a distance graph from the distance matrix containing only the edges observed in the original network -# DG = nx.from_numpy_matrix(D) -# for u,v in DG.edges(): -# edge = (u,v) -# if edge not in G.edges(): -# DG.remove_edge(u, v) -# -# #apply the distance closure algorithm to obtain the metric and ultrametric backbones -# m_backbone = dc.distance_closure(DG, kind='metric', weight='weight', only_backbone=True) -# -# #relabel the graphs with the original labels -# m_backbone = nx.relabel_nodes(m_backbone, mapping_lables) -# -# return Backbone(m_backbone, name="Metric Distance Filter", column="metric_distance_backbone") \ No newline at end of file + return Backbone(G, method_name="Metric Distance Filter", property_name="distance", ascending=False, compatible_filters=[boolean_filter], filter_on='Edges') diff --git a/netbone/structural/modulairy_backbone.py b/netbone/structural/modulairy_backbone.py index 049927c52bfcd803a1fa744540a38b6d5bc4e664..c73de25c2bf74e87c58cce140cfe43b6b28f5bf0 100644 --- a/netbone/structural/modulairy_backbone.py +++ b/netbone/structural/modulairy_backbone.py @@ -1,91 +1,56 @@ +from netbone.backbone import Backbone +from netbone.filters import threshold_filter, fraction_filter import community.community_louvain as community -import heapq -import operator -import math -import networkx as nx -import numpy as np from scipy.sparse import csr_matrix from scipy.sparse import diags +import networkx as nx import pandas as pd -from netbone.backbone import Backbone -from netbone.filters import boolean_filter - - +import numpy as np -def orderCommunities(c): - i = 0 - keys_partition = list() - for j in c: - keys_partition.append(i) - i = i + 1 - partition = dict() - for i in keys_partition: - partition[i] = [] +# +# def swap_key_value_dict(old_dict): +# new_dict = {} +# for key, value in old_dict.items(): +# if value not in new_dict: +# new_dict[value] = [] +# new_dict[value].append(key) +# return new_dict +def modularity_backbone(data): + if isinstance(data, pd.DataFrame): + g = nx.from_pandas_edgelist(data, edge_attr='weight', create_using=nx.Graph()) + elif isinstance(data, nx.Graph): + g = data.copy() + else: + print("data should be a panads dataframe or nx graph") + return - i = 0 - for j in c: - for k in c[j]: - partition[i].append(k) - i = i + 1 + node_communities = community.best_partition(g, random_state=123) + modularity_value = community.modularity(node_communities, g) + # communities = swap_key_value_dict(node_communities) - return partition + membership = list(node_communities.values()) -def communityInfo(c, partition): - print('Number of partitions: ', len(partition)) - l = list() - for i in c: - for j in c[i]: - l.append(j) - print('Number of nodes in the communities detected: ', len(l)) + weight_key = None + index = list(range(len(g))) + m = sum([g.degree(node, weight=weight_key) for node in g.nodes()]) / 2 - s = set(l) - print('Number of repetitions: ', len(l) - len(s)) - print() - print() + A = nx.to_scipy_sparse_matrix(g) -def getSparseA(g): - return nx.to_scipy_sparse_matrix(g) - # return nx.to_scipy_sparse_array(g) + vals = np.ones(len(membership)) + group_indicator_mat = csr_matrix((vals, (index, membership)), shape=(len(g), max(membership) + 1)) -def getGroupIndicator(g, membership, rows=None): - if not rows: - rows = list(range(g.vcount())) - cols = membership - vals = np.ones(len(cols)) - group_indicator_mat = csr_matrix((vals, (rows, cols)), - shape=(len(g), max(membership) + 1)) - return group_indicator_mat + node_deg_by_group = A * group_indicator_mat + internal_edges = node_deg_by_group[index, membership].sum() / 2 -def getDegMat(node_deg_by_group, rows, cols): degrees = node_deg_by_group.sum(1) degrees = np.array(degrees).flatten() - deg_mat = csr_matrix((degrees, (rows, cols)), + deg_mat = csr_matrix((degrees, (index, membership)), shape=node_deg_by_group.shape) degrees = degrees[:, np.newaxis] - return degrees, deg_mat - - -def newMods(g, part): - #if g.is_weighted(): - # weight_key = 'weight' - #else: - weight_key = None - index = list(range(len(g))) - membership = part.membership_list # Steph: "part" is an instance of a class that has a "membership attribute" - - m = sum([g.degree(node, weight=weight_key) for node in g.nodes()])/2 - - A = getSparseA(g) - self_loops = A.diagonal().sum() - group_indicator_mat = getGroupIndicator(g, membership, rows=index) - node_deg_by_group = A * group_indicator_mat - - internal_edges = (node_deg_by_group[index, membership].sum() + self_loops) / 2 - degrees, deg_mat = getDegMat(node_deg_by_group, index, membership) node_deg_by_group += deg_mat group_degs = (deg_mat + diags(A.diagonal()) * group_indicator_mat).sum(0) @@ -93,139 +58,16 @@ def newMods(g, part): internal_deg = node_deg_by_group[index, membership].transpose() - degrees q1_links = (internal_edges - internal_deg) / (m - degrees) - # expanding out (group_degs - node_deg_by_group)^2 is slightly faster: + expected_impact = np.power(group_degs, 2).sum() - 2 * (node_deg_by_group * group_degs.transpose()) + \ node_deg_by_group.multiply(node_deg_by_group).sum(1) - q1_degrees = expected_impact / (4 * (m - degrees)**2) + q1_degrees = expected_impact / (4 * (m - degrees) ** 2) q1s = q1_links - q1_degrees q1s = np.array(q1s).flatten() - return q1s - - -def modularity_vitality(g, modularity, part): - q0 = modularity - q1s = newMods(g, part) - vitalities = (q0 - q1s).tolist() - return vitalities - - -def mappingAndRelabeling(g): - # Mapping - g_nx=g.copy() - l_nodes = g_nx.nodes() - taille=len(l_nodes) - dict_graph = dict () # nodes in the key and themselves - for i in l_nodes: - dict_graph[i] = [i] - index = 0 - for i in dict_graph: - for j in dict_graph[i]: - dict_graph[i] = index - index = index + 1 - - # Relabling: Construct a new graph with those mappings now - mapping = dict_graph - g_relabled = nx.relabel_nodes(g, mapping, copy=True) - - return g_relabled - -def flip_nodes_and_communities(dict_nodes_communities): - # Step 1: initialize communities as keys - new_dict = {} - for k, v in dict_nodes_communities.items(): - new_dict[v]=[] - - # Step 2: Fill in nodes - for kk,vv in new_dict.items(): - for k,v in dict_nodes_communities.items(): - if dict_nodes_communities[k] == kk: # If the community number (value) in `best` is the same as new_dict key (key), append the node (key) in `best` - #print(k,v) - new_dict[kk].append(k) - - return new_dict - -class communityInformation: - def __init__(self, modularity_value, communities): - self.modularity = modularity_value - self.membership = communities - self.membership_list = list() - for i in self.membership: - self.membership_list.append(self.membership[i]) - -# Returns a list of the top_k nodes and their centralities, and heap (list) of top k nodes --> heap will be used for removal -def get_top_k_best_nodes(dict_centrality, k): - - # The sorted() function returns a sorted list of the specified iterable object - top_k = sorted(dict_centrality.items(), key=operator.itemgetter(1), reverse=True)[:k] - first_nodes = heapq.nlargest(k, dict_centrality, key=dict_centrality.get) - - return top_k, first_nodes - -def modularity_backbone(data, node_fraction): - - if isinstance(data, pd.DataFrame): - g = nx.from_pandas_edgelist(data, edge_attr='weight', create_using=nx.Graph()) - elif isinstance(data, nx.Graph): - g = data.copy() - else: - print("data should be a panads dataframe or nx graph") - return - g1 = g.copy() - k = len(g1)-math.ceil(len(g1)*node_fraction) - communities = community.best_partition(g1, random_state=123) - - modularity_value = community.modularity(communities, g1) - - infomap_communities = flip_nodes_and_communities(communities) - infomap_communities_organized = orderCommunities(infomap_communities) - - communities_instance = communityInformation(modularity_value, communities) - - list_modv = modularity_vitality(g1, communities_instance.modularity, communities_instance) - - dict_original_modv_absolute = {} - for i, node in enumerate(g1.nodes()): - dict_original_modv_absolute[node] = abs(list_modv[i]) - - - #print(dict_original_modv_absolute) - - top_y, top_x = get_top_k_best_nodes(dict_original_modv_absolute, len(g1)) - - nodes_removed = [] - modularity_at_each_node_removal = [] - modularity_at_each_node_removal.append(community.modularity(communities, g)) # Intiial modularity - communities_flipped_prunned = {} - - nx.set_node_attributes(g1, dict_original_modv_absolute, name='modularity') - - for i in range(k): - last_element = top_x.pop() # Get the node to be removed - - - # Working on Q1 - g1.remove_node(last_element) # Remove it from the network - communities.pop(last_element) # Remove it from the communities - modularity_value_after_removal = community.modularity(communities, g1) - modularity_at_each_node_removal.append(modularity_value_after_removal) - - - # Working on Q3 - nodes_removed.append(last_element) - - # Working on Q2 - for k,v in infomap_communities_organized.items(): - communities_flipped_prunned[k] = [] - for node1 in v: - if node1 in nodes_removed: - continue - else: - communities_flipped_prunned[k].append(node1) + vitalities = (modularity_value - q1s).tolist() - nx.set_edge_attributes(g, True, name='modularity_backbone') + nx.set_node_attributes(g, dict(zip(list(g.nodes()), np.absolute(vitalities))), name='score') - missing_edges = {edge: {"modularity_backbone": False} for edge in set(g.edges()).difference(set(g1.edges()))} - nx.set_edge_attributes(g, missing_edges) - # return g1, modularity_at_each_node_removal, communities_flipped_prunned, nodes_removed, top_x - return Backbone(g, name="Modularity Filter", column='modularity_backbone', ascending=False, filters=[boolean_filter]) \ No newline at end of file + return Backbone(g, method_name="Modularity Filter", property_name='score', ascending=False, + compatible_filters=[threshold_filter, fraction_filter], filter_on='Nodes') diff --git a/netbone/structural/ultrametric_distance_backbone.py b/netbone/structural/ultrametric_distance_backbone.py index 69e4fb1efcc05e7b1f629fd3228b63978f0ff0f6..3907400048bcd00c76c5b0686ead9b799cd00528 100644 --- a/netbone/structural/ultrametric_distance_backbone.py +++ b/netbone/structural/ultrametric_distance_backbone.py @@ -14,48 +14,9 @@ def ultrametric_distance_backbone(data): G[u][v]['distance'] = 1/G[u][v]['weight'] um_backbone = dc_backbone.ultrametric_backbone(G, weight='distance') - nx.set_edge_attributes(G, True, name='utlrametric_distance_backbone') + nx.set_edge_attributes(G, True, name='in_backbone') - missing_edges = {edge: {"utlrametric_distance_backbone": False} for edge in set(G.edges()).difference(set(um_backbone.edges()))} + missing_edges = {edge: {"in_backbone": False} for edge in set(G.edges()).difference(set(um_backbone.edges()))} nx.set_edge_attributes(G, missing_edges) - return Backbone(G, name="Ultrametric Distance Filter", column="utlrametric_distance_backbone", ascending=False, filters=[boolean_filter]) - - - -# def ultrametric_distance_backbone(data): -# # distance closure -# -# if isinstance(data, pd.DataFrame): -# #create graph from the edge list -# labeled_G = nx.from_pandas_edgelist(data, edge_attr='weight', create_using=nx.Graph()) -# else: -# labeled_G=data -# -# #convert node labels to integers and store the labels as attributes and get the label used for mapping later -# G = nx.convert_node_labels_to_integers(labeled_G, label_attribute='name') -# mapping_lables = nx.get_node_attributes(G, name='name') -# -# #create the adjacency matrix of the graph -# W = nx.adjacency_matrix(G).todense() -# -# #calculate the proximity matrix using the weighted jaccard algorithm -# P = dc_distance.pairwise_proximity(W, metric='jaccard_weighted') -# -# #convert the proximity matrix to a distance matrix -# D = np.vectorize(dc_utils.prox2dist)(P) -# -# #create a distance graph from the distance matrix containing only the edges observed in the original network -# DG = nx.from_numpy_matrix(D) -# for u,v in DG.edges(): -# edge = (u,v) -# if edge not in G.edges(): -# DG.remove_edge(u, v) -# -# #apply the distance closure algorithm to obtain the metric and ultrametric backbones -# um_backbone = dc.distance_closure(DG, kind='ultrametric', weight='weight', only_backbone=True) -# -# #relabel the graphs with the original labels -# um_backbone = nx.relabel_nodes(um_backbone, mapping_lables) -# -# return Backbone(um_backbone, name="Ultrametric Distance Filter", column='ultrametric_distance_backbone') \ No newline at end of file + return Backbone(G, method_name="Ultrametric Distance Filter", property_name="distance", ascending=False, compatible_filters=[boolean_filter], filter_on='Edges') diff --git a/netbone/utils/utils.py b/netbone/utils/utils.py index 0f31d2cef2682fb5e73637181578829382353704..7f573116cbe656770e67340d0c827aca2a66d112 100644 --- a/netbone/utils/utils.py +++ b/netbone/utils/utils.py @@ -13,7 +13,7 @@ def cumulative_dist(name, method, values, increasing=True): y = np.arange(1, len(x) + 1)/len(x) df = pd.DataFrame(index=x) - df.index.name = name + df.index.method_name = name df[method] = y return df