diff --git a/.gitignore b/.gitignore index fb14214040395ec62d55235abe14d361050b9ed8..b16ebdd6ded7a37e6a2eebf15c3bfdc94ebef0dd 100644 --- a/.gitignore +++ b/.gitignore @@ -6,4 +6,7 @@ setup.cfg dist/* samples/* NetBone.egg-info/* -/.idea/* \ No newline at end of file +/.idea/* +examples/* +build/* +netbone.iml \ No newline at end of file diff --git a/netbone/__init__.py b/netbone/__init__.py index f5e20e16bf782d8e5a7ed932268976db653e5f47..2bd9d8a08c605490ad5201b9c5ee765a2d55b5e5 100644 --- a/netbone/__init__.py +++ b/netbone/__init__.py @@ -16,9 +16,18 @@ from netbone.statistical.marginal_likelihood import MLF from netbone.statistical.lans import lans from netbone.structural.ultrametric_distance_backbone import ultrametric_distance_backbone from netbone.structural.metric_distance_backbone import metric_distance_backbone -from netbone.statistical.global_threshold import global_threshold +from netbone.structural.global_threshold import global_threshold from netbone.structural.modulairy_backbone import modularity_backbone from netbone.structural.maximum_spanning_tree import maximum_spanning_tree +from netbone.hybrid.glanb import glanb +from netbone.structural.pmfg import pmfg +from netbone.structural.plam import plam +from netbone.structural.mlam import mlam +from netbone.structural.gspar import gspar +from netbone.structural.degree import degree +from netbone.structural.betweenness import betweenness +# from netbone.statistical.correlation_and_statistic import correlation_and_statistic + from netbone.filters import threshold_filter, fraction_filter from netbone import compare from netbone import filters diff --git a/netbone/compare.py b/netbone/compare.py index 16c40a974908c39d713e2c11730535dd474c7467..bf04168cc7f2ceb42b159e97d6236af702a22750 100644 --- a/netbone/compare.py +++ b/netbone/compare.py @@ -90,29 +90,134 @@ class Compare: props_res[res].index.name = self.value_name return props_res - def distribution_ks_statistic(self, increasing=True): + def distribution_ks_statistic(self, increasing=True, consent=False): if self.filter == boolean_filter: self.filter_values = [0] * len(self.backbones) if self.filter_values == []: raise Exception('Please enter the filter values.') + cons = [] + if consent == False: + for backbon in self.backbones: + cons.append(False) + consent = cons dist = dict() - ks_statistics = pd.DataFrame(index=[backbone.method_name for backbone in self.backbones]) - + if True in consent: + ks_statistics = pd.DataFrame(index=[backbone.method_name for backbone in self.backbones] + ['Consensual Backbone']) + else: + ks_statistics = pd.DataFrame(index=[backbone.method_name for backbone in self.backbones]) for property in self.props: dist_values = dict() vals = [] values0 = self.props[property](self.network) dist_values['Original'] = cumulative_dist(property, 'Original', values0, increasing) + if True in consent: + consensual_backbone = '' + nodes_labels = dict(zip(self.network.nodes(), nx.convert_node_labels_to_integers(self.network.copy()).nodes())) + inverse_nodes_labels = dict(zip(nx.convert_node_labels_to_integers(self.network.copy()).nodes(), self.network.nodes())) + + for i, backbone in enumerate(self.backbones): extracted_backbone = self.filter(backbone, value=self.filter_values[i], narrate=False) + if consent[i]: + if consensual_backbone == '': + consensual_backbone = nx.relabel_nodes(extracted_backbone, nodes_labels) + else: + extracted_backbone = nx.relabel_nodes(extracted_backbone, nodes_labels) + old_consensual = consensual_backbone.copy() + consensual_backbone.remove_nodes_from(n for n in old_consensual if n not in extracted_backbone) + consensual_backbone.remove_edges_from(e for e in old_consensual.edges if e not in extracted_backbone.edges) + values1 = self.props[property](extracted_backbone) - dist_values[backbone.method_name] = cumulative_dist(property, extracted_backbone.method_name, values1, increasing) + dist_values[backbone.method_name] = cumulative_dist(property, backbone.method_name, values1, increasing) + vals.append(kstest(values0, values1)[0]) + if consent[i]: + consensual_backbone.remove_nodes_from(list(nx.isolates(consensual_backbone))) + consensual_backbone = nx.relabel_nodes(consensual_backbone, inverse_nodes_labels) + values1 = self.props[property](consensual_backbone) + dist_values['Consensual Backbone'] = cumulative_dist(property, 'Consensual Backbone', values1, increasing) vals.append(kstest(values0, values1)[0]) # ks_statistics = pd.DataFrame(index=['Original'] + [backbone.name for backbone in self.backbones]) dist[property] = dist_values ks_statistics[property] = vals - return ks_statistics, dist + if True in consent: + return ks_statistics, dist, consensual_backbone + else: + return ks_statistics, dist + + # def distribution_ks_statistic(self, increasing=True, consent=True): + # if self.filter == boolean_filter: + # self.filter_values = [0] * len(self.backbones) + # if self.filter_values == []: + # raise Exception('Please enter the filter values.') + # + # dist = dict() + # if consent: + # ks_statistics = pd.DataFrame(index=[backbone.method_name for backbone in self.backbones] + ['Consensual Backbone']) + # else: + # ks_statistics = pd.DataFrame(index=[backbone.method_name for backbone in self.backbones]) + # for property in self.props: + # dist_values = dict() + # vals = [] + # values0 = self.props[property](self.network) + # dist_values['Original'] = cumulative_dist(property, 'Original', values0, increasing) + # + # if consent: + # consensual_backbone = '' + # nodes_labels = dict(zip(self.network.nodes(), nx.convert_node_labels_to_integers(self.network.copy()).nodes())) + # inverse_nodes_labels = dict(zip(nx.convert_node_labels_to_integers(self.network.copy()).nodes(), self.network.nodes())) + # + # + # for i, backbone in enumerate(self.backbones): + # extracted_backbone = self.filter(backbone, value=self.filter_values[i], narrate=False) + # if consent: + # if i==0: + # consensual_backbone = nx.relabel_nodes(extracted_backbone, nodes_labels) + # else: + # extracted_backbone = nx.relabel_nodes(extracted_backbone, nodes_labels) + # old_consensual = consensual_backbone.copy() + # consensual_backbone.remove_nodes_from(n for n in old_consensual if n not in extracted_backbone) + # consensual_backbone.remove_edges_from(e for e in old_consensual.edges if e not in extracted_backbone.edges) + # + # values1 = self.props[property](extracted_backbone) + # dist_values[backbone.method_name] = cumulative_dist(property, backbone.method_name, values1, increasing) + # vals.append(kstest(values0, values1)[0]) + # if consent: + # consensual_backbone = nx.relabel_nodes(consensual_backbone, inverse_nodes_labels) + # values1 = self.props[property](consensual_backbone) + # dist_values['Consensual Backbone'] = cumulative_dist(property, 'Consensual Backbone', values1, increasing) + # vals.append(kstest(values0, values1)[0]) + # + # # ks_statistics = pd.DataFrame(index=['Original'] + [backbone.name for backbone in self.backbones]) + # dist[property] = dist_values + # ks_statistics[property] = vals + # + # if consent: + # return ks_statistics, dist, consensual_backbone + # else: + # return ks_statistics, dist + + def consent(self): + if self.filter == boolean_filter: + self.filter_values = [0] * len(self.backbones) + if self.filter_values == []: + raise Exception('Please enter the filter values.') + + nodes_labels = dict(zip(self.network.nodes(), nx.convert_node_labels_to_integers(self.network.copy()).nodes())) + inverse_nodes_labels = dict(zip(nx.convert_node_labels_to_integers(self.network.copy()).nodes(), self.network.nodes())) + consensual_backbone = '' + for i, backbone in enumerate(self.backbones): + extracted_backbone = self.filter(backbone, value=self.filter_values[i], narrate=False) + if i==0: + consensual_backbone = nx.relabel_nodes(extracted_backbone, nodes_labels) + else: + extracted_backbone = nx.relabel_nodes(extracted_backbone, nodes_labels) + old_consensual = consensual_backbone.copy() + consensual_backbone.remove_nodes_from(n for n in old_consensual if n not in extracted_backbone) + consensual_backbone.remove_edges_from(e for e in old_consensual.edges if e not in extracted_backbone.edges) + + consensual_backbone.remove_nodes_from(list(nx.isolates(consensual_backbone))) + return nx.relabel_nodes(consensual_backbone, inverse_nodes_labels) diff --git a/netbone/hybrid/__init__.py b/netbone/hybrid/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/netbone/statistical/glanb.py b/netbone/hybrid/glanb.py similarity index 100% rename from netbone/statistical/glanb.py rename to netbone/hybrid/glanb.py diff --git a/netbone/measures.py b/netbone/measures.py index e97bfc1aa1fb40aaf23da761c9e6b343a209a932..55ca634137caa3f17b3704cde0bf2a57fb6f1620 100644 --- a/netbone/measures.py +++ b/netbone/measures.py @@ -36,7 +36,7 @@ def reachability(original, G): return r/(len(G)*(len(G) - 1)) def number_connected_components(original, G): - nx.number_connected_components(G) + return nx.number_connected_components(G) def diameter(original, G): return ig.Graph.from_networkx(lcc(G)).diameter(directed=False, unconn=True) @@ -53,8 +53,8 @@ def lcc_weight_fraction(original, G): def weights(G): return list(nx.get_edge_attributes(G, 'weight').values()) -def degrees(G): - return list(dict(G.degree()).values()) +def degrees(G, weight=None): + return list(dict(G.degree(weight=weight)).values()) def average_clustering_coefficient(original, G): node_clustering = ig.Graph.from_networkx(G).transitivity_local_undirected(mode="nan") diff --git a/netbone/structural/betweenness.py b/netbone/structural/betweenness.py new file mode 100644 index 0000000000000000000000000000000000000000..7c2d09cd6f449d2a3e0cca7fd4bef14b58d88457 --- /dev/null +++ b/netbone/structural/betweenness.py @@ -0,0 +1,24 @@ +import networkx as nx +from netbone.filters import threshold_filter, fraction_filter +from netbone.backbone import Backbone +from pandas import DataFrame +from netbone.utils.utils import edge_properties + +def betweenness(data, weighted=True, normalized=True): + if isinstance(data, DataFrame): + g = nx.from_pandas_edgelist(data, edge_attr=edge_properties(data)) + elif isinstance(data, nx.Graph): + g = data.copy() + else: + print("data should be a panads dataframe or nx graph") + return + + + if weighted: + nx.set_edge_attributes(g, nx.edge_betweenness_centrality(g, normalized=normalized, weight='weight', seed=100), name='weighted-betweenness') + return Backbone(g, method_name="Weighted Betweenness", property_name="weighted-betweenness", ascending=False, compatible_filters=[threshold_filter, fraction_filter], filter_on='Edges') + else: + nx.set_edge_attributes(g, nx.edge_betweenness_centrality(g, normalized=normalized, seed=100), name='betweenness') + return Backbone(g, method_name="Betweenness", property_name="betweenness", ascending=False, compatible_filters=[threshold_filter, fraction_filter], filter_on='Edges') + + diff --git a/netbone/structural/degree.py b/netbone/structural/degree.py new file mode 100644 index 0000000000000000000000000000000000000000..a738cfc4635b8be764974ba3af07a0f39153e39b --- /dev/null +++ b/netbone/structural/degree.py @@ -0,0 +1,21 @@ +import networkx as nx +from netbone.filters import threshold_filter, fraction_filter +from netbone.backbone import Backbone +from pandas import DataFrame +from netbone.utils.utils import edge_properties + +def degree(data, weighted=False): + if isinstance(data, DataFrame): + g = nx.from_pandas_edgelist(data, edge_attr=edge_properties(data)) + elif isinstance(data, nx.Graph): + g = data.copy() + else: + print("data should be a panads dataframe or nx graph") + return + + if weighted: + nx.set_node_attributes(g,dict(g.degree(weight='weight')), name='weighted-degree') + return Backbone(g, method_name="Weighted Degree", property_name="weighted-degree", ascending=False, compatible_filters=[threshold_filter, fraction_filter], filter_on='Nodes') + else: + nx.set_node_attributes(g,dict(g.degree()), name='degree') + return Backbone(g, method_name="Degree", property_name="degree", ascending=False, compatible_filters=[threshold_filter, fraction_filter], filter_on='Nodes') \ No newline at end of file diff --git a/netbone/statistical/global_threshold.py b/netbone/structural/global_threshold.py similarity index 92% rename from netbone/statistical/global_threshold.py rename to netbone/structural/global_threshold.py index b282b2d82d2f7ccfe729e01b4e9eded7c1d2c113..bf3da7ea2c19c3b39743dc0fa3d69424752ed1c5 100644 --- a/netbone/statistical/global_threshold.py +++ b/netbone/structural/global_threshold.py @@ -9,8 +9,7 @@ def global_threshold(data): if isinstance(data, DataFrame): table = data.copy() elif isinstance(data, Graph): - table = to_pandas_edgelist(data) - is_graph=True + table = nx.to_pandas_edgelist(data) else: print("data should be a panads dataframe or nx graph") return diff --git a/netbone/structural/gspar.py b/netbone/structural/gspar.py new file mode 100644 index 0000000000000000000000000000000000000000..597da7d0e0ed58441bf883cc00f990d5da94fcf2 --- /dev/null +++ b/netbone/structural/gspar.py @@ -0,0 +1,29 @@ +import networkx as nx +from netbone.filters import threshold_filter, fraction_filter +from netbone.backbone import Backbone +from pandas import DataFrame +from netbone.utils.utils import edge_properties + +def jaccard(a, b): + # convert to set + a = set(a) + b = set(b) + # calucate jaccard similarity + return float(len(a.intersection(b))) / len(a.union(b)) + +def get_neighbours(graph, node): + return list(dict(graph[node]).keys()) + [node] + +def gspar(data): + if isinstance(data, DataFrame): + g = nx.from_pandas_edgelist(data, edge_attr=edge_properties(data)) + elif isinstance(data, nx.Graph): + g = data.copy() + else: + print("data should be a panads dataframe or nx graph") + return + + for u, v in g.edges(): + g[u][v]['jaccard-sim'] = jaccard(get_neighbours(g, u), get_neighbours(g, v)) + + return Backbone(g, method_name="Global Sparsification", property_name="jaccard-sim", ascending=False, compatible_filters=[threshold_filter, fraction_filter], filter_on='Edges') \ No newline at end of file diff --git a/netbone/structural/maximum_spanning_tree.py b/netbone/structural/maximum_spanning_tree.py index 3d68bebfc56c8c5df5e2bc5627cff276e06aef97..993e9983fce682c369111cbced966e9079f59491 100644 --- a/netbone/structural/maximum_spanning_tree.py +++ b/netbone/structural/maximum_spanning_tree.py @@ -2,30 +2,26 @@ import networkx as nx import pandas as pd from netbone.backbone import Backbone from netbone.filters import boolean_filter +from netbone.utils.utils import edge_properties # algo: minimum_spanning_tree # calculating MSP def maximum_spanning_tree(data): if isinstance(data, pd.DataFrame): - G = nx.from_pandas_edgelist(data, edge_attr='weight', create_using=nx.Graph()) + g = nx.from_pandas_edgelist(data, edge_attr=edge_properties(data)) elif isinstance(data, nx.Graph): - G = data.copy() + g = data.copy() else: print("data should be a panads dataframe or nx graph") return + nx.set_edge_attributes(g, True, name='in_backbone') + msp = nx.maximum_spanning_tree(g, weight='weight') - df = nx.to_pandas_edgelist((G)) - df['distance'] = df.apply(lambda row : 1/row['weight'], axis = 1) + missing_edges = {edge: {"in_backbone": False} for edge in set(g.edges()).difference(set(msp.edges()))} + nx.set_edge_attributes(g, missing_edges) - nx.set_edge_attributes(G, nx.get_edge_attributes(nx.from_pandas_edgelist(df, edge_attr='distance'), 'distance'), name='distance') - msp = nx.minimum_spanning_tree(G, weight='distance') - nx.set_edge_attributes(G, True, name='in_backbone') - - missing_edges = {edge: {"in_backbone": False} for edge in set(G.edges()).difference(set(msp.edges()))} - nx.set_edge_attributes(G, missing_edges) - - return Backbone(G, method_name="Maximum Spanning Tree", property_name="distance", ascending=False, compatible_filters=[boolean_filter], filter_on='Edges') + return Backbone(g, method_name="Maximum Spanning Tree", property_name="weight", ascending=False, compatible_filters=[boolean_filter], filter_on='Edges') diff --git a/netbone/structural/mlam.py b/netbone/structural/mlam.py new file mode 100644 index 0000000000000000000000000000000000000000..051961d25f6fa069ba436f969657e2fc57e86b9d --- /dev/null +++ b/netbone/structural/mlam.py @@ -0,0 +1,72 @@ +import numpy as np +import networkx as nx +from netbone.filters import boolean_filter +from netbone.backbone import Backbone +from pandas import DataFrame +from netbone.utils.utils import edge_properties +from math import isnan +def get_neighbor_weights(graph, node): + # Get the neighbors and weights of the given node from the graph + neighbors = graph[node].keys() + weights = [graph[node][neighbor]['weight'] for neighbor in neighbors] + + # Calculate the total weight + total_weight = sum(weights) + + # Normalize the weights + normalized_weights = [weight / total_weight * 100 for weight in weights] + + # Sort the neighbors based on the normalized weights in descending order + sorted_neighbors = sorted(zip(neighbors, normalized_weights), key=lambda x: x[1], reverse=True) + + return dict(sorted_neighbors) + +def get_ideal_distribution(i, total): + array = [0] * total # initialize the array with zeros + percentage = 100 / (i + 1) # calculate the percentage value for the current loop + for j in range(i + 1): + array[j] = percentage # format the percentage value with two decimal places + return array + +def compute_cod(f, y): + corr_matrix = np.corrcoef(f,y) + corr = corr_matrix[0,1] + return round(corr**2, 2) + + +def mlam(data): + if isinstance(data, DataFrame): + g = nx.from_pandas_edgelist(data, edge_attr=edge_properties(data)) + elif isinstance(data, nx.Graph): + g = data.copy() + else: + print("data should be a panads dataframe or nx graph") + return + + nx.set_edge_attributes(g, False, name='in_backbone') + for node in g.nodes(): + edge_index = 0 + neighbors_weights = get_neighbor_weights(g, node) + real_distribution = list(neighbors_weights.values()) + neighbors_count = len(neighbors_weights) + old_cod = 0 + if neighbors_count != 1: + for i in range(neighbors_count): + new_cod = compute_cod(real_distribution, get_ideal_distribution(i, neighbors_count)) + if isnan(new_cod): + break + if old_cod <= new_cod: + old_cod = new_cod + edge_index = i + else: + break + if i == neighbors_count-1: + edge_index = i + + for j, neighbour in enumerate(neighbors_weights.keys()): + if j>edge_index: + break + g[node][neighbour]['in_backbone'] = True + + + return Backbone(g, method_name="Multiple Linkage Analysis", property_name="weight", ascending=False, compatible_filters=[boolean_filter], filter_on='Edges') \ No newline at end of file diff --git a/netbone/structural/plam.py b/netbone/structural/plam.py new file mode 100644 index 0000000000000000000000000000000000000000..64a5297f42705266de864b3766ad785771c01914 --- /dev/null +++ b/netbone/structural/plam.py @@ -0,0 +1,32 @@ +import networkx as nx +from netbone.filters import boolean_filter +from netbone.backbone import Backbone +from pandas import DataFrame +from netbone.utils.utils import edge_properties + +def get_max_weight_edge(graph, node): + neighbors = graph.neighbors(node) + max_weight = float('-inf') + max_edge = None + for neighbor in neighbors: + weight = graph[node][neighbor]['weight'] + if weight > max_weight: + max_weight = weight + max_edge = (node, neighbor) + return max_edge[0], max_edge[1], max_weight + +def plam(data): + if isinstance(data, DataFrame): + g = nx.from_pandas_edgelist(data, edge_attr=edge_properties(data)) + elif isinstance(data, nx.Graph): + g = data.copy() + else: + print("data should be a panads dataframe or nx graph") + return + + nx.set_edge_attributes(g, False, name='in_backbone') + for node in g.nodes(): + source, target, weight = get_max_weight_edge(g, node) + g[source][target]['in_backbone'] = True + + return Backbone(g, method_name="Primary Linkage Analysis", property_name="weight", ascending=False, compatible_filters=[boolean_filter], filter_on='Edges') \ No newline at end of file diff --git a/netbone/structural/pmfg.py b/netbone/structural/pmfg.py new file mode 100644 index 0000000000000000000000000000000000000000..191fe4452be68b45ce322c3ad1a4aa227a9585c3 --- /dev/null +++ b/netbone/structural/pmfg.py @@ -0,0 +1,32 @@ +import networkx as nx +from netbone.filters import boolean_filter +from netbone.backbone import Backbone +from pandas import DataFrame +from networkx import Graph +from netbone.utils.utils import edge_properties + +def pmfg(data): + if isinstance(data, DataFrame): + table = data.copy() + elif isinstance(data, Graph): + table = nx.to_pandas_edgelist(data) + else: + print("data should be a panads dataframe or nx graph") + return + + g = nx.from_pandas_edgelist(table, edge_attr=edge_properties(table)) + nx.set_edge_attributes(g, False, name='in_backbone') + + backbone = nx.Graph() + table = table.sort_values(by='weight', ascending=False) + + for row in table.itertuples(): + backbone.add_edge(row.source, row.target) + if not nx.is_planar(backbone): + backbone.remove_edge(row.source, row.target) + else: + g[row.source][row.target]['in_backbone'] = True + if len(backbone.edges()) == 3*(len(g)-2): + break + + return Backbone(g, method_name="Planar Maximally Filtered Graph", property_name="weight", ascending=False, compatible_filters=[boolean_filter], filter_on='Edges') \ No newline at end of file diff --git a/netbone/visualize.py b/netbone/visualize.py index 58d5aa280dee931de47e9e3a8879d160fb36497c..d751ecae21f8f1bc7aa41121b7235d9a1e16ae4c 100644 --- a/netbone/visualize.py +++ b/netbone/visualize.py @@ -238,7 +238,7 @@ def plot_distribution(dist, title): axs.spines['left'].set_color('0.3') - axs.set_xlabel(df[method].index.name) + axs.set_xlabel(prop) axs.set_ylabel('P') axs.legend(loc='center left', bbox_to_anchor=(1.04,0.5))