From c6721735124041debb8374a39ff523e0db7f3e89 Mon Sep 17 00:00:00 2001 From: Yassin <aliyassin4@hotmail.com> Date: Mon, 26 Jun 2023 10:30:48 +0200 Subject: [PATCH] Adding new methods and the consensual backbone extraction --- .gitignore | 5 +- netbone/__init__.py | 11 +- netbone/compare.py | 115 +++++++++++++++++- netbone/hybrid/__init__.py | 0 netbone/{statistical => hybrid}/glanb.py | 0 netbone/measures.py | 6 +- netbone/structural/betweenness.py | 24 ++++ netbone/structural/degree.py | 21 ++++ .../global_threshold.py | 3 +- netbone/structural/gspar.py | 29 +++++ netbone/structural/maximum_spanning_tree.py | 20 ++- netbone/structural/mlam.py | 72 +++++++++++ netbone/structural/plam.py | 32 +++++ netbone/structural/pmfg.py | 32 +++++ netbone/visualize.py | 2 +- 15 files changed, 347 insertions(+), 25 deletions(-) create mode 100644 netbone/hybrid/__init__.py rename netbone/{statistical => hybrid}/glanb.py (100%) create mode 100644 netbone/structural/betweenness.py create mode 100644 netbone/structural/degree.py rename netbone/{statistical => structural}/global_threshold.py (92%) create mode 100644 netbone/structural/gspar.py create mode 100644 netbone/structural/mlam.py create mode 100644 netbone/structural/plam.py create mode 100644 netbone/structural/pmfg.py diff --git a/.gitignore b/.gitignore index fb14214..b16ebdd 100644 --- a/.gitignore +++ b/.gitignore @@ -6,4 +6,7 @@ setup.cfg dist/* samples/* NetBone.egg-info/* -/.idea/* \ No newline at end of file +/.idea/* +examples/* +build/* +netbone.iml \ No newline at end of file diff --git a/netbone/__init__.py b/netbone/__init__.py index f5e20e1..2bd9d8a 100644 --- a/netbone/__init__.py +++ b/netbone/__init__.py @@ -16,9 +16,18 @@ from netbone.statistical.marginal_likelihood import MLF from netbone.statistical.lans import lans from netbone.structural.ultrametric_distance_backbone import ultrametric_distance_backbone from netbone.structural.metric_distance_backbone import metric_distance_backbone -from netbone.statistical.global_threshold import global_threshold +from netbone.structural.global_threshold import global_threshold from netbone.structural.modulairy_backbone import modularity_backbone from netbone.structural.maximum_spanning_tree import maximum_spanning_tree +from netbone.hybrid.glanb import glanb +from netbone.structural.pmfg import pmfg +from netbone.structural.plam import plam +from netbone.structural.mlam import mlam +from netbone.structural.gspar import gspar +from netbone.structural.degree import degree +from netbone.structural.betweenness import betweenness +# from netbone.statistical.correlation_and_statistic import correlation_and_statistic + from netbone.filters import threshold_filter, fraction_filter from netbone import compare from netbone import filters diff --git a/netbone/compare.py b/netbone/compare.py index 16c40a9..bf04168 100644 --- a/netbone/compare.py +++ b/netbone/compare.py @@ -90,29 +90,134 @@ class Compare: props_res[res].index.name = self.value_name return props_res - def distribution_ks_statistic(self, increasing=True): + def distribution_ks_statistic(self, increasing=True, consent=False): if self.filter == boolean_filter: self.filter_values = [0] * len(self.backbones) if self.filter_values == []: raise Exception('Please enter the filter values.') + cons = [] + if consent == False: + for backbon in self.backbones: + cons.append(False) + consent = cons dist = dict() - ks_statistics = pd.DataFrame(index=[backbone.method_name for backbone in self.backbones]) - + if True in consent: + ks_statistics = pd.DataFrame(index=[backbone.method_name for backbone in self.backbones] + ['Consensual Backbone']) + else: + ks_statistics = pd.DataFrame(index=[backbone.method_name for backbone in self.backbones]) for property in self.props: dist_values = dict() vals = [] values0 = self.props[property](self.network) dist_values['Original'] = cumulative_dist(property, 'Original', values0, increasing) + if True in consent: + consensual_backbone = '' + nodes_labels = dict(zip(self.network.nodes(), nx.convert_node_labels_to_integers(self.network.copy()).nodes())) + inverse_nodes_labels = dict(zip(nx.convert_node_labels_to_integers(self.network.copy()).nodes(), self.network.nodes())) + + for i, backbone in enumerate(self.backbones): extracted_backbone = self.filter(backbone, value=self.filter_values[i], narrate=False) + if consent[i]: + if consensual_backbone == '': + consensual_backbone = nx.relabel_nodes(extracted_backbone, nodes_labels) + else: + extracted_backbone = nx.relabel_nodes(extracted_backbone, nodes_labels) + old_consensual = consensual_backbone.copy() + consensual_backbone.remove_nodes_from(n for n in old_consensual if n not in extracted_backbone) + consensual_backbone.remove_edges_from(e for e in old_consensual.edges if e not in extracted_backbone.edges) + values1 = self.props[property](extracted_backbone) - dist_values[backbone.method_name] = cumulative_dist(property, extracted_backbone.method_name, values1, increasing) + dist_values[backbone.method_name] = cumulative_dist(property, backbone.method_name, values1, increasing) + vals.append(kstest(values0, values1)[0]) + if consent[i]: + consensual_backbone.remove_nodes_from(list(nx.isolates(consensual_backbone))) + consensual_backbone = nx.relabel_nodes(consensual_backbone, inverse_nodes_labels) + values1 = self.props[property](consensual_backbone) + dist_values['Consensual Backbone'] = cumulative_dist(property, 'Consensual Backbone', values1, increasing) vals.append(kstest(values0, values1)[0]) # ks_statistics = pd.DataFrame(index=['Original'] + [backbone.name for backbone in self.backbones]) dist[property] = dist_values ks_statistics[property] = vals - return ks_statistics, dist + if True in consent: + return ks_statistics, dist, consensual_backbone + else: + return ks_statistics, dist + + # def distribution_ks_statistic(self, increasing=True, consent=True): + # if self.filter == boolean_filter: + # self.filter_values = [0] * len(self.backbones) + # if self.filter_values == []: + # raise Exception('Please enter the filter values.') + # + # dist = dict() + # if consent: + # ks_statistics = pd.DataFrame(index=[backbone.method_name for backbone in self.backbones] + ['Consensual Backbone']) + # else: + # ks_statistics = pd.DataFrame(index=[backbone.method_name for backbone in self.backbones]) + # for property in self.props: + # dist_values = dict() + # vals = [] + # values0 = self.props[property](self.network) + # dist_values['Original'] = cumulative_dist(property, 'Original', values0, increasing) + # + # if consent: + # consensual_backbone = '' + # nodes_labels = dict(zip(self.network.nodes(), nx.convert_node_labels_to_integers(self.network.copy()).nodes())) + # inverse_nodes_labels = dict(zip(nx.convert_node_labels_to_integers(self.network.copy()).nodes(), self.network.nodes())) + # + # + # for i, backbone in enumerate(self.backbones): + # extracted_backbone = self.filter(backbone, value=self.filter_values[i], narrate=False) + # if consent: + # if i==0: + # consensual_backbone = nx.relabel_nodes(extracted_backbone, nodes_labels) + # else: + # extracted_backbone = nx.relabel_nodes(extracted_backbone, nodes_labels) + # old_consensual = consensual_backbone.copy() + # consensual_backbone.remove_nodes_from(n for n in old_consensual if n not in extracted_backbone) + # consensual_backbone.remove_edges_from(e for e in old_consensual.edges if e not in extracted_backbone.edges) + # + # values1 = self.props[property](extracted_backbone) + # dist_values[backbone.method_name] = cumulative_dist(property, backbone.method_name, values1, increasing) + # vals.append(kstest(values0, values1)[0]) + # if consent: + # consensual_backbone = nx.relabel_nodes(consensual_backbone, inverse_nodes_labels) + # values1 = self.props[property](consensual_backbone) + # dist_values['Consensual Backbone'] = cumulative_dist(property, 'Consensual Backbone', values1, increasing) + # vals.append(kstest(values0, values1)[0]) + # + # # ks_statistics = pd.DataFrame(index=['Original'] + [backbone.name for backbone in self.backbones]) + # dist[property] = dist_values + # ks_statistics[property] = vals + # + # if consent: + # return ks_statistics, dist, consensual_backbone + # else: + # return ks_statistics, dist + + def consent(self): + if self.filter == boolean_filter: + self.filter_values = [0] * len(self.backbones) + if self.filter_values == []: + raise Exception('Please enter the filter values.') + + nodes_labels = dict(zip(self.network.nodes(), nx.convert_node_labels_to_integers(self.network.copy()).nodes())) + inverse_nodes_labels = dict(zip(nx.convert_node_labels_to_integers(self.network.copy()).nodes(), self.network.nodes())) + consensual_backbone = '' + for i, backbone in enumerate(self.backbones): + extracted_backbone = self.filter(backbone, value=self.filter_values[i], narrate=False) + if i==0: + consensual_backbone = nx.relabel_nodes(extracted_backbone, nodes_labels) + else: + extracted_backbone = nx.relabel_nodes(extracted_backbone, nodes_labels) + old_consensual = consensual_backbone.copy() + consensual_backbone.remove_nodes_from(n for n in old_consensual if n not in extracted_backbone) + consensual_backbone.remove_edges_from(e for e in old_consensual.edges if e not in extracted_backbone.edges) + + consensual_backbone.remove_nodes_from(list(nx.isolates(consensual_backbone))) + return nx.relabel_nodes(consensual_backbone, inverse_nodes_labels) diff --git a/netbone/hybrid/__init__.py b/netbone/hybrid/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/netbone/statistical/glanb.py b/netbone/hybrid/glanb.py similarity index 100% rename from netbone/statistical/glanb.py rename to netbone/hybrid/glanb.py diff --git a/netbone/measures.py b/netbone/measures.py index e97bfc1..55ca634 100644 --- a/netbone/measures.py +++ b/netbone/measures.py @@ -36,7 +36,7 @@ def reachability(original, G): return r/(len(G)*(len(G) - 1)) def number_connected_components(original, G): - nx.number_connected_components(G) + return nx.number_connected_components(G) def diameter(original, G): return ig.Graph.from_networkx(lcc(G)).diameter(directed=False, unconn=True) @@ -53,8 +53,8 @@ def lcc_weight_fraction(original, G): def weights(G): return list(nx.get_edge_attributes(G, 'weight').values()) -def degrees(G): - return list(dict(G.degree()).values()) +def degrees(G, weight=None): + return list(dict(G.degree(weight=weight)).values()) def average_clustering_coefficient(original, G): node_clustering = ig.Graph.from_networkx(G).transitivity_local_undirected(mode="nan") diff --git a/netbone/structural/betweenness.py b/netbone/structural/betweenness.py new file mode 100644 index 0000000..7c2d09c --- /dev/null +++ b/netbone/structural/betweenness.py @@ -0,0 +1,24 @@ +import networkx as nx +from netbone.filters import threshold_filter, fraction_filter +from netbone.backbone import Backbone +from pandas import DataFrame +from netbone.utils.utils import edge_properties + +def betweenness(data, weighted=True, normalized=True): + if isinstance(data, DataFrame): + g = nx.from_pandas_edgelist(data, edge_attr=edge_properties(data)) + elif isinstance(data, nx.Graph): + g = data.copy() + else: + print("data should be a panads dataframe or nx graph") + return + + + if weighted: + nx.set_edge_attributes(g, nx.edge_betweenness_centrality(g, normalized=normalized, weight='weight', seed=100), name='weighted-betweenness') + return Backbone(g, method_name="Weighted Betweenness", property_name="weighted-betweenness", ascending=False, compatible_filters=[threshold_filter, fraction_filter], filter_on='Edges') + else: + nx.set_edge_attributes(g, nx.edge_betweenness_centrality(g, normalized=normalized, seed=100), name='betweenness') + return Backbone(g, method_name="Betweenness", property_name="betweenness", ascending=False, compatible_filters=[threshold_filter, fraction_filter], filter_on='Edges') + + diff --git a/netbone/structural/degree.py b/netbone/structural/degree.py new file mode 100644 index 0000000..a738cfc --- /dev/null +++ b/netbone/structural/degree.py @@ -0,0 +1,21 @@ +import networkx as nx +from netbone.filters import threshold_filter, fraction_filter +from netbone.backbone import Backbone +from pandas import DataFrame +from netbone.utils.utils import edge_properties + +def degree(data, weighted=False): + if isinstance(data, DataFrame): + g = nx.from_pandas_edgelist(data, edge_attr=edge_properties(data)) + elif isinstance(data, nx.Graph): + g = data.copy() + else: + print("data should be a panads dataframe or nx graph") + return + + if weighted: + nx.set_node_attributes(g,dict(g.degree(weight='weight')), name='weighted-degree') + return Backbone(g, method_name="Weighted Degree", property_name="weighted-degree", ascending=False, compatible_filters=[threshold_filter, fraction_filter], filter_on='Nodes') + else: + nx.set_node_attributes(g,dict(g.degree()), name='degree') + return Backbone(g, method_name="Degree", property_name="degree", ascending=False, compatible_filters=[threshold_filter, fraction_filter], filter_on='Nodes') \ No newline at end of file diff --git a/netbone/statistical/global_threshold.py b/netbone/structural/global_threshold.py similarity index 92% rename from netbone/statistical/global_threshold.py rename to netbone/structural/global_threshold.py index b282b2d..bf3da7e 100644 --- a/netbone/statistical/global_threshold.py +++ b/netbone/structural/global_threshold.py @@ -9,8 +9,7 @@ def global_threshold(data): if isinstance(data, DataFrame): table = data.copy() elif isinstance(data, Graph): - table = to_pandas_edgelist(data) - is_graph=True + table = nx.to_pandas_edgelist(data) else: print("data should be a panads dataframe or nx graph") return diff --git a/netbone/structural/gspar.py b/netbone/structural/gspar.py new file mode 100644 index 0000000..597da7d --- /dev/null +++ b/netbone/structural/gspar.py @@ -0,0 +1,29 @@ +import networkx as nx +from netbone.filters import threshold_filter, fraction_filter +from netbone.backbone import Backbone +from pandas import DataFrame +from netbone.utils.utils import edge_properties + +def jaccard(a, b): + # convert to set + a = set(a) + b = set(b) + # calucate jaccard similarity + return float(len(a.intersection(b))) / len(a.union(b)) + +def get_neighbours(graph, node): + return list(dict(graph[node]).keys()) + [node] + +def gspar(data): + if isinstance(data, DataFrame): + g = nx.from_pandas_edgelist(data, edge_attr=edge_properties(data)) + elif isinstance(data, nx.Graph): + g = data.copy() + else: + print("data should be a panads dataframe or nx graph") + return + + for u, v in g.edges(): + g[u][v]['jaccard-sim'] = jaccard(get_neighbours(g, u), get_neighbours(g, v)) + + return Backbone(g, method_name="Global Sparsification", property_name="jaccard-sim", ascending=False, compatible_filters=[threshold_filter, fraction_filter], filter_on='Edges') \ No newline at end of file diff --git a/netbone/structural/maximum_spanning_tree.py b/netbone/structural/maximum_spanning_tree.py index 3d68beb..993e998 100644 --- a/netbone/structural/maximum_spanning_tree.py +++ b/netbone/structural/maximum_spanning_tree.py @@ -2,30 +2,26 @@ import networkx as nx import pandas as pd from netbone.backbone import Backbone from netbone.filters import boolean_filter +from netbone.utils.utils import edge_properties # algo: minimum_spanning_tree # calculating MSP def maximum_spanning_tree(data): if isinstance(data, pd.DataFrame): - G = nx.from_pandas_edgelist(data, edge_attr='weight', create_using=nx.Graph()) + g = nx.from_pandas_edgelist(data, edge_attr=edge_properties(data)) elif isinstance(data, nx.Graph): - G = data.copy() + g = data.copy() else: print("data should be a panads dataframe or nx graph") return + nx.set_edge_attributes(g, True, name='in_backbone') + msp = nx.maximum_spanning_tree(g, weight='weight') - df = nx.to_pandas_edgelist((G)) - df['distance'] = df.apply(lambda row : 1/row['weight'], axis = 1) + missing_edges = {edge: {"in_backbone": False} for edge in set(g.edges()).difference(set(msp.edges()))} + nx.set_edge_attributes(g, missing_edges) - nx.set_edge_attributes(G, nx.get_edge_attributes(nx.from_pandas_edgelist(df, edge_attr='distance'), 'distance'), name='distance') - msp = nx.minimum_spanning_tree(G, weight='distance') - nx.set_edge_attributes(G, True, name='in_backbone') - - missing_edges = {edge: {"in_backbone": False} for edge in set(G.edges()).difference(set(msp.edges()))} - nx.set_edge_attributes(G, missing_edges) - - return Backbone(G, method_name="Maximum Spanning Tree", property_name="distance", ascending=False, compatible_filters=[boolean_filter], filter_on='Edges') + return Backbone(g, method_name="Maximum Spanning Tree", property_name="weight", ascending=False, compatible_filters=[boolean_filter], filter_on='Edges') diff --git a/netbone/structural/mlam.py b/netbone/structural/mlam.py new file mode 100644 index 0000000..051961d --- /dev/null +++ b/netbone/structural/mlam.py @@ -0,0 +1,72 @@ +import numpy as np +import networkx as nx +from netbone.filters import boolean_filter +from netbone.backbone import Backbone +from pandas import DataFrame +from netbone.utils.utils import edge_properties +from math import isnan +def get_neighbor_weights(graph, node): + # Get the neighbors and weights of the given node from the graph + neighbors = graph[node].keys() + weights = [graph[node][neighbor]['weight'] for neighbor in neighbors] + + # Calculate the total weight + total_weight = sum(weights) + + # Normalize the weights + normalized_weights = [weight / total_weight * 100 for weight in weights] + + # Sort the neighbors based on the normalized weights in descending order + sorted_neighbors = sorted(zip(neighbors, normalized_weights), key=lambda x: x[1], reverse=True) + + return dict(sorted_neighbors) + +def get_ideal_distribution(i, total): + array = [0] * total # initialize the array with zeros + percentage = 100 / (i + 1) # calculate the percentage value for the current loop + for j in range(i + 1): + array[j] = percentage # format the percentage value with two decimal places + return array + +def compute_cod(f, y): + corr_matrix = np.corrcoef(f,y) + corr = corr_matrix[0,1] + return round(corr**2, 2) + + +def mlam(data): + if isinstance(data, DataFrame): + g = nx.from_pandas_edgelist(data, edge_attr=edge_properties(data)) + elif isinstance(data, nx.Graph): + g = data.copy() + else: + print("data should be a panads dataframe or nx graph") + return + + nx.set_edge_attributes(g, False, name='in_backbone') + for node in g.nodes(): + edge_index = 0 + neighbors_weights = get_neighbor_weights(g, node) + real_distribution = list(neighbors_weights.values()) + neighbors_count = len(neighbors_weights) + old_cod = 0 + if neighbors_count != 1: + for i in range(neighbors_count): + new_cod = compute_cod(real_distribution, get_ideal_distribution(i, neighbors_count)) + if isnan(new_cod): + break + if old_cod <= new_cod: + old_cod = new_cod + edge_index = i + else: + break + if i == neighbors_count-1: + edge_index = i + + for j, neighbour in enumerate(neighbors_weights.keys()): + if j>edge_index: + break + g[node][neighbour]['in_backbone'] = True + + + return Backbone(g, method_name="Multiple Linkage Analysis", property_name="weight", ascending=False, compatible_filters=[boolean_filter], filter_on='Edges') \ No newline at end of file diff --git a/netbone/structural/plam.py b/netbone/structural/plam.py new file mode 100644 index 0000000..64a5297 --- /dev/null +++ b/netbone/structural/plam.py @@ -0,0 +1,32 @@ +import networkx as nx +from netbone.filters import boolean_filter +from netbone.backbone import Backbone +from pandas import DataFrame +from netbone.utils.utils import edge_properties + +def get_max_weight_edge(graph, node): + neighbors = graph.neighbors(node) + max_weight = float('-inf') + max_edge = None + for neighbor in neighbors: + weight = graph[node][neighbor]['weight'] + if weight > max_weight: + max_weight = weight + max_edge = (node, neighbor) + return max_edge[0], max_edge[1], max_weight + +def plam(data): + if isinstance(data, DataFrame): + g = nx.from_pandas_edgelist(data, edge_attr=edge_properties(data)) + elif isinstance(data, nx.Graph): + g = data.copy() + else: + print("data should be a panads dataframe or nx graph") + return + + nx.set_edge_attributes(g, False, name='in_backbone') + for node in g.nodes(): + source, target, weight = get_max_weight_edge(g, node) + g[source][target]['in_backbone'] = True + + return Backbone(g, method_name="Primary Linkage Analysis", property_name="weight", ascending=False, compatible_filters=[boolean_filter], filter_on='Edges') \ No newline at end of file diff --git a/netbone/structural/pmfg.py b/netbone/structural/pmfg.py new file mode 100644 index 0000000..191fe44 --- /dev/null +++ b/netbone/structural/pmfg.py @@ -0,0 +1,32 @@ +import networkx as nx +from netbone.filters import boolean_filter +from netbone.backbone import Backbone +from pandas import DataFrame +from networkx import Graph +from netbone.utils.utils import edge_properties + +def pmfg(data): + if isinstance(data, DataFrame): + table = data.copy() + elif isinstance(data, Graph): + table = nx.to_pandas_edgelist(data) + else: + print("data should be a panads dataframe or nx graph") + return + + g = nx.from_pandas_edgelist(table, edge_attr=edge_properties(table)) + nx.set_edge_attributes(g, False, name='in_backbone') + + backbone = nx.Graph() + table = table.sort_values(by='weight', ascending=False) + + for row in table.itertuples(): + backbone.add_edge(row.source, row.target) + if not nx.is_planar(backbone): + backbone.remove_edge(row.source, row.target) + else: + g[row.source][row.target]['in_backbone'] = True + if len(backbone.edges()) == 3*(len(g)-2): + break + + return Backbone(g, method_name="Planar Maximally Filtered Graph", property_name="weight", ascending=False, compatible_filters=[boolean_filter], filter_on='Edges') \ No newline at end of file diff --git a/netbone/visualize.py b/netbone/visualize.py index 58d5aa2..d751eca 100644 --- a/netbone/visualize.py +++ b/netbone/visualize.py @@ -238,7 +238,7 @@ def plot_distribution(dist, title): axs.spines['left'].set_color('0.3') - axs.set_xlabel(df[method].index.name) + axs.set_xlabel(prop) axs.set_ylabel('P') axs.legend(loc='center left', bbox_to_anchor=(1.04,0.5)) -- GitLab