Adding new methods and the consensual backbone extraction

c6721735 · Yassin · e69d4bc6 · c6721735 · c6721735 · c6721735
Commit c6721735 authored 2 years ago by Yassin
--- a/.gitignore
+++ b/.gitignore
@@ -6,4 +6,7 @@ setup.cfg
 dist/*
 samples/*
 NetBone.egg-info/*
 /.idea/*
\ No newline at end of file
+examples/*
+build/*
+netbone.iml
\ No newline at end of file
--- a/netbone/__init__.py
+++ b/netbone/__init__.py
@@ -16,9 +16,18 @@ from netbone.statistical.marginal_likelihood import MLF
 from netbone.statistical.lans import lans
 from netbone.structural.ultrametric_distance_backbone import ultrametric_distance_backbone
 from netbone.structural.metric_distance_backbone import metric_distance_backbone
-from netbone.statistical.global_threshold import global_threshold
+from netbone.structural.global_threshold import global_threshold
 from netbone.structural.modulairy_backbone import modularity_backbone
 from netbone.structural.maximum_spanning_tree import maximum_spanning_tree
+from netbone.hybrid.glanb import glanb
+from netbone.structural.pmfg import pmfg
+from netbone.structural.plam import plam
+from netbone.structural.mlam import mlam
+from netbone.structural.gspar import gspar
+from netbone.structural.degree import degree
+from netbone.structural.betweenness import betweenness
+# from netbone.statistical.correlation_and_statistic import correlation_and_statistic
 from netbone.filters import threshold_filter, fraction_filter
 from netbone import compare
 from netbone import filters

--- a/netbone/compare.py
+++ b/netbone/compare.py
@@ -90,29 +90,134 @@ class Compare:
            props_res[res].index.name = self.value_name
        return props_res
-    def distribution_ks_statistic(self, increasing=True):
+    def distribution_ks_statistic(self, increasing=True, consent=False):
+        """Compare each backbone's property distributions with the original network's.
+
+        For every property in ``self.props`` the property is evaluated on the
+        original network and on each filtered backbone; the cumulative
+        distribution of each is stored and the first element of
+        ``kstest(original_values, backbone_values)`` is recorded.
+
+        ``consent`` is either a single flag applied to every backbone or a
+        list/tuple with one flag per backbone. The flagged backbones are
+        intersected (shared nodes and edges, isolated nodes dropped) into a
+        'Consensual Backbone' that is evaluated as one extra row.
+
+        Returns ``(ks_statistics, dist)``, or
+        ``(ks_statistics, dist, consensual_backbone)`` when any flag is set.
+        Raises ``Exception`` when no filter values are available.
+        """
        if self.filter == boolean_filter:
            self.filter_values = [0] * len(self.backbones)
        if self.filter_values == []:
            raise Exception('Please enter the filter values.')
+        # A single flag applies to every backbone; expand it to one flag per
+        # backbone so that `True in consent` and `consent[i]` work for both forms.
+        if not isinstance(consent, (list, tuple)):
+            consent = [bool(consent)] * len(self.backbones)
        dist = dict()
-        ks_statistics = pd.DataFrame(index=[backbone.method_name for backbone in self.backbones])
+        if True in consent:
+            ks_statistics = pd.DataFrame(index=[backbone.method_name for backbone in self.backbones] + ['Consensual Backbone'])
+        else:
+            ks_statistics = pd.DataFrame(index=[backbone.method_name for backbone in self.backbones])
        for property in self.props:
            dist_values = dict()
            vals = []
            values0 = self.props[property](self.network)
            dist_values['Original'] = cumulative_dist(property, 'Original', values0, increasing)
+            if True in consent:
+                consensual_backbone = ''
+                nodes_labels = dict(zip(self.network.nodes(), nx.convert_node_labels_to_integers(self.network.copy()).nodes()))
+                inverse_nodes_labels = dict(zip(nx.convert_node_labels_to_integers(self.network.copy()).nodes(), self.network.nodes()))
            for i, backbone in enumerate(self.backbones):
                extracted_backbone = self.filter(backbone, value=self.filter_values[i], narrate=False)
+                if consent[i]:
+                    if consensual_backbone == '':
+                        consensual_backbone = nx.relabel_nodes(extracted_backbone, nodes_labels)
+                    else:
+                        # Use a separate name so the property below is still computed
+                        # on the backbone with its original node labels.
+                        relabelled_backbone = nx.relabel_nodes(extracted_backbone, nodes_labels)
+                        old_consensual = consensual_backbone.copy()
+                        consensual_backbone.remove_nodes_from(n for n in old_consensual if n not in relabelled_backbone)
+                        consensual_backbone.remove_edges_from(e for e in old_consensual.edges if e not in relabelled_backbone.edges)
                values1 = self.props[property](extracted_backbone)
-                dist_values[backbone.method_name] = cumulative_dist(property, extracted_backbone.method_name, values1, increasing)
+                dist_values[backbone.method_name] = cumulative_dist(property, backbone.method_name, values1, increasing)
+                vals.append(kstest(values0, values1)[0])
+            # The consensual row exists whenever ANY backbone consents, not only
+            # when the last one in the loop does.
+            if True in consent:
+                consensual_backbone.remove_nodes_from(list(nx.isolates(consensual_backbone)))
+                consensual_backbone = nx.relabel_nodes(consensual_backbone, inverse_nodes_labels)
+                values1 = self.props[property](consensual_backbone)
+                dist_values['Consensual Backbone'] = cumulative_dist(property, 'Consensual Backbone', values1, increasing)
                vals.append(kstest(values0, values1)[0])
            # ks_statistics = pd.DataFrame(index=['Original'] + [backbone.name for backbone in self.backbones])
            dist[property] = dist_values
            ks_statistics[property] = vals
-        return ks_statistics, dist
+        if True in consent:
+            return ks_statistics, dist, consensual_backbone
+        else:
+            return ks_statistics, dist
+    # def distribution_ks_statistic(self, increasing=True, consent=True):
+    #     if self.filter == boolean_filter:
+    #         self.filter_values = [0] * len(self.backbones)
+    #     if self.filter_values == []:
+    #         raise Exception('Please enter the filter values.')
+    #
+    #     dist = dict()
+    #     if consent:
+    #         ks_statistics = pd.DataFrame(index=[backbone.method_name for backbone in self.backbones] + ['Consensual Backbone'])
+    #     else:
+    #         ks_statistics = pd.DataFrame(index=[backbone.method_name for backbone in self.backbones])
+    #     for property in self.props:
+    #         dist_values = dict()
+    #         vals = []
+    #         values0 = self.props[property](self.network)
+    #         dist_values['Original'] = cumulative_dist(property, 'Original', values0, increasing)
+    #
+    #         if consent:
+    #             consensual_backbone = ''
+    #             nodes_labels = dict(zip(self.network.nodes(), nx.convert_node_labels_to_integers(self.network.copy()).nodes()))
+    #             inverse_nodes_labels = dict(zip(nx.convert_node_labels_to_integers(self.network.copy()).nodes(), self.network.nodes()))
+    #
+    #
+    #         for i, backbone in enumerate(self.backbones):
+    #             extracted_backbone = self.filter(backbone, value=self.filter_values[i], narrate=False)
+    #             if consent:
+    #                 if i==0:
+    #                     consensual_backbone = nx.relabel_nodes(extracted_backbone, nodes_labels)
+    #                 else:
+    #                     extracted_backbone = nx.relabel_nodes(extracted_backbone, nodes_labels)
+    #                     old_consensual = consensual_backbone.copy()
+    #                     consensual_backbone.remove_nodes_from(n for n in old_consensual if n not in extracted_backbone)
+    #                     consensual_backbone.remove_edges_from(e for e in old_consensual.edges if e not in extracted_backbone.edges)
+    #
+    #             values1 = self.props[property](extracted_backbone)
+    #             dist_values[backbone.method_name] = cumulative_dist(property, backbone.method_name, values1, increasing)
+    #             vals.append(kstest(values0, values1)[0])
+    #         if consent:
+    #             consensual_backbone = nx.relabel_nodes(consensual_backbone, inverse_nodes_labels)
+    #             values1 = self.props[property](consensual_backbone)
+    #             dist_values['Consensual Backbone'] = cumulative_dist(property, 'Consensual Backbone', values1, increasing)
+    #             vals.append(kstest(values0, values1)[0])
+    #
+    #         # ks_statistics = pd.DataFrame(index=['Original'] + [backbone.name for backbone in self.backbones])
+    #         dist[property] = dist_values
+    #         ks_statistics[property] = vals
+    #
+    #     if consent:
+    #         return ks_statistics, dist, consensual_backbone
+    #     else:
+    #         return ks_statistics, dist
+    def consent(self):
+        """Return the consensual backbone shared by every filtered backbone.
+
+        Each backbone is filtered with its entry of ``self.filter_values``
+        (all zeros when the boolean filter is used). The result keeps only the
+        nodes and edges present in every filtered backbone, drops nodes left
+        without edges, and carries the network's original node labels.
+
+        Raises ``Exception`` when no filter values are available.
+        """
+        if self.filter == boolean_filter:
+            self.filter_values = [0] * len(self.backbones)
+        if self.filter_values == []:
+            raise Exception('Please enter the filter values.')
+        # Nodes are numbered from 0 in iteration order, which is the mapping
+        # nx.convert_node_labels_to_integers produces with its defaults, but
+        # without copying and relabelling the whole network twice.
+        nodes_labels = {node: index for index, node in enumerate(self.network.nodes())}
+        inverse_nodes_labels = {index: node for node, index in nodes_labels.items()}
+        consensual_backbone = None
+        for i, backbone in enumerate(self.backbones):
+            extracted_backbone = self.filter(backbone, value=self.filter_values[i], narrate=False)
+            relabelled_backbone = nx.relabel_nodes(extracted_backbone, nodes_labels)
+            if consensual_backbone is None:
+                # The first backbone seeds the intersection.
+                consensual_backbone = relabelled_backbone
+                continue
+            # Materialise the removals first: the graph must not change while
+            # it is being iterated.
+            missing_nodes = [n for n in consensual_backbone if n not in relabelled_backbone]
+            consensual_backbone.remove_nodes_from(missing_nodes)
+            missing_edges = [e for e in consensual_backbone.edges if e not in relabelled_backbone.edges]
+            consensual_backbone.remove_edges_from(missing_edges)
+        consensual_backbone.remove_nodes_from(list(nx.isolates(consensual_backbone)))
+        return nx.relabel_nodes(consensual_backbone, inverse_nodes_labels)
--- a/netbone/hybrid/__init__.py
+++ b/netbone/hybrid/__init__.py
--- a/netbone/statistical/glanb.py
+++ b/netbone/statistical/glanb.py
--- a/netbone/measures.py
+++ b/netbone/measures.py
@@ -36,7 +36,7 @@ def reachability(original, G):
    return r/(len(G)*(len(G) - 1))
 def number_connected_components(original, G):
-    nx.number_connected_components(G)
+    return nx.number_connected_components(G)
 def diameter(original, G):
    return ig.Graph.from_networkx(lcc(G)).diameter(directed=False, unconn=True)
@@ -53,8 +53,8 @@ def lcc_weight_fraction(original, G):
 def weights(G):
    return list(nx.get_edge_attributes(G, 'weight').values())
-def degrees(G):
+def degrees(G, weight=None):
-    return list(dict(G.degree()).values())
+    return list(dict(G.degree(weight=weight)).values())
 def average_clustering_coefficient(original, G):
    node_clustering = ig.Graph.from_networkx(G).transitivity_local_undirected(mode="nan")

--- a/netbone/structural/betweenness.py
+++ b/netbone/structural/betweenness.py
+import networkx as nx
+from netbone.filters import threshold_filter, fraction_filter
+from netbone.backbone import Backbone
+from pandas import DataFrame
+from netbone.utils.utils import edge_properties
+def betweenness(data, weighted=True, normalized=True):
+    """Score every edge by its edge betweenness centrality and wrap the graph in a Backbone.
+
+    ``data`` is a pandas edge list or a networkx graph (which is copied); any
+    other input prints a message and returns ``None``. With ``weighted`` the
+    centrality uses the 'weight' edge attribute and is stored as
+    'weighted-betweenness', otherwise it is unweighted and stored as
+    'betweenness'. ``normalized`` is forwarded to networkx.
+    """
+    if not isinstance(data, (DataFrame, nx.Graph)):
+        print("data should be a panads dataframe or nx graph")
+        return
+    if isinstance(data, DataFrame):
+        graph = nx.from_pandas_edgelist(data, edge_attr=edge_properties(data))
+    else:
+        graph = data.copy()
+    # The two variants differ only in their label, attribute name and weight key.
+    if weighted:
+        label, attribute, weight_key = "Weighted Betweenness", 'weighted-betweenness', 'weight'
+    else:
+        label, attribute, weight_key = "Betweenness", 'betweenness', None
+    scores = nx.edge_betweenness_centrality(graph, normalized=normalized, weight=weight_key, seed=100)
+    nx.set_edge_attributes(graph, scores, name=attribute)
+    return Backbone(graph, method_name=label, property_name=attribute, ascending=False, compatible_filters=[threshold_filter, fraction_filter], filter_on='Edges')
--- a/netbone/structural/degree.py
+++ b/netbone/structural/degree.py
+import networkx as nx
+from netbone.filters import threshold_filter, fraction_filter
+from netbone.backbone import Backbone
+from pandas import DataFrame
+from netbone.utils.utils import edge_properties
+def degree(data, weighted=False):
+    """Score every node by its degree and wrap the graph in a Backbone.
+
+    ``data`` is a pandas edge list or a networkx graph (which is copied); any
+    other input prints a message and returns ``None``. With ``weighted`` the
+    degree sums the 'weight' edge attribute and is stored on the nodes as
+    'weighted-degree', otherwise the plain degree is stored as 'degree'.
+    """
+    if not isinstance(data, (DataFrame, nx.Graph)):
+        print("data should be a panads dataframe or nx graph")
+        return
+    if isinstance(data, DataFrame):
+        graph = nx.from_pandas_edgelist(data, edge_attr=edge_properties(data))
+    else:
+        graph = data.copy()
+    # The two variants differ only in their label, attribute name and weight key.
+    if weighted:
+        label, attribute, weight_key = "Weighted Degree", 'weighted-degree', 'weight'
+    else:
+        label, attribute, weight_key = "Degree", 'degree', None
+    node_scores = dict(graph.degree(weight=weight_key))
+    nx.set_node_attributes(graph, node_scores, name=attribute)
+    return Backbone(graph, method_name=label, property_name=attribute, ascending=False, compatible_filters=[threshold_filter, fraction_filter], filter_on='Nodes')
\ No newline at end of file
--- a/netbone/statistical/global_threshold.py
+++ b/netbone/statistical/global_threshold.py
@@ -9,8 +9,7 @@ def global_threshold(data):
    if isinstance(data, DataFrame):
        table = data.copy()
    elif isinstance(data, Graph):
-        table = to_pandas_edgelist(data)
+        table = nx.to_pandas_edgelist(data)
-        is_graph=True
    else:
        print("data should be a panads dataframe or nx graph")
        return

--- a/netbone/structural/gspar.py
+++ b/netbone/structural/gspar.py
+import networkx as nx
+from netbone.filters import threshold_filter, fraction_filter
+from netbone.backbone import Backbone
+from pandas import DataFrame
+from netbone.utils.utils import edge_properties
+def jaccard(a, b):
+    """Return the Jaccard similarity of two iterables: shared items over all distinct items.
+
+    Both arguments are converted to sets first. Raises ``ZeroDivisionError``
+    when both are empty.
+    """
+    first, second = set(a), set(b)
+    shared = first & second
+    combined = first | second
+    return len(shared) / len(combined)
+def get_neighbours(graph, node):
+    """Return the neighbours of ``node`` followed by ``node`` itself, as a list."""
+    closed_neighbourhood = list(dict(graph[node]))
+    closed_neighbourhood.append(node)
+    return closed_neighbourhood
+def gspar(data):
+    """Score every edge by the Jaccard similarity of its endpoints' neighbourhoods.
+
+    ``data`` is a pandas edge list or a networkx graph (which is copied); any
+    other input prints a message and returns ``None``. Each neighbourhood
+    includes the node itself. The score is stored on the edge as 'jaccard-sim'
+    and the graph is returned wrapped in a Backbone.
+    """
+    if not isinstance(data, (DataFrame, nx.Graph)):
+        print("data should be a panads dataframe or nx graph")
+        return
+    if isinstance(data, DataFrame):
+        g = nx.from_pandas_edgelist(data, edge_attr=edge_properties(data))
+    else:
+        g = data.copy()
+    for source, target in g.edges():
+        similarity = jaccard(get_neighbours(g, source), get_neighbours(g, target))
+        g[source][target]['jaccard-sim'] = similarity
+    return Backbone(g, method_name="Global Sparsification", property_name="jaccard-sim", ascending=False, compatible_filters=[threshold_filter, fraction_filter], filter_on='Edges')
\ No newline at end of file
--- a/netbone/structural/maximum_spanning_tree.py
+++ b/netbone/structural/maximum_spanning_tree.py
@@ -2,30 +2,26 @@ import networkx as nx
 import pandas as pd
 from netbone.backbone import Backbone
 from netbone.filters import boolean_filter
+from netbone.utils.utils import edge_properties
 # algo: minimum_spanning_tree
 # calculating MSP
 def maximum_spanning_tree(data):
+    # Flags the edges of the maximum spanning tree (by 'weight') with
+    # in_backbone=True and every other edge with in_backbone=False, then
+    # returns the graph wrapped in a Backbone for the boolean filter.
+    # Input that is neither a DataFrame nor an nx.Graph prints a message and
+    # returns None.
    if isinstance(data, pd.DataFrame):
-        G = nx.from_pandas_edgelist(data, edge_attr='weight', create_using=nx.Graph())
+        g = nx.from_pandas_edgelist(data, edge_attr=edge_properties(data))
    elif isinstance(data, nx.Graph):
-        G = data.copy()
+        g = data.copy()
    else:
        print("data should be a panads dataframe or nx graph")
        return
+    # Start with every edge flagged, then clear the flag on edges outside the tree.
+    nx.set_edge_attributes(g, True, name='in_backbone')
+    msp = nx.maximum_spanning_tree(g, weight='weight')
-    df = nx.to_pandas_edgelist((G))
+    missing_edges = {edge: {"in_backbone": False} for edge in set(g.edges()).difference(set(msp.edges()))}
-    df['distance'] = df.apply(lambda row : 1/row['weight'], axis = 1)
+    nx.set_edge_attributes(g, missing_edges)
-    nx.set_edge_attributes(G, nx.get_edge_attributes(nx.from_pandas_edgelist(df, edge_attr='distance'), 'distance'), name='distance')
+    return Backbone(g, method_name="Maximum Spanning Tree", property_name="weight", ascending=False, compatible_filters=[boolean_filter], filter_on='Edges')
-    msp = nx.minimum_spanning_tree(G, weight='distance')
-    nx.set_edge_attributes(G, True, name='in_backbone')
-    missing_edges = {edge: {"in_backbone": False} for edge in set(G.edges()).difference(set(msp.edges()))}
-    nx.set_edge_attributes(G, missing_edges)
-    return Backbone(G, method_name="Maximum Spanning Tree", property_name="distance", ascending=False, compatible_filters=[boolean_filter], filter_on='Edges')
--- a/netbone/structural/mlam.py
+++ b/netbone/structural/mlam.py
+import numpy as np
+import networkx as nx
+from netbone.filters import boolean_filter
+from netbone.backbone import Backbone
+from pandas import DataFrame
+from netbone.utils.utils import edge_properties
+from math import isnan
+def get_neighbor_weights(graph, node):
+    """Map each neighbour of ``node`` to its percentage share of the node's total edge weight.
+
+    The returned dict is ordered by share, largest first; neighbours with
+    equal shares keep their adjacency order.
+    """
+    adjacency = graph[node]
+    raw_weights = [adjacency[neighbor]['weight'] for neighbor in adjacency]
+    total = sum(raw_weights)
+    shares = [(neighbor, weight / total * 100) for neighbor, weight in zip(adjacency, raw_weights)]
+    # list.sort is stable, so ties stay in adjacency order even with reverse=True.
+    shares.sort(key=lambda pair: pair[1], reverse=True)
+    return dict(shares)
+def get_ideal_distribution(i, total):
+    """Return a list of length ``total`` whose first ``i + 1`` entries share 100 equally.
+
+    The remaining entries are 0. Raises ``IndexError`` when ``i + 1`` exceeds
+    ``total``.
+    """
+    share = 100 / (i + 1)
+    if i >= total:
+        # More leading entries requested than the list has slots.
+        raise IndexError("list assignment index out of range")
+    return [share if position <= i else 0 for position in range(total)]
+def compute_cod(f, y):
+    """Return the squared correlation coefficient of ``f`` and ``y``, rounded to 2 decimals."""
+    correlation = np.corrcoef(f, y)[0, 1]
+    return round(correlation ** 2, 2)
+def mlam(data):
+    # For every node, flags its strongest edges with in_backbone=True and
+    # returns the graph wrapped in a Backbone for the boolean filter.
+    # Input that is neither a DataFrame nor an nx.Graph prints a message and
+    # returns None.
+    if isinstance(data, DataFrame):
+        g = nx.from_pandas_edgelist(data, edge_attr=edge_properties(data))
+    elif isinstance(data, nx.Graph):
+        g = data.copy()
+    else:
+        print("data should be a panads dataframe or nx graph")
+        return
+    # Every edge starts outside the backbone.
+    nx.set_edge_attributes(g, False, name='in_backbone')
+    for node in g.nodes():
+        # edge_index is the position (in the weight-sorted neighbour list) of
+        # the last neighbour whose edge is kept; 0 keeps only the strongest.
+        edge_index = 0
+        # Neighbours mapped to their percentage share of the node's weight,
+        # largest share first.
+        neighbors_weights =  get_neighbor_weights(g, node)
+        real_distribution = list(neighbors_weights.values())
+        neighbors_count = len(neighbors_weights)
+        old_cod = 0
+        # A node with a single neighbour keeps that edge without any search.
+        if neighbors_count != 1:
+            for i in range(neighbors_count):
+                # Fit of the real shares against the ideal case in which the
+                # top i + 1 neighbours share the weight equally.
+                new_cod = compute_cod(real_distribution, get_ideal_distribution(i, neighbors_count))
+                # compute_cod can yield nan; the search stops there and the
+                # best cut-off found so far is kept.
+                if isnan(new_cod):
+                    break
+                # Keep extending the cut-off while the fit does not get worse;
+                # stop at the first decrease.
+                if old_cod <= new_cod:
+                    old_cod = new_cod
+                    edge_index = i
+                else:
+                    break
+                if i == neighbors_count-1:
+                    edge_index = i
+        # Flag the edges to the first edge_index + 1 neighbours.
+        for j, neighbour in enumerate(neighbors_weights.keys()):
+            if j>edge_index:
+                break
+            g[node][neighbour]['in_backbone'] = True
+    return Backbone(g, method_name="Multiple Linkage Analysis", property_name="weight", ascending=False, compatible_filters=[boolean_filter], filter_on='Edges')
\ No newline at end of file
--- a/netbone/structural/plam.py
+++ b/netbone/structural/plam.py
+import networkx as nx
+from netbone.filters import boolean_filter
+from netbone.backbone import Backbone
+from pandas import DataFrame
+from netbone.utils.utils import edge_properties
+def get_max_weight_edge(graph, node):
+    """Return ``(node, neighbour, weight)`` for the heaviest edge incident to ``node``.
+
+    On ties the neighbour met first wins. Raises ``TypeError`` when no edge
+    qualifies (for example when the node has no neighbours).
+    """
+    best_edge = None
+    best_weight = float('-inf')
+    for candidate in graph.neighbors(node):
+        candidate_weight = graph[node][candidate]['weight']
+        # Strict comparison: an equal weight does not replace an earlier pick.
+        if candidate_weight > best_weight:
+            best_edge, best_weight = (node, candidate), candidate_weight
+    source, target = best_edge
+    return source, target, best_weight
+def plam(data):
+    """Flag each node's heaviest edge as part of the backbone.
+
+    ``data`` is a pandas edge list or a networkx graph (which is copied); any
+    other input prints a message and returns ``None``. Every edge gets an
+    'in_backbone' attribute: True when it is the heaviest edge of at least one
+    of its endpoints, False otherwise. Nodes without edges are skipped. The
+    graph is returned wrapped in a Backbone for the boolean filter.
+    """
+    if isinstance(data, DataFrame):
+        g = nx.from_pandas_edgelist(data, edge_attr=edge_properties(data))
+    elif isinstance(data, nx.Graph):
+        g = data.copy()
+    else:
+        print("data should be a panads dataframe or nx graph")
+        return
+    nx.set_edge_attributes(g, False, name='in_backbone')
+    for node in g.nodes():
+        # An isolated node has no heaviest edge; get_max_weight_edge would
+        # fail with a TypeError on it, so there is nothing to flag.
+        if not g[node]:
+            continue
+        source, target, weight = get_max_weight_edge(g, node)
+        g[source][target]['in_backbone'] = True
+    return Backbone(g, method_name="Primary Linkage Analysis", property_name="weight", ascending=False, compatible_filters=[boolean_filter], filter_on='Edges')
\ No newline at end of file
--- a/netbone/structural/pmfg.py
+++ b/netbone/structural/pmfg.py
+import networkx as nx
+from netbone.filters import boolean_filter
+from netbone.backbone import Backbone
+from pandas import DataFrame
+from networkx import Graph
+from netbone.utils.utils import edge_properties
+def pmfg(data):
+    """Flag the edges of a greedily built planar subgraph as the backbone.
+
+    ``data`` is a pandas edge list or a networkx graph (converted to an edge
+    list); any other input prints a message and returns ``None``. Edges are
+    tried in decreasing 'weight' order and kept only when the growing subgraph
+    stays planar. The search stops once the subgraph has ``3 * (n - 2)`` edges,
+    with ``n`` the number of nodes. Kept edges get in_backbone=True, all others
+    False, and the graph is returned wrapped in a Backbone.
+    """
+    if not isinstance(data, (DataFrame, Graph)):
+        print("data should be a panads dataframe or nx graph")
+        return
+    if isinstance(data, DataFrame):
+        table = data.copy()
+    else:
+        table = nx.to_pandas_edgelist(data)
+    g = nx.from_pandas_edgelist(table, edge_attr=edge_properties(table))
+    nx.set_edge_attributes(g, False, name='in_backbone')
+    planar_graph = nx.Graph()
+    # g never changes size in the loop, so the stopping size is fixed up front.
+    edge_limit = 3*(len(g)-2)
+    for row in table.sort_values(by='weight', ascending=False).itertuples():
+        source, target = row.source, row.target
+        planar_graph.add_edge(source, target)
+        if nx.is_planar(planar_graph):
+            g[source][target]['in_backbone'] = True
+        else:
+            # The edge breaks planarity: undo the tentative insertion.
+            planar_graph.remove_edge(source, target)
+        if planar_graph.number_of_edges() == edge_limit:
+            break
+    return Backbone(g, method_name="Planar Maximally Filtered Graph", property_name="weight", ascending=False, compatible_filters=[boolean_filter], filter_on='Edges')
\ No newline at end of file
--- a/netbone/visualize.py
+++ b/netbone/visualize.py
@@ -238,7 +238,7 @@ def plot_distribution(dist, title):
        axs.spines['left'].set_color('0.3')
-        axs.set_xlabel(df[method].index.name)
+        axs.set_xlabel(prop)
        axs.set_ylabel('P')
        axs.legend(loc='center left', bbox_to_anchor=(1.04,0.5))