From 56a37278fed9678adf26a5ecce4707977a31b4d9 Mon Sep 17 00:00:00 2001 From: Abd Errahmane Kiouche <abd-errahmane.kiouche@etu.univ-lyon1.fr> Date: Fri, 15 Dec 2023 10:35:14 +0000 Subject: [PATCH] Upload New File --- (p,t)_sparsification/p_k_compression.cpp | 391 +++++++++++++++++++++++ 1 file changed, 391 insertions(+) create mode 100644 (p,t)_sparsification/p_k_compression.cpp diff --git a/(p,t)_sparsification/p_k_compression.cpp b/(p,t)_sparsification/p_k_compression.cpp new file mode 100644 index 0000000..e068a8a --- /dev/null +++ b/(p,t)_sparsification/p_k_compression.cpp @@ -0,0 +1,391 @@ +// +// Created by Kiouche on 1/20/2020. +// + +#include "p_k_compression.h" +#include <chrono> +#include <map> +#include <list> +#include <iostream> +#include <algorithm> +#include <random> +#include <queue> + +#include "hash.h" + + + +namespace std { + + + bool test_insert(edge &e, bool directed,graph &constructed_graph,graph &compressed_graph ,int k, vector<double> &p){ + uint32_t first, second; + first = e.first; + second = e.second; + if(!directed) { + if (constructed_graph[e.first].size() > constructed_graph[e.second].size()) { + first = e.second; + second = e.first; + } + } + bool insert = BFS(first,compressed_graph,constructed_graph,p,k); + if ( insert){ + return true; + } + else if (!directed) { + return BFS(second,compressed_graph,constructed_graph,p,k); + } + else return insert; + } + + void get_neighbors(uint32_t node, graph &g,unordered_map<int,unordered_set<uint32_t>> &neighbors, + int &maxDepth){ + + // Mark all the vertices as not visited + unordered_map<uint32_t , int> node_visited; + // Create a queue for BFS + deque<uint32_t> queue; + // array_queue *queue = new array_queue(); + // Mark the current node as visited and enqueue it + node_visited[node]= 1; + queue.push_back(node); + + int currentDepth = 1, + elementsToDepthIncrease = 1, + nextElementsToDepthIncrease = 0; + + while(!queue.empty()) { + // Dequeue a vertex from queue and print it + uint32_t s = queue.front(); + queue.pop_front(); + // node_visited[s] = 1; + for (auto &v : g[s]){ + if (node_visited[v]==0) { + nextElementsToDepthIncrease++; + node_visited[v]=1; + queue.push_back(v); + for (int i = currentDepth; i <= maxDepth; i++) neighbors[i].insert(v); + } + } + if (--elementsToDepthIncrease == 0) { + if (++currentDepth > maxDepth) break; + elementsToDepthIncrease = nextElementsToDepthIncrease; + nextElementsToDepthIncrease = 0; + } + } + } + + bool check_constraints(graph &original_graph,graph &compressed_graph,vector<double> p, int k){ + + for (auto v : original_graph){ + unordered_map<int,unordered_set<uint32_t>> n_v; + get_neighbors(v.first,compressed_graph,n_v,k); + for (int i=1;i<= k;i++){ + unordered_set<uint32_t> nghs = intersection( n_v[i],original_graph[v.first]); + if ((double) nghs.size() < v.second.size()*p.at(i)){ + cout << i << "---" << v.first << " " << nghs.size() << " " + << v.second.size() << endl; + return false; + } + } + } + return true; + } + + bool BFS(uint32_t node, graph &g,graph &constructed_graph,vector<double> &p,int &maxDepth){ + + unordered_map<int,unordered_set<uint32_t>> neighbors; + // Mark all the vertices as not visited + unordered_map<uint32_t , int> node_visited; + // Create a queue for BFS + deque<uint32_t> queue; + // Mark the current node as visited and enqueue it + node_visited[node]= 1; + queue.push_back(node); + + int currentDepth = 1, + elementsToDepthIncrease = 1, + nextElementsToDepthIncrease = 0; + + while(!queue.empty()) { + // Dequeue a vertex from queue and print it + uint32_t s = queue.front(); + queue.pop_front(); + for (auto &v : g[s]){ + if (node_visited[v]==0) { + nextElementsToDepthIncrease++; + node_visited[v]=1; + queue.push_back(v); + for (int i = currentDepth; i <= maxDepth; i++) neighbors[i].insert(v); + } + } + if (--elementsToDepthIncrease == 0) { + double nb_nghrs = constructed_graph[node].size(); + unordered_set<uint32_t> s_neighbors = intersection( neighbors[currentDepth],constructed_graph[node]); + if ((double) s_neighbors.size() < nb_nghrs * p.at(currentDepth)) return true; + if (++currentDepth > maxDepth) break; + elementsToDepthIncrease = nextElementsToDepthIncrease; + nextElementsToDepthIncrease = 0; + } + } + if (currentDepth <= maxDepth){ + for (int i= currentDepth;i<=maxDepth;i++){ + double nb_nghrs = constructed_graph[node].size(); + unordered_set<uint32_t> s_neighbors = intersection( neighbors[i],constructed_graph[node]); + if ((double) s_neighbors.size() < nb_nghrs * p.at(i)) return true; + } + } + return false; + } + + graph compress_graph_LP(graph &initial_graph, unordered_map<edge,double> edges_scores, int k, vector<double> p,bool directed){ + + double compression_rate; + vector<edge> inserted_edges; + + vector<pair<edge,double>> es_vec; + + for ( auto & e : edges_scores ) es_vec.push_back(make_pair(e.first,e.second)); + sort(es_vec.begin(), es_vec.end(), // sort by PL scores + [](const pair<edge,double> & l, const pair<edge,double>& r) { + return l.second > r.second; + }); + cout << es_vec.size() << endl; + graph compressed_graph,constructed_graph; + int number_edges = 0; + int i=0; + for (auto e_s : es_vec){ + edge e = e_s.first; + if (i%10000==0) cout << i<< endl; + i++; + constructed_graph[e.first].insert(e.second); + if(!directed) constructed_graph[e.second].insert(e.first); + bool insert =test_insert(e,directed,constructed_graph,compressed_graph,k,p); + + if ( insert) { + inserted_edges.push_back(e); + number_edges++; + compressed_graph[e.first].insert(e.second); + if (!directed) compressed_graph[e.second].insert(e.first); + } + } + cout << "number of edges " << number_edges << endl; + if (check_constraints(initial_graph,compressed_graph,p,k)) cout << "feasible compression!"<< endl; + return compressed_graph; + } + + + graph compress_graph_basic(graph &initial_graph, int k, vector<double> p,bool directed){ + + double compression_rate; + vector<edge> inserted_edges; + vector<edge> s = get_edges(initial_graph,directed); + std::mt19937 g(rand()); + std::shuffle(s.begin(), s.end(), g); + + graph compressed_graph,constructed_graph; + int number_edges = 0; + int i=0; + for (auto e : s){ + if (i%10000==0) cout << i<< endl; + i++; + constructed_graph[e.first].insert(e.second); + if(!directed) constructed_graph[e.second].insert(e.first); + bool insert =test_insert(e,directed,constructed_graph,compressed_graph,k,p); + if (insert){ + inserted_edges.push_back(e); + number_edges++; + compressed_graph[e.first].insert(e.second); + if(!directed) compressed_graph[e.second].insert(e.first); + } + } + cout << "number of edges " << number_edges << endl; + if (check_constraints(initial_graph,compressed_graph,p,k)) cout << "feasible compression!"<< endl; + return compressed_graph; + } + + + vector<edge> perturbate_solution(vector<edge> &s){ + vector<edge> s2; + for (auto e : s ) s2.push_back(e); + int max_perturbations = 2; //s.size()/100; + std::mt19937 gen(rand()); + std::uniform_int_distribution<> dis(0, s.size()-1); + for (int i=0;i<max_perturbations;i++){ + int r1,r2; + r1 = dis(gen); + r2 = dis(gen); + edge e = s2.at(r1); + s2.at(r1).first= s2.at(r2).first; + s2.at(r1).second= s2.at(r2).second; + s2.at(r2).first = e.first; + s2.at(r2).second = e.second; + } + return s2; + } + + + tuple<double,graph> evaluate_permutation ( vector<edge> &permutation,bool directed, vector<double> &p, + int k,graph &g){ + //double compression_rate; + graph compressed_graph,constructed_graph; + int inserted_edges = 0; + for (auto e : permutation){ + // update constructed graph + constructed_graph[e.first].insert(e.second); + if (!directed) constructed_graph[e.second].insert(e.first); + bool insert =test_insert(e,false,constructed_graph,compressed_graph,k,p); + if (insert){ + inserted_edges++; + compressed_graph[e.first].insert(e.second); + if (!directed) compressed_graph[e.second].insert(e.first); + } + } + + return make_tuple(inserted_edges,compressed_graph); + } + + + + graph Simulated_annealing ( int max_iterations, + double initial_temperature, + double decrease_factor, + graph &initial_graph, + bool directed, + int k,vector<double> p){ + auto start = chrono::steady_clock::now(); + graph gr; + vector<edge> best_permutation; + /// generate initial solution + vector<edge> s = get_edges(initial_graph,false); + best_permutation = s; + std::mt19937 g(rand()); + std::shuffle(s.begin(), s.end(), g); + + double cost_s,cost_s2,T,best; + T= initial_temperature; + cost_s = get<0> (evaluate_permutation(s,directed,p,k,initial_graph)); + gr = get<1> (evaluate_permutation(s,directed,p,k,initial_graph)); + best = cost_s; + for (int i = 0;i<max_iterations;i++){ + vector<edge> s2 = perturbate_solution(s); + cost_s2 = get<0> (evaluate_permutation(s2,directed, p,k,initial_graph)); + if (cost_s2 < best){ + best_permutation = s2; + gr = get<1> (evaluate_permutation(s2,directed,p,k,initial_graph)); + best = cost_s2; + auto finish = chrono::steady_clock::now(); + double elapsed_time= chrono::duration_cast<chrono::duration<double>>(finish - start).count(); + //cout <<i<< '\t'<<best<< '\t' << elapsed_time << endl; + } + if (cost_s2 < cost_s){ + s = s2; + cost_s = cost_s2; + } + else { + double r = ((double) rand() /(RAND_MAX)); + if (std::exp( (cost_s -cost_s2) / T) > r ){ + s = s2; + cost_s = cost_s2; + } + } + T = decrease_factor*T; + } + + if ( check_constraints(initial_graph,gr,p,k)) { + cout << "feasible solution ! " << endl; + } + + return gr; + } + + + + + graph compress_graph_greedy(graph &initial_graph, int k, vector<double> p,bool directed){ + + double compression_rate; + vector<edge> inserted_edges; + vector<edge> s = greedy_edges_order(initial_graph,directed,k); + + + graph compressed_graph,constructed_graph; + int number_edges = 0; + int i=0; + for (auto e : s){ + if (i%10000==0) cout << i<< endl; + i++; + constructed_graph[e.first].insert(e.second); + if(!directed) constructed_graph[e.second].insert(e.first); + bool insert =test_insert(e,directed,constructed_graph,compressed_graph,k,p); + if (insert){ + inserted_edges.push_back(e); + number_edges++; + compressed_graph[e.first].insert(e.second); + if(!directed) compressed_graph[e.second].insert(e.first); + } + } + cout << "number of edges " << number_edges << endl; + if (check_constraints(initial_graph,compressed_graph,p,k)) cout << "feasible compression!"<< endl; + return compressed_graph; + } + + vector<edge> greedy_edges_order(graph &g,bool directed,int k){ + vector<edge> edges = get_edges(g,directed); + map<pair<uint32_t ,uint32_t >,int> edge_score; + for (auto e : edges ){ + unordered_set<uint32_t > neighbors_u; + unordered_set<uint32_t> neighbors_v; + neighbors_u = neighbors(g,k-1,e.first,e.second); + neighbors_v = neighbors(g,k-1,e.second,e.first); + for (auto n : neighbors_u){ + if (g[n].find(e.second)!=g[n].end()) edge_score[e]=edge_score[e]+1; + } + for (auto n : neighbors_v){ + if (g[n].find(e.first)!=g[n].end()) edge_score[e]=edge_score[e]+1; + } + } + sort(edges.begin(),edges.end(),[&]( const edge &e1, const edge &e2 ) + { return edge_score[e1] > edge_score[e2];} ); + return edges; + } + + + unordered_set<uint32_t> neighbors(graph &g,int max_depth,uint32_t u,uint32_t v){ + unordered_set<uint32_t> nghrs; + // Mark all the vertices as not visited + unordered_map<uint32_t , int> node_visited; + // Create a queue for BFS + deque<uint32_t> queue; + // array_queue *queue = new array_queue(); + // Mark the current node as visited and enqueue it + node_visited[u]= 1; + node_visited[v] = 1; + queue.push_back(u); + int currentDepth = 1, + elementsToDepthIncrease = 1, + nextElementsToDepthIncrease = 0; + + + while(!queue.empty()) { + // Dequeue a vertex from queue and print it + uint32_t s = queue.front(); + queue.pop_front(); + // node_visited[s] = 1; + for (auto &v : g[s]){ + if (node_visited[v]==0) { + node_visited[v]=1; + queue.push_back(v); + nghrs.insert(v); + } + } + if (--elementsToDepthIncrease == 0) { + if (++currentDepth > max_depth) break; + elementsToDepthIncrease = nextElementsToDepthIncrease; + nextElementsToDepthIncrease = 0; + } + } + return nghrs; + } + +} \ No newline at end of file -- GitLab