Skip to content
Snippets Groups Projects
Commit 99742c94 authored by Abd Errahmane Kiouche's avatar Abd Errahmane Kiouche :speech_balloon:
Browse files

Update src/Training_Phase/graphs_to_vectors/docopt.cpp,...

Update src/Training_Phase/graphs_to_vectors/docopt.cpp, src/Training_Phase/graphs_to_vectors/docopt.h, src/Training_Phase/graphs_to_vectors/docopt_private.h, src/Training_Phase/graphs_to_vectors/docopt_util.h, src/Training_Phase/graphs_to_vectors/graph.cpp, src/Training_Phase/graphs_to_vectors/graph.h, src/Training_Phase/graphs_to_vectors/io.h, src/Training_Phase/graphs_to_vectors/hash.h, src/Training_Phase/graphs_to_vectors/docopt_value.h, src/Training_Phase/graphs_to_vectors/io.cpp, src/Training_Phase/graphs_to_vectors/main.cpp, src/Training_Phase/graphs_to_vectors/param.h, src/Training_Phase/graphs_to_vectors/util.h files
parent 1a43fd0e
No related branches found
No related tags found
No related merge requests found
Showing
with 2453 additions and 0 deletions
This diff is collapsed.
//
// docopt.h
// docopt
//
// Created by Jared Grubb on 2013-11-03.
// Copyright (c) 2013 Jared Grubb. All rights reserved.
//
#ifndef docopt__docopt_h_
#define docopt__docopt_h_
#include "docopt_value.h"
#include <map>
#include <vector>
#include <string>
namespace docopt {
// Exception types used by docopt_parse(); every one derives from std::runtime_error.
// Usage string could not be parsed (ie, the developer did something wrong)
struct DocoptLanguageError : std::runtime_error { using runtime_error::runtime_error; };
// Arguments passed by user were incorrect (ie, developer was good, user is wrong)
struct DocoptArgumentError : std::runtime_error { using runtime_error::runtime_error; };
// Arguments contained '--help' and parsing was aborted early
// (the default constructor is only declared here; its definition is not in this header)
struct DocoptExitHelp : std::runtime_error { DocoptExitHelp(); };
// Arguments contained '--version' and parsing was aborted early
// (the default constructor is only declared here; its definition is not in this header)
struct DocoptExitVersion : std::runtime_error { DocoptExitVersion(); };
/// Parse user options from the given option string.
///
/// @param doc The usage string
/// @param argv The user-supplied arguments
/// @param help Whether to end early if '-h' or '--help' is in the argv
/// @param version Whether to end early if '--version' is in the argv
/// @param options_first Whether options must precede all args (true), or if args and options
/// can be arbitrarily mixed.
///
/// @return A map keyed by std::string whose mapped type is docopt::value
///
/// @throws DocoptLanguageError if the doc usage string had errors itself
/// @throws DocoptExitHelp if 'help' is true and the user has passed the '--help' argument
/// @throws DocoptExitVersion if 'version' is true and the user has passed the '--version' argument
/// @throws DocoptArgumentError if the user's argv did not match the usage patterns
std::map<std::string, value> docopt_parse(std::string const& doc,
std::vector<std::string> const& argv,
bool help = true,
bool version = true,
bool options_first = false);
/// Parse user options from the given string, and exit appropriately
///
/// Calls 'docopt_parse' and will terminate the program if any of the exceptions above occur:
/// * DocoptLanguageError - print error and terminate (with exit code -1)
/// * DocoptExitHelp - print usage string and terminate (with exit code 0)
/// * DocoptExitVersion - print version and terminate (with exit code 0)
/// * DocoptArgumentError - print error and usage string and terminate (with exit code -1)
///
/// Declared noexcept: none of the exceptions listed above reaches the caller.
/// NOTE(review): here 'version' is a string rather than a bool; presumably it is the
/// text printed for '--version' - confirm against the implementation.
std::map<std::string, value> docopt(std::string const& doc,
std::vector<std::string> const& argv,
bool help = true,
std::string const& version = {},
bool options_first = false) noexcept;
}
#endif /* defined(docopt__docopt_h_) */
//
// docopt_private.h
// docopt
//
// Created by Jared Grubb on 2013-11-04.
// Copyright (c) 2013 Jared Grubb. All rights reserved.
//
#ifndef docopt_docopt_private_h
#define docopt_docopt_private_h
#include <vector>
#include <memory>
#include <unordered_set>
#include "docopt_value.h"
namespace docopt {
class Pattern;
class LeafPattern;
using PatternList = std::vector<std::shared_ptr<Pattern>>;
// Hash functor that lets Pattern types be stored in std hash-containers.
// Whatever form the object arrives in (shared_ptr, raw const pointer or
// const reference), the result is the object's own hash() value.
struct PatternHasher {
    // Owning pointer: hash the pointee.
    template <typename T>
    size_t operator()(std::shared_ptr<T> const& owner) const {
        return (*owner).hash();
    }
    // Raw pointer: hash the pointee.
    template <typename T>
    size_t operator()(T const* raw) const {
        return (*raw).hash();
    }
    // Plain object: hash it directly.
    template <typename T>
    size_t operator()(T const& object) const {
        return object.hash();
    }
};
// Equality functor for std containers that treats two patterns as equal
// exactly when their hash() values are equal.
struct PatternPointerEquality {
    // shared_ptr flavour
    template <typename L, typename R>
    bool operator()(std::shared_ptr<L> const& lhs, std::shared_ptr<R> const& rhs) const {
        const auto left = lhs->hash();
        const auto right = rhs->hash();
        return left == right;
    }
    // raw pointer flavour
    template <typename L, typename R>
    bool operator()(L const* lhs, R const* rhs) const {
        const auto left = lhs->hash();
        const auto right = rhs->hash();
        return left == right;
    }
};
// A hash-set that uniques by hash value
using UniquePatternSet = std::unordered_set<std::shared_ptr<Pattern>, PatternHasher, PatternPointerEquality>;
// Abstract interface shared by LeafPattern and BranchPattern.
class Pattern {
public:
// flatten out children, stopping descent when the given filter returns 'true'
virtual std::vector<Pattern*> flat(bool (*filter)(Pattern const*)) = 0;
// append the LeafPattern objects of this subtree to the caller-supplied list
virtual void collect_leaves(std::vector<LeafPattern*>&) = 0;
// flatten out all children into a list of LeafPattern objects
// (only declared here; the definition is not part of this header)
std::vector<LeafPattern*> leaves();
// Attempt to find something in 'left' that matches this pattern's spec, and if so, move it to 'collected'
virtual bool match(PatternList& left, std::vector<std::shared_ptr<LeafPattern>>& collected) const = 0;
// Name of the pattern; BranchPattern overrides this to throw
virtual std::string const& name() const = 0;
// Whether the pattern carries a value; false unless a subclass overrides it
virtual bool hasValue() const { return false; }
// Hash of the pattern; PatternHasher and PatternPointerEquality rely on it
virtual size_t hash() const = 0;
virtual ~Pattern() = default;
};
class LeafPattern
: public Pattern {
public:
LeafPattern(std::string name, value v = {})
: fName(std::move(name)),
fValue(std::move(v))
{}
virtual std::vector<Pattern*> flat(bool (*filter)(Pattern const*)) override {
if (filter(this)) {
return { this };
}
return {};
}
virtual void collect_leaves(std::vector<LeafPattern*>& lst) override final {
lst.push_back(this);
}
virtual bool match(PatternList& left, std::vector<std::shared_ptr<LeafPattern>>& collected) const override;
virtual bool hasValue() const override { return static_cast<bool>(fValue); }
value const& getValue() const { return fValue; }
void setValue(value&& v) { fValue = std::move(v); }
virtual std::string const& name() const override { return fName; }
virtual size_t hash() const override {
size_t seed = typeid(*this).hash_code();
hash_combine(seed, fName);
hash_combine(seed, fValue);
return seed;
}
protected:
virtual std::pair<size_t, std::shared_ptr<LeafPattern>> single_match(PatternList const&) const = 0;
private:
std::string fName;
value fValue;
};
// A pattern that owns an ordered list of child patterns.
class BranchPattern
    : public Pattern {
public:
    BranchPattern(PatternList children = {})
        : fChildren(std::move(children))
    {}

    // Runs fix_identities() with a fresh set, then fix_repeating_arguments(),
    // and returns this node.
    Pattern& fix() {
        UniquePatternSet patterns;
        fix_identities(patterns);
        fix_repeating_arguments();
        return *this;
    }

    // A branch has no name of its own; calling this is a programming error.
    virtual std::string const& name() const override {
        throw std::runtime_error("Logic error: name() shouldnt be called on a BranchPattern");
    }

    // A branch has no value of its own; calling this is a programming error.
    // (The message used to mention name(), which pointed at the wrong call.)
    virtual value const& getValue() const {
        throw std::runtime_error("Logic error: getValue() shouldnt be called on a BranchPattern");
    }

    // Returns this node if the filter accepts it; otherwise concatenates
    // the flattened results of the children, in order.
    virtual std::vector<Pattern*> flat(bool (*filter)(Pattern const*)) override {
        if (filter(this)) {
            return {this};
        }

        std::vector<Pattern*> ret;
        for(auto& child : fChildren) {
            auto sublist = child->flat(filter);
            ret.insert(ret.end(), sublist.begin(), sublist.end());
        }
        return ret;
    }

    // Delegates to every child so that all leaves below this node are gathered.
    virtual void collect_leaves(std::vector<LeafPattern*>& lst) override final {
        for(auto& child : fChildren) {
            child->collect_leaves(lst);
        }
    }

    void setChildren(PatternList children) {
        fChildren = std::move(children);
    }

    PatternList const& children() const { return fChildren; }

    // Replaces children whose hash is already present in `patterns` with the
    // shared_ptr stored there, so equal patterns share one object.
    virtual void fix_identities(UniquePatternSet& patterns) {
        for(auto& child : fChildren) {
            // this will fix up all its children, if needed
            if (auto bp = dynamic_cast<BranchPattern*>(child.get())) {
                bp->fix_identities(patterns);
            }

            // then we try to add it to the list
            auto inserted = patterns.insert(child);
            if (!inserted.second) {
                // already there? then reuse the existing shared_ptr for that thing
                child = *inserted.first;
            }
        }
    }

    // Combines the dynamic type, the child count and each child's hash in order.
    virtual size_t hash() const override {
        size_t seed = typeid(*this).hash_code();
        hash_combine(seed, fChildren.size());
        for(auto const& child : fChildren) {
            hash_combine(seed, child->hash());
        }
        return seed;
    }

private:
    void fix_repeating_arguments();

protected:
    PatternList fChildren;
};
// Leaf pattern that reuses LeafPattern's constructors and supplies its own single_match().
class Argument
: public LeafPattern {
public:
using LeafPattern::LeafPattern;
protected:
virtual std::pair<size_t, std::shared_ptr<LeafPattern>> single_match(PatternList const& left) const override;
};
// An Argument whose value defaults to value{false} and which has its own single_match().
class Command : public Argument {
public:
Command(std::string name, value v = value{false})
: Argument(std::move(name), std::move(v))
{}
protected:
virtual std::pair<size_t, std::shared_ptr<LeafPattern>> single_match(PatternList const& left) const override;
};
class Option final
: public LeafPattern
{
public:
static Option parse(std::string const& option_description);
Option(std::string shortOption,
std::string longOption,
int argcount = 0,
value v = value{false})
: LeafPattern(longOption.empty() ? shortOption : longOption,
std::move(v)),
fShortOption(std::move(shortOption)),
fLongOption(std::move(longOption)),
fArgcount(argcount)
{
// From Python:
// self.value = None if value is False and argcount else value
if (argcount && v.isBool() && !v.asBool()) {
setValue(value{});
}
}
Option(Option const&) = default;
Option(Option&&) = default;
Option& operator=(Option const&) = default;
Option& operator=(Option&&) = default;
using LeafPattern::setValue;
std::string const& longOption() const { return fLongOption; }
std::string const& shortOption() const { return fShortOption; }
int argCount() const { return fArgcount; }
virtual size_t hash() const override {
size_t seed = LeafPattern::hash();
hash_combine(seed, fShortOption);
hash_combine(seed, fLongOption);
hash_combine(seed, fArgcount);
return seed;
}
protected:
virtual std::pair<size_t, std::shared_ptr<LeafPattern>> single_match(PatternList const& left) const override;
private:
std::string fShortOption;
std::string fLongOption;
int fArgcount;
};
// Branch pattern with BranchPattern's constructors; match() is defined out of line.
class Required : public BranchPattern {
public:
using BranchPattern::BranchPattern;
bool match(PatternList& left, std::vector<std::shared_ptr<LeafPattern>>& collected) const override;
};
// Branch pattern whose match() never fails.
class Optional : public BranchPattern {
public:
    using BranchPattern::BranchPattern;

    // Gives every child a chance to match; the children's individual results
    // are deliberately ignored and the overall answer is always true.
    bool match(PatternList& left, std::vector<std::shared_ptr<LeafPattern>>& collected) const override {
        for (auto it = fChildren.begin(); it != fChildren.end(); ++it) {
            (*it)->match(left, collected);
        }
        return true;
    }
};
// An Optional with no extra members; it only pulls in Optional's constructors.
class OptionsShortcut : public Optional {
using Optional::Optional;
};
// Branch pattern with BranchPattern's constructors; match() is defined out of line.
class OneOrMore : public BranchPattern {
public:
using BranchPattern::BranchPattern;
bool match(PatternList& left, std::vector<std::shared_ptr<LeafPattern>>& collected) const override;
};
// Branch pattern with BranchPattern's constructors; match() is defined out of line.
class Either : public BranchPattern {
public:
using BranchPattern::BranchPattern;
bool match(PatternList& left, std::vector<std::shared_ptr<LeafPattern>>& collected) const override;
};
}
#endif
//
// docopt_util.h
// docopt
//
// Created by Jared Grubb on 2013-11-04.
// Copyright (c) 2013 Jared Grubb. All rights reserved.
//
#ifndef docopt_docopt_util_h
#define docopt_docopt_util_h
namespace {
// True when `str` begins with `prefix`; an empty prefix always matches.
bool starts_with(std::string const& str, std::string const& prefix)
{
    const auto prefixLen = prefix.length();
    // A string shorter than the prefix can never start with it.
    return str.length() >= prefixLen
        && str.compare(0, prefixLen, prefix) == 0;
}
// Strips the characters listed in `whitespace` from both ends of `str`
// and returns the result; a string made only of such characters yields "".
std::string trim(std::string&& str,
                 const std::string& whitespace = " \t\n")
{
    const auto lastKept = str.find_last_not_of(whitespace);
    if (lastKept == std::string::npos) {
        return std::string(); // no content
    }

    // Cut the tail first, then whatever precedes the first kept character.
    str.resize(lastKept + 1);
    str.erase(0, str.find_first_not_of(whitespace));

    // `str` is a named rvalue reference, so the explicit move is what avoids a copy.
    return std::move(str);
}
// Splits `str` on runs of whitespace (" \t\r\n\v\f"), starting the scan at
// offset `pos`. Empty tokens are never produced.
std::vector<std::string> split(std::string const& str, size_t pos = 0)
{
    const char* const separators = " \t\r\n\v\f";
    std::vector<std::string> tokens;

    for (;;) {
        const auto tokenBegin = str.find_first_not_of(separators, pos);
        if (tokenBegin == std::string::npos) {
            break; // only separators (or nothing) left
        }

        const auto tokenEnd = str.find_first_of(separators, tokenBegin);
        if (tokenEnd == std::string::npos) {
            // last token runs to the end of the string
            tokens.emplace_back(str.substr(tokenBegin));
            break;
        }

        tokens.emplace_back(str.substr(tokenBegin, tokenEnd - tokenBegin));
        pos = tokenEnd;
    }

    return tokens;
}
// Splits `str` around the first occurrence of `point` and returns
// (text before, point, text after). When `point` does not occur the whole
// string goes in the first slot and the other two stay empty.
std::tuple<std::string, std::string, std::string> partition(std::string str, std::string const& point)
{
    using Parts = std::tuple<std::string, std::string, std::string>;

    const auto at = str.find(point);
    if (at == std::string::npos) {
        // no match: string goes in 0th spot only
        return Parts(std::move(str), std::string(), std::string());
    }

    std::string after = str.substr(at + point.size());
    str.erase(at);
    return Parts(std::move(str), point, std::move(after));
}
// Concatenates the elements of [iter, end), placing `delim` between
// consecutive elements. An empty range yields an empty string.
template <typename I>
std::string join(I iter, I end, std::string const& delim) {
    std::string joined;
    bool needDelim = false;
    for (; iter != end; ++iter) {
        if (needDelim) {
            joined.append(delim);
        }
        joined.append(*iter);
        needDelim = true;
    }
    return joined;
}
}
namespace docopt {
    // Folds the std::hash of `v` into `seed`.
    template <class T>
    inline void hash_combine(std::size_t& seed, T const& v)
    {
        // stolen from boost::hash_combine
        const std::size_t mixed = std::hash<T>{}(v) + 0x9e3779b9 + (seed<<6) + (seed>>2);
        seed ^= mixed;
    }
}
#endif
//
// value.h
// docopt
//
// Created by Jared Grubb on 2013-10-14.
// Copyright (c) 2013 Jared Grubb. All rights reserved.
//
#ifndef docopt__value_h_
#define docopt__value_h_
#include <string>
#include <vector>
#include <functional> // std::hash
#include <iosfwd>
namespace docopt {
/// A generic type to hold the various types that can be produced by docopt.
///
/// This type can be one of: {bool, long, string, vector<string>}, or empty.
/// It is a hand-written tagged union: `kind` records which member of `variant` is live.
struct value {
/// An empty value
value() {}
// Implicit conversions from string and string list; the numeric/bool ones are explicit
value(std::string);
value(std::vector<std::string>);
explicit value(bool);
explicit value(long);
// int is widened to long and stored as a Long
explicit value(int v) : value(static_cast<long>(v)) {}
~value();
value(value const&);
value(value&&) noexcept;
value& operator=(value const&);
value& operator=(value&&) noexcept;
// Test if this object has any contents at all
explicit operator bool() const { return kind != Kind::Empty; }
// Test the type contained by this value object
bool isBool() const { return kind==Kind::Bool; }
bool isString() const { return kind==Kind::String; }
bool isLong() const { return kind==Kind::Long; }
bool isStringList() const { return kind==Kind::StringList; }
// Typed accessors. Each one checks the kind through throwIfNotKind(), which is
// defined elsewhere (stated to throw std::invalid_argument on a mismatch - confirm there).
// asLong() additionally accepts a String and converts it, throwing std::runtime_error
// when the text has trailing non-numeric characters.
bool asBool() const;
long asLong() const;
std::string const& asString() const;
std::vector<std::string> const& asStringList() const;
size_t hash() const noexcept;
// equality compares the kind first and then the stored contents
friend bool operator==(value const&, value const&);
friend bool operator!=(value const&, value const&);
private:
enum class Kind {
Empty,
Bool,
Long,
String,
StringList
};
// Raw storage for the payload; construction and destruction of the string and
// vector members is done by hand in value's own constructors and destructor.
union Variant {
Variant() {}
~Variant() { /* do nothing; will be destroyed by ~value */ }
bool boolValue;
long longValue;
std::string strValue;
std::vector<std::string> strList;
};
static const char* kindAsString(Kind);
void throwIfNotKind(Kind expected) const;
private:
// Which member of `variant` is currently live
Kind kind = Kind::Empty;
Variant variant {};
};
/// Write out the contents to the ostream
std::ostream& operator<<(std::ostream&, value const&);
}
namespace std {
    /// std::hash support for docopt::value; simply forwards to value::hash().
    template <>
    struct hash<docopt::value> {
        size_t operator()(docopt::value const& v) const noexcept {
            const size_t digest = v.hash();
            return digest;
        }
    };
}
namespace docopt {
// Converting constructors: each one records the kind and then brings the
// matching union member to life.

/// Holds a bool.
inline
value::value(bool v)
    : kind(Kind::Bool)
{
    variant.boolValue = v;
}

/// Holds a long.
inline
value::value(long v)
    : kind(Kind::Long)
{
    variant.longValue = v;
}

/// Holds a string; the argument is moved into the union's storage.
inline
value::value(std::string v)
    : kind(Kind::String)
{
    std::string* const slot = &variant.strValue;
    new (slot) std::string(std::move(v));
}

/// Holds a list of strings; the argument is moved into the union's storage.
inline
value::value(std::vector<std::string> v)
    : kind(Kind::StringList)
{
    std::vector<std::string>* const slot = &variant.strList;
    new (slot) std::vector<std::string>(std::move(v));
}
/// Copy constructor: takes over the other object's kind and copies whichever
/// union member that kind designates.
inline
value::value(value const& other)
    : kind(other.kind)
{
    if (kind == Kind::String) {
        new (&variant.strValue) std::string(other.variant.strValue);
    } else if (kind == Kind::StringList) {
        new (&variant.strList) std::vector<std::string>(other.variant.strList);
    } else if (kind == Kind::Bool) {
        variant.boolValue = other.variant.boolValue;
    } else if (kind == Kind::Long) {
        variant.longValue = other.variant.longValue;
    }
    // Kind::Empty (or any other value): there is no payload to copy
}
/// Move constructor: takes over the other object's kind and moves the live
/// union member across. `other` keeps its kind, so its destructor still
/// destroys the moved-from string or vector.
inline
value::value(value&& other) noexcept
    : kind(other.kind)
{
    if (kind == Kind::String) {
        new (&variant.strValue) std::string(std::move(other.variant.strValue));
    } else if (kind == Kind::StringList) {
        new (&variant.strList) std::vector<std::string>(std::move(other.variant.strList));
    } else if (kind == Kind::Bool) {
        variant.boolValue = other.variant.boolValue;
    } else if (kind == Kind::Long) {
        variant.longValue = other.variant.longValue;
    }
    // Kind::Empty (or any other value): there is no payload to move
}
/// Destroys the live union member by hand; only the string and the string
/// list have non-trivial destructors.
inline
value::~value()
{
    if (kind == Kind::String) {
        variant.strValue.~basic_string();
    } else if (kind == Kind::StringList) {
        variant.strList.~vector();
    }
    // Empty, Bool and Long: trivial dtor, nothing to do
}
/// Copy assignment, expressed as "copy, then move-assign".
inline
value& value::operator=(value const& other) {
    // The copy is taken before *this is touched, so self-assignment is harmless.
    value duplicate(other);
    return *this = std::move(duplicate);
}
/// Move assignment: destroys the current contents and move-constructs the
/// new ones in place.
inline
value& value::operator=(value&& other) noexcept {
    // Self-move would destroy the payload and then move-construct from the
    // destroyed object, so it has to be a no-op.
    if (this == &other) {
        return *this;
    }

    // move of all the types involved is noexcept, so we dont have to worry about
    // these two statements throwing, which gives us a consistency guarantee.
    this->~value();
    new (this) value(std::move(other));
    return *this;
}
// Forward declaration; this header only needs the signature.
template <class T>
void hash_combine(std::size_t& seed, const T& v);

/// Hash of the stored payload. A string list hashes its length and then folds
/// in every element; an empty value hashes like a null pointer.
inline
size_t value::hash() const noexcept
{
    if (kind == Kind::String) {
        return std::hash<std::string>()(variant.strValue);
    }
    if (kind == Kind::StringList) {
        size_t digest = std::hash<size_t>()(variant.strList.size());
        for (auto it = variant.strList.begin(); it != variant.strList.end(); ++it) {
            hash_combine(digest, *it);
        }
        return digest;
    }
    if (kind == Kind::Bool) {
        return std::hash<bool>()(variant.boolValue);
    }
    if (kind == Kind::Long) {
        return std::hash<long>()(variant.longValue);
    }
    // Kind::Empty and anything unexpected
    return std::hash<void*>()(nullptr);
}
/// Returns the stored bool; throwIfNotKind() rejects any other kind.
inline
bool value::asBool() const
{
    throwIfNotKind(Kind::Bool);
    return variant.boolValue;
}

/// Returns the stored long. A stored string is also accepted and converted,
/// provided the whole string is consumed by the conversion.
inline
long value::asLong() const
{
    if (kind != Kind::String) {
        throwIfNotKind(Kind::Long);
        return variant.longValue;
    }

    // Attempt to convert a string to a long
    const std::string& text = variant.strValue;
    std::size_t consumed;
    const long parsed = stol(text, &consumed); // Throws if it can't convert
    if (consumed != text.length()) {
        // The string ended in non-digits.
        throw std::runtime_error( text + " contains non-numeric characters.");
    }
    return parsed;
}

/// Returns the stored string; throwIfNotKind() rejects any other kind.
inline
std::string const& value::asString() const
{
    throwIfNotKind(Kind::String);
    return variant.strValue;
}

/// Returns the stored string list; throwIfNotKind() rejects any other kind.
inline
std::vector<std::string> const& value::asStringList() const
{
    throwIfNotKind(Kind::StringList);
    return variant.strList;
}
/// Two values are equal when they have the same kind and equal payloads;
/// two empty values are equal.
inline
bool operator==(value const& v1, value const& v2)
{
    const value::Kind shared = v1.kind;
    if (shared != v2.kind) {
        return false;
    }

    if (shared == value::Kind::String) {
        return v1.variant.strValue==v2.variant.strValue;
    }
    if (shared == value::Kind::StringList) {
        return v1.variant.strList==v2.variant.strList;
    }
    if (shared == value::Kind::Bool) {
        return v1.variant.boolValue==v2.variant.boolValue;
    }
    if (shared == value::Kind::Long) {
        return v1.variant.longValue==v2.variant.longValue;
    }
    // Kind::Empty (and anything unexpected): nothing to compare
    return true;
}

/// Negation of operator==.
inline
bool operator!=(value const& v1, value const& v2)
{
    const bool same = (v1 == v2);
    return !same;
}
}
#endif /* defined(docopt__value_h_) */
//
// Created by Abderrahmane on 6/16/2018.
//
#include <algorithm>
#include <iostream>
#include <set>
#include <cmath>
#include <fstream>
#include "graph.h"
#include "hash.h"
#include "param.h"
namespace std {
// Appends edge `e` to the adjacency list of its source node, inside the graph
// selected by the edge's graph id. `graphs` is indexed directly by that id.
void update_graphs(edge &e, vector<graph> &graphs) {
    graph &target = graphs[get<F_GID>(e)];

    // adjacency list of the (source id, source type) node; created on first use
    auto &out_edges = target[make_pair(get<F_S>(e), get<F_STYPE>(e))];

    // append edge to the edge list for the source
    out_edges.push_back(make_tuple(get<F_D>(e),
                                   get<F_DTYPE>(e),
                                   get<F_ETYPE>(e)));
}
// Groups the branches of the train graphs by class and hands them to SPS_C.
// For the "AUTH" dataset every graph belongs to class 0; otherwise the class
// of a graph is gid / 100. Returns (prototype branches, median branches).
tuple<vector<Branch>,vector<Branch>> construct_prototype_branches(unordered_map<uint32_t, unordered_map<uint32_t, Branch>> &map_graph_branches, vector<uint32_t> train_gids,string dataset) {
    const bool single_class = (dataset == "AUTH"); // we have one class

    // All the branches grouped by the classes of benign train graphs
    unordered_map<uint32_t, vector<Branch>> all_branches;
    for (auto gid : train_gids) {
        const uint32_t class_id = single_class ? 0 : gid / 100;
        for (auto &root_and_branch : map_graph_branches[gid]) {
            all_branches[class_id].push_back(root_and_branch.second);
        }
    }

    // the used algorithm for selecting the prototype branches is SPS-C
    return SPS_C(all_branches);
}
double compute_branch_edit_distance(Branch &Br1, Branch &Br2) {
double bed = 0;
double max_bed = 1+max(Br1.d_out, Br2.d_out)+max(Br1.d_in, Br2.d_in);
if (Br1.r != Br2.r) bed += 1;
bed += max(Br1.d_out, Br2.d_out);
bed += max(Br1.d_in, Br2.d_in);
if (max(Br1.d_out, Br2.d_out) == Br2.d_out) {
for (auto &a : Br1.es_out)bed -= min(a.second, Br2.es_out[a.first]);
} else {
for (auto &a : Br2.es_out)bed -= min(a.second, Br1.es_out[a.first]);
}
if (max(Br1.d_in, Br2.d_in) == Br2.d_in) {
for (auto &a : Br1.es_in)bed -= min(a.second, Br2.es_in[a.first]);
} else {
for (auto &a : Br2.es_in)bed -= min(a.second, Br1.es_in[a.first]);
}
return bed/max_bed;
}
/* the SPS-C (Spanning Class wise) algorithm for choosing the K prototype branches
 * reference : RIESEN, K. and H. BUNKE, GRAPH CLASSIFICATION BASED ON VECTOR SPACE EMBEDDING.
 * International Journal of Pattern Recognition and Artificial Intelligence, 2009. 23(06): p. 1053-1081.
 *
 * For every class the function picks one "median" branch and then repeatedly
 * adds the branch whose distance to its nearest already-selected prototype is
 * largest, until M / (number of classes) branches were taken from the class
 * or the class runs out of branches. Selected branches are removed from
 * `all_branches`. Returns (prototype branches, median branches).
 */
tuple<vector<Branch>,vector<Branch>> SPS_C(unordered_map<uint32_t, vector<Branch>> &all_branches) {
    vector<Branch> prototype_branches;
    vector<Branch> median_branches;
    const long nc = static_cast<long>(all_branches.size()); // the number of classes of train graphs
    const long per_class = (nc > 0) ? (M / nc) : 0;         // prototypes to take from each class
    int c = 1; // the class id
    for (auto &m : all_branches) { // compute the prototype branches for each class
        vector<Branch> &candidates = m.second;
        cout << c << endl;
        cout << "number of branches :" << candidates.size() << endl; // delete this
        c++;
        if (candidates.empty()) continue; // nothing to select from (avoids rand() % 0)

        // The exact median (the branch with the smallest summed distance to all
        // others) is not computed; a random branch of the class stands in for it.
        const size_t median_index = rand() % candidates.size();
        median_branches.push_back(candidates.at(median_index));
        prototype_branches.push_back(candidates.at(median_index)); // the median branch
        candidates.erase(candidates.begin() + median_index); // delete the median branch from the set of all branch

        // the spanning selector
        for (long counter = 1; counter < per_class && !candidates.empty(); counter++) {
            // Select the furthest Branch away from the already selected prototypes
            size_t furthest_branch_index = 0;
            double furthest_distance = 0;
            for (size_t index = 0; index < candidates.size(); index++) {
                double d = INF; // distance to the nearest selected prototype
                for (auto &bp : prototype_branches) {
                    const double bed = compute_branch_edit_distance(candidates[index], bp);
                    if (bed < d) d = bed;
                }
                // strict comparison keeps the first of several equally distant branches
                if (index == 0 || d > furthest_distance) {
                    furthest_distance = d;
                    furthest_branch_index = index;
                }
            }
            prototype_branches.push_back(candidates.at(furthest_branch_index)); // add the furthest branch
            candidates.erase(candidates.begin() + furthest_branch_index); // delete the already added branch
        }
    }
    return make_tuple(prototype_branches, median_branches);
}
// Decomposes every graph into branches, one branch per node id.
//
// The id of a graph is its position in `graphs`. For each edge the source
// node's branch gets its label, its out-degree and the outgoing count of the
// edge label updated; the destination node's branch gets the same on the
// incoming side. Graphs without edges produce no entry in the result.
unordered_map<uint32_t, unordered_map<uint32_t, Branch>> graph_to_branches(vector<graph> &graphs){
    unordered_map<uint32_t, unordered_map<uint32_t, Branch>> map_graph_branches; /* key1= gid , key2= root id */
    uint32_t gid = 0;
    // construct graph branches
    for (auto &g : graphs) {
        for (auto &e : g) {
            const uint32_t src_id = e.first.first;    // the source root id
            const string &src_label = e.first.second; // the source root label
            for (auto &v : e.second) {
                const uint32_t dest_id = get<0>(v);
                const string &dest_label = get<1>(v);
                const string &edge_label = get<2>(v);

                // Look each branch up once. References into an unordered_map stay
                // valid when later insertions rehash it, so both can be held together.
                auto &branches = map_graph_branches[gid];
                Branch &src = branches[src_id];
                Branch &dest = branches[dest_id];

                src.r = src_label;
                dest.r = dest_label;
                src.es_out[edge_label]++;
                dest.es_in[edge_label]++;
                src.d_out++;
                dest.d_in++;
            }
        }
        gid++;
    }
    return map_graph_branches;
}
}
//
// Created by Abderrahmane on 6/16/2018.
//
#ifndef NAADSG_GRAPH_H
#define NAADSG_GRAPH_H
#include <vector>
#include <tuple>
#include <unordered_map>
namespace std {
// edge field indices
// Positions of the fields inside an `edge` tuple, for use with get<>().
#define F_S 0 // source node id
#define F_STYPE 1 // source node type
#define F_D 2 // destination node id
#define F_DTYPE 3 // destination node type
#define F_ETYPE 4 // edge type
#define F_GID 5 // graph id (tag)
// data structures
// A branch: one node (the root) plus a summary of the edges around it.
// Both degrees start at zero so a default-constructed Branch is fully defined.
struct Branch {
    std::string r;                               // the root of the branch
    std::unordered_map<std::string, int> es_out; // the edge structure of the outgoing edges from r
    std::unordered_map<std::string, int> es_in;  // the edge structure of the incoming edges to r
    uint32_t d_out = 0;                          // the number of outgoing edges
    uint32_t d_in = 0;                           // the number of incoming edges
};
// One edge record; the F_* macros above name the position of each field.
typedef tuple<uint32_t, string, uint32_t, string, string, uint32_t> edge;
// Adjacency list: (node id, node type) -> list of (destination id, destination type, edge type).
typedef unordered_map<pair<uint32_t,string>, vector<tuple<uint32_t,string,string>>> graph;
typedef vector<double> graph_vector; // vector representation of a graph
// Appends `e` to the adjacency list of its source node in graphs[graph id of e].
void update_graphs(edge& e, vector<graph>& graphs);
// Normalised edit distance between two branches.
double compute_branch_edit_distance(Branch &Br1,Branch &Br2);
// Groups the branches of the train graphs by class and runs SPS_C on them;
// returns (prototype branches, median branches).
tuple<vector<Branch>,vector<Branch>> construct_prototype_branches(unordered_map<uint32_t, unordered_map<uint32_t, Branch>>
&map_graph_branches, vector<uint32_t> train_gids,string dataset);
tuple<vector<Branch>,vector<Branch>> SPS_C(unordered_map<uint32_t ,vector<Branch>> &all_branches); /* the Spanning wise class prototypes
* selector */
unordered_map<uint32_t, unordered_map<uint32_t, Branch>> graph_to_branches(vector<graph> &graphs); /* graph
decomposition to
branches (result: key 1 = graph id, key 2 = root node id) */
}
#endif //NAADSG_GRAPH_H
#ifndef NAADSG_HASH_H
#define NAADSG_HASH_H
#include <string>
#include <vector>
namespace std {
    /* Combination hash from Boost */
    // Folds the hash of `v` into `seed`.
    template <class T>
    inline void hash_combine(size_t& seed, const T& v)
    {
        const size_t mixed = hash<T>()(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
        seed ^= mixed;
    }

    // Hash for pairs: starts from 0 and folds in `first`, then `second`.
    template<typename S, typename T> struct hash<pair<S, T>>
    {
        inline size_t operator()(const pair<S, T>& v) const
        {
            size_t digest = 0;
            hash_combine(digest, v.first);
            hash_combine(digest, v.second);
            return digest;
        }
    };
    /* End combination hash from Boost */
}
#endif //NAADSG_HASH_H
#include <fcntl.h>
#include <fstream>
#include "graph.h"
#include "io.h"
#include <iostream>
#include "param.h"
#include <string>
#include <sstream>
#include <tuple>
#include <unistd.h>
#include "util.h"
#include <vector>
#include <algorithm>
namespace std {
// Reads the ids of the train graphs, one per line, from `filename`.
//
// Lines that do not start with an unsigned number (for example blank lines)
// are skipped rather than stored as an undefined id. A file that cannot be
// opened produces a warning on stderr and an empty result.
//
// Returns (number of ids read, the ids in file order).
tuple<uint32_t,vector<uint32_t>> read_train_gids(string filename) {
    // read train gids into memory
    cerr << "Reading Train gids from: " << filename << endl;
    vector<uint32_t> gids;
    ifstream f(filename);
    if (!f) {
        cerr << "Warning: cannot open train gids file: " << filename << endl;
    }

    // read train gids from the file
    string line;
    while (getline(f, line)) {
        stringstream ss(line);
        uint32_t graph_id;
        if (!(ss >> graph_id)) {
            continue; // blank or malformed line
        }
        gids.push_back(graph_id);
    }
    const uint32_t num_train_graphs = static_cast<uint32_t>(gids.size());
#ifdef DEBUG
    for (uint32_t i = 0; i < gids.size(); i++) {
        cout << "graph " << gids.at(i) << endl;
    }
    cout << "Number of train graphs: " << num_train_graphs << endl;
#endif
    return make_tuple(num_train_graphs, gids);
}
// Reads the edge file and keeps the edges that belong to train graphs.
//
// Every line is expected to hold six whitespace-separated fields:
//   source id, source type, destination id, destination type, edge type, graph id.
// Lines that do not parse completely are skipped, so stray blank lines can
// neither add junk edges nor inflate the graph count. A file that cannot be
// opened produces a warning on stderr.
//
// Returns (largest graph id seen in the file + 1, edges of the train graphs).
tuple<uint32_t,vector<edge>> read_edges (string filename,vector<uint32_t> &train_gids){
    vector<edge> train_edges;
    ifstream f(filename);
    cerr << "Reading edges from: " << filename << endl;
    if (!f) {
        cerr << "Warning: cannot open edge file: " << filename << endl;
    }

    // Sorted copy of the train ids: membership becomes a binary search per edge
    // instead of a linear scan, and the caller's vector keeps its order.
    vector<uint32_t> sorted_gids(train_gids);
    sort(sorted_gids.begin(), sorted_gids.end());

    // read edges from the file
    uint32_t max_gid = 0;
    string line;
    while (getline(f, line)) {
        string src_type, dst_type, e_type;
        uint32_t src_id, dst_id, graph_id;
        stringstream ss(line);
        if (!(ss >> src_id >> src_type >> dst_id >> dst_type >> e_type >> graph_id)) {
            continue; // blank or malformed line
        }
        max_gid = max(max_gid, graph_id);
        if (binary_search(sorted_gids.begin(), sorted_gids.end(), graph_id)) { // if the graph is a train graph
            train_edges.push_back(make_tuple(src_id, src_type,
                                             dst_id, dst_type,
                                             e_type, graph_id));
        }
    }
    return make_tuple(max_gid + 1, train_edges);
}
// Writes the branches to `branches_file`, three lines per branch:
//   1. root label, out-degree and in-degree, tab separated
//   2. "label<TAB>count<TAB>" for every outgoing edge label (empty line if the out-degree is 0)
//   3. the same for the incoming edge labels (empty line if the in-degree is 0)
void branches_to_file(vector<Branch> &branches, string branches_file){
    ofstream out(branches_file);
    // save prototype_branches
    for (auto &branch : branches) {
        out << branch.r << '\t' << branch.d_out << '\t' << branch.d_in << endl;

        if (branch.d_out > 0) {
            for (auto &label_count : branch.es_out) {
                out << label_count.first << '\t' << label_count.second << '\t';
            }
        }
        out << endl; // terminates the outgoing line, which may be empty

        if (branch.d_in > 0) {
            for (auto &label_count : branch.es_in) {
                out << label_count.first << '\t' << label_count.second << '\t';
            }
        }
        out << endl; // terminates the incoming line, which may be empty
    }
    out.close();
}
}
#ifndef NAADSG_IO_H_
#define NAADSG_IO_H_
#include "graph.h"
#include <string>
#include <tuple>
#include <vector>
namespace std {
// Reads one graph id per line from `filename`; returns (number of ids read, the ids).
tuple<uint32_t,vector<uint32_t>> read_train_gids(string filename);
// Reads the edge file and keeps the edges whose graph id is listed in `train_gids`;
// returns (largest graph id seen + 1, kept edges).
tuple<uint32_t,vector<edge>> read_edges(string filename,vector<uint32_t> &train_gids);
// Writes every branch (root label, degrees and edge-label counts) to `branches_file`.
void branches_to_file(vector<Branch> &branches, string branches_file);
}
#endif
#include <algorithm>
#include <bitset>
#include <cassert>
#include <deque>
#include <iostream>
#include <string>
#include <unordered_map>
#include <set>
#include <vector>
#include <algorithm>
#include <sstream>
#include <random>
#include <chrono>
#include <fstream>
#include "docopt.h"
#include "graph.h"
#include "hash.h"
#include "io.h"
#include "param.h"
using namespace std;
// Usage text handed to docopt::docopt() in main(). docopt parses this text at
// runtime, so editing it changes which command-line arguments are accepted.
static const char USAGE[] =
R"(LEADS (Training phase).
Usage:
LEADS --edges=<edge file>
--train=<train graphs file>
--dataset=<dataset>
--M=<number of branches>
--prototypes=<prototype branchs output>
--graph_vectors=<train graph vectors output>
LEADS (-h | --help)
Options:
-h, --help Show this screen.
--edges=<edge file> Incoming stream of edges.
--train=<train graphs file> Train graphs id's
--dataset=<dataset> 'ALL', 'YDC', 'GFC', 'YDG','AUTH,.
--M=<number of branches> Number of prototype branches
--prototypes=<prototype branchs output> Output file of prototype branches
--graph_vectors=<train graph vectors output> Output file of train graph vectors
)";
// Number of prototype branches, set from --M in main(); declared extern in
// param.h and read by SPS_C().
long M;
int main(int argc, char *argv[]) {
// arguments
map<string, docopt::value> args = docopt::docopt(USAGE, {argv + 1, argv + argc});
string edge_file(args["--edges"].asString());
string train_gids_file(args["--train"].asString());
string prototypes_output(args["--prototypes"].asString());
string graph_vectors_output(args["--graph_vectors"].asString());
M = args["--M"].asLong();
string dataset(args["--dataset"].asString());
if (!(dataset.compare("ALL") == 0 ||
dataset.compare("AUTH")== 0 ||
dataset.compare("YDC") == 0 ||
dataset.compare("YDG") == 0 ||
dataset.compare("GFC") == 0)) {
cout << "Invalid dataset: " << dataset << ". ";
exit(-1);
}
uint32_t num_graphs;
vector<uint32_t> train_gids;
vector<edge> train_edges;
// reading training Gids
cerr << "Reading training gids..." << endl;
tie(num_graphs, train_gids) = read_train_gids(train_gids_file);
cerr << "Reading ALL edges..." << endl;
tie(num_graphs, train_edges) = read_edges(edge_file, train_gids);
// per-graph data structures
unordered_map<uint32_t, graph_vector> graphs_vectors; // key = gid , value = the graph vector
// initialization of graphs vectors
for (int i = 0; i < num_graphs; i++) {
for (int j = 0; j < M; j++) graphs_vectors[i].push_back(0);
}
unordered_map<uint32_t, unordered_map<uint32_t, Branch>> train_graphs_to_branches; /* key 1 = gid
key 2 = branch id
value = branch*/
// construct training graphs
vector<graph> graphs(num_graphs);
cerr << "Constructing " << num_graphs << "train graphs..." << endl;
for (auto &e : train_edges) {
update_graphs(e, graphs);
}
cerr << "End of train graphs instantiation" << endl;
cerr << "Decomposition of " << num_graphs << "train graph to branches..." << endl;
// decompose the training graphs to vectors of branches
train_graphs_to_branches = graph_to_branches(graphs);// key = gid , value = map of the branches
cerr << "End of the decomposition " << endl;
cerr << "The selection of prototype branches" << endl;
vector<Branch> prototype_branches;
vector<Branch> median_branches;
tie (prototype_branches,median_branches) = construct_prototype_branches(train_graphs_to_branches, train_gids,dataset);
cout << prototype_branches.size() << endl;
// saving prototype and median branches to files
branches_to_file(prototype_branches,prototypes_output);
cerr << "Transform training graphs to vectors" << endl;
vector<double> train_graphs_sizes; // index = gid , value = the size of the graph
unordered_map<uint32_t, double> graphs_sizes; // key = gid ; value = the size of the graph
for (auto g : graphs) {
uint32_t size = 0;
for (auto &r : g) {
size += r.second.size();
}
train_graphs_sizes.push_back(size);
}
vector<graph>().swap(graphs);
cerr << "Converting test graphs to vectors" << endl;
for (auto &g : train_graphs_to_branches) {
if (find(train_gids.begin(), train_gids.end(), g.first) != train_gids.end()) { // is a train graph
for (int i = 0; i < prototype_branches.size(); i++) {
graphs_vectors[g.first].push_back(0);
}
cout << g.first << endl;
for (auto &br : g.second) {
double w = (br.second.d_in + br.second.d_out) /
(2 * train_graphs_sizes.at(g.first)); // the weight of the branch br
int k = 0;
for (auto &brp:prototype_branches) {
double bed = compute_branch_edit_distance(br.second, brp);
graphs_vectors[g.first].at(k) += w * (1 - bed);
k++;
}
}
}
}
/// saving the train graph vectors to file
ofstream out_file;
out_file.open(graph_vectors_output);
for (auto &gid:train_gids) {
cout << gid << "\t";
out_file << gid << "\t";
for (int i = 0; i < (M-1); i++) {
out_file << graphs_vectors[gid].at(i) <<"\t";
cout << graphs_vectors[gid].at(i) << "\t";
}
out_file << graphs_vectors[gid].at(M-1) << endl;
cout << graphs_vectors[gid].at(M-1) << endl;
}
return 0;
}
\ No newline at end of file
#ifndef NAADSG_PARAM_H_
#define NAADSG_PARAM_H_
// NOTE(review): the standard assert() macro is disabled whenever NDEBUG is
// defined, whatever its value, so this presumably switches assertions OFF in
// DEBUG builds - confirm that this is intended.
#ifdef DEBUG
#define NDEBUG 0
#endif
// Number of prototype branches; defined in main.cpp and set from the --M option.
extern long M;
//#define M 25
// NOTE(review): SEED is not referenced by any code visible here.
#define SEED 23
// Starting value for the "nearest prototype" search in SPS_C().
#define INF 5000000
#endif
#ifndef NAADSG_UTIL_H_
#define NAADSG_UTIL_H_
#include <string>
#include <iostream>
namespace std {
    // Prints `message` on its own line to stdout and terminates the program
    // with exit status -1.
    inline void panic(string message) {
        cout << message;
        cout << endl;
        exit(-1);
    }
}
#endif
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment