Skip to content
Snippets Groups Projects
Commit 99742c94 authored by Abd Errahmane Kiouche's avatar Abd Errahmane Kiouche :speech_balloon:
Browse files

Update src/Training_Phase/graphs_to_vectors/docopt.cpp,...

Update src/Training_Phase/graphs_to_vectors/docopt.cpp, src/Training_Phase/graphs_to_vectors/docopt.h, src/Training_Phase/graphs_to_vectors/docopt_private.h, src/Training_Phase/graphs_to_vectors/docopt_util.h, src/Training_Phase/graphs_to_vectors/graph.cpp, src/Training_Phase/graphs_to_vectors/graph.h, src/Training_Phase/graphs_to_vectors/io.h, src/Training_Phase/graphs_to_vectors/hash.h, src/Training_Phase/graphs_to_vectors/docopt_value.h, src/Training_Phase/graphs_to_vectors/io.cpp, src/Training_Phase/graphs_to_vectors/main.cpp, src/Training_Phase/graphs_to_vectors/param.h, src/Training_Phase/graphs_to_vectors/util.h files
parent 1a43fd0e
No related branches found
No related tags found
No related merge requests found
Showing
with 2453 additions and 0 deletions
//
// docopt.cpp
// docopt
//
// Created by Jared Grubb on 2013-11-03.
// Copyright (c) 2013 Jared Grubb. All rights reserved.
//
#include "docopt.h"
#include "docopt_util.h"
#include "docopt_private.h"
#include "docopt_value.h"
#include <vector>
#include <unordered_set>
#include <unordered_map>
#include <map>
#include <string>
#include <regex>
#include <iostream>
#include <cassert>
#include <cstddef>
using namespace docopt;
DocoptExitHelp::DocoptExitHelp()
: std::runtime_error("Docopt --help argument encountered")
{}
DocoptExitVersion::DocoptExitVersion()
: std::runtime_error("Docopt --version argument encountered")
{}
const char* value::kindAsString(Kind kind)
{
switch (kind) {
case Kind::Empty: return "empty";
case Kind::Bool: return "bool";
case Kind::Long: return "long";
case Kind::String: return "string";
case Kind::StringList: return "string-list";
}
return "unknown";
}
void value::throwIfNotKind(Kind expected) const
{
if (kind == expected)
return;
std::string error = "Illegal cast to ";
error += kindAsString(expected);
error += "; type is actually ";
error += kindAsString(kind);
throw std::runtime_error(std::move(error));
}
std::ostream& docopt::operator<<(std::ostream& os, value const& val)
{
if (val.isBool()) {
bool b = val.asBool();
os << (b ? "true" : "false");
} else if (val.isLong()) {
long v = val.asLong();
os << v;
} else if (val.isString()) {
std::string const& str = val.asString();
os << '"' << str << '"';
} else if (val.isStringList()) {
auto const& list = val.asStringList();
os << "[";
bool first = true;
for(auto const& el : list) {
if (first) {
first = false;
} else {
os << ", ";
}
os << '"' << el << '"';
}
os << "]";
} else {
os << "null";
}
return os;
}
std::vector<LeafPattern*> Pattern::leaves() {
std::vector<LeafPattern*> ret;
collect_leaves(ret);
return ret;
}
bool Required::match(PatternList& left, std::vector<std::shared_ptr<LeafPattern>>& collected) const
{
auto l = left;
auto c = collected;
for(auto const& pattern : fChildren) {
bool ret = pattern->match(l, c);
if (!ret) {
// leave (left, collected) untouched
return false;
}
}
left = std::move(l);
collected = std::move(c);
return true;
}
bool LeafPattern::match(PatternList& left, std::vector<std::shared_ptr<LeafPattern>>& collected) const
{
auto match = single_match(left);
if (!match.second) {
return false;
}
left.erase(left.begin()+static_cast<std::ptrdiff_t>(match.first));
auto same_name = std::find_if(collected.begin(), collected.end(), [&](std::shared_ptr<LeafPattern> const& p) {
return p->name()==name();
});
if (getValue().isLong()) {
long val = 1;
if (same_name == collected.end()) {
collected.push_back(match.second);
match.second->setValue(value{val});
} else if ((**same_name).getValue().isLong()) {
val += (**same_name).getValue().asLong();
(**same_name).setValue(value{val});
} else {
(**same_name).setValue(value{val});
}
} else if (getValue().isStringList()) {
std::vector<std::string> val;
if (match.second->getValue().isString()) {
val.push_back(match.second->getValue().asString());
} else if (match.second->getValue().isStringList()) {
val = match.second->getValue().asStringList();
} else {
/// cant be!?
}
if (same_name == collected.end()) {
collected.push_back(match.second);
match.second->setValue(value{val});
} else if ((**same_name).getValue().isStringList()) {
std::vector<std::string> const& list = (**same_name).getValue().asStringList();
val.insert(val.begin(), list.begin(), list.end());
(**same_name).setValue(value{val});
} else {
(**same_name).setValue(value{val});
}
} else {
collected.push_back(match.second);
}
return true;
}
Option Option::parse(std::string const& option_description)
{
std::string shortOption, longOption;
int argcount = 0;
value val { false };
auto double_space = option_description.find(" ");
auto options_end = option_description.end();
if (double_space != std::string::npos) {
options_end = option_description.begin() + static_cast<std::ptrdiff_t>(double_space);
}
static const std::regex pattern {"(-{1,2})?(.*?)([,= ]|$)"};
for(std::sregex_iterator i {option_description.begin(), options_end, pattern, std::regex_constants::match_not_null},
e{};
i != e;
++i)
{
std::smatch const& match = *i;
if (match[1].matched) { // [1] is optional.
if (match[1].length()==1) {
shortOption = "-" + match[2].str();
} else {
longOption = "--" + match[2].str();
}
} else if (match[2].length() > 0) { // [2] always matches.
std::string m = match[2];
argcount = 1;
} else {
// delimeter
}
if (match[3].length() == 0) { // [3] always matches.
// Hit end of string. For some reason 'match_not_null' will let us match empty
// at the end, and then we'll spin in an infinite loop. So, if we hit an empty
// match, we know we must be at the end.
break;
}
}
if (argcount) {
std::smatch match;
if (std::regex_search(options_end, option_description.end(),
match,
std::regex{"\\[default: (.*)\\]", std::regex::icase}))
{
val = match[1].str();
}
}
return {std::move(shortOption),
std::move(longOption),
argcount,
std::move(val)};
}
bool OneOrMore::match(PatternList& left, std::vector<std::shared_ptr<LeafPattern>>& collected) const
{
assert(fChildren.size() == 1);
auto l = left;
auto c = collected;
bool matched = true;
size_t times = 0;
decltype(l) l_;
bool firstLoop = true;
while (matched) {
// could it be that something didn't match but changed l or c?
matched = fChildren[0]->match(l, c);
if (matched)
++times;
if (firstLoop) {
firstLoop = false;
} else if (l == l_) {
break;
}
l_ = l;
}
if (times == 0) {
return false;
}
left = std::move(l);
collected = std::move(c);
return true;
}
bool Either::match(PatternList& left, std::vector<std::shared_ptr<LeafPattern>>& collected) const
{
using Outcome = std::pair<PatternList, std::vector<std::shared_ptr<LeafPattern>>>;
std::vector<Outcome> outcomes;
for(auto const& pattern : fChildren) {
// need a copy so we apply the same one for every iteration
auto l = left;
auto c = collected;
bool matched = pattern->match(l, c);
if (matched) {
outcomes.emplace_back(std::move(l), std::move(c));
}
}
auto min = std::min_element(outcomes.begin(), outcomes.end(), [](Outcome const& o1, Outcome const& o2) {
return o1.first.size() < o2.first.size();
});
if (min == outcomes.end()) {
// (left, collected) unchanged
return false;
}
std::tie(left, collected) = std::move(*min);
return true;
}
std::pair<size_t, std::shared_ptr<LeafPattern>> Argument::single_match(PatternList const& left) const
{
std::pair<size_t, std::shared_ptr<LeafPattern>> ret {};
for(size_t i = 0, size = left.size(); i < size; ++i)
{
auto arg = dynamic_cast<Argument const*>(left[i].get());
if (arg) {
ret.first = i;
ret.second = std::make_shared<Argument>(name(), arg->getValue());
break;
}
}
return ret;
}
std::pair<size_t, std::shared_ptr<LeafPattern>> Command::single_match(PatternList const& left) const
{
std::pair<size_t, std::shared_ptr<LeafPattern>> ret {};
for(size_t i = 0, size = left.size(); i < size; ++i)
{
auto arg = dynamic_cast<Argument const*>(left[i].get());
if (arg) {
if (name() == arg->getValue()) {
ret.first = i;
ret.second = std::make_shared<Command>(name(), value{true});
}
break;
}
}
return ret;
}
std::pair<size_t, std::shared_ptr<LeafPattern>> Option::single_match(PatternList const& left) const
{
std::pair<size_t, std::shared_ptr<LeafPattern>> ret {};
for(size_t i = 0, size = left.size(); i < size; ++i)
{
auto leaf = std::dynamic_pointer_cast<LeafPattern>(left[i]);
if (leaf && name() == leaf->name()) {
ret.first = i;
ret.second = leaf;
break;
}
}
return ret;
}
static std::vector<PatternList> transform(PatternList pattern);
void BranchPattern::fix_repeating_arguments()
{
std::vector<PatternList> either = transform(children());
for(auto const& group : either) {
// use multiset to help identify duplicate entries
std::unordered_multiset<std::shared_ptr<Pattern>, PatternHasher> group_set {group.begin(), group.end()};
for(auto const& e : group_set) {
if (group_set.count(e) == 1)
continue;
LeafPattern* leaf = dynamic_cast<LeafPattern*>(e.get());
if (!leaf) continue;
bool ensureList = false;
bool ensureInt = false;
if (dynamic_cast<Command*>(leaf)) {
ensureInt = true;
} else if (dynamic_cast<Argument*>(leaf)) {
ensureList = true;
} else if (Option* o = dynamic_cast<Option*>(leaf)) {
if (o->argCount()) {
ensureList = true;
} else {
ensureInt = true;
}
}
if (ensureList) {
std::vector<std::string> newValue;
if (leaf->getValue().isString()) {
newValue = split(leaf->getValue().asString());
}
if (!leaf->getValue().isStringList()) {
leaf->setValue(value{newValue});
}
} else if (ensureInt) {
leaf->setValue(value{0});
}
}
}
}
static std::vector<PatternList> transform(PatternList pattern)
{
std::vector<PatternList> result;
std::vector<PatternList> groups;
groups.emplace_back(std::move(pattern));
while(!groups.empty()) {
// pop off the first element
auto children = std::move(groups[0]);
groups.erase(groups.begin());
// find the first branch node in the list
auto child_iter = std::find_if(children.begin(), children.end(), [](std::shared_ptr<Pattern> const& p) {
return dynamic_cast<BranchPattern const*>(p.get());
});
// no branch nodes left : expansion is complete for this grouping
if (child_iter == children.end()) {
result.emplace_back(std::move(children));
continue;
}
// pop the child from the list
auto child = std::move(*child_iter);
children.erase(child_iter);
// expand the branch in the appropriate way
if (Either* either = dynamic_cast<Either*>(child.get())) {
// "[e] + children" for each child 'e' in Either
for(auto const& eitherChild : either->children()) {
PatternList group = { eitherChild };
group.insert(group.end(), children.begin(), children.end());
groups.emplace_back(std::move(group));
}
} else if (OneOrMore* oneOrMore = dynamic_cast<OneOrMore*>(child.get())) {
// child.children * 2 + children
auto const& subchildren = oneOrMore->children();
PatternList group = subchildren;
group.insert(group.end(), subchildren.begin(), subchildren.end());
group.insert(group.end(), children.begin(), children.end());
groups.emplace_back(std::move(group));
} else { // Required, Optional, OptionsShortcut
BranchPattern* branch = dynamic_cast<BranchPattern*>(child.get());
// child.children + children
PatternList group = branch->children();
group.insert(group.end(), children.begin(), children.end());
groups.emplace_back(std::move(group));
}
}
return result;
}
class Tokens {
public:
Tokens(std::vector<std::string> tokens, bool isParsingArgv = true)
: fTokens(std::move(tokens)),
fIsParsingArgv(isParsingArgv)
{}
explicit operator bool() const {
return fIndex < fTokens.size();
}
static Tokens from_pattern(std::string const& source) {
static const std::regex re_separators {
"(?:\\s*)" // any spaces (non-matching subgroup)
"("
"[\\[\\]\\(\\)\\|]" // one character of brackets or parens or pipe character
"|"
"\\.\\.\\." // elipsis
")" };
static const std::regex re_strings {
"(?:\\s*)" // any spaces (non-matching subgroup)
"("
"\\S*<.*?>" // strings, but make sure to keep "< >" strings together
"|"
"[^<>\\s]+" // string without <>
")" };
// We do two stages of regex matching. The '[]()' and '...' are strong delimeters
// and need to be split out anywhere they occur (even at the end of a token). We
// first split on those, and then parse the stuff between them to find the string
// tokens. This is a little harder than the python version, since they have regex.split
// and we dont have anything like that.
std::vector<std::string> tokens;
std::for_each(std::sregex_iterator{ source.begin(), source.end(), re_separators },
std::sregex_iterator{},
[&](std::smatch const& match)
{
// handle anything before the separator (this is the "stuff" between the delimeters)
if (match.prefix().matched) {
std::for_each(std::sregex_iterator{match.prefix().first, match.prefix().second, re_strings},
std::sregex_iterator{},
[&](std::smatch const& m)
{
tokens.push_back(m[1].str());
});
}
// handle the delimter token itself
if (match[1].matched) {
tokens.push_back(match[1].str());
}
});
return Tokens(tokens, false);
}
std::string const& current() const {
if (*this)
return fTokens[fIndex];
static std::string const empty;
return empty;
}
std::string the_rest() const {
if (!*this)
return {};
return join(fTokens.begin()+static_cast<std::ptrdiff_t>(fIndex),
fTokens.end(),
" ");
}
std::string pop() {
return std::move(fTokens.at(fIndex++));
}
bool isParsingArgv() const { return fIsParsingArgv; }
struct OptionError : std::runtime_error { using runtime_error::runtime_error; };
private:
std::vector<std::string> fTokens;
size_t fIndex = 0;
bool fIsParsingArgv;
};
// Get all instances of 'T' from the pattern
template <typename T>
std::vector<T*> flat_filter(Pattern& pattern) {
std::vector<Pattern*> flattened = pattern.flat([](Pattern const* p) -> bool {
return dynamic_cast<T const*>(p) != nullptr;
});
// now, we're guaranteed to have T*'s, so just use static_cast
std::vector<T*> ret;
std::transform(flattened.begin(), flattened.end(), std::back_inserter(ret), [](Pattern* p) {
return static_cast<T*>(p);
});
return ret;
}
static std::vector<std::string> parse_section(std::string const& name, std::string const& source) {
// ECMAScript regex only has "?=" for a non-matching lookahead. In order to make sure we always have
// a newline to anchor our matching, we have to avoid matching the final newline of each grouping.
// Therefore, our regex is adjusted from the docopt Python one to use ?= to match the newlines before
// the following lines, rather than after.
std::regex const re_section_pattern {
"(?:^|\\n)" // anchored at a linebreak (or start of string)
"("
"[^\\n]*" + name + "[^\\n]*(?=\\n?)" // a line that contains the name
"(?:\\n[ \\t].*?(?=\\n|$))*" // followed by any number of lines that are indented
")",
std::regex::icase
};
std::vector<std::string> ret;
std::for_each(std::sregex_iterator(source.begin(), source.end(), re_section_pattern),
std::sregex_iterator(),
[&](std::smatch const& match)
{
ret.push_back(trim(match[1].str()));
});
return ret;
}
static bool is_argument_spec(std::string const& token) {
if (token.empty())
return false;
if (token[0]=='<' && token[token.size()-1]=='>')
return true;
if (std::all_of(token.begin(), token.end(), &::isupper))
return true;
return false;
}
template <typename I>
std::vector<std::string> longOptions(I iter, I end) {
std::vector<std::string> ret;
std::transform(iter, end,
std::back_inserter(ret),
[](typename I::reference opt) { return opt->longOption(); });
return ret;
}
static PatternList parse_long(Tokens& tokens, std::vector<Option>& options)
{
// long ::= '--' chars [ ( ' ' | '=' ) chars ] ;
std::string longOpt, equal;
value val;
std::tie(longOpt, equal, val) = partition(tokens.pop(), "=");
assert(starts_with(longOpt, "--"));
if (equal.empty()) {
val = value{};
}
// detect with options match this long option
std::vector<Option const*> similar;
for(auto const& option : options) {
if (option.longOption()==longOpt)
similar.push_back(&option);
}
// maybe allow similar options that match by prefix
if (tokens.isParsingArgv() && similar.empty()) {
for(auto const& option : options) {
if (option.longOption().empty())
continue;
if (starts_with(option.longOption(), longOpt))
similar.push_back(&option);
}
}
PatternList ret;
if (similar.size() > 1) { // might be simply specified ambiguously 2+ times?
std::vector<std::string> prefixes = longOptions(similar.begin(), similar.end());
std::string error = "'" + longOpt + "' is not a unique prefix: ";
error.append(join(prefixes.begin(), prefixes.end(), ", "));
throw Tokens::OptionError(std::move(error));
} else if (similar.empty()) {
int argcount = equal.empty() ? 0 : 1;
options.emplace_back("", longOpt, argcount);
auto o = std::make_shared<Option>(options.back());
if (tokens.isParsingArgv()) {
o->setValue(argcount ? value{val} : value{true});
}
ret.push_back(o);
} else {
auto o = std::make_shared<Option>(*similar[0]);
if (o->argCount() == 0) {
if (val) {
std::string error = o->longOption() + " must not have an argument";
throw Tokens::OptionError(std::move(error));
}
} else {
if (!val) {
auto const& token = tokens.current();
if (token.empty() || token=="--") {
std::string error = o->longOption() + " requires an argument";
throw Tokens::OptionError(std::move(error));
}
val = tokens.pop();
}
}
if (tokens.isParsingArgv()) {
o->setValue(val ? std::move(val) : value{true});
}
ret.push_back(o);
}
return ret;
}
static PatternList parse_short(Tokens& tokens, std::vector<Option>& options)
{
// shorts ::= '-' ( chars )* [ [ ' ' ] chars ] ;
auto token = tokens.pop();
assert(starts_with(token, "-"));
assert(!starts_with(token, "--"));
auto i = token.begin();
++i; // skip the leading '-'
PatternList ret;
while (i != token.end()) {
std::string shortOpt = { '-', *i };
++i;
std::vector<Option const*> similar;
for(auto const& option : options) {
if (option.shortOption()==shortOpt)
similar.push_back(&option);
}
if (similar.size() > 1) {
std::string error = shortOpt + " is specified ambiguously "
+ std::to_string(similar.size()) + " times";
throw Tokens::OptionError(std::move(error));
} else if (similar.empty()) {
options.emplace_back(shortOpt, "", 0);
auto o = std::make_shared<Option>(options.back());
if (tokens.isParsingArgv()) {
o->setValue(value{true});
}
ret.push_back(o);
} else {
auto o = std::make_shared<Option>(*similar[0]);
value val;
if (o->argCount()) {
if (i == token.end()) {
// consume the next token
auto const& ttoken = tokens.current();
if (ttoken.empty() || ttoken=="--") {
std::string error = shortOpt + " requires an argument";
throw Tokens::OptionError(std::move(error));
}
val = tokens.pop();
} else {
// consume all the rest
val = std::string{i, token.end()};
i = token.end();
}
}
if (tokens.isParsingArgv()) {
o->setValue(val ? std::move(val) : value{true});
}
ret.push_back(o);
}
}
return ret;
}
static PatternList parse_expr(Tokens& tokens, std::vector<Option>& options);
static PatternList parse_atom(Tokens& tokens, std::vector<Option>& options)
{
// atom ::= '(' expr ')' | '[' expr ']' | 'options'
// | long | shorts | argument | command ;
std::string const& token = tokens.current();
PatternList ret;
if (token == "[") {
tokens.pop();
auto expr = parse_expr(tokens, options);
auto trailing = tokens.pop();
if (trailing != "]") {
throw DocoptLanguageError("Mismatched '['");
}
ret.emplace_back(std::make_shared<Optional>(std::move(expr)));
} else if (token=="(") {
tokens.pop();
auto expr = parse_expr(tokens, options);
auto trailing = tokens.pop();
if (trailing != ")") {
throw DocoptLanguageError("Mismatched '('");
}
ret.emplace_back(std::make_shared<Required>(std::move(expr)));
} else if (token == "options") {
tokens.pop();
ret.emplace_back(std::make_shared<OptionsShortcut>());
} else if (starts_with(token, "--") && token != "--") {
ret = parse_long(tokens, options);
} else if (starts_with(token, "-") && token != "-" && token != "--") {
ret = parse_short(tokens, options);
} else if (is_argument_spec(token)) {
ret.emplace_back(std::make_shared<Argument>(tokens.pop()));
} else {
ret.emplace_back(std::make_shared<Command>(tokens.pop()));
}
return ret;
}
static PatternList parse_seq(Tokens& tokens, std::vector<Option>& options)
{
// seq ::= ( atom [ '...' ] )* ;"""
PatternList ret;
while (tokens) {
auto const& token = tokens.current();
if (token=="]" || token==")" || token=="|")
break;
auto atom = parse_atom(tokens, options);
if (tokens.current() == "...") {
ret.emplace_back(std::make_shared<OneOrMore>(std::move(atom)));
tokens.pop();
} else {
std::move(atom.begin(), atom.end(), std::back_inserter(ret));
}
}
return ret;
}
static std::shared_ptr<Pattern> maybe_collapse_to_required(PatternList&& seq)
{
if (seq.size()==1) {
return std::move(seq[0]);
}
return std::make_shared<Required>(std::move(seq));
}
static std::shared_ptr<Pattern> maybe_collapse_to_either(PatternList&& seq)
{
if (seq.size()==1) {
return std::move(seq[0]);
}
return std::make_shared<Either>(std::move(seq));
}
PatternList parse_expr(Tokens& tokens, std::vector<Option>& options)
{
// expr ::= seq ( '|' seq )* ;
auto seq = parse_seq(tokens, options);
if (tokens.current() != "|")
return seq;
PatternList ret;
ret.emplace_back(maybe_collapse_to_required(std::move(seq)));
while (tokens.current() == "|") {
tokens.pop();
seq = parse_seq(tokens, options);
ret.emplace_back(maybe_collapse_to_required(std::move(seq)));
}
return { maybe_collapse_to_either(std::move(ret)) };
}
static Required parse_pattern(std::string const& source, std::vector<Option>& options)
{
auto tokens = Tokens::from_pattern(source);
auto result = parse_expr(tokens, options);
if (tokens)
throw DocoptLanguageError("Unexpected ending: '" + tokens.the_rest() + "'");
assert(result.size() == 1 && "top level is always one big");
return Required{ std::move(result) };
}
static std::string formal_usage(std::string const& section) {
std::string ret = "(";
auto i = section.find(':')+1; // skip past "usage:"
auto parts = split(section, i);
for(size_t ii = 1; ii < parts.size(); ++ii) {
if (parts[ii] == parts[0]) {
ret += " ) | (";
} else {
ret.push_back(' ');
ret += parts[ii];
}
}
ret += " )";
return ret;
}
static PatternList parse_argv(Tokens tokens, std::vector<Option>& options, bool options_first)
{
// Parse command-line argument vector.
//
// If options_first:
// argv ::= [ long | shorts ]* [ argument ]* [ '--' [ argument ]* ] ;
// else:
// argv ::= [ long | shorts | argument ]* [ '--' [ argument ]* ] ;
PatternList ret;
while (tokens) {
auto const& token = tokens.current();
if (token=="--") {
// option list is done; convert all the rest to arguments
while (tokens) {
ret.emplace_back(std::make_shared<Argument>("", tokens.pop()));
}
} else if (starts_with(token, "--")) {
auto&& parsed = parse_long(tokens, options);
std::move(parsed.begin(), parsed.end(), std::back_inserter(ret));
} else if (token[0]=='-' && token != "-") {
auto&& parsed = parse_short(tokens, options);
std::move(parsed.begin(), parsed.end(), std::back_inserter(ret));
} else if (options_first) {
// option list is done; convert all the rest to arguments
while (tokens) {
ret.emplace_back(std::make_shared<Argument>("", tokens.pop()));
}
} else {
ret.emplace_back(std::make_shared<Argument>("", tokens.pop()));
}
}
return ret;
}
static std::vector<Option> parse_defaults(std::string const& doc) {
// This pattern is a bit more complex than the python docopt one due to lack of
// re.split. Effectively, it grabs any line with leading whitespace and then a
// hyphen; it stops grabbing when it hits another line that also looks like that.
static std::regex const pattern {
"(?:^|\\n)[ \\t]*" // a new line with leading whitespace
"(-(.|\\n)*?)" // a hyphen, and then grab everything it can...
"(?=\\n[ \\t]*-|$)" // .. until it hits another new line with space and a hyphen
};
std::vector<Option> defaults;
for(auto s : parse_section("options:", doc)) {
s.erase(s.begin(), s.begin()+static_cast<std::ptrdiff_t>(s.find(':'))+1); // get rid of "options:"
std::for_each(std::sregex_iterator{ s.begin(), s.end(), pattern },
std::sregex_iterator{},
[&](std::smatch const& m)
{
std::string opt = m[1].str();
if (starts_with(opt, "-")) {
defaults.emplace_back(Option::parse(opt));
}
});
}
return defaults;
}
static bool isOptionSet(PatternList const& options, std::string const& opt1, std::string const& opt2 = "") {
return std::any_of(options.begin(), options.end(), [&](std::shared_ptr<Pattern const> const& opt) -> bool {
auto const& name = opt->name();
if (name==opt1 || (!opt2.empty() && name==opt2)) {
return opt->hasValue();
}
return false;
});
}
static void extras(bool help, bool version, PatternList const& options) {
if (help && isOptionSet(options, "-h", "--help")) {
throw DocoptExitHelp();
}
if (version && isOptionSet(options, "--version")) {
throw DocoptExitVersion();
}
}
// Parse the doc string and generate the Pattern tree
static std::pair<Required, std::vector<Option>> create_pattern_tree(std::string const& doc)
{
auto usage_sections = parse_section("usage:", doc);
if (usage_sections.empty()) {
throw DocoptLanguageError("'usage:' (case-insensitive) not found.");
}
if (usage_sections.size() > 1) {
throw DocoptLanguageError("More than one 'usage:' (case-insensitive).");
}
std::vector<Option> options = parse_defaults(doc);
Required pattern = parse_pattern(formal_usage(usage_sections[0]), options);
std::vector<Option const*> pattern_options = flat_filter<Option const>(pattern);
using UniqueOptions = std::unordered_set<Option const*, PatternHasher, PatternPointerEquality>;
UniqueOptions const uniq_pattern_options { pattern_options.begin(), pattern_options.end() };
// Fix up any "[options]" shortcuts with the actual option tree
for(auto& options_shortcut : flat_filter<OptionsShortcut>(pattern)) {
std::vector<Option> doc_options = parse_defaults(doc);
// set(doc_options) - set(pattern_options)
UniqueOptions uniq_doc_options;
for(auto const& opt : doc_options) {
if (uniq_pattern_options.count(&opt))
continue;
uniq_doc_options.insert(&opt);
}
// turn into shared_ptr's and set as children
PatternList children;
std::transform(uniq_doc_options.begin(), uniq_doc_options.end(),
std::back_inserter(children), [](Option const* opt) {
return std::make_shared<Option>(*opt);
});
options_shortcut->setChildren(std::move(children));
}
return { std::move(pattern), std::move(options) };
}
std::map<std::string, value>
docopt::docopt_parse(std::string const& doc,
std::vector<std::string> const& argv,
bool help,
bool version,
bool options_first)
{
Required pattern;
std::vector<Option> options;
try {
std::tie(pattern, options) = create_pattern_tree(doc);
} catch (Tokens::OptionError const& error) {
throw DocoptLanguageError(error.what());
}
PatternList argv_patterns;
try {
argv_patterns = parse_argv(Tokens(argv), options, options_first);
} catch (Tokens::OptionError const& error) {
throw DocoptArgumentError(error.what());
}
extras(help, version, argv_patterns);
std::vector<std::shared_ptr<LeafPattern>> collected;
bool matched = pattern.fix().match(argv_patterns, collected);
if (matched && argv_patterns.empty()) {
std::map<std::string, value> ret;
// (a.name, a.value) for a in (pattern.flat() + collected)
for (auto* p : pattern.leaves()) {
ret[p->name()] = p->getValue();
}
for (auto const& p : collected) {
ret[p->name()] = p->getValue();
}
return ret;
}
if (matched) {
std::string leftover = join(argv.begin(), argv.end(), ", ");
throw DocoptArgumentError("Unexpected argument: " + leftover);
}
throw DocoptArgumentError("Arguments did not match expected patterns"); // BLEH. Bad error.
}
std::map<std::string, value>
docopt::docopt(std::string const& doc,
std::vector<std::string> const& argv,
bool help,
std::string const& version,
bool options_first) noexcept
{
try {
return docopt_parse(doc, argv, help, !version.empty(), options_first);
} catch (DocoptExitHelp const&) {
std::cout << doc << std::endl;
std::exit(0);
} catch (DocoptExitVersion const&) {
std::cout << version << std::endl;
std::exit(0);
} catch (DocoptLanguageError const& error) {
std::cerr << "Docopt usage string could not be parsed" << std::endl;
std::cerr << error.what() << std::endl;
std::exit(-1);
} catch (DocoptArgumentError const& error) {
std::cerr << error.what();
std::cout << std::endl;
std::cout << doc << std::endl;
std::exit(-1);
} /* Any other exception is unexpected: let std::terminate grab it */
}
//
// docopt.h
// docopt
//
// Created by Jared Grubb on 2013-11-03.
// Copyright (c) 2013 Jared Grubb. All rights reserved.
//
#ifndef docopt__docopt_h_
#define docopt__docopt_h_
#include "docopt_value.h"
#include <map>
#include <vector>
#include <string>
namespace docopt {
// Usage string could not be parsed (ie, the developer did something wrong)
struct DocoptLanguageError : std::runtime_error { using runtime_error::runtime_error; };
// Arguments passed by user were incorrect (ie, developer was good, user is wrong)
struct DocoptArgumentError : std::runtime_error { using runtime_error::runtime_error; };
// Arguments contained '--help' and parsing was aborted early
struct DocoptExitHelp : std::runtime_error { DocoptExitHelp(); };
// Arguments contained '--version' and parsing was aborted early
struct DocoptExitVersion : std::runtime_error { DocoptExitVersion(); };
/// Parse user options from the given option string.
///
/// @param doc The usage string
/// @param argv The user-supplied arguments
/// @param help Whether to end early if '-h' or '--help' is in the argv
/// @param version Whether to end early if '--version' is in the argv
/// @param options_first Whether options must precede all args (true), or if args and options
/// can be arbitrarily mixed.
///
/// @throws DocoptLanguageError if the doc usage string had errors itself
/// @throws DocoptExitHelp if 'help' is true and the user has passed the '--help' argument
/// @throws DocoptExitVersion if 'version' is true and the user has passed the '--version' argument
/// @throws DocoptArgumentError if the user's argv did not match the usage patterns
std::map<std::string, value> docopt_parse(std::string const& doc,
std::vector<std::string> const& argv,
bool help = true,
bool version = true,
bool options_first = false);
/// Parse user options from the given string, and exit appropriately
///
/// Calls 'docopt_parse' and will terminate the program if any of the exceptions above occur:
/// * DocoptLanguageError - print error and terminate (with exit code -1)
/// * DocoptExitHelp - print usage string and terminate (with exit code 0)
/// * DocoptExitVersion - print version and terminate (with exit code 0)
/// * DocoptArgumentError - print error and usage string and terminate (with exit code -1)
std::map<std::string, value> docopt(std::string const& doc,
std::vector<std::string> const& argv,
bool help = true,
std::string const& version = {},
bool options_first = false) noexcept;
}
#endif /* defined(docopt__docopt_h_) */
//
// docopt_private.h
// docopt
//
// Created by Jared Grubb on 2013-11-04.
// Copyright (c) 2013 Jared Grubb. All rights reserved.
//
#ifndef docopt_docopt_private_h
#define docopt_docopt_private_h
#include <vector>
#include <memory>
#include <unordered_set>
#include "docopt_value.h"
namespace docopt {
class Pattern;
class LeafPattern;
using PatternList = std::vector<std::shared_ptr<Pattern>>;
// Utility to use Pattern types in std hash-containers
struct PatternHasher {
template <typename P>
size_t operator()(std::shared_ptr<P> const& pattern) const {
return pattern->hash();
}
template <typename P>
size_t operator()(P const* pattern) const {
return pattern->hash();
}
template <typename P>
size_t operator()(P const& pattern) const {
return pattern.hash();
}
};
// Utility to use 'hash' as the equality operator as well in std containers
struct PatternPointerEquality {
template <typename P1, typename P2>
bool operator()(std::shared_ptr<P1> const& p1, std::shared_ptr<P2> const& p2) const {
return p1->hash()==p2->hash();
}
template <typename P1, typename P2>
bool operator()(P1 const* p1, P2 const* p2) const {
return p1->hash()==p2->hash();
}
};
// A hash-set that uniques by hash value
using UniquePatternSet = std::unordered_set<std::shared_ptr<Pattern>, PatternHasher, PatternPointerEquality>;
class Pattern {
public:
// flatten out children, stopping descent when the given filter returns 'true'
virtual std::vector<Pattern*> flat(bool (*filter)(Pattern const*)) = 0;
// flatten out all children into a list of LeafPattern objects
virtual void collect_leaves(std::vector<LeafPattern*>&) = 0;
// flatten out all children into a list of LeafPattern objects
std::vector<LeafPattern*> leaves();
// Attempt to find something in 'left' that matches this pattern's spec, and if so, move it to 'collected'
virtual bool match(PatternList& left, std::vector<std::shared_ptr<LeafPattern>>& collected) const = 0;
virtual std::string const& name() const = 0;
virtual bool hasValue() const { return false; }
virtual size_t hash() const = 0;
virtual ~Pattern() = default;
};
class LeafPattern
: public Pattern {
public:
LeafPattern(std::string name, value v = {})
: fName(std::move(name)),
fValue(std::move(v))
{}
virtual std::vector<Pattern*> flat(bool (*filter)(Pattern const*)) override {
if (filter(this)) {
return { this };
}
return {};
}
virtual void collect_leaves(std::vector<LeafPattern*>& lst) override final {
lst.push_back(this);
}
virtual bool match(PatternList& left, std::vector<std::shared_ptr<LeafPattern>>& collected) const override;
virtual bool hasValue() const override { return static_cast<bool>(fValue); }
value const& getValue() const { return fValue; }
void setValue(value&& v) { fValue = std::move(v); }
virtual std::string const& name() const override { return fName; }
virtual size_t hash() const override {
size_t seed = typeid(*this).hash_code();
hash_combine(seed, fName);
hash_combine(seed, fValue);
return seed;
}
protected:
virtual std::pair<size_t, std::shared_ptr<LeafPattern>> single_match(PatternList const&) const = 0;
private:
std::string fName;
value fValue;
};
class BranchPattern
: public Pattern {
public:
BranchPattern(PatternList children = {})
: fChildren(std::move(children))
{}
Pattern& fix() {
UniquePatternSet patterns;
fix_identities(patterns);
fix_repeating_arguments();
return *this;
}
virtual std::string const& name() const override {
throw std::runtime_error("Logic error: name() shouldnt be called on a BranchPattern");
}
virtual value const& getValue() const {
throw std::runtime_error("Logic error: name() shouldnt be called on a BranchPattern");
}
virtual std::vector<Pattern*> flat(bool (*filter)(Pattern const*)) override {
if (filter(this)) {
return {this};
}
std::vector<Pattern*> ret;
for(auto& child : fChildren) {
auto sublist = child->flat(filter);
ret.insert(ret.end(), sublist.begin(), sublist.end());
}
return ret;
}
virtual void collect_leaves(std::vector<LeafPattern*>& lst) override final {
for(auto& child : fChildren) {
child->collect_leaves(lst);
}
}
void setChildren(PatternList children) {
fChildren = std::move(children);
}
PatternList const& children() const { return fChildren; }
virtual void fix_identities(UniquePatternSet& patterns) {
for(auto& child : fChildren) {
// this will fix up all its children, if needed
if (auto bp = dynamic_cast<BranchPattern*>(child.get())) {
bp->fix_identities(patterns);
}
// then we try to add it to the list
auto inserted = patterns.insert(child);
if (!inserted.second) {
// already there? then reuse the existing shared_ptr for that thing
child = *inserted.first;
}
}
}
virtual size_t hash() const override {
size_t seed = typeid(*this).hash_code();
hash_combine(seed, fChildren.size());
for(auto const& child : fChildren) {
hash_combine(seed, child->hash());
}
return seed;
}
private:
void fix_repeating_arguments();
protected:
PatternList fChildren;
};
class Argument
: public LeafPattern {
public:
using LeafPattern::LeafPattern;
protected:
virtual std::pair<size_t, std::shared_ptr<LeafPattern>> single_match(PatternList const& left) const override;
};
class Command : public Argument {
public:
Command(std::string name, value v = value{false})
: Argument(std::move(name), std::move(v))
{}
protected:
virtual std::pair<size_t, std::shared_ptr<LeafPattern>> single_match(PatternList const& left) const override;
};
class Option final
: public LeafPattern
{
public:
static Option parse(std::string const& option_description);
Option(std::string shortOption,
std::string longOption,
int argcount = 0,
value v = value{false})
: LeafPattern(longOption.empty() ? shortOption : longOption,
std::move(v)),
fShortOption(std::move(shortOption)),
fLongOption(std::move(longOption)),
fArgcount(argcount)
{
// From Python:
// self.value = None if value is False and argcount else value
if (argcount && v.isBool() && !v.asBool()) {
setValue(value{});
}
}
Option(Option const&) = default;
Option(Option&&) = default;
Option& operator=(Option const&) = default;
Option& operator=(Option&&) = default;
using LeafPattern::setValue;
std::string const& longOption() const { return fLongOption; }
std::string const& shortOption() const { return fShortOption; }
int argCount() const { return fArgcount; }
virtual size_t hash() const override {
size_t seed = LeafPattern::hash();
hash_combine(seed, fShortOption);
hash_combine(seed, fLongOption);
hash_combine(seed, fArgcount);
return seed;
}
protected:
virtual std::pair<size_t, std::shared_ptr<LeafPattern>> single_match(PatternList const& left) const override;
private:
std::string fShortOption;
std::string fLongOption;
int fArgcount;
};
class Required : public BranchPattern {
public:
using BranchPattern::BranchPattern;
bool match(PatternList& left, std::vector<std::shared_ptr<LeafPattern>>& collected) const override;
};
class Optional : public BranchPattern {
public:
using BranchPattern::BranchPattern;
bool match(PatternList& left, std::vector<std::shared_ptr<LeafPattern>>& collected) const override {
for(auto const& pattern : fChildren) {
pattern->match(left, collected);
}
return true;
}
};
class OptionsShortcut : public Optional {
using Optional::Optional;
};
class OneOrMore : public BranchPattern {
public:
using BranchPattern::BranchPattern;
bool match(PatternList& left, std::vector<std::shared_ptr<LeafPattern>>& collected) const override;
};
class Either : public BranchPattern {
public:
using BranchPattern::BranchPattern;
bool match(PatternList& left, std::vector<std::shared_ptr<LeafPattern>>& collected) const override;
};
}
#endif
//
// docopt_util.h
// docopt
//
// Created by Jared Grubb on 2013-11-04.
// Copyright (c) 2013 Jared Grubb. All rights reserved.
//
#ifndef docopt_docopt_util_h
#define docopt_docopt_util_h
namespace {
bool starts_with(std::string const& str, std::string const& prefix)
{
if (str.length() < prefix.length())
return false;
return std::equal(prefix.begin(), prefix.end(),
str.begin());
}
std::string trim(std::string&& str,
const std::string& whitespace = " \t\n")
{
const auto strEnd = str.find_last_not_of(whitespace);
if (strEnd==std::string::npos)
return {}; // no content
str.erase(strEnd+1);
const auto strBegin = str.find_first_not_of(whitespace);
str.erase(0, strBegin);
return std::move(str);
}
std::vector<std::string> split(std::string const& str, size_t pos = 0)
{
const char* const anySpace = " \t\r\n\v\f";
std::vector<std::string> ret;
while (pos != std::string::npos) {
auto start = str.find_first_not_of(anySpace, pos);
if (start == std::string::npos) break;
auto end = str.find_first_of(anySpace, start);
auto size = end==std::string::npos ? end : end-start;
ret.emplace_back(str.substr(start, size));
pos = end;
}
return ret;
}
std::tuple<std::string, std::string, std::string> partition(std::string str, std::string const& point)
{
std::tuple<std::string, std::string, std::string> ret;
auto i = str.find(point);
if (i == std::string::npos) {
// no match: string goes in 0th spot only
} else {
std::get<2>(ret) = str.substr(i + point.size());
std::get<1>(ret) = point;
str.resize(i);
}
std::get<0>(ret) = std::move(str);
return ret;
}
template <typename I>
std::string join(I iter, I end, std::string const& delim) {
if (iter==end)
return {};
std::string ret = *iter;
for(++iter; iter!=end; ++iter) {
ret.append(delim);
ret.append(*iter);
}
return ret;
}
}
namespace docopt {
template <class T>
inline void hash_combine(std::size_t& seed, T const& v)
{
// stolen from boost::hash_combine
std::hash<T> hasher;
seed ^= hasher(v) + 0x9e3779b9 + (seed<<6) + (seed>>2);
}
}
#endif
//
// value.h
// docopt
//
// Created by Jared Grubb on 2013-10-14.
// Copyright (c) 2013 Jared Grubb. All rights reserved.
//
#ifndef docopt__value_h_
#define docopt__value_h_
#include <string>
#include <vector>
#include <functional> // std::hash
#include <iosfwd>
namespace docopt {
/// A generic type to hold the various types that can be produced by docopt.
///
/// This type can be one of: {bool, long, string, vector<string>}, or empty.
struct value {
/// An empty value
value() {}
value(std::string);
value(std::vector<std::string>);
explicit value(bool);
explicit value(long);
explicit value(int v) : value(static_cast<long>(v)) {}
~value();
value(value const&);
value(value&&) noexcept;
value& operator=(value const&);
value& operator=(value&&) noexcept;
// Test if this object has any contents at all
explicit operator bool() const { return kind != Kind::Empty; }
// Test the type contained by this value object
bool isBool() const { return kind==Kind::Bool; }
bool isString() const { return kind==Kind::String; }
bool isLong() const { return kind==Kind::Long; }
bool isStringList() const { return kind==Kind::StringList; }
// Throws std::invalid_argument if the type does not match
bool asBool() const;
long asLong() const;
std::string const& asString() const;
std::vector<std::string> const& asStringList() const;
size_t hash() const noexcept;
// equality is based on hash-equality
friend bool operator==(value const&, value const&);
friend bool operator!=(value const&, value const&);
private:
enum class Kind {
Empty,
Bool,
Long,
String,
StringList
};
union Variant {
Variant() {}
~Variant() { /* do nothing; will be destroyed by ~value */ }
bool boolValue;
long longValue;
std::string strValue;
std::vector<std::string> strList;
};
static const char* kindAsString(Kind);
void throwIfNotKind(Kind expected) const;
private:
Kind kind = Kind::Empty;
Variant variant {};
};
/// Write out the contents to the ostream
std::ostream& operator<<(std::ostream&, value const&);
}
namespace std {
template <>
struct hash<docopt::value> {
size_t operator()(docopt::value const& val) const noexcept {
return val.hash();
}
};
}
namespace docopt {
inline
value::value(bool v)
: kind(Kind::Bool)
{
variant.boolValue = v;
}
inline
value::value(long v)
: kind(Kind::Long)
{
variant.longValue = v;
}
inline
value::value(std::string v)
: kind(Kind::String)
{
new (&variant.strValue) std::string(std::move(v));
}
inline
value::value(std::vector<std::string> v)
: kind(Kind::StringList)
{
new (&variant.strList) std::vector<std::string>(std::move(v));
}
inline
value::value(value const& other)
: kind(other.kind)
{
switch (kind) {
case Kind::String:
new (&variant.strValue) std::string(other.variant.strValue);
break;
case Kind::StringList:
new (&variant.strList) std::vector<std::string>(other.variant.strList);
break;
case Kind::Bool:
variant.boolValue = other.variant.boolValue;
break;
case Kind::Long:
variant.longValue = other.variant.longValue;
break;
case Kind::Empty:
default:
break;
}
}
inline
value::value(value&& other) noexcept
: kind(other.kind)
{
switch (kind) {
case Kind::String:
new (&variant.strValue) std::string(std::move(other.variant.strValue));
break;
case Kind::StringList:
new (&variant.strList) std::vector<std::string>(std::move(other.variant.strList));
break;
case Kind::Bool:
variant.boolValue = other.variant.boolValue;
break;
case Kind::Long:
variant.longValue = other.variant.longValue;
break;
case Kind::Empty:
default:
break;
}
}
inline
value::~value()
{
switch (kind) {
case Kind::String:
variant.strValue.~basic_string();
break;
case Kind::StringList:
variant.strList.~vector();
break;
case Kind::Empty:
case Kind::Bool:
case Kind::Long:
default:
// trivial dtor
break;
}
}
inline
value& value::operator=(value const& other) {
// make a copy and move from it; way easier.
return *this = value{other};
}
inline
value& value::operator=(value&& other) noexcept {
// move of all the types involved is noexcept, so we dont have to worry about
// these two statements throwing, which gives us a consistency guarantee.
this->~value();
new (this) value(std::move(other));
return *this;
}
template <class T>
void hash_combine(std::size_t& seed, const T& v);
inline
size_t value::hash() const noexcept
{
switch (kind) {
case Kind::String:
return std::hash<std::string>()(variant.strValue);
case Kind::StringList: {
size_t seed = std::hash<size_t>()(variant.strList.size());
for(auto const& str : variant.strList) {
hash_combine(seed, str);
}
return seed;
}
case Kind::Bool:
return std::hash<bool>()(variant.boolValue);
case Kind::Long:
return std::hash<long>()(variant.longValue);
case Kind::Empty:
default:
return std::hash<void*>()(nullptr);
}
}
inline
bool value::asBool() const
{
throwIfNotKind(Kind::Bool);
return variant.boolValue;
}
inline
long value::asLong() const
{
// Attempt to convert a string to a long
if (kind == Kind::String) {
const std::string& str = variant.strValue;
std::size_t pos;
const long ret = stol(str, &pos); // Throws if it can't convert
if (pos != str.length()) {
// The string ended in non-digits.
throw std::runtime_error( str + " contains non-numeric characters.");
}
return ret;
}
throwIfNotKind(Kind::Long);
return variant.longValue;
}
inline
std::string const& value::asString() const
{
throwIfNotKind(Kind::String);
return variant.strValue;
}
inline
std::vector<std::string> const& value::asStringList() const
{
throwIfNotKind(Kind::StringList);
return variant.strList;
}
inline
bool operator==(value const& v1, value const& v2)
{
if (v1.kind != v2.kind)
return false;
switch (v1.kind) {
case value::Kind::String:
return v1.variant.strValue==v2.variant.strValue;
case value::Kind::StringList:
return v1.variant.strList==v2.variant.strList;
case value::Kind::Bool:
return v1.variant.boolValue==v2.variant.boolValue;
case value::Kind::Long:
return v1.variant.longValue==v2.variant.longValue;
case value::Kind::Empty:
default:
return true;
}
}
inline
bool operator!=(value const& v1, value const& v2)
{
return !(v1 == v2);
}
}
#endif /* defined(docopt__value_h_) */
//
// Created by Abderrahmane on 6/16/2018.
//
#include <algorithm>
#include <iostream>
#include <set>
#include <cmath>
#include <fstream>
#include "graph.h"
#include "hash.h"
#include "param.h"
namespace std {
void update_graphs(edge &e, vector<graph> &graphs) {
auto &src_id = get<F_S>(e);
auto &src_type = get<F_STYPE>(e);
auto &dst_id = get<F_D>(e);
auto &dst_type = get<F_DTYPE>(e);
auto &e_type = get<F_ETYPE>(e);
auto &gid = get<F_GID>(e);
// append edge to the edge list for the source
graphs[gid][make_pair(src_id,
src_type)].push_back(make_tuple(dst_id,
dst_type,
e_type));
}
tuple<vector<Branch>,vector<Branch>> construct_prototype_branches(unordered_map<uint32_t, unordered_map<uint32_t, Branch>> &map_graph_branches, vector<uint32_t> train_gids,string dataset) {
vector<Branch> prototype_branches; // the prototype branches selected from train graphs
vector<Branch> median_branches; // the median branches
unordered_map<uint32_t, vector<Branch>> all_branches; /* All the branches grouped by the classes of
benign train graphs*/
if (dataset.compare("AUTH")==0){
for (auto gid: train_gids) {
for (auto &m: map_graph_branches[gid]) all_branches[0].push_back(m.second); // we have one class
}
}
else {
for (auto gid: train_gids) {
for (auto &m: map_graph_branches[gid]) all_branches[(gid) / 100].push_back(m.second);
}
}
tie(prototype_branches,median_branches) = SPS_C(all_branches); // the used algorithm for selecting the prototype branches is SPS-C
return tie(prototype_branches,median_branches);
}
double compute_branch_edit_distance(Branch &Br1, Branch &Br2) {
double bed = 0;
double max_bed = 1+max(Br1.d_out, Br2.d_out)+max(Br1.d_in, Br2.d_in);
if (Br1.r != Br2.r) bed += 1;
bed += max(Br1.d_out, Br2.d_out);
bed += max(Br1.d_in, Br2.d_in);
if (max(Br1.d_out, Br2.d_out) == Br2.d_out) {
for (auto &a : Br1.es_out)bed -= min(a.second, Br2.es_out[a.first]);
} else {
for (auto &a : Br2.es_out)bed -= min(a.second, Br1.es_out[a.first]);
}
if (max(Br1.d_in, Br2.d_in) == Br2.d_in) {
for (auto &a : Br1.es_in)bed -= min(a.second, Br2.es_in[a.first]);
} else {
for (auto &a : Br2.es_in)bed -= min(a.second, Br1.es_in[a.first]);
}
return bed/max_bed;
}
tuple<vector<Branch>,vector<Branch>> SPS_C(unordered_map<uint32_t, vector<Branch>> &all_branches) {
/* the SPS-C (Spanning Class wise) algorithm for choosing the K prototype branches
* reference : RIESEN, K. and H. BUNKE, GRAPH CLASSIFICATION BASED ON VECTOR SPACE EMBEDDING.
* International Journal of Pattern Recognition and Artificial Intelligence, 2009. 23(06): p. 1053-1081.
*/
int index, median_index, furthest_branch_index, c;
double d = 0;
vector<Branch> prototype_branches;
vector<Branch> median_branches;
int nc = all_branches.size(); // the number of classes of train graphs
c = 1; // the class id
for (auto &m : all_branches) { // compute the prototype branches for each class
cout << c << endl;
// Find the median graph of the class
vector<pair<int, double>> min_distances;
index = 0;
cout << "number of branches :" << m.second.size() << endl; // delete this
/* for (auto &b1: m.second) {
d = 0;
for (auto &b2:m.second) d+=compute_branch_edit_distance(b1,b2);
min_distances.push_back(pair<int, double>(index, d));
index++;
cout << "index: " << index << endl;
}
median_index =(*min_element(min_distances.begin(),min_distances.end(),[](pair<int, double> a,pair<int, double>b){
return a.second < b.second;
})).first;*/
median_index = rand()% (m.second.size());
median_branches.push_back(m.second.at(median_index));
prototype_branches.push_back(m.second.at(median_index)); // the median branch
m.second.erase(m.second.begin() + median_index); // delete the median branch from the set of all branch
vector<pair<int, double >>().swap(min_distances); // free the allocated memory
// the spanning selector
int counter = 1;
while (counter < (M / nc)) { // Select the furthest Branch away from the already selected prototypes
index = 0;
for (auto &br : m.second) {
d = INF;
for (auto &bp : prototype_branches) {
double bed = compute_branch_edit_distance(br, bp);
if (bed < d) d = bed;
}
min_distances.push_back(pair<int, double>(index, d));
index++;
}
furthest_branch_index = (*max_element(min_distances.begin(), min_distances.end(),
[](pair<int, double> a, pair<int, double> b) {
return a.second < b.second;
})).first;
prototype_branches.push_back(m.second.at(furthest_branch_index)); // add the furthest branch
m.second.erase(m.second.begin() +
furthest_branch_index); // delete the already added branch from the set of all branch
vector<pair<int, double >>().swap(min_distances); // free the allocated memory
counter++;
}
c++;
}
return tie(prototype_branches,median_branches);
}
unordered_map<uint32_t, unordered_map<uint32_t, Branch>> graph_to_branches(vector<graph> &graphs){
unordered_map<uint32_t, unordered_map<uint32_t, Branch>> map_graph_branches; /* key1= gid , key2= root id */
unordered_map<uint32_t, vector<Branch>> all_branches; /* All the branches grouped by the classes of
benign train graphs*/
int index = 0;
// construct graph branches
for (auto &g : graphs) {
int gid = index;
for (auto &e:g) { //
int src_id = e.first.first;// the source root id
string src_label = e.first.second; // the source root label
for (auto &v: e.second) {
int dest_id = get<0>(v);
string dest_label = get<1>(v);
string edge_label = get<2>(v);
map_graph_branches[gid][src_id].r = src_label;
map_graph_branches[gid][dest_id].r = dest_label;
map_graph_branches[gid][src_id].es_out[edge_label]++;
map_graph_branches[gid][dest_id].es_in[edge_label]++;
map_graph_branches[gid][src_id].d_out++;
map_graph_branches[gid][dest_id].d_in++;
}
}
index++;
}
return map_graph_branches;
}
}
//
// Created by Abderrahmane on 6/16/2018.
//
#ifndef NAADSG_GRAPH_H
#define NAADSG_GRAPH_H
#include <vector>
#include <tuple>
#include <unordered_map>
namespace std {
// edge field indices
#define F_S 0 // source node id
#define F_STYPE 1 // source node type
#define F_D 2 // destination node id
#define F_DTYPE 3 // destination node type
#define F_ETYPE 4 // edge type
#define F_GID 5 // graph id (tag)
// data structures
typedef struct Branch{
string r; // the root of the branch
unordered_map<string, int> es_out; // the edge structure of the outgoing edges from r
unordered_map<string, int> es_in; // the edge structure of the incoming edges to r
uint32_t d_out; // the number of outgoing edges
uint32_t d_in; // the number of incoming edges
} Branch;
typedef tuple<uint32_t, string, uint32_t, string, string, uint32_t> edge;
typedef unordered_map<pair<uint32_t,string>, vector<tuple<uint32_t,string,string>>> graph;
typedef vector<double> graph_vector; // vector representation of a graph
void update_graphs(edge& e, vector<graph>& graphs);
double compute_branch_edit_distance(Branch &Br1,Branch &Br2);
tuple<vector<Branch>,vector<Branch>> construct_prototype_branches(unordered_map<uint32_t, unordered_map<uint32_t, Branch>>
&map_graph_branches, vector<uint32_t> train_gids,string dataset);
tuple<vector<Branch>,vector<Branch>> SPS_C(unordered_map<uint32_t ,vector<Branch>> &all_branches); /* the Spanning wise class prototypes
* selector */
unordered_map<uint32_t, unordered_map<uint32_t, Branch>> graph_to_branches(vector<graph> &graphs); /* graph
decomposition to
branches*/
}
#endif //NAADSG_GRAPH_H
#ifndef NAADSG_HASH_H
#define NAADSG_HASH_H
#include <string>
#include <vector>
namespace std {
/* Combination hash from Boost */
template <class T>
inline void hash_combine(size_t& seed, const T& v)
{
hash<T> hasher;
seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
}
template<typename S, typename T> struct hash<pair<S, T>>
{
inline size_t operator()(const pair<S, T>& v) const
{
size_t seed = 0;
hash_combine(seed, v.first);
hash_combine(seed, v.second);
return seed;
}
};
/* End combination hash from Boost */
}
#endif //NAADSG_HASH_H
#include <fcntl.h>
#include <fstream>
#include "graph.h"
#include "io.h"
#include <iostream>
#include "param.h"
#include <string>
#include <sstream>
#include <tuple>
#include <unistd.h>
#include "util.h"
#include <vector>
#include <algorithm>
namespace std {
tuple<uint32_t,vector<uint32_t>> read_train_gids(string filename) {
// read train gids into memory
cerr << "Reading Train gids from: " << filename << endl;
vector<uint32_t> gids;
uint32_t num_train_graphs = 0;
ifstream f(filename);
string line;
// read train gids from the file
while ( getline(f, line)){
uint32_t graph_id;
stringstream ss;
ss.str(line);
ss >> graph_id;
gids.push_back(graph_id);
num_train_graphs++;
}
#ifdef DEBUG
for (uint32_t i = 0; i < gids.size(); i++) {
cout << "graph " << gids.at(i) << endl;
}
cout << "Number of train graphs: " << num_train_graphs << endl;
#endif
return make_tuple(num_train_graphs, gids);
}
tuple<uint32_t,vector<edge>> read_edges (string filename,vector<uint32_t> &train_gids){
vector<edge> train_edges;
uint32_t num_train_edges = 0;
ifstream f(filename);
string line;
cerr << "Reading edges from: " << filename << endl;
// read edges from the file
uint32_t i = 0;
uint32_t max_gid = 0;
while ( getline(f, line)){
string src_type, dst_type, e_type;
uint32_t src_id,dst_id,graph_id;
stringstream ss;
ss.str(line);
ss >> src_id;
ss >> src_type;
ss >> dst_id;
ss >> dst_type;
ss >> e_type;
ss >> graph_id;
if (graph_id > max_gid) {
max_gid = graph_id;
}
i++; // skip newline
if (find(train_gids.begin(),train_gids.end(),graph_id)!=train_gids.end()){ // if the graph is a train graph
train_edges.push_back(make_tuple(src_id, src_type,
dst_id, dst_type,
e_type, graph_id));
num_train_edges++;
}
}
return make_tuple(max_gid + 1, train_edges);
}
void branches_to_file(vector<Branch> &branches, string branches_file){
ofstream out;
out.open(branches_file);
// save prototype_branches
for(auto &b: branches){
out << b.r <<'\t' << b.d_out <<'\t' << b.d_in << endl;
if (b.d_out >0){
for (auto &e: b.es_out){
out << e.first <<'\t'<< e.second<<'\t';
}
out << endl;
} else {
out << endl;
}
if (b.d_in > 0 ){
for (auto &e: b.es_in){
out << e.first <<'\t'<< e.second<<'\t';
}
out << endl;
} else{
out << endl;
}
}
out.close();
}
}
#ifndef NAADSG_IO_H_
#define NAADSG_IO_H_
#include "graph.h"
#include <string>
#include <tuple>
#include <vector>
namespace std {
tuple<uint32_t,vector<uint32_t>> read_train_gids(string filename);
tuple<uint32_t,vector<edge>> read_edges(string filename,vector<uint32_t> &train_gids);
void branches_to_file(vector<Branch> &branches, string branches_file);
}
#endif
#include <algorithm>
#include <bitset>
#include <cassert>
#include <deque>
#include <iostream>
#include <string>
#include <unordered_map>
#include <set>
#include <vector>
#include <algorithm>
#include <sstream>
#include <random>
#include <chrono>
#include <fstream>
#include "docopt.h"
#include "graph.h"
#include "hash.h"
#include "io.h"
#include "param.h"
using namespace std;
static const char USAGE[] =
R"(LEADS (Training phase).
Usage:
LEADS --edges=<edge file>
--train=<train graphs file>
--dataset=<dataset>
--M=<number of branches>
--prototypes=<prototype branchs output>
--graph_vectors=<train graph vectors output>
LEADS (-h | --help)
Options:
-h, --help Show this screen.
--edges=<edge file> Incoming stream of edges.
--train=<train graphs file> Train graphs id's
--dataset=<dataset> 'ALL', 'YDC', 'GFC', 'YDG','AUTH,.
--M=<number of branches> Number of prototype branches
--prototypes=<prototype branchs output> Output file of prototype branches
--graph_vectors=<train graph vectors output> Output file of train graph vectors
)";
long M;
int main(int argc, char *argv[]) {
// arguments
map<string, docopt::value> args = docopt::docopt(USAGE, {argv + 1, argv + argc});
string edge_file(args["--edges"].asString());
string train_gids_file(args["--train"].asString());
string prototypes_output(args["--prototypes"].asString());
string graph_vectors_output(args["--graph_vectors"].asString());
M = args["--M"].asLong();
string dataset(args["--dataset"].asString());
if (!(dataset.compare("ALL") == 0 ||
dataset.compare("AUTH")== 0 ||
dataset.compare("YDC") == 0 ||
dataset.compare("YDG") == 0 ||
dataset.compare("GFC") == 0)) {
cout << "Invalid dataset: " << dataset << ". ";
exit(-1);
}
uint32_t num_graphs;
vector<uint32_t> train_gids;
vector<edge> train_edges;
// reading training Gids
cerr << "Reading training gids..." << endl;
tie(num_graphs, train_gids) = read_train_gids(train_gids_file);
cerr << "Reading ALL edges..." << endl;
tie(num_graphs, train_edges) = read_edges(edge_file, train_gids);
// per-graph data structures
unordered_map<uint32_t, graph_vector> graphs_vectors; // key = gid , value = the graph vector
// initialization of graphs vectors
for (int i = 0; i < num_graphs; i++) {
for (int j = 0; j < M; j++) graphs_vectors[i].push_back(0);
}
unordered_map<uint32_t, unordered_map<uint32_t, Branch>> train_graphs_to_branches; /* key 1 = gid
key 2 = branch id
value = branch*/
// construct training graphs
vector<graph> graphs(num_graphs);
cerr << "Constructing " << num_graphs << "train graphs..." << endl;
for (auto &e : train_edges) {
update_graphs(e, graphs);
}
cerr << "End of train graphs instantiation" << endl;
cerr << "Decomposition of " << num_graphs << "train graph to branches..." << endl;
// decompose the training graphs to vectors of branches
train_graphs_to_branches = graph_to_branches(graphs);// key = gid , value = map of the branches
cerr << "End of the decomposition " << endl;
cerr << "The selection of prototype branches" << endl;
vector<Branch> prototype_branches;
vector<Branch> median_branches;
tie (prototype_branches,median_branches) = construct_prototype_branches(train_graphs_to_branches, train_gids,dataset);
cout << prototype_branches.size() << endl;
// saving prototype and median branches to files
branches_to_file(prototype_branches,prototypes_output);
cerr << "Transform training graphs to vectors" << endl;
vector<double> train_graphs_sizes; // index = gid , value = the size of the graph
unordered_map<uint32_t, double> graphs_sizes; // key = gid ; value = the size of the graph
for (auto g : graphs) {
uint32_t size = 0;
for (auto &r : g) {
size += r.second.size();
}
train_graphs_sizes.push_back(size);
}
vector<graph>().swap(graphs);
cerr << "Converting test graphs to vectors" << endl;
for (auto &g : train_graphs_to_branches) {
if (find(train_gids.begin(), train_gids.end(), g.first) != train_gids.end()) { // is a train graph
for (int i = 0; i < prototype_branches.size(); i++) {
graphs_vectors[g.first].push_back(0);
}
cout << g.first << endl;
for (auto &br : g.second) {
double w = (br.second.d_in + br.second.d_out) /
(2 * train_graphs_sizes.at(g.first)); // the weight of the branch br
int k = 0;
for (auto &brp:prototype_branches) {
double bed = compute_branch_edit_distance(br.second, brp);
graphs_vectors[g.first].at(k) += w * (1 - bed);
k++;
}
}
}
}
/// saving the train graph vectors to file
ofstream out_file;
out_file.open(graph_vectors_output);
for (auto &gid:train_gids) {
cout << gid << "\t";
out_file << gid << "\t";
for (int i = 0; i < (M-1); i++) {
out_file << graphs_vectors[gid].at(i) <<"\t";
cout << graphs_vectors[gid].at(i) << "\t";
}
out_file << graphs_vectors[gid].at(M-1) << endl;
cout << graphs_vectors[gid].at(M-1) << endl;
}
return 0;
}
\ No newline at end of file
#ifndef NAADSG_PARAM_H_
#define NAADSG_PARAM_H_
#ifdef DEBUG
#define NDEBUG 0
#endif
extern long M;
//#define M 25
#define SEED 23
#define INF 5000000
#endif
#ifndef NAADSG_UTIL_H_
#define NAADSG_UTIL_H_
#include <string>
#include <iostream>
namespace std {
inline void panic(string message) {
cout << message << endl;
exit(-1);
}
}
#endif
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment