diff --git a/src/Training_Phase/Clustering/constants.py b/src/Training_Phase/Clustering/constants.py
new file mode 100644
index 0000000000000000000000000000000000000000..efe7dd36a07184ea8b6b7809a0c2c07574e595fb
--- /dev/null
+++ b/src/Training_Phase/Clustering/constants.py
@@ -0,0 +1,5 @@
+#!/usr/bin/env python
+
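+# Shared parameters for the clustering scripts:
+#   SEED       - RNG seed, so the random medoid spawns are reproducible
+#   NUM_TRIALS - random k-medoids restarts per candidate number of clusters
+#   NUM_DEVS   - standard deviations above the mean used for the thresholds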
+SEED = 30
+NUM_TRIALS = 30
+NUM_DEVS = 3.0
diff --git a/src/Training_Phase/Clustering/create_seed_clusters.py b/src/Training_Phase/Clustering/create_seed_clusters.py
new file mode 100644
index 0000000000000000000000000000000000000000..2605d6be87c61aa5516d93bad4322b042324ffb2
--- /dev/null
+++ b/src/Training_Phase/Clustering/create_seed_clusters.py
@@ -0,0 +1,106 @@
+#!/usr/bin/env python
+
+"""
+Source:  https://github.com/sbustreamspot/sbustreamspot-train
+"""
+
+
+import argparse
+from constants import *
+import numpy as np
+import random
+from medoids import _k_medoids_spawn_once, k_medoids
+from scipy.spatial.distance import pdist, squareform
+from sklearn.metrics import silhouette_score
+
+# finding best number of clusters
+# http://stackoverflow.com/questions/15376075/cluster-analysis-in-r-determine-the-optimal-number-of-clusters/15376462#15376462
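+#
+# Approach used below: for each k in [3, 20), run NUM_TRIALS random k-medoids
+# restarts and keep the clustering with the best silhouette score (ties are
+# broken in favour of more clusters).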
+
+random.seed(SEED)
+np.random.seed(SEED)
+
+parser = argparse.ArgumentParser()
+parser.add_argument('--input', help='Training graph vectors',
+                    required=True)
+args = vars(parser.parse_args())
+
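+# The input is expected to hold one graph per line: a graph ID followed by the
+# components of its vector, tab-separated, e.g. (hypothetical file name):
+#   python create_seed_clusters.py --input train_graph_vectors.tsv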
+input_file = args['input']
+with open(input_file, 'r') as f:
+    X = []  # row i holds the vector of the i-th graph in the file
+    G = []  # G[i] is the ID of the graph whose vector is X[i]
+    for line in f:
+        fields = line.strip().split('\t')
+        graph_id = int(fields[0])
+        graph_vector = list(map(float, fields[1:]))
+        G.append(graph_id)
+        X.append(graph_vector)
+
+    X = np.array(X)
+    dists = squareform(pdist(X, metric='euclidean'))
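+
+    # k-medoids operates on point indices 0..n-1; distance() simply looks up
+    # the precomputed pairwise Euclidean distances.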
+    def distance(a, b):
+        return dists[a][b]
+
+    best_n_clusters = -1
+    best_silhouette_avg = -1
+    best_cluster_labels = None
+    best_cluster_centers = None
+    for n_clusters in range(3, 20):
+        for trial in range(NUM_TRIALS):
+            # run many trials for a given number of clusters
+            _, medoids = _k_medoids_spawn_once(points=list(range(X.shape[0])),
+                                               k=n_clusters,
+                                               distance=distance,
+                                               max_iterations=10000,
+                                               verbose=False)
+
+            # label each graph index with the index of its medoid
+            cluster_labels = [-1] * X.shape[0]
+            for medoid_idx, medoid in enumerate(medoids):
+                for graph in medoid.elements:
+                    cluster_labels[graph] = medoid_idx
+            cluster_labels = np.array(cluster_labels)
+
+            # the silhouette score is undefined for fewer than 2 clusters
+            if len(medoids) > 1:
+                silhouette_avg = silhouette_score(X, cluster_labels,
+                                                  metric='euclidean')
+                if (silhouette_avg > best_silhouette_avg or
+                        (silhouette_avg == best_silhouette_avg and
+                         n_clusters > best_n_clusters)):  # favour more clusters
+                    best_silhouette_avg = silhouette_avg
+                    best_n_clusters = n_clusters
+                    best_cluster_labels = cluster_labels
+                    best_cluster_centers = medoids
+    
+    all_cluster_dists = []
+    cluster_threshold = [-1] * best_n_clusters
+    for cluster_idx in range(best_n_clusters):
+        cluster_center = best_cluster_centers[cluster_idx].kernel
+        cluster_graphs = best_cluster_centers[cluster_idx].elements
+
+        cluster_dists = [dists[cluster_center][graph] for graph in cluster_graphs
+                         if graph != cluster_center]
+
+        if len(cluster_dists) == 0:  # singleton cluster, shouldn't happen
+            mean_dist = 0.0
+            std_dist = 0.0
+            all_cluster_dists.append(0.0)
+        else:
+            all_cluster_dists.extend(cluster_dists)
+            mean_dist = np.mean(cluster_dists)
+            std_dist = np.std(cluster_dists)
+
+        cluster_threshold[cluster_idx] = mean_dist + NUM_DEVS * std_dist  # P(>) <= 10%
+
+    mean_all_cluster_dists = np.mean(all_cluster_dists)
+    std_all_cluster_dists = np.std(all_cluster_dists)
+    all_cluster_threshold = mean_all_cluster_dists + NUM_DEVS * std_all_cluster_dists
+
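+    # Output format:
+    #   line 1: <num clusters> \t <num graphs> \t <global threshold>
+    #   then one line per cluster: <cluster threshold> \t <graph IDs...>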
+    print('{0}\t{1}\t{2:3.4f}'.format(best_n_clusters, X.shape[0],
+                                      all_cluster_threshold))
+
+    for cluster_idx in range(best_n_clusters):
+        cluster_graphs = best_cluster_centers[cluster_idx].elements
+        threshold = cluster_threshold[cluster_idx]
+        print("{:3.4f}".format(threshold) + '\t','\t'.join([str(G[graph]) for graph in cluster_graphs]))
+        #print('\t'.join([str(graph) for graph in cluster_graphs]))
diff --git a/src/Training_Phase/Clustering/medoids.py b/src/Training_Phase/Clustering/medoids.py
new file mode 100644
index 0000000000000000000000000000000000000000..06441f7cdb06c2148711bf7a09376a3dd1e53307
--- /dev/null
+++ b/src/Training_Phase/Clustering/medoids.py
@@ -0,0 +1,169 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+"""
+Source: https://github.com/alexprengere/medoids
+"""
+
+
+try:
+    range = xrange
+except NameError:
+    pass
+
+import random
+from operator import itemgetter
+
+from constants import *
+
+_MAX_ITER = int(1e3)
+random.seed(SEED)
+
+class Medoid(object):
+    __slots__ = ['kernel', 'elements']
+
+    def __init__(self, kernel, elements=None):
+        self.kernel = kernel
+        self.elements = [] if elements is None else elements
+
+    def __repr__(self):
+        return 'Medoid({0}, {1})'.format(self.kernel, self.elements)
+
+    def __iter__(self):
+        return iter(self.elements)
+
+    def compute_kernel(self, distance):
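+        # The kernel is the element whose summed distance to all other
+        # elements of the cluster is minimal (i.e. the medoid).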
+        return min(self, key=lambda e: sum(distance(e, other) for other in self))
+
+    def compute_diameter(self, distance):
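+        # The diameter is the largest pairwise distance inside the cluster.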
+        return max(distance(a, b) for a in self for b in self)
+
+
+def _k_medoids_spawn_once(points, k, distance, max_iterations=_MAX_ITER, verbose=True):
+    """K-medoids algorithm with one spawn of medoid kernels.
+
+    :param points:    the list of points
+    :param k:         the number of clusters
+    :param distance:  the distance function, distance(p, q) = ||q - p||
+    :param max_iterations: the maximum number of iterations
+    :param verbose:   verbosity
+    :returns:         a (diameter, medoids) tuple, where medoids is the list \
+        of Medoid objects (cluster kernel plus its elements)
+
+    >>> points = [1, 2, 3, 4, 5, 6, 7]
+    >>> def distance(a, b):
+    ...     return abs(b - a)
+    >>> diameter, medoids = _k_medoids_spawn_once(points, k=2, distance=distance) #doctest: +SKIP
+    * New chosen kernels: [6, 3]
+    * Iteration over after 3 steps, max diameter 3
+    """
+    if k <= 0:
+        raise ValueError('Number of medoids must be strictly positive')
+    if k > len(points):
+        raise ValueError('Number of medoids exceeds number of points')
+
+    # Medoids initialization
+    medoids = [Medoid(kernel=p) for p in random.sample(points, k)]
+    if verbose:
+        print('* New chosen kernels: {0}'.format([m.kernel for m in medoids]))
+
+    for n in range(1, 1 + max_iterations):
+        # Resetting medoids
+        for m in medoids:
+            m.elements = []
+
+        # Putting points in closest medoids
+        for p in points:
+            closest_medoid = min(medoids, key=lambda m: distance(m.kernel, p))
+            closest_medoid.elements.append(p)
+
+        # Removing empty medoids
+        medoids = [m for m in medoids if m.elements]
+        # Electing a new kernel for each medoid
+        change = False
+        for m in medoids:
+            new_kernel = m.compute_kernel(distance)
+            if new_kernel != m.kernel:
+                m.kernel = new_kernel
+                change = True
+
+        if not change:
+            break
+
+    diameter = max(m.compute_diameter(distance) for m in medoids)
+    if verbose:
+        print('* Iteration over after {0} steps, max diameter {1}'.format(n, diameter))
+
+    return diameter, medoids
+
+
+def k_medoids(points, k, distance, spawn, max_iterations=_MAX_ITER, verbose=True):
+    """
+    Same as _k_medoids_spawn_once, but we also iterate the spawning process.
+    The spawn whose largest cluster diameter is smallest is kept as the best one.
+
+    :param points:    the list of points
+    :param k:         the number of clusters
+    :param distance:  the distance function, distance(p, q) = ||q - p||
+    :param spawn:     the number of spawns
+    :param max_iterations: the maximum number of iterations
+    :param verbose:   boolean, verbosity status
+    :returns:         a (diameter, medoids) tuple, where medoids is the list \
+        of Medoid objects (cluster kernel plus its elements)
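+
+    Illustrative usage (the exact result depends on the random spawns):
+
+    >>> points = [1, 2, 3, 4, 5, 6, 7]
+    >>> def distance(a, b):
+    ...     return abs(b - a)
+    >>> diameter, medoids = k_medoids(points, k=2, distance=distance, spawn=3) #doctest: +SKIP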
+    """
+    kw = {
+        'points': points,
+        'k': k,
+        'distance': distance,
+        'max_iterations': max_iterations,
+        'verbose': verbose,
+    }
+    # _k_medoids_spawn_once returns a (diameter, medoids) tuple whose first
+    # element is the diameter of the biggest medoid, so min(..., key=itemgetter(0))
+    # picks the arrangement whose maximum diameter is smallest.
+    diameter, medoids = min((_k_medoids_spawn_once(**kw) for _ in range(spawn)), key=itemgetter(0))
+    if verbose:
+        print(('~~ Spawn end: min of max diameters {0:.3f} '
+               'for medoids: {1}').format(diameter, medoids))
+
+    return diameter, medoids
+
+
+def k_medoids_auto_k(points, distance, spawn, diam_max, max_iterations=_MAX_ITER, verbose=True):
+    """
+    Same as k_medoids, but we increase the number of clusters until every
+    cluster's diameter is at most diam_max.
+
+    :param points:    the list of points
+    :param distance:  the distance function, distance(p, q) = ||q - p||
+    :param spawn:     the number of spawns
+    :param diam_max:  the maximum diameter allowed, otherwise \
+        the algorithm will start over and increment the number of clusters
+    :param max_iterations: the maximum number of iterations
+    :param verbose:   verbosity
+    :returns:         a (diameter, medoids) tuple, where medoids is the list \
+        of Medoid objects (cluster kernel plus its elements)
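+
+    Illustrative usage (assumes a diam_max that the data can satisfy):
+
+    >>> points = [1, 2, 3, 4, 5, 6, 7]
+    >>> def distance(a, b):
+    ...     return abs(b - a)
+    >>> diameter, medoids = k_medoids_auto_k(points, distance=distance, spawn=3, diam_max=3) #doctest: +SKIP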
+    """
+    if not points:
+        raise ValueError('No points given!')
+
+    kw = {
+        'distance': distance,
+        'spawn': spawn,
+        'max_iterations': max_iterations,
+        'verbose': verbose,
+    }
+
+    for k, _ in enumerate(points, start=1):
+        diameter, medoids = k_medoids(points, k, **kw)
+        if diameter <= diam_max:
+            break
+        if verbose:
+            print('*** Diameter too big {0:.3f} > {1:.3f}'.format(diameter, diam_max))
+            print('*** Now trying {0} clusters\n'.format(k + 1))
+
+    if verbose:
+        print('*** Diameter ok {0:.3f} <= {1:.3f}'.format(diameter, diam_max))
+        print('*** Stopping, {0} clusters enough ({1} points initially)'.format(k, len(points)))
+
+    return diameter, medoids