diff --git a/GEODE/Visualisation/LexicalSimilarities.py b/GEODE/Visualisation/LexicalSimilarities.py
new file mode 100644
index 0000000000000000000000000000000000000000..ed7ba848fdfc016bc0821144a779e6c6542203bc
--- /dev/null
+++ b/GEODE/Visualisation/LexicalSimilarities.py
@@ -0,0 +1,97 @@
+import argparse
+from GEODE.Classification.NGrams import loadFrequencies
+from GEODE.Store import JSON
+from GEODE.Visualisation.Label import add_labels_argument, getLabels
+import math
+
+def keysIntersection(d1, d2):
+    """Return the proportion of the keys of d1 which are also keys of d2.
+
+    The measure is not symmetric: it is normalised by the size of d1 only.
+    An empty d1 yields 0.0 instead of a division by zero.
+    """
+    if not d1:
+        return 0.0
+    return len(set(d1).intersection(d2)) / len(d1)
+
+def scalarProduct(d1, d2):
+    """Return the dot product of two sparse vectors stored as mappings.
+
+    Only the keys found in both mappings contribute to the sum.
+    """
+    return sum(d1[k] * d2[k] for k in set(d1).intersection(d2))
+
+def norm(d):
+    """Return the euclidean norm of a sparse vector stored as a mapping."""
+    return math.sqrt(scalarProduct(d, d))
+
+def colinearity(d1, d2):
+    """Return the cosine of the angle between two sparse vectors.
+
+    The cosine is undefined when either vector has a null norm (no keys or
+    only zero values); 0.0 is returned then instead of dividing by zero.
+    """
+    scale = norm(d1) * norm(d2)
+    if scale == 0:
+        return 0.0
+    return scalarProduct(d1, d2) / scale
+
+# Metrics selectable by name (see the --metric option)
+metrics = {f.__name__: f for f in [colinearity, keysIntersection]}
+
+def getLexicalSimilarities(vectors, metric):
+    """Return the square matrix of metric over every ordered pair of vectors.
+
+    Cell [a][b] holds metric(vectors[a], vectors[b]); both triangles are
+    computed because a metric may be asymmetric (keysIntersection is).
+    """
+    return [[metric(v1, v2) for v2 in vectors] for v1 in vectors]
+
+def extractLexicalSimilarities(inputDir, outputJSON, labels, metric, top=None):
+    """Compare the n-grams of each class and save the similarity matrix.
+
+    inputDir   -- directory containing one '<label>.tsv' file per label
+    outputJSON -- destination handed to JSON.save
+    labels     -- class names, in the order of the matrix rows and columns
+    metric     -- key of the metrics dictionary (KeyError otherwise)
+    top        -- forwarded as is to loadFrequencies
+    """
+    compare = metrics[metric]  # reject an unknown name before reading files
+    vectors = [loadFrequencies(f"{inputDir}/{domain}.tsv", top=top)
+               for domain in labels]
+    matrix = getLexicalSimilarities(vectors, compare)
+    JSON.save({'matrix': matrix, 'labels': labels}, outputJSON)
+
+def getArgs(arguments):
+    """Parse the arguments of the lexical similarities command.
+
+    --metric is restricted to the known metrics and defaults to colinearity,
+    so that a missing or misspelled name is caught by argparse instead of
+    failing later with a KeyError.
+    """
+    description = "Extract a similarity matrix from n-grams features"
+    # NOTE(review): the command is registered as 'lexicalSimilarities' in
+    # GEODE/__init__.py; prog may need to be aligned with that name.
+    cli = argparse.ArgumentParser(prog='similarityMatrix',
+                                  description=description)
+    cli.add_argument('inputDir', help="path containing the n-grams for each class")
+    cli.add_argument('outputJSON')
+    add_labels_argument(cli)
+    cli.add_argument(
+            '-m', '--metric',
+            choices=sorted(metrics),
+            default=colinearity.__name__,
+            help="metric used to compare the classes (default: %(default)s)")
+    cli.add_argument('-t', '--top', type=int,
+                     help="number of top elements compared from each class")
+    return cli.parse_args(arguments)
+
+def extractLexicalSimilaritiesCLI(arguments):
+    """Entry point of the command: parse arguments and run the extraction."""
+    args = getArgs(arguments)
+    labels = getLabels(args)
+    extractLexicalSimilarities(args.inputDir,
+                               args.outputJSON,
+                               labels,
+                               args.metric,
+                               top=args.top)
diff --git a/GEODE/Visualisation/__init__.py b/GEODE/Visualisation/__init__.py
index 3cc602b36c656a14eadf6cdd1efdccd12b4e90bb..7b23498ce2752eea41649da4d1fdee473314582d 100644
--- a/GEODE/Visualisation/__init__.py
+++ b/GEODE/Visualisation/__init__.py
@@ -5,3 +5,4 @@ from GEODE.Visualisation.DensityProfile import densityProfile, \
 from GEODE.Visualisation.DrawMatrix import drawMatrix
 from GEODE.Visualisation.Graph import drawGraph
 from GEODE.Visualisation.Legend import trim as legend
+from GEODE.Visualisation.LexicalSimilarities import extractLexicalSimilarities
diff --git a/GEODE/__init__.py b/GEODE/__init__.py
index a3fd4123c92aef7df0134d17b3f8fca292077863..53acc070e2cf37a3069cec57427652ce34c51670 100644
--- a/GEODE/__init__.py
+++ b/GEODE/__init__.py
@@ -29,12 +29,14 @@ from GEODE.Visualisation.ConfusionMatrix import extractConfusionMatrixCLI
 from GEODE.Visualisation.DensityProfile import drawDensityProfileCLI
 from GEODE.Visualisation.DrawMatrix import drawMatrixCLI
 from GEODE.Visualisation.Graph import drawGraphCLI
+from GEODE.Visualisation.LexicalSimilarities import extractLexicalSimilaritiesCLI
 
 commands = {
         'confusionMatrix': extractConfusionMatrixCLI,
         'densityProfile': drawDensityProfileCLI,
         'drawMatrix': drawMatrixCLI,
-        'graph': drawGraphCLI
+        'graph': drawGraphCLI,
+        'lexicalSimilarities': extractLexicalSimilaritiesCLI
         }
 
 def geopyckCLI():