Skip to content
Snippets Groups Projects
Commit abdb63a0 authored by Tetiana Yemelianenko's avatar Tetiana Yemelianenko
Browse files

Upload New File

parent b34de20d
No related branches found
No related tags found
No related merge requests found
%% Cell type:code id:f8b76e74 tags:
``` python
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from annoy import AnnoyIndex
import random
# Root location of the dataset. It is prepended to file names by plain string
# concatenation (index files in build_index, image paths in show_similar), so
# it has to end with a path separator.
base_dir = 'path_to_the_dataset'
# Number of neighbours requested from each Annoy index; the query itself asks
# for top_count + 1 items.
top_count = 100
# Number of paintings taken from the combined ranking and displayed at the end.
top_K = 11
```
%% Cell type:code id:43c9c917 tags:
``` python
# Each CSV holds: column 0 - index, column 1 - path to the painting,
# remaining columns - pre-calculated embeddings.
df_artist, df_genre, df_style = map(
    pd.read_csv,
    (
        'dino_features_wikiart_artist.csv',
        'dino_features_wikiart_genre.csv',
        'dino_features_wikiart_style.csv',
    ),
)
```
%% Cell type:code id:be5120fe tags:
``` python
def build_index(df, filename):
    """Build an Annoy index over the embedding columns of ``df`` and save it.

    Args:
        df: feature table whose first two columns are skipped (index and
            painting path); every remaining column is one embedding dimension.
        filename: name of the index file, appended to ``base_dir``.

    Returns:
        The built ``AnnoyIndex`` (Euclidean metric). Item ``i`` of the index is
        the row at *position* ``i`` of ``df``.
    """
    NUMBER_OF_TREES = 25
    feature_dim = len(df.columns) - 2
    t = AnnoyIndex(feature_dim, metric='euclidean')
    # Use positional access: neighbour ids are later resolved with df.iloc,
    # so item ids must be row positions, not index labels. Converting the
    # embedding columns once also avoids building a Series per row.
    features = df.iloc[:, 2:].to_numpy(dtype=float)
    for i, vector in enumerate(features):
        t.add_item(i, vector)
    t.build(NUMBER_OF_TREES)
    # save the index next to the dataset
    t.save(base_dir + filename)
    return t
```
%% Cell type:code id:afa13d41 tags:
``` python
# Build (and save) one Annoy index per feature space: genre, style, artist.
t_genre, t_style, t_artist = (
    build_index(df_genre, 'dino_genre.ann'),
    build_index(df_style, 'dino_style.ann'),
    build_index(df_artist, 'dino_artist.ann'),
)
```
%% Cell type:code id:feff4c2f tags:
``` python
# load the saved indexes from disk
# build_index() writes each index to base_dir + filename, so the files are
# read back from that same location.
# NOTE: the genre table's embedding width is used for all three indexes.
feature_dim = len(df_genre.columns) - 2
t_genre = AnnoyIndex(feature_dim, metric='euclidean')
t_genre.load(base_dir + 'dino_genre.ann')
t_style = AnnoyIndex(feature_dim, metric='euclidean')
t_style.load(base_dir + 'dino_style.ann')
t_artist = AnnoyIndex(feature_dim, metric='euclidean')
t_artist.load(base_dir + 'dino_artist.ann')
```
%% Cell type:code id:ee4c29ab tags:
``` python
def get_similar_images_annoy(vector, t):
    """Query Annoy index ``t`` for the nearest neighbours of ``vector``.

    Asks for ``top_count + 1`` items and returns a pair
    ``(indices, distances)`` as produced by the index.
    """
    n_neighbours = top_count + 1
    neighbours = t.get_nns_by_vector(vector, n_neighbours, include_distances=True)
    indices, dists = neighbours
    return indices, dists
```
%% Cell type:code id:1b10c4d0 tags:
``` python
def show_similar(img_str, img_list, k, distances):
    """Plot the request painting, then a row of similar paintings.

    Args:
        img_str: path of the request painting, relative to ``base_dir``.
        img_list: paths of the paintings to show, relative to ``base_dir``.
        k: number of columns in the row of subplots.
        distances: one value per entry of ``img_list``; shown as the subplot
            title, rounded to 4 decimals.
    """
    # Request painting on the current figure.
    request_img = Image.open(base_dir + img_str)
    plt.title('Request painting')
    plt.axis("off")
    plt.imshow(request_img)

    # Similar paintings side by side on a new, large figure.
    plt.figure(figsize=(30, 30))
    plt.subplot(1, k, 1)
    for position, key in enumerate(img_list):
        plt.subplot(1, k, position + 1)
        similar_img = Image.open(base_dir + key)
        label = round(distances[position], 4)
        plt.title("{}".format(label))
        plt.imshow(similar_img)
        plt.axis("off")
    plt.show()
```
%% Cell type:code id:29ccae77 tags:
``` python
# Painting used as the query.
img_str = 'Impressionism/pierre-auguste-renoir_children-on-the-seashore-1883.jpg'
# Pre-calculated embeddings of the request painting in each feature space:
# take the first row matching the path and skip its two leading columns.
img_req_g = df_genre.loc[df_genre['file_path'] == img_str].to_numpy()[0, 2:]
img_req_s = df_style.loc[df_style['file_path'] == img_str].to_numpy()[0, 2:]
img_req_a = df_artist.loc[df_artist['file_path'] == img_str].to_numpy()[0, 2:]
```
%% Cell type:code id:9c2c37c1 tags:
``` python
# Nearest neighbours of the request painting in each feature space
# (ids are row positions, distances come from the Annoy index).
similar_img_ids_g, distances_g = get_similar_images_annoy(img_req_g, t_genre)
similar_img_ids_s, distances_s = get_similar_images_annoy(img_req_s, t_style)
similar_img_ids_a, distances_a = get_similar_images_annoy(img_req_a, t_artist)

# Resolve the neighbour ids to painting paths, keeping the returned order.
similar_images_g = df_genre['file_path'].iloc[similar_img_ids_g].tolist()
similar_images_s = df_style['file_path'].iloc[similar_img_ids_s].tolist()
similar_images_a = df_artist['file_path'].iloc[similar_img_ids_a].tolist()
```
%% Cell type:code id:eac7636a tags:
``` python
# Pre-calculated mean and standard deviation of the Euclidean distances on the
# WikiArt dataset, one pair per feature space (genre, style, artist).
mean_genre = 19.6129883775738
std_genre = 2.9717604654152274
mean_style = 18.328733753985198
std_style = 2.5707980486965547
mean_artist = 20.973407871326454
std_artist = 2.9063593849149183
```
%% Cell type:code id:08e0941d tags:
``` python
# Distances of the neighbours to the request painting. These are Euclidean
# distances (the Annoy indexes use metric='euclidean'); the "cosines_*" names
# are kept only for compatibility with the cells below.
# The request painting belongs to the dataset, so the first (nearest) hit is
# ignored.
# Rows are selected by position so that every *_sim frame keeps Annoy's
# nearest-first order. A boolean `isin` mask would return them in dataset
# order, and they would no longer line up with the distance lists.
df_genre_sim = df_genre.iloc[similar_img_ids_g]
cosines_genre = distances_g[1:]
df_style_sim = df_style.iloc[similar_img_ids_s]
cosines_style = distances_s[1:]
df_artist_sim = df_artist.iloc[similar_img_ids_a]
cosines_artist = distances_a[1:]
```
%% Cell type:code id:b48542d3 tags:
``` python
# One table per feature space: painting path -> distance to the request.
# Paths come from the similar_images_* lists because those are in the same
# nearest-first order as the distance lists; the first entry (the request
# painting itself) is skipped in both.
d = {'file_path': similar_images_g[1:], 'd_g': cosines_genre}
df_g = pd.DataFrame(data=d)
d = {'file_path': similar_images_s[1:], 'd_s': cosines_style}
df_s = pd.DataFrame(data=d)
d = {'file_path': similar_images_a[1:], 'd_a': cosines_artist}
df_a = pd.DataFrame(data=d)
# Outer merges keep every painting found in at least one feature space;
# distances missing for a space are left as NaN.
df_u = pd.merge(df_g, df_s, on='file_path', how='outer')
df_u = pd.merge(df_u, df_a, on='file_path', how='outer')
```
%% Cell type:code id:92bb15c8 tags:
``` python
def func(x):
    """Fill the missing distances in one column of ``df_u``.

    For the columns ``d_g``, ``d_s`` and ``d_a`` every NaN entry is replaced by
    the Euclidean distance between the request embedding and the embedding of
    the painting in that row, both taken from the matching feature table.
    Any other column is returned unchanged.

    Args:
        x: one column of ``df_u`` (passed in by ``DataFrame.apply``).

    Returns:
        The same Series with its missing values filled in.
    """
    # Column name -> (request embedding, feature table) of that feature space.
    sources = {
        'd_g': (img_req_g, df_genre),
        'd_s': (img_req_s, df_style),
        'd_a': (img_req_a, df_artist),
    }
    if x.name not in sources:
        return x

    request_vec, features = sources[x.name]
    # The embeddings are sliced out of mixed-type rows, so cast them to float
    # explicitly before doing arithmetic.
    request_vec = np.asarray(request_vec, dtype=float)
    for index in x.index[x.isnull()]:
        img_sim = df_u.loc[index, 'file_path']
        candidate = features[features['file_path'] == img_sim].iloc[0, 2:]
        candidate_vec = candidate.to_numpy(dtype=float)
        x.loc[index] = np.linalg.norm(request_vec - candidate_vec)
    return x


df_u = df_u.apply(func, axis=0)
```
%% Cell type:code id:7e464ba0 tags:
``` python
# Standardise each distance column with the mean/std of its own feature space
# so the three spaces are comparable before they are summed.
df_u['d_g'] = (df_u['d_g'] - mean_genre)/std_genre
df_u['d_s'] = (df_u['d_s'] - mean_style)/std_style
# Artist distances use the artist statistics, not the style ones.
df_u['d_a'] = (df_u['d_a'] - mean_artist)/std_artist
```
%% Cell type:code id:5ea5a057 tags:
``` python
# Combined score: genre, style and artist distances added with equal weights.
df_u['combined'] = df_u['d_g'].add(df_u['d_s']).add(df_u['d_a'])
```
%% Cell type:code id:b9e0d83f tags:
``` python
# Rank by combined score (smallest first) and keep the top_K best rows.
ranked = df_u.sort_values(by=['combined']).head(top_K)
paintings = list(ranked['file_path'])
dist = list(ranked['combined'])
show_similar(img_str, paintings, top_K, dist)
```
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment