Skip to content
Snippets Groups Projects
Commit fef12f23 authored by Tetiana Yemelianenko's avatar Tetiana Yemelianenko
Browse files

Replace create_owl_dataset.py

parent 1091bce0
No related branches found
No related tags found
No related merge requests found
......@@ -9,20 +9,19 @@ import torch
import numpy as np
from transformers.utils.constants import OPENAI_CLIP_MEAN, OPENAI_CLIP_STD
TOP_COUNT = 30
TOP_COUNT = 10
feature_dim = 512
parent_dir = 'path_to_the_main_dir'
parent_dir = '/home/tetiana/owl/'
#path to the directory with images annotated on image level
parentpath = os.path.join(parent_dir, 'dataset/')
#print(parentpath)
parentpath = os.path.join(parent_dir, 'test/')
#directory in which we save selected images
owlpath = os.path.join(parent_dir, 'owl_dataset')
#directory in which we save selected images
owlpath = os.path.join(parent_dir, 't_dataset')
ext = '.jpg' #"Image file extension [.jpg or .png]"
#path to the directory with non-annotated data
base_dir = 'path_to_the_non_annotated_dataset'
base_dir = '/home/tetiana/ty/projects/Archive_/'
if not os.path.isdir(owlpath):
os.mkdir(owlpath)
......@@ -39,11 +38,11 @@ processor = Owlv2Processor.from_pretrained("google/owlv2-base-patch16-ensemble")
model = Owlv2ForObjectDetection.from_pretrained("google/owlv2-base-patch16-ensemble").to(device)
#path to the csv file with the saved top 50 regions of interest and their coordinates, pre-calculated for each painting from the WikiArt dataset
df = pd.read_csv("path_to_the_objectnesses_fle/objectness_wikiart.csv")
df = pd.read_csv("/home/tetiana/ty/projects/Archive_/objectness_wikiart_final_all_new.csv")
#path to the ANNOY index file for WikiArt dataset with information about similarity of the objects in the images
t = AnnoyIndex(feature_dim, metric='angular')
t.load('path_to_the_annoy_index/annoy_wikiart.ann')
t.load('/home/tetiana/ty/projects/Archive_/annoy_new.ann')
#receive similar images using ANNOY
......@@ -152,10 +151,10 @@ def center_to_corners_format(box):
bboxes_corners = [x1, y1, x2, y2]
return bboxes_corners
def rescale_owl(raw_image, box):
def rescale_owl(box, width, height):
# rescale coordinates
img_h = raw_image.height
img_w = raw_image.width
img_h = height
img_w = width
width_ratio = 1
height_ratio = 1
......@@ -180,6 +179,103 @@ def create_new_name(counter):
return fname
def to_yolo_format(box, width, height):
    """
    Convert a bounding box from OWL to YOLO format.

    Parameters
    ----------
    box: bounding box to convert; it is handed unchanged to
         center_to_corners_format, so it must be in the layout that
         helper expects (presumably [cx, cy, w, h] - confirm against helper)
    width, height: size of the image the box belongs to
    ----------
    Returns the tuple (x, y, w, h): the centre of the rescaled box and its
    size, each divided by the matching image dimension.
    """
    # corner representation first, then rescaling to the image size
    corners = rescale_owl(center_to_corners_format(box), width, height)
    left, top = corners[0], corners[1]
    right, bottom = corners[2], corners[3]

    # centre of the box relative to the image size
    x_center = (left + right) / 2 / width
    y_center = (top + bottom) / 2 / height
    # extent of the box relative to the image size
    box_width = (right - left) / width
    box_height = (bottom - top) / height
    return x_center, y_center, box_width, box_height
def find_for_label(label, folder, imgpaths, threshold=0.4, iou_threshold=0.8):
    """
    Parameters
    ----------
    label: current label for annotation (query passed to detectobject)
    folder: current folder with images annotated on image level;
            it is stored in every annotation row
    imgpaths: list with paths to the images
    threshold: current threshold, passed to detectobject
    iou_threshold: a pre-calculated region is accepted only when its IoU
                   with at least one detected box is above this value
    ----------
    function for creating annotations using files from folder annotated on image level

    Returns
    -------
    annot: list of rows [file_path, folder, x, y, w, h], where x, y, w, h
           come from to_yolo_format
    count: number of rows in annot
    """
    annot = []
    for progress, file in enumerate(imgpaths, start=1):
        print(progress)
        try:
            boxes, query_embeddings = detectobject(file, label, threshold)
        except Exception:
            # best effort: report the image that failed and go on with the next one
            print(file)
            continue
        # if current type of object wasn't detected with OWL, we skip this image
        # (len() is used because boxes may be an array-like without a plain truth value)
        if len(boxes) == 0:
            continue

        # the size of the request image is the same for all of its boxes, read it once
        with Image.open(file) as raw_image:
            query_width, query_height = raw_image.size

        # for all found objects we use their embeddings to find similar objects
        # in WikiArt dataset using ANNOY
        for j, embedding in enumerate(query_embeddings):
            similar_img_ids, _ = get_similar_images_annoy(embedding)
            df_selected = df.iloc[similar_img_ids]
            # (relative path, cx, cy, w, h) of every similar region
            candidates = list(zip(
                df_selected['file_path'],
                df_selected['cx'],
                df_selected['cy'],
                df_selected['w'],
                df_selected['h'],
            ))
            # slicing instead of range(TOP_COUNT): the search may return
            # fewer than TOP_COUNT neighbours
            for rel_path, cx, cy, bw, bh in candidates[:TOP_COUNT]:
                found_file = os.path.join(base_dir, rel_path)
                with Image.open(found_file) as raw_image:
                    width, height = raw_image.size
                try:
                    found_boxes, _ = detectobject(found_file, label, threshold)
                except Exception:
                    print(found_file)
                    # without skipping here found_boxes would be undefined
                    # or left over from the previous candidate
                    continue
                if len(found_boxes) == 0:
                    continue
                region_corners = center_to_corners_format([cx, cy, bw, bh])
                # keep the region only if OWL confirms it with a strongly overlapping box
                confirmed = any(
                    calculate_iou(region_corners, center_to_corners_format(found_box)) > iou_threshold
                    for found_box in found_boxes
                )
                if confirmed:
                    # save found boundary boxes in YOLO format
                    x, y, w, h = to_yolo_format([cx, cy, bw, bh], width, height)
                    annot.append([found_file, folder, x, y, w, h])

            # save boxes for the request image too
            x, y, w, h = to_yolo_format(boxes[j], query_width, query_height)
            annot.append([file, folder, x, y, w, h])
    # every appended row counts as one found annotation
    return annot, len(annot)
#creation of annotations
def owl_annoy_annotation(labels, folders):
"""
......@@ -199,78 +295,25 @@ def owl_annoy_annotation(labels, folders):
imgpaths = glob(parentpath + folders[i] + "/*" + ext)
label = labels[i]
folder = folders[i]
print(label)
iter = 0
#for all images of the current label
for file in imgpaths:
iter += 1
print(iter)
try:
boxes, query_embeddings = detectobject(file, label, 0.4)
except:
print(file)
continue
#if current type of object wasn't detected with OWL, we skip this image
if len(boxes) == 0:
continue
#for all found objects we use their embeddings to find similar objects in WikiArt dataset using ANNOY
for j in range(len(query_embeddings)):
#search objects similar to found embeddings using ANNOY
similar_img_ids, distances = get_similar_images_annoy(query_embeddings[j])
df_selected = df.iloc[similar_img_ids]
#path to the image
similar_images = list(df_selected['file_path'])
#coordinates of the bounding box for the object
cxs = list(df_selected['cx'])
cys = list(df_selected['cy'])
ws = list(df_selected['w'])
hs = list(df_selected['h'])
for k in range(TOP_COUNT):
found_file = os.path.join(base_dir, similar_images[k])
raw_image = Image.open(found_file)
width, height = raw_image.size
raw_image.close()
try:
found_boxes, _ = detectobject(found_file, label, 0.4)
except:
print(found_file)
if len(found_boxes) == 0:
continue
box = center_to_corners_format([cxs[k], cys[k], ws[k], hs[k]])
max_iou = 0
for p in range(len(found_boxes)):
iou = calculate_iou(box, center_to_corners_format(found_boxes[p]))
if iou > max_iou:
max_iou = iou
if max_iou > 0.8:
#save found boundary boxes in YOLO format
box = center_to_corners_format([cxs[k], cys[k], ws[k], hs[k]])
box = rescale_owl(raw_image, box)
x = (box[0] + box[2]) / 2 / width
y = (box[1] + box[3]) / 2 / height
w = (box[2] - box[0]) / width
h = (box[3] - box[1]) / raw_image.height
annotations.append([found_file, folder, x, y, w, h])
raw_image.close()
#save boxes for the request image too
raw_image = Image.open(file)
width, height = raw_image.size
box = center_to_corners_format(boxes[j])
box = rescale_owl(raw_image, box)
x = (box[0] + box[2]) / 2 / width
y = (box[1] + box[3]) / 2 / height
w = (box[2] - box[0]) / width
h = (box[3] - box[1]) / raw_image.height
raw_image.close()
annotations.append([file, folder, x, y, w, h])
print('Current label: ' + label)
# request_count = round(len(imgpaths)*1.2, 0)
request_count = 45
selected_threshold = 0.4
not_enough_count = True
while not_enough_count:
print('next round')
cur_annot, found_count = find_for_label(label, folder, imgpaths, selected_threshold)
print('found on this round: ' + str(found_count))
if found_count < request_count:
selected_threshold -= 0.1
if selected_threshold < 0.2:
not_enough_count = False
else:
not_enough_count = False
annotations += cur_annot
#remove duplicates and sort the final list by file name
annotations = [list(x) for x in set(tuple(row) for row in annotations)]
......@@ -309,11 +352,13 @@ def owl_annoy_annotation(labels, folders):
#list of labels
labels = ['an apple', 'a banana', 'a butterfly', 'a boat', 'a cat', 'a cow', 'a crucifixion', 'a deer', 'a dog', 'a white dove', 'an eagle', 'a horse', 'a monkey', >
'a nude', 'a rooster', 'a serpent', 'a skull', 'a sheep', 'a swan', 'a trumpet']
#labels = ['an apple', 'a banana', 'a butterfly', 'a boat', 'a cat', 'a cow', 'a crucifixion', 'a deer', 'a dog', 'a white dove', 'an eagle', 'a horse', 'a monkey', >
# 'a nude', 'a rooster', 'a serpent', 'a skull', 'a sheep', 'a swan', 'a trumpet']
labels = ['a dog', 'a swan']
#name of folders with previously collected request images annotated on image level
folders = ['apple', 'banana', 'butterfly', 'boat', 'cat', 'cow', 'crucifixion', 'deer', 'dog', 'dove', 'eagle', 'horse', 'monkey', 'orange',
'nude', 'rooster', 'serpent', 'skull', 'sheep', 'swan', 'trumpet']
#folders = ['apple', 'banana', 'butterfly', 'boat', 'cat', 'cow', 'crucifixion', 'deer', 'dog', 'dove', 'eagle', 'horse', 'monkey', 'orange',
# 'nude', 'rooster', 'serpent', 'skull', 'sheep', 'swan', 'trumpet']
folders = ['dog', 'swan']
owl_annoy_annotation(labels, folders)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment