Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
ECCV AI4DH 2024
Manage
Activity
Members
Labels
Plan
Issues
0
Issue boards
Milestones
Wiki
Code
Merge requests
0
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
ANR AAA
ECCV AI4DH 2024
Commits
1091bce0
Commit
1091bce0
authored
10 months ago
by
Tetiana Yemelianenko
Browse files
Options
Downloads
Patches
Plain Diff
Upload script for the dataset creation
parent
69e3b54c
No related branches found
Branches containing commit
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
create_owl_dataset.py
+319
-0
319 additions, 0 deletions
create_owl_dataset.py
with
319 additions
and
0 deletions
create_owl_dataset.py
0 → 100644
+
319
−
0
View file @
1091bce0
import
os
from
PIL
import
Image
from
glob
import
glob
import
pandas
as
pd
import
shutil
from
annoy
import
AnnoyIndex
from
transformers
import
Owlv2Processor
,
Owlv2ForObjectDetection
import
torch
import
numpy
as
np
from
transformers.utils.constants
import
OPENAI_CLIP_MEAN
,
OPENAI_CLIP_STD
# Number of nearest neighbours requested from the ANNOY index per query.
TOP_COUNT = 30
# Dimensionality of the vectors stored in the ANNOY index.
feature_dim = 512

parent_dir = 'path_to_the_main_dir'
# Path to the directory with images annotated on image level.
parentpath = os.path.join(parent_dir, 'dataset/')
# Directory in which the selected images and their labels are saved.
owlpath = os.path.join(parent_dir, 'owl_dataset')
# Image file extension ('.jpg' or '.png').
ext = '.jpg'
# Path to the directory with non-annotated data.
base_dir = 'path_to_the_non_annotated_dataset'

# Output tree: <owlpath>/images and <owlpath>/labels.
# makedirs(exist_ok=True) also creates missing parents (including owlpath
# itself) and is a no-op when the directories are already there.
im_path = os.path.join(owlpath, 'images')
lb_path = os.path.join(owlpath, 'labels')
os.makedirs(im_path, exist_ok=True)
os.makedirs(lb_path, exist_ok=True)
# Run on the first CUDA device when one is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

# OWLv2 processor and detection model, both loaded from the same checkpoint.
processor = Owlv2Processor.from_pretrained("google/owlv2-base-patch16-ensemble")
model = Owlv2ForObjectDetection.from_pretrained("google/owlv2-base-patch16-ensemble")
model = model.to(device)

# CSV file with the pre-calculated top regions of interest (and their
# coordinates) for each painting of the WikiArt dataset.
df = pd.read_csv("path_to_the_objectnesses_fle/objectness_wikiart.csv")

# ANNOY index for the WikiArt dataset, used to look up objects that are
# similar to a query embedding.
t = AnnoyIndex(feature_dim, metric='angular')
t.load('path_to_the_annoy_index/annoy_wikiart.ann')
#receive similar images using ANNOY
def get_similar_images_annoy(base_vector):
    """Query the module-level ANNOY index for neighbours of ``base_vector``.

    Parameters
    ----------
    base_vector: query vector handed to the index

    Returns
    -------
    indices: ids of the (up to) TOP_COUNT nearest items in the index
    dists: distances of those items to the query
    """
    neighbours = t.get_nns_by_vector(
        base_vector,
        TOP_COUNT,
        include_distances=True,
    )
    item_ids, item_distances = neighbours
    return item_ids, item_distances
def get_preprocessed_image(pixel_values):
    """Turn normalized model input back into a PIL image.

    Parameters
    ----------
    pixel_values: tensor of normalized pixel values; after squeezing it is
        indexed as (channel, row, column)

    Returns
    -------
    PIL image obtained by undoing the OPENAI_CLIP_MEAN / OPENAI_CLIP_STD
    normalization and scaling the result to 8-bit values.
    """
    channels_first = pixel_values.detach().cpu().squeeze().numpy()

    # Per-channel statistics, shaped so they broadcast over rows and columns.
    channel_std = np.array(OPENAI_CLIP_STD)[:, None, None]
    channel_mean = np.array(OPENAI_CLIP_MEAN)[:, None, None]

    restored = (channels_first * channel_std) + channel_mean
    as_bytes = (restored * 255).astype(np.uint8)

    # PIL expects the channel axis last.
    return Image.fromarray(np.moveaxis(as_bytes, 0, -1))
#object detection using OWL-ViT
def detectobject(imgpath, label, threshold=0.4):
    """Detect ``label`` in one image with the module-level OWLv2 model.

    Parameters
    ----------
    imgpath: string with the path to the image
    label: string with searched object (used as the single text query)
    threshold: minimal sigmoid score a prediction needs to be kept

    Returns
    -------
    boxes: list of numpy arrays, one per kept prediction, taken unchanged
        from the model's ``pred_boxes`` (callers in this file treat them as
        (cx, cy, w, h))
    query_embeddings: list of class embeddings of the kept predictions, in
        the same order as ``boxes``
    """
    # The context manager closes the file even if preprocessing raises.
    with Image.open(imgpath) as raw_image:
        inputs = processor(
            text=[[label]], images=raw_image, return_tensors="pt"
        ).to(device)

    # The embeddings are only used for nearest-neighbour lookup, so no
    # autograd graph is needed for either the model or the class head.
    with torch.no_grad():
        outputs = model(**inputs)
        # Flatten the patch grid to (num_patches, embed_dim) without
        # hard-coding the sizes of one particular checkpoint.
        embed_dim = outputs.image_embeds.shape[-1]
        image_features = outputs.image_embeds.reshape(-1, embed_dim)
        source_class_embeddings = model.class_predictor(image_features)[1]

    # Best score over the text queries for every predicted box (first image).
    scores = torch.sigmoid(torch.max(outputs.logits, dim=-1).values)[0]
    pred_boxes = outputs.pred_boxes[0]

    # Select all predictions above the threshold in one vectorized step.
    kept = torch.nonzero(scores > threshold).flatten().tolist()
    boxes = [pred_boxes[i].detach().cpu().numpy() for i in kept]
    query_embeddings = [source_class_embeddings[i] for i in kept]
    return boxes, query_embeddings
#Calculate the Intersection over Union (IoU) of two bounding boxes
def calculate_iou(box1, box2):
    """Return the Intersection over Union (IoU) of two corner-format boxes.

    Parameters
    ----------
    box1 : list, tuple or array-like
        The (x1, y1, x2, y2) coordinates of the first bounding box.
    box2 : list, tuple or array-like
        The (x1, y1, x2, y2) coordinates of the second bounding box.

    Returns
    -------
    float
        The IoU of box1 and box2; 0 when the union has no area.
    """
    left_a, top_a, right_a, bottom_a = box1
    left_b, top_b, right_b, bottom_b = box2

    # Extent of the overlap along each axis, clamped at zero when disjoint.
    overlap_w = max(min(right_a, right_b) - max(left_a, left_b), 0)
    overlap_h = max(min(bottom_a, bottom_b) - max(top_a, top_b), 0)
    overlap_area = overlap_w * overlap_h

    area_a = (right_a - left_a) * (bottom_a - top_a)
    area_b = (right_b - left_b) * (bottom_b - top_b)
    combined_area = area_a + area_b - overlap_area

    if combined_area == 0:
        return 0
    return overlap_area / combined_area
#for request images
def center_to_corners_format(box):
    """Convert a (center_x, center_y, width, height) box to corners.

    Parameters
    ----------
    box: sequence of four numbers (center_x, center_y, width, height)

    Returns
    -------
    list [x1, y1, x2, y2]; the lower corner is clamped to be at least 0
    and the upper corner to be at most 1.
    """
    cx, cy, box_w, box_h = box
    half_w = 0.5 * box_w
    half_h = 0.5 * box_h
    return [
        max(cx - half_w, 0),
        max(cy - half_h, 0),
        min(cx + half_w, 1),
        min(cy + half_h, 1),
    ]
def rescale_owl(raw_image, box):
    """Scale a normalized corner box to pixels of the squared-up image.

    Parameters
    ----------
    raw_image: object exposing ``width`` and ``height`` (e.g. a PIL image)
    box: sequence (x1, y1, x2, y2) of normalized coordinates

    Returns
    -------
    numpy array with the box multiplied by the image size after its shorter
    side has been stretched to the length of the longer one.
    """
    height_px = raw_image.height
    width_px = raw_image.width

    # Ratio of each side to the longer side; the longer side keeps ratio 1.
    ratio_w = width_px / height_px if width_px < height_px else 1
    ratio_h = height_px / width_px if height_px < width_px else 1

    # Dividing by the ratio brings the shorter side up to the longer one.
    squared_w = width_px / ratio_w
    squared_h = height_px / ratio_h

    factors = np.array([squared_w, squared_h, squared_w, squared_h])
    return np.array(box) * factors
def create_new_name(counter):
    """Build an output file stem from a running counter.

    The stem is the letter "e" followed by the counter, left-padded with
    zeros to at least six characters (e.g. 12 -> "e000012"). Counters whose
    text is already six characters or longer are used as they are.
    """
    digits = str(counter)
    return "e" + digits.rjust(6, "0")
#creation of annotations
def owl_annoy_annotation(labels, folders,
                         classes_path="/home/tetiana/yolo/DEArt/deart_classes.txt"):
    """Build a YOLO-style dataset from request images and similar regions.

    For every request image of every label the object is detected with
    ``detectobject``; each detection is stored, and its embedding is used to
    retrieve similar pre-calculated regions through the ANNOY index. A
    retrieved region is kept only when a detection of the same label in
    that image overlaps it with IoU above 0.8. Finally the images are copied
    to ``<owlpath>/images`` and one txt file per image is written to
    ``<owlpath>/labels``.

    Parameters
    ----------
    labels: list of labels used for the annotations (text queries)
    folders: list of folders with files pre-selected and annotated on image
        level; ``folders[i]`` belongs to ``labels[i]`` and is also the class
        name looked up in the classes file
    classes_path: path to the text file with one class name per line; the
        line number (starting at 0) is the class id written to the labels

    Raises
    ------
    ValueError
        If ``labels`` and ``folders`` differ in length, because pairing them
        anyway would silently attach wrong class ids.
    """
    if len(labels) != len(folders):
        raise ValueError(
            "labels and folders must have the same length (got %d and %d)"
            % (len(labels), len(folders))
        )

    # All objects with their bounding boxes: [path, class, x, y, w, h].
    annotations = []
    for label, folder in zip(labels, folders):
        print(label)
        imgpaths = glob(parentpath + folder + "/*" + ext)
        # For all images of the current label.
        for position, file in enumerate(imgpaths, start=1):
            print(position)
            try:
                boxes, query_embeddings = detectobject(file, label, 0.4)
            except Exception:
                # Best effort: report the unreadable/failed image and go on.
                print(file)
                continue
            # If the object was not detected, the image is skipped.
            for request_box, embedding in zip(boxes, query_embeddings):
                annotations.extend(
                    _similar_region_annotations(embedding, label, folder)
                )
                # Save the box of the request image too.
                corners = center_to_corners_format(request_box)
                annotations.append([file, folder, *_yolo_box(file, corners)])

    # Drop duplicates; sorting the full rows groups them by file name and
    # makes the order of the written lines deterministic.
    unique_rows = sorted({tuple(row) for row in annotations})
    _write_yolo_dataset(unique_rows, _load_class_codes(classes_path))


def _similar_region_annotations(embedding, label, folder):
    """Return annotation rows for confirmed regions similar to ``embedding``."""
    similar_ids, _ = get_similar_images_annoy(embedding)
    selected = df.iloc[similar_ids]
    rows = []
    # Iterate over what the index returned; it may be fewer than TOP_COUNT.
    for rel_path, cx, cy, w, h in zip(selected['file_path'], selected['cx'],
                                      selected['cy'], selected['w'],
                                      selected['h']):
        found_file = os.path.join(base_dir, rel_path)
        try:
            found_boxes, _ = detectobject(found_file, label, 0.4)
        except Exception:
            # Skip this candidate instead of reusing results of an earlier one.
            print(found_file)
            continue
        if len(found_boxes) == 0:
            continue
        candidate = center_to_corners_format([cx, cy, w, h])
        max_iou = max(
            (calculate_iou(candidate, center_to_corners_format(found))
             for found in found_boxes),
            default=0,
        )
        if max_iou > 0.8:
            rows.append([found_file, folder, *_yolo_box(found_file, candidate)])
    return rows


def _yolo_box(image_path, corners):
    """Convert a normalized corner box into YOLO (x, y, w, h) for an image."""
    with Image.open(image_path) as image:
        width, height = image.size
        x1, y1, x2, y2 = rescale_owl(image, corners)
    # Center and size, normalized by the real image dimensions.
    return ((x1 + x2) / 2 / width, (y1 + y2) / 2 / height,
            (x2 - x1) / width, (y2 - y1) / height)


def _load_class_codes(classes_path):
    """Map every stripped line of the classes file to its line index."""
    with open(classes_path, "r") as handle:
        return {line.strip(): index for index, line in enumerate(handle)}


def _write_yolo_dataset(rows, codes):
    """Copy each annotated image once and write its YOLO label lines."""
    counter = 0
    previous_filename = ""
    for filename, folder, x, y, w, h in rows:
        if filename != previous_filename:
            # First row of a new image: copy it and start a fresh label file.
            counter += 1
            new_name = create_new_name(counter)
            shutil.copyfile(filename, os.path.join(owlpath, "images", new_name + ".jpg"))
            mode = "w"
        else:
            mode = "a"
        with open(os.path.join(owlpath, "labels", new_name + ".txt"), mode) as f:
            f.write("%d %.06f %.06f %.06f %.06f\n" % (codes[folder], x, y, w, h))
        previous_filename = filename
#list of labels
# Text queries, one per class; labels[i] is paired with folders[i], so both
# lists must have the same length and order.
labels = [
    'an apple', 'a banana', 'a butterfly', 'a boat', 'a cat', 'a cow',
    'a crucifixion', 'a deer', 'a dog', 'a white dove', 'an eagle',
    'a horse', 'a monkey', 'an orange', 'a nude', 'a rooster', 'a serpent',
    'a skull', 'a sheep', 'a swan', 'a trumpet',
]
#name of folders with previously collected request images annotated on image level
folders = [
    'apple', 'banana', 'butterfly', 'boat', 'cat', 'cow',
    'crucifixion', 'deer', 'dog', 'dove', 'eagle',
    'horse', 'monkey', 'orange', 'nude', 'rooster', 'serpent',
    'skull', 'sheep', 'swan', 'trumpet',
]
owl_annoy_annotation(labels, folders)
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment