Commit 24f3fb70 authored by Jiwen Tang

Merge branch 'det' into main

parents 42f71b6a cfb8760a

Showing 2784 additions and 0 deletions
import os.path as osp
from omegaconf import OmegaConf
import torch
from detectron2.config import LazyCall as L
from detectron2.data import get_detection_dataset_dicts
from detectron2.solver.build import get_default_optimizer_params
from det.yolox.models import YOLOX, YOLOPAFPN, YOLOXHead
from det.yolox.data import (
# COCODataset,
TrainTransform,
ValTransform,
# YoloBatchSampler,
# DataLoader,
# InfiniteSampler,
MosaicDetection,
build_yolox_train_loader,
build_yolox_test_loader,
)
from det.yolox.data.datasets import Base_DatasetFromList
from det.yolox.utils import LRScheduler
# from detectron2.evaluation import COCOEvaluator
# from det.yolox.evaluators import COCOEvaluator
from det.yolox.evaluators import YOLOX_COCOEvaluator
from lib.torch_utils.solver.lr_scheduler import flat_and_anneal_lr_scheduler
# Common training-related configs that are designed for "tools/lazyconfig_train_net.py"
# You can use your own instead, together with your own train_net.py
train = dict(
# NOTE: need to copy these two lines to get correct dirs
output_dir=osp.abspath(__file__).replace("configs", "output", 1)[0:-3],
exp_name=osp.split(osp.abspath(__file__))[1][0:-3], # .py
seed=-1,
cudnn_deterministic=False,
cudnn_benchmark=True,
init_checkpoint="",
# init_checkpoint="pretrained_models/yolox/yolox_s.pth",
resume_from="",
# init_checkpoint="detectron2://ImageNetPretrained/MSRA/R-50.pkl",
# max_iter=90000,
amp=dict( # options for Automatic Mixed Precision
enabled=True,
),
grad_clip=dict( # options for grad clipping
enabled=False,
clip_type="full_model", # value, norm, full_model
clip_value=1.0,
norm_type=2.0,
),
ddp=dict( # options for DistributedDataParallel
broadcast_buffers=False,
find_unused_parameters=False,
fp16_compression=False,
),
# NOTE: epoch based period
checkpointer=dict(period=1, max_to_keep=10), # options for PeriodicCheckpointer
# eval_period=5000,
eval_period=-1, # epoch based
log_period=20,
device="cuda",
# ...
basic_lr_per_img=0.01 / 64.0, # 1.5625e-4
random_size=(14, 26), # set None to disable; randomly choose an int in this range and multiply by 32
mscale=(0.8, 1.6),
ema=True,
total_epochs=16,
warmup_epochs=5,
no_aug_epochs=2,
sync_norm_period=10, # sync norm every n epochs
# l1 loss:
# 1) use_l1=True and l1_from_scratch=True: use l1 for the whole training phase
# 2) use_l1=False: no l1 at all
# 3) use_l1=True and l1_from_scratch=False: only use l1 after closing mosaic (YOLOX default)
l1_from_scratch=False,
use_l1=True,
anneal_after_warmup=True,
# ...
occupy_gpu=False,
)
train = OmegaConf.create(train)
# OmegaConf.register_new_resolver(
# "mul2", lambda x: x*2
# )
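# Minimal usage sketch (hedged; the path and overrides below are illustrative and
# assume a detectron2-style lazyconfig runner):
#   from detectron2.config import LazyConfig, instantiate
#   cfg = LazyConfig.load("configs/yolox/yolox_base.py")
#   cfg.train.total_epochs = 20  # override like any OmegaConf node
#   model = instantiate(cfg.model)  # recursively builds YOLOX from the L(...) nodes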
# --------------------------------------------------------------------
# model
# --------------------------------------------------------------------
model = L(YOLOX)(
backbone=L(YOLOPAFPN)(
depth=1.0,
width=1.0,
in_channels=[256, 512, 1024],
),
head=L(YOLOXHead)(
num_classes=1,
width="${..backbone.width}",
# width="${mul2: ${..backbone.width}}", # NOTE: do not forget $
in_channels="${..backbone.in_channels}",
),
)
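# NOTE: "${..backbone.width}" is an OmegaConf relative interpolation; it is resolved
# against the sibling backbone node when the config is accessed/instantiated, so
# head.width and head.in_channels always track the backbone settings above.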
# --------------------------------------------------------------------
# optimizer
# --------------------------------------------------------------------
optimizer = L(torch.optim.SGD)(
params=L(get_default_optimizer_params)(
# params.model is meant to be set to the model object, before instantiating
# the optimizer.
weight_decay_norm=0.0,
weight_decay_bias=0.0,
),
lr=0.01, # bs=64
momentum=0.9,
weight_decay=5e-4,
nesterov=True,
)
lr_config = L(flat_and_anneal_lr_scheduler)(
warmup_method="pow",
warmup_pow=2,
warmup_factor=0.0,
# to be set
# optimizer=
# total_iters=total_iters, # to be set
# warmup_iters=epoch_len * 3,
# anneal_point=5 / (total_epochs - 15),
anneal_method="cosine",
target_lr_factor=0.05,
)
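# The commented fields above are filled in by the training script once the epoch
# length is known. A hedged sketch (epoch_len is computed at runtime from the
# dataset size and batch size; it is not defined in this file):
#   lr_config.optimizer = optimizer
#   lr_config.total_iters = epoch_len * train.total_epochs
#   lr_config.warmup_iters = epoch_len * 3
#   lr_config.anneal_point = 5 / (train.total_epochs - 15)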
DATASETS = dict(TRAIN=("",), TEST=("",))
DATASETS = OmegaConf.create(DATASETS)
dataloader = OmegaConf.create()
dataloader.train = L(build_yolox_train_loader)(
dataset=L(Base_DatasetFromList)(
split="train",
lst=L(get_detection_dataset_dicts)(names=DATASETS.TRAIN),
img_size=(640, 640),
preproc=L(TrainTransform)(
max_labels=50,
),
),
aug_wrapper=L(MosaicDetection)(
mosaic=True,
img_size="${..dataset.img_size}",
preproc=L(TrainTransform)(
max_labels=120,
),
degrees=10.0,
translate=0.1,
mosaic_scale=(0.1, 2),
mixup_scale=(0.5, 1.5),
shear=2.0,
enable_mixup=True,
mosaic_prob=1.0,
mixup_prob=1.0,
),
# reference_batch_size=64,
total_batch_size=64, # 8x8gpu
num_workers=4,
pin_memory=True,
)
val = dict(
eval_cached=False,
)
val = OmegaConf.create(val)
test = dict(
test_dataset_names=DATASETS.TEST,
test_size=(640, 640), # (height, width)
conf_thr=0.01,
nms_thr=0.65,
num_classes="${model.head.num_classes}",
amp_test=False,
half_test=True,
precise_bn=dict(
enabled=False,
num_iter=200,
),
# fuse_conv_bn=False,
fuse_conv_bn=True,
)
test = OmegaConf.create(test)
# NOTE: for multiple test loaders, just write it as a list
dataloader.test = [
L(build_yolox_test_loader)(
dataset=L(Base_DatasetFromList)(
split="test",
lst=L(get_detection_dataset_dicts)(names=test_dataset_name, filter_empty=False),
img_size="${test.test_size}",
preproc=L(ValTransform)(
legacy=False,
),
),
# total_batch_size=1,
total_batch_size=64,
num_workers=4,
pin_memory=True,
)
for test_dataset_name in test.test_dataset_names
]
dataloader.evaluator = [
L(YOLOX_COCOEvaluator)(
dataset_name=test_dataset_name,
filter_scene=False,
)
for test_dataset_name in test.test_dataset_names
]
import os.path as osp
import torch
from detectron2.config import LazyCall as L
from detectron2.solver.build import get_default_optimizer_params
from .yolox_base import train, val, test, model, dataloader, optimizer, lr_config, DATASETS # noqa
from det.yolox.data import build_yolox_test_loader, ValTransform
from det.yolox.data.datasets import Base_DatasetFromList
from detectron2.data import get_detection_dataset_dicts
from det.yolox.evaluators import YOLOX_COCOEvaluator
from lib.torch_utils.solver.ranger import Ranger
train.update(
output_dir=osp.abspath(__file__).replace("configs", "output", 1)[0:-3],
exp_name=osp.split(osp.abspath(__file__))[1][0:-3], # .py
)
train.amp.enabled = True
model.backbone.depth = 1.33
model.backbone.width = 1.25
model.head.num_classes = 33
train.init_checkpoint = "pretrained_models/yolox/yolox_x.pth"
# datasets
DATASETS.TRAIN = ["hb_pbr_train"]
DATASETS.TEST = ["hb_test_primesense_bop19"]
dataloader.train.dataset.lst.names = DATASETS.TRAIN
dataloader.train.total_batch_size = 32
# color aug
dataloader.train.aug_wrapper.COLOR_AUG_PROB = 0.8
dataloader.train.aug_wrapper.COLOR_AUG_TYPE = "code"
dataloader.train.aug_wrapper.COLOR_AUG_CODE = (
"Sequential(["
# Sometimes(0.5, PerspectiveTransform(0.05)),
# Sometimes(0.5, CropAndPad(percent=(-0.05, 0.1))),
# Sometimes(0.5, Affine(scale=(1.0, 1.2))),
"Sometimes(0.5, CoarseDropout( p=0.2, size_percent=0.05) ),"
"Sometimes(0.4, GaussianBlur((0., 3.))),"
"Sometimes(0.3, pillike.EnhanceSharpness(factor=(0., 50.))),"
"Sometimes(0.3, pillike.EnhanceContrast(factor=(0.2, 50.))),"
"Sometimes(0.5, pillike.EnhanceBrightness(factor=(0.1, 6.))),"
"Sometimes(0.3, pillike.EnhanceColor(factor=(0., 20.))),"
"Sometimes(0.5, Add((-25, 25), per_channel=0.3)),"
"Sometimes(0.3, Invert(0.2, per_channel=True)),"
"Sometimes(0.5, Multiply((0.6, 1.4), per_channel=0.5)),"
"Sometimes(0.5, Multiply((0.6, 1.4))),"
"Sometimes(0.1, AdditiveGaussianNoise(scale=10, per_channel=True)),"
"Sometimes(0.5, iaa.contrast.LinearContrast((0.5, 2.2), per_channel=0.3)),"
# "Sometimes(0.5, Grayscale(alpha=(0.0, 1.0)))," # maybe remove for det
"], random_order=True)"
# cosy+aae
)
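# NOTE: with COLOR_AUG_TYPE="code", the string above is presumably eval'd by the
# color augmentor with imgaug names (Sequential, Sometimes, CoarseDropout, pillike,
# iaa, ...) in scope; random_order=True applies the ops in a random order per image.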
# hsv color aug
dataloader.train.aug_wrapper.AUG_HSV_PROB = 1.0
dataloader.train.aug_wrapper.HSV_H = 0.015
dataloader.train.aug_wrapper.HSV_S = 0.7
dataloader.train.aug_wrapper.HSV_V = 0.4
dataloader.train.aug_wrapper.FORMAT = "RGB"
optimizer = L(Ranger)(
params=L(get_default_optimizer_params)(
# params.model is meant to be set to the model object, before instantiating
# the optimizer.
weight_decay_norm=0.0,
weight_decay_bias=0.0,
),
lr=0.001, # bs=64
# momentum=0.9,
weight_decay=0,
# nesterov=True,
)
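# NOTE: Ranger (RAdam + Lookahead) replaces the SGD optimizer from yolox_base;
# accordingly there is no momentum/nesterov here and weight_decay is set to 0.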
train.total_epochs = 30
train.no_aug_epochs = 15
train.checkpointer = dict(period=2, max_to_keep=10)
test.test_dataset_names = DATASETS.TEST
test.augment = True
test.scales = (1, 0.75, 0.83, 1.12, 1.25)
test.conf_thr = 0.001
dataloader.test = [
L(build_yolox_test_loader)(
dataset=L(Base_DatasetFromList)(
split="test",
lst=L(get_detection_dataset_dicts)(names=test_dataset_name, filter_empty=False),
img_size="${test.test_size}",
preproc=L(ValTransform)(
legacy=False,
),
),
total_batch_size=1,
# total_batch_size=64,
num_workers=4,
pin_memory=True,
)
for test_dataset_name in test.test_dataset_names
]
dataloader.evaluator = [
L(YOLOX_COCOEvaluator)(
dataset_name=test_dataset_name,
filter_scene=False,
)
for test_dataset_name in test.test_dataset_names
]
import os.path as osp
import torch
from detectron2.config import LazyCall as L
from detectron2.solver.build import get_default_optimizer_params
from .yolox_base import train, val, test, model, dataloader, optimizer, lr_config, DATASETS # noqa
from det.yolox.data import build_yolox_test_loader, ValTransform
from det.yolox.data.datasets import Base_DatasetFromList
from detectron2.data import get_detection_dataset_dicts
from det.yolox.evaluators import YOLOX_COCOEvaluator
from lib.torch_utils.solver.ranger import Ranger
train.update(
output_dir=osp.abspath(__file__).replace("configs", "output", 1)[0:-3],
exp_name=osp.split(osp.abspath(__file__))[1][0:-3], # .py
)
train.amp.enabled = True
model.backbone.depth = 1.33
model.backbone.width = 1.25
model.head.num_classes = 2
train.init_checkpoint = "pretrained_models/yolox/yolox_x.pth"
# datasets
DATASETS.TRAIN = ["icbin_pbr_train"]
DATASETS.TEST = ["icbin_bop_test"]
dataloader.train.dataset.lst.names = DATASETS.TRAIN
dataloader.train.total_batch_size = 32
# color aug
dataloader.train.aug_wrapper.COLOR_AUG_PROB = 0.8
dataloader.train.aug_wrapper.COLOR_AUG_TYPE = "code"
dataloader.train.aug_wrapper.COLOR_AUG_CODE = (
"Sequential(["
# Sometimes(0.5, PerspectiveTransform(0.05)),
# Sometimes(0.5, CropAndPad(percent=(-0.05, 0.1))),
# Sometimes(0.5, Affine(scale=(1.0, 1.2))),
"Sometimes(0.5, CoarseDropout( p=0.2, size_percent=0.05) ),"
"Sometimes(0.4, GaussianBlur((0., 3.))),"
"Sometimes(0.3, pillike.EnhanceSharpness(factor=(0., 50.))),"
"Sometimes(0.3, pillike.EnhanceContrast(factor=(0.2, 50.))),"
"Sometimes(0.5, pillike.EnhanceBrightness(factor=(0.1, 6.))),"
"Sometimes(0.3, pillike.EnhanceColor(factor=(0., 20.))),"
"Sometimes(0.5, Add((-25, 25), per_channel=0.3)),"
"Sometimes(0.3, Invert(0.2, per_channel=True)),"
"Sometimes(0.5, Multiply((0.6, 1.4), per_channel=0.5)),"
"Sometimes(0.5, Multiply((0.6, 1.4))),"
"Sometimes(0.1, AdditiveGaussianNoise(scale=10, per_channel=True)),"
"Sometimes(0.5, iaa.contrast.LinearContrast((0.5, 2.2), per_channel=0.3)),"
# "Sometimes(0.5, Grayscale(alpha=(0.0, 1.0)))," # maybe remove for det
"], random_order=True)"
# cosy+aae
)
# hsv color aug
dataloader.train.aug_wrapper.AUG_HSV_PROB = 1.0
dataloader.train.aug_wrapper.HSV_H = 0.015
dataloader.train.aug_wrapper.HSV_S = 0.7
dataloader.train.aug_wrapper.HSV_V = 0.4
dataloader.train.aug_wrapper.FORMAT = "RGB"
optimizer = L(Ranger)(
params=L(get_default_optimizer_params)(
# params.model is meant to be set to the model object, before instantiating
# the optimizer.
weight_decay_norm=0.0,
weight_decay_bias=0.0,
),
lr=0.001, # bs=64
# momentum=0.9,
weight_decay=0,
# nesterov=True,
)
train.total_epochs = 30
train.no_aug_epochs = 15
train.checkpointer = dict(period=2, max_to_keep=10)
test.test_dataset_names = DATASETS.TEST
test.augment = True
test.scales = (1, 0.75, 0.83, 1.12, 1.25)
test.conf_thr = 0.001
dataloader.test = [
L(build_yolox_test_loader)(
dataset=L(Base_DatasetFromList)(
split="test",
lst=L(get_detection_dataset_dicts)(names=test_dataset_name, filter_empty=False),
img_size="${test.test_size}",
preproc=L(ValTransform)(
legacy=False,
),
),
total_batch_size=1,
# total_batch_size=64,
num_workers=4,
pin_memory=True,
)
for test_dataset_name in test.test_dataset_names
]
dataloader.evaluator = [
L(YOLOX_COCOEvaluator)(
dataset_name=test_dataset_name,
filter_scene=False,
)
for test_dataset_name in test.test_dataset_names
]
import os.path as osp
import torch
from detectron2.config import LazyCall as L
from detectron2.solver.build import get_default_optimizer_params
from .yolox_base import train, val, test, model, dataloader, optimizer, lr_config, DATASETS # noqa
from det.yolox.data import build_yolox_test_loader, ValTransform
from det.yolox.data.datasets import Base_DatasetFromList
from detectron2.data import get_detection_dataset_dicts
from det.yolox.evaluators import YOLOX_COCOEvaluator
from lib.torch_utils.solver.ranger import Ranger
train.update(
output_dir=osp.abspath(__file__).replace("configs", "output", 1)[0:-3],
exp_name=osp.split(osp.abspath(__file__))[1][0:-3], # .py
)
train.amp.enabled = True
model.backbone.depth = 1.33
model.backbone.width = 1.25
model.head.num_classes = 28
train.init_checkpoint = "pretrained_models/yolox/yolox_x.pth"
# datasets
DATASETS.TRAIN = ["itodd_pbr_train"]
DATASETS.TEST = ["itodd_bop_test"]
dataloader.train.dataset.lst.names = DATASETS.TRAIN
dataloader.train.total_batch_size = 32
# color aug
dataloader.train.aug_wrapper.COLOR_AUG_PROB = 0.8
dataloader.train.aug_wrapper.COLOR_AUG_TYPE = "code"
dataloader.train.aug_wrapper.COLOR_AUG_CODE = (
"Sequential(["
# Sometimes(0.5, PerspectiveTransform(0.05)),
# Sometimes(0.5, CropAndPad(percent=(-0.05, 0.1))),
# Sometimes(0.5, Affine(scale=(1.0, 1.2))),
"Sometimes(0.5, CoarseDropout( p=0.2, size_percent=0.05) ),"
"Sometimes(0.4, GaussianBlur((0., 3.))),"
"Sometimes(0.3, pillike.EnhanceSharpness(factor=(0., 50.))),"
"Sometimes(0.3, pillike.EnhanceContrast(factor=(0.2, 50.))),"
"Sometimes(0.5, pillike.EnhanceBrightness(factor=(0.1, 6.))),"
"Sometimes(0.3, pillike.EnhanceColor(factor=(0., 20.))),"
"Sometimes(0.5, Add((-25, 25), per_channel=0.3)),"
"Sometimes(0.3, Invert(0.2, per_channel=True)),"
"Sometimes(0.5, Multiply((0.6, 1.4), per_channel=0.5)),"
"Sometimes(0.5, Multiply((0.6, 1.4))),"
"Sometimes(0.1, AdditiveGaussianNoise(scale=10, per_channel=True)),"
"Sometimes(0.5, iaa.contrast.LinearContrast((0.5, 2.2), per_channel=0.3)),"
# "Sometimes(0.5, Grayscale(alpha=(0.0, 1.0)))," # maybe remove for det
"], random_order=True)"
# cosy+aae
)
# hsv color aug
dataloader.train.aug_wrapper.AUG_HSV_PROB = 1.0
dataloader.train.aug_wrapper.HSV_H = 0.015
dataloader.train.aug_wrapper.HSV_S = 0.7
dataloader.train.aug_wrapper.HSV_V = 0.4
dataloader.train.aug_wrapper.FORMAT = "RGB"
optimizer = L(Ranger)(
params=L(get_default_optimizer_params)(
# params.model is meant to be set to the model object, before instantiating
# the optimizer.
weight_decay_norm=0.0,
weight_decay_bias=0.0,
),
lr=0.001, # bs=64
# momentum=0.9,
weight_decay=0,
# nesterov=True,
)
train.total_epochs = 30
train.no_aug_epochs = 15
train.checkpointer = dict(period=2, max_to_keep=10)
test.test_dataset_names = DATASETS.TEST
test.augment = True
test.scales = (1, 0.75, 0.83, 1.12, 1.25)
test.conf_thr = 0.001
dataloader.test = [
L(build_yolox_test_loader)(
dataset=L(Base_DatasetFromList)(
split="test",
lst=L(get_detection_dataset_dicts)(names=test_dataset_name, filter_empty=False),
img_size="${test.test_size}",
preproc=L(ValTransform)(
legacy=False,
),
),
total_batch_size=1,
# total_batch_size=64,
num_workers=4,
pin_memory=True,
)
for test_dataset_name in test.test_dataset_names
]
dataloader.evaluator = [
L(YOLOX_COCOEvaluator)(
dataset_name=test_dataset_name,
filter_scene=False,
)
for test_dataset_name in test.test_dataset_names
]
import os.path as osp
import torch
from detectron2.config import LazyCall as L
from detectron2.solver.build import get_default_optimizer_params
from .yolox_base import train, val, test, model, dataloader, optimizer, lr_config, DATASETS # noqa
from det.yolox.data import build_yolox_test_loader, ValTransform
from det.yolox.data.datasets import Base_DatasetFromList
from detectron2.data import get_detection_dataset_dicts
from det.yolox.evaluators import YOLOX_COCOEvaluator
from lib.torch_utils.solver.ranger import Ranger
train.update(
output_dir=osp.abspath(__file__).replace("configs", "output", 1)[0:-3],
exp_name=osp.split(osp.abspath(__file__))[1][0:-3], # .py
)
train.amp.enabled = True
model.backbone.depth = 1.33
model.backbone.width = 1.25
model.head.num_classes = 8
train.init_checkpoint = "pretrained_models/yolox/yolox_x.pth"
# datasets
DATASETS.TRAIN = ["lmo_pbr_train"]
DATASETS.TEST = ["lmo_bop_test"]
dataloader.train.dataset.lst.names = DATASETS.TRAIN
dataloader.train.total_batch_size = 32
# color aug
dataloader.train.aug_wrapper.COLOR_AUG_PROB = 0.8
dataloader.train.aug_wrapper.COLOR_AUG_TYPE = "code"
dataloader.train.aug_wrapper.COLOR_AUG_CODE = (
"Sequential(["
# Sometimes(0.5, PerspectiveTransform(0.05)),
# Sometimes(0.5, CropAndPad(percent=(-0.05, 0.1))),
# Sometimes(0.5, Affine(scale=(1.0, 1.2))),
"Sometimes(0.5, CoarseDropout( p=0.2, size_percent=0.05) ),"
"Sometimes(0.4, GaussianBlur((0., 3.))),"
"Sometimes(0.3, pillike.EnhanceSharpness(factor=(0., 50.))),"
"Sometimes(0.3, pillike.EnhanceContrast(factor=(0.2, 50.))),"
"Sometimes(0.5, pillike.EnhanceBrightness(factor=(0.1, 6.))),"
"Sometimes(0.3, pillike.EnhanceColor(factor=(0., 20.))),"
"Sometimes(0.5, Add((-25, 25), per_channel=0.3)),"
"Sometimes(0.3, Invert(0.2, per_channel=True)),"
"Sometimes(0.5, Multiply((0.6, 1.4), per_channel=0.5)),"
"Sometimes(0.5, Multiply((0.6, 1.4))),"
"Sometimes(0.1, AdditiveGaussianNoise(scale=10, per_channel=True)),"
"Sometimes(0.5, iaa.contrast.LinearContrast((0.5, 2.2), per_channel=0.3)),"
# "Sometimes(0.5, Grayscale(alpha=(0.0, 1.0)))," # maybe remove for det
"], random_order=True)"
# cosy+aae
)
# hsv color aug
dataloader.train.aug_wrapper.AUG_HSV_PROB = 1.0
dataloader.train.aug_wrapper.HSV_H = 0.015
dataloader.train.aug_wrapper.HSV_S = 0.7
dataloader.train.aug_wrapper.HSV_V = 0.4
dataloader.train.aug_wrapper.FORMAT = "RGB"
optimizer = L(Ranger)(
params=L(get_default_optimizer_params)(
# params.model is meant to be set to the model object, before instantiating
# the optimizer.
weight_decay_norm=0.0,
weight_decay_bias=0.0,
),
lr=0.001, # bs=64
# momentum=0.9,
weight_decay=0,
# nesterov=True,
)
train.total_epochs = 30
train.no_aug_epochs = 15
train.checkpointer = dict(period=2, max_to_keep=10)
test.test_dataset_names = DATASETS.TEST
test.augment = True
test.scales = (1, 0.75, 0.83, 1.12, 1.25)
test.conf_thr = 0.001
dataloader.test = [
L(build_yolox_test_loader)(
dataset=L(Base_DatasetFromList)(
split="test",
lst=L(get_detection_dataset_dicts)(names=test_dataset_name, filter_empty=False),
img_size="${test.test_size}",
preproc=L(ValTransform)(
legacy=False,
),
),
total_batch_size=1,
# total_batch_size=64,
num_workers=4,
pin_memory=True,
)
for test_dataset_name in test.test_dataset_names
]
dataloader.evaluator = [
L(YOLOX_COCOEvaluator)(
dataset_name=test_dataset_name,
filter_scene=False,
)
for test_dataset_name in test.test_dataset_names
]
import os.path as osp
import torch
from detectron2.config import LazyCall as L
from detectron2.solver.build import get_default_optimizer_params
from .yolox_base import train, val, test, model, dataloader, optimizer, lr_config, DATASETS # noqa
from det.yolox.data import build_yolox_test_loader, ValTransform
from det.yolox.data.datasets import Base_DatasetFromList
from detectron2.data import get_detection_dataset_dicts
from det.yolox.evaluators import YOLOX_COCOEvaluator
from lib.torch_utils.solver.ranger import Ranger
train.update(
output_dir=osp.abspath(__file__).replace("configs", "output", 1)[0:-3],
exp_name=osp.split(osp.abspath(__file__))[1][0:-3], # .py
)
train.amp.enabled = True
model.backbone.depth = 1.33
model.backbone.width = 1.25
model.head.num_classes = 30
train.init_checkpoint = "pretrained_models/yolox/yolox_x.pth"
# datasets
DATASETS.TRAIN = ["tless_pbr_train"]
DATASETS.TEST = ["tless_bop_test_primesense"]
dataloader.train.dataset.lst.names = DATASETS.TRAIN
dataloader.train.total_batch_size = 32
# color aug
dataloader.train.aug_wrapper.COLOR_AUG_PROB = 0.8
dataloader.train.aug_wrapper.COLOR_AUG_TYPE = "code"
dataloader.train.aug_wrapper.COLOR_AUG_CODE = (
"Sequential(["
# Sometimes(0.5, PerspectiveTransform(0.05)),
# Sometimes(0.5, CropAndPad(percent=(-0.05, 0.1))),
# Sometimes(0.5, Affine(scale=(1.0, 1.2))),
"Sometimes(0.5, CoarseDropout( p=0.2, size_percent=0.05) ),"
"Sometimes(0.4, GaussianBlur((0., 3.))),"
"Sometimes(0.3, pillike.EnhanceSharpness(factor=(0., 50.))),"
"Sometimes(0.3, pillike.EnhanceContrast(factor=(0.2, 50.))),"
"Sometimes(0.5, pillike.EnhanceBrightness(factor=(0.1, 6.))),"
"Sometimes(0.3, pillike.EnhanceColor(factor=(0., 20.))),"
"Sometimes(0.5, Add((-25, 25), per_channel=0.3)),"
"Sometimes(0.3, Invert(0.2, per_channel=True)),"
"Sometimes(0.5, Multiply((0.6, 1.4), per_channel=0.5)),"
"Sometimes(0.5, Multiply((0.6, 1.4))),"
"Sometimes(0.1, AdditiveGaussianNoise(scale=10, per_channel=True)),"
"Sometimes(0.5, iaa.contrast.LinearContrast((0.5, 2.2), per_channel=0.3)),"
# "Sometimes(0.5, Grayscale(alpha=(0.0, 1.0)))," # maybe remove for det
"], random_order=True)"
# cosy+aae
)
# hsv color aug
dataloader.train.aug_wrapper.AUG_HSV_PROB = 1.0
dataloader.train.aug_wrapper.HSV_H = 0.015
dataloader.train.aug_wrapper.HSV_S = 0.7
dataloader.train.aug_wrapper.HSV_V = 0.4
dataloader.train.aug_wrapper.FORMAT = "RGB"
optimizer = L(Ranger)(
params=L(get_default_optimizer_params)(
# params.model is meant to be set to the model object, before instantiating
# the optimizer.
weight_decay_norm=0.0,
weight_decay_bias=0.0,
),
lr=0.001, # bs=64
# momentum=0.9,
weight_decay=0,
# nesterov=True,
)
train.total_epochs = 30
train.no_aug_epochs = 15
train.checkpointer = dict(period=2, max_to_keep=10)
test.test_dataset_names = DATASETS.TEST
test.augment = True
test.scales = (1, 0.75, 0.83, 1.12, 1.25)
test.conf_thr = 0.001
dataloader.test = [
L(build_yolox_test_loader)(
dataset=L(Base_DatasetFromList)(
split="test",
lst=L(get_detection_dataset_dicts)(names=test_dataset_name, filter_empty=False),
img_size="${test.test_size}",
preproc=L(ValTransform)(
legacy=False,
),
),
total_batch_size=1,
# total_batch_size=64,
num_workers=4,
pin_memory=True,
)
for test_dataset_name in test.test_dataset_names
]
dataloader.evaluator = [
L(YOLOX_COCOEvaluator)(
dataset_name=test_dataset_name,
filter_scene=False,
)
for test_dataset_name in test.test_dataset_names
]
import os.path as osp
import torch
from detectron2.config import LazyCall as L
from detectron2.solver.build import get_default_optimizer_params
from .yolox_base import train, val, test, model, dataloader, optimizer, lr_config, DATASETS # noqa
from det.yolox.data import build_yolox_test_loader, ValTransform
from det.yolox.data.datasets import Base_DatasetFromList
from detectron2.data import get_detection_dataset_dicts
from det.yolox.evaluators import YOLOX_COCOEvaluator
from lib.torch_utils.solver.ranger import Ranger
train.update(
output_dir=osp.abspath(__file__).replace("configs", "output", 1)[0:-3],
exp_name=osp.split(osp.abspath(__file__))[1][0:-3], # .py
)
train.amp.enabled = True
model.backbone.depth = 1.33
model.backbone.width = 1.25
model.head.num_classes = 30
train.init_checkpoint = "pretrained_models/yolox/yolox_x.pth"
# datasets
DATASETS.TRAIN = ["tless_pbr_train", "tless_primesense_train"]
DATASETS.TEST = ["tless_bop_test_primesense"]
dataloader.train.dataset.lst.names = DATASETS.TRAIN
dataloader.train.total_batch_size = 32
# color aug
dataloader.train.aug_wrapper.COLOR_AUG_PROB = 0.8
dataloader.train.aug_wrapper.COLOR_AUG_TYPE = "code"
dataloader.train.aug_wrapper.COLOR_AUG_CODE = (
"Sequential(["
# Sometimes(0.5, PerspectiveTransform(0.05)),
# Sometimes(0.5, CropAndPad(percent=(-0.05, 0.1))),
# Sometimes(0.5, Affine(scale=(1.0, 1.2))),
"Sometimes(0.5, CoarseDropout( p=0.2, size_percent=0.05) ),"
"Sometimes(0.4, GaussianBlur((0., 3.))),"
"Sometimes(0.3, pillike.EnhanceSharpness(factor=(0., 50.))),"
"Sometimes(0.3, pillike.EnhanceContrast(factor=(0.2, 50.))),"
"Sometimes(0.5, pillike.EnhanceBrightness(factor=(0.1, 6.))),"
"Sometimes(0.3, pillike.EnhanceColor(factor=(0., 20.))),"
"Sometimes(0.5, Add((-25, 25), per_channel=0.3)),"
"Sometimes(0.3, Invert(0.2, per_channel=True)),"
"Sometimes(0.5, Multiply((0.6, 1.4), per_channel=0.5)),"
"Sometimes(0.5, Multiply((0.6, 1.4))),"
"Sometimes(0.1, AdditiveGaussianNoise(scale=10, per_channel=True)),"
"Sometimes(0.5, iaa.contrast.LinearContrast((0.5, 2.2), per_channel=0.3)),"
# "Sometimes(0.5, Grayscale(alpha=(0.0, 1.0)))," # maybe remove for det
"], random_order=True)"
# cosy+aae
)
# hsv color aug
dataloader.train.aug_wrapper.AUG_HSV_PROB = 1.0
dataloader.train.aug_wrapper.HSV_H = 0.015
dataloader.train.aug_wrapper.HSV_S = 0.7
dataloader.train.aug_wrapper.HSV_V = 0.4
dataloader.train.aug_wrapper.FORMAT = "RGB"
optimizer = L(Ranger)(
params=L(get_default_optimizer_params)(
# params.model is meant to be set to the model object, before instantiating
# the optimizer.
weight_decay_norm=0.0,
weight_decay_bias=0.0,
),
lr=0.001, # bs=64
# momentum=0.9,
weight_decay=0,
# nesterov=True,
)
train.total_epochs = 30
train.no_aug_epochs = 15
train.checkpointer = dict(period=2, max_to_keep=10)
test.test_dataset_names = DATASETS.TEST
test.augment = True
test.scales = (1, 0.75, 0.83, 1.12, 1.25)
test.conf_thr = 0.001
dataloader.test = [
L(build_yolox_test_loader)(
dataset=L(Base_DatasetFromList)(
split="test",
lst=L(get_detection_dataset_dicts)(names=test_dataset_name, filter_empty=False),
img_size="${test.test_size}",
preproc=L(ValTransform)(
legacy=False,
),
),
total_batch_size=1,
# total_batch_size=64,
num_workers=4,
pin_memory=True,
)
for test_dataset_name in test.test_dataset_names
]
dataloader.evaluator = [
L(YOLOX_COCOEvaluator)(
dataset_name=test_dataset_name,
filter_scene=False,
)
for test_dataset_name in test.test_dataset_names
]
import os.path as osp
import torch
from detectron2.config import LazyCall as L
from detectron2.solver.build import get_default_optimizer_params
from .yolox_base import train, val, test, model, dataloader, optimizer, lr_config, DATASETS # noqa
from det.yolox.data import build_yolox_test_loader, ValTransform
from det.yolox.data.datasets import Base_DatasetFromList
from detectron2.data import get_detection_dataset_dicts
from det.yolox.evaluators import YOLOX_COCOEvaluator
from lib.torch_utils.solver.ranger import Ranger
train.update(
output_dir=osp.abspath(__file__).replace("configs", "output", 1)[0:-3],
exp_name=osp.split(osp.abspath(__file__))[1][0:-3], # .py
)
train.amp.enabled = True
model.backbone.depth = 1.33
model.backbone.width = 1.25
model.head.num_classes = 3
train.init_checkpoint = "pretrained_models/yolox/yolox_x.pth"
# datasets
DATASETS.TRAIN = ["tudl_pbr_train"]
DATASETS.TEST = ["tudl_bop_test"]
dataloader.train.dataset.lst.names = DATASETS.TRAIN
dataloader.train.total_batch_size = 32
# color aug
dataloader.train.aug_wrapper.COLOR_AUG_PROB = 0.8
dataloader.train.aug_wrapper.COLOR_AUG_TYPE = "code"
dataloader.train.aug_wrapper.COLOR_AUG_CODE = (
"Sequential(["
# Sometimes(0.5, PerspectiveTransform(0.05)),
# Sometimes(0.5, CropAndPad(percent=(-0.05, 0.1))),
# Sometimes(0.5, Affine(scale=(1.0, 1.2))),
"Sometimes(0.5, CoarseDropout( p=0.2, size_percent=0.05) ),"
"Sometimes(0.4, GaussianBlur((0., 3.))),"
"Sometimes(0.3, pillike.EnhanceSharpness(factor=(0., 50.))),"
"Sometimes(0.3, pillike.EnhanceContrast(factor=(0.2, 50.))),"
"Sometimes(0.5, pillike.EnhanceBrightness(factor=(0.1, 6.))),"
"Sometimes(0.3, pillike.EnhanceColor(factor=(0., 20.))),"
"Sometimes(0.5, Add((-25, 25), per_channel=0.3)),"
"Sometimes(0.3, Invert(0.2, per_channel=True)),"
"Sometimes(0.5, Multiply((0.6, 1.4), per_channel=0.5)),"
"Sometimes(0.5, Multiply((0.6, 1.4))),"
"Sometimes(0.1, AdditiveGaussianNoise(scale=10, per_channel=True)),"
"Sometimes(0.5, iaa.contrast.LinearContrast((0.5, 2.2), per_channel=0.3)),"
# "Sometimes(0.5, Grayscale(alpha=(0.0, 1.0)))," # maybe remove for det
"], random_order=True)"
# cosy+aae
)
# hsv color aug
dataloader.train.aug_wrapper.AUG_HSV_PROB = 1.0
dataloader.train.aug_wrapper.HSV_H = 0.015
dataloader.train.aug_wrapper.HSV_S = 0.7
dataloader.train.aug_wrapper.HSV_V = 0.4
dataloader.train.aug_wrapper.FORMAT = "RGB"
optimizer = L(Ranger)(
params=L(get_default_optimizer_params)(
# params.model is meant to be set to the model object, before instantiating
# the optimizer.
weight_decay_norm=0.0,
weight_decay_bias=0.0,
),
lr=0.001, # bs=64
# momentum=0.9,
weight_decay=0,
# nesterov=True,
)
train.total_epochs = 30
train.no_aug_epochs = 15
train.checkpointer = dict(period=2, max_to_keep=10)
test.test_dataset_names = DATASETS.TEST
test.augment = True
test.scales = (1, 0.75, 0.83, 1.12, 1.25)
test.conf_thr = 0.001
dataloader.test = [
L(build_yolox_test_loader)(
dataset=L(Base_DatasetFromList)(
split="test",
lst=L(get_detection_dataset_dicts)(names=test_dataset_name, filter_empty=False),
img_size="${test.test_size}",
preproc=L(ValTransform)(
legacy=False,
),
),
total_batch_size=1,
# total_batch_size=64,
num_workers=4,
pin_memory=True,
)
for test_dataset_name in test.test_dataset_names
]
dataloader.evaluator = [
L(YOLOX_COCOEvaluator)(
dataset_name=test_dataset_name,
filter_scene=False,
)
for test_dataset_name in test.test_dataset_names
]
import os.path as osp
import torch
from detectron2.config import LazyCall as L
from detectron2.solver.build import get_default_optimizer_params
from .yolox_base import train, val, test, model, dataloader, optimizer, lr_config, DATASETS # noqa
from det.yolox.data import build_yolox_test_loader, ValTransform
from det.yolox.data.datasets import Base_DatasetFromList
from detectron2.data import get_detection_dataset_dicts
from det.yolox.evaluators import YOLOX_COCOEvaluator
from lib.torch_utils.solver.ranger import Ranger
train.update(
output_dir=osp.abspath(__file__).replace("configs", "output", 1)[0:-3],
exp_name=osp.split(osp.abspath(__file__))[1][0:-3], # .py
)
train.amp.enabled = True
model.backbone.depth = 1.33
model.backbone.width = 1.25
model.head.num_classes = 3
train.init_checkpoint = "pretrained_models/yolox/yolox_x.pth"
# datasets
DATASETS.TRAIN = ["tudl_pbr_train", "tudl_train_real"]
DATASETS.TEST = ["tudl_bop_test"]
dataloader.train.dataset.lst.names = DATASETS.TRAIN
dataloader.train.total_batch_size = 32
# color aug
dataloader.train.aug_wrapper.COLOR_AUG_PROB = 0.8
dataloader.train.aug_wrapper.COLOR_AUG_TYPE = "code"
dataloader.train.aug_wrapper.COLOR_AUG_CODE = (
"Sequential(["
# Sometimes(0.5, PerspectiveTransform(0.05)),
# Sometimes(0.5, CropAndPad(percent=(-0.05, 0.1))),
# Sometimes(0.5, Affine(scale=(1.0, 1.2))),
"Sometimes(0.5, CoarseDropout( p=0.2, size_percent=0.05) ),"
"Sometimes(0.4, GaussianBlur((0., 3.))),"
"Sometimes(0.3, pillike.EnhanceSharpness(factor=(0., 50.))),"
"Sometimes(0.3, pillike.EnhanceContrast(factor=(0.2, 50.))),"
"Sometimes(0.5, pillike.EnhanceBrightness(factor=(0.1, 6.))),"
"Sometimes(0.3, pillike.EnhanceColor(factor=(0., 20.))),"
"Sometimes(0.5, Add((-25, 25), per_channel=0.3)),"
"Sometimes(0.3, Invert(0.2, per_channel=True)),"
"Sometimes(0.5, Multiply((0.6, 1.4), per_channel=0.5)),"
"Sometimes(0.5, Multiply((0.6, 1.4))),"
"Sometimes(0.1, AdditiveGaussianNoise(scale=10, per_channel=True)),"
"Sometimes(0.5, iaa.contrast.LinearContrast((0.5, 2.2), per_channel=0.3)),"
# "Sometimes(0.5, Grayscale(alpha=(0.0, 1.0)))," # maybe remove for det
"], random_order=True)"
# cosy+aae
)
# hsv color aug
dataloader.train.aug_wrapper.AUG_HSV_PROB = 1.0
dataloader.train.aug_wrapper.HSV_H = 0.015
dataloader.train.aug_wrapper.HSV_S = 0.7
dataloader.train.aug_wrapper.HSV_V = 0.4
dataloader.train.aug_wrapper.FORMAT = "RGB"
optimizer = L(Ranger)(
params=L(get_default_optimizer_params)(
# params.model is meant to be set to the model object, before instantiating
# the optimizer.
weight_decay_norm=0.0,
weight_decay_bias=0.0,
),
lr=0.001, # bs=64
# momentum=0.9,
weight_decay=0,
# nesterov=True,
)
train.total_epochs = 30
train.no_aug_epochs = 15
train.checkpointer = dict(period=2, max_to_keep=10)
test.test_dataset_names = DATASETS.TEST
test.augment = True
test.scales = (1, 0.75, 0.83, 1.12, 1.25)
test.conf_thr = 0.001
dataloader.test = [
L(build_yolox_test_loader)(
dataset=L(Base_DatasetFromList)(
split="test",
lst=L(get_detection_dataset_dicts)(names=test_dataset_name, filter_empty=False),
img_size="${test.test_size}",
preproc=L(ValTransform)(
legacy=False,
),
),
total_batch_size=1,
# total_batch_size=64,
num_workers=4,
pin_memory=True,
)
for test_dataset_name in test.test_dataset_names
]
dataloader.evaluator = [
L(YOLOX_COCOEvaluator)(
dataset_name=test_dataset_name,
filter_scene=False,
)
for test_dataset_name in test.test_dataset_names
]
import os.path as osp
import torch
from detectron2.config import LazyCall as L
from detectron2.solver.build import get_default_optimizer_params
from .yolox_base import train, val, test, model, dataloader, optimizer, lr_config, DATASETS # noqa
from det.yolox.data import build_yolox_test_loader, ValTransform
from det.yolox.data.datasets import Base_DatasetFromList
from detectron2.data import get_detection_dataset_dicts
from det.yolox.evaluators import YOLOX_COCOEvaluator
from lib.torch_utils.solver.ranger import Ranger
train.update(
output_dir=osp.abspath(__file__).replace("configs", "output", 1)[0:-3],
exp_name=osp.split(osp.abspath(__file__))[1][0:-3], # .py
)
train.amp.enabled = True
model.backbone.depth = 1.33
model.backbone.width = 1.25
model.head.num_classes = 21
train.init_checkpoint = "pretrained_models/yolox/yolox_x.pth"
# datasets
DATASETS.TRAIN = ["ycbv_train_pbr"]
DATASETS.TEST = ["ycbv_bop_test"]
dataloader.train.dataset.lst.names = DATASETS.TRAIN
dataloader.train.total_batch_size = 32
# color aug
dataloader.train.aug_wrapper.COLOR_AUG_PROB = 0.8
dataloader.train.aug_wrapper.COLOR_AUG_TYPE = "code"
dataloader.train.aug_wrapper.COLOR_AUG_CODE = (
"Sequential(["
# Sometimes(0.5, PerspectiveTransform(0.05)),
# Sometimes(0.5, CropAndPad(percent=(-0.05, 0.1))),
# Sometimes(0.5, Affine(scale=(1.0, 1.2))),
"Sometimes(0.5, CoarseDropout( p=0.2, size_percent=0.05) ),"
"Sometimes(0.4, GaussianBlur((0., 3.))),"
"Sometimes(0.3, pillike.EnhanceSharpness(factor=(0., 50.))),"
"Sometimes(0.3, pillike.EnhanceContrast(factor=(0.2, 50.))),"
"Sometimes(0.5, pillike.EnhanceBrightness(factor=(0.1, 6.))),"
"Sometimes(0.3, pillike.EnhanceColor(factor=(0., 20.))),"
"Sometimes(0.5, Add((-25, 25), per_channel=0.3)),"
"Sometimes(0.3, Invert(0.2, per_channel=True)),"
"Sometimes(0.5, Multiply((0.6, 1.4), per_channel=0.5)),"
"Sometimes(0.5, Multiply((0.6, 1.4))),"
"Sometimes(0.1, AdditiveGaussianNoise(scale=10, per_channel=True)),"
"Sometimes(0.5, iaa.contrast.LinearContrast((0.5, 2.2), per_channel=0.3)),"
# "Sometimes(0.5, Grayscale(alpha=(0.0, 1.0)))," # maybe remove for det
"], random_order=True)"
# cosy+aae
)
# hsv color aug
dataloader.train.aug_wrapper.AUG_HSV_PROB = 1.0
dataloader.train.aug_wrapper.HSV_H = 0.015
dataloader.train.aug_wrapper.HSV_S = 0.7
dataloader.train.aug_wrapper.HSV_V = 0.4
dataloader.train.aug_wrapper.FORMAT = "RGB"
optimizer = L(Ranger)(
params=L(get_default_optimizer_params)(
# params.model is meant to be set to the model object, before instantiating
# the optimizer.
weight_decay_norm=0.0,
weight_decay_bias=0.0,
),
lr=0.001, # bs=64
# momentum=0.9,
weight_decay=0,
# nesterov=True,
)
train.total_epochs = 30
train.no_aug_epochs = 15
train.checkpointer = dict(period=2, max_to_keep=10)
test.test_dataset_names = DATASETS.TEST
test.augment = True
test.scales = (1, 0.75, 0.83, 1.12, 1.25)
test.conf_thr = 0.001
dataloader.test = [
L(build_yolox_test_loader)(
dataset=L(Base_DatasetFromList)(
split="test",
lst=L(get_detection_dataset_dicts)(names=test_dataset_name, filter_empty=False),
img_size="${test.test_size}",
preproc=L(ValTransform)(
legacy=False,
),
),
total_batch_size=1,
# total_batch_size=64,
num_workers=4,
pin_memory=True,
)
for test_dataset_name in test.test_dataset_names
]
dataloader.evaluator = [
L(YOLOX_COCOEvaluator)(
dataset_name=test_dataset_name,
filter_scene=False,
)
for test_dataset_name in test.test_dataset_names
]
import os.path as osp
import torch
from detectron2.config import LazyCall as L
from detectron2.solver.build import get_default_optimizer_params
from .yolox_base import train, val, test, model, dataloader, optimizer, lr_config, DATASETS # noqa
from det.yolox.data import build_yolox_test_loader, ValTransform
from det.yolox.data.datasets import Base_DatasetFromList
from detectron2.data import get_detection_dataset_dicts
from det.yolox.evaluators import YOLOX_COCOEvaluator
from lib.torch_utils.solver.ranger import Ranger
train.update(
output_dir=osp.abspath(__file__).replace("configs", "output", 1)[0:-3],
exp_name=osp.split(osp.abspath(__file__))[1][0:-3], # .py
)
train.amp.enabled = True
model.backbone.depth = 1.33
model.backbone.width = 1.25
model.head.num_classes = 21
train.init_checkpoint = "pretrained_models/yolox/yolox_x.pth"
# datasets
DATASETS.TRAIN = ["ycbv_train_pbr", "ycbv_train_real"]
DATASETS.TEST = ["ycbv_bop_test"]
dataloader.train.dataset.lst.names = DATASETS.TRAIN
dataloader.train.total_batch_size = 32
# color aug
dataloader.train.aug_wrapper.COLOR_AUG_PROB = 0.8
dataloader.train.aug_wrapper.COLOR_AUG_TYPE = "code"
dataloader.train.aug_wrapper.COLOR_AUG_CODE = (
"Sequential(["
# Sometimes(0.5, PerspectiveTransform(0.05)),
# Sometimes(0.5, CropAndPad(percent=(-0.05, 0.1))),
# Sometimes(0.5, Affine(scale=(1.0, 1.2))),
"Sometimes(0.5, CoarseDropout( p=0.2, size_percent=0.05) ),"
"Sometimes(0.4, GaussianBlur((0., 3.))),"
"Sometimes(0.3, pillike.EnhanceSharpness(factor=(0., 50.))),"
"Sometimes(0.3, pillike.EnhanceContrast(factor=(0.2, 50.))),"
"Sometimes(0.5, pillike.EnhanceBrightness(factor=(0.1, 6.))),"
"Sometimes(0.3, pillike.EnhanceColor(factor=(0., 20.))),"
"Sometimes(0.5, Add((-25, 25), per_channel=0.3)),"
"Sometimes(0.3, Invert(0.2, per_channel=True)),"
"Sometimes(0.5, Multiply((0.6, 1.4), per_channel=0.5)),"
"Sometimes(0.5, Multiply((0.6, 1.4))),"
"Sometimes(0.1, AdditiveGaussianNoise(scale=10, per_channel=True)),"
"Sometimes(0.5, iaa.contrast.LinearContrast((0.5, 2.2), per_channel=0.3)),"
# "Sometimes(0.5, Grayscale(alpha=(0.0, 1.0)))," # maybe remove for det
"], random_order=True)"
# cosy+aae
)
# hsv color aug
dataloader.train.aug_wrapper.AUG_HSV_PROB = 1.0
dataloader.train.aug_wrapper.HSV_H = 0.015
dataloader.train.aug_wrapper.HSV_S = 0.7
dataloader.train.aug_wrapper.HSV_V = 0.4
dataloader.train.aug_wrapper.FORMAT = "RGB"
optimizer = L(Ranger)(
params=L(get_default_optimizer_params)(
# params.model is meant to be set to the model object, before instantiating
# the optimizer.
weight_decay_norm=0.0,
weight_decay_bias=0.0,
),
lr=0.001, # bs=64
# momentum=0.9,
weight_decay=0,
# nesterov=True,
)
train.total_epochs = 30
train.no_aug_epochs = 15
train.checkpointer = dict(period=2, max_to_keep=10)
test.test_dataset_names = DATASETS.TEST
test.augment = True
test.scales = (1, 0.75, 0.83, 1.12, 1.25)
test.conf_thr = 0.001
dataloader.test = [
L(build_yolox_test_loader)(
dataset=L(Base_DatasetFromList)(
split="test",
lst=L(get_detection_dataset_dicts)(names=test_dataset_name, filter_empty=False),
img_size="${test.test_size}",
preproc=L(ValTransform)(
legacy=False,
),
),
total_batch_size=1,
# total_batch_size=64,
num_workers=4,
pin_memory=True,
)
for test_dataset_name in test.test_dataset_names
]
dataloader.evaluator = [
L(YOLOX_COCOEvaluator)(
dataset_name=test_dataset_name,
filter_scene=False,
)
for test_dataset_name in test.test_dataset_names
]
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
from .utils import configure_module
configure_module()
__version__ = "0.1.0"
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
# Copyright (c) Megvii, Inc. and its affiliates.
from .data_augment import TrainTransform, ValTransform
from .data_prefetcher import DataPrefetcher
from .dataloading import (
DataLoader,
build_yolox_train_loader,
build_yolox_batch_data_loader,
build_yolox_test_loader,
)
from .dataloading import yolox_worker_init_reset_seed as worker_init_reset_seed
from .datasets import *
from .samplers import InfiniteSampler, YoloBatchSampler
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
# Copyright (c) Megvii, Inc. and its affiliates.
"""Data augmentation functionality. Passed as callable transformations to
Dataset classes.
The data augmentation procedures were interpreted from @weiliu89's SSD
paper http://arxiv.org/abs/1512.02325
"""
import math
import random
import cv2
import numpy as np
from det.yolox.utils import xyxy2cxcywh
def augment_hsv(img, hgain=0.5, sgain=0.5, vgain=0.5, source_format="BGR"):
r = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1 # random gains
if source_format == "RGB":
hue, sat, val = cv2.split(cv2.cvtColor(img, cv2.COLOR_RGB2HSV))
else: # default BGR
hue, sat, val = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2HSV))
dtype = img.dtype # uint8
x = np.arange(0, 256, dtype=np.int16)
lut_hue = ((x * r[0]) % 180).astype(dtype)
lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
lut_val = np.clip(x * r[2], 0, 255).astype(dtype)
img_hsv = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val))).astype(dtype)
if source_format == "RGB":
cv2.cvtColor(img_hsv, cv2.COLOR_HSV2RGB, dst=img) # no return needed
else:
cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img) # no return needed
# Histogram equalization
# if random.random() < 0.2:
# for i in range(3):
# img[:, :, i] = cv2.equalizeHist(img[:, :, i])
# def augment_hsv(img, hgain=5, sgain=30, vgain=30):
# hsv_augs = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] # random gains
# hsv_augs *= np.random.randint(0, 2, 3) # random selection of h, s, v
# hsv_augs = hsv_augs.astype(np.int16)
# img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV).astype(np.int16)
# img_hsv[..., 0] = (img_hsv[..., 0] + hsv_augs[0]) % 180
# img_hsv[..., 1] = np.clip(img_hsv[..., 1] + hsv_augs[1], 0, 255)
# img_hsv[..., 2] = np.clip(img_hsv[..., 2] + hsv_augs[2], 0, 255)
# cv2.cvtColor(img_hsv.astype(img.dtype), cv2.COLOR_HSV2BGR, dst=img) # no return needed
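# Usage sketch (illustrative; img is an HxWx3 uint8 array, modified in place):
#   img = cv2.imread("demo.jpg")  # BGR by default
#   augment_hsv(img, hgain=0.015, sgain=0.7, vgain=0.4, source_format="BGR")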
def get_aug_params(value, center=0):
if isinstance(value, float):
return random.uniform(center - value, center + value)
elif len(value) == 2:
return random.uniform(value[0], value[1])
else:
raise ValueError(
"Affine params should be either a sequence of two values or a single float. Got {}".format(value)
)
def get_affine_matrix(
target_size,
degrees=10,
translate=0.1,
scales=0.1,
shear=10,
):
twidth, theight = target_size
# Rotation and Scale
angle = get_aug_params(degrees)
scale = get_aug_params(scales, center=1.0)
if scale <= 0.0:
raise ValueError("Argument scale should be positive")
R = cv2.getRotationMatrix2D(angle=angle, center=(0, 0), scale=scale)
M = np.ones([2, 3])
# Shear
shear_x = math.tan(get_aug_params(shear) * math.pi / 180)
shear_y = math.tan(get_aug_params(shear) * math.pi / 180)
M[0] = R[0] + shear_y * R[1]
M[1] = R[1] + shear_x * R[0]
# Translation
translation_x = get_aug_params(translate) * twidth # x translation (pixels)
translation_y = get_aug_params(translate) * theight # y translation (pixels)
M[0, 2] = translation_x
M[1, 2] = translation_y
return M, scale
def apply_affine_to_bboxes(targets, target_size, M, scale):
num_gts = len(targets)
# warp corner points
twidth, theight = target_size
corner_points = np.ones((4 * num_gts, 3))
corner_points[:, :2] = targets[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(4 * num_gts, 2) # x1y1, x2y2, x1y2, x2y1
corner_points = corner_points @ M.T # apply affine transform
corner_points = corner_points.reshape(num_gts, 8)
# create new boxes
corner_xs = corner_points[:, 0::2]
corner_ys = corner_points[:, 1::2]
new_bboxes = (
np.concatenate((corner_xs.min(1), corner_ys.min(1), corner_xs.max(1), corner_ys.max(1))).reshape(4, num_gts).T
)
# clip boxes
new_bboxes[:, 0::2] = new_bboxes[:, 0::2].clip(0, twidth)
new_bboxes[:, 1::2] = new_bboxes[:, 1::2].clip(0, theight)
targets[:, :4] = new_bboxes
return targets
def random_affine(
img,
targets=(),
target_size=(640, 640),
degrees=10,
translate=0.1,
scales=0.1,
shear=10,
):
M, scale = get_affine_matrix(target_size, degrees, translate, scales, shear)
img = cv2.warpAffine(img, M, dsize=target_size, borderValue=(114, 114, 114))
# Transform label coordinates
if len(targets) > 0:
targets = apply_affine_to_bboxes(targets, target_size, M, scale)
return img, targets
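# Usage sketch (illustrative; targets[:, :4] are xyxy boxes in input-image pixels,
# values mirror the MosaicDetection settings used in the configs):
#   img_aug, targets_aug = random_affine(
#       img, targets, target_size=(640, 640), degrees=10.0, translate=0.1,
#       scales=(0.1, 2), shear=2.0)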
def _mirror(image, boxes, prob=0.5):
_, width, _ = image.shape
if random.random() < prob:
image = image[:, ::-1]
boxes[:, 0::2] = width - boxes[:, 2::-2]
return image, boxes
def preproc(img, input_size, swap=(2, 0, 1)):
if len(img.shape) == 3:
padded_img = np.ones((input_size[0], input_size[1], 3), dtype=np.uint8) * 114
else:
padded_img = np.ones(input_size, dtype=np.uint8) * 114
r = min(input_size[0] / img.shape[0], input_size[1] / img.shape[1])
resized_img = cv2.resize(
img,
(int(img.shape[1] * r), int(img.shape[0] * r)),
interpolation=cv2.INTER_LINEAR,
).astype(np.uint8)
padded_img[: int(img.shape[0] * r), : int(img.shape[1] * r)] = resized_img
padded_img = padded_img.transpose(swap)
padded_img = np.ascontiguousarray(padded_img, dtype=np.float32)
return padded_img, r
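# Example: letterbox an image into a 640x640 network input; r is the resize ratio
# needed to map predictions back to the original image:
#   padded, r = preproc(img, (640, 640))  # padded: float32 CHW, pad value 114
#   boxes_in_input = boxes_in_original * r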
class TrainTransform:
def __init__(self, max_labels=50, flip_prob=0.5, hsv_prob=1.0):
self.max_labels = max_labels
self.flip_prob = flip_prob
self.hsv_prob = hsv_prob
def __call__(self, image, targets, input_dim):
boxes = targets[:, :4].copy()
labels = targets[:, 4].copy()
if len(boxes) == 0:
targets = np.zeros((self.max_labels, 5), dtype=np.float32)
image, r_o = preproc(image, input_dim)
return image, targets
image_o = image.copy()
targets_o = targets.copy()
height_o, width_o, _ = image_o.shape
boxes_o = targets_o[:, :4]
labels_o = targets_o[:, 4]
# bbox_o: [xyxy] to [c_x,c_y,w,h]
boxes_o = xyxy2cxcywh(boxes_o)
if random.random() < self.hsv_prob:
augment_hsv(image)
image_t, boxes = _mirror(image, boxes, self.flip_prob)
height, width, _ = image_t.shape
image_t, r_ = preproc(image_t, input_dim)
# boxes [xyxy] 2 [cx,cy,w,h]
boxes = xyxy2cxcywh(boxes)
boxes *= r_
mask_b = np.minimum(boxes[:, 2], boxes[:, 3]) > 1
boxes_t = boxes[mask_b]
labels_t = labels[mask_b]
if len(boxes_t) == 0:
image_t, r_o = preproc(image_o, input_dim)
boxes_o *= r_o
boxes_t = boxes_o
labels_t = labels_o
labels_t = np.expand_dims(labels_t, 1)
targets_t = np.hstack((labels_t, boxes_t))
padded_labels = np.zeros((self.max_labels, 5))
padded_labels[range(len(targets_t))[: self.max_labels]] = targets_t[: self.max_labels]
padded_labels = np.ascontiguousarray(padded_labels, dtype=np.float32)
return image_t, padded_labels
class ValTransform:
"""Defines the transformations that should be applied to test PIL image for
input into the network.
dimension -> tensorize -> color adj
Arguments:
resize (int): input dimension to SSD
rgb_means ((int,int,int)): average RGB of the dataset
(104,117,123)
swap ((int,int,int)): final order of channels
Returns:
transform (transform) : callable transform to be applied to test/val
data
"""
def __init__(self, swap=(2, 0, 1), legacy=False):
self.swap = swap
self.legacy = legacy
# assume input is cv2 img for now
def __call__(self, img, res, input_size):
img, _ = preproc(img, input_size, self.swap)
if self.legacy:
img = img[::-1, :, :].copy()
img /= 255.0
img -= np.array([0.485, 0.456, 0.406]).reshape(3, 1, 1)
img /= np.array([0.229, 0.224, 0.225]).reshape(3, 1, 1)
return img, np.zeros((1, 5))
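# Usage sketch (illustrative; the `res` argument is unused by ValTransform and may be None):
#   transform = ValTransform(legacy=False)
#   img_t, _ = transform(img, None, (640, 640))  # img_t: float32 CHW, padded input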
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
# Copyright (c) Megvii, Inc. and its affiliates.
import torch
class DataPrefetcher:
"""DataPrefetcher is inspired by code of following file:
https://github.com/NVIDIA/apex/blob/master/examples/imagenet/main_amp.py
It could speedup your pytorch dataloader. For more information, please check
https://github.com/NVIDIA/apex/issues/304#issuecomment-493562789.
"""
def __init__(self, loader):
self.loader_iter = iter(loader)
self.stream = torch.cuda.Stream()
self.input_cuda = self._input_cuda_for_image
self.record_stream = DataPrefetcher._record_stream_for_image
self.preload()
def preload(self):
try:
self.next_input, self.next_target, _, _, _ = next(self.loader_iter)
except StopIteration:
self.next_input = None
self.next_target = None
return
with torch.cuda.stream(self.stream):
self.input_cuda()
self.next_target = self.next_target.cuda(non_blocking=True)
def next(self):
torch.cuda.current_stream().wait_stream(self.stream)
input = self.next_input
target = self.next_target
if input is not None:
self.record_stream(input)
if target is not None:
target.record_stream(torch.cuda.current_stream())
self.preload()
return input, target
def _input_cuda_for_image(self):
self.next_input = self.next_input.cuda(non_blocking=True)
@staticmethod
def _record_stream_for_image(input):
input.record_stream(torch.cuda.current_stream())
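# Usage sketch (assumes a loader yielding (inputs, targets, *extras) CPU batches):
#   prefetcher = DataPrefetcher(train_loader)
#   inputs, targets = prefetcher.next()
#   while inputs is not None:
#       loss_dict = model(inputs, targets)  # tensors are already on the GPU
#       ...
#       inputs, targets = prefetcher.next()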
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
# Copyright (c) Megvii, Inc. and its affiliates.
import os
import random
import uuid
import numpy as np
import torch
from torch.utils.data.dataloader import DataLoader as torchDataLoader
from torch.utils.data.dataloader import default_collate
import operator
from detectron2.data.build import (
AspectRatioGroupedDataset,
worker_init_reset_seed,
trivial_batch_collator,
InferenceSampler,
)
from core.utils.my_comm import get_world_size
from .samplers import YoloBatchSampler, InfiniteSampler
# from .datasets import Base_DatasetFromList
class DataLoader(torchDataLoader):
"""Lightnet dataloader that enables on the fly resizing of the images.
See :class:`torch.utils.data.DataLoader` for more information on the arguments.
Check more on the following website:
https://gitlab.com/EAVISE/lightnet/-/blob/master/lightnet/data/_dataloading.py
"""
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.__initialized = False
shuffle = False
batch_sampler = None
if len(args) > 5:
shuffle = args[2]
sampler = args[3]
batch_sampler = args[4]
elif len(args) > 4:
shuffle = args[2]
sampler = args[3]
if "batch_sampler" in kwargs:
batch_sampler = kwargs["batch_sampler"]
elif len(args) > 3:
shuffle = args[2]
if "sampler" in kwargs:
sampler = kwargs["sampler"]
if "batch_sampler" in kwargs:
batch_sampler = kwargs["batch_sampler"]
else:
if "shuffle" in kwargs:
shuffle = kwargs["shuffle"]
if "sampler" in kwargs:
sampler = kwargs["sampler"]
if "batch_sampler" in kwargs:
batch_sampler = kwargs["batch_sampler"]
# Use custom BatchSampler
if batch_sampler is None:
if sampler is None:
if shuffle:
sampler = torch.utils.data.sampler.RandomSampler(self.dataset)
# sampler = torch.utils.data.DistributedSampler(self.dataset)
else:
sampler = torch.utils.data.sampler.SequentialSampler(self.dataset)
batch_sampler = YoloBatchSampler(
sampler,
self.batch_size,
self.drop_last,
input_dimension=self.dataset.input_dim,
)
# batch_sampler = IterationBasedBatchSampler(batch_sampler, num_iterations =
self.batch_sampler = batch_sampler
self.__initialized = True
def close_mosaic(self):
self.batch_sampler.mosaic = False
# def list_collate(batch):
# """
# Function that collates lists or tuples together into one list (of lists/tuples).
# Use this as the collate function in a Dataloader, if you want to have a list of
# items as an output, as opposed to tensors (eg. Brambox.boxes).
# """
# items = list(zip(*batch))
# for i in range(len(items)):
# if isinstance(items[i][0], (list, tuple)):
# items[i] = list(items[i])
# else:
# items[i] = default_collate(items[i])
# return items
def build_yolox_batch_data_loader(
dataset, sampler, total_batch_size, *, aspect_ratio_grouping=False, num_workers=0, pin_memory=False
):
"""
Build a batched dataloader. The main differences from `torch.utils.data.DataLoader` are:
1. support aspect ratio grouping options
2. use no "batch collation", because this is common for detection training
Args:
dataset (torch.utils.data.Dataset): map-style PyTorch dataset. Can be indexed.
sampler (torch.utils.data.sampler.Sampler): a sampler that produces indices
total_batch_size, aspect_ratio_grouping, num_workers: see
:func:`build_detection_train_loader`.
Returns:
iterable[list]. Length of each list is the batch size of the current
GPU. Each element in the list comes from the dataset.
"""
world_size = get_world_size()
assert (
total_batch_size > 0 and total_batch_size % world_size == 0
), "Total batch size ({}) must be divisible by the number of gpus ({}).".format(total_batch_size, world_size)
batch_size = total_batch_size // world_size
if aspect_ratio_grouping:
data_loader = torch.utils.data.DataLoader(
dataset,
sampler=sampler,
num_workers=num_workers,
batch_sampler=None,
collate_fn=operator.itemgetter(0), # don't batch, but yield individual elements
worker_init_fn=worker_init_reset_seed,
) # yield individual mapped dict
return AspectRatioGroupedDataset(data_loader, batch_size)
else:
# batch_sampler = torch.utils.data.sampler.BatchSampler(
# sampler, batch_size, drop_last=True
# ) # drop_last so the batch always have the same size
if hasattr(dataset, "enable_mosaic"):
mosaic = dataset.enable_mosaic
else:
mosaic = False
batch_sampler = YoloBatchSampler(
mosaic=mosaic,
sampler=sampler,
batch_size=batch_size,
drop_last=False, # NOTE: different to d2
# input_dimension=dataset.input_dim,
)
return DataLoader(
dataset,
num_workers=num_workers,
batch_sampler=batch_sampler,
# collate_fn=trivial_batch_collator, # TODO: use this when item is changed to dict
worker_init_fn=worker_init_reset_seed,
pin_memory=pin_memory,
)
def build_yolox_train_loader(
dataset,
*,
aug_wrapper,
total_batch_size,
sampler=None,
aspect_ratio_grouping=False,
num_workers=0,
pin_memory=False,
seed=None
):
"""Build a dataloader for object detection with some default features. This
interface is experimental.
Args:
dataset (torch.utils.data.Dataset): Base_DatasetFromList
aug_wrapper (callable): MosaicDetection
total_batch_size (int): total batch size across all workers. Batching
simply puts data into a list.
sampler (torch.utils.data.sampler.Sampler or None): a sampler that produces
indices to be applied on ``dataset``. Default to :class:`TrainingSampler`,
which coordinates an infinite random shuffle sequence across all workers.
aspect_ratio_grouping (bool): whether to group images with similar
aspect ratio for efficiency. When enabled, it requires each
element in dataset be a dict with keys "width" and "height".
num_workers (int): number of parallel data loading workers
Returns:
torch.utils.data.DataLoader:
a dataloader. Each batch from it has length
``total_batch_size // num_gpus``, with elements produced by ``dataset``
(wrapped by ``aug_wrapper`` when given).
"""
if aug_wrapper is not None:
# MosaicDetection (mosaic, mixup, other augs)
dataset = aug_wrapper.init_dataset(dataset)
if sampler is None:
# sampler = TrainingSampler(len(dataset))
sampler = InfiniteSampler(len(dataset), seed=0 if seed is None else seed)
assert isinstance(sampler, torch.utils.data.sampler.Sampler)
return build_yolox_batch_data_loader(
dataset,
sampler,
total_batch_size,
aspect_ratio_grouping=aspect_ratio_grouping,
num_workers=num_workers,
pin_memory=pin_memory,
)
def build_yolox_test_loader(
dataset, *, aug_wrapper=None, total_batch_size=1, sampler=None, num_workers=0, pin_memory=False
):
"""Similar to `build_detection_train_loader`, but uses a batch size of 1,
and :class:`InferenceSampler`. This sampler coordinates all workers to
produce the exact set of all samples. This interface is experimental.
Args:
dataset (list or torch.utils.data.Dataset): a list of dataset dicts,
or a map-style pytorch dataset. They can be obtained by using
:func:`DatasetCatalog.get` or :func:`get_detection_dataset_dicts`.
aug_wrapper (callable): MosaicDetection
total_batch_size (int): total batch size across all workers. Batching
simply puts data into a list. Default test batch size is 1.
sampler (torch.utils.data.sampler.Sampler or None): a sampler that produces
indices to be applied on ``dataset``. Default to :class:`InferenceSampler`,
which splits the dataset across all workers.
num_workers (int): number of parallel data loading workers
Returns:
DataLoader: a torch DataLoader, that loads the given detection
dataset, with test-time transformation and batching.
Examples:
::
data_loader = build_yolox_test_loader(
dataset, total_batch_size=1, num_workers=4)
"""
if aug_wrapper is not None:
# MosaicDetection (mosaic, mixup, other augs)
dataset = aug_wrapper.init_dataset(dataset)
world_size = get_world_size()
batch_size = total_batch_size // world_size
if sampler is None:
sampler = InferenceSampler(len(dataset))
# The per-GPU batch size defaults to 1 image during inference, which is
# the standard when reporting inference time in papers.
batch_sampler = torch.utils.data.sampler.BatchSampler(sampler, batch_size, drop_last=False)
data_loader = torch.utils.data.DataLoader(
dataset,
# batch_size=batch_size,
num_workers=num_workers,
batch_sampler=batch_sampler,
# collate_fn=trivial_batch_collator,
pin_memory=pin_memory,
)
return data_loader
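# Usage sketch (illustrative names): evaluation typically keeps the default
# of one image per GPU so reported timings stay comparable:
#
#   test_loader = build_yolox_test_loader(
#       dataset,
#       total_batch_size=get_world_size(),  # 1 image per GPU
#       num_workers=2,
#   )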
def yolox_worker_init_reset_seed(worker_id):
seed = uuid.uuid4().int % 2**32
random.seed(seed)
torch.manual_seed(seed)  # setting the seed already updates the default RNG state
np.random.seed(seed)
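# Why reseed per worker: with the fork start method, DataLoader workers
# inherit the parent's NumPy RNG state, so without a worker_init_fn every
# worker would produce identical "random" augmentations. A minimal wiring
# sketch (assumed names):
#
#   loader = torch.utils.data.DataLoader(
#       dataset,
#       num_workers=4,
#       worker_init_fn=yolox_worker_init_reset_seed,
#   )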
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
# Copyright (c) Megvii, Inc. and its affiliates.
from .coco import COCODataset
from .coco_classes import COCO_CLASSES
from .datasets_wrapper import ConcatDataset, Dataset, MixConcatDataset
from .mosaicdetection import MosaicDetection
from .voc import VOCDetection
from .base_data_from_list import Base_DatasetFromList
# -*- coding: utf-8 -*-
import copy
import hashlib
import logging
import os
import os.path as osp
import random
import cv2
import mmcv
import numpy as np
import pickle
from omegaconf import OmegaConf
from detectron2.data import detection_utils as utils
from detectron2.structures import BoxMode
from core.utils.data_utils import resize_short_edge, read_image_mmcv
from core.utils.augment import AugmentRGB
from core.utils.dataset_utils import flat_dataset_dicts
from lib.utils.utils import lazy_property
from .datasets_wrapper import Dataset
logger = logging.getLogger(__name__)
default_input_cfg = OmegaConf.create(
dict(
img_format="BGR",
# depth
with_depth=False,
aug_depth=False,
# bg ----------------
bg_type="VOC_table",
bg_imgs_root="datasets/VOCdevkit/VOC2012/",
num_bg_imgs=10000,
change_bg_prob=0.0, # prob to change bg of real image
bg_keep_aspect_ratio=True,
# truncation fg (randomly replace some side of fg with bg during replace_bg)
truncate_fg=False,
# color aug ---------------
color_aug_prob=0.0,
color_aug_type="AAE",
color_aug_code="",
# color normalization
pixel_mean=[0.0, 0.0, 0.0], # to [0, 1]
pixel_std=[255.0, 255.0, 255.0],
# box aug
bbox_aug_type="",
bbox_aug_scale_ratio=1.0,
bbox_aug_shift_ratio=0.0,
# box aug dzi
dzi_type="none", # uniform, truncnorm, none, roi10d
dzi_pad_scale=1.0,
dzi_scale_ratio=0.25, # wh scale
dzi_shift_ratio=0.25, # center shift
)
)
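# A minimal override sketch (hypothetical values): OmegaConf.merge keeps the
# defaults above and replaces only the given keys, e.g. to enable color aug
# and background replacement for real training images:
#
#   my_cfg = OmegaConf.merge(
#       default_input_cfg,
#       dict(color_aug_prob=0.8, color_aug_type="code", change_bg_prob=0.5),
#   )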
class Base_DatasetFromList(Dataset):
"""# https://github.com/facebookresearch/detectron2/blob/master/detectron2/
data/common.py Wrap a list to a torch Dataset.
It produces elements of the list as data.
"""
def __init__(
self,
split,
lst: list,
*,
cfg=default_input_cfg,
img_size=(416, 416),
preproc=None,
copy: bool = True,
serialize: bool = True,
flatten=False,
):
"""
Args:
lst (list): a list which contains elements to produce.
img_size (tuple): (h, w)
copy (bool): whether to deepcopy the element when producing it,
so that the result can be modified in place without affecting the
source in the list.
serialize (bool): whether to hold memory using serialized objects, when
enabled, data loader workers can use shared RAM from master
process instead of making a copy.
"""
super().__init__(img_size)
self.cfg = cfg
self.img_size = img_size
self.preproc = preproc
self.split = split # train | val | test
if split == "train" and cfg.color_aug_prob > 0:
self.color_augmentor = self._get_color_augmentor(aug_type=cfg.color_aug_type, aug_code=cfg.color_aug_code)
else:
self.color_augmentor = None
# --------------------------------------------------------
self._lst = flat_dataset_dicts(lst) if flatten else lst
self._copy = copy
self._serialize = serialize
def _serialize(data):
buffer = pickle.dumps(data, protocol=-1)
return np.frombuffer(buffer, dtype=np.uint8)
if self._serialize:
logger.info("Serializing {} elements to byte tensors and concatenating them all ...".format(len(self._lst)))
self._lst = [_serialize(x) for x in self._lst]
self._addr = np.asarray([len(x) for x in self._lst], dtype=np.int64)
self._addr = np.cumsum(self._addr)
self._lst = np.concatenate(self._lst)
logger.info("Serialized dataset takes {:.2f} MiB".format(len(self._lst) / 1024**2))
def __len__(self):
if self._serialize:
return len(self._addr)
else:
return len(self._lst)
def read_data(self, dataset_dict):
raise NotImplementedError("Not implemented")
def _rand_another(self, idx):
pool = [i for i in range(self.__len__()) if i != idx]
return np.random.choice(pool)
def load_anno(self, index):
# cfg = self.cfg
dataset_dict = self._get_sample_dict(index)
dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below
# im annos
width = dataset_dict["width"]
height = dataset_dict["height"]
# get target--------------------
if dataset_dict.get("annotations", None) != None:
annotations = dataset_dict["annotations"]
objs = []
for obj in annotations: # filter instances by area ------------------
xyxy = BoxMode.convert(obj["bbox"], obj["bbox_mode"], BoxMode.XYXY_ABS)
x1 = np.max((xyxy[0], 0))
y1 = np.max((xyxy[1], 0))
x2 = np.min((xyxy[2], width))
y2 = np.min((xyxy[3], height))
if "area" in obj:
area = obj["area"]
else:
area = (x2 - x1) * (y2 - y1)
if area > 0 and x2 >= x1 and y2 >= y1:
obj["clean_bbox"] = [x1, y1, x2, y2]
objs.append(obj)
num_objs = len(objs)
res = np.zeros((num_objs, 5))
for ix, obj in enumerate(objs):
_cls = obj["category_id"] # 0-based
res[ix, 0:4] = obj["clean_bbox"]
res[ix, 4] = _cls
r = min(self.img_size[0] / height, self.img_size[1] / width)
res[:, :4] *= r
elif self.split == "train":
raise SystemExit("Failed to load labels.")
else:
r = min(self.img_size[0] / height, self.img_size[1] / width)
res = np.zeros((1, 5))
resized_info = (int(height * r), int(width * r))
return res, resized_info
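# Worked example of the resize ratio above (assumed numbers): for a 480x640
# image and img_size=(416, 416), r = min(416/480, 416/640) = 0.65, so boxes
# are scaled by 0.65 and resized_info = (int(480*0.65), int(640*0.65))
# = (312, 416).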
def load_resized_img(self, file_name):
img = self.load_image(file_name)
r = min(self.img_size[0] / img.shape[0], self.img_size[1] / img.shape[1])
resized_img = cv2.resize(
img,
(int(img.shape[1] * r), int(img.shape[0] * r)),
interpolation=cv2.INTER_LINEAR,
).astype(np.uint8)
return resized_img
def load_image(self, file_name):
img = read_image_mmcv(file_name, format=self.cfg.img_format) # BGR
assert img is not None
return img
def pull_item(self, index):
"""Returns the original image and target at an index for mixup.
Note: not using self.__getitem__(), as any transformations passed in
could mess up this functionality.
Argument:
index (int): index of the image to load
Return:
img, target
"""
cfg = self.cfg
dataset_dict = self._get_sample_dict(index)
dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below
file_name = dataset_dict["file_name"]
img = self.load_resized_img(file_name)
target, resized_info = self.load_anno(index)
width = dataset_dict["width"]
height = dataset_dict["height"]
img_info = (height, width)
scene_im_id = dataset_dict.get("scene_im_id", 0)
img_id = dataset_dict["image_id"]
return img, target.copy(), scene_im_id, img_info, np.array([img_id])
@Dataset.mosaic_getitem
def __getitem__(self, index):
img, target, scene_im_id, img_info, img_id = self.pull_item(index)
if self.preproc is not None:
img, target = self.preproc(img, target, self.input_dim)
# import ipdb; ipdb.set_trace()
return img, target, scene_im_id, img_info, img_id
def _get_sample_dict(self, idx):
if self._serialize:
start_addr = 0 if idx == 0 else self._addr[idx - 1].item()
end_addr = self._addr[idx].item()
buf = memoryview(self._lst[start_addr:end_addr])  # avoid shadowing builtin bytes
dataset_dict = pickle.loads(buf)
elif self._copy:
dataset_dict = copy.deepcopy(self._lst[idx])
else:
dataset_dict = self._lst[idx]
return dataset_dict
def normalize_image(self, image):
# image: CHW format
cfg = self.cfg
pixel_mean = np.array(cfg.pixel_mean).reshape(-1, 1, 1)
pixel_std = np.array(cfg.pixel_std).reshape(-1, 1, 1)
return (image - pixel_mean) / pixel_std
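# With the defaults above (pixel_mean=[0,0,0], pixel_std=[255,255,255]) this
# simply maps a uint8 CHW image from [0, 255] into [0, 1]; e.g. a pixel value
# of 128 becomes (128 - 0) / 255 ≈ 0.502.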
def aug_bbox_non_square(self, bbox_xyxy, im_H, im_W):
"""Similar to DZI, but the resulted bbox is not square, and not enlarged
Args:
cfg (ConfigDict):
bbox_xyxy (np.ndarray): (4,)
im_H (int):
im_W (int):
Returns:
augmented bbox (ndarray)
"""
cfg = self.cfg
x1, y1, x2, y2 = bbox_xyxy.copy()
cx = 0.5 * (x1 + x2)
cy = 0.5 * (y1 + y2)
bh = y2 - y1
bw = x2 - x1
if cfg.bbox_aug_type.lower() == "uniform":
# different to DZI: scale both w and h
scale_ratio = 1 + cfg.bbox_aug_scale_ratio * (2 * np.random.random_sample(2) - 1) # [1-0.25, 1+0.25]
shift_ratio = cfg.bbox_aug_shift_ratio * (2 * np.random.random_sample(2) - 1) # [-0.25, 0.25]
bbox_center = np.array([cx + bw * shift_ratio[0], cy + bh * shift_ratio[1]])  # (cx, cy)
new_bw = bw * scale_ratio[0]
new_bh = bh * scale_ratio[1]
x1 = min(max(bbox_center[0] - new_bw / 2, 0), im_W)
y1 = min(max(bbox_center[1] - new_bh / 2, 0), im_H)  # clamp y by im_H, not im_W
x2 = min(max(bbox_center[0] + new_bw / 2, 0), im_W)
y2 = min(max(bbox_center[1] + new_bh / 2, 0), im_H)
bbox_auged = np.array([x1, y1, x2, y2])
elif cfg.bbox_aug_type.lower() == "roi10d":
# shift (x1,y1), (x2,y2) by 15% in each direction
_a = -0.15
_b = 0.15
x1 += bw * (np.random.rand() * (_b - _a) + _a)
x2 += bw * (np.random.rand() * (_b - _a) + _a)
y1 += bh * (np.random.rand() * (_b - _a) + _a)
y2 += bh * (np.random.rand() * (_b - _a) + _a)
x1 = min(max(x1, 0), im_W)
x2 = min(max(x2, 0), im_W)  # fix: clamp x2, not x1
y1 = min(max(y1, 0), im_H)
y2 = min(max(y2, 0), im_H)
bbox_auged = np.array([x1, y1, x2, y2])
elif cfg.bbox_aug_type.lower() == "truncnorm":
raise NotImplementedError("BBOX_AUG_TYPE truncnorm is not implemented yet.")
else:
bbox_auged = bbox_xyxy.copy()
return bbox_auged
def aug_bbox_DZI(self, cfg, bbox_xyxy, im_H, im_W):
"""Used for DZI, the augmented box is a square (maybe enlarged)
Args:
bbox_xyxy (np.ndarray):
Returns:
center, scale
"""
x1, y1, x2, y2 = bbox_xyxy.copy()
cx = 0.5 * (x1 + x2)
cy = 0.5 * (y1 + y2)
bh = y2 - y1
bw = x2 - x1
if cfg.dzi_type.lower() == "uniform":
scale_ratio = 1 + cfg.dzi_scale_ratio * (2 * np.random.random_sample() - 1) # [1-0.25, 1+0.25]
shift_ratio = cfg.dzi_shift_ratio * (2 * np.random.random_sample(2) - 1) # [-0.25, 0.25]
bbox_center = np.array([cx + bw * shift_ratio[0], cy + bh * shift_ratio[1]]) # (h/2, w/2)
scale = max(y2 - y1, x2 - x1) * scale_ratio * cfg.dzi_pad_scale
elif cfg.dzi_type.lower() == "roi10d":
# shift (x1,y1), (x2,y2) by 15% in each direction
_a = -0.15
_b = 0.15
x1 += bw * (np.random.rand() * (_b - _a) + _a)
x2 += bw * (np.random.rand() * (_b - _a) + _a)
y1 += bh * (np.random.rand() * (_b - _a) + _a)
y2 += bh * (np.random.rand() * (_b - _a) + _a)
x1 = min(max(x1, 0), im_W)
x2 = min(max(x2, 0), im_W)  # fix: clamp x2, not x1
y1 = min(max(y1, 0), im_H)
y2 = min(max(y2, 0), im_H)
bbox_center = np.array([0.5 * (x1 + x2), 0.5 * (y1 + y2)])
scale = max(y2 - y1, x2 - x1) * cfg.dzi_pad_scale
elif cfg.dzi_type.lower() == "truncnorm":
raise NotImplementedError("DZI truncnorm not implemented yet.")
else:
bbox_center = np.array([cx, cy]) # (w/2, h/2)
scale = max(y2 - y1, x2 - x1)
scale = min(scale, max(im_H, im_W)) * 1.0
return bbox_center, scale
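# DZI worked example (assumed numbers): for bbox (100, 100, 200, 180) with
# dzi_type="uniform", dzi_scale_ratio=0.25, dzi_shift_ratio=0.25 and
# dzi_pad_scale=1.5, the center (150, 140) is jittered by up to ±25% of
# (bw, bh) = (100, 80), and scale = max(80, 100) * ratio * 1.5 with ratio
# drawn from [0.75, 1.25], i.e. roughly in [112.5, 187.5] before clamping.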
def _get_color_augmentor(self, aug_type="ROI10D", aug_code=None):
# fmt: off
cfg = self.cfg
if aug_type.lower() == "roi10d":
color_augmentor = AugmentRGB(
brightness_delta=2.5 / 255., # 0,
lighting_std=0.3,
saturation_var=(0.95, 1.05), # (1, 1),
contrast_var=(0.95, 1.05)) # (1, 1)) #
elif aug_type.lower() == "aae":
import imgaug.augmenters as iaa # noqa
from imgaug.augmenters import (Sequential, SomeOf, OneOf, Sometimes, WithColorspace, WithChannels, Noop,
Lambda, AssertLambda, AssertShape, Scale, CropAndPad, Pad, Crop, Fliplr,
Flipud, Superpixels, ChangeColorspace, PerspectiveTransform, Grayscale,
GaussianBlur, AverageBlur, MedianBlur, Convolve, Sharpen, Emboss, EdgeDetect,
DirectedEdgeDetect, Add, AddElementwise, AdditiveGaussianNoise, Multiply,
MultiplyElementwise, Dropout, CoarseDropout, Invert, ContrastNormalization,
Affine, PiecewiseAffine, ElasticTransformation, pillike, LinearContrast) # noqa
aug_code = """Sequential([
# Sometimes(0.5, PerspectiveTransform(0.05)),
# Sometimes(0.5, CropAndPad(percent=(-0.05, 0.1))),
# Sometimes(0.5, Affine(scale=(1.0, 1.2))),
Sometimes(0.5, CoarseDropout( p=0.2, size_percent=0.05) ),
Sometimes(0.5, GaussianBlur(1.2*np.random.rand())),
Sometimes(0.5, Add((-25, 25), per_channel=0.3)),
Sometimes(0.3, Invert(0.2, per_channel=True)),
Sometimes(0.5, Multiply((0.6, 1.4), per_channel=0.5)),
Sometimes(0.5, Multiply((0.6, 1.4))),
Sometimes(0.5, LinearContrast((0.5, 2.2), per_channel=0.3))
], random_order = False)"""
# for darker objects, e.g. LM driller: use BOOTSTRAP_RATIO: 16 and weaker augmentation
aug_code_weaker = """Sequential([
Sometimes(0.4, CoarseDropout( p=0.1, size_percent=0.05) ),
# Sometimes(0.5, Affine(scale=(1.0, 1.2))),
Sometimes(0.5, GaussianBlur(np.random.rand())),
Sometimes(0.5, Add((-20, 20), per_channel=0.3)),
Sometimes(0.4, Invert(0.20, per_channel=True)),
Sometimes(0.5, Multiply((0.7, 1.4), per_channel=0.8)),
Sometimes(0.5, Multiply((0.7, 1.4))),
Sometimes(0.5, LinearContrast((0.5, 2.0), per_channel=0.3))
], random_order=False)"""
color_augmentor = eval(aug_code)
elif aug_type.lower() == "code": # assume imgaug
import imgaug.augmenters as iaa
from imgaug.augmenters import (Sequential, SomeOf, OneOf, Sometimes, WithColorspace, WithChannels, Noop,
Lambda, AssertLambda, AssertShape, Scale, CropAndPad, Pad, Crop, Fliplr,
Flipud, Superpixels, ChangeColorspace, PerspectiveTransform, Grayscale,
GaussianBlur, AverageBlur, MedianBlur, Convolve, Sharpen, Emboss, EdgeDetect,
DirectedEdgeDetect, Add, AddElementwise, AdditiveGaussianNoise, Multiply,
MultiplyElementwise, Dropout, CoarseDropout, Invert, ContrastNormalization,
Affine, PiecewiseAffine, ElasticTransformation, pillike, LinearContrast, Canny) # noqa
aug_code = cfg.color_aug_code
color_augmentor = eval(aug_code)
elif aug_type.lower() == 'code_albu':
from albumentations import (HorizontalFlip, IAAPerspective, ShiftScaleRotate, CLAHE, RandomRotate90,
Transpose, ShiftScaleRotate, Blur, OpticalDistortion, GridDistortion,
HueSaturationValue, IAAAdditiveGaussianNoise, GaussNoise, MotionBlur,
MedianBlur, IAAPiecewiseAffine, IAASharpen, IAAEmboss, RandomContrast,
RandomBrightness, Flip, OneOf, Compose, CoarseDropout, RGBShift, RandomGamma,
RandomBrightnessContrast, JpegCompression, InvertImg) # noqa
aug_code = """Compose([
CoarseDropout(max_height=0.05*480, max_holes=0.05*640, p=0.4),
OneOf([
IAAAdditiveGaussianNoise(p=0.5),
GaussNoise(p=0.5),
], p=0.2),
OneOf([
MotionBlur(p=0.2),
MedianBlur(blur_limit=3, p=0.1),
Blur(blur_limit=3, p=0.1),
], p=0.2),
OneOf([
CLAHE(clip_limit=2),
IAASharpen(),
IAAEmboss(),
RandomBrightnessContrast(),
], p=0.3),
InvertImg(p=0.2),
RGBShift(r_shift_limit=105, g_shift_limit=45, b_shift_limit=40, p=0.5),
RandomContrast(limit=0.9, p=0.5),
RandomGamma(gamma_limit=(80,120), p=0.5),
RandomBrightness(limit=1.2, p=0.5),
HueSaturationValue(hue_shift_limit=172, sat_shift_limit=20, val_shift_limit=27, p=0.3),
JpegCompression(quality_lower=4, quality_upper=100, p=0.4),
], p=0.8)"""
color_augmentor = eval(cfg.color_aug_code)
else:
color_augmentor = None
# fmt: on
return color_augmentor
def _color_aug(self, image, aug_type="ROI10D"):
# assume image in [0, 255] uint8
if aug_type.lower() == "roi10d": # need normalized image in [0,1]
image = np.asarray(image / 255.0, dtype=np.float32).copy()
image = self.color_augmentor.augment(image)
image = (image * 255.0 + 0.5).astype(np.uint8)
return image
elif aug_type.lower() in ["aae", "code"]:
# imgaug need uint8
return self.color_augmentor.augment_image(image)
elif aug_type.lower() in ["code_albu"]:
augmented = self.color_augmentor(image=image)
return augmented["image"]
else:
raise ValueError("aug_type: {} is not supported.".format(aug_type))
@lazy_property
def _bg_img_paths(self):
logger.info("get bg image paths")
cfg = self.cfg
# random.choice(bg_img_paths)
bg_type = cfg.bg_type
bg_root = cfg.bg_imgs_root
hashed_file_name = hashlib.md5(
("{}_{}_{}_get_bg_imgs".format(bg_root, cfg.num_bg_imgs, bg_type)).encode("utf-8")
).hexdigest()
cache_path = osp.join(".cache/bg_paths_{}_{}.pkl".format(bg_type, hashed_file_name))
mmcv.mkdir_or_exist(osp.dirname(cache_path))
if osp.exists(cache_path):
logger.info("get bg_paths from cache file: {}".format(cache_path))
bg_img_paths = mmcv.load(cache_path)
logger.info("num bg imgs: {}".format(len(bg_img_paths)))
assert len(bg_img_paths) > 0
return bg_img_paths
logger.info("building bg imgs cache {}...".format(bg_type))
assert osp.exists(bg_root), f"BG ROOT: {bg_root} does not exist"
if bg_type == "coco":
img_paths = [
osp.join(bg_root, fn.name) for fn in os.scandir(bg_root) if ".png" in fn.name or "jpg" in fn.name
]
elif bg_type == "VOC_table": # used in original deepim
VOC_root = bg_root # path to "VOCdevkit/VOC2012"
VOC_image_set_dir = osp.join(VOC_root, "ImageSets/Main")
VOC_bg_list_path = osp.join(VOC_image_set_dir, "diningtable_trainval.txt")
with open(VOC_bg_list_path, "r") as f:
VOC_bg_list = [
line.strip("\r\n").split()[0] for line in f.readlines() if line.strip("\r\n").split()[1] == "1"
]
img_paths = [osp.join(VOC_root, "JPEGImages/{}.jpg".format(bg_idx)) for bg_idx in VOC_bg_list]
elif bg_type == "VOC":
VOC_root = bg_root # path to "VOCdevkit/VOC2012"
img_paths = [
osp.join(VOC_root, "JPEGImages", fn.name)
for fn in os.scandir(osp.join(bg_root, "JPEGImages"))
if ".jpg" in fn.name
]
elif bg_type == "SUN2012":
img_paths = [
osp.join(bg_root, "JPEGImages", fn.name)
for fn in os.scandir(osp.join(bg_root, "JPEGImages"))
if ".jpg" in fn.name
]
else:
raise ValueError(f"BG_TYPE: {bg_type} is not supported")
assert len(img_paths) > 0, len(img_paths)
num_bg_imgs = min(len(img_paths), cfg.num_bg_imgs)
bg_img_paths = np.random.choice(img_paths, num_bg_imgs)
mmcv.dump(bg_img_paths, cache_path)
logger.info("num bg imgs: {}".format(len(bg_img_paths)))
assert len(bg_img_paths) > 0
return bg_img_paths
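# Caching note: the path list is keyed by an md5 of (bg_root, num_bg_imgs,
# bg_type), so changing any of these settings produces a new cache file under
# .cache/ instead of silently reusing a stale one.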
def replace_bg(self, im, im_mask, return_mask=False, truncate_fg=False):
cfg = self.cfg
# add background to the image
H, W = im.shape[:2]
ind = random.randint(0, len(self._bg_img_paths) - 1)
filename = self._bg_img_paths[ind]
if cfg.get("bg_keep_aspect_ratio", True):
bg_img = self.get_bg_image(filename, H, W)
else:
bg_img = self.get_bg_image_v2(filename, H, W)
if len(bg_img.shape) != 3:
bg_img = np.zeros((H, W, 3), dtype=np.uint8)
logger.warning("bad background image: {}".format(filename))
mask = im_mask.copy().astype(bool)  # np.bool is removed in NumPy >= 1.24
if truncate_fg:
mask = self.trunc_mask(im_mask)
mask_bg = ~mask
im[mask_bg] = bg_img[mask_bg]
im = im.astype(np.uint8)
if return_mask:
return im, mask # bool fg mask
else:
return im
def trunc_mask(self, mask):
# return the bool truncated mask
mask = mask.copy().astype(bool)  # np.bool is removed in NumPy >= 1.24
nonzeros = np.nonzero(mask.astype(np.uint8))  # (rows, cols) of fg pixels
x1, y1 = np.min(nonzeros, axis=1)  # NOTE: x indexes rows (h), y indexes cols (w)
x2, y2 = np.max(nonzeros, axis=1)
c_h = 0.5 * (x1 + x2)
c_w = 0.5 * (y1 + y2)
rnd = random.random()
# print(x1, x2, y1, y2, c_h, c_w, rnd, mask.shape)
if rnd < 0.2: # block upper
c_h_ = int(random.uniform(x1, c_h))
mask[:c_h_, :] = False
elif rnd < 0.4: # block bottom
c_h_ = int(random.uniform(c_h, x2))
mask[c_h_:, :] = False
elif rnd < 0.6: # block left
c_w_ = int(random.uniform(y1, c_w))
mask[:, :c_w_] = False
elif rnd < 0.8: # block right
c_w_ = int(random.uniform(c_w, y2))
mask[:, c_w_:] = False
else:
pass
return mask
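# Behavior summary: trunc_mask keeps the mask intact with probability 0.2 and
# otherwise blanks one random side (top/bottom/left/right, 0.2 each) up to a
# point between the mask boundary and its center, simulating a truncated
# foreground before background replacement.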
def get_bg_image(self, filename, imH, imW, channel=3):
"""keep aspect ratio of bg during resize target image size:
imHximWxchannel.
"""
cfg = self.cfg
target_size = min(imH, imW)
max_size = max(imH, imW)
real_hw_ratio = float(imH) / float(imW)
bg_image = utils.read_image(filename, format=cfg.img_format)
bg_h, bg_w, bg_c = bg_image.shape
bg_image_resize = np.zeros((imH, imW, channel), dtype="uint8")
if (float(imH) / float(imW) < 1 and float(bg_h) / float(bg_w) < 1) or (
float(imH) / float(imW) >= 1 and float(bg_h) / float(bg_w) >= 1
):
if bg_h >= bg_w:
bg_h_new = int(np.ceil(bg_w * real_hw_ratio))
if bg_h_new < bg_h:
bg_image_crop = bg_image[0:bg_h_new, 0:bg_w, :]
else:
bg_image_crop = bg_image
else:
bg_w_new = int(np.ceil(bg_h / real_hw_ratio))
if bg_w_new < bg_w:
bg_image_crop = bg_image[0:bg_h, 0:bg_w_new, :]
else:
bg_image_crop = bg_image
else:
if bg_h >= bg_w:
bg_h_new = int(np.ceil(bg_w * real_hw_ratio))
bg_image_crop = bg_image[0:bg_h_new, 0:bg_w, :]
else: # bg_h < bg_w
bg_w_new = int(np.ceil(bg_h / real_hw_ratio))
# logger.info(bg_w_new)
bg_image_crop = bg_image[0:bg_h, 0:bg_w_new, :]
bg_image_resize_0 = resize_short_edge(bg_image_crop, target_size, max_size)
h, w, c = bg_image_resize_0.shape
bg_image_resize[0:h, 0:w, :] = bg_image_resize_0
return bg_image_resize
def get_bg_image_v2(self, filename, imH, imW, channel=3):
cfg = self.cfg
_bg_img = utils.read_image(filename, format=cfg.img_format)
try:
# randomly crop a region as background
bw = _bg_img.shape[1]
bh = _bg_img.shape[0]
x1 = np.random.randint(0, int(bw / 3))
y1 = np.random.randint(0, int(bh / 3))
x2 = np.random.randint(int(2 * bw / 3), bw)
y2 = np.random.randint(int(2 * bh / 3), bh)
bg_img = cv2.resize(
_bg_img[y1:y2, x1:x2],
(imW, imH),
interpolation=cv2.INTER_LINEAR,
)
except Exception:  # e.g. unreadable or corrupt background image
bg_img = np.zeros((imH, imW, 3), dtype=np.uint8)
logger.warning("bad background image: {}".format(filename))
return bg_img
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
# Copyright (c) Megvii, Inc. and its affiliates.
import os
from loguru import logger
import cv2
import numpy as np
from pycocotools.coco import COCO
from det.yolox.utils.setup_env import get_yolox_datadir
from .datasets_wrapper import Dataset
class COCODataset(Dataset):
"""COCO dataset class."""
def __init__(
self,
data_dir=None,
json_file="instances_train2017.json",
name="train2017",
img_size=(416, 416),
preproc=None,
cache=False,
):
"""COCO dataset initialization.
Annotation data are read into memory by COCO API.
Args:
data_dir (str): dataset root directory
json_file (str): COCO json file name
name (str): COCO data name (e.g. 'train2017' or 'val2017')
img_size (int): target image size after pre-processing
preproc: data augmentation strategy
"""
super().__init__(img_size)
if data_dir is None:
data_dir = os.path.join(get_yolox_datadir(), "coco")
self.data_dir = data_dir
self.json_file = json_file
self.coco = COCO(os.path.join(self.data_dir, "annotations", self.json_file))
self.ids = self.coco.getImgIds()
self.class_ids = sorted(self.coco.getCatIds())
cats = self.coco.loadCats(self.coco.getCatIds())
self._classes = tuple([c["name"] for c in cats])
self.imgs = None
self.name = name
self.img_size = img_size
self.preproc = preproc
self.annotations = self._load_coco_annotations()
if cache:
self._cache_images()
def __len__(self):
return len(self.ids)
def __del__(self):
del self.imgs
def _load_coco_annotations(self):
return [self.load_anno_from_ids(_ids) for _ids in self.ids]
def _cache_images(self):
logger.warning(
"\n********************************************************************************\n"
"You are using cached images in RAM to accelerate training.\n"
"This requires large system RAM.\n"
"Make sure you have 200G+ RAM and 136G available disk space for training COCO.\n"
"********************************************************************************\n"
)
max_h = self.img_size[0]
max_w = self.img_size[1]
cache_file = self.data_dir + "/img_resized_cache_" + self.name + ".array"
if not os.path.exists(cache_file):
logger.info("Caching images for the first time. This might take about 20 minutes for COCO")
self.imgs = np.memmap(
cache_file,
shape=(len(self.ids), max_h, max_w, 3),
dtype=np.uint8,
mode="w+",
)
from tqdm import tqdm
from multiprocessing.pool import ThreadPool
NUM_THREADS = min(8, os.cpu_count())
loaded_images = ThreadPool(NUM_THREADS).imap(
lambda x: self.load_resized_img(x),
range(len(self.annotations)),
)
pbar = tqdm(enumerate(loaded_images), total=len(self.annotations))
for k, out in pbar:
self.imgs[k][: out.shape[0], : out.shape[1], :] = out.copy()
self.imgs.flush()
pbar.close()
else:
logger.warning(
"You are using cached imgs! Make sure your dataset is not changed!!\n"
"Everytime the self.input_size is changed in your exp file, you need to delete\n"
"the cached data and re-generate them.\n"
)
logger.info("Loading cached imgs...")
self.imgs = np.memmap(
cache_file,
shape=(len(self.ids), max_h, max_w, 3),
dtype=np.uint8,
mode="r+",
)
def load_anno_from_ids(self, id_):
im_ann = self.coco.loadImgs(id_)[0]
width = im_ann["width"]
height = im_ann["height"]
anno_ids = self.coco.getAnnIds(imgIds=[int(id_)], iscrowd=False)
annotations = self.coco.loadAnns(anno_ids)
objs = []
for obj in annotations:
x1 = np.max((0, obj["bbox"][0]))
y1 = np.max((0, obj["bbox"][1]))
x2 = np.min((width, x1 + np.max((0, obj["bbox"][2]))))
y2 = np.min((height, y1 + np.max((0, obj["bbox"][3]))))
if obj["area"] > 0 and x2 >= x1 and y2 >= y1:
obj["clean_bbox"] = [x1, y1, x2, y2]
objs.append(obj)
num_objs = len(objs)
res = np.zeros((num_objs, 5))
for ix, obj in enumerate(objs):
cls = self.class_ids.index(obj["category_id"])
res[ix, 0:4] = obj["clean_bbox"]
res[ix, 4] = cls
r = min(self.img_size[0] / height, self.img_size[1] / width)
res[:, :4] *= r
img_info = (height, width)
resized_info = (int(height * r), int(width * r))
file_name = im_ann["file_name"] if "file_name" in im_ann else "{:012}".format(id_) + ".jpg"
return (res, img_info, resized_info, file_name)
def load_anno(self, index):
return self.annotations[index][0]
def load_resized_img(self, index):
img = self.load_image(index)
r = min(self.img_size[0] / img.shape[0], self.img_size[1] / img.shape[1])
resized_img = cv2.resize(
img,
(int(img.shape[1] * r), int(img.shape[0] * r)),
interpolation=cv2.INTER_LINEAR,
).astype(np.uint8)
return resized_img
def load_image(self, index):
file_name = self.annotations[index][3]
img_file = os.path.join(self.data_dir, self.name, file_name)
img = cv2.imread(img_file)
assert img is not None
return img
def pull_item(self, index):
id_ = self.ids[index]
res, img_info, resized_info, _ = self.annotations[index]
if self.imgs is not None:
pad_img = self.imgs[index]
img = pad_img[: resized_info[0], : resized_info[1], :].copy()
else:
img = self.load_resized_img(index)
return img, res.copy(), img_info, np.array([id_])
@Dataset.mosaic_getitem
def __getitem__(self, index):
"""One image / label pair for the given index is picked up and pre-
processed.
Args:
index (int): data index
Returns:
img (numpy.ndarray): pre-processed image
padded_labels (torch.Tensor): pre-processed label data.
The shape is :math:`[max_labels, 5]`.
each label consists of [class, xc, yc, w, h]:
class (float): class index.
xc, yc (float) : center of bbox whose values range from 0 to 1.
w, h (float) : size of bbox whose values range from 0 to 1.
info_img : tuple of h, w.
h, w (int): original shape of the image
img_id (int): same as the input index. Used for evaluation.
"""
img, target, img_info, img_id = self.pull_item(index)
if self.preproc is not None:
img, target = self.preproc(img, target, self.input_dim)
return img, target, img_info, img_id
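# Usage sketch (assuming the standard COCO layout under datasets/coco):
#
#   dataset = COCODataset(
#       data_dir="datasets/coco",
#       json_file="instances_val2017.json",
#       name="val2017",
#       img_size=(640, 640),
#   )
#   img, target, img_info, img_id = dataset[0]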