Skip to content
Snippets Groups Projects
Commit 24f3fb70 authored by Jiwen Tang's avatar Jiwen Tang
Browse files

Merge branch 'det' into main

parents 42f71b6a cfb8760a
No related branches found
No related tags found
No related merge requests found
Showing
with 2784 additions and 0 deletions
from itertools import count
import os
import os.path as osp
from omegaconf import OmegaConf
import torch
import detectron2.data.transforms as T
from detectron2.config import LazyCall as L
from detectron2.data import get_detection_dataset_dicts
from detectron2.solver.build import get_default_optimizer_params
# import torch.nn as nn
from det.yolox.models import YOLOX, YOLOPAFPN, YOLOXHead
from det.yolox.data import (
# COCODataset,
TrainTransform,
ValTransform,
# YoloBatchSampler,
# DataLoader,
# InfiniteSampler,
MosaicDetection,
build_yolox_train_loader,
build_yolox_test_loader,
)
from det.yolox.data.datasets import Base_DatasetFromList
from det.yolox.utils import LRScheduler
# from detectron2.evaluation import COCOEvaluator
# from det.yolox.evaluators import COCOEvaluator
from det.yolox.evaluators import YOLOX_COCOEvaluator
from lib.torch_utils.solver.lr_scheduler import flat_and_anneal_lr_scheduler
# Common training-related configs that are designed for "tools/lazyconfig_train_net.py"
# You can use your own instead, together with your own train_net.py
train = dict(
    # NOTE: need to copy these two lines to get correct dirs
    # configs/<path>/<name>.py -> output/<path>/<name>
    output_dir=osp.abspath(__file__).replace("configs", "output", 1)[0:-3],
    exp_name=osp.split(osp.abspath(__file__))[1][0:-3],  # .py
    seed=-1,  # <0: no fixed seed
    cudnn_deterministic=False,
    cudnn_benchmark=True,
    init_checkpoint="",  # weights used to initialize the model
    # init_checkpoint="pretrained_models/yolox/yolox_s.pth",
    resume_from="",  # checkpoint to resume an interrupted run from
    # init_checkpoint="detectron2://ImageNetPretrained/MSRA/R-50.pkl",
    # max_iter=90000,
    amp=dict(  # options for Automatic Mixed Precision
        enabled=True,
    ),
    grad_clip=dict(  # options for grad clipping
        enabled=False,
        clip_type="full_model",  # value, norm, full_model
        clip_value=1.0,
        norm_type=2.0,
    ),
    ddp=dict(  # options for DistributedDataParallel
        broadcast_buffers=False,
        find_unused_parameters=False,
        fp16_compression=False,
    ),
    # NOTE: epoch based period
    checkpointer=dict(period=1, max_to_keep=10),  # options for PeriodicCheckpointer
    # eval_period=5000,
    eval_period=-1,  # epoch based; presumably <0 disables periodic eval — TODO confirm in trainer
    log_period=20,
    device="cuda",
    # ...
    # lr is scaled by total batch size in the trainer: lr = basic_lr_per_img * bs
    basic_lr_per_img=0.01 / 64.0,  # 1.5625e-4
    random_size=(14, 26),  # set None to disable; randomly choose a int in this range, and *32
    mscale=(0.8, 1.6),
    ema=True,  # keep an exponential moving average of the model weights
    total_epochs=16,
    warmup_epochs=5,
    no_aug_epochs=2,  # strong aug (mosaic/mixup) is closed for the last n epochs
    sync_norm_period=10,  # sync norm every n epochs
    # l1 loss:
    # 1) if use_l1 and l1_from_scratch: use l1 for the whole training phase
    # 2) use_l1=False: no l1 at all
    # 3) use_l1 and l1_from_scratch=False: just use l1 after closing mosaic (YOLOX default)
    l1_from_scratch=False,
    use_l1=True,
    anneal_after_warmup=True,
    # ...
    occupy_gpu=False,
)
# convert to OmegaConf so downstream code gets dotted access and interpolation
train = OmegaConf.create(train)
# OmegaConf.register_new_resolver(
#     "mul2", lambda x: x*2
# )
# --------------------------------------------------------------------
# model
# --------------------------------------------------------------------
# LazyCall tree: nothing is instantiated here; the training script calls
# instantiate(model) later. Defaults correspond to the yolox-l multipliers.
model = L(YOLOX)(
    backbone=L(YOLOPAFPN)(
        depth=1.0,
        width=1.0,
        in_channels=[256, 512, 1024],
    ),
    head=L(YOLOXHead)(
        num_classes=1,
        # relative OmegaConf interpolation: head reuses the backbone settings
        width="${..backbone.width}",
        # width="${mul2: ${..backbone.width}}", # NOTE: do not forget $
        in_channels="${..backbone.in_channels}",
    ),
)
# --------------------------------------------------------------------
# optimizer
# --------------------------------------------------------------------
optimizer = L(torch.optim.SGD)(
    params=L(get_default_optimizer_params)(
        # params.model is meant to be set to the model object, before instantiating
        # the optimizer.
        weight_decay_norm=0.0,  # no weight decay on norm layers
        weight_decay_bias=0.0,  # no weight decay on biases
    ),
    lr=0.01,  # bs=64
    momentum=0.9,
    weight_decay=5e-4,
    nesterov=True,
)
# LR schedule: "pow" warmup followed by cosine annealing down to
# target_lr_factor * base lr. The commented fields (optimizer, total_iters,
# warmup_iters, anneal_point) are filled in by the training script at runtime.
lr_config = L(flat_and_anneal_lr_scheduler)(
    warmup_method="pow",
    warmup_pow=2,
    warmup_factor=0.0,
    # to be set
    # optimizer=
    # total_iters=total_iters,  # to be set
    # warmup_iters=epoch_len * 3,
    # anneal_point=5 / (total_epochs - 15),
    anneal_method="cosine",
    target_lr_factor=0.05,
)
# Dataset name placeholders; concrete (per-dataset) configs override these.
DATASETS = dict(TRAIN=("",), TEST=("",))
DATASETS = OmegaConf.create(DATASETS)

dataloader = OmegaConf.create()
dataloader.train = L(build_yolox_train_loader)(
    dataset=L(Base_DatasetFromList)(
        split="train",
        lst=L(get_detection_dataset_dicts)(names=DATASETS.TRAIN),
        img_size=(640, 640),
        preproc=L(TrainTransform)(
            max_labels=50,  # max GT boxes per image for the plain (non-mosaic) path
        ),
    ),
    # mosaic/mixup augmentation wrapper around the base dataset
    aug_wrapper=L(MosaicDetection)(
        mosaic=True,
        img_size="${..dataset.img_size}",
        preproc=L(TrainTransform)(
            max_labels=120,  # mosaic combines images, so allow more boxes
        ),
        degrees=10.0,  # random rotation range
        translate=0.1,
        mosaic_scale=(0.1, 2),
        mixup_scale=(0.5, 1.5),
        shear=2.0,
        enable_mixup=True,
        mosaic_prob=1.0,
        mixup_prob=1.0,
    ),
    # reference_batch_size=64,
    total_batch_size=64,  # 8x8gpu
    num_workers=4,
    pin_memory=True,
)
# validation-time options
val = dict(
    eval_cached=False,  # presumably: reuse cached predictions instead of re-running — TODO confirm
)
val = OmegaConf.create(val)

# test-time options
test = dict(
    test_dataset_names=DATASETS.TEST,
    test_size=(640, 640),  # (height, width)
    conf_thr=0.01,  # detection confidence threshold
    nms_thr=0.65,  # NMS IoU threshold
    num_classes="${model.head.num_classes}",  # keep in sync with the head
    amp_test=False,
    half_test=True,  # fp16 inference
    precise_bn=dict(
        enabled=False,
        num_iter=200,
    ),
    # fuse_conv_bn=False,
    fuse_conv_bn=True,  # fuse conv+bn at test time for speed
)
test = OmegaConf.create(test)

# NOTE: for multiple test loaders, just write it as a list
dataloader.test = [
    L(build_yolox_test_loader)(
        dataset=L(Base_DatasetFromList)(
            split="test",
            lst=L(get_detection_dataset_dicts)(names=test_dataset_name, filter_empty=False),
            img_size="${test.test_size}",
            preproc=L(ValTransform)(
                legacy=False,
            ),
        ),
        # total_batch_size=1,
        total_batch_size=64,
        num_workers=4,
        pin_memory=True,
    )
    for test_dataset_name in test.test_dataset_names
]
# one COCO-style evaluator per test dataset, aligned with dataloader.test
dataloader.evaluator = [
    L(YOLOX_COCOEvaluator)(
        dataset_name=test_dataset_name,
        filter_scene=False,
    )
    for test_dataset_name in test.test_dataset_names
]
import os.path as osp
import torch
from detectron2.config import LazyCall as L
from detectron2.solver.build import get_default_optimizer_params
from .yolox_base import train, val, test, model, dataloader, optimizer, lr_config, DATASETS # noqa
from det.yolox.data import build_yolox_test_loader, ValTransform
from det.yolox.data.datasets import Base_DatasetFromList
from detectron2.data import get_detection_dataset_dicts
from det.yolox.evaluators import YOLOX_COCOEvaluator
from lib.torch_utils.solver.ranger import Ranger
# YOLOX-X config for the hb (presumably HomebrewedDB, BOP) dataset,
# overriding the shared defaults imported from yolox_base.
train.update(
    # NOTE: need to copy these two lines to get correct dirs
    output_dir=osp.abspath(__file__).replace("configs", "output", 1)[0:-3],
    exp_name=osp.split(osp.abspath(__file__))[1][0:-3],  # .py
)
train.amp.enabled = True

# yolox-x sized model (depth/width multipliers)
model.backbone.depth = 1.33
model.backbone.width = 1.25
model.head.num_classes = 33  # number of object classes in this dataset
train.init_checkpoint = "pretrained_models/yolox/yolox_x.pth"

# datasets
DATASETS.TRAIN = ["hb_pbr_train"]
DATASETS.TEST = ["hb_test_primesense_bop19"]
dataloader.train.dataset.lst.names = DATASETS.TRAIN
dataloader.train.total_batch_size = 32

# color aug
dataloader.train.aug_wrapper.COLOR_AUG_PROB = 0.8
dataloader.train.aug_wrapper.COLOR_AUG_TYPE = "code"
# imgaug pipeline given as a code string; evaluated by the aug wrapper at runtime
dataloader.train.aug_wrapper.COLOR_AUG_CODE = (
    "Sequential(["
    # Sometimes(0.5, PerspectiveTransform(0.05)),
    # Sometimes(0.5, CropAndPad(percent=(-0.05, 0.1))),
    # Sometimes(0.5, Affine(scale=(1.0, 1.2))),
    "Sometimes(0.5, CoarseDropout( p=0.2, size_percent=0.05) ),"
    "Sometimes(0.4, GaussianBlur((0., 3.))),"
    "Sometimes(0.3, pillike.EnhanceSharpness(factor=(0., 50.))),"
    "Sometimes(0.3, pillike.EnhanceContrast(factor=(0.2, 50.))),"
    "Sometimes(0.5, pillike.EnhanceBrightness(factor=(0.1, 6.))),"
    "Sometimes(0.3, pillike.EnhanceColor(factor=(0., 20.))),"
    "Sometimes(0.5, Add((-25, 25), per_channel=0.3)),"
    "Sometimes(0.3, Invert(0.2, per_channel=True)),"
    "Sometimes(0.5, Multiply((0.6, 1.4), per_channel=0.5)),"
    "Sometimes(0.5, Multiply((0.6, 1.4))),"
    "Sometimes(0.1, AdditiveGaussianNoise(scale=10, per_channel=True)),"
    "Sometimes(0.5, iaa.contrast.LinearContrast((0.5, 2.2), per_channel=0.3)),"
    # "Sometimes(0.5, Grayscale(alpha=(0.0, 1.0)))," # maybe remove for det
    "], random_order=True)"
    # cosy+aae
)
# hsv color aug
dataloader.train.aug_wrapper.AUG_HSV_PROB = 1.0
dataloader.train.aug_wrapper.HSV_H = 0.015
dataloader.train.aug_wrapper.HSV_S = 0.7
dataloader.train.aug_wrapper.HSV_V = 0.4
dataloader.train.aug_wrapper.FORMAT = "RGB"

# replace the base SGD optimizer with Ranger
optimizer = L(Ranger)(
    params=L(get_default_optimizer_params)(
        # params.model is meant to be set to the model object, before instantiating
        # the optimizer.
        weight_decay_norm=0.0,
        weight_decay_bias=0.0,
    ),
    lr=0.001,  # bs=64
    # momentum=0.9,
    weight_decay=0,
    # nesterov=True,
)

train.total_epochs = 30
train.no_aug_epochs = 15
train.checkpointer = dict(period=2, max_to_keep=10)

test.test_dataset_names = DATASETS.TEST
# multi-scale test-time augmentation
test.augment = True
test.scales = (1, 0.75, 0.83, 1.12, 1.25)
test.conf_thr = 0.001

# rebuild test loaders/evaluators for the datasets selected above
dataloader.test = [
    L(build_yolox_test_loader)(
        dataset=L(Base_DatasetFromList)(
            split="test",
            lst=L(get_detection_dataset_dicts)(names=test_dataset_name, filter_empty=False),
            img_size="${test.test_size}",
            preproc=L(ValTransform)(
                legacy=False,
            ),
        ),
        total_batch_size=1,
        # total_batch_size=64,
        num_workers=4,
        pin_memory=True,
    )
    for test_dataset_name in test.test_dataset_names
]
dataloader.evaluator = [
    L(YOLOX_COCOEvaluator)(
        dataset_name=test_dataset_name,
        filter_scene=False,
    )
    for test_dataset_name in test.test_dataset_names
]
\ No newline at end of file
import os.path as osp
import torch
from detectron2.config import LazyCall as L
from detectron2.solver.build import get_default_optimizer_params
from .yolox_base import train, val, test, model, dataloader, optimizer, lr_config, DATASETS # noqa
from det.yolox.data import build_yolox_test_loader, ValTransform
from det.yolox.data.datasets import Base_DatasetFromList
from detectron2.data import get_detection_dataset_dicts
from det.yolox.evaluators import YOLOX_COCOEvaluator
from lib.torch_utils.solver.ranger import Ranger
# YOLOX-X config for the icbin (IC-BIN, BOP) dataset,
# overriding the shared defaults imported from yolox_base.
train.update(
    # NOTE: need to copy these two lines to get correct dirs
    output_dir=osp.abspath(__file__).replace("configs", "output", 1)[0:-3],
    exp_name=osp.split(osp.abspath(__file__))[1][0:-3],  # .py
)
train.amp.enabled = True

# yolox-x sized model (depth/width multipliers)
model.backbone.depth = 1.33
model.backbone.width = 1.25
model.head.num_classes = 2  # number of object classes in this dataset
train.init_checkpoint = "pretrained_models/yolox/yolox_x.pth"

# datasets
DATASETS.TRAIN = ["icbin_pbr_train"]
DATASETS.TEST = ["icbin_bop_test"]
dataloader.train.dataset.lst.names = DATASETS.TRAIN
dataloader.train.total_batch_size = 32

# color aug
dataloader.train.aug_wrapper.COLOR_AUG_PROB = 0.8
dataloader.train.aug_wrapper.COLOR_AUG_TYPE = "code"
# imgaug pipeline given as a code string; evaluated by the aug wrapper at runtime
dataloader.train.aug_wrapper.COLOR_AUG_CODE = (
    "Sequential(["
    # Sometimes(0.5, PerspectiveTransform(0.05)),
    # Sometimes(0.5, CropAndPad(percent=(-0.05, 0.1))),
    # Sometimes(0.5, Affine(scale=(1.0, 1.2))),
    "Sometimes(0.5, CoarseDropout( p=0.2, size_percent=0.05) ),"
    "Sometimes(0.4, GaussianBlur((0., 3.))),"
    "Sometimes(0.3, pillike.EnhanceSharpness(factor=(0., 50.))),"
    "Sometimes(0.3, pillike.EnhanceContrast(factor=(0.2, 50.))),"
    "Sometimes(0.5, pillike.EnhanceBrightness(factor=(0.1, 6.))),"
    "Sometimes(0.3, pillike.EnhanceColor(factor=(0., 20.))),"
    "Sometimes(0.5, Add((-25, 25), per_channel=0.3)),"
    "Sometimes(0.3, Invert(0.2, per_channel=True)),"
    "Sometimes(0.5, Multiply((0.6, 1.4), per_channel=0.5)),"
    "Sometimes(0.5, Multiply((0.6, 1.4))),"
    "Sometimes(0.1, AdditiveGaussianNoise(scale=10, per_channel=True)),"
    "Sometimes(0.5, iaa.contrast.LinearContrast((0.5, 2.2), per_channel=0.3)),"
    # "Sometimes(0.5, Grayscale(alpha=(0.0, 1.0)))," # maybe remove for det
    "], random_order=True)"
    # cosy+aae
)
# hsv color aug
dataloader.train.aug_wrapper.AUG_HSV_PROB = 1.0
dataloader.train.aug_wrapper.HSV_H = 0.015
dataloader.train.aug_wrapper.HSV_S = 0.7
dataloader.train.aug_wrapper.HSV_V = 0.4
dataloader.train.aug_wrapper.FORMAT = "RGB"

# replace the base SGD optimizer with Ranger
optimizer = L(Ranger)(
    params=L(get_default_optimizer_params)(
        # params.model is meant to be set to the model object, before instantiating
        # the optimizer.
        weight_decay_norm=0.0,
        weight_decay_bias=0.0,
    ),
    lr=0.001,  # bs=64
    # momentum=0.9,
    weight_decay=0,
    # nesterov=True,
)

train.total_epochs = 30
train.no_aug_epochs = 15
train.checkpointer = dict(period=2, max_to_keep=10)

test.test_dataset_names = DATASETS.TEST
# multi-scale test-time augmentation
test.augment = True
test.scales = (1, 0.75, 0.83, 1.12, 1.25)
test.conf_thr = 0.001

# rebuild test loaders/evaluators for the datasets selected above
dataloader.test = [
    L(build_yolox_test_loader)(
        dataset=L(Base_DatasetFromList)(
            split="test",
            lst=L(get_detection_dataset_dicts)(names=test_dataset_name, filter_empty=False),
            img_size="${test.test_size}",
            preproc=L(ValTransform)(
                legacy=False,
            ),
        ),
        total_batch_size=1,
        # total_batch_size=64,
        num_workers=4,
        pin_memory=True,
    )
    for test_dataset_name in test.test_dataset_names
]
dataloader.evaluator = [
    L(YOLOX_COCOEvaluator)(
        dataset_name=test_dataset_name,
        filter_scene=False,
    )
    for test_dataset_name in test.test_dataset_names
]
\ No newline at end of file
import os.path as osp
import torch
from detectron2.config import LazyCall as L
from detectron2.solver.build import get_default_optimizer_params
from .yolox_base import train, val, test, model, dataloader, optimizer, lr_config, DATASETS # noqa
from det.yolox.data import build_yolox_test_loader, ValTransform
from det.yolox.data.datasets import Base_DatasetFromList
from detectron2.data import get_detection_dataset_dicts
from det.yolox.evaluators import YOLOX_COCOEvaluator
from lib.torch_utils.solver.ranger import Ranger
# YOLOX-X config for the itodd (ITODD, BOP) dataset,
# overriding the shared defaults imported from yolox_base.
train.update(
    # NOTE: need to copy these two lines to get correct dirs
    output_dir=osp.abspath(__file__).replace("configs", "output", 1)[0:-3],
    exp_name=osp.split(osp.abspath(__file__))[1][0:-3],  # .py
)
train.amp.enabled = True

# yolox-x sized model (depth/width multipliers)
model.backbone.depth = 1.33
model.backbone.width = 1.25
model.head.num_classes = 28  # number of object classes in this dataset
train.init_checkpoint = "pretrained_models/yolox/yolox_x.pth"

# datasets
DATASETS.TRAIN = ["itodd_pbr_train"]
DATASETS.TEST = ["itodd_bop_test"]
dataloader.train.dataset.lst.names = DATASETS.TRAIN
dataloader.train.total_batch_size = 32

# color aug
dataloader.train.aug_wrapper.COLOR_AUG_PROB = 0.8
dataloader.train.aug_wrapper.COLOR_AUG_TYPE = "code"
# imgaug pipeline given as a code string; evaluated by the aug wrapper at runtime
dataloader.train.aug_wrapper.COLOR_AUG_CODE = (
    "Sequential(["
    # Sometimes(0.5, PerspectiveTransform(0.05)),
    # Sometimes(0.5, CropAndPad(percent=(-0.05, 0.1))),
    # Sometimes(0.5, Affine(scale=(1.0, 1.2))),
    "Sometimes(0.5, CoarseDropout( p=0.2, size_percent=0.05) ),"
    "Sometimes(0.4, GaussianBlur((0., 3.))),"
    "Sometimes(0.3, pillike.EnhanceSharpness(factor=(0., 50.))),"
    "Sometimes(0.3, pillike.EnhanceContrast(factor=(0.2, 50.))),"
    "Sometimes(0.5, pillike.EnhanceBrightness(factor=(0.1, 6.))),"
    "Sometimes(0.3, pillike.EnhanceColor(factor=(0., 20.))),"
    "Sometimes(0.5, Add((-25, 25), per_channel=0.3)),"
    "Sometimes(0.3, Invert(0.2, per_channel=True)),"
    "Sometimes(0.5, Multiply((0.6, 1.4), per_channel=0.5)),"
    "Sometimes(0.5, Multiply((0.6, 1.4))),"
    "Sometimes(0.1, AdditiveGaussianNoise(scale=10, per_channel=True)),"
    "Sometimes(0.5, iaa.contrast.LinearContrast((0.5, 2.2), per_channel=0.3)),"
    # "Sometimes(0.5, Grayscale(alpha=(0.0, 1.0)))," # maybe remove for det
    "], random_order=True)"
    # cosy+aae
)
# hsv color aug
dataloader.train.aug_wrapper.AUG_HSV_PROB = 1.0
dataloader.train.aug_wrapper.HSV_H = 0.015
dataloader.train.aug_wrapper.HSV_S = 0.7
dataloader.train.aug_wrapper.HSV_V = 0.4
dataloader.train.aug_wrapper.FORMAT = "RGB"

# replace the base SGD optimizer with Ranger
optimizer = L(Ranger)(
    params=L(get_default_optimizer_params)(
        # params.model is meant to be set to the model object, before instantiating
        # the optimizer.
        weight_decay_norm=0.0,
        weight_decay_bias=0.0,
    ),
    lr=0.001,  # bs=64
    # momentum=0.9,
    weight_decay=0,
    # nesterov=True,
)

train.total_epochs = 30
train.no_aug_epochs = 15
train.checkpointer = dict(period=2, max_to_keep=10)

test.test_dataset_names = DATASETS.TEST
# multi-scale test-time augmentation
test.augment = True
test.scales = (1, 0.75, 0.83, 1.12, 1.25)
test.conf_thr = 0.001

# rebuild test loaders/evaluators for the datasets selected above
dataloader.test = [
    L(build_yolox_test_loader)(
        dataset=L(Base_DatasetFromList)(
            split="test",
            lst=L(get_detection_dataset_dicts)(names=test_dataset_name, filter_empty=False),
            img_size="${test.test_size}",
            preproc=L(ValTransform)(
                legacy=False,
            ),
        ),
        total_batch_size=1,
        # total_batch_size=64,
        num_workers=4,
        pin_memory=True,
    )
    for test_dataset_name in test.test_dataset_names
]
dataloader.evaluator = [
    L(YOLOX_COCOEvaluator)(
        dataset_name=test_dataset_name,
        filter_scene=False,
    )
    for test_dataset_name in test.test_dataset_names
]
\ No newline at end of file
import os.path as osp
import torch
from detectron2.config import LazyCall as L
from detectron2.solver.build import get_default_optimizer_params
from .yolox_base import train, val, test, model, dataloader, optimizer, lr_config, DATASETS # noqa
from det.yolox.data import build_yolox_test_loader, ValTransform
from det.yolox.data.datasets import Base_DatasetFromList
from detectron2.data import get_detection_dataset_dicts
from det.yolox.evaluators import YOLOX_COCOEvaluator
from lib.torch_utils.solver.ranger import Ranger
# YOLOX-X config for the lmo (LM-O / Linemod-Occluded, BOP) dataset,
# overriding the shared defaults imported from yolox_base.
train.update(
    # NOTE: need to copy these two lines to get correct dirs
    output_dir=osp.abspath(__file__).replace("configs", "output", 1)[0:-3],
    exp_name=osp.split(osp.abspath(__file__))[1][0:-3],  # .py
)
train.amp.enabled = True

# yolox-x sized model (depth/width multipliers)
model.backbone.depth = 1.33
model.backbone.width = 1.25
model.head.num_classes = 8  # number of object classes in this dataset
train.init_checkpoint = "pretrained_models/yolox/yolox_x.pth"

# datasets
DATASETS.TRAIN = ["lmo_pbr_train"]
DATASETS.TEST = ["lmo_bop_test"]
dataloader.train.dataset.lst.names = DATASETS.TRAIN
dataloader.train.total_batch_size = 32

# color aug
dataloader.train.aug_wrapper.COLOR_AUG_PROB = 0.8
dataloader.train.aug_wrapper.COLOR_AUG_TYPE = "code"
# imgaug pipeline given as a code string; evaluated by the aug wrapper at runtime
dataloader.train.aug_wrapper.COLOR_AUG_CODE = (
    "Sequential(["
    # Sometimes(0.5, PerspectiveTransform(0.05)),
    # Sometimes(0.5, CropAndPad(percent=(-0.05, 0.1))),
    # Sometimes(0.5, Affine(scale=(1.0, 1.2))),
    "Sometimes(0.5, CoarseDropout( p=0.2, size_percent=0.05) ),"
    "Sometimes(0.4, GaussianBlur((0., 3.))),"
    "Sometimes(0.3, pillike.EnhanceSharpness(factor=(0., 50.))),"
    "Sometimes(0.3, pillike.EnhanceContrast(factor=(0.2, 50.))),"
    "Sometimes(0.5, pillike.EnhanceBrightness(factor=(0.1, 6.))),"
    "Sometimes(0.3, pillike.EnhanceColor(factor=(0., 20.))),"
    "Sometimes(0.5, Add((-25, 25), per_channel=0.3)),"
    "Sometimes(0.3, Invert(0.2, per_channel=True)),"
    "Sometimes(0.5, Multiply((0.6, 1.4), per_channel=0.5)),"
    "Sometimes(0.5, Multiply((0.6, 1.4))),"
    "Sometimes(0.1, AdditiveGaussianNoise(scale=10, per_channel=True)),"
    "Sometimes(0.5, iaa.contrast.LinearContrast((0.5, 2.2), per_channel=0.3)),"
    # "Sometimes(0.5, Grayscale(alpha=(0.0, 1.0)))," # maybe remove for det
    "], random_order=True)"
    # cosy+aae
)
# hsv color aug
dataloader.train.aug_wrapper.AUG_HSV_PROB = 1.0
dataloader.train.aug_wrapper.HSV_H = 0.015
dataloader.train.aug_wrapper.HSV_S = 0.7
dataloader.train.aug_wrapper.HSV_V = 0.4
dataloader.train.aug_wrapper.FORMAT = "RGB"

# replace the base SGD optimizer with Ranger
optimizer = L(Ranger)(
    params=L(get_default_optimizer_params)(
        # params.model is meant to be set to the model object, before instantiating
        # the optimizer.
        weight_decay_norm=0.0,
        weight_decay_bias=0.0,
    ),
    lr=0.001,  # bs=64
    # momentum=0.9,
    weight_decay=0,
    # nesterov=True,
)

train.total_epochs = 30
train.no_aug_epochs = 15
train.checkpointer = dict(period=2, max_to_keep=10)

test.test_dataset_names = DATASETS.TEST
# multi-scale test-time augmentation
test.augment = True
test.scales = (1, 0.75, 0.83, 1.12, 1.25)
test.conf_thr = 0.001

# rebuild test loaders/evaluators for the datasets selected above
dataloader.test = [
    L(build_yolox_test_loader)(
        dataset=L(Base_DatasetFromList)(
            split="test",
            lst=L(get_detection_dataset_dicts)(names=test_dataset_name, filter_empty=False),
            img_size="${test.test_size}",
            preproc=L(ValTransform)(
                legacy=False,
            ),
        ),
        total_batch_size=1,
        # total_batch_size=64,
        num_workers=4,
        pin_memory=True,
    )
    for test_dataset_name in test.test_dataset_names
]
dataloader.evaluator = [
    L(YOLOX_COCOEvaluator)(
        dataset_name=test_dataset_name,
        filter_scene=False,
    )
    for test_dataset_name in test.test_dataset_names
]
\ No newline at end of file
import os.path as osp
import torch
from detectron2.config import LazyCall as L
from detectron2.solver.build import get_default_optimizer_params
from .yolox_base import train, val, test, model, dataloader, optimizer, lr_config, DATASETS # noqa
from det.yolox.data import build_yolox_test_loader, ValTransform
from det.yolox.data.datasets import Base_DatasetFromList
from detectron2.data import get_detection_dataset_dicts
from det.yolox.evaluators import YOLOX_COCOEvaluator
from lib.torch_utils.solver.ranger import Ranger
# YOLOX-X config for the tless (T-LESS, BOP) dataset, PBR training data only,
# overriding the shared defaults imported from yolox_base.
train.update(
    # NOTE: need to copy these two lines to get correct dirs
    output_dir=osp.abspath(__file__).replace("configs", "output", 1)[0:-3],
    exp_name=osp.split(osp.abspath(__file__))[1][0:-3],  # .py
)
train.amp.enabled = True

# yolox-x sized model (depth/width multipliers)
model.backbone.depth = 1.33
model.backbone.width = 1.25
model.head.num_classes = 30  # number of object classes in this dataset
train.init_checkpoint = "pretrained_models/yolox/yolox_x.pth"

# datasets
DATASETS.TRAIN = ["tless_pbr_train"]
DATASETS.TEST = ["tless_bop_test_primesense"]
dataloader.train.dataset.lst.names = DATASETS.TRAIN
dataloader.train.total_batch_size = 32

# color aug
dataloader.train.aug_wrapper.COLOR_AUG_PROB = 0.8
dataloader.train.aug_wrapper.COLOR_AUG_TYPE = "code"
# imgaug pipeline given as a code string; evaluated by the aug wrapper at runtime
dataloader.train.aug_wrapper.COLOR_AUG_CODE = (
    "Sequential(["
    # Sometimes(0.5, PerspectiveTransform(0.05)),
    # Sometimes(0.5, CropAndPad(percent=(-0.05, 0.1))),
    # Sometimes(0.5, Affine(scale=(1.0, 1.2))),
    "Sometimes(0.5, CoarseDropout( p=0.2, size_percent=0.05) ),"
    "Sometimes(0.4, GaussianBlur((0., 3.))),"
    "Sometimes(0.3, pillike.EnhanceSharpness(factor=(0., 50.))),"
    "Sometimes(0.3, pillike.EnhanceContrast(factor=(0.2, 50.))),"
    "Sometimes(0.5, pillike.EnhanceBrightness(factor=(0.1, 6.))),"
    "Sometimes(0.3, pillike.EnhanceColor(factor=(0., 20.))),"
    "Sometimes(0.5, Add((-25, 25), per_channel=0.3)),"
    "Sometimes(0.3, Invert(0.2, per_channel=True)),"
    "Sometimes(0.5, Multiply((0.6, 1.4), per_channel=0.5)),"
    "Sometimes(0.5, Multiply((0.6, 1.4))),"
    "Sometimes(0.1, AdditiveGaussianNoise(scale=10, per_channel=True)),"
    "Sometimes(0.5, iaa.contrast.LinearContrast((0.5, 2.2), per_channel=0.3)),"
    # "Sometimes(0.5, Grayscale(alpha=(0.0, 1.0)))," # maybe remove for det
    "], random_order=True)"
    # cosy+aae
)
# hsv color aug
dataloader.train.aug_wrapper.AUG_HSV_PROB = 1.0
dataloader.train.aug_wrapper.HSV_H = 0.015
dataloader.train.aug_wrapper.HSV_S = 0.7
dataloader.train.aug_wrapper.HSV_V = 0.4
dataloader.train.aug_wrapper.FORMAT = "RGB"

# replace the base SGD optimizer with Ranger
optimizer = L(Ranger)(
    params=L(get_default_optimizer_params)(
        # params.model is meant to be set to the model object, before instantiating
        # the optimizer.
        weight_decay_norm=0.0,
        weight_decay_bias=0.0,
    ),
    lr=0.001,  # bs=64
    # momentum=0.9,
    weight_decay=0,
    # nesterov=True,
)

train.total_epochs = 30
train.no_aug_epochs = 15
train.checkpointer = dict(period=2, max_to_keep=10)

test.test_dataset_names = DATASETS.TEST
# multi-scale test-time augmentation
test.augment = True
test.scales = (1, 0.75, 0.83, 1.12, 1.25)
test.conf_thr = 0.001

# rebuild test loaders/evaluators for the datasets selected above
dataloader.test = [
    L(build_yolox_test_loader)(
        dataset=L(Base_DatasetFromList)(
            split="test",
            lst=L(get_detection_dataset_dicts)(names=test_dataset_name, filter_empty=False),
            img_size="${test.test_size}",
            preproc=L(ValTransform)(
                legacy=False,
            ),
        ),
        total_batch_size=1,
        # total_batch_size=64,
        num_workers=4,
        pin_memory=True,
    )
    for test_dataset_name in test.test_dataset_names
]
dataloader.evaluator = [
    L(YOLOX_COCOEvaluator)(
        dataset_name=test_dataset_name,
        filter_scene=False,
    )
    for test_dataset_name in test.test_dataset_names
]
\ No newline at end of file
import os.path as osp
import torch
from detectron2.config import LazyCall as L
from detectron2.solver.build import get_default_optimizer_params
from .yolox_base import train, val, test, model, dataloader, optimizer, lr_config, DATASETS # noqa
from det.yolox.data import build_yolox_test_loader, ValTransform
from det.yolox.data.datasets import Base_DatasetFromList
from detectron2.data import get_detection_dataset_dicts
from det.yolox.evaluators import YOLOX_COCOEvaluator
from lib.torch_utils.solver.ranger import Ranger
# YOLOX-X config for the tless (T-LESS, BOP) dataset, trained on PBR + real
# primesense images, overriding the shared defaults imported from yolox_base.
train.update(
    # NOTE: need to copy these two lines to get correct dirs
    output_dir=osp.abspath(__file__).replace("configs", "output", 1)[0:-3],
    exp_name=osp.split(osp.abspath(__file__))[1][0:-3],  # .py
)
train.amp.enabled = True

# yolox-x sized model (depth/width multipliers)
model.backbone.depth = 1.33
model.backbone.width = 1.25
model.head.num_classes = 30  # number of object classes in this dataset
train.init_checkpoint = "pretrained_models/yolox/yolox_x.pth"

# datasets: synthetic PBR plus real primesense training images
DATASETS.TRAIN = ["tless_pbr_train", "tless_primesense_train"]
DATASETS.TEST = ["tless_bop_test_primesense"]
dataloader.train.dataset.lst.names = DATASETS.TRAIN
dataloader.train.total_batch_size = 32

# color aug
dataloader.train.aug_wrapper.COLOR_AUG_PROB = 0.8
dataloader.train.aug_wrapper.COLOR_AUG_TYPE = "code"
# imgaug pipeline given as a code string; evaluated by the aug wrapper at runtime
dataloader.train.aug_wrapper.COLOR_AUG_CODE = (
    "Sequential(["
    # Sometimes(0.5, PerspectiveTransform(0.05)),
    # Sometimes(0.5, CropAndPad(percent=(-0.05, 0.1))),
    # Sometimes(0.5, Affine(scale=(1.0, 1.2))),
    "Sometimes(0.5, CoarseDropout( p=0.2, size_percent=0.05) ),"
    "Sometimes(0.4, GaussianBlur((0., 3.))),"
    "Sometimes(0.3, pillike.EnhanceSharpness(factor=(0., 50.))),"
    "Sometimes(0.3, pillike.EnhanceContrast(factor=(0.2, 50.))),"
    "Sometimes(0.5, pillike.EnhanceBrightness(factor=(0.1, 6.))),"
    "Sometimes(0.3, pillike.EnhanceColor(factor=(0., 20.))),"
    "Sometimes(0.5, Add((-25, 25), per_channel=0.3)),"
    "Sometimes(0.3, Invert(0.2, per_channel=True)),"
    "Sometimes(0.5, Multiply((0.6, 1.4), per_channel=0.5)),"
    "Sometimes(0.5, Multiply((0.6, 1.4))),"
    "Sometimes(0.1, AdditiveGaussianNoise(scale=10, per_channel=True)),"
    "Sometimes(0.5, iaa.contrast.LinearContrast((0.5, 2.2), per_channel=0.3)),"
    # "Sometimes(0.5, Grayscale(alpha=(0.0, 1.0)))," # maybe remove for det
    "], random_order=True)"
    # cosy+aae
)
# hsv color aug
dataloader.train.aug_wrapper.AUG_HSV_PROB = 1.0
dataloader.train.aug_wrapper.HSV_H = 0.015
dataloader.train.aug_wrapper.HSV_S = 0.7
dataloader.train.aug_wrapper.HSV_V = 0.4
dataloader.train.aug_wrapper.FORMAT = "RGB"

# replace the base SGD optimizer with Ranger
optimizer = L(Ranger)(
    params=L(get_default_optimizer_params)(
        # params.model is meant to be set to the model object, before instantiating
        # the optimizer.
        weight_decay_norm=0.0,
        weight_decay_bias=0.0,
    ),
    lr=0.001,  # bs=64
    # momentum=0.9,
    weight_decay=0,
    # nesterov=True,
)

train.total_epochs = 30
train.no_aug_epochs = 15
train.checkpointer = dict(period=2, max_to_keep=10)

test.test_dataset_names = DATASETS.TEST
# multi-scale test-time augmentation
test.augment = True
test.scales = (1, 0.75, 0.83, 1.12, 1.25)
test.conf_thr = 0.001

# rebuild test loaders/evaluators for the datasets selected above
dataloader.test = [
    L(build_yolox_test_loader)(
        dataset=L(Base_DatasetFromList)(
            split="test",
            lst=L(get_detection_dataset_dicts)(names=test_dataset_name, filter_empty=False),
            img_size="${test.test_size}",
            preproc=L(ValTransform)(
                legacy=False,
            ),
        ),
        total_batch_size=1,
        # total_batch_size=64,
        num_workers=4,
        pin_memory=True,
    )
    for test_dataset_name in test.test_dataset_names
]
dataloader.evaluator = [
    L(YOLOX_COCOEvaluator)(
        dataset_name=test_dataset_name,
        filter_scene=False,
    )
    for test_dataset_name in test.test_dataset_names
]
\ No newline at end of file
import os.path as osp
import torch
from detectron2.config import LazyCall as L
from detectron2.solver.build import get_default_optimizer_params
from .yolox_base import train, val, test, model, dataloader, optimizer, lr_config, DATASETS # noqa
from det.yolox.data import build_yolox_test_loader, ValTransform
from det.yolox.data.datasets import Base_DatasetFromList
from detectron2.data import get_detection_dataset_dicts
from det.yolox.evaluators import YOLOX_COCOEvaluator
from lib.torch_utils.solver.ranger import Ranger
# YOLOX-X config for the tudl (TUD-L, BOP) dataset, PBR training data only,
# overriding the shared defaults imported from yolox_base.
train.update(
    # NOTE: need to copy these two lines to get correct dirs
    output_dir=osp.abspath(__file__).replace("configs", "output", 1)[0:-3],
    exp_name=osp.split(osp.abspath(__file__))[1][0:-3],  # .py
)
train.amp.enabled = True

# yolox-x sized model (depth/width multipliers)
model.backbone.depth = 1.33
model.backbone.width = 1.25
model.head.num_classes = 3  # number of object classes in this dataset
train.init_checkpoint = "pretrained_models/yolox/yolox_x.pth"

# datasets
DATASETS.TRAIN = ["tudl_pbr_train"]
DATASETS.TEST = ["tudl_bop_test"]
dataloader.train.dataset.lst.names = DATASETS.TRAIN
dataloader.train.total_batch_size = 32

# color aug
dataloader.train.aug_wrapper.COLOR_AUG_PROB = 0.8
dataloader.train.aug_wrapper.COLOR_AUG_TYPE = "code"
# imgaug pipeline given as a code string; evaluated by the aug wrapper at runtime
dataloader.train.aug_wrapper.COLOR_AUG_CODE = (
    "Sequential(["
    # Sometimes(0.5, PerspectiveTransform(0.05)),
    # Sometimes(0.5, CropAndPad(percent=(-0.05, 0.1))),
    # Sometimes(0.5, Affine(scale=(1.0, 1.2))),
    "Sometimes(0.5, CoarseDropout( p=0.2, size_percent=0.05) ),"
    "Sometimes(0.4, GaussianBlur((0., 3.))),"
    "Sometimes(0.3, pillike.EnhanceSharpness(factor=(0., 50.))),"
    "Sometimes(0.3, pillike.EnhanceContrast(factor=(0.2, 50.))),"
    "Sometimes(0.5, pillike.EnhanceBrightness(factor=(0.1, 6.))),"
    "Sometimes(0.3, pillike.EnhanceColor(factor=(0., 20.))),"
    "Sometimes(0.5, Add((-25, 25), per_channel=0.3)),"
    "Sometimes(0.3, Invert(0.2, per_channel=True)),"
    "Sometimes(0.5, Multiply((0.6, 1.4), per_channel=0.5)),"
    "Sometimes(0.5, Multiply((0.6, 1.4))),"
    "Sometimes(0.1, AdditiveGaussianNoise(scale=10, per_channel=True)),"
    "Sometimes(0.5, iaa.contrast.LinearContrast((0.5, 2.2), per_channel=0.3)),"
    # "Sometimes(0.5, Grayscale(alpha=(0.0, 1.0)))," # maybe remove for det
    "], random_order=True)"
    # cosy+aae
)
# hsv color aug
dataloader.train.aug_wrapper.AUG_HSV_PROB = 1.0
dataloader.train.aug_wrapper.HSV_H = 0.015
dataloader.train.aug_wrapper.HSV_S = 0.7
dataloader.train.aug_wrapper.HSV_V = 0.4
dataloader.train.aug_wrapper.FORMAT = "RGB"

# replace the base SGD optimizer with Ranger
optimizer = L(Ranger)(
    params=L(get_default_optimizer_params)(
        # params.model is meant to be set to the model object, before instantiating
        # the optimizer.
        weight_decay_norm=0.0,
        weight_decay_bias=0.0,
    ),
    lr=0.001,  # bs=64
    # momentum=0.9,
    weight_decay=0,
    # nesterov=True,
)

train.total_epochs = 30
train.no_aug_epochs = 15
train.checkpointer = dict(period=2, max_to_keep=10)

test.test_dataset_names = DATASETS.TEST
# multi-scale test-time augmentation
test.augment = True
test.scales = (1, 0.75, 0.83, 1.12, 1.25)
test.conf_thr = 0.001

# rebuild test loaders/evaluators for the datasets selected above
dataloader.test = [
    L(build_yolox_test_loader)(
        dataset=L(Base_DatasetFromList)(
            split="test",
            lst=L(get_detection_dataset_dicts)(names=test_dataset_name, filter_empty=False),
            img_size="${test.test_size}",
            preproc=L(ValTransform)(
                legacy=False,
            ),
        ),
        total_batch_size=1,
        # total_batch_size=64,
        num_workers=4,
        pin_memory=True,
    )
    for test_dataset_name in test.test_dataset_names
]
dataloader.evaluator = [
    L(YOLOX_COCOEvaluator)(
        dataset_name=test_dataset_name,
        filter_scene=False,
    )
    for test_dataset_name in test.test_dataset_names
]
\ No newline at end of file
import os.path as osp
import torch
from detectron2.config import LazyCall as L
from detectron2.solver.build import get_default_optimizer_params
from .yolox_base import train, val, test, model, dataloader, optimizer, lr_config, DATASETS # noqa
from det.yolox.data import build_yolox_test_loader, ValTransform
from det.yolox.data.datasets import Base_DatasetFromList
from detectron2.data import get_detection_dataset_dicts
from det.yolox.evaluators import YOLOX_COCOEvaluator
from lib.torch_utils.solver.ranger import Ranger
# YOLOX-X detector config for the BOP TUDL dataset (PBR + real training data).
# Overrides values imported from .yolox_base.
train.update(
    output_dir=osp.abspath(__file__).replace("configs", "output", 1)[0:-3],
    exp_name=osp.split(osp.abspath(__file__))[1][0:-3],  # .py
)
train.amp.enabled = True

# yolox-x sized backbone (matches the yolox_x.pth checkpoint below)
model.backbone.depth = 1.33
model.backbone.width = 1.25
model.head.num_classes = 3  # TUDL: 3 object classes
train.init_checkpoint = "pretrained_models/yolox/yolox_x.pth"

# datasets
DATASETS.TRAIN = ["tudl_pbr_train", "tudl_train_real"]
DATASETS.TEST = ["tudl_bop_test"]
dataloader.train.dataset.lst.names = DATASETS.TRAIN
dataloader.train.total_batch_size = 32

# color aug: imgaug pipeline given as source code, evaluated by the aug wrapper
dataloader.train.aug_wrapper.COLOR_AUG_PROB = 0.8
dataloader.train.aug_wrapper.COLOR_AUG_TYPE = "code"
dataloader.train.aug_wrapper.COLOR_AUG_CODE = (
    "Sequential(["
    # Sometimes(0.5, PerspectiveTransform(0.05)),
    # Sometimes(0.5, CropAndPad(percent=(-0.05, 0.1))),
    # Sometimes(0.5, Affine(scale=(1.0, 1.2))),
    "Sometimes(0.5, CoarseDropout( p=0.2, size_percent=0.05) ),"
    "Sometimes(0.4, GaussianBlur((0., 3.))),"
    "Sometimes(0.3, pillike.EnhanceSharpness(factor=(0., 50.))),"
    "Sometimes(0.3, pillike.EnhanceContrast(factor=(0.2, 50.))),"
    "Sometimes(0.5, pillike.EnhanceBrightness(factor=(0.1, 6.))),"
    "Sometimes(0.3, pillike.EnhanceColor(factor=(0., 20.))),"
    "Sometimes(0.5, Add((-25, 25), per_channel=0.3)),"
    "Sometimes(0.3, Invert(0.2, per_channel=True)),"
    "Sometimes(0.5, Multiply((0.6, 1.4), per_channel=0.5)),"
    "Sometimes(0.5, Multiply((0.6, 1.4))),"
    "Sometimes(0.1, AdditiveGaussianNoise(scale=10, per_channel=True)),"
    "Sometimes(0.5, iaa.contrast.LinearContrast((0.5, 2.2), per_channel=0.3)),"
    # "Sometimes(0.5, Grayscale(alpha=(0.0, 1.0)))," # maybe remove for det
    "], random_order=True)"
    # cosy+aae
)

# hsv color aug (applied on top of the pipeline above)
dataloader.train.aug_wrapper.AUG_HSV_PROB = 1.0
dataloader.train.aug_wrapper.HSV_H = 0.015
dataloader.train.aug_wrapper.HSV_S = 0.7
dataloader.train.aug_wrapper.HSV_V = 0.4
dataloader.train.aug_wrapper.FORMAT = "RGB"

# Ranger optimizer; replaces the optimizer imported from the base config.
optimizer = L(Ranger)(
    params=L(get_default_optimizer_params)(
        # params.model is meant to be set to the model object, before instantiating
        # the optimizer.
        weight_decay_norm=0.0,
        weight_decay_bias=0.0,
    ),
    lr=0.001,  # bs=64
    # momentum=0.9,
    weight_decay=0,
    # nesterov=True,
)

train.total_epochs = 30
train.no_aug_epochs = 15  # strong augmentation is disabled for the last epochs
train.checkpointer = dict(period=2, max_to_keep=10)

# testing: augmented (multi-scale) inference over every test dataset
test.test_dataset_names = DATASETS.TEST
test.augment = True
test.scales = (1, 0.75, 0.83, 1.12, 1.25)
test.conf_thr = 0.001

# one test loader per test dataset (batch size 1)
dataloader.test = [
    L(build_yolox_test_loader)(
        dataset=L(Base_DatasetFromList)(
            split="test",
            lst=L(get_detection_dataset_dicts)(names=test_dataset_name, filter_empty=False),
            img_size="${test.test_size}",
            preproc=L(ValTransform)(
                legacy=False,
            ),
        ),
        total_batch_size=1,
        # total_batch_size=64,
        num_workers=4,
        pin_memory=True,
    )
    for test_dataset_name in test.test_dataset_names
]

# matching COCO-style evaluator per test dataset
dataloader.evaluator = [
    L(YOLOX_COCOEvaluator)(
        dataset_name=test_dataset_name,
        filter_scene=False,
    )
    for test_dataset_name in test.test_dataset_names
]
\ No newline at end of file
import os.path as osp
import torch
from detectron2.config import LazyCall as L
from detectron2.solver.build import get_default_optimizer_params
from .yolox_base import train, val, test, model, dataloader, optimizer, lr_config, DATASETS # noqa
from det.yolox.data import build_yolox_test_loader, ValTransform
from det.yolox.data.datasets import Base_DatasetFromList
from detectron2.data import get_detection_dataset_dicts
from det.yolox.evaluators import YOLOX_COCOEvaluator
from lib.torch_utils.solver.ranger import Ranger
# YOLOX-X detector config for the BOP YCB-V dataset (PBR training data only).
# Overrides values imported from .yolox_base.
train.update(
    output_dir=osp.abspath(__file__).replace("configs", "output", 1)[0:-3],
    exp_name=osp.split(osp.abspath(__file__))[1][0:-3],  # .py
)
train.amp.enabled = True

# yolox-x sized backbone (matches the yolox_x.pth checkpoint below)
model.backbone.depth = 1.33
model.backbone.width = 1.25
model.head.num_classes = 21  # YCB-V: 21 object classes
train.init_checkpoint = "pretrained_models/yolox/yolox_x.pth"

# datasets
DATASETS.TRAIN = ["ycbv_train_pbr"]
DATASETS.TEST = ["ycbv_bop_test"]
dataloader.train.dataset.lst.names = DATASETS.TRAIN
dataloader.train.total_batch_size = 32

# color aug: imgaug pipeline given as source code, evaluated by the aug wrapper
dataloader.train.aug_wrapper.COLOR_AUG_PROB = 0.8
dataloader.train.aug_wrapper.COLOR_AUG_TYPE = "code"
dataloader.train.aug_wrapper.COLOR_AUG_CODE = (
    "Sequential(["
    # Sometimes(0.5, PerspectiveTransform(0.05)),
    # Sometimes(0.5, CropAndPad(percent=(-0.05, 0.1))),
    # Sometimes(0.5, Affine(scale=(1.0, 1.2))),
    "Sometimes(0.5, CoarseDropout( p=0.2, size_percent=0.05) ),"
    "Sometimes(0.4, GaussianBlur((0., 3.))),"
    "Sometimes(0.3, pillike.EnhanceSharpness(factor=(0., 50.))),"
    "Sometimes(0.3, pillike.EnhanceContrast(factor=(0.2, 50.))),"
    "Sometimes(0.5, pillike.EnhanceBrightness(factor=(0.1, 6.))),"
    "Sometimes(0.3, pillike.EnhanceColor(factor=(0., 20.))),"
    "Sometimes(0.5, Add((-25, 25), per_channel=0.3)),"
    "Sometimes(0.3, Invert(0.2, per_channel=True)),"
    "Sometimes(0.5, Multiply((0.6, 1.4), per_channel=0.5)),"
    "Sometimes(0.5, Multiply((0.6, 1.4))),"
    "Sometimes(0.1, AdditiveGaussianNoise(scale=10, per_channel=True)),"
    "Sometimes(0.5, iaa.contrast.LinearContrast((0.5, 2.2), per_channel=0.3)),"
    # "Sometimes(0.5, Grayscale(alpha=(0.0, 1.0)))," # maybe remove for det
    "], random_order=True)"
    # cosy+aae
)

# hsv color aug (applied on top of the pipeline above)
dataloader.train.aug_wrapper.AUG_HSV_PROB = 1.0
dataloader.train.aug_wrapper.HSV_H = 0.015
dataloader.train.aug_wrapper.HSV_S = 0.7
dataloader.train.aug_wrapper.HSV_V = 0.4
dataloader.train.aug_wrapper.FORMAT = "RGB"

# Ranger optimizer; replaces the optimizer imported from the base config.
optimizer = L(Ranger)(
    params=L(get_default_optimizer_params)(
        # params.model is meant to be set to the model object, before instantiating
        # the optimizer.
        weight_decay_norm=0.0,
        weight_decay_bias=0.0,
    ),
    lr=0.001,  # bs=64
    # momentum=0.9,
    weight_decay=0,
    # nesterov=True,
)

train.total_epochs = 30
train.no_aug_epochs = 15  # strong augmentation is disabled for the last epochs
train.checkpointer = dict(period=2, max_to_keep=10)

# testing: augmented (multi-scale) inference over every test dataset
test.test_dataset_names = DATASETS.TEST
test.augment = True
test.scales = (1, 0.75, 0.83, 1.12, 1.25)
test.conf_thr = 0.001

# one test loader per test dataset (batch size 1)
dataloader.test = [
    L(build_yolox_test_loader)(
        dataset=L(Base_DatasetFromList)(
            split="test",
            lst=L(get_detection_dataset_dicts)(names=test_dataset_name, filter_empty=False),
            img_size="${test.test_size}",
            preproc=L(ValTransform)(
                legacy=False,
            ),
        ),
        total_batch_size=1,
        # total_batch_size=64,
        num_workers=4,
        pin_memory=True,
    )
    for test_dataset_name in test.test_dataset_names
]

# matching COCO-style evaluator per test dataset
dataloader.evaluator = [
    L(YOLOX_COCOEvaluator)(
        dataset_name=test_dataset_name,
        filter_scene=False,
    )
    for test_dataset_name in test.test_dataset_names
]
\ No newline at end of file
import os.path as osp
import torch
from detectron2.config import LazyCall as L
from detectron2.solver.build import get_default_optimizer_params
from .yolox_base import train, val, test, model, dataloader, optimizer, lr_config, DATASETS # noqa
from det.yolox.data import build_yolox_test_loader, ValTransform
from det.yolox.data.datasets import Base_DatasetFromList
from detectron2.data import get_detection_dataset_dicts
from det.yolox.evaluators import YOLOX_COCOEvaluator
from lib.torch_utils.solver.ranger import Ranger
# YOLOX-X detector config for the BOP YCB-V dataset (PBR + real training data).
# Overrides values imported from .yolox_base.
train.update(
    output_dir=osp.abspath(__file__).replace("configs", "output", 1)[0:-3],
    exp_name=osp.split(osp.abspath(__file__))[1][0:-3],  # .py
)
train.amp.enabled = True

# yolox-x sized backbone (matches the yolox_x.pth checkpoint below)
model.backbone.depth = 1.33
model.backbone.width = 1.25
model.head.num_classes = 21  # YCB-V: 21 object classes
train.init_checkpoint = "pretrained_models/yolox/yolox_x.pth"

# datasets
DATASETS.TRAIN = ["ycbv_train_pbr", "ycbv_train_real"]
DATASETS.TEST = ["ycbv_bop_test"]
dataloader.train.dataset.lst.names = DATASETS.TRAIN
dataloader.train.total_batch_size = 32

# color aug: imgaug pipeline given as source code, evaluated by the aug wrapper
dataloader.train.aug_wrapper.COLOR_AUG_PROB = 0.8
dataloader.train.aug_wrapper.COLOR_AUG_TYPE = "code"
dataloader.train.aug_wrapper.COLOR_AUG_CODE = (
    "Sequential(["
    # Sometimes(0.5, PerspectiveTransform(0.05)),
    # Sometimes(0.5, CropAndPad(percent=(-0.05, 0.1))),
    # Sometimes(0.5, Affine(scale=(1.0, 1.2))),
    "Sometimes(0.5, CoarseDropout( p=0.2, size_percent=0.05) ),"
    "Sometimes(0.4, GaussianBlur((0., 3.))),"
    "Sometimes(0.3, pillike.EnhanceSharpness(factor=(0., 50.))),"
    "Sometimes(0.3, pillike.EnhanceContrast(factor=(0.2, 50.))),"
    "Sometimes(0.5, pillike.EnhanceBrightness(factor=(0.1, 6.))),"
    "Sometimes(0.3, pillike.EnhanceColor(factor=(0., 20.))),"
    "Sometimes(0.5, Add((-25, 25), per_channel=0.3)),"
    "Sometimes(0.3, Invert(0.2, per_channel=True)),"
    "Sometimes(0.5, Multiply((0.6, 1.4), per_channel=0.5)),"
    "Sometimes(0.5, Multiply((0.6, 1.4))),"
    "Sometimes(0.1, AdditiveGaussianNoise(scale=10, per_channel=True)),"
    "Sometimes(0.5, iaa.contrast.LinearContrast((0.5, 2.2), per_channel=0.3)),"
    # "Sometimes(0.5, Grayscale(alpha=(0.0, 1.0)))," # maybe remove for det
    "], random_order=True)"
    # cosy+aae
)

# hsv color aug (applied on top of the pipeline above)
dataloader.train.aug_wrapper.AUG_HSV_PROB = 1.0
dataloader.train.aug_wrapper.HSV_H = 0.015
dataloader.train.aug_wrapper.HSV_S = 0.7
dataloader.train.aug_wrapper.HSV_V = 0.4
dataloader.train.aug_wrapper.FORMAT = "RGB"

# Ranger optimizer; replaces the optimizer imported from the base config.
optimizer = L(Ranger)(
    params=L(get_default_optimizer_params)(
        # params.model is meant to be set to the model object, before instantiating
        # the optimizer.
        weight_decay_norm=0.0,
        weight_decay_bias=0.0,
    ),
    lr=0.001,  # bs=64
    # momentum=0.9,
    weight_decay=0,
    # nesterov=True,
)

train.total_epochs = 30
train.no_aug_epochs = 15  # strong augmentation is disabled for the last epochs
train.checkpointer = dict(period=2, max_to_keep=10)

# testing: augmented (multi-scale) inference over every test dataset
test.test_dataset_names = DATASETS.TEST
test.augment = True
test.scales = (1, 0.75, 0.83, 1.12, 1.25)
test.conf_thr = 0.001

# one test loader per test dataset (batch size 1)
dataloader.test = [
    L(build_yolox_test_loader)(
        dataset=L(Base_DatasetFromList)(
            split="test",
            lst=L(get_detection_dataset_dicts)(names=test_dataset_name, filter_empty=False),
            img_size="${test.test_size}",
            preproc=L(ValTransform)(
                legacy=False,
            ),
        ),
        total_batch_size=1,
        # total_batch_size=64,
        num_workers=4,
        pin_memory=True,
    )
    for test_dataset_name in test.test_dataset_names
]

# matching COCO-style evaluator per test dataset
dataloader.evaluator = [
    L(YOLOX_COCOEvaluator)(
        dataset_name=test_dataset_name,
        filter_scene=False,
    )
    for test_dataset_name in test.test_dataset_names
]
\ No newline at end of file
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
from .utils import configure_module

# Apply package-wide runtime configuration as a side effect of importing
# det.yolox (see det.yolox.utils.configure_module).
configure_module()

__version__ = "0.1.0"
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
# Copyright (c) Megvii, Inc. and its affiliates.
from .data_augment import TrainTransform, ValTransform
from .data_prefetcher import DataPrefetcher
from .dataloading import (
DataLoader,
build_yolox_train_loader,
build_yolox_batch_data_loader,
build_yolox_test_loader,
)
from .dataloading import yolox_worker_init_reset_seed as worker_init_reset_seed
from .datasets import *
from .samplers import InfiniteSampler, YoloBatchSampler
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
# Copyright (c) Megvii, Inc. and its affiliates.
"""Data augmentation functionality. Passed as callable transformations to
Dataset classes.
The data augmentation procedures were interpreted from @weiliu89's SSD
paper http://arxiv.org/abs/1512.02325
"""
import math
import random
import cv2
import numpy as np
from det.yolox.utils import xyxy2cxcywh
def augment_hsv(img, hgain=0.5, sgain=0.5, vgain=0.5, source_format="BGR"):
    """Randomly jitter hue/saturation/value of ``img`` in place.

    Per-channel multiplicative gains are drawn from ``1 +- *gain`` and
    applied through 256-entry lookup tables; the result is written back
    into ``img`` (uint8), so nothing is returned.
    """
    gains = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1  # random gains
    rgb = source_format == "RGB"
    to_hsv = cv2.COLOR_RGB2HSV if rgb else cv2.COLOR_BGR2HSV
    from_hsv = cv2.COLOR_HSV2RGB if rgb else cv2.COLOR_HSV2BGR

    hue, sat, val = cv2.split(cv2.cvtColor(img, to_hsv))
    dtype = img.dtype  # uint8

    table = np.arange(0, 256, dtype=np.int16)
    lut_hue = ((table * gains[0]) % 180).astype(dtype)  # OpenCV hue range is [0, 180)
    lut_sat = np.clip(table * gains[1], 0, 255).astype(dtype)
    lut_val = np.clip(table * gains[2], 0, 255).astype(dtype)

    jittered = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val))).astype(dtype)
    cv2.cvtColor(jittered, from_hsv, dst=img)  # write result back into img; no return needed
# def augment_hsv(img, hgain=5, sgain=30, vgain=30):
# hsv_augs = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] # random gains
# hsv_augs *= np.random.randint(0, 2, 3) # random selection of h, s, v
# hsv_augs = hsv_augs.astype(np.int16)
# img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV).astype(np.int16)
# img_hsv[..., 0] = (img_hsv[..., 0] + hsv_augs[0]) % 180
# img_hsv[..., 1] = np.clip(img_hsv[..., 1] + hsv_augs[1], 0, 255)
# img_hsv[..., 2] = np.clip(img_hsv[..., 2] + hsv_augs[2], 0, 255)
# cv2.cvtColor(img_hsv.astype(img.dtype), cv2.COLOR_HSV2BGR, dst=img) # no return needed
def get_aug_params(value, center=0):
    """Sample a random affine parameter.

    Args:
        value: a single number ``v`` -> sampled from ``uniform(center - v, center + v)``;
            or a 2-sequence ``(lo, hi)`` -> sampled from ``uniform(lo, hi)``
            (``center`` is ignored in that case).
        center: midpoint used for the scalar form (default 0).

    Returns:
        float: the sampled parameter.

    Raises:
        ValueError: if ``value`` is neither a number nor a length-2 sequence.
    """
    # Accept int as well as float: the defaults used by get_affine_matrix /
    # random_affine (degrees=10, shear=10) are ints and previously raised
    # "TypeError: object of type 'int' has no len()".
    if isinstance(value, (int, float)):
        return random.uniform(center - value, center + value)
    elif len(value) == 2:
        return random.uniform(value[0], value[1])
    else:
        raise ValueError(
            "Affine params should be either a sequence containing two values "
            "or a single number. Got {}".format(value)
        )
def get_affine_matrix(
    target_size,
    degrees=10,
    translate=0.1,
    scales=0.1,
    shear=10,
):
    """Compose a random 2x3 affine matrix (rotate+scale, shear, translate).

    Returns:
        (M, scale): the 2x3 matrix for cv2.warpAffine and the sampled scale.
    """
    twidth, theight = target_size

    # rotation and scale
    angle = get_aug_params(degrees)
    scale = get_aug_params(scales, center=1.0)
    if scale <= 0.0:
        raise ValueError("Argument scale should be positive")
    rot = cv2.getRotationMatrix2D(angle=angle, center=(0, 0), scale=scale)

    M = np.ones([2, 3])

    # shear: tangents of two random angles (degrees), mixed into the rotation rows
    shear_x = math.tan(get_aug_params(shear) * math.pi / 180)
    shear_y = math.tan(get_aug_params(shear) * math.pi / 180)
    M[0] = rot[0] + shear_y * rot[1]
    M[1] = rot[1] + shear_x * rot[0]

    # translation as a fraction of the target size (pixels)
    M[0, 2] = get_aug_params(translate) * twidth
    M[1, 2] = get_aug_params(translate) * theight

    return M, scale
def apply_affine_to_bboxes(targets, target_size, M, scale):
    """Warp the xyxy boxes in ``targets[:, :4]`` by affine matrix ``M``.

    The new box is the axis-aligned hull of the four warped corners,
    clipped to ``target_size``.  ``targets`` is modified in place and
    also returned.  (``scale`` is accepted for interface compatibility
    but unused.)
    """
    num_boxes = len(targets)
    twidth, theight = target_size

    # homogeneous corner points per box: (x1,y1), (x2,y2), (x1,y2), (x2,y1)
    corners = np.ones((4 * num_boxes, 3))
    corners[:, :2] = targets[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(4 * num_boxes, 2)
    warped = (corners @ M.T).reshape(num_boxes, 8)

    # axis-aligned hull of the warped corners
    xs = warped[:, 0::2]
    ys = warped[:, 1::2]
    hull = np.concatenate((xs.min(1), ys.min(1), xs.max(1), ys.max(1))).reshape(4, num_boxes).T

    # clip to the target canvas
    hull[:, 0::2] = hull[:, 0::2].clip(0, twidth)
    hull[:, 1::2] = hull[:, 1::2].clip(0, theight)

    targets[:, :4] = hull
    return targets
def random_affine(
    img,
    targets=(),
    target_size=(640, 640),
    degrees=10,
    translate=0.1,
    scales=0.1,
    shear=10,
):
    """Apply one random affine warp to ``img`` and its xyxy ``targets``.

    Uncovered canvas is filled with gray (114, 114, 114).  Boxes (if any)
    are warped with the same matrix via :func:`apply_affine_to_bboxes`.
    """
    M, scale = get_affine_matrix(target_size, degrees, translate, scales, shear)
    img = cv2.warpAffine(img, M, dsize=target_size, borderValue=(114, 114, 114))

    if len(targets):  # transform label coordinates only when boxes exist
        targets = apply_affine_to_bboxes(targets, target_size, M, scale)

    return img, targets
def _mirror(image, boxes, prob=0.5):
_, width, _ = image.shape
if random.random() < prob:
image = image[:, ::-1]
boxes[:, 0::2] = width - boxes[:, 2::-2]
return image, boxes
def preproc(img, input_size, swap=(2, 0, 1)):
    """Letterbox-resize ``img`` into ``input_size`` padded with gray 114.

    The image is scaled by the largest ratio that fits, placed at the
    top-left of the canvas, and the axes are permuted by ``swap``
    (default HWC -> CHW).

    Returns:
        (padded float32 array, resize ratio).
    """
    if img.ndim == 3:
        canvas = np.full((input_size[0], input_size[1], 3), 114, dtype=np.uint8)
    else:
        canvas = np.full(input_size, 114, dtype=np.uint8)

    ratio = min(input_size[0] / img.shape[0], input_size[1] / img.shape[1])
    new_h = int(img.shape[0] * ratio)
    new_w = int(img.shape[1] * ratio)
    resized = cv2.resize(
        img,
        (new_w, new_h),
        interpolation=cv2.INTER_LINEAR,
    ).astype(np.uint8)
    canvas[:new_h, :new_w] = resized

    canvas = canvas.transpose(swap)
    canvas = np.ascontiguousarray(canvas, dtype=np.float32)
    return canvas, ratio
class TrainTransform:
    """Training-time preprocessing: optional HSV jitter, random horizontal
    flip, letterbox resize, xyxy -> (cx, cy, w, h) box conversion, and
    padding/truncation of the labels to a fixed row count.
    """

    def __init__(self, max_labels=50, flip_prob=0.5, hsv_prob=1.0):
        # max_labels: returned label array always has this many rows
        self.max_labels = max_labels
        self.flip_prob = flip_prob
        self.hsv_prob = hsv_prob

    def __call__(self, image, targets, input_dim):
        """Return (preprocessed image, (max_labels, 5) float32 labels).

        ``targets`` rows are [x1, y1, x2, y2, class]; output rows are
        [class, cx, cy, w, h] scaled by the letterbox resize ratio.
        """
        boxes = targets[:, :4].copy()
        labels = targets[:, 4].copy()
        if len(boxes) == 0:
            # no ground truth: resized image with an all-zero label block
            targets = np.zeros((self.max_labels, 5), dtype=np.float32)
            image, r_o = preproc(image, input_dim)
            return image, targets

        # untouched copies kept as a fallback if augmentation drops every box
        image_o = image.copy()
        targets_o = targets.copy()
        height_o, width_o, _ = image_o.shape
        # NOTE: boxes_o is a view into targets_o; xyxy2cxcywh converts it to
        # (cx, cy, w, h) — presumably in place (see det.yolox.utils); verify.
        boxes_o = targets_o[:, :4]
        labels_o = targets_o[:, 4]
        # bbox_o: [xyxy] to [c_x,c_y,w,h]
        boxes_o = xyxy2cxcywh(boxes_o)

        if random.random() < self.hsv_prob:
            augment_hsv(image)  # in-place color jitter
        image_t, boxes = _mirror(image, boxes, self.flip_prob)
        height, width, _ = image_t.shape
        image_t, r_ = preproc(image_t, input_dim)
        # boxes [xyxy] 2 [cx,cy,w,h]
        boxes = xyxy2cxcywh(boxes)
        boxes *= r_  # scale boxes by the letterbox resize ratio

        # drop boxes that became degenerate (width or height <= 1 px)
        mask_b = np.minimum(boxes[:, 2], boxes[:, 3]) > 1
        boxes_t = boxes[mask_b]
        labels_t = labels[mask_b]

        if len(boxes_t) == 0:
            # everything filtered out: fall back to the unaugmented image/GT
            image_t, r_o = preproc(image_o, input_dim)
            boxes_o *= r_o
            boxes_t = boxes_o
            labels_t = labels_o

        labels_t = np.expand_dims(labels_t, 1)

        # row layout: [class, cx, cy, w, h], zero-padded to max_labels rows
        targets_t = np.hstack((labels_t, boxes_t))
        padded_labels = np.zeros((self.max_labels, 5))
        padded_labels[range(len(targets_t))[: self.max_labels]] = targets_t[: self.max_labels]
        padded_labels = np.ascontiguousarray(padded_labels, dtype=np.float32)
        return image_t, padded_labels
class ValTransform:
    """Test-time preprocessing.

    Letterbox-resizes the input to ``input_size`` via :func:`preproc` and,
    when ``legacy`` is set, reverses the channel order and applies the old
    /255 + ImageNet mean/std normalization.  A dummy ``(1, 5)`` label
    array is returned since no ground truth is used at test time.
    """

    def __init__(self, swap=(2, 0, 1), legacy=False):
        # swap: axis permutation of the returned image (default HWC -> CHW)
        self.swap = swap
        self.legacy = legacy

    # assume input is a cv2 image for now
    def __call__(self, img, res, input_size):
        out, _ = preproc(img, input_size, self.swap)
        if self.legacy:
            out = out[::-1, :, :].copy()  # reverse channel order
            out /= 255.0
            out -= np.array([0.485, 0.456, 0.406]).reshape(3, 1, 1)
            out /= np.array([0.229, 0.224, 0.225]).reshape(3, 1, 1)
        return out, np.zeros((1, 5))
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
# Copyright (c) Megvii, Inc. and its affiliates.
import torch
class DataPrefetcher:
    """DataPrefetcher is inspired by code of following file:
    https://github.com/NVIDIA/apex/blob/master/examples/imagenet/main_amp.py
    It could speedup your pytorch dataloader. For more information, please check
    https://github.com/NVIDIA/apex/issues/304#issuecomment-493562789.
    """

    def __init__(self, loader):
        # Dedicated CUDA stream so host->device copies can overlap with
        # compute running on the default stream.
        self.loader_iter = iter(loader)
        self.stream = torch.cuda.Stream()
        self.input_cuda = self._input_cuda_for_image
        self.record_stream = DataPrefetcher._record_stream_for_image
        self.preload()

    def preload(self):
        """Fetch the next batch and start its asynchronous copy to the GPU."""
        try:
            # the loader yields 5-tuples; only (input, target) are prefetched
            self.next_input, self.next_target, _, _, _ = next(self.loader_iter)
        except StopIteration:
            # iterator exhausted: next() will return (None, None)
            self.next_input = None
            self.next_target = None
            return

        with torch.cuda.stream(self.stream):
            self.input_cuda()
            self.next_target = self.next_target.cuda(non_blocking=True)

    def next(self):
        """Return the prefetched (input, target) and kick off the next copy."""
        # block the current stream until the copy stream has finished
        torch.cuda.current_stream().wait_stream(self.stream)
        input = self.next_input
        target = self.next_target
        if input is not None:
            self.record_stream(input)
        if target is not None:
            target.record_stream(torch.cuda.current_stream())
        self.preload()
        return input, target

    def _input_cuda_for_image(self):
        # async copy of the image batch (runs inside self.stream in preload)
        self.next_input = self.next_input.cuda(non_blocking=True)

    @staticmethod
    def _record_stream_for_image(input):
        # tell the caching allocator the tensor is used on the current stream,
        # so its memory is not reused while the copy may still be in flight
        input.record_stream(torch.cuda.current_stream())
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
# Copyright (c) Megvii, Inc. and its affiliates.
import os
import random
import uuid
import numpy as np
import torch
from torch.utils.data.dataloader import DataLoader as torchDataLoader
from torch.utils.data.dataloader import default_collate
import operator
from detectron2.data.build import (
AspectRatioGroupedDataset,
worker_init_reset_seed,
trivial_batch_collator,
InferenceSampler,
)
from core.utils.my_comm import get_world_size
from .samplers import YoloBatchSampler, InfiniteSampler
# from .datasets import Base_DatasetFromList
class DataLoader(torchDataLoader):
    """Lightnet dataloader that enables on the fly resizing of the images.

    See :class:`torch.utils.data.DataLoader` for more information on the arguments.
    Check more on the following website:
    https://gitlab.com/EAVISE/lightnet/-/blob/master/lightnet/data/_dataloading.py

    If no ``batch_sampler`` is supplied, the sampler is wrapped in a
    :class:`YoloBatchSampler` so the input dimension can be changed per batch.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # NOTE: this class must keep the name ``DataLoader`` — the mangled
        # ``self.__initialized`` flag below intentionally collides with
        # torch's ``_DataLoader__initialized`` guard so ``batch_sampler``
        # can be replaced after construction.
        self.__initialized = False

        # Recover shuffle/sampler/batch_sampler whether they were passed
        # positionally or by keyword.  Positional layout mirrors
        # torch.utils.data.DataLoader:
        #   (dataset, batch_size, shuffle, sampler, batch_sampler, ...)
        # Fixes over the previous parsing:
        #   * ``sampler`` was left unbound (NameError) when not supplied;
        #   * a positional shuffle (len(args) == 3) or sampler
        #     (len(args) == 4) was silently ignored;
        #   * a positional batch_sampler required len(args) > 5 instead of > 4.
        shuffle = args[2] if len(args) > 2 else kwargs.get("shuffle", False)
        sampler = args[3] if len(args) > 3 else kwargs.get("sampler", None)
        batch_sampler = args[4] if len(args) > 4 else kwargs.get("batch_sampler", None)

        # Use custom BatchSampler
        if batch_sampler is None:
            if sampler is None:
                if shuffle:
                    sampler = torch.utils.data.sampler.RandomSampler(self.dataset)
                    # sampler = torch.utils.data.DistributedSampler(self.dataset)
                else:
                    sampler = torch.utils.data.sampler.SequentialSampler(self.dataset)
            batch_sampler = YoloBatchSampler(
                sampler,
                self.batch_size,
                self.drop_last,
                input_dimension=self.dataset.input_dim,
            )
        self.batch_sampler = batch_sampler

        self.__initialized = True

    def close_mosaic(self):
        """Disable mosaic augmentation on the underlying batch sampler."""
        self.batch_sampler.mosaic = False
# def list_collate(batch):
# """
# Function that collates lists or tuples together into one list (of lists/tuples).
# Use this as the collate function in a Dataloader, if you want to have a list of
# items as an output, as opposed to tensors (eg. Brambox.boxes).
# """
# items = list(zip(*batch))
# for i in range(len(items)):
# if isinstance(items[i][0], (list, tuple)):
# items[i] = list(items[i])
# else:
# items[i] = default_collate(items[i])
# return items
def build_yolox_batch_data_loader(
    dataset, sampler, total_batch_size, *, aspect_ratio_grouping=False, num_workers=0, pin_memory=False
):
    """Wrap ``dataset``/``sampler`` into a batched dataloader.

    Differences from a plain ``torch.utils.data.DataLoader``:
    1. optional aspect-ratio grouping;
    2. no batch collation — each batch is built by the (Yolo)BatchSampler.

    Args:
        dataset (torch.utils.data.Dataset): map-style dataset.
        sampler (torch.utils.data.sampler.Sampler): index sampler.
        total_batch_size (int): batch size summed over all GPUs; must be
            divisible by the world size.
        aspect_ratio_grouping (bool): group images of similar aspect ratio.
        num_workers (int): dataloader worker processes.
        pin_memory (bool): pass-through to the torch DataLoader.

    Returns:
        iterable[list]: per-GPU batches of dataset elements.
    """
    num_gpus = get_world_size()
    assert (
        total_batch_size > 0 and total_batch_size % num_gpus == 0
    ), "Total batch size ({}) must be divisible by the number of gpus ({}).".format(total_batch_size, num_gpus)
    per_gpu_batch_size = total_batch_size // num_gpus

    if aspect_ratio_grouping:
        # yield individual mapped elements; grouping happens downstream
        item_loader = torch.utils.data.DataLoader(
            dataset,
            sampler=sampler,
            num_workers=num_workers,
            batch_sampler=None,
            collate_fn=operator.itemgetter(0),  # don't batch, but yield individual elements
            worker_init_fn=worker_init_reset_seed,
        )
        return AspectRatioGroupedDataset(item_loader, per_gpu_batch_size)

    # mosaic flag is forwarded from the dataset when it exposes one
    yolo_batch_sampler = YoloBatchSampler(
        mosaic=getattr(dataset, "enable_mosaic", False),
        sampler=sampler,
        batch_size=per_gpu_batch_size,
        drop_last=False,  # NOTE: different to d2
    )
    return DataLoader(
        dataset,
        num_workers=num_workers,
        batch_sampler=yolo_batch_sampler,
        # collate_fn=trivial_batch_collator,  # TODO: use this when item is changed to dict
        worker_init_fn=worker_init_reset_seed,
        pin_memory=pin_memory,
    )
def build_yolox_train_loader(
    dataset,
    *,
    aug_wrapper,
    total_batch_size,
    sampler=None,
    aspect_ratio_grouping=False,
    num_workers=0,
    pin_memory=False,
    seed=None
):
    """Build the YOLOX training dataloader.

    Args:
        dataset (torch.utils.data.Dataset): map-style dataset
            (Base_DatasetFromList).
        aug_wrapper (callable): MosaicDetection-style wrapper (mosaic, mixup,
            other augs) installed around the dataset; may be None.
        total_batch_size (int): batch size summed over all workers.
        sampler: index sampler; defaults to an infinite shuffled sampler.
        aspect_ratio_grouping (bool): group images with similar aspect ratio;
            requires each element to carry "width" and "height".
        num_workers (int): number of parallel data loading workers.
        pin_memory (bool): pass-through to the torch DataLoader.
        seed: seed for the default InfiniteSampler (0 when None).

    Returns:
        torch.utils.data.DataLoader: yields per-GPU batches.
    """
    if aug_wrapper is not None:
        # MosaicDetection (mosaic, mixup, other augs)
        dataset = aug_wrapper.init_dataset(dataset)

    if sampler is None:
        sampler = InfiniteSampler(len(dataset), seed=seed if seed is not None else 0)
    assert isinstance(sampler, torch.utils.data.sampler.Sampler)

    return build_yolox_batch_data_loader(
        dataset,
        sampler,
        total_batch_size,
        aspect_ratio_grouping=aspect_ratio_grouping,
        num_workers=num_workers,
        pin_memory=pin_memory,
    )
def build_yolox_test_loader(
    dataset, *, aug_wrapper=None, total_batch_size=1, sampler=None, num_workers=0, pin_memory=False
):
    """Build the YOLOX inference dataloader.

    Uses :class:`InferenceSampler` by default so all workers together cover
    the exact dataset once.  Default total batch size is 1, the standard
    when reporting inference time.

    Args:
        dataset: map-style dataset (or list of dataset dicts).
        aug_wrapper (callable): optional MosaicDetection-style wrapper.
        total_batch_size (int): batch size summed over all workers.
        sampler: index sampler; defaults to :class:`InferenceSampler`.
        num_workers (int): number of parallel data loading workers.
        pin_memory (bool): pass-through to the torch DataLoader.

    Returns:
        torch.utils.data.DataLoader: test-time loader for the dataset.
    """
    if aug_wrapper is not None:
        # MosaicDetection (mosaic, mixup, other augs)
        dataset = aug_wrapper.init_dataset(dataset)

    per_gpu_batch_size = total_batch_size // get_world_size()
    if sampler is None:
        sampler = InferenceSampler(len(dataset))

    eval_batch_sampler = torch.utils.data.sampler.BatchSampler(sampler, per_gpu_batch_size, drop_last=False)
    return torch.utils.data.DataLoader(
        dataset,
        num_workers=num_workers,
        batch_sampler=eval_batch_sampler,
        # collate_fn=trivial_batch_collator,
        pin_memory=pin_memory,
    )
def yolox_worker_init_reset_seed(worker_id):
    """Give each dataloader worker an independent RNG state.

    Draws a fresh 32-bit seed from ``uuid4`` and applies it to the
    ``random``, ``torch`` and ``numpy`` generators, so forked workers do
    not all inherit the parent's identical state.
    """
    fresh_seed = uuid.uuid4().int % 2**32
    random.seed(fresh_seed)
    torch.set_rng_state(torch.manual_seed(fresh_seed).get_state())
    np.random.seed(fresh_seed)
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
# Copyright (c) Megvii, Inc. and its affiliates.
from .coco import COCODataset
from .coco_classes import COCO_CLASSES
from .datasets_wrapper import ConcatDataset, Dataset, MixConcatDataset
from .mosaicdetection import MosaicDetection
from .voc import VOCDetection
from .base_data_from_list import Base_DatasetFromList
This diff is collapsed.
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
# Copyright (c) Megvii, Inc. and its affiliates.
import os
from loguru import logger
import cv2
import numpy as np
from pycocotools.coco import COCO
from det.yolox.utils.setup_env import get_yolox_datadir
from .datasets_wrapper import Dataset
class COCODataset(Dataset):
    """COCO dataset class.

    Loads annotations through the pycocotools COCO API.  Each cached
    annotation entry is a tuple ``(res, img_info, resized_info, file_name)``
    where ``res`` is an ``(num_objs, 5)`` array of
    ``[x1, y1, x2, y2, class_index]`` already scaled to ``img_size``.
    """

    def __init__(
        self,
        data_dir=None,
        json_file="instances_train2017.json",
        name="train2017",
        img_size=(416, 416),
        preproc=None,
        cache=False,
    ):
        """COCO dataset initialization.

        Annotation data are read into memory by COCO API.
        Args:
            data_dir (str): dataset root directory
            json_file (str): COCO json file name
            name (str): COCO data name (e.g. 'train2017' or 'val2017')
            img_size (tuple): target image size after pre-processing
            preproc: data augmentation strategy
            cache (bool): if True, pre-resize every image into an on-disk
                memmap (see _cache_images) to speed up training
        """
        super().__init__(img_size)
        if data_dir is None:
            data_dir = os.path.join(get_yolox_datadir(), "coco")
        self.data_dir = data_dir
        self.json_file = json_file

        self.coco = COCO(os.path.join(self.data_dir, "annotations", self.json_file))
        self.ids = self.coco.getImgIds()
        self.class_ids = sorted(self.coco.getCatIds())
        cats = self.coco.loadCats(self.coco.getCatIds())
        self._classes = tuple([c["name"] for c in cats])
        self.imgs = None  # populated by _cache_images() when cache=True
        self.name = name
        self.img_size = img_size
        self.preproc = preproc
        self.annotations = self._load_coco_annotations()
        if cache:
            self._cache_images()

    def __len__(self):
        return len(self.ids)

    def __del__(self):
        # drop the (potentially huge) memmap reference on destruction
        del self.imgs

    def _load_coco_annotations(self):
        # one pre-parsed annotation tuple per image id
        return [self.load_anno_from_ids(_ids) for _ids in self.ids]

    def _cache_images(self):
        """Pre-resize all images into a disk-backed uint8 memmap."""
        logger.warning(
            "\n********************************************************************************\n"
            "You are using cached images in RAM to accelerate training.\n"
            "This requires large system RAM.\n"
            "Make sure you have 200G+ RAM and 136G available disk space for training COCO.\n"
            "********************************************************************************\n"
        )
        max_h = self.img_size[0]
        max_w = self.img_size[1]
        cache_file = self.data_dir + "/img_resized_cache_" + self.name + ".array"
        if not os.path.exists(cache_file):
            logger.info("Caching images for the first time. This might take about 20 minutes for COCO")
            self.imgs = np.memmap(
                cache_file,
                shape=(len(self.ids), max_h, max_w, 3),
                dtype=np.uint8,
                mode="w+",
            )
            from tqdm import tqdm
            from multiprocessing.pool import ThreadPool

            # resize images concurrently; I/O-bound so threads are enough
            NUM_THREADs = min(8, os.cpu_count())
            loaded_images = ThreadPool(NUM_THREADs).imap(
                lambda x: self.load_resized_img(x),
                range(len(self.annotations)),
            )
            pbar = tqdm(enumerate(loaded_images), total=len(self.annotations))
            for k, out in pbar:
                # each image occupies the top-left corner of its fixed slot
                self.imgs[k][: out.shape[0], : out.shape[1], :] = out.copy()
            self.imgs.flush()
            pbar.close()
        else:
            logger.warning(
                "You are using cached imgs! Make sure your dataset is not changed!!\n"
                "Everytime the self.input_size is changed in your exp file, you need to delete\n"
                "the cached data and re-generate them.\n"
            )
            logger.info("Loading cached imgs...")
            self.imgs = np.memmap(
                cache_file,
                shape=(len(self.ids), max_h, max_w, 3),
                dtype=np.uint8,
                mode="r+",
            )

    def load_anno_from_ids(self, id_):
        """Parse one image's annotations into the cached tuple format."""
        im_ann = self.coco.loadImgs(id_)[0]
        width = im_ann["width"]
        height = im_ann["height"]
        anno_ids = self.coco.getAnnIds(imgIds=[int(id_)], iscrowd=False)
        annotations = self.coco.loadAnns(anno_ids)
        objs = []
        for obj in annotations:
            # clamp xywh boxes to the image and convert to xyxy
            x1 = np.max((0, obj["bbox"][0]))
            y1 = np.max((0, obj["bbox"][1]))
            x2 = np.min((width, x1 + np.max((0, obj["bbox"][2]))))
            y2 = np.min((height, y1 + np.max((0, obj["bbox"][3]))))
            if obj["area"] > 0 and x2 >= x1 and y2 >= y1:
                obj["clean_bbox"] = [x1, y1, x2, y2]
                objs.append(obj)

        num_objs = len(objs)

        res = np.zeros((num_objs, 5))

        for ix, obj in enumerate(objs):
            # contiguous class index, not the raw COCO category id
            cls = self.class_ids.index(obj["category_id"])
            res[ix, 0:4] = obj["clean_bbox"]
            res[ix, 4] = cls

        # pre-scale boxes to the network input size
        r = min(self.img_size[0] / height, self.img_size[1] / width)
        res[:, :4] *= r

        img_info = (height, width)
        resized_info = (int(height * r), int(width * r))

        file_name = im_ann["file_name"] if "file_name" in im_ann else "{:012}".format(id_) + ".jpg"

        return (res, img_info, resized_info, file_name)

    def load_anno(self, index):
        # boxes/classes only, without image info
        return self.annotations[index][0]

    def load_resized_img(self, index):
        """Load image ``index`` resized to fit ``img_size`` (aspect kept)."""
        img = self.load_image(index)
        r = min(self.img_size[0] / img.shape[0], self.img_size[1] / img.shape[1])
        resized_img = cv2.resize(
            img,
            (int(img.shape[1] * r), int(img.shape[0] * r)),
            interpolation=cv2.INTER_LINEAR,
        ).astype(np.uint8)
        return resized_img

    def load_image(self, index):
        """Read the raw image for ``index`` from disk (BGR, via cv2)."""
        file_name = self.annotations[index][3]

        img_file = os.path.join(self.data_dir, self.name, file_name)

        img = cv2.imread(img_file)
        assert img is not None

        return img

    def pull_item(self, index):
        """Return (resized image, labels copy, (h, w), image id array)."""
        id_ = self.ids[index]

        res, img_info, resized_info, _ = self.annotations[index]
        if self.imgs is not None:
            # cached path: crop the valid region out of the fixed-size slot
            pad_img = self.imgs[index]
            img = pad_img[: resized_info[0], : resized_info[1], :].copy()
        else:
            img = self.load_resized_img(index)

        return img, res.copy(), img_info, np.array([id_])

    @Dataset.mosaic_getitem
    def __getitem__(self, index):
        """One image / label pair for the given index is picked up and pre-
        processed.

        Args:
            index (int): data index

        Returns:
            img (numpy.ndarray): pre-processed image
            padded_labels (torch.Tensor): pre-processed label data.
                The shape is :math:`[max_labels, 5]`; row layout depends on
                ``self.preproc`` (e.g. [class, cx, cy, w, h] for
                TrainTransform, in resized-image pixel scale).
            info_img : tuple of h, w.
                h, w (int): original shape of the image
            img_id (int): same as the input index. Used for evaluation.
        """
        img, target, img_info, img_id = self.pull_item(index)

        if self.preproc is not None:
            img, target = self.preproc(img, target, self.input_dim)
        return img, target, img_info, img_id
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment