Skip to content
Snippets Groups Projects
Commit de5eb6a7 authored by asamsone's avatar asamsone
Browse files

added files for fruitbin

parent 57283a76
No related branches found
No related tags found
No related merge requests found
Showing
with 4103 additions and 0 deletions
# GDRN config: ConvNeXt backbone, class-aware heads, trained on YCB-V real+pbr.
# Inherits defaults from gdrn_base.py (mmcv-style config inheritance).
# about 3 days
_base_ = ["../../_base_/gdrn_base.py"]
OUTPUT_DIR = "output/gdrn/ycbv/convnext_a6_AugCosyAAEGray_BG05_mlL1_DMask_amodalClipBox_classAware_ycbv"
INPUT = dict(
    DZI_PAD_SCALE=1.5,
    TRUNCATE_FG=True,
    CHANGE_BG_PROB=0.5,
    COLOR_AUG_PROB=0.8,
    COLOR_AUG_TYPE="code",
    # imgaug pipeline given as a code string; evaluated by the dataloader at runtime.
    COLOR_AUG_CODE=(
        "Sequential(["
        # Sometimes(0.5, PerspectiveTransform(0.05)),
        # Sometimes(0.5, CropAndPad(percent=(-0.05, 0.1))),
        # Sometimes(0.5, Affine(scale=(1.0, 1.2))),
        "Sometimes(0.5, CoarseDropout( p=0.2, size_percent=0.05) ),"
        "Sometimes(0.4, GaussianBlur((0., 3.))),"
        "Sometimes(0.3, pillike.EnhanceSharpness(factor=(0., 50.))),"
        "Sometimes(0.3, pillike.EnhanceContrast(factor=(0.2, 50.))),"
        "Sometimes(0.5, pillike.EnhanceBrightness(factor=(0.1, 6.))),"
        "Sometimes(0.3, pillike.EnhanceColor(factor=(0., 20.))),"
        "Sometimes(0.5, Add((-25, 25), per_channel=0.3)),"
        "Sometimes(0.3, Invert(0.2, per_channel=True)),"
        "Sometimes(0.5, Multiply((0.6, 1.4), per_channel=0.5)),"
        "Sometimes(0.5, Multiply((0.6, 1.4))),"
        "Sometimes(0.1, AdditiveGaussianNoise(scale=10, per_channel=True)),"
        "Sometimes(0.5, iaa.contrast.LinearContrast((0.5, 2.2), per_channel=0.3)),"
        "Sometimes(0.5, Grayscale(alpha=(0.0, 1.0))),"  # maybe remove for det
        "], random_order=True)"
        # cosy+aae
    ),
)
SOLVER = dict(
    IMS_PER_BATCH=48,
    TOTAL_EPOCHS=40,  # 10
    LR_SCHEDULER_NAME="flat_and_anneal",
    ANNEAL_METHOD="cosine",  # "cosine"
    ANNEAL_POINT=0.72,
    # _delete_ replaces (instead of merges with) the base optimizer config.
    OPTIMIZER_CFG=dict(_delete_=True, type="Ranger", lr=8e-4, weight_decay=0.01),
    WEIGHT_DECAY=0.0,
    WARMUP_FACTOR=0.001,
    WARMUP_ITERS=1000,
)
DATASETS = dict(
    TRAIN=("ycbv_train_real", "ycbv_train_pbr"),
    TEST=("ycbv_test",),
    # detections used at test time (est bboxes, see TEST.TEST_BBOX_TYPE below)
    DET_FILES_TEST=("datasets/BOP_DATASETS/ycbv/test/test_bboxes/yolox_x_640_ycbv_real_pbr_ycbv_bop_test.json",),
    SYM_OBJS=[
        "024_bowl",
        "036_wood_block",
        "051_large_clamp",
        "052_extra_large_clamp",
        "061_foam_brick",
    ],  # used for custom evaluator
)
DATALOADER = dict(
    # Number of data loading threads
    NUM_WORKERS=8,
    FILTER_VISIB_THR=0.3,
)
MODEL = dict(
    LOAD_DETS_TEST=True,
    PIXEL_MEAN=[0.0, 0.0, 0.0],
    PIXEL_STD=[255.0, 255.0, 255.0],
    BBOX_TYPE="AMODAL_CLIP",  # VISIB or AMODAL
    POSE_NET=dict(
        NAME="GDRN_double_mask",
        XYZ_ONLINE=True,
        NUM_CLASSES=21,
        BACKBONE=dict(
            FREEZE=False,
            PRETRAINED="timm",
            INIT_CFG=dict(
                type="timm/convnext_base",
                pretrained=True,
                in_chans=3,
                features_only=True,
                out_indices=(3,),
            ),
        ),
        ## geo head: Mask, XYZ, Region
        GEO_HEAD=dict(
            FREEZE=False,
            INIT_CFG=dict(
                type="TopDownDoubleMaskXyzRegionHead",
                in_dim=1024,  # this is num out channels of backbone conv feature
            ),
            NUM_REGIONS=64,
            # class-aware variant: separate output channels per object class
            XYZ_CLASS_AWARE=True,
            MASK_CLASS_AWARE=True,
            REGION_CLASS_AWARE=True,
        ),
        PNP_NET=dict(
            INIT_CFG=dict(norm="GN", act="gelu"),
            REGION_ATTENTION=True,
            WITH_2D_COORD=True,
            ROT_TYPE="allo_rot6d",
            TRANS_TYPE="centroid_z",
        ),
        LOSS_CFG=dict(
            # xyz loss ----------------------------
            XYZ_LOSS_TYPE="L1",  # L1 | CE_coor
            XYZ_LOSS_MASK_GT="visib",  # trunc | visib | obj
            XYZ_LW=1.0,
            # mask loss ---------------------------
            MASK_LOSS_TYPE="L1",  # L1 | BCE | CE
            MASK_LOSS_GT="trunc",  # trunc | visib | gt
            MASK_LW=1.0,
            # full mask loss ---------------------------
            FULL_MASK_LOSS_TYPE="L1",  # L1 | BCE | CE
            FULL_MASK_LW=1.0,
            # region loss -------------------------
            REGION_LOSS_TYPE="CE",  # CE
            REGION_LOSS_MASK_GT="visib",  # trunc | visib | obj
            REGION_LW=1.0,
            # pm loss --------------
            PM_LOSS_SYM=True,  # NOTE: sym loss
            PM_R_ONLY=True,  # only do R loss in PM
            PM_LW=1.0,
            # centroid loss -------
            CENTROID_LOSS_TYPE="L1",
            CENTROID_LW=1.0,
            # z loss -----------
            Z_LOSS_TYPE="L1",
            Z_LW=1.0,
        ),
    ),
)
VAL = dict(
    DATASET_NAME="ycbv",
    SPLIT_TYPE="",
    SCRIPT_PATH="lib/pysixd/scripts/eval_pose_results_more.py",
    TARGETS_FILENAME="test_targets_bop19.json",
    ERROR_TYPES="vsd,mspd,mssd",
    USE_BOP=True,  # whether to use bop toolkit
)
TEST = dict(EVAL_PERIOD=0, VIS=False, TEST_BBOX_TYPE="est")  # gt | est
_base_ = ["../../../_base_/gdrn_base.py"]
OUTPUT_DIR = "output/gdrn/ycbvPbrSO/convnext_AugCosyAAEGray_DMask_amodalClipBox_ycbv/002_master_chef_can"
INPUT = dict(
DZI_PAD_SCALE=1.5,
TRUNCATE_FG=True,
CHANGE_BG_PROB=0.5,
COLOR_AUG_PROB=0.8,
COLOR_AUG_TYPE="code",
COLOR_AUG_CODE=(
"Sequential(["
# Sometimes(0.5, PerspectiveTransform(0.05)),
# Sometimes(0.5, CropAndPad(percent=(-0.05, 0.1))),
# Sometimes(0.5, Affine(scale=(1.0, 1.2))),
"Sometimes(0.5, CoarseDropout( p=0.2, size_percent=0.05) ),"
"Sometimes(0.4, GaussianBlur((0., 3.))),"
"Sometimes(0.3, pillike.EnhanceSharpness(factor=(0., 50.))),"
"Sometimes(0.3, pillike.EnhanceContrast(factor=(0.2, 50.))),"
"Sometimes(0.5, pillike.EnhanceBrightness(factor=(0.1, 6.))),"
"Sometimes(0.3, pillike.EnhanceColor(factor=(0., 20.))),"
"Sometimes(0.5, Add((-25, 25), per_channel=0.3)),"
"Sometimes(0.3, Invert(0.2, per_channel=True)),"
"Sometimes(0.5, Multiply((0.6, 1.4), per_channel=0.5)),"
"Sometimes(0.5, Multiply((0.6, 1.4))),"
"Sometimes(0.1, AdditiveGaussianNoise(scale=10, per_channel=True)),"
"Sometimes(0.5, iaa.contrast.LinearContrast((0.5, 2.2), per_channel=0.3)),"
"Sometimes(0.5, Grayscale(alpha=(0.0, 1.0)))," # maybe remove for det
"], random_order=True)"
# cosy+aae
),
)
SOLVER = dict(
IMS_PER_BATCH=36,
TOTAL_EPOCHS=100,
LR_SCHEDULER_NAME="flat_and_anneal",
ANNEAL_METHOD="cosine", # "cosine"
ANNEAL_POINT=0.72,
OPTIMIZER_CFG=dict(_delete_=True, type="Ranger", lr=8e-4, weight_decay=0.01),
WEIGHT_DECAY=0.0,
WARMUP_FACTOR=0.001,
WARMUP_ITERS=1000,
)
DATASETS = dict(
TRAIN=("ycbv_002_master_chef_can_train_pbr",),
TEST=("ycbv_test",),
DET_FILES_TEST=("datasets/BOP_DATASETS/ycbv/test/test_bboxes/yolox_x_640_ycbv_pbr_ycbv_bop_test.json",),
SYM_OBJS=[
"024_bowl",
"036_wood_block",
"051_large_clamp",
"052_extra_large_clamp",
"061_foam_brick",
], # used for custom evalutor
)
DATALOADER = dict(
# Number of data loading threads
NUM_WORKERS=8,
FILTER_VISIB_THR=0.3,
)
MODEL = dict(
LOAD_DETS_TEST=True,
PIXEL_MEAN=[0.0, 0.0, 0.0],
PIXEL_STD=[255.0, 255.0, 255.0],
BBOX_TYPE="AMODAL_CLIP", # VISIB or AMODAL
POSE_NET=dict(
NAME="GDRN_double_mask",
XYZ_ONLINE=True,
BACKBONE=dict(
FREEZE=False,
PRETRAINED="timm",
INIT_CFG=dict(
type="timm/convnext_base",
pretrained=True,
in_chans=3,
features_only=True,
out_indices=(3,),
),
),
## geo head: Mask, XYZ, Region
GEO_HEAD=dict(
FREEZE=False,
INIT_CFG=dict(
type="TopDownDoubleMaskXyzRegionHead",
in_dim=1024, # this is num out channels of backbone conv feature
),
NUM_REGIONS=64,
),
PNP_NET=dict(
INIT_CFG=dict(norm="GN", act="gelu"),
REGION_ATTENTION=True,
WITH_2D_COORD=True,
ROT_TYPE="allo_rot6d",
TRANS_TYPE="centroid_z",
),
LOSS_CFG=dict(
# xyz loss ----------------------------
XYZ_LOSS_TYPE="L1", # L1 | CE_coor
XYZ_LOSS_MASK_GT="visib", # trunc | visib | obj
XYZ_LW=1.0,
# mask loss ---------------------------
MASK_LOSS_TYPE="L1", # L1 | BCE | CE
MASK_LOSS_GT="trunc", # trunc | visib | gt
MASK_LW=1.0,
# full mask loss ---------------------------
FULL_MASK_LOSS_TYPE="L1", # L1 | BCE | CE
FULL_MASK_LW=1.0,
# region loss -------------------------
REGION_LOSS_TYPE="CE", # CE
REGION_LOSS_MASK_GT="visib", # trunc | visib | obj
REGION_LW=1.0,
# pm loss --------------
PM_LOSS_SYM=True, # NOTE: sym loss
PM_R_ONLY=True, # only do R loss in PM
PM_LW=1.0,
# centroid loss -------
CENTROID_LOSS_TYPE="L1",
CENTROID_LW=1.0,
# z loss -----------
Z_LOSS_TYPE="L1",
Z_LW=1.0,
),
),
)
VAL = dict(
DATASET_NAME="ycbv",
SPLIT_TYPE="",
SCRIPT_PATH="lib/pysixd/scripts/eval_pose_results_more.py",
TARGETS_FILENAME="test_targets_bop19.json",
ERROR_TYPES="vsd,mspd,mssd",
USE_BOP=True, # whether to use bop toolkit
)
TEST = dict(EVAL_PERIOD=0, VIS=False, TEST_BBOX_TYPE="est") # gt | est
_base_ = "./002_master_chef_can.py"
OUTPUT_DIR = "output/gdrn/ycbvPbrSO/convnext_AugCosyAAEGray_DMask_amodalClipBox_ycbv/003_cracker_box"
DATASETS = dict(TRAIN=("ycbv_003_cracker_box_train_pbr",))
_base_ = "./002_master_chef_can.py"
OUTPUT_DIR = "output/gdrn/ycbvPbrSO/convnext_AugCosyAAEGray_DMask_amodalClipBox_ycbv/004_sugar_box"
DATASETS = dict(TRAIN=("ycbv_004_sugar_box_train_pbr",))
_base_ = ["../../../_base_/gdrn_base.py"]
OUTPUT_DIR = "output/gdrn/ycbvSO/convnext_AugCosyAAEGray_DMask_amodalClipBox_ycbv/002_master_chef_can"
INPUT = dict(
DZI_PAD_SCALE=1.5,
TRUNCATE_FG=True,
CHANGE_BG_PROB=0.5,
COLOR_AUG_PROB=0.8,
COLOR_AUG_TYPE="code",
COLOR_AUG_CODE=(
"Sequential(["
# Sometimes(0.5, PerspectiveTransform(0.05)),
# Sometimes(0.5, CropAndPad(percent=(-0.05, 0.1))),
# Sometimes(0.5, Affine(scale=(1.0, 1.2))),
"Sometimes(0.5, CoarseDropout( p=0.2, size_percent=0.05) ),"
"Sometimes(0.4, GaussianBlur((0., 3.))),"
"Sometimes(0.3, pillike.EnhanceSharpness(factor=(0., 50.))),"
"Sometimes(0.3, pillike.EnhanceContrast(factor=(0.2, 50.))),"
"Sometimes(0.5, pillike.EnhanceBrightness(factor=(0.1, 6.))),"
"Sometimes(0.3, pillike.EnhanceColor(factor=(0., 20.))),"
"Sometimes(0.5, Add((-25, 25), per_channel=0.3)),"
"Sometimes(0.3, Invert(0.2, per_channel=True)),"
"Sometimes(0.5, Multiply((0.6, 1.4), per_channel=0.5)),"
"Sometimes(0.5, Multiply((0.6, 1.4))),"
"Sometimes(0.1, AdditiveGaussianNoise(scale=10, per_channel=True)),"
"Sometimes(0.5, iaa.contrast.LinearContrast((0.5, 2.2), per_channel=0.3)),"
"Sometimes(0.5, Grayscale(alpha=(0.0, 1.0)))," # maybe remove for det
"], random_order=True)"
# cosy+aae
),
)
SOLVER = dict(
IMS_PER_BATCH=36,
TOTAL_EPOCHS=100,
LR_SCHEDULER_NAME="flat_and_anneal",
ANNEAL_METHOD="cosine", # "cosine"
ANNEAL_POINT=0.72,
OPTIMIZER_CFG=dict(_delete_=True, type="Ranger", lr=8e-4, weight_decay=0.01),
WEIGHT_DECAY=0.0,
WARMUP_FACTOR=0.001,
WARMUP_ITERS=1000,
)
DATASETS = dict(
TRAIN=("ycbv_002_master_chef_can_train_pbr", "ycbv_002_master_chef_can_train_real"),
TEST=("ycbv_test",),
DET_FILES_TEST=("datasets/BOP_DATASETS/ycbv/test/test_bboxes/yolox_x_640_ycbv_real_pbr_ycbv_bop_test.json",),
SYM_OBJS=[
"024_bowl",
"036_wood_block",
"051_large_clamp",
"052_extra_large_clamp",
"061_foam_brick",
], # used for custom evalutor
)
DATALOADER = dict(
# Number of data loading threads
NUM_WORKERS=8,
FILTER_VISIB_THR=0.3,
)
MODEL = dict(
LOAD_DETS_TEST=True,
PIXEL_MEAN=[0.0, 0.0, 0.0],
PIXEL_STD=[255.0, 255.0, 255.0],
BBOX_TYPE="AMODAL_CLIP", # VISIB or AMODAL
POSE_NET=dict(
NAME="GDRN_double_mask",
XYZ_ONLINE=True,
BACKBONE=dict(
FREEZE=False,
PRETRAINED="timm",
INIT_CFG=dict(
type="timm/convnext_base",
pretrained=True,
in_chans=3,
features_only=True,
out_indices=(3,),
),
),
## geo head: Mask, XYZ, Region
GEO_HEAD=dict(
FREEZE=False,
INIT_CFG=dict(
type="TopDownDoubleMaskXyzRegionHead",
in_dim=1024, # this is num out channels of backbone conv feature
),
NUM_REGIONS=64,
),
PNP_NET=dict(
INIT_CFG=dict(norm="GN", act="gelu"),
REGION_ATTENTION=True,
WITH_2D_COORD=True,
ROT_TYPE="allo_rot6d",
TRANS_TYPE="centroid_z",
),
LOSS_CFG=dict(
# xyz loss ----------------------------
XYZ_LOSS_TYPE="L1", # L1 | CE_coor
XYZ_LOSS_MASK_GT="visib", # trunc | visib | obj
XYZ_LW=1.0,
# mask loss ---------------------------
MASK_LOSS_TYPE="L1", # L1 | BCE | CE
MASK_LOSS_GT="trunc", # trunc | visib | gt
MASK_LW=1.0,
# full mask loss ---------------------------
FULL_MASK_LOSS_TYPE="L1", # L1 | BCE | CE
FULL_MASK_LW=1.0,
# region loss -------------------------
REGION_LOSS_TYPE="CE", # CE
REGION_LOSS_MASK_GT="visib", # trunc | visib | obj
REGION_LW=1.0,
# pm loss --------------
PM_LOSS_SYM=True, # NOTE: sym loss
PM_R_ONLY=True, # only do R loss in PM
PM_LW=1.0,
# centroid loss -------
CENTROID_LOSS_TYPE="L1",
CENTROID_LW=1.0,
# z loss -----------
Z_LOSS_TYPE="L1",
Z_LW=1.0,
),
),
)
VAL = dict(
DATASET_NAME="ycbv",
SPLIT_TYPE="",
SCRIPT_PATH="lib/pysixd/scripts/eval_pose_results_more.py",
TARGETS_FILENAME="test_targets_bop19.json",
ERROR_TYPES="vsd,mspd,mssd",
USE_BOP=True, # whether to use bop toolkit
)
TEST = dict(EVAL_PERIOD=0, VIS=False, TEST_BBOX_TYPE="est") # gt | est
_base_ = "./002_master_chef_can.py"
OUTPUT_DIR = "output/gdrn/ycbvSO/convnext_AugCosyAAEGray_DMask_amodalClipBox_ycbv/003_cracker_box"
DATASETS = dict(TRAIN=("ycbv_003_cracker_box_train_pbr", "ycbv_003_cracker_box_train_real"))
_base_ = "./002_master_chef_can.py"
OUTPUT_DIR = "output/gdrn/ycbvSO/convnext_AugCosyAAEGray_DMask_amodalClipBox_ycbv/004_sugar_box"
DATASETS = dict(TRAIN=("ycbv_004_sugar_box_train_pbr", "ycbv_004_sugar_box_train_real"))
# detectron2 LazyConfig for a YOLOX-X detector on YCB-V PBR data.
# Mutates the train/model/dataloader/test objects imported from yolox_base,
# so statement order matters here.
import os.path as osp
import torch
from detectron2.config import LazyCall as L
from detectron2.solver.build import get_default_optimizer_params
from .yolox_base import train, val, test, model, dataloader, optimizer, lr_config, DATASETS  # noqa
from det.yolox.data import build_yolox_test_loader, ValTransform
from det.yolox.data.datasets import Base_DatasetFromList
from detectron2.data import get_detection_dataset_dicts
from det.yolox.evaluators import YOLOX_COCOEvaluator
from lib.torch_utils.solver.ranger import Ranger

# Derive output_dir/exp_name from this config file's own path:
# "configs/..." -> "output/...", stripping the trailing ".py".
train.update(
    output_dir=osp.abspath(__file__).replace("configs", "output", 1)[0:-3],
    exp_name=osp.split(osp.abspath(__file__))[1][0:-3],  # .py
)
train.amp.enabled = True
# yolox-x depth/width multipliers; 21 YCB-V object classes.
model.backbone.depth = 1.33
model.backbone.width = 1.25
model.head.num_classes = 21
train.init_checkpoint = "pretrained_models/yolox/yolox_x.pth"
# datasets
DATASETS.TRAIN = ["ycbv_train_pbr"]
DATASETS.TEST = ["ycbv_bop_test"]
dataloader.train.dataset.lst.names = DATASETS.TRAIN
dataloader.train.total_batch_size = 32
# color aug (imgaug pipeline as a code string, evaluated at runtime)
dataloader.train.aug_wrapper.COLOR_AUG_PROB = 0.8
dataloader.train.aug_wrapper.COLOR_AUG_TYPE = "code"
dataloader.train.aug_wrapper.COLOR_AUG_CODE = (
    "Sequential(["
    # Sometimes(0.5, PerspectiveTransform(0.05)),
    # Sometimes(0.5, CropAndPad(percent=(-0.05, 0.1))),
    # Sometimes(0.5, Affine(scale=(1.0, 1.2))),
    "Sometimes(0.5, CoarseDropout( p=0.2, size_percent=0.05) ),"
    "Sometimes(0.4, GaussianBlur((0., 3.))),"
    "Sometimes(0.3, pillike.EnhanceSharpness(factor=(0., 50.))),"
    "Sometimes(0.3, pillike.EnhanceContrast(factor=(0.2, 50.))),"
    "Sometimes(0.5, pillike.EnhanceBrightness(factor=(0.1, 6.))),"
    "Sometimes(0.3, pillike.EnhanceColor(factor=(0., 20.))),"
    "Sometimes(0.5, Add((-25, 25), per_channel=0.3)),"
    "Sometimes(0.3, Invert(0.2, per_channel=True)),"
    "Sometimes(0.5, Multiply((0.6, 1.4), per_channel=0.5)),"
    "Sometimes(0.5, Multiply((0.6, 1.4))),"
    "Sometimes(0.1, AdditiveGaussianNoise(scale=10, per_channel=True)),"
    "Sometimes(0.5, iaa.contrast.LinearContrast((0.5, 2.2), per_channel=0.3)),"
    # "Sometimes(0.5, Grayscale(alpha=(0.0, 1.0)))," # maybe remove for det
    "], random_order=True)"
    # cosy+aae
)
# hsv color aug
dataloader.train.aug_wrapper.AUG_HSV_PROB = 1.0
dataloader.train.aug_wrapper.HSV_H = 0.015
dataloader.train.aug_wrapper.HSV_S = 0.7
dataloader.train.aug_wrapper.HSV_V = 0.4
dataloader.train.aug_wrapper.FORMAT = "RGB"
# Ranger optimizer replaces the base optimizer (LazyCall: instantiated later).
optimizer = L(Ranger)(
    params=L(get_default_optimizer_params)(
        # params.model is meant to be set to the model object, before instantiating
        # the optimizer.
        weight_decay_norm=0.0,
        weight_decay_bias=0.0,
    ),
    lr=0.001,  # bs=64
    # momentum=0.9,
    weight_decay=0,
    # nesterov=True,
)
train.total_epochs = 30
train.no_aug_epochs = 15  # strong aug disabled for the final epochs
train.checkpointer = dict(period=2, max_to_keep=10)
# test-time augmentation: multi-scale + very low conf threshold
test.test_dataset_names = DATASETS.TEST
test.augment = True
test.scales = (1, 0.75, 0.83, 1.12, 1.25)
test.conf_thr = 0.001
# one test loader / one evaluator per test dataset
dataloader.test = [
    L(build_yolox_test_loader)(
        dataset=L(Base_DatasetFromList)(
            split="test",
            lst=L(get_detection_dataset_dicts)(names=test_dataset_name, filter_empty=False),
            img_size="${test.test_size}",
            preproc=L(ValTransform)(
                legacy=False,
            ),
        ),
        total_batch_size=1,
        # total_batch_size=64,
        num_workers=4,
        pin_memory=True,
    )
    for test_dataset_name in test.test_dataset_names
]
dataloader.evaluator = [
    L(YOLOX_COCOEvaluator)(
        dataset_name=test_dataset_name,
        filter_scene=False,
    )
    for test_dataset_name in test.test_dataset_names
]
import hashlib
import logging
import os
import os.path as osp
import sys
cur_dir = osp.dirname(osp.abspath(__file__))
PROJ_ROOT = osp.normpath(osp.join(cur_dir, "../../.."))
sys.path.insert(0, PROJ_ROOT)
import time
from collections import OrderedDict
import mmcv
import numpy as np
from tqdm import tqdm
from transforms3d.quaternions import mat2quat, quat2mat
import ref
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.structures import BoxMode
from lib.pysixd import inout, misc
from lib.utils.mask_utils import binary_mask_to_rle, cocosegm2mask
from lib.utils.utils import dprint, iprint, lazy_property
logger = logging.getLogger(__name__)
DATASETS_ROOT = osp.normpath(osp.join(PROJ_ROOT, "datasets"))
class YCBV_BOP_TEST_Dataset:
    """ycbv bop test.

    Loads the BOP'19 ycbv test split (image list taken from a targets json)
    into a list of Detectron2-style dataset dicts; the result is cached to
    disk so masks/annotations are only parsed once per configuration.
    """

    def __init__(self, data_cfg):
        """
        Set with_depth and with_masks default to True,
        and decide whether to load them into dataloader/network later
        with_masks:

        :param data_cfg: dict describing one split (see SPLITS_YCBV below for
            the expected keys and example values).
        """
        self.name = data_cfg["name"]
        self.data_cfg = data_cfg
        self.objs = data_cfg["objs"]  # selected objects
        # all classes are self.objs, but this enables us to evaluate on selected objs
        self.select_objs = data_cfg.get("select_objs", self.objs)
        self.ann_file = data_cfg["ann_file"]  # json file with scene_id and im_id items
        self.dataset_root = data_cfg["dataset_root"]  # BOP_DATASETS/ycbv/test
        self.models_root = data_cfg["models_root"]  # BOP_DATASETS/ycbv/models
        self.scale_to_meter = data_cfg["scale_to_meter"]  # 0.001
        self.with_masks = data_cfg["with_masks"]  # True (load masks but may not use it)
        self.with_depth = data_cfg["with_depth"]  # True (load depth path here, but may not use it)
        self.height = data_cfg["height"]  # 480
        self.width = data_cfg["width"]  # 640
        self.cache_dir = data_cfg.get("cache_dir", osp.join(PROJ_ROOT, ".cache"))  # .cache
        self.use_cache = data_cfg.get("use_cache", True)
        self.num_to_load = data_cfg["num_to_load"]  # -1
        self.filter_invalid = data_cfg["filter_invalid"]
        ##################################################
        # NOTE: careful! Only the selected objects
        self.cat_ids = [cat_id for cat_id, obj_name in ref.ycbv.id2obj.items() if obj_name in self.objs]
        # map selected objs to [0, num_objs-1]
        self.cat2label = {v: i for i, v in enumerate(self.cat_ids)}  # id_map
        self.label2cat = {label: cat for cat, label in self.cat2label.items()}
        self.obj2label = OrderedDict((obj, obj_id) for obj_id, obj in enumerate(self.objs))
        ##########################################################

    def __call__(self):
        """Load light-weight instance annotations of all images into a list of
        dicts in Detectron2 format.

        Do not load heavy data into memory in this file, since we will
        load the annotations of all images into memory.
        """
        # cache the dataset_dicts to avoid loading masks from files
        # (hash covers the selected objs + split identity so a changed config
        # does not silently reuse a stale cache)
        hashed_file_name = hashlib.md5(
            (
                "".join([str(fn) for fn in self.objs])
                + "dataset_dicts_{}_{}_{}_{}_{}".format(
                    self.name,
                    self.dataset_root,
                    self.with_masks,
                    self.with_depth,
                    __name__,
                )
            ).encode("utf-8")
        ).hexdigest()
        cache_path = osp.join(
            self.cache_dir,
            "dataset_dicts_{}_{}.pkl".format(self.name, hashed_file_name),
        )
        if osp.exists(cache_path) and self.use_cache:
            logger.info("load cached dataset dicts from {}".format(cache_path))
            return mmcv.load(cache_path)

        t_start = time.perf_counter()
        logger.info("loading dataset dicts: {}".format(self.name))
        self.num_instances_without_valid_segmentation = 0
        self.num_instances_without_valid_box = 0
        dataset_dicts = []  # ######################################################
        im_id_global = 0  # running unique image id across all scenes (for coco eval)
        if True:
            # the BOP targets file determines which (scene, image) pairs to load
            targets = mmcv.load(self.ann_file)
            scene_im_ids = [(item["scene_id"], item["im_id"]) for item in targets]
            scene_im_ids = sorted(list(set(scene_im_ids)))
            # load infos for each scene (gt poses, gt bbox info, camera) only once
            gt_dicts = {}
            gt_info_dicts = {}
            cam_dicts = {}
            for scene_id, im_id in scene_im_ids:
                scene_root = osp.join(self.dataset_root, f"{scene_id:06d}")
                if scene_id not in gt_dicts:
                    gt_dicts[scene_id] = mmcv.load(osp.join(scene_root, "scene_gt.json"))
                if scene_id not in gt_info_dicts:
                    gt_info_dicts[scene_id] = mmcv.load(
                        osp.join(scene_root, "scene_gt_info.json")
                    )  # bbox_obj, bbox_visib
                if scene_id not in cam_dicts:
                    cam_dicts[scene_id] = mmcv.load(osp.join(scene_root, "scene_camera.json"))
            for scene_id, im_id in tqdm(scene_im_ids):
                str_im_id = str(im_id)
                scene_root = osp.join(self.dataset_root, f"{scene_id:06d}")
                rgb_path = osp.join(scene_root, "rgb/{:06d}.png").format(im_id)
                assert osp.exists(rgb_path), rgb_path
                depth_path = osp.join(scene_root, "depth/{:06d}.png".format(im_id))
                # re-derive scene_id from the path (ensures it is an int)
                scene_id = int(rgb_path.split("/")[-3])
                cam = np.array(cam_dicts[scene_id][str_im_id]["cam_K"], dtype=np.float32).reshape(3, 3)
                depth_factor = 1000.0 / cam_dicts[scene_id][str_im_id]["depth_scale"]
                record = {
                    "dataset_name": self.name,
                    "file_name": osp.relpath(rgb_path, PROJ_ROOT),
                    "depth_file": osp.relpath(depth_path, PROJ_ROOT),
                    "depth_factor": depth_factor,
                    "height": self.height,
                    "width": self.width,
                    "image_id": im_id_global,  # unique image_id in the dataset, for coco evaluation
                    "scene_im_id": "{}/{}".format(scene_id, im_id),  # for evaluation
                    "cam": cam,
                    "img_type": "real",
                }
                im_id_global += 1
                insts = []
                for anno_i, anno in enumerate(gt_dicts[scene_id][str_im_id]):
                    obj_id = anno["obj_id"]
                    # skip annotations for objects we are not evaluating on
                    if ref.ycbv.id2obj[obj_id] not in self.select_objs:
                        continue
                    cur_label = self.cat2label[obj_id]  # 0-based label
                    # pose: mm -> m for translation
                    R = np.array(anno["cam_R_m2c"], dtype="float32").reshape(3, 3)
                    t = np.array(anno["cam_t_m2c"], dtype="float32") / 1000.0
                    pose = np.hstack([R, t.reshape(3, 1)])
                    quat = mat2quat(R).astype("float32")
                    # project the object centroid into the image (absolute pixels)
                    proj = (record["cam"] @ t.T).T
                    proj = proj[:2] / proj[2]
                    bbox_visib = gt_info_dicts[scene_id][str_im_id][anno_i]["bbox_visib"]
                    bbox_obj = gt_info_dicts[scene_id][str_im_id][anno_i]["bbox_obj"]
                    x1, y1, w, h = bbox_visib
                    if self.filter_invalid:
                        if h <= 1 or w <= 1:
                            self.num_instances_without_valid_box += 1
                            continue
                    mask_file = osp.join(
                        scene_root,
                        "mask/{:06d}_{:06d}.png".format(im_id, anno_i),
                    )
                    mask_visib_file = osp.join(
                        scene_root,
                        "mask_visib/{:06d}_{:06d}.png".format(im_id, anno_i),
                    )
                    assert osp.exists(mask_file), mask_file
                    assert osp.exists(mask_visib_file), mask_visib_file
                    # load mask visib
                    mask_single = mmcv.imread(mask_visib_file, "unchanged")
                    area = mask_single.sum()
                    if area < 3:  # filter out too small or nearly invisible instances
                        self.num_instances_without_valid_segmentation += 1
                        continue
                    mask_rle = binary_mask_to_rle(mask_single, compressed=True)
                    # load mask full
                    mask_full = mmcv.imread(mask_file, "unchanged")
                    mask_full = mask_full.astype("bool")
                    mask_full_rle = binary_mask_to_rle(mask_full, compressed=True)
                    inst = {
                        "category_id": cur_label,  # 0-based label
                        "bbox": bbox_visib,
                        "bbox_obj": bbox_obj,
                        "bbox_mode": BoxMode.XYWH_ABS,
                        "pose": pose,
                        "quat": quat,
                        "trans": t,
                        "centroid_2d": proj,  # absolute (cx, cy)
                        "segmentation": mask_rle,
                        "mask_full": mask_full_rle,  # TODO: load as mask_full, rle
                    }
                    model_info = self.models_info[str(obj_id)]
                    inst["model_info"] = model_info
                    # TODO: using full mask and full xyz
                    for key in ["bbox3d_and_center"]:
                        inst[key] = self.models[cur_label][key]
                    insts.append(inst)
                if len(insts) == 0:  # filter im without anno
                    continue
                record["annotations"] = insts
                dataset_dicts.append(record)
        if self.num_instances_without_valid_segmentation > 0:
            logger.warning(
                "Filtered out {} instances without valid segmentation. "
                "There might be issues in your dataset generation process.".format(
                    self.num_instances_without_valid_segmentation
                )
            )
        if self.num_instances_without_valid_box > 0:
            logger.warning(
                "Filtered out {} instances without valid box. "
                "There might be issues in your dataset generation process.".format(self.num_instances_without_valid_box)
            )
        ##########################################################################
        # optional truncation of the dataset (num_to_load <= 0 means "all")
        if self.num_to_load > 0:
            self.num_to_load = min(int(self.num_to_load), len(dataset_dicts))
            dataset_dicts = dataset_dicts[: self.num_to_load]
        logger.info("loaded {} dataset dicts, using {}s".format(len(dataset_dicts), time.perf_counter() - t_start))
        mmcv.mkdir_or_exist(osp.dirname(cache_path))
        mmcv.dump(dataset_dicts, cache_path, protocol=4)
        logger.info("Dumped dataset_dicts to {}".format(cache_path))
        return dataset_dicts

    @lazy_property
    def models_info(self):
        """models_info.json content, keyed by str(obj_id)."""
        models_info_path = osp.join(self.models_root, "models_info.json")
        assert osp.exists(models_info_path), models_info_path
        models_info = mmcv.load(models_info_path)  # key is str(obj_id)
        return models_info

    @lazy_property
    def models(self):
        """Load models into a list."""
        cache_path = osp.join(self.models_root, f"models_{self.name}.pkl")
        if osp.exists(cache_path) and self.use_cache:
            # dprint("{}: load cached object models from {}".format(self.name, cache_path))
            return mmcv.load(cache_path)
        models = []
        for obj_name in self.objs:
            model = inout.load_ply(
                osp.join(
                    self.models_root,
                    f"obj_{ref.ycbv.obj2id[obj_name]:06d}.ply",
                ),
                vertex_scale=self.scale_to_meter,
            )
            # NOTE: the bbox3d_and_center is not obtained from centered vertices
            # for BOP models, not a big problem since they had been centered
            model["bbox3d_and_center"] = misc.get_bbox3d_and_center(model["pts"])
            models.append(model)
        logger.info("cache models to {}".format(cache_path))
        mmcv.dump(models, cache_path, protocol=4)
        return models

    def image_aspect_ratio(self):
        # width/height of the fixed-size test images
        return self.width / self.height  # 4/3
########### register datasets ############################################################
def get_ycbv_metadata(obj_names, ref_key):
    """Build task-specific metadata for a ycbv split.

    Returns a dict with "thing_classes" (the object names) and "sym_infos",
    a 0-based-label-keyed dict mapping each object to an array of symmetry
    rotations (None for objects without annotated symmetries).
    """
    data_ref = ref.__dict__[ref_key]
    loaded_models_info = data_ref.get_models_info()

    sym_infos = {}  # keyed by 0-based label
    for label, obj_name in enumerate(obj_names):
        model_info = loaded_models_info[str(data_ref.obj2id[obj_name])]
        has_sym = "symmetries_discrete" in model_info or "symmetries_continuous" in model_info
        if not has_sym:
            sym_infos[label] = None
            continue
        transforms = misc.get_symmetry_transformations(model_info, max_sym_disc_step=0.01)
        sym_infos[label] = np.array([tf["R"] for tf in transforms], dtype=np.float32)

    return {"thing_classes": obj_names, "sym_infos": sym_infos}
################################################################################
# Pre-defined dataset splits: one entry covering all objects, plus (below)
# one per-object split generated programmatically.
SPLITS_YCBV = dict(
    ycbv_bop_test=dict(
        name="ycbv_bop_test",
        dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/test"),
        models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/models"),
        objs=ref.ycbv.objects,  # selected objects
        ann_file=osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/test_targets_bop19.json"),
        scale_to_meter=0.001,
        with_masks=True,  # (load masks but may not use it)
        with_depth=True,  # (load depth path here, but may not use it)
        height=480,
        width=640,
        cache_dir=osp.join(PROJ_ROOT, ".cache"),
        use_cache=True,
        num_to_load=-1,
        filter_invalid=False,
        ref_key="ycbv",
    )
)
# single objs (num_class is from all objs)
for obj in ref.ycbv.objects:
    name = "ycbv_bop_{}_test".format(obj)
    select_objs = [obj]
    if name not in SPLITS_YCBV:
        SPLITS_YCBV[name] = dict(
            name=name,
            dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/test"),
            models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/models"),
            objs=[obj],  # only this obj
            select_objs=select_objs,  # selected objects
            ann_file=osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/test_targets_bop19.json"),
            scale_to_meter=0.001,
            with_masks=True,  # (load masks but may not use it)
            with_depth=True,  # (load depth path here, but may not use it)
            height=480,
            width=640,
            cache_dir=osp.join(PROJ_ROOT, ".cache"),
            use_cache=True,
            num_to_load=-1,
            filter_invalid=False,
            ref_key="ycbv",
        )
def register_with_name_cfg(name, data_cfg=None):
    """Register a ycbv bop-test split under *name* in the Detectron2 catalogs.

    Assume pre-defined datasets live in `./datasets`.

    Args:
        name: dataset name; pre-defined names (see SPLITS_YCBV) need no
            data_cfg, any other name requires one.
        data_cfg: explicit split config, e.g. taken from cfg.DATA_CFG.name;
            ignored when *name* is a pre-defined split.
    """
    dprint("register dataset: {}".format(name))
    used_cfg = SPLITS_YCBV.get(name)
    if used_cfg is None:
        assert data_cfg is not None, f"dataset name {name} is not registered"
        used_cfg = data_cfg
    DatasetCatalog.register(name, YCBV_BOP_TEST_Dataset(used_cfg))
    # attach metadata consumed by the evaluators (something like eval_types)
    MetadataCatalog.get(name).set(
        id="ycbv",  # NOTE: for pvnet to determine module
        ref_key=used_cfg["ref_key"],
        objs=used_cfg["objs"],
        eval_error_types=["ad", "rete", "proj"],
        evaluator_type="bop",
        **get_ycbv_metadata(obj_names=used_cfg["objs"], ref_key=used_cfg["ref_key"]),
    )
def get_available_datasets():
    """Return the names of all pre-defined ycbv bop-test splits."""
    return [split_name for split_name in SPLITS_YCBV]
#### tests ###############################################
def test_vis():
    """Visual smoke test: load the split named on the command line and show
    each image with its masks, boxes, projected 3D boxes and depth.

    NOTE: relies on grid_show / read_image_mmcv / vis_image_mask_bbox_cv2,
    which are only imported inside the __main__ guard below.
    """
    dset_name = sys.argv[1]
    assert dset_name in DatasetCatalog.list()
    meta = MetadataCatalog.get(dset_name)
    dprint("MetadataCatalog: ", meta)
    objs = meta.objs
    t_start = time.perf_counter()
    dicts = DatasetCatalog.get(dset_name)
    logger.info("Done loading {} samples with {:.3f}s.".format(len(dicts), time.perf_counter() - t_start))
    dirname = "output/{}-data-vis".format(dset_name)
    os.makedirs(dirname, exist_ok=True)
    for d in dicts:
        img = read_image_mmcv(d["file_name"], format="BGR")
        depth = mmcv.imread(d["depth_file"], "unchanged") / d["depth_factor"]
        imH, imW = img.shape[:2]
        annos = d["annotations"]
        masks = [cocosegm2mask(anno["segmentation"], imH, imW) for anno in annos]
        bboxes = [anno["bbox"] for anno in annos]
        bbox_modes = [anno["bbox_mode"] for anno in annos]
        # convert all boxes to xyxy for drawing
        bboxes_xyxy = np.array(
            [BoxMode.convert(box, box_mode, BoxMode.XYXY_ABS) for box, box_mode in zip(bboxes, bbox_modes)]
        )
        kpts_3d_list = [anno["bbox3d_and_center"] for anno in annos]
        quats = [anno["quat"] for anno in annos]
        transes = [anno["trans"] for anno in annos]
        Rs = [quat2mat(quat) for quat in quats]
        # 0-based label
        cat_ids = [anno["category_id"] for anno in annos]
        K = d["cam"]
        # project the 3D bbox corners + center with the gt pose
        kpts_2d = [misc.project_pts(kpt3d, K, R, t) for kpt3d, R, t in zip(kpts_3d_list, Rs, transes)]
        # # TODO: visualize pose and keypoints
        labels = [objs[cat_id] for cat_id in cat_ids]
        # img_vis = vis_image_bboxes_cv2(img, bboxes=bboxes_xyxy, labels=labels)
        img_vis = vis_image_mask_bbox_cv2(img, masks, bboxes=bboxes_xyxy, labels=labels)
        img_vis_kpts2d = img.copy()
        for anno_i in range(len(annos)):
            img_vis_kpts2d = misc.draw_projected_box3d(img_vis_kpts2d, kpts_2d[anno_i])
        # BGR -> RGB for display
        grid_show(
            [
                img[:, :, [2, 1, 0]],
                img_vis[:, :, [2, 1, 0]],
                img_vis_kpts2d[:, :, [2, 1, 0]],
                depth,
            ],
            [f"img:{d['file_name']}", "vis_img", "img_vis_kpts2d", "depth"],
            row=2,
            col=2,
        )
if __name__ == "__main__":
"""Test the dataset loader.
Usage:
python -m core.datasets.ycbv_bop_test dataset_name
"""
from lib.vis_utils.image import grid_show
from lib.utils.setup_logger import setup_my_logger
import detectron2.data.datasets # noqa # add pre-defined metadata
from core.utils.data_utils import read_image_mmcv
from lib.vis_utils.image import vis_image_mask_bbox_cv2
print("sys.argv:", sys.argv)
logger = setup_my_logger(name="core")
register_with_name_cfg(sys.argv[1])
print("dataset catalog: ", DatasetCatalog.list())
test_vis()
This diff is collapsed.
This diff is collapsed.
# Convert YOLOX COCO-style detection results (a flat list of per-detection
# dicts) into the per-image format consumed by GDRN test-time loading:
# a dict keyed by "scene_id/image_id" with a list of detections per image.
import mmcv
import sys
from tqdm import tqdm
import json

# NOTE: hard-coded input path; adjust to your local results file.
path = "/data2/lxy/Storage/bop22_results/yolovx_amodal/ycbv/yolox_x_640_ycbv_real_pbr_ycbv_bop_test.json"
ds = mmcv.load(path)

outs = {}
for d in tqdm(ds):
    scene_id = d["scene_id"]
    image_id = d["image_id"]
    scene_im_id = f"{scene_id}/{image_id}"
    # keep only the fields the GDRN loader consumes
    cur_dict = {
        "bbox_est": d["bbox"],
        "obj_id": d["category_id"],
        "score": d["score"],
        "time": d["time"],
    }
    # group detections by image (setdefault avoids the explicit key check)
    outs.setdefault(scene_im_id, []).append(cur_dict)
def save_json(path, content, sort=False):
    """Saves the provided content to a JSON file.

    Dicts and lists are written with one top-level element per line (BOP
    results convention); any other value is dumped with json.dump.

    :param path: Path to the output JSON file.
    :param content: Dictionary/list to save.
    :param sort: If True and content is a dict, write keys in sorted order.
    """
    with open(path, "w") as f:
        if isinstance(content, dict):
            f.write("{\n")
            if sort:
                pairs = sorted(content.items(), key=lambda x: x[0])
            else:
                pairs = content.items()
            last = len(content) - 1
            for idx, (key, value) in enumerate(pairs):
                line = ' "{}": {}'.format(key, json.dumps(value, sort_keys=True))
                if idx != last:
                    line += ","
                f.write(line + "\n")
            f.write("}")
        elif isinstance(content, list):
            last = len(content) - 1
            f.write("[\n")
            for idx, elem in enumerate(content):
                line = " {}".format(json.dumps(elem, sort_keys=True))
                if idx != last:
                    line += ","
                f.write(line + "\n")
            f.write("]")
        else:
            json.dump(content, f, sort_keys=True)
save_json("datasets/BOP_DATASETS/ycbv/test/test_bboxes/yolox_x_640_ycbv_real_pbr_ycbv_bop_test.json", outs)
import hashlib
import logging
import os
import os.path as osp
import sys
# make the project root importable before pulling in project-local modules
cur_dir = osp.dirname(osp.abspath(__file__))
PROJ_ROOT = osp.normpath(osp.join(cur_dir, "../../../.."))
sys.path.insert(0, PROJ_ROOT)
import time
from collections import OrderedDict
import mmcv
import numpy as np
from tqdm import tqdm
from transforms3d.quaternions import mat2quat, quat2mat
import ref
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.structures import BoxMode
from lib.pysixd import inout, misc
from lib.utils.mask_utils import binary_mask_to_rle, cocosegm2mask
from lib.utils.utils import dprint, iprint, lazy_property
# module-level logger; replaced by setup_my_logger when run as a script
logger = logging.getLogger(__name__)
DATASETS_ROOT = osp.normpath(osp.join(PROJ_ROOT, "datasets"))
class YCBV_BOP_TEST_Dataset:
    """ycbv bop test.

    Loads light-weight per-image annotations (poses, boxes, RLE masks, file
    paths) for the ycbv BOP test split into Detectron2-format dicts.
    """
    def __init__(self, data_cfg):
        """
        Set with_depth and with_masks default to True,
        and decide whether to load them into dataloader/network later
        with_masks:

        Args:
            data_cfg (dict): split configuration; see SPLITS_YCBV for the
                expected keys and example values.
        """
        self.name = data_cfg["name"]
        self.data_cfg = data_cfg
        self.objs = data_cfg["objs"]  # selected objects
        # all classes are self.objs, but this enables us to evaluate on selected objs
        self.select_objs = data_cfg.get("select_objs", self.objs)
        self.ann_file = data_cfg["ann_file"]  # json file with scene_id and im_id items
        self.dataset_root = data_cfg["dataset_root"]  # BOP_DATASETS/ycbv/test
        self.models_root = data_cfg["models_root"]  # BOP_DATASETS/ycbv/models
        self.scale_to_meter = data_cfg["scale_to_meter"]  # 0.001
        self.with_masks = data_cfg["with_masks"]  # True (load masks but may not use it)
        self.with_depth = data_cfg["with_depth"]  # True (load depth path here, but may not use it)
        self.height = data_cfg["height"]  # 480
        self.width = data_cfg["width"]  # 640
        self.cache_dir = data_cfg.get("cache_dir", osp.join(PROJ_ROOT, ".cache"))  # .cache
        self.use_cache = data_cfg.get("use_cache", True)
        self.num_to_load = data_cfg["num_to_load"]  # -1
        self.filter_invalid = data_cfg["filter_invalid"]
        ##################################################
        # NOTE: careful! Only the selected objects
        self.cat_ids = [cat_id for cat_id, obj_name in ref.ycbv.id2obj.items() if obj_name in self.objs]
        # map selected objs to [0, num_objs-1]
        self.cat2label = {v: i for i, v in enumerate(self.cat_ids)}  # id_map
        self.label2cat = {label: cat for cat, label in self.cat2label.items()}
        # obj name -> 0-based label (NOT the dataset obj_id)
        self.obj2label = OrderedDict((obj, obj_id) for obj_id, obj in enumerate(self.objs))
        ##########################################################
    def __call__(self):
        """Load light-weight instance annotations of all images into a list of
        dicts in Detectron2 format.
        Do not load heavy data into memory in this file, since we will
        load the annotations of all images into memory.
        """
        # cache the dataset_dicts to avoid loading masks from files
        # (the hash covers the selected objects and split identity, so a
        # change in either invalidates the cache file name)
        hashed_file_name = hashlib.md5(
            (
                "".join([str(fn) for fn in self.objs])
                + "dataset_dicts_{}_{}_{}_{}_{}".format(
                    self.name,
                    self.dataset_root,
                    self.with_masks,
                    self.with_depth,
                    __name__,
                )
            ).encode("utf-8")
        ).hexdigest()
        cache_path = osp.join(
            self.cache_dir,
            "dataset_dicts_{}_{}.pkl".format(self.name, hashed_file_name),
        )
        if osp.exists(cache_path) and self.use_cache:
            logger.info("load cached dataset dicts from {}".format(cache_path))
            return mmcv.load(cache_path)
        t_start = time.perf_counter()
        logger.info("loading dataset dicts: {}".format(self.name))
        self.num_instances_without_valid_segmentation = 0
        self.num_instances_without_valid_box = 0
        dataset_dicts = []  # ######################################################
        im_id_global = 0  # unique running image index over the whole split
        if True:  # always-true guard (keeps the original indentation structure)
            targets = mmcv.load(self.ann_file)
            # unique (scene, image) pairs referenced by the BOP target file
            scene_im_ids = [(item["scene_id"], item["im_id"]) for item in targets]
            scene_im_ids = sorted(list(set(scene_im_ids)))
            # load infos for each scene
            gt_dicts = {}
            gt_info_dicts = {}
            cam_dicts = {}
            for scene_id, im_id in scene_im_ids:
                scene_root = osp.join(self.dataset_root, f"{scene_id:06d}")
                if scene_id not in gt_dicts:
                    gt_dicts[scene_id] = mmcv.load(osp.join(scene_root, "scene_gt.json"))
                if scene_id not in gt_info_dicts:
                    gt_info_dicts[scene_id] = mmcv.load(
                        osp.join(scene_root, "scene_gt_info.json")
                    )  # bbox_obj, bbox_visib
                if scene_id not in cam_dicts:
                    cam_dicts[scene_id] = mmcv.load(osp.join(scene_root, "scene_camera.json"))
            for scene_id, im_id in tqdm(scene_im_ids):
                str_im_id = str(im_id)
                scene_root = osp.join(self.dataset_root, f"{scene_id:06d}")
                rgb_path = osp.join(scene_root, "rgb/{:06d}.png").format(im_id)
                assert osp.exists(rgb_path), rgb_path
                depth_path = osp.join(scene_root, "depth/{:06d}.png".format(im_id))
                # NOTE(review): re-derives scene_id from the path component;
                # should always equal the loop's scene_id
                scene_id = int(rgb_path.split("/")[-3])
                cam = np.array(cam_dicts[scene_id][str_im_id]["cam_K"], dtype=np.float32).reshape(3, 3)
                # depth png value / depth_factor -> depth in meters (BOP depth_scale is mm-based)
                depth_factor = 1000.0 / cam_dicts[scene_id][str_im_id]["depth_scale"]
                record = {
                    "dataset_name": self.name,
                    "file_name": osp.relpath(rgb_path, PROJ_ROOT),
                    "depth_file": osp.relpath(depth_path, PROJ_ROOT),
                    "depth_factor": depth_factor,
                    "height": self.height,
                    "width": self.width,
                    "image_id": im_id_global,  # unique image_id in the dataset, for coco evaluation
                    "scene_im_id": "{}/{}".format(scene_id, im_id),  # for evaluation
                    "cam": cam,
                    "img_type": "real",
                }
                im_id_global += 1
                insts = []
                for anno_i, anno in enumerate(gt_dicts[scene_id][str_im_id]):
                    obj_id = anno["obj_id"]
                    # skip instances not in the evaluated object subset
                    if ref.ycbv.id2obj[obj_id] not in self.select_objs:
                        continue
                    cur_label = self.cat2label[obj_id]  # 0-based label
                    R = np.array(anno["cam_R_m2c"], dtype="float32").reshape(3, 3)
                    t = np.array(anno["cam_t_m2c"], dtype="float32") / 1000.0  # mm -> m
                    pose = np.hstack([R, t.reshape(3, 1)])
                    quat = mat2quat(R).astype("float32")
                    # project the translation to get the 2D centroid in pixels
                    proj = (record["cam"] @ t.T).T
                    proj = proj[:2] / proj[2]
                    bbox_visib = gt_info_dicts[scene_id][str_im_id][anno_i]["bbox_visib"]
                    bbox_obj = gt_info_dicts[scene_id][str_im_id][anno_i]["bbox_obj"]
                    x1, y1, w, h = bbox_visib
                    if self.filter_invalid:
                        if h <= 1 or w <= 1:
                            self.num_instances_without_valid_box += 1
                            continue
                    mask_file = osp.join(
                        scene_root,
                        "mask/{:06d}_{:06d}.png".format(im_id, anno_i),
                    )
                    mask_visib_file = osp.join(
                        scene_root,
                        "mask_visib/{:06d}_{:06d}.png".format(im_id, anno_i),
                    )
                    assert osp.exists(mask_file), mask_file
                    assert osp.exists(mask_visib_file), mask_visib_file
                    # load mask visib
                    mask_single = mmcv.imread(mask_visib_file, "unchanged")
                    # NOTE(review): if the png stores 0/255 values, `area` is
                    # 255x the pixel count, so `< 3` only filters empty masks
                    area = mask_single.sum()
                    if area < 3:  # filter out too small or nearly invisible instances
                        self.num_instances_without_valid_segmentation += 1
                        continue
                    # NOTE(review): mask_single is NOT cast to bool here while
                    # mask_full below is — confirm binary_mask_to_rle accepts both
                    mask_rle = binary_mask_to_rle(mask_single, compressed=True)
                    # load mask full
                    mask_full = mmcv.imread(mask_file, "unchanged")
                    mask_full = mask_full.astype("bool")
                    mask_full_rle = binary_mask_to_rle(mask_full, compressed=True)
                    inst = {
                        "category_id": cur_label,  # 0-based label
                        "bbox": bbox_obj,  # TODO: load both bbox_obj and bbox_visib
                        "bbox_mode": BoxMode.XYWH_ABS,
                        "pose": pose,
                        "quat": quat,
                        "trans": t,
                        "centroid_2d": proj,  # absolute (cx, cy)
                        "segmentation": mask_rle,
                        "mask_full": mask_full_rle,  # TODO: load as mask_full, rle
                    }
                    model_info = self.models_info[str(obj_id)]
                    inst["model_info"] = model_info
                    # TODO: using full mask and full xyz
                    for key in ["bbox3d_and_center"]:
                        inst[key] = self.models[cur_label][key]
                    insts.append(inst)
                if len(insts) == 0:  # filter im without anno
                    continue
                record["annotations"] = insts
                dataset_dicts.append(record)
        if self.num_instances_without_valid_segmentation > 0:
            logger.warning(
                "Filtered out {} instances without valid segmentation. "
                "There might be issues in your dataset generation process.".format(
                    self.num_instances_without_valid_segmentation
                )
            )
        if self.num_instances_without_valid_box > 0:
            logger.warning(
                "Filtered out {} instances without valid box. "
                "There might be issues in your dataset generation process.".format(self.num_instances_without_valid_box)
            )
        ##########################################################################
        # optionally truncate the split to the first num_to_load images
        if self.num_to_load > 0:
            self.num_to_load = min(int(self.num_to_load), len(dataset_dicts))
            dataset_dicts = dataset_dicts[: self.num_to_load]
        logger.info("loaded {} dataset dicts, using {}s".format(len(dataset_dicts), time.perf_counter() - t_start))
        # persist the parsed dicts so the next run hits the cache branch above
        mmcv.mkdir_or_exist(osp.dirname(cache_path))
        mmcv.dump(dataset_dicts, cache_path, protocol=4)
        logger.info("Dumped dataset_dicts to {}".format(cache_path))
        return dataset_dicts
    @lazy_property
    def models_info(self):
        # models_info.json of the BOP models directory; key is str(obj_id)
        models_info_path = osp.join(self.models_root, "models_info.json")
        assert osp.exists(models_info_path), models_info_path
        models_info = mmcv.load(models_info_path)  # key is str(obj_id)
        return models_info
    @lazy_property
    def models(self):
        """Load models into a list."""
        # per-split model cache (indexed like self.objs / 0-based labels)
        cache_path = osp.join(self.models_root, f"models_{self.name}.pkl")
        if osp.exists(cache_path) and self.use_cache:
            # dprint("{}: load cached object models from {}".format(self.name, cache_path))
            return mmcv.load(cache_path)
        models = []
        for obj_name in self.objs:
            model = inout.load_ply(
                osp.join(
                    self.models_root,
                    f"obj_{ref.ycbv.obj2id[obj_name]:06d}.ply",
                ),
                vertex_scale=self.scale_to_meter,
            )
            # NOTE: the bbox3d_and_center is not obtained from centered vertices
            # for BOP models, not a big problem since they had been centered
            model["bbox3d_and_center"] = misc.get_bbox3d_and_center(model["pts"])
            models.append(model)
        logger.info("cache models to {}".format(cache_path))
        mmcv.dump(models, cache_path, protocol=4)
        return models
    def image_aspect_ratio(self):
        # width / height of the (fixed-size) test images
        return self.width / self.height  # 4/3
########### register datasets ############################################################
def get_ycbv_metadata(obj_names, ref_key):
    """task specific metadata.

    Builds Detectron2 metadata for the given objects: the class names and,
    per 0-based label, the stacked symmetry rotations (None if the model
    has no symmetries declared in models_info).
    """
    data_ref = ref.__dict__[ref_key]
    loaded_models_info = data_ref.get_models_info()
    cur_sym_infos = {}  # keyed by 0-based label
    for label, obj_name in enumerate(obj_names):
        model_info = loaded_models_info[str(data_ref.obj2id[obj_name])]
        has_sym = ("symmetries_discrete" in model_info) or ("symmetries_continuous" in model_info)
        if has_sym:
            sym_transforms = misc.get_symmetry_transformations(model_info, max_sym_disc_step=0.01)
            cur_sym_infos[label] = np.array([sym["R"] for sym in sym_transforms], dtype=np.float32)
        else:
            cur_sym_infos[label] = None
    return {"thing_classes": obj_names, "sym_infos": cur_sym_infos}
################################################################################
# pre-defined split configurations consumed by YCBV_BOP_TEST_Dataset
SPLITS_YCBV = dict(
    ycbv_bop_test=dict(
        name="ycbv_bop_test",
        dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/test"),
        models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/models"),
        objs=ref.ycbv.objects,  # selected objects
        ann_file=osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/test_targets_bop19.json"),
        scale_to_meter=0.001,
        with_masks=True,  # (load masks but may not use it)
        with_depth=True,  # (load depth path here, but may not use it)
        height=480,
        width=640,
        cache_dir=osp.join(PROJ_ROOT, ".cache"),
        use_cache=True,
        num_to_load=-1,  # -1: load all
        filter_invalid=False,
        ref_key="ycbv",
    )
)
# single objs (num_class is from all objs)
# register one additional split per object, annotated/evaluated on that
# object only; the config mirrors ycbv_bop_test except objs/select_objs
for obj in ref.ycbv.objects:
    name = "ycbv_bop_{}_test".format(obj)
    select_objs = [obj]
    if name in SPLITS_YCBV:
        continue  # do not overwrite an existing split definition
    SPLITS_YCBV[name] = dict(
        name=name,
        dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/test"),
        models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/models"),
        objs=[obj],  # only this obj
        select_objs=select_objs,  # selected objects
        ann_file=osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/test_targets_bop19.json"),
        scale_to_meter=0.001,
        with_masks=True,  # (load masks but may not use it)
        with_depth=True,  # (load depth path here, but may not use it)
        height=480,
        width=640,
        cache_dir=osp.join(PROJ_ROOT, ".cache"),
        use_cache=True,
        num_to_load=-1,
        filter_invalid=False,
        ref_key="ycbv",
    )
def register_with_name_cfg(name, data_cfg=None):
    """Assume pre-defined datasets live in `./datasets`.

    Registers the dataset and its metadata in the Detectron2 catalogs.

    Args:
        name: datasnet_name,
        data_cfg: if name is in existing SPLITS, use pre-defined data_cfg
            otherwise requires data_cfg
            data_cfg can be set in cfg.DATA_CFG.name
    """
    dprint("register dataset: {}".format(name))
    used_cfg = SPLITS_YCBV.get(name)
    if used_cfg is None:
        # unknown split name: the caller must supply an explicit config
        assert data_cfg is not None, f"dataset name {name} is not registered"
        used_cfg = data_cfg
    DatasetCatalog.register(name, YCBV_BOP_TEST_Dataset(used_cfg))
    # something like eval_types
    task_meta = get_ycbv_metadata(obj_names=used_cfg["objs"], ref_key=used_cfg["ref_key"])
    MetadataCatalog.get(name).set(
        id="ycbv",  # NOTE: for pvnet to determine module
        ref_key=used_cfg["ref_key"],
        objs=used_cfg["objs"],
        eval_error_types=["ad", "rete", "proj"],
        evaluator_type="bop",
        **task_meta,
    )
def get_available_datasets():
    """Return the names of all pre-defined ycbv test splits."""
    return [split_name for split_name in SPLITS_YCBV]
#### tests ###############################################
def test_vis():
    """Visualize GT annotations (masks, boxes, projected 3D boxes, depth)
    for the dataset named in sys.argv[1], one grid_show window per image."""
    dset_name = sys.argv[1]
    assert dset_name in DatasetCatalog.list()
    meta = MetadataCatalog.get(dset_name)
    dprint("MetadataCatalog: ", meta)
    objs = meta.objs
    t_start = time.perf_counter()
    dicts = DatasetCatalog.get(dset_name)
    logger.info("Done loading {} samples with {:.3f}s.".format(len(dicts), time.perf_counter() - t_start))
    dirname = "output/{}-data-vis".format(dset_name)
    os.makedirs(dirname, exist_ok=True)
    for record in dicts:
        img = read_image_mmcv(record["file_name"], format="BGR")
        depth = mmcv.imread(record["depth_file"], "unchanged") / record["depth_factor"]
        imH, imW = img.shape[:2]
        annotations = record["annotations"]
        masks = [cocosegm2mask(ann["segmentation"], imH, imW) for ann in annotations]
        # convert every box to absolute XYXY for the visualizer
        bboxes_xyxy = np.array(
            [BoxMode.convert(ann["bbox"], ann["bbox_mode"], BoxMode.XYXY_ABS) for ann in annotations]
        )
        kpts_3d_list = [ann["bbox3d_and_center"] for ann in annotations]
        Rs = [quat2mat(ann["quat"]) for ann in annotations]
        transes = [ann["trans"] for ann in annotations]
        # 0-based label
        cat_ids = [ann["category_id"] for ann in annotations]
        K = record["cam"]
        kpts_2d = [misc.project_pts(kpt3d, K, R, t) for kpt3d, R, t in zip(kpts_3d_list, Rs, transes)]
        # # TODO: visualize pose and keypoints
        labels = [objs[cat_id] for cat_id in cat_ids]
        # img_vis = vis_image_bboxes_cv2(img, bboxes=bboxes_xyxy, labels=labels)
        img_vis = vis_image_mask_bbox_cv2(img, masks, bboxes=bboxes_xyxy, labels=labels)
        img_vis_kpts2d = img.copy()
        for kpt_2d in kpts_2d:
            img_vis_kpts2d = misc.draw_projected_box3d(img_vis_kpts2d, kpt_2d)
        grid_show(
            [
                img[:, :, [2, 1, 0]],
                img_vis[:, :, [2, 1, 0]],
                img_vis_kpts2d[:, :, [2, 1, 0]],
                depth,
            ],
            [f"img:{record['file_name']}", "vis_img", "img_vis_kpts2d", "depth"],
            row=2,
            col=2,
        )
if __name__ == "__main__":
    """Test the dataset loader.
    Usage:
        python -m core.datasets.ycbv_bop_test dataset_name
    """
    # visualization-only imports are deferred to the script entry point so
    # importing this module for registration stays light-weight
    from lib.vis_utils.image import grid_show
    from lib.utils.setup_logger import setup_my_logger
    import detectron2.data.datasets  # noqa # add pre-defined metadata
    from core.utils.data_utils import read_image_mmcv
    from lib.vis_utils.image import vis_image_mask_bbox_cv2
    print("sys.argv:", sys.argv)
    # rebind the module logger so log output is visible when run as a script
    logger = setup_my_logger(name="core")
    register_with_name_cfg(sys.argv[1])
    print("dataset catalog: ", DatasetCatalog.list())
    test_vis()
This diff is collapsed.
This diff is collapsed.
# encoding: utf-8
"""This file includes necessary params, info."""
import os
import mmcv
import os.path as osp
import numpy as np
# ---------------------------------------------------------------- #
# ROOT PATH INFO
# ---------------------------------------------------------------- #
cur_dir = osp.abspath(osp.dirname(__file__))
root_dir = osp.normpath(osp.join(cur_dir, ".."))
# directory storing experiment data (result, model checkpoints, etc).
output_dir = osp.join(root_dir, "output")
data_root = osp.join(root_dir, "datasets")
bop_root = osp.join(data_root, "BOP_DATASETS/")
# ---------------------------------------------------------------- #
# YCBV DATASET
# ---------------------------------------------------------------- #
dataset_root = osp.join(bop_root, "ycbv")
train_real_dir = osp.join(dataset_root, "train_real")
train_render_dir = osp.join(dataset_root, "train_synt")
train_pbr_dir = osp.join(dataset_root, "train_pbr")
test_dir = osp.join(dataset_root, "test")
# scene id ranges: test scenes 48-59; real train = 0-91 minus test scenes
test_scenes = [i for i in range(48, 59 + 1)]
train_real_scenes = [i for i in range(0, 91 + 1) if i not in test_scenes]
train_synt_scenes = [i for i in range(0, 79 + 1)]
train_pbr_scenes = [i for i in range(0, 49 + 1)]
model_dir = osp.join(dataset_root, "models")
fine_model_dir = osp.join(dataset_root, "models_fine")
model_eval_dir = osp.join(dataset_root, "models_eval")
model_scaled_simple_dir = osp.join(dataset_root, "models_rescaled")  # m, .obj
vertex_scale = 0.001
# object info
# obj_id -> YCB object name (trailing comments look like per-model offsets —
# NOTE(review): their exact meaning is not established here)
id2obj = {
    1: "002_master_chef_can",  # [1.3360, -0.5000, 3.5105]
    2: "003_cracker_box",  # [0.5575, 1.7005, 4.8050]
    3: "004_sugar_box",  # [-0.9520, 1.4670, 4.3645]
    4: "005_tomato_soup_can",  # [-0.0240, -1.5270, 8.4035]
    5: "006_mustard_bottle",  # [1.2995, 2.4870, -11.8290]
    6: "007_tuna_fish_can",  # [-0.1565, 0.1150, 4.2625]
    7: "008_pudding_box",  # [1.1645, -4.2015, 3.1190]
    8: "009_gelatin_box",  # [1.4460, -0.5915, 3.6085]
    9: "010_potted_meat_can",  # [2.4195, 0.3075, 8.0715]
    10: "011_banana",  # [-18.6730, 12.1915, -1.4635]
    11: "019_pitcher_base",  # [5.3370, 5.8855, 25.6115]
    12: "021_bleach_cleanser",  # [4.9290, -2.4800, -13.2920]
    13: "024_bowl",  # [-0.2270, 0.7950, -2.9675]
    14: "025_mug",  # [-8.4675, -0.6995, -1.6145]
    15: "035_power_drill",  # [9.0710, 20.9360, -2.1190]
    16: "036_wood_block",  # [1.4265, -2.5305, 17.1890]
    17: "037_scissors",  # [7.0535, -28.1320, 0.0420]
    18: "040_large_marker",  # [0.0460, -2.1040, 0.3500]
    19: "051_large_clamp",  # [10.5180, -1.9640, -0.4745]
    20: "052_extra_large_clamp",  # [-0.3950, -10.4130, 0.1620]
    21: "061_foam_brick",  # [-0.0805, 0.0805, -8.2435]
}
objects = list(id2obj.values())
obj_num = len(id2obj)
obj2id = {_name: _id for _id, _name in id2obj.items()}
model_paths = [osp.join(model_dir, "obj_{:06d}.ply").format(_id) for _id in id2obj]  # TODO: check this
texture_paths = [osp.join(model_dir, "obj_{:06d}.png".format(_id)) for _id in id2obj]
model_colors = [((i + 1) * 10, (i + 1) * 10, (i + 1) * 10) for i in range(obj_num)]  # for renderer
# yapf: disable
# per-object model diameters, converted from mm to meters by the /1000
diameters = np.array([172.063, 269.573, 198.377, 120.543, 196.463,
                      89.797, 142.543, 114.053, 129.540, 197.796,
                      259.534, 259.566, 161.922, 124.990, 226.170,
                      237.299, 203.973, 121.365, 174.746, 217.094,
                      102.903]) / 1000.0
# yapf: enable
# Camera info
width = 640
height = 480
zNear = 0.25
zFar = 6.0
center = (height / 2, width / 2)
# default: 0000~0059 and synt
camera_matrix = uw_camera_matrix = np.array([[1066.778, 0.0, 312.9869], [0.0, 1067.487, 241.3109], [0.0, 0.0, 1.0]])
# 0060~0091
cmu_camera_matrix = np.array([[1077.836, 0.0, 323.7872], [0.0, 1078.189, 279.6921], [0.0, 0.0, 1.0]])
depth_factor = 10000.0
def get_models_info():
    """key is str(obj_id)"""
    # models_info.json ships with the BOP models directory
    path = osp.join(model_dir, "models_info.json")
    assert osp.exists(path), path
    return mmcv.load(path)
def get_fps_points():
    """key is str(obj_id) generated by
    core/gdrn_modeling/tools/ycbv/ycbv_1_compute_fps.py."""
    path = osp.join(model_dir, "fps_points.pkl")
    assert osp.exists(path), path
    return mmcv.load(path)
def get_keypoints_3d():
    """key is str(obj_id) generated by
    core/roi_pvnet/tools/ycbv/ycbv_1_compute_keypoints_3d.py."""
    path = osp.join(model_dir, "keypoints_3d.pkl")
    assert osp.exists(path), path
    return mmcv.load(path)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment