diff --git a/configs/gdrn/fruitbin/convnext_a6_AugCosyAAEGray_BG05_mlL1_DMask_amodalClipBox_classAware_fruitbin.py b/configs/gdrn/fruitbin/convnext_a6_AugCosyAAEGray_BG05_mlL1_DMask_amodalClipBox_classAware_fruitbin.py new file mode 100644 index 0000000000000000000000000000000000000000..b167a6c65f29fdd8eb8dc32f0485f15a9f41ec7b --- /dev/null +++ b/configs/gdrn/fruitbin/convnext_a6_AugCosyAAEGray_BG05_mlL1_DMask_amodalClipBox_classAware_fruitbin.py @@ -0,0 +1,143 @@ +# about 3 days +_base_ = ["../../_base_/gdrn_base.py"] + +OUTPUT_DIR = "output/gdrn/ycbv/convnext_a6_AugCosyAAEGray_BG05_mlL1_DMask_amodalClipBox_classAware_ycbv" +INPUT = dict( + DZI_PAD_SCALE=1.5, + TRUNCATE_FG=True, + CHANGE_BG_PROB=0.5, + COLOR_AUG_PROB=0.8, + COLOR_AUG_TYPE="code", + COLOR_AUG_CODE=( + "Sequential([" + # Sometimes(0.5, PerspectiveTransform(0.05)), + # Sometimes(0.5, CropAndPad(percent=(-0.05, 0.1))), + # Sometimes(0.5, Affine(scale=(1.0, 1.2))), + "Sometimes(0.5, CoarseDropout( p=0.2, size_percent=0.05) )," + "Sometimes(0.4, GaussianBlur((0., 3.)))," + "Sometimes(0.3, pillike.EnhanceSharpness(factor=(0., 50.)))," + "Sometimes(0.3, pillike.EnhanceContrast(factor=(0.2, 50.)))," + "Sometimes(0.5, pillike.EnhanceBrightness(factor=(0.1, 6.)))," + "Sometimes(0.3, pillike.EnhanceColor(factor=(0., 20.)))," + "Sometimes(0.5, Add((-25, 25), per_channel=0.3))," + "Sometimes(0.3, Invert(0.2, per_channel=True))," + "Sometimes(0.5, Multiply((0.6, 1.4), per_channel=0.5))," + "Sometimes(0.5, Multiply((0.6, 1.4)))," + "Sometimes(0.1, AdditiveGaussianNoise(scale=10, per_channel=True))," + "Sometimes(0.5, iaa.contrast.LinearContrast((0.5, 2.2), per_channel=0.3))," + "Sometimes(0.5, Grayscale(alpha=(0.0, 1.0)))," # maybe remove for det + "], random_order=True)" + # cosy+aae + ), +) + +SOLVER = dict( + IMS_PER_BATCH=48, + TOTAL_EPOCHS=40, # 10 + LR_SCHEDULER_NAME="flat_and_anneal", + ANNEAL_METHOD="cosine", # "cosine" + ANNEAL_POINT=0.72, + OPTIMIZER_CFG=dict(_delete_=True, type="Ranger", lr=8e-4, 
weight_decay=0.01), + WEIGHT_DECAY=0.0, + WARMUP_FACTOR=0.001, + WARMUP_ITERS=1000, +) + +DATASETS = dict( + TRAIN=("ycbv_train_real", "ycbv_train_pbr"), + TEST=("ycbv_test",), + DET_FILES_TEST=("datasets/BOP_DATASETS/ycbv/test/test_bboxes/yolox_x_640_ycbv_real_pbr_ycbv_bop_test.json",), + SYM_OBJS=[ + "024_bowl", + "036_wood_block", + "051_large_clamp", + "052_extra_large_clamp", + "061_foam_brick", + ], # used for custom evalutor +) + +DATALOADER = dict( + # Number of data loading threads + NUM_WORKERS=8, + FILTER_VISIB_THR=0.3, +) + +MODEL = dict( + LOAD_DETS_TEST=True, + PIXEL_MEAN=[0.0, 0.0, 0.0], + PIXEL_STD=[255.0, 255.0, 255.0], + BBOX_TYPE="AMODAL_CLIP", # VISIB or AMODAL + POSE_NET=dict( + NAME="GDRN_double_mask", + XYZ_ONLINE=True, + NUM_CLASSES=21, + BACKBONE=dict( + FREEZE=False, + PRETRAINED="timm", + INIT_CFG=dict( + type="timm/convnext_base", + pretrained=True, + in_chans=3, + features_only=True, + out_indices=(3,), + ), + ), + ## geo head: Mask, XYZ, Region + GEO_HEAD=dict( + FREEZE=False, + INIT_CFG=dict( + type="TopDownDoubleMaskXyzRegionHead", + in_dim=1024, # this is num out channels of backbone conv feature + ), + NUM_REGIONS=64, + XYZ_CLASS_AWARE=True, + MASK_CLASS_AWARE=True, + REGION_CLASS_AWARE=True, + ), + PNP_NET=dict( + INIT_CFG=dict(norm="GN", act="gelu"), + REGION_ATTENTION=True, + WITH_2D_COORD=True, + ROT_TYPE="allo_rot6d", + TRANS_TYPE="centroid_z", + ), + LOSS_CFG=dict( + # xyz loss ---------------------------- + XYZ_LOSS_TYPE="L1", # L1 | CE_coor + XYZ_LOSS_MASK_GT="visib", # trunc | visib | obj + XYZ_LW=1.0, + # mask loss --------------------------- + MASK_LOSS_TYPE="L1", # L1 | BCE | CE + MASK_LOSS_GT="trunc", # trunc | visib | gt + MASK_LW=1.0, + # full mask loss --------------------------- + FULL_MASK_LOSS_TYPE="L1", # L1 | BCE | CE + FULL_MASK_LW=1.0, + # region loss ------------------------- + REGION_LOSS_TYPE="CE", # CE + REGION_LOSS_MASK_GT="visib", # trunc | visib | obj + REGION_LW=1.0, + # pm loss -------------- + 
PM_LOSS_SYM=True, # NOTE: sym loss + PM_R_ONLY=True, # only do R loss in PM + PM_LW=1.0, + # centroid loss ------- + CENTROID_LOSS_TYPE="L1", + CENTROID_LW=1.0, + # z loss ----------- + Z_LOSS_TYPE="L1", + Z_LW=1.0, + ), + ), +) + +VAL = dict( + DATASET_NAME="ycbv", + SPLIT_TYPE="", + SCRIPT_PATH="lib/pysixd/scripts/eval_pose_results_more.py", + TARGETS_FILENAME="test_targets_bop19.json", + ERROR_TYPES="vsd,mspd,mssd", + USE_BOP=True, # whether to use bop toolkit +) + +TEST = dict(EVAL_PERIOD=0, VIS=False, TEST_BBOX_TYPE="est") # gt | est diff --git a/configs/gdrn/fruitbinPbrSOr/convnext_AugCosyAAEGray_DMask_amodalClipBox_fruitbin/banana1.py b/configs/gdrn/fruitbinPbrSOr/convnext_AugCosyAAEGray_DMask_amodalClipBox_fruitbin/banana1.py new file mode 100644 index 0000000000000000000000000000000000000000..a7f363fbf837ed4dd4d4143e0a5f89f4efb498b1 --- /dev/null +++ b/configs/gdrn/fruitbinPbrSOr/convnext_AugCosyAAEGray_DMask_amodalClipBox_fruitbin/banana1.py @@ -0,0 +1,138 @@ +_base_ = ["../../../_base_/gdrn_base.py"] + +OUTPUT_DIR = "output/gdrn/ycbvPbrSO/convnext_AugCosyAAEGray_DMask_amodalClipBox_ycbv/002_master_chef_can" +INPUT = dict( + DZI_PAD_SCALE=1.5, + TRUNCATE_FG=True, + CHANGE_BG_PROB=0.5, + COLOR_AUG_PROB=0.8, + COLOR_AUG_TYPE="code", + COLOR_AUG_CODE=( + "Sequential([" + # Sometimes(0.5, PerspectiveTransform(0.05)), + # Sometimes(0.5, CropAndPad(percent=(-0.05, 0.1))), + # Sometimes(0.5, Affine(scale=(1.0, 1.2))), + "Sometimes(0.5, CoarseDropout( p=0.2, size_percent=0.05) )," + "Sometimes(0.4, GaussianBlur((0., 3.)))," + "Sometimes(0.3, pillike.EnhanceSharpness(factor=(0., 50.)))," + "Sometimes(0.3, pillike.EnhanceContrast(factor=(0.2, 50.)))," + "Sometimes(0.5, pillike.EnhanceBrightness(factor=(0.1, 6.)))," + "Sometimes(0.3, pillike.EnhanceColor(factor=(0., 20.)))," + "Sometimes(0.5, Add((-25, 25), per_channel=0.3))," + "Sometimes(0.3, Invert(0.2, per_channel=True))," + "Sometimes(0.5, Multiply((0.6, 1.4), per_channel=0.5))," + "Sometimes(0.5, 
Multiply((0.6, 1.4)))," + "Sometimes(0.1, AdditiveGaussianNoise(scale=10, per_channel=True))," + "Sometimes(0.5, iaa.contrast.LinearContrast((0.5, 2.2), per_channel=0.3))," + "Sometimes(0.5, Grayscale(alpha=(0.0, 1.0)))," # maybe remove for det + "], random_order=True)" + # cosy+aae + ), +) + +SOLVER = dict( + IMS_PER_BATCH=36, + TOTAL_EPOCHS=100, + LR_SCHEDULER_NAME="flat_and_anneal", + ANNEAL_METHOD="cosine", # "cosine" + ANNEAL_POINT=0.72, + OPTIMIZER_CFG=dict(_delete_=True, type="Ranger", lr=8e-4, weight_decay=0.01), + WEIGHT_DECAY=0.0, + WARMUP_FACTOR=0.001, + WARMUP_ITERS=1000, +) + +DATASETS = dict( + TRAIN=("ycbv_002_master_chef_can_train_pbr",), + TEST=("ycbv_test",), + DET_FILES_TEST=("datasets/BOP_DATASETS/ycbv/test/test_bboxes/yolox_x_640_ycbv_pbr_ycbv_bop_test.json",), + SYM_OBJS=[ + "024_bowl", + "036_wood_block", + "051_large_clamp", + "052_extra_large_clamp", + "061_foam_brick", + ], # used for custom evalutor +) + +DATALOADER = dict( + # Number of data loading threads + NUM_WORKERS=8, + FILTER_VISIB_THR=0.3, +) + +MODEL = dict( + LOAD_DETS_TEST=True, + PIXEL_MEAN=[0.0, 0.0, 0.0], + PIXEL_STD=[255.0, 255.0, 255.0], + BBOX_TYPE="AMODAL_CLIP", # VISIB or AMODAL + POSE_NET=dict( + NAME="GDRN_double_mask", + XYZ_ONLINE=True, + BACKBONE=dict( + FREEZE=False, + PRETRAINED="timm", + INIT_CFG=dict( + type="timm/convnext_base", + pretrained=True, + in_chans=3, + features_only=True, + out_indices=(3,), + ), + ), + ## geo head: Mask, XYZ, Region + GEO_HEAD=dict( + FREEZE=False, + INIT_CFG=dict( + type="TopDownDoubleMaskXyzRegionHead", + in_dim=1024, # this is num out channels of backbone conv feature + ), + NUM_REGIONS=64, + ), + PNP_NET=dict( + INIT_CFG=dict(norm="GN", act="gelu"), + REGION_ATTENTION=True, + WITH_2D_COORD=True, + ROT_TYPE="allo_rot6d", + TRANS_TYPE="centroid_z", + ), + LOSS_CFG=dict( + # xyz loss ---------------------------- + XYZ_LOSS_TYPE="L1", # L1 | CE_coor + XYZ_LOSS_MASK_GT="visib", # trunc | visib | obj + XYZ_LW=1.0, + # mask loss 
--------------------------- + MASK_LOSS_TYPE="L1", # L1 | BCE | CE + MASK_LOSS_GT="trunc", # trunc | visib | gt + MASK_LW=1.0, + # full mask loss --------------------------- + FULL_MASK_LOSS_TYPE="L1", # L1 | BCE | CE + FULL_MASK_LW=1.0, + # region loss ------------------------- + REGION_LOSS_TYPE="CE", # CE + REGION_LOSS_MASK_GT="visib", # trunc | visib | obj + REGION_LW=1.0, + # pm loss -------------- + PM_LOSS_SYM=True, # NOTE: sym loss + PM_R_ONLY=True, # only do R loss in PM + PM_LW=1.0, + # centroid loss ------- + CENTROID_LOSS_TYPE="L1", + CENTROID_LW=1.0, + # z loss ----------- + Z_LOSS_TYPE="L1", + Z_LW=1.0, + ), + ), +) + +VAL = dict( + DATASET_NAME="ycbv", + SPLIT_TYPE="", + SCRIPT_PATH="lib/pysixd/scripts/eval_pose_results_more.py", + TARGETS_FILENAME="test_targets_bop19.json", + ERROR_TYPES="vsd,mspd,mssd", + USE_BOP=True, # whether to use bop toolkit +) + +TEST = dict(EVAL_PERIOD=0, VIS=False, TEST_BBOX_TYPE="est") # gt | est diff --git a/configs/gdrn/fruitbinPbrSOr/convnext_AugCosyAAEGray_DMask_amodalClipBox_fruitbin/lemon2.py b/configs/gdrn/fruitbinPbrSOr/convnext_AugCosyAAEGray_DMask_amodalClipBox_fruitbin/lemon2.py new file mode 100644 index 0000000000000000000000000000000000000000..59dc1d681ca12b9b51c48e8d03e52b822c7b8888 --- /dev/null +++ b/configs/gdrn/fruitbinPbrSOr/convnext_AugCosyAAEGray_DMask_amodalClipBox_fruitbin/lemon2.py @@ -0,0 +1,3 @@ +_base_ = "./002_master_chef_can.py" +OUTPUT_DIR = "output/gdrn/ycbvPbrSO/convnext_AugCosyAAEGray_DMask_amodalClipBox_ycbv/003_cracker_box" +DATASETS = dict(TRAIN=("ycbv_003_cracker_box_train_pbr",)) diff --git a/configs/gdrn/fruitbinPbrSOr/convnext_AugCosyAAEGray_DMask_amodalClipBox_fruitbin/pear2.py b/configs/gdrn/fruitbinPbrSOr/convnext_AugCosyAAEGray_DMask_amodalClipBox_fruitbin/pear2.py new file mode 100644 index 0000000000000000000000000000000000000000..d03bd4d1c6b09a755b29e730ef591d415a97b306 --- /dev/null +++ 
b/configs/gdrn/fruitbinPbrSOr/convnext_AugCosyAAEGray_DMask_amodalClipBox_fruitbin/pear2.py @@ -0,0 +1,3 @@ +_base_ = "./002_master_chef_can.py" +OUTPUT_DIR = "output/gdrn/ycbvPbrSO/convnext_AugCosyAAEGray_DMask_amodalClipBox_ycbv/004_sugar_box" +DATASETS = dict(TRAIN=("ycbv_004_sugar_box_train_pbr",)) diff --git a/configs/gdrn/fruitbinSO/convnext_AugCosyAAEGray_DMask_amodalClipBox_fruitbin/002_master_chef_can.py b/configs/gdrn/fruitbinSO/convnext_AugCosyAAEGray_DMask_amodalClipBox_fruitbin/002_master_chef_can.py new file mode 100644 index 0000000000000000000000000000000000000000..8551bf536e4cd2d79dbcc44b9606c3fd4bf0d3cc --- /dev/null +++ b/configs/gdrn/fruitbinSO/convnext_AugCosyAAEGray_DMask_amodalClipBox_fruitbin/002_master_chef_can.py @@ -0,0 +1,138 @@ +_base_ = ["../../../_base_/gdrn_base.py"] + +OUTPUT_DIR = "output/gdrn/ycbvSO/convnext_AugCosyAAEGray_DMask_amodalClipBox_ycbv/002_master_chef_can" +INPUT = dict( + DZI_PAD_SCALE=1.5, + TRUNCATE_FG=True, + CHANGE_BG_PROB=0.5, + COLOR_AUG_PROB=0.8, + COLOR_AUG_TYPE="code", + COLOR_AUG_CODE=( + "Sequential([" + # Sometimes(0.5, PerspectiveTransform(0.05)), + # Sometimes(0.5, CropAndPad(percent=(-0.05, 0.1))), + # Sometimes(0.5, Affine(scale=(1.0, 1.2))), + "Sometimes(0.5, CoarseDropout( p=0.2, size_percent=0.05) )," + "Sometimes(0.4, GaussianBlur((0., 3.)))," + "Sometimes(0.3, pillike.EnhanceSharpness(factor=(0., 50.)))," + "Sometimes(0.3, pillike.EnhanceContrast(factor=(0.2, 50.)))," + "Sometimes(0.5, pillike.EnhanceBrightness(factor=(0.1, 6.)))," + "Sometimes(0.3, pillike.EnhanceColor(factor=(0., 20.)))," + "Sometimes(0.5, Add((-25, 25), per_channel=0.3))," + "Sometimes(0.3, Invert(0.2, per_channel=True))," + "Sometimes(0.5, Multiply((0.6, 1.4), per_channel=0.5))," + "Sometimes(0.5, Multiply((0.6, 1.4)))," + "Sometimes(0.1, AdditiveGaussianNoise(scale=10, per_channel=True))," + "Sometimes(0.5, iaa.contrast.LinearContrast((0.5, 2.2), per_channel=0.3))," + "Sometimes(0.5, Grayscale(alpha=(0.0, 1.0)))," # maybe 
remove for det + "], random_order=True)" + # cosy+aae + ), +) + +SOLVER = dict( + IMS_PER_BATCH=36, + TOTAL_EPOCHS=100, + LR_SCHEDULER_NAME="flat_and_anneal", + ANNEAL_METHOD="cosine", # "cosine" + ANNEAL_POINT=0.72, + OPTIMIZER_CFG=dict(_delete_=True, type="Ranger", lr=8e-4, weight_decay=0.01), + WEIGHT_DECAY=0.0, + WARMUP_FACTOR=0.001, + WARMUP_ITERS=1000, +) + +DATASETS = dict( + TRAIN=("ycbv_002_master_chef_can_train_pbr", "ycbv_002_master_chef_can_train_real"), + TEST=("ycbv_test",), + DET_FILES_TEST=("datasets/BOP_DATASETS/ycbv/test/test_bboxes/yolox_x_640_ycbv_real_pbr_ycbv_bop_test.json",), + SYM_OBJS=[ + "024_bowl", + "036_wood_block", + "051_large_clamp", + "052_extra_large_clamp", + "061_foam_brick", + ], # used for custom evalutor +) + +DATALOADER = dict( + # Number of data loading threads + NUM_WORKERS=8, + FILTER_VISIB_THR=0.3, +) + +MODEL = dict( + LOAD_DETS_TEST=True, + PIXEL_MEAN=[0.0, 0.0, 0.0], + PIXEL_STD=[255.0, 255.0, 255.0], + BBOX_TYPE="AMODAL_CLIP", # VISIB or AMODAL + POSE_NET=dict( + NAME="GDRN_double_mask", + XYZ_ONLINE=True, + BACKBONE=dict( + FREEZE=False, + PRETRAINED="timm", + INIT_CFG=dict( + type="timm/convnext_base", + pretrained=True, + in_chans=3, + features_only=True, + out_indices=(3,), + ), + ), + ## geo head: Mask, XYZ, Region + GEO_HEAD=dict( + FREEZE=False, + INIT_CFG=dict( + type="TopDownDoubleMaskXyzRegionHead", + in_dim=1024, # this is num out channels of backbone conv feature + ), + NUM_REGIONS=64, + ), + PNP_NET=dict( + INIT_CFG=dict(norm="GN", act="gelu"), + REGION_ATTENTION=True, + WITH_2D_COORD=True, + ROT_TYPE="allo_rot6d", + TRANS_TYPE="centroid_z", + ), + LOSS_CFG=dict( + # xyz loss ---------------------------- + XYZ_LOSS_TYPE="L1", # L1 | CE_coor + XYZ_LOSS_MASK_GT="visib", # trunc | visib | obj + XYZ_LW=1.0, + # mask loss --------------------------- + MASK_LOSS_TYPE="L1", # L1 | BCE | CE + MASK_LOSS_GT="trunc", # trunc | visib | gt + MASK_LW=1.0, + # full mask loss --------------------------- + 
FULL_MASK_LOSS_TYPE="L1", # L1 | BCE | CE + FULL_MASK_LW=1.0, + # region loss ------------------------- + REGION_LOSS_TYPE="CE", # CE + REGION_LOSS_MASK_GT="visib", # trunc | visib | obj + REGION_LW=1.0, + # pm loss -------------- + PM_LOSS_SYM=True, # NOTE: sym loss + PM_R_ONLY=True, # only do R loss in PM + PM_LW=1.0, + # centroid loss ------- + CENTROID_LOSS_TYPE="L1", + CENTROID_LW=1.0, + # z loss ----------- + Z_LOSS_TYPE="L1", + Z_LW=1.0, + ), + ), +) + +VAL = dict( + DATASET_NAME="ycbv", + SPLIT_TYPE="", + SCRIPT_PATH="lib/pysixd/scripts/eval_pose_results_more.py", + TARGETS_FILENAME="test_targets_bop19.json", + ERROR_TYPES="vsd,mspd,mssd", + USE_BOP=True, # whether to use bop toolkit +) + +TEST = dict(EVAL_PERIOD=0, VIS=False, TEST_BBOX_TYPE="est") # gt | est diff --git a/configs/gdrn/fruitbinSO/convnext_AugCosyAAEGray_DMask_amodalClipBox_fruitbin/lemon2.py b/configs/gdrn/fruitbinSO/convnext_AugCosyAAEGray_DMask_amodalClipBox_fruitbin/lemon2.py new file mode 100644 index 0000000000000000000000000000000000000000..23e99a379bb037b8ad33df51c390f8f2ee0120b9 --- /dev/null +++ b/configs/gdrn/fruitbinSO/convnext_AugCosyAAEGray_DMask_amodalClipBox_fruitbin/lemon2.py @@ -0,0 +1,3 @@ +_base_ = "./002_master_chef_can.py" +OUTPUT_DIR = "output/gdrn/ycbvSO/convnext_AugCosyAAEGray_DMask_amodalClipBox_ycbv/003_cracker_box" +DATASETS = dict(TRAIN=("ycbv_003_cracker_box_train_pbr", "ycbv_003_cracker_box_train_real")) diff --git a/configs/gdrn/fruitbinSO/convnext_AugCosyAAEGray_DMask_amodalClipBox_fruitbin/pear2.py b/configs/gdrn/fruitbinSO/convnext_AugCosyAAEGray_DMask_amodalClipBox_fruitbin/pear2.py new file mode 100644 index 0000000000000000000000000000000000000000..efa07cb00fa8d4d1863dc6dce7ff5c9eb262407f --- /dev/null +++ b/configs/gdrn/fruitbinSO/convnext_AugCosyAAEGray_DMask_amodalClipBox_fruitbin/pear2.py @@ -0,0 +1,3 @@ +_base_ = "./002_master_chef_can.py" +OUTPUT_DIR = "output/gdrn/ycbvSO/convnext_AugCosyAAEGray_DMask_amodalClipBox_ycbv/004_sugar_box" +DATASETS = 
dict(TRAIN=("ycbv_004_sugar_box_train_pbr", "ycbv_004_sugar_box_train_real")) diff --git a/configs/yolox/bop_pbr/yolox_x_640_augCozyAAEhsv_ranger_30_epochs_fruitbin_pbr_fruitbin_bop_test.py b/configs/yolox/bop_pbr/yolox_x_640_augCozyAAEhsv_ranger_30_epochs_fruitbin_pbr_fruitbin_bop_test.py new file mode 100644 index 0000000000000000000000000000000000000000..729c297d4a926c1db49bc47780d732e864793f41 --- /dev/null +++ b/configs/yolox/bop_pbr/yolox_x_640_augCozyAAEhsv_ranger_30_epochs_fruitbin_pbr_fruitbin_bop_test.py @@ -0,0 +1,112 @@ +import os.path as osp + +import torch +from detectron2.config import LazyCall as L +from detectron2.solver.build import get_default_optimizer_params + +from .yolox_base import train, val, test, model, dataloader, optimizer, lr_config, DATASETS # noqa +from det.yolox.data import build_yolox_test_loader, ValTransform +from det.yolox.data.datasets import Base_DatasetFromList +from detectron2.data import get_detection_dataset_dicts +from det.yolox.evaluators import YOLOX_COCOEvaluator +from lib.torch_utils.solver.ranger import Ranger + +train.update( + output_dir=osp.abspath(__file__).replace("configs", "output", 1)[0:-3], + exp_name=osp.split(osp.abspath(__file__))[1][0:-3], # .py +) +train.amp.enabled = True + +model.backbone.depth = 1.33 +model.backbone.width = 1.25 + +model.head.num_classes = 21 + +train.init_checkpoint = "pretrained_models/yolox/yolox_x.pth" + +# datasets +DATASETS.TRAIN = ["ycbv_train_pbr"] +DATASETS.TEST = ["ycbv_bop_test"] + +dataloader.train.dataset.lst.names = DATASETS.TRAIN +dataloader.train.total_batch_size = 32 + +# color aug +dataloader.train.aug_wrapper.COLOR_AUG_PROB = 0.8 +dataloader.train.aug_wrapper.COLOR_AUG_TYPE = "code" +dataloader.train.aug_wrapper.COLOR_AUG_CODE = ( + "Sequential([" + # Sometimes(0.5, PerspectiveTransform(0.05)), + # Sometimes(0.5, CropAndPad(percent=(-0.05, 0.1))), + # Sometimes(0.5, Affine(scale=(1.0, 1.2))), + "Sometimes(0.5, CoarseDropout( p=0.2, size_percent=0.05) )," + 
"Sometimes(0.4, GaussianBlur((0., 3.)))," + "Sometimes(0.3, pillike.EnhanceSharpness(factor=(0., 50.)))," + "Sometimes(0.3, pillike.EnhanceContrast(factor=(0.2, 50.)))," + "Sometimes(0.5, pillike.EnhanceBrightness(factor=(0.1, 6.)))," + "Sometimes(0.3, pillike.EnhanceColor(factor=(0., 20.)))," + "Sometimes(0.5, Add((-25, 25), per_channel=0.3))," + "Sometimes(0.3, Invert(0.2, per_channel=True))," + "Sometimes(0.5, Multiply((0.6, 1.4), per_channel=0.5))," + "Sometimes(0.5, Multiply((0.6, 1.4)))," + "Sometimes(0.1, AdditiveGaussianNoise(scale=10, per_channel=True))," + "Sometimes(0.5, iaa.contrast.LinearContrast((0.5, 2.2), per_channel=0.3))," + # "Sometimes(0.5, Grayscale(alpha=(0.0, 1.0)))," # maybe remove for det + "], random_order=True)" + # cosy+aae +) + +# hsv color aug +dataloader.train.aug_wrapper.AUG_HSV_PROB = 1.0 +dataloader.train.aug_wrapper.HSV_H = 0.015 +dataloader.train.aug_wrapper.HSV_S = 0.7 +dataloader.train.aug_wrapper.HSV_V = 0.4 +dataloader.train.aug_wrapper.FORMAT = "RGB" + +optimizer = L(Ranger)( + params=L(get_default_optimizer_params)( + # params.model is meant to be set to the model object, before instantiating + # the optimizer. 
+ weight_decay_norm=0.0, + weight_decay_bias=0.0, + ), + lr=0.001, # bs=64 + # momentum=0.9, + weight_decay=0, + # nesterov=True, +) + +train.total_epochs = 30 +train.no_aug_epochs = 15 +train.checkpointer = dict(period=2, max_to_keep=10) + +test.test_dataset_names = DATASETS.TEST +test.augment = True +test.scales = (1, 0.75, 0.83, 1.12, 1.25) +test.conf_thr = 0.001 + +dataloader.test = [ + L(build_yolox_test_loader)( + dataset=L(Base_DatasetFromList)( + split="test", + lst=L(get_detection_dataset_dicts)(names=test_dataset_name, filter_empty=False), + img_size="${test.test_size}", + preproc=L(ValTransform)( + legacy=False, + ), + ), + total_batch_size=1, + # total_batch_size=64, + num_workers=4, + pin_memory=True, + ) + for test_dataset_name in test.test_dataset_names +] + +dataloader.evaluator = [ + L(YOLOX_COCOEvaluator)( + dataset_name=test_dataset_name, + filter_scene=False, + ) + for test_dataset_name in test.test_dataset_names +] diff --git a/core/gdrn_modeling/datasets/fruitbin_bop_test.py b/core/gdrn_modeling/datasets/fruitbin_bop_test.py new file mode 100644 index 0000000000000000000000000000000000000000..43dd74bf91817e068b844d52cdb2a9c7904218fd --- /dev/null +++ b/core/gdrn_modeling/datasets/fruitbin_bop_test.py @@ -0,0 +1,455 @@ +import hashlib +import logging +import os +import os.path as osp +import sys + +cur_dir = osp.dirname(osp.abspath(__file__)) +PROJ_ROOT = osp.normpath(osp.join(cur_dir, "../../..")) +sys.path.insert(0, PROJ_ROOT) +import time +from collections import OrderedDict +import mmcv +import numpy as np +from tqdm import tqdm +from transforms3d.quaternions import mat2quat, quat2mat +import ref +from detectron2.data import DatasetCatalog, MetadataCatalog +from detectron2.structures import BoxMode +from lib.pysixd import inout, misc +from lib.utils.mask_utils import binary_mask_to_rle, cocosegm2mask +from lib.utils.utils import dprint, iprint, lazy_property + + +logger = logging.getLogger(__name__) +DATASETS_ROOT = 
osp.normpath(osp.join(PROJ_ROOT, "datasets")) + + +class YCBV_BOP_TEST_Dataset: + """ycbv bop test.""" + + def __init__(self, data_cfg): + """ + Set with_depth and with_masks default to True, + and decide whether to load them into dataloader/network later + with_masks: + """ + self.name = data_cfg["name"] + self.data_cfg = data_cfg + + self.objs = data_cfg["objs"] # selected objects + # all classes are self.objs, but this enables us to evaluate on selected objs + self.select_objs = data_cfg.get("select_objs", self.objs) + + self.ann_file = data_cfg["ann_file"] # json file with scene_id and im_id items + + self.dataset_root = data_cfg["dataset_root"] # BOP_DATASETS/ycbv/test + self.models_root = data_cfg["models_root"] # BOP_DATASETS/ycbv/models + self.scale_to_meter = data_cfg["scale_to_meter"] # 0.001 + + self.with_masks = data_cfg["with_masks"] # True (load masks but may not use it) + self.with_depth = data_cfg["with_depth"] # True (load depth path here, but may not use it) + + self.height = data_cfg["height"] # 480 + self.width = data_cfg["width"] # 640 + + self.cache_dir = data_cfg.get("cache_dir", osp.join(PROJ_ROOT, ".cache")) # .cache + self.use_cache = data_cfg.get("use_cache", True) + self.num_to_load = data_cfg["num_to_load"] # -1 + self.filter_invalid = data_cfg["filter_invalid"] + ################################################## + + # NOTE: careful! Only the selected objects + self.cat_ids = [cat_id for cat_id, obj_name in ref.ycbv.id2obj.items() if obj_name in self.objs] + # map selected objs to [0, num_objs-1] + self.cat2label = {v: i for i, v in enumerate(self.cat_ids)} # id_map + self.label2cat = {label: cat for cat, label in self.cat2label.items()} + self.obj2label = OrderedDict((obj, obj_id) for obj_id, obj in enumerate(self.objs)) + ########################################################## + + def __call__(self): + """Load light-weight instance annotations of all images into a list of + dicts in Detectron2 format. 
+ + Do not load heavy data into memory in this file, since we will + load the annotations of all images into memory. + """ + # cache the dataset_dicts to avoid loading masks from files + hashed_file_name = hashlib.md5( + ( + "".join([str(fn) for fn in self.objs]) + + "dataset_dicts_{}_{}_{}_{}_{}".format( + self.name, + self.dataset_root, + self.with_masks, + self.with_depth, + __name__, + ) + ).encode("utf-8") + ).hexdigest() + cache_path = osp.join( + self.cache_dir, + "dataset_dicts_{}_{}.pkl".format(self.name, hashed_file_name), + ) + + if osp.exists(cache_path) and self.use_cache: + logger.info("load cached dataset dicts from {}".format(cache_path)) + return mmcv.load(cache_path) + + t_start = time.perf_counter() + + logger.info("loading dataset dicts: {}".format(self.name)) + self.num_instances_without_valid_segmentation = 0 + self.num_instances_without_valid_box = 0 + dataset_dicts = [] # ###################################################### + im_id_global = 0 + + if True: + targets = mmcv.load(self.ann_file) + scene_im_ids = [(item["scene_id"], item["im_id"]) for item in targets] + scene_im_ids = sorted(list(set(scene_im_ids))) + + # load infos for each scene + gt_dicts = {} + gt_info_dicts = {} + cam_dicts = {} + for scene_id, im_id in scene_im_ids: + scene_root = osp.join(self.dataset_root, f"{scene_id:06d}") + if scene_id not in gt_dicts: + gt_dicts[scene_id] = mmcv.load(osp.join(scene_root, "scene_gt.json")) + if scene_id not in gt_info_dicts: + gt_info_dicts[scene_id] = mmcv.load( + osp.join(scene_root, "scene_gt_info.json") + ) # bbox_obj, bbox_visib + if scene_id not in cam_dicts: + cam_dicts[scene_id] = mmcv.load(osp.join(scene_root, "scene_camera.json")) + + for scene_id, im_id in tqdm(scene_im_ids): + str_im_id = str(im_id) + scene_root = osp.join(self.dataset_root, f"{scene_id:06d}") + rgb_path = osp.join(scene_root, "rgb/{:06d}.png").format(im_id) + assert osp.exists(rgb_path), rgb_path + + depth_path = osp.join(scene_root, 
"depth/{:06d}.png".format(im_id)) + + scene_id = int(rgb_path.split("/")[-3]) + + cam = np.array(cam_dicts[scene_id][str_im_id]["cam_K"], dtype=np.float32).reshape(3, 3) + depth_factor = 1000.0 / cam_dicts[scene_id][str_im_id]["depth_scale"] + record = { + "dataset_name": self.name, + "file_name": osp.relpath(rgb_path, PROJ_ROOT), + "depth_file": osp.relpath(depth_path, PROJ_ROOT), + "depth_factor": depth_factor, + "height": self.height, + "width": self.width, + "image_id": im_id_global, # unique image_id in the dataset, for coco evaluation + "scene_im_id": "{}/{}".format(scene_id, im_id), # for evaluation + "cam": cam, + "img_type": "real", + } + im_id_global += 1 + insts = [] + for anno_i, anno in enumerate(gt_dicts[scene_id][str_im_id]): + obj_id = anno["obj_id"] + if ref.ycbv.id2obj[obj_id] not in self.select_objs: + continue + cur_label = self.cat2label[obj_id] # 0-based label + R = np.array(anno["cam_R_m2c"], dtype="float32").reshape(3, 3) + t = np.array(anno["cam_t_m2c"], dtype="float32") / 1000.0 + pose = np.hstack([R, t.reshape(3, 1)]) + quat = mat2quat(R).astype("float32") + + proj = (record["cam"] @ t.T).T + proj = proj[:2] / proj[2] + + bbox_visib = gt_info_dicts[scene_id][str_im_id][anno_i]["bbox_visib"] + bbox_obj = gt_info_dicts[scene_id][str_im_id][anno_i]["bbox_obj"] + x1, y1, w, h = bbox_visib + if self.filter_invalid: + if h <= 1 or w <= 1: + self.num_instances_without_valid_box += 1 + continue + + mask_file = osp.join( + scene_root, + "mask/{:06d}_{:06d}.png".format(im_id, anno_i), + ) + mask_visib_file = osp.join( + scene_root, + "mask_visib/{:06d}_{:06d}.png".format(im_id, anno_i), + ) + assert osp.exists(mask_file), mask_file + assert osp.exists(mask_visib_file), mask_visib_file + # load mask visib + mask_single = mmcv.imread(mask_visib_file, "unchanged") + area = mask_single.sum() + if area < 3: # filter out too small or nearly invisible instances + self.num_instances_without_valid_segmentation += 1 + continue + mask_rle = 
binary_mask_to_rle(mask_single, compressed=True) + + # load mask full + mask_full = mmcv.imread(mask_file, "unchanged") + mask_full = mask_full.astype("bool") + mask_full_rle = binary_mask_to_rle(mask_full, compressed=True) + + inst = { + "category_id": cur_label, # 0-based label + "bbox": bbox_visib, + "bbox_obj": bbox_obj, + "bbox_mode": BoxMode.XYWH_ABS, + "pose": pose, + "quat": quat, + "trans": t, + "centroid_2d": proj, # absolute (cx, cy) + "segmentation": mask_rle, + "mask_full": mask_full_rle, # TODO: load as mask_full, rle + } + + model_info = self.models_info[str(obj_id)] + inst["model_info"] = model_info + # TODO: using full mask and full xyz + for key in ["bbox3d_and_center"]: + inst[key] = self.models[cur_label][key] + insts.append(inst) + if len(insts) == 0: # filter im without anno + continue + record["annotations"] = insts + dataset_dicts.append(record) + + if self.num_instances_without_valid_segmentation > 0: + logger.warning( + "Filtered out {} instances without valid segmentation. " + "There might be issues in your dataset generation process.".format( + self.num_instances_without_valid_segmentation + ) + ) + if self.num_instances_without_valid_box > 0: + logger.warning( + "Filtered out {} instances without valid box. 
" + "There might be issues in your dataset generation process.".format(self.num_instances_without_valid_box) + ) + ########################################################################## + if self.num_to_load > 0: + self.num_to_load = min(int(self.num_to_load), len(dataset_dicts)) + dataset_dicts = dataset_dicts[: self.num_to_load] + logger.info("loaded {} dataset dicts, using {}s".format(len(dataset_dicts), time.perf_counter() - t_start)) + + mmcv.mkdir_or_exist(osp.dirname(cache_path)) + mmcv.dump(dataset_dicts, cache_path, protocol=4) + logger.info("Dumped dataset_dicts to {}".format(cache_path)) + return dataset_dicts + + @lazy_property + def models_info(self): + models_info_path = osp.join(self.models_root, "models_info.json") + assert osp.exists(models_info_path), models_info_path + models_info = mmcv.load(models_info_path) # key is str(obj_id) + return models_info + + @lazy_property + def models(self): + """Load models into a list.""" + cache_path = osp.join(self.models_root, f"models_{self.name}.pkl") + if osp.exists(cache_path) and self.use_cache: + # dprint("{}: load cached object models from {}".format(self.name, cache_path)) + return mmcv.load(cache_path) + + models = [] + for obj_name in self.objs: + model = inout.load_ply( + osp.join( + self.models_root, + f"obj_{ref.ycbv.obj2id[obj_name]:06d}.ply", + ), + vertex_scale=self.scale_to_meter, + ) + # NOTE: the bbox3d_and_center is not obtained from centered vertices + # for BOP models, not a big problem since they had been centered + model["bbox3d_and_center"] = misc.get_bbox3d_and_center(model["pts"]) + + models.append(model) + logger.info("cache models to {}".format(cache_path)) + mmcv.dump(models, cache_path, protocol=4) + return models + + def image_aspect_ratio(self): + return self.width / self.height # 4/3 + + +########### register datasets ############################################################ + + +def get_ycbv_metadata(obj_names, ref_key): + """task specific metadata.""" + data_ref = 
ref.__dict__[ref_key] + + cur_sym_infos = {} # label based key + loaded_models_info = data_ref.get_models_info() + + for i, obj_name in enumerate(obj_names): + obj_id = data_ref.obj2id[obj_name] + model_info = loaded_models_info[str(obj_id)] + if "symmetries_discrete" in model_info or "symmetries_continuous" in model_info: + sym_transforms = misc.get_symmetry_transformations(model_info, max_sym_disc_step=0.01) + sym_info = np.array([sym["R"] for sym in sym_transforms], dtype=np.float32) + else: + sym_info = None + cur_sym_infos[i] = sym_info + + meta = {"thing_classes": obj_names, "sym_infos": cur_sym_infos} + return meta + + +################################################################################ + +SPLITS_YCBV = dict( + ycbv_bop_test=dict( + name="ycbv_bop_test", + dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/test"), + models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/models"), + objs=ref.ycbv.objects, # selected objects + ann_file=osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/test_targets_bop19.json"), + scale_to_meter=0.001, + with_masks=True, # (load masks but may not use it) + with_depth=True, # (load depth path here, but may not use it) + height=480, + width=640, + cache_dir=osp.join(PROJ_ROOT, ".cache"), + use_cache=True, + num_to_load=-1, + filter_invalid=False, + ref_key="ycbv", + ) +) + + +# single objs (num_class is from all objs) +for obj in ref.ycbv.objects: + name = "ycbv_bop_{}_test".format(obj) + select_objs = [obj] + if name not in SPLITS_YCBV: + SPLITS_YCBV[name] = dict( + name=name, + dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/test"), + models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/models"), + objs=[obj], # only this obj + select_objs=select_objs, # selected objects + ann_file=osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/test_targets_bop19.json"), + scale_to_meter=0.001, + with_masks=True, # (load masks but may not use it) + with_depth=True, # (load depth path here, but may not use it) + height=480, + 
width=640,
+            cache_dir=osp.join(PROJ_ROOT, ".cache"),
+            use_cache=True,
+            num_to_load=-1,
+            filter_invalid=False,
+            ref_key="ycbv",
+        )
+
+
+def register_with_name_cfg(name, data_cfg=None):
+    """Assume pre-defined datasets live in `./datasets`.
+
+    Args:
+        name: dataset name (a key of SPLITS_YCBV when pre-defined),
+        data_cfg: if name is in the existing SPLITS, the pre-defined data_cfg
+            is used and this argument is ignored; otherwise it is required
+            (it can be set in cfg.DATA_CFG.name)
+    """
+    dprint("register dataset: {}".format(name))
+    if name in SPLITS_YCBV:
+        used_cfg = SPLITS_YCBV[name]
+    else:
+        assert data_cfg is not None, f"dataset name {name} is not registered"
+        used_cfg = data_cfg
+    DatasetCatalog.register(name, YCBV_BOP_TEST_Dataset(used_cfg))
+    # something like eval_types
+    MetadataCatalog.get(name).set(
+        id="ycbv",  # NOTE: for pvnet to determine module
+        ref_key=used_cfg["ref_key"],
+        objs=used_cfg["objs"],
+        eval_error_types=["ad", "rete", "proj"],
+        evaluator_type="bop",
+        **get_ycbv_metadata(obj_names=used_cfg["objs"], ref_key=used_cfg["ref_key"]),
+    )
+
+
+def get_available_datasets():
+    return list(SPLITS_YCBV.keys())
+
+
+#### tests ###############################################
+def test_vis():
+    dset_name = sys.argv[1]
+    assert dset_name in DatasetCatalog.list()
+
+    meta = MetadataCatalog.get(dset_name)
+    dprint("MetadataCatalog: ", meta)
+    objs = meta.objs
+
+    t_start = time.perf_counter()
+    dicts = DatasetCatalog.get(dset_name)
+    logger.info("Done loading {} samples with {:.3f}s.".format(len(dicts), time.perf_counter() - t_start))
+
+    dirname = "output/{}-data-vis".format(dset_name)
+    os.makedirs(dirname, exist_ok=True)
+    for d in dicts:
+        img = read_image_mmcv(d["file_name"], format="BGR")
+        depth = mmcv.imread(d["depth_file"], "unchanged") / d["depth_factor"]
+
+        imH, imW = img.shape[:2]
+        annos = d["annotations"]
+        masks = [cocosegm2mask(anno["segmentation"], imH, imW) for anno in annos]
+        bboxes = [anno["bbox"] for anno in annos]
+        bbox_modes = [anno["bbox_mode"] for anno in annos]
+        bboxes_xyxy = 
np.array( + [BoxMode.convert(box, box_mode, BoxMode.XYXY_ABS) for box, box_mode in zip(bboxes, bbox_modes)] + ) + kpts_3d_list = [anno["bbox3d_and_center"] for anno in annos] + quats = [anno["quat"] for anno in annos] + transes = [anno["trans"] for anno in annos] + Rs = [quat2mat(quat) for quat in quats] + # 0-based label + cat_ids = [anno["category_id"] for anno in annos] + K = d["cam"] + kpts_2d = [misc.project_pts(kpt3d, K, R, t) for kpt3d, R, t in zip(kpts_3d_list, Rs, transes)] + # # TODO: visualize pose and keypoints + labels = [objs[cat_id] for cat_id in cat_ids] + # img_vis = vis_image_bboxes_cv2(img, bboxes=bboxes_xyxy, labels=labels) + img_vis = vis_image_mask_bbox_cv2(img, masks, bboxes=bboxes_xyxy, labels=labels) + img_vis_kpts2d = img.copy() + for anno_i in range(len(annos)): + img_vis_kpts2d = misc.draw_projected_box3d(img_vis_kpts2d, kpts_2d[anno_i]) + grid_show( + [ + img[:, :, [2, 1, 0]], + img_vis[:, :, [2, 1, 0]], + img_vis_kpts2d[:, :, [2, 1, 0]], + depth, + ], + [f"img:{d['file_name']}", "vis_img", "img_vis_kpts2d", "depth"], + row=2, + col=2, + ) + + +if __name__ == "__main__": + """Test the dataset loader. 
+ + Usage: + python -m core.datasets.ycbv_bop_test dataset_name + """ + from lib.vis_utils.image import grid_show + from lib.utils.setup_logger import setup_my_logger + import detectron2.data.datasets # noqa # add pre-defined metadata + from core.utils.data_utils import read_image_mmcv + from lib.vis_utils.image import vis_image_mask_bbox_cv2 + + print("sys.argv:", sys.argv) + logger = setup_my_logger(name="core") + register_with_name_cfg(sys.argv[1]) + print("dataset catalog: ", DatasetCatalog.list()) + test_vis() diff --git a/core/gdrn_modeling/datasets/fruitbin_d2.py b/core/gdrn_modeling/datasets/fruitbin_d2.py new file mode 100755 index 0000000000000000000000000000000000000000..385f463603ce13afb9ac5c8e022a8fa3907b688e --- /dev/null +++ b/core/gdrn_modeling/datasets/fruitbin_d2.py @@ -0,0 +1,741 @@ +import hashlib +import copy +import logging +import os +import os.path as osp +import sys + +cur_dir = osp.dirname(osp.abspath(__file__)) +PROJ_ROOT = osp.normpath(osp.join(cur_dir, "../../..")) +sys.path.insert(0, PROJ_ROOT) +import time +from collections import OrderedDict +import mmcv +import numpy as np +from tqdm import tqdm +from transforms3d.quaternions import mat2quat, quat2mat +import ref +from detectron2.data import DatasetCatalog, MetadataCatalog +from detectron2.structures import BoxMode +from lib.pysixd import inout, misc +from lib.utils.mask_utils import binary_mask_to_rle, cocosegm2mask +from lib.utils.utils import dprint, iprint, lazy_property + + +logger = logging.getLogger(__name__) +DATASETS_ROOT = osp.normpath(osp.join(PROJ_ROOT, "datasets")) + + +class YCBV_Dataset: + """use image_sets(scene/image_id) and image root to get data; Here we use + bop models, which are center aligned and have some offsets compared to + original models.""" + + def __init__(self, data_cfg): + """ + Set with_depth and with_masks default to True, + and decide whether to load them into dataloader/network later + with_masks: + """ + self.name = data_cfg["name"] + 
self.data_cfg = data_cfg + + self.objs = data_cfg["objs"] # selected objects + + self.ann_files = data_cfg["ann_files"] # provide scene/im_id list + self.image_prefixes = data_cfg["image_prefixes"] # image root + + self.dataset_root = data_cfg["dataset_root"] # BOP_DATASETS/ycbv/ + assert osp.exists(self.dataset_root), self.dataset_root + self.models_root = data_cfg["models_root"] # BOP_DATASETS/ycbv/models + self.scale_to_meter = data_cfg["scale_to_meter"] # 0.001 + + self.with_masks = data_cfg["with_masks"] # True (load masks but may not use it) + self.with_depth = data_cfg["with_depth"] # True (load depth path here, but may not use it) + self.with_xyz = data_cfg["with_xyz"] + + self.height = data_cfg["height"] # 480 + self.width = data_cfg["width"] # 640 + + self.cache_dir = data_cfg.get("cache_dir", osp.join(PROJ_ROOT, ".cache")) # .cache + self.use_cache = data_cfg.get("use_cache", True) + self.num_to_load = data_cfg["num_to_load"] # -1 + self.filter_invalid = data_cfg["filter_invalid"] + + self.align_K_by_change_pose = data_cfg.get("align_K_by_change_pose", False) + # default: 0000~0059 and synt + self.cam = np.array( + [ + [1066.778, 0.0, 312.9869], + [0.0, 1067.487, 241.3109], + [0.0, 0.0, 1.0], + ], + dtype="float32", + ) + # 0060~0091 + # cmu_cam = np.array([[1077.836, 0.0, 323.7872], [0.0, 1078.189, 279.6921], [0.0, 0.0, 1.0]], dtype='float32') + ################################################## + + # NOTE: careful! 
Only the selected objects + self.cat_ids = [cat_id for cat_id, obj_name in ref.ycbv.id2obj.items() if obj_name in self.objs] + # map selected objs to [0, num_objs-1] + self.cat2label = {v: i for i, v in enumerate(self.cat_ids)} # id_map + self.label2cat = {label: cat for cat, label in self.cat2label.items()} + self.obj2label = OrderedDict((obj, obj_id) for obj_id, obj in enumerate(self.objs)) + ########################################################## + + def _load_from_idx_file(self, idx_file, image_root): + """ + idx_file: the scene/image ids + image_root/scene contains: + scene_gt.json + scene_gt_info.json + scene_camera.json + """ + xyz_root = osp.join(image_root, "xyz_crop") + scene_gt_dicts = {} + scene_gt_info_dicts = {} + scene_cam_dicts = {} + scene_im_ids = [] # store tuples of (scene_id, im_id) + with open(idx_file, "r") as f: + for line in f: + line_split = line.strip("\r\n").split("/") + scene_id = int(line_split[0]) + im_id = int(line_split[1]) + scene_im_ids.append((scene_id, im_id)) + if scene_id not in scene_gt_dicts: + scene_gt_file = osp.join(image_root, f"{scene_id:06d}/scene_gt.json") + assert osp.exists(scene_gt_file), scene_gt_file + scene_gt_dicts[scene_id] = mmcv.load(scene_gt_file) + + if scene_id not in scene_gt_info_dicts: + scene_gt_info_file = osp.join(image_root, f"{scene_id:06d}/scene_gt_info.json") + assert osp.exists(scene_gt_info_file), scene_gt_info_file + scene_gt_info_dicts[scene_id] = mmcv.load(scene_gt_info_file) + + if scene_id not in scene_cam_dicts: + scene_cam_file = osp.join(image_root, f"{scene_id:06d}/scene_camera.json") + assert osp.exists(scene_cam_file), scene_cam_file + scene_cam_dicts[scene_id] = mmcv.load(scene_cam_file) + ###################################################### + scene_im_ids = sorted(scene_im_ids) # sort to make it reproducible + dataset_dicts = [] + + num_instances_without_valid_segmentation = 0 + num_instances_without_valid_box = 0 + + for (scene_id, im_id) in tqdm(scene_im_ids): + rgb_path = 
osp.join(image_root, f"{scene_id:06d}/rgb/{im_id:06d}.png") + assert osp.exists(rgb_path), rgb_path + str_im_id = str(im_id) + + scene_im_id = f"{scene_id}/{im_id}" + + # for ycbv/tless, load cam K from image infos + cam_anno = np.array(scene_cam_dicts[scene_id][str_im_id]["cam_K"], dtype=np.float32).reshape(3, 3) + adapth_this_K = False + if self.align_K_by_change_pose: + if (cam_anno != self.cam).any(): + adapth_this_K = True + cam_anno_ori = cam_anno.copy() + cam_anno = self.cam + + depth_factor = 1000.0 / scene_cam_dicts[scene_id][str_im_id]["depth_scale"] + # dprint(record['cam']) + if "/train_synt/" in rgb_path: + img_type = "syn" + else: + img_type = "real" + record = { + "dataset_name": self.name, + "file_name": osp.relpath(rgb_path, PROJ_ROOT), + "height": self.height, + "width": self.width, + "image_id": self._unique_im_id, + "scene_im_id": scene_im_id, # for evaluation + "cam": cam_anno, # self.cam, + "depth_factor": depth_factor, + "img_type": img_type, + } + + if self.with_depth: + depth_file = osp.join(image_root, f"{scene_id:06d}/depth/{im_id:06d}.png") + assert osp.exists(depth_file), depth_file + record["depth_file"] = osp.relpath(depth_file, PROJ_ROOT) + + insts = [] + anno_dict_list = scene_gt_dicts[scene_id][str(im_id)] + info_dict_list = scene_gt_info_dicts[scene_id][str(im_id)] + for anno_i, anno in enumerate(anno_dict_list): + info = info_dict_list[anno_i] + obj_id = anno["obj_id"] + if obj_id not in self.cat_ids: + continue + # 0-based label now + cur_label = self.cat2label[obj_id] + ################ pose ########################### + R = np.array(anno["cam_R_m2c"], dtype="float32").reshape(3, 3) + trans = np.array(anno["cam_t_m2c"], dtype="float32") / 1000.0 # mm->m + pose = np.hstack([R, trans.reshape(3, 1)]) + if adapth_this_K: + # pose_uw = inv(K_uw) @ K_cmu @ pose_cmu + pose = np.linalg.inv(cam_anno) @ cam_anno_ori @ pose + # R = pose[:3, :3] + trans = pose[:3, 3] + + quat = mat2quat(pose[:3, :3]) + + ############# bbox 
############################ + bbox = info["bbox_obj"] + x1, y1, w, h = bbox + x2 = x1 + w + y2 = y1 + h + x1 = max(min(x1, self.width), 0) + y1 = max(min(y1, self.height), 0) + x2 = max(min(x2, self.width), 0) + y2 = max(min(y2, self.height), 0) + bbox = [x1, y1, x2, y2] + if self.filter_invalid: + bw = bbox[2] - bbox[0] + bh = bbox[3] - bbox[1] + if bh <= 1 or bw <= 1: + num_instances_without_valid_box += 1 + continue + + ############## mask ####################### + if self.with_masks: # either list[list[float]] or dict(RLE) + mask_visib_file = osp.join( + image_root, + f"{scene_id:06d}/mask_visib/{im_id:06d}_{anno_i:06d}.png", + ) + assert osp.exists(mask_visib_file), mask_visib_file + mask = mmcv.imread(mask_visib_file, "unchanged") + area = mask.sum() + if area < 30 and self.filter_invalid: + num_instances_without_valid_segmentation += 1 + continue + mask_rle = binary_mask_to_rle(mask) + + mask_full_file = osp.join( + image_root, + f"{scene_id:06d}/mask/{im_id:06d}_{anno_i:06d}.png", + ) + assert osp.exists(mask_full_file), mask_full_file + + # load mask full + mask_full = mmcv.imread(mask_full_file, "unchanged") + mask_full = mask_full.astype("bool") + mask_full_rle = binary_mask_to_rle(mask_full, compressed=True) + + proj = (self.cam @ trans.T).T # NOTE: use self.cam here + proj = proj[:2] / proj[2] + + inst = { + "category_id": cur_label, # 0-based label + "bbox": bbox, + "bbox_obj": bbox, + "bbox_mode": BoxMode.XYXY_ABS, + "pose": pose, + "quat": quat, + "trans": trans, + "centroid_2d": proj, # absolute (cx, cy) + "segmentation": mask_rle, + "mask_full": mask_full_rle, + } + + if self.with_xyz: + xyz_path = osp.join( + xyz_root, + f"{scene_id:06d}/{im_id:06d}_{anno_i:06d}-xyz.pkl", + ) + # assert osp.exists(xyz_path), xyz_path + inst["xyz_path"] = xyz_path + + model_info = self.models_info[str(obj_id)] + inst["model_info"] = model_info + # TODO: using full mask and full xyz + for key in ["bbox3d_and_center"]: + inst[key] = self.models[cur_label][key] + 
insts.append(inst) + if len(insts) == 0: # and self.filter_invalid: + continue + record["annotations"] = insts + dataset_dicts.append(record) + self._unique_im_id += 1 + + if num_instances_without_valid_segmentation > 0: + logger.warning( + "Filtered out {} instances without valid segmentation. " + "There might be issues in your dataset generation process.".format( + num_instances_without_valid_segmentation + ) + ) + if num_instances_without_valid_box > 0: + logger.warning( + "Filtered out {} instances without valid box. " + "There might be issues in your dataset generation process.".format(num_instances_without_valid_box) + ) + return dataset_dicts + + def __call__(self): # YCBV_Dataset + """Load light-weight instance annotations of all images into a list of + dicts in Detectron2 format. + + Do not load heavy data into memory in this file, since we will + load the annotations of all images into memory. + """ + # cache the dataset_dicts to avoid loading masks from files + hashed_file_name = hashlib.md5( + ( + "".join([str(fn) for fn in self.objs]) + + "dataset_dicts_{}_{}_{}_{}_{}_{}".format( + self.name, + self.dataset_root, + self.with_masks, + self.with_depth, + self.with_xyz, + __name__, + ) + ).encode("utf-8") + ).hexdigest() + cache_path = osp.join( + self.cache_dir, + "dataset_dicts_{}_{}.pkl".format(self.name, hashed_file_name), + ) + + if osp.exists(cache_path) and self.use_cache: + logger.info("load cached dataset dicts from {}".format(cache_path)) + return mmcv.load(cache_path) + + logger.info("loading dataset dicts: {}".format(self.name)) + t_start = time.perf_counter() + dataset_dicts = [] + self._unique_im_id = 0 + for ann_file, image_root in zip(self.ann_files, self.image_prefixes): + # logger.info("loading coco json: {}".format(ann_file)) + dataset_dicts.extend(self._load_from_idx_file(ann_file, image_root)) + + ########################################################################## + if self.num_to_load > 0: + self.num_to_load = 
min(int(self.num_to_load), len(dataset_dicts)) + dataset_dicts = dataset_dicts[: self.num_to_load] + logger.info("loaded {} dataset dicts, using {}s".format(len(dataset_dicts), time.perf_counter() - t_start)) + + mmcv.mkdir_or_exist(osp.dirname(cache_path)) + mmcv.dump(dataset_dicts, cache_path, protocol=4) + logger.info("Dumped dataset_dicts to {}".format(cache_path)) + return dataset_dicts + + @lazy_property + def models_info(self): + models_info_path = osp.join(self.models_root, "models_info.json") + assert osp.exists(models_info_path), models_info_path + models_info = mmcv.load(models_info_path) # key is str(obj_id) + return models_info + + @lazy_property + def models(self): + """Load models into a list.""" + cache_path = osp.join(self.models_root, "models_{}.pkl".format(self.name)) + if osp.exists(cache_path) and self.use_cache: + # dprint("{}: load cached object models from {}".format(self.name, cache_path)) + return mmcv.load(cache_path) + + models = [] + for obj_name in self.objs: + model = inout.load_ply( + osp.join( + self.models_root, + f"obj_{ref.ycbv.obj2id[obj_name]:06d}.ply", + ), + vertex_scale=self.scale_to_meter, + ) + # NOTE: the bbox3d_and_center is not obtained from centered vertices + # for BOP models, not a big problem since they had been centered + model["bbox3d_and_center"] = misc.get_bbox3d_and_center(model["pts"]) + + models.append(model) + logger.info("cache models to {}".format(cache_path)) + mmcv.dump(models, cache_path, protocol=4) + return models + + def image_aspect_ratio(self): + return self.width / self.height # 4/3 + + +########### register datasets ############################################################ + + +def get_ycbv_metadata(obj_names, ref_key): + """task specific metadata.""" + data_ref = ref.__dict__[ref_key] + + cur_sym_infos = {} # label based key + loaded_models_info = data_ref.get_models_info() + + for i, obj_name in enumerate(obj_names): + obj_id = data_ref.obj2id[obj_name] + model_info = 
loaded_models_info[str(obj_id)] + if "symmetries_discrete" in model_info or "symmetries_continuous" in model_info: + sym_transforms = misc.get_symmetry_transformations(model_info, max_sym_disc_step=0.01) + sym_info = np.array([sym["R"] for sym in sym_transforms], dtype=np.float32) + else: + sym_info = None + cur_sym_infos[i] = sym_info + + meta = {"thing_classes": obj_names, "sym_infos": cur_sym_infos} + return meta + + +ycbv_model_root = "BOP_DATASETS/ycbv/models/" +################################################################################ +default_cfg = dict( + # name="ycbv_train_real", + dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/"), + models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/models"), # models_simple + objs=ref.ycbv.objects, # all objects + # NOTE: this contains all classes + # ann_files=[osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/image_sets/train.txt")], + # image_prefixes=[osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/train_real")], + scale_to_meter=0.001, + with_masks=True, # (load masks but may not use it) + with_depth=True, # (load depth path here, but may not use it) + with_xyz=True, + height=480, + width=640, + align_K_by_change_pose=False, + cache_dir=osp.join(PROJ_ROOT, ".cache"), + use_cache=True, + num_to_load=-1, + filter_invalid=True, + ref_key="ycbv", +) +SPLITS_YCBV = {} +update_cfgs = { + "ycbv_train_real": { + "ann_files": [osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/image_sets/train.txt")], + "image_prefixes": [osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/train_real")], + }, + "ycbv_train_real_aligned_Kuw": { + "ann_files": [osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/image_sets/train.txt")], + "image_prefixes": [osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/train_real")], + "align_K_by_change_pose": True, + }, + "ycbv_train_real_uw": { + "ann_files": [osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/image_sets/train_real_uw.txt")], + "image_prefixes": [osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/train_real")], + }, + 
"ycbv_train_real_uw_every10": { + "ann_files": [ + osp.join( + DATASETS_ROOT, + "BOP_DATASETS/ycbv/image_sets/train_real_uw_every10.txt", + ) + ], + "image_prefixes": [osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/train_real")], + }, + "ycbv_train_real_cmu": { + "ann_files": [ + osp.join( + DATASETS_ROOT, + "BOP_DATASETS/ycbv/image_sets/train_real_cmu.txt", + ) + ], + "image_prefixes": [osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/train_real")], + }, + "ycbv_train_real_cmu_aligned_Kuw": { + "ann_files": [ + osp.join( + DATASETS_ROOT, + "BOP_DATASETS/ycbv/image_sets/train_real_cmu.txt", + ) + ], + "image_prefixes": [osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/train_real")], + "align_K_by_change_pose": True, + }, + "ycbv_train_synt": { + "ann_files": [osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/image_sets/train_synt.txt")], + "image_prefixes": [osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/train_synt")], + }, + "ycbv_train_synt_50k": { + "ann_files": [ + osp.join( + DATASETS_ROOT, + "BOP_DATASETS/ycbv/image_sets/train_synt_50k.txt", + ) + ], + "image_prefixes": [osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/train_synt")], + }, + "ycbv_train_synt_30k": { + "ann_files": [ + osp.join( + DATASETS_ROOT, + "BOP_DATASETS/ycbv/image_sets/train_synt_30k.txt", + ) + ], + "image_prefixes": [osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/train_synt")], + }, + "ycbv_train_synt_100": { + "ann_files": [ + osp.join( + DATASETS_ROOT, + "BOP_DATASETS/ycbv/image_sets/train_synt_100.txt", + ) + ], + "image_prefixes": [osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/train_synt")], + }, + "ycbv_test": { + "ann_files": [osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/image_sets/keyframe.txt")], + "image_prefixes": [osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/test")], + "with_xyz": False, + "filter_invalid": False, + }, +} +for name, update_cfg in update_cfgs.items(): + used_cfg = copy.deepcopy(default_cfg) + used_cfg["name"] = name + used_cfg.update(update_cfg) + num_to_load = -1 + if "_100" in name: + num_to_load = 
100 + used_cfg["num_to_load"] = num_to_load + SPLITS_YCBV[name] = used_cfg + +# single object splits ###################################################### +for obj in ref.ycbv.objects: + for split in [ + "train_real", + "train_real_aligned_Kuw", + "train_real_uw", + "train_real_uw_every10", + "train_real_cmu", + "train_real_cmu_aligned_Kuw", + "train_synt", + "train_synt_30k", + "test", + ]: + name = "ycbv_{}_{}".format(obj, split) + if split in [ + "train_real", + "train_real_aligned_Kuw", + "train_real_uw", + "train_real_uw_every10", + "train_real_cmu", + "train_real_cmu_aligned_Kuw", + "train_synt", + "train_synt_30k", + ]: + filter_invalid = True + with_xyz = True + elif split in ["test"]: + filter_invalid = False + with_xyz = False + else: + raise ValueError("{}".format(split)) + + if split in ["train_real_aligned_Kuw", "train_real_cmu_aligned_Kuw"]: + align_K_by_change_pose = True + else: + align_K_by_change_pose = False + + split_idx_file_dict = { + "train_real": ("train_real", "train.txt"), + "train_real_aligned_Kuw": ("train_real", "train.txt"), + "train_real_uw": ("train_real", "train_real_uw.txt"), + "train_real_uw_every10": ( + "train_real", + "train_real_uw_every10.txt", + ), + "train_real_cmu": ("train_real", "train_real_cmu.txt"), + "train_real_cmu_aligned_Kuw": ("train_real", "train_real_cmu.txt"), + "train_synt": ("train_synt", "train_synt.txt"), + "train_synt_30k": ("train_synt", "train_synt_30k.txt"), + "test": ("test", "keyframe.txt"), + } + root_name, idx_file = split_idx_file_dict[split] + + if name not in SPLITS_YCBV: + SPLITS_YCBV[name] = dict( + name=name, + dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/"), + models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/models"), + objs=[obj], + ann_files=[ + osp.join( + DATASETS_ROOT, + "BOP_DATASETS/ycbv/image_sets/{}".format(idx_file), + ) + ], + image_prefixes=[osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/{}".format(root_name))], + scale_to_meter=0.001, + with_masks=True, # (load 
masks but may not use it)
+                with_depth=True,  # (load depth path here, but may not use it)
+                with_xyz=with_xyz,
+                height=480,
+                width=640,
+                align_K_by_change_pose=align_K_by_change_pose,
+                cache_dir=osp.join(PROJ_ROOT, ".cache"),
+                use_cache=True,
+                num_to_load=-1,
+                filter_invalid=filter_invalid,
+                ref_key="ycbv",
+            )
+
+
+def register_with_name_cfg(name, data_cfg=None):
+    """Assume pre-defined datasets live in `./datasets`.
+
+    Args:
+        name: dataset name (a key of SPLITS_YCBV when pre-defined),
+        data_cfg: if name is in the existing SPLITS, the pre-defined data_cfg
+            is used and this argument is ignored; otherwise it is required
+            (it can be set in cfg.DATA_CFG.name)
+    """
+    dprint("register dataset: {}".format(name))
+    if name in SPLITS_YCBV:
+        used_cfg = SPLITS_YCBV[name]
+    else:
+        assert (
+            data_cfg is not None
+        ), f"dataset name {name} is not registered. available datasets: {list(SPLITS_YCBV.keys())}"
+        used_cfg = data_cfg
+    DatasetCatalog.register(name, YCBV_Dataset(used_cfg))
+    # something like eval_types
+    MetadataCatalog.get(name).set(
+        id="ycbv",  # NOTE: for pvnet to determine module
+        ref_key=used_cfg["ref_key"],
+        objs=used_cfg["objs"],
+        eval_error_types=["ad", "rete", "proj"],
+        evaluator_type="bop",
+        **get_ycbv_metadata(obj_names=used_cfg["objs"], ref_key=used_cfg["ref_key"]),
+    )
+
+
+def get_available_datasets():
+    return list(SPLITS_YCBV.keys())
+
+
+#### tests ###############################################
+def test_vis():
+    # python -m core.datasets.ycbv_d2 ycbv_test
+    dataset_name = sys.argv[1]
+    meta = MetadataCatalog.get(dataset_name)
+    t_start = time.perf_counter()
+    dicts = DatasetCatalog.get(dataset_name)
+    with_xyz = False if "test" in dataset_name else True
+    logger.info("Done loading {} samples with {:.3f}s.".format(len(dicts), time.perf_counter() - t_start))
+
+    dirname = "output/ycbv_test-data-vis"
+    os.makedirs(dirname, exist_ok=True)
+    objs = meta.objs
+    for d in dicts:
+        img = read_image_mmcv(d["file_name"], format="BGR")
+        depth = mmcv.imread(d["depth_file"], "unchanged") / 1000.0
+
+        imH, imW = 
img.shape[:2] + annos = d["annotations"] + masks = [cocosegm2mask(anno["segmentation"], imH, imW) for anno in annos] + bboxes = [anno["bbox"] for anno in annos] + bbox_modes = [anno["bbox_mode"] for anno in annos] + bboxes_xyxy = np.array( + [BoxMode.convert(box, box_mode, BoxMode.XYXY_ABS) for box, box_mode in zip(bboxes, bbox_modes)] + ) + kpts_3d_list = [anno["bbox3d_and_center"] for anno in annos] + quats = [anno["quat"] for anno in annos] + transes = [anno["trans"] for anno in annos] + Rs = [quat2mat(quat) for quat in quats] + # 0-based label + cat_ids = [anno["category_id"] for anno in annos] + K = d["cam"] + kpts_2d = [misc.project_pts(kpt3d, K, R, t) for kpt3d, R, t in zip(kpts_3d_list, Rs, transes)] + # # TODO: visualize pose and keypoints + labels = [objs[cat_id] for cat_id in cat_ids] + for _i in range(len(annos)): + img_vis = vis_image_mask_bbox_cv2( + img, + masks[_i : _i + 1], + bboxes=bboxes_xyxy[_i : _i + 1], + labels=labels[_i : _i + 1], + ) + img_vis_kpts2d = misc.draw_projected_box3d(img_vis.copy(), kpts_2d[_i]) + if with_xyz: + xyz_path = annos[_i]["xyz_path"] + xyz_info = mmcv.load(xyz_path) + x1, y1, x2, y2 = xyz_info["xyxy"] + xyz_crop = xyz_info["xyz_crop"].astype(np.float32) + xyz = np.zeros((imH, imW, 3), dtype=np.float32) + xyz[y1 : y2 + 1, x1 : x2 + 1, :] = xyz_crop + xyz_show = get_emb_show(xyz) + xyz_crop_show = get_emb_show(xyz_crop) + img_xyz = img.copy() / 255.0 + mask_xyz = ((xyz[:, :, 0] != 0) | (xyz[:, :, 1] != 0) | (xyz[:, :, 2] != 0)).astype("uint8") + fg_idx = np.where(mask_xyz != 0) + img_xyz[fg_idx[0], fg_idx[1], :] = ( + 0.5 * xyz_show[fg_idx[0], fg_idx[1], :3] + 0.5 * img_xyz[fg_idx[0], fg_idx[1], :] + ) + img_xyz_crop = img_xyz[y1 : y2 + 1, x1 : x2 + 1, :] + img_vis_crop = img_vis[y1 : y2 + 1, x1 : x2 + 1, :] + # diff mask + diff_mask_xyz = np.abs(masks[_i] - mask_xyz)[y1 : y2 + 1, x1 : x2 + 1] + + grid_show( + [ + img[:, :, [2, 1, 0]], + img_vis[:, :, [2, 1, 0]], + img_vis_kpts2d[:, :, [2, 1, 0]], + depth, + # xyz_show, 
+ diff_mask_xyz, + xyz_crop_show, + img_xyz[:, :, [2, 1, 0]], + img_xyz_crop[:, :, [2, 1, 0]], + img_vis_crop[:, :, ::-1], + ], + [ + "img", + "vis_img", + "img_vis_kpts2d", + "depth", + "diff_mask_xyz", + "xyz_crop_show", + "img_xyz", + "img_xyz_crop", + "img_vis_crop", + ], + row=3, + col=3, + ) + else: + grid_show( + [ + img[:, :, [2, 1, 0]], + img_vis[:, :, [2, 1, 0]], + img_vis_kpts2d[:, :, [2, 1, 0]], + depth, + ], + ["img", "vis_img", "img_vis_kpts2d", "depth"], + row=2, + col=2, + ) + + +if __name__ == "__main__": + """Test the dataset loader. + + Usage: + python -m this_module dataset_name + "dataset_name" can be any pre-registered ones + """ + from lib.vis_utils.image import grid_show + from lib.utils.setup_logger import setup_my_logger + + import detectron2.data.datasets # noqa # add pre-defined metadata + from lib.vis_utils.image import vis_image_mask_bbox_cv2 + from core.utils.utils import get_emb_show + from core.utils.data_utils import read_image_mmcv + + print("sys.argv:", sys.argv) + logger = setup_my_logger(name="core") + register_with_name_cfg(sys.argv[1]) + print("dataset catalog: ", DatasetCatalog.list()) + test_vis() diff --git a/core/gdrn_modeling/datasets/fruitbin_pbr.py b/core/gdrn_modeling/datasets/fruitbin_pbr.py new file mode 100644 index 0000000000000000000000000000000000000000..de7ecb2fa0340ebf2b7923385c0a83edce420ca8 --- /dev/null +++ b/core/gdrn_modeling/datasets/fruitbin_pbr.py @@ -0,0 +1,491 @@ +import hashlib +import logging +import os +import os.path as osp +import sys + +cur_dir = osp.dirname(osp.abspath(__file__)) +PROJ_ROOT = osp.normpath(osp.join(cur_dir, "../../..")) +sys.path.insert(0, PROJ_ROOT) +import time +from collections import OrderedDict +import mmcv +import numpy as np +from tqdm import tqdm +from transforms3d.quaternions import mat2quat, quat2mat +import ref +from detectron2.data import DatasetCatalog, MetadataCatalog +from detectron2.structures import BoxMode +from lib.pysixd import inout, misc +from 
lib.utils.mask_utils import binary_mask_to_rle, cocosegm2mask +from lib.utils.utils import dprint, iprint, lazy_property + + +logger = logging.getLogger(__name__) +DATASETS_ROOT = osp.normpath(osp.join(PROJ_ROOT, "datasets")) + + +class YCBV_PBR_Dataset: + def __init__(self, data_cfg): + """ + Set with_depth and with_masks default to True, + and decide whether to load them into dataloader/network later + with_masks: + """ + self.name = data_cfg["name"] + self.data_cfg = data_cfg + + self.objs = data_cfg["objs"] # selected objects + + self.dataset_root = data_cfg.get( + "dataset_root", + osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/train_pbr"), + ) + self.xyz_root = data_cfg.get("xyz_root", osp.join(self.dataset_root, "xyz_crop")) + assert osp.exists(self.dataset_root), self.dataset_root + self.models_root = data_cfg["models_root"] # BOP_DATASETS/ycbv/models + self.scale_to_meter = data_cfg["scale_to_meter"] # 0.001 + + self.with_masks = data_cfg["with_masks"] + self.with_depth = data_cfg["with_depth"] + + self.height = data_cfg["height"] # 480 + self.width = data_cfg["width"] # 640 + + self.cache_dir = data_cfg.get("cache_dir", osp.join(PROJ_ROOT, ".cache")) # .cache + self.use_cache = data_cfg.get("use_cache", True) + self.num_to_load = data_cfg["num_to_load"] # -1 + self.filter_invalid = data_cfg.get("filter_invalid", True) + ################################################## + + # NOTE: careful! 
Only the selected objects + self.cat_ids = [cat_id for cat_id, obj_name in ref.ycbv.id2obj.items() if obj_name in self.objs] + # map selected objs to [0, num_objs-1] + self.cat2label = {v: i for i, v in enumerate(self.cat_ids)} # id_map + self.label2cat = {label: cat for cat, label in self.cat2label.items()} + self.obj2label = OrderedDict((obj, obj_id) for obj_id, obj in enumerate(self.objs)) + ########################################################## + + self.scenes = [f"{i:06d}" for i in range(50)] + + def __call__(self): + """Load light-weight instance annotations of all images into a list of + dicts in Detectron2 format. + + Do not load heavy data into memory in this file, since we will + load the annotations of all images into memory. + """ + # cache the dataset_dicts to avoid loading masks from files + hashed_file_name = hashlib.md5( + ( + "".join([str(fn) for fn in self.objs]) + + "dataset_dicts_{}_{}_{}_{}_{}".format( + self.name, + self.dataset_root, + self.with_masks, + self.with_depth, + __name__, + ) + ).encode("utf-8") + ).hexdigest() + cache_path = osp.join( + self.cache_dir, + "dataset_dicts_{}_{}.pkl".format(self.name, hashed_file_name), + ) + + if osp.exists(cache_path) and self.use_cache: + logger.info("load cached dataset dicts from {}".format(cache_path)) + return mmcv.load(cache_path) + + t_start = time.perf_counter() + + logger.info("loading dataset dicts: {}".format(self.name)) + self.num_instances_without_valid_segmentation = 0 + self.num_instances_without_valid_box = 0 + dataset_dicts = [] # ###################################################### + # it is slow because of loading and converting masks to rle + for scene in tqdm(self.scenes): + scene_id = int(scene) + scene_root = osp.join(self.dataset_root, scene) + + gt_dict = mmcv.load(osp.join(scene_root, "scene_gt.json")) + gt_info_dict = mmcv.load(osp.join(scene_root, "scene_gt_info.json")) + cam_dict = mmcv.load(osp.join(scene_root, "scene_camera.json")) + + for str_im_id in 
tqdm(gt_dict, postfix=f"{scene_id}"): + int_im_id = int(str_im_id) + rgb_path = osp.join(scene_root, "rgb/{:06d}.jpg").format(int_im_id) + assert osp.exists(rgb_path), rgb_path + + depth_path = osp.join(scene_root, "depth/{:06d}.png".format(int_im_id)) + + scene_im_id = f"{scene_id}/{int_im_id}" + + K = np.array(cam_dict[str_im_id]["cam_K"], dtype=np.float32).reshape(3, 3) + depth_factor = 1000.0 / cam_dict[str_im_id]["depth_scale"] # 10000 + + record = { + "dataset_name": self.name, + "file_name": osp.relpath(rgb_path, PROJ_ROOT), + "depth_file": osp.relpath(depth_path, PROJ_ROOT), + "height": self.height, + "width": self.width, + "image_id": int_im_id, + "scene_im_id": scene_im_id, # for evaluation + "cam": K, + "depth_factor": depth_factor, + "img_type": "syn_pbr", # NOTE: has background + } + insts = [] + for anno_i, anno in enumerate(gt_dict[str_im_id]): + obj_id = anno["obj_id"] + if obj_id not in self.cat_ids: + continue + cur_label = self.cat2label[obj_id] # 0-based label + R = np.array(anno["cam_R_m2c"], dtype="float32").reshape(3, 3) + t = np.array(anno["cam_t_m2c"], dtype="float32") / 1000.0 + pose = np.hstack([R, t.reshape(3, 1)]) + quat = mat2quat(R).astype("float32") + + proj = (record["cam"] @ t.T).T + proj = proj[:2] / proj[2] + + bbox_visib = gt_info_dict[str_im_id][anno_i]["bbox_visib"] + bbox_obj = gt_info_dict[str_im_id][anno_i]["bbox_obj"] + x1, y1, w, h = bbox_visib + if self.filter_invalid: + if h <= 1 or w <= 1: + self.num_instances_without_valid_box += 1 + continue + + mask_file = osp.join( + scene_root, + "mask/{:06d}_{:06d}.png".format(int_im_id, anno_i), + ) + mask_visib_file = osp.join( + scene_root, + "mask_visib/{:06d}_{:06d}.png".format(int_im_id, anno_i), + ) + assert osp.exists(mask_file), mask_file + assert osp.exists(mask_visib_file), mask_visib_file + # load mask visib TODO: load both mask_visib and mask_full + mask_single = mmcv.imread(mask_visib_file, "unchanged") + area = mask_single.sum() + if area <= 64: # filter out too 
small or nearly invisible instances + self.num_instances_without_valid_segmentation += 1 + continue + mask_rle = binary_mask_to_rle(mask_single, compressed=True) + + # load mask full + mask_full = mmcv.imread(mask_file, "unchanged") + mask_full = mask_full.astype("bool") + mask_full_rle = binary_mask_to_rle(mask_full, compressed=True) + + visib_fract = gt_info_dict[str_im_id][anno_i].get("visib_fract", 1.0) + + xyz_path = osp.join(self.xyz_root, f"{scene_id:06d}/{int_im_id:06d}_{anno_i:06d}-xyz.pkl") + # assert osp.exists(xyz_path), xyz_path + inst = { + "category_id": cur_label, # 0-based label + "bbox": bbox_visib, # TODO: load both bbox_obj and bbox_visib + "bbox_obj": bbox_obj, + "bbox_mode": BoxMode.XYWH_ABS, + "pose": pose, + "quat": quat, + "trans": t, + "centroid_2d": proj, # absolute (cx, cy) + "segmentation": mask_rle, + "mask_full": mask_full_rle, # TODO: load as mask_full, rle + "visib_fract": visib_fract, + "xyz_path": xyz_path, + } + + model_info = self.models_info[str(obj_id)] + inst["model_info"] = model_info + # TODO: using full mask and full xyz + for key in ["bbox3d_and_center"]: + inst[key] = self.models[cur_label][key] + insts.append(inst) + if len(insts) == 0: # filter im without anno + continue + record["annotations"] = insts + dataset_dicts.append(record) + + if self.num_instances_without_valid_segmentation > 0: + logger.warning( + "Filtered out {} instances without valid segmentation. " + "There might be issues in your dataset generation process.".format( + self.num_instances_without_valid_segmentation + ) + ) + if self.num_instances_without_valid_box > 0: + logger.warning( + "Filtered out {} instances without valid box. 
" + "There might be issues in your dataset generation process.".format(self.num_instances_without_valid_box) + ) + ########################################################################## + if self.num_to_load > 0: + self.num_to_load = min(int(self.num_to_load), len(dataset_dicts)) + dataset_dicts = dataset_dicts[: self.num_to_load] + logger.info("loaded {} dataset dicts, using {}s".format(len(dataset_dicts), time.perf_counter() - t_start)) + + mmcv.mkdir_or_exist(osp.dirname(cache_path)) + mmcv.dump(dataset_dicts, cache_path, protocol=4) + logger.info("Dumped dataset_dicts to {}".format(cache_path)) + return dataset_dicts + + @lazy_property + def models_info(self): + models_info_path = osp.join(self.models_root, "models_info.json") + assert osp.exists(models_info_path), models_info_path + models_info = mmcv.load(models_info_path) # key is str(obj_id) + return models_info + + @lazy_property + def models(self): + """Load models into a list.""" + cache_path = osp.join(self.models_root, "models_{}.pkl".format(self.name)) + if osp.exists(cache_path) and self.use_cache: + # dprint("{}: load cached object models from {}".format(self.name, cache_path)) + return mmcv.load(cache_path) + + models = [] + for obj_name in self.objs: + model = inout.load_ply( + osp.join( + self.models_root, + f"obj_{ref.ycbv.obj2id[obj_name]:06d}.ply", + ), + vertex_scale=self.scale_to_meter, + ) + # NOTE: the bbox3d_and_center is not obtained from centered vertices + # for BOP models, not a big problem since they had been centered + model["bbox3d_and_center"] = misc.get_bbox3d_and_center(model["pts"]) + + models.append(model) + logger.info("cache models to {}".format(cache_path)) + mmcv.dump(models, cache_path, protocol=4) + return models + + def image_aspect_ratio(self): + return self.width / self.height # 4/3 + + +########### register datasets ############################################################ + + +def get_ycbv_metadata(obj_names, ref_key): + """task specific metadata.""" + 
data_ref = ref.__dict__[ref_key] + + cur_sym_infos = {} # label based key + loaded_models_info = data_ref.get_models_info() + + for i, obj_name in enumerate(obj_names): + obj_id = data_ref.obj2id[obj_name] + model_info = loaded_models_info[str(obj_id)] + if "symmetries_discrete" in model_info or "symmetries_continuous" in model_info: + sym_transforms = misc.get_symmetry_transformations(model_info, max_sym_disc_step=0.01) + sym_info = np.array([sym["R"] for sym in sym_transforms], dtype=np.float32) + else: + sym_info = None + cur_sym_infos[i] = sym_info + + meta = {"thing_classes": obj_names, "sym_infos": cur_sym_infos} + return meta + + +ycbv_model_root = "BOP_DATASETS/ycbv/models/" +################################################################################ + + +SPLITS_YCBV_PBR = dict( + ycbv_train_pbr=dict( + name="ycbv_train_pbr", + objs=ref.ycbv.objects, # selected objects + dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/train_pbr"), + models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/models"), + xyz_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/train_pbr/xyz_crop"), + scale_to_meter=0.001, + with_masks=True, # (load masks but may not use it) + with_depth=True, # (load depth path here, but may not use it) + height=480, + width=640, + use_cache=True, + num_to_load=-1, + filter_invalid=True, + ref_key="ycbv", + ) +) + +# single obj splits +for obj in ref.ycbv.objects: + for split in ["train_pbr"]: + name = "ycbv_{}_{}".format(obj, split) + if split in ["train_pbr"]: + filter_invalid = True + elif split in ["test"]: + filter_invalid = False + else: + raise ValueError("{}".format(split)) + if name not in SPLITS_YCBV_PBR: + SPLITS_YCBV_PBR[name] = dict( + name=name, + objs=[obj], # only this obj + dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/train_pbr"), + models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/models"), + xyz_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/train_pbr/xyz_crop"), + scale_to_meter=0.001, + 
with_masks=True, # (load masks but may not use it) + with_depth=True, # (load depth path here, but may not use it) + height=480, + width=640, + use_cache=True, + num_to_load=-1, + filter_invalid=filter_invalid, + ref_key="ycbv", + ) + + +def register_with_name_cfg(name, data_cfg=None): + """Assume pre-defined datasets live in `./datasets`. + + Args: + name: datasnet_name, + data_cfg: if name is in existing SPLITS, use pre-defined data_cfg + otherwise requires data_cfg + data_cfg can be set in cfg.DATA_CFG.name + """ + dprint("register dataset: {}".format(name)) + if name in SPLITS_YCBV_PBR: + used_cfg = SPLITS_YCBV_PBR[name] + else: + assert data_cfg is not None, f"dataset name {name} is not registered" + used_cfg = data_cfg + DatasetCatalog.register(name, YCBV_PBR_Dataset(used_cfg)) + # something like eval_types + MetadataCatalog.get(name).set( + id="ycbv", # NOTE: for pvnet to determine module + ref_key=used_cfg["ref_key"], + objs=used_cfg["objs"], + eval_error_types=["ad", "rete", "proj"], + evaluator_type="bop", + **get_ycbv_metadata(obj_names=used_cfg["objs"], ref_key=used_cfg["ref_key"]), + ) + + +def get_available_datasets(): + return list(SPLITS_YCBV_PBR.keys()) + + +#### tests ############################################### +def test_vis(): + dset_name = sys.argv[1] + assert dset_name in DatasetCatalog.list() + + meta = MetadataCatalog.get(dset_name) + dprint("MetadataCatalog: ", meta) + objs = meta.objs + + t_start = time.perf_counter() + dicts = DatasetCatalog.get(dset_name) + logger.info("Done loading {} samples with {:.3f}s.".format(len(dicts), time.perf_counter() - t_start)) + + dirname = "output/{}-data-vis".format(dset_name) + os.makedirs(dirname, exist_ok=True) + for d in dicts: + img = read_image_mmcv(d["file_name"], format="BGR") + depth = mmcv.imread(d["depth_file"], "unchanged") / 10000.0 + + imH, imW = img.shape[:2] + annos = d["annotations"] + masks = [cocosegm2mask(anno["segmentation"], imH, imW) for anno in annos] + bboxes = [anno["bbox"] 
for anno in annos] + bbox_modes = [anno["bbox_mode"] for anno in annos] + bboxes_xyxy = np.array( + [BoxMode.convert(box, box_mode, BoxMode.XYXY_ABS) for box, box_mode in zip(bboxes, bbox_modes)] + ) + kpts_3d_list = [anno["bbox3d_and_center"] for anno in annos] + quats = [anno["quat"] for anno in annos] + transes = [anno["trans"] for anno in annos] + Rs = [quat2mat(quat) for quat in quats] + # 0-based label + cat_ids = [anno["category_id"] for anno in annos] + K = d["cam"] + kpts_2d = [misc.project_pts(kpt3d, K, R, t) for kpt3d, R, t in zip(kpts_3d_list, Rs, transes)] + + labels = [objs[cat_id] for cat_id in cat_ids] + for _i in range(len(annos)): + img_vis = vis_image_mask_bbox_cv2( + img, + masks[_i : _i + 1], + bboxes=bboxes_xyxy[_i : _i + 1], + labels=labels[_i : _i + 1], + ) + img_vis_kpts2d = misc.draw_projected_box3d(img_vis.copy(), kpts_2d[_i]) + xyz_path = annos[_i]["xyz_path"] + xyz_info = mmcv.load(xyz_path) + x1, y1, x2, y2 = xyz_info["xyxy"] + xyz_crop = xyz_info["xyz_crop"].astype(np.float32) + xyz = np.zeros((imH, imW, 3), dtype=np.float32) + xyz[y1 : y2 + 1, x1 : x2 + 1, :] = xyz_crop + xyz_show = get_emb_show(xyz) + xyz_crop_show = get_emb_show(xyz_crop) + img_xyz = img.copy() / 255.0 + mask_xyz = ((xyz[:, :, 0] != 0) | (xyz[:, :, 1] != 0) | (xyz[:, :, 2] != 0)).astype("uint8") + fg_idx = np.where(mask_xyz != 0) + img_xyz[fg_idx[0], fg_idx[1], :] = xyz_show[fg_idx[0], fg_idx[1], :3] + img_xyz_crop = img_xyz[y1 : y2 + 1, x1 : x2 + 1, :] + img_vis_crop = img_vis[y1 : y2 + 1, x1 : x2 + 1, :] + # diff mask + diff_mask_xyz = np.abs(masks[_i] - mask_xyz)[y1 : y2 + 1, x1 : x2 + 1] + + grid_show( + [ + img[:, :, [2, 1, 0]], + img_vis[:, :, [2, 1, 0]], + img_vis_kpts2d[:, :, [2, 1, 0]], + depth, + # xyz_show, + diff_mask_xyz, + xyz_crop_show, + img_xyz[:, :, [2, 1, 0]], + img_xyz_crop[:, :, [2, 1, 0]], + img_vis_crop, + ], + [ + "img", + "vis_img", + "img_vis_kpts2d", + "depth", + "diff_mask_xyz", + "xyz_crop_show", + "img_xyz", + "img_xyz_crop", + 
"img_vis_crop", + ], + row=3, + col=3, + ) + + +if __name__ == "__main__": + """Test the dataset loader. + + Usage: + python -m this_module ycbv_pbr_train + """ + from lib.vis_utils.image import grid_show + from lib.utils.setup_logger import setup_my_logger + + import detectron2.data.datasets # noqa # add pre-defined metadata + from lib.vis_utils.image import vis_image_mask_bbox_cv2 + from core.utils.utils import get_emb_show + from core.utils.data_utils import read_image_mmcv + + print("sys.argv:", sys.argv) + logger = setup_my_logger(name="core") + register_with_name_cfg(sys.argv[1]) + print("dataset catalog: ", DatasetCatalog.list()) + + test_vis() diff --git a/core/gdrn_modeling/tools/fruitbin/__init__.py b/core/gdrn_modeling/tools/fruitbin/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/core/gdrn_modeling/tools/fruitbin/convert_det_to_our_format.py b/core/gdrn_modeling/tools/fruitbin/convert_det_to_our_format.py new file mode 100644 index 0000000000000000000000000000000000000000..e5de9ead58a7b171868b0c0344d3afb7d4b5e918 --- /dev/null +++ b/core/gdrn_modeling/tools/fruitbin/convert_det_to_our_format.py @@ -0,0 +1,69 @@ +import mmcv +import sys +from tqdm import tqdm + +import json + +path = "/data2/lxy/Storage/bop22_results/yolovx_amodal/ycbv/yolox_x_640_ycbv_real_pbr_ycbv_bop_test.json" +ds = mmcv.load(path) + +outs = {} +for d in tqdm(ds): + scene_id = d["scene_id"] + image_id = d["image_id"] + scene_im_id = f"{scene_id}/{image_id}" + + obj_id = d["category_id"] + score = d["score"] + + bbox = d["bbox"] + time = d["time"] + + cur_dict = { + "bbox_est": bbox, + "obj_id": obj_id, + "score": score, + "time": time, + } + + if scene_im_id in outs.keys(): + outs[scene_im_id].append(cur_dict) + else: + outs[scene_im_id] = [cur_dict] + + +def save_json(path, content, sort=False): + """Saves the provided content to a JSON file. + + :param path: Path to the output JSON file. 
    :param content: Dictionary/list to save.
    :param sort: If True and content is a dict, entries are written sorted by key.
    """
    # Custom writer: emits one top-level element per line (BOP-toolkit style)
    # instead of plain json.dump, so the result stays diff-friendly.
    with open(path, "w") as f:

        if isinstance(content, dict):
            f.write("{\n")
            if sort:
                content_sorted = sorted(content.items(), key=lambda x: x[0])
            else:
                content_sorted = content.items()
            for elem_id, (k, v) in enumerate(content_sorted):
                # each value is serialized compactly on its own line
                f.write('  "{}": {}'.format(k, json.dumps(v, sort_keys=True)))
                if elem_id != len(content) - 1:
                    f.write(",")
                f.write("\n")
            f.write("}")

        elif isinstance(content, list):
            f.write("[\n")
            for elem_id, elem in enumerate(content):
                f.write("  {}".format(json.dumps(elem, sort_keys=True)))
                if elem_id != len(content) - 1:
                    f.write(",")
                f.write("\n")
            f.write("]")

        else:
            # scalars / other types: fall back to the stdlib serializer
            json.dump(content, f, sort_keys=True)


save_json("datasets/BOP_DATASETS/ycbv/test/test_bboxes/yolox_x_640_ycbv_real_pbr_ycbv_bop_test.json", outs)
diff --git a/det/yolox/data/datasets/fruitbin_bop_test.py b/det/yolox/data/datasets/fruitbin_bop_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..c9a9324ef3bfaeb485a7e798548fc6a0695615ad
--- /dev/null
+++ b/det/yolox/data/datasets/fruitbin_bop_test.py
@@ -0,0 +1,454 @@
import hashlib
import logging
import os
import os.path as osp
import sys

cur_dir = osp.dirname(osp.abspath(__file__))
PROJ_ROOT = osp.normpath(osp.join(cur_dir, "../../../.."))
sys.path.insert(0, PROJ_ROOT)
import time
from collections import OrderedDict
import mmcv
import numpy as np
from tqdm import tqdm
from transforms3d.quaternions import mat2quat, quat2mat
import ref
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.structures import BoxMode
from lib.pysixd import inout, misc
from lib.utils.mask_utils import binary_mask_to_rle, cocosegm2mask
from lib.utils.utils import dprint, iprint, lazy_property


logger = logging.getLogger(__name__)
DATASETS_ROOT = osp.normpath(osp.join(PROJ_ROOT, "datasets"))


# NOTE(review): file is named fruitbin_bop_test.py but everything below references
# the ycbv dataset (paths, ref.ycbv, ids) — presumably copied from the ycbv loader
# and not yet adapted to fruitbin; confirm before use.
class YCBV_BOP_TEST_Dataset:
    """ycbv bop test."""

    def __init__(self, data_cfg):
        """
Set with_depth and with_masks default to True, + and decide whether to load them into dataloader/network later + with_masks: + """ + self.name = data_cfg["name"] + self.data_cfg = data_cfg + + self.objs = data_cfg["objs"] # selected objects + # all classes are self.objs, but this enables us to evaluate on selected objs + self.select_objs = data_cfg.get("select_objs", self.objs) + + self.ann_file = data_cfg["ann_file"] # json file with scene_id and im_id items + + self.dataset_root = data_cfg["dataset_root"] # BOP_DATASETS/ycbv/test + self.models_root = data_cfg["models_root"] # BOP_DATASETS/ycbv/models + self.scale_to_meter = data_cfg["scale_to_meter"] # 0.001 + + self.with_masks = data_cfg["with_masks"] # True (load masks but may not use it) + self.with_depth = data_cfg["with_depth"] # True (load depth path here, but may not use it) + + self.height = data_cfg["height"] # 480 + self.width = data_cfg["width"] # 640 + + self.cache_dir = data_cfg.get("cache_dir", osp.join(PROJ_ROOT, ".cache")) # .cache + self.use_cache = data_cfg.get("use_cache", True) + self.num_to_load = data_cfg["num_to_load"] # -1 + self.filter_invalid = data_cfg["filter_invalid"] + ################################################## + + # NOTE: careful! Only the selected objects + self.cat_ids = [cat_id for cat_id, obj_name in ref.ycbv.id2obj.items() if obj_name in self.objs] + # map selected objs to [0, num_objs-1] + self.cat2label = {v: i for i, v in enumerate(self.cat_ids)} # id_map + self.label2cat = {label: cat for cat, label in self.cat2label.items()} + self.obj2label = OrderedDict((obj, obj_id) for obj_id, obj in enumerate(self.objs)) + ########################################################## + + def __call__(self): + """Load light-weight instance annotations of all images into a list of + dicts in Detectron2 format. + + Do not load heavy data into memory in this file, since we will + load the annotations of all images into memory. 
+ """ + # cache the dataset_dicts to avoid loading masks from files + hashed_file_name = hashlib.md5( + ( + "".join([str(fn) for fn in self.objs]) + + "dataset_dicts_{}_{}_{}_{}_{}".format( + self.name, + self.dataset_root, + self.with_masks, + self.with_depth, + __name__, + ) + ).encode("utf-8") + ).hexdigest() + cache_path = osp.join( + self.cache_dir, + "dataset_dicts_{}_{}.pkl".format(self.name, hashed_file_name), + ) + + if osp.exists(cache_path) and self.use_cache: + logger.info("load cached dataset dicts from {}".format(cache_path)) + return mmcv.load(cache_path) + + t_start = time.perf_counter() + + logger.info("loading dataset dicts: {}".format(self.name)) + self.num_instances_without_valid_segmentation = 0 + self.num_instances_without_valid_box = 0 + dataset_dicts = [] # ###################################################### + im_id_global = 0 + + if True: + targets = mmcv.load(self.ann_file) + scene_im_ids = [(item["scene_id"], item["im_id"]) for item in targets] + scene_im_ids = sorted(list(set(scene_im_ids))) + + # load infos for each scene + gt_dicts = {} + gt_info_dicts = {} + cam_dicts = {} + for scene_id, im_id in scene_im_ids: + scene_root = osp.join(self.dataset_root, f"{scene_id:06d}") + if scene_id not in gt_dicts: + gt_dicts[scene_id] = mmcv.load(osp.join(scene_root, "scene_gt.json")) + if scene_id not in gt_info_dicts: + gt_info_dicts[scene_id] = mmcv.load( + osp.join(scene_root, "scene_gt_info.json") + ) # bbox_obj, bbox_visib + if scene_id not in cam_dicts: + cam_dicts[scene_id] = mmcv.load(osp.join(scene_root, "scene_camera.json")) + + for scene_id, im_id in tqdm(scene_im_ids): + str_im_id = str(im_id) + scene_root = osp.join(self.dataset_root, f"{scene_id:06d}") + rgb_path = osp.join(scene_root, "rgb/{:06d}.png").format(im_id) + assert osp.exists(rgb_path), rgb_path + + depth_path = osp.join(scene_root, "depth/{:06d}.png".format(im_id)) + + scene_id = int(rgb_path.split("/")[-3]) + + cam = np.array(cam_dicts[scene_id][str_im_id]["cam_K"], 
dtype=np.float32).reshape(3, 3) + depth_factor = 1000.0 / cam_dicts[scene_id][str_im_id]["depth_scale"] + record = { + "dataset_name": self.name, + "file_name": osp.relpath(rgb_path, PROJ_ROOT), + "depth_file": osp.relpath(depth_path, PROJ_ROOT), + "depth_factor": depth_factor, + "height": self.height, + "width": self.width, + "image_id": im_id_global, # unique image_id in the dataset, for coco evaluation + "scene_im_id": "{}/{}".format(scene_id, im_id), # for evaluation + "cam": cam, + "img_type": "real", + } + im_id_global += 1 + insts = [] + for anno_i, anno in enumerate(gt_dicts[scene_id][str_im_id]): + obj_id = anno["obj_id"] + if ref.ycbv.id2obj[obj_id] not in self.select_objs: + continue + cur_label = self.cat2label[obj_id] # 0-based label + R = np.array(anno["cam_R_m2c"], dtype="float32").reshape(3, 3) + t = np.array(anno["cam_t_m2c"], dtype="float32") / 1000.0 + pose = np.hstack([R, t.reshape(3, 1)]) + quat = mat2quat(R).astype("float32") + + proj = (record["cam"] @ t.T).T + proj = proj[:2] / proj[2] + + bbox_visib = gt_info_dicts[scene_id][str_im_id][anno_i]["bbox_visib"] + bbox_obj = gt_info_dicts[scene_id][str_im_id][anno_i]["bbox_obj"] + x1, y1, w, h = bbox_visib + if self.filter_invalid: + if h <= 1 or w <= 1: + self.num_instances_without_valid_box += 1 + continue + + mask_file = osp.join( + scene_root, + "mask/{:06d}_{:06d}.png".format(im_id, anno_i), + ) + mask_visib_file = osp.join( + scene_root, + "mask_visib/{:06d}_{:06d}.png".format(im_id, anno_i), + ) + assert osp.exists(mask_file), mask_file + assert osp.exists(mask_visib_file), mask_visib_file + # load mask visib + mask_single = mmcv.imread(mask_visib_file, "unchanged") + area = mask_single.sum() + if area < 3: # filter out too small or nearly invisible instances + self.num_instances_without_valid_segmentation += 1 + continue + mask_rle = binary_mask_to_rle(mask_single, compressed=True) + + # load mask full + mask_full = mmcv.imread(mask_file, "unchanged") + mask_full = 
mask_full.astype("bool") + mask_full_rle = binary_mask_to_rle(mask_full, compressed=True) + + inst = { + "category_id": cur_label, # 0-based label + "bbox": bbox_obj, # TODO: load both bbox_obj and bbox_visib + "bbox_mode": BoxMode.XYWH_ABS, + "pose": pose, + "quat": quat, + "trans": t, + "centroid_2d": proj, # absolute (cx, cy) + "segmentation": mask_rle, + "mask_full": mask_full_rle, # TODO: load as mask_full, rle + } + + model_info = self.models_info[str(obj_id)] + inst["model_info"] = model_info + # TODO: using full mask and full xyz + for key in ["bbox3d_and_center"]: + inst[key] = self.models[cur_label][key] + insts.append(inst) + if len(insts) == 0: # filter im without anno + continue + record["annotations"] = insts + dataset_dicts.append(record) + + if self.num_instances_without_valid_segmentation > 0: + logger.warning( + "Filtered out {} instances without valid segmentation. " + "There might be issues in your dataset generation process.".format( + self.num_instances_without_valid_segmentation + ) + ) + if self.num_instances_without_valid_box > 0: + logger.warning( + "Filtered out {} instances without valid box. 
" + "There might be issues in your dataset generation process.".format(self.num_instances_without_valid_box) + ) + ########################################################################## + if self.num_to_load > 0: + self.num_to_load = min(int(self.num_to_load), len(dataset_dicts)) + dataset_dicts = dataset_dicts[: self.num_to_load] + logger.info("loaded {} dataset dicts, using {}s".format(len(dataset_dicts), time.perf_counter() - t_start)) + + mmcv.mkdir_or_exist(osp.dirname(cache_path)) + mmcv.dump(dataset_dicts, cache_path, protocol=4) + logger.info("Dumped dataset_dicts to {}".format(cache_path)) + return dataset_dicts + + @lazy_property + def models_info(self): + models_info_path = osp.join(self.models_root, "models_info.json") + assert osp.exists(models_info_path), models_info_path + models_info = mmcv.load(models_info_path) # key is str(obj_id) + return models_info + + @lazy_property + def models(self): + """Load models into a list.""" + cache_path = osp.join(self.models_root, f"models_{self.name}.pkl") + if osp.exists(cache_path) and self.use_cache: + # dprint("{}: load cached object models from {}".format(self.name, cache_path)) + return mmcv.load(cache_path) + + models = [] + for obj_name in self.objs: + model = inout.load_ply( + osp.join( + self.models_root, + f"obj_{ref.ycbv.obj2id[obj_name]:06d}.ply", + ), + vertex_scale=self.scale_to_meter, + ) + # NOTE: the bbox3d_and_center is not obtained from centered vertices + # for BOP models, not a big problem since they had been centered + model["bbox3d_and_center"] = misc.get_bbox3d_and_center(model["pts"]) + + models.append(model) + logger.info("cache models to {}".format(cache_path)) + mmcv.dump(models, cache_path, protocol=4) + return models + + def image_aspect_ratio(self): + return self.width / self.height # 4/3 + + +########### register datasets ############################################################ + + +def get_ycbv_metadata(obj_names, ref_key): + """task specific metadata.""" + data_ref = 
ref.__dict__[ref_key] + + cur_sym_infos = {} # label based key + loaded_models_info = data_ref.get_models_info() + + for i, obj_name in enumerate(obj_names): + obj_id = data_ref.obj2id[obj_name] + model_info = loaded_models_info[str(obj_id)] + if "symmetries_discrete" in model_info or "symmetries_continuous" in model_info: + sym_transforms = misc.get_symmetry_transformations(model_info, max_sym_disc_step=0.01) + sym_info = np.array([sym["R"] for sym in sym_transforms], dtype=np.float32) + else: + sym_info = None + cur_sym_infos[i] = sym_info + + meta = {"thing_classes": obj_names, "sym_infos": cur_sym_infos} + return meta + + +################################################################################ + +SPLITS_YCBV = dict( + ycbv_bop_test=dict( + name="ycbv_bop_test", + dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/test"), + models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/models"), + objs=ref.ycbv.objects, # selected objects + ann_file=osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/test_targets_bop19.json"), + scale_to_meter=0.001, + with_masks=True, # (load masks but may not use it) + with_depth=True, # (load depth path here, but may not use it) + height=480, + width=640, + cache_dir=osp.join(PROJ_ROOT, ".cache"), + use_cache=True, + num_to_load=-1, + filter_invalid=False, + ref_key="ycbv", + ) +) + + +# single objs (num_class is from all objs) +for obj in ref.ycbv.objects: + name = "ycbv_bop_{}_test".format(obj) + select_objs = [obj] + if name not in SPLITS_YCBV: + SPLITS_YCBV[name] = dict( + name=name, + dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/test"), + models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/models"), + objs=[obj], # only this obj + select_objs=select_objs, # selected objects + ann_file=osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/test_targets_bop19.json"), + scale_to_meter=0.001, + with_masks=True, # (load masks but may not use it) + with_depth=True, # (load depth path here, but may not use it) + height=480, + 
            width=640,
            cache_dir=osp.join(PROJ_ROOT, ".cache"),
            use_cache=True,
            num_to_load=-1,
            filter_invalid=False,
            ref_key="ycbv",
        )


def register_with_name_cfg(name, data_cfg=None):
    """Register a BOP-test dataset split in the Detectron2 catalogs.

    Assume pre-defined datasets live in `./datasets`.

    Args:
        name: dataset_name,
        data_cfg: if name is in existing SPLITS, use pre-defined data_cfg
            otherwise requires data_cfg
            data_cfg can be set in cfg.DATA_CFG.name

    Side effects: registers `name` in DatasetCatalog (lazy loader) and fills
    MetadataCatalog with objs / sym_infos / evaluator settings.
    """
    dprint("register dataset: {}".format(name))
    if name in SPLITS_YCBV:
        used_cfg = SPLITS_YCBV[name]
    else:
        # unknown split name: caller must supply an explicit config dict
        assert data_cfg is not None, f"dataset name {name} is not registered"
        used_cfg = data_cfg
    DatasetCatalog.register(name, YCBV_BOP_TEST_Dataset(used_cfg))
    # something like eval_types
    MetadataCatalog.get(name).set(
        id="ycbv",  # NOTE: for pvnet to determine module
        ref_key=used_cfg["ref_key"],
        objs=used_cfg["objs"],
        eval_error_types=["ad", "rete", "proj"],
        evaluator_type="bop",
        **get_ycbv_metadata(obj_names=used_cfg["objs"], ref_key=used_cfg["ref_key"]),
    )


def get_available_datasets():
    """Return the names of all pre-defined BOP-test splits in this module."""
    return list(SPLITS_YCBV.keys())


#### tests ###############################################
def test_vis():
    # Visual sanity check: loads the split named on the command line and
    # displays image / mask / projected-3D-box / depth overlays with grid_show.
    dset_name = sys.argv[1]
    assert dset_name in DatasetCatalog.list()

    meta = MetadataCatalog.get(dset_name)
    dprint("MetadataCatalog: ", meta)
    objs = meta.objs

    t_start = time.perf_counter()
    dicts = DatasetCatalog.get(dset_name)
    logger.info("Done loading {} samples with {:.3f}s.".format(len(dicts), time.perf_counter() - t_start))

    # NOTE(review): dirname is created but nothing below writes into it — confirm intent
    dirname = "output/{}-data-vis".format(dset_name)
    os.makedirs(dirname, exist_ok=True)
    for d in dicts:
        img = read_image_mmcv(d["file_name"], format="BGR")
        # depth rescaled to meters using the per-image depth_factor stored by __call__
        depth = mmcv.imread(d["depth_file"], "unchanged") / d["depth_factor"]

        imH, imW = img.shape[:2]
        annos = d["annotations"]
        masks = [cocosegm2mask(anno["segmentation"], imH, imW) for anno in annos]
        bboxes = [anno["bbox"] for anno in annos]
        bbox_modes = [anno["bbox_mode"] for anno in annos]
        bboxes_xyxy = 
np.array( + [BoxMode.convert(box, box_mode, BoxMode.XYXY_ABS) for box, box_mode in zip(bboxes, bbox_modes)] + ) + kpts_3d_list = [anno["bbox3d_and_center"] for anno in annos] + quats = [anno["quat"] for anno in annos] + transes = [anno["trans"] for anno in annos] + Rs = [quat2mat(quat) for quat in quats] + # 0-based label + cat_ids = [anno["category_id"] for anno in annos] + K = d["cam"] + kpts_2d = [misc.project_pts(kpt3d, K, R, t) for kpt3d, R, t in zip(kpts_3d_list, Rs, transes)] + # # TODO: visualize pose and keypoints + labels = [objs[cat_id] for cat_id in cat_ids] + # img_vis = vis_image_bboxes_cv2(img, bboxes=bboxes_xyxy, labels=labels) + img_vis = vis_image_mask_bbox_cv2(img, masks, bboxes=bboxes_xyxy, labels=labels) + img_vis_kpts2d = img.copy() + for anno_i in range(len(annos)): + img_vis_kpts2d = misc.draw_projected_box3d(img_vis_kpts2d, kpts_2d[anno_i]) + grid_show( + [ + img[:, :, [2, 1, 0]], + img_vis[:, :, [2, 1, 0]], + img_vis_kpts2d[:, :, [2, 1, 0]], + depth, + ], + [f"img:{d['file_name']}", "vis_img", "img_vis_kpts2d", "depth"], + row=2, + col=2, + ) + + +if __name__ == "__main__": + """Test the dataset loader. 
+ + Usage: + python -m core.datasets.ycbv_bop_test dataset_name + """ + from lib.vis_utils.image import grid_show + from lib.utils.setup_logger import setup_my_logger + import detectron2.data.datasets # noqa # add pre-defined metadata + from core.utils.data_utils import read_image_mmcv + from lib.vis_utils.image import vis_image_mask_bbox_cv2 + + print("sys.argv:", sys.argv) + logger = setup_my_logger(name="core") + register_with_name_cfg(sys.argv[1]) + print("dataset catalog: ", DatasetCatalog.list()) + test_vis() diff --git a/det/yolox/data/datasets/fruitbin_d2.py b/det/yolox/data/datasets/fruitbin_d2.py new file mode 100755 index 0000000000000000000000000000000000000000..95341811b3f03748d634bc133904a1806a18452b --- /dev/null +++ b/det/yolox/data/datasets/fruitbin_d2.py @@ -0,0 +1,739 @@ +import hashlib +import copy +import logging +import os +import os.path as osp +import sys + +cur_dir = osp.dirname(osp.abspath(__file__)) +PROJ_ROOT = osp.normpath(osp.join(cur_dir, "../../../..")) +sys.path.insert(0, PROJ_ROOT) +import time +from collections import OrderedDict +import mmcv +import numpy as np +from tqdm import tqdm +from transforms3d.quaternions import mat2quat, quat2mat +import ref +from detectron2.data import DatasetCatalog, MetadataCatalog +from detectron2.structures import BoxMode +from lib.pysixd import inout, misc +from lib.utils.mask_utils import binary_mask_to_rle, cocosegm2mask +from lib.utils.utils import dprint, iprint, lazy_property + + +logger = logging.getLogger(__name__) +DATASETS_ROOT = osp.normpath(osp.join(PROJ_ROOT, "datasets")) + + +class YCBV_Dataset: + """use image_sets(scene/image_id) and image root to get data; Here we use + bop models, which are center aligned and have some offsets compared to + original models.""" + + def __init__(self, data_cfg): + """ + Set with_depth and with_masks default to True, + and decide whether to load them into dataloader/network later + with_masks: + """ + self.name = data_cfg["name"] + self.data_cfg = 
data_cfg + + self.objs = data_cfg["objs"] # selected objects + + self.ann_files = data_cfg["ann_files"] # provide scene/im_id list + self.image_prefixes = data_cfg["image_prefixes"] # image root + + self.dataset_root = data_cfg["dataset_root"] # BOP_DATASETS/ycbv/ + assert osp.exists(self.dataset_root), self.dataset_root + self.models_root = data_cfg["models_root"] # BOP_DATASETS/ycbv/models + self.scale_to_meter = data_cfg["scale_to_meter"] # 0.001 + + self.with_masks = data_cfg["with_masks"] # True (load masks but may not use it) + self.with_depth = data_cfg["with_depth"] # True (load depth path here, but may not use it) + self.with_xyz = data_cfg["with_xyz"] + + self.height = data_cfg["height"] # 480 + self.width = data_cfg["width"] # 640 + + self.cache_dir = data_cfg.get("cache_dir", osp.join(PROJ_ROOT, ".cache")) # .cache + self.use_cache = data_cfg.get("use_cache", True) + self.num_to_load = data_cfg["num_to_load"] # -1 + self.filter_invalid = data_cfg["filter_invalid"] + + self.align_K_by_change_pose = data_cfg.get("align_K_by_change_pose", False) + # default: 0000~0059 and synt + self.cam = np.array( + [ + [1066.778, 0.0, 312.9869], + [0.0, 1067.487, 241.3109], + [0.0, 0.0, 1.0], + ], + dtype="float32", + ) + # 0060~0091 + # cmu_cam = np.array([[1077.836, 0.0, 323.7872], [0.0, 1078.189, 279.6921], [0.0, 0.0, 1.0]], dtype='float32') + ################################################## + + # NOTE: careful! 
Only the selected objects + self.cat_ids = [cat_id for cat_id, obj_name in ref.ycbv.id2obj.items() if obj_name in self.objs] + # map selected objs to [0, num_objs-1] + self.cat2label = {v: i for i, v in enumerate(self.cat_ids)} # id_map + self.label2cat = {label: cat for cat, label in self.cat2label.items()} + self.obj2label = OrderedDict((obj, obj_id) for obj_id, obj in enumerate(self.objs)) + ########################################################## + + def _load_from_idx_file(self, idx_file, image_root): + """ + idx_file: the scene/image ids + image_root/scene contains: + scene_gt.json + scene_gt_info.json + scene_camera.json + """ + xyz_root = osp.join(image_root, "xyz_crop") + scene_gt_dicts = {} + scene_gt_info_dicts = {} + scene_cam_dicts = {} + scene_im_ids = [] # store tuples of (scene_id, im_id) + with open(idx_file, "r") as f: + for line in f: + line_split = line.strip("\r\n").split("/") + scene_id = int(line_split[0]) + im_id = int(line_split[1]) + scene_im_ids.append((scene_id, im_id)) + if scene_id not in scene_gt_dicts: + scene_gt_file = osp.join(image_root, f"{scene_id:06d}/scene_gt.json") + assert osp.exists(scene_gt_file), scene_gt_file + scene_gt_dicts[scene_id] = mmcv.load(scene_gt_file) + + if scene_id not in scene_gt_info_dicts: + scene_gt_info_file = osp.join(image_root, f"{scene_id:06d}/scene_gt_info.json") + assert osp.exists(scene_gt_info_file), scene_gt_info_file + scene_gt_info_dicts[scene_id] = mmcv.load(scene_gt_info_file) + + if scene_id not in scene_cam_dicts: + scene_cam_file = osp.join(image_root, f"{scene_id:06d}/scene_camera.json") + assert osp.exists(scene_cam_file), scene_cam_file + scene_cam_dicts[scene_id] = mmcv.load(scene_cam_file) + ###################################################### + scene_im_ids = sorted(scene_im_ids) # sort to make it reproducible + dataset_dicts = [] + + num_instances_without_valid_segmentation = 0 + num_instances_without_valid_box = 0 + + for (scene_id, im_id) in tqdm(scene_im_ids): + rgb_path = 
osp.join(image_root, f"{scene_id:06d}/rgb/{im_id:06d}.png") + assert osp.exists(rgb_path), rgb_path + str_im_id = str(im_id) + + scene_im_id = f"{scene_id}/{im_id}" + + # for ycbv/tless, load cam K from image infos + cam_anno = np.array(scene_cam_dicts[scene_id][str_im_id]["cam_K"], dtype=np.float32).reshape(3, 3) + adapth_this_K = False + if self.align_K_by_change_pose: + if (cam_anno != self.cam).any(): + adapth_this_K = True + cam_anno_ori = cam_anno.copy() + cam_anno = self.cam + + depth_factor = 1000.0 / scene_cam_dicts[scene_id][str_im_id]["depth_scale"] + # dprint(record['cam']) + if "/train_synt/" in rgb_path: + img_type = "syn" + else: + img_type = "real" + record = { + "dataset_name": self.name, + "file_name": osp.relpath(rgb_path, PROJ_ROOT), + "height": self.height, + "width": self.width, + "image_id": self._unique_im_id, + "scene_im_id": scene_im_id, # for evaluation + "cam": cam_anno, # self.cam, + "depth_factor": depth_factor, + "img_type": img_type, + } + + if self.with_depth: + depth_file = osp.join(image_root, f"{scene_id:06d}/depth/{im_id:06d}.png") + assert osp.exists(depth_file), depth_file + record["depth_file"] = osp.relpath(depth_file, PROJ_ROOT) + + insts = [] + anno_dict_list = scene_gt_dicts[scene_id][str(im_id)] + info_dict_list = scene_gt_info_dicts[scene_id][str(im_id)] + for anno_i, anno in enumerate(anno_dict_list): + info = info_dict_list[anno_i] + obj_id = anno["obj_id"] + if obj_id not in self.cat_ids: + continue + # 0-based label now + cur_label = self.cat2label[obj_id] + ################ pose ########################### + R = np.array(anno["cam_R_m2c"], dtype="float32").reshape(3, 3) + trans = np.array(anno["cam_t_m2c"], dtype="float32") / 1000.0 # mm->m + pose = np.hstack([R, trans.reshape(3, 1)]) + if adapth_this_K: + # pose_uw = inv(K_uw) @ K_cmu @ pose_cmu + pose = np.linalg.inv(cam_anno) @ cam_anno_ori @ pose + # R = pose[:3, :3] + trans = pose[:3, 3] + + quat = mat2quat(pose[:3, :3]) + + ############# bbox 
############################ + bbox = info["bbox_obj"] + x1, y1, w, h = bbox + x2 = x1 + w + y2 = y1 + h + x1 = max(min(x1, self.width), 0) + y1 = max(min(y1, self.height), 0) + x2 = max(min(x2, self.width), 0) + y2 = max(min(y2, self.height), 0) + bbox = [x1, y1, x2, y2] + if self.filter_invalid: + bw = bbox[2] - bbox[0] + bh = bbox[3] - bbox[1] + if bh <= 1 or bw <= 1: + num_instances_without_valid_box += 1 + continue + + ############## mask ####################### + if self.with_masks: # either list[list[float]] or dict(RLE) + mask_visib_file = osp.join( + image_root, + f"{scene_id:06d}/mask_visib/{im_id:06d}_{anno_i:06d}.png", + ) + assert osp.exists(mask_visib_file), mask_visib_file + mask = mmcv.imread(mask_visib_file, "unchanged") + area = mask.sum() + if area < 30 and self.filter_invalid: + num_instances_without_valid_segmentation += 1 + continue + mask_rle = binary_mask_to_rle(mask) + + mask_full_file = osp.join( + image_root, + f"{scene_id:06d}/mask/{im_id:06d}_{anno_i:06d}.png", + ) + assert osp.exists(mask_full_file), mask_full_file + + # load mask full + mask_full = mmcv.imread(mask_full_file, "unchanged") + mask_full = mask_full.astype("bool") + mask_full_rle = binary_mask_to_rle(mask_full, compressed=True) + + proj = (self.cam @ trans.T).T # NOTE: use self.cam here + proj = proj[:2] / proj[2] + + inst = { + "category_id": cur_label, # 0-based label + "bbox": bbox, # TODO: load both bbox_obj and bbox_visib + "bbox_mode": BoxMode.XYXY_ABS, + "pose": pose, + "quat": quat, + "trans": trans, + "centroid_2d": proj, # absolute (cx, cy) + "segmentation": mask_rle, + "mask_full": mask_full_rle, + } + + if self.with_xyz: + xyz_path = osp.join( + xyz_root, + f"{scene_id:06d}/{im_id:06d}_{anno_i:06d}-xyz.pkl", + ) + inst["xyz_path"] = xyz_path + + model_info = self.models_info[str(obj_id)] + inst["model_info"] = model_info + # TODO: using full mask and full xyz + for key in ["bbox3d_and_center"]: + inst[key] = self.models[cur_label][key] + insts.append(inst) + 
if len(insts) == 0: # and self.filter_invalid: + continue + record["annotations"] = insts + dataset_dicts.append(record) + self._unique_im_id += 1 + + if num_instances_without_valid_segmentation > 0: + logger.warning( + "Filtered out {} instances without valid segmentation. " + "There might be issues in your dataset generation process.".format( + num_instances_without_valid_segmentation + ) + ) + if num_instances_without_valid_box > 0: + logger.warning( + "Filtered out {} instances without valid box. " + "There might be issues in your dataset generation process.".format(num_instances_without_valid_box) + ) + return dataset_dicts + + def __call__(self): # YCBV_Dataset + """Load light-weight instance annotations of all images into a list of + dicts in Detectron2 format. + + Do not load heavy data into memory in this file, since we will + load the annotations of all images into memory. + """ + # cache the dataset_dicts to avoid loading masks from files + hashed_file_name = hashlib.md5( + ( + "".join([str(fn) for fn in self.objs]) + + "dataset_dicts_{}_{}_{}_{}_{}_{}".format( + self.name, + self.dataset_root, + self.with_masks, + self.with_depth, + self.with_xyz, + __name__, + ) + ).encode("utf-8") + ).hexdigest() + cache_path = osp.join( + self.cache_dir, + "dataset_dicts_{}_{}.pkl".format(self.name, hashed_file_name), + ) + + if osp.exists(cache_path) and self.use_cache: + logger.info("load cached dataset dicts from {}".format(cache_path)) + return mmcv.load(cache_path) + + logger.info("loading dataset dicts: {}".format(self.name)) + t_start = time.perf_counter() + dataset_dicts = [] + self._unique_im_id = 0 + for ann_file, image_root in zip(self.ann_files, self.image_prefixes): + # logger.info("loading coco json: {}".format(ann_file)) + dataset_dicts.extend(self._load_from_idx_file(ann_file, image_root)) + + ########################################################################## + if self.num_to_load > 0: + self.num_to_load = min(int(self.num_to_load), 
len(dataset_dicts)) + dataset_dicts = dataset_dicts[: self.num_to_load] + logger.info("loaded {} dataset dicts, using {}s".format(len(dataset_dicts), time.perf_counter() - t_start)) + + mmcv.mkdir_or_exist(osp.dirname(cache_path)) + mmcv.dump(dataset_dicts, cache_path, protocol=4) + logger.info("Dumped dataset_dicts to {}".format(cache_path)) + return dataset_dicts + + @lazy_property + def models_info(self): + models_info_path = osp.join(self.models_root, "models_info.json") + assert osp.exists(models_info_path), models_info_path + models_info = mmcv.load(models_info_path) # key is str(obj_id) + return models_info + + @lazy_property + def models(self): + """Load models into a list.""" + cache_path = osp.join(self.models_root, "models_{}.pkl".format(self.name)) + if osp.exists(cache_path) and self.use_cache: + # dprint("{}: load cached object models from {}".format(self.name, cache_path)) + return mmcv.load(cache_path) + + models = [] + for obj_name in self.objs: + model = inout.load_ply( + osp.join( + self.models_root, + f"obj_{ref.ycbv.obj2id[obj_name]:06d}.ply", + ), + vertex_scale=self.scale_to_meter, + ) + # NOTE: the bbox3d_and_center is not obtained from centered vertices + # for BOP models, not a big problem since they had been centered + model["bbox3d_and_center"] = misc.get_bbox3d_and_center(model["pts"]) + + models.append(model) + logger.info("cache models to {}".format(cache_path)) + mmcv.dump(models, cache_path, protocol=4) + return models + + def image_aspect_ratio(self): + return self.width / self.height # 4/3 + + +########### register datasets ############################################################ + + +def get_ycbv_metadata(obj_names, ref_key): + """task specific metadata.""" + data_ref = ref.__dict__[ref_key] + + cur_sym_infos = {} # label based key + loaded_models_info = data_ref.get_models_info() + + for i, obj_name in enumerate(obj_names): + obj_id = data_ref.obj2id[obj_name] + model_info = loaded_models_info[str(obj_id)] + if 
"symmetries_discrete" in model_info or "symmetries_continuous" in model_info: + sym_transforms = misc.get_symmetry_transformations(model_info, max_sym_disc_step=0.01) + sym_info = np.array([sym["R"] for sym in sym_transforms], dtype=np.float32) + else: + sym_info = None + cur_sym_infos[i] = sym_info + + meta = {"thing_classes": obj_names, "sym_infos": cur_sym_infos} + return meta + + +ycbv_model_root = "BOP_DATASETS/ycbv/models/" +################################################################################ +default_cfg = dict( + # name="ycbv_train_real", + dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/"), + models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/models"), # models_simple + objs=ref.ycbv.objects, # all objects + # NOTE: this contains all classes + # ann_files=[osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/image_sets/train.txt")], + # image_prefixes=[osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/train_real")], + scale_to_meter=0.001, + with_masks=True, # (load masks but may not use it) + with_depth=True, # (load depth path here, but may not use it) + with_xyz=True, + height=480, + width=640, + align_K_by_change_pose=False, + cache_dir=osp.join(PROJ_ROOT, ".cache"), + use_cache=True, + num_to_load=-1, + filter_invalid=True, + ref_key="ycbv", +) +SPLITS_YCBV = {} +update_cfgs = { + "ycbv_train_real": { + "ann_files": [osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/image_sets/train.txt")], + "image_prefixes": [osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/train_real")], + }, + "ycbv_train_real_aligned_Kuw": { + "ann_files": [osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/image_sets/train.txt")], + "image_prefixes": [osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/train_real")], + "align_K_by_change_pose": True, + }, + "ycbv_train_real_uw": { + "ann_files": [osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/image_sets/train_real_uw.txt")], + "image_prefixes": [osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/train_real")], + }, + "ycbv_train_real_uw_every10": { + "ann_files": 
[ + osp.join( + DATASETS_ROOT, + "BOP_DATASETS/ycbv/image_sets/train_real_uw_every10.txt", + ) + ], + "image_prefixes": [osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/train_real")], + }, + "ycbv_train_real_cmu": { + "ann_files": [ + osp.join( + DATASETS_ROOT, + "BOP_DATASETS/ycbv/image_sets/train_real_cmu.txt", + ) + ], + "image_prefixes": [osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/train_real")], + }, + "ycbv_train_real_cmu_aligned_Kuw": { + "ann_files": [ + osp.join( + DATASETS_ROOT, + "BOP_DATASETS/ycbv/image_sets/train_real_cmu.txt", + ) + ], + "image_prefixes": [osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/train_real")], + "align_K_by_change_pose": True, + }, + "ycbv_train_synt": { + "ann_files": [osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/image_sets/train_synt.txt")], + "image_prefixes": [osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/train_synt")], + }, + "ycbv_train_synt_50k": { + "ann_files": [ + osp.join( + DATASETS_ROOT, + "BOP_DATASETS/ycbv/image_sets/train_synt_50k.txt", + ) + ], + "image_prefixes": [osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/train_synt")], + }, + "ycbv_train_synt_30k": { + "ann_files": [ + osp.join( + DATASETS_ROOT, + "BOP_DATASETS/ycbv/image_sets/train_synt_30k.txt", + ) + ], + "image_prefixes": [osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/train_synt")], + }, + "ycbv_train_synt_100": { + "ann_files": [ + osp.join( + DATASETS_ROOT, + "BOP_DATASETS/ycbv/image_sets/train_synt_100.txt", + ) + ], + "image_prefixes": [osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/train_synt")], + }, + "ycbv_test": { + "ann_files": [osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/image_sets/keyframe.txt")], + "image_prefixes": [osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/test")], + "with_xyz": False, + "filter_invalid": False, + }, +} +for name, update_cfg in update_cfgs.items(): + used_cfg = copy.deepcopy(default_cfg) + used_cfg["name"] = name + used_cfg.update(update_cfg) + num_to_load = -1 + if "_100" in name: + num_to_load = 100 + used_cfg["num_to_load"] = num_to_load + 
SPLITS_YCBV[name] = used_cfg + +# single object splits ###################################################### +for obj in ref.ycbv.objects: + for split in [ + "train_real", + "train_real_aligned_Kuw", + "train_real_uw", + "train_real_uw_every10", + "train_real_cmu", + "train_real_cmu_aligned_Kuw", + "train_synt", + "train_synt_30k", + "test", + ]: + name = "ycbv_{}_{}".format(obj, split) + if split in [ + "train_real", + "train_real_aligned_Kuw", + "train_real_uw", + "train_real_uw_every10", + "train_real_cmu", + "train_real_cmu_aligned_Kuw", + "train_synt", + "train_synt_30k", + ]: + filter_invalid = True + with_xyz = True + elif split in ["test"]: + filter_invalid = False + with_xyz = False + else: + raise ValueError("{}".format(split)) + + if split in ["train_real_aligned_Kuw", "train_real_cmu_aligned_Kuw"]: + align_K_by_change_pose = True + else: + align_K_by_change_pose = False + + split_idx_file_dict = { + "train_real": ("train_real", "train.txt"), + "train_real_aligned_Kuw": ("train_real", "train.txt"), + "train_real_uw": ("train_real", "train_real_uw.txt"), + "train_real_uw_every10": ( + "train_real", + "train_real_uw_every10.txt", + ), + "train_real_cmu": ("train_real", "train_real_cmu.txt"), + "train_real_cmu_aligned_Kuw": ("train_real", "train_real_cmu.txt"), + "train_synt": ("train_synt", "train_synt.txt"), + "train_synt_30k": ("train_synt", "train_synt_30k.txt"), + "test": ("test", "keyframe.txt"), + } + root_name, idx_file = split_idx_file_dict[split] + + if name not in SPLITS_YCBV: + SPLITS_YCBV[name] = dict( + name=name, + dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/"), + models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/models"), + objs=[obj], + ann_files=[ + osp.join( + DATASETS_ROOT, + "BOP_DATASETS/ycbv/image_sets/{}".format(idx_file), + ) + ], + image_prefixes=[osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/{}".format(root_name))], + scale_to_meter=0.001, + with_masks=True, # (load masks but may not use it) + with_depth=True, # 
(load depth path here, but may not use it) + with_xyz=with_xyz, + height=480, + width=640, + align_K_by_change_pose=align_K_by_change_pose, + cache_dir=osp.join(PROJ_ROOT, ".cache"), + use_cache=True, + num_to_load=-1, + filter_invalid=filter_invalid, + ref_key="ycbv", + ) + + +def register_with_name_cfg(name, data_cfg=None): + """Assume pre-defined datasets live in `./datasets`. + + Args: + name: datasnet_name, + data_cfg: if name is in existing SPLITS, use pre-defined data_cfg + otherwise requires data_cfg + data_cfg can be set in cfg.DATA_CFG.name + """ + dprint("register dataset: {}".format(name)) + if name in SPLITS_YCBV: + used_cfg = SPLITS_YCBV[name] + else: + assert ( + data_cfg is not None + ), f"dataset name {name} is not registered. available datasets: {list(SPLITS_YCBV.keys())}" + used_cfg = data_cfg + DatasetCatalog.register(name, YCBV_Dataset(used_cfg)) + # something like eval_types + MetadataCatalog.get(name).set( + id="ycbv", # NOTE: for pvnet to determine module + ref_key=used_cfg["ref_key"], + objs=used_cfg["objs"], + eval_error_types=["ad", "rete", "proj"], + evaluator_type="bop", + **get_ycbv_metadata(obj_names=used_cfg["objs"], ref_key=used_cfg["ref_key"]), + ) + + +def get_available_datasets(): + return list(SPLITS_YCBV.keys()) + + +#### tests ############################################### +def test_vis(): + # python -m core.datasets.ycbv_d2 ycbv_test + dataset_name = sys.argv[1] + meta = MetadataCatalog.get(dataset_name) + t_start = time.perf_counter() + dicts = DatasetCatalog.get(dataset_name) + with_xyz = False if "test" in dataset_name else True + logger.info("Done loading {} samples with {:.3f}s.".format(len(dicts), time.perf_counter() - t_start)) + + dirname = "output/ycbv_test-data-vis" + os.makedirs(dirname, exist_ok=True) + objs = meta.objs + for d in dicts: + img = read_image_mmcv(d["file_name"], format="BGR") + depth = mmcv.imread(d["depth_file"], "unchanged") / 1000.0 + + imH, imW = img.shape[:2] + annos = d["annotations"] + masks 
= [cocosegm2mask(anno["segmentation"], imH, imW) for anno in annos] + bboxes = [anno["bbox"] for anno in annos] + bbox_modes = [anno["bbox_mode"] for anno in annos] + bboxes_xyxy = np.array( + [BoxMode.convert(box, box_mode, BoxMode.XYXY_ABS) for box, box_mode in zip(bboxes, bbox_modes)] + ) + kpts_3d_list = [anno["bbox3d_and_center"] for anno in annos] + quats = [anno["quat"] for anno in annos] + transes = [anno["trans"] for anno in annos] + Rs = [quat2mat(quat) for quat in quats] + # 0-based label + cat_ids = [anno["category_id"] for anno in annos] + K = d["cam"] + kpts_2d = [misc.project_pts(kpt3d, K, R, t) for kpt3d, R, t in zip(kpts_3d_list, Rs, transes)] + # # TODO: visualize pose and keypoints + labels = [objs[cat_id] for cat_id in cat_ids] + for _i in range(len(annos)): + img_vis = vis_image_mask_bbox_cv2( + img, + masks[_i : _i + 1], + bboxes=bboxes_xyxy[_i : _i + 1], + labels=labels[_i : _i + 1], + ) + img_vis_kpts2d = misc.draw_projected_box3d(img_vis.copy(), kpts_2d[_i]) + if with_xyz: + xyz_path = annos[_i]["xyz_path"] + xyz_info = mmcv.load(xyz_path) + x1, y1, x2, y2 = xyz_info["xyxy"] + xyz_crop = xyz_info["xyz_crop"].astype(np.float32) + xyz = np.zeros((imH, imW, 3), dtype=np.float32) + xyz[y1 : y2 + 1, x1 : x2 + 1, :] = xyz_crop + xyz_show = get_emb_show(xyz) + xyz_crop_show = get_emb_show(xyz_crop) + img_xyz = img.copy() / 255.0 + mask_xyz = ((xyz[:, :, 0] != 0) | (xyz[:, :, 1] != 0) | (xyz[:, :, 2] != 0)).astype("uint8") + fg_idx = np.where(mask_xyz != 0) + img_xyz[fg_idx[0], fg_idx[1], :] = ( + 0.5 * xyz_show[fg_idx[0], fg_idx[1], :3] + 0.5 * img_xyz[fg_idx[0], fg_idx[1], :] + ) + img_xyz_crop = img_xyz[y1 : y2 + 1, x1 : x2 + 1, :] + img_vis_crop = img_vis[y1 : y2 + 1, x1 : x2 + 1, :] + # diff mask + diff_mask_xyz = np.abs(masks[_i] - mask_xyz)[y1 : y2 + 1, x1 : x2 + 1] + + grid_show( + [ + img[:, :, [2, 1, 0]], + img_vis[:, :, [2, 1, 0]], + img_vis_kpts2d[:, :, [2, 1, 0]], + depth, + # xyz_show, + diff_mask_xyz, + xyz_crop_show, + img_xyz[:, :, 
[2, 1, 0]], + img_xyz_crop[:, :, [2, 1, 0]], + img_vis_crop[:, :, ::-1], + ], + [ + "img", + "vis_img", + "img_vis_kpts2d", + "depth", + "diff_mask_xyz", + "xyz_crop_show", + "img_xyz", + "img_xyz_crop", + "img_vis_crop", + ], + row=3, + col=3, + ) + else: + grid_show( + [ + img[:, :, [2, 1, 0]], + img_vis[:, :, [2, 1, 0]], + img_vis_kpts2d[:, :, [2, 1, 0]], + depth, + ], + ["img", "vis_img", "img_vis_kpts2d", "depth"], + row=2, + col=2, + ) + + +if __name__ == "__main__": + """Test the dataset loader. + + Usage: + python -m this_module dataset_name + "dataset_name" can be any pre-registered ones + """ + from lib.vis_utils.image import grid_show + from lib.utils.setup_logger import setup_my_logger + + import detectron2.data.datasets # noqa # add pre-defined metadata + from lib.vis_utils.image import vis_image_mask_bbox_cv2 + from core.utils.utils import get_emb_show + from core.utils.data_utils import read_image_mmcv + + print("sys.argv:", sys.argv) + logger = setup_my_logger(name="core") + register_with_name_cfg(sys.argv[1]) + print("dataset catalog: ", DatasetCatalog.list()) + test_vis() diff --git a/det/yolox/data/datasets/fruitbin_pbr.py b/det/yolox/data/datasets/fruitbin_pbr.py new file mode 100644 index 0000000000000000000000000000000000000000..9295d4c9ac48572daee96041ec38e883f9f19cbc --- /dev/null +++ b/det/yolox/data/datasets/fruitbin_pbr.py @@ -0,0 +1,492 @@ +import hashlib +import logging +import os +import os.path as osp +import sys + +cur_dir = osp.dirname(osp.abspath(__file__)) +PROJ_ROOT = osp.normpath(osp.join(cur_dir, "../../../..")) +sys.path.insert(0, PROJ_ROOT) +import time +from collections import OrderedDict +import mmcv +import numpy as np +from tqdm import tqdm +from transforms3d.quaternions import mat2quat, quat2mat +import ref +from detectron2.data import DatasetCatalog, MetadataCatalog +from detectron2.structures import BoxMode +from lib.pysixd import inout, misc +from lib.utils.mask_utils import binary_mask_to_rle, cocosegm2mask +from 
lib.utils.utils import dprint, iprint, lazy_property + + +logger = logging.getLogger(__name__) +DATASETS_ROOT = osp.normpath(osp.join(PROJ_ROOT, "datasets")) + + +class YCBV_PBR_Dataset: + def __init__(self, data_cfg): + """ + Set with_depth and with_masks default to True, + and decide whether to load them into dataloader/network later + with_masks: + """ + self.name = data_cfg["name"] + self.data_cfg = data_cfg + + self.objs = data_cfg["objs"] # selected objects + + self.dataset_root = data_cfg.get( + "dataset_root", + osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/train_pbr"), + ) + self.xyz_root = data_cfg.get("xyz_root", osp.join(self.dataset_root, "xyz_crop")) + assert osp.exists(self.dataset_root), self.dataset_root + self.models_root = data_cfg["models_root"] # BOP_DATASETS/ycbv/models + self.scale_to_meter = data_cfg["scale_to_meter"] # 0.001 + + self.with_masks = data_cfg["with_masks"] + self.with_depth = data_cfg["with_depth"] + + self.height = data_cfg["height"] # 480 + self.width = data_cfg["width"] # 640 + + self.cache_dir = data_cfg.get("cache_dir", osp.join(PROJ_ROOT, ".cache")) # .cache + self.use_cache = data_cfg.get("use_cache", True) + self.num_to_load = data_cfg["num_to_load"] # -1 + self.filter_invalid = data_cfg.get("filter_invalid", True) + ################################################## + + # NOTE: careful! Only the selected objects + self.cat_ids = [cat_id for cat_id, obj_name in ref.ycbv.id2obj.items() if obj_name in self.objs] + # map selected objs to [0, num_objs-1] + self.cat2label = {v: i for i, v in enumerate(self.cat_ids)} # id_map + self.label2cat = {label: cat for cat, label in self.cat2label.items()} + self.obj2label = OrderedDict((obj, obj_id) for obj_id, obj in enumerate(self.objs)) + ########################################################## + + self.scenes = [f"{i:06d}" for i in range(50)] + + def __call__(self): + """Load light-weight instance annotations of all images into a list of + dicts in Detectron2 format. 
+ + Do not load heavy data into memory in this file, since we will + load the annotations of all images into memory. + """ + # cache the dataset_dicts to avoid loading masks from files + hashed_file_name = hashlib.md5( + ( + "".join([str(fn) for fn in self.objs]) + + "dataset_dicts_{}_{}_{}_{}_{}".format( + self.name, + self.dataset_root, + self.with_masks, + self.with_depth, + __name__, + ) + ).encode("utf-8") + ).hexdigest() + cache_path = osp.join( + self.cache_dir, + "dataset_dicts_{}_{}.pkl".format(self.name, hashed_file_name), + ) + + if osp.exists(cache_path) and self.use_cache: + logger.info("load cached dataset dicts from {}".format(cache_path)) + return mmcv.load(cache_path) + + t_start = time.perf_counter() + + logger.info("loading dataset dicts: {}".format(self.name)) + self.num_instances_without_valid_segmentation = 0 + self.num_instances_without_valid_box = 0 + dataset_dicts = [] # ###################################################### + # it is slow because of loading and converting masks to rle + for scene in tqdm(self.scenes): + scene_id = int(scene) + scene_root = osp.join(self.dataset_root, scene) + + gt_dict = mmcv.load(osp.join(scene_root, "scene_gt.json")) + gt_info_dict = mmcv.load(osp.join(scene_root, "scene_gt_info.json")) + cam_dict = mmcv.load(osp.join(scene_root, "scene_camera.json")) + + for str_im_id in tqdm(gt_dict, postfix=f"{scene_id}"): + int_im_id = int(str_im_id) + rgb_path = osp.join(scene_root, "rgb/{:06d}.jpg").format(int_im_id) + assert osp.exists(rgb_path), rgb_path + + depth_path = osp.join(scene_root, "depth/{:06d}.png".format(int_im_id)) + + scene_im_id = f"{scene_id}/{int_im_id}" + + K = np.array(cam_dict[str_im_id]["cam_K"], dtype=np.float32).reshape(3, 3) + depth_factor = 1000.0 / cam_dict[str_im_id]["depth_scale"] # 10000 + + record = { + "dataset_name": self.name, + "file_name": osp.relpath(rgb_path, PROJ_ROOT), + "depth_file": osp.relpath(depth_path, PROJ_ROOT), + "height": self.height, + "width": self.width, + 
"image_id": int_im_id, + "scene_im_id": scene_im_id, # for evaluation + "cam": K, + "depth_factor": depth_factor, + "img_type": "syn_pbr", # NOTE: has background + } + insts = [] + for anno_i, anno in enumerate(gt_dict[str_im_id]): + obj_id = anno["obj_id"] + if obj_id not in self.cat_ids: + continue + cur_label = self.cat2label[obj_id] # 0-based label + R = np.array(anno["cam_R_m2c"], dtype="float32").reshape(3, 3) + t = np.array(anno["cam_t_m2c"], dtype="float32") / 1000.0 + pose = np.hstack([R, t.reshape(3, 1)]) + quat = mat2quat(R).astype("float32") + + proj = (record["cam"] @ t.T).T + proj = proj[:2] / proj[2] + + bbox_visib = gt_info_dict[str_im_id][anno_i]["bbox_visib"] + bbox_obj = gt_info_dict[str_im_id][anno_i]["bbox_obj"] + x1, y1, w, h = bbox_visib + if self.filter_invalid: + if h <= 1 or w <= 1: + self.num_instances_without_valid_box += 1 + continue + + mask_file = osp.join( + scene_root, + "mask/{:06d}_{:06d}.png".format(int_im_id, anno_i), + ) + mask_visib_file = osp.join( + scene_root, + "mask_visib/{:06d}_{:06d}.png".format(int_im_id, anno_i), + ) + assert osp.exists(mask_file), mask_file + assert osp.exists(mask_visib_file), mask_visib_file + # load mask visib TODO: load both mask_visib and mask_full + mask_single = mmcv.imread(mask_visib_file, "unchanged") + area = mask_single.sum() + if area <= 64: # filter out too small or nearly invisible instances + self.num_instances_without_valid_segmentation += 1 + continue + mask_rle = binary_mask_to_rle(mask_single, compressed=True) + + # load mask full + mask_full = mmcv.imread(mask_file, "unchanged") + mask_full = mask_full.astype("bool") + mask_full_rle = binary_mask_to_rle(mask_full, compressed=True) + + visib_fract = gt_info_dict[str_im_id][anno_i].get("visib_fract", 1.0) + + xyz_path = osp.join( + self.xyz_root, + f"{scene_id:06d}/{int_im_id:06d}_{anno_i:06d}-xyz.pkl", + ) + inst = { + "category_id": cur_label, # 0-based label + "bbox": bbox_obj, # TODO: load both bbox_obj and bbox_visib + 
"bbox_mode": BoxMode.XYWH_ABS, + "pose": pose, + "quat": quat, + "trans": t, + "centroid_2d": proj, # absolute (cx, cy) + "segmentation": mask_rle, + "mask_full": mask_full_rle, # TODO: load as mask_full, rle + "visib_fract": visib_fract, + "xyz_path": xyz_path, + } + + model_info = self.models_info[str(obj_id)] + inst["model_info"] = model_info + # TODO: using full mask and full xyz + for key in ["bbox3d_and_center"]: + inst[key] = self.models[cur_label][key] + insts.append(inst) + if len(insts) == 0: # filter im without anno + continue + record["annotations"] = insts + dataset_dicts.append(record) + + if self.num_instances_without_valid_segmentation > 0: + logger.warning( + "Filtered out {} instances without valid segmentation. " + "There might be issues in your dataset generation process.".format( + self.num_instances_without_valid_segmentation + ) + ) + if self.num_instances_without_valid_box > 0: + logger.warning( + "Filtered out {} instances without valid box. " + "There might be issues in your dataset generation process.".format(self.num_instances_without_valid_box) + ) + ########################################################################## + if self.num_to_load > 0: + self.num_to_load = min(int(self.num_to_load), len(dataset_dicts)) + dataset_dicts = dataset_dicts[: self.num_to_load] + logger.info("loaded {} dataset dicts, using {}s".format(len(dataset_dicts), time.perf_counter() - t_start)) + + mmcv.mkdir_or_exist(osp.dirname(cache_path)) + mmcv.dump(dataset_dicts, cache_path, protocol=4) + logger.info("Dumped dataset_dicts to {}".format(cache_path)) + return dataset_dicts + + @lazy_property + def models_info(self): + models_info_path = osp.join(self.models_root, "models_info.json") + assert osp.exists(models_info_path), models_info_path + models_info = mmcv.load(models_info_path) # key is str(obj_id) + return models_info + + @lazy_property + def models(self): + """Load models into a list.""" + cache_path = osp.join(self.models_root, 
"models_{}.pkl".format(self.name)) + if osp.exists(cache_path) and self.use_cache: + # dprint("{}: load cached object models from {}".format(self.name, cache_path)) + return mmcv.load(cache_path) + + models = [] + for obj_name in self.objs: + model = inout.load_ply( + osp.join( + self.models_root, + f"obj_{ref.ycbv.obj2id[obj_name]:06d}.ply", + ), + vertex_scale=self.scale_to_meter, + ) + # NOTE: the bbox3d_and_center is not obtained from centered vertices + # for BOP models, not a big problem since they had been centered + model["bbox3d_and_center"] = misc.get_bbox3d_and_center(model["pts"]) + + models.append(model) + logger.info("cache models to {}".format(cache_path)) + mmcv.dump(models, cache_path, protocol=4) + return models + + def image_aspect_ratio(self): + return self.width / self.height # 4/3 + + +########### register datasets ############################################################ + + +def get_ycbv_metadata(obj_names, ref_key): + """task specific metadata.""" + data_ref = ref.__dict__[ref_key] + + cur_sym_infos = {} # label based key + loaded_models_info = data_ref.get_models_info() + + for i, obj_name in enumerate(obj_names): + obj_id = data_ref.obj2id[obj_name] + model_info = loaded_models_info[str(obj_id)] + if "symmetries_discrete" in model_info or "symmetries_continuous" in model_info: + sym_transforms = misc.get_symmetry_transformations(model_info, max_sym_disc_step=0.01) + sym_info = np.array([sym["R"] for sym in sym_transforms], dtype=np.float32) + else: + sym_info = None + cur_sym_infos[i] = sym_info + + meta = {"thing_classes": obj_names, "sym_infos": cur_sym_infos} + return meta + + +ycbv_model_root = "BOP_DATASETS/ycbv/models/" +################################################################################ + + +SPLITS_YCBV_PBR = dict( + ycbv_train_pbr=dict( + name="ycbv_train_pbr", + objs=ref.ycbv.objects, # selected objects + dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/train_pbr"), + models_root=osp.join(DATASETS_ROOT, 
"BOP_DATASETS/ycbv/models"), + xyz_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/train_pbr/xyz_crop"), + scale_to_meter=0.001, + with_masks=True, # (load masks but may not use it) + with_depth=True, # (load depth path here, but may not use it) + height=480, + width=640, + use_cache=True, + num_to_load=-1, + filter_invalid=True, + ref_key="ycbv", + ) +) + +# single obj splits +for obj in ref.ycbv.objects: + for split in ["train_pbr"]: + name = "ycbv_{}_{}".format(obj, split) + if split in ["train_pbr"]: + filter_invalid = True + elif split in ["test"]: + filter_invalid = False + else: + raise ValueError("{}".format(split)) + if name not in SPLITS_YCBV_PBR: + SPLITS_YCBV_PBR[name] = dict( + name=name, + objs=[obj], # only this obj + dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/train_pbr"), + models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/models"), + xyz_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/ycbv/train_pbr/xyz_crop"), + scale_to_meter=0.001, + with_masks=True, # (load masks but may not use it) + with_depth=True, # (load depth path here, but may not use it) + height=480, + width=640, + use_cache=True, + num_to_load=-1, + filter_invalid=filter_invalid, + ref_key="ycbv", + ) + + +def register_with_name_cfg(name, data_cfg=None): + """Assume pre-defined datasets live in `./datasets`. 
+
+    Args:
+        name: dataset_name,
+        data_cfg: if name is in existing SPLITS, use pre-defined data_cfg
+            otherwise requires data_cfg
+            data_cfg can be set in cfg.DATA_CFG.name
+    """
+    dprint("register dataset: {}".format(name))
+    if name in SPLITS_YCBV_PBR:
+        used_cfg = SPLITS_YCBV_PBR[name]
+    else:
+        assert data_cfg is not None, f"dataset name {name} is not registered"
+        used_cfg = data_cfg
+    DatasetCatalog.register(name, YCBV_PBR_Dataset(used_cfg))
+    # something like eval_types
+    MetadataCatalog.get(name).set(
+        id="ycbv",  # NOTE: for pvnet to determine module
+        ref_key=used_cfg["ref_key"],
+        objs=used_cfg["objs"],
+        eval_error_types=["ad", "rete", "proj"],
+        evaluator_type="bop",
+        **get_ycbv_metadata(obj_names=used_cfg["objs"], ref_key=used_cfg["ref_key"]),
+    )
+
+
+def get_available_datasets():
+    return list(SPLITS_YCBV_PBR.keys())
+
+
+#### tests ###############################################
+def test_vis():
+    dset_name = sys.argv[1]
+    assert dset_name in DatasetCatalog.list()
+
+    meta = MetadataCatalog.get(dset_name)
+    dprint("MetadataCatalog: ", meta)
+    objs = meta.objs
+
+    t_start = time.perf_counter()
+    dicts = DatasetCatalog.get(dset_name)
+    logger.info("Done loading {} samples with {:.3f}s.".format(len(dicts), time.perf_counter() - t_start))
+
+    dirname = "output/{}-data-vis".format(dset_name)
+    os.makedirs(dirname, exist_ok=True)
+    for d in dicts:
+        img = read_image_mmcv(d["file_name"], format="BGR")
+        depth = mmcv.imread(d["depth_file"], "unchanged") / 10000.0
+
+        imH, imW = img.shape[:2]
+        annos = d["annotations"]
+        masks = [cocosegm2mask(anno["segmentation"], imH, imW) for anno in annos]
+        bboxes = [anno["bbox"] for anno in annos]
+        bbox_modes = [anno["bbox_mode"] for anno in annos]
+        bboxes_xyxy = np.array(
+            [BoxMode.convert(box, box_mode, BoxMode.XYXY_ABS) for box, box_mode in zip(bboxes, bbox_modes)]
+        )
+        kpts_3d_list = [anno["bbox3d_and_center"] for anno in annos]
+        quats = [anno["quat"] for anno in annos]
+        transes = [anno["trans"] for anno 
in annos] + Rs = [quat2mat(quat) for quat in quats] + # 0-based label + cat_ids = [anno["category_id"] for anno in annos] + K = d["cam"] + kpts_2d = [misc.project_pts(kpt3d, K, R, t) for kpt3d, R, t in zip(kpts_3d_list, Rs, transes)] + + labels = [objs[cat_id] for cat_id in cat_ids] + for _i in range(len(annos)): + img_vis = vis_image_mask_bbox_cv2( + img, + masks[_i : _i + 1], + bboxes=bboxes_xyxy[_i : _i + 1], + labels=labels[_i : _i + 1], + ) + img_vis_kpts2d = misc.draw_projected_box3d(img_vis.copy(), kpts_2d[_i]) + xyz_path = annos[_i]["xyz_path"] + xyz_info = mmcv.load(xyz_path) + x1, y1, x2, y2 = xyz_info["xyxy"] + xyz_crop = xyz_info["xyz_crop"].astype(np.float32) + xyz = np.zeros((imH, imW, 3), dtype=np.float32) + xyz[y1 : y2 + 1, x1 : x2 + 1, :] = xyz_crop + xyz_show = get_emb_show(xyz) + xyz_crop_show = get_emb_show(xyz_crop) + img_xyz = img.copy() / 255.0 + mask_xyz = ((xyz[:, :, 0] != 0) | (xyz[:, :, 1] != 0) | (xyz[:, :, 2] != 0)).astype("uint8") + fg_idx = np.where(mask_xyz != 0) + img_xyz[fg_idx[0], fg_idx[1], :] = xyz_show[fg_idx[0], fg_idx[1], :3] + img_xyz_crop = img_xyz[y1 : y2 + 1, x1 : x2 + 1, :] + img_vis_crop = img_vis[y1 : y2 + 1, x1 : x2 + 1, :] + # diff mask + diff_mask_xyz = np.abs(masks[_i] - mask_xyz)[y1 : y2 + 1, x1 : x2 + 1] + + grid_show( + [ + img[:, :, [2, 1, 0]], + img_vis[:, :, [2, 1, 0]], + img_vis_kpts2d[:, :, [2, 1, 0]], + depth, + # xyz_show, + diff_mask_xyz, + xyz_crop_show, + img_xyz[:, :, [2, 1, 0]], + img_xyz_crop[:, :, [2, 1, 0]], + img_vis_crop, + ], + [ + "img", + "vis_img", + "img_vis_kpts2d", + "depth", + "diff_mask_xyz", + "xyz_crop_show", + "img_xyz", + "img_xyz_crop", + "img_vis_crop", + ], + row=3, + col=3, + ) + + +if __name__ == "__main__": + """Test the dataset loader. 
+ + Usage: + python -m this_module ycbv_pbr_train + """ + from lib.vis_utils.image import grid_show + from lib.utils.setup_logger import setup_my_logger + + import detectron2.data.datasets # noqa # add pre-defined metadata + from lib.vis_utils.image import vis_image_mask_bbox_cv2 + from core.utils.utils import get_emb_show + from core.utils.data_utils import read_image_mmcv + + print("sys.argv:", sys.argv) + logger = setup_my_logger(name="core") + register_with_name_cfg(sys.argv[1]) + print("dataset catalog: ", DatasetCatalog.list()) + + test_vis() diff --git a/ref/fruitbin.py b/ref/fruitbin.py new file mode 100644 index 0000000000000000000000000000000000000000..83502f9b283287b79e81b55fb4e2017d813a01bc --- /dev/null +++ b/ref/fruitbin.py @@ -0,0 +1,119 @@ +# encoding: utf-8 +"""This file includes necessary params, info.""" +import os +import mmcv +import os.path as osp + +import numpy as np + +# ---------------------------------------------------------------- # +# ROOT PATH INFO +# ---------------------------------------------------------------- # +cur_dir = osp.abspath(osp.dirname(__file__)) +root_dir = osp.normpath(osp.join(cur_dir, "..")) +# directory storing experiment data (result, model checkpoints, etc). 
+output_dir = osp.join(root_dir, "output") + +data_root = osp.join(root_dir, "datasets") +bop_root = osp.join(data_root, "BOP_DATASETS/") + +# ---------------------------------------------------------------- # +# YCBV DATASET +# ---------------------------------------------------------------- # +dataset_root = osp.join(bop_root, "ycbv") + +train_real_dir = osp.join(dataset_root, "train_real") +train_render_dir = osp.join(dataset_root, "train_synt") +train_pbr_dir = osp.join(dataset_root, "train_pbr") + +test_dir = osp.join(dataset_root, "test") + +test_scenes = [i for i in range(48, 59 + 1)] +train_real_scenes = [i for i in range(0, 91 + 1) if i not in test_scenes] +train_synt_scenes = [i for i in range(0, 79 + 1)] +train_pbr_scenes = [i for i in range(0, 49 + 1)] + +model_dir = osp.join(dataset_root, "models") +fine_model_dir = osp.join(dataset_root, "models_fine") +model_eval_dir = osp.join(dataset_root, "models_eval") +model_scaled_simple_dir = osp.join(dataset_root, "models_rescaled") # m, .obj +vertex_scale = 0.001 + +# object info +id2obj = { + 1: "002_master_chef_can", # [1.3360, -0.5000, 3.5105] + 2: "003_cracker_box", # [0.5575, 1.7005, 4.8050] + 3: "004_sugar_box", # [-0.9520, 1.4670, 4.3645] + 4: "005_tomato_soup_can", # [-0.0240, -1.5270, 8.4035] + 5: "006_mustard_bottle", # [1.2995, 2.4870, -11.8290] + 6: "007_tuna_fish_can", # [-0.1565, 0.1150, 4.2625] + 7: "008_pudding_box", # [1.1645, -4.2015, 3.1190] + 8: "009_gelatin_box", # [1.4460, -0.5915, 3.6085] + 9: "010_potted_meat_can", # [2.4195, 0.3075, 8.0715] + 10: "011_banana", # [-18.6730, 12.1915, -1.4635] + 11: "019_pitcher_base", # [5.3370, 5.8855, 25.6115] + 12: "021_bleach_cleanser", # [4.9290, -2.4800, -13.2920] + 13: "024_bowl", # [-0.2270, 0.7950, -2.9675] + 14: "025_mug", # [-8.4675, -0.6995, -1.6145] + 15: "035_power_drill", # [9.0710, 20.9360, -2.1190] + 16: "036_wood_block", # [1.4265, -2.5305, 17.1890] + 17: "037_scissors", # [7.0535, -28.1320, 0.0420] + 18: "040_large_marker", # 
[0.0460, -2.1040, 0.3500] + 19: "051_large_clamp", # [10.5180, -1.9640, -0.4745] + 20: "052_extra_large_clamp", # [-0.3950, -10.4130, 0.1620] + 21: "061_foam_brick", # [-0.0805, 0.0805, -8.2435] +} +objects = list(id2obj.values()) + +obj_num = len(id2obj) +obj2id = {_name: _id for _id, _name in id2obj.items()} + +model_paths = [osp.join(model_dir, "obj_{:06d}.ply").format(_id) for _id in id2obj] # TODO: check this +texture_paths = [osp.join(model_dir, "obj_{:06d}.png".format(_id)) for _id in id2obj] +model_colors = [((i + 1) * 10, (i + 1) * 10, (i + 1) * 10) for i in range(obj_num)] # for renderer + +# yapf: disable +diameters = np.array([172.063, 269.573, 198.377, 120.543, 196.463, + 89.797, 142.543, 114.053, 129.540, 197.796, + 259.534, 259.566, 161.922, 124.990, 226.170, + 237.299, 203.973, 121.365, 174.746, 217.094, + 102.903]) / 1000.0 +# yapf: enable +# Camera info +width = 640 +height = 480 +zNear = 0.25 +zFar = 6.0 +center = (height / 2, width / 2) +# default: 0000~0059 and synt +camera_matrix = uw_camera_matrix = np.array([[1066.778, 0.0, 312.9869], [0.0, 1067.487, 241.3109], [0.0, 0.0, 1.0]]) +# 0060~0091 +cmu_camera_matrix = np.array([[1077.836, 0.0, 323.7872], [0.0, 1078.189, 279.6921], [0.0, 0.0, 1.0]]) + +depth_factor = 10000.0 + + +def get_models_info(): + """key is str(obj_id)""" + models_info_path = osp.join(model_dir, "models_info.json") + assert osp.exists(models_info_path), models_info_path + models_info = mmcv.load(models_info_path) # key is str(obj_id) + return models_info + + +def get_fps_points(): + """key is str(obj_id) generated by + core/gdrn_modeling/tools/ycbv/ycbv_1_compute_fps.py.""" + fps_points_path = osp.join(model_dir, "fps_points.pkl") + assert osp.exists(fps_points_path), fps_points_path + fps_dict = mmcv.load(fps_points_path) + return fps_dict + + +def get_keypoints_3d(): + """key is str(obj_id) generated by + core/roi_pvnet/tools/ycbv/ycbv_1_compute_keypoints_3d.py.""" + keypoints_3d_path = osp.join(model_dir, 
"keypoints_3d.pkl") + assert osp.exists(keypoints_3d_path), keypoints_3d_path + kpts_dict = mmcv.load(keypoints_3d_path) + return kpts_dict