Commit 25ceb527 authored by liuxingyu
remove useless files

parent bae23a6b
@@ -8,12 +8,8 @@ import detectron2.utils.comm as comm
 import ref
 from detectron2.data import DatasetCatalog, MetadataCatalog
 from core.gdrn_modeling.datasets import (
-    lm_syn_imgn,
     lm_dataset_d2,
-    lm_syn_egl,
     lm_pbr,
-    lm_blender,
-    lm_dataset_crop_d2,
     ycbv_pbr,
     ycbv_d2,
     ycbv_bop_test,
@@ -22,7 +18,6 @@ from core.gdrn_modeling.datasets import (
     hb_bop_test,
     hb_bench_driller_phone_d2,
     duck_frames,
-    lm_new_duck_pbr,
     tudl_pbr,
     tudl_d2,
     tudl_bop_test,

core/gdrn_modeling/datasets/lm_blender.py (deleted)
import hashlib
import logging
import os
import os.path as osp
import sys
cur_dir = osp.dirname(osp.abspath(__file__))
PROJ_ROOT = osp.normpath(osp.join(cur_dir, "../../.."))
sys.path.insert(0, PROJ_ROOT)
import time
from collections import OrderedDict
import mmcv
import numpy as np
from tqdm import tqdm
from transforms3d.quaternions import mat2quat, quat2mat
import random
import ref
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.structures import BoxMode
from lib.pysixd import inout, misc
from lib.utils.mask_utils import (
binary_mask_to_rle,
cocosegm2mask,
mask2bbox_xywh,
)
from lib.utils.utils import dprint, iprint, lazy_property
logger = logging.getLogger(__name__)
DATASETS_ROOT = osp.normpath(osp.join(PROJ_ROOT, "datasets"))
class LM_BLENDER_Dataset(object):
"""lm blender data, from pvnet-rendering."""
def __init__(self, data_cfg):
"""
Set with_depth and with_masks default to True,
and decide whether to load them into dataloader/network later
with_masks:
"""
self.name = data_cfg["name"]
self.data_cfg = data_cfg
self.objs = data_cfg["objs"] # selected objects
self.ann_files = data_cfg["ann_files"] # json files with image ids and pose/bbox
self.image_prefixes = data_cfg["image_prefixes"]
self.dataset_root = data_cfg["dataset_root"] # lm_renders_blender/
self.models_root = data_cfg["models_root"] # BOP_DATASETS/lm/models
self.scale_to_meter = data_cfg["scale_to_meter"] # 0.001
self.with_masks = data_cfg["with_masks"] # True (load masks but may not use it)
self.with_depth = data_cfg["with_depth"] # True (load depth path here, but may not use it)
self.with_xyz = data_cfg["with_xyz"]
self.depth_factor = data_cfg["depth_factor"] # 1000.0
self.cam = data_cfg["cam"] #
self.height = data_cfg["height"] # 480
self.width = data_cfg["width"] # 640
self.cache_dir = data_cfg["cache_dir"] # .cache
self.use_cache = data_cfg["use_cache"] # True
# sample uniformly to get n items
self.n_per_obj = data_cfg.get("n_per_obj", 10000)
self.filter_invalid = data_cfg["filter_invalid"]
##################################################
if self.cam is None:
self.cam = np.array([[572.4114, 0, 325.2611], [0, 573.57043, 242.04899], [0, 0, 1]])
# NOTE: careful! Only the selected objects
self.cat_ids = [cat_id for cat_id, obj_name in ref.lm_full.id2obj.items() if obj_name in self.objs]
# map selected objs to [0, num_objs-1]
self.cat2label = {v: i for i, v in enumerate(self.cat_ids)} # id_map
self.label2cat = {label: cat for cat, label in self.cat2label.items()}
self.obj2label = OrderedDict((obj, obj_id) for obj_id, obj in enumerate(self.objs))
##########################################################
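# Illustration (hypothetical, assuming objs=["ape", "duck"] with LM ids 1 and 9):
#   cat_ids   -> [1, 9]
#   cat2label -> {1: 0, 9: 1}  # dataset id -> contiguous 0-based label
#   label2cat -> {0: 1, 1: 9}
#   obj2label -> OrderedDict([("ape", 0), ("duck", 1)])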
def __call__(self): # LM_BLENDER
"""Load light-weight instance annotations of all images into a list of
dicts in Detectron2 format.
Do not load heavy data into memory in this file, since we will
load the annotations of all images into memory.
"""
# cache the dataset_dicts to avoid loading masks from files
hashed_file_name = hashlib.md5(
(
"".join([str(fn) for fn in self.objs])
+ "dataset_dicts_{}_{}_{}_{}_{}_{}_{}".format(
self.name,
self.dataset_root,
self.with_masks,
self.with_depth,
self.with_xyz,
self.n_per_obj,
__name__,
)
).encode("utf-8")
).hexdigest()
cache_path = osp.join(
self.cache_dir,
"dataset_dicts_{}_{}.pkl".format(self.name, hashed_file_name),
)
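# The md5 digest covers every config field that affects the dicts (objs, name,
# roots, with_* flags, n_per_obj), so changing any of them yields a fresh cache
# file instead of silently reusing a stale one.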
if osp.exists(cache_path) and self.use_cache:
logger.info("load cached dataset dicts from {}".format(cache_path))
return mmcv.load(cache_path)
t_start = time.perf_counter()
logger.info("loading dataset dicts: {}".format(self.name))
self.num_instances_without_valid_segmentation = 0
self.num_instances_without_valid_box = 0
dataset_dicts = [] #######################################################
assert len(self.ann_files) == len(self.image_prefixes), f"{len(self.ann_files)} != {len(self.image_prefixes)}"
for ann_file, scene_root in zip(tqdm(self.ann_files), self.image_prefixes):
# each scene is an object
assert osp.exists(ann_file), ann_file
scene_gt_dict = mmcv.load(ann_file)
# sample uniformly (equal space)
indices = list(scene_gt_dict.keys())
if self.n_per_obj > 0:
sample_num = min(self.n_per_obj, len(scene_gt_dict))
sel_indices_idx = np.linspace(0, len(scene_gt_dict) - 1, sample_num, dtype=np.int32)
sel_indices = [indices[int(_i)] for _i in sel_indices_idx]
else:
sel_indices = indices
for str_im_id in tqdm(sel_indices):
int_im_id = int(str_im_id)
rgb_path = osp.join(scene_root, "{}.jpg").format(str_im_id)
assert osp.exists(rgb_path), rgb_path
depth_path = osp.join(scene_root, "{}_depth_opengl.png".format(str_im_id))
obj_name = osp.basename(ann_file).split("_")[0] # obj_gt.json
obj_id = ref.lm_full.obj2id[obj_name]
if obj_name not in self.objs:
continue
record = {
"dataset_name": self.name,
"file_name": osp.relpath(rgb_path, PROJ_ROOT),
"depth_file": osp.relpath(depth_path, PROJ_ROOT),
"height": self.height,
"width": self.width,
"image_id": int_im_id,
"scene_im_id": f"{obj_id}/{int_im_id}",
"cam": self.cam,
"img_type": "syn_blender", # has bg
}
cur_label = self.obj2label[obj_name] # 0-based label
anno = scene_gt_dict[str_im_id][0] # only one object
R = np.array(anno["cam_R_m2c"]).reshape(3, 3)
t = np.array(anno["cam_t_m2c"]).reshape(-1) / 1000
pose = np.hstack([R, t.reshape(3, 1)])
quat = mat2quat(R).astype("float32")
proj = (record["cam"] @ t.T).T
proj = proj[:2] / proj[2]
bbox_visib = anno["bbox_visib"]
x1, y1, w, h = bbox_visib
cx, cy = proj
crop_x1 = round(np.clip(cx - 64, 0, self.width - 1))
crop_x2 = round(np.clip(cx + 64, 0, self.width - 1))
crop_y1 = round(np.clip(cy - 64, 0, self.height - 1))
crop_y2 = round(np.clip(cy + 64, 0, self.height - 1))
# convert to xywh
crop_w = crop_x2 - crop_x1
crop_h = crop_y2 - crop_y1
bbox_128 = [crop_x1, crop_y1, crop_w, crop_h]
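# bbox_128 is a fixed 128x128 window centered on the projected 3D centroid
# (cx, cy); after clipping to the image border, crop_w/crop_h may be < 128.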
if self.filter_invalid:
if h <= 1 or w <= 1:
self.num_instances_without_valid_box += 1
continue
mask_path = osp.join(scene_root, "{}_mask_opengl.png".format(str_im_id))
mask = mmcv.imread(mask_path, "unchanged")
mask = (mask > 0).astype(np.uint8)
area = mask.sum()
if area < 3: # filter out too small or nearly invisible instances
self.num_instances_without_valid_segmentation += 1
continue
mask_rle = binary_mask_to_rle(mask, compressed=True)
xyz_path = osp.join(scene_root, "{}_xyz_bop.pkl".format(str_im_id))
assert osp.exists(xyz_path), xyz_path
visib_fract = anno.get("visib_fract", 1.0)
inst = {
"category_id": cur_label, # 0-based label
"bbox": bbox_visib, # TODO: load both bbox_obj and bbox_visib
"bbox_mode": BoxMode.XYWH_ABS,
"bbox_crop": bbox_128,
"pose": pose,
"quat": quat,
"trans": t,
"centroid_2d": proj, # absolute (cx, cy)
"segmentation": mask_rle,
"xyz_path": xyz_path,
"visib_fract": visib_fract,
"mask_full": mask_rle, # NOTE!
}
model_info = self.models_info[str(obj_id)]
inst["model_info"] = model_info
for key in ["bbox3d_and_center"]:
inst[key] = self.models[cur_label][key]
record["annotations"] = [inst]
dataset_dicts.append(record)
if self.num_instances_without_valid_segmentation > 0:
logger.warning(
"Filtered out {} instances without valid segmentation. "
"There might be issues in your dataset generation process.".format(
self.num_instances_without_valid_segmentation
)
)
if self.num_instances_without_valid_box > 0:
logger.warning(
"Filtered out {} instances without valid box. "
"There might be issues in your dataset generation process.".format(self.num_instances_without_valid_box)
)
##########################################################################
# if self.num_to_load > 0:
# self.num_to_load = min(int(self.num_to_load), len(dataset_dicts))
# random.shuffle(dataset_dicts)
# dataset_dicts = dataset_dicts[: self.num_to_load]
logger.info(
"loaded dataset dicts, num_images: {}, using {}s".format(len(dataset_dicts), time.perf_counter() - t_start)
)
mmcv.mkdir_or_exist(osp.dirname(cache_path))
mmcv.dump(dataset_dicts, cache_path, protocol=4)
logger.info("Dumped dataset_dicts to {}".format(cache_path))
return dataset_dicts
@lazy_property
def models_info(self):
models_info_path = osp.join(self.models_root, "models_info.json")
assert osp.exists(models_info_path), models_info_path
models_info = mmcv.load(models_info_path) # key is str(obj_id)
return models_info
@lazy_property
def models(self):
"""Load models into a list."""
cache_path = osp.join(self.models_root, "models_{}.pkl".format("_".join(self.objs)))
if osp.exists(cache_path) and self.use_cache:
# dprint("{}: load cached object models from {}".format(self.name, cache_path))
return mmcv.load(cache_path)
models = []
for obj_name in self.objs:
model = inout.load_ply(
osp.join(
self.models_root,
f"obj_{ref.lm_full.obj2id[obj_name]:06d}.ply",
),
vertex_scale=self.scale_to_meter,
)
# NOTE: bbox3d_and_center is computed from the raw (not re-centered) vertices;
# fine for BOP models, whose vertices are already centered
model["bbox3d_and_center"] = misc.get_bbox3d_and_center(model["pts"])
models.append(model)
logger.info("cache models to {}".format(cache_path))
mmcv.dump(models, cache_path, protocol=4)
return models
def image_aspect_ratio(self):
return self.width / self.height # 4/3
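# Usage sketch (split names come from SPLITS_LM_BLENDER defined below):
#   dset = LM_BLENDER_Dataset(SPLITS_LM_BLENDER["lm_blender_13_train"])
#   dicts = dset()           # list of per-image dicts, cached under .cache/
#   dicts[0]["annotations"]  # single-instance list with pose/quat/trans/...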
########### register datasets ############################################################
def get_lm_metadata(obj_names, ref_key):
"""task specific metadata."""
data_ref = ref.__dict__[ref_key]
cur_sym_infos = {} # label based key
loaded_models_info = data_ref.get_models_info()
for i, obj_name in enumerate(obj_names):
obj_id = data_ref.obj2id[obj_name]
model_info = loaded_models_info[str(obj_id)]
if "symmetries_discrete" in model_info or "symmetries_continuous" in model_info:
sym_transforms = misc.get_symmetry_transformations(model_info, max_sym_disc_step=0.01)
sym_info = np.array([sym["R"] for sym in sym_transforms], dtype=np.float32)
else:
sym_info = None
cur_sym_infos[i] = sym_info
meta = {"thing_classes": obj_names, "sym_infos": cur_sym_infos}
return meta
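# sym_infos maps each 0-based label to an (N, 3, 3) float32 array of symmetry
# rotations, or None for asymmetric objects (in LM, typically only eggbox and
# glue carry symmetry annotations).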
LM_13_OBJECTS = [
"ape",
"benchvise",
"camera",
"can",
"cat",
"driller",
"duck",
"eggbox",
"glue",
"holepuncher",
"iron",
"lamp",
"phone",
] # no bowl, cup
LM_OCC_OBJECTS = [
"ape",
"can",
"cat",
"driller",
"duck",
"eggbox",
"glue",
"holepuncher",
]
################################################################################
SPLITS_LM_BLENDER = dict(
lm_blender_13_train=dict(
name="lm_blender_13_train", # BB8 training set
dataset_root=osp.join(DATASETS_ROOT, "lm_renders_blender/"),
models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/models"),
objs=LM_13_OBJECTS, # selected objects
ann_files=[
osp.join(
DATASETS_ROOT,
"lm_renders_blender/renders/{}_gt.json".format(_obj),
)
for _obj in LM_13_OBJECTS
],
image_prefixes=[
osp.join(DATASETS_ROOT, "lm_renders_blender/renders/{}".format(_obj)) for _obj in LM_13_OBJECTS
],
scale_to_meter=0.001,
with_masks=True, # (load masks but may not use it)
with_depth=True, # (load depth path here, but may not use it)
with_xyz=True,
depth_factor=1000.0,
cam=ref.lm_full.camera_matrix,
height=480,
width=640,
cache_dir=osp.join(PROJ_ROOT, ".cache"),
use_cache=True,
n_per_obj=-1, # num per class, -1 for all 10k
filter_invalid=False,
ref_key="lm_full",
),
lmo_blender_train=dict(
name="lmo_blender_train",
dataset_root=osp.join(DATASETS_ROOT, "lm_renders_blender/"),
models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/models"),
objs=LM_OCC_OBJECTS, # selected objects
ann_files=[
osp.join(
DATASETS_ROOT,
"lm_renders_blender/renders/{}_gt.json".format(_obj),
)
for _obj in LM_OCC_OBJECTS
],
image_prefixes=[
osp.join(DATASETS_ROOT, "lm_renders_blender/renders/{}".format(_obj)) for _obj in LM_OCC_OBJECTS
],
scale_to_meter=0.001,
with_masks=True, # (load masks but may not use it)
with_depth=True, # (load depth path here, but may not use it)
with_xyz=True,
depth_factor=1000.0,
cam=ref.lmo_full.camera_matrix,
height=480,
width=640,
cache_dir=osp.join(PROJ_ROOT, ".cache"),
use_cache=True,
n_per_obj=-1, # n per class, -1 for all 10k
filter_invalid=False,
ref_key="lmo_full",
),
)
# single obj splits
for obj in ref.lm_full.objects:
for split in ["train"]:
for name_prefix in ["lm", "lmo"]:
name = "{}_blender_{}_{}".format(name_prefix, obj, split)
ref_key = f"{name_prefix}_full"
ann_files = [
osp.join(
DATASETS_ROOT,
"lm_renders_blender/renders/{}_gt.json".format(obj),
)
]
if split in ["train"]:
filter_invalid = True
else:
raise ValueError("unknown split: {}".format(split))
if name not in SPLITS_LM_BLENDER:
SPLITS_LM_BLENDER[name] = dict(
name=name,
dataset_root=osp.join(DATASETS_ROOT, "lm_renders_blender/"),
models_root=osp.join(DATASETS_ROOT, f"BOP_DATASETS/{name_prefix}/models"),
objs=[obj], # only this obj
ann_files=ann_files,
image_prefixes=[osp.join(DATASETS_ROOT, f"lm_renders_blender/renders/{obj}")],
scale_to_meter=0.001,
with_masks=True, # (load masks but may not use it)
with_depth=True, # (load depth path here, but may not use it)
with_xyz=True,
depth_factor=1000.0,
cam=ref.__dict__[ref_key].camera_matrix,
height=480,
width=640,
cache_dir=osp.join(PROJ_ROOT, ".cache"),
use_cache=True,
n_per_obj=-1,
filter_invalid=False,
ref_key=ref_key,
)
def register_with_name_cfg(name, data_cfg=None):
"""Assume pre-defined datasets live in `./datasets`.
Args:
name: dataset_name
data_cfg: if name is a pre-defined split, its cfg is used and this is ignored;
otherwise data_cfg is required (it can be set via cfg.DATA_CFG.name)
"""
dprint("register dataset: {}".format(name))
if name in SPLITS_LM_BLENDER:
used_cfg = SPLITS_LM_BLENDER[name]
else:
assert data_cfg is not None, f"dataset name {name} is not registered"
used_cfg = data_cfg
DatasetCatalog.register(name, LM_BLENDER_Dataset(used_cfg))
# something like eval_types
MetadataCatalog.get(name).set(
ref_key=used_cfg["ref_key"],
objs=used_cfg["objs"],
eval_error_types=["ad", "rete", "proj"],
evaluator_type="bop",
**get_lm_metadata(obj_names=used_cfg["objs"], ref_key=used_cfg["ref_key"]),
)
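# Typical call, using a split generated above:
#   register_with_name_cfg("lm_blender_duck_train")
#   dicts = DatasetCatalog.get("lm_blender_duck_train")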
def get_available_datasets():
return list(SPLITS_LM_BLENDER.keys())
#### tests ###############################################
def test_vis():
dset_name = sys.argv[1]
assert dset_name in DatasetCatalog.list()
meta = MetadataCatalog.get(dset_name)
dprint("MetadataCatalog: ", meta)
objs = meta.objs
t_start = time.perf_counter()
dicts = DatasetCatalog.get(dset_name)
logger.info("Done loading {} samples with {:.3f}s.".format(len(dicts), time.perf_counter() - t_start))
dirname = "output/{}-data-vis".format(dset_name)
os.makedirs(dirname, exist_ok=True)
for d in dicts:
img = read_image_mmcv(d["file_name"], format="BGR")
depth = mmcv.imread(d["depth_file"], "unchanged") / 1000.0
anno = d["annotations"][0] # only one instance per image
imH, imW = img.shape[:2]
mask = cocosegm2mask(anno["segmentation"], imH, imW)
bbox = anno["bbox"]
bbox_mode = anno["bbox_mode"]
bbox_xyxy = np.array(BoxMode.convert(bbox, bbox_mode, BoxMode.XYXY_ABS))
kpt3d = anno["bbox3d_and_center"]
quat = anno["quat"]
trans = anno["trans"]
R = quat2mat(quat)
# 0-based label
cat_id = anno["category_id"]
K = d["cam"]
kpt_2d = misc.project_pts(kpt3d, K, R, trans)
# # TODO: visualize pose and keypoints
label = objs[cat_id]
# img_vis = vis_image_bboxes_cv2(img, bboxes=bboxes_xyxy, labels=labels)
img_vis = vis_image_mask_bbox_cv2(img, [mask], bboxes=[bbox_xyxy], labels=[label])
img_vis_kpt2d = img.copy()
img_vis_kpt2d = misc.draw_projected_box3d(
img_vis_kpt2d,
kpt_2d,
middle_color=None,
bottom_color=(128, 128, 128),
)
xyz_info = mmcv.load(anno["xyz_path"])
xyz = np.zeros((imH, imW, 3), dtype=np.float32)
xyz_crop = xyz_info["xyz_crop"].astype(np.float32)
x1, y1, x2, y2 = xyz_info["xyxy"]
xyz[y1 : y2 + 1, x1 : x2 + 1, :] = xyz_crop
xyz_show = get_emb_show(xyz)
grid_show(
[
img[:, :, [2, 1, 0]],
img_vis[:, :, [2, 1, 0]],
img_vis_kpt2d[:, :, [2, 1, 0]],
depth,
xyz_show,
],
["img", "vis_img", "img_vis_kpts2d", "depth", "emb_show"],
row=2,
col=3,
)
if __name__ == "__main__":
"""Test the dataset loader.
Usage:
python -m core.gdrn_modeling.datasets.lm_blender dataset_name
"""
from lib.vis_utils.image import grid_show
from lib.utils.setup_logger import setup_my_logger
import detectron2.data.datasets # noqa # add pre-defined metadata
from lib.vis_utils.image import vis_image_mask_bbox_cv2
from core.utils.utils import get_emb_show
from core.utils.data_utils import read_image_mmcv
print("sys.argv:", sys.argv)
logger = setup_my_logger(name="core")
register_with_name_cfg(sys.argv[1])
print("dataset catalog: ", DatasetCatalog.list())
test_vis()
core/gdrn_modeling/datasets/lm_dataset_crop_d2.py (deleted)

import hashlib
import logging
import os
import os.path as osp
import sys
import time
from collections import OrderedDict
import mmcv
import numpy as np
from tqdm import tqdm
from transforms3d.quaternions import mat2quat, quat2mat
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.structures import BoxMode
cur_dir = osp.dirname(osp.abspath(__file__))
PROJ_ROOT = osp.normpath(osp.join(cur_dir, "../../.."))
sys.path.insert(0, PROJ_ROOT)
import ref
from lib.pysixd import inout, misc
from lib.utils.mask_utils import binary_mask_to_rle, cocosegm2mask
from lib.utils.utils import dprint, iprint, lazy_property
logger = logging.getLogger(__name__)
DATASETS_ROOT = osp.normpath(osp.join(PROJ_ROOT, "datasets"))
class LM_CROP_Dataset(object):
"""lm crop splits."""
def __init__(self, data_cfg):
"""
Set with_depth and with_masks default to True,
and decide whether to load them into dataloader/network later
with_masks:
"""
self.name = data_cfg["name"]
self.data_cfg = data_cfg
self.objs = data_cfg["objs"] # selected objects
self.ann_files = data_cfg["ann_files"] # idx files with image ids
self.image_prefixes = data_cfg["image_prefixes"]
self.dataset_root = data_cfg["dataset_root"] # BOP_DATASETS/lm/
assert osp.exists(self.dataset_root), self.dataset_root
self.models_root = data_cfg["models_root"] # BOP_DATASETS/lm/models
self.scale_to_meter = data_cfg["scale_to_meter"] # 0.001
self.with_masks = data_cfg["with_masks"] # True (load masks but may not use it)
self.with_depth = data_cfg["with_depth"] # True (load depth path here, but may not use it)
self.depth_factor = data_cfg["depth_factor"] # 1000.0
self.cam_type = data_cfg["cam_type"]
self.cam = data_cfg["cam"] #
self.height = data_cfg["height"] # 480
self.width = data_cfg["width"] # 640
self.cache_dir = data_cfg["cache_dir"] # .cache
self.use_cache = data_cfg["use_cache"] # True
self.num_to_load = data_cfg["num_to_load"] # -1
self.filter_invalid = data_cfg["filter_invalid"]
self.filter_scene = data_cfg.get("filter_scene", False)
##################################################
if self.cam is None:
assert self.cam_type in ["local", "dataset"]
if self.cam_type == "dataset":
self.cam = np.array([[572.4114, 0, 325.2611], [0, 573.57043, 242.04899], [0, 0, 1]])
elif self.cam_type == "local":
# self.cam = np.array([[539.8100, 0, 318.2700], [0, 539.8300, 239.5600], [0, 0, 1]])
# yapf: disable
self.cam = np.array(
[[518.81993115, 0., 320.50653699],
[0., 518.86581081, 243.5604188 ],
[0., 0., 1. ]])
# yapf: enable
# RMS: 0.14046169348724977
# camera matrix:
# [[518.81993115 0. 320.50653699]
# [ 0. 518.86581081 243.5604188 ]
# [ 0. 0. 1. ]]
# distortion coefficients: [ 0.04147325 -0.21469544 -0.00053707 -0.00251986 0.17406399]
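# The "local" intrinsics above come from a checkerboard calibration (RMS
# reprojection error ~0.14 px); the distortion coefficients are kept for
# reference only and are not applied anywhere in this loader.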
# NOTE: careful! Only the selected objects
self.cat_ids = [cat_id for cat_id, obj_name in ref.lm_full.id2obj.items() if obj_name in self.objs]
# map selected objs to [0, num_objs-1]
self.cat2label = {v: i for i, v in enumerate(self.cat_ids)} # id_map
self.label2cat = {label: cat for cat, label in self.cat2label.items()}
self.obj2label = OrderedDict((obj, obj_id) for obj_id, obj in enumerate(self.objs))
##########################################################
def __call__(self): # LM_CROP_Dataset
"""Load light-weight instance annotations of all images into a list of
dicts in Detectron2 format.
Do not load heavy data into memory in this file, since we will
load the annotations of all images into memory.
"""
# cache the dataset_dicts to avoid loading masks from files
hashed_file_name = hashlib.md5(
(
"".join([str(fn) for fn in self.objs])
+ "dataset_dicts_{}_{}_{}_{}_{}".format(
self.name, self.dataset_root, self.with_masks, self.with_depth, self.cam_type
)
).encode("utf-8")
).hexdigest()
cache_path = osp.join(self.cache_dir, "dataset_dicts_{}_{}.pkl".format(self.name, hashed_file_name))
if osp.exists(cache_path) and self.use_cache:
logger.info("load cached dataset dicts from {}".format(cache_path))
return mmcv.load(cache_path)
t_start = time.perf_counter()
logger.info("loading dataset dicts: {}".format(self.name))
self.num_instances_without_valid_segmentation = 0
self.num_instances_without_valid_box = 0
dataset_dicts = [] #######################################################
assert len(self.ann_files) == len(self.image_prefixes), f"{len(self.ann_files)} != {len(self.image_prefixes)}"
unique_im_id = 0
for ann_file, scene_root in zip(self.ann_files, self.image_prefixes):
# linemod each scene is an object
with open(ann_file, "r") as f_ann:
indices = [line.strip("\r\n") for line in f_ann.readlines()] # string ids
gt_dict = mmcv.load(osp.join(scene_root, "scene_gt.json"))
gt_info_dict = mmcv.load(osp.join(scene_root, "scene_gt_info.json")) # bbox_obj, bbox_visib
for im_id in tqdm(indices):
int_im_id = int(im_id)
rgb_path = osp.join(scene_root, "rgb/{:06d}.png").format(int_im_id)
assert osp.exists(rgb_path), rgb_path
depth_path = osp.join(scene_root, "depth/{:06d}.png".format(int_im_id))
scene_id = int(rgb_path.split("/")[-3])
scene_im_id = "{}/{}".format(scene_id, int_im_id)
if self.filter_scene:
if scene_id not in self.cat_ids:
continue
record = {
"dataset_name": self.name,
"file_name": osp.relpath(rgb_path, PROJ_ROOT),
"depth_file": osp.relpath(depth_path, PROJ_ROOT),
"height": self.height,
"width": self.width,
"image_id": unique_im_id,
"scene_im_id": scene_im_id, # for evaluation
"cam": self.cam,
"depth_factor": self.depth_factor,
"img_type": "real",
}
unique_im_id += 1
insts = []
for anno_i, anno in enumerate(gt_dict[im_id]):
obj_id = anno["obj_id"]
if obj_id not in self.cat_ids:
continue
cur_label = self.cat2label[obj_id] # 0-based label
R = np.array(anno["cam_R_m2c"], dtype="float32").reshape(3, 3)
t = np.array(anno["cam_t_m2c"], dtype="float32") / 1000.0
pose = np.hstack([R, t.reshape(3, 1)])
quat = mat2quat(R).astype("float32")
proj = (record["cam"] @ t.T).T
proj = proj[:2] / proj[2]
bbox_visib = gt_info_dict[im_id][anno_i]["bbox_visib"]
bbox_obj = gt_info_dict[im_id][anno_i]["bbox_obj"]
x1, y1, w, h = bbox_visib
cx, cy = proj
crop_x1 = round(np.clip(cx - 64, 0, self.width - 1))
crop_x2 = round(np.clip(cx + 64, 0, self.width - 1))
crop_y1 = round(np.clip(cy - 64, 0, self.height - 1))
crop_y2 = round(np.clip(cy + 64, 0, self.height - 1))
# convert to xywh
crop_w = crop_x2 - crop_x1
crop_h = crop_y2 - crop_y1
bbox_128 = [crop_x1, crop_y1, crop_w, crop_h]
if self.filter_invalid:
if h <= 1 or w <= 1:
self.num_instances_without_valid_box += 1
continue
mask_file = osp.join(scene_root, "mask/{:06d}_{:06d}.png".format(int_im_id, anno_i))
mask_visib_file = osp.join(scene_root, "mask_visib/{:06d}_{:06d}.png".format(int_im_id, anno_i))
assert osp.exists(mask_file), mask_file
assert osp.exists(mask_visib_file), mask_visib_file
# load mask visib TODO: load both mask_visib and mask_full
mask_single = mmcv.imread(mask_visib_file, "unchanged")
mask_single = mask_single.astype("bool")
area = mask_single.sum()
if area < 3: # filter out too small or nearly invisible instances
self.num_instances_without_valid_segmentation += 1
continue
mask_rle = binary_mask_to_rle(mask_single, compressed=True)
# load mask full
mask_full = mmcv.imread(mask_file, "unchanged")
mask_full = mask_full.astype("bool")
mask_full_rle = binary_mask_to_rle(mask_full, compressed=True)
inst = {
"category_id": cur_label, # 0-based label
"bbox": bbox_visib, # TODO: load both bbox_obj and bbox_visib
"bbox_mode": BoxMode.XYWH_ABS,
"bbox_crop": bbox_128,
"pose": pose,
"quat": quat,
"trans": t,
"centroid_2d": proj, # absolute (cx, cy)
"segmentation": mask_rle,
"mask_full": mask_full_rle,
}
model_info = self.models_info[str(obj_id)]
inst["model_info"] = model_info
for key in ["bbox3d_and_center"]:
inst[key] = self.models[cur_label][key]
insts.append(inst)
if len(insts) == 0: # filter im without anno
continue
record["annotations"] = insts
dataset_dicts.append(record)
if self.num_instances_without_valid_segmentation > 0:
logger.warning(
"Filtered out {} instances without valid segmentation. "
"There might be issues in your dataset generation process.".format(
self.num_instances_without_valid_segmentation
)
)
if self.num_instances_without_valid_box > 0:
logger.warning(
"Filtered out {} instances without valid box. "
"There might be issues in your dataset generation process.".format(self.num_instances_without_valid_box)
)
##########################################################################
if self.num_to_load > 0:
self.num_to_load = min(int(self.num_to_load), len(dataset_dicts))
dataset_dicts = dataset_dicts[: self.num_to_load]
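# e.g. num_to_load=100 keeps only the first 100 records (no shuffling here).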
logger.info("loaded {} dataset dicts, using {}s".format(len(dataset_dicts), time.perf_counter() - t_start))
mmcv.mkdir_or_exist(osp.dirname(cache_path))
mmcv.dump(dataset_dicts, cache_path, protocol=4)
logger.info("Dumped dataset_dicts to {}".format(cache_path))
return dataset_dicts
@lazy_property
def models_info(self):
models_info_path = osp.join(self.models_root, "models_info.json")
assert osp.exists(models_info_path), models_info_path
models_info = mmcv.load(models_info_path) # key is str(obj_id)
return models_info
@lazy_property
def models(self):
"""Load models into a list."""
cache_path = osp.join(self.cache_dir, "models_{}.pkl".format("_".join(self.objs)))
if osp.exists(cache_path) and self.use_cache:
# dprint("{}: load cached object models from {}".format(self.name, cache_path))
return mmcv.load(cache_path)
models = []
for obj_name in self.objs:
model = inout.load_ply(
osp.join(
self.models_root,
f"obj_{ref.lm_full.obj2id[obj_name]:06d}.ply",
),
vertex_scale=self.scale_to_meter,
)
# NOTE: bbox3d_and_center is computed from the raw (not re-centered) vertices;
# fine for BOP models, whose vertices are already centered
model["bbox3d_and_center"] = misc.get_bbox3d_and_center(model["pts"])
models.append(model)
logger.info("cache models to {}".format(cache_path))
mmcv.mkdir_or_exist(osp.dirname(cache_path))
mmcv.dump(models, cache_path, protocol=4)
return models
def __len__(self):
# return len(self.images)
return self.num_to_load
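# NOTE: __len__ reflects the configured num_to_load (-1 until truncated in
# __call__), not the number of records actually loaded; call the instance to
# get the real count.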
def image_aspect_ratio(self):
return self.width / self.height # 4/3
########### register datasets ############################################################
def get_lm_metadata(obj_names, ref_key):
"""task specific metadata."""
data_ref = ref.__dict__[ref_key]
cur_sym_infos = {} # label based key
loaded_models_info = data_ref.get_models_info()
for i, obj_name in enumerate(obj_names):
obj_id = data_ref.obj2id[obj_name]
model_info = loaded_models_info[str(obj_id)]
if "symmetries_discrete" in model_info or "symmetries_continuous" in model_info:
sym_transforms = misc.get_symmetry_transformations(model_info, max_sym_disc_step=0.01)
sym_info = np.array([sym["R"] for sym in sym_transforms], dtype=np.float32)
else:
sym_info = None
cur_sym_infos[i] = sym_info
meta = {"thing_classes": obj_names, "sym_infos": cur_sym_infos}
return meta
LM_CROP_11_OBJECTS = [
"ape",
"benchvise",
"camera",
"can",
"cat",
"driller",
"duck",
"holepuncher",
"iron",
"lamp",
"phone",
] # no bowl, cup, eggbox, glue
################################################################################
SPLITS_LM_CROP = dict(
lm_crop_11_train=dict(
name="lm_crop_11_train",
dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/"),
models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/models"),
objs=LM_CROP_11_OBJECTS, # selected objects
ann_files=[
osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/image_set_lm_crop/lm_crop_{}_{}.txt".format("train", _obj))
for _obj in LM_CROP_11_OBJECTS
],
image_prefixes=[
osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/test/{:06d}".format(ref.lm_full.obj2id[_obj]))
for _obj in LM_CROP_11_OBJECTS
],
scale_to_meter=0.001,
with_masks=True, # (load masks but may not use it)
with_depth=True, # (load depth path here, but may not use it)
depth_factor=1000.0,
cam_type="dataset",
cam=ref.lm_full.camera_matrix,
height=480,
width=640,
cache_dir=osp.join(PROJ_ROOT, ".cache"),
use_cache=True,
num_to_load=-1,
filter_scene=True,
filter_invalid=False,
ref_key="lm_full",
),
lm_crop_11_test=dict(
name="lm_crop_11_test",
dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/"),
models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/models"),
objs=LM_CROP_11_OBJECTS,
ann_files=[
osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/image_set_lm_crop/lm_crop_{}_{}.txt".format("test", _obj))
for _obj in LM_CROP_11_OBJECTS
],
# NOTE: scene root
image_prefixes=[
osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/test/{:06d}").format(ref.lm_full.obj2id[_obj])
for _obj in LM_CROP_11_OBJECTS
],
scale_to_meter=0.001,
with_masks=True, # (load masks but may not use it)
with_depth=True, # (load depth path here, but may not use it)
depth_factor=1000.0,
cam_type="dataset",
cam=ref.lm_full.camera_matrix,
height=480,
width=640,
cache_dir=osp.join(PROJ_ROOT, ".cache"),
use_cache=True,
num_to_load=-1,
filter_scene=True,
filter_invalid=False,
ref_key="lm_full",
),
)
# single obj splits
for obj in ref.lm_full.objects:
for split in ["train", "test"]:
name = "lm_crop_{}_{}".format(obj, split)
ann_files = [osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/image_set_lm_crop/lm_crop_{}_{}.txt".format(split, obj))]
if split in ["train"]:
filter_invalid = True
elif split in ["test"]:
filter_invalid = False
else:
raise ValueError("unknown split: {}".format(split))
if name not in SPLITS_LM_CROP:
SPLITS_LM_CROP[name] = dict(
name=name,
dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/"),
models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/models"),
objs=[obj], # only this obj
ann_files=ann_files,
image_prefixes=[osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/test/{:06d}").format(ref.lm_full.obj2id[obj])],
scale_to_meter=0.001,
with_masks=True, # (load masks but may not use it)
with_depth=True, # (load depth path here, but may not use it)
depth_factor=1000.0,
cam_type="dataset",
cam=ref.lm_full.camera_matrix,
height=480,
width=640,
cache_dir=osp.join(PROJ_ROOT, ".cache"),
use_cache=True,
num_to_load=-1,
filter_invalid=False,
filter_scene=True,
ref_key="lm_full",
)
def register_with_name_cfg(name, data_cfg=None):
"""Assume pre-defined datasets live in `./datasets`.
Args:
name: dataset_name
data_cfg: if name is a pre-defined split, its cfg is used and this is ignored;
otherwise data_cfg is required (it can be set via cfg.DATA_CFG.name)
"""
dprint("register dataset: {}".format(name))
if name in SPLITS_LM_CROP:
used_cfg = SPLITS_LM_CROP[name]
else:
assert data_cfg is not None, f"dataset name {name} is not registered"
used_cfg = data_cfg
DatasetCatalog.register(name, LM_CROP_Dataset(used_cfg))
# something like eval_types
MetadataCatalog.get(name).set(
ref_key=used_cfg["ref_key"],
objs=used_cfg["objs"],
eval_error_types=["ad", "rete", "proj"],
evaluator_type="bop",
**get_lm_metadata(obj_names=used_cfg["objs"], ref_key=used_cfg["ref_key"]),
)
def get_available_datasets():
return list(SPLITS_LM_CROP.keys())
#### tests ###############################################
def test_vis():
# python -m this_module lmo_test
dset_name = sys.argv[1]
assert dset_name in DatasetCatalog.list()
meta = MetadataCatalog.get(dset_name)
dprint("MetadataCatalog: ", meta)
objs = meta.objs
t_start = time.perf_counter()
dicts = DatasetCatalog.get(dset_name)
logger.info("Done loading {} samples with {:.3f}s.".format(len(dicts), time.perf_counter() - t_start))
dirname = "output/{}-data-vis".format(dset_name)
os.makedirs(dirname, exist_ok=True)
for d in dicts:
img = read_image_mmcv(d["file_name"], format="BGR")
depth = mmcv.imread(d["depth_file"], "unchanged") / 1000.0
imH, imW = img.shape[:2]
annos = d["annotations"]
masks = [cocosegm2mask(anno["segmentation"], imH, imW) for anno in annos]
bboxes = [anno["bbox"] for anno in annos]
bbox_modes = [anno["bbox_mode"] for anno in annos]
bboxes_xyxy = np.array(
[BoxMode.convert(box, box_mode, BoxMode.XYXY_ABS) for box, box_mode in zip(bboxes, bbox_modes)]
)
kpts_3d_list = [anno["bbox3d_and_center"] for anno in annos]
quats = [anno["quat"] for anno in annos]
centers_2d = [anno["centroid_2d"] for anno in annos]
bboxes_128 = []
for center_2d in centers_2d:
cx, cy = center_2d
bboxes_128.append([cx - 64, cy - 64, cx + 64, cy + 64])
bboxes_128 = np.array(bboxes_128)
bboxes_128[:, 0] = np.clip(bboxes_128[:, 0], 0, imW - 1)
bboxes_128[:, 2] = np.clip(bboxes_128[:, 2], 0, imW - 1)
bboxes_128[:, 1] = np.clip(bboxes_128[:, 1], 0, imH - 1)
bboxes_128[:, 3] = np.clip(bboxes_128[:, 3], 0, imH - 1)
transes = [anno["trans"] for anno in annos]
Rs = [quat2mat(quat) for quat in quats]
# 0-based label
cat_ids = [anno["category_id"] for anno in annos]
K = d["cam"]
kpts_2d = [misc.project_pts(kpt3d, K, R, t) for kpt3d, R, t in zip(kpts_3d_list, Rs, transes)]
# visualizer = Visualizer(img[:, :, [2,1,0]], metadata=meta)
# vis = visualizer.draw_dataset_dict(d) # TODO: add pose visualization and depth visualization
# # fpath = osp.join(dirname, osp.basename(d["file_name"]))
# # vis.save(fpath)
# img_vis = vis.get_image()[:, :, [2,1,0]]
# # TODO: visualize pose and keypoints
labels = [objs[cat_id] for cat_id in cat_ids]
# img_vis = vis_image_bboxes_cv2(img, bboxes=bboxes_xyxy, labels=labels)
img_vis = vis_image_mask_bbox_cv2(img, masks, bboxes=bboxes_xyxy, labels=labels)
img_vis_kpts2d = img.copy()
for anno_i in range(len(annos)):
img_vis_kpts2d = misc.draw_projected_box3d(img_vis_kpts2d, kpts_2d[anno_i])
img_vis_bbox_128 = vis_image_mask_bbox_cv2(img, masks, bboxes=bboxes_128, labels=labels)
# grid_show(
# [img[:, :, [2, 1, 0]], img_vis[:, :, [2, 1, 0]], img_vis_bbox_64[:, :, ::-1], img_vis_kpts2d[:, :, [2, 1, 0]], depth],
# ["img", "vis_img", "img_vis_bbox_64", "img_vis_kpts2d", "depth"],
# row=2,
# col=3,
# )
grid_show(
[img_vis_bbox_128[:, :, ::-1], depth],
["img_vis_bbox_128", "depth"],
row=1,
col=2,
)
if __name__ == "__main__":
"""Test the dataset loader.
Usage:
python -m core.gdrn_modeling.datasets.lm_dataset_crop_d2 dataset_name
"""
from lib.vis_utils.image import grid_show
from lib.utils.setup_logger import setup_my_logger
from detectron2.utils.visualizer import Visualizer
import detectron2.data.datasets # noqa # add pre-defined metadata
from lib.pysixd import misc
from core.utils.data_utils import read_image_mmcv
from lib.vis_utils.image import vis_image_mask_bbox_cv2
from lib.utils.mask_utils import cocosegm2mask
from lib.utils.bbox_utils import xywh_to_xyxy
print("sys.argv:", sys.argv)
logger = setup_my_logger(name="core")
register_with_name_cfg(sys.argv[1])
print("dataset catalog: ", DatasetCatalog.list())
test_vis()
core/gdrn_modeling/datasets/lm_new_duck_pbr.py (deleted)

import hashlib
import logging
import os
import os.path as osp
import sys
import time
from collections import OrderedDict
import mmcv
import numpy as np
from tqdm import tqdm
from transforms3d.quaternions import mat2quat, quat2mat
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.structures import BoxMode
cur_dir = osp.dirname(osp.abspath(__file__))
PROJ_ROOT = osp.normpath(osp.join(cur_dir, "../../.."))
sys.path.insert(0, PROJ_ROOT)
import ref
from lib.pysixd import inout, misc
from lib.utils.mask_utils import binary_mask_to_rle, cocosegm2mask
from lib.utils.utils import dprint, lazy_property
logger = logging.getLogger(__name__)
DATASETS_ROOT = osp.normpath(osp.join(PROJ_ROOT, "datasets"))
class LM_NEW_DUCK_PBR_Dataset:
def __init__(self, data_cfg):
"""
Set with_depth and with_masks default to True,
and decide whether to load them into dataloader/network later
with_masks:
"""
self.name = data_cfg["name"]
self.data_cfg = data_cfg
self.objs = data_cfg["objs"] # selected objects
self.dataset_root = data_cfg.get(
"dataset_root",
osp.join(DATASETS_ROOT, "duck_fabi/train_pbr"),
)
assert osp.exists(self.dataset_root), self.dataset_root
self.models_root = data_cfg["models_root"] # duck_fabi/models
self.scale_to_meter = data_cfg["scale_to_meter"] # 0.001
self.with_masks = data_cfg["with_masks"]
self.with_depth = data_cfg["with_depth"]
self.height = data_cfg["height"]
self.width = data_cfg["width"]
self.cache_dir = data_cfg.get("cache_dir", osp.join(PROJ_ROOT, ".cache")) # .cache
self.use_cache = data_cfg.get("use_cache", True)
self.num_to_load = data_cfg.get("num_to_load", -1) # -1
self.filter_invalid = data_cfg.get("filter_invalid", True)
##################################################
# NOTE: careful! Only the selected objects
self.cat_ids = [cat_id for cat_id, obj_name in ref.lm_duck_fabi.id2obj.items() if obj_name in self.objs]
# map selected objs to [0, num_objs-1]
self.cat2label = {v: i for i, v in enumerate(self.cat_ids)} # id_map
self.label2cat = {label: cat for cat, label in self.cat2label.items()}
self.obj2label = OrderedDict((obj, obj_id) for obj_id, obj in enumerate(self.objs))
##########################################################
self.scenes = [f"{i:06d}" for i in range(50)]
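# Assumes train_pbr contains exactly 50 BOP-style scene folders, 000000-000049.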
def __call__(self):
"""Load light-weight instance annotations of all images into a list of
dicts in Detectron2 format.
Do not load heavy data into memory in this file, since we will
load the annotations of all images into memory.
"""
# cache the dataset_dicts to avoid loading masks from files
hashed_file_name = hashlib.md5(
(
"".join([str(fn) for fn in self.objs])
+ "dataset_dicts_{}_{}_{}_{}_{}".format(
self.name,
self.dataset_root,
self.with_masks,
self.with_depth,
__name__,
)
).encode("utf-8")
).hexdigest()
cache_path = osp.join(
self.cache_dir,
"dataset_dicts_{}_{}.pkl".format(self.name, hashed_file_name),
)
if osp.exists(cache_path) and self.use_cache:
logger.info("load cached dataset dicts from {}".format(cache_path))
return mmcv.load(cache_path)
t_start = time.perf_counter()
logger.info("loading dataset dicts: {}".format(self.name))
self.num_instances_without_valid_segmentation = 0
self.num_instances_without_valid_box = 0
dataset_dicts = [] # ######################################################
# it is slow because of loading and converting masks to rle
for scene in tqdm(self.scenes):
scene_id = int(scene)
scene_root = osp.join(self.dataset_root, scene)
gt_dict = mmcv.load(osp.join(scene_root, "scene_gt.json"))
gt_info_dict = mmcv.load(osp.join(scene_root, "scene_gt_info.json"))
cam_dict = mmcv.load(osp.join(scene_root, "scene_camera.json"))
for str_im_id in tqdm(gt_dict, postfix=f"{scene_id}"):
int_im_id = int(str_im_id)
rgb_path = osp.join(scene_root, "rgb/{:06d}.jpg").format(int_im_id)
assert osp.exists(rgb_path), rgb_path
depth_path = osp.join(scene_root, "depth/{:06d}.png".format(int_im_id))
scene_im_id = f"{scene_id}/{int_im_id}"
K = np.array(cam_dict[str_im_id]["cam_K"], dtype=np.float32).reshape(3, 3)
depth_factor = 1000.0 / cam_dict[str_im_id]["depth_scale"] # 10000
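# BOP convention: depth_mm = depth_png * depth_scale, so dividing the raw
# depth PNG by depth_factor (= 1000 / depth_scale) yields meters.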
record = {
"dataset_name": self.name,
"file_name": osp.relpath(rgb_path, PROJ_ROOT),
"depth_file": osp.relpath(depth_path, PROJ_ROOT),
"height": self.height,
"width": self.width,
"image_id": int_im_id,
"scene_im_id": scene_im_id, # for evaluation
"cam": K,
"depth_factor": depth_factor,
"img_type": "syn_pbr", # NOTE: has background
}
insts = []
for anno_i, anno in enumerate(gt_dict[str_im_id]):
obj_id = anno["obj_id"]
if obj_id not in self.cat_ids:
continue
cur_label = self.cat2label[obj_id] # 0-based label
R = np.array(anno["cam_R_m2c"], dtype="float32").reshape(3, 3)
t = np.array(anno["cam_t_m2c"], dtype="float32") / 1000.0
pose = np.hstack([R, t.reshape(3, 1)])
quat = mat2quat(R).astype("float32")
proj = (record["cam"] @ t.T).T
proj = proj[:2] / proj[2]
bbox_visib = gt_info_dict[str_im_id][anno_i]["bbox_visib"]
bbox_obj = gt_info_dict[str_im_id][anno_i]["bbox_obj"]
x1, y1, w, h = bbox_visib
if self.filter_invalid:
if h <= 1 or w <= 1:
self.num_instances_without_valid_box += 1
continue
mask_file = osp.join(
scene_root,
"mask/{:06d}_{:06d}.png".format(int_im_id, anno_i),
)
mask_visib_file = osp.join(
scene_root,
"mask_visib/{:06d}_{:06d}.png".format(int_im_id, anno_i),
)
assert osp.exists(mask_file), mask_file
assert osp.exists(mask_visib_file), mask_visib_file
# load mask visib
mask_single = mmcv.imread(mask_visib_file, "unchanged")
mask_single = mask_single.astype("bool")
area = mask_single.sum()
if area < 30: # filter out too small or nearly invisible instances
self.num_instances_without_valid_segmentation += 1
continue
mask_rle = binary_mask_to_rle(mask_single, compressed=True)
# load mask full
mask_full = mmcv.imread(mask_file, "unchanged")
mask_full = mask_full.astype("bool")
mask_full_rle = binary_mask_to_rle(mask_full, compressed=True)
visib_fract = gt_info_dict[str_im_id][anno_i].get("visib_fract", 1.0)
inst = {
"category_id": cur_label, # 0-based label
"bbox": bbox_visib, # TODO: load both bbox_obj and bbox_visib
"bbox_mode": BoxMode.XYWH_ABS,
"pose": pose,
"quat": quat,
"trans": t,
"centroid_2d": proj, # absolute (cx, cy)
"segmentation": mask_rle,
"mask_full": mask_full_rle,
"visib_fract": visib_fract,
}
model_info = self.models_info[str(obj_id)]
inst["model_info"] = model_info
for key in ["bbox3d_and_center"]:
inst[key] = self.models[cur_label][key]
insts.append(inst)
if len(insts) == 0: # filter im without anno
continue
record["annotations"] = insts
dataset_dicts.append(record)
if self.num_instances_without_valid_segmentation > 0:
logger.warning(
"Filtered out {} instances without valid segmentation. "
"There might be issues in your dataset generation process.".format(
self.num_instances_without_valid_segmentation
)
)
if self.num_instances_without_valid_box > 0:
logger.warning(
"Filtered out {} instances without valid box. "
"There might be issues in your dataset generation process.".format(self.num_instances_without_valid_box)
)
##########################################################################
if self.num_to_load > 0:
self.num_to_load = min(int(self.num_to_load), len(dataset_dicts))
dataset_dicts = dataset_dicts[: self.num_to_load]
logger.info("loaded {} dataset dicts, using {}s".format(len(dataset_dicts), time.perf_counter() - t_start))
mmcv.mkdir_or_exist(osp.dirname(cache_path))
mmcv.dump(dataset_dicts, cache_path, protocol=4)
logger.info("Dumped dataset_dicts to {}".format(cache_path))
return dataset_dicts
@lazy_property
def models_info(self):
models_info_path = osp.join(self.models_root, "models_info.json")
assert osp.exists(models_info_path), models_info_path
models_info = mmcv.load(models_info_path) # key is str(obj_id)
return models_info
@lazy_property
def models(self):
"""Load models into a list."""
cache_path = osp.join(self.models_root, "models_{}.pkl".format("_".join(self.objs)))
if osp.exists(cache_path) and self.use_cache:
# logger.info("load cached object models from {}".format(cache_path))
return mmcv.load(cache_path)
models = []
for obj_name in self.objs:
model = inout.load_ply(
osp.join(
self.models_root,
f"obj_{ref.lm_duck_fabi.obj2id[obj_name]:06d}.ply",
),
vertex_scale=self.scale_to_meter,
)
# NOTE: bbox3d_and_center is computed from the raw (not re-centered) vertices;
# fine for BOP models, whose vertices are already centered
model["bbox3d_and_center"] = misc.get_bbox3d_and_center(model["pts"])
models.append(model)
logger.info("cache models to {}".format(cache_path))
mmcv.dump(models, cache_path, protocol=4)
return models
def image_aspect_ratio(self):
return self.width / self.height
########### register datasets ############################################################
def get_lm_metadata(obj_names, ref_key):
"""task specific metadata."""
data_ref = ref.__dict__[ref_key]
cur_sym_infos = {} # label based key
loaded_models_info = data_ref.get_models_info()
for i, obj_name in enumerate(obj_names):
obj_id = data_ref.obj2id[obj_name]
model_info = loaded_models_info[str(obj_id)]
if "symmetries_discrete" in model_info or "symmetries_continuous" in model_info:
sym_transforms = misc.get_symmetry_transformations(model_info, max_sym_disc_step=0.01)
sym_info = np.array([sym["R"] for sym in sym_transforms], dtype=np.float32)
else:
sym_info = None
cur_sym_infos[i] = sym_info
meta = {"thing_classes": obj_names, "sym_infos": cur_sym_infos}
return meta
LM_13_OBJECTS = [
"ape",
"benchvise",
"camera",
"can",
"cat",
"driller",
"duck",
"eggbox",
"glue",
"holepuncher",
"iron",
"lamp",
"phone",
] # no bowl, cup
LM_OCC_OBJECTS = [
"ape",
"can",
"cat",
"driller",
"duck",
"eggbox",
"glue",
"holepuncher",
]
################################################################################
SPLITS_LM_NEW_DUCK_PBR = dict(
lm_new_duck_pbr_13_train=dict(
name="lm_new_duck_pbr_13_train",
objs=LM_13_OBJECTS, # selected objects
dataset_root=osp.join(DATASETS_ROOT, "duck_fabi/train_pbr"),
models_root=osp.join(DATASETS_ROOT, "duck_fabi/models"),
scale_to_meter=0.001,
with_masks=True, # (load masks but may not use it)
with_depth=True, # (load depth path here, but may not use it)
height=720,
width=1280,
cache_dir=osp.join(PROJ_ROOT, ".cache"),
use_cache=True,
num_to_load=-1,
filter_invalid=True,
ref_key="lm_duck_fabi",
),
lm_new_duck_pbr_8_train=dict(
name="lm_new_duck_pbr_8_train",
objs=LM_OCC_OBJECTS, # selected objects
dataset_root=osp.join(DATASETS_ROOT, "duck_fabi/train_pbr"),
models_root=osp.join(DATASETS_ROOT, "duck_fabi/models"),
scale_to_meter=0.001,
with_masks=True, # (load masks but may not use it)
with_depth=True, # (load depth path here, but may not use it)
height=720,
width=1280,
cache_dir=osp.join(PROJ_ROOT, ".cache"),
use_cache=True,
num_to_load=-1,
filter_invalid=True,
ref_key="lm_duck_fabi", # TODO: maybe have bug
),
)
# single obj splits
for obj in ref.lm_duck_fabi.objects:
for split in ["train"]:
name = "lm_new_duck_pbr_{}_{}".format(obj, split)
if split in ["train"]:
filter_invalid = True
else:
raise ValueError("unknown split: {}".format(split))
if name not in SPLITS_LM_NEW_DUCK_PBR:
SPLITS_LM_NEW_DUCK_PBR[name] = dict(
name=name,
objs=[obj], # only this obj
dataset_root=osp.join(DATASETS_ROOT, "duck_fabi/train_pbr"),
models_root=osp.join(DATASETS_ROOT, "duck_fabi/models"),
scale_to_meter=0.001,
with_masks=True, # (load masks but may not use it)
with_depth=True, # (load depth path here, but may not use it)
height=720,
width=1280,
cache_dir=osp.join(PROJ_ROOT, ".cache"),
use_cache=True,
num_to_load=-1,
filter_invalid=filter_invalid,
ref_key="lm_duck_fabi",
)
def register_with_name_cfg(name, data_cfg=None):
"""Assume pre-defined datasets live in `./datasets`.
Args:
name: dataset_name
data_cfg: if name is a pre-defined split, its cfg is used and this is ignored;
otherwise data_cfg is required (it can be set via cfg.DATA_CFG.name)
"""
dprint("register dataset: {}".format(name))
if name in SPLITS_LM_NEW_DUCK_PBR:
used_cfg = SPLITS_LM_NEW_DUCK_PBR[name]
else:
assert data_cfg is not None, f"dataset name {name} is not registered"
used_cfg = data_cfg
DatasetCatalog.register(name, LM_NEW_DUCK_PBR_Dataset(used_cfg))
# something like eval_types
MetadataCatalog.get(name).set(
ref_key=used_cfg["ref_key"],
objs=used_cfg["objs"],
eval_error_types=["ad", "rete", "proj"],
evaluator_type="bop",
**get_lm_metadata(obj_names=used_cfg["objs"], ref_key=used_cfg["ref_key"]),
)
def get_available_datasets():
return list(SPLITS_LM_NEW_DUCK_PBR.keys())
#### tests ###############################################
def test_vis():
dset_name = sys.argv[1]
assert dset_name in DatasetCatalog.list()
meta = MetadataCatalog.get(dset_name)
dprint("MetadataCatalog: ", meta)
objs = meta.objs
t_start = time.perf_counter()
dicts = DatasetCatalog.get(dset_name)
logger.info("Done loading {} samples with {:.3f}s.".format(len(dicts), time.perf_counter() - t_start))
dirname = "output/{}-data-vis".format(dset_name)
os.makedirs(dirname, exist_ok=True)
for d in dicts:
img = read_image_mmcv(d["file_name"], format="BGR")
depth = mmcv.imread(d["depth_file"], "unchanged") / 10000.0
imH, imW = img.shape[:2]
annos = d["annotations"]
masks = [cocosegm2mask(anno["segmentation"], imH, imW) for anno in annos]
bboxes = [anno["bbox"] for anno in annos]
bbox_modes = [anno["bbox_mode"] for anno in annos]
bboxes_xyxy = np.array(
[BoxMode.convert(box, box_mode, BoxMode.XYXY_ABS) for box, box_mode in zip(bboxes, bbox_modes)]
)
kpts_3d_list = [anno["bbox3d_and_center"] for anno in annos]
quats = [anno["quat"] for anno in annos]
transes = [anno["trans"] for anno in annos]
Rs = [quat2mat(quat) for quat in quats]
# 0-based label
cat_ids = [anno["category_id"] for anno in annos]
K = d["cam"]
kpts_2d = [misc.project_pts(kpt3d, K, R, t) for kpt3d, R, t in zip(kpts_3d_list, Rs, transes)]
# # TODO: visualize pose and keypoints
labels = [objs[cat_id] for cat_id in cat_ids]
for _i in range(len(annos)):
img_vis = vis_image_mask_bbox_cv2(
img,
masks[_i : _i + 1],
bboxes=bboxes_xyxy[_i : _i + 1],
labels=labels[_i : _i + 1],
)
img_vis_kpts2d = misc.draw_projected_box3d(img_vis.copy(), kpts_2d[_i])
grid_show(
[
img[:, :, [2, 1, 0]],
img_vis[:, :, [2, 1, 0]],
img_vis_kpts2d[:, :, [2, 1, 0]],
depth,
],
["img", "vis_img", "img_vis_kpts2d", "depth"],
row=2,
col=2,
)
if __name__ == "__main__":
"""Test the dataset loader.
Usage:
python -m core.gdrn_modeling.datasets.lm_new_duck_pbr dataset_name
"""
from lib.vis_utils.image import grid_show
from lib.utils.setup_logger import setup_my_logger
import detectron2.data.datasets # noqa # add pre-defined metadata
from lib.vis_utils.image import vis_image_mask_bbox_cv2
from core.utils.data_utils import read_image_mmcv
print("sys.argv:", sys.argv)
logger = setup_my_logger(name="core")
register_with_name_cfg(sys.argv[1])
print("dataset catalog: ", DatasetCatalog.list())
test_vis()
core/gdrn_modeling/datasets/lm_syn_egl.py (deleted)

import hashlib
import logging
import os
import os.path as osp
import sys
cur_dir = osp.dirname(osp.abspath(__file__))
PROJ_ROOT = osp.normpath(osp.join(cur_dir, "../../.."))
sys.path.insert(0, PROJ_ROOT)
import time
from collections import OrderedDict
import mmcv
import numpy as np
from tqdm import tqdm
from transforms3d.quaternions import mat2quat, quat2mat
import ref
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.structures import BoxMode
from lib.pysixd import inout, misc
from lib.utils.mask_utils import binary_mask_to_rle, cocosegm2mask, mask2bbox_xywh
from lib.utils.utils import dprint, iprint, lazy_property
logger = logging.getLogger(__name__)
DATASETS_ROOT = osp.normpath(osp.join(PROJ_ROOT, "datasets"))
class LM_SYN_EGL_Dataset(object):
"""lm synthetic data by egl renderer."""
def __init__(self, data_cfg):
"""
Set with_depth and with_masks default to True,
and decide whether to load them into dataloader/network later
with_masks:
"""
self.name = data_cfg["name"]
self.data_cfg = data_cfg
self.objs = data_cfg["objs"] # selected objects
self.dataset_root = data_cfg.get("dataset_root", osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/train_egl"))
self.xyz_root = data_cfg.get("xyz_root", osp.join(self.dataset_root, "xyz_crop"))
assert osp.exists(self.dataset_root), self.dataset_root
self.gt_path = osp.join(self.dataset_root, "gt.json")
assert osp.exists(self.gt_path)
self.models_root = data_cfg["models_root"] # BOP_DATASETS/lm/models
self.scale_to_meter = data_cfg["scale_to_meter"] # 0.001
self.with_masks = data_cfg["with_masks"]
self.with_depth = data_cfg["with_depth"]
self.depth_factor = data_cfg.get("depth_factor", 10000.0)
self.height = data_cfg["height"] # 480
self.width = data_cfg["width"] # 640
self.cache_dir = data_cfg.get("cache_dir", osp.join(PROJ_ROOT, ".cache")) # .cache
self.use_cache = data_cfg.get("use_cache", True)
self.num_to_load = data_cfg["num_to_load"] # -1
self.filter_invalid = data_cfg.get("filter_invalid", True)
##################################################
self.cam = np.array([[572.4114, 0, 325.2611], [0, 573.57043, 242.04899], [0, 0, 1]])
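# Hard-coded standard Linemod intrinsics (fx=572.41, fy=573.57, cx=325.26, cy=242.05).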
# NOTE: careful! Only the selected objects
self.cat_ids = [cat_id for cat_id, obj_name in ref.lm_full.id2obj.items() if obj_name in self.objs]
# map selected objs to [0, num_objs-1]
self.cat2label = {v: i for i, v in enumerate(self.cat_ids)} # id_map
self.label2cat = {label: cat for cat, label in self.cat2label.items()}
self.obj2label = OrderedDict((obj, obj_id) for obj_id, obj in enumerate(self.objs))
##########################################################
def __call__(self): # LM_SYN_EGL_Dataset
"""Load light-weight instance annotations of all images into a list of
dicts in Detectron2 format.
Do not load heavy data into memory in this file, since we will
load the annotations of all images into memory.
"""
# cache the dataset_dicts to avoid loading masks from files
hashed_file_name = hashlib.md5(
(
"".join([str(fn) for fn in self.objs])
+ "dataset_dicts_{}_{}_{}_{}_{}_{}".format(
self.name,
self.dataset_root,
self.with_masks,
self.with_depth,
self.num_to_load,
__name__,
)
).encode("utf-8")
).hexdigest()
cache_path = osp.join(self.cache_dir, "dataset_dicts_{}_{}.pkl".format(self.name, hashed_file_name))
if osp.exists(cache_path) and self.use_cache:
logger.info("load cached dataset dicts from {}".format(cache_path))
return mmcv.load(cache_path)
t_start = time.perf_counter()
logger.info("loading dataset dicts: {}".format(self.name))
self.num_instances_without_valid_segmentation = 0
self.num_instances_without_valid_box = 0
dataset_dicts = [] #######################################################
gt_dict = mmcv.load(self.gt_path)
unique_im_id = 0
for str_im_id, annos in tqdm(gt_dict.items()):
int_im_id = int(str_im_id)
rgb_path = osp.join(self.dataset_root, "rgb/{:06d}.jpg".format(int_im_id))
assert osp.exists(rgb_path), rgb_path
depth_path = osp.join(self.dataset_root, "depth/{:06d}.png".format(int_im_id))
record = {
"dataset_name": self.name,
"file_name": osp.relpath(rgb_path, PROJ_ROOT),
"depth_file": osp.relpath(depth_path, PROJ_ROOT),
"height": self.height,
"width": self.width,
"image_id": unique_im_id,
"scene_im_id": f"0/{int_im_id}", # for pose evaluation
"cam": self.cam,
"depth_factor": self.depth_factor,
"img_type": "syn_egl", # NOTE: has background
}
unique_im_id += 1
insts = []
for anno_i, anno in enumerate(annos):
obj_id = anno["obj_id"]
if obj_id not in self.cat_ids:
continue
cur_label = self.cat2label[obj_id] # 0-based label
pose = np.array(anno["pose"])
R = pose[:3, :3]
t = pose[:3, 3]
quat = mat2quat(R).astype("float32")
proj = (record["cam"] @ t.T).T
proj = proj[:2] / proj[2]
mask_vis_rle = anno["mask_visib"]
mask_full_rle = anno["mask_full"]
bbox = anno["bbox"]
x1, y1, w, h = bbox
if self.filter_invalid:
if h <= 1 or w <= 1:
self.num_instances_without_valid_box += 1
continue
mask_vis = cocosegm2mask(mask_vis_rle, self.height, self.width)
vis_area = mask_vis.sum()
if vis_area < 30: # filter out too small or nearly invisible instances
self.num_instances_without_valid_segmentation += 1
continue
mask_full = cocosegm2mask(mask_full_rle, self.height, self.width)
full_area = mask_full.sum()
if full_area > 0:
visib_fract = vis_area / full_area
else:
visib_fract = 0
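# visib_fract is recomputed from the RLE masks (visible area / full area)
# rather than read from gt.json.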
xyz_path = osp.join(self.xyz_root, f"{int_im_id:06d}_{anno_i:06d}-xyz.pkl")
assert osp.exists(xyz_path), xyz_path
inst = {
"category_id": cur_label, # 0-based label
"bbox": bbox, # TODO: load both bbox_obj and bbox_visib
"bbox_mode": BoxMode.XYWH_ABS,
"pose": pose,
"quat": quat,
"trans": t,
"centroid_2d": proj, # absolute (cx, cy)
"segmentation": mask_vis_rle,
"mask_full": mask_full_rle,
"visib_fract": visib_fract,
"xyz_path": xyz_path,
}
model_info = self.models_info[str(obj_id)]
inst["model_info"] = model_info
# TODO: using full mask and full xyz
for key in ["bbox3d_and_center"]:
inst[key] = self.models[cur_label][key]
insts.append(inst)
if len(insts) == 0: # filter im without anno
continue
record["annotations"] = insts
dataset_dicts.append(record)
if self.num_instances_without_valid_segmentation > 0:
logger.warning(
"Filtered out {} instances without valid segmentation. "
"There might be issues in your dataset generation process.".format(
self.num_instances_without_valid_segmentation
)
)
if self.num_instances_without_valid_box > 0:
logger.warning(
"Filtered out {} instances without valid box. "
"There might be issues in your dataset generation process.".format(self.num_instances_without_valid_box)
)
##########################################################################
if self.num_to_load > 0:
self.num_to_load = min(int(self.num_to_load), len(dataset_dicts))
dataset_dicts = dataset_dicts[: self.num_to_load]
logger.info("loaded {} dataset dicts, using {}s".format(len(dataset_dicts), time.perf_counter() - t_start))
mmcv.mkdir_or_exist(osp.dirname(cache_path))
mmcv.dump(dataset_dicts, cache_path, protocol=4)
logger.info("Dumped dataset_dicts to {}".format(cache_path))
return dataset_dicts
@lazy_property
def models_info(self):
models_info_path = osp.join(self.models_root, "models_info.json")
assert osp.exists(models_info_path), models_info_path
models_info = mmcv.load(models_info_path) # key is str(obj_id)
return models_info
@lazy_property
def models(self):
"""Load models into a list."""
cache_path = osp.join(self.models_root, "models_{}.pkl".format("_".join(self.objs)))
if osp.exists(cache_path) and self.use_cache:
# dprint("{}: load cached object models from {}".format(self.name, cache_path))
return mmcv.load(cache_path)
models = []
for obj_name in self.objs:
model = inout.load_ply(
osp.join(
self.models_root,
f"obj_{ref.lm_full.obj2id[obj_name]:06d}.ply",
),
vertex_scale=self.scale_to_meter,
)
# NOTE: bbox3d_and_center is computed from the raw (non-centered) vertices;
# for BOP models this is fine since they are already centered
model["bbox3d_and_center"] = misc.get_bbox3d_and_center(model["pts"])
models.append(model)
logger.info("cache models to {}".format(cache_path))
mmcv.dump(models, cache_path, protocol=4)
return models
def image_aspect_ratio(self):
return self.width / self.height # 4/3
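# Illustrative sketch (not part of the original loader): the minimal shape of a
# single dataset dict emitted by __call__ above, with hypothetical values, to
# show what downstream Detectron2 dataloaders receive.
def _example_dataset_dict():
    import numpy as np
    from detectron2.structures import BoxMode

    return {
        "dataset_name": "lm_egl_13_train",
        "file_name": "datasets/BOP_DATASETS/lm/train_egl/rgb/000000.jpg",
        "height": 480,
        "width": 640,
        "image_id": 0,
        "scene_im_id": "0/0",
        "cam": np.array([[572.4114, 0, 325.2611], [0, 573.57043, 242.04899], [0, 0, 1]]),
        "annotations": [
            {
                "category_id": 0,  # 0-based label
                "bbox": [100, 120, 50, 60],
                "bbox_mode": BoxMode.XYWH_ABS,
            }
        ],
    }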
########### register datasets ############################################################
def get_lm_metadata(obj_names, ref_key):
"""task specific metadata."""
data_ref = ref.__dict__[ref_key]
cur_sym_infos = {} # label based key
loaded_models_info = data_ref.get_models_info()
for i, obj_name in enumerate(obj_names):
obj_id = data_ref.obj2id[obj_name]
model_info = loaded_models_info[str(obj_id)]
if "symmetries_discrete" in model_info or "symmetries_continuous" in model_info:
sym_transforms = misc.get_symmetry_transformations(model_info, max_sym_disc_step=0.01)
sym_info = np.array([sym["R"] for sym in sym_transforms], dtype=np.float32)
else:
sym_info = None
cur_sym_infos[i] = sym_info
meta = {"thing_classes": obj_names, "sym_infos": cur_sym_infos}
return meta
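# Sketch of the metadata produced above: for an object with annotated
# symmetries (e.g. "eggbox" or "glue"), sym_infos[label] is an (N, 3, 3)
# float32 array of symmetry rotations read from models_info.json; for
# asymmetric objects it is None.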
LM_13_OBJECTS = [
"ape",
"benchvise",
"camera",
"can",
"cat",
"driller",
"duck",
"eggbox",
"glue",
"holepuncher",
"iron",
"lamp",
"phone",
] # no bowl, cup
LM_OCC_OBJECTS = [
"ape",
"can",
"cat",
"driller",
"duck",
"eggbox",
"glue",
"holepuncher",
]
lm_model_root = "BOP_DATASETS/lm/models/"
lmo_model_root = "BOP_DATASETS/lmo/models/"
################################################################################
SPLITS_LM_EGL = dict(
lm_egl_13_train=dict(
name="lm_egl_13_train",
objs=LM_13_OBJECTS, # selected objects
dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/train_egl"),
models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/models"),
xyz_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/train_egl/xyz_crop"),
scale_to_meter=0.001,
with_masks=True, # (load masks but may not use it)
with_depth=True, # (load depth path here, but may not use it)
depth_factor=10000.0,
height=480,
width=640,
cache_dir=osp.join(PROJ_ROOT, ".cache"),
use_cache=True,
num_to_load=-1,
filter_invalid=True,
ref_key="lm_full",
),
lmo_egl_train=dict(
name="lmo_egl_train",
objs=LM_OCC_OBJECTS, # selected objects
dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/train_egl"),
models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/models"),
# NOTE: soft link to lm/train_egl
xyz_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/train_egl/xyz_crop"),
scale_to_meter=0.001,
with_masks=True, # (load masks but may not use it)
with_depth=True, # (load depth path here, but may not use it)
depth_factor=10000.0,
height=480,
width=640,
cache_dir=osp.join(PROJ_ROOT, ".cache"),
use_cache=True,
num_to_load=-1,
filter_invalid=True,
ref_key="lmo_full",
),
)
# single obj splits
for obj in ref.lm_full.objects:
for split in ["train"]:
name = "lm_egl_{}_{}".format(obj, split)
if split in ["train"]:
filter_invalid = True
else:
raise ValueError("{}".format(split))
if name not in SPLITS_LM_EGL:
SPLITS_LM_EGL[name] = dict(
name=name,
objs=[obj], # only this obj
dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/train_egl"),
models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/models"),
xyz_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/train_egl/xyz_crop"),
scale_to_meter=0.001,
with_masks=True, # (load masks but may not use it)
with_depth=True, # (load depth path here, but may not use it)
depth_factor=10000.0,
height=480,
width=640,
cache_dir=osp.join(PROJ_ROOT, ".cache"),
use_cache=True,
num_to_load=-1,
filter_invalid=filter_invalid,
ref_key="lm_full",
)
# lmo single objs
for obj in ref.lmo_full.objects:
for split in ["train"]:
name = "lmo_egl_{}_{}".format(obj, split)
if split in ["train"]:
filter_invalid = True
else:
raise ValueError("{}".format(split))
if name not in SPLITS_LM_EGL:
SPLITS_LM_EGL[name] = dict(
name=name,
objs=[obj], # only this obj
dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/train_egl"),
models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/models"),
# NOTE: soft link to lm/train_egl
xyz_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/train_egl/xyz_crop"),
scale_to_meter=0.001,
with_masks=True, # (load masks but may not use it)
with_depth=True, # (load depth path here, but may not use it)
depth_factor=10000.0,
height=480,
width=640,
cache_dir=osp.join(PROJ_ROOT, ".cache"),
use_cache=True,
num_to_load=-1,
filter_invalid=filter_invalid,
ref_key="lmo_full",
)
def register_with_name_cfg(name, data_cfg=None):
"""Assume pre-defined datasets live in `./datasets`.
Args:
name: dataset_name
data_cfg: if name is in the pre-defined SPLITS, its data_cfg is used;
    otherwise data_cfg is required (it can be set via cfg.DATA_CFG.name)
"""
dprint("register dataset: {}".format(name))
if name in SPLITS_LM_EGL:
used_cfg = SPLITS_LM_EGL[name]
else:
assert data_cfg is not None, f"dataset name {name} is not registered"
used_cfg = data_cfg
DatasetCatalog.register(name, LM_SYN_EGL_Dataset(used_cfg))
# something like eval_types
MetadataCatalog.get(name).set(
ref_key=used_cfg["ref_key"],
objs=used_cfg["objs"],
eval_error_types=["ad", "rete", "proj"],
evaluator_type="bop",
**get_lm_metadata(obj_names=used_cfg["objs"], ref_key=used_cfg["ref_key"]),
)
def get_available_datasets():
return list(SPLITS_LM_EGL.keys())
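# Usage sketch: registering a pre-defined split and loading it lazily through
# the Detectron2 catalogs (the dicts are only built on DatasetCatalog.get):
#   register_with_name_cfg("lm_egl_13_train")
#   dicts = DatasetCatalog.get("lm_egl_13_train")  # triggers LM_SYN_EGL_Dataset.__call__
#   meta = MetadataCatalog.get("lm_egl_13_train")  # thing_classes, sym_infos, ...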
#### tests ###############################################
def test_vis():
dset_name = sys.argv[1]
assert dset_name in DatasetCatalog.list()
meta = MetadataCatalog.get(dset_name)
dprint("MetadataCatalog: ", meta)
objs = meta.objs
t_start = time.perf_counter()
dicts = DatasetCatalog.get(dset_name)
logger.info("Done loading {} samples with {:.3f}s.".format(len(dicts), time.perf_counter() - t_start))
dirname = "output/{}-data-vis".format(dset_name)
os.makedirs(dirname, exist_ok=True)
for d in dicts:
img = read_image_mmcv(d["file_name"], format="BGR")
depth = mmcv.imread(d["depth_file"], "unchanged") / 10000.0
imH, imW = img.shape[:2]
annos = d["annotations"]
masks = [cocosegm2mask(anno["segmentation"], imH, imW) for anno in annos]
bboxes = [anno["bbox"] for anno in annos]
bbox_modes = [anno["bbox_mode"] for anno in annos]
bboxes_xyxy = np.array(
[BoxMode.convert(box, box_mode, BoxMode.XYXY_ABS) for box, box_mode in zip(bboxes, bbox_modes)]
)
kpts_3d_list = [anno["bbox3d_and_center"] for anno in annos]
quats = [anno["quat"] for anno in annos]
transes = [anno["trans"] for anno in annos]
Rs = [quat2mat(quat) for quat in quats]
# 0-based label
cat_ids = [anno["category_id"] for anno in annos]
K = d["cam"]
kpts_2d = [misc.project_pts(kpt3d, K, R, t) for kpt3d, R, t in zip(kpts_3d_list, Rs, transes)]
# # TODO: visualize pose and keypoints
labels = [objs[cat_id] for cat_id in cat_ids]
for _i in range(len(annos)):
img_vis = vis_image_mask_bbox_cv2(
img,
masks[_i : _i + 1],
bboxes=bboxes_xyxy[_i : _i + 1],
labels=labels[_i : _i + 1],
)
img_vis_kpts2d = misc.draw_projected_box3d(img_vis.copy(), kpts_2d[_i])
xyz_path = annos[_i]["xyz_path"]
xyz_info = mmcv.load(xyz_path)
x1, y1, x2, y2 = xyz_info["xyxy"]
xyz_crop = xyz_info["xyz_crop"].astype(np.float32)
xyz = np.zeros((imH, imW, 3), dtype=np.float32)
xyz[y1 : y2 + 1, x1 : x2 + 1, :] = xyz_crop
xyz_show = get_emb_show(xyz)
xyz_crop_show = get_emb_show(xyz_crop)
img_xyz = img.copy() / 255.0
mask_xyz = ((xyz[:, :, 0] != 0) | (xyz[:, :, 1] != 0) | (xyz[:, :, 2] != 0)).astype("uint8")
fg_idx = np.where(mask_xyz != 0)
img_xyz[fg_idx[0], fg_idx[1], :] = xyz_show[fg_idx[0], fg_idx[1], :3]
img_xyz_crop = img_xyz[y1 : y2 + 1, x1 : x2 + 1, :]
img_vis_crop = img_vis[y1 : y2 + 1, x1 : x2 + 1, :]
# diff mask
diff_mask_xyz = np.abs(masks[_i].astype(np.int32) - mask_xyz.astype(np.int32))[y1 : y2 + 1, x1 : x2 + 1]  # int cast avoids uint8 wrap-around
grid_show(
[
img[:, :, [2, 1, 0]],
img_vis[:, :, [2, 1, 0]],
img_vis_kpts2d[:, :, [2, 1, 0]],
depth,
# xyz_show,
diff_mask_xyz,
xyz_crop_show,
img_xyz[:, :, [2, 1, 0]],
img_xyz_crop[:, :, [2, 1, 0]],
img_vis_crop,
],
[
"img",
"vis_img",
"img_vis_kpts2d",
"depth",
"diff_mask_xyz",
"xyz_crop_show",
"img_xyz",
"img_xyz_crop",
"img_vis_crop",
],
row=3,
col=3,
)
if __name__ == "__main__":
"""Test the dataset loader.
Usage:
python -m this_module dataset_name
"""
from lib.vis_utils.image import grid_show
from lib.utils.setup_logger import setup_my_logger
import detectron2.data.datasets # noqa # add pre-defined metadata
from lib.vis_utils.image import vis_image_mask_bbox_cv2
from core.utils.utils import get_emb_show
from core.utils.data_utils import read_image_mmcv
print("sys.argv:", sys.argv)
logger = setup_my_logger(name="core")
register_with_name_cfg(sys.argv[1])
print("dataset catalog: ", DatasetCatalog.list())
test_vis()
import hashlib
import logging
import os
import os.path as osp
import sys
cur_dir = osp.dirname(osp.abspath(__file__))
PROJ_ROOT = osp.normpath(osp.join(cur_dir, "../../.."))
sys.path.insert(0, PROJ_ROOT)
import time
from collections import OrderedDict
import mmcv
import numpy as np
from tqdm import tqdm
from transforms3d.quaternions import mat2quat, quat2mat
import random
import ref
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.structures import BoxMode
from lib.pysixd import inout, misc
from lib.utils.mask_utils import (
binary_mask_to_rle,
cocosegm2mask,
mask2bbox_xywh,
)
from lib.utils.utils import dprint, iprint, lazy_property
logger = logging.getLogger(__name__)
DATASETS_ROOT = osp.normpath(osp.join(PROJ_ROOT, "datasets"))
class LM_SYN_IMGN_Dataset(object):
"""lm synthetic data, imgn(imagine) from DeepIM."""
def __init__(self, data_cfg):
"""
Set with_depth and with_masks default to True,
and decide whether to load them into dataloader/network later
with_masks:
"""
self.name = data_cfg["name"]
self.data_cfg = data_cfg
self.objs = data_cfg["objs"] # selected objects
self.ann_files = data_cfg["ann_files"] # idx files with image ids
self.image_prefixes = data_cfg["image_prefixes"]
self.xyz_prefixes = data_cfg["xyz_prefixes"]
self.dataset_root = data_cfg["dataset_root"] # lm_imgn
self.models_root = data_cfg["models_root"] # BOP_DATASETS/lm/models
self.scale_to_meter = data_cfg["scale_to_meter"] # 0.001
self.with_masks = data_cfg["with_masks"] # True (load masks but may not use it)
self.with_depth = data_cfg["with_depth"] # True (load depth path here, but may not use it)
self.depth_factor = data_cfg["depth_factor"] # 1000.0
self.cam = data_cfg["cam"] #
self.height = data_cfg["height"] # 480
self.width = data_cfg["width"] # 640
self.cache_dir = data_cfg["cache_dir"] # .cache
self.use_cache = data_cfg["use_cache"] # True
# sample uniformly to get n items
self.n_per_obj = data_cfg.get("n_per_obj", 1000)
self.filter_invalid = data_cfg["filter_invalid"]
self.filter_scene = data_cfg.get("filter_scene", False)
##################################################
if self.cam is None:
self.cam = np.array([[572.4114, 0, 325.2611], [0, 573.57043, 242.04899], [0, 0, 1]])
# NOTE: careful! Only the selected objects
self.cat_ids = [cat_id for cat_id, obj_name in ref.lm_full.id2obj.items() if obj_name in self.objs]
# map selected objs to [0, num_objs-1]
self.cat2label = {v: i for i, v in enumerate(self.cat_ids)} # id_map
self.label2cat = {label: cat for cat, label in self.cat2label.items()}
self.obj2label = OrderedDict((obj, obj_id) for obj_id, obj in enumerate(self.objs))
##########################################################
def __call__(self): # LM_SYN_IMGN_Dataset
"""Load light-weight instance annotations of all images into a list of
dicts in Detectron2 format.
Do not load heavy data into memory in this file, since we will
load the annotations of all images into memory.
"""
# cache the dataset_dicts to avoid loading masks from files
hashed_file_name = hashlib.md5(
(
"".join([str(fn) for fn in self.objs])
+ "dataset_dicts_{}_{}_{}_{}_{}_{}".format(
self.name,
self.dataset_root,
self.with_masks,
self.with_depth,
self.n_per_obj,
__name__,
)
).encode("utf-8")
).hexdigest()
cache_path = osp.join(
self.dataset_root,
"dataset_dicts_{}_{}.pkl".format(self.name, hashed_file_name),
)
if osp.exists(cache_path) and self.use_cache:
logger.info("load cached dataset dicts from {}".format(cache_path))
return mmcv.load(cache_path)
t_start = time.perf_counter()
logger.info("loading dataset dicts: {}".format(self.name))
self.num_instances_without_valid_segmentation = 0
self.num_instances_without_valid_box = 0
dataset_dicts = [] #######################################################
assert len(self.ann_files) == len(self.image_prefixes), f"{len(self.ann_files)} != {len(self.image_prefixes)}"
assert len(self.ann_files) == len(self.xyz_prefixes), f"{len(self.ann_files)} != {len(self.xyz_prefixes)}"
for ann_file, scene_root, xyz_root in zip(self.ann_files, self.image_prefixes, self.xyz_prefixes):
# linemod each scene is an object
with open(ann_file, "r") as f_ann:
indices = [line.strip("\r\n").split()[-1] for line in f_ann.readlines()] # string ids
# sample uniformly (equal space)
if self.n_per_obj > 0:
sample_num = min(self.n_per_obj, len(indices))
sel_indices_idx = np.linspace(0, len(indices) - 1, sample_num, dtype=np.int32)
sel_indices = [indices[int(_i)] for _i in sel_indices_idx]
else:
sel_indices = indices
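# e.g. with 10 indices and n_per_obj=3, np.linspace(0, 9, 3, dtype=np.int32)
# selects positions [0, 4, 9]: evenly spaced, always keeping the first and
# last sample.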
for im_id in tqdm(sel_indices):
rgb_path = osp.join(scene_root, "{}-color.png").format(im_id)
assert osp.exists(rgb_path), rgb_path
depth_path = osp.join(scene_root, "{}-depth.png".format(im_id))
obj_name = im_id.split("/")[0]
if obj_name == "benchviseblue":
obj_name = "benchvise"
obj_id = ref.lm_full.obj2id[obj_name]
if self.filter_scene:
if obj_name not in self.objs:
continue
record = {
"dataset_name": self.name,
"file_name": osp.relpath(rgb_path, PROJ_ROOT),
"depth_file": osp.relpath(depth_path, PROJ_ROOT),
"height": self.height,
"width": self.width,
"image_id": im_id.split("/")[-1],
"scene_im_id": im_id,
"cam": self.cam,
"img_type": "syn",
}
cur_label = self.obj2label[obj_name] # 0-based label
pose_path = osp.join(scene_root, "{}-pose.txt".format(im_id))
pose = np.loadtxt(pose_path, skiprows=1)
R = pose[:3, :3]
t = pose[:3, 3]
quat = mat2quat(R).astype("float32")
proj = (record["cam"] @ t.T).T
proj = proj[:2] / proj[2]
depth = mmcv.imread(depth_path, "unchanged") / 1000.0
mask = (depth > 0).astype(np.uint8)
bbox_obj = mask2bbox_xywh(mask)
x1, y1, w, h = bbox_obj
if self.filter_invalid:
if h <= 1 or w <= 1:
self.num_instances_without_valid_box += 1
continue
area = mask.sum()
if area < 3: # filter out too small or nearly invisible instances
self.num_instances_without_valid_segmentation += 1
continue
mask_rle = binary_mask_to_rle(mask, compressed=True)
xyz_path = osp.join(xyz_root, f"{im_id}-xyz.pkl")
assert osp.exists(xyz_path), xyz_path
inst = {
"category_id": cur_label, # 0-based label
"bbox": bbox_obj, # TODO: load both bbox_obj and bbox_visib
"bbox_mode": BoxMode.XYWH_ABS,
"pose": pose,
"quat": quat,
"trans": t,
"centroid_2d": proj, # absolute (cx, cy)
"segmentation": mask_rle,
"mask_full": mask_rle, # only one object
"visib_fract": 1.0,
"xyz_path": xyz_path,
}
model_info = self.models_info[str(obj_id)]
inst["model_info"] = model_info
# TODO: using full mask and full xyz
for key in ["bbox3d_and_center"]:
inst[key] = self.models[cur_label][key]
record["annotations"] = [inst]
dataset_dicts.append(record)
if self.num_instances_without_valid_segmentation > 0:
logger.warning(
"Filtered out {} instances without valid segmentation. "
"There might be issues in your dataset generation process.".format(
self.num_instances_without_valid_segmentation
)
)
if self.num_instances_without_valid_box > 0:
logger.warning(
"Filtered out {} instances without valid box. "
"There might be issues in your dataset generation process.".format(self.num_instances_without_valid_box)
)
##########################################################################
# if self.num_to_load > 0:
# self.num_to_load = min(int(self.num_to_load), len(dataset_dicts))
# random.shuffle(dataset_dicts)
# dataset_dicts = dataset_dicts[: self.num_to_load]
logger.info(
"loaded dataset dicts, num_images: {}, using {}s".format(len(dataset_dicts), time.perf_counter() - t_start)
)
mmcv.dump(dataset_dicts, cache_path, protocol=4)
logger.info("Dumped dataset_dicts to {}".format(cache_path))
return dataset_dicts
@lazy_property
def models_info(self):
models_info_path = osp.join(self.models_root, "models_info.json")
assert osp.exists(models_info_path), models_info_path
models_info = mmcv.load(models_info_path) # key is str(obj_id)
return models_info
@lazy_property
def models(self):
"""Load models into a list."""
cache_path = osp.join(self.models_root, "models_{}.pkl".format("_".join(self.objs)))
if osp.exists(cache_path) and self.use_cache:
# logger.info("load cached object models from {}".format(cache_path))
return mmcv.load(cache_path)
models = []
for obj_name in self.objs:
model = inout.load_ply(
osp.join(
self.models_root,
f"obj_{ref.lm_full.obj2id[obj_name]:06d}.ply",
),
vertex_scale=self.scale_to_meter,
)
# NOTE: bbox3d_and_center is computed from the raw (non-centered) vertices;
# for BOP models this is fine since they are already centered
model["bbox3d_and_center"] = misc.get_bbox3d_and_center(model["pts"])
models.append(model)
logger.info("cache models to {}".format(cache_path))
mmcv.dump(models, cache_path, protocol=4)
return models
def image_aspect_ratio(self):
# return 1
return self.width / self.height # 4/3
########### register datasets ############################################################
def get_lm_metadata(obj_names, ref_key):
"""task specific metadata."""
data_ref = ref.__dict__[ref_key]
cur_sym_infos = {} # label based key
loaded_models_info = data_ref.get_models_info()
for i, obj_name in enumerate(obj_names):
obj_id = data_ref.obj2id[obj_name]
model_info = loaded_models_info[str(obj_id)]
if "symmetries_discrete" in model_info or "symmetries_continuous" in model_info:
sym_transforms = misc.get_symmetry_transformations(model_info, max_sym_disc_step=0.01)
sym_info = np.array([sym["R"] for sym in sym_transforms], dtype=np.float32)
else:
sym_info = None
cur_sym_infos[i] = sym_info
meta = {"thing_classes": obj_names, "sym_infos": cur_sym_infos}
return meta
LM_13_OBJECTS = [
"ape",
"benchvise",
"camera",
"can",
"cat",
"driller",
"duck",
"eggbox",
"glue",
"holepuncher",
"iron",
"lamp",
"phone",
] # no bowl, cup
################################################################################
SPLITS_LM_IMGN_13 = dict(
lm_imgn_13_train_1k_per_obj=dict(
name="lm_imgn_13_train_1k_per_obj",
dataset_root=osp.join(DATASETS_ROOT, "lm_imgn/"),
models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/models"),
objs=LM_13_OBJECTS, # selected objects
ann_files=[
osp.join(
DATASETS_ROOT,
"lm_imgn/image_set/{}_{}.txt".format("train", _obj),
)
for _obj in LM_13_OBJECTS
],
image_prefixes=[osp.join(DATASETS_ROOT, "lm_imgn/imgn") for _obj in LM_13_OBJECTS],
xyz_prefixes=[osp.join(DATASETS_ROOT, "lm_imgn/xyz_crop_imgn/") for _obj in LM_13_OBJECTS],
scale_to_meter=0.001,
with_masks=True, # (load masks but may not use it)
with_depth=True, # (load depth path here, but may not use it)
depth_factor=1000.0,
cam=ref.lm_full.camera_matrix,
height=480,
width=640,
cache_dir=osp.join(PROJ_ROOT, ".cache"),
use_cache=True,
n_per_obj=1000, # 1000 per class
filter_scene=True,
filter_invalid=False,
ref_key="lm_full",
)
)
# single obj splits
for obj in ref.lm_full.objects:
for split in ["train"]:
name = "lm_imgn_13_{}_{}_1k".format(obj, split)
ann_files = [osp.join(DATASETS_ROOT, "lm_imgn/image_set/{}_{}.txt".format(split, obj))]
if split in ["train"]:
filter_invalid = True
elif split in ["test"]:
filter_invalid = False
else:
raise ValueError("{}".format(split))
if name not in SPLITS_LM_IMGN_13:
SPLITS_LM_IMGN_13[name] = dict(
name=name,
dataset_root=osp.join(DATASETS_ROOT, "lm_imgn/"),
models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/models"),
objs=[obj], # only this obj
ann_files=ann_files,
image_prefixes=[osp.join(DATASETS_ROOT, "lm_imgn/imgn/")],
xyz_prefixes=[osp.join(DATASETS_ROOT, "lm_imgn/xyz_crop_imgn/")],
scale_to_meter=0.001,
with_masks=True, # (load masks but may not use it)
with_depth=True, # (load depth path here, but may not use it)
depth_factor=1000.0,
cam=ref.lm_full.camera_matrix,
height=480,
width=640,
cache_dir=osp.join(PROJ_ROOT, ".cache"),
use_cache=True,
n_per_obj=1000,
filter_invalid=filter_invalid,  # use the split-dependent value computed above
filter_scene=True,
ref_key="lm_full",
)
def register_with_name_cfg(name, data_cfg=None):
"""Assume pre-defined datasets live in `./datasets`.
Args:
name: dataset_name
data_cfg: if name is in the pre-defined SPLITS, its data_cfg is used;
    otherwise data_cfg is required (it can be set via cfg.DATA_CFG.name)
"""
dprint("register dataset: {}".format(name))
if name in SPLITS_LM_IMGN_13:
used_cfg = SPLITS_LM_IMGN_13[name]
else:
assert data_cfg is not None, f"dataset name {name} is not registered"
used_cfg = data_cfg
DatasetCatalog.register(name, LM_SYN_IMGN_Dataset(used_cfg))
# something like eval_types
MetadataCatalog.get(name).set(
ref_key=used_cfg["ref_key"],
objs=used_cfg["objs"],
eval_error_types=["ad", "rete", "proj"],
evaluator_type="bop",
**get_lm_metadata(obj_names=used_cfg["objs"], ref_key=used_cfg["ref_key"]),
)
def get_available_datasets():
return list(SPLITS_LM_IMGN_13.keys())
#### tests ###############################################
def test_vis():
dset_name = sys.argv[1]
assert dset_name in DatasetCatalog.list()
meta = MetadataCatalog.get(dset_name)
dprint("MetadataCatalog: ", meta)
objs = meta.objs
t_start = time.perf_counter()
dicts = DatasetCatalog.get(dset_name)
logger.info("Done loading {} samples with {:.3f}s.".format(len(dicts), time.perf_counter() - t_start))
dirname = "output/{}-data-vis".format(dset_name)
os.makedirs(dirname, exist_ok=True)
for d in dicts:
img = read_image_mmcv(d["file_name"], format="BGR")
depth = mmcv.imread(d["depth_file"], "unchanged") / 1000.0
anno = d["annotations"][0] # only one instance per image
imH, imW = img.shape[:2]
mask = cocosegm2mask(anno["segmentation"], imH, imW)
bbox = anno["bbox"]
bbox_mode = anno["bbox_mode"]
bbox_xyxy = np.array(BoxMode.convert(bbox, bbox_mode, BoxMode.XYXY_ABS))
kpt3d = anno["bbox3d_and_center"]
quat = anno["quat"]
trans = anno["trans"]
R = quat2mat(quat)
# 0-based label
cat_id = anno["category_id"]
K = d["cam"]
kpt_2d = misc.project_pts(kpt3d, K, R, trans)
# # TODO: visualize pose and keypoints
label = objs[cat_id]
# img_vis = vis_image_bboxes_cv2(img, bboxes=bboxes_xyxy, labels=labels)
img_vis = vis_image_mask_bbox_cv2(img, [mask], bboxes=[bbox_xyxy], labels=[label])
img_vis_kpt2d = img.copy()
img_vis_kpt2d = misc.draw_projected_box3d(
img_vis_kpt2d,
kpt_2d,
middle_color=None,
bottom_color=(128, 128, 128),
)
xyz_info = mmcv.load(anno["xyz_path"])
xyz = np.zeros((imH, imW, 3), dtype=np.float32)
xyz_crop = xyz_info["xyz_crop"].astype(np.float32)
x1, y1, x2, y2 = xyz_info["xyxy"]
xyz[y1 : y2 + 1, x1 : x2 + 1, :] = xyz_crop
xyz_show = get_emb_show(xyz)
grid_show(
[
img[:, :, [2, 1, 0]],
img_vis[:, :, [2, 1, 0]],
img_vis_kpt2d[:, :, [2, 1, 0]],
depth,
xyz_show,
],
["img", "vis_img", "img_vis_kpts2d", "depth", "emb_show"],
row=2,
col=3,
)
if __name__ == "__main__":
"""Test the dataset loader.
Usage:
python -m core.datasets.lm_syn_imgn dataset_name
"""
from lib.vis_utils.image import grid_show
from lib.utils.setup_logger import setup_my_logger
import detectron2.data.datasets # noqa # add pre-defined metadata
from lib.vis_utils.image import vis_image_mask_bbox_cv2
from core.utils.utils import get_emb_show
from core.utils.data_utils import read_image_mmcv
print("sys.argv:", sys.argv)
logger = setup_my_logger(name="core")
register_with_name_cfg(sys.argv[1])
print("dataset catalog: ", DatasetCatalog.list())
test_vis()
......@@ -3,12 +3,7 @@ import os.path as osp
import mmcv
from detectron2.data import DatasetCatalog
from . import (
lm_syn_imgn,
lm_dataset_d2,
# lm_syn_egl,
lm_pbr,
lm_blender,
# lm_dataset_crop_d2,
ycbv_pbr,
ycbv_d2,
ycbv_bop_test,
......@@ -16,8 +11,6 @@ from . import (
hb_bop_val,
hb_bop_test,
hb_bench_driller_phone_d2,
# duck_frames,
# lm_new_duck_pbr,
tudl_train_real,
tudl_pbr,
tudl_bop_test,
......
import hashlib
import logging
import os
import os.path as osp
import sys
import time
from collections import OrderedDict
import mmcv
import numpy as np
from tqdm import tqdm
from transforms3d.quaternions import mat2quat, quat2mat
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.structures import BoxMode
cur_dir = osp.dirname(osp.abspath(__file__))
PROJ_ROOT = osp.normpath(osp.join(cur_dir, "../../../.."))
sys.path.insert(0, PROJ_ROOT)
import ref
from lib.pysixd import inout, misc
from lib.utils.mask_utils import binary_mask_to_rle, cocosegm2mask
from lib.utils.utils import dprint, lazy_property
logger = logging.getLogger(__name__)
DATASETS_ROOT = osp.normpath(osp.join(PROJ_ROOT, "datasets"))
class LM_BLENDER_Dataset(object):
"""lm blender data, from pvnet-rendering."""
def __init__(self, data_cfg):
"""
Set with_depth and with_masks default to True,
and decide whether to load them into dataloader/network later
with_masks:
"""
self.name = data_cfg["name"]
self.data_cfg = data_cfg
self.objs = data_cfg["objs"] # selected objects
self.ann_files = data_cfg["ann_files"] # json files with image ids and pose/bbox
self.image_prefixes = data_cfg["image_prefixes"]
self.dataset_root = data_cfg["dataset_root"] # lm_renders_blender/
self.models_root = data_cfg["models_root"] # BOP_DATASETS/lm/models
self.scale_to_meter = data_cfg["scale_to_meter"] # 0.001
self.with_masks = data_cfg["with_masks"] # True (load masks but may not use it)
self.with_depth = data_cfg["with_depth"] # True (load depth path here, but may not use it)
self.depth_factor = data_cfg["depth_factor"] # 1000.0
self.cam = data_cfg["cam"] #
self.height = data_cfg["height"] # 480
self.width = data_cfg["width"] # 640
self.cache_dir = data_cfg["cache_dir"] # .cache
self.use_cache = data_cfg["use_cache"] # True
# sample uniformly to get n items
self.n_per_obj = data_cfg.get("n_per_obj", 10000)
self.filter_invalid = data_cfg["filter_invalid"]
##################################################
if self.cam is None:
self.cam = np.array([[572.4114, 0, 325.2611], [0, 573.57043, 242.04899], [0, 0, 1]])
# NOTE: careful! Only the selected objects
self.cat_ids = [cat_id for cat_id, obj_name in ref.lm_full.id2obj.items() if obj_name in self.objs]
# map selected objs to [0, num_objs-1]
self.cat2label = {v: i for i, v in enumerate(self.cat_ids)} # id_map
self.label2cat = {label: cat for cat, label in self.cat2label.items()}
self.obj2label = OrderedDict((obj, obj_id) for obj_id, obj in enumerate(self.objs))
##########################################################
def __call__(self): # LM_BLENDER
"""Load light-weight instance annotations of all images into a list of
dicts in Detectron2 format.
Do not load heavy data into memory in this file, since we will
load the annotations of all images into memory.
"""
# cache the dataset_dicts to avoid loading masks from files
hashed_file_name = hashlib.md5(
(
"".join([str(fn) for fn in self.objs])
+ "dataset_dicts_{}_{}_{}_{}_{}_{}".format(
self.name,
self.dataset_root,
self.with_masks,
self.with_depth,
self.n_per_obj,
__name__,
)
).encode("utf-8")
).hexdigest()
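# The md5 key above covers the object list, split name, dataset root,
# mask/depth flags and per-object sample count (plus the module name), so
# changing any of them yields a fresh cache file; if the underlying
# annotations change, stale caches must be removed manually.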
cache_path = osp.join(
self.cache_dir,
"dataset_dicts_{}_{}.pkl".format(self.name, hashed_file_name),
)
if osp.exists(cache_path) and self.use_cache:
logger.info("load cached dataset dicts from {}".format(cache_path))
return mmcv.load(cache_path)
t_start = time.perf_counter()
logger.info("loading dataset dicts: {}".format(self.name))
self.num_instances_without_valid_segmentation = 0
self.num_instances_without_valid_box = 0
dataset_dicts = [] #######################################################
assert len(self.ann_files) == len(self.image_prefixes), f"{len(self.ann_files)} != {len(self.image_prefixes)}"
for ann_file, scene_root in zip(tqdm(self.ann_files), self.image_prefixes):
# each scene is an object
assert osp.exists(ann_file), ann_file
scene_gt_dict = mmcv.load(ann_file)
# sample uniformly (equal space)
indices = list(scene_gt_dict.keys())
if self.n_per_obj > 0:
sample_num = min(self.n_per_obj, len(scene_gt_dict))
sel_indices_idx = np.linspace(0, len(scene_gt_dict) - 1, sample_num, dtype=np.int32)
sel_indices = [indices[int(_i)] for _i in sel_indices_idx]
else:
sel_indices = indices
for str_im_id in tqdm(sel_indices):
int_im_id = int(str_im_id)
rgb_path = osp.join(scene_root, "{}.jpg").format(str_im_id)
assert osp.exists(rgb_path), rgb_path
depth_path = osp.join(scene_root, "{}_depth_opengl.png".format(str_im_id))
obj_name = osp.basename(ann_file).split("_")[0] # obj_gt.json
obj_id = ref.lm_full.obj2id[obj_name]
if obj_name not in self.objs:
continue
record = {
"dataset_name": self.name,
"file_name": osp.relpath(rgb_path, PROJ_ROOT),
"depth_file": osp.relpath(depth_path, PROJ_ROOT),
"height": self.height,
"width": self.width,
"image_id": int_im_id,
"scene_im_id": f"{obj_id}/{int_im_id}",
"cam": self.cam,
"img_type": "syn_blender", # has bg
}
cur_label = self.obj2label[obj_name] # 0-based label
anno = scene_gt_dict[str_im_id][0] # only one object
R = np.array(anno["cam_R_m2c"]).reshape(3, 3)
t = np.array(anno["cam_t_m2c"]).reshape(-1) / 1000
pose = np.hstack([R, t.reshape(3, 1)])
quat = mat2quat(R).astype("float32")
proj = (record["cam"] @ t.T).T
proj = proj[:2] / proj[2]
bbox_visib = anno["bbox_visib"]
x1, y1, w, h = bbox_visib
if self.filter_invalid:
if h <= 1 or w <= 1:
self.num_instances_without_valid_box += 1
continue
mask_path = osp.join(scene_root, "{}_mask_opengl.png".format(str_im_id))
mask = mmcv.imread(mask_path, "unchanged")
mask = (mask > 0).astype(np.uint8)
area = mask.sum()
if area < 3: # filter out too small or nearly invisible instances
self.num_instances_without_valid_segmentation += 1
continue
mask_rle = binary_mask_to_rle(mask, compressed=True)
visib_fract = anno.get("visib_fract", 1.0)
inst = {
"category_id": cur_label, # 0-based label
"bbox": bbox_visib, # TODO: load both bbox_obj and bbox_visib
"bbox_mode": BoxMode.XYWH_ABS,
"pose": pose,
"quat": quat,
"trans": t,
"centroid_2d": proj, # absolute (cx, cy)
"segmentation": mask_rle,
"visib_fract": visib_fract,
}
model_info = self.models_info[str(obj_id)]
inst["model_info"] = model_info
for key in ["bbox3d_and_center"]:
inst[key] = self.models[cur_label][key]
record["annotations"] = [inst]
dataset_dicts.append(record)
if self.num_instances_without_valid_segmentation > 0:
logger.warning(
"Filtered out {} instances without valid segmentation. "
"There might be issues in your dataset generation process.".format(
self.num_instances_without_valid_segmentation
)
)
if self.num_instances_without_valid_box > 0:
logger.warning(
"Filtered out {} instances without valid box. "
"There might be issues in your dataset generation process.".format(self.num_instances_without_valid_box)
)
##########################################################################
# if self.num_to_load > 0:
# self.num_to_load = min(int(self.num_to_load), len(dataset_dicts))
# random.shuffle(dataset_dicts)
# dataset_dicts = dataset_dicts[: self.num_to_load]
logger.info(
"loaded dataset dicts, num_images: {}, using {}s".format(len(dataset_dicts), time.perf_counter() - t_start)
)
mmcv.dump(dataset_dicts, cache_path, protocol=4)
logger.info("Dumped dataset_dicts to {}".format(cache_path))
return dataset_dicts
@lazy_property
def models_info(self):
models_info_path = osp.join(self.models_root, "models_info.json")
assert osp.exists(models_info_path), models_info_path
models_info = mmcv.load(models_info_path) # key is str(obj_id)
return models_info
@lazy_property
def models(self):
"""Load models into a list."""
cache_path = osp.join(self.models_root, "models_{}.pkl".format("_".join(self.objs)))
if osp.exists(cache_path) and self.use_cache:
# dprint("{}: load cached object models from {}".format(self.name, cache_path))
return mmcv.load(cache_path)
models = []
for obj_name in self.objs:
model = inout.load_ply(
osp.join(
self.models_root,
f"obj_{ref.lm_full.obj2id[obj_name]:06d}.ply",
),
vertex_scale=self.scale_to_meter,
)
# NOTE: bbox3d_and_center is computed from the raw (non-centered) vertices;
# for BOP models this is fine since they are already centered
model["bbox3d_and_center"] = misc.get_bbox3d_and_center(model["pts"])
models.append(model)
logger.info("cache models to {}".format(cache_path))
mmcv.dump(models, cache_path, protocol=4)
return models
def image_aspect_ratio(self):
return self.width / self.height # 4/3
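# Worked sketch (hypothetical values): assembling the 3x4 [R|t] pose used
# throughout these loaders from a BOP-style annotation, with the translation
# converted from millimeters to meters as in __call__ above.
def _demo_pose_assembly():
    import numpy as np
    from transforms3d.quaternions import mat2quat

    R = np.eye(3)
    t = np.array([10.0, -20.0, 500.0]) / 1000.0  # mm -> m
    pose = np.hstack([R, t.reshape(3, 1)])  # 3x4 [R|t]
    quat = mat2quat(R).astype("float32")  # (w, x, y, z) = (1, 0, 0, 0)
    return pose, quat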
########### register datasets ############################################################
def get_lm_metadata(obj_names, ref_key):
"""task specific metadata."""
data_ref = ref.__dict__[ref_key]
cur_sym_infos = {} # label based key
loaded_models_info = data_ref.get_models_info()
for i, obj_name in enumerate(obj_names):
obj_id = data_ref.obj2id[obj_name]
model_info = loaded_models_info[str(obj_id)]
if "symmetries_discrete" in model_info or "symmetries_continuous" in model_info:
sym_transforms = misc.get_symmetry_transformations(model_info, max_sym_disc_step=0.01)
sym_info = np.array([sym["R"] for sym in sym_transforms], dtype=np.float32)
else:
sym_info = None
cur_sym_infos[i] = sym_info
meta = {"thing_classes": obj_names, "sym_infos": cur_sym_infos}
return meta
LM_13_OBJECTS = [
"ape",
"benchvise",
"camera",
"can",
"cat",
"driller",
"duck",
"eggbox",
"glue",
"holepuncher",
"iron",
"lamp",
"phone",
] # no bowl, cup
LM_OCC_OBJECTS = [
"ape",
"can",
"cat",
"driller",
"duck",
"eggbox",
"glue",
"holepuncher",
]
################################################################################
SPLITS_LM_BLENDER = dict(
lm_blender_13_train=dict(
name="lm_blender_13_train", # BB8 training set
dataset_root=osp.join(DATASETS_ROOT, "lm_renders_blender/"),
models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/models"),
objs=LM_13_OBJECTS, # selected objects
ann_files=[
osp.join(
DATASETS_ROOT,
"lm_renders_blender/renders/{}_gt.json".format(_obj),
)
for _obj in LM_13_OBJECTS
],
image_prefixes=[
osp.join(DATASETS_ROOT, "lm_renders_blender/renders/{}".format(_obj)) for _obj in LM_13_OBJECTS
],
scale_to_meter=0.001,
with_masks=True, # (load masks but may not use it)
with_depth=True, # (load depth path here, but may not use it)
depth_factor=1000.0,
cam=ref.lm_full.camera_matrix,
height=480,
width=640,
cache_dir=osp.join(PROJ_ROOT, ".cache"),
use_cache=True,
n_per_obj=-1, # num per class, -1 for all 10k
filter_invalid=False,
ref_key="lm_full",
),
lmo_blender_train=dict(
name="lmo_blender_train",
dataset_root=osp.join(DATASETS_ROOT, "lm_renders_blender/"),
models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/models"),
objs=LM_OCC_OBJECTS, # selected objects
ann_files=[
osp.join(
DATASETS_ROOT,
"lm_renders_blender/renders/{}_gt.json".format(_obj),
)
for _obj in LM_OCC_OBJECTS
],
image_prefixes=[
osp.join(DATASETS_ROOT, "lm_renders_blender/renders/{}".format(_obj)) for _obj in LM_OCC_OBJECTS
],
scale_to_meter=0.001,
with_masks=True, # (load masks but may not use it)
with_depth=True, # (load depth path here, but may not use it)
depth_factor=1000.0,
cam=ref.lmo_full.camera_matrix,
height=480,
width=640,
cache_dir=osp.join(PROJ_ROOT, ".cache"),
use_cache=True,
n_per_obj=-1, # n per class, -1 for all 10k
filter_invalid=False,
ref_key="lmo_full",
),
)
# single obj splits
for obj in ref.lm_full.objects:
for split in ["train"]:
for name_prefix in ["lm", "lmo"]:
name = "{}_blender_{}_{}".format(name_prefix, obj, split)
ref_key = f"{name_prefix}_full"
ann_files = [
osp.join(
DATASETS_ROOT,
"lm_renders_blender/renders/{}_gt.json".format(obj),
)
]
if split in ["train"]:
filter_invalid = True
else:
raise ValueError("{}".format(split))
if name not in SPLITS_LM_BLENDER:
SPLITS_LM_BLENDER[name] = dict(
name=name,
dataset_root=osp.join(DATASETS_ROOT, "lm_renders_blender/"),
models_root=osp.join(DATASETS_ROOT, f"BOP_DATASETS/{name_prefix}/models"),
objs=[obj], # only this obj
ann_files=ann_files,
image_prefixes=[osp.join(DATASETS_ROOT, f"lm_renders_blender/renders/{obj}")],
scale_to_meter=0.001,
with_masks=True, # (load masks but may not use it)
with_depth=True, # (load depth path here, but may not use it)
depth_factor=1000.0,
cam=ref.__dict__[ref_key].camera_matrix,
height=480,
width=640,
cache_dir=osp.join(PROJ_ROOT, ".cache"),
use_cache=True,
n_per_obj=-1,
filter_invalid=filter_invalid,  # use the split-dependent value computed above
ref_key=ref_key,
)
def register_with_name_cfg(name, data_cfg=None):
"""Assume pre-defined datasets live in `./datasets`.
Args:
name: dataset_name
data_cfg: if name is in the pre-defined SPLITS, its data_cfg is used;
    otherwise data_cfg is required (it can be set via cfg.DATA_CFG.name)
"""
dprint("register dataset: {}".format(name))
if name in SPLITS_LM_BLENDER:
used_cfg = SPLITS_LM_BLENDER[name]
else:
assert data_cfg is not None, f"dataset name {name} is not registered"
used_cfg = data_cfg
DatasetCatalog.register(name, LM_BLENDER_Dataset(used_cfg))
# something like eval_types
MetadataCatalog.get(name).set(
ref_key=used_cfg["ref_key"],
objs=used_cfg["objs"],
eval_error_types=["ad", "rete", "proj"],
evaluator_type="coco_bop",
**get_lm_metadata(obj_names=used_cfg["objs"], ref_key=used_cfg["ref_key"]),
)
def get_available_datasets():
return list(SPLITS_LM_BLENDER.keys())
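# Naming sketch: the single-object splits generated above follow
# "{lm,lmo}_blender_{obj}_train", e.g. "lm_blender_ape_train";
# get_available_datasets() lists them together with the multi-object splits.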
#### tests ###############################################
def test_vis():
dset_name = sys.argv[1]
assert dset_name in DatasetCatalog.list()
meta = MetadataCatalog.get(dset_name)
dprint("MetadataCatalog: ", meta)
objs = meta.objs
t_start = time.perf_counter()
dicts = DatasetCatalog.get(dset_name)
logger.info("Done loading {} samples with {:.3f}s.".format(len(dicts), time.perf_counter() - t_start))
dirname = "output/{}-data-vis".format(dset_name)
os.makedirs(dirname, exist_ok=True)
for d in dicts:
img = read_image_mmcv(d["file_name"], format="BGR")
depth = mmcv.imread(d["depth_file"], "unchanged") / 1000.0
anno = d["annotations"][0] # only one instance per image
imH, imW = img.shape[:2]
mask = cocosegm2mask(anno["segmentation"], imH, imW)
bbox = anno["bbox"]
bbox_mode = anno["bbox_mode"]
bbox_xyxy = np.array(BoxMode.convert(bbox, bbox_mode, BoxMode.XYXY_ABS))
kpt3d = anno["bbox3d_and_center"]
quat = anno["quat"]
trans = anno["trans"]
R = quat2mat(quat)
# 0-based label
cat_id = anno["category_id"]
K = d["cam"]
kpt_2d = misc.project_pts(kpt3d, K, R, trans)
# # TODO: visualize pose and keypoints
label = objs[cat_id]
# img_vis = vis_image_bboxes_cv2(img, bboxes=bboxes_xyxy, labels=labels)
img_vis = vis_image_mask_bbox_cv2(img, [mask], bboxes=[bbox_xyxy], labels=[label])
img_vis_kpt2d = img.copy()
img_vis_kpt2d = misc.draw_projected_box3d(
img_vis_kpt2d,
kpt_2d,
middle_color=None,
bottom_color=(128, 128, 128),
)
grid_show(
[
img[:, :, [2, 1, 0]],
img_vis[:, :, [2, 1, 0]],
img_vis_kpt2d[:, :, [2, 1, 0]],
depth,
],
["img", "vis_img", "img_vis_kpts2d", "depth"],
row=2,
col=2,
)
if __name__ == "__main__":
"""Test the dataset loader.
Usage:
python -m core.datasets.lm_blender dataset_name
"""
from lib.vis_utils.image import grid_show
from lib.utils.setup_logger import setup_my_logger
import detectron2.data.datasets # noqa # add pre-defined metadata
from lib.vis_utils.image import vis_image_mask_bbox_cv2
from core.utils.data_utils import read_image_mmcv
print("sys.argv:", sys.argv)
logger = setup_my_logger(name="core")
register_with_name_cfg(sys.argv[1])
print("dataset catalog: ", DatasetCatalog.list())
test_vis()
import hashlib
import logging
import os
import os.path as osp
import sys
import time
from collections import OrderedDict
import mmcv
import numpy as np
from tqdm import tqdm
from transforms3d.quaternions import mat2quat, quat2mat
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.structures import BoxMode
cur_dir = osp.dirname(osp.abspath(__file__))
PROJ_ROOT = osp.normpath(osp.join(cur_dir, "../../../.."))
sys.path.insert(0, PROJ_ROOT)
import ref
from lib.pysixd import inout, misc
from lib.utils.mask_utils import binary_mask_to_rle, cocosegm2mask
from lib.utils.utils import dprint, iprint, lazy_property
logger = logging.getLogger(__name__)
DATASETS_ROOT = osp.normpath(osp.join(PROJ_ROOT, "datasets"))
class LM_Dataset(object):
"""lm splits."""
def __init__(self, data_cfg):
"""
Set with_depth and with_masks default to True,
and decide whether to load them into dataloader/network later
with_masks:
"""
self.name = data_cfg["name"]
self.data_cfg = data_cfg
self.objs = data_cfg["objs"] # selected objects
self.ann_files = data_cfg["ann_files"] # idx files with image ids
self.image_prefixes = data_cfg["image_prefixes"]
self.xyz_prefixes = data_cfg["xyz_prefixes"]
self.dataset_root = data_cfg["dataset_root"] # BOP_DATASETS/lm/
assert osp.exists(self.dataset_root), self.dataset_root
self.models_root = data_cfg["models_root"] # BOP_DATASETS/lm/models
self.scale_to_meter = data_cfg["scale_to_meter"] # 0.001
self.with_masks = data_cfg["with_masks"] # True (load masks but may not use it)
self.with_depth = data_cfg["with_depth"] # True (load depth path here, but may not use it)
self.height = data_cfg["height"] # 480
self.width = data_cfg["width"] # 640
self.cache_dir = data_cfg.get("cache_dir", osp.join(PROJ_ROOT, ".cache")) # .cache
self.use_cache = data_cfg.get("use_cache", True)
self.num_to_load = data_cfg["num_to_load"] # -1
self.filter_invalid = data_cfg["filter_invalid"]
self.filter_scene = data_cfg.get("filter_scene", False)
self.debug_im_id = data_cfg.get("debug_im_id", None)
##################################################
# NOTE: careful! Only the selected objects
self.cat_ids = [cat_id for cat_id, obj_name in ref.lm_full.id2obj.items() if obj_name in self.objs]
# map selected objs to [0, num_objs-1]
self.cat2label = {v: i for i, v in enumerate(self.cat_ids)} # id_map
self.label2cat = {label: cat for cat, label in self.cat2label.items()}
self.obj2label = OrderedDict((obj, obj_id) for obj_id, obj in enumerate(self.objs))
##########################################################
def __call__(self): # LM_Dataset
"""Load light-weight instance annotations of all images into a list of
dicts in Detectron2 format.
Do not load heavy data into memory in this file, since we will
load the annotations of all images into memory.
"""
# cache the dataset_dicts to avoid loading masks from files
hashed_file_name = hashlib.md5(
(
"".join([str(fn) for fn in self.objs])
+ "dataset_dicts_{}_{}_{}_{}_{}".format(
self.name,
self.dataset_root,
self.with_masks,
self.with_depth,
__name__,
)
).encode("utf-8")
).hexdigest()
cache_path = osp.join(
self.cache_dir,
"dataset_dicts_{}_{}.pkl".format(self.name, hashed_file_name),
)
if osp.exists(cache_path) and self.use_cache:
logger.info("load cached dataset dicts from {}".format(cache_path))
return mmcv.load(cache_path)
t_start = time.perf_counter()
logger.info("loading dataset dicts: {}".format(self.name))
self.num_instances_without_valid_segmentation = 0
self.num_instances_without_valid_box = 0
dataset_dicts = [] # ######################################################
assert len(self.ann_files) == len(self.image_prefixes), f"{len(self.ann_files)} != {len(self.image_prefixes)}"
assert len(self.ann_files) == len(self.xyz_prefixes), f"{len(self.ann_files)} != {len(self.xyz_prefixes)}"
unique_im_id = 0
for ann_file, scene_root, xyz_root in zip(tqdm(self.ann_files), self.image_prefixes, self.xyz_prefixes):
# linemod each scene is an object
with open(ann_file, "r") as f_ann:
indices = [line.strip("\r\n") for line in f_ann.readlines()] # string ids
gt_dict = mmcv.load(osp.join(scene_root, "scene_gt.json"))
gt_info_dict = mmcv.load(osp.join(scene_root, "scene_gt_info.json")) # bbox_obj, bbox_visib
cam_dict = mmcv.load(osp.join(scene_root, "scene_camera.json"))
for im_id in tqdm(indices):
int_im_id = int(im_id)
str_im_id = str(int_im_id)
rgb_path = osp.join(scene_root, "rgb/{:06d}.png").format(int_im_id)
assert osp.exists(rgb_path), rgb_path
depth_path = osp.join(scene_root, "depth/{:06d}.png".format(int_im_id))
scene_id = int(rgb_path.split("/")[-3])
scene_im_id = f"{scene_id}/{int_im_id}"
if self.debug_im_id is not None:
if self.debug_im_id != scene_im_id:
continue
K = np.array(cam_dict[str_im_id]["cam_K"], dtype=np.float32).reshape(3, 3)
depth_factor = 1000.0 / cam_dict[str_im_id]["depth_scale"]
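# BOP convention: raw depth PNG values multiplied by depth_scale give depth
# in millimeters, so metric depth in meters = raw / depth_factor with
# depth_factor = 1000.0 / depth_scale.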
if self.filter_scene:
if scene_id not in self.cat_ids:
continue
record = {
"dataset_name": self.name,
"file_name": osp.relpath(rgb_path, PROJ_ROOT),
"depth_file": osp.relpath(depth_path, PROJ_ROOT),
"height": self.height,
"width": self.width,
"image_id": unique_im_id,
"scene_im_id": scene_im_id, # for evaluation
"cam": K,
"depth_factor": depth_factor,
"img_type": "real",
}
unique_im_id += 1
insts = []
for anno_i, anno in enumerate(gt_dict[str_im_id]):
obj_id = anno["obj_id"]
if obj_id not in self.cat_ids:
continue
cur_label = self.cat2label[obj_id] # 0-based label
R = np.array(anno["cam_R_m2c"], dtype="float32").reshape(3, 3)
t = np.array(anno["cam_t_m2c"], dtype="float32") / 1000.0
pose = np.hstack([R, t.reshape(3, 1)])
quat = mat2quat(R).astype("float32")
proj = (record["cam"] @ t.T).T
proj = proj[:2] / proj[2]
bbox_visib = gt_info_dict[str_im_id][anno_i]["bbox_visib"]
bbox_obj = gt_info_dict[str_im_id][anno_i]["bbox_obj"]
x1, y1, w, h = bbox_visib
if self.filter_invalid:
if h <= 1 or w <= 1:
self.num_instances_without_valid_box += 1
continue
mask_file = osp.join(
scene_root,
"mask/{:06d}_{:06d}.png".format(int_im_id, anno_i),
)
mask_visib_file = osp.join(
scene_root,
"mask_visib/{:06d}_{:06d}.png".format(int_im_id, anno_i),
)
assert osp.exists(mask_file), mask_file
assert osp.exists(mask_visib_file), mask_visib_file
# load mask visib
mask_single = mmcv.imread(mask_visib_file, "unchanged")
mask_single = mask_single.astype("bool")
area = mask_single.sum()
if area < 3: # filter out too small or nearly invisible instances
self.num_instances_without_valid_segmentation += 1
continue
mask_rle = binary_mask_to_rle(mask_single, compressed=True)
# load mask full
mask_full = mmcv.imread(mask_file, "unchanged")
mask_full = mask_full.astype("bool")
mask_full_rle = binary_mask_to_rle(mask_full, compressed=True)
inst = {
"category_id": cur_label, # 0-based label
"bbox": bbox_obj, # TODO: load both bbox_obj and bbox_visib
"bbox_mode": BoxMode.XYWH_ABS,
"pose": pose,
"quat": quat,
"trans": t,
"centroid_2d": proj, # absolute (cx, cy)
"segmentation": mask_rle,
"mask_full": mask_full_rle,
}
if "test" not in self.name.lower():
# if True:
xyz_path = osp.join(xyz_root, f"{int_im_id:06d}_{anno_i:06d}.pkl")
assert osp.exists(xyz_path), xyz_path
inst["xyz_path"] = xyz_path
model_info = self.models_info[str(obj_id)]
inst["model_info"] = model_info
# TODO: using full mask and full xyz
for key in ["bbox3d_and_center"]:
inst[key] = self.models[cur_label][key]
insts.append(inst)
if len(insts) == 0: # filter im without anno
continue
record["annotations"] = insts
dataset_dicts.append(record)
if self.num_instances_without_valid_segmentation > 0:
logger.warning(
"Filtered out {} instances without valid segmentation. "
"There might be issues in your dataset generation process.".format(
self.num_instances_without_valid_segmentation
)
)
if self.num_instances_without_valid_box > 0:
logger.warning(
"Filtered out {} instances without valid box. "
"There might be issues in your dataset generation process.".format(self.num_instances_without_valid_box)
)
##########################################################################
if self.num_to_load > 0:
self.num_to_load = min(int(self.num_to_load), len(dataset_dicts))
dataset_dicts = dataset_dicts[: self.num_to_load]
logger.info("loaded {} dataset dicts, using {}s".format(len(dataset_dicts), time.perf_counter() - t_start))
mmcv.mkdir_or_exist(osp.dirname(cache_path))
mmcv.dump(dataset_dicts, cache_path, protocol=4)
logger.info("Dumped dataset_dicts to {}".format(cache_path))
return dataset_dicts
@lazy_property
def models_info(self):
models_info_path = osp.join(self.models_root, "models_info.json")
assert osp.exists(models_info_path), models_info_path
models_info = mmcv.load(models_info_path) # key is str(obj_id)
return models_info
@lazy_property
def models(self):
"""Load models into a list."""
cache_path = osp.join(self.cache_dir, "models_{}.pkl".format("_".join(self.objs)))
if osp.exists(cache_path) and self.use_cache:
# dprint("{}: load cached object models from {}".format(self.name, cache_path))
return mmcv.load(cache_path)
models = []
for obj_name in self.objs:
model = inout.load_ply(
osp.join(
self.models_root,
f"obj_{ref.lm_full.obj2id[obj_name]:06d}.ply",
),
vertex_scale=self.scale_to_meter,
)
# NOTE: bbox3d_and_center is computed from the raw (non-centered) vertices;
# for BOP models this is fine since they are already centered
model["bbox3d_and_center"] = misc.get_bbox3d_and_center(model["pts"])
models.append(model)
logger.info("cache models to {}".format(cache_path))
mmcv.mkdir_or_exist(osp.dirname(cache_path))
mmcv.dump(models, cache_path, protocol=4)
return models
def image_aspect_ratio(self):
return self.width / self.height # 4/3
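# Minimal round-trip sketch (assuming the project's lib.utils.mask_utils
# helpers wrap COCO-style RLE, as their use above suggests): encode a toy
# mask to compressed RLE and decode it back, mirroring how "segmentation"
# entries are stored in the dataset dicts.
def _demo_rle_roundtrip():
    mask = np.zeros((480, 640), dtype=np.uint8)
    mask[100:200, 150:300] = 1
    rle = binary_mask_to_rle(mask, compressed=True)
    decoded = cocosegm2mask(rle, 480, 640)
    assert (decoded == mask).all()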
########### register datasets ############################################################
def get_lm_metadata(obj_names, ref_key):
"""task specific metadata."""
data_ref = ref.__dict__[ref_key]
cur_sym_infos = {} # label based key
loaded_models_info = data_ref.get_models_info()
for i, obj_name in enumerate(obj_names):
obj_id = data_ref.obj2id[obj_name]
model_info = loaded_models_info[str(obj_id)]
if "symmetries_discrete" in model_info or "symmetries_continuous" in model_info:
sym_transforms = misc.get_symmetry_transformations(model_info, max_sym_disc_step=0.01)
sym_info = np.array([sym["R"] for sym in sym_transforms], dtype=np.float32)
else:
sym_info = None
cur_sym_infos[i] = sym_info
meta = {"thing_classes": obj_names, "sym_infos": cur_sym_infos}
return meta
LM_13_OBJECTS = [
"ape",
"benchvise",
"camera",
"can",
"cat",
"driller",
"duck",
"eggbox",
"glue",
"holepuncher",
"iron",
"lamp",
"phone",
] # no bowl, cup
LM_OCC_OBJECTS = [
"ape",
"can",
"cat",
"driller",
"duck",
"eggbox",
"glue",
"holepuncher",
]
################################################################################
SPLITS_LM = dict(
lm_13_train=dict(
name="lm_13_train",
dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/"),
models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/models"),
objs=LM_13_OBJECTS, # selected objects
ann_files=[
osp.join(
DATASETS_ROOT,
"BOP_DATASETS/lm/image_set/{}_{}.txt".format(_obj, "train"),
)
for _obj in LM_13_OBJECTS
],
image_prefixes=[
osp.join(
DATASETS_ROOT,
"BOP_DATASETS/lm/test/{:06d}".format(ref.lm_full.obj2id[_obj]),
)
for _obj in LM_13_OBJECTS
],
xyz_prefixes=[
osp.join(
DATASETS_ROOT,
"BOP_DATASETS/lm/test/xyz_crop/{:06d}".format(ref.lm_full.obj2id[_obj]),
)
for _obj in LM_13_OBJECTS
],
scale_to_meter=0.001,
with_masks=True, # (load masks but may not use it)
with_depth=True, # (load depth path here, but may not use it)
height=480,
width=640,
cache_dir=osp.join(PROJ_ROOT, ".cache"),
use_cache=True,
num_to_load=-1,
filter_scene=True,
filter_invalid=True,
ref_key="lm_full",
),
lm_13_test=dict(
name="lm_13_test",
dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/"),
models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/models"),
objs=LM_13_OBJECTS,
ann_files=[
osp.join(
DATASETS_ROOT,
"BOP_DATASETS/lm/image_set/{}_{}.txt".format(_obj, "test"),
)
for _obj in LM_13_OBJECTS
],
# NOTE: scene root
image_prefixes=[
osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/test/{:06d}").format(ref.lm_full.obj2id[_obj])
for _obj in LM_13_OBJECTS
],
xyz_prefixes=[
osp.join(
DATASETS_ROOT,
"BOP_DATASETS/lm/test/xyz_crop/{:06d}".format(ref.lm_full.obj2id[_obj]),
)
for _obj in LM_13_OBJECTS
],
scale_to_meter=0.001,
with_masks=True, # (load masks but may not use it)
with_depth=True, # (load depth path here, but may not use it)
height=480,
width=640,
cache_dir=osp.join(PROJ_ROOT, ".cache"),
use_cache=True,
num_to_load=-1,
filter_scene=True,
filter_invalid=False,
ref_key="lm_full",
),
lmo_train=dict(
name="lmo_train",
# use lm real all (8 objects) to train for lmo
dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/"),
models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/models"),
objs=LM_OCC_OBJECTS, # selected objects
ann_files=[
osp.join(
DATASETS_ROOT,
"BOP_DATASETS/lm/image_set/{}_{}.txt".format(_obj, "all"),
)
for _obj in LM_OCC_OBJECTS
],
image_prefixes=[
osp.join(
DATASETS_ROOT,
"BOP_DATASETS/lm/test/{:06d}".format(ref.lmo_full.obj2id[_obj]),
)
for _obj in LM_OCC_OBJECTS
],
xyz_prefixes=[
osp.join(
DATASETS_ROOT,
"BOP_DATASETS/lm/test/xyz_crop/{:06d}".format(ref.lmo_full.obj2id[_obj]),
)
for _obj in LM_OCC_OBJECTS
],
scale_to_meter=0.001,
with_masks=True, # (load masks but may not use it)
with_depth=True, # (load depth path here, but may not use it)
height=480,
width=640,
cache_dir=osp.join(PROJ_ROOT, ".cache"),
use_cache=True,
num_to_load=-1,
filter_scene=True,
filter_invalid=True,
ref_key="lmo_full",
),
lmo_NoBopTest_train=dict(
name="lmo_NoBopTest_train",
dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/"),
models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/models"),
objs=LM_OCC_OBJECTS,
ann_files=[osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/image_set/lmo_no_bop_test.txt")],
image_prefixes=[osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/test/{:06d}").format(2)],
xyz_prefixes=[
osp.join(
DATASETS_ROOT,
"BOP_DATASETS/lmo/test/xyz_crop/{:06d}".format(2),
)
],
scale_to_meter=0.001,
with_masks=True, # (load masks but may not use it)
with_depth=True, # (load depth path here, but may not use it)
height=480,
width=640,
cache_dir=osp.join(PROJ_ROOT, ".cache"),
use_cache=True,
num_to_load=-1,
filter_scene=False,
filter_invalid=True,
ref_key="lmo_full",
),
lmo_test=dict(
name="lmo_test",
dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/"),
models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/models"),
objs=LM_OCC_OBJECTS,
ann_files=[osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/image_set/lmo_test.txt")],
# NOTE: scene root
image_prefixes=[osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/test/{:06d}").format(2)],
xyz_prefixes=[None],
scale_to_meter=0.001,
with_masks=True, # (load masks but may not use it)
with_depth=True, # (load depth path here, but may not use it)
height=480,
width=640,
cache_dir=osp.join(PROJ_ROOT, ".cache"),
use_cache=True,
num_to_load=-1,
filter_scene=False,
filter_invalid=False,
ref_key="lmo_full",
),
lmo_bop_test=dict(
name="lmo_bop_test",
dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/"),
models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/models"),
objs=LM_OCC_OBJECTS,
ann_files=[osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/image_set/lmo_bop_test.txt")],
# NOTE: scene root
image_prefixes=[osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/test/{:06d}").format(2)],
xyz_prefixes=[None],
scale_to_meter=0.001,
with_masks=True, # (load masks but may not use it)
with_depth=True, # (load depth path here, but may not use it)
height=480,
width=640,
cache_dir=osp.join(PROJ_ROOT, ".cache"),
use_cache=True,
num_to_load=-1,
filter_scene=False,
filter_invalid=False,
ref_key="lmo_full",
),
)
# single obj splits for lm real
for obj in ref.lm_full.objects:
for split in ["train", "test", "all"]:
name = "lm_real_{}_{}".format(obj, split)
ann_files = [
osp.join(
DATASETS_ROOT,
"BOP_DATASETS/lm/image_set/{}_{}.txt".format(obj, split),
)
]
if split in ["train", "all"]: # all is used to train lmo
filter_invalid = True
elif split in ["test"]:
filter_invalid = False
else:
raise ValueError("{}".format(split))
if name not in SPLITS_LM:
SPLITS_LM[name] = dict(
name=name,
dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/"),
models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/models"),
objs=[obj], # only this obj
ann_files=ann_files,
image_prefixes=[osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/test/{:06d}").format(ref.lm_full.obj2id[obj])],
xyz_prefixes=[
osp.join(
DATASETS_ROOT,
"BOP_DATASETS/lm/test/xyz_crop/{:06d}".format(ref.lm_full.obj2id[obj]),
)
],
scale_to_meter=0.001,
with_masks=True, # (load masks but may not use it)
with_depth=True, # (load depth path here, but may not use it)
height=480,
width=640,
cache_dir=osp.join(PROJ_ROOT, ".cache"),
use_cache=True,
num_to_load=-1,
filter_invalid=filter_invalid,
filter_scene=True,
ref_key="lm_full",
)
# single obj splits for lmo_NoBopTest_train
for obj in ref.lmo_full.objects:
for split in ["train"]:
name = "lmo_NoBopTest_{}_{}".format(obj, split)
if split in ["train"]:
filter_invalid = True
else:
raise ValueError("{}".format(split))
if name not in SPLITS_LM:
SPLITS_LM[name] = dict(
name=name,
dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/"),
models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/models"),
objs=[obj],
ann_files=[
osp.join(
DATASETS_ROOT,
"BOP_DATASETS/lmo/image_set/lmo_no_bop_test.txt",
)
],
# NOTE: scene root
image_prefixes=[osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/test/{:06d}").format(2)],
xyz_prefixes=[
osp.join(
DATASETS_ROOT,
"BOP_DATASETS/lmo/test/xyz_crop/{:06d}".format(2),
)
],
scale_to_meter=0.001,
with_masks=True, # (load masks but may not use it)
with_depth=True, # (load depth path here, but may not use it)
height=480,
width=640,
cache_dir=osp.join(PROJ_ROOT, ".cache"),
use_cache=True,
num_to_load=-1,
filter_scene=False,
filter_invalid=filter_invalid,
ref_key="lmo_full",
)
# single obj splits for lmo_test
for obj in ref.lmo_full.objects:
for split in ["test"]:
name = "lmo_{}_{}".format(obj, split)
if split in ["train", "all"]: # all is used to train lmo
filter_invalid = True
elif split in ["test"]:
filter_invalid = False
else:
raise ValueError("{}".format(split))
if name not in SPLITS_LM:
SPLITS_LM[name] = dict(
name=name,
dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/"),
models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/models"),
objs=[obj],
ann_files=[
osp.join(
DATASETS_ROOT,
"BOP_DATASETS/lmo/image_set/lmo_test.txt",
)
],
# NOTE: scene root
image_prefixes=[osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/test/{:06d}").format(2)],
xyz_prefixes=[None],
scale_to_meter=0.001,
with_masks=True, # (load masks but may not use it)
with_depth=True, # (load depth path here, but may not use it)
height=480,
width=640,
cache_dir=osp.join(PROJ_ROOT, ".cache"),
use_cache=True,
num_to_load=-1,
filter_scene=False,
filter_invalid=False,
ref_key="lmo_full",
)
# single obj splits for lmo_bop_test
for obj in ref.lmo_full.objects:
for split in ["test"]:
name = "lmo_{}_bop_{}".format(obj, split)
if split in ["train", "all"]: # all is used to train lmo
filter_invalid = True
elif split in ["test"]:
filter_invalid = False
else:
raise ValueError("{}".format(split))
if name not in SPLITS_LM:
SPLITS_LM[name] = dict(
name=name,
dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/"),
models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/models"),
objs=[obj],
ann_files=[
osp.join(
DATASETS_ROOT,
"BOP_DATASETS/lmo/image_set/lmo_bop_test.txt",
)
],
# NOTE: scene root
image_prefixes=[osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/test/{:06d}").format(2)],
xyz_prefixes=[None],
scale_to_meter=0.001,
with_masks=True, # (load masks but may not use it)
with_depth=True, # (load depth path here, but may not use it)
height=480,
width=640,
cache_dir=osp.join(PROJ_ROOT, ".cache"),
use_cache=True,
num_to_load=-1,
filter_scene=False,
filter_invalid=False,
ref_key="lmo_full",
)
# ================ add single image dataset for debug =======================================
debug_im_ids = {
"train": {obj: [] for obj in ref.lm_full.objects},
"test": {obj: [] for obj in ref.lm_full.objects},
}
for obj in ref.lm_full.objects:
for split in ["train", "test"]:
cur_ann_file = osp.join(DATASETS_ROOT, f"BOP_DATASETS/lm/image_set/{obj}_{split}.txt")
ann_files = [cur_ann_file]
im_ids = []
with open(cur_ann_file, "r") as f:
for line in f:
# scene_id(obj_id)/im_id
im_ids.append("{}/{}".format(ref.lm_full.obj2id[obj], int(line.strip("\r\n"))))
debug_im_ids[split][obj] = im_ids
for debug_im_id in debug_im_ids[split][obj]:
name = "lm_single_{}{}_{}".format(obj, debug_im_id.split("/")[1], split)
if name not in SPLITS_LM:
SPLITS_LM[name] = dict(
name=name,
dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/"),
models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/models"),
objs=[obj], # only this obj
ann_files=ann_files,
image_prefixes=[
osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/test/{:06d}").format(ref.lm_full.obj2id[obj])
],
xyz_prefixes=[
osp.join(
DATASETS_ROOT,
"BOP_DATASETS/lm/test/xyz_crop/{:06d}".format(ref.lm_full.obj2id[obj]),
)
],
scale_to_meter=0.001,
with_masks=True, # (load masks but may not use it)
with_depth=True, # (load depth path here, but may not use it)
height=480,
width=640,
cache_dir=osp.join(PROJ_ROOT, ".cache"),
use_cache=True,
num_to_load=-1,
filter_invalid=False,
filter_scene=True,
ref_key="lm_full",
debug_im_id=debug_im_id, # NOTE: debug im id
)
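# register a split with Detectron2's DatasetCatalog / MetadataCatalog so that
# downstream dataloaders and evaluators can look it up by name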
def register_with_name_cfg(name, data_cfg=None):
"""Assume pre-defined datasets live in `./datasets`.
Args:
name: dataset_name
data_cfg: if name is in the pre-defined SPLITS, its data_cfg is used;
otherwise data_cfg must be provided (it can be set via cfg.DATA_CFG.name)
"""
dprint("register dataset: {}".format(name))
if name in SPLITS_LM:
used_cfg = SPLITS_LM[name]
else:
assert data_cfg is not None, f"dataset name {name} is not registered"
used_cfg = data_cfg
DatasetCatalog.register(name, LM_Dataset(used_cfg))
# something like eval_types
MetadataCatalog.get(name).set(
id="linemod", # NOTE: for pvnet to determine module
ref_key=used_cfg["ref_key"],
objs=used_cfg["objs"],
eval_error_types=["ad", "rete", "proj"],
evaluator_type="bop",
**get_lm_metadata(obj_names=used_cfg["objs"], ref_key=used_cfg["ref_key"]),
)
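# example (a minimal sketch; "lm_real_ape_train" is one of the split names
# generated above):
#   register_with_name_cfg("lm_real_ape_train")
#   dicts = DatasetCatalog.get("lm_real_ape_train")  # list of per-image dicts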
def get_available_datasets():
return list(SPLITS_LM.keys())
#### tests ###############################################
def test_vis():
dset_name = sys.argv[1]
assert dset_name in DatasetCatalog.list()
meta = MetadataCatalog.get(dset_name)
dprint("MetadataCatalog: ", meta)
objs = meta.objs
t_start = time.perf_counter()
dicts = DatasetCatalog.get(dset_name)
logger.info("Done loading {} samples with {:.3f}s.".format(len(dicts), time.perf_counter() - t_start))
dirname = "output/{}-data-vis".format(dset_name)
os.makedirs(dirname, exist_ok=True)
for d in dicts:
img = read_image_mmcv(d["file_name"], format="BGR")
depth = mmcv.imread(d["depth_file"], "unchanged") / 1000.0
imH, imW = img.shape[:2]
annos = d["annotations"]
masks = [cocosegm2mask(anno["segmentation"], imH, imW) for anno in annos]
bboxes = [anno["bbox"] for anno in annos]
bbox_modes = [anno["bbox_mode"] for anno in annos]
bboxes_xyxy = np.array(
[BoxMode.convert(box, box_mode, BoxMode.XYXY_ABS) for box, box_mode in zip(bboxes, bbox_modes)]
)
kpts_3d_list = [anno["bbox3d_and_center"] for anno in annos]
quats = [anno["quat"] for anno in annos]
transes = [anno["trans"] for anno in annos]
Rs = [quat2mat(quat) for quat in quats]
# 0-based label
cat_ids = [anno["category_id"] for anno in annos]
K = d["cam"]
kpts_2d = [misc.project_pts(kpt3d, K, R, t) for kpt3d, R, t in zip(kpts_3d_list, Rs, transes)]
# TODO: visualize pose and keypoints
labels = [objs[cat_id] for cat_id in cat_ids]
for _i in range(len(annos)):
img_vis = vis_image_mask_bbox_cv2(
img,
masks[_i : _i + 1],
bboxes=bboxes_xyxy[_i : _i + 1],
labels=labels[_i : _i + 1],
)
img_vis_kpts2d = misc.draw_projected_box3d(img_vis.copy(), kpts_2d[_i])
if "test" not in dset_name.lower():
xyz_path = annos[_i]["xyz_path"]
xyz_info = mmcv.load(xyz_path)
x1, y1, x2, y2 = xyz_info["xyxy"]
xyz_crop = xyz_info["xyz_crop"].astype(np.float32)
xyz = np.zeros((imH, imW, 3), dtype=np.float32)
xyz[y1 : y2 + 1, x1 : x2 + 1, :] = xyz_crop
xyz_show = get_emb_show(xyz)
xyz_crop_show = get_emb_show(xyz_crop)
img_xyz = img.copy() / 255.0
mask_xyz = ((xyz[:, :, 0] != 0) | (xyz[:, :, 1] != 0) | (xyz[:, :, 2] != 0)).astype("uint8")
fg_idx = np.where(mask_xyz != 0)
img_xyz[fg_idx[0], fg_idx[1], :] = xyz_show[fg_idx[0], fg_idx[1], :3]
img_xyz_crop = img_xyz[y1 : y2 + 1, x1 : x2 + 1, :]
img_vis_crop = img_vis[y1 : y2 + 1, x1 : x2 + 1, :]
# diff mask
diff_mask_xyz = np.abs(masks[_i] - mask_xyz)[y1 : y2 + 1, x1 : x2 + 1]
grid_show(
[
img[:, :, [2, 1, 0]],
img_vis[:, :, [2, 1, 0]],
img_vis_kpts2d[:, :, [2, 1, 0]],
depth,
# xyz_show,
diff_mask_xyz,
xyz_crop_show,
img_xyz[:, :, [2, 1, 0]],
img_xyz_crop[:, :, [2, 1, 0]],
img_vis_crop,
],
[
"img",
"vis_img",
"img_vis_kpts2d",
"depth",
"diff_mask_xyz",
"xyz_crop_show",
"img_xyz",
"img_xyz_crop",
"img_vis_crop",
],
row=3,
col=3,
)
else:
grid_show(
[
img[:, :, [2, 1, 0]],
img_vis[:, :, [2, 1, 0]],
img_vis_kpts2d[:, :, [2, 1, 0]],
depth,
],
["img", "vis_img", "img_vis_kpts2d", "depth"],
row=2,
col=2,
)
if __name__ == "__main__":
"""Test the dataset loader.
python this_file.py dataset_name
"""
from lib.vis_utils.image import grid_show
from lib.utils.setup_logger import setup_my_logger
import detectron2.data.datasets # noqa # add pre-defined metadata
from lib.vis_utils.image import vis_image_mask_bbox_cv2
from core.utils.utils import get_emb_show
from core.utils.data_utils import read_image_mmcv
print("sys.argv:", sys.argv)
logger = setup_my_logger(name="core")
register_with_name_cfg(sys.argv[1])
print("dataset catalog: ", DatasetCatalog.list())
test_vis()
# ================ lm_syn_imgn (synthetic LM data from DeepIM) ================
import hashlib
import logging
import os
import os.path as osp
import sys
import time
from collections import OrderedDict
import mmcv
import numpy as np
from tqdm import tqdm
from transforms3d.quaternions import mat2quat, quat2mat
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.structures import BoxMode
cur_dir = osp.dirname(osp.abspath(__file__))
PROJ_ROOT = osp.normpath(osp.join(cur_dir, "../../../.."))
sys.path.insert(0, PROJ_ROOT)
import ref
from lib.pysixd import inout, misc
from lib.utils.mask_utils import (
binary_mask_to_rle,
cocosegm2mask,
mask2bbox_xywh,
)
from lib.utils.utils import dprint, lazy_property
logger = logging.getLogger(__name__)
DATASETS_ROOT = osp.normpath(osp.join(PROJ_ROOT, "datasets"))
class LM_SYN_IMGN_Dataset(object):
"""lm synthetic data, imgn(imagine) from DeepIM."""
def __init__(self, data_cfg):
"""
Set with_depth and with_masks default to True,
and decide whether to load them into dataloader/network later
with_masks:
"""
self.name = data_cfg["name"]
self.data_cfg = data_cfg
self.objs = data_cfg["objs"] # selected objects
self.ann_files = data_cfg["ann_files"] # idx files with image ids
self.image_prefixes = data_cfg["image_prefixes"]
self.dataset_root = data_cfg["dataset_root"] # lm_imgn
self.models_root = data_cfg["models_root"] # BOP_DATASETS/lm/models
self.scale_to_meter = data_cfg["scale_to_meter"] # 0.001
self.with_masks = data_cfg["with_masks"] # True (load masks but may not use it)
self.with_depth = data_cfg["with_depth"] # True (load depth path here, but may not use it)
self.depth_factor = data_cfg["depth_factor"] # 1000.0
self.cam = data_cfg["cam"] #
self.height = data_cfg["height"] # 480
self.width = data_cfg["width"] # 640
self.cache_dir = data_cfg["cache_dir"] # .cache
self.use_cache = data_cfg["use_cache"] # True
# sample uniformly to get n items
self.n_per_obj = data_cfg.get("n_per_obj", 1000)
self.filter_invalid = data_cfg["filter_invalid"]
self.filter_scene = data_cfg.get("filter_scene", False)
##################################################
if self.cam is None:
self.cam = np.array([[572.4114, 0, 325.2611], [0, 573.57043, 242.04899], [0, 0, 1]])
# NOTE: careful! Only the selected objects
self.cat_ids = [cat_id for cat_id, obj_name in ref.lm_full.id2obj.items() if obj_name in self.objs]
# map selected objs to [0, num_objs-1]
self.cat2label = {v: i for i, v in enumerate(self.cat_ids)} # id_map
self.label2cat = {label: cat for cat, label in self.cat2label.items()}
self.obj2label = OrderedDict((obj, obj_id) for obj_id, obj in enumerate(self.objs))
##########################################################
def __call__(self): # LM_SYN_IMGN_Dataset
"""Load light-weight instance annotations of all images into a list of
dicts in Detectron2 format.
Do not load heavy data into memory in this file, since we will
load the annotations of all images into memory.
"""
# cache the dataset_dicts to avoid loading masks from files
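# the md5 key folds in the selected objects and the main config fields, so
# changing objs / with_masks / with_depth / n_per_obj invalidates the cache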
hashed_file_name = hashlib.md5(
(
"".join([str(fn) for fn in self.objs])
+ "dataset_dicts_{}_{}_{}_{}_{}_{}".format(
self.name,
self.dataset_root,
self.with_masks,
self.with_depth,
self.n_per_obj,
__name__,
)
).encode("utf-8")
).hexdigest()
cache_path = osp.join(
self.dataset_root,
"dataset_dicts_{}_{}.pkl".format(self.name, hashed_file_name),
)
if osp.exists(cache_path) and self.use_cache:
logger.info("load cached dataset dicts from {}".format(cache_path))
return mmcv.load(cache_path)
t_start = time.perf_counter()
logger.info("loading dataset dicts: {}".format(self.name))
self.num_instances_without_valid_segmentation = 0
self.num_instances_without_valid_box = 0
dataset_dicts = [] #######################################################
assert len(self.ann_files) == len(self.image_prefixes), f"{len(self.ann_files)} != {len(self.image_prefixes)}"
for ann_file, scene_root in zip(self.ann_files, self.image_prefixes):
# linemod each scene is an object
with open(ann_file, "r") as f_ann:
indices = [line.strip("\r\n").split()[-1] for line in f_ann.readlines()] # string ids
# sample uniformly (equal space)
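# e.g. with n_per_obj=1000 and 10000 frames listed, np.linspace keeps
# roughly every 10th index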
if self.n_per_obj > 0:
sample_num = min(self.n_per_obj, len(indices))
sel_indices_idx = np.linspace(0, len(indices) - 1, sample_num, dtype=np.int32)
sel_indices = [indices[int(_i)] for _i in sel_indices_idx]
else:
sel_indices = indices
for im_id in tqdm(sel_indices):
rgb_path = osp.join(scene_root, "{}-color.png".format(im_id))
assert osp.exists(rgb_path), rgb_path
depth_path = osp.join(scene_root, "{}-depth.png".format(im_id))
obj_name = im_id.split("/")[0]
if obj_name == "benchviseblue":
obj_name = "benchvise"
obj_id = ref.lm_full.obj2id[obj_name]
if self.filter_scene:
if obj_name not in self.objs:
continue
record = {
"dataset_name": self.name,
"file_name": osp.relpath(rgb_path, PROJ_ROOT),
"depth_file": osp.relpath(depth_path, PROJ_ROOT),
"height": self.height,
"width": self.width,
"image_id": im_id.split("/")[-1],
"scene_im_id": im_id,
"cam": self.cam,
"img_type": "syn",
}
cur_label = self.obj2label[obj_name] # 0-based label
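# {im_id}-pose.txt stores a 3x4 [R|t] matrix below a one-line header
# (hence skiprows=1); the 2D centroid is t projected by the camera matrix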
pose_path = osp.join(scene_root, "{}-pose.txt".format(im_id))
pose = np.loadtxt(pose_path, skiprows=1)
R = pose[:3, :3]
t = pose[:3, 3]
quat = mat2quat(R).astype("float32")
proj = record["cam"] @ t  # t is 1-D, so no transpose is needed
proj = proj[:2] / proj[2]
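# this synthetic set has no mask files: each render contains a single
# object, so the foreground mask is recovered from nonzero depth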
depth = mmcv.imread(depth_path, "unchanged") / 1000.0
mask = (depth > 0).astype(np.uint8)
bbox_obj = mask2bbox_xywh(mask)
x1, y1, w, h = bbox_obj
if self.filter_invalid:
if h <= 1 or w <= 1:
self.num_instances_without_valid_box += 1
continue
area = mask.sum()
if area < 3: # filter out too small or nearly invisible instances
self.num_instances_without_valid_segmentation += 1
continue
mask_rle = binary_mask_to_rle(mask, compressed=True)
inst = {
"category_id": cur_label, # 0-based label
"bbox": bbox_obj, # TODO: load both bbox_obj and bbox_visib
"bbox_mode": BoxMode.XYWH_ABS,
"pose": pose,
"quat": quat,
"trans": t,
"centroid_2d": proj, # absolute (cx, cy)
"segmentation": mask_rle,
}
model_info = self.models_info[str(obj_id)]
inst["model_info"] = model_info
# TODO: using full mask
for key in ["bbox3d_and_center"]:
inst[key] = self.models[cur_label][key]
record["annotations"] = [inst]
dataset_dicts.append(record)
if self.num_instances_without_valid_segmentation > 0:
logger.warning(
"Filtered out {} instances without valid segmentation. "
"There might be issues in your dataset generation process.".format(
self.num_instances_without_valid_segmentation
)
)
if self.num_instances_without_valid_box > 0:
logger.warning(
"Filtered out {} instances without valid box. "
"There might be issues in your dataset generation process.".format(self.num_instances_without_valid_box)
)
##########################################################################
# if self.num_to_load > 0:
# self.num_to_load = min(int(self.num_to_load), len(dataset_dicts))
# random.shuffle(dataset_dicts)
# dataset_dicts = dataset_dicts[: self.num_to_load]
logger.info(
"loaded dataset dicts, num_images: {}, using {}s".format(len(dataset_dicts), time.perf_counter() - t_start)
)
mmcv.dump(dataset_dicts, cache_path, protocol=4)
logger.info("Dumped dataset_dicts to {}".format(cache_path))
return dataset_dicts
@lazy_property
def models_info(self):
models_info_path = osp.join(self.models_root, "models_info.json")
assert osp.exists(models_info_path), models_info_path
models_info = mmcv.load(models_info_path) # key is str(obj_id)
return models_info
@lazy_property
def models(self):
"""Load models into a list."""
cache_path = osp.join(self.models_root, "models_{}.pkl".format("_".join(self.objs)))
if osp.exists(cache_path) and self.use_cache:
# logger.info("load cached object models from {}".format(cache_path))
return mmcv.load(cache_path)
models = []
for obj_name in self.objs:
model = inout.load_ply(
osp.join(
self.models_root,
f"obj_{ref.lm_full.obj2id[obj_name]:06d}.ply",
),
vertex_scale=self.scale_to_meter,
)
# NOTE: bbox3d_and_center is not computed from re-centered vertices;
# this is fine for BOP models since they are already centered
model["bbox3d_and_center"] = misc.get_bbox3d_and_center(model["pts"])
models.append(model)
logger.info("cache models to {}".format(cache_path))
mmcv.dump(models, cache_path, protocol=4)
return models
def image_aspect_ratio(self):
# return 1
return self.width / self.height # 4/3
########### register datasets ############################################################
def get_lm_metadata(obj_names, ref_key):
"""task specific metadata."""
data_ref = ref.__dict__[ref_key]
cur_sym_infos = {} # label based key
loaded_models_info = data_ref.get_models_info()
for i, obj_name in enumerate(obj_names):
obj_id = data_ref.obj2id[obj_name]
model_info = loaded_models_info[str(obj_id)]
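# precompute per-object symmetry rotations for symmetry-aware training and
# evaluation; continuous symmetries are discretized by the BOP-toolkit
# helper, with max_sym_disc_step bounding the discretization step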
if "symmetries_discrete" in model_info or "symmetries_continuous" in model_info:
sym_transforms = misc.get_symmetry_transformations(model_info, max_sym_disc_step=0.01)
sym_info = np.array([sym["R"] for sym in sym_transforms], dtype=np.float32)
else:
sym_info = None
cur_sym_infos[i] = sym_info
meta = {"thing_classes": obj_names, "sym_infos": cur_sym_infos}
return meta
LM_13_OBJECTS = [
"ape",
"benchvise",
"camera",
"can",
"cat",
"driller",
"duck",
"eggbox",
"glue",
"holepuncher",
"iron",
"lamp",
"phone",
] # no bowl, cup
################################################################################
SPLITS_LM_IMGN_13 = dict(
lm_imgn_13_train_1k_per_obj=dict(
name="lm_imgn_13_train_1k_per_obj", # BB8 training set
dataset_root=osp.join(DATASETS_ROOT, "lm_imgn/"),
models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/models"),
objs=LM_13_OBJECTS, # selected objects
ann_files=[
osp.join(
DATASETS_ROOT,
"lm_imgn/image_set/{}_{}.txt".format("train", _obj),
)
for _obj in LM_13_OBJECTS
],
image_prefixes=[osp.join(DATASETS_ROOT, "lm_imgn/imgn") for _obj in LM_13_OBJECTS],
scale_to_meter=0.001,
with_masks=True, # (load masks but may not use it)
with_depth=True, # (load depth path here, but may not use it)
depth_factor=1000.0,
cam=ref.lm_full.camera_matrix,
height=480,
width=640,
cache_dir=osp.join(PROJ_ROOT, ".cache"),
use_cache=True,
n_per_obj=1000, # 1000 per class
filter_scene=True,
filter_invalid=False,
ref_key="lm_full",
)
)
# single obj splits
for obj in ref.lm_full.objects:
for split in ["train"]:
name = "lm_imgn_13_{}_{}_1k".format(obj, split)
ann_files = [osp.join(DATASETS_ROOT, "lm_imgn/image_set/{}_{}.txt".format(split, obj))]
if split in ["train"]:
filter_invalid = True
elif split in ["test"]:
filter_invalid = False
else:
raise ValueError("{}".format(split))
if name not in SPLITS_LM_IMGN_13:
SPLITS_LM_IMGN_13[name] = dict(
name=name,
dataset_root=osp.join(DATASETS_ROOT, "lm_imgn/"),
models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/models"),
objs=[obj], # only this obj
ann_files=ann_files,
image_prefixes=[osp.join(DATASETS_ROOT, "lm_imgn/imgn/")],
scale_to_meter=0.001,
with_masks=True, # (load masks but may not use it)
with_depth=True, # (load depth path here, but may not use it)
depth_factor=1000.0,
cam=ref.lm_full.camera_matrix,
height=480,
width=640,
cache_dir=osp.join(PROJ_ROOT, ".cache"),
use_cache=True,
n_per_obj=1000,
filter_invalid=False,
filter_scene=True,
ref_key="lm_full",
)
def register_with_name_cfg(name, data_cfg=None):
"""Assume pre-defined datasets live in `./datasets`.
Args:
name: dataset_name
data_cfg: if name is in the pre-defined SPLITS, its data_cfg is used;
otherwise data_cfg must be provided (it can be set via cfg.DATA_CFG.name)
"""
dprint("register dataset: {}".format(name))
if name in SPLITS_LM_IMGN_13:
used_cfg = SPLITS_LM_IMGN_13[name]
else:
assert data_cfg is not None, f"dataset name {name} is not registered"
used_cfg = data_cfg
DatasetCatalog.register(name, LM_SYN_IMGN_Dataset(used_cfg))
# something like eval_types
MetadataCatalog.get(name).set(
ref_key=used_cfg["ref_key"],
objs=used_cfg["objs"],
eval_error_types=["ad", "rete", "proj"],
evaluator_type="coco_bop",
**get_lm_metadata(obj_names=used_cfg["objs"], ref_key=used_cfg["ref_key"]),
)
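# example (a minimal sketch using a split defined above):
#   register_with_name_cfg("lm_imgn_13_train_1k_per_obj")
#   dicts = DatasetCatalog.get("lm_imgn_13_train_1k_per_obj")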
def get_available_datasets():
return list(SPLITS_LM_IMGN_13.keys())
#### tests ###############################################
def test_vis():
dset_name = sys.argv[1]
assert dset_name in DatasetCatalog.list()
meta = MetadataCatalog.get(dset_name)
dprint("MetadataCatalog: ", meta)
objs = meta.objs
t_start = time.perf_counter()
dicts = DatasetCatalog.get(dset_name)
logger.info("Done loading {} samples with {:.3f}s.".format(len(dicts), time.perf_counter() - t_start))
dirname = "output/{}-data-vis".format(dset_name)
os.makedirs(dirname, exist_ok=True)
for d in dicts:
img = read_image_mmcv(d["file_name"], format="BGR")
depth = mmcv.imread(d["depth_file"], "unchanged") / 1000.0
anno = d["annotations"][0] # only one instance per image
imH, imW = img.shape[:2]
mask = cocosegm2mask(anno["segmentation"], imH, imW)
bbox = anno["bbox"]
bbox_mode = anno["bbox_mode"]
bbox_xyxy = np.array(BoxMode.convert(bbox, bbox_mode, BoxMode.XYXY_ABS))
kpt3d = anno["bbox3d_and_center"]
quat = anno["quat"]
trans = anno["trans"]
R = quat2mat(quat)
# 0-based label
cat_id = anno["category_id"]
K = d["cam"]
kpt_2d = misc.project_pts(kpt3d, K, R, trans)
# TODO: visualize pose and keypoints
label = objs[cat_id]
# img_vis = vis_image_bboxes_cv2(img, bboxes=bboxes_xyxy, labels=labels)
img_vis = vis_image_mask_bbox_cv2(img, [mask], bboxes=[bbox_xyxy], labels=[label])
img_vis_kpt2d = img.copy()
img_vis_kpt2d = misc.draw_projected_box3d(
img_vis_kpt2d,
kpt_2d,
middle_color=None,
bottom_color=(128, 128, 128),
)
grid_show(
[
img[:, :, [2, 1, 0]],
img_vis[:, :, [2, 1, 0]],
img_vis_kpt2d[:, :, [2, 1, 0]],
depth,
],
["img", "vis_img", "img_vis_kpts2d", "depth"],
row=2,
col=2,
)
if __name__ == "__main__":
"""Test the dataset loader.
Usage:
python -m det.yolov4.datasets.lm_syn_imgn dataset_name
"""
from lib.vis_utils.image import grid_show
from lib.utils.setup_logger import setup_my_logger
import detectron2.data.datasets # noqa # add pre-defined metadata
from lib.vis_utils.image import vis_image_mask_bbox_cv2
from core.utils.data_utils import read_image_mmcv
print("sys.argv:", sys.argv)
logger = setup_my_logger(name="core")
register_with_name_cfg(sys.argv[1])
print("dataset catalog: ", DatasetCatalog.list())
test_vis()