diff --git a/configs/_base_/gdrn_base.py b/configs/_base_/gdrn_base.py
index 182eaa5bcf6738d76f740daa985d21040d0cd9dc..68b748a214829f228e3b70f2fee959a305c6d63f 100644
--- a/configs/_base_/gdrn_base.py
+++ b/configs/_base_/gdrn_base.py
@@ -159,7 +159,7 @@ MODEL = dict(
 TEST = dict(
     EVAL_PERIOD=0,
     VIS=False,
-    TEST_BBOX_TYPE="est",  # gt | est
+    TEST_BBOX_TYPE="gt",  # gt | est
     COLOR_AUG=False,  # use random color aug as train
     USE_PNP=False,  # use pnp or direct prediction
     SAVE_RESULTS_ONLY=False,  # turn this on to only save the predicted results
diff --git a/configs/gdrn/fruitbin/convnext_a6_AugCosyAAEGray_BG05_mlL1_DMask_amodalClipBox_classAware_fruitbin.py b/configs/gdrn/fruitbin/convnext_a6_AugCosyAAEGray_BG05_mlL1_DMask_amodalClipBox_classAware_fruitbin.py
index 339caa115f77ec9c748f24fa2513689e86a36a4a..e0199bc0b7d7e13d304adab88688383bd97fc9ee 100644
--- a/configs/gdrn/fruitbin/convnext_a6_AugCosyAAEGray_BG05_mlL1_DMask_amodalClipBox_classAware_fruitbin.py
+++ b/configs/gdrn/fruitbin/convnext_a6_AugCosyAAEGray_BG05_mlL1_DMask_amodalClipBox_classAware_fruitbin.py
@@ -32,8 +32,8 @@ INPUT = dict(
 )
 
 SOLVER = dict(
-    IMS_PER_BATCH=36,
-    TOTAL_EPOCHS=1,  # 10
+    IMS_PER_BATCH=33,
+    TOTAL_EPOCHS=30,
     LR_SCHEDULER_NAME="flat_and_anneal",
     ANNEAL_METHOD="cosine",  # "cosine"
     ANNEAL_POINT=0.72,
@@ -46,11 +46,15 @@ SOLVER = dict(
 DATASETS = dict(
     TRAIN=("fruitbin_train_pbr",),
     TEST=("fruitbin_test",),
-    DET_FILES_TEST=("datasets/BOP_DATASETS/fruitbin/test/test_bboxes/yolox_x_640_fruitbin_real_pbr_fruitbin_bop_test.json",),
+    DET_FILES_TEST=("/gdrnpp_bop2022/datasets/BOP_DATASETS/fruitbin/test/test_bboxes/gt_all_fruits_fruitbin_pbr_fruitbin_bop_test.json",),
     SYM_OBJS=[
+        "apple2",
+        "apricot",
+        "kiwi1",
         "lemon2",
         "orange2",
-    ],  # used for custom evalutor
+        "peach1",
+    ],  # used for custom evaluator
 )
 
 DATALOADER = dict(
@@ -137,4 +141,4 @@ VAL = dict(
     USE_BOP=True,  # whether to use bop toolkit
 )
 
-TEST = dict(EVAL_PERIOD=0, VIS=False, TEST_BBOX_TYPE="est")  # gt | est
+TEST = dict(EVAL_PERIOD=0, VIS=False, TEST_BBOX_TYPE="gt")  # gt | est
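For reference, the detection files pointed to by DET_FILES_TEST follow the layout produced by core/gdrn_modeling/tools/fruitbin/convert_det_to_our_format.py further down in this diff: a dict keyed by "scene_id/im_id". The sketch below uses made-up values; note that core/utils/dataset_utils.py (also in this diff) now reads the box under the key "bbox" rather than "bbox_est".

import json

dets = {
    "0/42": [  # key is "scene_id/im_id"
        {
            "bbox_est": [123.0, 45.0, 80.0, 60.0],  # xywh, as written by the converter
            "obj_id": 6,   # 1-based object id (orange2)
            "score": 0.98,
            "time": 0.05,
        },
    ],
}
with open("example_dets.json", "w") as f:  # hypothetical output path
    json.dump(dets, f, indent=2)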
diff --git a/configs/gdrn/fruitbinPbrSOr/convnext_AugCosyAAEGray_DMask_amodalClipBox_fruitbin/banana1.py b/configs/gdrn/fruitbinPbrSO/convnext_AugCosyAAEGray_DMask_amodalClipBox_fruitbin/banana1.py
similarity index 100%
rename from configs/gdrn/fruitbinPbrSOr/convnext_AugCosyAAEGray_DMask_amodalClipBox_fruitbin/banana1.py
rename to configs/gdrn/fruitbinPbrSO/convnext_AugCosyAAEGray_DMask_amodalClipBox_fruitbin/banana1.py
diff --git a/configs/gdrn/fruitbinPbrSOr/convnext_AugCosyAAEGray_DMask_amodalClipBox_fruitbin/lemon2.py b/configs/gdrn/fruitbinPbrSO/convnext_AugCosyAAEGray_DMask_amodalClipBox_fruitbin/lemon2.py
similarity index 100%
rename from configs/gdrn/fruitbinPbrSOr/convnext_AugCosyAAEGray_DMask_amodalClipBox_fruitbin/lemon2.py
rename to configs/gdrn/fruitbinPbrSO/convnext_AugCosyAAEGray_DMask_amodalClipBox_fruitbin/lemon2.py
diff --git a/configs/gdrn/fruitbinPbrSOr/convnext_AugCosyAAEGray_DMask_amodalClipBox_fruitbin/pear2.py b/configs/gdrn/fruitbinPbrSO/convnext_AugCosyAAEGray_DMask_amodalClipBox_fruitbin/pear2.py
similarity index 100%
rename from configs/gdrn/fruitbinPbrSOr/convnext_AugCosyAAEGray_DMask_amodalClipBox_fruitbin/pear2.py
rename to configs/gdrn/fruitbinPbrSO/convnext_AugCosyAAEGray_DMask_amodalClipBox_fruitbin/pear2.py
diff --git a/configs/gdrn/fruitbinSO/convnext_AugCosyAAEGray_DMask_amodalClipBox_fruitbin/banana1.py b/configs/gdrn/fruitbinSO/convnext_AugCosyAAEGray_DMask_amodalClipBox_fruitbin/banana1.py
index dbfda48f373e15f1b7130dfb45dafeb4c9eec005..cfafd641ac0f49b51c31ada36fa216c3ba56cbbb 100644
--- a/configs/gdrn/fruitbinSO/convnext_AugCosyAAEGray_DMask_amodalClipBox_fruitbin/banana1.py
+++ b/configs/gdrn/fruitbinSO/convnext_AugCosyAAEGray_DMask_amodalClipBox_fruitbin/banana1.py
@@ -47,10 +47,10 @@ DATASETS = dict(
     TRAIN=("fruitbin_train_pbr"),
     TEST=("fruitbin",),
     DET_FILES_TEST=("datasets/BOP_DATASETS/fruitbin/test/test_bboxes/yolox_x_640_ycbv_pbr_ycbv_bop_test.json",),
-    SYM_OBJS=[
-        "lemon2",
-        "orange2"
-    ],  # used for custom evalutor
+    # SYM_OBJS=[
+    #     "pear2",
+    #     "orange2"
+    # ],  # used for custom evaluator
 )
 
 DATALOADER = dict(
@@ -133,4 +133,4 @@ VAL = dict(
     USE_BOP=True,  # whether to use bop toolkit
 )
 
-TEST = dict(EVAL_PERIOD=0, VIS=False, TEST_BBOX_TYPE="est")  # gt | est
+TEST = dict(EVAL_PERIOD=0, VIS=False, TEST_BBOX_TYPE="gt")  # gt | est
diff --git a/configs/gdrn/fruitbinSO/convnext_AugCosyAAEGray_DMask_amodalClipBox_fruitbin/lemon2.py b/configs/gdrn/fruitbinSO/convnext_AugCosyAAEGray_DMask_amodalClipBox_fruitbin/orange2.py
similarity index 51%
rename from configs/gdrn/fruitbinSO/convnext_AugCosyAAEGray_DMask_amodalClipBox_fruitbin/lemon2.py
rename to configs/gdrn/fruitbinSO/convnext_AugCosyAAEGray_DMask_amodalClipBox_fruitbin/orange2.py
index 068ba4483142646b34fdefca6615dc446dfc17ce..928cc53a14a46dc5aed78eb2be7e7061c815a4e2 100644
--- a/configs/gdrn/fruitbinSO/convnext_AugCosyAAEGray_DMask_amodalClipBox_fruitbin/lemon2.py
+++ b/configs/gdrn/fruitbinSO/convnext_AugCosyAAEGray_DMask_amodalClipBox_fruitbin/orange2.py
@@ -1,3 +1,3 @@
 _base_ = "./banana1.py"
-OUTPUT_DIR = "output/gdrn/fruitbinPbrSO/convnext_AugCosyAAEGray_DMask_amodalClipBox_fruitbin/lemon2"
-DATASETS = dict(TRAIN=("fruitbin_lemon2",))
+OUTPUT_DIR = "output/gdrn/fruitbinPbrSO/convnext_AugCosyAAEGray_DMask_amodalClipBox_fruitbin/orange2"
+DATASETS = dict(TRAIN=("fruitbin_orange2",))
diff --git a/core/gdrn_modeling/datasets/dataset_factory.py b/core/gdrn_modeling/datasets/dataset_factory.py
index 9a7b745860e21b4e9012e9a72b071bbb405f5e5d..eb19773506a31c763d7ae45dd51ca31cc993f62d 100644
--- a/core/gdrn_modeling/datasets/dataset_factory.py
+++ b/core/gdrn_modeling/datasets/dataset_factory.py
@@ -30,7 +30,6 @@ from core.gdrn_modeling.datasets import (
     fruitbin_d2,
     fruitbin_pbr,
     fruitbin_bop_test,
-
 )
 
 
@@ -65,7 +64,6 @@ _DSET_MOD_NAMES = [
     "fruitbin_d2",
     "fruitbin_pbr",
     "fruitbin_bop_test",
-
 ]
 
 logger = logging.getLogger(__name__)
diff --git a/core/gdrn_modeling/datasets/fruitbin_bop_test.py b/core/gdrn_modeling/datasets/fruitbin_bop_test.py
index 956111b529e5c6b09331cd45c45ff510595575d6..ba38fd5b1b55a12f0154ff4fa717af1896d5b3a5 100644
--- a/core/gdrn_modeling/datasets/fruitbin_bop_test.py
+++ b/core/gdrn_modeling/datasets/fruitbin_bop_test.py
@@ -127,10 +127,10 @@ class FRUITBIN_BOP_TEST_Dataset:
         for scene_id, im_id in tqdm(scene_im_ids):
             str_im_id = str(im_id)
             scene_root = osp.join(self.dataset_root, f"{scene_id:06d}")
-            rgb_path = osp.join(scene_root, "rgb/{:d}.png").format(im_id)
+            rgb_path = osp.join(scene_root, "rgb/{:06d}.png").format(im_id)
             assert osp.exists(rgb_path), rgb_path
 
-            depth_path = osp.join(scene_root, "depth/{:d}.png".format(im_id))
+            depth_path = osp.join(scene_root, "depth/{:06d}.png".format(im_id))
 
             scene_id = int(rgb_path.split("/")[-3])
 
@@ -177,7 +177,7 @@ class FRUITBIN_BOP_TEST_Dataset:
                 # )
                 mask_visib_file = osp.join(
                     scene_root,
-                    "mask_visib/{:d}.png".format(im_id, anno_i),
+                    "mask_visib/{:06d}.png".format(im_id, anno_i),
                 )
                 # assert osp.exists(mask_file), mask_file
                 assert osp.exists(mask_visib_file), mask_visib_file
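The {:d} → {:06d} changes here (and repeated below in fruitbin_d2.py, fruitbin_pbr.py and the det/yolox loaders) assume BOP-style zero-padded, six-digit image names. A quick sketch of the difference:

import os.path as osp

scene_root = "datasets/BOP_DATASETS/fruitbin/test/000000"  # example scene
im_id = 42
print(osp.join(scene_root, "rgb/{:d}.png").format(im_id))    # .../rgb/42.png
print(osp.join(scene_root, "rgb/{:06d}.png").format(im_id))  # .../rgb/000042.png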
diff --git a/core/gdrn_modeling/datasets/fruitbin_d2.py b/core/gdrn_modeling/datasets/fruitbin_d2.py
index 45d8d351b8cc8fe3c466942e85ea31897a6b2a0c..f39ec9152ab828aa34b210eb399b34b7aa05d820 100755
--- a/core/gdrn_modeling/datasets/fruitbin_d2.py
+++ b/core/gdrn_modeling/datasets/fruitbin_d2.py
@@ -125,7 +125,7 @@ class FRUITBIN_Dataset:
         num_instances_without_valid_box = 0
 
         for (scene_id, im_id) in tqdm(scene_im_ids):
-            rgb_path = osp.join(image_root, f"{scene_id:06d}/rgb/{im_id:d}.png")
+            rgb_path = osp.join(image_root, f"{scene_id:06d}/rgb/{im_id:06d}.png")
             assert osp.exists(rgb_path), rgb_path
 
             str_im_id = str(im_id)
@@ -206,7 +206,7 @@ class FRUITBIN_Dataset:
                 if self.with_masks:  # either list[list[float]] or dict(RLE)
                     mask_visib_file = osp.join(
                         image_root,
-                        f"{scene_id:06d}/mask_visib/{im_id:d}.png",
+                        f"{scene_id:06d}/mask_visib/{im_id:06d}.png",
                     )
                     assert osp.exists(mask_visib_file), mask_visib_file
                     mask = mmcv.imread(mask_visib_file, "unchanged")
diff --git a/core/gdrn_modeling/datasets/fruitbin_pbr.py b/core/gdrn_modeling/datasets/fruitbin_pbr.py
index 4cede9ec4451fbbee030f8b398c220ab3f878e0f..6c470c76ac4f93ef5c8e1ce80a1f1377e0adf9e4 100644
--- a/core/gdrn_modeling/datasets/fruitbin_pbr.py
+++ b/core/gdrn_modeling/datasets/fruitbin_pbr.py
@@ -43,7 +43,7 @@ class FRUITBIN_PBR_Dataset:
         )
         self.xyz_root = data_cfg.get("xyz_root", osp.join(self.dataset_root, "xyz_crop"))
         assert osp.exists(self.dataset_root), self.dataset_root
-        self.models_root = data_cfg["models_root"]  # BOP_DATASETS/ycbv/models
+        self.models_root = data_cfg["models_root"]  # BOP_DATASETS/fruitbin/models
         self.scale_to_meter = data_cfg["scale_to_meter"]  # 0.001
 
         self.with_masks = data_cfg["with_masks"]
@@ -111,12 +111,13 @@ class FRUITBIN_PBR_Dataset:
             gt_dict = mmcv.load(osp.join(scene_root, "scene_gt.json"))
             gt_info_dict = mmcv.load(osp.join(scene_root, "scene_gt_info.json"))
             cam_dict = mmcv.load(osp.join(scene_root, "scene_camera.json"))
+
             for str_im_id in tqdm(gt_dict, postfix=f"{scene_id}"):
                 int_im_id = int(str_im_id)
-                rgb_path = osp.join(scene_root, "rgb/{:d}.png").format(int_im_id)
+                rgb_path = osp.join(scene_root, "rgb/{:06d}.png").format(int_im_id)
                 assert osp.exists(rgb_path), rgb_path
 
-                depth_path = osp.join(scene_root, "depth/{:d}.png".format(int_im_id))
+                depth_path = osp.join(scene_root, "depth/{:06d}.png".format(int_im_id))
 
                 scene_im_id = f"{scene_id}/{int_im_id}"
 
@@ -163,7 +164,7 @@ class FRUITBIN_PBR_Dataset:
                 )
                 mask_visib_file = osp.join(
                     scene_root,
-                    "mask_visib/{:d}.png".format(int_im_id),
+                    "mask_visib/{:06d}.png".format(int_im_id),
                 )
                 # assert osp.exists(mask_file), mask_file
                 assert osp.exists(mask_visib_file), mask_visib_file
@@ -339,7 +340,7 @@ for obj in ref.fruitbin.objects:
         with_depth=True,  # (load depth path here, but may not use it)
         height=480,
         width=640,
-        use_cache=False,
+        use_cache=True,
         num_to_load=-1,
         filter_invalid=filter_invalid,
         ref_key="fruitbin",
diff --git a/core/gdrn_modeling/engine/test_utils.py b/core/gdrn_modeling/engine/test_utils.py
index 7656d612cce5691122395897f78cf6fa5fc0bf7c..a7cdba923a89c8e412ebecf17a1e3762b89b37b2 100644
--- a/core/gdrn_modeling/engine/test_utils.py
+++ b/core/gdrn_modeling/engine/test_utils.py
@@ -152,6 +152,7 @@ def get_data_ref(dataset_name):
         "hb": "hb",
         "hbs": "hb_bop19",
         "itodd": "itodd",
+        "fruitbin": "fruitbin",
     }
     ref_key = ref_key_dict[dataset_name]
     return ref.__dict__[ref_key]
@@ -266,7 +267,7 @@ def summary_scores(
 
     # mean of selected objs
     num_objs = len(sel_obj_ids)
-    if num_objs > 1:
+    if num_objs >= 1:
        sel_obj_recalls = [_recall for _id, _recall in score_dict["obj_recalls"].items() if int(_id) in sel_obj_ids]
         if not is_weighted_average_metric(error_type):
             mean_obj_recall = np.mean(sel_obj_recalls)
@@ -361,6 +362,7 @@ def load_and_print_val_scores_tab(
         "tyol": 15,
         "ycbv": 15,
         "ycbvposecnn": 15,
+        "fruitbin": 15,
     }
     ntop = cfg.VAL.N_TOP
     val_dataset_name = cfg.VAL.DATASET_NAME
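Relaxing num_objs > 1 to num_objs >= 1 in summary_scores means the per-object mean recall is also reported for single-object runs such as the fruitbinSO configs above. A toy illustration:

import numpy as np

obj_recalls = {"6": 0.91}  # a single-object evaluation
sel_obj_ids = [6]
sel_obj_recalls = [_recall for _id, _recall in obj_recalls.items() if int(_id) in sel_obj_ids]
print(np.mean(sel_obj_recalls))  # 0.91; with `> 1` this branch was skipped entirely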
diff --git a/core/gdrn_modeling/eval_pose.sh b/core/gdrn_modeling/eval_pose.sh
new file mode 100644
index 0000000000000000000000000000000000000000..00f229e4bc088a69241be70369e38d4119a04ce0
--- /dev/null
+++ b/core/gdrn_modeling/eval_pose.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+
+MODELS=("model_0129739")
+
+for MODEL in "${MODELS[@]}"
+do
+    MODEL_PATH="/gdrnpp_bop2022/output/output_world_occ_01/gdrn/fruitbin/convnext_a6_AugCosyAAEGray_BG05_mlL1_DMask_amodalClipBox_classAware_fruitbin/$MODEL.pth"
+
+    /gdrnpp_bop2022/core/gdrn_modeling/test_gdrn.sh /gdrnpp_bop2022/configs/gdrn/fruitbin/convnext_a6_AugCosyAAEGray_BG05_mlL1_DMask_amodalClipBox_classAware_fruitbin.py 0 $MODEL_PATH
+
+    LOGFILE="/gdrnpp_bop2022/output/output_world_occ_01/gdrn/fruitbin/convnext_a6_AugCosyAAEGray_BG05_mlL1_DMask_amodalClipBox_classAware_fruitbin/$MODEL.log"
+    PRED_PATH="/gdrnpp_bop2022/output/gdrn/fruitbin/convnext_a6_AugCosyAAEGray_BG05_mlL1_DMask_amodalClipBox_classAware_fruitbin/inference_$MODEL/fruitbin_test/convnext-a6-AugCosyAAEGray-BG05-mlL1-DMask-amodalClipBox-classAware-fruitbin-test-iter0_fruitbin-test.csv"
+
+    declare -A classes=(
+        ["apple2"]="True"
+        ["apricot"]="True"
+        ["banana1"]="False"
+        ["kiwi1"]="True"
+        ["lemon2"]="True"
+        ["orange2"]="True"
+        ["peach1"]="True"
+        ["pear2"]="False"
+    )
+
+    for CLASS in "${!classes[@]}"
+    do
+        echo "Evaluating $CLASS" | tee -a $LOGFILE
+        python /gdrnpp_bop2022/core/gdrn_modeling/tools/fruitbin/eval_pose.py --path_data=/gdrnpp_bop2022/datasets/BOP_DATASETS/fruitbin/ --pred_path=$PRED_PATH --class_name=$CLASS --symmetry=${classes[$CLASS]} | tee -a $LOGFILE
+    done
+
+done
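eval_pose.sh hands --symmetry=True / --symmetry=False to eval_pose.py as strings. With argparse's type=bool, any non-empty string (including "False") parses as True, which is why eval_pose.py below defines an explicit string-to-bool converter. A minimal sketch of the pitfall:

import argparse

ap = argparse.ArgumentParser()
ap.add_argument("--symmetry", type=bool, default=False)
args = ap.parse_args(["--symmetry=False"])
print(args.symmetry)  # True: bool("False") is truthy, so the flag must be parsed by hand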
+ """ + with open(path, "w") as f: + + if isinstance(content, dict): + f.write("{\n") + if sort: + content_sorted = sorted(content.items(), key=lambda x: x[0]) + else: + content_sorted = content.items() + for elem_id, (k, v) in enumerate(content_sorted): + f.write(' "{}": {}'.format(k, json.dumps(v, sort_keys=True))) + if elem_id != len(content) - 1: + f.write(",") + f.write("\n") + f.write("}") + + elif isinstance(content, list): + f.write("[\n") + for elem_id, elem in enumerate(content): + f.write(" {}".format(json.dumps(elem, sort_keys=True))) + if elem_id != len(content) - 1: + f.write(",") + f.write("\n") + f.write("]") + + else: + json.dump(content, f, sort_keys=True) + + +save_json("datasets/BOP_DATASETS/fruitbin/test/test_bboxes/yolox_x_640_fruitbin_pbr_fruitbin_bop_test.json", outs) diff --git a/core/gdrn_modeling/tools/fruitbin/eval_pose.py b/core/gdrn_modeling/tools/fruitbin/eval_pose.py new file mode 100644 index 0000000000000000000000000000000000000000..cf2d1e1045c8f96061be1c65a9c3db399f0cc6a4 --- /dev/null +++ b/core/gdrn_modeling/tools/fruitbin/eval_pose.py @@ -0,0 +1,383 @@ +import numpy as np +import math +from scipy import spatial +from scipy.linalg import logm +from numpy import linalg as LA +import open3d as o3d +import argparse +import os +import sys +from plyfile import PlyData, PlyElement +import json +import pandas as pd + + +def read_diameter(path, obj_number): + filename = f'{path}/models/models_info.json' + with open(filename, 'r') as f: + models_info = json.load(f) + + if str(obj_number) in models_info: + diameter_in_cm = models_info[str(obj_number)]["diameter"] + return diameter_in_cm + + #return diameter_in_cm * 0.01 + return diameter_in_cm + + +def transform_pts_Rt(pts, R, t): + """Applies a rigid transformation to 3D points. + :param pts: nx3 ndarray with 3D points. + :param R: 3x3 ndarray with a rotation matrix. + :param t: 3x1 ndarray with a translation vector. + :return: nx3 ndarray with transformed 3D points. + """ + assert (pts.shape[1] == 3) + pts_t = R.dot(pts.T) + t.reshape((3, 1)) + return pts_t.T + + +def project_pts(pts, K, R, t): + """Projects 3D points. + :param pts: nx3 ndarray with the 3D points. + :param K: 3x3 ndarray with an intrinsic camera matrix. + :param R: 3x3 ndarray with a rotation matrix. + :param t: 3x1 ndarray with a translation vector. + :return: nx2 ndarray with 2D image coordinates of the projections. + """ + assert (pts.shape[1] == 3) + P = K.dot(np.hstack((R, t))) + pts_h = np.hstack((pts, np.ones((pts.shape[0], 1)))) + pts_im = P.dot(pts_h.T) + pts_im /= pts_im[2, :] + return pts_im[:2, :].T + + +def add(R_est, t_est, R_gt, t_gt, pts): + """Average Distance of Model Points for objects with no indistinguishable + views - by Hinterstoisser et al. (ACCV'12). + :param R_est: 3x3 ndarray with the estimated rotation matrix. + :param t_est: 3x1 ndarray with the estimated translation vector. + :param R_gt: 3x3 ndarray with the ground-truth rotation matrix. + :param t_gt: 3x1 ndarray with the ground-truth translation vector. + :param pts: nx3 ndarray with 3D model points. + :return: The calculated error. + """ + pts_est = transform_pts_Rt(pts, R_est, t_est) + pts_gt = transform_pts_Rt(pts, R_gt, t_gt) + e = np.linalg.norm(pts_est - pts_gt, axis=1).mean() + return e + + +def adi(R_est, t_est, R_gt, t_gt, pts): + """Average Distance of Model Points for objects with indistinguishable views + - by Hinterstoisser et al. (ACCV'12). + :param R_est: 3x3 ndarray with the estimated rotation matrix. 
+def adi(R_est, t_est, R_gt, t_gt, pts):
+    """Average Distance of Model Points for objects with indistinguishable views
+    - by Hinterstoisser et al. (ACCV'12).
+
+    :param R_est: 3x3 ndarray with the estimated rotation matrix.
+    :param t_est: 3x1 ndarray with the estimated translation vector.
+    :param R_gt: 3x3 ndarray with the ground-truth rotation matrix.
+    :param t_gt: 3x1 ndarray with the ground-truth translation vector.
+    :param pts: nx3 ndarray with 3D model points.
+    :return: The calculated error.
+    """
+    pts_est = transform_pts_Rt(pts, R_est, t_est)
+    pts_gt = transform_pts_Rt(pts, R_gt, t_gt)
+
+    # Calculate distances to the nearest neighbors from vertices in the
+    # ground-truth pose to vertices in the estimated pose.
+    nn_index = spatial.cKDTree(pts_est)
+    nn_dists, _ = nn_index.query(pts_gt, k=1)
+
+    e = nn_dists.mean()
+    return e
+
+
+def re(R_est, R_gt):
+    """Rotational Error.
+
+    :param R_est: 3x3 ndarray with the estimated rotation matrix.
+    :param R_gt: 3x3 ndarray with the ground-truth rotation matrix.
+    :return: The calculated error.
+    """
+    error_cos = float(0.5 * (np.trace(R_est.dot(np.linalg.inv(R_gt))) - 1.0))
+
+    # Avoid invalid values due to numerical errors.
+    error_cos = min(1.0, max(-1.0, error_cos))
+
+    error = math.acos(error_cos)
+    error = 180.0 * error / np.pi  # Convert [rad] to [deg].
+    return error
+
+
+def te(t_est, t_gt):
+    """Translational Error.
+
+    :param t_est: 3x1 ndarray with the estimated translation vector.
+    :param t_gt: 3x1 ndarray with the ground-truth translation vector.
+    :return: The calculated error.
+    """
+    assert t_est.size == t_gt.size == 3
+    error = np.linalg.norm(t_gt - t_est)
+    return error
+
+
+def proj(R_est, t_est, R_gt, t_gt, K, pts):
+    """Average distance of projections of object model vertices [px]
+    - by Brachmann et al. (CVPR'16).
+
+    :param R_est: 3x3 ndarray with the estimated rotation matrix.
+    :param t_est: 3x1 ndarray with the estimated translation vector.
+    :param R_gt: 3x3 ndarray with the ground-truth rotation matrix.
+    :param t_gt: 3x1 ndarray with the ground-truth translation vector.
+    :param K: 3x3 ndarray with an intrinsic camera matrix.
+    :param pts: nx3 ndarray with 3D model points.
+    :return: The calculated error.
+ """ + proj_est = project_pts(pts, K, R_est, t_est) + proj_gt = project_pts(pts, K, R_gt, t_gt) + e = np.linalg.norm(proj_est - proj_gt, axis=1).mean() + return e + + +def compute_add_score(pts3d, diameter, R_pred, t_pred, R_gt, t_gt, percentage=0.1): + # R_gt, t_gt = pose_gt + # R_pred, t_pred = pose_pred + count = R_gt.shape[0] + mean_distances = np.zeros((count,), dtype=np.float32) + for i in range(count): + pts_xformed_gt = R_gt[i] * pts3d.transpose() + t_gt[i] + pts_xformed_pred = R_pred[i] * pts3d.transpose() + t_pred[i] + distance = np.linalg.norm(pts_xformed_gt - pts_xformed_pred, axis=0) + mean_distances[i] = np.mean(distance) + + threshold = diameter * percentage + score = (mean_distances < threshold).sum() / count + return score + + +def compute_adds_score(pts3d, diameter, pose_gt, pose_pred, percentage=0.1): + R_gt, t_gt = pose_gt + R_pred, t_pred = pose_pred + + count = R_gt.shape[0] + mean_distances = np.zeros((count,), dtype=np.float32) + for i in range(count): + if np.isnan(np.sum(t_pred[i])): + mean_distances[i] = np.inf + continue + pts_xformed_gt = R_gt[i] * pts3d.transpose() + t_gt[i] + pts_xformed_pred = R_pred[i] * pts3d.transpose() + t_pred[i] + kdt = spatial.KDTree(pts_xformed_gt.transpose(), metric='euclidean') + distance, _ = kdt.query(pts_xformed_pred.transpose(), k=1) + mean_distances[i] = np.mean(distance) + threshold = diameter * percentage + score = (mean_distances < threshold).sum() / count + return score + + +def compute_pose_error(diameter, pose_gt, pose_pred): + R_gt, t_gt = pose_gt + R_pred, t_pred = pose_pred + + count = R_gt.shape[0] + R_err = np.zeros(count) + t_err = np.zeros(count) + for i in range(count): + if np.isnan(np.sum(t_pred[i])): + continue + r_err = logm(np.dot(R_pred[i].transpose(), R_gt[i])) / 2 + R_err[i] = LA.norm(r_err, 'fro') + t_err[i] = LA.norm(t_pred[i] - t_gt[i]) + return np.median(R_err) * 180 / np.pi, np.median(t_err) / diameter + + +def cou_mask(mask_est, mask_gt): + """Complement over Union of 2D binary masks. + :param mask_est: hxw ndarray with the estimated mask. + :param mask_gt: hxw ndarray with the ground-truth mask. + :return: The calculated error. 
+ """ + mask_est_bool = mask_est.astype(bool) + mask_gt_bool = mask_gt.astype(bool) + + inter = np.logical_and(mask_gt_bool, mask_est_bool) + union = np.logical_or(mask_gt_bool, mask_est_bool) + + union_count = float(union.sum()) + if union_count > 0: + e = 1.0 - inter.sum() / union_count + else: + e = 1.0 + return e + + +def find_nn_idx(pc_src, pc_target): + """ + pc_src: (N1, 3) array + pc_target: (N2, 3) array + """ + dist_sq = -np.dot(pc_src, pc_target.T) * 2 + \ + np.square(np.linalg.norm(pc_src, axis=-1, keepdims=True)) + \ + np.square(np.linalg.norm(pc_target.T, axis=0, keepdims=True)) + idx_min = np.argmin(dist_sq, axis=0) + return idx_min + + +def add_metric(pose_pred, pose_targets, obj_points, diameter, symm=False, percentage=0.1, gpu=False): + + diam = diameter * percentage + model_pred = np.dot(obj_points, pose_pred[:, :3].T) + pose_pred[:, 3] + model_targets = np.dot(obj_points, pose_targets[:, :3].T) + pose_targets[:, 3] + + if symm: + # if gpu: + # idxs = nn_utils.find_nearest_point_idx(model_pred, model_targets) + # else: + idxs = find_nn_idx(model_pred, model_targets) + mean_dist = np.mean(np.linalg.norm(model_pred[idxs] - model_targets, 2, 1)) + else: + mean_dist = np.mean(np.linalg.norm(model_pred - model_targets, axis=-1)) + + return mean_dist < diam, (mean_dist, diam) + + +def eval_pose(r_est, t_est, r_gt, t_gt, pc, k, diameter, sym=False): + add_res = add(r_est, t_est, r_gt, t_gt, pc) + is_add = add(r_est, t_est, r_gt, t_gt, pc) < diameter * 0.1 + proj_res = proj(r_est, t_est, r_gt, t_gt, k, pc) + is_proj = proj(r_est, t_est, r_gt, t_gt, k, pc) < 5 + adi_res = 0 + is_adi = False + if sym: + add_res = adi(r_est, t_est, r_gt, t_gt, pc) + is_add = adi(r_est, t_est, r_gt, t_gt, pc) < diameter * 0.1 + # print("add_res", add_res, "is_add", is_add) + + return add_res, is_add, proj_res, is_proj, adi_res, is_adi + + +def load_predicted_csv(fname): + df = pd.read_csv(fname) + info_list = df.to_dict("records") + return info_list + + +def parse_Rt_in_csv(_item): + return np.array([float(i) for i in _item.strip(" ").split(" ")]) + + +def load_gt(json_path, obj_ids): + with open(json_path, 'r') as f: + gt_data = json.load(f) + + filtered_gt_data = {} + for obj_id in obj_ids: + if obj_id in gt_data: + filtered_gt_data[obj_id] = gt_data[obj_id] + else: + print(f"Warning: {obj_id} not in gt_data") + return filtered_gt_data + + +if __name__ == '__main__': + ap = argparse.ArgumentParser() + ap.add_argument("--path_data", type=str, required=True) + ap.add_argument("--pred_path", type=str, required=True) + ap.add_argument("-cls_name", "--class_name", type=str, + default='kiwi1', + help="[apple2, apricot, banana1, kiwi1, lemon2, orange2, peach1, pear2]") + ap.add_argument("-sym", "--symmetry", type=bool, + default=False) + + args = vars(ap.parse_args()) + + class_name = args["class_name"] + symmetry = args["symmetry"] + path_data = args["path_data"] + pred_path = args["pred_path"] + + id2obj = { + 1: "apple2", + 2: "apricot", + 3: "banana1", + 4: "kiwi1", + 5: "lemon2", + 6: "orange2", + 7: "peach1", + 8: "pear2" + } + + obj_number = list(id2obj.keys())[list(id2obj.values()).index(class_name)] + + basePath = args["path_data"] + "/test/{:06d}/".format(obj_number - 1) + print(basePath) + + pc_path = '/gdrnpp_bop2022/datasets/BOP_DATASETS/fruitbin/models/obj_{:06d}.ply'.format(obj_number) + + plydata = PlyData.read(pc_path) + elm = plydata.elements + data = np.asarray(elm[0][:]) + pc = np.zeros((len(data), 3)) + print("len(data)", len(data)) + print("len(pc)", len(pc)) + + diameter = 
+    diameter = read_diameter(path_data, obj_number)
+    for i in range(len(data)):
+        pc[i][0], pc[i][1], pc[i][2] = data[i][0], data[i][1], data[i][2]
+
+    add_res_ls = []
+    proj_res_ls = []
+    count_add = 0
+    count_iadd = 0
+    count_proj = 0
+
+    # ============== Loading Pose ===============
+    preds_csv = load_predicted_csv(pred_path)
+    preds = {}
+    obj_ids = []
+    for item in preds_csv:
+        if item["obj_id"] == obj_number:
+            im_key = "{}".format(item["im_id"])
+            item["R"] = parse_Rt_in_csv(item["R"]).reshape(3, 3)
+            item["t"] = parse_Rt_in_csv(item["t"]).reshape(3, 1)
+            if im_key not in preds:
+                preds[im_key] = []
+            preds[im_key].append(item)
+            obj_ids.append(str(item["im_id"]))
+
+    length_data = len(preds)
+    print("number of evaluating data :", length_data)
+
+    gt_data = load_gt(f"{basePath}scene_gt.json", obj_ids)
+
+    for im_id, items in preds.items():
+        gt_items = gt_data.get(im_id, [])
+        for item in items:
+            if gt_items:
+                item["gt_R"] = np.array(gt_items[0]["cam_R_m2c"]).reshape(3, 3)
+                item["gt_t"] = np.array(gt_items[0]["cam_t_m2c"]).reshape(3, 1)
+
+    # ============== Evaluation ===============
+
+    k = np.array([[543.25272224, 0., 320.25],
+                  [0., 724.33696299, 240.33333333],
+                  [0., 0., 1.]])
+
+    for im_id, items in preds.items():
+        for item in items:
+            r_est, t_est = item["R"], item["t"]
+            r_gt, t_gt = item.get("gt_R"), item.get("gt_t")
+            if r_gt is None or t_gt is None:
+                # no ground truth matched for this image
+                continue
+
+            add_res, is_add, proj_res, is_proj, adi_res, is_adi = eval_pose(r_est, t_est, r_gt, t_gt, pc, k, diameter, symmetry)
+            if is_add:
+                count_add += 1
+            if is_proj:
+                count_proj += 1
+            if is_adi:
+                count_iadd += 1
+
+    print("results for class : ", class_name)
+    print(f"ADD_Res: {count_add / length_data}")
+    print(f"ADI_Res: {count_iadd / length_data}")
+    print(f"Proj_Res: {count_proj / length_data}")
+    print(f"======================")
+
+    print("Done")
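For intuition on add() vs adi() above: ADD averages distances between corresponding model points, while ADI matches each ground-truth point to its nearest estimated point first, which makes it forgiving for indistinguishable (symmetric) views. A toy check with two points that are symmetric about the origin:

import numpy as np
from scipy import spatial

pts = np.array([[1.0, 0.0, 0.0], [-1.0, 0.0, 0.0]])
R_gt, t_gt = np.eye(3), np.zeros((3, 1))
R_est = np.diag([-1.0, -1.0, 1.0])  # 180 deg rotation about z
t_est = np.zeros((3, 1))

pts_gt = (R_gt.dot(pts.T) + t_gt).T
pts_est = (R_est.dot(pts.T) + t_est).T
print(np.linalg.norm(pts_est - pts_gt, axis=1).mean())  # ADD = 2.0
nn_dists, _ = spatial.cKDTree(pts_est).query(pts_gt, k=1)
print(nn_dists.mean())                                  # ADI = 0.0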
diff --git a/core/gdrn_modeling/tools/fruitbin/fruitbin_2_vis_poses.py b/core/gdrn_modeling/tools/fruitbin/fruitbin_2_vis_poses.py
new file mode 100644
index 0000000000000000000000000000000000000000..6366f2bd01d1198af19fe7d03620d7bb4bc01426
--- /dev/null
+++ b/core/gdrn_modeling/tools/fruitbin/fruitbin_2_vis_poses.py
@@ -0,0 +1,219 @@
+import mmcv
+import os.path as osp
+import numpy as np
+import sys
+from tqdm import tqdm
+from detectron2.data import DatasetCatalog, MetadataCatalog
+from detectron2.structures import BoxMode
+import torch
+import pandas as pd
+
+cur_dir = osp.dirname(osp.abspath(__file__))
+sys.path.insert(0, osp.join(cur_dir, "../../../../"))
+
+from lib.vis_utils.colormap import colormap
+from lib.utils.mask_utils import mask2bbox_xyxy, cocosegm2mask, get_edge
+from core.utils.data_utils import crop_resize_by_warp_affine, read_image_mmcv
+from core.gdrn_modeling.datasets.dataset_factory import register_datasets
+from transforms3d.quaternions import quat2mat
+from lib.egl_renderer.egl_renderer_v3 import EGLRenderer
+
+
+out_size = 512
+score_thr = 0.3
+colors = colormap(rgb=False, maximum=255)
+
+# object info
+id2obj = {
+    1: "apple2",
+    2: "apricot",
+    3: "banana1",
+    4: "kiwi1",
+    5: "lemon2",
+    6: "orange2",
+    7: "peach1",
+    8: "pear2",
+}
+objects = list(id2obj.values())
+
+
+def load_predicted_csv(fname):
+    df = pd.read_csv(fname)
+    info_list = df.to_dict("records")
+    return info_list
+
+
+def parse_Rt_in_csv(_item):
+    return np.array([float(i) for i in _item.strip(" ").split(" ")])
+
+
+tensor_kwargs = {"device": torch.device("cuda"), "dtype": torch.float32}
+image_tensor = torch.empty((out_size, out_size, 4), **tensor_kwargs).detach()
+seg_tensor = torch.empty((out_size, out_size, 4), **tensor_kwargs).detach()
+# image_tensor = torch.empty((480, 640, 4), **tensor_kwargs).detach()
+
+model_dir = "datasets/BOP_DATASETS/fruitbin/models/"
+
+model_paths = [osp.join(model_dir, f"obj_{obj_id:06d}.ply") for obj_id in id2obj]
+texture_paths = [osp.join(model_dir, f"obj_{obj_id:06d}.png") for obj_id in id2obj]
+
+ren = EGLRenderer(
+    model_paths,
+    texture_paths=texture_paths,
+    vertex_scale=0.001,
+    use_cache=True,
+    width=out_size,
+    height=out_size,
+)
+
+# NOTE: this is for fruitbin_bop_test
+pred_path = "/gdrnpp_bop2022/output/output_camera_occ_03/fruitbin/convnext_a6_AugCosyAAEGray_BG05_mlL1_DMask_amodalClipBox_classAware_fruitbin/inference_model_0128519/fruitbin_test/convnext-a6-AugCosyAAEGray-BG05-mlL1-DMask-amodalClipBox-classAware-fruitbin-test-iter0_fruitbin-test.csv"
+
+vis_dir = "/gdrnpp_bop2022/output/output_camera_occ_03/fruitbin/convnext_a6_AugCosyAAEGray_BG05_mlL1_DMask_amodalClipBox_classAware_fruitbin/inference_/vis_gt_pred"
+mmcv.mkdir_or_exist(vis_dir)
+
+print(pred_path)
+preds_csv = load_predicted_csv(pred_path)
+preds = {}
+for item in preds_csv:
+    im_key = "{}/{}".format(item["scene_id"], item["im_id"])
+    item["time"] = float(item["time"])
+    item["score"] = float(item["score"])
+    item["R"] = parse_Rt_in_csv(item["R"]).reshape(3, 3)
+    item["t"] = parse_Rt_in_csv(item["t"]) / 1000
+    item["obj_name"] = id2obj[item["obj_id"]]
+    if im_key not in preds:
+        preds[im_key] = []
+    preds[im_key].append(item)
+
+dataset_name = "fruitbin_test"
+print(dataset_name)
+register_datasets([dataset_name])
+
+meta = MetadataCatalog.get(dataset_name)
+print("MetadataCatalog: ", meta)
+objs = meta.objs
+
+dset_dicts = DatasetCatalog.get(dataset_name)
+for d in tqdm(dset_dicts):
+    K = d["cam"]
+    file_name = d["file_name"]
+    scene_im_id = d["scene_im_id"]
+    img = read_image_mmcv(file_name, format="BGR")
+
+    scene_im_id_split = d["scene_im_id"].split("/")
+    scene_id = scene_im_id_split[0]
+    im_id = int(scene_im_id_split[1])
+
+    imH, imW = img.shape[:2]
+    annos = d["annotations"]
+    masks = [cocosegm2mask(anno["segmentation"], imH, imW) for anno in annos]
+    fg_mask = sum(masks).astype("bool").astype("uint8")
+    minx, miny, maxx, maxy = mask2bbox_xyxy(fg_mask)
+
+    bboxes = [anno["bbox"] for anno in annos]
+    bbox_modes = [anno["bbox_mode"] for anno in annos]
+    bboxes_xyxy = np.array(
+        [BoxMode.convert(box, box_mode, BoxMode.XYXY_ABS) for box, box_mode in zip(bboxes, bbox_modes)]
+    )
+
+    quats = [anno["quat"] for anno in annos]
+    transes = [anno["trans"] for anno in annos]
+    Rs = [quat2mat(quat) for quat in quats]
+    # 0-based label
+    cat_ids = [anno["category_id"] for anno in annos]
+    obj_names = [objs[cat_id] for cat_id in cat_ids]
+
+    gt_Rs = []
+    gt_ts = []
+    gt_labels = []
+
+    for anno_i, anno in enumerate(annos):
+        obj_name = obj_names[anno_i]
+        gt_labels.append(objects.index(obj_name))  # 0-based label
+
+        gt_Rs.append(Rs[anno_i])
+        gt_ts.append(transes[anno_i])
+
+    if scene_im_id not in preds:
+        print(scene_im_id, "not detected")
+        continue
+    cur_preds = preds[scene_im_id]
+    kpts_2d_est = []
+    est_Rs = []
+    est_ts = []
+    est_labels = []
+    for pred_i, pred in enumerate(cur_preds):
+        try:
+            R_est = pred["R"]
+            t_est = pred["t"]
+            score = pred["score"]
+            obj_name = pred["obj_name"]
+        except KeyError:
+            continue
+        if score < score_thr:
+            continue
+
+        est_Rs.append(R_est)
+        est_ts.append(t_est)
+        est_labels.append(objects.index(obj_name))  # 0-based label
+
+    center = np.array([(minx + maxx) / 2, (miny + maxy) / 2])
+    scale = max(maxx - minx, maxy - miny) * 1.5  # + 10
+    crop_minx = max(0, center[0] - scale / 2)
+    crop_miny = max(0, center[1] - scale / 2)
+    crop_maxx = min(imW - 1, center[0] + scale / 2)
+    crop_maxy = min(imH - 1, center[1] + scale / 2)
+    scale = min(scale, min(crop_maxx - crop_minx, crop_maxy - crop_miny))
+
+    zoomed_im = crop_resize_by_warp_affine(img, center, scale, out_size)
+    im_zoom_gray = mmcv.bgr2gray(zoomed_im, keepdim=True)
+    im_zoom_gray_3 = np.concatenate([im_zoom_gray, im_zoom_gray, im_zoom_gray], axis=2)
+    # print(im_zoom_gray.shape)
+    K_zoom = K.copy()
+    K_zoom[0, 2] -= center[0] - scale / 2
+    K_zoom[1, 2] -= center[1] - scale / 2
+    K_zoom[0, :] *= out_size / scale
+    K_zoom[1, :] *= out_size / scale
+
+    gt_poses = [np.hstack([_R, _t.reshape(3, 1)]) for _R, _t in zip(gt_Rs, gt_ts)]
+    est_poses = [np.hstack([_R, _t.reshape(3, 1)]) for _R, _t in zip(est_Rs, est_ts)]
+
+    ren.render(
+        est_labels,
+        est_poses,
+        K=K_zoom,
+        image_tensor=image_tensor,
+        background=im_zoom_gray_3,
+    )
+    ren_bgr = (image_tensor[:, :, :3].detach().cpu().numpy() + 0.5).astype("uint8")
+
+    for gt_label, gt_pose in zip(gt_labels, gt_poses):
+        ren.render([gt_label], [gt_pose], K=K_zoom, seg_tensor=seg_tensor)
+        gt_mask = (seg_tensor[:, :, 0].detach().cpu().numpy() > 0).astype("uint8")
+        gt_edge = get_edge(gt_mask, bw=3, out_channel=1)
+        ren_bgr[gt_edge != 0] = np.array(mmcv.color_val("blue"))
+
+    for est_label, est_pose in zip(est_labels, est_poses):
+        ren.render([est_label], [est_pose], K=K_zoom, seg_tensor=seg_tensor)
+        est_mask = (seg_tensor[:, :, 0].detach().cpu().numpy() > 0).astype("uint8")
+        est_edge = get_edge(est_mask, bw=3, out_channel=1)
+        ren_bgr[est_edge != 0] = np.array(mmcv.color_val("green"))
+
+    vis_im = ren_bgr
+
+    # vis_im_add = (im_zoom_gray_3 * 0.3 + ren_bgr * 0.7).astype("uint8")
+
+    save_path_0 = osp.join(vis_dir, "{}_{:06d}_vis0.png".format(scene_id, im_id))
+    mmcv.imwrite(zoomed_im, save_path_0)
+
+    save_path_1 = osp.join(vis_dir, "{}_{:06d}_vis1.png".format(scene_id, im_id))
+    mmcv.imwrite(vis_im, save_path_1)
+
+    # if True:
+    #     # grid_show([zoomed_im[:, :, ::-1], vis_im[:, :, ::-1]], ["im", "est"], row=1, col=2)
+    #     # im_show = cv2.hconcat([zoomed_im, vis_im, vis_im_add])
+    #     im_show = cv2.hconcat([zoomed_im, vis_im])
+    #     cv2.imshow("im_est", im_show)
+    #     if cv2.waitKey(0) == 27:
+    #         break  # esc to quit
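The K_zoom update in the crop-based visualization above follows the usual pinhole-camera rule: cropping shifts the principal point, and resizing scales the first two rows of K. A standalone sketch with example numbers:

import numpy as np

K = np.array([[543.25, 0.0, 320.25],
              [0.0, 724.34, 240.33],
              [0.0, 0.0, 1.0]])  # example intrinsics
center, scale, out_size = np.array([300.0, 200.0]), 256.0, 512

K_zoom = K.copy()
K_zoom[0, 2] -= center[0] - scale / 2  # crop origin shifts the principal point
K_zoom[1, 2] -= center[1] - scale / 2
K_zoom[0, :] *= out_size / scale       # resize scales focal lengths and principal point
K_zoom[1, :] *= out_size / scale
print(K_zoom)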
"orange2", + 7: "peach1", + 8: "pear2", +} +objects = list(id2obj.values()) + + +def load_predicted_csv(fname): + df = pd.read_csv(fname) + info_list = df.to_dict("records") + return info_list + + +def parse_Rt_in_csv(_item): + return np.array([float(i) for i in _item.strip(" ").split(" ")]) + + +width = 640 +height = 480 + +tensor_kwargs = {"device": torch.device("cuda"), "dtype": torch.float32} +image_tensor = torch.empty((height, width, 4), **tensor_kwargs).detach() +seg_tensor = torch.empty((height, width, 4), **tensor_kwargs).detach() +# image_tensor = torch.empty((480, 640, 4), **tensor_kwargs).detach() + +model_dir = "datasets/BOP_DATASETS/fruitbin/models/" + +model_paths = [osp.join(model_dir, f"obj_{obj_id:06d}.ply") for obj_id in id2obj] +texture_paths = [osp.join(model_dir, f"obj_{obj_id:06d}.png") for obj_id in id2obj] + +ren = EGLRenderer( + model_paths, + texture_paths=texture_paths, + vertex_scale=0.001, + use_cache=True, + width=width, + height=height, +) + +# NOTE: this is for fruitbin_bop_test +pred_path = "/gdrnpp_bop2022/output/output_camera_occ_03/fruitbin/convnext_a6_AugCosyAAEGray_BG05_mlL1_DMask_amodalClipBox_classAware_fruitbin/inference_model_0128519/fruitbin_test/convnext-a6-AugCosyAAEGray-BG05-mlL1-DMask-amodalClipBox-classAware-fruitbin-test-iter0_fruitbin-test.csv" + +vis_dir = "/gdrnpp_bop2022/output/output_camera_occ_03/fruitbin/convnext_a6_AugCosyAAEGray_BG05_mlL1_DMask_amodalClipBox_classAware_fruitbin/inference_/vis_gt_pred" +mmcv.mkdir_or_exist(vis_dir) + +print(pred_path) +preds_csv = load_predicted_csv(pred_path) +preds = {} +for item in preds_csv: + im_key = "{}/{}".format(item["scene_id"], item["im_id"]) + item["time"] = float(item["time"]) + item["score"] = float(item["score"]) + item["R"] = parse_Rt_in_csv(item["R"]).reshape(3, 3) + item["t"] = parse_Rt_in_csv(item["t"]) / 1000 + item["obj_name"] = id2obj[item["obj_id"]] + if im_key not in preds: + preds[im_key] = [] + preds[im_key].append(item) + +dataset_name = "fruitbin_test" +print(dataset_name) +register_datasets([dataset_name]) + +meta = MetadataCatalog.get(dataset_name) +print("MetadataCatalog: ", meta) +objs = meta.objs + +dset_dicts = DatasetCatalog.get(dataset_name) +for d in tqdm(dset_dicts): + K = d["cam"] + file_name = d["file_name"] + scene_im_id = d["scene_im_id"] + img = read_image_mmcv(file_name, format="BGR") + + scene_im_id_split = d["scene_im_id"].split("/") + scene_id = scene_im_id_split[0] + im_id = int(scene_im_id_split[1]) + + imH, imW = img.shape[:2] + annos = d["annotations"] + masks = [cocosegm2mask(anno["segmentation"], imH, imW) for anno in annos] + fg_mask = sum(masks).astype("bool").astype("uint8") + minx, miny, maxx, maxy = mask2bbox_xyxy(fg_mask) + + bboxes = [anno["bbox"] for anno in annos] + bbox_modes = [anno["bbox_mode"] for anno in annos] + bboxes_xyxy = np.array( + [BoxMode.convert(box, box_mode, BoxMode.XYXY_ABS) for box, box_mode in zip(bboxes, bbox_modes)] + ) + + quats = [anno["quat"] for anno in annos] + transes = [anno["trans"] for anno in annos] + Rs = [quat2mat(quat) for quat in quats] + # 0-based label + cat_ids = [anno["category_id"] for anno in annos] + obj_names = [objs[cat_id] for cat_id in cat_ids] + + gt_Rs = [] + gt_ts = [] + gt_labels = [] + + for anno_i, anno in enumerate(annos): + obj_name = obj_names[anno_i] + gt_labels.append(objects.index(obj_name)) # 0-based label + + gt_Rs.append(Rs[anno_i]) + gt_ts.append(transes[anno_i]) + if scene_im_id not in preds: + print(scene_im_id, "not detected") + continue + cur_preds = preds[scene_im_id] + 
diff --git a/core/utils/dataset_utils.py b/core/utils/dataset_utils.py
index decc0ace37ea290fcb591af3253f789910cb18ea..56caa5f2645336bd2fa5777702cc527fabca4d9d 100644
--- a/core/utils/dataset_utils.py
+++ b/core/utils/dataset_utils.py
@@ -194,7 +194,7 @@ def load_detections_into_dataset(
     obj_annotations = {obj: [] for obj in objs}
     for det in dets_i:
         obj_id = det["obj_id"]
-        bbox_est = det["bbox_est"]  # xywh
+        bbox = det["bbox"]  # xywh
         time = det.get("time", 0.0)
         score = det.get("score", 1.0)
         if score < score_thr:
@@ -211,7 +211,7 @@ def load_detections_into_dataset(
         label = objs.index(obj_name)
         inst = {
             "category_id": label,
-            "bbox_est": bbox_est,
+            "bbox": bbox,
             "bbox_mode": BoxMode.XYWH_ABS,
             "score": score,
             "time": time,
@@ -292,7 +292,7 @@ def load_init_poses_into_dataset(
         obj_id = det["obj_id"]
         # NOTE: need to prepare init poses into this format
         pose_est = np.array(det["pose_est"], dtype=np.float32).reshape(3, 4)
-        bbox_est = det.get("bbox_est", None)  # xywh or None
+        bbox = det.get("bbox", None)  # xywh or None
         time = det.get("time", 0.0)
         score = det.get("score", 1.0)
         if score < score_thr:
@@ -314,8 +314,8 @@ def load_init_poses_into_dataset(
             "time": time,
             "model_info": models_info[str(obj_id)],  # TODO: maybe just load this in the main function
         }
-        if bbox_est is not None:  # if None, compute bboxes from poses and 3D points later
-            inst["bbox_est"] = bbox_est
+        if bbox is not None:  # if None, compute bboxes from poses and 3D points later
+            inst["bbox"] = bbox
             inst["bbox_mode"] = BoxMode.XYWH_ABS
         obj_annotations[obj_name].append(inst)
     for obj, cur_annos in obj_annotations.items():
@@ -400,7 +400,7 @@ def load_catre_init_into_dataset(
             "time": time,
         }
         if with_bboxes:
-            inst["bbox_est"] = det["bbox_est"]
+            inst["bbox"] = det["bbox"]
             inst["bbox_mode"] = BoxMode.XYXY_ABS
         if with_masks:
             inst["segmentation"] = det["segmentation"]  # overwrite gt masks
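After this renaming, detections fed through load_detections_into_dataset are expected to carry a "bbox" key in xywh format; the instance dict it builds looks roughly like this sketch (model_info omitted, placeholder values):

from detectron2.structures import BoxMode

inst = {
    "category_id": 2,                    # 0-based label
    "bbox": [123.0, 45.0, 80.0, 60.0],   # xywh from the detection file
    "bbox_mode": BoxMode.XYWH_ABS,
    "score": 0.98,
    "time": 0.05,
}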
diff --git a/det/yolox/data/datasets/fruitbin_bop_test.py b/det/yolox/data/datasets/fruitbin_bop_test.py
index fd1abeb1fbabe1226b92a1f8c98e785e33064b8d..b0996daf24ba19d2f20cafff2d5f80acda3e6746 100644
--- a/det/yolox/data/datasets/fruitbin_bop_test.py
+++ b/det/yolox/data/datasets/fruitbin_bop_test.py
@@ -127,10 +127,10 @@ class FRUITBIN_BOP_TEST_Dataset:
         for scene_id, im_id in tqdm(scene_im_ids):
             str_im_id = str(im_id)
             scene_root = osp.join(self.dataset_root, f"{scene_id:06d}")
-            rgb_path = osp.join(scene_root, "rgb/{:d}.png").format(im_id)
+            rgb_path = osp.join(scene_root, "rgb/{:06d}.png").format(im_id)
             assert osp.exists(rgb_path), rgb_path
 
-            depth_path = osp.join(scene_root, "depth/{:d}.png".format(im_id))
+            depth_path = osp.join(scene_root, "depth/{:06d}.png".format(im_id))
 
             scene_id = int(rgb_path.split("/")[-3])
 
@@ -177,7 +177,7 @@ class FRUITBIN_BOP_TEST_Dataset:
                 # )
                 mask_visib_file = osp.join(
                     scene_root,
-                    "mask_visib/{:d}.png".format(im_id, anno_i),
+                    "mask_visib/{:06d}.png".format(im_id, anno_i),
                 )
                 # assert osp.exists(mask_file), mask_file
                 assert osp.exists(mask_visib_file), mask_visib_file
@@ -196,7 +196,7 @@ class FRUITBIN_BOP_TEST_Dataset:
 
                 inst = {
                     "category_id": cur_label,  # 0-based label
-                    "bbox": bbox_obj,  # TODO: load both bbox_obj and bbox_visib
+                    "bbox": bbox_visib,  # TODO: load both bbox_obj and bbox_visib
                     "bbox_mode": BoxMode.XYWH_ABS,
                     "pose": pose,
                     "quat": quat,
diff --git a/det/yolox/data/datasets/fruitbin_pbr.py b/det/yolox/data/datasets/fruitbin_pbr.py
index 93bda782a1f716bf4243cf7f6206d01efd84aa93..eb30293592bc141d633498a5c9e24cacdf334e55 100644
--- a/det/yolox/data/datasets/fruitbin_pbr.py
+++ b/det/yolox/data/datasets/fruitbin_pbr.py
@@ -43,7 +43,7 @@ class FRUITBIN_PBR_Dataset:
         )
         self.xyz_root = data_cfg.get("xyz_root", osp.join(self.dataset_root, "xyz_crop"))
         assert osp.exists(self.dataset_root), self.dataset_root
-        self.models_root = data_cfg["models_root"]  # BOP_DATASETS/ycbv/models
+        self.models_root = data_cfg["models_root"]  # BOP_DATASETS/fruitbin/models
         self.scale_to_meter = data_cfg["scale_to_meter"]  # 0.001
 
         self.with_masks = data_cfg["with_masks"]
@@ -114,10 +114,10 @@ class FRUITBIN_PBR_Dataset:
             for str_im_id in tqdm(gt_dict, postfix=f"{scene_id}"):
                 int_im_id = int(str_im_id)
-                rgb_path = osp.join(scene_root, "rgb/{:d}.png").format(int_im_id)
+                rgb_path = osp.join(scene_root, "rgb/{:06d}.png").format(int_im_id)
                 assert osp.exists(rgb_path), rgb_path
 
-                depth_path = osp.join(scene_root, "depth/{:d}.png".format(int_im_id))
+                depth_path = osp.join(scene_root, "depth/{:06d}.png".format(int_im_id))
 
                 scene_im_id = f"{scene_id}/{int_im_id}"
 
@@ -164,7 +164,7 @@ class FRUITBIN_PBR_Dataset:
                 )
                 mask_visib_file = osp.join(
                     scene_root,
-                    "mask_visib/{:d}.png".format(int_im_id),
+                    "mask_visib/{:06d}.png".format(int_im_id),
                 )
                 # assert osp.exists(mask_file), mask_file
                 assert osp.exists(mask_visib_file), mask_visib_file
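The bbox_obj → bbox_visib switch above changes which of the two boxes stored in scene_gt_info.json feeds YOLOX: the amodal box of the full (possibly occluded) object versus the box of its visible part. Sketch of one annotation entry (placeholder numbers):

gt_info = {
    "bbox_obj": [100, 80, 120, 90],    # amodal box of the whole object
    "bbox_visib": [110, 85, 80, 60],   # box of the visible fragment, now used
    "visib_fract": 0.62,
}
bbox = gt_info["bbox_visib"]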
diff --git a/lib/egl_renderer/egl_renderer_v3.py b/lib/egl_renderer/egl_renderer_v3.py
index 1f36e09a6dec4343cee05a991089728e05014e89..50a699b1258bd6ef98264ec41bd6879c3ab8890f 100644
--- a/lib/egl_renderer/egl_renderer_v3.py
+++ b/lib/egl_renderer/egl_renderer_v3.py
@@ -560,7 +560,7 @@ class EGLRenderer(object):
                 obj_path,
                 vertex_scale=vertex_scale,
                 is_textured=is_textured,
-                use_cache=self.use_cache,
+                use_cache=False,
                 cad_model_color=cad_model_color,
             )
             is_cad = mesh["is_cad"]
diff --git a/lib/pysixd/scripts/eval_bop19.py b/lib/pysixd/scripts/eval_bop19.py
index 58d7f25422b7b7653eb4495a95787e79ca77918b..cccebad1597a3f8218116cf458341e21e8bbd908 100644
--- a/lib/pysixd/scripts/eval_bop19.py
+++ b/lib/pysixd/scripts/eval_bop19.py
@@ -34,6 +34,7 @@ p = {
         "tudl": 15,
         "tyol": 15,
         "ycbv": 15,
+        "fruitbin": 15,
         "hope": 15,
     },
     "vsd_taus": list(np.arange(0.05, 0.51, 0.05)),
diff --git a/lib/pysixd/scripts/eval_calc_errors.py b/lib/pysixd/scripts/eval_calc_errors.py
index 987fcdf9053b2dcd68a43283628d888bb03952f5..40642527abfe4b08c50c10616a947d4757ba52bc 100644
--- a/lib/pysixd/scripts/eval_calc_errors.py
+++ b/lib/pysixd/scripts/eval_calc_errors.py
@@ -47,6 +47,7 @@ p = {
         "tudl": 15,
         "tyol": 15,
         "ycbv": 15,
+        "fruitbin": 15,
         "hope": 15,
     },
     "vsd_taus": list(np.arange(0.05, 0.51, 0.05)),
@@ -298,7 +299,7 @@ for result_filename in p["result_filenames"]:
 
         # Load the depth image if VSD is selected as the pose error function.
         depth_im = None
-        if p["error_type"] == "vsd":
+        if p["error_type"] == "vsd":
             depth_path = dp_split["depth_tpath"].format(scene_id=scene_id, im_id=im_id)
             depth_im = inout.load_depth(depth_path)
             depth_im *= scene_camera[im_id]["depth_scale"]  # Convert to [mm].
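Assuming the BOP-toolkit convention, the dict gaining "fruitbin": 15 in these eval scripts is vsd_deltas, the misalignment tolerance delta (in mm) of the VSD depth test; 15 matches the value used for the other datasets listed. Rough shape of the parameter block being extended:

import numpy as np

p_error = {
    "type": "vsd",
    "vsd_deltas": {"ycbv": 15, "fruitbin": 15},     # tolerance [mm] per dataset
    "vsd_taus": list(np.arange(0.05, 0.51, 0.05)),  # misalignment thresholds
}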
diff --git a/lib/pysixd/scripts/eval_calc_scores.py b/lib/pysixd/scripts/eval_calc_scores.py
index 0a02b16bff9f7709c0fe979a8ce38b3a849628e4..9241bb5df47fd1159b881254695d335b0c2929d1 100644
--- a/lib/pysixd/scripts/eval_calc_scores.py
+++ b/lib/pysixd/scripts/eval_calc_scores.py
@@ -209,33 +209,35 @@ for error_dir_path in p["error_dir_paths"]:
         for im_id, im_targets in scene_targets.items():
             scene_gt_curr[im_id] = scene_gt[im_id]
 
-            # Determine which GT poses are valid.
-            im_gt = scene_gt[im_id]
-            im_gt_info = scene_gt_info[im_id]
-            scene_gt_valid[im_id] = [True] * len(im_gt)
-            if p["visib_gt_min"] >= 0:
-                # All GT poses visible from at least 100 * p['visib_gt_min'] percent
-                # are considered valid.
-                for gt_id, gt in enumerate(im_gt):
-                    is_target = gt["obj_id"] in im_targets.keys()
-                    is_visib = im_gt_info[gt_id]["visib_fract"] >= p["visib_gt_min"]
-                    scene_gt_valid[im_id][gt_id] = is_target and is_visib
-            else:
-                # k most visible GT poses are considered valid, where k is given by
-                # the "inst_count" item loaded from "targets_filename".
-                gt_ids_sorted = sorted(
-                    range(len(im_gt)),
-                    key=lambda gt_id: im_gt_info[gt_id]["visib_fract"],
-                    reverse=True,
-                )
-                to_add = {obj_id: trg["inst_count"] for obj_id, trg in im_targets.items()}
-                for gt_id in gt_ids_sorted:
-                    obj_id = im_gt[gt_id]["obj_id"]
-                    if obj_id in to_add.keys() and to_add[obj_id] > 0:
-                        scene_gt_valid[im_id][gt_id] = True
-                        to_add[obj_id] -= 1
-                    else:
-                        scene_gt_valid[im_id][gt_id] = False
+            scene_gt_valid[im_id] = [True] * len(scene_gt[im_id])
+
+            # # Determine which GT poses are valid.
+            # im_gt = scene_gt[im_id]
+            # im_gt_info = scene_gt_info[im_id]
+            # scene_gt_valid[im_id] = [True] * len(im_gt)
+            # if p["visib_gt_min"] >= 0:
+            #     # All GT poses visible from at least 100 * p['visib_gt_min'] percent
+            #     # are considered valid.
+            #     for gt_id, gt in enumerate(im_gt):
+            #         is_target = gt["obj_id"] in im_targets.keys()
+            #         is_visib = im_gt_info[gt_id]["visib_fract"] >= p["visib_gt_min"]
+            #         scene_gt_valid[im_id][gt_id] = is_target and is_visib
+            # else:
+            #     # k most visible GT poses are considered valid, where k is given by
+            #     # the "inst_count" item loaded from "targets_filename".
+            #     gt_ids_sorted = sorted(
+            #         range(len(im_gt)),
+            #         key=lambda gt_id: im_gt_info[gt_id]["visib_fract"],
+            #         reverse=True,
+            #     )
+            #     to_add = {obj_id: trg["inst_count"] for obj_id, trg in im_targets.items()}
+            #     for gt_id in gt_ids_sorted:
+            #         obj_id = im_gt[gt_id]["obj_id"]
+            #         if obj_id in to_add.keys() and to_add[obj_id] > 0:
+            #             scene_gt_valid[im_id][gt_id] = True
+            #             to_add[obj_id] -= 1
+            #         else:
+            #             scene_gt_valid[im_id][gt_id] = False
 
         # Load pre-calculated errors of the pose estimates w.r.t. the GT poses.
         scene_errs_path = p["error_tpath"].format(
diff --git a/lib/pysixd/scripts/eval_pose_results_more.py b/lib/pysixd/scripts/eval_pose_results_more.py
index 421eecc74556a0ec8394fe756ba996c9a7fb4d8d..8afe8a69d49a7981bd219811bedc2cb081052ee9 100644
--- a/lib/pysixd/scripts/eval_pose_results_more.py
+++ b/lib/pysixd/scripts/eval_pose_results_more.py
@@ -55,6 +55,7 @@ p = {
         "tudl": 15,
         "tyol": 15,
         "ycbv": 15,
+        "fruitbin": 15,
         "hope": 15,
     },
     "vsd_taus": list(np.arange(0.05, 0.51, 0.05)),
diff --git a/lib/pysixd/scripts/show_performance_bop19.py b/lib/pysixd/scripts/show_performance_bop19.py
index 677e92fc799b24ab3b9a92f72a498cad44edd848..229becd74e56f53c0fa8761fd90c408c53302957 100644
--- a/lib/pysixd/scripts/show_performance_bop19.py
+++ b/lib/pysixd/scripts/show_performance_bop19.py
@@ -35,6 +35,7 @@ p = {
         "tudl": 15,
         "tyol": 15,
         "ycbv": 15,
+        "fruitbin": 15,
         "hope": 15,
     },
     "vsd_taus": list(np.arange(0.05, 0.51, 0.05)),
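The eval_calc_scores.py change above short-circuits the GT-validity filter: every ground-truth pose is now a matchable target, regardless of visib_fract or the per-image inst_count. The effect in miniature (ignoring the obj-id target check of the old code):

im_gt_info = [{"visib_fract": 0.95}, {"visib_fract": 0.03}]
visib_gt_min = 0.1

valid_old = [g["visib_fract"] >= visib_gt_min for g in im_gt_info]  # [True, False]
valid_new = [True] * len(im_gt_info)                                # [True, True]
print(valid_old, valid_new)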