diff --git a/include/neural-graphics-primitives/common.h b/include/neural-graphics-primitives/common.h
index 30c59cd9478dbdcb6339d07ce43a572e5e471298..cf2fcb1b052cd715bba433709b9ac4f5ac9c0fbe 100644
--- a/include/neural-graphics-primitives/common.h
+++ b/include/neural-graphics-primitives/common.h
@@ -192,7 +192,9 @@ enum class ELensMode : int {
 	OpenCV,
 	FTheta,
 	LatLong,
+	OpenCVFisheye,
 };
+static constexpr const char* LensModeStr = "Perspective\0OpenCV\0F-Theta\0LatLong\0OpenCV Fisheye\0\0";
 
 struct Lens {
 	ELensMode mode = ELensMode::Perspective;
diff --git a/include/neural-graphics-primitives/common_device.cuh b/include/neural-graphics-primitives/common_device.cuh
index a5d3c29e103691d711b87167a7dd7ade71ecf923..8cf8c42f570ea4fabb678b0f999dae9c0616d149 100644
--- a/include/neural-graphics-primitives/common_device.cuh
+++ b/include/neural-graphics-primitives/common_device.cuh
@@ -143,7 +143,7 @@ __device__ void deposit_image_gradient(const Eigen::Matrix<float, N_DIMS, 1>& va
 }
 
 template <typename T>
-__device__ __host__ inline void apply_opencv_lens_distortion(const T* extra_params, const T u, const T v, T* du, T* dv) {
+NGP_HOST_DEVICE inline void opencv_lens_distortion_delta(const T* extra_params, const T u, const T v, T* du, T* dv) {
 	const T k1 = extra_params[0];
 	const T k2 = extra_params[1];
 	const T p1 = extra_params[2];
@@ -159,7 +159,32 @@ __device__ __host__ inline void apply_opencv_lens_distortion(const T* extra_para
 }
 
 template <typename T>
-__device__ __host__ inline void iterative_opencv_lens_undistortion(const T* params, T* u, T* v) {
+NGP_HOST_DEVICE inline void opencv_fisheye_lens_distortion_delta(const T* extra_params, const T u, const T v, T* du, T* dv) {
+	const T k1 = extra_params[0];
+	const T k2 = extra_params[1];
+	const T k3 = extra_params[2];
+	const T k4 = extra_params[3];
+
+	const T r = std::sqrt(u * u + v * v);
+
+	if (r > T(std::numeric_limits<double>::epsilon())) {
+		const T theta = std::atan(r);
+		const T theta2 = theta * theta;
+		const T theta4 = theta2 * theta2;
+		const T theta6 = theta4 * theta2;
+		const T theta8 = theta4 * theta4;
+		const T thetad =
+			theta * (T(1) + k1 * theta2 + k2 * theta4 + k3 * theta6 + k4 * theta8);
+		*du = u * thetad / r - u;
+		*dv = v * thetad / r - v;
+	} else {
+		*du = T(0);
+		*dv = T(0);
+	}
+}
+
+template <typename T, typename F>
+NGP_HOST_DEVICE inline void iterative_lens_undistortion(const T* params, T* u, T* v, F distortion_fun) {
 	// Parameters for Newton iteration using numerical differentiation with
 	// central differences, 100 iterations should be enough even for complex
 	// camera models with higher order terms.
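The new `opencv_fisheye_lens_distortion_delta` above follows OpenCV's fisheye model: with r = sqrt(u² + v²) it computes θ = atan(r) and θd = θ·(1 + k1·θ² + k2·θ⁴ + k3·θ⁶ + k4·θ⁸), and it returns the *offset* from the undistorted point, (u·θd/r − u, v·θd/r − v), rather than the distorted point itself. That shared delta convention is what lets both OpenCV models plug into the generic `iterative_lens_undistortion` introduced here. A self-contained sketch of the convention (plain C++, made-up coefficients, and a naive fixed-point inversion for illustration rather than the Newton/central-difference loop the patch actually uses):

```cpp
// Sketch only: demonstrates the delta convention, distorted = (u + du, v + dv).
#include <cmath>
#include <cstdio>
#include <limits>

void fisheye_delta(const float* k, float u, float v, float* du, float* dv) {
	float r = std::sqrt(u * u + v * v);
	if (r <= std::numeric_limits<float>::epsilon()) { *du = *dv = 0.0f; return; }
	float t = std::atan(r), t2 = t * t;
	// Horner form of theta * (1 + k1 t^2 + k2 t^4 + k3 t^6 + k4 t^8)
	float thetad = t * (1.0f + t2 * (k[0] + t2 * (k[1] + t2 * (k[2] + t2 * k[3]))));
	*du = u * thetad / r - u;
	*dv = v * thetad / r - v;
}

int main() {
	const float k[4] = {-0.05f, 0.01f, 0.0f, 0.0f}; // hypothetical coefficients
	float u = 0.3f, v = -0.2f, du, dv;
	fisheye_delta(k, u, v, &du, &dv);
	const float xd = u + du, yd = v + dv; // distorted point

	// Naive fixed-point undistortion x <- x_d - delta(x): adequate for mild
	// distortion; the Newton iteration in the patch also handles strong,
	// higher-order distortion.
	float x = xd, y = yd;
	for (int i = 0; i < 50; ++i) {
		fisheye_delta(k, x, y, &du, &dv);
		x = xd - du;
		y = yd - dv;
	}
	std::printf("recovered (%f, %f), expected (%f, %f)\n", x, y, u, v);
	return 0;
}
```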
@@ -179,11 +204,11 @@ __device__ __host__ inline void iterative_opencv_lens_undistortion(const T* para
 	for (uint32_t i = 0; i < kNumIterations; ++i) {
 		const float step0 = std::max(std::numeric_limits<float>::epsilon(), std::abs(kRelStepSize * x(0)));
 		const float step1 = std::max(std::numeric_limits<float>::epsilon(), std::abs(kRelStepSize * x(1)));
-		apply_opencv_lens_distortion(params, x(0), x(1), &dx(0), &dx(1));
-		apply_opencv_lens_distortion(params, x(0) - step0, x(1), &dx_0b(0), &dx_0b(1));
-		apply_opencv_lens_distortion(params, x(0) + step0, x(1), &dx_0f(0), &dx_0f(1));
-		apply_opencv_lens_distortion(params, x(0), x(1) - step1, &dx_1b(0), &dx_1b(1));
-		apply_opencv_lens_distortion(params, x(0), x(1) + step1, &dx_1f(0), &dx_1f(1));
+		distortion_fun(params, x(0), x(1), &dx(0), &dx(1));
+		distortion_fun(params, x(0) - step0, x(1), &dx_0b(0), &dx_0b(1));
+		distortion_fun(params, x(0) + step0, x(1), &dx_0f(0), &dx_0f(1));
+		distortion_fun(params, x(0), x(1) - step1, &dx_1b(0), &dx_1b(1));
+		distortion_fun(params, x(0), x(1) + step1, &dx_1f(0), &dx_1f(1));
 		J(0, 0) = 1 + (dx_0f(0) - dx_0b(0)) / (2 * step0);
 		J(0, 1) = (dx_1f(0) - dx_1b(0)) / (2 * step1);
 		J(1, 0) = (dx_0f(1) - dx_0b(1)) / (2 * step0);
@@ -199,6 +224,16 @@ __device__ __host__ inline void iterative_opencv_lens_undistortion(const T* para
 	*v = x(1);
 }
 
+template <typename T>
+NGP_HOST_DEVICE inline void iterative_opencv_lens_undistortion(const T* params, T* u, T* v) {
+	iterative_lens_undistortion(params, u, v, opencv_lens_distortion_delta<T>);
+}
+
+template <typename T>
+NGP_HOST_DEVICE inline void iterative_opencv_fisheye_lens_undistortion(const T* params, T* u, T* v) {
+	iterative_lens_undistortion(params, u, v, opencv_fisheye_lens_distortion_delta<T>);
+}
+
 inline NGP_HOST_DEVICE Ray pixel_to_ray_pinhole(
 	uint32_t spp,
 	const Eigen::Vector2i& pixel,
@@ -290,8 +325,11 @@ inline NGP_HOST_DEVICE Ray pixel_to_ray(
 			(uv.y() - screen_center.y()) * (float)resolution.y() / focal_length.y(),
 			1.0f
 		};
+
 		if (lens.mode == ELensMode::OpenCV) {
 			iterative_opencv_lens_undistortion(lens.params, &dir.x(), &dir.y());
+		} else if (lens.mode == ELensMode::OpenCVFisheye) {
+			iterative_opencv_fisheye_lens_undistortion(lens.params, &dir.x(), &dir.y());
 		}
 	}
 	if (distortion_grid) {
@@ -334,21 +372,21 @@ inline NGP_HOST_DEVICE Eigen::Vector2f pos_to_pixel(
 	dir /= dir.z();
 	dir += head_pos * parallax_shift.z();
 
+	float du = 0.0f, dv = 0.0f;
 	if (lens.mode == ELensMode::OpenCV) {
-		float du, dv;
-		apply_opencv_lens_distortion(lens.params, dir.x(), dir.y(), &du, &dv);
-		dir.x() += du;
-		dir.y() += dv;
-	} else if (lens.mode == ELensMode::FTheta) {
-		assert(false);
-	} else if (lens.mode == ELensMode::LatLong) {
-		assert(false);
+		opencv_lens_distortion_delta(lens.params, dir.x(), dir.y(), &du, &dv);
+	} else if (lens.mode == ELensMode::OpenCVFisheye) {
+		opencv_fisheye_lens_distortion_delta(lens.params, dir.x(), dir.y(), &du, &dv);
+	} else {
+		// No other type of distortion is permitted.
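+		// (F-Theta and LatLong have no forward projection implemented in pos_to_pixel, so they would trip the assert below.)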
+		assert(lens.mode == ELensMode::Perspective);
 	}
 
-	return {
-		dir.x() * focal_length.x() + screen_center.x() * resolution.x(),
-		dir.y() * focal_length.y() + screen_center.y() * resolution.y(),
-	};
+	dir.x() += du;
+	dir.y() += dv;
+
+	Eigen::Vector2f uv = Eigen::Vector2f{dir.x(), dir.y()}.cwiseProduct(focal_length).cwiseQuotient(resolution.cast<float>()) + screen_center;
+	return uv.cwiseProduct(resolution.cast<float>());
 }
 
 inline NGP_HOST_DEVICE Eigen::Vector2f motion_vector_3d(
diff --git a/include/neural-graphics-primitives/json_binding.h b/include/neural-graphics-primitives/json_binding.h
index bd512906d7f455566260e44eccc75f565f9cb0b6..9f04a71d37a4eb30c8ad42f7f1b1f3dca9bad60f 100644
--- a/include/neural-graphics-primitives/json_binding.h
+++ b/include/neural-graphics-primitives/json_binding.h
@@ -82,10 +82,17 @@ inline void from_json(const nlohmann::json& j, BoundingBox& box) {
 
 inline void to_json(nlohmann::json& j, const Lens& lens) {
 	if (lens.mode == ELensMode::OpenCV) {
+		j["is_fisheye"] = false;
 		j["k1"] = lens.params[0];
 		j["k2"] = lens.params[1];
 		j["p1"] = lens.params[2];
 		j["p2"] = lens.params[3];
+	} else if (lens.mode == ELensMode::OpenCVFisheye) {
+		j["is_fisheye"] = true;
+		j["k1"] = lens.params[0];
+		j["k2"] = lens.params[1];
+		j["k3"] = lens.params[2];
+		j["k4"] = lens.params[3];
 	} else if (lens.mode == ELensMode::FTheta) {
 		j["ftheta_p0"] = lens.params[0];
 		j["ftheta_p1"] = lens.params[1];
@@ -99,11 +106,19 @@ inline void to_json(nlohmann::json& j, const Lens& lens) {
 
 inline void from_json(const nlohmann::json& j, Lens& lens) {
 	if (j.contains("k1")) {
-		lens.mode = ELensMode::OpenCV;
-		lens.params[0] = j.at("k1");
-		lens.params[1] = j.at("k2");
-		lens.params[2] = j.at("p1");
-		lens.params[3] = j.at("p2");
+		if (j.value("is_fisheye", false)) {
+			lens.mode = ELensMode::OpenCVFisheye;
+			lens.params[0] = j.at("k1");
+			lens.params[1] = j.at("k2");
+			lens.params[2] = j.at("k3");
+			lens.params[3] = j.at("k4");
+		} else {
+			lens.mode = ELensMode::OpenCV;
+			lens.params[0] = j.at("k1");
+			lens.params[1] = j.at("k2");
+			lens.params[2] = j.at("p1");
+			lens.params[3] = j.at("p2");
+		}
 	} else if (j.contains("ftheta_p0")) {
 		lens.mode = ELensMode::FTheta;
 		lens.params[0] = j.at("ftheta_p0");
diff --git a/include/neural-graphics-primitives/testbed.h b/include/neural-graphics-primitives/testbed.h
index 1efa2b27040b34160fe207f48ce235d7e1726195..d0ba0a6ee09f65840693fe6f24a5802530ffc00e 100644
--- a/include/neural-graphics-primitives/testbed.h
+++ b/include/neural-graphics-primitives/testbed.h
@@ -620,7 +620,7 @@ public:
 
 		tcnn::GPUMemory<float> sharpness_grid;
 
-		void set_camera_intrinsics(int frame_idx, float fx, float fy = 0.0f, float cx = -0.5f, float cy = -0.5f, float k1 = 0.0f, float k2 = 0.0f, float p1 = 0.0f, float p2 = 0.0f);
+		void set_camera_intrinsics(int frame_idx, float fx, float fy = 0.0f, float cx = -0.5f, float cy = -0.5f, float k1 = 0.0f, float k2 = 0.0f, float p1 = 0.0f, float p2 = 0.0f, float k3 = 0.0f, float k4 = 0.0f, bool is_fisheye = false);
 		void set_camera_extrinsics_rolling_shutter(int frame_idx, Eigen::Matrix<float, 3, 4> camera_to_world_start, Eigen::Matrix<float, 3, 4> camera_to_world_end, const Eigen::Vector4f& rolling_shutter, bool convert_to_ngp = true);
 		void set_camera_extrinsics(int frame_idx, Eigen::Matrix<float, 3, 4> camera_to_world, bool convert_to_ngp = true);
 		Eigen::Matrix<float, 3, 4> get_camera_extrinsics(int frame_idx);
diff --git a/scripts/colmap2nerf.py b/scripts/colmap2nerf.py
index d015f5e1d1030484bc54bb9ae4c4f267ced4012f..943373c0fccb4e609ce9a19c5c8402f08fc4432b 100755
--- a/scripts/colmap2nerf.py
+++ b/scripts/colmap2nerf.py
@@ -21,23 +21,23 @@ import os
 import shutil
 
 def parse_args():
-	parser = argparse.ArgumentParser(description="convert a text colmap export to nerf format transforms.json; optionally convert video to images, and optionally run colmap in the first place")
+	parser = argparse.ArgumentParser(description="Convert a text colmap export to nerf format transforms.json; optionally convert video to images, and optionally run colmap in the first place.")
 
-	parser.add_argument("--video_in", default="", help="run ffmpeg first to convert a provided video file into a set of images. uses the video_fps parameter also")
+	parser.add_argument("--video_in", default="", help="Run ffmpeg first to convert a provided video file into a set of images. Also uses the --video_fps parameter.")
 	parser.add_argument("--video_fps", default=2)
-	parser.add_argument("--time_slice", default="", help="time (in seconds) in the format t1,t2 within which the images should be generated from the video. eg: \"--time_slice '10,300'\" will generate images only from 10th second to 300th second of the video")
+	parser.add_argument("--time_slice", default="", help="Time (in seconds) in the format t1,t2 within which the images should be generated from the video. E.g.: \"--time_slice '10,300'\" will generate images only from the 10th to the 300th second of the video.")
 	parser.add_argument("--run_colmap", action="store_true", help="run colmap first on the image folder")
-	parser.add_argument("--colmap_matcher", default="sequential", choices=["exhaustive","sequential","spatial","transitive","vocab_tree"], help="select which matcher colmap should use. sequential for videos, exhaustive for adhoc images")
+	parser.add_argument("--colmap_matcher", default="sequential", choices=["exhaustive","sequential","spatial","transitive","vocab_tree"], help="Select which matcher colmap should use. Sequential for videos, exhaustive for ad-hoc images.")
 	parser.add_argument("--colmap_db", default="colmap.db", help="colmap database filename")
-	parser.add_argument("--colmap_camera_model", default="OPENCV", choices=["SIMPLE_PINHOLE", "PINHOLE", "SIMPLE_RADIAL", "RADIAL","OPENCV"], help="camera model")
-	parser.add_argument("--colmap_camera_params", default="", help="intrinsic parameters, depending on the chosen model. Format: fx,fy,cx,cy,dist")
-	parser.add_argument("--images", default="images", help="input path to the images")
-	parser.add_argument("--text", default="colmap_text", help="input path to the colmap text files (set automatically if run_colmap is used)")
-	parser.add_argument("--aabb_scale", default=16, choices=["1", "2", "4", "8", "16", "32", "64", "128"], help="large scene scale factor. 1=scene fits in unit cube; power of 2 up to 16")
-	parser.add_argument("--skip_early", default=0, help="skip this many images from the start")
-	parser.add_argument("--keep_colmap_coords", action="store_true", help="keep transforms.json in COLMAP's original frame of reference (this will avoid reorienting and repositioning the scene for preview and rendering)")
-	parser.add_argument("--out", default="transforms.json", help="output path")
-	parser.add_argument("--vocab_path", default="", help="vocabulary tree path")
+	parser.add_argument("--colmap_camera_model", default="OPENCV", choices=["SIMPLE_PINHOLE", "PINHOLE", "SIMPLE_RADIAL", "RADIAL", "OPENCV", "SIMPLE_RADIAL_FISHEYE", "RADIAL_FISHEYE", "OPENCV_FISHEYE"], help="Camera model.")
+	parser.add_argument("--colmap_camera_params", default="", help="Intrinsic parameters, depending on the chosen model. Format: fx,fy,cx,cy,dist")
+	parser.add_argument("--images", default="images", help="Input path to the images.")
+	parser.add_argument("--text", default="colmap_text", help="Input path to the colmap text files (set automatically if --run_colmap is used).")
+	parser.add_argument("--aabb_scale", default=16, choices=["1", "2", "4", "8", "16", "32", "64", "128"], help="Large scene scale factor. 1=scene fits in unit cube; power of 2 up to 128")
+	parser.add_argument("--skip_early", default=0, help="Skip this many images from the start.")
+	parser.add_argument("--keep_colmap_coords", action="store_true", help="Keep transforms.json in COLMAP's original frame of reference (this will avoid reorienting and repositioning the scene for preview and rendering).")
+	parser.add_argument("--out", default="transforms.json", help="Output path.")
+	parser.add_argument("--vocab_path", default="", help="Vocabulary tree path.")
 	args = parser.parse_args()
 	return args
@@ -182,10 +182,13 @@ if __name__ == "__main__":
 			fl_y = float(els[4])
 			k1 = 0
 			k2 = 0
+			k3 = 0
+			k4 = 0
 			p1 = 0
 			p2 = 0
 			cx = w / 2
 			cy = h / 2
+			is_fisheye = False
 			if els[1] == "SIMPLE_PINHOLE":
 				cx = float(els[5])
 				cy = float(els[6])
@@ -210,8 +213,28 @@ if __name__ == "__main__":
 				k2 = float(els[9])
 				p1 = float(els[10])
 				p2 = float(els[11])
+			elif els[1] == "SIMPLE_RADIAL_FISHEYE":
+				is_fisheye = True
+				cx = float(els[5])
+				cy = float(els[6])
+				k1 = float(els[7])
+			elif els[1] == "RADIAL_FISHEYE":
+				is_fisheye = True
+				cx = float(els[5])
+				cy = float(els[6])
+				k1 = float(els[7])
+				k2 = float(els[8])
+			elif els[1] == "OPENCV_FISHEYE":
+				is_fisheye = True
+				fl_y = float(els[5])
+				cx = float(els[6])
+				cy = float(els[7])
+				k1 = float(els[8])
+				k2 = float(els[9])
+				k3 = float(els[10])
+				k4 = float(els[11])
 			else:
-				print("unknown camera model ", els[1])
+				print("Unknown camera model", els[1])
 			# fl = 0.5 * w / tan(0.5 * angle_x);
 			angle_x = math.atan(w / (fl_x * 2)) * 2
 			angle_y = math.atan(h / (fl_y * 2)) * 2
@@ -230,8 +253,11 @@ if __name__ == "__main__":
 		"fl_y": fl_y,
 		"k1": k1,
 		"k2": k2,
+		"k3": k3,
+		"k4": k4,
 		"p1": p1,
 		"p2": p2,
+		"is_fisheye": is_fisheye,
 		"cx": cx,
 		"cy": cy,
 		"w": w,
@@ -254,7 +280,7 @@ if __name__ == "__main__":
 			# why is this requireing a relitive path while using ^
 			image_rel = os.path.relpath(IMAGE_FOLDER)
 			name = str(f"./{image_rel}/{'_'.join(elems[9:])}")
-			b=sharpness(name)
+			b = sharpness(name)
 			print(name, "sharpness=",b)
 			image_id = int(elems[0])
 			qvec = np.array(tuple(map(float, elems[1:5])))
@@ -266,12 +292,12 @@ if __name__ == "__main__":
 			if not args.keep_colmap_coords:
 				c2w[0:3,2] *= -1 # flip the y and z axis
 				c2w[0:3,1] *= -1
-				c2w = c2w[[1,0,2,3],:] # swap y and z
+				c2w = c2w[[1,0,2,3],:]
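+				# note: rows 0 and 1 are swapped here (world x and y); together with the sign flips above and below, this maps COLMAP's coordinate conventions onto this repo's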
 				c2w[2,:] *= -1 # flip whole world upside down
 
 				up += c2w[0:3,1]
 
-			frame={"file_path":name,"sharpness":b,"transform_matrix": c2w}
+			frame = {"file_path":name,"sharpness":b,"transform_matrix": c2w}
 			out["frames"].append(frame)
 
 	nframes = len(out["frames"])
diff --git a/src/nerf_loader.cu b/src/nerf_loader.cu
index dfb08b07dff20d4b1fef7b52c15925b8d718ceba..e0e4c36a2503bd3c69e2ba9a59f06831c9211eec 100644
--- a/src/nerf_loader.cu
+++ b/src/nerf_loader.cu
@@ -193,33 +193,29 @@ NerfDataset create_empty_nerf_dataset(size_t n_images, int aabb_scale, bool is_h
 
 void read_lens(const nlohmann::json& json, Lens& lens, Vector2f& principal_point, Vector4f& rolling_shutter) {
 	ELensMode mode = ELensMode::Perspective;
-	if (json.contains("k1")) {
-		lens.params[0] = json["k1"];
-		if (lens.params[0] != 0.f) {
-			mode = ELensMode::OpenCV;
-		}
-	}
-
-	if (json.contains("k2")) {
-		lens.params[1] = json["k2"];
-		if (lens.params[1] != 0.f) {
-			mode = ELensMode::OpenCV;
+	ELensMode opencv_mode = json.value("is_fisheye", false) ? ELensMode::OpenCVFisheye : ELensMode::OpenCV;
+	auto read_opencv_parameter = [&](const std::string& name, size_t idx) {
+		if (json.contains(name)) {
+			lens.params[idx] = json[name];
+			if (lens.params[idx] != 0.f) {
+				mode = opencv_mode;
+			}
 		}
-	}
+	};
 
-	if (json.contains("p1")) {
-		lens.params[2] = json["p1"];
-		if (lens.params[2] != 0.f) {
-			mode = ELensMode::OpenCV;
-		}
-	}
-
-	if (json.contains("p2")) {
-		lens.params[3] = json["p2"];
-		if (lens.params[3] != 0.f) {
-			mode = ELensMode::OpenCV;
-		}
-	}
+	read_opencv_parameter("k1", 0);
+	read_opencv_parameter("k2", 1);
+
+	// k3/k4 (fisheye) and p1/p2 (standard OpenCV) share params[2] and params[3];
+	// read only the pair matching the declared mode so that zero-valued "p1"/"p2"
+	// entries in the JSON cannot clobber genuine k3/k4 values (and vice versa).
+	if (opencv_mode == ELensMode::OpenCVFisheye) {
+		read_opencv_parameter("k3", 2);
+		read_opencv_parameter("k4", 3);
+	} else {
+		read_opencv_parameter("p1", 2);
+		read_opencv_parameter("p2", 3);
+	}
 
 	if (json.contains("cx")) {
 		principal_point.x() = (float)json["cx"] / (float)json["w"];
diff --git a/src/python_api.cu b/src/python_api.cu
index ca4543c80d0fbb136f278814ab55e5e345ad7536..bd95aa2ccf0c0f2a429ca18d5a037e025f3ef328 100644
--- a/src/python_api.cu
+++ b/src/python_api.cu
@@ -314,6 +314,7 @@ PYBIND11_MODULE(pyngp, m) {
 		.value("OpenCV", ELensMode::OpenCV)
 		.value("FTheta", ELensMode::FTheta)
 		.value("LatLong", ELensMode::LatLong)
+		.value("OpenCVFisheye", ELensMode::OpenCVFisheye)
 		.export_values();
 
 	py::class_<BoundingBox>(m, "BoundingBox")
@@ -621,6 +622,8 @@ PYBIND11_MODULE(pyngp, m) {
 			py::arg("cx")=-0.5f, py::arg("cy")=-0.5f,
 			py::arg("k1")=0.f, py::arg("k2")=0.f,
 			py::arg("p1")=0.f, py::arg("p2")=0.f,
+			py::arg("k3")=0.f, py::arg("k4")=0.f,
+			py::arg("is_fisheye")=false,
 			"Set up the camera intrinsics for the given training image index."
) .def("set_camera_extrinsics", &Testbed::Nerf::Training::set_camera_extrinsics, diff --git a/src/testbed.cu b/src/testbed.cu index e70f3c933272d7c7c66bf12b56ee5bf777e4b9cb..991e52307e4ff7405ae12958b4e6a3905d86f926 100644 --- a/src/testbed.cu +++ b/src/testbed.cu @@ -864,12 +864,17 @@ void Testbed::imgui() { accum_reset |= ImGui::Checkbox("Apply lens distortion", &m_nerf.render_with_lens_distortion); if (m_nerf.render_with_lens_distortion) { - accum_reset |= ImGui::Combo("Lens mode", (int*)&m_nerf.render_lens.mode, "Perspective\0OpenCV\0F-Theta\0LatLong\0"); + accum_reset |= ImGui::Combo("Lens mode", (int*)&m_nerf.render_lens.mode, LensModeStr); if (m_nerf.render_lens.mode == ELensMode::OpenCV) { accum_reset |= ImGui::InputFloat("k1", &m_nerf.render_lens.params[0], 0.f, 0.f, "%.5f"); accum_reset |= ImGui::InputFloat("k2", &m_nerf.render_lens.params[1], 0.f, 0.f, "%.5f"); accum_reset |= ImGui::InputFloat("p1", &m_nerf.render_lens.params[2], 0.f, 0.f, "%.5f"); accum_reset |= ImGui::InputFloat("p2", &m_nerf.render_lens.params[3], 0.f, 0.f, "%.5f"); + } else if (m_nerf.render_lens.mode == ELensMode::OpenCVFisheye) { + accum_reset |= ImGui::InputFloat("k1", &m_nerf.render_lens.params[0], 0.f, 0.f, "%.5f"); + accum_reset |= ImGui::InputFloat("k2", &m_nerf.render_lens.params[1], 0.f, 0.f, "%.5f"); + accum_reset |= ImGui::InputFloat("k3", &m_nerf.render_lens.params[2], 0.f, 0.f, "%.5f"); + accum_reset |= ImGui::InputFloat("k4", &m_nerf.render_lens.params[3], 0.f, 0.f, "%.5f"); } else if (m_nerf.render_lens.mode == ELensMode::FTheta) { accum_reset |= ImGui::InputFloat("width", &m_nerf.render_lens.params[5], 0.f, 0.f, "%.0f"); accum_reset |= ImGui::InputFloat("height", &m_nerf.render_lens.params[6], 0.f, 0.f, "%.0f"); diff --git a/src/testbed_nerf.cu b/src/testbed_nerf.cu index 90ef448c207ecd41d691f6129f963e35c33ec078..0e0ad0cb46c3ed6659a22ae433506b7ee3ea4d03 100644 --- a/src/testbed_nerf.cu +++ b/src/testbed_nerf.cu @@ -378,9 +378,9 @@ __global__ void mark_untrained_density_grid(const uint32_t n_elements, float* _ Vector3f pos = ((Vector3f{(float)x+0.5f, (float)y+0.5f, (float)z+0.5f}) / NERF_GRIDSIZE() - Vector3f::Constant(0.5f)) * scalbnf(1.0f, level) + Vector3f::Constant(0.5f); float voxel_radius = 0.5f*SQRT3()*scalbnf(1.0f, level) / NERF_GRIDSIZE(); - int count=0; + int count = 0; for (uint32_t j=0; j < n_training_images; ++j) { - if (metadata[j].lens.mode == ELensMode::FTheta || metadata[j].lens.mode == ELensMode::LatLong) { + if (metadata[j].lens.mode == ELensMode::FTheta || metadata[j].lens.mode == ELensMode::LatLong || metadata[j].lens.mode == ELensMode::OpenCVFisheye) { // not supported for now count++; break; @@ -1164,6 +1164,8 @@ __global__ void generate_training_samples_nerf( if (lens.mode == ELensMode::OpenCV) { iterative_opencv_lens_undistortion(lens.params, &ray_unnormalized.d.x(), &ray_unnormalized.d.y()); + } else if (lens.mode == ELensMode::OpenCVFisheye) { + iterative_opencv_fisheye_lens_undistortion(lens.params, &ray_unnormalized.d.x(), &ray_unnormalized.d.y()); } } @@ -2268,7 +2270,7 @@ void Testbed::render_nerf(CudaRenderBuffer& render_buffer, const Vector2i& max_r NerfTracer tracer; // Our motion vector code can't undo f-theta and grid distortions -- so don't render these if DLSS is enabled. 
-	bool render_opencv_lens = m_nerf.render_with_lens_distortion && (!render_buffer.dlss() || m_nerf.render_lens.mode == ELensMode::OpenCV);
+	bool render_opencv_lens = m_nerf.render_with_lens_distortion && (!render_buffer.dlss() || m_nerf.render_lens.mode == ELensMode::OpenCV || m_nerf.render_lens.mode == ELensMode::OpenCVFisheye);
 	bool render_grid_distortion = m_nerf.render_with_lens_distortion && !render_buffer.dlss();
 
 	Lens lens = render_opencv_lens ? m_nerf.render_lens : Lens{};
@@ -2393,7 +2395,7 @@ void Testbed::render_nerf(CudaRenderBuffer& render_buffer, const Vector2i& max_r
 	}
 }
 
-void Testbed::Nerf::Training::set_camera_intrinsics(int frame_idx, float fx, float fy, float cx, float cy, float k1, float k2, float p1, float p2) {
+void Testbed::Nerf::Training::set_camera_intrinsics(int frame_idx, float fx, float fy, float cx, float cy, float k1, float k2, float p1, float p2, float k3, float k4, bool is_fisheye) {
 	if (frame_idx < 0 || frame_idx >= dataset.n_images) {
 		return;
 	}
@@ -2402,8 +2404,15 @@ void Testbed::Nerf::Training::set_camera_intrinsics(int frame_idx, float fx, flo
 	auto& m = dataset.metadata[frame_idx];
 	if (cx < 0.f) cx = -cx; else cx = cx / m.resolution.x();
 	if (cy < 0.f) cy = -cy; else cy = cy / m.resolution.y();
-	ELensMode mode = (k1 || k2 || p1 || p2) ? ELensMode::OpenCV : ELensMode::Perspective;
-	m.lens = { mode, k1, k2, p1, p2 };
+	m.lens = { ELensMode::Perspective };
+	if (k1 || k2 || k3 || k4 || p1 || p2) {
+		if (is_fisheye) {
+			m.lens = { ELensMode::OpenCVFisheye, k1, k2, k3, k4 };
+		} else {
+			m.lens = { ELensMode::OpenCV, k1, k2, p1, p2 };
+		}
+	}
+
 	m.principal_point = { cx, cy };
 	m.focal_length = { fx, fy };
 	dataset.update_metadata(frame_idx, frame_idx + 1);
@@ -2452,20 +2461,24 @@ void Testbed::Nerf::Training::export_camera_extrinsics(const std::string& filena
 	tlog::info() << "Saving a total of " << n_images_for_training << " poses to " << filename;
 	nlohmann::json trajectory;
 	for(int i = 0; i < n_images_for_training; ++i) {
-		nlohmann::json frame {{"id", i}};
+		nlohmann::json frame{{"id", i}};
 
 		const Eigen::Matrix<float, 3, 4> p_nerf = get_camera_extrinsics(i);
 		if (export_extrinsics_in_quat_format) {
 			// Assume 30 fps
 			frame["time"] = i*0.033f;
 			// Convert the pose from NeRF to Quaternion format.
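+			// conv_coords_l re-permutes the world axes and conv_coords_r flips the camera's y and z axes, i.e. a change of basis on each side of the 3x4 pose matrix.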
-			const Eigen::Matrix<float, 3, 3> conv_coords_l {{ 0.f,  1.f,  0.f},
-			                                                { 0.f,  0.f, -1.f},
-			                                                {-1.f,  0.f,  0.f}};
-			const Eigen::Matrix<float, 4, 4> conv_coords_r {{ 1.f,  0.f,  0.f,  0.f},
-			                                                { 0.f, -1.f,  0.f,  0.f},
-			                                                { 0.f,  0.f, -1.f,  0.f},
-			                                                { 0.f,  0.f,  0.f,  1.f}};
+			const Eigen::Matrix<float, 3, 3> conv_coords_l{
+				{ 0.f,  1.f,  0.f},
+				{ 0.f,  0.f, -1.f},
+				{-1.f,  0.f,  0.f}
+			};
+			const Eigen::Matrix<float, 4, 4> conv_coords_r{
+				{ 1.f,  0.f,  0.f,  0.f},
+				{ 0.f, -1.f,  0.f,  0.f},
+				{ 0.f,  0.f, -1.f,  0.f},
+				{ 0.f,  0.f,  0.f,  1.f}
+			};
 
 			const Eigen::Matrix<float, 3, 4> p_quat = conv_coords_l * p_nerf * conv_coords_r;
 			const Eigen::Quaternionf rot_q {p_quat.block<3, 3>(0, 0)};
@@ -2478,7 +2491,7 @@ void Testbed::Nerf::Training::export_camera_extrinsics(const std::string& filena
 		trajectory.emplace_back(frame);
 	}
 	std::ofstream file(filename);
-	file << std::setw(2) << trajectory << std::endl;
+	file << std::setw(2) << trajectory << std::endl;
 }
 
 Eigen::Matrix<float, 3, 4> Testbed::Nerf::Training::get_camera_extrinsics(int frame_idx) {
@@ -2490,7 +2503,7 @@ Eigen::Matrix<float, 3, 4> Testbed::Nerf::Training::get_camera_extrinsics(int fr
 
 void Testbed::Nerf::Training::update_transforms(int first, int last) {
 	if (last < 0) {
-		last=dataset.n_images;
+		last = dataset.n_images;
 	}
 
 	if (last > dataset.n_images) {
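One detail worth spelling out: `Lens::params` is a single array shared by every mode, so the two OpenCV variants overlay different coefficients onto the same four slots, {k1, k2, p1, p2} for `ELensMode::OpenCV` versus {k1, k2, k3, k4} for `ELensMode::OpenCVFisheye`. That is the packing `set_camera_intrinsics` implements above, and the reason the JSON readers key their interpretation off the `is_fisheye` flag. A minimal sketch of the overlay (hypothetical `MiniLens` stand-in, not the real headers):

```cpp
#include <cstdio>

enum class ELensMode : int { Perspective, OpenCV, FTheta, LatLong, OpenCVFisheye };

struct MiniLens {
	ELensMode mode = ELensMode::Perspective;
	float params[4] = {}; // meaning depends on mode, as in the real Lens struct
};

// Mirrors the packing in Testbed::Nerf::Training::set_camera_intrinsics: the
// mode only leaves Perspective when at least one coefficient is nonzero.
MiniLens make_lens(float k1, float k2, float p1, float p2, float k3, float k4, bool is_fisheye) {
	if (!(k1 || k2 || k3 || k4 || p1 || p2)) {
		return {};
	}
	return is_fisheye ? MiniLens{ELensMode::OpenCVFisheye, {k1, k2, k3, k4}}
	                  : MiniLens{ELensMode::OpenCV, {k1, k2, p1, p2}};
}

int main() {
	MiniLens fisheye = make_lens(-0.05f, 0.01f, 0.f, 0.f, 0.002f, -0.001f, true);
	std::printf("fisheye: params[2] = k3 = %g, params[3] = k4 = %g\n", fisheye.params[2], fisheye.params[3]);

	MiniLens opencv = make_lens(-0.05f, 0.01f, 0.001f, 0.f, 0.f, 0.f, false);
	std::printf("opencv:  params[2] = p1 = %g, params[3] = p2 = %g\n", opencv.params[2], opencv.params[3]);
	return 0;
}
```

From Python, the same packing is reached through the updated `pyngp` binding, e.g. `testbed.nerf.training.set_camera_intrinsics(i, fx, fy, cx, cy, k1=k1, k2=k2, k3=k3, k4=k4, is_fisheye=True)`.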