From 66350cc34d4db60e6e40a08ffba0e1942b90dd7d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20M=C3=BCller?= <thomas94@gmx.net> Date: Mon, 3 Oct 2022 12:32:51 +0200 Subject: [PATCH] Rename `CameraDistortion` to `Lens` Makes f-theta and latlong modes more meaningful and paves the way for a future Orthographic mode --- include/neural-graphics-primitives/common.h | 10 +-- .../common_device.cuh | 50 ++++++------- .../neural-graphics-primitives/json_binding.h | 69 +++++++++--------- .../neural-graphics-primitives/nerf_loader.h | 2 +- include/neural-graphics-primitives/testbed.h | 6 +- scripts/run.py | 2 +- src/nerf_loader.cu | 70 ++++++++----------- src/python_api.cu | 29 ++++---- src/testbed.cu | 49 +++++++------ src/testbed_nerf.cu | 42 +++++------ 10 files changed, 164 insertions(+), 165 deletions(-) diff --git a/include/neural-graphics-primitives/common.h b/include/neural-graphics-primitives/common.h index dd065d8..52e3145 100644 --- a/include/neural-graphics-primitives/common.h +++ b/include/neural-graphics-primitives/common.h @@ -173,15 +173,15 @@ struct TrainingXForm { Eigen::Matrix<float, 3, 4> end; }; -enum class ECameraDistortionMode : int { - None, - Iterative, +enum class ELensMode : int { + Perspective, + OpenCV, FTheta, LatLong, }; -struct CameraDistortion { - ECameraDistortionMode mode = ECameraDistortionMode::None; +struct Lens { + ELensMode mode = ELensMode::Perspective; float params[7] = {}; }; diff --git a/include/neural-graphics-primitives/common_device.cuh b/include/neural-graphics-primitives/common_device.cuh index 4e0a367..5029cb0 100644 --- a/include/neural-graphics-primitives/common_device.cuh +++ b/include/neural-graphics-primitives/common_device.cuh @@ -143,7 +143,7 @@ __device__ void deposit_image_gradient(const Eigen::Matrix<float, N_DIMS, 1>& va } template <typename T> -__device__ __host__ inline void apply_camera_distortion(const T* extra_params, const T u, const T v, T* du, T* dv) { +__device__ __host__ inline void apply_opencv_lens_distortion(const T* extra_params, const T u, const T v, T* du, T* dv) { const T k1 = extra_params[0]; const T k2 = extra_params[1]; const T p1 = extra_params[2]; @@ -159,7 +159,7 @@ __device__ __host__ inline void apply_camera_distortion(const T* extra_params, c } template <typename T> -__device__ __host__ inline void iterative_camera_undistortion(const T* params, T* u, T* v) { +__device__ __host__ inline void iterative_opencv_lens_undistortion(const T* params, T* u, T* v) { // Parameters for Newton iteration using numerical differentiation with // central differences, 100 iterations should be enough even for complex // camera models with higher order terms. @@ -179,11 +179,11 @@ __device__ __host__ inline void iterative_camera_undistortion(const T* params, T for (uint32_t i = 0; i < kNumIterations; ++i) { const float step0 = std::max(std::numeric_limits<float>::epsilon(), std::abs(kRelStepSize * x(0))); const float step1 = std::max(std::numeric_limits<float>::epsilon(), std::abs(kRelStepSize * x(1))); - apply_camera_distortion(params, x(0), x(1), &dx(0), &dx(1)); - apply_camera_distortion(params, x(0) - step0, x(1), &dx_0b(0), &dx_0b(1)); - apply_camera_distortion(params, x(0) + step0, x(1), &dx_0f(0), &dx_0f(1)); - apply_camera_distortion(params, x(0), x(1) - step1, &dx_1b(0), &dx_1b(1)); - apply_camera_distortion(params, x(0), x(1) + step1, &dx_1f(0), &dx_1f(1)); + apply_opencv_lens_distortion(params, x(0), x(1), &dx(0), &dx(1)); + apply_opencv_lens_distortion(params, x(0) - step0, x(1), &dx_0b(0), &dx_0b(1)); + apply_opencv_lens_distortion(params, x(0) + step0, x(1), &dx_0f(0), &dx_0f(1)); + apply_opencv_lens_distortion(params, x(0), x(1) - step1, &dx_1b(0), &dx_1b(1)); + apply_opencv_lens_distortion(params, x(0), x(1) + step1, &dx_1f(0), &dx_1f(1)); J(0, 0) = 1 + (dx_0f(0) - dx_0b(0)) / (2 * step0); J(0, 1) = (dx_1f(0) - dx_1b(0)) / (2 * step1); J(1, 0) = (dx_0f(1) - dx_0b(1)) / (2 * step0); @@ -268,20 +268,20 @@ inline NGP_HOST_DEVICE Ray pixel_to_ray( bool snap_to_pixel_centers = false, float focus_z = 1.0f, float aperture_size = 0.0f, - const CameraDistortion& camera_distortion = {}, - const float* __restrict__ distortion_data = nullptr, - const Eigen::Vector2i distortion_resolution = Eigen::Vector2i::Zero() + const Lens& lens = {}, + const float* __restrict__ distortion_grid = nullptr, + const Eigen::Vector2i distortion_grid_resolution = Eigen::Vector2i::Zero() ) { Eigen::Vector2f offset = ld_random_pixel_offset(snap_to_pixel_centers ? 0 : spp); Eigen::Vector2f uv = (pixel.cast<float>() + offset).cwiseQuotient(resolution.cast<float>()); Eigen::Vector3f dir; - if (camera_distortion.mode == ECameraDistortionMode::FTheta) { - dir = f_theta_undistortion(uv - screen_center, camera_distortion.params, {1000.f, 0.f, 0.f}); + if (lens.mode == ELensMode::FTheta) { + dir = f_theta_undistortion(uv - screen_center, lens.params, {1000.f, 0.f, 0.f}); if (dir.x() == 1000.f) { return {{1000.f, 0.f, 0.f}, {0.f, 0.f, 1.f}}; // return a point outside the aabb so the pixel is not rendered } - } else if (camera_distortion.mode == ECameraDistortionMode::LatLong) { + } else if (lens.mode == ELensMode::LatLong) { dir = latlong_to_dir(uv); } else { dir = { @@ -289,12 +289,12 @@ inline NGP_HOST_DEVICE Ray pixel_to_ray( (uv.y() - screen_center.y()) * (float)resolution.y() / focal_length.y(), 1.0f }; - if (camera_distortion.mode == ECameraDistortionMode::Iterative) { - iterative_camera_undistortion(camera_distortion.params, &dir.x(), &dir.y()); + if (lens.mode == ELensMode::OpenCV) { + iterative_opencv_lens_undistortion(lens.params, &dir.x(), &dir.y()); } } - if (distortion_data) { - dir.head<2>() += read_image<2>(distortion_data, distortion_resolution, uv); + if (distortion_grid) { + dir.head<2>() += read_image<2>(distortion_grid, distortion_grid_resolution, uv); } Eigen::Vector3f head_pos = {parallax_shift.x(), parallax_shift.y(), 0.f}; @@ -321,7 +321,7 @@ inline NGP_HOST_DEVICE Eigen::Vector2f pos_to_pixel( const Eigen::Matrix<float, 3, 4>& camera_matrix, const Eigen::Vector2f& screen_center, const Eigen::Vector3f& parallax_shift, - const CameraDistortion& camera_distortion = {} + const Lens& lens = {} ) { // Express ray in terms of camera frame Eigen::Vector3f head_pos = {parallax_shift.x(), parallax_shift.y(), 0.f}; @@ -332,14 +332,14 @@ inline NGP_HOST_DEVICE Eigen::Vector2f pos_to_pixel( dir /= dir.z(); dir += head_pos * parallax_shift.z(); - if (camera_distortion.mode == ECameraDistortionMode::Iterative) { + if (lens.mode == ELensMode::OpenCV) { float du, dv; - apply_camera_distortion(camera_distortion.params, dir.x(), dir.y(), &du, &dv); + apply_opencv_lens_distortion(lens.params, dir.x(), dir.y(), &du, &dv); dir.x() += du; dir.y() += dv; - } else if (camera_distortion.mode == ECameraDistortionMode::FTheta) { + } else if (lens.mode == ELensMode::FTheta) { assert(false); - } else if (camera_distortion.mode == ECameraDistortionMode::LatLong) { + } else if (lens.mode == ELensMode::LatLong) { assert(false); } @@ -360,7 +360,7 @@ inline NGP_HOST_DEVICE Eigen::Vector2f motion_vector_3d( const Eigen::Vector3f& parallax_shift, const bool snap_to_pixel_centers, const float depth, - const CameraDistortion& camera_distortion = {} + const Lens& lens = {} ) { Ray ray = pixel_to_ray( sample_index, @@ -373,7 +373,7 @@ inline NGP_HOST_DEVICE Eigen::Vector2f motion_vector_3d( snap_to_pixel_centers, 1.0f, 0.0f, - camera_distortion, + lens, nullptr, Eigen::Vector2i::Zero() ); @@ -385,7 +385,7 @@ inline NGP_HOST_DEVICE Eigen::Vector2f motion_vector_3d( prev_camera, screen_center, parallax_shift, - camera_distortion + lens ); return prev_pixel - (pixel.cast<float>() + ld_random_pixel_offset(sample_index)); diff --git a/include/neural-graphics-primitives/json_binding.h b/include/neural-graphics-primitives/json_binding.h index 04d2f25..bd51290 100644 --- a/include/neural-graphics-primitives/json_binding.h +++ b/include/neural-graphics-primitives/json_binding.h @@ -80,41 +80,41 @@ inline void from_json(const nlohmann::json& j, BoundingBox& box) { from_json(j.at("max"), box.max); } -inline void to_json(nlohmann::json& j, const CameraDistortion& dist) { - if (dist.mode == ECameraDistortionMode::Iterative) { - j["k1"] = dist.params[0]; - j["k2"] = dist.params[1]; - j["p1"] = dist.params[2]; - j["p2"] = dist.params[3]; - } else if (dist.mode == ECameraDistortionMode::FTheta) { - j["ftheta_p0"] = dist.params[0]; - j["ftheta_p1"] = dist.params[1]; - j["ftheta_p2"] = dist.params[2]; - j["ftheta_p3"] = dist.params[3]; - j["ftheta_p4"] = dist.params[4]; - j["w"] = dist.params[5]; - j["h"] = dist.params[6]; +inline void to_json(nlohmann::json& j, const Lens& lens) { + if (lens.mode == ELensMode::OpenCV) { + j["k1"] = lens.params[0]; + j["k2"] = lens.params[1]; + j["p1"] = lens.params[2]; + j["p2"] = lens.params[3]; + } else if (lens.mode == ELensMode::FTheta) { + j["ftheta_p0"] = lens.params[0]; + j["ftheta_p1"] = lens.params[1]; + j["ftheta_p2"] = lens.params[2]; + j["ftheta_p3"] = lens.params[3]; + j["ftheta_p4"] = lens.params[4]; + j["w"] = lens.params[5]; + j["h"] = lens.params[6]; } } -inline void from_json(const nlohmann::json& j, CameraDistortion& dist) { +inline void from_json(const nlohmann::json& j, Lens& lens) { if (j.contains("k1")) { - dist.mode = ECameraDistortionMode::Iterative; - dist.params[0] = j.at("k1"); - dist.params[1] = j.at("k2"); - dist.params[2] = j.at("p1"); - dist.params[3] = j.at("p2"); + lens.mode = ELensMode::OpenCV; + lens.params[0] = j.at("k1"); + lens.params[1] = j.at("k2"); + lens.params[2] = j.at("p1"); + lens.params[3] = j.at("p2"); } else if (j.contains("ftheta_p0")) { - dist.mode = ECameraDistortionMode::FTheta; - dist.params[0] = j.at("ftheta_p0"); - dist.params[1] = j.at("ftheta_p1"); - dist.params[2] = j.at("ftheta_p2"); - dist.params[3] = j.at("ftheta_p3"); - dist.params[4] = j.at("ftheta_p4"); - dist.params[5] = j.at("w"); - dist.params[6] = j.at("h"); + lens.mode = ELensMode::FTheta; + lens.params[0] = j.at("ftheta_p0"); + lens.params[1] = j.at("ftheta_p1"); + lens.params[2] = j.at("ftheta_p2"); + lens.params[3] = j.at("ftheta_p3"); + lens.params[4] = j.at("ftheta_p4"); + lens.params[5] = j.at("w"); + lens.params[6] = j.at("h"); } else { - dist.mode = ECameraDistortionMode::None; + lens.mode = ELensMode::Perspective; } } @@ -135,7 +135,7 @@ inline void to_json(nlohmann::json& j, const NerfDataset& dataset) { j["metadata"].emplace_back(); j["xforms"].emplace_back(); to_json(j["metadata"].at(i)["focal_length"], dataset.metadata[i].focal_length); - to_json(j["metadata"].at(i)["camera_distortion"], dataset.metadata[i].camera_distortion); + to_json(j["metadata"].at(i)["lens"], dataset.metadata[i].lens); to_json(j["metadata"].at(i)["principal_point"], dataset.metadata[i].principal_point); to_json(j["metadata"].at(i)["rolling_shutter"], dataset.metadata[i].rolling_shutter); to_json(j["metadata"].at(i)["resolution"], dataset.metadata[i].resolution); @@ -162,7 +162,9 @@ inline void from_json(const nlohmann::json& j, NerfDataset& dataset) { for (size_t i = 0; i < dataset.n_images; ++i) { // read global defaults first - if (j.contains("camera_distortion")) from_json(j.at("camera_distortion"), dataset.metadata[i].camera_distortion); + if (j.contains("lens")) from_json(j.at("lens"), dataset.metadata[i].lens); + // Legacy: "lens" used to be called "camera_distortion" + if (j.contains("camera_distortion")) from_json(j.at("camera_distortion"), dataset.metadata[i].lens); if (j.contains("principal_point")) from_json(j.at("principal_point"), dataset.metadata[i].principal_point); if (j.contains("rolling_shutter")) from_json(j.at("rolling_shutter"), dataset.metadata[i].rolling_shutter); if (j.contains("focal_length")) from_json(j.at("focal_length"), dataset.metadata[i].focal_length); @@ -175,8 +177,9 @@ inline void from_json(const nlohmann::json& j, NerfDataset& dataset) { from_json(ji.at("resolution"), dataset.metadata[i].resolution); from_json(ji.at("focal_length"), dataset.metadata[i].focal_length); from_json(ji.at("principal_point"), dataset.metadata[i].principal_point); - from_json(ji.at("rolling_shutter"), dataset.metadata[i].rolling_shutter); - from_json(ji.at("camera_distortion"), dataset.metadata[i].camera_distortion); + if (ji.contains("lens")) from_json(ji.at("lens"), dataset.metadata[i].lens); + // Legacy: "lens" used to be called "camera_distortion" + if (ji.contains("camera_distortion")) from_json(ji.at("camera_distortion"), dataset.metadata[i].lens); } } diff --git a/include/neural-graphics-primitives/nerf_loader.h b/include/neural-graphics-primitives/nerf_loader.h index 644f699..aa9d50f 100644 --- a/include/neural-graphics-primitives/nerf_loader.h +++ b/include/neural-graphics-primitives/nerf_loader.h @@ -36,7 +36,7 @@ struct TrainingImageMetadata { const float* depth = nullptr; const Ray* rays = nullptr; - CameraDistortion camera_distortion = {}; + Lens lens = {}; Eigen::Vector2i resolution = Eigen::Vector2i::Zero(); Eigen::Vector2f principal_point = Eigen::Vector2f::Constant(0.5f); Eigen::Vector2f focal_length = Eigen::Vector2f::Constant(1000.f); diff --git a/include/neural-graphics-primitives/testbed.h b/include/neural-graphics-primitives/testbed.h index bc26f61..6bac708 100644 --- a/include/neural-graphics-primitives/testbed.h +++ b/include/neural-graphics-primitives/testbed.h @@ -147,7 +147,7 @@ public: const Eigen::Matrix3f& render_aabb_to_local, float plane_z, float aperture_size, - const CameraDistortion& camera_distortion, + const Lens& lens, const float* envmap_data, const Eigen::Vector2i& envmap_resolution, const float* distortion_data, @@ -654,8 +654,8 @@ public: float cone_angle_constant = 1.f/256.f; bool visualize_cameras = false; - bool render_with_camera_distortion = false; - CameraDistortion render_distortion = {}; + bool render_with_lens_distortion = false; + Lens render_lens = {}; float rendering_min_transmittance = 0.01f; diff --git a/scripts/run.py b/scripts/run.py index b7d68e4..4139dad 100644 --- a/scripts/run.py +++ b/scripts/run.py @@ -142,7 +142,7 @@ if __name__ == "__main__": testbed.shall_train = args.train if args.gui else True - testbed.nerf.render_with_camera_distortion = True + testbed.nerf.render_with_lens_distortion = True network_stem = os.path.splitext(os.path.basename(network))[0] if args.mode == "sdf": diff --git a/src/nerf_loader.cu b/src/nerf_loader.cu index e4c9a0b..60fe8ee 100644 --- a/src/nerf_loader.cu +++ b/src/nerf_loader.cu @@ -194,34 +194,34 @@ NerfDataset create_empty_nerf_dataset(size_t n_images, int aabb_scale, bool is_h return result; } -void read_camera_distortion(const nlohmann::json& json, CameraDistortion& camera_distortion, Vector2f& principal_point, Vector4f& rolling_shutter) { - ECameraDistortionMode mode = ECameraDistortionMode::None; +void read_lens(const nlohmann::json& json, Lens& lens, Vector2f& principal_point, Vector4f& rolling_shutter) { + ELensMode mode = ELensMode::Perspective; if (json.contains("k1")) { - camera_distortion.params[0] = json["k1"]; - if (camera_distortion.params[0] != 0.f) { - mode = ECameraDistortionMode::Iterative; + lens.params[0] = json["k1"]; + if (lens.params[0] != 0.f) { + mode = ELensMode::OpenCV; } } if (json.contains("k2")) { - camera_distortion.params[1] = json["k2"]; - if (camera_distortion.params[1] != 0.f) { - mode = ECameraDistortionMode::Iterative; + lens.params[1] = json["k2"]; + if (lens.params[1] != 0.f) { + mode = ELensMode::OpenCV; } } if (json.contains("p1")) { - camera_distortion.params[2] = json["p1"]; - if (camera_distortion.params[2] != 0.f) { - mode = ECameraDistortionMode::Iterative; + lens.params[2] = json["p1"]; + if (lens.params[2] != 0.f) { + mode = ELensMode::OpenCV; } } if (json.contains("p2")) { - camera_distortion.params[3] = json["p2"]; - if (camera_distortion.params[3] != 0.f) { - mode = ECameraDistortionMode::Iterative; + lens.params[3] = json["p2"]; + if (lens.params[3] != 0.f) { + mode = ELensMode::OpenCV; } } @@ -248,31 +248,23 @@ void read_camera_distortion(const nlohmann::json& json, CameraDistortion& camera } if (json.contains("ftheta_p0")) { - if (mode != ECameraDistortionMode::None) { - tlog::warning() << "Found camera distortion parameters for multiple camera models. Overriding previous with FTheta."; - } - - camera_distortion.params[0] = json["ftheta_p0"]; - camera_distortion.params[1] = json["ftheta_p1"]; - camera_distortion.params[2] = json["ftheta_p2"]; - camera_distortion.params[3] = json["ftheta_p3"]; - camera_distortion.params[4] = json["ftheta_p4"]; - camera_distortion.params[5] = json["w"]; - camera_distortion.params[6] = json["h"]; - mode = ECameraDistortionMode::FTheta; + lens.params[0] = json["ftheta_p0"]; + lens.params[1] = json["ftheta_p1"]; + lens.params[2] = json["ftheta_p2"]; + lens.params[3] = json["ftheta_p3"]; + lens.params[4] = json["ftheta_p4"]; + lens.params[5] = json["w"]; + lens.params[6] = json["h"]; + mode = ELensMode::FTheta; } if (json.contains("latlong")) { - if (mode != ECameraDistortionMode::None) { - tlog::warning() << "Found camera distortion parameters for multiple camera models. Overriding previous with LatLong."; - } - - mode = ECameraDistortionMode::LatLong; + mode = ELensMode::LatLong; } // If there was an outer distortion mode, don't override it with nothing. - if (mode != ECameraDistortionMode::None) { - camera_distortion.mode = mode; + if (mode != ELensMode::Perspective) { + lens.mode = mode; } } @@ -489,7 +481,7 @@ NerfDataset load_nerf(const std::vector<filesystem::path>& jsonpaths, float shar result.n_extra_learnable_dims = json["n_extra_learnable_dims"]; } - CameraDistortion camera_distortion = {}; + Lens lens = {}; Vector2f principal_point = Vector2f::Constant(0.5f); Vector4f rolling_shutter = Vector4f::Zero(); @@ -497,8 +489,8 @@ NerfDataset load_nerf(const std::vector<filesystem::path>& jsonpaths, float shar info.depth_scale = json["integer_depth_scale"]; } - // Camera distortion - read_camera_distortion(json, camera_distortion, principal_point, rolling_shutter); + // Lens parameters + read_lens(json, lens, principal_point, rolling_shutter); if (json.contains("aabb_scale")) { result.aabb_scale = json["aabb_scale"]; @@ -553,7 +545,7 @@ NerfDataset load_nerf(const std::vector<filesystem::path>& jsonpaths, float shar } } - if (json.contains("frames") && json["frames"].is_array()) pool.parallelForAsync<size_t>(0, json["frames"].size(), [&progress, &n_loaded, &result, &images, &json, basepath, image_idx, info, rolling_shutter, principal_point, camera_distortion, part_after_underscore, fix_premult, enable_depth_loading, enable_ray_loading](size_t i) { + if (json.contains("frames") && json["frames"].is_array()) pool.parallelForAsync<size_t>(0, json["frames"].size(), [&progress, &n_loaded, &result, &images, &json, basepath, image_idx, info, rolling_shutter, principal_point, lens, part_after_underscore, fix_premult, enable_depth_loading, enable_ray_loading](size_t i) { size_t i_img = i + image_idx; auto& frame = json["frames"][i]; LoadedImageInfo& dst = images[i_img]; @@ -700,9 +692,9 @@ NerfDataset load_nerf(const std::vector<filesystem::path>& jsonpaths, float shar // set these from the base settings result.metadata[i_img].rolling_shutter = rolling_shutter; result.metadata[i_img].principal_point = principal_point; - result.metadata[i_img].camera_distortion = camera_distortion; + result.metadata[i_img].lens = lens; // see if there is a per-frame override - read_camera_distortion(frame, result.metadata[i_img].camera_distortion, result.metadata[i_img].principal_point, result.metadata[i_img].rolling_shutter); + read_lens(frame, result.metadata[i_img].lens, result.metadata[i_img].principal_point, result.metadata[i_img].rolling_shutter); result.xforms[i_img].start = result.nerf_matrix_to_ngp(result.xforms[i_img].start); result.xforms[i_img].end = result.nerf_matrix_to_ngp(result.xforms[i_img].end); diff --git a/src/python_api.cu b/src/python_api.cu index 45ccb5e..5009fbd 100644 --- a/src/python_api.cu +++ b/src/python_api.cu @@ -306,11 +306,11 @@ PYBIND11_MODULE(pyngp, m) { .value("Reinhard", ETonemapCurve::Reinhard) .export_values(); - py::enum_<ECameraDistortionMode>(m, "CameraDistortionMode") - .value("None", ECameraDistortionMode::None) - .value("Iterative", ECameraDistortionMode::Iterative) - .value("FTheta", ECameraDistortionMode::FTheta) - .value("LatLong", ECameraDistortionMode::LatLong) + py::enum_<ELensMode>(m, "LensMode") + .value("Perspective", ELensMode::Perspective) + .value("OpenCV", ELensMode::OpenCV) + .value("FTheta", ELensMode::FTheta) + .value("LatLong", ELensMode::LatLong) .export_values(); py::class_<BoundingBox>(m, "BoundingBox") @@ -516,11 +516,11 @@ PYBIND11_MODULE(pyngp, m) { .def("crop_box_corners", &Testbed::crop_box_corners, py::arg("nerf_space") = true) ; - py::class_<CameraDistortion> camera_distortion(m, "CameraDistortion"); - camera_distortion - .def_readwrite("mode", &CameraDistortion::mode) + py::class_<Lens> lens(m, "Lens"); + lens + .def_readwrite("mode", &Lens::mode) .def_property_readonly("params", [](py::object& obj) { - CameraDistortion& o = obj.cast<CameraDistortion&>(); + Lens& o = obj.cast<Lens&>(); return py::array{sizeof(o.params)/sizeof(o.params[0]), o.params, obj}; }) ; @@ -532,8 +532,11 @@ PYBIND11_MODULE(pyngp, m) { .def_readwrite("rgb_activation", &Testbed::Nerf::rgb_activation) .def_readwrite("density_activation", &Testbed::Nerf::density_activation) .def_readwrite("sharpen", &Testbed::Nerf::sharpen) - .def_readwrite("render_with_camera_distortion", &Testbed::Nerf::render_with_camera_distortion) - .def_readwrite("render_distortion", &Testbed::Nerf::render_distortion) + // Legacy member: lens used to be called "camera_distortion" + .def_readwrite("render_with_camera_distortion", &Testbed::Nerf::render_with_lens_distortion) + .def_readwrite("render_with_lens_distortion", &Testbed::Nerf::render_with_lens_distortion) + .def_readwrite("render_distortion", &Testbed::Nerf::render_lens) + .def_readwrite("render_lens", &Testbed::Nerf::render_lens) .def_readwrite("rendering_min_transmittance", &Testbed::Nerf::rendering_min_transmittance) .def_readwrite("cone_angle_constant", &Testbed::Nerf::cone_angle_constant) .def_readwrite("visualize_cameras", &Testbed::Nerf::visualize_cameras) @@ -557,7 +560,9 @@ PYBIND11_MODULE(pyngp, m) { py::class_<TrainingImageMetadata> metadata(m, "TrainingImageMetadata"); metadata .def_readwrite("focal_length", &TrainingImageMetadata::focal_length) - .def_readwrite("camera_distortion", &TrainingImageMetadata::camera_distortion) + // Legacy member: lens used to be called "camera_distortion" + .def_readwrite("camera_distortion", &TrainingImageMetadata::lens) + .def_readwrite("lens", &TrainingImageMetadata::lens) .def_readwrite("principal_point", &TrainingImageMetadata::principal_point) .def_readwrite("rolling_shutter", &TrainingImageMetadata::rolling_shutter) .def_readwrite("light_dir", &TrainingImageMetadata::light_dir) diff --git a/src/testbed.cu b/src/testbed.cu index 736095b..2d9ddd3 100644 --- a/src/testbed.cu +++ b/src/testbed.cu @@ -272,8 +272,8 @@ void Testbed::set_camera_to_training_view(int trainview) { m_camera = m_smoothed_camera = get_xform_given_rolling_shutter(m_nerf.training.transforms[trainview], m_nerf.training.dataset.metadata[trainview].rolling_shutter, Vector2f{0.5f, 0.5f}, 0.0f); m_relative_focal_length = m_nerf.training.dataset.metadata[trainview].focal_length / (float)m_nerf.training.dataset.metadata[trainview].resolution[m_fov_axis]; m_scale = std::max((old_look_at - view_pos()).dot(view_dir()), 0.1f); - m_nerf.render_with_camera_distortion = true; - m_nerf.render_distortion = m_nerf.training.dataset.metadata[trainview].camera_distortion; + m_nerf.render_with_lens_distortion = true; + m_nerf.render_lens = m_nerf.training.dataset.metadata[trainview].lens; m_screen_center = Vector2f::Constant(1.0f) - m_nerf.training.dataset.metadata[0].principal_point; } @@ -701,24 +701,23 @@ void Testbed::imgui() { } if (m_testbed_mode == ETestbedMode::Nerf && ImGui::TreeNode("NeRF rendering options")) { - accum_reset |= ImGui::Checkbox("Apply lens distortion", &m_nerf.render_with_camera_distortion); - - if (m_nerf.render_with_camera_distortion) { - accum_reset |= ImGui::Combo("Distortion mode", (int*)&m_nerf.render_distortion.mode, "None\0Iterative\0F-Theta\0LatLong\0"); - if (m_nerf.render_distortion.mode == ECameraDistortionMode::Iterative) { - accum_reset |= ImGui::InputFloat("k1", &m_nerf.render_distortion.params[0], 0.f, 0.f, "%.5f"); - accum_reset |= ImGui::InputFloat("k2", &m_nerf.render_distortion.params[1], 0.f, 0.f, "%.5f"); - accum_reset |= ImGui::InputFloat("p1", &m_nerf.render_distortion.params[2], 0.f, 0.f, "%.5f"); - accum_reset |= ImGui::InputFloat("p2", &m_nerf.render_distortion.params[3], 0.f, 0.f, "%.5f"); - } - else if (m_nerf.render_distortion.mode == ECameraDistortionMode::FTheta) { - accum_reset |= ImGui::InputFloat("width", &m_nerf.render_distortion.params[5], 0.f, 0.f, "%.0f"); - accum_reset |= ImGui::InputFloat("height", &m_nerf.render_distortion.params[6], 0.f, 0.f, "%.0f"); - accum_reset |= ImGui::InputFloat("f_theta p0", &m_nerf.render_distortion.params[0], 0.f, 0.f, "%.5f"); - accum_reset |= ImGui::InputFloat("f_theta p1", &m_nerf.render_distortion.params[1], 0.f, 0.f, "%.5f"); - accum_reset |= ImGui::InputFloat("f_theta p2", &m_nerf.render_distortion.params[2], 0.f, 0.f, "%.5f"); - accum_reset |= ImGui::InputFloat("f_theta p3", &m_nerf.render_distortion.params[3], 0.f, 0.f, "%.5f"); - accum_reset |= ImGui::InputFloat("f_theta p4", &m_nerf.render_distortion.params[4], 0.f, 0.f, "%.5f"); + accum_reset |= ImGui::Checkbox("Apply lens distortion", &m_nerf.render_with_lens_distortion); + + if (m_nerf.render_with_lens_distortion) { + accum_reset |= ImGui::Combo("Lens mode", (int*)&m_nerf.render_lens.mode, "Perspective\0OpenCV\0F-Theta\0LatLong\0"); + if (m_nerf.render_lens.mode == ELensMode::OpenCV) { + accum_reset |= ImGui::InputFloat("k1", &m_nerf.render_lens.params[0], 0.f, 0.f, "%.5f"); + accum_reset |= ImGui::InputFloat("k2", &m_nerf.render_lens.params[1], 0.f, 0.f, "%.5f"); + accum_reset |= ImGui::InputFloat("p1", &m_nerf.render_lens.params[2], 0.f, 0.f, "%.5f"); + accum_reset |= ImGui::InputFloat("p2", &m_nerf.render_lens.params[3], 0.f, 0.f, "%.5f"); + } else if (m_nerf.render_lens.mode == ELensMode::FTheta) { + accum_reset |= ImGui::InputFloat("width", &m_nerf.render_lens.params[5], 0.f, 0.f, "%.0f"); + accum_reset |= ImGui::InputFloat("height", &m_nerf.render_lens.params[6], 0.f, 0.f, "%.0f"); + accum_reset |= ImGui::InputFloat("f_theta p0", &m_nerf.render_lens.params[0], 0.f, 0.f, "%.5f"); + accum_reset |= ImGui::InputFloat("f_theta p1", &m_nerf.render_lens.params[1], 0.f, 0.f, "%.5f"); + accum_reset |= ImGui::InputFloat("f_theta p2", &m_nerf.render_lens.params[2], 0.f, 0.f, "%.5f"); + accum_reset |= ImGui::InputFloat("f_theta p3", &m_nerf.render_lens.params[3], 0.f, 0.f, "%.5f"); + accum_reset |= ImGui::InputFloat("f_theta p4", &m_nerf.render_lens.params[4], 0.f, 0.f, "%.5f"); } } ImGui::TreePop(); @@ -974,7 +973,7 @@ void Testbed::imgui() { if (imgui_colored_button("Mesh it!", 0.4f)) { marching_cubes(res3d, aabb, m_render_aabb_to_local, m_mesh.thresh); - m_nerf.render_with_camera_distortion = false; + m_nerf.render_with_lens_distortion = false; } if (m_mesh.indices.size()>0) { ImGui::SameLine(); @@ -2599,7 +2598,7 @@ __global__ void dlss_prep_kernel( cudaSurfaceObject_t depth_surface, cudaSurfaceObject_t mvec_surface, cudaSurfaceObject_t exposure_surface, - CameraDistortion camera_distortion, + Lens lens, const float view_dist, const float prev_view_dist, const Vector2f image_pos, @@ -2646,7 +2645,7 @@ __global__ void dlss_prep_kernel( parallax_shift, snap_to_pixel_centers, depth, - camera_distortion + lens ); surf2Dwrite(make_float2(mvec.x(), mvec.y()), mvec_surface, x_orig * sizeof(float2), y_orig); @@ -2784,7 +2783,7 @@ void Testbed::render_frame(const Matrix<float, 3, 4>& camera_matrix0, const Matr if (render_buffer.dlss()) { auto res = render_buffer.in_resolution(); - bool distortion = m_testbed_mode == ETestbedMode::Nerf && m_nerf.render_with_camera_distortion; + bool distortion = m_testbed_mode == ETestbedMode::Nerf && m_nerf.render_with_lens_distortion; const dim3 threads = { 16, 8, 1 }; const dim3 blocks = { div_round_up((uint32_t)res.x(), threads.x), div_round_up((uint32_t)res.y(), threads.y), 1 }; @@ -2803,7 +2802,7 @@ void Testbed::render_frame(const Matrix<float, 3, 4>& camera_matrix0, const Matr render_buffer.dlss()->depth(), render_buffer.dlss()->mvec(), render_buffer.dlss()->exposure(), - distortion ? m_nerf.render_distortion : CameraDistortion{}, + distortion ? m_nerf.render_lens : Lens{}, m_scale, m_prev_scale, m_image.pos, diff --git a/src/testbed_nerf.cu b/src/testbed_nerf.cu index 1d2e043..e717ea4 100644 --- a/src/testbed_nerf.cu +++ b/src/testbed_nerf.cu @@ -382,7 +382,7 @@ __global__ void mark_untrained_density_grid(const uint32_t n_elements, float* _ float voxel_radius = 0.5f*SQRT3()*scalbnf(1.0f, level) / NERF_GRIDSIZE(); int count=0; for (uint32_t j=0; j < n_training_images; ++j) { - if (metadata[j].camera_distortion.mode == ECameraDistortionMode::FTheta || metadata[j].camera_distortion.mode == ECameraDistortionMode::LatLong) { + if (metadata[j].lens.mode == ELensMode::FTheta || metadata[j].lens.mode == ELensMode::LatLong) { // not supported for now count++; break; @@ -1111,7 +1111,7 @@ __global__ void generate_training_samples_nerf( const Vector2f focal_length = metadata[img].focal_length; const Vector2f principal_point = metadata[img].principal_point; const float* extra_dims = extra_dims_gpu + img * n_extra_dims; - const CameraDistortion camera_distortion = metadata[img].camera_distortion; + const Lens lens = metadata[img].lens; const Matrix<float, 3, 4> xform = get_xform_given_rolling_shutter(training_xforms[img], metadata[img].rolling_shutter, xy, motionblur_time); @@ -1125,7 +1125,7 @@ __global__ void generate_training_samples_nerf( const Matrix<float, 3, 4> xform = get_xform_given_rolling_shutter(training_xforms[img], metadata[img].rolling_shutter, xy, 0.f); Ray ray2; ray2.o = xform.col(3); - ray2.d = f_theta_distortion(xy, principal_point, camera_distortion); + ray2.d = f_theta_distortion(xy, principal_point, lens); ray2.d = (xform.block<3, 3>(0, 0) * ray2.d).normalized(); if (i==1000) { printf("\n%d uv %0.3f,%0.3f pixel %0.2f,%0.2f transform from [%0.5f %0.5f %0.5f] to [%0.5f %0.5f %0.5f]\n" @@ -1143,9 +1143,9 @@ __global__ void generate_training_samples_nerf( } else { // Rays need to be inferred from the camera matrix ray_unnormalized.o = xform.col(3); - if (camera_distortion.mode == ECameraDistortionMode::FTheta) { - ray_unnormalized.d = f_theta_undistortion(xy - principal_point, camera_distortion.params, {0.f, 0.f, 1.f}); - } else if (camera_distortion.mode == ECameraDistortionMode::LatLong) { + if (lens.mode == ELensMode::FTheta) { + ray_unnormalized.d = f_theta_undistortion(xy - principal_point, lens.params, {0.f, 0.f, 1.f}); + } else if (lens.mode == ELensMode::LatLong) { ray_unnormalized.d = latlong_to_dir(xy); } else { ray_unnormalized.d = { @@ -1154,8 +1154,8 @@ __global__ void generate_training_samples_nerf( 1.0f, }; - if (camera_distortion.mode == ECameraDistortionMode::Iterative) { - iterative_camera_undistortion(camera_distortion.params, &ray_unnormalized.d.x(), &ray_unnormalized.d.y()); + if (lens.mode == ELensMode::OpenCV) { + iterative_opencv_lens_undistortion(lens.params, &ray_unnormalized.d.x(), &ray_unnormalized.d.y()); } } @@ -1796,7 +1796,7 @@ __global__ void init_rays_with_payload_kernel_nerf( Matrix3f render_aabb_to_local, float plane_z, float aperture_size, - CameraDistortion camera_distortion, + Lens lens, const float* __restrict__ envmap_data, const Vector2i envmap_resolution, Array4f* __restrict__ framebuffer, @@ -1838,7 +1838,7 @@ __global__ void init_rays_with_payload_kernel_nerf( snap_to_pixel_centers, plane_z, aperture_size, - camera_distortion, + lens, distortion_data, distortion_resolution ); @@ -1977,7 +1977,7 @@ void Testbed::NerfTracer::init_rays_from_camera( const Matrix3f& render_aabb_to_local, float plane_z, float aperture_size, - const CameraDistortion& camera_distortion, + const Lens& lens, const float* envmap_data, const Vector2i& envmap_resolution, const float* distortion_data, @@ -2011,7 +2011,7 @@ void Testbed::NerfTracer::init_rays_from_camera( render_aabb_to_local, plane_z, aperture_size, - camera_distortion, + lens, envmap_data, envmap_resolution, frame_buffer, @@ -2249,10 +2249,10 @@ void Testbed::render_nerf(CudaRenderBuffer& render_buffer, const Vector2i& max_r }}; // Our motion vector code can't undo f-theta and grid distortions -- so don't render these if DLSS is enabled. - bool render_opencv_camera_distortion = m_nerf.render_with_camera_distortion && (!render_buffer.dlss() || m_nerf.render_distortion.mode == ECameraDistortionMode::Iterative); - bool render_grid_camera_distortion = m_nerf.render_with_camera_distortion && !render_buffer.dlss(); + bool render_opencv_lens = m_nerf.render_with_lens_distortion && (!render_buffer.dlss() || m_nerf.render_lens.mode == ELensMode::OpenCV); + bool render_grid_distortion = m_nerf.render_with_lens_distortion && !render_buffer.dlss(); - CameraDistortion camera_distortion = render_opencv_camera_distortion ? m_nerf.render_distortion : CameraDistortion{}; + Lens lens = render_opencv_lens ? m_nerf.render_lens : Lens{}; m_nerf.tracer.init_rays_from_camera( @@ -2272,10 +2272,10 @@ void Testbed::render_nerf(CudaRenderBuffer& render_buffer, const Vector2i& max_r m_render_aabb_to_local, plane_z, m_aperture_size, - camera_distortion, + lens, m_envmap.envmap->params_inference(), m_envmap.resolution, - render_grid_camera_distortion ? m_distortion.map->params_inference() : nullptr, + render_grid_distortion ? m_distortion.map->params_inference() : nullptr, m_distortion.resolution, render_buffer.frame_buffer(), render_buffer.depth_buffer(), @@ -2384,8 +2384,8 @@ void Testbed::Nerf::Training::set_camera_intrinsics(int frame_idx, float fx, flo auto& m = dataset.metadata[frame_idx]; if (cx < 0.f) cx = -cx; else cx = cx / m.resolution.x(); if (cy < 0.f) cy = -cy; else cy = cy / m.resolution.y(); - ECameraDistortionMode mode = (k1 || k2 || p1 || p2) ? ECameraDistortionMode::Iterative : ECameraDistortionMode::None; - m.camera_distortion = { mode, k1, k2, p1, p2 }; + ELensMode mode = (k1 || k2 || p1 || p2) ? ELensMode::OpenCV : ELensMode::Perspective; + m.lens = { mode, k1, k2, p1, p2 }; m.principal_point = { cx, cy }; m.focal_length = { fx, fy }; dataset.update_metadata(frame_idx, frame_idx + 1); @@ -2550,7 +2550,7 @@ void Testbed::load_nerf_post() { // moved the second half of load_nerf here // Uncomment the following line to see how the network learns distortion from scratch rather than // starting from the distortion that's described by the training data. - // m_nerf.training.dataset.camera_distortion = {}; + // m_nerf.training.dataset.camera = {}; // Perturbation of the training cameras -- for debugging the online extrinsics learning code float perturb_amount = 0.0f; @@ -2570,7 +2570,7 @@ void Testbed::load_nerf_post() { // moved the second half of load_nerf here m_nerf.training.update_transforms(); if (!m_nerf.training.dataset.metadata.empty()) { - m_nerf.render_distortion = m_nerf.training.dataset.metadata[0].camera_distortion; + m_nerf.render_lens = m_nerf.training.dataset.metadata[0].lens; m_screen_center = Eigen::Vector2f::Constant(1.f) - m_nerf.training.dataset.metadata[0].principal_point; } -- GitLab