diff --git a/.gitmodules b/.gitmodules index 37d13bdac9a6f3ffedcf2fb1299600a66bf0adcf..23cf10097cbd7800f269aec2452b9745705a9815 100644 --- a/.gitmodules +++ b/.gitmodules @@ -28,3 +28,6 @@ [submodule "dependencies/zlib"] path = dependencies/zlib url = https://github.com/Tom94/zlib +[submodule "dependencies/OpenXR-SDK"] + path = dependencies/OpenXR-SDK + url = https://github.com/KhronosGroup/OpenXR-SDK.git diff --git a/CMakeLists.txt b/CMakeLists.txt index 8ea96eb9860e7854f7007440a17a0254f72689d7..92d926af9326d69fe84de5750f97da2c6cfd52f4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -119,6 +119,38 @@ if (NGP_BUILD_WITH_GUI) endif() endif() + # OpenXR + if (WIN32) + list(APPEND NGP_DEFINITIONS -DXR_USE_PLATFORM_WIN32 -DGLFW_EXPOSE_NATIVE_WGL) + elseif (UNIX AND NOT APPLE) + list(APPEND NGP_DEFINITIONS -DGLFW_EXPOSE_NATIVE_GLX) + if (NGP_BUILD_WITH_WAYLAND) + set(PRESENTATION_BACKEND wayland CACHE STRING " " FORCE) + set(BUILD_WITH_XLIB_HEADERS OFF CACHE BOOL " " FORCE) + set(BUILD_WITH_XCB_HEADERS OFF CACHE BOOL " " FORCE) + set(BUILD_WITH_WAYLAND_HEADERS ON CACHE BOOL " " FORCE) + list(APPEND NGP_DEFINITIONS -DGLFW_EXPOSE_NATIVE_WAYLAND -DXR_USE_PLATFORM_WAYLAND) + else() + set(PRESENTATION_BACKEND xlib CACHE STRING " " FORCE) + set(BUILD_WITH_XLIB_HEADERS ON CACHE BOOL " " FORCE) + set(BUILD_WITH_XCB_HEADERS OFF CACHE BOOL " " FORCE) + set(BUILD_WITH_WAYLAND_HEADERS OFF CACHE BOOL " " FORCE) + list(APPEND NGP_DEFINITIONS -DGLFW_EXPOSE_NATIVE_X11 -DXR_USE_PLATFORM_XLIB) + endif() + else() + message(FATAL_ERROR "No OpenXR platform set for this OS") + endif() + + add_subdirectory(dependencies/OpenXR-SDK) + + list(APPEND NGP_INCLUDE_DIRECTORIES "dependencies/OpenXR-SDK/include" "dependencies/OpenXR-SDK/src/common") + list(APPEND NGP_LIBRARIES openxr_loader) + list(APPEND GUI_SOURCES src/openxr_hmd.cu) + + # OpenGL + find_package(OpenGL REQUIRED) + + # GLFW set(GLFW_BUILD_EXAMPLES OFF CACHE BOOL " " FORCE) set(GLFW_BUILD_TESTS OFF CACHE BOOL " " FORCE) set(GLFW_BUILD_DOCS OFF CACHE BOOL " " FORCE) diff --git a/LICENSE.txt b/LICENSE.txt index 2cfc50b56f42a59b9bbeb82e56b527ad8deace84..34191874786e0339d672cd74d10175bec14b9f9d 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -1,4 +1,4 @@ -Copyright (c) 2022, NVIDIA Corporation & affiliates. All rights reserved. +Copyright (c) 2022-2023, NVIDIA Corporation & affiliates. All rights reserved. NVIDIA Source Code License for instant neural graphics primitives diff --git a/README.md b/README.md index bdf06b55e32fda8788d8c62bb361e5f7504cda7f..46451b8dd833284e8a8bf9bf5817cf646ec98f49 100644 --- a/README.md +++ b/README.md @@ -179,7 +179,10 @@ Here are the main keyboard controls for the __instant-ngp__ application. | Spacebar / C | Move up / down. | | = or + / - or _ | Increase / decrease camera velocity. | | E / Shift+E | Increase / decrease exposure. | +| Tab | Toggle menu visibility. | | T | Toggle training. After around two minutes training tends to settle down, so can be toggled off. | +| { } | Go to the first/last training set image's camera view. | +| [ ] | Go to the previous/next training set image's camera view. | | R | Reload network from file. | | Shift+R | Reset camera. | | O | Toggle visualization or accumulated error map. 
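The platform definitions set up in the CMake hunk above are consumed at compile time by the OpenXR wrapper added further down. A minimal sketch of that pattern, using the binding struct names that appear in openxr_hmd.h below (everything else here is illustrative, not code from this change):

```cpp
// Sketch only: the CMake-provided XR_USE_PLATFORM_* definition selects the
// OpenGL graphics binding at compile time. The include order mirrors
// openxr_hmd.h; xr_dependencies.h (from OpenXR-SDK/src/common, added to the
// include path above) pulls in the platform headers openxr_platform.h needs.
#define XR_USE_GRAPHICS_API_OPENGL
#include <openxr/openxr.h>
#include <xr_dependencies.h>
#include <openxr/openxr_platform.h>

#if defined(XR_USE_PLATFORM_WIN32)
using XrGlBinding = XrGraphicsBindingOpenGLWin32KHR;   // built from an HDC/HGLRC pair
#elif defined(XR_USE_PLATFORM_XLIB)
using XrGlBinding = XrGraphicsBindingOpenGLXlibKHR;    // built from Display/GLXContext state
#elif defined(XR_USE_PLATFORM_WAYLAND)
using XrGlBinding = XrGraphicsBindingOpenGLWaylandKHR; // built from a wl_display
#else
#error "No OpenXR platform set for this OS"
#endif
```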
| diff --git a/dependencies/OpenXR-SDK b/dependencies/OpenXR-SDK new file mode 160000 index 0000000000000000000000000000000000000000..e2da9ce83a4388c9622da328bf48548471261290 --- /dev/null +++ b/dependencies/OpenXR-SDK @@ -0,0 +1 @@ +Subproject commit e2da9ce83a4388c9622da328bf48548471261290 diff --git a/include/neural-graphics-primitives/camera_path.h b/include/neural-graphics-primitives/camera_path.h index 8d3fe24792b182606950fba15df410bbb692816c..9d121757233a323295f0f722f2025df523b0dffe 100644 --- a/include/neural-graphics-primitives/camera_path.h +++ b/include/neural-graphics-primitives/camera_path.h @@ -132,8 +132,8 @@ struct CameraPath { #ifdef NGP_GUI ImGuizmo::MODE m_gizmo_mode = ImGuizmo::LOCAL; ImGuizmo::OPERATION m_gizmo_op = ImGuizmo::TRANSLATE; - bool imgui_viz(ImDrawList* list, Eigen::Matrix<float, 4, 4>& view2proj, Eigen::Matrix<float, 4, 4>& world2proj, Eigen::Matrix<float, 4, 4>& world2view, Eigen::Vector2f focal, float aspect); int imgui(char path_filename_buf[1024], float frame_milliseconds, Eigen::Matrix<float, 3, 4>& camera, float slice_plane_z, float scale, float fov, float aperture_size, float bounding_radius, const Eigen::Matrix<float, 3, 4>& first_xform, int glow_mode, float glow_y_cutoff); + bool imgui_viz(ImDrawList* list, Eigen::Matrix<float, 4, 4>& view2proj, Eigen::Matrix<float, 4, 4>& world2proj, Eigen::Matrix<float, 4, 4>& world2view, Eigen::Vector2f focal, float aspect, float znear, float zfar); #endif }; diff --git a/include/neural-graphics-primitives/common.h b/include/neural-graphics-primitives/common.h index 8d3f6068b8462f29fedaa4837f3596d5187d1993..3a632b4d2f51fa1189c3fe9cf1c53b3d3052359f 100644 --- a/include/neural-graphics-primitives/common.h +++ b/include/neural-graphics-primitives/common.h @@ -232,8 +232,13 @@ enum class ELensMode : int { FTheta, LatLong, OpenCVFisheye, + Equirectangular, }; -static constexpr const char* LensModeStr = "Perspective\0OpenCV\0F-Theta\0LatLong\0OpenCV Fisheye\0\0"; +static constexpr const char* LensModeStr = "Perspective\0OpenCV\0F-Theta\0LatLong\0OpenCV Fisheye\0Equirectangular\0\0"; + +inline bool supports_dlss(ELensMode mode) { + return mode == ELensMode::Perspective || mode == ELensMode::OpenCV || mode == ELensMode::OpenCVFisheye; +} struct Lens { ELensMode mode = ELensMode::Perspective; @@ -343,6 +348,47 @@ private: std::chrono::time_point<std::chrono::steady_clock> m_creation_time; }; +template <typename T> +struct Buffer2DView { + T* data = nullptr; + Eigen::Vector2i resolution = Eigen::Vector2i::Zero(); + + // Lookup via integer pixel position (no bounds checking) + NGP_HOST_DEVICE T at(const Eigen::Vector2i& xy) const { + return data[xy.x() + xy.y() * resolution.x()]; + } + + // Lookup via UV coordinates in [0,1]^2 + NGP_HOST_DEVICE T at(const Eigen::Vector2f& uv) const { + Eigen::Vector2i xy = resolution.cast<float>().cwiseProduct(uv).cast<int>().cwiseMax(0).cwiseMin(resolution - Eigen::Vector2i::Ones()); + return at(xy); + } + + // Lookup via UV coordinates in [0,1]^2 and LERP the nearest texels + NGP_HOST_DEVICE T at_lerp(const Eigen::Vector2f& uv) const { + const Eigen::Vector2f xy_float = resolution.cast<float>().cwiseProduct(uv); + const Eigen::Vector2i xy = xy_float.cast<int>(); + + const Eigen::Vector2f weight = xy_float - xy.cast<float>(); + + auto read_val = [&](Eigen::Vector2i pos) { + pos = pos.cwiseMax(0).cwiseMin(resolution - Eigen::Vector2i::Ones()); + return at(pos); + }; + + return ( + (1 - weight.x()) * (1 - weight.y()) * read_val({xy.x(), xy.y()}) + + (weight.x()) * (1 - weight.y()) * 
read_val({xy.x()+1, xy.y()}) + + (1 - weight.x()) * (weight.y()) * read_val({xy.x(), xy.y()+1}) + + (weight.x()) * (weight.y()) * read_val({xy.x()+1, xy.y()+1}) + ); + } + + NGP_HOST_DEVICE operator bool() const { + return data; + } +}; + uint8_t* load_stbi(const fs::path& path, int* width, int* height, int* comp, int req_comp); float* load_stbi_float(const fs::path& path, int* width, int* height, int* comp, int req_comp); uint16_t* load_stbi_16(const fs::path& path, int* width, int* height, int* comp, int req_comp); diff --git a/include/neural-graphics-primitives/common_device.cuh b/include/neural-graphics-primitives/common_device.cuh index 389fc3bdab7aff5925f14c5cfe88d8aebbd94e39..361c62bafd81b58ce3eabcbe79f02b472f7767d1 100644 --- a/include/neural-graphics-primitives/common_device.cuh +++ b/include/neural-graphics-primitives/common_device.cuh @@ -28,6 +28,52 @@ NGP_NAMESPACE_BEGIN using precision_t = tcnn::network_precision_t; +// The maximum depth that can be produced when rendering a frame. +// Chosen somewhat low (rather than std::numeric_limits<float>::infinity()) +// to permit numerically stable reprojection and DLSS operation, +// even when rendering the infinitely distant horizon. +inline constexpr __device__ float MAX_DEPTH() { return 16384.0f; } + +template <typename T> +class Buffer2D { +public: + Buffer2D() = default; + Buffer2D(const Eigen::Vector2i& resolution) { + resize(resolution); + } + + T* data() const { + return m_data.data(); + } + + size_t bytes() const { + return m_data.bytes(); + } + + void resize(const Eigen::Vector2i& resolution) { + m_data.resize(resolution.prod()); + m_resolution = resolution; + } + + const Eigen::Vector2i& resolution() const { + return m_resolution; + } + + Buffer2DView<T> view() const { + // Row major for now. + return {data(), m_resolution}; + } + + Buffer2DView<const T> const_view() const { + // Row major for now. 
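The Buffer2DView introduced in common.h above is the lightweight non-owning view this change threads through the renderer for envmaps, distortion grids, and hidden-area masks. A small host-side sketch of its lookup semantics (the 2x2 image and its values are made up; assumes common.h compiles host-side, which is what its NGP_HOST_DEVICE annotations are for):

```cpp
#include <neural-graphics-primitives/common.h>

#include <Eigen/Dense>

#include <cstdio>

int main() {
	// Row-major 2x2 image: texel (x, y) holds the value x + 2*y.
	const float pixels[4] = {0.0f, 1.0f, 2.0f, 3.0f};
	ngp::Buffer2DView<const float> view{pixels, {2, 2}};

	float nearest = view.at(Eigen::Vector2i{1, 0});              // 1.0: unchecked integer lookup
	float blended = view.at_lerp(Eigen::Vector2f{0.25f, 0.25f}); // 1.5: bilinear blend of all four texels
	printf("nearest = %f, blended = %f\n", nearest, blended);
}
```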
+ return {data(), m_resolution}; + } + +private: + tcnn::GPUMemory<T> m_data; + Eigen::Vector2i m_resolution; +}; + inline NGP_HOST_DEVICE float srgb_to_linear(float srgb) { if (srgb <= 0.04045f) { return srgb / 12.92f; @@ -76,42 +122,9 @@ inline NGP_HOST_DEVICE Eigen::Array3f linear_to_srgb_derivative(const Eigen::Arr return {linear_to_srgb_derivative(x.x()), linear_to_srgb_derivative(x.y()), (linear_to_srgb_derivative(x.z()))}; } -template <uint32_t N_DIMS, typename T> -NGP_HOST_DEVICE Eigen::Matrix<float, N_DIMS, 1> read_image(const T* __restrict__ data, const Eigen::Vector2i& resolution, const Eigen::Vector2f& pos) { - const Eigen::Vector2f pos_float = Eigen::Vector2f{pos.x() * (float)(resolution.x()-1), pos.y() * (float)(resolution.y()-1)}; - const Eigen::Vector2i texel = pos_float.cast<int>(); - - const Eigen::Vector2f weight = pos_float - texel.cast<float>(); - - auto read_val = [&](Eigen::Vector2i pos) { - pos.x() = std::max(std::min(pos.x(), resolution.x()-1), 0); - pos.y() = std::max(std::min(pos.y(), resolution.y()-1), 0); - - Eigen::Matrix<float, N_DIMS, 1> result; - if (std::is_same<T, float>::value) { - result = *(Eigen::Matrix<T, N_DIMS, 1>*)&data[(pos.x() + pos.y() * resolution.x()) * N_DIMS]; - } else { - auto val = *(tcnn::vector_t<T, N_DIMS>*)&data[(pos.x() + pos.y() * resolution.x()) * N_DIMS]; - - NGP_PRAGMA_UNROLL - for (uint32_t i = 0; i < N_DIMS; ++i) { - result[i] = (float)val[i]; - } - } - return result; - }; - - return ( - (1 - weight.x()) * (1 - weight.y()) * read_val({texel.x(), texel.y()}) + - (weight.x()) * (1 - weight.y()) * read_val({texel.x()+1, texel.y()}) + - (1 - weight.x()) * (weight.y()) * read_val({texel.x(), texel.y()+1}) + - (weight.x()) * (weight.y()) * read_val({texel.x()+1, texel.y()+1}) - ); -} - template <uint32_t N_DIMS, typename T> __device__ void deposit_image_gradient(const Eigen::Matrix<float, N_DIMS, 1>& value, T* __restrict__ gradient, T* __restrict__ gradient_weight, const Eigen::Vector2i& resolution, const Eigen::Vector2f& pos) { - const Eigen::Vector2f pos_float = Eigen::Vector2f{pos.x() * (resolution.x()-1), pos.y() * (resolution.y()-1)}; + const Eigen::Vector2f pos_float = resolution.cast<float>().cwiseProduct(pos); const Eigen::Vector2i texel = pos_float.cast<int>(); const Eigen::Vector2f weight = pos_float - texel.cast<float>(); @@ -142,6 +155,138 @@ __device__ void deposit_image_gradient(const Eigen::Matrix<float, N_DIMS, 1>& va deposit_val(value, (weight.x()) * (weight.y()), {texel.x()+1, texel.y()+1}); } +struct FoveationPiecewiseQuadratic { + NGP_HOST_DEVICE FoveationPiecewiseQuadratic() = default; + + FoveationPiecewiseQuadratic(float center_pixel_steepness, float center_inverse_piecewise_y, float center_radius) { + float center_inverse_radius = center_radius * center_pixel_steepness; + float left_inverse_piecewise_switch = center_inverse_piecewise_y - center_inverse_radius; + float right_inverse_piecewise_switch = center_inverse_piecewise_y + center_inverse_radius; + + if (left_inverse_piecewise_switch < 0) { + left_inverse_piecewise_switch = 0.0f; + } + + if (right_inverse_piecewise_switch > 1) { + right_inverse_piecewise_switch = 1.0f; + } + + float am = center_pixel_steepness; + float d = (right_inverse_piecewise_switch - left_inverse_piecewise_switch) / center_pixel_steepness / 2; + + // binary search for l,r,bm since analytical is very complex + float bm; + float m_min = 0.0f; + float m_max = 1.0f; + for (uint32_t i = 0; i < 20; i++) { + float m = (m_min + m_max) / 2.0f; + float l = m - d; + float r = m + d; + + bm = 
-((am - 1) * l * l) / (r * r - 2 * r + l * l + 1); + + float l_actual = (left_inverse_piecewise_switch - bm) / am; + float r_actual = (right_inverse_piecewise_switch - bm) / am; + float m_actual = (l_actual + r_actual) / 2; + + if (m_actual > m) { + m_min = m; + } else { + m_max = m; + } + } + + float l = (left_inverse_piecewise_switch - bm) / am; + float r = (right_inverse_piecewise_switch - bm) / am; + + // Full linear case. Default construction covers this. + if ((l == 0.0f && r == 1.0f) || (am == 1.0f)) { + return; + } + + // write out solution + switch_left = l; + switch_right = r; + this->am = am; + al = (am - 1) / (r * r - 2 * r + l * l + 1); + bl = (am * (r * r - 2 * r + 1) + am * l * l + (2 - 2 * am) * l) / (r * r - 2 * r + l * l + 1); + cl = 0; + this->bm = bm = -((am - 1) * l * l) / (r * r - 2 * r + l * l + 1); + ar = -(am - 1) / (r * r - 2 * r + l * l + 1); + br = (am * (r * r + 1) - 2 * r + am * l * l) / (r * r - 2 * r + l * l + 1); + cr = -(am * r * r - r * r + (am - 1) * l * l) / (r * r - 2 * r + l * l + 1); + + inv_switch_left = am * switch_left + bm; + inv_switch_right = am * switch_right + bm; + } + + // left parabola: al * x^2 + bl * x + cl + float al = 0.0f, bl = 0.0f, cl = 0.0f; + // middle linear piece: am * x + bm. am should give 1:1 pixel mapping between warped size and full size. + float am = 1.0f, bm = 0.0f; + // right parabola: ar * x^2 + br * x + cr + float ar = 0.0f, br = 0.0f, cr = 0.0f; + + // points where left and right switch over from quadratic to linear + float switch_left = 0.0f, switch_right = 1.0f; + // same, in inverted space + float inv_switch_left = 0.0f, inv_switch_right = 1.0f; + + NGP_HOST_DEVICE float warp(float x) const { + x = tcnn::clamp(x, 0.0f, 1.0f); + if (x < switch_left) { + return al * x * x + bl * x + cl; + } else if (x > switch_right) { + return ar * x * x + br * x + cr; + } else { + return am * x + bm; + } + } + + NGP_HOST_DEVICE float unwarp(float y) const { + y = tcnn::clamp(y, 0.0f, 1.0f); + if (y < inv_switch_left) { + return (std::sqrt(-4 * al * cl + 4 * al * y + bl * bl) - bl) / (2 * al); + } else if (y > inv_switch_right) { + return (std::sqrt(-4 * ar * cr + 4 * ar * y + br * br) - br) / (2 * ar); + } else { + return (y - bm) / am; + } + } + + NGP_HOST_DEVICE float density(float x) const { + x = tcnn::clamp(x, 0.0f, 1.0f); + if (x < switch_left) { + return 2 * al * x + bl; + } else if (x > switch_right) { + return 2 * ar * x + br; + } else { + return am; + } + } +}; + +struct Foveation { + NGP_HOST_DEVICE Foveation() = default; + + Foveation(const Eigen::Vector2f& center_pixel_steepness, const Eigen::Vector2f& center_inverse_piecewise_y, const Eigen::Vector2f& center_radius) + : warp_x{center_pixel_steepness.x(), center_inverse_piecewise_y.x(), center_radius.x()}, warp_y{center_pixel_steepness.y(), center_inverse_piecewise_y.y(), center_radius.y()} {} + + FoveationPiecewiseQuadratic warp_x, warp_y; + + NGP_HOST_DEVICE Eigen::Vector2f warp(const Eigen::Vector2f& x) const { + return {warp_x.warp(x.x()), warp_y.warp(x.y())}; + } + + NGP_HOST_DEVICE Eigen::Vector2f unwarp(const Eigen::Vector2f& y) const { + return {warp_x.unwarp(y.x()), warp_y.unwarp(y.y())}; + } + + NGP_HOST_DEVICE float density(const Eigen::Vector2f& x) const { + return warp_x.density(x.x()) * warp_y.density(x.y()); + } +}; + template <typename T> NGP_HOST_DEVICE inline void opencv_lens_distortion_delta(const T* extra_params, const T u, const T v, T* du, T* dv) { const T k1 = extra_params[0]; @@ -292,37 +437,53 @@ inline NGP_HOST_DEVICE Eigen::Vector3f 
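Because the coefficients of the two parabolas come out of a 20-iteration binary search rather than a closed form, a quick round-trip check is the easiest way to see that warp, unwarp, and density are mutually consistent. A host-side sketch with purely illustrative parameters, not values used by the renderer (assumes common_device.cuh is compiled as CUDA host code, e.g. in a .cu file):

```cpp
#include <neural-graphics-primitives/common_device.cuh>

#include <cstdio>

int main() {
	// Illustrative parameters: steepness 0.5 at the fovea, fovea centered at
	// 0.4 in warped space, radius 0.2. unwarp() should invert warp(), and
	// density() is the derivative d(warp)/dx, i.e. the local resolution scale.
	ngp::FoveationPiecewiseQuadratic q{0.5f, 0.4f, 0.2f};
	for (int i = 0; i <= 8; ++i) {
		float x = i / 8.0f;
		printf("x=%.3f warp=%.4f unwarp(warp)=%.4f density=%.4f\n", x, q.warp(x), q.unwarp(q.warp(x)), q.density(x));
	}
}
```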
latlong_to_dir(const Eigen::Vector2f& uv) return {sp * ct, st, cp * ct}; } -inline NGP_HOST_DEVICE Ray pixel_to_ray( +inline NGP_HOST_DEVICE Eigen::Vector3f equirectangular_to_dir(const Eigen::Vector2f& uv) { + float ct = (uv.y() - 0.5f) * 2.0f; + float st = std::sqrt(std::max(1.0f - ct * ct, 0.0f)); + float phi = (uv.x() - 0.5f) * PI() * 2.0f; + float sp, cp; + sincosf(phi, &sp, &cp); + return {sp * st, ct, cp * st}; +} + +inline NGP_HOST_DEVICE Ray uv_to_ray( uint32_t spp, - const Eigen::Vector2i& pixel, + const Eigen::Vector2f& uv, const Eigen::Vector2i& resolution, const Eigen::Vector2f& focal_length, const Eigen::Matrix<float, 3, 4>& camera_matrix, const Eigen::Vector2f& screen_center, - const Eigen::Vector3f& parallax_shift, - bool snap_to_pixel_centers = false, + const Eigen::Vector3f& parallax_shift = Eigen::Vector3f::Zero(), float near_distance = 0.0f, float focus_z = 1.0f, float aperture_size = 0.0f, + const Foveation& foveation = {}, + Buffer2DView<const uint8_t> hidden_area_mask = {}, const Lens& lens = {}, - const float* __restrict__ distortion_grid = nullptr, - const Eigen::Vector2i distortion_grid_resolution = Eigen::Vector2i::Zero() + Buffer2DView<const Eigen::Vector2f> distortion = {} ) { - Eigen::Vector2f offset = ld_random_pixel_offset(snap_to_pixel_centers ? 0 : spp); - Eigen::Vector2f uv = (pixel.cast<float>() + offset).cwiseQuotient(resolution.cast<float>()); + Eigen::Vector2f warped_uv = foveation.warp(uv); + + // Check the hidden area mask _after_ applying foveation, because foveation will be undone + // before blitting to the framebuffer to which the hidden area mask corresponds. + if (hidden_area_mask && !hidden_area_mask.at(warped_uv)) { + return Ray::invalid(); + } Eigen::Vector3f dir; if (lens.mode == ELensMode::FTheta) { - dir = f_theta_undistortion(uv - screen_center, lens.params, {1000.f, 0.f, 0.f}); - if (dir.x() == 1000.f) { - return {{1000.f, 0.f, 0.f}, {0.f, 0.f, 1.f}}; // return a point outside the aabb so the pixel is not rendered + dir = f_theta_undistortion(warped_uv - screen_center, lens.params, {0.f, 0.f, 0.f}); + if (dir == Eigen::Vector3f::Zero()) { + return Ray::invalid(); } } else if (lens.mode == ELensMode::LatLong) { - dir = latlong_to_dir(uv); + dir = latlong_to_dir(warped_uv); + } else if (lens.mode == ELensMode::Equirectangular) { + dir = equirectangular_to_dir(warped_uv); } else { dir = { - (uv.x() - screen_center.x()) * (float)resolution.x() / focal_length.x(), - (uv.y() - screen_center.y()) * (float)resolution.y() / focal_length.y(), + (warped_uv.x() - screen_center.x()) * (float)resolution.x() / focal_length.x(), + (warped_uv.y() - screen_center.y()) * (float)resolution.y() / focal_length.y(), 1.0f }; @@ -332,8 +493,9 @@ inline NGP_HOST_DEVICE Ray pixel_to_ray( iterative_opencv_fisheye_lens_undistortion(lens.params, &dir.x(), &dir.y()); } } - if (distortion_grid) { - dir.head<2>() += read_image<2>(distortion_grid, distortion_grid_resolution, uv); + + if (distortion) { + dir.head<2>() += distortion.at_lerp(warped_uv); } Eigen::Vector3f head_pos = {parallax_shift.x(), parallax_shift.y(), 0.f}; @@ -341,26 +503,60 @@ inline NGP_HOST_DEVICE Ray pixel_to_ray( dir = camera_matrix.block<3, 3>(0, 0) * dir; Eigen::Vector3f origin = camera_matrix.block<3, 3>(0, 0) * head_pos + camera_matrix.col(3); - - if (aperture_size > 0.0f) { + if (aperture_size != 0.0f) { Eigen::Vector3f lookat = origin + dir * focus_z; - Eigen::Vector2f blur = aperture_size * square2disk_shirley(ld_random_val_2d(spp, (uint32_t)pixel.x() * 19349663 + (uint32_t)pixel.y() * 
96925573) * 2.0f - Eigen::Vector2f::Ones()); + Eigen::Vector2f blur = aperture_size * square2disk_shirley(ld_random_val_2d(spp, uv.cwiseProduct(resolution.cast<float>()).cast<int>().dot(Eigen::Vector2i{19349663, 96925573})) * 2.0f - Eigen::Vector2f::Ones()); origin += camera_matrix.block<3, 2>(0, 0) * blur; dir = (lookat - origin) / focus_z; } - - origin += dir * near_distance; + origin += dir * near_distance; return {origin, dir}; } -inline NGP_HOST_DEVICE Eigen::Vector2f pos_to_pixel( +inline NGP_HOST_DEVICE Ray pixel_to_ray( + uint32_t spp, + const Eigen::Vector2i& pixel, + const Eigen::Vector2i& resolution, + const Eigen::Vector2f& focal_length, + const Eigen::Matrix<float, 3, 4>& camera_matrix, + const Eigen::Vector2f& screen_center, + const Eigen::Vector3f& parallax_shift = Eigen::Vector3f::Zero(), + bool snap_to_pixel_centers = false, + float near_distance = 0.0f, + float focus_z = 1.0f, + float aperture_size = 0.0f, + const Foveation& foveation = {}, + Buffer2DView<const uint8_t> hidden_area_mask = {}, + const Lens& lens = {}, + Buffer2DView<const Eigen::Vector2f> distortion = {} +) { + return uv_to_ray( + spp, + (pixel.cast<float>() + ld_random_pixel_offset(snap_to_pixel_centers ? 0 : spp)).cwiseQuotient(resolution.cast<float>()), + resolution, + focal_length, + camera_matrix, + screen_center, + parallax_shift, + near_distance, + focus_z, + aperture_size, + foveation, + hidden_area_mask, + lens, + distortion + ); +} + +inline NGP_HOST_DEVICE Eigen::Vector2f pos_to_uv( const Eigen::Vector3f& pos, const Eigen::Vector2i& resolution, const Eigen::Vector2f& focal_length, const Eigen::Matrix<float, 3, 4>& camera_matrix, const Eigen::Vector2f& screen_center, const Eigen::Vector3f& parallax_shift, + const Foveation& foveation = {}, const Lens& lens = {} ) { // Express ray in terms of camera frame @@ -386,10 +582,23 @@ inline NGP_HOST_DEVICE Eigen::Vector2f pos_to_pixel( dir.y() += dv; Eigen::Vector2f uv = Eigen::Vector2f{dir.x(), dir.y()}.cwiseProduct(focal_length).cwiseQuotient(resolution.cast<float>()) + screen_center; - return uv.cwiseProduct(resolution.cast<float>()); + return foveation.unwarp(uv); } -inline NGP_HOST_DEVICE Eigen::Vector2f motion_vector_3d( +inline NGP_HOST_DEVICE Eigen::Vector2f pos_to_pixel( + const Eigen::Vector3f& pos, + const Eigen::Vector2i& resolution, + const Eigen::Vector2f& focal_length, + const Eigen::Matrix<float, 3, 4>& camera_matrix, + const Eigen::Vector2f& screen_center, + const Eigen::Vector3f& parallax_shift, + const Foveation& foveation = {}, + const Lens& lens = {} +) { + return pos_to_uv(pos, resolution, focal_length, camera_matrix, screen_center, parallax_shift, foveation, lens).cwiseProduct(resolution.cast<float>()); +} + +inline NGP_HOST_DEVICE Eigen::Vector2f motion_vector( const uint32_t sample_index, const Eigen::Vector2i& pixel, const Eigen::Vector2i& resolution, @@ -400,6 +609,8 @@ inline NGP_HOST_DEVICE Eigen::Vector2f motion_vector_3d( const Eigen::Vector3f& parallax_shift, const bool snap_to_pixel_centers, const float depth, + const Foveation& foveation = {}, + const Foveation& prev_foveation = {}, const Lens& lens = {} ) { Ray ray = pixel_to_ray( @@ -414,98 +625,39 @@ inline NGP_HOST_DEVICE Eigen::Vector2f motion_vector_3d( 0.0f, 1.0f, 0.0f, - lens, - nullptr, - Eigen::Vector2i::Zero() + foveation, + {}, // No hidden area mask + lens ); Eigen::Vector2f prev_pixel = pos_to_pixel( - ray.o + ray.d * depth, + ray(depth), resolution, focal_length, prev_camera, screen_center, parallax_shift, + prev_foveation, lens ); return prev_pixel - 
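For the new equirectangular lens mode, a tiny check that the uv-to-direction mapping behaves as expected at a few reference points (host-side sketch, again assuming common_device.cuh compiles as CUDA host code):

```cpp
#include <neural-graphics-primitives/common_device.cuh>

#include <cstdio>

int main() {
	// uv = (0.5, 0.5) is the view center: cos(theta) = 0 and phi = 0, so the
	// direction is +z. uv.y() moves linearly in cos(theta), uv.x() in azimuth,
	// and every result is a unit vector.
	Eigen::Vector2f uvs[] = {{0.5f, 0.5f}, {0.25f, 0.5f}, {0.5f, 0.0f}, {0.5f, 1.0f}};
	for (const auto& uv : uvs) {
		Eigen::Vector3f d = ngp::equirectangular_to_dir(uv);
		printf("uv=(%.2f, %.2f) -> dir=(%+.3f, %+.3f, %+.3f), norm=%.3f\n", uv.x(), uv.y(), d.x(), d.y(), d.z(), d.norm());
	}
}
```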
(pixel.cast<float>() + ld_random_pixel_offset(sample_index)); } -inline NGP_HOST_DEVICE Eigen::Vector2f pixel_to_image_uv( - const uint32_t sample_index, - const Eigen::Vector2i& pixel, - const Eigen::Vector2i& resolution, - const Eigen::Vector2i& image_resolution, - const Eigen::Vector2f& screen_center, - const float view_dist, - const Eigen::Vector2f& image_pos, - const bool snap_to_pixel_centers -) { - Eigen::Vector2f jit = ld_random_pixel_offset(snap_to_pixel_centers ? 0 : sample_index); - Eigen::Vector2f offset = screen_center.cwiseProduct(resolution.cast<float>()) + jit; - - float y_scale = view_dist; - float x_scale = y_scale * resolution.x() / resolution.y(); - - return { - ((x_scale * (pixel.x() + offset.x())) / resolution.x() - view_dist * image_pos.x()) / image_resolution.x() * image_resolution.y(), - (y_scale * (pixel.y() + offset.y())) / resolution.y() - view_dist * image_pos.y() - }; -} - -inline NGP_HOST_DEVICE Eigen::Vector2f image_uv_to_pixel( - const Eigen::Vector2f& uv, - const Eigen::Vector2i& resolution, - const Eigen::Vector2i& image_resolution, - const Eigen::Vector2f& screen_center, - const float view_dist, - const Eigen::Vector2f& image_pos -) { - Eigen::Vector2f offset = screen_center.cwiseProduct(resolution.cast<float>()); - - float y_scale = view_dist; - float x_scale = y_scale * resolution.x() / resolution.y(); +// Maps view-space depth (physical units) in the range [znear, zfar] hyperbolically to +// the interval [1, 0]. This is the reverse-z-component of "normalized device coordinates", +// which are commonly used in rasterization, where linear interpolation in screen space +// has to be equivalent to linear interpolation in real space (which, in turn, is +// guaranteed by the hyperbolic mapping of depth). This format is commonly found in +// z-buffers, and hence expected by downstream image processing functions, such as DLSS +// and VR reprojection. +inline NGP_HOST_DEVICE float to_ndc_depth(float z, float n, float f) { + // View depth outside of the view frustum leads to output outside of [0, 1] + z = tcnn::clamp(z, n, f); - return { - ((uv.x() / image_resolution.y() * image_resolution.x()) + view_dist * image_pos.x()) * resolution.x() / x_scale - offset.x(), - (uv.y() + view_dist * image_pos.y()) * resolution.y() / y_scale - offset.y() - }; -} - -inline NGP_HOST_DEVICE Eigen::Vector2f motion_vector_2d( - const uint32_t sample_index, - const Eigen::Vector2i& pixel, - const Eigen::Vector2i& resolution, - const Eigen::Vector2i& image_resolution, - const Eigen::Vector2f& screen_center, - const float view_dist, - const float prev_view_dist, - const Eigen::Vector2f& image_pos, - const Eigen::Vector2f& prev_image_pos, - const bool snap_to_pixel_centers -) { - Eigen::Vector2f uv = pixel_to_image_uv( - sample_index, - pixel, - resolution, - image_resolution, - screen_center, - view_dist, - image_pos, - snap_to_pixel_centers - ); - - Eigen::Vector2f prev_pixel = image_uv_to_pixel( - uv, - resolution, - image_resolution, - screen_center, - prev_view_dist, - prev_image_pos - ); - - return prev_pixel - (pixel.cast<float>() + ld_random_pixel_offset(sample_index)); + float scale = n / (n - f); + float bias = -f * scale; + return tcnn::clamp((z * scale + bias) / z, 0.0f, 1.0f); } inline NGP_HOST_DEVICE float fov_to_focal_length(int resolution, float degrees) { @@ -587,7 +739,8 @@ inline NGP_HOST_DEVICE void apply_quilting(uint32_t* x, uint32_t* y, const Eigen if (quilting_dims == Eigen::Vector2i{2, 1}) { // Likely VR: parallax_shift.x() is the IPD in this case. 
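A quick numeric check of to_ndc_depth above; the near and far values mirror the m_ndc_znear = 1/32 and m_ndc_zfar = 128 defaults that this diff adds to testbed.h:

```cpp
#include <neural-graphics-primitives/common_device.cuh>

#include <cstdio>

int main() {
	const float n = 1.0f / 32.0f, f = 128.0f;
	// Reverse z: the near plane maps to 1, the far plane to 0, and the mapping
	// is hyperbolic, so most of the [0, 1] range is spent close to the camera.
	for (float z : {n, 0.1f, 1.0f, 10.0f, f}) {
		printf("z = %8.4f -> ndc = %.6f\n", z, ngp::to_ndc_depth(z, n, f));
	}
}
```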
The following code centers the camera matrix between both eyes. - parallax_shift.x() = idx ? (-0.5f * parallax_shift.x()) : (0.5f * parallax_shift.x()); + // idx == 0 -> left eye -> -1/2 x + parallax_shift.x() = (idx == 0) ? (-0.5f * parallax_shift.x()) : (0.5f * parallax_shift.x()); } else { // Likely HoloPlay lenticular display: in this case, `parallax_shift.z()` is the inverse height of the head above the display. // The following code computes the x-offset of views as a function of this. diff --git a/include/neural-graphics-primitives/dlss.h b/include/neural-graphics-primitives/dlss.h index dbe86fccca1cd0d911535b340e8c2a05bd406a5b..49d16f95925bd0b032c44440431d7ee72a433ef3 100644 --- a/include/neural-graphics-primitives/dlss.h +++ b/include/neural-graphics-primitives/dlss.h @@ -54,16 +54,16 @@ public: virtual EDlssQuality quality() const = 0; }; -#ifdef NGP_VULKAN -std::shared_ptr<IDlss> dlss_init(const Eigen::Vector2i& out_resolution); +class IDlssProvider { +public: + virtual ~IDlssProvider() {} -void vulkan_and_ngx_init(); -size_t dlss_allocated_bytes(); -void vulkan_and_ngx_destroy(); -#else -inline size_t dlss_allocated_bytes() { - return 0; -} + virtual size_t allocated_bytes() const = 0; + virtual std::unique_ptr<IDlss> init_dlss(const Eigen::Vector2i& out_resolution) = 0; +}; + +#ifdef NGP_VULKAN +std::shared_ptr<IDlssProvider> init_vulkan_and_ngx(); #endif NGP_NAMESPACE_END diff --git a/include/neural-graphics-primitives/envmap.cuh b/include/neural-graphics-primitives/envmap.cuh index 6960800719147f13170d8347a6493c83064291cc..7ba6698fec85609c9ef981a5c7a305ace0399c77 100644 --- a/include/neural-graphics-primitives/envmap.cuh +++ b/include/neural-graphics-primitives/envmap.cuh @@ -26,31 +26,22 @@ NGP_NAMESPACE_BEGIN -template <typename T> -__device__ Eigen::Array4f read_envmap(const T* __restrict__ envmap_data, const Eigen::Vector2i envmap_resolution, const Eigen::Vector3f& dir) { +inline __device__ Eigen::Array4f read_envmap(const Buffer2DView<const Eigen::Array4f>& envmap, const Eigen::Vector3f& dir) { auto dir_cyl = dir_to_spherical_unorm({dir.z(), -dir.x(), dir.y()}); - auto envmap_float = Eigen::Vector2f{dir_cyl.y() * (envmap_resolution.x()-1), dir_cyl.x() * (envmap_resolution.y()-1)}; + auto envmap_float = Eigen::Vector2f{dir_cyl.y() * (envmap.resolution.x()-1), dir_cyl.x() * (envmap.resolution.y()-1)}; Eigen::Vector2i envmap_texel = envmap_float.cast<int>(); auto weight = envmap_float - envmap_texel.cast<float>(); auto read_val = [&](Eigen::Vector2i pos) { if (pos.x() < 0) { - pos.x() += envmap_resolution.x(); - } else if (pos.x() >= envmap_resolution.x()) { - pos.x() -= envmap_resolution.x(); - } - pos.y() = std::max(std::min(pos.y(), envmap_resolution.y()-1), 0); - - Eigen::Array4f result; - if (std::is_same<T, float>::value) { - result = *(Eigen::Array4f*)&envmap_data[(pos.x() + pos.y() * envmap_resolution.x()) * 4]; - } else { - auto val = *(tcnn::vector_t<T, 4>*)&envmap_data[(pos.x() + pos.y() * envmap_resolution.x()) * 4]; - result = {(float)val[0], (float)val[1], (float)val[2], (float)val[3]}; + pos.x() += envmap.resolution.x(); + } else if (pos.x() >= envmap.resolution.x()) { + pos.x() -= envmap.resolution.x(); } - return result; + pos.y() = std::max(std::min(pos.y(), envmap.resolution.y()-1), 0); + return envmap.at(pos); }; auto result = ( diff --git a/include/neural-graphics-primitives/openxr_hmd.h b/include/neural-graphics-primitives/openxr_hmd.h new file mode 100644 index 0000000000000000000000000000000000000000..ee442ef9aa5f07abeae2c2f78a06f3d376c59916 --- 
/dev/null +++ b/include/neural-graphics-primitives/openxr_hmd.h @@ -0,0 +1,261 @@ +/* + * Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved. + * + * NVIDIA CORPORATION and its licensors retain all intellectual property + * and proprietary rights in and to this software, related documentation + * and any modifications thereto. Any use, reproduction, disclosure or + * distribution of this software and related documentation without an express + * license agreement from NVIDIA CORPORATION is strictly prohibited. + */ + +/** @file openxr_hmd.h + * @author Thomas Müller & Ingo Esser & Robert Menzel, NVIDIA + * @brief Wrapper around the OpenXR API, providing access to + * per-eye framebuffers, lens parameters, visible area, + * view, hand, and eye poses, as well as controller inputs. + */ + +#pragma once + +#ifdef _WIN32 +# include <GL/gl3w.h> +#else +# include <GL/glew.h> +#endif + +#define XR_USE_GRAPHICS_API_OPENGL + +#include <neural-graphics-primitives/common_device.cuh> + +#include <openxr/openxr.h> +#include <xr_linear.h> +#include <xr_dependencies.h> +#include <openxr/openxr_platform.h> + +#include <Eigen/Dense> + +#include <tiny-cuda-nn/gpu_memory.h> + +#include <array> +#include <memory> +#include <vector> + +#ifdef __GNUC__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wmissing-field-initializers" //TODO: XR structs are uninitialized apart from their type +#endif + +NGP_NAMESPACE_BEGIN + +class OpenXRHMD { +public: + enum class ControlFlow { + CONTINUE, + RESTART, + QUIT, + }; + + struct FrameInfo { + struct View { + GLuint framebuffer; + XrCompositionLayerProjectionView view{XR_TYPE_COMPOSITION_LAYER_PROJECTION_VIEW}; + XrCompositionLayerDepthInfoKHR depth_info{XR_TYPE_COMPOSITION_LAYER_DEPTH_INFO_KHR}; + std::shared_ptr<Buffer2D<uint8_t>> hidden_area_mask = nullptr; + Eigen::Matrix<float, 3, 4> pose; + }; + struct Hand { + Eigen::Matrix<float, 3, 4> pose; + bool pose_active = false; + Eigen::Vector2f thumbstick = Eigen::Vector2f::Zero(); + float grab_strength = 0.0f; + bool grabbing = false; + bool pressing = false; + Eigen::Vector3f grab_pos; + Eigen::Vector3f prev_grab_pos; + Eigen::Vector3f drag() const { + return grab_pos - prev_grab_pos; + } + }; + std::vector<View> views; + Hand hands[2]; + }; + using FrameInfoPtr = std::shared_ptr<FrameInfo>; + + // RAII OpenXRHMD with OpenGL +#if defined(XR_USE_PLATFORM_WIN32) + OpenXRHMD(HDC hdc, HGLRC hglrc); +#elif defined(XR_USE_PLATFORM_XLIB) + OpenXRHMD(Display* xDisplay, uint32_t visualid, GLXFBConfig glxFBConfig, GLXDrawable glxDrawable, GLXContext glxContext); +#elif defined(XR_USE_PLATFORM_WAYLAND) + OpenXRHMD(wl_display* display); +#endif + + virtual ~OpenXRHMD(); + + // disallow copy / move + OpenXRHMD(const OpenXRHMD&) = delete; + OpenXRHMD& operator=(const OpenXRHMD&) = delete; + OpenXRHMD(OpenXRHMD&&) = delete; + OpenXRHMD& operator=(OpenXRHMD&&) = delete; + + void clear(); + + // poll events, handle state changes, return control flow information + ControlFlow poll_events(); + + // begin OpenXR frame, return views to render + FrameInfoPtr begin_frame(); + // must be called for each begin_frame + void end_frame(FrameInfoPtr frame_info, float znear, float zfar); + + // if true, call begin_frame and end_frame - does not imply visibility + bool must_run_frame_loop() const { + return + m_session_state == XR_SESSION_STATE_READY || + m_session_state == XR_SESSION_STATE_SYNCHRONIZED || + m_session_state == XR_SESSION_STATE_VISIBLE || + m_session_state == XR_SESSION_STATE_FOCUSED; + } + + // if true, VR is being rendered to the HMD + bool is_visible() const { + // XR_SESSION_STATE_VISIBLE -> app content is shown in HMD + // XR_SESSION_STATE_FOCUSED -> VISIBLE + input is sent to app + return m_session_state == XR_SESSION_STATE_VISIBLE || m_session_state == XR_SESSION_STATE_FOCUSED; + } + +private: + // steps of the init process, called from the constructor + void init_create_xr_instance(); + void init_get_xr_system(); + void init_configure_xr_views(); + void init_check_for_xr_blend_mode(); + void init_xr_actions(); + +#if defined(XR_USE_PLATFORM_WIN32) + void init_open_gl(HDC hdc, HGLRC hglrc); +#elif defined(XR_USE_PLATFORM_XLIB) + void init_open_gl(Display* xDisplay, uint32_t visualid, GLXFBConfig glxFBConfig, GLXDrawable glxDrawable, GLXContext glxContext); +#elif defined(XR_USE_PLATFORM_WAYLAND) + void init_open_gl(wl_display* display); +#endif + + void init_xr_session(); + void init_xr_spaces(); + void init_xr_swapchain_open_gl(); + void init_open_gl_shaders(); + + // session state change + void session_state_change(XrSessionState state, ControlFlow& flow); + + std::shared_ptr<Buffer2D<uint8_t>> rasterize_hidden_area_mask(uint32_t view_index, const XrCompositionLayerProjectionView& view); + // system/instance + XrInstance m_instance{XR_NULL_HANDLE}; + XrSystemId m_system_id = {}; + XrInstanceProperties m_instance_properties = {XR_TYPE_INSTANCE_PROPERTIES}; + XrSystemProperties m_system_properties = {XR_TYPE_SYSTEM_PROPERTIES}; + std::vector<XrApiLayerProperties> m_api_layer_properties; + std::vector<XrExtensionProperties> m_instance_extension_properties; + + // view and blending + XrViewConfigurationType m_view_configuration_type = {}; + XrViewConfigurationProperties m_view_configuration_properties = {XR_TYPE_VIEW_CONFIGURATION_PROPERTIES}; + std::vector<XrViewConfigurationView> m_view_configuration_views; + std::vector<XrEnvironmentBlendMode> m_environment_blend_modes; + XrEnvironmentBlendMode m_environment_blend_mode = {XR_ENVIRONMENT_BLEND_MODE_OPAQUE}; + + // actions + std::array<XrPath, 2> m_hand_paths; + std::array<XrSpace, 2> m_hand_spaces; + XrAction m_pose_action{XR_NULL_HANDLE}; + XrAction m_press_action{XR_NULL_HANDLE}; + XrAction m_grab_action{XR_NULL_HANDLE}; + + // Two separate actions for Xbox controller support + std::array<XrAction, 2> m_thumbstick_actions; + + XrActionSet m_action_set{XR_NULL_HANDLE}; + +#if defined(XR_USE_PLATFORM_WIN32) + XrGraphicsBindingOpenGLWin32KHR m_graphics_binding{XR_TYPE_GRAPHICS_BINDING_OPENGL_WIN32_KHR}; +#elif defined(XR_USE_PLATFORM_XLIB) + XrGraphicsBindingOpenGLXlibKHR m_graphics_binding{XR_TYPE_GRAPHICS_BINDING_OPENGL_XLIB_KHR}; +#elif defined(XR_USE_PLATFORM_WAYLAND) + XrGraphicsBindingOpenGLWaylandKHR m_graphics_binding{XR_TYPE_GRAPHICS_BINDING_OPENGL_WAYLAND_KHR}; +#endif + + XrSession m_session{XR_NULL_HANDLE}; + XrSessionState m_session_state{XR_SESSION_STATE_UNKNOWN}; + + // reference space + std::vector<XrReferenceSpaceType> m_reference_spaces; + XrSpace m_space{XR_NULL_HANDLE}; + XrExtent2Df m_bounds; + + // swap chains + struct Swapchain { + Swapchain(XrSwapchainCreateInfo& rgba_create_info, XrSwapchainCreateInfo& depth_create_info, XrSession& session, XrInstance& xr_instance); + Swapchain(const Swapchain&) = delete; + Swapchain& operator=(const Swapchain&) = delete; + Swapchain(Swapchain&& other) { + *this = std::move(other); + } + Swapchain& operator=(Swapchain&& other) { + std::swap(handle, other.handle); + std::swap(depth_handle, other.depth_handle); + std::swap(width, other.width); + std::swap(height, other.height); + images_gl = std::move(other.images_gl); + depth_images_gl = std::move(other.depth_images_gl); + framebuffers_gl = std::move(other.framebuffers_gl); + return *this; + } + virtual ~Swapchain(); + + void clear(); + + XrSwapchain handle{XR_NULL_HANDLE}; + XrSwapchain depth_handle{XR_NULL_HANDLE}; + + int32_t width = 0; + int32_t height = 0; + std::vector<XrSwapchainImageOpenGLKHR> images_gl; + std::vector<XrSwapchainImageOpenGLKHR> depth_images_gl; + std::vector<GLuint> framebuffers_gl; + }; + + int64_t m_swapchain_rgba_format = 0; + std::vector<Swapchain> m_swapchains; + + bool m_supports_composition_layer_depth = false; + int64_t m_swapchain_depth_format = 0; + + bool m_supports_hidden_area_mask = false; + std::vector<std::shared_ptr<Buffer2D<uint8_t>>> m_hidden_area_masks; + + bool m_supports_eye_tracking = false; + + // frame data + XrFrameState m_frame_state{XR_TYPE_FRAME_STATE}; + FrameInfoPtr m_previous_frame_info; + + GLuint m_hidden_area_mask_program = 0; + + // print more debug info during OpenXR's init: + const bool m_print_api_layers = false; + const bool m_print_extensions = false; + const bool m_print_system_properties = false; + const bool m_print_instance_properties = false; + const bool m_print_view_configuration_types = false; + const bool m_print_view_configuration_properties = false; + const bool m_print_view_configuration_view = false; + const bool m_print_environment_blend_modes = false; + const bool m_print_available_swapchain_formats = false; + const bool m_print_reference_spaces = false; +}; + +NGP_NAMESPACE_END + +#ifdef __GNUC__ +#pragma GCC diagnostic pop +#endif diff --git a/include/neural-graphics-primitives/random_val.cuh b/include/neural-graphics-primitives/random_val.cuh index 667c938dd4c37ddecda10b8939ec1cd27c51a96e..e436b18ba23500a909a1ef27fca01b62bb0999dc 100644 --- a/include/neural-graphics-primitives/random_val.cuh +++ b/include/neural-graphics-primitives/random_val.cuh @@ -61,11 +61,16 @@ inline __host__ __device__ Eigen::Vector2f dir_to_cylindrical(const Eigen::Vecto return {(cos_theta + 1.0f) / 2.0f, (phi / (2.0f * PI())) + 0.5f}; } -inline __host__ __device__ Eigen::Vector2f dir_to_spherical_unorm(const Eigen::Vector3f& d) { +inline __host__ __device__ Eigen::Vector2f dir_to_spherical(const Eigen::Vector3f& d) { const float cos_theta = fminf(fmaxf(d.z(), -1.0f), 1.0f); const float theta = acosf(cos_theta); float phi = std::atan2(d.y(), d.x()); - return {theta / PI(), (phi / (2.0f * PI()) + 0.5f)}; + return {theta, phi}; +} + +inline __host__ __device__ Eigen::Vector2f dir_to_spherical_unorm(const Eigen::Vector3f& d) { + Eigen::Vector2f spherical = dir_to_spherical(d); + return {spherical.x() / PI(), (spherical.y() / (2.0f * PI()) + 0.5f)}; } template <typename RNG> diff --git a/include/neural-graphics-primitives/render_buffer.h b/include/neural-graphics-primitives/render_buffer.h index 2e29f72a6fd896c815645fa81273bbf20e219ff2..0e51364f36f228209b125f6f507c41dc0e2f43fb 100644 --- a/include/neural-graphics-primitives/render_buffer.h +++ b/include/neural-graphics-primitives/render_buffer.h @@ -34,7 +34,7 @@ public: virtual cudaSurfaceObject_t surface() = 0; virtual cudaArray_t array() = 0; virtual Eigen::Vector2i resolution() const = 0; - virtual void resize(const Eigen::Vector2i&) = 0; + virtual void resize(const Eigen::Vector2i&, int n_channels = 4) = 0; }; class CudaSurface2D : public SurfaceProvider { @@ -50,7 +50,7 @@ public: void free(); - void resize(const Eigen::Vector2i& size) override; + void resize(const Eigen::Vector2i& size, int n_channels) 
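The public surface of OpenXRHMD above is meant to be driven by a poll, begin_frame, render, end_frame cycle. A hypothetical driver function using only names from this header (error handling and the actual rendering omitted):

```cpp
#include <neural-graphics-primitives/openxr_hmd.h>

// Hypothetical driver, not code from this change: begin_frame()/end_frame()
// must be paired whenever must_run_frame_loop() is true, even while the
// session is merely synchronized and nothing is visible yet.
void vr_frame(ngp::OpenXRHMD& hmd, float znear, float zfar) {
	if (hmd.poll_events() != ngp::OpenXRHMD::ControlFlow::CONTINUE) {
		return; // RESTART or QUIT: the caller tears down or recreates the session
	}

	if (!hmd.must_run_frame_loop()) {
		return;
	}

	auto frame = hmd.begin_frame();
	if (hmd.is_visible()) {
		for (const auto& view : frame->views) {
			// Render into view.framebuffer using view.pose and, if present,
			// view.hidden_area_mask. Application-specific.
			(void)view;
		}
	}
	hmd.end_frame(frame, znear, zfar);
}
```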
override; cudaSurfaceObject_t surface() override { return m_surface; @@ -65,7 +65,8 @@ public: } private: - Eigen::Vector2i m_size = Eigen::Vector2i::Constant(0); + Eigen::Vector2i m_size = Eigen::Vector2i::Zero(); + int m_n_channels = 0; cudaArray_t m_array; cudaSurfaceObject_t m_surface; }; @@ -111,10 +112,10 @@ public: void load(const uint8_t* data, Eigen::Vector2i new_size, int n_channels); - void resize(const Eigen::Vector2i& new_size, int n_channels, bool is_8bit = false); + void resize(const Eigen::Vector2i& new_size, int n_channels, bool is_8bit); - void resize(const Eigen::Vector2i& new_size) override { - resize(new_size, 4); + void resize(const Eigen::Vector2i& new_size, int n_channels) override { + resize(new_size, n_channels, false); } Eigen::Vector2i resolution() const override { @@ -124,7 +125,7 @@ public: private: class CUDAMapping { public: - CUDAMapping(GLuint texture_id, const Eigen::Vector2i& size); + CUDAMapping(GLuint texture_id, const Eigen::Vector2i& size, int n_channels); ~CUDAMapping(); cudaSurfaceObject_t surface() const { return m_cuda_surface ? m_cuda_surface->surface() : m_surface; } @@ -141,6 +142,7 @@ private: cudaSurfaceObject_t m_surface = {}; Eigen::Vector2i m_size; + int m_n_channels; std::vector<float> m_data_cpu; std::unique_ptr<CudaSurface2D> m_cuda_surface; @@ -157,9 +159,20 @@ private: }; #endif //NGP_GUI +struct CudaRenderBufferView { + Eigen::Array4f* frame_buffer = nullptr; + float* depth_buffer = nullptr; + Eigen::Vector2i resolution = Eigen::Vector2i::Zero(); + uint32_t spp = 0; + + std::shared_ptr<Buffer2D<uint8_t>> hidden_area_mask = nullptr; + + void clear(cudaStream_t stream) const; +}; + class CudaRenderBuffer { public: - CudaRenderBuffer(const std::shared_ptr<SurfaceProvider>& surf) : m_surface_provider{surf} {} + CudaRenderBuffer(const std::shared_ptr<SurfaceProvider>& rgba, const std::shared_ptr<SurfaceProvider>& depth = nullptr) : m_rgba_target{rgba}, m_depth_target{depth} {} CudaRenderBuffer(const CudaRenderBuffer& other) = delete; CudaRenderBuffer& operator=(const CudaRenderBuffer& other) = delete; @@ -167,7 +180,7 @@ public: CudaRenderBuffer& operator=(CudaRenderBuffer&& other) = default; cudaSurfaceObject_t surface() { - return m_surface_provider->surface(); + return m_rgba_target->surface(); } Eigen::Vector2i in_resolution() const { @@ -175,7 +188,7 @@ public: } Eigen::Vector2i out_resolution() const { - return m_surface_provider->resolution(); + return m_rgba_target->resolution(); } void resize(const Eigen::Vector2i& res); @@ -204,11 +217,21 @@ public: return m_accumulate_buffer.data(); } + CudaRenderBufferView view() const { + return { + frame_buffer(), + depth_buffer(), + in_resolution(), + spp(), + hidden_area_mask(), + }; + } + void clear_frame(cudaStream_t stream); void accumulate(float exposure, cudaStream_t stream); - void tonemap(float exposure, const Eigen::Array4f& background_color, EColorSpace output_color_space, cudaStream_t stream); + void tonemap(float exposure, const Eigen::Array4f& background_color, EColorSpace output_color_space, float znear, float zfar, cudaStream_t stream); void overlay_image( float alpha, @@ -238,7 +261,7 @@ public: void overlay_false_color(Eigen::Vector2i training_resolution, bool to_srgb, int fov_axis, cudaStream_t stream, const float *error_map, Eigen::Vector2i error_map_resolution, const float *average, float brightness, bool viridis); SurfaceProvider& surface_provider() { - return *m_surface_provider; + return *m_rgba_target; } void set_color_space(EColorSpace color_space) { @@ -255,22 
+278,30 @@ public: } } - void enable_dlss(const Eigen::Vector2i& max_out_res); + void enable_dlss(IDlssProvider& dlss_provider, const Eigen::Vector2i& max_out_res); void disable_dlss(); void set_dlss_sharpening(float value) { m_dlss_sharpening = value; } - const std::shared_ptr<IDlss>& dlss() const { + const std::unique_ptr<IDlss>& dlss() const { return m_dlss; } + void set_hidden_area_mask(const std::shared_ptr<Buffer2D<uint8_t>>& hidden_area_mask) { + m_hidden_area_mask = hidden_area_mask; + } + + const std::shared_ptr<Buffer2D<uint8_t>>& hidden_area_mask() const { + return m_hidden_area_mask; + } + private: uint32_t m_spp = 0; EColorSpace m_color_space = EColorSpace::Linear; ETonemapCurve m_tonemap_curve = ETonemapCurve::Identity; - std::shared_ptr<IDlss> m_dlss; + std::unique_ptr<IDlss> m_dlss; float m_dlss_sharpening = 0.0f; Eigen::Vector2i m_in_resolution = Eigen::Vector2i::Zero(); @@ -279,7 +310,10 @@ private: tcnn::GPUMemory<float> m_depth_buffer; tcnn::GPUMemory<Eigen::Array4f> m_accumulate_buffer; - std::shared_ptr<SurfaceProvider> m_surface_provider; + std::shared_ptr<Buffer2D<uint8_t>> m_hidden_area_mask = nullptr; + + std::shared_ptr<SurfaceProvider> m_rgba_target; + std::shared_ptr<SurfaceProvider> m_depth_target; }; NGP_NAMESPACE_END diff --git a/include/neural-graphics-primitives/testbed.h b/include/neural-graphics-primitives/testbed.h index e6db007b6ce0c39437b4ac51ff1820566c368ed3..9459285110f29eb678dac7cd0c9063db5c2bef51 100644 --- a/include/neural-graphics-primitives/testbed.h +++ b/include/neural-graphics-primitives/testbed.h @@ -26,6 +26,10 @@ #include <neural-graphics-primitives/thread_pool.h> #include <neural-graphics-primitives/trainable_buffer.cuh> +#ifdef NGP_GUI +# include <neural-graphics-primitives/openxr_hmd.h> +#endif + #include <tiny-cuda-nn/multi_stream.h> #include <tiny-cuda-nn/random.h> @@ -95,10 +99,11 @@ public: float near_distance, float plane_z, float aperture_size, - const float* envmap_data, - const Eigen::Vector2i& envmap_resolution, + const Foveation& foveation, + const Buffer2DView<const Eigen::Array4f>& envmap, Eigen::Array4f* frame_buffer, float* depth_buffer, + const Buffer2DView<const uint8_t>& hidden_area_mask, const TriangleOctree* octree, uint32_t n_octree_levels, cudaStream_t stream @@ -151,21 +156,20 @@ public: const Eigen::Vector4f& rolling_shutter, const Eigen::Vector2f& screen_center, const Eigen::Vector3f& parallax_shift, - const Eigen::Vector2i& quilting_dims, bool snap_to_pixel_centers, const BoundingBox& render_aabb, const Eigen::Matrix3f& render_aabb_to_local, float near_distance, float plane_z, float aperture_size, + const Foveation& foveation, const Lens& lens, - const float* envmap_data, - const Eigen::Vector2i& envmap_resolution, - const float* distortion_data, - const Eigen::Vector2i& distortion_resolution, + const Buffer2DView<const Eigen::Array4f>& envmap, + const Buffer2DView<const Eigen::Vector2f>& distortion, Eigen::Array4f* frame_buffer, float* depth_buffer, - uint8_t* grid, + const Buffer2DView<const uint8_t>& hidden_area_mask, + const uint8_t* grid, int show_accel, float cone_angle_constant, ERenderMode render_mode, @@ -177,8 +181,6 @@ public: const BoundingBox& render_aabb, const Eigen::Matrix3f& render_aabb_to_local, const BoundingBox& train_aabb, - const uint32_t n_training_images, - const TrainingXForm* training_xforms, const Eigen::Vector2f& focal_length, float cone_angle_constant, const uint8_t* grid, @@ -250,7 +252,11 @@ public: int count; }; - static constexpr float LOSS_SCALE = 128.f; + // Due to 
mixed-precision training, small loss values can lead to + // underflow (round to zero) in the gradient computations. Hence, + // scale the loss (and thereby gradients) up by this factor and + // divide it out in the optimizer later on. + static constexpr float LOSS_SCALE = 128.0f; struct NetworkDims { uint32_t n_input; @@ -265,30 +271,91 @@ public: NetworkDims network_dims() const; - void render_volume(CudaRenderBuffer& render_buffer, - const Eigen::Vector2f& focal_length, - const Eigen::Matrix<float, 3, 4>& camera_matrix, - const Eigen::Vector2f& screen_center, - cudaStream_t stream - ); void train_volume(size_t target_batch_size, bool get_loss_scalar, cudaStream_t stream); void training_prep_volume(uint32_t batch_size, cudaStream_t stream) {} void load_volume(const fs::path& data_path); + class CudaDevice; + + const float* get_inference_extra_dims(cudaStream_t stream) const; + void render_nerf( + cudaStream_t stream, + const CudaRenderBufferView& render_buffer, + NerfNetwork<precision_t>& nerf_network, + const uint8_t* density_grid_bitfield, + const Eigen::Vector2f& focal_length, + const Eigen::Matrix<float, 3, 4>& camera_matrix0, + const Eigen::Matrix<float, 3, 4>& camera_matrix1, + const Eigen::Vector4f& rolling_shutter, + const Eigen::Vector2f& screen_center, + const Foveation& foveation, + int visualized_dimension + ); void render_sdf( + cudaStream_t stream, const distance_fun_t& distance_function, const normals_fun_t& normals_function, - CudaRenderBuffer& render_buffer, - const Eigen::Vector2i& max_res, + const CudaRenderBufferView& render_buffer, const Eigen::Vector2f& focal_length, const Eigen::Matrix<float, 3, 4>& camera_matrix, const Eigen::Vector2f& screen_center, - cudaStream_t stream + const Foveation& foveation, + int visualized_dimension + ); + void render_image( + cudaStream_t stream, + const CudaRenderBufferView& render_buffer, + const Eigen::Vector2f& focal_length, + const Eigen::Matrix<float, 3, 4>& camera_matrix, + const Eigen::Vector2f& screen_center, + const Foveation& foveation, + int visualized_dimension + ); + void render_volume( + cudaStream_t stream, + const CudaRenderBufferView& render_buffer, + const Eigen::Vector2f& focal_length, + const Eigen::Matrix<float, 3, 4>& camera_matrix, + const Eigen::Vector2f& screen_center, + const Foveation& foveation + ); + + void render_frame( + cudaStream_t stream, + const Eigen::Matrix<float, 3, 4>& camera_matrix0, + const Eigen::Matrix<float, 3, 4>& camera_matrix1, + const Eigen::Matrix<float, 3, 4>& prev_camera_matrix, + const Eigen::Vector2f& screen_center, + const Eigen::Vector2f& relative_focal_length, + const Eigen::Vector4f& nerf_rolling_shutter, + const Foveation& foveation, + const Foveation& prev_foveation, + int visualized_dimension, + CudaRenderBuffer& render_buffer, + bool to_srgb = true, + CudaDevice* device = nullptr + ); + void render_frame_main( + CudaDevice& device, + const Eigen::Matrix<float, 3, 4>& camera_matrix0, + const Eigen::Matrix<float, 3, 4>& camera_matrix1, + const Eigen::Vector2f& screen_center, + const Eigen::Vector2f& relative_focal_length, + const Eigen::Vector4f& nerf_rolling_shutter, + const Foveation& foveation, + int visualized_dimension + ); + void render_frame_epilogue( + cudaStream_t stream, + const Eigen::Matrix<float, 3, 4>& camera_matrix0, + const Eigen::Matrix<float, 3, 4>& prev_camera_matrix, + const Eigen::Vector2f& screen_center, + const Eigen::Vector2f& relative_focal_length, + const Foveation& foveation, + const Foveation& prev_foveation, + CudaRenderBuffer& render_buffer, + 
bool to_srgb = true ); - const float* get_inference_extra_dims(cudaStream_t stream) const; - void render_nerf(CudaRenderBuffer& render_buffer, const Eigen::Vector2i& max_res, const Eigen::Vector2f& focal_length, const Eigen::Matrix<float, 3, 4>& camera_matrix0, const Eigen::Matrix<float, 3, 4>& camera_matrix1, const Eigen::Vector4f& rolling_shutter, const Eigen::Vector2f& screen_center, cudaStream_t stream); - void render_image(CudaRenderBuffer& render_buffer, cudaStream_t stream); - void render_frame(const Eigen::Matrix<float, 3, 4>& camera_matrix0, const Eigen::Matrix<float, 3, 4>& camera_matrix1, const Eigen::Vector4f& nerf_rolling_shutter, CudaRenderBuffer& render_buffer, bool to_srgb = true) ; void visualize_nerf_cameras(ImDrawList* list, const Eigen::Matrix<float, 4, 4>& world2proj); fs::path find_network_config(const fs::path& network_config_path); nlohmann::json load_network_config(const fs::path& network_config_path); @@ -310,9 +377,10 @@ public: void set_min_level(float minlevel); void set_visualized_dim(int dim); void set_visualized_layer(int layer); - void translate_camera(const Eigen::Vector3f& rel); - void mouse_drag(const Eigen::Vector2f& rel, int button); - void mouse_wheel(Eigen::Vector2f m, float delta); + void translate_camera(const Eigen::Vector3f& rel, const Eigen::Matrix3f& rot, bool allow_up_down = true); + Eigen::Matrix3f rotation_from_angles(const Eigen::Vector2f& angles) const; + void mouse_drag(); + void mouse_wheel(); void load_file(const fs::path& path); void set_nerf_camera_matrix(const Eigen::Matrix<float, 3, 4>& cam); Eigen::Vector3f look_at() const; @@ -334,6 +402,7 @@ public: void generate_training_samples_sdf(Eigen::Vector3f* positions, float* distances, uint32_t n_to_generate, cudaStream_t stream, bool uniform_only); void update_density_grid_nerf(float decay, uint32_t n_uniform_density_grid_samples, uint32_t n_nonuniform_density_grid_samples, cudaStream_t stream); void update_density_grid_mean_and_bitfield(cudaStream_t stream); + void mark_density_grid_in_sphere_empty(const Eigen::Vector3f& pos, float radius, cudaStream_t stream); struct NerfCounters { tcnn::GPUMemory<uint32_t> numsteps_counter; // number of steps each ray took @@ -364,8 +433,8 @@ public: void training_prep_sdf(uint32_t batch_size, cudaStream_t stream); void training_prep_image(uint32_t batch_size, cudaStream_t stream) {} void train(uint32_t batch_size); - Eigen::Vector2f calc_focal_length(const Eigen::Vector2i& resolution, int fov_axis, float zoom) const ; - Eigen::Vector2f render_screen_center() const ; + Eigen::Vector2f calc_focal_length(const Eigen::Vector2i& resolution, const Eigen::Vector2f& relative_focal_length, int fov_axis, float zoom) const; + Eigen::Vector2f render_screen_center(const Eigen::Vector2f& screen_center) const; void optimise_mesh_step(uint32_t N_STEPS); void compute_mesh_vertex_colors(); tcnn::GPUMemory<float> get_density_on_grid(Eigen::Vector3i res3d, const BoundingBox& aabb, const Eigen::Matrix3f& render_aabb_to_local); // network version (nerf or sdf) @@ -373,9 +442,8 @@ public: tcnn::GPUMemory<Eigen::Array4f> get_rgba_on_grid(Eigen::Vector3i res3d, Eigen::Vector3f ray_dir, bool voxel_centers, float depth, bool density_as_alpha = false); int marching_cubes(Eigen::Vector3i res3d, const BoundingBox& render_aabb, const Eigen::Matrix3f& render_aabb_to_local, float thresh); - // Determines the 3d focus point by rendering a little 16x16 depth image around - // the mouse cursor and picking the median depth. 
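To make the LOSS_SCALE comment earlier in this header concrete, here is a generic sketch of loss scaling, not the actual tiny-cuda-nn optimizer code: fp16 flushes magnitudes below roughly 6e-8 to zero, so gradients are computed against a scaled-up loss and the scale is divided back out at update time.

```cpp
#include <cstdio>

int main() {
	constexpr float LOSS_SCALE = 128.0f;

	// A gradient this small rounds to zero in half precision
	// (the smallest positive fp16 subnormal is ~5.96e-8).
	float true_grad = 3.0e-8f;

	// Backprop on (loss * LOSS_SCALE) scales every gradient by the same
	// factor, lifting it into fp16's representable range.
	float scaled_grad = true_grad * LOSS_SCALE; // 3.84e-6

	// The optimizer divides the scale back out, leaving the update unchanged.
	float lr = 0.01f;
	float update = -lr * (scaled_grad / LOSS_SCALE);
	printf("update = %g\n", update);
}
```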
- void determine_autofocus_target_from_pixel(const Eigen::Vector2i& focus_pixel); + float get_depth_from_renderbuffer(const CudaRenderBuffer& render_buffer, const Eigen::Vector2f& uv); + Eigen::Vector3f get_3d_pos_from_pixel(const CudaRenderBuffer& render_buffer, const Eigen::Vector2i& focus_pixel); void autofocus(); size_t n_params(); size_t first_encoder_param(); @@ -396,7 +464,10 @@ public: void destroy_window(); void apply_camera_smoothing(float elapsed_ms); int find_best_training_view(int default_view); - bool begin_frame_and_handle_user_input(); + bool begin_frame(); + void handle_user_input(); + Eigen::Vector3f vr_to_world(const Eigen::Vector3f& pos) const; + void begin_vr_frame_and_handle_vr_input(); void gather_histograms(); void draw_gui(); bool frame(); @@ -479,18 +550,18 @@ public: bool m_dynamic_res = true; float m_dynamic_res_target_fps = 20.0f; int m_fixed_res_factor = 8; - float m_last_render_res_factor = 1.0f; float m_scale = 1.0; - float m_prev_scale = 1.0; float m_aperture_size = 0.0f; Eigen::Vector2f m_relative_focal_length = Eigen::Vector2f::Ones(); uint32_t m_fov_axis = 1; float m_zoom = 1.f; // 2d zoom factor (for insets?) Eigen::Vector2f m_screen_center = Eigen::Vector2f::Constant(0.5f); // center of 2d zoom + float m_ndc_znear = 1.0f / 32.0f; + float m_ndc_zfar = 128.0f; + Eigen::Matrix<float, 3, 4> m_camera = Eigen::Matrix<float, 3, 4>::Zero(); Eigen::Matrix<float, 3, 4> m_smoothed_camera = Eigen::Matrix<float, 3, 4>::Zero(); - Eigen::Matrix<float, 3, 4> m_prev_camera = Eigen::Matrix<float, 3, 4>::Zero(); size_t m_render_skip_due_to_lack_of_camera_movement_counter = 0; bool m_fps_camera = false; @@ -505,8 +576,6 @@ public: float m_bounding_radius = 1; float m_exposure = 0.f; - Eigen::Vector2i m_quilting_dims = Eigen::Vector2i::Ones(); - ERenderMode m_render_mode = ERenderMode::Shade; EMeshRenderMode m_mesh_render_mode = EMeshRenderMode::VertexNormals; @@ -520,19 +589,31 @@ public: void draw(GLuint texture); } m_second_window; + float m_drag_depth = 1.0f; + + // The VAO will be empty, but we need a valid one for attribute-less rendering + GLuint m_blit_vao = 0; + GLuint m_blit_program = 0; + + void init_opengl_shaders(); + void blit_texture(const Foveation& foveation, GLint rgba_texture, GLint rgba_filter_mode, GLint depth_texture, GLint framebuffer, const Eigen::Vector2i& offset, const Eigen::Vector2i& resolution); + void create_second_window(); + std::unique_ptr<OpenXRHMD> m_hmd; + OpenXRHMD::FrameInfoPtr m_vr_frame_info; + void init_vr(); + void set_n_views(size_t n_views); + std::function<bool()> m_keyboard_event_callback; std::shared_ptr<GLTexture> m_pip_render_texture; - std::vector<std::shared_ptr<GLTexture>> m_render_textures; + std::vector<std::shared_ptr<GLTexture>> m_rgba_render_textures; + std::vector<std::shared_ptr<GLTexture>> m_depth_render_textures; #endif - ThreadPool m_thread_pool; - std::vector<std::future<void>> m_render_futures; - std::vector<CudaRenderBuffer> m_render_surfaces; - std::unique_ptr<CudaRenderBuffer> m_pip_render_surface; + std::unique_ptr<CudaRenderBuffer> m_pip_render_buffer; SharedQueue<std::unique_ptr<ICallable>> m_task_queue; @@ -731,8 +812,6 @@ public: }; struct Image { - Eigen::Vector2f pos = Eigen::Vector2f::Constant(0.0f); - Eigen::Vector2f prev_pos = Eigen::Vector2f::Constant(0.0f); tcnn::GPUMemory<char> data; EDataType type = EDataType::Float; @@ -785,7 +864,7 @@ public: EColorSpace m_color_space = EColorSpace::Linear; ETonemapCurve m_tonemap_curve = ETonemapCurve::Identity; bool m_dlss = false; - bool m_dlss_supported 
= false; + std::shared_ptr<IDlssProvider> m_dlss_provider; float m_dlss_sharpening = 0.0f; // 3D stuff @@ -814,13 +893,35 @@ public: Eigen::Array4f m_background_color = {0.0f, 0.0f, 0.0f, 1.0f}; bool m_vsync = false; + bool m_render_transparency_as_checkerboard = false; // Visualization of neuron activations int m_visualized_dimension = -1; int m_visualized_layer = 0; + + struct View { + std::shared_ptr<CudaRenderBuffer> render_buffer; + Eigen::Vector2i full_resolution = {1, 1}; + int visualized_dimension = 0; + + Eigen::Matrix<float, 3, 4> camera0 = Eigen::Matrix<float, 3, 4>::Zero(); + Eigen::Matrix<float, 3, 4> camera1 = Eigen::Matrix<float, 3, 4>::Zero(); + Eigen::Matrix<float, 3, 4> prev_camera = Eigen::Matrix<float, 3, 4>::Zero(); + + Foveation foveation; + Foveation prev_foveation; + + Eigen::Vector2f relative_focal_length; + Eigen::Vector2f screen_center; + + CudaDevice* device = nullptr; + }; + + std::vector<View> m_views; Eigen::Vector2i m_n_views = {1, 1}; - Eigen::Vector2i m_view_size = {1, 1}; - bool m_single_view = true; // Whether a single neuron is visualized, or all in a tiled grid + + bool m_single_view = true; + float m_picture_in_picture_res = 0.f; // if non zero, requests a small second picture :) struct ImGuiVars { @@ -835,9 +936,10 @@ public: } m_imgui; bool m_visualize_unit_cube = false; - bool m_snap_to_pixel_centers = false; bool m_edit_render_aabb = false; + bool m_snap_to_pixel_centers = false; + Eigen::Vector3f m_parallax_shift = {0.0f, 0.0f, 0.0f}; // to shift the viewer's origin by some amount in camera space // CUDA stuff @@ -863,6 +965,172 @@ public: bool m_train_encoding = true; bool m_train_network = true; + class CudaDevice { + public: + struct Data { + tcnn::GPUMemory<uint8_t> density_grid_bitfield; + uint8_t* density_grid_bitfield_ptr; + + tcnn::GPUMemory<precision_t> params; + std::shared_ptr<Buffer2D<uint8_t>> hidden_area_mask; + }; + + CudaDevice(int id, bool is_primary) : m_id{id}, m_is_primary{is_primary} { + auto guard = device_guard(); + m_stream = std::make_unique<tcnn::StreamAndEvent>(); + m_data = std::make_unique<Data>(); + m_render_worker = std::make_unique<ThreadPool>(is_primary ? 
0u : 1u); + } + + CudaDevice(const CudaDevice&) = delete; + CudaDevice& operator=(const CudaDevice&) = delete; + + CudaDevice(CudaDevice&&) = default; + CudaDevice& operator=(CudaDevice&&) = default; + + tcnn::ScopeGuard device_guard() { + int prev_device = tcnn::cuda_device(); + if (prev_device == m_id) { + return {}; + } + + tcnn::set_cuda_device(m_id); + return tcnn::ScopeGuard{[prev_device]() { + tcnn::set_cuda_device(prev_device); + }}; + } + + int id() const { + return m_id; + } + + bool is_primary() const { + return m_is_primary; + } + + std::string name() const { + return tcnn::cuda_device_name(m_id); + } + + int compute_capability() const { + return tcnn::cuda_compute_capability(m_id); + } + + cudaStream_t stream() const { + return m_stream->get(); + } + + void wait_for(cudaStream_t stream) const { + CUDA_CHECK_THROW(cudaEventRecord(m_primary_device_event.event, stream)); + m_stream->wait_for(m_primary_device_event.event); + } + + void signal(cudaStream_t stream) const { + m_stream->signal(stream); + } + + const CudaRenderBufferView& render_buffer_view() const { + return m_render_buffer_view; + } + + void set_render_buffer_view(const CudaRenderBufferView& view) { + m_render_buffer_view = view; + } + + Data& data() const { + return *m_data; + } + + bool dirty() const { + return m_dirty; + } + + void set_dirty(bool value) { + m_dirty = value; + } + + void set_network(const std::shared_ptr<tcnn::Network<float, precision_t>>& network) { + m_network = network; + } + + void set_nerf_network(const std::shared_ptr<NerfNetwork<precision_t>>& nerf_network); + + const std::shared_ptr<tcnn::Network<float, precision_t>>& network() const { + return m_network; + } + + const std::shared_ptr<NerfNetwork<precision_t>>& nerf_network() const { + return m_nerf_network; + } + + void clear() { + m_data = std::make_unique<Data>(); + m_render_buffer_view = {}; + m_network = {}; + m_nerf_network = {}; + set_dirty(true); + } + + template <class F> + auto enqueue_task(F&& f) -> std::future<std::result_of_t <F()>> { + if (is_primary()) { + return std::async(std::launch::deferred, std::forward<F>(f)); + } else { + return m_render_worker->enqueue_task(std::forward<F>(f)); + } + } + + private: + int m_id; + bool m_is_primary; + std::unique_ptr<tcnn::StreamAndEvent> m_stream; + struct Event { + Event() { + CUDA_CHECK_THROW(cudaEventCreate(&event)); + } + + ~Event() { + cudaEventDestroy(event); + } + + Event(const Event&) = delete; + Event& operator=(const Event&) = delete; + Event(Event&& other) { *this = std::move(other); } + Event& operator=(Event&& other) { + std::swap(event, other.event); + return *this; + } + + cudaEvent_t event = {}; + }; + Event m_primary_device_event; + std::unique_ptr<Data> m_data; + CudaRenderBufferView m_render_buffer_view = {}; + + std::shared_ptr<tcnn::Network<float, precision_t>> m_network; + std::shared_ptr<NerfNetwork<precision_t>> m_nerf_network; + + bool m_dirty = true; + + std::unique_ptr<ThreadPool> m_render_worker; + }; + + void sync_device(CudaRenderBuffer& render_buffer, CudaDevice& device); + tcnn::ScopeGuard use_device(cudaStream_t stream, CudaRenderBuffer& render_buffer, CudaDevice& device); + void set_all_devices_dirty(); + + std::vector<CudaDevice> m_devices; + CudaDevice& primary_device() { + return m_devices.front(); + } + + ThreadPool m_thread_pool; + std::vector<std::future<void>> m_render_futures; + + bool m_use_aux_devices = false; + bool m_foveated_rendering = false; + float m_foveated_rendering_max_scaling = 2.0f; + fs::path m_data_path; fs::path 
m_network_config_path = "base.json"; @@ -876,8 +1144,8 @@ public: uint32_t network_width(uint32_t layer) const; uint32_t network_num_forward_activations() const; - std::shared_ptr<tcnn::Loss<precision_t>> m_loss; // Network & training stuff + std::shared_ptr<tcnn::Loss<precision_t>> m_loss; std::shared_ptr<tcnn::Optimizer<precision_t>> m_optimizer; std::shared_ptr<tcnn::Encoding<precision_t>> m_encoding; std::shared_ptr<tcnn::Network<float, precision_t>> m_network; @@ -890,6 +1158,22 @@ public: Eigen::Vector2i resolution; ELossType loss_type; + + Buffer2DView<const Eigen::Array4f> inference_view() const { + if (!envmap) { + return {}; + } + + return {(const Eigen::Array4f*)envmap->inference_params(), resolution}; + } + + Buffer2DView<const Eigen::Array4f> view() const { + if (!envmap) { + return {}; + } + + return {(const Eigen::Array4f*)envmap->params(), resolution}; + } } m_envmap; struct TrainableDistortionMap { @@ -897,6 +1181,22 @@ public: std::shared_ptr<TrainableBuffer<2, 2, float>> map; std::shared_ptr<tcnn::Trainer<float, float, float>> trainer; Eigen::Vector2i resolution; + + Buffer2DView<const Eigen::Vector2f> inference_view() const { + if (!map) { + return {}; + } + + return {(const Eigen::Vector2f*)map->inference_params(), resolution}; + } + + Buffer2DView<const Eigen::Vector2f> view() const { + if (!map) { + return {}; + } + + return {(const Eigen::Vector2f*)map->params(), resolution}; + } } m_distortion; std::shared_ptr<NerfNetwork<precision_t>> m_nerf_network; }; diff --git a/scripts/colmap2nerf.py b/scripts/colmap2nerf.py index 68d6bd9ece6a74588e8ac87f9197d8c9da910079..30f5104a0f597dea4e645cf141ef8584d6e61136 100755 --- a/scripts/colmap2nerf.py +++ b/scripts/colmap2nerf.py @@ -414,8 +414,7 @@ if __name__ == "__main__": from detectron2 import model_zoo from detectron2.engine import DefaultPredictor - dir_path = Path(os.path.dirname(os.path.realpath(__file__))) - category2id = json.load(open(dir_path / "category2id.json", "r")) + category2id = json.load(open(SCRIPTS_FOLDER / "category2id.json", "r")) mask_ids = [category2id[c] for c in args.mask_categories] cfg = get_cfg() diff --git a/scripts/run.py b/scripts/run.py index 16152cce42ddf2f2b00edeac8fe0e4088512f33b..d7b2054383b6eda31656629ce5dba0f2ed23514b 100644 --- a/scripts/run.py +++ b/scripts/run.py @@ -64,6 +64,7 @@ def parse_args(): parser.add_argument("--train", action="store_true", help="If the GUI is enabled, controls whether training starts immediately.") parser.add_argument("--n_steps", type=int, default=-1, help="Number of steps to train for before quitting.") parser.add_argument("--second_window", action="store_true", help="Open a second window containing a copy of the main output.") + parser.add_argument("--vr", action="store_true", help="Render to a VR headset.") parser.add_argument("--sharpen", default=0, help="Set amount of sharpening applied to NeRF training images. Range 0.0 to 1.0.") @@ -78,6 +79,8 @@ def get_scene(scene): if __name__ == "__main__": args = parse_args() + if args.vr: # VR implies having the GUI running at the moment + args.gui = True if args.mode: print("Warning: the '--mode' argument is no longer in use. It has no effect. 
The mode is automatically chosen based on the scene.") @@ -106,7 +109,9 @@ if __name__ == "__main__": while sw * sh > 1920 * 1080 * 4: sw = int(sw / 2) sh = int(sh / 2) - testbed.init_window(sw, sh, second_window = args.second_window or False) + testbed.init_window(sw, sh, second_window=args.second_window) + if args.vr: + testbed.init_vr() if args.load_snapshot: @@ -159,10 +164,8 @@ if __name__ == "__main__": # setting here. testbed.nerf.cone_angle_constant = 0 - # Optionally match nerf paper behaviour and train on a - # fixed white bg. We prefer training on random BG colors. - # testbed.background_color = [1.0, 1.0, 1.0, 1.0] - # testbed.nerf.training.random_bg_color = False + # Match nerf paper behaviour and train on a fixed bg. + testbed.nerf.training.random_bg_color = False old_training_step = 0 n_steps = args.n_steps @@ -223,53 +226,24 @@ if __name__ == "__main__": testbed.nerf.render_min_transmittance = 1e-4 - testbed.fov_axis = 0 - testbed.fov = test_transforms["camera_angle_x"] * 180 / np.pi testbed.shall_train = False + testbed.load_training_data(args.test_transforms) - with tqdm(list(enumerate(test_transforms["frames"])), unit="images", desc=f"Rendering test frame") as t: - for i, frame in t: - p = frame["file_path"] - if "." not in p: - p = p + ".png" - ref_fname = os.path.join(data_dir, p) - if not os.path.isfile(ref_fname): - ref_fname = os.path.join(data_dir, p + ".png") - if not os.path.isfile(ref_fname): - ref_fname = os.path.join(data_dir, p + ".jpg") - if not os.path.isfile(ref_fname): - ref_fname = os.path.join(data_dir, p + ".jpeg") - if not os.path.isfile(ref_fname): - ref_fname = os.path.join(data_dir, p + ".exr") - - ref_image = read_image(ref_fname) - - # NeRF blends with background colors in sRGB space, rather than first - # transforming to linear space, blending there, and then converting back. - # (See e.g. the PNG spec for more information on how the `alpha` channel - # is always a linear quantity.) - # The following lines of code reproduce NeRF's behavior (if enabled in - # testbed) in order to make the numbers comparable. 
- if testbed.color_space == ngp.ColorSpace.SRGB and ref_image.shape[2] == 4: - # Since sRGB conversion is non-linear, alpha must be factored out of it - ref_image[...,:3] = np.divide(ref_image[...,:3], ref_image[...,3:4], out=np.zeros_like(ref_image[...,:3]), where=ref_image[...,3:4] != 0) - ref_image[...,:3] = linear_to_srgb(ref_image[...,:3]) - ref_image[...,:3] *= ref_image[...,3:4] - ref_image += (1.0 - ref_image[...,3:4]) * testbed.background_color - ref_image[...,:3] = srgb_to_linear(ref_image[...,:3]) + with tqdm(range(testbed.nerf.training.dataset.n_images), unit="images", desc=f"Rendering test frame") as t: + for i in t: + resolution = testbed.nerf.training.dataset.metadata[i].resolution + testbed.render_ground_truth = True + testbed.set_camera_to_training_view(i) + ref_image = testbed.render(resolution[0], resolution[1], 1, True) + testbed.render_ground_truth = False + image = testbed.render(resolution[0], resolution[1], spp, True) if i == 0: - write_image("ref.png", ref_image) - - testbed.set_nerf_camera_matrix(np.matrix(frame["transform_matrix"])[:-1,:]) - image = testbed.render(ref_image.shape[1], ref_image.shape[0], spp, True) + write_image(f"ref.png", ref_image) + write_image(f"out.png", image) - if i == 0: - write_image("out.png", image) - - diffimg = np.absolute(image - ref_image) - diffimg[...,3:4] = 1.0 - if i == 0: + diffimg = np.absolute(image - ref_image) + diffimg[...,3:4] = 1.0 write_image("diff.png", diffimg) A = np.clip(linear_to_srgb(image[...,:3]), 0.0, 1.0) diff --git a/src/camera_path.cu b/src/camera_path.cu index 853f495a92af1056b3cab7183fd0bb82d36ff2ff..6b8953ce339c6dbff353012d9ca975afba970250 100644 --- a/src/camera_path.cu +++ b/src/camera_path.cu @@ -318,13 +318,11 @@ void visualize_nerf_camera(ImDrawList* list, const Matrix<float, 4, 4>& world2pr add_debug_line(list, world2proj, d, a, col, thickness); } -bool CameraPath::imgui_viz(ImDrawList* list, Matrix<float, 4, 4> &view2proj, Matrix<float, 4, 4> &world2proj, Matrix<float, 4, 4> &world2view, Vector2f focal, float aspect) { +bool CameraPath::imgui_viz(ImDrawList* list, Matrix<float, 4, 4> &view2proj, Matrix<float, 4, 4> &world2proj, Matrix<float, 4, 4> &world2view, Vector2f focal, float aspect, float znear, float zfar) { bool changed = false; float flx = focal.x(); float fly = focal.y(); Matrix<float, 4, 4> view2proj_guizmo; - float zfar = 100.f; - float znear = 0.1f; view2proj_guizmo << fly * 2.0f / aspect, 0, 0, 0, 0, -fly * 2.0f, 0, 0, diff --git a/src/dlss.cu b/src/dlss.cu index d81df9390f895cd0f4f47039e6db9a849081d805..28ac47e05998f6f3dac3aae74416bf128fafc8b0 100644 --- a/src/dlss.cu +++ b/src/dlss.cu @@ -79,36 +79,18 @@ std::string ngx_error_string(NVSDK_NGX_Result result) { throw std::runtime_error(std::string(FILE_LINE " " #x " failed with error ") + ngx_error_string(result)); \ } while(0) -static VkInstance vk_instance = VK_NULL_HANDLE; -static VkDebugUtilsMessengerEXT vk_debug_messenger = VK_NULL_HANDLE; -static VkPhysicalDevice vk_physical_device = VK_NULL_HANDLE; -static VkDevice vk_device = VK_NULL_HANDLE; -static VkQueue vk_queue = VK_NULL_HANDLE; -static VkCommandPool vk_command_pool = VK_NULL_HANDLE; -static VkCommandBuffer vk_command_buffer = VK_NULL_HANDLE; - -static bool ngx_initialized = false; -static NVSDK_NGX_Parameter* ngx_parameters = nullptr; - -uint32_t vk_find_memory_type(uint32_t type_filter, VkMemoryPropertyFlags properties) { - VkPhysicalDeviceMemoryProperties mem_properties; - vkGetPhysicalDeviceMemoryProperties(vk_physical_device, &mem_properties); - - for (uint32_t i = 
0; i < mem_properties.memoryTypeCount; i++) { - if (type_filter & (1 << i) && (mem_properties.memoryTypes[i].propertyFlags & properties) == properties) { - return i; - } - } - - throw std::runtime_error{"Failed to find suitable memory type."}; -} - static VKAPI_ATTR VkBool32 VKAPI_CALL vk_debug_callback( VkDebugUtilsMessageSeverityFlagBitsEXT message_severity, VkDebugUtilsMessageTypeFlagsEXT message_type, const VkDebugUtilsMessengerCallbackDataEXT* callback_data, void* user_data ) { + // Ignore json files that couldn't be found... third party tools sometimes install bogus layers + // that manifest as warnings like this. + if (std::string{callback_data->pMessage}.find("Failed to open JSON file") != std::string::npos) { + return VK_FALSE; + } + if (message_severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT) { tlog::warning() << "Vulkan error: " << callback_data->pMessage; } else if (message_severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT) { @@ -120,366 +102,449 @@ static VKAPI_ATTR VkBool32 VKAPI_CALL vk_debug_callback( return VK_FALSE; } -void vulkan_and_ngx_init() { - static bool already_initialized = false; +class VulkanAndNgx : public IDlssProvider, public std::enable_shared_from_this<VulkanAndNgx> { +public: + VulkanAndNgx() { + ScopeGuard cleanup_guard{[&]() { clear(); }}; - if (already_initialized) { - return; - } + if (!glfwVulkanSupported()) { + throw std::runtime_error{"!glfwVulkanSupported()"}; + } - already_initialized = true; + // ------------------------------- + // Vulkan Instance + // ------------------------------- + VkApplicationInfo app_info{}; + app_info.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO; + app_info.pApplicationName = "NGP"; + app_info.applicationVersion = VK_MAKE_VERSION(1, 0, 0); + app_info.pEngineName = "No engine"; + app_info.engineVersion = VK_MAKE_VERSION(1, 0, 0); + app_info.apiVersion = VK_API_VERSION_1_0; + + VkInstanceCreateInfo instance_create_info = {}; + instance_create_info.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO; + instance_create_info.pApplicationInfo = &app_info; + + uint32_t available_layer_count; + vkEnumerateInstanceLayerProperties(&available_layer_count, nullptr); + + std::vector<VkLayerProperties> available_layers(available_layer_count); + vkEnumerateInstanceLayerProperties(&available_layer_count, available_layers.data()); + + std::vector<const char*> layers; + auto try_add_layer = [&](const char* layer) { + for (const auto& props : available_layers) { + if (strcmp(layer, props.layerName) == 0) { + layers.emplace_back(layer); + return true; + } + } - if (!glfwVulkanSupported()) { - throw std::runtime_error{"!glfwVulkanSupported()"}; - } + return false; + }; - // ------------------------------- - // Vulkan Instance - // ------------------------------- - VkApplicationInfo app_info{}; - app_info.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO; - app_info.pApplicationName = "NGP"; - app_info.applicationVersion = VK_MAKE_VERSION(1, 0, 0); - app_info.pEngineName = "No engine"; - app_info.engineVersion = VK_MAKE_VERSION(1, 0, 0); - app_info.apiVersion = VK_API_VERSION_1_0; - - VkInstanceCreateInfo instance_create_info = {}; - instance_create_info.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO; - instance_create_info.pApplicationInfo = &app_info; - - uint32_t available_layer_count; - vkEnumerateInstanceLayerProperties(&available_layer_count, nullptr); - - std::vector<VkLayerProperties> available_layers(available_layer_count); - vkEnumerateInstanceLayerProperties(&available_layer_count, available_layers.data()); - - 
std::vector<const char*> layers; - auto try_add_layer = [&](const char* layer) { - for (const auto& props : available_layers) { - if (strcmp(layer, props.layerName) == 0) { - layers.emplace_back(layer); - return true; - } + bool validation_layer_enabled = try_add_layer("VK_LAYER_KHRONOS_validation"); + if (!validation_layer_enabled) { + tlog::warning() << "Vulkan validation layer is not available. Vulkan errors will be difficult to diagnose."; } - return false; - }; + instance_create_info.enabledLayerCount = static_cast<uint32_t>(layers.size()); + instance_create_info.ppEnabledLayerNames = layers.empty() ? nullptr : layers.data(); - bool validation_layer_enabled = try_add_layer("VK_LAYER_KHRONOS_validation"); - if (!validation_layer_enabled) { - tlog::warning() << "Vulkan validation layer is not available. Vulkan errors will be difficult to diagnose."; - } + std::vector<const char*> instance_extensions; + std::vector<const char*> device_extensions; - instance_create_info.enabledLayerCount = static_cast<uint32_t>(layers.size()); - instance_create_info.ppEnabledLayerNames = layers.empty() ? nullptr : layers.data(); + uint32_t n_ngx_instance_extensions = 0; + const char** ngx_instance_extensions; - std::vector<const char*> instance_extensions; - std::vector<const char*> device_extensions; + uint32_t n_ngx_device_extensions = 0; + const char** ngx_device_extensions; - uint32_t n_ngx_instance_extensions = 0; - const char** ngx_instance_extensions; + NVSDK_NGX_VULKAN_RequiredExtensions(&n_ngx_instance_extensions, &ngx_instance_extensions, &n_ngx_device_extensions, &ngx_device_extensions); - uint32_t n_ngx_device_extensions = 0; - const char** ngx_device_extensions; + for (uint32_t i = 0; i < n_ngx_instance_extensions; ++i) { + instance_extensions.emplace_back(ngx_instance_extensions[i]); + } - NVSDK_NGX_VULKAN_RequiredExtensions(&n_ngx_instance_extensions, &ngx_instance_extensions, &n_ngx_device_extensions, &ngx_device_extensions); + instance_extensions.emplace_back(VK_KHR_DEVICE_GROUP_CREATION_EXTENSION_NAME); + instance_extensions.emplace_back(VK_KHR_EXTERNAL_FENCE_CAPABILITIES_EXTENSION_NAME); + instance_extensions.emplace_back(VK_KHR_EXTERNAL_MEMORY_CAPABILITIES_EXTENSION_NAME); + instance_extensions.emplace_back(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME); - for (uint32_t i = 0; i < n_ngx_instance_extensions; ++i) { - instance_extensions.emplace_back(ngx_instance_extensions[i]); - } + if (validation_layer_enabled) { + instance_extensions.emplace_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME); + } - instance_extensions.emplace_back(VK_KHR_DEVICE_GROUP_CREATION_EXTENSION_NAME); - instance_extensions.emplace_back(VK_KHR_EXTERNAL_FENCE_CAPABILITIES_EXTENSION_NAME); - instance_extensions.emplace_back(VK_KHR_EXTERNAL_MEMORY_CAPABILITIES_EXTENSION_NAME); - instance_extensions.emplace_back(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME); + for (uint32_t i = 0; i < n_ngx_device_extensions; ++i) { + device_extensions.emplace_back(ngx_device_extensions[i]); + } - if (validation_layer_enabled) { - instance_extensions.emplace_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME); - } + device_extensions.emplace_back(VK_KHR_EXTERNAL_MEMORY_EXTENSION_NAME); + #ifdef _WIN32 + device_extensions.emplace_back(VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME); + #else + device_extensions.emplace_back(VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME); + #endif + device_extensions.emplace_back(VK_KHR_DEVICE_GROUP_EXTENSION_NAME); + + instance_create_info.enabledExtensionCount = (uint32_t)instance_extensions.size(); + 
instance_create_info.ppEnabledExtensionNames = instance_extensions.data(); + + VkDebugUtilsMessengerCreateInfoEXT debug_messenger_create_info = {}; + debug_messenger_create_info.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT; + debug_messenger_create_info.messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT; + debug_messenger_create_info.messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT; + debug_messenger_create_info.pfnUserCallback = vk_debug_callback; + debug_messenger_create_info.pUserData = nullptr; + + if (validation_layer_enabled) { + instance_create_info.pNext = &debug_messenger_create_info; + } - for (uint32_t i = 0; i < n_ngx_device_extensions; ++i) { - device_extensions.emplace_back(ngx_device_extensions[i]); - } + VK_CHECK_THROW(vkCreateInstance(&instance_create_info, nullptr, &m_vk_instance)); - device_extensions.emplace_back(VK_KHR_EXTERNAL_MEMORY_EXTENSION_NAME); -#ifdef _WIN32 - device_extensions.emplace_back(VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME); -#else - device_extensions.emplace_back(VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME); -#endif - device_extensions.emplace_back(VK_KHR_DEVICE_GROUP_EXTENSION_NAME); + if (validation_layer_enabled) { + auto CreateDebugUtilsMessengerEXT = [](VkInstance instance, const VkDebugUtilsMessengerCreateInfoEXT* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkDebugUtilsMessengerEXT* pDebugMessenger) { + auto func = (PFN_vkCreateDebugUtilsMessengerEXT)vkGetInstanceProcAddr(instance, "vkCreateDebugUtilsMessengerEXT"); + if (func != nullptr) { + return func(instance, pCreateInfo, pAllocator, pDebugMessenger); + } else { + return VK_ERROR_EXTENSION_NOT_PRESENT; + } + }; - instance_create_info.enabledExtensionCount = (uint32_t)instance_extensions.size(); - instance_create_info.ppEnabledExtensionNames = instance_extensions.data(); + if (CreateDebugUtilsMessengerEXT(m_vk_instance, &debug_messenger_create_info, nullptr, &m_vk_debug_messenger) != VK_SUCCESS) { + tlog::warning() << "Vulkan: could not initialize debug messenger."; + } + } - VkDebugUtilsMessengerCreateInfoEXT debug_messenger_create_info = {}; - debug_messenger_create_info.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT; - debug_messenger_create_info.messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT; - debug_messenger_create_info.messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT; - debug_messenger_create_info.pfnUserCallback = vk_debug_callback; - debug_messenger_create_info.pUserData = nullptr; + // ------------------------------- + // Vulkan Physical Device + // ------------------------------- + uint32_t n_devices = 0; + vkEnumeratePhysicalDevices(m_vk_instance, &n_devices, nullptr); - if (validation_layer_enabled) { - instance_create_info.pNext = &debug_messenger_create_info; - } + if (n_devices == 0) { + throw std::runtime_error{"Failed to find GPUs with Vulkan support."}; + } - VK_CHECK_THROW(vkCreateInstance(&instance_create_info, nullptr, &vk_instance)); + std::vector<VkPhysicalDevice> devices(n_devices); + vkEnumeratePhysicalDevices(m_vk_instance, &n_devices, devices.data()); - if (validation_layer_enabled) { - auto CreateDebugUtilsMessengerEXT = [](VkInstance instance, const 
VkDebugUtilsMessengerCreateInfoEXT* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkDebugUtilsMessengerEXT* pDebugMessenger) { - auto func = (PFN_vkCreateDebugUtilsMessengerEXT)vkGetInstanceProcAddr(instance, "vkCreateDebugUtilsMessengerEXT"); - if (func != nullptr) { - return func(instance, pCreateInfo, pAllocator, pDebugMessenger); - } else { - return VK_ERROR_EXTENSION_NOT_PRESENT; - } + struct QueueFamilyIndices { + int graphics_family = -1; + int compute_family = -1; + int transfer_family = -1; + int all_family = -1; }; - if (CreateDebugUtilsMessengerEXT(vk_instance, &debug_messenger_create_info, nullptr, &vk_debug_messenger) != VK_SUCCESS) { - tlog::warning() << "Vulkan: could not initialize debug messenger."; - } - } - - // ------------------------------- - // Vulkan Physical Device - // ------------------------------- - uint32_t n_devices = 0; - vkEnumeratePhysicalDevices(vk_instance, &n_devices, nullptr); + auto find_queue_families = [](VkPhysicalDevice device) { + QueueFamilyIndices indices; - if (n_devices == 0) { - throw std::runtime_error{"Failed to find GPUs with Vulkan support."}; - } + uint32_t queue_family_count = 0; + vkGetPhysicalDeviceQueueFamilyProperties(device, &queue_family_count, nullptr); - std::vector<VkPhysicalDevice> devices(n_devices); - vkEnumeratePhysicalDevices(vk_instance, &n_devices, devices.data()); + std::vector<VkQueueFamilyProperties> queue_families(queue_family_count); + vkGetPhysicalDeviceQueueFamilyProperties(device, &queue_family_count, queue_families.data()); - struct QueueFamilyIndices { - int graphics_family = -1; - int compute_family = -1; - int transfer_family = -1; - int all_family = -1; - }; + int i = 0; + for (const auto& queue_family : queue_families) { + if (queue_family.queueFlags & VK_QUEUE_GRAPHICS_BIT) { + indices.graphics_family = i; + } - auto find_queue_families = [](VkPhysicalDevice device) { - QueueFamilyIndices indices; + if (queue_family.queueFlags & VK_QUEUE_COMPUTE_BIT) { + indices.compute_family = i; + } - uint32_t queue_family_count = 0; - vkGetPhysicalDeviceQueueFamilyProperties(device, &queue_family_count, nullptr); + if (queue_family.queueFlags & VK_QUEUE_TRANSFER_BIT) { + indices.transfer_family = i; + } - std::vector<VkQueueFamilyProperties> queue_families(queue_family_count); - vkGetPhysicalDeviceQueueFamilyProperties(device, &queue_family_count, queue_families.data()); + if ((queue_family.queueFlags & VK_QUEUE_GRAPHICS_BIT) && (queue_family.queueFlags & VK_QUEUE_COMPUTE_BIT) && (queue_family.queueFlags & VK_QUEUE_TRANSFER_BIT)) { + indices.all_family = i; + } - int i = 0; - for (const auto& queue_family : queue_families) { - if (queue_family.queueFlags & VK_QUEUE_GRAPHICS_BIT) { - indices.graphics_family = i; + i++; } - if (queue_family.queueFlags & VK_QUEUE_COMPUTE_BIT) { - indices.compute_family = i; - } + return indices; + }; - if (queue_family.queueFlags & VK_QUEUE_TRANSFER_BIT) { - indices.transfer_family = i; - } + cudaDeviceProp cuda_device_prop; + CUDA_CHECK_THROW(cudaGetDeviceProperties(&cuda_device_prop, tcnn::cuda_device())); - if ((queue_family.queueFlags & VK_QUEUE_GRAPHICS_BIT) && (queue_family.queueFlags & VK_QUEUE_COMPUTE_BIT) && (queue_family.queueFlags & VK_QUEUE_TRANSFER_BIT)) { - indices.all_family = i; - } + auto is_same_as_cuda_device = [&](VkPhysicalDevice device) { + VkPhysicalDeviceIDProperties physical_device_id_properties = {}; + physical_device_id_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES; + physical_device_id_properties.pNext = NULL; - i++; - } + 
VkPhysicalDeviceProperties2 physical_device_properties = {}; + physical_device_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; + physical_device_properties.pNext = &physical_device_id_properties; - return indices; - }; + vkGetPhysicalDeviceProperties2(device, &physical_device_properties); - cudaDeviceProp cuda_device_prop; - CUDA_CHECK_THROW(cudaGetDeviceProperties(&cuda_device_prop, tcnn::cuda_device())); + return !memcmp(&cuda_device_prop.uuid, physical_device_id_properties.deviceUUID, VK_UUID_SIZE) && find_queue_families(device).all_family >= 0; + }; - auto is_same_as_cuda_device = [&](VkPhysicalDevice device) { - VkPhysicalDeviceIDProperties physical_device_id_properties = {}; - physical_device_id_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES; - physical_device_id_properties.pNext = NULL; + uint32_t device_id = 0; + for (uint32_t i = 0; i < n_devices; ++i) { + if (is_same_as_cuda_device(devices[i])) { + m_vk_physical_device = devices[i]; + device_id = i; + break; + } + } - VkPhysicalDeviceProperties2 physical_device_properties = {}; - physical_device_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; - physical_device_properties.pNext = &physical_device_id_properties; + if (m_vk_physical_device == VK_NULL_HANDLE) { + throw std::runtime_error{"Failed to find Vulkan device corresponding to CUDA device."}; + } - vkGetPhysicalDeviceProperties2(device, &physical_device_properties); + // ------------------------------- + // Vulkan Logical Device + // ------------------------------- + VkPhysicalDeviceProperties physical_device_properties; + vkGetPhysicalDeviceProperties(m_vk_physical_device, &physical_device_properties); + + QueueFamilyIndices indices = find_queue_families(m_vk_physical_device); + + VkDeviceQueueCreateInfo queue_create_info{}; + queue_create_info.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO; + queue_create_info.queueFamilyIndex = indices.all_family; + queue_create_info.queueCount = 1; + + float queue_priority = 1.0f; + queue_create_info.pQueuePriorities = &queue_priority; + + VkPhysicalDeviceFeatures device_features = {}; + device_features.shaderStorageImageWriteWithoutFormat = true; + + VkDeviceCreateInfo device_create_info = {}; + device_create_info.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO; + device_create_info.pQueueCreateInfos = &queue_create_info; + device_create_info.queueCreateInfoCount = 1; + device_create_info.pEnabledFeatures = &device_features; + device_create_info.enabledExtensionCount = (uint32_t)device_extensions.size(); + device_create_info.ppEnabledExtensionNames = device_extensions.data(); + device_create_info.enabledLayerCount = static_cast<uint32_t>(layers.size()); + device_create_info.ppEnabledLayerNames = layers.data(); + + #ifdef VK_EXT_BUFFER_DEVICE_ADDRESS_EXTENSION_NAME + VkPhysicalDeviceBufferDeviceAddressFeaturesEXT buffer_device_address_feature = {}; + buffer_device_address_feature.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES_EXT; + buffer_device_address_feature.bufferDeviceAddress = VK_TRUE; + device_create_info.pNext = &buffer_device_address_feature; + #else + throw std::runtime_error{"Buffer device address extension not available."}; + #endif + + VK_CHECK_THROW(vkCreateDevice(m_vk_physical_device, &device_create_info, nullptr, &m_vk_device)); + + // ----------------------------------------------- + // Vulkan queue / command pool / command buffer + // ----------------------------------------------- + vkGetDeviceQueue(m_vk_device, indices.all_family, 0, 
&m_vk_queue); + + VkCommandPoolCreateInfo command_pool_info = {}; + command_pool_info.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; + command_pool_info.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; + command_pool_info.queueFamilyIndex = indices.all_family; + + VK_CHECK_THROW(vkCreateCommandPool(m_vk_device, &command_pool_info, nullptr, &m_vk_command_pool)); + + VkCommandBufferAllocateInfo command_buffer_alloc_info = {}; + command_buffer_alloc_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; + command_buffer_alloc_info.commandPool = m_vk_command_pool; + command_buffer_alloc_info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; + command_buffer_alloc_info.commandBufferCount = 1; + + VK_CHECK_THROW(vkAllocateCommandBuffers(m_vk_device, &command_buffer_alloc_info, &m_vk_command_buffer)); + + // ------------------------------- + // NGX init + // ------------------------------- + std::wstring path; +#ifdef _WIN32 + path = fs::path::getcwd().wstr(); +#else + path = utf8_to_utf16(fs::path::getcwd().str()); +#endif - return !memcmp(&cuda_device_prop.uuid, physical_device_id_properties.deviceUUID, VK_UUID_SIZE) && find_queue_families(device).all_family >= 0; - }; + NGX_CHECK_THROW(NVSDK_NGX_VULKAN_Init_with_ProjectID("ea75345e-5a42-4037-a5c9-59bf94dee157", NVSDK_NGX_ENGINE_TYPE_CUSTOM, "1.0.0", path.c_str(), m_vk_instance, m_vk_physical_device, m_vk_device)); + m_ngx_initialized = true; + + // ------------------------------- + // Ensure DLSS capability + // ------------------------------- + NGX_CHECK_THROW(NVSDK_NGX_VULKAN_GetCapabilityParameters(&m_ngx_parameters)); + + int needs_updated_driver = 0; + unsigned int min_driver_version_major = 0; + unsigned int min_driver_version_minor = 0; + NVSDK_NGX_Result result_updated_driver = m_ngx_parameters->Get(NVSDK_NGX_Parameter_SuperSampling_NeedsUpdatedDriver, &needs_updated_driver); + NVSDK_NGX_Result result_min_driver_version_major = m_ngx_parameters->Get(NVSDK_NGX_Parameter_SuperSampling_MinDriverVersionMajor, &min_driver_version_major); + NVSDK_NGX_Result result_min_driver_version_minor = m_ngx_parameters->Get(NVSDK_NGX_Parameter_SuperSampling_MinDriverVersionMinor, &min_driver_version_minor); + if (result_updated_driver == NVSDK_NGX_Result_Success && result_min_driver_version_major == NVSDK_NGX_Result_Success && result_min_driver_version_minor == NVSDK_NGX_Result_Success) { + if (needs_updated_driver) { + throw std::runtime_error{fmt::format("Driver too old. 
Minimum version required is {}.{}", min_driver_version_major, min_driver_version_minor)}; + } + } - uint32_t device_id = 0; - for (uint32_t i = 0; i < n_devices; ++i) { - if (is_same_as_cuda_device(devices[i])) { - vk_physical_device = devices[i]; - device_id = i; - break; + int dlss_available = 0; + NVSDK_NGX_Result ngx_result = m_ngx_parameters->Get(NVSDK_NGX_Parameter_SuperSampling_Available, &dlss_available); + if (ngx_result != NVSDK_NGX_Result_Success || !dlss_available) { + ngx_result = NVSDK_NGX_Result_Fail; + NVSDK_NGX_Parameter_GetI(m_ngx_parameters, NVSDK_NGX_Parameter_SuperSampling_FeatureInitResult, (int*)&ngx_result); + throw std::runtime_error{fmt::format("DLSS not available: {}", ngx_error_string(ngx_result))}; } + + cleanup_guard.disarm(); + + tlog::success() << "Initialized Vulkan and NGX on GPU #" << device_id << ": " << physical_device_properties.deviceName; } - if (vk_physical_device == VK_NULL_HANDLE) { - throw std::runtime_error{"Failed to find Vulkan device corresponding to CUDA device."}; + virtual ~VulkanAndNgx() { + clear(); } - // ------------------------------- - // Vulkan Logical Device - // ------------------------------- - VkPhysicalDeviceProperties physical_device_properties; - vkGetPhysicalDeviceProperties(vk_physical_device, &physical_device_properties); - - QueueFamilyIndices indices = find_queue_families(vk_physical_device); - - VkDeviceQueueCreateInfo queue_create_info{}; - queue_create_info.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO; - queue_create_info.queueFamilyIndex = indices.all_family; - queue_create_info.queueCount = 1; - - float queue_priority = 1.0f; - queue_create_info.pQueuePriorities = &queue_priority; - - VkPhysicalDeviceFeatures device_features = {}; - device_features.shaderStorageImageWriteWithoutFormat = true; - - VkDeviceCreateInfo device_create_info = {}; - device_create_info.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO; - device_create_info.pQueueCreateInfos = &queue_create_info; - device_create_info.queueCreateInfoCount = 1; - device_create_info.pEnabledFeatures = &device_features; - device_create_info.enabledExtensionCount = (uint32_t)device_extensions.size(); - device_create_info.ppEnabledExtensionNames = device_extensions.data(); - device_create_info.enabledLayerCount = static_cast<uint32_t>(layers.size()); - device_create_info.ppEnabledLayerNames = layers.data(); - -#ifdef VK_EXT_BUFFER_DEVICE_ADDRESS_EXTENSION_NAME - VkPhysicalDeviceBufferDeviceAddressFeaturesEXT buffer_device_address_feature = {}; - buffer_device_address_feature.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES_EXT; - buffer_device_address_feature.bufferDeviceAddress = VK_TRUE; - device_create_info.pNext = &buffer_device_address_feature; -#else - throw std::runtime_error{"Buffer device address extension not available."}; -#endif + void clear() { + if (m_ngx_parameters) { + NVSDK_NGX_VULKAN_DestroyParameters(m_ngx_parameters); + m_ngx_parameters = nullptr; + } - VK_CHECK_THROW(vkCreateDevice(vk_physical_device, &device_create_info, nullptr, &vk_device)); + if (m_ngx_initialized) { + NVSDK_NGX_VULKAN_Shutdown(); + m_ngx_initialized = false; + } - // ----------------------------------------------- - // Vulkan queue / command pool / command buffer - // ----------------------------------------------- - vkGetDeviceQueue(vk_device, indices.all_family, 0, &vk_queue); + if (m_vk_command_pool) { + vkDestroyCommandPool(m_vk_device, m_vk_command_pool, nullptr); + m_vk_command_pool = VK_NULL_HANDLE; + } - VkCommandPoolCreateInfo 
command_pool_info = {}; - command_pool_info.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; - command_pool_info.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; - command_pool_info.queueFamilyIndex = indices.all_family; + if (m_vk_device) { + vkDestroyDevice(m_vk_device, nullptr); + m_vk_device = VK_NULL_HANDLE; + } - VK_CHECK_THROW(vkCreateCommandPool(vk_device, &command_pool_info, nullptr, &vk_command_pool)); + if (m_vk_debug_messenger) { + auto DestroyDebugUtilsMessengerEXT = [](VkInstance instance, VkDebugUtilsMessengerEXT debugMessenger, const VkAllocationCallbacks* pAllocator) { + auto func = (PFN_vkDestroyDebugUtilsMessengerEXT)vkGetInstanceProcAddr(instance, "vkDestroyDebugUtilsMessengerEXT"); + if (func != nullptr) { + func(instance, debugMessenger, pAllocator); + } + }; - VkCommandBufferAllocateInfo command_buffer_alloc_info = {}; - command_buffer_alloc_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; - command_buffer_alloc_info.commandPool = vk_command_pool; - command_buffer_alloc_info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; - command_buffer_alloc_info.commandBufferCount = 1; + DestroyDebugUtilsMessengerEXT(m_vk_instance, m_vk_debug_messenger, nullptr); + m_vk_debug_messenger = VK_NULL_HANDLE; + } - VK_CHECK_THROW(vkAllocateCommandBuffers(vk_device, &command_buffer_alloc_info, &vk_command_buffer)); + if (m_vk_instance) { + vkDestroyInstance(m_vk_instance, nullptr); + m_vk_instance = VK_NULL_HANDLE; + } + } - // ------------------------------- - // NGX init - // ------------------------------- - std::wstring path; -#ifdef _WIN32 - path = fs::path::getcwd().wstr(); -#else - std::string tmp = fs::path::getcwd().str(); - std::wstring_convert<std::codecvt_utf8<wchar_t>, wchar_t> converter; - path = converter.from_bytes(tmp); -#endif + uint32_t vk_find_memory_type(uint32_t type_filter, VkMemoryPropertyFlags properties) { + VkPhysicalDeviceMemoryProperties mem_properties; + vkGetPhysicalDeviceMemoryProperties(m_vk_physical_device, &mem_properties); - NGX_CHECK_THROW(NVSDK_NGX_VULKAN_Init_with_ProjectID("ea75345e-5a42-4037-a5c9-59bf94dee157", NVSDK_NGX_ENGINE_TYPE_CUSTOM, "1.0.0", path.c_str(), vk_instance, vk_physical_device, vk_device)); - ngx_initialized = true; - - // ------------------------------- - // Ensure DLSS capability - // ------------------------------- - NGX_CHECK_THROW(NVSDK_NGX_VULKAN_GetCapabilityParameters(&ngx_parameters)); - - int needs_updated_driver = 0; - unsigned int min_driver_version_major = 0; - unsigned int min_driver_version_minor = 0; - NVSDK_NGX_Result result_updated_driver = ngx_parameters->Get(NVSDK_NGX_Parameter_SuperSampling_NeedsUpdatedDriver, &needs_updated_driver); - NVSDK_NGX_Result result_min_driver_version_major = ngx_parameters->Get(NVSDK_NGX_Parameter_SuperSampling_MinDriverVersionMajor, &min_driver_version_major); - NVSDK_NGX_Result result_min_driver_version_minor = ngx_parameters->Get(NVSDK_NGX_Parameter_SuperSampling_MinDriverVersionMinor, &min_driver_version_minor); - if (result_updated_driver == NVSDK_NGX_Result_Success && result_min_driver_version_major == NVSDK_NGX_Result_Success && result_min_driver_version_minor == NVSDK_NGX_Result_Success) { - if (needs_updated_driver) { - throw std::runtime_error{fmt::format("Driver too old. 
Minimum version required is {}.{}", min_driver_version_major, min_driver_version_minor)}; + for (uint32_t i = 0; i < mem_properties.memoryTypeCount; i++) { + if (type_filter & (1 << i) && (mem_properties.memoryTypes[i].propertyFlags & properties) == properties) { + return i; + } } + + throw std::runtime_error{"Failed to find suitable memory type."}; } - int dlss_available = 0; - NVSDK_NGX_Result ngx_result = ngx_parameters->Get(NVSDK_NGX_Parameter_SuperSampling_Available, &dlss_available); - if (ngx_result != NVSDK_NGX_Result_Success || !dlss_available) { - ngx_result = NVSDK_NGX_Result_Fail; - NVSDK_NGX_Parameter_GetI(ngx_parameters, NVSDK_NGX_Parameter_SuperSampling_FeatureInitResult, (int*)&ngx_result); - throw std::runtime_error{fmt::format("DLSS not available: {}", ngx_error_string(ngx_result))}; + void vk_command_buffer_begin() { + VkCommandBufferBeginInfo begin_info = {}; + begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; + begin_info.pInheritanceInfo = nullptr; + + VK_CHECK_THROW(vkBeginCommandBuffer(m_vk_command_buffer, &begin_info)); } - tlog::success() << "Initialized Vulkan and NGX on GPU #" << device_id << ": " << physical_device_properties.deviceName; -} + void vk_command_buffer_end() { + VK_CHECK_THROW(vkEndCommandBuffer(m_vk_command_buffer)); + } -size_t dlss_allocated_bytes() { - unsigned long long allocated_bytes = 0; - if (!ngx_parameters) { - return 0; + void vk_command_buffer_submit() { + VkSubmitInfo submit_info = { VK_STRUCTURE_TYPE_SUBMIT_INFO }; + submit_info.commandBufferCount = 1; + submit_info.pCommandBuffers = &m_vk_command_buffer; + + VK_CHECK_THROW(vkQueueSubmit(m_vk_queue, 1, &submit_info, VK_NULL_HANDLE)); } - try { - NGX_CHECK_THROW(NGX_DLSS_GET_STATS(ngx_parameters, &allocated_bytes)); - } catch (...) { - return 0; + void vk_synchronize() { + VK_CHECK_THROW(vkDeviceWaitIdle(m_vk_device)); } - return allocated_bytes; -} + void vk_command_buffer_submit_sync() { + vk_command_buffer_submit(); + vk_synchronize(); + } -void vk_command_buffer_begin() { - VkCommandBufferBeginInfo begin_info = {}; - begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; - begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; - begin_info.pInheritanceInfo = nullptr; + void vk_command_buffer_end_and_submit_sync() { + vk_command_buffer_end(); + vk_command_buffer_submit_sync(); + } - VK_CHECK_THROW(vkBeginCommandBuffer(vk_command_buffer, &begin_info)); -} + const VkCommandBuffer& vk_command_buffer() const { + return m_vk_command_buffer; + } -void vk_command_buffer_end() { - VK_CHECK_THROW(vkEndCommandBuffer(vk_command_buffer)); -} + const VkDevice& vk_device() const { + return m_vk_device; + } -void vk_command_buffer_submit() { - VkSubmitInfo submit_info = { VK_STRUCTURE_TYPE_SUBMIT_INFO }; - submit_info.commandBufferCount = 1; - submit_info.pCommandBuffers = &vk_command_buffer; + NVSDK_NGX_Parameter* ngx_parameters() const { + return m_ngx_parameters; + } - VK_CHECK_THROW(vkQueueSubmit(vk_queue, 1, &submit_info, VK_NULL_HANDLE)); -} + size_t allocated_bytes() const override { + unsigned long long allocated_bytes = 0; + if (!m_ngx_parameters) { + return 0; + } -void vk_synchronize() { - VK_CHECK_THROW(vkDeviceWaitIdle(vk_device)); -} + try { + NGX_CHECK_THROW(NGX_DLSS_GET_STATS(m_ngx_parameters, &allocated_bytes)); + } catch (...) 
{ + return 0; + } -void vk_command_buffer_submit_sync() { - vk_command_buffer_submit(); - vk_synchronize(); -} + return allocated_bytes; + } + + std::unique_ptr<IDlss> init_dlss(const Eigen::Vector2i& out_resolution) override; + +private: + VkInstance m_vk_instance = VK_NULL_HANDLE; + VkDebugUtilsMessengerEXT m_vk_debug_messenger = VK_NULL_HANDLE; + VkPhysicalDevice m_vk_physical_device = VK_NULL_HANDLE; + VkDevice m_vk_device = VK_NULL_HANDLE; + VkQueue m_vk_queue = VK_NULL_HANDLE; + VkCommandPool m_vk_command_pool = VK_NULL_HANDLE; + VkCommandBuffer m_vk_command_buffer = VK_NULL_HANDLE; + NVSDK_NGX_Parameter* m_ngx_parameters = nullptr; + bool m_ngx_initialized = false; +}; -void vk_command_buffer_end_and_submit_sync() { - vk_command_buffer_end(); - vk_command_buffer_submit_sync(); +std::shared_ptr<IDlssProvider> init_vulkan_and_ngx() { + return std::make_shared<VulkanAndNgx>(); } class VulkanTexture { public: - VulkanTexture(const Vector2i& size, uint32_t n_channels) : m_size{size}, m_n_channels{n_channels} { + VulkanTexture(std::shared_ptr<VulkanAndNgx> vk, const Vector2i& size, uint32_t n_channels) : m_vk{vk}, m_size{size}, m_n_channels{n_channels} { VkImageCreateInfo image_info{}; image_info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; image_info.imageType = VK_IMAGE_TYPE_2D; @@ -515,17 +580,17 @@ public: image_info.pNext = &ext_image_info; - VK_CHECK_THROW(vkCreateImage(vk_device, &image_info, nullptr, &m_vk_image)); + VK_CHECK_THROW(vkCreateImage(m_vk->vk_device(), &image_info, nullptr, &m_vk_image)); // Create device memory to back up the image VkMemoryRequirements mem_requirements = {}; - vkGetImageMemoryRequirements(vk_device, m_vk_image, &mem_requirements); + vkGetImageMemoryRequirements(m_vk->vk_device(), m_vk_image, &mem_requirements); VkMemoryAllocateInfo mem_alloc_info = {}; mem_alloc_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; mem_alloc_info.allocationSize = mem_requirements.size; - mem_alloc_info.memoryTypeIndex = vk_find_memory_type(mem_requirements.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + mem_alloc_info.memoryTypeIndex = m_vk->vk_find_memory_type(mem_requirements.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); VkExportMemoryAllocateInfoKHR export_info = {}; export_info.sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO_KHR; @@ -533,10 +598,10 @@ public: mem_alloc_info.pNext = &export_info; - VK_CHECK_THROW(vkAllocateMemory(vk_device, &mem_alloc_info, nullptr, &m_vk_device_memory)); - VK_CHECK_THROW(vkBindImageMemory(vk_device, m_vk_image, m_vk_device_memory, 0)); + VK_CHECK_THROW(vkAllocateMemory(m_vk->vk_device(), &mem_alloc_info, nullptr, &m_vk_device_memory)); + VK_CHECK_THROW(vkBindImageMemory(m_vk->vk_device(), m_vk_image, m_vk_device_memory, 0)); - vk_command_buffer_begin(); + m_vk->vk_command_buffer_begin(); VkImageMemoryBarrier barrier = {}; barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; @@ -554,7 +619,7 @@ public: barrier.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT | VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; vkCmdPipelineBarrier( - vk_command_buffer, + m_vk->vk_command_buffer(), VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, 0, nullptr, @@ -562,7 +627,7 @@ public: 1, &barrier ); - vk_command_buffer_end_and_submit_sync(); + m_vk->vk_command_buffer_end_and_submit_sync(); // Image view VkImageViewCreateInfo view_info = {}; @@ -572,7 +637,7 @@ public: view_info.format = 
image_info.format; view_info.subresourceRange = barrier.subresourceRange; - VK_CHECK_THROW(vkCreateImageView(vk_device, &view_info, nullptr, &m_vk_image_view)); + VK_CHECK_THROW(vkCreateImageView(m_vk->vk_device(), &view_info, nullptr, &m_vk_image_view)); // Map to NGX m_ngx_resource = NVSDK_NGX_Create_ImageView_Resource_VK(m_vk_image_view, m_vk_image, view_info.subresourceRange, image_info.format, m_size.x(), m_size.y(), true); @@ -584,21 +649,21 @@ public: handle_info.sType = VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR; handle_info.memory = m_vk_device_memory; handle_info.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT; - auto pfn_vkGetMemory = (PFN_vkGetMemoryWin32HandleKHR)vkGetDeviceProcAddr(vk_device, "vkGetMemoryWin32HandleKHR"); + auto pfn_vkGetMemory = (PFN_vkGetMemoryWin32HandleKHR)vkGetDeviceProcAddr(m_vk->vk_device(), "vkGetMemoryWin32HandleKHR"); #else int handle = -1; VkMemoryGetFdInfoKHR handle_info = {}; handle_info.sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR; handle_info.memory = m_vk_device_memory; handle_info.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR; - auto pfn_vkGetMemory = (PFN_vkGetMemoryFdKHR)vkGetDeviceProcAddr(vk_device, "vkGetMemoryFdKHR"); + auto pfn_vkGetMemory = (PFN_vkGetMemoryFdKHR)vkGetDeviceProcAddr(m_vk->vk_device(), "vkGetMemoryFdKHR"); #endif if (!pfn_vkGetMemory) { throw std::runtime_error{"Failed to locate pfn_vkGetMemory."}; } - VK_CHECK_THROW(pfn_vkGetMemory(vk_device, &handle_info, &handle)); + VK_CHECK_THROW(pfn_vkGetMemory(m_vk->vk_device(), &handle_info, &handle)); // Map handle to CUDA memory cudaExternalMemoryHandleDesc external_memory_handle_desc = {}; @@ -687,15 +752,15 @@ public: } if (m_vk_image_view) { - vkDestroyImageView(vk_device, m_vk_image_view, nullptr); + vkDestroyImageView(m_vk->vk_device(), m_vk_image_view, nullptr); } if (m_vk_image) { - vkDestroyImage(vk_device, m_vk_image, nullptr); + vkDestroyImage(m_vk->vk_device(), m_vk_image, nullptr); } if (m_vk_device_memory) { - vkFreeMemory(vk_device, m_vk_device_memory, nullptr); + vkFreeMemory(m_vk->vk_device(), m_vk_device_memory, nullptr); } } @@ -720,6 +785,8 @@ public: } private: + std::shared_ptr<VulkanAndNgx> m_vk; + Vector2i m_size; uint32_t m_n_channels; @@ -765,7 +832,7 @@ struct DlssFeatureSpecs { } }; -DlssFeatureSpecs dlss_feature_specs(const Eigen::Vector2i& out_resolution, EDlssQuality quality) { +DlssFeatureSpecs dlss_feature_specs(NVSDK_NGX_Parameter* ngx_parameters, const Eigen::Vector2i& out_resolution, EDlssQuality quality) { DlssFeatureSpecs specs; specs.quality = quality; specs.out_resolution = out_resolution; @@ -790,7 +857,7 @@ DlssFeatureSpecs dlss_feature_specs(const Eigen::Vector2i& out_resolution, EDlss class DlssFeature { public: - DlssFeature(const DlssFeatureSpecs& specs, bool is_hdr, bool sharpen) : m_specs{specs}, m_is_hdr{is_hdr}, m_sharpen{sharpen} { + DlssFeature(std::shared_ptr<VulkanAndNgx> vk_and_ngx, const DlssFeatureSpecs& specs, bool is_hdr, bool sharpen) : m_vk_and_ngx{vk_and_ngx}, m_specs{specs}, m_is_hdr{is_hdr}, m_sharpen{sharpen} { // Initialize DLSS unsigned int creation_node_mask = 1; unsigned int visibility_node_mask = 1; @@ -799,7 +866,7 @@ public: dlss_create_feature_flags |= true ? NVSDK_NGX_DLSS_Feature_Flags_MVLowRes : 0; dlss_create_feature_flags |= false ? NVSDK_NGX_DLSS_Feature_Flags_MVJittered : 0; dlss_create_feature_flags |= is_hdr ? NVSDK_NGX_DLSS_Feature_Flags_IsHDR : 0; - dlss_create_feature_flags |= false ? 
NVSDK_NGX_DLSS_Feature_Flags_DepthInverted : 0; + dlss_create_feature_flags |= true ? NVSDK_NGX_DLSS_Feature_Flags_DepthInverted : 0; dlss_create_feature_flags |= sharpen ? NVSDK_NGX_DLSS_Feature_Flags_DoSharpening : 0; dlss_create_feature_flags |= false ? NVSDK_NGX_DLSS_Feature_Flags_AutoExposure : 0; @@ -815,15 +882,15 @@ public: dlss_create_params.InFeatureCreateFlags = dlss_create_feature_flags; { - vk_command_buffer_begin(); - ScopeGuard command_buffer_guard{[&]() { vk_command_buffer_end_and_submit_sync(); }}; + m_vk_and_ngx->vk_command_buffer_begin(); + ScopeGuard command_buffer_guard{[&]() { m_vk_and_ngx->vk_command_buffer_end_and_submit_sync(); }}; - NGX_CHECK_THROW(NGX_VULKAN_CREATE_DLSS_EXT(vk_command_buffer, creation_node_mask, visibility_node_mask, &m_ngx_dlss, ngx_parameters, &dlss_create_params)); + NGX_CHECK_THROW(NGX_VULKAN_CREATE_DLSS_EXT(m_vk_and_ngx->vk_command_buffer(), creation_node_mask, visibility_node_mask, &m_ngx_dlss, m_vk_and_ngx->ngx_parameters(), &dlss_create_params)); } } - DlssFeature(const Eigen::Vector2i& out_resolution, bool is_hdr, bool sharpen, EDlssQuality quality) - : DlssFeature{dlss_feature_specs(out_resolution, quality), is_hdr, sharpen} {} + DlssFeature(std::shared_ptr<VulkanAndNgx> vk_and_ngx, const Eigen::Vector2i& out_resolution, bool is_hdr, bool sharpen, EDlssQuality quality) + : DlssFeature{vk_and_ngx, dlss_feature_specs(vk_and_ngx->ngx_parameters(), out_resolution, quality), is_hdr, sharpen} {} ~DlssFeature() { cudaDeviceSynchronize(); @@ -832,7 +899,7 @@ public: NVSDK_NGX_VULKAN_ReleaseFeature(m_ngx_dlss); } - vk_synchronize(); + m_vk_and_ngx->vk_synchronize(); } void run( @@ -850,7 +917,7 @@ public: throw std::runtime_error{"May only specify non-zero sharpening, when DlssFeature has been created with sharpen option."}; } - vk_command_buffer_begin(); + m_vk_and_ngx->vk_command_buffer_begin(); NVSDK_NGX_VK_DLSS_Eval_Params dlss_params; memset(&dlss_params, 0, sizeof(dlss_params)); @@ -868,9 +935,9 @@ public: dlss_params.InMVScaleY = 1.0f; dlss_params.InRenderSubrectDimensions = {(uint32_t)in_resolution.x(), (uint32_t)in_resolution.y()}; - NGX_CHECK_THROW(NGX_VULKAN_EVALUATE_DLSS_EXT(vk_command_buffer, m_ngx_dlss, ngx_parameters, &dlss_params)); + NGX_CHECK_THROW(NGX_VULKAN_EVALUATE_DLSS_EXT(m_vk_and_ngx->vk_command_buffer(), m_ngx_dlss, m_vk_and_ngx->ngx_parameters(), &dlss_params)); - vk_command_buffer_end_and_submit_sync(); + m_vk_and_ngx->vk_command_buffer_end_and_submit_sync(); } bool is_hdr() const { @@ -898,6 +965,8 @@ public: } private: + std::shared_ptr<VulkanAndNgx> m_vk_and_ngx; + NVSDK_NGX_Handle* m_ngx_dlss = {}; DlssFeatureSpecs m_specs; bool m_is_hdr; @@ -906,28 +975,29 @@ private: class Dlss : public IDlss { public: - Dlss(const Eigen::Vector2i& max_out_resolution) + Dlss(std::shared_ptr<VulkanAndNgx> vk_and_ngx, const Eigen::Vector2i& max_out_resolution) : + m_vk_and_ngx{vk_and_ngx}, m_max_out_resolution{max_out_resolution}, // Allocate all buffers at output resolution and use dynamic sub-rects // to use subsets of them. This avoids re-allocations when using DLSS // with dynamically changing input resolution. 
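+ // Note: each buffer below now also holds a shared_ptr to the Vulkan/NGX context (m_vk_and_ngx), which keeps the VkDevice alive for as long as the textures exist.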
- m_frame_buffer{max_out_resolution, 4}, - m_depth_buffer{max_out_resolution, 1}, - m_mvec_buffer{max_out_resolution, 2}, - m_exposure_buffer{{1, 1}, 1}, - m_output_buffer{max_out_resolution, 4} + m_frame_buffer{m_vk_and_ngx, max_out_resolution, 4}, + m_depth_buffer{m_vk_and_ngx, max_out_resolution, 1}, + m_mvec_buffer{m_vk_and_ngx, max_out_resolution, 2}, + m_exposure_buffer{m_vk_and_ngx, {1, 1}, 1}, + m_output_buffer{m_vk_and_ngx, max_out_resolution, 4} { // Various quality modes of DLSS for (int i = 0; i < (int)EDlssQuality::NumDlssQualitySettings; ++i) { try { - auto specs = dlss_feature_specs(max_out_resolution, (EDlssQuality)i); + auto specs = dlss_feature_specs(m_vk_and_ngx->ngx_parameters(), max_out_resolution, (EDlssQuality)i); // Only emplace the specs if the feature can be created in practice! - DlssFeature{specs, true, true}; - DlssFeature{specs, true, false}; - DlssFeature{specs, false, true}; - DlssFeature{specs, false, false}; + DlssFeature{m_vk_and_ngx, specs, true, true}; + DlssFeature{m_vk_and_ngx, specs, true, false}; + DlssFeature{m_vk_and_ngx, specs, false, true}; + DlssFeature{m_vk_and_ngx, specs, false, false}; m_dlss_specs.emplace_back(specs); } catch (...) {} } @@ -943,13 +1013,13 @@ public: for (const auto& out_resolution : reduced_out_resolutions) { try { - auto specs = dlss_feature_specs(out_resolution, EDlssQuality::UltraPerformance); + auto specs = dlss_feature_specs(m_vk_and_ngx->ngx_parameters(), out_resolution, EDlssQuality::UltraPerformance); // Only emplace the specs if the feature can be created in practice! - DlssFeature{specs, true, true}; - DlssFeature{specs, true, false}; - DlssFeature{specs, false, true}; - DlssFeature{specs, false, false}; + DlssFeature{m_vk_and_ngx, specs, true, true}; + DlssFeature{m_vk_and_ngx, specs, true, false}; + DlssFeature{m_vk_and_ngx, specs, false, true}; + DlssFeature{m_vk_and_ngx, specs, false, false}; m_dlss_specs.emplace_back(specs); } catch (...) 
{} } @@ -977,7 +1047,7 @@ public: } if (!m_dlss_feature || m_dlss_feature->is_hdr() != is_hdr || m_dlss_feature->sharpen() != sharpen || m_dlss_feature->quality() != specs.quality || m_dlss_feature->out_resolution() != specs.out_resolution) { - m_dlss_feature.reset(new DlssFeature{specs.out_resolution, is_hdr, sharpen, specs.quality}); + m_dlss_feature.reset(new DlssFeature{m_vk_and_ngx, specs.out_resolution, is_hdr, sharpen, specs.quality}); } } @@ -1060,6 +1130,8 @@ public: } private: + std::shared_ptr<VulkanAndNgx> m_vk_and_ngx; + std::unique_ptr<DlssFeature> m_dlss_feature; std::vector<DlssFeatureSpecs> m_dlss_specs; @@ -1072,47 +1144,8 @@ private: Vector2i m_max_out_resolution; }; -std::shared_ptr<IDlss> dlss_init(const Eigen::Vector2i& out_resolution) { - return std::make_shared<Dlss>(out_resolution); -} - -void vulkan_and_ngx_destroy() { - if (ngx_parameters) { - NVSDK_NGX_VULKAN_DestroyParameters(ngx_parameters); - ngx_parameters = nullptr; - } - - if (ngx_initialized) { - NVSDK_NGX_VULKAN_Shutdown(); - ngx_initialized = false; - } - - if (vk_command_pool) { - vkDestroyCommandPool(vk_device, vk_command_pool, nullptr); - vk_command_pool = VK_NULL_HANDLE; - } - - if (vk_device) { - vkDestroyDevice(vk_device, nullptr); - vk_device = VK_NULL_HANDLE; - } - - if (vk_debug_messenger) { - auto DestroyDebugUtilsMessengerEXT = [](VkInstance instance, VkDebugUtilsMessengerEXT debugMessenger, const VkAllocationCallbacks* pAllocator) { - auto func = (PFN_vkDestroyDebugUtilsMessengerEXT)vkGetInstanceProcAddr(instance, "vkDestroyDebugUtilsMessengerEXT"); - if (func != nullptr) { - func(instance, debugMessenger, pAllocator); - } - }; - - DestroyDebugUtilsMessengerEXT(vk_instance, vk_debug_messenger, nullptr); - vk_debug_messenger = VK_NULL_HANDLE; - } - - if (vk_instance) { - vkDestroyInstance(vk_instance, nullptr); - vk_instance = VK_NULL_HANDLE; - } +std::unique_ptr<IDlss> VulkanAndNgx::init_dlss(const Eigen::Vector2i& out_resolution) { + return std::make_unique<Dlss>(shared_from_this(), out_resolution); } NGP_NAMESPACE_END diff --git a/src/main.cu b/src/main.cu index f05b489598b3cb0208251230e359310954a8b2d3..ac79bd362f70f3e354fd2029bb4c907809dfa8aa 100644 --- a/src/main.cu +++ b/src/main.cu @@ -62,6 +62,13 @@ int main_func(const std::vector<std::string>& arguments) { {"no-gui"}, }; + Flag vr_flag{ + parser, + "VR", + "Enables VR", + {"vr"} + }; + Flag no_train_flag{ parser, "NO_TRAIN", @@ -170,6 +177,10 @@ int main_func(const std::vector<std::string>& arguments) { testbed.init_window(width_flag ? get(width_flag) : 1920, height_flag ? get(height_flag) : 1080); } + if (vr_flag) { + testbed.init_vr(); + } + // Render/training loop while (testbed.frame()) { if (!gui) { diff --git a/src/marching_cubes.cu b/src/marching_cubes.cu index 28c28585ab86bd8e8104319ecde973a325fae723..2fc595405c5835268f7f83893e820bfe65cd714c 100644 --- a/src/marching_cubes.cu +++ b/src/marching_cubes.cu @@ -98,11 +98,11 @@ bool check_shader(uint32_t handle, const char* desc, bool program) { uint32_t compile_shader(bool pixel, const char* code) { GLuint g_VertHandle = glCreateShader(pixel ? GL_FRAGMENT_SHADER : GL_VERTEX_SHADER ); - const char* glsl_version = "#version 330\n"; + const char* glsl_version = "#version 140\n"; const GLchar* strings[2] = { glsl_version, code}; glShaderSource(g_VertHandle, 2, strings, NULL); glCompileShader(g_VertHandle); - if (!check_shader(g_VertHandle, pixel?"pixel":"vertex", false)) { + if (!check_shader(g_VertHandle, pixel? 
"pixel" : "vertex", false)) { glDeleteShader(g_VertHandle); return 0; } @@ -173,9 +173,9 @@ void draw_mesh_gl( if (!program) { vs = compile_shader(false, R"foo( -layout (location = 0) in vec3 pos; -layout (location = 1) in vec3 nor; -layout (location = 2) in vec3 col; +in vec3 pos; +in vec3 nor; +in vec3 col; out vec3 vtxcol; uniform mat4 camera; uniform vec2 f; @@ -198,16 +198,11 @@ void main() } )foo"); ps = compile_shader(true, R"foo( -layout (location = 0) out vec4 o; +out vec4 o; in vec3 vtxcol; uniform int mode; void main() { - if (mode == 3) { - vec3 tricol = vec3((ivec3(923, 3572, 5423) * gl_PrimitiveID) & 255) * (1.0 / 255.0); - o = vec4(tricol, 1.0); - } else { - o = vec4(vtxcol, 1.0); - } + o = vec4(vtxcol, 1.0); } )foo"); program = glCreateProgram(); diff --git a/src/nerf_loader.cu b/src/nerf_loader.cu index 3fd76ca003e845e183bdd3b914e3af12bc21c9c1..69af304f8e3aed090d0ffe81fd46d38dcfd4e1af 100644 --- a/src/nerf_loader.cu +++ b/src/nerf_loader.cu @@ -231,6 +231,10 @@ void read_lens(const nlohmann::json& json, Lens& lens, Vector2f& principal_point mode = ELensMode::LatLong; } + if (json.contains("equirectangular")) { + mode = ELensMode::Equirectangular; + } + // If there was an outer distortion mode, don't override it with nothing. if (mode != ELensMode::Perspective) { lens.mode = mode; diff --git a/src/openxr_hmd.cu b/src/openxr_hmd.cu new file mode 100644 index 0000000000000000000000000000000000000000..960737237bb0f6ee1322da4ece538374400965da --- /dev/null +++ b/src/openxr_hmd.cu @@ -0,0 +1,1240 @@ +/* + * Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved. + * + * NVIDIA CORPORATION and its licensors retain all intellectual property + * and proprietary rights in and to this software, related documentation + * and any modifications thereto. Any use, reproduction, disclosure or + * distribution of this software and related documentation without an express + * license agreement from NVIDIA CORPORATION is strictly prohibited. + */ + +/** @file openxr_hmd.cu + * @author Thomas Müller & Ingo Esser & Robert Menzel, NVIDIA + * @brief Wrapper around the OpenXR API, providing access to + * per-eye framebuffers, lens parameters, visible area, + * view, hand, and eye poses, as well as controller inputs. 
+ */ + +#define NOMINMAX + +#include <neural-graphics-primitives/common_device.cuh> +#include <neural-graphics-primitives/marching_cubes.h> +#include <neural-graphics-primitives/openxr_hmd.h> +#include <neural-graphics-primitives/render_buffer.h> + +#include <openxr/openxr_reflection.h> + +#include <fmt/format.h> + +#include <imgui/imgui.h> + +#include <tinylogger/tinylogger.h> + +#include <tiny-cuda-nn/common.h> + +#include <string> +#include <vector> + +#ifdef __GNUC__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wmissing-field-initializers" // TODO: XR structs are uninitialized apart from their type +#endif + +using namespace Eigen; +using namespace tcnn; + +NGP_NAMESPACE_BEGIN + +// The function XrEnumStr turns an enum value into a string for printing. +// It uses the expansion macro and data provided in openxr_reflection.h. +#define XR_ENUM_CASE_STR(name, val) \ + case name: \ + return #name; +#define XR_ENUM_STR(enum_type) \ + constexpr const char* XrEnumStr(enum_type e) { \ + switch (e) { \ + XR_LIST_ENUM_##enum_type(XR_ENUM_CASE_STR) default : return "Unknown"; \ + } \ + } + +XR_ENUM_STR(XrViewConfigurationType) +XR_ENUM_STR(XrEnvironmentBlendMode) +XR_ENUM_STR(XrReferenceSpaceType) +XR_ENUM_STR(XrStructureType) +XR_ENUM_STR(XrSessionState) + +/// Checks the result of an xrXXXXXX call and throws an error on failure +#define XR_CHECK_THROW(x) \ + do { \ + XrResult result = x; \ + if (XR_FAILED(result)) { \ + char buffer[XR_MAX_RESULT_STRING_SIZE]; \ + XrResult result_to_string_result = xrResultToString(m_instance, result, buffer); \ + if (XR_FAILED(result_to_string_result)) { \ + throw std::runtime_error{std::string(FILE_LINE " " #x " failed, but could not obtain error string")}; \ + } else { \ + throw std::runtime_error{std::string(FILE_LINE " " #x " failed with error ") + buffer}; \ + } \ + } \ + } while(0) + +OpenXRHMD::Swapchain::Swapchain(XrSwapchainCreateInfo& rgba_create_info, XrSwapchainCreateInfo& depth_create_info, XrSession& session, XrInstance& m_instance) { + ScopeGuard cleanup_guard{[&]() { clear(); }}; + + XR_CHECK_THROW(xrCreateSwapchain(session, &rgba_create_info, &handle)); + + width = rgba_create_info.width; + height = rgba_create_info.height; + + { + uint32_t size; + XR_CHECK_THROW(xrEnumerateSwapchainImages(handle, 0, &size, nullptr)); + + images_gl.resize(size, {XR_TYPE_SWAPCHAIN_IMAGE_OPENGL_KHR}); + XR_CHECK_THROW(xrEnumerateSwapchainImages(handle, size, &size, (XrSwapchainImageBaseHeader*)images_gl.data())); + + // One framebuffer per swapchain image + framebuffers_gl.resize(size); + } + + if (depth_create_info.format != 0) { + XR_CHECK_THROW(xrCreateSwapchain(session, &depth_create_info, &depth_handle)); + + uint32_t depth_size; + XR_CHECK_THROW(xrEnumerateSwapchainImages(depth_handle, 0, &depth_size, nullptr)); + + depth_images_gl.resize(depth_size, {XR_TYPE_SWAPCHAIN_IMAGE_OPENGL_KHR}); + XR_CHECK_THROW(xrEnumerateSwapchainImages(depth_handle, depth_size, &depth_size, (XrSwapchainImageBaseHeader*)depth_images_gl.data())); + + // We might have a different number of depth swapchain images than we have framebuffers, + // so we will need to bind an acquired depth image to the current framebuffer on the + // fly later on.
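// For reference, that on-the-fly rebinding happens in begin_frame() further
// below and boils down to acquiring a depth image and attaching it to the
// framebuffer bound for the current view:
//
//   uint32_t depth_image_index;
//   XR_CHECK_THROW(xrAcquireSwapchainImage(depth_handle, &image_acquire_info, &depth_image_index));
//   XR_CHECK_THROW(xrWaitSwapchainImage(depth_handle, &image_wait_info));
//   glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, depth_images_gl.at(depth_image_index).image, 0);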
+ } + + glGenFramebuffers(framebuffers_gl.size(), framebuffers_gl.data()); + + cleanup_guard.disarm(); +} + +OpenXRHMD::Swapchain::~Swapchain() { + clear(); +} + +void OpenXRHMD::Swapchain::clear() { + if (!framebuffers_gl.empty()) { + glDeleteFramebuffers(framebuffers_gl.size(), framebuffers_gl.data()); + } + + if (depth_handle != XR_NULL_HANDLE) { + xrDestroySwapchain(depth_handle); + depth_handle = XR_NULL_HANDLE; + } + + if (handle != XR_NULL_HANDLE) { + xrDestroySwapchain(handle); + handle = XR_NULL_HANDLE; + } +} + +#if defined(XR_USE_PLATFORM_WIN32) +OpenXRHMD::OpenXRHMD(HDC hdc, HGLRC hglrc) { +#elif defined(XR_USE_PLATFORM_XLIB) +OpenXRHMD::OpenXRHMD(Display* xDisplay, uint32_t visualid, GLXFBConfig glxFBConfig, GLXDrawable glxDrawable, GLXContext glxContext) { +#elif defined(XR_USE_PLATFORM_WAYLAND) +OpenXRHMD::OpenXRHMD(wl_display* display) { +#endif + ScopeGuard cleanup_guard{[&]() { clear(); }}; + + init_create_xr_instance(); + init_get_xr_system(); + init_configure_xr_views(); + init_check_for_xr_blend_mode(); +#if defined(XR_USE_PLATFORM_WIN32) + init_open_gl(hdc, hglrc); +#elif defined(XR_USE_PLATFORM_XLIB) + init_open_gl(xDisplay, visualid, glxFBConfig, glxDrawable, glxContext); +#elif defined(XR_USE_PLATFORM_WAYLAND) + init_open_gl(display); +#endif + init_xr_session(); + init_xr_actions(); + init_xr_spaces(); + init_xr_swapchain_open_gl(); + init_open_gl_shaders(); + + cleanup_guard.disarm(); + tlog::success() << "Initialized OpenXR for " << m_system_properties.systemName; + // tlog::success() << " " + // << " depth=" << (m_supports_composition_layer_depth ? "true" : "false") + // << " mask=" << (m_supports_hidden_area_mask ? "true" : "false") + // << " eye=" << (m_supports_eye_tracking ? "true" : "false") + // ; +} + +OpenXRHMD::~OpenXRHMD() { + clear(); +} + +void OpenXRHMD::clear() { + auto xr_destroy = [&](auto& handle, auto destroy_fun) { + if (handle != XR_NULL_HANDLE) { + destroy_fun(handle); + handle = XR_NULL_HANDLE; + } + }; + + xr_destroy(m_pose_action, xrDestroyAction); + xr_destroy(m_thumbstick_actions[0], xrDestroyAction); + xr_destroy(m_thumbstick_actions[1], xrDestroyAction); + xr_destroy(m_press_action, xrDestroyAction); + xr_destroy(m_grab_action, xrDestroyAction); + + xr_destroy(m_action_set, xrDestroyActionSet); + + m_swapchains.clear(); + xr_destroy(m_space, xrDestroySpace); + xr_destroy(m_session, xrDestroySession); + xr_destroy(m_instance, xrDestroyInstance); +} + +void OpenXRHMD::init_create_xr_instance() { + std::vector<const char*> layers = {}; + std::vector<const char*> extensions = { + XR_KHR_OPENGL_ENABLE_EXTENSION_NAME, + }; + + auto print_extension_properties = [](const char* layer_name) { + uint32_t size; + xrEnumerateInstanceExtensionProperties(layer_name, 0, &size, nullptr); + std::vector<XrExtensionProperties> props(size, {XR_TYPE_EXTENSION_PROPERTIES}); + xrEnumerateInstanceExtensionProperties(layer_name, size, &size, props.data()); + tlog::info() << fmt::format("Extensions ({}):", props.size()); + for (XrExtensionProperties extension : props) { + tlog::info() << fmt::format("\t{} (Version {})", extension.extensionName, extension.extensionVersion); + } + }; + + uint32_t size; + xrEnumerateApiLayerProperties(0, &size, nullptr); + m_api_layer_properties.clear(); + m_api_layer_properties.resize(size, {XR_TYPE_API_LAYER_PROPERTIES}); + xrEnumerateApiLayerProperties(size, &size, m_api_layer_properties.data()); + + if (m_print_api_layers) { + tlog::info() << fmt::format("API Layers ({}):", m_api_layer_properties.size()); + for (auto p :
m_api_layer_properties) { + tlog::info() << fmt::format( + "{} (v {}.{}.{}, {}) {}", + p.layerName, + XR_VERSION_MAJOR(p.specVersion), + XR_VERSION_MINOR(p.specVersion), + XR_VERSION_PATCH(p.specVersion), + p.layerVersion, + p.description + ); + print_extension_properties(p.layerName); + } + } + + if (layers.size() != 0) { + for (const auto& e : layers) { + bool found = false; + for (XrApiLayerProperties layer : m_api_layer_properties) { + if (strcmp(e, layer.layerName) == 0) { + found = true; + break; + } + } + + if (!found) { + throw std::runtime_error{fmt::format("OpenXR API layer {} not found", e)}; + } + } + } + + xrEnumerateInstanceExtensionProperties(nullptr, 0, &size, nullptr); + m_instance_extension_properties.clear(); + m_instance_extension_properties.resize(size, {XR_TYPE_EXTENSION_PROPERTIES}); + xrEnumerateInstanceExtensionProperties(nullptr, size, &size, m_instance_extension_properties.data()); + + if (m_print_extensions) { + tlog::info() << fmt::format("Instance extensions ({}):", m_instance_extension_properties.size()); + for (XrExtensionProperties extension : m_instance_extension_properties) { + tlog::info() << fmt::format("\t{} (Version {})", extension.extensionName, extension.extensionVersion); + } + } + + auto has_extension = [&](const char* e) { + for (XrExtensionProperties extension : m_instance_extension_properties) { + if (strcmp(e, extension.extensionName) == 0) { + return true; + } + } + + return false; + }; + + for (const auto& e : extensions) { + if (!has_extension(e)) { + throw std::runtime_error{fmt::format("Required OpenXR extension {} not found", e)}; + } + } + + auto add_extension_if_supported = [&](const char* extension) { + if (has_extension(extension)) { + extensions.emplace_back(extension); + return true; + } + + return false; + }; + + if (add_extension_if_supported(XR_KHR_COMPOSITION_LAYER_DEPTH_EXTENSION_NAME)) { + m_supports_composition_layer_depth = true; + } + + if (add_extension_if_supported(XR_KHR_VISIBILITY_MASK_EXTENSION_NAME)) { + m_supports_hidden_area_mask = true; + } + + if (add_extension_if_supported(XR_EXT_EYE_GAZE_INTERACTION_EXTENSION_NAME)) { + m_supports_eye_tracking = true; + } + + XrInstanceCreateInfo instance_create_info = {XR_TYPE_INSTANCE_CREATE_INFO}; + instance_create_info.applicationInfo = {}; + strncpy(instance_create_info.applicationInfo.applicationName, "Instant Neural Graphics Primitives v" NGP_VERSION, XR_MAX_APPLICATION_NAME_SIZE); + instance_create_info.applicationInfo.applicationVersion = 1; + strncpy(instance_create_info.applicationInfo.engineName, "Instant Neural Graphics Primitives v" NGP_VERSION, XR_MAX_ENGINE_NAME_SIZE); + instance_create_info.applicationInfo.engineVersion = 1; + instance_create_info.applicationInfo.apiVersion = XR_CURRENT_API_VERSION; + instance_create_info.enabledExtensionCount = (uint32_t)extensions.size(); + instance_create_info.enabledExtensionNames = extensions.data(); + instance_create_info.enabledApiLayerCount = (uint32_t)layers.size(); + instance_create_info.enabledApiLayerNames = layers.data(); + + if (XR_FAILED(xrCreateInstance(&instance_create_info, &m_instance))) { + throw std::runtime_error{"Failed to create OpenXR instance"}; + } + + XR_CHECK_THROW(xrGetInstanceProperties(m_instance, &m_instance_properties)); + if (m_print_instance_properties) { + tlog::info() << "Instance Properties"; + tlog::info() << fmt::format("\t runtime name: '{}'", m_instance_properties.runtimeName); + const auto& v = m_instance_properties.runtimeVersion; + tlog::info() << fmt::format( + "\t runtime version: 
{}.{}.{}", + XR_VERSION_MAJOR(v), + XR_VERSION_MINOR(v), + XR_VERSION_PATCH(v) + ); + } +} + +void OpenXRHMD::init_get_xr_system() { + XrSystemGetInfo system_get_info = {XR_TYPE_SYSTEM_GET_INFO, nullptr, XR_FORM_FACTOR_HEAD_MOUNTED_DISPLAY}; + XR_CHECK_THROW(xrGetSystem(m_instance, &system_get_info, &m_system_id)); + + XR_CHECK_THROW(xrGetSystemProperties(m_instance, m_system_id, &m_system_properties)); + if (m_print_system_properties) { + tlog::info() << "System Properties"; + tlog::info() << fmt::format("\t name: '{}'", m_system_properties.systemName); + tlog::info() << fmt::format("\t vendorId: {:#x}", m_system_properties.vendorId); + tlog::info() << fmt::format("\t systemId: {:#x}", m_system_properties.systemId); + tlog::info() << fmt::format("\t max layer count: {}", m_system_properties.graphicsProperties.maxLayerCount); + tlog::info() << fmt::format("\t max img width: {}", m_system_properties.graphicsProperties.maxSwapchainImageWidth); + tlog::info() << fmt::format("\t max img height: {}", m_system_properties.graphicsProperties.maxSwapchainImageHeight); + tlog::info() << fmt::format("\torientation tracking: {}", m_system_properties.trackingProperties.orientationTracking ? "YES" : "NO"); + tlog::info() << fmt::format("\t position tracking: {}", m_system_properties.trackingProperties.orientationTracking ? "YES" : "NO"); + } +} + +void OpenXRHMD::init_configure_xr_views() { + uint32_t size; + XR_CHECK_THROW(xrEnumerateViewConfigurations(m_instance, m_system_id, 0, &size, nullptr)); + std::vector<XrViewConfigurationType> view_config_types(size); + XR_CHECK_THROW(xrEnumerateViewConfigurations(m_instance, m_system_id, size, &size, view_config_types.data())); + + if (m_print_view_configuration_types) { + tlog::info() << fmt::format("View Configuration Types ({}):", view_config_types.size()); + for (const auto& t : view_config_types) { + tlog::info() << fmt::format("\t{}", XrEnumStr(t)); + } + } + + // view configurations we support, in descending preference + const std::vector<XrViewConfigurationType> preferred_view_config_types = { + //XR_VIEW_CONFIGURATION_TYPE_PRIMARY_QUAD_VARJO, + XR_VIEW_CONFIGURATION_TYPE_PRIMARY_STEREO + }; + + bool found = false; + for (const auto& p : preferred_view_config_types) { + for (const auto& t : view_config_types) { + if (p == t) { + found = true; + m_view_configuration_type = t; + } + } + } + + if (!found) { + throw std::runtime_error{"Could not find a suitable OpenXR view configuration type"}; + } + + // get view configuration properties + XR_CHECK_THROW(xrGetViewConfigurationProperties(m_instance, m_system_id, m_view_configuration_type, &m_view_configuration_properties)); + if (m_print_view_configuration_properties) { + tlog::info() << "View Configuration Properties:"; + tlog::info() << fmt::format("\t Type: {}", XrEnumStr(m_view_configuration_type)); + tlog::info() << fmt::format("\t FOV Mutable: {}", m_view_configuration_properties.fovMutable ? 
"YES" : "NO"); + } + + // enumerate view configuration views + XR_CHECK_THROW(xrEnumerateViewConfigurationViews(m_instance, m_system_id, m_view_configuration_type, 0, &size, nullptr)); + m_view_configuration_views.clear(); + m_view_configuration_views.resize(size, {XR_TYPE_VIEW_CONFIGURATION_VIEW}); + XR_CHECK_THROW(xrEnumerateViewConfigurationViews( + m_instance, + m_system_id, + m_view_configuration_type, + size, + &size, + m_view_configuration_views.data() + )); + + if (m_print_view_configuration_view) { + tlog::info() << "View Configuration Views, Width x Height x Samples"; + for (size_t i = 0; i < m_view_configuration_views.size(); ++i) { + const auto& view = m_view_configuration_views[i]; + tlog::info() << fmt::format( + "\tView {}\tRecommended: {}x{}x{} Max: {}x{}x{}", + i, + view.recommendedImageRectWidth, + view.recommendedImageRectHeight, + view.recommendedSwapchainSampleCount, + view.maxImageRectWidth, + view.maxImageRectHeight, + view.maxSwapchainSampleCount + ); + } + } +} + +void OpenXRHMD::init_check_for_xr_blend_mode() { + // enumerate environment blend modes + uint32_t size; + XR_CHECK_THROW(xrEnumerateEnvironmentBlendModes(m_instance, m_system_id, m_view_configuration_type, 0, &size, nullptr)); + m_environment_blend_modes.resize(size); + XR_CHECK_THROW(xrEnumerateEnvironmentBlendModes( + m_instance, + m_system_id, + m_view_configuration_type, + size, + &size, + m_environment_blend_modes.data() + )); + + if (m_print_environment_blend_modes) { + tlog::info() << fmt::format("Environment Blend Modes ({}):", m_environment_blend_modes.size()); + } + + bool found = false; + for (const auto& m : m_environment_blend_modes) { + if (m_print_environment_blend_modes) { + tlog::info() << fmt::format("\t{}", XrEnumStr(m)); + } + + if (m == m_environment_blend_mode) { + found = true; + } + } + + if (!found) { + throw std::runtime_error{fmt::format("OpenXR environment blend mode {} not found", XrEnumStr(m_environment_blend_mode))}; + } +} + +void OpenXRHMD::init_xr_actions() { + // paths for left (0) and right (1) hands + XR_CHECK_THROW(xrStringToPath(m_instance, "/user/hand/left", &m_hand_paths[0])); + XR_CHECK_THROW(xrStringToPath(m_instance, "/user/hand/right", &m_hand_paths[1])); + + // create action set + XrActionSetCreateInfo action_set_create_info{XR_TYPE_ACTION_SET_CREATE_INFO, nullptr, "actionset", "actionset", 0}; + XR_CHECK_THROW(xrCreateActionSet(m_instance, &action_set_create_info, &m_action_set)); + + { + XrActionCreateInfo action_create_info{ + XR_TYPE_ACTION_CREATE_INFO, + nullptr, + "hand_pose", + XR_ACTION_TYPE_POSE_INPUT, + (uint32_t)m_hand_paths.size(), + m_hand_paths.data(), + "Hand pose" + }; + XR_CHECK_THROW(xrCreateAction(m_action_set, &action_create_info, &m_pose_action)); + } + + { + XrActionCreateInfo action_create_info{ + XR_TYPE_ACTION_CREATE_INFO, + nullptr, + "thumbstick_left", + XR_ACTION_TYPE_VECTOR2F_INPUT, + 0, + nullptr, + "Left thumbstick" + }; + XR_CHECK_THROW(xrCreateAction(m_action_set, &action_create_info, &m_thumbstick_actions[0])); + } + + { + XrActionCreateInfo action_create_info{ + XR_TYPE_ACTION_CREATE_INFO, + nullptr, + "thumbstick_right", + XR_ACTION_TYPE_VECTOR2F_INPUT, + 0, + nullptr, + "Right thumbstick" + }; + XR_CHECK_THROW(xrCreateAction(m_action_set, &action_create_info, &m_thumbstick_actions[1])); + } + + { + XrActionCreateInfo action_create_info{ + XR_TYPE_ACTION_CREATE_INFO, + nullptr, + "press", + XR_ACTION_TYPE_BOOLEAN_INPUT, + (uint32_t)m_hand_paths.size(), + m_hand_paths.data(), + "Press" + }; + 
XR_CHECK_THROW(xrCreateAction(m_action_set, &action_create_info, &m_press_action)); + } + + { + XrActionCreateInfo action_create_info{ + XR_TYPE_ACTION_CREATE_INFO, + nullptr, + "grab", + XR_ACTION_TYPE_FLOAT_INPUT, + (uint32_t)m_hand_paths.size(), + m_hand_paths.data(), + "Grab" + }; + XR_CHECK_THROW(xrCreateAction(m_action_set, &action_create_info, &m_grab_action)); + } + + auto create_binding = [&](XrAction action, const std::string& binding_path_str) { + XrPath binding; + XR_CHECK_THROW(xrStringToPath(m_instance, binding_path_str.c_str(), &binding)); + return XrActionSuggestedBinding{action, binding}; + }; + + auto suggest_bindings = [&](const std::string& interaction_profile_path_str, const std::vector<XrActionSuggestedBinding>& bindings) { + XrPath interaction_profile; + XR_CHECK_THROW(xrStringToPath(m_instance, interaction_profile_path_str.c_str(), &interaction_profile)); + XrInteractionProfileSuggestedBinding suggested_binding{ + XR_TYPE_INTERACTION_PROFILE_SUGGESTED_BINDING, + nullptr, + interaction_profile, + (uint32_t)bindings.size(), + bindings.data() + }; + XR_CHECK_THROW(xrSuggestInteractionProfileBindings(m_instance, &suggested_binding)); + }; + + suggest_bindings("/interaction_profiles/khr/simple_controller", { + create_binding(m_pose_action, "/user/hand/left/input/grip/pose"), + create_binding(m_pose_action, "/user/hand/right/input/grip/pose"), + }); + + auto suggest_controller_bindings = [&](const std::string& xy, const std::string& press, const std::string& grab, const std::string& interaction_profile_path_str) { + suggest_bindings(interaction_profile_path_str, { + create_binding(m_pose_action, "/user/hand/left/input/grip/pose"), + create_binding(m_pose_action, "/user/hand/right/input/grip/pose"), + create_binding(m_thumbstick_actions[0], std::string{"/user/hand/left/input/"} + xy), + create_binding(m_thumbstick_actions[1], std::string{"/user/hand/right/input/"} + xy), + create_binding(m_press_action, std::string{"/user/hand/left/input/"} + press), + create_binding(m_press_action, std::string{"/user/hand/right/input/"} + press), + create_binding(m_grab_action, std::string{"/user/hand/left/input/"} + grab), + create_binding(m_grab_action, std::string{"/user/hand/right/input/"} + grab), + }); + }; + + suggest_controller_bindings("trackpad", "select/click", "trackpad/click", "/interaction_profiles/google/daydream_controller"); + suggest_controller_bindings("trackpad", "trackpad/click", "trigger/click", "/interaction_profiles/htc/vive_controller"); + suggest_controller_bindings("thumbstick", "thumbstick/click", "trigger/value", "/interaction_profiles/microsoft/motion_controller"); + suggest_controller_bindings("trackpad", "trackpad/click", "trigger/click", "/interaction_profiles/oculus/go_controller"); + suggest_controller_bindings("thumbstick", "thumbstick/click", "trigger/value", "/interaction_profiles/oculus/touch_controller"); + suggest_controller_bindings("thumbstick", "thumbstick/click", "trigger/value", "/interaction_profiles/valve/index_controller"); + + // Xbox controller (currently not functional) + suggest_bindings("/interaction_profiles/microsoft/xbox_controller", { + create_binding(m_thumbstick_actions[0], std::string{"/user/gamepad/input/thumbstick_left"}), + create_binding(m_thumbstick_actions[1], std::string{"/user/gamepad/input/thumbstick_right"}), + }); +} + +#if defined(XR_USE_PLATFORM_WIN32) +void OpenXRHMD::init_open_gl(HDC hdc, HGLRC hglrc) { +#elif defined(XR_USE_PLATFORM_XLIB) +void OpenXRHMD::init_open_gl(Display* xDisplay, uint32_t visualid, 
GLXFBConfig glxFBConfig, GLXDrawable glxDrawable, GLXContext glxContext) { +#elif defined(XR_USE_PLATFORM_WAYLAND) +void OpenXRHMD::init_open_gl(wl_display* display) { +#endif + // GL graphics requirements + PFN_xrGetOpenGLGraphicsRequirementsKHR xrGetOpenGLGraphicsRequirementsKHR = nullptr; + XR_CHECK_THROW(xrGetInstanceProcAddr( + m_instance, + "xrGetOpenGLGraphicsRequirementsKHR", + reinterpret_cast<PFN_xrVoidFunction*>(&xrGetOpenGLGraphicsRequirementsKHR) + )); + + XrGraphicsRequirementsOpenGLKHR graphics_requirements{XR_TYPE_GRAPHICS_REQUIREMENTS_OPENGL_KHR}; + xrGetOpenGLGraphicsRequirementsKHR(m_instance, m_system_id, &graphics_requirements); + XrVersion min_version = graphics_requirements.minApiVersionSupported; + GLint major = 0; + GLint minor = 0; + glGetIntegerv(GL_MAJOR_VERSION, &major); + glGetIntegerv(GL_MINOR_VERSION, &minor); + const XrVersion have_version = XR_MAKE_VERSION(major, minor, 0); + + if (have_version < min_version) { + tlog::info() << fmt::format( + "Required OpenGL version: {}.{}, found OpenGL version: {}.{}", + XR_VERSION_MAJOR(min_version), + XR_VERSION_MINOR(min_version), + major, + minor + ); + + throw std::runtime_error{"Insufficient graphics API support"}; + } + +#if defined(XR_USE_PLATFORM_WIN32) + m_graphics_binding.hDC = hdc; + m_graphics_binding.hGLRC = hglrc; +#elif defined(XR_USE_PLATFORM_XLIB) + m_graphics_binding.xDisplay = xDisplay; + m_graphics_binding.visualid = visualid; + m_graphics_binding.glxFBConfig = glxFBConfig; + m_graphics_binding.glxDrawable = glxDrawable; + m_graphics_binding.glxContext = glxContext; +#elif defined(XR_USE_PLATFORM_WAYLAND) + m_graphics_binding.display = display; +#endif +} + +void OpenXRHMD::init_xr_session() { + // create session + XrSessionCreateInfo create_info{ + XR_TYPE_SESSION_CREATE_INFO, + reinterpret_cast<const XrBaseInStructure*>(&m_graphics_binding), + 0, + m_system_id + }; + + XR_CHECK_THROW(xrCreateSession(m_instance, &create_info, &m_session)); + + // tlog::info() << fmt::format("Created session {}", fmt::ptr(m_session)); +} + +void OpenXRHMD::init_xr_spaces() { + // reference space + uint32_t size; + XR_CHECK_THROW(xrEnumerateReferenceSpaces(m_session, 0, &size, nullptr)); + m_reference_spaces.clear(); + m_reference_spaces.resize(size); + XR_CHECK_THROW(xrEnumerateReferenceSpaces(m_session, size, &size, m_reference_spaces.data())); + + if (m_print_reference_spaces) { + tlog::info() << fmt::format("Reference spaces ({}):", m_reference_spaces.size()); + for (const auto& r : m_reference_spaces) { + tlog::info() << fmt::format("\t{}", XrEnumStr(r)); + } + } + + XrReferenceSpaceCreateInfo reference_space_create_info{XR_TYPE_REFERENCE_SPACE_CREATE_INFO}; + reference_space_create_info.referenceSpaceType = XR_REFERENCE_SPACE_TYPE_LOCAL; + reference_space_create_info.poseInReferenceSpace = XrPosef{}; + reference_space_create_info.poseInReferenceSpace.orientation.w = 1.0f; + XR_CHECK_THROW(xrCreateReferenceSpace(m_session, &reference_space_create_info, &m_space)); + XR_CHECK_THROW(xrGetReferenceSpaceBoundsRect(m_session, reference_space_create_info.referenceSpaceType, &m_bounds)); + + if (m_print_reference_spaces) { + tlog::info() << fmt::format("Using reference space {}", XrEnumStr(reference_space_create_info.referenceSpaceType)); + tlog::info() << fmt::format("Reference space boundaries: {} x {}", m_bounds.width, m_bounds.height); + } + + // action space + XrActionSpaceCreateInfo action_space_create_info{XR_TYPE_ACTION_SPACE_CREATE_INFO}; + action_space_create_info.action = m_pose_action; + 
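// The pose in action space is left at identity (zero translation, w == 1 quaternion),
// mirroring the reference-space setup above, so located hand poses are reported
// directly at each controller's grip pose (the binding suggested in init_xr_actions()).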
action_space_create_info.poseInActionSpace.orientation.w = 1.0f; + action_space_create_info.subactionPath = m_hand_paths[0]; + XR_CHECK_THROW(xrCreateActionSpace(m_session, &action_space_create_info, &m_hand_spaces[0])); + action_space_create_info.subactionPath = m_hand_paths[1]; + XR_CHECK_THROW(xrCreateActionSpace(m_session, &action_space_create_info, &m_hand_spaces[1])); + + // attach action set + XrSessionActionSetsAttachInfo attach_info{XR_TYPE_SESSION_ACTION_SETS_ATTACH_INFO}; + attach_info.countActionSets = 1; + attach_info.actionSets = &m_action_set; + XR_CHECK_THROW(xrAttachSessionActionSets(m_session, &attach_info)); +} + +void OpenXRHMD::init_xr_swapchain_open_gl() { + // swap chains + uint32_t size; + XR_CHECK_THROW(xrEnumerateSwapchainFormats(m_session, 0, &size, nullptr)); + std::vector<int64_t> swapchain_formats(size); + XR_CHECK_THROW(xrEnumerateSwapchainFormats(m_session, size, &size, swapchain_formats.data())); + + if (m_print_available_swapchain_formats) { + tlog::info() << fmt::format("Swapchain formats ({}):", swapchain_formats.size()); + for (const auto& f : swapchain_formats) { + tlog::info() << fmt::format("\t{:#x}", f); + } + } + + auto find_compatible_swapchain_format = [&](const std::vector<int64_t>& candidates) { + for (auto format : candidates) { + if (std::find(std::begin(swapchain_formats), std::end(swapchain_formats), format) != std::end(swapchain_formats)) { + return format; + } + } + + throw std::runtime_error{"No compatible OpenXR swapchain format found"}; + }; + + m_swapchain_rgba_format = find_compatible_swapchain_format({ + GL_SRGB8_ALPHA8, + GL_SRGB8, + GL_RGBA8, + }); + + if (m_supports_composition_layer_depth) { + m_swapchain_depth_format = find_compatible_swapchain_format({ + GL_DEPTH_COMPONENT32F, + GL_DEPTH_COMPONENT24, + GL_DEPTH_COMPONENT16, + }); + } + + // tlog::info() << fmt::format("Chosen swapchain format: {:#x}", m_swapchain_rgba_format); + for (const auto& vcv : m_view_configuration_views) { + XrSwapchainCreateInfo rgba_swapchain_create_info{XR_TYPE_SWAPCHAIN_CREATE_INFO}; + rgba_swapchain_create_info.usageFlags = XR_SWAPCHAIN_USAGE_SAMPLED_BIT | XR_SWAPCHAIN_USAGE_COLOR_ATTACHMENT_BIT; + rgba_swapchain_create_info.format = m_swapchain_rgba_format; + rgba_swapchain_create_info.sampleCount = 1; + rgba_swapchain_create_info.width = vcv.recommendedImageRectWidth; + rgba_swapchain_create_info.height = vcv.recommendedImageRectHeight; + rgba_swapchain_create_info.faceCount = 1; + rgba_swapchain_create_info.arraySize = 1; + rgba_swapchain_create_info.mipCount = 1; + + XrSwapchainCreateInfo depth_swapchain_create_info = rgba_swapchain_create_info; + depth_swapchain_create_info.usageFlags = XR_SWAPCHAIN_USAGE_SAMPLED_BIT | XR_SWAPCHAIN_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; + depth_swapchain_create_info.format = m_swapchain_depth_format; + + m_swapchains.emplace_back(rgba_swapchain_create_info, depth_swapchain_create_info, m_session, m_instance); + } +} + +void OpenXRHMD::init_open_gl_shaders() { + // Hidden area mask program + { + static const char* shader_vert = R"(#version 140 + in vec2 pos; + uniform mat4 project; + void main() { + vec4 pos = project * vec4(pos, -1.0, 1.0); + pos.xyz /= pos.w; + pos.y *= -1.0; + gl_Position = pos; + })"; + + static const char* shader_frag = R"(#version 140 + out vec4 frag_color; + void main() { + frag_color = vec4(0.0, 0.0, 0.0, 1.0); + })"; + + GLuint vert = glCreateShader(GL_VERTEX_SHADER); + glShaderSource(vert, 1, &shader_vert, NULL); + glCompileShader(vert); + check_shader(vert, "OpenXR hidden area mask 
vertex shader", false); + + GLuint frag = glCreateShader(GL_FRAGMENT_SHADER); + glShaderSource(frag, 1, &shader_frag, NULL); + glCompileShader(frag); + check_shader(frag, "OpenXR hidden area mask fragment shader", false); + + m_hidden_area_mask_program = glCreateProgram(); + glAttachShader(m_hidden_area_mask_program, vert); + glAttachShader(m_hidden_area_mask_program, frag); + glLinkProgram(m_hidden_area_mask_program); + check_shader(m_hidden_area_mask_program, "OpenXR hidden area mask shader program", true); + + glDeleteShader(vert); + glDeleteShader(frag); + } +} + +void OpenXRHMD::session_state_change(XrSessionState state, ControlFlow& flow) { + //tlog::info() << fmt::format("New session state {}", XrEnumStr(state)); + switch (state) { + case XR_SESSION_STATE_READY: { + XrSessionBeginInfo sessionBeginInfo {XR_TYPE_SESSION_BEGIN_INFO}; + sessionBeginInfo.primaryViewConfigurationType = m_view_configuration_type; + XR_CHECK_THROW(xrBeginSession(m_session, &sessionBeginInfo)); + break; + } + case XR_SESSION_STATE_STOPPING: { + XR_CHECK_THROW(xrEndSession(m_session)); + break; + } + case XR_SESSION_STATE_EXITING: { + flow = ControlFlow::QUIT; + break; + } + case XR_SESSION_STATE_LOSS_PENDING: { + flow = ControlFlow::RESTART; + break; + } + default: { + break; + } + } +} + +OpenXRHMD::ControlFlow OpenXRHMD::poll_events() { + bool more = true; + ControlFlow flow = ControlFlow::CONTINUE; + while (more) { + // poll events + XrEventDataBuffer event {XR_TYPE_EVENT_DATA_BUFFER, nullptr}; + XrResult result = xrPollEvent(m_instance, &event); + + if (XR_FAILED(result)) { + tlog::error() << "xrPollEvent failed"; + } else if (XR_SUCCESS == result) { + switch (event.type) { + case XR_TYPE_EVENT_DATA_SESSION_STATE_CHANGED: { + const XrEventDataSessionStateChanged& e = *reinterpret_cast<XrEventDataSessionStateChanged*>(&event); + //tlog::info() << "Session state change"; + //tlog::info() << fmt::format("\t from {}\t to {}", XrEnumStr(m_session_state), XrEnumStr(e.state)); + //tlog::info() << fmt::format("\t session {}, time {}", fmt::ptr(e.session), e.time); + m_session_state = e.state; + session_state_change(e.state, flow); + break; + } + + case XR_TYPE_EVENT_DATA_INSTANCE_LOSS_PENDING: { + flow = ControlFlow::RESTART; + break; + } + + case XR_TYPE_EVENT_DATA_VISIBILITY_MASK_CHANGED_KHR: { + m_hidden_area_masks.clear(); + break; + } + + case XR_TYPE_EVENT_DATA_INTERACTION_PROFILE_CHANGED: { + break; // Can ignore + } + + default: { + tlog::info() << fmt::format("Unhandled event type {}", XrEnumStr(event.type)); + break; + } + } + } else if (XR_EVENT_UNAVAILABLE == result) { + more = false; + } + } + return flow; +} + +__global__ void read_hidden_area_mask_kernel(const Vector2i resolution, cudaSurfaceObject_t surface, uint8_t* __restrict__ mask) { + uint32_t x = threadIdx.x + blockDim.x * blockIdx.x; + uint32_t y = threadIdx.y + blockDim.y * blockIdx.y; + + if (x >= resolution.x() || y >= resolution.y()) { + return; + } + + uint32_t idx = x + resolution.x() * y; + surf2Dread(&mask[idx], surface, x, y); +} + +std::shared_ptr<Buffer2D<uint8_t>> OpenXRHMD::rasterize_hidden_area_mask(uint32_t view_index, const XrCompositionLayerProjectionView& view) { + if (!m_supports_hidden_area_mask) { + return {}; + } + + PFN_xrGetVisibilityMaskKHR xrGetVisibilityMaskKHR = nullptr; + XR_CHECK_THROW(xrGetInstanceProcAddr( + m_instance, + "xrGetVisibilityMaskKHR", + reinterpret_cast<PFN_xrVoidFunction*>(&xrGetVisibilityMaskKHR) + )); + + XrVisibilityMaskKHR visibility_mask{XR_TYPE_VISIBILITY_MASK_KHR}; + 
XR_CHECK_THROW(xrGetVisibilityMaskKHR(m_session, m_view_configuration_type, view_index, XR_VISIBILITY_MASK_TYPE_HIDDEN_TRIANGLE_MESH_KHR, &visibility_mask)); + + if (visibility_mask.vertexCountOutput == 0 || visibility_mask.indexCountOutput == 0) { + return nullptr; + } + + std::vector<XrVector2f> vertices(visibility_mask.vertexCountOutput); + std::vector<uint32_t> indices(visibility_mask.indexCountOutput); + + visibility_mask.vertices = vertices.data(); + visibility_mask.indices = indices.data(); + + visibility_mask.vertexCapacityInput = visibility_mask.vertexCountOutput; + visibility_mask.indexCapacityInput = visibility_mask.indexCountOutput; + + XR_CHECK_THROW(xrGetVisibilityMaskKHR(m_session, m_view_configuration_type, view_index, XR_VISIBILITY_MASK_TYPE_HIDDEN_TRIANGLE_MESH_KHR, &visibility_mask)); + + CUDA_CHECK_THROW(cudaDeviceSynchronize()); + + Vector2i size = {view.subImage.imageRect.extent.width, view.subImage.imageRect.extent.height}; + + bool tex = glIsEnabled(GL_TEXTURE_2D); + bool depth = glIsEnabled(GL_DEPTH_TEST); + bool cull = glIsEnabled(GL_CULL_FACE); + GLint previous_texture_id; + glGetIntegerv(GL_TEXTURE_BINDING_2D, &previous_texture_id); + + if (!tex) glEnable(GL_TEXTURE_2D); + if (depth) glDisable(GL_DEPTH_TEST); + if (cull) glDisable(GL_CULL_FACE); + + // Generate texture to hold hidden area mask. Single channel, value of 1 means visible and 0 means masked away + ngp::GLTexture mask_texture; + mask_texture.resize(size, 1, true); + glBindTexture(GL_TEXTURE_2D, mask_texture.texture()); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + + GLuint framebuffer = 0; + glGenFramebuffers(1, &framebuffer); + glBindFramebuffer(GL_FRAMEBUFFER, framebuffer); + glFramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, mask_texture.texture(), 0); + + GLenum draw_buffers[1] = {GL_COLOR_ATTACHMENT0}; + glDrawBuffers(1, draw_buffers); + + glViewport(0, 0, size.x(), size.y()); + + // Draw hidden area mask + GLuint vao; + glGenVertexArrays(1, &vao); + glBindVertexArray(vao); + + GLuint vertex_buffer; + glGenBuffers(1, &vertex_buffer); + glEnableVertexAttribArray(0); + glBindBuffer(GL_ARRAY_BUFFER, vertex_buffer); + glBufferData(GL_ARRAY_BUFFER, sizeof(XrVector2f) * vertices.size(), vertices.data(), GL_STATIC_DRAW); + glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, 0, (void*)0); + + GLuint index_buffer; + glGenBuffers(1, &index_buffer); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, index_buffer); + glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(uint32_t) * indices.size(), indices.data(), GL_STATIC_DRAW); + + glClearColor(1.0f, 1.0f, 1.0f, 1.0f); + glClear(GL_COLOR_BUFFER_BIT); + glUseProgram(m_hidden_area_mask_program); + + XrMatrix4x4f proj; + XrMatrix4x4f_CreateProjectionFov(&proj, GRAPHICS_OPENGL, view.fov, 1.0f / 128.0f, 128.0f); + + GLuint project_id = glGetUniformLocation(m_hidden_area_mask_program, "project"); + glUniformMatrix4fv(project_id, 1, GL_FALSE, &proj.m[0]); + + glDrawElements(GL_TRIANGLES, indices.size(), GL_UNSIGNED_INT, (void*)0); + glFinish(); + + glDisableVertexAttribArray(0); + glDeleteBuffers(1, &vertex_buffer); + glDeleteBuffers(1, &index_buffer); + glDeleteVertexArrays(1, &vao); + glDeleteFramebuffers(1, &framebuffer); + + glBindVertexArray(0); + glUseProgram(0); + + // restore old state + if (!tex) glDisable(GL_TEXTURE_2D); + if (depth) glEnable(GL_DEPTH_TEST); + if (cull) glEnable(GL_CULL_FACE); + glBindTexture(GL_TEXTURE_2D, previous_texture_id); + 
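// The rasterized mask is then read back into a CUDA-accessible buffer below:
// read_hidden_area_mask_kernel (defined above) samples the GL texture through its
// CUDA surface object, one byte per pixel (nonzero = visible, 0 = masked away).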
glBindFramebuffer(GL_FRAMEBUFFER, 0); + + std::shared_ptr<Buffer2D<uint8_t>> mask = std::make_shared<Buffer2D<uint8_t>>(size); + + const dim3 threads = { 16, 8, 1 }; + const dim3 blocks = { div_round_up((uint32_t)size.x(), threads.x), div_round_up((uint32_t)size.y(), threads.y), 1 }; + + read_hidden_area_mask_kernel<<<blocks, threads>>>(size, mask_texture.surface(), mask->data()); + CUDA_CHECK_THROW(cudaDeviceSynchronize()); + + return mask; +} + +Matrix<float, 3, 4> convert_xr_matrix_to_eigen(const XrMatrix4x4f& m) { + Matrix<float, 3, 4> out; + + for (size_t i = 0; i < 3; ++i) { + for (size_t j = 0; j < 4; ++j) { + out(i, j) = m.m[i + j * 4]; + } + } + + // Flip Y and Z axes to match NGP conventions + out(0, 1) *= -1.f; + out(1, 0) *= -1.f; + + out(0, 2) *= -1.f; + out(2, 0) *= -1.f; + + out(1, 3) *= -1.f; + out(2, 3) *= -1.f; + + return out; +} + +Matrix<float, 3, 4> convert_xr_pose_to_eigen(const XrPosef& pose) { + XrMatrix4x4f matrix; + XrVector3f unit_scale{1.0f, 1.0f, 1.0f}; + XrMatrix4x4f_CreateTranslationRotationScale(&matrix, &pose.position, &pose.orientation, &unit_scale); + return convert_xr_matrix_to_eigen(matrix); +} + +OpenXRHMD::FrameInfoPtr OpenXRHMD::begin_frame() { + XrFrameWaitInfo frame_wait_info{XR_TYPE_FRAME_WAIT_INFO}; + XR_CHECK_THROW(xrWaitFrame(m_session, &frame_wait_info, &m_frame_state)); + + XrFrameBeginInfo frame_begin_info{XR_TYPE_FRAME_BEGIN_INFO}; + XR_CHECK_THROW(xrBeginFrame(m_session, &frame_begin_info)); + + if (!m_frame_state.shouldRender) { + return std::make_shared<FrameInfo>(); + } + + uint32_t num_views = (uint32_t)m_swapchains.size(); + // TODO assert m_view_configuration_views.size() == m_swapchains.size() + + // locate views + std::vector<XrView> views(num_views, {XR_TYPE_VIEW}); + + XrViewState viewState{XR_TYPE_VIEW_STATE}; + + XrViewLocateInfo view_locate_info{XR_TYPE_VIEW_LOCATE_INFO}; + view_locate_info.viewConfigurationType = m_view_configuration_type; + view_locate_info.displayTime = m_frame_state.predictedDisplayTime; + view_locate_info.space = m_space; + + XR_CHECK_THROW(xrLocateViews(m_session, &view_locate_info, &viewState, uint32_t(views.size()), &num_views, views.data())); + + if (!(viewState.viewStateFlags & XR_VIEW_STATE_POSITION_VALID_BIT) || !(viewState.viewStateFlags & XR_VIEW_STATE_ORIENTATION_VALID_BIT)) { + return std::make_shared<FrameInfo>(); + } + + m_hidden_area_masks.resize(num_views); + + // Fill frame information + if (!m_previous_frame_info) { + m_previous_frame_info = std::make_shared<FrameInfo>(); + } + + FrameInfoPtr frame_info = std::make_shared<FrameInfo>(*m_previous_frame_info); + frame_info->views.resize(m_swapchains.size()); + + for (size_t i = 0; i < m_swapchains.size(); ++i) { + const auto& sc = m_swapchains[i]; + + XrSwapchainImageAcquireInfo image_acquire_info{XR_TYPE_SWAPCHAIN_IMAGE_ACQUIRE_INFO}; + XrSwapchainImageWaitInfo image_wait_info{XR_TYPE_SWAPCHAIN_IMAGE_WAIT_INFO, nullptr, XR_INFINITE_DURATION}; + + uint32_t image_index; + XR_CHECK_THROW(xrAcquireSwapchainImage(sc.handle, &image_acquire_info, &image_index)); + XR_CHECK_THROW(xrWaitSwapchainImage(sc.handle, &image_wait_info)); + + FrameInfo::View& v = frame_info->views[i]; + v.framebuffer = sc.framebuffers_gl[image_index]; + v.view.pose = views[i].pose; + v.view.fov = views[i].fov; + v.view.subImage.imageRect = XrRect2Di{{0, 0}, {sc.width, sc.height}}; + v.view.subImage.imageArrayIndex = 0; + v.view.subImage.swapchain = sc.handle; + + glBindFramebuffer(GL_FRAMEBUFFER, sc.framebuffers_gl[image_index]); + glClearColor(0.0f, 0.0f, 0.0f, 0.0f); + 
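// Clear to transparent black so that pixels the renderer never touches stay
// invisible; this matters for non-opaque environment blend modes (negotiated
// in init_check_for_xr_blend_mode() above).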
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, sc.images_gl.at(image_index).image, 0); + + if (sc.depth_handle != XR_NULL_HANDLE) { + uint32_t depth_image_index; + XR_CHECK_THROW(xrAcquireSwapchainImage(sc.depth_handle, &image_acquire_info, &depth_image_index)); + XR_CHECK_THROW(xrWaitSwapchainImage(sc.depth_handle, &image_wait_info)); + + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, sc.depth_images_gl.at(depth_image_index).image, 0); + + v.depth_info.subImage.imageRect = XrRect2Di{{0, 0}, {sc.width, sc.height}}; + v.depth_info.subImage.imageArrayIndex = 0; + v.depth_info.subImage.swapchain = sc.depth_handle; + v.depth_info.minDepth = 0.0f; + v.depth_info.maxDepth = 1.0f; + + // To be overwritten with actual near and far planes by end_frame + v.depth_info.nearZ = 1.0f / 128.0f; + v.depth_info.farZ = 128.0f; + } + + glBindFramebuffer(GL_FRAMEBUFFER, 0); + + if (!m_hidden_area_masks.at(i)) { + m_hidden_area_masks.at(i) = rasterize_hidden_area_mask(i, v.view); + } + + v.hidden_area_mask = m_hidden_area_masks.at(i); + v.pose = convert_xr_pose_to_eigen(v.view.pose); + } + + XrActiveActionSet active_action_set{m_action_set, XR_NULL_PATH}; + XrActionsSyncInfo sync_info{XR_TYPE_ACTIONS_SYNC_INFO}; + sync_info.countActiveActionSets = 1; + sync_info.activeActionSets = &active_action_set; + XR_CHECK_THROW(xrSyncActions(m_session, &sync_info)); + + for (size_t i = 0; i < 2; ++i) { + // Hand pose + { + XrActionStatePose pose_state{XR_TYPE_ACTION_STATE_POSE}; + XrActionStateGetInfo get_info{XR_TYPE_ACTION_STATE_GET_INFO}; + get_info.action = m_pose_action; + get_info.subactionPath = m_hand_paths[i]; + XR_CHECK_THROW(xrGetActionStatePose(m_session, &get_info, &pose_state)); + + frame_info->hands[i].pose_active = pose_state.isActive; + if (frame_info->hands[i].pose_active) { + XrSpaceLocation space_location{XR_TYPE_SPACE_LOCATION}; + XR_CHECK_THROW(xrLocateSpace(m_hand_spaces[i], m_space, m_frame_state.predictedDisplayTime, &space_location)); + frame_info->hands[i].pose = convert_xr_pose_to_eigen(space_location.pose); + } + } + + // Stick + { + XrActionStateVector2f thumbstick_state{XR_TYPE_ACTION_STATE_VECTOR2F}; + XrActionStateGetInfo get_info{XR_TYPE_ACTION_STATE_GET_INFO}; + get_info.action = m_thumbstick_actions[i]; + XR_CHECK_THROW(xrGetActionStateVector2f(m_session, &get_info, &thumbstick_state)); + + if (thumbstick_state.isActive) { + frame_info->hands[i].thumbstick.x() = thumbstick_state.currentState.x; + frame_info->hands[i].thumbstick.y() = thumbstick_state.currentState.y; + } else { + frame_info->hands[i].thumbstick = Vector2f::Zero(); + } + } + + // Press + { + XrActionStateBoolean press_state{XR_TYPE_ACTION_STATE_BOOLEAN}; + XrActionStateGetInfo get_info{XR_TYPE_ACTION_STATE_GET_INFO}; + get_info.action = m_press_action; + get_info.subactionPath = m_hand_paths[i]; + XR_CHECK_THROW(xrGetActionStateBoolean(m_session, &get_info, &press_state)); + + if (press_state.isActive) { + frame_info->hands[i].pressing = press_state.currentState; + } else { + frame_info->hands[i].pressing = 0.0f; + } + } + + // Grab + { + XrActionStateFloat grab_state{XR_TYPE_ACTION_STATE_FLOAT}; + XrActionStateGetInfo get_info{XR_TYPE_ACTION_STATE_GET_INFO}; + get_info.action = m_grab_action; + get_info.subactionPath = m_hand_paths[i]; + XR_CHECK_THROW(xrGetActionStateFloat(m_session, &get_info, &grab_state)); + + if (grab_state.isActive) { + frame_info->hands[i].grab_strength = grab_state.currentState; + } 
else { + frame_info->hands[i].grab_strength = 0.0f; + } + + bool was_grabbing = frame_info->hands[i].grabbing; + frame_info->hands[i].grabbing = frame_info->hands[i].grab_strength >= 0.5f; + + if (frame_info->hands[i].grabbing) { + frame_info->hands[i].prev_grab_pos = was_grabbing ? frame_info->hands[i].grab_pos : frame_info->hands[i].pose.col(3); + frame_info->hands[i].grab_pos = frame_info->hands[i].pose.col(3); + } + } + } + + m_previous_frame_info = frame_info; + return frame_info; +} + +void OpenXRHMD::end_frame(FrameInfoPtr frame_info, float znear, float zfar) { + std::vector<XrCompositionLayerProjectionView> layer_projection_views(frame_info->views.size()); + for (size_t i = 0; i < layer_projection_views.size(); ++i) { + auto& v = frame_info->views[i]; + auto& view = layer_projection_views[i]; + + view = v.view; + + // release swapchain image + XrSwapchainImageReleaseInfo release_info{XR_TYPE_SWAPCHAIN_IMAGE_RELEASE_INFO}; + XR_CHECK_THROW(xrReleaseSwapchainImage(v.view.subImage.swapchain, &release_info)); + + if (v.depth_info.subImage.swapchain != XR_NULL_HANDLE) { + XR_CHECK_THROW(xrReleaseSwapchainImage(v.depth_info.subImage.swapchain, &release_info)); + v.depth_info.nearZ = znear; + v.depth_info.farZ = zfar; + // The following line being commented means that our provided depth buffer + // _isn't_ actually passed to the runtime for reprojection. So far, + // experimentation has shown that runtimes do a better job at reprojection + // without getting a depth buffer from us, so we leave it disabled for now. + // view.next = &v.depth_info; + } + } + + XrCompositionLayerProjection layer{XR_TYPE_COMPOSITION_LAYER_PROJECTION}; + layer.space = m_space; + layer.viewCount = uint32_t(layer_projection_views.size()); + layer.views = layer_projection_views.data(); + + std::vector<XrCompositionLayerBaseHeader*> layers; + if (layer.viewCount) { + layers.push_back(reinterpret_cast<XrCompositionLayerBaseHeader*>(&layer)); + } + + XrFrameEndInfo frame_end_info{XR_TYPE_FRAME_END_INFO}; + frame_end_info.displayTime = m_frame_state.predictedDisplayTime; + frame_end_info.environmentBlendMode = m_environment_blend_mode; + frame_end_info.layerCount = (uint32_t)layers.size(); + frame_end_info.layers = layers.data(); + XR_CHECK_THROW(xrEndFrame(m_session, &frame_end_info)); +} + +NGP_NAMESPACE_END + +#ifdef __GNUC__ +#pragma GCC diagnostic pop +#endif diff --git a/src/python_api.cu b/src/python_api.cu index f69056f3f46a65aff0b7f18e7cec7a5fa56f239b..8a8d436405d11488fd550c77f5ff6468300cde91 100644 --- a/src/python_api.cu +++ b/src/python_api.cu @@ -157,6 +157,7 @@ py::array_t<float> Testbed::render_to_cpu(int width, int height, int spp, bool l } auto end_cam_matrix = m_smoothed_camera; + auto prev_camera_matrix = m_smoothed_camera; for (int i = 0; i < spp; ++i) { float start_alpha = ((float)i)/(float)spp * shutter_fraction; @@ -164,6 +165,9 @@ py::array_t<float> Testbed::render_to_cpu(int width, int height, int spp, bool l auto sample_start_cam_matrix = start_cam_matrix; auto sample_end_cam_matrix = log_space_lerp(start_cam_matrix, end_cam_matrix, shutter_fraction); + if (i == 0) { + prev_camera_matrix = sample_start_cam_matrix; + } if (path_animation_enabled) { set_camera_from_time(start_time + (end_time-start_time) * (start_alpha + end_alpha) / 2.0f); @@ -174,7 +178,21 @@ py::array_t<float> Testbed::render_to_cpu(int width, int height, int spp, bool l autofocus(); } - render_frame(sample_start_cam_matrix, sample_end_cam_matrix, Eigen::Vector4f::Zero(), m_windowless_render_surface, !linear); + 
render_frame( + m_stream.get(), + sample_start_cam_matrix, + sample_end_cam_matrix, + prev_camera_matrix, + m_screen_center, + m_relative_focal_length, + {0.0f, 0.0f, 0.0f, 1.0f}, + {}, + {}, + m_visualized_dimension, + m_windowless_render_surface, + !linear + ); + prev_camera_matrix = sample_start_cam_matrix; } // For cam smoothing when rendering the next frame. @@ -303,6 +321,7 @@ PYBIND11_MODULE(pyngp, m) { .value("FTheta", ELensMode::FTheta) .value("LatLong", ELensMode::LatLong) .value("OpenCVFisheye", ELensMode::OpenCVFisheye) + .value("Equirectangular", ELensMode::Equirectangular) .export_values(); py::class_<BoundingBox>(m, "BoundingBox") @@ -344,12 +363,13 @@ PYBIND11_MODULE(pyngp, m) { .def("clear_training_data", &Testbed::clear_training_data, "Clears training data to free up GPU memory.") // General control #ifdef NGP_GUI - .def("init_window", &Testbed::init_window, "Init a GLFW window that shows real-time progress and a GUI. 'second_window' creates a second copy of the output in its own window", + .def("init_window", &Testbed::init_window, "Init a GLFW window that shows real-time progress and a GUI. 'second_window' creates a second copy of the output in its own window.", py::arg("width"), py::arg("height"), py::arg("hidden") = false, py::arg("second_window") = false ) + .def("init_vr", &Testbed::init_vr, "Init rendering to a connected and active VR headset. Requires a GUI window to have been previously created via `init_window`.") .def_readwrite("keyboard_event_callback", &Testbed::m_keyboard_event_callback) .def("is_key_pressed", [](py::object& obj, int key) { return ImGui::IsKeyPressed(key); }) .def("is_key_down", [](py::object& obj, int key) { return ImGui::IsKeyDown(key); }) @@ -431,6 +451,7 @@ PYBIND11_MODULE(pyngp, m) { .def_readwrite("dynamic_res_target_fps", &Testbed::m_dynamic_res_target_fps) .def_readwrite("fixed_res_factor", &Testbed::m_fixed_res_factor) .def_readwrite("background_color", &Testbed::m_background_color) + .def_readwrite("render_transparency_as_checkerboard", &Testbed::m_render_transparency_as_checkerboard) .def_readwrite("shall_train", &Testbed::m_train) .def_readwrite("shall_train_encoding", &Testbed::m_train_encoding) .def_readwrite("shall_train_network", &Testbed::m_train_network) @@ -493,7 +514,7 @@ PYBIND11_MODULE(pyngp, m) { .def_property("dlss", [](py::object& obj) { return obj.cast<Testbed&>().m_dlss; }, [](const py::object& obj, bool value) { - if (value && !obj.cast<Testbed&>().m_dlss_supported) { + if (value && !obj.cast<Testbed&>().m_dlss_provider) { if (obj.cast<Testbed&>().m_render_window) { throw std::runtime_error{"DLSS not supported."}; } else { @@ -660,7 +681,6 @@ PYBIND11_MODULE(pyngp, m) { image .def_readonly("training", &Testbed::Image::training) .def_readwrite("random_mode", &Testbed::Image::random_mode) - .def_readwrite("pos", &Testbed::Image::pos) ; py::class_<Testbed::Image::Training>(image, "Training") diff --git a/src/render_buffer.cu b/src/render_buffer.cu index c6d06e250c5200c96c8455b8de2db5a375a3ff0b..aa0c10dd4950f743bb756c86bff5474d6e962f1c 100644 --- a/src/render_buffer.cu +++ b/src/render_buffer.cu @@ -47,30 +47,40 @@ void CudaSurface2D::free() { m_surface = 0; if (m_array) { cudaFreeArray(m_array); - g_total_n_bytes_allocated -= m_size.prod() * sizeof(float4); + g_total_n_bytes_allocated -= m_size.prod() * sizeof(float) * m_n_channels; } m_array = nullptr; + m_size = Vector2i::Zero(); + m_n_channels = 0; } -void CudaSurface2D::resize(const Vector2i& size) { - if (size == m_size) { +void CudaSurface2D::resize(const 
Vector2i& size, int n_channels) { + if (size == m_size && n_channels == m_n_channels) { return; } free(); - m_size = size; - - cudaChannelFormatDesc desc = cudaCreateChannelDesc<float4>(); + cudaChannelFormatDesc desc; + switch (n_channels) { + case 1: desc = cudaCreateChannelDesc<float>(); break; + case 2: desc = cudaCreateChannelDesc<float2>(); break; + case 3: desc = cudaCreateChannelDesc<float3>(); break; + case 4: desc = cudaCreateChannelDesc<float4>(); break; + default: throw std::runtime_error{fmt::format("CudaSurface2D: unsupported number of channels {}", n_channels)}; + } CUDA_CHECK_THROW(cudaMallocArray(&m_array, &desc, size.x(), size.y(), cudaArraySurfaceLoadStore)); - g_total_n_bytes_allocated += m_size.prod() * sizeof(float4); + g_total_n_bytes_allocated += m_size.prod() * sizeof(float) * n_channels; struct cudaResourceDesc resource_desc; memset(&resource_desc, 0, sizeof(resource_desc)); resource_desc.resType = cudaResourceTypeArray; resource_desc.res.array.array = m_array; CUDA_CHECK_THROW(cudaCreateSurfaceObject(&m_surface, &resource_desc)); + + m_size = size; + m_n_channels = n_channels; } #ifdef NGP_GUI @@ -91,14 +101,14 @@ GLuint GLTexture::texture() { cudaSurfaceObject_t GLTexture::surface() { if (!m_cuda_mapping) { - m_cuda_mapping = std::make_unique<CUDAMapping>(texture(), m_size); + m_cuda_mapping = std::make_unique<CUDAMapping>(texture(), m_size, m_n_channels); } return m_cuda_mapping->surface(); } cudaArray_t GLTexture::array() { if (!m_cuda_mapping) { - m_cuda_mapping = std::make_unique<CUDAMapping>(texture(), m_size); + m_cuda_mapping = std::make_unique<CUDAMapping>(texture(), m_size, m_n_channels); } return m_cuda_mapping->array(); } @@ -108,12 +118,14 @@ void GLTexture::blit_from_cuda_mapping() { return; } - if (m_internal_format != GL_RGBA32F || m_format != GL_RGBA || m_is_8bit) { - throw std::runtime_error{"Can only blit from CUDA mapping if the texture is RGBA float."}; + if (m_is_8bit) { + throw std::runtime_error{"Can only blit from CUDA mapping if the texture is float."}; } const float* data_cpu = m_cuda_mapping->data_cpu(); - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA32F, m_size.x(), m_size.y(), 0, GL_RGBA, GL_FLOAT, data_cpu); + + glBindTexture(GL_TEXTURE_2D, m_texture_id); + glTexImage2D(GL_TEXTURE_2D, 0, m_internal_format, m_size.x(), m_size.y(), 0, m_format, GL_FLOAT, data_cpu); } void GLTexture::load(const fs::path& path) { @@ -173,8 +185,7 @@ void GLTexture::resize(const Vector2i& new_size, int n_channels, bool is_8bit) { glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); } - -GLTexture::CUDAMapping::CUDAMapping(GLuint texture_id, const Vector2i& size) : m_size{size} { +GLTexture::CUDAMapping::CUDAMapping(GLuint texture_id, const Vector2i& size, int n_channels) : m_size{size}, m_n_channels{n_channels} { static bool s_is_cuda_interop_supported = !is_wsl(); if (s_is_cuda_interop_supported) { cudaError_t err = cudaGraphicsGLRegisterImage(&m_graphics_resource, texture_id, GL_TEXTURE_2D, cudaGraphicsRegisterFlagsSurfaceLoadStore); @@ -187,8 +198,8 @@ GLTexture::CUDAMapping::CUDAMapping(GLuint texture_id, const Vector2i& size) : m if (!s_is_cuda_interop_supported) { // falling back to a regular cuda surface + CPU copy of data m_cuda_surface = std::make_unique<CudaSurface2D>(); - m_cuda_surface->resize(size); - m_data_cpu.resize(m_size.prod() * 4); + m_cuda_surface->resize(size, n_channels); + m_data_cpu.resize(m_size.prod() * n_channels); return; } @@ -212,7 +223,7 @@ GLTexture::CUDAMapping::~CUDAMapping() { } const float* 
GLTexture::CUDAMapping::data_cpu() { - CUDA_CHECK_THROW(cudaMemcpy2DFromArray(m_data_cpu.data(), m_size.x() * sizeof(float) * 4, array(), 0, 0, m_size.x() * sizeof(float) * 4, m_size.y(), cudaMemcpyDeviceToHost)); + CUDA_CHECK_THROW(cudaMemcpy2DFromArray(m_data_cpu.data(), m_size.x() * sizeof(float) * m_n_channels, array(), 0, 0, m_size.x() * sizeof(float) * m_n_channels, m_size.y(), cudaMemcpyDeviceToHost)); return m_data_cpu.data(); } #endif //NGP_GUI @@ -362,11 +373,11 @@ __global__ void overlay_image_kernel( float fx = x+0.5f; float fy = y+0.5f; - fx-=resolution.x()*0.5f; fx/=zoom; fx+=screen_center.x() * resolution.x(); - fy-=resolution.y()*0.5f; fy/=zoom; fy+=screen_center.y() * resolution.y(); + fx -= resolution.x() * 0.5f; fx /= zoom; fx += screen_center.x() * resolution.x(); + fy -= resolution.y() * 0.5f; fy /= zoom; fy += screen_center.y() * resolution.y(); - float u = (fx-resolution.x()*0.5f) * scale + image_resolution.x()*0.5f; - float v = (fy-resolution.y()*0.5f) * scale + image_resolution.y()*0.5f; + float u = (fx - resolution.x() * 0.5f) * scale + image_resolution.x() * 0.5f; + float v = (fy - resolution.y() * 0.5f) * scale + image_resolution.y() * 0.5f; int srcx = floorf(u); int srcy = floorf(v); @@ -431,7 +442,8 @@ __global__ void overlay_depth_kernel( float depth_scale, Vector2i image_resolution, int fov_axis, - float zoom, Eigen::Vector2f screen_center, + float zoom, + Eigen::Vector2f screen_center, cudaSurfaceObject_t surface ) { uint32_t x = threadIdx.x + blockDim.x * blockIdx.x; @@ -443,14 +455,14 @@ __global__ void overlay_depth_kernel( float scale = image_resolution[fov_axis] / float(resolution[fov_axis]); - float fx = x+0.5f; - float fy = y+0.5f; + float fx = x + 0.5f; + float fy = y + 0.5f; - fx-=resolution.x()*0.5f; fx/=zoom; fx+=screen_center.x() * resolution.x(); - fy-=resolution.y()*0.5f; fy/=zoom; fy+=screen_center.y() * resolution.y(); + fx -= resolution.x() * 0.5f; fx /= zoom; fx += screen_center.x() * resolution.x(); + fy -= resolution.y() * 0.5f; fy /= zoom; fy += screen_center.y() * resolution.y(); - float u = (fx-resolution.x()*0.5f) * scale + image_resolution.x()*0.5f; - float v = (fy-resolution.y()*0.5f) * scale + image_resolution.y()*0.5f; + float u = (fx - resolution.x() * 0.5f) * scale + image_resolution.x() * 0.5f; + float v = (fy - resolution.y() * 0.5f) * scale + image_resolution.y() * 0.5f; int srcx = floorf(u); int srcy = floorf(v); @@ -568,15 +580,42 @@ __global__ void dlss_splat_kernel( surf2Dwrite(color, surface, x * sizeof(float4), y); } +__global__ void depth_splat_kernel( + Vector2i resolution, + float znear, + float zfar, + float* __restrict__ depth_buffer, + cudaSurfaceObject_t surface +) { + uint32_t x = threadIdx.x + blockDim.x * blockIdx.x; + uint32_t y = threadIdx.y + blockDim.y * blockIdx.y; + + if (x >= resolution.x() || y >= resolution.y()) { + return; + } + + uint32_t idx = x + resolution.x() * y; + surf2Dwrite(to_ndc_depth(depth_buffer[idx], znear, zfar), surface, x * sizeof(float), y); +} + +void CudaRenderBufferView::clear(cudaStream_t stream) const { + size_t n_pixels = resolution.prod(); + CUDA_CHECK_THROW(cudaMemsetAsync(frame_buffer, 0, n_pixels * sizeof(Array4f), stream)); + CUDA_CHECK_THROW(cudaMemsetAsync(depth_buffer, 0, n_pixels * sizeof(float), stream)); +} + void CudaRenderBuffer::resize(const Vector2i& res) { m_in_resolution = res; m_frame_buffer.enlarge(res.x() * res.y()); m_depth_buffer.enlarge(res.x() * res.y()); + if (m_depth_target) { + m_depth_target->resize(res, 1); + } m_accumulate_buffer.enlarge(res.x() 
* res.y()); Vector2i out_res = m_dlss ? m_dlss->out_resolution() : res; auto prev_out_res = out_resolution(); - m_surface_provider->resize(out_res); + m_rgba_target->resize(out_res, 4); if (out_resolution() != prev_out_res) { reset_accumulation(); @@ -584,8 +623,7 @@ void CudaRenderBuffer::resize(const Vector2i& res) { } void CudaRenderBuffer::clear_frame(cudaStream_t stream) { - CUDA_CHECK_THROW(cudaMemsetAsync(m_frame_buffer.data(), 0, m_frame_buffer.bytes(), stream)); - CUDA_CHECK_THROW(cudaMemsetAsync(m_depth_buffer.data(), 0, m_depth_buffer.bytes(), stream)); + view().clear(stream); } void CudaRenderBuffer::accumulate(float exposure, cudaStream_t stream) { @@ -610,10 +648,10 @@ void CudaRenderBuffer::accumulate(float exposure, cudaStream_t stream) { ++m_spp; } -void CudaRenderBuffer::tonemap(float exposure, const Array4f& background_color, EColorSpace output_color_space, cudaStream_t stream) { +void CudaRenderBuffer::tonemap(float exposure, const Array4f& background_color, EColorSpace output_color_space, float znear, float zfar, cudaStream_t stream) { assert(m_dlss || out_resolution() == in_resolution()); - auto res = m_dlss ? in_resolution() : out_resolution(); + auto res = in_resolution(); const dim3 threads = { 16, 8, 1 }; const dim3 blocks = { div_round_up((uint32_t)res.x(), threads.x), div_round_up((uint32_t)res.y(), threads.y), 1 }; tonemap_kernel<<<blocks, threads, 0, stream>>>( @@ -646,6 +684,10 @@ void CudaRenderBuffer::tonemap(float exposure, const Array4f& background_color, const dim3 out_blocks = { div_round_up((uint32_t)out_res.x(), threads.x), div_round_up((uint32_t)out_res.y(), threads.y), 1 }; dlss_splat_kernel<<<out_blocks, threads, 0, stream>>>(out_res, m_dlss->output(), surface()); } + + if (m_depth_target) { + depth_splat_kernel<<<blocks, threads, 0, stream>>>(res, znear, zfar, depth_buffer(), m_depth_target->surface()); + } } void CudaRenderBuffer::overlay_image( @@ -726,10 +768,10 @@ void CudaRenderBuffer::overlay_false_color(Vector2i training_resolution, bool to ); } -void CudaRenderBuffer::enable_dlss(const Eigen::Vector2i& max_out_res) { +void CudaRenderBuffer::enable_dlss(IDlssProvider& dlss_provider, const Eigen::Vector2i& max_out_res) { #ifdef NGP_VULKAN if (!m_dlss || m_dlss->max_out_resolution() != max_out_res) { - m_dlss = dlss_init(max_out_res); + m_dlss = dlss_provider.init_dlss(max_out_res); } if (m_dlss) { diff --git a/src/testbed.cu b/src/testbed.cu index 23735633885daba0ad7550ee7edfe59a93c851de..9cd0282f04d5c9f3ded8a363a586afb81cbd7bd1 100644 --- a/src/testbed.cu +++ b/src/testbed.cu @@ -186,11 +186,30 @@ void Testbed::set_mode(ETestbedMode mode) { m_distortion = {}; m_training_data_available = false; + // Clear device-owned data that might be mode-specific + for (auto&& device : m_devices) { + device.clear(); + } + // Reset paths that might be attached to the chosen mode m_data_path = {}; m_testbed_mode = mode; + // Set various defaults depending on mode + if (m_testbed_mode == ETestbedMode::Nerf) { + if (m_devices.size() > 1) { + m_use_aux_devices = true; + } + + if (m_dlss_provider) { + m_dlss = true; + } + } else { + m_use_aux_devices = false; + m_dlss = false; + } + reset_camera(); } @@ -348,8 +367,8 @@ void Testbed::reset_accumulation(bool due_to_camera_movement, bool immediate_red if (!due_to_camera_movement || !reprojection_available()) { m_windowless_render_surface.reset_accumulation(); - for (auto& tex : m_render_surfaces) { - tex.reset_accumulation(); + for (auto& view : m_views) { + view.render_buffer->reset_accumulation(); } } } @@ 
-359,8 +378,13 @@ void Testbed::set_visualized_dim(int dim) { reset_accumulation(); } -void Testbed::translate_camera(const Vector3f& rel) { - m_camera.col(3) += m_camera.block<3, 3>(0, 0) * rel * m_bounding_radius; +void Testbed::translate_camera(const Vector3f& rel, const Matrix3f& rot, bool allow_up_down) { + Vector3f movement = rot * rel; + if (!allow_up_down) { + movement -= movement.dot(m_up_dir) * m_up_dir; + } + + m_camera.col(3) += movement; reset_accumulation(true); } @@ -425,15 +449,28 @@ void Testbed::set_camera_to_training_view(int trainview) { m_scale = std::max((old_look_at - view_pos()).dot(view_dir()), 0.1f); m_nerf.render_with_lens_distortion = true; m_nerf.render_lens = m_nerf.training.dataset.metadata[trainview].lens; - m_screen_center = Vector2f::Constant(1.0f) - m_nerf.training.dataset.metadata[0].principal_point; + if (!supports_dlss(m_nerf.render_lens.mode)) { + m_dlss = false; + } + + m_screen_center = Vector2f::Constant(1.0f) - m_nerf.training.dataset.metadata[trainview].principal_point; + m_nerf.training.view = trainview; } void Testbed::reset_camera() { m_fov_axis = 1; - set_fov(50.625f); - m_zoom = 1.f; + m_zoom = 1.0f; m_screen_center = Vector2f::Constant(0.5f); - m_scale = m_testbed_mode == ETestbedMode::Image ? 1.0f : 1.5f; + + if (m_testbed_mode == ETestbedMode::Image) { + // Make image full-screen at the given view distance + m_relative_focal_length = Vector2f::Ones(); + m_scale = 1.0f; + } else { + set_fov(50.625f); + m_scale = 1.5f; + } + m_camera << 1.0f, 0.0f, 0.0f, 0.5f, 0.0f, -1.0f, 0.0f, 0.5f, @@ -630,7 +667,7 @@ void Testbed::imgui() { m_smoothed_camera = m_camera; } } else { - m_pip_render_surface->reset_accumulation(); + m_pip_render_buffer->reset_accumulation(); } } } @@ -639,7 +676,7 @@ void Testbed::imgui() { float w = ImGui::GetContentRegionAvail().x; if (m_camera_path.update_cam_from_path) { m_picture_in_picture_res = 0; - ImGui::Image((ImTextureID)(size_t)m_render_textures.front()->texture(), ImVec2(w, w * 9.0f / 16.0f)); + ImGui::Image((ImTextureID)(size_t)m_rgba_render_textures.front()->texture(), ImVec2(w, w * 9.0f / 16.0f)); } else { m_picture_in_picture_res = (float)std::min((int(w)+31)&(~31), 1920/4); ImGui::Image((ImTextureID)(size_t)m_pip_render_texture->texture(), ImVec2(w, w * 9.0f / 16.0f)); @@ -684,7 +721,7 @@ void Testbed::imgui() { auto elapsed = std::chrono::steady_clock::now() - m_camera_path.render_start_time; - uint32_t progress = m_camera_path.render_frame_idx * m_camera_path.render_settings.spp + m_render_surfaces.front().spp(); + uint32_t progress = m_camera_path.render_frame_idx * m_camera_path.render_settings.spp + m_views.front().render_buffer->spp(); uint32_t goal = m_camera_path.render_settings.n_frames() * m_camera_path.render_settings.spp; auto est_remaining = elapsed * (float)(goal - progress) / std::max(progress, 1u); @@ -718,7 +755,11 @@ void Testbed::imgui() { ImGui::Begin("instant-ngp v" NGP_VERSION); - size_t n_bytes = tcnn::total_n_bytes_allocated() + g_total_n_bytes_allocated + dlss_allocated_bytes(); + size_t n_bytes = tcnn::total_n_bytes_allocated() + g_total_n_bytes_allocated; + if (m_dlss_provider) { + n_bytes += m_dlss_provider->allocated_bytes(); + } + ImGui::Text("Frame: %.2f ms (%.1f FPS); Mem: %s", m_frame_ms.ema_val(), 1000.0f / m_frame_ms.ema_val(), bytes_to_string(n_bytes).c_str()); bool accum_reset = false; @@ -728,41 +769,58 @@ void Testbed::imgui() { if (imgui_colored_button(m_train ? 
"Stop training" : "Start training", 0.4)) { set_train(!m_train); } + + + ImGui::SameLine(); + if (imgui_colored_button("Reset training", 0.f)) { + reload_network_from_file(); + } + ImGui::SameLine(); - ImGui::Checkbox("Train encoding", &m_train_encoding); + ImGui::Checkbox("encoding", &m_train_encoding); ImGui::SameLine(); - ImGui::Checkbox("Train network", &m_train_network); + ImGui::Checkbox("network", &m_train_network); ImGui::SameLine(); - ImGui::Checkbox("Random levels", &m_max_level_rand_training); + ImGui::Checkbox("rand levels", &m_max_level_rand_training); if (m_testbed_mode == ETestbedMode::Nerf) { - ImGui::Checkbox("Train envmap", &m_nerf.training.train_envmap); + ImGui::Checkbox("envmap", &m_nerf.training.train_envmap); ImGui::SameLine(); - ImGui::Checkbox("Train extrinsics", &m_nerf.training.optimize_extrinsics); + ImGui::Checkbox("extrinsics", &m_nerf.training.optimize_extrinsics); ImGui::SameLine(); - ImGui::Checkbox("Train exposure", &m_nerf.training.optimize_exposure); + ImGui::Checkbox("exposure", &m_nerf.training.optimize_exposure); ImGui::SameLine(); - ImGui::Checkbox("Train distortion", &m_nerf.training.optimize_distortion); + ImGui::Checkbox("distortion", &m_nerf.training.optimize_distortion); + if (m_nerf.training.dataset.n_extra_learnable_dims) { - ImGui::Checkbox("Train latent codes", &m_nerf.training.optimize_extra_dims); + ImGui::SameLine(); + ImGui::Checkbox("latents", &m_nerf.training.optimize_extra_dims); } + + static bool export_extrinsics_in_quat_format = true; - if (imgui_colored_button("Export extrinsics", 0.4f)) { - m_nerf.training.export_camera_extrinsics(m_imgui.extrinsics_path, export_extrinsics_in_quat_format); + static bool extrinsics_have_been_optimized = false; + + if (m_nerf.training.optimize_extrinsics) { + extrinsics_have_been_optimized = true; } - ImGui::SameLine(); - ImGui::PushItemWidth(400.f); - ImGui::InputText("File##Extrinsics file path", m_imgui.extrinsics_path, sizeof(m_imgui.extrinsics_path)); - ImGui::PopItemWidth(); - ImGui::SameLine(); - ImGui::Checkbox("Quaternion format", &export_extrinsics_in_quat_format); - } - if (imgui_colored_button("Reset training", 0.f)) { - reload_network_from_file(); + if (extrinsics_have_been_optimized) { + if (imgui_colored_button("Export extrinsics", 0.4f)) { + m_nerf.training.export_camera_extrinsics(m_imgui.extrinsics_path, export_extrinsics_in_quat_format); + } + + ImGui::SameLine(); + ImGui::Checkbox("as quaternions", &export_extrinsics_in_quat_format); + ImGui::InputText("File##Extrinsics file path", m_imgui.extrinsics_path, sizeof(m_imgui.extrinsics_path)); + } } + + ImGui::PushItemWidth(ImGui::GetWindowWidth() * 0.3f); + ImGui::SliderInt("Batch size", (int*)&m_training_batch_size, 1 << 12, 1 << 22, "%d", ImGuiSliderFlags_Logarithmic); ImGui::SameLine(); ImGui::DragInt("Seed", (int*)&m_seed, 1.0f, 0, std::numeric_limits<int>::max()); - ImGui::SliderInt("Batch size", (int*)&m_training_batch_size, 1 << 12, 1 << 22, "%d", ImGuiSliderFlags_Logarithmic); + ImGui::PopItemWidth(); + m_training_batch_size = next_multiple(m_training_batch_size, batch_size_granularity); if (m_train) { @@ -778,9 +836,11 @@ void Testbed::imgui() { } else { ImGui::Text("Training paused"); } + if (m_testbed_mode == ETestbedMode::Nerf) { ImGui::Text("Rays/batch: %d, Samples/ray: %.2f, Batch size: %d/%d", m_nerf.training.counters_rgb.rays_per_batch, (float)m_nerf.training.counters_rgb.measured_batch_size / (float)m_nerf.training.counters_rgb.rays_per_batch, m_nerf.training.counters_rgb.measured_batch_size, 
m_nerf.training.counters_rgb.measured_batch_size_before_compaction); } + float elapsed_training = std::chrono::duration<float>(std::chrono::steady_clock::now() - m_training_start_time_point).count(); ImGui::Text("Steps: %d, Loss: %0.6f (%0.2f dB), Elapsed: %.1fs", m_training_step, m_loss_scalar.ema_val(), linear_to_db(m_loss_scalar.ema_val()), elapsed_training); ImGui::PlotLines("loss graph", m_loss_graph.data(), std::min(m_loss_graph_samples, m_loss_graph.size()), (m_loss_graph_samples < m_loss_graph.size()) ? 0 : (m_loss_graph_samples % m_loss_graph.size()), 0, FLT_MAX, FLT_MAX, ImVec2(0, 50.f)); @@ -848,85 +908,75 @@ void Testbed::imgui() { if (!m_training_data_available) { ImGui::EndDisabled(); } if (ImGui::CollapsingHeader("Rendering", ImGuiTreeNodeFlags_DefaultOpen)) { - ImGui::Checkbox("Render", &m_render); - ImGui::SameLine(); - - const auto& render_tex = m_render_surfaces.front(); - std::string spp_string = m_dlss ? std::string{""} : fmt::format("({} spp)", std::max(render_tex.spp(), 1u)); - ImGui::Text(": %.01fms for %dx%d %s", m_render_ms.ema_val(), render_tex.in_resolution().x(), render_tex.in_resolution().y(), spp_string.c_str()); - - if (m_dlss_supported) { - if (!m_single_view) { - ImGui::BeginDisabled(); - m_dlss = false; - } - - if (ImGui::Checkbox("DLSS", &m_dlss)) { - accum_reset = true; + if (!m_hmd) { + if (ImGui::Button("Connect to VR/AR headset")) { + try { + init_vr(); + } catch (const std::runtime_error& e) { + imgui_error_string = e.what(); + ImGui::OpenPopup("Error"); + } } - - if (render_tex.dlss()) { - ImGui::SameLine(); - ImGui::Text("(automatic quality setting: %s)", DlssQualityStrArray[(int)render_tex.dlss()->quality()]); - ImGui::SliderFloat("DLSS sharpening", &m_dlss_sharpening, 0.0f, 1.0f, "%.02f"); + } else if (ImGui::TreeNodeEx("VR/AR settings", ImGuiTreeNodeFlags_DefaultOpen)) { + if (m_devices.size() > 1 && m_testbed_mode == ETestbedMode::Nerf) { + ImGui::Checkbox("Multi-GPU rendering (one per eye)", &m_use_aux_devices); } - if (!m_single_view) { - ImGui::EndDisabled(); + accum_reset |= ImGui::Checkbox("Foveated rendering", &m_foveated_rendering) && !m_dlss; + if (m_foveated_rendering) { + accum_reset |= ImGui::SliderFloat("Maximum foveation", &m_foveated_rendering_max_scaling, 1.0f, 16.0f, "%.01f", ImGuiSliderFlags_Logarithmic | ImGuiSliderFlags_NoRoundToFormat) && !m_dlss; } + ImGui::TreePop(); } - ImGui::Checkbox("Dynamic resolution", &m_dynamic_res); + ImGui::Checkbox("Render", &m_render); + ImGui::SameLine(); + + const auto& render_buffer = m_views.front().render_buffer; + std::string spp_string = m_dlss ? std::string{""} : fmt::format("({} spp)", std::max(render_buffer->spp(), 1u)); + ImGui::Text(": %.01fms for %dx%d %s", m_render_ms.ema_val(), render_buffer->in_resolution().x(), render_buffer->in_resolution().y(), spp_string.c_str()); + + ImGui::SameLine(); if (ImGui::Checkbox("VSync", &m_vsync)) { glfwSwapInterval(m_vsync ? 
1 : 0); } - ImGui::SliderFloat("Target FPS", &m_dynamic_res_target_fps, 2.0f, 144.0f, "%.01f", ImGuiSliderFlags_Logarithmic | ImGuiSliderFlags_NoRoundToFormat); - ImGui::SliderInt("Max spp", &m_max_spp, 0, 1024, "%d", ImGuiSliderFlags_Logarithmic | ImGuiSliderFlags_NoRoundToFormat); - if (!m_dynamic_res) { - ImGui::SliderInt("Fixed resolution factor", &m_fixed_res_factor, 8, 64); - } - - if (m_testbed_mode == ETestbedMode::Nerf && m_nerf.training.dataset.has_light_dirs) { - Vector3f light_dir = m_nerf.light_dir.normalized(); - if (ImGui::TreeNodeEx("Light Dir (Polar)", ImGuiTreeNodeFlags_DefaultOpen)) { - float phi = atan2f(m_nerf.light_dir.x(), m_nerf.light_dir.z()); - float theta = asinf(m_nerf.light_dir.y()); - bool spin = ImGui::SliderFloat("Light Dir Theta", &theta, -PI() / 2.0f, PI() / 2.0f); - spin |= ImGui::SliderFloat("Light Dir Phi", &phi, -PI(), PI()); - if (spin) { - float sin_phi, cos_phi; - sincosf(phi, &sin_phi, &cos_phi); - float cos_theta=cosf(theta); - m_nerf.light_dir = {sin_phi * cos_theta,sinf(theta),cos_phi * cos_theta}; - accum_reset = true; - } - ImGui::TreePop(); - } - if (ImGui::TreeNode("Light Dir (Cartesian)")) { - accum_reset |= ImGui::SliderFloat("Light Dir X", ((float*)(&m_nerf.light_dir)) + 0, -1.0f, 1.0f); - accum_reset |= ImGui::SliderFloat("Light Dir Y", ((float*)(&m_nerf.light_dir)) + 1, -1.0f, 1.0f); - accum_reset |= ImGui::SliderFloat("Light Dir Z", ((float*)(&m_nerf.light_dir)) + 2, -1.0f, 1.0f); - ImGui::TreePop(); - } + if (!m_dlss_provider) { ImGui::BeginDisabled(); } + accum_reset |= ImGui::Checkbox("DLSS", &m_dlss); + + if (render_buffer->dlss()) { + ImGui::SameLine(); + ImGui::Text("(%s)", DlssQualityStrArray[(int)render_buffer->dlss()->quality()]); + ImGui::SameLine(); + ImGui::PushItemWidth(ImGui::GetWindowWidth() * 0.3f); + ImGui::SliderFloat("Sharpening", &m_dlss_sharpening, 0.0f, 1.0f, "%.02f"); + ImGui::PopItemWidth(); } - if (m_testbed_mode == ETestbedMode::Nerf && m_nerf.training.dataset.n_extra_learnable_dims) { - accum_reset |= ImGui::SliderInt("training image latent code for inference", (int*)&m_nerf.extra_dim_idx_for_inference, 0, m_nerf.training.dataset.n_images-1); + + if (!m_dlss_provider) { + ImGui::SameLine(); + ImGui::Text("(unsupported on this system)"); + ImGui::EndDisabled(); } - accum_reset |= ImGui::Combo("Render mode", (int*)&m_render_mode, RenderModeStr); - if (m_testbed_mode == ETestbedMode::Nerf) { - accum_reset |= ImGui::Combo("Groundtruth Render mode", (int*)&m_ground_truth_render_mode, GroundTruthRenderModeStr); - accum_reset |= ImGui::SliderFloat("Groundtruth Alpha", &m_ground_truth_alpha, 0.0f, 1.0f, "%.02f", ImGuiSliderFlags_AlwaysClamp); + + ImGui::Checkbox("Dynamic resolution", &m_dynamic_res); + ImGui::SameLine(); + ImGui::PushItemWidth(ImGui::GetWindowWidth() * 0.3f); + if (m_dynamic_res) { + ImGui::SliderFloat("Target FPS", &m_dynamic_res_target_fps, 2.0f, 144.0f, "%.01f", ImGuiSliderFlags_Logarithmic | ImGuiSliderFlags_NoRoundToFormat); + } else { + ImGui::SliderInt("Resolution factor", &m_fixed_res_factor, 8, 64); } - accum_reset |= ImGui::Combo("Color space", (int*)&m_color_space, ColorSpaceStr); + ImGui::PopItemWidth(); + + accum_reset |= ImGui::Combo("Render mode", (int*)&m_render_mode, RenderModeStr); accum_reset |= ImGui::Combo("Tonemap curve", (int*)&m_tonemap_curve, TonemapCurveStr); accum_reset |= ImGui::ColorEdit4("Background", &m_background_color[0]); + if (ImGui::SliderFloat("Exposure", &m_exposure, -5.f, 5.f)) { set_exposure(m_exposure); } - accum_reset |= ImGui::Checkbox("Snap to pixel 
centers", &m_snap_to_pixel_centers); - float max_diam = (m_aabb.max-m_aabb.min).maxCoeff(); float render_diam = (m_render_aabb.max-m_render_aabb.min).maxCoeff(); float old_render_diam = render_diam; @@ -988,11 +1038,52 @@ void Testbed::imgui() { m_edit_render_aabb = false; } + if (ImGui::TreeNode("Advanced rendering options")) { + ImGui::SliderInt("Max spp", &m_max_spp, 0, 1024, "%d", ImGuiSliderFlags_Logarithmic | ImGuiSliderFlags_NoRoundToFormat); + accum_reset |= ImGui::Checkbox("Render transparency as checkerboard", &m_render_transparency_as_checkerboard); + accum_reset |= ImGui::Combo("Color space", (int*)&m_color_space, ColorSpaceStr); + accum_reset |= ImGui::Checkbox("Snap to pixel centers", &m_snap_to_pixel_centers); + + ImGui::TreePop(); + } + if (m_testbed_mode == ETestbedMode::Nerf && ImGui::TreeNode("NeRF rendering options")) { - accum_reset |= ImGui::Checkbox("Apply lens distortion", &m_nerf.render_with_lens_distortion); + if (m_nerf.training.dataset.has_light_dirs) { + Vector3f light_dir = m_nerf.light_dir.normalized(); + if (ImGui::TreeNodeEx("Light Dir (Polar)", ImGuiTreeNodeFlags_DefaultOpen)) { + float phi = atan2f(m_nerf.light_dir.x(), m_nerf.light_dir.z()); + float theta = asinf(m_nerf.light_dir.y()); + bool spin = ImGui::SliderFloat("Light Dir Theta", &theta, -PI() / 2.0f, PI() / 2.0f); + spin |= ImGui::SliderFloat("Light Dir Phi", &phi, -PI(), PI()); + if (spin) { + float sin_phi, cos_phi; + sincosf(phi, &sin_phi, &cos_phi); + float cos_theta=cosf(theta); + m_nerf.light_dir = {sin_phi * cos_theta,sinf(theta),cos_phi * cos_theta}; + accum_reset = true; + } + ImGui::TreePop(); + } + + if (ImGui::TreeNode("Light Dir (Cartesian)")) { + accum_reset |= ImGui::SliderFloat("Light Dir X", ((float*)(&m_nerf.light_dir)) + 0, -1.0f, 1.0f); + accum_reset |= ImGui::SliderFloat("Light Dir Y", ((float*)(&m_nerf.light_dir)) + 1, -1.0f, 1.0f); + accum_reset |= ImGui::SliderFloat("Light Dir Z", ((float*)(&m_nerf.light_dir)) + 2, -1.0f, 1.0f); + ImGui::TreePop(); + } + } + + if (m_nerf.training.dataset.n_extra_learnable_dims) { + accum_reset |= ImGui::SliderInt("training image latent code for inference", (int*)&m_nerf.extra_dim_idx_for_inference, 0, m_nerf.training.dataset.n_images-1); + } + + accum_reset |= ImGui::Combo("Groundtruth render mode", (int*)&m_ground_truth_render_mode, GroundTruthRenderModeStr); + accum_reset |= ImGui::SliderFloat("Groundtruth alpha", &m_ground_truth_alpha, 0.0f, 1.0f, "%.02f", ImGuiSliderFlags_AlwaysClamp); + + bool lens_changed = ImGui::Checkbox("Apply lens distortion", &m_nerf.render_with_lens_distortion); if (m_nerf.render_with_lens_distortion) { - accum_reset |= ImGui::Combo("Lens mode", (int*)&m_nerf.render_lens.mode, LensModeStr); + lens_changed |= ImGui::Combo("Lens mode", (int*)&m_nerf.render_lens.mode, LensModeStr); if (m_nerf.render_lens.mode == ELensMode::OpenCV) { accum_reset |= ImGui::InputFloat("k1", &m_nerf.render_lens.params[0], 0.f, 0.f, "%.5f"); accum_reset |= ImGui::InputFloat("k2", &m_nerf.render_lens.params[1], 0.f, 0.f, "%.5f"); @@ -1012,6 +1103,12 @@ void Testbed::imgui() { accum_reset |= ImGui::InputFloat("f_theta p3", &m_nerf.render_lens.params[3], 0.f, 0.f, "%.5f"); accum_reset |= ImGui::InputFloat("f_theta p4", &m_nerf.render_lens.params[4], 0.f, 0.f, "%.5f"); } + + if (lens_changed && !supports_dlss(m_nerf.render_lens.mode)) { + m_dlss = false; + } + + accum_reset |= lens_changed; } accum_reset |= ImGui::SliderFloat("Min transmittance", &m_nerf.render_min_transmittance, 0.0f, 1.0f, "%.3f", ImGuiSliderFlags_Logarithmic | 
ImGuiSliderFlags_NoRoundToFormat); @@ -1032,6 +1129,7 @@ void Testbed::imgui() { } accum_reset |= ImGui::Checkbox("Analytic normals", &m_sdf.analytic_normals); + accum_reset |= ImGui::Checkbox("Floor", &m_floor_enable); accum_reset |= ImGui::SliderFloat("Normals epsilon", &m_sdf.fd_normals_epsilon, 0.00001f, 0.1f, "%.6g", ImGuiSliderFlags_Logarithmic); accum_reset |= ImGui::SliderFloat("Maximum distance", &m_sdf.maximum_distance, 0.00001f, 0.1f, "%.6g", ImGuiSliderFlags_Logarithmic); @@ -1131,24 +1229,29 @@ void Testbed::imgui() { } if (ImGui::CollapsingHeader("Camera", ImGuiTreeNodeFlags_DefaultOpen)) { - if (ImGui::SliderFloat("Aperture size", &m_aperture_size, 0.0f, 0.1f)) { + ImGui::Checkbox("First person controls", &m_fps_camera); + ImGui::SameLine(); + ImGui::Checkbox("Smooth motion", &m_camera_smoothing); + ImGui::SameLine(); + ImGui::Checkbox("Autofocus", &m_autofocus); + ImGui::PushItemWidth(ImGui::GetWindowWidth() * 0.3f); + if (ImGui::SliderFloat("Aperture size", &m_aperture_size, 0.0f, 1.0f, "%.3f", ImGuiSliderFlags_Logarithmic | ImGuiSliderFlags_NoRoundToFormat)) { m_dlss = false; accum_reset = true; } + ImGui::SameLine(); + accum_reset |= ImGui::SliderFloat("Focus depth", &m_slice_plane_z, -m_bounding_radius, m_bounding_radius); + float local_fov = fov(); if (ImGui::SliderFloat("Field of view", &local_fov, 0.0f, 120.0f)) { set_fov(local_fov); accum_reset = true; } + ImGui::SameLine(); accum_reset |= ImGui::SliderFloat("Zoom", &m_zoom, 1.f, 10.f); - if (m_testbed_mode == ETestbedMode::Sdf) { - accum_reset |= ImGui::Checkbox("Floor", &m_floor_enable); - ImGui::SameLine(); - } + ImGui::PopItemWidth(); + - ImGui::Checkbox("First person controls", &m_fps_camera); - ImGui::Checkbox("Smooth camera motion", &m_camera_smoothing); - ImGui::Checkbox("Autofocus", &m_autofocus); if (ImGui::TreeNode("Advanced camera settings")) { accum_reset |= ImGui::SliderFloat2("Screen center", &m_screen_center.x(), 0.f, 1.f); @@ -1218,7 +1321,7 @@ void Testbed::imgui() { } } - if (ImGui::CollapsingHeader("Snapshot")) { + if (ImGui::CollapsingHeader("Snapshot", ImGuiTreeNodeFlags_DefaultOpen)) { ImGui::Text("Snapshot"); ImGui::SameLine(); if (ImGui::Button("Save")) { @@ -1329,7 +1432,7 @@ void Testbed::imgui() { ImGui::Text("%dx%dx%d", res3d.x(), res3d.y(), res3d.z()); float thresh_range = (m_testbed_mode == ETestbedMode::Sdf) ? 
0.5f : 10.f; ImGui::SliderFloat("MC density threshold",&m_mesh.thresh, -thresh_range, thresh_range); - ImGui::Combo("Mesh render mode", (int*)&m_mesh_render_mode, "Off\0Vertex Colors\0Vertex Normals\0Face IDs\0"); + ImGui::Combo("Mesh render mode", (int*)&m_mesh_render_mode, "Off\0Vertex Colors\0Vertex Normals\0\0"); ImGui::Checkbox("Unwrap mesh", &m_mesh.unwrap); if (uint32_t tricount = m_mesh.indices.size()/3) { ImGui::InputText("##OBJFile", m_imgui.mesh_path, sizeof(m_imgui.mesh_path)); @@ -1446,11 +1549,11 @@ void Testbed::draw_visualizations(ImDrawList* list, const Matrix<float, 3, 4>& c view2world.setIdentity(); view2world.block<3,4>(0,0) = camera_matrix; - auto focal = calc_focal_length(Vector2i::Ones(), m_fov_axis, m_zoom); + auto focal = calc_focal_length(Vector2i::Ones(), m_relative_focal_length, m_fov_axis, m_zoom); float zscale = 1.0f / focal[m_fov_axis]; float xyscale = (float)m_window_res[m_fov_axis]; - Vector2f screen_center = render_screen_center(); + Vector2f screen_center = render_screen_center(m_screen_center); view2proj << xyscale, 0, (float)m_window_res.x()*screen_center.x()*zscale, 0, 0, xyscale, (float)m_window_res.y()*screen_center.y()*zscale, 0, @@ -1478,12 +1581,12 @@ void Testbed::draw_visualizations(ImDrawList* list, const Matrix<float, 3, 4>& c float flx = focal.x(); float fly = focal.y(); Matrix<float, 4, 4> view2proj_guizmo; - float zfar = 100.f; - float znear = 0.1f; + float zfar = m_ndc_zfar; + float znear = m_ndc_znear; view2proj_guizmo << - fly*2.f/aspect, 0, 0, 0, - 0, -fly*2.f, 0, 0, - 0, 0, (zfar+znear)/(zfar-znear), -(2.f*zfar*znear) / (zfar-znear), + fly * 2.f / aspect, 0, 0, 0, + 0, -fly * 2.f, 0, 0, + 0, 0, (zfar + znear) / (zfar - znear), -(2.f * zfar * znear) / (zfar - znear), 0, 0, 1, 0; ImGuizmo::SetRect(0, 0, io.DisplaySize.x, io.DisplaySize.y); @@ -1502,8 +1605,8 @@ void Testbed::draw_visualizations(ImDrawList* list, const Matrix<float, 3, 4>& c } } - if (m_camera_path.imgui_viz(list, view2proj, world2proj, world2view, focal, aspect)) { - m_pip_render_surface->reset_accumulation(); + if (m_camera_path.imgui_viz(list, view2proj, world2proj, world2view, focal, aspect, m_ndc_znear, m_ndc_zfar)) { + m_pip_render_buffer->reset_accumulation(); } } } @@ -1675,46 +1778,66 @@ bool Testbed::keyboard_event() { if (translate_vec != Vector3f::Zero()) { m_fps_camera = true; - translate_camera(translate_vec); + + // If VR is active, movement that isn't aligned with the current view + // direction is _very_ jarring to the user, so make keyboard-based + // movement aligned with the VR view, even though it is not an intended + // movement mechanism. (Users should use controllers.) + translate_camera(translate_vec, m_hmd && m_hmd->is_visible() ? m_views.front().camera0.block<3, 3>(0, 0) : m_camera.block<3, 3>(0, 0)); } return false; } -void Testbed::mouse_wheel(Vector2f m, float delta) { +void Testbed::mouse_wheel() { + float delta = ImGui::GetIO().MouseWheel; if (delta == 0) { return; } - if (!ImGui::GetIO().WantCaptureMouse) { - float scale_factor = pow(1.1f, -delta); - m_image.pos = (m_image.pos - m) / scale_factor + m; - set_scale(m_scale * scale_factor); + float scale_factor = pow(1.1f, -delta); + set_scale(m_scale * scale_factor); + + // When in image mode, zoom around the hovered point. + if (m_testbed_mode == ETestbedMode::Image) { + Vector2i mouse = {ImGui::GetMousePos().x, ImGui::GetMousePos().y}; + Vector3f offset = get_3d_pos_from_pixel(*m_views.front().render_buffer, mouse) - look_at(); + + // Don't center around infinitely distant points. 
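+		// (Offsets of 256 scene units or more are treated as background here; recentering the zoom on them would translate the camera arbitrarily far away.)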
+ if (offset.norm() < 256.0f) { + m_camera.col(3) += offset * (1.0f - scale_factor); + } } reset_accumulation(true); } -void Testbed::mouse_drag(const Vector2f& rel, int button) { +Matrix3f Testbed::rotation_from_angles(const Vector2f& angles) const { Vector3f up = m_up_dir; Vector3f side = m_camera.col(0); + return (AngleAxisf(angles.x(), up) * AngleAxisf(angles.y(), side)).matrix(); +} + +void Testbed::mouse_drag() { + Vector2f rel = Vector2f{ImGui::GetIO().MouseDelta.x, ImGui::GetIO().MouseDelta.y} / (float)m_window_res[m_fov_axis]; + Vector2i mouse = {ImGui::GetMousePos().x, ImGui::GetMousePos().y}; - bool is_left_held = (button & 1) != 0; - bool is_right_held = (button & 2) != 0; + Vector3f up = m_up_dir; + Vector3f side = m_camera.col(0); bool shift = ImGui::GetIO().KeyMods & ImGuiKeyModFlags_Shift; - if (is_left_held) { + + // Left held + if (ImGui::GetIO().MouseDown[0]) { if (shift) { - auto mouse = ImGui::GetMousePos(); - determine_autofocus_target_from_pixel({mouse.x, mouse.y}); + m_autofocus_target = get_3d_pos_from_pixel(*m_views.front().render_buffer, mouse); + m_autofocus = true; + reset_accumulation(); } else { float rot_sensitivity = m_fps_camera ? 0.35f : 1.0f; - Matrix3f rot = - (AngleAxisf(static_cast<float>(-rel.x() * 2 * PI() * rot_sensitivity), up) * // Scroll sideways around up vector - AngleAxisf(static_cast<float>(-rel.y() * 2 * PI() * rot_sensitivity), side)).matrix(); // Scroll around side vector + Matrix3f rot = rotation_from_angles(-rel * 2 * PI() * rot_sensitivity); - m_image.pos += rel; if (m_fps_camera) { m_camera.block<3, 3>(0, 0) = rot * m_camera.block<3, 3>(0, 0); } else { @@ -1729,11 +1852,9 @@ void Testbed::mouse_drag(const Vector2f& rel, int button) { } } - if (is_right_held) { - Matrix3f rot = - (AngleAxisf(static_cast<float>(-rel.x() * 2 * PI()), up) * // Scroll sideways around up vector - AngleAxisf(static_cast<float>(-rel.y() * 2 * PI()), side)).matrix(); // Scroll around side vector - + // Right held + if (ImGui::GetIO().MouseDown[1]) { + Matrix3f rot = rotation_from_angles(-rel * 2 * PI()); if (m_render_mode == ERenderMode::Shade) { m_sun_dir = rot.transpose() * m_sun_dir; } @@ -1742,14 +1863,27 @@ void Testbed::mouse_drag(const Vector2f& rel, int button) { reset_accumulation(); } - bool is_middle_held = (button & 4) != 0; - if (is_middle_held) { - translate_camera({-rel.x(), -rel.y(), 0.0f}); + // Middle pressed + if (ImGui::GetIO().MouseClicked[2]) { + m_drag_depth = get_depth_from_renderbuffer(*m_views.front().render_buffer, mouse.cast<float>().cwiseQuotient(m_window_res.cast<float>())); + } + + // Middle held + if (ImGui::GetIO().MouseDown[2]) { + Vector3f translation = Vector3f{-rel.x(), -rel.y(), 0.0f} / m_zoom; + + // If we have a valid depth value, scale the scene translation by it such that the + // hovered point in 3D space stays under the cursor. 
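+		// (Pinhole geometry: a point at depth d moves on screen by focal/d per unit of camera translation, so scaling the drag by d / focal keeps the point fixed under the cursor.)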
+ if (m_drag_depth < 256.0f) { + translation *= m_drag_depth / m_relative_focal_length[m_fov_axis]; + } + + translate_camera(translation, m_camera.block<3, 3>(0, 0)); } } -bool Testbed::begin_frame_and_handle_user_input() { - if (glfwWindowShouldClose(m_glfw_window) || ImGui::IsKeyDown(GLFW_KEY_ESCAPE) || ImGui::IsKeyDown(GLFW_KEY_Q)) { +bool Testbed::begin_frame() { + if (glfwWindowShouldClose(m_glfw_window) || ImGui::IsKeyPressed(GLFW_KEY_ESCAPE) || ImGui::IsKeyPressed(GLFW_KEY_Q)) { destroy_window(); return false; } @@ -1769,21 +1903,18 @@ bool Testbed::begin_frame_and_handle_user_input() { ImGui::NewFrame(); ImGuizmo::BeginFrame(); + return true; +} + +void Testbed::handle_user_input() { if (ImGui::IsKeyPressed(GLFW_KEY_TAB) || ImGui::IsKeyPressed(GLFW_KEY_GRAVE_ACCENT)) { m_imgui.enabled = !m_imgui.enabled; } - ImVec2 m = ImGui::GetMousePos(); - int mb = 0; - float mw = 0.f; - ImVec2 relm = {}; - if (!ImGui::IsAnyItemActive() && !ImGuizmo::IsUsing() && !ImGuizmo::IsOver()) { - relm = ImGui::GetIO().MouseDelta; - if (ImGui::GetIO().MouseDown[0]) mb |= 1; - if (ImGui::GetIO().MouseDown[1]) mb |= 2; - if (ImGui::GetIO().MouseDown[2]) mb |= 4; - mw = ImGui::GetIO().MouseWheel; - relm = {relm.x / (float)m_window_res.y(), relm.y / (float)m_window_res.y()}; + // Only respond to mouse inputs when not interacting with ImGui + if (!ImGui::IsAnyItemActive() && !ImGuizmo::IsUsing() && !ImGuizmo::IsOver() && !ImGui::GetIO().WantCaptureMouse) { + mouse_wheel(); + mouse_drag(); } if (m_testbed_mode == ETestbedMode::Nerf && (m_render_ground_truth || m_nerf.training.render_error_overlay)) { @@ -1791,21 +1922,150 @@ bool Testbed::begin_frame_and_handle_user_input() { int bestimage = find_best_training_view(-1); if (bestimage >= 0) { m_nerf.training.view = bestimage; - if (mb == 0) {// snap camera to ground truth view on mouse up + if (ImGui::GetIO().MouseReleased[0]) {// snap camera to ground truth view on mouse up set_camera_to_training_view(m_nerf.training.view); } } } keyboard_event(); - mouse_wheel({m.x / (float)m_window_res.y(), m.y / (float)m_window_res.y()}, mw); - mouse_drag({relm.x, relm.y}, mb); if (m_imgui.enabled) { imgui(); } +} - return true; +Vector3f Testbed::vr_to_world(const Vector3f& pos) const { + return m_camera.block<3, 3>(0, 0) * pos * m_scale + m_camera.col(3); +} + +void Testbed::begin_vr_frame_and_handle_vr_input() { + if (!m_hmd) { + m_vr_frame_info = nullptr; + return; + } + + m_hmd->poll_events(); + if (!m_hmd->must_run_frame_loop()) { + m_vr_frame_info = nullptr; + return; + } + + m_vr_frame_info = m_hmd->begin_frame(); + + const auto& views = m_vr_frame_info->views; + size_t n_views = views.size(); + size_t n_devices = m_devices.size(); + if (n_views > 0) { + set_n_views(n_views); + + Vector2i total_size = Vector2i::Zero(); + for (size_t i = 0; i < n_views; ++i) { + Vector2i view_resolution = {views[i].view.subImage.imageRect.extent.width, views[i].view.subImage.imageRect.extent.height}; + total_size += view_resolution; + + m_views[i].full_resolution = view_resolution; + + // Apply the VR pose relative to the world camera transform. 
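+			// (The rotation composes with the world camera's rotation; the position goes through vr_to_world(), which also applies the scene scale m_scale.)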
+			m_views[i].camera0.block<3, 3>(0, 0) = m_camera.block<3, 3>(0, 0) * views[i].pose.block<3, 3>(0, 0);
+			m_views[i].camera0.col(3) = vr_to_world(views[i].pose.col(3));
+			m_views[i].camera1 = m_views[i].camera0;
+
+			m_views[i].visualized_dimension = m_visualized_dimension;
+
+			const auto& xr_fov = views[i].view.fov;
+
+			// Compute the distance on the image plane (1 unit away from the camera) that an angle of the respective FOV spans.
+			Vector2f rel_focal_length_left_down = 0.5f * fov_to_focal_length(Vector2i::Ones(), Vector2f{360.0f * xr_fov.angleLeft / PI(), 360.0f * xr_fov.angleDown / PI()});
+			Vector2f rel_focal_length_right_up = 0.5f * fov_to_focal_length(Vector2i::Ones(), Vector2f{360.0f * xr_fov.angleRight / PI(), 360.0f * xr_fov.angleUp / PI()});
+
+			// Compute total distance (for X and Y) that is spanned on the image plane.
+			m_views[i].relative_focal_length = rel_focal_length_right_up - rel_focal_length_left_down;
+
+			// Compute fraction of that distance that is spanned by the right-up part and set screen center accordingly.
+			Vector2f ratio = rel_focal_length_right_up.cwiseQuotient(m_views[i].relative_focal_length);
+			m_views[i].screen_center = { 1.0f - ratio.x(), ratio.y() };
+
+			// The rendering pipeline derives focal lengths from the resolution along m_fov_axis,
+			// so rescale the other axis by the view's aspect ratio to compensate.
+			m_views[i].relative_focal_length[(m_fov_axis+1)%2] *= (float)view_resolution[(m_fov_axis+1)%2] / (float)view_resolution[m_fov_axis];
+			m_views[i].render_buffer->set_hidden_area_mask(views[i].hidden_area_mask);
+
+			// Render each view on a different GPU (if available)
+			m_views[i].device = m_use_aux_devices ? &m_devices.at(i % m_devices.size()) : &primary_device();
+		}
+
+		// Put all the views next to each other, but at half size
+		glfwSetWindowSize(m_glfw_window, total_size.x() / 2, (total_size.y() / 2) / n_views);
+
+		// VR controller input
+		const auto& hands = m_vr_frame_info->hands;
+		m_fps_camera = true;
+
+		// TRANSLATE BY STICK (if not pressing the stick)
+		if (!hands[0].pressing) {
+			Vector3f translate_vec = Vector3f{hands[0].thumbstick.x(), 0.0f, hands[0].thumbstick.y()} * m_camera_velocity * m_frame_ms.val() / 1000.0f;
+			if (translate_vec != Vector3f::Zero()) {
+				translate_camera(translate_vec, m_views.front().camera0.block<3, 3>(0, 0), false);
+			}
+		}
+
+		// TURN BY STICK (if not pressing the stick)
+		if (!hands[1].pressing) {
+			auto prev_camera = m_camera;
+
+			// Turn around the up vector (equivalent to x-axis mouse drag) with right joystick left/right
+			float sensitivity = 0.35f;
+			m_camera.block<3, 3>(0, 0) = rotation_from_angles({-2.0f * PI() * sensitivity * hands[1].thumbstick.x() * m_frame_ms.val() / 1000.0f, 0.0f}) * m_camera.block<3, 3>(0, 0);
+
+			// Translate the camera such that the center of rotation remains at the current view position.
+			m_camera.col(3) += prev_camera.block<3, 3>(0, 0) * views[0].pose.col(3) * m_scale - m_camera.block<3, 3>(0, 0) * views[0].pose.col(3) * m_scale;
+		}
+
+		// TRANSLATE, SCALE, AND ROTATE BY GRAB
+		{
+			bool both_grabbing = hands[0].grabbing && hands[1].grabbing;
+			float drag_factor = both_grabbing ? 0.5f : 1.0f;
+
+			if (both_grabbing) {
+				Vector3f prev_diff = hands[0].prev_grab_pos - hands[1].prev_grab_pos;
+				Vector3f diff = hands[0].grab_pos - hands[1].grab_pos;
+				Vector3f center = 0.5f * (hands[0].grab_pos + hands[1].grab_pos);
+				Vector3f center_world = vr_to_world(center);
+
+				// Scale around the center position of the two dragging hands.
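+				// (Keeping the world-space distance between the hands constant gives scale = m_scale * |prev_diff| / |diff|.)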
+				// The effect is similar to phone pinch-to-zoom.
+				float scale = m_scale * prev_diff.norm() / diff.norm();
+				m_camera.col(3) = (view_pos() - center_world) * (scale / m_scale) + center_world;
+				m_scale = scale;
+
+				// Take the rotational component and project it to the nearest rotation about the up vector.
+				// We don't want to rotate the scene about any other axis.
+				Vector3f rot = prev_diff.normalized().cross(diff.normalized());
+				float rot_radians = std::asin(m_up_dir.dot(rot));
+
+				auto prev_camera = m_camera;
+				m_camera.block<3, 3>(0, 0) = AngleAxisf(rot_radians, m_up_dir) * m_camera.block<3, 3>(0, 0);
+				m_camera.col(3) += prev_camera.block<3, 3>(0, 0) * center * m_scale - m_camera.block<3, 3>(0, 0) * center * m_scale;
+			}
+
+			for (const auto& hand : hands) {
+				if (hand.grabbing) {
+					m_camera.col(3) -= drag_factor * m_camera.block<3, 3>(0, 0) * hand.drag() * m_scale;
+				}
+			}
+		}
+
+		// ERASE OCCUPANCY WHEN PRESSING STICK/TRACKPAD
+		if (m_testbed_mode == ETestbedMode::Nerf) {
+			for (const auto& hand : hands) {
+				if (hand.pressing) {
+					mark_density_grid_in_sphere_empty(vr_to_world(hand.pose.col(3)), m_scale * 0.05f, m_stream.get());
+				}
+			}
+		}
+	}
+}

 void Testbed::SecondWindow::draw(GLuint texture) {
@@ -1834,11 +2094,164 @@ void Testbed::SecondWindow::draw(GLuint texture) {
 	glfwMakeContextCurrent(old_context);
 }

+void Testbed::init_opengl_shaders() {
+	static const char* shader_vert = R"(#version 140
+		out vec2 UVs;
+		void main() {
+			UVs = vec2((gl_VertexID << 1) & 2, gl_VertexID & 2);
+			gl_Position = vec4(UVs * 2.0 - 1.0, 0.0, 1.0);
+		})";
+
+	static const char* shader_frag = R"(#version 140
+		in vec2 UVs;
+		out vec4 frag_color;
+		uniform sampler2D rgba_texture;
+		uniform sampler2D depth_texture;
+
+		struct FoveationWarp {
+			float al, bl, cl;
+			float am, bm;
+			float ar, br, cr;
+			float switch_left, switch_right;
+			float inv_switch_left, inv_switch_right;
+		};
+
+		uniform FoveationWarp warp_x;
+		uniform FoveationWarp warp_y;
+
+		float unwarp(in FoveationWarp warp, float y) {
+			y = clamp(y, 0.0, 1.0);
+			if (y < warp.inv_switch_left) {
+				return (sqrt(-4.0 * warp.al * warp.cl + 4.0 * warp.al * y + warp.bl * warp.bl) - warp.bl) / (2.0 * warp.al);
+			} else if (y > warp.inv_switch_right) {
+				return (sqrt(-4.0 * warp.ar * warp.cr + 4.0 * warp.ar * y + warp.br * warp.br) - warp.br) / (2.0 * warp.ar);
+			} else {
+				return (y - warp.bm) / warp.am;
+			}
+		}
+
+		vec2 unwarp(in vec2 pos) {
+			return vec2(unwarp(warp_x, pos.x), unwarp(warp_y, pos.y));
+		}
+
+		void main() {
+			vec2 tex_coords = UVs;
+			tex_coords.y = 1.0 - tex_coords.y;
+			tex_coords = unwarp(tex_coords);
+			frag_color = texture(rgba_texture, tex_coords.xy);
+			// Uncomment the following line to visualize the depth buffer for debugging.
+ // frag_color = vec4(vec3(texture(depth_texture, tex_coords.xy).r), 1.0); + gl_FragDepth = texture(depth_texture, tex_coords.xy).r; + })"; + + GLuint vert = glCreateShader(GL_VERTEX_SHADER); + glShaderSource(vert, 1, &shader_vert, NULL); + glCompileShader(vert); + check_shader(vert, "Blit vertex shader", false); + + GLuint frag = glCreateShader(GL_FRAGMENT_SHADER); + glShaderSource(frag, 1, &shader_frag, NULL); + glCompileShader(frag); + check_shader(frag, "Blit fragment shader", false); + + m_blit_program = glCreateProgram(); + glAttachShader(m_blit_program, vert); + glAttachShader(m_blit_program, frag); + glLinkProgram(m_blit_program); + check_shader(m_blit_program, "Blit shader program", true); + + glDeleteShader(vert); + glDeleteShader(frag); + + glGenVertexArrays(1, &m_blit_vao); +} + +void Testbed::blit_texture(const Foveation& foveation, GLint rgba_texture, GLint rgba_filter_mode, GLint depth_texture, GLint framebuffer, const Vector2i& offset, const Vector2i& resolution) { + if (m_blit_program == 0) { + return; + } + + // Blit image to OpenXR swapchain. + // Note that the OpenXR swapchain is 8bit while the rendering is in a float texture. + // As some XR runtimes do not support float swapchains, we can't render into it directly. + + bool tex = glIsEnabled(GL_TEXTURE_2D); + bool depth = glIsEnabled(GL_DEPTH_TEST); + bool cull = glIsEnabled(GL_CULL_FACE); + + if (!tex) glEnable(GL_TEXTURE_2D); + if (!depth) glEnable(GL_DEPTH_TEST); + if (cull) glDisable(GL_CULL_FACE); + + glDepthFunc(GL_ALWAYS); + glDepthMask(GL_TRUE); + + glBindVertexArray(m_blit_vao); + glUseProgram(m_blit_program); + glUniform1i(glGetUniformLocation(m_blit_program, "rgba_texture"), 0); + glUniform1i(glGetUniformLocation(m_blit_program, "depth_texture"), 1); + + auto bind_warp = [&](const FoveationPiecewiseQuadratic& warp, const std::string& uniform_name) { + glUniform1f(glGetUniformLocation(m_blit_program, (uniform_name + ".al").c_str()), warp.al); + glUniform1f(glGetUniformLocation(m_blit_program, (uniform_name + ".bl").c_str()), warp.bl); + glUniform1f(glGetUniformLocation(m_blit_program, (uniform_name + ".cl").c_str()), warp.cl); + + glUniform1f(glGetUniformLocation(m_blit_program, (uniform_name + ".am").c_str()), warp.am); + glUniform1f(glGetUniformLocation(m_blit_program, (uniform_name + ".bm").c_str()), warp.bm); + + glUniform1f(glGetUniformLocation(m_blit_program, (uniform_name + ".ar").c_str()), warp.ar); + glUniform1f(glGetUniformLocation(m_blit_program, (uniform_name + ".br").c_str()), warp.br); + glUniform1f(glGetUniformLocation(m_blit_program, (uniform_name + ".cr").c_str()), warp.cr); + + glUniform1f(glGetUniformLocation(m_blit_program, (uniform_name + ".switch_left").c_str()), warp.switch_left); + glUniform1f(glGetUniformLocation(m_blit_program, (uniform_name + ".switch_right").c_str()), warp.switch_right); + + glUniform1f(glGetUniformLocation(m_blit_program, (uniform_name + ".inv_switch_left").c_str()), warp.inv_switch_left); + glUniform1f(glGetUniformLocation(m_blit_program, (uniform_name + ".inv_switch_right").c_str()), warp.inv_switch_right); + }; + + bind_warp(foveation.warp_x, "warp_x"); + bind_warp(foveation.warp_y, "warp_y"); + + glActiveTexture(GL_TEXTURE1); + glBindTexture(GL_TEXTURE_2D, depth_texture); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + + 
glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D, rgba_texture); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, rgba_filter_mode); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, rgba_filter_mode); + + glBindFramebuffer(GL_FRAMEBUFFER, framebuffer); + glViewport(offset.x(), offset.y(), resolution.x(), resolution.y()); + + glDrawArrays(GL_TRIANGLES, 0, 3); + + glBindVertexArray(0); + glUseProgram(0); + + glDepthFunc(GL_LESS); + + // restore old state + if (!tex) glDisable(GL_TEXTURE_2D); + if (!depth) glDisable(GL_DEPTH_TEST); + if (cull) glEnable(GL_CULL_FACE); + glBindFramebuffer(GL_FRAMEBUFFER, 0); +} + void Testbed::draw_gui() { // Make sure all the cuda code finished its business here CUDA_CHECK_THROW(cudaDeviceSynchronize()); - if (!m_render_textures.empty()) - m_second_window.draw((GLuint)m_render_textures.front()->texture()); + + if (!m_rgba_render_textures.empty()) { + m_second_window.draw((GLuint)m_rgba_render_textures.front()->texture()); + } + glfwMakeContextCurrent(m_glfw_window); int display_w, display_h; glfwGetFramebufferSize(m_glfw_window, &display_w, &display_h); @@ -1846,56 +2259,42 @@ void Testbed::draw_gui() { glClearColor(0.f, 0.f, 0.f, 0.f); glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); + glEnable(GL_BLEND); + glBlendEquationSeparate(GL_FUNC_ADD, GL_FUNC_ADD); + glBlendFuncSeparate(GL_ONE, GL_ONE_MINUS_SRC_ALPHA, GL_ONE, GL_ONE_MINUS_SRC_ALPHA); - ImDrawList* list = ImGui::GetBackgroundDrawList(); - list->AddCallback([](const ImDrawList*, const ImDrawCmd*) { - glBlendEquationSeparate(GL_FUNC_ADD, GL_FUNC_ADD); - glBlendFuncSeparate(GL_ONE, GL_ONE_MINUS_SRC_ALPHA, GL_ONE, GL_ONE_MINUS_SRC_ALPHA); - }, nullptr); - - if (m_single_view) { - list->AddImageQuad((ImTextureID)(size_t)m_render_textures.front()->texture(), ImVec2{0.f, 0.f}, ImVec2{(float)display_w, 0.f}, ImVec2{(float)display_w, (float)display_h}, ImVec2{0.f, (float)display_h}, ImVec2(0, 0), ImVec2(1, 0), ImVec2(1, 1), ImVec2(0, 1)); - } else { - m_dlss = false; + Vector2i extent = Vector2f{(float)display_w / m_n_views.x(), (float)display_h / m_n_views.y()}.cast<int>(); - int i = 0; - for (int y = 0; y < m_n_views.y(); ++y) { - for (int x = 0; x < m_n_views.x(); ++x) { - if (i >= m_render_surfaces.size()) { - break; - } + int i = 0; + for (int y = 0; y < m_n_views.y(); ++y) { + for (int x = 0; x < m_n_views.x(); ++x) { + if (i >= m_views.size()) { + break; + } - Vector2f top_left{x * m_view_size.x(), y * m_view_size.y()}; - - list->AddImageQuad( - (ImTextureID)(size_t)m_render_textures[i]->texture(), - ImVec2{top_left.x(), top_left.y() }, - ImVec2{top_left.x() + (float)m_view_size.x(), top_left.y() }, - ImVec2{top_left.x() + (float)m_view_size.x(), top_left.y() + (float)m_view_size.y()}, - ImVec2{top_left.x(), top_left.y() + (float)m_view_size.y()}, - ImVec2(0, 0), - ImVec2(1, 0), - ImVec2(1, 1), - ImVec2(0, 1) - ); + auto& view = m_views[i]; + Vector2i top_left{x * extent.x(), display_h - (y + 1) * extent.y()}; + blit_texture(view.foveation, m_rgba_render_textures.at(i)->texture(), m_foveated_rendering ? 
GL_LINEAR : GL_NEAREST, m_depth_render_textures.at(i)->texture(), 0, top_left, extent); - ++i; - } + ++i; } } + glFinish(); + glViewport(0, 0, display_w, display_h); + + ImDrawList* list = ImGui::GetBackgroundDrawList(); list->AddCallback(ImDrawCallback_ResetRenderState, nullptr); auto draw_mesh = [&]() { glClear(GL_DEPTH_BUFFER_BIT); Vector2i res(display_w, display_h); - Vector2f focal_length = calc_focal_length(res, m_fov_axis, m_zoom); - Vector2f screen_center = render_screen_center(); - draw_mesh_gl(m_mesh.verts, m_mesh.vert_normals, m_mesh.vert_colors, m_mesh.indices, res, focal_length, m_smoothed_camera, screen_center, (int)m_mesh_render_mode); + Vector2f focal_length = calc_focal_length(res, m_relative_focal_length, m_fov_axis, m_zoom); + draw_mesh_gl(m_mesh.verts, m_mesh.vert_normals, m_mesh.vert_colors, m_mesh.indices, res, focal_length, m_smoothed_camera, render_screen_center(m_screen_center), (int)m_mesh_render_mode); }; // Visualizations are only meaningful when rendering a single view - if (m_single_view) { + if (m_views.size() == 1) { if (m_mesh.verts.size() != 0 && m_mesh.indices.size() != 0 && m_mesh_render_mode != EMeshRenderMode::Off) { list->AddCallback([](const ImDrawList*, const ImDrawCmd* cmd) { (*(decltype(draw_mesh)*)cmd->UserCallbackData)(); @@ -1955,7 +2354,7 @@ void Testbed::prepare_next_camera_path_frame() { // If we're rendering a video, we'd like to accumulate multiple spp // for motion blur. Hence dump the frame once the target spp has been reached // and only reset _then_. - if (m_render_surfaces.front().spp() == m_camera_path.render_settings.spp) { + if (m_views.front().render_buffer->spp() == m_camera_path.render_settings.spp) { auto tmp_dir = fs::path{"tmp"}; if (!tmp_dir.exists()) { if (!fs::create_directory(tmp_dir)) { @@ -1965,7 +2364,7 @@ void Testbed::prepare_next_camera_path_frame() { } } - Vector2i res = m_render_surfaces.front().out_resolution(); + Vector2i res = m_views.front().render_buffer->out_resolution(); const dim3 threads = { 16, 8, 1 }; const dim3 blocks = { div_round_up((uint32_t)res.x(), threads.x), div_round_up((uint32_t)res.y(), threads.y), 1 }; @@ -1973,7 +2372,7 @@ void Testbed::prepare_next_camera_path_frame() { to_8bit_color_kernel<<<blocks, threads>>>( res, EColorSpace::SRGB, // the GUI always renders in SRGB - m_render_surfaces.front().surface(), + m_views.front().render_buffer->surface(), image_data.data() ); @@ -2047,7 +2446,7 @@ void Testbed::prepare_next_camera_path_frame() { const auto& rs = m_camera_path.render_settings; m_camera_path.play_time = (float)((double)m_camera_path.render_frame_idx / (double)rs.n_frames()); - if (m_render_surfaces.front().spp() == 0) { + if (m_views.front().render_buffer->spp() == 0) { set_camera_from_time(m_camera_path.play_time); apply_camera_smoothing(rs.frame_milliseconds()); @@ -2109,134 +2508,204 @@ void Testbed::train_and_render(bool skip_rendering) { autofocus(); } - if (m_single_view) { - // Should have been created when the window was created. - assert(!m_render_surfaces.empty()); +#ifdef NGP_GUI + if (m_hmd && m_hmd->is_visible()) { + for (auto& view : m_views) { + view.visualized_dimension = m_visualized_dimension; + } - auto& render_buffer = m_render_surfaces.front(); + m_n_views = {m_views.size(), 1}; - { - // Don't count the time being spent allocating buffers and resetting DLSS as part of the frame time. - // Otherwise the dynamic resolution calculations for following frames will be thrown out of whack - // and may even start oscillating. 
- auto skip_start = std::chrono::steady_clock::now(); - ScopeGuard skip_timing_guard{[&]() { - start += std::chrono::steady_clock::now() - skip_start; - }}; - if (m_dlss) { - render_buffer.enable_dlss(m_window_res); - m_aperture_size = 0.0f; - } else { - render_buffer.disable_dlss(); - } + m_nerf.render_with_lens_distortion = false; + reset_accumulation(true); + } else if (m_single_view) { + set_n_views(1); + m_n_views = {1, 1}; - auto render_res = render_buffer.in_resolution(); - if (render_res.isZero() || (m_train && m_training_step == 0)) { - render_res = m_window_res/16; - } else { - render_res = render_res.cwiseMin(m_window_res); - } + auto& view = m_views.front(); - float render_time_per_fullres_frame = m_render_ms.val() / (float)render_res.x() / (float)render_res.y() * (float)m_window_res.x() * (float)m_window_res.y(); + view.full_resolution = m_window_res; - // Make sure we don't starve training with slow rendering - float factor = std::sqrt(1000.0f / m_dynamic_res_target_fps / render_time_per_fullres_frame); - if (!m_dynamic_res) { - factor = 8.f/(float)m_fixed_res_factor; - } + view.camera0 = m_smoothed_camera; - factor = tcnn::clamp(factor, 1.0f/16.0f, 1.0f); + // Motion blur over the fraction of time that the shutter is open. Interpolate in log-space to preserve rotations. + view.camera1 = m_camera_path.rendering ? log_space_lerp(m_smoothed_camera, m_camera_path.render_frame_end_camera, m_camera_path.render_settings.shutter_fraction) : view.camera0; - if (factor > m_last_render_res_factor * 1.2f || factor < m_last_render_res_factor * 0.8f || factor == 1.0f || !m_dynamic_res) { - render_res = (m_window_res.cast<float>() * factor).cast<int>().cwiseMin(m_window_res).cwiseMax(m_window_res/16); - m_last_render_res_factor = factor; - } + view.visualized_dimension = m_visualized_dimension; + view.relative_focal_length = m_relative_focal_length; + view.screen_center = m_screen_center; + view.render_buffer->set_hidden_area_mask(nullptr); + view.foveation = {}; + view.device = &primary_device(); + } else { + int n_views = n_dimensions_to_visualize()+1; - if (m_camera_path.rendering) { - render_res = m_camera_path.render_settings.resolution; - m_last_render_res_factor = 1.0f; - } + float d = std::sqrt((float)m_window_res.x() * (float)m_window_res.y() / (float)n_views); + + int nx = (int)std::ceil((float)m_window_res.x() / d); + int ny = (int)std::ceil((float)n_views / (float)nx); + + m_n_views = {nx, ny}; + Vector2i view_size = {m_window_res.x() / nx, m_window_res.y() / ny}; + + set_n_views(n_views); - if (render_buffer.dlss()) { - render_res = render_buffer.dlss()->clamp_resolution(render_res); - render_buffer.dlss()->update_feature(render_res, render_buffer.dlss()->is_hdr(), render_buffer.dlss()->sharpen()); + int i = 0; + for (int y = 0; y < ny; ++y) { + for (int x = 0; x < nx; ++x) { + if (i >= n_views) { + break; + } + + m_views[i].full_resolution = view_size; + + m_views[i].camera0 = m_views[i].camera1 = m_smoothed_camera; + m_views[i].visualized_dimension = i-1; + m_views[i].relative_focal_length = m_relative_focal_length; + m_views[i].screen_center = m_screen_center; + m_views[i].render_buffer->set_hidden_area_mask(nullptr); + m_views[i].foveation = {}; + m_views[i].device = &primary_device(); + ++i; } + } + } - render_buffer.resize(render_res); + if (m_dlss) { + m_aperture_size = 0.0f; + if (!supports_dlss(m_nerf.render_lens.mode)) { + m_nerf.render_with_lens_distortion = false; } + } - render_frame( - m_smoothed_camera, - m_camera_path.rendering ? 
log_space_lerp(m_smoothed_camera, m_camera_path.render_frame_end_camera, m_camera_path.render_settings.shutter_fraction) : m_smoothed_camera, - {0.0f, 0.0f, 0.0f, 1.0f}, - render_buffer - ); + // Update dynamic res and DLSS + { + // Don't count the time being spent allocating buffers and resetting DLSS as part of the frame time. + // Otherwise the dynamic resolution calculations for following frames will be thrown out of whack + // and may even start oscillating. + auto skip_start = std::chrono::steady_clock::now(); + ScopeGuard skip_timing_guard{[&]() { + start += std::chrono::steady_clock::now() - skip_start; + }}; -#ifdef NGP_GUI - m_render_textures.front()->blit_from_cuda_mapping(); + size_t n_pixels = 0, n_pixels_full_res = 0; + for (const auto& view : m_views) { + n_pixels += view.render_buffer->in_resolution().prod(); + n_pixels_full_res += view.full_resolution.prod(); + } - if (m_picture_in_picture_res > 0) { - Vector2i res(m_picture_in_picture_res, m_picture_in_picture_res * 9/16); - m_pip_render_surface->resize(res); - if (m_pip_render_surface->spp() < 8) { - // a bit gross, but let's copy the keyframe's state into the global state in order to not have to plumb through the fov etc to render_frame. - CameraKeyframe backup = copy_camera_to_keyframe(); - CameraKeyframe pip_kf = m_camera_path.eval_camera_path(m_camera_path.play_time); - set_camera_from_keyframe(pip_kf); - render_frame(pip_kf.m(), pip_kf.m(), Eigen::Vector4f::Zero(), *m_pip_render_surface); - set_camera_from_keyframe(backup); + float pixel_ratio = (n_pixels == 0 || (m_train && m_training_step == 0)) ? (1.0f / 256.0f) : ((float)n_pixels / (float)n_pixels_full_res); - m_pip_render_texture->blit_from_cuda_mapping(); - } + float last_factor = std::sqrt(pixel_ratio); + float factor = std::sqrt(pixel_ratio / m_render_ms.val() * 1000.0f / m_dynamic_res_target_fps); + if (!m_dynamic_res) { + factor = 8.f / (float)m_fixed_res_factor; } -#endif - } else { -#ifdef NGP_GUI - // Don't do DLSS when multi-view rendering - m_dlss = false; - m_render_surfaces.front().disable_dlss(); - int n_views = n_dimensions_to_visualize()+1; + factor = tcnn::clamp(factor, 1.0f / 16.0f, 1.0f); - float d = std::sqrt((float)m_window_res.x() * (float)m_window_res.y() / (float)n_views); + for (auto&& view : m_views) { + if (m_dlss) { + view.render_buffer->enable_dlss(*m_dlss_provider, view.full_resolution); + } else { + view.render_buffer->disable_dlss(); + } - int nx = (int)std::ceil((float)m_window_res.x() / d); - int ny = (int)std::ceil((float)n_views / (float)nx); + Vector2i render_res = view.render_buffer->in_resolution(); + Vector2i new_render_res = (view.full_resolution.cast<float>() * factor).cast<int>().cwiseMin(view.full_resolution).cwiseMax(view.full_resolution / 16); - m_n_views = {nx, ny}; - m_view_size = {m_window_res.x() / nx, m_window_res.y() / ny}; + if (m_camera_path.rendering) { + new_render_res = m_camera_path.render_settings.resolution; + } + + float ratio = std::sqrt((float)render_res.prod() / (float)new_render_res.prod()); + if (ratio > 1.2f || ratio < 0.8f || factor == 1.0f || !m_dynamic_res || m_camera_path.rendering) { + render_res = new_render_res; + } + + if (view.render_buffer->dlss()) { + render_res = view.render_buffer->dlss()->clamp_resolution(render_res); + view.render_buffer->dlss()->update_feature(render_res, view.render_buffer->dlss()->is_hdr(), view.render_buffer->dlss()->sharpen()); + } - while (m_render_surfaces.size() > n_views) { - m_render_surfaces.pop_back(); + view.render_buffer->resize(render_res); + + if 
(m_foveated_rendering) {
+				float foveation_warped_full_res_diameter = 0.55f;
+				Vector2f resolution_scale = render_res.cast<float>().cwiseQuotient(view.full_resolution.cast<float>());
+
+				// Only start foveation when DLSS is off or when DLSS is asked to do more than 1.5x upscaling.
+				// The reason for the 1.5x threshold is that DLSS can do up to 3x upscaling, at which point a foveation
+				// factor of 2x = 3.0x/1.5x corresponds exactly to bilinear super sampling, which is helpful in
+				// suppressing DLSS's artifacts.
+				float foveation_begin_factor = m_dlss ? 1.5f : 1.0f;
+
+				resolution_scale = (resolution_scale * foveation_begin_factor).cwiseMin(1.0f).cwiseMax(1.0f / m_foveated_rendering_max_scaling);
+				view.foveation = {resolution_scale, Vector2f::Ones() - view.screen_center, Vector2f::Constant(foveation_warped_full_res_diameter * 0.5f)};
+			} else {
+				view.foveation = {};
+			}
 		}
 	}

-	m_render_textures.resize(n_views);
-	while (m_render_surfaces.size() < n_views) {
-		size_t idx = m_render_surfaces.size();
-		m_render_textures[idx] = std::make_shared<GLTexture>();
-		m_render_surfaces.emplace_back(m_render_textures[idx]);
+	// Make sure all in-use auxiliary GPUs have the latest model and bitfield
+	std::unordered_set<CudaDevice*> devices_in_use;
+	for (auto& view : m_views) {
+		if (!view.device || devices_in_use.count(view.device) != 0) {
+			continue;
 		}

-		int i = 0;
-		for (int y = 0; y < ny; ++y) {
-			for (int x = 0; x < nx; ++x) {
-				if (i >= n_views) {
-					return;
-				}
+		devices_in_use.insert(view.device);
+		sync_device(*view.render_buffer, *view.device);
+	}

-				m_visualized_dimension = i-1;
-				m_render_surfaces[i].resize(m_view_size);
+	{
+		SyncedMultiStream synced_streams{m_stream.get(), m_views.size()};
+
+		std::vector<std::future<void>> futures(m_views.size());
+		for (size_t i = 0; i < m_views.size(); ++i) {
+			auto& view = m_views[i];
+			futures[i] = view.device->enqueue_task([this, &view, stream=synced_streams.get(i)]() {
+				auto device_guard = use_device(stream, *view.render_buffer, *view.device);
+				render_frame_main(*view.device, view.camera0, view.camera1, view.screen_center, view.relative_focal_length, {0.0f, 0.0f, 0.0f, 1.0f}, view.foveation, view.visualized_dimension);
+			});
+		}

-				render_frame(m_smoothed_camera, m_smoothed_camera, Eigen::Vector4f::Zero(), m_render_surfaces[i]);
+		for (size_t i = 0; i < m_views.size(); ++i) {
+			auto& view = m_views[i];

-				m_render_textures[i]->blit_from_cuda_mapping();
-				++i;
+			if (futures[i].valid()) {
+				futures[i].get();
 			}
+
+			render_frame_epilogue(synced_streams.get(i), view.camera0, view.prev_camera, view.screen_center, view.relative_focal_length, view.foveation, view.prev_foveation, *view.render_buffer, true);
+			view.prev_camera = view.camera0;
+			view.prev_foveation = view.foveation;
 		}
-#else
-	throw std::runtime_error{"Multi-view rendering is only supported when compiling with NGP_GUI."};
-#endif
 	}
+
+	for (size_t i = 0; i < m_views.size(); ++i) {
+		m_rgba_render_textures.at(i)->blit_from_cuda_mapping();
+		m_depth_render_textures.at(i)->blit_from_cuda_mapping();
+	}
+
+	if (m_picture_in_picture_res > 0) {
+		Vector2i res(m_picture_in_picture_res, m_picture_in_picture_res * 9/16);
+		m_pip_render_buffer->resize(res);
+		if (m_pip_render_buffer->spp() < 8) {
+			// a bit gross, but let's copy the keyframe's state into the global state in order to not have to plumb through the fov etc to render_frame.
+			CameraKeyframe backup = copy_camera_to_keyframe();
+			CameraKeyframe pip_kf = m_camera_path.eval_camera_path(m_camera_path.play_time);
+			set_camera_from_keyframe(pip_kf);
+			render_frame(m_stream.get(), pip_kf.m(), pip_kf.m(), pip_kf.m(), m_screen_center, m_relative_focal_length, Eigen::Vector4f::Zero(), {}, {}, m_visualized_dimension, *m_pip_render_buffer);
+			set_camera_from_keyframe(backup);
+
+			m_pip_render_texture->blit_from_cuda_mapping();
+		}
+	}
+#endif
+
+	CUDA_CHECK_THROW(cudaStreamSynchronize(m_stream.get()));
 }
@@ -2262,7 +2731,6 @@ void Testbed::create_second_window() {
 		win_x = 0x40000000; win_y = 0x40000000;
 	static const char* copy_shader_vert = "\
-		layout (location = 0)\n\
 		in vec2 vertPos_data;\n\
 		out vec2 texCoords;\n\
 		void main(){\n\
@@ -2300,7 +2768,8 @@ void Testbed::create_second_window() {
 		1.0f, 1.0f,
 		1.0f, 1.0f,
 		1.0f, -1.0f,
-		-1.0f, -1.0f};
+		-1.0f, -1.0f
+	};
 	glBindBuffer(GL_ARRAY_BUFFER, m_second_window.vbo);
 	glBufferData(GL_ARRAY_BUFFER, sizeof(fsquadVerts), fsquadVerts, GL_STATIC_DRAW);
 	glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, 2 * sizeof(float), (void *)0);
@@ -2308,6 +2777,84 @@ void Testbed::create_second_window() {
 	glBindBuffer(GL_ARRAY_BUFFER, 0);
 	glBindVertexArray(0);
 }
+
+void Testbed::init_vr() {
+	try {
+		if (!m_glfw_window) {
+			throw std::runtime_error{"`init_window` must be called before `init_vr`"};
+		}
+
+#if defined(XR_USE_PLATFORM_WIN32)
+		m_hmd = std::make_unique<OpenXRHMD>(wglGetCurrentDC(), glfwGetWGLContext(m_glfw_window));
+#elif defined(XR_USE_PLATFORM_XLIB)
+		Display* xDisplay = glfwGetX11Display();
+		GLXContext glxContext = glfwGetGLXContext(m_glfw_window);
+
+		int glxFBConfigXID = 0;
+		glXQueryContext(xDisplay, glxContext, GLX_FBCONFIG_ID, &glxFBConfigXID);
+		int attributes[3] = { GLX_FBCONFIG_ID, glxFBConfigXID, 0 };
+		int nelements = 1;
+		GLXFBConfig* pglxFBConfig = glXChooseFBConfig(xDisplay, 0, attributes, &nelements);
+		if (nelements != 1 || !pglxFBConfig) {
+			throw std::runtime_error{"init_vr(): Couldn't obtain GLXFBConfig"};
+		}
+
+		GLXFBConfig glxFBConfig = *pglxFBConfig;
+
+		XVisualInfo* visualInfo = glXGetVisualFromFBConfig(xDisplay, glxFBConfig);
+		if (!visualInfo) {
+			throw std::runtime_error{"init_vr(): Couldn't obtain XVisualInfo"};
+		}
+
+		m_hmd = std::make_unique<OpenXRHMD>(xDisplay, visualInfo->visualid, glxFBConfig, glXGetCurrentDrawable(), glxContext);
+#elif defined(XR_USE_PLATFORM_WAYLAND)
+		m_hmd = std::make_unique<OpenXRHMD>(glfwGetWaylandDisplay());
+#endif
+
+		// DLSS + sharpening is instrumental in getting VR to look good.
+		if (m_dlss_provider) {
+			m_dlss = true;
+			m_foveated_rendering = true;
+
+			// VERY aggressive performance settings (to the detriment of quality)
+			// to allow maintaining VR-adequate frame rates.
+			m_nerf.render_min_transmittance = 0.2f;
+		}
+
+		// If multiple GPUs are available, shoot for 60 fps in VR.
+		// Otherwise, it wouldn't be realistic to expect more than 30.
+		m_dynamic_res_target_fps = m_devices.size() > 1 ? 60 : 30;
+
+		// Many VR runtimes perform optical flow for automatic reprojection / motion smoothing.
+		// This breaks down for solid-color backgrounds, sometimes leading to artifacts. Hence:
+		// set the background color to transparent and, in spherical_checkerboard_kernel(...),
+		// blend a checkerboard. If the user desires a solid background nonetheless, they can
+		// set the background color to have an alpha value of 1.0 manually via the GUI or via Python.
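
		// [Editor's aside, not part of the patch] The blend that makes this checkerboard
		// fallback work is the front-to-back composite in spherical_checkerboard_kernel(...)
		// further below:
		//
		//     frame_buffer[idx] += (1.0f - frame_buffer[idx].w()) * checker;
		//
		// With the transparent background set on the next line, the checker shows through
		// wherever nothing opaque was rendered; with a user-set background alpha of 1.0,
		// the added term vanishes and the solid background wins.
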
+		m_background_color = {0.0f, 0.0f, 0.0f, 0.0f};
+		m_render_transparency_as_checkerboard = true;
+	} catch (const std::runtime_error& e) {
+		if (std::string{e.what()}.find("XR_ERROR_FORM_FACTOR_UNAVAILABLE") != std::string::npos) {
+			throw std::runtime_error{"Could not initialize VR. Ensure that SteamVR, OculusVR, or any other OpenXR-compatible runtime is running. Also set it as the active OpenXR runtime."};
+		} else {
+			throw std::runtime_error{fmt::format("Could not initialize VR: {}", e.what())};
+		}
+	}
+}
+
+void Testbed::set_n_views(size_t n_views) {
+	while (m_views.size() > n_views) {
+		m_views.pop_back();
+	}
+
+	m_rgba_render_textures.resize(n_views);
+	m_depth_render_textures.resize(n_views);
+	while (m_views.size() < n_views) {
+		size_t idx = m_views.size();
+		m_rgba_render_textures[idx] = std::make_shared<GLTexture>();
+		m_depth_render_textures[idx] = std::make_shared<GLTexture>();
+		m_views.emplace_back(View{std::make_shared<CudaRenderBuffer>(m_rgba_render_textures[idx], m_depth_render_textures[idx])});
+	}
+}
 #endif //NGP_GUI

 void Testbed::init_window(int resw, int resh, bool hidden, bool second_window) {
@@ -2322,23 +2869,22 @@
 	}

 #ifdef NGP_VULKAN
-	try {
-		vulkan_and_ngx_init();
-		m_dlss_supported = true;
-		if (m_testbed_mode == ETestbedMode::Nerf) {
-			m_dlss = true;
+	// Only try to initialize DLSS (Vulkan+NGX) if the
+	// GPU is sufficiently new. Older GPUs don't support
+	// DLSS, so it is preferable to not make a futile
+	// attempt and emit a warning that confuses users.
+	if (primary_device().compute_capability() >= 70) {
+		try {
+			m_dlss_provider = init_vulkan_and_ngx();
+			if (m_testbed_mode == ETestbedMode::Nerf) {
+				m_dlss = true;
+			}
+		} catch (const std::runtime_error& e) {
+			tlog::warning() << "Could not initialize Vulkan and NGX. DLSS not supported. (" << e.what() << ")";
 		}
-	} catch (const std::runtime_error& e) {
-		tlog::warning() << "Could not initialize Vulkan and NGX. DLSS not supported. (" << e.what() << ")";
 	}
-#else
-	m_dlss_supported = false;
 #endif

-	glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 3);
-	glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 3);
-	glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE);
-	glfwWindowHint(GLFW_OPENGL_FORWARD_COMPAT, GLFW_TRUE);
 	glfwWindowHint(GLFW_VISIBLE, hidden ? GLFW_FALSE : GLFW_TRUE);
 	std::string title = "Instant Neural Graphics Primitives";
 	m_glfw_window = glfwCreateWindow(m_window_res.x(), m_window_res.y(), title.c_str(), NULL, NULL);
@@ -2358,6 +2904,17 @@
 #endif
 	glfwSwapInterval(0); // Disable vsync

+	GLint gl_version_minor, gl_version_major;
+	glGetIntegerv(GL_MINOR_VERSION, &gl_version_minor);
+	glGetIntegerv(GL_MAJOR_VERSION, &gl_version_major);
+
+	if (gl_version_major < 3 || (gl_version_major == 3 && gl_version_minor < 1)) {
+		throw std::runtime_error{fmt::format("Unsupported OpenGL version {}.{}. 
instant-ngp requires at least OpenGL 3.1", gl_version_major, gl_version_minor)}; + } + + tlog::success() << "Initialized OpenGL version " << glGetString(GL_VERSION); + + glfwSetWindowUserPointer(m_glfw_window, this); glfwSetDropCallback(m_glfw_window, [](GLFWwindow* window, int count, const char** paths) { Testbed* testbed = (Testbed*)glfwGetWindowUserPointer(window); @@ -2424,22 +2981,26 @@ void Testbed::init_window(int resw, int resh, bool hidden, bool second_window) { io.ConfigInputTrickleEventQueue = false; // new ImGui event handling seems to make camera controls laggy if this is true. ImGui::StyleColorsDark(); ImGui_ImplGlfw_InitForOpenGL(m_glfw_window, true); - ImGui_ImplOpenGL3_Init("#version 330 core"); + ImGui_ImplOpenGL3_Init("#version 140"); ImGui::GetStyle().ScaleAllSizes(xscale); ImFontConfig font_cfg; font_cfg.SizePixels = 13.0f * xscale; io.Fonts->AddFontDefault(&font_cfg); + init_opengl_shaders(); + // Make sure there's at least one usable render texture - m_render_textures = { std::make_shared<GLTexture>() }; + m_rgba_render_textures = { std::make_shared<GLTexture>() }; + m_depth_render_textures = { std::make_shared<GLTexture>() }; - m_render_surfaces.clear(); - m_render_surfaces.emplace_back(m_render_textures.front()); - m_render_surfaces.front().resize(m_window_res); + m_views.clear(); + m_views.emplace_back(View{std::make_shared<CudaRenderBuffer>(m_rgba_render_textures.front(), m_depth_render_textures.front())}); + m_views.front().full_resolution = m_window_res; + m_views.front().render_buffer->resize(m_views.front().full_resolution); m_pip_render_texture = std::make_shared<GLTexture>(); - m_pip_render_surface = std::make_unique<CudaRenderBuffer>(m_pip_render_texture); + m_pip_render_buffer = std::make_unique<CudaRenderBuffer>(m_pip_render_texture); m_render_window = true; @@ -2457,15 +3018,18 @@ void Testbed::destroy_window() { throw std::runtime_error{"Window must be initialized to be destroyed."}; } - m_render_surfaces.clear(); - m_render_textures.clear(); + m_hmd.reset(); - m_pip_render_surface.reset(); + m_views.clear(); + m_rgba_render_textures.clear(); + m_depth_render_textures.clear(); + + m_pip_render_buffer.reset(); m_pip_render_texture.reset(); #ifdef NGP_VULKAN - m_dlss_supported = m_dlss = false; - vulkan_and_ngx_destroy(); + m_dlss = false; + m_dlss_provider.reset(); #endif ImGui_ImplOpenGL3_Shutdown(); @@ -2474,6 +3038,9 @@ void Testbed::destroy_window() { glfwDestroyWindow(m_glfw_window); glfwTerminate(); + m_blit_program = 0; + m_blit_vao = 0; + m_glfw_window = nullptr; m_render_window = false; #endif //NGP_GUI @@ -2482,9 +3049,12 @@ void Testbed::destroy_window() { bool Testbed::frame() { #ifdef NGP_GUI if (m_render_window) { - if (!begin_frame_and_handle_user_input()) { + if (!begin_frame()) { return false; } + + begin_vr_frame_and_handle_vr_input(); + handle_user_input(); } #endif @@ -2496,7 +3066,7 @@ bool Testbed::frame() { } bool skip_rendering = m_render_skip_due_to_lack_of_camera_movement_counter++ != 0; - if (!m_dlss && m_max_spp > 0 && !m_render_surfaces.empty() && m_render_surfaces.front().spp() >= m_max_spp) { + if (!m_dlss && m_max_spp > 0 && !m_views.empty() && m_views.front().render_buffer->spp() >= m_max_spp) { skip_rendering = true; if (!m_train) { std::this_thread::sleep_for(1ms); @@ -2508,6 +3078,12 @@ bool Testbed::frame() { skip_rendering = false; } +#ifdef NGP_GUI + if (m_hmd && m_hmd->is_visible()) { + skip_rendering = false; + } +#endif + if (!skip_rendering || (std::chrono::steady_clock::now() - m_last_gui_draw_time_point) > 
25ms) { redraw_gui_next_frame(); } @@ -2540,6 +3116,32 @@ bool Testbed::frame() { ImGui::EndFrame(); } + + if (m_vr_frame_info) { + // If HMD is visible to the user, splat rendered images to the HMD + if (m_hmd->is_visible()) { + size_t n_views = std::min(m_views.size(), m_vr_frame_info->views.size()); + + // Blit textures to the OpenXR-owned framebuffers (each corresponding to one eye) + for (size_t i = 0; i < n_views; ++i) { + const auto& vr_view = m_vr_frame_info->views.at(i); + + Vector2i resolution = { + vr_view.view.subImage.imageRect.extent.width, + vr_view.view.subImage.imageRect.extent.height, + }; + + blit_texture(m_views.at(i).foveation, m_rgba_render_textures.at(i)->texture(), GL_LINEAR, m_depth_render_textures.at(i)->texture(), vr_view.framebuffer, Vector2i::Zero(), resolution); + } + + glFinish(); + } + + // Far and near planes are intentionally reversed, because we map depth inversely + // to z. I.e. a window-space depth of 1 refers to the near plane and a depth of 0 + // to the far plane. This results in much better numeric precision. + m_hmd->end_frame(m_vr_frame_info, m_ndc_zfar / m_scale, m_ndc_znear / m_scale); + } #endif return true; @@ -2579,8 +3181,10 @@ void Testbed::set_camera_from_keyframe(const CameraKeyframe& k) { } void Testbed::set_camera_from_time(float t) { - if (m_camera_path.keyframes.empty()) + if (m_camera_path.keyframes.empty()) { return; + } + set_camera_from_keyframe(m_camera_path.eval_camera_path(t)); } @@ -2711,6 +3315,8 @@ void Testbed::reset_network(bool clear_density_grid) { if (clear_density_grid) { m_nerf.density_grid.memset(0); m_nerf.density_grid_bitfield.memset(0); + + set_all_devices_dirty(); } m_loss_graph_samples = 0; @@ -2723,6 +3329,13 @@ void Testbed::reset_network(bool clear_density_grid) { json& optimizer_config = config["optimizer"]; json& network_config = config["network"]; + // If the network config is incomplete, avoid doing further work. + /* + if (config.is_null() || encoding_config.is_null() || loss_config.is_null() || optimizer_config.is_null() || network_config.is_null()) { + return; + } + */ + auto dims = network_dims(); if (m_testbed_mode == ETestbedMode::Nerf) { @@ -2798,16 +3411,22 @@ void Testbed::reset_network(bool clear_density_grid) { uint32_t n_dir_dims = 3; uint32_t n_extra_dims = m_nerf.training.dataset.n_extra_dims(); - m_network = m_nerf_network = std::make_shared<NerfNetwork<precision_t>>( - dims.n_pos, - n_dir_dims, - n_extra_dims, - dims.n_pos + 1, // The offset of 1 comes from the dt member variable of NerfCoordinate. HACKY - encoding_config, - dir_encoding_config, - network_config, - rgb_network_config - ); + + // Instantiate an additional model for each auxiliary GPU + for (auto& device : m_devices) { + device.set_nerf_network(std::make_shared<NerfNetwork<precision_t>>( + dims.n_pos, + n_dir_dims, + n_extra_dims, + dims.n_pos + 1, // The offset of 1 comes from the dt member variable of NerfCoordinate. 
HACKY + encoding_config, + dir_encoding_config, + network_config, + rgb_network_config + )); + } + + m_network = m_nerf_network = primary_device().nerf_network(); m_encoding = m_nerf_network->encoding(); n_encoding_params = m_encoding->n_params() + m_nerf_network->dir_encoding()->n_params(); @@ -2873,7 +3492,12 @@ void Testbed::reset_network(bool clear_density_grid) { } } - m_network = std::make_shared<NetworkWithInputEncoding<precision_t>>(m_encoding, dims.n_output, network_config); + for (auto& device : m_devices) { + device.set_network(std::make_shared<NetworkWithInputEncoding<precision_t>>(m_encoding, dims.n_output, network_config)); + } + + m_network = primary_device().network(); + n_encoding_params = m_encoding->n_params(); tlog::info() @@ -2910,6 +3534,7 @@ void Testbed::reset_network(bool clear_density_grid) { } } + set_all_devices_dirty(); } Testbed::Testbed(ETestbedMode mode) { @@ -2955,6 +3580,28 @@ Testbed::Testbed(ETestbedMode mode) { tlog::warning() << "This program was compiled for >=" << MIN_GPU_ARCH << " and may thus behave unexpectedly."; } + m_devices.emplace_back(active_device, true); + + // Multi-GPU is only supported in NeRF mode for now + int n_devices = cuda_device_count(); + for (int i = 0; i < n_devices; ++i) { + if (i == active_device) { + continue; + } + + if (cuda_compute_capability(i) >= MIN_GPU_ARCH) { + m_devices.emplace_back(i, false); + } + } + + if (m_devices.size() > 1) { + tlog::success() << "Detected auxiliary GPUs:"; + for (size_t i = 1; i < m_devices.size(); ++i) { + const auto& device = m_devices[i]; + tlog::success() << " #" << device.id() << ": " << device.name() << " [" << device.compute_capability() << "]"; + } + } + m_network_config = { {"loss", { {"otype", "L2"} @@ -3032,6 +3679,8 @@ void Testbed::train(uint32_t batch_size) { throw std::runtime_error{"Cannot train without a mode."}; } + set_all_devices_dirty(); + // If we don't have a trainer, as can happen when having loaded training data or changed modes without having // explicitly loaded a new neural network. if (!m_trainer) { @@ -3097,18 +3746,16 @@ void Testbed::train(uint32_t batch_size) { } } -Vector2f Testbed::calc_focal_length(const Vector2i& resolution, int fov_axis, float zoom) const { - return m_relative_focal_length * resolution[fov_axis] * zoom; +Vector2f Testbed::calc_focal_length(const Vector2i& resolution, const Vector2f& relative_focal_length, int fov_axis, float zoom) const { + return relative_focal_length * resolution[fov_axis] * zoom; } -Vector2f Testbed::render_screen_center() const { - // see pixel_to_ray for how screen center is used; 0.5,0.5 is 'normal'. we flip so that it becomes the point in the original image we want to center on. - auto screen_center = m_screen_center; - return {(0.5f-screen_center.x())*m_zoom + 0.5f, (0.5-screen_center.y())*m_zoom + 0.5f}; +Vector2f Testbed::render_screen_center(const Vector2f& screen_center) const { + // see pixel_to_ray for how screen center is used; 0.5, 0.5 is 'normal'. we flip so that it becomes the point in the original image we want to center on. 
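+	// [Editor's example] e.g. screen_center = (0.25, 0.5) with m_zoom = 1 returns
+	// (0.75, 0.5): per the formula below, (0.5 - 0.25) * 1 + 0.5 = 0.75, i.e. the
+	// requested center is mirrored about the middle of the image.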
+ return (Vector2f::Constant(0.5f) - screen_center) * m_zoom + Vector2f::Constant(0.5f); } __global__ void dlss_prep_kernel( - ETestbedMode mode, Vector2i resolution, uint32_t sample_index, Vector2f focal_length, @@ -3116,18 +3763,16 @@ __global__ void dlss_prep_kernel( Vector3f parallax_shift, bool snap_to_pixel_centers, float* depth_buffer, + const float znear, + const float zfar, Matrix<float, 3, 4> camera, Matrix<float, 3, 4> prev_camera, cudaSurfaceObject_t depth_surface, cudaSurfaceObject_t mvec_surface, cudaSurfaceObject_t exposure_surface, - Lens lens, - const float view_dist, - const float prev_view_dist, - const Vector2f image_pos, - const Vector2f prev_image_pos, - const Vector2i image_resolution, - const Vector2i quilting_dims + Foveation foveation, + Foveation prev_foveation, + Lens lens ) { uint32_t x = threadIdx.x + blockDim.x * blockIdx.x; uint32_t y = threadIdx.y + blockDim.y * blockIdx.y; @@ -3141,26 +3786,11 @@ __global__ void dlss_prep_kernel( uint32_t x_orig = x; uint32_t y_orig = y; - if (quilting_dims != Vector2i::Ones()) { - apply_quilting(&x, &y, resolution, parallax_shift, quilting_dims); - } - const float depth = depth_buffer[idx]; - Vector2f mvec = mode == ETestbedMode::Image ? motion_vector_2d( - sample_index, - {x, y}, - resolution.cwiseQuotient(quilting_dims), - image_resolution, - screen_center, - view_dist, - prev_view_dist, - image_pos, - prev_image_pos, - snap_to_pixel_centers - ) : motion_vector_3d( + Vector2f mvec = motion_vector( sample_index, {x, y}, - resolution.cwiseQuotient(quilting_dims), + resolution, focal_length, camera, prev_camera, @@ -3168,13 +3798,16 @@ __global__ void dlss_prep_kernel( parallax_shift, snap_to_pixel_centers, depth, + foveation, + prev_foveation, lens ); surf2Dwrite(make_float2(mvec.x(), mvec.y()), mvec_surface, x_orig * sizeof(float2), y_orig); - // Scale depth buffer to be guaranteed in [0,1]. - surf2Dwrite(std::min(std::max(depth / 128.0f, 0.0f), 1.0f), depth_surface, x_orig * sizeof(float), y_orig); + // DLSS was trained on games, which presumably used standard normalized device coordinates (ndc) + // depth buffers. So: convert depth to NDC with reasonable near- and far planes. + surf2Dwrite(to_ndc_depth(depth, znear, zfar), depth_surface, x_orig * sizeof(float), y_orig); // First thread write an exposure factor of 1. Since DLSS will run on tonemapped data, // exposure is assumed to already have been applied to DLSS' inputs. 
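
[Editor's sketch] `to_ndc_depth` itself is not part of this diff. Under the convention stated
elsewhere in the patch (depth 1 at the near plane, 0 at the far plane), a reversed-Z mapping of
the following shape would satisfy it; this is a hypothetical illustration only, and the real
implementation elsewhere in the codebase may differ:

	inline NGP_HOST_DEVICE float to_ndc_depth_sketch(float z, float znear, float zfar) {
		// 1.0 at z == znear, 0.0 at z == zfar, hyperbolic in between. Reversed Z
		// keeps most floating-point precision close to the camera, which is where
		// DLSS needs depth to be most reliable.
		return znear * (zfar - z) / (z * (zfar - znear));
	}
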
@@ -3183,22 +3816,202 @@ __global__ void dlss_prep_kernel( } } -void Testbed::render_frame(const Matrix<float, 3, 4>& camera_matrix0, const Matrix<float, 3, 4>& camera_matrix1, const Vector4f& nerf_rolling_shutter, CudaRenderBuffer& render_buffer, bool to_srgb) { - Vector2i max_res = m_window_res.cwiseMax(render_buffer.in_resolution()); +__global__ void spherical_checkerboard_kernel( + Vector2i resolution, + Vector2f focal_length, + Matrix<float, 3, 4> camera, + Vector2f screen_center, + Vector3f parallax_shift, + Foveation foveation, + Lens lens, + Array4f* frame_buffer +) { + uint32_t x = threadIdx.x + blockDim.x * blockIdx.x; + uint32_t y = threadIdx.y + blockDim.y * blockIdx.y; + + if (x >= resolution.x() || y >= resolution.y()) { + return; + } + + Ray ray = pixel_to_ray( + 0, + {x, y}, + resolution, + focal_length, + camera, + screen_center, + parallax_shift, + false, + 0.0f, + 1.0f, + 0.0f, + foveation, + {}, // No need for hidden area mask + lens + ); + + // Blend with checkerboard to break up reprojection weirdness in some VR runtimes + host_device_swap(ray.d.z(), ray.d.y()); + Vector2f spherical = dir_to_spherical(ray.d.normalized()) * 32.0f / PI(); + const Array4f dark_gray = {0.5f, 0.5f, 0.5f, 1.0f}; + const Array4f light_gray = {0.55f, 0.55f, 0.55f, 1.0f}; + Array4f checker = fabsf(fmodf(floorf(spherical.x()) + floorf(spherical.y()), 2.0f)) < 0.5f ? dark_gray : light_gray; + + uint32_t idx = x + resolution.x() * y; + frame_buffer[idx] += (1.0f - frame_buffer[idx].w()) * checker; +} + +__global__ void vr_overlay_hands_kernel( + Vector2i resolution, + Vector2f focal_length, + Matrix<float, 3, 4> camera, + Vector2f screen_center, + Vector3f parallax_shift, + Foveation foveation, + Lens lens, + Vector3f left_hand_pos, + float left_grab_strength, + Array4f left_hand_color, + Vector3f right_hand_pos, + float right_grab_strength, + Array4f right_hand_color, + float hand_radius, + EColorSpace output_color_space, + cudaSurfaceObject_t surface + // TODO: overwrite depth buffer +) { + uint32_t x = threadIdx.x + blockDim.x * blockIdx.x; + uint32_t y = threadIdx.y + blockDim.y * blockIdx.y; + + if (x >= resolution.x() || y >= resolution.y()) { + return; + } + + Ray ray = pixel_to_ray( + 0, + {x, y}, + resolution, + focal_length, + camera, + screen_center, + parallax_shift, + false, + 0.0f, + 1.0f, + 0.0f, + foveation, + {}, // No need for hidden area mask + lens + ); + + Array4f color = Array4f::Zero(); + auto composit_hand = [&](Vector3f hand_pos, float grab_strength, Array4f hand_color) { + // Don't render the hand indicator if it's behind the ray origin. + if (ray.d.dot(hand_pos - ray.o) < 0.0f) { + return; + } - render_buffer.clear_frame(m_stream.get()); + float distance = ray.distance_to(hand_pos); - Vector2f focal_length = calc_focal_length(render_buffer.in_resolution(), m_fov_axis, m_zoom); - Vector2f screen_center = render_screen_center(); + Array4f base_color = Array4f::Zero(); + const Array4f border_color = {0.4f, 0.4f, 0.4f, 0.4f}; + + // Divide hand radius into an inner part (4/5ths) and a border (1/5th). + float radius = hand_radius * 0.8f; + float border_width = hand_radius * 0.2f; + + // When grabbing, shrink the inner part as a visual indicator. + radius *= 0.5f + 0.5f * (1.0f - grab_strength); + + if (distance < radius) { + base_color = hand_color; + } else if (distance < radius + border_width) { + base_color = border_color; + } else { + return; + } + + // Make hand color opaque when grabbing. 
+ base_color.w() = grab_strength + (1.0f - grab_strength) * base_color.w(); + color += base_color * (1.0f - color.w()); + }; + + if (ray.d.dot(left_hand_pos - ray.o) < ray.d.dot(right_hand_pos - ray.o)) { + composit_hand(left_hand_pos, left_grab_strength, left_hand_color); + composit_hand(right_hand_pos, right_grab_strength, right_hand_color); + } else { + composit_hand(right_hand_pos, right_grab_strength, right_hand_color); + composit_hand(left_hand_pos, left_grab_strength, left_hand_color); + } + + // Blend with existing color of pixel + Array4f prev_color; + surf2Dread((float4*)&prev_color, surface, x * sizeof(float4), y); + if (output_color_space == EColorSpace::SRGB) { + prev_color.head<3>() = srgb_to_linear(prev_color.head<3>()); + } + + color += (1.0f - color.w()) * prev_color; + + if (output_color_space == EColorSpace::SRGB) { + color.head<3>() = linear_to_srgb(color.head<3>()); + } + + surf2Dwrite(to_float4(color), surface, x * sizeof(float4), y); +} + +void Testbed::render_frame( + cudaStream_t stream, + const Matrix<float, 3, 4>& camera_matrix0, + const Matrix<float, 3, 4>& camera_matrix1, + const Matrix<float, 3, 4>& prev_camera_matrix, + const Vector2f& orig_screen_center, + const Vector2f& relative_focal_length, + const Vector4f& nerf_rolling_shutter, + const Foveation& foveation, + const Foveation& prev_foveation, + int visualized_dimension, + CudaRenderBuffer& render_buffer, + bool to_srgb, + CudaDevice* device +) { + if (!device) { + device = &primary_device(); + } + + sync_device(render_buffer, *device); + + { + auto device_guard = use_device(stream, render_buffer, *device); + render_frame_main(*device, camera_matrix0, camera_matrix1, orig_screen_center, relative_focal_length, nerf_rolling_shutter, foveation, visualized_dimension); + } + + render_frame_epilogue(stream, camera_matrix0, prev_camera_matrix, orig_screen_center, relative_focal_length, foveation, prev_foveation, render_buffer, to_srgb); +} + +void Testbed::render_frame_main( + CudaDevice& device, + const Matrix<float, 3, 4>& camera_matrix0, + const Matrix<float, 3, 4>& camera_matrix1, + const Vector2f& orig_screen_center, + const Vector2f& relative_focal_length, + const Vector4f& nerf_rolling_shutter, + const Foveation& foveation, + int visualized_dimension +) { + device.render_buffer_view().clear(device.stream()); if (!m_network) { return; } + Vector2f focal_length = calc_focal_length(device.render_buffer_view().resolution, relative_focal_length, m_fov_axis, m_zoom); + Vector2f screen_center = render_screen_center(orig_screen_center); + switch (m_testbed_mode) { case ETestbedMode::Nerf: if (!m_render_ground_truth || m_ground_truth_alpha < 1.0f) { - render_nerf(render_buffer, max_res, focal_length, camera_matrix0, camera_matrix1, nerf_rolling_shutter, screen_center, m_stream.get()); + render_nerf(device.stream(), device.render_buffer_view(), *device.nerf_network(), device.data().density_grid_bitfield_ptr, focal_length, camera_matrix0, camera_matrix1, nerf_rolling_shutter, screen_center, foveation, visualized_dimension); } break; case ETestbedMode::Sdf: @@ -3219,15 +4032,13 @@ void Testbed::render_frame(const Matrix<float, 3, 4>& camera_matrix0, const Matr m_sdf.brick_data.data(), m_sdf.triangles_gpu.data(), false, - m_stream.get() + device.stream() ); } } + distance_fun_t distance_fun = m_render_ground_truth ? 
(distance_fun_t)[&](uint32_t n_elements, const Vector3f* positions, float* distances, cudaStream_t stream) { - if (n_elements == 0) { - return; - } if (m_sdf.groundtruth_mode == ESDFGroundTruthMode::SDFBricks) { // linear_kernel(sdf_brick_kernel, 0, stream, // n_elements, @@ -3244,17 +4055,14 @@ void Testbed::render_frame(const Matrix<float, 3, 4>& camera_matrix0, const Matr m_sdf.triangle_bvh->signed_distance_gpu( n_elements, m_sdf.mesh_sdf_mode, - (Vector3f*)positions, + positions, distances, m_sdf.triangles_gpu.data(), false, - m_stream.get() + stream ); } } : (distance_fun_t)[&](uint32_t n_elements, const Vector3f* positions, float* distances, cudaStream_t stream) { - if (n_elements == 0) { - return; - } n_elements = next_multiple(n_elements, tcnn::batch_size_granularity); GPUMatrix<float> positions_matrix((float*)positions, 3, n_elements); GPUMatrix<float, RM> distances_matrix(distances, 1, n_elements); @@ -3265,53 +4073,64 @@ void Testbed::render_frame(const Matrix<float, 3, 4>& camera_matrix0, const Matr m_render_ground_truth ? (normals_fun_t)[&](uint32_t n_elements, const Vector3f* positions, Vector3f* normals, cudaStream_t stream) { // NO-OP. Normals will automatically be populated by raytrace } : (normals_fun_t)[&](uint32_t n_elements, const Vector3f* positions, Vector3f* normals, cudaStream_t stream) { - if (n_elements == 0) { - return; - } - n_elements = next_multiple(n_elements, tcnn::batch_size_granularity); - GPUMatrix<float> positions_matrix((float*)positions, 3, n_elements); GPUMatrix<float> normals_matrix((float*)normals, 3, n_elements); m_network->input_gradient(stream, 0, positions_matrix, normals_matrix); }; render_sdf( + device.stream(), distance_fun, normals_fun, - render_buffer, - max_res, + device.render_buffer_view(), focal_length, camera_matrix0, screen_center, - m_stream.get() + foveation, + visualized_dimension ); } break; case ETestbedMode::Image: - render_image(render_buffer, m_stream.get()); + render_image(device.stream(), device.render_buffer_view(), focal_length, camera_matrix0, screen_center, foveation, visualized_dimension); break; case ETestbedMode::Volume: - render_volume(render_buffer, focal_length, camera_matrix0, screen_center, m_stream.get()); + render_volume(device.stream(), device.render_buffer_view(), focal_length, camera_matrix0, screen_center, foveation); break; default: - throw std::runtime_error{"Invalid render mode."}; + // No-op if no mode is active + break; } +} + +void Testbed::render_frame_epilogue( + cudaStream_t stream, + const Matrix<float, 3, 4>& camera_matrix0, + const Matrix<float, 3, 4>& prev_camera_matrix, + const Vector2f& orig_screen_center, + const Vector2f& relative_focal_length, + const Foveation& foveation, + const Foveation& prev_foveation, + CudaRenderBuffer& render_buffer, + bool to_srgb +) { + Vector2f focal_length = calc_focal_length(render_buffer.in_resolution(), relative_focal_length, m_fov_axis, m_zoom); + Vector2f screen_center = render_screen_center(orig_screen_center); render_buffer.set_color_space(m_color_space); render_buffer.set_tonemap_curve(m_tonemap_curve); + Lens lens = (m_testbed_mode == ETestbedMode::Nerf && m_nerf.render_with_lens_distortion) ? 
m_nerf.render_lens : Lens{};
+
 	// Prepare DLSS data: motion vectors, scaled depth, exposure
 	if (render_buffer.dlss()) {
 		auto res = render_buffer.in_resolution();

-		bool distortion = m_testbed_mode == ETestbedMode::Nerf && m_nerf.render_with_lens_distortion;
-
 		const dim3 threads = { 16, 8, 1 };
 		const dim3 blocks = { div_round_up((uint32_t)res.x(), threads.x), div_round_up((uint32_t)res.y(), threads.y), 1 };
-		dlss_prep_kernel<<<blocks, threads, 0, m_stream.get()>>>(
-			m_testbed_mode,
+		dlss_prep_kernel<<<blocks, threads, 0, stream>>>(
 			res,
 			render_buffer.spp(),
 			focal_length,
@@ -3319,29 +4138,49 @@
 			m_parallax_shift,
 			m_snap_to_pixel_centers,
 			render_buffer.depth_buffer(),
+			m_ndc_znear,
+			m_ndc_zfar,
 			camera_matrix0,
-			m_prev_camera,
+			prev_camera_matrix,
 			render_buffer.dlss()->depth(),
 			render_buffer.dlss()->mvec(),
 			render_buffer.dlss()->exposure(),
-			distortion ? m_nerf.render_lens : Lens{},
-			m_scale,
-			m_prev_scale,
-			m_image.pos,
-			m_image.prev_pos,
-			m_image.resolution,
-			m_quilting_dims
+			foveation,
+			prev_foveation,
+			lens
 		);

 		render_buffer.set_dlss_sharpening(m_dlss_sharpening);
 	}

-	m_prev_camera = camera_matrix0;
-	m_prev_scale = m_scale;
-	m_image.prev_pos = m_image.pos;
+	EColorSpace output_color_space = to_srgb ? EColorSpace::SRGB : EColorSpace::Linear;
+
+	if (m_render_transparency_as_checkerboard) {
+		Matrix<float, 3, 4> checkerboard_transform = Matrix<float, 3, 4>::Identity();

-	render_buffer.accumulate(m_exposure, m_stream.get());
-	render_buffer.tonemap(m_exposure, m_background_color, to_srgb ? EColorSpace::SRGB : EColorSpace::Linear, m_stream.get());
+#ifdef NGP_GUI
+		if (m_vr_frame_info && !m_vr_frame_info->views.empty()) {
+			checkerboard_transform = m_vr_frame_info->views[0].pose;
+		}
+#endif
+
+		auto res = render_buffer.in_resolution();
+		const dim3 threads = { 16, 8, 1 };
+		const dim3 blocks = { div_round_up((uint32_t)res.x(), threads.x), div_round_up((uint32_t)res.y(), threads.y), 1 };
+		spherical_checkerboard_kernel<<<blocks, threads, 0, stream>>>(
+			res,
+			focal_length,
+			checkerboard_transform,
+			screen_center,
+			m_parallax_shift,
+			foveation,
+			lens,
+			render_buffer.frame_buffer()
+		);
+	}
+
+	render_buffer.accumulate(m_exposure, stream);
+	render_buffer.tonemap(m_exposure, m_background_color, output_color_space, m_ndc_znear, m_ndc_zfar, stream);

 	if (m_testbed_mode == ETestbedMode::Nerf) {
 		// Overlay the ground truth image if requested
@@ -3352,14 +4191,14 @@
 				m_ground_truth_alpha,
 				Array3f::Constant(m_exposure) + m_nerf.training.cam_exposure[m_nerf.training.view].variable(),
 				m_background_color,
-				to_srgb ? 
EColorSpace::SRGB : EColorSpace::Linear,
+				output_color_space,
 				metadata.pixels,
 				metadata.image_data_type,
 				metadata.resolution,
 				m_fov_axis,
 				m_zoom,
 				Vector2f::Constant(0.5f),
-				m_stream.get()
+				stream
 			);
 		} else if (m_ground_truth_render_mode == EGroundTruthRenderMode::Depth && metadata.depth) {
 			render_buffer.overlay_depth(
@@ -3370,7 +4209,7 @@
 				m_fov_axis,
 				m_zoom,
 				Vector2f::Constant(0.5f),
-				m_stream.get()
+				stream
 			);
 		}
 	}
@@ -3385,39 +4224,67 @@
 			}
 			size_t emap_size = error_map_res.x() * error_map_res.y();
 			err_data += emap_size * m_nerf.training.view;
-			static GPUMemory<float> average_error;
+
+			GPUMemory<float> average_error;
 			average_error.enlarge(1);
 			average_error.memset(0);
 			const float* aligned_err_data_s = (const float*)(((size_t)err_data)&~15);
 			const float* aligned_err_data_e = (const float*)(((size_t)(err_data+emap_size))&~15);
 			size_t reduce_size = aligned_err_data_e - aligned_err_data_s;
-			reduce_sum(aligned_err_data_s, [reduce_size] __device__ (float val) { return max(val,0.f) / (reduce_size); }, average_error.data(), reduce_size, m_stream.get());
+			reduce_sum(aligned_err_data_s, [reduce_size] __device__ (float val) { return max(val,0.f) / (reduce_size); }, average_error.data(), reduce_size, stream);
 			auto const &metadata = m_nerf.training.dataset.metadata[m_nerf.training.view];
-			render_buffer.overlay_false_color(metadata.resolution, to_srgb, m_fov_axis, m_stream.get(), err_data, error_map_res, average_error.data(), m_nerf.training.error_overlay_brightness, m_render_ground_truth);
+			render_buffer.overlay_false_color(metadata.resolution, to_srgb, m_fov_axis, stream, err_data, error_map_res, average_error.data(), m_nerf.training.error_overlay_brightness, m_render_ground_truth);
 		}
 	}

-	CUDA_CHECK_THROW(cudaStreamSynchronize(m_stream.get()));
-}
-
-void Testbed::determine_autofocus_target_from_pixel(const Vector2i& focus_pixel) {
-	float depth;
+#ifdef NGP_GUI
+	// If in VR, indicate the hand position and render transparent background
+	if (m_vr_frame_info) {
+		auto& hands = m_vr_frame_info->hands;

-	const auto& surface = m_render_surfaces.front();
-	if (surface.depth_buffer()) {
-		auto res = surface.in_resolution();
-		Vector2i depth_pixel = focus_pixel.cast<float>().cwiseProduct(res.cast<float>()).cwiseQuotient(m_window_res.cast<float>()).cast<int>();
-		depth_pixel = depth_pixel.cwiseMin(res).cwiseMax(0);
+		auto res = render_buffer.out_resolution();
+		const dim3 threads = { 16, 8, 1 };
+		const dim3 blocks = { div_round_up((uint32_t)res.x(), threads.x), div_round_up((uint32_t)res.y(), threads.y), 1 };
+		vr_overlay_hands_kernel<<<blocks, threads, 0, stream>>>(
+			res,
+			focal_length.cwiseProduct(render_buffer.out_resolution().cast<float>()).cwiseQuotient(render_buffer.in_resolution().cast<float>()),
+			camera_matrix0,
+			screen_center,
+			m_parallax_shift,
+			foveation,
+			lens,
+			vr_to_world(hands[0].pose.col(3)),
+			hands[0].grab_strength,
+			{hands[0].pressing ? 0.8f : 0.0f, 0.0f, 0.0f, 0.8f},
+			vr_to_world(hands[1].pose.col(3)),
+			hands[1].grab_strength,
+			{hands[1].pressing ? 
0.8f : 0.0f, 0.0f, 0.0f, 0.8f},
+			0.05f * m_scale, // Hand radius
+			output_color_space,
+			render_buffer.surface()
+		);
+	}
+#endif
+}

-		CUDA_CHECK_THROW(cudaMemcpy(&depth, surface.depth_buffer() + depth_pixel.x() + depth_pixel.y() * res.x(), sizeof(float), cudaMemcpyDeviceToHost));
-	} else {
-		depth = m_scale;
+float Testbed::get_depth_from_renderbuffer(const CudaRenderBuffer& render_buffer, const Vector2f& uv) {
+	if (!render_buffer.depth_buffer()) {
+		return m_scale;
 	}

-	auto ray = pixel_to_ray_pinhole(0, focus_pixel, m_window_res, calc_focal_length(m_window_res, m_fov_axis, m_zoom), m_smoothed_camera, render_screen_center());
+	float depth;
+	auto res = render_buffer.in_resolution();
+	Vector2i depth_pixel = uv.cwiseProduct(res.cast<float>()).cast<int>().cwiseMin(res - Vector2i::Ones()).cwiseMax(0);
+
+	CUDA_CHECK_THROW(cudaMemcpy(&depth, render_buffer.depth_buffer() + depth_pixel.x() + depth_pixel.y() * res.x(), sizeof(float), cudaMemcpyDeviceToHost));
+	return depth;
+}

-	m_autofocus_target = ray.o + ray.d * depth;
-	m_autofocus = true; // If someone shift-clicked, that means they want the AUTOFOCUS
+Vector3f Testbed::get_3d_pos_from_pixel(const CudaRenderBuffer& render_buffer, const Vector2i& pixel) {
+	float depth = get_depth_from_renderbuffer(render_buffer, pixel.cast<float>().cwiseQuotient(m_window_res.cast<float>()));
+	auto ray = pixel_to_ray_pinhole(0, pixel, m_window_res, calc_focal_length(m_window_res, m_relative_focal_length, m_fov_axis, m_zoom), m_smoothed_camera, render_screen_center(m_screen_center));
+	return ray(depth);
 }

 void Testbed::autofocus() {
@@ -3593,7 +4460,7 @@ void Testbed::load_snapshot(const fs::path& path) {
 		density_grid[i] = (float)density_grid_fp16[i];
 	});

-	if (m_nerf.density_grid.size() == NERF_GRIDSIZE() * NERF_GRIDSIZE() * NERF_GRIDSIZE() * (m_nerf.max_cascade + 1)) {
+	if (m_nerf.density_grid.size() == NERF_GRID_N_CELLS() * (m_nerf.max_cascade + 1)) {
 		update_density_grid_mean_and_bitfield(nullptr);
 	} else if (m_nerf.density_grid.size() != 0) {
 		// A size of 0 indicates that the density grid was never populated, which is a valid state of a (yet) untrained model.
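
[Editor's sketch] The pixel-to-world math behind `get_3d_pos_from_pixel` above, written out as a
self-contained function. Pinhole model only; all names are hypothetical, and the depth value is
assumed to be the distance the renderer stores along the view ray, matching how `ray(depth)` is
evaluated:

	#include <Eigen/Dense>

	Eigen::Vector3f pixel_to_world_sketch(
		const Eigen::Vector2i& pixel,
		const Eigen::Vector2i& res,
		const Eigen::Vector2f& focal,              // focal length in pixels
		const Eigen::Matrix<float, 3, 4>& camera,  // camera-to-world transform
		float depth
	) {
		// Direction through the pixel center, expressed in camera space.
		Eigen::Vector3f dir_cam{
			((float)pixel.x() + 0.5f - 0.5f * (float)res.x()) / focal.x(),
			((float)pixel.y() + 0.5f - 0.5f * (float)res.y()) / focal.y(),
			1.0f,
		};

		// Rotate into world space and march `depth` units along the ray from the
		// camera origin (the translation column of the 3x4 matrix).
		return camera.col(3) + camera.block<3, 3>(0, 0) * dir_cam * depth;
	}
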
@@ -3614,6 +4481,106 @@ void Testbed::load_snapshot(const fs::path& path) { m_loss_scalar.set(m_network_config["snapshot"]["loss"]); m_trainer->deserialize(m_network_config["snapshot"]); + + set_all_devices_dirty(); +} + +void Testbed::CudaDevice::set_nerf_network(const std::shared_ptr<NerfNetwork<precision_t>>& nerf_network) { + m_network = m_nerf_network = nerf_network; +} + +void Testbed::sync_device(CudaRenderBuffer& render_buffer, Testbed::CudaDevice& device) { + if (!device.dirty()) { + return; + } + + if (device.is_primary()) { + device.data().density_grid_bitfield_ptr = m_nerf.density_grid_bitfield.data(); + device.data().hidden_area_mask = render_buffer.hidden_area_mask(); + device.set_dirty(false); + return; + } + + m_stream.signal(device.stream()); + + int active_device = cuda_device(); + auto guard = device.device_guard(); + + device.data().density_grid_bitfield.resize(m_nerf.density_grid_bitfield.size()); + if (m_nerf.density_grid_bitfield.size() > 0) { + CUDA_CHECK_THROW(cudaMemcpyPeerAsync(device.data().density_grid_bitfield.data(), device.id(), m_nerf.density_grid_bitfield.data(), active_device, m_nerf.density_grid_bitfield.bytes(), device.stream())); + } + + device.data().density_grid_bitfield_ptr = device.data().density_grid_bitfield.data(); + + if (m_network) { + device.data().params.resize(m_network->n_params()); + CUDA_CHECK_THROW(cudaMemcpyPeerAsync(device.data().params.data(), device.id(), m_network->inference_params(), active_device, device.data().params.bytes(), device.stream())); + device.nerf_network()->set_params(device.data().params.data(), device.data().params.data(), nullptr); + } + + if (render_buffer.hidden_area_mask()) { + auto ham = std::make_shared<Buffer2D<uint8_t>>(render_buffer.hidden_area_mask()->resolution()); + CUDA_CHECK_THROW(cudaMemcpyPeerAsync(ham->data(), device.id(), render_buffer.hidden_area_mask()->data(), active_device, ham->bytes(), device.stream())); + device.data().hidden_area_mask = ham; + } else { + device.data().hidden_area_mask = nullptr; + } + + device.set_dirty(false); +} + +// From https://stackoverflow.com/questions/20843271/passing-a-non-copyable-closure-object-to-stdfunction-parameter +template <class F> +auto make_copyable_function(F&& f) { + using dF = std::decay_t<F>; + auto spf = std::make_shared<dF>(std::forward<F>(f)); + return [spf](auto&&... args) -> decltype(auto) { + return (*spf)( decltype(args)(args)... 
); + }; +} + +ScopeGuard Testbed::use_device(cudaStream_t stream, CudaRenderBuffer& render_buffer, Testbed::CudaDevice& device) { + device.wait_for(stream); + + if (device.is_primary()) { + device.set_render_buffer_view(render_buffer.view()); + return ScopeGuard{[&device, stream]() { + device.set_render_buffer_view({}); + device.signal(stream); + }}; + } + + int active_device = cuda_device(); + auto guard = device.device_guard(); + + size_t n_pixels = render_buffer.in_resolution().prod(); + + GPUMemoryArena::Allocation alloc; + auto scratch = allocate_workspace_and_distribute<Array4f, float>(device.stream(), &alloc, n_pixels, n_pixels); + + device.set_render_buffer_view({ + std::get<0>(scratch), + std::get<1>(scratch), + render_buffer.in_resolution(), + render_buffer.spp(), + device.data().hidden_area_mask, + }); + + return ScopeGuard{make_copyable_function([&render_buffer, &device, guard=std::move(guard), alloc=std::move(alloc), active_device, stream]() { + // Copy device's render buffer's data onto the original render buffer + CUDA_CHECK_THROW(cudaMemcpyPeerAsync(render_buffer.frame_buffer(), active_device, device.render_buffer_view().frame_buffer, device.id(), render_buffer.in_resolution().prod() * sizeof(Array4f), device.stream())); + CUDA_CHECK_THROW(cudaMemcpyPeerAsync(render_buffer.depth_buffer(), active_device, device.render_buffer_view().depth_buffer, device.id(), render_buffer.in_resolution().prod() * sizeof(float), device.stream())); + + device.set_render_buffer_view({}); + device.signal(stream); + })}; +} + +void Testbed::set_all_devices_dirty() { + for (auto& device : m_devices) { + device.set_dirty(true); + } } void Testbed::load_camera_path(const fs::path& path) { diff --git a/src/testbed_image.cu b/src/testbed_image.cu index 5be8aa74bf6f11b99d70b8663835956b66137fd0..59d385a7840d561eb389fbdc8bca34cbff606ed2 100644 --- a/src/testbed_image.cu +++ b/src/testbed_image.cu @@ -77,14 +77,20 @@ __global__ void stratify2_kernel(uint32_t n_elements, uint32_t log2_batch_size, } __global__ void init_image_coords( + uint32_t sample_index, Vector2f* __restrict__ positions, + float* __restrict__ depth_buffer, Vector2i resolution, - Vector2i image_resolution, - float view_dist, - Vector2f image_pos, + float aspect, + Vector2f focal_length, + Matrix<float, 3, 4> camera_matrix, Vector2f screen_center, + Vector3f parallax_shift, bool snap_to_pixel_centers, - uint32_t sample_index + float plane_z, + float aperture_size, + Foveation foveation, + Buffer2DView<const uint8_t> hidden_area_mask ) { uint32_t x = threadIdx.x + blockDim.x * blockIdx.x; uint32_t y = threadIdx.y + blockDim.y * blockIdx.y; @@ -93,49 +99,47 @@ __global__ void init_image_coords( return; } - uint32_t idx = x + resolution.x() * y; - positions[idx] = pixel_to_image_uv( + // The image is displayed on the plane [0.5, 0.5, 0.5] + [X, Y, 0] to facilitate + // a top-down view by default, while permitting general camera movements (for + // motion vectors and code sharing with 3D tasks). + // Hence: generate rays and intersect that plane. + Ray ray = pixel_to_ray( sample_index, {x, y}, resolution, - image_resolution, + focal_length, + camera_matrix, screen_center, - view_dist, - image_pos, - snap_to_pixel_centers + parallax_shift, + snap_to_pixel_centers, + 0.0f, // near distance + plane_z, + aperture_size, + foveation, + hidden_area_mask ); -} -// #define COLOR_SPACE_CONVERT convert to ycrcb experiment - causes some color shift tho it does lead to very slightly sharper edges. 
not a net win if you like colors :) -#define CHROMA_SCALE 0.2f - -__global__ void colorspace_convert_image_half(Vector2i resolution, const char* __restrict__ texture) { - uint32_t x = blockIdx.x * blockDim.x + threadIdx.x; - uint32_t y = blockIdx.y * blockDim.y + threadIdx.y; - if (x >= resolution.x() || y >= resolution.y()) return; - __half val[4]; - *(int2*)&val[0] = ((int2*)texture)[y * resolution.x() + x]; - float R=val[0],G=val[1],B=val[2]; - val[0]=(0.2126f * R + 0.7152f * G + 0.0722f * B); - val[1]=((-0.1146f * R - 0.3845f * G + 0.5f * B)+0.f)*CHROMA_SCALE; - val[2]=((0.5f * R - 0.4542f * G - 0.0458f * B)+0.f)*CHROMA_SCALE; - ((int2*)texture)[y * resolution.x() + x] = *(int2*)&val[0]; -} + // Intersect the Z=0.5 plane + float t = ray.is_valid() ? (0.5f - ray.o.z()) / ray.d.z() : -1.0f; + + uint32_t idx = x + resolution.x() * y; + if (t <= 0.0f) { + depth_buffer[idx] = MAX_DEPTH(); + positions[idx] = -Vector2f::Ones(); + return; + } + + Vector2f uv = ray(t).head<2>(); + + // Flip from world coordinates where Y goes up to image coordinates where Y goes down. + // Also, multiply the x-axis by the image's aspect ratio to make it have the right proportions. + uv = (uv - Vector2f::Constant(0.5f)).cwiseProduct(Vector2f{aspect, -1.0f}) + Vector2f::Constant(0.5f); -__global__ void colorspace_convert_image_float(Vector2i resolution, const char* __restrict__ texture) { - uint32_t x = blockIdx.x * blockDim.x + threadIdx.x; - uint32_t y = blockIdx.y * blockDim.y + threadIdx.y; - if (x >= resolution.x() || y >= resolution.y()) return; - float val[4]; - *(float4*)&val[0] = ((float4*)texture)[y * resolution.x() + x]; - float R=val[0],G=val[1],B=val[2]; - val[0]=(0.2126f * R + 0.7152f * G + 0.0722f * B); - val[1]=((-0.1146f * R - 0.3845f * G + 0.5f * B)+0.f)*CHROMA_SCALE; - val[2]=((0.5f * R - 0.4542f * G - 0.0458f * B)+0.f)*CHROMA_SCALE; - ((float4*)texture)[y * resolution.x() + x] = *(float4*)&val[0]; + depth_buffer[idx] = t; + positions[idx] = uv; } -__global__ void shade_kernel_image(Vector2i resolution, const Vector2f* __restrict__ positions, const Array3f* __restrict__ colors, Array4f* __restrict__ frame_buffer, float* __restrict__ depth_buffer, bool linear_colors) { +__global__ void shade_kernel_image(Vector2i resolution, const Vector2f* __restrict__ positions, const Array3f* __restrict__ colors, Array4f* __restrict__ frame_buffer, bool linear_colors) { uint32_t x = threadIdx.x + blockDim.x * blockIdx.x; uint32_t y = threadIdx.y + blockDim.y * blockIdx.y; @@ -148,7 +152,6 @@ __global__ void shade_kernel_image(Vector2i resolution, const Vector2f* __restri const Vector2f uv = positions[idx]; if (uv.x() < 0.0f || uv.x() > 1.0f || uv.y() < 0.0f || uv.y() > 1.0f) { frame_buffer[idx] = Array4f::Zero(); - depth_buffer[idx] = 1e10f; return; } @@ -158,16 +161,7 @@ __global__ void shade_kernel_image(Vector2i resolution, const Vector2f* __restri color = srgb_to_linear(color); } -#ifdef COLOR_SPACE_CONVERT - float Y=color.x(), Cb =color.y()*(1.f/CHROMA_SCALE) -0.f, Cr = color.z() * (1.f/CHROMA_SCALE) - 0.f; - float R = Y + 1.5748f * Cr; - float G = Y - 0.1873f * Cb - 0.4681 * Cr; - float B = Y + 1.8556f * Cb; - frame_buffer[idx] = {R, G, B, 1.0f}; -#else frame_buffer[idx] = {color.x(), color.y(), color.z(), 1.0f}; -#endif - depth_buffer[idx] = 1.0f; } template <typename T, uint32_t stride> @@ -291,8 +285,16 @@ void Testbed::train_image(size_t target_batch_size, bool get_loss_scalar, cudaSt m_training_step++; } -void Testbed::render_image(CudaRenderBuffer& render_buffer, cudaStream_t stream) { - auto res 
= render_buffer.in_resolution(); +void Testbed::render_image( + cudaStream_t stream, + const CudaRenderBufferView& render_buffer, + const Vector2f& focal_length, + const Matrix<float, 3, 4>& camera_matrix, + const Vector2f& screen_center, + const Foveation& foveation, + int visualized_dimension +) { + auto res = render_buffer.resolution; // Make sure we have enough memory reserved to render at the requested resolution size_t n_pixels = (size_t)res.x() * res.y(); @@ -300,18 +302,27 @@ void Testbed::render_image(CudaRenderBuffer& render_buffer, cudaStream_t stream) m_image.render_coords.enlarge(n_elements); m_image.render_out.enlarge(n_elements); + float plane_z = m_slice_plane_z + m_scale; + float aspect = (float)m_image.resolution.y() / (float)m_image.resolution.x(); + // Generate 2D coords at which to query the network const dim3 threads = { 16, 8, 1 }; const dim3 blocks = { div_round_up((uint32_t)res.x(), threads.x), div_round_up((uint32_t)res.y(), threads.y), 1 }; init_image_coords<<<blocks, threads, 0, stream>>>( + render_buffer.spp, m_image.render_coords.data(), + render_buffer.depth_buffer, res, - m_image.resolution, - m_scale, - m_image.pos, - m_screen_center - Vector2f::Constant(0.5f), + aspect, + focal_length, + camera_matrix, + screen_center, + m_parallax_shift, m_snap_to_pixel_centers, - render_buffer.spp() + plane_z, + m_aperture_size, + foveation, + render_buffer.hidden_area_mask ? render_buffer.hidden_area_mask->const_view() : Buffer2DView<const uint8_t>{} ); // Obtain colors for each 2D coord @@ -338,10 +349,10 @@ void Testbed::render_image(CudaRenderBuffer& render_buffer, cudaStream_t stream) } if (!m_render_ground_truth) { - if (m_visualized_dimension >= 0) { + if (visualized_dimension >= 0) { GPUMatrix<float> positions_matrix((float*)m_image.render_coords.data(), 2, n_elements); GPUMatrix<float> colors_matrix((float*)m_image.render_out.data(), 3, n_elements); - m_network->visualize_activation(stream, m_visualized_layer, m_visualized_dimension, positions_matrix, colors_matrix); + m_network->visualize_activation(stream, m_visualized_layer, visualized_dimension, positions_matrix, colors_matrix); } else { GPUMatrix<float> positions_matrix((float*)m_image.render_coords.data(), 2, n_elements); GPUMatrix<float> colors_matrix((float*)m_image.render_out.data(), 3, n_elements); @@ -354,8 +365,7 @@ void Testbed::render_image(CudaRenderBuffer& render_buffer, cudaStream_t stream) res, m_image.render_coords.data(), m_image.render_out.data(), - render_buffer.frame_buffer(), - render_buffer.depth_buffer(), + render_buffer.frame_buffer, m_image.training.linear_colors ); } diff --git a/src/testbed_nerf.cu b/src/testbed_nerf.cu index 33a663d4894ad43422b55af9fa2b8d97eec7dd4c..ba563a6d1115ebd38bbe864e1e99bc6c44d326a5 100644 --- a/src/testbed_nerf.cu +++ b/src/testbed_nerf.cu @@ -377,34 +377,60 @@ __global__ void mark_untrained_density_grid(const uint32_t n_elements, float* _ uint32_t y = tcnn::morton3D_invert(pos_idx>>1); uint32_t z = tcnn::morton3D_invert(pos_idx>>2); - Vector3f pos = ((Vector3f{(float)x+0.5f, (float)y+0.5f, (float)z+0.5f}) / NERF_GRIDSIZE() - Vector3f::Constant(0.5f)) * scalbnf(1.0f, level) + Vector3f::Constant(0.5f); - float voxel_radius = 0.5f*SQRT3()*scalbnf(1.0f, level) / NERF_GRIDSIZE(); - int count = 0; - for (uint32_t j=0; j < n_training_images; ++j) { - if (metadata[j].lens.mode == ELensMode::FTheta || metadata[j].lens.mode == ELensMode::LatLong || metadata[j].lens.mode == ELensMode::OpenCVFisheye) { - // not supported for now - count++; - break; + float voxel_size 
= scalbnf(1.0f / NERF_GRIDSIZE(), level);
+	Vector3f pos = (Vector3f{(float)x, (float)y, (float)z} / NERF_GRIDSIZE() - Vector3f::Constant(0.5f)) * scalbnf(1.0f, level) + Vector3f::Constant(0.5f);
+
+	Vector3f corners[8] = {
+		pos + Vector3f{0.0f,       0.0f,       0.0f      },
+		pos + Vector3f{voxel_size, 0.0f,       0.0f      },
+		pos + Vector3f{0.0f,       voxel_size, 0.0f      },
+		pos + Vector3f{voxel_size, voxel_size, 0.0f      },
+		pos + Vector3f{0.0f,       0.0f,       voxel_size},
+		pos + Vector3f{voxel_size, 0.0f,       voxel_size},
+		pos + Vector3f{0.0f,       voxel_size, voxel_size},
+		pos + Vector3f{voxel_size, voxel_size, voxel_size},
+	};
+
+	// Number of training views that need to see a voxel cell
+	// at minimum for that cell to be marked trainable.
+	// Floaters can be reduced by increasing this value to 2,
+	// but at the cost of certain reconstruction artifacts.
+	const uint32_t min_count = 1;
+	uint32_t count = 0;
+
+	for (uint32_t j = 0; j < n_training_images && count < min_count; ++j) {
+		const auto& xform = training_xforms[j].start;
+		const auto& m = metadata[j];
+
+		if (m.lens.mode == ELensMode::FTheta || m.lens.mode == ELensMode::LatLong || m.lens.mode == ELensMode::Equirectangular) {
+			// FTheta lenses don't have a forward mapping, so they are assumed to see everything.
+			// LatLong and equirectangular lenses see everything by definition.
+			++count;
+			continue;
 		}

-		float half_resx = metadata[j].resolution.x() * 0.5f;
-		float half_resy = metadata[j].resolution.y() * 0.5f;
-		Matrix<float, 3, 4> xform = training_xforms[j].start;
-		Vector3f ploc = pos - xform.col(3);
-		float x = ploc.dot(xform.col(0));
-		float y = ploc.dot(xform.col(1));
-		float z = ploc.dot(xform.col(2));
-		if (z > 0.f) {
-			auto focal = metadata[j].focal_length;
-			// TODO - add a box / plane intersection to stop thomas from murdering me
-			if (fabsf(x) - voxel_radius < z / focal.x() * half_resx && fabsf(y) - voxel_radius < z / focal.y() * half_resy) {
-				count++;
-				if (count > 0) break;
+
+		for (uint32_t k = 0; k < 8; ++k) {
+			// Only consider voxel corners in front of the camera
+			Vector3f dir = (corners[k] - xform.col(3)).normalized();
+			if (dir.dot(xform.col(2)) < 1e-4f) {
+				continue;
+			}
+
+			// Check if voxel corner projects onto the image plane, i.e. uv must be in (0, 1)^2
+			Vector2f uv = pos_to_uv(corners[k], m.resolution, m.focal_length, xform, m.principal_point, Vector3f::Zero(), {}, m.lens);
+
+			// `pos_to_uv` is _not_ injective in the presence of lens distortion (which breaks down outside of the image plane).
+			// So we need to check whether the produced uv location generates a ray that matches the ray that we started with.
+			Ray ray = uv_to_ray(0.0f, uv, m.resolution, m.focal_length, xform, m.principal_point, Vector3f::Zero(), 0.0f, 1.0f, 0.0f, {}, {}, m.lens);
+			if ((ray.d.normalized() - dir).norm() < 1e-3f && uv.x() > 0.0f && uv.y() > 0.0f && uv.x() < 1.0f && uv.y() < 1.0f) {
+				++count;
+				break;
 			}
 		}
 	}

-	if (clear_visible_voxels || (grid_out[i] < 0) != (count <= 0)) {
-		grid_out[i] = (count > 0) ? 0.f : -1.f;
+	if (clear_visible_voxels || (grid_out[i] < 0) != (count < min_count)) {
+		grid_out[i] = (count >= min_count) ? 
0.f : -1.f; } } @@ -525,9 +551,10 @@ __global__ void grid_samples_half_to_float(const uint32_t n_elements, BoundingBo Vector3f pos = unwarp_position(coords_in[i].p, aabb); float grid_density = cascaded_grid_at(pos, grid_in, mip_from_pos(pos, max_cascade)); if (grid_density < NERF_MIN_OPTICAL_THICKNESS()) { - mlp = -10000.f; + mlp = -10000.0f; } } + dst[i] = mlp; } @@ -777,8 +804,6 @@ __global__ void composite_kernel_nerf( BoundingBox aabb, float glow_y_cutoff, int glow_mode, - const uint32_t n_training_images, - const TrainingXForm* __restrict__ training_xforms, Matrix<float, 3, 4> camera_matrix, Vector2f focal_length, float depth_scale, @@ -1038,18 +1063,18 @@ inline __device__ float pdf_2d(Vector2f sample, uint32_t img, const Vector2i& re } inline __device__ Vector2f nerf_random_image_pos_training(default_rng_t& rng, const Vector2i& resolution, bool snap_to_pixel_centers, const float* __restrict__ cdf_x_cond_y, const float* __restrict__ cdf_y, const Vector2i& cdf_res, uint32_t img, float* __restrict__ pdf = nullptr) { - Vector2f xy = random_val_2d(rng); + Vector2f uv = random_val_2d(rng); if (cdf_x_cond_y) { - xy = sample_cdf_2d(xy, img, cdf_res, cdf_x_cond_y, cdf_y, pdf); + uv = sample_cdf_2d(uv, img, cdf_res, cdf_x_cond_y, cdf_y, pdf); } else if (pdf) { *pdf = 1.0f; } if (snap_to_pixel_centers) { - xy = (xy.cwiseProduct(resolution.cast<float>()).cast<int>().cwiseMax(0).cwiseMin(resolution - Vector2i::Ones()).cast<float>() + Vector2f::Constant(0.5f)).cwiseQuotient(resolution.cast<float>()); + uv = (uv.cwiseProduct(resolution.cast<float>()).cast<int>().cwiseMax(0).cwiseMin(resolution - Vector2i::Ones()).cast<float>() + Vector2f::Constant(0.5f)).cwiseQuotient(resolution.cast<float>()); } - return xy; + return uv; } inline __device__ uint32_t image_idx(uint32_t base_idx, uint32_t n_rays, uint32_t n_rays_total, uint32_t n_training_images, const float* __restrict__ cdf = nullptr, float* __restrict__ pdf = nullptr) { @@ -1096,8 +1121,7 @@ __global__ void generate_training_samples_nerf( bool snap_to_pixel_centers, bool train_envmap, float cone_angle_constant, - const float* __restrict__ distortion_data, - const Vector2i distortion_resolution, + Buffer2DView<const Vector2f> distortion, const float* __restrict__ cdf_x_cond_y, const float* __restrict__ cdf_y, const float* __restrict__ cdf_img, @@ -1112,11 +1136,11 @@ __global__ void generate_training_samples_nerf( Eigen::Vector2i resolution = metadata[img].resolution; rng.advance(i * N_MAX_RANDOM_SAMPLES_PER_RAY()); - Vector2f xy = nerf_random_image_pos_training(rng, resolution, snap_to_pixel_centers, cdf_x_cond_y, cdf_y, cdf_res, img); + Vector2f uv = nerf_random_image_pos_training(rng, resolution, snap_to_pixel_centers, cdf_x_cond_y, cdf_y, cdf_res, img); // Negative values indicate masked-away regions - size_t pix_idx = pixel_idx(xy, resolution, 0); - if (read_rgba(xy, resolution, metadata[img].pixels, metadata[img].image_data_type).x() < 0.0f) { + size_t pix_idx = pixel_idx(uv, resolution, 0); + if (read_rgba(uv, resolution, metadata[img].pixels, metadata[img].image_data_type).x() < 0.0f) { return; } @@ -1129,7 +1153,7 @@ __global__ void generate_training_samples_nerf( const float* extra_dims = extra_dims_gpu + img * n_extra_dims; const Lens lens = metadata[img].lens; - const Matrix<float, 3, 4> xform = get_xform_given_rolling_shutter(training_xforms[img], metadata[img].rolling_shutter, xy, motionblur_time); + const Matrix<float, 3, 4> xform = get_xform_given_rolling_shutter(training_xforms[img], metadata[img].rolling_shutter, uv, 
motionblur_time); Ray ray_unnormalized; const Ray* rays_in_unnormalized = metadata[img].rays; @@ -1138,16 +1162,16 @@ __global__ void generate_training_samples_nerf( ray_unnormalized = rays_in_unnormalized[pix_idx]; /* DEBUG - compare the stored rays to the computed ones - const Matrix<float, 3, 4> xform = get_xform_given_rolling_shutter(training_xforms[img], metadata[img].rolling_shutter, xy, 0.f); + const Matrix<float, 3, 4> xform = get_xform_given_rolling_shutter(training_xforms[img], metadata[img].rolling_shutter, uv, 0.f); Ray ray2; ray2.o = xform.col(3); - ray2.d = f_theta_distortion(xy, principal_point, lens); + ray2.d = f_theta_distortion(uv, principal_point, lens); ray2.d = (xform.block<3, 3>(0, 0) * ray2.d).normalized(); if (i==1000) { printf("\n%d uv %0.3f,%0.3f pixel %0.2f,%0.2f transform from [%0.5f %0.5f %0.5f] to [%0.5f %0.5f %0.5f]\n" " origin [%0.5f %0.5f %0.5f] vs [%0.5f %0.5f %0.5f]\n" " direction [%0.5f %0.5f %0.5f] vs [%0.5f %0.5f %0.5f]\n" - , img,xy.x(), xy.y(), xy.x()*resolution.x(), xy.y()*resolution.y(), + , img,uv.x(), uv.y(), uv.x()*resolution.x(), uv.y()*resolution.y(), training_xforms[img].start.col(3).x(),training_xforms[img].start.col(3).y(),training_xforms[img].start.col(3).z(), training_xforms[img].end.col(3).x(),training_xforms[img].end.col(3).y(),training_xforms[img].end.col(3).z(), ray_unnormalized.o.x(),ray_unnormalized.o.y(),ray_unnormalized.o.z(), @@ -1157,31 +1181,10 @@ __global__ void generate_training_samples_nerf( } */ } else { - // Rays need to be inferred from the camera matrix - ray_unnormalized.o = xform.col(3); - if (lens.mode == ELensMode::FTheta) { - ray_unnormalized.d = f_theta_undistortion(xy - principal_point, lens.params, {0.f, 0.f, 1.f}); - } else if (lens.mode == ELensMode::LatLong) { - ray_unnormalized.d = latlong_to_dir(xy); - } else { - ray_unnormalized.d = { - (xy.x()-principal_point.x())*resolution.x() / focal_length.x(), - (xy.y()-principal_point.y())*resolution.y() / focal_length.y(), - 1.0f, - }; - - if (lens.mode == ELensMode::OpenCV) { - iterative_opencv_lens_undistortion(lens.params, &ray_unnormalized.d.x(), &ray_unnormalized.d.y()); - } else if (lens.mode == ELensMode::OpenCVFisheye) { - iterative_opencv_fisheye_lens_undistortion(lens.params, &ray_unnormalized.d.x(), &ray_unnormalized.d.y()); - } + ray_unnormalized = uv_to_ray(0, uv, resolution, focal_length, xform, principal_point, Vector3f::Zero(), 0.0f, 1.0f, 0.0f, {}, {}, lens, distortion); + if (!ray_unnormalized.is_valid()) { + ray_unnormalized = {xform.col(3), xform.col(2)}; } - - if (distortion_data) { - ray_unnormalized.d.head<2>() += read_image<2>(distortion_data, distortion_resolution, xy); - } - - ray_unnormalized.d = (xform.block<3, 3>(0, 0) * ray_unnormalized.d); // NOT normalized } Eigen::Vector3f ray_d_normalized = ray_unnormalized.d.normalized(); @@ -1278,7 +1281,7 @@ __global__ void compute_loss_kernel_train_nerf( const uint32_t* __restrict__ rays_counter, float loss_scale, int padded_output_width, - const float* __restrict__ envmap_data, + Buffer2DView<const Eigen::Array4f> envmap, float* __restrict__ envmap_gradient, const Vector2i envmap_resolution, ELossType envmap_loss_type, @@ -1374,8 +1377,8 @@ __global__ void compute_loss_kernel_train_nerf( uint32_t img = image_idx(ray_idx, n_rays, n_rays_total, n_training_images, cdf_img, &img_pdf); Eigen::Vector2i resolution = metadata[img].resolution; - float xy_pdf = 1.0f; - Vector2f xy = nerf_random_image_pos_training(rng, resolution, snap_to_pixel_centers, cdf_x_cond_y, cdf_y, error_map_cdf_res, img, 
&xy_pdf); + float uv_pdf = 1.0f; + Vector2f uv = nerf_random_image_pos_training(rng, resolution, snap_to_pixel_centers, cdf_x_cond_y, cdf_y, error_map_cdf_res, img, &uv_pdf); float max_level = max_level_rand_training ? (random_val(rng) * 2.0f) : 1.0f; // Multiply by 2 to ensure 50% of training is at max level if (train_with_random_bg_color) { @@ -1386,16 +1389,16 @@ __global__ void compute_loss_kernel_train_nerf( // Composit background behind envmap Array4f envmap_value; Vector3f dir; - if (envmap_data) { + if (envmap) { dir = rays_in_unnormalized[i].d.normalized(); - envmap_value = read_envmap(envmap_data, envmap_resolution, dir); + envmap_value = read_envmap(envmap, dir); background_color = envmap_value.head<3>() + background_color * (1.0f - envmap_value.w()); } Array3f exposure_scale = (0.6931471805599453f * exposure[img]).exp(); - // Array3f rgbtarget = composit_and_lerp(xy, resolution, img, training_images, background_color, exposure_scale); - // Array3f rgbtarget = composit(xy, resolution, img, training_images, background_color, exposure_scale); - Array4f texsamp = read_rgba(xy, resolution, metadata[img].pixels, metadata[img].image_data_type); + // Array3f rgbtarget = composit_and_lerp(uv, resolution, img, training_images, background_color, exposure_scale); + // Array3f rgbtarget = composit(uv, resolution, img, training_images, background_color, exposure_scale); + Array4f texsamp = read_rgba(uv, resolution, metadata[img].pixels, metadata[img].image_data_type); Array3f rgbtarget; if (train_in_linear_colors || color_space == EColorSpace::Linear) { @@ -1437,9 +1440,9 @@ __global__ void compute_loss_kernel_train_nerf( dloss_doutput += compacted_base * padded_output_width; LossAndGradient lg = loss_and_gradient(rgbtarget, rgb_ray, loss_type); - lg.loss /= img_pdf * xy_pdf; + lg.loss /= img_pdf * uv_pdf; - float target_depth = rays_in_unnormalized[i].d.norm() * ((depth_supervision_lambda > 0.0f && metadata[img].depth) ? read_depth(xy, resolution, metadata[img].depth) : -1.0f); + float target_depth = rays_in_unnormalized[i].d.norm() * ((depth_supervision_lambda > 0.0f && metadata[img].depth) ? read_depth(uv, resolution, metadata[img].depth) : -1.0f); LossAndGradient lg_depth = loss_and_gradient(Array3f::Constant(target_depth), Array3f::Constant(depth_ray), depth_loss_type); float depth_loss_gradient = target_depth > 0.0f ? depth_supervision_lambda * lg_depth.gradient.x() : 0; @@ -1447,7 +1450,7 @@ __global__ void compute_loss_kernel_train_nerf( // Essentially: variance reduction, but otherwise the same optimization. // We _dont_ want that. If importance sampling is enabled, we _do_ actually want // to change the weighting of the loss function. So don't divide. 
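// Illustrative sketch (all names below are hypothetical, not from this patch): the comment
// above explains why `lg.loss` is divided by `img_pdf * uv_pdf` while `lg.gradient` is not.
// Dividing a sample by the probability of drawing it keeps the *reported* loss an unbiased
// Monte Carlo estimate of the mean; leaving the gradient undivided lets the error-map
// importance sampling genuinely shift optimization effort toward high-error pixels.
#include <cstdio>

int main() {
	float sample_loss = 0.5f; // loss at a pixel drawn with probability density `pdf`
	float pdf = 4.0f;         // this pixel is sampled 4x more often than under uniform sampling

	float reported = sample_loss / pdf;  // unbiased estimate of the mean loss (0.125)
	float used_for_update = sample_loss; // deliberately NOT divided: re-weights training

	std::printf("reported: %f, used for updates: %f\n", reported, used_for_update);
	return 0;
}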
- // lg.gradient /= img_pdf * xy_pdf; + // lg.gradient /= img_pdf * uv_pdf; float mean_loss = lg.loss.mean(); if (loss_output) { @@ -1455,7 +1458,7 @@ __global__ void compute_loss_kernel_train_nerf( } if (error_map) { - const Vector2f pos = (xy.cwiseProduct(error_map_res.cast<float>()) - Vector2f::Constant(0.5f)).cwiseMax(0.0f).cwiseMin(error_map_res.cast<float>() - Vector2f::Constant(1.0f + 1e-4f)); + const Vector2f pos = (uv.cwiseProduct(error_map_res.cast<float>()) - Vector2f::Constant(0.5f)).cwiseMax(0.0f).cwiseMin(error_map_res.cast<float>() - Vector2f::Constant(1.0f + 1e-4f)); const Vector2i pos_int = pos.cast<int>(); const Vector2f weight = pos - pos_int.cast<float>(); @@ -1466,7 +1469,7 @@ __global__ void compute_loss_kernel_train_nerf( }; if (sharpness_data && aabb.contains(hitpoint)) { - Vector2i sharpness_pos = xy.cwiseProduct(sharpness_resolution.cast<float>()).cast<int>().cwiseMax(0).cwiseMin(sharpness_resolution - Vector2i::Constant(1)); + Vector2i sharpness_pos = uv.cwiseProduct(sharpness_resolution.cast<float>()).cast<int>().cwiseMax(0).cwiseMin(sharpness_resolution - Vector2i::Constant(1)); float sharp = sharpness_data[img * sharpness_resolution.prod() + sharpness_pos.y() * sharpness_resolution.x() + sharpness_pos.x()] + 1e-6f; // The maximum value of positive floats interpreted in uint format is the same as the maximum value of the floats. @@ -1549,7 +1552,7 @@ __global__ void compute_loss_kernel_train_nerf( if (exposure_gradient) { // Assume symmetric loss - Array3f dloss_by_dgt = -lg.gradient / xy_pdf; + Array3f dloss_by_dgt = -lg.gradient / uv_pdf; if (!train_in_linear_colors) { dloss_by_dgt /= srgb_to_linear_derivative(rgbtarget); @@ -1656,9 +1659,9 @@ __global__ void compute_cam_gradient_train_nerf( } rng.advance(ray_idx * N_MAX_RANDOM_SAMPLES_PER_RAY()); - float xy_pdf = 1.0f; + float uv_pdf = 1.0f; - Vector2f xy = nerf_random_image_pos_training(rng, resolution, snap_to_pixel_centers, cdf_x_cond_y, cdf_y, error_map_res, img, &xy_pdf); + Vector2f uv = nerf_random_image_pos_training(rng, resolution, snap_to_pixel_centers, cdf_x_cond_y, cdf_y, error_map_res, img, &uv_pdf); if (distortion_gradient) { // Projection of the raydir gradient onto the plane normal to raydir, @@ -1671,14 +1674,14 @@ __global__ void compute_cam_gradient_train_nerf( Vector3f image_plane_gradient = xform.block<3,3>(0,0).inverse() * orthogonal_ray_gradient; // Splat the resulting 2D image plane gradient into the distortion params - deposit_image_gradient<2>(image_plane_gradient.head<2>() / xy_pdf, distortion_gradient, distortion_gradient_weight, distortion_resolution, xy); + deposit_image_gradient<2>(image_plane_gradient.head<2>() / uv_pdf, distortion_gradient, distortion_gradient_weight, distortion_resolution, uv); } if (cam_pos_gradient) { // Atomically reduce the ray gradient into the xform gradient NGP_PRAGMA_UNROLL for (uint32_t j = 0; j < 3; ++j) { - atomicAdd(&cam_pos_gradient[img][j], ray_gradient.o[j] / xy_pdf); + atomicAdd(&cam_pos_gradient[img][j], ray_gradient.o[j] / uv_pdf); } } @@ -1692,7 +1695,7 @@ __global__ void compute_cam_gradient_train_nerf( // Atomically reduce the ray gradient into the xform gradient NGP_PRAGMA_UNROLL for (uint32_t j = 0; j < 3; ++j) { - atomicAdd(&cam_rot_gradient[img][j], angle_axis[j] / xy_pdf); + atomicAdd(&cam_rot_gradient[img][j], angle_axis[j] / uv_pdf); } } } @@ -1811,15 +1814,14 @@ __global__ void init_rays_with_payload_kernel_nerf( float near_distance, float plane_z, float aperture_size, + Foveation foveation, Lens lens, - const float* __restrict__ 
envmap_data, - const Vector2i envmap_resolution, - Array4f* __restrict__ framebuffer, - float* __restrict__ depthbuffer, - const float* __restrict__ distortion_data, - const Vector2i distortion_resolution, - ERenderMode render_mode, - Vector2i quilting_dims + Buffer2DView<const Eigen::Array4f> envmap, + Array4f* __restrict__ frame_buffer, + float* __restrict__ depth_buffer, + Buffer2DView<const uint8_t> hidden_area_mask, + Buffer2DView<const Eigen::Vector2f> distortion, + ERenderMode render_mode ) { uint32_t x = threadIdx.x + blockDim.x * blockIdx.x; uint32_t y = threadIdx.y + blockDim.y * blockIdx.y; @@ -1834,34 +1836,37 @@ __global__ void init_rays_with_payload_kernel_nerf( aperture_size = 0.0; } - if (quilting_dims != Vector2i::Ones()) { - apply_quilting(&x, &y, resolution, parallax_shift, quilting_dims); - } - - // TODO: pixel_to_ray also immediately computes u,v for the pixel, so this is somewhat redundant - float u = (x + 0.5f) * (1.f / resolution.x()); - float v = (y + 0.5f) * (1.f / resolution.y()); - float ray_time = rolling_shutter.x() + rolling_shutter.y() * u + rolling_shutter.z() * v + rolling_shutter.w() * ld_random_val(sample_index, idx * 72239731); - Ray ray = pixel_to_ray( + Vector2f pixel_offset = ld_random_pixel_offset(snap_to_pixel_centers ? 0 : sample_index); + Vector2f uv = Vector2f{(float)x + pixel_offset.x(), (float)y + pixel_offset.y()}.cwiseQuotient(resolution.cast<float>()); + float ray_time = rolling_shutter.x() + rolling_shutter.y() * uv.x() + rolling_shutter.z() * uv.y() + rolling_shutter.w() * ld_random_val(sample_index, idx * 72239731); + Ray ray = uv_to_ray( sample_index, - {x, y}, - resolution.cwiseQuotient(quilting_dims), + uv, + resolution, focal_length, camera_matrix0 * ray_time + camera_matrix1 * (1.f - ray_time), screen_center, parallax_shift, - snap_to_pixel_centers, near_distance, plane_z, aperture_size, + foveation, + hidden_area_mask, lens, - distortion_data, - distortion_resolution + distortion ); NerfPayload& payload = payloads[idx]; payload.max_weight = 0.0f; + depth_buffer[idx] = MAX_DEPTH(); + + if (!ray.is_valid()) { + payload.origin = ray.o; + payload.alive = false; + return; + } + if (plane_z < 0) { float n = ray.d.norm(); payload.origin = ray.o; @@ -1870,21 +1875,19 @@ __global__ void init_rays_with_payload_kernel_nerf( payload.idx = idx; payload.n_steps = 0; payload.alive = false; - depthbuffer[idx] = -plane_z; + depth_buffer[idx] = -plane_z; return; } - depthbuffer[idx] = 1e10f; - ray.d = ray.d.normalized(); - if (envmap_data) { - framebuffer[idx] = read_envmap(envmap_data, envmap_resolution, ray.d); + if (envmap) { + frame_buffer[idx] = read_envmap(envmap, ray.d); } float t = fmaxf(render_aabb.ray_intersect(render_aabb_to_local * ray.o, render_aabb_to_local * ray.d).x(), 0.0f) + 1e-6f; - if (!render_aabb.contains(render_aabb_to_local * (ray.o + ray.d * t))) { + if (!render_aabb.contains(render_aabb_to_local * ray(t))) { payload.origin = ray.o; payload.alive = false; return; @@ -1892,13 +1895,14 @@ __global__ void init_rays_with_payload_kernel_nerf( if (render_mode == ERenderMode::Distortion) { Vector2f offset = Vector2f::Zero(); - if (distortion_data) { - offset += read_image<2>(distortion_data, distortion_resolution, Vector2f((float)x + 0.5f, (float)y + 0.5f).cwiseQuotient(resolution.cast<float>())); + if (distortion) { + offset += distortion.at_lerp(Vector2f{(float)x + 0.5f, (float)y + 0.5f}.cwiseQuotient(resolution.cast<float>())); } - framebuffer[idx].head<3>() = to_rgb(offset * 50.0f); - framebuffer[idx].w() = 1.0f; - 
depthbuffer[idx] = 1.0f; - payload.origin = ray.o + ray.d * 10000.0f; + + frame_buffer[idx].head<3>() = to_rgb(offset * 50.0f); + frame_buffer[idx].w() = 1.0f; + depth_buffer[idx] = 1.0f; + payload.origin = ray(MAX_DEPTH()); payload.alive = false; return; } @@ -1987,21 +1991,20 @@ void Testbed::NerfTracer::init_rays_from_camera( const Vector4f& rolling_shutter, const Vector2f& screen_center, const Vector3f& parallax_shift, - const Vector2i& quilting_dims, bool snap_to_pixel_centers, const BoundingBox& render_aabb, const Matrix3f& render_aabb_to_local, float near_distance, float plane_z, float aperture_size, + const Foveation& foveation, const Lens& lens, - const float* envmap_data, - const Vector2i& envmap_resolution, - const float* distortion_data, - const Vector2i& distortion_resolution, + const Buffer2DView<const Array4f>& envmap, + const Buffer2DView<const Vector2f>& distortion, Eigen::Array4f* frame_buffer, float* depth_buffer, - uint8_t* grid, + const Buffer2DView<const uint8_t>& hidden_area_mask, + const uint8_t* grid, int show_accel, float cone_angle_constant, ERenderMode render_mode, @@ -2029,15 +2032,14 @@ void Testbed::NerfTracer::init_rays_from_camera( near_distance, plane_z, aperture_size, + foveation, lens, - envmap_data, - envmap_resolution, + envmap, frame_buffer, depth_buffer, - distortion_data, - distortion_resolution, - render_mode, - quilting_dims + hidden_area_mask, + distortion, + render_mode ); m_n_rays_initialized = resolution.x() * resolution.y(); @@ -2064,8 +2066,6 @@ uint32_t Testbed::NerfTracer::trace( const BoundingBox& render_aabb, const Eigen::Matrix3f& render_aabb_to_local, const BoundingBox& train_aabb, - const uint32_t n_training_images, - const TrainingXForm* training_xforms, const Vector2f& focal_length, float cone_angle_constant, const uint8_t* grid, @@ -2156,8 +2156,6 @@ uint32_t Testbed::NerfTracer::trace( train_aabb, glow_y_cutoff, glow_mode, - n_training_images, - training_xforms, camera_matrix, focal_length, depth_scale, @@ -2261,51 +2259,59 @@ const float* Testbed::get_inference_extra_dims(cudaStream_t stream) const { return dims_gpu; } -void Testbed::render_nerf(CudaRenderBuffer& render_buffer, const Vector2i& max_res, const Vector2f& focal_length, const Matrix<float, 3, 4>& camera_matrix0, const Matrix<float, 3, 4>& camera_matrix1, const Vector4f& rolling_shutter, const Vector2f& screen_center, cudaStream_t stream) { +void Testbed::render_nerf( + cudaStream_t stream, + const CudaRenderBufferView& render_buffer, + NerfNetwork<precision_t>& nerf_network, + const uint8_t* density_grid_bitfield, + const Vector2f& focal_length, + const Matrix<float, 3, 4>& camera_matrix0, + const Matrix<float, 3, 4>& camera_matrix1, + const Vector4f& rolling_shutter, + const Vector2f& screen_center, + const Foveation& foveation, + int visualized_dimension +) { float plane_z = m_slice_plane_z + m_scale; if (m_render_mode == ERenderMode::Slice) { plane_z = -plane_z; } - ERenderMode render_mode = m_visualized_dimension > -1 ? ERenderMode::EncodingVis : m_render_mode; + ERenderMode render_mode = visualized_dimension > -1 ? ERenderMode::EncodingVis : m_render_mode; const float* extra_dims_gpu = get_inference_extra_dims(stream); NerfTracer tracer; - // Our motion vector code can't undo f-theta and grid distortions -- so don't render these if DLSS is enabled. 
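// Illustrative sketch: the kernels above test views directly (`if (envmap)`, `if (distortion)`),
// which presumes a boolean conversion on Buffer2DView. That operator is not shown in this diff;
// a plausible minimal form, assuming a view counts as "present" exactly when its data pointer
// is set (struct and macro fallback here are stand-ins, not the real definitions):
#ifndef NGP_HOST_DEVICE
#define NGP_HOST_DEVICE // stand-in when compiling as plain C++
#endif

template <typename T>
struct Buffer2DViewSketch {
	T* data = nullptr;
	// Empty views are falsy, so `if (view)` cleanly guards optional buffers.
	NGP_HOST_DEVICE explicit operator bool() const { return data != nullptr; }
};

int main() {
	float px = 1.0f;
	Buffer2DViewSketch<const float> empty{};
	Buffer2DViewSketch<const float> bound{&px};
	return (!empty && bound) ? 0 : 1; // exits 0: empty view is falsy, bound view is truthy
}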
- bool render_opencv_lens = m_nerf.render_with_lens_distortion && (!render_buffer.dlss() || m_nerf.render_lens.mode == ELensMode::OpenCV || m_nerf.render_lens.mode == ELensMode::OpenCVFisheye); - bool render_grid_distortion = m_nerf.render_with_lens_distortion && !render_buffer.dlss(); - - Lens lens = render_opencv_lens ? m_nerf.render_lens : Lens{}; - + // Our motion vector code can't undo grid distortions -- so don't render grid distortion if DLSS is enabled + auto grid_distortion = m_nerf.render_with_lens_distortion && !m_dlss ? m_distortion.inference_view() : Buffer2DView<const Vector2f>{}; + Lens lens = m_nerf.render_with_lens_distortion ? m_nerf.render_lens : Lens{}; tracer.init_rays_from_camera( - render_buffer.spp(), - m_network->padded_output_width(), - m_nerf_network->n_extra_dims(), - render_buffer.in_resolution(), + render_buffer.spp, + nerf_network.padded_output_width(), + nerf_network.n_extra_dims(), + render_buffer.resolution, focal_length, camera_matrix0, camera_matrix1, rolling_shutter, screen_center, m_parallax_shift, - m_quilting_dims, m_snap_to_pixel_centers, m_render_aabb, m_render_aabb_to_local, m_render_near_distance, plane_z, m_aperture_size, + foveation, lens, - m_envmap.envmap->inference_params(), - m_envmap.resolution, - render_grid_distortion ? m_distortion.map->inference_params() : nullptr, - m_distortion.resolution, - render_buffer.frame_buffer(), - render_buffer.depth_buffer(), - m_nerf.density_grid_bitfield.data(), + m_envmap.inference_view(), + grid_distortion, + render_buffer.frame_buffer, + render_buffer.depth_buffer, + render_buffer.hidden_area_mask ? render_buffer.hidden_area_mask->const_view() : Buffer2DView<const uint8_t>{}, + density_grid_bitfield, m_nerf.show_accel, m_nerf.cone_angle_constant, render_mode, @@ -2318,20 +2324,18 @@ void Testbed::render_nerf(CudaRenderBuffer& render_buffer, const Vector2i& max_r } else { float depth_scale = 1.0f / m_nerf.training.dataset.scale; n_hit = tracer.trace( - *m_nerf_network, + nerf_network, m_render_aabb, m_render_aabb_to_local, m_aabb, - m_nerf.training.n_images_for_training, - m_nerf.training.transforms.data(), focal_length, m_nerf.cone_angle_constant, - m_nerf.density_grid_bitfield.data(), + density_grid_bitfield, render_mode, camera_matrix1, depth_scale, m_visualized_layer, - m_visualized_dimension, + visualized_dimension, m_nerf.rgb_activation, m_nerf.density_activation, m_nerf.show_accel, @@ -2347,19 +2351,19 @@ void Testbed::render_nerf(CudaRenderBuffer& render_buffer, const Vector2i& max_r if (m_render_mode == ERenderMode::Slice) { // Store colors in the normal buffer uint32_t n_elements = next_multiple(n_hit, tcnn::batch_size_granularity); - const uint32_t floats_per_coord = sizeof(NerfCoordinate) / sizeof(float) + m_nerf_network->n_extra_dims(); - const uint32_t extra_stride = m_nerf_network->n_extra_dims() * sizeof(float); // extra stride on top of base NerfCoordinate struct + const uint32_t floats_per_coord = sizeof(NerfCoordinate) / sizeof(float) + nerf_network.n_extra_dims(); + const uint32_t extra_stride = nerf_network.n_extra_dims() * sizeof(float); // extra stride on top of base NerfCoordinate struct GPUMatrix<float> positions_matrix{floats_per_coord, n_elements, stream}; GPUMatrix<float> rgbsigma_matrix{4, n_elements, stream}; linear_kernel(generate_nerf_network_inputs_at_current_position, 0, stream, n_hit, m_aabb, rays_hit.payload, PitchedPtr<NerfCoordinate>((NerfCoordinate*)positions_matrix.data(), 1, 0, extra_stride), extra_dims_gpu ); - if (m_visualized_dimension == -1) { - 
m_network->inference(stream, positions_matrix, rgbsigma_matrix); + if (visualized_dimension == -1) { + nerf_network.inference(stream, positions_matrix, rgbsigma_matrix); linear_kernel(compute_nerf_rgba, 0, stream, n_hit, (Array4f*)rgbsigma_matrix.data(), m_nerf.rgb_activation, m_nerf.density_activation, 0.01f, false); } else { - m_network->visualize_activation(stream, m_visualized_layer, m_visualized_dimension, positions_matrix, rgbsigma_matrix); + nerf_network.visualize_activation(stream, m_visualized_layer, visualized_dimension, positions_matrix, rgbsigma_matrix); } linear_kernel(shade_kernel_nerf, 0, stream, @@ -2369,8 +2373,8 @@ void Testbed::render_nerf(CudaRenderBuffer& render_buffer, const Vector2i& max_r rays_hit.payload, m_render_mode, m_nerf.training.linear_colors, - render_buffer.frame_buffer(), - render_buffer.depth_buffer() + render_buffer.frame_buffer, + render_buffer.depth_buffer ); return; } @@ -2382,8 +2386,8 @@ void Testbed::render_nerf(CudaRenderBuffer& render_buffer, const Vector2i& max_r rays_hit.payload, m_render_mode, m_nerf.training.linear_colors, - render_buffer.frame_buffer(), - render_buffer.depth_buffer() + render_buffer.frame_buffer, + render_buffer.depth_buffer ); if (render_mode == ERenderMode::Cost) { @@ -2673,7 +2677,20 @@ void Testbed::load_nerf(const fs::path& data_path) { throw std::runtime_error{"NeRF data path must either be a json file or a directory containing json files."}; } + const auto prev_aabb_scale = m_nerf.training.dataset.aabb_scale; + m_nerf.training.dataset = ngp::load_nerf(json_paths, m_nerf.sharpen); + + // Check if the NeRF network has been previously configured. + // If it has not, don't reset it. + bool previously_configured = !m_network_config["rgb_network"].is_null() + && !m_network_config["dir_encoding"].is_null(); + + if (m_nerf.training.dataset.aabb_scale != prev_aabb_scale && previously_configured) { + // The AABB scale affects network size indirectly. If it changed after loading, + // we need to reset the previously configured network to keep a consistent internal state. 
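// Illustrative sketch of why a changed aabb_scale forces the reset below: the number of
// density-grid cascades grows with aabb_scale (an assumed mapping, mirroring how the
// multi-scale grid is sized elsewhere in the codebase), and the encoding/network dimensions
// follow from it. Hypothetical helper, not part of this patch:
#include <cstdint>

uint32_t cascades_for_aabb_scale(uint32_t aabb_scale) {
	uint32_t c = 0;
	while ((1u << c) < aabb_scale) ++c; // smallest c with 2^c >= aabb_scale
	return c + 1;                       // e.g. aabb_scale 1 -> 1 cascade, 16 -> 5, 128 -> 8
}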
+ reset_network(); + } } load_nerf_post(); @@ -2785,6 +2802,40 @@ void Testbed::update_density_grid_mean_and_bitfield(cudaStream_t stream) { for (uint32_t level = 1; level < NERF_CASCADES(); ++level) { linear_kernel(bitfield_max_pool, 0, stream, n_elements/64, m_nerf.get_density_grid_bitfield_mip(level-1), m_nerf.get_density_grid_bitfield_mip(level)); } + + set_all_devices_dirty(); +} + +__global__ void mark_density_grid_in_sphere_empty_kernel(const uint32_t n_elements, float* density_grid, Vector3f pos, float radius) { + const uint32_t i = threadIdx.x + blockIdx.x * blockDim.x; + if (i >= n_elements) return; + + // Decode the cell's mip level and Morton-encoded position within the grid + uint32_t level = i / NERF_GRID_N_CELLS(); + uint32_t pos_idx = i % NERF_GRID_N_CELLS(); + + uint32_t x = tcnn::morton3D_invert(pos_idx>>0); + uint32_t y = tcnn::morton3D_invert(pos_idx>>1); + uint32_t z = tcnn::morton3D_invert(pos_idx>>2); + + float cell_radius = scalbnf(SQRT3(), level) / NERF_GRIDSIZE(); + Vector3f cell_pos = ((Vector3f{(float)x+0.5f, (float)y+0.5f, (float)z+0.5f}) / NERF_GRIDSIZE() - Vector3f::Constant(0.5f)) * scalbnf(1.0f, level) + Vector3f::Constant(0.5f); + + // Disable if the cell touches the sphere (conservatively, by bounding the cell with a sphere) + if ((pos - cell_pos).norm() < radius + cell_radius) { + density_grid[i] = -1.0f; + } +} + +void Testbed::mark_density_grid_in_sphere_empty(const Vector3f& pos, float radius, cudaStream_t stream) { + const uint32_t n_elements = NERF_GRID_N_CELLS() * (m_nerf.max_cascade + 1); + if (m_nerf.density_grid.size() != n_elements) { + return; + } + + linear_kernel(mark_density_grid_in_sphere_empty_kernel, 0, stream, n_elements, m_nerf.density_grid.data(), pos, radius); + + update_density_grid_mean_and_bitfield(stream); } void Testbed::NerfCounters::prepare_for_training_steps(cudaStream_t stream) { @@ -3167,8 +3218,7 @@ void Testbed::train_nerf_step(uint32_t target_batch_size, Testbed::NerfCounters& m_nerf.training.snap_to_pixel_centers, m_nerf.training.train_envmap, m_nerf.cone_angle_constant, - m_distortion.map->params(), - m_distortion.resolution, + m_distortion.view(), sample_focal_plane_proportional_to_error ? m_nerf.training.error_map.cdf_x_cond_y.data() : nullptr, sample_focal_plane_proportional_to_error ? m_nerf.training.error_map.cdf_y.data() : nullptr, sample_image_proportional_to_error ?
m_nerf.training.error_map.cdf_img.data() : nullptr, @@ -3197,7 +3247,7 @@ void Testbed::train_nerf_step(uint32_t target_batch_size, Testbed::NerfCounters& ray_counter, LOSS_SCALE, padded_output_width, - m_envmap.envmap->params(), + m_envmap.view(), envmap_gradient, m_envmap.resolution, m_envmap.loss_type, diff --git a/src/testbed_sdf.cu b/src/testbed_sdf.cu index aced131525d5198518e14ed2a97ac75e180f6a6d..2332bec11f2b36773872657d6c941edab8f52a81 100644 --- a/src/testbed_sdf.cu +++ b/src/testbed_sdf.cu @@ -156,7 +156,7 @@ __global__ void advance_pos_kernel_sdf( BoundingBox aabb, float floor_y, const TriangleOctreeNode* __restrict__ octree_nodes, - int max_depth, + int max_octree_depth, float distance_scale, float maximum_distance, float k, @@ -181,8 +181,8 @@ __global__ void advance_pos_kernel_sdf( pos += distance * payload.dir; // Skip over regions not covered by the octree - if (octree_nodes && !TriangleOctree::contains(octree_nodes, max_depth, pos)) { - float octree_distance = (TriangleOctree::ray_intersect(octree_nodes, max_depth, pos, payload.dir) + 1e-6f); + if (octree_nodes && !TriangleOctree::contains(octree_nodes, max_octree_depth, pos)) { + float octree_distance = (TriangleOctree::ray_intersect(octree_nodes, max_octree_depth, pos, payload.dir) + 1e-6f); distance += octree_distance; pos += octree_distance * payload.dir; } @@ -242,7 +242,7 @@ __global__ void prepare_shadow_rays(const uint32_t n_elements, SdfPayload* __restrict__ payloads, BoundingBox aabb, const TriangleOctreeNode* __restrict__ octree_nodes, - int max_depth + int max_octree_depth ) { const uint32_t i = threadIdx.x + blockIdx.x * blockDim.x; if (i >= n_elements) return; @@ -256,21 +256,21 @@ __global__ void prepare_shadow_rays(const uint32_t n_elements, float t = fmaxf(aabb.ray_intersect(view_pos, dir).x() + 1e-6f, 0.0f); view_pos += t * dir; - if (octree_nodes && !TriangleOctree::contains(octree_nodes, max_depth, view_pos)) { - t = fmaxf(0.0f, TriangleOctree::ray_intersect(octree_nodes, max_depth, view_pos, dir) + 1e-6f); + if (octree_nodes && !TriangleOctree::contains(octree_nodes, max_octree_depth, view_pos)) { + t = fmaxf(0.0f, TriangleOctree::ray_intersect(octree_nodes, max_octree_depth, view_pos, dir) + 1e-6f); view_pos += t * dir; } positions[i] = view_pos; if (!aabb.contains(view_pos)) { - distances[i] = 10000.0f; + distances[i] = MAX_DEPTH(); payload.alive = false; min_visibility[i] = 1.0f; return; } - distances[i] = 10000.0f; + distances[i] = MAX_DEPTH(); payload.idx = i; payload.dir = dir; payload.n_steps = 0; @@ -322,13 +322,13 @@ __global__ void shade_kernel_sdf( // The normal in memory isn't normalized yet Vector3f normal = normals[i].normalized(); - Vector3f pos = positions[i]; bool floor = false; - if (pos.y() < floor_y+0.001f && payload.dir.y() < 0.f) { + if (pos.y() < floor_y + 0.001f && payload.dir.y() < 0.f) { normal = Vector3f(0.f, 1.f, 0.f); floor = true; } + Vector3f cam_pos = camera_matrix.col(3); Vector3f cam_fwd = camera_matrix.col(2); float ao = powf(0.92f, payload.n_steps * 0.5f) * (1.f / 0.92f); @@ -456,12 +456,12 @@ __global__ void scale_to_aabb_kernel(uint32_t n_elements, BoundingBox aabb, Vect inout[i] = aabb.min + inout[i].cwiseProduct(aabb.diag()); } -__global__ void compare_signs_kernel(uint32_t n_elements, const Vector3f *positions, const float *distances_ref, const float *distances_model, uint32_t *counters, const TriangleOctreeNode* octree_nodes, int max_depth) { +__global__ void compare_signs_kernel(uint32_t n_elements, const Vector3f *positions, const float *distances_ref, const 
float *distances_model, uint32_t *counters, const TriangleOctreeNode* octree_nodes, int max_octree_depth) { uint32_t i = blockIdx.x * blockDim.x + threadIdx.x; if (i >= n_elements) return; bool inside1 = distances_ref[i]<=0.f; bool inside2 = distances_model[i]<=0.f; - if (octree_nodes && !TriangleOctree::contains(octree_nodes, max_depth, positions[i])) { + if (octree_nodes && !TriangleOctree::contains(octree_nodes, max_octree_depth, positions[i])) { inside2=inside1; // assume, when using the octree, that the model is always correct outside the octree. atomicAdd(&counters[6],1); // outside the octree } else { @@ -506,12 +506,13 @@ __global__ void init_rays_with_payload_kernel_sdf( float near_distance, float plane_z, float aperture_size, - const float* __restrict__ envmap_data, - const Vector2i envmap_resolution, - Array4f* __restrict__ framebuffer, - float* __restrict__ depthbuffer, + Foveation foveation, + Buffer2DView<const Eigen::Array4f> envmap, + Array4f* __restrict__ frame_buffer, + float* __restrict__ depth_buffer, + Buffer2DView<const uint8_t> hidden_area_mask, const TriangleOctreeNode* __restrict__ octree_nodes = nullptr, - int max_depth = 0 + int max_octree_depth = 0 ) { uint32_t x = threadIdx.x + blockDim.x * blockIdx.x; uint32_t y = threadIdx.y + blockDim.y * blockIdx.y; @@ -526,30 +527,54 @@ __global__ void init_rays_with_payload_kernel_sdf( aperture_size = 0.0; } - Ray ray = pixel_to_ray(sample_index, {x, y}, resolution, focal_length, camera_matrix, screen_center, parallax_shift, snap_to_pixel_centers, near_distance, plane_z, aperture_size); + Ray ray = pixel_to_ray( + sample_index, + {x, y}, + resolution, + focal_length, + camera_matrix, + screen_center, + parallax_shift, + snap_to_pixel_centers, + near_distance, + plane_z, + aperture_size, + foveation, + hidden_area_mask + ); + + distances[idx] = MAX_DEPTH(); + depth_buffer[idx] = MAX_DEPTH(); + + SdfPayload& payload = payloads[idx]; - distances[idx] = 10000.0f; + if (!ray.is_valid()) { + payload.dir = ray.d; + payload.idx = idx; + payload.n_steps = 0; + payload.alive = false; + positions[idx] = ray.o; + return; + } if (plane_z < 0) { float n = ray.d.norm(); - SdfPayload& payload = payloads[idx]; payload.dir = (1.0f/n) * ray.d; payload.idx = idx; payload.n_steps = 0; payload.alive = false; positions[idx] = ray.o - plane_z * ray.d; - depthbuffer[idx] = -plane_z; + depth_buffer[idx] = -plane_z; return; } - depthbuffer[idx] = 1e10f; - ray.d = ray.d.normalized(); float t = max(aabb.ray_intersect(ray.o, ray.d).x(), 0.0f); - ray.o = ray.o + (t + 1e-6f) * ray.d; - if (octree_nodes && !TriangleOctree::contains(octree_nodes, max_depth, ray.o)) { - t = max(0.0f, TriangleOctree::ray_intersect(octree_nodes, max_depth, ray.o, ray.d)); + ray.advance(t + 1e-6f); + + if (octree_nodes && !TriangleOctree::contains(octree_nodes, max_octree_depth, ray.o)) { + t = max(0.0f, TriangleOctree::ray_intersect(octree_nodes, max_octree_depth, ray.o, ray.d)); if (ray.o.y() > floor_y && ray.d.y() < 0.f) { float floor_dist = -(ray.o.y() - floor_y) / ray.d.y(); if (floor_dist > 0.f) { @@ -557,16 +582,15 @@ __global__ void init_rays_with_payload_kernel_sdf( } } - ray.o = ray.o + (t + 1e-6f) * ray.d; + ray.advance(t + 1e-6f); } positions[idx] = ray.o; - if (envmap_data) { - framebuffer[idx] = read_envmap(envmap_data, envmap_resolution, ray.d); + if (envmap) { + frame_buffer[idx] = read_envmap(envmap, ray.d); } - SdfPayload& payload = payloads[idx]; payload.dir = ray.d; payload.idx = idx; payload.n_steps = 0; @@ -600,10 +624,11 @@ void 
Testbed::SphereTracer::init_rays_from_camera( float near_distance, float plane_z, float aperture_size, - const float* envmap_data, - const Vector2i& envmap_resolution, + const Foveation& foveation, + const Buffer2DView<const Array4f>& envmap, Array4f* frame_buffer, float* depth_buffer, + const Buffer2DView<const uint8_t>& hidden_area_mask, const TriangleOctree* octree, uint32_t n_octree_levels, cudaStream_t stream @@ -630,10 +655,11 @@ void Testbed::SphereTracer::init_rays_from_camera( near_distance, plane_z, aperture_size, - envmap_data, - envmap_resolution, + foveation, + envmap, frame_buffer, depth_buffer, + hidden_area_mask, octree ? octree->nodes_gpu() : nullptr, octree ? n_octree_levels : 0 ); @@ -840,14 +866,15 @@ void Testbed::FiniteDifferenceNormalsApproximator::normal(uint32_t n_elements, c } void Testbed::render_sdf( + cudaStream_t stream, const distance_fun_t& distance_function, const normals_fun_t& normals_function, - CudaRenderBuffer& render_buffer, - const Vector2i& max_res, + const CudaRenderBufferView& render_buffer, const Vector2f& focal_length, const Matrix<float, 3, 4>& camera_matrix, const Vector2f& screen_center, - cudaStream_t stream + const Foveation& foveation, + int visualized_dimension ) { float plane_z = m_slice_plane_z + m_scale; if (m_render_mode == ERenderMode::Slice) { @@ -865,8 +892,8 @@ void Testbed::render_sdf( BoundingBox sdf_bounding_box = m_aabb; sdf_bounding_box.inflate(m_sdf.zero_offset); tracer.init_rays_from_camera( - render_buffer.spp(), - render_buffer.in_resolution(), + render_buffer.spp, + render_buffer.resolution, focal_length, camera_matrix, screen_center, @@ -877,10 +904,11 @@ void Testbed::render_sdf( m_render_near_distance, plane_z, m_aperture_size, - m_envmap.envmap->inference_params(), - m_envmap.resolution, - render_buffer.frame_buffer(), - render_buffer.depth_buffer(), + foveation, + m_envmap.inference_view(), + render_buffer.frame_buffer, + render_buffer.depth_buffer, + render_buffer.hidden_area_mask ? render_buffer.hidden_area_mask->const_view() : Buffer2DView<const uint8_t>{}, octree_ptr, n_octree_levels, stream @@ -912,10 +940,11 @@ void Testbed::render_sdf( } else { n_hit = trace(tracer); } + RaysSdfSoa& rays_hit = m_render_mode == ERenderMode::Slice || gt_raytrace ? tracer.rays_init() : tracer.rays_hit(); if (m_render_mode == ERenderMode::Slice) { - if (m_visualized_dimension == -1) { + if (visualized_dimension == -1) { distance_function(n_hit, rays_hit.pos, rays_hit.distance, stream); extract_dimension_pos_neg_kernel<float><<<n_blocks_linear(n_hit*3), n_threads_linear, 0, stream>>>(n_hit*3, 0, 1, 3, rays_hit.distance, CM, (float*)rays_hit.normal); } else { @@ -924,11 +953,11 @@ void Testbed::render_sdf( GPUMatrix<float> positions_matrix((float*)rays_hit.pos, 3, n_elements); GPUMatrix<float> colors_matrix((float*)rays_hit.normal, 3, n_elements); - m_network->visualize_activation(stream, m_visualized_layer, m_visualized_dimension, positions_matrix, colors_matrix); + m_network->visualize_activation(stream, m_visualized_layer, visualized_dimension, positions_matrix, colors_matrix); } } - ERenderMode render_mode = (m_visualized_dimension > -1 || m_render_mode == ERenderMode::Slice) ? ERenderMode::EncodingVis : m_render_mode; + ERenderMode render_mode = (visualized_dimension > -1 || m_render_mode == ERenderMode::Slice) ? 
ERenderMode::EncodingVis : m_render_mode; if (render_mode == ERenderMode::Shade || render_mode == ERenderMode::Normals) { if (m_sdf.analytic_normals || gt_raytrace) { normals_function(n_hit, rays_hit.pos, rays_hit.normal, stream); @@ -964,6 +993,7 @@ void Testbed::render_sdf( octree_ptr ? octree_ptr->nodes_gpu() : nullptr, n_octree_levels ); + uint32_t n_hit_shadow = trace(shadow_tracer); auto& shadow_rays_hit = gt_raytrace ? shadow_tracer.rays_init() : shadow_tracer.rays_hit(); @@ -984,7 +1014,7 @@ void Testbed::render_sdf( GPUMatrix<float> positions_matrix((float*)rays_hit.pos, 3, n_elements); GPUMatrix<float> colors_matrix((float*)rays_hit.normal, 3, n_elements); - m_network->visualize_activation(stream, m_visualized_layer, m_visualized_dimension, positions_matrix, colors_matrix); + m_network->visualize_activation(stream, m_visualized_layer, visualized_dimension, positions_matrix, colors_matrix); } linear_kernel(shade_kernel_sdf, 0, stream, @@ -1000,8 +1030,8 @@ void Testbed::render_sdf( rays_hit.normal, rays_hit.distance, rays_hit.payload, - render_buffer.frame_buffer(), - render_buffer.depth_buffer() + render_buffer.frame_buffer, + render_buffer.depth_buffer ); if (render_mode == ERenderMode::Cost) { diff --git a/src/testbed_volume.cu b/src/testbed_volume.cu index 10306cb0ca6e0dbca0f59f32923c9c84df79b6a8..c8c7a09a7236b2740b0d6f5b5da5d1496a68673d 100644 --- a/src/testbed_volume.cu +++ b/src/testbed_volume.cu @@ -218,10 +218,11 @@ __global__ void init_rays_volume( float near_distance, float plane_z, float aperture_size, - const float* __restrict__ envmap_data, - const Vector2i envmap_resolution, - Array4f* __restrict__ framebuffer, - float* __restrict__ depthbuffer, + Foveation foveation, + Buffer2DView<const Array4f> envmap, + Array4f* __restrict__ frame_buffer, + float* __restrict__ depth_buffer, + Buffer2DView<const uint8_t> hidden_area_mask, default_rng_t rng, const uint8_t *bitgrid, float distance_scale, @@ -240,20 +241,42 @@ __global__ void init_rays_volume( if (plane_z < 0) { aperture_size = 0.0; } - Ray ray = pixel_to_ray(sample_index, {x, y}, resolution, focal_length, camera_matrix, screen_center, parallax_shift, snap_to_pixel_centers, near_distance, plane_z, aperture_size); + + Ray ray = pixel_to_ray( + sample_index, + {x, y}, + resolution, + focal_length, + camera_matrix, + screen_center, + parallax_shift, + snap_to_pixel_centers, + near_distance, + plane_z, + aperture_size, + foveation, + hidden_area_mask + ); + + if (!ray.is_valid()) { + depth_buffer[idx] = MAX_DEPTH(); + return; + } + ray.d = ray.d.normalized(); auto box_intersection = aabb.ray_intersect(ray.o, ray.d); float t = max(box_intersection.x(), 0.0f); - ray.o = ray.o + (t + 1e-6f) * ray.d; + ray.advance(t + 1e-6f); float scale = distance_scale / global_majorant; + if (t >= box_intersection.y() || !walk_to_next_event(rng, aabb, ray.o, ray.d, bitgrid, scale)) { - framebuffer[idx] = proc_envmap_render(ray.d, up_dir, sun_dir, sky_col); - depthbuffer[idx] = 1e10f; + frame_buffer[idx] = proc_envmap_render(ray.d, up_dir, sun_dir, sky_col); + depth_buffer[idx] = MAX_DEPTH(); } else { uint32_t dstidx = atomicAdd(pixel_counter, 1); positions[dstidx] = ray.o; payloads[dstidx] = {ray.d, Array4f::Constant(0.f), idx}; - depthbuffer[idx] = camera_matrix.col(2).dot(ray.o - camera_matrix.col(3)); + depth_buffer[idx] = camera_matrix.col(2).dot(ray.o - camera_matrix.col(3)); } } @@ -276,8 +299,7 @@ __global__ void volume_render_kernel_gt( float distance_scale, float albedo, float scattering, - Array4f* __restrict__ framebuffer, - 
float* __restrict__ depthbuffer + Array4f* __restrict__ frame_buffer ) { uint32_t idx = threadIdx.x + blockDim.x * blockIdx.x; if (idx>=n_pixels || idx>=pixel_counter_in[0]) @@ -325,7 +347,7 @@ __global__ void volume_render_kernel_gt( } else { col = proc_envmap_render(dir, up_dir, sun_dir, sky_col); } - framebuffer[pixidx] = col; + frame_buffer[pixidx] = col; } __global__ void volume_render_kernel_step( @@ -351,8 +373,7 @@ __global__ void volume_render_kernel_step( float distance_scale, float albedo, float scattering, - Array4f* __restrict__ framebuffer, - float* __restrict__ depthbuffer, + Array4f* __restrict__ frame_buffer, bool force_finish_ray ) { uint32_t idx = threadIdx.x + blockDim.x * blockIdx.x; @@ -382,23 +403,25 @@ __global__ void volume_render_kernel_step( payload.col.w() += alpha; if (payload.col.w() > 0.99f || !walk_to_next_event(rng, aabb, pos, dir, bitgrid, scale) || force_finish_ray) { payload.col += (1.f-payload.col.w()) * proc_envmap_render(dir, up_dir, sun_dir, sky_col); - framebuffer[pixidx] = payload.col; + frame_buffer[pixidx] = payload.col; return; } uint32_t dstidx = atomicAdd(pixel_counter_out, 1); - positions_out[dstidx]=pos; - payloads_out[dstidx]=payload; + positions_out[dstidx] = pos; + payloads_out[dstidx] = payload; } -void Testbed::render_volume(CudaRenderBuffer& render_buffer, +void Testbed::render_volume( + cudaStream_t stream, + const CudaRenderBufferView& render_buffer, const Vector2f& focal_length, const Matrix<float, 3, 4>& camera_matrix, const Vector2f& screen_center, - cudaStream_t stream + const Foveation& foveation ) { float plane_z = m_slice_plane_z + m_scale; float distance_scale = 1.f/std::max(m_volume.inv_distance_scale,0.01f); - auto res = render_buffer.in_resolution(); + auto res = render_buffer.resolution; size_t n_pixels = (size_t)res.x() * res.y(); for (uint32_t i=0;i<2;++i) { @@ -413,7 +436,7 @@ void Testbed::render_volume(CudaRenderBuffer& render_buffer, const dim3 threads = { 16, 8, 1 }; const dim3 blocks = { div_round_up((uint32_t)res.x(), threads.x), div_round_up((uint32_t)res.y(), threads.y), 1 }; init_rays_volume<<<blocks, threads, 0, stream>>>( - render_buffer.spp(), + render_buffer.spp, m_volume.pos[0].data(), m_volume.payload[0].data(), m_volume.hit_counter.data(), @@ -427,10 +450,11 @@ void Testbed::render_volume(CudaRenderBuffer& render_buffer, m_render_near_distance, plane_z, m_aperture_size, - m_envmap.envmap->inference_params(), - m_envmap.resolution, - render_buffer.frame_buffer(), - render_buffer.depth_buffer(), + foveation, + m_envmap.inference_view(), + render_buffer.frame_buffer, + render_buffer.depth_buffer, + render_buffer.hidden_area_mask ? render_buffer.hidden_area_mask->const_view() : Buffer2DView<const uint8_t>{}, m_rng, m_volume.bitgrid.data(), distance_scale, @@ -466,8 +490,7 @@ void Testbed::render_volume(CudaRenderBuffer& render_buffer, distance_scale, std::min(m_volume.albedo,0.995f), m_volume.scattering, - render_buffer.frame_buffer(), - render_buffer.depth_buffer() + render_buffer.frame_buffer ); m_rng.advance(n_pixels*256); } else { @@ -508,8 +531,7 @@ void Testbed::render_volume(CudaRenderBuffer& render_buffer, distance_scale, std::min(m_volume.albedo,0.995f), m_volume.scattering, - render_buffer.frame_buffer(), - render_buffer.depth_buffer(), + render_buffer.frame_buffer, (iter>=max_iter-1) ); m_rng.advance(n_pixels*256);
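// Illustrative sketch: `walk_to_next_event` above conceptually performs Woodcock (delta)
// tracking against the majorant density. Free-flight distances are drawn from an exponential
// distribution under the majorant; tentative events are accepted with probability
// density/majorant. The real kernel additionally skips empty space via the `bitgrid`
// occupancy mask; `density` and the function name here are placeholders.
#include <cmath>
#include <random>

bool walk_to_next_event_sketch(std::mt19937& gen, float majorant, float t_max, float& t) {
	std::uniform_real_distribution<float> u(0.0f, 1.0f);
	auto density = [](float) { return 0.5f; }; // placeholder medium density
	while (true) {
		t -= std::log(1.0f - u(gen)) / majorant; // exponential free-flight step
		if (t >= t_max) {
			return false; // exited the volume without a real event
		}
		if (density(t) > u(gen) * majorant) {
			return true; // real (non-null) scattering event at parameter t
		}
	}
}

int main() {
	std::mt19937 gen{42};
	float t = 0.0f;
	bool hit = walk_to_next_event_sketch(gen, /*majorant=*/1.0f, /*t_max=*/10.0f, t);
	return hit ? 0 : 1;
}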