diff --git a/include/neural-graphics-primitives/dlss.h b/include/neural-graphics-primitives/dlss.h index 9f6dbdc0acdf3db3cca93c76a79ea3a1ead9c601..dbe86fccca1cd0d911535b340e8c2a05bd406a5b 100644 --- a/include/neural-graphics-primitives/dlss.h +++ b/include/neural-graphics-primitives/dlss.h @@ -26,6 +26,11 @@ class IDlss { public: virtual ~IDlss() {} + virtual void update_feature( + const Eigen::Vector2i& in_resolution, + bool is_hdr, + bool sharpen + ) = 0; virtual void run( const Eigen::Vector2i& in_resolution, bool is_hdr, @@ -42,8 +47,10 @@ public: virtual Eigen::Vector2i clamp_resolution(const Eigen::Vector2i& resolution) const = 0; virtual Eigen::Vector2i out_resolution() const = 0; + virtual Eigen::Vector2i max_out_resolution() const = 0; virtual bool is_hdr() const = 0; + virtual bool sharpen() const = 0; virtual EDlssQuality quality() const = 0; }; diff --git a/include/neural-graphics-primitives/render_buffer.h b/include/neural-graphics-primitives/render_buffer.h index 99756c6c559084a27d890d035ae323474e03ea28..73466ff3008d78863dea9a075240b5907d2d8552 100644 --- a/include/neural-graphics-primitives/render_buffer.h +++ b/include/neural-graphics-primitives/render_buffer.h @@ -95,11 +95,11 @@ public: GLuint texture(); - cudaSurfaceObject_t surface() override ; + cudaSurfaceObject_t surface() override; - cudaArray_t array() override ; + cudaArray_t array() override; - void blit_from_cuda_mapping() ; + void blit_from_cuda_mapping(); const std::string& texture_name() const { return m_texture_name; } @@ -162,7 +162,9 @@ public: CudaRenderBuffer(const std::shared_ptr<SurfaceProvider>& surf) : m_surface_provider{surf} {} CudaRenderBuffer(const CudaRenderBuffer& other) = delete; + CudaRenderBuffer& operator=(const CudaRenderBuffer& other) = delete; CudaRenderBuffer(CudaRenderBuffer&& other) = default; + CudaRenderBuffer& operator=(CudaRenderBuffer&& other) = default; cudaSurfaceObject_t surface() { return m_surface_provider->surface(); @@ -186,6 +188,10 @@ public: return m_spp; } + void set_spp(uint32_t value) { + m_spp = value; + } + Eigen::Array4f* frame_buffer() const { return m_frame_buffer.data(); } @@ -249,7 +255,7 @@ public: } } - void enable_dlss(const Eigen::Vector2i& out_res); + void enable_dlss(const Eigen::Vector2i& max_out_res); void disable_dlss(); void set_dlss_sharpening(float value) { m_dlss_sharpening = value; diff --git a/include/neural-graphics-primitives/testbed.h b/include/neural-graphics-primitives/testbed.h index 1503674304e944289d9534ca639cc6b38809c99f..9e40b6a3a16e805f65e80bdc15419a8b4ae7ffae 100644 --- a/include/neural-graphics-primitives/testbed.h +++ b/include/neural-graphics-primitives/testbed.h @@ -790,7 +790,7 @@ public: float m_render_near_distance = 0.0f; float m_slice_plane_z = 0.0f; bool m_floor_enable = false; - inline float get_floor_y() const { return m_floor_enable ? m_aabb.min.y()+0.001f : -10000.f; } + inline float get_floor_y() const { return m_floor_enable ? m_aabb.min.y() + 0.001f : -10000.f; } BoundingBox m_raw_aabb; BoundingBox m_aabb; BoundingBox m_render_aabb; diff --git a/src/dlss.cu b/src/dlss.cu index 9bb402c4d63f93f941fcf70993de48abd1da9995..627dbd986513a6c5575be575cd36e46ac22bb95d 100644 --- a/src/dlss.cu +++ b/src/dlss.cu @@ -417,7 +417,7 @@ void vulkan_and_ngx_init() { throw std::runtime_error{fmt::format("DLSS not available: {}", ngx_error_string(ngx_result))}; } - tlog::success() << "Initialized Vulkan and NGX on device #" << device_id << ": " << physical_device_properties.deviceName; + tlog::success() << "Initialized Vulkan and NGX on GPU #" << device_id << ": " << physical_device_properties.deviceName; } size_t dlss_allocated_bytes() { @@ -878,6 +878,10 @@ public: return m_specs.quality; } + Vector2i out_resolution() const { + return m_specs.out_resolution; + } + Vector2i clamp_resolution(const Vector2i& resolution) const { return m_specs.clamp_resolution(resolution); } @@ -895,21 +899,44 @@ private: class Dlss : public IDlss { public: - Dlss(const Eigen::Vector2i& out_resolution) + Dlss(const Eigen::Vector2i& max_out_resolution) : - m_out_resolution{out_resolution}, + m_max_out_resolution{max_out_resolution}, // Allocate all buffers at output resolution and use dynamic sub-rects // to use subsets of them. This avoids re-allocations when using DLSS // with dynamically changing input resolution. - m_frame_buffer{out_resolution, 4}, - m_depth_buffer{out_resolution, 1}, - m_mvec_buffer{out_resolution, 2}, + m_frame_buffer{max_out_resolution, 4}, + m_depth_buffer{max_out_resolution, 1}, + m_mvec_buffer{max_out_resolution, 2}, m_exposure_buffer{{1, 1}, 1}, - m_output_buffer{out_resolution, 4} + m_output_buffer{max_out_resolution, 4} { + // Various quality modes of DLSS for (int i = 0; i < (int)EDlssQuality::NumDlssQualitySettings; ++i) { try { - auto specs = dlss_feature_specs(out_resolution, (EDlssQuality)i); + auto specs = dlss_feature_specs(max_out_resolution, (EDlssQuality)i); + + // Only emplace the specs if the feature can be created in practice! + DlssFeature{specs, true, true}; + DlssFeature{specs, true, false}; + DlssFeature{specs, false, true}; + DlssFeature{specs, false, false}; + m_dlss_specs.emplace_back(specs); + } catch (...) {} + } + + // For super insane performance requirements (more than 3x upscaling) try UltraPerformance + // with reduced output resolutions for 4.5x, 6x, 9x. + std::vector<Vector2i> reduced_out_resolutions = { + max_out_resolution / 3 * 2, + max_out_resolution / 2, + max_out_resolution / 3, + // max_out_resolution / 4, + }; + + for (const auto& out_resolution : reduced_out_resolutions) { + try { + auto specs = dlss_feature_specs(out_resolution, EDlssQuality::UltraPerformance); // Only emplace the specs if the feature can be created in practice! DlssFeature{specs, true, true}; @@ -926,20 +953,14 @@ public: m_dlss_feature = nullptr; } - void run( - const Vector2i& in_resolution, - bool is_hdr, - float sharpening, - const Vector2f& jitter_offset, - bool shall_reset - ) override { + void update_feature(const Vector2i& in_resolution, bool is_hdr, bool sharpen) override { CUDA_CHECK_THROW(cudaDeviceSynchronize()); - EDlssQuality quality; + DlssFeatureSpecs specs; bool found = false; - for (const auto& specs : m_dlss_specs) { - if (specs.distance(in_resolution) == 0.0f) { - quality = specs.quality; + for (const auto& s : m_dlss_specs) { + if (s.distance(in_resolution) == 0.0f) { + specs = s; found = true; } } @@ -948,10 +969,21 @@ public: throw std::runtime_error{"Dlss::run called with invalid input resolution."}; } - bool sharpen = sharpening != 0.0f; - if (!m_dlss_feature || m_dlss_feature->is_hdr() != is_hdr || m_dlss_feature->sharpen() != sharpen || m_dlss_feature->quality() != quality) { - m_dlss_feature.reset(new DlssFeature{m_out_resolution, is_hdr, sharpen, quality}); + if (!m_dlss_feature || m_dlss_feature->is_hdr() != is_hdr || m_dlss_feature->sharpen() != sharpen || m_dlss_feature->quality() != specs.quality || m_dlss_feature->out_resolution() != specs.out_resolution) { + m_dlss_feature.reset(new DlssFeature{specs.out_resolution, is_hdr, sharpen, specs.quality}); } + } + + void run( + const Vector2i& in_resolution, + bool is_hdr, + float sharpening, + const Vector2f& jitter_offset, + bool shall_reset + ) override { + CUDA_CHECK_THROW(cudaDeviceSynchronize()); + + update_feature(in_resolution, is_hdr, sharpening != 0.0f); m_dlss_feature->run( in_resolution, @@ -1001,13 +1033,21 @@ public: } Vector2i out_resolution() const override { - return m_out_resolution; + return m_dlss_feature ? m_dlss_feature->out_resolution() : m_max_out_resolution; + } + + Vector2i max_out_resolution() const override { + return m_max_out_resolution; } bool is_hdr() const override { return m_dlss_feature && m_dlss_feature->is_hdr(); } + bool sharpen() const override { + return m_dlss_feature && m_dlss_feature->sharpen(); + } + EDlssQuality quality() const override { return m_dlss_feature ? m_dlss_feature->quality() : EDlssQuality::None; } @@ -1022,7 +1062,7 @@ private: VulkanTexture m_exposure_buffer; VulkanTexture m_output_buffer; - Vector2i m_out_resolution; + Vector2i m_max_out_resolution; }; std::shared_ptr<IDlss> dlss_init(const Eigen::Vector2i& out_resolution) { diff --git a/src/render_buffer.cu b/src/render_buffer.cu index a9a6d47724c53b6094782619bc3c725f8eb9ecbf..12ab7538f0ba475ec37684d1fbc85eb944ac6123 100644 --- a/src/render_buffer.cu +++ b/src/render_buffer.cu @@ -741,12 +741,15 @@ void CudaRenderBuffer::overlay_false_color(Vector2i training_resolution, bool to ); } -void CudaRenderBuffer::enable_dlss(const Eigen::Vector2i& out_res) { +void CudaRenderBuffer::enable_dlss(const Eigen::Vector2i& max_out_res) { #ifdef NGP_VULKAN - if (!m_dlss || m_dlss->out_resolution() != out_res) { - m_dlss = dlss_init(out_res); + if (!m_dlss || m_dlss->max_out_resolution() != max_out_res) { + m_dlss = dlss_init(max_out_res); + } + + if (m_dlss) { + resize(m_dlss->clamp_resolution(in_resolution())); } - resize(in_resolution()); #else throw std::runtime_error{"NGP was compiled without Vulkan/NGX/DLSS support."}; #endif diff --git a/src/testbed.cu b/src/testbed.cu index 886292c6bdc7d025053710f33c97ba2a704f01a9..6d8ef2c355774e7bc2b73ee9e438c7d1d064bcd4 100644 --- a/src/testbed.cu +++ b/src/testbed.cu @@ -209,7 +209,7 @@ void Testbed::set_visualized_dim(int dim) { } void Testbed::translate_camera(const Vector3f& rel) { - m_camera.col(3) += m_camera.block<3,3>(0,0) * rel * m_bounding_radius; + m_camera.col(3) += m_camera.block<3, 3>(0, 0) * rel * m_bounding_radius; reset_accumulation(true); } @@ -524,7 +524,7 @@ void Testbed::imgui() { } ImGui::SameLine(); ImGui::PushItemWidth(400.f); - ImGui::InputText("File", opt_extr_filename_buf, sizeof(opt_extr_filename_buf)); + ImGui::InputText("File##Extrinsics file path", opt_extr_filename_buf, sizeof(opt_extr_filename_buf)); ImGui::PopItemWidth(); ImGui::SameLine(); ImGui::Checkbox("Quaternion format", &export_extrinsics_in_quat_format); @@ -1019,7 +1019,7 @@ void Testbed::imgui() { } ImGui::SameLine(); ImGui::Checkbox("w/ Optimizer State", &m_include_optimizer_state_in_snapshot); - ImGui::InputText("File", snapshot_filename_buf, sizeof(snapshot_filename_buf)); + ImGui::InputText("File##Snapshot file path", snapshot_filename_buf, sizeof(snapshot_filename_buf)); } if (m_testbed_mode == ETestbedMode::Nerf || m_testbed_mode == ETestbedMode::Sdf) { @@ -1451,7 +1451,7 @@ void Testbed::mouse_drag(const Vector2f& rel, int button) { m_image.pos += rel; if (m_fps_camera) { - m_camera.block<3,3>(0,0) = rot * m_camera.block<3,3>(0,0); + m_camera.block<3, 3>(0, 0) = rot * m_camera.block<3, 3>(0, 0); } else { // Turntable auto old_look_at = look_at(); @@ -2082,7 +2082,6 @@ bool Testbed::frame() { #ifdef NGP_GUI if (m_render_window) { if (m_gui_redraw) { - // Gather histogram statistics of the encoding in use if (m_gather_histograms) { gather_histograms(); }