From 395e24374c996500d8a98b1cce2a6b57aba2f842 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20M=C3=BCller?= <tmueller@nvidia.com> Date: Fri, 3 Feb 2023 11:00:43 +0100 Subject: [PATCH] DLSS: use non-premultiplied alpha (doesn't completely remove jitter, but fixes black halos) --- src/render_buffer.cu | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/render_buffer.cu b/src/render_buffer.cu index aa0c10d..ab01d4d 100644 --- a/src/render_buffer.cu +++ b/src/render_buffer.cu @@ -534,7 +534,7 @@ __global__ void overlay_false_color_kernel(Vector2i resolution, Vector2i trainin surf2Dwrite(to_float4(color), surface, x * sizeof(float4), y); } -__global__ void tonemap_kernel(Vector2i resolution, float exposure, Array4f background_color, Array4f* accumulate_buffer, EColorSpace color_space, EColorSpace output_color_space, ETonemapCurve tonemap_curve, bool clamp_output_color, cudaSurfaceObject_t surface) { +__global__ void tonemap_kernel(Vector2i resolution, float exposure, Array4f background_color, Array4f* accumulate_buffer, EColorSpace color_space, EColorSpace output_color_space, ETonemapCurve tonemap_curve, bool clamp_output_color, bool unmultiply_alpha, cudaSurfaceObject_t surface) { uint32_t x = threadIdx.x + blockDim.x * blockIdx.x; uint32_t y = threadIdx.y + blockDim.y * blockIdx.y; @@ -556,6 +556,11 @@ __global__ void tonemap_kernel(Vector2i resolution, float exposure, Array4f back color.w() += weight; color.head<3>() = tonemap(color.head<3>(), Array3f::Constant(exposure), tonemap_curve, color_space, output_color_space); + + if (unmultiply_alpha && color.w() > 0.0f) { + color.head<3>() /= color.w(); + } + if (clamp_output_color) { color = color.cwiseMax(0.0f).cwiseMin(1.0f); } @@ -577,6 +582,12 @@ __global__ void dlss_splat_kernel( float4 color; surf2Dread(&color, dlss_surface, x * sizeof(float4), y); + + // DLSS operates on non-premultiplied alpha, so multiply it back in + color.x *= color.w; + color.y *= color.w; + color.z *= color.w; + surf2Dwrite(color, surface, x * sizeof(float4), y); } @@ -663,6 +674,7 @@ void CudaRenderBuffer::tonemap(float exposure, const Array4f& background_color, output_color_space, m_tonemap_curve, m_dlss && output_color_space == EColorSpace::SRGB, + (bool)m_dlss, // DLSS seems to perform best with non-premultiplied alpha (probably trained on such data) m_dlss ? m_dlss->frame() : surface() ); -- GitLab