diff --git a/CMakeLists.txt b/CMakeLists.txt index 0f089af33cc0eabc05f2d452d34c652db96223cc..cb503c8df9ff8a33d0f8702747966aa8d53707b1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -181,22 +181,10 @@ if (OPENMP_FOUND) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") endif() -find_package(OptiX) -# set OptiX_INSTALL_DIR via your environment if it's not found automatically - -if ((OptiX_FOUND OR OptiX_INCLUDE) AND NGP_BUILD_WITH_OPTIX) +if (NGP_BUILD_WITH_OPTIX) set(NGP_OPTIX ON) - list(APPEND NGP_INCLUDE_DIRECTORIES "${OptiX_INCLUDE}") + list(APPEND NGP_INCLUDE_DIRECTORIES "dependencies/optix") list(APPEND NGP_DEFINITIONS -DNGP_OPTIX) -else() - set(NGP_OPTIX OFF) - if (NGP_BUILD_WITH_OPTIX) - message(WARNING - "OptiX was not found. Neural graphics primitives will still compile " - "and run correctly. However, SDF training in 'raystab' and 'pathescape' " - "modes will be significantly slower." - ) - endif() endif() find_package(Python 3.7 COMPONENTS Interpreter Development) diff --git a/cmake/FindOptiX.cmake b/cmake/FindOptiX.cmake deleted file mode 100644 index 3a1b529f95be11f726cc1975af5c78321db087ca..0000000000000000000000000000000000000000 --- a/cmake/FindOptiX.cmake +++ /dev/null @@ -1,83 +0,0 @@ -# -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. 
-# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# - -# Locate the OptiX distribution. Search relative to the SDK first, then look in the system. - -# Our initial guess will be within the SDK. -if(DEFINED ENV{OptiX_INSTALL_DIR}) - set(OptiX_INCLUDE_DIRS "$ENV{OptiX_INSTALL_DIR}/include") -else() - if (MSVC) - file(GLOB OptiX_INCLUDE_DIRS "C:/ProgramData/NVIDIA Corporation/OptiX SDK */include") - else() - file(GLOB OptiX_INCLUDE_DIRS "/usr/local/NVIDIA-OptiX-SDK-*-linux64-x86_64/include") - endif() -endif() - -# The distribution contains only 64 bit libraries. Error when we have been mis-configured. -if (NOT CMAKE_SIZEOF_VOID_P EQUAL 8) - if(WIN32) - message(SEND_ERROR "Make sure when selecting the generator, you select one with Win64 or x64.") - endif() - message(FATAL_ERROR "OptiX only supports builds configured for 64 bits.") -endif() - -# Sort optix directories in descending natural order such that the highest version comes first. 
-list(SORT OptiX_INCLUDE_DIRS COMPARE NATURAL ORDER DESCENDING) -find_path(OptiX_INCLUDE - NAMES optix.h - PATHS ${OptiX_INCLUDE_DIRS} - NO_DEFAULT_PATH -) -find_path(OptiX_INCLUDE - NAMES optix.h -) - -# Check to make sure we found what we were looking for -function(OptiX_report_error error_message required component ) - if(DEFINED OptiX_FIND_REQUIRED_${component} AND NOT OptiX_FIND_REQUIRED_${component}) - set(required FALSE) - endif() - if(OptiX_FIND_REQUIRED AND required) - message(FATAL_ERROR "${error_message} Please locate before proceeding.") - else() - if(NOT OptiX_FIND_QUIETLY) - message(STATUS "${error_message}") - endif(NOT OptiX_FIND_QUIETLY) - endif() -endfunction() - -if (NOT OptiX_INCLUDE) - OptiX_report_error("OptiX headers (optix.h and friends) not found." TRUE headers ) - set(OptiX_FOUND FALSE) -else() - get_filename_component(OptiX_INSTALL_DIR ${OptiX_INCLUDE} DIRECTORY) - set(OptiX_FOUND TRUE) - message(NOTICE "-- Found OptiX: ${OptiX_INSTALL_DIR}") -endif() - diff --git a/dependencies/optix/internal/optix_7_device_impl.h b/dependencies/optix/internal/optix_7_device_impl.h new file mode 100644 index 0000000000000000000000000000000000000000..5e803bb330b4f7a6ef6c66e1edff973b5bbc0f91 --- /dev/null +++ b/dependencies/optix/internal/optix_7_device_impl.h @@ -0,0 +1,1535 @@ +/* +* Copyright (c) 2021 NVIDIA Corporation. All rights reserved. +* +* NVIDIA Corporation and its licensors retain all intellectual property and proprietary +* rights in and to this software, related documentation and any modifications thereto. +* Any use, reproduction, disclosure or distribution of this software and related +* documentation without an express license agreement from NVIDIA Corporation is strictly +* prohibited. 
+*
+* TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THIS SOFTWARE IS PROVIDED *AS IS*
+* AND NVIDIA AND ITS SUPPLIERS DISCLAIM ALL WARRANTIES, EITHER EXPRESS OR IMPLIED,
+* INCLUDING, BUT NOT LIMITED TO, IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+* PARTICULAR PURPOSE. IN NO EVENT SHALL NVIDIA OR ITS SUPPLIERS BE LIABLE FOR ANY
+* SPECIAL, INCIDENTAL, INDIRECT, OR CONSEQUENTIAL DAMAGES WHATSOEVER (INCLUDING, WITHOUT
+* LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF
+* BUSINESS INFORMATION, OR ANY OTHER PECUNIARY LOSS) ARISING OUT OF THE USE OF OR
+* INABILITY TO USE THIS SOFTWARE, EVEN IF NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF
+* SUCH DAMAGES
+*/
+
+/**
+* @file optix_7_device_impl.h
+* @author NVIDIA Corporation
+* @brief OptiX public API
+*
+* OptiX public API Reference - Device side implementation
+*/
+
+#if !defined( __OPTIX_INCLUDE_INTERNAL_HEADERS__ )
+#error("optix_7_device_impl.h is an internal header file and must not be used directly. Please use optix_device.h or optix.h instead.")
+#endif
+
+#ifndef __optix_optix_7_device_impl_h__
+#define __optix_optix_7_device_impl_h__
+
+#include "internal/optix_7_device_impl_exception.h"
+#include "internal/optix_7_device_impl_transformations.h"
+
+// The standard headers are only pulled in when __CUDACC_RTC__ is not defined, so every
+// use of <type_traits> further down has to sit behind the same guard.
+#ifndef __CUDACC_RTC__
+#include <initializer_list>
+#include <type_traits>
+#endif
+
+namespace optix_internal {
+// Empty tag type that only carries a list of types; used below to compare two parameter
+// packs with a single std::is_same.
+template <typename...>
+struct TypePack{};
+}  // namespace optix_internal
+
+/**
+ * Issues the `_optix_trace_typed_32` call with the constant 0 as its first input operand
+ * (the slot that the overload taking an OptixPayloadTypeID fills with `type`).
+ *
+ * The payload values are copied into a zero-initialised 33-element array. Element 0 is
+ * never passed to the call; elements 1..32 are passed as both the 32 outputs and the
+ * last 32 inputs. After the call the first sizeof...( Payload ) elements are written
+ * back to the caller's references, in declaration order.
+ *
+ * @param handle          forwarded as the 64-bit ("l") input operand
+ * @param rayOrigin       x/y/z forwarded as three float inputs
+ * @param rayDirection    x/y/z forwarded as three float inputs
+ * @param tmin            forwarded as a float input
+ * @param tmax            forwarded as a float input
+ * @param rayTime         forwarded as a float input
+ * @param visibilityMask  forwarded as a 32-bit input
+ * @param rayFlags        forwarded as a 32-bit input
+ * @param SBToffset       forwarded as a 32-bit input
+ * @param SBTstride       forwarded as a 32-bit input
+ * @param missSBTIndex    forwarded as a 32-bit input
+ * @param payload         at most 32 `unsigned int` lvalues; read before the call and
+ *                        overwritten with the call's outputs afterwards
+ */
+template <typename... Payload>
+static __forceinline__ __device__ void optixTrace( OptixTraversableHandle handle,
+                                                   float3                 rayOrigin,
+                                                   float3                 rayDirection,
+                                                   float                  tmin,
+                                                   float                  tmax,
+                                                   float                  rayTime,
+                                                   OptixVisibilityMask    visibilityMask,
+                                                   unsigned int           rayFlags,
+                                                   unsigned int           SBToffset,
+                                                   unsigned int           SBTstride,
+                                                   unsigned int           missSBTIndex,
+                                                   Payload&...            payload )
+{
+    static_assert( sizeof...( Payload ) <= 32, "Only up to 32 payload values are allowed." );
+    // std::is_same compares each type in the two TypePacks to make sure that all types are unsigned int.
+    // TypePack 1    unsigned int    T0      T1      T2   ...   Tn-1        Tn
+    // TypePack 2      T0            T1      T2      T3   ...   Tn        unsigned int
+#ifndef __CUDACC_RTC__
+    static_assert( std::is_same<optix_internal::TypePack<unsigned int, Payload...>, optix_internal::TypePack<Payload..., unsigned int>>::value,
+                   "All payload parameters need to be unsigned int." );
+#endif
+
+    float        ox = rayOrigin.x, oy = rayOrigin.y, oz = rayOrigin.z;
+    float        dx = rayDirection.x, dy = rayDirection.y, dz = rayDirection.z;
+    unsigned int p[33]       = { 0, payload... };
+    int          payloadSize = (int)sizeof...( Payload );
+    asm volatile(
+        "call"
+        "(%0,%1,%2,%3,%4,%5,%6,%7,%8,%9,%10,%11,%12,%13,%14,%15,%16,%17,%18,%19,%20,%21,%22,%23,%24,%25,%26,%27,%28,%"
+        "29,%30,%31),"
+        "_optix_trace_typed_32,"
+        "(%32,%33,%34,%35,%36,%37,%38,%39,%40,%41,%42,%43,%44,%45,%46,%47,%48,%49,%50,%51,%52,%53,%54,%55,%56,%57,%58,%"
+        "59,%60,%61,%62,%63,%64,%65,%66,%67,%68,%69,%70,%71,%72,%73,%74,%75,%76,%77,%78,%79,%80);"
+        : "=r"( p[1] ), "=r"( p[2] ), "=r"( p[3] ), "=r"( p[4] ), "=r"( p[5] ), "=r"( p[6] ), "=r"( p[7] ),
+          "=r"( p[8] ), "=r"( p[9] ), "=r"( p[10] ), "=r"( p[11] ), "=r"( p[12] ), "=r"( p[13] ), "=r"( p[14] ),
+          "=r"( p[15] ), "=r"( p[16] ), "=r"( p[17] ), "=r"( p[18] ), "=r"( p[19] ), "=r"( p[20] ), "=r"( p[21] ),
+          "=r"( p[22] ), "=r"( p[23] ), "=r"( p[24] ), "=r"( p[25] ), "=r"( p[26] ), "=r"( p[27] ), "=r"( p[28] ),
+          "=r"( p[29] ), "=r"( p[30] ), "=r"( p[31] ), "=r"( p[32] )
+        : "r"( 0 ), "l"( handle ), "f"( ox ), "f"( oy ), "f"( oz ), "f"( dx ), "f"( dy ), "f"( dz ), "f"( tmin ),
+          "f"( tmax ), "f"( rayTime ), "r"( visibilityMask ), "r"( rayFlags ), "r"( SBToffset ), "r"( SBTstride ),
+          "r"( missSBTIndex ), "r"( payloadSize ), "r"( p[1] ), "r"( p[2] ), "r"( p[3] ), "r"( p[4] ), "r"( p[5] ),
+          "r"( p[6] ), "r"( p[7] ), "r"( p[8] ), "r"( p[9] ), "r"( p[10] ), "r"( p[11] ), "r"( p[12] ), "r"( p[13] ),
+          "r"( p[14] ), "r"( p[15] ), "r"( p[16] ), "r"( p[17] ), "r"( p[18] ), "r"( p[19] ), "r"( p[20] ),
+          "r"( p[21] ), "r"( p[22] ), "r"( p[23] ), "r"( p[24] ), "r"( p[25] ), "r"( p[26] ), "r"( p[27] ),
+          "r"( p[28] ), "r"( p[29] ), "r"( p[30] ), "r"( p[31] ), "r"( p[32] )
+        : );
+    // Braced-init-list elements are evaluated left to right, so payload k receives p[k+1].
+    // NOTE(review): std::initializer_list is used without the __CUDACC_RTC__ guard -
+    // presumably the runtime compiler provides it built in; confirm.
+    unsigned int index = 1;
+    (void)std::initializer_list<unsigned int>{ index, ( payload = p[index++] )... };
+}
+
+/**
+ * Same as the overload without `type`, except that `type` is passed as the first input
+ * operand of the `_optix_trace_typed_32` call instead of the constant 0.
+ *
+ * @param type     forwarded as the first 32-bit input operand
+ * @param payload  at most 32 `unsigned int` lvalues; read before the call and
+ *                 overwritten with the call's outputs afterwards
+ *
+ * All remaining parameters are forwarded exactly as in the overload without `type`.
+ */
+template <typename... Payload>
+static __forceinline__ __device__ void optixTrace( OptixPayloadTypeID     type,
+                                                   OptixTraversableHandle handle,
+                                                   float3                 rayOrigin,
+                                                   float3                 rayDirection,
+                                                   float                  tmin,
+                                                   float                  tmax,
+                                                   float                  rayTime,
+                                                   OptixVisibilityMask    visibilityMask,
+                                                   unsigned int           rayFlags,
+                                                   unsigned int           SBToffset,
+                                                   unsigned int           SBTstride,
+                                                   unsigned int           missSBTIndex,
+                                                   Payload&...            payload )
+{
+    static_assert( sizeof...( Payload ) <= 32, "Only up to 32 payload values are allowed." );
+    // std::is_same compares each type in the two TypePacks to make sure that all types are unsigned int.
+    // TypePack 1    unsigned int    T0      T1      T2   ...   Tn-1        Tn
+    // TypePack 2      T0            T1      T2      T3   ...   Tn        unsigned int
+    // <type_traits> is not included when __CUDACC_RTC__ is defined, so this check must be
+    // skipped there, exactly as in the overload without `type`.
+#ifndef __CUDACC_RTC__
+    static_assert( std::is_same<optix_internal::TypePack<unsigned int, Payload...>, optix_internal::TypePack<Payload..., unsigned int>>::value,
+                   "All payload parameters need to be unsigned int." );
+#endif
+
+    float        ox = rayOrigin.x, oy = rayOrigin.y, oz = rayOrigin.z;
+    float        dx = rayDirection.x, dy = rayDirection.y, dz = rayDirection.z;
+    unsigned int p[33]       = { 0, payload... };
+    int          payloadSize = (int)sizeof...( Payload );
+
+    asm volatile(
+        "call"
+        "(%0,%1,%2,%3,%4,%5,%6,%7,%8,%9,%10,%11,%12,%13,%14,%15,%16,%17,%18,%19,%20,%21,%22,%23,%24,%25,%26,%27,%28,%"
+        "29,%30,%31),"
+        "_optix_trace_typed_32,"
+        "(%32,%33,%34,%35,%36,%37,%38,%39,%40,%41,%42,%43,%44,%45,%46,%47,%48,%49,%50,%51,%52,%53,%54,%55,%56,%57,%58,%"
+        "59,%60,%61,%62,%63,%64,%65,%66,%67,%68,%69,%70,%71,%72,%73,%74,%75,%76,%77,%78,%79,%80);"
+        : "=r"( p[1] ), "=r"( p[2] ), "=r"( p[3] ), "=r"( p[4] ), "=r"( p[5] ), "=r"( p[6] ), "=r"( p[7] ),
+          "=r"( p[8] ), "=r"( p[9] ), "=r"( p[10] ), "=r"( p[11] ), "=r"( p[12] ), "=r"( p[13] ), "=r"( p[14] ),
+          "=r"( p[15] ), "=r"( p[16] ), "=r"( p[17] ), "=r"( p[18] ), "=r"( p[19] ), "=r"( p[20] ), "=r"( p[21] ),
+          "=r"( p[22] ), "=r"( p[23] ), "=r"( p[24] ), "=r"( p[25] ), "=r"( p[26] ), "=r"( p[27] ), "=r"( p[28] ),
+          "=r"( p[29] ), "=r"( p[30] ), "=r"( p[31] ), "=r"( p[32] )
+        : "r"( type ), "l"( handle ), "f"( ox ), "f"( oy ), "f"( oz ), "f"( dx ), "f"( dy ), "f"( dz ), "f"( tmin ),
+          "f"( tmax ), "f"( rayTime ), "r"( visibilityMask ), "r"( rayFlags ), "r"( SBToffset ), "r"( SBTstride ),
+          "r"( missSBTIndex ), "r"( payloadSize ), "r"( p[1] ), "r"( p[2] ), "r"( p[3] ), "r"( p[4] ), "r"( p[5] ),
+          "r"( p[6] ), "r"( p[7] ), "r"( p[8] ), "r"( p[9] ), "r"( p[10] ), "r"( p[11] ), "r"( p[12] ), "r"( p[13] ),
+          "r"( p[14] ), "r"( p[15] ), "r"( p[16] ), "r"( p[17] ), "r"( p[18] ), "r"( p[19] ), "r"( p[20] ),
+          "r"( p[21] ), "r"( p[22] ), "r"( p[23] ), "r"( p[24] ), "r"( p[25] ), "r"( p[26] ), "r"( p[27] ),
+          "r"( p[28] ), "r"( p[29] ), "r"( p[30] ), "r"( p[31] ), "r"( p[32] )
+        : );
+    // Braced-init-list elements are evaluated left to right, so payload k receives p[k+1].
+    unsigned int index = 1;
+    (void)std::initializer_list<unsigned int>{ index, ( payload = p[index++] )... };
+}
+
+// Generates optixSetPayload_<INDEX>( p ), which issues the `_optix_set_payload` call with
+// the compile-time constant INDEX and the value p. The macro is local to this header and
+// is removed again right after the last expansion.
+#define OPTIX_IMPL_DEFINE_SET_PAYLOAD( INDEX )                                                   \
+    static __forceinline__ __device__ void optixSetPayload_##INDEX( unsigned int p )             \
+    {                                                                                            \
+        asm volatile( "call _optix_set_payload, (%0, %1);" : : "r"( INDEX ), "r"( p ) : );       \
+    }
+
+OPTIX_IMPL_DEFINE_SET_PAYLOAD( 0 )
+OPTIX_IMPL_DEFINE_SET_PAYLOAD( 1 )
+OPTIX_IMPL_DEFINE_SET_PAYLOAD( 2 )
+OPTIX_IMPL_DEFINE_SET_PAYLOAD( 3 )
+OPTIX_IMPL_DEFINE_SET_PAYLOAD( 4 )
+OPTIX_IMPL_DEFINE_SET_PAYLOAD( 5 )
+OPTIX_IMPL_DEFINE_SET_PAYLOAD( 6 )
+OPTIX_IMPL_DEFINE_SET_PAYLOAD( 7 )
+OPTIX_IMPL_DEFINE_SET_PAYLOAD( 8 )
+OPTIX_IMPL_DEFINE_SET_PAYLOAD( 9 )
+OPTIX_IMPL_DEFINE_SET_PAYLOAD( 10 )
+OPTIX_IMPL_DEFINE_SET_PAYLOAD( 11 )
+OPTIX_IMPL_DEFINE_SET_PAYLOAD( 12 )
+OPTIX_IMPL_DEFINE_SET_PAYLOAD( 13 )
+OPTIX_IMPL_DEFINE_SET_PAYLOAD( 14 )
+OPTIX_IMPL_DEFINE_SET_PAYLOAD( 15 )
+OPTIX_IMPL_DEFINE_SET_PAYLOAD( 16 )
+OPTIX_IMPL_DEFINE_SET_PAYLOAD( 17 )
+OPTIX_IMPL_DEFINE_SET_PAYLOAD( 18 )
+OPTIX_IMPL_DEFINE_SET_PAYLOAD( 19 )
+OPTIX_IMPL_DEFINE_SET_PAYLOAD( 20 )
+OPTIX_IMPL_DEFINE_SET_PAYLOAD( 21 )
+OPTIX_IMPL_DEFINE_SET_PAYLOAD( 22 )
+OPTIX_IMPL_DEFINE_SET_PAYLOAD( 23 )
+
+#undef OPTIX_IMPL_DEFINE_SET_PAYLOAD
+
+// Generates optixSetPayload_<INDEX>( p ), which issues the `_optix_set_payload` call with
+// the compile-time constant INDEX and the value p. The macro is local to this header and
+// is removed again right after the last expansion.
+#define OPTIX_IMPL_DEFINE_SET_PAYLOAD( INDEX )                                                   \
+    static __forceinline__ __device__ void optixSetPayload_##INDEX( unsigned int p )             \
+    {                                                                                            \
+        asm volatile( "call _optix_set_payload, (%0, %1);" : : "r"( INDEX ), "r"( p ) : );       \
+    }
+
+OPTIX_IMPL_DEFINE_SET_PAYLOAD( 24 )
+OPTIX_IMPL_DEFINE_SET_PAYLOAD( 25 )
+OPTIX_IMPL_DEFINE_SET_PAYLOAD( 26 )
+OPTIX_IMPL_DEFINE_SET_PAYLOAD( 27 )
+OPTIX_IMPL_DEFINE_SET_PAYLOAD( 28 )
+OPTIX_IMPL_DEFINE_SET_PAYLOAD( 29 )
+OPTIX_IMPL_DEFINE_SET_PAYLOAD( 30 )
+OPTIX_IMPL_DEFINE_SET_PAYLOAD( 31 )
+
+#undef OPTIX_IMPL_DEFINE_SET_PAYLOAD
+
+// Generates optixGetPayload_<INDEX>(), which issues the `_optix_get_payload` call with the
+// compile-time constant INDEX and returns the 32-bit value the call produces.
+#define OPTIX_IMPL_DEFINE_GET_PAYLOAD( INDEX )                                                          \
+    static __forceinline__ __device__ unsigned int optixGetPayload_##INDEX()                            \
+    {                                                                                                   \
+        unsigned int value;                                                                             \
+        asm volatile( "call (%0), _optix_get_payload, (%1);" : "=r"( value ) : "r"( INDEX ) : );        \
+        return value;                                                                                   \
+    }
+
+OPTIX_IMPL_DEFINE_GET_PAYLOAD( 0 )
+OPTIX_IMPL_DEFINE_GET_PAYLOAD( 1 )
+OPTIX_IMPL_DEFINE_GET_PAYLOAD( 2 )
+OPTIX_IMPL_DEFINE_GET_PAYLOAD( 3 )
+OPTIX_IMPL_DEFINE_GET_PAYLOAD( 4 )
+OPTIX_IMPL_DEFINE_GET_PAYLOAD( 5 )
+OPTIX_IMPL_DEFINE_GET_PAYLOAD( 6 )
+OPTIX_IMPL_DEFINE_GET_PAYLOAD( 7 )
+OPTIX_IMPL_DEFINE_GET_PAYLOAD( 8 )
+OPTIX_IMPL_DEFINE_GET_PAYLOAD( 9 )
+OPTIX_IMPL_DEFINE_GET_PAYLOAD( 10 )
+OPTIX_IMPL_DEFINE_GET_PAYLOAD( 11 )
+OPTIX_IMPL_DEFINE_GET_PAYLOAD( 12 )
+
+#undef OPTIX_IMPL_DEFINE_GET_PAYLOAD
+
+// Generates optixGetPayload_<INDEX>(), which issues the `_optix_get_payload` call with the
+// compile-time constant INDEX and returns the 32-bit value the call produces. The macro is
+// local to this header and is removed again right after the last expansion.
+#define OPTIX_IMPL_DEFINE_GET_PAYLOAD( INDEX )                                                          \
+    static __forceinline__ __device__ unsigned int optixGetPayload_##INDEX()                            \
+    {                                                                                                   \
+        unsigned int value;                                                                             \
+        asm volatile( "call (%0), _optix_get_payload, (%1);" : "=r"( value ) : "r"( INDEX ) : );        \
+        return value;                                                                                   \
+    }
+
+OPTIX_IMPL_DEFINE_GET_PAYLOAD( 13 )
+OPTIX_IMPL_DEFINE_GET_PAYLOAD( 14 )
+OPTIX_IMPL_DEFINE_GET_PAYLOAD( 15 )
+OPTIX_IMPL_DEFINE_GET_PAYLOAD( 16 )
+OPTIX_IMPL_DEFINE_GET_PAYLOAD( 17 )
+OPTIX_IMPL_DEFINE_GET_PAYLOAD( 18 )
+OPTIX_IMPL_DEFINE_GET_PAYLOAD( 19 )
+OPTIX_IMPL_DEFINE_GET_PAYLOAD( 20 )
+OPTIX_IMPL_DEFINE_GET_PAYLOAD( 21 )
+OPTIX_IMPL_DEFINE_GET_PAYLOAD( 22 )
+OPTIX_IMPL_DEFINE_GET_PAYLOAD( 23 )
+OPTIX_IMPL_DEFINE_GET_PAYLOAD( 24 )
+OPTIX_IMPL_DEFINE_GET_PAYLOAD( 25 )
+OPTIX_IMPL_DEFINE_GET_PAYLOAD( 26 )
+OPTIX_IMPL_DEFINE_GET_PAYLOAD( 27 )
+OPTIX_IMPL_DEFINE_GET_PAYLOAD( 28 )
+OPTIX_IMPL_DEFINE_GET_PAYLOAD( 29 )
+OPTIX_IMPL_DEFINE_GET_PAYLOAD( 30 )
+OPTIX_IMPL_DEFINE_GET_PAYLOAD( 31 )
+
+#undef OPTIX_IMPL_DEFINE_GET_PAYLOAD
+
+// Forwards `types` unchanged to the `_optix_set_payload_types` call.
+static __forceinline__ __device__ void optixSetPayloadTypes( unsigned int types )
+{
+    asm volatile( "call _optix_set_payload_types, (%0);"
+                  :
+                  : "r"( types )
+                  : );
+}
+
+// Returns whatever 32-bit value the `_optix_undef_value` call yields.
+static __forceinline__ __device__ unsigned int optixUndefinedValue()
+{
+    unsigned int undefined;
+    asm( "call (%0), _optix_undef_value, ();" : "=r"( undefined ) : );
+    return undefined;
+}
+
+// Assembles a float3 from the three `_optix_get_world_ray_origin_{x,y,z}` calls.
+static __forceinline__ __device__ float3 optixGetWorldRayOrigin()
+{
+    float x, y, z;
+    asm( "call (%0), _optix_get_world_ray_origin_x, ();" : "=f"( x ) : );
+    asm( "call (%0), _optix_get_world_ray_origin_y, ();" : "=f"( y ) : );
+    asm( "call (%0), _optix_get_world_ray_origin_z, ();" : "=f"( z ) : );
+    const float3 origin = make_float3( x, y, z );
+    return origin;
+}
+
+// Assembles a float3 from the three `_optix_get_world_ray_direction_{x,y,z}` calls.
+static __forceinline__ __device__ float3 optixGetWorldRayDirection()
+{
+    float x, y, z;
+    asm( "call (%0), _optix_get_world_ray_direction_x, ();" : "=f"( x ) : );
+    asm( "call (%0), _optix_get_world_ray_direction_y, ();" : "=f"( y ) : );
+    asm( "call (%0), _optix_get_world_ray_direction_z, ();" : "=f"( z ) : );
+    const float3 direction = make_float3( x, y, z );
+    return direction;
+}
+
+// Assembles a float3 from the three `_optix_get_object_ray_origin_{x,y,z}` calls.
+static __forceinline__ __device__ float3 optixGetObjectRayOrigin()
+{
+    float x, y, z;
+    asm( "call (%0), _optix_get_object_ray_origin_x, ();" : "=f"( x ) : );
+    asm( "call (%0), _optix_get_object_ray_origin_y, ();" : "=f"( y ) : );
+    asm( "call (%0), _optix_get_object_ray_origin_z, ();" : "=f"( z ) : );
+    const float3 origin = make_float3( x, y, z );
+    return origin;
+}
+
+// Assembles a float3 from the three `_optix_get_object_ray_direction_{x,y,z}` calls.
+static __forceinline__ __device__ float3 optixGetObjectRayDirection()
+{
+    float x, y, z;
+    asm( "call (%0), _optix_get_object_ray_direction_x, ();" : "=f"( x ) : );
+    asm( "call (%0), _optix_get_object_ray_direction_y, ();" : "=f"( y ) : );
+    asm( "call (%0), _optix_get_object_ray_direction_z, ();" : "=f"( z ) : );
+    const float3 direction = make_float3( x, y, z );
+    return direction;
+}
+
+// Returns the float produced by the `_optix_get_ray_tmin` call.
+static __forceinline__ __device__ float optixGetRayTmin()
+{
+    float tmin;
+    asm( "call (%0), _optix_get_ray_tmin, ();" : "=f"( tmin ) : );
+    return tmin;
+}
+
+// Returns the float produced by the `_optix_get_ray_tmax` call.
+static __forceinline__ __device__ float optixGetRayTmax()
+{
+    float tmax;
+    asm( "call (%0), _optix_get_ray_tmax, ();" : "=f"( tmax ) : );
+    return tmax;
+}
+
+// Returns the float produced by the `_optix_get_ray_time` call.
+static __forceinline__ __device__ float optixGetRayTime()
+{
+    float rayTime;
+    asm( "call (%0), _optix_get_ray_time, ();" : "=f"( rayTime ) : );
+    return rayTime;
+}
+
+// Returns the 32-bit value produced by the `_optix_get_ray_flags` call.
+static __forceinline__ __device__ unsigned int optixGetRayFlags()
+{
+    unsigned int flags;
+    asm( "call (%0), _optix_get_ray_flags, ();" : "=r"( flags ) : );
+    return flags;
+}
+
+// Returns the 32-bit value produced by the `_optix_get_ray_visibility_mask` call.
+static __forceinline__ __device__ unsigned int optixGetRayVisibilityMask()
+{
+    unsigned int mask;
+    asm( "call (%0), _optix_get_ray_visibility_mask, ();" : "=r"( mask ) : );
+    return mask;
+}
+
+// Passes `ias` and `instIdx` to `_optix_get_instance_traversable_from_ias` and returns the
+// 64-bit result as a traversable handle.
+static __forceinline__ __device__ OptixTraversableHandle optixGetInstanceTraversableFromIAS( OptixTraversableHandle ias,
+                                                                                             unsigned int instIdx )
+{
+    unsigned long long traversable;
+    asm( "call (%0), _optix_get_instance_traversable_from_ias, (%1, %2);"
+         : "=l"( traversable )
+         : "l"( ias ), "r"( instIdx ) );
+    return static_cast<OptixTraversableHandle>( traversable );
+}
+
+
+// Fills data[0..2] (x, y, z each) with the nine floats returned by
+// `_optix_get_triangle_vertex_data` for the given gas / primitive / SBT index / time.
+static __forceinline__ __device__ void optixGetTriangleVertexData( OptixTraversableHandle gas,
+                                                                   unsigned int           primIdx,
+                                                                   unsigned int           sbtGASIndex,
+                                                                   float                  time,
+                                                                   float3                 data[3] )
+{
+    float3& v0 = data[0];
+    float3& v1 = data[1];
+    float3& v2 = data[2];
+    asm( "call (%0, %1, %2, %3, %4, %5, %6, %7, %8), _optix_get_triangle_vertex_data, "
+         "(%9, %10, %11, %12);"
+         : "=f"( v0.x ), "=f"( v0.y ), "=f"( v0.z ),
+           "=f"( v1.x ), "=f"( v1.y ), "=f"( v1.z ),
+           "=f"( v2.x ), "=f"( v2.y ), "=f"( v2.z )
+         : "l"( gas ), "r"( primIdx ), "r"( sbtGASIndex ), "f"( time )
+         : );
+}
+
+
+// Fills data[0..1] (x, y, z, w each) with the eight floats returned by
+// `_optix_get_linear_curve_vertex_data`.
+static __forceinline__ __device__ void optixGetLinearCurveVertexData( OptixTraversableHandle gas,
+                                                                      unsigned int           primIdx,
+                                                                      unsigned int           sbtGASIndex,
+                                                                      float                  time,
+                                                                      float4                 data[2] )
+{
+    float4& c0 = data[0];
+    float4& c1 = data[1];
+    asm( "call (%0, %1, %2, %3, %4, %5, %6, %7), _optix_get_linear_curve_vertex_data, "
+         "(%8, %9, %10, %11);"
+         : "=f"( c0.x ), "=f"( c0.y ), "=f"( c0.z ), "=f"( c0.w ),
+           "=f"( c1.x ), "=f"( c1.y ), "=f"( c1.z ), "=f"( c1.w )
+         : "l"( gas ), "r"( primIdx ), "r"( sbtGASIndex ), "f"( time )
+         : );
+}
+
+// Fills data[0..2] (x, y, z, w each) with the twelve floats returned by
+// `_optix_get_quadratic_bspline_vertex_data`.
+static __forceinline__ __device__ void optixGetQuadraticBSplineVertexData( OptixTraversableHandle gas,
+                                                                           unsigned int           primIdx,
+                                                                           unsigned int           sbtGASIndex,
+                                                                           float                  time,
+                                                                           float4                 data[3] )
+{
+    float4& c0 = data[0];
+    float4& c1 = data[1];
+    float4& c2 = data[2];
+    asm( "call (%0, %1, %2, %3, %4, %5, %6, %7, %8, %9, %10, %11), _optix_get_quadratic_bspline_vertex_data, "
+         "(%12, %13, %14, %15);"
+         : "=f"( c0.x ), "=f"( c0.y ), "=f"( c0.z ), "=f"( c0.w ),
+           "=f"( c1.x ), "=f"( c1.y ), "=f"( c1.z ), "=f"( c1.w ),
+           "=f"( c2.x ), "=f"( c2.y ), "=f"( c2.z ), "=f"( c2.w )
+         : "l"( gas ), "r"( primIdx ), "r"( sbtGASIndex ), "f"( time )
+         : );
+}
+
+// Fills data[0..3] (x, y, z, w each) with the sixteen floats returned by
+// `_optix_get_cubic_bspline_vertex_data`.
+static __forceinline__ __device__ void optixGetCubicBSplineVertexData( OptixTraversableHandle gas,
+                                                                       unsigned int           primIdx,
+                                                                       unsigned int           sbtGASIndex,
+                                                                       float                  time,
+                                                                       float4                 data[4] )
+{
+    float4& c0 = data[0];
+    float4& c1 = data[1];
+    float4& c2 = data[2];
+    float4& c3 = data[3];
+    asm( "call (%0, %1, %2, %3, %4, %5, %6, %7, %8, %9, %10, %11, %12, %13, %14, %15), "
+         "_optix_get_cubic_bspline_vertex_data, "
+         "(%16, %17, %18, %19);"
+         : "=f"( c0.x ), "=f"( c0.y ), "=f"( c0.z ), "=f"( c0.w ),
+           "=f"( c1.x ), "=f"( c1.y ), "=f"( c1.z ), "=f"( c1.w ),
+           "=f"( c2.x ), "=f"( c2.y ), "=f"( c2.z ), "=f"( c2.w ),
+           "=f"( c3.x ), "=f"( c3.y ), "=f"( c3.z ), "=f"( c3.w )
+         : "l"( gas ), "r"( primIdx ), "r"( sbtGASIndex ), "f"( time )
+         : );
+}
+
+// Fills data[0..3] (x, y, z, w each) with the sixteen floats returned by
+// `_optix_get_catmullrom_vertex_data`.
+static __forceinline__ __device__ void optixGetCatmullRomVertexData( OptixTraversableHandle gas,
+                                                                     unsigned int           primIdx,
+                                                                     unsigned int           sbtGASIndex,
+                                                                     float                  time,
+                                                                     float4                 data[4] )
+{
+    float4& c0 = data[0];
+    float4& c1 = data[1];
+    float4& c2 = data[2];
+    float4& c3 = data[3];
+    asm( "call (%0, %1, %2, %3, %4, %5, %6, %7, %8, %9, %10, %11, %12, %13, %14, %15), "
+         "_optix_get_catmullrom_vertex_data, "
+         "(%16, %17, %18, %19);"
+         : "=f"( c0.x ), "=f"( c0.y ), "=f"( c0.z ), "=f"( c0.w ),
+           "=f"( c1.x ), "=f"( c1.y ), "=f"( c1.z ), "=f"( c1.w ),
+           "=f"( c2.x ), "=f"( c2.y ), "=f"( c2.z ), "=f"( c2.w ),
+           "=f"( c3.x ), "=f"( c3.y ), "=f"( c3.z ), "=f"( c3.w )
+         : "l"( gas ), "r"( primIdx ), "r"( sbtGASIndex ), "f"( time )
+         : );
+}
+
+// Fills data[0] (x, y, z, w) with the four floats returned by `_optix_get_sphere_data`.
+static __forceinline__ __device__ void optixGetSphereData( OptixTraversableHandle gas,
+                                                           unsigned int           primIdx,
+                                                           unsigned int           sbtGASIndex,
+                                                           float                  time,
+                                                           float4                 data[1] )
+{
+    float4& sphere = data[0];
+    asm( "call (%0, %1, %2, %3), "
+         "_optix_get_sphere_data, "
+         "(%4, %5, %6, %7);"
+         : "=f"( sphere.x ), "=f"( sphere.y ), "=f"( sphere.z ), "=f"( sphere.w )
+         : "l"( gas ), "r"( primIdx ), "r"( sbtGASIndex ), "f"( time )
+         : );
+}
+
+// Returns the 64-bit result of `_optix_get_gas_traversable_handle` as a traversable handle.
+static __forceinline__ __device__ OptixTraversableHandle optixGetGASTraversableHandle()
+{
+    unsigned long long traversable;
+    asm( "call (%0), _optix_get_gas_traversable_handle, ();" : "=l"( traversable ) : );
+    return static_cast<OptixTraversableHandle>( traversable );
+}
+
+// Returns the float produced by `_optix_get_gas_motion_time_begin` for `handle`.
+static __forceinline__ __device__ float optixGetGASMotionTimeBegin( OptixTraversableHandle handle )
+{
+    float timeBegin;
+    asm( "call (%0), _optix_get_gas_motion_time_begin, (%1);" : "=f"( timeBegin ) : "l"( handle ) : );
+    return timeBegin;
+}
+
+// Returns the float produced by `_optix_get_gas_motion_time_end` for `handle`.
+static __forceinline__ __device__ float optixGetGASMotionTimeEnd( OptixTraversableHandle handle )
+{
+    float timeEnd;
+    asm( "call (%0), _optix_get_gas_motion_time_end, (%1);" : "=f"( timeEnd ) : "l"( handle ) : );
+    return timeEnd;
+}
+
+// Returns the 32-bit value produced by `_optix_get_gas_motion_step_count` for `handle`.
+static __forceinline__ __device__ unsigned int optixGetGASMotionStepCount( OptixTraversableHandle handle )
+{
+    unsigned int stepCount;
+    asm( "call (%0), _optix_get_gas_motion_step_count, (%1);" : "=r"( stepCount ) : "l"( handle ) : );
+    return stepCount;
+}
+
+// Writes twelve floats to m: the three float4 rows obtained from
+// optix_impl::optixGetWorldToObjectTransformMatrix, row after row, or - when the transform
+// list is empty - ones at indices 0, 5 and 10 and zeros everywhere else.
+static __forceinline__ __device__ void optixGetWorldToObjectTransformMatrix( float m[12] )
+{
+    if( optixGetTransformListSize() != 0 )
+    {
+        float4 row0, row1, row2;
+        optix_impl::optixGetWorldToObjectTransformMatrix( row0, row1, row2 );
+        m[0] = row0.x;  m[1] = row0.y;  m[2]  = row0.z;  m[3]  = row0.w;
+        m[4] = row1.x;  m[5] = row1.y;  m[6]  = row1.z;  m[7]  = row1.w;
+        m[8] = row2.x;  m[9] = row2.y;  m[10] = row2.z;  m[11] = row2.w;
+    }
+    else
+    {
+        // Indices 0, 5 and 10 are exactly the multiples of 5 below 12.
+        for( int i = 0; i < 12; ++i )
+            m[i] = ( i % 5 == 0 ) ? 1.0f : 0.0f;
+    }
+}
+
+// Writes twelve floats to m: the three float4 rows obtained from
+// optix_impl::optixGetObjectToWorldTransformMatrix, row after row, or - when the transform
+// list is empty - ones at indices 0, 5 and 10 and zeros everywhere else.
+static __forceinline__ __device__ void optixGetObjectToWorldTransformMatrix( float m[12] )
+{
+    if( optixGetTransformListSize() != 0 )
+    {
+        float4 row0, row1, row2;
+        optix_impl::optixGetObjectToWorldTransformMatrix( row0, row1, row2 );
+        m[0] = row0.x;  m[1] = row0.y;  m[2]  = row0.z;  m[3]  = row0.w;
+        m[4] = row1.x;  m[5] = row1.y;  m[6]  = row1.z;  m[7]  = row1.w;
+        m[8] = row2.x;  m[9] = row2.y;  m[10] = row2.z;  m[11] = row2.w;
+    }
+    else
+    {
+        // Indices 0, 5 and 10 are exactly the multiples of 5 below 12.
+        for( int i = 0; i < 12; ++i )
+            m[i] = ( i % 5 == 0 ) ? 1.0f : 0.0f;
+    }
+}
+
+// Returns `point` unchanged when the transform list is empty; otherwise runs it through
+// optix_impl::optixTransformPoint with the world-to-object rows.
+static __forceinline__ __device__ float3 optixTransformPointFromWorldToObjectSpace( float3 point )
+{
+    if( optixGetTransformListSize() != 0 )
+    {
+        float4 row0, row1, row2;
+        optix_impl::optixGetWorldToObjectTransformMatrix( row0, row1, row2 );
+        return optix_impl::optixTransformPoint( row0, row1, row2, point );
+    }
+    return point;
+}
+
+// Returns `vec` unchanged when the transform list is empty; otherwise runs it through
+// optix_impl::optixTransformVector with the world-to-object rows.
+static __forceinline__ __device__ float3 optixTransformVectorFromWorldToObjectSpace( float3 vec )
+{
+    if( optixGetTransformListSize() != 0 )
+    {
+        float4 row0, row1, row2;
+        optix_impl::optixGetWorldToObjectTransformMatrix( row0, row1, row2 );
+        return optix_impl::optixTransformVector( row0, row1, row2, vec );
+    }
+    return vec;
+}
+
+// Returns `normal` unchanged when the transform list is empty; otherwise runs it through
+// optix_impl::optixTransformNormal. Note that the rows come from the object-to-world
+// matrix, i.e. the inverse of optixGetWorldToObjectTransformMatrix().
+static __forceinline__ __device__ float3 optixTransformNormalFromWorldToObjectSpace( float3 normal )
+{
+    if( optixGetTransformListSize() != 0 )
+    {
+        float4 row0, row1, row2;
+        optix_impl::optixGetObjectToWorldTransformMatrix( row0, row1, row2 );
+        return optix_impl::optixTransformNormal( row0, row1, row2, normal );
+    }
+    return normal;
+}
+
+// Returns `point` unchanged when the transform list is empty; otherwise runs it through
+// optix_impl::optixTransformPoint with the object-to-world rows.
+static __forceinline__ __device__ float3 optixTransformPointFromObjectToWorldSpace( float3 point )
+{
+    if( optixGetTransformListSize() != 0 )
+    {
+        float4 row0, row1, row2;
+        optix_impl::optixGetObjectToWorldTransformMatrix( row0, row1, row2 );
+        return optix_impl::optixTransformPoint( row0, row1, row2, point );
+    }
+    return point;
+}
+
+// Returns `vec` unchanged when the transform list is empty; otherwise runs it through
+// optix_impl::optixTransformVector with the object-to-world rows.
+static __forceinline__ __device__ float3 optixTransformVectorFromObjectToWorldSpace( float3 vec )
+{
+    if( optixGetTransformListSize() != 0 )
+    {
+        float4 row0, row1, row2;
+        optix_impl::optixGetObjectToWorldTransformMatrix( row0, row1, row2 );
+        return optix_impl::optixTransformVector( row0, row1, row2, vec );
+    }
+    return vec;
+}
+
+// Returns `normal` unchanged when the transform list is empty; otherwise runs it through
+// optix_impl::optixTransformNormal. Note that the rows come from the world-to-object
+// matrix, i.e. the inverse of optixGetObjectToWorldTransformMatrix().
+static __forceinline__ __device__ float3 optixTransformNormalFromObjectToWorldSpace( float3 normal )
+{
+    if( optixGetTransformListSize() != 0 )
+    {
+        float4 row0, row1, row2;
+        optix_impl::optixGetWorldToObjectTransformMatrix( row0, row1, row2 );
+        return optix_impl::optixTransformNormal( row0, row1, row2, normal );
+    }
+    return normal;
+}
+
+// Returns the 32-bit value produced by the `_optix_get_transform_list_size` call.
+static __forceinline__ __device__ unsigned int optixGetTransformListSize()
+{
+    unsigned int listSize;
+    asm( "call (%0), _optix_get_transform_list_size, ();" : "=r"( listSize ) : );
+    return listSize;
+}
+
+// Returns the 64-bit value produced by `_optix_get_transform_list_handle` for `index`.
+static __forceinline__ __device__ OptixTraversableHandle optixGetTransformListHandle( unsigned int index )
+{
+    unsigned long long entry;
+    asm( "call (%0), _optix_get_transform_list_handle, (%1);" : "=l"( entry ) : "r"( index ) : );
+    return entry;
+}
+
+// Returns the 32-bit result of `_optix_get_transform_type_from_handle`, converted to
+// OptixTransformType.
+static __forceinline__ __device__ OptixTransformType optixGetTransformTypeFromHandle( OptixTraversableHandle handle )
+{
+    int rawType;
+    asm( "call (%0), _optix_get_transform_type_from_handle, (%1);" : "=r"( rawType ) : "l"( handle ) : );
+    return static_cast<OptixTransformType>( rawType );
+}
+
+// Returns the 64-bit result of `_optix_get_static_transform_from_handle`, reinterpreted as
+// a pointer to a const OptixStaticTransform.
+static __forceinline__ __device__ const OptixStaticTransform* optixGetStaticTransformFromHandle( OptixTraversableHandle handle )
+{
+    unsigned long long address;
+    asm( "call (%0), _optix_get_static_transform_from_handle, (%1);" : "=l"( address ) : "l"( handle ) : );
+    return reinterpret_cast<const OptixStaticTransform*>( address );
+}
+
+// Returns the 64-bit result of `_optix_get_srt_motion_transform_from_handle`, reinterpreted
+// as a pointer to a const OptixSRTMotionTransform.
+static __forceinline__ __device__ const OptixSRTMotionTransform* optixGetSRTMotionTransformFromHandle( OptixTraversableHandle handle )
+{
+    unsigned long long address;
+    asm( "call (%0), _optix_get_srt_motion_transform_from_handle, (%1);" : "=l"( address ) : "l"( handle ) : );
+    return reinterpret_cast<const OptixSRTMotionTransform*>( address );
+}
+
+static
__forceinline__ __device__ const OptixMatrixMotionTransform* optixGetMatrixMotionTransformFromHandle( OptixTraversableHandle handle ) +{ + unsigned long long ptr; + asm( "call (%0), _optix_get_matrix_motion_transform_from_handle, (%1);" : "=l"( ptr ) : "l"( handle ) : ); + return (const OptixMatrixMotionTransform*)ptr; +} + +static __forceinline__ __device__ unsigned int optixGetInstanceIdFromHandle( OptixTraversableHandle handle ) +{ + int i0; + asm( "call (%0), _optix_get_instance_id_from_handle, (%1);" : "=r"( i0 ) : "l"( handle ) : ); + return i0; +} + +static __forceinline__ __device__ OptixTraversableHandle optixGetInstanceChildFromHandle( OptixTraversableHandle handle ) +{ + unsigned long long i0; + asm( "call (%0), _optix_get_instance_child_from_handle, (%1);" : "=l"( i0 ) : "l"( handle ) : ); + return (OptixTraversableHandle)i0; +} + +static __forceinline__ __device__ const float4* optixGetInstanceTransformFromHandle( OptixTraversableHandle handle ) +{ + unsigned long long ptr; + asm( "call (%0), _optix_get_instance_transform_from_handle, (%1);" : "=l"( ptr ) : "l"( handle ) : ); + return (const float4*)ptr; +} + +static __forceinline__ __device__ const float4* optixGetInstanceInverseTransformFromHandle( OptixTraversableHandle handle ) +{ + unsigned long long ptr; + asm( "call (%0), _optix_get_instance_inverse_transform_from_handle, (%1);" : "=l"( ptr ) : "l"( handle ) : ); + return (const float4*)ptr; +} + +static __forceinline__ __device__ bool optixReportIntersection( float hitT, unsigned int hitKind ) +{ + int ret; + asm volatile( + "call (%0), _optix_report_intersection_0" + ", (%1, %2);" + : "=r"( ret ) + : "f"( hitT ), "r"( hitKind ) + : ); + return ret; +} + +static __forceinline__ __device__ bool optixReportIntersection( float hitT, unsigned int hitKind, unsigned int a0 ) +{ + int ret; + asm volatile( + "call (%0), _optix_report_intersection_1" + ", (%1, %2, %3);" + : "=r"( ret ) + : "f"( hitT ), "r"( hitKind ), "r"( a0 ) + : ); + return ret; +} + 
+static __forceinline__ __device__ bool optixReportIntersection( float hitT, unsigned int hitKind, unsigned int a0, unsigned int a1 ) +{ + int ret; + asm volatile( + "call (%0), _optix_report_intersection_2" + ", (%1, %2, %3, %4);" + : "=r"( ret ) + : "f"( hitT ), "r"( hitKind ), "r"( a0 ), "r"( a1 ) + : ); + return ret; +} + +static __forceinline__ __device__ bool optixReportIntersection( float hitT, unsigned int hitKind, unsigned int a0, unsigned int a1, unsigned int a2 ) +{ + int ret; + asm volatile( + "call (%0), _optix_report_intersection_3" + ", (%1, %2, %3, %4, %5);" + : "=r"( ret ) + : "f"( hitT ), "r"( hitKind ), "r"( a0 ), "r"( a1 ), "r"( a2 ) + : ); + return ret; +} + +static __forceinline__ __device__ bool optixReportIntersection( float hitT, + unsigned int hitKind, + unsigned int a0, + unsigned int a1, + unsigned int a2, + unsigned int a3 ) +{ + int ret; + asm volatile( + "call (%0), _optix_report_intersection_4" + ", (%1, %2, %3, %4, %5, %6);" + : "=r"( ret ) + : "f"( hitT ), "r"( hitKind ), "r"( a0 ), "r"( a1 ), "r"( a2 ), "r"( a3 ) + : ); + return ret; +} + +static __forceinline__ __device__ bool optixReportIntersection( float hitT, + unsigned int hitKind, + unsigned int a0, + unsigned int a1, + unsigned int a2, + unsigned int a3, + unsigned int a4 ) +{ + int ret; + asm volatile( + "call (%0), _optix_report_intersection_5" + ", (%1, %2, %3, %4, %5, %6, %7);" + : "=r"( ret ) + : "f"( hitT ), "r"( hitKind ), "r"( a0 ), "r"( a1 ), "r"( a2 ), "r"( a3 ), "r"( a4 ) + : ); + return ret; +} + +static __forceinline__ __device__ bool optixReportIntersection( float hitT, + unsigned int hitKind, + unsigned int a0, + unsigned int a1, + unsigned int a2, + unsigned int a3, + unsigned int a4, + unsigned int a5 ) +{ + int ret; + asm volatile( + "call (%0), _optix_report_intersection_6" + ", (%1, %2, %3, %4, %5, %6, %7, %8);" + : "=r"( ret ) + : "f"( hitT ), "r"( hitKind ), "r"( a0 ), "r"( a1 ), "r"( a2 ), "r"( a3 ), "r"( a4 ), "r"( a5 ) + : ); + return ret; +} + 
+static __forceinline__ __device__ bool optixReportIntersection( float hitT, + unsigned int hitKind, + unsigned int a0, + unsigned int a1, + unsigned int a2, + unsigned int a3, + unsigned int a4, + unsigned int a5, + unsigned int a6 ) +{ + int ret; + asm volatile( + "call (%0), _optix_report_intersection_7" + ", (%1, %2, %3, %4, %5, %6, %7, %8, %9);" + : "=r"( ret ) + : "f"( hitT ), "r"( hitKind ), "r"( a0 ), "r"( a1 ), "r"( a2 ), "r"( a3 ), "r"( a4 ), "r"( a5 ), "r"( a6 ) + : ); + return ret; +} + +static __forceinline__ __device__ bool optixReportIntersection( float hitT, + unsigned int hitKind, + unsigned int a0, + unsigned int a1, + unsigned int a2, + unsigned int a3, + unsigned int a4, + unsigned int a5, + unsigned int a6, + unsigned int a7 ) +{ + int ret; + asm volatile( + "call (%0), _optix_report_intersection_8" + ", (%1, %2, %3, %4, %5, %6, %7, %8, %9, %10);" + : "=r"( ret ) + : "f"( hitT ), "r"( hitKind ), "r"( a0 ), "r"( a1 ), "r"( a2 ), "r"( a3 ), "r"( a4 ), "r"( a5 ), "r"( a6 ), "r"( a7 ) + : ); + return ret; +} + +#define OPTIX_DEFINE_optixGetAttribute_BODY( which ) \ + unsigned int ret; \ + asm( "call (%0), _optix_get_attribute_" #which ", ();" : "=r"( ret ) : ); \ + return ret; + +static __forceinline__ __device__ unsigned int optixGetAttribute_0() +{ + OPTIX_DEFINE_optixGetAttribute_BODY( 0 ); +} + +static __forceinline__ __device__ unsigned int optixGetAttribute_1() +{ + OPTIX_DEFINE_optixGetAttribute_BODY( 1 ); +} + +static __forceinline__ __device__ unsigned int optixGetAttribute_2() +{ + OPTIX_DEFINE_optixGetAttribute_BODY( 2 ); +} + +static __forceinline__ __device__ unsigned int optixGetAttribute_3() +{ + OPTIX_DEFINE_optixGetAttribute_BODY( 3 ); +} + +static __forceinline__ __device__ unsigned int optixGetAttribute_4() +{ + OPTIX_DEFINE_optixGetAttribute_BODY( 4 ); +} + +static __forceinline__ __device__ unsigned int optixGetAttribute_5() +{ + OPTIX_DEFINE_optixGetAttribute_BODY( 5 ); +} + +static __forceinline__ __device__ unsigned int 
optixGetAttribute_6() +{ + OPTIX_DEFINE_optixGetAttribute_BODY( 6 ); +} + +static __forceinline__ __device__ unsigned int optixGetAttribute_7() +{ + OPTIX_DEFINE_optixGetAttribute_BODY( 7 ); +} + +#undef OPTIX_DEFINE_optixGetAttribute_BODY + +static __forceinline__ __device__ void optixTerminateRay() +{ + asm volatile( "call _optix_terminate_ray, ();" ); +} + +static __forceinline__ __device__ void optixIgnoreIntersection() +{ + asm volatile( "call _optix_ignore_intersection, ();" ); +} + +static __forceinline__ __device__ unsigned int optixGetPrimitiveIndex() +{ + unsigned int u0; + asm( "call (%0), _optix_read_primitive_idx, ();" : "=r"( u0 ) : ); + return u0; +} + +static __forceinline__ __device__ unsigned int optixGetSbtGASIndex() +{ + unsigned int u0; + asm( "call (%0), _optix_read_sbt_gas_idx, ();" : "=r"( u0 ) : ); + return u0; +} + +static __forceinline__ __device__ unsigned int optixGetInstanceId() +{ + unsigned int u0; + asm( "call (%0), _optix_read_instance_id, ();" : "=r"( u0 ) : ); + return u0; +} + +static __forceinline__ __device__ unsigned int optixGetInstanceIndex() +{ + unsigned int u0; + asm( "call (%0), _optix_read_instance_idx, ();" : "=r"( u0 ) : ); + return u0; +} + +static __forceinline__ __device__ unsigned int optixGetHitKind() +{ + unsigned int u0; + asm( "call (%0), _optix_get_hit_kind, ();" : "=r"( u0 ) : ); + return u0; +} + +static __forceinline__ __device__ OptixPrimitiveType optixGetPrimitiveType(unsigned int hitKind) +{ + unsigned int u0; + asm( "call (%0), _optix_get_primitive_type_from_hit_kind, (%1);" : "=r"( u0 ) : "r"( hitKind ) ); + return (OptixPrimitiveType)u0; +} + +static __forceinline__ __device__ bool optixIsBackFaceHit( unsigned int hitKind ) +{ + unsigned int u0; + asm( "call (%0), _optix_get_backface_from_hit_kind, (%1);" : "=r"( u0 ) : "r"( hitKind ) ); + return (u0 == 0x1); +} + +static __forceinline__ __device__ bool optixIsFrontFaceHit( unsigned int hitKind ) +{ + return !optixIsBackFaceHit( hitKind ); +} + + 
+static __forceinline__ __device__ OptixPrimitiveType optixGetPrimitiveType() +{ + return optixGetPrimitiveType( optixGetHitKind() ); +} + +static __forceinline__ __device__ bool optixIsBackFaceHit() +{ + return optixIsBackFaceHit( optixGetHitKind() ); +} + +static __forceinline__ __device__ bool optixIsFrontFaceHit() +{ + return optixIsFrontFaceHit( optixGetHitKind() ); +} + +static __forceinline__ __device__ bool optixIsTriangleHit() +{ + return optixIsTriangleFrontFaceHit() || optixIsTriangleBackFaceHit(); +} + +static __forceinline__ __device__ bool optixIsTriangleFrontFaceHit() +{ + return optixGetHitKind() == OPTIX_HIT_KIND_TRIANGLE_FRONT_FACE; +} + +static __forceinline__ __device__ bool optixIsTriangleBackFaceHit() +{ + return optixGetHitKind() == OPTIX_HIT_KIND_TRIANGLE_BACK_FACE; +} + +static __forceinline__ __device__ float optixGetCurveParameter() +{ + return __int_as_float( optixGetAttribute_0() ); +} + +static __forceinline__ __device__ float2 optixGetTriangleBarycentrics() +{ + float f0, f1; + asm( "call (%0, %1), _optix_get_triangle_barycentrics, ();" : "=f"( f0 ), "=f"( f1 ) : ); + return make_float2( f0, f1 ); +} + +static __forceinline__ __device__ uint3 optixGetLaunchIndex() +{ + unsigned int u0, u1, u2; + asm( "call (%0), _optix_get_launch_index_x, ();" : "=r"( u0 ) : ); + asm( "call (%0), _optix_get_launch_index_y, ();" : "=r"( u1 ) : ); + asm( "call (%0), _optix_get_launch_index_z, ();" : "=r"( u2 ) : ); + return make_uint3( u0, u1, u2 ); +} + +static __forceinline__ __device__ uint3 optixGetLaunchDimensions() +{ + unsigned int u0, u1, u2; + asm( "call (%0), _optix_get_launch_dimension_x, ();" : "=r"( u0 ) : ); + asm( "call (%0), _optix_get_launch_dimension_y, ();" : "=r"( u1 ) : ); + asm( "call (%0), _optix_get_launch_dimension_z, ();" : "=r"( u2 ) : ); + return make_uint3( u0, u1, u2 ); +} + +static __forceinline__ __device__ CUdeviceptr optixGetSbtDataPointer() +{ + unsigned long long ptr; + asm( "call (%0), _optix_get_sbt_data_ptr_64, 
();" : "=l"( ptr ) : ); + return (CUdeviceptr)ptr; +} + +static __forceinline__ __device__ void optixThrowException( int exceptionCode ) +{ + asm volatile( + "call _optix_throw_exception_0, (%0);" + : /* no return value */ + : "r"( exceptionCode ) + : ); +} + +static __forceinline__ __device__ void optixThrowException( int exceptionCode, unsigned int exceptionDetail0 ) +{ + asm volatile( + "call _optix_throw_exception_1, (%0, %1);" + : /* no return value */ + : "r"( exceptionCode ), "r"( exceptionDetail0 ) + : ); +} + +static __forceinline__ __device__ void optixThrowException( int exceptionCode, unsigned int exceptionDetail0, unsigned int exceptionDetail1 ) +{ + asm volatile( + "call _optix_throw_exception_2, (%0, %1, %2);" + : /* no return value */ + : "r"( exceptionCode ), "r"( exceptionDetail0 ), "r"( exceptionDetail1 ) + : ); +} + +static __forceinline__ __device__ void optixThrowException( int exceptionCode, unsigned int exceptionDetail0, unsigned int exceptionDetail1, unsigned int exceptionDetail2 ) +{ + asm volatile( + "call _optix_throw_exception_3, (%0, %1, %2, %3);" + : /* no return value */ + : "r"( exceptionCode ), "r"( exceptionDetail0 ), "r"( exceptionDetail1 ), "r"( exceptionDetail2 ) + : ); +} + +static __forceinline__ __device__ void optixThrowException( int exceptionCode, unsigned int exceptionDetail0, unsigned int exceptionDetail1, unsigned int exceptionDetail2, unsigned int exceptionDetail3 ) +{ + asm volatile( + "call _optix_throw_exception_4, (%0, %1, %2, %3, %4);" + : /* no return value */ + : "r"( exceptionCode ), "r"( exceptionDetail0 ), "r"( exceptionDetail1 ), "r"( exceptionDetail2 ), "r"( exceptionDetail3 ) + : ); +} + +static __forceinline__ __device__ void optixThrowException( int exceptionCode, unsigned int exceptionDetail0, unsigned int exceptionDetail1, unsigned int exceptionDetail2, unsigned int exceptionDetail3, unsigned int exceptionDetail4 ) +{ + asm volatile( + "call _optix_throw_exception_5, (%0, %1, %2, %3, %4, %5);" + : /* 
no return value */ + : "r"( exceptionCode ), "r"( exceptionDetail0 ), "r"( exceptionDetail1 ), "r"( exceptionDetail2 ), "r"( exceptionDetail3 ), "r"( exceptionDetail4 ) + : ); +} + +static __forceinline__ __device__ void optixThrowException( int exceptionCode, unsigned int exceptionDetail0, unsigned int exceptionDetail1, unsigned int exceptionDetail2, unsigned int exceptionDetail3, unsigned int exceptionDetail4, unsigned int exceptionDetail5 ) +{ + asm volatile( + "call _optix_throw_exception_6, (%0, %1, %2, %3, %4, %5, %6);" + : /* no return value */ + : "r"( exceptionCode ), "r"( exceptionDetail0 ), "r"( exceptionDetail1 ), "r"( exceptionDetail2 ), "r"( exceptionDetail3 ), "r"( exceptionDetail4 ), "r"( exceptionDetail5 ) + : ); +} + +static __forceinline__ __device__ void optixThrowException( int exceptionCode, unsigned int exceptionDetail0, unsigned int exceptionDetail1, unsigned int exceptionDetail2, unsigned int exceptionDetail3, unsigned int exceptionDetail4, unsigned int exceptionDetail5, unsigned int exceptionDetail6 ) +{ + asm volatile( + "call _optix_throw_exception_7, (%0, %1, %2, %3, %4, %5, %6, %7);" + : /* no return value */ + : "r"( exceptionCode ), "r"( exceptionDetail0 ), "r"( exceptionDetail1 ), "r"( exceptionDetail2 ), "r"( exceptionDetail3 ), "r"( exceptionDetail4 ), "r"( exceptionDetail5 ), "r"( exceptionDetail6 ) + : ); +} + +static __forceinline__ __device__ void optixThrowException( int exceptionCode, unsigned int exceptionDetail0, unsigned int exceptionDetail1, unsigned int exceptionDetail2, unsigned int exceptionDetail3, unsigned int exceptionDetail4, unsigned int exceptionDetail5, unsigned int exceptionDetail6, unsigned int exceptionDetail7 ) +{ + asm volatile( + "call _optix_throw_exception_8, (%0, %1, %2, %3, %4, %5, %6, %7, %8);" + : /* no return value */ + : "r"( exceptionCode ), "r"( exceptionDetail0 ), "r"( exceptionDetail1 ), "r"( exceptionDetail2 ), "r"( exceptionDetail3 ), "r"( exceptionDetail4 ), "r"( exceptionDetail5 ), "r"( 
exceptionDetail6 ), "r"( exceptionDetail7 ) + : ); +} + +static __forceinline__ __device__ int optixGetExceptionCode() +{ + int s0; + asm( "call (%0), _optix_get_exception_code, ();" : "=r"( s0 ) : ); + return s0; +} + +#define OPTIX_DEFINE_optixGetExceptionDetail_BODY( which ) \ + unsigned int ret; \ + asm( "call (%0), _optix_get_exception_detail_" #which ", ();" : "=r"( ret ) : ); \ + return ret; + +static __forceinline__ __device__ unsigned int optixGetExceptionDetail_0() +{ + OPTIX_DEFINE_optixGetExceptionDetail_BODY( 0 ); +} + +static __forceinline__ __device__ unsigned int optixGetExceptionDetail_1() +{ + OPTIX_DEFINE_optixGetExceptionDetail_BODY( 1 ); +} + +static __forceinline__ __device__ unsigned int optixGetExceptionDetail_2() +{ + OPTIX_DEFINE_optixGetExceptionDetail_BODY( 2 ); +} + +static __forceinline__ __device__ unsigned int optixGetExceptionDetail_3() +{ + OPTIX_DEFINE_optixGetExceptionDetail_BODY( 3 ); +} + +static __forceinline__ __device__ unsigned int optixGetExceptionDetail_4() +{ + OPTIX_DEFINE_optixGetExceptionDetail_BODY( 4 ); +} + +static __forceinline__ __device__ unsigned int optixGetExceptionDetail_5() +{ + OPTIX_DEFINE_optixGetExceptionDetail_BODY( 5 ); +} + +static __forceinline__ __device__ unsigned int optixGetExceptionDetail_6() +{ + OPTIX_DEFINE_optixGetExceptionDetail_BODY( 6 ); +} + +static __forceinline__ __device__ unsigned int optixGetExceptionDetail_7() +{ + OPTIX_DEFINE_optixGetExceptionDetail_BODY( 7 ); +} + +#undef OPTIX_DEFINE_optixGetExceptionDetail_BODY + +static __forceinline__ __device__ OptixTraversableHandle optixGetExceptionInvalidTraversable() +{ + unsigned long long handle; + asm( "call (%0), _optix_get_exception_invalid_traversable, ();" : "=l"( handle ) : ); + return (OptixTraversableHandle)handle; +} + +static __forceinline__ __device__ int optixGetExceptionInvalidSbtOffset() +{ + int s0; + asm( "call (%0), _optix_get_exception_invalid_sbt_offset, ();" : "=r"( s0 ) : ); + return s0; +} + +static 
__forceinline__ __device__ OptixInvalidRayExceptionDetails optixGetExceptionInvalidRay() +{ + float rayOriginX, rayOriginY, rayOriginZ, rayDirectionX, rayDirectionY, rayDirectionZ, tmin, tmax, rayTime; + asm( "call (%0, %1, %2, %3, %4, %5, %6, %7, %8), _optix_get_exception_invalid_ray, ();" + : "=f"( rayOriginX ), "=f"( rayOriginY ), "=f"( rayOriginZ ), "=f"( rayDirectionX ), "=f"( rayDirectionY ), + "=f"( rayDirectionZ ), "=f"( tmin ), "=f"( tmax ), "=f"( rayTime ) + : ); + OptixInvalidRayExceptionDetails ray; + ray.origin = make_float3( rayOriginX, rayOriginY, rayOriginZ ); + ray.direction = make_float3( rayDirectionX, rayDirectionY, rayDirectionZ ); + ray.tmin = tmin; + ray.tmax = tmax; + ray.time = rayTime; + return ray; +} + +static __forceinline__ __device__ OptixParameterMismatchExceptionDetails optixGetExceptionParameterMismatch() +{ + unsigned int expected, actual, sbtIdx; + unsigned long long calleeName; + asm( + "call (%0, %1, %2, %3), _optix_get_exception_parameter_mismatch, ();" + : "=r"(expected), "=r"(actual), "=r"(sbtIdx), "=l"(calleeName) : ); + OptixParameterMismatchExceptionDetails details; + details.expectedParameterCount = expected; + details.passedArgumentCount = actual; + details.sbtIndex = sbtIdx; + details.callableName = (char*)calleeName; + return details; +} + +static __forceinline__ __device__ char* optixGetExceptionLineInfo() +{ + unsigned long long ptr; + asm( "call (%0), _optix_get_exception_line_info, ();" : "=l"(ptr) : ); + return (char*)ptr; +} + +template <typename ReturnT, typename... ArgTypes> +static __forceinline__ __device__ ReturnT optixDirectCall( unsigned int sbtIndex, ArgTypes... args ) +{ + unsigned long long func; + asm( "call (%0), _optix_call_direct_callable,(%1);" : "=l"( func ) : "r"( sbtIndex ) : ); + using funcT = ReturnT ( * )( ArgTypes... ); + funcT call = ( funcT )( func ); + return call( args... ); +} + +template <typename ReturnT, typename... 
ArgTypes> +static __forceinline__ __device__ ReturnT optixContinuationCall( unsigned int sbtIndex, ArgTypes... args ) +{ + unsigned long long func; + asm( "call (%0), _optix_call_continuation_callable,(%1);" : "=l"( func ) : "r"( sbtIndex ) : ); + using funcT = ReturnT ( * )( ArgTypes... ); + funcT call = ( funcT )( func ); + return call( args... ); +} +#endif + +static __forceinline__ __device__ uint4 optixTexFootprint2D( unsigned long long tex, unsigned int texInfo, float x, float y, unsigned int* singleMipLevel ) +{ + uint4 result; + unsigned long long resultPtr = reinterpret_cast<unsigned long long>( &result ); + unsigned long long singleMipLevelPtr = reinterpret_cast<unsigned long long>( singleMipLevel ); + // Cast float args to integers, because the intrinics take .b32 arguments when compiled to PTX. + asm volatile( + "call _optix_tex_footprint_2d_v2" + ", (%0, %1, %2, %3, %4, %5);" + : + : "l"( tex ), "r"( texInfo ), "r"( __float_as_uint( x ) ), "r"( __float_as_uint( y ) ), + "l"( singleMipLevelPtr ), "l"( resultPtr ) + : ); + return result; +} + +static __forceinline__ __device__ uint4 optixTexFootprint2DGrad( unsigned long long tex, + unsigned int texInfo, + float x, + float y, + float dPdx_x, + float dPdx_y, + float dPdy_x, + float dPdy_y, + bool coarse, + unsigned int* singleMipLevel ) +{ + uint4 result; + unsigned long long resultPtr = reinterpret_cast<unsigned long long>( &result ); + unsigned long long singleMipLevelPtr = reinterpret_cast<unsigned long long>( singleMipLevel ); + // Cast float args to integers, because the intrinics take .b32 arguments when compiled to PTX. 
+ asm volatile( + "call _optix_tex_footprint_2d_grad_v2" + ", (%0, %1, %2, %3, %4, %5, %6, %7, %8, %9, %10);" + : + : "l"( tex ), "r"( texInfo ), "r"( __float_as_uint( x ) ), "r"( __float_as_uint( y ) ), + "r"( __float_as_uint( dPdx_x ) ), "r"( __float_as_uint( dPdx_y ) ), "r"( __float_as_uint( dPdy_x ) ), + "r"( __float_as_uint( dPdy_y ) ), "r"( static_cast<unsigned int>( coarse ) ), "l"( singleMipLevelPtr ), "l"( resultPtr ) + : ); + + return result; +} + +static __forceinline__ __device__ uint4 +optixTexFootprint2DLod( unsigned long long tex, unsigned int texInfo, float x, float y, float level, bool coarse, unsigned int* singleMipLevel ) +{ + uint4 result; + unsigned long long resultPtr = reinterpret_cast<unsigned long long>( &result ); + unsigned long long singleMipLevelPtr = reinterpret_cast<unsigned long long>( singleMipLevel ); + // Cast float args to integers, because the intrinics take .b32 arguments when compiled to PTX. + asm volatile( + "call _optix_tex_footprint_2d_lod_v2" + ", (%0, %1, %2, %3, %4, %5, %6, %7);" + : + : "l"( tex ), "r"( texInfo ), "r"( __float_as_uint( x ) ), "r"( __float_as_uint( y ) ), + "r"( __float_as_uint( level ) ), "r"( static_cast<unsigned int>( coarse ) ), "l"( singleMipLevelPtr ), "l"( resultPtr ) + : ); + return result; +} diff --git a/dependencies/optix/internal/optix_7_device_impl_exception.h b/dependencies/optix/internal/optix_7_device_impl_exception.h new file mode 100644 index 0000000000000000000000000000000000000000..c398ef5fe517f364d29fd4506d6c1897e879eb03 --- /dev/null +++ b/dependencies/optix/internal/optix_7_device_impl_exception.h @@ -0,0 +1,295 @@ +/* +* Copyright (c) 2021 NVIDIA Corporation. All rights reserved. +* +* NVIDIA Corporation and its licensors retain all intellectual property and proprietary +* rights in and to this software, related documentation and any modifications thereto. 
+* Any use, reproduction, disclosure or distribution of this software and related +* documentation without an express license agreement from NVIDIA Corporation is strictly +* prohibited. +* +* TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THIS SOFTWARE IS PROVIDED *AS IS* +* AND NVIDIA AND ITS SUPPLIERS DISCLAIM ALL WARRANTIES, EITHER EXPRESS OR IMPLIED, +* INCLUDING, BUT NOT LIMITED TO, IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +* PARTICULAR PURPOSE. IN NO EVENT SHALL NVIDIA OR ITS SUPPLIERS BE LIABLE FOR ANY +* SPECIAL, INCIDENTAL, INDIRECT, OR CONSEQUENTIAL DAMAGES WHATSOEVER (INCLUDING, WITHOUT +* LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF +* BUSINESS INFORMATION, OR ANY OTHER PECUNIARY LOSS) ARISING OUT OF THE USE OF OR +* INABILITY TO USE THIS SOFTWARE, EVEN IF NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF +* SUCH DAMAGES +*/ + +/** +* @file optix_7_device_impl_exception.h +* @author NVIDIA Corporation +* @brief OptiX public API +* +* OptiX public API Reference - Device side implementation for exception helper function. +*/ + +#if !defined( __OPTIX_INCLUDE_INTERNAL_HEADERS__ ) +#error("optix_7_device_impl_exception.h is an internal header file and must not be used directly. 
Please use optix_device.h or optix.h instead.") +#endif + +#ifndef __optix_optix_7_device_impl_exception_h__ +#define __optix_optix_7_device_impl_exception_h__ + +#if !defined(__CUDACC_RTC__) +#include <cstdio> /* for printf */ +#endif + +namespace optix_impl { + + static __forceinline__ __device__ void optixDumpStaticTransformFromHandle( OptixTraversableHandle handle ) + { + const OptixStaticTransform* traversable = optixGetStaticTransformFromHandle( handle ); + if( traversable ) + { + const uint3 index = optixGetLaunchIndex(); + printf( "(%4i,%4i,%4i) OptixStaticTransform@%p = {\n" + " child = %p,\n" + " transform = { %f,%f,%f,%f,\n" + " %f,%f,%f,%f,\n" + " %f,%f,%f,%f } }\n", + index.x,index.y,index.z, + traversable, + (void*)traversable->child, + traversable->transform[0], traversable->transform[1], traversable->transform[2], traversable->transform[3], + traversable->transform[4], traversable->transform[5], traversable->transform[6], traversable->transform[7], + traversable->transform[8], traversable->transform[9], traversable->transform[10], traversable->transform[11] ); + } + } + + static __forceinline__ __device__ void optixDumpMotionMatrixTransformFromHandle( OptixTraversableHandle handle ) + { + const OptixMatrixMotionTransform* traversable = optixGetMatrixMotionTransformFromHandle( handle ); + if( traversable ) + { + const uint3 index = optixGetLaunchIndex(); + printf( "(%4i,%4i,%4i) OptixMatrixMotionTransform@%p = {\n" + " child = %p,\n" + " motionOptions = { numKeys = %i, flags = %i, timeBegin = %f, timeEnd = %f },\n" + " transform = { { %f,%f,%f,%f,\n" + " %f,%f,%f,%f,\n" + " %f,%f,%f,%f }, ... 
}\n", + index.x,index.y,index.z, + traversable, + (void*)traversable->child, + (int)traversable->motionOptions.numKeys, (int)traversable->motionOptions.flags, traversable->motionOptions.timeBegin, traversable->motionOptions.timeEnd, + traversable->transform[0][0], traversable->transform[0][1], traversable->transform[0][2], traversable->transform[0][3], + traversable->transform[0][4], traversable->transform[0][5], traversable->transform[0][6], traversable->transform[0][7], + traversable->transform[0][8], traversable->transform[0][9], traversable->transform[0][10], traversable->transform[0][11] ); + } + } + + static __forceinline__ __device__ void optixDumpSrtMatrixTransformFromHandle( OptixTraversableHandle handle ) + { + const OptixSRTMotionTransform* traversable = optixGetSRTMotionTransformFromHandle( handle ); + if( traversable ) + { + const uint3 index = optixGetLaunchIndex(); + printf( "(%4i,%4i,%4i) OptixSRTMotionTransform@%p = {\n" + " child = %p,\n" + " motionOptions = { numKeys = %i, flags = %i, timeBegin = %f, timeEnd = %f },\n" + " srtData = { { sx = %f, a = %f, b = %f, pvx = %f,\n" + " sy = %f, c = %f, pvy = %f, sz = %f,\n" + " pvz = %f, qx = %f, qy = %f, qz = %f,\n" + " qw = %f, tx = %f, ty = %f, tz = %f }, ... 
}\n", + index.x,index.y,index.z, + traversable, + (void*)traversable->child, + (int)traversable->motionOptions.numKeys, (int)traversable->motionOptions.flags, traversable->motionOptions.timeBegin, traversable->motionOptions.timeEnd, + traversable->srtData[0].sx, traversable->srtData[0].a, traversable->srtData[0].b, traversable->srtData[0].pvx, + traversable->srtData[0].sy, traversable->srtData[0].c, traversable->srtData[0].pvy,traversable->srtData[0].sz, + traversable->srtData[0].pvz,traversable->srtData[0].qx,traversable->srtData[0].qy, traversable->srtData[0].qz, + traversable->srtData[0].qw, traversable->srtData[0].tx,traversable->srtData[0].ty, traversable->srtData[0].tz ); + } + } + + static __forceinline__ __device__ void optixDumpInstanceFromHandle( OptixTraversableHandle handle ) + { + if( optixGetTransformTypeFromHandle( handle ) == OPTIX_TRANSFORM_TYPE_INSTANCE ) + { + unsigned int instanceId = optixGetInstanceIdFromHandle( handle ); + const float4* transform = optixGetInstanceTransformFromHandle( handle ); + + const uint3 index = optixGetLaunchIndex(); + printf( "(%4i,%4i,%4i) OptixInstance = {\n" + " instanceId = %i,\n" + " transform = { %f,%f,%f,%f,\n" + " %f,%f,%f,%f,\n" + " %f,%f,%f,%f } }\n", + index.x,index.y,index.z, + instanceId, + transform[0].x, transform[0].y, transform[0].z, transform[0].w, + transform[1].x, transform[1].y, transform[1].z, transform[1].w, + transform[2].x, transform[2].y, transform[2].z, transform[2].w ); + } + } + + static __forceinline__ __device__ void optixDumpTransform( OptixTraversableHandle handle ) + { + const OptixTransformType type = optixGetTransformTypeFromHandle( handle ); + const uint3 index = optixGetLaunchIndex(); + + switch( type ) + { + case OPTIX_TRANSFORM_TYPE_NONE: + break; + case OPTIX_TRANSFORM_TYPE_STATIC_TRANSFORM: + optixDumpStaticTransformFromHandle( handle ); + break; + case OPTIX_TRANSFORM_TYPE_MATRIX_MOTION_TRANSFORM: + optixDumpMotionMatrixTransformFromHandle( handle ); + break; + case 
OPTIX_TRANSFORM_TYPE_SRT_MOTION_TRANSFORM: + optixDumpSrtMatrixTransformFromHandle( handle ); + break; + case OPTIX_TRANSFORM_TYPE_INSTANCE: + optixDumpInstanceFromHandle( handle ); + break; + default: + break; + } + } + + static __forceinline__ __device__ void optixDumpTransformList() + { + const int tlistSize = optixGetTransformListSize(); + const uint3 index = optixGetLaunchIndex(); + + printf("(%4i,%4i,%4i) transform list of size %i:\n", index.x,index.y,index.z, tlistSize); + + for( unsigned int i = 0 ; i < tlistSize ; ++i ) + { + OptixTraversableHandle handle = optixGetTransformListHandle( i ); + printf("(%4i,%4i,%4i) transform[%i] = %p\n", index.x, index.y, index.z, i, (void*)handle); + optixDumpTransform(handle); + } + } + + static __forceinline__ __device__ void optixDumpExceptionDetails() + { + bool dumpTlist = false; + const int exceptionCode = optixGetExceptionCode(); + const uint3 index = optixGetLaunchIndex(); + + if( exceptionCode == OPTIX_EXCEPTION_CODE_STACK_OVERFLOW ) + { + printf("(%4i,%4i,%4i) error: stack overflow\n", index.x,index.y,index.z); + } + else if( exceptionCode == OPTIX_EXCEPTION_CODE_TRACE_DEPTH_EXCEEDED ) + { + printf("(%4i,%4i,%4i) error: trace depth exceeded\n", index.x,index.y,index.z); + } + else if( exceptionCode == OPTIX_EXCEPTION_CODE_TRAVERSAL_DEPTH_EXCEEDED ) + { + printf("(%4i,%4i,%4i) error: traversal depth exceeded\n", index.x,index.y,index.z); + dumpTlist = true; + } + else if( exceptionCode == OPTIX_EXCEPTION_CODE_TRAVERSAL_INVALID_TRAVERSABLE ) + { + OptixTraversableHandle handle = optixGetExceptionInvalidTraversable(); + printf("(%4i,%4i,%4i) error: invalid traversable %p\n", index.x,index.y,index.z, (void*)handle); + dumpTlist = true; + } + else if( exceptionCode == OPTIX_EXCEPTION_CODE_TRAVERSAL_INVALID_MISS_SBT ) + { + int sbtOffset = optixGetExceptionInvalidSbtOffset(); + printf("(%4i,%4i,%4i) error: invalid miss sbt of %i\n", index.x,index.y,index.z, sbtOffset); + } + else if( exceptionCode == 
OPTIX_EXCEPTION_CODE_TRAVERSAL_INVALID_HIT_SBT ) + { + int sbtOffset = optixGetExceptionInvalidSbtOffset(); + printf("(%4i,%4i,%4i) error: invalid hit sbt of %i at primitive with gas sbt index %i\n", index.x,index.y,index.z, sbtOffset, optixGetSbtGASIndex() ); + dumpTlist = true; + } + else if( exceptionCode == OPTIX_EXCEPTION_CODE_UNSUPPORTED_PRIMITIVE_TYPE ) + { + dumpTlist = true; + printf( "(%4i,%4i,%4i) error: shader encountered unsupported builtin type\n" + " call location: %s\n", index.x, index.y, index.z, optixGetExceptionLineInfo() ); + } + else if( exceptionCode == OPTIX_EXCEPTION_CODE_INVALID_RAY ) + { + OptixInvalidRayExceptionDetails ray = optixGetExceptionInvalidRay(); + printf( "(%4i,%4i,%4i) error: encountered an invalid ray:\n", index.x, index.y, index.z ); + printf( + " origin: [%f, %f, %f]\n" + " direction: [%f, %f, %f]\n" + " tmin: %f\n" + " tmax: %f\n" + " rayTime: %f\n" + " call location: %s\n", + ray.origin.x, ray.origin.y, ray.origin.z, ray.direction.x, ray.direction.y, + ray.direction.z, ray.tmin, ray.tmax, ray.time, optixGetExceptionLineInfo() ); + } + else if( exceptionCode == OPTIX_EXCEPTION_CODE_CALLABLE_PARAMETER_MISMATCH ) + { + OptixParameterMismatchExceptionDetails details = optixGetExceptionParameterMismatch(); + printf( "(%4i,%4i,%4i) error: parameter mismatch in callable call.\n", index.x, index.y, index.z ); + printf( + " passed packed arguments: %u 32 Bit values\n" + " expected packed parameters: %u 32 Bit values\n" + " SBT index: %u\n" + " called function: %s\n" + " call location: %s\n", + details.passedArgumentCount, details.expectedParameterCount, details.sbtIndex, + details.callableName, optixGetExceptionLineInfo() ); + } + else if( exceptionCode == OPTIX_EXCEPTION_CODE_BUILTIN_IS_MISMATCH ) + { + dumpTlist = true; + printf("(%4i,%4i,%4i) error: mismatch between builtin IS shader and build input\n" + " call location: %s\n", index.x,index.y,index.z, optixGetExceptionLineInfo() ); + } + else if( exceptionCode == 
OPTIX_EXCEPTION_CODE_CALLABLE_INVALID_SBT ) + { + int sbtOffset = optixGetExceptionInvalidSbtOffset(); + printf( "(%4i,%4i,%4i) error: invalid sbt offset of %i for callable program\n", index.x, index.y, index.z, sbtOffset ); + } + else if( exceptionCode == OPTIX_EXCEPTION_CODE_CALLABLE_NO_DC_SBT_RECORD ) + { + int sbtOffset = optixGetExceptionInvalidSbtOffset(); + printf( "(%4i,%4i,%4i) error: invalid sbt offset of %i for direct callable program\n", index.x, index.y, index.z, sbtOffset ); + } + else if( exceptionCode == OPTIX_EXCEPTION_CODE_CALLABLE_NO_CC_SBT_RECORD ) + { + int sbtOffset = optixGetExceptionInvalidSbtOffset(); + printf( "(%4i,%4i,%4i) error: invalid sbt offset of %i for continuation callable program\n", index.x, index.y, index.z, sbtOffset ); + } + else if( exceptionCode == OPTIX_EXCEPTION_CODE_UNSUPPORTED_SINGLE_LEVEL_GAS ) + { + OptixTraversableHandle handle = optixGetExceptionInvalidTraversable(); + printf("(%4i,%4i,%4i) error: unsupported single GAS traversable graph %p\n", index.x,index.y,index.z, (void*)handle); + dumpTlist = true; + } + else if( ( exceptionCode <= OPTIX_EXCEPTION_CODE_INVALID_VALUE_ARGUMENT_0 ) && ( exceptionCode >= OPTIX_EXCEPTION_CODE_INVALID_VALUE_ARGUMENT_2 ) ) + { + printf("(%4i,%4i,%4i) error: invalid value for argument %i\n", index.x,index.y,index.z, -(exceptionCode - OPTIX_EXCEPTION_CODE_INVALID_VALUE_ARGUMENT_0) ); + } + else if( exceptionCode == OPTIX_EXCEPTION_CODE_UNSUPPORTED_DATA_ACCESS ) + { + printf("(%4i,%4i,%4i) error: unsupported random data access\n", index.x,index.y,index.z); + } + else if( exceptionCode == OPTIX_EXCEPTION_CODE_PAYLOAD_TYPE_MISMATCH ) + { + printf("(%4i,%4i,%4i) error: payload type mismatch between program and optixTrace call\n", index.x,index.y,index.z); + } + else if( exceptionCode >= 0 ) + { + dumpTlist = true; + printf( "(%4i,%4i,%4i) error: user exception with error code %i\n" + " call location: %s\n", index.x, index.y, index.z, exceptionCode, optixGetExceptionLineInfo() ); + } + else 
+ { + printf("(%4i,%4i,%4i) error: unknown exception with error code %i\n", index.x,index.y,index.z, exceptionCode); + } + + if( dumpTlist ) + optixDumpTransformList(); + } + +} // namespace optix_impl + +#endif diff --git a/dependencies/optix/internal/optix_7_device_impl_transformations.h b/dependencies/optix/internal/optix_7_device_impl_transformations.h new file mode 100644 index 0000000000000000000000000000000000000000..c2cc69add507b79b41e47d6fc29b015cac60fb48 --- /dev/null +++ b/dependencies/optix/internal/optix_7_device_impl_transformations.h @@ -0,0 +1,424 @@ +/* +* Copyright (c) 2021 NVIDIA Corporation. All rights reserved. +* +* NVIDIA Corporation and its licensors retain all intellectual property and proprietary +* rights in and to this software, related documentation and any modifications thereto. +* Any use, reproduction, disclosure or distribution of this software and related +* documentation without an express license agreement from NVIDIA Corporation is strictly +* prohibited. +* +* TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THIS SOFTWARE IS PROVIDED *AS IS* +* AND NVIDIA AND ITS SUPPLIERS DISCLAIM ALL WARRANTIES, EITHER EXPRESS OR IMPLIED, +* INCLUDING, BUT NOT LIMITED TO, IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +* PARTICULAR PURPOSE. IN NO EVENT SHALL NVIDIA OR ITS SUPPLIERS BE LIABLE FOR ANY +* SPECIAL, INCIDENTAL, INDIRECT, OR CONSEQUENTIAL DAMAGES WHATSOEVER (INCLUDING, WITHOUT +* LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF +* BUSINESS INFORMATION, OR ANY OTHER PECUNIARY LOSS) ARISING OUT OF THE USE OF OR +* INABILITY TO USE THIS SOFTWARE, EVEN IF NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF +* SUCH DAMAGES +*/ + +/** +* @file optix_7_device_impl_transformations.h +* @author NVIDIA Corporation +* @brief OptiX public API +* +* OptiX public API Reference - Device side implementation for transformation helper functions. 
+*/ + +#if !defined( __OPTIX_INCLUDE_INTERNAL_HEADERS__ ) +#error("optix_7_device_impl_transformations.h is an internal header file and must not be used directly. Please use optix_device.h or optix.h instead.") +#endif + +#ifndef __optix_optix_7_device_impl_transformations_h__ +#define __optix_optix_7_device_impl_transformations_h__ + +namespace optix_impl { + +static __forceinline__ __device__ float4 optixAddFloat4( const float4& a, const float4& b ) +{ + return make_float4( a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w ); +} + +static __forceinline__ __device__ float4 optixMulFloat4( const float4& a, float b ) +{ + return make_float4( a.x * b, a.y * b, a.z * b, a.w * b ); +} + +static __forceinline__ __device__ uint4 optixLdg( unsigned long long addr ) +{ + const uint4* ptr; + asm volatile( "cvta.to.global.u64 %0, %1;" : "=l"( ptr ) : "l"( addr ) ); + uint4 ret; + asm volatile( "ld.global.v4.u32 {%0,%1,%2,%3}, [%4];" + : "=r"( ret.x ), "=r"( ret.y ), "=r"( ret.z ), "=r"( ret.w ) + : "l"( ptr ) ); + return ret; +} + +template <class T> +static __forceinline__ __device__ T optixLoadReadOnlyAlign16( const T* ptr ) +{ + T v; + for( int ofs = 0; ofs < sizeof( T ); ofs += 16 ) + *(uint4*)( (char*)&v + ofs ) = optixLdg( (unsigned long long)( (char*)ptr + ofs ) ); + return v; +} + +// Multiplies the row vector vec with the 3x4 matrix with rows m0, m1, and m2 +static __forceinline__ __device__ float4 optixMultiplyRowMatrix( const float4 vec, const float4 m0, const float4 m1, const float4 m2 ) +{ + float4 result; + + result.x = vec.x * m0.x + vec.y * m1.x + vec.z * m2.x; + result.y = vec.x * m0.y + vec.y * m1.y + vec.z * m2.y; + result.z = vec.x * m0.z + vec.y * m1.z + vec.z * m2.z; + result.w = vec.x * m0.w + vec.y * m1.w + vec.z * m2.w + vec.w; + + return result; +} + +// Converts the SRT transformation srt into a 3x4 matrix with rows m0, m1, and m2 +static __forceinline__ __device__ void optixGetMatrixFromSrt( float4& m0, float4& m1, float4& m2, const OptixSRTData& srt ) 
+{ + const float4 q = {srt.qx, srt.qy, srt.qz, srt.qw}; + + // normalize + const float inv_sql = 1.f / ( srt.qx * srt.qx + srt.qy * srt.qy + srt.qz * srt.qz + srt.qw * srt.qw ); + const float4 nq = optixMulFloat4( q, inv_sql ); + + const float sqw = q.w * nq.w; + const float sqx = q.x * nq.x; + const float sqy = q.y * nq.y; + const float sqz = q.z * nq.z; + + const float xy = q.x * nq.y; + const float zw = q.z * nq.w; + const float xz = q.x * nq.z; + const float yw = q.y * nq.w; + const float yz = q.y * nq.z; + const float xw = q.x * nq.w; + + m0.x = ( sqx - sqy - sqz + sqw ); + m0.y = 2.0f * ( xy - zw ); + m0.z = 2.0f * ( xz + yw ); + + m1.x = 2.0f * ( xy + zw ); + m1.y = ( -sqx + sqy - sqz + sqw ); + m1.z = 2.0f * ( yz - xw ); + + m2.x = 2.0f * ( xz - yw ); + m2.y = 2.0f * ( yz + xw ); + m2.z = ( -sqx - sqy + sqz + sqw ); + + m0.w = m0.x * srt.pvx + m0.y * srt.pvy + m0.z * srt.pvz + srt.tx; + m1.w = m1.x * srt.pvx + m1.y * srt.pvy + m1.z * srt.pvz + srt.ty; + m2.w = m2.x * srt.pvx + m2.y * srt.pvy + m2.z * srt.pvz + srt.tz; + + m0.z = m0.x * srt.b + m0.y * srt.c + m0.z * srt.sz; + m1.z = m1.x * srt.b + m1.y * srt.c + m1.z * srt.sz; + m2.z = m2.x * srt.b + m2.y * srt.c + m2.z * srt.sz; + + m0.y = m0.x * srt.a + m0.y * srt.sy; + m1.y = m1.x * srt.a + m1.y * srt.sy; + m2.y = m2.x * srt.a + m2.y * srt.sy; + + m0.x = m0.x * srt.sx; + m1.x = m1.x * srt.sx; + m2.x = m2.x * srt.sx; +} + +// Inverts a 3x4 matrix in place +static __forceinline__ __device__ void optixInvertMatrix( float4& m0, float4& m1, float4& m2 ) +{ + const float det3 = + m0.x * ( m1.y * m2.z - m1.z * m2.y ) - m0.y * ( m1.x * m2.z - m1.z * m2.x ) + m0.z * ( m1.x * m2.y - m1.y * m2.x ); + + const float inv_det3 = 1.0f / det3; + + float inv3[3][3]; + inv3[0][0] = inv_det3 * ( m1.y * m2.z - m2.y * m1.z ); + inv3[0][1] = inv_det3 * ( m0.z * m2.y - m2.z * m0.y ); + inv3[0][2] = inv_det3 * ( m0.y * m1.z - m1.y * m0.z ); + + inv3[1][0] = inv_det3 * ( m1.z * m2.x - m2.z * m1.x ); + inv3[1][1] = inv_det3 * ( 
m0.x * m2.z - m2.x * m0.z ); + inv3[1][2] = inv_det3 * ( m0.z * m1.x - m1.z * m0.x ); + + inv3[2][0] = inv_det3 * ( m1.x * m2.y - m2.x * m1.y ); + inv3[2][1] = inv_det3 * ( m0.y * m2.x - m2.y * m0.x ); + inv3[2][2] = inv_det3 * ( m0.x * m1.y - m1.x * m0.y ); + + const float b[3] = {m0.w, m1.w, m2.w}; + + m0.x = inv3[0][0]; + m0.y = inv3[0][1]; + m0.z = inv3[0][2]; + m0.w = -inv3[0][0] * b[0] - inv3[0][1] * b[1] - inv3[0][2] * b[2]; + + m1.x = inv3[1][0]; + m1.y = inv3[1][1]; + m1.z = inv3[1][2]; + m1.w = -inv3[1][0] * b[0] - inv3[1][1] * b[1] - inv3[1][2] * b[2]; + + m2.x = inv3[2][0]; + m2.y = inv3[2][1]; + m2.z = inv3[2][2]; + m2.w = -inv3[2][0] * b[0] - inv3[2][1] * b[1] - inv3[2][2] * b[2]; +} + +static __forceinline__ __device__ void optixLoadInterpolatedMatrixKey( float4& m0, float4& m1, float4& m2, const float4* matrix, const float t1 ) +{ + m0 = optixLoadReadOnlyAlign16( &matrix[0] ); + m1 = optixLoadReadOnlyAlign16( &matrix[1] ); + m2 = optixLoadReadOnlyAlign16( &matrix[2] ); + + // The conditional prevents concurrent loads leading to spills + if( t1 > 0.0f ) + { + const float t0 = 1.0f - t1; + m0 = optixAddFloat4( optixMulFloat4( m0, t0 ), optixMulFloat4( optixLoadReadOnlyAlign16( &matrix[3] ), t1 ) ); + m1 = optixAddFloat4( optixMulFloat4( m1, t0 ), optixMulFloat4( optixLoadReadOnlyAlign16( &matrix[4] ), t1 ) ); + m2 = optixAddFloat4( optixMulFloat4( m2, t0 ), optixMulFloat4( optixLoadReadOnlyAlign16( &matrix[5] ), t1 ) ); + } +} + +static __forceinline__ __device__ void optixLoadInterpolatedSrtKey( float4& srt0, + float4& srt1, + float4& srt2, + float4& srt3, + const float4* srt, + const float t1 ) +{ + srt0 = optixLoadReadOnlyAlign16( &srt[0] ); + srt1 = optixLoadReadOnlyAlign16( &srt[1] ); + srt2 = optixLoadReadOnlyAlign16( &srt[2] ); + srt3 = optixLoadReadOnlyAlign16( &srt[3] ); + + // The conditional prevents concurrent loads leading to spills + if( t1 > 0.0f ) + { + const float t0 = 1.0f - t1; + srt0 = optixAddFloat4( optixMulFloat4( srt0, t0 ), 
optixMulFloat4( optixLoadReadOnlyAlign16( &srt[4] ), t1 ) ); + srt1 = optixAddFloat4( optixMulFloat4( srt1, t0 ), optixMulFloat4( optixLoadReadOnlyAlign16( &srt[5] ), t1 ) ); + srt2 = optixAddFloat4( optixMulFloat4( srt2, t0 ), optixMulFloat4( optixLoadReadOnlyAlign16( &srt[6] ), t1 ) ); + srt3 = optixAddFloat4( optixMulFloat4( srt3, t0 ), optixMulFloat4( optixLoadReadOnlyAlign16( &srt[7] ), t1 ) ); + + float inv_length = 1.f / sqrt( srt2.y * srt2.y + srt2.z * srt2.z + srt2.w * srt2.w + srt3.x * srt3.x ); + srt2.y *= inv_length; + srt2.z *= inv_length; + srt2.w *= inv_length; + srt3.x *= inv_length; + } +} + +static __forceinline__ __device__ void optixResolveMotionKey( float& localt, int& key, const OptixMotionOptions& options, const float globalt ) +{ + const float timeBegin = options.timeBegin; + const float timeEnd = options.timeEnd; + const float numIntervals = (float)( options.numKeys - 1 ); + + // No need to check the motion flags. If data originates from a valid transform list handle, then globalt is in + // range, or vanish flags are not set. + + const float time = max( 0.f, min( numIntervals, ( globalt - timeBegin ) * numIntervals / ( timeEnd - timeBegin ) ) ); + const float fltKey = floorf( time ); + + localt = time - fltKey; + key = (int)fltKey; +} + +// Returns the interpolated transformation matrix for a particular matrix motion transformation and point in time. 
+static __forceinline__ __device__ void optixGetInterpolatedTransformation( float4& trf0, + float4& trf1, + float4& trf2, + const OptixMatrixMotionTransform* transformData, + const float time ) +{ + // Compute key and intra key time + float keyTime; + int key; + optixResolveMotionKey( keyTime, key, optixLoadReadOnlyAlign16( transformData ).motionOptions, time ); + + // Get pointer to left key + const float4* transform = (const float4*)( &transformData->transform[key][0] ); + + // Load and interpolate matrix keys + optixLoadInterpolatedMatrixKey( trf0, trf1, trf2, transform, keyTime ); +} + +// Returns the interpolated transformation matrix for a particular SRT motion transformation and point in time. +static __forceinline__ __device__ void optixGetInterpolatedTransformation( float4& trf0, + float4& trf1, + float4& trf2, + const OptixSRTMotionTransform* transformData, + const float time ) +{ + // Compute key and intra key time + float keyTime; + int key; + optixResolveMotionKey( keyTime, key, optixLoadReadOnlyAlign16( transformData ).motionOptions, time ); + + // Get pointer to left key + const float4* dataPtr = reinterpret_cast<const float4*>( &transformData->srtData[key] ); + + // Load and interpolated SRT keys + float4 data[4]; + optixLoadInterpolatedSrtKey( data[0], data[1], data[2], data[3], dataPtr, keyTime ); + + OptixSRTData srt = {data[0].x, data[0].y, data[0].z, data[0].w, data[1].x, data[1].y, data[1].z, data[1].w, + data[2].x, data[2].y, data[2].z, data[2].w, data[3].x, data[3].y, data[3].z, data[3].w}; + + // Convert SRT into a matrix + optixGetMatrixFromSrt( trf0, trf1, trf2, srt ); +} + +// Returns the interpolated transformation matrix for a particular traversable handle and point in time. 
+static __forceinline__ __device__ void optixGetInterpolatedTransformationFromHandle( float4& trf0, + float4& trf1, + float4& trf2, + const OptixTraversableHandle handle, + const float time, + const bool objectToWorld ) +{ + const OptixTransformType type = optixGetTransformTypeFromHandle( handle ); + + if( type == OPTIX_TRANSFORM_TYPE_MATRIX_MOTION_TRANSFORM || type == OPTIX_TRANSFORM_TYPE_SRT_MOTION_TRANSFORM ) + { + if( type == OPTIX_TRANSFORM_TYPE_MATRIX_MOTION_TRANSFORM ) + { + const OptixMatrixMotionTransform* transformData = optixGetMatrixMotionTransformFromHandle( handle ); + optixGetInterpolatedTransformation( trf0, trf1, trf2, transformData, time ); + } + else + { + const OptixSRTMotionTransform* transformData = optixGetSRTMotionTransformFromHandle( handle ); + optixGetInterpolatedTransformation( trf0, trf1, trf2, transformData, time ); + } + + if( !objectToWorld ) + optixInvertMatrix( trf0, trf1, trf2 ); + } + else if( type == OPTIX_TRANSFORM_TYPE_INSTANCE || type == OPTIX_TRANSFORM_TYPE_STATIC_TRANSFORM ) + { + const float4* transform; + + if( type == OPTIX_TRANSFORM_TYPE_INSTANCE ) + { + transform = ( objectToWorld ) ? optixGetInstanceTransformFromHandle( handle ) : + optixGetInstanceInverseTransformFromHandle( handle ); + } + else + { + const OptixStaticTransform* traversable = optixGetStaticTransformFromHandle( handle ); + transform = (const float4*)( ( objectToWorld ) ? traversable->transform : traversable->invTransform ); + } + + trf0 = optixLoadReadOnlyAlign16( &transform[0] ); + trf1 = optixLoadReadOnlyAlign16( &transform[1] ); + trf2 = optixLoadReadOnlyAlign16( &transform[2] ); + } + else + { + trf0 = {1.0f, 0.0f, 0.0f, 0.0f}; + trf1 = {0.0f, 1.0f, 0.0f, 0.0f}; + trf2 = {0.0f, 0.0f, 1.0f, 0.0f}; + } +} + +// Returns the world-to-object transformation matrix resulting from the current transform stack and current ray time. 
+static __forceinline__ __device__ void optixGetWorldToObjectTransformMatrix( float4& m0, float4& m1, float4& m2 ) +{ + const unsigned int size = optixGetTransformListSize(); + const float time = optixGetRayTime(); + +#pragma unroll 1 + for( unsigned int i = 0; i < size; ++i ) + { + OptixTraversableHandle handle = optixGetTransformListHandle( i ); + + float4 trf0, trf1, trf2; + optixGetInterpolatedTransformationFromHandle( trf0, trf1, trf2, handle, time, /*objectToWorld*/ false ); + + if( i == 0 ) + { + m0 = trf0; + m1 = trf1; + m2 = trf2; + } + else + { + // m := trf * m + float4 tmp0 = m0, tmp1 = m1, tmp2 = m2; + m0 = optixMultiplyRowMatrix( trf0, tmp0, tmp1, tmp2 ); + m1 = optixMultiplyRowMatrix( trf1, tmp0, tmp1, tmp2 ); + m2 = optixMultiplyRowMatrix( trf2, tmp0, tmp1, tmp2 ); + } + } +} + +// Returns the object-to-world transformation matrix resulting from the current transform stack and current ray time. +static __forceinline__ __device__ void optixGetObjectToWorldTransformMatrix( float4& m0, float4& m1, float4& m2 ) +{ + const int size = optixGetTransformListSize(); + const float time = optixGetRayTime(); + +#pragma unroll 1 + for( int i = size - 1; i >= 0; --i ) + { + OptixTraversableHandle handle = optixGetTransformListHandle( i ); + + float4 trf0, trf1, trf2; + optixGetInterpolatedTransformationFromHandle( trf0, trf1, trf2, handle, time, /*objectToWorld*/ true ); + + if( i == size - 1 ) + { + m0 = trf0; + m1 = trf1; + m2 = trf2; + } + else + { + // m := trf * m + float4 tmp0 = m0, tmp1 = m1, tmp2 = m2; + m0 = optixMultiplyRowMatrix( trf0, tmp0, tmp1, tmp2 ); + m1 = optixMultiplyRowMatrix( trf1, tmp0, tmp1, tmp2 ); + m2 = optixMultiplyRowMatrix( trf2, tmp0, tmp1, tmp2 ); + } + } +} + +// Multiplies the 3x4 matrix with rows m0, m1, m2 with the point p. 
+static __forceinline__ __device__ float3 optixTransformPoint( const float4& m0, const float4& m1, const float4& m2, const float3& p ) +{ + float3 result; + result.x = m0.x * p.x + m0.y * p.y + m0.z * p.z + m0.w; + result.y = m1.x * p.x + m1.y * p.y + m1.z * p.z + m1.w; + result.z = m2.x * p.x + m2.y * p.y + m2.z * p.z + m2.w; + return result; +} + +// Multiplies the 3x3 linear submatrix of the 3x4 matrix with rows m0, m1, m2 with the vector v. +static __forceinline__ __device__ float3 optixTransformVector( const float4& m0, const float4& m1, const float4& m2, const float3& v ) +{ + float3 result; + result.x = m0.x * v.x + m0.y * v.y + m0.z * v.z; + result.y = m1.x * v.x + m1.y * v.y + m1.z * v.z; + result.z = m2.x * v.x + m2.y * v.y + m2.z * v.z; + return result; +} + +// Multiplies the transpose of the 3x3 linear submatrix of the 3x4 matrix with rows m0, m1, m2 with the normal n. +// Note that the given matrix is supposed to be the inverse of the actual transformation matrix. +static __forceinline__ __device__ float3 optixTransformNormal( const float4& m0, const float4& m1, const float4& m2, const float3& n ) +{ + float3 result; + result.x = m0.x * n.x + m1.x * n.y + m2.x * n.z; + result.y = m0.y * n.x + m1.y * n.y + m2.y * n.z; + result.z = m0.z * n.x + m1.z * n.y + m2.z * n.z; + return result; +} + +} // namespace optix_impl + +#endif diff --git a/dependencies/optix/internal/optix_micromap_impl.h b/dependencies/optix/internal/optix_micromap_impl.h new file mode 100644 index 0000000000000000000000000000000000000000..9de2ad0ee5f8cb124b48265aee2bc8703db5cad9 --- /dev/null +++ b/dependencies/optix/internal/optix_micromap_impl.h @@ -0,0 +1,174 @@ +/* + * Copyright (c) 2021 NVIDIA Corporation. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of NVIDIA CORPORATION nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/** +* @file optix_micromap_impl.h +* @author NVIDIA Corporation +* @brief OptiX micromap helper functions +*/ + +#ifndef __optix_optix_micromap_impl_h__ +#define __optix_optix_micromap_impl_h__ + +#include <cstdint> + +#if __CUDACC__ +#include <cuda_runtime.h> +#endif + +#ifndef OPTIX_MICROMAP_FUNC +#if __CUDACC__ +#define OPTIX_MICROMAP_FUNC __host__ __device__ +#else +#define OPTIX_MICROMAP_FUNC +#endif +#endif + +namespace optix_impl { + +/** \addtogroup optix_utilities +@{ +*/ + +#define OPTIX_MICROMAP_INLINE_FUNC OPTIX_MICROMAP_FUNC inline + +#if __CUDACC__ +// the device implementation of __uint_as_float is declared in cuda_runtime.h +#else +OPTIX_MICROMAP_INLINE_FUNC float __uint_as_float( uint32_t x ) +{ + union { float f; uint32_t i; } var; + var.i = x; + return var.f; +} +#endif + + +// Deinterleave bits from x into even and odd halves +OPTIX_MICROMAP_INLINE_FUNC uint32_t deinterleaveBits( uint32_t x ) +{ + x = ( ( ( ( x >> 1 ) & 0x22222222u ) | ( ( x << 1 ) & ~0x22222222u ) ) & 0x66666666u ) | ( x & ~0x66666666u ); + x = ( ( ( ( x >> 2 ) & 0x0c0c0c0cu ) | ( ( x << 2 ) & ~0x0c0c0c0cu ) ) & 0x3c3c3c3cu ) | ( x & ~0x3c3c3c3cu ); + x = ( ( ( ( x >> 4 ) & 0x00f000f0u ) | ( ( x << 4 ) & ~0x00f000f0u ) ) & 0x0ff00ff0u ) | ( x & ~0x0ff00ff0u ); + x = ( ( ( ( x >> 8 ) & 0x0000ff00u ) | ( ( x << 8 ) & ~0x0000ff00u ) ) & 0x00ffff00u ) | ( x & ~0x00ffff00u ); + return x; +} + +// Extract even bits +OPTIX_MICROMAP_INLINE_FUNC uint32_t extractEvenBits( uint32_t x ) +{ + x &= 0x55555555; + x = ( x | ( x >> 1 ) ) & 0x33333333; + x = ( x | ( x >> 2 ) ) & 0x0f0f0f0f; + x = ( x | ( x >> 4 ) ) & 0x00ff00ff; + x = ( x | ( x >> 8 ) ) & 0x0000ffff; + return x; +} + + +// Calculate exclusive prefix or (log(n) XOR's and SHF's) +OPTIX_MICROMAP_INLINE_FUNC uint32_t prefixEor( uint32_t x ) +{ + x ^= x >> 1; + x ^= x >> 2; + x ^= x >> 4; + x ^= x >> 8; + return x; +} + + +// Convert distance along the curve to discrete barycentrics +OPTIX_MICROMAP_INLINE_FUNC void 
index2dbary( uint32_t index, uint32_t& u, uint32_t& v, uint32_t& w ) +{ + uint32_t b0 = extractEvenBits( index ); + uint32_t b1 = extractEvenBits( index >> 1 ); + + uint32_t fx = prefixEor( b0 ); + uint32_t fy = prefixEor( b0 & ~b1 ); + + uint32_t t = fy ^ b1; + + u = ( fx & ~t ) | ( b0 & ~t ) | ( ~b0 & ~fx & t ); + v = fy ^ b0; + w = ( ~fx & ~t ) | ( b0 & ~t ) | ( ~b0 & fx & t ); +} + + +// Compute barycentrics for micro triangle +OPTIX_MICROMAP_INLINE_FUNC void micro2bary( uint32_t index, uint32_t subdivisionLevel, float2& uv0, float2& uv1, float2& uv2 ) +{ + if( subdivisionLevel == 0 ) + { + uv0 = { 0, 0 }; + uv1 = { 1, 0 }; + uv2 = { 0, 1 }; + return; + } + + uint32_t iu, iv, iw; + index2dbary( index, iu, iv, iw ); + + // we need to only look at "level" bits + iu = iu & ( ( 1 << subdivisionLevel ) - 1 ); + iv = iv & ( ( 1 << subdivisionLevel ) - 1 ); + iw = iw & ( ( 1 << subdivisionLevel ) - 1 ); + + bool upright = ( iu & 1 ) ^ ( iv & 1 ) ^ ( iw & 1 ); + if( !upright ) + { + iu = iu + 1; + iv = iv + 1; + } + + const float levelScale = __uint_as_float( ( 127u - subdivisionLevel ) << 23 ); + + // scale the barycentic coordinate to the global space/scale + float du = 1.f * levelScale; + float dv = 1.f * levelScale; + + // scale the barycentic coordinate to the global space/scale + float u = (float)iu * levelScale; + float v = (float)iv * levelScale; + + if( !upright ) + { + du = -du; + dv = -dv; + } + + uv0 = { u, v }; + uv1 = { u + du, v }; + uv2 = { u, v + dv }; +} + + +/*@}*/ // end group optix_utilities + +} // namespace optix_impl + +#endif // __optix_optix_micromap_impl_h__ diff --git a/dependencies/optix/optix.h b/dependencies/optix/optix.h new file mode 100644 index 0000000000000000000000000000000000000000..3690782d9d53ebcc7b5b74877313d8b8c81f8cc6 --- /dev/null +++ b/dependencies/optix/optix.h @@ -0,0 +1,47 @@ + +/* + * Copyright (c) 2021 NVIDIA Corporation. All rights reserved. 
+ * + * NVIDIA Corporation and its licensors retain all intellectual property and proprietary + * rights in and to this software, related documentation and any modifications thereto. + * Any use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation is strictly + * prohibited. + * + * TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THIS SOFTWARE IS PROVIDED *AS IS* + * AND NVIDIA AND ITS SUPPLIERS DISCLAIM ALL WARRANTIES, EITHER EXPRESS OR IMPLIED, + * INCLUDING, BUT NOT LIMITED TO, IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE. IN NO EVENT SHALL NVIDIA OR ITS SUPPLIERS BE LIABLE FOR ANY + * SPECIAL, INCIDENTAL, INDIRECT, OR CONSEQUENTIAL DAMAGES WHATSOEVER (INCLUDING, WITHOUT + * LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF + * BUSINESS INFORMATION, OR ANY OTHER PECUNIARY LOSS) ARISING OUT OF THE USE OF OR + * INABILITY TO USE THIS SOFTWARE, EVEN IF NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGES + */ + +/// @file +/// @author NVIDIA Corporation +/// @brief OptiX public API header +/// +/// Includes the host api if compiling host code, includes the cuda api if compiling device code. +/// For the math library routines include optix_math.h + +#ifndef __optix_optix_h__ +#define __optix_optix_h__ + +/// The OptiX version. 
+/// +/// - major = OPTIX_VERSION/10000 +/// - minor = (OPTIX_VERSION%10000)/100 +/// - micro = OPTIX_VERSION%100 +#define OPTIX_VERSION 70600 + + +#ifdef __CUDACC__ +#include "optix_device.h" +#else +#include "optix_host.h" +#endif + + +#endif // __optix_optix_h__ diff --git a/dependencies/optix/optix_7_device.h b/dependencies/optix/optix_7_device.h new file mode 100644 index 0000000000000000000000000000000000000000..9c6bacd63b9e4a92b7ab7853b5b1e1d07a10419e --- /dev/null +++ b/dependencies/optix/optix_7_device.h @@ -0,0 +1,1009 @@ +/* +* Copyright (c) 2021 NVIDIA Corporation. All rights reserved. +* +* NVIDIA Corporation and its licensors retain all intellectual property and proprietary +* rights in and to this software, related documentation and any modifications thereto. +* Any use, reproduction, disclosure or distribution of this software and related +* documentation without an express license agreement from NVIDIA Corporation is strictly +* prohibited. +* +* TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THIS SOFTWARE IS PROVIDED *AS IS* +* AND NVIDIA AND ITS SUPPLIERS DISCLAIM ALL WARRANTIES, EITHER EXPRESS OR IMPLIED, +* INCLUDING, BUT NOT LIMITED TO, IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +* PARTICULAR PURPOSE. IN NO EVENT SHALL NVIDIA OR ITS SUPPLIERS BE LIABLE FOR ANY +* SPECIAL, INCIDENTAL, INDIRECT, OR CONSEQUENTIAL DAMAGES WHATSOEVER (INCLUDING, WITHOUT +* LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF +* BUSINESS INFORMATION, OR ANY OTHER PECUNIARY LOSS) ARISING OUT OF THE USE OF OR +* INABILITY TO USE THIS SOFTWARE, EVEN IF NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF +* SUCH DAMAGES +*/ + +/// @file +/// @author NVIDIA Corporation +/// @brief OptiX public API header +/// +/// OptiX public API Reference - Device API declarations + +#if !defined( __OPTIX_INCLUDE_INTERNAL_HEADERS__ ) +#error("optix_7_device.h is an internal header file and must not be used directly. 
Please use optix_device.h or optix.h instead.") +#endif + + +#ifndef __optix_optix_7_device_h__ +#define __optix_optix_7_device_h__ + +#if defined( __cplusplus ) && ( __cplusplus < 201103L ) && !defined( _WIN32 ) +#error Device code for OptiX requires at least C++11. Consider adding "--std c++11" to the nvcc command-line. +#endif + +#include "optix_7_types.h" + +/// \defgroup optix_device_api Device API +/// \brief OptiX Device API + +/** \addtogroup optix_device_api +@{ +*/ + +/// Initiates a ray tracing query starting with the given traversable. +/// +/// \param[in] handle +/// \param[in] rayOrigin +/// \param[in] rayDirection +/// \param[in] tmin +/// \param[in] tmax +/// \param[in] rayTime +/// \param[in] visibilityMask really only 8 bits +/// \param[in] rayFlags really only 8 bits, combination of OptixRayFlags +/// \param[in] SBToffset really only 4 bits +/// \param[in] SBTstride really only 4 bits +/// \param[in] missSBTIndex specifies the miss program invoked on a miss +/// \param[in,out] payload up to 32 unsigned int values that hold the payload +template <typename... Payload> +static __forceinline__ __device__ void optixTrace( OptixTraversableHandle handle, + float3 rayOrigin, + float3 rayDirection, + float tmin, + float tmax, + float rayTime, + OptixVisibilityMask visibilityMask, + unsigned int rayFlags, + unsigned int SBToffset, + unsigned int SBTstride, + unsigned int missSBTIndex, + Payload&... payload ); + +/// Initiates a ray tracing query starting with the given traversable. 
+/// +/// \param[in] type +/// \param[in] handle +/// \param[in] rayOrigin +/// \param[in] rayDirection +/// \param[in] tmin +/// \param[in] tmax +/// \param[in] rayTime +/// \param[in] visibilityMask really only 8 bits +/// \param[in] rayFlags really only 8 bits, combination of OptixRayFlags +/// \param[in] SBToffset really only 4 bits +/// \param[in] SBTstride really only 4 bits +/// \param[in] missSBTIndex specifies the miss program invoked on a miss +/// \param[in,out] payload up to 32 unsigned int values that hold the payload +template <typename... Payload> +static __forceinline__ __device__ void optixTrace( OptixPayloadTypeID type, + OptixTraversableHandle handle, + float3 rayOrigin, + float3 rayDirection, + float tmin, + float tmax, + float rayTime, + OptixVisibilityMask visibilityMask, + unsigned int rayFlags, + unsigned int SBToffset, + unsigned int SBTstride, + unsigned int missSBTIndex, + Payload&... payload ); + +/// Writes the 32-bit payload value at slot 0. +static __forceinline__ __device__ void optixSetPayload_0( unsigned int p ); +/// Writes the 32-bit payload value at slot 1. +static __forceinline__ __device__ void optixSetPayload_1( unsigned int p ); +/// Writes the 32-bit payload value at slot 2. +static __forceinline__ __device__ void optixSetPayload_2( unsigned int p ); +/// Writes the 32-bit payload value at slot 3. +static __forceinline__ __device__ void optixSetPayload_3( unsigned int p ); +/// Writes the 32-bit payload value at slot 4. +static __forceinline__ __device__ void optixSetPayload_4( unsigned int p ); +/// Writes the 32-bit payload value at slot 5. +static __forceinline__ __device__ void optixSetPayload_5( unsigned int p ); +/// Writes the 32-bit payload value at slot 6. +static __forceinline__ __device__ void optixSetPayload_6( unsigned int p ); +/// Writes the 32-bit payload value at slot 7. +static __forceinline__ __device__ void optixSetPayload_7( unsigned int p ); + +/// Writes the 32-bit payload value at slot 8. 
+static __forceinline__ __device__ void optixSetPayload_8( unsigned int p ); +/// Writes the 32-bit payload value at slot 9. +static __forceinline__ __device__ void optixSetPayload_9( unsigned int p ); +/// Writes the 32-bit payload value at slot 10. +static __forceinline__ __device__ void optixSetPayload_10( unsigned int p ); +/// Writes the 32-bit payload value at slot 11. +static __forceinline__ __device__ void optixSetPayload_11( unsigned int p ); +/// Writes the 32-bit payload value at slot 12. +static __forceinline__ __device__ void optixSetPayload_12( unsigned int p ); +/// Writes the 32-bit payload value at slot 13. +static __forceinline__ __device__ void optixSetPayload_13( unsigned int p ); +/// Writes the 32-bit payload value at slot 14. +static __forceinline__ __device__ void optixSetPayload_14( unsigned int p ); +/// Writes the 32-bit payload value at slot 15. +static __forceinline__ __device__ void optixSetPayload_15( unsigned int p ); +/// Writes the 32-bit payload value at slot 16. +static __forceinline__ __device__ void optixSetPayload_16( unsigned int p ); +/// Writes the 32-bit payload value at slot 17. +static __forceinline__ __device__ void optixSetPayload_17( unsigned int p ); +/// Writes the 32-bit payload value at slot 18. +static __forceinline__ __device__ void optixSetPayload_18( unsigned int p ); +/// Writes the 32-bit payload value at slot 19. +static __forceinline__ __device__ void optixSetPayload_19( unsigned int p ); +/// Writes the 32-bit payload value at slot 20. +static __forceinline__ __device__ void optixSetPayload_20( unsigned int p ); +/// Writes the 32-bit payload value at slot 21. +static __forceinline__ __device__ void optixSetPayload_21( unsigned int p ); +/// Writes the 32-bit payload value at slot 22. +static __forceinline__ __device__ void optixSetPayload_22( unsigned int p ); +/// Writes the 32-bit payload value at slot 23. 
+static __forceinline__ __device__ void optixSetPayload_23( unsigned int p ); +/// Writes the 32-bit payload value at slot 24. +static __forceinline__ __device__ void optixSetPayload_24( unsigned int p ); +/// Writes the 32-bit payload value at slot 25. +static __forceinline__ __device__ void optixSetPayload_25( unsigned int p ); +/// Writes the 32-bit payload value at slot 26. +static __forceinline__ __device__ void optixSetPayload_26( unsigned int p ); +/// Writes the 32-bit payload value at slot 27. +static __forceinline__ __device__ void optixSetPayload_27( unsigned int p ); +/// Writes the 32-bit payload value at slot 28. +static __forceinline__ __device__ void optixSetPayload_28( unsigned int p ); +/// Writes the 32-bit payload value at slot 29. +static __forceinline__ __device__ void optixSetPayload_29( unsigned int p ); +/// Writes the 32-bit payload value at slot 30. +static __forceinline__ __device__ void optixSetPayload_30( unsigned int p ); +/// Writes the 32-bit payload value at slot 31. +static __forceinline__ __device__ void optixSetPayload_31( unsigned int p ); + +/// Reads the 32-bit payload value at slot 0. +static __forceinline__ __device__ unsigned int optixGetPayload_0(); +/// Reads the 32-bit payload value at slot 1. +static __forceinline__ __device__ unsigned int optixGetPayload_1(); +/// Reads the 32-bit payload value at slot 2. +static __forceinline__ __device__ unsigned int optixGetPayload_2(); +/// Reads the 32-bit payload value at slot 3. +static __forceinline__ __device__ unsigned int optixGetPayload_3(); +/// Reads the 32-bit payload value at slot 4. +static __forceinline__ __device__ unsigned int optixGetPayload_4(); +/// Reads the 32-bit payload value at slot 5. +static __forceinline__ __device__ unsigned int optixGetPayload_5(); +/// Reads the 32-bit payload value at slot 6. +static __forceinline__ __device__ unsigned int optixGetPayload_6(); +/// Reads the 32-bit payload value at slot 7. 
+static __forceinline__ __device__ unsigned int optixGetPayload_7(); + +/// Reads the 32-bit payload value at slot 8. +static __forceinline__ __device__ unsigned int optixGetPayload_8(); +/// Reads the 32-bit payload value at slot 9. +static __forceinline__ __device__ unsigned int optixGetPayload_9(); +/// Reads the 32-bit payload value at slot 10. +static __forceinline__ __device__ unsigned int optixGetPayload_10(); +/// Reads the 32-bit payload value at slot 11. +static __forceinline__ __device__ unsigned int optixGetPayload_11(); +/// Reads the 32-bit payload value at slot 12. +static __forceinline__ __device__ unsigned int optixGetPayload_12(); +/// Reads the 32-bit payload value at slot 13. +static __forceinline__ __device__ unsigned int optixGetPayload_13(); +/// Reads the 32-bit payload value at slot 14. +static __forceinline__ __device__ unsigned int optixGetPayload_14(); +/// Reads the 32-bit payload value at slot 15. +static __forceinline__ __device__ unsigned int optixGetPayload_15(); +/// Reads the 32-bit payload value at slot 16. +static __forceinline__ __device__ unsigned int optixGetPayload_16(); +/// Reads the 32-bit payload value at slot 17. +static __forceinline__ __device__ unsigned int optixGetPayload_17(); +/// Reads the 32-bit payload value at slot 18. +static __forceinline__ __device__ unsigned int optixGetPayload_18(); +/// Reads the 32-bit payload value at slot 19. +static __forceinline__ __device__ unsigned int optixGetPayload_19(); +/// Reads the 32-bit payload value at slot 20. +static __forceinline__ __device__ unsigned int optixGetPayload_20(); +/// Reads the 32-bit payload value at slot 21. +static __forceinline__ __device__ unsigned int optixGetPayload_21(); +/// Reads the 32-bit payload value at slot 22. +static __forceinline__ __device__ unsigned int optixGetPayload_22(); +/// Reads the 32-bit payload value at slot 23. 
+static __forceinline__ __device__ unsigned int optixGetPayload_23(); +/// Reads the 32-bit payload value at slot 24. +static __forceinline__ __device__ unsigned int optixGetPayload_24(); +/// Reads the 32-bit payload value at slot 25. +static __forceinline__ __device__ unsigned int optixGetPayload_25(); +/// Reads the 32-bit payload value at slot 26. +static __forceinline__ __device__ unsigned int optixGetPayload_26(); +/// Reads the 32-bit payload value at slot 27. +static __forceinline__ __device__ unsigned int optixGetPayload_27(); +/// Reads the 32-bit payload value at slot 28. +static __forceinline__ __device__ unsigned int optixGetPayload_28(); +/// Reads the 32-bit payload value at slot 29. +static __forceinline__ __device__ unsigned int optixGetPayload_29(); +/// Reads the 32-bit payload value at slot 30. +static __forceinline__ __device__ unsigned int optixGetPayload_30(); +/// Reads the 32-bit payload value at slot 31. +static __forceinline__ __device__ unsigned int optixGetPayload_31(); + +/// Specify the supported payload types for a program. +/// +/// The supported types are specified as a bitwise combination of payload types. (See OptixPayloadTypeID) +/// May only be called once per program. +/// Must be called at the top of the program. +/// Only available in IS, AH, CH, MS +static __forceinline__ __device__ void optixSetPayloadTypes( unsigned int typeMask ); + +/// Returns an undefined value. +static __forceinline__ __device__ unsigned int optixUndefinedValue(); + +/// Returns the rayOrigin passed into optixTrace. +/// +/// May be more expensive to call in IS and AH than their object space counterparts, +/// so effort should be made to use the object space ray in those programs. +/// Only available in IS, AH, CH, MS +static __forceinline__ __device__ float3 optixGetWorldRayOrigin(); + +/// Returns the rayDirection passed into optixTrace. 
+/// +/// May be more expensive to call in IS and AH than their object space counterparts, +/// so effort should be made to use the object space ray in those programs. +/// Only available in IS, AH, CH, MS +static __forceinline__ __device__ float3 optixGetWorldRayDirection(); + +/// Returns the current object space ray origin based on the current transform stack. +/// +/// Only available in IS and AH. +static __forceinline__ __device__ float3 optixGetObjectRayOrigin(); + +/// Returns the current object space ray direction based on the current transform stack. +/// +/// Only available in IS and AH. +static __forceinline__ __device__ float3 optixGetObjectRayDirection(); + +/// Returns the tmin passed into optixTrace. +/// +/// Only available in IS, AH, CH, MS +static __forceinline__ __device__ float optixGetRayTmin(); + +/// In IS and CH returns the current smallest reported hitT or the tmax passed into optixTrace if no hit has been reported +/// In AH returns the hitT value as passed in to optixReportIntersection +/// In MS returns the tmax passed into optixTrace +/// Only available in IS, AH, CH, MS +static __forceinline__ __device__ float optixGetRayTmax(); + +/// Returns the rayTime passed into optixTrace. +/// +/// Will return 0 if motion is disabled. 
+/// Only available in IS, AH, CH, MS +static __forceinline__ __device__ float optixGetRayTime(); + +/// Returns the rayFlags passed into optixTrace +/// +/// Only available in IS, AH, CH, MS +static __forceinline__ __device__ unsigned int optixGetRayFlags(); + +/// Returns the visibilityMask passed into optixTrace +/// +/// Only available in IS, AH, CH, MS +static __forceinline__ __device__ unsigned int optixGetRayVisibilityMask(); + +/// Return the traversable handle of a given instance in an Instance +/// Acceleration Structure (IAS) +static __forceinline__ __device__ OptixTraversableHandle optixGetInstanceTraversableFromIAS( OptixTraversableHandle ias, unsigned int instIdx ); + +/// Return the object space triangle vertex positions of a given triangle in a Geometry +/// Acceleration Structure (GAS) at a given motion time. +/// To access vertex data, the GAS must be built using the flag OPTIX_BUILD_FLAG_ALLOW_RANDOM_VERTEX_ACCESS. +/// +/// If motion is disabled via OptixPipelineCompileOptions::usesMotionBlur, or the GAS does not contain motion, the +/// time parameter is ignored. +static __forceinline__ __device__ void optixGetTriangleVertexData( OptixTraversableHandle gas, unsigned int primIdx, unsigned int sbtGASIndex, float time, float3 data[3]); + + +/// Return the object space curve control vertex data of a linear curve in a Geometry +/// Acceleration Structure (GAS) at a given motion time. +/// To access vertex data, the GAS must be built using the flag OPTIX_BUILD_FLAG_ALLOW_RANDOM_VERTEX_ACCESS. +/// +/// data[i] = {x,y,z,w} with {x,y,z} the position and w the radius of control vertex i. +/// If motion is disabled via OptixPipelineCompileOptions::usesMotionBlur, or the GAS does not contain motion, the +/// time parameter is ignored. 
+static __forceinline__ __device__ void optixGetLinearCurveVertexData( OptixTraversableHandle gas, unsigned int primIdx, unsigned int sbtGASIndex, float time, float4 data[2] ); + +/// Return the object space curve control vertex data of a quadratic BSpline curve in a Geometry +/// Acceleration Structure (GAS) at a given motion time. +/// To access vertex data, the GAS must be built using the flag OPTIX_BUILD_FLAG_ALLOW_RANDOM_VERTEX_ACCESS. +/// +/// data[i] = {x,y,z,w} with {x,y,z} the position and w the radius of control vertex i. +/// If motion is disabled via OptixPipelineCompileOptions::usesMotionBlur, or the GAS does not contain motion, the +/// time parameter is ignored. +static __forceinline__ __device__ void optixGetQuadraticBSplineVertexData( OptixTraversableHandle gas, unsigned int primIdx, unsigned int sbtGASIndex, float time, float4 data[3] ); + +/// Return the object space curve control vertex data of a cubic BSpline curve in a Geometry +/// Acceleration Structure (GAS) at a given motion time. +/// To access vertex data, the GAS must be built using the flag OPTIX_BUILD_FLAG_ALLOW_RANDOM_VERTEX_ACCESS. +/// +/// data[i] = {x,y,z,w} with {x,y,z} the position and w the radius of control vertex i. +/// If motion is disabled via OptixPipelineCompileOptions::usesMotionBlur, or the GAS does not contain motion, the +/// time parameter is ignored. +static __forceinline__ __device__ void optixGetCubicBSplineVertexData( OptixTraversableHandle gas, unsigned int primIdx, unsigned int sbtGASIndex, float time, float4 data[4] ); + +/// Return the object space curve control vertex data of a CatmullRom spline curve in a Geometry +/// Acceleration Structure (GAS) at a given motion time. +/// To access vertex data, the GAS must be built using the flag OPTIX_BUILD_FLAG_ALLOW_RANDOM_VERTEX_ACCESS. +/// +/// data[i] = {x,y,z,w} with {x,y,z} the position and w the radius of control vertex i. 
+/// If motion is disabled via OptixPipelineCompileOptions::usesMotionBlur, or the GAS does not contain motion, the +/// time parameter is ignored. +static __forceinline__ __device__ void optixGetCatmullRomVertexData( OptixTraversableHandle gas, unsigned int primIdx, unsigned int sbtGASIndex, float time, float4 data[4] ); + +/// Return the object space sphere data, center point and radius, in a Geometry Acceleration Structure (GAS) at a given motion time. +/// To access sphere data, the GAS must be built using the flag OPTIX_BUILD_FLAG_ALLOW_RANDOM_VERTEX_ACCESS. +/// +/// data[0] = {x,y,z,w} with {x,y,z} the position of the sphere center and w the radius. +/// If motion is disabled via OptixPipelineCompileOptions::usesMotionBlur, or the GAS does not contain motion, the +/// time parameter is ignored. +static __forceinline__ __device__ void optixGetSphereData( OptixTraversableHandle gas, unsigned int primIdx, unsigned int sbtGASIndex, float time, float4 data[1] ); + +/// Returns the traversable handle for the Geometry Acceleration Structure (GAS) containing +/// the current hit. May be called from IS, AH and CH. +static __forceinline__ __device__ OptixTraversableHandle optixGetGASTraversableHandle(); + +/// Returns the motion begin time of a GAS (see OptixMotionOptions) +static __forceinline__ __device__ float optixGetGASMotionTimeBegin( OptixTraversableHandle gas ); + +/// Returns the motion end time of a GAS (see OptixMotionOptions) +static __forceinline__ __device__ float optixGetGASMotionTimeEnd( OptixTraversableHandle gas ); + +/// Returns the number of motion steps of a GAS (see OptixMotionOptions) +static __forceinline__ __device__ unsigned int optixGetGASMotionStepCount( OptixTraversableHandle gas ); + +/// Returns the world-to-object transformation matrix resulting from the current active transformation list. +/// +/// The cost of this function may be proportional to the size of the transformation list. 
+static __forceinline__ __device__ void optixGetWorldToObjectTransformMatrix( float m[12] ); + +/// Returns the object-to-world transformation matrix resulting from the current active transformation list. +/// +/// The cost of this function may be proportional to the size of the transformation list. +static __forceinline__ __device__ void optixGetObjectToWorldTransformMatrix( float m[12] ); + +/// Transforms the point using world-to-object transformation matrix resulting from the current active transformation +/// list. +/// +/// The cost of this function may be proportional to the size of the transformation list. +static __forceinline__ __device__ float3 optixTransformPointFromWorldToObjectSpace( float3 point ); + +/// Transforms the vector using world-to-object transformation matrix resulting from the current active transformation +/// list. +/// +/// The cost of this function may be proportional to the size of the transformation list. +static __forceinline__ __device__ float3 optixTransformVectorFromWorldToObjectSpace( float3 vec ); + +/// Transforms the normal using world-to-object transformation matrix resulting from the current active transformation +/// list. +/// +/// The cost of this function may be proportional to the size of the transformation list. +static __forceinline__ __device__ float3 optixTransformNormalFromWorldToObjectSpace( float3 normal ); + +/// Transforms the point using object-to-world transformation matrix resulting from the current active transformation +/// list. +/// +/// The cost of this function may be proportional to the size of the transformation list. +static __forceinline__ __device__ float3 optixTransformPointFromObjectToWorldSpace( float3 point ); + +/// Transforms the vector using object-to-world transformation matrix resulting from the current active transformation +/// list. +/// +/// The cost of this function may be proportional to the size of the transformation list. 
+static __forceinline__ __device__ float3 optixTransformVectorFromObjectToWorldSpace( float3 vec ); + +/// Transforms the normal using object-to-world transformation matrix resulting from the current active transformation +/// list. +/// +/// The cost of this function may be proportional to the size of the transformation list. +static __forceinline__ __device__ float3 optixTransformNormalFromObjectToWorldSpace( float3 normal ); + +/// Returns the number of transforms on the current transform list. +/// +/// Only available in IS, AH, CH, EX +static __forceinline__ __device__ unsigned int optixGetTransformListSize(); + +/// Returns the traversable handle for a transform on the current transform list. +/// +/// Only available in IS, AH, CH, EX +static __forceinline__ __device__ OptixTraversableHandle optixGetTransformListHandle( unsigned int index ); + + +/// Returns the transform type of a traversable handle from a transform list. +static __forceinline__ __device__ OptixTransformType optixGetTransformTypeFromHandle( OptixTraversableHandle handle ); + +/// Returns a pointer to an OptixStaticTransform from its traversable handle. +/// +/// Returns 0 if the traversable is not of type OPTIX_TRANSFORM_TYPE_STATIC_TRANSFORM. +static __forceinline__ __device__ const OptixStaticTransform* optixGetStaticTransformFromHandle( OptixTraversableHandle handle ); + +/// Returns a pointer to an OptixSRTMotionTransform from its traversable handle. +/// +/// Returns 0 if the traversable is not of type OPTIX_TRANSFORM_TYPE_SRT_MOTION_TRANSFORM. +static __forceinline__ __device__ const OptixSRTMotionTransform* optixGetSRTMotionTransformFromHandle( OptixTraversableHandle handle ); + +/// Returns a pointer to an OptixMatrixMotionTransform from its traversable handle. +/// +/// Returns 0 if the traversable is not of type OPTIX_TRANSFORM_TYPE_MATRIX_MOTION_TRANSFORM.
+static __forceinline__ __device__ const OptixMatrixMotionTransform* optixGetMatrixMotionTransformFromHandle( OptixTraversableHandle handle ); + +/// Returns instanceId from an OptixInstance traversable. +/// +/// Returns 0 if the traversable handle does not reference an OptixInstance. +static __forceinline__ __device__ unsigned int optixGetInstanceIdFromHandle( OptixTraversableHandle handle ); + +/// Returns child traversable handle from an OptixInstance traversable. +/// +/// Returns 0 if the traversable handle does not reference an OptixInstance. +static __forceinline__ __device__ OptixTraversableHandle optixGetInstanceChildFromHandle( OptixTraversableHandle handle ); + +/// Returns object-to-world transform from an OptixInstance traversable. +/// +/// Returns 0 if the traversable handle does not reference an OptixInstance. +static __forceinline__ __device__ const float4* optixGetInstanceTransformFromHandle( OptixTraversableHandle handle ); + +/// Returns world-to-object transform from an OptixInstance traversable. +/// +/// Returns 0 if the traversable handle does not reference an OptixInstance. +static __forceinline__ __device__ const float4* optixGetInstanceInverseTransformFromHandle( OptixTraversableHandle handle ); + +/// Reports an intersection (overload without attributes). +/// +/// If optixGetRayTmin() <= hitT <= optixGetRayTmax(), the any hit program associated with this intersection program (via the SBT entry) is called. +/// The AH program can do one of three things: +/// 1. call optixIgnoreIntersection - no hit is recorded, optixReportIntersection returns false +/// 2. call optixTerminateRay - hit is recorded, optixReportIntersection does not return, no further traversal occurs, +/// and the associated closest hit program is called +/// 3. neither - hit is recorded, optixReportIntersection returns true +/// hitKind - Only the 7 least significant bits should be written [0..127]. Any values above 127 are reserved for built-in intersection.
The value can be queried with optixGetHitKind() in AH and CH. +/// +/// The attributes specified with a0..a7 are available in the AH and CH programs. +/// Note that the attributes available in the CH program correspond to the closest recorded intersection. +/// The number of attributes in registers and memory can be configured in the pipeline. +/// +/// \param[in] hitT +/// \param[in] hitKind +static __forceinline__ __device__ bool optixReportIntersection( float hitT, unsigned int hitKind ); + +/// Reports an intersection (overload with 1 attribute register). +/// +/// \see #optixReportIntersection(float,unsigned int) +static __forceinline__ __device__ bool optixReportIntersection( float hitT, unsigned int hitKind, unsigned int a0 ); + +/// Reports an intersection (overload with 2 attribute registers). +/// +/// \see #optixReportIntersection(float,unsigned int) +static __forceinline__ __device__ bool optixReportIntersection( float hitT, unsigned int hitKind, unsigned int a0, unsigned int a1 ); + +/// Reports an intersection (overload with 3 attribute registers). +/// +/// \see #optixReportIntersection(float,unsigned int) +static __forceinline__ __device__ bool optixReportIntersection( float hitT, unsigned int hitKind, unsigned int a0, unsigned int a1, unsigned int a2 ); + +/// Reports an intersection (overload with 4 attribute registers). +/// +/// \see #optixReportIntersection(float,unsigned int) +static __forceinline__ __device__ bool optixReportIntersection( float hitT, + unsigned int hitKind, + unsigned int a0, + unsigned int a1, + unsigned int a2, + unsigned int a3 ); + +/// Reports an intersection (overload with 5 attribute registers). +/// +/// \see #optixReportIntersection(float,unsigned int) +static __forceinline__ __device__ bool optixReportIntersection( float hitT, + unsigned int hitKind, + unsigned int a0, + unsigned int a1, + unsigned int a2, + unsigned int a3, + unsigned int a4 ); + +/// Reports an intersection (overload with 6 attribute registers). 
+/// +/// \see #optixReportIntersection(float,unsigned int) +static __forceinline__ __device__ bool optixReportIntersection( float hitT, + unsigned int hitKind, + unsigned int a0, + unsigned int a1, + unsigned int a2, + unsigned int a3, + unsigned int a4, + unsigned int a5 ); + +/// Reports an intersection (overload with 7 attribute registers). +/// +/// \see #optixReportIntersection(float,unsigned int) +static __forceinline__ __device__ bool optixReportIntersection( float hitT, + unsigned int hitKind, + unsigned int a0, + unsigned int a1, + unsigned int a2, + unsigned int a3, + unsigned int a4, + unsigned int a5, + unsigned int a6 ); + +/// Reports an intersection (overload with 8 attribute registers). +/// +/// \see #optixReportIntersection(float,unsigned int) +static __forceinline__ __device__ bool optixReportIntersection( float hitT, + unsigned int hitKind, + unsigned int a0, + unsigned int a1, + unsigned int a2, + unsigned int a3, + unsigned int a4, + unsigned int a5, + unsigned int a6, + unsigned int a7 ); + +/// Returns the attribute at slot 0. +static __forceinline__ __device__ unsigned int optixGetAttribute_0(); +/// Returns the attribute at slot 1. +static __forceinline__ __device__ unsigned int optixGetAttribute_1(); +/// Returns the attribute at slot 2. +static __forceinline__ __device__ unsigned int optixGetAttribute_2(); +/// Returns the attribute at slot 3. +static __forceinline__ __device__ unsigned int optixGetAttribute_3(); +/// Returns the attribute at slot 4. +static __forceinline__ __device__ unsigned int optixGetAttribute_4(); +/// Returns the attribute at slot 5. +static __forceinline__ __device__ unsigned int optixGetAttribute_5(); +/// Returns the attribute at slot 6. +static __forceinline__ __device__ unsigned int optixGetAttribute_6(); +/// Returns the attribute at slot 7. +static __forceinline__ __device__ unsigned int optixGetAttribute_7(); + +/// Record the hit, stops traversal, and proceeds to CH. +/// +/// Available only in AH. 
+static __forceinline__ __device__ void optixTerminateRay(); + +/// Discards the hit, and returns control to the calling optixReportIntersection or built-in intersection routine. +/// +/// Available only in AH. +static __forceinline__ __device__ void optixIgnoreIntersection(); + + +/// For a given OptixBuildInputTriangleArray the number of primitives is defined as +/// "(OptixBuildInputTriangleArray::indexBuffer == 0) ? OptixBuildInputTriangleArray::numVertices/3 : +/// OptixBuildInputTriangleArray::numIndexTriplets;". +/// For a given OptixBuildInputCustomPrimitiveArray the number of primitives is defined as +/// numAabbs. +/// +/// The primitive index returns the index into the array of primitives +/// plus the primitiveIndexOffset. +/// +/// In IS and AH this corresponds to the currently intersected primitive. +/// In CH this corresponds to the primitive index of the closest intersected primitive. +static __forceinline__ __device__ unsigned int optixGetPrimitiveIndex(); + +/// Returns the Sbt GAS index of the primitive associated with the current intersection. +/// +/// In IS and AH this corresponds to the currently intersected primitive. +/// In CH this corresponds to the Sbt GAS index of the closest intersected primitive. +/// In EX with exception code OPTIX_EXCEPTION_CODE_TRAVERSAL_INVALID_HIT_SBT corresponds to the sbt index within the hit GAS. Returns zero for all other exceptions. +static __forceinline__ __device__ unsigned int optixGetSbtGASIndex(); + + +/// Returns the OptixInstance::instanceId of the instance within the top level acceleration structure associated with the current intersection. +/// +/// When building an acceleration structure using OptixBuildInputInstanceArray each OptixInstance has a user supplied instanceId. +/// OptixInstance objects reference another acceleration structure. During traversal the acceleration structures are visited top down. 
+/// In the IS and AH programs the OptixInstance::instanceId corresponding to the most recently visited OptixInstance is returned when calling optixGetInstanceId(). +/// In CH optixGetInstanceId() returns the OptixInstance::instanceId when the hit was recorded with optixReportIntersection. +/// In the case where there is no OptixInstance visited, optixGetInstanceId returns ~0u +static __forceinline__ __device__ unsigned int optixGetInstanceId(); + +/// Returns the zero-based index of the instance within its instance acceleration structure associated with the current intersection. +/// +/// In the IS and AH programs the index corresponding to the most recently visited OptixInstance is returned when calling optixGetInstanceIndex(). +/// In CH optixGetInstanceIndex() returns the index when the hit was recorded with optixReportIntersection. +/// In the case where there is no OptixInstance visited, optixGetInstanceIndex returns 0 +static __forceinline__ __device__ unsigned int optixGetInstanceIndex(); + +/// Returns the 8 bit hit kind associated with the current hit. +/// +/// Use optixGetPrimitiveType() to interpret the hit kind. +/// For custom intersections (primitive type OPTIX_PRIMITIVE_TYPE_CUSTOM), +/// this is the 7-bit hitKind passed to optixReportIntersection(). +/// Hit kinds greater than 127 are reserved for built-in primitives. +/// +/// Available only in AH and CH. +static __forceinline__ __device__ unsigned int optixGetHitKind(); + +/// Function interpreting the result of #optixGetHitKind(). +static __forceinline__ __device__ OptixPrimitiveType optixGetPrimitiveType( unsigned int hitKind ); + +/// Function interpreting the result of #optixGetHitKind(). +static __forceinline__ __device__ bool optixIsFrontFaceHit( unsigned int hitKind ); + +/// Function interpreting the result of #optixGetHitKind(). 
+static __forceinline__ __device__ bool optixIsBackFaceHit( unsigned int hitKind ); + +/// Function interpreting the hit kind associated with the current optixReportIntersection. +static __forceinline__ __device__ OptixPrimitiveType optixGetPrimitiveType(); + +/// Function interpreting the hit kind associated with the current optixReportIntersection. +static __forceinline__ __device__ bool optixIsFrontFaceHit(); + +/// Function interpreting the hit kind associated with the current optixReportIntersection. +static __forceinline__ __device__ bool optixIsBackFaceHit(); + +/// Convenience function interpreting the result of #optixGetHitKind(). +static __forceinline__ __device__ bool optixIsTriangleHit(); + +/// Convenience function interpreting the result of #optixGetHitKind(). +static __forceinline__ __device__ bool optixIsTriangleFrontFaceHit(); + +/// Convenience function interpreting the result of #optixGetHitKind(). +static __forceinline__ __device__ bool optixIsTriangleBackFaceHit(); + +/// Convenience function that returns the first two attributes as floats. +/// +/// When using OptixBuildInputTriangleArray objects, during intersection the barycentric +/// coordinates are stored into the first two attribute registers. +static __forceinline__ __device__ float2 optixGetTriangleBarycentrics(); + +/// Convenience function that returns the curve parameter. +/// +/// When using OptixBuildInputCurveArray objects, during intersection the curve parameter +/// is stored into the first attribute register. +static __forceinline__ __device__ float optixGetCurveParameter(); + +/// Available in any program, it returns the current launch index within the launch dimensions specified by optixLaunch on the host. +/// +/// The raygen program is typically only launched once per launch index. +static __forceinline__ __device__ uint3 optixGetLaunchIndex(); + +/// Available in any program, it returns the dimensions of the current launch specified by optixLaunch on the host. 
+static __forceinline__ __device__ uint3 optixGetLaunchDimensions(); + +/// Returns the generic memory space pointer to the data region (past the header) of the currently active SBT record corresponding to the current program. +static __forceinline__ __device__ CUdeviceptr optixGetSbtDataPointer(); + +/// Throws a user exception with the given exception code (overload without exception details). +/// +/// The exception code must be in the range from 0 to 2^30 - 1. Up to 8 optional exception details can be passed. They +/// can be queried in the EX program using optixGetExceptionDetail_0() to ..._7(). +/// +/// The exception details must not be used to encode pointers to the stack since the current stack is not preserved in +/// the EX program. +/// +/// Not available in EX. +/// +/// \param[in] exceptionCode The exception code to be thrown. +static __forceinline__ __device__ void optixThrowException( int exceptionCode ); + +/// Throws a user exception with the given exception code (overload with 1 exception detail). +/// +/// \see #optixThrowException(int) +static __forceinline__ __device__ void optixThrowException( int exceptionCode, unsigned int exceptionDetail0 ); + +/// Throws a user exception with the given exception code (overload with 2 exception details). +/// +/// \see #optixThrowException(int) +static __forceinline__ __device__ void optixThrowException( int exceptionCode, + unsigned int exceptionDetail0, + unsigned int exceptionDetail1 ); + +/// Throws a user exception with the given exception code (overload with 3 exception details). +/// +/// \see #optixThrowException(int) +static __forceinline__ __device__ void optixThrowException( int exceptionCode, + unsigned int exceptionDetail0, + unsigned int exceptionDetail1, + unsigned int exceptionDetail2 ); + +/// Throws a user exception with the given exception code (overload with 4 exception details).
+/// +/// \see #optixThrowException(int) +static __forceinline__ __device__ void optixThrowException( int exceptionCode, + unsigned int exceptionDetail0, + unsigned int exceptionDetail1, + unsigned int exceptionDetail2, + unsigned int exceptionDetail3 ); + +/// Throws a user exception with the given exception code (overload with 5 exception details). +/// +/// \see #optixThrowException(int) +static __forceinline__ __device__ void optixThrowException( int exceptionCode, + unsigned int exceptionDetail0, + unsigned int exceptionDetail1, + unsigned int exceptionDetail2, + unsigned int exceptionDetail3, + unsigned int exceptionDetail4 ); + +/// Throws a user exception with the given exception code (overload with 6 exception details). +/// +/// \see #optixThrowException(int) +static __forceinline__ __device__ void optixThrowException( int exceptionCode, + unsigned int exceptionDetail0, + unsigned int exceptionDetail1, + unsigned int exceptionDetail2, + unsigned int exceptionDetail3, + unsigned int exceptionDetail4, + unsigned int exceptionDetail5 ); + +/// Throws a user exception with the given exception code (overload with 7 exception details). +/// +/// \see #optixThrowException(int) +static __forceinline__ __device__ void optixThrowException( int exceptionCode, + unsigned int exceptionDetail0, + unsigned int exceptionDetail1, + unsigned int exceptionDetail2, + unsigned int exceptionDetail3, + unsigned int exceptionDetail4, + unsigned int exceptionDetail5, + unsigned int exceptionDetail6 ); + +/// Throws a user exception with the given exception code (overload with 8 exception details). 
+/// +/// \see #optixThrowException(int) +static __forceinline__ __device__ void optixThrowException( int exceptionCode, + unsigned int exceptionDetail0, + unsigned int exceptionDetail1, + unsigned int exceptionDetail2, + unsigned int exceptionDetail3, + unsigned int exceptionDetail4, + unsigned int exceptionDetail5, + unsigned int exceptionDetail6, + unsigned int exceptionDetail7 ); + +/// Returns the exception code. +/// +/// Only available in EX. +static __forceinline__ __device__ int optixGetExceptionCode(); + +/// Returns the 32-bit exception detail at slot 0. +/// +/// The behavior is undefined if the exception is not a user exception, or the used overload #optixThrowException() did +/// not provide the queried exception detail. +/// +/// Only available in EX. +static __forceinline__ __device__ unsigned int optixGetExceptionDetail_0(); + +/// Returns the 32-bit exception detail at slot 1. +/// +/// \see #optixGetExceptionDetail_0() +static __forceinline__ __device__ unsigned int optixGetExceptionDetail_1(); + +/// Returns the 32-bit exception detail at slot 2. +/// +/// \see #optixGetExceptionDetail_0() +static __forceinline__ __device__ unsigned int optixGetExceptionDetail_2(); + +/// Returns the 32-bit exception detail at slot 3. +/// +/// \see #optixGetExceptionDetail_0() +static __forceinline__ __device__ unsigned int optixGetExceptionDetail_3(); + +/// Returns the 32-bit exception detail at slot 4. +/// +/// \see #optixGetExceptionDetail_0() +static __forceinline__ __device__ unsigned int optixGetExceptionDetail_4(); + +/// Returns the 32-bit exception detail at slot 5. +/// +/// \see #optixGetExceptionDetail_0() +static __forceinline__ __device__ unsigned int optixGetExceptionDetail_5(); + +/// Returns the 32-bit exception detail at slot 6. +/// +/// \see #optixGetExceptionDetail_0() +static __forceinline__ __device__ unsigned int optixGetExceptionDetail_6(); + +/// Returns the 32-bit exception detail at slot 7. 
+/// +/// \see #optixGetExceptionDetail_0() +static __forceinline__ __device__ unsigned int optixGetExceptionDetail_7(); + +/// Returns the invalid traversable handle for exceptions with exception code OPTIX_EXCEPTION_CODE_TRAVERSAL_INVALID_TRAVERSABLE. +/// +/// Returns zero for all other exception codes. +/// +/// Only available in EX. +static __forceinline__ __device__ OptixTraversableHandle optixGetExceptionInvalidTraversable(); + +/// Returns the invalid sbt offset for exceptions with exception code OPTIX_EXCEPTION_CODE_TRAVERSAL_INVALID_MISS_SBT and OPTIX_EXCEPTION_CODE_TRAVERSAL_INVALID_HIT_SBT. +/// +/// Returns zero for all other exception codes. +/// +/// Only available in EX. +static __forceinline__ __device__ int optixGetExceptionInvalidSbtOffset(); + +/// Returns the invalid ray for exceptions with exception code OPTIX_EXCEPTION_CODE_INVALID_RAY. +/// Exceptions of type OPTIX_EXCEPTION_CODE_INVALID_RAY are thrown when one or more values that were +/// passed into optixTrace are either inf or nan. +/// +/// OptixInvalidRayExceptionDetails::rayTime will always be 0 if OptixPipelineCompileOptions::usesMotionBlur is 0. +/// Values in the returned struct are all zero for all other exception codes. +/// +/// Only available in EX. +static __forceinline__ __device__ OptixInvalidRayExceptionDetails optixGetExceptionInvalidRay(); + +/// Returns information about an exception with code OPTIX_EXCEPTION_CODE_CALLABLE_PARAMETER_MISMATCH. +/// +/// Exceptions of type OPTIX_EXCEPTION_CODE_CALLABLE_PARAMETER_MISMATCH are called when the number of +/// arguments that were passed into a call to optixDirectCall or optixContinuationCall does not match +/// the number of parameters of the callable that is called. +/// Note that the parameters are packed by OptiX into individual 32 bit values, so the number of +/// expected and passed values may not correspond to the number of arguments passed into optixDirectCall +/// or optixContinuationCall. 
+/// +/// Values in the returned struct are all zero for all other exception codes. +/// +/// Only available in EX. +static __forceinline__ __device__ OptixParameterMismatchExceptionDetails optixGetExceptionParameterMismatch(); + +/// Returns a string that includes information about the source location that caused the current exception. +/// +/// The source location is only available for exceptions of type OPTIX_EXCEPTION_CODE_CALLABLE_PARAMETER_MISMATCH, +/// OPTIX_EXCEPTION_CODE_UNSUPPORTED_PRIMITIVE_TYPE, OPTIX_EXCEPTION_CODE_INVALID_RAY, and for user exceptions. +/// Line information needs to be present in the input PTX and OptixModuleCompileOptions::debugLevel +/// may not be set to OPTIX_COMPILE_DEBUG_LEVEL_NONE. +/// +/// Returns a NULL pointer if no line information is available. +/// +/// Only available in EX. +static __forceinline__ __device__ char* optixGetExceptionLineInfo(); + +/// Creates a call to the direct callable program at the specified SBT entry. +/// +/// This will call the program that was specified in the OptixProgramGroupCallables::entryFunctionNameDC in the +/// module specified by OptixProgramGroupCallables::moduleDC. +/// The address of the SBT entry is calculated by OptixShaderBindingTable::callablesRecordBase + ( OptixShaderBindingTable::callablesRecordStrideInBytes * sbtIndex ). +/// +/// Behavior is undefined if there is no direct callable program at the specified SBT entry. +/// +/// Behavior is undefined if the number of arguments that are being passed in does not match the number of +/// parameters expected by the program that is called. In that case an exception of type OPTIX_EXCEPTION_CODE_CALLABLE_PARAMETER_MISMATCH +/// will be thrown if OPTIX_EXCEPTION_FLAG_DEBUG was specified for the OptixPipelineCompileOptions::exceptionFlags. +/// +/// \param[in] sbtIndex The offset of the SBT entry of the direct callable program to call relative to OptixShaderBindingTable::callablesRecordBase. 
+/// \param[in] args The arguments to pass to the direct callable program. +template <typename ReturnT, typename... ArgTypes> +static __forceinline__ __device__ ReturnT optixDirectCall( unsigned int sbtIndex, ArgTypes... args ); + + +/// Creates a call to the continuation callable program at the specified SBT entry. +/// +/// This will call the program that was specified in the OptixProgramGroupCallables::entryFunctionNameCC in the +/// module specified by OptixProgramGroupCallables::moduleCC. +/// The address of the SBT entry is calculated by OptixShaderBindingTable::callablesRecordBase + ( OptixShaderBindingTable::callablesRecordStrideInBytes * sbtIndex ). +/// As opposed to direct callable programs, continuation callable programs are allowed to call optixTrace recursively. +/// +/// Behavior is undefined if there is no continuation callable program at the specified SBT entry. +/// +/// Behavior is undefined if the number of arguments that are being passed in does not match the number of +/// parameters expected by the program that is called. In that case an exception of type OPTIX_EXCEPTION_CODE_CALLABLE_PARAMETER_MISMATCH +/// will be thrown if OPTIX_EXCEPTION_FLAG_DEBUG was specified for the OptixPipelineCompileOptions::exceptionFlags. +/// +/// \param[in] sbtIndex The offset of the SBT entry of the continuation callable program to call relative to OptixShaderBindingTable::callablesRecordBase. +/// \param[in] args The arguments to pass to the continuation callable program. +template <typename ReturnT, typename... ArgTypes> +static __forceinline__ __device__ ReturnT optixContinuationCall( unsigned int sbtIndex, ArgTypes... args ); + + +/// optixTexFootprint2D calculates the footprint of a corresponding 2D texture fetch (non-mipmapped). +/// +/// On Turing and subsequent architectures, a texture footprint instruction allows user programs to +/// determine the set of texels that would be accessed by an equivalent filtered texture lookup. 
+/// +/// \param[in] tex CUDA texture object (cast to 64-bit integer) +/// \param[in] texInfo Texture info packed into 32-bit integer, described below. +/// \param[in] x Texture coordinate +/// \param[in] y Texture coordinate +/// \param[out] singleMipLevel Result indicating whether the footprint spans only a single miplevel. +/// +/// The texture info argument is a packed 32-bit integer with the following layout: +/// +/// texInfo[31:29] = reserved (3 bits) +/// texInfo[28:24] = miplevel count (5 bits) +/// texInfo[23:20] = log2 of tile width (4 bits) +/// texInfo[19:16] = log2 of tile height (4 bits) +/// texInfo[15:10] = reserved (6 bits) +/// texInfo[9:8] = horizontal wrap mode (2 bits) (CUaddress_mode) +/// texInfo[7:6] = vertical wrap mode (2 bits) (CUaddress_mode) +/// texInfo[5] = mipmap filter mode (1 bit) (CUfilter_mode) +/// texInfo[4:0] = maximum anisotropy (5 bits) +/// +/// Returns a 16-byte structure (as a uint4) that stores the footprint of a texture request at a +/// particular "granularity", which has the following layout: +/// +/// struct Texture2DFootprint +/// { +/// unsigned long long mask; +/// unsigned int tileY : 12; +/// unsigned int reserved1 : 4; +/// unsigned int dx : 3; +/// unsigned int dy : 3; +/// unsigned int reserved2 : 2; +/// unsigned int granularity : 4; +/// unsigned int reserved3 : 4; +/// unsigned int tileX : 12; +/// unsigned int level : 4; +/// unsigned int reserved4 : 16; +/// }; +/// +/// The granularity indicates the size of texel groups that are represented by an 8x8 bitmask. For +/// example, a granularity of 12 indicates texel groups that are 128x64 texels in size. In a +/// footprint call, the returned granularity will either be the actual granularity of the result, or +/// 0 if the footprint call was able to honor the requested granularity (the usual case). +/// +/// level is the mip level of the returned footprint.
Two footprint calls are needed to get the +/// complete footprint when a texture call spans multiple mip levels. +/// +/// mask is an 8x8 bitmask of texel groups that are covered, or partially covered, by the footprint. +/// tileX and tileY give the starting position of the mask in 8x8 texel-group blocks. For example, +/// suppose a granularity of 12 (128x64 texels), and tileX=3 and tileY=4. In this case, bit 0 of the +/// mask (the low order bit) corresponds to texel group coordinates (3*8, 4*8), and texel +/// coordinates (3*8*128, 4*8*64), within the specified mip level. +/// +/// If nonzero, dx and dy specify a "toroidal rotation" of the bitmask. Toroidal rotation of a +/// coordinate in the mask simply means that its value is reduced by 8. Continuing the example from +/// above, if dx=0 and dy=0 the mask covers texel groups (3*8, 4*8) to (3*8+7, 4*8+7) inclusive. +/// If, on the other hand, dx=2, the rightmost 2 columns in the mask have their x coordinates +/// reduced by 8, and similarly for dy. +/// +/// See the OptiX SDK for sample code that illustrates how to unpack the result. +static __forceinline__ __device__ uint4 optixTexFootprint2D( unsigned long long tex, unsigned int texInfo, float x, float y, unsigned int* singleMipLevel ); + +/// optixTexFootprint2DLod calculates the footprint of a corresponding 2D texture fetch (tex2DLod) +/// \param[in] tex CUDA texture object (cast to 64-bit integer) +/// \param[in] texInfo Texture info packed into 32-bit integer, described below. +/// \param[in] x Texture coordinate +/// \param[in] y Texture coordinate +/// \param[in] level Level of detail (lod) +/// \param[in] coarse Requests footprint from coarse miplevel, when the footprint spans two levels. +/// \param[out] singleMipLevel Result indicating whether the footprint spans only a single miplevel. 
+/// \see #optixTexFootprint2D(unsigned long long,unsigned int,float,float,unsigned int*) +static __forceinline__ __device__ uint4 +optixTexFootprint2DLod( unsigned long long tex, unsigned int texInfo, float x, float y, float level, bool coarse, unsigned int* singleMipLevel ); + +/// optixTexFootprint2DGrad calculates the footprint of a corresponding 2D texture fetch (tex2DGrad) +/// \param[in] tex CUDA texture object (cast to 64-bit integer) +/// \param[in] texInfo Texture info packed into 32-bit integer, described below. +/// \param[in] x Texture coordinate +/// \param[in] y Texture coordinate +/// \param[in] dPdx_x Derivative of x coordinate, which determines level of detail. +/// \param[in] dPdx_y Derivative of x coordinate, which determines level of detail. +/// \param[in] dPdy_x Derivative of y coordinate, which determines level of detail. +/// \param[in] dPdy_y Derivative of y coordinate, which determines level of detail. +/// \param[in] coarse Requests footprint from coarse miplevel, when the footprint spans two levels. +/// \param[out] singleMipLevel Result indicating whether the footprint spans only a single miplevel. +/// \see #optixTexFootprint2D(unsigned long long,unsigned int,float,float,unsigned int*) +static __forceinline__ __device__ uint4 optixTexFootprint2DGrad( unsigned long long tex, + unsigned int texInfo, + float x, + float y, + float dPdx_x, + float dPdx_y, + float dPdy_x, + float dPdy_y, + bool coarse, + unsigned int* singleMipLevel ); + +/*@}*/ // end group optix_device_api + +#include "internal/optix_7_device_impl.h" + +#endif // __optix_optix_7_device_h__ diff --git a/dependencies/optix/optix_7_host.h b/dependencies/optix/optix_7_host.h new file mode 100644 index 0000000000000000000000000000000000000000..abafbab80f325c64f58d2748dd34cca21f9abb1e --- /dev/null +++ b/dependencies/optix/optix_7_host.h @@ -0,0 +1,993 @@ +/* + * Copyright (c) 2021 NVIDIA Corporation. All rights reserved.
+ * + * NVIDIA Corporation and its licensors retain all intellectual property and proprietary + * rights in and to this software, related documentation and any modifications thereto. + * Any use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation is strictly + * prohibited. + * + * TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THIS SOFTWARE IS PROVIDED *AS IS* + * AND NVIDIA AND ITS SUPPLIERS DISCLAIM ALL WARRANTIES, EITHER EXPRESS OR IMPLIED, + * INCLUDING, BUT NOT LIMITED TO, IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE. IN NO EVENT SHALL NVIDIA OR ITS SUPPLIERS BE LIABLE FOR ANY + * SPECIAL, INCIDENTAL, INDIRECT, OR CONSEQUENTIAL DAMAGES WHATSOEVER (INCLUDING, WITHOUT + * LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF + * BUSINESS INFORMATION, OR ANY OTHER PECUNIARY LOSS) ARISING OUT OF THE USE OF OR + * INABILITY TO USE THIS SOFTWARE, EVEN IF NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGES + */ + +/// @file +/// @author NVIDIA Corporation +/// @brief OptiX public API header +/// +/// OptiX host include file -- includes the host api if compiling host code. +/// For the math library routines include optix_math.h + +#if !defined( __OPTIX_INCLUDE_INTERNAL_HEADERS__ ) +#error("optix_7_host.h is an internal header file and must not be used directly. Please use optix_host.h or optix.h instead.") +#endif + +#ifndef __optix_optix_7_host_h__ +#define __optix_optix_7_host_h__ + +#include "optix_7_types.h" +#if !defined( OPTIX_DONT_INCLUDE_CUDA ) +// If OPTIX_DONT_INCLUDE_CUDA is defined, cuda driver types must be defined through other +// means before including optix headers. 
+#include <cuda.h> +#endif + + + +#ifdef __cplusplus +extern "C" { +#endif + +/// \defgroup optix_host_api Host API +/// \brief OptiX Host API + +/// \defgroup optix_host_api_error_handling Error handling +/// \ingroup optix_host_api +//@{ + +/// Returns a string containing the name of an error code in the enum. +/// +/// Output is a string representation of the enum. For example "OPTIX_SUCCESS" for +/// OPTIX_SUCCESS and "OPTIX_ERROR_INVALID_VALUE" for OPTIX_ERROR_INVALID_VALUE. +/// +/// If the error code is not recognized, "Unrecognized OptixResult code" is returned. +/// +/// \param[in] result OptixResult enum to generate string name for +/// +/// \see #optixGetErrorString +const char* optixGetErrorName( OptixResult result ); + +/// Returns the description string for an error code. +/// +/// Output is a string description of the enum. For example "Success" for +/// OPTIX_SUCCESS and "Invalid value" for OPTIX_ERROR_INVALID_VALUE. +/// +/// If the error code is not recognized, "Unrecognized OptixResult code" is returned. +/// +/// \param[in] result OptixResult enum to generate string description for +/// +/// \see #optixGetErrorName +const char* optixGetErrorString( OptixResult result ); + +//@} +/// \defgroup optix_host_api_device_context Device context +/// \ingroup optix_host_api +//@{ + +/// Create a device context associated with the CUDA context specified with 'fromContext'. +/// +/// If zero is specified for 'fromContext', OptiX will use the current CUDA context. The +/// CUDA context should be initialized before calling optixDeviceContextCreate. +/// +/// \param[in] fromContext +/// \param[in] options +/// \param[out] context +/// \return +/// - OPTIX_ERROR_CUDA_NOT_INITIALIZED +/// If using zero for 'fromContext' and CUDA has not been initialized yet on the calling +/// thread. +/// - OPTIX_ERROR_CUDA_ERROR +/// CUDA operation failed. +/// - OPTIX_ERROR_HOST_OUT_OF_MEMORY +/// Heap allocation failed. 
+/// - OPTIX_ERROR_INTERNAL_ERROR +/// Internal error +OptixResult optixDeviceContextCreate( CUcontext fromContext, const OptixDeviceContextOptions* options, OptixDeviceContext* context ); + +/// Destroys all CPU and GPU state associated with the device. +/// +/// It will attempt to block on CUDA streams that have launch work outstanding. +/// +/// Any API objects, such as OptixModule and OptixPipeline, not already destroyed will be +/// destroyed. +/// +/// Thread safety: A device context must not be destroyed while it is still in use by concurrent API calls in other threads. +OptixResult optixDeviceContextDestroy( OptixDeviceContext context ); + +/// Query properties of a device context. +/// +/// \param[in] context the device context to query the property for +/// \param[in] property the property to query +/// \param[out] value pointer to the returned +/// \param[in] sizeInBytes size of output +OptixResult optixDeviceContextGetProperty( OptixDeviceContext context, OptixDeviceProperty property, void* value, size_t sizeInBytes ); + +/// Sets the current log callback method. +/// +/// See #OptixLogCallback for more details. +/// +/// Thread safety: It is guaranteed that the callback itself (callbackFunction and callbackData) are updated atomically. +/// It is not guaranteed that the callback itself (callbackFunction and callbackData) and the callbackLevel are updated +/// atomically. It is unspecified when concurrent API calls using the same context start to make use of the new +/// callback method. +/// +/// \param[in] context the device context +/// \param[in] callbackFunction the callback function to call +/// \param[in] callbackData pointer to data passed to callback function while invoking it +/// \param[in] callbackLevel callback level +OptixResult optixDeviceContextSetLogCallback( OptixDeviceContext context, + OptixLogCallback callbackFunction, + void* callbackData, + unsigned int callbackLevel ); + +/// Enables or disables the disk cache. 
+/// +/// If caching was previously disabled, enabling it will attempt to initialize +/// the disk cache database using the currently configured cache location. An +/// error will be returned if initialization fails. +/// +/// Note that no in-memory cache is used, so no caching behavior will be observed if the disk cache +/// is disabled. +/// +/// The cache can be disabled by setting the environment variable OPTIX_CACHE_MAXSIZE=0. +/// The environment variable takes precedence over this setting. +/// See #optixDeviceContextSetCacheDatabaseSizes for additional information. +/// +/// Note that the disk cache can be disabled by the environment variable, but it cannot be enabled +/// via the environment if it is disabled via the API. +/// +/// \param[in] context the device context +/// \param[in] enabled 1 to enable, 0 to disable +OptixResult optixDeviceContextSetCacheEnabled( OptixDeviceContext context, + int enabled ); + +/// Sets the location of the disk cache. +/// +/// The location is specified by a directory. This directory should not be used for other purposes +/// and will be created if it does not exist. An error will be returned if it is not possible to +/// create the disk cache at the specified location for any reason (e.g., the path is invalid or +/// the directory is not writable). Caching will be disabled if the disk cache cannot be +/// initialized in the new location. If caching is disabled, no error will be returned until caching +/// is enabled. If the disk cache is located on a network file share, behavior is undefined. +/// +/// The location of the disk cache can be overridden with the environment variable OPTIX_CACHE_PATH. +/// The environment variable takes precedence over this setting.
+/// +/// The default location depends on the operating system: +/// - Windows: %LOCALAPPDATA%\\NVIDIA\\OptixCache +/// - Linux: /var/tmp/OptixCache_\<username\> (or /tmp/OptixCache_\<username\> if the first choice is not usable), +/// the underscore and username suffix are omitted if the username cannot be obtained +/// - MacOS X: /Library/Application Support/NVIDIA/OptixCache +/// +/// \param[in] context the device context +/// \param[in] location directory of disk cache +OptixResult optixDeviceContextSetCacheLocation( OptixDeviceContext context, const char* location ); + +/// Sets the low and high water marks for disk cache garbage collection. +/// +/// Garbage collection is triggered when a new entry is written to the cache and +/// the current cache data size plus the size of the cache entry that is about +/// to be inserted exceeds the high water mark. Garbage collection proceeds until +/// the size reaches the low water mark. Garbage collection will always free enough +/// space to insert the new entry without exceeding the low water mark. Setting +/// either limit to zero will disable garbage collection. An error will be returned +/// if both limits are non-zero and the high water mark is smaller than the low water mark. +/// +/// Note that garbage collection is performed only on writes to the disk cache. No garbage +/// collection is triggered on disk cache initialization or immediately when calling this function, +/// but on subsequent inserting of data into the database. +/// +/// If the size of a compiled module exceeds the value configured for the high water +/// mark and garbage collection is enabled, the module will not be added to the cache +/// and a warning will be added to the log. +/// +/// The high water mark can be overridden with the environment variable OPTIX_CACHE_MAXSIZE. +/// The environment variable takes precedence over the function parameters. The low water mark +/// will be set to half the value of OPTIX_CACHE_MAXSIZE. 
Setting OPTIX_CACHE_MAXSIZE to 0 will +/// disable the disk cache, but will not alter the contents of the cache. Negative and non-integer +/// values will be ignored. +/// +/// \param[in] context the device context +/// \param[in] lowWaterMark the low water mark +/// \param[in] highWaterMark the high water mark +OptixResult optixDeviceContextSetCacheDatabaseSizes( OptixDeviceContext context, size_t lowWaterMark, size_t highWaterMark ); + +/// Indicates whether the disk cache is enabled or disabled. +/// +/// \param[in] context the device context +/// \param[out] enabled 1 if enabled, 0 if disabled +OptixResult optixDeviceContextGetCacheEnabled( OptixDeviceContext context, int* enabled ); +/// Returns the location of the disk cache. If the cache has been disabled by setting the environment +/// variable OPTIX_CACHE_MAXSIZE=0, this function will return an empty string. +/// +/// \param[in] context the device context +/// \param[out] location directory of disk cache, null terminated if locationSize > 0 +/// \param[in] locationSize locationSize +OptixResult optixDeviceContextGetCacheLocation( OptixDeviceContext context, char* location, size_t locationSize ); + +/// Returns the low and high water marks for disk cache garbage collection. If the cache has been disabled by +/// setting the environment variable OPTIX_CACHE_MAXSIZE=0, this function will return 0 for the low and high +/// water marks. +/// +/// \param[in] context the device context +/// \param[out] lowWaterMark the low water mark +/// \param[out] highWaterMark the high water mark +OptixResult optixDeviceContextGetCacheDatabaseSizes( OptixDeviceContext context, size_t* lowWaterMark, size_t* highWaterMark ); + +//@} +/// \defgroup optix_host_api_pipelines Pipelines +/// \ingroup optix_host_api +//@{ + +/// logString is an optional buffer that contains compiler feedback and errors.
This +/// information is also passed to the context logger (if enabled), however it may be +/// difficult to correlate output to the logger to specific API invocations when using +/// multiple threads. The output to logString will only contain feedback for this specific +/// invocation of this API call. +/// +/// logStringSize as input should be a pointer to the number of bytes backing logString. +/// Upon return it contains the length of the log message (including the null terminator) +/// which may be greater than the input value. In this case, the log message will be +/// truncated to fit into logString. +/// +/// If logString or logStringSize are NULL, no output is written to logString. If +/// logStringSize points to a value that is zero, no output is written. This does not +/// affect output to the context logger if enabled. +/// +/// \param[in] context +/// \param[in] pipelineCompileOptions +/// \param[in] pipelineLinkOptions +/// \param[in] programGroups array of ProgramGroup objects +/// \param[in] numProgramGroups number of ProgramGroup objects +/// \param[out] logString Information will be written to this string. If logStringSize > 0 logString will be null terminated. +/// \param[in,out] logStringSize +/// \param[out] pipeline +OptixResult optixPipelineCreate( OptixDeviceContext context, + const OptixPipelineCompileOptions* pipelineCompileOptions, + const OptixPipelineLinkOptions* pipelineLinkOptions, + const OptixProgramGroup* programGroups, + unsigned int numProgramGroups, + char* logString, + size_t* logStringSize, + OptixPipeline* pipeline ); + +/// Thread safety: A pipeline must not be destroyed while it is still in use by concurrent API calls in other threads. +OptixResult optixPipelineDestroy( OptixPipeline pipeline ); + +/// Sets the stack sizes for a pipeline. 
+/// +/// Users are encouraged to see the programming guide and the implementations of the helper functions +/// to understand how to construct the stack sizes based on their particular needs. +/// +/// If this method is not used, an internal default implementation is used. The default implementation is correct (but +/// not necessarily optimal) as long as the maximum depth of call trees of CC and DC programs is at most 2 and no motion transforms are used. +/// +/// The maxTraversableGraphDepth corresponds to the maximal number of traversables visited when calling trace. +/// Every acceleration structure and motion transform count as one level of traversal. +/// E.g., for a simple IAS (instance acceleration structure) -> GAS (geometry acceleration structure) +/// traversal graph, the maxTraversableGraphDepth is two. +/// For IAS -> MT (motion transform) -> GAS, the maxTraversableGraphDepth is three. +/// Note that it does not matter whether a IAS or GAS has motion or not, it always counts as one. +/// Launching optix with exceptions turned on (see #OPTIX_EXCEPTION_FLAG_TRACE_DEPTH) will throw an exception +/// if the specified maxTraversableGraphDepth is too small. +/// +/// \param[in] pipeline The pipeline to configure the stack size for. +/// \param[in] directCallableStackSizeFromTraversal The direct stack size requirement for direct callables invoked from IS or AH. +/// \param[in] directCallableStackSizeFromState The direct stack size requirement for direct callables invoked from RG, MS, or CH. +/// \param[in] continuationStackSize The continuation stack requirement. +/// \param[in] maxTraversableGraphDepth The maximum depth of a traversable graph passed to trace.
+OptixResult optixPipelineSetStackSize( OptixPipeline pipeline, + unsigned int directCallableStackSizeFromTraversal, + unsigned int directCallableStackSizeFromState, + unsigned int continuationStackSize, + unsigned int maxTraversableGraphDepth ); + +//@} +/// \defgroup optix_host_api_modules Modules +/// \ingroup optix_host_api +//@{ + +/// logString is an optional buffer that contains compiler feedback and errors. This +/// information is also passed to the context logger (if enabled), however it may be +/// difficult to correlate output to the logger to specific API invocations when using +/// multiple threads. The output to logString will only contain feedback for this specific +/// invocation of this API call. +/// +/// logStringSize as input should be a pointer to the number of bytes backing logString. +/// Upon return it contains the length of the log message (including the null terminator) +/// which may be greater than the input value. In this case, the log message will be +/// truncated to fit into logString. +/// +/// If logString or logStringSize are NULL, no output is written to logString. If +/// logStringSize points to a value that is zero, no output is written. This does not +/// affect output to the context logger if enabled. +/// +/// \param[in] context +/// \param[in] moduleCompileOptions +/// \param[in] pipelineCompileOptions All modules in a pipeline need to use the same values for the pipeline compile options. +/// \param[in] PTX Pointer to the PTX input string. +/// \param[in] PTXsize Parsing proceeds up to PTXsize characters, or the first NUL byte, whichever occurs first. +/// \param[out] logString Information will be written to this string. If logStringSize > 0 logString will be null terminated. +/// \param[in,out] logStringSize +/// \param[out] module +/// +/// \return OPTIX_ERROR_INVALID_VALUE - context is 0, moduleCompileOptions is 0, pipelineCompileOptions is 0, PTX is 0, module is 0. 
+OptixResult optixModuleCreateFromPTX( OptixDeviceContext context, + const OptixModuleCompileOptions* moduleCompileOptions, + const OptixPipelineCompileOptions* pipelineCompileOptions, + const char* PTX, + size_t PTXsize, + char* logString, + size_t* logStringSize, + OptixModule* module ); + +/// This function is designed to do just enough work to create the OptixTask return +/// parameter and is expected to be fast enough to run without needing parallel execution. A +/// single thread could generate all the OptixTask objects for further processing in a +/// work pool. +/// +/// Options are similar to #optixModuleCreateFromPTX(), aside from the return parameter, +/// firstTask. +/// +/// The memory used to hold the PTX should be live until all tasks are finished. +/// +/// It is illegal to call #optixModuleDestroy() if any OptixTask objects are currently +/// being executed. In that case OPTIX_ERROR_ILLEGAL_DURING_TASK_EXECUTE will be returned. +/// +/// If an invocation of optixTaskExecute fails, the OptixModule will be marked as +/// OPTIX_MODULE_COMPILE_STATE_IMPENDING_FAILURE if there are outstanding tasks or +/// OPTIX_MODULE_COMPILE_STATE_FAILURE if there are no outstanding tasks. Subsequent calls +/// to #optixTaskExecute() may execute additional work to collect compilation errors +/// generated from the input. Currently executing tasks will not necessarily be terminated +/// immediately but at the next opportunity. + +/// Logging will continue to be directed to the logger installed with the +/// OptixDeviceContext. If logString is provided to #optixModuleCreateFromPTXWithTasks(), +/// it will contain all the compiler feedback from all executed tasks. The lifetime of the +/// memory pointed to by logString should extend from calling +/// #optixModuleCreateFromPTXWithTasks() to when the compilation state is either +/// OPTIX_MODULE_COMPILE_STATE_FAILURE or OPTIX_MODULE_COMPILE_STATE_COMPLETED.
OptiX will +/// not write to the logString outside of execution of +/// #optixModuleCreateFromPTXWithTasks() or #optixTaskExecute(). If the compilation state +/// is OPTIX_MODULE_COMPILE_STATE_IMPENDING_FAILURE and no further execution of +/// #optixTaskExecute() is performed the logString may be reclaimed by the application +/// before calling #optixModuleDestroy(). The contents of logString will contain output +/// from currently completed tasks. +/// +/// All OptixTask objects associated with a given OptixModule will be cleaned up when +/// #optixModuleDestroy() is called regardless of whether the compilation was successful +/// or not. If the compilation state is OPTIX_MODULE_COMPILE_STATE_IMPENDING_FAILURE, any +/// unstarted OptixTask objects do not need to be executed though there is no harm doing +/// so. +/// +/// \see #optixModuleCreateFromPTX +OptixResult optixModuleCreateFromPTXWithTasks( OptixDeviceContext context, + const OptixModuleCompileOptions* moduleCompileOptions, + const OptixPipelineCompileOptions* pipelineCompileOptions, + const char* PTX, + size_t PTXsize, + char* logString, + size_t* logStringSize, + OptixModule* module, + OptixTask* firstTask ); + +/// When creating a module with tasks, the current state of the module can be queried +/// using this function. +/// +/// Thread safety: Safe to call from any thread until optixModuleDestroy is called. +/// +/// \see #optixModuleCreateFromPTXWithTasks +OptixResult optixModuleGetCompilationState( OptixModule module, OptixModuleCompileState* state ); + +/// Call for OptixModule objects created with optixModuleCreateFromPTX and optixModuleDeserialize. +/// +/// Modules must not be destroyed while they are still used by any program group. +/// +/// Thread safety: A module must not be destroyed while it is still in use by concurrent API calls in other threads.
+OptixResult optixModuleDestroy( OptixModule module ); + +/// Returns a module containing the intersection program for the built-in primitive type specified +/// by the builtinISOptions. This module must be used as the moduleIS for the OptixProgramGroupHitgroup +/// in any SBT record for that primitive type. (The entryFunctionNameIS should be null.) +OptixResult optixBuiltinISModuleGet( OptixDeviceContext context, + const OptixModuleCompileOptions* moduleCompileOptions, + const OptixPipelineCompileOptions* pipelineCompileOptions, + const OptixBuiltinISOptions* builtinISOptions, + OptixModule* builtinModule ); + +//@} +/// \defgroup optix_host_api_tasks Tasks +/// \ingroup optix_host_api +//@{ + +/// Each OptixTask should be executed with #optixTaskExecute(). If additional parallel +/// work is found, new OptixTask objects will be returned in additionalTasks along with +/// the number of additional tasks in numAdditionalTasksCreated. The parameter +/// additionalTasks should point to a user allocated array of minimum size +/// maxNumAdditionalTasks. OptiX can generate up to maxNumAdditionalTasks additional tasks. +/// +/// Each task can be executed in parallel and in any order. +/// +/// Thread safety: Safe to call from any thread until #optixModuleDestroy() is called for +/// any associated task.
+/// +/// \see #optixModuleCreateFromPTXWithTasks +/// +/// \param[in] task the OptixTask to execute +/// \param[out] additionalTasks pointer to array of OptixTask objects to be filled in +/// \param[in] maxNumAdditionalTasks maximum number of additional OptixTask objects +/// \param[out] numAdditionalTasksCreated number of OptixTask objects created by OptiX and written into #additionalTasks +OptixResult optixTaskExecute( OptixTask task, OptixTask* additionalTasks, unsigned int maxNumAdditionalTasks, unsigned int* numAdditionalTasksCreated ); + +//@} +/// \defgroup optix_host_api_program_groups Program groups +/// \ingroup optix_host_api +//@{ + +/// Returns the stack sizes for the given program group. +/// +/// \param[in] programGroup the program group +/// \param[out] stackSizes the corresponding stack sizes +OptixResult optixProgramGroupGetStackSize( OptixProgramGroup programGroup, OptixStackSizes* stackSizes ); + +/// logString is an optional buffer that contains compiler feedback and errors. This +/// information is also passed to the context logger (if enabled), however it may be +/// difficult to correlate output to the logger to specific API invocations when using +/// multiple threads. The output to logString will only contain feedback for this specific +/// invocation of this API call. +/// +/// logStringSize as input should be a pointer to the number of bytes backing logString. +/// Upon return it contains the length of the log message (including the null terminator) +/// which may be greater than the input value. In this case, the log message will be +/// truncated to fit into logString. +/// +/// If logString or logStringSize are NULL, no output is written to logString. If +/// logStringSize points to a value that is zero, no output is written. This does not +/// affect output to the context logger if enabled. +/// +/// Creates numProgramGroups OptixProgramGroup objects from the specified +/// OptixProgramGroupDesc array.
The size of the arrays must match. +/// +/// \param[in] context +/// \param[in] programDescriptions N * OptixProgramGroupDesc +/// \param[in] numProgramGroups N +/// \param[in] options +/// \param[out] logString Information will be written to this string. If logStringSize > 0 logString will be null terminated. +/// \param[in,out] logStringSize +/// \param[out] programGroups +OptixResult optixProgramGroupCreate( OptixDeviceContext context, + const OptixProgramGroupDesc* programDescriptions, + unsigned int numProgramGroups, + const OptixProgramGroupOptions* options, + char* logString, + size_t* logStringSize, + OptixProgramGroup* programGroups ); + +/// Thread safety: A program group must not be destroyed while it is still in use by concurrent API calls in other threads. +OptixResult optixProgramGroupDestroy( OptixProgramGroup programGroup ); + +//@} +/// \defgroup optix_host_api_launches Launches +/// \ingroup optix_host_api +//@{ + +/// Where the magic happens. +/// +/// The stream and pipeline must belong to the same device context. Multiple launches +/// may be issued in parallel from multiple threads to different streams. +/// +/// pipelineParamsSize number of bytes are copied from the device memory pointed to by +/// pipelineParams before launch. It is an error if pipelineParamsSize is greater than the +/// size of the variable declared in modules and identified by +/// OptixPipelineCompileOptions::pipelineLaunchParamsVariableName. If the launch params +/// variable was optimized out or not found in the modules linked to the pipeline then +/// the pipelineParams and pipelineParamsSize parameters are ignored. +/// +/// sbt points to the shader binding table, which defines shader +/// groupings and their resources. See the SBT spec.
+/// +/// \param[in] pipeline +/// \param[in] stream +/// \param[in] pipelineParams +/// \param[in] pipelineParamsSize +/// \param[in] sbt +/// \param[in] width number of elements to compute +/// \param[in] height number of elements to compute +/// \param[in] depth number of elements to compute +/// +/// Thread safety: In the current implementation concurrent launches to the same pipeline are not +/// supported. Concurrent launches require separate OptixPipeline objects. +OptixResult optixLaunch( OptixPipeline pipeline, + CUstream stream, + CUdeviceptr pipelineParams, + size_t pipelineParamsSize, + const OptixShaderBindingTable* sbt, + unsigned int width, + unsigned int height, + unsigned int depth ); + +/// \param[in] programGroup the program group containing the program(s) +/// \param[out] sbtRecordHeaderHostPointer the result sbt record header +OptixResult optixSbtRecordPackHeader( OptixProgramGroup programGroup, void* sbtRecordHeaderHostPointer ); + +//@} +/// \defgroup optix_host_api_acceleration_structures Acceleration structures +/// \ingroup optix_host_api +//@{ + +/// \param[in] context +/// \param[in] accelOptions options for the accel build +/// \param[in] buildInputs an array of OptixBuildInput objects +/// \param[in] numBuildInputs number of elements in buildInputs (must be at least 1) +/// \param[out] bufferSizes fills in buffer sizes +OptixResult optixAccelComputeMemoryUsage( OptixDeviceContext context, + const OptixAccelBuildOptions* accelOptions, + const OptixBuildInput* buildInputs, + unsigned int numBuildInputs, + OptixAccelBufferSizes* bufferSizes ); + +/// \param[in] context +/// \param[in] stream +/// \param[in] accelOptions accel options +/// \param[in] buildInputs an array of OptixBuildInput objects +/// \param[in] numBuildInputs must be >= 1 for GAS, and == 1 for IAS +/// \param[in] tempBuffer must be a multiple of OPTIX_ACCEL_BUFFER_BYTE_ALIGNMENT +/// \param[in] tempBufferSizeInBytes +/// \param[in] outputBuffer must be a multiple of 
OPTIX_ACCEL_BUFFER_BYTE_ALIGNMENT +/// \param[in] outputBufferSizeInBytes +/// \param[out] outputHandle +/// \param[in] emittedProperties types of requested properties and output buffers +/// \param[in] numEmittedProperties number of post-build properties to populate (may be zero) +OptixResult optixAccelBuild( OptixDeviceContext context, + CUstream stream, + const OptixAccelBuildOptions* accelOptions, + const OptixBuildInput* buildInputs, + unsigned int numBuildInputs, + CUdeviceptr tempBuffer, + size_t tempBufferSizeInBytes, + CUdeviceptr outputBuffer, + size_t outputBufferSizeInBytes, + OptixTraversableHandle* outputHandle, + const OptixAccelEmitDesc* emittedProperties, + unsigned int numEmittedProperties ); + +/// Obtain relocation information, stored in OptixRelocationInfo, for a given context +/// and acceleration structure's traversable handle. +/// +/// The relocation information can be passed to optixCheckRelocationCompatibility to +/// determine if an acceleration structure, referenced by 'handle', can be relocated to a +/// different device's memory space (see #optixCheckRelocationCompatibility). +/// +/// When used with optixAccelRelocate, it provides data necessary for doing the relocation. +/// +/// If the acceleration structure data associated with 'handle' is copied multiple times, +/// the same OptixRelocationInfo can also be used on all copies. +/// +/// \param[in] context +/// \param[in] handle +/// \param[out] info +/// \return OPTIX_ERROR_INVALID_VALUE will be returned for traversable handles that are not from +/// acceleration structure builds. +OptixResult optixAccelGetRelocationInfo( OptixDeviceContext context, OptixTraversableHandle handle, OptixRelocationInfo* info ); + +/// Checks if an optix data structure built using another OptixDeviceContext (that was +/// used to fill in 'info') is compatible with the OptixDeviceContext specified in the +/// 'context' parameter. +/// +/// Any device is always compatible with itself. 
+/// +/// \param[in] context +/// \param[in] info +/// \param[out] compatible If OPTIX_SUCCESS is returned 'compatible' will have the value of either: +/// - 0: This context is not compatible with the optix data structure associated with 'info'. +/// - 1: This context is compatible. +OptixResult optixCheckRelocationCompatibility( OptixDeviceContext context, const OptixRelocationInfo* info, int* compatible ); + +/// optixAccelRelocate is called to update the acceleration structure after it has been +/// relocated. Relocation is necessary when the acceleration structure's location in device +/// memory has changed. optixAccelRelocate does not copy the memory. This function only +/// operates on the relocated memory whose new location is specified by 'targetAccel'. +/// optixAccelRelocate also returns the new OptixTraversableHandle associated with +/// 'targetAccel'. The original memory (source) is not required to be valid, only the +/// OptixRelocationInfo. +/// +/// Before calling optixAccelRelocate, optixCheckRelocationCompatibility should be +/// called to ensure the copy will be compatible with the destination device context. +/// +/// The memory pointed to by 'targetAccel' should be allocated with the same size as the +/// source acceleration. Similar to the 'outputBuffer' used in optixAccelBuild, this +/// pointer must be a multiple of OPTIX_ACCEL_BUFFER_BYTE_ALIGNMENT. +/// +/// The memory in 'targetAccel' must be allocated as long as the accel is in use. +/// +/// The instance traversables referenced by an IAS and the +/// micromaps referenced by a triangle GAS may themselves require relocation. +/// 'relocateInputs' and 'numRelocateInputs' should be used to specify the relocated +/// traversables and micromaps. After relocation, the relocated accel will reference +/// these relocated traversables and micromaps instead of their sources. 
+/// The number of relocate inputs 'numRelocateInputs' must match the number of build +/// inputs 'numBuildInputs' used to build the source accel. Relocation inputs +/// correspond with build inputs used to build the source accel and should appear in +/// the same order (see #optixAccelBuild). +/// 'relocateInputs' and 'numRelocateInputs' may be zero, preserving any references +/// to traversables and micromaps from the source accel. +/// +/// \param[in] context +/// \param[in] stream +/// \param[in] info +/// \param[in] relocateInputs +/// \param[in] numRelocateInputs +/// \param[in] targetAccel +/// \param[in] targetAccelSizeInBytes +/// \param[out] targetHandle +OptixResult optixAccelRelocate( OptixDeviceContext context, + CUstream stream, + const OptixRelocationInfo* info, + const OptixRelocateInput* relocateInputs, + size_t numRelocateInputs, + CUdeviceptr targetAccel, + size_t targetAccelSizeInBytes, + OptixTraversableHandle* targetHandle ); + +/// After building an acceleration structure, it can be copied in a compacted form to reduce +/// memory. In order to be compacted, OPTIX_BUILD_FLAG_ALLOW_COMPACTION must be supplied in +/// OptixAccelBuildOptions::buildFlags passed to optixAccelBuild. +/// +/// 'outputBuffer' is the pointer to where the compacted acceleration structure will be +/// written. This pointer must be a multiple of OPTIX_ACCEL_BUFFER_BYTE_ALIGNMENT. +/// +/// The size of the memory specified in 'outputBufferSizeInBytes' should be at least the +/// value computed using the OPTIX_PROPERTY_TYPE_COMPACTED_SIZE that was reported during +/// optixAccelBuild. 
+/// +/// \param[in] context +/// \param[in] stream +/// \param[in] inputHandle +/// \param[in] outputBuffer +/// \param[in] outputBufferSizeInBytes +/// \param[out] outputHandle +OptixResult optixAccelCompact( OptixDeviceContext context, + CUstream stream, + OptixTraversableHandle inputHandle, + CUdeviceptr outputBuffer, + size_t outputBufferSizeInBytes, + OptixTraversableHandle* outputHandle ); + +/// \param[in] onDevice +/// \param[in] pointer pointer to traversable allocated in OptixDeviceContext. This pointer must be a multiple of OPTIX_TRANSFORM_BYTE_ALIGNMENT +/// \param[in] traversableType Type of OptixTraversableHandle to create +/// \param[out] traversableHandle traversable handle. traversableHandle must be in host memory +OptixResult optixConvertPointerToTraversableHandle( OptixDeviceContext onDevice, + CUdeviceptr pointer, + OptixTraversableType traversableType, + OptixTraversableHandle* traversableHandle ); + + +/// Determine the amount of memory necessary for an Opacity Micromap Array build. +/// +/// \param[in] context +/// \param[in] buildInput +/// \param[out] bufferSizes +OptixResult optixOpacityMicromapArrayComputeMemoryUsage( OptixDeviceContext context, + const OptixOpacityMicromapArrayBuildInput* buildInput, + OptixMicromapBufferSizes* bufferSizes ); + +/// Construct an array of Opacity Micromaps. +/// +/// Each triangle within an instance/GAS may reference one opacity micromap to give finer +/// control over alpha behavior. An opacity micromap consists of a set of 4^N micro-triangles +/// in a triangular uniform barycentric grid. Multiple opacity micromaps are collected (built) +/// into an opacity micromap array with this function. Each geometry in a GAS may bind a +/// single opacity micromap array and can use opacity micromaps from that array only. +/// +/// Each micro-triangle within an opacity micromap can be in one of four states: Transparent, +/// Opaque, Unknown-Transparent or Unknown-Opaque.
During traversal, if a triangle with a +/// opacity micromap attached is intersected, the opacity micromap is queried to categorize +/// the hit as either opaque, unknown (alpha) or a miss. Geometry, ray or instance flags that +/// modify the alpha/opaque behavior are applied _after_ this opacity micromap query. +/// +/// The opacity micromap query may operate in 2-state mode (alpha testing) or 4-state mode (AHS culling), +/// depending on the opacity micromap type and ray/instance flags. When operating in 2-state +/// mode, alpha hits will not be reported, and transparent and opaque hits must be accurate. +/// +/// \param[in] context +/// \param[in] stream +/// \param[in] buildInput a single build input object referencing many opacity micromaps +/// \param[in] buffers the buffers used for build +OptixResult optixOpacityMicromapArrayBuild( OptixDeviceContext context, + CUstream stream, + const OptixOpacityMicromapArrayBuildInput* buildInput, + const OptixMicromapBuffers* buffers ); + +/// Obtain relocation information, stored in OptixRelocationInfo, for a given context +/// and opacity micromap array. +/// +/// The relocation information can be passed to optixCheckRelocationCompatibility to +/// determine if an opacity micromap array, referenced by buffers, can be relocated to a +/// different device's memory space (see #optixCheckRelocationCompatibility). +/// +/// When used with optixOpacityMicromapArrayRelocate, it provides data necessary for doing the relocation. +/// +/// If the opacity micromap array data associated with 'opacityMicromapArray' is copied multiple times, +/// the same OptixRelocationInfo can also be used on all copies.
+/// +/// \param[in] context +/// \param[in] opacityMicromapArray +/// \param[out] info +OptixResult optixOpacityMicromapArrayGetRelocationInfo( OptixDeviceContext context, CUdeviceptr opacityMicromapArray, OptixRelocationInfo* info ); + +/// optixOpacityMicromapArrayRelocate is called to update the opacity micromap array after it has been +/// relocated. Relocation is necessary when the opacity micromap array's location in device +/// memory has changed. optixOpacityMicromapArrayRelocate does not copy the memory. This function only +/// operates on the relocated memory whose new location is specified by 'targetOpacityMicromapArray'. +/// The original memory (source) is not required to be valid, only the +/// OptixRelocationInfo. +/// +/// Before calling optixOpacityMicromapArrayRelocate, optixCheckRelocationCompatibility should be called +/// to ensure the copy will be compatible with the destination device context. +/// +/// The memory pointed to by 'targetOpacityMicromapArray' should be allocated with the same size as the +/// source opacity micromap array. Similar to the 'OptixMicromapBuffers::output' used in optixOpacityMicromapArrayBuild, +/// this pointer must be a multiple of OPTIX_OPACITY_MICROMAP_ARRAY_BUFFER_BYTE_ALIGNMENT. +/// +/// The memory in 'targetOpacityMicromapArray' must be allocated as long as the opacity micromap array is in use. +/// +/// Note that any Acceleration Structures built using the original memory (source) as input will +/// still be associated with this original memory.
To associate an existing (possibly relocated) +/// Acceleration Structures with the relocated opacity micromap array, use optixAccelBuild +/// to update the existing Acceleration Structures (See OPTIX_BUILD_OPERATION_UPDATE) +/// +/// \param[in] context +/// \param[in] stream +/// \param[in] info +/// \param[in] targetOpacityMicromapArray +/// \param[in] targetOpacityMicromapArraySizeInBytes +OptixResult optixOpacityMicromapArrayRelocate( OptixDeviceContext context, + CUstream stream, + const OptixRelocationInfo* info, + CUdeviceptr targetOpacityMicromapArray, + size_t targetOpacityMicromapArraySizeInBytes ); + + + +//@} +/// \defgroup optix_host_api_denoiser Denoiser +/// \ingroup optix_host_api +//@{ + +/// Creates a denoiser object with the given options, using built-in inference models +/// +/// 'modelKind' selects the model used for inference. +/// Inference for the built-in models can be guided (giving hints to improve image quality) with +/// albedo and normal vector images in the guide layer (see 'optixDenoiserInvoke'). +/// Use of these images must be enabled in 'OptixDenoiserOptions'. +/// +/// \param[in] context +/// \param[in] modelKind +/// \param[in] options +/// \param[out] denoiser +OptixResult optixDenoiserCreate( OptixDeviceContext context, + OptixDenoiserModelKind modelKind, + const OptixDenoiserOptions* options, + OptixDenoiser* denoiser ); + +/// Creates a denoiser object with the given options, using a provided inference model +/// +/// 'userData' and 'userDataSizeInBytes' provide a user model for inference. +/// The memory passed in userData will be accessed only during the invocation of this function and +/// can be freed after it returns. +/// The user model must export only one weight set which determines both the model kind and the +/// required set of guide images. 
+/// +/// \param[in] context +/// \param[in] userData +/// \param[in] userDataSizeInBytes +/// \param[out] denoiser +OptixResult optixDenoiserCreateWithUserModel( OptixDeviceContext context, + const void* userData, size_t userDataSizeInBytes, OptixDenoiser* denoiser ); + +/// Destroys the denoiser object and any associated host resources. +OptixResult optixDenoiserDestroy( OptixDenoiser denoiser ); + +/// Computes the GPU memory resources required to execute the denoiser. +/// +/// Memory for state and scratch buffers must be allocated with the sizes in 'returnSizes' and scratch memory +/// passed to optixDenoiserSetup, optixDenoiserInvoke, +/// optixDenoiserComputeIntensity and optixDenoiserComputeAverageColor. +/// For tiled denoising an overlap area ('overlapWindowSizeInPixels') must be added to each tile on all sides +/// which increases the amount of +/// memory needed to denoise a tile. In case of tiling use withOverlapScratchSizeInBytes for scratch memory size. +/// If only full resolution images are denoised, withoutOverlapScratchSizeInBytes can be used which is always +/// smaller than withOverlapScratchSizeInBytes. +/// +/// 'outputWidth' and 'outputHeight' is the dimension of the image to be denoised (without overlap in case tiling +/// is being used). +/// 'outputWidth' and 'outputHeight' must be greater than or equal to the dimensions passed to optixDenoiserSetup. +/// +/// \param[in] denoiser +/// \param[in] outputWidth +/// \param[in] outputHeight +/// \param[out] returnSizes +OptixResult optixDenoiserComputeMemoryResources( const OptixDenoiser denoiser, + unsigned int outputWidth, + unsigned int outputHeight, + OptixDenoiserSizes* returnSizes ); + +/// Initializes the state required by the denoiser. +/// +/// 'inputWidth' and 'inputHeight' must include overlap on both sides of the image if tiling is being used. The overlap is +/// returned by #optixDenoiserComputeMemoryResources. 
+/// For subsequent calls to #optixDenoiserInvoke 'inputWidth' and 'inputHeight' are the maximum dimensions +/// of the input layers. Dimensions of the input layers passed to #optixDenoiserInvoke may be different in each +/// invocation however they always must be smaller than 'inputWidth' and 'inputHeight' passed to #optixDenoiserSetup. +/// +/// \param[in] denoiser +/// \param[in] stream +/// \param[in] inputWidth +/// \param[in] inputHeight +/// \param[in] denoiserState +/// \param[in] denoiserStateSizeInBytes +/// \param[in] scratch +/// \param[in] scratchSizeInBytes +OptixResult optixDenoiserSetup( OptixDenoiser denoiser, + CUstream stream, + unsigned int inputWidth, + unsigned int inputHeight, + CUdeviceptr denoiserState, + size_t denoiserStateSizeInBytes, + CUdeviceptr scratch, + size_t scratchSizeInBytes ); + +/// Invokes denoiser on a set of input data and produces at least one output image. +/// State memory must be available during the execution of the +/// denoiser (or until optixDenoiserSetup is called with a new state memory pointer). +/// Scratch memory passed is used only for the duration of this function. +/// Scratch and state memory sizes must have a size greater than or equal to the sizes as returned by +/// optixDenoiserComputeMemoryResources. +/// +/// 'inputOffsetX' and 'inputOffsetY' are pixel offsets in the 'inputLayers' image +/// specifying the beginning of the image without overlap. When denoising an entire image without tiling +/// there is no overlap and 'inputOffsetX' and 'inputOffsetY' must be zero. When denoising a tile which is +/// adjacent to one of the four sides of the entire image the corresponding offsets must also be zero since +/// there is no overlap at the side adjacent to the image border. +/// +/// 'guideLayer' provides additional information to the denoiser. When providing albedo and normal vector +/// guide images, the corresponding fields in the 'OptixDenoiserOptions' must be +/// enabled, see #optixDenoiserCreate. 
+/// 'guideLayer' must not be null. If a guide image in 'OptixDenoiserOptions' is not enabled, the +/// corresponding image in 'OptixDenoiserGuideLayer' is ignored. +/// +/// If OPTIX_DENOISER_MODEL_KIND_TEMPORAL or OPTIX_DENOISER_MODEL_KIND_TEMPORAL_AOV is selected, a 2d flow +/// image must be given in 'OptixDenoiserGuideLayer'. +/// It describes for each pixel the flow from the previous to the current frame (a 2d vector in pixel space). +/// The denoised beauty/AOV of the previous frame must be given in 'previousOutput'. +/// If this image is not available in the first frame of a sequence, the noisy beauty/AOV from the first frame +/// and zero flow vectors could be given as a substitute. +/// For non-temporal model kinds the flow image in 'OptixDenoiserGuideLayer' is ignored. +/// 'previousOutput' and +/// 'output' may refer to the same buffer, i.e. 'previousOutput' is first read by this function and later +/// overwritten with the denoised result. 'output' can be passed as 'previousOutput' to the next frame. +/// In other model kinds (not temporal) 'previousOutput' is ignored. +/// +/// The beauty layer must be given as the first entry in 'layers'. +/// In AOV type model kinds (OPTIX_DENOISER_MODEL_KIND_AOV or in user defined models implementing +/// kernel-prediction) additional layers for the AOV images can be given. +/// In each layer the noisy input image is given in 'input', the denoised output is written into the +/// 'output' image. input and output images may refer to the same buffer, with the restriction that +/// the pixel formats must be identical for input and output when the blend mode is selected (see +/// #OptixDenoiserParams). +/// +/// If OPTIX_DENOISER_MODEL_KIND_TEMPORAL or OPTIX_DENOISER_MODEL_KIND_TEMPORAL_AOV is selected, the denoised +/// image from the previous frame must be given in 'previousOutput' in the layer. 'previousOutput' and +/// 'output' may refer to the same buffer, i.e. 
'previousOutput' is first read by this function and later +/// overwritten with the denoised result. 'output' can be passed as 'previousOutput' to the next frame. +/// In other model kinds (not temporal) 'previousOutput' is ignored. +/// +/// If OPTIX_DENOISER_MODEL_KIND_TEMPORAL or OPTIX_DENOISER_MODEL_KIND_TEMPORAL_AOV is selected, the +/// normal vector guide image must be given as 3d vectors in camera space. In the other models only +/// the x and y channels are used and other channels are ignored. +/// +/// \param[in] denoiser +/// \param[in] stream +/// \param[in] params +/// \param[in] denoiserState +/// \param[in] denoiserStateSizeInBytes +/// \param[in] guideLayer +/// \param[in] layers +/// \param[in] numLayers +/// \param[in] inputOffsetX +/// \param[in] inputOffsetY +/// \param[in] scratch +/// \param[in] scratchSizeInBytes +OptixResult optixDenoiserInvoke( OptixDenoiser denoiser, + CUstream stream, + const OptixDenoiserParams* params, + CUdeviceptr denoiserState, + size_t denoiserStateSizeInBytes, + const OptixDenoiserGuideLayer* guideLayer, + const OptixDenoiserLayer* layers, + unsigned int numLayers, + unsigned int inputOffsetX, + unsigned int inputOffsetY, + CUdeviceptr scratch, + size_t scratchSizeInBytes ); + +/// Computes the logarithmic average intensity of the given image. The returned value 'outputIntensity' +/// is multiplied with the RGB values of the input image/tile in optixDenoiserInvoke if given in the parameter +/// OptixDenoiserParams::hdrIntensity (otherwise 'hdrIntensity' must be a null pointer). This is useful for +/// denoising HDR images which are very dark or bright. +/// When denoising tiles the intensity of the entire image should be computed, i.e. not per tile to get +/// consistent results. +/// +/// For each RGB pixel in the inputImage the intensity is calculated and summed if it is greater than 1e-8f: +/// intensity = log(r * 0.212586f + g * 0.715170f + b * 0.072200f). 
+/// The function returns 0.18 / exp(sum of intensities / number of summed pixels). +/// More details can be found in the Reinhard tonemapping paper: +/// http://www.cmap.polytechnique.fr/~peyre/cours/x2005signal/hdr_photographic.pdf +/// +/// The size of scratch memory required can be queried with #optixDenoiserComputeMemoryResources. +/// +/// data type unsigned char is not supported for 'inputImage', it must be 3 or 4 component half/float. +/// +/// \param[in] denoiser +/// \param[in] stream +/// \param[in] inputImage +/// \param[out] outputIntensity single float +/// \param[in] scratch +/// \param[in] scratchSizeInBytes +OptixResult optixDenoiserComputeIntensity( OptixDenoiser denoiser, + CUstream stream, + const OptixImage2D* inputImage, + CUdeviceptr outputIntensity, + CUdeviceptr scratch, + size_t scratchSizeInBytes ); + +/// Computes the logarithmic average of each of the first three channels of the given image. +/// When denoising tiles the intensity of the entire image should be computed, i.e. not per tile to get +/// consistent results. +/// +/// The size of scratch memory required can be queried with #optixDenoiserComputeMemoryResources. +/// +/// data type unsigned char is not supported for 'inputImage', it must be 3 or 4 component half/float.
+/// +/// \param[in] denoiser +/// \param[in] stream +/// \param[in] inputImage +/// \param[out] outputAverageColor three floats +/// \param[in] scratch +/// \param[in] scratchSizeInBytes +OptixResult optixDenoiserComputeAverageColor( OptixDenoiser denoiser, + CUstream stream, + const OptixImage2D* inputImage, + CUdeviceptr outputAverageColor, + CUdeviceptr scratch, + size_t scratchSizeInBytes ); + +//@} + +#ifdef __cplusplus +} +#endif + +#include "optix_function_table.h" + +#endif // __optix_optix_7_host_h__ diff --git a/dependencies/optix/optix_7_types.h b/dependencies/optix/optix_7_types.h new file mode 100644 index 0000000000000000000000000000000000000000..e9dbcf6437be1c107ce017b8b0833c2c7ab94c8d --- /dev/null +++ b/dependencies/optix/optix_7_types.h @@ -0,0 +1,2250 @@ + +/* + * Copyright (c) 2021 NVIDIA Corporation. All rights reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and proprietary + * rights in and to this software, related documentation and any modifications thereto. + * Any use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation is strictly + * prohibited. + * + * TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THIS SOFTWARE IS PROVIDED *AS IS* + * AND NVIDIA AND ITS SUPPLIERS DISCLAIM ALL WARRANTIES, EITHER EXPRESS OR IMPLIED, + * INCLUDING, BUT NOT LIMITED TO, IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE. 
IN NO EVENT SHALL NVIDIA OR ITS SUPPLIERS BE LIABLE FOR ANY + * SPECIAL, INCIDENTAL, INDIRECT, OR CONSEQUENTIAL DAMAGES WHATSOEVER (INCLUDING, WITHOUT + * LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF + * BUSINESS INFORMATION, OR ANY OTHER PECUNIARY LOSS) ARISING OUT OF THE USE OF OR + * INABILITY TO USE THIS SOFTWARE, EVEN IF NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGES + */ + +/// @file +/// @author NVIDIA Corporation +/// @brief OptiX public API header +/// +/// OptiX types include file -- defines types and enums used by the API. +/// For the math library routines include optix_math.h + +#if !defined( __OPTIX_INCLUDE_INTERNAL_HEADERS__ ) +#error("optix_7_types.h is an internal header file and must not be used directly. Please use optix_types.h, optix_host.h, optix_device.h or optix.h instead.") +#endif + +#ifndef __optix_optix_7_types_h__ +#define __optix_optix_7_types_h__ + +#if !defined(__CUDACC_RTC__) +#include <stddef.h> /* for size_t */ +#endif + + + +/// \defgroup optix_types Types +/// \brief OptiX Types + +/** \addtogroup optix_types +@{ +*/ + +// This typedef should match the one in cuda.h in order to avoid compilation errors. 
+#if defined(_WIN64) || defined(__LP64__) +/// CUDA device pointer +typedef unsigned long long CUdeviceptr; +#else +/// CUDA device pointer +typedef unsigned int CUdeviceptr; +#endif + +/// Opaque type representing a device context +typedef struct OptixDeviceContext_t* OptixDeviceContext; + +/// Opaque type representing a module +typedef struct OptixModule_t* OptixModule; + +/// Opaque type representing a program group +typedef struct OptixProgramGroup_t* OptixProgramGroup; + +/// Opaque type representing a pipeline +typedef struct OptixPipeline_t* OptixPipeline; + +/// Opaque type representing a denoiser instance +typedef struct OptixDenoiser_t* OptixDenoiser; + +/// Opaque type representing a work task +typedef struct OptixTask_t* OptixTask; + +/// Traversable handle +typedef unsigned long long OptixTraversableHandle; + +/// Visibility mask +typedef unsigned int OptixVisibilityMask; + +/// Size of the SBT record headers. +#define OPTIX_SBT_RECORD_HEADER_SIZE ( (size_t)32 ) + +/// Alignment requirement for device pointers in OptixShaderBindingTable. +#define OPTIX_SBT_RECORD_ALIGNMENT 16ull + +/// Alignment requirement for output and temporary buffers for acceleration structures. +#define OPTIX_ACCEL_BUFFER_BYTE_ALIGNMENT 128ull + +/// Alignment requirement for OptixBuildInputInstanceArray::instances. +#define OPTIX_INSTANCE_BYTE_ALIGNMENT 16ull + +/// Alignment requirement for OptixBuildInputCustomPrimitiveArray::aabbBuffers +#define OPTIX_AABB_BUFFER_BYTE_ALIGNMENT 8ull + +/// Alignment requirement for OptixBuildInputTriangleArray::preTransform +#define OPTIX_GEOMETRY_TRANSFORM_BYTE_ALIGNMENT 16ull + +/// Alignment requirement for OptixStaticTransform, OptixMatrixMotionTransform, OptixSRTMotionTransform. +#define OPTIX_TRANSFORM_BYTE_ALIGNMENT 64ull + +/// Maximum number of registers allowed. Defaults to no explicit limit. +#define OPTIX_COMPILE_DEFAULT_MAX_REGISTER_COUNT 0 + +/// Maximum number of payload types allowed. 
+#define OPTIX_COMPILE_DEFAULT_MAX_PAYLOAD_TYPE_COUNT 8 + +/// Maximum number of payload values allowed. +#define OPTIX_COMPILE_DEFAULT_MAX_PAYLOAD_VALUE_COUNT 32 + +/// Opacity micromaps encode the states of microtriangles in either 1 bit (2-state) or 2 bits (4-state) using +/// the following values. +#define OPTIX_OPACITY_MICROMAP_STATE_TRANSPARENT ( 0 ) +#define OPTIX_OPACITY_MICROMAP_STATE_OPAQUE ( 1 ) +#define OPTIX_OPACITY_MICROMAP_STATE_UNKNOWN_TRANSPARENT ( 2 ) +#define OPTIX_OPACITY_MICROMAP_STATE_UNKNOWN_OPAQUE ( 3 ) + +/// Predefined index to indicate that a triangle in the BVH build doesn't have an associated opacity micromap, +/// and that it should revert to one of the four possible states for the full triangle. +#define OPTIX_OPACITY_MICROMAP_PREDEFINED_INDEX_FULLY_TRANSPARENT ( -1 ) +#define OPTIX_OPACITY_MICROMAP_PREDEFINED_INDEX_FULLY_OPAQUE ( -2 ) +#define OPTIX_OPACITY_MICROMAP_PREDEFINED_INDEX_FULLY_UNKNOWN_TRANSPARENT ( -3 ) +#define OPTIX_OPACITY_MICROMAP_PREDEFINED_INDEX_FULLY_UNKNOWN_OPAQUE ( -4 ) + +/// Alignment requirement for opacity micromap array buffers +#define OPTIX_OPACITY_MICROMAP_ARRAY_BUFFER_BYTE_ALIGNMENT 128ull + +/// Maximum subdivision level for opacity micromaps +#define OPTIX_OPACITY_MICROMAP_MAX_SUBDIVISION_LEVEL 12 + + +/// Result codes returned from API functions +/// +/// All host side API functions return OptixResult with the exception of optixGetErrorName +/// and optixGetErrorString. When successful OPTIX_SUCCESS is returned. All return codes +/// except for OPTIX_SUCCESS should be assumed to be errors as opposed to a warning. 
+/// +/// \see #optixGetErrorName(), #optixGetErrorString() +typedef enum OptixResult +{ + OPTIX_SUCCESS = 0, + OPTIX_ERROR_INVALID_VALUE = 7001, + OPTIX_ERROR_HOST_OUT_OF_MEMORY = 7002, + OPTIX_ERROR_INVALID_OPERATION = 7003, + OPTIX_ERROR_FILE_IO_ERROR = 7004, + OPTIX_ERROR_INVALID_FILE_FORMAT = 7005, + OPTIX_ERROR_DISK_CACHE_INVALID_PATH = 7010, + OPTIX_ERROR_DISK_CACHE_PERMISSION_ERROR = 7011, + OPTIX_ERROR_DISK_CACHE_DATABASE_ERROR = 7012, + OPTIX_ERROR_DISK_CACHE_INVALID_DATA = 7013, + OPTIX_ERROR_LAUNCH_FAILURE = 7050, + OPTIX_ERROR_INVALID_DEVICE_CONTEXT = 7051, + OPTIX_ERROR_CUDA_NOT_INITIALIZED = 7052, + OPTIX_ERROR_VALIDATION_FAILURE = 7053, + OPTIX_ERROR_INVALID_PTX = 7200, + OPTIX_ERROR_INVALID_LAUNCH_PARAMETER = 7201, + OPTIX_ERROR_INVALID_PAYLOAD_ACCESS = 7202, + OPTIX_ERROR_INVALID_ATTRIBUTE_ACCESS = 7203, + OPTIX_ERROR_INVALID_FUNCTION_USE = 7204, + OPTIX_ERROR_INVALID_FUNCTION_ARGUMENTS = 7205, + OPTIX_ERROR_PIPELINE_OUT_OF_CONSTANT_MEMORY = 7250, + OPTIX_ERROR_PIPELINE_LINK_ERROR = 7251, + OPTIX_ERROR_ILLEGAL_DURING_TASK_EXECUTE = 7270, + OPTIX_ERROR_INTERNAL_COMPILER_ERROR = 7299, + OPTIX_ERROR_DENOISER_MODEL_NOT_SET = 7300, + OPTIX_ERROR_DENOISER_NOT_INITIALIZED = 7301, + OPTIX_ERROR_NOT_COMPATIBLE = 7400, + OPTIX_ERROR_PAYLOAD_TYPE_MISMATCH = 7500, + OPTIX_ERROR_PAYLOAD_TYPE_RESOLUTION_FAILED = 7501, + OPTIX_ERROR_PAYLOAD_TYPE_ID_INVALID = 7502, + OPTIX_ERROR_NOT_SUPPORTED = 7800, + OPTIX_ERROR_UNSUPPORTED_ABI_VERSION = 7801, + OPTIX_ERROR_FUNCTION_TABLE_SIZE_MISMATCH = 7802, + OPTIX_ERROR_INVALID_ENTRY_FUNCTION_OPTIONS = 7803, + OPTIX_ERROR_LIBRARY_NOT_FOUND = 7804, + OPTIX_ERROR_ENTRY_SYMBOL_NOT_FOUND = 7805, + OPTIX_ERROR_LIBRARY_UNLOAD_FAILURE = 7806, + OPTIX_ERROR_DEVICE_OUT_OF_MEMORY = 7807, + OPTIX_ERROR_CUDA_ERROR = 7900, + OPTIX_ERROR_INTERNAL_ERROR = 7990, + OPTIX_ERROR_UNKNOWN = 7999, +} OptixResult; + +/// Parameters used for #optixDeviceContextGetProperty() +/// +/// \see #optixDeviceContextGetProperty() +typedef enum 
OptixDeviceProperty +{ + /// Maximum value for OptixPipelineLinkOptions::maxTraceDepth. sizeof( unsigned int ) + OPTIX_DEVICE_PROPERTY_LIMIT_MAX_TRACE_DEPTH = 0x2001, + + /// Maximum value to pass into optixPipelineSetStackSize for parameter + /// maxTraversableGraphDepth. sizeof( unsigned int ) + OPTIX_DEVICE_PROPERTY_LIMIT_MAX_TRAVERSABLE_GRAPH_DEPTH = 0x2002, + + /// The maximum number of primitives (over all build inputs) as input to a single + /// Geometry Acceleration Structure (GAS). sizeof( unsigned int ) + OPTIX_DEVICE_PROPERTY_LIMIT_MAX_PRIMITIVES_PER_GAS = 0x2003, + + /// The maximum number of instances (over all build inputs) as input to a single + /// Instance Acceleration Structure (IAS). sizeof( unsigned int ) + OPTIX_DEVICE_PROPERTY_LIMIT_MAX_INSTANCES_PER_IAS = 0x2004, + + /// The RT core version supported by the device (0 for no support, 10 for version + /// 1.0). sizeof( unsigned int ) + OPTIX_DEVICE_PROPERTY_RTCORE_VERSION = 0x2005, + + /// The maximum value for #OptixInstance::instanceId. sizeof( unsigned int ) + OPTIX_DEVICE_PROPERTY_LIMIT_MAX_INSTANCE_ID = 0x2006, + + /// The number of bits available for the #OptixInstance::visibilityMask. + /// Higher bits must be set to zero. sizeof( unsigned int ) + OPTIX_DEVICE_PROPERTY_LIMIT_NUM_BITS_INSTANCE_VISIBILITY_MASK = 0x2007, + + /// The maximum number of SBT records (summed over all build inputs) of a single + /// Geometry Acceleration Structure (GAS). sizeof( unsigned int ) + OPTIX_DEVICE_PROPERTY_LIMIT_MAX_SBT_RECORDS_PER_GAS = 0x2008, + + /// The maximum value for #OptixInstance::sbtOffset. sizeof( unsigned int ) + OPTIX_DEVICE_PROPERTY_LIMIT_MAX_SBT_OFFSET = 0x2009, +} OptixDeviceProperty; + +/// Type of the callback function used for log messages. +/// +/// \param[in] level The log level indicates the severity of the message. See below for +/// possible values. +/// \param[in] tag A terse message category description (e.g., 'SCENE STAT'). 
+/// \param[in] message Null-terminated log message (without newline at the end). +/// \param[in] cbdata Callback data that was provided with the callback pointer. +/// +/// It is the user's responsibility to ensure thread safety within this function. +/// +/// The following log levels are defined. +/// +/// 0 disable Setting the callback level to 0 will disable all messages. The callback +/// function will not be called in this case. +/// 1 fatal A non-recoverable error. The context and/or OptiX itself might no longer +/// be in a usable state. +/// 2 error A recoverable error, e.g., when passing invalid call parameters. +/// 3 warning Hints that OptiX might not behave exactly as requested by the user or +/// may perform slower than expected. +/// 4 print Status or progress messages. +/// +/// Higher levels might occur. +/// +/// \see #optixDeviceContextSetLogCallback(), #OptixDeviceContextOptions +typedef void ( *OptixLogCallback )( unsigned int level, const char* tag, const char* message, void* cbdata ); + +/// Validation mode settings. +/// +/// When enabled, certain device code utilities will be enabled to provide as good debug and +/// error checking facilities as possible. +/// +/// +/// \see #optixDeviceContextCreate() +typedef enum OptixDeviceContextValidationMode +{ + OPTIX_DEVICE_CONTEXT_VALIDATION_MODE_OFF = 0, + OPTIX_DEVICE_CONTEXT_VALIDATION_MODE_ALL = 0xFFFFFFFF +} OptixDeviceContextValidationMode; + +/// Parameters used for #optixDeviceContextCreate() +/// +/// \see #optixDeviceContextCreate() +typedef struct OptixDeviceContextOptions +{ + /// Function pointer used when OptiX wishes to generate messages + OptixLogCallback logCallbackFunction; + /// Pointer stored and passed to logCallbackFunction when a message is generated + void* logCallbackData; + /// Maximum callback level to generate message for (see #OptixLogCallback) + int logCallbackLevel; + /// Validation mode of context. 
+ OptixDeviceContextValidationMode validationMode; +} OptixDeviceContextOptions; + +/// Flags used by #OptixBuildInputTriangleArray::flags +/// and #OptixBuildInputCurveArray::flag +/// and #OptixBuildInputCustomPrimitiveArray::flags +typedef enum OptixGeometryFlags +{ + /// No flags set + OPTIX_GEOMETRY_FLAG_NONE = 0, + + /// Disables the invocation of the anyhit program. + /// Can be overridden by OPTIX_INSTANCE_FLAG_ENFORCE_ANYHIT and OPTIX_RAY_FLAG_ENFORCE_ANYHIT. + OPTIX_GEOMETRY_FLAG_DISABLE_ANYHIT = 1u << 0, + + /// If set, an intersection with the primitive will trigger one and only one + /// invocation of the anyhit program. Otherwise, the anyhit program may be invoked + /// more than once. + OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL = 1u << 1, + + /// Prevent triangles from getting culled due to their orientation. + /// Effectively ignores ray flags + /// OPTIX_RAY_FLAG_CULL_BACK_FACING_TRIANGLES and OPTIX_RAY_FLAG_CULL_FRONT_FACING_TRIANGLES. + OPTIX_GEOMETRY_FLAG_DISABLE_TRIANGLE_FACE_CULLING = 1u << 2, +} OptixGeometryFlags; + +/// Legacy type: A subset of the hit kinds for built-in primitive intersections. +/// It is preferred to use optixGetPrimitiveType(), together with +/// optixIsFrontFaceHit() or optixIsBackFaceHit(). +/// +/// \see #optixGetHitKind() +typedef enum OptixHitKind +{ + /// Ray hit the triangle on the front face + OPTIX_HIT_KIND_TRIANGLE_FRONT_FACE = 0xFE, + /// Ray hit the triangle on the back face + OPTIX_HIT_KIND_TRIANGLE_BACK_FACE = 0xFF +} OptixHitKind; + +/// Format of indices used in #OptixBuildInputTriangleArray::indexFormat. 
+typedef enum OptixIndicesFormat +{ + /// No indices, this format must only be used in combination with triangle soups, i.e., numIndexTriplets must be zero + OPTIX_INDICES_FORMAT_NONE = 0, + /// Three shorts + OPTIX_INDICES_FORMAT_UNSIGNED_SHORT3 = 0x2102, + /// Three ints + OPTIX_INDICES_FORMAT_UNSIGNED_INT3 = 0x2103 +} OptixIndicesFormat; + +/// Format of vertices used in #OptixBuildInputTriangleArray::vertexFormat. +typedef enum OptixVertexFormat +{ + OPTIX_VERTEX_FORMAT_NONE = 0, ///< No vertices + OPTIX_VERTEX_FORMAT_FLOAT3 = 0x2121, ///< Vertices are represented by three floats + OPTIX_VERTEX_FORMAT_FLOAT2 = 0x2122, ///< Vertices are represented by two floats + OPTIX_VERTEX_FORMAT_HALF3 = 0x2123, ///< Vertices are represented by three halfs + OPTIX_VERTEX_FORMAT_HALF2 = 0x2124, ///< Vertices are represented by two halfs + OPTIX_VERTEX_FORMAT_SNORM16_3 = 0x2125, + OPTIX_VERTEX_FORMAT_SNORM16_2 = 0x2126 +} OptixVertexFormat; + +/// Format of transform used in #OptixBuildInputTriangleArray::transformFormat. +typedef enum OptixTransformFormat +{ + OPTIX_TRANSFORM_FORMAT_NONE = 0, ///< no transform, default for zero initialization + OPTIX_TRANSFORM_FORMAT_MATRIX_FLOAT12 = 0x21E1, ///< 3x4 row major affine matrix +} OptixTransformFormat; + + +/// Specifies whether to use a 2- or 4-state opacity micromap format. +typedef enum OptixOpacityMicromapFormat +{ + /// invalid format + OPTIX_OPACITY_MICROMAP_FORMAT_NONE = 0, + /// 0: Transparent, 1: Opaque + OPTIX_OPACITY_MICROMAP_FORMAT_2_STATE = 1, + /// 0: Transparent, 1: Opaque, 2: Unknown-Transparent, 3: Unknown-Opaque + OPTIX_OPACITY_MICROMAP_FORMAT_4_STATE = 2, +} OptixOpacityMicromapFormat; + +/// indexing mode of triangles to opacity micromaps in an array, used in #OptixBuildInputOpacityMicromap. 
+typedef enum OptixOpacityMicromapArrayIndexingMode +{ + /// No opacity micromap is used + OPTIX_OPACITY_MICROMAP_ARRAY_INDEXING_MODE_NONE = 0, + /// An implicit linear mapping of triangles to opacity micromaps in the + /// opacity micromap array is used. triangle[i] will use opacityMicromapArray[i]. + OPTIX_OPACITY_MICROMAP_ARRAY_INDEXING_MODE_LINEAR = 1, + /// OptixBuildInputOpacityMicromap::indexBuffer provides a per triangle array of predefined indices + /// and/or indices into OptixBuildInputOpacityMicromap::opacityMicromapArray. + /// See OptixBuildInputOpacityMicromap::indexBuffer for more details. + OPTIX_OPACITY_MICROMAP_ARRAY_INDEXING_MODE_INDEXED = 2, +} OptixOpacityMicromapArrayIndexingMode; + +/// Opacity micromap usage count for acceleration structure builds. +/// Specifies how many opacity micromaps of a specific type are referenced by triangles when building the AS. +/// Note that while this is similar to OptixOpacityMicromapHistogramEntry, the usage count specifies how many opacity micromaps +/// of a specific type are referenced by triangles in the AS. +typedef struct OptixOpacityMicromapUsageCount +{ + /// Number of opacity micromaps with this format and subdivision level referenced by triangles in the corresponding + /// triangle build input at AS build time. + unsigned int count; + /// Number of micro-triangles is 4^level. Valid levels are [0, 12] + unsigned int subdivisionLevel; + /// opacity micromap format. + OptixOpacityMicromapFormat format; +} OptixOpacityMicromapUsageCount; + +typedef struct OptixBuildInputOpacityMicromap +{ + /// Indexing mode of triangle to opacity micromap array mapping. + OptixOpacityMicromapArrayIndexingMode indexingMode; + + /// Device pointer to an opacity micromap array used by this build input array. + /// This buffer is required when #OptixBuildInputOpacityMicromap::indexingMode is + /// OPTIX_OPACITY_MICROMAP_ARRAY_INDEXING_MODE_LINEAR or OPTIX_OPACITY_MICROMAP_ARRAY_INDEXING_MODE_INDEXED. 
+ /// Must be zero if #OptixBuildInputOpacityMicromap::indexingMode is OPTIX_OPACITY_MICROMAP_ARRAY_INDEXING_MODE_NONE. + CUdeviceptr opacityMicromapArray; + + /// int16 or int32 buffer specifying which opacity micromap index to use for each triangle. + /// Instead of an actual index, one of the predefined indices + /// OPTIX_OPACITY_MICROMAP_PREDEFINED_INDEX_(FULLY_TRANSPARENT | FULLY_OPAQUE | FULLY_UNKNOWN_TRANSPARENT | FULLY_UNKNOWN_OPAQUE) + /// can be used to indicate that there is no opacity micromap for this particular triangle + /// but the triangle is in a uniform state and the selected behavior is applied + /// to the entire triangle. + /// This buffer is required when #OptixBuildInputOpacityMicromap::indexingMode is OPTIX_OPACITY_MICROMAP_ARRAY_INDEXING_MODE_INDEXED. + /// Must be zero if #OptixBuildInputOpacityMicromap::indexingMode is + /// OPTIX_OPACITY_MICROMAP_ARRAY_INDEXING_MODE_LINEAR or OPTIX_OPACITY_MICROMAP_ARRAY_INDEXING_MODE_NONE. + CUdeviceptr indexBuffer; + + /// 0, 2 or 4 (unused, 16 or 32 bit) + /// Must be non-zero when #OptixBuildInputOpacityMicromap::indexingMode is OPTIX_OPACITY_MICROMAP_ARRAY_INDEXING_MODE_INDEXED. + unsigned int indexSizeInBytes; + + /// Opacity micromap index buffer stride. If set to zero, indices are assumed to be tightly + /// packed and stride is inferred from #OptixBuildInputOpacityMicromap::indexSizeInBytes. + unsigned int indexStrideInBytes; + + /// Constant offset to non-negative opacity micromap indices + unsigned int indexOffset; + + /// Number of OptixOpacityMicromapUsageCount. + unsigned int numMicromapUsageCounts; + /// List of number of usages of opacity micromaps of format and subdivision combinations. + /// Counts with equal format and subdivision combination (duplicates) are added together. 
+ const OptixOpacityMicromapUsageCount* micromapUsageCounts; +} OptixBuildInputOpacityMicromap; + +typedef struct OptixRelocateInputOpacityMicromap +{ + /// Device pointer to a relocated opacity micromap array used by the source build input array. + /// May be zero when no micromaps were used in the source accel, or the referenced opacity + /// micromaps don't require relocation (for example relocation of a GAS on the source device). + CUdeviceptr opacityMicromapArray; +} OptixRelocateInputOpacityMicromap; + + +/// Triangle inputs +/// +/// \see #OptixBuildInput::triangleArray +typedef struct OptixBuildInputTriangleArray +{ + /// Points to host array of device pointers, one per motion step. Host array size must match the number of + /// motion keys as set in #OptixMotionOptions (or an array of size 1 if OptixMotionOptions::numKeys is set + /// to 0 or 1). Each per motion key device pointer must point to an array of vertices of the + /// triangles in the format as described by vertexFormat. The minimum alignment must match the natural + /// alignment of the type as specified in the vertexFormat, i.e., for OPTIX_VERTEX_FORMAT_FLOATX 4-byte, + /// for all others a 2-byte alignment. However, a 16-byte stride (and buffer alignment) is recommended for + /// vertices of format OPTIX_VERTEX_FORMAT_FLOAT3 for GAS build performance. + const CUdeviceptr* vertexBuffers; + + /// Number of vertices in each buffer in OptixBuildInputTriangleArray::vertexBuffers. + unsigned int numVertices; + + /// \see #OptixVertexFormat + OptixVertexFormat vertexFormat; + + /// Stride between vertices. If set to zero, vertices are assumed to be tightly + /// packed and stride is inferred from vertexFormat. + unsigned int vertexStrideInBytes; + + /// Optional pointer to array of 16 or 32-bit int triplets, one triplet per triangle. 
+ /// The minimum alignment must match the natural alignment of the type as specified in the indexFormat, i.e., + /// for OPTIX_INDICES_FORMAT_UNSIGNED_INT3 4-byte and for OPTIX_INDICES_FORMAT_UNSIGNED_SHORT3 a 2-byte alignment. + CUdeviceptr indexBuffer; + + /// Size of array in OptixBuildInputTriangleArray::indexBuffer. For build, needs to be zero if indexBuffer is \c nullptr. + unsigned int numIndexTriplets; + + /// \see #OptixIndicesFormat + OptixIndicesFormat indexFormat; + + /// Stride between triplets of indices. If set to zero, indices are assumed to be tightly + /// packed and stride is inferred from indexFormat. + unsigned int indexStrideInBytes; + + /// Optional pointer to array of floats + /// representing a 3x4 row major affine + /// transformation matrix. This pointer must be a multiple of OPTIX_GEOMETRY_TRANSFORM_BYTE_ALIGNMENT + CUdeviceptr preTransform; + + /// Array of flags, to specify flags per sbt record, + /// combinations of OptixGeometryFlags describing the + /// primitive behavior, size must match numSbtRecords + const unsigned int* flags; + + /// Number of sbt records available to the sbt index offset override. + unsigned int numSbtRecords; + + /// Device pointer to per-primitive local sbt index offset buffer. May be NULL. + /// Every entry must be in range [0,numSbtRecords-1]. + /// Size needs to be the number of primitives. + CUdeviceptr sbtIndexOffsetBuffer; + + /// Size of type of the sbt index offset. Needs to be 0, 1, 2 or 4 (8, 16 or 32 bit). + unsigned int sbtIndexOffsetSizeInBytes; + + /// Stride between the index offsets. If set to zero, the offsets are assumed to be tightly + /// packed and the stride matches the size of the type (sbtIndexOffsetSizeInBytes). + unsigned int sbtIndexOffsetStrideInBytes; + + /// Primitive index bias, applied in optixGetPrimitiveIndex(). + /// Sum of primitiveIndexOffset and number of triangles must not overflow 32bits. 
+ unsigned int primitiveIndexOffset; + + /// \see #OptixTransformFormat + OptixTransformFormat transformFormat; + + /// Optional opacity micromap inputs. + OptixBuildInputOpacityMicromap opacityMicromap; + +} OptixBuildInputTriangleArray; + +/// Triangle inputs +/// +/// \see #OptixRelocateInput::triangleArray +typedef struct OptixRelocateInputTriangleArray +{ + /// Number of sbt records available to the sbt index offset override. + /// Must match #OptixBuildInputTriangleArray::numSbtRecords of the source build input. + unsigned int numSbtRecords; + + /// Opacity micromap inputs. + OptixRelocateInputOpacityMicromap opacityMicromap; +} OptixRelocateInputTriangleArray; + +/// Builtin primitive types +/// +typedef enum OptixPrimitiveType +{ + /// Custom primitive. + OPTIX_PRIMITIVE_TYPE_CUSTOM = 0x2500, + /// B-spline curve of degree 2 with circular cross-section. + OPTIX_PRIMITIVE_TYPE_ROUND_QUADRATIC_BSPLINE = 0x2501, + /// B-spline curve of degree 3 with circular cross-section. + OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE = 0x2502, + /// Piecewise linear curve with circular cross-section. + OPTIX_PRIMITIVE_TYPE_ROUND_LINEAR = 0x2503, + /// CatmullRom curve with circular cross-section. + OPTIX_PRIMITIVE_TYPE_ROUND_CATMULLROM = 0x2504, + OPTIX_PRIMITIVE_TYPE_SPHERE = 0x2506, + /// Triangle. + OPTIX_PRIMITIVE_TYPE_TRIANGLE = 0x2531, +} OptixPrimitiveType; + +/// Builtin flags may be bitwise combined. +/// +/// \see #OptixPipelineCompileOptions::usesPrimitiveTypeFlags +typedef enum OptixPrimitiveTypeFlags +{ + /// Custom primitive. + OPTIX_PRIMITIVE_TYPE_FLAGS_CUSTOM = 1 << 0, + /// B-spline curve of degree 2 with circular cross-section. + OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_QUADRATIC_BSPLINE = 1 << 1, + /// B-spline curve of degree 3 with circular cross-section. + OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_CUBIC_BSPLINE = 1 << 2, + /// Piecewise linear curve with circular cross-section. 
+ OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_LINEAR = 1 << 3, + /// CatmullRom curve with circular cross-section. + OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_CATMULLROM = 1 << 4, + OPTIX_PRIMITIVE_TYPE_FLAGS_SPHERE = 1 << 6, + /// Triangle. + OPTIX_PRIMITIVE_TYPE_FLAGS_TRIANGLE = 1 << 31, +} OptixPrimitiveTypeFlags; + +/// Curve end cap types, for non-linear curves +/// +typedef enum OptixCurveEndcapFlags +{ + /// Default end caps. Round end caps for linear, no end caps for quadratic/cubic. + OPTIX_CURVE_ENDCAP_DEFAULT = 0, + /// Flat end caps at both ends of quadratic/cubic curve segments. Not valid for linear. + OPTIX_CURVE_ENDCAP_ON = 1 << 0, +} OptixCurveEndcapFlags; + +/// Curve inputs +/// +/// A curve is a swept surface defined by a 3D spline curve and a varying width (radius). A curve (or "strand") of +/// degree d (3=cubic, 2=quadratic, 1=linear) is represented by N > d vertices and N width values, and comprises N - d segments. +/// Each segment is defined by d+1 consecutive vertices. Each curve may have a different number of vertices. +/// +/// OptiX describes the curve array as a list of curve segments. The primitive id is the segment number. +/// It is the user's responsibility to maintain a mapping between curves and curve segments. +/// Each index buffer entry i = indexBuffer[primid] specifies the start of a curve segment, +/// represented by d+1 consecutive vertices in the vertex buffer, +/// and d+1 consecutive widths in the width buffer. Width is interpolated the same +/// way vertices are interpolated, that is, using the curve basis. +/// +/// Each curves build input has only one SBT record. +/// To create curves with different materials in the same BVH, use multiple build inputs. +/// +/// \see #OptixBuildInput::curveArray +typedef struct OptixBuildInputCurveArray +{ + /// Curve degree and basis + /// \see #OptixPrimitiveType + OptixPrimitiveType curveType; + /// Number of primitives. Each primitive is a polynomial curve segment. 
+ unsigned int numPrimitives; + + /// Pointer to host array of device pointers, one per motion step. Host array size must match number of + /// motion keys as set in #OptixMotionOptions (or an array of size 1 if OptixMotionOptions::numKeys is set + /// to 1). Each per-motion-key device pointer must point to an array of floats (the vertices of the + /// curves). + const CUdeviceptr* vertexBuffers; + /// Number of vertices in each buffer in vertexBuffers. + unsigned int numVertices; + /// Stride between vertices. If set to zero, vertices are assumed to be tightly + /// packed and stride is sizeof( float3 ). + unsigned int vertexStrideInBytes; + + /// Parallel to vertexBuffers: a device pointer per motion step, each with numVertices float values, + /// specifying the curve width (radius) corresponding to each vertex. + const CUdeviceptr* widthBuffers; + /// Stride between widths. If set to zero, widths are assumed to be tightly + /// packed and stride is sizeof( float ). + unsigned int widthStrideInBytes; + + /// Reserved for future use. + const CUdeviceptr* normalBuffers; + /// Reserved for future use. + unsigned int normalStrideInBytes; + + /// Device pointer to array of unsigned ints, one per curve segment. + /// This buffer is required (unlike for OptixBuildInputTriangleArray). + /// Each index is the start of degree+1 consecutive vertices in vertexBuffers, + /// and corresponding widths in widthBuffers and normals in normalBuffers. + /// These define a single segment. Size of array is numPrimitives. + CUdeviceptr indexBuffer; + /// Stride between indices. If set to zero, indices are assumed to be tightly + /// packed and stride is sizeof( unsigned int ). + unsigned int indexStrideInBytes; + + /// Combination of OptixGeometryFlags describing the + /// primitive behavior. + unsigned int flag; + + /// Primitive index bias, applied in optixGetPrimitiveIndex(). + /// Sum of primitiveIndexOffset and number of primitives must not overflow 32bits. 
+ unsigned int primitiveIndexOffset; + + /// End cap flags, see OptixCurveEndcapFlags + unsigned int endcapFlags; +} OptixBuildInputCurveArray; + +/// Sphere inputs +/// +/// A sphere is defined by a center point and a radius. +/// Each center point is represented by a vertex in the vertex buffer. +/// There is either a single radius for all spheres, or the radii are represented by entries in the radius buffer. +/// +/// The vertex buffers and radius buffers point to a host array of device pointers, one per motion step. +/// Host array size must match the number of motion keys as set in #OptixMotionOptions (or an array of size 1 if OptixMotionOptions::numKeys is set +/// to 0 or 1). Each per motion key device pointer must point to an array of vertices corresponding to the center points of the spheres, or +/// an array of 1 or N radii. Format OPTIX_VERTEX_FORMAT_FLOAT3 is used for vertices; radii are plain float values. +/// +/// \see #OptixBuildInput::sphereArray +typedef struct OptixBuildInputSphereArray +{ + /// Pointer to host array of device pointers, one per motion step. Host array size must match number of + /// motion keys as set in #OptixMotionOptions (or an array of size 1 if OptixMotionOptions::numKeys is set + /// to 1). Each per-motion-key device pointer must point to an array of floats (the center points of + /// the spheres). + const CUdeviceptr* vertexBuffers; + + /// Stride between vertices. If set to zero, vertices are assumed to be tightly + /// packed and stride is sizeof( float3 ). + unsigned int vertexStrideInBytes; + /// Number of vertices in each buffer in vertexBuffers. + unsigned int numVertices; + + /// Parallel to vertexBuffers: a device pointer per motion step, each with numRadii float values, + /// specifying the sphere radius corresponding to each vertex. + const CUdeviceptr* radiusBuffers; + /// Stride between radii. If set to zero, radii are assumed to be tightly + /// packed and stride is sizeof( float ). 
+ unsigned int radiusStrideInBytes; + /// Boolean value indicating whether a single radius per radius buffer is used, + /// or the number of radii in radiusBuffers equals numVertices. + int singleRadius; + + /// Array of flags, to specify flags per sbt record, + /// combinations of OptixGeometryFlags describing the + /// primitive behavior, size must match numSbtRecords + const unsigned int* flags; + + /// Number of sbt records available to the sbt index offset override. + unsigned int numSbtRecords; + /// Device pointer to per-primitive local sbt index offset buffer. May be NULL. + /// Every entry must be in range [0,numSbtRecords-1]. + /// Size needs to be the number of primitives. + CUdeviceptr sbtIndexOffsetBuffer; + /// Size of type of the sbt index offset. Needs to be 0, 1, 2 or 4 (8, 16 or 32 bit). + unsigned int sbtIndexOffsetSizeInBytes; + /// Stride between the sbt index offsets. If set to zero, the offsets are assumed to be tightly + /// packed and the stride matches the size of the type (sbtIndexOffsetSizeInBytes). + unsigned int sbtIndexOffsetStrideInBytes; + + /// Primitive index bias, applied in optixGetPrimitiveIndex(). + /// Sum of primitiveIndexOffset and number of primitives must not overflow 32bits. + unsigned int primitiveIndexOffset; +} OptixBuildInputSphereArray; + +/// AABB inputs +typedef struct OptixAabb +{ + float minX; ///< Lower extent in X direction. + float minY; ///< Lower extent in Y direction. + float minZ; ///< Lower extent in Z direction. + float maxX; ///< Upper extent in X direction. + float maxY; ///< Upper extent in Y direction. + float maxZ; ///< Upper extent in Z direction. +} OptixAabb; + +/// Custom primitive inputs +/// +/// \see #OptixBuildInput::customPrimitiveArray +typedef struct OptixBuildInputCustomPrimitiveArray +{ + /// Points to host array of device pointers to AABBs (type OptixAabb), one per motion step. 
+ /// Host array size must match number of motion keys as set in OptixMotionOptions (or an array of size 1 + /// if OptixMotionOptions::numKeys is set to 1). + /// Each device pointer must be a multiple of OPTIX_AABB_BUFFER_BYTE_ALIGNMENT. + const CUdeviceptr* aabbBuffers; + + /// Number of primitives in each buffer (i.e., per motion step) in + /// #OptixBuildInputCustomPrimitiveArray::aabbBuffers. + unsigned int numPrimitives; + + /// Stride between AABBs (per motion key). If set to zero, the aabbs are assumed to be tightly + /// packed and the stride is assumed to be sizeof( OptixAabb ). + /// If non-zero, the value must be a multiple of OPTIX_AABB_BUFFER_BYTE_ALIGNMENT. + unsigned int strideInBytes; + + /// Array of flags, to specify flags per sbt record, + /// combinations of OptixGeometryFlags describing the + /// primitive behavior, size must match numSbtRecords + const unsigned int* flags; + + /// Number of sbt records available to the sbt index offset override. + unsigned int numSbtRecords; + + /// Device pointer to per-primitive local sbt index offset buffer. May be NULL. + /// Every entry must be in range [0,numSbtRecords-1]. + /// Size needs to be the number of primitives. + CUdeviceptr sbtIndexOffsetBuffer; + + /// Size of type of the sbt index offset. Needs to be 0, 1, 2 or 4 (8, 16 or 32 bit). + unsigned int sbtIndexOffsetSizeInBytes; + + /// Stride between the index offsets. If set to zero, the offsets are assumed to be tightly + /// packed and the stride matches the size of the type (sbtIndexOffsetSizeInBytes). + unsigned int sbtIndexOffsetStrideInBytes; + + /// Primitive index bias, applied in optixGetPrimitiveIndex(). + /// Sum of primitiveIndexOffset and number of primitives must not overflow 32bits. 
+ unsigned int primitiveIndexOffset; +} OptixBuildInputCustomPrimitiveArray; + +/// Instance and instance pointer inputs +/// +/// \see #OptixBuildInput::instanceArray +typedef struct OptixBuildInputInstanceArray +{ + /// If OptixBuildInput::type is OPTIX_BUILD_INPUT_TYPE_INSTANCE_POINTERS instances and + /// aabbs should be interpreted as arrays of pointers instead of arrays of structs. + /// + /// This pointer must be a multiple of OPTIX_INSTANCE_BYTE_ALIGNMENT if + /// OptixBuildInput::type is OPTIX_BUILD_INPUT_TYPE_INSTANCES. The array elements must + /// be a multiple of OPTIX_INSTANCE_BYTE_ALIGNMENT if OptixBuildInput::type is + /// OPTIX_BUILD_INPUT_TYPE_INSTANCE_POINTERS. + CUdeviceptr instances; + + /// Number of elements in #OptixBuildInputInstanceArray::instances. + unsigned int numInstances; + + /// Only valid for OPTIX_BUILD_INPUT_TYPE_INSTANCE + /// Defines the stride between instances. A stride of 0 indicates a tight packing, i.e., + /// stride = sizeof( OptixInstance ) + unsigned int instanceStride; +} OptixBuildInputInstanceArray; + +/// Instance and instance pointer inputs +/// +/// \see #OptixRelocateInput::instanceArray +typedef struct OptixRelocateInputInstanceArray +{ + /// Number of elements in #OptixRelocateInputInstanceArray::traversableHandles. + /// Must match #OptixBuildInputInstanceArray::numInstances of the source build input. + unsigned int numInstances; + + /// These are the traversable handles of the instances (See OptixInstance::traversableHandle) + /// These can be used when also relocating the instances. No updates to + /// the bounds are performed. Use optixAccelBuild to update the bounds. + /// 'traversableHandles' may be zero when the traversables are not relocated + /// (i.e. relocation of an IAS on the source device). + CUdeviceptr traversableHandles; + +} OptixRelocateInputInstanceArray; + +/// Enum to distinguish the different build input types. 
+/// +/// \see #OptixBuildInput::type +typedef enum OptixBuildInputType +{ + /// Triangle inputs. \see #OptixBuildInputTriangleArray + OPTIX_BUILD_INPUT_TYPE_TRIANGLES = 0x2141, + /// Custom primitive inputs. \see #OptixBuildInputCustomPrimitiveArray + OPTIX_BUILD_INPUT_TYPE_CUSTOM_PRIMITIVES = 0x2142, + /// Instance inputs. \see #OptixBuildInputInstanceArray + OPTIX_BUILD_INPUT_TYPE_INSTANCES = 0x2143, + /// Instance pointer inputs. \see #OptixBuildInputInstanceArray + OPTIX_BUILD_INPUT_TYPE_INSTANCE_POINTERS = 0x2144, + /// Curve inputs. \see #OptixBuildInputCurveArray + OPTIX_BUILD_INPUT_TYPE_CURVES = 0x2145, + /// Sphere inputs. \see #OptixBuildInputSphereArray + OPTIX_BUILD_INPUT_TYPE_SPHERES = 0x2146 +} OptixBuildInputType; + +/// Build inputs. +/// +/// All of them support motion and the size of the data arrays needs to match the number of motion steps +/// +/// \see #optixAccelComputeMemoryUsage(), #optixAccelBuild() +typedef struct OptixBuildInput +{ + /// The type of the build input. + OptixBuildInputType type; + + union + { + /// Triangle inputs. + OptixBuildInputTriangleArray triangleArray; + /// Curve inputs. + OptixBuildInputCurveArray curveArray; + /// Sphere inputs. + OptixBuildInputSphereArray sphereArray; + /// Custom primitive inputs. + OptixBuildInputCustomPrimitiveArray customPrimitiveArray; + /// Instance and instance pointer inputs. + OptixBuildInputInstanceArray instanceArray; + char pad[1024]; + }; +} OptixBuildInput; + +/// Relocation inputs. +/// +/// \see #optixAccelRelocate() +typedef struct OptixRelocateInput +{ + /// The type of the build input to relocate. + OptixBuildInputType type; + + union + { + /// Instance and instance pointer inputs. + OptixRelocateInputInstanceArray instanceArray; + + /// Triangle inputs. + OptixRelocateInputTriangleArray triangleArray; + + /// Inputs of any of the other types don't require any relocation data. + }; +} OptixRelocateInput; + +// Some 32-bit tools use this header. 
This static_assert fails for them because +// the default enum size is 4 bytes, rather than 8, under 32-bit compilers. +// This #ifndef allows them to disable the static assert. + +// TODO Define a static assert for C/pre-C++-11 +#if defined( __cplusplus ) && __cplusplus >= 201103L +static_assert( sizeof( OptixBuildInput ) == 8 + 1024, "OptixBuildInput has wrong size" ); +#endif + +/// Flags set on the #OptixInstance::flags. +/// +/// These can be or'ed together to combine multiple flags. +typedef enum OptixInstanceFlags +{ + /// No special flag set + OPTIX_INSTANCE_FLAG_NONE = 0, + + /// Prevent triangles from getting culled due to their orientation. + /// Effectively ignores ray flags + /// OPTIX_RAY_FLAG_CULL_BACK_FACING_TRIANGLES and OPTIX_RAY_FLAG_CULL_FRONT_FACING_TRIANGLES. + OPTIX_INSTANCE_FLAG_DISABLE_TRIANGLE_FACE_CULLING = 1u << 0, + + /// Flip triangle orientation. + /// This affects front/backface culling as well as the reported face in case of a hit. + OPTIX_INSTANCE_FLAG_FLIP_TRIANGLE_FACING = 1u << 1, + + /// Disable anyhit programs for all geometries of the instance. + /// Can be overridden by OPTIX_RAY_FLAG_ENFORCE_ANYHIT. + /// This flag is mutually exclusive with OPTIX_INSTANCE_FLAG_ENFORCE_ANYHIT. + OPTIX_INSTANCE_FLAG_DISABLE_ANYHIT = 1u << 2, + + /// Enables anyhit programs for all geometries of the instance. + /// Overrides OPTIX_GEOMETRY_FLAG_DISABLE_ANYHIT + /// Can be overridden by OPTIX_RAY_FLAG_DISABLE_ANYHIT. + /// This flag is mutually exclusive with OPTIX_INSTANCE_FLAG_DISABLE_ANYHIT. + OPTIX_INSTANCE_FLAG_ENFORCE_ANYHIT = 1u << 3, + + + /// Force 4-state opacity micromaps to behave as 2-state opacity micromaps during traversal. + OPTIX_INSTANCE_FLAG_FORCE_OPACITY_MICROMAP_2_STATE = 1u << 4, + /// Don't perform opacity micromap query for this instance. GAS must be built with ALLOW_DISABLE_OPACITY_MICROMAPS for this to be valid. + /// This flag overrides FORCE_OPACITY_MICROMAP_2_STATE instance and ray flags.
+ OPTIX_INSTANCE_FLAG_DISABLE_OPACITY_MICROMAPS = 1u << 5, + +} OptixInstanceFlags; + +/// Instances +/// +/// \see #OptixBuildInputInstanceArray::instances +typedef struct OptixInstance +{ + /// affine object-to-world transformation as 3x4 matrix in row-major layout + float transform[12]; + + /// Application supplied ID. The maximal ID can be queried using OPTIX_DEVICE_PROPERTY_LIMIT_MAX_INSTANCE_ID. + unsigned int instanceId; + + /// SBT record offset. Will only be used for instances of geometry acceleration structure (GAS) objects. + /// Needs to be set to 0 for instances of instance acceleration structure (IAS) objects. The maximal SBT offset + /// can be queried using OPTIX_DEVICE_PROPERTY_LIMIT_MAX_INSTANCE_SBT_OFFSET. + unsigned int sbtOffset; + + /// Visibility mask. If rayMask & instanceMask == 0 the instance is culled. The number of available bits can be + /// queried using OPTIX_DEVICE_PROPERTY_LIMIT_NUM_BITS_INSTANCE_VISIBILITY_MASK. + unsigned int visibilityMask; + + /// Any combination of OptixInstanceFlags is allowed. + unsigned int flags; + + /// Set with an OptixTraversableHandle. + OptixTraversableHandle traversableHandle; + + /// round up to 80-byte, to ensure 16-byte alignment + unsigned int pad[2]; +} OptixInstance; + +/// Builder Options +/// +/// Used for #OptixAccelBuildOptions::buildFlags. Can be or'ed together. +typedef enum OptixBuildFlags +{ + /// No special flags set. + OPTIX_BUILD_FLAG_NONE = 0, + + /// Allow updating the build with new vertex positions with subsequent calls to + /// optixAccelBuild. 
+ OPTIX_BUILD_FLAG_ALLOW_UPDATE = 1u << 0, + + OPTIX_BUILD_FLAG_ALLOW_COMPACTION = 1u << 1, + + OPTIX_BUILD_FLAG_PREFER_FAST_TRACE = 1u << 2, + + OPTIX_BUILD_FLAG_PREFER_FAST_BUILD = 1u << 3, + + /// Allow random access to build input vertices + /// See optixGetTriangleVertexData + /// optixGetLinearCurveVertexData + /// optixGetQuadraticBSplineVertexData + /// optixGetCubicBSplineVertexData + /// optixGetCatmullRomVertexData + /// optixGetSphereData + OPTIX_BUILD_FLAG_ALLOW_RANDOM_VERTEX_ACCESS = 1u << 4, + + /// Allow random access to instances + /// See optixGetInstanceTraversableFromIAS + OPTIX_BUILD_FLAG_ALLOW_RANDOM_INSTANCE_ACCESS = 1u << 5, + + /// Support updating the opacity micromap array and opacity micromap indices on refits. + /// May increase AS size and may have a small negative impact on traversal performance. + /// If this flag is absent, all opacity micromap inputs must remain unchanged between the initial AS builds and their subsequent refits. + OPTIX_BUILD_FLAG_ALLOW_OPACITY_MICROMAP_UPDATE = 1u << 6, + + /// If enabled, any instances referencing this GAS are allowed to disable the opacity micromap test through the DISABLE_OPACITY_MICROMAPS flag instance flag. + /// Note that the GAS will not be optimized for the attached opacity micromap Arrays if this flag is set, + /// which may result in reduced traversal performance. + OPTIX_BUILD_FLAG_ALLOW_DISABLE_OPACITY_MICROMAPS = 1u << 7, +} OptixBuildFlags; + + +/// Flags defining behavior of opacity micromaps in a opacity micromap array. +typedef enum OptixOpacityMicromapFlags +{ + OPTIX_OPACITY_MICROMAP_FLAG_NONE = 0, + OPTIX_OPACITY_MICROMAP_FLAG_PREFER_FAST_TRACE = 1 << 0, + OPTIX_OPACITY_MICROMAP_FLAG_PREFER_FAST_BUILD = 1 << 1, +} OptixOpacityMicromapFlags; + +/// Opacity micromap descriptor. 
+typedef struct OptixOpacityMicromapDesc +{ + /// Byte offset to opacity micromap in data input buffer of opacity micromap array build + unsigned int byteOffset; + /// Number of micro-triangles is 4^level. Valid levels are [0, 12] + unsigned short subdivisionLevel; + /// OptixOpacityMicromapFormat + unsigned short format; +} OptixOpacityMicromapDesc; + +/// Opacity micromap histogram entry. +/// Specifies how many opacity micromaps of a specific type are input to the opacity micromap array build. +/// Note that while this is similar to OptixOpacityMicromapUsageCount, the histogram entry specifies how many opacity micromaps +/// of a specific type are combined into an opacity micromap array. +typedef struct OptixOpacityMicromapHistogramEntry +{ + /// Number of opacity micromaps with the format and subdivision level that are input to the opacity micromap array build. + unsigned int count; + /// Number of micro-triangles is 4^level. Valid levels are [0, 12]. + unsigned int subdivisionLevel; + /// opacity micromap format. + OptixOpacityMicromapFormat format; +} OptixOpacityMicromapHistogramEntry; + +/// Inputs to opacity micromap array construction. +typedef struct OptixOpacityMicromapArrayBuildInput +{ + /// Applies to all opacity micromaps in array. + OptixOpacityMicromapFlags flags; + + /// 128B aligned base pointer for raw opacity micromap input data. + CUdeviceptr inputBuffer; + + /// One OptixOpacityMicromapDesc entry per opacity micromap. + CUdeviceptr perMicromapDescBuffer; + + /// Stride between OptixOpacityMicromapDescs in perMicromapDescBuffer. + /// If set to zero, the opacity micromap descriptors are assumed to be tightly packed and the stride is assumed to be sizeof( OptixOpacityMicromapDesc ). + unsigned int perMicromapDescStrideInBytes; + + /// Number of OptixOpacityMicromapHistogramEntry. + unsigned int numMicromapHistogramEntries; + /// Histogram over opacity micromaps of input format and subdivision combinations.
+ /// Counts of entries with equal format and subdivision combination (duplicates) are added together. + const OptixOpacityMicromapHistogramEntry* micromapHistogramEntries; +} OptixOpacityMicromapArrayBuildInput; + + +/// Conservative memory requirements for building a opacity micromap array +typedef struct OptixMicromapBufferSizes +{ + size_t outputSizeInBytes; + size_t tempSizeInBytes; +} OptixMicromapBufferSizes; + +/// Buffer inputs for opacity micromap array builds. +typedef struct OptixMicromapBuffers +{ + /// Output buffer + CUdeviceptr output; + /// Output buffer size + size_t outputSizeInBytes; + /// Temp buffer + CUdeviceptr temp; + /// Temp buffer size + size_t tempSizeInBytes; +} OptixMicromapBuffers; + + + +/// Enum to specify the acceleration build operation. +/// +/// Used in OptixAccelBuildOptions, which is then passed to optixAccelBuild and +/// optixAccelComputeMemoryUsage, this enum indicates whether to do a build or an update +/// of the acceleration structure. +/// +/// Acceleration structure updates utilize the same acceleration structure, but with +/// updated bounds. Updates are typically much faster than builds, however, large +/// perturbations can degrade the quality of the acceleration structure. +/// +/// \see #optixAccelComputeMemoryUsage(), #optixAccelBuild(), #OptixAccelBuildOptions +typedef enum OptixBuildOperation +{ + /// Perform a full build operation + OPTIX_BUILD_OPERATION_BUILD = 0x2161, + /// Perform an update using new bounds + OPTIX_BUILD_OPERATION_UPDATE = 0x2162, +} OptixBuildOperation; + +/// Enum to specify motion flags. +/// +/// \see #OptixMotionOptions::flags. 
+typedef enum OptixMotionFlags +{ + OPTIX_MOTION_FLAG_NONE = 0, + OPTIX_MOTION_FLAG_START_VANISH = 1u << 0, + OPTIX_MOTION_FLAG_END_VANISH = 1u << 1 +} OptixMotionFlags; + +/// Motion options +/// +/// \see #OptixAccelBuildOptions::motionOptions, #OptixMatrixMotionTransform::motionOptions, +/// #OptixSRTMotionTransform::motionOptions +typedef struct OptixMotionOptions +{ + /// If numKeys > 1, motion is enabled. timeBegin, + /// timeEnd and flags are all ignored when motion is disabled. + unsigned short numKeys; + + /// Combinations of #OptixMotionFlags + unsigned short flags; + + /// Point in time where motion starts. Must be lesser than timeEnd. + float timeBegin; + + /// Point in time where motion ends. Must be greater than timeBegin. + float timeEnd; +} OptixMotionOptions; + +/// Build options for acceleration structures. +/// +/// \see #optixAccelComputeMemoryUsage(), #optixAccelBuild() +typedef struct OptixAccelBuildOptions +{ + /// Combinations of OptixBuildFlags + unsigned int buildFlags; + + /// If OPTIX_BUILD_OPERATION_UPDATE the output buffer is assumed to contain the result + /// of a full build with OPTIX_BUILD_FLAG_ALLOW_UPDATE set and using the same number of + /// primitives. It is updated incrementally to reflect the current position of the + /// primitives. + /// If a BLAS has been built with OPTIX_BUILD_FLAG_ALLOW_OPACITY_MICROMAP_UPDATE, new opacity micromap arrays + /// and opacity micromap indices may be provided to the refit. + OptixBuildOperation operation; + + /// Options for motion. + OptixMotionOptions motionOptions; +} OptixAccelBuildOptions; + +/// Struct for querying builder allocation requirements. +/// +/// Once queried the sizes should be used to allocate device memory of at least these sizes. +/// +/// \see #optixAccelComputeMemoryUsage() +typedef struct OptixAccelBufferSizes +{ + /// The size in bytes required for the outputBuffer parameter to optixAccelBuild when + /// doing a build (OPTIX_BUILD_OPERATION_BUILD). 
+ size_t outputSizeInBytes; + + /// The size in bytes required for the tempBuffer parameter to optixAccelBuild when + /// doing a build (OPTIX_BUILD_OPERATION_BUILD). + size_t tempSizeInBytes; + + /// The size in bytes required for the tempBuffer parameter to optixAccelBuild + /// when doing an update (OPTIX_BUILD_OPERATION_UPDATE). This value can be different + /// than tempSizeInBytes used for a full build. Only non-zero if + /// OPTIX_BUILD_FLAG_ALLOW_UPDATE flag is set in OptixAccelBuildOptions. + size_t tempUpdateSizeInBytes; +} OptixAccelBufferSizes; + +/// Properties which can be emitted during acceleration structure build. +/// +/// \see #OptixAccelEmitDesc::type. +typedef enum OptixAccelPropertyType +{ + /// Size of a compacted acceleration structure. The device pointer points to a uint64. + OPTIX_PROPERTY_TYPE_COMPACTED_SIZE = 0x2181, + + /// OptixAabb * numMotionSteps + OPTIX_PROPERTY_TYPE_AABBS = 0x2182, +} OptixAccelPropertyType; + +/// Specifies a type and output destination for emitted post-build properties. +/// +/// \see #optixAccelBuild() +typedef struct OptixAccelEmitDesc +{ + /// Output buffer for the properties + CUdeviceptr result; + + /// Requested property + OptixAccelPropertyType type; +} OptixAccelEmitDesc; + +/// Used to store information related to relocation of optix data structures. +/// +/// \see #optixOpacityMicromapArrayGetRelocationInfo(), #optixOpacityMicromapArrayRelocate(), +/// #optixAccelGetRelocationInfo(), #optixAccelRelocate(), #optixCheckRelocationCompatibility() +typedef struct OptixRelocationInfo +{ + /// Opaque data, used internally, should not be modified + unsigned long long info[4]; +} OptixRelocationInfo; + +/// Static transform +/// +/// The device address of instances of this type must be a multiple of OPTIX_TRANSFORM_BYTE_ALIGNMENT.
+/// +/// \see #optixConvertPointerToTraversableHandle() +typedef struct OptixStaticTransform +{ + /// The traversable transformed by this transformation + OptixTraversableHandle child; + + /// Padding to make the transformations 16 byte aligned + unsigned int pad[2]; + + /// Affine object-to-world transformation as 3x4 matrix in row-major layout + float transform[12]; + + /// Affine world-to-object transformation as 3x4 matrix in row-major layout + /// Must be the inverse of the transform matrix + float invTransform[12]; +} OptixStaticTransform; + +/// Represents a matrix motion transformation. +/// +/// The device address of instances of this type must be a multiple of OPTIX_TRANSFORM_BYTE_ALIGNMENT. +/// +/// This struct, as defined here, handles only N=2 motion keys due to the fixed array length of its transform member. +/// The following example shows how to create instances for an arbitrary number N of motion keys: +/// +/// \code +/// float matrixData[N][12]; +/// ... // setup matrixData +/// +/// size_t transformSizeInBytes = sizeof( OptixMatrixMotionTransform ) + ( N-2 ) * 12 * sizeof( float ); +/// OptixMatrixMotionTransform* matrixMotionTransform = (OptixMatrixMotionTransform*) malloc( transformSizeInBytes ); +/// memset( matrixMotionTransform, 0, transformSizeInBytes ); +/// +/// ... // setup other members of matrixMotionTransform +/// matrixMotionTransform->motionOptions.numKeys = N; +/// memcpy( matrixMotionTransform->transform, matrixData, N * 12 * sizeof( float ) ); +/// +/// ... // copy matrixMotionTransform to device memory +/// free( matrixMotionTransform ) +/// \endcode +/// +/// \see #optixConvertPointerToTraversableHandle() +typedef struct OptixMatrixMotionTransform +{ + /// The traversable that is transformed by this transformation + OptixTraversableHandle child; + + /// The motion options for this transformation. + /// Must have at least two motion keys.
+ OptixMotionOptions motionOptions; + + /// Padding to make the transformation 16 byte aligned + unsigned int pad[3]; + + /// Affine object-to-world transformation as 3x4 matrix in row-major layout + float transform[2][12]; +} OptixMatrixMotionTransform; + +/// Represents an SRT transformation. +/// +/// An SRT transformation can represent a smooth rotation with fewer motion keys than a matrix transformation. Each +/// motion key is constructed from elements taken from a matrix S, a quaternion R, and a translation T. +/// +/// The scaling matrix +/// \f$S = \begin{bmatrix} sx & a & b & pvx \\ 0 & sy & c & pvy \\ 0 & 0 & sz & pvz \end{bmatrix}\f$ +// [ sx a b pvx ] +// S = [ 0 sy c pvy ] +// [ 0 0 sz pvz ] +/// defines an affine transformation that can include scale, shear, and a translation. +/// The translation allows to define the pivot point for the subsequent rotation. +/// +/// The quaternion R = [ qx, qy, qz, qw ] describes a rotation with angular component qw = cos(theta/2) and other +/// components [ qx, qy, qz ] = sin(theta/2) * [ ax, ay, az ] where the axis [ ax, ay, az ] is normalized. +/// +/// The translation matrix +/// \f$T = \begin{bmatrix} 1 & 0 & 0 & tx \\ 0 & 1 & 0 & ty \\ 0 & 0 & 1 & tz \end{bmatrix}\f$ +// [ 1 0 0 tx ] +// T = [ 0 1 0 ty ] +// [ 0 0 1 tz ] +/// defines another translation that is applied after the rotation. Typically, this translation includes +/// the inverse translation from the matrix S to reverse the translation for the pivot point for R. +/// +/// To obtain the effective transformation at time t, the elements of the components of S, R, and T will be interpolated +/// linearly. The components are then multiplied to obtain the combined transformation C = T * R * S. The transformation +/// C is the effective object-to-world transformations at time t, and C^(-1) is the effective world-to-object +/// transformation at time t. 
+/// +/// \see #OptixSRTMotionTransform::srtData, #optixConvertPointerToTraversableHandle() +typedef struct OptixSRTData +{ + /// \name Parameters describing the SRT transformation + /// @{ + float sx, a, b, pvx, sy, c, pvy, sz, pvz, qx, qy, qz, qw, tx, ty, tz; + /// @} +} OptixSRTData; + +// TODO Define a static assert for C/pre-C++-11 +#if defined( __cplusplus ) && __cplusplus >= 201103L +static_assert( sizeof( OptixSRTData ) == 16 * 4, "OptixSRTData has wrong size" ); +#endif + +/// Represents an SRT motion transformation. +/// +/// The device address of instances of this type must be a multiple of OPTIX_TRANSFORM_BYTE_ALIGNMENT. +/// +/// This struct, as defined here, handles only N=2 motion keys due to the fixed array length of its srtData member. +/// The following example shows how to create instances for an arbitrary number N of motion keys: +/// +/// \code +/// OptixSRTData srtData[N]; +/// ... // setup srtData +/// +/// size_t transformSizeInBytes = sizeof( OptixSRTMotionTransform ) + ( N-2 ) * sizeof( OptixSRTData ); +/// OptixSRTMotionTransform* srtMotionTransform = (OptixSRTMotionTransform*) malloc( transformSizeInBytes ); +/// memset( srtMotionTransform, 0, transformSizeInBytes ); +/// +/// ... // setup other members of srtMotionTransform +/// srtMotionTransform->motionOptions.numKeys = N; +/// memcpy( srtMotionTransform->srtData, srtData, N * sizeof( OptixSRTData ) ); +/// +/// ... // copy srtMotionTransform to device memory +/// free( srtMotionTransform ) +/// \endcode +/// +/// \see #optixConvertPointerToTraversableHandle() +typedef struct OptixSRTMotionTransform +{ + /// The traversable transformed by this transformation + OptixTraversableHandle child; + + /// The motion options for this transformation + /// Must have at least two motion keys. 
+ OptixMotionOptions motionOptions; + + /// Padding to make the SRT data 16 byte aligned + unsigned int pad[3]; + + /// The actual SRT data describing the transformation + OptixSRTData srtData[2]; +} OptixSRTMotionTransform; + +// TODO Define a static assert for C/pre-C++-11 +#if defined( __cplusplus ) && __cplusplus >= 201103L +static_assert( sizeof( OptixSRTMotionTransform ) == 8 + 12 + 12 + 2 * 16 * 4, "OptixSRTMotionTransform has wrong size" ); +#endif + +/// Traversable Handles +/// +/// \see #optixConvertPointerToTraversableHandle() +typedef enum OptixTraversableType +{ + /// Static transforms. \see #OptixStaticTransform + OPTIX_TRAVERSABLE_TYPE_STATIC_TRANSFORM = 0x21C1, + /// Matrix motion transform. \see #OptixMatrixMotionTransform + OPTIX_TRAVERSABLE_TYPE_MATRIX_MOTION_TRANSFORM = 0x21C2, + /// SRT motion transform. \see #OptixSRTMotionTransform + OPTIX_TRAVERSABLE_TYPE_SRT_MOTION_TRANSFORM = 0x21C3, +} OptixTraversableType; + +/// Pixel formats used by the denoiser. +/// +/// \see #OptixImage2D::format +typedef enum OptixPixelFormat +{ + OPTIX_PIXEL_FORMAT_HALF2 = 0x2207, ///< two halfs, XY + OPTIX_PIXEL_FORMAT_HALF3 = 0x2201, ///< three halfs, RGB + OPTIX_PIXEL_FORMAT_HALF4 = 0x2202, ///< four halfs, RGBA + OPTIX_PIXEL_FORMAT_FLOAT2 = 0x2208, ///< two floats, XY + OPTIX_PIXEL_FORMAT_FLOAT3 = 0x2203, ///< three floats, RGB + OPTIX_PIXEL_FORMAT_FLOAT4 = 0x2204, ///< four floats, RGBA + OPTIX_PIXEL_FORMAT_UCHAR3 = 0x2205, ///< three unsigned chars, RGB + OPTIX_PIXEL_FORMAT_UCHAR4 = 0x2206, ///< four unsigned chars, RGBA + OPTIX_PIXEL_FORMAT_INTERNAL_GUIDE_LAYER = 0x2209, ///< internal format +} OptixPixelFormat; + +/// Image descriptor used by the denoiser. +/// +/// \see #optixDenoiserInvoke(), #optixDenoiserComputeIntensity() +typedef struct OptixImage2D +{ + /// Pointer to the actual pixel data. 
+ CUdeviceptr data; + /// Width of the image (in pixels) + unsigned int width; + /// Height of the image (in pixels) + unsigned int height; + /// Stride between subsequent rows of the image (in bytes). + unsigned int rowStrideInBytes; + /// Stride between subsequent pixels of the image (in bytes). + /// If set to 0, dense packing (no gaps) is assumed. + /// For pixel format OPTIX_PIXEL_FORMAT_INTERNAL_GUIDE_LAYER it must be set to + /// at least OptixDenoiserSizes::internalGuideLayerPixelSizeInBytes. + unsigned int pixelStrideInBytes; + /// Pixel format. + OptixPixelFormat format; +} OptixImage2D; + +/// Model kind used by the denoiser. +/// +/// \see #optixDenoiserCreate +typedef enum OptixDenoiserModelKind +{ + /// Use the built-in model appropriate for low dynamic range input. + OPTIX_DENOISER_MODEL_KIND_LDR = 0x2322, + + /// Use the built-in model appropriate for high dynamic range input. + OPTIX_DENOISER_MODEL_KIND_HDR = 0x2323, + + /// Use the built-in model appropriate for high dynamic range input and support for AOVs + OPTIX_DENOISER_MODEL_KIND_AOV = 0x2324, + + /// Use the built-in model appropriate for high dynamic range input, temporally stable + OPTIX_DENOISER_MODEL_KIND_TEMPORAL = 0x2325, + + /// Use the built-in model appropriate for high dynamic range input and support for AOVs, temporally stable + OPTIX_DENOISER_MODEL_KIND_TEMPORAL_AOV = 0x2326, + + /// Use the built-in model appropriate for high dynamic range input and support for AOVs, upscaling 2x + OPTIX_DENOISER_MODEL_KIND_UPSCALE2X = 0x2327, + + /// Use the built-in model appropriate for high dynamic range input and support for AOVs, upscaling 2x, + /// temporally stable + OPTIX_DENOISER_MODEL_KIND_TEMPORAL_UPSCALE2X = 0x2328, +} OptixDenoiserModelKind; + +/// Options used by the denoiser +/// +/// \see #optixDenoiserCreate() +typedef struct OptixDenoiserOptions +{ + // if nonzero, albedo image must be given in OptixDenoiserGuideLayer + unsigned int guideAlbedo; + + // if nonzero, normal image must
be given in OptixDenoiserGuideLayer + unsigned int guideNormal; +} OptixDenoiserOptions; + +/// Guide layer for the denoiser +/// +/// \see #optixDenoiserInvoke() +typedef struct OptixDenoiserGuideLayer +{ + // albedo/bsdf image + OptixImage2D albedo; + + // normal vector image (2d or 3d pixel format) + OptixImage2D normal; + + // 2d flow image, pixel flow from previous to current frame for each pixel + OptixImage2D flow; + + OptixImage2D previousOutputInternalGuideLayer; + OptixImage2D outputInternalGuideLayer; +} OptixDenoiserGuideLayer; + +/// Input/Output layers for the denoiser +/// +/// \see #optixDenoiserInvoke() +typedef struct OptixDenoiserLayer +{ + // input image (beauty or AOV) + OptixImage2D input; + + // denoised output image from previous frame if temporal model kind selected + OptixImage2D previousOutput; + + // denoised output for given input + OptixImage2D output; +} OptixDenoiserLayer; + +/// Various parameters used by the denoiser +/// +/// \see #optixDenoiserInvoke() +/// \see #optixDenoiserComputeIntensity() +/// \see #optixDenoiserComputeAverageColor() +typedef enum OptixDenoiserAlphaMode +{ + /// Copy alpha (if present) from input layer, no denoising. + OPTIX_DENOISER_ALPHA_MODE_COPY = 0, + + /// Denoise alpha separately. With AOV model kinds, treat alpha like an AOV. + OPTIX_DENOISER_ALPHA_MODE_ALPHA_AS_AOV = 1, + + /// With AOV model kinds, full denoise pass with alpha. + /// This is slower than OPTIX_DENOISER_ALPHA_MODE_ALPHA_AS_AOV. + OPTIX_DENOISER_ALPHA_MODE_FULL_DENOISE_PASS = 2 +} OptixDenoiserAlphaMode; +typedef struct OptixDenoiserParams +{ + /// alpha denoise mode + OptixDenoiserAlphaMode denoiseAlpha; + + /// average log intensity of input image (default null pointer). points to a single float. + /// with the default (null pointer) denoised results will not be optimal for very dark or + /// bright input images. + CUdeviceptr hdrIntensity; + + /// blend factor. + /// If set to 0 the output is 100% of the denoised input. 
If set to 1, the output is 100% of + /// the unmodified input. Values between 0 and 1 will linearly interpolate between the denoised + /// and unmodified input. + float blendFactor; + + /// this parameter is used when the OPTIX_DENOISER_MODEL_KIND_AOV model kind is set. + /// average log color of input image, separate for RGB channels (default null pointer). + /// points to three floats. with the default (null pointer) denoised results will not be + /// optimal. + CUdeviceptr hdrAverageColor; + + /// In temporal modes this parameter must be set to 1 if previous layers (e.g. + /// previousOutputInternalGuideLayer) contain valid data. This is the case in the + /// second and subsequent frames of a sequence (for example after a change of camera + /// angle). In the first frame of such a sequence this parameter must be set to 0. + unsigned int temporalModeUsePreviousLayers; +} OptixDenoiserParams; + +/// Various sizes related to the denoiser. +/// +/// \see #optixDenoiserComputeMemoryResources() +typedef struct OptixDenoiserSizes +{ + /// Size of state memory passed to #optixDenoiserSetup, #optixDenoiserInvoke. + size_t stateSizeInBytes; + + /// Size of scratch memory passed to #optixDenoiserSetup, #optixDenoiserInvoke. + /// Overlap added to dimensions passed to #optixDenoiserComputeMemoryResources. + size_t withOverlapScratchSizeInBytes; + + /// Size of scratch memory passed to #optixDenoiserSetup, #optixDenoiserInvoke. + /// No overlap added. + size_t withoutOverlapScratchSizeInBytes; + + /// Overlap on all four tile sides. + unsigned int overlapWindowSizeInPixels; + + /// Size of scratch memory passed to #optixDenoiserComputeAverageColor. + /// The size is independent of the tile/image resolution. + size_t computeAverageColorSizeInBytes; + + /// Size of scratch memory passed to #optixDenoiserComputeIntensity. + /// The size is independent of the tile/image resolution. 
+ size_t computeIntensitySizeInBytes; + + /// Number of bytes for each pixel in internal guide layers. + size_t internalGuideLayerPixelSizeInBytes; +} OptixDenoiserSizes; + +/// Ray flags passed to the device function #optixTrace(). These affect the behavior of +/// traversal per invocation. +/// +/// \see #optixTrace() +typedef enum OptixRayFlags +{ + /// No change from the behavior configured for the individual AS. + OPTIX_RAY_FLAG_NONE = 0u, + + /// Disables anyhit programs for the ray. + /// Overrides OPTIX_INSTANCE_FLAG_ENFORCE_ANYHIT. + /// This flag is mutually exclusive with OPTIX_RAY_FLAG_ENFORCE_ANYHIT, + /// OPTIX_RAY_FLAG_CULL_DISABLED_ANYHIT, OPTIX_RAY_FLAG_CULL_ENFORCED_ANYHIT. + OPTIX_RAY_FLAG_DISABLE_ANYHIT = 1u << 0, + + /// Forces anyhit program execution for the ray. + /// Overrides OPTIX_GEOMETRY_FLAG_DISABLE_ANYHIT as well as OPTIX_INSTANCE_FLAG_DISABLE_ANYHIT. + /// This flag is mutually exclusive with OPTIX_RAY_FLAG_DISABLE_ANYHIT, + /// OPTIX_RAY_FLAG_CULL_DISABLED_ANYHIT, OPTIX_RAY_FLAG_CULL_ENFORCED_ANYHIT. + OPTIX_RAY_FLAG_ENFORCE_ANYHIT = 1u << 1, + + /// Terminates the ray after the first hit and executes + /// the closesthit program of that hit. + OPTIX_RAY_FLAG_TERMINATE_ON_FIRST_HIT = 1u << 2, + + /// Disables closesthit programs for the ray, but still executes miss program in case of a miss. + OPTIX_RAY_FLAG_DISABLE_CLOSESTHIT = 1u << 3, + + /// Do not intersect triangle back faces + /// (respects a possible face change due to instance flag + /// OPTIX_INSTANCE_FLAG_FLIP_TRIANGLE_FACING). + /// This flag is mutually exclusive with OPTIX_RAY_FLAG_CULL_FRONT_FACING_TRIANGLES. + OPTIX_RAY_FLAG_CULL_BACK_FACING_TRIANGLES = 1u << 4, + + /// Do not intersect triangle front faces + /// (respects a possible face change due to instance flag + /// OPTIX_INSTANCE_FLAG_FLIP_TRIANGLE_FACING). + /// This flag is mutually exclusive with OPTIX_RAY_FLAG_CULL_BACK_FACING_TRIANGLES. 
+ OPTIX_RAY_FLAG_CULL_FRONT_FACING_TRIANGLES = 1u << 5, + + /// Do not intersect geometry which disables anyhit programs + /// (due to setting geometry flag OPTIX_GEOMETRY_FLAG_DISABLE_ANYHIT or + /// instance flag OPTIX_INSTANCE_FLAG_DISABLE_ANYHIT). + /// This flag is mutually exclusive with OPTIX_RAY_FLAG_CULL_ENFORCED_ANYHIT, + /// OPTIX_RAY_FLAG_ENFORCE_ANYHIT, OPTIX_RAY_FLAG_DISABLE_ANYHIT. + OPTIX_RAY_FLAG_CULL_DISABLED_ANYHIT = 1u << 6, + + /// Do not intersect geometry which have an enabled anyhit program + /// (due to not setting geometry flag OPTIX_GEOMETRY_FLAG_DISABLE_ANYHIT or + /// setting instance flag OPTIX_INSTANCE_FLAG_ENFORCE_ANYHIT). + /// This flag is mutually exclusive with OPTIX_RAY_FLAG_CULL_DISABLED_ANYHIT, + /// OPTIX_RAY_FLAG_ENFORCE_ANYHIT, OPTIX_RAY_FLAG_DISABLE_ANYHIT. + OPTIX_RAY_FLAG_CULL_ENFORCED_ANYHIT = 1u << 7, + + /// Force 4-state opacity micromaps to behave as 2-state opacity micromaps during traversal. + OPTIX_RAY_FLAG_FORCE_OPACITY_MICROMAP_2_STATE = 1u << 10, +} OptixRayFlags; + +/// Transform +/// +/// OptixTransformType is used by the device function #optixGetTransformTypeFromHandle() to +/// determine the type of the OptixTraversableHandle returned from +/// optixGetTransformListHandle(). +typedef enum OptixTransformType +{ + OPTIX_TRANSFORM_TYPE_NONE = 0, ///< Not a transformation + OPTIX_TRANSFORM_TYPE_STATIC_TRANSFORM = 1, ///< \see #OptixStaticTransform + OPTIX_TRANSFORM_TYPE_MATRIX_MOTION_TRANSFORM = 2, ///< \see #OptixMatrixMotionTransform + OPTIX_TRANSFORM_TYPE_SRT_MOTION_TRANSFORM = 3, ///< \see #OptixSRTMotionTransform + OPTIX_TRANSFORM_TYPE_INSTANCE = 4, ///< \see #OptixInstance +} OptixTransformType; + +/// Specifies the set of valid traversable graphs that may be +/// passed to invocation of #optixTrace(). Flags may be bitwise combined. +typedef enum OptixTraversableGraphFlags +{ + /// Used to signal that any traversable graph is valid. + /// This flag is mutually exclusive with all other flags.
+ OPTIX_TRAVERSABLE_GRAPH_FLAG_ALLOW_ANY = 0, + + /// Used to signal that a traversable graph of a single Geometry Acceleration + /// Structure (GAS) without any transforms is valid. This flag may be combined with + /// other flags except for OPTIX_TRAVERSABLE_GRAPH_FLAG_ALLOW_ANY. + OPTIX_TRAVERSABLE_GRAPH_FLAG_ALLOW_SINGLE_GAS = 1u << 0, + + /// Used to signal that a traversable graph of a single Instance Acceleration + /// Structure (IAS) directly connected to Geometry Acceleration Structure (GAS) + /// traversables without transform traversables in between is valid. This flag may + /// be combined with other flags except for OPTIX_TRAVERSABLE_GRAPH_FLAG_ALLOW_ANY. + OPTIX_TRAVERSABLE_GRAPH_FLAG_ALLOW_SINGLE_LEVEL_INSTANCING = 1u << 1, +} OptixTraversableGraphFlags; + +/// Optimization levels +/// +/// \see #OptixModuleCompileOptions::optLevel +typedef enum OptixCompileOptimizationLevel +{ + /// Default is to run all optimizations + OPTIX_COMPILE_OPTIMIZATION_DEFAULT = 0, + /// No optimizations + OPTIX_COMPILE_OPTIMIZATION_LEVEL_0 = 0x2340, + /// Some optimizations + OPTIX_COMPILE_OPTIMIZATION_LEVEL_1 = 0x2341, + /// Most optimizations + OPTIX_COMPILE_OPTIMIZATION_LEVEL_2 = 0x2342, + /// All optimizations + OPTIX_COMPILE_OPTIMIZATION_LEVEL_3 = 0x2343, +} OptixCompileOptimizationLevel; + +/// Debug levels +/// +/// \see #OptixModuleCompileOptions::debugLevel +typedef enum OptixCompileDebugLevel +{ + /// Default currently is minimal + OPTIX_COMPILE_DEBUG_LEVEL_DEFAULT = 0, + /// No debug information + OPTIX_COMPILE_DEBUG_LEVEL_NONE = 0x2350, + /// Generate information that does not impact performance. + /// Note this replaces OPTIX_COMPILE_DEBUG_LEVEL_LINEINFO. + OPTIX_COMPILE_DEBUG_LEVEL_MINIMAL = 0x2351, + /// Generate some debug information with slight performance cost + OPTIX_COMPILE_DEBUG_LEVEL_MODERATE = 0x2353, + /// Generate full debug information + OPTIX_COMPILE_DEBUG_LEVEL_FULL = 0x2352, +} OptixCompileDebugLevel; + +/// Module compilation state. 
+/// +/// \see #optixModuleGetCompilationState(), #optixModuleCreateFromPTXWithTasks() +typedef enum OptixModuleCompileState +{ + /// No OptixTask objects have started + OPTIX_MODULE_COMPILE_STATE_NOT_STARTED = 0x2360, + + /// Started, but not all OptixTask objects have completed. No detected failures. + OPTIX_MODULE_COMPILE_STATE_STARTED = 0x2361, + + /// Not all OptixTask objects have completed, but at least one has failed. + OPTIX_MODULE_COMPILE_STATE_IMPENDING_FAILURE = 0x2362, + + /// All OptixTask objects have completed, and at least one has failed + OPTIX_MODULE_COMPILE_STATE_FAILED = 0x2363, + + /// All OptixTask objects have completed. The OptixModule is ready to be used. + OPTIX_MODULE_COMPILE_STATE_COMPLETED = 0x2364, +} OptixModuleCompileState; + + + +/// Struct for specifying specializations for pipelineParams as specified in +/// OptixPipelineCompileOptions::pipelineLaunchParamsVariableName. +/// +/// The bound values are supposed to represent a constant value in the +/// pipelineParams. OptiX will attempt to locate all loads from the pipelineParams and +/// correlate them to the appropriate bound value, but there are cases where OptiX cannot +/// safely or reliably do this. For example if the pointer to the pipelineParams is passed +/// as an argument to a non-inline function or the offset of the load to the +/// pipelineParams cannot be statically determined (e.g. accessed in a loop). No module +/// should rely on the value being specialized in order to work correctly. The values in +/// the pipelineParams specified on optixLaunch should match the bound value. If +/// validation mode is enabled on the context, OptiX will verify that the bound values +/// specified matches the values in pipelineParams specified to optixLaunch. +/// +/// These values are compiled in to the module as constants. 
Once the constants are +/// inserted into the code, an optimization pass will be run that will attempt to +/// propagate the constants and remove unreachable code. +/// +/// If caching is enabled, changes in these values will result in newly compiled modules. +/// +/// The pipelineParamOffsetInBytes and sizeInBytes must be within the bounds of the +/// pipelineParams variable. OPTIX_ERROR_INVALID_VALUE will be returned from +/// optixModuleCreateFromPTX otherwise. +/// +/// If more than one bound value overlaps or the size of a bound value is equal to 0, +/// an OPTIX_ERROR_INVALID_VALUE will be returned from optixModuleCreateFromPTX. +/// +/// The same set of bound values does not need to be used for all modules in a pipeline, but +/// overlapping values between modules must have the same value. +/// OPTIX_ERROR_INVALID_VALUE will be returned from optixPipelineCreate otherwise. +/// +/// \see #OptixModuleCompileOptions +typedef struct OptixModuleCompileBoundValueEntry { + size_t pipelineParamOffsetInBytes; + size_t sizeInBytes; + const void* boundValuePtr; + const char* annotation; // optional string to display, set to 0 if unused. If unused, + // OptiX will report the annotation as "No annotation" +} OptixModuleCompileBoundValueEntry; + +/// Payload type identifiers. +typedef enum OptixPayloadTypeID { + OPTIX_PAYLOAD_TYPE_DEFAULT = 0, + OPTIX_PAYLOAD_TYPE_ID_0 = (1 << 0u), + OPTIX_PAYLOAD_TYPE_ID_1 = (1 << 1u), + OPTIX_PAYLOAD_TYPE_ID_2 = (1 << 2u), + OPTIX_PAYLOAD_TYPE_ID_3 = (1 << 3u), + OPTIX_PAYLOAD_TYPE_ID_4 = (1 << 4u), + OPTIX_PAYLOAD_TYPE_ID_5 = (1 << 5u), + OPTIX_PAYLOAD_TYPE_ID_6 = (1 << 6u), + OPTIX_PAYLOAD_TYPE_ID_7 = (1 << 7u) +} OptixPayloadTypeID; + +/// Semantic flags for a single payload word. +/// +/// Used to specify the semantics of a payload word per shader type. +/// "read": Shader of this type may read the payload word. +/// "write": Shader of this type may write the payload word.
+/// +/// "trace_caller_write": Shaders may consume the value of the payload word passed to optixTrace by the caller. +/// "trace_caller_read": The caller to optixTrace may read the payload word after the call to optixTrace. +/// +/// Semantics can be bitwise combined. +/// Combining "read" and "write" is equivalent to specifying "read_write". +/// A payload needs to be writable by the caller or at least one shader type. +/// A payload needs to be readable by the caller or at least one shader type after being writable. +typedef enum OptixPayloadSemantics +{ + OPTIX_PAYLOAD_SEMANTICS_TRACE_CALLER_NONE = 0, + OPTIX_PAYLOAD_SEMANTICS_TRACE_CALLER_READ = 1u << 0, + OPTIX_PAYLOAD_SEMANTICS_TRACE_CALLER_WRITE = 2u << 0, + OPTIX_PAYLOAD_SEMANTICS_TRACE_CALLER_READ_WRITE = 3u << 0, + + OPTIX_PAYLOAD_SEMANTICS_CH_NONE = 0, + OPTIX_PAYLOAD_SEMANTICS_CH_READ = 1u << 2, + OPTIX_PAYLOAD_SEMANTICS_CH_WRITE = 2u << 2, + OPTIX_PAYLOAD_SEMANTICS_CH_READ_WRITE = 3u << 2, + + OPTIX_PAYLOAD_SEMANTICS_MS_NONE = 0, + OPTIX_PAYLOAD_SEMANTICS_MS_READ = 1u << 4, + OPTIX_PAYLOAD_SEMANTICS_MS_WRITE = 2u << 4, + OPTIX_PAYLOAD_SEMANTICS_MS_READ_WRITE = 3u << 4, + + OPTIX_PAYLOAD_SEMANTICS_AH_NONE = 0, + OPTIX_PAYLOAD_SEMANTICS_AH_READ = 1u << 6, + OPTIX_PAYLOAD_SEMANTICS_AH_WRITE = 2u << 6, + OPTIX_PAYLOAD_SEMANTICS_AH_READ_WRITE = 3u << 6, + + OPTIX_PAYLOAD_SEMANTICS_IS_NONE = 0, + OPTIX_PAYLOAD_SEMANTICS_IS_READ = 1u << 8, + OPTIX_PAYLOAD_SEMANTICS_IS_WRITE = 2u << 8, + OPTIX_PAYLOAD_SEMANTICS_IS_READ_WRITE = 3u << 8, +} OptixPayloadSemantics; + +/// Specifies a single payload type +typedef struct OptixPayloadType +{ + /// The number of 32b words the payload of this type holds + unsigned int numPayloadValues; + + /// Points to host array of payload word semantics, size must match numPayloadValues + const unsigned int *payloadSemantics; +} OptixPayloadType; + +/// Compilation options for module +/// +/// \see #optixModuleCreateFromPTX() +typedef struct OptixModuleCompileOptions +{ + ///
Maximum number of registers allowed when compiling to SASS. + /// Set to 0 for no explicit limit. May vary within a pipeline. + int maxRegisterCount; + + /// Optimization level. May vary within a pipeline. + OptixCompileOptimizationLevel optLevel; + + /// Generate debug information. + OptixCompileDebugLevel debugLevel; + + /// Ignored if numBoundValues is set to 0 + const OptixModuleCompileBoundValueEntry* boundValues; + + /// set to 0 if unused + unsigned int numBoundValues; + + /// The number of different payload types available for compilation. + /// Must be zero if OptixPipelineCompileOptions::numPayloadValues is not zero. + unsigned int numPayloadTypes; + + /// Points to host array of payload type definitions, size must match numPayloadTypes + OptixPayloadType *payloadTypes; + +} OptixModuleCompileOptions; + +/// Distinguishes different kinds of program groups. +typedef enum OptixProgramGroupKind +{ + /// Program group containing a raygen (RG) program + /// \see #OptixProgramGroupSingleModule, #OptixProgramGroupDesc::raygen + OPTIX_PROGRAM_GROUP_KIND_RAYGEN = 0x2421, + + /// Program group containing a miss (MS) program + /// \see #OptixProgramGroupSingleModule, #OptixProgramGroupDesc::miss + OPTIX_PROGRAM_GROUP_KIND_MISS = 0x2422, + + /// Program group containing an exception (EX) program + /// \see #OptixProgramGroupSingleModule, #OptixProgramGroupDesc::exception + OPTIX_PROGRAM_GROUP_KIND_EXCEPTION = 0x2423, + + /// Program group containing an intersection (IS), any hit (AH), and/or closest hit (CH) program + /// \see #OptixProgramGroupHitgroup, #OptixProgramGroupDesc::hitgroup + OPTIX_PROGRAM_GROUP_KIND_HITGROUP = 0x2424, + + /// Program group containing a direct (DC) or continuation (CC) callable program + /// \see #OptixProgramGroupCallables, #OptixProgramGroupDesc::callables + OPTIX_PROGRAM_GROUP_KIND_CALLABLES = 0x2425 +} OptixProgramGroupKind; + +/// Flags for program groups +typedef enum OptixProgramGroupFlags +{ + /// Currently there are no flags +
OPTIX_PROGRAM_GROUP_FLAGS_NONE = 0 +} OptixProgramGroupFlags; + +/// Program group representing a single module. +/// +/// Used for raygen, miss, and exception programs. In case of raygen and exception programs, module and entry +/// function name need to be valid. For miss programs, module and entry function name might both be \c nullptr. +/// +/// \see #OptixProgramGroupDesc::raygen, #OptixProgramGroupDesc::miss, #OptixProgramGroupDesc::exception +typedef struct OptixProgramGroupSingleModule +{ + /// Module holding single program. + OptixModule module; + /// Entry function name of the single program. + const char* entryFunctionName; +} OptixProgramGroupSingleModule; + +/// Program group representing the hitgroup. +/// +/// For each of the three program types, module and entry function name might both be \c nullptr. +/// +/// \see #OptixProgramGroupDesc::hitgroup +typedef struct OptixProgramGroupHitgroup +{ + /// Module holding the closest hit (CH) program. + OptixModule moduleCH; + /// Entry function name of the closest hit (CH) program. + const char* entryFunctionNameCH; + /// Module holding the any hit (AH) program. + OptixModule moduleAH; + /// Entry function name of the any hit (AH) program. + const char* entryFunctionNameAH; + /// Module holding the intersection (IS) program. + OptixModule moduleIS; + /// Entry function name of the intersection (IS) program. + const char* entryFunctionNameIS; +} OptixProgramGroupHitgroup; + +/// Program group representing callables. +/// +/// Module and entry function name need to be valid for at least one of the two callables. +/// +/// \see #OptixProgramGroupDesc::callables +typedef struct OptixProgramGroupCallables +{ + /// Module holding the direct callable (DC) program. + OptixModule moduleDC; + /// Entry function name of the direct callable (DC) program. + const char* entryFunctionNameDC; + /// Module holding the continuation callable (CC) program.
+ OptixModule moduleCC; + /// Entry function name of the continuation callable (CC) program. + const char* entryFunctionNameCC; +} OptixProgramGroupCallables; + +/// Descriptor for program groups. +typedef struct OptixProgramGroupDesc +{ + /// The kind of program group. + OptixProgramGroupKind kind; + + /// See #OptixProgramGroupFlags + unsigned int flags; + + union + { + /// \see #OPTIX_PROGRAM_GROUP_KIND_RAYGEN + OptixProgramGroupSingleModule raygen; + /// \see #OPTIX_PROGRAM_GROUP_KIND_MISS + OptixProgramGroupSingleModule miss; + /// \see #OPTIX_PROGRAM_GROUP_KIND_EXCEPTION + OptixProgramGroupSingleModule exception; + /// \see #OPTIX_PROGRAM_GROUP_KIND_CALLABLES + OptixProgramGroupCallables callables; + /// \see #OPTIX_PROGRAM_GROUP_KIND_HITGROUP + OptixProgramGroupHitgroup hitgroup; + }; +} OptixProgramGroupDesc; + +/// Program group options +/// +/// \see #optixProgramGroupCreate() +typedef struct OptixProgramGroupOptions +{ + /// Specifies the payload type of this program group. + /// All programs in the group must support the payload type + /// (Program support for a type is specified by calling + /// \see #optixSetPayloadTypes or otherwise all types specified in + /// \see #OptixModuleCompileOptions are supported). + /// If a program is not available for the requested payload type, + /// optixProgramGroupCreate returns OPTIX_ERROR_PAYLOAD_TYPE_MISMATCH. + /// If the payloadType is left zero, a unique type is deduced. + /// The payload type can be uniquely deduced if there is exactly one payload type + /// for which all programs in the group are available. + /// If the payload type could not be deduced uniquely + /// optixProgramGroupCreate returns OPTIX_ERROR_PAYLOAD_TYPE_RESOLUTION_FAILED. + OptixPayloadType* payloadType; +} OptixProgramGroupOptions; + +/// The following values are used to indicate which exception was thrown. +typedef enum OptixExceptionCodes +{ + /// Stack overflow of the continuation stack. + /// no exception details. 
+ OPTIX_EXCEPTION_CODE_STACK_OVERFLOW = -1, + + /// The trace depth is exceeded. + /// no exception details. + OPTIX_EXCEPTION_CODE_TRACE_DEPTH_EXCEEDED = -2, + + /// The traversal depth is exceeded. + /// Exception details: + /// optixGetTransformListSize() + /// optixGetTransformListHandle() + OPTIX_EXCEPTION_CODE_TRAVERSAL_DEPTH_EXCEEDED = -3, + + /// Traversal encountered an invalid traversable type. + /// Exception details: + /// optixGetTransformListSize() + /// optixGetTransformListHandle() + /// optixGetExceptionInvalidTraversable() + OPTIX_EXCEPTION_CODE_TRAVERSAL_INVALID_TRAVERSABLE = -5, + + /// The miss SBT record index is out of bounds + /// A miss SBT record index is valid within the range [0, OptixShaderBindingTable::missRecordCount) (See optixLaunch) + /// Exception details: + /// optixGetExceptionInvalidSbtOffset() + OPTIX_EXCEPTION_CODE_TRAVERSAL_INVALID_MISS_SBT = -6, + + /// The traversal hit SBT record index is out of bounds. + /// + /// A traversal hit SBT record index is valid within the range [0, OptixShaderBindingTable::hitgroupRecordCount) (See optixLaunch) + /// The following formula relates the + /// sbt-index (See optixGetExceptionInvalidSbtOffset), + /// sbt-instance-offset (See OptixInstance::sbtOffset), + /// sbt-geometry-acceleration-structure-index (See optixGetSbtGASIndex), + /// sbt-stride-from-trace-call and sbt-offset-from-trace-call (See optixTrace) + /// + /// sbt-index = sbt-instance-offset + (sbt-geometry-acceleration-structure-index * sbt-stride-from-trace-call) + sbt-offset-from-trace-call + /// + /// Exception details: + /// optixGetTransformListSize() + /// optixGetTransformListHandle() + /// optixGetExceptionInvalidSbtOffset() + /// optixGetSbtGASIndex() + OPTIX_EXCEPTION_CODE_TRAVERSAL_INVALID_HIT_SBT = -7, + + /// The shader encountered an unsupported primitive type (See OptixPipelineCompileOptions::usesPrimitiveTypeFlags). + /// no exception details.
+ OPTIX_EXCEPTION_CODE_UNSUPPORTED_PRIMITIVE_TYPE = -8, + + /// The shader encountered a call to optixTrace with at least + /// one of the float arguments being inf or nan, or the tmin argument is negative. + /// Exception details: + /// optixGetExceptionInvalidRay() + OPTIX_EXCEPTION_CODE_INVALID_RAY = -9, + + /// The shader encountered a call to either optixDirectCall or optixCallableCall + /// where the argument count does not match the parameter count of the callable + /// program which is called. + /// Exception details: + /// optixGetExceptionParameterMismatch + OPTIX_EXCEPTION_CODE_CALLABLE_PARAMETER_MISMATCH = -10, + + /// The invoked builtin IS does not match the current GAS + OPTIX_EXCEPTION_CODE_BUILTIN_IS_MISMATCH = -11, + + /// Tried to call a callable program using an SBT offset that is larger + /// than the number of passed in callable SBT records. + /// Exception details: + /// optixGetExceptionInvalidSbtOffset() + OPTIX_EXCEPTION_CODE_CALLABLE_INVALID_SBT = -12, + + /// Tried to call a direct callable using an SBT offset of a record that + /// was built from a program group that did not include a direct callable. + OPTIX_EXCEPTION_CODE_CALLABLE_NO_DC_SBT_RECORD = -13, + + /// Tried to call a continuation callable using an SBT offset of a record + /// that was built from a program group that did not include a continuation callable. + OPTIX_EXCEPTION_CODE_CALLABLE_NO_CC_SBT_RECORD = -14, + + /// Tried to directly traverse a single gas while single gas traversable graphs are not enabled + /// (see OptixTraversableGraphFlags::OPTIX_TRAVERSABLE_GRAPH_FLAG_ALLOW_SINGLE_GAS). + /// Exception details: + /// optixGetTransformListSize() + /// optixGetTransformListHandle() + /// optixGetExceptionInvalidTraversable() + OPTIX_EXCEPTION_CODE_UNSUPPORTED_SINGLE_LEVEL_GAS = -15, + + /// argument passed to an optix call is + /// not within an acceptable range of values. 
+ OPTIX_EXCEPTION_CODE_INVALID_VALUE_ARGUMENT_0 = -16, + OPTIX_EXCEPTION_CODE_INVALID_VALUE_ARGUMENT_1 = -17, + OPTIX_EXCEPTION_CODE_INVALID_VALUE_ARGUMENT_2 = -18, + + /// Tried to access data on an AS without random data access support (See OptixBuildFlags). + OPTIX_EXCEPTION_CODE_UNSUPPORTED_DATA_ACCESS = -32, + + /// The program payload type doesn't match the trace payload type. + OPTIX_EXCEPTION_CODE_PAYLOAD_TYPE_MISMATCH = -33, +} OptixExceptionCodes; + +/// Exception flags. +/// +/// \see #OptixPipelineCompileOptions::exceptionFlags, #OptixExceptionCodes +typedef enum OptixExceptionFlags +{ + /// No exceptions are enabled. + OPTIX_EXCEPTION_FLAG_NONE = 0, + + /// Enables exception checks related to the continuation stack. + OPTIX_EXCEPTION_FLAG_STACK_OVERFLOW = 1u << 0, + + /// Enables exception checks related to trace depth. + OPTIX_EXCEPTION_FLAG_TRACE_DEPTH = 1u << 1, + + /// Enables user exceptions via optixThrowException(). This flag must be specified for all modules in a pipeline + /// if any module calls optixThrowException(). + OPTIX_EXCEPTION_FLAG_USER = 1u << 2, + + /// Enables various exception checks related to traversal. + OPTIX_EXCEPTION_FLAG_DEBUG = 1u << 3 +} OptixExceptionFlags; + +/// Compilation options for all modules of a pipeline. +/// +/// Similar to #OptixModuleCompileOptions, but these options here need to be equal for all modules of a pipeline. +/// +/// \see #optixModuleCreateFromPTX(), #optixPipelineCreate() +typedef struct OptixPipelineCompileOptions +{ + /// Boolean value indicating whether motion blur could be used + int usesMotionBlur; + + /// Traversable graph bitfield. See OptixTraversableGraphFlags + unsigned int traversableGraphFlags; + + /// How much storage, in 32b words, to make available for the payload, [0..32] + /// Must be zero if numPayloadTypes is not zero. + int numPayloadValues; + + /// How much storage, in 32b words, to make available for the attributes. The + /// minimum number is 2.
Values below that will automatically be changed to 2. [2..8] + int numAttributeValues; + + /// A bitmask of OptixExceptionFlags indicating which exceptions are enabled. + unsigned int exceptionFlags; + + /// The name of the pipeline parameter variable. If 0, no pipeline parameter + /// will be available. This will be ignored if the launch param variable was + /// optimized out or was not found in the modules linked to the pipeline. + const char* pipelineLaunchParamsVariableName; + + /// Bit field enabling primitive types. See OptixPrimitiveTypeFlags. + /// Setting to zero corresponds to enabling OPTIX_PRIMITIVE_TYPE_FLAGS_CUSTOM and OPTIX_PRIMITIVE_TYPE_FLAGS_TRIANGLE. + unsigned int usesPrimitiveTypeFlags; + + /// Boolean value indicating whether opacity micromaps could be used + int allowOpacityMicromaps; +} OptixPipelineCompileOptions; + +/// Link options for a pipeline +/// +/// \see #optixPipelineCreate() +typedef struct OptixPipelineLinkOptions +{ + /// Maximum trace recursion depth. 0 means a ray generation program can be + /// launched, but can't trace any rays. The maximum allowed value is 31. + unsigned int maxTraceDepth; + + /// Generate debug information. + OptixCompileDebugLevel debugLevel; +} OptixPipelineLinkOptions; + +/// Describes the shader binding table (SBT) +/// +/// \see #optixLaunch() +typedef struct OptixShaderBindingTable +{ + /// Device address of the SBT record of the ray gen program to start launch at. The address must be a multiple of + /// OPTIX_SBT_RECORD_ALIGNMENT. + CUdeviceptr raygenRecord; + + /// Device address of the SBT record of the exception program. The address must be a multiple of + /// OPTIX_SBT_RECORD_ALIGNMENT. + CUdeviceptr exceptionRecord; + + /// Arrays of SBT records for miss programs. The base address and the stride must be a multiple of + /// OPTIX_SBT_RECORD_ALIGNMENT. 
+ /// @{ + CUdeviceptr missRecordBase; + unsigned int missRecordStrideInBytes; + unsigned int missRecordCount; + /// @} + + /// Arrays of SBT records for hit groups. The base address and the stride must be a multiple of + /// OPTIX_SBT_RECORD_ALIGNMENT. + /// @{ + CUdeviceptr hitgroupRecordBase; + unsigned int hitgroupRecordStrideInBytes; + unsigned int hitgroupRecordCount; + /// @} + + /// Arrays of SBT records for callable programs. If the base address is not null, the stride and count must not be + /// zero. If the base address is null, then the count needs to be zero. The base address and the stride must be a + /// multiple of OPTIX_SBT_RECORD_ALIGNMENT. + /// @{ + CUdeviceptr callablesRecordBase; + unsigned int callablesRecordStrideInBytes; + unsigned int callablesRecordCount; + /// @} + +} OptixShaderBindingTable; + +/// Describes the stack size requirements of a program group. +/// +/// \see #optixProgramGroupGetStackSize() +typedef struct OptixStackSizes +{ + /// Continuation stack size of RG programs in bytes + unsigned int cssRG; + /// Continuation stack size of MS programs in bytes + unsigned int cssMS; + /// Continuation stack size of CH programs in bytes + unsigned int cssCH; + /// Continuation stack size of AH programs in bytes + unsigned int cssAH; + /// Continuation stack size of IS programs in bytes + unsigned int cssIS; + /// Continuation stack size of CC programs in bytes + unsigned int cssCC; + /// Direct stack size of DC programs in bytes + unsigned int dssDC; + +} OptixStackSizes; + +/// Options that can be passed to \c optixQueryFunctionTable() +typedef enum OptixQueryFunctionTableOptions +{ + /// Placeholder (there are no options yet) + OPTIX_QUERY_FUNCTION_TABLE_OPTION_DUMMY = 0 + +} OptixQueryFunctionTableOptions; + +/// Type of the function \c optixQueryFunctionTable() +typedef OptixResult( OptixQueryFunctionTable_t )( int abiId, + unsigned int numOptions, + OptixQueryFunctionTableOptions* /*optionKeys*/, + const void** /*optionValues*/, +
void* functionTable, + size_t sizeOfTable ); + +/// Specifies the options for retrieving an intersection program for a built-in primitive type. +/// The primitive type must not be OPTIX_PRIMITIVE_TYPE_CUSTOM. +/// +/// \see #optixBuiltinISModuleGet() +typedef struct OptixBuiltinISOptions +{ + OptixPrimitiveType builtinISModuleType; + /// Boolean value indicating whether vertex motion blur is used (but not motion transform blur). + int usesMotionBlur; + /// Build flags, see OptixBuildFlags. + unsigned int buildFlags; + /// End cap properties of curves, see OptixCurveEndcapFlags, 0 for non-curve types. + unsigned int curveEndcapFlags; +} OptixBuiltinISOptions; + +#if defined( __CUDACC__ ) +/// Describes the ray that was passed into \c optixTrace() which caused an exception with +/// exception code OPTIX_EXCEPTION_CODE_INVALID_RAY. +/// +/// \see #optixGetExceptionInvalidRay() +typedef struct OptixInvalidRayExceptionDetails +{ + float3 origin; + float3 direction; + float tmin; + float tmax; + float time; +} OptixInvalidRayExceptionDetails; + +/// Describes the details of a call to a callable program which caused an exception with +/// exception code OPTIX_EXCEPTION_CODE_CALLABLE_PARAMETER_MISMATCH. +/// Note that OptiX packs the parameters into individual 32 bit values, so the number of +/// expected and passed values may not correspond to the number of arguments passed into +/// optixDirectCall or optixContinuationCall, or the number of parameters in the definition +/// of the function that is called.
+typedef struct OptixParameterMismatchExceptionDetails +{ + /// Number of 32 bit values expected by the callable program + unsigned int expectedParameterCount; + /// Number of 32 bit values that were passed to the callable program + unsigned int passedArgumentCount; + /// The offset of the SBT entry of the callable program relative to OptixShaderBindingTable::callablesRecordBase + unsigned int sbtIndex; + /// Pointer to a string that holds the name of the callable program that was called + char* callableName; +} OptixParameterMismatchExceptionDetails; +#endif + + +/*@}*/ // end group optix_types + +#endif // __optix_optix_7_types_h__ diff --git a/dependencies/optix/optix_denoiser_tiling.h b/dependencies/optix/optix_denoiser_tiling.h new file mode 100644 index 0000000000000000000000000000000000000000..03dda2613851d155c27f0e88eaf1d7476c80991d --- /dev/null +++ b/dependencies/optix/optix_denoiser_tiling.h @@ -0,0 +1,339 @@ +/* + * Copyright (c) 2021 NVIDIA Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of NVIDIA CORPORATION nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/// @file +/// @author NVIDIA Corporation +/// @brief OptiX public API header + +#ifndef optix_denoiser_tiling_h +#define optix_denoiser_tiling_h + +#include <optix.h> + +#include <algorithm> +#include <vector> + +#ifdef __cplusplus +extern "C" { +#endif + +/** \addtogroup optix_utilities +@{ +*/ + +/// Tile definition +/// +/// see #optixUtilDenoiserSplitImage +/// +struct OptixUtilDenoiserImageTile +{ + // input tile image + OptixImage2D input; + + // output tile image + OptixImage2D output; + + // overlap offsets, parameters for #optixUtilDenoiserInvoke + unsigned int inputOffsetX; + unsigned int inputOffsetY; +}; + +/// Return pixel stride in bytes for the given pixel format +/// if the pixelStrideInBytes member of the image is zero. +/// Otherwise return pixelStrideInBytes from the image. 
+///
+/// \param[in]  image               Image containing the pixel stride
+/// \param[out] pixelStrideInBytes  Explicit stride of the image if non-zero, otherwise the size derived from its format
+inline OptixResult optixUtilGetPixelStride( const OptixImage2D& image, unsigned int& pixelStrideInBytes )
+{
+    pixelStrideInBytes = image.pixelStrideInBytes;
+    if( pixelStrideInBytes == 0 )
+    {
+        switch( image.format )
+        {
+            case OPTIX_PIXEL_FORMAT_HALF2:
+                pixelStrideInBytes = 2 * sizeof( short );
+                break;
+            case OPTIX_PIXEL_FORMAT_HALF3:
+                pixelStrideInBytes = 3 * sizeof( short );
+                break;
+            case OPTIX_PIXEL_FORMAT_HALF4:
+                pixelStrideInBytes = 4 * sizeof( short );
+                break;
+            case OPTIX_PIXEL_FORMAT_FLOAT2:
+                pixelStrideInBytes = 2 * sizeof( float );
+                break;
+            case OPTIX_PIXEL_FORMAT_FLOAT3:
+                pixelStrideInBytes = 3 * sizeof( float );
+                break;
+            case OPTIX_PIXEL_FORMAT_FLOAT4:
+                pixelStrideInBytes = 4 * sizeof( float );
+                break;
+            case OPTIX_PIXEL_FORMAT_UCHAR3:
+                pixelStrideInBytes = 3 * sizeof( char );
+                break;
+            case OPTIX_PIXEL_FORMAT_UCHAR4:
+                pixelStrideInBytes = 4 * sizeof( char );
+                break;
+            case OPTIX_PIXEL_FORMAT_INTERNAL_GUIDE_LAYER:
+                return OPTIX_ERROR_INVALID_VALUE;
+                break;
+        }
+    }
+    return OPTIX_SUCCESS;
+}
+
+/// Split image into 2D tiles given horizontal and vertical tile size
+///
+/// \param[in]  input            full resolution input image to be split, width and height must be non-zero
+/// \param[in]  output           full resolution output image
+/// \param[in]  overlapWindowSizeInPixels    see #OptixDenoiserSizes, #optixDenoiserComputeMemoryResources
+/// \param[in]  tileWidth        maximum width of tiles, must be non-zero
+/// \param[in]  tileHeight       maximum height of tiles, must be non-zero
+/// \param[out] tiles            list of tiles covering the input image (tiles are appended to existing entries)
+/// \return OPTIX_ERROR_INVALID_VALUE for a zero tile/input extent or an underivable pixel stride, else OPTIX_SUCCESS
+inline OptixResult optixUtilDenoiserSplitImage(
+                                               const OptixImage2D&                      input,
+                                               const OptixImage2D&                      output,
+                                               unsigned int                             overlapWindowSizeInPixels,
+                                               unsigned int                             tileWidth,
+                                               unsigned int                             tileHeight,
+                                               std::vector<OptixUtilDenoiserImageTile>& tiles )
+{
+    if( tileWidth == 0 || tileHeight == 0 || input.width == 0 || input.height == 0 )
+        return OPTIX_ERROR_INVALID_VALUE;  // a zero input extent would divide by zero in the upscale computation below
+
+    unsigned int inPixelStride, outPixelStride;
+    if( const OptixResult res = optixUtilGetPixelStride( input, inPixelStride ) )
+        return res;
+    if( const OptixResult res = optixUtilGetPixelStride( output, outPixelStride ) )
+        return res;
+
+    int inp_w = std::min( tileWidth + 2 * overlapWindowSizeInPixels, input.width );
+    int inp_h = std::min( tileHeight + 2 * overlapWindowSizeInPixels, input.height );
+    int inp_y = 0, copied_y = 0;
+
+    int upscaleX = output.width / input.width;  // integer ratio of output to input extent
+    int upscaleY = output.height / input.height;
+
+    do
+    {
+        int inputOffsetY = inp_y == 0 ? 0 : std::max( (int)overlapWindowSizeInPixels, inp_h - ( (int)input.height - inp_y ) );
+        int copy_y       = inp_y == 0 ? std::min( input.height, tileHeight + overlapWindowSizeInPixels ) :
+                                        std::min( tileHeight, input.height - copied_y );
+
+        int inp_x = 0, copied_x = 0;
+        do
+        {
+            int inputOffsetX = inp_x == 0 ? 0 : std::max( (int)overlapWindowSizeInPixels, inp_w - ( (int)input.width - inp_x ) );
+            int copy_x       = inp_x == 0 ? std::min( input.width, tileWidth + overlapWindowSizeInPixels ) :
+                                            std::min( tileWidth, input.width - copied_x );
+
+            OptixUtilDenoiserImageTile tile;
+            tile.input.data               = input.data + (size_t)( inp_y - inputOffsetY ) * input.rowStrideInBytes
+                                            + (size_t)( inp_x - inputOffsetX ) * inPixelStride;
+            tile.input.width              = inp_w;
+            tile.input.height             = inp_h;
+            tile.input.rowStrideInBytes   = input.rowStrideInBytes;
+            tile.input.pixelStrideInBytes = input.pixelStrideInBytes;
+            tile.input.format             = input.format;
+
+            tile.output.data               = output.data + (size_t)( upscaleY * inp_y ) * output.rowStrideInBytes
+                                             + (size_t)( upscaleX * inp_x ) * outPixelStride;
+            tile.output.width              = upscaleX * copy_x;
+            tile.output.height             = upscaleY * copy_y;
+            tile.output.rowStrideInBytes   = output.rowStrideInBytes;
+            tile.output.pixelStrideInBytes = output.pixelStrideInBytes;
+            tile.output.format             = output.format;
+
+            tile.inputOffsetX = inputOffsetX;
+            tile.inputOffsetY = inputOffsetY;
+
+            tiles.push_back( tile );
+
+            inp_x += inp_x == 0 ? tileWidth + overlapWindowSizeInPixels : tileWidth;
+            copied_x += copy_x;
+        } while( inp_x < static_cast<int>( input.width ) );
+
+        inp_y += inp_y == 0 ? tileHeight + overlapWindowSizeInPixels : tileHeight;
+        copied_y += copy_y;
+    } while( inp_y < static_cast<int>( input.height ) );
+
+    return OPTIX_SUCCESS;
+}
+
+/// Run denoiser on input layers
+/// see #optixDenoiserInvoke
+/// additional parameters:
+
+/// Runs the denoiser on the input layers on a single GPU and stream using #optixDenoiserInvoke.
+/// If the input layers' dimensions are larger than the specified tile size, the image is divided into
+/// tiles using #optixUtilDenoiserSplitImage, and multiple back-to-back invocations are performed in
+/// order to reuse the scratch space. Multiple tiles can be invoked concurrently if
+/// #optixUtilDenoiserSplitImage is used directly and multiple scratch allocations for each concurrent
+/// invocation are used.
+
+/// The input parameters are the same as #optixDenoiserInvoke except for the addition of the maximum tile size.
+///
+/// \param[in] denoiser                  forwarded unchanged to #optixDenoiserInvoke for every tile
+/// \param[in] stream                    forwarded unchanged to #optixDenoiserInvoke for every tile
+/// \param[in] params                    forwarded unchanged to #optixDenoiserInvoke for every tile
+/// \param[in] denoiserState             forwarded unchanged to #optixDenoiserInvoke for every tile
+/// \param[in] denoiserStateSizeInBytes  forwarded unchanged to #optixDenoiserInvoke for every tile
+/// \param[in] guideLayer                guide images, must not be null; every image with non-null data is split into tiles
+/// \param[in] layers                    array of numLayers layers, must not be null
+/// \param[in] numLayers                 number of entries in layers, must be at least 1 (else OPTIX_ERROR_INVALID_VALUE)
+/// \param[in] scratch                   forwarded unchanged to #optixDenoiserInvoke, reused by every tile
+/// \param[in] scratchSizeInBytes        forwarded unchanged to #optixDenoiserInvoke for every tile
+/// \param[in] overlapWindowSizeInPixels overlap passed to #optixUtilDenoiserSplitImage
+/// \param[in] tileWidth                 maximum tile width passed to #optixUtilDenoiserSplitImage
+/// \param[in] tileHeight                maximum tile height passed to #optixUtilDenoiserSplitImage
+inline OptixResult optixUtilDenoiserInvokeTiled(
+                                                OptixDenoiser                  denoiser,
+                                                CUstream                       stream,
+                                                const OptixDenoiserParams*     params,
+                                                CUdeviceptr                    denoiserState,
+                                                size_t                         denoiserStateSizeInBytes,
+                                                const OptixDenoiserGuideLayer* guideLayer,
+                                                const OptixDenoiserLayer*      layers,
+                                                unsigned int                   numLayers,
+                                                CUdeviceptr                    scratch,
+                                                size_t                         scratchSizeInBytes,
+                                                unsigned int                   overlapWindowSizeInPixels,
+                                                unsigned int                   tileWidth,
+                                                unsigned int                   tileHeight )
+{
+    if( !guideLayer || !layers || numLayers == 0 )  // tiles[0] and tlayers[0] below require at least one layer
+        return OPTIX_ERROR_INVALID_VALUE;
+
+    const unsigned int upscale = numLayers > 0 && layers[0].previousOutput.width == 2 * layers[0].input.width ? 2 : 1;
+
+    std::vector<std::vector<OptixUtilDenoiserImageTile>> tiles( numLayers );
+    std::vector<std::vector<OptixUtilDenoiserImageTile>> prevTiles( numLayers );
+    for( unsigned int l = 0; l < numLayers; l++ )
+    {
+        if( const OptixResult res = optixUtilDenoiserSplitImage( layers[l].input, layers[l].output,
+                                                                 overlapWindowSizeInPixels,
+                                                                 tileWidth, tileHeight, tiles[l] ) )
+            return res;
+
+        if( layers[l].previousOutput.data )
+        {
+            OptixImage2D dummyOutput = layers[l].previousOutput;
+            if( const OptixResult res = optixUtilDenoiserSplitImage( layers[l].previousOutput, dummyOutput,
+                                                                     upscale * overlapWindowSizeInPixels,
+                                                                     upscale * tileWidth, upscale * tileHeight, prevTiles[l] ) )
+                return res;
+        }
+    }
+
+    std::vector<OptixUtilDenoiserImageTile> albedoTiles;
+    if( guideLayer->albedo.data )
+    {
+        OptixImage2D dummyOutput = guideLayer->albedo;
+        if( const OptixResult res = optixUtilDenoiserSplitImage( guideLayer->albedo, dummyOutput,
+                                                                 overlapWindowSizeInPixels,
+                                                                 tileWidth, tileHeight, albedoTiles ) )
+            return res;
+    }
+
+    std::vector<OptixUtilDenoiserImageTile> normalTiles;
+    if( guideLayer->normal.data )
+    {
+        OptixImage2D dummyOutput = guideLayer->normal;
+        if( const OptixResult res = optixUtilDenoiserSplitImage( guideLayer->normal, dummyOutput,
+                                                                 overlapWindowSizeInPixels,
+                                                                 tileWidth, tileHeight, normalTiles ) )
+            return res;
+    }
+    std::vector<OptixUtilDenoiserImageTile> flowTiles;
+    if( guideLayer->flow.data )
+    {
+        OptixImage2D dummyOutput = guideLayer->flow;
+        if( const OptixResult res = optixUtilDenoiserSplitImage( guideLayer->flow, dummyOutput,
+                                                                 overlapWindowSizeInPixels,
+                                                                 tileWidth, tileHeight, flowTiles ) )
+            return res;
+    }
+
+    std::vector<OptixUtilDenoiserImageTile> internalGuideLayerTiles;
+    if( guideLayer->previousOutputInternalGuideLayer.data && guideLayer->outputInternalGuideLayer.data )
+    {
+        if( const OptixResult res = optixUtilDenoiserSplitImage( guideLayer->previousOutputInternalGuideLayer,
+                                                                 guideLayer->outputInternalGuideLayer,
+                                                                 upscale * overlapWindowSizeInPixels,
+                                                                 upscale * tileWidth, upscale * tileHeight, internalGuideLayerTiles ) )
+            return res;
+    }
+
+    for( size_t t = 0; t < tiles[0].size(); t++ )
+    {
+        std::vector<OptixDenoiserLayer> tlayers;
+        for( unsigned int l = 0; l < numLayers; l++ )
+        {
+            OptixDenoiserLayer layer = {};
+            layer.input  = ( tiles[l] )[t].input;
+            layer.output = ( tiles[l] )[t].output;
+            if( layers[l].previousOutput.data )
+                layer.previousOutput = ( prevTiles[l] )[t].input;
+            tlayers.push_back( layer );
+        }
+
+        OptixDenoiserGuideLayer gl = {};
+        if( guideLayer->albedo.data )
+            gl.albedo = albedoTiles[t].input;
+
+        if( guideLayer->normal.data )
+            gl.normal = normalTiles[t].input;
+
+        if( guideLayer->flow.data )
+            gl.flow = flowTiles[t].input;
+
+        if( !internalGuideLayerTiles.empty() )  // tiles exist only when both internal guide layer images were supplied
+            gl.previousOutputInternalGuideLayer = internalGuideLayerTiles[t].input;
+
+        if( !internalGuideLayerTiles.empty() )  // same condition as above, avoids indexing an empty tile list
+            gl.outputInternalGuideLayer = internalGuideLayerTiles[t].output;
+
+        if( const OptixResult res =
+                optixDenoiserInvoke( denoiser, stream, params, denoiserState, denoiserStateSizeInBytes,
+                                     &gl, &tlayers[0], numLayers,
+                                     ( tiles[0] )[t].inputOffsetX, ( tiles[0] )[t].inputOffsetY,
+                                     scratch, scratchSizeInBytes ) )
+            return res;
+    }
+    return OPTIX_SUCCESS;
+}
+
+/*@}*/ // end group optix_utilities
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // __optix_optix_stack_size_h__
diff --git a/dependencies/optix/optix_device.h b/dependencies/optix/optix_device.h
new file mode 100644
index 0000000000000000000000000000000000000000..6dcb280e23aba74954dc58fb58b8e1021c29b360
--- /dev/null
+++ b/dependencies/optix/optix_device.h
@@ -0,0 +1,47 @@
+
+/*
+ * Copyright (c) 2021 NVIDIA Corporation. All rights reserved.
+ *
+ * NVIDIA Corporation and its licensors retain all intellectual property and proprietary
+ * rights in and to this software, related documentation and any modifications thereto.
+ * Any use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation is strictly + * prohibited. + * + * TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THIS SOFTWARE IS PROVIDED *AS IS* + * AND NVIDIA AND ITS SUPPLIERS DISCLAIM ALL WARRANTIES, EITHER EXPRESS OR IMPLIED, + * INCLUDING, BUT NOT LIMITED TO, IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE. IN NO EVENT SHALL NVIDIA OR ITS SUPPLIERS BE LIABLE FOR ANY + * SPECIAL, INCIDENTAL, INDIRECT, OR CONSEQUENTIAL DAMAGES WHATSOEVER (INCLUDING, WITHOUT + * LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF + * BUSINESS INFORMATION, OR ANY OTHER PECUNIARY LOSS) ARISING OUT OF THE USE OF OR + * INABILITY TO USE THIS SOFTWARE, EVEN IF NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGES + */ + + /** + * @file optix_device.h + * @author NVIDIA Corporation + * @brief OptiX public API + * + * OptiX public API Reference - Host/Device side + */ + +/******************************************************************************\ + * optix_cuda.h + * + * This file provides the nvcc interface for generating PTX that the OptiX is + * capable of parsing and weaving into the final kernel. This is included by + * optix.h automatically if compiling device code. It can be included explicitly + * in host code if desired. 
+ * +\******************************************************************************/ +#if !defined(__OPTIX_INCLUDE_INTERNAL_HEADERS__) +# define __OPTIX_INCLUDE_INTERNAL_HEADERS__ +# define __UNDEF_OPTIX_INCLUDE_INTERNAL_HEADERS_OPTIX_DEVICE_H__ +#endif +#include "optix_7_device.h" +#if defined( __UNDEF_OPTIX_INCLUDE_INTERNAL_HEADERS_OPTIX_DEVICE_H__ ) +# undef __OPTIX_INCLUDE_INTERNAL_HEADERS__ +# undef __UNDEF_OPTIX_INCLUDE_INTERNAL_HEADERS_OPTIX_DEVICE_H__ +#endif diff --git a/dependencies/optix/optix_function_table.h b/dependencies/optix/optix_function_table.h new file mode 100644 index 0000000000000000000000000000000000000000..c43e8194342cc48dd69547ca2ac832550c99b7ed --- /dev/null +++ b/dependencies/optix/optix_function_table.h @@ -0,0 +1,365 @@ +/* + * Copyright (c) 2021 NVIDIA Corporation. All rights reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and proprietary + * rights in and to this software, related documentation and any modifications thereto. + * Any use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation is strictly + * prohibited. + * + * TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THIS SOFTWARE IS PROVIDED *AS IS* + * AND NVIDIA AND ITS SUPPLIERS DISCLAIM ALL WARRANTIES, EITHER EXPRESS OR IMPLIED, + * INCLUDING, BUT NOT LIMITED TO, IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE. 
IN NO EVENT SHALL NVIDIA OR ITS SUPPLIERS BE LIABLE FOR ANY
+ * SPECIAL, INCIDENTAL, INDIRECT, OR CONSEQUENTIAL DAMAGES WHATSOEVER (INCLUDING, WITHOUT
+ * LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF
+ * BUSINESS INFORMATION, OR ANY OTHER PECUNIARY LOSS) ARISING OUT OF THE USE OF OR
+ * INABILITY TO USE THIS SOFTWARE, EVEN IF NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGES
+ */
+
+/// @file
+/// @author NVIDIA Corporation
+/// @brief OptiX public API header
+
+#ifndef __optix_optix_function_table_h__
+#define __optix_optix_function_table_h__
+
+/// The OptiX ABI version.
+#define OPTIX_ABI_VERSION 68
+
+#ifndef OPTIX_DEFINE_ABI_VERSION_ONLY
+
+#include "optix_types.h"
+
+#if !defined( OPTIX_DONT_INCLUDE_CUDA )
+// If OPTIX_DONT_INCLUDE_CUDA is defined, cuda driver types must be defined through other
+// means before including optix headers.
+#include <cuda.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/// \defgroup optix_function_table Function Table
+/// \brief OptiX Function Table
+
+/** \addtogroup optix_function_table
+@{
+*/
+
+/// The function table containing all API functions.
+///
+/// See #optixInit() and #optixInitWithHandle().
+typedef struct OptixFunctionTable
+{
+    /// \name Error handling
+    //@ {
+
+    /// See ::optixGetErrorName().
+    const char* ( *optixGetErrorName )( OptixResult result );
+
+    /// See ::optixGetErrorString().
+    const char* ( *optixGetErrorString )( OptixResult result );
+
+    //@ }
+    /// \name Device context
+    //@ {
+
+    /// See ::optixDeviceContextCreate().
+    OptixResult ( *optixDeviceContextCreate )( CUcontext fromContext, const OptixDeviceContextOptions* options, OptixDeviceContext* context );
+
+    /// See ::optixDeviceContextDestroy().
+    OptixResult ( *optixDeviceContextDestroy )( OptixDeviceContext context );
+
+    /// See ::optixDeviceContextGetProperty().
+    OptixResult ( *optixDeviceContextGetProperty )( OptixDeviceContext context, OptixDeviceProperty property, void* value, size_t sizeInBytes );
+
+    /// See ::optixDeviceContextSetLogCallback().
+    OptixResult ( *optixDeviceContextSetLogCallback )( OptixDeviceContext context,
+                                                       OptixLogCallback callbackFunction,
+                                                       void* callbackData,
+                                                       unsigned int callbackLevel );
+
+    /// See ::optixDeviceContextSetCacheEnabled().
+    OptixResult ( *optixDeviceContextSetCacheEnabled )( OptixDeviceContext context, int enabled );
+
+    /// See ::optixDeviceContextSetCacheLocation().
+    OptixResult ( *optixDeviceContextSetCacheLocation )( OptixDeviceContext context, const char* location );
+
+    /// See ::optixDeviceContextSetCacheDatabaseSizes().
+    OptixResult ( *optixDeviceContextSetCacheDatabaseSizes )( OptixDeviceContext context, size_t lowWaterMark, size_t highWaterMark );
+
+    /// See ::optixDeviceContextGetCacheEnabled().
+    OptixResult ( *optixDeviceContextGetCacheEnabled )( OptixDeviceContext context, int* enabled );
+
+    /// See ::optixDeviceContextGetCacheLocation().
+    OptixResult ( *optixDeviceContextGetCacheLocation )( OptixDeviceContext context, char* location, size_t locationSize );
+
+    /// See ::optixDeviceContextGetCacheDatabaseSizes().
+    OptixResult ( *optixDeviceContextGetCacheDatabaseSizes )( OptixDeviceContext context, size_t* lowWaterMark, size_t* highWaterMark );
+
+    //@ }
+    /// \name Modules
+    //@ {
+
+    /// See ::optixModuleCreateFromPTX().
+    OptixResult ( *optixModuleCreateFromPTX )( OptixDeviceContext context,
+                                               const OptixModuleCompileOptions* moduleCompileOptions,
+                                               const OptixPipelineCompileOptions* pipelineCompileOptions,
+                                               const char* PTX,
+                                               size_t PTXsize,
+                                               char* logString,
+                                               size_t* logStringSize,
+                                               OptixModule* module );
+
+    /// See ::optixModuleCreateFromPTXWithTasks().
+    OptixResult ( *optixModuleCreateFromPTXWithTasks )( OptixDeviceContext context,
+                                                        const OptixModuleCompileOptions* moduleCompileOptions,
+                                                        const OptixPipelineCompileOptions* pipelineCompileOptions,
+                                                        const char* PTX,
+                                                        size_t PTXsize,
+                                                        char* logString,
+                                                        size_t* logStringSize,
+                                                        OptixModule* module,
+                                                        OptixTask* firstTask );
+
+    /// See ::optixModuleGetCompilationState().
+    OptixResult ( *optixModuleGetCompilationState )( OptixModule module, OptixModuleCompileState* state );
+
+    /// See ::optixModuleDestroy().
+    OptixResult ( *optixModuleDestroy )( OptixModule module );
+
+    /// See ::optixBuiltinISModuleGet().
+    OptixResult( *optixBuiltinISModuleGet )( OptixDeviceContext context,
+                                             const OptixModuleCompileOptions* moduleCompileOptions,
+                                             const OptixPipelineCompileOptions* pipelineCompileOptions,
+                                             const OptixBuiltinISOptions* builtinISOptions,
+                                             OptixModule* builtinModule);
+
+    //@ }
+    /// \name Tasks
+    //@ {
+
+    /// See ::optixTaskExecute().
+    OptixResult ( *optixTaskExecute )( OptixTask task,
+                                       OptixTask* additionalTasks,
+                                       unsigned int maxNumAdditionalTasks,
+                                       unsigned int* numAdditionalTasksCreated );
+    //@ }
+    /// \name Program groups
+    //@ {
+
+    /// See ::optixProgramGroupCreate().
+    OptixResult ( *optixProgramGroupCreate )( OptixDeviceContext context,
+                                              const OptixProgramGroupDesc* programDescriptions,
+                                              unsigned int numProgramGroups,
+                                              const OptixProgramGroupOptions* options,
+                                              char* logString,
+                                              size_t* logStringSize,
+                                              OptixProgramGroup* programGroups );
+
+    /// See ::optixProgramGroupDestroy().
+    OptixResult ( *optixProgramGroupDestroy )( OptixProgramGroup programGroup );
+
+    /// See ::optixProgramGroupGetStackSize().
+    OptixResult ( *optixProgramGroupGetStackSize )( OptixProgramGroup programGroup, OptixStackSizes* stackSizes );
+
+    //@ }
+    /// \name Pipeline
+    //@ {
+
+    /// See ::optixPipelineCreate().
+    OptixResult ( *optixPipelineCreate )( OptixDeviceContext context,
+                                          const OptixPipelineCompileOptions* pipelineCompileOptions,
+                                          const OptixPipelineLinkOptions* pipelineLinkOptions,
+                                          const OptixProgramGroup* programGroups,
+                                          unsigned int numProgramGroups,
+                                          char* logString,
+                                          size_t* logStringSize,
+                                          OptixPipeline* pipeline );
+
+    /// See ::optixPipelineDestroy().
+    OptixResult ( *optixPipelineDestroy )( OptixPipeline pipeline );
+
+    /// See ::optixPipelineSetStackSize().
+    OptixResult ( *optixPipelineSetStackSize )( OptixPipeline pipeline,
+                                                unsigned int directCallableStackSizeFromTraversal,
+                                                unsigned int directCallableStackSizeFromState,
+                                                unsigned int continuationStackSize,
+                                                unsigned int maxTraversableGraphDepth );
+
+    //@ }
+    /// \name Acceleration structures
+    //@ {
+
+    /// See ::optixAccelComputeMemoryUsage().
+    OptixResult ( *optixAccelComputeMemoryUsage )( OptixDeviceContext context,
+                                                   const OptixAccelBuildOptions* accelOptions,
+                                                   const OptixBuildInput* buildInputs,
+                                                   unsigned int numBuildInputs,
+                                                   OptixAccelBufferSizes* bufferSizes );
+
+    /// See ::optixAccelBuild().
+    OptixResult ( *optixAccelBuild )( OptixDeviceContext context,
+                                      CUstream stream,
+                                      const OptixAccelBuildOptions* accelOptions,
+                                      const OptixBuildInput* buildInputs,
+                                      unsigned int numBuildInputs,
+                                      CUdeviceptr tempBuffer,
+                                      size_t tempBufferSizeInBytes,
+                                      CUdeviceptr outputBuffer,
+                                      size_t outputBufferSizeInBytes,
+                                      OptixTraversableHandle* outputHandle,
+                                      const OptixAccelEmitDesc* emittedProperties,
+                                      unsigned int numEmittedProperties );
+
+    /// See ::optixAccelGetRelocationInfo().
+    OptixResult ( *optixAccelGetRelocationInfo )( OptixDeviceContext context, OptixTraversableHandle handle, OptixRelocationInfo* info );
+
+
+    /// See ::optixCheckRelocationCompatibility().
+    OptixResult ( *optixCheckRelocationCompatibility )( OptixDeviceContext context,
+                                                        const OptixRelocationInfo* info,
+                                                        int* compatible );
+
+    /// See ::optixAccelRelocate().
+    OptixResult ( *optixAccelRelocate )( OptixDeviceContext context,
+                                         CUstream stream,
+                                         const OptixRelocationInfo* info,
+                                         const OptixRelocateInput* relocateInputs,
+                                         size_t numRelocateInputs,
+                                         CUdeviceptr targetAccel,
+                                         size_t targetAccelSizeInBytes,
+                                         OptixTraversableHandle* targetHandle );
+
+
+    /// See ::optixAccelCompact().
+    OptixResult ( *optixAccelCompact )( OptixDeviceContext context,
+                                        CUstream stream,
+                                        OptixTraversableHandle inputHandle,
+                                        CUdeviceptr outputBuffer,
+                                        size_t outputBufferSizeInBytes,
+                                        OptixTraversableHandle* outputHandle );
+
+    /// See ::optixConvertPointerToTraversableHandle().
+    OptixResult ( *optixConvertPointerToTraversableHandle )( OptixDeviceContext onDevice,
+                                                             CUdeviceptr pointer,
+                                                             OptixTraversableType traversableType,
+                                                             OptixTraversableHandle* traversableHandle );
+
+    /// See ::optixOpacityMicromapArrayComputeMemoryUsage().
+    OptixResult ( *optixOpacityMicromapArrayComputeMemoryUsage )( OptixDeviceContext context,
+                                                                  const OptixOpacityMicromapArrayBuildInput* buildInput,
+                                                                  OptixMicromapBufferSizes* bufferSizes );
+
+    /// See ::optixOpacityMicromapArrayBuild().
+    OptixResult ( *optixOpacityMicromapArrayBuild )( OptixDeviceContext context,
+                                                     CUstream stream,
+                                                     const OptixOpacityMicromapArrayBuildInput* buildInput,
+                                                     const OptixMicromapBuffers* buffers );
+
+    /// See ::optixOpacityMicromapArrayGetRelocationInfo().
+    OptixResult ( *optixOpacityMicromapArrayGetRelocationInfo )( OptixDeviceContext context,
+                                                                 CUdeviceptr opacityMicromapArray,
+                                                                 OptixRelocationInfo* info );
+
+    /// See ::optixOpacityMicromapArrayRelocate().
+    OptixResult ( *optixOpacityMicromapArrayRelocate )( OptixDeviceContext context,
+                                                        CUstream stream,
+                                                        const OptixRelocationInfo* info,
+                                                        CUdeviceptr targetOpacityMicromapArray,
+                                                        size_t targetOpacityMicromapArraySizeInBytes );
+
+    void ( *reserved1 )( void );  // NOTE(review): placeholder slot with no documented target; presumably keeps later entries in place - confirm
+    void ( *reserved2 )( void );  // NOTE(review): second placeholder slot, same assumption - confirm
+
+    //@ }
+    /// \name Launch
+    //@ {
+
+    /// See ::optixSbtRecordPackHeader().
+    OptixResult ( *optixSbtRecordPackHeader )( OptixProgramGroup programGroup, void* sbtRecordHeaderHostPointer );
+
+    /// See ::optixLaunch().
+    OptixResult ( *optixLaunch )( OptixPipeline pipeline,
+                                  CUstream stream,
+                                  CUdeviceptr pipelineParams,
+                                  size_t pipelineParamsSize,
+                                  const OptixShaderBindingTable* sbt,
+                                  unsigned int width,
+                                  unsigned int height,
+                                  unsigned int depth );
+
+    //@ }
+    /// \name Denoiser
+    //@ {
+
+    /// See ::optixDenoiserCreate().
+    OptixResult ( *optixDenoiserCreate )( OptixDeviceContext context, OptixDenoiserModelKind modelKind, const OptixDenoiserOptions* options, OptixDenoiser* returnHandle );
+
+    /// See ::optixDenoiserDestroy().
+    OptixResult ( *optixDenoiserDestroy )( OptixDenoiser handle );
+
+    /// See ::optixDenoiserComputeMemoryResources().
+    OptixResult ( *optixDenoiserComputeMemoryResources )( const OptixDenoiser handle,
+                                                          unsigned int maximumInputWidth,
+                                                          unsigned int maximumInputHeight,
+                                                          OptixDenoiserSizes* returnSizes );
+
+    /// See ::optixDenoiserSetup().
+    OptixResult ( *optixDenoiserSetup )( OptixDenoiser denoiser,
+                                         CUstream stream,
+                                         unsigned int inputWidth,
+                                         unsigned int inputHeight,
+                                         CUdeviceptr state,
+                                         size_t stateSizeInBytes,
+                                         CUdeviceptr scratch,
+                                         size_t scratchSizeInBytes );
+
+    /// See ::optixDenoiserInvoke().
+    OptixResult ( *optixDenoiserInvoke )( OptixDenoiser denoiser,
+                                          CUstream stream,
+                                          const OptixDenoiserParams* params,
+                                          CUdeviceptr denoiserState,
+                                          size_t denoiserStateSizeInBytes,
+                                          const OptixDenoiserGuideLayer * guideLayer,
+                                          const OptixDenoiserLayer * layers,
+                                          unsigned int numLayers,
+                                          unsigned int inputOffsetX,
+                                          unsigned int inputOffsetY,
+                                          CUdeviceptr scratch,
+                                          size_t scratchSizeInBytes );
+
+    /// See ::optixDenoiserComputeIntensity().
+    OptixResult ( *optixDenoiserComputeIntensity )( OptixDenoiser handle,
+                                                    CUstream stream,
+                                                    const OptixImage2D* inputImage,
+                                                    CUdeviceptr outputIntensity,
+                                                    CUdeviceptr scratch,
+                                                    size_t scratchSizeInBytes );
+
+    /// See ::optixDenoiserComputeAverageColor().
+    OptixResult ( *optixDenoiserComputeAverageColor )( OptixDenoiser handle,
+                                                       CUstream stream,
+                                                       const OptixImage2D* inputImage,
+                                                       CUdeviceptr outputAverageColor,
+                                                       CUdeviceptr scratch,
+                                                       size_t scratchSizeInBytes );
+
+    /// See ::optixDenoiserCreateWithUserModel().
+    OptixResult ( *optixDenoiserCreateWithUserModel )( OptixDeviceContext context, const void * data, size_t dataSizeInBytes, OptixDenoiser* returnHandle );
+    //@ }
+
+} OptixFunctionTable;
+
+/*@}*/ // end group optix_function_table
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* OPTIX_DEFINE_ABI_VERSION_ONLY */
+
+#endif /* __optix_optix_function_table_h__ */
diff --git a/dependencies/optix/optix_function_table_definition.h b/dependencies/optix/optix_function_table_definition.h
new file mode 100644
index 0000000000000000000000000000000000000000..d40dd0d4886186da1b3cd06bba49efc9266ec1a8
--- /dev/null
+++ b/dependencies/optix/optix_function_table_definition.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2021 NVIDIA Corporation. All rights reserved.
+ *
+ * NVIDIA Corporation and its licensors retain all intellectual property and proprietary
+ * rights in and to this software, related documentation and any modifications thereto.
+ * Any use, reproduction, disclosure or distribution of this software and related
+ * documentation without an express license agreement from NVIDIA Corporation is strictly
+ * prohibited.
+ *
+ * TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THIS SOFTWARE IS PROVIDED *AS IS*
+ * AND NVIDIA AND ITS SUPPLIERS DISCLAIM ALL WARRANTIES, EITHER EXPRESS OR IMPLIED,
+ * INCLUDING, BUT NOT LIMITED TO, IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE.
IN NO EVENT SHALL NVIDIA OR ITS SUPPLIERS BE LIABLE FOR ANY + * SPECIAL, INCIDENTAL, INDIRECT, OR CONSEQUENTIAL DAMAGES WHATSOEVER (INCLUDING, WITHOUT + * LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF + * BUSINESS INFORMATION, OR ANY OTHER PECUNIARY LOSS) ARISING OUT OF THE USE OF OR + * INABILITY TO USE THIS SOFTWARE, EVEN IF NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGES + */ + +/// @file +/// @author NVIDIA Corporation +/// @brief OptiX public API header + +#ifndef __optix_optix_function_table_definition_h__ +#define __optix_optix_function_table_definition_h__ + +#include "optix_function_table.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** \addtogroup optix_function_table +@{ +*/ + +/// If the stubs in optix_stubs.h are used, then the function table needs to be defined in exactly +/// one translation unit. This can be achieved by including this header file in that translation +/// unit. +OptixFunctionTable g_optixFunctionTable; + +/*@}*/ // end group optix_function_table + +#ifdef __cplusplus +} +#endif + +#endif // __optix_optix_function_table_definition_h__ diff --git a/dependencies/optix/optix_host.h b/dependencies/optix/optix_host.h new file mode 100644 index 0000000000000000000000000000000000000000..2ed0c824fd93f5d4475b73b1f2f48559b749ca0e --- /dev/null +++ b/dependencies/optix/optix_host.h @@ -0,0 +1,38 @@ + +/* + * Copyright (c) 2021 NVIDIA Corporation. All rights reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and proprietary + * rights in and to this software, related documentation and any modifications thereto. + * Any use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation is strictly + * prohibited. 
+ * + * TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THIS SOFTWARE IS PROVIDED *AS IS* + * AND NVIDIA AND ITS SUPPLIERS DISCLAIM ALL WARRANTIES, EITHER EXPRESS OR IMPLIED, + * INCLUDING, BUT NOT LIMITED TO, IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE. IN NO EVENT SHALL NVIDIA OR ITS SUPPLIERS BE LIABLE FOR ANY + * SPECIAL, INCIDENTAL, INDIRECT, OR CONSEQUENTIAL DAMAGES WHATSOEVER (INCLUDING, WITHOUT + * LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF + * BUSINESS INFORMATION, OR ANY OTHER PECUNIARY LOSS) ARISING OUT OF THE USE OF OR + * INABILITY TO USE THIS SOFTWARE, EVEN IF NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGES + */ + +/** + * @file optix_host.h + * @author NVIDIA Corporation + * @brief OptiX public API + * + * OptiX public API Reference - Host side + */ + +#if !defined(__OPTIX_INCLUDE_INTERNAL_HEADERS__) +# define __OPTIX_INCLUDE_INTERNAL_HEADERS__ +# define __UNDEF_OPTIX_INCLUDE_INTERNAL_HEADERS_OPTIX_HOST_H__ +#endif +#include "optix_7_host.h" +#if defined( __UNDEF_OPTIX_INCLUDE_INTERNAL_HEADERS_OPTIX_HOST_H__ ) +# undef __OPTIX_INCLUDE_INTERNAL_HEADERS__ +# undef __UNDEF_OPTIX_INCLUDE_INTERNAL_HEADERS_OPTIX_HOST_H__ +#endif diff --git a/dependencies/optix/optix_micromap.h b/dependencies/optix/optix_micromap.h new file mode 100644 index 0000000000000000000000000000000000000000..85ddfb3c260a665b6ae4f9e9fc33ffc2005f3461 --- /dev/null +++ b/dependencies/optix/optix_micromap.h @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2021 NVIDIA Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of NVIDIA CORPORATION nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** +* @file optix_micromap.h +* @author NVIDIA Corporation +* @brief OptiX micromap helper functions +* +* OptiX micromap helper functions. Useable on either host or device. +*/ + +#ifndef __optix_optix_micromap_h__ +#define __optix_optix_micromap_h__ + +#if !defined( OPTIX_DONT_INCLUDE_CUDA ) +// If OPTIX_DONT_INCLUDE_CUDA is defined, cuda driver type float2 must be defined through other +// means before including optix headers. +#include <vector_types.h> +#endif +#include "internal/optix_micromap_impl.h" + +/// Convert a micromap triangle index to three base-triangle barycentric coordinates of the micro triangle vertices. +/// The base triangle is the triangle that the micromap is applied to. 
+///
+/// \param[in] microTriangleIndex Index of a micro triangle within a micromap.
+/// \param[in] subdivisionLevel Subdivision level of the micromap.
+/// \param[out] baseBarycentrics0 Barycentric coordinates in the space of the base triangle of vertex 0 of the micro triangle.
+/// \param[out] baseBarycentrics1 Barycentric coordinates in the space of the base triangle of vertex 1 of the micro triangle.
+/// \param[out] baseBarycentrics2 Barycentric coordinates in the space of the base triangle of vertex 2 of the micro triangle.
+OPTIX_MICROMAP_INLINE_FUNC void optixMicromapIndexToBaseBarycentrics( uint32_t microTriangleIndex,
+                                                                      uint32_t subdivisionLevel,
+                                                                      float2& baseBarycentrics0,
+                                                                      float2& baseBarycentrics1,
+                                                                      float2& baseBarycentrics2 )
+{
+    optix_impl::  // thin wrapper: all five arguments are forwarded unchanged to optix_impl::micro2bary
+    micro2bary( microTriangleIndex, subdivisionLevel, baseBarycentrics0, baseBarycentrics1, baseBarycentrics2 );
+}
+
+
+#endif // __optix_optix_micromap_h__
diff --git a/dependencies/optix/optix_stack_size.h b/dependencies/optix/optix_stack_size.h
new file mode 100644
index 0000000000000000000000000000000000000000..a3ab70dfc236bafcd05cd6d89de56a0ec9d755b2
--- /dev/null
+++ b/dependencies/optix/optix_stack_size.h
@@ -0,0 +1,337 @@
+/*
+ * Copyright (c) 2021 NVIDIA Corporation. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of NVIDIA CORPORATION nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/// @file +/// @author NVIDIA Corporation +/// @brief OptiX public API header + +#ifndef __optix_optix_stack_size_h__ +#define __optix_optix_stack_size_h__ + +#include "optix.h" + +#include <algorithm> +#include <cstring> + +#ifdef __cplusplus +extern "C" { +#endif + +/** \addtogroup optix_utilities +@{ +*/ + +/// Retrieves direct and continuation stack sizes for each program in the program group and accumulates the upper bounds +/// in the correponding output variables based on the semantic type of the program. Before the first invocation of this +/// function with a given instance of #OptixStackSizes, the members of that instance should be set to 0. 
+inline OptixResult optixUtilAccumulateStackSizes( OptixProgramGroup programGroup, OptixStackSizes* stackSizes ) +{ + if( !stackSizes ) + return OPTIX_ERROR_INVALID_VALUE; + + OptixStackSizes localStackSizes; + OptixResult result = optixProgramGroupGetStackSize( programGroup, &localStackSizes ); + if( result != OPTIX_SUCCESS ) + return result; + + stackSizes->cssRG = std::max( stackSizes->cssRG, localStackSizes.cssRG ); + stackSizes->cssMS = std::max( stackSizes->cssMS, localStackSizes.cssMS ); + stackSizes->cssCH = std::max( stackSizes->cssCH, localStackSizes.cssCH ); + stackSizes->cssAH = std::max( stackSizes->cssAH, localStackSizes.cssAH ); + stackSizes->cssIS = std::max( stackSizes->cssIS, localStackSizes.cssIS ); + stackSizes->cssCC = std::max( stackSizes->cssCC, localStackSizes.cssCC ); + stackSizes->dssDC = std::max( stackSizes->dssDC, localStackSizes.dssDC ); + + return OPTIX_SUCCESS; +} + +/// Computes the stack size values needed to configure a pipeline. +/// +/// See the programming guide for an explanation of the formula. +/// +/// \param[in] stackSizes Accumulated stack sizes of all programs in the call graph. +/// \param[in] maxTraceDepth Maximum depth of #optixTrace() calls. +/// \param[in] maxCCDepth Maximum depth of calls trees of continuation callables. +/// \param[in] maxDCDepth Maximum depth of calls trees of direct callables. +/// \param[out] directCallableStackSizeFromTraversal Direct stack size requirement for direct callables invoked from +/// IS or AH. +/// \param[out] directCallableStackSizeFromState Direct stack size requirement for direct callables invoked from +/// RG, MS, or CH. +/// \param[out] continuationStackSize Continuation stack requirement. 
+inline OptixResult optixUtilComputeStackSizes( const OptixStackSizes* stackSizes,
+                                               unsigned int           maxTraceDepth,
+                                               unsigned int           maxCCDepth,
+                                               unsigned int           maxDCDepth,
+                                               unsigned int*          directCallableStackSizeFromTraversal,
+                                               unsigned int*          directCallableStackSizeFromState,
+                                               unsigned int*          continuationStackSize )
+{
+    if( !stackSizes )
+        return OPTIX_ERROR_INVALID_VALUE;
+
+    // Read every input before any output is written.
+    const unsigned int rg = stackSizes->cssRG;
+    const unsigned int ms = stackSizes->cssMS;
+    const unsigned int ch = stackSizes->cssCH;
+    const unsigned int ah = stackSizes->cssAH;
+    const unsigned int is = stackSizes->cssIS;
+    const unsigned int cc = stackSizes->cssCC;
+    const unsigned int dc = stackSizes->dssDC;
+
+    // Both direct-callable outputs receive the same bound: call depth times per-call size.
+    const unsigned int dcBound = maxDCDepth * dc;
+
+    // Upper bound on the continuation stack used by call trees of continuation callables.
+    const unsigned int ccTree = maxCCDepth * cc;
+
+    // Upper bound on the continuation stack used by CH or MS programs, including the call
+    // tree of continuation callables.
+    const unsigned int chOrMsWithCC = std::max( ch, ms ) + ccTree;
+
+    // Trace levels above the innermost one: max( maxTraceDepth, 1 ) - 1.
+    const unsigned int outerLevels = ( maxTraceDepth > 1u ) ? maxTraceDepth - 1u : 0u;
+    // 1 if any trace level exists, otherwise 0: min( maxTraceDepth, 1 ).
+    const unsigned int innermost = ( maxTraceDepth > 0u ) ? 1u : 0u;
+
+    // The innermost level may alternatively spend its budget on IS + AH instead of CH/MS.
+    const unsigned int continuation = rg + ccTree
+                                      + outerLevels * chOrMsWithCC
+                                      + innermost * std::max( chOrMsWithCC, is + ah );
+
+    // Null output pointers are skipped.
+    if( directCallableStackSizeFromTraversal )
+        *directCallableStackSizeFromTraversal = dcBound;
+    if( directCallableStackSizeFromState )
+        *directCallableStackSizeFromState = dcBound;
+    if( continuationStackSize )
+        *continuationStackSize = continuation;
+
+    return OPTIX_SUCCESS;
+}
+
+/// Computes the stack size values needed to configure a pipeline.
+///
+/// This variant is similar to #optixUtilComputeStackSizes(), except that it expects the values dssDC and
+/// maxDCDepth split by call site semantic.
+///
+/// See programming guide for an explanation of the formula.
+///
+/// \param[in] stackSizes Accumulated stack sizes of all programs in the call graph.
+/// \param[in]  dssDCFromTraversal                     Accumulated direct stack size of all DC programs invoked from IS
+///                                                    or AH.
+/// \param[in]  dssDCFromState                         Accumulated direct stack size of all DC programs invoked from RG,
+///                                                    MS, or CH.
+/// \param[in]  maxTraceDepth                          Maximum depth of #optixTrace() calls.
+/// \param[in]  maxCCDepth                             Maximum depth of call trees of continuation callables.
+/// \param[in]  maxDCDepthFromTraversal                Maximum depth of call trees of direct callables invoked from IS
+///                                                    or AH.
+/// \param[in]  maxDCDepthFromState                    Maximum depth of call trees of direct callables invoked from RG,
+///                                                    MS, or CH.
+/// \param[out] directCallableStackSizeFromTraversal   Direct stack size requirement for direct callables invoked from
+///                                                    IS or AH.
+/// \param[out] directCallableStackSizeFromState       Direct stack size requirement for direct callables invoked from
+///                                                    RG, MS, or CH.
+/// \param[out] continuationStackSize                  Continuation stack requirement.
+inline OptixResult optixUtilComputeStackSizesDCSplit( const OptixStackSizes* stackSizes,
+                                                      unsigned int           dssDCFromTraversal,
+                                                      unsigned int           dssDCFromState,
+                                                      unsigned int           maxTraceDepth,
+                                                      unsigned int           maxCCDepth,
+                                                      unsigned int           maxDCDepthFromTraversal,
+                                                      unsigned int           maxDCDepthFromState,
+                                                      unsigned int*          directCallableStackSizeFromTraversal,
+                                                      unsigned int*          directCallableStackSizeFromState,
+                                                      unsigned int*          continuationStackSize )
+{
+    if( !stackSizes )
+        return OPTIX_ERROR_INVALID_VALUE;
+
+    // Read every input before any output is written. stackSizes->dssDC is deliberately not
+    // consulted: dssDCFromTraversal and dssDCFromState take its place.
+    const unsigned int rg = stackSizes->cssRG;
+    const unsigned int ms = stackSizes->cssMS;
+    const unsigned int ch = stackSizes->cssCH;
+    const unsigned int ah = stackSizes->cssAH;
+    const unsigned int is = stackSizes->cssIS;
+    const unsigned int cc = stackSizes->cssCC;
+
+    // Each call-site category gets its own bound: its call depth times its per-call size.
+    const unsigned int dcBoundTraversal = maxDCDepthFromTraversal * dssDCFromTraversal;
+    const unsigned int dcBoundState     = maxDCDepthFromState * dssDCFromState;
+
+    // Upper bound on the continuation stack used by call trees of continuation callables.
+    const unsigned int ccTree = maxCCDepth * cc;
+
+    // Upper bound on the continuation stack used by CH or MS programs, including the call
+    // tree of continuation callables.
+    const unsigned int chOrMsWithCC = std::max( ch, ms ) + ccTree;
+
+    // Trace levels above the innermost one: max( maxTraceDepth, 1 ) - 1.
+    const unsigned int outerLevels = ( maxTraceDepth > 1u ) ? maxTraceDepth - 1u : 0u;
+    // 1 if any trace level exists, otherwise 0: min( maxTraceDepth, 1 ).
+    const unsigned int innermost = ( maxTraceDepth > 0u ) ? 1u : 0u;
+
+    const unsigned int continuation = rg + ccTree
+                                      + outerLevels * chOrMsWithCC
+                                      + innermost * std::max( chOrMsWithCC, is + ah );
+
+    // Null output pointers are skipped.
+    if( directCallableStackSizeFromTraversal )
+        *directCallableStackSizeFromTraversal = dcBoundTraversal;
+    if( directCallableStackSizeFromState )
+        *directCallableStackSizeFromState = dcBoundState;
+    if( continuationStackSize )
+        *continuationStackSize = continuation;
+
+    return OPTIX_SUCCESS;
+}
+
+/// Computes the stack size values needed to configure a pipeline.
+///
+/// This variant is similar to #optixUtilComputeStackSizes(), except that it expects the value cssCCTree
+/// instead of cssCC and maxCCDepth.
+///
+/// See programming guide for an explanation of the formula.
+///
+/// \param[in]  stackSizes                             Accumulated stack sizes of all programs in the call graph.
+/// \param[in]  cssCCTree                              Maximum stack size used by call trees of continuation callables.
+/// \param[in]  maxTraceDepth                          Maximum depth of #optixTrace() calls.
+/// \param[in]  maxDCDepth                             Maximum depth of call trees of direct callables.
+/// \param[out] directCallableStackSizeFromTraversal   Direct stack size requirement for direct callables invoked from
+///                                                    IS or AH.
+/// \param[out] directCallableStackSizeFromState       Direct stack size requirement for direct callables invoked from
+///                                                    RG, MS, or CH.
+/// \param[out] continuationStackSize                  Continuation stack requirement.
+inline OptixResult optixUtilComputeStackSizesCssCCTree( const OptixStackSizes* stackSizes,
+                                                        unsigned int           cssCCTree,
+                                                        unsigned int           maxTraceDepth,
+                                                        unsigned int           maxDCDepth,
+                                                        unsigned int*          directCallableStackSizeFromTraversal,
+                                                        unsigned int*          directCallableStackSizeFromState,
+                                                        unsigned int*          continuationStackSize )
+{
+    if( !stackSizes )
+        return OPTIX_ERROR_INVALID_VALUE;
+
+    // Read every input before any output is written. stackSizes->cssCC is deliberately not
+    // consulted: the caller supplies the finished call-tree bound as cssCCTree.
+    const unsigned int rg = stackSizes->cssRG;
+    const unsigned int ms = stackSizes->cssMS;
+    const unsigned int ch = stackSizes->cssCH;
+    const unsigned int ah = stackSizes->cssAH;
+    const unsigned int is = stackSizes->cssIS;
+    const unsigned int dc = stackSizes->dssDC;
+
+    // Both direct-callable outputs receive the same bound: call depth times per-call size.
+    const unsigned int dcBound = maxDCDepth * dc;
+
+    // Upper bound on the continuation stack used by CH or MS programs, including the call
+    // tree of continuation callables.
+    const unsigned int chOrMsWithCC = std::max( ch, ms ) + cssCCTree;
+
+    // Trace levels above the innermost one: max( maxTraceDepth, 1 ) - 1.
+    const unsigned int outerLevels = ( maxTraceDepth > 1u ) ? maxTraceDepth - 1u : 0u;
+    // 1 if any trace level exists, otherwise 0: min( maxTraceDepth, 1 ).
+    const unsigned int innermost = ( maxTraceDepth > 0u ) ? 1u : 0u;
+
+    const unsigned int continuation = rg + cssCCTree
+                                      + outerLevels * chOrMsWithCC
+                                      + innermost * std::max( chOrMsWithCC, is + ah );
+
+    // Null output pointers are skipped.
+    if( directCallableStackSizeFromTraversal )
+        *directCallableStackSizeFromTraversal = dcBound;
+    if( directCallableStackSizeFromState )
+        *directCallableStackSizeFromState = dcBound;
+    if( continuationStackSize )
+        *continuationStackSize = continuation;
+
+    return OPTIX_SUCCESS;
+}
+
+/// Computes the stack size values needed to configure a pipeline.
+///
+/// This variant is a specialization of #optixUtilComputeStackSizes() for a simple path tracer with the following
+/// assumptions: There are only two ray types, camera rays and shadow rays. There are only RG, MS, and CH programs, and
+/// no AH, IS, CC, or DC programs. The camera rays invoke only the miss and closest hit programs MS1 and CH1,
+/// respectively.
The CH1 program might trace shadow rays, which invoke only the miss and closest hit programs MS2 and +/// CH2, respectively. +/// +/// For flexibility, we allow for each of CH1 and CH2 not just one single program group, but an array of programs +/// groups, and compute the maximas of the stack size requirements per array. +/// +/// See programming guide for an explanation of the formula. +inline OptixResult optixUtilComputeStackSizesSimplePathTracer( OptixProgramGroup programGroupRG, + OptixProgramGroup programGroupMS1, + const OptixProgramGroup* programGroupCH1, + unsigned int programGroupCH1Count, + OptixProgramGroup programGroupMS2, + const OptixProgramGroup* programGroupCH2, + unsigned int programGroupCH2Count, + unsigned int* directCallableStackSizeFromTraversal, + unsigned int* directCallableStackSizeFromState, + unsigned int* continuationStackSize ) +{ + if( !programGroupCH1 && ( programGroupCH1Count > 0 ) ) + return OPTIX_ERROR_INVALID_VALUE; + if( !programGroupCH2 && ( programGroupCH2Count > 0 ) ) + return OPTIX_ERROR_INVALID_VALUE; + + OptixResult result; + + OptixStackSizes stackSizesRG = {}; + result = optixProgramGroupGetStackSize( programGroupRG, &stackSizesRG ); + if( result != OPTIX_SUCCESS ) + return result; + + OptixStackSizes stackSizesMS1 = {}; + result = optixProgramGroupGetStackSize( programGroupMS1, &stackSizesMS1 ); + if( result != OPTIX_SUCCESS ) + return result; + + OptixStackSizes stackSizesCH1 = {}; + for( unsigned int i = 0; i < programGroupCH1Count; ++i ) + { + result = optixUtilAccumulateStackSizes( programGroupCH1[i], &stackSizesCH1 ); + if( result != OPTIX_SUCCESS ) + return result; + } + + OptixStackSizes stackSizesMS2 = {}; + result = optixProgramGroupGetStackSize( programGroupMS2, &stackSizesMS2 ); + if( result != OPTIX_SUCCESS ) + return result; + + OptixStackSizes stackSizesCH2 = {}; + memset( &stackSizesCH2, 0, sizeof( OptixStackSizes ) ); + for( unsigned int i = 0; i < programGroupCH2Count; ++i ) + { + result = 
optixUtilAccumulateStackSizes( programGroupCH2[i], &stackSizesCH2 ); + if( result != OPTIX_SUCCESS ) + return result; + } + + const unsigned int cssRG = stackSizesRG.cssRG; + const unsigned int cssMS1 = stackSizesMS1.cssMS; + const unsigned int cssCH1 = stackSizesCH1.cssCH; + const unsigned int cssMS2 = stackSizesMS2.cssMS; + const unsigned int cssCH2 = stackSizesCH2.cssCH; + // no AH, IS, CC, or DC programs + + if( directCallableStackSizeFromTraversal ) + *directCallableStackSizeFromTraversal = 0; + if( directCallableStackSizeFromState ) + *directCallableStackSizeFromState = 0; + + if( continuationStackSize ) + *continuationStackSize = cssRG + std::max( cssMS1, cssCH1 + std::max( cssMS2, cssCH2 ) ); + + return OPTIX_SUCCESS; +} + +/*@}*/ // end group optix_utilities + +#ifdef __cplusplus +} +#endif + +#endif // __optix_optix_stack_size_h__ diff --git a/dependencies/optix/optix_stubs.h b/dependencies/optix/optix_stubs.h new file mode 100644 index 0000000000000000000000000000000000000000..469825822e39894f6e82d1c710b93a85256857ff --- /dev/null +++ b/dependencies/optix/optix_stubs.h @@ -0,0 +1,682 @@ +/* + * Copyright (c) 2021 NVIDIA Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of NVIDIA CORPORATION nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/// @file +/// @author NVIDIA Corporation +/// @brief OptiX public API header + +#ifndef __optix_optix_stubs_h__ +#define __optix_optix_stubs_h__ + +#include "optix_function_table.h" + +#ifdef _WIN32 +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN 1 +#endif +#include <windows.h> +// The cfgmgr32 header is necessary for interrogating driver information in the registry. +// For convenience the library is also linked in automatically using the #pragma command. +#include <cfgmgr32.h> +#pragma comment( lib, "Cfgmgr32.lib" ) +#include <string.h> +#else +#include <dlfcn.h> +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +// The function table needs to be defined in exactly one translation unit. This can be +// achieved by including optix_function_table_definition.h in that translation unit. +extern OptixFunctionTable g_optixFunctionTable; + +#ifdef _WIN32 +#if defined( _MSC_VER ) +// Visual Studio produces warnings suggesting strcpy and friends being replaced with _s +// variants. All the string lengths and allocation sizes have been calculated and should +// be safe, so we are disabling this warning to increase compatibility. 
+#    pragma warning( push )
+#    pragma warning( disable : 4996 )
+#endif
+/// Tries to load the DLL called optixDllName and returns its module handle, or NULL on failure.
+///
+/// Two locations are tried in order:
+///  1. The Windows system directory as reported by GetSystemDirectoryA().
+///  2. For every present device of the class identified by deviceInstanceIdentifiersGUID: the
+///     directory part of that device's "OpenGLDriverName" registry value.
+///
+/// \param[in] optixDllName  File name of the DLL (without directory), e.g. "nvoptix.dll".
+/// \return Handle returned by LoadLibraryA() for the first location that succeeds, otherwise NULL.
+static void* optixLoadWindowsDllFromName( const char* optixDllName )
+{
+    void* handle = NULL;
+
+
+    // Get the size of the path first, then allocate
+    unsigned int size = GetSystemDirectoryA( NULL, 0 );
+    if( size == 0 )
+    {
+        // Couldn't get the system path size, so bail
+        return NULL;
+    }
+    // Room for the system path (size already counts its terminator), a backslash and the DLL name.
+    size_t pathSize   = size + 1 + strlen( optixDllName );
+    char*  systemPath = (char*)malloc( pathSize );
+    if( systemPath == NULL )
+        return NULL;
+    if( GetSystemDirectoryA( systemPath, size ) != size - 1 )
+    {
+        // Something went wrong
+        free( systemPath );
+        return NULL;
+    }
+    strcat( systemPath, "\\" );
+    strcat( systemPath, optixDllName );
+    handle = LoadLibraryA( systemPath );
+    free( systemPath );
+    if( handle )
+        return handle;
+
+    // If we didn't find it, go looking in the register store.  Since nvoptix.dll doesn't
+    // have its own registry entry, we are going to look for the opengl driver which lives
+    // next to nvoptix.dll.  0 (null) will be returned if any errors occurred.
+
+    static const char* deviceInstanceIdentifiersGUID = "{4d36e968-e325-11ce-bfc1-08002be10318}";
+    const ULONG        flags                         = CM_GETIDLIST_FILTER_CLASS | CM_GETIDLIST_FILTER_PRESENT;
+    ULONG              deviceListSize                = 0;
+    if( CM_Get_Device_ID_List_SizeA( &deviceListSize, deviceInstanceIdentifiersGUID, flags ) != CR_SUCCESS )
+    {
+        return NULL;
+    }
+    char* deviceNames = (char*)malloc( deviceListSize );
+    if( deviceNames == NULL )
+        return NULL;
+    if( CM_Get_Device_ID_ListA( deviceInstanceIdentifiersGUID, deviceNames, deviceListSize, flags ) )
+    {
+        free( deviceNames );
+        return NULL;
+    }
+    DEVINST devID   = 0;
+    char*   dllPath = NULL;
+
+    // Continue to the next device if errors are encountered.
+    // deviceNames is a sequence of NUL-terminated strings ended by an empty string.
+    for( char* deviceName = deviceNames; *deviceName; deviceName += strlen( deviceName ) + 1 )
+    {
+        if( CM_Locate_DevNodeA( &devID, deviceName, CM_LOCATE_DEVNODE_NORMAL ) != CR_SUCCESS )
+        {
+            continue;
+        }
+        HKEY regKey = 0;
+        if( CM_Open_DevNode_Key( devID, KEY_QUERY_VALUE, 0, RegDisposition_OpenExisting, &regKey, CM_REGISTRY_SOFTWARE ) != CR_SUCCESS )
+        {
+            continue;
+        }
+        const char* valueName = "OpenGLDriverName";
+        DWORD       valueSize = 0;
+        // First call only asks for the size of the value.
+        LSTATUS     ret       = RegQueryValueExA( regKey, valueName, NULL, NULL, NULL, &valueSize );
+        if( ret != ERROR_SUCCESS )
+        {
+            RegCloseKey( regKey );
+            continue;
+        }
+        // One extra byte: registry strings are not guaranteed to be stored NUL-terminated,
+        // but the string functions used below require a terminator.
+        char* regValue = (char*)malloc( (size_t)valueSize + 1 );
+        if( regValue == NULL )
+        {
+            RegCloseKey( regKey );
+            continue;
+        }
+        ret = RegQueryValueExA( regKey, valueName, NULL, NULL, (LPBYTE)regValue, &valueSize );
+        if( ret != ERROR_SUCCESS || valueSize == 0 )
+        {
+            // Query failed or the value is empty: nothing to derive a directory from.
+            free( regValue );
+            RegCloseKey( regKey );
+            continue;
+        }
+        // valueSize now holds the number of bytes actually written, which never exceeds the
+        // size passed in, so this index is inside the allocation.
+        regValue[valueSize] = '\0';
+        // Strip the opengl driver dll name from the string then create a new string with
+        // the path and the nvoptix.dll name
+        for( int i = (int)valueSize - 1; i >= 0 && regValue[i] != '\\'; --i )
+            regValue[i] = '\0';
+        size_t newPathSize = strlen( regValue ) + strlen( optixDllName ) + 1;
+        dllPath            = (char*)malloc( newPathSize );
+        if( dllPath == NULL )
+        {
+            free( regValue );
+            RegCloseKey( regKey );
+            continue;
+        }
+        strcpy( dllPath, regValue );
+        strcat( dllPath, optixDllName );
+        free( regValue );
+        RegCloseKey( regKey );
+        handle = LoadLibraryA( (LPCSTR)dllPath );
+        free( dllPath );
+        if( handle )
+            break;
+    }
+    free( deviceNames );
+    return handle;
+}
+#if defined( _MSC_VER )
+#    pragma warning( pop )
+#endif
+
+/// Loads "nvoptix.dll" via optixLoadWindowsDllFromName(); returns its handle or NULL.
+static void* optixLoadWindowsDll( )
+{
+    return optixLoadWindowsDllFromName( "nvoptix.dll" );
+}
+#endif
+
+/// \defgroup optix_utilities Utilities
+/// \brief OptiX Utilities
+
+/** \addtogroup optix_utilities
+@{
+*/
+
+/// Loads the OptiX library and initializes the function table used by the stubs below.
+///
+/// handlePtr must not be null: a null handlePtr is rejected with OPTIX_ERROR_INVALID_VALUE.
+/// Otherwise the OS-specific handle of the library (null if the library could not be loaded)
+/// is stored in *handlePtr.
+///
+/// \return OPTIX_ERROR_INVALID_VALUE if handlePtr is null, OPTIX_ERROR_LIBRARY_NOT_FOUND if the
+///         library could not be loaded, OPTIX_ERROR_ENTRY_SYMBOL_NOT_FOUND if the library lacks
+///         the "optixQueryFunctionTable" symbol, otherwise the result of that entry function.
+///
+/// \see #optixUninitWithHandle
+inline OptixResult optixInitWithHandle( void** handlePtr )
+{
+    // Make sure these functions get initialized to zero in case the DLL and function
+    // table can't be loaded
+    g_optixFunctionTable.optixGetErrorName   = 0;
+    g_optixFunctionTable.optixGetErrorString = 0;
+
+    if( !handlePtr )
+        return OPTIX_ERROR_INVALID_VALUE;
+
+    // NOTE(review): when the symbol lookup below fails, the library stays loaded and its
+    // handle remains in *handlePtr; unloading is left to the caller.
+#ifdef _WIN32
+    *handlePtr = optixLoadWindowsDll();
+    if( !*handlePtr )
+        return OPTIX_ERROR_LIBRARY_NOT_FOUND;
+
+    void* symbol = GetProcAddress( (HMODULE)*handlePtr, "optixQueryFunctionTable" );
+    if( !symbol )
+        return OPTIX_ERROR_ENTRY_SYMBOL_NOT_FOUND;
+#else
+    *handlePtr = dlopen( "libnvoptix.so.1", RTLD_NOW );
+    if( !*handlePtr )
+        return OPTIX_ERROR_LIBRARY_NOT_FOUND;
+
+    void* symbol = dlsym( *handlePtr, "optixQueryFunctionTable" );
+    if( !symbol )
+        return OPTIX_ERROR_ENTRY_SYMBOL_NOT_FOUND;
+#endif
+
+    OptixQueryFunctionTable_t* optixQueryFunctionTable = (OptixQueryFunctionTable_t*)symbol;
+
+    // Let the library fill g_optixFunctionTable for the ABI version this header was built for.
+    return optixQueryFunctionTable( OPTIX_ABI_VERSION, 0, 0, 0, &g_optixFunctionTable, sizeof( g_optixFunctionTable ) );
+}
+
+/// Loads the OptiX library and initializes the function table used by the stubs below.
+///
+/// A variant of #optixInitWithHandle() that does not make the handle to the loaded library available.
+inline OptixResult optixInit( void )
+{
+    // The handle is written by optixInitWithHandle() and discarded here.
+    void* handle;
+    return optixInitWithHandle( &handle );
+}
+
+/// Unloads the OptiX library and zeros the function table used by the stubs below. Takes the
+/// handle returned by optixInitWithHandle. All OptixDeviceContext objects must be destroyed
+/// before calling this function, or the behavior is undefined.
+///
+/// \return OPTIX_ERROR_INVALID_VALUE if handle is null, OPTIX_ERROR_LIBRARY_UNLOAD_FAILURE if
+///         the OS call that unloads the library reports failure, otherwise OPTIX_SUCCESS.
+///
+/// \see #optixInitWithHandle
+inline OptixResult optixUninitWithHandle( void* handle )
+{
+    if( !handle )
+        return OPTIX_ERROR_INVALID_VALUE;
+#ifdef _WIN32
+    // A zero return value from FreeLibrary() is treated as failure.
+    if( !FreeLibrary( (HMODULE)handle ) )
+        return OPTIX_ERROR_LIBRARY_UNLOAD_FAILURE;
+#else
+    // A non-zero return value from dlclose() is treated as failure.
+    if( dlclose( handle ) )
+        return OPTIX_ERROR_LIBRARY_UNLOAD_FAILURE;
+#endif
+    // Only reached after a successful unload: overwrite the function table with a zeroed one.
+    OptixFunctionTable empty = { 0 };
+    g_optixFunctionTable     = empty;
+    return OPTIX_SUCCESS;
+}
+
+
+/*@}*/  // end group optix_utilities
+
+#ifndef OPTIX_DOXYGEN_SHOULD_SKIP_THIS
+
+// Stub functions that forward calls to the corresponding function pointer in the function table.
+// Apart from optixGetErrorName() and optixGetErrorString(), the stubs call through the pointer
+// without checking it for null.
+
+// Returns the name of an OptixResult. Uses the function table entry when it is set; otherwise
+// falls back to a built-in list that covers the codes produced while loading the library.
+inline const char* optixGetErrorName( OptixResult result )
+{
+    if( g_optixFunctionTable.optixGetErrorName )
+        return g_optixFunctionTable.optixGetErrorName( result );
+
+    // If the DLL and symbol table couldn't be loaded, provide a set of error strings
+    // suitable for processing errors related to the DLL loading.
+    switch( result )
+    {
+        case OPTIX_SUCCESS:
+            return "OPTIX_SUCCESS";
+        case OPTIX_ERROR_INVALID_VALUE:
+            return "OPTIX_ERROR_INVALID_VALUE";
+        case OPTIX_ERROR_UNSUPPORTED_ABI_VERSION:
+            return "OPTIX_ERROR_UNSUPPORTED_ABI_VERSION";
+        case OPTIX_ERROR_FUNCTION_TABLE_SIZE_MISMATCH:
+            return "OPTIX_ERROR_FUNCTION_TABLE_SIZE_MISMATCH";
+        case OPTIX_ERROR_INVALID_ENTRY_FUNCTION_OPTIONS:
+            return "OPTIX_ERROR_INVALID_ENTRY_FUNCTION_OPTIONS";
+        case OPTIX_ERROR_LIBRARY_NOT_FOUND:
+            return "OPTIX_ERROR_LIBRARY_NOT_FOUND";
+        case OPTIX_ERROR_ENTRY_SYMBOL_NOT_FOUND:
+            return "OPTIX_ERROR_ENTRY_SYMBOL_NOT_FOUND";
+        case OPTIX_ERROR_LIBRARY_UNLOAD_FAILURE:
+            return "OPTIX_ERROR_LIBRARY_UNLOAD_FAILURE";
+        default:
+            return "Unknown OptixResult code";
+    }
+}
+
+// Returns a human-readable description of an OptixResult. Same fallback scheme as
+// optixGetErrorName().
+inline const char* optixGetErrorString( OptixResult result )
+{
+    if( g_optixFunctionTable.optixGetErrorString )
+        return g_optixFunctionTable.optixGetErrorString( result );
+
+    // If the DLL and symbol table couldn't be loaded, provide a set of error strings
+    // suitable for processing errors related to the DLL loading.
+    switch( result )
+    {
+        case OPTIX_SUCCESS:
+            return "Success";
+        case OPTIX_ERROR_INVALID_VALUE:
+            return "Invalid value";
+        case OPTIX_ERROR_UNSUPPORTED_ABI_VERSION:
+            return "Unsupported ABI version";
+        case OPTIX_ERROR_FUNCTION_TABLE_SIZE_MISMATCH:
+            return "Function table size mismatch";
+        case OPTIX_ERROR_INVALID_ENTRY_FUNCTION_OPTIONS:
+            return "Invalid options to entry function";
+        case OPTIX_ERROR_LIBRARY_NOT_FOUND:
+            return "Library not found";
+        case OPTIX_ERROR_ENTRY_SYMBOL_NOT_FOUND:
+            return "Entry symbol not found";
+        case OPTIX_ERROR_LIBRARY_UNLOAD_FAILURE:
+            return "Library could not be unloaded";
+        default:
+            return "Unknown OptixResult code";
+    }
+}
+
+// --- optixDeviceContext* stubs ---
+
+inline OptixResult optixDeviceContextCreate( CUcontext fromContext, const OptixDeviceContextOptions* options, OptixDeviceContext* context )
+{
+    return g_optixFunctionTable.optixDeviceContextCreate( fromContext, options, context );
+}
+
+inline OptixResult optixDeviceContextDestroy( OptixDeviceContext context )
+{
+    return g_optixFunctionTable.optixDeviceContextDestroy( context );
+}
+
+inline OptixResult optixDeviceContextGetProperty( OptixDeviceContext context, OptixDeviceProperty property, void* value, size_t sizeInBytes )
+{
+    return g_optixFunctionTable.optixDeviceContextGetProperty( context, property, value, sizeInBytes );
+}
+
+inline OptixResult optixDeviceContextSetLogCallback( OptixDeviceContext context,
+                                                     OptixLogCallback   callbackFunction,
+                                                     void*              callbackData,
+                                                     unsigned int       callbackLevel )
+{
+    return g_optixFunctionTable.optixDeviceContextSetLogCallback( context, callbackFunction, callbackData, callbackLevel );
+}
+
+inline OptixResult optixDeviceContextSetCacheEnabled( OptixDeviceContext context, int enabled )
+{
+    return g_optixFunctionTable.optixDeviceContextSetCacheEnabled( context, enabled );
+}
+
+inline OptixResult optixDeviceContextSetCacheLocation( OptixDeviceContext context, const char* location )
+{
+    return g_optixFunctionTable.optixDeviceContextSetCacheLocation( context, location );
+}
+
+inline OptixResult optixDeviceContextSetCacheDatabaseSizes( OptixDeviceContext context, size_t lowWaterMark, size_t highWaterMark )
+{
+    return g_optixFunctionTable.optixDeviceContextSetCacheDatabaseSizes( context, lowWaterMark, highWaterMark );
+}
+
+inline OptixResult optixDeviceContextGetCacheEnabled( OptixDeviceContext context, int* enabled )
+{
+    return g_optixFunctionTable.optixDeviceContextGetCacheEnabled( context, enabled );
+}
+
+inline OptixResult optixDeviceContextGetCacheLocation( OptixDeviceContext context, char* location, size_t locationSize )
+{
+    return g_optixFunctionTable.optixDeviceContextGetCacheLocation( context, location, locationSize );
+}
+
+inline OptixResult optixDeviceContextGetCacheDatabaseSizes( OptixDeviceContext context, size_t* lowWaterMark, size_t* highWaterMark )
+{
+    return g_optixFunctionTable.optixDeviceContextGetCacheDatabaseSizes( context, lowWaterMark, highWaterMark );
+}
+
+// --- optixModuleCreate* stubs ---
+
+inline OptixResult optixModuleCreateFromPTX( OptixDeviceContext                 context,
+                                             const OptixModuleCompileOptions*   moduleCompileOptions,
+                                             const OptixPipelineCompileOptions* pipelineCompileOptions,
+                                             const char*                        PTX,
+                                             size_t                             PTXsize,
+                                             char*                              logString,
+                                             size_t*                            logStringSize,
+                                             OptixModule*                       module )
+{
+    return g_optixFunctionTable.optixModuleCreateFromPTX( context, moduleCompileOptions, pipelineCompileOptions, PTX,
+                                                          PTXsize, logString, logStringSize, module );
+}
+
+inline OptixResult optixModuleCreateFromPTXWithTasks( OptixDeviceContext                 context,
+                                                      const OptixModuleCompileOptions*   moduleCompileOptions,
+                                                      const OptixPipelineCompileOptions* pipelineCompileOptions,
+                                                      const char*                        PTX,
+                                                      size_t                             PTXsize,
+                                                      char*                              logString,
+                                                      size_t*                            logStringSize,
+                                                      OptixModule*                       module,
+                                                      OptixTask*                         firstTask )
+{
+    return g_optixFunctionTable.optixModuleCreateFromPTXWithTasks( context, moduleCompileOptions, pipelineCompileOptions, PTX,
+                                                                   PTXsize, logString, logStringSize, module, firstTask );
+}
+
+// The stubs in this section forward their arguments unchanged to the function pointer of the
+// same name in g_optixFunctionTable and return its result. The pointer is not checked for null.
+
+// --- optixModule* / optixBuiltinISModuleGet / optixTaskExecute stubs ---
+
+inline OptixResult optixModuleGetCompilationState( OptixModule module, OptixModuleCompileState* state )
+{
+    return g_optixFunctionTable.optixModuleGetCompilationState( module, state );
+}
+
+inline OptixResult optixModuleDestroy( OptixModule module )
+{
+    return g_optixFunctionTable.optixModuleDestroy( module );
+}
+
+inline OptixResult optixBuiltinISModuleGet( OptixDeviceContext                 context,
+                                            const OptixModuleCompileOptions*   moduleCompileOptions,
+                                            const OptixPipelineCompileOptions* pipelineCompileOptions,
+                                            const OptixBuiltinISOptions*       builtinISOptions,
+                                            OptixModule*                       builtinModule )
+{
+    return g_optixFunctionTable.optixBuiltinISModuleGet( context, moduleCompileOptions, pipelineCompileOptions,
+                                                         builtinISOptions, builtinModule );
+}
+
+inline OptixResult optixTaskExecute( OptixTask task, OptixTask* additionalTasks, unsigned int maxNumAdditionalTasks, unsigned int* numAdditionalTasksCreated )
+{
+    return g_optixFunctionTable.optixTaskExecute( task, additionalTasks, maxNumAdditionalTasks, numAdditionalTasksCreated );
+}
+
+// --- optixProgramGroup* stubs ---
+
+inline OptixResult optixProgramGroupCreate( OptixDeviceContext              context,
+                                            const OptixProgramGroupDesc*    programDescriptions,
+                                            unsigned int                    numProgramGroups,
+                                            const OptixProgramGroupOptions* options,
+                                            char*                           logString,
+                                            size_t*                         logStringSize,
+                                            OptixProgramGroup*              programGroups )
+{
+    return g_optixFunctionTable.optixProgramGroupCreate( context, programDescriptions, numProgramGroups, options,
+                                                         logString, logStringSize, programGroups );
+}
+
+inline OptixResult optixProgramGroupDestroy( OptixProgramGroup programGroup )
+{
+    return g_optixFunctionTable.optixProgramGroupDestroy( programGroup );
+}
+
+inline OptixResult optixProgramGroupGetStackSize( OptixProgramGroup programGroup, OptixStackSizes* stackSizes )
+{
+    return g_optixFunctionTable.optixProgramGroupGetStackSize( programGroup, stackSizes );
+}
+
+// --- optixPipeline* stubs ---
+
+inline OptixResult optixPipelineCreate( OptixDeviceContext                 context,
+                                        const OptixPipelineCompileOptions* pipelineCompileOptions,
+                                        const OptixPipelineLinkOptions*    pipelineLinkOptions,
+                                        const OptixProgramGroup*           programGroups,
+                                        unsigned int                       numProgramGroups,
+                                        char*                              logString,
+                                        size_t*                            logStringSize,
+                                        OptixPipeline*                     pipeline )
+{
+    return g_optixFunctionTable.optixPipelineCreate( context, pipelineCompileOptions, pipelineLinkOptions, programGroups,
+                                                     numProgramGroups, logString, logStringSize, pipeline );
+}
+
+inline OptixResult optixPipelineDestroy( OptixPipeline pipeline )
+{
+    return g_optixFunctionTable.optixPipelineDestroy( pipeline );
+}
+
+inline OptixResult optixPipelineSetStackSize( OptixPipeline pipeline,
+                                              unsigned int  directCallableStackSizeFromTraversal,
+                                              unsigned int  directCallableStackSizeFromState,
+                                              unsigned int  continuationStackSize,
+                                              unsigned int  maxTraversableGraphDepth )
+{
+    return g_optixFunctionTable.optixPipelineSetStackSize( pipeline, directCallableStackSizeFromTraversal, directCallableStackSizeFromState,
+                                                           continuationStackSize, maxTraversableGraphDepth );
+}
+
+// --- optixAccel* / relocation / traversable-handle stubs ---
+
+inline OptixResult optixAccelComputeMemoryUsage( OptixDeviceContext            context,
+                                                 const OptixAccelBuildOptions* accelOptions,
+                                                 const OptixBuildInput*        buildInputs,
+                                                 unsigned int                  numBuildInputs,
+                                                 OptixAccelBufferSizes*        bufferSizes )
+{
+    return g_optixFunctionTable.optixAccelComputeMemoryUsage( context, accelOptions, buildInputs, numBuildInputs, bufferSizes );
+}
+
+inline OptixResult optixAccelBuild( OptixDeviceContext            context,
+                                    CUstream                      stream,
+                                    const OptixAccelBuildOptions* accelOptions,
+                                    const OptixBuildInput*        buildInputs,
+                                    unsigned int                  numBuildInputs,
+                                    CUdeviceptr                   tempBuffer,
+                                    size_t                        tempBufferSizeInBytes,
+                                    CUdeviceptr                   outputBuffer,
+                                    size_t                        outputBufferSizeInBytes,
+                                    OptixTraversableHandle*       outputHandle,
+                                    const OptixAccelEmitDesc*     emittedProperties,
+                                    unsigned int                  numEmittedProperties )
+{
+    return g_optixFunctionTable.optixAccelBuild( context, stream, accelOptions, buildInputs, numBuildInputs, tempBuffer,
+                                                 tempBufferSizeInBytes, outputBuffer, outputBufferSizeInBytes,
+                                                 outputHandle, emittedProperties, numEmittedProperties );
+}
+
+
+inline OptixResult optixAccelGetRelocationInfo( OptixDeviceContext context, OptixTraversableHandle handle, OptixRelocationInfo* info )
+{
+    return g_optixFunctionTable.optixAccelGetRelocationInfo( context, handle, info );
+}
+
+
+inline OptixResult optixCheckRelocationCompatibility( OptixDeviceContext context, const OptixRelocationInfo* info, int* compatible )
+{
+    return g_optixFunctionTable.optixCheckRelocationCompatibility( context, info, compatible );
+}
+
+inline OptixResult optixAccelRelocate( OptixDeviceContext         context,
+                                       CUstream                   stream,
+                                       const OptixRelocationInfo* info,
+                                       const OptixRelocateInput*  relocateInputs,
+                                       size_t                     numRelocateInputs,
+                                       CUdeviceptr                targetAccel,
+                                       size_t                     targetAccelSizeInBytes,
+                                       OptixTraversableHandle*    targetHandle )
+{
+    return g_optixFunctionTable.optixAccelRelocate( context, stream, info, relocateInputs, numRelocateInputs,
+                                                    targetAccel, targetAccelSizeInBytes, targetHandle );
+}
+
+inline OptixResult optixAccelCompact( OptixDeviceContext      context,
+                                      CUstream                stream,
+                                      OptixTraversableHandle  inputHandle,
+                                      CUdeviceptr             outputBuffer,
+                                      size_t                  outputBufferSizeInBytes,
+                                      OptixTraversableHandle* outputHandle )
+{
+    return g_optixFunctionTable.optixAccelCompact( context, stream, inputHandle, outputBuffer, outputBufferSizeInBytes, outputHandle );
+}
+
+inline OptixResult optixConvertPointerToTraversableHandle( OptixDeviceContext      onDevice,
+                                                           CUdeviceptr             pointer,
+                                                           OptixTraversableType    traversableType,
+                                                           OptixTraversableHandle* traversableHandle )
+{
+    return g_optixFunctionTable.optixConvertPointerToTraversableHandle( onDevice, pointer, traversableType, traversableHandle );
+}
+
+// --- optixOpacityMicromapArray* stubs ---
+
+inline OptixResult optixOpacityMicromapArrayComputeMemoryUsage( OptixDeviceContext                         context,
+                                                                const OptixOpacityMicromapArrayBuildInput* buildInput,
+                                                                OptixMicromapBufferSizes*                  bufferSizes )
+{
+    return g_optixFunctionTable.optixOpacityMicromapArrayComputeMemoryUsage( context, buildInput, bufferSizes );
+}
+
+// The stubs in this section forward their arguments unchanged to the function pointer of the
+// same name in g_optixFunctionTable and return its result. The pointer is not checked for null.
+
+// --- optixOpacityMicromapArray* stubs ---
+
+inline OptixResult optixOpacityMicromapArrayBuild( OptixDeviceContext                         context,
+                                                   CUstream                                   stream,
+                                                   const OptixOpacityMicromapArrayBuildInput* buildInput,
+                                                   const OptixMicromapBuffers*                buffers )
+{
+    return g_optixFunctionTable.optixOpacityMicromapArrayBuild( context, stream, buildInput, buffers );
+}
+
+inline OptixResult optixOpacityMicromapArrayGetRelocationInfo( OptixDeviceContext   context,
+                                                               CUdeviceptr          opacityMicromapArray,
+                                                               OptixRelocationInfo* info )
+{
+    return g_optixFunctionTable.optixOpacityMicromapArrayGetRelocationInfo( context, opacityMicromapArray, info );
+}
+
+inline OptixResult optixOpacityMicromapArrayRelocate( OptixDeviceContext         context,
+                                                      CUstream                   stream,
+                                                      const OptixRelocationInfo* info,
+                                                      CUdeviceptr                targetOpacityMicromapArray,
+                                                      size_t                     targetOpacityMicromapArraySizeInBytes )
+{
+    return g_optixFunctionTable.optixOpacityMicromapArrayRelocate( context, stream, info, targetOpacityMicromapArray, targetOpacityMicromapArraySizeInBytes );
+}
+
+
+// --- optixSbtRecordPackHeader / optixLaunch stubs ---
+
+inline OptixResult optixSbtRecordPackHeader( OptixProgramGroup programGroup, void* sbtRecordHeaderHostPointer )
+{
+    return g_optixFunctionTable.optixSbtRecordPackHeader( programGroup, sbtRecordHeaderHostPointer );
+}
+
+inline OptixResult optixLaunch( OptixPipeline                  pipeline,
+                                CUstream                       stream,
+                                CUdeviceptr                    pipelineParams,
+                                size_t                         pipelineParamsSize,
+                                const OptixShaderBindingTable* sbt,
+                                unsigned int                   width,
+                                unsigned int                   height,
+                                unsigned int                   depth )
+{
+    return g_optixFunctionTable.optixLaunch( pipeline, stream, pipelineParams, pipelineParamsSize, sbt, width, height, depth );
+}
+
+// --- optixDenoiser* stubs ---
+
+inline OptixResult optixDenoiserCreate( OptixDeviceContext context, OptixDenoiserModelKind modelKind, const OptixDenoiserOptions* options, OptixDenoiser* returnHandle )
+{
+    return g_optixFunctionTable.optixDenoiserCreate( context, modelKind, options, returnHandle );
+}
+
+inline OptixResult optixDenoiserCreateWithUserModel( OptixDeviceContext context, const void* data, size_t dataSizeInBytes, OptixDenoiser* returnHandle )
+{
+    return g_optixFunctionTable.optixDenoiserCreateWithUserModel( context, data, dataSizeInBytes, returnHandle );
+}
+
+inline OptixResult optixDenoiserDestroy( OptixDenoiser handle )
+{
+    return g_optixFunctionTable.optixDenoiserDestroy( handle );
+}
+
+inline OptixResult optixDenoiserComputeMemoryResources( const OptixDenoiser handle,
+                                                        unsigned int        maximumInputWidth,
+                                                        unsigned int        maximumInputHeight,
+                                                        OptixDenoiserSizes* returnSizes )
+{
+    return g_optixFunctionTable.optixDenoiserComputeMemoryResources( handle, maximumInputWidth, maximumInputHeight, returnSizes );
+}
+
+inline OptixResult optixDenoiserSetup( OptixDenoiser denoiser,
+                                       CUstream      stream,
+                                       unsigned int  inputWidth,
+                                       unsigned int  inputHeight,
+                                       CUdeviceptr   denoiserState,
+                                       size_t        denoiserStateSizeInBytes,
+                                       CUdeviceptr   scratch,
+                                       size_t        scratchSizeInBytes )
+{
+    return g_optixFunctionTable.optixDenoiserSetup( denoiser, stream, inputWidth, inputHeight, denoiserState,
+                                                    denoiserStateSizeInBytes, scratch, scratchSizeInBytes );
+}
+
+inline OptixResult optixDenoiserInvoke( OptixDenoiser                  handle,
+                                        CUstream                       stream,
+                                        const OptixDenoiserParams*     params,
+                                        CUdeviceptr                    denoiserData,
+                                        size_t                         denoiserDataSize,
+                                        const OptixDenoiserGuideLayer* guideLayer,
+                                        const OptixDenoiserLayer*      layers,
+                                        unsigned int                   numLayers,
+                                        unsigned int                   inputOffsetX,
+                                        unsigned int                   inputOffsetY,
+                                        CUdeviceptr                    scratch,
+                                        size_t                         scratchSizeInBytes )
+{
+    return g_optixFunctionTable.optixDenoiserInvoke( handle, stream, params, denoiserData, denoiserDataSize,
+                                                     guideLayer, layers, numLayers,
+                                                     inputOffsetX, inputOffsetY, scratch, scratchSizeInBytes );
+}
+
+inline OptixResult optixDenoiserComputeIntensity( OptixDenoiser       handle,
+                                                  CUstream            stream,
+                                                  const OptixImage2D* inputImage,
+                                                  CUdeviceptr         outputIntensity,
+                                                  CUdeviceptr         scratch,
+                                                  size_t              scratchSizeInBytes )
+{
+    return g_optixFunctionTable.optixDenoiserComputeIntensity( handle, stream, inputImage, outputIntensity, scratch, scratchSizeInBytes );
+}
+
+inline
OptixResult optixDenoiserComputeAverageColor( OptixDenoiser handle, + CUstream stream, + const OptixImage2D* inputImage, + CUdeviceptr outputAverageColor, + CUdeviceptr scratch, + size_t scratchSizeInBytes ) +{ + return g_optixFunctionTable.optixDenoiserComputeAverageColor( handle, stream, inputImage, outputAverageColor, scratch, scratchSizeInBytes ); +} + +#endif // OPTIX_DOXYGEN_SHOULD_SKIP_THIS + +#ifdef __cplusplus +} +#endif + +#endif // __optix_optix_stubs_h__ diff --git a/dependencies/optix/optix_types.h b/dependencies/optix/optix_types.h new file mode 100644 index 0000000000000000000000000000000000000000..cfe0a10e2564f4b52bcf26f873d88525c083a4ae --- /dev/null +++ b/dependencies/optix/optix_types.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2021 NVIDIA Corporation. All rights reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and proprietary + * rights in and to this software, related documentation and any modifications thereto. + * Any use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation is strictly + * prohibited. + * + * TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THIS SOFTWARE IS PROVIDED *AS IS* + * AND NVIDIA AND ITS SUPPLIERS DISCLAIM ALL WARRANTIES, EITHER EXPRESS OR IMPLIED, + * INCLUDING, BUT NOT LIMITED TO, IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE. 
IN NO EVENT SHALL NVIDIA OR ITS SUPPLIERS BE LIABLE FOR ANY + * SPECIAL, INCIDENTAL, INDIRECT, OR CONSEQUENTIAL DAMAGES WHATSOEVER (INCLUDING, WITHOUT + * LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF + * BUSINESS INFORMATION, OR ANY OTHER PECUNIARY LOSS) ARISING OUT OF THE USE OF OR + * INABILITY TO USE THIS SOFTWARE, EVEN IF NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGES + */ + +/** + * @file optix_types.h + * @author NVIDIA Corporation + * @brief OptiX public API header + * + */ + +#ifndef __optix_optix_types_h__ +#define __optix_optix_types_h__ + +// clang-format off +#if !defined(__OPTIX_INCLUDE_INTERNAL_HEADERS__) +# define __OPTIX_INCLUDE_INTERNAL_HEADERS__ +# define __UNDEF_OPTIX_INCLUDE_INTERNAL_HEADERS_OPTIX_TYPES_H__ +#endif +#include "optix_7_types.h" +#if defined( __UNDEF_OPTIX_INCLUDE_INTERNAL_HEADERS_OPTIX_TYPES_H__ ) +# undef __OPTIX_INCLUDE_INTERNAL_HEADERS__ +# undef __UNDEF_OPTIX_INCLUDE_INTERNAL_HEADERS_OPTIX_TYPES_H__ +#endif +// clang-format on + +#endif // #ifndef __optix_optix_types_h__