diff --git a/Include/Extensions/NRIHelper.h b/Include/Extensions/NRIHelper.h index bfba73a4..6f25dad2 100644 --- a/Include/Extensions/NRIHelper.h +++ b/Include/Extensions/NRIHelper.h @@ -83,96 +83,79 @@ NriStruct(HelperInterface) { // Format utilities NRI_API Nri(Format) NRI_CALL nriConvertDXGIFormatToNRI(uint32_t dxgiFormat); NRI_API Nri(Format) NRI_CALL nriConvertVKFormatToNRI(uint32_t vkFormat); -NRI_API uint32_t NRI_CALL nriConvertNRIFormatToDXGI(Nri(Format) format); +NRI_API uint32_t NRI_CALL nriConvertNRIFormatToDXGI(Nri(Format) format); NRI_API uint32_t NRI_CALL nriConvertNRIFormatToVK(Nri(Format) format); NRI_API const NriRef(FormatProps) NRI_CALL nriGetFormatProps(Nri(Format) format); // Strings NRI_API const char* NRI_CALL nriGetGraphicsAPIString(Nri(GraphicsAPI) graphicsAPI); -// "TextureDesc" constructors -static inline Nri(TextureDesc) NriFunc(Texture1D)(Nri(Format) format, - uint16_t width, - Nri(Mip_t) mipNum NriDefault(1), - Nri(Dim_t) layerNum NriDefault(1), - Nri(TextureUsageBits) usageMask NriDefault(NriScopedMember(TextureUsageBits, SHADER_RESOURCE))) -{ - Nri(TextureDesc) textureDesc = NriZero; - textureDesc.type = NriScopedMember(TextureType, TEXTURE_1D); - textureDesc.format = format; - textureDesc.usageMask = usageMask; - textureDesc.width = width; - textureDesc.height = 1; - textureDesc.depth = 1; - textureDesc.mipNum = mipNum; - textureDesc.layerNum = layerNum; - textureDesc.sampleNum = 1; - - return textureDesc; -} +// A convinient way to fit pipeline layout settings into device limits, respecting D3D12 restrictions +NriStruct(PipelineLayoutSettingsDesc) { + uint32_t descriptorRangeNum; + uint32_t rootConstantSize; + uint32_t rootDescriptorNum; + bool preferRootDescriptorsOverConstants; +}; -static inline Nri(TextureDesc) NriFunc(Texture2D)(Nri(Format) format, - Nri(Dim_t) width, - Nri(Dim_t) height, - Nri(Mip_t) mipNum NriDefault(1), - Nri(Dim_t) layerNum NriDefault(1), - Nri(TextureUsageBits) usageMask 
NriDefault(NriScopedMember(TextureUsageBits, SHADER_RESOURCE)), - Nri(Sample_t) sampleNum NriDefault(1)) -{ - Nri(TextureDesc) textureDesc = NriZero; - textureDesc.type = NriScopedMember(TextureType, TEXTURE_2D); - textureDesc.format = format; - textureDesc.usageMask = usageMask; - textureDesc.width = width; - textureDesc.height = height; - textureDesc.depth = 1; - textureDesc.mipNum = mipNum; - textureDesc.layerNum = layerNum; - textureDesc.sampleNum = sampleNum; - - return textureDesc; -} +static inline Nri(PipelineLayoutSettingsDesc) NriFunc(FitPipelineLayoutSettingsIntoDeviceLimits)(const NriRef(DeviceDesc) deviceDesc, const NriRef(PipelineLayoutSettingsDesc) pipelineLayoutSettingsDesc) { + uint32_t descriptorRangeNum = NriDeref(pipelineLayoutSettingsDesc)->descriptorRangeNum; + uint32_t rootConstantSize = NriDeref(pipelineLayoutSettingsDesc)->rootConstantSize; + uint32_t rootDescriptorNum = NriDeref(pipelineLayoutSettingsDesc)->rootDescriptorNum; -static inline Nri(TextureDesc) NriFunc(Texture3D)(Nri(Format) format, - Nri(Dim_t) width, - Nri(Dim_t) height, - uint16_t depth, - Nri(Mip_t) mipNum NriDefault(1), - Nri(TextureUsageBits) usageMask NriDefault(NriScopedMember(TextureUsageBits, SHADER_RESOURCE))) -{ - Nri(TextureDesc) textureDesc = NriZero; - textureDesc.type = NriScopedMember(TextureType, TEXTURE_3D); - textureDesc.format = format; - textureDesc.usageMask = usageMask; - textureDesc.width = width; - textureDesc.height = height; - textureDesc.depth = depth; - textureDesc.mipNum = mipNum; - textureDesc.layerNum = 1; - textureDesc.sampleNum = 1; - - return textureDesc; -} + // Apply global limits + if (rootConstantSize > NriDeref(deviceDesc)->pipelineLayoutRootConstantMaxSize) + rootConstantSize = NriDeref(deviceDesc)->pipelineLayoutRootConstantMaxSize; -// "TextureBarrierDesc" constructors -static inline Nri(TextureBarrierDesc) NriFunc(TextureBarrier)(NriPtr(Texture) texture, - Nri(AccessLayoutStage) before, - Nri(AccessLayoutStage) after, - Nri(Mip_t) 
mipOffset NriDefault(0), - Nri(Mip_t) mipNum NriDefault(Nri(REMAINING_MIPS)), - Nri(Dim_t) layerOffset NriDefault(0), - Nri(Dim_t) layerNum NriDefault(Nri(REMAINING_LAYERS))) -{ - Nri(TextureBarrierDesc) textureBarrierDesc = NriZero; - textureBarrierDesc.texture = texture; - textureBarrierDesc.before = before; - textureBarrierDesc.after = after; - textureBarrierDesc.mipOffset = mipOffset; - textureBarrierDesc.mipNum = mipNum; - textureBarrierDesc.layerOffset = layerOffset; - textureBarrierDesc.layerNum = layerNum; + if (rootDescriptorNum > NriDeref(deviceDesc)->pipelineLayoutRootDescriptorMaxNum) + rootDescriptorNum = NriDeref(deviceDesc)->pipelineLayoutRootDescriptorMaxNum; - return textureBarrierDesc; + // D3D12 has limited-size root signature + if (NriDeref(deviceDesc)->graphicsAPI == NriScopedMember(GraphicsAPI, D3D12)) { + const uint32_t descriptorTableCost = 4; + const uint32_t rootDescriptorCost = 8; + + uint32_t freeBytesInRootSignature = 256; + + // 1 root descriptor can be reserved for "draw parameters" emulation + if (NriDeref(deviceDesc)->isDrawParametersEmulationEnabled) + freeBytesInRootSignature -= 8; + + // Must fit + uint32_t availableDescriptorRangeNum = freeBytesInRootSignature / descriptorTableCost; + if (descriptorRangeNum > availableDescriptorRangeNum) + descriptorRangeNum = availableDescriptorRangeNum; + + freeBytesInRootSignature -= descriptorRangeNum * descriptorTableCost; + + // Desired fit + if (NriDeref(pipelineLayoutSettingsDesc)->preferRootDescriptorsOverConstants) { + uint32_t availableRootDescriptorNum = freeBytesInRootSignature / rootDescriptorCost; + if (rootDescriptorNum > availableRootDescriptorNum) + rootDescriptorNum = availableRootDescriptorNum; + + freeBytesInRootSignature -= rootDescriptorNum * rootDescriptorCost; + + if (rootConstantSize > freeBytesInRootSignature) + rootConstantSize = freeBytesInRootSignature; + } else { + if (rootConstantSize > freeBytesInRootSignature) + rootConstantSize = freeBytesInRootSignature; + + 
freeBytesInRootSignature -= rootConstantSize; + + uint32_t availableRootDescriptorNum = freeBytesInRootSignature / rootDescriptorCost; + if (rootDescriptorNum > availableRootDescriptorNum) + rootDescriptorNum = availableRootDescriptorNum; + } + } + + Nri(PipelineLayoutSettingsDesc) modifiedPipelineLayoutLimitsDesc = *NriDeref(pipelineLayoutSettingsDesc); + modifiedPipelineLayoutLimitsDesc.descriptorRangeNum = descriptorRangeNum; + modifiedPipelineLayoutLimitsDesc.rootConstantSize = rootConstantSize; + modifiedPipelineLayoutLimitsDesc.rootDescriptorNum = rootDescriptorNum; + + return modifiedPipelineLayoutLimitsDesc; } static inline Nri(TextureBarrierDesc) NriFunc(TextureBarrierFromUnknown)(NriPtr(Texture) texture, @@ -208,5 +191,4 @@ static inline Nri(TextureBarrierDesc) NriFunc(TextureBarrierFromState)(NriRef(Te return *NriDeref(prevState); } - NriNamespaceEnd diff --git a/Include/NRI.h b/Include/NRI.h index 57b093c4..baeb7597 100644 --- a/Include/NRI.h +++ b/Include/NRI.h @@ -24,8 +24,8 @@ Non-goals: #pragma once #define NRI_VERSION_MAJOR 1 -#define NRI_VERSION_MINOR 150 -#define NRI_VERSION_DATE "18 September 2024" +#define NRI_VERSION_MINOR 151 +#define NRI_VERSION_DATE "20 September 2024" #include "NRIDescs.h" @@ -102,8 +102,8 @@ NriStruct(CoreInterface) { // Setup void (NRI_CALL *CmdSetPipelineLayout) (NriRef(CommandBuffer) commandBuffer, const NriRef(PipelineLayout) pipelineLayout); void (NRI_CALL *CmdSetDescriptorSet) (NriRef(CommandBuffer) commandBuffer, uint32_t setIndex, const NriRef(DescriptorSet) descriptorSet, const uint32_t* dynamicConstantBufferOffsets); - void (NRI_CALL *CmdSetRootConstants) (NriRef(CommandBuffer) commandBuffer, uint32_t rootConstantIndex, const void* data, uint32_t size); // requires "rootConstantMaxSize > 0" - void (NRI_CALL *CmdSetRootDescriptor) (NriRef(CommandBuffer) commandBuffer, uint32_t rootDescriptorIndex, NriRef(Descriptor) descriptor); // requires "rootDescriptorMaxNum > 0" + void (NRI_CALL *CmdSetRootConstants) 
(NriRef(CommandBuffer) commandBuffer, uint32_t rootConstantIndex, const void* data, uint32_t size); // requires "pipelineLayoutRootConstantMaxSize > 0" + void (NRI_CALL *CmdSetRootDescriptor) (NriRef(CommandBuffer) commandBuffer, uint32_t rootDescriptorIndex, NriRef(Descriptor) descriptor); // requires "pipelineLayoutRootDescriptorMaxNum > 0" void (NRI_CALL *CmdSetPipeline) (NriRef(CommandBuffer) commandBuffer, const NriRef(Pipeline) pipeline); // Barrier diff --git a/Include/NRICompatibility.hlsli b/Include/NRICompatibility.hlsli index 2cf36cef..db2d9d5f 100644 --- a/Include/NRICompatibility.hlsli +++ b/Include/NRICompatibility.hlsli @@ -31,7 +31,7 @@ Push constants: uint32_t const2; }; - NRI_ROOT_CONSTANTS(RootConstants, gRootConstants, 7); // a constant buffer in DXBC + NRI_ROOT_CONSTANTS(RootConstants, gRootConstants, 7, 0); // a constant buffer in DXBC Draw parameters: - Add to the global scope: @@ -50,7 +50,8 @@ Draw parameters: */ #ifndef __cplusplus - #define NRI_MERGE_TOKENS(a, b) a##b + #define _NRI_MERGE_TOKENS(a, b) a##b + #define NRI_MERGE_TOKENS(a, b) _NRI_MERGE_TOKENS(a, b) #endif // Container detection @@ -123,7 +124,7 @@ Draw parameters: #define NRI_RESOURCE(resourceType, name, regName, bindingIndex, setIndex) \ resourceType name : register(NRI_MERGE_TOKENS(regName, bindingIndex), NRI_MERGE_TOKENS(space, setIndex)) - #define NRI_ROOT_CONSTANTS(structName, name, bindingIndex) \ + #define NRI_ROOT_CONSTANTS(structName, name, bindingIndex, setIndex) \ [[vk::push_constant]] structName name // Draw parameters (full support, requires SPV_KHR_shader_draw_parameters) @@ -140,12 +141,13 @@ Draw parameters: #endif // DXIL +#define NRI_BASE_ATTRIBUTES_EMULATION_SPACE 999 #ifdef NRI_DXIL #define NRI_RESOURCE(resourceType, name, regName, bindingIndex, setIndex) \ resourceType name : register(NRI_MERGE_TOKENS(regName, bindingIndex), NRI_MERGE_TOKENS(space, setIndex)) - #define NRI_ROOT_CONSTANTS(structName, name, bindingIndex) \ - ConstantBuffer name : 
register(NRI_MERGE_TOKENS(b, bindingIndex), space0) + #define NRI_ROOT_CONSTANTS(structName, name, bindingIndex, setIndex) \ + ConstantBuffer name : register(NRI_MERGE_TOKENS(b, bindingIndex), NRI_MERGE_TOKENS(space, setIndex)) // Draw parameters #if (NRI_SHADER_MODEL < 68) @@ -156,7 +158,7 @@ Draw parameters: int baseVertex; \ uint baseInstance; \ }; \ - ConstantBuffer<_BaseAttributeConstants> _BaseAttributes : register(b0, space999) // see BASE_ATTRIBUTES_EMULATION_SPACE + ConstantBuffer<_BaseAttributeConstants> _BaseAttributes : register(b0, NRI_MERGE_TOKENS(space, NRI_BASE_ATTRIBUTES_EMULATION_SPACE)) #define NRI_DECLARE_DRAW_PARAMETERS \ uint NRI_VERTEX_ID : SV_VertexID, \ @@ -202,7 +204,7 @@ Draw parameters: #define NRI_RESOURCE(resourceType, name, regName, bindingIndex, setIndex) \ resourceType name : register(NRI_MERGE_TOKENS(regName, bindingIndex)) - #define NRI_ROOT_CONSTANTS(structName, name, bindingIndex) \ + #define NRI_ROOT_CONSTANTS(structName, name, bindingIndex, setIndex) \ cbuffer structName##_##name : register(NRI_MERGE_TOKENS(b, bindingIndex)) { \ structName name; \ } diff --git a/Include/NRIDescs.h b/Include/NRIDescs.h index ec7d98c6..e9adb82d 100644 --- a/Include/NRIDescs.h +++ b/Include/NRIDescs.h @@ -65,7 +65,7 @@ static const Nri(Dim_t) NriConstant(REMAINING_LAYERS) = 0; // only for "layerNu // Readability #define NriOptional // i.e. 
can be 0 (keep an eye on comments) -#define NriOut // highlights output argument +#define NriOut // highlights an output argument //============================================================================================================================================================================================ #pragma region [ Common ] @@ -444,10 +444,10 @@ NriStruct(TextureDesc) { Nri(Format) format; Nri(Dim_t) width; Nri(Dim_t) height; - Nri(Dim_t) depth; + NriOptional Nri(Dim_t) depth; Nri(Mip_t) mipNum; - Nri(Dim_t) layerNum; - Nri(Sample_t) sampleNum; + NriOptional Nri(Dim_t) layerNum; + NriOptional Nri(Sample_t) sampleNum; }; NriStruct(BufferDesc) { @@ -549,8 +549,8 @@ Pipeline layout example: RootConstantDesc #0 // "rootConstantIndex" - an index in "rootConstants" in the currently bound pipeline layout - RootDescriptorSetDesc #0 // "rootDescriptorIndex" - an index in "rootDescriptorSets" in the currently bound pipeline layout - RootDescriptorSetDesc #1 + RootDescriptorDesc #0 // "rootDescriptorIndex" - an index in "rootDescriptors" in the currently bound pipeline layout + RootDescriptorDesc #1 */ // "DescriptorRange" consists of "Descriptor" entities @@ -576,7 +576,7 @@ NriStruct(DynamicConstantBufferDesc) { }; NriStruct(DescriptorSetDesc) { - uint32_t registerSpace; + uint32_t registerSpace; // must be unique, avoid big gaps const NriPtr(DescriptorRangeDesc) ranges; uint32_t rangeNum; const NriPtr(DynamicConstantBufferDesc) dynamicConstantBuffers; // a dynamic constant buffer allows to dynamically specify an offset in the buffer via "CmdSetDescriptorSet" call @@ -584,26 +584,26 @@ NriStruct(DescriptorSetDesc) { }; // "PipelineLayout" consists of "DescriptorSet" descriptions and root parameters -NriStruct(RootConstantDesc) { // aka push constants +NriStruct(RootConstantDesc) { // aka push constants block uint32_t registerIndex; uint32_t size; Nri(StageBits) shaderStages; }; -NriStruct(RootDescriptorSetDesc) { // aka push descriptor - uint32_t 
registerSpace; +NriStruct(RootDescriptorDesc) { // aka push descriptor uint32_t registerIndex; Nri(DescriptorType) descriptorType; // CONSTANT_BUFFER, STRUCTURED_BUFFER or STORAGE_STRUCTURED_BUFFER Nri(StageBits) shaderStages; }; NriStruct(PipelineLayoutDesc) { - const NriPtr(DescriptorSetDesc) descriptorSets; - uint32_t descriptorSetNum; + uint32_t rootRegisterSpace; const NriPtr(RootConstantDesc) rootConstants; uint32_t rootConstantNum; - const NriPtr(RootDescriptorSetDesc) rootDescriptorSets; - uint32_t rootDescriptorSetNum; + const NriPtr(RootDescriptorDesc) rootDescriptors; + uint32_t rootDescriptorNum; + const NriPtr(DescriptorSetDesc) descriptorSets; + uint32_t descriptorSetNum; Nri(StageBits) shaderStages; bool ignoreGlobalSPIRVOffsets; bool enableD3D12DrawParametersEmulation; // implicitly expects "enableD3D12DrawParametersEmulation" passed during device creation @@ -1319,7 +1319,7 @@ NriStruct(DeviceDesc) { Nri(Dim_t) texture2DMaxDim; Nri(Dim_t) texture3DMaxDim; Nri(Dim_t) textureArrayLayerMaxNum; - uint32_t texelBufferMaxDim; + uint32_t typedBufferMaxDim; // Memory uint64_t deviceUploadHeapSize; // ReBAR @@ -1340,15 +1340,10 @@ NriStruct(DeviceDesc) { uint32_t rayTracingScratchAlignment; // Pipeline layout + // D3D12 only: rootConstantSize + descriptorSetNum * 4 + rootDescriptorNum * 8 <= 256 (see "FitPipelineLayoutSettingsIntoDeviceLimits") uint32_t pipelineLayoutDescriptorSetMaxNum; - uint32_t perStageDescriptorSamplerMaxNum; - uint32_t perStageDescriptorConstantBufferMaxNum; - uint32_t perStageDescriptorStorageBufferMaxNum; - uint32_t perStageDescriptorTextureMaxNum; - uint32_t perStageDescriptorStorageTextureMaxNum; - uint32_t perStageResourceMaxNum; - uint32_t rootConstantMaxSize; - uint32_t rootDescriptorMaxNum; + uint32_t pipelineLayoutRootConstantMaxSize; + uint32_t pipelineLayoutRootDescriptorMaxNum; // Descriptor set uint32_t descriptorSetSamplerMaxNum; @@ -1357,6 +1352,14 @@ NriStruct(DeviceDesc) { uint32_t descriptorSetTextureMaxNum; uint32_t 
descriptorSetStorageTextureMaxNum; + // Shader resources + uint32_t perStageDescriptorSamplerMaxNum; + uint32_t perStageDescriptorConstantBufferMaxNum; + uint32_t perStageDescriptorStorageBufferMaxNum; + uint32_t perStageDescriptorTextureMaxNum; + uint32_t perStageDescriptorStorageTextureMaxNum; + uint32_t perStageResourceMaxNum; + // Vertex shader uint32_t vertexShaderAttributeMaxNum; uint32_t vertexShaderStreamMaxNum; diff --git a/Resources/Version.h b/Resources/Version.h index 3092c964..3d3092f2 100644 --- a/Resources/Version.h +++ b/Resources/Version.h @@ -4,7 +4,7 @@ #define STR(x) STR_HELPER(x) #define VERSION_MAJOR 1 -#define VERSION_MINOR 150 +#define VERSION_MINOR 151 #define VERSION_BUILD 0 #define VERSION_REVISION 0 diff --git a/Source/Creation/Creation.cpp b/Source/Creation/Creation.cpp index 8a113a9b..f844635e 100644 --- a/Source/Creation/Creation.cpp +++ b/Source/Creation/Creation.cpp @@ -431,11 +431,11 @@ NRI_API Result NRI_CALL nriEnumerateAdapters(AdapterDesc* adapterDescs, uint32_t if (result == VK_SUCCESS && deviceGroupNum) { if (adapterDescs) { // Query device groups - VkPhysicalDeviceGroupProperties* deviceGroupProperties = StackAlloc(VkPhysicalDeviceGroupProperties, deviceGroupNum); + VkPhysicalDeviceGroupProperties* deviceGroupProperties = AllocateScratch(m_Device, VkPhysicalDeviceGroupProperties, deviceGroupNum); vkEnumeratePhysicalDeviceGroups(instance, &deviceGroupNum, deviceGroupProperties); // Query device groups properties - AdapterDesc* adapterDescsSorted = StackAlloc(AdapterDesc, deviceGroupNum); + AdapterDesc* adapterDescsSorted = AllocateScratch(m_Device, AdapterDesc, deviceGroupNum); for (uint32_t i = 0; i < deviceGroupNum; i++) { VkPhysicalDeviceIDProperties deviceIDProperties = {VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES}; VkPhysicalDeviceProperties2 properties2 = {VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2}; diff --git a/Source/D3D11/CommandBufferD3D11.hpp b/Source/D3D11/CommandBufferD3D11.hpp index 6a2dec54..f3560ce5 
100644 --- a/Source/D3D11/CommandBufferD3D11.hpp +++ b/Source/D3D11/CommandBufferD3D11.hpp @@ -96,7 +96,7 @@ NRI_INLINE void CommandBufferD3D11::SetViewports(const Viewport* viewports, uint } NRI_INLINE void CommandBufferD3D11::SetScissors(const Rect* rects, uint32_t rectNum) { - D3D11_RECT* rectsD3D = StackAlloc(D3D11_RECT, rectNum); + Scratch rectsD3D = AllocateScratch(m_Device, D3D11_RECT, rectNum); for (uint32_t i = 0; i < rectNum; i++) { const Rect& rect = rects[i]; @@ -161,7 +161,7 @@ NRI_INLINE void CommandBufferD3D11::ClearAttachments(const ClearDesc* clearDescs } } } else { - D3D11_RECT* rectsD3D = StackAlloc(D3D11_RECT, rectNum); + Scratch rectsD3D = AllocateScratch(m_Device, D3D11_RECT, rectNum); for (uint32_t i = 0; i < rectNum; i++) { const Rect& rect = rects[i]; rectsD3D[i] = {rect.x, rect.y, (LONG)(rect.x + rect.width), (LONG)(rect.y + rect.height)}; @@ -268,15 +268,16 @@ NRI_INLINE void CommandBufferD3D11::SetVertexBuffers(uint32_t baseSlot, uint32_t offsets = s_nullOffsets; if (m_VertexBuffer != buffers[0] || m_VertexBufferOffset != offsets[0] || m_VertexBufferBaseSlot != baseSlot || bufferNum > 1) { - uint8_t* mem = StackAlloc(uint8_t, bufferNum * (sizeof(ID3D11Buffer*) + sizeof(uint32_t) * 2)); + Scratch scratch = AllocateScratch(m_Device, uint8_t, bufferNum * (sizeof(ID3D11Buffer*) + sizeof(uint32_t) * 2)); + uint8_t* ptr = scratch; - ID3D11Buffer** buf = (ID3D11Buffer**)mem; - mem += bufferNum * sizeof(ID3D11Buffer*); + ID3D11Buffer** buf = (ID3D11Buffer**)ptr; + ptr += bufferNum * sizeof(ID3D11Buffer*); - uint32_t* offsetsUint = (uint32_t*)mem; - mem += bufferNum * sizeof(uint32_t); + uint32_t* offsetsUint = (uint32_t*)ptr; + ptr += bufferNum * sizeof(uint32_t); - uint32_t* strides = (uint32_t*)mem; + uint32_t* strides = (uint32_t*)ptr; for (uint32_t i = 0; i < bufferNum; i++) { const BufferD3D11& bufferD3D11 = *(BufferD3D11*)buffers[i]; @@ -331,7 +332,7 @@ NRI_INLINE void CommandBufferD3D11::SetDescriptorSet(uint32_t setIndex, const De } 
NRI_INLINE void CommandBufferD3D11::SetRootConstants(uint32_t rootConstantIndex, const void* data, uint32_t size) { - m_PipelineLayout->SetRootConstants(m_DeferredContext, rootConstantIndex, (const Vec4*)data, size); + m_PipelineLayout->SetRootConstants(m_DeferredContext, rootConstantIndex, data, size); } NRI_INLINE void CommandBufferD3D11::SetRootDescriptor(uint32_t rootDescriptorIndex, Descriptor& descriptor) { @@ -530,7 +531,7 @@ NRI_INLINE void CommandBufferD3D11::CopyQueries(const QueryPool& queryPool, uint NRI_INLINE void CommandBufferD3D11::BeginAnnotation(const char* name) { size_t len = strlen(name) + 1; - wchar_t* s = StackAlloc(wchar_t, len); + Scratch s = AllocateScratch(m_Device, wchar_t, len); ConvertCharToWchar(name, s, len); m_Annotation->BeginEvent(s); diff --git a/Source/D3D11/DeviceD3D11.hpp b/Source/D3D11/DeviceD3D11.hpp index 4ba8290d..1bae608b 100644 --- a/Source/D3D11/DeviceD3D11.hpp +++ b/Source/D3D11/DeviceD3D11.hpp @@ -268,7 +268,7 @@ void DeviceD3D11::FillDesc() { m_Desc.texture2DMaxDim = D3D11_REQ_TEXTURE2D_U_OR_V_DIMENSION; m_Desc.texture3DMaxDim = D3D11_REQ_TEXTURE3D_U_V_OR_W_DIMENSION; m_Desc.textureArrayLayerMaxNum = D3D11_REQ_TEXTURE2D_ARRAY_AXIS_DIMENSION; - m_Desc.texelBufferMaxDim = (1 << D3D11_REQ_BUFFER_RESOURCE_TEXEL_COUNT_2_TO_EXP) - 1; + m_Desc.typedBufferMaxDim = 1 << D3D11_REQ_BUFFER_RESOURCE_TEXEL_COUNT_2_TO_EXP; m_Desc.memoryAllocationMaxNum = 0xFFFFFFFF; m_Desc.samplerAllocationMaxNum = D3D11_REQ_SAMPLER_OBJECT_COUNT_PER_DEVICE; @@ -278,19 +278,21 @@ void DeviceD3D11::FillDesc() { m_Desc.constantBufferOffsetAlignment = 256; // D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT; m_Desc.constantBufferMaxRange = D3D11_REQ_IMMEDIATE_CONSTANT_BUFFER_ELEMENT_COUNT * 16; m_Desc.storageBufferOffsetAlignment = D3D11_RAW_UAV_SRV_BYTE_ALIGNMENT; - m_Desc.storageBufferMaxRange = (1 << D3D11_REQ_BUFFER_RESOURCE_TEXEL_COUNT_2_TO_EXP) - 1; + m_Desc.storageBufferMaxRange = 1 << D3D11_REQ_BUFFER_RESOURCE_TEXEL_COUNT_2_TO_EXP; 
m_Desc.bufferTextureGranularity = 1; m_Desc.bufferMaxSize = D3D11_REQ_RESOURCE_SIZE_IN_MEGABYTES_EXPRESSION_C_TERM * 1024ull * 1024ull; - m_Desc.pipelineLayoutDescriptorSetMaxNum = D3D_DESCRIPTOR_SET_MAX_NUM; + // Just use D3D12 restrictions to avoid divergence + m_Desc.pipelineLayoutDescriptorSetMaxNum = ROOT_SIGNATURE_DWORD_NUM / 1; + m_Desc.pipelineLayoutRootConstantMaxSize = sizeof(uint32_t) * ROOT_SIGNATURE_DWORD_NUM / 1; + m_Desc.pipelineLayoutRootDescriptorMaxNum = ROOT_SIGNATURE_DWORD_NUM / 2; + m_Desc.perStageDescriptorSamplerMaxNum = D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT; m_Desc.perStageDescriptorConstantBufferMaxNum = D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT; m_Desc.perStageDescriptorStorageBufferMaxNum = m_Version >= 1 ? D3D11_1_UAV_SLOT_COUNT : D3D11_PS_CS_UAV_REGISTER_COUNT; m_Desc.perStageDescriptorTextureMaxNum = D3D11_COMMONSHADER_INPUT_RESOURCE_SLOT_COUNT; m_Desc.perStageDescriptorStorageTextureMaxNum = m_Version >= 1 ? D3D11_1_UAV_SLOT_COUNT : D3D11_PS_CS_UAV_REGISTER_COUNT; m_Desc.perStageResourceMaxNum = D3D11_COMMONSHADER_INPUT_RESOURCE_SLOT_COUNT; - m_Desc.rootConstantMaxSize = D3D_ROOT_CONSTANT_MAX_SIZE; - m_Desc.rootDescriptorMaxNum = D3D_ROOT_DESCRIPTOR_MAX_NUM; m_Desc.descriptorSetSamplerMaxNum = m_Desc.perStageDescriptorSamplerMaxNum; m_Desc.descriptorSetConstantBufferMaxNum = m_Desc.perStageDescriptorConstantBufferMaxNum; diff --git a/Source/D3D11/PipelineD3D11.hpp b/Source/D3D11/PipelineD3D11.hpp index 9db03246..de85b467 100644 --- a/Source/D3D11/PipelineD3D11.hpp +++ b/Source/D3D11/PipelineD3D11.hpp @@ -49,7 +49,7 @@ Result PipelineD3D11::Create(const GraphicsPipelineDesc& pipelineDesc) { } m_InputAssemplyStrides.resize(maxBindingSlot + 1); - D3D11_INPUT_ELEMENT_DESC* inputElements = StackAlloc(D3D11_INPUT_ELEMENT_DESC, vi.attributeNum); + Scratch inputElements = AllocateScratch(m_Device, D3D11_INPUT_ELEMENT_DESC, vi.attributeNum); for (uint32_t i = 0; i < vi.attributeNum; i++) { const VertexAttributeDesc& attrIn = 
vi.attributes[i]; const VertexStreamDesc& stream = vi.streams[attrIn.streamIndex]; diff --git a/Source/D3D11/PipelineLayoutD3D11.h b/Source/D3D11/PipelineLayoutD3D11.h index 3a70f596..194908f9 100644 --- a/Source/D3D11/PipelineLayoutD3D11.h +++ b/Source/D3D11/PipelineLayoutD3D11.h @@ -28,17 +28,6 @@ struct ConstantBuffer { StageBits shaderStages; }; -union Vec4 { - uint32_t ui[4]; - float f[4]; -}; - -struct BindingData { - void** descriptors; - uint32_t* constantFirst; - uint32_t* rootConstantNum; -}; - struct PipelineLayoutD3D11 { inline PipelineLayoutD3D11(DeviceD3D11& device) : m_Device(device) @@ -64,7 +53,7 @@ struct PipelineLayoutD3D11 { } Result Create(const PipelineLayoutDesc& pipelineDesc); - void SetRootConstants(ID3D11DeviceContextBest* deferredContext, uint32_t rootConstantIndex, const Vec4* data, uint32_t size) const; + void SetRootConstants(ID3D11DeviceContextBest* deferredContext, uint32_t rootConstantIndex, const void* data, uint32_t size) const; void Bind(ID3D11DeviceContextBest* deferredContext); void BindDescriptorSet(BindingState& currentBindingState, ID3D11DeviceContextBest* deferredContext, uint32_t setIndex, const DescriptorSetD3D11* descriptorSet, const DescriptorD3D11* descriptor, const uint32_t* dynamicConstantBufferOffsets) const; diff --git a/Source/D3D11/PipelineLayoutD3D11.hpp b/Source/D3D11/PipelineLayoutD3D11.hpp index 49b425b9..639d5c1a 100644 --- a/Source/D3D11/PipelineLayoutD3D11.hpp +++ b/Source/D3D11/PipelineLayoutD3D11.hpp @@ -105,8 +105,8 @@ Result PipelineLayoutD3D11::Create(const PipelineLayoutDesc& pipelineLayoutDesc) // Root descriptors m_RootBindingOffset = (uint32_t)m_BindingSets.size(); - for (uint32_t i = 0; i < pipelineLayoutDesc.rootDescriptorSetNum; i++) { - const RootDescriptorSetDesc& rootDescriptorSetDesc = pipelineLayoutDesc.rootDescriptorSets[i]; + for (uint32_t i = 0; i < pipelineLayoutDesc.rootDescriptorNum; i++) { + const RootDescriptorDesc& rootDescriptorSetDesc = pipelineLayoutDesc.rootDescriptors[i]; 
BindingRange bindingRange = {}; bindingRange.baseSlot = rootDescriptorSetDesc.registerIndex; @@ -160,7 +160,7 @@ void PipelineLayoutD3D11::Bind(ID3D11DeviceContextBest* deferredContext) { } } -void PipelineLayoutD3D11::SetRootConstants(ID3D11DeviceContextBest* deferredContext, uint32_t rootConstantIndex, const Vec4* data, uint32_t size) const { +void PipelineLayoutD3D11::SetRootConstants(ID3D11DeviceContextBest* deferredContext, uint32_t rootConstantIndex, const void* data, uint32_t size) const { MaybeUnused(size); const ConstantBuffer& cb = m_ConstantBuffers[rootConstantIndex]; @@ -179,15 +179,16 @@ void PipelineLayoutD3D11::BindDescriptorSetImpl(BindingState& currentBindingStat const BindingSet& bindingSet = m_BindingSets[setIndex]; bool isStorageRebindNeededInGraphics = false; - uint8_t* memory = StackAlloc(uint8_t, bindingSet.descriptorNum * (sizeof(void*) + sizeof(uint32_t) * 2)); + Scratch scratch = AllocateScratch(m_Device, uint8_t, bindingSet.descriptorNum * (sizeof(void*) + sizeof(uint32_t) * 2)); + uint8_t* ptr = scratch; - void** descriptors = (void**)memory; - memory += bindingSet.descriptorNum * sizeof(void*); + void** descriptors = (void**)ptr; + ptr += bindingSet.descriptorNum * sizeof(void*); - uint32_t* constantFirst = (uint32_t*)memory; - memory += bindingSet.descriptorNum * sizeof(uint32_t); + uint32_t* constantFirst = (uint32_t*)ptr; + ptr += bindingSet.descriptorNum * sizeof(uint32_t); - uint32_t* rootConstantNum = (uint32_t*)memory; + uint32_t* rootConstantNum = (uint32_t*)ptr; for (uint32_t j = bindingSet.rangeStart; j < bindingSet.rangeEnd; j++) { const BindingRange& bindingRange = m_BindingRanges[j]; diff --git a/Source/D3D11/TextureD3D11.hpp b/Source/D3D11/TextureD3D11.hpp index 6c0723af..0a025fe5 100644 --- a/Source/D3D11/TextureD3D11.hpp +++ b/Source/D3D11/TextureD3D11.hpp @@ -86,14 +86,14 @@ Result TextureD3D11::Create(MemoryLocation memoryLocation, float priority) { } Result TextureD3D11::Create(const TextureDesc& textureDesc) { - 
m_Desc = textureDesc; + m_Desc = FixTextureDesc(textureDesc); return Result::SUCCESS; } Result TextureD3D11::Create(const TextureD3D11Desc& textureDesc) { if (textureDesc.desc) - m_Desc = *textureDesc.desc; + m_Desc = FixTextureDesc(*textureDesc.desc); else if (!GetTextureDesc(textureDesc, m_Desc)) return Result::INVALID_ARGUMENT; diff --git a/Source/D3D12/CommandBufferD3D12.h b/Source/D3D12/CommandBufferD3D12.h index 88a4a9dd..7553e540 100644 --- a/Source/D3D12/CommandBufferD3D12.h +++ b/Source/D3D12/CommandBufferD3D12.h @@ -114,7 +114,7 @@ struct CommandBufferD3D12 { const PipelineLayoutD3D12* m_PipelineLayout = nullptr; PipelineD3D12* m_Pipeline = nullptr; D3D12_PRIMITIVE_TOPOLOGY m_PrimitiveTopology = D3D_PRIMITIVE_TOPOLOGY_UNDEFINED; - std::array m_DescriptorSets = {}; + std::array m_DescriptorSets = {}; uint32_t m_RenderTargetNum = 0; uint8_t m_Version = 0; bool m_IsGraphicsPipelineLayout = false; diff --git a/Source/D3D12/CommandBufferD3D12.hpp b/Source/D3D12/CommandBufferD3D12.hpp index ed58a3fc..932f9a07 100644 --- a/Source/D3D12/CommandBufferD3D12.hpp +++ b/Source/D3D12/CommandBufferD3D12.hpp @@ -270,7 +270,7 @@ NRI_INLINE void CommandBufferD3D12::SetViewports(const Viewport* viewports, uint } NRI_INLINE void CommandBufferD3D12::SetScissors(const Rect* rects, uint32_t rectNum) { - D3D12_RECT* rectsD3D12 = StackAlloc(D3D12_RECT, rectNum); + Scratch rectsD3D12 = AllocateScratch(m_Device, D3D12_RECT, rectNum); ConvertRects(rectsD3D12, rects, rectNum); m_GraphicsCommandList->RSSetScissorRects(rectNum, rectsD3D12); @@ -324,7 +324,7 @@ NRI_INLINE void CommandBufferD3D12::ClearAttachments(const ClearDesc* clearDescs if (!clearDescNum) return; - D3D12_RECT* rectsD3D12 = StackAlloc(D3D12_RECT, rectNum); + Scratch rectsD3D12 = AllocateScratch(m_Device, D3D12_RECT, rectNum); ConvertRects(rectsD3D12, rects, rectNum); for (uint32_t i = 0; i < clearDescNum; i++) { @@ -391,8 +391,7 @@ NRI_INLINE void CommandBufferD3D12::BeginRendering(const AttachmentsDesc& attach } 
NRI_INLINE void CommandBufferD3D12::SetVertexBuffers(uint32_t baseSlot, uint32_t bufferNum, const Buffer* const* buffers, const uint64_t* offsets) { - D3D12_VERTEX_BUFFER_VIEW* vertexBufferViews = StackAlloc(D3D12_VERTEX_BUFFER_VIEW, bufferNum); - + Scratch vertexBufferViews = AllocateScratch(m_Device, D3D12_VERTEX_BUFFER_VIEW, bufferNum); for (uint32_t i = 0; i < bufferNum; i++) { if (buffers[i] != nullptr) { const BufferD3D12* buffer = (BufferD3D12*)buffers[i]; @@ -663,7 +662,7 @@ NRI_INLINE void CommandBufferD3D12::Barrier(const BarrierGroupDesc& barrierGroup // Global uint16_t num = barrierGroupDesc.globalNum; - D3D12_GLOBAL_BARRIER* globalBarriers = StackAlloc(D3D12_GLOBAL_BARRIER, num); + Scratch globalBarriers = AllocateScratch(m_Device, D3D12_GLOBAL_BARRIER, num); if (num) { D3D12_BARRIER_GROUP* barrierGroup = &barrierGroups[barriersGroupsNum++]; barrierGroup->Type = D3D12_BARRIER_TYPE_GLOBAL; @@ -683,7 +682,7 @@ NRI_INLINE void CommandBufferD3D12::Barrier(const BarrierGroupDesc& barrierGroup // Buffer num = barrierGroupDesc.bufferNum; - D3D12_BUFFER_BARRIER* bufferBarriers = StackAlloc(D3D12_BUFFER_BARRIER, num); + Scratch bufferBarriers = AllocateScratch(m_Device, D3D12_BUFFER_BARRIER, num); if (barrierGroupDesc.bufferNum) { D3D12_BARRIER_GROUP* barrierGroup = &barrierGroups[barriersGroupsNum++]; barrierGroup->Type = D3D12_BARRIER_TYPE_BUFFER; @@ -707,7 +706,7 @@ NRI_INLINE void CommandBufferD3D12::Barrier(const BarrierGroupDesc& barrierGroup // Texture num = barrierGroupDesc.textureNum; - D3D12_TEXTURE_BARRIER* textureBarriers = StackAlloc(D3D12_TEXTURE_BARRIER, num); + Scratch textureBarriers = AllocateScratch(m_Device, D3D12_TEXTURE_BARRIER, num); if (barrierGroupDesc.textureNum) { D3D12_BARRIER_GROUP* barrierGroup = &barrierGroups[barriersGroupsNum++]; barrierGroup->Type = D3D12_BARRIER_TYPE_TEXTURE; @@ -775,7 +774,7 @@ NRI_INLINE void CommandBufferD3D12::Barrier(const BarrierGroupDesc& barrierGroup return; // Gather - D3D12_RESOURCE_BARRIER* barriers 
= StackAlloc(D3D12_RESOURCE_BARRIER, barrierNum); + Scratch barriers = AllocateScratch(m_Device, D3D12_RESOURCE_BARRIER, barrierNum); memset(barriers, 0, sizeof(D3D12_RESOURCE_BARRIER) * barrierNum); D3D12_RESOURCE_BARRIER* ptr = barriers; @@ -841,7 +840,7 @@ NRI_INLINE void CommandBufferD3D12::CopyQueries(const QueryPool& queryPool, uint NRI_INLINE void CommandBufferD3D12::BeginAnnotation(const char* name) { size_t len = strlen(name) + 1; - wchar_t* s = StackAlloc(wchar_t, len); + Scratch s = AllocateScratch(m_Device, wchar_t, len); ConvertCharToWchar(name, s, len); PIXBeginEvent(m_GraphicsCommandList, PIX_COLOR_DEFAULT, s); diff --git a/Source/D3D12/CommandQueueD3D12.hpp b/Source/D3D12/CommandQueueD3D12.hpp index 01f91f60..c35de21a 100644 --- a/Source/D3D12/CommandQueueD3D12.hpp +++ b/Source/D3D12/CommandQueueD3D12.hpp @@ -32,7 +32,7 @@ NRI_INLINE void CommandQueueD3D12::Submit(const QueueSubmitDesc& queueSubmitDesc } if (queueSubmitDesc.commandBufferNum) { - ID3D12CommandList** commandLists = StackAlloc(ID3D12CommandList*, queueSubmitDesc.commandBufferNum); + Scratch commandLists = AllocateScratch(m_Device, ID3D12CommandList*, queueSubmitDesc.commandBufferNum); for (uint32_t j = 0; j < queueSubmitDesc.commandBufferNum; j++) commandLists[j] = *(CommandBufferD3D12*)queueSubmitDesc.commandBuffers[j]; diff --git a/Source/D3D12/DeviceD3D12.hpp b/Source/D3D12/DeviceD3D12.hpp index 147592b3..a2372371 100644 --- a/Source/D3D12/DeviceD3D12.hpp +++ b/Source/D3D12/DeviceD3D12.hpp @@ -410,7 +410,7 @@ void DeviceD3D12::FillDesc(const DeviceCreationDesc& deviceCreationDesc) { m_Desc.texture2DMaxDim = D3D12_REQ_TEXTURE2D_U_OR_V_DIMENSION; m_Desc.texture3DMaxDim = D3D12_REQ_TEXTURE3D_U_V_OR_W_DIMENSION; m_Desc.textureArrayLayerMaxNum = D3D12_REQ_TEXTURE2D_ARRAY_AXIS_DIMENSION; - m_Desc.texelBufferMaxDim = (1 << D3D12_REQ_BUFFER_RESOURCE_TEXEL_COUNT_2_TO_EXP) - 1; + m_Desc.typedBufferMaxDim = 1 << D3D12_REQ_BUFFER_RESOURCE_TEXEL_COUNT_2_TO_EXP; m_Desc.memoryAllocationMaxNum = 
0xFFFFFFFF; m_Desc.samplerAllocationMaxNum = D3D12_REQ_SAMPLER_OBJECT_COUNT_PER_DEVICE; @@ -418,21 +418,22 @@ void DeviceD3D12::FillDesc(const DeviceCreationDesc& deviceCreationDesc) { m_Desc.constantBufferOffsetAlignment = D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT; m_Desc.constantBufferMaxRange = D3D12_REQ_IMMEDIATE_CONSTANT_BUFFER_ELEMENT_COUNT * 16; m_Desc.storageBufferOffsetAlignment = D3D12_RAW_UAV_SRV_BYTE_ALIGNMENT; - m_Desc.storageBufferMaxRange = (1 << D3D12_REQ_BUFFER_RESOURCE_TEXEL_COUNT_2_TO_EXP) - 1; + m_Desc.storageBufferMaxRange = 1 << D3D12_REQ_BUFFER_RESOURCE_TEXEL_COUNT_2_TO_EXP; m_Desc.bufferTextureGranularity = D3D12_SMALL_RESOURCE_PLACEMENT_ALIGNMENT; m_Desc.bufferMaxSize = D3D12_REQ_RESOURCE_SIZE_IN_MEGABYTES_EXPRESSION_C_TERM * 1024ull * 1024ull; + m_Desc.pipelineLayoutDescriptorSetMaxNum = ROOT_SIGNATURE_DWORD_NUM / 1; + m_Desc.pipelineLayoutRootConstantMaxSize = sizeof(uint32_t) * ROOT_SIGNATURE_DWORD_NUM / 1; + m_Desc.pipelineLayoutRootDescriptorMaxNum = ROOT_SIGNATURE_DWORD_NUM / 2; + // https://learn.microsoft.com/en-us/windows/win32/direct3d12/hardware-support const uint32_t FULL_HEAP = 1000000; // TODO: even on D3D12_RESOURCE_BINDING_TIER_3 devices the validation still claims that the limit is 1000000 - m_Desc.pipelineLayoutDescriptorSetMaxNum = D3D_DESCRIPTOR_SET_MAX_NUM; m_Desc.perStageDescriptorSamplerMaxNum = options.ResourceBindingTier >= D3D12_RESOURCE_BINDING_TIER_2 ? 2048 : 16; m_Desc.perStageDescriptorConstantBufferMaxNum = options.ResourceBindingTier >= D3D12_RESOURCE_BINDING_TIER_3 ? FULL_HEAP : 14; m_Desc.perStageDescriptorTextureMaxNum = options.ResourceBindingTier >= D3D12_RESOURCE_BINDING_TIER_2 ? FULL_HEAP : 128; m_Desc.perStageResourceMaxNum = m_Desc.perStageDescriptorTextureMaxNum; m_Desc.perStageDescriptorStorageTextureMaxNum = options.ResourceBindingTier >= D3D12_RESOURCE_BINDING_TIER_3 ? FULL_HEAP : (levels.MaxSupportedFeatureLevel >= D3D_FEATURE_LEVEL_11_1 ? 
64 : 8); m_Desc.perStageDescriptorStorageBufferMaxNum = m_Desc.perStageDescriptorStorageTextureMaxNum; - m_Desc.rootConstantMaxSize = D3D_ROOT_CONSTANT_MAX_SIZE; - m_Desc.rootDescriptorMaxNum = D3D_ROOT_DESCRIPTOR_MAX_NUM; m_Desc.descriptorSetSamplerMaxNum = m_Desc.perStageDescriptorSamplerMaxNum; m_Desc.descriptorSetConstantBufferMaxNum = m_Desc.perStageDescriptorConstantBufferMaxNum; diff --git a/Source/D3D12/PipelineD3D12.hpp b/Source/D3D12/PipelineD3D12.hpp index c099b12a..ec49caf2 100644 --- a/Source/D3D12/PipelineD3D12.hpp +++ b/Source/D3D12/PipelineD3D12.hpp @@ -238,7 +238,8 @@ Result PipelineD3D12::CreateFromStream(const GraphicsPipelineDesc& graphicsPipel // Vertex input uint32_t attributeNum = graphicsPipelineDesc.vertexInput ? graphicsPipelineDesc.vertexInput->attributeNum : 0; - stream.inputLayout.desc.pInputElementDescs = StackAlloc(D3D12_INPUT_ELEMENT_DESC, attributeNum); + Scratch scratch = AllocateScratch(m_Device, D3D12_INPUT_ELEMENT_DESC, attributeNum); + stream.inputLayout.desc.pInputElementDescs = scratch; if (graphicsPipelineDesc.vertexInput) { const VertexInputDesc& vi = *graphicsPipelineDesc.vertexInput; @@ -330,7 +331,8 @@ Result PipelineD3D12::Create(const GraphicsPipelineDesc& graphicsPipelineDesc) { // Vertex input uint32_t attributeNum = graphicsPipelineDesc.vertexInput ? graphicsPipelineDesc.vertexInput->attributeNum : 0; - graphicsPipleineStateDesc.InputLayout.pInputElementDescs = StackAlloc(D3D12_INPUT_ELEMENT_DESC, attributeNum); + Scratch scratch = AllocateScratch(m_Device, D3D12_INPUT_ELEMENT_DESC, attributeNum); + graphicsPipleineStateDesc.InputLayout.pInputElementDescs = scratch; if (graphicsPipelineDesc.vertexInput) { const VertexInputDesc& vi = *graphicsPipelineDesc.vertexInput; @@ -408,9 +410,13 @@ Result PipelineD3D12::Create(const RayTracingPipelineDesc& rayTracingPipelineDes uint32_t stateSubobjectNum = 0; uint32_t shaderNum = rayTracingPipelineDesc.shaderLibrary ? 
rayTracingPipelineDesc.shaderLibrary->shaderNum : 0; - Vector stateSubobjects( - 1 /*pipeline config*/ + 1 /*shader config*/ + 1 /*node mask*/ + shaderNum /*DXIL libraries*/ + rayTracingPipelineDesc.shaderGroupDescNum + (rootSignature ? 1u : 0u), - m_Device.GetStdAllocator()); + uint32_t stateObjectNum = 1 // pipeline config + + 1 // shader config + + 1 // node mask + + shaderNum // DXIL libraries + + rayTracingPipelineDesc.shaderGroupDescNum + + (rootSignature ? 1 : 0); + Scratch stateSubobjects = AllocateScratch(m_Device, D3D12_STATE_SUBOBJECT, stateObjectNum); D3D12_RAYTRACING_PIPELINE_CONFIG rayTracingPipelineConfig = {}; { @@ -449,7 +455,7 @@ Result PipelineD3D12::Create(const RayTracingPipelineDesc& rayTracingPipelineDes stateSubobjectNum++; } - Vector libraryDescs(rayTracingPipelineDesc.shaderLibrary->shaderNum, m_Device.GetStdAllocator()); + Scratch libraryDescs = AllocateScratch(m_Device, D3D12_DXIL_LIBRARY_DESC, rayTracingPipelineDesc.shaderLibrary->shaderNum); for (uint32_t i = 0; i < rayTracingPipelineDesc.shaderLibrary->shaderNum; i++) { libraryDescs[i].DXILLibrary.pShaderBytecode = rayTracingPipelineDesc.shaderLibrary->shaders[i].bytecode; libraryDescs[i].DXILLibrary.BytecodeLength = (size_t)rayTracingPipelineDesc.shaderLibrary->shaders[i].size; @@ -470,7 +476,7 @@ Result PipelineD3D12::Create(const RayTracingPipelineDesc& rayTracingPipelineDes } uint32_t hitGroupNum = 0; - Vector hitGroups(rayTracingPipelineDesc.shaderGroupDescNum, m_Device.GetStdAllocator()); + Scratch hitGroups = AllocateScratch(m_Device, D3D12_HIT_GROUP_DESC, rayTracingPipelineDesc.shaderGroupDescNum); m_ShaderGroupNames.reserve(rayTracingPipelineDesc.shaderGroupDescNum); for (uint32_t i = 0; i < rayTracingPipelineDesc.shaderGroupDescNum; i++) { bool isHitGroup = true; diff --git a/Source/D3D12/PipelineLayoutD3D12.hpp b/Source/D3D12/PipelineLayoutD3D12.hpp index 73124ec8..ab6d0577 100644 --- a/Source/D3D12/PipelineLayoutD3D12.hpp +++ b/Source/D3D12/PipelineLayoutD3D12.hpp @@ 
-44,30 +44,27 @@ PipelineLayoutD3D12::PipelineLayoutD3D12(DeviceD3D12& device) Result PipelineLayoutD3D12::Create(const PipelineLayoutDesc& pipelineLayoutDesc) { m_IsGraphicsPipelineLayout = pipelineLayoutDesc.shaderStages & StageBits::GRAPHICS_SHADERS; - uint32_t rangeMax = 0; + uint32_t rangeNum = 0; + uint32_t rangeMaxNum = 0; for (uint32_t i = 0; i < pipelineLayoutDesc.descriptorSetNum; i++) - rangeMax += pipelineLayoutDesc.descriptorSets[i].rangeNum; + rangeMaxNum += pipelineLayoutDesc.descriptorSets[i].rangeNum; StdAllocator& allocator = m_Device.GetStdAllocator(); - - uint32_t totalRangeNum = 0; - Vector rootParameters(allocator); - Vector ranges(rangeMax, allocator); - m_DescriptorSetMappings.resize(pipelineLayoutDesc.descriptorSetNum, DescriptorSetMapping(allocator)); m_DescriptorSetRootMappings.resize(pipelineLayoutDesc.descriptorSetNum, DescriptorSetRootMapping(allocator)); m_DynamicConstantBufferMappings.resize(pipelineLayoutDesc.descriptorSetNum); - D3D12_ROOT_PARAMETER1 rootParameterLocal = {}; + Scratch ranges = AllocateScratch(m_Device, D3D12_DESCRIPTOR_RANGE1, rangeMaxNum); + Vector rootParameters(allocator); - bool enableDrawParametersEmulation = m_Device.GetDesc().isDrawParametersEmulationEnabled - && pipelineLayoutDesc.enableD3D12DrawParametersEmulation && (pipelineLayoutDesc.shaderStages & nri::StageBits::VERTEX_SHADER); + bool enableDrawParametersEmulation = m_Device.GetDesc().isDrawParametersEmulationEnabled && pipelineLayoutDesc.enableD3D12DrawParametersEmulation && (pipelineLayoutDesc.shaderStages & nri::StageBits::VERTEX_SHADER); + D3D12_ROOT_PARAMETER1 rootParameterLocal = {}; if (enableDrawParametersEmulation) { rootParameterLocal.ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS; rootParameterLocal.ShaderVisibility = D3D12_SHADER_VISIBILITY_VERTEX; rootParameterLocal.Constants.ShaderRegister = 0; - rootParameterLocal.Constants.RegisterSpace = BASE_ATTRIBUTES_EMULATION_SPACE; + rootParameterLocal.Constants.RegisterSpace = 
NRI_BASE_ATTRIBUTES_EMULATION_SPACE; rootParameterLocal.Constants.Num32BitValues = 2; rootParameters.push_back(rootParameterLocal); } @@ -94,7 +91,7 @@ Result PipelineLayoutD3D12::Create(const PipelineLayoutDesc& pipelineLayoutDesc) rootParameter.DescriptorTable.NumDescriptorRanges = groupedRangeNum; rootParameters.push_back(rootParameter); - totalRangeNum += groupedRangeNum; + rangeNum += groupedRangeNum; groupedRangeNum = 0; } @@ -103,7 +100,7 @@ Result PipelineLayoutD3D12::Create(const PipelineLayoutDesc& pipelineLayoutDesc) m_DescriptorSetRootMappings[i].rootOffsets[j] = groupedRangeNum ? ROOT_PARAMETER_UNUSED : (uint16_t)rootParameters.size(); rootParameter.ShaderVisibility = shaderVisibility; - rootParameter.DescriptorTable.pDescriptorRanges = &ranges[totalRangeNum]; + rootParameter.DescriptorTable.pDescriptorRanges = &ranges[rangeNum]; D3D12_DESCRIPTOR_RANGE_FLAGS descriptorRangeFlags = D3D12_DESCRIPTOR_RANGE_FLAG_NONE; if (descriptorRangeDesc.flags & DescriptorRangeBits::PARTIALLY_BOUND) { @@ -112,7 +109,7 @@ Result PipelineLayoutD3D12::Create(const PipelineLayoutDesc& pipelineLayoutDesc) descriptorRangeFlags |= D3D12_DESCRIPTOR_RANGE_FLAG_DATA_VOLATILE; } - D3D12_DESCRIPTOR_RANGE1& descriptorRange = ranges[totalRangeNum + groupedRangeNum]; + D3D12_DESCRIPTOR_RANGE1& descriptorRange = ranges[rangeNum + groupedRangeNum]; descriptorRange.RangeType = rangeType; descriptorRange.NumDescriptors = descriptorRangeDesc.descriptorNum; descriptorRange.BaseShaderRegister = descriptorRangeDesc.baseRegisterIndex; @@ -125,7 +122,7 @@ Result PipelineLayoutD3D12::Create(const PipelineLayoutDesc& pipelineLayoutDesc) if (groupedRangeNum) { rootParameter.DescriptorTable.NumDescriptorRanges = groupedRangeNum; rootParameters.push_back(rootParameter); - totalRangeNum += groupedRangeNum; + rangeNum += groupedRangeNum; } if (descriptorSetDesc.dynamicConstantBufferNum) { @@ -155,18 +152,18 @@ Result PipelineLayoutD3D12::Create(const PipelineLayoutDesc& pipelineLayoutDesc) 
rootParameterLocal.ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS; rootParameterLocal.ShaderVisibility = GetShaderVisibility(rootConstantDesc.shaderStages); rootParameterLocal.Constants.ShaderRegister = rootConstantDesc.registerIndex; - rootParameterLocal.Constants.RegisterSpace = 0; + rootParameterLocal.Constants.RegisterSpace = pipelineLayoutDesc.rootRegisterSpace; rootParameterLocal.Constants.Num32BitValues = rootConstantDesc.size / 4; rootParameters.push_back(rootParameterLocal); } } - if (pipelineLayoutDesc.rootDescriptorSetNum) { + if (pipelineLayoutDesc.rootDescriptorNum) { m_BaseRootDescriptor = (uint32_t)rootParameters.size(); - for (uint32_t i = 0; i < pipelineLayoutDesc.rootDescriptorSetNum; i++) { - const nri::RootDescriptorSetDesc& rootDescriptorDesc = pipelineLayoutDesc.rootDescriptorSets[i]; + for (uint32_t i = 0; i < pipelineLayoutDesc.rootDescriptorNum; i++) { + const nri::RootDescriptorDesc& rootDescriptorDesc = pipelineLayoutDesc.rootDescriptors[i]; if (rootDescriptorDesc.descriptorType == DescriptorType::CONSTANT_BUFFER) rootParameterLocal.ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV; @@ -177,7 +174,7 @@ Result PipelineLayoutD3D12::Create(const PipelineLayoutDesc& pipelineLayoutDesc) rootParameterLocal.ShaderVisibility = GetShaderVisibility(rootDescriptorDesc.shaderStages); rootParameterLocal.Descriptor.ShaderRegister = rootDescriptorDesc.registerIndex; - rootParameterLocal.Descriptor.RegisterSpace = rootDescriptorDesc.registerSpace; + rootParameterLocal.Descriptor.RegisterSpace = pipelineLayoutDesc.rootRegisterSpace; rootParameterLocal.Descriptor.Flags = D3D12_ROOT_DESCRIPTOR_FLAG_NONE; rootParameters.push_back(rootParameterLocal); diff --git a/Source/D3D12/SharedD3D12.h b/Source/D3D12/SharedD3D12.h index c8de1b9e..a1921674 100644 --- a/Source/D3D12/SharedD3D12.h +++ b/Source/D3D12/SharedD3D12.h @@ -11,8 +11,6 @@ typedef uint64_t DescriptorPointerGPU; typedef uint16_t HeapIndexType; typedef uint16_t HeapOffsetType; -#define 
BASE_ATTRIBUTES_EMULATION_SPACE 999 // see NRI_ENABLE_DRAW_PARAMETERS - struct MemoryTypeInfo { uint16_t heapFlags; uint8_t heapType; diff --git a/Source/D3D12/TextureD3D12.hpp b/Source/D3D12/TextureD3D12.hpp index 9f75e952..21bb90fd 100644 --- a/Source/D3D12/TextureD3D12.hpp +++ b/Source/D3D12/TextureD3D12.hpp @@ -8,23 +8,23 @@ void nri::GetResourceDesc(D3D12_RESOURCE_DESC* desc, const TextureDesc& textureD desc->Alignment = textureDesc.sampleNum > 1 ? 0 : D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT; desc->Width = Align(textureDesc.width, blockWidth); desc->Height = Align(textureDesc.height, blockWidth); - desc->DepthOrArraySize = textureDesc.type == TextureType::TEXTURE_3D ? textureDesc.depth : textureDesc.layerNum; + desc->DepthOrArraySize = std::max(textureDesc.type == TextureType::TEXTURE_3D ? textureDesc.depth : textureDesc.layerNum, (Dim_t)1); desc->MipLevels = textureDesc.mipNum; desc->Format = (textureDesc.usageMask & nri::TextureUsageBits::SHADING_RATE_ATTACHMENT) ? dxgiFormat.typed : dxgiFormat.typeless; - desc->SampleDesc.Count = textureDesc.sampleNum; + desc->SampleDesc.Count = std::max(textureDesc.sampleNum, (Sample_t)1); desc->Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN; desc->Flags = GetTextureFlags(textureDesc.usageMask); } Result TextureD3D12::Create(const TextureDesc& textureDesc) { - m_Desc = textureDesc; + m_Desc = FixTextureDesc(textureDesc); return Result::SUCCESS; } Result TextureD3D12::Create(const TextureD3D12Desc& textureDesc) { if (textureDesc.desc) - m_Desc = *textureDesc.desc; + m_Desc = FixTextureDesc(*textureDesc.desc); else if (!GetTextureDesc(textureDesc, m_Desc)) return Result::INVALID_ARGUMENT; diff --git a/Source/Shared/SharedExternal.h b/Source/Shared/SharedExternal.h index 26d6fc7f..a5d025ae 100644 --- a/Source/Shared/SharedExternal.h +++ b/Source/Shared/SharedExternal.h @@ -43,9 +43,7 @@ constexpr uint32_t TIMEOUT_FENCE = 5000; // 5 sec constexpr uint64_t PRESENT_INDEX_BIT_NUM = 56ull; // 
https://learn.microsoft.com/en-us/windows/win32/direct3d12/root-signature-limits -constexpr uint32_t D3D_DESCRIPTOR_SET_MAX_NUM = 64 / 1; -constexpr uint32_t D3D_ROOT_CONSTANT_MAX_SIZE = sizeof(uint32_t) * 64 / 1; -constexpr uint32_t D3D_ROOT_DESCRIPTOR_MAX_NUM = 64 / 2; +constexpr uint32_t ROOT_SIGNATURE_DWORD_NUM = 64; // Helpers template @@ -201,6 +199,15 @@ inline bool IsDepthBiasEnabled(const nri::DepthBiasDesc& depthBiasDesc) { return depthBiasDesc.constant != 0.0f || depthBiasDesc.slope != 0.0f; } +inline nri::TextureDesc FixTextureDesc(const nri::TextureDesc& textureDesc) { + nri::TextureDesc desc = textureDesc; + desc.depth = std::max(desc.depth, (nri::Dim_t)1); + desc.layerNum = std::max(desc.layerNum, (nri::Dim_t)1); + desc.sampleNum = std::max(desc.sampleNum, (nri::Sample_t)1); + + return desc; +} + // Strings void ConvertCharToWchar(const char* in, wchar_t* out, size_t outLen); void ConvertWcharToChar(const wchar_t* in, char* out, size_t outLen); diff --git a/Source/Shared/StdAllocator.h b/Source/Shared/StdAllocator.h index b6a825e9..4d8cddeb 100644 --- a/Source/Shared/StdAllocator.h +++ b/Source/Shared/StdAllocator.h @@ -2,6 +2,8 @@ #pragma once +#include + #include #include #include @@ -164,28 +166,47 @@ using String = std::basic_string, StdAllocator -struct Scratch { - T* mem; - const AllocationCallbacks& allocator; - bool isHeap; +class Scratch { +public: + Scratch(const AllocationCallbacks& allocator, T* mem, size_t num) + : m_Allocator(allocator) + , m_Mem(mem) +#ifdef _DEBUG + , m_Num(num) +#endif + { + m_IsHeap = (num * sizeof(T) + alignof(T)) > MAX_STACK_ALLOC_SIZE; + } ~Scratch() { - if (isHeap) - allocator.Free(allocator.userArg, mem); + if (m_IsHeap) + m_Allocator.Free(m_Allocator.userArg, m_Mem); } inline operator T*() const { - return mem; + return m_Mem; } -}; -constexpr size_t MAX_STACK_ALLOC_SIZE = 128 * 1024; + inline T& operator [] (size_t i) const { + assert(i < m_Num); + return m_Mem[i]; + } + +private: + const AllocationCallbacks& 
m_Allocator; + T* m_Mem = nullptr; +#ifdef _DEBUG + size_t m_Num = 0; +#endif + bool m_IsHeap = false; +}; #define AllocateScratch(device, T, elementNum) \ - {((elementNum) * sizeof(T) + alignof(T)) > MAX_STACK_ALLOC_SIZE \ + { (device).GetStdAllocator().GetInterface(), \ + ((elementNum) * sizeof(T) + alignof(T)) > MAX_STACK_ALLOC_SIZE \ ? (T*)(device).GetStdAllocator().GetInterface().Allocate((device).GetStdAllocator().GetInterface().userArg, (elementNum) * sizeof(T), alignof(T)) \ : (T*)Align((elementNum) ? (T*)_alloca(((elementNum) * sizeof(T) + alignof(T))) : nullptr, alignof(T)), \ - (device).GetStdAllocator().GetInterface(), ((elementNum) * sizeof(T) + alignof(T)) > MAX_STACK_ALLOC_SIZE} - -#define StackAlloc(T, elementNum) Align(((elementNum) ? (T*)_alloca((elementNum) * sizeof(T) + alignof(T)) : nullptr), alignof(T)) + (elementNum) } diff --git a/Source/VK/CommandBufferVK.hpp b/Source/VK/CommandBufferVK.hpp index 1e1cc5b6..c54275de 100644 --- a/Source/VK/CommandBufferVK.hpp +++ b/Source/VK/CommandBufferVK.hpp @@ -52,7 +52,7 @@ NRI_INLINE Result CommandBufferVK::End() { } NRI_INLINE void CommandBufferVK::SetViewports(const Viewport* viewports, uint32_t viewportNum) { - VkViewport* vkViewports = StackAlloc(VkViewport, viewportNum); + Scratch vkViewports = AllocateScratch(m_Device, VkViewport, viewportNum); for (uint32_t i = 0; i < viewportNum; i++) { const Viewport& viewport = viewports[i]; VkViewport& vkViewport = vkViewports[i]; @@ -73,7 +73,7 @@ NRI_INLINE void CommandBufferVK::SetViewports(const Viewport* viewports, uint32_ } NRI_INLINE void CommandBufferVK::SetScissors(const Rect* rects, uint32_t rectNum) { - VkRect2D* vkRects = StackAlloc(VkRect2D, rectNum); + Scratch vkRects = AllocateScratch(m_Device, VkRect2D, rectNum); for (uint32_t i = 0; i < rectNum; i++) { const Rect& viewport = rects[i]; VkRect2D& vkRect = vkRects[i]; @@ -104,7 +104,7 @@ NRI_INLINE void CommandBufferVK::SetStencilReference(uint8_t frontRef, uint8_t b } NRI_INLINE void 
CommandBufferVK::SetSampleLocations(const SampleLocation* locations, Sample_t locationNum, Sample_t sampleNum) { - VkSampleLocationEXT* sampleLocations = StackAlloc(VkSampleLocationEXT, locationNum); + Scratch sampleLocations = AllocateScratch(m_Device, VkSampleLocationEXT, locationNum); for (uint32_t i = 0; i < locationNum; i++) sampleLocations[i] = {(float)(locations[i].x + 8) / 16.0f, (float)(locations[i].y + 8) / 16.0f}; @@ -151,7 +151,7 @@ NRI_INLINE void CommandBufferVK::ClearAttachments(const ClearDesc* clearDescs, u // Attachments uint32_t attachmentNum = 0; - VkClearAttachment* attachments = StackAlloc(VkClearAttachment, clearDescNum); + Scratch attachments = AllocateScratch(m_Device, VkClearAttachment, clearDescNum); for (uint32_t i = 0; i < clearDescNum; i++) { const ClearDesc& desc = clearDescs[i]; @@ -179,8 +179,7 @@ NRI_INLINE void CommandBufferVK::ClearAttachments(const ClearDesc* clearDescs, u if (!hasRects) rectNum = 1; - VkClearRect* clearRects = StackAlloc(VkClearRect, rectNum); - + Scratch clearRects = AllocateScratch(m_Device, VkClearRect, rectNum); for (uint32_t i = 0; i < rectNum; i++) { VkClearRect& clearRect = clearRects[i]; @@ -212,8 +211,7 @@ NRI_INLINE void CommandBufferVK::ClearStorageTexture(const ClearStorageTextureDe const DescriptorVK& descriptor = *(const DescriptorVK*)clearDesc.storageTexture; const VkClearColorValue* value = (const VkClearColorValue*)&clearDesc.value; - VkImageSubresourceRange range; - descriptor.GetImageSubresourceRange(range); + VkImageSubresourceRange range = descriptor.GetImageSubresourceRange(); const auto& vk = m_Device.GetDispatchTable(); vk.CmdClearColorImage(m_Handle, descriptor.GetImage(), VK_IMAGE_LAYOUT_GENERAL, value, 1, &range); @@ -226,7 +224,7 @@ NRI_INLINE void CommandBufferVK::BeginRendering(const AttachmentsDesc& attachmen m_RenderHeight = deviceDesc.attachmentMaxDim; // Color - VkRenderingAttachmentInfo* colors = StackAlloc(VkRenderingAttachmentInfo, attachmentsDesc.colorNum); + Scratch colors 
= AllocateScratch(m_Device, VkRenderingAttachmentInfo, attachmentsDesc.colorNum); for (uint32_t i = 0; i < attachmentsDesc.colorNum; i++) { const DescriptorVK& descriptor = *(DescriptorVK*)attachmentsDesc.colors[i]; const DescriptorTexDesc& desc = descriptor.GetTexDesc(); @@ -323,7 +321,7 @@ NRI_INLINE void CommandBufferVK::EndRendering() { } NRI_INLINE void CommandBufferVK::SetVertexBuffers(uint32_t baseSlot, uint32_t bufferNum, const Buffer* const* buffers, const uint64_t* offsets) { - VkBuffer* bufferHandles = StackAlloc(VkBuffer, bufferNum); + Scratch bufferHandles = AllocateScratch(m_Device, VkBuffer, bufferNum); for (uint32_t i = 0; i < bufferNum; i++) bufferHandles[i] = ((BufferVK*)buffers[i])->GetHandle(); @@ -401,13 +399,12 @@ NRI_INLINE void CommandBufferVK::SetRootDescriptor(uint32_t rootDescriptorIndex, descriptorWrite.dstArrayElement = 0; descriptorWrite.descriptorCount = 1; + VkDescriptorBufferInfo bufferInfo = descriptorVK.GetBufferInfo(); + // Let's match D3D12 spec (no textures, no typed buffers) if (descriptorType == DescriptorTypeVK::BUFFER_VIEW) { const DescriptorBufDesc& bufDesc = descriptorVK.GetBufDesc(); descriptorWrite.descriptorType = bufDesc.viewType == BufferViewType::CONSTANT ? 
VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; - - VkDescriptorBufferInfo bufferInfo = {}; - descriptorVK.GetBufferInfo(bufferInfo); descriptorWrite.pBufferInfo = &bufferInfo; } @@ -625,7 +622,7 @@ static inline VkAccessFlags2 GetAccessFlags(AccessBits accessBits) { NRI_INLINE void CommandBufferVK::Barrier(const BarrierGroupDesc& barrierGroupDesc) { // Global - VkMemoryBarrier2* memoryBarriers = StackAlloc(VkMemoryBarrier2, barrierGroupDesc.globalNum); + Scratch memoryBarriers = AllocateScratch(m_Device, VkMemoryBarrier2, barrierGroupDesc.globalNum); for (uint16_t i = 0; i < barrierGroupDesc.globalNum; i++) { const GlobalBarrierDesc& in = barrierGroupDesc.globals[i]; @@ -638,7 +635,7 @@ NRI_INLINE void CommandBufferVK::Barrier(const BarrierGroupDesc& barrierGroupDes } // Buffer - VkBufferMemoryBarrier2* bufferBarriers = StackAlloc(VkBufferMemoryBarrier2, barrierGroupDesc.bufferNum); + Scratch bufferBarriers = AllocateScratch(m_Device, VkBufferMemoryBarrier2, barrierGroupDesc.bufferNum); for (uint16_t i = 0; i < barrierGroupDesc.bufferNum; i++) { const BufferBarrierDesc& in = barrierGroupDesc.buffers[i]; const BufferVK& bufferImpl = *(const BufferVK*)in.buffer; @@ -657,7 +654,7 @@ NRI_INLINE void CommandBufferVK::Barrier(const BarrierGroupDesc& barrierGroupDes } // Texture - VkImageMemoryBarrier2* textureBarriers = StackAlloc(VkImageMemoryBarrier2, barrierGroupDesc.textureNum); + Scratch textureBarriers = AllocateScratch(m_Device, VkImageMemoryBarrier2, barrierGroupDesc.textureNum); for (uint16_t i = 0; i < barrierGroupDesc.textureNum; i++) { const TextureBarrierDesc& in = barrierGroupDesc.textures[i]; const TextureVK& textureImpl = *(const TextureVK*)in.texture; @@ -760,7 +757,7 @@ NRI_INLINE void CommandBufferVK::CopyWholeTexture(const TextureVK& dstTexture, c const TextureDesc& dstTextureDesc = dstTexture.GetDesc(); const TextureDesc& srcTextureDesc = srcTexture.GetDesc(); - VkImageCopy* regions = StackAlloc(VkImageCopy, 
dstTextureDesc.mipNum); + Scratch regions = AllocateScratch(m_Device, VkImageCopy, dstTextureDesc.mipNum); for (Mip_t i = 0; i < dstTextureDesc.mipNum; i++) { regions[i].srcSubresource = {srcTexture.GetImageAspectFlags(), i, 0, srcTextureDesc.layerNum}; regions[i].dstSubresource = {dstTexture.GetImageAspectFlags(), i, 0, dstTextureDesc.layerNum}; diff --git a/Source/VK/CommandQueueVK.hpp b/Source/VK/CommandQueueVK.hpp index 5004d096..706b5cfe 100644 --- a/Source/VK/CommandQueueVK.hpp +++ b/Source/VK/CommandQueueVK.hpp @@ -15,7 +15,7 @@ NRI_INLINE void CommandQueueVK::SetDebugName(const char* name) { NRI_INLINE void CommandQueueVK::Submit(const QueueSubmitDesc& queueSubmitDesc, const SwapChain* swapChain) { ExclusiveScope lock(m_Lock); - VkSemaphoreSubmitInfo* waitSemaphores = StackAlloc(VkSemaphoreSubmitInfo, queueSubmitDesc.waitFenceNum); + Scratch waitSemaphores = AllocateScratch(m_Device, VkSemaphoreSubmitInfo, queueSubmitDesc.waitFenceNum); for (uint32_t i = 0; i < queueSubmitDesc.waitFenceNum; i++) { waitSemaphores[i] = {VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO}; waitSemaphores[i].semaphore = *(FenceVK*)queueSubmitDesc.waitFences[i].fence; @@ -23,13 +23,13 @@ NRI_INLINE void CommandQueueVK::Submit(const QueueSubmitDesc& queueSubmitDesc, c waitSemaphores[i].stageMask = GetPipelineStageFlags(queueSubmitDesc.waitFences[i].stages); } - VkCommandBufferSubmitInfo* commandBuffers = StackAlloc(VkCommandBufferSubmitInfo, queueSubmitDesc.commandBufferNum); + Scratch commandBuffers = AllocateScratch(m_Device, VkCommandBufferSubmitInfo, queueSubmitDesc.commandBufferNum); for (uint32_t i = 0; i < queueSubmitDesc.commandBufferNum; i++) { commandBuffers[i] = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO}; commandBuffers[i].commandBuffer = *(CommandBufferVK*)queueSubmitDesc.commandBuffers[i]; } - VkSemaphoreSubmitInfo* signalSemaphores = StackAlloc(VkSemaphoreSubmitInfo, queueSubmitDesc.signalFenceNum); + Scratch signalSemaphores = AllocateScratch(m_Device, 
VkSemaphoreSubmitInfo, queueSubmitDesc.signalFenceNum); for (uint32_t i = 0; i < queueSubmitDesc.signalFenceNum; i++) { signalSemaphores[i] = {VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO}; signalSemaphores[i].semaphore = *(FenceVK*)queueSubmitDesc.signalFences[i].fence; diff --git a/Source/VK/DescriptorSetVK.hpp b/Source/VK/DescriptorSetVK.hpp index 0a30366c..5f54a2b0 100644 --- a/Source/VK/DescriptorSetVK.hpp +++ b/Source/VK/DescriptorSetVK.hpp @@ -74,7 +74,7 @@ static bool WriteBuffers(const DescriptorRangeDesc& rangeDesc, const DescriptorR for (uint32_t i = 0; i < itemNumForWriting; i++) { const DescriptorVK& descriptor = *(DescriptorVK*)update.descriptors[descriptorOffset + i]; - descriptor.GetBufferInfo(infoArray[i]); + infoArray[i] = descriptor.GetBufferInfo(); } write.descriptorType = GetDescriptorType(rangeDesc.descriptorType); @@ -166,15 +166,15 @@ NRI_INLINE void DescriptorSetVK::SetDebugName(const char* name) { } NRI_INLINE void DescriptorSetVK::UpdateDescriptorRanges(uint32_t rangeOffset, uint32_t rangeNum, const DescriptorRangeUpdateDesc* rangeUpdateDescs) { - constexpr uint32_t writesPerIteration = 1024; - uint32_t writeMaxNum = std::min(writesPerIteration, rangeNum); - - VkWriteDescriptorSet* writes = StackAlloc(VkWriteDescriptorSet, writeMaxNum); + constexpr uint32_t writesPerIteration = 256; + constexpr size_t slabSize = 32 * writesPerIteration; // max item size = 32 + static_assert(slabSize <= MAX_STACK_ALLOC_SIZE, "prefer stack alloc"); - constexpr size_t slabSize = 32768; - SlabAllocator slab(StackAlloc(uint8_t, slabSize), slabSize); + uint32_t writeMaxNum = std::min(writesPerIteration, rangeNum); + Scratch writes = AllocateScratch(m_Device, VkWriteDescriptorSet, writeMaxNum); - const auto& vk = m_Device.GetDispatchTable(); + Scratch slabScratch = AllocateScratch(m_Device, uint8_t, slabSize); + SlabAllocator slab(slabScratch, slabSize); uint32_t j = 0; uint32_t descriptorOffset = 0; @@ -210,23 +210,23 @@ NRI_INLINE void 
DescriptorSetVK::UpdateDescriptorRanges(uint32_t rangeOffset, ui descriptorOffset = (descriptorOffset == update.descriptorNum) ? 0 : descriptorOffset; } + const auto& vk = m_Device.GetDispatchTable(); vk.UpdateDescriptorSets(m_Device, writeNum, writes, 0, nullptr); } while (j < rangeNum); } NRI_INLINE void DescriptorSetVK::UpdateDynamicConstantBuffers(uint32_t bufferOffset, uint32_t descriptorNum, const Descriptor* const* descriptors) { - VkWriteDescriptorSet* writes = StackAlloc(VkWriteDescriptorSet, descriptorNum); - VkDescriptorBufferInfo* infos = StackAlloc(VkDescriptorBufferInfo, descriptorNum); - uint32_t writeNum = 0; + Scratch writes = AllocateScratch(m_Device, VkWriteDescriptorSet, descriptorNum); + Scratch infos = AllocateScratch(m_Device, VkDescriptorBufferInfo, descriptorNum); for (uint32_t j = 0; j < descriptorNum; j++) { const DynamicConstantBufferDesc& bufferDesc = m_Desc->dynamicConstantBuffers[bufferOffset + j]; - - VkDescriptorBufferInfo& bufferInfo = infos[writeNum]; const DescriptorVK& descriptorImpl = *(const DescriptorVK*)descriptors[j]; - descriptorImpl.GetBufferInfo(bufferInfo); - VkWriteDescriptorSet& write = writes[writeNum++]; + VkDescriptorBufferInfo& bufferInfo = infos[j]; + bufferInfo = descriptorImpl.GetBufferInfo(); + + VkWriteDescriptorSet& write = writes[j]; write = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET}; write.dstSet = m_Handle; write.dstBinding = bufferDesc.registerIndex; @@ -236,13 +236,13 @@ NRI_INLINE void DescriptorSetVK::UpdateDynamicConstantBuffers(uint32_t bufferOff } const auto& vk = m_Device.GetDispatchTable(); - vk.UpdateDescriptorSets(m_Device, writeNum, writes, 0, nullptr); + vk.UpdateDescriptorSets(m_Device, descriptorNum, writes, 0, nullptr); } NRI_INLINE void DescriptorSetVK::Copy(const DescriptorSetCopyDesc& descriptorSetCopyDesc) { const uint32_t rangeNum = descriptorSetCopyDesc.rangeNum + descriptorSetCopyDesc.dynamicConstantBufferNum; - VkCopyDescriptorSet* copies = StackAlloc(VkCopyDescriptorSet, rangeNum); 
+ Scratch copies = AllocateScratch(m_Device, VkCopyDescriptorSet, rangeNum); uint32_t copyNum = 0; const DescriptorSetVK& srcSetImpl = *(const DescriptorSetVK*)descriptorSetCopyDesc.srcDescriptorSet; diff --git a/Source/VK/DescriptorVK.h b/Source/VK/DescriptorVK.h index 8b001248..fdd1a72a 100644 --- a/Source/VK/DescriptorVK.h +++ b/Source/VK/DescriptorVK.h @@ -92,18 +92,24 @@ struct DescriptorVK { return m_TextureDesc.layout != VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL && m_TextureDesc.layout != VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL; } - inline void GetBufferInfo(VkDescriptorBufferInfo& info) const { + inline VkDescriptorBufferInfo GetBufferInfo() const { + VkDescriptorBufferInfo info = {}; info.buffer = m_BufferDesc.handle; info.offset = m_BufferDesc.offset; info.range = m_BufferDesc.size; + + return info; } - inline void GetImageSubresourceRange(VkImageSubresourceRange& range) const { + inline VkImageSubresourceRange GetImageSubresourceRange() const { + VkImageSubresourceRange range = {}; range.aspectMask = m_TextureDesc.aspectFlags; range.baseMipLevel = m_TextureDesc.mipOffset; range.levelCount = m_TextureDesc.mipNum; range.baseArrayLayer = m_TextureDesc.layerOffset; range.layerCount = m_TextureDesc.layerNum; + + return range; } ~DescriptorVK(); diff --git a/Source/VK/DeviceVK.hpp b/Source/VK/DeviceVK.hpp index 89ddb1aa..33fef89f 100644 --- a/Source/VK/DeviceVK.hpp +++ b/Source/VK/DeviceVK.hpp @@ -413,7 +413,7 @@ Result DeviceVK::Create(const DeviceCreationDesc& deviceCreationDesc, const Devi uint32_t deviceGroupNum = 0; m_VK.EnumeratePhysicalDeviceGroups(m_Instance, &deviceGroupNum, nullptr); - VkPhysicalDeviceGroupProperties* deviceGroups = StackAlloc(VkPhysicalDeviceGroupProperties, deviceGroupNum); + Scratch deviceGroups = AllocateScratch(*this, VkPhysicalDeviceGroupProperties, deviceGroupNum); VkResult result = m_VK.EnumeratePhysicalDeviceGroups(m_Instance, &deviceGroupNum, deviceGroups); RETURN_ON_FAILURE(this, result == 
VK_SUCCESS, GetReturnCode(result), "vkEnumeratePhysicalDevices returned %d", (int32_t)result); @@ -740,7 +740,7 @@ Result DeviceVK::Create(const DeviceCreationDesc& deviceCreationDesc, const Devi m_Desc.texture2DMaxDim = (Dim_t)limits.maxImageDimension2D; m_Desc.texture3DMaxDim = (Dim_t)limits.maxImageDimension3D; m_Desc.textureArrayLayerMaxNum = (Dim_t)limits.maxImageArrayLayers; - m_Desc.texelBufferMaxDim = limits.maxTexelBufferElements; + m_Desc.typedBufferMaxDim = limits.maxTexelBufferElements; const VkMemoryPropertyFlags neededFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; for (uint32_t i = 0; i < m_MemoryProps.memoryTypeCount; i++) { @@ -762,14 +762,15 @@ Result DeviceVK::Create(const DeviceCreationDesc& deviceCreationDesc, const Devi m_Desc.bufferTextureGranularity = (uint32_t)limits.bufferImageGranularity; m_Desc.pipelineLayoutDescriptorSetMaxNum = limits.maxBoundDescriptorSets; + m_Desc.pipelineLayoutRootConstantMaxSize = limits.maxPushConstantsSize; + m_Desc.pipelineLayoutRootDescriptorMaxNum = pushDescriptorProps.maxPushDescriptors; + m_Desc.perStageDescriptorSamplerMaxNum = limits.maxPerStageDescriptorSamplers; m_Desc.perStageDescriptorConstantBufferMaxNum = limits.maxPerStageDescriptorUniformBuffers; m_Desc.perStageDescriptorStorageBufferMaxNum = limits.maxPerStageDescriptorStorageBuffers; m_Desc.perStageDescriptorTextureMaxNum = limits.maxPerStageDescriptorSampledImages; m_Desc.perStageDescriptorStorageTextureMaxNum = limits.maxPerStageDescriptorStorageImages; m_Desc.perStageResourceMaxNum = limits.maxPerStageResources; - m_Desc.rootConstantMaxSize = limits.maxPushConstantsSize; - m_Desc.rootDescriptorMaxNum = pushDescriptorProps.maxPushDescriptors; m_Desc.descriptorSetSamplerMaxNum = limits.maxDescriptorSetSamplers; m_Desc.descriptorSetConstantBufferMaxNum = limits.maxDescriptorSetUniformBuffers; @@ -952,10 +953,10 @@ void DeviceVK::FillCreateInfo(const TextureDesc& textureDesc, VkImageCreateInfo& info.format = 
::GetVkFormat(textureDesc.format, true); info.extent.width = textureDesc.width; info.extent.height = textureDesc.height; - info.extent.depth = textureDesc.depth; + info.extent.depth = std::max(textureDesc.depth, (Dim_t)1); info.mipLevels = textureDesc.mipNum; - info.arrayLayers = textureDesc.layerNum; - info.samples = (VkSampleCountFlagBits)textureDesc.sampleNum; + info.arrayLayers = std::max(textureDesc.layerNum, (Dim_t)1); + info.samples = (VkSampleCountFlagBits)std::max(textureDesc.sampleNum, (Sample_t)1); info.tiling = VK_IMAGE_TILING_OPTIMAL; info.usage = GetImageUsageFlags(textureDesc.usageMask); info.sharingMode = m_NumActiveFamilyIndices > 1 ? VK_SHARING_MODE_CONCURRENT : VK_SHARING_MODE_EXCLUSIVE; // TODO: still no DCC on AMD with concurrent? diff --git a/Source/VK/PipelineLayoutVK.h b/Source/VK/PipelineLayoutVK.h index 6f4cab5a..3b9242b5 100644 --- a/Source/VK/PipelineLayoutVK.h +++ b/Source/VK/PipelineLayoutVK.h @@ -66,7 +66,6 @@ struct PipelineLayoutVK { void SetDebugName(const char* name); private: - void FillBindingInfo(const PipelineLayoutDesc& pipelineLayoutDesc); VkDescriptorSetLayout CreateSetLayout(const DescriptorSetDesc& descriptorSetDesc, bool ignoreGlobalSPIRVOffsets, bool isPush); private: diff --git a/Source/VK/PipelineLayoutVK.hpp b/Source/VK/PipelineLayoutVK.hpp index 619bbb70..bb427a8b 100644 --- a/Source/VK/PipelineLayoutVK.hpp +++ b/Source/VK/PipelineLayoutVK.hpp @@ -29,68 +29,126 @@ Result PipelineLayoutVK::Create(const PipelineLayoutDesc& pipelineLayoutDesc) { else if (pipelineLayoutDesc.shaderStages & StageBits::RAY_TRACING_SHADERS) m_PipelineBindPoint = VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR; - // Create set layouts + // Binding offsets + SPIRVBindingOffsets spirvBindingOffsets = {}; + if (!pipelineLayoutDesc.ignoreGlobalSPIRVOffsets) + spirvBindingOffsets = m_Device.GetSPIRVBindingOffsets(); + + std::array bindingOffsets = {}; + bindingOffsets[(uint32_t)DescriptorType::SAMPLER] = spirvBindingOffsets.samplerOffset; + 
bindingOffsets[(uint32_t)DescriptorType::CONSTANT_BUFFER] = spirvBindingOffsets.constantBufferOffset; + bindingOffsets[(uint32_t)DescriptorType::TEXTURE] = spirvBindingOffsets.textureOffset; + bindingOffsets[(uint32_t)DescriptorType::STORAGE_TEXTURE] = spirvBindingOffsets.storageTextureAndBufferOffset; + bindingOffsets[(uint32_t)DescriptorType::BUFFER] = spirvBindingOffsets.textureOffset; + bindingOffsets[(uint32_t)DescriptorType::STORAGE_BUFFER] = spirvBindingOffsets.storageTextureAndBufferOffset; + bindingOffsets[(uint32_t)DescriptorType::STRUCTURED_BUFFER] = spirvBindingOffsets.textureOffset; + bindingOffsets[(uint32_t)DescriptorType::STORAGE_STRUCTURED_BUFFER] = spirvBindingOffsets.storageTextureAndBufferOffset; + bindingOffsets[(uint32_t)DescriptorType::ACCELERATION_STRUCTURE] = spirvBindingOffsets.textureOffset; + + // Binding info + size_t rangeNum = 0; + size_t dynamicConstantBufferNum = 0; + for (uint32_t i = 0; i < pipelineLayoutDesc.descriptorSetNum; i++) { + rangeNum += pipelineLayoutDesc.descriptorSets[i].rangeNum; + dynamicConstantBufferNum += pipelineLayoutDesc.descriptorSets[i].dynamicConstantBufferNum; + } + + m_BindingInfo.descriptorSetDescs.insert(m_BindingInfo.descriptorSetDescs.begin(), pipelineLayoutDesc.descriptorSets, pipelineLayoutDesc.descriptorSets + pipelineLayoutDesc.descriptorSetNum); + m_BindingInfo.hasVariableDescriptorNum.resize(pipelineLayoutDesc.descriptorSetNum); + m_BindingInfo.descriptorSetRangeDescs.reserve(rangeNum); + m_BindingInfo.dynamicConstantBufferDescs.reserve(dynamicConstantBufferNum); + + // Descriptor sets uint32_t setNum = 0; for (uint32_t i = 0; i < pipelineLayoutDesc.descriptorSetNum; i++) { const DescriptorSetDesc& descriptorSetDesc = pipelineLayoutDesc.descriptorSets[i]; - // Non-push - VkDescriptorSetLayout descriptorSetLayout = CreateSetLayout(descriptorSetDesc, pipelineLayoutDesc.ignoreGlobalSPIRVOffsets, false); + setNum = std::max(setNum, descriptorSetDesc.registerSpace); + + // Create set layout + 
VkDescriptorSetLayout descriptorSetLayout = CreateSetLayout(descriptorSetDesc, pipelineLayoutDesc.ignoreGlobalSPIRVOffsets, false); // non-push m_DescriptorSetLayouts.push_back(descriptorSetLayout); - setNum = std::max(setNum, descriptorSetDesc.registerSpace); + // Binding info + m_BindingInfo.hasVariableDescriptorNum[i] = false; + m_BindingInfo.descriptorSetDescs[i].ranges = m_BindingInfo.descriptorSetRangeDescs.data() + m_BindingInfo.descriptorSetRangeDescs.size(); + m_BindingInfo.descriptorSetDescs[i].dynamicConstantBuffers = m_BindingInfo.dynamicConstantBufferDescs.data() + m_BindingInfo.dynamicConstantBufferDescs.size(); + m_BindingInfo.descriptorSetRangeDescs.insert(m_BindingInfo.descriptorSetRangeDescs.end(), descriptorSetDesc.ranges, descriptorSetDesc.ranges + descriptorSetDesc.rangeNum); + m_BindingInfo.dynamicConstantBufferDescs.insert(m_BindingInfo.dynamicConstantBufferDescs.end(), descriptorSetDesc.dynamicConstantBuffers, descriptorSetDesc.dynamicConstantBuffers + descriptorSetDesc.dynamicConstantBufferNum); + + DescriptorRangeDesc* ranges = (DescriptorRangeDesc*)m_BindingInfo.descriptorSetDescs[i].ranges; + for (uint32_t j = 0; j < descriptorSetDesc.rangeNum; j++) { + ranges[j].baseRegisterIndex += bindingOffsets[(uint32_t)descriptorSetDesc.ranges[j].descriptorType]; + + if (m_Device.m_IsSupported.descriptorIndexing && (descriptorSetDesc.ranges[j].flags & DescriptorRangeBits::VARIABLE_SIZED_ARRAY)) + m_BindingInfo.hasVariableDescriptorNum[i] = true; + } + + DynamicConstantBufferDesc* dynamicConstantBuffers = (DynamicConstantBufferDesc*)m_BindingInfo.descriptorSetDescs[i].dynamicConstantBuffers; + for (uint32_t j = 0; j < descriptorSetDesc.dynamicConstantBufferNum; j++) + dynamicConstantBuffers[j].registerIndex += bindingOffsets[(uint32_t)DescriptorType::CONSTANT_BUFFER]; } - if (m_Device.GetDesc().rootDescriptorMaxNum) { - for (uint32_t i = 0; i < pipelineLayoutDesc.rootDescriptorSetNum; i++) { - const RootDescriptorSetDesc& rootDescriptorDesc = 
pipelineLayoutDesc.rootDescriptorSets[i]; + // Root descriptors + m_BindingInfo.pushDescriptorBindings.resize(pipelineLayoutDesc.rootDescriptorNum); + + if (pipelineLayoutDesc.rootDescriptorNum) { + Scratch rootRanges = AllocateScratch(m_Device, DescriptorRangeDesc, pipelineLayoutDesc.rootDescriptorNum); + + DescriptorSetDesc rootSet = {}; + rootSet.ranges = rootRanges; + rootSet.registerSpace = pipelineLayoutDesc.rootRegisterSpace; + rootSet.rangeNum = pipelineLayoutDesc.rootDescriptorNum; + + setNum = std::max(setNum, rootSet.registerSpace); - DescriptorRangeDesc range = {}; + for (uint32_t i = 0; i < pipelineLayoutDesc.rootDescriptorNum; i++) { + const RootDescriptorDesc& rootDescriptorDesc = pipelineLayoutDesc.rootDescriptors[i]; + DescriptorRangeDesc& range = rootRanges[i]; + + range = {}; range.baseRegisterIndex = rootDescriptorDesc.registerIndex; range.descriptorNum = 1; range.descriptorType = rootDescriptorDesc.descriptorType; range.shaderStages = rootDescriptorDesc.shaderStages; - DescriptorSetDesc descriptorSetDesc = {}; - descriptorSetDesc.registerSpace = rootDescriptorDesc.registerSpace; - descriptorSetDesc.ranges = &range; - descriptorSetDesc.rangeNum = 1; - - // Push - VkDescriptorSetLayout descriptorSetLayout = CreateSetLayout(descriptorSetDesc, pipelineLayoutDesc.ignoreGlobalSPIRVOffsets, true); - m_DescriptorSetLayouts.push_back(descriptorSetLayout); - - setNum = std::max(setNum, descriptorSetDesc.registerSpace); + // Binding info + uint32_t registerIndex = rootDescriptorDesc.registerIndex + bindingOffsets[(uint32_t)rootDescriptorDesc.descriptorType]; + m_BindingInfo.pushDescriptorBindings[i] = {rootSet.registerSpace, registerIndex}; } - } - setNum++; + VkDescriptorSetLayout descriptorSetLayout = CreateSetLayout(rootSet, pipelineLayoutDesc.ignoreGlobalSPIRVOffsets, true); // push + m_DescriptorSetLayouts.push_back(descriptorSetLayout); + } // Allocate temp memory for ALL "register spaces" making the entire range consecutive (thanks VK API!)
+ setNum++; Scratch descriptorSetLayouts = AllocateScratch(m_Device, VkDescriptorSetLayout, setNum); - if (setNum != pipelineLayoutDesc.descriptorSetNum + pipelineLayoutDesc.rootDescriptorSetNum) { - // Create "dummy" set layout (needed only if "register space" indices are not consecutive) - VkDescriptorSetLayout dummyDescriptorSetLayout = CreateSetLayout({}, pipelineLayoutDesc.ignoreGlobalSPIRVOffsets, true); // created as "push" + bool hasGaps = setNum > pipelineLayoutDesc.descriptorSetNum + (pipelineLayoutDesc.rootDescriptorNum ? 1 : 0); + if (hasGaps) { + // Create a "dummy" set layout (needed only if "register space" indices are not consecutive) + VkDescriptorSetLayout dummyDescriptorSetLayout = CreateSetLayout({}, pipelineLayoutDesc.ignoreGlobalSPIRVOffsets, false); // non-push m_DescriptorSetLayouts.push_back(dummyDescriptorSetLayout); for (uint32_t i = 0; i < setNum; i++) descriptorSetLayouts[i] = dummyDescriptorSetLayout; } - // Populate descriptor set layouts in proper order + // Populate descriptor set layouts in "register space" order for (uint32_t i = 0; i < pipelineLayoutDesc.descriptorSetNum; i++) { uint32_t setIndex = pipelineLayoutDesc.descriptorSets[i].registerSpace; descriptorSetLayouts[setIndex] = m_DescriptorSetLayouts[i]; } - for (uint32_t i = 0; i < pipelineLayoutDesc.rootDescriptorSetNum; i++) { - uint32_t setIndex = pipelineLayoutDesc.rootDescriptorSets[i].registerSpace; - descriptorSetLayouts[setIndex] = m_DescriptorSetLayouts[pipelineLayoutDesc.descriptorSetNum + i]; + if (pipelineLayoutDesc.rootDescriptorNum) { + uint32_t setIndex = pipelineLayoutDesc.rootRegisterSpace; + descriptorSetLayouts[setIndex] = m_DescriptorSetLayouts[pipelineLayoutDesc.descriptorSetNum]; } // Root constants + m_BindingInfo.pushConstantBindings.resize(pipelineLayoutDesc.rootConstantNum); Scratch pushConstantRanges = AllocateScratch(m_Device, VkPushConstantRange, pipelineLayoutDesc.rootConstantNum); uint32_t offset = 0; @@ -103,6 +161,9 @@ Result 
PipelineLayoutVK::Create(const PipelineLayoutDesc& pipelineLayoutDesc) { range.offset = offset; range.size = pushConstantDesc.size; + // Binding info + m_BindingInfo.pushConstantBindings[i] = {GetShaderStageFlags(pushConstantDesc.shaderStages), offset}; + offset += pushConstantDesc.size; } @@ -117,8 +178,6 @@ Result PipelineLayoutVK::Create(const PipelineLayoutDesc& pipelineLayoutDesc) { VkResult result = vk.CreatePipelineLayout(m_Device, &pipelineLayoutCreateInfo, m_Device.GetAllocationCallbacks(), &m_Handle); RETURN_ON_FAILURE(&m_Device, result == VK_SUCCESS, Result::FAILURE, "vkCreatePipelineLayout returned %d", (int32_t)result); - FillBindingInfo(pipelineLayoutDesc); - return Result::SUCCESS; } @@ -207,8 +266,7 @@ VkDescriptorSetLayout PipelineLayoutVK::CreateSetLayout(const DescriptorSetDesc& info.pNext = m_Device.m_IsSupported.descriptorIndexing ? &bindingFlagsInfo : nullptr; info.bindingCount = bindingNum; info.pBindings = bindingsBegin; - if (m_Device.GetDesc().rootDescriptorMaxNum) - info.flags = isPush ? VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR : 0; + info.flags = isPush ? 
VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR : 0; VkDescriptorSetLayout handle = VK_NULL_HANDLE; const auto& vk = m_Device.GetDispatchTable(); @@ -218,81 +276,6 @@ VkDescriptorSetLayout PipelineLayoutVK::CreateSetLayout(const DescriptorSetDesc& return handle; } -void PipelineLayoutVK::FillBindingInfo(const PipelineLayoutDesc& pipelineLayoutDesc) { - // Binding offsets - SPIRVBindingOffsets spirvBindingOffsets = {}; - if (!pipelineLayoutDesc.ignoreGlobalSPIRVOffsets) - spirvBindingOffsets = m_Device.GetSPIRVBindingOffsets(); - - std::array bindingOffsets = {}; - bindingOffsets[(uint32_t)DescriptorType::SAMPLER] = spirvBindingOffsets.samplerOffset; - bindingOffsets[(uint32_t)DescriptorType::CONSTANT_BUFFER] = spirvBindingOffsets.constantBufferOffset; - bindingOffsets[(uint32_t)DescriptorType::TEXTURE] = spirvBindingOffsets.textureOffset; - bindingOffsets[(uint32_t)DescriptorType::STORAGE_TEXTURE] = spirvBindingOffsets.storageTextureAndBufferOffset; - bindingOffsets[(uint32_t)DescriptorType::BUFFER] = spirvBindingOffsets.textureOffset; - bindingOffsets[(uint32_t)DescriptorType::STORAGE_BUFFER] = spirvBindingOffsets.storageTextureAndBufferOffset; - bindingOffsets[(uint32_t)DescriptorType::STRUCTURED_BUFFER] = spirvBindingOffsets.textureOffset; - bindingOffsets[(uint32_t)DescriptorType::STORAGE_STRUCTURED_BUFFER] = spirvBindingOffsets.storageTextureAndBufferOffset; - bindingOffsets[(uint32_t)DescriptorType::ACCELERATION_STRUCTURE] = spirvBindingOffsets.textureOffset; - - // Count - size_t rangeNum = 0; - size_t dynamicConstantBufferNum = 0; - for (uint32_t i = 0; i < pipelineLayoutDesc.descriptorSetNum; i++) { - rangeNum += pipelineLayoutDesc.descriptorSets[i].rangeNum; - dynamicConstantBufferNum += pipelineLayoutDesc.descriptorSets[i].dynamicConstantBufferNum; - } - - // Copy descriptor set descs with dependencies - m_BindingInfo.descriptorSetDescs.insert(m_BindingInfo.descriptorSetDescs.begin(), pipelineLayoutDesc.descriptorSets, 
pipelineLayoutDesc.descriptorSets + pipelineLayoutDesc.descriptorSetNum); - m_BindingInfo.hasVariableDescriptorNum.resize(pipelineLayoutDesc.descriptorSetNum); - m_BindingInfo.descriptorSetRangeDescs.reserve(rangeNum); - m_BindingInfo.dynamicConstantBufferDescs.reserve(dynamicConstantBufferNum); - - for (uint32_t i = 0; i < pipelineLayoutDesc.descriptorSetNum; i++) { - const DescriptorSetDesc& descriptorSetDesc = pipelineLayoutDesc.descriptorSets[i]; - - m_BindingInfo.hasVariableDescriptorNum[i] = false; - m_BindingInfo.descriptorSetDescs[i].ranges = m_BindingInfo.descriptorSetRangeDescs.data() + m_BindingInfo.descriptorSetRangeDescs.size(); - m_BindingInfo.descriptorSetDescs[i].dynamicConstantBuffers = m_BindingInfo.dynamicConstantBufferDescs.data() + m_BindingInfo.dynamicConstantBufferDescs.size(); - - // Copy descriptor range descs - m_BindingInfo.descriptorSetRangeDescs.insert(m_BindingInfo.descriptorSetRangeDescs.end(), descriptorSetDesc.ranges, descriptorSetDesc.ranges + descriptorSetDesc.rangeNum); - - // Fix descriptor range binding offsets and check for variable descriptor num - DescriptorRangeDesc* ranges = const_cast(m_BindingInfo.descriptorSetDescs[i].ranges); - for (uint32_t j = 0; j < descriptorSetDesc.rangeNum; j++) { - ranges[j].baseRegisterIndex += bindingOffsets[(uint32_t)descriptorSetDesc.ranges[j].descriptorType]; - - if (m_Device.m_IsSupported.descriptorIndexing && (descriptorSetDesc.ranges[j].flags & DescriptorRangeBits::VARIABLE_SIZED_ARRAY)) - m_BindingInfo.hasVariableDescriptorNum[i] = true; - } - - // Copy dynamic constant buffer descs - m_BindingInfo.dynamicConstantBufferDescs.insert(m_BindingInfo.dynamicConstantBufferDescs.end(), descriptorSetDesc.dynamicConstantBuffers, descriptorSetDesc.dynamicConstantBuffers + descriptorSetDesc.dynamicConstantBufferNum); - - // Copy dynamic constant buffer binding offsets - DynamicConstantBufferDesc* dynamicConstantBuffers = const_cast(m_BindingInfo.descriptorSetDescs[i].dynamicConstantBuffers); - for 
(uint32_t j = 0; j < descriptorSetDesc.dynamicConstantBufferNum; j++) - dynamicConstantBuffers[j].registerIndex += bindingOffsets[(uint32_t)DescriptorType::CONSTANT_BUFFER]; - } - - // Copy root constant bindings - m_BindingInfo.pushConstantBindings.resize(pipelineLayoutDesc.rootConstantNum); - for (uint32_t i = 0, offset = 0; i < pipelineLayoutDesc.rootConstantNum; i++) { - m_BindingInfo.pushConstantBindings[i] = {GetShaderStageFlags(pipelineLayoutDesc.rootConstants[i].shaderStages), offset}; - offset += pipelineLayoutDesc.rootConstants[i].size; - } - - // Copy root descriptor bindings - for (uint32_t i = 0; i < pipelineLayoutDesc.rootDescriptorSetNum; i++) { - const RootDescriptorSetDesc& rootDescriptorDesc = pipelineLayoutDesc.rootDescriptorSets[i]; - uint32_t registerIndex = rootDescriptorDesc.registerIndex + bindingOffsets[(uint32_t)rootDescriptorDesc.descriptorType]; - - m_BindingInfo.pushDescriptorBindings.push_back({rootDescriptorDesc.registerSpace, registerIndex}); - } -} - NRI_INLINE void PipelineLayoutVK::SetDebugName(const char* name) { m_Device.SetDebugNameToTrivialObject(VK_OBJECT_TYPE_PIPELINE_LAYOUT, (uint64_t)m_Handle, name); } diff --git a/Source/VK/PipelineVK.h b/Source/VK/PipelineVK.h index 8b68a61e..8f7e05d4 100644 --- a/Source/VK/PipelineVK.h +++ b/Source/VK/PipelineVK.h @@ -43,7 +43,7 @@ struct PipelineVK { Result WriteShaderGroupIdentifiers(uint32_t baseShaderGroupIndex, uint32_t shaderGroupNum, void* buffer) const; private: - Result SetupShaderStage(VkPipelineShaderStageCreateInfo& stage, const ShaderDesc& shaderDesc, VkShaderModule*& modules); + Result SetupShaderStage(VkPipelineShaderStageCreateInfo& stage, const ShaderDesc& shaderDesc, VkShaderModule& module); private: DeviceVK& m_Device; diff --git a/Source/VK/PipelineVK.hpp b/Source/VK/PipelineVK.hpp index 19f769b0..bc5508c2 100644 --- a/Source/VK/PipelineVK.hpp +++ b/Source/VK/PipelineVK.hpp @@ -15,13 +15,12 @@ Result PipelineVK::Create(const GraphicsPipelineDesc& 
graphicsPipelineDesc) { m_BindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; // Shaders - VkPipelineShaderStageCreateInfo* stages = StackAlloc(VkPipelineShaderStageCreateInfo, graphicsPipelineDesc.shaderNum); - VkShaderModule* modules = StackAlloc(VkShaderModule, graphicsPipelineDesc.shaderNum); - VkShaderModule* modulesBegin = modules; + Scratch stages = AllocateScratch(m_Device, VkPipelineShaderStageCreateInfo, graphicsPipelineDesc.shaderNum); + Scratch modules = AllocateScratch(m_Device, VkShaderModule, graphicsPipelineDesc.shaderNum); for (uint32_t i = 0; i < graphicsPipelineDesc.shaderNum; i++) { const ShaderDesc& shaderDesc = graphicsPipelineDesc.shaders[i]; - Result res = SetupShaderStage(stages[i], shaderDesc, modules); + Result res = SetupShaderStage(stages[i], shaderDesc, modules[i]); if (res != Result::SUCCESS) return res; @@ -30,24 +29,27 @@ Result PipelineVK::Create(const GraphicsPipelineDesc& graphicsPipelineDesc) { // Vertex input const VertexInputDesc* vi = graphicsPipelineDesc.vertexInput; + uint32_t attributeNum = vi ? vi->attributeNum : 0u; + uint32_t streamNum = vi ? vi->streamNum : 0u; + + Scratch inputAttribs = AllocateScratch(m_Device, VkVertexInputAttributeDescription, attributeNum); + Scratch vertexBindings = AllocateScratch(m_Device, VkVertexInputBindingDescription, streamNum); VkPipelineVertexInputStateCreateInfo vertexInputState = {VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO}; - vertexInputState.pVertexAttributeDescriptions = StackAlloc(VkVertexInputAttributeDescription, vi ? vi->attributeNum : 0); - vertexInputState.pVertexBindingDescriptions = StackAlloc(VkVertexInputBindingDescription, vi ? 
vi->streamNum : 0); + vertexInputState.pVertexAttributeDescriptions = inputAttribs; + vertexInputState.pVertexBindingDescriptions = vertexBindings; if (vi) { vertexInputState.vertexAttributeDescriptionCount = vi->attributeNum; vertexInputState.vertexBindingDescriptionCount = vi->streamNum; - VkVertexInputAttributeDescription* attributes = const_cast(vertexInputState.pVertexAttributeDescriptions); for (uint32_t i = 0; i < vi->attributeNum; i++) { const VertexAttributeDesc& attribute_desc = vi->attributes[i]; - attributes[i] = {(uint32_t)i, attribute_desc.streamIndex, GetVkFormat(attribute_desc.format), attribute_desc.offset}; + inputAttribs[i] = {(uint32_t)i, attribute_desc.streamIndex, GetVkFormat(attribute_desc.format), attribute_desc.offset}; } - VkVertexInputBindingDescription* streams = const_cast(vertexInputState.pVertexBindingDescriptions); for (uint32_t i = 0; i < vi->streamNum; i++) { const VertexStreamDesc& stream = vi->streams[i]; - streams[i] = {stream.bindingSlot, stream.stride, (stream.stepRate == VertexStreamStepRate::PER_VERTEX) ? VK_VERTEX_INPUT_RATE_VERTEX : VK_VERTEX_INPUT_RATE_INSTANCE}; + vertexBindings[i] = {stream.bindingSlot, stream.stride, (stream.stepRate == VertexStreamStepRate::PER_VERTEX) ? VK_VERTEX_INPUT_RATE_VERTEX : VK_VERTEX_INPUT_RATE_INSTANCE}; } } @@ -149,12 +151,13 @@ Result PipelineVK::Create(const GraphicsPipelineDesc& graphicsPipelineDesc) { // Blending const OutputMergerDesc& om = graphicsPipelineDesc.outputMerger; + Scratch scratch = AllocateScratch(m_Device, VkPipelineColorBlendAttachmentState, om.colorNum); VkPipelineColorBlendStateCreateInfo colorBlendState = {VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO}; colorBlendState.logicOpEnable = om.logicFunc != LogicFunc::NONE ? 
VK_TRUE : VK_FALSE; colorBlendState.logicOp = GetLogicOp(om.logicFunc); colorBlendState.attachmentCount = om.colorNum; - colorBlendState.pAttachments = StackAlloc(VkPipelineColorBlendAttachmentState, om.colorNum); + colorBlendState.pAttachments = scratch; bool isConstantColorReferenced = false; VkPipelineColorBlendAttachmentState* attachments = const_cast(colorBlendState.pAttachments); @@ -177,7 +180,7 @@ Result PipelineVK::Create(const GraphicsPipelineDesc& graphicsPipelineDesc) { } // Formats - VkFormat* colorFormats = StackAlloc(VkFormat, om.colorNum); + Scratch colorFormats = AllocateScratch(m_Device, VkFormat, om.colorNum); for (uint32_t i = 0; i < om.colorNum; i++) colorFormats[i] = GetVkFormat(om.colors[i].format); @@ -217,7 +220,7 @@ Result PipelineVK::Create(const GraphicsPipelineDesc& graphicsPipelineDesc) { const PipelineLayoutVK& pipelineLayoutVK = *(const PipelineLayoutVK*)graphicsPipelineDesc.pipelineLayout; - const VkGraphicsPipelineCreateInfo info = { + VkGraphicsPipelineCreateInfo info = { VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, &pipelineRenderingCreateInfo, flags, @@ -244,7 +247,7 @@ Result PipelineVK::Create(const GraphicsPipelineDesc& graphicsPipelineDesc) { RETURN_ON_FAILURE(&m_Device, vkResult == VK_SUCCESS, GetReturnCode(vkResult), "vkCreateGraphicsPipelines returned %d", (int32_t)vkResult); for (size_t i = 0; i < graphicsPipelineDesc.shaderNum; i++) - vk.DestroyShaderModule(m_Device, modulesBegin[i], m_Device.GetAllocationCallbacks()); + vk.DestroyShaderModule(m_Device, modules[i], m_Device.GetAllocationCallbacks()); return Result::SUCCESS; } @@ -277,10 +280,17 @@ Result PipelineVK::Create(const ComputePipelineDesc& computePipelineDesc) { nullptr, }; - const VkComputePipelineCreateInfo info = {VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, nullptr, (VkPipelineCreateFlags)0, stage, pipelineLayoutVK, VK_NULL_HANDLE, -1}; + const VkComputePipelineCreateInfo info = { + VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, + nullptr, + 
(VkPipelineCreateFlags)0, + stage, + pipelineLayoutVK, + VK_NULL_HANDLE, + -1, + }; result = vk.CreateComputePipelines(m_Device, VK_NULL_HANDLE, 1, &info, m_Device.GetAllocationCallbacks(), &m_Handle); - RETURN_ON_FAILURE(&m_Device, result == VK_SUCCESS, GetReturnCode(result), "vkCreateComputePipelines returned %d", (int32_t)result); vk.DestroyShaderModule(m_Device, module, m_Device.GetAllocationCallbacks()); @@ -294,20 +304,19 @@ Result PipelineVK::Create(const RayTracingPipelineDesc& rayTracingPipelineDesc) const PipelineLayoutVK& pipelineLayoutVK = *(const PipelineLayoutVK*)rayTracingPipelineDesc.pipelineLayout; const uint32_t stageNum = rayTracingPipelineDesc.shaderLibrary->shaderNum; - VkPipelineShaderStageCreateInfo* stages = StackAlloc(VkPipelineShaderStageCreateInfo, stageNum); - VkShaderModule* modules = StackAlloc(VkShaderModule, stageNum); - VkShaderModule* modulesBegin = modules; + Scratch stages = AllocateScratch(m_Device, VkPipelineShaderStageCreateInfo, stageNum); + Scratch modules = AllocateScratch(m_Device, VkShaderModule, stageNum); for (uint32_t i = 0; i < stageNum; i++) { const ShaderDesc& shaderDesc = rayTracingPipelineDesc.shaderLibrary->shaders[i]; - Result result = SetupShaderStage(stages[i], shaderDesc, modules); + Result result = SetupShaderStage(stages[i], shaderDesc, modules[i]); if (result != Result::SUCCESS) return result; stages[i].pName = shaderDesc.entryPointName ? 
shaderDesc.entryPointName : "main"; } - VkRayTracingShaderGroupCreateInfoKHR* groupArray = StackAlloc(VkRayTracingShaderGroupCreateInfoKHR, rayTracingPipelineDesc.shaderGroupDescNum); + Scratch groupArray = AllocateScratch(m_Device, VkRayTracingShaderGroupCreateInfoKHR, rayTracingPipelineDesc.shaderGroupDescNum); for (uint32_t i = 0; i < rayTracingPipelineDesc.shaderGroupDescNum; i++) { const ShaderGroupDesc& srcGroup = rayTracingPipelineDesc.shaderGroupDescs[i]; @@ -371,7 +380,7 @@ Result PipelineVK::Create(const RayTracingPipelineDesc& rayTracingPipelineDesc) RETURN_ON_FAILURE(&m_Device, vkResult == VK_SUCCESS, GetReturnCode(vkResult), "vkCreateRayTracingPipelinesKHR returned %d", (int32_t)vkResult); for (size_t i = 0; i < stageNum; i++) - vk.DestroyShaderModule(m_Device, modulesBegin[i], m_Device.GetAllocationCallbacks()); + vk.DestroyShaderModule(m_Device, modules[i], m_Device.GetAllocationCallbacks()); return Result::SUCCESS; } @@ -387,17 +396,19 @@ Result PipelineVK::Create(VkPipelineBindPoint bindPoint, VKNonDispatchableHandle return Result::SUCCESS; } -Result PipelineVK::SetupShaderStage(VkPipelineShaderStageCreateInfo& stage, const ShaderDesc& shaderDesc, VkShaderModule*& modules) { +Result PipelineVK::SetupShaderStage(VkPipelineShaderStageCreateInfo& stage, const ShaderDesc& shaderDesc, VkShaderModule& module) { const VkShaderModuleCreateInfo moduleInfo = { - VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, nullptr, (VkShaderModuleCreateFlags)0, (size_t)shaderDesc.size, (const uint32_t*)shaderDesc.bytecode}; + VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, + nullptr, + (VkShaderModuleCreateFlags)0, + (size_t)shaderDesc.size, + (const uint32_t*)shaderDesc.bytecode, + }; - VkShaderModule module = VK_NULL_HANDLE; const auto& vk = m_Device.GetDispatchTable(); VkResult result = vk.CreateShaderModule(m_Device, &moduleInfo, m_Device.GetAllocationCallbacks(), &module); RETURN_ON_FAILURE(&m_Device, result == VK_SUCCESS, GetReturnCode(result), "vkCreateShaderModule 
returned %d", (int32_t)result); - *(modules++) = module; - stage = { VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, nullptr, diff --git a/Source/VK/SwapChainVK.hpp b/Source/VK/SwapChainVK.hpp index 8e0918aa..5c2759aa 100644 --- a/Source/VK/SwapChainVK.hpp +++ b/Source/VK/SwapChainVK.hpp @@ -122,9 +122,11 @@ Result SwapChainVK::Create(const SwapChainDesc& swapChainDesc) { VkSurfaceCapabilities2KHR sc = {VK_STRUCTURE_TYPE_SURFACE_CAPABILITIES_2_KHR}; + std::array presentModes = {}; + VkLatencySurfaceCapabilitiesNV latencySurfaceCapabilities = {VK_STRUCTURE_TYPE_LATENCY_SURFACE_CAPABILITIES_NV}; - latencySurfaceCapabilities.presentModeCount = 8; - latencySurfaceCapabilities.pPresentModes = StackAlloc(VkPresentModeKHR, latencySurfaceCapabilities.presentModeCount); + latencySurfaceCapabilities.presentModeCount = (uint32_t)presentModes.size(); + latencySurfaceCapabilities.pPresentModes = presentModes.data(); if (m_Device.m_IsSupported.lowLatency) sc.pNext = &latencySurfaceCapabilities; @@ -152,7 +154,7 @@ Result SwapChainVK::Create(const SwapChainDesc& swapChainDesc) { VkResult result = vk.GetPhysicalDeviceSurfaceFormatsKHR(m_Device, m_Surface, &formatNum, nullptr); RETURN_ON_FAILURE(&m_Device, result == VK_SUCCESS, GetReturnCode(result), "vkGetPhysicalDeviceSurfaceFormatsKHR returned %d", (int32_t)result); - VkSurfaceFormatKHR* surfaceFormats = StackAlloc(VkSurfaceFormatKHR, formatNum); + Scratch surfaceFormats = AllocateScratch(m_Device, VkSurfaceFormatKHR, formatNum); result = vk.GetPhysicalDeviceSurfaceFormatsKHR(m_Device, m_Surface, &formatNum, surfaceFormats); RETURN_ON_FAILURE(&m_Device, result == VK_SUCCESS, GetReturnCode(result), "vkGetPhysicalDeviceSurfaceFormatsKHR returned %d", (int32_t)result); @@ -177,7 +179,7 @@ Result SwapChainVK::Create(const SwapChainDesc& swapChainDesc) { VkPresentModeKHR presentMode = VK_PRESENT_MODE_IMMEDIATE_KHR; { uint32_t presentModeNum = 8; - VkPresentModeKHR* presentModes = StackAlloc(VkPresentModeKHR, presentModeNum); + 
Scratch presentModes = AllocateScratch(m_Device, VkPresentModeKHR, presentModeNum); VkResult result = vk.GetPhysicalDeviceSurfacePresentModesKHR(m_Device, m_Surface, &presentModeNum, presentModes); RETURN_ON_FAILURE(&m_Device, result == VK_SUCCESS, GetReturnCode(result), "vkGetPhysicalDeviceSurfacePresentModesKHR returned %d", (int32_t)result); @@ -269,7 +271,7 @@ Result SwapChainVK::Create(const SwapChainDesc& swapChainDesc) { uint32_t imageNum = 0; vk.GetSwapchainImagesKHR(m_Device, m_Handle, &imageNum, nullptr); - VkImage* imageHandles = StackAlloc(VkImage, imageNum); + Scratch imageHandles = AllocateScratch(m_Device, VkImage, imageNum); vk.GetSwapchainImagesKHR(m_Device, m_Handle, &imageNum, imageHandles); m_Textures.resize(imageNum); diff --git a/Source/VK/TextureVK.hpp b/Source/VK/TextureVK.hpp index 73864c97..25483892 100644 --- a/Source/VK/TextureVK.hpp +++ b/Source/VK/TextureVK.hpp @@ -12,15 +12,15 @@ TextureVK::~TextureVK() { } Result TextureVK::Create(const TextureDesc& textureDesc) { + m_Desc = FixTextureDesc(textureDesc); + VkImageCreateInfo info = {VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO}; - m_Device.FillCreateInfo(textureDesc, info); + m_Device.FillCreateInfo(m_Desc, info); const auto& vk = m_Device.GetDispatchTable(); VkResult result = vk.CreateImage(m_Device, &info, m_Device.GetAllocationCallbacks(), &m_Handle); RETURN_ON_FAILURE(&m_Device, result == VK_SUCCESS, GetReturnCode(result), "vkCreateImage returned %d", (int32_t)result); - m_Desc = textureDesc; - return Result::SUCCESS; } diff --git a/Source/Validation/CommandBufferVal.hpp b/Source/Validation/CommandBufferVal.hpp index 08515b14..710e8959 100644 --- a/Source/Validation/CommandBufferVal.hpp +++ b/Source/Validation/CommandBufferVal.hpp @@ -139,18 +139,18 @@ NRI_INLINE void CommandBufferVal::ClearAttachments(const ClearDesc* clearDescs, const DeviceDesc& deviceDesc = m_Device.GetDesc(); for (uint32_t i = 0; i < clearDescNum; i++) { - RETURN_ON_FAILURE(&m_Device, (clearDescs[i].planes & 
(PlaneBits::COLOR | PlaneBits::DEPTH | PlaneBits::STENCIL)) != 0, ReturnVoid(), "'clearDesc[%u].planes' is not COLOR, DEPTH or STENCIL", i); + RETURN_ON_FAILURE(&m_Device, (clearDescs[i].planes & (PlaneBits::COLOR | PlaneBits::DEPTH | PlaneBits::STENCIL)) != 0, ReturnVoid(), "'[%u].planes' is not COLOR, DEPTH or STENCIL", i); if (clearDescs[i].planes & PlaneBits::COLOR) { - RETURN_ON_FAILURE(&m_Device, clearDescs[i].colorAttachmentIndex < deviceDesc.colorAttachmentMaxNum, ReturnVoid(), "'clearDesc[%u].colorAttachmentIndex = %u' is out of bounds", i, clearDescs[i].colorAttachmentIndex); - RETURN_ON_FAILURE(&m_Device, m_RenderTargets[clearDescs[i].colorAttachmentIndex], ReturnVoid(), "'clearDesc[%u].colorAttachmentIndex = %u' references a NULL COLOR attachment", i, clearDescs[i].colorAttachmentIndex); + RETURN_ON_FAILURE(&m_Device, clearDescs[i].colorAttachmentIndex < deviceDesc.colorAttachmentMaxNum, ReturnVoid(), "'[%u].colorAttachmentIndex = %u' is out of bounds", i, clearDescs[i].colorAttachmentIndex); + RETURN_ON_FAILURE(&m_Device, m_RenderTargets[clearDescs[i].colorAttachmentIndex], ReturnVoid(), "'[%u].colorAttachmentIndex = %u' references a NULL COLOR attachment", i, clearDescs[i].colorAttachmentIndex); } if (clearDescs[i].planes & (PlaneBits::DEPTH | PlaneBits::STENCIL)) RETURN_ON_FAILURE(&m_Device, m_DepthStencil, ReturnVoid(), "DEPTH_STENCIL attachment is NULL", i); if (clearDescs[i].colorAttachmentIndex != 0) - RETURN_ON_FAILURE(&m_Device, (clearDescs[i].planes & PlaneBits::COLOR), ReturnVoid(), "'clearDesc[%u].planes' is not COLOR, but `colorAttachmentIndex != 0`", i); + RETURN_ON_FAILURE(&m_Device, (clearDescs[i].planes & PlaneBits::COLOR), ReturnVoid(), "'[%u].planes' is not COLOR, but `colorAttachmentIndex != 0`", i); } GetCoreInterface().CmdClearAttachments(*GetImpl(), clearDescs, clearDescNum, rects, rectNum); @@ -159,7 +159,7 @@ NRI_INLINE void CommandBufferVal::ClearAttachments(const ClearDesc* clearDescs, NRI_INLINE void 
CommandBufferVal::ClearStorageBuffer(const ClearStorageBufferDesc& clearDesc) { RETURN_ON_FAILURE(&m_Device, m_IsRecordingStarted, ReturnVoid(), "the command buffer must be in the recording state"); RETURN_ON_FAILURE(&m_Device, !m_IsRenderPass, ReturnVoid(), "must be called outside of 'CmdBeginRendering/CmdEndRendering'"); - RETURN_ON_FAILURE(&m_Device, clearDesc.storageBuffer, ReturnVoid(), "'clearDesc.storageBuffer' is NULL"); + RETURN_ON_FAILURE(&m_Device, clearDesc.storageBuffer, ReturnVoid(), "'.storageBuffer' is NULL"); auto clearDescImpl = clearDesc; clearDescImpl.storageBuffer = NRI_GET_IMPL(Descriptor, clearDesc.storageBuffer); @@ -170,7 +170,7 @@ NRI_INLINE void CommandBufferVal::ClearStorageBuffer(const ClearStorageBufferDes NRI_INLINE void CommandBufferVal::ClearStorageTexture(const ClearStorageTextureDesc& clearDesc) { RETURN_ON_FAILURE(&m_Device, m_IsRecordingStarted, ReturnVoid(), "the command buffer must be in the recording state"); RETURN_ON_FAILURE(&m_Device, !m_IsRenderPass, ReturnVoid(), "must be called outside of 'CmdBeginRendering/CmdEndRendering'"); - RETURN_ON_FAILURE(&m_Device, clearDesc.storageTexture, ReturnVoid(), "'clearDesc.storageTexture' is NULL"); + RETURN_ON_FAILURE(&m_Device, clearDesc.storageTexture, ReturnVoid(), "'.storageTexture' is NULL"); auto clearDescImpl = clearDesc; clearDescImpl.storageTexture = NRI_GET_IMPL(Descriptor, clearDesc.storageTexture); @@ -186,7 +186,7 @@ NRI_INLINE void CommandBufferVal::BeginRendering(const AttachmentsDesc& attachme if (attachmentsDesc.shadingRate) RETURN_ON_FAILURE(&m_Device, deviceDesc.shadingRateTier, ReturnVoid(), "'shadingRateTier >= 2' required"); - Descriptor** colors = StackAlloc(Descriptor*, attachmentsDesc.colorNum); + Scratch colors = AllocateScratch(m_Device, Descriptor*, attachmentsDesc.colorNum); for (uint32_t i = 0; i < attachmentsDesc.colorNum; i++) colors[i] = NRI_GET_IMPL(Descriptor, attachmentsDesc.colors[i]); @@ -229,7 +229,7 @@ NRI_INLINE void 
CommandBufferVal::EndRendering() { NRI_INLINE void CommandBufferVal::SetVertexBuffers(uint32_t baseSlot, uint32_t bufferNum, const Buffer* const* buffers, const uint64_t* offsets) { RETURN_ON_FAILURE(&m_Device, m_IsRecordingStarted, ReturnVoid(), "the command buffer must be in the recording state"); - Buffer** buffersImpl = StackAlloc(Buffer*, bufferNum); + Scratch buffersImpl = AllocateScratch(m_Device, Buffer*, bufferNum); for (uint32_t i = 0; i < bufferNum; i++) buffersImpl[i] = NRI_GET_IMPL(Buffer, buffers[i]); @@ -414,12 +414,12 @@ NRI_INLINE void CommandBufferVal::Barrier(const BarrierGroupDesc& barrierGroupDe return; } - BufferBarrierDesc* buffers = StackAlloc(BufferBarrierDesc, barrierGroupDesc.bufferNum); + Scratch buffers = AllocateScratch(m_Device, BufferBarrierDesc, barrierGroupDesc.bufferNum); memcpy(buffers, barrierGroupDesc.buffers, sizeof(BufferBarrierDesc) * barrierGroupDesc.bufferNum); for (uint32_t i = 0; i < barrierGroupDesc.bufferNum; i++) buffers[i].buffer = NRI_GET_IMPL(Buffer, barrierGroupDesc.buffers[i].buffer); - TextureBarrierDesc* textures = StackAlloc(TextureBarrierDesc, barrierGroupDesc.textureNum); + Scratch textures = AllocateScratch(m_Device, TextureBarrierDesc, barrierGroupDesc.textureNum); memcpy(textures, barrierGroupDesc.textures, sizeof(TextureBarrierDesc) * barrierGroupDesc.textureNum); for (uint32_t i = 0; i < barrierGroupDesc.textureNum; i++) textures[i].texture = NRI_GET_IMPL(Texture, barrierGroupDesc.textures[i].texture); @@ -607,7 +607,7 @@ NRI_INLINE void CommandBufferVal::WriteAccelerationStructureSize(const Accelerat RETURN_ON_FAILURE(&m_Device, !m_IsRenderPass, ReturnVoid(), "must be called outside of 'CmdBeginRendering/CmdEndRendering'"); RETURN_ON_FAILURE(&m_Device, accelerationStructures, ReturnVoid(), "'accelerationStructures' is NULL"); - AccelerationStructure** accelerationStructureArray = StackAlloc(AccelerationStructure*, accelerationStructureNum); + Scratch accelerationStructureArray = 
AllocateScratch(m_Device, AccelerationStructure*, accelerationStructureNum); for (uint32_t i = 0; i < accelerationStructureNum; i++) { RETURN_ON_FAILURE(&m_Device, accelerationStructures[i], ReturnVoid(), "'accelerationStructures[%u]' is NULL", i); @@ -624,12 +624,12 @@ NRI_INLINE void CommandBufferVal::DispatchRays(const DispatchRaysDesc& dispatchR uint64_t align = deviceDesc.rayTracingShaderTableAlignment; RETURN_ON_FAILURE(&m_Device, m_IsRecordingStarted, ReturnVoid(), "the command buffer must be in the recording state"); RETURN_ON_FAILURE(&m_Device, !m_IsRenderPass, ReturnVoid(), "must be called outside of 'CmdBeginRendering/CmdEndRendering'"); - RETURN_ON_FAILURE(&m_Device, dispatchRaysDesc.raygenShader.buffer, ReturnVoid(), "'dispatchRaysDesc.raygenShader.buffer' is NULL"); - RETURN_ON_FAILURE(&m_Device, dispatchRaysDesc.raygenShader.size != 0, ReturnVoid(), "'dispatchRaysDesc.raygenShader.size' is 0"); - RETURN_ON_FAILURE(&m_Device, dispatchRaysDesc.raygenShader.offset % align == 0, ReturnVoid(), "'dispatchRaysDesc.raygenShader.offset' is misaligned"); - RETURN_ON_FAILURE(&m_Device, dispatchRaysDesc.missShaders.offset % align == 0, ReturnVoid(), "'dispatchRaysDesc.missShaders.offset' is misaligned"); - RETURN_ON_FAILURE(&m_Device, dispatchRaysDesc.hitShaderGroups.offset % align == 0, ReturnVoid(), "'dispatchRaysDesc.hitShaderGroups.offset' is misaligned"); - RETURN_ON_FAILURE(&m_Device, dispatchRaysDesc.callableShaders.offset % align == 0, ReturnVoid(), "'dispatchRaysDesc.callableShaders.offset' is misaligned"); + RETURN_ON_FAILURE(&m_Device, dispatchRaysDesc.raygenShader.buffer, ReturnVoid(), "'raygenShader.buffer' is NULL"); + RETURN_ON_FAILURE(&m_Device, dispatchRaysDesc.raygenShader.size != 0, ReturnVoid(), "'raygenShader.size' is 0"); + RETURN_ON_FAILURE(&m_Device, dispatchRaysDesc.raygenShader.offset % align == 0, ReturnVoid(), "'raygenShader.offset' is misaligned"); + RETURN_ON_FAILURE(&m_Device, dispatchRaysDesc.missShaders.offset % align == 0, 
ReturnVoid(), "'missShaders.offset' is misaligned"); + RETURN_ON_FAILURE(&m_Device, dispatchRaysDesc.hitShaderGroups.offset % align == 0, ReturnVoid(), "'hitShaderGroups.offset' is misaligned"); + RETURN_ON_FAILURE(&m_Device, dispatchRaysDesc.callableShaders.offset % align == 0, ReturnVoid(), "'callableShaders.offset' is misaligned"); auto dispatchRaysDescImpl = dispatchRaysDesc; dispatchRaysDescImpl.raygenShader.buffer = NRI_GET_IMPL(Buffer, dispatchRaysDesc.raygenShader.buffer); diff --git a/Source/Validation/CommandQueueVal.hpp b/Source/Validation/CommandQueueVal.hpp index e493b0e3..49d7a384 100644 --- a/Source/Validation/CommandQueueVal.hpp +++ b/Source/Validation/CommandQueueVal.hpp @@ -57,19 +57,19 @@ NRI_INLINE void CommandQueueVal::Submit(const QueueSubmitDesc& queueSubmitDesc, auto queueSubmitDescImpl = queueSubmitDesc; - FenceSubmitDesc* waitFences = StackAlloc(FenceSubmitDesc, queueSubmitDesc.waitFenceNum); + Scratch waitFences = AllocateScratch(m_Device, FenceSubmitDesc, queueSubmitDesc.waitFenceNum); for (uint32_t i = 0; i < queueSubmitDesc.waitFenceNum; i++) { waitFences[i] = queueSubmitDesc.waitFences[i]; waitFences[i].fence = NRI_GET_IMPL(Fence, waitFences[i].fence); } queueSubmitDescImpl.waitFences = waitFences; - CommandBuffer** commandBuffers = StackAlloc(CommandBuffer*, queueSubmitDesc.commandBufferNum); + Scratch commandBuffers = AllocateScratch(m_Device, CommandBuffer*, queueSubmitDesc.commandBufferNum); for (uint32_t i = 0; i < queueSubmitDesc.commandBufferNum; i++) commandBuffers[i] = NRI_GET_IMPL(CommandBuffer, queueSubmitDesc.commandBuffers[i]); queueSubmitDescImpl.commandBuffers = commandBuffers; - FenceSubmitDesc* signalFences = StackAlloc(FenceSubmitDesc, queueSubmitDesc.signalFenceNum); + Scratch signalFences = AllocateScratch(m_Device, FenceSubmitDesc, queueSubmitDesc.signalFenceNum); for (uint32_t i = 0; i < queueSubmitDesc.signalFenceNum; i++) { signalFences[i] = queueSubmitDesc.signalFences[i]; signalFences[i].fence = 
NRI_GET_IMPL(Fence, signalFences[i].fence); @@ -87,8 +87,7 @@ NRI_INLINE Result CommandQueueVal::UploadData(const TextureUploadDesc* textureUp RETURN_ON_FAILURE(&m_Device, textureUploadDescNum == 0 || textureUploadDescs != nullptr, Result::INVALID_ARGUMENT, "'textureUploadDescs' is NULL"); RETURN_ON_FAILURE(&m_Device, bufferUploadDescNum == 0 || bufferUploadDescs != nullptr, Result::INVALID_ARGUMENT, "'bufferUploadDescs' is NULL"); - TextureUploadDesc* textureUploadDescsImpl = StackAlloc(TextureUploadDesc, textureUploadDescNum); - + Scratch textureUploadDescsImpl = AllocateScratch(m_Device, TextureUploadDesc, textureUploadDescNum); for (uint32_t i = 0; i < textureUploadDescNum; i++) { if (!ValidateTextureUploadDesc(m_Device, i, textureUploadDescs[i])) return Result::INVALID_ARGUMENT; @@ -99,8 +98,7 @@ NRI_INLINE Result CommandQueueVal::UploadData(const TextureUploadDesc* textureUp textureUploadDescsImpl[i].texture = textureVal->GetImpl(); } - BufferUploadDesc* bufferUploadDescsImpl = StackAlloc(BufferUploadDesc, bufferUploadDescNum); - + Scratch bufferUploadDescsImpl = AllocateScratch(m_Device, BufferUploadDesc, bufferUploadDescNum); for (uint32_t i = 0; i < bufferUploadDescNum; i++) { if (!ValidateBufferUploadDesc(m_Device, i, bufferUploadDescs[i])) return Result::INVALID_ARGUMENT; @@ -137,7 +135,7 @@ void CommandQueueVal::ProcessValidationCommandBeginQuery(const uint8_t*& begin, const bool used = queryPool.SetQueryState(command->queryPoolOffset, true); if (used) - REPORT_ERROR(&m_Device, "it must be reset before use. 
(QueryPool='%s', offset=%u)", queryPool.GetDebugName(), command->queryPoolOffset); + REPORT_ERROR(&m_Device, "QueryPool='%s' (offset=%u) must be reset before use", queryPool.GetDebugName(), command->queryPoolOffset); } void CommandQueueVal::ProcessValidationCommandEndQuery(const uint8_t*& begin, const uint8_t* end) { @@ -150,10 +148,10 @@ void CommandQueueVal::ProcessValidationCommandEndQuery(const uint8_t*& begin, co if (queryPool.GetQueryType() == QueryType::TIMESTAMP) { if (used) - REPORT_ERROR(&m_Device, "it must be reset before use. (QueryPool='%s', offset=%u)", queryPool.GetDebugName(), command->queryPoolOffset); + REPORT_ERROR(&m_Device, "QueryPool='%s' (offset=%u) must be reset before use", queryPool.GetDebugName(), command->queryPoolOffset); } else { if (!used) - REPORT_ERROR(&m_Device, "it's not in active state. (QueryPool='%s', offset=%u)", queryPool.GetDebugName(), command->queryPoolOffset); + REPORT_ERROR(&m_Device, "QueryPool='%s' (offset=%u) is not in active state", queryPool.GetDebugName(), command->queryPoolOffset); } } diff --git a/Source/Validation/DescriptorPoolVal.hpp b/Source/Validation/DescriptorPoolVal.hpp index 568da419..719399d4 100644 --- a/Source/Validation/DescriptorPoolVal.hpp +++ b/Source/Validation/DescriptorPoolVal.hpp @@ -3,7 +3,7 @@ bool DescriptorPoolVal::CheckDescriptorRange(const DescriptorRangeDesc& rangeDesc, uint32_t variableDescriptorNum) { const uint32_t descriptorNum = (rangeDesc.flags & DescriptorRangeBits::VARIABLE_SIZED_ARRAY) ? 
variableDescriptorNum : rangeDesc.descriptorNum; if (descriptorNum > rangeDesc.descriptorNum) { - REPORT_ERROR(&m_Device, "variableDescriptorNum (%u) is greater than DescriptorRangeDesc::descriptorNum (%u)", variableDescriptorNum, rangeDesc.descriptorNum); + REPORT_ERROR(&m_Device, "'variableDescriptorNum=%u' is greater than 'descriptorNum=%u'", variableDescriptorNum, rangeDesc.descriptorNum); return false; } diff --git a/Source/Validation/DescriptorSetVal.hpp b/Source/Validation/DescriptorSetVal.hpp index 46a81c19..710ead9c 100644 --- a/Source/Validation/DescriptorSetVal.hpp +++ b/Source/Validation/DescriptorSetVal.hpp @@ -6,44 +6,38 @@ NRI_INLINE void DescriptorSetVal::SetDebugName(const char* name) { } NRI_INLINE void DescriptorSetVal::UpdateDescriptorRanges(uint32_t rangeOffset, uint32_t rangeNum, const DescriptorRangeUpdateDesc* rangeUpdateDescs) { - if (rangeNum == 0) - return; - - RETURN_ON_FAILURE(&m_Device, rangeUpdateDescs != nullptr, ReturnVoid(), "'rangeUpdateDescs' is NULL"); - RETURN_ON_FAILURE(&m_Device, rangeOffset < GetDesc().rangeNum, ReturnVoid(), "'rangeOffset' is out of bounds. (rangeOffset=%u, rangeNum=%u)", rangeOffset, GetDesc().rangeNum); + RETURN_ON_FAILURE(&m_Device, rangeOffset < GetDesc().rangeNum, ReturnVoid(), "'rangeOffset=%u' is out of 'rangeNum=%u' in the set", rangeOffset, GetDesc().rangeNum); + RETURN_ON_FAILURE(&m_Device, rangeOffset + rangeNum <= GetDesc().rangeNum, ReturnVoid(), "'rangeOffset=%u' + 'rangeNum=%u' is greater than 'rangeNum=%u' in the set", rangeOffset, rangeNum, GetDesc().rangeNum); - RETURN_ON_FAILURE(&m_Device, rangeOffset + rangeNum <= GetDesc().rangeNum, ReturnVoid(), - "'rangeOffset' + 'rangeNum' is greater than the number of ranges. 
(rangeOffset=%u, rangeNum=%u, rangeNum=%u)", rangeOffset, rangeNum, - GetDesc().rangeNum); + uint32_t descriptorNum = 0; + uint32_t descriptorOffset = 0; + for (uint32_t i = 0; i < rangeNum; i++) + descriptorNum += rangeUpdateDescs[i].descriptorNum; - DescriptorRangeUpdateDesc* rangeUpdateDescsImpl = StackAlloc(DescriptorRangeUpdateDesc, rangeNum); + Scratch rangeUpdateDescsImpl = AllocateScratch(m_Device, DescriptorRangeUpdateDesc, rangeNum); + Scratch descriptorsImpl = AllocateScratch(m_Device, Descriptor*, descriptorNum); for (uint32_t i = 0; i < rangeNum; i++) { const DescriptorRangeUpdateDesc& updateDesc = rangeUpdateDescs[i]; const DescriptorRangeDesc& rangeDesc = GetDesc().ranges[rangeOffset + i]; - RETURN_ON_FAILURE(&m_Device, updateDesc.descriptorNum != 0, ReturnVoid(), "'rangeUpdateDescs[%u].descriptorNum' is 0", i); - RETURN_ON_FAILURE(&m_Device, updateDesc.descriptors != nullptr, ReturnVoid(), "'rangeUpdateDescs[%u].descriptors' is NULL", i); - - RETURN_ON_FAILURE(&m_Device, updateDesc.baseDescriptor < rangeDesc.descriptorNum, ReturnVoid(), - "'rangeUpdateDescs[%u].baseDescriptor' is greater than the number of descriptors (offsetInRange=%u, rangeDescriptorNum=%u, descriptorType=%s)", - i, updateDesc.baseDescriptor, rangeDesc.descriptorNum, GetDescriptorTypeName(rangeDesc.descriptorType)); + RETURN_ON_FAILURE(&m_Device, updateDesc.descriptorNum != 0, ReturnVoid(), "'[%u].descriptorNum' is 0", i); + RETURN_ON_FAILURE(&m_Device, updateDesc.descriptors != nullptr, ReturnVoid(), "'[%u].descriptors' is NULL", i); RETURN_ON_FAILURE(&m_Device, updateDesc.baseDescriptor + updateDesc.descriptorNum <= rangeDesc.descriptorNum, ReturnVoid(), - "'rangeUpdateDescs[%u].baseDescriptor' + 'rangeUpdateDescs[%u].descriptorNum' is greater than the number of descriptors (offsetInRange=%u, " - "descriptorNum=%u, rangeDescriptorNum=%u, descriptorType=%s)", - i, i, updateDesc.baseDescriptor, updateDesc.descriptorNum, rangeDesc.descriptorNum, 
GetDescriptorTypeName(rangeDesc.descriptorType)); + "[%u]: 'baseDescriptor=%u' + 'descriptorNum=%u' is greater than 'descriptorNum=%u' in the range (descriptorType=%s)", + i, updateDesc.baseDescriptor, updateDesc.descriptorNum, rangeDesc.descriptorNum, GetDescriptorTypeName(rangeDesc.descriptorType)); - DescriptorRangeUpdateDesc& dstDesc = rangeUpdateDescsImpl[i]; - - dstDesc = updateDesc; - dstDesc.descriptors = StackAlloc(Descriptor*, updateDesc.descriptorNum); - Descriptor** descriptors = (Descriptor**)dstDesc.descriptors; + rangeUpdateDescsImpl[i] = updateDesc; + rangeUpdateDescsImpl[i].descriptors = descriptorsImpl + descriptorOffset; + Descriptor** descriptors = (Descriptor**)rangeUpdateDescsImpl[i].descriptors; for (uint32_t j = 0; j < updateDesc.descriptorNum; j++) { - RETURN_ON_FAILURE(&m_Device, updateDesc.descriptors[j] != nullptr, ReturnVoid(), "'rangeUpdateDescs[%u].descriptors[%u]' is NULL", i, j); + RETURN_ON_FAILURE(&m_Device, updateDesc.descriptors[j] != nullptr, ReturnVoid(), "'[%u].descriptors[%u]' is NULL", i, j); descriptors[j] = NRI_GET_IMPL(Descriptor, updateDesc.descriptors[j]); } + + descriptorOffset += updateDesc.descriptorNum; } GetCoreInterface().UpdateDescriptorRanges(*GetImpl(), rangeOffset, rangeNum, rangeUpdateDescsImpl); @@ -53,16 +47,13 @@ NRI_INLINE void DescriptorSetVal::UpdateDynamicConstantBuffers(uint32_t baseDyna if (dynamicConstantBufferNum == 0) return; - RETURN_ON_FAILURE(&m_Device, baseDynamicConstantBuffer < GetDesc().dynamicConstantBufferNum, ReturnVoid(), - "'baseDynamicConstantBuffer' is invalid. 
(baseDynamicConstantBuffer=%u, dynamicConstantBufferNum=%u)", baseDynamicConstantBuffer, GetDesc().dynamicConstantBufferNum); - RETURN_ON_FAILURE(&m_Device, baseDynamicConstantBuffer + dynamicConstantBufferNum <= GetDesc().dynamicConstantBufferNum, ReturnVoid(), - "'baseDynamicConstantBuffer' + 'dynamicConstantBufferNum' is greater than the number of buffers (baseDynamicConstantBuffer=%u, dynamicConstantBufferNum=%u, dynamicConstantBufferNum=%u)", baseDynamicConstantBuffer, - dynamicConstantBufferNum, GetDesc().dynamicConstantBufferNum); + "'baseDynamicConstantBuffer=%u' + 'dynamicConstantBufferNum=%u' is greater than 'dynamicConstantBufferNum=%u' in the set", + baseDynamicConstantBuffer, dynamicConstantBufferNum, GetDesc().dynamicConstantBufferNum); RETURN_ON_FAILURE(&m_Device, descriptors != nullptr, ReturnVoid(), "'descriptors' is NULL"); - Descriptor** descriptorsImpl = StackAlloc(Descriptor*, dynamicConstantBufferNum); + Scratch descriptorsImpl = AllocateScratch(m_Device, Descriptor*, dynamicConstantBufferNum); for (uint32_t i = 0; i < dynamicConstantBufferNum; i++) { RETURN_ON_FAILURE(&m_Device, descriptors[i] != nullptr, ReturnVoid(), "'descriptors[%u]' is NULL", i); @@ -73,12 +64,12 @@ NRI_INLINE void DescriptorSetVal::UpdateDynamicConstantBuffers(uint32_t baseDyna } NRI_INLINE void DescriptorSetVal::Copy(const DescriptorSetCopyDesc& descriptorSetCopyDesc) { - RETURN_ON_FAILURE(&m_Device, descriptorSetCopyDesc.srcDescriptorSet != nullptr, ReturnVoid(), "'descriptorSetCopyDesc.srcDescriptorSet' is NULL"); + RETURN_ON_FAILURE(&m_Device, descriptorSetCopyDesc.srcDescriptorSet != nullptr, ReturnVoid(), "'srcDescriptorSet' is NULL"); DescriptorSetVal& srcDescriptorSetVal = *(DescriptorSetVal*)descriptorSetCopyDesc.srcDescriptorSet; const DescriptorSetDesc& srcDesc = srcDescriptorSetVal.GetDesc(); - RETURN_ON_FAILURE(&m_Device, descriptorSetCopyDesc.srcBaseRange < srcDesc.rangeNum, ReturnVoid(), "'descriptorSetCopyDesc.srcBaseRange' is invalid"); + 
RETURN_ON_FAILURE(&m_Device, descriptorSetCopyDesc.srcBaseRange < srcDesc.rangeNum, ReturnVoid(), "'srcBaseRange' is invalid"); bool srcRangeValid = descriptorSetCopyDesc.srcBaseRange + descriptorSetCopyDesc.rangeNum < srcDesc.rangeNum; bool dstRangeValid = descriptorSetCopyDesc.dstBaseRange + descriptorSetCopyDesc.rangeNum < GetDesc().rangeNum; @@ -87,12 +78,12 @@ NRI_INLINE void DescriptorSetVal::Copy(const DescriptorSetCopyDesc& descriptorSe bool dstOffsetValid = descriptorSetCopyDesc.dstBaseDynamicConstantBuffer < GetDesc().dynamicConstantBufferNum; bool dstDynamicConstantBufferValid = descriptorSetCopyDesc.dstBaseDynamicConstantBuffer + descriptorSetCopyDesc.dynamicConstantBufferNum < GetDesc().dynamicConstantBufferNum; - RETURN_ON_FAILURE(&m_Device, srcRangeValid, ReturnVoid(), "'descriptorSetCopyDesc.rangeNum' is invalid"); - RETURN_ON_FAILURE(&m_Device, descriptorSetCopyDesc.dstBaseRange < GetDesc().rangeNum, ReturnVoid(), "'descriptorSetCopyDesc.dstBaseRange' is invalid"); - RETURN_ON_FAILURE(&m_Device, dstRangeValid, ReturnVoid(), "'descriptorSetCopyDesc.rangeNum' is invalid"); - RETURN_ON_FAILURE(&m_Device, srcOffsetValid, ReturnVoid(), "'descriptorSetCopyDesc.srcBaseDynamicConstantBuffer' is invalid"); + RETURN_ON_FAILURE(&m_Device, srcRangeValid, ReturnVoid(), "'rangeNum' is invalid"); + RETURN_ON_FAILURE(&m_Device, descriptorSetCopyDesc.dstBaseRange < GetDesc().rangeNum, ReturnVoid(), "'dstBaseRange' is invalid"); + RETURN_ON_FAILURE(&m_Device, dstRangeValid, ReturnVoid(), "'rangeNum' is invalid"); + RETURN_ON_FAILURE(&m_Device, srcOffsetValid, ReturnVoid(), "'srcBaseDynamicConstantBuffer' is invalid"); RETURN_ON_FAILURE(&m_Device, srcDynamicConstantBufferValid, ReturnVoid(), "source range of dynamic constant buffers is invalid"); - RETURN_ON_FAILURE(&m_Device, dstOffsetValid, ReturnVoid(), "'descriptorSetCopyDesc.dstBaseDynamicConstantBuffer' is invalid"); + RETURN_ON_FAILURE(&m_Device, dstOffsetValid, ReturnVoid(), "'dstBaseDynamicConstantBuffer' is 
invalid"); RETURN_ON_FAILURE(&m_Device, dstDynamicConstantBufferValid, ReturnVoid(), "destination range of dynamic constant buffers is invalid"); auto descriptorSetCopyDescImpl = descriptorSetCopyDesc; diff --git a/Source/Validation/DeviceVal.h b/Source/Validation/DeviceVal.h index 87d545bc..663978d4 100644 --- a/Source/Validation/DeviceVal.h +++ b/Source/Validation/DeviceVal.h @@ -160,7 +160,7 @@ struct DeviceVal final : public DeviceBase { Result QueryVideoMemoryInfo(MemoryLocation memoryLocation, VideoMemoryInfo& videoMemoryInfo) const; Result AllocateAndBindMemory(const ResourceGroupDesc& resourceGroupDesc, Memory** allocations); Result BindAccelerationStructureMemory(const AccelerationStructureMemoryBindingDesc* memoryBindingDescs, uint32_t memoryBindingDescNum); - uint32_t CalculateAllocationNumber(const ResourceGroupDesc& resourceGroupDesc) const; + uint32_t CalculateAllocationNumber(const ResourceGroupDesc& resourceGroupDesc); FormatSupportBits GetFormatSupport(Format format) const; private: diff --git a/Source/Validation/DeviceVal.hpp b/Source/Validation/DeviceVal.hpp index f83f0b53..28788e62 100644 --- a/Source/Validation/DeviceVal.hpp +++ b/Source/Validation/DeviceVal.hpp @@ -98,11 +98,11 @@ const DeviceDesc& DeviceVal::GetDesc() const { } NRI_INLINE Result DeviceVal::CreateSwapChain(const SwapChainDesc& swapChainDesc, SwapChain*& swapChain) { - RETURN_ON_FAILURE(this, swapChainDesc.commandQueue != nullptr, Result::INVALID_ARGUMENT, "'swapChainDesc.commandQueue' is NULL"); - RETURN_ON_FAILURE(this, swapChainDesc.width != 0, Result::INVALID_ARGUMENT, "'swapChainDesc.width' is 0"); - RETURN_ON_FAILURE(this, swapChainDesc.height != 0, Result::INVALID_ARGUMENT, "'swapChainDesc.height' is 0"); - RETURN_ON_FAILURE(this, swapChainDesc.textureNum > 0, Result::INVALID_ARGUMENT, "'swapChainDesc.textureNum' is invalid"); - RETURN_ON_FAILURE(this, swapChainDesc.format < SwapChainFormat::MAX_NUM, Result::INVALID_ARGUMENT, "'swapChainDesc.format' is invalid"); + 
RETURN_ON_FAILURE(this, swapChainDesc.commandQueue != nullptr, Result::INVALID_ARGUMENT, "'commandQueue' is NULL"); + RETURN_ON_FAILURE(this, swapChainDesc.width != 0, Result::INVALID_ARGUMENT, "'width' is 0"); + RETURN_ON_FAILURE(this, swapChainDesc.height != 0, Result::INVALID_ARGUMENT, "'height' is 0"); + RETURN_ON_FAILURE(this, swapChainDesc.textureNum > 0, Result::INVALID_ARGUMENT, "'textureNum' is invalid"); + RETURN_ON_FAILURE(this, swapChainDesc.format < SwapChainFormat::MAX_NUM, Result::INVALID_ARGUMENT, "'format' is invalid"); auto swapChainDescImpl = swapChainDesc; swapChainDescImpl.commandQueue = NRI_GET_IMPL(CommandQueue, swapChainDesc.commandQueue); @@ -166,7 +166,7 @@ NRI_INLINE Result DeviceVal::CreateDescriptorPool(const DescriptorPoolDesc& desc } NRI_INLINE Result DeviceVal::CreateBuffer(const BufferDesc& bufferDesc, Buffer*& buffer) { - RETURN_ON_FAILURE(this, bufferDesc.size != 0, Result::INVALID_ARGUMENT, "'bufferDesc.size' is 0"); + RETURN_ON_FAILURE(this, bufferDesc.size != 0, Result::INVALID_ARGUMENT, "'size' is 0"); Buffer* bufferImpl = nullptr; Result result = m_CoreAPI.CreateBuffer(m_Device, bufferDesc, bufferImpl); @@ -178,7 +178,7 @@ NRI_INLINE Result DeviceVal::CreateBuffer(const BufferDesc& bufferDesc, Buffer*& } NRI_INLINE Result DeviceVal::AllocateBuffer(const AllocateBufferDesc& bufferDesc, Buffer*& buffer) { - RETURN_ON_FAILURE(this, bufferDesc.desc.size != 0, Result::INVALID_ARGUMENT, "'bufferDesc.size' is 0"); + RETURN_ON_FAILURE(this, bufferDesc.desc.size != 0, Result::INVALID_ARGUMENT, "'size' is 0"); Buffer* bufferImpl = nullptr; Result result = m_ResourceAllocatorAPI.AllocateBuffer(m_Device, bufferDesc, bufferImpl); @@ -192,14 +192,11 @@ NRI_INLINE Result DeviceVal::AllocateBuffer(const AllocateBufferDesc& bufferDesc NRI_INLINE Result DeviceVal::CreateTexture(const TextureDesc& textureDesc, Texture*& texture) { Mip_t maxMipNum = GetMaxMipNum(textureDesc.width, textureDesc.height, textureDesc.depth); - RETURN_ON_FAILURE(this, 
textureDesc.format > Format::UNKNOWN && textureDesc.format < Format::MAX_NUM, Result::INVALID_ARGUMENT, "'textureDesc.format' is invalid"); - RETURN_ON_FAILURE(this, textureDesc.width != 0, Result::INVALID_ARGUMENT, "'textureDesc.width' is 0"); - RETURN_ON_FAILURE(this, textureDesc.height != 0, Result::INVALID_ARGUMENT, "'textureDesc.height' is 0"); - RETURN_ON_FAILURE(this, textureDesc.depth != 0, Result::INVALID_ARGUMENT, "'textureDesc.depth' is 0"); - RETURN_ON_FAILURE(this, textureDesc.mipNum != 0, Result::INVALID_ARGUMENT, "'textureDesc.mipNum' is 0"); - RETURN_ON_FAILURE(this, textureDesc.mipNum <= maxMipNum, Result::INVALID_ARGUMENT, "'textureDesc.mipNum = %u' can't be > %u", textureDesc.mipNum, maxMipNum); - RETURN_ON_FAILURE(this, textureDesc.layerNum != 0, Result::INVALID_ARGUMENT, "'textureDesc.layerNum' is 0"); - RETURN_ON_FAILURE(this, textureDesc.sampleNum != 0, Result::INVALID_ARGUMENT, "'textureDesc.sampleNum' is 0"); + RETURN_ON_FAILURE(this, textureDesc.format > Format::UNKNOWN && textureDesc.format < Format::MAX_NUM, Result::INVALID_ARGUMENT, "'format' is invalid"); + RETURN_ON_FAILURE(this, textureDesc.width != 0, Result::INVALID_ARGUMENT, "'width' is 0"); + RETURN_ON_FAILURE(this, textureDesc.height != 0, Result::INVALID_ARGUMENT, "'height' is 0"); + RETURN_ON_FAILURE(this, textureDesc.mipNum != 0, Result::INVALID_ARGUMENT, "'mipNum' is 0"); + RETURN_ON_FAILURE(this, textureDesc.mipNum <= maxMipNum, Result::INVALID_ARGUMENT, "'mipNum=%u' can't be > %u", textureDesc.mipNum, maxMipNum); Texture* textureImpl = nullptr; Result result = m_CoreAPI.CreateTexture(m_Device, textureDesc, textureImpl); @@ -213,14 +210,11 @@ NRI_INLINE Result DeviceVal::CreateTexture(const TextureDesc& textureDesc, Textu NRI_INLINE Result DeviceVal::AllocateTexture(const AllocateTextureDesc& textureDesc, Texture*& texture) { Mip_t maxMipNum = GetMaxMipNum(textureDesc.desc.width, textureDesc.desc.height, textureDesc.desc.depth); - RETURN_ON_FAILURE(this, 
textureDesc.desc.format > Format::UNKNOWN && textureDesc.desc.format < Format::MAX_NUM, Result::INVALID_ARGUMENT, "'textureDesc.format' is invalid"); - RETURN_ON_FAILURE(this, textureDesc.desc.width != 0, Result::INVALID_ARGUMENT, "'textureDesc.width' is 0"); - RETURN_ON_FAILURE(this, textureDesc.desc.height != 0, Result::INVALID_ARGUMENT, "'textureDesc.height' is 0"); - RETURN_ON_FAILURE(this, textureDesc.desc.depth != 0, Result::INVALID_ARGUMENT, "'textureDesc.depth' is 0"); - RETURN_ON_FAILURE(this, textureDesc.desc.mipNum != 0, Result::INVALID_ARGUMENT, "'textureDesc.mipNum' is 0"); - RETURN_ON_FAILURE(this, textureDesc.desc.mipNum <= maxMipNum, Result::INVALID_ARGUMENT, "'textureDesc.mipNum = %u' can't be > %u", textureDesc.desc.mipNum, maxMipNum); - RETURN_ON_FAILURE(this, textureDesc.desc.layerNum != 0, Result::INVALID_ARGUMENT, "'textureDesc.layerNum' is 0"); - RETURN_ON_FAILURE(this, textureDesc.desc.sampleNum != 0, Result::INVALID_ARGUMENT, "'textureDesc.sampleNum' is 0"); + RETURN_ON_FAILURE(this, textureDesc.desc.format > Format::UNKNOWN && textureDesc.desc.format < Format::MAX_NUM, Result::INVALID_ARGUMENT, "'desc.format' is invalid"); + RETURN_ON_FAILURE(this, textureDesc.desc.width != 0, Result::INVALID_ARGUMENT, "'desc.width' is 0"); + RETURN_ON_FAILURE(this, textureDesc.desc.height != 0, Result::INVALID_ARGUMENT, "'desc.height' is 0"); + RETURN_ON_FAILURE(this, textureDesc.desc.mipNum != 0, Result::INVALID_ARGUMENT, "'desc.mipNum' is 0"); + RETURN_ON_FAILURE(this, textureDesc.desc.mipNum <= maxMipNum, Result::INVALID_ARGUMENT, "'desc.mipNum=%u' can't be > %u", textureDesc.desc.mipNum, maxMipNum); Texture* textureImpl = nullptr; Result result = m_ResourceAllocatorAPI.AllocateTexture(m_Device, textureDesc, textureImpl); @@ -232,18 +226,12 @@ NRI_INLINE Result DeviceVal::AllocateTexture(const AllocateTextureDesc& textureD } NRI_INLINE Result DeviceVal::CreateDescriptor(const BufferViewDesc& bufferViewDesc, Descriptor*& bufferView) { - 
RETURN_ON_FAILURE(this, bufferViewDesc.buffer != nullptr, Result::INVALID_ARGUMENT, "'bufferViewDesc.buffer' is NULL"); - RETURN_ON_FAILURE(this, bufferViewDesc.format < Format::MAX_NUM, Result::INVALID_ARGUMENT, "'bufferViewDesc.format' is invalid"); - RETURN_ON_FAILURE(this, bufferViewDesc.viewType < BufferViewType::MAX_NUM, Result::INVALID_ARGUMENT, "'bufferViewDesc.viewType' is invalid"); + RETURN_ON_FAILURE(this, bufferViewDesc.buffer != nullptr, Result::INVALID_ARGUMENT, "'buffer' is NULL"); + RETURN_ON_FAILURE(this, bufferViewDesc.format < Format::MAX_NUM, Result::INVALID_ARGUMENT, "'format' is invalid"); + RETURN_ON_FAILURE(this, bufferViewDesc.viewType < BufferViewType::MAX_NUM, Result::INVALID_ARGUMENT, "'viewType' is invalid"); const BufferDesc& bufferDesc = ((BufferVal*)bufferViewDesc.buffer)->GetDesc(); - - RETURN_ON_FAILURE(this, bufferViewDesc.offset < bufferDesc.size, Result::INVALID_ARGUMENT, - "'bufferViewDesc.offset' is invalid. (bufferViewDesc.offset=%llu, bufferDesc.size=%llu)", bufferViewDesc.offset, bufferDesc.size); - - RETURN_ON_FAILURE(this, bufferViewDesc.offset + bufferViewDesc.size <= bufferDesc.size, Result::INVALID_ARGUMENT, - "'bufferViewDesc.size' is invalid. 
(bufferViewDesc.offset=%llu, bufferViewDesc.size=%llu, bufferDesc.size=%llu)", bufferViewDesc.offset, - bufferViewDesc.size, bufferDesc.size); + RETURN_ON_FAILURE(this, bufferViewDesc.offset + bufferViewDesc.size <= bufferDesc.size, Result::INVALID_ARGUMENT, "'offset=%llu' + 'size=%llu' must be <= buffer 'size=%llu'", bufferViewDesc.offset, bufferViewDesc.size, bufferDesc.size); auto bufferViewDescImpl = bufferViewDesc; bufferViewDescImpl.buffer = NRI_GET_IMPL(Buffer, bufferViewDesc.buffer); @@ -258,28 +246,17 @@ NRI_INLINE Result DeviceVal::CreateDescriptor(const BufferViewDesc& bufferViewDe } NRI_INLINE Result DeviceVal::CreateDescriptor(const Texture1DViewDesc& textureViewDesc, Descriptor*& textureView) { - RETURN_ON_FAILURE(this, textureViewDesc.texture != nullptr, Result::INVALID_ARGUMENT, "'textureViewDesc.texture' is NULL"); - RETURN_ON_FAILURE(this, textureViewDesc.viewType < Texture1DViewType::MAX_NUM, Result::INVALID_ARGUMENT, "'textureViewDesc.viewType' is invalid"); - - RETURN_ON_FAILURE(this, textureViewDesc.format > Format::UNKNOWN && textureViewDesc.format < Format::MAX_NUM, Result::INVALID_ARGUMENT, - "'textureViewDesc.format' is invalid"); + RETURN_ON_FAILURE(this, textureViewDesc.texture != nullptr, Result::INVALID_ARGUMENT, "'texture' is NULL"); + RETURN_ON_FAILURE(this, textureViewDesc.viewType < Texture1DViewType::MAX_NUM, Result::INVALID_ARGUMENT, "'viewType' is invalid"); + RETURN_ON_FAILURE(this, textureViewDesc.format > Format::UNKNOWN && textureViewDesc.format < Format::MAX_NUM, Result::INVALID_ARGUMENT, "'format' is invalid"); const TextureDesc& textureDesc = ((TextureVal*)textureViewDesc.texture)->GetDesc(); - RETURN_ON_FAILURE(this, textureViewDesc.mipOffset < textureDesc.mipNum, Result::INVALID_ARGUMENT, - "'textureViewDesc.mipOffset' is invalid (textureViewDesc.mipOffset=%hu, textureDesc.mipNum=%hu)", textureViewDesc.mipOffset, textureDesc.mipNum); - RETURN_ON_FAILURE(this, textureViewDesc.mipOffset + textureViewDesc.mipNum <= 
textureDesc.mipNum, Result::INVALID_ARGUMENT, - "'textureViewDesc.mipNum' is invalid (textureViewDesc.mipOffset=%hu, textureViewDesc.mipNum=%hu, textureDesc.mipNum=%hu)", textureViewDesc.mipOffset, - textureViewDesc.mipNum, textureDesc.mipNum); - - RETURN_ON_FAILURE(this, textureViewDesc.layerOffset < textureDesc.layerNum, Result::INVALID_ARGUMENT, - "'textureViewDesc.layerOffset' is invalid (textureViewDesc.layerOffset=%hu, textureDesc.layerNum=%hu)", textureViewDesc.layerOffset, - textureDesc.layerNum); + "'mipOffset=%u' + 'mipNum=%u' must be <= texture 'mipNum=%u'", textureViewDesc.mipOffset, textureViewDesc.mipNum, textureDesc.mipNum); RETURN_ON_FAILURE(this, textureViewDesc.layerOffset + textureViewDesc.layerNum <= textureDesc.layerNum, Result::INVALID_ARGUMENT, - "'textureViewDesc.layerNum' is invalid (textureViewDesc.layerOffset=%hu, textureViewDesc.layerNum=%hu, textureDesc.layerNum=%hu)", - textureViewDesc.layerOffset, textureViewDesc.layerNum, textureDesc.layerNum); + "'layerOffset=%u' + 'layerNum=%u' must be <= texture 'layerNum=%u'", textureViewDesc.layerOffset, textureViewDesc.layerNum, textureDesc.layerNum); auto textureViewDescImpl = textureViewDesc; textureViewDescImpl.texture = NRI_GET_IMPL(Texture, textureViewDesc.texture); @@ -294,33 +271,17 @@ NRI_INLINE Result DeviceVal::CreateDescriptor(const Texture1DViewDesc& textureVi } NRI_INLINE Result DeviceVal::CreateDescriptor(const Texture2DViewDesc& textureViewDesc, Descriptor*& textureView) { - RETURN_ON_FAILURE(this, textureViewDesc.texture != nullptr, Result::INVALID_ARGUMENT, "'textureViewDesc.texture' is NULL"); - RETURN_ON_FAILURE(this, textureViewDesc.viewType < Texture2DViewType::MAX_NUM, Result::INVALID_ARGUMENT, "'textureViewDesc.viewType' is invalid"); - - RETURN_ON_FAILURE(this, textureViewDesc.format > Format::UNKNOWN && textureViewDesc.format < Format::MAX_NUM, Result::INVALID_ARGUMENT, - "'textureViewDesc.format' is invalid"); + RETURN_ON_FAILURE(this, textureViewDesc.texture != 
nullptr, Result::INVALID_ARGUMENT, "'texture' is NULL"); + RETURN_ON_FAILURE(this, textureViewDesc.viewType < Texture2DViewType::MAX_NUM, Result::INVALID_ARGUMENT, "'viewType' is invalid"); + RETURN_ON_FAILURE(this, textureViewDesc.format > Format::UNKNOWN && textureViewDesc.format < Format::MAX_NUM, Result::INVALID_ARGUMENT, "'format' is invalid"); const TextureDesc& textureDesc = ((TextureVal*)textureViewDesc.texture)->GetDesc(); - RETURN_ON_FAILURE(this, textureViewDesc.mipOffset < textureDesc.mipNum, Result::INVALID_ARGUMENT, - "'textureViewDesc.mipOffset' is invalid. " - "(textureViewDesc.mipOffset=%hu, textureDesc.mipNum=%hu)", - textureViewDesc.mipOffset, textureDesc.mipNum); - RETURN_ON_FAILURE(this, textureViewDesc.mipOffset + textureViewDesc.mipNum <= textureDesc.mipNum, Result::INVALID_ARGUMENT, - "'textureViewDesc.mipNum' is invalid. " - "(textureViewDesc.mipOffset=%hu, textureViewDesc.mipNum=%hu, textureDesc.mipNum=%hu)", - textureViewDesc.mipOffset, textureViewDesc.mipNum, textureDesc.mipNum); - - RETURN_ON_FAILURE(this, textureViewDesc.layerOffset < textureDesc.layerNum, Result::INVALID_ARGUMENT, - "'textureViewDesc.layerOffset' is invalid. " - "(textureViewDesc.layerOffset=%hu, textureDesc.layerNum=%hu)", - textureViewDesc.layerOffset, textureDesc.layerNum); + "'mipOffset=%u' + 'mipNum=%u' must be <= texture 'mipNum=%u'", textureViewDesc.mipOffset, textureViewDesc.mipNum, textureDesc.mipNum); RETURN_ON_FAILURE(this, textureViewDesc.layerOffset + textureViewDesc.layerNum <= textureDesc.layerNum, Result::INVALID_ARGUMENT, - "'textureViewDesc.layerNum' is invalid. 
" - "(textureViewDesc.layerOffset=%hu, textureViewDesc.layerNum=%hu, textureDesc.layerNum=%hu)", - textureViewDesc.layerOffset, textureViewDesc.layerNum, textureDesc.layerNum); + "'layerOffset=%u' + 'layerNum=%u' must be <= texture 'layerNum=%u'", textureViewDesc.layerOffset, textureViewDesc.layerNum, textureDesc.layerNum); auto textureViewDescImpl = textureViewDesc; textureViewDescImpl.texture = NRI_GET_IMPL(Texture, textureViewDesc.texture); @@ -335,33 +296,17 @@ NRI_INLINE Result DeviceVal::CreateDescriptor(const Texture2DViewDesc& textureVi } NRI_INLINE Result DeviceVal::CreateDescriptor(const Texture3DViewDesc& textureViewDesc, Descriptor*& textureView) { - RETURN_ON_FAILURE(this, textureViewDesc.texture != nullptr, Result::INVALID_ARGUMENT, "'textureViewDesc.texture' is NULL"); - RETURN_ON_FAILURE(this, textureViewDesc.viewType < Texture3DViewType::MAX_NUM, Result::INVALID_ARGUMENT, "'textureViewDesc.viewType' is invalid"); - - RETURN_ON_FAILURE(this, textureViewDesc.format > Format::UNKNOWN && textureViewDesc.format < Format::MAX_NUM, Result::INVALID_ARGUMENT, - "'textureViewDesc.format' is invalid"); + RETURN_ON_FAILURE(this, textureViewDesc.texture != nullptr, Result::INVALID_ARGUMENT, "'texture' is NULL"); + RETURN_ON_FAILURE(this, textureViewDesc.viewType < Texture3DViewType::MAX_NUM, Result::INVALID_ARGUMENT, "'viewType' is invalid"); + RETURN_ON_FAILURE(this, textureViewDesc.format > Format::UNKNOWN && textureViewDesc.format < Format::MAX_NUM, Result::INVALID_ARGUMENT, "'format' is invalid"); const TextureDesc& textureDesc = ((TextureVal*)textureViewDesc.texture)->GetDesc(); - RETURN_ON_FAILURE(this, textureViewDesc.mipOffset < textureDesc.mipNum, Result::INVALID_ARGUMENT, - "'textureViewDesc.mipOffset' is invalid. 
" - "(textureViewDesc.mipOffset=%hu, textureViewDesc.mipOffset=%hu)", - textureViewDesc.mipOffset, textureDesc.mipNum); - RETURN_ON_FAILURE(this, textureViewDesc.mipOffset + textureViewDesc.mipNum <= textureDesc.mipNum, Result::INVALID_ARGUMENT, - "'textureViewDesc.mipNum' is invalid. " - "(textureViewDesc.mipOffset=%hu, textureViewDesc.mipNum=%hu, textureDesc.mipNum=%hu)", - textureViewDesc.mipOffset, textureViewDesc.mipNum, textureDesc.mipNum); - - RETURN_ON_FAILURE(this, textureViewDesc.sliceOffset < textureDesc.depth, Result::INVALID_ARGUMENT, - "'textureViewDesc.layerOffset' is invalid. " - "(textureViewDesc.sliceOffset=%hu, textureDesc.depth=%hu)", - textureViewDesc.sliceOffset, textureDesc.depth); + "'mipOffset=%u' + 'mipNum=%u' must be <= texture 'mipNum=%u'", textureViewDesc.mipOffset, textureViewDesc.mipNum, textureDesc.mipNum); RETURN_ON_FAILURE(this, textureViewDesc.sliceOffset + textureViewDesc.sliceNum <= textureDesc.depth, Result::INVALID_ARGUMENT, - "'textureViewDesc.layerNum' is invalid. 
" - "(textureViewDesc.sliceOffset=%hu, textureViewDesc.sliceNum=%hu, textureDesc.depth=%hu)", - textureViewDesc.sliceOffset, textureViewDesc.sliceNum, textureDesc.depth); + "'sliceOffset=%u' + 'sliceNum=%u' must be <= texture 'depth=%u'", textureViewDesc.sliceOffset, textureViewDesc.sliceNum, textureDesc.depth); auto textureViewDescImpl = textureViewDesc; textureViewDescImpl.texture = NRI_GET_IMPL(Texture, textureViewDesc.texture); @@ -376,20 +321,20 @@ NRI_INLINE Result DeviceVal::CreateDescriptor(const Texture3DViewDesc& textureVi } NRI_INLINE Result DeviceVal::CreateDescriptor(const SamplerDesc& samplerDesc, Descriptor*& sampler) { - RETURN_ON_FAILURE(this, samplerDesc.filters.mag < Filter::MAX_NUM, Result::INVALID_ARGUMENT, "'samplerDesc.filters.mag' is invalid"); - RETURN_ON_FAILURE(this, samplerDesc.filters.min < Filter::MAX_NUM, Result::INVALID_ARGUMENT, "'samplerDesc.filters.min' is invalid"); - RETURN_ON_FAILURE(this, samplerDesc.filters.mip < Filter::MAX_NUM, Result::INVALID_ARGUMENT, "'samplerDesc.filters.mip' is invalid"); - RETURN_ON_FAILURE(this, samplerDesc.filters.ext < FilterExt::MAX_NUM, Result::INVALID_ARGUMENT, "'samplerDesc.filters.ext' is invalid"); - RETURN_ON_FAILURE(this, samplerDesc.addressModes.u < AddressMode::MAX_NUM, Result::INVALID_ARGUMENT, "'samplerDesc.addressModes.u' is invalid"); - RETURN_ON_FAILURE(this, samplerDesc.addressModes.v < AddressMode::MAX_NUM, Result::INVALID_ARGUMENT, "'samplerDesc.addressModes.v' is invalid"); - RETURN_ON_FAILURE(this, samplerDesc.addressModes.w < AddressMode::MAX_NUM, Result::INVALID_ARGUMENT, "'samplerDesc.addressModes.w' is invalid"); - RETURN_ON_FAILURE(this, samplerDesc.compareFunc < CompareFunc::MAX_NUM, Result::INVALID_ARGUMENT, "'samplerDesc.compareFunc' is invalid"); + RETURN_ON_FAILURE(this, samplerDesc.filters.mag < Filter::MAX_NUM, Result::INVALID_ARGUMENT, "'filters.mag' is invalid"); + RETURN_ON_FAILURE(this, samplerDesc.filters.min < Filter::MAX_NUM, Result::INVALID_ARGUMENT, 
"'filters.min' is invalid"); + RETURN_ON_FAILURE(this, samplerDesc.filters.mip < Filter::MAX_NUM, Result::INVALID_ARGUMENT, "'filters.mip' is invalid"); + RETURN_ON_FAILURE(this, samplerDesc.filters.ext < FilterExt::MAX_NUM, Result::INVALID_ARGUMENT, "'filters.ext' is invalid"); + RETURN_ON_FAILURE(this, samplerDesc.addressModes.u < AddressMode::MAX_NUM, Result::INVALID_ARGUMENT, "'addressModes.u' is invalid"); + RETURN_ON_FAILURE(this, samplerDesc.addressModes.v < AddressMode::MAX_NUM, Result::INVALID_ARGUMENT, "'addressModes.v' is invalid"); + RETURN_ON_FAILURE(this, samplerDesc.addressModes.w < AddressMode::MAX_NUM, Result::INVALID_ARGUMENT, "'addressModes.w' is invalid"); + RETURN_ON_FAILURE(this, samplerDesc.compareFunc < CompareFunc::MAX_NUM, Result::INVALID_ARGUMENT, "'compareFunc' is invalid"); if (samplerDesc.filters.ext != FilterExt::NONE) RETURN_ON_FAILURE(this, GetDesc().isTextureFilterMinMaxSupported, Result::UNSUPPORTED, "'isTextureFilterMinMaxSupported' is false"); if ((samplerDesc.addressModes.u != AddressMode::CLAMP_TO_BORDER && samplerDesc.addressModes.v != AddressMode::CLAMP_TO_BORDER && samplerDesc.addressModes.w != AddressMode::CLAMP_TO_BORDER) && (samplerDesc.borderColor.ui.x != 0 || samplerDesc.borderColor.ui.y != 0 || samplerDesc.borderColor.ui.z != 0 && samplerDesc.borderColor.ui.w != 0)) - REPORT_WARNING(this, "'samplerDesc.borderColor' is provided, but 'CLAMP_TO_BORDER' is not requested"); + REPORT_WARNING(this, "'borderColor' is provided, but 'CLAMP_TO_BORDER' is not requested"); Descriptor* samplerImpl = nullptr; Result result = m_CoreAPI.CreateSampler(m_Device, samplerDesc, samplerImpl); @@ -402,7 +347,7 @@ NRI_INLINE Result DeviceVal::CreateDescriptor(const SamplerDesc& samplerDesc, De NRI_INLINE Result DeviceVal::CreatePipelineLayout(const PipelineLayoutDesc& pipelineLayoutDesc, PipelineLayout*& pipelineLayout) { RETURN_ON_FAILURE(this, pipelineLayoutDesc.shaderStages != StageBits::NONE, Result::INVALID_ARGUMENT, "'shaderStages' 
can't be 'NONE'"); - RETURN_ON_FAILURE(this, pipelineLayoutDesc.rootDescriptorSetNum <= GetDesc().rootDescriptorMaxNum, Result::UNSUPPORTED, "exceeded number of root descriptors"); + RETURN_ON_FAILURE(this, pipelineLayoutDesc.rootDescriptorNum <= GetDesc().pipelineLayoutRootDescriptorMaxNum, Result::UNSUPPORTED, "exceeded number of root descriptors"); bool isGraphics = pipelineLayoutDesc.shaderStages & StageBits::GRAPHICS_SHADERS; bool isCompute = pipelineLayoutDesc.shaderStages & StageBits::COMPUTE_SHADER; @@ -411,12 +356,12 @@ NRI_INLINE Result DeviceVal::CreatePipelineLayout(const PipelineLayoutDesc& pipe RETURN_ON_FAILURE(this, supportedTypes > 0, Result::INVALID_ARGUMENT, "'shaderStages' doesn't include any shader stages"); RETURN_ON_FAILURE(this, supportedTypes == 1, Result::INVALID_ARGUMENT, "'shaderStages' is invalid, it can't be compatible with more than one type of pipeline"); - uint32_t totalDescriptorNum = pipelineLayoutDesc.descriptorSetNum + pipelineLayoutDesc.rootDescriptorSetNum; + uint32_t totalDescriptorNum = pipelineLayoutDesc.descriptorSetNum + (pipelineLayoutDesc.rootDescriptorNum ? 
1 : 0); bool isTotalSetNumValid = totalDescriptorNum <= GetDesc().pipelineLayoutDescriptorSetMaxNum; - RETURN_ON_FAILURE(this, isTotalSetNumValid, Result::INVALID_ARGUMENT, "'descriptorSetNum + rootDescriptorSetNum' must be less or equal than 'pipelineLayoutDescriptorSetMaxNum'"); + RETURN_ON_FAILURE(this, isTotalSetNumValid, Result::INVALID_ARGUMENT, "exceeded number of sets"); Scratch spaces = AllocateScratch(*this, uint32_t, totalDescriptorNum); - memset(spaces.mem, 0, sizeof(uint32_t) * totalDescriptorNum); + memset(spaces, 0, sizeof(uint32_t) * totalDescriptorNum); uint32_t spaceNum = 0; for (uint32_t i = 0; i < pipelineLayoutDesc.descriptorSetNum; i++) { @@ -443,20 +388,21 @@ NRI_INLINE Result DeviceVal::CreatePipelineLayout(const PipelineLayoutDesc& pipe spaces[spaceNum++] = descriptorSetDesc.registerSpace; } - for (uint32_t i = 0; i < pipelineLayoutDesc.rootDescriptorSetNum; i++) { - const RootDescriptorSetDesc& rootDescriptorDesc = pipelineLayoutDesc.rootDescriptorSets[i]; + if (pipelineLayoutDesc.rootDescriptorNum) { + uint32_t n = 0; + for (; n < spaceNum && spaces[n] != pipelineLayoutDesc.rootRegisterSpace; n++) + ; + + RETURN_ON_FAILURE(this, n == spaceNum, Result::INVALID_ARGUMENT, "'registerSpace = %u' is already in use", pipelineLayoutDesc.rootRegisterSpace); + } + + for (uint32_t i = 0; i < pipelineLayoutDesc.rootDescriptorNum; i++) { + const RootDescriptorDesc& rootDescriptorDesc = pipelineLayoutDesc.rootDescriptors[i]; bool isDescriptorTypeValid = rootDescriptorDesc.descriptorType == DescriptorType::CONSTANT_BUFFER || rootDescriptorDesc.descriptorType == DescriptorType::STRUCTURED_BUFFER || rootDescriptorDesc.descriptorType == DescriptorType::STORAGE_STRUCTURED_BUFFER; - RETURN_ON_FAILURE(this, isDescriptorTypeValid, Result::INVALID_ARGUMENT, "'rootDescriptorSets[%u].descriptorType' must be one of 'CONSTANT_BUFFER', 'STRUCTURED_BUFFER' or 'STORAGE_STRUCTURED_BUFFER'", i); - - uint32_t n = 0; - for (; n < spaceNum && spaces[n] != 
rootDescriptorDesc.registerSpace; n++) - ; - - RETURN_ON_FAILURE(this, n == spaceNum, Result::INVALID_ARGUMENT, "'rootDescriptorSets[%u].registerSpace = %u' is already in use", i, rootDescriptorDesc.registerSpace); - spaces[spaceNum++] = rootDescriptorDesc.registerSpace; + RETURN_ON_FAILURE(this, isDescriptorTypeValid, Result::INVALID_ARGUMENT, "'rootDescriptors[%u].descriptorType' must be one of 'CONSTANT_BUFFER', 'STRUCTURED_BUFFER' or 'STORAGE_STRUCTURED_BUFFER'", i); } PipelineLayout* pipelineLayoutImpl = nullptr; @@ -469,9 +415,9 @@ NRI_INLINE Result DeviceVal::CreatePipelineLayout(const PipelineLayoutDesc& pipe } NRI_INLINE Result DeviceVal::CreatePipeline(const GraphicsPipelineDesc& graphicsPipelineDesc, Pipeline*& pipeline) { - RETURN_ON_FAILURE(this, graphicsPipelineDesc.pipelineLayout != nullptr, Result::INVALID_ARGUMENT, "'graphicsPipelineDesc.pipelineLayout' is NULL"); - RETURN_ON_FAILURE(this, graphicsPipelineDesc.shaders != nullptr, Result::INVALID_ARGUMENT, "'graphicsPipelineDesc.shaders' is NULL"); - RETURN_ON_FAILURE(this, graphicsPipelineDesc.shaderNum > 0, Result::INVALID_ARGUMENT, "'graphicsPipelineDesc.shaderNum' is 0"); + RETURN_ON_FAILURE(this, graphicsPipelineDesc.pipelineLayout != nullptr, Result::INVALID_ARGUMENT, "'pipelineLayout' is NULL"); + RETURN_ON_FAILURE(this, graphicsPipelineDesc.shaders != nullptr, Result::INVALID_ARGUMENT, "'shaders' is NULL"); + RETURN_ON_FAILURE(this, graphicsPipelineDesc.shaderNum > 0, Result::INVALID_ARGUMENT, "'shaderNum' is 0"); const PipelineLayoutVal& pipelineLayout = *(PipelineLayoutVal*)graphicsPipelineDesc.pipelineLayout; const StageBits shaderStages = pipelineLayout.GetPipelineLayoutDesc().shaderStages; @@ -482,17 +428,16 @@ NRI_INLINE Result DeviceVal::CreatePipeline(const GraphicsPipelineDesc& graphics if (shaderDesc->stage == StageBits::VERTEX_SHADER || shaderDesc->stage == StageBits::MESH_CONTROL_SHADER) hasEntryPoint = true; - RETURN_ON_FAILURE(this, shaderDesc->stage & shaderStages, 
Result::INVALID_ARGUMENT, "'graphicsPipelineDesc.shaders[%u].stage' is not enabled in the pipeline layout", i); - RETURN_ON_FAILURE(this, shaderDesc->bytecode != nullptr, Result::INVALID_ARGUMENT, "'graphicsPipelineDesc.shaders[%u].bytecode' is invalid", i); - RETURN_ON_FAILURE(this, shaderDesc->size != 0, Result::INVALID_ARGUMENT, "'graphicsPipelineDesc.shaders[%u].size' is 0", i); - RETURN_ON_FAILURE(this, IsShaderStageValid(shaderDesc->stage, uniqueShaderStages, StageBits::GRAPHICS_SHADERS), Result::INVALID_ARGUMENT, - "'graphicsPipelineDesc.shaders[%u].stage' must include only 1 graphics shader stage, unique for the entire pipeline", i); + RETURN_ON_FAILURE(this, shaderDesc->stage & shaderStages, Result::INVALID_ARGUMENT, "'shaders[%u].stage' is not enabled in the pipeline layout", i); + RETURN_ON_FAILURE(this, shaderDesc->bytecode != nullptr, Result::INVALID_ARGUMENT, "'shaders[%u].bytecode' is invalid", i); + RETURN_ON_FAILURE(this, shaderDesc->size != 0, Result::INVALID_ARGUMENT, "'shaders[%u].size' is 0", i); + RETURN_ON_FAILURE(this, IsShaderStageValid(shaderDesc->stage, uniqueShaderStages, StageBits::GRAPHICS_SHADERS), Result::INVALID_ARGUMENT, "'shaders[%u].stage' must include only 1 graphics shader stage, unique for the entire pipeline", i); } RETURN_ON_FAILURE(this, hasEntryPoint, Result::INVALID_ARGUMENT, "a VERTEX or MESH_CONTROL shader is not provided"); for (uint32_t i = 0; i < graphicsPipelineDesc.outputMerger.colorNum; i++) { const ColorAttachmentDesc* color = graphicsPipelineDesc.outputMerger.colors + i; - RETURN_ON_FAILURE(this, color->format > Format::UNKNOWN && color->format < Format::BC1_RGBA_UNORM, Result::INVALID_ARGUMENT, "'graphicsPipelineDesc.outputMerger->color[%u].format = %u' is invalid", i, color->format); + RETURN_ON_FAILURE(this, color->format > Format::UNKNOWN && color->format < Format::BC1_RGBA_UNORM, Result::INVALID_ARGUMENT, "'outputMerger->color[%u].format = %u' is invalid", i, color->format); } if 
(graphicsPipelineDesc.vertexInput) { @@ -501,7 +446,7 @@ NRI_INLINE Result DeviceVal::CreatePipeline(const GraphicsPipelineDesc& graphics uint32_t size = GetFormatProps(attribute->format).stride; uint32_t stride = graphicsPipelineDesc.vertexInput->streams[attribute->streamIndex].stride; RETURN_ON_FAILURE(this, attribute->offset + size <= stride, Result::INVALID_ARGUMENT, - "'graphicsPipelineDesc.inputAssembly->attributes[%u]' is out of bounds of 'graphicsPipelineDesc.inputAssembly->streams[%u]' (stride = %u)", i, attribute->streamIndex, stride); + "'inputAssembly->attributes[%u]' is out of bounds of 'inputAssembly->streams[%u]' (stride = %u)", i, attribute->streamIndex, stride); } } @@ -536,10 +481,10 @@ NRI_INLINE Result DeviceVal::CreatePipeline(const GraphicsPipelineDesc& graphics } NRI_INLINE Result DeviceVal::CreatePipeline(const ComputePipelineDesc& computePipelineDesc, Pipeline*& pipeline) { - RETURN_ON_FAILURE(this, computePipelineDesc.pipelineLayout != nullptr, Result::INVALID_ARGUMENT, "'computePipelineDesc.pipelineLayout' is NULL"); - RETURN_ON_FAILURE(this, computePipelineDesc.shader.size != 0, Result::INVALID_ARGUMENT, "'computePipelineDesc.shader.size' is 0"); - RETURN_ON_FAILURE(this, computePipelineDesc.shader.bytecode != nullptr, Result::INVALID_ARGUMENT, "'computePipelineDesc.shader.bytecode' is NULL"); - RETURN_ON_FAILURE(this, computePipelineDesc.shader.stage == StageBits::COMPUTE_SHADER, Result::INVALID_ARGUMENT, "'computePipelineDesc.shader.stage' must be 'StageBits::COMPUTE_SHADER'"); + RETURN_ON_FAILURE(this, computePipelineDesc.pipelineLayout != nullptr, Result::INVALID_ARGUMENT, "'pipelineLayout' is NULL"); + RETURN_ON_FAILURE(this, computePipelineDesc.shader.size != 0, Result::INVALID_ARGUMENT, "'shader.size' is 0"); + RETURN_ON_FAILURE(this, computePipelineDesc.shader.bytecode != nullptr, Result::INVALID_ARGUMENT, "'shader.bytecode' is NULL"); + RETURN_ON_FAILURE(this, computePipelineDesc.shader.stage == StageBits::COMPUTE_SHADER, 
Result::INVALID_ARGUMENT, "'shader.stage' must be 'StageBits::COMPUTE_SHADER'"); auto computePipelineDescImpl = computePipelineDesc; computePipelineDescImpl.pipelineLayout = NRI_GET_IMPL(PipelineLayout, computePipelineDesc.pipelineLayout); @@ -554,8 +499,8 @@ NRI_INLINE Result DeviceVal::CreatePipeline(const ComputePipelineDesc& computePi } NRI_INLINE Result DeviceVal::CreateQueryPool(const QueryPoolDesc& queryPoolDesc, QueryPool*& queryPool) { - RETURN_ON_FAILURE(this, queryPoolDesc.queryType < QueryType::MAX_NUM, Result::INVALID_ARGUMENT, "'queryPoolDesc.queryType' is invalid"); - RETURN_ON_FAILURE(this, queryPoolDesc.capacity > 0, Result::INVALID_ARGUMENT, "'queryPoolDesc.capacity' is 0"); + RETURN_ON_FAILURE(this, queryPoolDesc.queryType < QueryType::MAX_NUM, Result::INVALID_ARGUMENT, "'queryType' is invalid"); + RETURN_ON_FAILURE(this, queryPoolDesc.capacity > 0, Result::INVALID_ARGUMENT, "'capacity' is 0"); if (queryPoolDesc.queryType == QueryType::TIMESTAMP_COPY_QUEUE) RETURN_ON_FAILURE(this, GetDesc().isCopyQueueTimestampSupported, Result::UNSUPPORTED, "'isCopyQueueTimestampSupported' is false"); @@ -630,8 +575,8 @@ NRI_INLINE void DeviceVal::DestroyFence(Fence& fence) { } NRI_INLINE Result DeviceVal::AllocateMemory(const AllocateMemoryDesc& allocateMemoryDesc, Memory*& memory) { - RETURN_ON_FAILURE(this, allocateMemoryDesc.size > 0, Result::INVALID_ARGUMENT, "'allocateMemoryDesc.size' is 0"); - RETURN_ON_FAILURE(this, allocateMemoryDesc.priority >= -1.0f && allocateMemoryDesc.priority <= 1.0f, Result::INVALID_ARGUMENT, "'allocateMemoryDesc.priority' outside of [-1; 1] range"); + RETURN_ON_FAILURE(this, allocateMemoryDesc.size > 0, Result::INVALID_ARGUMENT, "'size' is 0"); + RETURN_ON_FAILURE(this, allocateMemoryDesc.priority >= -1.0f && allocateMemoryDesc.priority <= 1.0f, Result::INVALID_ARGUMENT, "'priority' outside of [-1; 1] range"); std::unordered_map::iterator it; std::unordered_map::iterator end; @@ -653,21 +598,18 @@ NRI_INLINE Result 
DeviceVal::AllocateMemory(const AllocateMemoryDesc& allocateMe } NRI_INLINE Result DeviceVal::BindBufferMemory(const BufferMemoryBindingDesc* memoryBindingDescs, uint32_t memoryBindingDescNum) { - RETURN_ON_FAILURE(this, memoryBindingDescs != nullptr, Result::INVALID_ARGUMENT, "'memoryBindingDescs' is NULL"); - - BufferMemoryBindingDesc* memoryBindingDescsImpl = StackAlloc(BufferMemoryBindingDesc, memoryBindingDescNum); - + Scratch memoryBindingDescsImpl = AllocateScratch(*this, BufferMemoryBindingDesc, memoryBindingDescNum); for (uint32_t i = 0; i < memoryBindingDescNum; i++) { BufferMemoryBindingDesc& destDesc = memoryBindingDescsImpl[i]; const BufferMemoryBindingDesc& srcDesc = memoryBindingDescs[i]; - RETURN_ON_FAILURE(this, srcDesc.buffer != nullptr, Result::INVALID_ARGUMENT, "'memoryBindingDescs[%u].buffer' is NULL", i); - RETURN_ON_FAILURE(this, srcDesc.memory != nullptr, Result::INVALID_ARGUMENT, "'memoryBindingDescs[%u].memory' is NULL", i); + RETURN_ON_FAILURE(this, srcDesc.buffer != nullptr, Result::INVALID_ARGUMENT, "'[%u].buffer' is NULL", i); + RETURN_ON_FAILURE(this, srcDesc.memory != nullptr, Result::INVALID_ARGUMENT, "'[%u].memory' is NULL", i); MemoryVal& memory = (MemoryVal&)*srcDesc.memory; BufferVal& buffer = (BufferVal&)*srcDesc.buffer; - RETURN_ON_FAILURE(this, !buffer.IsBoundToMemory(), Result::INVALID_ARGUMENT, "'memoryBindingDescs[%u].buffer' is already bound to memory", i); + RETURN_ON_FAILURE(this, !buffer.IsBoundToMemory(), Result::INVALID_ARGUMENT, "'[%u].buffer' is already bound to memory", i); destDesc = srcDesc; destDesc.memory = memory.GetImpl(); @@ -680,14 +622,14 @@ NRI_INLINE Result DeviceVal::BindBufferMemory(const BufferMemoryBindingDesc* mem MemoryDesc memoryDesc = {}; GetCoreInterface().GetBufferMemoryDesc(GetImpl(), buffer.GetDesc(), memory.GetMemoryLocation(), memoryDesc); - RETURN_ON_FAILURE(this, !memoryDesc.mustBeDedicated || srcDesc.offset == 0, Result::INVALID_ARGUMENT, "'memoryBindingDescs[%u].offset' must be zero 
for dedicated allocation", i); - RETURN_ON_FAILURE(this, memoryDesc.alignment != 0, Result::INVALID_ARGUMENT, "'memoryBindingDescs[%u].alignment' is 0", i); - RETURN_ON_FAILURE(this, srcDesc.offset % memoryDesc.alignment == 0, Result::INVALID_ARGUMENT, "'memoryBindingDescs[%u].offset' is misaligned", i); + RETURN_ON_FAILURE(this, !memoryDesc.mustBeDedicated || srcDesc.offset == 0, Result::INVALID_ARGUMENT, "'[%u].offset' must be zero for dedicated allocation", i); + RETURN_ON_FAILURE(this, memoryDesc.alignment != 0, Result::INVALID_ARGUMENT, "'[%u].alignment' is 0", i); + RETURN_ON_FAILURE(this, srcDesc.offset % memoryDesc.alignment == 0, Result::INVALID_ARGUMENT, "'[%u].offset' is misaligned", i); const uint64_t rangeMax = srcDesc.offset + memoryDesc.size; const bool memorySizeIsUnknown = memory.GetSize() == 0; - RETURN_ON_FAILURE(this, memorySizeIsUnknown || rangeMax <= memory.GetSize(), Result::INVALID_ARGUMENT, "'memoryBindingDescs[%u].offset' is invalid", i); + RETURN_ON_FAILURE(this, memorySizeIsUnknown || rangeMax <= memory.GetSize(), Result::INVALID_ARGUMENT, "'[%u].offset' is invalid", i); } Result result = m_CoreAPI.BindBufferMemory(m_Device, memoryBindingDescsImpl, memoryBindingDescNum); @@ -703,21 +645,18 @@ NRI_INLINE Result DeviceVal::BindBufferMemory(const BufferMemoryBindingDesc* mem } NRI_INLINE Result DeviceVal::BindTextureMemory(const TextureMemoryBindingDesc* memoryBindingDescs, uint32_t memoryBindingDescNum) { - RETURN_ON_FAILURE(this, memoryBindingDescs != nullptr, Result::INVALID_ARGUMENT, "'memoryBindingDescs' is a NULL"); - - TextureMemoryBindingDesc* memoryBindingDescsImpl = StackAlloc(TextureMemoryBindingDesc, memoryBindingDescNum); - + Scratch memoryBindingDescsImpl = AllocateScratch(*this, TextureMemoryBindingDesc, memoryBindingDescNum); for (uint32_t i = 0; i < memoryBindingDescNum; i++) { TextureMemoryBindingDesc& destDesc = memoryBindingDescsImpl[i]; const TextureMemoryBindingDesc& srcDesc = memoryBindingDescs[i]; - 
RETURN_ON_FAILURE(this, srcDesc.texture != nullptr, Result::INVALID_ARGUMENT, "'memoryBindingDescs[%u].texture' is NULL", i); - RETURN_ON_FAILURE(this, srcDesc.memory != nullptr, Result::INVALID_ARGUMENT, "'memoryBindingDescs[%u].memory' is NULL", i); + RETURN_ON_FAILURE(this, srcDesc.texture != nullptr, Result::INVALID_ARGUMENT, "'[%u].texture' is NULL", i); + RETURN_ON_FAILURE(this, srcDesc.memory != nullptr, Result::INVALID_ARGUMENT, "'[%u].memory' is NULL", i); MemoryVal& memory = (MemoryVal&)*srcDesc.memory; TextureVal& texture = (TextureVal&)*srcDesc.texture; - RETURN_ON_FAILURE(this, !texture.IsBoundToMemory(), Result::INVALID_ARGUMENT, "'memoryBindingDescs[%u].texture' is already bound to memory", i); + RETURN_ON_FAILURE(this, !texture.IsBoundToMemory(), Result::INVALID_ARGUMENT, "'[%u].texture' is already bound to memory", i); destDesc = srcDesc; destDesc.memory = memory.GetImpl(); @@ -730,14 +669,14 @@ NRI_INLINE Result DeviceVal::BindTextureMemory(const TextureMemoryBindingDesc* m MemoryDesc memoryDesc = {}; GetCoreInterface().GetTextureMemoryDesc(GetImpl(), texture.GetDesc(), memory.GetMemoryLocation(), memoryDesc); - RETURN_ON_FAILURE(this, !memoryDesc.mustBeDedicated || srcDesc.offset == 0, Result::INVALID_ARGUMENT, "'memoryBindingDescs[%u].offset' must be zero for dedicated allocation", i); - RETURN_ON_FAILURE(this, memoryDesc.alignment != 0, Result::INVALID_ARGUMENT, "'memoryBindingDescs[%u].alignment' is 0", i); - RETURN_ON_FAILURE(this, srcDesc.offset % memoryDesc.alignment == 0, Result::INVALID_ARGUMENT, "'memoryBindingDescs[%u].offset' is misaligned", i); + RETURN_ON_FAILURE(this, !memoryDesc.mustBeDedicated || srcDesc.offset == 0, Result::INVALID_ARGUMENT, "'[%u].offset' must be zero for dedicated allocation", i); + RETURN_ON_FAILURE(this, memoryDesc.alignment != 0, Result::INVALID_ARGUMENT, "'[%u].alignment' is 0", i); + RETURN_ON_FAILURE(this, srcDesc.offset % memoryDesc.alignment == 0, Result::INVALID_ARGUMENT, "'[%u].offset' is misaligned", 
i); const uint64_t rangeMax = srcDesc.offset + memoryDesc.size; const bool memorySizeIsUnknown = memory.GetSize() == 0; - RETURN_ON_FAILURE(this, memorySizeIsUnknown || rangeMax <= memory.GetSize(), Result::INVALID_ARGUMENT, "'memoryBindingDescs[%u].offset' is invalid", i); + RETURN_ON_FAILURE(this, memorySizeIsUnknown || rangeMax <= memory.GetSize(), Result::INVALID_ARGUMENT, "'[%u].offset' is invalid", i); } Result result = m_CoreAPI.BindTextureMemory(m_Device, memoryBindingDescsImpl, memoryBindingDescNum); @@ -772,8 +711,8 @@ NRI_INLINE FormatSupportBits DeviceVal::GetFormatSupport(Format format) const { #if NRI_USE_VK NRI_INLINE Result DeviceVal::CreateCommandQueue(const CommandQueueVKDesc& commandQueueVKDesc, CommandQueue*& commandQueue) { - RETURN_ON_FAILURE(this, commandQueueVKDesc.vkQueue != 0, Result::INVALID_ARGUMENT, "'commandQueueVKDesc.vkQueue' is NULL"); - RETURN_ON_FAILURE(this, commandQueueVKDesc.commandQueueType < CommandQueueType::MAX_NUM, Result::INVALID_ARGUMENT, "'commandQueueVKDesc.commandQueueType' is invalid"); + RETURN_ON_FAILURE(this, commandQueueVKDesc.vkQueue != 0, Result::INVALID_ARGUMENT, "'vkQueue' is NULL"); + RETURN_ON_FAILURE(this, commandQueueVKDesc.commandQueueType < CommandQueueType::MAX_NUM, Result::INVALID_ARGUMENT, "'commandQueueType' is invalid"); CommandQueue* commandQueueImpl = nullptr; Result result = m_WrapperVKAPI.CreateCommandQueueVK(m_Device, commandQueueVKDesc, commandQueueImpl); @@ -785,8 +724,8 @@ NRI_INLINE Result DeviceVal::CreateCommandQueue(const CommandQueueVKDesc& comman } NRI_INLINE Result DeviceVal::CreateCommandAllocator(const CommandAllocatorVKDesc& commandAllocatorVKDesc, CommandAllocator*& commandAllocator) { - RETURN_ON_FAILURE(this, commandAllocatorVKDesc.vkCommandPool != 0, Result::INVALID_ARGUMENT, "'commandAllocatorVKDesc.vkCommandPool' is NULL"); - RETURN_ON_FAILURE(this, commandAllocatorVKDesc.commandQueueType < CommandQueueType::MAX_NUM, Result::INVALID_ARGUMENT, 
"'commandAllocatorVKDesc.commandQueueType' is invalid"); + RETURN_ON_FAILURE(this, commandAllocatorVKDesc.vkCommandPool != 0, Result::INVALID_ARGUMENT, "'vkCommandPool' is NULL"); + RETURN_ON_FAILURE(this, commandAllocatorVKDesc.commandQueueType < CommandQueueType::MAX_NUM, Result::INVALID_ARGUMENT, "'commandQueueType' is invalid"); CommandAllocator* commandAllocatorImpl = nullptr; Result result = m_WrapperVKAPI.CreateCommandAllocatorVK(m_Device, commandAllocatorVKDesc, commandAllocatorImpl); @@ -798,8 +737,8 @@ NRI_INLINE Result DeviceVal::CreateCommandAllocator(const CommandAllocatorVKDesc } NRI_INLINE Result DeviceVal::CreateCommandBuffer(const CommandBufferVKDesc& commandBufferVKDesc, CommandBuffer*& commandBuffer) { - RETURN_ON_FAILURE(this, commandBufferVKDesc.vkCommandBuffer != 0, Result::INVALID_ARGUMENT, "'commandBufferVKDesc.vkCommandBuffer' is NULL"); - RETURN_ON_FAILURE(this, commandBufferVKDesc.commandQueueType < CommandQueueType::MAX_NUM, Result::INVALID_ARGUMENT, "'commandBufferVKDesc.commandQueueType' is invalid"); + RETURN_ON_FAILURE(this, commandBufferVKDesc.vkCommandBuffer != 0, Result::INVALID_ARGUMENT, "'vkCommandBuffer' is NULL"); + RETURN_ON_FAILURE(this, commandBufferVKDesc.commandQueueType < CommandQueueType::MAX_NUM, Result::INVALID_ARGUMENT, "'commandQueueType' is invalid"); CommandBuffer* commandBufferImpl = nullptr; Result result = m_WrapperVKAPI.CreateCommandBufferVK(m_Device, commandBufferVKDesc, commandBufferImpl); @@ -824,8 +763,8 @@ NRI_INLINE Result DeviceVal::CreateDescriptorPool(const DescriptorPoolVKDesc& de } NRI_INLINE Result DeviceVal::CreateBuffer(const BufferVKDesc& bufferDesc, Buffer*& buffer) { - RETURN_ON_FAILURE(this, bufferDesc.vkBuffer != 0, Result::INVALID_ARGUMENT, "'bufferDesc.vkBuffer' is NULL"); - RETURN_ON_FAILURE(this, bufferDesc.size > 0, Result::INVALID_ARGUMENT, "'bufferDesc.bufferSize' is 0"); + RETURN_ON_FAILURE(this, bufferDesc.vkBuffer != 0, Result::INVALID_ARGUMENT, "'vkBuffer' is NULL"); + 
RETURN_ON_FAILURE(this, bufferDesc.size > 0, Result::INVALID_ARGUMENT, "'size' is 0"); Buffer* bufferImpl = nullptr; Result result = m_WrapperVKAPI.CreateBufferVK(m_Device, bufferDesc, bufferImpl); @@ -837,11 +776,11 @@ NRI_INLINE Result DeviceVal::CreateBuffer(const BufferVKDesc& bufferDesc, Buffer } NRI_INLINE Result DeviceVal::CreateTexture(const TextureVKDesc& textureVKDesc, Texture*& texture) { - RETURN_ON_FAILURE(this, textureVKDesc.vkImage != 0, Result::INVALID_ARGUMENT, "'textureVKDesc.vkImage' is NULL"); - RETURN_ON_FAILURE(this, nriConvertVKFormatToNRI(textureVKDesc.vkFormat) != Format::UNKNOWN, Result::INVALID_ARGUMENT, "'textureVKDesc.sampleNum' is 0"); - RETURN_ON_FAILURE(this, textureVKDesc.sampleNum > 0, Result::INVALID_ARGUMENT, "'textureVKDesc.sampleNum' is 0"); - RETURN_ON_FAILURE(this, textureVKDesc.layerNum > 0, Result::INVALID_ARGUMENT, "'textureVKDesc.layerNum' is 0"); - RETURN_ON_FAILURE(this, textureVKDesc.mipNum > 0, Result::INVALID_ARGUMENT, "'textureVKDesc.mipNum' is 0"); + RETURN_ON_FAILURE(this, textureVKDesc.vkImage != 0, Result::INVALID_ARGUMENT, "'vkImage' is NULL"); + RETURN_ON_FAILURE(this, nriConvertVKFormatToNRI(textureVKDesc.vkFormat) != Format::UNKNOWN, Result::INVALID_ARGUMENT, "'vkFormat' is invalid"); + RETURN_ON_FAILURE(this, textureVKDesc.sampleNum > 0, Result::INVALID_ARGUMENT, "'sampleNum' is 0"); + RETURN_ON_FAILURE(this, textureVKDesc.layerNum > 0, Result::INVALID_ARGUMENT, "'layerNum' is 0"); + RETURN_ON_FAILURE(this, textureVKDesc.mipNum > 0, Result::INVALID_ARGUMENT, "'mipNum' is 0"); Texture* textureImpl = nullptr; Result result = m_WrapperVKAPI.CreateTextureVK(m_Device, textureVKDesc, textureImpl); @@ -853,8 +792,8 @@ NRI_INLINE Result DeviceVal::CreateTexture(const TextureVKDesc& textureVKDesc, T } NRI_INLINE Result DeviceVal::CreateMemory(const MemoryVKDesc& memoryVKDesc, Memory*& memory) { - RETURN_ON_FAILURE(this, memoryVKDesc.vkDeviceMemory != 0, Result::INVALID_ARGUMENT, "'memoryVKDesc.vkDeviceMemory' is 
NULL"); - RETURN_ON_FAILURE(this, memoryVKDesc.size > 0, Result::INVALID_ARGUMENT, "'memoryVKDesc.size' is 0"); + RETURN_ON_FAILURE(this, memoryVKDesc.vkDeviceMemory != 0, Result::INVALID_ARGUMENT, "'vkDeviceMemory' is NULL"); + RETURN_ON_FAILURE(this, memoryVKDesc.size > 0, Result::INVALID_ARGUMENT, "'size' is 0"); Memory* memoryImpl = nullptr; Result result = m_WrapperVKAPI.CreateMemoryVK(m_Device, memoryVKDesc, memoryImpl); @@ -890,7 +829,7 @@ NRI_INLINE Result DeviceVal::CreateComputePipeline(VKNonDispatchableHandle vkPip } NRI_INLINE Result DeviceVal::CreateQueryPool(const QueryPoolVKDesc& queryPoolVKDesc, QueryPool*& queryPool) { - RETURN_ON_FAILURE(this, queryPoolVKDesc.vkQueryPool != 0, Result::INVALID_ARGUMENT, "'queryPoolVKDesc.vkQueryPool' is NULL"); + RETURN_ON_FAILURE(this, queryPoolVKDesc.vkQueryPool != 0, Result::INVALID_ARGUMENT, "'vkQueryPool' is NULL"); QueryPool* queryPoolImpl = nullptr; Result result = m_WrapperVKAPI.CreateQueryPoolVK(m_Device, queryPoolVKDesc, queryPoolImpl); @@ -904,7 +843,7 @@ NRI_INLINE Result DeviceVal::CreateQueryPool(const QueryPoolVKDesc& queryPoolVKD } NRI_INLINE Result DeviceVal::CreateAccelerationStructure(const AccelerationStructureVKDesc& accelerationStructureDesc, AccelerationStructure*& accelerationStructure) { - RETURN_ON_FAILURE(this, accelerationStructureDesc.vkAccelerationStructure != 0, Result::INVALID_ARGUMENT, "'accelerationStructureDesc.vkAccelerationStructure' is NULL"); + RETURN_ON_FAILURE(this, accelerationStructureDesc.vkAccelerationStructure != 0, Result::INVALID_ARGUMENT, "'vkAccelerationStructure' is NULL"); AccelerationStructure* accelerationStructureImpl = nullptr; Result result = m_WrapperVKAPI.CreateAccelerationStructureVK(m_Device, accelerationStructureDesc, accelerationStructureImpl); @@ -922,7 +861,7 @@ NRI_INLINE Result DeviceVal::CreateAccelerationStructure(const AccelerationStruc #if NRI_USE_D3D11 NRI_INLINE Result DeviceVal::CreateCommandBuffer(const CommandBufferD3D11Desc& 
commandBufferDesc, CommandBuffer*& commandBuffer) { - RETURN_ON_FAILURE(this, commandBufferDesc.d3d11DeviceContext != nullptr, Result::INVALID_ARGUMENT, "'commandBufferDesc.d3d11DeviceContext' is NULL"); + RETURN_ON_FAILURE(this, commandBufferDesc.d3d11DeviceContext != nullptr, Result::INVALID_ARGUMENT, "'d3d11DeviceContext' is NULL"); CommandBuffer* commandBufferImpl = nullptr; Result result = m_WrapperD3D11API.CreateCommandBufferD3D11(m_Device, commandBufferDesc, commandBufferImpl); @@ -934,7 +873,7 @@ NRI_INLINE Result DeviceVal::CreateCommandBuffer(const CommandBufferD3D11Desc& c } NRI_INLINE Result DeviceVal::CreateBuffer(const BufferD3D11Desc& bufferDesc, Buffer*& buffer) { - RETURN_ON_FAILURE(this, bufferDesc.d3d11Resource != nullptr, Result::INVALID_ARGUMENT, "'bufferDesc.d3d11Resource' is NULL"); + RETURN_ON_FAILURE(this, bufferDesc.d3d11Resource != nullptr, Result::INVALID_ARGUMENT, "'d3d11Resource' is NULL"); Buffer* bufferImpl = nullptr; Result result = m_WrapperD3D11API.CreateBufferD3D11(m_Device, bufferDesc, bufferImpl); @@ -946,7 +885,7 @@ NRI_INLINE Result DeviceVal::CreateBuffer(const BufferD3D11Desc& bufferDesc, Buf } NRI_INLINE Result DeviceVal::CreateTexture(const TextureD3D11Desc& textureDesc, Texture*& texture) { - RETURN_ON_FAILURE(this, textureDesc.d3d11Resource != nullptr, Result::INVALID_ARGUMENT, "'textureDesc.d3d11Resource' is NULL"); + RETURN_ON_FAILURE(this, textureDesc.d3d11Resource != nullptr, Result::INVALID_ARGUMENT, "'d3d11Resource' is NULL"); Texture* textureImpl = nullptr; Result result = m_WrapperD3D11API.CreateTextureD3D11(m_Device, textureDesc, textureImpl); @@ -962,8 +901,8 @@ NRI_INLINE Result DeviceVal::CreateTexture(const TextureD3D11Desc& textureDesc, #if NRI_USE_D3D12 NRI_INLINE Result DeviceVal::CreateCommandBuffer(const CommandBufferD3D12Desc& commandBufferDesc, CommandBuffer*& commandBuffer) { - RETURN_ON_FAILURE(this, commandBufferDesc.d3d12CommandAllocator != nullptr, Result::INVALID_ARGUMENT, 
"'commandBufferDesc.d3d12CommandAllocator' is NULL"); - RETURN_ON_FAILURE(this, commandBufferDesc.d3d12CommandList != nullptr, Result::INVALID_ARGUMENT, "'commandBufferDesc.d3d12CommandList' is NULL"); + RETURN_ON_FAILURE(this, commandBufferDesc.d3d12CommandAllocator != nullptr, Result::INVALID_ARGUMENT, "'d3d12CommandAllocator' is NULL"); + RETURN_ON_FAILURE(this, commandBufferDesc.d3d12CommandList != nullptr, Result::INVALID_ARGUMENT, "'d3d12CommandList' is NULL"); CommandBuffer* commandBufferImpl = nullptr; Result result = m_WrapperD3D12API.CreateCommandBufferD3D12(m_Device, commandBufferDesc, commandBufferImpl); @@ -975,8 +914,7 @@ NRI_INLINE Result DeviceVal::CreateCommandBuffer(const CommandBufferD3D12Desc& c } NRI_INLINE Result DeviceVal::CreateDescriptorPool(const DescriptorPoolD3D12Desc& descriptorPoolD3D12Desc, DescriptorPool*& descriptorPool) { - RETURN_ON_FAILURE(this, descriptorPoolD3D12Desc.d3d12ResourceDescriptorHeap || descriptorPoolD3D12Desc.d3d12SamplerDescriptorHeap, - Result::INVALID_ARGUMENT, "'descriptorPoolD3D12Desc.d3d12ResourceDescriptorHeap' and 'descriptorPoolD3D12Desc.d3d12ResourceDescriptorHeap' are both NULL"); + RETURN_ON_FAILURE(this, descriptorPoolD3D12Desc.d3d12ResourceDescriptorHeap || descriptorPoolD3D12Desc.d3d12SamplerDescriptorHeap, Result::INVALID_ARGUMENT, "'d3d12ResourceDescriptorHeap' and 'd3d12SamplerDescriptorHeap' are both NULL"); DescriptorPool* descriptorPoolImpl = nullptr; Result result = m_WrapperD3D12API.CreateDescriptorPoolD3D12(m_Device, descriptorPoolD3D12Desc, descriptorPoolImpl); @@ -988,7 +926,7 @@ NRI_INLINE Result DeviceVal::CreateDescriptorPool(const DescriptorPoolD3D12Desc& } NRI_INLINE Result DeviceVal::CreateBuffer(const BufferD3D12Desc& bufferDesc, Buffer*& buffer) { - RETURN_ON_FAILURE(this, bufferDesc.d3d12Resource != nullptr, Result::INVALID_ARGUMENT, "'bufferDesc.d3d12Resource' is NULL"); + RETURN_ON_FAILURE(this, bufferDesc.d3d12Resource != nullptr, Result::INVALID_ARGUMENT, "'d3d12Resource' is
NULL"); Buffer* bufferImpl = nullptr; Result result = m_WrapperD3D12API.CreateBufferD3D12(m_Device, bufferDesc, bufferImpl); @@ -1000,7 +938,7 @@ NRI_INLINE Result DeviceVal::CreateBuffer(const BufferD3D12Desc& bufferDesc, Buf } NRI_INLINE Result DeviceVal::CreateTexture(const TextureD3D12Desc& textureDesc, Texture*& texture) { - RETURN_ON_FAILURE(this, textureDesc.d3d12Resource != nullptr, Result::INVALID_ARGUMENT, "'textureDesc.d3d12Resource' is NULL"); + RETURN_ON_FAILURE(this, textureDesc.d3d12Resource != nullptr, Result::INVALID_ARGUMENT, "'d3d12Resource' is NULL"); Texture* textureImpl = nullptr; Result result = m_WrapperD3D12API.CreateTextureD3D12(m_Device, textureDesc, textureImpl); @@ -1012,7 +950,7 @@ NRI_INLINE Result DeviceVal::CreateTexture(const TextureD3D12Desc& textureDesc, } NRI_INLINE Result DeviceVal::CreateMemory(const MemoryD3D12Desc& memoryDesc, Memory*& memory) { - RETURN_ON_FAILURE(this, memoryDesc.d3d12Heap != nullptr, Result::INVALID_ARGUMENT, "'memoryDesc.d3d12Heap' is NULL"); + RETURN_ON_FAILURE(this, memoryDesc.d3d12Heap != nullptr, Result::INVALID_ARGUMENT, "'d3d12Heap' is NULL"); Memory* memoryImpl = nullptr; Result result = m_WrapperD3D12API.CreateMemoryD3D12(m_Device, memoryDesc, memoryImpl); @@ -1026,7 +964,7 @@ NRI_INLINE Result DeviceVal::CreateMemory(const MemoryD3D12Desc& memoryDesc, Mem } NRI_INLINE Result DeviceVal::CreateAccelerationStructure(const AccelerationStructureD3D12Desc& accelerationStructureDesc, AccelerationStructure*& accelerationStructure) { - RETURN_ON_FAILURE(this, accelerationStructureDesc.d3d12Resource != nullptr, Result::INVALID_ARGUMENT, "'accelerationStructureDesc.d3d12Resource' is NULL"); + RETURN_ON_FAILURE(this, accelerationStructureDesc.d3d12Resource != nullptr, Result::INVALID_ARGUMENT, "'d3d12Resource' is NULL"); AccelerationStructure* accelerationStructureImpl = nullptr; Result result = m_WrapperD3D12API.CreateAccelerationStructureD3D12(m_Device, accelerationStructureDesc, 
accelerationStructureImpl); @@ -1041,24 +979,22 @@ NRI_INLINE Result DeviceVal::CreateAccelerationStructure(const AccelerationStruc #endif -NRI_INLINE uint32_t DeviceVal::CalculateAllocationNumber(const ResourceGroupDesc& resourceGroupDesc) const { - RETURN_ON_FAILURE(this, resourceGroupDesc.memoryLocation < MemoryLocation::MAX_NUM, 0, "'resourceGroupDesc.memoryLocation' is invalid"); - RETURN_ON_FAILURE(this, resourceGroupDesc.bufferNum == 0 || resourceGroupDesc.buffers != nullptr, 0, "'resourceGroupDesc.buffers' is NULL"); - RETURN_ON_FAILURE(this, resourceGroupDesc.textureNum == 0 || resourceGroupDesc.textures != nullptr, 0, "'resourceGroupDesc.textures' is NULL"); - - Buffer** buffersImpl = StackAlloc(Buffer*, resourceGroupDesc.bufferNum); +NRI_INLINE uint32_t DeviceVal::CalculateAllocationNumber(const ResourceGroupDesc& resourceGroupDesc) { + RETURN_ON_FAILURE(this, resourceGroupDesc.memoryLocation < MemoryLocation::MAX_NUM, 0, "'memoryLocation' is invalid"); + RETURN_ON_FAILURE(this, resourceGroupDesc.bufferNum == 0 || resourceGroupDesc.buffers != nullptr, 0, "'buffers' is NULL"); + RETURN_ON_FAILURE(this, resourceGroupDesc.textureNum == 0 || resourceGroupDesc.textures != nullptr, 0, "'textures' is NULL"); + Scratch buffersImpl = AllocateScratch(*this, Buffer*, resourceGroupDesc.bufferNum); for (uint32_t i = 0; i < resourceGroupDesc.bufferNum; i++) { - RETURN_ON_FAILURE(this, resourceGroupDesc.buffers[i] != nullptr, 0, "'resourceGroupDesc.buffers[%u]' is NULL", i); + RETURN_ON_FAILURE(this, resourceGroupDesc.buffers[i] != nullptr, 0, "'buffers[%u]' is NULL", i); BufferVal& bufferVal = *(BufferVal*)resourceGroupDesc.buffers[i]; buffersImpl[i] = bufferVal.GetImpl(); } - Texture** texturesImpl = StackAlloc(Texture*, resourceGroupDesc.textureNum); - + Scratch texturesImpl = AllocateScratch(*this, Texture*, resourceGroupDesc.textureNum); for (uint32_t i = 0; i < resourceGroupDesc.textureNum; i++) { - RETURN_ON_FAILURE(this, resourceGroupDesc.textures[i] != 
nullptr, 0, "'resourceGroupDesc.textures[%u]' is NULL", i); + RETURN_ON_FAILURE(this, resourceGroupDesc.textures[i] != nullptr, 0, "'textures[%u]' is NULL", i); TextureVal& textureVal = *(TextureVal*)resourceGroupDesc.textures[i]; texturesImpl[i] = textureVal.GetImpl(); @@ -1073,23 +1009,21 @@ NRI_INLINE uint32_t DeviceVal::CalculateAllocationNumber(const ResourceGroupDesc NRI_INLINE Result DeviceVal::AllocateAndBindMemory(const ResourceGroupDesc& resourceGroupDesc, Memory** allocations) { RETURN_ON_FAILURE(this, allocations != nullptr, Result::INVALID_ARGUMENT, "'allocations' is NULL"); - RETURN_ON_FAILURE(this, resourceGroupDesc.memoryLocation < MemoryLocation::MAX_NUM, Result::INVALID_ARGUMENT, "'resourceGroupDesc.memoryLocation' is invalid"); - RETURN_ON_FAILURE(this, resourceGroupDesc.bufferNum == 0 || resourceGroupDesc.buffers != nullptr, Result::INVALID_ARGUMENT, "'resourceGroupDesc.buffers' is NULL"); - RETURN_ON_FAILURE(this, resourceGroupDesc.textureNum == 0 || resourceGroupDesc.textures != nullptr, Result::INVALID_ARGUMENT, "'resourceGroupDesc.textures' is NULL"); - - Buffer** buffersImpl = StackAlloc(Buffer*, resourceGroupDesc.bufferNum); + RETURN_ON_FAILURE(this, resourceGroupDesc.memoryLocation < MemoryLocation::MAX_NUM, Result::INVALID_ARGUMENT, "'memoryLocation' is invalid"); + RETURN_ON_FAILURE(this, resourceGroupDesc.bufferNum == 0 || resourceGroupDesc.buffers != nullptr, Result::INVALID_ARGUMENT, "'buffers' is NULL"); + RETURN_ON_FAILURE(this, resourceGroupDesc.textureNum == 0 || resourceGroupDesc.textures != nullptr, Result::INVALID_ARGUMENT, "'textures' is NULL"); + Scratch buffersImpl = AllocateScratch(*this, Buffer*, resourceGroupDesc.bufferNum); for (uint32_t i = 0; i < resourceGroupDesc.bufferNum; i++) { - RETURN_ON_FAILURE(this, resourceGroupDesc.buffers[i] != nullptr, Result::INVALID_ARGUMENT, "'resourceGroupDesc.buffers[%u]' is NULL", i); + RETURN_ON_FAILURE(this, resourceGroupDesc.buffers[i] != nullptr, Result::INVALID_ARGUMENT, 
"'buffers[%u]' is NULL", i); BufferVal& bufferVal = *(BufferVal*)resourceGroupDesc.buffers[i]; buffersImpl[i] = bufferVal.GetImpl(); } - Texture** texturesImpl = StackAlloc(Texture*, resourceGroupDesc.textureNum); - + Scratch texturesImpl = AllocateScratch(*this, Texture*, resourceGroupDesc.textureNum); for (uint32_t i = 0; i < resourceGroupDesc.textureNum; i++) { - RETURN_ON_FAILURE(this, resourceGroupDesc.textures[i] != nullptr, Result::INVALID_ARGUMENT, "'resourceGroupDesc.textures[%u]' is NULL", i); + RETURN_ON_FAILURE(this, resourceGroupDesc.textures[i] != nullptr, Result::INVALID_ARGUMENT, "'textures[%u]' is NULL", i); TextureVal& textureVal = *(TextureVal*)resourceGroupDesc.textures[i]; texturesImpl[i] = textureVal.GetImpl(); @@ -1126,21 +1060,20 @@ NRI_INLINE Result DeviceVal::QueryVideoMemoryInfo(MemoryLocation memoryLocation, } NRI_INLINE Result DeviceVal::CreatePipeline(const RayTracingPipelineDesc& pipelineDesc, Pipeline*& pipeline) { - RETURN_ON_FAILURE(this, pipelineDesc.pipelineLayout != nullptr, Result::INVALID_ARGUMENT, "'pipelineDesc.pipelineLayout' is NULL"); - RETURN_ON_FAILURE(this, pipelineDesc.shaderLibrary != nullptr, Result::INVALID_ARGUMENT, "'pipelineDesc.shaderLibrary' is NULL"); - RETURN_ON_FAILURE(this, pipelineDesc.shaderGroupDescs != nullptr, Result::INVALID_ARGUMENT, "'pipelineDesc.shaderGroupDescs' is NULL"); - RETURN_ON_FAILURE(this, pipelineDesc.shaderGroupDescNum != 0, Result::INVALID_ARGUMENT, "'pipelineDesc.shaderGroupDescNum' is 0"); - RETURN_ON_FAILURE(this, pipelineDesc.recursionDepthMax != 0, Result::INVALID_ARGUMENT, "'pipelineDesc.recursionDepthMax' is 0"); + RETURN_ON_FAILURE(this, pipelineDesc.pipelineLayout != nullptr, Result::INVALID_ARGUMENT, "'pipelineLayout' is NULL"); + RETURN_ON_FAILURE(this, pipelineDesc.shaderLibrary != nullptr, Result::INVALID_ARGUMENT, "'shaderLibrary' is NULL"); + RETURN_ON_FAILURE(this, pipelineDesc.shaderGroupDescs != nullptr, Result::INVALID_ARGUMENT, "'shaderGroupDescs' is NULL"); + 
RETURN_ON_FAILURE(this, pipelineDesc.shaderGroupDescNum != 0, Result::INVALID_ARGUMENT, "'shaderGroupDescNum' is 0"); + RETURN_ON_FAILURE(this, pipelineDesc.recursionDepthMax != 0, Result::INVALID_ARGUMENT, "'recursionDepthMax' is 0"); uint32_t uniqueShaderStages = 0; for (uint32_t i = 0; i < pipelineDesc.shaderLibrary->shaderNum; i++) { const ShaderDesc& shaderDesc = pipelineDesc.shaderLibrary->shaders[i]; - RETURN_ON_FAILURE(this, shaderDesc.bytecode != nullptr, Result::INVALID_ARGUMENT, "'pipelineDesc.shaderLibrary->shaders[%u].bytecode' is invalid", i); - - RETURN_ON_FAILURE(this, shaderDesc.size != 0, Result::INVALID_ARGUMENT, "'pipelineDesc.shaderLibrary->shaders[%u].size' is 0", i); + RETURN_ON_FAILURE(this, shaderDesc.bytecode != nullptr, Result::INVALID_ARGUMENT, "'shaderLibrary->shaders[%u].bytecode' is invalid", i); + RETURN_ON_FAILURE(this, shaderDesc.size != 0, Result::INVALID_ARGUMENT, "'shaderLibrary->shaders[%u].size' is 0", i); RETURN_ON_FAILURE(this, IsShaderStageValid(shaderDesc.stage, uniqueShaderStages, StageBits::RAY_TRACING_SHADERS), Result::INVALID_ARGUMENT, - "'pipelineDesc.shaderLibrary->shaders[%u].stage' must include only 1 ray tracing shader stage, unique for the entire pipeline", i); + "'shaderLibrary->shaders[%u].stage' must include only 1 ray tracing shader stage, unique for the entire pipeline", i); } auto pipelineDescImpl = pipelineDesc; @@ -1156,8 +1089,7 @@ NRI_INLINE Result DeviceVal::CreatePipeline(const RayTracingPipelineDesc& pipeli } NRI_INLINE Result DeviceVal::CreateAccelerationStructure(const AccelerationStructureDesc& accelerationStructureDesc, AccelerationStructure*& accelerationStructure) { - RETURN_ON_FAILURE(this, accelerationStructureDesc.instanceOrGeometryObjectNum != 0, Result::INVALID_ARGUMENT, - "'accelerationStructureDesc.instanceOrGeometryObjectNum' is 0"); + RETURN_ON_FAILURE(this, accelerationStructureDesc.instanceOrGeometryObjectNum != 0, Result::INVALID_ARGUMENT, "'instanceOrGeometryObjectNum' is 0"); 
AccelerationStructureDesc accelerationStructureDescImpl = accelerationStructureDesc; @@ -1183,7 +1115,7 @@ NRI_INLINE Result DeviceVal::CreateAccelerationStructure(const AccelerationStruc } NRI_INLINE Result DeviceVal::AllocateAccelerationStructure(const AllocateAccelerationStructureDesc& accelerationStructureDesc, AccelerationStructure*& accelerationStructure) { - RETURN_ON_FAILURE(this, accelerationStructureDesc.desc.instanceOrGeometryObjectNum != 0, Result::INVALID_ARGUMENT, "'accelerationStructureDesc.instanceOrGeometryObjectNum' is 0"); + RETURN_ON_FAILURE(this, accelerationStructureDesc.desc.instanceOrGeometryObjectNum != 0, Result::INVALID_ARGUMENT, "'instanceOrGeometryObjectNum' is 0"); AllocateAccelerationStructureDesc accelerationStructureDescImpl = accelerationStructureDesc; @@ -1209,9 +1141,9 @@ NRI_INLINE Result DeviceVal::AllocateAccelerationStructure(const AllocateAcceler } NRI_INLINE Result DeviceVal::BindAccelerationStructureMemory(const AccelerationStructureMemoryBindingDesc* memoryBindingDescs, uint32_t memoryBindingDescNum) { - RETURN_ON_FAILURE(this, memoryBindingDescs != nullptr, Result::INVALID_ARGUMENT, "'memoryBindingDescs' is NULL"); + RETURN_ON_FAILURE(this, memoryBindingDescs != nullptr, Result::INVALID_ARGUMENT, "'memoryBindingDescs' is NULL"); - AccelerationStructureMemoryBindingDesc* memoryBindingDescsImpl = StackAlloc(AccelerationStructureMemoryBindingDesc, memoryBindingDescNum); + Scratch memoryBindingDescsImpl = AllocateScratch(*this, AccelerationStructureMemoryBindingDesc, memoryBindingDescNum); for (uint32_t i = 0; i < memoryBindingDescNum; i++) { AccelerationStructureMemoryBindingDesc& destDesc = memoryBindingDescsImpl[i]; const AccelerationStructureMemoryBindingDesc& srcDesc = memoryBindingDescs[i]; @@ -1220,15 +1152,15 @@ NRI_INLINE Result DeviceVal::BindAccelerationStructureMemory(const AccelerationS AccelerationStructureVal& accelerationStructure = (AccelerationStructureVal&)*srcDesc.accelerationStructure; const MemoryDesc& memoryDesc =
accelerationStructure.GetMemoryDesc(); - RETURN_ON_FAILURE(this, !accelerationStructure.IsBoundToMemory(), Result::INVALID_ARGUMENT, "'memoryBindingDescs[%u].accelerationStructure' is already bound to memory", i); - RETURN_ON_FAILURE(this, !memoryDesc.mustBeDedicated || srcDesc.offset == 0, Result::INVALID_ARGUMENT, "'memoryBindingDescs[%u].offset' must be 0 for dedicated allocation", i); - RETURN_ON_FAILURE(this, memoryDesc.alignment != 0, Result::INVALID_ARGUMENT, "'memoryBindingDescs[%u].alignment' is 0", i); - RETURN_ON_FAILURE(this, srcDesc.offset % memoryDesc.alignment == 0, Result::INVALID_ARGUMENT, "'memoryBindingDescs[%u].offset' is misaligned", i); + RETURN_ON_FAILURE(this, !accelerationStructure.IsBoundToMemory(), Result::INVALID_ARGUMENT, "'[%u].accelerationStructure' is already bound to memory", i); + RETURN_ON_FAILURE(this, !memoryDesc.mustBeDedicated || srcDesc.offset == 0, Result::INVALID_ARGUMENT, "'[%u].offset' must be 0 for dedicated allocation", i); + RETURN_ON_FAILURE(this, memoryDesc.alignment != 0, Result::INVALID_ARGUMENT, "'[%u].alignment' is 0", i); + RETURN_ON_FAILURE(this, srcDesc.offset % memoryDesc.alignment == 0, Result::INVALID_ARGUMENT, "'[%u].offset' is misaligned", i); const uint64_t rangeMax = srcDesc.offset + memoryDesc.size; const bool memorySizeIsUnknown = memory.GetSize() == 0; - RETURN_ON_FAILURE(this, memorySizeIsUnknown || rangeMax <= memory.GetSize(), Result::INVALID_ARGUMENT, "'memoryBindingDescs[%u].offset' is invalid", i); + RETURN_ON_FAILURE(this, memorySizeIsUnknown || rangeMax <= memory.GetSize(), Result::INVALID_ARGUMENT, "'[%u].offset' is invalid", i); destDesc = srcDesc; destDesc.memory = memory.GetImpl();