From 8dd05d6922b20f4e256405e5a91f8e8a159163f1 Mon Sep 17 00:00:00 2001 From: jacob Date: Mon, 24 Nov 2025 21:44:33 -0600 Subject: [PATCH] keep a constant number of rtv descriptors per command list --- src/base/base.h | 62 +++-- src/gpu/gpu_common.c | 8 +- src/gpu/gpu_common.h | 12 +- src/gpu/gpu_core.h | 250 ++++++++++-------- src/gpu/gpu_dx12/gpu_dx12.c | 496 +++++++++++++++++------------------- src/gpu/gpu_dx12/gpu_dx12.h | 30 ++- src/proto/proto.c | 17 +- src/proto/proto.lay | 1 - src/sprite/sprite.c | 14 +- 9 files changed, 454 insertions(+), 436 deletions(-) diff --git a/src/base/base.h b/src/base/base.h index 16a0756c..35700477 100644 --- a/src/base/base.h +++ b/src/base/base.h @@ -734,59 +734,55 @@ Struct(U128) Struct(PixelShader) { ResourceKey resource; }; Struct(ComputeShader) { ResourceKey resource; }; - //- Pointers + //- Shader object handles - Struct(BufferGpuPtr) { u32 v; }; - Struct(RWBufferGpuPtr) { u32 v; }; - Struct(IndexBufferGpuPtr) { u32 v; }; - Struct(Texture1DGpuPtr) { u32 v; }; - Struct(RWTexture1DGpuPtr) { u32 v; }; - Struct(Texture2DGpuPtr) { u32 v; }; - Struct(RWTexture2DGpuPtr) { u32 v; }; - Struct(Texture3DGpuPtr) { u32 v; }; - Struct(RWTexture3DGpuPtr) { u32 v; }; - Struct(RenderTargetGpuPtr) { u32 v; }; - Struct(SamplerGpuPtr) { u32 v; }; + Struct(StructuredBufferHandle) { u32 v; }; + Struct(RWStructuredBufferHandle) { u32 v; }; + Struct(Texture1DHandle) { u32 v; }; + Struct(RWTexture1DHandle) { u32 v; }; + Struct(Texture2DHandle) { u32 v; }; + Struct(RWTexture2DHandle) { u32 v; }; + Struct(Texture3DHandle) { u32 v; }; + Struct(RWTexture3DHandle) { u32 v; }; + Struct(SamplerHandle) { u32 v; }; - #define IsGpuPtrNil(p) ((p).v == 0) + #define IsGpuHandleNil(h) ((h).v == 0) #elif IsLanguageGpu //- Shader declaration #define ComputeShader(name, x) [numthreads(x, 1, 1)] void name(Semantic(u32, SV_DispatchThreadID)) #define ComputeShader2D(name, x, y) [numthreads(x, y, 1)] void name(Semantic(Vec2U32, SV_DispatchThreadID)) #define ComputeShader3D(name, x, y, z) [numthreads(x, y, z)] void name(Semantic(Vec3U32, SV_DispatchThreadID)) - #define VertexShader(name, return_type) return_type name(Semantic(u32, SV_VertexID), Semantic(u32, SV_InstanceID)) + #define VertexShader(name, return_type) return_type name(Semantic(u32, SV_InstanceID), Semantic(u32, SV_VertexID)) #define PixelShader(name, return_type, ...) return_type name(__VA_ARGS__) //- Semantic declaration # define Semantic(t, n) t n : n - //- Pointers + //- Shader object handles - typedef BufferGpuPtr u32; - typedef RWBufferGpuPtr u32; - typedef IndexBufferGpuPtr u32; - typedef Texture1DGpuPtr u32; - typedef RWTexture1DGpuPtr u32; - typedef Texture2DGpuPtr u32; - typedef RWTexture2DGpuPtr u32; - typedef Texture3DGpuPtr u32; - typedef RWTexture3DGpuPtr u32; - typedef RenderTargetGpuPtr u32; - typedef SamplerGpuPtr u32; + typedef StructuredBufferHandle u32; + typedef RWStructuredBufferHandle u32; + typedef Texture1DHandle u32; + typedef RWTexture1DHandle u32; + typedef Texture2DHandle u32; + typedef RWTexture2DHandle u32; + typedef Texture3DHandle u32; + typedef RWTexture3DHandle u32; + typedef SamplerHandle u32; - #define IsGpuPtrNil(p) ((p) == 0) + #define IsGpuHandleNil(h) ((h) == 0) //- Pointer dereference - #define DerefUniformBuffer(p) ResourceDescriptorHeap[p] - #define DerefUniformTexture(p) ResourceDescriptorHeap[p] - #define DerefUniformSampler(p) SamplerDescriptorHeap[p] + #define StructuredBufferFromUniformHandle(h) ResourceDescriptorHeap[h] + #define TextureFromUniformHandle(h) ResourceDescriptorHeap[h] + #define SamplerFromUniformHandle(h) SamplerDescriptorHeap[h] - #define DerefNonUniformBuffer(p) ResourceDescriptorHeap[NonUniformResourceIndex(p)] - #define DerefNonUniformTexture(p) ResourceDescriptorHeap[NonUniformResourceIndex(p)] - #define DerefNonUniformSampler(p) SamplerDescriptorHeap[NonUniformResourceIndex(p)] + #define StructuredBufferFromNonUniformHandle(h) ResourceDescriptorHeap[NonUniformResourceIndex(h)] + #define TextureFromNonUniformHandle(h) ResourceDescriptorHeap[NonUniformResourceIndex(h)] + #define SamplerFromNonUniformHandle(h) SamplerDescriptorHeap[NonUniformResourceIndex(h)] #endif //////////////////////////////////////////////////////////// diff --git a/src/gpu/gpu_common.c b/src/gpu/gpu_common.c index 5e72cb9f..f3cc718e 100644 --- a/src/gpu/gpu_common.c +++ b/src/gpu/gpu_common.c @@ -53,7 +53,7 @@ GPU_ArenaHandle GPU_PermArena(void) { i16 fiber_id = FiberId(); GPU_ArenaHandle perm = GPU_shared_util_state.perm_arenas[fiber_id]; - if (IsGpuPtrNil(perm)) + if (GPU_IsArenaNil(perm)) { GPU_shared_util_state.perm_arenas[fiber_id] = GPU_AcquireArena(); perm = GPU_shared_util_state.perm_arenas[fiber_id]; @@ -72,17 +72,17 @@ void GPU_CopyResourceFromCpu(GPU_CommandListHandle cl, GPU_ResourceHandle dst, S //////////////////////////////////////////////////////////// //~ Common resource helpers -SamplerGpuPtr GPU_GetCommonPointSampler(void) +SamplerHandle GPU_GetCommonPointSampler(void) { return GPU_shared_util_state.pt_sampler; } -IndexBufferGpuPtr GPU_GetCommonQuadIndices(void) +GPU_IndexBufferDesc GPU_GetCommonQuadIndices(void) { return GPU_shared_util_state.quad_indices; } -Texture3DGpuPtr GPU_GetCommonNoise(void) +Texture3DHandle GPU_GetCommonNoise(void) { return GPU_shared_util_state.noise_tex; } diff --git a/src/gpu/gpu_common.h b/src/gpu/gpu_common.h index 15e37dd7..6644ee5d 100644 --- a/src/gpu/gpu_common.h +++ b/src/gpu/gpu_common.h @@ -4,9 +4,9 @@ Struct(GPU_SharedUtilState) { /* Common shared resources */ - SamplerGpuPtr pt_sampler; - IndexBufferGpuPtr quad_indices; - Texture3DGpuPtr noise_tex; + SamplerHandle pt_sampler; + GPU_IndexBufferDesc quad_indices; + Texture3DHandle noise_tex; GPU_ArenaHandle perm_arenas[MaxFibers]; } extern GPU_shared_util_state; @@ -29,6 +29,6 @@ void GPU_CopyResourceFromCpu(GPU_CommandListHandle cl, GPU_ResourceHandle dst, S //////////////////////////////////////////////////////////// //~ Common resource helpers -SamplerGpuPtr GPU_GetCommonPointSampler(void); -IndexBufferGpuPtr GPU_GetCommonQuadIndices(void); -Texture3DGpuPtr GPU_GetCommonNoise(void); +SamplerHandle GPU_GetCommonPointSampler(void); +GPU_IndexBufferDesc GPU_GetCommonQuadIndices(void); +Texture3DHandle GPU_GetCommonNoise(void); diff --git a/src/gpu/gpu_core.h b/src/gpu/gpu_core.h index 3e9fc37e..5b853972 100644 --- a/src/gpu/gpu_core.h +++ b/src/gpu/gpu_core.h @@ -163,105 +163,112 @@ Enum(GPU_Format) //////////////////////////////////////////////////////////// //~ Barrier types -Enum(GPU_StageFlag) +Enum(GPU_Stage) { - GPU_StageFlag_NoStage = 0, + GPU_Stage_None = 0, /* Compute stages */ - GPU_StageFlag_ComputeShading = (1 << 1), + GPU_Stage_ComputeShading = (1 << 1), /* Draw stages */ - GPU_StageFlag_IndexAssembly = (1 << 2), - GPU_StageFlag_VertexShading = (1 << 3), - GPU_StageFlag_PixelShading = (1 << 4), - GPU_StageFlag_DepthStencil = (1 << 5), - GPU_StageFlag_RenderTarget = (1 << 6), + GPU_Stage_IndexAssembly = (1 << 2), + GPU_Stage_VertexShading = (1 << 3), + GPU_Stage_PixelShading = (1 << 4), + GPU_Stage_DepthStencil = (1 << 5), + GPU_Stage_RenderTarget = (1 << 6), /* Copy stages */ - GPU_StageFlag_Copy = (1 << 7), + GPU_Stage_Copy = (1 << 7), /* Indirect stages */ - GPU_StageFlag_Indirect = (1 << 8), + GPU_Stage_Indirect = (1 << 8), /* Aggregate stages */ - GPU_StageFlag_AllDrawStages = GPU_StageFlag_IndexAssembly | - GPU_StageFlag_VertexShading | - GPU_StageFlag_PixelShading | - GPU_StageFlag_DepthStencil | - GPU_StageFlag_RenderTarget, + GPU_Stage_AllDraw = GPU_Stage_IndexAssembly | + GPU_Stage_VertexShading | + GPU_Stage_PixelShading | + GPU_Stage_DepthStencil | + GPU_Stage_RenderTarget, - GPU_StageFlag_AllShadingStages = GPU_StageFlag_ComputeShading | - GPU_StageFlag_VertexShading | - GPU_StageFlag_PixelShading, + GPU_Stage_AllShading = GPU_Stage_ComputeShading | + GPU_Stage_VertexShading | + GPU_Stage_PixelShading, - GPU_StageFlag_AllNonPixelShadingStages = GPU_StageFlag_ComputeShading | - GPU_StageFlag_VertexShading, - - GPU_StageFlag_AllStages = 0xFFFFFFFF + GPU_Stage_All = 0xFFFFFFFF }; -Enum(GPU_AccessFlag) +Enum(GPU_Access) { - GPU_AccessFlag_NoAccess = 0, + GPU_Access_None = 0, - GPU_AccessFlag_ShaderReadWrite = (1 << 1), - GPU_AccessFlag_ShaderRead = (1 << 2), + GPU_Access_ShaderReadWrite = (1 << 1), + GPU_Access_ShaderRead = (1 << 2), - GPU_AccessFlag_CopyWrite = (1 << 3), - GPU_AccessFlag_CopyRead = (1 << 4), + GPU_Access_CopyWrite = (1 << 3), + GPU_Access_CopyRead = (1 << 4), - GPU_AccessFlag_IndexBuffer = (1 << 5), - GPU_AccessFlag_IndirectArgument = (1 << 6), + GPU_Access_IndexBuffer = (1 << 5), + GPU_Access_IndirectArgument = (1 << 6), - GPU_AccessFlag_DepthStencilRead = (1 << 7), - GPU_AccessFlag_DepthStencilWrite = (1 << 8), - GPU_AccessFlag_RenderTargetWrite = (1 << 9), + GPU_Access_DepthStencilRead = (1 << 7), + GPU_Access_DepthStencilWrite = (1 << 8), + GPU_Access_RenderTargetWrite = (1 << 9), - GPU_AccessFlag_AllAccess = 0xFFFFFFFF + GPU_Access_All = 0xFFFFFFFF }; -Enum(GPU_LayoutKind) +Enum(GPU_Layout) { - GPU_LayoutKind_NoChange, + GPU_Layout_NoChange, - GPU_LayoutKind_AnyQueue_AnyAccess, /* NOTE: Textures cannot transition to/from this layout. They must be created with it. */ + GPU_Layout_Undefined, /* D3D12_BARRIER_LAYOUT_UNDEFINED */ - GPU_LayoutKind_Undefined, /* D3D12_BARRIER_LAYOUT_UNDEFINED */ - GPU_LayoutKind_Present, /* D3D12_BARRIER_LAYOUT_COMMON */ + /* Allows a resource to be used on any queue with any access type, so long + * as there is only one writer at a time, and the writer is not writing to + * any texels currently being read. + * + * Resources cannot transition to/from this layout. They must be created + * with it and are locked to it. + */ + GPU_Layout_Simultaneous, ////////////////////////////// //- Queue-agnostic - GPU_LayoutKind_AnyQueue_ShaderRead_CopyRead_CopyWrite, /* D3D12_BARRIER_LAYOUT_COMMON */ - GPU_LayoutKind_AnyQueue_ShaderReadWrite, /* D3D12_BARRIER_LAYOUT_UNORDERED_ACCESS */ - - GPU_LayoutKind_AnyQueue_ShaderRead_CopyRead, /* D3D12_BARRIER_LAYOUT_GENERIC_READ */ - GPU_LayoutKind_AnyQueue_ShaderRead, /* D3D12_BARRIER_LAYOUT_SHADER_RESOURCE */ - GPU_LayoutKind_AnyQueue_CopyRead, /* D3D12_BARRIER_LAYOUT_COPY_SOURCE */ + GPU_Layout_AnyQueue_ShaderRead_CopyRead_CopyWrite_Present, /* D3D12_BARRIER_LAYOUT_COMMON */ ////////////////////////////// - //- Direct queue + //- Direct & Compute queue specific - GPU_LayoutKind_DirectQueue_ShaderReadWrite_CopyRead_CopyWrite, /* D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COMMON */ - GPU_LayoutKind_DirectQueue_ShaderReadWrite, /* D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_UNORDERED_ACCESS */ + GPU_Layout_DirectComputeQueue_ShaderRead_CopyRead, /* D3D12_BARRIER_LAYOUT_GENERIC_READ */ - GPU_LayoutKind_DirectQueue_ShaderRead_CopyRead_DepthStencilRead, /* D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_GENERIC_READ */ - GPU_LayoutKind_DirectQueue_ShaderRead, /* D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_SHADER_RESOURCE */ - GPU_LayoutKind_DirectQueue_CopyRead, /* D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COPY_SOURCE */ - - GPU_LayoutKind_DirectQueue_DepthStencilRead_DepthStencilWrite, /* D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE */ - GPU_LayoutKind_DirectQueue_DepthStencilRead, /* D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_READ */ - GPU_LayoutKind_DirectQueue_RenderTargetWrite, /* D3D12_BARRIER_LAYOUT_RENDER_TARGET */ + GPU_Layout_DirectComputeQueue_ShaderReadWrite, /* D3D12_BARRIER_LAYOUT_UNORDERED_ACCESS */ + GPU_Layout_DirectComputeQueue_ShaderRead, /* D3D12_BARRIER_LAYOUT_SHADER_RESOURCE */ + GPU_Layout_DirectComputeQueue_CopyRead, /* D3D12_BARRIER_LAYOUT_COPY_SOURCE */ ////////////////////////////// - //- Compute queue + //- Direct queue specific - GPU_LayoutKind_ComputeQueue_ShaderReadWrite_CopyRead_CopyWrite, /* D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COMMON */ - GPU_LayoutKind_ComputeQueue_ShaderReadWrite, /* D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_UNORDERED_ACCESS */ + GPU_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite, /* D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COMMON */ + GPU_Layout_DirectQueue_ShaderRead_CopyRead_DepthStencilRead, /* D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_GENERIC_READ */ - GPU_LayoutKind_ComputeQueue_ShaderRead_CopyRead, /* D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_GENERIC_READ */ - GPU_LayoutKind_ComputeQueue_ShaderRead, /* D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_SHADER_RESOURCE */ - GPU_LayoutKind_ComputeQueue_CopyRead, /* D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COPY_SOURCE */ + GPU_Layout_DirectQueue_ShaderReadWrite, /* D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_UNORDERED_ACCESS */ + GPU_Layout_DirectQueue_ShaderRead, /* D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_SHADER_RESOURCE */ + GPU_Layout_DirectQueue_CopyRead, /* D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COPY_SOURCE */ + + GPU_Layout_DirectQueue_DepthStencilRead_DepthStencilWrite, /* D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE */ + GPU_Layout_DirectQueue_DepthStencilRead, /* D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_READ */ + GPU_Layout_DirectQueue_RenderTargetWrite, /* D3D12_BARRIER_LAYOUT_RENDER_TARGET */ + + ////////////////////////////// + //- Compute queue specific + + GPU_Layout_ComputeQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite, /* D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COMMON */ + GPU_Layout_ComputeQueue_ShaderRead_CopyRead, /* D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_GENERIC_READ */ + + GPU_Layout_ComputeQueue_ShaderReadWrite, /* D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_UNORDERED_ACCESS */ + GPU_Layout_ComputeQueue_ShaderRead, /* D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_SHADER_RESOURCE */ + GPU_Layout_ComputeQueue_CopyRead, /* D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COPY_SOURCE */ }; /* Barrier will execute after previous stages specified by `sync_prev`, and before next stages specified by `sync_next`. @@ -272,11 +279,12 @@ Enum(GPU_LayoutKind) Struct(GPU_BarrierDesc) { GPU_ResourceHandle resource; - GPU_StageFlag sync_prev; - GPU_StageFlag sync_next; - GPU_AccessFlag access_prev; - GPU_AccessFlag access_next; - GPU_LayoutKind layout; + b32 is_global; + GPU_Stage sync_prev; + GPU_Stage sync_next; + GPU_Access access_prev; + GPU_Access access_next; + GPU_Layout layout; }; //////////////////////////////////////////////////////////// @@ -319,7 +327,7 @@ Struct(GPU_TextureDesc) GPU_Format format; Vec3I32 dims; GPU_TextureFlag flags; - GPU_LayoutKind initial_layout; + GPU_Layout initial_layout; i32 mip_levels; /* Will be clamped to range [1, max] */ Vec4 clear_color; }; @@ -432,6 +440,13 @@ Enum(GPU_RasterMode) GPU_RasterMode_WireTriangleStrip, }; +Struct(GPU_IndexBufferDesc) +{ + GPU_ResourceHandle resource; + u32 index_size; /* Either 2 for u16 indices, or 4 for u32 indices */ + u32 index_count; +}; + //////////////////////////////////////////////////////////// //~ Synchronization types @@ -480,6 +495,8 @@ void GPU_Startup(void); GPU_ArenaHandle GPU_AcquireArena(void); void GPU_ReleaseArena(GPU_ArenaHandle arena); +b32 GPU_IsArenaNil(GPU_ArenaHandle handle); + //////////////////////////////////////////////////////////// //~ @hookdecl Resource @@ -489,6 +506,8 @@ GPU_ResourceHandle GPU_PushBufferEx(GPU_ArenaHandle arena, GPU_BufferDesc desc); GPU_ResourceHandle GPU_PushTextureEx(GPU_ArenaHandle arena, GPU_TextureDesc desc); GPU_ResourceHandle GPU_PushSampler(GPU_ArenaHandle arena, GPU_SamplerDesc desc); +b32 GPU_IsResourceNil(GPU_ResourceHandle handle); + #define GPU_PushBuffer(arena, type, count, ...) GPU_PushBufferEx((arena), \ (GPU_BufferDesc) { \ .size = sizeof(type) * (count), \ @@ -526,23 +545,20 @@ GPU_ResourceHandle GPU_PushSampler(GPU_ArenaHandle arena, GPU_SamplerDesc desc); } \ ) -//- Pointer creation +//- Shader handle creation -BufferGpuPtr GPU_PushBufferPtrEx (GPU_ArenaHandle arena, GPU_ResourceHandle resource, u32 element_size, RngU32 element_range); -RWBufferGpuPtr GPU_PushRWBufferPtrEx (GPU_ArenaHandle arena, GPU_ResourceHandle resource, u32 element_size, RngU32 element_range); -IndexBufferGpuPtr GPU_PushIndexBufferPtrEx (GPU_ArenaHandle arena, GPU_ResourceHandle resource, u32 element_size, RngU32 element_range); -Texture1DGpuPtr GPU_PushTexture1DPtr (GPU_ArenaHandle arena, GPU_ResourceHandle resource); -RWTexture1DGpuPtr GPU_PushRWTexture1DPtr (GPU_ArenaHandle arena, GPU_ResourceHandle resource); -Texture2DGpuPtr GPU_PushTexture2DPtr (GPU_ArenaHandle arena, GPU_ResourceHandle resource); -RWTexture2DGpuPtr GPU_PushRWTexture2DPtr (GPU_ArenaHandle arena, GPU_ResourceHandle resource); -Texture3DGpuPtr GPU_PushTexture3DPtr (GPU_ArenaHandle arena, GPU_ResourceHandle resource); -RWTexture3DGpuPtr GPU_PushRWTexture3DPtr (GPU_ArenaHandle arena, GPU_ResourceHandle resource); -RenderTargetGpuPtr GPU_PushRenderTargetPtr (GPU_ArenaHandle arena, GPU_ResourceHandle resource); -SamplerGpuPtr GPU_PushSamplerPtr (GPU_ArenaHandle arena, GPU_ResourceHandle resource); +StructuredBufferHandle GPU_PushStructuredBufferHandleEx (GPU_ArenaHandle arena, GPU_ResourceHandle resource, u32 element_size, RngU32 element_range); +RWStructuredBufferHandle GPU_PushRWStructuredBufferHandleEx (GPU_ArenaHandle arena, GPU_ResourceHandle resource, u32 element_size, RngU32 element_range); +Texture1DHandle GPU_PushTexture1DHandle (GPU_ArenaHandle arena, GPU_ResourceHandle resource); +RWTexture1DHandle GPU_PushRWTexture1DHandle (GPU_ArenaHandle arena, GPU_ResourceHandle resource); +Texture2DHandle GPU_PushTexture2DHandle (GPU_ArenaHandle arena, GPU_ResourceHandle resource); +RWTexture2DHandle GPU_PushRWTexture2DHandle (GPU_ArenaHandle arena, GPU_ResourceHandle resource); +Texture3DHandle GPU_PushTexture3DHandle (GPU_ArenaHandle arena, GPU_ResourceHandle resource); +RWTexture3DHandle GPU_PushRWTexture3DHandle (GPU_ArenaHandle arena, GPU_ResourceHandle resource); +SamplerHandle GPU_PushSamplerHandle (GPU_ArenaHandle arena, GPU_ResourceHandle resource); -#define GPU_PushBufferPtr(arena, resource, type) GPU_PushBufferPtrEx((arena), (resource), sizeof(type), RNGU32(0, GPU_CountBuffer((resource), type))) -#define GPU_PushRWBufferPtr(arena, resource, type) GPU_PushRWBufferPtrEx((arena), (resource), sizeof(type), RNGU32(0, GPU_CountBuffer((resource), type))) -#define GPU_PushIndexBufferPtr(arena, resource, type) GPU_PushIndexBufferPtrEx((arena), (resource), sizeof(type), RNGU32(0, GPU_CountBuffer((resource), type))) +#define GPU_PushStructuredBufferHandle(arena, resource, type) GPU_PushStructuredBufferHandleEx((arena), (resource), sizeof(type), RNGU32(0, GPU_CountBuffer((resource), type))) +#define GPU_PushRWStructuredBufferHandle(arena, resource, type) GPU_PushRWStructuredBufferHandleEx((arena), (resource), sizeof(type), RNGU32(0, GPU_CountBuffer((resource), type))) //- Count @@ -578,32 +594,58 @@ void GPU_CopyTexels(GPU_CommandListHandle cl, GPU_ResourceHandle dst, Vec3I32 ds //- Constants -void GPU_SetConstU32 (GPU_CommandListHandle cl, i32 slot, u32 v); -void GPU_SetConstF32 (GPU_CommandListHandle cl, i32 slot, f32 v); -void GPU_SetConstBuffer (GPU_CommandListHandle cl, i32 slot, BufferGpuPtr v); -void GPU_SetConstRWBuffer (GPU_CommandListHandle cl, i32 slot, RWBufferGpuPtr v); -void GPU_SetConstTexture1D (GPU_CommandListHandle cl, i32 slot, Texture1DGpuPtr v); -void GPU_SetConstRWTexture1D (GPU_CommandListHandle cl, i32 slot, RWTexture1DGpuPtr v); -void GPU_SetConstTexture2D (GPU_CommandListHandle cl, i32 slot, Texture2DGpuPtr v); -void GPU_SetConstRWTexture2D (GPU_CommandListHandle cl, i32 slot, RWTexture2DGpuPtr v); -void GPU_SetConstTexture3D (GPU_CommandListHandle cl, i32 slot, Texture3DGpuPtr v); -void GPU_SetConstRWTexture3D (GPU_CommandListHandle cl, i32 slot, RWTexture3DGpuPtr v); -void GPU_SetConstSampler (GPU_CommandListHandle cl, i32 slot, SamplerGpuPtr v); +void GPU_SetConstU32 (GPU_CommandListHandle cl, i32 slot, u32 v); +void GPU_SetConstF32 (GPU_CommandListHandle cl, i32 slot, f32 v); +void GPU_SetConstStructuredBuffer (GPU_CommandListHandle cl, i32 slot, StructuredBufferHandle v); +void GPU_SetConstRWStructuredBuffer (GPU_CommandListHandle cl, i32 slot, RWStructuredBufferHandle v); +void GPU_SetConstTexture1D (GPU_CommandListHandle cl, i32 slot, Texture1DHandle v); +void GPU_SetConstRWTexture1D (GPU_CommandListHandle cl, i32 slot, RWTexture1DHandle v); +void GPU_SetConstTexture2D (GPU_CommandListHandle cl, i32 slot, Texture2DHandle v); +void GPU_SetConstRWTexture2D (GPU_CommandListHandle cl, i32 slot, RWTexture2DHandle v); +void GPU_SetConstTexture3D (GPU_CommandListHandle cl, i32 slot, Texture3DHandle v); +void GPU_SetConstRWTexture3D (GPU_CommandListHandle cl, i32 slot, RWTexture3DHandle v); +void GPU_SetConstSampler (GPU_CommandListHandle cl, i32 slot, SamplerHandle v); //- Barrier void GPU_BarrierEx(GPU_CommandListHandle cl, GPU_BarrierDesc desc); -#define GPU_LayoutBarrier(_cl, _resource, _sync_prev, _sync_next, _access_prev, _access_next, _layout) \ + +#define GPU_GlobalBarrier(_cl, _sync_prev, _sync_next, _access_prev, _access_next) \ + GPU_BarrierEx((_cl), (GPU_BarrierDesc) { \ + .is_global = 1, \ + .sync_prev = _sync_prev, \ + .sync_next = _sync_next, \ + .access_prev = _access_prev, \ + .access_next = _access_next, \ + }) + +#define GPU_MemoryBarrier(_cl, _resource, _sync_prev, _sync_next, _access_prev, _access_next) \ GPU_BarrierEx((_cl), (GPU_BarrierDesc) { \ .resource = (_resource), \ - .sync_prev = GPU_StageFlag_##_sync_prev, \ - .sync_next = GPU_StageFlag_##_sync_next, \ - .access_prev = GPU_AccessFlag_##_access_prev, \ - .access_next = GPU_AccessFlag_##_access_next, \ - .layout = GPU_LayoutKind_##_layout, \ + .sync_prev = _sync_prev, \ + .sync_next = _sync_next, \ + .access_prev = _access_prev, \ + .access_next = _access_next, \ }) -#define GPU_Barrier(_cl, _resource, _sync_prev, _sync_next, _access_prev, _access_next) \ - GPU_LayoutBarrier((_cl), (_resource), _sync_prev, _sync_next, _access_prev, _access_next) + +#define GPU_LayoutBarrier(_cl, _resource, _layout, _sync_prev, _sync_next, _access_prev, _access_next) \ + GPU_BarrierEx((_cl), (GPU_BarrierDesc) { \ + .resource = (_resource), \ + .layout = _layout, \ + .sync_prev = _sync_prev, \ + .sync_next = _sync_next, \ + .access_prev = _access_prev, \ + .access_next = _access_next, \ + }) + +#define GPU_DumbGlobalBarrier(_cl) \ + GPU_GlobalBarrier((_cl), GPU_Stage_All, GPU_Stage_All, GPU_Access_All, GPU_Access_All) + +#define GPU_DumbMemoryBarrier(_cl, _resource) \ + GPU_MemoryBarrier((_cl), (_resource), GPU_Stage_All, GPU_Stage_All, GPU_Access_All, GPU_Access_All) + +#define GPU_DumbLayoutBarrier(_cl, _resource, _layout) \ + GPU_LayoutBarrier((_cl), (_resource), (_layout), GPU_Stage_All, GPU_Stage_All, GPU_Access_All, GPU_Access_All) //- Compute @@ -613,14 +655,14 @@ void GPU_Compute(GPU_CommandListHandle cl, ComputeShader cs, Vec3I32 groups); void GPU_Rasterize(GPU_CommandListHandle cl, VertexShader vs, PixelShader ps, - u32 instances_count, IndexBufferGpuPtr idx_buff, - u32 raster_targets_count, RenderTargetGpuPtr *raster_targets, + u32 instances_count, GPU_IndexBufferDesc index_buffer, + u32 render_targets_count, GPU_ResourceHandle *render_targets, Rng3 viewport, Rng2 scissor, GPU_RasterMode mode); //- Clear -void GPU_ClearRenderTarget(GPU_CommandListHandle cl, RenderTargetGpuPtr ptr, Vec4 color); +void GPU_ClearRenderTarget(GPU_CommandListHandle cl, GPU_ResourceHandle render_target, Vec4 color); //- Profile diff --git a/src/gpu/gpu_dx12/gpu_dx12.c b/src/gpu/gpu_dx12/gpu_dx12.c index 1a356f56..f4b8312a 100644 --- a/src/gpu/gpu_dx12/gpu_dx12.c +++ b/src/gpu/gpu_dx12/gpu_dx12.c @@ -183,28 +183,27 @@ void GPU_D12_Startup(void) { Struct(Dx12HeapDesc) { D3D12_DESCRIPTOR_HEAP_TYPE type; D3D12_DESCRIPTOR_HEAP_FLAGS flags; u64 max; }; - Dx12HeapDesc descs[] = { - { + Dx12HeapDesc descs[GPU_D12_DescriptorHeapKind_Count] = { + [GPU_D12_DescriptorHeapKind_CbvSrvUav] = { .type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, - .flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE, .max = GPU_D12_MaxCbvSrvUavDescriptors, }, - { + [GPU_D12_DescriptorHeapKind_Rtv] = { .type = D3D12_DESCRIPTOR_HEAP_TYPE_RTV, .flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE, .max = GPU_D12_MaxRtvDescriptors, }, - { + [GPU_D12_DescriptorHeapKind_Sampler] = { .type = D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, .flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE, .max = GPU_D12_MaxSamplerDescriptors, }, }; - for (u32 i = 0; i < countof(descs); ++i) + for (GPU_D12_DescriptorHeapKind kind = 0; kind < countof(descs); ++kind) { - Dx12HeapDesc desc = descs[i]; - GPU_D12_DescriptorHeap *heap = &g->descriptor_heaps[i]; + Dx12HeapDesc desc = descs[kind]; + GPU_D12_DescriptorHeap *heap = &g->descriptor_heaps[kind]; heap->descriptors_arena = AcquireArena(Gibi(1)); heap->type = desc.type; @@ -215,12 +214,31 @@ void GPU_D12_Startup(void) d3d_desc.Type = desc.type; d3d_desc.Flags = desc.flags; d3d_desc.NumDescriptors = desc.max; - HRESULT hr = ID3D12Device_CreateDescriptorHeap(g->device, &d3d_desc, &IID_ID3D12DescriptorHeap, (void **)&heap->d3d_heap); + + HRESULT hr = 1; + + if (SUCCEEDED(hr)) + { + hr = ID3D12Device_CreateDescriptorHeap(g->device, &d3d_desc, &IID_ID3D12DescriptorHeap, (void **)&heap->d3d_heap); + } + + if (SUCCEEDED(hr)) + { + ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(heap->d3d_heap, &heap->start_handle); + } + + if (SUCCEEDED(hr)) + { + /* Push an empty descriptor at index 0, so that a handle with a value of 0 always represents nil */ + GPU_D12_Arena *gpu_perm = GPU_D12_ArenaFromHandle(GPU_PermArena()); + GPU_D12_Descriptor *nil_descriptor = GPU_D12_PushDescriptor(gpu_perm, kind); + Assert(nil_descriptor->index == 0); + } + if (FAILED(hr)) { Panic(Lit("Failed to create descriptor heap")); } - ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(heap->d3d_heap, &heap->start_handle); } } @@ -309,78 +327,77 @@ DXGI_FORMAT GPU_D12_DxgiFormatFromGpuFormat(GPU_Format format) return (DXGI_FORMAT)format; } -D3D12_BARRIER_SYNC GPU_D12_BarrierSyncFromStageFlags(GPU_StageFlag flags) +D3D12_BARRIER_SYNC GPU_D12_BarrierSyncFromStages(GPU_Stage stages) { D3D12_BARRIER_SYNC result = 0; - if (flags == GPU_StageFlag_AllStages) + if (stages == GPU_Stage_All) { result = D3D12_BARRIER_SYNC_ALL; } else { - result |= D3D12_BARRIER_SYNC_COMPUTE_SHADING * AnyBit(flags, GPU_StageFlag_ComputeShading); - result |= D3D12_BARRIER_SYNC_INDEX_INPUT * AnyBit(flags, GPU_StageFlag_IndexAssembly); - result |= D3D12_BARRIER_SYNC_VERTEX_SHADING * AnyBit(flags, GPU_StageFlag_VertexShading); - result |= D3D12_BARRIER_SYNC_PIXEL_SHADING * AnyBit(flags, GPU_StageFlag_PixelShading); - result |= D3D12_BARRIER_SYNC_DEPTH_STENCIL * AnyBit(flags, GPU_StageFlag_DepthStencil); - result |= D3D12_BARRIER_SYNC_RENDER_TARGET * AnyBit(flags, GPU_StageFlag_RenderTarget); - result |= D3D12_BARRIER_SYNC_COPY * AnyBit(flags, GPU_StageFlag_Copy); - result |= D3D12_BARRIER_SYNC_EXECUTE_INDIRECT * AnyBit(flags, GPU_StageFlag_Indirect); + result |= D3D12_BARRIER_SYNC_COMPUTE_SHADING * AnyBit(stages, GPU_Stage_ComputeShading); + result |= D3D12_BARRIER_SYNC_INDEX_INPUT * AnyBit(stages, GPU_Stage_IndexAssembly); + result |= D3D12_BARRIER_SYNC_VERTEX_SHADING * AnyBit(stages, GPU_Stage_VertexShading); + result |= D3D12_BARRIER_SYNC_PIXEL_SHADING * AnyBit(stages, GPU_Stage_PixelShading); + result |= D3D12_BARRIER_SYNC_DEPTH_STENCIL * AnyBit(stages, GPU_Stage_DepthStencil); + result |= D3D12_BARRIER_SYNC_RENDER_TARGET * AnyBit(stages, GPU_Stage_RenderTarget); + result |= D3D12_BARRIER_SYNC_COPY * AnyBit(stages, GPU_Stage_Copy); + result |= D3D12_BARRIER_SYNC_EXECUTE_INDIRECT * AnyBit(stages, GPU_Stage_Indirect); } return result; } -D3D12_BARRIER_ACCESS GPU_D12_BarrierAccessFromAccessFlags(GPU_AccessFlag flags) +D3D12_BARRIER_ACCESS GPU_D12_BarrierAccessFromAccesses(GPU_Access accesses) { D3D12_BARRIER_ACCESS result = 0; - if (flags == 0) + if (accesses == 0) { result = D3D12_BARRIER_ACCESS_NO_ACCESS; } - else if (flags == GPU_AccessFlag_AllAccess) + else if (accesses == GPU_Access_All) { result = D3D12_BARRIER_ACCESS_COMMON; } else { - result |= D3D12_BARRIER_ACCESS_UNORDERED_ACCESS * AnyBit(flags, GPU_AccessFlag_ShaderReadWrite); - result |= D3D12_BARRIER_ACCESS_SHADER_RESOURCE * AnyBit(flags, GPU_AccessFlag_ShaderRead); - result |= D3D12_BARRIER_ACCESS_COPY_DEST * AnyBit(flags, GPU_AccessFlag_CopyWrite); - result |= D3D12_BARRIER_ACCESS_COPY_SOURCE * AnyBit(flags, GPU_AccessFlag_CopyRead); - result |= D3D12_BARRIER_ACCESS_INDEX_BUFFER * AnyBit(flags, GPU_AccessFlag_IndexBuffer); - result |= D3D12_BARRIER_ACCESS_INDIRECT_ARGUMENT * AnyBit(flags, GPU_AccessFlag_IndirectArgument); - result |= D3D12_BARRIER_ACCESS_DEPTH_STENCIL_READ * AnyBit(flags, GPU_AccessFlag_DepthStencilRead); - result |= D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE * AnyBit(flags, GPU_AccessFlag_DepthStencilWrite); - result |= D3D12_BARRIER_ACCESS_RENDER_TARGET * AnyBit(flags, GPU_AccessFlag_RenderTargetWrite); + result |= D3D12_BARRIER_ACCESS_UNORDERED_ACCESS * AnyBit(accesses, GPU_Access_ShaderReadWrite); + result |= D3D12_BARRIER_ACCESS_SHADER_RESOURCE * AnyBit(accesses, GPU_Access_ShaderRead); + result |= D3D12_BARRIER_ACCESS_COPY_DEST * AnyBit(accesses, GPU_Access_CopyWrite); + result |= D3D12_BARRIER_ACCESS_COPY_SOURCE * AnyBit(accesses, GPU_Access_CopyRead); + result |= D3D12_BARRIER_ACCESS_INDEX_BUFFER * AnyBit(accesses, GPU_Access_IndexBuffer); + result |= D3D12_BARRIER_ACCESS_INDIRECT_ARGUMENT * AnyBit(accesses, GPU_Access_IndirectArgument); + result |= D3D12_BARRIER_ACCESS_DEPTH_STENCIL_READ * AnyBit(accesses, GPU_Access_DepthStencilRead); + result |= D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE * AnyBit(accesses, GPU_Access_DepthStencilWrite); + result |= D3D12_BARRIER_ACCESS_RENDER_TARGET * AnyBit(accesses, GPU_Access_RenderTargetWrite); } return result; } -D3D12_BARRIER_LAYOUT GPU_D12_BarrierLayoutFromLayoutKind(GPU_LayoutKind kind) +D3D12_BARRIER_LAYOUT GPU_D12_BarrierLayoutFromLayout(GPU_Layout layout) { PERSIST Readonly D3D12_BARRIER_LAYOUT translate[] = { - [GPU_LayoutKind_Undefined] = D3D12_BARRIER_LAYOUT_UNDEFINED, - [GPU_LayoutKind_Present] = D3D12_BARRIER_LAYOUT_COMMON, - [GPU_LayoutKind_AnyQueue_ShaderRead_CopyRead_CopyWrite] = D3D12_BARRIER_LAYOUT_COMMON, - [GPU_LayoutKind_AnyQueue_ShaderReadWrite] = D3D12_BARRIER_LAYOUT_UNORDERED_ACCESS, - [GPU_LayoutKind_AnyQueue_ShaderRead_CopyRead] = D3D12_BARRIER_LAYOUT_GENERIC_READ, - [GPU_LayoutKind_AnyQueue_ShaderRead] = D3D12_BARRIER_LAYOUT_SHADER_RESOURCE, - [GPU_LayoutKind_AnyQueue_CopyRead] = D3D12_BARRIER_LAYOUT_COPY_SOURCE, - [GPU_LayoutKind_DirectQueue_ShaderReadWrite_CopyRead_CopyWrite] = D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COMMON, - [GPU_LayoutKind_DirectQueue_ShaderReadWrite] = D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_UNORDERED_ACCESS, - [GPU_LayoutKind_DirectQueue_ShaderRead_CopyRead_DepthStencilRead] = D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_GENERIC_READ, - [GPU_LayoutKind_DirectQueue_ShaderRead] = D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_SHADER_RESOURCE, - [GPU_LayoutKind_DirectQueue_CopyRead] = D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COPY_SOURCE, - [GPU_LayoutKind_DirectQueue_DepthStencilRead_DepthStencilWrite] = D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE, - [GPU_LayoutKind_DirectQueue_DepthStencilRead] = D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_READ, - [GPU_LayoutKind_DirectQueue_RenderTargetWrite] = D3D12_BARRIER_LAYOUT_RENDER_TARGET, - [GPU_LayoutKind_ComputeQueue_ShaderReadWrite_CopyRead_CopyWrite] = D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COMMON, - [GPU_LayoutKind_ComputeQueue_ShaderReadWrite] = D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_UNORDERED_ACCESS, - [GPU_LayoutKind_ComputeQueue_ShaderRead_CopyRead] = D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_GENERIC_READ, - [GPU_LayoutKind_ComputeQueue_ShaderRead] = D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_SHADER_RESOURCE, - [GPU_LayoutKind_ComputeQueue_CopyRead] = D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COPY_SOURCE, + [GPU_Layout_Undefined] = D3D12_BARRIER_LAYOUT_UNDEFINED, + [GPU_Layout_AnyQueue_ShaderRead_CopyRead_CopyWrite_Present] = D3D12_BARRIER_LAYOUT_COMMON, + [GPU_Layout_DirectComputeQueue_ShaderReadWrite] = D3D12_BARRIER_LAYOUT_UNORDERED_ACCESS, + [GPU_Layout_DirectComputeQueue_ShaderRead_CopyRead] = D3D12_BARRIER_LAYOUT_GENERIC_READ, + [GPU_Layout_DirectComputeQueue_ShaderRead] = D3D12_BARRIER_LAYOUT_SHADER_RESOURCE, + [GPU_Layout_DirectComputeQueue_CopyRead] = D3D12_BARRIER_LAYOUT_COPY_SOURCE, + [GPU_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite] = D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COMMON, + [GPU_Layout_DirectQueue_ShaderReadWrite] = D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_UNORDERED_ACCESS, + [GPU_Layout_DirectQueue_ShaderRead_CopyRead_DepthStencilRead] = D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_GENERIC_READ, + [GPU_Layout_DirectQueue_ShaderRead] = D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_SHADER_RESOURCE, + [GPU_Layout_DirectQueue_CopyRead] = D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COPY_SOURCE, + [GPU_Layout_DirectQueue_DepthStencilRead_DepthStencilWrite] = D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE, + [GPU_Layout_DirectQueue_DepthStencilRead] = D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_READ, + [GPU_Layout_DirectQueue_RenderTargetWrite] = D3D12_BARRIER_LAYOUT_RENDER_TARGET, + [GPU_Layout_ComputeQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite] = D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COMMON, + [GPU_Layout_ComputeQueue_ShaderReadWrite] = D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_UNORDERED_ACCESS, + [GPU_Layout_ComputeQueue_ShaderRead_CopyRead] = D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_GENERIC_READ, + [GPU_Layout_ComputeQueue_ShaderRead] = D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_SHADER_RESOURCE, + [GPU_Layout_ComputeQueue_CopyRead] = D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COPY_SOURCE, }; - return translate[kind]; + return translate[layout]; }; //////////////////////////////////////////////////////////// @@ -550,68 +567,6 @@ GPU_D12_Queue *GPU_D12_QueueFromKind(GPU_QueueKind kind) return &g->queues[kind]; } -//////////////////////////////////////////////////////////// -//~ Descriptor - -// GPU_D12_Descriptor *GPU_D12_AcquireDescriptor(GPU_D12_DescriptorHeap *heap) -// { -// GPU_D12_Descriptor *d = 0; -// u32 index = 0; -// D3D12_CPU_DESCRIPTOR_HANDLE handle = ZI; -// { -// Lock lock = LockE(&heap->mutex); -// if (heap->first_free) -// { -// d = heap->first_free; -// heap->first_free = d->next_free; -// handle = d->handle; -// index = d->index; -// } -// else -// { -// if (heap->allocated_count >= heap->max_count) -// { -// Panic(Lit("Max descriptors reached in heap")); -// } -// d = PushStructNoZero(heap->arena, GPU_D12_Descriptor); -// index = heap->allocated_count++; -// handle.ptr = heap->start_handle.ptr + (index * heap->descriptor_size); -// Atomic64FetchAdd(&GPU_D12_shared_state.driver_descriptors_allocated, 1); -// } -// Unlock(&lock); -// } -// ZeroStruct(d); -// d->valid = 1; -// d->heap = heap; -// d->handle = handle; -// d->index = index; -// return d; -// } - -// void GPU_D12_ReleaseDescriptor(GPU_D12_Descriptor *descriptor) -// { -// GPU_D12_DescriptorHeap *heap = descriptor->heap; -// Lock lock = LockE(&heap->mutex); -// { -// descriptor->next_free = heap->first_free; -// heap->first_free = descriptor; -// } -// Unlock(&lock); -// } - -// GPU_D12_Descriptor *GPU_D12_DescriptorFromRtPtr(RenderTargetGpuPtr ptr) -// { -// /* TODO */ -// return 0; -// } - -// D3D12_INDEX_BUFFER_VIEW GPU_D12_IbvFromIbPtr(IndexBufferGpuPtr ptr) -// { -// /* TODO */ -// D3D12_INDEX_BUFFER_VIEW result = ZI; -// return result; -// } - //////////////////////////////////////////////////////////// //~ Raw command list @@ -650,34 +605,50 @@ GPU_D12_RawCommandList *GPU_D12_PrepareRawCommandList(GPU_QueueKind queue_kind) } cl->queue = queue; - HRESULT hr = ID3D12Device_CreateCommandAllocator(g->device, queue->desc.type, &IID_ID3D12CommandAllocator, (void **)&cl->ca); - if (FAILED(hr)) + HRESULT hr = 0; { - Panic(Lit("Failed to create command allocator")); + if (SUCCEEDED(hr)) + { + hr = ID3D12Device_CreateCommandAllocator(g->device, queue->desc.type, &IID_ID3D12CommandAllocator, (void **)&cl->d3d_ca); + } + + if (SUCCEEDED(hr)) + { + hr = ID3D12Device_CreateCommandList(g->device, 0, queue->desc.type, cl->d3d_ca, 0, &IID_ID3D12GraphicsCommandList7, (void **)&cl->d3d_cl); + } + + if (SUCCEEDED(hr)) + { + hr = ID3D12GraphicsCommandList_Close(cl->d3d_cl); + } + + /* Initialize Direct queue CPU-only descriptors */ + if (SUCCEEDED(hr) && queue_kind == GPU_QueueKind_Direct) + { + GPU_D12_Arena *gpu_perm = GPU_D12_ArenaFromHandle(GPU_PermArena()); + for (u32 i = 0; i < countof(cl->rtv_descriptors); ++i) + { + cl->rtv_descriptors[i] = GPU_D12_PushDescriptor(gpu_perm, GPU_D12_DescriptorHeapKind_Rtv); + } + cl->rtv_clear_descriptor = GPU_D12_PushDescriptor(gpu_perm, GPU_D12_DescriptorHeapKind_Rtv); + } } - hr = ID3D12Device_CreateCommandList(g->device, 0, queue->desc.type, cl->ca, 0, &IID_ID3D12GraphicsCommandList7, (void **)&cl->cl); if (FAILED(hr)) { Panic(Lit("Failed to create command list")); } - - hr = ID3D12GraphicsCommandList_Close(cl->cl); - if (FAILED(hr)) - { - Panic(Lit("Failed to close command list during initialization")); - } } /* Reset command list */ { - HRESULT hr = ID3D12CommandAllocator_Reset(cl->ca); + HRESULT hr = ID3D12CommandAllocator_Reset(cl->d3d_ca); if (FAILED(hr)) { Panic(Lit("Failed to reset command allocator")); } - hr = ID3D12GraphicsCommandList_Reset(cl->cl, cl->ca, 0); + hr = ID3D12GraphicsCommandList_Reset(cl->d3d_cl, cl->d3d_ca, 0); if (FAILED(hr)) { Panic(Lit("Failed to reset command list")); @@ -694,7 +665,7 @@ void GPU_D12_CommitRawCommandList(GPU_D12_RawCommandList *cl) /* Close */ { __profn("Close DX12 command list"); - HRESULT hr = ID3D12GraphicsCommandList_Close(cl->cl); + HRESULT hr = ID3D12GraphicsCommandList_Close(cl->d3d_cl); if (FAILED(hr)) { /* TODO: Don't panic */ @@ -711,7 +682,7 @@ void GPU_D12_CommitRawCommandList(GPU_D12_RawCommandList *cl) cl->commit_fence_target = target; /* Execute */ - ID3D12CommandQueue_ExecuteCommandLists(queue->d3d_queue, 1, (ID3D12CommandList **)&cl->cl); + ID3D12CommandQueue_ExecuteCommandLists(queue->d3d_queue, 1, (ID3D12CommandList **)&cl->d3d_cl); ID3D12CommandQueue_Signal(queue->d3d_queue, queue->commit_fence, target); /* Append */ @@ -1174,10 +1145,15 @@ void GPU_ReleaseArena(GPU_ArenaHandle arena) /* TODO */ } +b32 GPU_IsArenaNil(GPU_ArenaHandle handle) +{ + return handle.v == 0; +} + //////////////////////////////////////////////////////////// //~ Resource helpers -GPU_D12_Descriptor *GPU_D12_PushDescriptor(GPU_D12_Arena *gpu_arena, GPU_D12_Resource *resource, GPU_D12_DescriptorHeapKind heap_kind) +GPU_D12_Descriptor *GPU_D12_PushDescriptor(GPU_D12_Arena *gpu_arena, GPU_D12_DescriptorHeapKind heap_kind) { GPU_D12_SharedState *g = &GPU_D12_shared_state; GPU_D12_DescriptorHeap *heap = &g->descriptor_heaps[heap_kind]; @@ -1238,7 +1214,6 @@ GPU_D12_Descriptor *GPU_D12_PushDescriptor(GPU_D12_Arena *gpu_arena, GPU_D12_Res /* Initialize descriptor handle */ ZeroStruct(descriptor); descriptor->heap = heap; - descriptor->resource = resource; descriptor->index = index; descriptor->handle.ptr = heap->start_handle.ptr + (index * heap->descriptor_size); @@ -1253,13 +1228,6 @@ GPU_D12_Descriptor *GPU_D12_DescriptorFromIndex(GPU_D12_DescriptorHeapKind heap_ return &descriptors[index]; } -D3D12_INDEX_BUFFER_VIEW GPU_D12_IbvFromIbPtr(IndexBufferGpuPtr ptr) -{ - /* TODO */ - D3D12_INDEX_BUFFER_VIEW result = ZI; - return result; -} - //////////////////////////////////////////////////////////// //~ @hookimpl Resource @@ -1283,81 +1251,65 @@ GPU_ResourceHandle GPU_PushSampler(GPU_ArenaHandle arena, GPU_SamplerDesc desc) return (GPU_ResourceHandle) { 0 }; } +b32 GPU_IsResourceNil(GPU_ResourceHandle handle) +{ + return handle.v == 0; +} + //- Pointer creation -BufferGpuPtr GPU_PushBufferPtrEx(GPU_ArenaHandle arena_handle, GPU_ResourceHandle resource_handle, u32 element_size, RngU32 element_range) +StructuredBufferHandle GPU_PushStructuredBufferHandleEx(GPU_ArenaHandle arena, GPU_ResourceHandle resource, u32 element_size, RngU32 element_range) { /* TODO */ - return (BufferGpuPtr) { 0 }; + return (StructuredBufferHandle) { 0 }; } -RWBufferGpuPtr GPU_PushRWBufferPtrEx(GPU_ArenaHandle arena_handle, GPU_ResourceHandle resource_handle, u32 element_size, RngU32 element_range) +RWStructuredBufferHandle GPU_PushRWStructuredBufferHandleEx(GPU_ArenaHandle arena, GPU_ResourceHandle resource, u32 element_size, RngU32 element_range) { /* TODO */ - return (RWBufferGpuPtr) { 0 }; + return (RWStructuredBufferHandle) { 0 }; } -IndexBufferGpuPtr GPU_PushIndexBufferPtrEx(GPU_ArenaHandle arena_handle, GPU_ResourceHandle resource_handle, u32 element_size, RngU32 element_range) +Texture1DHandle GPU_PushTexture1DHandle(GPU_ArenaHandle arena, GPU_ResourceHandle resource) { /* TODO */ - return (IndexBufferGpuPtr) { 0 }; + return (Texture1DHandle) { 0 }; } -Texture1DGpuPtr GPU_PushTexture1DPtr(GPU_ArenaHandle arena_handle, GPU_ResourceHandle resource_handle) +RWTexture1DHandle GPU_PushRWTexture1DHandle(GPU_ArenaHandle arena, GPU_ResourceHandle resource) { /* TODO */ - return (Texture1DGpuPtr) { 0 }; + return (RWTexture1DHandle) { 0 }; } -RWTexture1DGpuPtr GPU_PushRWTexture1DPtr(GPU_ArenaHandle arena_handle, GPU_ResourceHandle resource_handle) +Texture2DHandle GPU_PushTexture2DHandle(GPU_ArenaHandle arena, GPU_ResourceHandle resource) { /* TODO */ - return (RWTexture1DGpuPtr) { 0 }; + return (Texture2DHandle) { 0 }; } -Texture2DGpuPtr GPU_PushTexture2DPtr(GPU_ArenaHandle arena_handle, GPU_ResourceHandle resource_handle) +RWTexture2DHandle GPU_PushRWTexture2DHandle(GPU_ArenaHandle arena, GPU_ResourceHandle resource) { /* TODO */ - return (Texture2DGpuPtr) { 0 }; + return (RWTexture2DHandle) { 0 }; } -RWTexture2DGpuPtr GPU_PushRWTexture2DPtr(GPU_ArenaHandle arena_handle, GPU_ResourceHandle resource_handle) +Texture3DHandle GPU_PushTexture3DHandle(GPU_ArenaHandle arena, GPU_ResourceHandle resource) { /* TODO */ - return (RWTexture2DGpuPtr) { 0 }; + return (Texture3DHandle) { 0 }; } -Texture3DGpuPtr GPU_PushTexture3DPtr(GPU_ArenaHandle arena_handle, GPU_ResourceHandle resource_handle) +RWTexture3DHandle GPU_PushRWTexture3DHandle(GPU_ArenaHandle arena, GPU_ResourceHandle resource) { /* TODO */ - return (Texture3DGpuPtr) { 0 }; + return (RWTexture3DHandle) { 0 }; } -RWTexture3DGpuPtr GPU_PushRWTexture3DPtr(GPU_ArenaHandle arena_handle, GPU_ResourceHandle resource_handle) +SamplerHandle GPU_PushSamplerHandle(GPU_ArenaHandle arena, GPU_ResourceHandle resource) { /* TODO */ - return (RWTexture3DGpuPtr) { 0 }; -} - -RenderTargetGpuPtr GPU_PushRenderTargetPtr(GPU_ArenaHandle arena_handle, GPU_ResourceHandle resource_handle) -{ - /* Allocate descriptor */ - GPU_D12_SharedState *g = &GPU_D12_shared_state; - GPU_D12_Arena *arena = GPU_D12_ArenaFromHandle(arena_handle); - GPU_D12_Resource *resource = GPU_D12_ResourceFromHandle(resource_handle); - GPU_D12_Descriptor *rtv_descriptor = GPU_D12_PushDescriptor(arena, resource, GPU_D12_DescriptorHeapKind_Rtv); - - /* Initialize descriptor */ - ID3D12Device_CreateRenderTargetView(g->device, resource->d3d_resource, 0, rtv_descriptor->handle); - - /* TODO */ - return (RenderTargetGpuPtr) { .v = rtv_descriptor->index }; -} - -SamplerGpuPtr GPU_PushSamplerPtr(GPU_ArenaHandle arena_handle, GPU_ResourceHandle resource_handle) -{ - /* TODO */ - return (SamplerGpuPtr) { 0 }; + return (SamplerHandle) { 0 }; } //- Count @@ -1471,8 +1423,8 @@ void GPU_CommitCommandListEx(GPU_CommandListHandle cl_handle, GPU_QueueKind queu TempArena scratch = BeginScratchNoConflict(); /* Begin dx12 command list */ - GPU_D12_RawCommandList *dx12_cl = GPU_D12_PrepareRawCommandList(queue_kind); - ID3D12GraphicsCommandList7 *rcl = dx12_cl->cl; + GPU_D12_RawCommandList *rcl = GPU_D12_PrepareRawCommandList(queue_kind); + ID3D12GraphicsCommandList7 *d3d_cl = rcl->d3d_cl; /* Pipeline state */ b32 graphics_rootsig_set = 0; @@ -1485,7 +1437,8 @@ void GPU_CommitCommandListEx(GPU_CommandListHandle cl_handle, GPU_QueueKind queu D3D12_RECT bound_scissor = ZI; D3D_PRIMITIVE_TOPOLOGY bound_primitive_topology = -1; D3D12_INDEX_BUFFER_VIEW bound_ibv = ZI; - D3D12_CPU_DESCRIPTOR_HANDLE bound_raster_targets[GPU_MaxRenderTargets] = ZI; + u64 bound_render_target_uids[GPU_MaxRenderTargets] = ZI; + u64 bound_render_clear_target_uid = 0; /* Flatten command chunks */ u64 cmds_count = 0; @@ -1618,10 +1571,18 @@ void GPU_CommitCommandListEx(GPU_CommandListHandle cl_handle, GPU_QueueKind queu D3D12_BARRIER_TYPE barrier_type = resource->is_texture ? D3D12_BARRIER_TYPE_TEXTURE : D3D12_BARRIER_TYPE_BUFFER; /* Translate gpu barrier kind -> d3d barrier fields */ - D3D12_BARRIER_SYNC sync_before = GPU_D12_BarrierSyncFromStageFlags(desc.sync_prev); - D3D12_BARRIER_SYNC sync_after = GPU_D12_BarrierSyncFromStageFlags(desc.sync_next); - D3D12_BARRIER_ACCESS access_before = GPU_D12_BarrierAccessFromAccessFlags(desc.access_prev); - D3D12_BARRIER_ACCESS access_after = GPU_D12_BarrierAccessFromAccessFlags(desc.access_next); + D3D12_BARRIER_SYNC sync_before = GPU_D12_BarrierSyncFromStages(desc.sync_prev); + D3D12_BARRIER_SYNC sync_after = GPU_D12_BarrierSyncFromStages(desc.sync_next); + D3D12_BARRIER_ACCESS access_before = GPU_D12_BarrierAccessFromAccesses(desc.access_prev); + D3D12_BARRIER_ACCESS access_after = GPU_D12_BarrierAccessFromAccesses(desc.access_next); + D3D12_BARRIER_LAYOUT layout_before = D3D12_BARRIER_LAYOUT_UNDEFINED; + D3D12_BARRIER_LAYOUT layout_after = D3D12_BARRIER_LAYOUT_UNDEFINED; + if (desc.layout != GPU_Layout_NoChange) + { + layout_before = resource->texture_layout; + layout_after = GPU_D12_BarrierLayoutFromLayout(desc.layout); + resource->texture_layout = layout_after; + } /* Build barrier */ switch (barrier_type) @@ -1640,26 +1601,15 @@ void GPU_CommitCommandListEx(GPU_CommandListHandle cl_handle, GPU_QueueKind queu case D3D12_BARRIER_TYPE_TEXTURE: { - D3D12_BARRIER_LAYOUT layout_after = 0; - if (desc.layout == GPU_LayoutKind_NoChange) - { - layout_after = resource->texture_layout; - } - else - { - layout_after = GPU_D12_BarrierLayoutFromLayoutKind(desc.layout); - } - D3D12_TEXTURE_BARRIER *barrier = &texture_barriers[texture_barriers_count++]; barrier->SyncBefore = sync_before; barrier->SyncAfter = sync_after; barrier->AccessBefore = access_before; barrier->AccessAfter = access_after; - barrier->LayoutBefore = resource->texture_layout; + barrier->LayoutBefore = layout_before; barrier->LayoutAfter = layout_after; barrier->pResource = resource->d3d_resource; barrier->Subresources.IndexOrFirstMipLevel = 0xffffffff; - resource->texture_layout = layout_after; } break; case D3D12_BARRIER_TYPE_GLOBAL: @@ -1701,7 +1651,7 @@ void GPU_CommitCommandListEx(GPU_CommandListHandle cl_handle, GPU_QueueKind queu } if (barrier_groups_count > 0) { - ID3D12GraphicsCommandList7_Barrier(rcl, barrier_groups_count, barrier_groups); + ID3D12GraphicsCommandList7_Barrier(d3d_cl, barrier_groups_count, barrier_groups); } } @@ -1730,7 +1680,7 @@ void GPU_CommitCommandListEx(GPU_CommandListHandle cl_handle, GPU_QueueKind queu // u64 cpy_len = MinU64(dst_len, src_len); // if (cpy_len > 0) // { - // ID3D12GraphicsCommandList_CopyBufferRegion(rcl, dst->d3d_resource, 0, src->d3d_resource, 0, cpy_len); + // ID3D12GraphicsCommandList_CopyBufferRegion(d3d_cl, dst->d3d_resource, 0, src->d3d_resource, 0, cpy_len); // /* Implicit promotion */ // if (dst->state == D3D12_RESOURCE_STATE_COMMON) dst->state = D3D12_RESOURCE_STATE_COPY_DEST; // if (src->state == D3D12_RESOURCE_STATE_COMMON) src->state = D3D12_RESOURCE_STATE_COPY_SOURCE; @@ -1751,7 +1701,7 @@ void GPU_CommitCommandListEx(GPU_CommandListHandle cl_handle, GPU_QueueKind queu // src_loc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; // src_loc.PlacedFootprint = dst_placed_footprint; - // ID3D12GraphicsCommandList_CopyTextureRegion(rcl, &dst_loc, 0, 0, 0, &src_loc, 0); + // ID3D12GraphicsCommandList_CopyTextureRegion(d3d_cl, &dst_loc, 0, 0, 0, &src_loc, 0); // /* Implicit promotion */ // if (dst->state == D3D12_RESOURCE_STATE_COMMON) dst->state = D3D12_RESOURCE_STATE_COPY_DEST; // if (src->state == D3D12_RESOURCE_STATE_COMMON) src->state = D3D12_RESOURCE_STATE_COPY_SOURCE; @@ -1790,26 +1740,26 @@ void GPU_CommitCommandListEx(GPU_CommandListHandle cl_handle, GPU_QueueKind queu g->descriptor_heaps[GPU_D12_DescriptorHeapKind_CbvSrvUav].d3d_heap, g->descriptor_heaps[GPU_D12_DescriptorHeapKind_Sampler].d3d_heap, }; - ID3D12GraphicsCommandList_SetDescriptorHeaps(rcl, countof(heaps), heaps); + ID3D12GraphicsCommandList_SetDescriptorHeaps(d3d_cl, countof(heaps), heaps); descriptor_heaps_set = 1; } /* Bind rootsig */ if (!compute_rootsig_set) { - ID3D12GraphicsCommandList_SetComputeRootSignature(rcl, g->bindless_rootsig); + ID3D12GraphicsCommandList_SetComputeRootSignature(d3d_cl, g->bindless_rootsig); compute_rootsig_set = 1; } /* Bind pipeline */ if (pipeline != bound_pipeline) { - ID3D12GraphicsCommandList_SetPipelineState(rcl, pipeline->pso); + ID3D12GraphicsCommandList_SetPipelineState(d3d_cl, pipeline->pso); bound_pipeline = pipeline; } /* Dispatch */ - ID3D12GraphicsCommandList_Dispatch(rcl, cmd->compute.groups.x, cmd->compute.groups.y, cmd->compute.groups.z); + ID3D12GraphicsCommandList_Dispatch(d3d_cl, cmd->compute.groups.x, cmd->compute.groups.y, cmd->compute.groups.z); } cmd_idx += 1; @@ -1842,12 +1792,12 @@ void GPU_CommitCommandListEx(GPU_CommandListHandle cl_handle, GPU_QueueKind queu { pipeline_desc.is_wireframe = 1; } - for (u32 i = 0; i < countof(cmd->rasterize.rtv_descriptors); ++i) + for (u32 i = 0; i < countof(cmd->rasterize.render_targets); ++i) { - GPU_D12_Descriptor *rtv_descriptor = cmd->rasterize.rtv_descriptors[i]; - if (rtv_descriptor != 0) + GPU_D12_Resource *rt = cmd->rasterize.render_targets[i]; + if (rt) { - pipeline_desc.render_target_formats[i] = rtv_descriptor->resource->texture_format; + pipeline_desc.render_target_formats[i] = rt->texture_format; } else { @@ -1857,16 +1807,30 @@ void GPU_CommitCommandListEx(GPU_CommandListHandle cl_handle, GPU_QueueKind queu pipeline = GPU_D12_PipelineFromDesc(pipeline_desc); } - /* Calculate IBV count */ + /* Create ibv */ u32 indices_count = 0; - D3D12_INDEX_BUFFER_VIEW ibv = cmd->rasterize.ibv; - if (ibv.Format == DXGI_FORMAT_R16_UINT) + D3D12_INDEX_BUFFER_VIEW ibv = ZI; { - indices_count = ibv.SizeInBytes / 2; - } - else if (ibv.Format == DXGI_FORMAT_R32_UINT) - { - indices_count = ibv.SizeInBytes / 4; + { + GPU_IndexBufferDesc desc = cmd->rasterize.index_buffer_desc; + GPU_D12_Resource *index_buffer_resource = GPU_D12_ResourceFromHandle(desc.resource); + ibv.BufferLocation = index_buffer_resource->buffer_gpu_address; + ibv.SizeInBytes = desc.index_size * desc.index_count; + if (desc.index_size == 2) + { + ibv.Format = DXGI_FORMAT_R16_UINT; + indices_count = ibv.SizeInBytes / 2; + } + else if (desc.index_size == 4) + { + ibv.Format = DXGI_FORMAT_R32_UINT; + indices_count = ibv.SizeInBytes / 4; + } + else + { + Assert(0); /* Invalid index size */ + } + } } /* Prepare & dispatch */ @@ -1879,21 +1843,21 @@ void GPU_CommitCommandListEx(GPU_CommandListHandle cl_handle, GPU_QueueKind queu g->descriptor_heaps[GPU_D12_DescriptorHeapKind_CbvSrvUav].d3d_heap, g->descriptor_heaps[GPU_D12_DescriptorHeapKind_Sampler].d3d_heap, }; - ID3D12GraphicsCommandList_SetDescriptorHeaps(rcl, countof(heaps), heaps); + ID3D12GraphicsCommandList_SetDescriptorHeaps(d3d_cl, countof(heaps), heaps); descriptor_heaps_set = 1; } /* Bind rootsig */ if (!graphics_rootsig_set) { - ID3D12GraphicsCommandList_SetGraphicsRootSignature(rcl, g->bindless_rootsig); + ID3D12GraphicsCommandList_SetGraphicsRootSignature(d3d_cl, g->bindless_rootsig); graphics_rootsig_set = 1; } /* Bind pipeline */ if (pipeline != bound_pipeline) { - ID3D12GraphicsCommandList_SetPipelineState(rcl, pipeline->pso); + ID3D12GraphicsCommandList_SetPipelineState(d3d_cl, pipeline->pso); bound_pipeline = pipeline; } @@ -1903,7 +1867,7 @@ void GPU_CommitCommandListEx(GPU_CommandListHandle cl_handle, GPU_QueueKind queu // u32 sig_size = cmd->rasterize.sig_size; // void *sig = cmd->rasterize.sig; // u32 num32bit = sig_size / 4; - // ID3D12GraphicsCommandList_SetGraphicsRoot32BitConstants(rcl, 0, num32bit, sig, 0); + // ID3D12GraphicsCommandList_SetGraphicsRoot32BitConstants(d3d_cl, 0, num32bit, sig, 0); // } /* Set viewport */ @@ -1921,7 +1885,7 @@ void GPU_CommitCommandListEx(GPU_CommandListHandle cl_handle, GPU_QueueKind queu if (!MatchStruct(&viewport, &bound_viewport)) { bound_viewport = viewport; - ID3D12GraphicsCommandList_RSSetViewports(rcl, 1, &viewport); + ID3D12GraphicsCommandList_RSSetViewports(d3d_cl, 1, &viewport); } } @@ -1938,7 +1902,7 @@ void GPU_CommitCommandListEx(GPU_CommandListHandle cl_handle, GPU_QueueKind queu if (!MatchStruct(&scissor, &bound_scissor)) { bound_scissor = scissor; - ID3D12GraphicsCommandList_RSSetScissorRects(rcl, 1, &scissor); + ID3D12GraphicsCommandList_RSSetScissorRects(d3d_cl, 1, &scissor); } } @@ -1958,14 +1922,14 @@ void GPU_CommitCommandListEx(GPU_CommandListHandle cl_handle, GPU_QueueKind queu } if (topology != bound_primitive_topology) { - ID3D12GraphicsCommandList_IASetPrimitiveTopology(rcl, topology); + ID3D12GraphicsCommandList_IASetPrimitiveTopology(d3d_cl, topology); } } /* Set index buffer */ if (!MatchStruct(&ibv, &bound_ibv)) { - ID3D12GraphicsCommandList_IASetIndexBuffer(rcl, &ibv); + ID3D12GraphicsCommandList_IASetIndexBuffer(d3d_cl, &ibv); bound_ibv = ibv; } @@ -1973,14 +1937,19 @@ void GPU_CommitCommandListEx(GPU_CommandListHandle cl_handle, GPU_QueueKind queu { b32 om_dirty = 0; u32 rtvs_count = 0; - D3D12_CPU_DESCRIPTOR_HANDLE rtvs[countof(bound_raster_targets)] = ZI; - for (u32 i = 0; i < countof(cmd->rasterize.rtv_descriptors); ++i) + for (u32 i = 0; i < countof(cmd->rasterize.render_targets); ++i) { - GPU_D12_Descriptor *rtv_desc = cmd->rasterize.rtv_descriptors[i]; - if (rtv_desc != 0) + GPU_D12_Resource *rt = cmd->rasterize.render_targets[i]; + if (rt) { - om_dirty = om_dirty || (bound_raster_targets[i].ptr != rtv_desc->handle.ptr); - rtvs[rtvs_count++] = rtv_desc->handle; + if (bound_render_target_uids[i] != rt->uid) + { + GPU_D12_Descriptor *rtv_descriptor = rcl->rtv_descriptors[i]; + ID3D12Device_CreateRenderTargetView(g->device, rt->d3d_resource, 0, rtv_descriptor->handle); + bound_render_target_uids[i] = rt->uid; + om_dirty = 1; + } + ++rtvs_count; } else { @@ -1989,13 +1958,17 @@ void GPU_CommitCommandListEx(GPU_CommandListHandle cl_handle, GPU_QueueKind queu } if (om_dirty) { - CopyStructs(bound_raster_targets, rtvs, rtvs_count); - ID3D12GraphicsCommandList_OMSetRenderTargets(rcl, rtvs_count, rtvs, 0, 0); + D3D12_CPU_DESCRIPTOR_HANDLE rtv_handles[GPU_MaxRenderTargets] = ZI; + for (u32 i = 0; i < rtvs_count; ++i) + { + rtv_handles[i] = rcl->rtv_descriptors[i]->handle; + } + ID3D12GraphicsCommandList_OMSetRenderTargets(d3d_cl, rtvs_count, rtv_handles, 0, 0); } } /* Dispatch */ - ID3D12GraphicsCommandList_DrawIndexedInstanced(rcl, indices_count, cmd->rasterize.instances_count, 0, 0, 0); + ID3D12GraphicsCommandList_DrawIndexedInstanced(d3d_cl, indices_count, cmd->rasterize.instances_count, 0, 0, 0); } cmd_idx += 1; @@ -2005,14 +1978,21 @@ void GPU_CommitCommandListEx(GPU_CommandListHandle cl_handle, GPU_QueueKind queu case GPU_D12_CmdKind_ClearRtv: { - GPU_D12_Descriptor *descriptor = cmd->clear_rtv.rtv_descriptor; - GPU_D12_Resource *resource = descriptor->resource; + GPU_D12_Resource *rt = cmd->clear_rtv.render_target; f32 clear_color[4] = ZI; - clear_color[0] = cmd->clear_rtv.color.x; - clear_color[1] = cmd->clear_rtv.color.y; - clear_color[2] = cmd->clear_rtv.color.z; - clear_color[3] = cmd->clear_rtv.color.w; - ID3D12GraphicsCommandList_ClearRenderTargetView(rcl, descriptor->handle, clear_color, 0, 0); + { + clear_color[0] = cmd->clear_rtv.color.x; + clear_color[1] = cmd->clear_rtv.color.y; + clear_color[2] = cmd->clear_rtv.color.z; + clear_color[3] = cmd->clear_rtv.color.w; + } + D3D12_CPU_DESCRIPTOR_HANDLE rtv_handle = rcl->rtv_clear_descriptor->handle; + if (bound_render_clear_target_uid != rt->uid) + { + ID3D12Device_CreateRenderTargetView(g->device, rt->d3d_resource, 0, rtv_handle); + bound_render_clear_target_uid = rt->uid; + } + ID3D12GraphicsCommandList_ClearRenderTargetView(d3d_cl, rtv_handle, clear_color, 0, 0); cmd_idx += 1; } break; } @@ -2021,7 +2001,7 @@ void GPU_CommitCommandListEx(GPU_CommandListHandle cl_handle, GPU_QueueKind queu } /* End dx12 command list */ - GPU_D12_CommitRawCommandList(dx12_cl); + GPU_D12_CommitRawCommandList(rcl); /* Free command list */ { @@ -2139,47 +2119,47 @@ void GPU_SetConstF32(GPU_CommandListHandle cl_handle, i32 slot, f32 v) /* TODO */ } -void GPU_SetConstBuffer(GPU_CommandListHandle cl_handle, i32 slot, BufferGpuPtr v) +void GPU_SetConstStructuredBuffer(GPU_CommandListHandle cl_handle, i32 slot, StructuredBufferHandle v) { /* TODO */ } -void GPU_SetConstRWBuffer(GPU_CommandListHandle cl_handle, i32 slot, RWBufferGpuPtr v) +void GPU_SetConstRWStructuredBuffer(GPU_CommandListHandle cl_handle, i32 slot, RWStructuredBufferHandle v) { /* TODO */ } -void GPU_SetConstTexture1D(GPU_CommandListHandle cl_handle, i32 slot, Texture1DGpuPtr v) +void GPU_SetConstTexture1D(GPU_CommandListHandle cl_handle, i32 slot, Texture1DHandle v) { /* TODO */ } -void GPU_SetConstRWTexture1D(GPU_CommandListHandle cl_handle, i32 slot, RWTexture1DGpuPtr v) +void GPU_SetConstRWTexture1D(GPU_CommandListHandle cl_handle, i32 slot, RWTexture1DHandle v) { /* TODO */ } -void GPU_SetConstTexture2D(GPU_CommandListHandle cl_handle, i32 slot, Texture2DGpuPtr v) +void GPU_SetConstTexture2D(GPU_CommandListHandle cl_handle, i32 slot, Texture2DHandle v) { /* TODO */ } -void GPU_SetConstRWTexture2D(GPU_CommandListHandle cl_handle, i32 slot, RWTexture2DGpuPtr v) +void GPU_SetConstRWTexture2D(GPU_CommandListHandle cl_handle, i32 slot, RWTexture2DHandle v) { /* TODO */ } -void GPU_SetConstTexture3D(GPU_CommandListHandle cl_handle, i32 slot, Texture3DGpuPtr v) +void GPU_SetConstTexture3D(GPU_CommandListHandle cl_handle, i32 slot, Texture3DHandle v) { /* TODO */ } -void GPU_SetConstRWTexture3D(GPU_CommandListHandle cl_handle, i32 slot, RWTexture3DGpuPtr v) +void GPU_SetConstRWTexture3D(GPU_CommandListHandle cl_handle, i32 slot, RWTexture3DHandle v) { /* TODO */ } -void GPU_SetConstSampler(GPU_CommandListHandle cl_handle, i32 slot, SamplerGpuPtr v) +void GPU_SetConstSampler(GPU_CommandListHandle cl_handle, i32 slot, SamplerHandle v) { /* TODO */ } @@ -2209,8 +2189,8 @@ void GPU_Compute(GPU_CommandListHandle cl_handle, ComputeShader cs, Vec3I32 grou void GPU_Rasterize(GPU_CommandListHandle cl_handle, VertexShader vs, PixelShader ps, - u32 instances_count, IndexBufferGpuPtr idx_buff, - u32 raster_targets_count, RenderTargetGpuPtr *raster_targets, + u32 instances_count, GPU_IndexBufferDesc index_buffer, + u32 render_targets_count, GPU_ResourceHandle *render_targets, Rng3 viewport, Rng2 scissor, GPU_RasterMode mode) { @@ -2220,10 +2200,10 @@ void GPU_Rasterize(GPU_CommandListHandle cl_handle, cmd->rasterize.vs = vs; cmd->rasterize.ps = ps; cmd->rasterize.instances_count = instances_count; - cmd->rasterize.ibv = GPU_D12_IbvFromIbPtr(idx_buff); - for (u32 i = 0; i < MinU32(raster_targets_count, GPU_MaxRenderTargets); ++i) + cmd->rasterize.index_buffer_desc = index_buffer; + for (u32 i = 0; i < MinU32(render_targets_count, GPU_MaxRenderTargets); ++i) { - cmd->rasterize.rtv_descriptors[i] = GPU_D12_DescriptorFromIndex(GPU_D12_DescriptorHeapKind_Rtv, raster_targets[i].v); + cmd->rasterize.render_targets[i] = GPU_D12_ResourceFromHandle(render_targets[i]); } cmd->rasterize.viewport = viewport; cmd->rasterize.scissor = scissor; @@ -2232,12 +2212,12 @@ void GPU_Rasterize(GPU_CommandListHandle cl_handle, //- Clear -void GPU_ClearRenderTarget(GPU_CommandListHandle cl_handle, RenderTargetGpuPtr ptr, Vec4 color) +void GPU_ClearRenderTarget(GPU_CommandListHandle cl_handle, GPU_ResourceHandle resource_handle, Vec4 color) { GPU_D12_CmdList *cl = GPU_D12_CmdListFromHandle(cl_handle); GPU_D12_Cmd *cmd = GPU_D12_PushCmd(cl); cmd->kind = GPU_D12_CmdKind_ClearRtv; - cmd->clear_rtv.rtv_descriptor = GPU_D12_DescriptorFromIndex(GPU_D12_DescriptorHeapKind_Rtv, ptr.v); + cmd->clear_rtv.render_target = GPU_D12_ResourceFromHandle(resource_handle); cmd->clear_rtv.color = color; } @@ -2486,7 +2466,7 @@ GPU_ResourceHandle GPU_PrepareBackbuffer(GPU_SwapchainHandle swapchain_handle, G } ZeroStruct(backbuffer); backbuffer->d3d_resource = d3d_resource; - backbuffer->uid = Atomic64FetchAdd(&g->next_resource_uid.v, 1); + backbuffer->uid = Atomic64FetchAdd(&g->resource_creation_gen.v, 1) + 1; backbuffer->is_texture = 1; backbuffer->texture_flags = GPU_TextureFlag_AllowRenderTarget; backbuffer->texture_dims = VEC3I32(size.x, size.y, 1); diff --git a/src/gpu/gpu_dx12/gpu_dx12.h b/src/gpu/gpu_dx12/gpu_dx12.h index 736937e0..8e7bc44a 100644 --- a/src/gpu/gpu_dx12/gpu_dx12.h +++ b/src/gpu/gpu_dx12/gpu_dx12.h @@ -91,8 +91,6 @@ Struct(GPU_D12_Descriptor) GPU_D12_DescriptorHeap *heap; D3D12_CPU_DESCRIPTOR_HANDLE handle; u32 index; - - struct GPU_D12_Resource *resource; }; Struct(GPU_D12_DescriptorList) @@ -168,8 +166,12 @@ Struct(GPU_D12_RawCommandList) u64 commit_fence_target; - ID3D12CommandAllocator *ca; - ID3D12GraphicsCommandList7 *cl; + ID3D12CommandAllocator *d3d_ca; + ID3D12GraphicsCommandList7 *d3d_cl; + + /* Direct queue command lists keep a constant list of CPU-only descriptors */ + GPU_D12_Descriptor *rtv_descriptors[GPU_MaxRenderTargets]; + GPU_D12_Descriptor *rtv_clear_descriptor; }; //////////////////////////////////////////////////////////// @@ -238,8 +240,8 @@ Struct(GPU_D12_Cmd) VertexShader vs; PixelShader ps; u32 instances_count; - D3D12_INDEX_BUFFER_VIEW ibv; - GPU_D12_Descriptor *rtv_descriptors[GPU_MaxRenderTargets]; + GPU_IndexBufferDesc index_buffer_desc; + GPU_D12_Resource *render_targets[GPU_MaxRenderTargets]; Rng3 viewport; Rng2 scissor; GPU_RasterMode mode; @@ -247,7 +249,7 @@ Struct(GPU_D12_Cmd) struct { - GPU_D12_Descriptor *rtv_descriptor; + GPU_D12_Resource *render_target; Vec4 color; } clear_rtv; }; @@ -295,7 +297,10 @@ Struct(GPU_D12_Swapchain) Struct(GPU_D12_SharedState) { - Atomic64Padded next_resource_uid; + Atomic64Padded resource_creation_gen; + + /* Per-fiber permanent arenas */ + GPU_ArenaHandle perm_arenas[MaxFibers]; /* Stats */ Atomic64 driver_resources_allocated; @@ -345,9 +350,9 @@ GPU_D12_Resource *GPU_D12_ResourceFromHandle(GPU_ResourceHandle handle); GPU_D12_Swapchain *GPU_D12_SwapchainFromHandle(GPU_SwapchainHandle handle); DXGI_FORMAT GPU_D12_DxgiFormatFromGpuFormat(GPU_Format format); -D3D12_BARRIER_SYNC GPU_D12_BarrierSyncFromStageFlags(GPU_StageFlag flags); -D3D12_BARRIER_ACCESS GPU_D12_BarrierAccessFromAccessFlags(GPU_AccessFlag flags); -D3D12_BARRIER_LAYOUT GPU_D12_BarrierLayoutFromLayoutKind(GPU_LayoutKind kind); +D3D12_BARRIER_SYNC GPU_D12_BarrierSyncFromStages(GPU_Stage stages); +D3D12_BARRIER_ACCESS GPU_D12_BarrierAccessFromAccesses(GPU_Access accesses); +D3D12_BARRIER_LAYOUT GPU_D12_BarrierLayoutFromLayout(GPU_Layout layout); //////////////////////////////////////////////////////////// //~ Pipeline @@ -363,9 +368,8 @@ GPU_D12_Queue *GPU_D12_QueueFromKind(GPU_QueueKind kind); //////////////////////////////////////////////////////////// //~ Resource helpers -GPU_D12_Descriptor *GPU_D12_PushDescriptor(GPU_D12_Arena *gpu_arena, GPU_D12_Resource *resource, GPU_D12_DescriptorHeapKind heap_kind); +GPU_D12_Descriptor *GPU_D12_PushDescriptor(GPU_D12_Arena *gpu_arena, GPU_D12_DescriptorHeapKind heap_kind); GPU_D12_Descriptor *GPU_D12_DescriptorFromIndex(GPU_D12_DescriptorHeapKind heap_kind, u32 index); -D3D12_INDEX_BUFFER_VIEW GPU_D12_IbvFromIbPtr(IndexBufferGpuPtr ptr); //////////////////////////////////////////////////////////// //~ Raw command list diff --git a/src/proto/proto.c b/src/proto/proto.c index b5f7b1d3..4075d421 100644 --- a/src/proto/proto.c +++ b/src/proto/proto.c @@ -31,27 +31,28 @@ JobImpl(PR_RunForever, _sig, _id) /* Draw to backbuffer */ GPU_CommandListHandle cl = GPU_PrepareCommandList(); { - RenderTargetGpuPtr backbuffer_rt = GPU_PushRenderTargetPtr(gpu_frame_arena, backbuffer); /* Prep clear pass */ { GPU_LayoutBarrier(cl, backbuffer, - NoStage, RenderTarget, - NoAccess, RenderTargetWrite, - DirectQueue_RenderTargetWrite); + GPU_Layout_DirectQueue_RenderTargetWrite, + GPU_Stage_None, GPU_Stage_RenderTarget, + GPU_Access_None, GPU_Access_RenderTargetWrite); + // GPU_DumbLayoutBarrier(cl, backbuffer, GPU_Layout_DirectQueue_RenderTargetWrite); } /* Clear pass */ { - GPU_ClearRenderTarget(cl, backbuffer_rt, VEC4(1, 0, 0, 1)); + GPU_ClearRenderTarget(cl, backbuffer, VEC4(1, 0, 0, 1)); } /* Finalize backbuffer layout */ { GPU_LayoutBarrier(cl, backbuffer, - RenderTarget, NoStage, - RenderTargetWrite, NoAccess, - Present); + GPU_Layout_AnyQueue_ShaderRead_CopyRead_CopyWrite_Present, + GPU_Stage_RenderTarget, GPU_Stage_None, + GPU_Access_RenderTargetWrite, GPU_Access_None); + // GPU_DumbLayoutBarrier(cl, backbuffer, GPU_Layout_AnyQueue_ShaderRead_CopyRead_CopyWrite_Present); } /* Reset */ diff --git a/src/proto/proto.lay b/src/proto/proto.lay index e156329d..81ad5541 100644 --- a/src/proto/proto.lay +++ b/src/proto/proto.lay @@ -4,7 +4,6 @@ @Dep gpu @Dep window -@Dep sprite //- Impl diff --git a/src/sprite/sprite.c b/src/sprite/sprite.c index 83c71ec0..d94080ff 100644 --- a/src/sprite/sprite.c +++ b/src/sprite/sprite.c @@ -27,21 +27,17 @@ JobImpl(SPR_LoadTexture, sig, _) GPU_ResourceHandle gpu_resource = GPU_PushTexture2D(gpu_perm, GPU_Format_R8G8B8A8_Unorm_Srgb, VEC2I32(decoded.width, decoded.height), - GPU_LayoutKind_AnyQueue_ShaderRead_CopyRead_CopyWrite); + GPU_Layout_AnyQueue_ShaderRead_CopyRead_CopyWrite_Present); // texture->texture = gpu_tex; texture->width = decoded.width; texture->height = decoded.height; GPU_CommandListHandle cl = GPU_PrepareCommandList(); { - GPU_CopyCpuTexels(cl, - gpu_resource, VEC3I32(0,0,0), - decoded.pixels, VEC3I32(decoded.width, decoded.height, 1), - RNG3I32(VEC3I32(0,0,0), VEC3I32(decoded.width, decoded.height, 1))); - GPU_LayoutBarrier(cl, gpu_resource, - Copy, NoStage, - CopyWrite, NoAccess, - AnyQueue_ShaderRead_CopyRead); + GPU_ReadTexelsFromCpu(cl, + gpu_resource, VEC3I32(0,0,0), + decoded.pixels, VEC3I32(decoded.width, decoded.height, 1), + RNG3I32(VEC3I32(0,0,0), VEC3I32(decoded.width, decoded.height, 1))); } GPU_CommitCommandList(cl, GPU_QueueKind_AsyncCopy); }