From 4e95e44823ffbee27de64ac18ec81b11415ee267 Mon Sep 17 00:00:00 2001 From: jacob Date: Sun, 23 Nov 2025 22:06:45 -0600 Subject: [PATCH] expose barrier before/after sync, access & layout in gpu api --- src/base/base.h | 4 +- src/gpu/gpu_core.h | 186 +++++++++++++---- src/gpu/gpu_dx12/gpu_dx12.c | 384 ++++++++++++++++++++---------------- src/gpu/gpu_dx12/gpu_dx12.h | 39 ++-- src/proto/proto.c | 28 ++- src/sprite/sprite.c | 8 +- 6 files changed, 404 insertions(+), 245 deletions(-) diff --git a/src/base/base.h b/src/base/base.h index 6b8f3a2e..16a0756c 100644 --- a/src/base/base.h +++ b/src/base/base.h @@ -745,7 +745,7 @@ Struct(U128) Struct(RWTexture2DGpuPtr) { u32 v; }; Struct(Texture3DGpuPtr) { u32 v; }; Struct(RWTexture3DGpuPtr) { u32 v; }; - Struct(RasterTargetGpuPtr) { u32 v; }; + Struct(RenderTargetGpuPtr) { u32 v; }; Struct(SamplerGpuPtr) { u32 v; }; #define IsGpuPtrNil(p) ((p).v == 0) @@ -773,7 +773,7 @@ Struct(U128) typedef RWTexture2DGpuPtr u32; typedef Texture3DGpuPtr u32; typedef RWTexture3DGpuPtr u32; - typedef RasterTargetGpuPtr u32; + typedef RenderTargetGpuPtr u32; typedef SamplerGpuPtr u32; #define IsGpuPtrNil(p) ((p) == 0) diff --git a/src/gpu/gpu_core.h b/src/gpu/gpu_core.h index 605fd732..3e9fc37e 100644 --- a/src/gpu/gpu_core.h +++ b/src/gpu/gpu_core.h @@ -161,35 +161,122 @@ Enum(GPU_Format) }; //////////////////////////////////////////////////////////// -//~ Access types +//~ Barrier types -Enum(GPU_AccessKind) +Enum(GPU_StageFlag) { - GPU_AccessKind_None, + GPU_StageFlag_NoStage = 0, - /* Generic */ - GPU_AccessKind_Generic_Read, - // GPU_AccessKind_Generic_ReadWrite, /* NOTE: Textures cannot transition to/from this access to another access kind. They must be created with it. 
*/ + /* Compute stages */ + GPU_StageFlag_ComputeShading = (1 << 1), - /* Copy */ - GPU_AccessKind_Copy_Read, - GPU_AccessKind_Copy_Write, + /* Draw stages */ + GPU_StageFlag_IndexAssembly = (1 << 2), + GPU_StageFlag_VertexShading = (1 << 3), + GPU_StageFlag_PixelShading = (1 << 4), + GPU_StageFlag_DepthStencil = (1 << 5), + GPU_StageFlag_RenderTarget = (1 << 6), - /* Shader read/write */ - GPU_AccessKind_AnyShader_Read, - GPU_AccessKind_AnyShader_ReadWrite, - GPU_AccessKind_ComputeShader_Read, - GPU_AccessKind_ComputeShader_ReadWrite, - GPU_AccessKind_VertexPixelShader_Read, - GPU_AccessKind_VertexPixelShader_ReadWrite, - GPU_AccessKind_VertexShader_Read, - GPU_AccessKind_VertexShader_ReadWrite, - GPU_AccessKind_PixelShader_Read, - GPU_AccessKind_PixelShader_ReadWrite, + /* Copy stages */ + GPU_StageFlag_Copy = (1 << 7), - /* Special */ - GPU_AccessKind_RasterTarget, - GPU_AccessKind_Present, + /* Indirect stages */ + GPU_StageFlag_Indirect = (1 << 8), + + /* Aggregate stages */ + GPU_StageFlag_AllDrawStages = GPU_StageFlag_IndexAssembly | + GPU_StageFlag_VertexShading | + GPU_StageFlag_PixelShading | + GPU_StageFlag_DepthStencil | + GPU_StageFlag_RenderTarget, + + GPU_StageFlag_AllShadingStages = GPU_StageFlag_ComputeShading | + GPU_StageFlag_VertexShading | + GPU_StageFlag_PixelShading, + + GPU_StageFlag_AllNonPixelShadingStages = GPU_StageFlag_ComputeShading | + GPU_StageFlag_VertexShading, + + GPU_StageFlag_AllStages = 0xFFFFFFFF +}; + +Enum(GPU_AccessFlag) +{ + GPU_AccessFlag_NoAccess = 0, + + GPU_AccessFlag_ShaderReadWrite = (1 << 1), + GPU_AccessFlag_ShaderRead = (1 << 2), + + GPU_AccessFlag_CopyWrite = (1 << 3), + GPU_AccessFlag_CopyRead = (1 << 4), + + GPU_AccessFlag_IndexBuffer = (1 << 5), + GPU_AccessFlag_IndirectArgument = (1 << 6), + + GPU_AccessFlag_DepthStencilRead = (1 << 7), + GPU_AccessFlag_DepthStencilWrite = (1 << 8), + GPU_AccessFlag_RenderTargetWrite = (1 << 9), + + GPU_AccessFlag_AllAccess = 0xFFFFFFFF +}; + +Enum(GPU_LayoutKind) +{ + 
GPU_LayoutKind_NoChange, + + GPU_LayoutKind_AnyQueue_AnyAccess, /* NOTE: Textures cannot transition to/from this layout. They must be created with it. */ + + GPU_LayoutKind_Undefined, /* D3D12_BARRIER_LAYOUT_UNDEFINED */ + GPU_LayoutKind_Present, /* D3D12_BARRIER_LAYOUT_COMMON */ + + ////////////////////////////// + //- Queue-agnostic + + GPU_LayoutKind_AnyQueue_ShaderRead_CopyRead_CopyWrite, /* D3D12_BARRIER_LAYOUT_COMMON */ + GPU_LayoutKind_AnyQueue_ShaderReadWrite, /* D3D12_BARRIER_LAYOUT_UNORDERED_ACCESS */ + + GPU_LayoutKind_AnyQueue_ShaderRead_CopyRead, /* D3D12_BARRIER_LAYOUT_GENERIC_READ */ + GPU_LayoutKind_AnyQueue_ShaderRead, /* D3D12_BARRIER_LAYOUT_SHADER_RESOURCE */ + GPU_LayoutKind_AnyQueue_CopyRead, /* D3D12_BARRIER_LAYOUT_COPY_SOURCE */ + + ////////////////////////////// + //- Direct queue + + GPU_LayoutKind_DirectQueue_ShaderReadWrite_CopyRead_CopyWrite, /* D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COMMON */ + GPU_LayoutKind_DirectQueue_ShaderReadWrite, /* D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_UNORDERED_ACCESS */ + + GPU_LayoutKind_DirectQueue_ShaderRead_CopyRead_DepthStencilRead, /* D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_GENERIC_READ */ + GPU_LayoutKind_DirectQueue_ShaderRead, /* D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_SHADER_RESOURCE */ + GPU_LayoutKind_DirectQueue_CopyRead, /* D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COPY_SOURCE */ + + GPU_LayoutKind_DirectQueue_DepthStencilRead_DepthStencilWrite, /* D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE */ + GPU_LayoutKind_DirectQueue_DepthStencilRead, /* D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_READ */ + GPU_LayoutKind_DirectQueue_RenderTargetWrite, /* D3D12_BARRIER_LAYOUT_RENDER_TARGET */ + + ////////////////////////////// + //- Compute queue + + GPU_LayoutKind_ComputeQueue_ShaderReadWrite_CopyRead_CopyWrite, /* D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COMMON */ + GPU_LayoutKind_ComputeQueue_ShaderReadWrite, /* D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_UNORDERED_ACCESS */ + + GPU_LayoutKind_ComputeQueue_ShaderRead_CopyRead, /* 
D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_GENERIC_READ */ + GPU_LayoutKind_ComputeQueue_ShaderRead, /* D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_SHADER_RESOURCE */ + GPU_LayoutKind_ComputeQueue_CopyRead, /* D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COPY_SOURCE */ +}; + +/* Barrier will execute after previous stages specified by `sync_prev`, and before next stages specified by `sync_next`. + * When barrier executes: + * - Necessary resource flushes will occur based on `access_prev` & `access_next` + * - Texture layout will transition based on `layout` (if specified) + */ +Struct(GPU_BarrierDesc) +{ + GPU_ResourceHandle resource; + GPU_StageFlag sync_prev; + GPU_StageFlag sync_next; + GPU_AccessFlag access_prev; + GPU_AccessFlag access_next; + GPU_LayoutKind layout; }; //////////////////////////////////////////////////////////// @@ -210,13 +297,13 @@ Struct(GPU_BufferDesc) //////////////////////////////////////////////////////////// //~ Texture types -#define GPU_MaxRasterTargets 8 +#define GPU_MaxRenderTargets 8 Enum(GPU_TextureFlag) { GPU_TextureFlag_None = 0, GPU_TextureFlag_AllowWritable = (1 << 0), - GPU_TextureFlag_AllowRasterTarget = (1 << 1), + GPU_TextureFlag_AllowRenderTarget = (1 << 1), }; Enum(GPU_TextureKind) @@ -232,7 +319,7 @@ Struct(GPU_TextureDesc) GPU_Format format; Vec3I32 dims; GPU_TextureFlag flags; - GPU_AccessKind initial_access; + GPU_LayoutKind initial_layout; i32 mip_levels; /* Will be clamped to range [1, max] */ Vec4 clear_color; }; @@ -409,32 +496,32 @@ GPU_ResourceHandle GPU_PushSampler(GPU_ArenaHandle arena, GPU_SamplerDesc desc); } \ ) -#define GPU_PushTexture1D(arena, _format, _size, _initial_access) GPU_PushTextureEx((arena), \ +#define GPU_PushTexture1D(arena, _format, _size, _initial_layout) GPU_PushTextureEx((arena), \ (GPU_TextureDesc) { \ .kind = GPU_TextureKind_1D, \ .format = (_format), \ .dims = VEC3I32((_size), 1, 1), \ - .initial_access = (_initial_access), \ + .initial_layout = (_initial_layout), \ __VA_ARGS__ \ } \ ) -#define 
GPU_PushTexture2D(arena, _format, _size, _initial_access) GPU_PushTextureEx((arena), \ +#define GPU_PushTexture2D(arena, _format, _size, _initial_layout) GPU_PushTextureEx((arena), \ (GPU_TextureDesc) { \ .kind = GPU_TextureKind_2D, \ .format = (_format), \ .dims = VEC3I32((_size).x, (_size).y, 1), \ - .initial_access = (_initial_access), \ + .initial_layout = (_initial_layout), \ __VA_ARGS__ \ } \ ) -#define GPU_PushTexture3D(arena, _format, _size, _initial_access) GPU_PushTextureEx((arena), \ +#define GPU_PushTexture3D(arena, _format, _size, _initial_layout) GPU_PushTextureEx((arena), \ (GPU_TextureDesc) { \ .kind = GPU_TextureKind_3D, \ .format = (_format), \ .dims = (_size), \ - .initial_access = (_initial_access), \ + .initial_layout = (_initial_layout), \ __VA_ARGS__ \ } \ ) @@ -450,7 +537,7 @@ Texture2DGpuPtr GPU_PushTexture2DPtr (GPU_ArenaHandle arena, GPU_Resourc RWTexture2DGpuPtr GPU_PushRWTexture2DPtr (GPU_ArenaHandle arena, GPU_ResourceHandle resource); Texture3DGpuPtr GPU_PushTexture3DPtr (GPU_ArenaHandle arena, GPU_ResourceHandle resource); RWTexture3DGpuPtr GPU_PushRWTexture3DPtr (GPU_ArenaHandle arena, GPU_ResourceHandle resource); -RasterTargetGpuPtr GPU_PushRasterTargetPtr (GPU_ArenaHandle arena, GPU_ResourceHandle resource); +RenderTargetGpuPtr GPU_PushRenderTargetPtr (GPU_ArenaHandle arena, GPU_ResourceHandle resource); SamplerGpuPtr GPU_PushSamplerPtr (GPU_ArenaHandle arena, GPU_ResourceHandle resource); #define GPU_PushBufferPtr(arena, resource, type) GPU_PushBufferPtrEx((arena), (resource), sizeof(type), RNGU32(0, GPU_CountBuffer((resource), type))) @@ -470,22 +557,27 @@ u64 GPU_Count3D(GPU_ResourceHandle texture3d); //~ @hookdecl Command //- Command list + GPU_CommandListHandle GPU_PrepareCommandList(void); void GPU_CommitCommandListEx(GPU_CommandListHandle cl, GPU_QueueKind queue, u64 fence_ops_count, GPU_FenceOp *fence_ops); #define GPU_CommitCommandList(cl, queue) GPU_CommitCommandListEx((cl), (queue), 0, 0) //- Arena + void 
GPU_ResetArena(GPU_CommandListHandle cl, GPU_ArenaHandle arena); //- Cpu -> Gpu copy -void GPU_CopyCpuBytes(GPU_CommandListHandle cl, GPU_ResourceHandle dst, u64 dst_offset, void *src, RngU64 src_copy_range); -void GPU_CopyCpuTexels(GPU_CommandListHandle cl, GPU_ResourceHandle dst, Vec3I32 dst_offset, void *src, Vec3I32 src_dims, Rng3I32 src_copy_range); + +void GPU_CopyCpuBytes(GPU_CommandListHandle cl, GPU_ResourceHandle dst, u64 dst_offset, void *cpu_src, RngU64 cpu_src_copy_range); +void GPU_CopyCpuTexels(GPU_CommandListHandle cl, GPU_ResourceHandle dst, Vec3I32 dst_offset, void *cpu_src, Vec3I32 cpu_src_dims, Rng3I32 cpu_src_copy_range); //- Gpu <-> Gpu copy + void GPU_CopyBytes(GPU_CommandListHandle cl, GPU_ResourceHandle dst, u64 dst_offset, GPU_ResourceHandle src, RngU64 src_copy_range); void GPU_CopyTexels(GPU_CommandListHandle cl, GPU_ResourceHandle dst, Vec3I32 dst_offset, GPU_ResourceHandle src, Rng3I32 src_copy_range); //- Constants + void GPU_SetConstU32 (GPU_CommandListHandle cl, i32 slot, u32 v); void GPU_SetConstF32 (GPU_CommandListHandle cl, i32 slot, f32 v); void GPU_SetConstBuffer (GPU_CommandListHandle cl, i32 slot, BufferGpuPtr v); @@ -498,24 +590,40 @@ void GPU_SetConstTexture3D (GPU_CommandListHandle cl, i32 slot, Texture3DGpu void GPU_SetConstRWTexture3D (GPU_CommandListHandle cl, i32 slot, RWTexture3DGpuPtr v); void GPU_SetConstSampler (GPU_CommandListHandle cl, i32 slot, SamplerGpuPtr v); -//- Access -void GPU_SyncAccess(GPU_CommandListHandle cl, GPU_ResourceHandle resource, GPU_AccessKind kind); +//- Barrier + +void GPU_BarrierEx(GPU_CommandListHandle cl, GPU_BarrierDesc desc); +#define GPU_LayoutBarrier(_cl, _resource, _sync_prev, _sync_next, _access_prev, _access_next, _layout) \ + GPU_BarrierEx((_cl), (GPU_BarrierDesc) { \ + .resource = (_resource), \ + .sync_prev = GPU_StageFlag_##_sync_prev, \ + .sync_next = GPU_StageFlag_##_sync_next, \ + .access_prev = GPU_AccessFlag_##_access_prev, \ + .access_next = 
GPU_AccessFlag_##_access_next, \
+      .layout = GPU_LayoutKind_##_layout, \
+  })
+#define GPU_Barrier(_cl, _resource, _sync_prev, _sync_next, _access_prev, _access_next) \
+  GPU_LayoutBarrier((_cl), (_resource), _sync_prev, _sync_next, _access_prev, _access_next, NoChange)
 
 //- Compute
+
 void GPU_Compute(GPU_CommandListHandle cl, ComputeShader cs, Vec3I32 groups);
 
 //- Rasterize
+
 void GPU_Rasterize(GPU_CommandListHandle cl,
                    VertexShader vs, PixelShader ps,
                    u32 instances_count, IndexBufferGpuPtr idx_buff,
-                   u32 raster_targets_count, RasterTargetGpuPtr *raster_targets,
+                   u32 raster_targets_count, RenderTargetGpuPtr *raster_targets,
                    Rng3 viewport, Rng2 scissor,
                    GPU_RasterMode mode);
 
 //- Clear
-void GPU_ClearRasterTarget(GPU_CommandListHandle cl, RasterTargetGpuPtr ptr, Vec4 color);
+
+void GPU_ClearRenderTarget(GPU_CommandListHandle cl, RenderTargetGpuPtr ptr, Vec4 color);
 
 //- Profile
+
 void GPU_ProfN(GPU_CommandListHandle cl, String name);
 
 ////////////////////////////////////////////////////////////
diff --git a/src/gpu/gpu_dx12/gpu_dx12.c b/src/gpu/gpu_dx12/gpu_dx12.c
index bd8a3d1a..1a356f56 100644
--- a/src/gpu/gpu_dx12/gpu_dx12.c
+++ b/src/gpu/gpu_dx12/gpu_dx12.c
@@ -284,11 +284,6 @@ void GPU_D12_Startup(void)
 ////////////////////////////////////////////////////////////
 //~ Helpers
 
-DXGI_FORMAT GPU_D12_DxgiFormatFromGpuFormat(GPU_Format format)
-{
-    return (DXGI_FORMAT)format;
-}
-
 GPU_D12_Arena *GPU_D12_ArenaFromHandle(GPU_ArenaHandle handle)
 {
     return (GPU_D12_Arena *)handle.v;
@@ -309,6 +304,85 @@ GPU_D12_Swapchain *GPU_D12_SwapchainFromHandle(GPU_SwapchainHandle handle)
     return (GPU_D12_Swapchain *)handle.v;
 }
 
+DXGI_FORMAT GPU_D12_DxgiFormatFromGpuFormat(GPU_Format format)
+{
+    return (DXGI_FORMAT)format;
+}
+
+D3D12_BARRIER_SYNC GPU_D12_BarrierSyncFromStageFlags(GPU_StageFlag flags)
+{
+    D3D12_BARRIER_SYNC result = 0;
+    if (flags == GPU_StageFlag_AllStages)
+    {
+        result = D3D12_BARRIER_SYNC_ALL;
+    }
+    else
+    {
+        result |= D3D12_BARRIER_SYNC_COMPUTE_SHADING * AnyBit(flags, 
GPU_StageFlag_ComputeShading); + result |= D3D12_BARRIER_SYNC_INDEX_INPUT * AnyBit(flags, GPU_StageFlag_IndexAssembly); + result |= D3D12_BARRIER_SYNC_VERTEX_SHADING * AnyBit(flags, GPU_StageFlag_VertexShading); + result |= D3D12_BARRIER_SYNC_PIXEL_SHADING * AnyBit(flags, GPU_StageFlag_PixelShading); + result |= D3D12_BARRIER_SYNC_DEPTH_STENCIL * AnyBit(flags, GPU_StageFlag_DepthStencil); + result |= D3D12_BARRIER_SYNC_RENDER_TARGET * AnyBit(flags, GPU_StageFlag_RenderTarget); + result |= D3D12_BARRIER_SYNC_COPY * AnyBit(flags, GPU_StageFlag_Copy); + result |= D3D12_BARRIER_SYNC_EXECUTE_INDIRECT * AnyBit(flags, GPU_StageFlag_Indirect); + } + return result; +} + +D3D12_BARRIER_ACCESS GPU_D12_BarrierAccessFromAccessFlags(GPU_AccessFlag flags) +{ + D3D12_BARRIER_ACCESS result = 0; + if (flags == 0) + { + result = D3D12_BARRIER_ACCESS_NO_ACCESS; + } + else if (flags == GPU_AccessFlag_AllAccess) + { + result = D3D12_BARRIER_ACCESS_COMMON; + } + else + { + result |= D3D12_BARRIER_ACCESS_UNORDERED_ACCESS * AnyBit(flags, GPU_AccessFlag_ShaderReadWrite); + result |= D3D12_BARRIER_ACCESS_SHADER_RESOURCE * AnyBit(flags, GPU_AccessFlag_ShaderRead); + result |= D3D12_BARRIER_ACCESS_COPY_DEST * AnyBit(flags, GPU_AccessFlag_CopyWrite); + result |= D3D12_BARRIER_ACCESS_COPY_SOURCE * AnyBit(flags, GPU_AccessFlag_CopyRead); + result |= D3D12_BARRIER_ACCESS_INDEX_BUFFER * AnyBit(flags, GPU_AccessFlag_IndexBuffer); + result |= D3D12_BARRIER_ACCESS_INDIRECT_ARGUMENT * AnyBit(flags, GPU_AccessFlag_IndirectArgument); + result |= D3D12_BARRIER_ACCESS_DEPTH_STENCIL_READ * AnyBit(flags, GPU_AccessFlag_DepthStencilRead); + result |= D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE * AnyBit(flags, GPU_AccessFlag_DepthStencilWrite); + result |= D3D12_BARRIER_ACCESS_RENDER_TARGET * AnyBit(flags, GPU_AccessFlag_RenderTargetWrite); + } + return result; +} + +D3D12_BARRIER_LAYOUT GPU_D12_BarrierLayoutFromLayoutKind(GPU_LayoutKind kind) +{ + PERSIST Readonly D3D12_BARRIER_LAYOUT translate[] = { + 
[GPU_LayoutKind_Undefined] = D3D12_BARRIER_LAYOUT_UNDEFINED, + [GPU_LayoutKind_Present] = D3D12_BARRIER_LAYOUT_COMMON, + [GPU_LayoutKind_AnyQueue_ShaderRead_CopyRead_CopyWrite] = D3D12_BARRIER_LAYOUT_COMMON, + [GPU_LayoutKind_AnyQueue_ShaderReadWrite] = D3D12_BARRIER_LAYOUT_UNORDERED_ACCESS, + [GPU_LayoutKind_AnyQueue_ShaderRead_CopyRead] = D3D12_BARRIER_LAYOUT_GENERIC_READ, + [GPU_LayoutKind_AnyQueue_ShaderRead] = D3D12_BARRIER_LAYOUT_SHADER_RESOURCE, + [GPU_LayoutKind_AnyQueue_CopyRead] = D3D12_BARRIER_LAYOUT_COPY_SOURCE, + [GPU_LayoutKind_DirectQueue_ShaderReadWrite_CopyRead_CopyWrite] = D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COMMON, + [GPU_LayoutKind_DirectQueue_ShaderReadWrite] = D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_UNORDERED_ACCESS, + [GPU_LayoutKind_DirectQueue_ShaderRead_CopyRead_DepthStencilRead] = D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_GENERIC_READ, + [GPU_LayoutKind_DirectQueue_ShaderRead] = D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_SHADER_RESOURCE, + [GPU_LayoutKind_DirectQueue_CopyRead] = D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COPY_SOURCE, + [GPU_LayoutKind_DirectQueue_DepthStencilRead_DepthStencilWrite] = D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE, + [GPU_LayoutKind_DirectQueue_DepthStencilRead] = D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_READ, + [GPU_LayoutKind_DirectQueue_RenderTargetWrite] = D3D12_BARRIER_LAYOUT_RENDER_TARGET, + [GPU_LayoutKind_ComputeQueue_ShaderReadWrite_CopyRead_CopyWrite] = D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COMMON, + [GPU_LayoutKind_ComputeQueue_ShaderReadWrite] = D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_UNORDERED_ACCESS, + [GPU_LayoutKind_ComputeQueue_ShaderRead_CopyRead] = D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_GENERIC_READ, + [GPU_LayoutKind_ComputeQueue_ShaderRead] = D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_SHADER_RESOURCE, + [GPU_LayoutKind_ComputeQueue_CopyRead] = D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COPY_SOURCE, + }; + return translate[kind]; +}; + //////////////////////////////////////////////////////////// //~ Pipeline @@ -525,7 +599,7 @@ GPU_D12_Queue 
*GPU_D12_QueueFromKind(GPU_QueueKind kind) // Unlock(&lock); // } -// GPU_D12_Descriptor *GPU_D12_DescriptorFromRtPtr(RasterTargetGpuPtr ptr) +// GPU_D12_Descriptor *GPU_D12_DescriptorFromRtPtr(RenderTargetGpuPtr ptr) // { // /* TODO */ // return 0; @@ -1265,7 +1339,7 @@ RWTexture3DGpuPtr GPU_PushRWTexture3DPtr(GPU_ArenaHandle arena_handle, GPU_Resou return (RWTexture3DGpuPtr) { 0 }; } -RasterTargetGpuPtr GPU_PushRasterTargetPtr(GPU_ArenaHandle arena_handle, GPU_ResourceHandle resource_handle) +RenderTargetGpuPtr GPU_PushRenderTargetPtr(GPU_ArenaHandle arena_handle, GPU_ResourceHandle resource_handle) { /* Allocate descriptor */ GPU_D12_SharedState *g = &GPU_D12_shared_state; @@ -1277,7 +1351,7 @@ RasterTargetGpuPtr GPU_PushRasterTargetPtr(GPU_ArenaHandle arena_handle, GPU_Res ID3D12Device_CreateRenderTargetView(g->device, resource->d3d_resource, 0, rtv_descriptor->handle); /* TODO */ - return (RasterTargetGpuPtr) { .v = rtv_descriptor->index }; + return (RenderTargetGpuPtr) { .v = rtv_descriptor->index }; } SamplerGpuPtr GPU_PushSamplerPtr(GPU_ArenaHandle arena_handle, GPU_ResourceHandle resource_handle) @@ -1411,7 +1485,7 @@ void GPU_CommitCommandListEx(GPU_CommandListHandle cl_handle, GPU_QueueKind queu D3D12_RECT bound_scissor = ZI; D3D_PRIMITIVE_TOPOLOGY bound_primitive_topology = -1; D3D12_INDEX_BUFFER_VIEW bound_ibv = ZI; - D3D12_CPU_DESCRIPTOR_HANDLE bound_raster_targets[GPU_MaxRasterTargets] = ZI; + D3D12_CPU_DESCRIPTOR_HANDLE bound_raster_targets[GPU_MaxRenderTargets] = ZI; /* Flatten command chunks */ u64 cmds_count = 0; @@ -1441,32 +1515,11 @@ void GPU_CommitCommandListEx(GPU_CommandListHandle cl_handle, GPU_QueueKind queu } } - /* Determine skippable access cmds & access cmd before kinds based on resource */ + /* Batch barrier cmds */ { - Struct(ResourceLookupNode) - { - ResourceLookupNode *next_in_list; - ResourceLookupNode *next_in_bin; - - GPU_D12_Resource *resource; - GPU_D12_Cmd *last_access_cmd; - }; - - Struct(ResourceLookupBin) - { - 
ResourceLookupNode *first; - ResourceLookupNode *last; - }; - - u64 num_lookup_bins = MaxU64(64, AlignU64Pow2(cmds_count * 4)); - ResourceLookupNode *first_resource_node = 0; - ResourceLookupNode *last_resource_node = 0; - ResourceLookupBin *lookup_bins = PushStructs(scratch.arena, ResourceLookupBin, num_lookup_bins); - - GPU_D12_Cmd *final_access_cmd = 0; - u64 cmd_idx = 0; u64 batch_gen = 0; + GPU_D12_Cmd *prev_barrier_cmd = 0; while (cmd_idx < cmds_count) { GPU_D12_Cmd *cmd = &cmds[cmd_idx]; @@ -1484,75 +1537,43 @@ void GPU_CommitCommandListEx(GPU_CommandListHandle cl_handle, GPU_QueueKind queu case GPU_D12_CmdKind_Rasterize: case GPU_D12_CmdKind_ClearRtv: { - /* TODO: - * - Only interrupt batch if cmd actually runs - * - e.g. Rasterize with empty idx buffer will not actually run - * - For non-shader interruptions, only interrupt batches for explicitly bound resources - * - e.g. Copy should only interrupt batches for supplied resources - */ cmd_idx += 1; batch_gen += 1; } break; - case GPU_D12_CmdKind_Access: + case GPU_D12_CmdKind_Barrier: { - GPU_D12_Resource *resource = cmd->access.resource; - - /* Lookup last resource command resource in current command list */ - ResourceLookupNode *lookup = 0; - { - u64 hash = RandU64FromSeed(resource->uid); - ResourceLookupBin *bin = &lookup_bins[hash % num_lookup_bins]; - lookup = bin->first; - for (; lookup && lookup->resource->uid != resource->uid;) - { - lookup = lookup->next_in_bin; - } - if (!lookup) - { - lookup = PushStruct(scratch.arena, ResourceLookupNode); - lookup->resource = resource; - SllQueuePushN(bin->first, bin->last, lookup, next_in_bin); - SllQueuePushN(first_resource_node, last_resource_node, lookup, next_in_list); - } - } - /* Determine 'before' state from lookup */ - if (lookup->last_access_cmd) + if (prev_barrier_cmd) { - GPU_D12_Cmd *last_cmd = lookup->last_access_cmd; - if (last_cmd->access.batch_gen != batch_gen || last_cmd->access.is_queue_specific != cmd->access.is_queue_specific) + if 
(prev_barrier_cmd->barrier.batch_gen != batch_gen)
                     {
-                        /* Access is part of new batch */
-                        last_cmd->access.is_end_of_batch = 1;
-                        cmd->access.before = last_cmd->access.after;
+                        /* This barrier is part of new batch */
+                        prev_barrier_cmd->barrier.is_end_of_batch = 1;
                     }
                     else
                     {
+                        /* Same batch: keep this barrier; the whole batch is submitted together below. */
+                        /* NOTE(review): barrier descs here may target different resources, so skipping the previous cmd would silently drop its sync/access flushes (the old code merged per-resource only, and carried the before-state). */
                     }
                 }
 
-                lookup->last_access_cmd = cmd;
-                cmd->access.batch_gen = batch_gen;
-
-                final_access_cmd = cmd;
+                cmd->barrier.batch_gen = batch_gen;
+                prev_barrier_cmd = cmd;
 
                 cmd_idx += 1;
             } break;
         }
     }
 
-    if (final_access_cmd)
+    if (prev_barrier_cmd)
     {
-        final_access_cmd->access.is_end_of_batch = 1;
+        prev_barrier_cmd->barrier.is_end_of_batch = 1;
     }
 }
 
 /* Process gpu commands into dx12 commands */
 {
-    u64 batch_access_idx_start = 0;
-    u64 batch_access_idx_opl = 0; /* One past last */
+    u64 batch_barrier_idx_start = 0;
+    u64 batch_barrier_idx_opl = 0; /* One past last */
 
     u64 cmd_idx = 0;
     while (cmd_idx < cmds_count)
@@ -1573,68 +1594,34 @@ void GPU_CommitCommandListEx(GPU_CommandListHandle cl_handle, GPU_QueueKind queu
 
             //- Access
 
-            case GPU_D12_CmdKind_Access:
+            case GPU_D12_CmdKind_Barrier:
             {
-                batch_access_idx_opl = cmd_idx + 1;
+                batch_barrier_idx_opl = cmd_idx + 1;
 
                 /* Submit batched barriers */
-                if (cmd->access.is_end_of_batch)
+                if (cmd->barrier.is_end_of_batch)
                 {
                     /* Build barriers */
                     u64 buffer_barriers_count = 0;
                     u64 texture_barriers_count = 0;
                     u64 global_barriers_count = 0;
-                    D3D12_BUFFER_BARRIER *buffer_barriers = PushStructs(scratch.arena, D3D12_BUFFER_BARRIER, (batch_access_idx_opl - batch_access_idx_start));
-                    D3D12_TEXTURE_BARRIER *texture_barriers = PushStructs(scratch.arena, D3D12_TEXTURE_BARRIER, (batch_access_idx_opl - batch_access_idx_start));
-                    D3D12_GLOBAL_BARRIER *global_barriers = PushStructs(scratch.arena, D3D12_GLOBAL_BARRIER, (batch_access_idx_opl - batch_access_idx_start));
-                    
for (u64 access_cmd_idx = batch_access_idx_start; access_cmd_idx < batch_access_idx_opl; ++access_cmd_idx) + D3D12_BUFFER_BARRIER *buffer_barriers = PushStructs(scratch.arena, D3D12_BUFFER_BARRIER, (batch_barrier_idx_opl - batch_barrier_idx_start)); + D3D12_TEXTURE_BARRIER *texture_barriers = PushStructs(scratch.arena, D3D12_TEXTURE_BARRIER, (batch_barrier_idx_opl - batch_barrier_idx_start)); + D3D12_GLOBAL_BARRIER *global_barriers = PushStructs(scratch.arena, D3D12_GLOBAL_BARRIER, (batch_barrier_idx_opl - batch_barrier_idx_start)); + for (u64 barrier_cmd_idx = batch_barrier_idx_start; barrier_cmd_idx < batch_barrier_idx_opl; ++barrier_cmd_idx) { - GPU_D12_Cmd *access_cmd = &cmds[access_cmd_idx]; - if (access_cmd->kind == GPU_D12_CmdKind_Access && !access_cmd->skip) + GPU_D12_Cmd *barrier_cmd = &cmds[barrier_cmd_idx]; + if (barrier_cmd->kind == GPU_D12_CmdKind_Barrier && !barrier_cmd->skip) { - GPU_D12_Resource *resource = access_cmd->access.resource; + GPU_BarrierDesc desc = barrier_cmd->barrier.desc; + GPU_D12_Resource *resource = GPU_D12_ResourceFromHandle(desc.resource); D3D12_BARRIER_TYPE barrier_type = resource->is_texture ? D3D12_BARRIER_TYPE_TEXTURE : D3D12_BARRIER_TYPE_BUFFER; - b32 is_queue_specific = access_cmd->access.is_queue_specific; - /* Translate gpu access kind -> d3d barrier fields */ - D3D12_BARRIER_SYNC d3d_syncs[2] = ZI; - D3D12_BARRIER_ACCESS d3d_accesses[2] = ZI; - D3D12_BARRIER_LAYOUT d3d_layouts[2] = ZI; - for (u32 i = 0; i < 2; ++i) - { - GPU_AccessKind access_kind = i == 0 ? 
access_cmd->access.before : access_cmd->access.after; - - switch (access_kind) - { - case GPU_AccessKind_None: - { - d3d_syncs[i] = D3D12_BARRIER_SYNC_NONE; - d3d_accesses[i] = D3D12_BARRIER_ACCESS_NO_ACCESS; - d3d_layouts[i] = resource->texture_layout; - } break; - - case GPU_AccessKind_Generic_Read: - { - d3d_syncs[i] = D3D12_BARRIER_SYNC_RENDER_TARGET; - d3d_accesses[i] = D3D12_BARRIER_ACCESS_RENDER_TARGET; - d3d_layouts[i] = D3D12_BARRIER_LAYOUT_RENDER_TARGET; - } break; - - case GPU_AccessKind_RasterTarget: - { - d3d_syncs[i] = D3D12_BARRIER_SYNC_RENDER_TARGET; - d3d_accesses[i] = D3D12_BARRIER_ACCESS_RENDER_TARGET; - d3d_layouts[i] = D3D12_BARRIER_LAYOUT_RENDER_TARGET; - } break; - - case GPU_AccessKind_Present: - { - d3d_syncs[i] = D3D12_BARRIER_SYNC_NONE; - d3d_accesses[i] = D3D12_BARRIER_ACCESS_NO_ACCESS; - d3d_layouts[i] = D3D12_BARRIER_LAYOUT_PRESENT; - } break; - } - } + /* Translate gpu barrier kind -> d3d barrier fields */ + D3D12_BARRIER_SYNC sync_before = GPU_D12_BarrierSyncFromStageFlags(desc.sync_prev); + D3D12_BARRIER_SYNC sync_after = GPU_D12_BarrierSyncFromStageFlags(desc.sync_next); + D3D12_BARRIER_ACCESS access_before = GPU_D12_BarrierAccessFromAccessFlags(desc.access_prev); + D3D12_BARRIER_ACCESS access_after = GPU_D12_BarrierAccessFromAccessFlags(desc.access_next); /* Build barrier */ switch (barrier_type) @@ -1642,10 +1629,10 @@ void GPU_CommitCommandListEx(GPU_CommandListHandle cl_handle, GPU_QueueKind queu case D3D12_BARRIER_TYPE_BUFFER: { D3D12_BUFFER_BARRIER *barrier = &buffer_barriers[buffer_barriers_count++]; - barrier->SyncBefore = d3d_syncs[0]; - barrier->SyncAfter = d3d_syncs[1]; - barrier->AccessBefore = d3d_accesses[0]; - barrier->AccessAfter = d3d_accesses[1]; + barrier->SyncBefore = sync_before; + barrier->SyncAfter = sync_after; + barrier->AccessBefore = access_before; + barrier->AccessAfter = access_after; barrier->pResource = resource->d3d_resource; barrier->Offset = 0; barrier->Size = U64Max; @@ -1653,25 +1640,35 @@ void 
GPU_CommitCommandListEx(GPU_CommandListHandle cl_handle, GPU_QueueKind queu case D3D12_BARRIER_TYPE_TEXTURE: { + D3D12_BARRIER_LAYOUT layout_after = 0; + if (desc.layout == GPU_LayoutKind_NoChange) + { + layout_after = resource->texture_layout; + } + else + { + layout_after = GPU_D12_BarrierLayoutFromLayoutKind(desc.layout); + } + D3D12_TEXTURE_BARRIER *barrier = &texture_barriers[texture_barriers_count++]; - barrier->SyncBefore = d3d_syncs[0]; - barrier->SyncAfter = d3d_syncs[1]; - barrier->AccessBefore = d3d_accesses[0]; - barrier->AccessAfter = d3d_accesses[1]; - barrier->LayoutBefore = d3d_layouts[0]; - barrier->LayoutAfter = d3d_layouts[1]; + barrier->SyncBefore = sync_before; + barrier->SyncAfter = sync_after; + barrier->AccessBefore = access_before; + barrier->AccessAfter = access_after; + barrier->LayoutBefore = resource->texture_layout; + barrier->LayoutAfter = layout_after; barrier->pResource = resource->d3d_resource; barrier->Subresources.IndexOrFirstMipLevel = 0xffffffff; - resource->texture_layout = d3d_layouts[1]; + resource->texture_layout = layout_after; } break; case D3D12_BARRIER_TYPE_GLOBAL: { D3D12_GLOBAL_BARRIER *barrier = &global_barriers[global_barriers_count++]; - barrier->SyncBefore = d3d_syncs[0]; - barrier->SyncAfter = d3d_syncs[1]; - barrier->AccessBefore = d3d_accesses[0]; - barrier->AccessAfter = d3d_accesses[1]; + barrier->SyncBefore = sync_before; + barrier->SyncAfter = sync_after; + barrier->AccessBefore = access_before; + barrier->AccessAfter = access_after; } break; } } @@ -1708,7 +1705,7 @@ void GPU_CommitCommandListEx(GPU_CommandListHandle cl_handle, GPU_QueueKind queu } } - batch_access_idx_start = cmd_idx + 1; + batch_barrier_idx_start = cmd_idx + 1; } cmd_idx += 1; @@ -1850,7 +1847,7 @@ void GPU_CommitCommandListEx(GPU_CommandListHandle cl_handle, GPU_QueueKind queu GPU_D12_Descriptor *rtv_descriptor = cmd->rasterize.rtv_descriptors[i]; if (rtv_descriptor != 0) { - pipeline_desc.render_target_formats[i] = 
rtv_descriptor->resource->texture_desc.format; + pipeline_desc.render_target_formats[i] = rtv_descriptor->resource->texture_format; } else { @@ -2053,9 +2050,69 @@ void GPU_CopyCpuBytes(GPU_CommandListHandle cl, GPU_ResourceHandle dst, u64 dst_ /* TODO */ } -void GPU_CopyCpuTexels(GPU_CommandListHandle cl, GPU_ResourceHandle dst, Vec3I32 dst_offset, void *src, Vec3I32 src_dims, Rng3I32 src_copy_range) +void GPU_CopyCpuTexels(GPU_CommandListHandle cl, GPU_ResourceHandle dst_handle, Vec3I32 dst_offset, void *cpu_src, Vec3I32 cpu_src_dims, Rng3I32 cpu_src_copy_range) { /* TODO */ + + + + +// GPU_D12_SharedState *g = &GPU_D12_shared_state; + +// D3D12_RESOURCE_DESC desc = ZI; +// ID3D12Resource_GetDesc(((GPU_D12_Resource *)footprint_reference)->d3d_resource, &desc); + +// u64 upload_size = 0; +// u64 upload_row_size = 0; +// u32 upload_num_rows = 0; +// D3D12_PLACED_SUBRESOURCE_FOOTPRINT placed_footprint = ZI; +// ID3D12Device_GetCopyableFootprints(g->device, &desc, 0, 1, 0, &placed_footprint, &upload_num_rows, &upload_row_size, &upload_size); +// D3D12_SUBRESOURCE_FOOTPRINT footprint = placed_footprint.Footprint; + +// { +// D3D12_RANGE read_range = ZI; +// u8 *dst_base = (u8 *)dst + placed_footprint.Offset; +// u8 *src_base = src; + +// u32 z_size = upload_row_size * upload_num_rows; + +// b32 src_overflow = 0; +// for (u32 z = 0; !src_overflow && z < desc.DepthOrArraySize; ++z) +// { +// u32 z_offset = z * z_size; +// for (u32 y = 0; !src_overflow && y < upload_num_rows; ++y) +// { +// u8 *dst_row = dst_base + y * footprint.RowPitch + z_offset; +// u8 *src_row = src_base + y * upload_row_size + z_offset; +// CopyBytes(dst_row, src_row, upload_row_size); +// } +// } +// } + + + + + + // GPU_D12_Resource *dst = GPU_D12_ResourceFromHandle(dst_handle); + + // D3D12_RESOURCE_DESC desc = ZI; + // ID3D12Resource_GetDesc(dst->d3d_resource, &desc); + + // u64 upload_size = 0; + // u64 upload_row_size = 0; + // u32 upload_num_rows = 0; + // D3D12_PLACED_SUBRESOURCE_FOOTPRINT 
placed_footprint = ZI; + // ID3D12Device_GetCopyableFootprints(g->device, &desc, 0, dst->texture_mip_levels, 0, &placed_footprint, &upload_num_rows, &upload_row_size, &upload_size); + // D3D12_SUBRESOURCE_FOOTPRINT footprint = placed_footprint.Footprint; + + + + + + // void *copy_start = ((u8 *)src) + src_copy_range.min; + // u64 copy_len = src_copy_range.max - src_copy_range.min; + + } //- Gpu <-> Gpu copy @@ -2127,26 +2184,14 @@ void GPU_SetConstSampler(GPU_CommandListHandle cl_handle, i32 slot, SamplerGpuPt /* TODO */ } -//- Access +//- Barrier -void GPU_SyncQueueAccess(GPU_CommandListHandle cl_handle, GPU_ResourceHandle handle, GPU_AccessKind kind) +void GPU_BarrierEx(GPU_CommandListHandle cl_handle, GPU_BarrierDesc desc) { GPU_D12_CmdList *cl = GPU_D12_CmdListFromHandle(cl_handle); GPU_D12_Cmd *cmd = GPU_D12_PushCmd(cl); - cmd->kind = GPU_D12_CmdKind_Access; - cmd->access.after = kind; - cmd->access.resource = GPU_D12_ResourceFromHandle(handle); - cmd->access.is_queue_specific = 1; -} - -void GPU_SyncGlobalAccess(GPU_CommandListHandle cl_handle, GPU_ResourceHandle handle, GPU_AccessKind kind) -{ - GPU_D12_CmdList *cl = GPU_D12_CmdListFromHandle(cl_handle); - GPU_D12_Cmd *cmd = GPU_D12_PushCmd(cl); - cmd->kind = GPU_D12_CmdKind_Access; - cmd->access.after = kind; - cmd->access.resource = GPU_D12_ResourceFromHandle(handle); - cmd->access.is_queue_specific = 0; + cmd->kind = GPU_D12_CmdKind_Barrier; + cmd->barrier.desc = desc; } //- Compute @@ -2165,7 +2210,7 @@ void GPU_Compute(GPU_CommandListHandle cl_handle, ComputeShader cs, Vec3I32 grou void GPU_Rasterize(GPU_CommandListHandle cl_handle, VertexShader vs, PixelShader ps, u32 instances_count, IndexBufferGpuPtr idx_buff, - u32 raster_targets_count, RasterTargetGpuPtr *raster_targets, + u32 raster_targets_count, RenderTargetGpuPtr *raster_targets, Rng3 viewport, Rng2 scissor, GPU_RasterMode mode) { @@ -2176,7 +2221,7 @@ void GPU_Rasterize(GPU_CommandListHandle cl_handle, cmd->rasterize.ps = ps; 
cmd->rasterize.instances_count = instances_count; cmd->rasterize.ibv = GPU_D12_IbvFromIbPtr(idx_buff); - for (u32 i = 0; i < MinU32(raster_targets_count, GPU_MaxRasterTargets); ++i) + for (u32 i = 0; i < MinU32(raster_targets_count, GPU_MaxRenderTargets); ++i) { cmd->rasterize.rtv_descriptors[i] = GPU_D12_DescriptorFromIndex(GPU_D12_DescriptorHeapKind_Rtv, raster_targets[i].v); } @@ -2187,7 +2232,7 @@ void GPU_Rasterize(GPU_CommandListHandle cl_handle, //- Clear -void GPU_ClearRasterTarget(GPU_CommandListHandle cl_handle, RasterTargetGpuPtr ptr, Vec4 color) +void GPU_ClearRenderTarget(GPU_CommandListHandle cl_handle, RenderTargetGpuPtr ptr, Vec4 color) { GPU_D12_CmdList *cl = GPU_D12_CmdListFromHandle(cl_handle); GPU_D12_Cmd *cmd = GPU_D12_PushCmd(cl); @@ -2443,16 +2488,11 @@ GPU_ResourceHandle GPU_PrepareBackbuffer(GPU_SwapchainHandle swapchain_handle, G backbuffer->d3d_resource = d3d_resource; backbuffer->uid = Atomic64FetchAdd(&g->next_resource_uid.v, 1); backbuffer->is_texture = 1; + backbuffer->texture_flags = GPU_TextureFlag_AllowRenderTarget; + backbuffer->texture_dims = VEC3I32(size.x, size.y, 1); + backbuffer->texture_mip_levels = 1; backbuffer->texture_layout = D3D12_BARRIER_LAYOUT_PRESENT; backbuffer->swapchain = swapchain; - { - backbuffer->texture_desc.kind = GPU_TextureKind_2D; - backbuffer->texture_desc.format = format; - backbuffer->texture_desc.dims = VEC3I32(size.x, size.y, 1); - backbuffer->texture_desc.flags = GPU_TextureFlag_AllowRasterTarget; - backbuffer->texture_desc.initial_access = GPU_AccessKind_Present; - backbuffer->texture_desc.mip_levels = 1; - } } } swapchain->backbuffers_format = format; diff --git a/src/gpu/gpu_dx12/gpu_dx12.h b/src/gpu/gpu_dx12/gpu_dx12.h index a36d8a34..736937e0 100644 --- a/src/gpu/gpu_dx12/gpu_dx12.h +++ b/src/gpu/gpu_dx12/gpu_dx12.h @@ -34,7 +34,7 @@ Struct(GPU_D12_PipelineDesc) ComputeShader cs; b32 is_wireframe; D3D12_PRIMITIVE_TOPOLOGY_TYPE topology_type; - GPU_Format 
render_target_formats[GPU_MaxRasterTargets]; + GPU_Format render_target_formats[GPU_MaxRenderTargets]; }; Struct(GPU_D12_Pipeline) @@ -119,13 +119,17 @@ Struct(GPU_D12_Resource) u64 uid; /* Buffer info */ - GPU_BufferDesc buffer_desc; + GPU_BufferFlag buffer_flags; + u64 buffer_size; D3D12_GPU_VIRTUAL_ADDRESS buffer_gpu_address; /* Texture info */ b32 is_texture; + GPU_TextureFlag texture_flags; + GPU_Format texture_format; + Vec3I32 texture_dims; + i32 texture_mip_levels; D3D12_BARRIER_LAYOUT texture_layout; - GPU_TextureDesc texture_desc; /* Backbuffer info */ struct GPU_D12_Swapchain *swapchain; @@ -176,23 +180,11 @@ Struct(GPU_D12_RawCommandList) Enum(GPU_D12_CmdKind) { GPU_D12_CmdKind_None, - - /* Access */ - GPU_D12_CmdKind_Access, - - /* Constant */ + GPU_D12_CmdKind_Barrier, GPU_D12_CmdKind_Constant, - - /* Copy */ GPU_D12_CmdKind_Copy, - - /* Compute */ GPU_D12_CmdKind_Compute, - - /* Rasterize */ GPU_D12_CmdKind_Rasterize, - - /* Clear rtv */ GPU_D12_CmdKind_ClearRtv, }; @@ -204,15 +196,12 @@ Struct(GPU_D12_Cmd) { struct { - GPU_AccessKind after; - GPU_D12_Resource *resource; + GPU_BarrierDesc desc; /* Post-batch data */ - GPU_AccessKind before; b32 is_end_of_batch; - b32 is_queue_specific; u64 batch_gen; - } access; + } barrier; struct { @@ -250,7 +239,7 @@ Struct(GPU_D12_Cmd) PixelShader ps; u32 instances_count; D3D12_INDEX_BUFFER_VIEW ibv; - GPU_D12_Descriptor *rtv_descriptors[GPU_MaxRasterTargets]; + GPU_D12_Descriptor *rtv_descriptors[GPU_MaxRenderTargets]; Rng3 viewport; Rng2 scissor; GPU_RasterMode mode; @@ -350,12 +339,16 @@ void GPU_D12_Startup(void); //////////////////////////////////////////////////////////// //~ Helpers -DXGI_FORMAT GPU_D12_DxgiFormatFromGpuFormat(GPU_Format format); GPU_D12_Arena *GPU_D12_ArenaFromHandle(GPU_ArenaHandle handle); GPU_D12_CmdList *GPU_D12_CommandListFromHandle(GPU_CommandListHandle handle); GPU_D12_Resource *GPU_D12_ResourceFromHandle(GPU_ResourceHandle handle); GPU_D12_Swapchain 
*GPU_D12_SwapchainFromHandle(GPU_SwapchainHandle handle); +DXGI_FORMAT GPU_D12_DxgiFormatFromGpuFormat(GPU_Format format); +D3D12_BARRIER_SYNC GPU_D12_BarrierSyncFromStageFlags(GPU_StageFlag flags); +D3D12_BARRIER_ACCESS GPU_D12_BarrierAccessFromAccessFlags(GPU_AccessFlag flags); +D3D12_BARRIER_LAYOUT GPU_D12_BarrierLayoutFromLayoutKind(GPU_LayoutKind kind); + //////////////////////////////////////////////////////////// //~ Pipeline diff --git a/src/proto/proto.c b/src/proto/proto.c index 3d8e8c04..b5f7b1d3 100644 --- a/src/proto/proto.c +++ b/src/proto/proto.c @@ -31,19 +31,33 @@ JobImpl(PR_RunForever, _sig, _id) /* Draw to backbuffer */ GPU_CommandListHandle cl = GPU_PrepareCommandList(); { - RasterTargetGpuPtr backbuffer_rt = GPU_PushRasterTargetPtr(gpu_frame_arena, backbuffer); + RenderTargetGpuPtr backbuffer_rt = GPU_PushRenderTargetPtr(gpu_frame_arena, backbuffer); - /* Clear backbuffer */ + /* Prep clear pass */ { - GPU_SyncAccess(cl, backbuffer, GPU_AccessKind_RasterTarget); - GPU_ClearRasterTarget(cl, backbuffer_rt, VEC4(1, 0, 0, 1)); + GPU_LayoutBarrier(cl, backbuffer, + NoStage, RenderTarget, + NoAccess, RenderTargetWrite, + DirectQueue_RenderTargetWrite); } - /* Make backbuffer presentable */ - GPU_SyncAccess(cl, backbuffer, GPU_AccessKind_Present); + /* Clear pass */ + { + GPU_ClearRenderTarget(cl, backbuffer_rt, VEC4(1, 0, 0, 1)); + } + + /* Finalize backbuffer layout */ + { + GPU_LayoutBarrier(cl, backbuffer, + RenderTarget, NoStage, + RenderTargetWrite, NoAccess, + Present); + } /* Reset */ - GPU_ResetArena(cl, gpu_frame_arena); + { + GPU_ResetArena(cl, gpu_frame_arena); + } } GPU_CommitCommandList(cl, GPU_QueueKind_Direct); } diff --git a/src/sprite/sprite.c b/src/sprite/sprite.c index 4c502cb8..83c71ec0 100644 --- a/src/sprite/sprite.c +++ b/src/sprite/sprite.c @@ -27,7 +27,7 @@ JobImpl(SPR_LoadTexture, sig, _) GPU_ResourceHandle gpu_resource = GPU_PushTexture2D(gpu_perm, GPU_Format_R8G8B8A8_Unorm_Srgb, VEC2I32(decoded.width, decoded.height), - 
GPU_AccessKind_AnyRead); + GPU_LayoutKind_AnyQueue_ShaderRead_CopyRead_CopyWrite); // texture->texture = gpu_tex; texture->width = decoded.width; texture->height = decoded.height; @@ -38,8 +38,12 @@ JobImpl(SPR_LoadTexture, sig, _) gpu_resource, VEC3I32(0,0,0), decoded.pixels, VEC3I32(decoded.width, decoded.height, 1), RNG3I32(VEC3I32(0,0,0), VEC3I32(decoded.width, decoded.height, 1))); + GPU_LayoutBarrier(cl, gpu_resource, + Copy, NoStage, + CopyWrite, NoAccess, + AnyQueue_ShaderRead_CopyRead); } - GPU_CommitCommandListEx(cl, GPU_QueueKind_AsyncCopy, 1, &GPU_SetFence(&entry->texture_ready_fence, 1)); + GPU_CommitCommandList(cl, GPU_QueueKind_AsyncCopy); } EndScratch(scratch);