expose barrier before/after sync, access & layout in gpu api

This commit is contained in:
jacob 2025-11-23 22:06:45 -06:00
parent 7c9f5f7e06
commit 4e95e44823
6 changed files with 404 additions and 245 deletions

View File

@ -745,7 +745,7 @@ Struct(U128)
Struct(RWTexture2DGpuPtr) { u32 v; }; Struct(RWTexture2DGpuPtr) { u32 v; };
Struct(Texture3DGpuPtr) { u32 v; }; Struct(Texture3DGpuPtr) { u32 v; };
Struct(RWTexture3DGpuPtr) { u32 v; }; Struct(RWTexture3DGpuPtr) { u32 v; };
Struct(RasterTargetGpuPtr) { u32 v; }; Struct(RenderTargetGpuPtr) { u32 v; };
Struct(SamplerGpuPtr) { u32 v; }; Struct(SamplerGpuPtr) { u32 v; };
#define IsGpuPtrNil(p) ((p).v == 0) #define IsGpuPtrNil(p) ((p).v == 0)
@ -773,7 +773,7 @@ Struct(U128)
typedef RWTexture2DGpuPtr u32; typedef RWTexture2DGpuPtr u32;
typedef Texture3DGpuPtr u32; typedef Texture3DGpuPtr u32;
typedef RWTexture3DGpuPtr u32; typedef RWTexture3DGpuPtr u32;
typedef RasterTargetGpuPtr u32; typedef RenderTargetGpuPtr u32;
typedef SamplerGpuPtr u32; typedef SamplerGpuPtr u32;
#define IsGpuPtrNil(p) ((p) == 0) #define IsGpuPtrNil(p) ((p) == 0)

View File

@ -161,35 +161,122 @@ Enum(GPU_Format)
}; };
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ Access types //~ Barrier types
Enum(GPU_AccessKind) Enum(GPU_StageFlag)
{ {
GPU_AccessKind_None, GPU_StageFlag_NoStage = 0,
/* Generic */ /* Compute stages */
GPU_AccessKind_Generic_Read, GPU_StageFlag_ComputeShading = (1 << 1),
// GPU_AccessKind_Generic_ReadWrite, /* NOTE: Textures cannot transition to/from this access to another access kind. They must be created with it. */
/* Copy */ /* Draw stages */
GPU_AccessKind_Copy_Read, GPU_StageFlag_IndexAssembly = (1 << 2),
GPU_AccessKind_Copy_Write, GPU_StageFlag_VertexShading = (1 << 3),
GPU_StageFlag_PixelShading = (1 << 4),
GPU_StageFlag_DepthStencil = (1 << 5),
GPU_StageFlag_RenderTarget = (1 << 6),
/* Shader read/write */ /* Copy stages */
GPU_AccessKind_AnyShader_Read, GPU_StageFlag_Copy = (1 << 7),
GPU_AccessKind_AnyShader_ReadWrite,
GPU_AccessKind_ComputeShader_Read,
GPU_AccessKind_ComputeShader_ReadWrite,
GPU_AccessKind_VertexPixelShader_Read,
GPU_AccessKind_VertexPixelShader_ReadWrite,
GPU_AccessKind_VertexShader_Read,
GPU_AccessKind_VertexShader_ReadWrite,
GPU_AccessKind_PixelShader_Read,
GPU_AccessKind_PixelShader_ReadWrite,
/* Special */ /* Indirect stages */
GPU_AccessKind_RasterTarget, GPU_StageFlag_Indirect = (1 << 8),
GPU_AccessKind_Present,
/* Aggregate stages */
GPU_StageFlag_AllDrawStages = GPU_StageFlag_IndexAssembly |
GPU_StageFlag_VertexShading |
GPU_StageFlag_PixelShading |
GPU_StageFlag_DepthStencil |
GPU_StageFlag_RenderTarget,
GPU_StageFlag_AllShadingStages = GPU_StageFlag_ComputeShading |
GPU_StageFlag_VertexShading |
GPU_StageFlag_PixelShading,
GPU_StageFlag_AllNonPixelShadingStages = GPU_StageFlag_ComputeShading |
GPU_StageFlag_VertexShading,
GPU_StageFlag_AllStages = 0xFFFFFFFF
};
/* Bitmask of resource access types, used for `access_prev` / `access_next` of GPU_BarrierDesc.
 * The trailing comments name the D3D12_BARRIER_ACCESS value the D3D12 backend translates each flag to.
 * NOTE: Bit 0 is unused; individual flags start at (1 << 1).
 */
Enum(GPU_AccessFlag)
{
GPU_AccessFlag_NoAccess = 0, /* D3D12_BARRIER_ACCESS_NO_ACCESS */
GPU_AccessFlag_ShaderReadWrite = (1 << 1), /* D3D12_BARRIER_ACCESS_UNORDERED_ACCESS */
GPU_AccessFlag_ShaderRead = (1 << 2), /* D3D12_BARRIER_ACCESS_SHADER_RESOURCE */
GPU_AccessFlag_CopyWrite = (1 << 3), /* D3D12_BARRIER_ACCESS_COPY_DEST */
GPU_AccessFlag_CopyRead = (1 << 4), /* D3D12_BARRIER_ACCESS_COPY_SOURCE */
GPU_AccessFlag_IndexBuffer = (1 << 5), /* D3D12_BARRIER_ACCESS_INDEX_BUFFER */
GPU_AccessFlag_IndirectArgument = (1 << 6), /* D3D12_BARRIER_ACCESS_INDIRECT_ARGUMENT */
GPU_AccessFlag_DepthStencilRead = (1 << 7), /* D3D12_BARRIER_ACCESS_DEPTH_STENCIL_READ */
GPU_AccessFlag_DepthStencilWrite = (1 << 8), /* D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE */
GPU_AccessFlag_RenderTargetWrite = (1 << 9), /* D3D12_BARRIER_ACCESS_RENDER_TARGET */
GPU_AccessFlag_AllAccess = 0xFFFFFFFF /* D3D12_BARRIER_ACCESS_COMMON (the backend matches this by exact value only) */
};
/* Texture layouts that a barrier can transition to (see the `layout` field of GPU_BarrierDesc).
 * Enumerators rely on their implicit values: the D3D12 backend indexes a translation table with them,
 * so new kinds should be appended with a matching table entry rather than inserted blindly.
 * Each trailing comment names the D3D12_BARRIER_LAYOUT the kind corresponds to.
 * NOTE(review): the enumerator names appear to list the accesses each layout permits - confirm against the D3D12 docs.
 */
Enum(GPU_LayoutKind)
{
GPU_LayoutKind_NoChange, /* Barrier keeps the texture's current layout */
GPU_LayoutKind_AnyQueue_AnyAccess, /* NOTE: Textures cannot transition to/from this layout. They must be created with it. */
GPU_LayoutKind_Undefined, /* D3D12_BARRIER_LAYOUT_UNDEFINED */
GPU_LayoutKind_Present, /* D3D12_BARRIER_LAYOUT_COMMON */
//////////////////////////////
//- Queue-agnostic
GPU_LayoutKind_AnyQueue_ShaderRead_CopyRead_CopyWrite, /* D3D12_BARRIER_LAYOUT_COMMON */
GPU_LayoutKind_AnyQueue_ShaderReadWrite, /* D3D12_BARRIER_LAYOUT_UNORDERED_ACCESS */
GPU_LayoutKind_AnyQueue_ShaderRead_CopyRead, /* D3D12_BARRIER_LAYOUT_GENERIC_READ */
GPU_LayoutKind_AnyQueue_ShaderRead, /* D3D12_BARRIER_LAYOUT_SHADER_RESOURCE */
GPU_LayoutKind_AnyQueue_CopyRead, /* D3D12_BARRIER_LAYOUT_COPY_SOURCE */
//////////////////////////////
//- Direct queue
GPU_LayoutKind_DirectQueue_ShaderReadWrite_CopyRead_CopyWrite, /* D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COMMON */
GPU_LayoutKind_DirectQueue_ShaderReadWrite, /* D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_UNORDERED_ACCESS */
GPU_LayoutKind_DirectQueue_ShaderRead_CopyRead_DepthStencilRead, /* D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_GENERIC_READ */
GPU_LayoutKind_DirectQueue_ShaderRead, /* D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_SHADER_RESOURCE */
GPU_LayoutKind_DirectQueue_CopyRead, /* D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COPY_SOURCE */
GPU_LayoutKind_DirectQueue_DepthStencilRead_DepthStencilWrite, /* D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE */
GPU_LayoutKind_DirectQueue_DepthStencilRead, /* D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_READ */
GPU_LayoutKind_DirectQueue_RenderTargetWrite, /* D3D12_BARRIER_LAYOUT_RENDER_TARGET */
//////////////////////////////
//- Compute queue
GPU_LayoutKind_ComputeQueue_ShaderReadWrite_CopyRead_CopyWrite, /* D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COMMON */
GPU_LayoutKind_ComputeQueue_ShaderReadWrite, /* D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_UNORDERED_ACCESS */
GPU_LayoutKind_ComputeQueue_ShaderRead_CopyRead, /* D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_GENERIC_READ */
GPU_LayoutKind_ComputeQueue_ShaderRead, /* D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_SHADER_RESOURCE */
GPU_LayoutKind_ComputeQueue_CopyRead, /* D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COPY_SOURCE */
};
/* Describes a single barrier on one resource.
 *
 * The barrier executes after the previous stages specified by `sync_prev` have finished,
 * and before the next stages specified by `sync_next` begin.
 * When the barrier executes:
 * - Necessary resource flushes will occur based on `access_prev` & `access_next`
 * - The texture layout will transition based on `layout` (if specified)
 */
Struct(GPU_BarrierDesc)
{
GPU_ResourceHandle resource; /* Buffer or texture the barrier applies to */
GPU_StageFlag sync_prev; /* Stages that must complete before the barrier */
GPU_StageFlag sync_next; /* Stages that wait on the barrier */
GPU_AccessFlag access_prev; /* Accesses performed before the barrier */
GPU_AccessFlag access_next; /* Accesses performed after the barrier */
GPU_LayoutKind layout; /* Layout to transition to; GPU_LayoutKind_NoChange (zero) keeps the current layout. The D3D12 backend only reads this for textures */
}; };
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
@ -210,13 +297,13 @@ Struct(GPU_BufferDesc)
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ Texture types //~ Texture types
#define GPU_MaxRasterTargets 8 #define GPU_MaxRenderTargets 8
Enum(GPU_TextureFlag) Enum(GPU_TextureFlag)
{ {
GPU_TextureFlag_None = 0, GPU_TextureFlag_None = 0,
GPU_TextureFlag_AllowWritable = (1 << 0), GPU_TextureFlag_AllowWritable = (1 << 0),
GPU_TextureFlag_AllowRasterTarget = (1 << 1), GPU_TextureFlag_AllowRenderTarget = (1 << 1),
}; };
Enum(GPU_TextureKind) Enum(GPU_TextureKind)
@ -232,7 +319,7 @@ Struct(GPU_TextureDesc)
GPU_Format format; GPU_Format format;
Vec3I32 dims; Vec3I32 dims;
GPU_TextureFlag flags; GPU_TextureFlag flags;
GPU_AccessKind initial_access; GPU_LayoutKind initial_layout;
i32 mip_levels; /* Will be clamped to range [1, max] */ i32 mip_levels; /* Will be clamped to range [1, max] */
Vec4 clear_color; Vec4 clear_color;
}; };
@ -409,32 +496,32 @@ GPU_ResourceHandle GPU_PushSampler(GPU_ArenaHandle arena, GPU_SamplerDesc desc);
} \ } \
) )
#define GPU_PushTexture1D(arena, _format, _size, _initial_access) GPU_PushTextureEx((arena), \ #define GPU_PushTexture1D(arena, _format, _size, _initial_layout) GPU_PushTextureEx((arena), \
(GPU_TextureDesc) { \ (GPU_TextureDesc) { \
.kind = GPU_TextureKind_1D, \ .kind = GPU_TextureKind_1D, \
.format = (_format), \ .format = (_format), \
.dims = VEC3I32((_size), 1, 1), \ .dims = VEC3I32((_size), 1, 1), \
.initial_access = (_initial_access), \ .initial_layout = (_initial_layout), \
__VA_ARGS__ \ __VA_ARGS__ \
} \ } \
) )
#define GPU_PushTexture2D(arena, _format, _size, _initial_access) GPU_PushTextureEx((arena), \ #define GPU_PushTexture2D(arena, _format, _size, _initial_layout) GPU_PushTextureEx((arena), \
(GPU_TextureDesc) { \ (GPU_TextureDesc) { \
.kind = GPU_TextureKind_2D, \ .kind = GPU_TextureKind_2D, \
.format = (_format), \ .format = (_format), \
.dims = VEC3I32((_size).x, (_size).y, 1), \ .dims = VEC3I32((_size).x, (_size).y, 1), \
.initial_access = (_initial_access), \ .initial_layout = (_initial_layout), \
__VA_ARGS__ \ __VA_ARGS__ \
} \ } \
) )
#define GPU_PushTexture3D(arena, _format, _size, _initial_access) GPU_PushTextureEx((arena), \ #define GPU_PushTexture3D(arena, _format, _size, _initial_layout) GPU_PushTextureEx((arena), \
(GPU_TextureDesc) { \ (GPU_TextureDesc) { \
.kind = GPU_TextureKind_3D, \ .kind = GPU_TextureKind_3D, \
.format = (_format), \ .format = (_format), \
.dims = (_size), \ .dims = (_size), \
.initial_access = (_initial_access), \ .initial_layout = (_initial_layout), \
__VA_ARGS__ \ __VA_ARGS__ \
} \ } \
) )
@ -450,7 +537,7 @@ Texture2DGpuPtr GPU_PushTexture2DPtr (GPU_ArenaHandle arena, GPU_Resourc
RWTexture2DGpuPtr GPU_PushRWTexture2DPtr (GPU_ArenaHandle arena, GPU_ResourceHandle resource); RWTexture2DGpuPtr GPU_PushRWTexture2DPtr (GPU_ArenaHandle arena, GPU_ResourceHandle resource);
Texture3DGpuPtr GPU_PushTexture3DPtr (GPU_ArenaHandle arena, GPU_ResourceHandle resource); Texture3DGpuPtr GPU_PushTexture3DPtr (GPU_ArenaHandle arena, GPU_ResourceHandle resource);
RWTexture3DGpuPtr GPU_PushRWTexture3DPtr (GPU_ArenaHandle arena, GPU_ResourceHandle resource); RWTexture3DGpuPtr GPU_PushRWTexture3DPtr (GPU_ArenaHandle arena, GPU_ResourceHandle resource);
RasterTargetGpuPtr GPU_PushRasterTargetPtr (GPU_ArenaHandle arena, GPU_ResourceHandle resource); RenderTargetGpuPtr GPU_PushRenderTargetPtr (GPU_ArenaHandle arena, GPU_ResourceHandle resource);
SamplerGpuPtr GPU_PushSamplerPtr (GPU_ArenaHandle arena, GPU_ResourceHandle resource); SamplerGpuPtr GPU_PushSamplerPtr (GPU_ArenaHandle arena, GPU_ResourceHandle resource);
#define GPU_PushBufferPtr(arena, resource, type) GPU_PushBufferPtrEx((arena), (resource), sizeof(type), RNGU32(0, GPU_CountBuffer((resource), type))) #define GPU_PushBufferPtr(arena, resource, type) GPU_PushBufferPtrEx((arena), (resource), sizeof(type), RNGU32(0, GPU_CountBuffer((resource), type)))
@ -470,22 +557,27 @@ u64 GPU_Count3D(GPU_ResourceHandle texture3d);
//~ @hookdecl Command //~ @hookdecl Command
//- Command list //- Command list
GPU_CommandListHandle GPU_PrepareCommandList(void); GPU_CommandListHandle GPU_PrepareCommandList(void);
void GPU_CommitCommandListEx(GPU_CommandListHandle cl, GPU_QueueKind queue, u64 fence_ops_count, GPU_FenceOp *fence_ops); void GPU_CommitCommandListEx(GPU_CommandListHandle cl, GPU_QueueKind queue, u64 fence_ops_count, GPU_FenceOp *fence_ops);
#define GPU_CommitCommandList(cl, queue) GPU_CommitCommandListEx((cl), (queue), 0, 0) #define GPU_CommitCommandList(cl, queue) GPU_CommitCommandListEx((cl), (queue), 0, 0)
//- Arena //- Arena
void GPU_ResetArena(GPU_CommandListHandle cl, GPU_ArenaHandle arena); void GPU_ResetArena(GPU_CommandListHandle cl, GPU_ArenaHandle arena);
//- Cpu -> Gpu copy //- Cpu -> Gpu copy
void GPU_CopyCpuBytes(GPU_CommandListHandle cl, GPU_ResourceHandle dst, u64 dst_offset, void *src, RngU64 src_copy_range);
void GPU_CopyCpuTexels(GPU_CommandListHandle cl, GPU_ResourceHandle dst, Vec3I32 dst_offset, void *src, Vec3I32 src_dims, Rng3I32 src_copy_range); void GPU_CopyCpuBytes(GPU_CommandListHandle cl, GPU_ResourceHandle dst, u64 dst_offset, void *cpu_src, RngU64 cpu_src_copy_range);
void GPU_CopyCpuTexels(GPU_CommandListHandle cl, GPU_ResourceHandle dst, Vec3I32 dst_offset, void *cpu_src, Vec3I32 cpu_src_dims, Rng3I32 cpu_src_copy_range);
//- Gpu <-> Gpu copy //- Gpu <-> Gpu copy
void GPU_CopyBytes(GPU_CommandListHandle cl, GPU_ResourceHandle dst, u64 dst_offset, GPU_ResourceHandle src, RngU64 src_copy_range); void GPU_CopyBytes(GPU_CommandListHandle cl, GPU_ResourceHandle dst, u64 dst_offset, GPU_ResourceHandle src, RngU64 src_copy_range);
void GPU_CopyTexels(GPU_CommandListHandle cl, GPU_ResourceHandle dst, Vec3I32 dst_offset, GPU_ResourceHandle src, Rng3I32 src_copy_range); void GPU_CopyTexels(GPU_CommandListHandle cl, GPU_ResourceHandle dst, Vec3I32 dst_offset, GPU_ResourceHandle src, Rng3I32 src_copy_range);
//- Constants //- Constants
void GPU_SetConstU32 (GPU_CommandListHandle cl, i32 slot, u32 v); void GPU_SetConstU32 (GPU_CommandListHandle cl, i32 slot, u32 v);
void GPU_SetConstF32 (GPU_CommandListHandle cl, i32 slot, f32 v); void GPU_SetConstF32 (GPU_CommandListHandle cl, i32 slot, f32 v);
void GPU_SetConstBuffer (GPU_CommandListHandle cl, i32 slot, BufferGpuPtr v); void GPU_SetConstBuffer (GPU_CommandListHandle cl, i32 slot, BufferGpuPtr v);
@ -498,24 +590,40 @@ void GPU_SetConstTexture3D (GPU_CommandListHandle cl, i32 slot, Texture3DGpu
void GPU_SetConstRWTexture3D (GPU_CommandListHandle cl, i32 slot, RWTexture3DGpuPtr v); void GPU_SetConstRWTexture3D (GPU_CommandListHandle cl, i32 slot, RWTexture3DGpuPtr v);
void GPU_SetConstSampler (GPU_CommandListHandle cl, i32 slot, SamplerGpuPtr v); void GPU_SetConstSampler (GPU_CommandListHandle cl, i32 slot, SamplerGpuPtr v);
//- Access //- Barrier
void GPU_SyncAccess(GPU_CommandListHandle cl, GPU_ResourceHandle resource, GPU_AccessKind kind);
/* Records a barrier described by `desc` into command list `cl`. */
void GPU_BarrierEx(GPU_CommandListHandle cl, GPU_BarrierDesc desc);
/* Convenience wrapper over GPU_BarrierEx that also requests a layout transition.
 *
 * The sync/access/layout arguments are enumerator *suffixes*, not expressions: each one is
 * token-pasted onto its enum prefix (e.g. passing `ComputeShading` yields GPU_StageFlag_ComputeShading).
 * As a consequence only a single named enumerator can be supplied per argument; to combine
 * flags with `|`, build a GPU_BarrierDesc by hand and call GPU_BarrierEx directly.
 */
#define GPU_LayoutBarrier(_cl, _resource, _sync_prev, _sync_next, _access_prev, _access_next, _layout) \
GPU_BarrierEx((_cl), (GPU_BarrierDesc) { \
.resource = (_resource), \
.sync_prev = GPU_StageFlag_##_sync_prev, \
.sync_next = GPU_StageFlag_##_sync_next, \
.access_prev = GPU_AccessFlag_##_access_prev, \
.access_next = GPU_AccessFlag_##_access_next, \
.layout = GPU_LayoutKind_##_layout, \
})
/* Barrier without a layout transition.
 *
 * Forwards to GPU_LayoutBarrier with `NoChange` as the layout suffix (i.e. GPU_LayoutKind_NoChange),
 * so the resource keeps whatever layout it currently has. GPU_LayoutBarrier takes seven arguments;
 * the layout must be supplied here or every use of this macro fails to preprocess.
 * The sync/access arguments are enumerator suffixes, exactly as for GPU_LayoutBarrier.
 */
#define GPU_Barrier(_cl, _resource, _sync_prev, _sync_next, _access_prev, _access_next) \
    GPU_LayoutBarrier((_cl), (_resource), _sync_prev, _sync_next, _access_prev, _access_next, NoChange)
//- Compute //- Compute
void GPU_Compute(GPU_CommandListHandle cl, ComputeShader cs, Vec3I32 groups); void GPU_Compute(GPU_CommandListHandle cl, ComputeShader cs, Vec3I32 groups);
//- Rasterize //- Rasterize
void GPU_Rasterize(GPU_CommandListHandle cl, void GPU_Rasterize(GPU_CommandListHandle cl,
VertexShader vs, PixelShader ps, VertexShader vs, PixelShader ps,
u32 instances_count, IndexBufferGpuPtr idx_buff, u32 instances_count, IndexBufferGpuPtr idx_buff,
u32 raster_targets_count, RasterTargetGpuPtr *raster_targets, u32 raster_targets_count, RenderTargetGpuPtr *raster_targets,
Rng3 viewport, Rng2 scissor, Rng3 viewport, Rng2 scissor,
GPU_RasterMode mode); GPU_RasterMode mode);
//- Clear //- Clear
void GPU_ClearRasterTarget(GPU_CommandListHandle cl, RasterTargetGpuPtr ptr, Vec4 color);
void GPU_ClearRenderTarget(GPU_CommandListHandle cl, RenderTargetGpuPtr ptr, Vec4 color);
//- Profile //- Profile
void GPU_ProfN(GPU_CommandListHandle cl, String name); void GPU_ProfN(GPU_CommandListHandle cl, String name);
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////

View File

@ -284,11 +284,6 @@ void GPU_D12_Startup(void)
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ Helpers //~ Helpers
DXGI_FORMAT GPU_D12_DxgiFormatFromGpuFormat(GPU_Format format)
{
return (DXGI_FORMAT)format;
}
GPU_D12_Arena *GPU_D12_ArenaFromHandle(GPU_ArenaHandle handle) GPU_D12_Arena *GPU_D12_ArenaFromHandle(GPU_ArenaHandle handle)
{ {
return (GPU_D12_Arena *)handle.v; return (GPU_D12_Arena *)handle.v;
@ -309,6 +304,85 @@ GPU_D12_Swapchain *GPU_D12_SwapchainFromHandle(GPU_SwapchainHandle handle)
return (GPU_D12_Swapchain *)handle.v; return (GPU_D12_Swapchain *)handle.v;
} }
/* Converts a GPU_Format into a DXGI_FORMAT by plain cast.
 * NOTE(review): this presumes GPU_Format enumerators share DXGI_FORMAT's numeric values - confirm against the enum definition.
 */
DXGI_FORMAT GPU_D12_DxgiFormatFromGpuFormat(GPU_Format format)
{
    DXGI_FORMAT dxgi_format = (DXGI_FORMAT)format;
    return dxgi_format;
}
/* Translates a GPU_StageFlag mask into the equivalent D3D12_BARRIER_SYNC mask.
 *
 * - A mask exactly equal to GPU_StageFlag_AllStages becomes D3D12_BARRIER_SYNC_ALL.
 * - Otherwise each known stage bit contributes its D3D12 counterpart; bits without a
 *   counterpart are ignored, and an empty mask yields 0.
 */
D3D12_BARRIER_SYNC GPU_D12_BarrierSyncFromStageFlags(GPU_StageFlag flags)
{
    /* The catch-all value is matched by exact equality only */
    if (flags == GPU_StageFlag_AllStages)
    {
        return D3D12_BARRIER_SYNC_ALL;
    }

    struct { GPU_StageFlag stage; D3D12_BARRIER_SYNC sync; } pairs[] = {
        { GPU_StageFlag_ComputeShading, D3D12_BARRIER_SYNC_COMPUTE_SHADING },
        { GPU_StageFlag_IndexAssembly,  D3D12_BARRIER_SYNC_INDEX_INPUT },
        { GPU_StageFlag_VertexShading,  D3D12_BARRIER_SYNC_VERTEX_SHADING },
        { GPU_StageFlag_PixelShading,   D3D12_BARRIER_SYNC_PIXEL_SHADING },
        { GPU_StageFlag_DepthStencil,   D3D12_BARRIER_SYNC_DEPTH_STENCIL },
        { GPU_StageFlag_RenderTarget,   D3D12_BARRIER_SYNC_RENDER_TARGET },
        { GPU_StageFlag_Copy,           D3D12_BARRIER_SYNC_COPY },
        { GPU_StageFlag_Indirect,       D3D12_BARRIER_SYNC_EXECUTE_INDIRECT },
    };

    D3D12_BARRIER_SYNC sync_mask = 0;
    for (u32 pair_idx = 0; pair_idx < sizeof(pairs) / sizeof(pairs[0]); ++pair_idx)
    {
        /* Same arithmetic as a hand-unrolled list: the D3D12 bit is scaled by the AnyBit result */
        sync_mask |= pairs[pair_idx].sync * AnyBit(flags, pairs[pair_idx].stage);
    }
    return sync_mask;
}
/* Translates a GPU_AccessFlag mask into the equivalent D3D12_BARRIER_ACCESS mask.
 *
 * - An empty mask becomes D3D12_BARRIER_ACCESS_NO_ACCESS.
 * - A mask exactly equal to GPU_AccessFlag_AllAccess becomes D3D12_BARRIER_ACCESS_COMMON.
 * - Otherwise each known access bit contributes its D3D12 counterpart; bits without a
 *   counterpart are ignored.
 */
D3D12_BARRIER_ACCESS GPU_D12_BarrierAccessFromAccessFlags(GPU_AccessFlag flags)
{
    /* Special values are matched by exact equality before any per-bit translation */
    if (flags == 0)
    {
        return D3D12_BARRIER_ACCESS_NO_ACCESS;
    }
    if (flags == GPU_AccessFlag_AllAccess)
    {
        return D3D12_BARRIER_ACCESS_COMMON;
    }

    struct { GPU_AccessFlag access; D3D12_BARRIER_ACCESS d3d_access; } pairs[] = {
        { GPU_AccessFlag_ShaderReadWrite,   D3D12_BARRIER_ACCESS_UNORDERED_ACCESS },
        { GPU_AccessFlag_ShaderRead,        D3D12_BARRIER_ACCESS_SHADER_RESOURCE },
        { GPU_AccessFlag_CopyWrite,         D3D12_BARRIER_ACCESS_COPY_DEST },
        { GPU_AccessFlag_CopyRead,          D3D12_BARRIER_ACCESS_COPY_SOURCE },
        { GPU_AccessFlag_IndexBuffer,       D3D12_BARRIER_ACCESS_INDEX_BUFFER },
        { GPU_AccessFlag_IndirectArgument,  D3D12_BARRIER_ACCESS_INDIRECT_ARGUMENT },
        { GPU_AccessFlag_DepthStencilRead,  D3D12_BARRIER_ACCESS_DEPTH_STENCIL_READ },
        { GPU_AccessFlag_DepthStencilWrite, D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE },
        { GPU_AccessFlag_RenderTargetWrite, D3D12_BARRIER_ACCESS_RENDER_TARGET },
    };

    D3D12_BARRIER_ACCESS access_mask = 0;
    for (u32 pair_idx = 0; pair_idx < sizeof(pairs) / sizeof(pairs[0]); ++pair_idx)
    {
        /* Same arithmetic as a hand-unrolled list: the D3D12 bit is scaled by the AnyBit result */
        access_mask |= pairs[pair_idx].d3d_access * AnyBit(flags, pairs[pair_idx].access);
    }
    return access_mask;
}
/* Translates a GPU_LayoutKind into the corresponding D3D12_BARRIER_LAYOUT.
 *
 * - Kinds with an explicit table entry return that entry.
 * - Kinds inside the table's range but without an entry (GPU_LayoutKind_NoChange,
 *   GPU_LayoutKind_AnyQueue_AnyAccess) return the zero-initialized slot, which is
 *   D3D12_BARRIER_LAYOUT_COMMON in d3d12.h. Callers that care about NoChange must
 *   handle it before calling.
 * - Kinds outside the table's range return D3D12_BARRIER_LAYOUT_UNDEFINED instead of
 *   reading past the end of the table.
 */
D3D12_BARRIER_LAYOUT GPU_D12_BarrierLayoutFromLayoutKind(GPU_LayoutKind kind)
{
    PERSIST Readonly D3D12_BARRIER_LAYOUT translate[] = {
        [GPU_LayoutKind_Undefined]                                        = D3D12_BARRIER_LAYOUT_UNDEFINED,
        [GPU_LayoutKind_Present]                                          = D3D12_BARRIER_LAYOUT_COMMON,
        [GPU_LayoutKind_AnyQueue_ShaderRead_CopyRead_CopyWrite]           = D3D12_BARRIER_LAYOUT_COMMON,
        [GPU_LayoutKind_AnyQueue_ShaderReadWrite]                         = D3D12_BARRIER_LAYOUT_UNORDERED_ACCESS,
        [GPU_LayoutKind_AnyQueue_ShaderRead_CopyRead]                     = D3D12_BARRIER_LAYOUT_GENERIC_READ,
        [GPU_LayoutKind_AnyQueue_ShaderRead]                              = D3D12_BARRIER_LAYOUT_SHADER_RESOURCE,
        [GPU_LayoutKind_AnyQueue_CopyRead]                                = D3D12_BARRIER_LAYOUT_COPY_SOURCE,
        [GPU_LayoutKind_DirectQueue_ShaderReadWrite_CopyRead_CopyWrite]   = D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COMMON,
        [GPU_LayoutKind_DirectQueue_ShaderReadWrite]                      = D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_UNORDERED_ACCESS,
        [GPU_LayoutKind_DirectQueue_ShaderRead_CopyRead_DepthStencilRead] = D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_GENERIC_READ,
        [GPU_LayoutKind_DirectQueue_ShaderRead]                           = D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_SHADER_RESOURCE,
        [GPU_LayoutKind_DirectQueue_CopyRead]                             = D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COPY_SOURCE,
        [GPU_LayoutKind_DirectQueue_DepthStencilRead_DepthStencilWrite]   = D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE,
        [GPU_LayoutKind_DirectQueue_DepthStencilRead]                     = D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_READ,
        [GPU_LayoutKind_DirectQueue_RenderTargetWrite]                    = D3D12_BARRIER_LAYOUT_RENDER_TARGET,
        [GPU_LayoutKind_ComputeQueue_ShaderReadWrite_CopyRead_CopyWrite]  = D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COMMON,
        [GPU_LayoutKind_ComputeQueue_ShaderReadWrite]                     = D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_UNORDERED_ACCESS,
        [GPU_LayoutKind_ComputeQueue_ShaderRead_CopyRead]                 = D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_GENERIC_READ,
        [GPU_LayoutKind_ComputeQueue_ShaderRead]                          = D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_SHADER_RESOURCE,
        [GPU_LayoutKind_ComputeQueue_CopyRead]                            = D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COPY_SOURCE,
    };

    D3D12_BARRIER_LAYOUT result = D3D12_BARRIER_LAYOUT_UNDEFINED;
    /* The unsigned cast also rejects negative values should the enum's underlying type be signed */
    if ((u64)kind < sizeof(translate) / sizeof(translate[0]))
    {
        result = translate[kind];
    }
    return result;
}
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ Pipeline //~ Pipeline
@ -525,7 +599,7 @@ GPU_D12_Queue *GPU_D12_QueueFromKind(GPU_QueueKind kind)
// Unlock(&lock); // Unlock(&lock);
// } // }
// GPU_D12_Descriptor *GPU_D12_DescriptorFromRtPtr(RasterTargetGpuPtr ptr) // GPU_D12_Descriptor *GPU_D12_DescriptorFromRtPtr(RenderTargetGpuPtr ptr)
// { // {
// /* TODO */ // /* TODO */
// return 0; // return 0;
@ -1265,7 +1339,7 @@ RWTexture3DGpuPtr GPU_PushRWTexture3DPtr(GPU_ArenaHandle arena_handle, GPU_Resou
return (RWTexture3DGpuPtr) { 0 }; return (RWTexture3DGpuPtr) { 0 };
} }
RasterTargetGpuPtr GPU_PushRasterTargetPtr(GPU_ArenaHandle arena_handle, GPU_ResourceHandle resource_handle) RenderTargetGpuPtr GPU_PushRenderTargetPtr(GPU_ArenaHandle arena_handle, GPU_ResourceHandle resource_handle)
{ {
/* Allocate descriptor */ /* Allocate descriptor */
GPU_D12_SharedState *g = &GPU_D12_shared_state; GPU_D12_SharedState *g = &GPU_D12_shared_state;
@ -1277,7 +1351,7 @@ RasterTargetGpuPtr GPU_PushRasterTargetPtr(GPU_ArenaHandle arena_handle, GPU_Res
ID3D12Device_CreateRenderTargetView(g->device, resource->d3d_resource, 0, rtv_descriptor->handle); ID3D12Device_CreateRenderTargetView(g->device, resource->d3d_resource, 0, rtv_descriptor->handle);
/* TODO */ /* TODO */
return (RasterTargetGpuPtr) { .v = rtv_descriptor->index }; return (RenderTargetGpuPtr) { .v = rtv_descriptor->index };
} }
SamplerGpuPtr GPU_PushSamplerPtr(GPU_ArenaHandle arena_handle, GPU_ResourceHandle resource_handle) SamplerGpuPtr GPU_PushSamplerPtr(GPU_ArenaHandle arena_handle, GPU_ResourceHandle resource_handle)
@ -1411,7 +1485,7 @@ void GPU_CommitCommandListEx(GPU_CommandListHandle cl_handle, GPU_QueueKind queu
D3D12_RECT bound_scissor = ZI; D3D12_RECT bound_scissor = ZI;
D3D_PRIMITIVE_TOPOLOGY bound_primitive_topology = -1; D3D_PRIMITIVE_TOPOLOGY bound_primitive_topology = -1;
D3D12_INDEX_BUFFER_VIEW bound_ibv = ZI; D3D12_INDEX_BUFFER_VIEW bound_ibv = ZI;
D3D12_CPU_DESCRIPTOR_HANDLE bound_raster_targets[GPU_MaxRasterTargets] = ZI; D3D12_CPU_DESCRIPTOR_HANDLE bound_raster_targets[GPU_MaxRenderTargets] = ZI;
/* Flatten command chunks */ /* Flatten command chunks */
u64 cmds_count = 0; u64 cmds_count = 0;
@ -1441,32 +1515,11 @@ void GPU_CommitCommandListEx(GPU_CommandListHandle cl_handle, GPU_QueueKind queu
} }
} }
/* Determine skippable access cmds & access cmd before kinds based on resource */ /* Batch barrier cmds */
{ {
Struct(ResourceLookupNode)
{
ResourceLookupNode *next_in_list;
ResourceLookupNode *next_in_bin;
GPU_D12_Resource *resource;
GPU_D12_Cmd *last_access_cmd;
};
Struct(ResourceLookupBin)
{
ResourceLookupNode *first;
ResourceLookupNode *last;
};
u64 num_lookup_bins = MaxU64(64, AlignU64Pow2(cmds_count * 4));
ResourceLookupNode *first_resource_node = 0;
ResourceLookupNode *last_resource_node = 0;
ResourceLookupBin *lookup_bins = PushStructs(scratch.arena, ResourceLookupBin, num_lookup_bins);
GPU_D12_Cmd *final_access_cmd = 0;
u64 cmd_idx = 0; u64 cmd_idx = 0;
u64 batch_gen = 0; u64 batch_gen = 0;
GPU_D12_Cmd *prev_barrier_cmd = 0;
while (cmd_idx < cmds_count) while (cmd_idx < cmds_count)
{ {
GPU_D12_Cmd *cmd = &cmds[cmd_idx]; GPU_D12_Cmd *cmd = &cmds[cmd_idx];
@ -1484,75 +1537,43 @@ void GPU_CommitCommandListEx(GPU_CommandListHandle cl_handle, GPU_QueueKind queu
case GPU_D12_CmdKind_Rasterize: case GPU_D12_CmdKind_Rasterize:
case GPU_D12_CmdKind_ClearRtv: case GPU_D12_CmdKind_ClearRtv:
{ {
/* TODO:
* - Only interrupt batch if cmd actually runs
* - e.g. Rasterize with empty idx buffer will not actually run
* - For non-shader interruptions, only interrupt batches for explicitly bound resources
* - e.g. Copy should only interrupt batches for supplied resources
*/
cmd_idx += 1; cmd_idx += 1;
batch_gen += 1; batch_gen += 1;
} break; } break;
case GPU_D12_CmdKind_Access: case GPU_D12_CmdKind_Barrier:
{ {
GPU_D12_Resource *resource = cmd->access.resource;
/* Lookup last resource command resource in current command list */
ResourceLookupNode *lookup = 0;
{
u64 hash = RandU64FromSeed(resource->uid);
ResourceLookupBin *bin = &lookup_bins[hash % num_lookup_bins];
lookup = bin->first;
for (; lookup && lookup->resource->uid != resource->uid;)
{
lookup = lookup->next_in_bin;
}
if (!lookup)
{
lookup = PushStruct(scratch.arena, ResourceLookupNode);
lookup->resource = resource;
SllQueuePushN(bin->first, bin->last, lookup, next_in_bin);
SllQueuePushN(first_resource_node, last_resource_node, lookup, next_in_list);
}
}
/* Determine 'before' state from lookup */ /* Determine 'before' state from lookup */
if (lookup->last_access_cmd) if (prev_barrier_cmd)
{ {
GPU_D12_Cmd *last_cmd = lookup->last_access_cmd; if (prev_barrier_cmd->barrier.batch_gen != batch_gen)
if (last_cmd->access.batch_gen != batch_gen || last_cmd->access.is_queue_specific != cmd->access.is_queue_specific)
{ {
/* Access is part of new batch */ /* This barrier is part of new batch */
last_cmd->access.is_end_of_batch = 1; prev_barrier_cmd->barrier.is_end_of_batch = 1;
cmd->access.before = last_cmd->access.after;
} }
else else
{ {
/* Last access cmd for this resource is in the same batch, merge them */ /* Barriers can be batched */
cmd->access.before = last_cmd->access.before; prev_barrier_cmd->skip = 1;
last_cmd->skip = 1;
} }
} }
lookup->last_access_cmd = cmd; cmd->barrier.batch_gen = batch_gen;
cmd->access.batch_gen = batch_gen; prev_barrier_cmd = cmd;
final_access_cmd = cmd;
cmd_idx += 1; cmd_idx += 1;
} break; } break;
} }
} }
if (final_access_cmd) if (prev_barrier_cmd)
{ {
final_access_cmd->access.is_end_of_batch = 1; prev_barrier_cmd->barrier.is_end_of_batch = 1;
} }
} }
/* Process gpu commands into dx12 commands */ /* Process gpu commands into dx12 commands */
{ {
u64 batch_access_idx_start = 0; u64 batch_barrier_idx_start = 0;
u64 batch_access_idx_opl = 0; /* One past last */ u64 batch_barrier_idx_opl = 0; /* One past last */
u64 cmd_idx = 0; u64 cmd_idx = 0;
while (cmd_idx < cmds_count) while (cmd_idx < cmds_count)
@ -1573,68 +1594,34 @@ void GPU_CommitCommandListEx(GPU_CommandListHandle cl_handle, GPU_QueueKind queu
//- Access //- Access
case GPU_D12_CmdKind_Access: case GPU_D12_CmdKind_Barrier:
{ {
batch_access_idx_opl = cmd_idx + 1; batch_barrier_idx_opl = cmd_idx + 1;
/* Submit batched barriers */ /* Submit batched barriers */
if (cmd->access.is_end_of_batch) if (cmd->barrier.is_end_of_batch)
{ {
/* Build barriers */ /* Build barriers */
u64 buffer_barriers_count = 0; u64 buffer_barriers_count = 0;
u64 texture_barriers_count = 0; u64 texture_barriers_count = 0;
u64 global_barriers_count = 0; u64 global_barriers_count = 0;
D3D12_BUFFER_BARRIER *buffer_barriers = PushStructs(scratch.arena, D3D12_BUFFER_BARRIER, (batch_access_idx_opl - batch_access_idx_start)); D3D12_BUFFER_BARRIER *buffer_barriers = PushStructs(scratch.arena, D3D12_BUFFER_BARRIER, (batch_barrier_idx_opl - batch_barrier_idx_start));
D3D12_TEXTURE_BARRIER *texture_barriers = PushStructs(scratch.arena, D3D12_TEXTURE_BARRIER, (batch_access_idx_opl - batch_access_idx_start)); D3D12_TEXTURE_BARRIER *texture_barriers = PushStructs(scratch.arena, D3D12_TEXTURE_BARRIER, (batch_barrier_idx_opl - batch_barrier_idx_start));
D3D12_GLOBAL_BARRIER *global_barriers = PushStructs(scratch.arena, D3D12_GLOBAL_BARRIER, (batch_access_idx_opl - batch_access_idx_start)); D3D12_GLOBAL_BARRIER *global_barriers = PushStructs(scratch.arena, D3D12_GLOBAL_BARRIER, (batch_barrier_idx_opl - batch_barrier_idx_start));
for (u64 access_cmd_idx = batch_access_idx_start; access_cmd_idx < batch_access_idx_opl; ++access_cmd_idx) for (u64 barrier_cmd_idx = batch_barrier_idx_start; barrier_cmd_idx < batch_barrier_idx_opl; ++barrier_cmd_idx)
{ {
GPU_D12_Cmd *access_cmd = &cmds[access_cmd_idx]; GPU_D12_Cmd *barrier_cmd = &cmds[barrier_cmd_idx];
if (access_cmd->kind == GPU_D12_CmdKind_Access && !access_cmd->skip) if (barrier_cmd->kind == GPU_D12_CmdKind_Barrier && !barrier_cmd->skip)
{ {
GPU_D12_Resource *resource = access_cmd->access.resource; GPU_BarrierDesc desc = barrier_cmd->barrier.desc;
GPU_D12_Resource *resource = GPU_D12_ResourceFromHandle(desc.resource);
D3D12_BARRIER_TYPE barrier_type = resource->is_texture ? D3D12_BARRIER_TYPE_TEXTURE : D3D12_BARRIER_TYPE_BUFFER; D3D12_BARRIER_TYPE barrier_type = resource->is_texture ? D3D12_BARRIER_TYPE_TEXTURE : D3D12_BARRIER_TYPE_BUFFER;
b32 is_queue_specific = access_cmd->access.is_queue_specific;
/* Translate gpu access kind -> d3d barrier fields */ /* Translate gpu barrier kind -> d3d barrier fields */
D3D12_BARRIER_SYNC d3d_syncs[2] = ZI; D3D12_BARRIER_SYNC sync_before = GPU_D12_BarrierSyncFromStageFlags(desc.sync_prev);
D3D12_BARRIER_ACCESS d3d_accesses[2] = ZI; D3D12_BARRIER_SYNC sync_after = GPU_D12_BarrierSyncFromStageFlags(desc.sync_next);
D3D12_BARRIER_LAYOUT d3d_layouts[2] = ZI; D3D12_BARRIER_ACCESS access_before = GPU_D12_BarrierAccessFromAccessFlags(desc.access_prev);
for (u32 i = 0; i < 2; ++i) D3D12_BARRIER_ACCESS access_after = GPU_D12_BarrierAccessFromAccessFlags(desc.access_next);
{
GPU_AccessKind access_kind = i == 0 ? access_cmd->access.before : access_cmd->access.after;
switch (access_kind)
{
case GPU_AccessKind_None:
{
d3d_syncs[i] = D3D12_BARRIER_SYNC_NONE;
d3d_accesses[i] = D3D12_BARRIER_ACCESS_NO_ACCESS;
d3d_layouts[i] = resource->texture_layout;
} break;
case GPU_AccessKind_Generic_Read:
{
d3d_syncs[i] = D3D12_BARRIER_SYNC_RENDER_TARGET;
d3d_accesses[i] = D3D12_BARRIER_ACCESS_RENDER_TARGET;
d3d_layouts[i] = D3D12_BARRIER_LAYOUT_RENDER_TARGET;
} break;
case GPU_AccessKind_RasterTarget:
{
d3d_syncs[i] = D3D12_BARRIER_SYNC_RENDER_TARGET;
d3d_accesses[i] = D3D12_BARRIER_ACCESS_RENDER_TARGET;
d3d_layouts[i] = D3D12_BARRIER_LAYOUT_RENDER_TARGET;
} break;
case GPU_AccessKind_Present:
{
d3d_syncs[i] = D3D12_BARRIER_SYNC_NONE;
d3d_accesses[i] = D3D12_BARRIER_ACCESS_NO_ACCESS;
d3d_layouts[i] = D3D12_BARRIER_LAYOUT_PRESENT;
} break;
}
}
/* Build barrier */ /* Build barrier */
switch (barrier_type) switch (barrier_type)
@ -1642,10 +1629,10 @@ void GPU_CommitCommandListEx(GPU_CommandListHandle cl_handle, GPU_QueueKind queu
case D3D12_BARRIER_TYPE_BUFFER: case D3D12_BARRIER_TYPE_BUFFER:
{ {
D3D12_BUFFER_BARRIER *barrier = &buffer_barriers[buffer_barriers_count++]; D3D12_BUFFER_BARRIER *barrier = &buffer_barriers[buffer_barriers_count++];
barrier->SyncBefore = d3d_syncs[0]; barrier->SyncBefore = sync_before;
barrier->SyncAfter = d3d_syncs[1]; barrier->SyncAfter = sync_after;
barrier->AccessBefore = d3d_accesses[0]; barrier->AccessBefore = access_before;
barrier->AccessAfter = d3d_accesses[1]; barrier->AccessAfter = access_after;
barrier->pResource = resource->d3d_resource; barrier->pResource = resource->d3d_resource;
barrier->Offset = 0; barrier->Offset = 0;
barrier->Size = U64Max; barrier->Size = U64Max;
@ -1653,25 +1640,35 @@ void GPU_CommitCommandListEx(GPU_CommandListHandle cl_handle, GPU_QueueKind queu
case D3D12_BARRIER_TYPE_TEXTURE: case D3D12_BARRIER_TYPE_TEXTURE:
{ {
D3D12_BARRIER_LAYOUT layout_after = 0;
if (desc.layout == GPU_LayoutKind_NoChange)
{
layout_after = resource->texture_layout;
}
else
{
layout_after = GPU_D12_BarrierLayoutFromLayoutKind(desc.layout);
}
D3D12_TEXTURE_BARRIER *barrier = &texture_barriers[texture_barriers_count++]; D3D12_TEXTURE_BARRIER *barrier = &texture_barriers[texture_barriers_count++];
barrier->SyncBefore = d3d_syncs[0]; barrier->SyncBefore = sync_before;
barrier->SyncAfter = d3d_syncs[1]; barrier->SyncAfter = sync_after;
barrier->AccessBefore = d3d_accesses[0]; barrier->AccessBefore = access_before;
barrier->AccessAfter = d3d_accesses[1]; barrier->AccessAfter = access_after;
barrier->LayoutBefore = d3d_layouts[0]; barrier->LayoutBefore = resource->texture_layout;
barrier->LayoutAfter = d3d_layouts[1]; barrier->LayoutAfter = layout_after;
barrier->pResource = resource->d3d_resource; barrier->pResource = resource->d3d_resource;
barrier->Subresources.IndexOrFirstMipLevel = 0xffffffff; barrier->Subresources.IndexOrFirstMipLevel = 0xffffffff;
resource->texture_layout = d3d_layouts[1]; resource->texture_layout = layout_after;
} break; } break;
case D3D12_BARRIER_TYPE_GLOBAL: case D3D12_BARRIER_TYPE_GLOBAL:
{ {
D3D12_GLOBAL_BARRIER *barrier = &global_barriers[global_barriers_count++]; D3D12_GLOBAL_BARRIER *barrier = &global_barriers[global_barriers_count++];
barrier->SyncBefore = d3d_syncs[0]; barrier->SyncBefore = sync_before;
barrier->SyncAfter = d3d_syncs[1]; barrier->SyncAfter = sync_after;
barrier->AccessBefore = d3d_accesses[0]; barrier->AccessBefore = access_before;
barrier->AccessAfter = d3d_accesses[1]; barrier->AccessAfter = access_after;
} break; } break;
} }
} }
@ -1708,7 +1705,7 @@ void GPU_CommitCommandListEx(GPU_CommandListHandle cl_handle, GPU_QueueKind queu
} }
} }
batch_access_idx_start = cmd_idx + 1; batch_barrier_idx_start = cmd_idx + 1;
} }
cmd_idx += 1; cmd_idx += 1;
@ -1850,7 +1847,7 @@ void GPU_CommitCommandListEx(GPU_CommandListHandle cl_handle, GPU_QueueKind queu
GPU_D12_Descriptor *rtv_descriptor = cmd->rasterize.rtv_descriptors[i]; GPU_D12_Descriptor *rtv_descriptor = cmd->rasterize.rtv_descriptors[i];
if (rtv_descriptor != 0) if (rtv_descriptor != 0)
{ {
pipeline_desc.render_target_formats[i] = rtv_descriptor->resource->texture_desc.format; pipeline_desc.render_target_formats[i] = rtv_descriptor->resource->texture_format;
} }
else else
{ {
@ -2053,9 +2050,69 @@ void GPU_CopyCpuBytes(GPU_CommandListHandle cl, GPU_ResourceHandle dst, u64 dst_
/* TODO */ /* TODO */
} }
void GPU_CopyCpuTexels(GPU_CommandListHandle cl, GPU_ResourceHandle dst, Vec3I32 dst_offset, void *src, Vec3I32 src_dims, Rng3I32 src_copy_range) void GPU_CopyCpuTexels(GPU_CommandListHandle cl, GPU_ResourceHandle dst_handle, Vec3I32 dst_offset, void *cpu_src, Vec3I32 cpu_src_dims, Rng3I32 cpu_src_copy_range)
{ {
/* TODO */ /* TODO */
// GPU_D12_SharedState *g = &GPU_D12_shared_state;
// D3D12_RESOURCE_DESC desc = ZI;
// ID3D12Resource_GetDesc(((GPU_D12_Resource *)footprint_reference)->d3d_resource, &desc);
// u64 upload_size = 0;
// u64 upload_row_size = 0;
// u32 upload_num_rows = 0;
// D3D12_PLACED_SUBRESOURCE_FOOTPRINT placed_footprint = ZI;
// ID3D12Device_GetCopyableFootprints(g->device, &desc, 0, 1, 0, &placed_footprint, &upload_num_rows, &upload_row_size, &upload_size);
// D3D12_SUBRESOURCE_FOOTPRINT footprint = placed_footprint.Footprint;
// {
// D3D12_RANGE read_range = ZI;
// u8 *dst_base = (u8 *)dst + placed_footprint.Offset;
// u8 *src_base = src;
// u32 z_size = upload_row_size * upload_num_rows;
// b32 src_overflow = 0;
// for (u32 z = 0; !src_overflow && z < desc.DepthOrArraySize; ++z)
// {
// u32 z_offset = z * z_size;
// for (u32 y = 0; !src_overflow && y < upload_num_rows; ++y)
// {
// u8 *dst_row = dst_base + y * footprint.RowPitch + z_offset;
// u8 *src_row = src_base + y * upload_row_size + z_offset;
// CopyBytes(dst_row, src_row, upload_row_size);
// }
// }
// }
// GPU_D12_Resource *dst = GPU_D12_ResourceFromHandle(dst_handle);
// D3D12_RESOURCE_DESC desc = ZI;
// ID3D12Resource_GetDesc(dst->d3d_resource, &desc);
// u64 upload_size = 0;
// u64 upload_row_size = 0;
// u32 upload_num_rows = 0;
// D3D12_PLACED_SUBRESOURCE_FOOTPRINT placed_footprint = ZI;
// ID3D12Device_GetCopyableFootprints(g->device, &desc, 0, dst->texture_mip_levels, 0, &placed_footprint, &upload_num_rows, &upload_row_size, &upload_size);
// D3D12_SUBRESOURCE_FOOTPRINT footprint = placed_footprint.Footprint;
// void *copy_start = ((u8 *)src) + src_copy_range.min;
// u64 copy_len = src_copy_range.max - src_copy_range.min;
} }
//- Gpu <-> Gpu copy //- Gpu <-> Gpu copy
@ -2127,26 +2184,14 @@ void GPU_SetConstSampler(GPU_CommandListHandle cl_handle, i32 slot, SamplerGpuPt
/* TODO */ /* TODO */
} }
//- Access //- Barrier
void GPU_SyncQueueAccess(GPU_CommandListHandle cl_handle, GPU_ResourceHandle handle, GPU_AccessKind kind) void GPU_BarrierEx(GPU_CommandListHandle cl_handle, GPU_BarrierDesc desc)
{ {
GPU_D12_CmdList *cl = GPU_D12_CmdListFromHandle(cl_handle); GPU_D12_CmdList *cl = GPU_D12_CmdListFromHandle(cl_handle);
GPU_D12_Cmd *cmd = GPU_D12_PushCmd(cl); GPU_D12_Cmd *cmd = GPU_D12_PushCmd(cl);
cmd->kind = GPU_D12_CmdKind_Access; cmd->kind = GPU_D12_CmdKind_Barrier;
cmd->access.after = kind; cmd->barrier.desc = desc;
cmd->access.resource = GPU_D12_ResourceFromHandle(handle);
cmd->access.is_queue_specific = 1;
}
void GPU_SyncGlobalAccess(GPU_CommandListHandle cl_handle, GPU_ResourceHandle handle, GPU_AccessKind kind)
{
GPU_D12_CmdList *cl = GPU_D12_CmdListFromHandle(cl_handle);
GPU_D12_Cmd *cmd = GPU_D12_PushCmd(cl);
cmd->kind = GPU_D12_CmdKind_Access;
cmd->access.after = kind;
cmd->access.resource = GPU_D12_ResourceFromHandle(handle);
cmd->access.is_queue_specific = 0;
} }
//- Compute //- Compute
@ -2165,7 +2210,7 @@ void GPU_Compute(GPU_CommandListHandle cl_handle, ComputeShader cs, Vec3I32 grou
void GPU_Rasterize(GPU_CommandListHandle cl_handle, void GPU_Rasterize(GPU_CommandListHandle cl_handle,
VertexShader vs, PixelShader ps, VertexShader vs, PixelShader ps,
u32 instances_count, IndexBufferGpuPtr idx_buff, u32 instances_count, IndexBufferGpuPtr idx_buff,
u32 raster_targets_count, RasterTargetGpuPtr *raster_targets, u32 raster_targets_count, RenderTargetGpuPtr *raster_targets,
Rng3 viewport, Rng2 scissor, Rng3 viewport, Rng2 scissor,
GPU_RasterMode mode) GPU_RasterMode mode)
{ {
@ -2176,7 +2221,7 @@ void GPU_Rasterize(GPU_CommandListHandle cl_handle,
cmd->rasterize.ps = ps; cmd->rasterize.ps = ps;
cmd->rasterize.instances_count = instances_count; cmd->rasterize.instances_count = instances_count;
cmd->rasterize.ibv = GPU_D12_IbvFromIbPtr(idx_buff); cmd->rasterize.ibv = GPU_D12_IbvFromIbPtr(idx_buff);
for (u32 i = 0; i < MinU32(raster_targets_count, GPU_MaxRasterTargets); ++i) for (u32 i = 0; i < MinU32(raster_targets_count, GPU_MaxRenderTargets); ++i)
{ {
cmd->rasterize.rtv_descriptors[i] = GPU_D12_DescriptorFromIndex(GPU_D12_DescriptorHeapKind_Rtv, raster_targets[i].v); cmd->rasterize.rtv_descriptors[i] = GPU_D12_DescriptorFromIndex(GPU_D12_DescriptorHeapKind_Rtv, raster_targets[i].v);
} }
@ -2187,7 +2232,7 @@ void GPU_Rasterize(GPU_CommandListHandle cl_handle,
//- Clear //- Clear
void GPU_ClearRasterTarget(GPU_CommandListHandle cl_handle, RasterTargetGpuPtr ptr, Vec4 color) void GPU_ClearRenderTarget(GPU_CommandListHandle cl_handle, RenderTargetGpuPtr ptr, Vec4 color)
{ {
GPU_D12_CmdList *cl = GPU_D12_CmdListFromHandle(cl_handle); GPU_D12_CmdList *cl = GPU_D12_CmdListFromHandle(cl_handle);
GPU_D12_Cmd *cmd = GPU_D12_PushCmd(cl); GPU_D12_Cmd *cmd = GPU_D12_PushCmd(cl);
@ -2443,16 +2488,11 @@ GPU_ResourceHandle GPU_PrepareBackbuffer(GPU_SwapchainHandle swapchain_handle, G
backbuffer->d3d_resource = d3d_resource; backbuffer->d3d_resource = d3d_resource;
backbuffer->uid = Atomic64FetchAdd(&g->next_resource_uid.v, 1); backbuffer->uid = Atomic64FetchAdd(&g->next_resource_uid.v, 1);
backbuffer->is_texture = 1; backbuffer->is_texture = 1;
backbuffer->texture_flags = GPU_TextureFlag_AllowRenderTarget;
backbuffer->texture_dims = VEC3I32(size.x, size.y, 1);
backbuffer->texture_mip_levels = 1;
backbuffer->texture_layout = D3D12_BARRIER_LAYOUT_PRESENT; backbuffer->texture_layout = D3D12_BARRIER_LAYOUT_PRESENT;
backbuffer->swapchain = swapchain; backbuffer->swapchain = swapchain;
{
backbuffer->texture_desc.kind = GPU_TextureKind_2D;
backbuffer->texture_desc.format = format;
backbuffer->texture_desc.dims = VEC3I32(size.x, size.y, 1);
backbuffer->texture_desc.flags = GPU_TextureFlag_AllowRasterTarget;
backbuffer->texture_desc.initial_access = GPU_AccessKind_Present;
backbuffer->texture_desc.mip_levels = 1;
}
} }
} }
swapchain->backbuffers_format = format; swapchain->backbuffers_format = format;

View File

@ -34,7 +34,7 @@ Struct(GPU_D12_PipelineDesc)
ComputeShader cs; ComputeShader cs;
b32 is_wireframe; b32 is_wireframe;
D3D12_PRIMITIVE_TOPOLOGY_TYPE topology_type; D3D12_PRIMITIVE_TOPOLOGY_TYPE topology_type;
GPU_Format render_target_formats[GPU_MaxRasterTargets]; GPU_Format render_target_formats[GPU_MaxRenderTargets];
}; };
Struct(GPU_D12_Pipeline) Struct(GPU_D12_Pipeline)
@ -119,13 +119,17 @@ Struct(GPU_D12_Resource)
u64 uid; u64 uid;
/* Buffer info */ /* Buffer info */
GPU_BufferDesc buffer_desc; GPU_BufferFlag buffer_flags;
u64 buffer_size;
D3D12_GPU_VIRTUAL_ADDRESS buffer_gpu_address; D3D12_GPU_VIRTUAL_ADDRESS buffer_gpu_address;
/* Texture info */ /* Texture info */
b32 is_texture; b32 is_texture;
GPU_TextureFlag texture_flags;
GPU_Format texture_format;
Vec3I32 texture_dims;
i32 texture_mip_levels;
D3D12_BARRIER_LAYOUT texture_layout; D3D12_BARRIER_LAYOUT texture_layout;
GPU_TextureDesc texture_desc;
/* Backbuffer info */ /* Backbuffer info */
struct GPU_D12_Swapchain *swapchain; struct GPU_D12_Swapchain *swapchain;
@ -176,23 +180,11 @@ Struct(GPU_D12_RawCommandList)
Enum(GPU_D12_CmdKind) Enum(GPU_D12_CmdKind)
{ {
GPU_D12_CmdKind_None, GPU_D12_CmdKind_None,
GPU_D12_CmdKind_Barrier,
/* Access */
GPU_D12_CmdKind_Access,
/* Constant */
GPU_D12_CmdKind_Constant, GPU_D12_CmdKind_Constant,
/* Copy */
GPU_D12_CmdKind_Copy, GPU_D12_CmdKind_Copy,
/* Compute */
GPU_D12_CmdKind_Compute, GPU_D12_CmdKind_Compute,
/* Rasterize */
GPU_D12_CmdKind_Rasterize, GPU_D12_CmdKind_Rasterize,
/* Clear rtv */
GPU_D12_CmdKind_ClearRtv, GPU_D12_CmdKind_ClearRtv,
}; };
@ -204,15 +196,12 @@ Struct(GPU_D12_Cmd)
{ {
struct struct
{ {
GPU_AccessKind after; GPU_BarrierDesc desc;
GPU_D12_Resource *resource;
/* Post-batch data */ /* Post-batch data */
GPU_AccessKind before;
b32 is_end_of_batch; b32 is_end_of_batch;
b32 is_queue_specific;
u64 batch_gen; u64 batch_gen;
} access; } barrier;
struct struct
{ {
@ -250,7 +239,7 @@ Struct(GPU_D12_Cmd)
PixelShader ps; PixelShader ps;
u32 instances_count; u32 instances_count;
D3D12_INDEX_BUFFER_VIEW ibv; D3D12_INDEX_BUFFER_VIEW ibv;
GPU_D12_Descriptor *rtv_descriptors[GPU_MaxRasterTargets]; GPU_D12_Descriptor *rtv_descriptors[GPU_MaxRenderTargets];
Rng3 viewport; Rng3 viewport;
Rng2 scissor; Rng2 scissor;
GPU_RasterMode mode; GPU_RasterMode mode;
@ -350,12 +339,16 @@ void GPU_D12_Startup(void);
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ Helpers //~ Helpers
DXGI_FORMAT GPU_D12_DxgiFormatFromGpuFormat(GPU_Format format);
GPU_D12_Arena *GPU_D12_ArenaFromHandle(GPU_ArenaHandle handle); GPU_D12_Arena *GPU_D12_ArenaFromHandle(GPU_ArenaHandle handle);
GPU_D12_CmdList *GPU_D12_CommandListFromHandle(GPU_CommandListHandle handle); GPU_D12_CmdList *GPU_D12_CommandListFromHandle(GPU_CommandListHandle handle);
GPU_D12_Resource *GPU_D12_ResourceFromHandle(GPU_ResourceHandle handle); GPU_D12_Resource *GPU_D12_ResourceFromHandle(GPU_ResourceHandle handle);
GPU_D12_Swapchain *GPU_D12_SwapchainFromHandle(GPU_SwapchainHandle handle); GPU_D12_Swapchain *GPU_D12_SwapchainFromHandle(GPU_SwapchainHandle handle);
DXGI_FORMAT GPU_D12_DxgiFormatFromGpuFormat(GPU_Format format);
D3D12_BARRIER_SYNC GPU_D12_BarrierSyncFromStageFlags(GPU_StageFlag flags);
D3D12_BARRIER_ACCESS GPU_D12_BarrierAccessFromAccessFlags(GPU_AccessFlag flags);
D3D12_BARRIER_LAYOUT GPU_D12_BarrierLayoutFromLayoutKind(GPU_LayoutKind kind);
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ Pipeline //~ Pipeline

View File

@ -31,19 +31,33 @@ JobImpl(PR_RunForever, _sig, _id)
/* Draw to backbuffer */ /* Draw to backbuffer */
GPU_CommandListHandle cl = GPU_PrepareCommandList(); GPU_CommandListHandle cl = GPU_PrepareCommandList();
{ {
RasterTargetGpuPtr backbuffer_rt = GPU_PushRasterTargetPtr(gpu_frame_arena, backbuffer); RenderTargetGpuPtr backbuffer_rt = GPU_PushRenderTargetPtr(gpu_frame_arena, backbuffer);
/* Clear backbuffer */ /* Prep clear pass */
{ {
GPU_SyncAccess(cl, backbuffer, GPU_AccessKind_RasterTarget); GPU_LayoutBarrier(cl, backbuffer,
GPU_ClearRasterTarget(cl, backbuffer_rt, VEC4(1, 0, 0, 1)); NoStage, RenderTarget,
NoAccess, RenderTargetWrite,
DirectQueue_RenderTargetWrite);
} }
/* Make backbuffer presentable */ /* Clear pass */
GPU_SyncAccess(cl, backbuffer, GPU_AccessKind_Present); {
GPU_ClearRenderTarget(cl, backbuffer_rt, VEC4(1, 0, 0, 1));
}
/* Finalize backbuffer layout */
{
GPU_LayoutBarrier(cl, backbuffer,
RenderTarget, NoStage,
RenderTargetWrite, NoAccess,
Present);
}
/* Reset */ /* Reset */
GPU_ResetArena(cl, gpu_frame_arena); {
GPU_ResetArena(cl, gpu_frame_arena);
}
} }
GPU_CommitCommandList(cl, GPU_QueueKind_Direct); GPU_CommitCommandList(cl, GPU_QueueKind_Direct);
} }

View File

@ -27,7 +27,7 @@ JobImpl(SPR_LoadTexture, sig, _)
GPU_ResourceHandle gpu_resource = GPU_PushTexture2D(gpu_perm, GPU_ResourceHandle gpu_resource = GPU_PushTexture2D(gpu_perm,
GPU_Format_R8G8B8A8_Unorm_Srgb, GPU_Format_R8G8B8A8_Unorm_Srgb,
VEC2I32(decoded.width, decoded.height), VEC2I32(decoded.width, decoded.height),
GPU_AccessKind_AnyRead); GPU_LayoutKind_AnyQueue_ShaderRead_CopyRead_CopyWrite);
// texture->texture = gpu_tex; // texture->texture = gpu_tex;
texture->width = decoded.width; texture->width = decoded.width;
texture->height = decoded.height; texture->height = decoded.height;
@ -38,8 +38,12 @@ JobImpl(SPR_LoadTexture, sig, _)
gpu_resource, VEC3I32(0,0,0), gpu_resource, VEC3I32(0,0,0),
decoded.pixels, VEC3I32(decoded.width, decoded.height, 1), decoded.pixels, VEC3I32(decoded.width, decoded.height, 1),
RNG3I32(VEC3I32(0,0,0), VEC3I32(decoded.width, decoded.height, 1))); RNG3I32(VEC3I32(0,0,0), VEC3I32(decoded.width, decoded.height, 1)));
GPU_LayoutBarrier(cl, gpu_resource,
Copy, NoStage,
CopyWrite, NoAccess,
AnyQueue_ShaderRead_CopyRead);
} }
GPU_CommitCommandListEx(cl, GPU_QueueKind_AsyncCopy, 1, &GPU_SetFence(&entry->texture_ready_fence, 1)); GPU_CommitCommandList(cl, GPU_QueueKind_AsyncCopy);
} }
EndScratch(scratch); EndScratch(scratch);