expose barrier before/after sync, access & layout in gpu api
Commit: 4e95e44823 (parent: 7c9f5f7e06)
@@ -745,7 +745,7 @@ Struct(U128)
 Struct(RWTexture2DGpuPtr) { u32 v; };
 Struct(Texture3DGpuPtr) { u32 v; };
 Struct(RWTexture3DGpuPtr) { u32 v; };
-Struct(RasterTargetGpuPtr) { u32 v; };
+Struct(RenderTargetGpuPtr) { u32 v; };
 Struct(SamplerGpuPtr) { u32 v; };
 
 #define IsGpuPtrNil(p) ((p).v == 0)

@@ -773,7 +773,7 @@ Struct(U128)
 typedef RWTexture2DGpuPtr u32;
 typedef Texture3DGpuPtr u32;
 typedef RWTexture3DGpuPtr u32;
-typedef RasterTargetGpuPtr u32;
+typedef RenderTargetGpuPtr u32;
 typedef SamplerGpuPtr u32;
 
 #define IsGpuPtrNil(p) ((p) == 0)
@@ -161,35 +161,122 @@ Enum(GPU_Format)
 };
 
 ////////////////////////////////////////////////////////////
-//~ Access types
+//~ Barrier types
 
-Enum(GPU_AccessKind)
+Enum(GPU_StageFlag)
 {
-  GPU_AccessKind_None,
+  GPU_StageFlag_NoStage = 0,
 
-  /* Generic */
-  GPU_AccessKind_Generic_Read,
-  // GPU_AccessKind_Generic_ReadWrite, /* NOTE: Textures cannot transition to/from this access to another access kind. They must be created with it. */
+  /* Compute stages */
+  GPU_StageFlag_ComputeShading = (1 << 1),
 
-  /* Copy */
-  GPU_AccessKind_Copy_Read,
-  GPU_AccessKind_Copy_Write,
+  /* Draw stages */
+  GPU_StageFlag_IndexAssembly = (1 << 2),
+  GPU_StageFlag_VertexShading = (1 << 3),
+  GPU_StageFlag_PixelShading = (1 << 4),
+  GPU_StageFlag_DepthStencil = (1 << 5),
+  GPU_StageFlag_RenderTarget = (1 << 6),
 
-  /* Shader read/write */
-  GPU_AccessKind_AnyShader_Read,
-  GPU_AccessKind_AnyShader_ReadWrite,
-  GPU_AccessKind_ComputeShader_Read,
-  GPU_AccessKind_ComputeShader_ReadWrite,
-  GPU_AccessKind_VertexPixelShader_Read,
-  GPU_AccessKind_VertexPixelShader_ReadWrite,
-  GPU_AccessKind_VertexShader_Read,
-  GPU_AccessKind_VertexShader_ReadWrite,
-  GPU_AccessKind_PixelShader_Read,
-  GPU_AccessKind_PixelShader_ReadWrite,
+  /* Copy stages */
+  GPU_StageFlag_Copy = (1 << 7),
 
-  /* Special */
-  GPU_AccessKind_RasterTarget,
-  GPU_AccessKind_Present,
+  /* Indirect stages */
+  GPU_StageFlag_Indirect = (1 << 8),
+
+  /* Aggregate stages */
+  GPU_StageFlag_AllDrawStages = GPU_StageFlag_IndexAssembly |
+                                GPU_StageFlag_VertexShading |
+                                GPU_StageFlag_PixelShading |
+                                GPU_StageFlag_DepthStencil |
+                                GPU_StageFlag_RenderTarget,
+
+  GPU_StageFlag_AllShadingStages = GPU_StageFlag_ComputeShading |
+                                   GPU_StageFlag_VertexShading |
+                                   GPU_StageFlag_PixelShading,
+
+  GPU_StageFlag_AllNonPixelShadingStages = GPU_StageFlag_ComputeShading |
+                                           GPU_StageFlag_VertexShading,
+
+  GPU_StageFlag_AllStages = 0xFFFFFFFF
 };
+
+Enum(GPU_AccessFlag)
+{
+  GPU_AccessFlag_NoAccess = 0,
+
+  GPU_AccessFlag_ShaderReadWrite = (1 << 1),
+  GPU_AccessFlag_ShaderRead = (1 << 2),
+
+  GPU_AccessFlag_CopyWrite = (1 << 3),
+  GPU_AccessFlag_CopyRead = (1 << 4),
+
+  GPU_AccessFlag_IndexBuffer = (1 << 5),
+  GPU_AccessFlag_IndirectArgument = (1 << 6),
+
+  GPU_AccessFlag_DepthStencilRead = (1 << 7),
+  GPU_AccessFlag_DepthStencilWrite = (1 << 8),
+  GPU_AccessFlag_RenderTargetWrite = (1 << 9),
+
+  GPU_AccessFlag_AllAccess = 0xFFFFFFFF
+};
+
+Enum(GPU_LayoutKind)
+{
+  GPU_LayoutKind_NoChange,
+
+  GPU_LayoutKind_AnyQueue_AnyAccess, /* NOTE: Textures cannot transition to/from this layout. They must be created with it. */
+
+  GPU_LayoutKind_Undefined, /* D3D12_BARRIER_LAYOUT_UNDEFINED */
+  GPU_LayoutKind_Present,   /* D3D12_BARRIER_LAYOUT_COMMON */
+
+  //////////////////////////////
+  //- Queue-agnostic
+
+  GPU_LayoutKind_AnyQueue_ShaderRead_CopyRead_CopyWrite, /* D3D12_BARRIER_LAYOUT_COMMON */
+  GPU_LayoutKind_AnyQueue_ShaderReadWrite,               /* D3D12_BARRIER_LAYOUT_UNORDERED_ACCESS */
+
+  GPU_LayoutKind_AnyQueue_ShaderRead_CopyRead, /* D3D12_BARRIER_LAYOUT_GENERIC_READ */
+  GPU_LayoutKind_AnyQueue_ShaderRead,          /* D3D12_BARRIER_LAYOUT_SHADER_RESOURCE */
+  GPU_LayoutKind_AnyQueue_CopyRead,            /* D3D12_BARRIER_LAYOUT_COPY_SOURCE */
+
+  //////////////////////////////
+  //- Direct queue
+
+  GPU_LayoutKind_DirectQueue_ShaderReadWrite_CopyRead_CopyWrite, /* D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COMMON */
+  GPU_LayoutKind_DirectQueue_ShaderReadWrite,                    /* D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_UNORDERED_ACCESS */
+
+  GPU_LayoutKind_DirectQueue_ShaderRead_CopyRead_DepthStencilRead, /* D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_GENERIC_READ */
+  GPU_LayoutKind_DirectQueue_ShaderRead,                           /* D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_SHADER_RESOURCE */
+  GPU_LayoutKind_DirectQueue_CopyRead,                             /* D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COPY_SOURCE */
+
+  GPU_LayoutKind_DirectQueue_DepthStencilRead_DepthStencilWrite, /* D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE */
+  GPU_LayoutKind_DirectQueue_DepthStencilRead,                   /* D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_READ */
+  GPU_LayoutKind_DirectQueue_RenderTargetWrite,                  /* D3D12_BARRIER_LAYOUT_RENDER_TARGET */
+
+  //////////////////////////////
+  //- Compute queue
+
+  GPU_LayoutKind_ComputeQueue_ShaderReadWrite_CopyRead_CopyWrite, /* D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COMMON */
+  GPU_LayoutKind_ComputeQueue_ShaderReadWrite,                    /* D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_UNORDERED_ACCESS */
+
+  GPU_LayoutKind_ComputeQueue_ShaderRead_CopyRead, /* D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_GENERIC_READ */
+  GPU_LayoutKind_ComputeQueue_ShaderRead,          /* D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_SHADER_RESOURCE */
+  GPU_LayoutKind_ComputeQueue_CopyRead,            /* D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COPY_SOURCE */
+};
+
+/* Barrier will execute after previous stages specified by `sync_prev`, and before next stages specified by `sync_next`.
+ * When barrier executes:
+ * - Necessary resource flushes will occur based on `access_prev` & `access_next`
+ * - Texture layout will transition based on `layout` (if specified)
+ */
+Struct(GPU_BarrierDesc)
+{
+  GPU_ResourceHandle resource;
+  GPU_StageFlag sync_prev;
+  GPU_StageFlag sync_next;
+  GPU_AccessFlag access_prev;
+  GPU_AccessFlag access_next;
+  GPU_LayoutKind layout;
+};
 
 ////////////////////////////////////////////////////////////
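
For illustration (not part of the commit): a minimal sketch of driving the new API directly, assuming a valid command list `cl` and a texture handle `tex` (both hypothetical names). It waits on the copy stage, flushes the copy write, and makes the texture shader-readable before pixel shading:

    /* Sketch only: `cl` and `tex` are assumed handles, not from this commit. */
    GPU_BarrierEx(cl, (GPU_BarrierDesc) {
      .resource    = tex,
      .sync_prev   = GPU_StageFlag_Copy,
      .sync_next   = GPU_StageFlag_PixelShading,
      .access_prev = GPU_AccessFlag_CopyWrite,
      .access_next = GPU_AccessFlag_ShaderRead,
      .layout      = GPU_LayoutKind_AnyQueue_ShaderRead,
    });
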
@@ -210,13 +297,13 @@ Struct(GPU_BufferDesc)
 ////////////////////////////////////////////////////////////
 //~ Texture types
 
-#define GPU_MaxRasterTargets 8
+#define GPU_MaxRenderTargets 8
 
 Enum(GPU_TextureFlag)
 {
   GPU_TextureFlag_None = 0,
   GPU_TextureFlag_AllowWritable = (1 << 0),
-  GPU_TextureFlag_AllowRasterTarget = (1 << 1),
+  GPU_TextureFlag_AllowRenderTarget = (1 << 1),
 };
 
 Enum(GPU_TextureKind)
@@ -232,7 +319,7 @@ Struct(GPU_TextureDesc)
   GPU_Format format;
   Vec3I32 dims;
   GPU_TextureFlag flags;
-  GPU_AccessKind initial_access;
+  GPU_LayoutKind initial_layout;
   i32 mip_levels; /* Will be clamped to range [1, max] */
   Vec4 clear_color;
 };
@@ -409,32 +496,32 @@ GPU_ResourceHandle GPU_PushSampler(GPU_ArenaHandle arena, GPU_SamplerDesc desc);
   } \
 )
 
-#define GPU_PushTexture1D(arena, _format, _size, _initial_access) GPU_PushTextureEx((arena), \
+#define GPU_PushTexture1D(arena, _format, _size, _initial_layout) GPU_PushTextureEx((arena), \
   (GPU_TextureDesc) { \
     .kind = GPU_TextureKind_1D, \
     .format = (_format), \
    .dims = VEC3I32((_size), 1, 1), \
-    .initial_access = (_initial_access), \
+    .initial_layout = (_initial_layout), \
     __VA_ARGS__ \
   } \
 )
 
-#define GPU_PushTexture2D(arena, _format, _size, _initial_access) GPU_PushTextureEx((arena), \
+#define GPU_PushTexture2D(arena, _format, _size, _initial_layout) GPU_PushTextureEx((arena), \
   (GPU_TextureDesc) { \
     .kind = GPU_TextureKind_2D, \
     .format = (_format), \
     .dims = VEC3I32((_size).x, (_size).y, 1), \
-    .initial_access = (_initial_access), \
+    .initial_layout = (_initial_layout), \
     __VA_ARGS__ \
   } \
 )
 
-#define GPU_PushTexture3D(arena, _format, _size, _initial_access) GPU_PushTextureEx((arena), \
+#define GPU_PushTexture3D(arena, _format, _size, _initial_layout) GPU_PushTextureEx((arena), \
   (GPU_TextureDesc) { \
     .kind = GPU_TextureKind_3D, \
     .format = (_format), \
     .dims = (_size), \
-    .initial_access = (_initial_access), \
+    .initial_layout = (_initial_layout), \
     __VA_ARGS__ \
   } \
 )
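
For illustration (not part of the commit): with the macro change above, the trailing argument at texture-creation call sites is now an initial layout rather than an access kind. A sketch, assuming a valid `arena`:

    /* Sketch: a 2D sRGB texture created in a layout that allows shader reads
     * and copies on any queue (mirrors the SPR_LoadTexture change below). */
    GPU_ResourceHandle tex = GPU_PushTexture2D(arena,
                                               GPU_Format_R8G8B8A8_Unorm_Srgb,
                                               VEC2I32(256, 256),
                                               GPU_LayoutKind_AnyQueue_ShaderRead_CopyRead_CopyWrite);
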
@@ -450,7 +537,7 @@ Texture2DGpuPtr GPU_PushTexture2DPtr (GPU_ArenaHandle arena, GPU_Resourc
 RWTexture2DGpuPtr GPU_PushRWTexture2DPtr (GPU_ArenaHandle arena, GPU_ResourceHandle resource);
 Texture3DGpuPtr GPU_PushTexture3DPtr (GPU_ArenaHandle arena, GPU_ResourceHandle resource);
 RWTexture3DGpuPtr GPU_PushRWTexture3DPtr (GPU_ArenaHandle arena, GPU_ResourceHandle resource);
-RasterTargetGpuPtr GPU_PushRasterTargetPtr (GPU_ArenaHandle arena, GPU_ResourceHandle resource);
+RenderTargetGpuPtr GPU_PushRenderTargetPtr (GPU_ArenaHandle arena, GPU_ResourceHandle resource);
 SamplerGpuPtr GPU_PushSamplerPtr (GPU_ArenaHandle arena, GPU_ResourceHandle resource);
 
 #define GPU_PushBufferPtr(arena, resource, type) GPU_PushBufferPtrEx((arena), (resource), sizeof(type), RNGU32(0, GPU_CountBuffer((resource), type)))
@@ -470,22 +557,27 @@ u64 GPU_Count3D(GPU_ResourceHandle texture3d);
 //~ @hookdecl Command
 
 //- Command list
 
 GPU_CommandListHandle GPU_PrepareCommandList(void);
 void GPU_CommitCommandListEx(GPU_CommandListHandle cl, GPU_QueueKind queue, u64 fence_ops_count, GPU_FenceOp *fence_ops);
 #define GPU_CommitCommandList(cl, queue) GPU_CommitCommandListEx((cl), (queue), 0, 0)
 
 //- Arena
 
 void GPU_ResetArena(GPU_CommandListHandle cl, GPU_ArenaHandle arena);
 
 //- Cpu -> Gpu copy
-void GPU_CopyCpuBytes(GPU_CommandListHandle cl, GPU_ResourceHandle dst, u64 dst_offset, void *src, RngU64 src_copy_range);
-void GPU_CopyCpuTexels(GPU_CommandListHandle cl, GPU_ResourceHandle dst, Vec3I32 dst_offset, void *src, Vec3I32 src_dims, Rng3I32 src_copy_range);
+
+void GPU_CopyCpuBytes(GPU_CommandListHandle cl, GPU_ResourceHandle dst, u64 dst_offset, void *cpu_src, RngU64 cpu_src_copy_range);
+void GPU_CopyCpuTexels(GPU_CommandListHandle cl, GPU_ResourceHandle dst, Vec3I32 dst_offset, void *cpu_src, Vec3I32 cpu_src_dims, Rng3I32 cpu_src_copy_range);
 
 //- Gpu <-> Gpu copy
 
 void GPU_CopyBytes(GPU_CommandListHandle cl, GPU_ResourceHandle dst, u64 dst_offset, GPU_ResourceHandle src, RngU64 src_copy_range);
 void GPU_CopyTexels(GPU_CommandListHandle cl, GPU_ResourceHandle dst, Vec3I32 dst_offset, GPU_ResourceHandle src, Rng3I32 src_copy_range);
 
 //- Constants
 
 void GPU_SetConstU32 (GPU_CommandListHandle cl, i32 slot, u32 v);
 void GPU_SetConstF32 (GPU_CommandListHandle cl, i32 slot, f32 v);
 void GPU_SetConstBuffer (GPU_CommandListHandle cl, i32 slot, BufferGpuPtr v);
@@ -498,24 +590,40 @@ void GPU_SetConstTexture3D (GPU_CommandListHandle cl, i32 slot, Texture3DGpu
 void GPU_SetConstRWTexture3D (GPU_CommandListHandle cl, i32 slot, RWTexture3DGpuPtr v);
 void GPU_SetConstSampler (GPU_CommandListHandle cl, i32 slot, SamplerGpuPtr v);
 
-//- Access
-void GPU_SyncAccess(GPU_CommandListHandle cl, GPU_ResourceHandle resource, GPU_AccessKind kind);
+//- Barrier
+
+void GPU_BarrierEx(GPU_CommandListHandle cl, GPU_BarrierDesc desc);
+#define GPU_LayoutBarrier(_cl, _resource, _sync_prev, _sync_next, _access_prev, _access_next, _layout) \
+  GPU_BarrierEx((_cl), (GPU_BarrierDesc) { \
+    .resource = (_resource), \
+    .sync_prev = GPU_StageFlag_##_sync_prev, \
+    .sync_next = GPU_StageFlag_##_sync_next, \
+    .access_prev = GPU_AccessFlag_##_access_prev, \
+    .access_next = GPU_AccessFlag_##_access_next, \
+    .layout = GPU_LayoutKind_##_layout, \
+  })
+#define GPU_Barrier(_cl, _resource, _sync_prev, _sync_next, _access_prev, _access_next) \
+  GPU_LayoutBarrier((_cl), (_resource), _sync_prev, _sync_next, _access_prev, _access_next)
 
 //- Compute
 
 void GPU_Compute(GPU_CommandListHandle cl, ComputeShader cs, Vec3I32 groups);
 
 //- Rasterize
 
 void GPU_Rasterize(GPU_CommandListHandle cl,
                    VertexShader vs, PixelShader ps,
                    u32 instances_count, IndexBufferGpuPtr idx_buff,
-                   u32 raster_targets_count, RasterTargetGpuPtr *raster_targets,
+                   u32 raster_targets_count, RenderTargetGpuPtr *raster_targets,
                    Rng3 viewport, Rng2 scissor,
                    GPU_RasterMode mode);
 
 //- Clear
-void GPU_ClearRasterTarget(GPU_CommandListHandle cl, RasterTargetGpuPtr ptr, Vec4 color);
+
+void GPU_ClearRenderTarget(GPU_CommandListHandle cl, RenderTargetGpuPtr ptr, Vec4 color);
 
 //- Profile
 
 void GPU_ProfN(GPU_CommandListHandle cl, String name);
 
 ////////////////////////////////////////////////////////////
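
The macros token-paste their arguments onto the `GPU_StageFlag_` / `GPU_AccessFlag_` / `GPU_LayoutKind_` prefixes, so call sites pass bare suffixes. For illustration (not part of the commit), this sketch expands to the same `GPU_BarrierEx` call shown earlier:

    /* Sketch: `cl` and `tex` are assumed handles; the macro pastes the enum prefixes on. */
    GPU_LayoutBarrier(cl, tex,
                      Copy, PixelShading,
                      CopyWrite, ShaderRead,
                      AnyQueue_ShaderRead);
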
@@ -284,11 +284,6 @@ void GPU_D12_Startup(void)
 ////////////////////////////////////////////////////////////
 //~ Helpers
 
-DXGI_FORMAT GPU_D12_DxgiFormatFromGpuFormat(GPU_Format format)
-{
-  return (DXGI_FORMAT)format;
-}
-
 GPU_D12_Arena *GPU_D12_ArenaFromHandle(GPU_ArenaHandle handle)
 {
   return (GPU_D12_Arena *)handle.v;
@@ -309,6 +304,85 @@ GPU_D12_Swapchain *GPU_D12_SwapchainFromHandle(GPU_SwapchainHandle handle)
   return (GPU_D12_Swapchain *)handle.v;
 }
 
+DXGI_FORMAT GPU_D12_DxgiFormatFromGpuFormat(GPU_Format format)
+{
+  return (DXGI_FORMAT)format;
+}
+
+D3D12_BARRIER_SYNC GPU_D12_BarrierSyncFromStageFlags(GPU_StageFlag flags)
+{
+  D3D12_BARRIER_SYNC result = 0;
+  if (flags == GPU_StageFlag_AllStages)
+  {
+    result = D3D12_BARRIER_SYNC_ALL;
+  }
+  else
+  {
+    result |= D3D12_BARRIER_SYNC_COMPUTE_SHADING * AnyBit(flags, GPU_StageFlag_ComputeShading);
+    result |= D3D12_BARRIER_SYNC_INDEX_INPUT * AnyBit(flags, GPU_StageFlag_IndexAssembly);
+    result |= D3D12_BARRIER_SYNC_VERTEX_SHADING * AnyBit(flags, GPU_StageFlag_VertexShading);
+    result |= D3D12_BARRIER_SYNC_PIXEL_SHADING * AnyBit(flags, GPU_StageFlag_PixelShading);
+    result |= D3D12_BARRIER_SYNC_DEPTH_STENCIL * AnyBit(flags, GPU_StageFlag_DepthStencil);
+    result |= D3D12_BARRIER_SYNC_RENDER_TARGET * AnyBit(flags, GPU_StageFlag_RenderTarget);
+    result |= D3D12_BARRIER_SYNC_COPY * AnyBit(flags, GPU_StageFlag_Copy);
+    result |= D3D12_BARRIER_SYNC_EXECUTE_INDIRECT * AnyBit(flags, GPU_StageFlag_Indirect);
+  }
+  return result;
+}
+
+D3D12_BARRIER_ACCESS GPU_D12_BarrierAccessFromAccessFlags(GPU_AccessFlag flags)
+{
+  D3D12_BARRIER_ACCESS result = 0;
+  if (flags == 0)
+  {
+    result = D3D12_BARRIER_ACCESS_NO_ACCESS;
+  }
+  else if (flags == GPU_AccessFlag_AllAccess)
+  {
+    result = D3D12_BARRIER_ACCESS_COMMON;
+  }
+  else
+  {
+    result |= D3D12_BARRIER_ACCESS_UNORDERED_ACCESS * AnyBit(flags, GPU_AccessFlag_ShaderReadWrite);
+    result |= D3D12_BARRIER_ACCESS_SHADER_RESOURCE * AnyBit(flags, GPU_AccessFlag_ShaderRead);
+    result |= D3D12_BARRIER_ACCESS_COPY_DEST * AnyBit(flags, GPU_AccessFlag_CopyWrite);
+    result |= D3D12_BARRIER_ACCESS_COPY_SOURCE * AnyBit(flags, GPU_AccessFlag_CopyRead);
+    result |= D3D12_BARRIER_ACCESS_INDEX_BUFFER * AnyBit(flags, GPU_AccessFlag_IndexBuffer);
+    result |= D3D12_BARRIER_ACCESS_INDIRECT_ARGUMENT * AnyBit(flags, GPU_AccessFlag_IndirectArgument);
+    result |= D3D12_BARRIER_ACCESS_DEPTH_STENCIL_READ * AnyBit(flags, GPU_AccessFlag_DepthStencilRead);
+    result |= D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE * AnyBit(flags, GPU_AccessFlag_DepthStencilWrite);
+    result |= D3D12_BARRIER_ACCESS_RENDER_TARGET * AnyBit(flags, GPU_AccessFlag_RenderTargetWrite);
+  }
+  return result;
+}
+
+D3D12_BARRIER_LAYOUT GPU_D12_BarrierLayoutFromLayoutKind(GPU_LayoutKind kind)
+{
+  PERSIST Readonly D3D12_BARRIER_LAYOUT translate[] = {
+    [GPU_LayoutKind_Undefined] = D3D12_BARRIER_LAYOUT_UNDEFINED,
+    [GPU_LayoutKind_Present] = D3D12_BARRIER_LAYOUT_COMMON,
+    [GPU_LayoutKind_AnyQueue_ShaderRead_CopyRead_CopyWrite] = D3D12_BARRIER_LAYOUT_COMMON,
+    [GPU_LayoutKind_AnyQueue_ShaderReadWrite] = D3D12_BARRIER_LAYOUT_UNORDERED_ACCESS,
+    [GPU_LayoutKind_AnyQueue_ShaderRead_CopyRead] = D3D12_BARRIER_LAYOUT_GENERIC_READ,
+    [GPU_LayoutKind_AnyQueue_ShaderRead] = D3D12_BARRIER_LAYOUT_SHADER_RESOURCE,
+    [GPU_LayoutKind_AnyQueue_CopyRead] = D3D12_BARRIER_LAYOUT_COPY_SOURCE,
+    [GPU_LayoutKind_DirectQueue_ShaderReadWrite_CopyRead_CopyWrite] = D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COMMON,
+    [GPU_LayoutKind_DirectQueue_ShaderReadWrite] = D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_UNORDERED_ACCESS,
+    [GPU_LayoutKind_DirectQueue_ShaderRead_CopyRead_DepthStencilRead] = D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_GENERIC_READ,
+    [GPU_LayoutKind_DirectQueue_ShaderRead] = D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_SHADER_RESOURCE,
+    [GPU_LayoutKind_DirectQueue_CopyRead] = D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COPY_SOURCE,
+    [GPU_LayoutKind_DirectQueue_DepthStencilRead_DepthStencilWrite] = D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE,
+    [GPU_LayoutKind_DirectQueue_DepthStencilRead] = D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_READ,
+    [GPU_LayoutKind_DirectQueue_RenderTargetWrite] = D3D12_BARRIER_LAYOUT_RENDER_TARGET,
+    [GPU_LayoutKind_ComputeQueue_ShaderReadWrite_CopyRead_CopyWrite] = D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COMMON,
+    [GPU_LayoutKind_ComputeQueue_ShaderReadWrite] = D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_UNORDERED_ACCESS,
+    [GPU_LayoutKind_ComputeQueue_ShaderRead_CopyRead] = D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_GENERIC_READ,
+    [GPU_LayoutKind_ComputeQueue_ShaderRead] = D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_SHADER_RESOURCE,
+    [GPU_LayoutKind_ComputeQueue_CopyRead] = D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COPY_SOURCE,
+  };
+  return translate[kind];
+}
+
 ////////////////////////////////////////////////////////////
 //~ Pipeline
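
These helpers multiply each D3D12 flag by a 0/1 predicate instead of branching per flag. `AnyBit` is not shown in this diff; a plausible definition (an assumption, not from the commit) would be:

    /* Assumed helper: evaluates to 1 if any bit of `bits` is set in `flags`, else 0,
     * so FLAG * AnyBit(flags, bits) contributes either FLAG or nothing to the |= chain. */
    #define AnyBit(flags, bits) (((flags) & (bits)) != 0)
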
@@ -525,7 +599,7 @@ GPU_D12_Queue *GPU_D12_QueueFromKind(GPU_QueueKind kind)
 //   Unlock(&lock);
 // }
 
-// GPU_D12_Descriptor *GPU_D12_DescriptorFromRtPtr(RasterTargetGpuPtr ptr)
+// GPU_D12_Descriptor *GPU_D12_DescriptorFromRtPtr(RenderTargetGpuPtr ptr)
 // {
 //   /* TODO */
 //   return 0;
@@ -1265,7 +1339,7 @@ RWTexture3DGpuPtr GPU_PushRWTexture3DPtr(GPU_ArenaHandle arena_handle, GPU_Resou
   return (RWTexture3DGpuPtr) { 0 };
 }
 
-RasterTargetGpuPtr GPU_PushRasterTargetPtr(GPU_ArenaHandle arena_handle, GPU_ResourceHandle resource_handle)
+RenderTargetGpuPtr GPU_PushRenderTargetPtr(GPU_ArenaHandle arena_handle, GPU_ResourceHandle resource_handle)
 {
   /* Allocate descriptor */
   GPU_D12_SharedState *g = &GPU_D12_shared_state;
@@ -1277,7 +1351,7 @@ RasterTargetGpuPtr GPU_PushRasterTargetPtr(GPU_ArenaHandle arena_handle, GPU_Res
   ID3D12Device_CreateRenderTargetView(g->device, resource->d3d_resource, 0, rtv_descriptor->handle);
 
   /* TODO */
-  return (RasterTargetGpuPtr) { .v = rtv_descriptor->index };
+  return (RenderTargetGpuPtr) { .v = rtv_descriptor->index };
 }
 
 SamplerGpuPtr GPU_PushSamplerPtr(GPU_ArenaHandle arena_handle, GPU_ResourceHandle resource_handle)
@@ -1411,7 +1485,7 @@ void GPU_CommitCommandListEx(GPU_CommandListHandle cl_handle, GPU_QueueKind queu
   D3D12_RECT bound_scissor = ZI;
   D3D_PRIMITIVE_TOPOLOGY bound_primitive_topology = -1;
   D3D12_INDEX_BUFFER_VIEW bound_ibv = ZI;
-  D3D12_CPU_DESCRIPTOR_HANDLE bound_raster_targets[GPU_MaxRasterTargets] = ZI;
+  D3D12_CPU_DESCRIPTOR_HANDLE bound_raster_targets[GPU_MaxRenderTargets] = ZI;
 
   /* Flatten command chunks */
   u64 cmds_count = 0;
@@ -1441,32 +1515,11 @@ void GPU_CommitCommandListEx(GPU_CommandListHandle cl_handle, GPU_QueueKind queu
     }
   }
 
-  /* Determine skippable access cmds & access cmd before kinds based on resource */
+  /* Batch barrier cmds */
   {
-    Struct(ResourceLookupNode)
-    {
-      ResourceLookupNode *next_in_list;
-      ResourceLookupNode *next_in_bin;
-
-      GPU_D12_Resource *resource;
-      GPU_D12_Cmd *last_access_cmd;
-    };
-
-    Struct(ResourceLookupBin)
-    {
-      ResourceLookupNode *first;
-      ResourceLookupNode *last;
-    };
-
-    u64 num_lookup_bins = MaxU64(64, AlignU64Pow2(cmds_count * 4));
-    ResourceLookupNode *first_resource_node = 0;
-    ResourceLookupNode *last_resource_node = 0;
-    ResourceLookupBin *lookup_bins = PushStructs(scratch.arena, ResourceLookupBin, num_lookup_bins);
-
-    GPU_D12_Cmd *final_access_cmd = 0;
-
     u64 cmd_idx = 0;
     u64 batch_gen = 0;
+    GPU_D12_Cmd *prev_barrier_cmd = 0;
     while (cmd_idx < cmds_count)
     {
       GPU_D12_Cmd *cmd = &cmds[cmd_idx];
@@ -1484,75 +1537,43 @@ void GPU_CommitCommandListEx(GPU_CommandListHandle cl_handle, GPU_QueueKind queu
       case GPU_D12_CmdKind_Rasterize:
       case GPU_D12_CmdKind_ClearRtv:
       {
         /* TODO:
          * - Only interrupt batch if cmd actually runs
          *   - e.g. Rasterize with empty idx buffer will not actually run
          * - For non-shader interruptions, only interrupt batches for explicitly bound resources
          *   - e.g. Copy should only interrupt batches for supplied resources
          */
         cmd_idx += 1;
         batch_gen += 1;
       } break;
 
-      case GPU_D12_CmdKind_Access:
+      case GPU_D12_CmdKind_Barrier:
       {
-        GPU_D12_Resource *resource = cmd->access.resource;
-
-        /* Lookup last resource command resource in current command list */
-        ResourceLookupNode *lookup = 0;
-        {
-          u64 hash = RandU64FromSeed(resource->uid);
-          ResourceLookupBin *bin = &lookup_bins[hash % num_lookup_bins];
-          lookup = bin->first;
-          for (; lookup && lookup->resource->uid != resource->uid;)
-          {
-            lookup = lookup->next_in_bin;
-          }
-          if (!lookup)
-          {
-            lookup = PushStruct(scratch.arena, ResourceLookupNode);
-            lookup->resource = resource;
-            SllQueuePushN(bin->first, bin->last, lookup, next_in_bin);
-            SllQueuePushN(first_resource_node, last_resource_node, lookup, next_in_list);
-          }
-        }
-
-        /* Determine 'before' state from lookup */
-        if (lookup->last_access_cmd)
+        if (prev_barrier_cmd)
         {
-          GPU_D12_Cmd *last_cmd = lookup->last_access_cmd;
-          if (last_cmd->access.batch_gen != batch_gen || last_cmd->access.is_queue_specific != cmd->access.is_queue_specific)
+          if (prev_barrier_cmd->barrier.batch_gen != batch_gen)
           {
-            /* Access is part of new batch */
-            last_cmd->access.is_end_of_batch = 1;
-            cmd->access.before = last_cmd->access.after;
+            /* This barrier is part of new batch */
+            prev_barrier_cmd->barrier.is_end_of_batch = 1;
           }
           else
           {
-            /* Last access cmd for this resource is in the same batch, merge them */
-            cmd->access.before = last_cmd->access.before;
-            last_cmd->skip = 1;
+            /* Barriers can be batched */
+            prev_barrier_cmd->skip = 1;
           }
         }
-        lookup->last_access_cmd = cmd;
-        cmd->access.batch_gen = batch_gen;
-
-        final_access_cmd = cmd;
+        cmd->barrier.batch_gen = batch_gen;
+        prev_barrier_cmd = cmd;
         cmd_idx += 1;
       } break;
     }
   }
 
-  if (final_access_cmd)
+  if (prev_barrier_cmd)
   {
-    final_access_cmd->access.is_end_of_batch = 1;
+    prev_barrier_cmd->barrier.is_end_of_batch = 1;
   }
 }
 
 /* Process gpu commands into dx12 commands */
 {
-  u64 batch_access_idx_start = 0;
-  u64 batch_access_idx_opl = 0; /* One past last */
+  u64 batch_barrier_idx_start = 0;
+  u64 batch_barrier_idx_opl = 0; /* One past last */
 
   u64 cmd_idx = 0;
   while (cmd_idx < cmds_count)
@@ -1573,68 +1594,34 @@ void GPU_CommitCommandListEx(GPU_CommandListHandle cl_handle, GPU_QueueKind queu
 
       //- Access
 
-      case GPU_D12_CmdKind_Access:
+      case GPU_D12_CmdKind_Barrier:
       {
-        batch_access_idx_opl = cmd_idx + 1;
+        batch_barrier_idx_opl = cmd_idx + 1;
 
         /* Submit batched barriers */
-        if (cmd->access.is_end_of_batch)
+        if (cmd->barrier.is_end_of_batch)
         {
           /* Build barriers */
           u64 buffer_barriers_count = 0;
           u64 texture_barriers_count = 0;
           u64 global_barriers_count = 0;
-          D3D12_BUFFER_BARRIER *buffer_barriers = PushStructs(scratch.arena, D3D12_BUFFER_BARRIER, (batch_access_idx_opl - batch_access_idx_start));
-          D3D12_TEXTURE_BARRIER *texture_barriers = PushStructs(scratch.arena, D3D12_TEXTURE_BARRIER, (batch_access_idx_opl - batch_access_idx_start));
-          D3D12_GLOBAL_BARRIER *global_barriers = PushStructs(scratch.arena, D3D12_GLOBAL_BARRIER, (batch_access_idx_opl - batch_access_idx_start));
-          for (u64 access_cmd_idx = batch_access_idx_start; access_cmd_idx < batch_access_idx_opl; ++access_cmd_idx)
+          D3D12_BUFFER_BARRIER *buffer_barriers = PushStructs(scratch.arena, D3D12_BUFFER_BARRIER, (batch_barrier_idx_opl - batch_barrier_idx_start));
+          D3D12_TEXTURE_BARRIER *texture_barriers = PushStructs(scratch.arena, D3D12_TEXTURE_BARRIER, (batch_barrier_idx_opl - batch_barrier_idx_start));
+          D3D12_GLOBAL_BARRIER *global_barriers = PushStructs(scratch.arena, D3D12_GLOBAL_BARRIER, (batch_barrier_idx_opl - batch_barrier_idx_start));
+          for (u64 barrier_cmd_idx = batch_barrier_idx_start; barrier_cmd_idx < batch_barrier_idx_opl; ++barrier_cmd_idx)
           {
-            GPU_D12_Cmd *access_cmd = &cmds[access_cmd_idx];
-            if (access_cmd->kind == GPU_D12_CmdKind_Access && !access_cmd->skip)
+            GPU_D12_Cmd *barrier_cmd = &cmds[barrier_cmd_idx];
+            if (barrier_cmd->kind == GPU_D12_CmdKind_Barrier && !barrier_cmd->skip)
             {
-              GPU_D12_Resource *resource = access_cmd->access.resource;
+              GPU_BarrierDesc desc = barrier_cmd->barrier.desc;
+              GPU_D12_Resource *resource = GPU_D12_ResourceFromHandle(desc.resource);
               D3D12_BARRIER_TYPE barrier_type = resource->is_texture ? D3D12_BARRIER_TYPE_TEXTURE : D3D12_BARRIER_TYPE_BUFFER;
-              b32 is_queue_specific = access_cmd->access.is_queue_specific;
 
-              /* Translate gpu access kind -> d3d barrier fields */
-              D3D12_BARRIER_SYNC d3d_syncs[2] = ZI;
-              D3D12_BARRIER_ACCESS d3d_accesses[2] = ZI;
-              D3D12_BARRIER_LAYOUT d3d_layouts[2] = ZI;
-              for (u32 i = 0; i < 2; ++i)
-              {
-                GPU_AccessKind access_kind = i == 0 ? access_cmd->access.before : access_cmd->access.after;
-
-                switch (access_kind)
-                {
-                  case GPU_AccessKind_None:
-                  {
-                    d3d_syncs[i] = D3D12_BARRIER_SYNC_NONE;
-                    d3d_accesses[i] = D3D12_BARRIER_ACCESS_NO_ACCESS;
-                    d3d_layouts[i] = resource->texture_layout;
-                  } break;
-
-                  case GPU_AccessKind_Generic_Read:
-                  {
-                    d3d_syncs[i] = D3D12_BARRIER_SYNC_RENDER_TARGET;
-                    d3d_accesses[i] = D3D12_BARRIER_ACCESS_RENDER_TARGET;
-                    d3d_layouts[i] = D3D12_BARRIER_LAYOUT_RENDER_TARGET;
-                  } break;
-
-                  case GPU_AccessKind_RasterTarget:
-                  {
-                    d3d_syncs[i] = D3D12_BARRIER_SYNC_RENDER_TARGET;
-                    d3d_accesses[i] = D3D12_BARRIER_ACCESS_RENDER_TARGET;
-                    d3d_layouts[i] = D3D12_BARRIER_LAYOUT_RENDER_TARGET;
-                  } break;
-
-                  case GPU_AccessKind_Present:
-                  {
-                    d3d_syncs[i] = D3D12_BARRIER_SYNC_NONE;
-                    d3d_accesses[i] = D3D12_BARRIER_ACCESS_NO_ACCESS;
-                    d3d_layouts[i] = D3D12_BARRIER_LAYOUT_PRESENT;
-                  } break;
-                }
-              }
+              /* Translate gpu barrier kind -> d3d barrier fields */
+              D3D12_BARRIER_SYNC sync_before = GPU_D12_BarrierSyncFromStageFlags(desc.sync_prev);
+              D3D12_BARRIER_SYNC sync_after = GPU_D12_BarrierSyncFromStageFlags(desc.sync_next);
+              D3D12_BARRIER_ACCESS access_before = GPU_D12_BarrierAccessFromAccessFlags(desc.access_prev);
+              D3D12_BARRIER_ACCESS access_after = GPU_D12_BarrierAccessFromAccessFlags(desc.access_next);
 
               /* Build barrier */
               switch (barrier_type)
@@ -1642,10 +1629,10 @@ void GPU_CommitCommandListEx(GPU_CommandListHandle cl_handle, GPU_QueueKind queu
                 case D3D12_BARRIER_TYPE_BUFFER:
                 {
                   D3D12_BUFFER_BARRIER *barrier = &buffer_barriers[buffer_barriers_count++];
-                  barrier->SyncBefore = d3d_syncs[0];
-                  barrier->SyncAfter = d3d_syncs[1];
-                  barrier->AccessBefore = d3d_accesses[0];
-                  barrier->AccessAfter = d3d_accesses[1];
+                  barrier->SyncBefore = sync_before;
+                  barrier->SyncAfter = sync_after;
+                  barrier->AccessBefore = access_before;
+                  barrier->AccessAfter = access_after;
                   barrier->pResource = resource->d3d_resource;
                   barrier->Offset = 0;
                   barrier->Size = U64Max;
@@ -1653,25 +1640,35 @@ void GPU_CommitCommandListEx(GPU_CommandListHandle cl_handle, GPU_QueueKind queu
 
                 case D3D12_BARRIER_TYPE_TEXTURE:
                 {
+                  D3D12_BARRIER_LAYOUT layout_after = 0;
+                  if (desc.layout == GPU_LayoutKind_NoChange)
+                  {
+                    layout_after = resource->texture_layout;
+                  }
+                  else
+                  {
+                    layout_after = GPU_D12_BarrierLayoutFromLayoutKind(desc.layout);
+                  }
+
                   D3D12_TEXTURE_BARRIER *barrier = &texture_barriers[texture_barriers_count++];
-                  barrier->SyncBefore = d3d_syncs[0];
-                  barrier->SyncAfter = d3d_syncs[1];
-                  barrier->AccessBefore = d3d_accesses[0];
-                  barrier->AccessAfter = d3d_accesses[1];
-                  barrier->LayoutBefore = d3d_layouts[0];
-                  barrier->LayoutAfter = d3d_layouts[1];
+                  barrier->SyncBefore = sync_before;
+                  barrier->SyncAfter = sync_after;
+                  barrier->AccessBefore = access_before;
+                  barrier->AccessAfter = access_after;
+                  barrier->LayoutBefore = resource->texture_layout;
+                  barrier->LayoutAfter = layout_after;
                   barrier->pResource = resource->d3d_resource;
                   barrier->Subresources.IndexOrFirstMipLevel = 0xffffffff;
-                  resource->texture_layout = d3d_layouts[1];
+                  resource->texture_layout = layout_after;
                 } break;
 
                 case D3D12_BARRIER_TYPE_GLOBAL:
                 {
                   D3D12_GLOBAL_BARRIER *barrier = &global_barriers[global_barriers_count++];
-                  barrier->SyncBefore = d3d_syncs[0];
-                  barrier->SyncAfter = d3d_syncs[1];
-                  barrier->AccessBefore = d3d_accesses[0];
-                  barrier->AccessAfter = d3d_accesses[1];
+                  barrier->SyncBefore = sync_before;
+                  barrier->SyncAfter = sync_after;
+                  barrier->AccessBefore = access_before;
+                  barrier->AccessAfter = access_after;
                 } break;
               }
             }
@@ -1708,7 +1705,7 @@ void GPU_CommitCommandListEx(GPU_CommandListHandle cl_handle, GPU_QueueKind queu
             }
           }
 
-          batch_access_idx_start = cmd_idx + 1;
+          batch_barrier_idx_start = cmd_idx + 1;
         }
 
         cmd_idx += 1;
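
The call that actually submits a finished batch falls between these two hunks, so the diff view elides it. With D3D12 enhanced barriers it would look roughly like this sketch (an assumption based on the counts built above; `list` is taken to be an ID3D12GraphicsCommandList7):

    /* Sketch: submit all batched barriers in a single Barrier() call. */
    D3D12_BARRIER_GROUP groups[3];
    u32 groups_count = 0;
    if (buffer_barriers_count > 0)
    {
      groups[groups_count++] = (D3D12_BARRIER_GROUP) { .Type = D3D12_BARRIER_TYPE_BUFFER, .NumBarriers = (u32)buffer_barriers_count, .pBufferBarriers = buffer_barriers };
    }
    if (texture_barriers_count > 0)
    {
      groups[groups_count++] = (D3D12_BARRIER_GROUP) { .Type = D3D12_BARRIER_TYPE_TEXTURE, .NumBarriers = (u32)texture_barriers_count, .pTextureBarriers = texture_barriers };
    }
    if (global_barriers_count > 0)
    {
      groups[groups_count++] = (D3D12_BARRIER_GROUP) { .Type = D3D12_BARRIER_TYPE_GLOBAL, .NumBarriers = (u32)global_barriers_count, .pGlobalBarriers = global_barriers };
    }
    ID3D12GraphicsCommandList7_Barrier(list, groups_count, groups);
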
@@ -1850,7 +1847,7 @@ void GPU_CommitCommandListEx(GPU_CommandListHandle cl_handle, GPU_QueueKind queu
           GPU_D12_Descriptor *rtv_descriptor = cmd->rasterize.rtv_descriptors[i];
           if (rtv_descriptor != 0)
           {
-            pipeline_desc.render_target_formats[i] = rtv_descriptor->resource->texture_desc.format;
+            pipeline_desc.render_target_formats[i] = rtv_descriptor->resource->texture_format;
           }
           else
           {
@@ -2053,9 +2050,69 @@ void GPU_CopyCpuBytes(GPU_CommandListHandle cl, GPU_ResourceHandle dst, u64 dst_
   /* TODO */
 }
 
-void GPU_CopyCpuTexels(GPU_CommandListHandle cl, GPU_ResourceHandle dst, Vec3I32 dst_offset, void *src, Vec3I32 src_dims, Rng3I32 src_copy_range)
+void GPU_CopyCpuTexels(GPU_CommandListHandle cl, GPU_ResourceHandle dst_handle, Vec3I32 dst_offset, void *cpu_src, Vec3I32 cpu_src_dims, Rng3I32 cpu_src_copy_range)
 {
   /* TODO */
+
+  // GPU_D12_SharedState *g = &GPU_D12_shared_state;
+
+  // D3D12_RESOURCE_DESC desc = ZI;
+  // ID3D12Resource_GetDesc(((GPU_D12_Resource *)footprint_reference)->d3d_resource, &desc);
+
+  // u64 upload_size = 0;
+  // u64 upload_row_size = 0;
+  // u32 upload_num_rows = 0;
+  // D3D12_PLACED_SUBRESOURCE_FOOTPRINT placed_footprint = ZI;
+  // ID3D12Device_GetCopyableFootprints(g->device, &desc, 0, 1, 0, &placed_footprint, &upload_num_rows, &upload_row_size, &upload_size);
+  // D3D12_SUBRESOURCE_FOOTPRINT footprint = placed_footprint.Footprint;
+
+  // {
+  //   D3D12_RANGE read_range = ZI;
+  //   u8 *dst_base = (u8 *)dst + placed_footprint.Offset;
+  //   u8 *src_base = src;
+
+  //   u32 z_size = upload_row_size * upload_num_rows;
+
+  //   b32 src_overflow = 0;
+  //   for (u32 z = 0; !src_overflow && z < desc.DepthOrArraySize; ++z)
+  //   {
+  //     u32 z_offset = z * z_size;
+  //     for (u32 y = 0; !src_overflow && y < upload_num_rows; ++y)
+  //     {
+  //       u8 *dst_row = dst_base + y * footprint.RowPitch + z_offset;
+  //       u8 *src_row = src_base + y * upload_row_size + z_offset;
+  //       CopyBytes(dst_row, src_row, upload_row_size);
+  //     }
+  //   }
+  // }
+
+  // GPU_D12_Resource *dst = GPU_D12_ResourceFromHandle(dst_handle);
+
+  // D3D12_RESOURCE_DESC desc = ZI;
+  // ID3D12Resource_GetDesc(dst->d3d_resource, &desc);
+
+  // u64 upload_size = 0;
+  // u64 upload_row_size = 0;
+  // u32 upload_num_rows = 0;
+  // D3D12_PLACED_SUBRESOURCE_FOOTPRINT placed_footprint = ZI;
+  // ID3D12Device_GetCopyableFootprints(g->device, &desc, 0, dst->texture_mip_levels, 0, &placed_footprint, &upload_num_rows, &upload_row_size, &upload_size);
+  // D3D12_SUBRESOURCE_FOOTPRINT footprint = placed_footprint.Footprint;
+
+  // void *copy_start = ((u8 *)src) + src_copy_range.min;
+  // u64 copy_len = src_copy_range.max - src_copy_range.min;
+
 }
 
 //- Gpu <-> Gpu copy
@@ -2127,26 +2184,14 @@ void GPU_SetConstSampler(GPU_CommandListHandle cl_handle, i32 slot, SamplerGpuPt
   /* TODO */
 }
 
-//- Access
+//- Barrier
 
-void GPU_SyncQueueAccess(GPU_CommandListHandle cl_handle, GPU_ResourceHandle handle, GPU_AccessKind kind)
+void GPU_BarrierEx(GPU_CommandListHandle cl_handle, GPU_BarrierDesc desc)
 {
   GPU_D12_CmdList *cl = GPU_D12_CmdListFromHandle(cl_handle);
   GPU_D12_Cmd *cmd = GPU_D12_PushCmd(cl);
-  cmd->kind = GPU_D12_CmdKind_Access;
-  cmd->access.after = kind;
-  cmd->access.resource = GPU_D12_ResourceFromHandle(handle);
-  cmd->access.is_queue_specific = 1;
-}
-
-void GPU_SyncGlobalAccess(GPU_CommandListHandle cl_handle, GPU_ResourceHandle handle, GPU_AccessKind kind)
-{
-  GPU_D12_CmdList *cl = GPU_D12_CmdListFromHandle(cl_handle);
-  GPU_D12_Cmd *cmd = GPU_D12_PushCmd(cl);
-  cmd->kind = GPU_D12_CmdKind_Access;
-  cmd->access.after = kind;
-  cmd->access.resource = GPU_D12_ResourceFromHandle(handle);
-  cmd->access.is_queue_specific = 0;
+  cmd->kind = GPU_D12_CmdKind_Barrier;
+  cmd->barrier.desc = desc;
 }
 
 //- Compute
@@ -2165,7 +2210,7 @@ void GPU_Compute(GPU_CommandListHandle cl_handle, ComputeShader cs, Vec3I32 grou
 void GPU_Rasterize(GPU_CommandListHandle cl_handle,
                    VertexShader vs, PixelShader ps,
                    u32 instances_count, IndexBufferGpuPtr idx_buff,
-                   u32 raster_targets_count, RasterTargetGpuPtr *raster_targets,
+                   u32 raster_targets_count, RenderTargetGpuPtr *raster_targets,
                    Rng3 viewport, Rng2 scissor,
                    GPU_RasterMode mode)
 {
@@ -2176,7 +2221,7 @@ void GPU_Rasterize(GPU_CommandListHandle cl_handle,
   cmd->rasterize.ps = ps;
   cmd->rasterize.instances_count = instances_count;
   cmd->rasterize.ibv = GPU_D12_IbvFromIbPtr(idx_buff);
-  for (u32 i = 0; i < MinU32(raster_targets_count, GPU_MaxRasterTargets); ++i)
+  for (u32 i = 0; i < MinU32(raster_targets_count, GPU_MaxRenderTargets); ++i)
   {
     cmd->rasterize.rtv_descriptors[i] = GPU_D12_DescriptorFromIndex(GPU_D12_DescriptorHeapKind_Rtv, raster_targets[i].v);
   }
@@ -2187,7 +2232,7 @@ void GPU_Rasterize(GPU_CommandListHandle cl_handle,
 
 //- Clear
 
-void GPU_ClearRasterTarget(GPU_CommandListHandle cl_handle, RasterTargetGpuPtr ptr, Vec4 color)
+void GPU_ClearRenderTarget(GPU_CommandListHandle cl_handle, RenderTargetGpuPtr ptr, Vec4 color)
 {
   GPU_D12_CmdList *cl = GPU_D12_CmdListFromHandle(cl_handle);
   GPU_D12_Cmd *cmd = GPU_D12_PushCmd(cl);
@@ -2443,16 +2488,11 @@ GPU_ResourceHandle GPU_PrepareBackbuffer(GPU_SwapchainHandle swapchain_handle, G
       backbuffer->d3d_resource = d3d_resource;
       backbuffer->uid = Atomic64FetchAdd(&g->next_resource_uid.v, 1);
       backbuffer->is_texture = 1;
+      backbuffer->texture_flags = GPU_TextureFlag_AllowRenderTarget;
+      backbuffer->texture_dims = VEC3I32(size.x, size.y, 1);
+      backbuffer->texture_mip_levels = 1;
+      backbuffer->texture_layout = D3D12_BARRIER_LAYOUT_PRESENT;
       backbuffer->swapchain = swapchain;
-      {
-        backbuffer->texture_desc.kind = GPU_TextureKind_2D;
-        backbuffer->texture_desc.format = format;
-        backbuffer->texture_desc.dims = VEC3I32(size.x, size.y, 1);
-        backbuffer->texture_desc.flags = GPU_TextureFlag_AllowRasterTarget;
-        backbuffer->texture_desc.initial_access = GPU_AccessKind_Present;
-        backbuffer->texture_desc.mip_levels = 1;
-      }
     }
   }
   swapchain->backbuffers_format = format;
@@ -34,7 +34,7 @@ Struct(GPU_D12_PipelineDesc)
   ComputeShader cs;
   b32 is_wireframe;
   D3D12_PRIMITIVE_TOPOLOGY_TYPE topology_type;
-  GPU_Format render_target_formats[GPU_MaxRasterTargets];
+  GPU_Format render_target_formats[GPU_MaxRenderTargets];
 };
 
 Struct(GPU_D12_Pipeline)
@@ -119,13 +119,17 @@ Struct(GPU_D12_Resource)
   u64 uid;
 
   /* Buffer info */
-  GPU_BufferDesc buffer_desc;
+  GPU_BufferFlag buffer_flags;
+  u64 buffer_size;
   D3D12_GPU_VIRTUAL_ADDRESS buffer_gpu_address;
 
   /* Texture info */
   b32 is_texture;
+  GPU_TextureFlag texture_flags;
+  GPU_Format texture_format;
+  Vec3I32 texture_dims;
+  i32 texture_mip_levels;
   D3D12_BARRIER_LAYOUT texture_layout;
-  GPU_TextureDesc texture_desc;
 
   /* Backbuffer info */
   struct GPU_D12_Swapchain *swapchain;
@@ -176,23 +180,11 @@ Struct(GPU_D12_RawCommandList)
 Enum(GPU_D12_CmdKind)
 {
   GPU_D12_CmdKind_None,
-
-  /* Access */
-  GPU_D12_CmdKind_Access,
-
-  /* Constant */
+  GPU_D12_CmdKind_Barrier,
   GPU_D12_CmdKind_Constant,
-
-  /* Copy */
   GPU_D12_CmdKind_Copy,
-
-  /* Compute */
   GPU_D12_CmdKind_Compute,
-
-  /* Rasterize */
   GPU_D12_CmdKind_Rasterize,
-
-  /* Clear rtv */
   GPU_D12_CmdKind_ClearRtv,
 };
@@ -204,15 +196,12 @@ Struct(GPU_D12_Cmd)
 {
   struct
   {
-    GPU_AccessKind after;
-    GPU_D12_Resource *resource;
+    GPU_BarrierDesc desc;
 
     /* Post-batch data */
-    GPU_AccessKind before;
     b32 is_end_of_batch;
-    b32 is_queue_specific;
     u64 batch_gen;
-  } access;
+  } barrier;
 
   struct
   {
@@ -350,12 +339,16 @@ void GPU_D12_Startup(void);
 ////////////////////////////////////////////////////////////
 //~ Helpers
 
-DXGI_FORMAT GPU_D12_DxgiFormatFromGpuFormat(GPU_Format format);
 GPU_D12_Arena *GPU_D12_ArenaFromHandle(GPU_ArenaHandle handle);
 GPU_D12_CmdList *GPU_D12_CommandListFromHandle(GPU_CommandListHandle handle);
 GPU_D12_Resource *GPU_D12_ResourceFromHandle(GPU_ResourceHandle handle);
 GPU_D12_Swapchain *GPU_D12_SwapchainFromHandle(GPU_SwapchainHandle handle);
 
+DXGI_FORMAT GPU_D12_DxgiFormatFromGpuFormat(GPU_Format format);
+D3D12_BARRIER_SYNC GPU_D12_BarrierSyncFromStageFlags(GPU_StageFlag flags);
+D3D12_BARRIER_ACCESS GPU_D12_BarrierAccessFromAccessFlags(GPU_AccessFlag flags);
+D3D12_BARRIER_LAYOUT GPU_D12_BarrierLayoutFromLayoutKind(GPU_LayoutKind kind);
+
 ////////////////////////////////////////////////////////////
 //~ Pipeline
@@ -31,20 +31,34 @@ JobImpl(PR_RunForever, _sig, _id)
 /* Draw to backbuffer */
 GPU_CommandListHandle cl = GPU_PrepareCommandList();
 {
-  RasterTargetGpuPtr backbuffer_rt = GPU_PushRasterTargetPtr(gpu_frame_arena, backbuffer);
+  RenderTargetGpuPtr backbuffer_rt = GPU_PushRenderTargetPtr(gpu_frame_arena, backbuffer);
 
-  /* Clear backbuffer */
+  /* Prep clear pass */
   {
-    GPU_SyncAccess(cl, backbuffer, GPU_AccessKind_RasterTarget);
-    GPU_ClearRasterTarget(cl, backbuffer_rt, VEC4(1, 0, 0, 1));
+    GPU_LayoutBarrier(cl, backbuffer,
+                      NoStage, RenderTarget,
+                      NoAccess, RenderTargetWrite,
+                      DirectQueue_RenderTargetWrite);
   }
 
-  /* Make backbuffer presentable */
-  GPU_SyncAccess(cl, backbuffer, GPU_AccessKind_Present);
+  /* Clear pass */
+  {
+    GPU_ClearRenderTarget(cl, backbuffer_rt, VEC4(1, 0, 0, 1));
+  }
+
+  /* Finalize backbuffer layout */
+  {
+    GPU_LayoutBarrier(cl, backbuffer,
+                      RenderTarget, NoStage,
+                      RenderTargetWrite, NoAccess,
+                      Present);
+  }
 
   /* Reset */
   {
     GPU_ResetArena(cl, gpu_frame_arena);
   }
 }
 GPU_CommitCommandList(cl, GPU_QueueKind_Direct);
 }
 GPU_CommitBackbuffer(backbuffer, VSYNC);
@@ -27,7 +27,7 @@ JobImpl(SPR_LoadTexture, sig, _)
 GPU_ResourceHandle gpu_resource = GPU_PushTexture2D(gpu_perm,
                                                     GPU_Format_R8G8B8A8_Unorm_Srgb,
                                                     VEC2I32(decoded.width, decoded.height),
-                                                    GPU_AccessKind_AnyRead);
+                                                    GPU_LayoutKind_AnyQueue_ShaderRead_CopyRead_CopyWrite);
 // texture->texture = gpu_tex;
 texture->width = decoded.width;
 texture->height = decoded.height;
@@ -38,8 +38,12 @@ JobImpl(SPR_LoadTexture, sig, _)
                      gpu_resource, VEC3I32(0,0,0),
                      decoded.pixels, VEC3I32(decoded.width, decoded.height, 1),
                      RNG3I32(VEC3I32(0,0,0), VEC3I32(decoded.width, decoded.height, 1)));
+    GPU_LayoutBarrier(cl, gpu_resource,
+                      Copy, NoStage,
+                      CopyWrite, NoAccess,
+                      AnyQueue_ShaderRead_CopyRead);
   }
-  GPU_CommitCommandListEx(cl, GPU_QueueKind_AsyncCopy, 1, &GPU_SetFence(&entry->texture_ready_fence, 1));
+  GPU_CommitCommandList(cl, GPU_QueueKind_AsyncCopy);
 }
 
 EndScratch(scratch);