From a1cf0a46b5718dc28cdd6e05634ba76565e3de85 Mon Sep 17 00:00:00 2001 From: jacob Date: Tue, 3 Mar 2026 23:49:29 -0600 Subject: [PATCH] vk testing --- src/gpu/gpu.lay | 3 +- src/gpu/gpu_vk/gpu_vk.lay | 11 + src/gpu/gpu_vk/gpu_vk_core.c | 270 ++ src/gpu/gpu_vk/gpu_vk_core.h | 15 + src/gpu_old/gpu.lay | 30 + src/gpu_old/gpu_common.c | 264 ++ src/gpu_old/gpu_common.h | 53 + src/gpu_old/gpu_core.h | 795 ++++ src/gpu_old/gpu_dx12/gpu_dx12.lay | 16 + src/gpu_old/gpu_dx12/gpu_dx12_core.c | 3950 +++++++++++++++++ src/gpu_old/gpu_dx12/gpu_dx12_core.h | 577 +++ .../AgilitySDK/1.618.5/D3D12Core.dat | 3 + .../AgilitySDK/1.618.5/d3d12SDKLayers.dat | 3 + src/gpu_old/gpu_res/noise_128x128x64_16.dat | 3 + src/gpu_old/gpu_shared.cgh | 333 ++ 15 files changed, 6325 insertions(+), 1 deletion(-) create mode 100644 src/gpu/gpu_vk/gpu_vk.lay create mode 100644 src/gpu/gpu_vk/gpu_vk_core.c create mode 100644 src/gpu/gpu_vk/gpu_vk_core.h create mode 100644 src/gpu_old/gpu.lay create mode 100644 src/gpu_old/gpu_common.c create mode 100644 src/gpu_old/gpu_common.h create mode 100644 src/gpu_old/gpu_core.h create mode 100644 src/gpu_old/gpu_dx12/gpu_dx12.lay create mode 100644 src/gpu_old/gpu_dx12/gpu_dx12_core.c create mode 100644 src/gpu_old/gpu_dx12/gpu_dx12_core.h create mode 100644 src/gpu_old/gpu_dx12/gpu_dx12_res/AgilitySDK/1.618.5/D3D12Core.dat create mode 100644 src/gpu_old/gpu_dx12/gpu_dx12_res/AgilitySDK/1.618.5/d3d12SDKLayers.dat create mode 100644 src/gpu_old/gpu_res/noise_128x128x64_16.dat create mode 100644 src/gpu_old/gpu_shared.cgh diff --git a/src/gpu/gpu.lay b/src/gpu/gpu.lay index f7ed7d2e..2b895d04 100644 --- a/src/gpu/gpu.lay +++ b/src/gpu/gpu.lay @@ -27,4 +27,5 @@ @IncludeC gpu_common.c -@DefaultDownstream Win32 gpu_dx12 +// @DefaultDownstream Win32 gpu_dx12 +@DefaultDownstream Any gpu_vk diff --git a/src/gpu/gpu_vk/gpu_vk.lay b/src/gpu/gpu_vk/gpu_vk.lay new file mode 100644 index 00000000..2c8e2d78 --- /dev/null +++ b/src/gpu/gpu_vk/gpu_vk.lay @@ -0,0 +1,11 @@ 
+@Layer gpu_vk + +////////////////////////////// +//- Api + +@IncludeC gpu_vk_core.h + +////////////////////////////// +//- Impl + +@IncludeC gpu_vk_core.c diff --git a/src/gpu/gpu_vk/gpu_vk_core.c b/src/gpu/gpu_vk/gpu_vk_core.c new file mode 100644 index 00000000..45620a17 --- /dev/null +++ b/src/gpu/gpu_vk/gpu_vk_core.c @@ -0,0 +1,270 @@ +G_VK_Ctx G_VK = Zi; +ThreadLocal G_VK_ThreadLocalCtx G_VK_tl = Zi; + +//////////////////////////////////////////////////////////// +//~ @hookimpl Bootstrap + +void G_Bootstrap(void) +{ +} + +//////////////////////////////////////////////////////////// +//~ @hookimpl Arena + +G_ArenaHandle G_AcquireArena(void) +{ + // TODO: Impl + return (G_ArenaHandle) Zi; +} + +void G_ReleaseArena(G_CommandListHandle cl_handle, G_ArenaHandle arena) +{ + // TODO: Impl +} + +void G_ResetArena(G_CommandListHandle cl_handle, G_ArenaHandle arena_handle) +{ + // TODO: Impl +} + +//////////////////////////////////////////////////////////// +//~ @hookimpl Resource + +G_ResourceHandle G_PushResource(G_ArenaHandle arena_handle, G_CommandListHandle cl_handle, G_ResourceDesc desc) +{ + // TODO: Impl + return (G_ResourceHandle) Zi; +} + +//////////////////////////////////////////////////////////// +//~ @hookimpl Shader resource reference + +u32 G_PushRef(G_ArenaHandle arena_handle, G_ResourceHandle resource_handle, G_RefDesc ref_desc) +{ + // TODO: Impl + return 0; +} + +//- Count + +u64 G_CountBufferBytes(G_ResourceHandle buffer) +{ + // TODO: Impl + return 0; +} + +i32 G_Count1D(G_ResourceHandle texture) +{ + // TODO: Impl + return 0; +} + +Vec2I32 G_Count2D(G_ResourceHandle texture) +{ + // TODO: Impl + return (Vec2I32) Zi; +} + +Vec3I32 G_Count3D(G_ResourceHandle texture) +{ + // TODO: Impl + return (Vec3I32) Zi; +} + +i32 G_CountWidth(G_ResourceHandle texture) +{ + // TODO: Impl + return 0; +} + +i32 G_CountHeight(G_ResourceHandle texture) +{ + // TODO: Impl + return 0; +} + +i32 G_CountDepth(G_ResourceHandle texture) +{ + // TODO: Impl + return 0; +} 
+ +i32 G_CountMips(G_ResourceHandle texture) +{ + // TODO: Impl + return 0; +} + +//- Map + +void *G_HostPointerFromResource(G_ResourceHandle resource_handle) +{ + // TODO: Impl + return 0; +} + +//////////////////////////////////////////////////////////// +//~ @hookimpl Command + +//- Command list + +G_CommandListHandle G_PrepareCommandList(G_QueueKind queue) +{ + // TODO: Impl + return (G_CommandListHandle) Zi; +} + +i64 G_CommitCommandList(G_CommandListHandle cl_handle) +{ + // TODO: Impl + return (i64) Zi; +} + +//- Cpu -> Gpu staged copy + +void G_CopyCpuToBuffer(G_CommandListHandle cl_handle, G_ResourceHandle dst_handle, u64 dst_offset, void *src, RngU64 src_copy_range) +{ + // TODO: Impl +} + +void G_CopyCpuToTexture(G_CommandListHandle cl_handle, G_ResourceHandle dst_handle, Vec3I32 dst_offset, void *src, Vec3I32 src_dims, Rng3I32 src_copy_range) +{ + // TODO: Impl +} + +//- Gpu <-> Gpu copy + +void G_CopyBufferToBuffer(G_CommandListHandle cl_handle, G_ResourceHandle dst_handle, u64 dst_offset, G_ResourceHandle src_handle, RngU64 src_copy_range) +{ + // TODO: Impl +} + +void G_CopyBufferToTexture(G_CommandListHandle cl_handle, G_ResourceHandle dst_handle, Rng3I32 dst_copy_range, G_ResourceHandle src_handle, u64 src_offset) +{ + // TODO: Impl +} + +void G_CopyTextureToTexture(G_CommandListHandle cl_handle, G_ResourceHandle dst_handle, Vec3I32 dst_offset, G_ResourceHandle src_handle, Rng3I32 src_copy_range) +{ + // TODO: Impl +} + +void G_CopyTextureToBuffer(G_CommandListHandle cl_handle, G_ResourceHandle dst_handle, Vec3I32 dst_offset, G_ResourceHandle src_handle, Rng3I32 src_copy_range) +{ + // TODO: Impl +} + +//- Constant + +void G_SetConstantEx(G_CommandListHandle cl_handle, i32 slot, void *src_32bit, u32 size) +{ + // TODO: Impl +} + +//- Memory sync + +void G_MemorySyncEx(G_CommandListHandle cl_handle, G_MemoryBarrierDesc desc) +{ + // TODO: Impl +} + +//- Compute + +void G_ComputeEx(G_CommandListHandle cl_handle, ComputeShaderDesc cs, Vec3I32 threads) 
+{ + // TODO: Impl +} + +//- Rasterize + +void G_Rasterize( + G_CommandListHandle cl_handle, + VertexShaderDesc vs, PixelShaderDesc ps, + u32 instances_count, G_IndexBufferDesc index_buffer, + u32 render_targets_count, G_RenderTargetDesc *render_targets, + Rng3 viewport, Rng2 scissor, + G_RasterMode raster_mode +) +{ + // TODO: Impl +} + +//- Clear + +void G_ClearRenderTarget(G_CommandListHandle cl_handle, G_ResourceHandle resource_handle, Vec4 color, i32 mip) +{ + // TODO: Impl +} + +//- Log + +void G_LogResource(G_CommandListHandle cl_handle, G_ResourceHandle resource_handle) +{ + // TODO: Impl +} + +//////////////////////////////////////////////////////////// +//~ @hookimpl Queue synchronization + +i64 G_CompletionValueFromQueue(G_QueueKind queue_kind) +{ + // TODO: Impl + return (i64) Zi; +} + +i64 G_CompletionTargetFromQueue(G_QueueKind queue_kind) +{ + // TODO: Impl + return (i64) Zi; +} + +G_QueueCompletions G_CompletionValuesFromQueues(G_QueueMask queue_mask) +{ + // TODO: Impl + return (G_QueueCompletions) Zi; +} + +G_QueueCompletions G_CompletionTargetsFromQueues(G_QueueMask queue_mask) +{ + // TODO: Impl + return (G_QueueCompletions) Zi; +} + +void G_QueueSyncEx(G_QueueBarrierDesc desc) +{ + // TODO: Impl +} + +//////////////////////////////////////////////////////////// +//~ @hookimpl Statistics + +G_Stats G_QueryStats(void) +{ + // TODO: Impl + return (G_Stats) Zi; +} + +//////////////////////////////////////////////////////////// +//~ @hookimpl Swapchain + +G_SwapchainHandle G_AcquireSwapchain(u64 os_window_handle) +{ + // TODO: Impl + return (G_SwapchainHandle) Zi; +} + +void G_ReleaseSwapchain(G_SwapchainHandle swapchain_handle) +{ + // TODO: Impl +} + +G_ResourceHandle G_PrepareBackbuffer(G_SwapchainHandle swapchain_handle, G_Format format, Vec2I32 size) +{ + // TODO: Impl + return (G_ResourceHandle) Zi; +} + +void G_CommitBackbuffer(G_ResourceHandle backbuffer_handle, i32 vsync) +{ + // TODO: Impl +} diff --git a/src/gpu/gpu_vk/gpu_vk_core.h 
b/src/gpu/gpu_vk/gpu_vk_core.h new file mode 100644 index 00000000..01702e05 --- /dev/null +++ b/src/gpu/gpu_vk/gpu_vk_core.h @@ -0,0 +1,15 @@ +//////////////////////////////////////////////////////////// +//~ State types + +Struct(G_VK_Ctx) +{ + i32 _; +}; + +Struct(G_VK_ThreadLocalCtx) +{ + i32 _; +}; + +extern G_VK_Ctx G_Vk; +extern ThreadLocal G_VK_ThreadLocalCtx G_VK_tl; diff --git a/src/gpu_old/gpu.lay b/src/gpu_old/gpu.lay new file mode 100644 index 00000000..8250e805 --- /dev/null +++ b/src/gpu_old/gpu.lay @@ -0,0 +1,30 @@ +@Layer gpu_old + +////////////////////////////// +//- Dependencies + +@Dep platform + +////////////////////////////// +//- Resources + +@EmbedDir G_Resources gpu_res + +////////////////////////////// +//- Api + +@IncludeC gpu_shared.cgh +@IncludeC gpu_core.h +@IncludeC gpu_common.h + +@IncludeG gpu_shared.cgh + +@Bootstrap G_Bootstrap +@Bootstrap G_BootstrapCommon + +////////////////////////////// +//- Impl + +@IncludeC gpu_common.c + +@DefaultDownstream Win32 gpu_dx12 diff --git a/src/gpu_old/gpu_common.c b/src/gpu_old/gpu_common.c new file mode 100644 index 00000000..9c732b11 --- /dev/null +++ b/src/gpu_old/gpu_common.c @@ -0,0 +1,264 @@ +G_Ctx G = Zi; +ThreadLocal G_ThreadLocalCtx G_tl = Zi; + +//////////////////////////////////////////////////////////// +//~ Bootstrap + +void G_BootstrapCommon(void) +{ + G_ArenaHandle gpu_perm = G_PermArena(); + + G_CommandListHandle cl = G_PrepareCommandList(G_QueueKind_Direct); + { + // Init quad index buffer + { + G_ResourceHandle quad_indices = Zi; + u16 quad_data[6] = { 0, 1, 2, 0, 2, 3 }; + quad_indices = G_PushBuffer(gpu_perm, cl, u16, countof(quad_data)); + G_CopyCpuToBuffer(cl, quad_indices, 0, quad_data, RNGU64(0, sizeof(quad_data))); + G.quad_indices = G_IdxBuff16(quad_indices); + } + + // Init blank texture + { + G_ResourceHandle blank_tex = G_PushTexture2D( + gpu_perm, cl, + G_Format_R8G8B8A8_Unorm, + VEC2I32(8, 8), + G_Layout_Common, + .flags = G_ResourceFlag_ZeroMemory, + .name = 
Lit("Blank texture") + ); + G.blank_tex = G_PushTexture2DRef(gpu_perm, blank_tex); + } + + // Init noise texture + { + G_ResourceHandle noise_tex = Zi; + String noise_data = DataFromResource(ResourceKeyFromStore(&G_Resources, Lit("noise_128x128x64_16.dat"))); + Vec3I32 noise_dims = VEC3I32(128, 128, 64); + if (noise_data.len != noise_dims.x * noise_dims.y * noise_dims.z * 2) + { + Panic(Lit("Unexpected noise texture size")); + } + noise_tex = G_PushTexture3D( + gpu_perm, cl, + G_Format_R16_Uint, + noise_dims, + G_Layout_Common, + .name = Lit("Noise texture") + ); + G_CopyCpuToTexture( + cl, + noise_tex, VEC3I32(0, 0, 0), + noise_data.text, noise_dims, + RNG3I32(VEC3I32(0, 0, 0), noise_dims) + ); + G.basic_noise = G_PushTexture3DRef(gpu_perm, noise_tex); + } + + // Init basic samplers + for (G_BasicSamplerKind sampler_kind = 0; sampler_kind < countof(G.basic_samplers); ++sampler_kind) + { + G_SamplerStateRef sampler = Zi; + switch (sampler_kind) + { + default: + { + // Sampler unspecified + Assert(0); + } FALLTHROUGH; + case G_BasicSamplerKind_PointClamp: + { + G_Filter filter = G_Filter_MinMagMipPoint; + G_AddressMode address_mode = G_AddressMode_Clamp; + G_ResourceHandle sampler_res = G_PushSampler(gpu_perm, cl, .filter = filter, .x = address_mode, .y = address_mode, .z = address_mode); + sampler = G_PushSamplerStateRef(gpu_perm, sampler_res); + } break; + case G_BasicSamplerKind_PointWrap: + { + G_Filter filter = G_Filter_MinMagMipPoint; + G_AddressMode address_mode = G_AddressMode_Wrap; + G_ResourceHandle sampler_res = G_PushSampler(gpu_perm, cl, .filter = filter, .x = address_mode, .y = address_mode, .z = address_mode); + sampler = G_PushSamplerStateRef(gpu_perm, sampler_res); + } break; + case G_BasicSamplerKind_PointMirror: + { + G_Filter filter = G_Filter_MinMagMipPoint; + G_AddressMode address_mode = G_AddressMode_Mirror; + G_ResourceHandle sampler_res = G_PushSampler(gpu_perm, cl, .filter = filter, .x = address_mode, .y = address_mode, .z = address_mode); 
+ sampler = G_PushSamplerStateRef(gpu_perm, sampler_res); + } break; + case G_BasicSamplerKind_BilinearClamp: + { + G_Filter filter = G_Filter_MinMagLinearMipPoint; + G_AddressMode address_mode = G_AddressMode_Clamp; + G_ResourceHandle sampler_res = G_PushSampler(gpu_perm, cl, .filter = filter, .x = address_mode, .y = address_mode, .z = address_mode); + sampler = G_PushSamplerStateRef(gpu_perm, sampler_res); + } break; + case G_BasicSamplerKind_BilinearWrap: + { + G_Filter filter = G_Filter_MinMagLinearMipPoint; + G_AddressMode address_mode = G_AddressMode_Wrap; + G_ResourceHandle sampler_res = G_PushSampler(gpu_perm, cl, .filter = filter, .x = address_mode, .y = address_mode, .z = address_mode); + sampler = G_PushSamplerStateRef(gpu_perm, sampler_res); + } break; + case G_BasicSamplerKind_BilinearMirror: + { + G_Filter filter = G_Filter_MinMagLinearMipPoint; + G_AddressMode address_mode = G_AddressMode_Mirror; + G_ResourceHandle sampler_res = G_PushSampler(gpu_perm, cl, .filter = filter, .x = address_mode, .y = address_mode, .z = address_mode); + sampler = G_PushSamplerStateRef(gpu_perm, sampler_res); + } break; + case G_BasicSamplerKind_TrilinearClamp: + { + G_Filter filter = G_Filter_MinMagMipLinear; + G_AddressMode address_mode = G_AddressMode_Clamp; + G_ResourceHandle sampler_res = G_PushSampler(gpu_perm, cl, .filter = filter, .x = address_mode, .y = address_mode, .z = address_mode); + sampler = G_PushSamplerStateRef(gpu_perm, sampler_res); + } break; + case G_BasicSamplerKind_TrilinearWrap: + { + G_Filter filter = G_Filter_MinMagMipLinear; + G_AddressMode address_mode = G_AddressMode_Wrap; + G_ResourceHandle sampler_res = G_PushSampler(gpu_perm, cl, .filter = filter, .x = address_mode, .y = address_mode, .z = address_mode); + sampler = G_PushSamplerStateRef(gpu_perm, sampler_res); + } break; + case G_BasicSamplerKind_TrilinearMirror: + { + G_Filter filter = G_Filter_MinMagMipLinear; + G_AddressMode address_mode = G_AddressMode_Mirror; + G_ResourceHandle 
sampler_res = G_PushSampler(gpu_perm, cl, .filter = filter, .x = address_mode, .y = address_mode, .z = address_mode); + sampler = G_PushSamplerStateRef(gpu_perm, sampler_res); + } break; + } + G.basic_samplers[sampler_kind] = sampler; + } + } + G_CommitCommandList(cl); + G_QueueSync(G_QueueMask_Direct, G_QueueMask_All); +} + +//////////////////////////////////////////////////////////// +//~ Utils + +//- Arena + +G_ArenaHandle G_PermArena(void) +{ + if (G_IsArenaNil(G_tl.gpu_perm)) + { + G_tl.gpu_perm = G_AcquireArena(); + } + return G_tl.gpu_perm; +} + +//- Push resource from cpu + +G_ResourceHandle G_PushBufferFromCpuCopy_(G_ArenaHandle gpu_arena, G_CommandListHandle cl, String src, G_BufferDesc desc) +{ + G_ResourceHandle buffer = G_PushResource(gpu_arena, cl, (G_ResourceDesc) { .kind = G_ResourceKind_Buffer, .buffer = desc }); + G_CopyCpuToBuffer(cl, buffer, 0, src.text, RNGU64(0, src.len)); + return buffer; +} + +//- Mip + +i32 G_DimsFromMip1D(i32 mip0_dims, i32 mip) +{ + mip = ClampI32(mip, -31, 31); + i32 result = 0; + if (mip >= 0) + { + result = MaxI32(result >> mip, 1); + } + else + { + result = MaxI32(result << -mip, 1); + } + return result; +} + +Vec2I32 G_DimsFromMip2D(Vec2I32 mip0_dims, i32 mip) +{ + mip = ClampI32(mip, -31, 31); + Vec2I32 result = Zi; + if (mip >= 0) + { + result.x = MaxI32(mip0_dims.x >> mip, 1); + result.y = MaxI32(mip0_dims.y >> mip, 1); + } + else + { + result.x = MaxI32(mip0_dims.x << -mip, 1); + result.y = MaxI32(mip0_dims.y << -mip, 1); + } + return result; +} + +Vec3I32 G_DimsFromMip3D(Vec3I32 mip0_dims, i32 mip) +{ + mip = ClampI32(mip, -31, 31); + Vec3I32 result = Zi; + if (mip >= 0) + { + result.x = MaxI32(mip0_dims.x >> mip, 1); + result.y = MaxI32(mip0_dims.y >> mip, 1); + result.z = MaxI32(mip0_dims.z >> mip, 1); + } + else + { + result.x = MaxI32(mip0_dims.x << -mip, 1); + result.y = MaxI32(mip0_dims.y << -mip, 1); + result.z = MaxI32(mip0_dims.z << -mip, 1); + } + return result; +} + +//- Thread count + +Vec3I32 
G_GroupCountFromThreadCount(ComputeShaderDesc cs, Vec3I32 threads) +{ + return VEC3I32( + (threads.x + cs.x - 1) / cs.x, + (threads.y + cs.y - 1) / cs.y, + (threads.z + cs.z - 1) / cs.z + ); +} + +//- Viewport / scissor + +Rng3 G_ViewportFromTexture(G_ResourceHandle texture) +{ + Vec2I32 dims = G_Count2D(texture); + return RNG3(VEC3(0, 0, 0), VEC3(dims.x, dims.y, 1)); +} + +Rng2 G_ScissorFromTexture(G_ResourceHandle texture) +{ + Vec2I32 dims = G_Count2D(texture); + return RNG2(VEC2(0, 0), VEC2(dims.x, dims.y)); +} + +//- Shared resources + + +G_SamplerStateRef G_BasicSamplerFromKind(G_BasicSamplerKind kind) +{ + return G.basic_samplers[kind]; +} + +G_IndexBufferDesc G_QuadIndices(void) +{ + return G.quad_indices; +} + +G_Texture2DRef G_BlankTexture2D(void) +{ + return G.blank_tex; +} + +G_Texture3DRef G_BasicNoiseTexture(void) +{ + return G.basic_noise; +} diff --git a/src/gpu_old/gpu_common.h b/src/gpu_old/gpu_common.h new file mode 100644 index 00000000..7af854b0 --- /dev/null +++ b/src/gpu_old/gpu_common.h @@ -0,0 +1,53 @@ +//////////////////////////////////////////////////////////// +//~ State types + +Struct(G_Ctx) +{ + // Common shared resources + G_IndexBufferDesc quad_indices; + G_Texture2DRef blank_tex; + G_Texture3DRef basic_noise; + G_SamplerStateRef basic_samplers[G_BasicSamplerKind_COUNT]; +}; + +Struct(G_ThreadLocalCtx) +{ + G_ArenaHandle gpu_perm; +}; + +extern G_Ctx G; +extern ThreadLocal G_ThreadLocalCtx G_tl; + +//////////////////////////////////////////////////////////// +//~ Bootstrap + +void G_BootstrapCommon(void); + +//////////////////////////////////////////////////////////// +//~ Utils + +//- Arena +G_ArenaHandle G_PermArena(void); + +//- Push resource from cpu +G_ResourceHandle G_PushBufferFromCpuCopy_(G_ArenaHandle gpu_arena, G_CommandListHandle cl, String src, G_BufferDesc desc); +#define G_PushBufferFromCpuCopy(_arena, _cl, _src, ...) 
\ + G_PushBufferFromCpuCopy_((_arena), (_cl), (_src), (G_BufferDesc) { .size = (_src).len, __VA_ARGS__ }) + +//- Mip +i32 G_DimsFromMip1D(i32 mip0_dims, i32 mip); +Vec2I32 G_DimsFromMip2D(Vec2I32 mip0_dims, i32 mip); +Vec3I32 G_DimsFromMip3D(Vec3I32 mip0_dims, i32 mip); + +//- Thread count +Vec3I32 G_GroupCountFromThreadCount(ComputeShaderDesc cs, Vec3I32 threads); + +//- Viewport / scissor +Rng3 G_ViewportFromTexture(G_ResourceHandle texture); +Rng2 G_ScissorFromTexture(G_ResourceHandle texture); + +//- Shared resources +G_SamplerStateRef G_BasicSamplerFromKind(G_BasicSamplerKind kind); +G_IndexBufferDesc G_QuadIndices(void); +G_Texture2DRef G_BlankTexture2D(void); +G_Texture3DRef G_BasicNoiseTexture(void); diff --git a/src/gpu_old/gpu_core.h b/src/gpu_old/gpu_core.h new file mode 100644 index 00000000..ccee6c07 --- /dev/null +++ b/src/gpu_old/gpu_core.h @@ -0,0 +1,795 @@ +//////////////////////////////////////////////////////////// +//~ Handle types + +Struct(G_ArenaHandle) { u64 v; }; +Struct(G_CommandListHandle) { u64 v; }; +Struct(G_ResourceHandle) { u64 v; }; +Struct(G_SwapchainHandle) { u64 v; }; + +#define G_IsArenaNil(h) ((h).v == 0) +#define G_IsCommandListNil(h) ((h).v == 0) +#define G_IsResourceNil(h) ((h).v == 0) +#define G_IsSwapchainNil(h) ((h).v == 0) + +//////////////////////////////////////////////////////////// +//~ Queue types + +#define G_IsMultiQueueEnabled 1 + +Enum(G_QueueKind) +{ + G_QueueKind_Direct = 0, +#if G_IsMultiQueueEnabled + G_QueueKind_AsyncCompute = 1, + G_QueueKind_AsyncCopy = 2, +#else + G_QueueKind_AsyncCompute = G_QueueKind_Direct, + G_QueueKind_AsyncCopy = G_QueueKind_Direct, +#endif + G_QueueKind_COUNT +}; + +Enum(G_QueueMask) +{ + G_QueueMask_None = 0, + G_QueueMask_Direct = (1 << 0), +#if G_IsMultiQueueEnabled + G_QueueMask_AsyncCompute = (1 << 1), + G_QueueMask_AsyncCopy = (1 << 2), +#else + G_QueueMask_AsyncCompute = G_QueueMask_Direct, + G_QueueMask_AsyncCopy = G_QueueMask_Direct, +#endif + G_QueueMask_All = 
(0xFFFFFFFF >> (32 - G_QueueKind_COUNT)) +}; +#define G_QueueMaskFromKind(queue_kind) (1 << queue_kind) + +Struct(G_QueueCompletions) +{ + i64 v[G_QueueKind_COUNT]; // Array of completions indexed by queue kind +}; + +// All waiters will wait until specified queues reach their value in the `completions` array +Struct(G_QueueBarrierDesc) +{ + G_QueueCompletions completions; // Completions that waiters should wait for + G_QueueMask wait_queues; // Mask of queues that will wait for completions + b32 wait_cpu; // Will the cpu wait for completion +}; + +//////////////////////////////////////////////////////////// +//~ Format types + +// NOTE: Matches DirectX DXGI_FORMAT +Enum(G_Format) +{ + G_Format_Unknown = 0, + G_Format_R32G32B32A32_Typeless = 1, + G_Format_R32G32B32A32_Float = 2, + G_Format_R32G32B32A32_Uint = 3, + G_Format_R32G32B32A32_Sint = 4, + G_Format_R32G32B32_Typeless = 5, + G_Format_R32G32B32_Float = 6, + G_Format_R32G32B32_Uint = 7, + G_Format_R32G32B32_Sint = 8, + G_Format_R16G16B16A16_Typeless = 9, + G_Format_R16G16B16A16_Float = 10, + G_Format_R16G16B16A16_Unorm = 11, + G_Format_R16G16B16A16_Uint = 12, + G_Format_R16G16B16A16_Snorm = 13, + G_Format_R16G16B16A16_Sint = 14, + G_Format_R32G32_Typeless = 15, + G_Format_R32G32_Float = 16, + G_Format_R32G32_Uint = 17, + G_Format_R32G32_Sint = 18, + G_Format_R32G8X24_Typeless = 19, + G_Format_D32_Float_S8X24_Uint = 20, + G_Format_R32_Float_X8X24_Typeless = 21, + G_Format_X32_Typeless_G8X24_Uint = 22, + G_Format_R10G10B10A2_Typeless = 23, + G_Format_R10G10B10A2_Unorm = 24, + G_Format_R10G10B10A2_Uint = 25, + G_Format_R11G11B10_Float = 26, + G_Format_R8G8B8A8_Typeless = 27, + G_Format_R8G8B8A8_Unorm = 28, + G_Format_R8G8B8A8_Unorm_Srgb = 29, + G_Format_R8G8B8A8_Uint = 30, + G_Format_R8G8B8A8_Snorm = 31, + G_Format_R8G8B8A8_Sint = 32, + G_Format_R16G16_Typeless = 33, + G_Format_R16G16_Float = 34, + G_Format_R16G16_Unorm = 35, + G_Format_R16G16_Uint = 36, + G_Format_R16G16_Snorm = 37, + G_Format_R16G16_Sint = 38, 
+ G_Format_R32_Typeless = 39, + G_Format_D32_Float = 40, + G_Format_R32_Float = 41, + G_Format_R32_Uint = 42, + G_Format_R32_Sint = 43, + G_Format_R24G8_Typeless = 44, + G_Format_D24_Unorm_S8_Uint = 45, + G_Format_R24_Unorm_X8_Typeless = 46, + G_Format_X24_Typeless_G8_Uint = 47, + G_Format_R8G8_Typeless = 48, + G_Format_R8G8_Unorm = 49, + G_Format_R8G8_Uint = 50, + G_Format_R8G8_Snorm = 51, + G_Format_R8G8_Sint = 52, + G_Format_R16_Typeless = 53, + G_Format_R16_Float = 54, + G_Format_D16_Unorm = 55, + G_Format_R16_Unorm = 56, + G_Format_R16_Uint = 57, + G_Format_R16_Snorm = 58, + G_Format_R16_Sint = 59, + G_Format_R8_Typeless = 60, + G_Format_R8_Unorm = 61, + G_Format_R8_Uint = 62, + G_Format_R8_Snorm = 63, + G_Format_R8_Sint = 64, + G_Format_A8_Unorm = 65, + G_Format_R1_Unorm = 66, + G_Format_R9G9B9E5_SharedXP = 67, + G_Format_R8G8_B8G8_Unorm = 68, + G_Format_G8R8_G8B8_Unorm = 69, + G_Format_BC1_Typeless = 70, + G_Format_BC1_Unorm = 71, + G_Format_BC1_Unorm_Srgb = 72, + G_Format_BC2_Typeless = 73, + G_Format_BC2_Unorm = 74, + G_Format_BC2_Unorm_Srgb = 75, + G_Format_BC3_Typeless = 76, + G_Format_BC3_Unorm = 77, + G_Format_BC3_Unorm_Srgb = 78, + G_Format_BC4_Typeless = 79, + G_Format_BC4_Unorm = 80, + G_Format_BC4_Snorm = 81, + G_Format_BC5_Typeless = 82, + G_Format_BC5_Unorm = 83, + G_Format_BC5_Snorm = 84, + G_Format_B5G6R5_Unorm = 85, + G_Format_B5G5R5A1_Unorm = 86, + G_Format_B8G8R8A8_Unorm = 87, + G_Format_B8G8R8X8_Unorm = 88, + G_Format_R10G10B10_XR_BIAS_A2_Unorm = 89, + G_Format_B8G8R8A8_Typeless = 90, + G_Format_B8G8R8A8_Unorm_Srgb = 91, + G_Format_B8G8R8X8_Typeless = 92, + G_Format_B8G8R8X8_Unorm_Srgb = 93, + G_Format_BC6H_Typeless = 94, + G_Format_BC6H_UF16 = 95, + G_Format_BC6H_SF16 = 96, + G_Format_BC7_Typeless = 97, + G_Format_BC7_Unorm = 98, + G_Format_BC7_Unorm_Srgb = 99, + G_Format_AYUV = 100, + G_Format_Y410 = 101, + G_Format_Y416 = 102, + G_Format_NV12 = 103, + G_Format_P010 = 104, + G_Format_P016 = 105, + G_Format_420_Opaque = 106, + 
G_Format_YUY2 = 107, + G_Format_Y210 = 108, + G_Format_Y216 = 109, + G_Format_NV11 = 110, + G_Format_AI44 = 111, + G_Format_IA44 = 112, + G_Format_P8 = 113, + G_Format_A8P8 = 114, + G_Format_B4G4R4A4_Unorm = 115, + G_Format_P208 = 130, + G_Format_V208 = 131, + G_Format_V408 = 132, + G_Format_SamplerFeedbackMinMipOpaque = 189, + G_Format_SamplerFeedbackMipRegionUsedOpaque = 190, + G_Format_A4B4G4R4_Unorm = 191, + G_Format_COUNT = 192 +}; + +//////////////////////////////////////////////////////////// +//~ Memory sync types + +Enum(G_Stage) +{ + G_Stage_None = 0, + + // Compute stages + G_Stage_ComputeShading = (1 << 1), + + // Draw stages + G_Stage_IndexAssembly = (1 << 2), + G_Stage_VertexShading = (1 << 3), + G_Stage_PixelShading = (1 << 4), + G_Stage_DepthStencil = (1 << 5), + G_Stage_RenderTarget = (1 << 6), + + // Copy stages + G_Stage_Copy = (1 << 7), + + // Indirect stages + G_Stage_Indirect = (1 << 8), + + // Aggregate stages + G_Stage_Drawing = G_Stage_IndexAssembly | + G_Stage_VertexShading | + G_Stage_PixelShading | + G_Stage_DepthStencil | + G_Stage_RenderTarget, + + G_Stage_Shading = G_Stage_ComputeShading | + G_Stage_VertexShading | + G_Stage_PixelShading, + + G_Stage_All = 0xFFFFFFFF +}; + +Enum(G_Access) +{ + G_Access_None = 0, + + G_Access_ShaderReadWrite = (1 << 1), + G_Access_ShaderRead = (1 << 2), + + G_Access_CopyWrite = (1 << 3), + G_Access_CopyRead = (1 << 4), + + G_Access_DepthStencilRead = (1 << 5), + G_Access_DepthStencilWrite = (1 << 6), + G_Access_RenderTargetWrite = (1 << 7), + + G_Access_IndexBuffer = (1 << 8), + G_Access_IndirectArgument = (1 << 9), + + G_Access_All = 0xFFFFFFFF // Represents all accesses relevant to the stage specified in the barrier +}; + +Enum(G_Layout) +{ + G_Layout_NoChange, + G_Layout_Undefined, + + ////////////////////////////// + //- Queue-agnostic + + // Simultaneous layout allows a resource to be used on any queue with any + // access type (except depth-stencil). 
Resources cannot transition to/from + // this layout, they must be created with it. Allows concurrent reads + // with up to 1 write to non-overlapping regions. + G_Layout_Simultaneous, // Any access except depth-stencil <-- D3D12_BARRIER_LAYOUT_COMMON + D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS + G_Layout_Common, // ShaderRead/CopyRead/CopyWrite/Present <-- D3D12_BARRIER_LAYOUT_COMMON + + ////////////////////////////// + //- Direct queue + + G_Layout_DirectQueue_General, // ShaderRead/ShaderReadWrite/CopyRead/CopyWrite <-- D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COMMON + G_Layout_DirectQueue_Read, // ShaderRead/CopyRead/DepthStencilRead <-- D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_GENERIC_READ + G_Layout_DirectQueue_DepthStencil, // DepthStencilRead/DepthStencilWrite <-- D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE + G_Layout_DirectQueue_RenderTarget, // RenderTargetWrite <-- D3D12_BARRIER_LAYOUT_RENDER_TARGET + + ////////////////////////////// + //- Compute queue + + G_Layout_ComputeQueue_General, // ShaderRead/ShaderReadWrite/CopyRead/CopyWrite <-- D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COMMON + + ////////////////////////////// + //- Direct & Compute queue + + G_Layout_DirectComputeQueue_Read, // ShaderRead/CopyRead <-- D3D12_BARRIER_LAYOUT_GENERIC_READ + G_Layout_DirectComputeQueue_ShaderReadWrite, // ShaderReadWrite <-- D3D12_BARRIER_LAYOUT_UNORDERED_ACCESS + G_Layout_DirectComputeQueue_CopyWrite, // CopyWrite <-- D3D12_BARRIER_LAYOUT_COPY_DEST +}; + +// Barrier will execute after stages specified by `stage_prev`, and before stages specified by `stage_next`. 
+// When barrier executes: +// - Necessary resource flushes will occur based on `access_prev` & `access_next` +// - Texture layout will transition based on `layout` (if specified) +Struct(G_MemoryBarrierDesc) +{ + G_ResourceHandle resource; + b32 is_global; + G_Stage stage_prev; + G_Stage stage_next; + G_Access access_prev; + G_Access access_next; + G_Layout layout; + RngI32 mips; // Inclusive range of texture mip levels to sync +}; + +//////////////////////////////////////////////////////////// +//~ Filter types + +// NOTE: Matches DirectX D3D12_FILTER +Enum(G_Filter) +{ + // Standard filter + G_Filter_MinMagMipPoint = 0, + G_Filter_MinMagPointMipLinear = 0x1, + G_Filter_MinPointMagLinearMipPoint = 0x4, + G_Filter_MinPointMagMipLinear = 0x5, + G_Filter_MinLinearMagMipPoint = 0x10, + G_Filter_MinLinearMagPointMipLinear = 0x11, + G_Filter_MinMagLinearMipPoint = 0x14, + G_Filter_MinMagMipLinear = 0x15, + G_Filter_MinMagAnisotropicMipPoint = 0x54, + G_Filter_Anisotropic = 0x55, + + // Comparison filter + G_Filter_Comparison_MinMagMipPoint = 0x80, + G_Filter_Comparison_MinMagPointMipLinear = 0x81, + G_Filter_Comparison_MinPointMagLinearMipPoint = 0x84, + G_Filter_Comparison_MinPointMagMipLinear = 0x85, + G_Filter_Comparison_MinLinearMagMipPoint = 0x90, + G_Filter_Comparison_MinLinearMagPointMipLinear = 0x91, + G_Filter_Comparison_MinMagLinearMipPoint = 0x94, + G_Filter_Comparison_MinMagMipLinear = 0x95, + G_Filter_Comparison_MinMagAnisotropicMipPoint = 0xd4, + G_Filter_Comparison_Anisotropic = 0xd5, + + // Minimum filter + G_Filter_Minimum_MinMagMipPoint = 0x100, + G_Filter_Minimum_MinMagPointMipLinear = 0x101, + G_Filter_Minimum_MinPointMagLinearMipPoint = 0x104, + G_Filter_Minimum_MinPointMagMipLinear = 0x105, + G_Filter_Minimum_MinLinearMagMipPoint = 0x110, + G_Filter_Minimum_MinLinearMagPointMipLinear = 0x111, + G_Filter_Minimum_MinMagLinearMipPoint = 0x114, + G_Filter_Minimum_MinMagMipLinear = 0x115, + G_Filter_Minimum_MinMagAnisotropicMipPoint = 0x155, + 
G_Filter_Minimum_Anisotropic = 0x155, + + // Maximum filter + G_Filter_Maximum_MinMagMipPoint = 0x180, + G_Filter_Maximum_MinMagPointMipLinear = 0x181, + G_Filter_Maximum_MinPointMagLinearMipPoint = 0x184, + G_Filter_Maximum_MinPointMagMipLinear = 0x185, + G_Filter_Maximum_MinLinearMagMipPoint = 0x190, + G_Filter_Maximum_MinLinearMagPointMipLinear = 0x191, + G_Filter_Maximum_MinMagLinearMipPoint = 0x194, + G_Filter_Maximum_MinMagMipLinear = 0x195, + G_Filter_Maximum_MinMagAnisotropicMipPoint = 0x1d4, + G_Filter_Maximum_Anisotropic = 0x1d5 +}; + +// NOTE: Matches DirectX D3D12_TEXTURE_ADDRESS_MODE +Enum(G_AddressMode) +{ + G_AddressMode_Wrap = 1, + G_AddressMode_Mirror = 2, + G_AddressMode_Clamp = 3, // Default + G_AddressMode_Border = 4, + G_AddressMode_MirrorOnce = 5 +}; + +// NOTE: Matches DirectX D3D12_COMPARISON_FUNC +Enum(G_ComparisonFunc) +{ + G_ComparisonFunc_None = 0, + G_ComparisonFunc_Never = 1, + G_ComparisonFunc_Less = 2, + G_ComparisonFunc_Equal = 3, + G_ComparisonFunc_LessEqual = 4, + G_ComparisonFunc_Greater = 5, + G_ComparisonFunc_NotEqual = 6, + G_ComparisonFunc_GreaterEqual = 7, + G_ComparisonFunc_Always = 8 +}; + +//////////////////////////////////////////////////////////// +//~ Resource types + +Enum(G_ResourceKind) +{ + G_ResourceKind_Buffer, + G_ResourceKind_Texture1D, + G_ResourceKind_Texture2D, + G_ResourceKind_Texture3D, + G_ResourceKind_Sampler, +}; + +Enum(G_ResourceFlag) +{ + G_ResourceFlag_None = 0, + G_ResourceFlag_AllowShaderReadWrite = (1 << 0), + G_ResourceFlag_AllowRenderTarget = (1 << 1), + G_ResourceFlag_AllowDepthStencil = (1 << 2), + G_ResourceFlag_ZeroMemory = (1 << 3), + G_ResourceFlag_HostMemory = (1 << 4), // Resource will be mapped into the cpu's address space + G_ResourceFlag_Uncached = (1 << 5), // Cpu writes will be combined & reads will be uncached + G_ResourceFlag_ForceNoReuse = (1 << 6), +}; + +Struct(G_BufferDesc) +{ + G_ResourceFlag flags; + u64 size; + String name; +}; + +Struct(G_TextureDesc) +{ + G_ResourceFlag 
flags; + G_Format format; + Vec3I32 dims; + G_Layout initial_layout; + Vec4 clear_color; + i32 max_mips; // Will be clamped to range [1, max mips] + String name; +}; + +Struct(G_SamplerDesc) +{ + G_ResourceFlag flags; + G_Filter filter; + G_AddressMode x; + G_AddressMode y; + G_AddressMode z; + f32 mip_lod_bias; + u32 max_anisotropy; + G_ComparisonFunc comparison; + Vec4 border_color; + f32 min_lod; + f32 max_lod; + String name; +}; + +Struct(G_ResourceDesc) +{ + G_ResourceKind kind; + G_BufferDesc buffer; + G_TextureDesc texture; + G_SamplerDesc sampler; +}; + +//////////////////////////////////////////////////////////// +//~ Ref types + +Struct(G_RefDesc) +{ + G_RefKind kind; + u64 element_size; + u64 element_offset; + RngI32 mips; // Inclusive range of texture mip indices to reference +}; + +//////////////////////////////////////////////////////////// +//~ Rasterization types + +Enum(G_RasterMode) +{ + G_RasterMode_None, + G_RasterMode_PointList, + G_RasterMode_LineList, + G_RasterMode_LineStrip, + G_RasterMode_TriangleList, + G_RasterMode_TriangleStrip, + G_RasterMode_WireTriangleList, + G_RasterMode_WireTriangleStrip, +}; + +Enum(G_BlendMode) +{ + G_BlendMode_Opaque, + G_BlendMode_CompositeStraightAlpha, + G_BlendMode_CompositePremultipliedAlpha, +}; + +Struct(G_IndexBufferDesc) +{ + u32 count; + u32 stride; // Either 2 for u16 indices, or 4 for u32 indices + G_ResourceHandle resource; +}; + +Struct(G_RenderTargetDesc) +{ + G_ResourceHandle resource; + G_BlendMode blend; + i32 mip; +}; + +//////////////////////////////////////////////////////////// +//~ Statistic types + +Struct(G_Stats) +{ + // Memory usage + u64 device_committed; + u64 device_budget; + u64 host_committed; + u64 host_budget; + + // Other stats + u64 arenas_count; + u64 cumulative_nonreuse_count; +}; + +//////////////////////////////////////////////////////////// +//~ @hookdecl Bootstrap + +void G_Bootstrap(void); + +//////////////////////////////////////////////////////////// +//~ @hookdecl 
Arena + +G_ArenaHandle G_AcquireArena(void); +void G_ReleaseArena(G_CommandListHandle cl_handle, G_ArenaHandle arena); +void G_ResetArena(G_CommandListHandle cl_handle, G_ArenaHandle arena_handle); + +//////////////////////////////////////////////////////////// +//~ @hookdecl Resource + +//- Resource creation + +G_ResourceHandle G_PushResource(G_ArenaHandle arena, G_CommandListHandle cl, G_ResourceDesc desc); + +#define G_PushBuffer(arena, cl, _type, _count, ...) G_PushResource((arena), (cl), \ + (G_ResourceDesc) { \ + .kind = G_ResourceKind_Buffer, \ + .buffer = { \ + .size = sizeof(_type) * (_count), \ + __VA_ARGS__ \ + } \ + } \ +) + +#define G_PushTexture1D(arena, cl, _format, _size, _initial_layout, ...) G_PushResource((arena), (cl), \ + (G_ResourceDesc) { \ + .kind = G_ResourceKind_Texture1D, \ + .texture = { \ + .format = (_format), \ + .dims = VEC3I32((_size), 1, 1), \ + .initial_layout = (_initial_layout), \ + __VA_ARGS__ \ + } \ + } \ +) + +#define G_PushTexture2D(arena, cl, _format, _size, _initial_layout, ...) G_PushResource((arena), (cl), \ + (G_ResourceDesc) { \ + .kind = G_ResourceKind_Texture2D, \ + .texture = { \ + .format = (_format), \ + .dims = VEC3I32((_size).x, (_size).y, 1), \ + .initial_layout = (_initial_layout), \ + __VA_ARGS__ \ + } \ + } \ +) + +#define G_PushTexture3D(arena, cl, _format, _size, _initial_layout, ...) G_PushResource((arena), (cl), \ + (G_ResourceDesc) { \ + .kind = G_ResourceKind_Texture3D, \ + .texture = { \ + .format = (_format), \ + .dims = (_size), \ + .initial_layout = (_initial_layout), \ + __VA_ARGS__ \ + } \ + } \ +) + +#define G_PushSampler(arena, cl, ...) 
G_PushResource((arena), (cl), \ + (G_ResourceDesc) { \ + .kind = G_ResourceKind_Sampler, \ + .sampler = { \ + .filter = G_Filter_MinMagMipPoint, \ + __VA_ARGS__ \ + } \ + } \ +) + +//- Index buffer helpers + +#define G_IdxBuff16(_res) ((G_IndexBufferDesc) { .resource = (_res), .stride = 2, .count = (G_CountBuffer((_res), i16)) }) +#define G_IdxBuff32(_res) ((G_IndexBufferDesc) { .resource = (_res), .stride = 4, .count = (G_CountBuffer((_res), i32)) }) + +//- Render target helpers + +#define G_Rt(_res, _blend_mode) ((G_RenderTargetDesc) { .resource = (_res), .blend = (_blend_mode) }) + +//- Count + +u64 G_CountBufferBytes(G_ResourceHandle buffer); +i32 G_Count1D(G_ResourceHandle texture); +Vec2I32 G_Count2D(G_ResourceHandle texture); +Vec3I32 G_Count3D(G_ResourceHandle texture); +i32 G_CountWidth(G_ResourceHandle texture); +i32 G_CountHeight(G_ResourceHandle texture); +i32 G_CountDepth(G_ResourceHandle texture); +i32 G_CountMips(G_ResourceHandle texture); + +#define G_CountBuffer(buffer, type) G_CountBufferBytes(buffer) / sizeof(type) + +//- Map + +void *G_HostPointerFromResource(G_ResourceHandle resource); +#define G_StructFromResource(resource, type) (type *)G_HostPointerFromResource(resource) + +//////////////////////////////////////////////////////////// +//~ @hookdecl Shader resource reference + +u32 G_PushRef(G_ArenaHandle arena, G_ResourceHandle resource, G_RefDesc desc); + +#define G_PushStructuredBufferRef(arena, resource, type, ...) (G_StructuredBufferRef) { \ + .v = G_PushRef( \ + (arena), (resource), \ + (G_RefDesc) { .kind = G_RefKind_StructuredBuffer, .element_size = sizeof(type), __VA_ARGS__ } \ + ) \ +} + +#define G_PushByteAddressBufferRef(arena, resource, ...) (G_ByteAddressBufferRef) { \ + .v = G_PushRef( \ + (arena), (resource), \ + (G_RefDesc) { .kind = G_RefKind_ByteAddressBuffer, __VA_ARGS__ } \ + ) \ +} + +#define G_PushTexture1DRef(arena, resource, ...) 
(G_Texture1DRef) { \ + .v = G_PushRef( \ + (arena), (resource), \ + (G_RefDesc) { .kind = G_RefKind_Texture1D, .mips.max = G_MaxMips, __VA_ARGS__ } \ + ) \ +} + +#define G_PushTexture2DRef(arena, resource, ...) (G_Texture2DRef) { \ + .v = G_PushRef( \ + (arena), (resource), \ + (G_RefDesc) { .kind = G_RefKind_Texture2D, .mips.max = G_MaxMips, __VA_ARGS__ } \ + ) \ +} + +#define G_PushTexture3DRef(arena, resource, ...) (G_Texture3DRef) { \ + .v = G_PushRef( \ + (arena), (resource), \ + (G_RefDesc) { .kind = G_RefKind_Texture3D, .mips.max = G_MaxMips, __VA_ARGS__ } \ + ) \ +} + +#define G_PushSamplerStateRef(arena, resource, ...) (G_SamplerStateRef) { \ + .v = G_PushRef( \ + (arena), (resource), \ + (G_RefDesc) { .kind = G_RefKind_SamplerState, __VA_ARGS__ } \ + ) \ +} + +//////////////////////////////////////////////////////////// +//~ @hookdecl Command + +//- Command list + +G_CommandListHandle G_PrepareCommandList(G_QueueKind queue); +i64 G_CommitCommandList(G_CommandListHandle cl); + +//- Cpu -> Gpu staged copy + +void G_CopyCpuToBuffer(G_CommandListHandle cl, G_ResourceHandle dst, u64 dst_offset, void *src, RngU64 src_copy_range); +void G_CopyCpuToTexture(G_CommandListHandle cl, G_ResourceHandle dst, Vec3I32 dst_offset, void *src, Vec3I32 src_dims, Rng3I32 src_copy_range); + +//- Gpu <-> Gpu copy + +void G_CopyBufferToBuffer(G_CommandListHandle cl, G_ResourceHandle dst, u64 dst_offset, G_ResourceHandle src, RngU64 src_copy_range); +void G_CopyBufferToTexture(G_CommandListHandle cl_handle, G_ResourceHandle dst_handle, Rng3I32 dst_copy_range, G_ResourceHandle src_handle, u64 src_offset); +void G_CopyTextureToTexture(G_CommandListHandle cl, G_ResourceHandle dst, Vec3I32 dst_offset, G_ResourceHandle src, Rng3I32 src_copy_range); +void G_CopyTextureToBuffer(G_CommandListHandle cl, G_ResourceHandle dst, Vec3I32 dst_offset, G_ResourceHandle src, Rng3I32 src_copy_range); + +//- Constant + +void G_SetConstantEx(G_CommandListHandle cl, i32 slot, void *src_32bit, u32 
size); + +#define G_SetConstant(cl, name, value) do { \ + CAT(name, __shaderconstanttype) __src; \ + __src.v = value; \ + G_SetConstantEx((cl), (name), &__src, sizeof(__src)); \ + } while (0) + +//- Memory sync + +void G_MemorySyncEx(G_CommandListHandle cl, G_MemoryBarrierDesc desc); + +#define G_MemorySync(_cl, _resource, _stage_prev, _access_prev, _stage_next, _access_next, ...) \ + G_MemorySyncEx((_cl), (G_MemoryBarrierDesc) { \ + .resource = (_resource), \ + .stage_prev = _stage_prev, \ + .access_prev = _access_prev, \ + .stage_next = _stage_next, \ + .access_next = _access_next, \ + .mips.max = G_MaxMips, \ + __VA_ARGS__ \ + }) + +#define G_MemoryLayoutSync(_cl, _resource, _stage_prev, _access_prev, _stage_next, _access_next, _layout, ...) \ + G_MemorySyncEx((_cl), (G_MemoryBarrierDesc) { \ + .resource = (_resource), \ + .stage_prev = _stage_prev, \ + .access_prev = _access_prev, \ + .stage_next = _stage_next, \ + .access_next = _access_next, \ + .layout = _layout, \ + .mips.max = G_MaxMips, \ + __VA_ARGS__ \ + }) + +#define G_GlobalMemorySync(_cl, _stage_prev, _access_prev, _stage_next, _access_next, ...) \ + G_MemorySyncEx((_cl), (G_MemoryBarrierDesc) { \ + .is_global = 1, \ + .stage_prev = _stage_prev, \ + .access_prev = _access_prev, \ + .stage_next = _stage_next, \ + .access_next = _access_next, \ + .mips.max = G_MaxMips, \ + __VA_ARGS__ \ + }) + +#define G_DumbMemorySync(cl, resource, ...) \ + G_MemorySync((cl), (resource), G_Stage_All, G_Access_All, G_Stage_All, G_Access_All, __VA_ARGS__) + +#define G_DumbMemoryLayoutSync(cl, resource, layout, ...) \ + G_MemoryLayoutSync((cl), (resource), G_Stage_All, G_Access_All, G_Stage_All, G_Access_All, (layout), __VA_ARGS__) + +#define G_DumbGlobalMemorySync(cl, ...) 
\ + G_GlobalMemorySync((cl), G_Stage_All, G_Access_All, G_Stage_All, G_Access_All, __VA_ARGS__) + +//- Compute + +void G_ComputeEx(G_CommandListHandle cl, ComputeShaderDesc cs, Vec3I32 threads); + +#define G_Compute(cl, cs, threads) G_ComputeEx((cl), (cs), VEC3I32((threads), 1, 1)) +#define G_Compute2D(cl, cs, threads) G_ComputeEx((cl), (cs), VEC3I32((threads).x, (threads).y, 1)) +#define G_Compute3D(cl, cs, threads) G_ComputeEx((cl), (cs), VEC3I32((threads).x, (threads).y, (threads).z)) + +//- Rasterize + +void G_Rasterize( + G_CommandListHandle cl, + VertexShaderDesc vs, PixelShaderDesc ps, + u32 instances_count, G_IndexBufferDesc index_buffer, + u32 render_targets_count, G_RenderTargetDesc *render_targets, + Rng3 viewport, Rng2 scissor, + G_RasterMode raster_mode +); + +//- Clear + +void G_ClearRenderTarget(G_CommandListHandle cl, G_ResourceHandle render_target, Vec4 color, i32 mip); + +//- Log + +void G_LogResource(G_CommandListHandle cl, G_ResourceHandle resource); + +//////////////////////////////////////////////////////////// +//~ @hookdecl Queue synchronization + +i64 G_CompletionValueFromQueue(G_QueueKind queue_kind); +i64 G_CompletionTargetFromQueue(G_QueueKind queue_kind); +G_QueueCompletions G_CompletionValuesFromQueues(G_QueueMask queue_mask); +G_QueueCompletions G_CompletionTargetsFromQueues(G_QueueMask queue_mask); + +void G_QueueSyncEx(G_QueueBarrierDesc desc); + +#define G_QueueSync(completion_mask, ...) 
\ + G_QueueSyncEx((G_QueueBarrierDesc) { \ + .completions = G_CompletionTargetsFromQueues(completion_mask), \ + __VA_ARGS__ \ + }) + +#define G_QueueSyncGpu(completion_mask, wait_mask) G_QueueSync((completion_mask), .wait_queues = (wait_mask)) +#define G_QueueSyncCpu(completion_mask) G_QueueSync((completion_mask), .wait_cpu = 1); + +//////////////////////////////////////////////////////////// +//~ @hookdecl Statistics + +G_Stats G_QueryStats(void); + +//////////////////////////////////////////////////////////// +//~ @hookdecl Swapchain + +G_SwapchainHandle G_AcquireSwapchain(u64 os_window_handle); +void G_ReleaseSwapchain(G_SwapchainHandle swapchain); + +// Waits until a new backbuffer is ready from the swapchain. +// This should be called before rendering for minimum latency. +G_ResourceHandle G_PrepareBackbuffer(G_SwapchainHandle swapchain_handle, G_Format format, Vec2I32 size); + +void G_CommitBackbuffer(G_ResourceHandle backbuffer, i32 vsync); diff --git a/src/gpu_old/gpu_dx12/gpu_dx12.lay b/src/gpu_old/gpu_dx12/gpu_dx12.lay new file mode 100644 index 00000000..f70d3798 --- /dev/null +++ b/src/gpu_old/gpu_dx12/gpu_dx12.lay @@ -0,0 +1,16 @@ +@Layer gpu_dx12_old + +////////////////////////////// +//- Resources + +@EmbedDir G_D12_Resources gpu_dx12_res + +////////////////////////////// +//- Api + +@IncludeC gpu_dx12_core.h + +////////////////////////////// +//- Impl + +@IncludeC gpu_dx12_core.c diff --git a/src/gpu_old/gpu_dx12/gpu_dx12_core.c b/src/gpu_old/gpu_dx12/gpu_dx12_core.c new file mode 100644 index 00000000..95b17f53 --- /dev/null +++ b/src/gpu_old/gpu_dx12/gpu_dx12_core.c @@ -0,0 +1,3950 @@ +G_D12_Ctx G_D12 = Zi; +ThreadLocal G_D12_ThreadLocalCtx G_D12_tl = Zi; + +//////////////////////////////////////////////////////////// +//~ @hookimpl Bootstrap + +void G_Bootstrap(void) +{ + TempArena scratch = BeginScratchNoConflict(); + Arena *perm = PermArena(); + + // NOTE: Nsight seems to have trouble attaching when independent devices are enabled + 
+  G_D12.independent_devices_enabled = !CommandlineArgFromName(Lit("no-d3d12-independent-devices")).exists;
+  G_D12.validation_layer_enabled = CommandlineArgFromName(Lit("gpu-debug-validation")).exists;
+  G_D12.debug_layer_enabled = G_D12.validation_layer_enabled || CommandlineArgFromName(Lit("gpu-debug")).exists;
+
+  if (G_D12.independent_devices_enabled && IsRunningInWine())
+  {
+    LogInfoF("Wine detected, disabling D3D12 independent devices");
+    G_D12.independent_devices_enabled = 0;
+  }
+
+  LogInfoF("D3D12 independent devices enabled: %F", FmtSint(G_D12.independent_devices_enabled));
+  LogInfoF("D3D12 debug layer enabled: %F", FmtSint(G_D12.debug_layer_enabled));
+  LogInfoF("D3D12 validation layer enabled: %F", FmtSint(G_D12.validation_layer_enabled));
+
+  //////////////////////////////
+  //- Initialize independent device factory with Agility SDK
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+  if (G_D12.independent_devices_enabled)
+  {
+    //////////////////////////////
+    //- Extract agility SDK
+
+    String appdir = GetAppDirectory();
+
+    u32 sdk_ver_num = 618;
+    String sdk_ver_str = Lit("1.618.5");
+    String sdk_dir_path = StringF(scratch.arena, "%Fd3d12/%F/", FmtString(appdir), FmtString(sdk_ver_str));
+    {
+      LogInfoF("D3D12 agility sdk path: \"%F\"", FmtString(sdk_dir_path));
+      String core_path = StringF(scratch.arena, "%FD3D12Core.dll", FmtString(sdk_dir_path));
+      String layers_path = StringF(scratch.arena, "%Fd3d12SDKLayers.dll", FmtString(sdk_dir_path));
+      if (!PLT_IsFile(core_path) || !PLT_IsFile(layers_path))
+      {
+        LogInfoF("Unpacking D3D12 Agility SDK to %F", FmtString(sdk_dir_path));
+        ResourceKey core_key = ResourceKeyFromStore(&G_D12_Resources, Lit("AgilitySDK/1.618.5/D3D12Core.dat"));
+        ResourceKey layers_key = ResourceKeyFromStore(&G_D12_Resources, Lit("AgilitySDK/1.618.5/d3d12SDKLayers.dat"));
+        String core_data = PLT_Decompress(scratch.arena, DataFromResource(core_key), PLT_CompressionLevel_3);
+        String layers_data = PLT_Decompress(scratch.arena, DataFromResource(layers_key), PLT_CompressionLevel_3);
+        PLT_MkDir(StringF(scratch.arena, "%Fd3d12/", FmtString(appdir)));
+
+        PLT_MkDir(StringF(scratch.arena, "%Fd3d12/%F/", FmtString(appdir), FmtString(sdk_ver_str)));
+        {
+          PLT_File file = PLT_OpenFileWrite(core_path);
+          PLT_WriteFile(file, core_data);
+          PLT_CloseFile(file);
+        }
+        {
+          PLT_File file = PLT_OpenFileWrite(layers_path);
+          PLT_WriteFile(file, layers_data);
+          PLT_CloseFile(file);
+        }
+        if (!PLT_IsFile(core_path) || !PLT_IsFile(layers_path))
+        {
+          Panic(StringF(
+            scratch.arena,
+            "Failed to extract D3D12 Agility SDK to \"%F\"",
+            FmtString(core_path)
+          ));
+        }
+      }
+    }
+
+    //////////////////////////////
+    //- Create device factory
+
+    if (G_D12.independent_devices_enabled)
+    {
+      D3D12GetInterface(&CLSID_D3D12SDKConfiguration, &IID_ID3D12SDKConfiguration1, (void **)&G_D12.independent.sdk_config);
+
+      // Create device factory
+      char *sdk_dir_path_cstr = CstrFromString(scratch.arena, PathFromString(scratch.arena, sdk_dir_path, '\\'));
+      HRESULT hr = ID3D12SDKConfiguration1_CreateDeviceFactory(
+        G_D12.independent.sdk_config,
+        sdk_ver_num,
+        sdk_dir_path_cstr,
+        &IID_ID3D12DeviceFactory,
+        (void **)&G_D12.independent.device_factory
+      );
+      if (FAILED(hr))
+      {
+        Panic(StringF(scratch.arena, "Failed to create ID3D12DeviceFactory: Error code 0x%F", FmtHex(hr)));
+      }
+    }
+  }
+
+  //////////////////////////////
+  //- Enable D3D12 debug layer
+
+  // Enable debug layer
+  if (G_D12.debug_layer_enabled)
+  {
+    ID3D12Debug1 *debug = 0;
+    HRESULT hr = 0;
+    if (G_D12.independent_devices_enabled)
+    {
+      hr = ID3D12DeviceFactory_GetConfigurationInterface(G_D12.independent.device_factory, &CLSID_D3D12Debug, &IID_ID3D12Debug1, (void **)&debug);
+    }
+    else
+    {
+      hr = D3D12GetDebugInterface(&IID_ID3D12Debug1, (void **)&debug);
+    }
+    if (FAILED(hr))
+    {
+      Panic(Lit("Failed to retrieve D3D12 Debug interface"));
+    }
ID3D12Debug1_EnableDebugLayer(debug); + if (G_D12.validation_layer_enabled) + { + ID3D12Debug1_SetEnableGPUBasedValidation(debug, 1); + } + } + + ////////////////////////////// + //- Initialize device + + { + HRESULT hr = 0; + + // Create dxgi factory + { + u32 dxgi_factory_flags = 0; + if (G_D12.debug_layer_enabled) + { + dxgi_factory_flags |= DXGI_CREATE_FACTORY_DEBUG; + } + hr = CreateDXGIFactory2(dxgi_factory_flags, &IID_IDXGIFactory6, (void **)&G_D12.dxgi_factory); + if (FAILED(hr)) + { + Panic(Lit("Failed to initialize DXGI factory")); + } + } + + // Create device + { + IDXGIAdapter3 *adapter = 0; + ID3D12Device10 *device = 0; + String adapter_name = Zi; + String error = Lit("Failed to initialize D3D12 device"); + u32 adapter_index = 0; + b32 done = 0; + i32 skips = 0; // For iGPU testing + while (!done) + { + hr = IDXGIFactory6_EnumAdapterByGpuPreference(G_D12.dxgi_factory, adapter_index, DXGI_GPU_PREFERENCE_HIGH_PERFORMANCE, &IID_IDXGIAdapter3, (void **)&adapter); + if (SUCCEEDED(hr)) + { + { + DXGI_ADAPTER_DESC1 desc; + IDXGIAdapter3_GetDesc1(adapter, &desc); + adapter_name = StringFromWstrNoLimit(scratch.arena, desc.Description); + LogInfoF("D3D12 adapter name: '%F'", FmtString(adapter_name)); + } + if (skips <= 0) + { + if (G_D12.independent_devices_enabled) + { + hr = ID3D12DeviceFactory_CreateDevice(G_D12.independent.device_factory, (IUnknown *)adapter, D3D_FEATURE_LEVEL_12_0, &IID_ID3D12Device10, (void **)&device); + } + else + { + hr = D3D12CreateDevice((IUnknown *)adapter, D3D_FEATURE_LEVEL_12_0, &IID_ID3D12Device10, (void **)&device); + } + done = 1; + } + else + { + skips -= 1; + adapter_index += 1; + if (device) + { + ID3D12Device_Release(device); + } + if (adapter) + { + IDXGIAdapter3_Release(adapter); + } + adapter = 0; + device = 0; + } + } + else + { + done = 1; + } + } + + if (!device || !SUCCEEDED(hr)) + { + if (adapter_name.len > 0) + { + error = StringF( + scratch.arena, + "Could not initialize device '%F' with D3D_FEATURE_LEVEL_12_0. 
Ensure that the device is capable and drivers are up to date.", + FmtString(adapter_name) + ); + } + Panic(error); + } + + if (device) + { + StringList missing = Zi; + { + D3D12_FEATURE_DATA_SHADER_MODEL shader_model = { D3D_SHADER_MODEL_6_6 }; + D3D12_FEATURE_DATA_D3D12_OPTIONS options = Zi; + D3D12_FEATURE_DATA_D3D12_OPTIONS9 options9 = Zi; + D3D12_FEATURE_DATA_D3D12_OPTIONS11 options11 = Zi; + D3D12_FEATURE_DATA_D3D12_OPTIONS12 options12 = Zi; + { + ID3D12Device_CheckFeatureSupport(device, D3D12_FEATURE_SHADER_MODEL, &shader_model, sizeof(shader_model)); + ID3D12Device_CheckFeatureSupport(device, D3D12_FEATURE_D3D12_OPTIONS, &options, sizeof(options)); + ID3D12Device_CheckFeatureSupport(device, D3D12_FEATURE_D3D12_OPTIONS9, &options9, sizeof(options9)); + ID3D12Device_CheckFeatureSupport(device, D3D12_FEATURE_D3D12_OPTIONS11, &options11, sizeof(options11)); + ID3D12Device_CheckFeatureSupport(device, D3D12_FEATURE_D3D12_OPTIONS12, &options12, sizeof(options12)); + } + + if (shader_model.HighestShaderModel < D3D_SHADER_MODEL_6_6) + { + PushStringToList(scratch.arena, &missing, Lit(" - Shader model 6.6")); + } + if (options.ResourceBindingTier < D3D12_RESOURCE_BINDING_TIER_3) + { + PushStringToList(scratch.arena, &missing, Lit(" - Resource binding tier 3")); + } + // if (!options.DoublePrecisionFloatShaderOps) + // { + // PushStringToList(scratch.arena, &missing, Lit(" - Double precision shader ops")); + // } + // if (!options9.AtomicInt64OnTypedResourceSupported) + // { + // PushStringToList(scratch.arena, &missing, Lit(" - 64-bit atomics on typed resources")); + // } + // if (!options11.AtomicInt64OnDescriptorHeapResourceSupported) + // { + // PushStringToList(scratch.arena, &missing, Lit(" - 64-bit atomics on descriptor heap resources")); + // } + if (!options12.EnhancedBarriersSupported) + { + PushStringToList(scratch.arena, &missing, Lit(" - Enhanced barriers")); + } + } + if (missing.count > 0) + { + String msg = StringF( + scratch.arena, + "Could not 
initialize D3D12\n\n"
+          "The driver for device '%F' does not support the following feature(s):\n\n"
+          "%F\n\n"
+          "Ensure drivers are up to date and the device is capable.",
+          FmtString(adapter_name),
+          FmtString(StringFromList(scratch.arena, missing, Lit("\n")))
+        );
+        Panic(msg);
+      }
+    }
+
+      G_D12.dxgi_adapter = adapter;
+      G_D12.device = device;
+    }
+
+    // Enable debug layer breaks
+    if (G_D12.debug_layer_enabled)
+    {
+      // Enable D3D12 Debug break
+      {
+        ID3D12InfoQueue1 *info = 0;
+        hr = ID3D12Device_QueryInterface(G_D12.device, &IID_ID3D12InfoQueue1, (void **)&info);
+        if (FAILED(hr))
+        {
+          Panic(Lit("Failed to query ID3D12Device interface"));
+        }
+        ID3D12InfoQueue_SetBreakOnSeverity(info, D3D12_MESSAGE_SEVERITY_CORRUPTION, 1);
+        ID3D12InfoQueue_SetBreakOnSeverity(info, D3D12_MESSAGE_SEVERITY_ERROR, 1);
+        ID3D12InfoQueue_Release(info);
+      }
+      // Enable DXGI Debug break
+      {
+        IDXGIInfoQueue *dxgi_info = 0;
+        hr = DXGIGetDebugInterface1(0, &IID_IDXGIInfoQueue, (void **)&dxgi_info);
+        if (FAILED(hr))
+        {
+          Panic(Lit("Failed to retrieve DXGI debug interface"));
+        }
+        IDXGIInfoQueue_SetBreakOnSeverity(dxgi_info, DXGI_DEBUG_ALL, DXGI_INFO_QUEUE_MESSAGE_SEVERITY_CORRUPTION, 1);
+        IDXGIInfoQueue_SetBreakOnSeverity(dxgi_info, DXGI_DEBUG_ALL, DXGI_INFO_QUEUE_MESSAGE_SEVERITY_ERROR, 1);
+        IDXGIInfoQueue_Release(dxgi_info);
+      }
+    }
+
+    // Retrieve device configuration
+    if (G_D12.independent_devices_enabled)
+    {
+      hr = ID3D12Device_QueryInterface(G_D12.device, &IID_ID3D12DeviceConfiguration, (void **)&G_D12.independent.device_config);
+      if (FAILED(hr))
+      {
+        Panic(Lit("Failed to query ID3D12DeviceConfiguration interface"));
+      }
+
+      D3D12_DEVICE_CONFIGURATION_DESC desc = Zi;
+      ID3D12DeviceConfiguration_GetDesc(G_D12.independent.device_config, &desc);
+      StringList flags_list = Zi;
+      {
+        if (desc.Flags & D3D12_DEVICE_FLAG_DEBUG_LAYER_ENABLED) PushStringToList(scratch.arena, &flags_list, Lit("D3D12_DEVICE_FLAG_DEBUG_LAYER_ENABLED"));
+        if (desc.Flags &
D3D12_DEVICE_FLAG_GPU_BASED_VALIDATION_ENABLED) PushStringToList(scratch.arena, &flags_list, Lit("D3D12_DEVICE_FLAG_GPU_BASED_VALIDATION_ENABLED")); + if (desc.Flags & D3D12_DEVICE_FLAG_SYNCHRONIZED_COMMAND_QUEUE_VALIDATION_DISABLED) PushStringToList(scratch.arena, &flags_list, Lit("D3D12_DEVICE_FLAG_SYNCHRONIZED_COMMAND_QUEUE_VALIDATION_DISABLED")); + if (desc.Flags & D3D12_DEVICE_FLAG_DRED_AUTO_BREADCRUMBS_ENABLED) PushStringToList(scratch.arena, &flags_list, Lit("D3D12_DEVICE_FLAG_DRED_AUTO_BREADCRUMBS_ENABLED")); + if (desc.Flags & D3D12_DEVICE_FLAG_DRED_PAGE_FAULT_REPORTING_ENABLED) PushStringToList(scratch.arena, &flags_list, Lit("D3D12_DEVICE_FLAG_DRED_PAGE_FAULT_REPORTING_ENABLED")); + if (desc.Flags & D3D12_DEVICE_FLAG_DRED_WATSON_REPORTING_ENABLED) PushStringToList(scratch.arena, &flags_list, Lit("D3D12_DEVICE_FLAG_DRED_WATSON_REPORTING_ENABLED")); + if (desc.Flags & D3D12_DEVICE_FLAG_DRED_BREADCRUMB_CONTEXT_ENABLED) PushStringToList(scratch.arena, &flags_list, Lit("D3D12_DEVICE_FLAG_DRED_BREADCRUMB_CONTEXT_ENABLED")); + if (desc.Flags & D3D12_DEVICE_FLAG_DRED_USE_MARKERS_ONLY_BREADCRUMBS) PushStringToList(scratch.arena, &flags_list, Lit("D3D12_DEVICE_FLAG_DRED_USE_MARKERS_ONLY_BREADCRUMBS")); + if (desc.Flags & D3D12_DEVICE_FLAG_SHADER_INSTRUMENTATION_ENABLED) PushStringToList(scratch.arena, &flags_list, Lit("D3D12_DEVICE_FLAG_SHADER_INSTRUMENTATION_ENABLED")); + if (desc.Flags & D3D12_DEVICE_FLAG_AUTO_DEBUG_NAME_ENABLED) PushStringToList(scratch.arena, &flags_list, Lit("D3D12_DEVICE_FLAG_AUTO_DEBUG_NAME_ENABLED")); + if (desc.Flags & D3D12_DEVICE_FLAG_FORCE_LEGACY_STATE_VALIDATION) PushStringToList(scratch.arena, &flags_list, Lit("D3D12_DEVICE_FLAG_FORCE_LEGACY_STATE_VALIDATION")); + } + String flags_str = flags_list.count > 0 ? 
StringFromList(scratch.arena, flags_list, Lit(", ")) : Lit("None"); + LogInfoF("D3D12 SDKVersion: %F", FmtUint(desc.SDKVersion)); + LogInfoF("D3D12 NumEnabledExperimentalFeatures: %F", FmtUint(desc.NumEnabledExperimentalFeatures)); + LogInfoF("D3D12 device configuration flags: %F", FmtString(flags_str)); + } + } + + ////////////////////////////// + //- Initialize command queues + + { + G_D12_CommandQueueDesc descs[] = { + { .type = D3D12_COMMAND_LIST_TYPE_DIRECT, .priority = D3D12_COMMAND_QUEUE_PRIORITY_HIGH, .name = Lit("Direct Queue") }, + { .type = D3D12_COMMAND_LIST_TYPE_COMPUTE, .priority = D3D12_COMMAND_QUEUE_PRIORITY_NORMAL, .name = Lit("Compute Queue") }, + { .type = D3D12_COMMAND_LIST_TYPE_COPY, .priority = D3D12_COMMAND_QUEUE_PRIORITY_NORMAL, .name = Lit("Copy Queue") }, + }; + for (u32 i = 0; i < MinU32(countof(descs), countof(G_D12.queues)); ++i) + { + G_D12_CommandQueueDesc desc = descs[i]; + D3D12_COMMAND_QUEUE_DESC d3d_desc = { .Type = desc.type, .Priority = desc.priority }; + G_D12_Queue *queue = &G_D12.queues[i]; + queue->desc = desc; + HRESULT hr = ID3D12Device_CreateCommandQueue(G_D12.device, &d3d_desc, &IID_ID3D12CommandQueue, (void **)&queue->d3d_queue); + if (SUCCEEDED(hr)) + { + hr = ID3D12Device_CreateFence(G_D12.device, 0, 0, &IID_ID3D12Fence, (void **)&queue->commit_fence); + G_D12_SetObjectName((ID3D12Object *)queue->d3d_queue, desc.name); + } + if (FAILED(hr)) + { + Panic(Lit("Failed to create GPU Command Queue")); + } + } + } + + ////////////////////////////// + //- Initialize descriptor heaps + + { + Struct(Dx12HeapDesc) + { + D3D12_DESCRIPTOR_HEAP_TYPE type; + D3D12_DESCRIPTOR_HEAP_FLAGS flags; + u64 max; + u64 per_batch_count; + String name; + }; + Dx12HeapDesc descs[G_D12_DescriptorHeapKind_COUNT] = { + [G_D12_DescriptorHeapKind_CbvSrvUav] = { + .type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, + .flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE, + .max = G_D12_MaxCbvSrvUavDescriptors, + .per_batch_count = 2, // 1 read, 1 write per 
ref + .name = Lit("Primary Resource Descriptor Heap"), + }, + [G_D12_DescriptorHeapKind_Rtv] = { + .type = D3D12_DESCRIPTOR_HEAP_TYPE_RTV, + .flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE, + .max = G_D12_MaxRtvDescriptors, + .per_batch_count = 1, + .name = Lit("Primary RTV Descriptor Heap"), + }, + [G_D12_DescriptorHeapKind_Sampler] = { + .type = D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, + .flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE, + .max = G_D12_MaxSamplerDescriptors, + .per_batch_count = 1, + .name = Lit("Primary Sampler Descriptor Heap"), + }, + }; + for (G_D12_DescriptorHeapKind kind = 0; kind < countof(descs); ++kind) + { + Dx12HeapDesc desc = descs[kind]; + G_D12_DescriptorHeap *heap = &G_D12.descriptor_heaps[kind]; + heap->descriptors_arena = AcquireArena(Gibi(1)); + + heap->kind = kind; + heap->type = desc.type; + heap->per_batch_count = desc.per_batch_count; + heap->max_count = desc.max; + heap->descriptor_size = ID3D12Device_GetDescriptorHandleIncrementSize(G_D12.device, desc.type); + + D3D12_DESCRIPTOR_HEAP_DESC d3d_desc = Zi; + d3d_desc.Type = desc.type; + d3d_desc.Flags = desc.flags; + d3d_desc.NumDescriptors = desc.max; + + HRESULT hr = 0; + + if (SUCCEEDED(hr)) + { + hr = ID3D12Device_CreateDescriptorHeap(G_D12.device, &d3d_desc, &IID_ID3D12DescriptorHeap, (void **)&heap->d3d_heap); + } + + if (SUCCEEDED(hr)) + { + ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(heap->d3d_heap, &heap->start_handle); + } + + if (SUCCEEDED(hr)) + { + // Push an empty descriptor at index 0, so that a handle with a value of 0 always represents nil + G_D12_Arena *gpu_perm = G_D12_ArenaFromHandle(G_PermArena()); + G_D12_Descriptor *nil_descriptor = G_D12_PushDescriptor(gpu_perm, kind); + Assert(nil_descriptor->index == 0); + G_D12_SetObjectName((ID3D12Object *)heap->d3d_heap, desc.name); + } + + if (FAILED(hr)) + { + Panic(Lit("Failed to create descriptor heap")); + } + } + } + + ////////////////////////////// + //- Initialize global root signature + + { + HRESULT hr 
= 0; + + // Serialize root signature + ID3D10Blob *blob = 0; + if (SUCCEEDED(hr)) + { + D3D12_ROOT_PARAMETER1 params[G_NumConstants] = Zi; + for (i32 slot = 0; slot < G_NumConstants; ++slot) + { + D3D12_ROOT_PARAMETER1 *param = ¶ms[slot]; + param->ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS; + param->ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + param->Constants.ShaderRegister = slot; + param->Constants.RegisterSpace = 0; + param->Constants.Num32BitValues = 1; + } + + D3D12_VERSIONED_ROOT_SIGNATURE_DESC desc = Zi; + desc.Version = D3D_ROOT_SIGNATURE_VERSION_1_1; + desc.Desc_1_1.NumParameters = countof(params); + desc.Desc_1_1.pParameters = params; + desc.Desc_1_1.NumStaticSamplers = 0; + desc.Desc_1_1.pStaticSamplers = 0; + desc.Desc_1_1.Flags = D3D12_ROOT_SIGNATURE_FLAG_CBV_SRV_UAV_HEAP_DIRECTLY_INDEXED | D3D12_ROOT_SIGNATURE_FLAG_SAMPLER_HEAP_DIRECTLY_INDEXED; + if (G_D12.independent_devices_enabled) + { + hr = ID3D12DeviceConfiguration_SerializeVersionedRootSignature(G_D12.independent.device_config, &desc, &blob, 0); + } + else + { + hr = D3D12SerializeVersionedRootSignature(&desc, &blob, 0); + } + } + + // Create root signature + ID3D12RootSignature *rootsig = 0; + if (SUCCEEDED(hr)) + { + hr = ID3D12Device_CreateRootSignature(G_D12.device, 0, ID3D10Blob_GetBufferPointer(blob), ID3D10Blob_GetBufferSize(blob), &IID_ID3D12RootSignature, (void **)&rootsig); + } + G_D12.bindless_rootsig = rootsig; + + if (blob) + { + ID3D10Blob_Release(blob); + } + if (FAILED(hr)) + { + Panic(Lit("Failed to create root signature")); + } + } + + ////////////////////////////// + //- Create global resources + + { + // Create debug print buffers + if (GPU_SHADER_PRINT) + { + for (G_QueueKind queue_kind = 0; queue_kind < G_QueueKind_COUNT; ++queue_kind) + { + G_D12_Queue *queue = G_D12_QueueFromKind(queue_kind); + if (queue_kind != G_QueueKind_AsyncCopy) + { + G_CommandListHandle cl = G_PrepareCommandList(queue_kind); + { + G_ArenaHandle gpu_perm = G_PermArena(); + 
queue->print_buffer_size = GPU_SHADER_PRINT_BUFFER_SIZE; + queue->print_buffer = G_PushBuffer( + gpu_perm, cl, + u8, + queue->print_buffer_size, + .flags = G_ResourceFlag_AllowShaderReadWrite, + .name = Lit("Debug print gpu buffer"), + ); + queue->print_readback_buffer = G_PushBuffer( + gpu_perm, cl, + u8, + queue->print_buffer_size, + .flags = G_ResourceFlag_HostMemory, + .name = Lit("Debug print readback buffer") + ); + queue->print_buffer_ref = G_PushByteAddressBufferRef(gpu_perm, queue->print_buffer); + } + G_CommitCommandList(cl); + } + } + } + } + + ////////////////////////////// + //- Start workers + + // for (G_QueueKind kind = 0; kind < G_QueueKind_COUNT; ++kind) + // { + // String name = Zi; + // if (kind == G_QueueKind_Direct) name = Lit("Gpu direct queue worker"); + // if (kind == G_QueueKind_AsyncCompute) name = Lit("Gpu compute queue worker"); + // if (kind == G_QueueKind_AsyncCopy) name = Lit("Gpu copy queue worker"); + // DispatchWave(name, 1, G_D12_WorkerEntry, (void *)(u64)kind); + // } + + OnAsyncTick(G_D12_TickAsync); + + DispatchWave(Lit("Gpu collection worker"), 1, G_D12_CollectionWorkerEntryPoint, 0); + + EndScratch(scratch); +} + +//////////////////////////////////////////////////////////// +//~ Helpers + +G_D12_Arena *G_D12_ArenaFromHandle(G_ArenaHandle handle) +{ + return (G_D12_Arena *)handle.v; +} + +G_D12_CmdList *G_D12_CmdListFromHandle(G_CommandListHandle handle) +{ + return (G_D12_CmdList *)handle.v; +} + +G_D12_Resource *G_D12_ResourceFromHandle(G_ResourceHandle handle) +{ + return (G_D12_Resource *)handle.v; +} + +G_D12_Swapchain *G_D12_SwapchainFromHandle(G_SwapchainHandle handle) +{ + return (G_D12_Swapchain *)handle.v; +} + +DXGI_FORMAT G_D12_DxgiFormatFromGpuFormat(G_Format format) +{ + return (DXGI_FORMAT)format; +} + +D3D12_BARRIER_SYNC G_D12_BarrierSyncFromStages(G_Stage stages) +{ + D3D12_BARRIER_SYNC result = 0; + if (stages == G_Stage_All) + { + result = D3D12_BARRIER_SYNC_ALL; + } + else + { + result |= 
D3D12_BARRIER_SYNC_COMPUTE_SHADING * AnyBit(stages, G_Stage_ComputeShading); + result |= D3D12_BARRIER_SYNC_INDEX_INPUT * AnyBit(stages, G_Stage_IndexAssembly); + result |= D3D12_BARRIER_SYNC_VERTEX_SHADING * AnyBit(stages, G_Stage_VertexShading); + result |= D3D12_BARRIER_SYNC_PIXEL_SHADING * AnyBit(stages, G_Stage_PixelShading); + result |= D3D12_BARRIER_SYNC_DEPTH_STENCIL * AnyBit(stages, G_Stage_DepthStencil); + result |= D3D12_BARRIER_SYNC_RENDER_TARGET * AnyBit(stages, G_Stage_RenderTarget); + result |= D3D12_BARRIER_SYNC_COPY * AnyBit(stages, G_Stage_Copy); + result |= D3D12_BARRIER_SYNC_EXECUTE_INDIRECT * AnyBit(stages, G_Stage_Indirect); + } + return result; +} + +D3D12_BARRIER_ACCESS G_D12_BarrierAccessFromAccesses(G_Access accesses) +{ + D3D12_BARRIER_ACCESS result = 0; + if (accesses == 0) + { + result = D3D12_BARRIER_ACCESS_NO_ACCESS; + } + else if (accesses == G_Access_All) + { + result = D3D12_BARRIER_ACCESS_COMMON; + } + else + { + result |= D3D12_BARRIER_ACCESS_UNORDERED_ACCESS * AnyBit(accesses, G_Access_ShaderReadWrite); + result |= D3D12_BARRIER_ACCESS_SHADER_RESOURCE * AnyBit(accesses, G_Access_ShaderRead); + result |= D3D12_BARRIER_ACCESS_COPY_DEST * AnyBit(accesses, G_Access_CopyWrite); + result |= D3D12_BARRIER_ACCESS_COPY_SOURCE * AnyBit(accesses, G_Access_CopyRead); + result |= D3D12_BARRIER_ACCESS_INDEX_BUFFER * AnyBit(accesses, G_Access_IndexBuffer); + result |= D3D12_BARRIER_ACCESS_INDIRECT_ARGUMENT * AnyBit(accesses, G_Access_IndirectArgument); + result |= D3D12_BARRIER_ACCESS_DEPTH_STENCIL_READ * AnyBit(accesses, G_Access_DepthStencilRead); + result |= D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE * AnyBit(accesses, G_Access_DepthStencilWrite); + result |= D3D12_BARRIER_ACCESS_RENDER_TARGET * AnyBit(accesses, G_Access_RenderTargetWrite); + } + return result; +} + +D3D12_BARRIER_LAYOUT G_D12_BarrierLayoutFromLayout(G_Layout layout) +{ + PERSIST Readonly D3D12_BARRIER_LAYOUT translate[] = { + [G_Layout_Undefined] = 
D3D12_BARRIER_LAYOUT_UNDEFINED, + [G_Layout_Simultaneous] = D3D12_BARRIER_LAYOUT_COMMON, + [G_Layout_Common] = D3D12_BARRIER_LAYOUT_COMMON, + [G_Layout_DirectQueue_General] = D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COMMON, + [G_Layout_DirectQueue_Read] = D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_GENERIC_READ, + [G_Layout_DirectQueue_DepthStencil] = D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE, + [G_Layout_DirectQueue_RenderTarget] = D3D12_BARRIER_LAYOUT_RENDER_TARGET, + [G_Layout_ComputeQueue_General] = D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COMMON, + [G_Layout_DirectComputeQueue_ShaderReadWrite] = D3D12_BARRIER_LAYOUT_UNORDERED_ACCESS, + [G_Layout_DirectComputeQueue_Read] = D3D12_BARRIER_LAYOUT_GENERIC_READ, + [G_Layout_DirectComputeQueue_CopyWrite] = D3D12_BARRIER_LAYOUT_COPY_DEST, + }; + D3D12_BARRIER_LAYOUT result = D3D12_BARRIER_LAYOUT_UNDEFINED; + if (layout >= 0 && layout < countof(translate)) + { + result = translate[layout]; + } + return result; +}; + +String G_D12_NameFromBarrierLayout(D3D12_BARRIER_LAYOUT layout) +{ + PERSIST Readonly String names[] = { + [D3D12_BARRIER_LAYOUT_COMMON] = CompLit("D3D12_BARRIER_LAYOUT_COMMON"), + [D3D12_BARRIER_LAYOUT_PRESENT] = CompLit("D3D12_BARRIER_LAYOUT_PRESENT"), + [D3D12_BARRIER_LAYOUT_GENERIC_READ] = CompLit("D3D12_BARRIER_LAYOUT_GENERIC_READ"), + [D3D12_BARRIER_LAYOUT_RENDER_TARGET] = CompLit("D3D12_BARRIER_LAYOUT_RENDER_TARGET"), + [D3D12_BARRIER_LAYOUT_UNORDERED_ACCESS] = CompLit("D3D12_BARRIER_LAYOUT_UNORDERED_ACCESS"), + [D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE] = CompLit("D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE"), + [D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_READ] = CompLit("D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_READ"), + [D3D12_BARRIER_LAYOUT_SHADER_RESOURCE] = CompLit("D3D12_BARRIER_LAYOUT_SHADER_RESOURCE"), + [D3D12_BARRIER_LAYOUT_COPY_SOURCE] = CompLit("D3D12_BARRIER_LAYOUT_COPY_SOURCE"), + [D3D12_BARRIER_LAYOUT_COPY_DEST] = CompLit("D3D12_BARRIER_LAYOUT_COPY_DEST"), + [D3D12_BARRIER_LAYOUT_RESOLVE_SOURCE] = 
CompLit("D3D12_BARRIER_LAYOUT_RESOLVE_SOURCE"), + [D3D12_BARRIER_LAYOUT_RESOLVE_DEST] = CompLit("D3D12_BARRIER_LAYOUT_RESOLVE_DEST"), + [D3D12_BARRIER_LAYOUT_SHADING_RATE_SOURCE] = CompLit("D3D12_BARRIER_LAYOUT_SHADING_RATE_SOURCE"), + [D3D12_BARRIER_LAYOUT_VIDEO_DECODE_READ] = CompLit("D3D12_BARRIER_LAYOUT_VIDEO_DECODE_READ"), + [D3D12_BARRIER_LAYOUT_VIDEO_DECODE_WRITE] = CompLit("D3D12_BARRIER_LAYOUT_VIDEO_DECODE_WRITE"), + [D3D12_BARRIER_LAYOUT_VIDEO_PROCESS_READ] = CompLit("D3D12_BARRIER_LAYOUT_VIDEO_PROCESS_READ"), + [D3D12_BARRIER_LAYOUT_VIDEO_PROCESS_WRITE] = CompLit("D3D12_BARRIER_LAYOUT_VIDEO_PROCESS_WRITE"), + [D3D12_BARRIER_LAYOUT_VIDEO_ENCODE_READ] = CompLit("D3D12_BARRIER_LAYOUT_VIDEO_ENCODE_READ"), + [D3D12_BARRIER_LAYOUT_VIDEO_ENCODE_WRITE] = CompLit("D3D12_BARRIER_LAYOUT_VIDEO_ENCODE_WRITE"), + [D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COMMON] = CompLit("D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COMMON"), + [D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_GENERIC_READ] = CompLit("D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_GENERIC_READ"), + [D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_UNORDERED_ACCESS] = CompLit("D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_UNORDERED_ACCESS"), + [D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_SHADER_RESOURCE] = CompLit("D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_SHADER_RESOURCE"), + [D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COPY_SOURCE] = CompLit("D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COPY_SOURCE"), + [D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COPY_DEST] = CompLit("D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COPY_DEST"), + [D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COMMON] = CompLit("D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COMMON"), + [D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_GENERIC_READ] = CompLit("D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_GENERIC_READ"), + [D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_UNORDERED_ACCESS] = CompLit("D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_UNORDERED_ACCESS"), + [D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_SHADER_RESOURCE] = CompLit("D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_SHADER_RESOURCE"), + [D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COPY_SOURCE] = 
CompLit("D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COPY_SOURCE"), + [D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COPY_DEST] = CompLit("D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COPY_DEST"), + [D3D12_BARRIER_LAYOUT_VIDEO_QUEUE_COMMON] = CompLit("D3D12_BARRIER_LAYOUT_VIDEO_QUEUE_COMMON") + }; + String result = Zi; + if (layout >= 0 && layout < countof(names)) + { + result = names[layout]; + } + else if (layout == D3D12_BARRIER_LAYOUT_UNDEFINED) + { + result = Lit("D3D12_BARRIER_LAYOUT_UNDEFINED"); + } + return result; +} + +void G_D12_InitRtv(G_D12_Resource *resource, D3D12_CPU_DESCRIPTOR_HANDLE rtv_handle, i32 mip) +{ + DXGI_FORMAT format = G_D12_DxgiFormatFromGpuFormat(resource->texture_format); + D3D12_RESOURCE_DESC res_d3d_desc = Zi; + { + ID3D12Resource_GetDesc(resource->d3d_resource, &res_d3d_desc); + } + D3D12_RENDER_TARGET_VIEW_DESC rtv_desc = Zi; + { + rtv_desc.Format = res_d3d_desc.Format; + if (res_d3d_desc.Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE1D) + { + rtv_desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE1D; + rtv_desc.Texture1D.MipSlice = mip; + } + else if (res_d3d_desc.Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE2D) + { + rtv_desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; + rtv_desc.Texture2D.MipSlice = mip; + } + else if (res_d3d_desc.Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE3D) + { + rtv_desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE3D; + rtv_desc.Texture3D.MipSlice = mip; + } + } + ID3D12Device_CreateRenderTargetView(G_D12.device, resource->d3d_resource, &rtv_desc, rtv_handle); +} + +void G_D12_SetObjectName(ID3D12Object *object, String name) +{ + TempArena scratch = BeginScratchNoConflict(); + { + wchar_t *name_wstr = WstrFromString(scratch.arena, name); + ID3D12Object_SetName(object, name_wstr); + } + EndScratch(scratch); +} + +String G_D12_NameFromObject(Arena *arena, ID3D12Object *object) +{ + String result = Zi; + { + wchar_t name_text[G_D12_MaxNameLen] = Zi; + u32 name_text_sz = sizeof(name_text); + ID3D12Object_GetPrivateData(object, 
&WKPDID_D3DDebugObjectNameW, &name_text_sz, name_text); + if (name_text_sz > 2) + { + String16 str16 = Zi; + str16.len = (name_text_sz / 2) - 1; + str16.text = name_text; + result = StringFromString16(arena, str16); + } + } + return result; +} + +//////////////////////////////////////////////////////////// +//~ Pipeline + +G_D12_Pipeline *G_D12_PipelineFromDesc(G_D12_PipelineDesc desc) +{ + u64 hash = G_D12_HashFromPipelineDesc(desc); + + // Fetch pipeline from cache + G_D12_Pipeline *pipeline = 0; + b32 is_pipeline_new = 0; + G_D12_PipelineBin *bin = &G_D12.pipeline_bins[hash % countof(G_D12.pipeline_bins)]; + { + { + Lock lock = LockS(&bin->mutex); + for (pipeline = bin->first; pipeline; pipeline = pipeline->next_in_bin) + { + if (pipeline->hash == hash) break; + } + Unlock(&lock); + } + if (!pipeline) + { + Lock lock = LockE(&bin->mutex); + for (pipeline = bin->first; pipeline; pipeline = pipeline->next_in_bin) + { + if (pipeline->hash == hash) break; + } + if (!pipeline) + { + Arena *perm = PermArena(); + PushAlign(perm, IsolationSize); + { + pipeline = PushStruct(perm, G_D12_Pipeline); + pipeline->desc = desc; + pipeline->hash = hash; + is_pipeline_new = 1; + } + PushAlign(perm, IsolationSize); + SllStackPushN(bin->first, pipeline, next_in_bin); + } + Unlock(&lock); + } + } + + // Create pipeline + if (is_pipeline_new) + { + TempArena scratch = BeginScratchNoConflict(); + HRESULT hr = 0; + b32 ok = 1; + String error_str = Zi; + b32 is_compute = IsResourceNil(desc.vs.resource) || IsResourceNil(desc.ps.resource); + + String pipeline_name = Zi; + if (is_compute) + { + pipeline_name = NameFromResource(desc.cs.resource); + if (pipeline_name.len == 0) + { + pipeline_name = StringF(scratch.arena, "%F", FmtHandle(desc.cs.resource.v)); + } + } + else + { + String vs_name = NameFromResource(desc.vs.resource); + String ps_name = NameFromResource(desc.ps.resource); + if (vs_name.len == 0) + { + vs_name = StringF(scratch.arena, "%F", FmtHandle(desc.vs.resource.v)); + } + 
if (ps_name.len == 0) + { + ps_name = StringF(scratch.arena, "%F", FmtHandle(desc.ps.resource.v)); + } + pipeline_name = StringF( + scratch.arena, + "%F - %F", + FmtString(vs_name), + FmtString(ps_name) + ); + } + + LogInfoF("Creating pipeline %F", FmtString(pipeline_name)); + + // Create PSO + ID3D12PipelineState *pso = 0; + if (ok) + { + if (!is_compute) + { + i32 rts_count = 0; + b32 has_multiple_blend_modes = 0; + { + G_BlendMode last_blend_mode = 0; + for (i32 rt_idx = 0; rt_idx < countof(desc.render_target_formats); ++rt_idx) + { + G_BlendMode blend_mode = desc.render_target_blend_modes[rt_idx]; + DXGI_FORMAT format = G_D12_DxgiFormatFromGpuFormat(desc.render_target_formats[rt_idx]); + if (format == DXGI_FORMAT_UNKNOWN) + { + break; + } + else + { + if (rt_idx > 0 && blend_mode != last_blend_mode) + { + has_multiple_blend_modes = 1; + } + last_blend_mode = blend_mode; + rts_count += 1; + } + } + } + + D3D12_RASTERIZER_DESC raster_desc = Zi; + { + if (desc.is_wireframe) + { + raster_desc.FillMode = D3D12_FILL_MODE_WIREFRAME; + } + else + { + raster_desc.FillMode = D3D12_FILL_MODE_SOLID; + } + raster_desc.CullMode = D3D12_CULL_MODE_NONE; + raster_desc.FrontCounterClockwise = 0; + raster_desc.DepthBias = D3D12_DEFAULT_DEPTH_BIAS; + raster_desc.DepthBiasClamp = D3D12_DEFAULT_DEPTH_BIAS_CLAMP; + raster_desc.SlopeScaledDepthBias = D3D12_DEFAULT_SLOPE_SCALED_DEPTH_BIAS; + raster_desc.DepthClipEnable = 0; + raster_desc.MultisampleEnable = 0; + raster_desc.AntialiasedLineEnable = 0; + raster_desc.ForcedSampleCount = 0; + raster_desc.ConservativeRaster = D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF; + } + + D3D12_BLEND_DESC blend_desc = Zi; + { + blend_desc.IndependentBlendEnable = has_multiple_blend_modes; + blend_desc.AlphaToCoverageEnable = 0; + for (i32 rt_idx = 0; rt_idx < rts_count; ++rt_idx) + { + G_BlendMode blend_mode = desc.render_target_blend_modes[rt_idx]; + D3D12_RENDER_TARGET_BLEND_DESC *rt = &blend_desc.RenderTarget[rt_idx]; + switch (blend_mode) + { + 
default: + { + rt->BlendEnable = 0; + rt->RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_ALL; + } break; + + case G_BlendMode_CompositeStraightAlpha: + { + rt->BlendEnable = 1; + + rt->SrcBlend = D3D12_BLEND_SRC_ALPHA; + rt->BlendOp = D3D12_BLEND_OP_ADD; + rt->DestBlend = D3D12_BLEND_INV_SRC_ALPHA; + + rt->SrcBlendAlpha = D3D12_BLEND_ONE; + rt->BlendOpAlpha = D3D12_BLEND_OP_ADD; + rt->DestBlendAlpha = D3D12_BLEND_INV_SRC_ALPHA; + + rt->RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_ALL; + } break; + + case G_BlendMode_CompositePremultipliedAlpha: + { + rt->BlendEnable = 1; + + rt->SrcBlend = D3D12_BLEND_ONE; + rt->BlendOp = D3D12_BLEND_OP_ADD; + rt->DestBlend = D3D12_BLEND_INV_SRC_ALPHA; + + rt->SrcBlendAlpha = D3D12_BLEND_ONE; + rt->BlendOpAlpha = D3D12_BLEND_OP_ADD; + rt->DestBlendAlpha = D3D12_BLEND_INV_SRC_ALPHA; + + rt->RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_ALL; + } break; + } + } + } + + D3D12_DEPTH_STENCIL_DESC ds_desc = Zi; + { + ds_desc.DepthEnable = 0; + ds_desc.StencilEnable = 0; + } + + String vs = DataFromResource(desc.vs.resource); + String ps = DataFromResource(desc.ps.resource); + D3D12_GRAPHICS_PIPELINE_STATE_DESC pso_desc = Zi; + { + pso_desc.pRootSignature = G_D12.bindless_rootsig; + pso_desc.VS.pShaderBytecode = vs.text; + pso_desc.VS.BytecodeLength = vs.len; + pso_desc.PS.pShaderBytecode = ps.text; + pso_desc.PS.BytecodeLength = ps.len; + pso_desc.RasterizerState = raster_desc; + pso_desc.BlendState = blend_desc; + pso_desc.DepthStencilState = ds_desc; + pso_desc.PrimitiveTopologyType = desc.topology_type; + pso_desc.SampleMask = UINT_MAX; + pso_desc.SampleDesc.Count = 1; + pso_desc.SampleDesc.Quality = 0; + pso_desc.NumRenderTargets = rts_count; + for (i32 rt_idx = 0; rt_idx < rts_count; ++rt_idx) + { + DXGI_FORMAT format = G_D12_DxgiFormatFromGpuFormat(desc.render_target_formats[rt_idx]); + pso_desc.RTVFormats[rt_idx] = format; + } + } + hr = ID3D12Device_CreateGraphicsPipelineState(G_D12.device, &pso_desc, 
&IID_ID3D12PipelineState, (void **)&pso); + if (FAILED(hr)) + { + error_str = StringF(scratch.arena, "Failed to create graphics pipeline \"%F\"", FmtString(pipeline_name)); + ok = 0; + } + } + else + { + String cs = DataFromResource(desc.cs.resource); + D3D12_COMPUTE_PIPELINE_STATE_DESC pso_desc = Zi; + { + pso_desc.pRootSignature = G_D12.bindless_rootsig; + pso_desc.CS.pShaderBytecode = cs.text; + pso_desc.CS.BytecodeLength = cs.len; + } + hr = ID3D12Device_CreateComputePipelineState(G_D12.device, &pso_desc, &IID_ID3D12PipelineState, (void **)&pso); + if (FAILED(hr)) + { + error_str = StringF(scratch.arena, "Failed to create compute pipeline \"%F\"", FmtString(pipeline_name)); + ok = 0; + } + } + } + + if (ok) + { + if (GPU_NAMES) + { + G_D12_SetObjectName((ID3D12Object *)pso, pipeline_name); + } + } + else + { + // TOOD: Don't panic + Panic(error_str); + } + + LogInfoF("Created pipeline %F", FmtString(pipeline_name)); + + pipeline->pso = pso; + pipeline->error = error_str; + pipeline->ok = ok; + EndScratch(scratch); + } + + return pipeline; +} + +u64 G_D12_HashFromPipelineDesc(G_D12_PipelineDesc desc) +{ + return HashString(StringFromStruct(&desc)); +} + +//////////////////////////////////////////////////////////// +//~ Queue + +G_D12_Queue *G_D12_QueueFromKind(G_QueueKind kind) +{ + return &G_D12.queues[kind]; +} + +//////////////////////////////////////////////////////////// +//~ Raw command list + +G_D12_RawCommandList *G_D12_PrepareRawCommandList(G_QueueKind queue_kind) +{ + G_D12_Queue *queue = G_D12_QueueFromKind(queue_kind); + + // Try to pull first completed command list from queue + G_D12_RawCommandList *cl = Zi; + { + Lock lock = LockE(&queue->commit_mutex); + { + u64 completed = ID3D12Fence_GetCompletedValue(queue->commit_fence); + cl = queue->first_committed_cl; + if (cl && cl->commit_fence_target <= completed) + { + SllQueuePop(queue->first_committed_cl, queue->last_committed_cl); + } + else + { + cl = 0; + } + } + Unlock(&lock); + } + + // Allocate 
new command list if none are available + if (!cl) + { + Arena *perm = PermArena(); + { + PushAlign(perm, IsolationSize); + cl = PushStruct(perm, G_D12_RawCommandList); + PushAlign(perm, IsolationSize); + } + cl->queue = queue; + + HRESULT hr = 0; + { + if (SUCCEEDED(hr)) + { + hr = ID3D12Device_CreateCommandAllocator(G_D12.device, queue->desc.type, &IID_ID3D12CommandAllocator, (void **)&cl->d3d_ca); + } + + if (SUCCEEDED(hr)) + { + hr = ID3D12Device_CreateCommandList(G_D12.device, 0, queue->desc.type, cl->d3d_ca, 0, &IID_ID3D12GraphicsCommandList7, (void **)&cl->d3d_cl); + } + + if (SUCCEEDED(hr)) + { + hr = ID3D12GraphicsCommandList_Close(cl->d3d_cl); + } + + // Initialize Direct queue CPU-only descriptors + if (SUCCEEDED(hr) && queue_kind == G_QueueKind_Direct) + { + G_D12_Arena *gpu_perm = G_D12_ArenaFromHandle(G_PermArena()); + for (u32 rtv_idx = 0; rtv_idx < countof(cl->rtv_descriptors); ++rtv_idx) + { + cl->rtv_descriptors[rtv_idx] = G_D12_PushDescriptor(gpu_perm, G_D12_DescriptorHeapKind_Rtv); + } + cl->rtv_clear_descriptor = G_D12_PushDescriptor(gpu_perm, G_D12_DescriptorHeapKind_Rtv); + } + } + + if (FAILED(hr)) + { + Panic(Lit("Failed to create command list")); + } + } + + // Reset command list + { + HRESULT hr = 0; + { + if (SUCCEEDED(hr)) + { + hr = ID3D12CommandAllocator_Reset(cl->d3d_ca); + } + + if (SUCCEEDED(hr)) + { + hr = ID3D12GraphicsCommandList_Reset(cl->d3d_cl, cl->d3d_ca, 0); + } + } + + if (FAILED(hr)) + { + Panic(Lit("Failed to reset command list")); + } + } + + return cl; +} + +i64 G_D12_CommitRawCommandList(G_D12_RawCommandList *cl) +{ + G_D12_Queue *queue = cl->queue; + + // Close + { + HRESULT hr = ID3D12GraphicsCommandList_Close(cl->d3d_cl); + if (FAILED(hr)) + { + // TODO: Don't panic + Panic(Lit("Failed to close command list before execution")); + } + } + + // Commit + i64 completion_target = 0; + { + // Execute + ID3D12CommandQueue_ExecuteCommandLists(queue->d3d_queue, 1, (ID3D12CommandList **)&cl->d3d_cl); + Lock lock = 
LockE(&queue->commit_mutex); + { + completion_target = ++queue->commit_fence_target; + cl->commit_fence_target = completion_target; + ID3D12CommandQueue_Signal(queue->d3d_queue, queue->commit_fence, completion_target); + + // Append + SllQueuePush(queue->first_committed_cl, queue->last_committed_cl, cl); + } + Unlock(&lock); + } + + return completion_target; +} + +//////////////////////////////////////////////////////////// +//~ @hookimpl Arena + +G_ArenaHandle G_AcquireArena(void) +{ + G_D12_Arena *gpu_arena = 0; + { + Arena *perm = PermArena(); + PushAlign(perm, IsolationSize); + gpu_arena = PushStruct(perm, G_D12_Arena); + PushAlign(perm, IsolationSize); + } + gpu_arena->arena = AcquireArena(Gibi(1)); + + Atomic64FetchAdd(&G_D12.arenas_count, 1); + + return G_D12_MakeHandle(G_ArenaHandle, gpu_arena); +} + +void G_ReleaseArena(G_CommandListHandle cl_handle, G_ArenaHandle arena) +{ + // TODO + + // TODO: Release resources + + // TODO: Update gstats + + // TODO: Move this to actual release + // Atomic64FetchAdd(&G_D12.arenas_count, -1); +} + +void G_ResetArena(G_CommandListHandle cl_handle, G_ArenaHandle arena_handle) +{ + G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle); + G_D12_Arena *gpu_arena = G_D12_ArenaFromHandle(arena_handle); + G_D12_ResetArena(cl, gpu_arena); +} + +//////////////////////////////////////////////////////////// +//~ Arena + +void G_D12_ResetArena(G_D12_CmdList *cl, G_D12_Arena *gpu_arena) +{ + // Move resources to reset list + if (gpu_arena->resources.first) + { + if (gpu_arena->reset_resources.last) + { + gpu_arena->reset_resources.last->next = gpu_arena->resources.first; + } + else + { + gpu_arena->reset_resources.first = gpu_arena->resources.first; + } + gpu_arena->reset_resources.last = gpu_arena->resources.last; + gpu_arena->reset_resources.count += gpu_arena->resources.count; + ZeroStruct(&gpu_arena->resources); + } + + // Push descriptors to cl reset list + if (gpu_arena->descriptors.first) + { + if (cl->reset_descriptors.last) 
+ { + cl->reset_descriptors.last->next = gpu_arena->descriptors.first; + } + else + { + cl->reset_descriptors.first = gpu_arena->descriptors.first; + } + cl->reset_descriptors.last = gpu_arena->descriptors.last; + cl->reset_descriptors.count += gpu_arena->descriptors.count; + gpu_arena->descriptors.count = 0; + gpu_arena->descriptors.first = 0; + gpu_arena->descriptors.last = 0; + } +} + +//////////////////////////////////////////////////////////// +//~ @hookimpl Resource + +G_ResourceHandle G_PushResource(G_ArenaHandle arena_handle, G_CommandListHandle cl_handle, G_ResourceDesc desc) +{ + Arena *perm = PermArena(); + G_D12_Arena *gpu_arena = G_D12_ArenaFromHandle(arena_handle); + G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle); + G_D12_Resource *resource = 0; + + b32 is_buffer = desc.kind == G_ResourceKind_Buffer; + b32 is_texture = ( + desc.kind == G_ResourceKind_Texture1D || + desc.kind == G_ResourceKind_Texture2D || + desc.kind == G_ResourceKind_Texture3D + ); + b32 is_sampler = desc.kind == G_ResourceKind_Sampler; + G_ResourceFlag flags = ( + is_buffer ? desc.buffer.flags : + is_texture ? desc.texture.flags : + desc.sampler.flags + ); + String new_name = ( + is_buffer ? desc.buffer.name : + is_texture ? 
desc.texture.name : + desc.sampler.name + ); + new_name.len = MinU64(new_name.len, countof(resource->name_text)); + + ////////////////////////////// + //- Initialize heap info + + b32 can_reuse = !AnyBit(flags, G_ResourceFlag_ForceNoReuse); + + D3D12_HEAP_FLAGS heap_flags = 0; + D3D12_HEAP_PROPERTIES heap_props = Zi; + b32 should_map = 0; + if (is_buffer || is_texture) + { + G_D12_ResourceHeapKind heap_kind = G_D12_ResourceHeapKind_Gpu; + // Heap flags + if (flags & G_ResourceFlag_HostMemory) + { + heap_kind = G_D12_ResourceHeapKind_Cpu; + if (flags & G_ResourceFlag_Uncached) + { + heap_kind = G_D12_ResourceHeapKind_CpuWriteCombined; + } + } + if (flags & G_ResourceFlag_ZeroMemory) + { + can_reuse = 0; + } + else + { + heap_flags |= D3D12_HEAP_FLAG_CREATE_NOT_ZEROED; + } + // Heap props + if (heap_kind == G_D12_ResourceHeapKind_Cpu) + { + heap_props.Type = D3D12_HEAP_TYPE_CUSTOM; + heap_props.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_WRITE_BACK; + heap_props.MemoryPoolPreference = D3D12_MEMORY_POOL_L0; + should_map = 1; + } + else if (heap_kind == G_D12_ResourceHeapKind_CpuWriteCombined) + { + heap_props.Type = D3D12_HEAP_TYPE_CUSTOM; + heap_props.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_WRITE_COMBINE; + heap_props.MemoryPoolPreference = D3D12_MEMORY_POOL_L0; + should_map = 1; + } + else + { + heap_props.Type = D3D12_HEAP_TYPE_DEFAULT; + } + } + + ////////////////////////////// + //- Initialize d3d resource desc + + D3D12_BARRIER_LAYOUT d3d_begin_layout = D3D12_BARRIER_LAYOUT_UNDEFINED; + D3D12_CLEAR_VALUE clear_value = Zi; + D3D12_RESOURCE_DESC1 d3d_desc = Zi; + if (is_buffer) + { + u64 min_buffer_size = 1024; + d3d_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + d3d_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + d3d_desc.Format = DXGI_FORMAT_UNKNOWN; + d3d_desc.Width = NextPow2U64(MaxU64(desc.buffer.size, min_buffer_size)); + d3d_desc.Height = 1; + d3d_desc.DepthOrArraySize = 1; + d3d_desc.MipLevels = 1; + d3d_desc.SampleDesc.Count = 1; + 
d3d_desc.SampleDesc.Quality = 0; + d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS * AnyBit(flags, G_ResourceFlag_AllowShaderReadWrite); + } + else if (is_texture) + { + i32 largest_dim = MaxI32(MaxI32(desc.texture.dims.x, desc.texture.dims.y), desc.texture.dims.z); + i32 max_mips = MinI32(FloorF32(Log2F32(largest_dim)) + 1, G_MaxMips); + d3d_desc.Dimension = ( + desc.kind == G_ResourceKind_Texture1D ? D3D12_RESOURCE_DIMENSION_TEXTURE1D : + desc.kind == G_ResourceKind_Texture2D ? D3D12_RESOURCE_DIMENSION_TEXTURE2D : + D3D12_RESOURCE_DIMENSION_TEXTURE3D + ); + d3d_desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN; + d3d_desc.Format = G_D12_DxgiFormatFromGpuFormat(desc.texture.format); + d3d_desc.Width = MaxI32(desc.texture.dims.x, 1); + d3d_desc.Height = MaxI32(desc.texture.dims.y, 1); + d3d_desc.DepthOrArraySize = MaxI32(desc.texture.dims.z, 1); + d3d_desc.MipLevels = ClampF32(desc.texture.max_mips, 1, max_mips); + d3d_desc.SampleDesc.Count = 1; + d3d_desc.SampleDesc.Quality = 0; + d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS * AnyBit(flags, G_ResourceFlag_AllowShaderReadWrite); + d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET * AnyBit(flags, G_ResourceFlag_AllowRenderTarget); + d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL * AnyBit(flags, G_ResourceFlag_AllowDepthStencil); + d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS * (desc.texture.initial_layout == G_Layout_Simultaneous); + clear_value.Color[0] = desc.texture.clear_color.x, + clear_value.Color[1] = desc.texture.clear_color.y, + clear_value.Color[2] = desc.texture.clear_color.z, + clear_value.Color[3] = desc.texture.clear_color.w, + clear_value.Format = d3d_desc.Format; + + d3d_begin_layout = G_D12_BarrierLayoutFromLayout(desc.texture.initial_layout); + if (!AnyBit(flags, G_ResourceFlag_ZeroMemory) && !AnyBit(d3d_desc.Flags, D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS)) + { + if (AnyBit(d3d_desc.Flags, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET)) + 
{ + d3d_begin_layout = D3D12_BARRIER_LAYOUT_RENDER_TARGET; + } + else if (AnyBit(d3d_desc.Flags, D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL)) + { + d3d_begin_layout = D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE; + } + } + } + + ////////////////////////////// + //- Check for reset-resource reusability + + // Pop reset resource + resource = gpu_arena->reset_resources.first; + if (resource) + { + DllQueueRemove(gpu_arena->reset_resources.first, gpu_arena->reset_resources.last, resource); + --gpu_arena->reset_resources.count; + + D3D12_RESOURCE_DESC1 reset_d3d_desc = Zi; + D3D12_RESOURCE_DESC1 compare_d3d_desc = Zi; + CopyStruct(&reset_d3d_desc, &resource->d3d_desc); + CopyStruct(&compare_d3d_desc, &reset_d3d_desc); + + // Buffers can be reused if size fits + if (d3d_desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER && reset_d3d_desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER) + { + if (reset_d3d_desc.Width >= d3d_desc.Width) + { + compare_d3d_desc.Width = d3d_desc.Width; + } + } + + // TODO: Less stringent reuse constraints. We could even create textures as placed resources and reset their underlying heaps. 
+ can_reuse = can_reuse && MatchStruct(&compare_d3d_desc, &d3d_desc); + if (!can_reuse) + { + // Push releasable to command list + { + G_D12_Releasable *release = 0; + { + Lock lock = LockE(&G_D12.free_releases_mutex); + { + release = G_D12.free_releases.first; + if (release) + { + SllQueuePop(G_D12.free_releases.first, G_D12.free_releases.last); + } + else + { + release = PushStructNoZero(perm, G_D12_Releasable); + } + } + Unlock(&lock); + } + ZeroStruct(release); + SllQueuePush(cl->releases.first, cl->releases.last, release); + release->d3d_resource = resource->d3d_resource; + if (GPU_NAMES) + { + StaticAssert(countof(release->name_text) == countof(resource->name_text)); + release->name_len = resource->name_len; + CopyBytes(release->name_text, resource->name_text, resource->name_len); + } + } + ZeroStruct(resource); + } + } + else + { + can_reuse = 0; + resource = PushStruct(gpu_arena->arena, G_D12_Resource); + } + + if (!can_reuse) + { + resource->d3d_desc = d3d_desc; + } + + ////////////////////////////// + //- Init resource + + resource->flags = flags; + resource->uid = Atomic64FetchAdd(&G_D12.resource_creation_gen.v, d3d_desc.MipLevels) + 1; + + if (is_buffer) + { + resource->buffer_size = desc.buffer.size; + resource->buffer_size_actual = d3d_desc.Width; + } + + if (is_texture) + { + resource->is_texture = is_texture; + resource->texture_format = desc.texture.format; + resource->texture_dims = desc.texture.dims; + resource->texture_mips = d3d_desc.MipLevels; + } + + if (is_sampler) + { + resource->sampler_desc = desc.sampler; + } + + DllQueuePush(gpu_arena->resources.first, gpu_arena->resources.last, resource); + ++gpu_arena->resources.count; + + ////////////////////////////// + //- Allocate D3D12 resource + + if ((is_buffer || is_texture) && !resource->d3d_resource) + { + D3D12_CLEAR_VALUE *clear_value_arg = 0; + if (d3d_desc.Flags & (D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL)) + { + clear_value_arg = &clear_value; + 
} + HRESULT hr = ID3D12Device10_CreateCommittedResource3( + G_D12.device, + &heap_props, + heap_flags, + &resource->d3d_desc, + d3d_begin_layout, + clear_value_arg, + 0, // pProtectedSession + 0, // NumCastableFormats + 0, // pCastableFormats + &IID_ID3D12Resource, + (void **)&resource->d3d_resource + ); + Atomic64FetchAdd(&G_D12.cumulative_nonreuse_count, 1); + for (i32 mip_idx = 0; mip_idx < resource->texture_mips; ++mip_idx) + { + resource->cmdlist_texture_layouts[mip_idx] = d3d_begin_layout; + } + + // Queue initial Rtv/Dsv discard + if (!AnyBit(flags, G_ResourceFlag_ZeroMemory)) + { + if (AnyBit(d3d_desc.Flags, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL)) + { + G_D12_Cmd *cmd = G_D12_PushCmd(cl); + cmd->kind = G_D12_CmdKind_Discard; + cmd->discard.resource = resource; + } + + if (d3d_begin_layout == D3D12_BARRIER_LAYOUT_RENDER_TARGET) + { + G_MemoryLayoutSync( + cl_handle, G_D12_MakeHandle(G_ResourceHandle, resource), + G_Stage_RenderTarget, G_Access_RenderTargetWrite, + G_Stage_All, G_Access_All, + desc.texture.initial_layout + ); + } + else if (d3d_begin_layout == D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE) + { + G_MemoryLayoutSync( + cl_handle, G_D12_MakeHandle(G_ResourceHandle, resource), + G_Stage_DepthStencil, G_Access_DepthStencilWrite, + G_Stage_All, G_Access_All, + desc.texture.initial_layout + ); + } + } + + if (!SUCCEEDED(hr)) + { + // TODO: Don't panic + Panic(Lit("Failed to allocate D3D12 resource")); + } + + if (is_buffer) + { + resource->buffer_gpu_address = ID3D12Resource_GetGPUVirtualAddress(resource->d3d_resource); + } + } + + if (should_map && !resource->mapped) + { + D3D12_RANGE read_range = Zi; + HRESULT hr = ID3D12Resource_Map(resource->d3d_resource, 0, &read_range, &resource->mapped); + + if (!SUCCEEDED(hr)) + { + // TODO: Don't panic + Panic(Lit("Failed to map D3D12 resource")); + } + } + + ////////////////////////////// + //- Set debug information + + String old_name = STRING(resource->name_len, 
resource->name_text); + if (!MatchString(old_name, new_name)) + { + resource->name_len = new_name.len; + CopyBytes(resource->name_text, new_name.text, new_name.len); + if (GPU_NAMES && resource->d3d_resource) + { + G_D12_SetObjectName((ID3D12Object *)resource->d3d_resource, new_name); + } + } + + ////////////////////////////// + //- Barrier reused resource + + // TODO: These barriers are overly cautious. It's unlikely that anything + // other than an activation-layout transition is needed for textures, since + // arenas are rarely reset in the middle of a command list. In the case that + // a resource is reused within the same command list, we should insert + // barriers as described in the spec: + // https://microsoft.github.io/DirectX-Specs/d3d/D3D12EnhancedBarriers.html#resource-aliasing + + if (can_reuse) + { + if (is_buffer) + { + G_DumbMemorySync(cl_handle, G_D12_MakeHandle(G_ResourceHandle, resource)); + } + else if (is_texture) + { + G_DumbMemoryLayoutSync(cl_handle, G_D12_MakeHandle(G_ResourceHandle, resource), desc.texture.initial_layout); + } + } + + return G_D12_MakeHandle(G_ResourceHandle, resource); +} + +//////////////////////////////////////////////////////////// +//~ Descriptor + +G_D12_Descriptor *G_D12_DescriptorFromIndex(G_D12_DescriptorHeapKind heap_kind, u32 index) +{ + G_D12_DescriptorHeap *heap = &G_D12.descriptor_heaps[heap_kind]; + G_D12_Descriptor *descriptors = ArenaFirst(heap->descriptors_arena, G_D12_Descriptor); + return &descriptors[index]; +} + +G_D12_Descriptor *G_D12_PushDescriptor(G_D12_Arena *gpu_arena, G_D12_DescriptorHeapKind heap_kind) +{ + G_D12_DescriptorHeap *heap = &G_D12.descriptor_heaps[heap_kind]; + u64 per_batch_count = heap->per_batch_count; + + G_D12_Descriptor *descriptor = 0; + u32 index = 0; + + // Grab completed descriptor from arena + G_D12_DescriptorList *descriptors = &gpu_arena->reset_descriptors_by_heap[heap_kind]; + descriptor = descriptors->first; + if (descriptor) + { + G_D12_Queue *queue = 
G_D12_QueueFromKind(descriptor->completion_queue_kind); + i64 queue_commit_completion = ID3D12Fence_GetCompletedValue(queue->commit_fence); + if (queue_commit_completion >= descriptor->completion_queue_target) + { + // Descriptor no longer in use by gpu, reuse it + DllQueueRemove(descriptors->first, descriptors->last, descriptor); + descriptors->count -= 1; + index = descriptor->index; + } + else + { + // Descriptor may still be in use by gpu + descriptor = 0; + } + } + + // Allocate new descriptor from heap + if (!descriptor) + { + Lock lock = LockE(&heap->mutex); + { + if (heap->first_free) + { + descriptor = heap->first_free; + DllStackRemove(heap->first_free, descriptor); + index = descriptor->index; + } + else + { + u32 descriptors_count = ArenaCount(heap->descriptors_arena, G_D12_Descriptor); + if (descriptors_count >= heap->max_count) + { + Panic(Lit("Max descriptors reached in heap")); + } + descriptor = PushStructNoZero(heap->descriptors_arena, G_D12_Descriptor); + index = descriptors_count * per_batch_count; + } + } + Unlock(&lock); + } + + // Initialize descriptor handle + ZeroStruct(descriptor); + descriptor->gpu_arena = gpu_arena; + descriptor->index = index; + descriptor->first_handle.ptr = heap->start_handle.ptr + (index * heap->descriptor_size); + descriptor->heap = heap; + + DllQueuePush(gpu_arena->descriptors.first, gpu_arena->descriptors.last, descriptor); + gpu_arena->descriptors.count += 1; + + return descriptor; +} + +//////////////////////////////////////////////////////////// +//~ @hookimpl Shader resource reference + +u32 G_PushRef(G_ArenaHandle arena_handle, G_ResourceHandle resource_handle, G_RefDesc ref_desc) +{ + G_D12_Arena *gpu_arena = G_D12_ArenaFromHandle(arena_handle); + G_D12_Resource *resource = G_D12_ResourceFromHandle(resource_handle); + u32 result = 0; + + G_RefKind kind = ref_desc.kind; + b32 is_buffer = ( + kind == G_RefKind_StructuredBuffer || + kind == G_RefKind_ByteAddressBuffer + ); + b32 is_sampler = kind == 
G_RefKind_SamplerState; + b32 is_texture = !is_buffer && !is_sampler; + b32 is_raw = kind == G_RefKind_ByteAddressBuffer; + b32 is_writable = resource->flags & G_ResourceFlag_AllowShaderReadWrite; + + G_D12_Descriptor *descriptor = 0; + if (is_buffer || is_texture) + { + descriptor = G_D12_PushDescriptor(gpu_arena, G_D12_DescriptorHeapKind_CbvSrvUav); + + G_D12_DescriptorHeap *heap = &G_D12.descriptor_heaps[G_D12_DescriptorHeapKind_CbvSrvUav]; + Assert(heap->per_batch_count >= 2); + D3D12_CPU_DESCRIPTOR_HANDLE readonly_handle = descriptor->first_handle; + D3D12_CPU_DESCRIPTOR_HANDLE readwrite_handle = descriptor->first_handle; + readwrite_handle.ptr += heap->descriptor_size; + + b32 srv_ok = 0; + b32 uav_ok = 0; + + D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc = Zi; + D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc = Zi; + + if (is_buffer) + { + if (is_raw) + { + ref_desc.element_size = 4; + ref_desc.element_offset /= 4; + } + + u64 buffer_size_actual = resource->buffer_size_actual; + u64 num_elements_in_buffer = buffer_size_actual / ref_desc.element_size; + u64 num_elements_after_offset = num_elements_in_buffer > ref_desc.element_offset ? 
num_elements_in_buffer - ref_desc.element_offset : 0; + + //- Create buffer SRV + { + { + srv_desc.Format = DXGI_FORMAT_UNKNOWN; + srv_desc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER; + srv_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + srv_desc.Buffer.FirstElement = ref_desc.element_offset; + srv_desc.Buffer.NumElements = num_elements_after_offset; + srv_desc.Buffer.StructureByteStride = ref_desc.element_size; + srv_desc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_NONE; + } + if (is_raw) + { + srv_desc.Format = DXGI_FORMAT_R32_TYPELESS; + srv_desc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_RAW; + srv_desc.Buffer.StructureByteStride = 0; + } + srv_ok = 1; + } + //- Create buffer UAV + { + { + uav_desc.Format = DXGI_FORMAT_UNKNOWN; + uav_desc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER; + uav_desc.Buffer.FirstElement = ref_desc.element_offset; + uav_desc.Buffer.NumElements = num_elements_after_offset; + uav_desc.Buffer.StructureByteStride = ref_desc.element_size; + uav_desc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_NONE; + } + if (is_raw) + { + uav_desc.Format = DXGI_FORMAT_R32_TYPELESS; + uav_desc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_RAW; + uav_desc.Buffer.StructureByteStride = 0; + } + } + if (num_elements_after_offset > 0) + { + srv_ok = 1; + if (is_writable) + { + uav_ok = 1; + } + } + } + else if (is_texture) + { + // DXGI_FORMAT format = G_D12_DxgiFormatFromGpuFormat(resource->texture_format); + RngI32 mips = ref_desc.mips; + mips.min = ClampI32(mips.min, 0, resource->texture_mips - 1); + mips.max = ClampI32(mips.max, mips.min, resource->texture_mips - 1); + //- Create texture SRV + { + srv_desc.Format = DXGI_FORMAT_UNKNOWN; + srv_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + if (ref_desc.kind == G_RefKind_Texture1D) + { + srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE1D; + srv_desc.Texture1D.MostDetailedMip = mips.min; + srv_desc.Texture1D.MipLevels = mips.max - mips.min + 1; + } + else if (ref_desc.kind == 
G_RefKind_Texture2D) + { + srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; + srv_desc.Texture2D.MostDetailedMip = mips.min; + srv_desc.Texture2D.MipLevels = mips.max - mips.min + 1; + } + else if (ref_desc.kind == G_RefKind_Texture3D) + { + srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE3D; + srv_desc.Texture3D.MostDetailedMip = mips.min; + srv_desc.Texture3D.MipLevels = mips.max - mips.min + 1; + } + } + //- Create texture UAV + { + uav_desc.Format = DXGI_FORMAT_UNKNOWN; + if (ref_desc.kind == G_RefKind_Texture1D) + { + uav_desc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE1D; + uav_desc.Texture1D.MipSlice = mips.min; + } + else if (ref_desc.kind == G_RefKind_Texture2D) + { + uav_desc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2D; + uav_desc.Texture2D.MipSlice = mips.min; + } + else if (ref_desc.kind == G_RefKind_Texture3D) + { + uav_desc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE3D; + uav_desc.Texture3D.MipSlice = mips.min; + uav_desc.Texture3D.WSize = U32Max; + } + } + + srv_ok = 1; + if (is_writable) + { + uav_ok = 1; + } + + if (!uav_ok) + { + uav_desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + } + } + + if (srv_ok) + { + ID3D12Device_CreateShaderResourceView(G_D12.device, resource->d3d_resource, &srv_desc, readonly_handle); + } + else + { + ID3D12Device_CreateShaderResourceView(G_D12.device, 0, &srv_desc, readonly_handle); + } + + if (uav_ok) + { + ID3D12Device_CreateUnorderedAccessView(G_D12.device, resource->d3d_resource, 0, &uav_desc, readwrite_handle); + } + else + { + ID3D12Device_CreateUnorderedAccessView(G_D12.device, 0, 0, &uav_desc, readwrite_handle); + } + } + else if (is_sampler) + { + descriptor = G_D12_PushDescriptor(gpu_arena, G_D12_DescriptorHeapKind_Sampler); + G_SamplerDesc sampler_desc = resource->sampler_desc; + D3D12_SAMPLER_DESC d3d_desc = Zi; + { + d3d_desc.Filter = (D3D12_FILTER)sampler_desc.filter; + d3d_desc.AddressU = (D3D12_TEXTURE_ADDRESS_MODE)sampler_desc.x; + d3d_desc.AddressV = (D3D12_TEXTURE_ADDRESS_MODE)sampler_desc.y; 
+ d3d_desc.AddressW = (D3D12_TEXTURE_ADDRESS_MODE)sampler_desc.z; + d3d_desc.MipLODBias = sampler_desc.mip_lod_bias; + d3d_desc.MaxAnisotropy = MaxU32(sampler_desc.max_anisotropy, 1); + d3d_desc.ComparisonFunc = (D3D12_COMPARISON_FUNC)sampler_desc.comparison; + d3d_desc.BorderColor[0] = sampler_desc.border_color.x; + d3d_desc.BorderColor[1] = sampler_desc.border_color.y; + d3d_desc.BorderColor[2] = sampler_desc.border_color.z; + d3d_desc.BorderColor[3] = sampler_desc.border_color.w; + d3d_desc.MinLOD = sampler_desc.min_lod; + d3d_desc.MaxLOD = sampler_desc.max_lod; + } + if (d3d_desc.AddressU == 0) d3d_desc.AddressU = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + if (d3d_desc.AddressV == 0) d3d_desc.AddressV = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + if (d3d_desc.AddressW == 0) d3d_desc.AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + if (d3d_desc.MaxLOD >= Inf) + { + d3d_desc.MaxLOD = D3D12_FLOAT32_MAX; + } + ID3D12Device_CreateSampler(G_D12.device, &d3d_desc, descriptor->first_handle); + } + + return descriptor->index; +} + +//- Count + +u64 G_CountBufferBytes(G_ResourceHandle buffer) +{ + G_D12_Resource *resource = G_D12_ResourceFromHandle(buffer); + return resource->buffer_size; +} + +i32 G_Count1D(G_ResourceHandle texture) +{ + G_D12_Resource *resource = G_D12_ResourceFromHandle(texture); + return resource->texture_dims.x; +} + +Vec2I32 G_Count2D(G_ResourceHandle texture) +{ + G_D12_Resource *resource = G_D12_ResourceFromHandle(texture); + return VEC2I32(resource->texture_dims.x, resource->texture_dims.y); +} + +Vec3I32 G_Count3D(G_ResourceHandle texture) +{ + G_D12_Resource *resource = G_D12_ResourceFromHandle(texture); + return resource->texture_dims; +} + +i32 G_CountWidth(G_ResourceHandle texture) +{ + G_D12_Resource *resource = G_D12_ResourceFromHandle(texture); + return resource->texture_dims.x; +} + +i32 G_CountHeight(G_ResourceHandle texture) +{ + G_D12_Resource *resource = G_D12_ResourceFromHandle(texture); + return resource->texture_dims.y; +} + +i32 
G_CountDepth(G_ResourceHandle texture) +{ + G_D12_Resource *resource = G_D12_ResourceFromHandle(texture); + return resource->texture_dims.z; +} + +i32 G_CountMips(G_ResourceHandle texture) +{ + G_D12_Resource *resource = G_D12_ResourceFromHandle(texture); + return resource->texture_mips; +} + +//- Map + +void *G_HostPointerFromResource(G_ResourceHandle resource_handle) +{ + G_D12_Resource *resource = G_D12_ResourceFromHandle(resource_handle); + return resource->mapped; +} + +//////////////////////////////////////////////////////////// +//~ Command helpers + +G_D12_Cmd *G_D12_PushCmd(G_D12_CmdList *cl) +{ + // Grab chunk + G_D12_CmdChunk *chunk = cl->last_cmd_chunk; + { + if (chunk && chunk->cmds_count >= G_D12_CmdsPerChunk) + { + chunk = 0; + } + if (!chunk) + { + Lock lock = LockE(&G_D12.free_cmd_chunks_mutex); + { + chunk = G_D12.first_free_cmd_chunk; + if (chunk) + { + G_D12.first_free_cmd_chunk = chunk->next; + } + } + Unlock(&lock); + if (chunk) + { + G_D12_Cmd *cmds = chunk->cmds; + ZeroStruct(chunk); + chunk->cmds = cmds; + } + } + if (!chunk) + { + Arena *perm = PermArena(); + chunk = PushStruct(perm, G_D12_CmdChunk); + chunk->cmds = PushStructsNoZero(perm, G_D12_Cmd, G_D12_CmdsPerChunk); + } + if (chunk != cl->last_cmd_chunk) + { + SllQueuePush(cl->first_cmd_chunk, cl->last_cmd_chunk, chunk); + } + } + + // Push cmd to chunk + G_D12_Cmd *cmd = &chunk->cmds[chunk->cmds_count++]; + ZeroStruct(cmd); + ++cl->cmds_count; + return cmd; +} + +G_D12_Cmd *G_D12_PushConstCmd(G_D12_CmdList *cl, i32 slot, void *v) +{ + G_D12_Cmd *cmd = G_D12_PushCmd(cl); + cmd->kind = G_D12_CmdKind_Constant; + cmd->constant.slot = slot; + CopyBytes(&cmd->constant.value, v, 4); + return cmd; +} + +G_D12_StagingRegionNode *G_D12_PushStagingRegion(G_D12_CmdList *cl, u64 size) +{ + size = AlignU64(size, MaxU64(D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT, 512)); + + G_QueueKind queue_kind = cl->queue_kind; + G_D12_Queue *queue = G_D12_QueueFromKind(queue_kind); + G_D12_StagingRegionNode *result 
= 0; + + Lock lock = LockE(&queue->staging_mutex); + { + G_D12_StagingRing *old_ring = 0; + G_D12_StagingRing *ring = queue->staging_ring; + i64 completion = ID3D12Fence_GetCompletedValue(queue->commit_fence); + + // Find first completed region with matching size. + // For each region in ring: + // - If region size > size, split off a smaller region & use it + // + // - If region size < size, try to merge with next completed region + // + // - If no available completed region with eligible size, queue the + // current ring for deletion & create a new ring + // with larger size + + // Find region with large enough size + G_D12_StagingRegionNode *match = 0; + if (ring && ring->size >= size) + { + G_D12_StagingRegionNode *r = ring->head_region_node; + for (;;) + { + G_D12_StagingRegionNode *next = r->next; + b32 is_completed = completion >= Atomic64Fetch(&r->completion_target); + if (is_completed) + { + u64 region_size = 0; + if (next->pos > r->pos) + { + region_size = next->pos - r->pos; + } + else + { + region_size = ring->size - r->pos; + } + + if (region_size < size) + { + b32 next_is_completed = completion >= Atomic64Fetch(&next->completion_target); + if (next_is_completed) + { + if (next->pos > r->pos) + { + // Merge with next region & retry + if (next == ring->head_region_node) + { + ring->head_region_node = r; + } + r->next = next->next; + r->next->prev = r; + SllStackPush(ring->first_free_region_node, next); + } + else + { + // Wrap to beginning + r = next; + if (r == ring->head_region_node) + { + // No large-enough completed region found + break; + } + } + } + else + { + // No large-enough completed region found + break; + } + } + else + { + // Found matching region + match = r; + break; + } + } + else + { + // Continue to next region + r = next; + if (r == ring->head_region_node) + { + // No large-enough completed region found + break; + } + } + } + } + + // Create new ring if no match found + if (!match) + { + // Queue old ring for deletion + old_ring = 
ring; + ring = 0; + u64 new_ring_size = MaxU64(NextPow2U64(size), Mebi(8)); + if (old_ring) + { + new_ring_size = MaxU64(new_ring_size, old_ring->size * 2); + } + + // Create new ring + { + Arena *arena = AcquireArena(Gibi(1)); + ring = PushStruct(arena, G_D12_StagingRing); + ring->arena = arena; + ring->size = new_ring_size; + + G_ArenaHandle gpu_arena_handle = G_AcquireArena(); + ring->gpu_arena = G_D12_ArenaFromHandle(gpu_arena_handle); + + G_ResourceHandle resource_handle = G_PushBuffer( + gpu_arena_handle, G_D12_MakeHandle(G_CommandListHandle, cl), + u8, + new_ring_size, + .flags = G_ResourceFlag_HostMemory | G_ResourceFlag_Uncached + ); + ring->resource = G_D12_ResourceFromHandle(resource_handle); + ring->base = G_StructFromResource(resource_handle, u8); + } + + // Create initial region + match = PushStruct(ring->arena, G_D12_StagingRegionNode); + match->ring = ring; + match->next = match; + match->prev = match; + + // FIXME: Remove this + queue->staging_ring = ring; + } + + // Split extra region space + { + G_D12_StagingRegionNode *next = match->next; + u64 region_size = 0; + if (next->pos > match->pos) + { + region_size = next->pos - match->pos; + } + else + { + region_size = ring->size - match->pos; + } + + if (region_size > size) + { + G_D12_StagingRegionNode *new_next = ring->first_free_region_node; + if (new_next) + { + SllStackPop(ring->first_free_region_node); + ZeroStruct(new_next); + } + else + { + new_next = PushStruct(ring->arena, G_D12_StagingRegionNode); + } + new_next->next = next; + new_next->prev = match; + next->prev = new_next; + match->next = new_next; + + new_next->ring = ring; + new_next->pos = match->pos + size; + } + } + + ring->head_region_node = match->next; + + Atomic64Set(&match->completion_target, I64Max); + result = match; + + if (old_ring) + { + // FIXME: Queue old ring for deletion with command list + } + } + Unlock(&lock); + + // Add to command list + SllQueuePushN(cl->first_staging_region, cl->last_staging_region, result, 
next_in_command_list); + + return result; +} + +//////////////////////////////////////////////////////////// +//~ @hookimpl Command + +//- Command list + +G_CommandListHandle G_PrepareCommandList(G_QueueKind queue) +{ + G_D12_CmdList *cl = 0; + Lock lock = LockE(&G_D12.free_cmd_lists_mutex); + { + cl = G_D12.first_free_cmd_list; + if (cl) + { + G_D12.first_free_cmd_list = cl->next; + ZeroStruct(cl); + } + else + { + Arena *perm = PermArena(); + cl = PushStruct(perm, G_D12_CmdList); + } + } + Unlock(&lock); + cl->queue_kind = queue; + + return G_D12_MakeHandle(G_CommandListHandle, cl); +} + +i64 G_CommitCommandList(G_CommandListHandle cl_handle) +{ + G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle); + G_QueueKind queue_kind = cl->queue_kind; + G_D12_Queue *queue = G_D12_QueueFromKind(queue_kind); + TempArena scratch = BeginScratchNoConflict(); + + // Begin dx12 command list + G_D12_RawCommandList *rcl = G_D12_PrepareRawCommandList(queue_kind); + ID3D12GraphicsCommandList7 *d3d_cl = rcl->d3d_cl; + + // Pipeline state + b32 graphics_rootsig_set = 0; + b32 compute_rootsig_set = 0; + b32 descriptor_heaps_set = 0; + G_D12_Pipeline *bound_pipeline = 0; + + // Constants state + u64 slotted_constants[G_NumConstants]; + u64 bound_compute_constants[G_NumConstants]; + u64 bound_graphics_constants[G_NumConstants]; + for (i32 i = 0; i < countof(slotted_constants); ++i) { slotted_constants[i] = 0; } // Zero-initialize all slots + for (i32 i = 0; i < countof(bound_compute_constants); ++i) { bound_compute_constants[i] = U64Max; } + for (i32 i = 0; i < countof(bound_graphics_constants); ++i) { bound_graphics_constants[i] = U64Max; } + + // Fill built-in constants + if (!G_IsRefNil(queue->print_buffer_ref)) + { + slotted_constants[G_ShaderConst_PrintBufferRef] = queue->print_buffer_ref.v; + } + { + b32 tweak_b32 = TweakBool("Shader tweak-bool", 1); + f32 tweak_f32 = TweakFloat("Shader tweak-float", 1, 0, 1); + slotted_constants[G_ShaderConst_TweakB32] = tweak_b32; + 
slotted_constants[G_ShaderConst_TweakF32] = *(u32 *)&tweak_f32; + } + + // Rasterizer state + D3D12_VIEWPORT bound_viewport = Zi; + D3D12_RECT bound_scissor = Zi; + D3D_PRIMITIVE_TOPOLOGY bound_primitive_topology = -1; + D3D12_INDEX_BUFFER_VIEW bound_ibv = Zi; + u64 bound_render_target_uids[G_MaxRenderTargets] = Zi; + u64 bound_render_clear_target_uid = 0; + + // Flatten command chunks + u64 cmds_count = 0; + G_D12_Cmd *cmds = PushStructsNoZero(scratch.arena, G_D12_Cmd, cl->cmds_count); + { + // Flatten command chunks + { + for (G_D12_CmdChunk *chunk = cl->first_cmd_chunk; chunk; chunk = chunk->next) + { + for (u64 cmd_chunk_idx = 0; cmd_chunk_idx < chunk->cmds_count; ++cmd_chunk_idx) + { + cmds[cmds_count++] = chunk->cmds[cmd_chunk_idx]; + } + } + } + // Free command chunks + { + Lock lock = LockE(&G_D12.free_cmd_chunks_mutex); + { + G_D12_CmdChunk *chunk = cl->first_cmd_chunk; + while (chunk) + { + G_D12_CmdChunk *next = chunk->next; + G_D12.first_free_cmd_chunk = chunk; + chunk = next; + } + } + Unlock(&lock); + } + } + + // Batch barrier cmds + i64 max_buffer_barriers = 0; + i64 max_texture_barriers = 0; + i64 max_global_barriers = 0; + { + u64 cmd_idx = 0; + u64 batch_gen = 0; + G_D12_Cmd *prev_barrier_cmd = 0; + while (cmd_idx < cmds_count) + { + G_D12_Cmd *cmd = &cmds[cmd_idx]; + switch (cmd->kind) + { + // Batch-interrupting cmds + default: + { + cmd_idx += 1; + batch_gen += 1; + } break; + + // Non-batch-interrupting cmds + case G_D12_CmdKind_Constant: + { + cmd_idx += 1; + } break; + + case G_D12_CmdKind_Barrier: + { + // Determine 'before' state from lookup + if (prev_barrier_cmd && prev_barrier_cmd->barrier.batch_gen != batch_gen) + { + // This barrier is part of new batch + prev_barrier_cmd->barrier.is_end_of_batch = 1; + } + cmd->barrier.batch_gen = batch_gen; + prev_barrier_cmd = cmd; + + if (cmd->barrier.desc.is_global) + { + max_global_barriers += 1; + } + else + { + G_D12_Resource *resource = G_D12_ResourceFromHandle(cmd->barrier.desc.resource); + 
if (resource->is_texture) + { + RngI32 mips = cmd->barrier.desc.mips; + mips.min = ClampI32(mips.min, 0, resource->texture_mips - 1); + mips.max = ClampI32(mips.max, mips.min, resource->texture_mips - 1); + max_texture_barriers += mips.max - mips.min + 1; + } + else + { + max_buffer_barriers += 1; + } + } + + cmd_idx += 1; + } break; + } + } + + if (prev_barrier_cmd) + { + prev_barrier_cmd->barrier.is_end_of_batch = 1; + } + } + + // Build d3d commands + { + u64 batch_barrier_idx_start = 0; + u64 batch_barrier_idx_opl = 0; // One past last + + u64 cmd_idx = 0; + while (cmd_idx < cmds_count) + { + G_D12_Cmd *cmd = &cmds[cmd_idx]; + switch (cmd->kind) + { + default: + { + cmd_idx += 1; + } break; + + //- Constant + + case G_D12_CmdKind_Constant: + { + i32 slot = cmd->constant.slot; + u32 value = cmd->constant.value; + if (slot >= 0 && slot < countof(slotted_constants)) + { + slotted_constants[slot] = value; + } + cmd_idx += 1; + } break; + + //- Barrier + + case G_D12_CmdKind_Barrier: + { + batch_barrier_idx_opl = cmd_idx + 1; + + // Submit batched barriers + if (cmd->barrier.is_end_of_batch) + { + // Build barriers + u64 buffer_barriers_count = 0; + u64 texture_barriers_count = 0; + u64 global_barriers_count = 0; + D3D12_BUFFER_BARRIER *buffer_barriers = PushStructs(scratch.arena, D3D12_BUFFER_BARRIER, max_buffer_barriers); + D3D12_TEXTURE_BARRIER *texture_barriers = PushStructs(scratch.arena, D3D12_TEXTURE_BARRIER, max_texture_barriers); + D3D12_GLOBAL_BARRIER *global_barriers = PushStructs(scratch.arena, D3D12_GLOBAL_BARRIER, max_global_barriers); + for (u64 barrier_cmd_idx = batch_barrier_idx_start; barrier_cmd_idx < batch_barrier_idx_opl; ++barrier_cmd_idx) + { + G_D12_Cmd *barrier_cmd = &cmds[barrier_cmd_idx]; + if (barrier_cmd->kind == G_D12_CmdKind_Barrier) + { + G_MemoryBarrierDesc desc = barrier_cmd->barrier.desc; + // Translate gpu barrier kind -> d3d barrier fields + D3D12_BARRIER_SYNC sync_before = G_D12_BarrierSyncFromStages(desc.stage_prev); + 
D3D12_BARRIER_SYNC sync_after = G_D12_BarrierSyncFromStages(desc.stage_next); + D3D12_BARRIER_ACCESS access_before = G_D12_BarrierAccessFromAccesses(desc.access_prev); + D3D12_BARRIER_ACCESS access_after = G_D12_BarrierAccessFromAccesses(desc.access_next); + D3D12_BARRIER_TYPE barrier_type = D3D12_BARRIER_TYPE_GLOBAL; + if (!desc.is_global) + { + G_D12_Resource *resource = G_D12_ResourceFromHandle(desc.resource); + barrier_type = resource->is_texture ? D3D12_BARRIER_TYPE_TEXTURE : D3D12_BARRIER_TYPE_BUFFER; + } + + // Build barrier + switch (barrier_type) + { + case D3D12_BARRIER_TYPE_BUFFER: + { + G_D12_Resource *resource = G_D12_ResourceFromHandle(desc.resource); + D3D12_BUFFER_BARRIER *barrier = &buffer_barriers[buffer_barriers_count++]; + barrier->SyncBefore = sync_before; + barrier->SyncAfter = sync_after; + barrier->AccessBefore = access_before; + barrier->AccessAfter = access_after; + barrier->pResource = resource->d3d_resource; + barrier->Offset = 0; + barrier->Size = U64Max; + } break; + + case D3D12_BARRIER_TYPE_TEXTURE: + { + G_D12_Resource *resource = G_D12_ResourceFromHandle(desc.resource); + RngI32 mips = barrier_cmd->barrier.desc.mips; + { + mips.min = ClampI32(mips.min, 0, resource->texture_mips - 1); + mips.max = ClampI32(mips.max, mips.min, resource->texture_mips - 1); + } + // Create a barrier for each contiguous span of mips with matching layout + D3D12_TEXTURE_BARRIER *barrier = 0; + for (i32 mip_idx = mips.min; mip_idx <= mips.max; ++mip_idx) + { + D3D12_BARRIER_LAYOUT layout_before = resource->cmdlist_texture_layouts[mip_idx]; + D3D12_BARRIER_LAYOUT layout_after = layout_before; + if (desc.layout != G_Layout_NoChange) + { + layout_after = G_D12_BarrierLayoutFromLayout(desc.layout); + } + if (barrier == 0 || barrier->LayoutBefore != layout_before) + { + barrier = &texture_barriers[texture_barriers_count++]; + barrier->SyncBefore = sync_before; + barrier->SyncAfter = sync_after; + barrier->AccessBefore = access_before; + barrier->AccessAfter = 
access_after; + barrier->LayoutBefore = layout_before; + barrier->LayoutAfter = layout_after; + barrier->pResource = resource->d3d_resource; + barrier->Subresources.IndexOrFirstMipLevel = mip_idx; + barrier->Subresources.NumArraySlices = 1; + barrier->Subresources.NumPlanes = 1; + } + barrier->Subresources.NumMipLevels += 1; + resource->cmdlist_texture_layouts[mip_idx] = layout_after; + } + } break; + + case D3D12_BARRIER_TYPE_GLOBAL: + { + D3D12_GLOBAL_BARRIER *barrier = &global_barriers[global_barriers_count++]; + barrier->SyncBefore = sync_before; + barrier->SyncAfter = sync_after; + barrier->AccessBefore = access_before; + barrier->AccessAfter = access_after; + } break; + } + } + } + + // Dispatch barriers + { + u32 barrier_groups_count = 0; + D3D12_BARRIER_GROUP barrier_groups[3] = Zi; + if (buffer_barriers_count > 0) + { + D3D12_BARRIER_GROUP *group = &barrier_groups[barrier_groups_count++]; + group->Type = D3D12_BARRIER_TYPE_BUFFER; + group->NumBarriers = buffer_barriers_count; + group->pBufferBarriers = buffer_barriers; + } + if (texture_barriers_count > 0) + { + D3D12_BARRIER_GROUP *group = &barrier_groups[barrier_groups_count++]; + group->Type = D3D12_BARRIER_TYPE_TEXTURE; + group->NumBarriers = texture_barriers_count; + group->pTextureBarriers = texture_barriers; + } + if (global_barriers_count > 0) + { + D3D12_BARRIER_GROUP *group = &barrier_groups[barrier_groups_count++]; + group->Type = D3D12_BARRIER_TYPE_GLOBAL; + group->NumBarriers = global_barriers_count; + group->pGlobalBarriers = global_barriers; + } + if (barrier_groups_count > 0) + { + ID3D12GraphicsCommandList7_Barrier(d3d_cl, barrier_groups_count, barrier_groups); + } + } + + batch_barrier_idx_start = cmd_idx + 1; + } + + cmd_idx += 1; + } break; + + //- Copy bytes + + case G_D12_CmdKind_CopyBytes: + { + u64 src_offset = cmd->copy_bytes.src_range.min; + u64 copy_size = cmd->copy_bytes.src_range.max - cmd->copy_bytes.src_range.min; + ID3D12GraphicsCommandList_CopyBufferRegion( + d3d_cl, + 
cmd->copy_bytes.dst->d3d_resource, + cmd->copy_bytes.dst_offset, + cmd->copy_bytes.src->d3d_resource, + src_offset, + copy_size + ); + cmd_idx += 1; + } break; + + //- Copy texels + + case G_D12_CmdKind_CopyTexels: + { + G_D12_Resource *dst = cmd->copy_texels.dst; + G_D12_Resource *src = cmd->copy_texels.src; + D3D12_TEXTURE_COPY_LOCATION dst_loc = cmd->copy_texels.dst_loc; + D3D12_TEXTURE_COPY_LOCATION src_loc = cmd->copy_texels.src_loc; + Vec3I32 dst_offset = cmd->copy_texels.dst_texture_offset; + Rng3I32 src_range = cmd->copy_texels.src_texture_range; + + D3D12_BOX src_box = Zi; + D3D12_BOX *src_box_ptr = 0; + { + src_box.left = src_range.p0.x; + src_box.top = src_range.p0.y; + src_box.front = src_range.p0.z; + src_box.right = src_range.p1.x; + src_box.bottom = src_range.p1.y; + src_box.back = src_range.p1.z; + if (src->is_texture) + { + src_box_ptr = &src_box; + } + } + + if (dst->flags & G_ResourceFlag_AllowDepthStencil) + { + // Depth-stencil textures must have src box & dst offset set to 0 + // https://learn.microsoft.com/en-us/windows/win32/api/d3d12/nf-d3d12-id3d12graphicscommandlist-copytextureregion + ID3D12GraphicsCommandList_CopyTextureRegion(d3d_cl, &dst_loc, 0, 0, 0, &src_loc, 0); + } + else + { + ID3D12GraphicsCommandList_CopyTextureRegion(d3d_cl, &dst_loc, dst_offset.x, dst_offset.y, dst_offset.z, &src_loc, src_box_ptr); + } + + cmd_idx += 1; + } break; + + //- Compute + + case G_D12_CmdKind_Compute: + { + // Fetch pipeline + G_D12_Pipeline *pipeline = 0; + { + G_D12_PipelineDesc pipeline_desc; + ZeroStruct(&pipeline_desc); + pipeline_desc.cs = cmd->compute.cs; + pipeline = G_D12_PipelineFromDesc(pipeline_desc); + } + + if (pipeline) + { + // Set descriptor heaps + if (!descriptor_heaps_set) + { + ID3D12DescriptorHeap *heaps[] = { + G_D12.descriptor_heaps[G_D12_DescriptorHeapKind_CbvSrvUav].d3d_heap, + G_D12.descriptor_heaps[G_D12_DescriptorHeapKind_Sampler].d3d_heap, + }; + ID3D12GraphicsCommandList_SetDescriptorHeaps(d3d_cl, countof(heaps), 
heaps); + descriptor_heaps_set = 1; + } + + // Bind rootsig + if (!compute_rootsig_set) + { + ID3D12GraphicsCommandList_SetComputeRootSignature(d3d_cl, G_D12.bindless_rootsig); + compute_rootsig_set = 1; + } + + // Bind pipeline + if (pipeline != bound_pipeline) + { + ID3D12GraphicsCommandList_SetPipelineState(d3d_cl, pipeline->pso); + bound_pipeline = pipeline; + } + + // Update root constants + for (i32 slot = 0; slot < countof(slotted_constants); ++slot) + { + if (bound_compute_constants[slot] != slotted_constants[slot]) + { + ID3D12GraphicsCommandList_SetComputeRoot32BitConstant(d3d_cl, slot, slotted_constants[slot], 0); + bound_compute_constants[slot] = slotted_constants[slot]; + } + } + + // Dispatch + ID3D12GraphicsCommandList_Dispatch(d3d_cl, cmd->compute.groups.x, cmd->compute.groups.y, cmd->compute.groups.z); + } + + cmd_idx += 1; + } break; + + //- Rasterize + + case G_D12_CmdKind_Rasterize: + { + // Fetch pipeline + G_D12_Pipeline *pipeline = 0; + { + G_D12_PipelineDesc pipeline_desc; + ZeroStruct(&pipeline_desc); + pipeline_desc.vs = cmd->rasterize.vs; + pipeline_desc.ps = cmd->rasterize.ps; + { + pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_UNDEFINED; + switch (cmd->rasterize.raster_mode) + { + default: Assert(0); break; + case G_RasterMode_PointList: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT; break; + case G_RasterMode_LineList: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE; break; + case G_RasterMode_LineStrip: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE; break; + case G_RasterMode_TriangleList: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; break; + case G_RasterMode_TriangleStrip: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; break; + case G_RasterMode_WireTriangleList: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; break; + case G_RasterMode_WireTriangleStrip: pipeline_desc.topology_type = 
D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; break; + } + } + if (cmd->rasterize.raster_mode == G_RasterMode_WireTriangleList || cmd->rasterize.raster_mode == G_RasterMode_WireTriangleStrip) + { + pipeline_desc.is_wireframe = 1; + } + for (u32 i = 0; i < countof(cmd->rasterize.render_target_descs); ++i) + { + G_RenderTargetDesc desc = cmd->rasterize.render_target_descs[i]; + G_D12_Resource *rt = G_D12_ResourceFromHandle(desc.resource); + if (rt) + { + pipeline_desc.render_target_formats[i] = rt->texture_format; + pipeline_desc.render_target_blend_modes[i] = desc.blend; + } + else + { + pipeline_desc.render_target_formats[i] = G_Format_Unknown; + } + } + pipeline = G_D12_PipelineFromDesc(pipeline_desc); + } + + // Create ibv + u32 indices_count = 0; + D3D12_INDEX_BUFFER_VIEW ibv = Zi; + { + G_IndexBufferDesc desc = cmd->rasterize.index_buffer_desc; + if (desc.count > 0) + { + G_D12_Resource *index_buffer_resource = G_D12_ResourceFromHandle(desc.resource); + ibv.BufferLocation = index_buffer_resource->buffer_gpu_address; + ibv.SizeInBytes = desc.stride * desc.count; + if (desc.stride == 2) + { + ibv.Format = DXGI_FORMAT_R16_UINT; + indices_count = ibv.SizeInBytes / 2; + } + else if (desc.stride == 4) + { + ibv.Format = DXGI_FORMAT_R32_UINT; + indices_count = ibv.SizeInBytes / 4; + } + else + { + Assert(0); // Invalid index size + } + } + } + + // Prepare & dispatch + if (pipeline && indices_count > 0) + { + // Set descriptor heaps + if (!descriptor_heaps_set) + { + ID3D12DescriptorHeap *heaps[] = { + G_D12.descriptor_heaps[G_D12_DescriptorHeapKind_CbvSrvUav].d3d_heap, + G_D12.descriptor_heaps[G_D12_DescriptorHeapKind_Sampler].d3d_heap, + }; + ID3D12GraphicsCommandList_SetDescriptorHeaps(d3d_cl, countof(heaps), heaps); + descriptor_heaps_set = 1; + } + + // Bind rootsig + if (!graphics_rootsig_set) + { + ID3D12GraphicsCommandList_SetGraphicsRootSignature(d3d_cl, G_D12.bindless_rootsig); + graphics_rootsig_set = 1; + } + + // Bind pipeline + if (pipeline != bound_pipeline) 
+ { + ID3D12GraphicsCommandList_SetPipelineState(d3d_cl, pipeline->pso); + bound_pipeline = pipeline; + } + + // Update root constants + for (i32 slot = 0; slot < countof(slotted_constants); ++slot) + { + if (bound_graphics_constants[slot] != slotted_constants[slot]) + { + ID3D12GraphicsCommandList_SetGraphicsRoot32BitConstant(d3d_cl, slot, slotted_constants[slot], 0); + bound_graphics_constants[slot] = slotted_constants[slot]; + } + } + + // Set viewport + { + D3D12_VIEWPORT viewport = Zi; + { + Rng3 range = cmd->rasterize.viewport; + viewport.TopLeftX = range.p0.x; + viewport.TopLeftY = range.p0.y; + viewport.Width = range.p1.x - range.p0.x; + viewport.Height = range.p1.y - range.p0.y; + viewport.MinDepth = range.p0.z; + viewport.MaxDepth = range.p1.z; + } + if (!MatchStruct(&viewport, &bound_viewport)) + { + bound_viewport = viewport; + ID3D12GraphicsCommandList_RSSetViewports(d3d_cl, 1, &viewport); + } + } + + // Set scissor + { + D3D12_RECT scissor = Zi; + { + Rng2 range = cmd->rasterize.scissor; + scissor.left = range.p0.x; + scissor.top = range.p0.y; + scissor.right = range.p1.x; + scissor.bottom = range.p1.y; + } + if (!MatchStruct(&scissor, &bound_scissor)) + { + bound_scissor = scissor; + ID3D12GraphicsCommandList_RSSetScissorRects(d3d_cl, 1, &scissor); + } + } + + // Set topology + { + D3D_PRIMITIVE_TOPOLOGY topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; + switch (cmd->rasterize.raster_mode) + { + default: Assert(0); break; + case G_RasterMode_PointList: topology = D3D_PRIMITIVE_TOPOLOGY_POINTLIST; break; + case G_RasterMode_LineList: topology = D3D_PRIMITIVE_TOPOLOGY_LINELIST; break; + case G_RasterMode_LineStrip: topology = D3D_PRIMITIVE_TOPOLOGY_LINESTRIP; break; + case G_RasterMode_TriangleList: topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; break; + case G_RasterMode_TriangleStrip: topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP; break; + case G_RasterMode_WireTriangleList: topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; break; + case 
G_RasterMode_WireTriangleStrip: topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP; break; + } + if (topology != bound_primitive_topology) + { + ID3D12GraphicsCommandList_IASetPrimitiveTopology(d3d_cl, topology); + } + } + + // Set index buffer + if (!MatchStruct(&ibv, &bound_ibv)) + { + ID3D12GraphicsCommandList_IASetIndexBuffer(d3d_cl, &ibv); + bound_ibv = ibv; + } + + // Bind render targets + { + b32 om_dirty = 0; + u32 rtvs_count = 0; + for (u32 i = 0; i < countof(cmd->rasterize.render_target_descs); ++i) + { + G_RenderTargetDesc desc = cmd->rasterize.render_target_descs[i]; + G_D12_Resource *rt = G_D12_ResourceFromHandle(desc.resource); + if (rt) + { + Assert(AnyBit(rt->d3d_desc.Flags, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET)); + if (bound_render_target_uids[i] != rt->uid + desc.mip) + { + G_D12_Descriptor *rtv_descriptor = rcl->rtv_descriptors[i]; + G_D12_InitRtv(rt, rtv_descriptor->first_handle, desc.mip); + bound_render_target_uids[i] = rt->uid + desc.mip; + om_dirty = 1; + } + ++rtvs_count; + } + else + { + break; + } + } + if (om_dirty) + { + D3D12_CPU_DESCRIPTOR_HANDLE rtv_handles[G_MaxRenderTargets] = Zi; + for (u32 i = 0; i < rtvs_count; ++i) + { + rtv_handles[i] = rcl->rtv_descriptors[i]->first_handle; + } + ID3D12GraphicsCommandList_OMSetRenderTargets(d3d_cl, rtvs_count, rtv_handles, 0, 0); + } + } + + // Dispatch + ID3D12GraphicsCommandList_DrawIndexedInstanced(d3d_cl, indices_count, cmd->rasterize.instances_count, 0, 0, 0); + } + + cmd_idx += 1; + } break; + + //- Clear rtv + + case G_D12_CmdKind_ClearRtv: + { + G_D12_Resource *rt = cmd->clear_rtv.render_target; + f32 clear_color[4] = Zi; + { + clear_color[0] = cmd->clear_rtv.color.x; + clear_color[1] = cmd->clear_rtv.color.y; + clear_color[2] = cmd->clear_rtv.color.z; + clear_color[3] = cmd->clear_rtv.color.w; + } + D3D12_CPU_DESCRIPTOR_HANDLE rtv_handle = rcl->rtv_clear_descriptor->first_handle; + if (bound_render_clear_target_uid != rt->uid + cmd->clear_rtv.mip) + { + G_D12_InitRtv(rt, rtv_handle, 
cmd->clear_rtv.mip); + bound_render_clear_target_uid = rt->uid + cmd->clear_rtv.mip; + } + ID3D12GraphicsCommandList_ClearRenderTargetView(d3d_cl, rtv_handle, clear_color, 0, 0); + cmd_idx += 1; + } break; + + //- Log + + case G_D12_CmdKind_Log: + { + G_D12_Resource *resource = cmd->log.resource; + String resource_name = STRING(resource->name_len, resource->name_text); + + String layouts_str = Zi; + { + StringList layout_names = Zi; + for (i32 mip_idx = 0; mip_idx < resource->texture_mips; ++mip_idx) + { + String layout_name = G_D12_NameFromBarrierLayout(resource->cmdlist_texture_layouts[mip_idx]); + String layout_str = StringF(scratch.arena, "[%F] %F", FmtSint(mip_idx), FmtString(layout_name)); + PushStringToList(scratch.arena, &layout_names, layout_str); + } + layouts_str = StringFromList(scratch.arena, layout_names, Lit(", ")); + } + + String msg = StringF( + scratch.arena, + "[Gpu command list resource log] uid: %F, name: \"%F\", layouts: { %F }", + FmtUint(resource->uid), + FmtString(resource_name), + FmtString(layouts_str) + ); + LogDebug(msg); + cmd_idx += 1; + } break; + + //- Discard + + case G_D12_CmdKind_Discard: + { + G_D12_Resource *resource = cmd->discard.resource; + D3D12_DISCARD_REGION region = Zi; + region.FirstSubresource = 0; + region.NumSubresources = resource->texture_mips; + ID3D12GraphicsCommandList_DiscardResource(d3d_cl, resource->d3d_resource, 0); + cmd_idx += 1; + } break; + } + } + } + + // End dx12 command list + i64 completion_target = G_D12_CommitRawCommandList(rcl); + + // Attach completion info to staging regions + for (G_D12_StagingRegionNode *n = cl->first_staging_region; n;) + { + G_D12_StagingRegionNode *next = n->next_in_command_list; + { + Atomic64Set(&n->completion_target, completion_target); + n->next_in_command_list = 0; + } + n = next; + } + + // Attach completion info to reset descriptors + for (G_D12_Descriptor *d = cl->reset_descriptors.first; d;) + { + G_D12_Descriptor *next = d->next; + { + G_D12_Arena *gpu_arena = 
d->gpu_arena; + d->completion_queue_kind = queue_kind; + d->completion_queue_target = completion_target; + G_D12_DescriptorList *gpu_arena_reset_descriptors_list = &gpu_arena->reset_descriptors_by_heap[d->heap->kind]; + DllQueuePush(gpu_arena_reset_descriptors_list->first, gpu_arena_reset_descriptors_list->last, d); + ++gpu_arena_reset_descriptors_list->count; + } + d = next; + } + + // Attach completion info to releasables & submit for release + if (cl->releases.first) + { + // Attach completion info + for (G_D12_Releasable *release = cl->releases.first; release; release = release->next) + { + release->completion_queue_kind = queue_kind; + release->completion_queue_target = completion_target; + } + // Submit releass + Lock lock = LockE(&G_D12.pending_releases_mutex); + { + if (G_D12.pending_releases.last) + { + G_D12.pending_releases.last->next = cl->releases.first; + } + else + { + G_D12.pending_releases.first = cl->releases.first; + } + G_D12.pending_releases.last = cl->releases.last; + } + Unlock(&lock); + } + + + + + + // // Attach completion info to resources + // for (G_D12_Resource *r = cl->reset_resources.first; r;) + // { + // G_D12_Resource *next = r->next; + // { + // G_D12_ResourceHeap *heap = r->heap; + // G_D12_Arena *gpu_arena = >heap->gpu_arena; + // r->completion_queue_kind = queue->kind; + // r->completion_queue_target = completion_target; + // G_D12_ResourceList *heap_reset_resources_list = &heap->reset_resources; + // DllQueuePush(heap_reset_resources_list->first, heap_reset_resourecs_list->last, r); + // ++heap_reset_resources_list->count; + // } + // r = next; + // } + + // Free command list + { + Lock lock = LockE(&G_D12.free_cmd_lists_mutex); + { + cl->next = G_D12.first_free_cmd_list; + G_D12.first_free_cmd_list = cl; + } + Unlock(&lock); + } + + EndScratch(scratch); + return completion_target; +} + +//- Cpu -> Gpu staged copy + +void G_CopyCpuToBuffer(G_CommandListHandle cl_handle, G_ResourceHandle dst_handle, u64 dst_offset, void *src, 
RngU64 src_copy_range) +{ + if (src_copy_range.max > src_copy_range.min) + { + G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle); + u64 copy_size = src_copy_range.max - src_copy_range.min; + G_D12_StagingRegionNode *region = G_D12_PushStagingRegion(cl, copy_size); + CopyBytes((u8 *)region->ring->base + region->pos, (u8 *)src + src_copy_range.min, copy_size); + G_CopyBufferToBuffer( + cl_handle, + dst_handle, + dst_offset, + G_D12_MakeHandle(G_ResourceHandle, region->ring->resource), + RNGU64(region->pos, region->pos + copy_size) + ); + } +} + +void G_CopyCpuToTexture(G_CommandListHandle cl_handle, G_ResourceHandle dst_handle, Vec3I32 dst_offset, void *src, Vec3I32 src_dims, Rng3I32 src_copy_range) +{ + Vec3I32 staged_dims = Zi; + { + staged_dims.x = src_copy_range.p1.x - src_copy_range.p0.x; + staged_dims.y = src_copy_range.p1.y - src_copy_range.p0.y; + staged_dims.z = src_copy_range.p1.z - src_copy_range.p0.z; + } + if (staged_dims.x > 0 && staged_dims.y > 0 && staged_dims.z > 0) + { + G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle); + G_D12_Resource *dst = G_D12_ResourceFromHandle(dst_handle); + Assert(dst->is_texture); + + // Grab footprint info + u64 staging_footprint_rows_count = 0; + u64 staging_footprint_row_size = 0; + u64 staging_footprint_row_pitch = 0; + u64 staging_footprint_size = 0; + D3D12_PLACED_SUBRESOURCE_FOOTPRINT staging_footprint = Zi; + { + D3D12_RESOURCE_DESC src_desc = Zi; + { + ID3D12Resource_GetDesc(dst->d3d_resource, &src_desc); + src_desc.Width = staged_dims.x; + src_desc.Height = staged_dims.y; + src_desc.DepthOrArraySize = staged_dims.z; + } + ID3D12Device_GetCopyableFootprints(G_D12.device, &src_desc, 0, 1, 0, &staging_footprint, (u32 *)&staging_footprint_rows_count, &staging_footprint_row_size, &staging_footprint_size); + staging_footprint_row_pitch = staging_footprint.Footprint.RowPitch; + } + + i32 bytes_per_texel = staging_footprint_row_size / staged_dims.x; + u64 src_row_pitch = src_dims.x * bytes_per_texel; + + 
G_D12_StagingRegionNode *staging_region = G_D12_PushStagingRegion(cl, staging_footprint_size); + G_D12_Resource *staging_resource = staging_region->ring->resource; + G_ResourceHandle staging_resource_handle = G_D12_MakeHandle(G_ResourceHandle, staging_resource); + staging_footprint.Offset = staging_region->pos; + + // Fill staging buffer + { + u8 *src_base = (u8 *)src + (src_copy_range.p0.y * src_row_pitch) + (src_copy_range.p0.x * bytes_per_texel); + u8 *staged_base = (u8 *)staging_region->ring->base + staging_footprint.Offset; + u64 src_z_pitch = src_row_pitch * src_dims.y; + u64 staged_z_pitch = staging_footprint_row_size * staging_footprint_rows_count; + for (i32 z = 0; z < src_dims.z; ++z) + { + u64 src_z_offset = z * src_z_pitch; + u64 staged_z_offset = z * staged_z_pitch; + for (i32 y = 0; y < staging_footprint_rows_count; ++y) + { + u8 *src_row = src_base + y * src_row_pitch + src_z_offset; + u8 *staged_row = staged_base + y * staging_footprint_row_pitch + staged_z_offset; + CopyBytes(staged_row, src_row, staging_footprint_row_size); + } + } + } + + Rng3I32 dst_copy_range = Zi; + dst_copy_range.p0 = dst_offset; + dst_copy_range.p1.x = dst_copy_range.p0.x + staged_dims.x; + dst_copy_range.p1.y = dst_copy_range.p0.y + staged_dims.y; + dst_copy_range.p1.z = dst_copy_range.p0.z + staged_dims.z; + G_CopyBufferToTexture( + cl_handle, + dst_handle, dst_copy_range, + staging_resource_handle, staging_footprint.Offset + ); + } +} + +//- Gpu <-> Gpu copy + +void G_CopyBufferToBuffer(G_CommandListHandle cl_handle, G_ResourceHandle dst_handle, u64 dst_offset, G_ResourceHandle src_handle, RngU64 src_copy_range) +{ + if (src_copy_range.max > src_copy_range.min) + { + G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle); + G_D12_Cmd *cmd = G_D12_PushCmd(cl); + cmd->kind = G_D12_CmdKind_CopyBytes; + cmd->copy_bytes.src = G_D12_ResourceFromHandle(src_handle); + cmd->copy_bytes.dst = G_D12_ResourceFromHandle(dst_handle); + cmd->copy_bytes.dst_offset = dst_offset; + 
cmd->copy_bytes.src_range = src_copy_range; + } +} + +void G_CopyBufferToTexture(G_CommandListHandle cl_handle, G_ResourceHandle dst_handle, Rng3I32 dst_copy_range, G_ResourceHandle src_handle, u64 src_offset) +{ + Vec3I32 src_dims = Zi; + { + src_dims.x = dst_copy_range.p1.x - dst_copy_range.p0.x; + src_dims.y = dst_copy_range.p1.y - dst_copy_range.p0.y; + src_dims.z = dst_copy_range.p1.z - dst_copy_range.p0.z; + } + if (src_dims.x > 0 && src_dims.y > 0 && src_dims.z > 0) + { + G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle); + G_D12_Resource *src = G_D12_ResourceFromHandle(src_handle); + G_D12_Resource *dst = G_D12_ResourceFromHandle(dst_handle); + Assert(!src->is_texture); + Assert(dst->is_texture); + + // Grab footprint info + D3D12_PLACED_SUBRESOURCE_FOOTPRINT src_footprint = Zi; + { + D3D12_RESOURCE_DESC src_desc = Zi; + { + ID3D12Resource_GetDesc(dst->d3d_resource, &src_desc); + src_desc.Width = src_dims.x; + src_desc.Height = src_dims.y; + src_desc.DepthOrArraySize = src_dims.z; + } + ID3D12Device_GetCopyableFootprints(G_D12.device, &src_desc, 0, 1, 0, &src_footprint, 0, 0, 0); + src_footprint.Offset = src_offset; + } + + D3D12_TEXTURE_COPY_LOCATION src_loc = Zi; + D3D12_TEXTURE_COPY_LOCATION dst_loc = Zi; + { + src_loc.pResource = src->d3d_resource; + src_loc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; + src_loc.PlacedFootprint = src_footprint; + } + { + dst_loc.pResource = dst->d3d_resource; + dst_loc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + dst_loc.SubresourceIndex = 0; + } + + G_D12_Cmd *cmd = G_D12_PushCmd(cl); + cmd->kind = G_D12_CmdKind_CopyTexels; + cmd->copy_texels.dst = dst; + cmd->copy_texels.src = src; + cmd->copy_texels.dst_loc = dst_loc; + cmd->copy_texels.src_loc = src_loc; + cmd->copy_texels.dst_texture_offset = dst_copy_range.p0; + } +} + +void G_CopyTextureToTexture(G_CommandListHandle cl_handle, G_ResourceHandle dst_handle, Vec3I32 dst_offset, G_ResourceHandle src_handle, Rng3I32 src_copy_range) +{ + if ( + 
src_copy_range.p1.x > src_copy_range.p0.x && + src_copy_range.p1.y > src_copy_range.p0.y && + src_copy_range.p1.z > src_copy_range.p0.z + ) + { + G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle); + G_D12_Resource *src = G_D12_ResourceFromHandle(src_handle); + G_D12_Resource *dst = G_D12_ResourceFromHandle(dst_handle); + Assert(src->is_texture); + Assert(dst->is_texture); + + D3D12_TEXTURE_COPY_LOCATION src_loc = Zi; + D3D12_TEXTURE_COPY_LOCATION dst_loc = Zi; + { + src_loc.pResource = dst->d3d_resource; + src_loc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + src_loc.SubresourceIndex = 0; + } + { + dst_loc.pResource = dst->d3d_resource; + dst_loc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + dst_loc.SubresourceIndex = 0; + } + + G_D12_Cmd *cmd = G_D12_PushCmd(cl); + cmd->kind = G_D12_CmdKind_CopyTexels; + cmd->copy_texels.dst = dst; + cmd->copy_texels.src = src; + cmd->copy_texels.dst_loc = dst_loc; + cmd->copy_texels.src_loc = src_loc; + cmd->copy_texels.dst_texture_offset = dst_offset; + cmd->copy_texels.src_texture_range = src_copy_range; + } +} + +void G_CopyTextureToBuffer(G_CommandListHandle cl_handle, G_ResourceHandle dst_handle, Vec3I32 dst_offset, G_ResourceHandle src_handle, Rng3I32 src_copy_range) +{ + // TODO + Assert(0); +} + +//- Constant + +void G_SetConstantEx(G_CommandListHandle cl_handle, i32 slot, void *src_32bit, u32 size) +{ + G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle); + G_D12_Cmd *cmd = G_D12_PushCmd(cl); + cmd->kind = G_D12_CmdKind_Constant; + cmd->constant.slot = slot; + CopyBytes(&cmd->constant.value, src_32bit, MinU32(size, 4)); +} + +//- Memory sync + +void G_MemorySyncEx(G_CommandListHandle cl_handle, G_MemoryBarrierDesc desc) +{ + G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle); + G_D12_Cmd *cmd = G_D12_PushCmd(cl); + cmd->kind = G_D12_CmdKind_Barrier; + cmd->barrier.desc = desc; +} + +//- Compute + +void G_ComputeEx(G_CommandListHandle cl_handle, ComputeShaderDesc cs, Vec3I32 threads) +{ + if 
(threads.x > 0 && threads.y > 0 && threads.z > 0) + { + G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle); + G_D12_Cmd *cmd = G_D12_PushCmd(cl); + cmd->kind = G_D12_CmdKind_Compute; + cmd->compute.cs = cs; + cmd->compute.groups = G_GroupCountFromThreadCount(cs, threads); + } +} + +//- Rasterize + +void G_Rasterize( + G_CommandListHandle cl_handle, + VertexShaderDesc vs, PixelShaderDesc ps, + u32 instances_count, G_IndexBufferDesc index_buffer, + u32 render_targets_count, G_RenderTargetDesc *render_targets, + Rng3 viewport, Rng2 scissor, + G_RasterMode raster_mode +) +{ + if (instances_count > 0 && index_buffer.count > 0) + { + G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle); + G_D12_Cmd *cmd = G_D12_PushCmd(cl); + cmd->kind = G_D12_CmdKind_Rasterize; + cmd->rasterize.vs = vs; + cmd->rasterize.ps = ps; + cmd->rasterize.instances_count = instances_count; + cmd->rasterize.index_buffer_desc = index_buffer; + for (u32 rt_idx = 0; rt_idx < MinU32(render_targets_count, G_MaxRenderTargets); ++rt_idx) + { + cmd->rasterize.render_target_descs[rt_idx] = render_targets[rt_idx]; + } + cmd->rasterize.viewport = viewport; + cmd->rasterize.scissor = scissor; + cmd->rasterize.raster_mode = raster_mode; + } +} + +//- Clear + +void G_ClearRenderTarget(G_CommandListHandle cl_handle, G_ResourceHandle resource_handle, Vec4 color, i32 mip) +{ + G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle); + G_D12_Cmd *cmd = G_D12_PushCmd(cl); + cmd->kind = G_D12_CmdKind_ClearRtv; + cmd->clear_rtv.render_target = G_D12_ResourceFromHandle(resource_handle); + cmd->clear_rtv.color = color; + cmd->clear_rtv.mip = mip; +} + +//- Log + +void G_LogResource(G_CommandListHandle cl_handle, G_ResourceHandle resource_handle) +{ + G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle); + G_D12_Cmd *cmd = G_D12_PushCmd(cl); + cmd->kind = G_D12_CmdKind_Log; + cmd->log.resource = G_D12_ResourceFromHandle(resource_handle); +} + +//////////////////////////////////////////////////////////// +//~ 
@hookimpl Queue synchronization + +i64 G_CompletionValueFromQueue(G_QueueKind queue_kind) +{ + G_D12_Queue *queue = G_D12_QueueFromKind(queue_kind); + return ID3D12Fence_GetCompletedValue(queue->commit_fence); +} + +i64 G_CompletionTargetFromQueue(G_QueueKind queue_kind) +{ + G_D12_Queue *queue = G_D12_QueueFromKind(queue_kind); + i64 target = 0; + { + Lock lock = LockS(&queue->commit_mutex); + target = queue->commit_fence_target; + Unlock(&lock); + } + return target; +} + +G_QueueCompletions G_CompletionValuesFromQueues(G_QueueMask queue_mask) +{ + G_QueueCompletions completions = Zi; + for (G_QueueKind queue_kind = 0; queue_kind < G_QueueKind_COUNT; ++queue_kind) + { + if (queue_mask & (1 << queue_kind)) + { + completions.v[queue_kind] = G_CompletionValueFromQueue(queue_kind); + } + } + return completions; +} + +G_QueueCompletions G_CompletionTargetsFromQueues(G_QueueMask queue_mask) +{ + G_QueueCompletions completions = Zi; + for (G_QueueKind queue_kind = 0; queue_kind < G_QueueKind_COUNT; ++queue_kind) + { + if (queue_mask & (1 << queue_kind)) + { + completions.v[queue_kind] = G_CompletionTargetFromQueue(queue_kind); + } + } + return completions; +} + +void G_QueueSyncEx(G_QueueBarrierDesc desc) +{ + u64 fences_count = 0; + ID3D12Fence *fences[G_QueueKind_COUNT] = Zi; + i64 fence_targets[G_QueueKind_COUNT] = Zi; + + // Grab fences + for (G_QueueKind completion_queue_kind = 0; completion_queue_kind < G_QueueKind_COUNT; ++ completion_queue_kind) + { + G_D12_Queue *completion_queue = G_D12_QueueFromKind(completion_queue_kind); + i64 target = desc.completions.v[completion_queue_kind]; + if (target > 0) + { + i64 fence_value = ID3D12Fence_GetCompletedValue(completion_queue->commit_fence); + if (fence_value < target) + { + fences[fences_count] = completion_queue->commit_fence; + fence_targets[fences_count] = target; + fences_count += 1; + } + } + } + + // Sync Queues + for (G_QueueKind waiter_queue_kind = 0; waiter_queue_kind < G_QueueKind_COUNT; ++ 
waiter_queue_kind) + { + if (desc.wait_queues & (1 << waiter_queue_kind)) + { + G_D12_Queue *waiter_queue = G_D12_QueueFromKind(waiter_queue_kind); + for (u64 fence_idx = 0; fence_idx < fences_count; ++fence_idx) + { + ID3D12Fence *fence = fences[fence_idx]; + if (waiter_queue->commit_fence != fence) + { + i64 target = fence_targets[fence_idx]; + ID3D12CommandQueue_Wait(waiter_queue->d3d_queue, fence, target); + } + } + } + } + + // Sync Cpu + if (desc.wait_cpu && fences_count > 0) + { + if (G_D12_tl.sync_event == 0) + { + G_D12_tl.sync_event = CreateEvent(0, 0, 0, 0); + } + ID3D12Device1_SetEventOnMultipleFenceCompletion( + G_D12.device, + fences, + (u64 *)fence_targets, + fences_count, + D3D12_MULTIPLE_FENCE_WAIT_FLAG_ALL, + G_D12_tl.sync_event + ); + WaitForSingleObject(G_D12_tl.sync_event, INFINITE); + } +} + +//////////////////////////////////////////////////////////// +//~ @hookimpl Statistics + +G_Stats G_QueryStats(void) +{ + G_Stats result = Zi; + { + DXGI_QUERY_VIDEO_MEMORY_INFO info = Zi; + IDXGIAdapter3_QueryVideoMemoryInfo(G_D12.dxgi_adapter, 0, DXGI_MEMORY_SEGMENT_GROUP_LOCAL, &info); + result.device_committed = info.CurrentUsage; + result.device_budget = info.Budget; + } + { + DXGI_QUERY_VIDEO_MEMORY_INFO info = Zi; + IDXGIAdapter3_QueryVideoMemoryInfo(G_D12.dxgi_adapter, 0, DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL, &info); + result.host_budget = info.Budget; + result.host_committed = info.CurrentUsage; + } + result.arenas_count = Atomic64Fetch(&G_D12.arenas_count); + result.cumulative_nonreuse_count = Atomic64Fetch(&G_D12.cumulative_nonreuse_count); + return result; +} + +//////////////////////////////////////////////////////////// +//~ @hookimpl Swapchain + +G_SwapchainHandle G_AcquireSwapchain(u64 os_window_handle) +{ + G_D12_Swapchain *swapchain = 0; + { + Arena *perm = PermArena(); + swapchain = PushStruct(perm, G_D12_Swapchain); + } + swapchain->window_hwnd = (HWND)os_window_handle; + return G_D12_MakeHandle(G_SwapchainHandle, swapchain); +} + +void 
G_ReleaseSwapchain(G_SwapchainHandle swapchain_handle) +{ + // TODO +} + +G_ResourceHandle G_PrepareBackbuffer(G_SwapchainHandle swapchain_handle, G_Format format, Vec2I32 size) +{ + G_D12_Swapchain *swapchain = G_D12_SwapchainFromHandle(swapchain_handle); + size = VEC2I32(MaxI32(size.x, 1), MaxI32(size.y, 1)); + G_D12_Queue *direct_queue = G_D12_QueueFromKind(G_QueueKind_Direct); + + // Initialize swapchain + if (!swapchain->d3d_swapchain) + { + HRESULT hr = 0; + + // Create d3d swapchain + { + IDXGISwapChain3 *swapchain3 = 0; + { + // Create swapchain1 + IDXGISwapChain1 *swapchain1 = 0; + if (SUCCEEDED(hr)) + { + DXGI_SWAP_CHAIN_DESC1 desc = Zi; + desc.Format = G_D12_DxgiFormatFromGpuFormat(format); + desc.Width = size.x; + desc.Height = size.y; + desc.SampleDesc.Count = 1; + desc.SampleDesc.Quality = 0; + desc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT; + desc.BufferCount = G_D12_SwapchainBufferCount; + desc.Scaling = DXGI_SCALING_NONE; + desc.Flags = G_D12_SwapchainFlags; + desc.AlphaMode = DXGI_ALPHA_MODE_IGNORE; + desc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD; + hr = IDXGIFactory2_CreateSwapChainForHwnd(G_D12.dxgi_factory, (IUnknown *)direct_queue->d3d_queue, swapchain->window_hwnd, &desc, 0, 0, &swapchain1); + } + + // Upgrade to swapchain3 + if (SUCCEEDED(hr)) + { + hr = IDXGISwapChain1_QueryInterface(swapchain1, &IID_IDXGISwapChain3, (void **)&swapchain3); + IDXGISwapChain1_Release(swapchain1); + } + } + swapchain->d3d_swapchain = swapchain3; + swapchain->backbuffers_format = format; + swapchain->backbuffers_resolution = size; + } + + // Create waitable object + { + HANDLE waitable = 0; + if (SUCCEEDED(hr) && G_D12_FrameLatency > 0) + { + hr = IDXGISwapChain3_SetMaximumFrameLatency(swapchain->d3d_swapchain, G_D12_FrameLatency); + waitable = IDXGISwapChain2_GetFrameLatencyWaitableObject(swapchain->d3d_swapchain); + } + swapchain->waitable = waitable; + } + + // Create present fence + { + HANDLE present_event = 0; + ID3D12Fence *present_fence = 0; + if 
(SUCCEEDED(hr)) + { + present_event = CreateEvent(0, 0, 0, 0); + hr = ID3D12Device_CreateFence(G_D12.device, 0, 0, &IID_ID3D12Fence, (void **)&present_fence); + } + swapchain->present_fence = present_fence; + swapchain->present_event = present_event; + } + + // Disable Alt+Enter + IDXGIFactory_MakeWindowAssociation(G_D12.dxgi_factory, swapchain->window_hwnd, DXGI_MWA_NO_ALT_ENTER); + + if (FAILED(hr)) + { + Panic(Lit("Failed to create swapchain")); + } + } + + // Resize backbuffers + if (!MatchVec2I32(swapchain->backbuffers_resolution, size) || swapchain->backbuffers_format != format) + { + HRESULT hr = 0; + + // Wait for any previous backbuffer commands to finish + { + ID3D12Fence_SetEventOnCompletion(swapchain->present_fence, swapchain->present_fence_target, swapchain->present_event); + WaitForSingleObject(swapchain->present_event, INFINITE); + } + + // Release backbuffers + for (u32 i = 0; i < countof(swapchain->backbuffers); ++i) + { + G_D12_Resource *backbuffer = &swapchain->backbuffers[i]; + if (backbuffer->d3d_resource) + { + ID3D12Resource_Release(backbuffer->d3d_resource); + backbuffer->d3d_resource = 0; + } + } + + // Resize buffers + hr = IDXGISwapChain_ResizeBuffers(swapchain->d3d_swapchain, 0, size.x, size.y, DXGI_FORMAT_UNKNOWN, G_D12_SwapchainFlags); + if (FAILED(hr)) + { + // TODO: Don't panic + Panic(Lit("Failed to resize swapchain")); + } + } + + // Initialize backbuffers + { + for (u32 i = 0; i < countof(swapchain->backbuffers); ++i) + { + G_D12_Resource *backbuffer = &swapchain->backbuffers[i]; + if (!backbuffer->d3d_resource) + { + ID3D12Resource *d3d_resource = 0; + HRESULT hr = IDXGISwapChain3_GetBuffer(swapchain->d3d_swapchain, i, &IID_ID3D12Resource, (void **)&d3d_resource); + if (FAILED(hr)) + { + // TODO: Don't panic + Panic(Lit("Failed to retrieve swapchain buffer")); + } + ZeroStruct(backbuffer); + backbuffer->flags = G_ResourceFlag_AllowRenderTarget; + backbuffer->uid = Atomic64FetchAdd(&G_D12.resource_creation_gen.v, 1) + 1; + + 
ID3D12Resource_GetDesc(d3d_resource, (D3D12_RESOURCE_DESC *)&backbuffer->d3d_desc); + backbuffer->d3d_resource = d3d_resource; + + backbuffer->is_texture = 1; + backbuffer->texture_format = format; + backbuffer->texture_dims = VEC3I32(size.x, size.y, 1); + backbuffer->texture_mips = 1; + backbuffer->cmdlist_texture_layouts[0] = D3D12_BARRIER_LAYOUT_PRESENT; + backbuffer->swapchain = swapchain; + } + } + swapchain->backbuffers_format = format; + swapchain->backbuffers_resolution = size; + } + + // Wait for available backbuffer + if (swapchain->waitable) + { + DWORD wait_result = WaitForSingleObject(swapchain->waitable, 500); + if (wait_result == WAIT_TIMEOUT) + { + ID3D12Fence_SetEventOnCompletion(swapchain->present_fence, swapchain->present_fence_target, swapchain->present_event); + WaitForSingleObject(swapchain->present_event, INFINITE); + } + } + + // Grab current backbuffer + G_D12_Resource *cur_backbuffer = 0; + { + u32 backbuffer_idx = IDXGISwapChain3_GetCurrentBackBufferIndex(swapchain->d3d_swapchain); + cur_backbuffer = &swapchain->backbuffers[backbuffer_idx]; + } + + return G_D12_MakeHandle(G_ResourceHandle, cur_backbuffer); +} + +void G_CommitBackbuffer(G_ResourceHandle backbuffer_handle, i32 vsync) +{ + G_D12_Resource *backbuffer = G_D12_ResourceFromHandle(backbuffer_handle); + G_D12_Swapchain *swapchain = backbuffer->swapchain; + G_D12_Queue *direct_queue = G_D12_QueueFromKind(G_QueueKind_Direct); + + u32 present_flags = 0; + if (G_D12_TearingIsAllowed && vsync == 0) + { + present_flags |= DXGI_PRESENT_ALLOW_TEARING; + } + + // Present + { + HRESULT hr = IDXGISwapChain3_Present(swapchain->d3d_swapchain, vsync, present_flags); + if (!SUCCEEDED(hr)) + { + Assert(0); + } + } + + if (vsync != 0 && !(present_flags & DXGI_PRESENT_ALLOW_TEARING)) + { + // FIXME: Flush in windowed mode? 
+ // DwmFlush(); + } + + // Increment swapchain fence + { + u64 target = ++swapchain->present_fence_target; + ID3D12CommandQueue_Signal(direct_queue->d3d_queue, swapchain->present_fence, target); + } +} + +//////////////////////////////////////////////////////////// +//~ Collection worker + +// TODO: Move this to common + +void G_D12_CollectionWorkerEntryPoint(WaveLaneCtx *lane) +{ + for (;;) + { + // FIXME: Remove this + SleepSeconds(0.100); + + // Copy print-buffers to readback + for (G_QueueKind queue_kind = 0; queue_kind < G_QueueKind_COUNT; ++queue_kind) + { + G_D12_Queue *queue = G_D12_QueueFromKind(queue_kind); + if (!G_IsResourceNil(queue->print_buffer)) + { + G_CommandListHandle cl = G_PrepareCommandList(queue_kind); + { + // Copy print buffer to readback buffer + G_CopyBufferToBuffer(cl, queue->print_readback_buffer, 0, queue->print_buffer, RNGU64(0, queue->print_buffer_size)); + // Reset counters to 0 + G_MemorySync( + cl, queue->print_buffer, + G_Stage_Copy, G_Access_CopyRead, + G_Stage_Copy, G_Access_CopyWrite + ); + u8 zero[12] = Zi; + G_CopyCpuToBuffer(cl, queue->print_buffer, 0, zero, RNGU64(0, sizeof(zero))); + } + G_CommitCommandList(cl); + } + } + + // TODO: Collect asynchronously + G_QueueSyncCpu(G_QueueMask_Direct | G_QueueMask_AsyncCompute); + + for (G_QueueKind queue_kind = 0; queue_kind < G_QueueKind_COUNT; ++queue_kind) + { + G_D12_Queue *queue = G_D12_QueueFromKind(queue_kind); + if (!G_IsResourceNil(queue->print_buffer)) + { + u32 attempted_print_bytes_count = *(G_StructFromResource(queue->print_readback_buffer, u32) + 0); // The number of bytes shaders attempted to write + u32 prints_count = *(G_StructFromResource(queue->print_readback_buffer, u32) + 1); // The number of shader prints that are in the buffer + u32 overflows_count = *(G_StructFromResource(queue->print_readback_buffer, u32) + 2); // The number of shader prints that could not fit in the buffer + u8 *start = G_StructFromResource(queue->print_readback_buffer, u8) + 12; + + // 
Deserialize + if (GPU_SHADER_PRINT_LOG) + { + if (prints_count > 0) + { + LogDebugF( + "Forwarding logs collected from GPU - Resident prints: %F, Total attempted prints: %F, Total attempted bytes: %F", + FmtUint(prints_count), + FmtUint(prints_count + overflows_count), + FmtUint(attempted_print_bytes_count) + ); + } + + // FIXME: Remove this + TempArena scratch = BeginScratchNoConflict(); + u8 *at = start; + { + for (u32 print_idx = 0; print_idx < prints_count; ++print_idx) + { + u32 chars_count = 0; + u32 args_count = 0; + b32 internal_overflow = 0; + { + u32 header = *(u32 *)at; + chars_count = (header & 0x0000FFFF) >> 0; + args_count = (header & 0x7FFF0000) >> 16; + internal_overflow = (header & 0xF0000000) >> 31; + at += 4; + } + + String fmt = Zi; + { + fmt.len = chars_count; + fmt.text = at; + at += chars_count; + } + + FmtArgArray args = Zi; + args.count = args_count; + { + if (args_count > 0) + { + args.args = PushStructs(scratch.arena, FmtArg, args_count); + for (u32 arg_idx = 0; arg_idx < args_count; ++arg_idx) + { + G_FmtArgKind gpu_kind = (G_FmtArgKind)(*at); + at += 1; + + FmtArg *dst = &args.args[arg_idx]; + switch (gpu_kind) + { + // Translate unsigned integer args + case G_FmtArgKind_Uint: + { + u32 gpu_value = *(u32 *)at; + *dst = FmtUint(gpu_value); + at += 4; + } break; + case G_FmtArgKind_Uint2: + { + Vec2U32 gpu_value = *(Vec2U32 *)at; + *dst = FmtUint2(gpu_value); + at += 8; + } break; + case G_FmtArgKind_Uint3: + { + Vec3U32 gpu_value = *(Vec3U32 *)at; + *dst = FmtUint3(gpu_value); + at += 12; + } break; + case G_FmtArgKind_Uint4: + { + Vec4U32 gpu_value = *(Vec4U32 *)at; + *dst = FmtUint4(gpu_value); + at += 16; + } break; + + // Translate signed integer args + case G_FmtArgKind_Sint: + { + i32 gpu_value = *(i32 *)at; + *dst = FmtSint(gpu_value); + at += 4; + } break; + case G_FmtArgKind_Sint2: + { + Vec2I32 gpu_value = *(Vec2I32 *)at; + *dst = FmtSint2(gpu_value); + at += 8; + } break; + case G_FmtArgKind_Sint3: + { + Vec3I32 gpu_value = 
*(Vec3I32 *)at; + *dst = FmtSint3(gpu_value); + at += 12; + } break; + case G_FmtArgKind_Sint4: + { + Vec4I32 gpu_value = *(Vec4I32 *)at; + *dst = FmtSint4(gpu_value); + at += 16; + } break; + + // Translate float args + case G_FmtArgKind_Float: + { + f32 gpu_value = *(f32 *)at; + *dst = FmtFloat(gpu_value); + at += 4; + } break; + case G_FmtArgKind_Float2: + { + Vec2 gpu_value = *(Vec2 *)at; + *dst = FmtFloat2(gpu_value); + at += 8; + } break; + case G_FmtArgKind_Float3: + { + Vec3 gpu_value = *(Vec3 *)at; + *dst = FmtFloat3(gpu_value); + at += 12; + } break; + case G_FmtArgKind_Float4: + { + Vec4 gpu_value = *(Vec4 *)at; + *dst = FmtFloat4(gpu_value); + at += 16; + } break; + } + dst->p = 16; + } + } + } + + String final_str = Zi; + if (internal_overflow) + { + final_str = Lit("[Shader PrintF is too large]"); + } + else + { + final_str = FormatString(scratch.arena, fmt, args); + } + LogDebug(final_str); + + at = (u8 *)AlignU64((u64)at, 4); + } + } + EndScratch(scratch); + } + } + } + } +} + +//////////////////////////////////////////////////////////// +//~ Async + +void G_D12_TickAsync(WaveLaneCtx *lane, AsyncFrameLaneCtx *base_async_lane_frame) +{ + G_D12_AsyncCtx *async = &G_D12.async_ctx; + Arena *frame_arena = base_async_lane_frame->arena; + + // TODO: Investigate if we gain anything by going wide here (resource release might be exclusive driver-side) + if (lane->idx == 0) + { + // Pop pending releases + { + Lock lock = LockE(&G_D12.pending_releases_mutex); + { + if (G_D12.pending_releases.first) + { + if (async->pending_releases.last) + { + async->pending_releases.last->next = G_D12.pending_releases.first; + } + else + { + async->pending_releases.first = G_D12.pending_releases.first; + } + async->pending_releases.last = G_D12.pending_releases.last; + G_D12.pending_releases.first = 0; + G_D12.pending_releases.last = 0; + } + } + Unlock(&lock); + } + + // Release resources until we reach an uncompleted one + G_D12_Releasable *release = 
async->pending_releases.first; + if (release) + { + G_QueueCompletions completions = G_CompletionValuesFromQueues(G_QueueMask_All); + while (release) + { + G_D12_Releasable *next = release->next; + if (completions.v[release->completion_queue_kind] >= release->completion_queue_target) + { + SllQueuePop(async->pending_releases.first, async->pending_releases.last); + if (release->d3d_resource) + { + ID3D12Resource_Release(release->d3d_resource); + } + SllQueuePush(async->free_releases.first, async->free_releases.last, release); + } + else + { + break; + } + release = next; + } + } + + // Push releasable nodes to free list + if (async->pending_releases.first) + { + Lock lock = LockE(&G_D12.free_releases_mutex); + { + if (G_D12.free_releases.last) + { + G_D12.free_releases.last->next = async->free_releases.first; + } + else + { + G_D12.free_releases.first = async->free_releases.first; + } + G_D12.free_releases.last = async->free_releases.last; + async->free_releases.first = 0; + async->free_releases.last = 0; + } + Unlock(&lock); + } + } +} diff --git a/src/gpu_old/gpu_dx12/gpu_dx12_core.h b/src/gpu_old/gpu_dx12/gpu_dx12_core.h new file mode 100644 index 00000000..b2af8fb9 --- /dev/null +++ b/src/gpu_old/gpu_dx12/gpu_dx12_core.h @@ -0,0 +1,577 @@ +//////////////////////////////////////////////////////////// +//~ DirectX12 libs + +#pragma warning(push, 0) + #include + #include + #include +#pragma warning(pop) + +#pragma comment(lib, "d3d12") +#pragma comment(lib, "dxgi") + +//////////////////////////////////////////////////////////// +//~ Tweakable definitions + +#define G_D12_TearingIsAllowed 1 +#define G_D12_FrameLatency 1 +#define G_D12_SwapchainBufferCount 2 +#define G_D12_SwapchainFlags ( \ + ((G_D12_TearingIsAllowed != 0) * DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING) | \ + ((G_D12_FrameLatency != 0) * DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT) \ + ) + +#define G_D12_MaxCbvSrvUavDescriptors (1024 * 128) +#define G_D12_MaxSamplerDescriptors (1024 * 1) +#define 
G_D12_MaxRtvDescriptors (1024 * 64) + +#define G_D12_MaxMips 16 +#define G_D12_MaxNameLen 64 + +//////////////////////////////////////////////////////////// +//~ Pipeline types + +// NOTE: Must be zero initialized (including padding bits) for hashing +Struct(G_D12_PipelineDesc) +{ + VertexShaderDesc vs; + PixelShaderDesc ps; + ComputeShaderDesc cs; + b32 is_wireframe; + D3D12_PRIMITIVE_TOPOLOGY_TYPE topology_type; + G_Format render_target_formats[G_MaxRenderTargets]; + G_BlendMode render_target_blend_modes[G_MaxRenderTargets]; +}; + +Struct(G_D12_Pipeline) +{ + G_D12_Pipeline *next_in_bin; + u64 hash; + + G_D12_PipelineDesc desc; + ID3D12PipelineState *pso; + + b32 ok; + String error; +}; + +Struct(G_D12_PipelineBin) +{ + Mutex mutex; + G_D12_Pipeline *first; +}; + +//////////////////////////////////////////////////////////// +//~ Resource types + +Struct(G_D12_Resource) +{ + G_D12_Resource *next; + G_D12_Resource *prev; + + G_ResourceFlag flags; + u64 uid; + + // D3D12 resource + D3D12_RESOURCE_DESC1 d3d_desc; + ID3D12Resource *d3d_resource; + D3D12_GPU_VIRTUAL_ADDRESS buffer_gpu_address; + void *mapped; + + // Buffer info + u64 buffer_size; + u64 buffer_size_actual; + + // Texture info + b32 is_texture; + G_Format texture_format; + Vec3I32 texture_dims; + i32 texture_mips; + D3D12_BARRIER_LAYOUT cmdlist_texture_layouts[G_D12_MaxMips]; + + // Sampler info + G_SamplerDesc sampler_desc; + + // Backbuffer info + struct G_D12_Swapchain *swapchain; + + u64 name_len; + u8 name_text[G_D12_MaxNameLen]; +}; + +Struct(G_D12_ResourceList) +{ + u64 count; + G_D12_Resource *first; + G_D12_Resource *last; +}; + +//////////////////////////////////////////////////////////// +//~ Descriptor types + +Enum(G_D12_DescriptorHeapKind) +{ + G_D12_DescriptorHeapKind_CbvSrvUav, + G_D12_DescriptorHeapKind_Rtv, + G_D12_DescriptorHeapKind_Sampler, + + G_D12_DescriptorHeapKind_COUNT +}; + +Struct(G_D12_DescriptorHeap) +{ + Arena *descriptors_arena; + G_D12_DescriptorHeapKind kind; + + 
D3D12_DESCRIPTOR_HEAP_TYPE type; + u32 per_batch_count; + u32 descriptor_size; + ID3D12DescriptorHeap *d3d_heap; + D3D12_CPU_DESCRIPTOR_HANDLE start_handle; + + Mutex mutex; + struct G_D12_Descriptor *first_free; + u32 max_count; +}; + +Struct(G_D12_Descriptor) +{ + G_D12_Descriptor *next; + G_D12_Descriptor *prev; + + struct G_D12_Arena *gpu_arena; + G_QueueKind completion_queue_kind; + i64 completion_queue_target; + + G_D12_DescriptorHeap *heap; + D3D12_CPU_DESCRIPTOR_HANDLE first_handle; + u32 index; +}; + +Struct(G_D12_DescriptorList) +{ + u64 count; + G_D12_Descriptor *first; + G_D12_Descriptor *last; +}; + +//////////////////////////////////////////////////////////// +//~ Arena types + +// TODO: +// To support D3D12_RESOURCE_HEAP_TIER_1 devices, create separate heaps for: +// - Buffers +// - Non-render target & non-depth stencil textures +// - Render target or depth stencil textures +Enum(G_D12_ResourceHeapKind) +{ + G_D12_ResourceHeapKind_Gpu, + G_D12_ResourceHeapKind_Cpu, + G_D12_ResourceHeapKind_CpuWriteCombined, + + G_D12_ResourceHeapKind_COUNT +}; + +Struct(G_D12_Arena) +{ + Arena *arena; + + G_D12_DescriptorList descriptors; + G_D12_DescriptorList reset_descriptors_by_heap[G_D12_DescriptorHeapKind_COUNT]; + + G_D12_ResourceList resources; + G_D12_ResourceList reset_resources; + // G_D12_ResourceList free_resources; +}; + +//////////////////////////////////////////////////////////// +//~ Staging types + +Struct(G_D12_StagingRing) +{ + Arena *arena; + G_D12_Arena *gpu_arena; + u64 size; + + G_D12_Resource *resource; + u8 *base; + + struct G_D12_StagingRegionNode *head_region_node; + struct G_D12_StagingRegionNode *first_free_region_node; + +}; + +Struct(G_D12_StagingRegionNode) +{ + G_D12_StagingRing *ring; + + // Ring links (requires ring lock to read) + G_D12_StagingRegionNode *prev; + G_D12_StagingRegionNode *next; + + // Command list links + G_D12_StagingRegionNode *next_in_command_list; + + // Region info + Atomic64 completion_target; + u64 pos; +}; 
+ +//////////////////////////////////////////////////////////// +//~ Command queue types + +Struct(G_D12_CommandQueueDesc) +{ + D3D12_COMMAND_LIST_TYPE type; + D3D12_COMMAND_QUEUE_PRIORITY priority; + String name; +}; + +Struct(G_D12_Queue) +{ + ID3D12CommandQueue *d3d_queue; + G_D12_CommandQueueDesc desc; + + Mutex commit_mutex; + ID3D12Fence *commit_fence; + u64 commit_fence_target; + + // Global resources + u64 print_buffer_size; + G_ResourceHandle print_buffer; + G_ResourceHandle print_readback_buffer; + G_ByteAddressBufferRef print_buffer_ref; + + // Raw command lists + struct G_D12_RawCommandList *first_committed_cl; + struct G_D12_RawCommandList *last_committed_cl; + + // Staging heap + Mutex staging_mutex; + G_D12_StagingRing *staging_ring; + + Fence sync_fence; +}; + +//////////////////////////////////////////////////////////// +//~ Raw command list types + +Struct(G_D12_RawCommandList) +{ + G_D12_Queue *queue; + G_D12_RawCommandList *next; + + u64 commit_fence_target; + + ID3D12CommandAllocator *d3d_ca; + ID3D12GraphicsCommandList7 *d3d_cl; + + // Direct queue command lists keep a constant list of CPU-only descriptors + G_D12_Descriptor *rtv_descriptors[G_MaxRenderTargets]; + G_D12_Descriptor *rtv_clear_descriptor; +}; + +//////////////////////////////////////////////////////////// +//~ Releasable types + +Struct(G_D12_Releasable) +{ + G_D12_Releasable *next; + + G_QueueKind completion_queue_kind; + i64 completion_queue_target; + + ID3D12Resource *d3d_resource; + + u64 name_len; + u8 name_text[G_D12_MaxNameLen]; +}; + +Struct(G_D12_ReleasableList) +{ + G_D12_Releasable *first; + G_D12_Releasable *last; +}; + +//////////////////////////////////////////////////////////// +//~ Command list types + +#define G_D12_CmdsPerChunk 256 + +Enum(G_D12_CmdKind) +{ + G_D12_CmdKind_None, + G_D12_CmdKind_Barrier, + G_D12_CmdKind_Constant, + G_D12_CmdKind_CopyBytes, + G_D12_CmdKind_CopyTexels, + G_D12_CmdKind_Compute, + G_D12_CmdKind_Rasterize, + G_D12_CmdKind_ClearRtv, + 
G_D12_CmdKind_Log, + G_D12_CmdKind_Discard, +}; + +Struct(G_D12_Cmd) +{ + G_D12_CmdKind kind; + b32 skip; + union + { + struct + { + i32 slot; + u32 value; + } constant; + + struct + { + G_MemoryBarrierDesc desc; + + // Post-batch data + b32 is_end_of_batch; + u64 batch_gen; + } barrier; + + struct + { + G_D12_Resource *dst; + G_D12_Resource *src; + u64 dst_offset; + RngU64 src_range; + } copy_bytes; + + struct + { + G_D12_Resource *dst; + G_D12_Resource *src; + D3D12_TEXTURE_COPY_LOCATION dst_loc; + D3D12_TEXTURE_COPY_LOCATION src_loc; + Vec3I32 dst_texture_offset; + Rng3I32 src_texture_range; + } copy_texels; + + struct + { + ComputeShaderDesc cs; + Vec3I32 groups; + } compute; + + struct + { + VertexShaderDesc vs; + PixelShaderDesc ps; + u32 instances_count; + G_IndexBufferDesc index_buffer_desc; + G_RenderTargetDesc render_target_descs[G_MaxRenderTargets]; + Rng3 viewport; + Rng2 scissor; + G_RasterMode raster_mode; + } rasterize; + + struct + { + G_D12_Resource *render_target; + Vec4 color; + i32 mip; + } clear_rtv; + + struct + { + G_D12_Resource *resource; + } log; + + struct + { + G_D12_Resource *resource; + } discard; + }; +}; + +Struct(G_D12_CmdChunk) +{ + G_D12_CmdChunk *next; + struct G_D12_CmdList *cl; + G_D12_Cmd *cmds; + u64 cmds_count; +}; + +Struct(G_D12_CmdList) +{ + G_D12_CmdList *next; + G_QueueKind queue_kind; + + G_D12_DescriptorList reset_descriptors; + G_D12_ReleasableList releases; + + G_D12_StagingRegionNode *first_staging_region; + G_D12_StagingRegionNode *last_staging_region; + + G_D12_CmdChunk *first_cmd_chunk; + G_D12_CmdChunk *last_cmd_chunk; + u64 chunks_count; + u64 cmds_count; +}; + +//////////////////////////////////////////////////////////// +//~ Swapchain types + +Struct(G_D12_Swapchain) +{ + + IDXGISwapChain3 *d3d_swapchain; + HWND window_hwnd; + HANDLE waitable; + + HANDLE present_event; + ID3D12Fence *present_fence; + u64 present_fence_target; + + G_Format backbuffers_format; + Vec2I32 backbuffers_resolution; + G_D12_Resource 
backbuffers[G_D12_SwapchainBufferCount]; +}; + +//////////////////////////////////////////////////////////// +//~ State types + +Struct(G_D12_AsyncCtx) +{ + G_D12_ReleasableList pending_releases; + G_D12_ReleasableList free_releases; +}; + +Struct(G_D12_Ctx) +{ + IsolatedAtomic64 resource_creation_gen; + b32 independent_devices_enabled; + b32 debug_layer_enabled; + b32 validation_layer_enabled; + + // Stats + Atomic64 arenas_count; + Atomic64 cumulative_nonreuse_count; + + Atomic64 driver_resources_allocated; + Atomic64 driver_descriptors_allocated; + + // Queues + G_D12_Queue queues[G_QueueKind_COUNT]; + + // Descriptor heaps + G_D12_DescriptorHeap descriptor_heaps[G_D12_DescriptorHeapKind_COUNT]; + + // Rootsig + ID3D12RootSignature *bindless_rootsig; + + // Pipelines + G_D12_PipelineBin pipeline_bins[1024]; + + // Command lists + Mutex free_cmd_lists_mutex; + G_D12_CmdList *first_free_cmd_list; + + // Command chunks + Mutex free_cmd_chunks_mutex; + G_D12_CmdChunk *first_free_cmd_chunk; + + // Swapchains + Mutex free_swapchains_mutex; + G_D12_Swapchain *first_free_swapchain; + + // Independent device (only valid when independent_devices_enabled = 1) + struct + { + ID3D12SDKConfiguration1 *sdk_config; + ID3D12DeviceConfiguration *device_config; + ID3D12DeviceFactory *device_factory; + } independent; + + // Device + IDXGIFactory6 *dxgi_factory; + IDXGIAdapter3 *dxgi_adapter; + ID3D12Device10 *device; + + // Release-queue + Mutex pending_releases_mutex; + Mutex free_releases_mutex; + G_D12_ReleasableList pending_releases; + G_D12_ReleasableList free_releases; + + // Async + G_D12_AsyncCtx async_ctx; +}; + +Struct(G_D12_ThreadLocalCtx) +{ + HANDLE sync_event; +}; + +extern G_D12_Ctx G_D12; +extern ThreadLocal G_D12_ThreadLocalCtx G_D12_tl; + +//////////////////////////////////////////////////////////// +//~ Helpers + +#define G_D12_MakeHandle(type, ptr) (type) { .v = (u64)(ptr) } + +G_D12_Arena *G_D12_ArenaFromHandle(G_ArenaHandle handle); +G_D12_CmdList 
*G_D12_CmdListFromHandle(G_CommandListHandle handle); +G_D12_Resource *G_D12_ResourceFromHandle(G_ResourceHandle handle); +G_D12_Swapchain *G_D12_SwapchainFromHandle(G_SwapchainHandle handle); + +DXGI_FORMAT G_D12_DxgiFormatFromGpuFormat(G_Format format); +D3D12_BARRIER_SYNC G_D12_BarrierSyncFromStages(G_Stage stages); +D3D12_BARRIER_ACCESS G_D12_BarrierAccessFromAccesses(G_Access accesses); +D3D12_BARRIER_LAYOUT G_D12_BarrierLayoutFromLayout(G_Layout layout); +String G_D12_NameFromBarrierLayout(D3D12_BARRIER_LAYOUT layout); + +void G_D12_InitRtv(G_D12_Resource *resource, D3D12_CPU_DESCRIPTOR_HANDLE rtv_handle, i32 mip); + +void G_D12_SetObjectName(ID3D12Object *object, String name); +String G_D12_NameFromObject(Arena *arena, ID3D12Object *object); + +//////////////////////////////////////////////////////////// +//~ Pipeline + +G_D12_Pipeline *G_D12_PipelineFromDesc(G_D12_PipelineDesc desc); +u64 G_D12_HashFromPipelineDesc(G_D12_PipelineDesc desc); + +//////////////////////////////////////////////////////////// +//~ Queue + +G_D12_Queue *G_D12_QueueFromKind(G_QueueKind kind); + +//////////////////////////////////////////////////////////// +//~ Raw command list + +G_D12_RawCommandList *G_D12_PrepareRawCommandList(G_QueueKind queue_kind); +i64 G_D12_CommitRawCommandList(G_D12_RawCommandList *cl); + +//////////////////////////////////////////////////////////// +//~ Arena + +void G_D12_ResetArena(G_D12_CmdList *cl, G_D12_Arena *gpu_arena); + +//////////////////////////////////////////////////////////// +//~ Descriptor + +G_D12_Descriptor *G_D12_DescriptorFromIndex(G_D12_DescriptorHeapKind heap_kind, u32 index); +G_D12_Descriptor *G_D12_PushDescriptor(G_D12_Arena *gpu_arena, G_D12_DescriptorHeapKind heap_kind); + +//////////////////////////////////////////////////////////// +//~ Command helpers + +G_D12_Cmd *G_D12_PushCmd(G_D12_CmdList *cl); +G_D12_Cmd *G_D12_PushConstCmd(G_D12_CmdList *cl, i32 slot, void *v); +G_D12_StagingRegionNode 
*G_D12_PushStagingRegion(G_D12_CmdList *cl, u64 size); + +//////////////////////////////////////////////////////////// +//~ Collection worker + +void G_D12_CollectionWorkerEntryPoint(WaveLaneCtx *lane); + +//////////////////////////////////////////////////////////// +//~ Async + +void G_D12_TickAsync(WaveLaneCtx *lane, AsyncFrameLaneCtx *base_async_lane_frame); diff --git a/src/gpu_old/gpu_dx12/gpu_dx12_res/AgilitySDK/1.618.5/D3D12Core.dat b/src/gpu_old/gpu_dx12/gpu_dx12_res/AgilitySDK/1.618.5/D3D12Core.dat new file mode 100644 index 00000000..9cfa41c0 --- /dev/null +++ b/src/gpu_old/gpu_dx12/gpu_dx12_res/AgilitySDK/1.618.5/D3D12Core.dat @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55430c370d4f012ef7b2e7854fd194ed8abb2c94a537835be12bd38f9ff80e67 +size 1662796 diff --git a/src/gpu_old/gpu_dx12/gpu_dx12_res/AgilitySDK/1.618.5/d3d12SDKLayers.dat b/src/gpu_old/gpu_dx12/gpu_dx12_res/AgilitySDK/1.618.5/d3d12SDKLayers.dat new file mode 100644 index 00000000..222b2b8b --- /dev/null +++ b/src/gpu_old/gpu_dx12/gpu_dx12_res/AgilitySDK/1.618.5/d3d12SDKLayers.dat @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd64750d758873691414f705c2fdff08ffd25437f77198d3ee00c9040f48856a +size 1775662 diff --git a/src/gpu_old/gpu_res/noise_128x128x64_16.dat b/src/gpu_old/gpu_res/noise_128x128x64_16.dat new file mode 100644 index 00000000..bfbdffc9 --- /dev/null +++ b/src/gpu_old/gpu_res/noise_128x128x64_16.dat @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35c141664e6879a3a336816112a8fbabe72067d5dcdd57c130d836de6dda5b2e +size 2097152 diff --git a/src/gpu_old/gpu_shared.cgh b/src/gpu_old/gpu_shared.cgh new file mode 100644 index 00000000..c88f6d80 --- /dev/null +++ b/src/gpu_old/gpu_shared.cgh @@ -0,0 +1,333 @@ +//////////////////////////////////////////////////////////// +//~ Ref types + +Enum(G_RefKind) +{ + G_RefKind_StructuredBuffer, + G_RefKind_ByteAddressBuffer, + G_RefKind_Texture1D, + G_RefKind_Texture2D, + 
G_RefKind_Texture3D, + G_RefKind_SamplerState, +}; + +Struct(G_StructuredBufferRef) { u32 v; }; +Struct(G_ByteAddressBufferRef) { u32 v; }; +Struct(G_Texture1DRef) { u32 v; }; +Struct(G_Texture2DRef) { u32 v; }; +Struct(G_Texture3DRef) { u32 v; }; +Struct(G_SamplerStateRef) { u32 v; }; + +#define G_IsRefNil(r) ((r).v == 0) + +//////////////////////////////////////////////////////////// +//~ Constant types + +// +// D3D12 exposes 64 root constants and Vulkan exposes 32 push constants. +// Supposedly AMD hardware will start spilling constants once more than +// 12 are in use - https://gpuopen.com/learn/rdna-performance-guide/ +// +#define G_NumGeneralPurposeConstants (24) // Constants available for any usage +#define G_NumReservedConstants (4) // Constants reserved for internal usage by the GPU layer +#define G_NumConstants (G_NumGeneralPurposeConstants + G_NumReservedConstants) + +#if IsCpu + #define G_ForceDeclConstant(type, name, slot) \ + enum { name = slot }; \ + Struct(name##__shaderconstanttype) { type v; } + #define G_DeclConstant(type, name, slot) \ + StaticAssert(sizeof(type) <= 4); \ + StaticAssert(slot < G_NumGeneralPurposeConstants); \ + G_ForceDeclConstant(type, name, slot) +#else + #define G_ForceDeclConstant(type, name, slot) cbuffer name : register(b##slot) { type name; } + #define G_DeclConstant(type, name, slot) G_ForceDeclConstant(type, name, slot) +#endif + +//////////////////////////////////////////////////////////// +//~ Reserved constants + +// The constants declared below assume this configuration is accurate for slot usage +StaticAssert(G_NumGeneralPurposeConstants == 24); +StaticAssert(G_NumReservedConstants >= 3); + +G_ForceDeclConstant(G_ByteAddressBufferRef, G_ShaderConst_PrintBufferRef, 24); +G_ForceDeclConstant(b32, G_ShaderConst_TweakB32, 25); +G_ForceDeclConstant(f32, G_ShaderConst_TweakF32, 26); + +#if IsGpu + #define G_TweakBool G_ShaderConst_TweakB32 + #define G_TweakFloat G_ShaderConst_TweakF32 +#endif + 
+//////////////////////////////////////////////////////////// +//~ Basic samplers + +Enum(G_BasicSamplerKind) +{ + G_BasicSamplerKind_PointClamp, + G_BasicSamplerKind_PointWrap, + G_BasicSamplerKind_PointMirror, + G_BasicSamplerKind_BilinearClamp, + G_BasicSamplerKind_BilinearWrap, + G_BasicSamplerKind_BilinearMirror, + G_BasicSamplerKind_TrilinearClamp, + G_BasicSamplerKind_TrilinearWrap, + G_BasicSamplerKind_TrilinearMirror, + + G_BasicSamplerKind_COUNT +}; + +//////////////////////////////////////////////////////////// +//~ Resource dereference + +#if IsGpu + // NOTE: Uniform dereferencing is faster than Non-Uniform on AMD hardware + + //- Scalar/Uniform dereference + SamplerState G_SDeref(G_SamplerStateRef r) { return SamplerDescriptorHeap[r.v]; } + template StructuredBuffer G_SDeref(G_StructuredBufferRef r) { return ResourceDescriptorHeap[r.v]; } + ByteAddressBuffer G_SDeref(G_ByteAddressBufferRef r) { return ResourceDescriptorHeap[r.v]; } + template Texture1D G_SDeref(G_Texture1DRef r) { return ResourceDescriptorHeap[r.v]; } + template Texture2D G_SDeref(G_Texture2DRef r) { return ResourceDescriptorHeap[r.v]; } + template Texture3D G_SDeref(G_Texture3DRef r) { return ResourceDescriptorHeap[r.v]; } + template RWStructuredBuffer G_SDerefRW(G_StructuredBufferRef r) { return ResourceDescriptorHeap[r.v + 1]; } + RWByteAddressBuffer G_SDerefRW(G_ByteAddressBufferRef r) { return ResourceDescriptorHeap[r.v + 1]; } + template RWTexture1D G_SDerefRW(G_Texture1DRef r) { return ResourceDescriptorHeap[r.v + 1]; } + template RWTexture2D G_SDerefRW(G_Texture2DRef r) { return ResourceDescriptorHeap[r.v + 1]; } + template RWTexture3D G_SDerefRW(G_Texture3DRef r) { return ResourceDescriptorHeap[r.v + 1]; } + + //- Vector/Non-Uniform dereference + SamplerState G_VDeref(G_SamplerStateRef r) { return SamplerDescriptorHeap[NonUniformResourceIndex(r.v)]; } + template StructuredBuffer G_VDeref(G_StructuredBufferRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v)]; } 
+ ByteAddressBuffer G_VDeref(G_ByteAddressBufferRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v)]; } + template Texture1D G_VDeref(G_Texture1DRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v)]; } + template Texture2D G_VDeref(G_Texture2DRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v)]; } + template Texture3D G_VDeref(G_Texture3DRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v)]; } + template RWStructuredBuffer G_VDerefRW(G_StructuredBufferRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 1)]; } + RWByteAddressBuffer G_VDerefRW(G_ByteAddressBufferRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 1)]; } + template RWTexture1D G_VDerefRW(G_Texture1DRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 1)]; } + template RWTexture2D G_VDerefRW(G_Texture2DRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 1)]; } + template RWTexture3D G_VDerefRW(G_Texture3DRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 1)]; } +#endif + +//////////////////////////////////////////////////////////// +//~ Resource countof + +#define G_MaxMips 16 +#define G_MaxRenderTargets 8 + +#if IsGpu + template u32 countof(StructuredBuffer obj) { u32 result; obj.GetDimensions(result); return result; } + template u32 countof(RWStructuredBuffer obj) { u32 result; u32 stride; obj.GetDimensions(result, stride); return result; } + u32 countof(ByteAddressBuffer obj) { u32 result; obj.GetDimensions(result); return result; } + u32 countof(RWByteAddressBuffer obj) { u32 result; obj.GetDimensions(result); return result; } + template u32 countof(Texture1D obj) { u32 result; obj.GetDimensions(result); return result; } + template u32 countof(RWTexture1D obj) { u32 result; obj.GetDimensions(result); return result; } + template Vec2U32 countof(Texture2D obj) { Vec2U32 result; obj.GetDimensions(result.x, result.y); return result; } + template Vec2U32 
countof(RWTexture2D obj) { Vec2U32 result; obj.GetDimensions(result.x, result.y); return result; } + template Vec3U32 countof(Texture3D obj) { Vec3U32 result; obj.GetDimensions(result.x, result.y, result.z); return result; } + template Vec3U32 countof(RWTexture3D obj) { Vec3U32 result; obj.GetDimensions(result.x, result.y, result.z); return result; } +#endif + +//////////////////////////////////////////////////////////// +//~ Debug printf + +// This technique is based on MJP's article - https://therealmjp.github.io/posts/hlsl-printf/ + +Enum(G_FmtArgKind) +{ + G_FmtArgKind_None, + G_FmtArgKind_End, + + G_FmtArgKind_BEGINSIZE1, + + G_FmtArgKind_Uint, + G_FmtArgKind_Sint, + G_FmtArgKind_Float, + + G_FmtArgKind_BEGINSIZE2, + + G_FmtArgKind_Uint2, + G_FmtArgKind_Sint2, + G_FmtArgKind_Float2, + + G_FmtArgKind_BEGINSIZE3, + + G_FmtArgKind_Uint3, + G_FmtArgKind_Sint3, + G_FmtArgKind_Float3, + + G_FmtArgKind_BEGINSIZE4, + + G_FmtArgKind_Uint4, + G_FmtArgKind_Sint4, + G_FmtArgKind_Float4, +}; + +Struct(G_FmtArg) +{ + G_FmtArgKind kind; + Vec4U32 v; +}; + +#if IsGpu && GPU_SHADER_PRINT + G_FmtArg G_Fmt(u32 v) { G_FmtArg result; result.kind = G_FmtArgKind_Uint; result.v.x = v; return result; } + G_FmtArg G_Fmt(Vec2U32 v) { G_FmtArg result; result.kind = G_FmtArgKind_Uint2; result.v.xy = v.xy; return result; } + G_FmtArg G_Fmt(Vec3U32 v) { G_FmtArg result; result.kind = G_FmtArgKind_Uint3; result.v.xyz = v.xyz; return result; } + G_FmtArg G_Fmt(Vec4U32 v) { G_FmtArg result; result.kind = G_FmtArgKind_Uint4; result.v.xyzw = v.xyzw; return result; } + + G_FmtArg G_Fmt(i32 v) { G_FmtArg result; result.kind = G_FmtArgKind_Sint; result.v.x = v; return result; } + G_FmtArg G_Fmt(Vec2I32 v) { G_FmtArg result; result.kind = G_FmtArgKind_Sint2; result.v.xy = v.xy; return result; } + G_FmtArg G_Fmt(Vec3I32 v) { G_FmtArg result; result.kind = G_FmtArgKind_Sint3; result.v.xyz = v.xyz; return result; } + G_FmtArg G_Fmt(Vec4I32 v) { G_FmtArg result; result.kind = G_FmtArgKind_Sint4; 
result.v.xyzw = v.xyzw; return result; } + + G_FmtArg G_Fmt(f32 v) { G_FmtArg result; result.kind = G_FmtArgKind_Float; result.v.x = asuint(v); return result; } + G_FmtArg G_Fmt(Vec2 v) { G_FmtArg result; result.kind = G_FmtArgKind_Float2; result.v.xy = asuint(v.xy); return result; } + G_FmtArg G_Fmt(Vec3 v) { G_FmtArg result; result.kind = G_FmtArgKind_Float3; result.v.xyz = asuint(v.xyz); return result; } + G_FmtArg G_Fmt(Vec4 v) { G_FmtArg result; result.kind = G_FmtArgKind_Float4; result.v.xyzw = asuint(v.xyzw); return result; } + + G_FmtArg G_FmtEnd(void) { G_FmtArg result; result.kind = G_FmtArgKind_End; return result; } + + Struct(G_TempPrintBuffer) + { + // NOTE: The larger the array size, the longer the compilation time + u32 byte_chunks[64]; + u32 bytes_count; + u32 chars_count; + u32 args_count; + b32 overflowed; + }; + + void G_PushPrintByte(inout G_TempPrintBuffer buff, u32 v) + { + u32 chunk_idx = buff.bytes_count / 4; + if (chunk_idx < countof(buff.byte_chunks)) + { + u32 byte_idx_in_chunk = buff.bytes_count & 0x03; + if (byte_idx_in_chunk == 0) + { + // Since buff is not zero initialized, we set the chunk on first write here + buff.byte_chunks[chunk_idx] = v & 0xFF; + } + else + { + buff.byte_chunks[chunk_idx] |= (v & 0xFF) << (byte_idx_in_chunk * 8); + } + buff.bytes_count += 1; + } + else + { + buff.overflowed = 1; + } + } + + void G_CommitPrint(G_TempPrintBuffer buff) + { + RWByteAddressBuffer rw = G_SDerefRW(G_ShaderConst_PrintBufferRef); + + if (buff.overflowed) + { + buff.bytes_count = 0; + buff.chars_count = 0; + buff.args_count = 0; + } + + u32 chunks_count = (buff.bytes_count + 3) / 4; + u32 alloc_size = 0; + alloc_size += 4; // Header + alloc_size += chunks_count * 4; // Chunks + + // Atomic fetch + add to base counter + u32 base; + rw.InterlockedAdd(0, alloc_size, base); + base += 4; // Offset for allocation counter + base += 4; // Offset for success counter + base += 4; // Offset for overflow counter + + if ((base + alloc_size) < 
countof(rw)) + { + // Increment success counter + rw.InterlockedAdd(4, 1); + u32 pos = 0; + + // Write header + { + u32 header = 0; + header |= (buff.chars_count << 0) & 0x0000FFFF; + header |= (buff.args_count << 16) & 0x7FFF0000; + header |= (buff.overflowed << 31) & 0xF0000000; + rw.Store(base + pos, header); + pos += 4; + } + + // Write chunks + for (u32 chunk_idx = 0; chunk_idx < chunks_count; ++chunk_idx) + { + u32 chunk = buff.byte_chunks[chunk_idx]; + rw.Store(base + pos, chunk); + pos += 4; + } + } + else + { + // Increment overflow counter + rw.InterlockedAdd(8, 1); + } + } + + #define G_PrintF_(fmt, ...) do { \ + G_TempPrintBuffer __tmp; \ + __tmp.bytes_count = 0; \ + __tmp.overflowed = 0; \ + u32 __char_idx = 0; \ + while (U32FromChar(fmt[__char_idx]) != 0) \ + { \ + G_PushPrintByte(__tmp, U32FromChar(fmt[__char_idx])); \ + ++__char_idx; \ + } \ + G_FmtArg __args[] = { __VA_ARGS__ }; \ + __tmp.chars_count = __char_idx; \ + __tmp.args_count = (countof(__args) - 1); \ + for (u32 __arg_idx = 0; __arg_idx < __tmp.args_count; ++__arg_idx) \ + { \ + G_FmtArg __arg = __args[__arg_idx]; \ + G_PushPrintByte(__tmp, __arg.kind); \ + if (__arg.kind > G_FmtArgKind_BEGINSIZE1) \ + { \ + G_PushPrintByte(__tmp, __arg.v.x >> 0); \ + G_PushPrintByte(__tmp, __arg.v.x >> 8); \ + G_PushPrintByte(__tmp, __arg.v.x >> 16); \ + G_PushPrintByte(__tmp, __arg.v.x >> 24); \ + } \ + if (__arg.kind > G_FmtArgKind_BEGINSIZE2) \ + { \ + G_PushPrintByte(__tmp, __arg.v.y >> 0); \ + G_PushPrintByte(__tmp, __arg.v.y >> 8); \ + G_PushPrintByte(__tmp, __arg.v.y >> 16); \ + G_PushPrintByte(__tmp, __arg.v.y >> 24); \ + } \ + if (__arg.kind > G_FmtArgKind_BEGINSIZE3) \ + { \ + G_PushPrintByte(__tmp, __arg.v.z >> 0); \ + G_PushPrintByte(__tmp, __arg.v.z >> 8); \ + G_PushPrintByte(__tmp, __arg.v.z >> 16); \ + G_PushPrintByte(__tmp, __arg.v.z >> 24); \ + } \ + if (__arg.kind > G_FmtArgKind_BEGINSIZE4) \ + { \ + G_PushPrintByte(__tmp, __arg.v.w >> 0); \ + G_PushPrintByte(__tmp, __arg.v.w >> 8); \ + 
G_PushPrintByte(__tmp, __arg.v.w >> 16); \ + G_PushPrintByte(__tmp, __arg.v.w >> 24); \ + } \ + } \ + G_CommitPrint(__tmp); \ + } while (0) + + #define G_PrintF(fmt, ...) G_PrintF_(fmt, ##__VA_ARGS__, G_FmtEnd()) + +#else + #define G_PrintF(fmt) +#endif