From ac967161a2b2323b195693dd5e36cd14f3ce718c Mon Sep 17 00:00:00 2001 From: jacob Date: Mon, 9 Mar 2026 22:12:40 -0700 Subject: [PATCH] more descriptor refactor wip --- src/gpu/gpu_common.c | 57 +-- src/gpu/gpu_common.h | 20 +- src/gpu/gpu_core.h | 89 ++-- src/gpu/gpu_dx12/gpu_dx12_core.c | 717 ++++++++++++++++++++++++------- src/gpu/gpu_dx12/gpu_dx12_core.h | 62 ++- src/gpu/gpu_shared.cgh | 4 +- 6 files changed, 696 insertions(+), 253 deletions(-) diff --git a/src/gpu/gpu_common.c b/src/gpu/gpu_common.c index ab1e51fd..b9591b01 100644 --- a/src/gpu/gpu_common.c +++ b/src/gpu/gpu_common.c @@ -18,7 +18,7 @@ void G_BootstrapCommon(void) // Init blank texture { - G.blank_tex = G_PushTexture2D( + G.blank_tex2d = G_PushTexture2D( cl, gpu_perm, G_TextureLayout_Common, G_Format_R8G8B8A8_Unorm, @@ -36,7 +36,7 @@ void G_BootstrapCommon(void) { Panic(Lit("Unexpected noise texture size")); } - G_Texture3DRef noise_tex = G_PushTexture3D( + G.basic_noise_tex3d = G_PushTexture3D( cl, gpu_perm, G_TextureLayout_Common, G_Format_R16_Uint, @@ -45,17 +45,16 @@ void G_BootstrapCommon(void) ); G_CopyCpuToTexture( cl, - noise_tex, VEC3I32(0, 0, 0), + G.basic_noise_tex3d, VEC3I32(0, 0, 0), noise_data.text, noise_dims, RNG3I32(VEC3I32(0, 0, 0), noise_dims) ); - G.basic_noise = G_PushTexture3DRef(gpu_perm, noise_tex); } // Init basic samplers for (G_BasicSamplerKind sampler_kind = 0; sampler_kind < countof(G.basic_samplers); ++sampler_kind) { - G_SamplerStateRef sampler = Zi; + G_SamplerRef sampler = Zi; switch (sampler_kind) { default: @@ -67,64 +66,55 @@ void G_BootstrapCommon(void) { G_Filter filter = G_Filter_MinMagMipPoint; G_AddressMode address_mode = G_AddressMode_Clamp; - G_ResourceHandle sampler_res = G_PushSampler(cl, gpu_perm, .filter = filter, .x = address_mode, .y = address_mode, .z = address_mode); - sampler = G_PushSamplerStateRef(gpu_perm, sampler_res); + sampler = G_PushSampler(cl, gpu_perm, .filter = filter, .x = address_mode, .y = address_mode, .z = address_mode); } break; case G_BasicSamplerKind_PointWrap: { G_Filter filter = G_Filter_MinMagMipPoint; G_AddressMode address_mode = G_AddressMode_Wrap; - G_ResourceHandle sampler_res = G_PushSampler(cl, gpu_perm, .filter = filter, .x = address_mode, .y = address_mode, .z = address_mode); - sampler = G_PushSamplerStateRef(gpu_perm, sampler_res); + sampler = G_PushSampler(cl, gpu_perm, .filter = filter, .x = address_mode, .y = address_mode, .z = address_mode); } break; case G_BasicSamplerKind_PointMirror: { G_Filter filter = G_Filter_MinMagMipPoint; G_AddressMode address_mode = G_AddressMode_Mirror; - G_ResourceHandle sampler_res = G_PushSampler(cl, gpu_perm, .filter = filter, .x = address_mode, .y = address_mode, .z = address_mode); - sampler = G_PushSamplerStateRef(gpu_perm, sampler_res); + sampler = G_PushSampler(cl, gpu_perm, .filter = filter, .x = address_mode, .y = address_mode, .z = address_mode); } break; case G_BasicSamplerKind_BilinearClamp: { G_Filter filter = G_Filter_MinMagLinearMipPoint; G_AddressMode address_mode = G_AddressMode_Clamp; - G_ResourceHandle sampler_res = G_PushSampler(cl, gpu_perm, .filter = filter, .x = address_mode, .y = address_mode, .z = address_mode); - sampler = G_PushSamplerStateRef(gpu_perm, sampler_res); + sampler = G_PushSampler(cl, gpu_perm, .filter = filter, .x = address_mode, .y = address_mode, .z = address_mode); } break; case G_BasicSamplerKind_BilinearWrap: { G_Filter filter = G_Filter_MinMagLinearMipPoint; G_AddressMode address_mode = G_AddressMode_Wrap; - G_ResourceHandle sampler_res = G_PushSampler(cl, gpu_perm, .filter = filter, .x = address_mode, .y = address_mode, .z = address_mode); - sampler = G_PushSamplerStateRef(gpu_perm, sampler_res); + sampler = G_PushSampler(cl, gpu_perm, .filter = filter, .x = address_mode, .y = address_mode, .z = address_mode); } break; case G_BasicSamplerKind_BilinearMirror: { G_Filter filter = G_Filter_MinMagLinearMipPoint; G_AddressMode address_mode = G_AddressMode_Mirror; - G_ResourceHandle sampler_res = G_PushSampler(cl, gpu_perm, .filter = filter, .x = address_mode, .y = address_mode, .z = address_mode); - sampler = G_PushSamplerStateRef(gpu_perm, sampler_res); + sampler = G_PushSampler(cl, gpu_perm, .filter = filter, .x = address_mode, .y = address_mode, .z = address_mode); } break; case G_BasicSamplerKind_TrilinearClamp: { G_Filter filter = G_Filter_MinMagMipLinear; G_AddressMode address_mode = G_AddressMode_Clamp; - G_ResourceHandle sampler_res = G_PushSampler(cl, gpu_perm, .filter = filter, .x = address_mode, .y = address_mode, .z = address_mode); - sampler = G_PushSamplerStateRef(gpu_perm, sampler_res); + sampler = G_PushSampler(cl, gpu_perm, .filter = filter, .x = address_mode, .y = address_mode, .z = address_mode); } break; case G_BasicSamplerKind_TrilinearWrap: { G_Filter filter = G_Filter_MinMagMipLinear; G_AddressMode address_mode = G_AddressMode_Wrap; - G_ResourceHandle sampler_res = G_PushSampler(cl, gpu_perm, .filter = filter, .x = address_mode, .y = address_mode, .z = address_mode); - sampler = G_PushSamplerStateRef(gpu_perm, sampler_res); + sampler = G_PushSampler(cl, gpu_perm, .filter = filter, .x = address_mode, .y = address_mode, .z = address_mode); } break; case G_BasicSamplerKind_TrilinearMirror: { G_Filter filter = G_Filter_MinMagMipLinear; G_AddressMode address_mode = G_AddressMode_Mirror; - G_ResourceHandle sampler_res = G_PushSampler(cl, gpu_perm, .filter = filter, .x = address_mode, .y = address_mode, .z = address_mode); - sampler = G_PushSamplerStateRef(gpu_perm, sampler_res); + sampler = G_PushSampler(cl, gpu_perm, .filter = filter, .x = address_mode, .y = address_mode, .z = address_mode); } break; } G.basic_samplers[sampler_kind] = sampler; @@ -150,10 +140,9 @@ G_ArenaHandle G_PermArena(void) //- Push resource from cpu -G_BufferRef G_PushBufferFromCpu_(G_CommandListHandle cl, G_ArenaHandle gpu_arena, void *src, G_BufferDesc desc) +G_BufferRef G_PushBufferFromCpu_(G_CommandListHandle cl, G_ArenaHandle gpu_arena, void *src, G_BufferRef buffer) { - G_BufferRef buffer = G_PushBuffer(cl, gpu_arena, desc); - G_CopyCpuToBuffer(cl, buffer, 0, src, RNGU64(0, desc.count * desc.stride)); + G_CopyCpuToBuffer(cl, buffer, 0, src, RNGU64(0, G_CountBufferBytes(buffer))); return buffer; } @@ -223,13 +212,13 @@ Vec3I32 G_GroupCountFromThreadCount(ComputeShaderDesc cs, Vec3I32 threads) //- Viewport / scissor -Rng3 G_ViewportFromTexture(G_Texture2DRef texture) +Rng3 G_ViewportFromTexture(G_TextureRef texture) { Vec2I32 dims = G_Count2D(texture); return RNG3(VEC3(0, 0, 0), VEC3(dims.x, dims.y, 1)); } -Rng2 G_ScissorFromTexture(G_Texture2DRef texture) +Rng2 G_ScissorFromTexture(G_TextureRef texture) { Vec2I32 dims = G_Count2D(texture); return RNG2(VEC2(0, 0), VEC2(dims.x, dims.y)); @@ -238,22 +227,22 @@ Rng2 G_ScissorFromTexture(G_Texture2DRef texture) //- Shared resources -G_SamplerStateRef G_BasicSamplerFromKind(G_BasicSamplerKind kind) +G_SamplerRef G_BasicSamplerFromKind(G_BasicSamplerKind kind) { return G.basic_samplers[kind]; } -G_IndexBufferDesc G_QuadIndices(void) +G_BufferRef G_QuadIndices(void) { return G.quad_indices; } -G_Texture2DRef G_BlankTexture2D(void) +G_TextureRef G_Blank2D(void) { - return G.blank_tex; + return G.blank_tex2d; } -G_Texture3DRef G_BasicNoise3D(void) +G_TextureRef G_BasicNoise3D(void) { - return G.basic_noise_tex; + return G.basic_noise_tex3d; } diff --git a/src/gpu/gpu_common.h b/src/gpu/gpu_common.h index 38228e48..f7f77ab0 100644 --- a/src/gpu/gpu_common.h +++ b/src/gpu/gpu_common.h @@ -5,8 +5,8 @@ Struct(G_Ctx) { // Common shared resources G_BufferRef quad_indices; - G_Texture2DRef blank_tex; - G_Texture2DRef basic_noise_tex; + G_TextureRef blank_tex2d; + G_TextureRef basic_noise_tex3d; G_SamplerRef basic_samplers[G_BasicSamplerKind_COUNT]; }; @@ -32,13 +32,13 @@ G_ArenaHandle G_PermArena(void); //- Push resource from cpu -G_BufferRef G_PushBufferFromCpu_(G_CommandListHandle cl, G_ArenaHandle gpu_arena, void *src, G_BufferDesc desc); +G_BufferRef G_PushBufferFromCpu_(G_CommandListHandle cl, G_ArenaHandle gpu_arena, void *src, G_BufferRef buffer); #define G_PushBufferFromCpu(_cl, _gpu_arena, _src, _type, _count, ...) \ - G_PushBufferFromCpu_((_cl), (_gpu_arena), (_src), (G_BufferDesc) { .count = _count, .stride = sizeof(_type) }) + G_PushBufferFromCpu_((_cl), (_gpu_arena), (_src), G_PushBuffer((_cl), (_gpu_arena), (_count), sizeof(_type), __VA_ARGS__)) #define G_PushBufferFromCpuArena(_cl, _gpu_arena, _cpu_arena, _type, ...) \ - G_PushBufferFromCpu_((_cl), (_gpu_arena), ArenaFirst((_cpu_arena), _type), ArenaCount((_cpu_arena), (_count)), __VA_ARGS__) + G_PushBufferFromCpu_((_cl), (_gpu_arena), ArenaFirst((_cpu_arena), _type), G_PushBuffer((_cl), (_gpu_arena), ArenaCount((_cpu_arena), _type), sizeof(_type), __VA_ARGS__)) //- Mip @@ -52,12 +52,12 @@ Vec3I32 G_GroupCountFromThreadCount(ComputeShaderDesc cs, Vec3I32 threads); //- Viewport / scissor -Rng3 G_ViewportFromTexture(G_Texture2DRef texture); -Rng2 G_ScissorFromTexture(G_Texture2DRef texture); +Rng3 G_ViewportFromTexture(G_TextureRef texture); +Rng2 G_ScissorFromTexture(G_TextureRef texture); //- Shared resources G_SamplerRef G_BasicSamplerFromKind(G_BasicSamplerKind kind); -G_IndexBufferDesc G_QuadIndices(void); -G_Texture2DRef G_BlankTexture2D(void); -G_Texture2DRef G_BasicNoise3D(void); +G_BufferRef G_QuadIndices(void); +G_TextureRef G_Blank2D(void); +G_TextureRef G_BasicNoise3D(void); diff --git a/src/gpu/gpu_core.h b/src/gpu/gpu_core.h index d92a32cc..43f954ad 100644 --- a/src/gpu/gpu_core.h +++ b/src/gpu/gpu_core.h @@ -16,6 +16,13 @@ Struct(G_SwapchainHandle) { u64 v; }; #define G_IsMultiQueueEnabled 1 +Enum(G_QueueFamily) +{ + G_QueueFamily_Graphics, + G_QueueFamily_Compute, + G_QueueFamily_Copy +}; + Enum(G_QueueKind) { G_QueueKind_Direct = 0, @@ -422,21 +429,21 @@ void G_ResetArena(G_CommandListHandle cl_handle, G_ArenaHandle arena_handle); G_BaseDescriptorIndex G_PushMemory(G_CommandListHandle cl, G_ArenaHandle gpu_arena, G_MemoryDesc desc); -#define G_PushBuffer(_cl, _arena, _count, _type, ...) ((G_BufferRef) { .v = G_PushMemory((cl), (_arena), \ - (G_MemoryDesc) { \ - .kind = G_MemoryKind_Buffer, \ - .buffer = { \ - .count = (_count), \ - .stride = sizeof(_type), \ - __VA_ARGS__ \ - } \ - } \ +#define G_PushBuffer(_cl, _arena, _type, _count, ...) ((G_BufferRef) { .v = G_PushMemory((_cl), (_arena), \ + (G_MemoryDesc) { \ + .kind = G_MemoryKind_Buffer, \ + .buffer = { \ + .count = (_count), \ + .stride = sizeof(_type), \ + __VA_ARGS__ \ + } \ + } \ )}) -#define G_PushTexture1D(_cl, _arena, _initial_layout, _format, _dims, ...) ((G_Texture1DRef) { .v = G_PushMemory((cl), (_arena), \ +#define G_PushTexture1D(_cl, _arena, _initial_layout, _format, _dims, ...) ((G_TextureRef) { .v = G_PushMemory((_cl), (_arena), \ (G_MemoryDesc) { \ .kind = G_MemoryKind_Texture1D, \ .texture = { \ @@ -449,7 +456,7 @@ G_BaseDescriptorIndex G_PushMemory(G_CommandListHandle cl, G_ArenaHandle gpu_are )}) -#define G_PushTexture2D(_cl, _arena, _initial_layout, _format, _dims, ...) ((G_Texture2DRef) { .v = G_PushMemory((cl), (_arena), \ +#define G_PushTexture2D(_cl, _arena, _initial_layout, _format, _dims, ...) ((G_TextureRef) { .v = G_PushMemory((_cl), (_arena), \ (G_MemoryDesc) { \ .kind = G_MemoryKind_Texture2D, \ .texture = { \ @@ -464,7 +471,7 @@ G_BaseDescriptorIndex G_PushMemory(G_CommandListHandle cl, G_ArenaHandle gpu_are -#define G_PushTexture3D(_cl, _arena, _initial_layout, _format, _dims, ...) ((G_Texture3DRef) { .v = G_PushMemory((cl), (_arena), \ +#define G_PushTexture3D(_cl, _arena, _initial_layout, _format, _dims, ...) ((G_TextureRef) { .v = G_PushMemory((_cl), (_arena), \ (G_MemoryDesc) { \ .kind = G_MemoryKind_Texture3D, \ .texture = { \ @@ -481,6 +488,21 @@ G_BaseDescriptorIndex G_PushMemory(G_CommandListHandle cl, G_ArenaHandle gpu_are +#define G_PushSampler(_cl, _arena, ...) ((G_SamplerRef) { .v = G_PushMemory((_cl), (_arena), \ + (G_MemoryDesc) { \ + .kind = G_MemoryKind_Sampler, \ + .sampler = { \ + .filter = G_Filter_MinMagMipPoint, \ + __VA_ARGS__ \ + } \ + } \ +)}) + + + + + + //- Resource creation @@ -553,21 +575,20 @@ G_BaseDescriptorIndex G_PushMemory(G_CommandListHandle cl, G_ArenaHandle gpu_are //- Count -u64 G_CountBufferBytes(G_ResourceHandle buffer); -i32 G_Count1D(G_ResourceHandle texture); -Vec2I32 G_Count2D(G_ResourceHandle texture); -Vec3I32 G_Count3D(G_ResourceHandle texture); -i32 G_CountWidth(G_ResourceHandle texture); -i32 G_CountHeight(G_ResourceHandle texture); -i32 G_CountDepth(G_ResourceHandle texture); -i32 G_CountMips(G_ResourceHandle texture); - -#define G_CountBuffer(buffer, type) G_CountBufferBytes(buffer) / sizeof(type) +u64 G_CountBuffer(G_BufferRef buffer); +u64 G_CountBufferBytes(G_BufferRef buffer); +i32 G_Count1D(G_TextureRef texture); +Vec2I32 G_Count2D(G_TextureRef texture); +Vec3I32 G_Count3D(G_TextureRef texture); +i32 G_CountWidth(G_TextureRef texture); +i32 G_CountHeight(G_TextureRef texture); +i32 G_CountDepth(G_TextureRef texture); +i32 G_CountMips(G_TextureRef texture); //- Map -void *G_HostPointerFromResource(G_ResourceHandle resource); -#define G_StructFromResource(resource, type) (type *)G_HostPointerFromResource(resource) +void *G_CpuAddressFromBuffer(G_BufferRef buffer); +#define G_StructFromBuffer(buffer, type) (type *)G_CpuAddressFromBuffer(buffer) //////////////////////////////////////////////////////////// //~ @hookdecl Shader resource reference @@ -637,7 +658,7 @@ void G_SetConstantEx(G_CommandListHandle cl, i32 slot, void *src_32bit, u32 size //- Barrier void G_Sync(G_CommandListHandle cl); -void G_SyncLayout(G_CommandListHandle cl, G_ResourceHandle resource, G_TextureLayout layout); +void G_SyncLayout(G_CommandListHandle cl, G_TextureRef resource, G_TextureLayout layout); //- Zone @@ -650,15 +671,15 @@ void G_PopZoneEx(G_CommandListHandle cl); //- Cpu -> Gpu staged copy -void G_CopyCpuToBuffer(G_CommandListHandle cl, G_ResourceHandle dst, u64 dst_offset, void *src, RngU64 src_copy_range); -void G_CopyCpuToTexture(G_CommandListHandle cl, G_ResourceHandle dst, Vec3I32 dst_offset, void *src, Vec3I32 src_dims, Rng3I32 src_copy_range); +void G_CopyCpuToBuffer(G_CommandListHandle cl, G_BufferRef dst, u64 dst_offset, void *src, RngU64 src_copy_range); +void G_CopyCpuToTexture(G_CommandListHandle cl, G_TextureRef dst, Vec3I32 dst_offset, void *src, Vec3I32 src_dims, Rng3I32 src_copy_range); //- Gpu <-> Gpu copy -void G_CopyBufferToBuffer(G_CommandListHandle cl, G_ResourceHandle dst, u64 dst_offset, G_ResourceHandle src, RngU64 src_copy_range); -void G_CopyBufferToTexture(G_CommandListHandle cl_handle, G_ResourceHandle dst_handle, Rng3I32 dst_copy_range, G_ResourceHandle src_handle, u64 src_offset); -void G_CopyTextureToTexture(G_CommandListHandle cl, G_ResourceHandle dst, Vec3I32 dst_offset, G_ResourceHandle src, Rng3I32 src_copy_range); -void G_CopyTextureToBuffer(G_CommandListHandle cl, G_ResourceHandle dst, Vec3I32 dst_offset, G_ResourceHandle src, Rng3I32 src_copy_range); +void G_CopyBufferToBuffer(G_CommandListHandle cl, G_BufferRef dst, u64 dst_offset, G_BufferRef src, RngU64 src_copy_range); +void G_CopyBufferToTexture(G_CommandListHandle cl_handle, G_TextureRef dst_handle, Rng3I32 dst_copy_range, G_BufferRef src_handle, u64 src_offset); +void G_CopyTextureToTexture(G_CommandListHandle cl, G_TextureRef dst, Vec3I32 dst_offset, G_TextureRef src, Rng3I32 src_copy_range); +void G_CopyTextureToBuffer(G_CommandListHandle cl, G_BufferRef dst, Vec3I32 dst_offset, G_TextureRef src, Rng3I32 src_copy_range); //- Compute @@ -681,7 +702,7 @@ void G_Draw( //- Clear -void G_ClearRenderTarget(G_CommandListHandle cl, G_ResourceHandle render_target, Vec4 color, i32 mip); +void G_ClearRenderTarget(G_CommandListHandle cl, G_TextureRef render_target, Vec4 color, i32 mip); //////////////////////////////////////////////////////////// //~ @hookdecl Queue synchronization @@ -715,6 +736,6 @@ void G_ReleaseSwapchain(G_SwapchainHandle swapchain); // Waits until a new backbuffer is ready from the swapchain. // This should be called before rendering for minimum latency. -G_ResourceHandle G_PrepareBackbuffer(G_SwapchainHandle swapchain_handle, G_Format format, Vec2I32 size); +G_TextureRef G_PrepareBackbuffer(G_SwapchainHandle swapchain_handle, G_Format format, Vec2I32 size); -void G_CommitBackbuffer(G_ResourceHandle backbuffer, i32 vsync); +void G_CommitBackbuffer(G_TextureRef backbuffer, i32 vsync); diff --git a/src/gpu/gpu_dx12/gpu_dx12_core.c b/src/gpu/gpu_dx12/gpu_dx12_core.c index 4bbf18af..a70b2873 100644 --- a/src/gpu/gpu_dx12/gpu_dx12_core.c +++ b/src/gpu/gpu_dx12/gpu_dx12_core.c @@ -432,49 +432,45 @@ void G_Bootstrap(void) { D3D12_DESCRIPTOR_HEAP_TYPE type; D3D12_DESCRIPTOR_HEAP_FLAGS flags; - u64 max; - u64 per_batch_count; + u64 capacity; String name; }; Dx12HeapDesc descs[G_D12_DescriptorHeapKind_COUNT] = { [G_D12_DescriptorHeapKind_CbvSrvUav] = { .type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, .flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE, - .max = G_D12_MaxCbvSrvUavDescriptors, - .per_batch_count = 4, // 0: SRV, 1: UAV, 2: Raw-SRV, 3: Raw-UAV + .capacity = G_D12_MaxCbvSrvUavDescriptors, .name = Lit("Primary Resource Descriptor Heap"), }, [G_D12_DescriptorHeapKind_Rtv] = { .type = D3D12_DESCRIPTOR_HEAP_TYPE_RTV, .flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE, - .max = G_D12_MaxRtvDescriptors, - .per_batch_count = 1, + .capacity = G_D12_MaxRtvDescriptors, .name = Lit("Primary RTV Descriptor Heap"), }, [G_D12_DescriptorHeapKind_Sampler] = { .type = D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, .flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE, - .max = G_D12_MaxSamplerDescriptors, - .per_batch_count = 1, + .capacity = G_D12_MaxSamplerDescriptors, .name = Lit("Primary Sampler Descriptor Heap"), }, }; - for (G_D12_DescriptorHeapKind kind = 0; kind < countof(descs); ++kind) + for (G_D12_DescriptorHeapKind heap_kind = 0; heap_kind < countof(descs); ++heap_kind) { - Dx12HeapDesc desc = descs[kind]; - G_D12_DescriptorHeap *heap = &G_D12.descriptor_heaps[kind]; - heap->descriptors_arena = AcquireArena(Gibi(1)); + Dx12HeapDesc desc = descs[heap_kind]; + G_D12_DescriptorHeap *heap = &G_D12.descriptor_heaps[heap_kind]; + heap->arena = AcquireArena(Gibi(64)); + heap->descriptor_indices_arena = AcquireArena(Gibi(64)); - heap->kind = kind; + heap->kind = heap_kind; heap->type = desc.type; - heap->per_batch_count = desc.per_batch_count; - heap->max_count = desc.max; - heap->descriptor_size = ID3D12Device_GetDescriptorHandleIncrementSize(G_D12.device, desc.type); + heap->capacity = desc.capacity; + heap->stride = ID3D12Device_GetDescriptorHandleIncrementSize(G_D12.device, desc.type); D3D12_DESCRIPTOR_HEAP_DESC d3d_desc = Zi; d3d_desc.Type = desc.type; d3d_desc.Flags = desc.flags; - d3d_desc.NumDescriptors = desc.max; + d3d_desc.NumDescriptors = desc.capacity; HRESULT hr = 0; @@ -488,12 +484,18 @@ void G_Bootstrap(void) ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(heap->d3d_heap, &heap->start_handle); } + // Push nil descriptor at index 0 if (SUCCEEDED(hr)) { + G_D12_DescriptorDesc descriptor_desc = Zi; + descriptor_desc.resource = 0; + descriptor_desc.bundle_count = G_D12_MaxDescriptorBundleCount; + // Push an empty descriptor at index 0, so that a handle with a value of 0 always represents nil G_D12_Arena *gpu_perm = G_D12_ArenaFromHandle(G_PermArena()); - G_D12_Descriptor *nil_descriptor = G_D12_PushDescriptor(gpu_perm, kind); - Assert(nil_descriptor->index == 0); + G_D12_Descriptor *nil_descriptor = G_D12_PushDescriptor(gpu_perm, descriptor_desc, heap_kind); + Assert(nil_descriptor->base_index == 0); + G_D12_SetObjectName((ID3D12Object *)heap->d3d_heap, desc.name); } @@ -577,20 +579,16 @@ void G_Bootstrap(void) G_ArenaHandle gpu_perm = G_PermArena(); queue->print_buffer_size = GPU_SHADER_PRINT_BUFFER_SIZE; queue->print_buffer = G_PushBuffer( - gpu_perm, cl, - u8, - queue->print_buffer_size, - .flags = G_ResourceFlag_AllowShaderReadWrite, - .name = Lit("Debug print gpu buffer"), + cl, gpu_perm, + u8, queue->print_buffer_size, + .name = Lit("Debug print gpu buffer") ); queue->print_readback_buffer = G_PushBuffer( - gpu_perm, cl, - u8, - queue->print_buffer_size, - .flags = G_ResourceFlag_HostMemory, + cl, gpu_perm, + u8, queue->print_buffer_size, + .flags = G_MemoryFlag_HostCached, .name = Lit("Debug print readback buffer") ); - queue->print_buffer_ref = G_PushByteAddressBufferRef(gpu_perm, queue->print_buffer); } G_CommitCommandList(cl); } @@ -1204,9 +1202,478 @@ void G_D12_ResetArena(G_D12_CmdList *cl, G_D12_Arena *gpu_arena) G_Sync(G_D12_MakeHandle(G_CommandListHandle, cl)); } + + + + + + + +//////////////////////////////////////////////////////////// +//~ Descriptor + +// G_D12_Descriptor *G_D12_DescriptorFromIndex(G_D12_DescriptorHeapKind heap_kind, u32 index) +// { +// G_D12_DescriptorHeap *heap = &G_D12.descriptor_heaps[heap_kind]; +// G_D12_Descriptor *descriptors = ArenaFirst(heap->descriptors_arena, G_D12_Descriptor); +// return &descriptors[index]; +// } + +// G_D12_Descriptor *G_D12_PushDescriptor(G_D12_Arena *gpu_arena, G_D12_DescriptorHeapKind heap_kind) +// { +// G_D12_DescriptorHeap *heap = &G_D12.descriptor_heaps[heap_kind]; +// u64 per_batch_count = heap->per_batch_count; + +// G_D12_Descriptor *descriptor = 0; +// u32 index = 0; + +// // Grab completed descriptor from arena +// G_D12_DescriptorList *descriptors = &gpu_arena->reset_descriptors_by_heap[heap_kind]; +// descriptor = descriptors->first; +// if (descriptor) +// { +// G_D12_Queue *queue = G_D12_QueueFromKind(descriptor->completion_queue_kind); +// i64 queue_commit_completion = ID3D12Fence_GetCompletedValue(queue->commit_fence); +// if (queue_commit_completion >= descriptor->completion_queue_target) +// { +// // Descriptor no longer in use by gpu, reuse it +// DllQueueRemove(descriptors->first, descriptors->last, descriptor); +// descriptors->count -= 1; +// index = descriptor->index; +// } +// else +// { +// // Descriptor may still be in use by gpu +// descriptor = 0; +// } +// } + +// // Allocate new descriptor from heap +// if (!descriptor) +// { +// Lock lock = LockE(&heap->mutex); +// { +// if (heap->first_free) +// { +// descriptor = heap->first_free; +// DllStackRemove(heap->first_free, descriptor); +// index = descriptor->index; +// } +// else +// { +// u32 descriptors_count = ArenaCount(heap->descriptors_arena, G_D12_Descriptor); +// if (descriptors_count >= heap->max_count) +// { +// Panic(Lit("Max descriptors reached in heap")); +// } +// descriptor = PushStructNoZero(heap->descriptors_arena, G_D12_Descriptor); +// index = descriptors_count * per_batch_count; +// } +// } +// Unlock(&lock); +// } + +// // Initialize descriptor handle +// ZeroStruct(descriptor); +// descriptor->gpu_arena = gpu_arena; +// descriptor->index = index; +// descriptor->first_handle.ptr = heap->start_handle.ptr + (index * heap->stride); +// descriptor->heap = heap; + +// DllQueuePush(gpu_arena->descriptors.first, gpu_arena->descriptors.last, descriptor); +// gpu_arena->descriptors.count += 1; + +// return descriptor; +// } + + + + +G_D12_Descriptor *G_D12_PushDescriptor(G_D12_Arena *gpu_arena, G_D12_DescriptorDesc desc, G_D12_DescriptorHeapKind heap_kind) +{ + // FIXME: Impl + G_D12_Descriptor result = 0; + return result; +} + + + + + + + + + //////////////////////////////////////////////////////////// //~ @hookimpl Memory + + + + + + + + + + + + + + +G_BaseDescriptorIndex G_PushMemory(G_CommandListHandle cl_handle, G_ArenaHandle gpu_arena_handle, G_MemoryDesc memory_dsec) +{ + Arena *perm = PermArena(); + G_D12_Arena *gpu_arena = G_D12_ArenaFromHandle(gpu_arena_handle); + G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle); + G_QueueKind queue_kind = cl->queue_kind; + G_D12_Resource *resource = 0; + + b32 is_buffer = desc.kind == G_MemoryKind_Buffer; + b32 is_texture = ( + desc.kind == G_MemoryKind_Texture1D || + desc.kind == G_MemoryKind_Texture2D || + desc.kind == G_MemoryKind_Texture3D + ); + b32 is_sampler = desc.kind == G_MemoryKind_Sampler; + G_MemoryFlag flags = ( + is_buffer ? desc.buffer.flags : + is_texture ? desc.texture.flags : + desc.sampler.flags + ); + String new_name = ( + is_buffer ? desc.buffer.name : + is_texture ? desc.texture.name : + desc.sampler.name + ); + new_name.len = MinU64(new_name.len, G_D12_MaxNameLen); + + ////////////////////////////// + //- Initialize heap info + + b32 can_reuse = !AnyBit(flags, G_MemoryFlag_ForceNoReuse); + + D3D12_HEAP_FLAGS heap_flags = 0; + D3D12_HEAP_PROPERTIES heap_props = Zi; + b32 should_map = 0; + if (is_buffer || is_texture) + { + G_D12_ResourceHeapKind heap_kind = G_D12_ResourceHeapKind_Gpu; + // Heap flags + if (flags & G_MemoryFlag_HostMemory) + { + heap_kind = G_D12_ResourceHeapKind_Cpu; + if (flags & G_MemoryFlag_Uncached) + { + heap_kind = G_D12_ResourceHeapKind_CpuWriteCombined; + } + } + if (flags & G_MemoryFlag_ZeroMemory) + { + can_reuse = 0; + } + else + { + heap_flags |= D3D12_HEAP_FLAG_CREATE_NOT_ZEROED; + } + // Heap props + if (heap_kind == G_D12_ResourceHeapKind_Cpu) + { + heap_props.Type = D3D12_HEAP_TYPE_CUSTOM; + heap_props.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_WRITE_BACK; + heap_props.MemoryPoolPreference = D3D12_MEMORY_POOL_L0; + should_map = 1; + } + else if (heap_kind == G_D12_ResourceHeapKind_CpuWriteCombined) + { + heap_props.Type = D3D12_HEAP_TYPE_CUSTOM; + heap_props.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_WRITE_COMBINE; + heap_props.MemoryPoolPreference = D3D12_MEMORY_POOL_L0; + should_map = 1; + } + else + { + heap_props.Type = D3D12_HEAP_TYPE_DEFAULT; + } + } + + ////////////////////////////// + //- Initialize d3d resource desc + + D3D12_CLEAR_VALUE clear_value = Zi; + D3D12_RESOURCE_DESC1 d3d_desc = Zi; + if (is_buffer) + { + u64 min_buffer_size = 1024; + d3d_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + d3d_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + d3d_desc.Format = DXGI_FORMAT_UNKNOWN; + d3d_desc.Width = NextPow2U64(MaxU64(desc.buffer.size, min_buffer_size)); + d3d_desc.Height = 1; + d3d_desc.DepthOrArraySize = 1; + d3d_desc.MipLevels = 1; + d3d_desc.SampleDesc.Count = 1; + d3d_desc.SampleDesc.Quality = 0; + d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS * AnyBit(flags, G_MemoryFlag_AllowShaderReadWrite); + } + else if (is_texture) + { + i32 largest_dim = MaxI32(MaxI32(desc.texture.dims.x, desc.texture.dims.y), desc.texture.dims.z); + i32 max_mips = MinI32(FloorF32(Log2F32(largest_dim)) + 1, G_MaxMips); + d3d_desc.Dimension = ( + desc.kind == G_MemoryKind_Texture1D ? D3D12_RESOURCE_DIMENSION_TEXTURE1D : + desc.kind == G_MemoryKind_Texture2D ? D3D12_RESOURCE_DIMENSION_TEXTURE2D : + D3D12_RESOURCE_DIMENSION_TEXTURE3D + ); + d3d_desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN; + d3d_desc.Format = G_D12_DxgiFormatFromGpuFormat(desc.texture.format); + d3d_desc.Width = MaxI32(desc.texture.dims.x, 1); + d3d_desc.Height = MaxI32(desc.texture.dims.y, 1); + d3d_desc.DepthOrArraySize = MaxI32(desc.texture.dims.z, 1); + d3d_desc.MipLevels = ClampF32(desc.texture.max_mips, 1, max_mips); + d3d_desc.SampleDesc.Count = 1; + d3d_desc.SampleDesc.Quality = 0; + d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS * AnyBit(flags, G_MemoryFlag_AllowShaderReadWrite); + d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET * AnyBit(flags, G_MemoryFlag_AllowRenderTarget); + d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL * AnyBit(flags, G_MemoryFlag_AllowTextureDepthStencil); + d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS * (desc.texture.initial_layout == G_Layout_Simultaneous); + clear_value.Color[0] = desc.texture.clear_color.x, + clear_value.Color[1] = desc.texture.clear_color.y, + clear_value.Color[2] = desc.texture.clear_color.z, + clear_value.Color[3] = desc.texture.clear_color.w, + clear_value.Format = d3d_desc.Format; + } + + ////////////////////////////// + //- Check for reset-resource reusability + + // Pop reset resource + resource = gpu_arena->reset_resources.first; + if (resource) + { + DllQueueRemove(gpu_arena->reset_resources.first, gpu_arena->reset_resources.last, resource); + --gpu_arena->reset_resources.count; + + D3D12_RESOURCE_DESC1 reset_d3d_desc = Zi; + D3D12_RESOURCE_DESC1 compare_d3d_desc = Zi; + CopyStruct(&reset_d3d_desc, &resource->d3d_desc); + CopyStruct(&compare_d3d_desc, &reset_d3d_desc); + + // Buffers can be reused if size fits + if (d3d_desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER && reset_d3d_desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER) + { + if (reset_d3d_desc.Width >= d3d_desc.Width) + { + compare_d3d_desc.Width = d3d_desc.Width; + } + } + + // TODO: Less stringent reuse constraints. We could even create textures as placed resources and reset their underlying heaps. + can_reuse = can_reuse && MatchStruct(&compare_d3d_desc, &d3d_desc); + if (!can_reuse) + { + // Push releasable to command list + { + G_D12_Releasable *release = 0; + { + Lock lock = LockE(&G_D12.free_releases_mutex); + { + release = G_D12.free_releases.first; + if (release) + { + SllQueuePop(G_D12.free_releases.first, G_D12.free_releases.last); + } + else + { + release = PushStructNoZero(perm, G_D12_Releasable); + } + } + Unlock(&lock); + } + ZeroStruct(release); + SllQueuePush(cl->releases.first, cl->releases.last, release); + release->d3d_resource = resource->d3d_resource; + } + ZeroStruct(resource); + } + } + else + { + can_reuse = 0; + resource = PushStruct(gpu_arena->arena, G_D12_Resource); + } + + if (!can_reuse) + { + resource->d3d_desc = d3d_desc; + } + + ////////////////////////////// + //- Init resource + + resource->flags = flags; + + if (is_buffer) + { + resource->buffer_size = desc.buffer.size; + resource->buffer_size_actual = d3d_desc.Width; + } + + if (is_texture) + { + resource->is_texture = is_texture; + resource->texture_format = desc.texture.format; + resource->texture_dims = desc.texture.dims; + resource->texture_mips = d3d_desc.MipLevels; + } + + if (is_sampler) + { + resource->sampler_desc = desc.sampler; + } + + DllQueuePush(gpu_arena->resources.first, gpu_arena->resources.last, resource); + ++gpu_arena->resources.count; + + ////////////////////////////// + //- Allocate D3D12 resource + + if ((is_buffer || is_texture) && !resource->d3d_resource) + { + D3D12_CLEAR_VALUE *clear_value_arg = 0; + if (d3d_desc.Flags & (D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL)) + { + clear_value_arg = &clear_value; + } + + D3D12_BARRIER_LAYOUT d3d_initial_layout = D3D12_BARRIER_LAYOUT_UNDEFINED; + if (is_texture) + { + d3d_initial_layout = D3D12_BARRIER_LAYOUT_COMMON; + if (desc.texture.initial_layout == G_Layout_Exclusive) + { + switch (queue_kind) + { + case G_QueueKind_Direct: d3d_initial_layout = D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COMMON; break; + case G_QueueKind_AsyncCompute: d3d_initial_layout = D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COMMON; break; + } + } + } + + HRESULT hr = ID3D12Device10_CreateCommittedResource3( + G_D12.device, + &heap_props, + heap_flags, + &resource->d3d_desc, + d3d_initial_layout, + clear_value_arg, + 0, // pProtectedSession + 0, // NumCastableFormats + 0, // pCastableFormats + &IID_ID3D12Resource, + (void **)&resource->d3d_resource + ); + Atomic64FetchAdd(&G_D12.cumulative_nonreuse_count, 1); + resource->uid = Atomic64FetchAdd(&G_D12.resource_creation_gen.v, d3d_desc.MipLevels); + + // Queue initial Rtv/Dsv discard + if ( + !AnyBit(flags, G_MemoryFlag_ZeroMemory) && + AnyBit(d3d_desc.Flags, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL) + ) + { + G_D12_Cmd *cmd = G_D12_PushCmd(cl); + cmd->kind = G_D12_CmdKind_Discard; + cmd->discard.resource = resource; + } + + if (!SUCCEEDED(hr)) + { + // TODO: Don't panic + Panic(Lit("Failed to allocate D3D12 resource")); + } + + if (is_buffer) + { + resource->buffer_gpu_address = ID3D12Resource_GetGPUVirtualAddress(resource->d3d_resource); + } + } + + if (should_map && !resource->mapped) + { + D3D12_RANGE read_range = Zi; + HRESULT hr = ID3D12Resource_Map(resource->d3d_resource, 0, &read_range, &resource->mapped); + + if (!SUCCEEDED(hr)) + { + // TODO: Don't panic + Panic(Lit("Failed to map D3D12 resource")); + } + } + + ////////////////////////////// + //- Set debug information + + String old_name = STRING(resource->name_len, (u8 *)resource->name_cstr); + if (!MatchString(old_name, new_name)) + { + resource->name_len = new_name.len; + CopyBytes(resource->name_cstr, new_name.text, new_name.len); + resource->name_cstr[new_name.len] = 0; + if (resource->d3d_resource) + { + G_D12_SetObjectName((ID3D12Object *)resource->d3d_resource, new_name); + } + } + + ////////////////////////////// + //- Transition reused resources to common + + G_ResourceHandle resource_handle = G_D12_MakeHandle(G_ResourceHandle, resource); + if ( + can_reuse && + desc.texture.initial_layout == G_Layout_Common && + queue_kind != G_QueueKind_AsyncCopy + ) + { + G_SyncLayout(cl_handle, resource_handle, G_Layout_Common); + } + + ////////////////////////////// + //- Push ref + + G_D12_RefBundle *bundle = 0; + { + G_D12_RefBundleDesc desc = Zi; + desc.resource = resource; + if (is_buffer) + { + desc.buffer_element_offset = 0; + desc.buffer_element_count = desc.buffer.count; + desc.buffer_element_stride = desc.buffer.stride; + } + else if (is_texture) + { + desc.mips = RngI32(0, resource->texture_mips); + } + + bundle = G_D12_PushRefBundle(cl, gpu_arena, desc); + } + + + return bundle->base_index; +} + + + + + + + + + + + // G_ResourceHandle G_PushResource(G_ArenaHandle arena_handle, G_CommandListHandle cl_handle, G_ResourceDesc desc) // { // Arena *perm = PermArena(); @@ -1222,7 +1689,7 @@ void G_D12_ResetArena(G_D12_CmdList *cl, G_D12_Arena *gpu_arena) // desc.kind == G_ResourceKind_Texture3D // ); // b32 is_sampler = desc.kind == G_ResourceKind_Sampler; -// G_ResourceFlag flags = ( +// G_MemoryFlag flags = ( // is_buffer ? desc.buffer.flags : // is_texture ? desc.texture.flags : // desc.sampler.flags @@ -1237,7 +1704,7 @@ void G_D12_ResetArena(G_D12_CmdList *cl, G_D12_Arena *gpu_arena) // ////////////////////////////// // //- Initialize heap info -// b32 can_reuse = !AnyBit(flags, G_ResourceFlag_ForceNoReuse); +// b32 can_reuse = !AnyBit(flags, G_MemoryFlag_ForceNoReuse); // D3D12_HEAP_FLAGS heap_flags = 0; // D3D12_HEAP_PROPERTIES heap_props = Zi; @@ -1246,15 +1713,15 @@ void G_D12_ResetArena(G_D12_CmdList *cl, G_D12_Arena *gpu_arena) // { // G_D12_ResourceHeapKind heap_kind = G_D12_ResourceHeapKind_Gpu; // // Heap flags -// if (flags & G_ResourceFlag_HostMemory) +// if (flags & G_MemoryFlag_HostMemory) // { // heap_kind = G_D12_ResourceHeapKind_Cpu; -// if (flags & G_ResourceFlag_Uncached) +// if (flags & G_MemoryFlag_Uncached) // { // heap_kind = G_D12_ResourceHeapKind_CpuWriteCombined; // } // } -// if (flags & G_ResourceFlag_ZeroMemory) +// if (flags & G_MemoryFlag_ZeroMemory) // { // can_reuse = 0; // } @@ -1300,7 +1767,7 @@ void G_D12_ResetArena(G_D12_CmdList *cl, G_D12_Arena *gpu_arena) // d3d_desc.MipLevels = 1; // d3d_desc.SampleDesc.Count = 1; // d3d_desc.SampleDesc.Quality = 0; -// d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS * AnyBit(flags, G_ResourceFlag_AllowShaderReadWrite); +// d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS * AnyBit(flags, G_MemoryFlag_AllowShaderReadWrite); // } // else if (is_texture) // { @@ -1319,9 +1786,9 @@ void G_D12_ResetArena(G_D12_CmdList *cl, G_D12_Arena *gpu_arena) // d3d_desc.MipLevels = ClampF32(desc.texture.max_mips, 1, max_mips); // d3d_desc.SampleDesc.Count = 1; // d3d_desc.SampleDesc.Quality = 0; -// d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS * AnyBit(flags, G_ResourceFlag_AllowShaderReadWrite); -// d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET * AnyBit(flags, G_ResourceFlag_AllowRenderTarget); -// d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL * AnyBit(flags, G_ResourceFlag_AllowDepthStencil); +// d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS * AnyBit(flags, G_MemoryFlag_AllowShaderReadWrite); +// d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET * AnyBit(flags, G_MemoryFlag_AllowRenderTarget); +// d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL * AnyBit(flags, G_MemoryFlag_AllowTextureDepthStencil); // d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS * (desc.texture.initial_layout == G_Layout_Simultaneous); // clear_value.Color[0] = desc.texture.clear_color.x, // clear_value.Color[1] = desc.texture.clear_color.y, @@ -1464,7 +1931,7 @@ void G_D12_ResetArena(G_D12_CmdList *cl, G_D12_Arena *gpu_arena) // // Queue initial Rtv/Dsv discard // if ( -// !AnyBit(flags, G_ResourceFlag_ZeroMemory) && +// !AnyBit(flags, G_MemoryFlag_ZeroMemory) && // AnyBit(d3d_desc.Flags, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL) // ) // { @@ -1528,82 +1995,12 @@ void G_D12_ResetArena(G_D12_CmdList *cl, G_D12_Arena *gpu_arena) // return resource_handle; // } -//////////////////////////////////////////////////////////// -//~ Descriptor -G_D12_Descriptor *G_D12_DescriptorFromIndex(G_D12_DescriptorHeapKind heap_kind, u32 index) -{ - G_D12_DescriptorHeap *heap = &G_D12.descriptor_heaps[heap_kind]; - G_D12_Descriptor *descriptors = ArenaFirst(heap->descriptors_arena, G_D12_Descriptor); - return &descriptors[index]; -} -G_D12_Descriptor *G_D12_PushDescriptor(G_D12_Arena *gpu_arena, G_D12_DescriptorHeapKind heap_kind) -{ - G_D12_DescriptorHeap *heap = &G_D12.descriptor_heaps[heap_kind]; - u64 per_batch_count = heap->per_batch_count; - G_D12_Descriptor *descriptor = 0; - u32 index = 0; - // Grab completed descriptor from arena - G_D12_DescriptorList *descriptors = &gpu_arena->reset_descriptors_by_heap[heap_kind]; - descriptor = descriptors->first; - if (descriptor) - { - G_D12_Queue *queue = G_D12_QueueFromKind(descriptor->completion_queue_kind); - i64 queue_commit_completion = ID3D12Fence_GetCompletedValue(queue->commit_fence); - if (queue_commit_completion >= descriptor->completion_queue_target) - { - // Descriptor no longer in use by gpu, reuse it - DllQueueRemove(descriptors->first, descriptors->last, descriptor); - descriptors->count -= 1; - index = descriptor->index; - } - else - { - // Descriptor may still be in use by gpu - descriptor = 0; - } - } - // Allocate new descriptor from heap - if (!descriptor) - { - Lock lock = LockE(&heap->mutex); - { - if (heap->first_free) - { - descriptor = heap->first_free; - DllStackRemove(heap->first_free, descriptor); - index = descriptor->index; - } - else - { - u32 descriptors_count = ArenaCount(heap->descriptors_arena, G_D12_Descriptor); - if (descriptors_count >= heap->max_count) - { - Panic(Lit("Max descriptors reached in heap")); - } - descriptor = PushStructNoZero(heap->descriptors_arena, G_D12_Descriptor); - index = descriptors_count * per_batch_count; - } - } - Unlock(&lock); - } - // Initialize descriptor handle - ZeroStruct(descriptor); - descriptor->gpu_arena = gpu_arena; - descriptor->index = index; - descriptor->first_handle.ptr = heap->start_handle.ptr + (index * heap->descriptor_size); - descriptor->heap = heap; - - DllQueuePush(gpu_arena->descriptors.first, gpu_arena->descriptors.last, descriptor); - gpu_arena->descriptors.count += 1; - - return descriptor; -} //////////////////////////////////////////////////////////// //~ @hookimpl Shader resource reference @@ -1622,7 +2019,7 @@ G_D12_Descriptor *G_D12_PushDescriptor(G_D12_Arena *gpu_arena, G_D12_DescriptorH // b32 is_sampler = kind == G_RefKind_SamplerState; // b32 is_texture = !is_buffer && !is_sampler; // b32 is_raw = kind == G_RefKind_ByteAddressBuffer; -// b32 is_writable = resource->flags & G_ResourceFlag_AllowShaderReadWrite; +// b32 is_writable = resource->flags & G_MemoryFlag_AllowShaderReadWrite; // G_D12_Descriptor *descriptor = 0; // if (is_buffer || is_texture) @@ -1633,7 +2030,7 @@ G_D12_Descriptor *G_D12_PushDescriptor(G_D12_Arena *gpu_arena, G_D12_DescriptorH // Assert(heap->per_batch_count >= 2); // D3D12_CPU_DESCRIPTOR_HANDLE readonly_handle = descriptor->first_handle; // D3D12_CPU_DESCRIPTOR_HANDLE readwrite_handle = descriptor->first_handle; -// readwrite_handle.ptr += heap->descriptor_size; +// readwrite_handle.ptr += heap->stride; // b32 srv_ok = 0; // b32 uav_ok = 0; @@ -1813,60 +2210,76 @@ G_D12_Descriptor *G_D12_PushDescriptor(G_D12_Arena *gpu_arena, G_D12_DescriptorH //- Count -u64 G_CountBufferBytes(G_ResourceHandle buffer) +u64 G_CountBuffer(G_BufferRef buffer) { - G_D12_Resource *resource = G_D12_ResourceFromHandle(buffer); - return resource->buffer_size; + // FIXME: Impl + u64 result = Zi; + return result; } -i32 G_Count1D(G_ResourceHandle texture) +u64 G_CountBufferBytes(G_BufferRef buffer) { - G_D12_Resource *resource = G_D12_ResourceFromHandle(texture); - return resource->texture_dims.x; + // FIXME: Impl + u64 result = Zi; + return result; } -Vec2I32 G_Count2D(G_ResourceHandle texture) +i32 G_Count1D(G_TextureRef texture) { - G_D12_Resource *resource = G_D12_ResourceFromHandle(texture); - return VEC2I32(resource->texture_dims.x, resource->texture_dims.y); + // FIXME: Impl + i32 result = Zi; + return result; } -Vec3I32 G_Count3D(G_ResourceHandle texture) +Vec2I32 G_Count2D(G_TextureRef texture) { - G_D12_Resource *resource = G_D12_ResourceFromHandle(texture); - return resource->texture_dims; + // FIXME: Impl + Vec2I32 result = Zi; + return result; } -i32 G_CountWidth(G_ResourceHandle texture) +Vec3I32 G_Count3D(G_TextureRef texture) { - G_D12_Resource *resource = G_D12_ResourceFromHandle(texture); - return resource->texture_dims.x; + // FIXME: Impl + Vec3I32 result = Zi; + return result; } -i32 G_CountHeight(G_ResourceHandle texture) +i32 G_CountWidth(G_TextureRef texture) { - G_D12_Resource *resource = G_D12_ResourceFromHandle(texture); - return resource->texture_dims.y; + // FIXME: Impl + i32 result = Zi; + return result; } -i32 G_CountDepth(G_ResourceHandle texture) +i32 G_CountHeight(G_TextureRef texture) { - G_D12_Resource *resource = G_D12_ResourceFromHandle(texture); - return resource->texture_dims.z; + // FIXME: Impl + i32 result = Zi; + return result; } -i32 G_CountMips(G_ResourceHandle texture) +i32 G_CountDepth(G_TextureRef texture) { - G_D12_Resource *resource = G_D12_ResourceFromHandle(texture); - return resource->texture_mips; + // FIXME: Impl + i32 result = Zi; + return result; +} + +i32 G_CountMips(G_TextureRef texture) +{ + // FIXME: Impl + i32 result = Zi; + return result; } //- Map -void *G_HostPointerFromResource(G_ResourceHandle resource_handle) +void *G_CpuAddressFromBuffer(G_BufferRef buffer) { - G_D12_Resource *resource = G_D12_ResourceFromHandle(resource_handle); - return resource->mapped; + // FIXME: Impl + void *result = 0; + return result; } //////////////////////////////////////////////////////////// @@ -2001,14 +2414,12 @@ G_D12_StagingRegionNode *G_D12_PushStagingRegion(G_D12_CmdList *cl, u64 size) G_ArenaHandle gpu_arena_handle = G_AcquireArena(); ring->gpu_arena = G_D12_ArenaFromHandle(gpu_arena_handle); - G_ResourceHandle resource_handle = G_PushBuffer( - gpu_arena_handle, G_D12_MakeHandle(G_CommandListHandle, cl), - u8, - new_ring_size, - .flags = G_ResourceFlag_HostMemory | G_ResourceFlag_Uncached + ring->buffer = G_PushBuffer( + G_D12_MakeHandle(G_CommandListHandle, cl), gpu_arena_handle, + u8, new_ring_size, + .flags = G_MemoryFlag_HostUncached ); - ring->resource = G_D12_ResourceFromHandle(resource_handle); - ring->base = G_StructFromResource(resource_handle, u8); + ring->base = G_StructFromBuffer(ring->buffer, u8); } // Create initial region @@ -2565,15 +2976,15 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle) for (i32 i = 0; i < countof(bound_graphics_constants); ++i) { bound_graphics_constants[i] = U64Max; } // Fill built-in constants - if (!G_IsRefNil(queue->print_buffer_ref)) + if (!G_IsRefNil(queue->print_buffer)) { - slotted_constants[G_ShaderConst_PrintBufferRef] = queue->print_buffer_ref.v; + slotted_constants[G_ShaderConst_PrintBuffer] = queue->print_buffer.v; } { b32 tweak_b32 = TweakBool("Shader tweak-bool", 1); f32 tweak_f32 = TweakFloat("Shader tweak-float", 1, 0, 1); slotted_constants[G_ShaderConst_TweakB32] = tweak_b32; - slotted_constants[G_ShaderConst_TweakF32] = *(u32 *)&tweak_f32; + CopyBytes(&slotted_constants[G_ShaderConst_TweakF32], &tweak_f32, sizeof(tweak_f32)); } // Rasterizer state @@ -2759,7 +3170,7 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle) } } - if (dst->flags & G_ResourceFlag_AllowDepthStencil) + if (dst->flags & G_MemoryFlag_AllowTextureDepthStencil) { // Depth-stencil textures must have src box & dst offset set to 0 // https://learn.microsoft.com/en-us/windows/win32/api/d3d12/nf-d3d12-id3d12graphicscommandlist-copytextureregion @@ -3188,13 +3599,13 @@ void G_Sync(G_CommandListHandle cl_handle) cmd->kind = G_D12_CmdKind_Barrier; } -void G_SyncLayout(G_CommandListHandle cl_handle, G_ResourceHandle resource_handle, G_Layout layout) +void G_SyncLayout(G_CommandListHandle cl_handle, G_TextureRef texture, G_TextureLayout layout) { G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle); G_D12_Cmd *cmd = G_D12_PushCmd(cl); cmd->kind = G_D12_CmdKind_Barrier; - cmd->barrier.resource = G_D12_ResourceFromHandle(resource_handle); - cmd->barrier.acquire = layout == G_Layout_Exclusive; + cmd->barrier.resource = G_ResourceFromRef(texture); + cmd->barrier.acquire = layout == G_TextureLayout_Family; } //- Zone @@ -3751,7 +4162,7 @@ G_ResourceHandle G_PrepareBackbuffer(G_SwapchainHandle swapchain_handle, G_Forma Panic(Lit("Failed to retrieve swapchain buffer")); } ZeroStruct(backbuffer); - backbuffer->flags = G_ResourceFlag_AllowRenderTarget; + backbuffer->flags = G_MemoryFlag_AllowRenderTarget; backbuffer->uid = Atomic64FetchAdd(&G_D12.resource_creation_gen.v, 1) + 1; ID3D12Resource_GetDesc(d3d_resource, (D3D12_RESOURCE_DESC *)&backbuffer->d3d_desc); diff --git a/src/gpu/gpu_dx12/gpu_dx12_core.h b/src/gpu/gpu_dx12/gpu_dx12_core.h index 54a016e0..2ccd501f 100644 --- a/src/gpu/gpu_dx12/gpu_dx12_core.h +++ b/src/gpu/gpu_dx12/gpu_dx12_core.h @@ -21,7 +21,7 @@ ((G_D12_FrameLatency != 0) * DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT) \ ) -#define G_D12_MaxCbvSrvUavDescriptors (1024 * 128) +#define G_D12_MaxCbvSrvUavDescriptors (1024 * 512) #define G_D12_MaxSamplerDescriptors (1024 * 1) #define G_D12_MaxRtvDescriptors (1024 * 64) @@ -72,7 +72,7 @@ Struct(G_D12_Resource) G_D12_Resource *next; G_D12_Resource *prev; - G_ResourceFlag flags; + G_MemoryFlag flags; u64 uid; // D3D12 resource @@ -111,6 +111,8 @@ Struct(G_D12_ResourceList) //////////////////////////////////////////////////////////// //~ Descriptor types +#define G_D12_MaxDescriptorBundleCount (G_MaxMips * 2) + Enum(G_D12_DescriptorHeapKind) { G_D12_DescriptorHeapKind_CbvSrvUav, @@ -122,18 +124,34 @@ Enum(G_D12_DescriptorHeapKind) Struct(G_D12_DescriptorHeap) { - Arena *descriptors_arena; G_D12_DescriptorHeapKind kind; + Mutex mutex; + Arena *arena; + Arena *descriptor_indices_arena; + D3D12_DESCRIPTOR_HEAP_TYPE type; - u32 per_batch_count; - u32 descriptor_size; ID3D12DescriptorHeap *d3d_heap; D3D12_CPU_DESCRIPTOR_HANDLE start_handle; - Mutex mutex; - struct G_D12_Descriptor *first_free; - u32 max_count; + u32 stride; + u32 count; + u32 capacity; + + struct G_D12_Descriptor *first_free_descriptor_by_count[G_D12_MaxDescriptorBundleCount]; +}; + +Struct(G_D12_DescriptorDesc) +{ + G_D12_Resource *resource; + + u32 bundle_count; + + u64 buffer_element_offset; + u64 buffer_element_count; + u64 buffer_element_stride; + + RngI32 texture_mips; }; Struct(G_D12_Descriptor) @@ -141,13 +159,18 @@ Struct(G_D12_Descriptor) G_D12_Descriptor *next; G_D12_Descriptor *prev; - struct G_D12_Arena *gpu_arena; - G_QueueKind completion_queue_kind; - i64 completion_queue_target; + // Static data G_D12_DescriptorHeap *heap; - D3D12_CPU_DESCRIPTOR_HANDLE first_handle; - u32 index; + u32 base_index; + + // Per-lifetime data + + struct G_D12_Arena *gpu_arena; + G_D12_DescriptorDesc desc; + + G_QueueKind completion_queue_kind; + i64 completion_queue_target; }; Struct(G_D12_DescriptorList) @@ -195,7 +218,7 @@ Struct(G_D12_StagingRing) G_D12_Arena *gpu_arena; u64 size; - G_D12_Resource *resource; + G_BufferRef buffer; u8 *base; struct G_D12_StagingRegionNode *head_region_node; @@ -240,9 +263,8 @@ Struct(G_D12_Queue) // Global resources u64 print_buffer_size; - G_ResourceHandle print_buffer; - G_ResourceHandle print_readback_buffer; - G_ByteAddressBufferRef print_buffer_ref; + G_BufferRef print_buffer; + G_BufferRef print_readback_buffer; // Raw command lists struct G_D12_RawCommandList *first_committed_cl; @@ -719,8 +741,10 @@ void G_D12_ResetArena(G_D12_CmdList *cl, G_D12_Arena *gpu_arena); //////////////////////////////////////////////////////////// //~ Descriptor -G_D12_Descriptor *G_D12_DescriptorFromIndex(G_D12_DescriptorHeapKind heap_kind, u32 index); -G_D12_Descriptor *G_D12_PushDescriptor(G_D12_Arena *gpu_arena, G_D12_DescriptorHeapKind heap_kind); +// G_D12_Descriptor *G_D12_DescriptorFromIndex(G_D12_DescriptorHeapKind heap_kind, u32 index); +// G_D12_Descriptor *G_D12_PushDescriptor(G_D12_Arena *gpu_arena, G_D12_DescriptorHeapKind heap_kind); + +G_D12_Descriptor *G_D12_PushDescriptor(G_D12_Arena *gpu_arena, G_D12_DescriptorDesc desc, G_D12_DescriptorHeapKind heap_kind); //////////////////////////////////////////////////////////// //~ Command helpers diff --git a/src/gpu/gpu_shared.cgh b/src/gpu/gpu_shared.cgh index 473748d4..fe876934 100644 --- a/src/gpu/gpu_shared.cgh +++ b/src/gpu/gpu_shared.cgh @@ -4,9 +4,7 @@ typedef u32 G_BaseDescriptorIndex; Struct(G_BufferRef) { G_BaseDescriptorIndex v; }; -Struct(G_Texture1DRef) { G_BaseDescriptorIndex v; }; -Struct(G_Texture2DRef) { G_BaseDescriptorIndex v; }; -Struct(G_Texture3DRef) { G_BaseDescriptorIndex v; }; +Struct(G_TextureRef) { G_BaseDescriptorIndex v; }; Struct(G_SamplerRef) { G_BaseDescriptorIndex v; }; #define G_IsRefNil(r) ((r).v == 0)