From f231c8322bb44e8ca79d6b371d368b75561a365d Mon Sep 17 00:00:00 2001 From: jacob Date: Wed, 17 Sep 2025 00:41:22 -0500 Subject: [PATCH] gpu refactor progress --- src/base/base.h | 10 ++++---- src/gpu/gpu.h | 7 +++--- src/gpu/gpu_dx12/gpu_dx12.c | 49 +++++++++++++++++++++++-------------- src/pp/pp.c | 24 +++++++++--------- src/pp/pp.h | 7 ++++-- src/sprite/sprite.c | 2 -- 6 files changed, 56 insertions(+), 43 deletions(-) diff --git a/src/base/base.h b/src/base/base.h index 5eb1883b..8c0b2c57 100644 --- a/src/base/base.h +++ b/src/base/base.h @@ -588,14 +588,14 @@ ForceInline i64 Atomic64FetchAdd (Atomic64 *x, i64 a) { return #if LanguageIsC Struct(TicketMutex) { - Atomic16Padded ticket; - Atomic16Padded serving; + Atomic64Padded ticket; + Atomic64Padded serving; }; ForceInline void LockTicketMutex(TicketMutex *tm) { - u16 ticket = Atomic16FetchAdd(&tm->ticket.v, 1); - while (Atomic16Fetch(&tm->serving.v) != ticket) + i64 ticket = Atomic64FetchAdd(&tm->ticket.v, 1); + while (Atomic64Fetch(&tm->serving.v) != ticket) { _mm_pause(); } @@ -603,7 +603,7 @@ ForceInline void LockTicketMutex(TicketMutex *tm) ForceInline void UnlockTicketMutex(TicketMutex *tm) { - Atomic16FetchAdd(&tm->serving.v, 1); + Atomic64FetchAdd(&tm->serving.v, 1); } #endif diff --git a/src/gpu/gpu.h b/src/gpu/gpu.h index b0e0e53e..806a9b58 100644 --- a/src/gpu/gpu.h +++ b/src/gpu/gpu.h @@ -174,9 +174,8 @@ Enum(GPU_ResourceKind) Enum(GPU_ResourceFlag) { GPU_ResourceFlag_None = 0, - GPU_ResourceFlag_AllowSrv = (1 << 0), - GPU_ResourceFlag_AllowUav = (1 << 1), - GPU_ResourceFlag_AllowRtv = (1 << 2), + GPU_ResourceFlag_AllowUav = (1 << 0), + GPU_ResourceFlag_AllowRtv = (1 << 1), }; Enum(GPU_HeapKind) @@ -215,7 +214,7 @@ Struct(GPU_ResourceDesc) struct { GPU_HeapKind heap_kind; - u32 element_capacity; + u32 size; u32 element_count; u32 element_size; } buffer; diff --git a/src/gpu/gpu_dx12/gpu_dx12.c b/src/gpu/gpu_dx12/gpu_dx12.c index 8b45eda1..ad9603f3 100644 --- a/src/gpu/gpu_dx12/gpu_dx12.c +++ b/src/gpu/gpu_dx12/gpu_dx12.c @@ -380,10 +380,11 @@ JobDef(GPU_D12_StartQueueSync, _, __) { GPU_D12_SharedState *g = &GPU_D12_shared_state; HANDLE queue_fences_events[GPU_NumQueues] = ZI; - u64 queue_fences_seen[GPU_NumQueues] = ZI; + i64 queue_fences_seen[GPU_NumQueues] = ZI; for (i32 i = 0; i < countof(queue_fences_events); ++i) { queue_fences_events[i] = CreateEvent(0, 0, 1, 0); + queue_fences_seen[i] = -1; } for (;;) { @@ -391,8 +392,8 @@ JobDef(GPU_D12_StartQueueSync, _, __) for (GPU_QueueKind queue_kind = 0; queue_kind < GPU_NumQueues; ++queue_kind) { GPU_D12_Queue *queue = GPU_D12_QueueFromKind(queue_kind); - u64 last_seen = queue_fences_seen[queue_kind]; - u64 completed = ID3D12Fence_GetCompletedValue(queue->submit_fence); + i64 last_seen = queue_fences_seen[queue_kind]; + i64 completed = ID3D12Fence_GetCompletedValue(queue->submit_fence); if (completed > last_seen) { SetFence(&queue->sync_fence, completed); @@ -442,10 +443,15 @@ GPU_Resource *GPU_AcquireResource(GPU_ResourceDesc desc) { GPU_D12_SharedState *g = &GPU_D12_shared_state; GPU_D12_Resource *r = 0; - u64 hash = GPU_D12_ReuseHashFromResourceDesc(desc); - /* Grab reusable */ + if (desc.kind == GPU_ResourceKind_Buffer) + { + desc.buffer.size = MaxU64(AlignU64Pow2(desc.buffer.size), Kibi(64)); + } + + u64 reuse_hash = GPU_D12_ReuseHashFromResourceDesc(desc); #if 0 + /* Grab reusable */ { u64 bin_index = hash % countof(g->reuse_bins); GPU_D12_ReuseBin *bin = &g->reuse_bins[bin_index]; @@ -493,7 +499,7 @@ GPU_Resource *GPU_AcquireResource(GPU_ResourceDesc desc) if (r->reuse_hash == 0) { - r->reuse_hash = hash; + r->reuse_hash = reuse_hash; switch (desc.kind) { @@ -511,19 +517,16 @@ GPU_Resource *GPU_AcquireResource(GPU_ResourceDesc desc) d3d_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; d3d_desc.Format = DXGI_FORMAT_UNKNOWN; d3d_desc.Alignment = 0; - d3d_desc.Width = desc.buffer.element_capacity * desc.buffer.element_size; + d3d_desc.Width = desc.buffer.size; d3d_desc.Height = 1; d3d_desc.DepthOrArraySize = 1; d3d_desc.MipLevels = 1; d3d_desc.SampleDesc.Count = 1; d3d_desc.SampleDesc.Quality = 0; - d3d_desc.Flags |= D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE * !(desc.flags & GPU_ResourceFlag_AllowSrv); d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS * !!(desc.flags & GPU_ResourceFlag_AllowUav); d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET * !!(desc.flags & GPU_ResourceFlag_AllowRtv); D3D12_RESOURCE_STATES initial_state = desc.buffer.heap_kind == GPU_HeapKind_Upload ? D3D12_RESOURCE_STATE_GENERIC_READ : D3D12_RESOURCE_STATE_COPY_DEST; - D3D12_CLEAR_VALUE clear_value = { .Format = d3d_desc.Format, .Color = { 0 } }; - D3D12_CLEAR_VALUE *clear_value_ptr = d3d_desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET ? &clear_value : 0; - HRESULT hr = ID3D12Device_CreateCommittedResource(g->device, &heap_props, heap_flags, &d3d_desc, initial_state, clear_value_ptr, &IID_ID3D12Resource, (void **)&r->raw); + HRESULT hr = ID3D12Device_CreateCommittedResource(g->device, &heap_props, heap_flags, &d3d_desc, initial_state, 0, &IID_ID3D12Resource, (void **)&r->raw); if (FAILED(hr)) { /* TODO: Don't panic */ @@ -555,7 +558,6 @@ GPU_Resource *GPU_AcquireResource(GPU_ResourceDesc desc) d3d_desc.MipLevels = 1; d3d_desc.SampleDesc.Count = 1; d3d_desc.SampleDesc.Quality = 0; - d3d_desc.Flags |= D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE * !(desc.flags & GPU_ResourceFlag_AllowSrv); d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS * !!(desc.flags & GPU_ResourceFlag_AllowUav); d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET * !!(desc.flags & GPU_ResourceFlag_AllowRtv); D3D12_RESOURCE_STATES initial_state = D3D12_RESOURCE_STATE_COPY_DEST; @@ -758,7 +760,7 @@ u64 GPU_EndCommandList(GPU_CommandList *gpu_cl) Assert(indices->desc.buffer.element_size == 4); ibv.Format = GPU_D12_DxgiFormatFromGpuFormat(DXGI_FORMAT_R32_UINT); } - ibv.SizeInBytes = indices->desc.buffer.element_size * indices->desc.buffer.element_capacity; + ibv.SizeInBytes = indices->desc.buffer.element_size * indices->desc.buffer.element_count; indices_count = indices->desc.buffer.element_count; } } @@ -978,17 +980,28 @@ void GPU_CopyToMapped(GPU_Mapped *mapped, String data) { D3D12_RANGE read_range = ZI; - u8 *dst = (u8 *)mapped->mem + placed_footprint.Offset; - u8 *src = data.text; + u8 *dst_base = (u8 *)mapped->mem + placed_footprint.Offset; + u8 *src_base = data.text; u32 z_size = upload_row_size * upload_num_rows; - for (u32 z = 0; z < desc.DepthOrArraySize; ++z) + b32 src_overflow = 0; + for (u32 z = 0; !src_overflow && z < desc.DepthOrArraySize; ++z) { u32 z_offset = z * z_size; - for (u32 y = 0; y < upload_num_rows; ++y) + for (u32 y = 0; !src_overflow && y < upload_num_rows; ++y) { - CopyBytes(dst + y * footprint.RowPitch + z_offset, (u8 *)src + y * upload_row_size + z_offset, upload_row_size); + u8 *dst = dst_base + y * footprint.RowPitch + z_offset; + u8 *src = src_base + y * upload_row_size + z_offset; + i64 cpy_size = MinI64(upload_row_size, data.len - (src - src_base)); + if (cpy_size > 0) + { + CopyBytes(dst, src, cpy_size); + } + else + { + src_overflow = 1; + } } } } diff --git a/src/pp/pp.c b/src/pp/pp.c index 455df07f..c59c35a9 100644 --- a/src/pp/pp.c +++ b/src/pp/pp.c @@ -394,7 +394,7 @@ GPU_Resource *AcquireGbuffer(GPU_Format format, Vec2I32 size) __prof; GPU_ResourceDesc desc = ZI; desc.kind = GPU_ResourceKind_Texture2D; - desc.flags = GPU_ResourceFlag_AllowSrv | GPU_ResourceFlag_AllowUav | GPU_ResourceFlag_AllowRtv; + desc.flags = GPU_ResourceFlag_AllowUav | GPU_ResourceFlag_AllowRtv; desc.texture.format = format; desc.texture.size = VEC3I32(size.x, size.y, 1); desc.texture.mip_levels = 1; @@ -403,15 +403,15 @@ GPU_Resource *AcquireGbuffer(GPU_Format format, Vec2I32 size) //- Upload buffer -GPU_Resource *AcquireUploadBuffer(u32 element_count, u32 element_size, void *src) +GPU_Resource *AcquireUploadBuffer_(void *src, u32 element_size, u32 element_count) { __prof; GPU_ResourceDesc desc = ZI; desc.kind = GPU_ResourceKind_Buffer; desc.flags = GPU_ResourceFlag_None; desc.buffer.heap_kind = GPU_HeapKind_Upload; + desc.buffer.size = element_size * element_count; desc.buffer.element_count = element_count; - desc.buffer.element_capacity = element_count; desc.buffer.element_size = element_size; GPU_Resource *r = GPU_AcquireResource(desc); { @@ -423,11 +423,11 @@ GPU_Resource *AcquireUploadBuffer(u32 element_count, u32 element_size, void *src return r; } -GPU_Resource *AcquireUploadBufferFromArena(u32 element_count, Arena *arena) +GPU_Resource *AcquireUploadBufferFromArena_(Arena *arena, u32 element_size) { __prof; - u64 element_size = element_count > 0 ? arena->pos / element_count : 0; - GPU_Resource *r = AcquireUploadBuffer(element_count, element_size, (void *)ArenaBase(arena)); + u64 element_count = arena->pos / element_size; + GPU_Resource *r = AcquireUploadBuffer_(ArenaBase(arena), element_size, element_count); return r; } @@ -2198,12 +2198,12 @@ void UpdateUser(P_Window *window) /* Acquire transfer buffers */ /* TODO: Make these static */ u16 quad_indices[6] = { 0, 1, 2, 0, 2, 3 }; - GPU_Resource *quad_index_buffer = AcquireUploadBuffer(countof(quad_indices), sizeof(*quad_indices), quad_indices); - GPU_Resource *material_instance_buffer = AcquireUploadBufferFromArena(g->material_instances_count, g->material_instances_arena); - GPU_Resource *ui_rect_instance_buffer = AcquireUploadBufferFromArena(g->ui_rect_instances_count, g->ui_rect_instances_arena); - GPU_Resource *ui_shape_verts_buffer = AcquireUploadBufferFromArena(g->ui_shape_verts_count, g->ui_shape_verts_arena); - GPU_Resource *ui_shape_indices_buffer = AcquireUploadBufferFromArena(g->ui_shape_indices_count, g->ui_shape_indices_arena); - GPU_Resource *grids_buffer = AcquireUploadBufferFromArena(g->grids_count, g->grids_arena); + GPU_Resource *quad_index_buffer = AcquireUploadBuffer(quad_indices, u16, countof(quad_indices)); + GPU_Resource *material_instance_buffer = AcquireUploadBufferFromArena(g->material_instances_arena, MaterialInstance); + GPU_Resource *ui_rect_instance_buffer = AcquireUploadBufferFromArena(g->ui_rect_instances_arena, UiRectInstance); + GPU_Resource *ui_shape_verts_buffer = AcquireUploadBufferFromArena(g->ui_shape_verts_arena, UiShapeVert); + GPU_Resource *ui_shape_indices_buffer = AcquireUploadBufferFromArena(g->ui_shape_indices_arena, u32); + GPU_Resource *grids_buffer = AcquireUploadBufferFromArena(g->grids_arena, MaterialGrid); GPU_CommandList *cl = GPU_BeginCommandList(gpu_render_queue); { diff --git a/src/pp/pp.h b/src/pp/pp.h index 6fde0995..13e12433 100644 --- a/src/pp/pp.h +++ b/src/pp/pp.h @@ -297,9 +297,12 @@ void DrawDebugConsole(i32 level, b32 minimized); //////////////////////////////// //~ Gpu buffer helpers +#define AcquireUploadBuffer(src, type, count) AcquireUploadBuffer_((src), sizeof(type), (count)) +#define AcquireUploadBufferFromArena(arena, type) AcquireUploadBufferFromArena_((arena), sizeof(type)) + GPU_Resource *AcquireGbuffer(GPU_Format format, Vec2I32 size); -GPU_Resource *AcquireUploadBuffer(u32 element_count, u32 element_size, void *src); -GPU_Resource *AcquireUploadBufferFromArena(u32 element_count, Arena *arena); +GPU_Resource *AcquireUploadBuffer_(void *src, u32 element_size, u32 element_count); +GPU_Resource *AcquireUploadBufferFromArena_(Arena *arena, u32 element_size); JobDecl(DelayReleaseGpuResources, { Fence *begin_fence; u64 begin_fence_target; GPU_Resource **resources; GPU_ReleaseFlag flags; }); //////////////////////////////// diff --git a/src/sprite/sprite.c b/src/sprite/sprite.c index c95ca2db..9fe43da6 100644 --- a/src/sprite/sprite.c +++ b/src/sprite/sprite.c @@ -24,9 +24,7 @@ JobDef(S_LoadTexture, sig, _) { GPU_ResourceDesc desc = ZI; desc.kind = GPU_ResourceKind_Texture2D; - desc.flags = GPU_ResourceFlag_AllowSrv; desc.flags = GPU_ResourceFlag_AllowUav; - desc.flags = GPU_ResourceFlag_AllowRtv; desc.texture.format = GPU_Format_R8G8B8A8_Unorm_Srgb; desc.texture.size = VEC3I32(decoded.width, decoded.height, 1); desc.texture.mip_levels = 1;