gpu refactor progress

This commit is contained in:
jacob 2025-09-17 00:41:22 -05:00
parent 34294754c7
commit f231c8322b
6 changed files with 56 additions and 43 deletions

View File

@@ -588,14 +588,14 @@ ForceInline i64 Atomic64FetchAdd (Atomic64 *x, i64 a) { return
#if LanguageIsC #if LanguageIsC
Struct(TicketMutex) Struct(TicketMutex)
{ {
Atomic16Padded ticket; Atomic64Padded ticket;
Atomic16Padded serving; Atomic64Padded serving;
}; };
ForceInline void LockTicketMutex(TicketMutex *tm) ForceInline void LockTicketMutex(TicketMutex *tm)
{ {
u16 ticket = Atomic16FetchAdd(&tm->ticket.v, 1); i64 ticket = Atomic64FetchAdd(&tm->ticket.v, 1);
while (Atomic16Fetch(&tm->serving.v) != ticket) while (Atomic64Fetch(&tm->serving.v) != ticket)
{ {
_mm_pause(); _mm_pause();
} }
@@ -603,7 +603,7 @@ ForceInline void LockTicketMutex(TicketMutex *tm)
ForceInline void UnlockTicketMutex(TicketMutex *tm) ForceInline void UnlockTicketMutex(TicketMutex *tm)
{ {
Atomic16FetchAdd(&tm->serving.v, 1); Atomic64FetchAdd(&tm->serving.v, 1);
} }
#endif #endif

View File

@@ -174,9 +174,8 @@ Enum(GPU_ResourceKind)
Enum(GPU_ResourceFlag) Enum(GPU_ResourceFlag)
{ {
GPU_ResourceFlag_None = 0, GPU_ResourceFlag_None = 0,
GPU_ResourceFlag_AllowSrv = (1 << 0), GPU_ResourceFlag_AllowUav = (1 << 0),
GPU_ResourceFlag_AllowUav = (1 << 1), GPU_ResourceFlag_AllowRtv = (1 << 1),
GPU_ResourceFlag_AllowRtv = (1 << 2),
}; };
Enum(GPU_HeapKind) Enum(GPU_HeapKind)
@@ -215,7 +214,7 @@ Struct(GPU_ResourceDesc)
struct struct
{ {
GPU_HeapKind heap_kind; GPU_HeapKind heap_kind;
u32 element_capacity; u32 size;
u32 element_count; u32 element_count;
u32 element_size; u32 element_size;
} buffer; } buffer;

View File

@@ -380,10 +380,11 @@ JobDef(GPU_D12_StartQueueSync, _, __)
{ {
GPU_D12_SharedState *g = &GPU_D12_shared_state; GPU_D12_SharedState *g = &GPU_D12_shared_state;
HANDLE queue_fences_events[GPU_NumQueues] = ZI; HANDLE queue_fences_events[GPU_NumQueues] = ZI;
u64 queue_fences_seen[GPU_NumQueues] = ZI; i64 queue_fences_seen[GPU_NumQueues] = ZI;
for (i32 i = 0; i < countof(queue_fences_events); ++i) for (i32 i = 0; i < countof(queue_fences_events); ++i)
{ {
queue_fences_events[i] = CreateEvent(0, 0, 1, 0); queue_fences_events[i] = CreateEvent(0, 0, 1, 0);
queue_fences_seen[i] = -1;
} }
for (;;) for (;;)
{ {
@@ -391,8 +392,8 @@ JobDef(GPU_D12_StartQueueSync, _, __)
for (GPU_QueueKind queue_kind = 0; queue_kind < GPU_NumQueues; ++queue_kind) for (GPU_QueueKind queue_kind = 0; queue_kind < GPU_NumQueues; ++queue_kind)
{ {
GPU_D12_Queue *queue = GPU_D12_QueueFromKind(queue_kind); GPU_D12_Queue *queue = GPU_D12_QueueFromKind(queue_kind);
u64 last_seen = queue_fences_seen[queue_kind]; i64 last_seen = queue_fences_seen[queue_kind];
u64 completed = ID3D12Fence_GetCompletedValue(queue->submit_fence); i64 completed = ID3D12Fence_GetCompletedValue(queue->submit_fence);
if (completed > last_seen) if (completed > last_seen)
{ {
SetFence(&queue->sync_fence, completed); SetFence(&queue->sync_fence, completed);
@@ -442,10 +443,15 @@ GPU_Resource *GPU_AcquireResource(GPU_ResourceDesc desc)
{ {
GPU_D12_SharedState *g = &GPU_D12_shared_state; GPU_D12_SharedState *g = &GPU_D12_shared_state;
GPU_D12_Resource *r = 0; GPU_D12_Resource *r = 0;
u64 hash = GPU_D12_ReuseHashFromResourceDesc(desc);
/* Grab reusable */ if (desc.kind == GPU_ResourceKind_Buffer)
{
desc.buffer.size = MaxU64(AlignU64Pow2(desc.buffer.size), Kibi(64));
}
u64 reuse_hash = GPU_D12_ReuseHashFromResourceDesc(desc);
#if 0 #if 0
/* Grab reusable */
{ {
u64 bin_index = hash % countof(g->reuse_bins); u64 bin_index = hash % countof(g->reuse_bins);
GPU_D12_ReuseBin *bin = &g->reuse_bins[bin_index]; GPU_D12_ReuseBin *bin = &g->reuse_bins[bin_index];
@@ -493,7 +499,7 @@ GPU_Resource *GPU_AcquireResource(GPU_ResourceDesc desc)
if (r->reuse_hash == 0) if (r->reuse_hash == 0)
{ {
r->reuse_hash = hash; r->reuse_hash = reuse_hash;
switch (desc.kind) switch (desc.kind)
{ {
@@ -511,19 +517,16 @@ GPU_Resource *GPU_AcquireResource(GPU_ResourceDesc desc)
d3d_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; d3d_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
d3d_desc.Format = DXGI_FORMAT_UNKNOWN; d3d_desc.Format = DXGI_FORMAT_UNKNOWN;
d3d_desc.Alignment = 0; d3d_desc.Alignment = 0;
d3d_desc.Width = desc.buffer.element_capacity * desc.buffer.element_size; d3d_desc.Width = desc.buffer.size;
d3d_desc.Height = 1; d3d_desc.Height = 1;
d3d_desc.DepthOrArraySize = 1; d3d_desc.DepthOrArraySize = 1;
d3d_desc.MipLevels = 1; d3d_desc.MipLevels = 1;
d3d_desc.SampleDesc.Count = 1; d3d_desc.SampleDesc.Count = 1;
d3d_desc.SampleDesc.Quality = 0; d3d_desc.SampleDesc.Quality = 0;
d3d_desc.Flags |= D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE * !(desc.flags & GPU_ResourceFlag_AllowSrv);
d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS * !!(desc.flags & GPU_ResourceFlag_AllowUav); d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS * !!(desc.flags & GPU_ResourceFlag_AllowUav);
d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET * !!(desc.flags & GPU_ResourceFlag_AllowRtv); d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET * !!(desc.flags & GPU_ResourceFlag_AllowRtv);
D3D12_RESOURCE_STATES initial_state = desc.buffer.heap_kind == GPU_HeapKind_Upload ? D3D12_RESOURCE_STATE_GENERIC_READ : D3D12_RESOURCE_STATE_COPY_DEST; D3D12_RESOURCE_STATES initial_state = desc.buffer.heap_kind == GPU_HeapKind_Upload ? D3D12_RESOURCE_STATE_GENERIC_READ : D3D12_RESOURCE_STATE_COPY_DEST;
D3D12_CLEAR_VALUE clear_value = { .Format = d3d_desc.Format, .Color = { 0 } }; HRESULT hr = ID3D12Device_CreateCommittedResource(g->device, &heap_props, heap_flags, &d3d_desc, initial_state, 0, &IID_ID3D12Resource, (void **)&r->raw);
D3D12_CLEAR_VALUE *clear_value_ptr = d3d_desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET ? &clear_value : 0;
HRESULT hr = ID3D12Device_CreateCommittedResource(g->device, &heap_props, heap_flags, &d3d_desc, initial_state, clear_value_ptr, &IID_ID3D12Resource, (void **)&r->raw);
if (FAILED(hr)) if (FAILED(hr))
{ {
/* TODO: Don't panic */ /* TODO: Don't panic */
@@ -555,7 +558,6 @@ GPU_Resource *GPU_AcquireResource(GPU_ResourceDesc desc)
d3d_desc.MipLevels = 1; d3d_desc.MipLevels = 1;
d3d_desc.SampleDesc.Count = 1; d3d_desc.SampleDesc.Count = 1;
d3d_desc.SampleDesc.Quality = 0; d3d_desc.SampleDesc.Quality = 0;
d3d_desc.Flags |= D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE * !(desc.flags & GPU_ResourceFlag_AllowSrv);
d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS * !!(desc.flags & GPU_ResourceFlag_AllowUav); d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS * !!(desc.flags & GPU_ResourceFlag_AllowUav);
d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET * !!(desc.flags & GPU_ResourceFlag_AllowRtv); d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET * !!(desc.flags & GPU_ResourceFlag_AllowRtv);
D3D12_RESOURCE_STATES initial_state = D3D12_RESOURCE_STATE_COPY_DEST; D3D12_RESOURCE_STATES initial_state = D3D12_RESOURCE_STATE_COPY_DEST;
@@ -758,7 +760,7 @@ u64 GPU_EndCommandList(GPU_CommandList *gpu_cl)
Assert(indices->desc.buffer.element_size == 4); Assert(indices->desc.buffer.element_size == 4);
ibv.Format = GPU_D12_DxgiFormatFromGpuFormat(DXGI_FORMAT_R32_UINT); ibv.Format = GPU_D12_DxgiFormatFromGpuFormat(DXGI_FORMAT_R32_UINT);
} }
ibv.SizeInBytes = indices->desc.buffer.element_size * indices->desc.buffer.element_capacity; ibv.SizeInBytes = indices->desc.buffer.element_size * indices->desc.buffer.element_count;
indices_count = indices->desc.buffer.element_count; indices_count = indices->desc.buffer.element_count;
} }
} }
@@ -978,17 +980,28 @@ void GPU_CopyToMapped(GPU_Mapped *mapped, String data)
{ {
D3D12_RANGE read_range = ZI; D3D12_RANGE read_range = ZI;
u8 *dst = (u8 *)mapped->mem + placed_footprint.Offset; u8 *dst_base = (u8 *)mapped->mem + placed_footprint.Offset;
u8 *src = data.text; u8 *src_base = data.text;
u32 z_size = upload_row_size * upload_num_rows; u32 z_size = upload_row_size * upload_num_rows;
for (u32 z = 0; z < desc.DepthOrArraySize; ++z) b32 src_overflow = 0;
for (u32 z = 0; !src_overflow && z < desc.DepthOrArraySize; ++z)
{ {
u32 z_offset = z * z_size; u32 z_offset = z * z_size;
for (u32 y = 0; y < upload_num_rows; ++y) for (u32 y = 0; !src_overflow && y < upload_num_rows; ++y)
{ {
CopyBytes(dst + y * footprint.RowPitch + z_offset, (u8 *)src + y * upload_row_size + z_offset, upload_row_size); u8 *dst = dst_base + y * footprint.RowPitch + z_offset;
u8 *src = src_base + y * upload_row_size + z_offset;
i64 cpy_size = MinI64(upload_row_size, data.len - (src - src_base));
if (cpy_size > 0)
{
CopyBytes(dst, src, cpy_size);
}
else
{
src_overflow = 1;
}
} }
} }
} }

View File

@@ -394,7 +394,7 @@ GPU_Resource *AcquireGbuffer(GPU_Format format, Vec2I32 size)
__prof; __prof;
GPU_ResourceDesc desc = ZI; GPU_ResourceDesc desc = ZI;
desc.kind = GPU_ResourceKind_Texture2D; desc.kind = GPU_ResourceKind_Texture2D;
desc.flags = GPU_ResourceFlag_AllowSrv | GPU_ResourceFlag_AllowUav | GPU_ResourceFlag_AllowRtv; desc.flags = GPU_ResourceFlag_AllowUav | GPU_ResourceFlag_AllowRtv;
desc.texture.format = format; desc.texture.format = format;
desc.texture.size = VEC3I32(size.x, size.y, 1); desc.texture.size = VEC3I32(size.x, size.y, 1);
desc.texture.mip_levels = 1; desc.texture.mip_levels = 1;
@@ -403,15 +403,15 @@ GPU_Resource *AcquireGbuffer(GPU_Format format, Vec2I32 size)
//- Upload buffer //- Upload buffer
GPU_Resource *AcquireUploadBuffer(u32 element_count, u32 element_size, void *src) GPU_Resource *AcquireUploadBuffer_(void *src, u32 element_size, u32 element_count)
{ {
__prof; __prof;
GPU_ResourceDesc desc = ZI; GPU_ResourceDesc desc = ZI;
desc.kind = GPU_ResourceKind_Buffer; desc.kind = GPU_ResourceKind_Buffer;
desc.flags = GPU_ResourceFlag_None; desc.flags = GPU_ResourceFlag_None;
desc.buffer.heap_kind = GPU_HeapKind_Upload; desc.buffer.heap_kind = GPU_HeapKind_Upload;
desc.buffer.size = element_size * element_count;
desc.buffer.element_count = element_count; desc.buffer.element_count = element_count;
desc.buffer.element_capacity = element_count;
desc.buffer.element_size = element_size; desc.buffer.element_size = element_size;
GPU_Resource *r = GPU_AcquireResource(desc); GPU_Resource *r = GPU_AcquireResource(desc);
{ {
@@ -423,11 +423,11 @@ GPU_Resource *AcquireUploadBuffer(u32 element_count, u32 element_size, void *src
return r; return r;
} }
GPU_Resource *AcquireUploadBufferFromArena(u32 element_count, Arena *arena) GPU_Resource *AcquireUploadBufferFromArena_(Arena *arena, u32 element_size)
{ {
__prof; __prof;
u64 element_size = element_count > 0 ? arena->pos / element_count : 0; u64 element_count = arena->pos / element_size;
GPU_Resource *r = AcquireUploadBuffer(element_count, element_size, (void *)ArenaBase(arena)); GPU_Resource *r = AcquireUploadBuffer_(ArenaBase(arena), element_size, element_count);
return r; return r;
} }
@@ -2198,12 +2198,12 @@ void UpdateUser(P_Window *window)
/* Acquire transfer buffers */ /* Acquire transfer buffers */
/* TODO: Make these static */ /* TODO: Make these static */
u16 quad_indices[6] = { 0, 1, 2, 0, 2, 3 }; u16 quad_indices[6] = { 0, 1, 2, 0, 2, 3 };
GPU_Resource *quad_index_buffer = AcquireUploadBuffer(countof(quad_indices), sizeof(*quad_indices), quad_indices); GPU_Resource *quad_index_buffer = AcquireUploadBuffer(quad_indices, u16, countof(quad_indices));
GPU_Resource *material_instance_buffer = AcquireUploadBufferFromArena(g->material_instances_count, g->material_instances_arena); GPU_Resource *material_instance_buffer = AcquireUploadBufferFromArena(g->material_instances_arena, MaterialInstance);
GPU_Resource *ui_rect_instance_buffer = AcquireUploadBufferFromArena(g->ui_rect_instances_count, g->ui_rect_instances_arena); GPU_Resource *ui_rect_instance_buffer = AcquireUploadBufferFromArena(g->ui_rect_instances_arena, UiRectInstance);
GPU_Resource *ui_shape_verts_buffer = AcquireUploadBufferFromArena(g->ui_shape_verts_count, g->ui_shape_verts_arena); GPU_Resource *ui_shape_verts_buffer = AcquireUploadBufferFromArena(g->ui_shape_verts_arena, UiShapeVert);
GPU_Resource *ui_shape_indices_buffer = AcquireUploadBufferFromArena(g->ui_shape_indices_count, g->ui_shape_indices_arena); GPU_Resource *ui_shape_indices_buffer = AcquireUploadBufferFromArena(g->ui_shape_indices_arena, u32);
GPU_Resource *grids_buffer = AcquireUploadBufferFromArena(g->grids_count, g->grids_arena); GPU_Resource *grids_buffer = AcquireUploadBufferFromArena(g->grids_arena, MaterialGrid);
GPU_CommandList *cl = GPU_BeginCommandList(gpu_render_queue); GPU_CommandList *cl = GPU_BeginCommandList(gpu_render_queue);
{ {

View File

@@ -297,9 +297,12 @@ void DrawDebugConsole(i32 level, b32 minimized);
//////////////////////////////// ////////////////////////////////
//~ Gpu buffer helpers //~ Gpu buffer helpers
#define AcquireUploadBuffer(src, type, count) AcquireUploadBuffer_((src), sizeof(type), (count))
#define AcquireUploadBufferFromArena(arena, type) AcquireUploadBufferFromArena_((arena), sizeof(type))
GPU_Resource *AcquireGbuffer(GPU_Format format, Vec2I32 size); GPU_Resource *AcquireGbuffer(GPU_Format format, Vec2I32 size);
GPU_Resource *AcquireUploadBuffer(u32 element_count, u32 element_size, void *src); GPU_Resource *AcquireUploadBuffer_(void *src, u32 element_size, u32 element_count);
GPU_Resource *AcquireUploadBufferFromArena(u32 element_count, Arena *arena); GPU_Resource *AcquireUploadBufferFromArena_(Arena *arena, u32 element_size);
JobDecl(DelayReleaseGpuResources, { Fence *begin_fence; u64 begin_fence_target; GPU_Resource **resources; GPU_ReleaseFlag flags; }); JobDecl(DelayReleaseGpuResources, { Fence *begin_fence; u64 begin_fence_target; GPU_Resource **resources; GPU_ReleaseFlag flags; });
//////////////////////////////// ////////////////////////////////

View File

@@ -24,9 +24,7 @@ JobDef(S_LoadTexture, sig, _)
{ {
GPU_ResourceDesc desc = ZI; GPU_ResourceDesc desc = ZI;
desc.kind = GPU_ResourceKind_Texture2D; desc.kind = GPU_ResourceKind_Texture2D;
desc.flags = GPU_ResourceFlag_AllowSrv;
desc.flags = GPU_ResourceFlag_AllowUav; desc.flags = GPU_ResourceFlag_AllowUav;
desc.flags = GPU_ResourceFlag_AllowRtv;
desc.texture.format = GPU_Format_R8G8B8A8_Unorm_Srgb; desc.texture.format = GPU_Format_R8G8B8A8_Unorm_Srgb;
desc.texture.size = VEC3I32(decoded.width, decoded.height, 1); desc.texture.size = VEC3I32(decoded.width, decoded.height, 1);
desc.texture.mip_levels = 1; desc.texture.mip_levels = 1;