From 7c9f5f7e0632de8bdd80f57399468b695122655c Mon Sep 17 00:00:00 2001 From: jacob Date: Sun, 23 Nov 2025 18:00:26 -0600 Subject: [PATCH] gpu fence op --- src/base/base_controller.c | 2 +- src/base/base_controller.h | 2 +- src/gpu/gpu_core.h | 84 ++++++++++++++---------- src/gpu/gpu_dx12/gpu_dx12.c | 89 +++++++++++++++----------- src/gpu/gpu_dx12/gpu_dx12.h | 5 +- src/pp/pp_vis/pp_vis_core.h | 4 +- src/proto/proto.c | 18 +++++- src/proto/proto.lay | 1 + src/sprite/sprite.c | 23 +++---- src/sprite/sprite.h | 2 +- src/window/window_win32/window_win32.c | 2 +- 11 files changed, 141 insertions(+), 91 deletions(-) diff --git a/src/base/base_controller.c b/src/base/base_controller.c index 0baca395..0eefc840 100644 --- a/src/base/base_controller.c +++ b/src/base/base_controller.c @@ -11,7 +11,7 @@ String StringFromButton(Button button) [Button_M5] = CompLit("Mouse 5"), [Button_MWheelUp] = CompLit("Wheel Up"), [Button_MWheelDown] = CompLit("Wheel Down"), - [Button_Esc] = CompLit("Escape"), + [Button_Escape] = CompLit("Escape"), [Button_F1] = CompLit("F1"), [Button_F2] = CompLit("F2"), [Button_F3] = CompLit("F3"), diff --git a/src/base/base_controller.h b/src/base/base_controller.h index 2de21148..726f75a3 100644 --- a/src/base/base_controller.h +++ b/src/base/base_controller.h @@ -17,7 +17,7 @@ Enum(Button) Button_MWheelDown, //- Keyboard buttons - Button_Esc, + Button_Escape, Button_F1, Button_F2, Button_F3, diff --git a/src/gpu/gpu_core.h b/src/gpu/gpu_core.h index 5f80e04b..605fd732 100644 --- a/src/gpu/gpu_core.h +++ b/src/gpu/gpu_core.h @@ -167,29 +167,28 @@ Enum(GPU_AccessKind) { GPU_AccessKind_None, - GPU_AccessKind_AnyRead, + /* Generic */ + GPU_AccessKind_Generic_Read, + // GPU_AccessKind_Generic_ReadWrite, /* NOTE: Textures cannot transition to/from this access to another access kind. They must be created with it. 
*/ + /* Copy */ + GPU_AccessKind_Copy_Read, + GPU_AccessKind_Copy_Write, - GPU_AccessKind_CopyRead, - GPU_AccessKind_CopyWrite, - - GPU_AccessKind_AnyShaderRead, - GPU_AccessKind_AnyShaderReadWrite, - - GPU_AccessKind_ComputeRead, - GPU_AccessKind_ComputeReadWrite, - - GPU_AccessKind_VertexPixelRead, - GPU_AccessKind_VertexPixelReadWrite, - - GPU_AccessKind_VertexRead, - GPU_AccessKind_VertexReadWrite, - - GPU_AccessKind_PixelRead, - GPU_AccessKind_PixelReadWrite, + /* Shader read/write */ + GPU_AccessKind_AnyShader_Read, + GPU_AccessKind_AnyShader_ReadWrite, + GPU_AccessKind_ComputeShader_Read, + GPU_AccessKind_ComputeShader_ReadWrite, + GPU_AccessKind_VertexPixelShader_Read, + GPU_AccessKind_VertexPixelShader_ReadWrite, + GPU_AccessKind_VertexShader_Read, + GPU_AccessKind_VertexShader_ReadWrite, + GPU_AccessKind_PixelShader_Read, + GPU_AccessKind_PixelShader_ReadWrite, + /* Special */ GPU_AccessKind_RasterTarget, - GPU_AccessKind_Present, }; @@ -346,6 +345,25 @@ Enum(GPU_RasterMode) GPU_RasterMode_WireTriangleStrip, }; +//////////////////////////////////////////////////////////// +//~ Synchronization types + +Enum(GPU_FenceOpKind) +{ + GPU_FenceOpKind_Set, + GPU_FenceOpKind_Add, +}; + +Struct(GPU_FenceOp) +{ + GPU_FenceOpKind kind; + Fence *fence; + i64 v; +}; + +#define GPU_SetFence(_fence, _v) ((GPU_FenceOp) { .kind = GPU_FenceOpKind_Set, .fence = (_fence), .v = (_v) }) +#define GPU_AddFence(_fence, _v) ((GPU_FenceOp) { .kind = GPU_FenceOpKind_Add, .fence = (_fence), .v = (_v) }) + //////////////////////////////////////////////////////////// //~ Statistic types @@ -391,27 +409,27 @@ GPU_ResourceHandle GPU_PushSampler(GPU_ArenaHandle arena, GPU_SamplerDesc desc); } \ ) -#define GPU_PushTexture1D(arena, _size, _format, _initial_access) GPU_PushTextureEx((arena), \ +#define GPU_PushTexture1D(arena, _format, _size, _initial_access, ...) GPU_PushTextureEx((arena), \ (GPU_TextureDesc) { \ .kind = GPU_TextureKind_1D, \ .format = (_format), \ - .dims = Vec3I32((_size), 1, 
1), \ + .dims = VEC3I32((_size), 1, 1), \ .initial_access = (_initial_access), \ __VA_ARGS__ \ } \ ) -#define GPU_PushTexture2D(arena, _size, _format, _initial_access) GPU_PushTextureEx((arena), \ +#define GPU_PushTexture2D(arena, _format, _size, _initial_access, ...) GPU_PushTextureEx((arena), \ (GPU_TextureDesc) { \ .kind = GPU_TextureKind_2D, \ .format = (_format), \ - .dims = Vec3I32((_size).x, (_size).y, 1), \ + .dims = VEC3I32((_size).x, (_size).y, 1), \ .initial_access = (_initial_access), \ __VA_ARGS__ \ } \ ) -#define GPU_PushTexture3D(arena, _size, _format, _initial_access) GPU_PushTextureEx((arena), \ +#define GPU_PushTexture3D(arena, _format, _size, _initial_access, ...) GPU_PushTextureEx((arena), \ (GPU_TextureDesc) { \ .kind = GPU_TextureKind_3D, \ .format = (_format), \ @@ -453,14 +471,19 @@ u64 GPU_Count3D(GPU_ResourceHandle texture3d); //- Command list GPU_CommandListHandle GPU_PrepareCommandList(void); -void GPU_CommitCommandList(GPU_CommandListHandle cl, GPU_QueueKind queue); +void GPU_CommitCommandListEx(GPU_CommandListHandle cl, GPU_QueueKind queue, u64 fence_ops_count, GPU_FenceOp *fence_ops); +#define GPU_CommitCommandList(cl, queue) GPU_CommitCommandListEx((cl), (queue), 0, 0) //- Arena void GPU_ResetArena(GPU_CommandListHandle cl, GPU_ArenaHandle arena); -//- Copy -void GPU_CopyBuffer(GPU_CommandListHandle cl, GPU_ResourceHandle dst, u64 dst_offset, GPU_ResourceHandle src, u64 src_offset, u64 size); -void GPU_CopyTexture(GPU_CommandListHandle cl, GPU_ResourceHandle dst, Vec3I32 dst_offset, GPU_ResourceHandle src, Vec3I32 src_offset, Vec3I32 dims); +//- Cpu -> Gpu copy +void GPU_CopyCpuBytes(GPU_CommandListHandle cl, GPU_ResourceHandle dst, u64 dst_offset, void *src, RngU64 src_copy_range); +void GPU_CopyCpuTexels(GPU_CommandListHandle cl, GPU_ResourceHandle dst, Vec3I32 dst_offset, void *src, Vec3I32 src_dims, Rng3I32 src_copy_range); + +//- Gpu <-> Gpu copy +void GPU_CopyBytes(GPU_CommandListHandle cl, GPU_ResourceHandle dst, u64 dst_offset, 
GPU_ResourceHandle src, RngU64 src_copy_range); +void GPU_CopyTexels(GPU_CommandListHandle cl, GPU_ResourceHandle dst, Vec3I32 dst_offset, GPU_ResourceHandle src, Rng3I32 src_copy_range); //- Constants void GPU_SetConstU32 (GPU_CommandListHandle cl, i32 slot, u32 v); @@ -495,11 +518,6 @@ void GPU_ClearRasterTarget(GPU_CommandListHandle cl, RasterTargetGpuPtr ptr, Vec //- Profile void GPU_ProfN(GPU_CommandListHandle cl, String name); -//////////////////////////////////////////////////////////// -//~ @hookdecl Synchronization - -void GPU_CpuWaitOnQueue(GPU_QueueKind queue_kind); - //////////////////////////////////////////////////////////// //~ @hookdecl Statistics diff --git a/src/gpu/gpu_dx12/gpu_dx12.c b/src/gpu/gpu_dx12/gpu_dx12.c index 0a89b397..bd8a3d1a 100644 --- a/src/gpu/gpu_dx12/gpu_dx12.c +++ b/src/gpu/gpu_dx12/gpu_dx12.c @@ -1389,7 +1389,7 @@ GPU_CommandListHandle GPU_PrepareCommandList(void) return (GPU_CommandListHandle) { .v = (u64)cl }; } -void GPU_CommitCommandList(GPU_CommandListHandle cl_handle, GPU_QueueKind queue_kind) +void GPU_CommitCommandListEx(GPU_CommandListHandle cl_handle, GPU_QueueKind queue_kind, u64 fence_ops_count, GPU_FenceOp *fence_ops) { GPU_D12_SharedState *g = &GPU_D12_shared_state; GPU_D12_CmdList *cl = GPU_D12_CmdListFromHandle(cl_handle); @@ -1521,7 +1521,7 @@ void GPU_CommitCommandList(GPU_CommandListHandle cl_handle, GPU_QueueKind queue_ if (lookup->last_access_cmd) { GPU_D12_Cmd *last_cmd = lookup->last_access_cmd; - if (last_cmd->access.batch_gen != batch_gen) + if (last_cmd->access.batch_gen != batch_gen || last_cmd->access.is_queue_specific != cmd->access.is_queue_specific) { /* Access is part of new batch */ last_cmd->access.is_end_of_batch = 1; @@ -1571,6 +1571,8 @@ void GPU_CommitCommandList(GPU_CommandListHandle cl_handle, GPU_QueueKind queue_ cmd_idx += 1; } break; + //- Access + case GPU_D12_CmdKind_Access: { batch_access_idx_opl = cmd_idx + 1; @@ -1592,6 +1594,7 @@ void GPU_CommitCommandList(GPU_CommandListHandle 
cl_handle, GPU_QueueKind queue_ { GPU_D12_Resource *resource = access_cmd->access.resource; D3D12_BARRIER_TYPE barrier_type = resource->is_texture ? D3D12_BARRIER_TYPE_TEXTURE : D3D12_BARRIER_TYPE_BUFFER; + b32 is_queue_specific = access_cmd->access.is_queue_specific; /* Translate gpu access kind -> d3d barrier fields */ D3D12_BARRIER_SYNC d3d_syncs[2] = ZI; @@ -1610,6 +1613,13 @@ void GPU_CommitCommandList(GPU_CommandListHandle cl_handle, GPU_QueueKind queue_ d3d_layouts[i] = resource->texture_layout; } break; + case GPU_AccessKind_Generic_Read: + { + d3d_syncs[i] = D3D12_BARRIER_SYNC_RENDER_TARGET; + d3d_accesses[i] = D3D12_BARRIER_ACCESS_RENDER_TARGET; + d3d_layouts[i] = D3D12_BARRIER_LAYOUT_RENDER_TARGET; + } break; + case GPU_AccessKind_RasterTarget: { d3d_syncs[i] = D3D12_BARRIER_SYNC_RENDER_TARGET; @@ -1637,18 +1647,20 @@ void GPU_CommitCommandList(GPU_CommandListHandle cl_handle, GPU_QueueKind queue_ barrier->AccessBefore = d3d_accesses[0]; barrier->AccessAfter = d3d_accesses[1]; barrier->pResource = resource->d3d_resource; + barrier->Offset = 0; + barrier->Size = U64Max; } break; case D3D12_BARRIER_TYPE_TEXTURE: { D3D12_TEXTURE_BARRIER *barrier = &texture_barriers[texture_barriers_count++]; - barrier->SyncBefore = d3d_syncs[0]; - barrier->SyncAfter = d3d_syncs[1]; - barrier->AccessBefore = d3d_accesses[0]; - barrier->AccessAfter = d3d_accesses[1]; - barrier->LayoutBefore = d3d_layouts[0]; - barrier->LayoutAfter = d3d_layouts[1]; - barrier->pResource = resource->d3d_resource; + barrier->SyncBefore = d3d_syncs[0]; + barrier->SyncAfter = d3d_syncs[1]; + barrier->AccessBefore = d3d_accesses[0]; + barrier->AccessAfter = d3d_accesses[1]; + barrier->LayoutBefore = d3d_layouts[0]; + barrier->LayoutAfter = d3d_layouts[1]; + barrier->pResource = resource->d3d_resource; barrier->Subresources.IndexOrFirstMipLevel = 0xffffffff; resource->texture_layout = d3d_layouts[1]; } break; @@ -1761,7 +1773,8 @@ void GPU_CommitCommandList(GPU_CommandListHandle cl_handle, 
GPU_QueueKind queue_ // cmd_idx += 1; // } break; - //- Dispatch compute shader + //- Compute + case GPU_D12_CmdKind_Compute: { GPU_D12_Pipeline *pipeline = 0; @@ -1805,7 +1818,7 @@ void GPU_CommitCommandList(GPU_CommandListHandle cl_handle, GPU_QueueKind queue_ cmd_idx += 1; } break; - //- Dispatch Vs/Ps shader + //- Rasterize case GPU_D12_CmdKind_Rasterize: { @@ -2033,14 +2046,26 @@ void GPU_ResetArena(GPU_CommandListHandle cl_handle, GPU_ArenaHandle arena) /* TODO */ } -//- Copy +//- Cpu -> Gpu copy -void GPU_CopyBuffer(GPU_CommandListHandle cl_handle, GPU_ResourceHandle dst, u64 dst_offset, GPU_ResourceHandle src, u64 src_offset, u64 size) +void GPU_CopyCpuBytes(GPU_CommandListHandle cl, GPU_ResourceHandle dst, u64 dst_offset, void *src, RngU64 src_copy_range) { /* TODO */ } -void GPU_CopyTexture(GPU_CommandListHandle cl_handle, GPU_ResourceHandle dst, Vec3I32 dst_offset, GPU_ResourceHandle src, Vec3I32 src_offset, Vec3I32 dims) +void GPU_CopyCpuTexels(GPU_CommandListHandle cl, GPU_ResourceHandle dst, Vec3I32 dst_offset, void *src, Vec3I32 src_dims, Rng3I32 src_copy_range) +{ + /* TODO */ +} + +//- Gpu <-> Gpu copy + +void GPU_CopyBytes(GPU_CommandListHandle cl, GPU_ResourceHandle dst, u64 dst_offset, GPU_ResourceHandle src, RngU64 src_copy_range) +{ + /* TODO */ +} + +void GPU_CopyTexels(GPU_CommandListHandle cl, GPU_ResourceHandle dst, Vec3I32 dst_offset, GPU_ResourceHandle src, Rng3I32 src_copy_range) { /* TODO */ } @@ -2104,13 +2129,24 @@ void GPU_SetConstSampler(GPU_CommandListHandle cl_handle, i32 slot, SamplerGpuPt //- Access -void GPU_SyncAccess(GPU_CommandListHandle cl_handle, GPU_ResourceHandle handle, GPU_AccessKind kind) +void GPU_SyncQueueAccess(GPU_CommandListHandle cl_handle, GPU_ResourceHandle handle, GPU_AccessKind kind) { GPU_D12_CmdList *cl = GPU_D12_CmdListFromHandle(cl_handle); GPU_D12_Cmd *cmd = GPU_D12_PushCmd(cl); cmd->kind = GPU_D12_CmdKind_Access; cmd->access.after = kind; cmd->access.resource = GPU_D12_ResourceFromHandle(handle); + 
cmd->access.is_queue_specific = 1; +} + +void GPU_SyncGlobalAccess(GPU_CommandListHandle cl_handle, GPU_ResourceHandle handle, GPU_AccessKind kind) +{ + GPU_D12_CmdList *cl = GPU_D12_CmdListFromHandle(cl_handle); + GPU_D12_Cmd *cmd = GPU_D12_PushCmd(cl); + cmd->kind = GPU_D12_CmdKind_Access; + cmd->access.after = kind; + cmd->access.resource = GPU_D12_ResourceFromHandle(handle); + cmd->access.is_queue_specific = 0; } //- Compute @@ -2226,28 +2262,6 @@ void GPU_ProfN(GPU_CommandListHandle cl, String name) // } // } -//////////////////////////////////////////////////////////// -//~ @hookimpl Synchronization - -void GPU_CpuWaitOnQueue(GPU_QueueKind queue_kind) -{ - GPU_D12_Queue *queue = GPU_D12_QueueFromKind(queue_kind); - u64 commit_fence_target = 0; - { - Lock lock = LockS(&queue->commit_mutex); - commit_fence_target = queue->commit_fence_target; - Unlock(&lock); - } - if (commit_fence_target > 0) - { - /* TODO: Cache event handle */ - HANDLE event = CreateEvent(0, 0, 0, 0); - ID3D12Fence_SetEventOnCompletion(queue->commit_fence, commit_fence_target, event); - WaitForSingleObject(event, INFINITE); - CloseHandle(event); - } -} - //////////////////////////////////////////////////////////// //~ @hookimpl Statistics @@ -2305,7 +2319,6 @@ GPU_ResourceHandle GPU_PrepareBackbuffer(GPU_SwapchainHandle swapchain_handle, G size = VEC2I32(MaxI32(size.x, 1), MaxI32(size.y, 1)); GPU_D12_Queue *direct_queue = GPU_D12_QueueFromKind(GPU_QueueKind_Direct); - /* Initialize swapchain */ if (!swapchain->d3d_swapchain) { diff --git a/src/gpu/gpu_dx12/gpu_dx12.h b/src/gpu/gpu_dx12/gpu_dx12.h index 40f814da..a36d8a34 100644 --- a/src/gpu/gpu_dx12/gpu_dx12.h +++ b/src/gpu/gpu_dx12/gpu_dx12.h @@ -208,9 +208,10 @@ Struct(GPU_D12_Cmd) GPU_D12_Resource *resource; /* Post-batch data */ - b32 is_end_of_batch; - u64 batch_gen; GPU_AccessKind before; + b32 is_end_of_batch; + b32 is_queue_specific; + u64 batch_gen; } access; struct diff --git a/src/pp/pp_vis/pp_vis_core.h 
b/src/pp/pp_vis/pp_vis_core.h index c04ab326..387d4b99 100644 --- a/src/pp/pp_vis/pp_vis_core.h +++ b/src/pp/pp_vis/pp_vis_core.h @@ -1,9 +1,9 @@ //////////////////////////////////////////////////////////// //~ Command table -#define V_CmdsTableXMacro(X) \ +#define V_CmdsTableXMacro(X) \ X(nop, NOP, V_CmdDescFlag_HideFromPalette, V_HOTKEY(0), ) \ - X(exit_program, Exit Program, V_CmdDescFlag_HideFromPalette, V_HOTKEY( Button_Esc ) ) \ + X(exit_program, Exit Program, V_CmdDescFlag_HideFromPalette, V_HOTKEY( Button_Escape ) ) \ X(toggle_command_palette, Toggle Command Palette, V_CmdDescFlag_HideFromPalette, V_HOTKEY( Button_P, .ctrl = 1, .shift = 1 ), ) \ X(toggle_ui_debug, Toggle UI Debug, V_CmdDescFlag_None, V_HOTKEY( Button_F5 ), ) \ X(toggle_console, Toggle Developer Console, V_CmdDescFlag_None, V_HOTKEY( Button_GraveAccent ), ) \ diff --git a/src/proto/proto.c b/src/proto/proto.c index 17375eb8..3d8e8c04 100644 --- a/src/proto/proto.c +++ b/src/proto/proto.c @@ -1,4 +1,5 @@ -void PR_Startup(void) +JobDecl(PR_RunForever, EmptySig); +JobImpl(PR_RunForever, _sig, _id) { GPU_ArenaHandle gpu_frame_arena = GPU_AcquireArena(); @@ -8,6 +9,16 @@ void PR_Startup(void) for (;;) { WND_Frame window_frame = WND_BeginFrame(); + for (u64 cev_idx = 0; cev_idx < window_frame.controller_events.count; ++cev_idx) + { + ControllerEvent *cev = &window_frame.controller_events.events[cev_idx]; + if (cev->kind == ControllerEventKind_Quit || + (cev->kind == ControllerEventKind_ButtonDown && cev->button == Button_Escape)) + { + SignalExit(0); + } + } + { if (!swapchain_initialized) { @@ -41,3 +52,8 @@ void PR_Startup(void) WND_EndFrame(window_frame); } } + +void PR_Startup(void) +{ + RunJob(PR_RunForever); +} diff --git a/src/proto/proto.lay b/src/proto/proto.lay index 81ad5541..e156329d 100644 --- a/src/proto/proto.lay +++ b/src/proto/proto.lay @@ -4,6 +4,7 @@ @Dep gpu @Dep window +@Dep sprite //- Impl diff --git a/src/sprite/sprite.c b/src/sprite/sprite.c index c1008b2a..4c502cb8 100644 
--- a/src/sprite/sprite.c +++ b/src/sprite/sprite.c @@ -23,22 +23,23 @@ JobImpl(SPR_LoadTexture, sig, _) /* Upload texture to gpu */ if (ok) { - GPU_Arena *gpu_perm = GPU_Perm(); - GpuPointer gpu_tex = GPU_PushTexture(gpu_perm, - GPU_TextureKind_2D, - GPU_Format_R8G8B8A8_Unorm_Srgb, - VEC3I32(decoded.width, decoded.height, 1), - GPU_TextureFlag_Allow); - texture->gpu_texture = gpu_tex; + GPU_ArenaHandle gpu_perm = GPU_PermArena(); + GPU_ResourceHandle gpu_resource = GPU_PushTexture2D(gpu_perm, + GPU_Format_R8G8B8A8_Unorm_Srgb, + VEC2I32(decoded.width, decoded.height), + GPU_AccessKind_Generic_Read); + // texture->texture = gpu_tex; texture->width = decoded.width; texture->height = decoded.height; - GPU_CommandList *cl = GPU_OpenCommandList(GPU_QueueKind_BackgroundCopy); + GPU_CommandListHandle cl = GPU_PrepareCommandList(); { - GPU_ReadCpu(cl, gpu_tex, decoded.data); - GPU_SetCpuFence(cl, &entry->texture_ready_fence, 1); + GPU_CopyCpuTexels(cl, + gpu_resource, VEC3I32(0,0,0), + decoded.pixels, VEC3I32(decoded.width, decoded.height, 1), + RNG3I32(VEC3I32(0,0,0), VEC3I32(decoded.width, decoded.height, 1))); } - GPU_EndCommandList(cl); + GPU_CommitCommandListEx(cl, GPU_QueueKind_AsyncCopy, 1, &GPU_SetFence(&entry->texture_ready_fence, 1)); } EndScratch(scratch); diff --git a/src/sprite/sprite.h b/src/sprite/sprite.h index 6c6e6d77..591e9617 100644 --- a/src/sprite/sprite.h +++ b/src/sprite/sprite.h @@ -17,7 +17,7 @@ Struct(SPR_SliceKey) Struct(SPR_Texture) { b32 valid; - GpuTexture gpu_texture; + Texture2DGpuPtr texture; u32 width; u32 height; }; diff --git a/src/window/window_win32/window_win32.c b/src/window/window_win32/window_win32.c index f8793342..e5a60024 100644 --- a/src/window/window_win32/window_win32.c +++ b/src/window/window_win32/window_win32.c @@ -22,7 +22,7 @@ void WND_Startup(void) { g->vk_to_button[i] = (Button)j; } - g->vk_to_button[VK_ESCAPE] = Button_Esc; + g->vk_to_button[VK_ESCAPE] = Button_Escape; g->vk_to_button[VK_OEM_3] = Button_GraveAccent; 
g->vk_to_button[VK_OEM_MINUS] = Button_Minus; g->vk_to_button[VK_OEM_PLUS] = Button_Equal;