From 97237b5ed999e5d0d73b78a6a345d64f50e49d9c Mon Sep 17 00:00:00 2001 From: jacob Date: Sat, 6 Dec 2025 04:46:30 -0600 Subject: [PATCH] ditch jobs in prototype --- src/base/base_win32/base_win32.c | 36 +--- src/base/base_win32/base_win32.h | 2 +- src/config.h | 10 +- src/gpu/gpu_common.c | 8 +- src/gpu/gpu_common.h | 4 +- src/gpu/gpu_dx12/gpu_dx12.c | 210 +++++++++---------- src/gpu/gpu_dx12/gpu_dx12.h | 5 - src/platform/platform_win32/platform_win32.c | 5 +- src/platform/platform_win32/platform_win32.h | 2 +- src/proto/proto.c | 6 +- src/sprite/sprite.c | 2 +- src/window/window_win32/window_win32.c | 7 +- src/window/window_win32/window_win32.h | 2 +- 13 files changed, 124 insertions(+), 175 deletions(-) diff --git a/src/base/base_win32/base_win32.c b/src/base/base_win32/base_win32.c index 1e3381c0..1ef03f50 100644 --- a/src/base/base_win32/base_win32.c +++ b/src/base/base_win32/base_win32.c @@ -249,7 +249,7 @@ void SignalExit(i32 code) { W32_SharedState *g = &W32_shared_state; Atomic32Set(&g->exit_code, code); - SetEvent(g->exit_begin_event); + SetEvent(g->exit_event); } void ExitNow(i32 code) @@ -277,8 +277,8 @@ i32 W32_Main(void) } /* Setup events */ - g->panic_event = CreateEventW(0, 1, 0, 0); - g->exit_begin_event = CreateEventW(0, 1, 0, 0); + g->panic_event = CreateEventW(0, 1, 0, 0); + g->exit_event = CreateEventW(0, 1, 0, 0); g->main_thread_id = GetCurrentThreadId(); SetThreadDescription(GetCurrentThread(), L"Main thread"); @@ -330,20 +330,11 @@ i32 W32_Main(void) StartupLayers(); } - /* Wait for panic */ - if (!Atomic32Fetch(&g->panicking)) - { - HANDLE handles[] = { - g->panic_event, - }; - WaitForMultipleObjects(countof(handles), handles, 0, INFINITE); - } - /* Wait for exit start or panic */ if (!Atomic32Fetch(&g->panicking)) { HANDLE handles[] = { - g->exit_begin_event, + g->exit_event, g->panic_event, }; DWORD wake = WaitForMultipleObjects(countof(handles), handles, 0, INFINITE); @@ -362,25 +353,6 @@ i32 W32_Main(void) } } - /* Wait for exit end or panic */ - if (!Atomic32Fetch(&g->panicking)) - { - HANDLE handles[] = { - g->panic_event - }; - WaitForMultipleObjects(countof(handles), handles, 0, INFINITE); - } - - /* Signal swap finish */ - if (!Atomic32Fetch(&g->panicking) && IsSwappingOut()) - { - HANDLE swap_end_event = OpenEventW(EVENT_MODIFY_STATE, 0, L"Local\\pp_swap_end"); - if (swap_end_event != 0) - { - SetEvent(swap_end_event); - } - } - /* Exit */ if (Atomic32Fetch(&g->panicking)) { diff --git a/src/base/base_win32/base_win32.h b/src/base/base_win32/base_win32.h index 6d2d6903..6b3b86e3 100644 --- a/src/base/base_win32/base_win32.h +++ b/src/base/base_win32/base_win32.h @@ -74,7 +74,7 @@ Struct(W32_SharedState) Atomic32 panicking; wchar_t panic_wstr[4096]; HANDLE panic_event; - HANDLE exit_begin_event; + HANDLE exit_event; //- Exit funcs Atomic32 num_exit_funcs; diff --git a/src/config.h b/src/config.h index d9997ccf..de83fc90 100644 --- a/src/config.h +++ b/src/config.h @@ -69,14 +69,8 @@ #define FLOOD_DEBUG 0 -#define GPU_DEBUG 1 -#define GPU_DEBUG_VALIDATION 1 - -/* If virtual fibers are enabled, each fiber will get its own OS thread, - * and fiber suspend/resume will be emulated using OS thread primitives. - * This is slow but allows for easier debugging in tricky cases - * since the debugger won't be confused by fiber context switching. */ -#define VIRTUAL_FIBERS 0 +#define GPU_DEBUG 0 +#define GPU_DEBUG_VALIDATION 0 /* If enabled, bitbuffs will insert/verify magic numbers & length for each read & write */ #define BITBUFF_DEBUG 0 diff --git a/src/gpu/gpu_common.c b/src/gpu/gpu_common.c index fb74e570..0a074cc7 100644 --- a/src/gpu/gpu_common.c +++ b/src/gpu/gpu_common.c @@ -1,4 +1,5 @@ GPU_SharedUtilState GPU_shared_util_state = ZI; +ThreadLocal GPU_ArenaHandle GPU_t_perm_arena = ZI; //////////////////////////////////////////////////////////// //~ Startup @@ -93,12 +94,11 @@ void GPU_StartupCommon(void) GPU_ArenaHandle GPU_PermArena(void) { - i16 fiber_id = FiberId(); - GPU_ArenaHandle perm = GPU_shared_util_state.perm_arenas[fiber_id]; + GPU_ArenaHandle perm = GPU_t_perm_arena; if (GPU_IsArenaNil(perm)) { - GPU_shared_util_state.perm_arenas[fiber_id] = GPU_AcquireArena(); - perm = GPU_shared_util_state.perm_arenas[fiber_id]; + GPU_t_perm_arena = GPU_AcquireArena(); + perm = GPU_t_perm_arena; } return perm; } diff --git a/src/gpu/gpu_common.h b/src/gpu/gpu_common.h index 576586db..70bdcc27 100644 --- a/src/gpu/gpu_common.h +++ b/src/gpu/gpu_common.h @@ -7,10 +7,10 @@ Struct(GPU_SharedUtilState) SamplerStateHandle pt_sampler; GPU_IndexBufferDesc quad_indices; Texture3DHandle noise_tex; - - GPU_ArenaHandle perm_arenas[MaxFibers]; } extern GPU_shared_util_state; +extern ThreadLocal GPU_ArenaHandle GPU_t_perm_arena; + //////////////////////////////////////////////////////////// //~ Startup diff --git a/src/gpu/gpu_dx12/gpu_dx12.c b/src/gpu/gpu_dx12/gpu_dx12.c index 187915cd..85f51002 100644 --- a/src/gpu/gpu_dx12/gpu_dx12.c +++ b/src/gpu/gpu_dx12/gpu_dx12.c @@ -400,118 +400,12 @@ D3D12_BARRIER_LAYOUT GPU_D12_BarrierLayoutFromLayout(GPU_Layout layout) //////////////////////////////////////////////////////////// //~ Pipeline -JobImpl(GPU_D12_LoadPipeline, sig, _) -{ - GPU_D12_SharedState *g = &GPU_D12_shared_state; - GPU_D12_Pipeline *pipeline = sig->pipeline; - GPU_D12_PipelineDesc desc = pipeline->desc; - - HRESULT hr = 0; - b32 ok = 1; - String error_str = ZI; - - /* Create PSO */ - ID3D12PipelineState *pso = 0; - if (ok && (!IsResourceNil(desc.vs.resource) || !IsResourceNil(desc.ps.resource))) - { - D3D12_RASTERIZER_DESC raster_desc = ZI; - if (desc.is_wireframe) - { - raster_desc.FillMode = D3D12_FILL_MODE_WIREFRAME; - } - else - { - raster_desc.FillMode = D3D12_FILL_MODE_SOLID; - } - raster_desc.CullMode = D3D12_CULL_MODE_NONE; - raster_desc.FrontCounterClockwise = 0; - raster_desc.DepthBias = D3D12_DEFAULT_DEPTH_BIAS; - raster_desc.DepthBiasClamp = D3D12_DEFAULT_DEPTH_BIAS_CLAMP; - raster_desc.SlopeScaledDepthBias = D3D12_DEFAULT_SLOPE_SCALED_DEPTH_BIAS; - raster_desc.DepthClipEnable = 1; - raster_desc.MultisampleEnable = 0; - raster_desc.AntialiasedLineEnable = 0; - raster_desc.ForcedSampleCount = 0; - raster_desc.ConservativeRaster = D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF; - - D3D12_BLEND_DESC blend_desc = ZI; - blend_desc.AlphaToCoverageEnable = 0; - blend_desc.IndependentBlendEnable = 0; - blend_desc.RenderTarget[0].BlendEnable = 1; - blend_desc.RenderTarget[0].SrcBlend = D3D12_BLEND_SRC_ALPHA; - blend_desc.RenderTarget[0].DestBlend = D3D12_BLEND_INV_SRC_ALPHA; - blend_desc.RenderTarget[0].BlendOp = D3D12_BLEND_OP_ADD; - blend_desc.RenderTarget[0].SrcBlendAlpha = D3D12_BLEND_ONE; - blend_desc.RenderTarget[0].DestBlendAlpha = D3D12_BLEND_INV_SRC_ALPHA; - blend_desc.RenderTarget[0].BlendOpAlpha = D3D12_BLEND_OP_ADD; - blend_desc.RenderTarget[0].RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_ALL; - - D3D12_DEPTH_STENCIL_DESC ds_desc = ZI; - ds_desc.DepthEnable = 0; - ds_desc.StencilEnable = 0; - - String vs = DataFromResource(desc.vs.resource); - String ps = DataFromResource(desc.ps.resource); - - D3D12_GRAPHICS_PIPELINE_STATE_DESC pso_desc = ZI; - pso_desc.pRootSignature = g->bindless_rootsig; - pso_desc.VS.pShaderBytecode = vs.text; - pso_desc.VS.BytecodeLength = vs.len; - pso_desc.PS.pShaderBytecode = ps.text; - pso_desc.PS.BytecodeLength = ps.len; - pso_desc.RasterizerState = raster_desc; - pso_desc.BlendState = blend_desc; - pso_desc.DepthStencilState = ds_desc; - pso_desc.PrimitiveTopologyType = desc.topology_type; - pso_desc.SampleMask = UINT_MAX; - pso_desc.SampleDesc.Count = 1; - pso_desc.SampleDesc.Quality = 0; - for (i32 i = 0; i < (i32)countof(desc.render_target_formats); ++i) - { - StaticAssert(countof(pso_desc.RTVFormats) <= countof(desc.render_target_formats)); - DXGI_FORMAT format = GPU_D12_DxgiFormatFromGpuFormat(desc.render_target_formats[i]); - if (format != DXGI_FORMAT_UNKNOWN) - { - pso_desc.RTVFormats[pso_desc.NumRenderTargets++] = format; - } - else - { - break; - } - } - hr = ID3D12Device_CreateGraphicsPipelineState(g->device, &pso_desc, &IID_ID3D12PipelineState, (void **)&pso); - if (FAILED(hr)) - { - error_str = Lit("Failed to create pipeline state object"); - ok = 0; - } - } - else if (ok) - { - String cs = DataFromResource(desc.cs.resource); - - D3D12_COMPUTE_PIPELINE_STATE_DESC pso_desc = ZI; - pso_desc.pRootSignature = g->bindless_rootsig; - pso_desc.CS.pShaderBytecode = cs.text; - pso_desc.CS.BytecodeLength = cs.len; - hr = ID3D12Device_CreateComputePipelineState(g->device, &pso_desc, &IID_ID3D12PipelineState, (void **)&pso); - if (FAILED(hr)) - { - error_str = Lit("Failed to create pipeline state object"); - ok = 0; - } - } - - pipeline->pso = pso; - pipeline->error = error_str; - pipeline->ok = ok; -} - GPU_D12_Pipeline *GPU_D12_PipelineFromDesc(GPU_D12_PipelineDesc desc) { GPU_D12_SharedState *g = &GPU_D12_shared_state; u64 hash = RandU64FromSeed(HashFnv64(Fnv64Basis, StringFromStruct(&desc))); + /* Fetch pipeline from cache */ GPU_D12_Pipeline *pipeline = 0; b32 is_pipeline_new = 0; GPU_D12_PipelineBin *bin = &g->pipeline_bins[hash % countof(g->pipeline_bins)]; @@ -546,11 +440,109 @@ GPU_D12_Pipeline *GPU_D12_PipelineFromDesc(GPU_D12_PipelineDesc desc) } } + /* Create pipeline */ if (is_pipeline_new) { - RunJob(GPU_D12_LoadPipeline, .fence = &pipeline->ready_fence, .sig.pipeline = pipeline); + HRESULT hr = 0; + b32 ok = 1; + String error_str = ZI; + + /* Create PSO */ + ID3D12PipelineState *pso = 0; + if (ok && (!IsResourceNil(desc.vs.resource) || !IsResourceNil(desc.ps.resource))) + { + D3D12_RASTERIZER_DESC raster_desc = ZI; + if (desc.is_wireframe) + { + raster_desc.FillMode = D3D12_FILL_MODE_WIREFRAME; + } + else + { + raster_desc.FillMode = D3D12_FILL_MODE_SOLID; + } + raster_desc.CullMode = D3D12_CULL_MODE_NONE; + raster_desc.FrontCounterClockwise = 0; + raster_desc.DepthBias = D3D12_DEFAULT_DEPTH_BIAS; + raster_desc.DepthBiasClamp = D3D12_DEFAULT_DEPTH_BIAS_CLAMP; + raster_desc.SlopeScaledDepthBias = D3D12_DEFAULT_SLOPE_SCALED_DEPTH_BIAS; + raster_desc.DepthClipEnable = 1; + raster_desc.MultisampleEnable = 0; + raster_desc.AntialiasedLineEnable = 0; + raster_desc.ForcedSampleCount = 0; + raster_desc.ConservativeRaster = D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF; + + D3D12_BLEND_DESC blend_desc = ZI; + blend_desc.AlphaToCoverageEnable = 0; + blend_desc.IndependentBlendEnable = 0; + blend_desc.RenderTarget[0].BlendEnable = 1; + blend_desc.RenderTarget[0].SrcBlend = D3D12_BLEND_SRC_ALPHA; + blend_desc.RenderTarget[0].DestBlend = D3D12_BLEND_INV_SRC_ALPHA; + blend_desc.RenderTarget[0].BlendOp = D3D12_BLEND_OP_ADD; + blend_desc.RenderTarget[0].SrcBlendAlpha = D3D12_BLEND_ONE; + blend_desc.RenderTarget[0].DestBlendAlpha = D3D12_BLEND_INV_SRC_ALPHA; + blend_desc.RenderTarget[0].BlendOpAlpha = D3D12_BLEND_OP_ADD; + blend_desc.RenderTarget[0].RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_ALL; + + D3D12_DEPTH_STENCIL_DESC ds_desc = ZI; + ds_desc.DepthEnable = 0; + ds_desc.StencilEnable = 0; + + String vs = DataFromResource(desc.vs.resource); + String ps = DataFromResource(desc.ps.resource); + + D3D12_GRAPHICS_PIPELINE_STATE_DESC pso_desc = ZI; + pso_desc.pRootSignature = g->bindless_rootsig; + pso_desc.VS.pShaderBytecode = vs.text; + pso_desc.VS.BytecodeLength = vs.len; + pso_desc.PS.pShaderBytecode = ps.text; + pso_desc.PS.BytecodeLength = ps.len; + pso_desc.RasterizerState = raster_desc; + pso_desc.BlendState = blend_desc; + pso_desc.DepthStencilState = ds_desc; + pso_desc.PrimitiveTopologyType = desc.topology_type; + pso_desc.SampleMask = UINT_MAX; + pso_desc.SampleDesc.Count = 1; + pso_desc.SampleDesc.Quality = 0; + for (i32 i = 0; i < (i32)countof(desc.render_target_formats); ++i) + { + StaticAssert(countof(pso_desc.RTVFormats) <= countof(desc.render_target_formats)); + DXGI_FORMAT format = GPU_D12_DxgiFormatFromGpuFormat(desc.render_target_formats[i]); + if (format != DXGI_FORMAT_UNKNOWN) + { + pso_desc.RTVFormats[pso_desc.NumRenderTargets++] = format; + } + else + { + break; + } + } + hr = ID3D12Device_CreateGraphicsPipelineState(g->device, &pso_desc, &IID_ID3D12PipelineState, (void **)&pso); + if (FAILED(hr)) + { + error_str = Lit("Failed to create pipeline state object"); + ok = 0; + } + } + else if (ok) + { + String cs = DataFromResource(desc.cs.resource); + + D3D12_COMPUTE_PIPELINE_STATE_DESC pso_desc = ZI; + pso_desc.pRootSignature = g->bindless_rootsig; + pso_desc.CS.pShaderBytecode = cs.text; + pso_desc.CS.BytecodeLength = cs.len; + hr = ID3D12Device_CreateComputePipelineState(g->device, &pso_desc, &IID_ID3D12PipelineState, (void **)&pso); + if (FAILED(hr)) + { + error_str = Lit("Failed to create pipeline state object"); + ok = 0; + } + } + + pipeline->pso = pso; + pipeline->error = error_str; + pipeline->ok = ok; } - YieldOnFence(&pipeline->ready_fence, 1); return pipeline; } diff --git a/src/gpu/gpu_dx12/gpu_dx12.h b/src/gpu/gpu_dx12/gpu_dx12.h index 134e34ed..1b25bc85 100644 --- a/src/gpu/gpu_dx12/gpu_dx12.h +++ b/src/gpu/gpu_dx12/gpu_dx12.h @@ -45,7 +45,6 @@ Struct(GPU_D12_Pipeline) GPU_D12_PipelineDesc desc; ID3D12PipelineState *pso; - Fence ready_fence; b32 ok; String error; }; @@ -350,9 +349,6 @@ Struct(GPU_D12_SharedState) { Atomic64Padded resource_creation_gen; - /* Per-fiber permanent arenas */ - GPU_ArenaHandle perm_arenas[MaxFibers]; - /* Stats */ Atomic64 driver_resources_allocated; Atomic64 driver_descriptors_allocated; @@ -405,7 +401,6 @@ D3D12_BARRIER_LAYOUT GPU_D12_BarrierLayoutFromLayout(GPU_Layout layout); //////////////////////////////////////////////////////////// //~ Pipeline -JobDecl(GPU_D12_LoadPipeline, { GPU_D12_Pipeline *pipeline; }); GPU_D12_Pipeline *GPU_D12_PipelineFromDesc(GPU_D12_PipelineDesc desc); //////////////////////////////////////////////////////////// diff --git a/src/platform/platform_win32/platform_win32.c b/src/platform/platform_win32/platform_win32.c index af31da58..473c7289 100644 --- a/src/platform/platform_win32/platform_win32.c +++ b/src/platform/platform_win32/platform_win32.c @@ -15,8 +15,7 @@ void P_Startup(void) g->socks_arena = AcquireArena(Gibi(64)); //- Init timer - JobPoolId timer_pool = InitJobPool(1, Lit("Timer sync"), JobPoolPriority_Critical); - RunJob(P_W32_StartTimerSync, .pool = timer_pool); + DispatchWave(Lit("Win32 timer sync"), 1, P_W32_SyncTimerForever, 0); } //////////////////////////////////////////////////////////// @@ -158,7 +157,7 @@ P_Address P_W32_PlatformAddressFromWin32Address(P_W32_Address ws_addr) //////////////////////////////////////////////////////////// //~ Timer job -JobImpl(P_W32_StartTimerSync, _, __) +void P_W32_SyncTimerForever(WaveLaneCtx *lane, void *udata) { P_W32_SharedState *g = &P_W32_shared_state; SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_TIME_CRITICAL); diff --git a/src/platform/platform_win32/platform_win32.h b/src/platform/platform_win32/platform_win32.h index eaa4f7d3..180677a1 100644 --- a/src/platform/platform_win32/platform_win32.h +++ b/src/platform/platform_win32/platform_win32.h @@ -103,4 +103,4 @@ P_Address P_W32_PlatformAddressFromWin32Address(P_W32_Address ws_addr); //////////////////////////////////////////////////////////// //~ Timer job -JobDecl(P_W32_StartTimerSync, EmptySig); +void P_W32_SyncTimerForever(WaveLaneCtx *lane, void *udata); diff --git a/src/proto/proto.c b/src/proto/proto.c index e12f6f20..7ce1dc96 100644 --- a/src/proto/proto.c +++ b/src/proto/proto.c @@ -1,5 +1,4 @@ -JobDecl(PT_RunForever, EmptySig); -JobImpl(PT_RunForever, _sig, _id) +void PT_RunForever(WaveLaneCtx *lane, void *udata) { GPU_ArenaHandle gpu_frame_arena = GPU_AcquireArena(); @@ -89,8 +88,7 @@ JobImpl(PT_RunForever, _sig, _id) } } -void PT_Startup(void); void PT_Startup(void) { - RunJob(PT_RunForever); + DispatchWave(Lit("Proto"), 1, PT_RunForever, 0); } diff --git a/src/sprite/sprite.c b/src/sprite/sprite.c index df3bcfe8..c994fba0 100644 --- a/src/sprite/sprite.c +++ b/src/sprite/sprite.c @@ -233,7 +233,7 @@ JobImpl(SPR_LoadSheet, sig, _) //////////////////////////////////////////////////////////// //~ Cache -/* TODO: Per-fiber L1 cache */ +/* TODO: Per-thread L1 cache */ SPR_Entry *SPR_FetchEntry(ResourceKey resource, JobPoolId pool, SPR_FetchFlag flags) { SPR_SharedState *g = &SPR_shared_state; diff --git a/src/window/window_win32/window_win32.c b/src/window/window_win32/window_win32.c index 1c5211e2..4abc2c9f 100644 --- a/src/window/window_win32/window_win32.c +++ b/src/window/window_win32/window_win32.c @@ -85,9 +85,8 @@ void WND_Startup(void) RegisterRawInputDevices(&rid, 1, sizeof(rid)); } - //- Start message processing job - JobPoolId message_job_pool = InitJobPool(1, Lit("Win32 message loop"), JobPoolPriority_Graphics); - RunJob(WND_W32_ProcessMessagesForever, .pool = message_job_pool); + //- Dispatch message processor + DispatchWave(Lit("Win32 msg loop"), 1, WND_W32_ProcessMessagesForever, 0); } //////////////////////////////////////////////////////////// @@ -102,7 +101,7 @@ WND_W32_Window *WND_W32_WindowFromHandle(WND_Handle handle) //~ Initialization /* Win32 limitation: Window must be initialized on same thread that processes events */ -JobImpl(WND_W32_ProcessMessagesForever, sig, id) +void WND_W32_ProcessMessagesForever(WaveLaneCtx *lane, void *udata) { WND_W32_SharedState *g = &WND_W32_shared_state; WND_W32_Window *window = &g->window; diff --git a/src/window/window_win32/window_win32.h b/src/window/window_win32/window_win32.h index 916d7988..37016e6c 100644 --- a/src/window/window_win32/window_win32.h +++ b/src/window/window_win32/window_win32.h @@ -77,7 +77,7 @@ WND_W32_Window *WND_W32_WindowFromHandle(WND_Handle handle); //////////////////////////////////////////////////////////// //~ Initialization -JobDecl(WND_W32_ProcessMessagesForever, EmptySig); +void WND_W32_ProcessMessagesForever(WaveLaneCtx *lane, void *udata); //////////////////////////////////////////////////////////// //~ Message processing