gpu refactor progress

This commit is contained in:
jacob 2025-09-16 16:16:38 -05:00
parent ddf4f5c421
commit 4d3a5b7c3e
10 changed files with 160 additions and 121 deletions

View File

@ -39,12 +39,6 @@ Enum(JobPool)
typedef void JobFunc(void *, i32);
/* Pairs a dispatch count with a completion fence for a batch of jobs.
 * CloseJob adds 1 to 'num_jobs_dispatched' when a job with tasks is closed,
 * and the fiber that finishes a job's last task adds 1 to
 * 'num_jobs_completed_fence'. YieldOnJobs yields on that fence using the
 * current dispatched count as the target. */
Struct(JobCounter)
{
Atomic64Padded num_jobs_dispatched;
Fence num_jobs_completed_fence;
};
Struct(Job)
{
/* Internal */
@ -58,7 +52,7 @@ Struct(Job)
/* Configurable between OpenJob & CloseJob */
i32 count;
JobCounter *counter;
Fence *fence;
void *sig;
};
@ -80,7 +74,7 @@ void ResumeFibers(i16 fiber_ids_count, i16 *fiber_ids); /* NOTE: Must only be c
/* Declares the types and prototype belonging to a job named 'job':
 *  - 'job##_Sig':  the argument struct, whose body is supplied by 'sigdef'
 *  - 'job##_Desc': the descriptor RunJob initializes from its arguments
 *                  (func, pool, count, completion counter/fence, sig)
 *  - the prototype 'void job(job##_Sig *, i32)'
 * The trailing StaticAssert(1) presumably exists so that the invocation has
 * to be terminated with a semicolon -- TODO confirm. */
#define JobDecl(job, sigdef) \
typedef struct job##_Sig sigdef job##_Sig; \
Struct(job##_Desc) { JobFunc *func; JobPool pool; u32 count; JobCounter *counter; job##_Sig sig; }; \
Struct(job##_Desc) { JobFunc *func; JobPool pool; u32 count; Fence *fence; job##_Sig sig; }; \
void job(job##_Sig *, i32); \
StaticAssert(1)
@ -92,31 +86,30 @@ void ResumeFibers(i16 fiber_ids_count, i16 *fiber_ids); /* NOTE: Must only be c
/* RunJob example usage:
*
* This example pushes a single 'LoadTextureJob' onto the background job
* pool, copying 'sprite' into the job signature. 'counter' is also passed
* pool, copying 'sprite' into the job signature. 'fence' is also passed
* and then immediately yielded on in this example, effectively making
* the operation synchronous as the caller will block until the job completes:
* {
* JobCounter counter = {0};
* RunJob(LoadTextureJob, .pool = JobPool_Background, .counter = &counter, .sig = { .resource = sprite });
* YieldOnJobs(&counter);
* Fence job_fence = {0};
* u32 job_count = 0;
* job_count += RunJob(LoadTextureJob, .pool = JobPool_Background, .fence = &job_fence, .sig = { .resource = sprite });
* YieldOnFence(&job_fence, job_count);
* }
*
*/
/* Builds a 'job_func##_Desc' from the designated initializers passed in
 * __VA_ARGS__ (starting from .count = 1, .pool = JobPool_Inherit,
 * .func = job_func), opens a job on the requested pool, copies the
 * descriptor's count, completion counter/fence and signature into it, and
 * closes the job.
 *
 * NOTE(review): the '(1);' form expands to TWO statements: the expression
 * '(1)' followed by the do/while that performs the dispatch. As a result
 * 'n += RunJob(...)' only behaves as intended in plain statement position;
 * as the body of an unbraced if/else/for, only 'n += (1);' would be
 * controlled and the do/while would always run (or break the else pairing).
 * The literal 1 is also not derived from CloseJob's return value. */
#define RunJob(job_func, ...) \
#define RunJob(job_func, ...) (1); \
do { \
job_func##_Desc __desc = { .count = 1, .pool = JobPool_Inherit, .func = job_func, __VA_ARGS__ }; \
Job *__job = OpenJob(__desc.func, __desc.pool); \
__job->count = __desc.count; \
__job->counter = __desc.counter; \
__job->fence = __desc.fence; \
__job->sig = PushStructNoZero(__job->arena, job_func##_Sig); \
CopyBytes(__job->sig, &__desc.sig, sizeof(__desc.sig)); \
CloseJob(__job); \
} while (0)
Job *OpenJob(JobFunc *func, JobPool pool_kind);
void CloseJob(Job *job);
#define YieldOnJobs(counter) (YieldOnFence(&(counter)->num_jobs_completed_fence, Atomic64Fetch(&(counter)->num_jobs_dispatched.v)))
u32 CloseJob(Job *job);
////////////////////////////////
//~ @hookdecl Dedicated job operations
@ -129,8 +122,8 @@ void CloseJob(Job *job);
*
* For example, Win32 window message processing is required by the OS to occur
* on the same thread that initially created the window, which means it
* actually must run in a dedicated to prevent message processing from yielding
* & resuming on another thread.
* actually must run inside a dedicated job to prevent message processing from
* yielding & resuming on another thread.
*/
void RunDedicatedJob(JobFunc job_func);

View File

@ -363,10 +363,10 @@ void W32_FiberEntryPoint(void *_)
/* Check if we've completed the last task in the job */
if (Atomic32FetchAdd(&job->num_tasks_completed.v, 1) + 1 >= job->count)
{
/* Increment counter */
if (job->counter)
/* Increment fence */
if (job->fence)
{
FetchAddFence(&job->counter->num_jobs_completed_fence, 1);
FetchAddFence(job->fence, 1);
}
/* Free job */
LockTicketMutex(&pool->free_jobs_tm);
@ -394,8 +394,6 @@ void W32_FiberEntryPoint(void *_)
}
}
#if VirtualFibersEnabled
DWORD WINAPI W32_VirtualFiberEntryPoint(LPVOID arg)
{
ConvertThreadToFiber(arg);
@ -411,8 +409,6 @@ DWORD WINAPI W32_VirtualFiberEntryPoint(LPVOID arg)
return 0;
}
#endif
////////////////////////////////
//~ Win32 job worker entry
@ -689,7 +685,7 @@ Job *OpenJob(JobFunc *func, JobPool pool_kind)
return job;
}
void CloseJob(Job *job)
u32 CloseJob(Job *job)
{
TempArena scratch = BeginScratchNoConflict();
@ -698,11 +694,6 @@ void CloseJob(Job *job)
if (num_tasks > 0)
{
if (job->counter)
{
Atomic64FetchAdd(&job->counter->num_jobs_dispatched.v, 1);
}
/* Allocate tasks from free list */
u32 num_tasks_allocated = 0;
W32_Task **tasks_array = PushStructsNoZero(scratch.arena, W32_Task *, num_tasks);
@ -783,8 +774,13 @@ void CloseJob(Job *job)
}
}
}
else if (job->fence)
{
FetchAddFence(job->fence, 1);
}
EndScratch(scratch);
return 1;
}
////////////////////////////////

View File

@ -287,11 +287,6 @@ void GPU_Startup(void);
GPU_Viewport GPU_ViewportFromRect(Rect rect);
GPU_Scissor GPU_ScissorFromRect(Rect rect);
////////////////////////////////
//~ @hookdecl Fence operations
GPU_Fence GPU_GetGlobalFence(void);
////////////////////////////////
//~ @hookdecl Resource operations
@ -305,7 +300,7 @@ Vec2I32 GPU_GetTextureSize(GPU_Resource *resource);
//~ @hookdecl Command list operations
GPU_CommandList *GPU_BeginCommandList(void);
GPU_Fence GPU_EndCommandList(GPU_CommandList *cl, JobCounter *counter);
u32 GPU_EndCommandList(GPU_CommandList *cl, Fence *fence);
////////////////////////////////
//~ @hookdecl Profiling helpers

View File

@ -44,7 +44,7 @@ GPU_D12_Command *GPU_D12_PushCmd(GPU_D12_CommandList *cl)
/* Placeholder (see TODO): nothing is derived from 'desc' yet; the function
 * always returns the same constant.
 * NOTE(review): GPU_AcquireResource checks 'r->reuse_hash == 0' before
 * creating the underlying resource, so 0 appears to double as "not yet
 * initialized" -- presumably a real hash must never be 0; confirm. */
u64 GPU_D12_ReuseHashFromResourceDesc(GPU_ResourceDesc desc)
{
/* TODO */
u64 result = 0;
u64 result = 1;
return result;
}
@ -282,14 +282,6 @@ GPU_Scissor GPU_ScissorFromRect(Rect rect)
return (GPU_Scissor) ZI;
}
////////////////////////////////
//~ @hookdef Fence hooks
/* Stub: returns a GPU_Fence built from 'ZI' (presumably the project's
 * zero-initializer -- TODO confirm). No device state is consulted. */
GPU_Fence GPU_GetGlobalFence(void)
{
return (GPU_Fence) ZI;
}
////////////////////////////////
//~ @hookdef Resource hooks
@ -297,10 +289,11 @@ GPU_Resource *GPU_AcquireResource(GPU_ResourceDesc desc)
{
GPU_D12_SharedState *g = &GPU_D12_shared_state;
GPU_D12_Resource *r = 0;
u64 hash = GPU_D12_ReuseHashFromResourceDesc(desc);
/* Grab reusable */
#if 0
u64 hash = GPU_D12_ReuseHashFromResourceDesc(desc);
{
u64 bin_index = hash % countof(g->reuse_bins);
GPU_D12_ReuseBin *bin = &g->reuse_bins[bin_index];
{
@ -315,18 +308,38 @@ GPU_Resource *GPU_AcquireResource(GPU_ResourceDesc desc)
}
Unlock(&lock);
}
#else
u64 hash = 0;
}
#endif
/* Grab from free list */
if (!r)
{
{
Lock lock = LockE(&g->free_resources_mutex);
r = g->first_free_resource;
if (r)
{
g->first_free_resource = r->next_free;
}
Unlock(&lock);
}
if (r)
{
ZeroStruct(r);
}
}
/* Push new */
if (!r)
{
Arena *perm = PermArena();
PushAlign(perm, CachelineSize);
r = PushStruct(perm, GPU_D12_Resource);
PushAlign(perm, CachelineSize);
}
if (r->reuse_hash == 0)
{
r->reuse_hash = hash;
switch (desc.kind)
@ -407,12 +420,34 @@ GPU_Resource *GPU_AcquireResource(GPU_ResourceDesc desc)
}
}
r->desc = desc;
return (GPU_Resource *)r;
}
/* Releases the D3D12 object behind a buffer/texture resource and then pushes
 * the wrapper struct onto the shared free list (under free_resources_mutex),
 * from which GPU_AcquireResource pops and zeroes entries.
 *
 * 'flags' is not read by this body yet (see "TODO: Reuse").
 * NOTE(review): nothing here waits for the GPU; the caller is presumably
 * responsible for making sure the resource is no longer in use -- confirm. */
void GPU_ReleaseResource(GPU_Resource *resource, GPU_ReleaseFlag flags)
void GPU_ReleaseResource(GPU_Resource *gpu_resource, GPU_ReleaseFlag flags)
{
/* TODO */
GPU_D12_SharedState *g = &GPU_D12_shared_state;
GPU_D12_Resource *r = (GPU_D12_Resource *)gpu_resource;
/* TODO: Reuse */
/* All buffer/texture kinds share a single body; any other kind (e.g. the
 * sampler mentioned below) releases nothing here. */
switch (r->desc.kind)
{
case GPU_ResourceKind_Buffer:
case GPU_ResourceKind_Texture1D:
case GPU_ResourceKind_Texture2D:
case GPU_ResourceKind_Texture3D:
{
ID3D12Resource_Release(r->raw);
}
/* TODO: Sampler */
}
/* Return the wrapper struct to the free list */
Lock lock = LockE(&g->free_resources_mutex);
r->next_free = g->first_free_resource;
g->first_free_resource = r;
Unlock(&lock);
}
u32 GPU_GetResourceId(GPU_Resource *resource, GPU_ResourceIdKind kind)
@ -447,7 +482,7 @@ GPU_CommandList *GPU_BeginCommandList(void)
return (GPU_CommandList *)cl;
}
GPU_Fence GPU_EndCommandList(GPU_CommandList *gpu_cl, JobCounter *counter)
u32 GPU_EndCommandList(GPU_CommandList *gpu_cl, Fence *fence)
{
GPU_D12_FiberState *f = GPU_D12_FiberStateFromId(FiberId());
GPU_D12_CommandList *cl = (GPU_D12_CommandList *)gpu_cl;
@ -503,7 +538,7 @@ GPU_Fence GPU_EndCommandList(GPU_CommandList *gpu_cl, JobCounter *counter)
GPU_D12_Resource *r = cmd->rasterize.rts[i];
if (r)
{
pipeline_desc.render_target_formats[i] = r->format;
pipeline_desc.render_target_formats[i] = r->desc.texture.format;
}
else
{
@ -568,9 +603,17 @@ GPU_Fence GPU_EndCommandList(GPU_CommandList *gpu_cl, JobCounter *counter)
{
D3D12_INDEX_BUFFER_VIEW ibv = ZI;
ibv.BufferLocation = indices->gpu_address;
ibv.Format = GPU_D12_DxgiFormatFromGpuFormat(indices->format);
ibv.SizeInBytes = indices->size;
indices_count = indices->element_count;
/* D3D12_INDEX_BUFFER_VIEW::Format is a DXGI_FORMAT, so the DXGI constant is
 * assigned directly. GPU_D12_DxgiFormatFromGpuFormat converts *from* a
 * GPU_Format, so feeding it a DXGI_FORMAT value would reinterpret the
 * constant as the wrong enum. */
if (indices->desc.buffer.element_size == 2)
{
    ibv.Format = DXGI_FORMAT_R16_UINT;
}
else
{
    /* The only other index width handled here is 32-bit */
    Assert(indices->desc.buffer.element_size == 4);
    ibv.Format = DXGI_FORMAT_R32_UINT;
}
ibv.SizeInBytes = indices->desc.buffer.element_size * indices->desc.buffer.element_capacity;
indices_count = indices->desc.buffer.element_count;
}
}
@ -623,7 +666,7 @@ GPU_Fence GPU_EndCommandList(GPU_CommandList *gpu_cl, JobCounter *counter)
/* Free command list */
StackPush(f->first_free_command_list, cl);
return (GPU_Fence) ZI;
return 1;
}
////////////////////////////////

View File

@ -39,14 +39,12 @@ Struct(GPU_D12_Pipeline)
/* D3D12 backing record for a GPU_Resource handle (GPU_AcquireResource
 * returns a pointer to one of these cast to GPU_Resource *). */
Struct(GPU_D12_Resource)
{
/* Link used while the struct sits on GPU_D12_SharedState's free list */
GPU_D12_Resource *next_free;
/* Description stored by GPU_AcquireResource when the resource is handed out */
GPU_ResourceDesc desc;
/* Underlying D3D12 object; released by GPU_ReleaseResource */
ID3D12Resource *raw;
/* GPU_AcquireResource treats 0 as "not yet initialized" */
u64 reuse_hash;
GPU_Format format;
u32 size;
u32 element_size;
u32 element_count;
/* Used as the BufferLocation when the resource is bound as an index buffer */
D3D12_GPU_VIRTUAL_ADDRESS gpu_address;
};
@ -191,6 +189,10 @@ Struct(GPU_D12_SharedState)
{
GPU_D12_FiberState *fiber_states[MaxFibers];
/* Resources */
Mutex free_resources_mutex;
GPU_D12_Resource *first_free_resource;
/* Device */
IDXGIFactory6 *factory;
IDXGIAdapter1 *adapter;

View File

@ -530,12 +530,12 @@ JobDef(Step, sig, id)
}
}
JobCounter counter = ZI;
RunJob(RunCommand,
u32 job_count = 0; Fence job_fence = ZI;
job_count += RunJob(RunCommand,
.count = shader_entries_count,
.counter = &counter,
.fence = &job_fence,
.sig = { .cmds = compile_cmds, .results = compile_results });
YieldOnJobs(&counter);
YieldOnFence(&job_fence, job_count);
//- Process shader compilation results
{
@ -567,9 +567,9 @@ JobDef(Step, sig, id)
arc_params.arc_store = shader_store_name;
arc_params.arc_dir = shader_store_name;
JobCounter counter = ZI;
RunJob(Step, .counter = &counter, .sig.params = &arc_params, .sig.results = &arc_results);
YieldOnJobs(&counter);
u32 job_count = 0; Fence job_fence = ZI;
job_count += RunJob(Step, .fence = &job_fence, .sig.params = &arc_params, .sig.results = &arc_results);
YieldOnFence(&job_fence, job_count);
InheritStepResults(arena, result, 1, &arc_results);
}
}
@ -643,13 +643,13 @@ JobDef(Step, sig, id)
++i;
}
}
JobCounter counter = ZI;
RunJob(Step,
.counter = &counter,
u32 job_count = 0; Fence job_fence = ZI;
job_count += RunJob(Step,
.fence = &job_fence,
.count = dir_embeds_count,
.sig.params = arc_params_array,
.sig.results = arc_results_array);
YieldOnJobs(&counter);
YieldOnFence(&job_fence, job_count);
InheritStepResults(arena, result, dir_embeds_count, arc_results_array);
}
}
@ -994,9 +994,9 @@ JobDef(Build, _, __)
resource_params->compiler_params = cp;
resource_params->flattened = flattened;
JobCounter step_counter = ZI;
RunJob(Step, .count = countof(params_array), .counter = &step_counter, .sig.params = params_array, .sig.results = results_array);
YieldOnJobs(&step_counter);
u32 job_count = 0; Fence job_fence = ZI;
job_count += RunJob(Step, .count = countof(params_array), .fence = &job_fence, .sig.params = params_array, .sig.results = results_array);
YieldOnFence(&job_fence, job_count);
////////////////////////////////
//~ Process compile step results

View File

@ -15,7 +15,7 @@ void PB_Startup(void)
PB_WSP_SharedState *g = &PB_WSP_shared_state;
PB_WSP_InitializeWasapi();
/* Start playback job */
RunJob(PB_WSP_Playback, .pool = JobPool_Audio, .counter = &g->shutdown_job_counter);
g->shutdown_jobs_count += RunJob(PB_WSP_Playback, .pool = JobPool_Audio, .fence = &g->shutdown_jobs_fence);
OnExit(&PB_WSP_Shutdown);
}
@ -24,7 +24,7 @@ ExitFuncDef(PB_WSP_Shutdown)
__prof;
PB_WSP_SharedState *g = &PB_WSP_shared_state;
Atomic32Set(&g->shutdown, 1);
YieldOnJobs(&g->shutdown_job_counter);
YieldOnFence(&g->shutdown_jobs_fence, g->shutdown_jobs_count);
}
void PB_WSP_InitializeWasapi(void)

View File

@ -32,7 +32,8 @@ Struct(PB_WSP_SharedState)
IAudioRenderClient *playback;
WAVEFORMATEX *buffer_format;
u32 buffer_frames;
JobCounter shutdown_job_counter;
Fence shutdown_jobs_fence;
u32 shutdown_jobs_count;
} extern PB_WSP_shared_state;
////////////////////////////////

View File

@ -48,8 +48,8 @@ void StartupUser(void)
P_ShowWindow(g->window);
/* Start jobs */
RunJob(UpdateUserOrSleep, .pool = JobPool_User, .counter = &g->shutdown_job_counter);
RunJob(UpdateSim, .pool = JobPool_Sim, .counter = &g->shutdown_job_counter);
g->shutdown_jobs_count += RunJob(UpdateUserOrSleep, .pool = JobPool_User, .fence = &g->shutdown_jobs_fence);
g->shutdown_jobs_count += RunJob(UpdateSim, .pool = JobPool_Sim, .fence = &g->shutdown_jobs_fence);
OnExit(&ShutdownUser);
}
@ -61,7 +61,7 @@ ExitFuncDef(ShutdownUser)
__prof;
SharedUserState *g = &shared_user_state;
Atomic32Set(&g->shutdown, 1);
YieldOnJobs(&g->shutdown_job_counter);
YieldOnFence(&g->shutdown_jobs_fence, g->shutdown_jobs_count);
P_ReleaseWindow(g->window);
}
@ -432,6 +432,13 @@ GPU_Resource *AcquireUploadBufferFromArena(u32 element_count, Arena *arena)
return r;
}
/* Job body: task 'id' yields on the fence/target pair carried in the
 * signature and afterwards releases the id-th entry of the signature's
 * resource array with the signature's release flags. */
JobDef(DelayReleaseGpuResources, sig, id)
{
    /* Defer the release until the begin fence has reached its target */
    YieldOnFence(sig->begin_fence, sig->begin_fence_target);
    GPU_ReleaseResource(sig->resources[id], sig->flags);
}
////////////////////////////////
//~ Sort entities
@ -2151,7 +2158,7 @@ void UpdateUser(P_Window *window)
if (g->shade_target && !EqVec2I32(g->render_size, GPU_GetTextureSize(g->shade_target)))
{
__profn("Release render resources");
/* FIXME: Yield on render fence */
YieldOnFence(&g->gpu_render_fence, g->gpu_render_fence_target);
GPU_ReleaseResource(g->albedo, GPU_ReleaseFlag_None);
GPU_ReleaseResource(g->emittance, GPU_ReleaseFlag_None);
GPU_ReleaseResource(g->emittance_flood_read, GPU_ReleaseFlag_None);
@ -2174,7 +2181,7 @@ void UpdateUser(P_Window *window)
/* Acquire ui buffers */
if (g->ui_target && !EqVec2I32(g->ui_size, GPU_GetTextureSize(g->ui_target)))
{
/* FIXME: Wait on render fence */
YieldOnFence(&g->gpu_render_fence, g->gpu_render_fence_target);
GPU_ReleaseResource(g->ui_target, GPU_ReleaseFlag_None);
g->ui_target = 0;
}
@ -2420,9 +2427,7 @@ void UpdateUser(P_Window *window)
GPU_RasterizeMode_TriangleList);
}
}
/* FIXME: Enable this */
#if 0
g->last_gpu_barrier = GPU_EndCommandList(cl);
g->gpu_render_fence_target += GPU_EndCommandList(cl, &g->gpu_render_fence);
/* Release transfer buffers */
{
@ -2435,14 +2440,15 @@ void UpdateUser(P_Window *window)
ui_shape_verts_buffer,
ui_shape_indices_buffer,
};
Job *job = OpenJob(ReleaseRenderResources);
Job *job = OpenJob(DelayReleaseGpuResources, JobPool_Inherit);
{
ReleaseRenderResources_Sig *sig = PushStruct(job->arena, ReleaseRenderResources_Sig);
job->count = countof(resources);
sig->barrier = g->last_gpu_barrier;
sig->resources = PushStructsNoZero(sig->arena, GPU_Resource *, job->count);
DelayReleaseGpuResources_Sig *sig = PushStruct(job->arena, DelayReleaseGpuResources_Sig);
job->count = countof(release_resources);
sig->begin_fence = &g->gpu_render_fence;
sig->begin_fence_target = g->gpu_render_fence_target;
sig->resources = PushStructsNoZero(job->arena, GPU_Resource *, job->count);
sig->flags = GPU_ReleaseFlag_Reuse;
CopyBytes(sig->resources, resources, sizeof(resources));
CopyBytes(sig->resources, release_resources, sizeof(release_resources));
job->sig = sig;
}
CloseJob(job);
@ -2458,7 +2464,6 @@ void UpdateUser(P_Window *window)
g->ui_shape_indices_count = 0;
g->grids_count = 0;
}
#endif
}
EndScratch(scratch);

View File

@ -153,10 +153,12 @@ Struct(BindState)
Struct(SharedUserState)
{
Atomic32 shutdown;
JobCounter shutdown_job_counter;
P_Window *window;
GPU_Swapchain *swapchain;
Fence shutdown_jobs_fence;
u64 shutdown_jobs_count;
Arena *arena;
String connect_address_str;
@ -193,7 +195,8 @@ Struct(SharedUserState)
u32 ui_shape_indices_count;
u32 grids_count;
JobCounter render_counter;
Fence gpu_render_fence;
u64 gpu_render_fence_target;
//- Bind state
BindState bind_states[BindKind_Count];
@ -297,6 +300,7 @@ void DrawDebugConsole(i32 level, b32 minimized);
GPU_Resource *AcquireGbuffer(GPU_Format format, Vec2I32 size);
GPU_Resource *AcquireUploadBuffer(u32 element_count, u32 element_size, void *src);
GPU_Resource *AcquireUploadBufferFromArena(u32 element_count, Arena *arena);
/* Signature for the deferred-release job: each task yields on
 * 'begin_fence' with 'begin_fence_target', then releases the entry of
 * 'resources' at its task index, passing 'flags' to GPU_ReleaseResource. */
JobDecl(DelayReleaseGpuResources, { Fence *begin_fence; u64 begin_fence_target; GPU_Resource **resources; GPU_ReleaseFlag flags; });
////////////////////////////////
//~ Entity sorting