# Patch notes (free text ahead of the first "diff --git" header; not part of any hunk).
# - src/base/base.h: Atomic64Fetch held the loaded value in an i16 temporary; it is widened to i64.
#   MaxFibers and its StaticAssert move above the "#if LanguageIsC" block, and VirtualFibersEnabled is defined as 0.
# - src/base/base_futex.c: FutexYieldNeq and FutexWakeNeq hash the address once into bin_index and index neq_bins with it;
#   list->addr is assigned after the allocation branch instead of inside it.
# - src/base/base_win32/base_win32_job.[ch]: parent_id is renamed to return_id, is_suspending is replaced by a
#   W32_FiberStatus value (None / Suspending / Suspended), SuspendFiber blocks on WaitOnAddress when return_id is not > 0,
#   and a thread-backed fiber path is added behind "#if VirtualFibersEnabled".
# - src/gpu, src/pp: GPU_EndCommandList takes an additional JobCounter *counter argument.
# - Identifiers introduced by added lines are used consistently: thread_name_desc_cstr in W32_Win32ThreadProc,
#   bin_index in FutexWakeNeq, fiber_id in SuspendFiber.
diff --git a/src/base/base.h b/src/base/base.h index 9f692fa4..de65fc8b 100644 --- a/src/base/base.h +++ b/src/base/base.h @@ -570,7 +570,7 @@ ForceInline i32 Atomic32FetchTestSet (Atomic32 *x, i32 c, i32 e) { return ForceInline i32 Atomic32FetchXor (Atomic32 *x, i32 c) { return (i32)_InterlockedXor((volatile long *)&(x)->_v, (c)); } ForceInline i32 Atomic32FetchAdd (Atomic32 *x, i32 a) { return (i32)_InterlockedExchangeAdd((volatile long *)&(x)->_v, (a)); } //- 64 bit atomic operations -ForceInline i64 Atomic64Fetch (Atomic64 *x) { i16 result = (x)->_v; CompilerMemoryBarrier(); return result; } +ForceInline i64 Atomic64Fetch (Atomic64 *x) { i64 result = (x)->_v; CompilerMemoryBarrier(); return result; } ForceInline void Atomic64Set (Atomic64 *x, i64 e) { CompilerMemoryBarrier(); (x)->_v = e; } ForceInline i64 Atomic64FetchSet (Atomic64 *x, i64 e) { return (i64)_InterlockedExchange64(&(x)->_v, (e)); } ForceInline i64 Atomic64FetchTestSet (Atomic64 *x, i64 c, i64 e) { return (i64)_InterlockedCompareExchange64(&(x)->_v, (e), (c)); } @@ -711,7 +711,12 @@ Struct(ComputeShader) { Resource resource; }; #endif //////////////////////////////// -//~ Fiber id +//~ Fibers + +#define VirtualFibersEnabled 0 + +# define MaxFibers 4096 +StaticAssert(MaxFibers < I16Max); /* MaxFibers should fit in FiberId */ #if LanguageIsC # if PlatformIsWindows @@ -719,8 +724,6 @@ Struct(ComputeShader) { Resource resource; }; # else # error FiberId not implemented # endif -# define MaxFibers 4096 -StaticAssert(MaxFibers < I16Max); /* MaxFibers should fit in FiberId */ #endif diff --git a/src/base/base_futex.c b/src/base/base_futex.c index 967fba16..e1412a96 100644 --- a/src/base/base_futex.c +++ b/src/base/base_futex.c @@ -22,7 +22,8 @@ FiberNeqFutexState *FiberNeqFutexStateFromId(i16 fiber_id) void FutexYieldNeq(volatile void *addr, void *cmp, u8 cmp_size) { SharedFutexState *g = &shared_futex_state; - FutexNeqListBin *bin = &g->neq_bins[RandU64FromSeed((u64)addr) % countof(g->neq_bins)]; + u64
bin_index = RandU64FromSeed((u64)addr) % countof(g->neq_bins); + FutexNeqListBin *bin = &g->neq_bins[bin_index]; b32 cancel = 0; @@ -81,8 +82,8 @@ void FutexYieldNeq(volatile void *addr, void *cmp, u8 cmp_size) PushAlign(perm, CachelineSize); list = PushStruct(perm, FutexNeqList); PushAlign(perm, CachelineSize); - list->addr = addr; } + list->addr = addr; list->next = bin->first; bin->first = list; } @@ -109,6 +110,7 @@ void FutexYieldNeq(volatile void *addr, void *cmp, u8 cmp_size) void FutexWakeNeq(void *addr) { SharedFutexState *g = &shared_futex_state; + u64 bin_index = RandU64FromSeed((u64)addr) % countof(g->neq_bins); - FutexNeqListBin *bin = &g->neq_bins[RandU64FromSeed((u64)addr) % countof(g->neq_bins)]; + FutexNeqListBin *bin = &g->neq_bins[bin_index]; /* Pull waiting ids */ diff --git a/src/base/base_win32/base_win32_job.c b/src/base/base_win32/base_win32_job.c index 11c2a15d..04eabf35 100644 --- a/src/base/base_win32/base_win32_job.c +++ b/src/base/base_win32/base_win32_job.c @@ -84,7 +84,7 @@ void InitJobSystem(void) ctx->pool_kind = pool_kind; ctx->id = i; String name = FormatString(perm, name_fmt, FmtSint(i)); - pool->worker_threads[i] = W32_StartThread(W32_JobWorkerEntryFunc, ctx, name, prof_group + i); + pool->worker_threads[i] = W32_StartThread(W32_JobWorkerEntryPoint, ctx, name, prof_group + i); } } } @@ -101,17 +101,19 @@ DWORD WINAPI W32_Win32ThreadProc(LPVOID vt) W32_AcquireFiber(0); Arena *perm = PermArena(); - W32_Thread *t = (W32_Thread *)vt; - char *thread_name_cstr = CstrFromString(perm, t->thread_name); - wchar_t *thread_name_wstr = WstrFromString(perm, t->thread_name); - __profthread(thread_name_cstr, t->profiler_group); + W32_Thread *t = (W32_Thread *)vt; + String thread_name_desc = StringF(perm, "[%F] %F", FmtSint(FiberId()), FmtString(t->thread_name)); + char *thread_name_desc_cstr = CstrFromString(perm, thread_name_desc); + wchar_t *thread_name_desc_wstr = WstrFromString(perm, thread_name_desc); + + __profthread(thread_name_desc_cstr, t->profiler_group); /* Initialize COM */
CoInitializeEx(0, COINIT_MULTITHREADED); /* Set thread name */ - SetThreadDescription(GetCurrentThread(), thread_name_wstr); + SetThreadDescription(GetCurrentThread(), thread_name_desc_wstr); //P_LogInfoF("New thread \"%F\" created with ID %F", FmtString(StringFromCstrNoLimit(t->thread_name_cstr)), FmtUint(ThreadId())); @@ -141,7 +143,7 @@ W32_Thread *W32_StartThread(W32_ThreadFunc *entry_point, void *thread_udata, Str t->handle = CreateThread( 0, - W32_ThreadStackSize, + W32_FiberStackSize, W32_Win32ThreadProc, t, 0, @@ -206,7 +208,7 @@ W32_Fiber *W32_AcquireFiber(W32_JobPool *pool) { fiber_id = pool->first_free_fiber_id; fiber = &g->fibers[fiber_id]; - pool->first_free_fiber_id = fiber->parent_id; + pool->first_free_fiber_id = fiber->return_id; } UnlockTicketMutex(&pool->free_fibers_tm); } @@ -275,18 +277,25 @@ W32_Fiber *W32_AcquireFiber(W32_JobPool *pool) if (pool != 0) { __profn("CreateFiber"); - fiber->addr = CreateFiber(W32_FiberStackSize, W32_FiberEntryPoint, (void *)(i64)fiber_id); fiber->pool = pool->kind; +#if VirtualFibersEnabled + fiber->addr = CreateThread(0, W32_FiberStackSize, W32_VirtualFiberEntryPoint, (void *)(i64)fiber_id, 0, 0); +#else + fiber->addr = CreateFiber(W32_FiberStackSize, W32_FiberEntryPoint, (void *)(i64)fiber_id); +#endif } else { /* Fiber is not a part of a job pool, convert thread to fiber */ __profn("ConvertThreadToFiber"); fiber->addr = ConvertThreadToFiber((void *)(i64)fiber_id); +#if VirtualFibersEnabled + fiber->addr = GetCurrentThread(); +#endif } } fiber->task = 0; - fiber->parent_id = 0; + fiber->return_id = 0; return fiber; } @@ -296,30 +305,48 @@ void W32_ReleaseFiber(W32_JobPool *pool, W32_Fiber *fiber) LockTicketMutex(&pool->free_fibers_tm); { i16 fiber_id = fiber->id; - fiber->parent_id = pool->first_free_fiber_id; + fiber->return_id = pool->first_free_fiber_id; pool->first_free_fiber_id = fiber_id; } UnlockTicketMutex(&pool->free_fibers_tm); } - //- Fiber id ForceInline W32_Fiber *W32_FiberFromId(i16 id) { return id
> 0 ? &W32_shared_job_state.fibers[id] : 0; } -void W32_SwitchToFiber(W32_Fiber *fiber) +void W32_SwitchToFiber(W32_Fiber *target) { - SwitchToFiber(fiber->addr); +#if VirtualFibersEnabled + W32_Fiber *self = W32_FiberFromId(FiberId()); + Atomic8Set(&self->virtual_yield, 1); + /* Signal virtual target */ + { + Atomic8Set(&target->virtual_yield, 0); + WakeByAddressSingle(&target->virtual_yield); + } + /* Wait for return */ + { + i8 vswitch = 1; + while (vswitch != 0) + { + WaitOnAddress(&self->virtual_yield, &vswitch, sizeof(vswitch), INFINITE); + vswitch = Atomic8Fetch(&self->virtual_yield); + } + } +#else + SwitchToFiber(target->addr); +#endif } //////////////////////////////// //~ Win32 fiber entry -void W32_FiberEntryPoint(void *win32_fiber_data) +void W32_FiberEntryPoint(void *_) { - i16 fiber_id = (i16)(i64)win32_fiber_data; + i16 fiber_id = FiberId(); volatile W32_Fiber *fiber = W32_FiberFromId(fiber_id); W32_JobPool *pool = &W32_shared_job_state.job_pools[fiber->pool]; JobPool pool_kind = fiber->pool; @@ -361,16 +388,35 @@ void W32_FiberEntryPoint(void *win32_fiber_data) /* Yield to worker */ { __prof_fiber_leave(); - W32_Fiber *parent_fiber = W32_FiberFromId(fiber->parent_id); + W32_Fiber *parent_fiber = W32_FiberFromId(fiber->return_id); W32_SwitchToFiber(parent_fiber); } } } +#if VirtualFibersEnabled + +DWORD WINAPI W32_VirtualFiberEntryPoint(LPVOID arg) +{ + ConvertThreadToFiber(arg); + + Arena *perm = PermArena(); + char *fiber_name_cstr = W32_FiberFromId(FiberId())->name_cstr; + wchar_t *fiber_name_wstr = WstrFromString(perm, StringFromCstrNoLimit(fiber_name_cstr)); + SetThreadDescription(GetCurrentThread(), fiber_name_wstr); + + CoInitializeEx(0, COINIT_MULTITHREADED); + W32_FiberEntryPoint(0); + CoUninitialize(); + return 0; +} + +#endif + //////////////////////////////// //~ Win32 job worker entry -W32_ThreadDef(W32_JobWorkerEntryFunc, worker_ctx_arg) +W32_ThreadDef(W32_JobWorkerEntryPoint, worker_ctx_arg) { W32_WorkerCtx *ctx = worker_ctx_arg;
JobPool pool_kind = ctx->pool_kind; @@ -463,12 +509,13 @@ W32_ThreadDef(W32_JobWorkerEntryFunc, worker_ctx_arg) } /* Run task fiber */ - task_fiber->parent_id = worker_fiber_id; + task_fiber->return_id = worker_fiber_id; W32_SwitchToFiber(task_fiber); - if (Atomic8FetchTestSet(&task_fiber->is_suspending, 1, 0) == 1) + if (Atomic8Fetch(&task_fiber->status) == W32_FiberStatus_Suspending) { /* Fiber suspended during execution */ + Atomic8Set(&task_fiber->status, W32_FiberStatus_Suspended); task_fiber = 0; } } @@ -496,35 +543,67 @@ W32_ThreadDef(W32_JobWorkerEntryFunc, worker_ctx_arg) void SuspendFiber(void) { + __prof; + i16 fiber_id = FiberId(); - W32_Fiber *fiber = W32_FiberFromId(FiberId()); + W32_Fiber *fiber = W32_FiberFromId(fiber_id); - W32_Fiber *parent_fiber = W32_FiberFromId(fiber->parent_id); - Assert(parent_fiber->id > 0); + i16 return_id = fiber->return_id; + __prof_fiber_leave(); + if (return_id > 0) { - __prof_fiber_leave(); - Atomic8Set(&fiber->is_suspending, 1); + /* Suspend task fiber (return control flow to parent/worker fiber) */ + Atomic8Set(&fiber->status, W32_FiberStatus_Suspending); + W32_Fiber *parent_fiber = W32_FiberFromId(return_id); W32_SwitchToFiber(parent_fiber); - __prof_fiber_enter(fiber->name_cstr, PROF_THREAD_GROUP_FIBERS - Mebi(fiber->pool) + Kibi(1) + fiber->id); } + else + { + /* Suspend dedicated fiber (block thread) */ + Atomic8Set(&fiber->status, W32_FiberStatus_Suspended); + i8 status = W32_FiberStatus_Suspended; + while (status != W32_FiberStatus_None) + { + WaitOnAddress(&fiber->status, &status, sizeof(status), INFINITE); + status = Atomic8Fetch(&fiber->status); + } + } + __prof_fiber_enter(fiber->name_cstr, PROF_THREAD_GROUP_FIBERS - Mebi(fiber->pool) + Kibi(1) + fiber->id); } void ResumeFibers(i16 fiber_ids_count, i16 *fiber_ids) { + __prof; /* Group tasks by pool */ W32_TaskList tasks_by_pool[JobPool_Count] = ZI; for (i16 id_index = 0; id_index < fiber_ids_count; ++id_index) { i16 fiber_id = fiber_ids[id_index]; W32_Fiber *fiber = W32_FiberFromId(fiber_id); - W32_Task
*task = fiber->task; - JobPool pool_kind = fiber->pool; - W32_TaskList *pool_tasks = &tasks_by_pool[pool_kind]; - QueuePush(pool_tasks->first, pool_tasks->last, task); - ++pool_tasks->count; - /* Wait for fiber to finish suspending */ - while (Atomic8Fetch(&fiber->is_suspending)) + /* Wait for fiber to complete suspending */ + W32_FiberStatus status = Atomic8Fetch(&fiber->status); + while (status != W32_FiberStatus_Suspended) { _mm_pause(); + status = Atomic8Fetch(&fiber->status); + } + + /* Update fiber status */ + Atomic8Set(&fiber->status, W32_FiberStatus_None); + + i16 return_id = fiber->return_id; + if (return_id > 0) + { + /* Group task based on pool */ + W32_Task *task = fiber->task; + JobPool pool_kind = fiber->pool; + W32_TaskList *pool_tasks = &tasks_by_pool[pool_kind]; + QueuePush(pool_tasks->first, pool_tasks->last, task); + ++pool_tasks->count; + } + else + { + /* Wake dedicated fiber right now */ + WakeByAddressSingle(&fiber->status); } } diff --git a/src/base/base_win32/base_win32_job.h b/src/base/base_win32/base_win32_job.h index 08e84f47..7e015dc5 100644 --- a/src/base/base_win32/base_win32_job.h +++ b/src/base/base_win32/base_win32_job.h @@ -26,7 +26,6 @@ //////////////////////////////// //~ Thread types -#define W32_ThreadStackSize Kibi(64) #define W32_ThreadDef(name, arg_name) void name(void *arg_name) typedef W32_ThreadDef(W32_ThreadFunc, data); @@ -43,12 +42,18 @@ Struct(W32_Thread) //////////////////////////////// //~ Fiber types -#define W32_FiberStackSize Mebi(4) -#define W32_FiberNamePrefixCstr "Fiber [" +#define W32_FiberStackSize Mebi(1) +#define W32_FiberNamePrefixCstr "[" #define W32_FiberNameSuffixCstr "]" #define W32_FiberNameMaxSize 64 -//- Fiber +Enum(W32_FiberStatus) +{ + W32_FiberStatus_None = 0, + W32_FiberStatus_Suspending = 1, + W32_FiberStatus_Suspended = 2, +}; + AlignedStruct(W32_Fiber, CachelineSize) { /* ---------------------------------------------------- */ @@ -56,20 +61,16 @@ AlignedStruct(W32_Fiber, CachelineSize) /*
---------------------------------------------------- */ char *name_cstr; /* 08 bytes */ /* ---------------------------------------------------- */ - Atomic8 is_suspending; /* 01 bytes */ - u8 _pad0[1]; /* 01 bytes (padding) */ + Atomic8 status; /* 01 bytes */ + Atomic8 virtual_yield; /* 01 bytes */ i16 id; /* 02 bytes */ i16 pool; /* 02 bytes */ - i16 parent_id; /* 02 bytes */ + i16 return_id; /* 02 bytes */ /* ---------------------------------------------------- */ struct W32_Task *task; /* 08 bytes */ /* ---------------------------------------------------- */ -#if 0 - u8 _pad1[32]; /* 32 bytes (padding) */ -#endif }; StaticAssert(alignof(W32_Fiber) == CachelineSize && sizeof(W32_Fiber) % CachelineSize == 0); /* False sharing validation */ -StaticAssert(offsetof(W32_Fiber, is_suspending) % 4 == 0); /* Atomic alignment validation */ //////////////////////////////// //~ Job pool types @@ -184,15 +185,16 @@ void W32_WaitEndThread(W32_Thread *thread); W32_Fiber *W32_AcquireFiber(W32_JobPool *pool); void W32_ReleaseFiber(W32_JobPool *pool, W32_Fiber *fiber); ForceInline W32_Fiber *W32_FiberFromId(i16 id); -void W32_SwitchToFiber(W32_Fiber *fiber); +void W32_SwitchToFiber(W32_Fiber *target); void W32_YieldFiber(W32_Fiber *fiber, W32_Fiber *parent_fiber); //////////////////////////////// //~ Fiber entry void W32_FiberEntryPoint(void *wi32_fiber_data); +DWORD WINAPI W32_VirtualFiberEntryPoint(LPVOID arg); //////////////////////////////// //~ Job worker entry -W32_ThreadDef(W32_JobWorkerEntryFunc, worker_ctx_arg); +W32_ThreadDef(W32_JobWorkerEntryPoint, worker_ctx_arg); diff --git a/src/gpu/gpu.h b/src/gpu/gpu.h index 0bf604f3..418862b2 100644 --- a/src/gpu/gpu.h +++ b/src/gpu/gpu.h @@ -292,7 +292,7 @@ Vec2I32 GPU_GetTextureSize(GPU_Resource *resource); //~ @hookdecl Command list operations GPU_CommandList *GPU_BeginCommandList(void); -GPU_Fence GPU_EndCommandList(GPU_CommandList *cl); +GPU_Fence GPU_EndCommandList(GPU_CommandList *cl, JobCounter *counter);
//////////////////////////////// //~ @hookdecl Profiling helpers diff --git a/src/gpu/gpu_dx12/gpu_dx12.c b/src/gpu/gpu_dx12/gpu_dx12.c index cf202471..9ab53868 100644 --- a/src/gpu/gpu_dx12/gpu_dx12.c +++ b/src/gpu/gpu_dx12/gpu_dx12.c @@ -147,16 +147,20 @@ GPU_CommandList *GPU_BeginCommandList(void) return (GPU_CommandList *)cl; } -GPU_Fence GPU_EndCommandList(GPU_CommandList *gpu_cl) +GPU_Fence GPU_EndCommandList(GPU_CommandList *gpu_cl, JobCounter *counter) { GPU_D12_FiberState *f = GPU_D12_FiberStateFromId(FiberId()); GPU_D12_CommandList *cl = (GPU_D12_CommandList *)gpu_cl; /* Determine queue kind */ +#if 0 + GPU_QueueKind queue_kind = GPU_QueueKind_Direct; +#else GPU_QueueKind queue_kind = GPU_QueueKind_BackgroundCopy; for (GPU_D12_Command *cmd = cl->first; cmd; cmd = cmd->next) { } +#endif /* Begin dx12 command list */ GPU_D12_RawCommandList *dx12_cl = GPU_D12_BeginRawCommandList(queue_kind); diff --git a/src/pp/pp.c b/src/pp/pp.c index 0ddfa80a..ede27e0c 100644 --- a/src/pp/pp.c +++ b/src/pp/pp.c @@ -2418,10 +2418,10 @@ void UpdateUser(P_Window *window) GPU_RasterizeMode_TriangleList); } } + GPU_EndCommandList(cl, &g->render_counter); /* FIXME: Enable this */ #if 0 - g->most_recent_render_counter = GPU_EndCommandList(cl); /* Release transfer buffers */ { @@ -2438,7 +2438,7 @@ void UpdateUser(P_Window *window) { ReleaseRenderResources_Sig *sig = PushStruct(job->arena, ReleaseRenderResources_Sig); job->count = countof(resources); - sig->render_counter = g->most_recent_render_counter; + sig->render_fence = g->most_recent_render_counter; sig->resources = PushStructsNoZero(sig->arena, GPU_Resource *, job->count); sig->flags = GPU_ReleaseFlag_Reuse; CopyBytes(sig->resources, resources, sizeof(resources)); diff --git a/src/pp/pp.h b/src/pp/pp.h index 26eddd1c..3e9c927b 100644 --- a/src/pp/pp.h +++ b/src/pp/pp.h @@ -193,7 +193,7 @@ Struct(SharedUserState) u32 ui_shape_indices_count; u32 grids_count; - GPU_Fence render_fence; + JobCounter render_counter; //- Bind
state BindState bind_states[BindKind_Count];