From 7906108994864f80814dc014f2b13e7da98a6488 Mon Sep 17 00:00:00 2001 From: jacob Date: Sat, 2 Aug 2025 09:11:26 -0500 Subject: [PATCH] begin job refactor --- src/app/app_core.c | 16 +- src/ase/ase_core.c | 2 +- src/asset_cache/asset_cache_core.c | 2 +- src/asset_cache/asset_cache_core.h | 2 +- src/base/base.c | 7 +- src/base/base.h | 8 +- src/base/base_atomic.h | 88 -- src/base/base_core.h | 78 ++ src/base/base_fiber.c | 19 - src/base/base_fiber.h | 4 - src/base/base_incbin.c | 2 +- src/base/base_intrinsics.h | 13 - src/base/base_job.h | 62 ++ src/base/base_string.c | 37 +- src/base/base_string.h | 6 +- src/base/win32/base_win32.c | 3 + src/base/win32/base_win32.h | 8 + src/base/win32/base_win32_job.c | 1476 ++++++++++++++++++++++++++++ src/base/win32/base_win32_job.h | 293 ++++++ src/draw/draw_core.c | 3 +- src/draw/draw_core.h | 2 +- src/dxc/dxc_core_win32.cpp | 4 +- src/font/font_core.c | 52 +- src/font/font_core.h | 44 +- src/gpu/gpu_dx12.c | 71 +- src/gpu/gpu_dx12.h | 33 +- src/net/net_core.c | 4 +- src/platform/platform_core.h | 93 +- src/platform/platform_log.c | 18 +- src/platform/platform_log.h | 4 +- src/platform/platform_snc.c | 8 +- src/platform/platform_snc.h | 10 +- src/platform/platform_win32.c | 1473 +-------------------------- src/platform/platform_win32.h | 278 +----- src/playback/playback_win32.c | 4 +- src/playback/playback_win32.h | 6 +- src/pp/pp_core.c | 110 +-- src/pp/pp_core.h | 10 +- src/pp/pp_sim.h | 2 +- src/pp/pp_step.c | 4 +- src/settings/settings_core.c | 2 +- src/sound/sound_core.c | 60 +- src/sound/sound_core.h | 54 +- src/sprite/sprite_core.c | 14 +- src/sprite/sprite_core.h | 23 +- src/watch/watch_core.c | 18 +- src/watch/watch_core.h | 8 +- 47 files changed, 2194 insertions(+), 2344 deletions(-) delete mode 100644 src/base/base_atomic.h create mode 100644 src/base/base_job.h create mode 100644 src/base/win32/base_win32.c create mode 100644 src/base/win32/base_win32.h create mode 100644 src/base/win32/base_win32_job.c create 
mode 100644 src/base/win32/base_win32_job.h diff --git a/src/app/app_core.c b/src/app/app_core.c index 67d63203..90cd9d17 100644 --- a/src/app/app_core.c +++ b/src/app/app_core.c @@ -101,8 +101,8 @@ AppArgList ParseAppArgs(Arena *arena, String args_str) value_end = i + 1; } if (key_start >= 0 && key_end > key_start && key_end <= (i64)args_str.len && value_start >= 0 && value_end > value_start && value_end <= (i64)args_str.len) { - String key = CopyString(arena, STRING(key_end - key_start, args_str.text + key_start)); - String value = CopyString(arena, STRING(value_end - value_start, args_str.text + value_start)); + String key = PushString(arena, STRING(key_end - key_start, args_str.text + key_start)); + String value = PushString(arena, STRING(value_end - value_start, args_str.text + value_start)); AppArg *arg = PushStruct(arena, AppArg); arg->key = key; arg->value = value; @@ -223,7 +223,7 @@ void P_AppStartup(String args_str) P_LogInfoF("Settings file not found, loading default"); window_settings = GetDefaultAppWindowSettings(window); } - CopyStringToBuffer(StringFromArray(window_settings.title), Lit(WINDOW_TITLE)); + PushStringToBuffer(StringFromArray(window_settings.title), Lit(WINDOW_TITLE)); P_UpdateWindowSettings(window, &window_settings); EndTempArena(temp); @@ -237,17 +237,15 @@ void P_AppStartup(String args_str) /* Subsystems */ AC_StartupReceipt asset_cache_sr = AC_Startup(); - TTF_StartupReceipt ttf_sr = TTF_Startup(); - F_StartupReceipt font_sr = F_Startup(&asset_cache_sr, &ttf_sr); + TTF_Startup(); S_StartupReceipt sprite_sr = S_Startup(); MIX_StartupReceipt mixer_sr = MIX_Startup(); - SND_StartupReceipt sound_sr = SND_Startup(&asset_cache_sr); - D_StartupReceipt draw_sr = D_Startup(&font_sr); - + D_StartupReceipt draw_sr = D_Startup(); + /* Interface systems */ SimStartupReceipt sim_sr = SimStartup(); PB_StartupReceipt playback_sr = PB_Startup(&mixer_sr); - UserStartupReceipt user_sr = StartupUser(&font_sr, &sprite_sr, &draw_sr, &asset_cache_sr, 
&sound_sr, &mixer_sr, &sim_sr, connect_address); + UserStartupReceipt user_sr = StartupUser(&sprite_sr, &draw_sr, &asset_cache_sr, &mixer_sr, &sim_sr, connect_address); LAX user_sr; LAX playback_sr; diff --git a/src/ase/ase_core.c b/src/ase/ase_core.c index f82a443d..6cc95786 100644 --- a/src/ase/ase_core.c +++ b/src/ase/ase_core.c @@ -407,7 +407,7 @@ void ASE_Inflate(u8 *dst, u8 *encoded) void ASE_PushError(Arena *arena, ASE_ErrorList *list, String msg_src) { ASE_Error *e = PushStruct(arena, ASE_Error); - e->msg = CopyString(arena, msg_src); + e->msg = PushString(arena, msg_src); if (!list->first) { list->first = e; diff --git a/src/asset_cache/asset_cache_core.c b/src/asset_cache/asset_cache_core.c index d78facc3..687e4faa 100644 --- a/src/asset_cache/asset_cache_core.c +++ b/src/asset_cache/asset_cache_core.c @@ -126,7 +126,7 @@ AC_Asset *AC_TouchCache(String key, u64 hash, b32 *is_first_touch) { /* Copy key to store */ AC_Store store = AC_OpenStore(); - key_stored = CopyString(store.arena, key); + key_stored = PushString(store.arena, key); AC_CloseStore(&store); } /* Initialize asset data */ diff --git a/src/asset_cache/asset_cache_core.h b/src/asset_cache/asset_cache_core.h index e4275cad..eb8b6a6b 100644 --- a/src/asset_cache/asset_cache_core.h +++ b/src/asset_cache/asset_cache_core.h @@ -17,7 +17,7 @@ Struct(AC_Asset) u64 hash; String key; - P_Counter counter; + Counter counter; /* Managed via asset_cache_mark_x functions */ AC_Status status; diff --git a/src/base/base.c b/src/base/base.c index 140bd0eb..70c7fd18 100644 --- a/src/base/base.c +++ b/src/base/base.c @@ -1,6 +1,5 @@ #include "base.h" -#include "base_fiber.c" #include "base_arena.c" #include "base_gstat.c" #include "base_memory.c" @@ -11,3 +10,9 @@ #include "base_uid.c" #include "base_uni.c" #include "base_incbin.c" + +#if PlatformIsWindows +# include "win32/base_win32.c" +#else +# error Base layer platform backend not implemented +#endif diff --git a/src/base/base.h b/src/base/base.h index 
6625b2a9..2fa051bc 100644 --- a/src/base/base.h +++ b/src/base/base.h @@ -7,9 +7,8 @@ //- Base cpu includes #include "../prof/prof.h" # include "base_intrinsics.h" -# include "base_atomic.h" -# include "base_fiber.h" # include "base_memory.h" +# include "base_job.h" # include "base_arena.h" # include "base_uid.h" # include "base_string.h" @@ -20,6 +19,11 @@ # include "base_rand.h" # include "base_util.h" # include "base_incbin.h" + +#if PlatformIsWindows +# include "win32/base_win32.h" +#endif + #elif LanguageIsGpu //- Base gpu includes # include "base_math_gpu.h" diff --git a/src/base/base_atomic.h b/src/base/base_atomic.h deleted file mode 100644 index 984bda8d..00000000 --- a/src/base/base_atomic.h +++ /dev/null @@ -1,88 +0,0 @@ -//////////////////////////////// -//~ Atomic types - - /* NOTE: Must be aligned to 32 bit boundary by user */ -Struct(Atomic8) -{ - volatile i8 _v; -}; - -/* NOTE: Must be aligned to 32 bit boundary by user */ -Struct(Atomic16) -{ - volatile i16 _v; -}; - -Struct(Atomic32) -{ - volatile i32 _v; -}; - -Struct(Atomic64) -{ - volatile i64 _v; -}; - -//////////////////////////////// -//~ Cache-line isolated atomic types - -AlignedStruct(Atomic8Padded, 64) -{ - Atomic8 v; - u8 _pad[60]; -}; -StaticAssert(sizeof(Atomic8Padded) == 64 && alignof(Atomic8Padded) == 64); - -AlignedStruct(Atomic16Padded, 64) -{ - Atomic16 v; - u8 _pad[60]; -}; -StaticAssert(sizeof(Atomic16Padded) == 64 && alignof(Atomic16Padded) == 64); - -AlignedStruct(Atomic32Padded, 64) -{ - Atomic32 v; - u8 _pad[60]; -}; -StaticAssert(sizeof(Atomic32Padded) == 64 && alignof(Atomic32Padded) == 64); - -AlignedStruct(Atomic64Padded, 64) -{ - Atomic64 v; - u8 _pad[56]; -}; -StaticAssert(sizeof(Atomic64Padded) == 64 && alignof(Atomic64Padded) == 64); - -//////////////////////////////// -//~ Atomic operations - -#if PlatformIsWindows - -ForceInline i8 Atomic8Fetch(Atomic8 *x) { return (i8)_InterlockedCompareExchange8((char *)&x->_v, 0, 0); } -ForceInline i8 Atomic8FetchSet(Atomic8 
*x, i8 e) { return (i8)_InterlockedExchange8((char *)&x->_v, e); } -ForceInline i8 Atomic8FetchTestSet(Atomic8 *x, i8 c, i8 e) { return (i8)_InterlockedCompareExchange8((char *)&x->_v, e, c); } -ForceInline i8 Atomic8FetchXor(Atomic8 *x, i8 c) { return (i8)_InterlockedXor8((char *)&x->_v, c); } -ForceInline i8 Atomic8FetchAdd(Atomic8 *x, i8 a) { return (i8)_InterlockedExchangeAdd8((char *)&x->_v, a); } - -ForceInline i16 Atomic16Fetch(Atomic16 *x) { return (i16)_InterlockedCompareExchange16(&x->_v, 0, 0); } -ForceInline i16 Atomic16FetchSet(Atomic16 *x, i16 e) { return (i16)_InterlockedExchange16(&x->_v, e); } -ForceInline i16 Atomic16FetchTestSet(Atomic16 *x, i16 c, i16 e) { return (i16)_InterlockedCompareExchange16(&x->_v, e, c); } -ForceInline i16 Atomic16FetchTestXor(Atomic16 *x, i16 c) { return (i16)_InterlockedXor16(&x->_v, c); } -ForceInline i16 Atomic16FetchTestAdd(Atomic16 *x, i16 a) { return (i16)_InterlockedExchangeAdd16(&x->_v, a); } - -ForceInline i32 Atomic32Fetch(Atomic32 *x) { return (i32)_InterlockedCompareExchange((volatile long *)&x->_v, 0, 0); } -ForceInline i32 Atomic32FetchSet(Atomic32 *x, i32 e) { return (i32)_InterlockedExchange((volatile long *)&x->_v, e); } -ForceInline i32 Atomic32FetchTestSet(Atomic32 *x, i32 c, i32 e) { return (i32)_InterlockedCompareExchange((volatile long *)&x->_v, e, c); } -ForceInline i32 Atomic32FetchXor(Atomic32 *x, i32 c) { return (i32)_InterlockedXor((volatile long *)&x->_v, c); } -ForceInline i32 Atomic32FetchAdd(Atomic32 *x, i32 a) { return (i32)_InterlockedExchangeAdd((volatile long *)&x->_v, a); } - -ForceInline i64 Atomic64Fetch(Atomic64 *x) { return (i64)_InterlockedCompareExchange64(&x->_v, 0, 0); } -ForceInline i64 Atomic64FetchSet(Atomic64 *x, i64 e) { return (i64)_InterlockedExchange64(&x->_v, e); } -ForceInline i64 Atomic64FetchTestSet(Atomic64 *x, i64 c, i64 e) { return (i64)_InterlockedCompareExchange64(&x->_v, e, c); } -ForceInline i64 Atomic64FetchXor(Atomic64 *x, i64 c) { return 
(i64)_InterlockedXor64(&x->_v, c); } -ForceInline i64 Atomic64FetchAdd(Atomic64 *x, i64 a) { return (i64)_InterlockedExchangeAdd64(&x->_v, a); } - -#else -# error Atomics not implemented -#endif diff --git a/src/base/base_core.h b/src/base/base_core.h index e9e2d4ea..4cc5a5aa 100644 --- a/src/base/base_core.h +++ b/src/base/base_core.h @@ -470,6 +470,84 @@ Global const f64 *_f64_nan = (f64 *)&_f64_nan_u64; #define IsF64Nan(x) (x != x) #endif +//////////////////////////////// +//~ Atomics + +#if !LanguageIsGpu + +//- Atomic types +Struct(Atomic8) { volatile i8 _v; }; +Struct(Atomic16) { volatile i16 _v; }; +Struct(Atomic32) { volatile i32 _v; }; +Struct(Atomic64) { volatile i64 _v; }; + +//- Cache-line isolated aligned atomic types +AlignedStruct(Atomic8Padded, 64) { Atomic8 v; u8 _pad[60]; }; +AlignedStruct(Atomic16Padded, 64) { Atomic16 v; u8 _pad[60]; }; +AlignedStruct(Atomic32Padded, 64) { Atomic32 v; u8 _pad[60]; }; +AlignedStruct(Atomic64Padded, 64) { Atomic64 v; u8 _pad[56]; }; +StaticAssert(sizeof(Atomic8Padded) == 64 && alignof(Atomic8Padded) == 64); +StaticAssert(sizeof(Atomic16Padded) == 64 && alignof(Atomic16Padded) == 64); +StaticAssert(sizeof(Atomic32Padded) == 64 && alignof(Atomic32Padded) == 64); +StaticAssert(sizeof(Atomic64Padded) == 64 && alignof(Atomic64Padded) == 64); + +#if PlatformIsWindows +//- 8 bit atomics operations +ForceInline i8 Atomic8Fetch(Atomic8 *x) { return (i8)_InterlockedCompareExchange8((char *)&x->_v, 0, 0); } +ForceInline i8 Atomic8FetchSet(Atomic8 *x, i8 e) { return (i8)_InterlockedExchange8((char *)&x->_v, e); } +ForceInline i8 Atomic8FetchTestSet(Atomic8 *x, i8 c, i8 e) { return (i8)_InterlockedCompareExchange8((char *)&x->_v, e, c); } +ForceInline i8 Atomic8FetchXor(Atomic8 *x, i8 c) { return (i8)_InterlockedXor8((char *)&x->_v, c); } +ForceInline i8 Atomic8FetchAdd(Atomic8 *x, i8 a) { return (i8)_InterlockedExchangeAdd8((char *)&x->_v, a); } +//- 16 bit atomic operations +ForceInline i16 Atomic16Fetch(Atomic16 *x) { 
return (i16)_InterlockedCompareExchange16(&x->_v, 0, 0); } +ForceInline i16 Atomic16FetchSet(Atomic16 *x, i16 e) { return (i16)_InterlockedExchange16(&x->_v, e); } +ForceInline i16 Atomic16FetchTestSet(Atomic16 *x, i16 c, i16 e) { return (i16)_InterlockedCompareExchange16(&x->_v, e, c); } +ForceInline i16 Atomic16FetchTestXor(Atomic16 *x, i16 c) { return (i16)_InterlockedXor16(&x->_v, c); } +ForceInline i16 Atomic16FetchTestAdd(Atomic16 *x, i16 a) { return (i16)_InterlockedExchangeAdd16(&x->_v, a); } +//- 32 bit atomic operations +ForceInline i32 Atomic32Fetch(Atomic32 *x) { return (i32)_InterlockedCompareExchange((volatile long *)&x->_v, 0, 0); } +ForceInline i32 Atomic32FetchSet(Atomic32 *x, i32 e) { return (i32)_InterlockedExchange((volatile long *)&x->_v, e); } +ForceInline i32 Atomic32FetchTestSet(Atomic32 *x, i32 c, i32 e) { return (i32)_InterlockedCompareExchange((volatile long *)&x->_v, e, c); } +ForceInline i32 Atomic32FetchXor(Atomic32 *x, i32 c) { return (i32)_InterlockedXor((volatile long *)&x->_v, c); } +ForceInline i32 Atomic32FetchAdd(Atomic32 *x, i32 a) { return (i32)_InterlockedExchangeAdd((volatile long *)&x->_v, a); } +//- 64 bit atomic operations +ForceInline i64 Atomic64Fetch(Atomic64 *x) { return (i64)_InterlockedCompareExchange64(&x->_v, 0, 0); } +ForceInline i64 Atomic64FetchSet(Atomic64 *x, i64 e) { return (i64)_InterlockedExchange64(&x->_v, e); } +ForceInline i64 Atomic64FetchTestSet(Atomic64 *x, i64 c, i64 e) { return (i64)_InterlockedCompareExchange64(&x->_v, e, c); } +ForceInline i64 Atomic64FetchXor(Atomic64 *x, i64 c) { return (i64)_InterlockedXor64(&x->_v, c); } +ForceInline i64 Atomic64FetchAdd(Atomic64 *x, i64 a) { return (i64)_InterlockedExchangeAdd64(&x->_v, a); } +#else +# error Atomics not implemented +#endif + +#endif + +//////////////////////////////// +//~ Ticket mutex + +#if !LanguageIsGpu + +Struct(TicketMutex) +{ + Atomic64Padded ticket; + Atomic64Padded serving; +}; + +ForceInline void LockTicketMutex(TicketMutex *tm) +{ 
+ i64 ticket = Atomic64FetchAdd(&tm->ticket.v, 1); + while (Atomic64Fetch(&tm->serving.v) != ticket) + { + _mm_pause(); + } +} + +ForceInline void UnlockTicketMutex(TicketMutex *tm) +{ + Atomic64FetchAdd(&tm->serving.v, 1); +} +#endif + //////////////////////////////// //~ Config diff --git a/src/base/base_fiber.c b/src/base/base_fiber.c index bab456ff..e69de29b 100644 --- a/src/base/base_fiber.c +++ b/src/base/base_fiber.c @@ -1,19 +0,0 @@ -#if PlatformIsWindows - -//////////////////////////////// -//~ Windows headers -#define WIN32_LEAN_AND_MEAN -#define UNICODE -#include - -//////////////////////////////// -//~ FiberId - -i16 FiberId(void) -{ - return (i16)(i64)GetFiberData(); -} - -#else -# error FiberId not implemented for this platform -#endif diff --git a/src/base/base_fiber.h b/src/base/base_fiber.h index 1b9d3071..e69de29b 100644 --- a/src/base/base_fiber.h +++ b/src/base/base_fiber.h @@ -1,4 +0,0 @@ -#define MaxFibers 4096 - -StaticAssert(MaxFibers < I16Max); /* Fiber id should fit max fibers */ -i16 FiberId(void); diff --git a/src/base/base_incbin.c b/src/base/base_incbin.c index 7ce9795a..74ca5dc1 100644 --- a/src/base/base_incbin.c +++ b/src/base/base_incbin.c @@ -73,7 +73,7 @@ String StringFromIncbinRcResource(IncbinRcResource *inc) /* Spin while another thread searches */ while (state != IncbinStatus_Searched) { - IxPause(); + _mm_pause(); state = Atomic32Fetch(&inc->state); } diff --git a/src/base/base_intrinsics.h b/src/base/base_intrinsics.h index 5819f2ac..eb0f3213 100644 --- a/src/base/base_intrinsics.h +++ b/src/base/base_intrinsics.h @@ -103,16 +103,3 @@ Inline f64 IxTruncF64ToF64(f64 f) { return _mm_cvtsd_f64(_mm_round_sd(_mm_setzero_pd(), _mm_set_sd(f), _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC)); } - -//////////////////////////////// -//~ Utility intrinsics - -Inline void IxPause(void) -{ - _mm_pause(); -} - -Inline u64 IxClock(void) -{ - return __rdtsc(); -} diff --git a/src/base/base_job.h b/src/base/base_job.h new file mode 100644 index 
00000000..42091594 --- /dev/null +++ b/src/base/base_job.h @@ -0,0 +1,62 @@ +//////////////////////////////// +//~ Job queue types + +/* Work pools contain their own worker threads with their own thread priority + * affinity based on the intended context of the pool. */ +typedef i32 PoolKind; enum +{ + PoolKind_Inherit = -1, + + /* The floating pool contains a large number of lower priority worker + * threads that have affinity over the entire CPU. Other pools should push + * jobs that only block and do no work here so that they can yield on the + * blocking job rather than blocking themselves. */ + PoolKind_Floating = 0, + + PoolKind_Background = 1, + PoolKind_Audio = 2, + PoolKind_User = 3, + PoolKind_Sim = 4, + + PoolKind_Count +}; + +/* Job execution order within a pool is based on priority. */ +typedef i32 PriorityKind; enum +{ + PriorityKind_Inherit = -1, + PriorityKind_High = 0, + PriorityKind_Normal = 1, + PriorityKind_Low = 2, + + PriorityKind_Count +}; + +//////////////////////////////// +//~ @hookdecl Fiber helpers + +#define MaxFibers 4096 + +i16 FiberId(void); +StaticAssert(MaxFibers < I16Max); /* Fiber id type should fit max fibers */ + +//////////////////////////////// +//~ @hookdecl Job helpers + +#define EmptySig { i32 _; } + +#define JobDecl(job, sigdef) \ + typedef struct job##_Sig sigdef job##_Sig; \ + Struct(job##_Desc) { Counter *counter; Arena *job_arena; job##_Sig *sig; }; \ + void job(job##_Sig *) + +#define JobDef(job, sig_arg) void job(job##_Sig *sig_arg) + +#define RunJob(job, desc) job(&desc->sig) + +//////////////////////////////// +//~ @hookdecl Wait + +/* Futex-like wait & wake */ +void P_Wait(volatile void *addr, void *cmp, u32 size, i64 timeout_ns); +void P_Wake(void *addr, i32 count); diff --git a/src/base/base_string.c b/src/base/base_string.c index dc27f13f..94c0190f 100644 --- a/src/base/base_string.c +++ b/src/base/base_string.c @@ -81,7 +81,7 @@ String StringFromI64(Arena *arena, i64 n, u64 base, u64 zfill) String 
StringFromPtr(Arena *arena, void *ptr) { - String prepend = CopyString(arena, Lit("0x")); + String prepend = PushString(arena, Lit("0x")); String uint_str = StringFromU64(arena, (u64)ptr, 16, sizeof(ptr)); return (String) { @@ -100,15 +100,15 @@ String StringFromF64(Arena *arena, f64 f, u32 precision) if (IsF32Nan(f)) { - final_len += CopyString(arena, Lit("NaN")).len; + final_len += PushString(arena, Lit("NaN")).len; } else if (f == F64Infinity) { - final_len += CopyString(arena, Lit("inf")).len; + final_len += PushString(arena, Lit("inf")).len; } else if (f == -F64Infinity) { - final_len += CopyString(arena, Lit("-inf")).len; + final_len += PushString(arena, Lit("-inf")).len; } else { @@ -176,9 +176,9 @@ String StringFromhandle(Arena *arena, u64 v0, u64 v1) { String result = ZI; result.text = PushDry(arena, u8); - result.len += CopyString(arena, Lit("h")).len; + result.len += PushString(arena, Lit("h")).len; result.len += StringFromU64(arena, v0, 16, 0).len; - result.len += CopyString(arena, Lit("x")).len; + result.len += PushString(arena, Lit("x")).len; result.len += StringFromU64(arena, v1, 16, 0).len; return result; } @@ -198,7 +198,7 @@ String StringFromUid(Arena *arena, Uid uid) //- Copy -String CopyString(Arena *arena, String src) +String PushString(Arena *arena, String src) { String str = { .len = src.len, @@ -208,7 +208,7 @@ String CopyString(Arena *arena, String src) return str; } -String CopyStringToBuffer(String dst, String src) +String PushStringToBuffer(String dst, String src) { String result = ZI; result.len = MinU64(dst.len, src.len); @@ -306,7 +306,7 @@ String IndentString(Arena *arena, String str, u32 indent) StringFromChar(arena, ' '); ++final_len; } - CopyString(arena, piece); + PushString(arena, piece); final_len += piece.len; if (i < split.count - 1) { @@ -462,11 +462,7 @@ b32 StringEndsWith(String str, String substring) * included in the arguments (instead of w/ the specifier like in printf). 
* * Example: - * StringFormat(arena, - * Lit("Hello there %F. You are %F feet %F inches tall!"), - * FmtString(Lit("George")), - * FmtUint(6), - * FmtFloat(5.375)); + * StringFormat(arena, Lit("Hello there %F"), FmtString(Lit("George"))) * * NOTE: FmtEnd must be passed as the last arg in the va_list (This is * done automatically by the `StringFormat` macro). @@ -524,7 +520,7 @@ String StringFormatV(Arena *arena, String fmt, va_list args) case FmtKind_String: { - parsed_str = CopyString(arena, arg.value.string); + parsed_str = PushString(arena, arg.value.string); } break; case FmtKind_Uint: @@ -566,7 +562,7 @@ String StringFormatV(Arena *arena, String fmt, va_list args) { /* Unexpected end. Not enough FMT args passed to function. */ Assert(0); - parsed_str = CopyString(arena, Lit("")); + parsed_str = PushString(arena, Lit("")); no_more_args = 1; } break; @@ -574,7 +570,7 @@ String StringFormatV(Arena *arena, String fmt, va_list args) { /* Unknown format type */ Assert(0); - parsed_str = CopyString(arena, Lit("")); + parsed_str = PushString(arena, Lit("")); no_more_args = 1; } break; } @@ -757,7 +753,9 @@ String32 String32FromString(Arena *arena, String str8) } //////////////////////////////// -//~ Legacy null-terminated narrow C strings +//~ Legacy null-terminated C string operations + +//- Narrow C strings u64 CstrLenNoLimit(char *cstr) { @@ -831,8 +829,7 @@ String StringFromCstr(char *cstr, u64 limit) }; } -//////////////////////////////// -//~ Legacy null-terminated wide C strings +//- Wide C strings u64 WstrLenNoLimit(wchar_t *wstr) { diff --git a/src/base/base_string.h b/src/base/base_string.h index dee26ac1..e1a25035 100644 --- a/src/base/base_string.h +++ b/src/base/base_string.h @@ -112,8 +112,8 @@ String StringFromhandle(Arena *arena, u64 v0, u64 v1); String StringFromUid(Arena *arena, Uid uid); //- Modification -String CopyString(Arena *arena, String src); -String CopyStringToBuffer(String dst, String src); +String PushString(Arena *arena, String src); 
+String PushStringToBuffer(String dst, String src); String RepeatString(Arena *arena, String src, u64 count); String CatString(Arena *arena, String str1, String str2); StringArray SplitString(Arena *arena, String str, String delim); @@ -161,7 +161,7 @@ String16 String16FromString(Arena *arena, String str8); String32 String32FromString(Arena *arena, String str8); //////////////////////////////// -//~ Legacy C string operations +//~ Legacy null-terminated C string operations //- Narrow strings u64 CstrLenNoLimit(char *cstr); diff --git a/src/base/win32/base_win32.c b/src/base/win32/base_win32.c new file mode 100644 index 00000000..20a3a3b3 --- /dev/null +++ b/src/base/win32/base_win32.c @@ -0,0 +1,3 @@ +#include "base_win32.h" + +#include "base_win32_job.c" diff --git a/src/base/win32/base_win32.h b/src/base/win32/base_win32.h new file mode 100644 index 00000000..abc11eb3 --- /dev/null +++ b/src/base/win32/base_win32.h @@ -0,0 +1,8 @@ +#ifndef BASE_WIN32_H +#define BASE_WIN32_H + +#include "../base.h" + +#include "base_win32_job.h" + +#endif diff --git a/src/base/win32/base_win32_job.c b/src/base/win32/base_win32_job.c new file mode 100644 index 00000000..2f265e59 --- /dev/null +++ b/src/base/win32/base_win32_job.c @@ -0,0 +1,1476 @@ +W32_SharedCtx W32_shared_ctx = ZI; + +//////////////////////////////// +//~ Win32 libs + +#pragma comment(lib, "kernel32") +#pragma comment(lib, "user32") +#pragma comment(lib, "shell32") +#pragma comment(lib, "ole32") +#pragma comment(lib, "winmm") +#pragma comment(lib, "dwmapi") +#pragma comment(lib, "bcrypt") +#pragma comment(lib, "synchronization") +#pragma comment(lib, "avrt") +#pragma comment(lib, "ws2_32.lib") + +//////////////////////////////// +//~ Startup + +void StartupJobs(void) +{ + W32_SharedCtx *g = &W32_shared_ctx; + + /* Init fibers */ + g->num_fibers = 1; /* Fiber at index 0 always nil */ + g->fiber_names_arena = AllocArena(Gibi(64)); + + /* Init wait lists */ + g->wait_lists_arena = AllocArena(Gibi(64)); + + /* Init 
job pools */ + for (PoolKind pool_kind = 0; pool_kind < (i32)countof(g->job_pools); ++pool_kind) + { + W32_JobPool *pool = &g->job_pools[pool_kind]; + + /* Init queues */ + for (PriorityKind priority = 0; priority < (i32)countof(pool->job_queues); ++priority) + { + W32_JobQueue *queue = &pool->job_queues[priority]; + queue->arena = AllocArena(Gibi(64)); + } + } + + /* Init threads pool */ + g->threads_arena = AllocArena(Gibi(64)); + + /* Start job scheduler */ + Atomic64FetchSet(&g->current_scheduler_cycle_period_ns.v, W32_DefaultSchedulerPeriodNs); + W32_Thread *scheduler_thread = W32_AllocThread(W32_JobSchedulerEntryFunc, 0, Lit("Scheduler thread"), PROF_THREAD_GROUP_SCHEDULER); + + //- Start job workers + /* TODO: Heuristic worker counts & affinities */ + { + __profn("Start job workers"); + for (PoolKind pool_kind = 0; pool_kind < (i32)countof(g->job_pools); ++pool_kind) + { + W32_JobPool *pool = &g->job_pools[pool_kind]; + String name_fmt = ZI; + i32 prof_group = PROF_THREAD_GROUP_FIBERS - Mebi(pool_kind); + switch (pool_kind) + { + default: Assert(0); break; + + case PoolKind_Sim: + { + name_fmt = Lit("Sim worker #%F"); + pool->num_worker_threads = 4; + pool->thread_affinity_mask = 0x000000000000000Full; + pool->thread_priority = THREAD_PRIORITY_TIME_CRITICAL; + } break; + + case PoolKind_User: + { + name_fmt = Lit("User worker #%F"); + pool->num_worker_threads = 4; + pool->thread_affinity_mask = 0x00000000000000F0ull; + pool->thread_priority = THREAD_PRIORITY_TIME_CRITICAL; + } break; + + case PoolKind_Audio: + { + name_fmt = Lit("Audio worker #%F"); + pool->num_worker_threads = 2; + pool->thread_affinity_mask = 0x0000000000000300ull; + pool->thread_priority = THREAD_PRIORITY_TIME_CRITICAL; + pool->thread_is_audio = 1; + } break; + + case PoolKind_Background: + { + name_fmt = Lit("Background worker #%F"); + pool->num_worker_threads = 2; + pool->thread_affinity_mask = 0x0000000000000C00ull; + } break; + + case PoolKind_Floating: + { + name_fmt = Lit("Floating 
worker #%F"); + pool->num_worker_threads = 8; + pool->thread_affinity_mask = 0x0000000000000FFFull; + } break; + } + pool->worker_threads_arena = AllocArena(Gibi(64)); + pool->worker_threads = PushStructs(pool->worker_threads_arena, W32_Thread *, pool->num_worker_threads); + pool->worker_contexts = PushStructs(pool->worker_threads_arena, W32_WorkerCtx, pool->num_worker_threads); + for (i32 i = 0; i < pool->num_worker_threads; ++i) + { + W32_WorkerCtx *ctx = &pool->worker_contexts[i]; + ctx->pool_kind = pool_kind; + ctx->id = i; + String name = StringFormat(pool->worker_threads_arena, name_fmt, FmtSint(i)); + pool->worker_threads[i] = W32_AllocThread(W32_JobWorkerEntryFunc, ctx, name, prof_group + i); + } + } + } + + P_OnExit(ShutdownJobs); +} + +//////////////////////////////// +//~ Shutdown + +void ShutdownJobs(void) +{ + /* Signal shutdown */ + if (!Atomic32Fetch(&g->panicking)) + { + Atomic32FetchSet(&g->shutdown, 1); + for (PoolKind pool_kind = 0; pool_kind < (i32)countof(g->job_pools); ++pool_kind) + { + W32_JobPool *pool = &g->job_pools[pool_kind]; + LockTicketMutex(&pool->workers_wake_tm); + { + Atomic32FetchSet(&pool->workers_shutdown.v, 1); + Atomic64FetchSet(&pool->num_jobs_in_queue.v, -100000); + WakeByAddressAll(&pool->num_jobs_in_queue); + } + UnlockTicketMutex(&pool->workers_wake_tm); + } + } + + /* Wait on worker threads */ + if (!Atomic32Fetch(&g->panicking)) + { + for (PoolKind pool_kind = 0; pool_kind < (i32)countof(g->job_pools); ++pool_kind) + { + W32_JobPool *pool = &g->job_pools[pool_kind]; + for (i32 i = 0; i < pool->num_worker_threads; ++i) + { + W32_Thread *worker_thread = pool->worker_threads[i]; + W32_WaitReleaseThread(worker_thread); + } + } + } + + /* Wait on scheduler thread */ + if (!Atomic32Fetch(&g->panicking)) + { + W32_WaitReleaseThread(scheduler_thread); + } + + /* Find any dangling threads that haven't exited gracefully by now */ + if (!Atomic32Fetch(&g->panicking)) + { + P_Lock lock = P_LockS(&g->threads_mutex); + if 
(g->first_thread) + { + TempArena scratch = BeginScratchNoConflict(); + u64 num_dangling_threads = 0; + String threads_msg = ZI; + threads_msg.text = PushDry(scratch.arena, u8); + for (W32_Thread *t = g->first_thread; t; t = t->next) + { + String name = StringFromCstr(t->thread_name_cstr, countof(t->thread_name_cstr)); + threads_msg.len += StringFormat(scratch.arena, Lit(" \"%F\"\n"), FmtString(name)).len; + ++num_dangling_threads; + } + threads_msg = StringFormat(scratch.arena, Lit("%F dangling thread(s):\n%F"), FmtUint(num_dangling_threads), FmtString(threads_msg)); + P_Panic(threads_msg); + EndScratch(scratch); + } + P_Unlock(&lock); + } +} + +//////////////////////////////// +//~ Win32 ticket mutex + +void LockTicketMutex(W32_TicketMutex *tm) +{ + i64 ticket = Atomic64FetchAdd(&tm->ticket.v, 1); + while (Atomic64Fetch(&tm->serving.v) != ticket) + { + _mm_pause(); + } +} + +void UnlockTicketMutex(W32_TicketMutex *tm) +{ + Atomic64FetchAdd(&tm->serving.v, 1); +} + +//////////////////////////////// +//~ Win32 thread + +DWORD WINAPI W32_Win32ThreadProc(LPVOID vt) +{ + W32_AllocFiber(0); + + W32_Thread *t = (W32_Thread *)vt; + __profthread(t->thread_name_cstr, t->profiler_group); + + /* Initialize COM */ + CoInitializeEx(0, COINIT_MULTITHREADED); + + /* Set thread name */ + if (t->thread_name_wstr[0] != 0) + { + SetThreadDescription(GetCurrentThread(), t->thread_name_wstr); + } + + P_LogInfoF("New thread \"%F\" created with ID %F", FmtString(StringFromCstrNoLimit(t->thread_name_cstr)), FmtUint(P_GetThreadId())); + + /* Enter thread entry point */ + t->entry_point(t->thread_data); + + /* Uninitialize COM */ + CoUninitialize(); + + return 0; +} + +W32_Thread *W32_AllocThread(W32_ThreadFunc *entry_point, void *thread_data, String thread_name, i32 profiler_group) +{ + __prof; + TempArena scratch = BeginScratchNoConflict(); + W32_SharedCtx *g = &W32_shared_ctx; + Assert(entry_point != 0); + P_LogInfoF("Creating thread \"%F\"", FmtString(thread_name)); + + /* Allocate 
thread object */ + W32_Thread *t = 0; + { + P_Lock lock = P_LockE(&g->threads_mutex); + if (g->first_free_thread) + { + t = g->first_free_thread; + g->first_free_thread = t->next; + } + else + { + t = PushStructNoZero(g->threads_arena, W32_Thread); + } + ZeroStruct(t); + if (g->last_thread) + { + g->last_thread->next = t; + t->prev = g->last_thread; + } + else + { + g->first_thread = t; + } + g->last_thread = t; + P_Unlock(&lock); + } + + t->entry_point = entry_point; + t->thread_data = thread_data; + t->profiler_group = profiler_group; + + /* Copy thread name to params */ + { + u64 CstrLen = MinU64((countof(t->thread_name_cstr) - 1), thread_name.len); + CopyBytes(t->thread_name_cstr, thread_name.text, CstrLen * sizeof(*t->thread_name_cstr)); + t->thread_name_cstr[CstrLen] = 0; + } + { + String16 thread_name16 = String16FromString(scratch.arena, thread_name); + u64 WstrLen = MinU64((countof(t->thread_name_wstr) - 1), thread_name16.len); + CopyBytes(t->thread_name_wstr, thread_name16.text, WstrLen * sizeof(*t->thread_name_wstr)); + t->thread_name_wstr[WstrLen] = 0; + } + + t->handle = CreateThread( + 0, + W32_ThreadStackSize, + W32_Win32ThreadProc, + t, + 0, + 0 + ); + + if (!t->handle) + { + P_Panic(Lit("Failed to create thread")); + } + + EndScratch(scratch); + return (W32_Thread *)t; +} + +/* Returns 0 if the thread could not release in specified timeout (e.g. because it is still running) */ +b32 W32_TryReleaseThread(W32_Thread *thread, f32 timeout_seconds) +{ + __prof; + W32_SharedCtx *g = &W32_shared_ctx; + b32 success = 0; + W32_Thread *t = (W32_Thread *)thread; + HANDLE handle = t->handle; + if (handle) + { + /* Wait for thread to stop */ + DWORD timeout_ms = (timeout_seconds > 10000000) ? 
INFINITE : RoundF32ToI32(timeout_seconds * 1000); + DWORD wait_result = WaitForSingleObject(handle, timeout_ms); + if (wait_result == WAIT_OBJECT_0) + { + /* Release thread */ + success = 1; + CloseHandle(handle); + { + P_Lock lock = P_LockE(&g->threads_mutex); + { + W32_Thread *prev = t->prev; + W32_Thread *next = t->next; + if (prev) + { + prev->next = next; + } + else + { + g->first_thread = next; + } + if (next) + { + next->prev = prev; + } + else + { + g->last_thread = prev; + } + t->next = g->first_free_thread; + g->first_free_thread = t; + } + P_Unlock(&lock); + } + } + } + return success; +} + +void W32_WaitReleaseThread(W32_Thread *thread) +{ + __prof; + b32 success = W32_TryReleaseThread(thread, F32Infinity); + Assert(success); + LAX success; +} + +//////////////////////////////// +//~ Win32 wait list + +/* REQUIRED: Caller must have acquired `wake_lock` for each fiber in array */ +void W32_WakeLockedFibers(i32 num_fibers, W32_Fiber **fibers) +{ + W32_SharedCtx *g = &W32_shared_ctx; + + /* Update wait lists */ + for (i32 i = 0; i < num_fibers; ++i) + { + W32_Fiber *fiber = fibers[i]; + u64 wait_addr = fiber->wait_addr; + u64 wait_time = fiber->wait_time; + + /* Lock & search wait bins */ + /* TODO: Cache these in parameters since caller has one of them already calculated */ + W32_WaitBin *wait_addr_bin = 0; + W32_WaitBin *wait_time_bin = 0; + W32_WaitList *wait_addr_list = 0; + W32_WaitList *wait_time_list = 0; + if (wait_addr != 0) + { + wait_addr_bin = &g->wait_addr_bins[wait_addr % W32_NumWaitAddrBins]; + LockTicketMutex(&wait_addr_bin->lock); + for (W32_WaitList *tmp = wait_addr_bin->first_wait_list; tmp && !wait_addr_list; tmp = tmp->next_in_bin) + { + if (tmp->value == (u64)wait_addr) + { + wait_addr_list = tmp; + } + } + } + if (wait_time != 0) + { + wait_time_bin = &g->wait_time_bins[wait_time % W32_NumWaitTimeBins]; + LockTicketMutex(&wait_time_bin->lock); + for (W32_WaitList *tmp = wait_time_bin->first_wait_list; tmp && !wait_time_list; tmp = 
tmp->next_in_bin) + { + if (tmp->value == (u64)wait_time) + { + wait_time_list = tmp; + } + } + } + { + /* Remove from addr list */ + if (wait_addr_list) + { + if (--wait_addr_list->num_waiters == 0) + { + /* Free addr list */ + W32_WaitList *prev = wait_addr_list->prev_in_bin; + W32_WaitList *next = wait_addr_list->next_in_bin; + if (prev) + { + prev->next_in_bin = next; + } + else + { + wait_addr_bin->first_wait_list = next; + } + if (next) + { + next->prev_in_bin = prev; + } + else + { + wait_addr_bin->last_wait_list = prev; + } + wait_addr_list->next_in_bin = wait_addr_bin->first_free_wait_list; + wait_addr_bin->first_free_wait_list = wait_addr_list; + } + else + { + i16 prev_id = fiber->prev_addr_waiter; + i16 next_id = fiber->next_addr_waiter; + if (prev_id) + { + W32_FiberFromId(prev_id)->next_addr_waiter = next_id; + } + else + { + wait_addr_list->first_waiter = next_id; + } + if (next_id) + { + W32_FiberFromId(next_id)->prev_addr_waiter = prev_id; + } + else + { + wait_addr_list->last_waiter = prev_id; + } + } + fiber->wait_addr = 0; + fiber->prev_addr_waiter = 0; + fiber->next_addr_waiter = 0; + } + /* Remove from time list */ + if (wait_time_list) + { + if (--wait_time_list->num_waiters == 0) + { + /* Free time list */ + W32_WaitList *prev = wait_time_list->prev_in_bin; + W32_WaitList *next = wait_time_list->next_in_bin; + if (prev) + { + prev->next_in_bin = next; + } + else + { + wait_time_bin->first_wait_list = next; + } + if (next) + { + next->prev_in_bin = prev; + } + else + { + wait_time_bin->last_wait_list = prev; + } + wait_time_list->next_in_bin = wait_time_bin->first_free_wait_list; + wait_time_bin->first_free_wait_list = wait_time_list; + } + else + { + i16 prev_id = fiber->prev_time_waiter; + i16 next_id = fiber->next_time_waiter; + if (prev_id) + { + W32_FiberFromId(prev_id)->next_time_waiter = next_id; + } + else + { + wait_time_list->first_waiter = next_id; + } + if (next_id) + { + W32_FiberFromId(next_id)->prev_time_waiter = prev_id; + } + 
else + { + wait_time_list->last_waiter = prev_id; + } + } + fiber->wait_time = 0; + fiber->prev_time_waiter = 0; + fiber->next_time_waiter = 0; + } + /* Unlock fiber */ + Atomic32FetchSet(&fiber->wake_lock, 0); + } + /* Unlock wait bins */ + if (wait_time_bin != 0) UnlockTicketMutex(&wait_time_bin->lock); + if (wait_addr_bin != 0) UnlockTicketMutex(&wait_addr_bin->lock); + } + + /* Resume jobs */ + /* TODO: Batch submit waiters based on queue kind rather than one at a time */ + i32 job_counts_per_pool[PoolKind_Count] = ZI; + for (i32 i = 0; i < num_fibers; ++i) + { + W32_Fiber *fiber = fibers[i]; + PoolKind pool_kind = fiber->job_pool; + ++job_counts_per_pool[pool_kind]; + W32_JobPool *pool = &g->job_pools[pool_kind]; + W32_JobQueue *queue = &pool->job_queues[fiber->job_priority]; + LockTicketMutex(&queue->lock); + { + W32_JobInfo *info = 0; + if (queue->first_free) + { + info = queue->first_free; + queue->first_free = info->next; + } + else + { + info = PushStructNoZero(queue->arena, W32_JobInfo); + } + ZeroStruct(info); + info->count = 1; + info->num_dispatched = fiber->job_id; + info->func = fiber->job_func; + info->sig = fiber->job_sig; + info->counter = fiber->job_counter; + info->fiber_id = fiber->id; + if (queue->first) + { + info->next = queue->first; + } + else + { + queue->last = info; + } + queue->first = info; + } + UnlockTicketMutex(&queue->lock); + } + + /* Wake workers */ + if (num_fibers > 0) + { + for (PoolKind pool_kind = 0; pool_kind < (i32)countof(job_counts_per_pool); ++pool_kind) + { + i32 job_count = job_counts_per_pool[pool_kind]; + if (job_count > 0) + { + W32_JobPool *pool = &g->job_pools[pool_kind]; + LockTicketMutex(&pool->workers_wake_tm); + { + Atomic64FetchAdd(&pool->num_jobs_in_queue.v, job_count); + if (job_count >= W32_WakeAllThreshold) + { + WakeByAddressAll(&pool->num_jobs_in_queue); + } + else + { + for (i32 i = 0; i < job_count; ++i) + { + WakeByAddressSingle(&pool->num_jobs_in_queue); + } + } + } + 
UnlockTicketMutex(&pool->workers_wake_tm); + } + } + } +} + +void W32_WakeByAddress(void *addr, i32 count) +{ + TempArena scratch = BeginScratchNoConflict(); + W32_SharedCtx *g = &W32_shared_ctx; + + u64 wait_addr_bin_index = (u64)addr % W32_NumWaitAddrBins; + W32_WaitBin *wait_addr_bin = &g->wait_addr_bins[wait_addr_bin_index]; + W32_WaitList *wait_addr_list = 0; + + /* Get list of waiting fibers */ + i32 num_fibers = 0; + W32_Fiber **fibers = 0; + { + LockTicketMutex(&wait_addr_bin->lock); + { + /* Search for wait addr list */ + for (W32_WaitList *tmp = wait_addr_bin->first_wait_list; tmp && !wait_addr_list; tmp = tmp->next_in_bin) + { + if (tmp->value == (u64)addr) + { + wait_addr_list = tmp; + } + } + + /* Lock fibers & build array */ + if (wait_addr_list) + { + fibers = PushStructsNoZero(scratch.arena, W32_Fiber *, wait_addr_list->num_waiters); + for (W32_Fiber *fiber = W32_FiberFromId(wait_addr_list->first_waiter); fiber && num_fibers < count; fiber = W32_FiberFromId(fiber->next_addr_waiter)) + { + if (Atomic32FetchTestSet(&fiber->wake_lock, 0, 1) == 0) + { + fibers[num_fibers] = fiber; + ++num_fibers; + } + } + } + } + UnlockTicketMutex(&wait_addr_bin->lock); + } + + if (num_fibers > 0) + { + W32_WakeLockedFibers(num_fibers, fibers); + } + + /* Wake win32 blocking thread waiters */ + if (count >= W32_WakeAllThreshold) + { + WakeByAddressAll(addr); + } + else + { + for (i32 i = 0; i < count; ++i) + { + WakeByAddressSingle(addr); + } + } + + EndScratch(scratch); +} + +void W32_WakeByTime(u64 time) +{ + TempArena scratch = BeginScratchNoConflict(); + W32_SharedCtx *g = &W32_shared_ctx; + + u64 wait_time_bin_index = (u64)time % W32_NumWaitTimeBins; + W32_WaitBin *wait_time_bin = &g->wait_time_bins[wait_time_bin_index]; + W32_WaitList *wait_time_list = 0; + + /* Build list of waiters to resume */ + i32 num_fibers = 0; + W32_Fiber **fibers = 0; + { + LockTicketMutex(&wait_time_bin->lock); + { + /* Search for wait time list */ + for (W32_WaitList *tmp = 
wait_time_bin->first_wait_list; tmp && !wait_time_list; tmp = tmp->next_in_bin) + { + if (tmp->value == (u64)time) + { + wait_time_list = tmp; + } + } + + if (wait_time_list) + { + /* Set waiter wake status & build fibers list */ + fibers = PushStructsNoZero(scratch.arena, W32_Fiber *, wait_time_list->num_waiters); + for (W32_Fiber *fiber = W32_FiberFromId(wait_time_list->first_waiter); fiber; fiber = W32_FiberFromId(fiber->next_time_waiter)) + { + if (Atomic32FetchTestSet(&fiber->wake_lock, 0, 1) == 0) + { + fibers[num_fibers] = fiber; + ++num_fibers; + } + + } + } + } + UnlockTicketMutex(&wait_time_bin->lock); + } + + W32_WakeLockedFibers(num_fibers, fibers); + + EndScratch(scratch); +} + +//////////////////////////////// +//~ Win32 fiber + +//- Allocate fiber +/* If `pool` is 0, then the currently running thread will be converted into a fiber */ +W32_Fiber *W32_AllocFiber(W32_JobPool *pool) +{ + W32_SharedCtx *g = &W32_shared_ctx; + i16 fiber_id = 0; + W32_Fiber *fiber = 0; + char *new_name_cstr = 0; + { + if (pool != 0) + { + LockTicketMutex(&pool->free_fibers_lock); + if (pool->first_free_fiber_id) + { + fiber_id = pool->first_free_fiber_id; + fiber = &g->fibers[fiber_id]; + pool->first_free_fiber_id = fiber->parent_id; + } + UnlockTicketMutex(&pool->free_fibers_lock); + } + if (!fiber_id) + { + LockTicketMutex(&g->fibers_lock); + { + { + fiber_id = g->num_fibers++; + if (fiber_id >= MaxFibers) + { + P_Panic(Lit("Max fibers reached")); + } + fiber = &g->fibers[fiber_id]; + new_name_cstr = PushStructs(g->fiber_names_arena, char, W32_FiberNameMaxSize); + } + } + UnlockTicketMutex(&g->fibers_lock); + } + + } + if (new_name_cstr != 0) + { + __profn("Initialize fiber"); + fiber->id = fiber_id; + + /* Id to ASCII */ + i32 id_div = fiber_id; + char id_chars[64] = ZI; + i32 id_chars_len = 0; + do + { + i32 digit = id_div % 10; + id_div /= 10; + id_chars[id_chars_len] = ("0123456789")[digit]; + ++id_chars_len; + } while (id_div > 0); + i32 rev_start = 0; + i32 rev_end 
= id_chars_len - 1; + while (rev_start < rev_end) + { + char swp = id_chars[rev_start]; + id_chars[rev_start] = id_chars[rev_end]; + id_chars[rev_end] = swp; + ++rev_start; + --rev_end; + } + + /* Concat fiber name */ + i32 name_size = 1; + Assert(sizeof(sizeof(W32_FiberNamePrefixCstr)) <= W32_FiberNameMaxSize); + CopyBytes(new_name_cstr, W32_FiberNamePrefixCstr, sizeof(W32_FiberNamePrefixCstr)); + name_size += sizeof(W32_FiberNamePrefixCstr) - 2; + CopyBytes(new_name_cstr + name_size, id_chars, id_chars_len); + name_size += id_chars_len; + CopyBytes(new_name_cstr + name_size, W32_FiberNameSuffixCstr, sizeof(W32_FiberNameSuffixCstr)); + name_size += sizeof(W32_FiberNameSuffixCstr) - 2; + + fiber->name_cstr = new_name_cstr; + + /* Init win32 fiber */ + if (pool != 0) + { + __profn("CreateFiber"); + fiber->addr = CreateFiber(W32_FiberStackSize, W32_FiberEntryPoint, (void *)(i64)fiber_id); + } + else + { + /* Fiber is not a part of a job pool, convert thread to fiber */ + __profn("ConvertThreadToFiber"); + fiber->addr = ConvertThreadToFiber((void *)(i64)fiber_id); + } + } + fiber->wait_addr = 0; + fiber->wait_time = 0; + fiber->prev_addr_waiter = 0; + fiber->next_addr_waiter = 0; + fiber->prev_time_waiter = 0; + fiber->next_time_waiter = 0; + fiber->job_func = 0; + fiber->job_sig = 0; + fiber->job_id = 0; + fiber->job_pool = 0; + fiber->job_priority = 0; + fiber->job_counter = 0; + fiber->yield_param = 0; + fiber->parent_id = 0; + return fiber; +} + +//- Release fiber +void W32_ReleaseFiber(W32_JobPool *pool, W32_Fiber *fiber) +{ + LockTicketMutex(&pool->free_fibers_lock); + { + i16 fiber_id = fiber->id; + fiber->parent_id = pool->first_free_fiber_id; + pool->first_free_fiber_id = fiber_id; + } + UnlockTicketMutex(&pool->free_fibers_lock); +} + + +//- Fiber id +ForceInline W32_Fiber *W32_FiberFromId(i16 id) +{ + W32_SharedCtx *g = &W32_shared_ctx; + if (id <= 0) + { + return 0; + } + else + { + return &g->fibers[id]; + } +} + +//- Fiber control flow +ForceNoInline 
void W32_FiberResume(W32_Fiber *fiber) +{ + MemoryBarrier(); + SwitchToFiber(fiber->addr); + MemoryBarrier(); +} + +void W32_YieldFiber(W32_Fiber *fiber, W32_Fiber *parent_fiber) +{ + LAX fiber; + Assert(fiber->id == FiberId()); + Assert(parent_fiber->id == fiber->parent_id); + Assert(parent_fiber->id > 0); + { + __prof_fiber_leave(); + W32_FiberResume(parent_fiber); + __prof_fiber_enter(fiber->name_cstr, PROF_THREAD_GROUP_FIBERS - Mebi(fiber->job_pool) + Kibi(1) + fiber->id); + } +} + +//- Fiber entry +void W32_FiberEntryPoint(void *id_ptr) +{ + i16 id = (i32)(i64)id_ptr; + volatile W32_Fiber *fiber = W32_FiberFromId(id); + __prof_fiber_enter(fiber->name_cstr, PROF_THREAD_GROUP_FIBERS - Mebi(fiber->job_pool) + Kibi(1) + fiber->id); + for (;;) + { + /* Run job */ + { + W32_YieldParam *yield_param = fiber->yield_param; + yield_param->kind = W32_YieldKind_None; + P_JobData data = ZI; + data.id = fiber->job_id; + data.sig = fiber->job_sig; + { + MemoryBarrier(); + fiber->job_func(data); + MemoryBarrier(); + } + } + /* Job completed, yield */ + { + /* Decrement job counter */ + Counter *job_counter = fiber->job_counter; + if (job_counter) + { + P_CounterAdd(job_counter, -1); + } + /* Yield to worker */ + fiber->yield_param->kind = W32_YieldKind_Done; + W32_Fiber *parent_fiber = W32_FiberFromId(fiber->parent_id); + W32_YieldFiber((W32_Fiber *)fiber, parent_fiber); + } + } +} + +//////////////////////////////// +//~ Win32 job worker + +W32_ThreadDef(W32_JobWorkerEntryFunc, worker_ctx_arg) +{ + W32_SharedCtx *g = &W32_shared_ctx; + W32_WorkerCtx *ctx = worker_ctx_arg; + PoolKind pool_kind = ctx->pool_kind; + W32_JobPool *pool = &g->job_pools[pool_kind]; + LAX ctx; + + { + /* TODO: Heuristic pinning */ + /* TODO: Pin non-worker threads to other cores */ + HANDLE thread_handle = GetCurrentThread(); + + if (pool->thread_priority) + { + __profn("Set priority"); + b32 success = SetThreadPriority(thread_handle, pool->thread_priority) != 0; + Assert(success); + LAX success; + } 
+ +#if 0 + if (pool->thread_affinity_mask) + { + __profn("Set affinity"); + b32 success = SetThreadAffinityMask(thread_handle, pool->thread_affinity_mask) != 0; +#if RtcIsEnabled || ProfilingIsEnabled + { + /* Retry until external tools can set correct process affinity */ + i32 delay_ms = 16; + while (!success && delay_ms <= 1024) + { + __profn("Affinity retry"); + Sleep(delay_ms); + success = SetThreadAffinityMask(thread_handle, pool->thread_affinity_mask) != 0; + delay_ms *= 2; + } + } +#endif + Assert(success); + LAX success; + } +#endif + + if (pool->thread_is_audio) + { + /* https://learn.microsoft.com/en-us/windows/win32/procthread/multimedia-class-scheduler-service#registry-settings */ + __profn("Set mm thread characteristics"); + DWORD task = 0; + HANDLE mmc_handle = AvSetMmThreadCharacteristics(L"Pro Audio", &task); + Assert(mmc_handle); + LAX mmc_handle; + } + } + + i32 worker_fiber_id = FiberId(); + + W32_Fiber *job_fiber = 0; + b32 shutdown = 0; + while (!shutdown) + { + //- Pull job from queue + PriorityKind job_priority = 0; + i16 job_fiber_id = 0; + i32 job_id = 0; + P_JobFunc *job_func = 0; + void *job_sig = 0; + Counter *job_counter = 0; + { + //__profnc("Pull job", Rgb32F(0.75, 0.75, 0)); + for (PriorityKind priority = 0; priority < (i32)countof(pool->job_queues) && !job_func; ++priority) + { + W32_JobQueue *queue = &pool->job_queues[priority]; + if (queue) + { + LockTicketMutex(&queue->lock); + { + W32_JobInfo *info = queue->first; + while (info && !job_func) + { + W32_JobInfo *next = info->next; + b32 dequeue = 0; + if (info->fiber_id <= 0) + { + job_id = info->num_dispatched++; + if (job_id < info->count) + { + /* Pick job */ + Atomic64FetchAdd(&pool->num_jobs_in_queue.v, -1); + job_priority = priority; + job_func = info->func; + job_sig = info->sig; + job_counter = info->counter; + if (job_id == (info->count - 1)) + { + /* We're picking up the last dispatch, so dequeue the job */ + dequeue = 1; + } + } + } + else + { + /* This job is to be 
resumed from a yield */ + Atomic64FetchAdd(&pool->num_jobs_in_queue.v, -1); + job_fiber_id = info->fiber_id; + job_priority = priority; + job_id = info->num_dispatched; + job_func = info->func; + job_sig = info->sig; + job_counter = info->counter; + dequeue = 1; + } + if (dequeue) + { + if (!next) + { + queue->last = 0; + } + queue->first = next; + info->next = queue->first_free; + queue->first_free = info; + } + info = next; + } + } + UnlockTicketMutex(&queue->lock); + } + } + } + + //- Release old fiber if resuming a yielded fiber + if (job_fiber_id > 0) + { + if (job_fiber) + { + W32_ReleaseFiber(pool, job_fiber); + } + job_fiber = W32_FiberFromId(job_fiber_id); + } + + //- Run fiber + if (job_func) + { + if (!job_fiber) + { + job_fiber = W32_AllocFiber(pool); + } + job_fiber_id = job_fiber->id; + { + __profnc("Run fiber", Rgb32F(1, 1, 1)); + __profvalue(job_fiber->id); + W32_YieldParam yield = ZI; + job_fiber->parent_id = worker_fiber_id; + job_fiber->job_func = job_func; + job_fiber->job_sig = job_sig; + job_fiber->job_id = job_id; + job_fiber->job_pool = pool_kind; + job_fiber->job_priority = job_priority; + job_fiber->job_counter = job_counter; + job_fiber->yield_param = &yield; + b32 done = 0; + while (!done) + { + W32_FiberResume(job_fiber); + switch (yield.kind) + { + default: + { + /* Invalid yield kind */ + TempArena scratch = BeginScratchNoConflict(); + P_Panic(StringFormat(scratch.arena, Lit("Invalid fiber yield kind \"%F\""), FmtSint(yield.kind))); + EndScratch(scratch); + } break; + + //- Fiber is waiting + case W32_YieldKind_Wait: + { + __profn("Process fiber wait"); + volatile void *wait_addr = yield.wait.addr; + void *wait_cmp = yield.wait.cmp; + u32 wait_size = yield.wait.size; + i64 wait_timeout_ns = yield.wait.timeout_ns; + i64 wait_time = 0; + if (wait_timeout_ns > 0 && wait_timeout_ns < I64Max) + { + u64 current_scheduler_cycle = Atomic64Fetch(&g->current_scheduler_cycle.v); + i64 current_scheduler_cycle_period_ns = 
Atomic64Fetch(&g->current_scheduler_cycle_period_ns.v); + wait_time = current_scheduler_cycle + MaxI64((i64)((f64)wait_timeout_ns / (f64)current_scheduler_cycle_period_ns), 1); + } + + u64 wait_addr_bin_index = (u64)wait_addr % W32_NumWaitAddrBins; + u64 wait_time_bin_index = (u64)wait_time % W32_NumWaitTimeBins; + W32_WaitBin *wait_addr_bin = &g->wait_addr_bins[wait_addr_bin_index]; + W32_WaitBin *wait_time_bin = &g->wait_time_bins[wait_time_bin_index]; + + if (wait_addr != 0) LockTicketMutex(&wait_addr_bin->lock); + { + if (wait_time != 0) LockTicketMutex(&wait_time_bin->lock); + { + //- Load and compare value at address now that wait bins are locked + b32 cancel_wait = wait_addr == 0 && wait_time == 0; + if (wait_addr != 0) + { + switch (wait_size) + { + case 1: cancel_wait = (u8)_InterlockedCompareExchange8(wait_addr, 0, 0) != *(u8 *)wait_cmp; break; + case 2: cancel_wait = (u16)_InterlockedCompareExchange16(wait_addr, 0, 0) != *(u16 *)wait_cmp; break; + case 4: cancel_wait = (u32)_InterlockedCompareExchange(wait_addr, 0, 0) != *(u32 *)wait_cmp; break; + case 8: cancel_wait = (u64)_InterlockedCompareExchange64(wait_addr, 0, 0) != *(u64 *)wait_cmp; break; + default: cancel_wait = 1; Assert(0); break; /* Invalid wait size */ + } + } + if (wait_time != 0 && !cancel_wait) + { + cancel_wait = wait_time <= Atomic64Fetch(&g->current_scheduler_cycle.v); + } + if (!cancel_wait) + { + if (wait_addr != 0) + { + //- Search for wait addr list in bin + W32_WaitList *wait_addr_list = 0; + for (W32_WaitList *tmp = wait_addr_bin->first_wait_list; tmp && !wait_addr_list; tmp = tmp->next_in_bin) + { + if (tmp->value == (u64)wait_addr) + { + wait_addr_list = tmp; + } + } + //- Allocate new wait addr list + if (!wait_addr_list) + { + if (wait_addr_bin->first_free_wait_list) + { + wait_addr_list = wait_addr_bin->first_free_wait_list; + wait_addr_bin->first_free_wait_list = wait_addr_list->next_in_bin; + } + else + { + LockTicketMutex(&g->wait_lists_arena_lock); + { + wait_addr_list 
= PushStructNoZero(g->wait_lists_arena, W32_WaitList); + } + UnlockTicketMutex(&g->wait_lists_arena_lock); + } + ZeroStruct(wait_addr_list); + wait_addr_list->value = (u64)wait_addr; + if (wait_addr_bin->last_wait_list) + { + wait_addr_bin->last_wait_list->next_in_bin = wait_addr_list; + wait_addr_list->prev_in_bin = wait_addr_bin->last_wait_list; + } + else + { + wait_addr_bin->first_wait_list = wait_addr_list; + } + wait_addr_bin->last_wait_list = wait_addr_list; + } + //- Insert fiber into wait addr list + job_fiber->wait_addr = (u64)wait_addr; + if (wait_addr_list->last_waiter) + { + W32_FiberFromId(wait_addr_list->last_waiter)->next_addr_waiter = job_fiber_id; + job_fiber->prev_addr_waiter = wait_addr_list->last_waiter; + } + else + { + wait_addr_list->first_waiter = job_fiber_id; + } + wait_addr_list->last_waiter = job_fiber_id; + ++wait_addr_list->num_waiters; + } + if (wait_time != 0) + { + //- Search for wait time list in bin + W32_WaitList *wait_time_list = 0; + for (W32_WaitList *tmp = wait_time_bin->first_wait_list; tmp && !wait_time_list; tmp = tmp->next_in_bin) + { + if (tmp->value == (u64)wait_time) + { + wait_time_list = tmp; + } + } + //- Allocate new wait time list + if (!wait_time_list) + { + if (wait_time_bin->first_free_wait_list) + { + wait_time_list = wait_time_bin->first_free_wait_list; + wait_time_bin->first_free_wait_list = wait_time_list->next_in_bin; + } + else + { + LockTicketMutex(&g->wait_lists_arena_lock); + { + wait_time_list = PushStructNoZero(g->wait_lists_arena, W32_WaitList); + } + UnlockTicketMutex(&g->wait_lists_arena_lock); + } + ZeroStruct(wait_time_list); + wait_time_list->value = wait_time; + if (wait_time_bin->last_wait_list) + { + wait_time_bin->last_wait_list->next_in_bin = wait_time_list; + wait_time_list->prev_in_bin = wait_time_bin->last_wait_list; + } + else + { + wait_time_bin->first_wait_list = wait_time_list; + } + wait_time_bin->last_wait_list = wait_time_list; + } + //- Insert fiber into wait time list + 
job_fiber->wait_time = wait_time; + if (wait_time_list->last_waiter) + { + W32_FiberFromId(wait_time_list->last_waiter)->next_time_waiter = job_fiber_id; + job_fiber->prev_time_waiter = wait_time_list->last_waiter; + } + else + { + wait_time_list->first_waiter = job_fiber_id; + } + wait_time_list->last_waiter = job_fiber_id; + ++wait_time_list->num_waiters; + } + + //- PopStruct worker's job fiber + job_fiber = 0; + done = 1; + } + } + if (wait_time != 0) UnlockTicketMutex(&wait_time_bin->lock); + } + if (wait_addr != 0) UnlockTicketMutex(&wait_addr_bin->lock); + } break; + + //- Fiber is finished + case W32_YieldKind_Done: + { + done = 1; + } break; + } + } + } + } + + //- Wait for job + i64 num_jobs_in_queue = Atomic64Fetch(&pool->num_jobs_in_queue.v); + shutdown = Atomic32Fetch(&pool->workers_shutdown.v); + if (num_jobs_in_queue <= 0 && !shutdown) + { + //__profnc("Wait for job", Rgb32F(0.75, 0.75, 0)); + LockTicketMutex(&pool->workers_wake_tm); + { + num_jobs_in_queue = Atomic64Fetch(&pool->num_jobs_in_queue.v); + shutdown = Atomic32Fetch(&pool->workers_shutdown.v); + while (num_jobs_in_queue <= 0 && !shutdown) + { + { + UnlockTicketMutex(&pool->workers_wake_tm); + WaitOnAddress(&pool->num_jobs_in_queue, &num_jobs_in_queue, sizeof(num_jobs_in_queue), INFINITE); + LockTicketMutex(&pool->workers_wake_tm); + } + shutdown = Atomic32Fetch(&pool->workers_shutdown.v); + num_jobs_in_queue = Atomic64Fetch(&pool->num_jobs_in_queue.v); + } + } + UnlockTicketMutex(&pool->workers_wake_tm); + } + } + + //- Worker shutdown + if (job_fiber) + { + W32_ReleaseFiber(pool, job_fiber); + } +} + +//////////////////////////////// +//~ Win32 job scheduler + +W32_ThreadDef(W32_JobSchedulerEntryFunc, UNUSED arg) +{ + struct W32_SharedCtx *g = &W32_shared_ctx; + + { + i32 priority = THREAD_PRIORITY_TIME_CRITICAL; + b32 success = SetThreadPriority(GetCurrentThread(), priority); + LAX success; + Assert(success); + } + + /* Create high resolution timer */ + HANDLE timer = 
CreateWaitableTimerExW(0, 0, CREATE_WAITABLE_TIMER_HIGH_RESOLUTION, TIMER_ALL_ACCESS); + if (!timer) + { + P_Panic(Lit("Failed to create high resolution timer")); + } + + /* Create rolling buffer of scheduler cycles initialized to default value */ + i32 periods_index = 0; + i64 periods[W32_NumRollingSchedulerPeriods] = ZI; + for (i32 i = 0; i < (i32)countof(periods); ++i) + { + periods[i] = W32_DefaultSchedulerPeriodNs; + } + + i64 last_cycle_ns = 0; + while (!Atomic32Fetch(&g->shutdown)) + { + __profn("Job scheduler cycle"); + { + __profn("Job scheduler wait"); + LARGE_INTEGER due = ZI; + due.QuadPart = -1; + //due.QuadPart = -10000; + //due.QuadPart = -32000; + //due.QuadPart = -12000; + //due.QuadPart = -8000; + SetWaitableTimerEx(timer, &due, 0, 0, 0, 0, 0); + WaitForSingleObject(timer, INFINITE); + } + + /* Calculate mean period */ + i64 now_ns = P_TimeNs(); + i64 period_ns = last_cycle_ns == 0 ? W32_DefaultSchedulerPeriodNs : now_ns - last_cycle_ns; + last_cycle_ns = now_ns; + + /* Calculate mean period */ + { + periods[periods_index++] = period_ns; + if (periods_index == countof(periods)) + { + periods_index = 0; + } + f64 periods_sum_ns = 0; + for (i32 i = 0; i < (i32)countof(periods); ++i) + { + periods_sum_ns += (f64)periods[i]; + } + f64 mean_ns = periods_sum_ns / (f64)countof(periods); + Atomic64FetchSet(&g->current_scheduler_cycle_period_ns.v, RoundF64ToI64(mean_ns)); + } + + { + __profn("Job scheduler run"); + i64 current_cycle = Atomic64FetchAdd(&g->current_scheduler_cycle.v, 1) + 1; + W32_WakeByTime((u64)current_cycle); + } + } +} + +//////////////////////////////// +//~ Wait / wake + +void P_Wait(volatile void *addr, void *cmp, u32 size, i64 timeout_ns) +{ + W32_Fiber *fiber = W32_FiberFromId(FiberId()); + i16 parent_id = fiber->parent_id; + if (parent_id != 0) + { + *fiber->yield_param = (W32_YieldParam) { + .kind = W32_YieldKind_Wait, + .wait = { + .addr = addr, + .cmp = cmp, + .size = size, + .timeout_ns = timeout_ns + } + }; + 
W32_YieldFiber(fiber, W32_FiberFromId(parent_id)); + } + else + { + i32 timeout_ms = 0; + if (timeout_ns > 10000000000000000ll) + { + timeout_ms = INFINITE; + } + else if (timeout_ns != 0) + { + timeout_ms = timeout_ns / 1000000; + timeout_ms += (timeout_ms == 0) * SignF32(timeout_ns); + } + if (addr == 0) + { + Sleep(timeout_ms); + } + else + { + WaitOnAddress(addr, cmp, size, timeout_ms); + } + } +} + +void P_Wake(void *addr, i32 count) +{ + W32_WakeByAddress(addr, count); +} + +//////////////////////////////// +//~ Fiber + +i16 FiberId(void) +{ + return (i16)(i64)GetFiberData(); +} + +//////////////////////////////// +//~ Job + +void P_Run(i32 count, P_JobFunc *func, void *sig, PoolKind pool_kind, PriorityKind priority, Counter *counter) +{ + __prof; + struct W32_SharedCtx *g = &W32_shared_ctx; + if (count > 0) + { + if (counter) + { + P_CounterAdd(counter, count); + } + W32_Fiber *fiber = W32_FiberFromId(FiberId()); + priority = ClampI32(priority, fiber->job_priority, PriorityKind_Count - 1); /* A job cannot create a job with a higher priority than itself */ + if (pool_kind == PoolKind_Inherit) + { + pool_kind = fiber->job_pool; + } + W32_JobPool *pool = &g->job_pools[pool_kind]; + W32_JobQueue *queue = &pool->job_queues[priority]; + LockTicketMutex(&queue->lock); + { + W32_JobInfo *info = 0; + if (queue->first_free) + { + info = queue->first_free; + queue->first_free = info->next; + } + else + { + info = PushStructNoZero(queue->arena, W32_JobInfo); + } + ZeroStruct(info); + info->count = count; + info->func = func; + info->sig = sig; + info->counter = counter; + if (queue->last) + { + queue->last->next = info; + } + else + { + queue->first = info; + } + queue->last = info; + } + UnlockTicketMutex(&queue->lock); + + /* Wake workers */ + { + LockTicketMutex(&pool->workers_wake_tm); + { + Atomic64FetchAdd(&pool->num_jobs_in_queue.v, count); + if (count >= W32_WakeAllThreshold) + { + WakeByAddressAll(&pool->num_jobs_in_queue); + } + else + { + for (i32 i = 0; i < 
count; ++i) + { + WakeByAddressSingle(&pool->num_jobs_in_queue); + } + } + } + UnlockTicketMutex(&pool->workers_wake_tm); + } + } +} diff --git a/src/base/win32/base_win32_job.h b/src/base/win32/base_win32_job.h new file mode 100644 index 00000000..8f370588 --- /dev/null +++ b/src/base/win32/base_win32_job.h @@ -0,0 +1,293 @@ +//////////////////////////////// +//~ Win32 headers + +#pragma warning(push, 0) +# define UNICODE +# define WIN32_LEAN_AND_MEAN +# include +#if 0 +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +#endif +#pragma warning(pop) + +//////////////////////////////// +//~ Thread types + +#define W32_ThreadStackSize Kibi(64) +#define W32_ThreadDef(name, arg_name) void name(void *arg_name) +typedef W32_ThreadDef(W32_ThreadFunc, data); + +Struct(W32_Thread) +{ + W32_ThreadFunc *entry_point; + void *thread_data; + char thread_name_cstr[256]; + wchar_t thread_name_wstr[256]; + i32 profiler_group; + + W32_Thread *next; + W32_Thread *prev; + + HANDLE handle; +}; + +//////////////////////////////// +//~ Wait list types + +AlignedStruct(W32_WaitList, 64) +{ + u64 value; + i16 first_waiter; + i16 last_waiter; + i32 num_waiters; + W32_WaitList *next_in_bin; + W32_WaitList *prev_in_bin; +}; +StaticAssert(alignof(W32_WaitList) == 64); /* Avoid false sharing */ + +AlignedStruct(W32_WaitBin, 64) +{ + W32_WaitList *first_wait_list; + W32_WaitList *last_wait_list; + W32_WaitList *first_free_wait_list; + TicketMutex lock; +}; +StaticAssert(alignof(W32_WaitBin) == 64); /* Avoid false sharing */ + +//////////////////////////////// +//~ Fiber types + +#define W32_FiberStackSize Mebi(4) +#define W32_FiberNamePrefixCstr "Fiber [" +#define W32_FiberNameSuffixCstr "]" +#define W32_FiberNameMaxSize 64 + +//- Yield param +typedef i32 W32_YieldKind; enum +{ + W32_YieldKind_None, + W32_YieldKind_Done, + W32_YieldKind_Wait, + + W32_YieldKind_Count +}; + +Struct(W32_YieldParam) +{ + W32_YieldKind kind; + union + { + 
struct + { + volatile void *addr; + void *cmp; + u32 size; + i64 timeout_ns; + } wait; + }; +}; + +//- Fiber +AlignedStruct(W32_Fiber, 64) +{ + /* ---------------------------------------------------- */ + void *addr; /* 08 bytes */ + /* ---------------------------------------------------- */ + char *name_cstr; /* 08 bytes */ + /* ---------------------------------------------------- */ + Atomic32 wake_lock; /* 04 bytes (4 byte alignment) */ + i16 id; /* 02 bytes */ + i16 parent_id; /* 02 bytes */ + /* ---------------------------------------------------- */ + u64 wait_addr; /* 08 bytes */ + /* ---------------------------------------------------- */ + u64 wait_time; /* 08 bytes */ + /* ---------------------------------------------------- */ + i16 next_addr_waiter; /* 02 bytes */ + i16 prev_addr_waiter; /* 02 bytes */ + i16 next_time_waiter; /* 02 bytes */ + i16 prev_time_waiter; /* 02 bytes */ + /* ---------------------------------------------------- */ + u8 _pad1[8]; /* 08 bytes (padding) */ + /* ---------------------------------------------------- */ + u8 _pad2[8]; /* 08 bytes (padding) */ + /* ---------------------------------------------------- */ + /* -------------------- Cache line -------------------- */ + /* ---------------------------------------------------- */ + void *job_func; /* 08 bytes */ + /* ---------------------------------------------------- */ + void *job_sig; /* 08 bytes */ + /* ---------------------------------------------------- */ + i32 job_id; /* 04 bytes */ + i16 job_pool; /* 02 bytes */ + i16 job_priority; /* 02 bytes */ + /* ---------------------------------------------------- */ + struct Counter *job_counter; /* 08 bytes */ + /* ---------------------------------------------------- */ + W32_YieldParam *yield_param; /* 08 bytes */ + /* ---------------------------------------------------- */ + u8 _pad3[24]; /* 24 bytes (padding) */ + +}; +StaticAssert(sizeof(W32_Fiber) == 128); /* Padding validation (increase if necessary) */ 
+StaticAssert(alignof(W32_Fiber) == 64); /* Verify alignment to avoid false sharing */ +StaticAssert(offsetof(W32_Fiber, wake_lock) % 4 == 0); /* Atomic must be aligned */ + +//////////////////////////////// +//~ Job queue types + +//- Worker ctx +AlignedStruct(W32_WorkerCtx, 64) +{ + PoolKind pool_kind; + i32 id; +}; + +//- Job info +Struct(W32_JobInfo) +{ + i32 num_dispatched; + + i32 count; + void *func; + void *sig; + struct Counter *counter; + + i16 fiber_id; /* If the job is being resumed from a yield */ + + W32_JobInfo *next; +}; + +//- Job queue +AlignedStruct(W32_JobQueue, 64) +{ + TicketMutex lock; + Arena *arena; + + W32_JobInfo *first; + W32_JobInfo *last; + + W32_JobInfo *first_free; +}; + +//- Job pool +AlignedStruct(W32_JobPool, 64) +{ + /* Jobs */ + W32_JobQueue job_queues[PriorityKind_Count]; + + TicketMutex free_fibers_tm; + i16 first_free_fiber_id; + + /* Workers */ + Atomic32Padded workers_shutdown; + Atomic64Padded num_jobs_in_queue; + TicketMutex workers_wake_tm; + + i32 num_worker_threads; + i32 thread_priority; + u64 thread_affinity_mask; + b32 thread_is_audio; + Arena *worker_threads_arena; + W32_Thread **worker_threads; + W32_WorkerCtx *worker_contexts; +}; + +//////////////////////////////// +//~ Shared state + +/* Assume scheduler cycle is 20hz at start to be conservative */ +#define W32_DefaultSchedulerPeriodNs 50000000 +#define W32_NumRollingSchedulerPeriods 1000 + +#define W32_NumWaitAddrBins 16384 +#define W32_NumWaitTimeBins 1024 + +/* Arbitrary threshold for determining when to fall back from a looped WakeByAddressSingle to WakeByAddressAll */ +#define W32_WakeAllThreshold 16 + +Struct(W32_SharedCtx) +{ + Atomic32 shutdown; + + //- Worker thread pool + TicketMutex threads_tm; + Arena *threads_arena; + W32_Thread *first_thread; + W32_Thread *last_thread; + W32_Thread *first_free_thread; + + //- Scheduler + Atomic64Padded current_scheduler_cycle; + Atomic64Padded current_scheduler_cycle_period_ns; + + //- Fibers + TicketMutex 
fibers_tm; + i16 num_fibers; + Arena *fiber_names_arena; + W32_Fiber fibers[MaxFibers]; + + //- Wait lists + Atomic64Padded waiter_wake_gen; + TicketMutex wait_lists_arena_tm; + Arena *wait_lists_arena; + + //- Wait tables + W32_WaitBin wait_addr_bins[W32_NumWaitAddrBins]; + W32_WaitBin wait_time_bins[W32_NumWaitTimeBins]; + + //- Job pools + W32_JobPool job_pools[PoolKind_Count]; +}; + +extern W32_SharedCtx W32_shared_ctx; + +//////////////////////////////// +//~ Startup + +void StartupJobs(void); + +//////////////////////////////// +//~ Shutdown + +void ShutdownJobs(void); + +//////////////////////////////// +//~ Thread operations + +DWORD WINAPI W32_Win32ThreadProc(LPVOID vt); +W32_Thread *W32_AllocThread(W32_ThreadFunc *entry_point, void *thread_data, String thread_name, i32 profiler_group); +b32 W32_TryReleaseThread(W32_Thread *thread, f32 timeout_seconds); +void W32_WaitReleaseThread(W32_Thread *thread); + +//////////////////////////////// +//~ Wait list operations + +void W32_WakeLockedFibers(i32 num_fibers, W32_Fiber **fibers); +void W32_WakeByAddress(void *addr, i32 count); +void W32_WakeByTime(u64 time); + +//////////////////////////////// +//~ Fiber operations + +W32_Fiber *W32_AllocFiber(W32_JobPool *pool); +void W32_ReleaseFiber(W32_JobPool *pool, W32_Fiber *fiber); +ForceInline W32_Fiber *W32_FiberFromId(i16 id); +ForceNoInline void W32_FiberResume(W32_Fiber *fiber); +void W32_YieldFiber(W32_Fiber *fiber, W32_Fiber *parent_fiber); +void W32_FiberEntryPoint(void *id_ptr); + +//////////////////////////////// +//~ Workers + +W32_ThreadDef(W32_JobWorkerEntryFunc, worker_ctx_arg); +W32_ThreadDef(W32_JobSchedulerEntryFunc, _); diff --git a/src/draw/draw_core.c b/src/draw/draw_core.c index 4a52df88..33029789 100644 --- a/src/draw/draw_core.c +++ b/src/draw/draw_core.c @@ -3,11 +3,10 @@ D_SharedState D_shared_state = ZI; //////////////////////////////// //~ Startup -D_StartupReceipt D_Startup(F_StartupReceipt *font_sr) +D_StartupReceipt D_Startup(void) { 
__prof; D_SharedState *g = &D_shared_state; - LAX font_sr; u32 pixel_white = 0xFFFFFFFF; g->solid_white_texture = GPU_AllocTexture(GP_TEXTURE_FORMAT_R8G8B8A8_UNORM, 0, VEC2I32(1, 1), &pixel_white); return (D_StartupReceipt) { 0 }; diff --git a/src/draw/draw_core.h b/src/draw/draw_core.h index 010d970d..cd09f8cb 100644 --- a/src/draw/draw_core.h +++ b/src/draw/draw_core.h @@ -112,7 +112,7 @@ extern D_SharedState D_shared_state; //~ Startup Struct(D_StartupReceipt) { i32 _; }; -D_StartupReceipt D_Startup(F_StartupReceipt *font_sr); +D_StartupReceipt D_Startup(void); //////////////////////////////// //~ Material operations diff --git a/src/dxc/dxc_core_win32.cpp b/src/dxc/dxc_core_win32.cpp index cf829fb8..96fafcc4 100644 --- a/src/dxc/dxc_core_win32.cpp +++ b/src/dxc/dxc_core_win32.cpp @@ -70,7 +70,7 @@ DXC_Result DXC_Compile(Arena *arena, String shader_source, i32 num_args, String String blob_str = ZI; blob_str.len = dxc_errors->GetBufferSize(); blob_str.text = (u8 *)dxc_errors->GetBufferPointer(); - result.errors = CopyString(arena, blob_str); + result.errors = PushString(arena, blob_str); } //- Get status @@ -86,7 +86,7 @@ DXC_Result DXC_Compile(Arena *arena, String shader_source, i32 num_args, String String blob_str = ZI; blob_str.len = dxc_shader->GetBufferSize(); blob_str.text = (u8 *)dxc_shader->GetBufferPointer(); - result.dxc = CopyString(arena, blob_str); + result.dxc = PushString(arena, blob_str); } } diff --git a/src/font/font_core.c b/src/font/font_core.c index 9a9d0689..6c647afe 100644 --- a/src/font/font_core.c +++ b/src/font/font_core.c @@ -1,51 +1,7 @@ -F_SharedState F_shared_state = ZI; - -//////////////////////////////// -//~ Startup - -F_StartupReceipt F_Startup(AC_StartupReceipt *asset_cache_sr, TTF_StartupReceipt *ttf_sr) -{ - __prof; - F_SharedState *g = &F_shared_state; - LAX asset_cache_sr; - LAX ttf_sr; - g->params.arena = AllocArena(Gibi(64)); - return (F_StartupReceipt) { 0 }; -} - //////////////////////////////// //~ Load job 
-F_LoadJobSig *F_AllocJobSig(void) -{ - F_SharedState *g = &F_shared_state; - F_LoadJobSig *p = 0; - { - P_Lock lock = P_LockE(&g->params.mutex); - if (g->params.head_free) - { - p = g->params.head_free; - g->params.head_free = p->next_free; - } - else - { - p = PushStruct(g->params.arena, F_LoadJobSig); - } - P_Unlock(&lock); - } - return p; -} - -void F_ReleaseJobSig(F_LoadJobSig *p) -{ - F_SharedState *g = &F_shared_state; - P_Lock lock = P_LockE(&g->params.mutex); - p->next_free = g->params.head_free; - g->params.head_free = p; - P_Unlock(&lock); -} - -P_JobDef(F_LoadAssetJob, job) +JobDef(F_LoadAssetJob, job) { __prof; TempArena scratch = BeginScratchNoConflict(); @@ -120,7 +76,7 @@ P_JobDef(F_LoadAssetJob, job) FmtString(path))); } - /* CopyStruct glyphs from decode result */ + /* Copy glyphs from decode result */ StaticAssert(sizeof(*font->glyphs) == sizeof(*result.glyphs)); /* Font glyph size must match TTF glyph size for memcpy */ CopyBytes(font->glyphs, result.glyphs, sizeof(*font->glyphs) * result.glyphs_count); @@ -131,8 +87,6 @@ P_JobDef(F_LoadAssetJob, job) font->lookup[codepoint] = result.cache_indices[i]; } - F_ReleaseJobSig(params); - P_LogSuccessF("Loaded font \"%F\" (point size %F) in %F seconds", FmtString(path), FmtFloat((f64)point_size), FmtFloat(SecondsFromNs(P_TimeNs() - start_ns))); AC_MarkReady(asset, font); @@ -174,7 +128,7 @@ AC_Asset *F_LoadAsset(String path, f32 point_size, b32 wait) /* PushStruct task */ AC_MarkLoading(asset); - P_Run(1, F_LoadAssetJob, params, P_Pool_Background, P_Priority_Low, 0); + P_Run(1, F_LoadAssetJob, params, PoolKind_Background, PriorityKind_Low, 0); if (wait) { AC_WaitOnAssetReady(asset); diff --git a/src/font/font_core.h b/src/font/font_core.h index b96e7e8c..7a7e2ce6 100644 --- a/src/font/font_core.h +++ b/src/font/font_core.h @@ -1,6 +1,8 @@ //////////////////////////////// //~ Font types +#define F_LookupTableSize (256) + Struct(F_Glyph) { f32 off_x; @@ -22,50 +24,10 @@ Struct(F_Font) u16 *lookup; }; 
-//////////////////////////////// -//~ Font job types - -Struct(F_LoadJobSig) -{ - F_LoadJobSig *next_free; - - AC_Asset *asset; - f32 point_size; - u64 path_len; - char path_cstr[1024]; -}; - -Struct(F_LoadJobSigStore) -{ - F_LoadJobSig *head_free; - Arena *arena; - P_Mutex mutex; -}; - -//////////////////////////////// -//~ Shared state - -#define F_LookupTableSize (256) - -Struct(F_SharedState) -{ - F_LoadJobSigStore params; -}; - -extern F_SharedState F_shared_state; - -//////////////////////////////// -//~ Startup - -Struct(F_StartupReceipt) { i32 _; }; -F_StartupReceipt F_Startup(AC_StartupReceipt *asset_cache_sr, TTF_StartupReceipt *ttf_sr); - //////////////////////////////// //~ Font load job -F_LoadJobSig *F_AllocJobSig(void); -void F_ReleaseJobSig(F_LoadJobSig *p); -P_JobDef(F_LoadAssetJob, job); +JobDecl(F_LoadAssetJob, { AC_Asset *asset; f32 point_size; String path; }); //////////////////////////////// //~ Font load operations diff --git a/src/gpu/gpu_dx12.c b/src/gpu/gpu_dx12.c index ab5aba99..d08f21a4 100644 --- a/src/gpu/gpu_dx12.c +++ b/src/gpu/gpu_dx12.c @@ -71,7 +71,7 @@ void GPU_Startup(void) P_OnExit(GPU_D12_Shutdown); /* Start evictor job */ - P_Run(1, GPU_D12_EvictorJob, 0, P_Pool_Background, P_Priority_Low, &g->evictor_job_counter); + P_Run(1, GPU_D12_EvictorJob, 0, PoolKind_Background, PriorityKind_Low, &g->evictor_job_counter); } P_ExitFuncDef(GPU_D12_Shutdown) @@ -319,15 +319,15 @@ void GPU_D12_InitObjects(void) GPU_D12_CommandQueueDesc params[] = { {.type = D3D12_COMMAND_LIST_TYPE_DIRECT, .priority = D3D12_COMMAND_QUEUE_PRIORITY_NORMAL, .dbg_name = Lit("Direct queue") }, {.type = D3D12_COMMAND_LIST_TYPE_COMPUTE, .priority = D3D12_COMMAND_QUEUE_PRIORITY_NORMAL, .dbg_name = Lit("Compute queue") }, - {.type = D3D12_COMMAND_LIST_TYPE_COPY, .priority = D3D12_COMMAND_QUEUE_PRIORITY_HIGH, .dbg_name = Lit("Copyqueue") }, + {.type = D3D12_COMMAND_LIST_TYPE_COPY, .priority = D3D12_COMMAND_QUEUE_PRIORITY_HIGH, .dbg_name = Lit("Copy queue") }, {.type 
= D3D12_COMMAND_LIST_TYPE_COPY, .priority = D3D12_COMMAND_QUEUE_PRIORITY_NORMAL, .dbg_name = Lit("Background copy queue") } }; GPU_D12_AllocCommandQueueJobSig sig = ZI; sig.descs_in = params; sig.cqs_out = g->command_queues; { - P_Counter counter = ZI; - P_Run(DX12_NUM_QUEUES, GPU_D12_AllocCommandQueueJob, &sig, P_Pool_Inherit, P_Priority_Inherit, &counter); + Counter counter = ZI; + P_Run(DX12_NUM_QUEUES, GPU_D12_AllocCommandQueueJob, &sig, PoolKind_Inherit, PriorityKind_Inherit, &counter); P_WaitOnCounter(&counter); } #if ProfilingIsEnabled @@ -421,8 +421,8 @@ void GPU_InitPipelines(void) GPU_D12_AllocPipelineJobSig sig = ZI; sig.descs_in = descs; sig.pipelines_out = pipelines; - P_Counter counter = ZI; - P_Run(num_pipelines, GPU_D12_AllocPipelineJob, &sig, P_Pool_Inherit, P_Priority_Inherit, &counter); + Counter counter = ZI; + P_Run(num_pipelines, GPU_D12_AllocPipelineJob, &sig, PoolKind_Inherit, PriorityKind_Inherit, &counter); P_WaitOnCounter(&counter); } for (u32 i = 0; i < num_pipelines; ++i) @@ -501,11 +501,11 @@ void GPU_D12_InitNoise(void) /* Upload texture */ { - P_Counter counter = ZI; + Counter counter = ZI; GPU_D12_UploadJobSig sig = ZI; sig.resource = r; sig.data = data.text; - P_Run(1, GPU_D12_UploadJob, &sig, P_Pool_Inherit, P_Priority_Inherit, &counter); + P_Run(1, GPU_D12_UploadJob, &sig, PoolKind_Inherit, PriorityKind_Inherit, &counter); P_WaitOnCounter(&counter); } } @@ -526,7 +526,7 @@ void GPU_D12_InitNoise(void) #if RESOURCE_RELOADING -P_JobDef(GPU_D12_CompileShaderJob, job) +JobDef(GPU_D12_CompileShaderJob, job) { __prof; GPU_D12_CompileShaderJobSig *sig = job.sig; @@ -577,7 +577,7 @@ P_JobDef(GPU_D12_CompileShaderJob, job) * Pipeline * ========================== */ -P_JobDef(GPU_D12_AllocPipelineJob, job) +JobDef(GPU_D12_AllocPipelineJob, job) { __prof; GPU_D12_SharedState *g = &GPU_D12_shared_state; @@ -1116,8 +1116,8 @@ W_CallbackFuncDef(GPU_D12_WatchPipelineCallback, name) sig.descs[0].target = Lit("cs_6_6"); } { - P_Counter counter = 
ZI; - P_Run(num_shaders, GPU_D12_CompileShaderJob, &sig, P_Pool_Inherit, P_Priority_Inherit, &counter); + Counter counter = ZI; + P_Run(num_shaders, GPU_D12_CompileShaderJob, &sig, PoolKind_Inherit, PriorityKind_Inherit, &counter); P_WaitOnCounter(&counter); } } @@ -1180,8 +1180,8 @@ W_CallbackFuncDef(GPU_D12_WatchPipelineCallback, name) GPU_D12_AllocPipelineJobSig sig = ZI; sig.descs_in = pipeline_descs; sig.pipelines_out = pipelines; - P_Counter counter = ZI; - P_Run(num_pipelines, GPU_D12_AllocPipelineJob, &sig, P_Pool_Inherit, P_Priority_Inherit, &counter); + Counter counter = ZI; + P_Run(num_pipelines, GPU_D12_AllocPipelineJob, &sig, PoolKind_Inherit, PriorityKind_Inherit, &counter); P_WaitOnCounter(&counter); } { @@ -1515,7 +1515,7 @@ void GPU_D12_InsertBarrier(ID3D12GraphicsCommandList *cl, i32 num_descs, GPU_D12 GPU_D12_CommandListPool *GPU_D12_AllocCommandListPool(GPU_D12_CommandQueue *cq); -P_JobDef(GPU_D12_AllocCommandQueueJob, job) +JobDef(GPU_D12_AllocCommandQueueJob, job) { __prof; GPU_D12_SharedState *g = &GPU_D12_shared_state; @@ -1853,7 +1853,7 @@ GPU_D12_CommandDescriptorHeap *GPU_D12_PushDescriptorHeap(GPU_D12_CommandList *c ID3D12DescriptorHeap_GetGPUDescriptorHandleForHeapStart(cdh->heap, &cdh->start_gpu_handle); } - /* CopyCPU heap */ + /* Copy CPU heap */ { P_Lock lock = P_LockS(&dh_cpu->mutex); ID3D12Device_CopyDescriptorsSimple(g->device, dh_cpu->num_descriptors_reserved, cdh->start_cpu_handle, dh_cpu->handle, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); @@ -2017,7 +2017,7 @@ GPU_D12_CommandBuffer *GPU_D12__PushCommandBuffer(GPU_D12_CommandList *cl, u64 d * Wait job * ========================== */ -P_JobDef(GPU_D12_WaitOnFenceJob, job) +JobDef(GPU_D12_WaitOnFenceJob, job) { __prof; GPU_D12_WaitOnFenceJobSig *sig = job.sig; @@ -2099,11 +2099,11 @@ GPU_Resource *GPU_AllocTexture(GPU_TextureFormat format, u32 flags, Vec2I32 size if (initial_data) { /* TODO: Make wait optional */ - P_Counter counter = ZI; + Counter counter = ZI; 
GPU_D12_UploadJobSig sig = ZI; sig.resource = r; sig.data = initial_data; - P_Run(1, GPU_D12_UploadJob, &sig, P_Pool_Inherit, P_Priority_Inherit, &counter); + P_Run(1, GPU_D12_UploadJob, &sig, PoolKind_Inherit, PriorityKind_Inherit, &counter); P_WaitOnCounter(&counter); } @@ -2120,7 +2120,7 @@ Vec2I32 GPU_GetTextureSize(GPU_Resource *resource) * Upload * ========================== */ -P_JobDef(GPU_D12_UploadJob, job) +JobDef(GPU_D12_UploadJob, job) { GPU_D12_SharedState *g = &GPU_D12_shared_state; GPU_D12_UploadJobSig *sig = job.sig; @@ -2168,7 +2168,7 @@ P_JobDef(GPU_D12_UploadJob, job) GPU_D12_CommandQueue *cq = g->command_queues[DX12_QUEUE_COPY_BACKGROUND]; GPU_D12_CommandList *cl = GPU_D12_BeginCommandList(cq->cl_pool); { - /* Copyto upload heap */ + /* Copy to upload heap */ { D3D12_RANGE read_range = ZI; void *mapped = 0; @@ -2194,7 +2194,7 @@ P_JobDef(GPU_D12_UploadJob, job) ID3D12Resource_Unmap(upload->resource, 0, 0); } - /* Copyfrom upload heap to texture */ + /* Copy from upload heap to texture */ { __profnc_dx12(cl->cq->prof, cl->cl, "Upload texture", Rgb32F(0.2, 0.5, 0.2)); D3D12_TEXTURE_COPY_LOCATION dst_loc = { @@ -2219,8 +2219,8 @@ P_JobDef(GPU_D12_UploadJob, job) GPU_D12_WaitOnFenceJobSig wait_sig = ZI; wait_sig.fence = cq->submit_fence; wait_sig.target = fence_target; - P_Counter counter = ZI; - P_Run(1, GPU_D12_WaitOnFenceJob, &wait_sig, P_Pool_Floating, P_Priority_Low, &counter); + Counter counter = ZI; + P_Run(1, GPU_D12_WaitOnFenceJob, &wait_sig, PoolKind_Floating, PriorityKind_Low, &counter); P_WaitOnCounter(&counter); } @@ -2254,6 +2254,17 @@ void GPU_D12_SetSig(GPU_D12_CommandList *cl, void *src, u32 size) Assert(size <= 256); /* Only 64 32-bit root constants allowed in signature */ u32 num32bit = size / 4; b32 is_gfx = cl->cur_pipeline->is_gfx; + /* FIXME: Enable this */ +#if 0 + if (is_gfx) + { + ID3D12GraphicsCommandList_SetGraphicsRoot32BitConstants(cl->cl, 0, num32bit, src, 0); + } + else + { + 
ID3D12GraphicsCommandList_SetComputeRoot32BitConstants(cl->cl, 0, num32bit, src, 0); + } +#else for (u32 i = 0; i < num32bit; ++i) { u32 val = 0; @@ -2267,6 +2278,8 @@ void GPU_D12_SetSig(GPU_D12_CommandList *cl, void *src, u32 size) ID3D12GraphicsCommandList_SetComputeRoot32BitConstant(cl->cl, 0, val, i); } } +#endif + } struct D3D12_VIEWPORT GPU_D12_ViewportFromRect(Rect r) @@ -3321,7 +3334,7 @@ void GPU_PresentSwapchain(GPU_Swapchain *gp_swapchain, Vec2I32 backbuffer_resolu * Evictor job * ========================== */ -P_JobDef(GPU_D12_EvictorJob, _) +JobDef(GPU_D12_EvictorJob, _) { GPU_D12_SharedState *g = &GPU_D12_shared_state; u64 completed_targets[DX12_NUM_QUEUES] = ZI; @@ -3334,11 +3347,11 @@ P_JobDef(GPU_D12_EvictorJob, _) TempArena scratch = BeginScratchNoConflict(); u64 targets[countof(completed_targets)] = ZI; - /* Copyqueued data */ + /* Copy queued data */ u32 num_fenced_releases = 0; GPU_D12_FencedReleaseData *fenced_releases = 0; { - __profn("Copyqueued releases"); + __profn("Copy queued releases"); P_Lock lock = P_LockE(&g->fenced_releases_mutex); num_fenced_releases = g->fenced_releases_arena->pos / sizeof(GPU_D12_FencedReleaseData); fenced_releases = PushStructsNoZero(scratch.arena, GPU_D12_FencedReleaseData, num_fenced_releases); @@ -3365,8 +3378,8 @@ P_JobDef(GPU_D12_EvictorJob, _) sig.fence = cq->submit_fence; sig.target = targets[i]; { - P_Counter counter = ZI; - P_Run(1, GPU_D12_WaitOnFenceJob, &sig, P_Pool_Floating, P_Priority_Low, &counter); + Counter counter = ZI; + P_Run(1, GPU_D12_WaitOnFenceJob, &sig, PoolKind_Floating, PriorityKind_Low, &counter); P_WaitOnCounter(&counter); } } diff --git a/src/gpu/gpu_dx12.h b/src/gpu/gpu_dx12.h index 089650f0..e5002003 100644 --- a/src/gpu/gpu_dx12.h +++ b/src/gpu/gpu_dx12.h @@ -257,14 +257,6 @@ Struct(GPU_D12_FencedReleaseData) void *ptr; }; -Struct(GPU_D12_AllocCommandQueueJobSig) { GPU_D12_CommandQueueDesc *descs_in; GPU_D12_CommandQueue **cqs_out; }; - -Struct(GPU_D12_AllocPipelineJobSig) { 
GPU_D12_PipelineDesc *descs_in; GPU_D12_Pipeline **pipelines_out; }; - -Struct(GPU_D12_UploadJobSig) { GPU_D12_Resource *resource; void *data; }; - -Struct(GPU_D12_WaitOnFenceJobSig) { ID3D12Fence *fence; u64 target; }; - Struct(GPU_D12_ShaderDesc) { String src; @@ -281,14 +273,6 @@ Struct(GPU_D12_CompiledShaderResult) b32 success; }; -Struct(GPU_D12_CompileShaderJobSig) -{ - Arena *arena; - GPU_D12_ShaderDesc *descs; - GPU_D12_CompiledShaderResult *results; -}; - - Struct(GPU_D12_RenderSig) { Arena *arena; @@ -360,7 +344,7 @@ Struct(GPU_D12_ResourceBarrierDesc) }; /* ========================== * - * Global state + * Shared state * ========================== */ Struct(GPU_D12_SharedState) @@ -428,7 +412,7 @@ Struct(GPU_D12_SharedState) GPU_D12_CommandQueue *command_queues[DX12_NUM_QUEUES]; /* Evictor job */ - P_Counter evictor_job_counter; + Counter evictor_job_counter; P_Cv evictor_wake_cv; P_Mutex evictor_wake_mutex; i64 evictor_wake_gen; @@ -473,13 +457,13 @@ Struct(GPU_D12_SharedState) void GPU_D12_InitNoise(void); /* ========================== * * Shader compilation * ========================== */ -P_JobDef(GPU_D12_CompileShaderJob, job); +JobDecl(GPU_D12_CompileShaderJob, { Arena *arena; GPU_D12_ShaderDesc *descs; GPU_D12_CompiledShaderResult *results; }); /* ========================== * * Pipeline * ========================== */ -P_JobDef(GPU_D12_AllocPipelineJob, job); +JobDecl(GPU_D12_AllocPipelineJob, { GPU_D12_PipelineDesc *descs_in; GPU_D12_Pipeline **pipelines_out; }); void GPU_D12_ReleasePipelineNow(GPU_D12_Pipeline *pipeline); @@ -538,7 +522,7 @@ void GPU_D12_InsertBarrier(ID3D12GraphicsCommandList *cl, i32 num_descs, GPU_D12 * Command queue * ========================== */ -P_JobDef(GPU_D12_AllocCommandQueueJob, job); +JobDecl(GPU_D12_AllocCommandQueueJob, { GPU_D12_CommandQueueDesc *descs_in; GPU_D12_CommandQueue **cqs_out; }); void GPU_D12_ReleaseCommandQueue(GPU_D12_CommandQueue *cq); @@ -572,13 +556,13 @@ GPU_D12_CommandBuffer *GPU_D12__PushCommandBuffer(GPU_D12_CommandList *cl, u64 d * Wait job * 
========================== */ -P_JobDef(GPU_D12_WaitOnFenceJob, job); +JobDecl(GPU_D12_WaitOnFenceJob, { ID3D12Fence *fence; u64 target; }); /* ========================== * * Upload * ========================== */ -P_JobDef(GPU_D12_UploadJob, job); +JobDecl(GPU_D12_UploadJob, { GPU_D12_Resource *resource; void *data; }); /* ========================== * * Run utils @@ -625,9 +609,8 @@ GPU_D12_SwapchainBuffer *GPU_D12_UpdateSwapchain(GPU_D12_Swapchain *swapchain, V void GPU_D12_BlitToSwapchain(GPU_D12_SwapchainBuffer *dst, GPU_D12_Resource *src, Xform src_xf); - /* ========================== * * Evictor job * ========================== */ -P_JobDef(GPU_D12_EvictorJob, _); +JobDecl(GPU_D12_EvictorJob, EmptySig); diff --git a/src/net/net_core.c b/src/net/net_core.c index 227995ae..7fcb4329 100644 --- a/src/net/net_core.c +++ b/src/net/net_core.c @@ -520,7 +520,7 @@ void N_Write(N_Host *host, N_ChannelId channel_id, String msg, N_WriteFlag flags N_Cmd *cmd = N_PushCmd(host); cmd->kind = N_CmdKind_Write; cmd->channel_id = channel_id; - cmd->write_msg = CopyString(host->cmd_arena, msg); + cmd->write_msg = PushString(host->cmd_arena, msg); cmd->write_reliable = flags & N_WriteFlag_Reliable; } @@ -561,7 +561,7 @@ N_EventList N_BeginUpdate(Arena *arena, N_Host *host) { N_RcvPacket *packet = PushStruct(scratch.arena, N_RcvPacket); packet->address = address; - packet->data = CopyString(scratch.arena, data); + packet->data = PushString(scratch.arena, data); if (last_packet) { last_packet->next = packet; diff --git a/src/platform/platform_core.h b/src/platform/platform_core.h index 8574baa3..60efb84c 100644 --- a/src/platform/platform_core.h +++ b/src/platform/platform_core.h @@ -5,47 +5,6 @@ Struct(P_Watch); Struct(P_Window); Struct(P_Sock); -//////////////////////////////// -//~ Job queue types - -/* Work pools contain their own worker threads with their own thread priority/affinity based on the intended context of the pool. 
*/ -typedef i32 P_Pool; enum -{ - P_Pool_Inherit = -1, - - /* The floating pool contains a large number of lower priority threads that have affinity over the entire CPU. - * Other pools should push jobs that only block and do no work here so that they can yield on the blocking job - * rather than blocking themselves. */ - P_Pool_Floating = 0, - - P_Pool_Background = 1, - P_Pool_Audio = 2, - P_Pool_User = 3, - P_Pool_Sim = 4, - - P_Pool_Count -}; - -/* Job execution order within a pool is based on priority. */ -typedef i32 P_Priority; enum -{ - P_Priority_Inherit = -1, - P_Priority_High = 0, - P_Priority_Normal = 1, - P_Priority_Low = 2, - - P_Priority_Count -}; - -Struct(P_JobData) -{ - i32 id; - void *sig; -}; - -#define P_JobDef(job_name, arg_name) void job_name(P_JobData arg_name) -typedef P_JobDef(P_JobFunc, job_data); - //////////////////////////////// //~ Time types @@ -349,25 +308,13 @@ typedef i32 P_MessageBoxKind; enum typedef P_ExitFuncDef(P_ExitFunc); //////////////////////////////// -//~ Wait - - /* Futex-like wait & wake */ -void P_Wait(volatile void *addr, void *cmp, u32 size, i64 timeout_ns); -void P_Wake(void *addr, i32 count); - -//////////////////////////////// -//~ Job helpers - -void P_Run(i32 count, P_JobFunc *func, void *sig, P_Pool pool_kind, P_Priority priority, P_Counter *counter); - -//////////////////////////////// -//~ Time helpers +//~ @hookdecl Time helper operations P_DateTime P_LocalTime(void); i64 P_TimeNs(void); //////////////////////////////// -//~ File system operations +//~ @hookdecl File system operations /* NOTE: File paths use forward slash '/' as delimiter */ @@ -384,7 +331,7 @@ P_File P_OpenFileWrite(String path); P_File P_OpenFileAppend(String path); void P_CloseFIle(P_File file); -//- File data operations +//- File data manipulation String P_ReadFile(Arena *arena, P_File file); void P_WriteFile(P_File file, String data); @@ -393,16 +340,15 @@ u64 P_GetFileSize(P_File file); P_FileTime P_GetFileTime(P_File file); 
//////////////////////////////// -//~ File map operations +//~ @hookdecl File map operations P_FileMap P_OpenFileMap(P_File file); void P_CloseFileMap(P_FileMap map); String P_GetFileMapData(P_FileMap map); //////////////////////////////// -//~ Watch operations - -/* A watch object allows the caller to watch for changes in a directory */ +//~ @hookdecl Watch operations +// A watch object allows the caller to watch for changes in a directory P_Watch *P_AllocWatch(String path); void P_ReleaseWatch(P_Watch *dw); @@ -410,15 +356,15 @@ P_WatchInfoList P_ReadWatchWait(Arena *arena, P_Watch *dw); void P_WakeWatch(P_Watch *dw); //////////////////////////////// -//~ Window operations +//~ @hookdecl Window operations P_Window *P_AllocWindow(void); void P_ReleaseWindow(P_Window *window); -//- Window events +//- Events P_WindowEventArray P_PopWindowEvents(Arena *arena, P_Window *window); -//- Window settings +//- Settings void P_UpdateWindowSettings(P_Window *window, P_WindowSettings *settings); P_WindowSettings P_GetWindowSettings(P_Window *window); void P_ShowWindow(P_Window *window); @@ -429,13 +375,13 @@ void P_EnableWindoweCursorClip(P_Window *window, Rect bounds); void P_DisableWindoweCursorClip(P_Window *window); void P_ToggleWindowTopmost(P_Window *window); -//- Window info +//- Info Vec2 P_GetWindowSize(P_Window *window); Vec2 P_GetWindowMonitorSize(P_Window *window); u64 P_GetInternalWindowHandle(P_Window *window); //////////////////////////////// -//~ Address helpers +//~ @hookdecl Address helpers P_Address P_AddressFromString(String str); P_Address P_AddressFromIpPortCstr(char *ip_cstr, char *port_cstr); @@ -444,7 +390,7 @@ String P_StringFromAddress(Arena *arena, P_Address address); b32 P_AddressIsEqual(P_Address a, P_Address b); //////////////////////////////// -//~ Sock operations +//~ @hookdecl Sock operations P_Sock *P_AllocSock(u16 listen_port, u64 sndbuf_size, u64 rcvbuf_size); void P_ReleaseSock(P_Sock *sock); @@ -452,7 +398,7 @@ P_SockReadResult 
P_ReadSock(Arena *arena, P_Sock *sock); void P_WriteSock(P_Sock *sock, P_Address address, String data); //////////////////////////////// -//~ Utils +//~ @hookdecl Utils void P_MessageBox(P_MessageBoxKind kind, String message); void P_SetClipboardText(String str); @@ -462,20 +408,19 @@ u32 P_GetThreadId(void); i64 P_GetCurrentSchedulerPeriodNs(void); //////////////////////////////// -//~ Sleep +//~ @hookdecl Sleep void P_SleepPrecise(i64 sleep_time_ns); void P_SleepFrame(i64 last_frame_time_ns, i64 target_dt_ns); //////////////////////////////// -//~ Program exit +//~ @hookdecl Program exit -void P_OnExit(P_ExitFunc *func); /* Registers a function to be called during graceful shutdown (in reverse order) */ -void P_Exit(void); /* Signals the program to shut down gracefully and run exit callbacks */ -void P_Panic(String msg); /* Forcefully exits the program and displays `msg` to the user */ +void P_OnExit(P_ExitFunc *func); +void P_Exit(void); +void P_Panic(String msg); //////////////////////////////// -//~ Entry point +//~ @hookdecl Entry point (implemented per application) -/* Must be defined by app */ void P_AppStartup(String args_str); diff --git a/src/platform/platform_log.c b/src/platform/platform_log.c index e3ffbc9f..d6ed1cde 100644 --- a/src/platform/platform_log.c +++ b/src/platform/platform_log.c @@ -1,7 +1,7 @@ //////////////////////////////// -//~ Global state +//~ Shared state -P_SharedLogCtx P_shared_log_ctx = ZI; +P_SharedLogState P_shared_log_state = ZI; //////////////////////////////// //~ Startup @@ -9,7 +9,7 @@ P_SharedLogCtx P_shared_log_ctx = ZI; void P_LogStartup(String logfile_path) { __prof; - P_SharedLogCtx *ctx = &P_shared_log_ctx; + P_SharedLogState *ctx = &P_shared_log_state; ctx->callbacks_arena = AllocArena(Mebi(8)); if (logfile_path.len > 0) { @@ -30,7 +30,7 @@ void P_LogStartup(String logfile_path) void P_RegisterLogCallback(P_LogEventCallbackFunc *func, i32 level) { - P_SharedLogCtx *ctx = &P_shared_log_ctx; + P_SharedLogState *ctx = 
&P_shared_log_state; if (!Atomic32Fetch(&ctx->initialized)) { return; } P_Lock lock = P_LockE(&ctx->callbacks_mutex); { @@ -56,7 +56,7 @@ void P_RegisterLogCallback(P_LogEventCallbackFunc *func, i32 level) void P_LogAppend_(String msg) { __prof; - P_SharedLogCtx *ctx = &P_shared_log_ctx; + P_SharedLogState *ctx = &P_shared_log_state; if (!Atomic32Fetch(&ctx->initialized)) { return; } if (ctx->file_valid) @@ -75,7 +75,7 @@ void P_LogAppend_(String msg) * writing to log file. */ void P_LogPanic_(String msg) { - P_SharedLogCtx *ctx = &P_shared_log_ctx; + P_SharedLogState *ctx = &P_shared_log_state; if (!Atomic32Fetch(&ctx->initialized)) { return; } if (ctx->file_valid) @@ -95,7 +95,7 @@ void P_LogFV_(i32 level, String file, u32 line, String fmt, va_list args) void P_LogFV_(i32 level, String fmt, va_list args) #endif { - P_SharedLogCtx *ctx = &P_shared_log_ctx; + P_SharedLogState *ctx = &P_shared_log_state; if (!Atomic32Fetch(&ctx->initialized)) { return; } TempArena scratch = BeginScratchNoConflict(); String msg = StringFormatV(scratch.arena, fmt, args); @@ -116,7 +116,7 @@ void P_LogF_(i32 level, String file, u32 line, String fmt, ...) void P_LogF_(i32 level, String fmt, ...) 
#endif { - P_SharedLogCtx *ctx = &P_shared_log_ctx; + P_SharedLogState *ctx = &P_shared_log_state; if (!Atomic32Fetch(&ctx->initialized)) { return; } va_list args; va_start(args, fmt); @@ -138,7 +138,7 @@ void P_Log_(i32 level, String msg) #endif { __prof; - P_SharedLogCtx *ctx = &P_shared_log_ctx; + P_SharedLogState *ctx = &P_shared_log_state; if (!Atomic32Fetch(&ctx->initialized)) { return; } TempArena scratch = BeginScratchNoConflict(); diff --git a/src/platform/platform_log.h b/src/platform/platform_log.h index 0ccb0518..3200acf9 100644 --- a/src/platform/platform_log.h +++ b/src/platform/platform_log.h @@ -59,7 +59,7 @@ Struct(LogEventCallback) //~ Shared state //- Shared context -Struct(P_SharedLogCtx) +Struct(P_SharedLogState) { Atomic32 initialized; @@ -72,7 +72,7 @@ Struct(P_SharedLogCtx) b32 file_valid; }; -extern P_SharedLogCtx P_shared_log_ctx; +extern P_SharedLogState P_shared_log_state; //-- Log level settings Struct(P_LogLevelSettings) diff --git a/src/platform/platform_snc.c b/src/platform/platform_snc.c index 35754e4f..1013640b 100644 --- a/src/platform/platform_snc.c +++ b/src/platform/platform_snc.c @@ -45,7 +45,7 @@ P_Lock P_LockSpinE(P_Mutex *m, i32 spin) { if (spin_cnt < spin) { - IxPause(); + _mm_pause(); } else { @@ -92,7 +92,7 @@ P_Lock P_LockSpinS(P_Mutex *m, i32 spin) { if (spin_cnt < spin) { - IxPause(); + _mm_pause(); } else { @@ -173,7 +173,7 @@ void P_SignalCv(P_Cv *cv, i32 count) //////////////////////////////// //~ Counter -void P_CounterAdd(P_Counter *counter, i64 x) +void P_CounterAdd(Counter *counter, i64 x) { i64 old_v = Atomic64FetchAdd(&counter->v, x); i64 new_v = old_v + x; @@ -183,7 +183,7 @@ void P_CounterAdd(P_Counter *counter, i64 x) } } -void P_WaitOnCounter(P_Counter *counter) +void P_WaitOnCounter(Counter *counter) { i64 v = Atomic64Fetch(&counter->v); while (v > 0) diff --git a/src/platform/platform_snc.h b/src/platform/platform_snc.h index 1d42596f..49bd1c3a 100644 --- a/src/platform/platform_snc.h +++ 
b/src/platform/platform_snc.h @@ -41,13 +41,13 @@ StaticAssert(alignof(P_Cv) == 64); /* Prevent false sharing */ //////////////////////////////// //~ Counter types -AlignedStruct(P_Counter, 64) +AlignedStruct(Counter, 64) { Atomic64 v; u8 _pad[56]; }; -StaticAssert(sizeof(P_Counter) == 64); /* Padding validation */ -StaticAssert(alignof(P_Counter) == 64); /* Prevent false sharing */ +StaticAssert(sizeof(Counter) == 64); /* Padding validation */ +StaticAssert(alignof(Counter) == 64); /* Prevent false sharing */ //////////////////////////////// //~ Mutex operations @@ -79,5 +79,5 @@ void P_SignalCv(P_Cv *cv, i32 count); //////////////////////////////// //~ Counter operations -void P_CounterAdd(P_Counter *counter, i64 x); -void P_WaitOnCounter(P_Counter *counter); +void P_CounterAdd(Counter *counter, i64 x); +void P_WaitOnCounter(Counter *counter); diff --git a/src/platform/platform_win32.c b/src/platform/platform_win32.c index 918b7e30..75b17eec 100644 --- a/src/platform/platform_win32.c +++ b/src/platform/platform_win32.c @@ -14,1166 +14,6 @@ P_W32_SharedCtx P_W32_shared_ctx = ZI; #pragma comment(lib, "avrt") #pragma comment(lib, "ws2_32.lib") -//////////////////////////////// -//~ Win32 ticket mutex - -void P_W32_LockTicketMutex(P_W32_TicketMutex *tm) -{ - i64 ticket = Atomic64FetchAdd(&tm->ticket.v, 1); - while (Atomic64Fetch(&tm->serving.v) != ticket) - { - IxPause(); - } -} - -void P_W32_UnlockTicketMutex(P_W32_TicketMutex *tm) -{ - Atomic64FetchAdd(&tm->serving.v, 1); -} - -//////////////////////////////// -//~ Win32 thread - -DWORD WINAPI P_W32_Win32ThreadProc(LPVOID vt) -{ - P_W32_AllocFiber(0); - - P_W32_Thread *t = (P_W32_Thread *)vt; - __profthread(t->thread_name_cstr, t->profiler_group); - - /* Initialize COM */ - CoInitializeEx(0, COINIT_MULTITHREADED); - - /* Set thread name */ - if (t->thread_name_wstr[0] != 0) - { - SetThreadDescription(GetCurrentThread(), t->thread_name_wstr); - } - - P_LogInfoF("New thread \"%F\" created with ID %F", 
FmtString(StringFromCstrNoLimit(t->thread_name_cstr)), FmtUint(P_GetThreadId())); - - /* Enter thread entry point */ - t->entry_point(t->thread_data); - - /* Uninitialize COM */ - CoUninitialize(); - - return 0; -} - -P_W32_Thread *P_W32_AllocThread(P_W32_ThreadFunc *entry_point, void *thread_data, String thread_name, i32 profiler_group) -{ - __prof; - TempArena scratch = BeginScratchNoConflict(); - P_W32_SharedCtx *g = &P_W32_shared_ctx; - Assert(entry_point != 0); - P_LogInfoF("Creating thread \"%F\"", FmtString(thread_name)); - - - /* Allocate thread object */ - P_W32_Thread *t = 0; - { - P_Lock lock = P_LockE(&g->threads_mutex); - if (g->first_free_thread) - { - t = g->first_free_thread; - g->first_free_thread = t->next; - } - else - { - t = PushStructNoZero(g->threads_arena, P_W32_Thread); - } - ZeroStruct(t); - if (g->last_thread) - { - g->last_thread->next = t; - t->prev = g->last_thread; - } - else - { - g->first_thread = t; - } - g->last_thread = t; - P_Unlock(&lock); - } - - - t->entry_point = entry_point; - t->thread_data = thread_data; - t->profiler_group = profiler_group; - - /* Copy thread name to params */ - { - u64 CstrLen = MinU64((countof(t->thread_name_cstr) - 1), thread_name.len); - CopyBytes(t->thread_name_cstr, thread_name.text, CstrLen * sizeof(*t->thread_name_cstr)); - t->thread_name_cstr[CstrLen] = 0; - } - { - String16 thread_name16 = String16FromString(scratch.arena, thread_name); - u64 WstrLen = MinU64((countof(t->thread_name_wstr) - 1), thread_name16.len); - CopyBytes(t->thread_name_wstr, thread_name16.text, WstrLen * sizeof(*t->thread_name_wstr)); - t->thread_name_wstr[WstrLen] = 0; - } - - t->handle = CreateThread( - 0, - P_W32_ThreadStackSize, - P_W32_Win32ThreadProc, - t, - 0, - 0 - ); - - if (!t->handle) - { - P_Panic(Lit("Failed to create thread")); - } - - EndScratch(scratch); - return (P_W32_Thread *)t; -} - -/* Returns 0 if the thread could not release in specified timeout (e.g. 
because it is still running) */ -b32 P_W32_TryReleaseThread(P_W32_Thread *thread, f32 timeout_seconds) -{ - __prof; - P_W32_SharedCtx *g = &P_W32_shared_ctx; - b32 success = 0; - P_W32_Thread *t = (P_W32_Thread *)thread; - HANDLE handle = t->handle; - if (handle) - { - /* Wait for thread to stop */ - DWORD timeout_ms = (timeout_seconds > 10000000) ? INFINITE : RoundF32ToI32(timeout_seconds * 1000); - DWORD wait_result = WaitForSingleObject(handle, timeout_ms); - if (wait_result == WAIT_OBJECT_0) - { - /* Release thread */ - success = 1; - CloseHandle(handle); - { - P_Lock lock = P_LockE(&g->threads_mutex); - { - P_W32_Thread *prev = t->prev; - P_W32_Thread *next = t->next; - if (prev) - { - prev->next = next; - } - else - { - g->first_thread = next; - } - if (next) - { - next->prev = prev; - } - else - { - g->last_thread = prev; - } - t->next = g->first_free_thread; - g->first_free_thread = t; - } - P_Unlock(&lock); - } - } - } - return success; -} - -void P_W32_WaitReleaseThread(P_W32_Thread *thread) -{ - __prof; - b32 success = P_W32_TryReleaseThread(thread, F32Infinity); - Assert(success); - LAX success; -} - -//////////////////////////////// -//~ Win32 wait list - -/* REQUIRED: Caller must have acquired `wake_lock` for each fiber in array */ -void P_W32_WakeLockedFibers(i32 num_fibers, P_W32_Fiber **fibers) -{ - P_W32_SharedCtx *g = &P_W32_shared_ctx; - - /* Update wait lists */ - for (i32 i = 0; i < num_fibers; ++i) - { - P_W32_Fiber *fiber = fibers[i]; - u64 wait_addr = fiber->wait_addr; - u64 wait_time = fiber->wait_time; - - /* Lock & search wait bins */ - /* TODO: Cache these in parameters since caller has one of them already calculated */ - P_W32_WaitBin *wait_addr_bin = 0; - P_W32_WaitBin *wait_time_bin = 0; - P_W32_WaitList *wait_addr_list = 0; - P_W32_WaitList *wait_time_list = 0; - if (wait_addr != 0) - { - wait_addr_bin = &g->wait_addr_bins[wait_addr % P_W32_NumWaitAddrBins]; - P_W32_LockTicketMutex(&wait_addr_bin->lock); - for (P_W32_WaitList *tmp = 
wait_addr_bin->first_wait_list; tmp && !wait_addr_list; tmp = tmp->next_in_bin) - { - if (tmp->value == (u64)wait_addr) - { - wait_addr_list = tmp; - } - } - } - if (wait_time != 0) - { - wait_time_bin = &g->wait_time_bins[wait_time % P_W32_NumWaitTimeBins]; - P_W32_LockTicketMutex(&wait_time_bin->lock); - for (P_W32_WaitList *tmp = wait_time_bin->first_wait_list; tmp && !wait_time_list; tmp = tmp->next_in_bin) - { - if (tmp->value == (u64)wait_time) - { - wait_time_list = tmp; - } - } - } - { - /* Remove from addr list */ - if (wait_addr_list) - { - if (--wait_addr_list->num_waiters == 0) - { - /* Free addr list */ - P_W32_WaitList *prev = wait_addr_list->prev_in_bin; - P_W32_WaitList *next = wait_addr_list->next_in_bin; - if (prev) - { - prev->next_in_bin = next; - } - else - { - wait_addr_bin->first_wait_list = next; - } - if (next) - { - next->prev_in_bin = prev; - } - else - { - wait_addr_bin->last_wait_list = prev; - } - wait_addr_list->next_in_bin = wait_addr_bin->first_free_wait_list; - wait_addr_bin->first_free_wait_list = wait_addr_list; - } - else - { - i16 prev_id = fiber->prev_addr_waiter; - i16 next_id = fiber->next_addr_waiter; - if (prev_id) - { - P_W32_FiberFromId(prev_id)->next_addr_waiter = next_id; - } - else - { - wait_addr_list->first_waiter = next_id; - } - if (next_id) - { - P_W32_FiberFromId(next_id)->prev_addr_waiter = prev_id; - } - else - { - wait_addr_list->last_waiter = prev_id; - } - } - fiber->wait_addr = 0; - fiber->prev_addr_waiter = 0; - fiber->next_addr_waiter = 0; - } - /* Remove from time list */ - if (wait_time_list) - { - if (--wait_time_list->num_waiters == 0) - { - /* Free time list */ - P_W32_WaitList *prev = wait_time_list->prev_in_bin; - P_W32_WaitList *next = wait_time_list->next_in_bin; - if (prev) - { - prev->next_in_bin = next; - } - else - { - wait_time_bin->first_wait_list = next; - } - if (next) - { - next->prev_in_bin = prev; - } - else - { - wait_time_bin->last_wait_list = prev; - } - wait_time_list->next_in_bin 
= wait_time_bin->first_free_wait_list; - wait_time_bin->first_free_wait_list = wait_time_list; - } - else - { - i16 prev_id = fiber->prev_time_waiter; - i16 next_id = fiber->next_time_waiter; - if (prev_id) - { - P_W32_FiberFromId(prev_id)->next_time_waiter = next_id; - } - else - { - wait_time_list->first_waiter = next_id; - } - if (next_id) - { - P_W32_FiberFromId(next_id)->prev_time_waiter = prev_id; - } - else - { - wait_time_list->last_waiter = prev_id; - } - } - fiber->wait_time = 0; - fiber->prev_time_waiter = 0; - fiber->next_time_waiter = 0; - } - /* Unlock fiber */ - Atomic32FetchSet(&fiber->wake_lock, 0); - } - /* Unlock wait bins */ - if (wait_time_bin != 0) P_W32_UnlockTicketMutex(&wait_time_bin->lock); - if (wait_addr_bin != 0) P_W32_UnlockTicketMutex(&wait_addr_bin->lock); - } - - /* Resume jobs */ - /* TODO: Batch submit waiters based on queue kind rather than one at a time */ - i32 job_counts_per_pool[P_Pool_Count] = ZI; - for (i32 i = 0; i < num_fibers; ++i) - { - P_W32_Fiber *fiber = fibers[i]; - P_Pool pool_kind = fiber->job_pool; - ++job_counts_per_pool[pool_kind]; - P_W32_JobPool *pool = &g->job_pools[pool_kind]; - P_W32_JobQueue *queue = &pool->job_queues[fiber->job_priority]; - P_W32_LockTicketMutex(&queue->lock); - { - P_W32_JobInfo *info = 0; - if (queue->first_free) - { - info = queue->first_free; - queue->first_free = info->next; - } - else - { - info = PushStructNoZero(queue->arena, P_W32_JobInfo); - } - ZeroStruct(info); - info->count = 1; - info->num_dispatched = fiber->job_id; - info->func = fiber->job_func; - info->sig = fiber->job_sig; - info->counter = fiber->job_counter; - info->fiber_id = fiber->id; - if (queue->first) - { - info->next = queue->first; - } - else - { - queue->last = info; - } - queue->first = info; - } - P_W32_UnlockTicketMutex(&queue->lock); - } - - /* Wake workers */ - if (num_fibers > 0) - { - for (P_Pool pool_kind = 0; pool_kind < (i32)countof(job_counts_per_pool); ++pool_kind) - { - i32 job_count = 
job_counts_per_pool[pool_kind]; - if (job_count > 0) - { - P_W32_JobPool *pool = &g->job_pools[pool_kind]; - P_W32_LockTicketMutex(&pool->workers_wake_lock); - { - Atomic64FetchAdd(&pool->num_jobs_in_queue.v, job_count); - if (job_count >= P_W32_WakeAllThreshold) - { - WakeByAddressAll(&pool->num_jobs_in_queue); - } - else - { - for (i32 i = 0; i < job_count; ++i) - { - WakeByAddressSingle(&pool->num_jobs_in_queue); - } - } - } - P_W32_UnlockTicketMutex(&pool->workers_wake_lock); - } - } - } -} - -void P_W32_WakeByAddress(void *addr, i32 count) -{ - TempArena scratch = BeginScratchNoConflict(); - P_W32_SharedCtx *g = &P_W32_shared_ctx; - - u64 wait_addr_bin_index = (u64)addr % P_W32_NumWaitAddrBins; - P_W32_WaitBin *wait_addr_bin = &g->wait_addr_bins[wait_addr_bin_index]; - P_W32_WaitList *wait_addr_list = 0; - - /* Get list of waiting fibers */ - i32 num_fibers = 0; - P_W32_Fiber **fibers = 0; - { - P_W32_LockTicketMutex(&wait_addr_bin->lock); - { - /* Search for wait addr list */ - for (P_W32_WaitList *tmp = wait_addr_bin->first_wait_list; tmp && !wait_addr_list; tmp = tmp->next_in_bin) - { - if (tmp->value == (u64)addr) - { - wait_addr_list = tmp; - } - } - - /* Lock fibers & build array */ - if (wait_addr_list) - { - fibers = PushStructsNoZero(scratch.arena, P_W32_Fiber *, wait_addr_list->num_waiters); - for (P_W32_Fiber *fiber = P_W32_FiberFromId(wait_addr_list->first_waiter); fiber && num_fibers < count; fiber = P_W32_FiberFromId(fiber->next_addr_waiter)) - { - if (Atomic32FetchTestSet(&fiber->wake_lock, 0, 1) == 0) - { - fibers[num_fibers] = fiber; - ++num_fibers; - } - } - } - } - P_W32_UnlockTicketMutex(&wait_addr_bin->lock); - } - - if (num_fibers > 0) - { - P_W32_WakeLockedFibers(num_fibers, fibers); - } - - /* Wake win32 blocking thread waiters */ - if (count >= P_W32_WakeAllThreshold) - { - WakeByAddressAll(addr); - } - else - { - for (i32 i = 0; i < count; ++i) - { - WakeByAddressSingle(addr); - } - } - - EndScratch(scratch); -} - -void 
P_W32_WakeByTime(u64 time) -{ - TempArena scratch = BeginScratchNoConflict(); - P_W32_SharedCtx *g = &P_W32_shared_ctx; - - u64 wait_time_bin_index = (u64)time % P_W32_NumWaitTimeBins; - P_W32_WaitBin *wait_time_bin = &g->wait_time_bins[wait_time_bin_index]; - P_W32_WaitList *wait_time_list = 0; - - /* Build list of waiters to resume */ - i32 num_fibers = 0; - P_W32_Fiber **fibers = 0; - { - P_W32_LockTicketMutex(&wait_time_bin->lock); - { - /* Search for wait time list */ - for (P_W32_WaitList *tmp = wait_time_bin->first_wait_list; tmp && !wait_time_list; tmp = tmp->next_in_bin) - { - if (tmp->value == (u64)time) - { - wait_time_list = tmp; - } - } - - if (wait_time_list) - { - /* Set waiter wake status & build fibers list */ - fibers = PushStructsNoZero(scratch.arena, P_W32_Fiber *, wait_time_list->num_waiters); - for (P_W32_Fiber *fiber = P_W32_FiberFromId(wait_time_list->first_waiter); fiber; fiber = P_W32_FiberFromId(fiber->next_time_waiter)) - { - if (Atomic32FetchTestSet(&fiber->wake_lock, 0, 1) == 0) - { - fibers[num_fibers] = fiber; - ++num_fibers; - } - - } - } - } - P_W32_UnlockTicketMutex(&wait_time_bin->lock); - } - - P_W32_WakeLockedFibers(num_fibers, fibers); - - EndScratch(scratch); -} - -//////////////////////////////// -//~ Win32 fiber - -/* If `pool` is 0, then the currently running thread will be converted into a fiber */ -P_W32_Fiber *P_W32_AllocFiber(P_W32_JobPool *pool) -{ - P_W32_SharedCtx *g = &P_W32_shared_ctx; - i16 fiber_id = 0; - P_W32_Fiber *fiber = 0; - char *new_name_cstr = 0; - { - if (pool != 0) - { - P_W32_LockTicketMutex(&pool->free_fibers_lock); - if (pool->first_free_fiber_id) - { - fiber_id = pool->first_free_fiber_id; - fiber = &g->fibers[fiber_id]; - pool->first_free_fiber_id = fiber->parent_id; - } - P_W32_UnlockTicketMutex(&pool->free_fibers_lock); - } - if (!fiber_id) - { - P_W32_LockTicketMutex(&g->fibers_lock); - { - { - fiber_id = g->num_fibers++; - if (fiber_id >= MaxFibers) - { - P_Panic(Lit("Max fibers reached")); - 
} - fiber = &g->fibers[fiber_id]; - new_name_cstr = PushStructs(g->fiber_names_arena, char, P_W32_FiberNameMaxSize); - } - } - P_W32_UnlockTicketMutex(&g->fibers_lock); - } - - } - if (new_name_cstr != 0) - { - __profn("Initialize fiber"); - fiber->id = fiber_id; - - /* Id to ASCII */ - i32 id_div = fiber_id; - char id_chars[64] = ZI; - i32 id_chars_len = 0; - do - { - i32 digit = id_div % 10; - id_div /= 10; - id_chars[id_chars_len] = ("0123456789")[digit]; - ++id_chars_len; - } while (id_div > 0); - i32 rev_start = 0; - i32 rev_end = id_chars_len - 1; - while (rev_start < rev_end) - { - char swp = id_chars[rev_start]; - id_chars[rev_start] = id_chars[rev_end]; - id_chars[rev_end] = swp; - ++rev_start; - --rev_end; - } - - /* Concat fiber name */ - i32 name_size = 1; - Assert(sizeof(sizeof(P_W32_FiberNamePrefixCstr)) <= P_W32_FiberNameMaxSize); - CopyBytes(new_name_cstr, P_W32_FiberNamePrefixCstr, sizeof(P_W32_FiberNamePrefixCstr)); - name_size += sizeof(P_W32_FiberNamePrefixCstr) - 2; - CopyBytes(new_name_cstr + name_size, id_chars, id_chars_len); - name_size += id_chars_len; - CopyBytes(new_name_cstr + name_size, P_W32_FiberNameSuffixCstr, sizeof(P_W32_FiberNameSuffixCstr)); - name_size += sizeof(P_W32_FiberNameSuffixCstr) - 2; - - fiber->name_cstr = new_name_cstr; - - /* Init win32 fiber */ - if (pool != 0) - { - __profn("CreateFiber"); - fiber->addr = CreateFiber(P_W32_FiberStackSize, P_W32_FiberEntryPoint, (void *)(i64)fiber_id); - } - else - { - /* Fiber is not a part of a job pool, convert thread to fiber */ - __profn("ConvertThreadToFiber"); - fiber->addr = ConvertThreadToFiber((void *)(i64)fiber_id); - } - } - fiber->wait_addr = 0; - fiber->wait_time = 0; - fiber->prev_addr_waiter = 0; - fiber->next_addr_waiter = 0; - fiber->prev_time_waiter = 0; - fiber->next_time_waiter = 0; - fiber->job_func = 0; - fiber->job_sig = 0; - fiber->job_id = 0; - fiber->job_pool = 0; - fiber->job_priority = 0; - fiber->job_counter = 0; - fiber->yield_param = 0; - 
fiber->parent_id = 0; - return fiber; -} - -void P_W32_ReleaseFiber(P_W32_JobPool *pool, P_W32_Fiber *fiber) -{ - P_W32_LockTicketMutex(&pool->free_fibers_lock); - { - i16 fiber_id = fiber->id; - fiber->parent_id = pool->first_free_fiber_id; - pool->first_free_fiber_id = fiber_id; - } - P_W32_UnlockTicketMutex(&pool->free_fibers_lock); -} - -ForceInline P_W32_Fiber *P_W32_FiberFromId(i16 id) -{ - P_W32_SharedCtx *g = &P_W32_shared_ctx; - if (id <= 0) - { - return 0; - } - else - { - return &g->fibers[id]; - } -} - -ForceNoInline void P_W32_FiberResume(P_W32_Fiber *fiber) -{ - MemoryBarrier(); - SwitchToFiber(fiber->addr); - MemoryBarrier(); -} - -void P_W32_YieldFiber(P_W32_Fiber *fiber, P_W32_Fiber *parent_fiber) -{ - LAX fiber; - Assert(fiber->id == FiberId()); - Assert(parent_fiber->id == fiber->parent_id); - Assert(parent_fiber->id > 0); - { - __prof_fiber_leave(); - P_W32_FiberResume(parent_fiber); - __prof_fiber_enter(fiber->name_cstr, PROF_THREAD_GROUP_FIBERS - Mebi(fiber->job_pool) + Kibi(1) + fiber->id); - } -} - -void P_W32_FiberEntryPoint(void *id_ptr) -{ - i16 id = (i32)(i64)id_ptr; - volatile P_W32_Fiber *fiber = P_W32_FiberFromId(id); - __prof_fiber_enter(fiber->name_cstr, PROF_THREAD_GROUP_FIBERS - Mebi(fiber->job_pool) + Kibi(1) + fiber->id); - for (;;) - { - /* Run job */ - { - P_W32_YieldParam *yield_param = fiber->yield_param; - yield_param->kind = P_W32_YieldKind_None; - P_JobData data = ZI; - data.id = fiber->job_id; - data.sig = fiber->job_sig; - { - MemoryBarrier(); - fiber->job_func(data); - MemoryBarrier(); - } - } - /* Job completed, yield */ - { - /* Decrement job counter */ - P_Counter *job_counter = fiber->job_counter; - if (job_counter) - { - P_CounterAdd(job_counter, -1); - } - /* Yield to worker */ - fiber->yield_param->kind = P_W32_YieldKind_Done; - P_W32_Fiber *parent_fiber = P_W32_FiberFromId(fiber->parent_id); - P_W32_YieldFiber((P_W32_Fiber *)fiber, parent_fiber); - } - } -} - -//////////////////////////////// -//~ Win32 job 
worker - -P_W32_ThreadDef(P_W32_JobWorkerEntryFunc, worker_ctx_arg) -{ - P_W32_SharedCtx *g = &P_W32_shared_ctx; - P_W32_WorkerCtx *ctx = worker_ctx_arg; - P_Pool pool_kind = ctx->pool_kind; - P_W32_JobPool *pool = &g->job_pools[pool_kind]; - LAX ctx; - - { - /* TODO: Heuristic pinning */ - /* TODO: Pin non-worker threads to other cores */ - HANDLE thread_handle = GetCurrentThread(); - - if (pool->thread_priority) - { - __profn("Set priority"); - b32 success = SetThreadPriority(thread_handle, pool->thread_priority) != 0; - Assert(success); - LAX success; - } - -#if 0 - if (pool->thread_affinity_mask) - { - __profn("Set affinity"); - b32 success = SetThreadAffinityMask(thread_handle, pool->thread_affinity_mask) != 0; -#if RtcIsEnabled || ProfilingIsEnabled - { - /* Retry until external tools can set correct process affinity */ - i32 delay_ms = 16; - while (!success && delay_ms <= 1024) - { - __profn("Affinity retry"); - Sleep(delay_ms); - success = SetThreadAffinityMask(thread_handle, pool->thread_affinity_mask) != 0; - delay_ms *= 2; - } - } -#endif - Assert(success); - LAX success; - } -#endif - - if (pool->thread_is_audio) - { - /* https://learn.microsoft.com/en-us/windows/win32/procthread/multimedia-class-scheduler-service#registry-settings */ - __profn("Set mm thread characteristics"); - DWORD task = 0; - HANDLE mmc_handle = AvSetMmThreadCharacteristics(L"Pro Audio", &task); - Assert(mmc_handle); - LAX mmc_handle; - } - } - - i32 worker_fiber_id = FiberId(); - - P_W32_Fiber *job_fiber = 0; - b32 shutdown = 0; - while (!shutdown) - { - //- Pull job from queue - P_Priority job_priority = 0; - i16 job_fiber_id = 0; - i32 job_id = 0; - P_JobFunc *job_func = 0; - void *job_sig = 0; - P_Counter *job_counter = 0; - { - //__profnc("Pull job", Rgb32F(0.75, 0.75, 0)); - for (P_Priority priority = 0; priority < (i32)countof(pool->job_queues) && !job_func; ++priority) - { - P_W32_JobQueue *queue = &pool->job_queues[priority]; - if (queue) - { - 
P_W32_LockTicketMutex(&queue->lock); - { - P_W32_JobInfo *info = queue->first; - while (info && !job_func) - { - P_W32_JobInfo *next = info->next; - b32 dequeue = 0; - if (info->fiber_id <= 0) - { - job_id = info->num_dispatched++; - if (job_id < info->count) - { - /* Pick job */ - Atomic64FetchAdd(&pool->num_jobs_in_queue.v, -1); - job_priority = priority; - job_func = info->func; - job_sig = info->sig; - job_counter = info->counter; - if (job_id == (info->count - 1)) - { - /* We're picking up the last dispatch, so dequeue the job */ - dequeue = 1; - } - } - } - else - { - /* This job is to be resumed from a yield */ - Atomic64FetchAdd(&pool->num_jobs_in_queue.v, -1); - job_fiber_id = info->fiber_id; - job_priority = priority; - job_id = info->num_dispatched; - job_func = info->func; - job_sig = info->sig; - job_counter = info->counter; - dequeue = 1; - } - if (dequeue) - { - if (!next) - { - queue->last = 0; - } - queue->first = next; - info->next = queue->first_free; - queue->first_free = info; - } - info = next; - } - } - P_W32_UnlockTicketMutex(&queue->lock); - } - } - } - - //- Release old fiber if resuming a yielded fiber - if (job_fiber_id > 0) - { - if (job_fiber) - { - P_W32_ReleaseFiber(pool, job_fiber); - } - job_fiber = P_W32_FiberFromId(job_fiber_id); - } - - //- Run fiber - if (job_func) - { - if (!job_fiber) - { - job_fiber = P_W32_AllocFiber(pool); - } - job_fiber_id = job_fiber->id; - { - __profnc("Run fiber", Rgb32F(1, 1, 1)); - __profvalue(job_fiber->id); - P_W32_YieldParam yield = ZI; - job_fiber->parent_id = worker_fiber_id; - job_fiber->job_func = job_func; - job_fiber->job_sig = job_sig; - job_fiber->job_id = job_id; - job_fiber->job_pool = pool_kind; - job_fiber->job_priority = job_priority; - job_fiber->job_counter = job_counter; - job_fiber->yield_param = &yield; - b32 done = 0; - while (!done) - { - P_W32_FiberResume(job_fiber); - switch (yield.kind) - { - default: - { - /* Invalid yield kind */ - TempArena scratch = 
BeginScratchNoConflict(); - P_Panic(StringFormat(scratch.arena, Lit("Invalid fiber yield kind \"%F\""), FmtSint(yield.kind))); - EndScratch(scratch); - } break; - - //- Fiber is waiting - case P_W32_YieldKind_Wait: - { - __profn("Process fiber wait"); - volatile void *wait_addr = yield.wait.addr; - void *wait_cmp = yield.wait.cmp; - u32 wait_size = yield.wait.size; - i64 wait_timeout_ns = yield.wait.timeout_ns; - i64 wait_time = 0; - if (wait_timeout_ns > 0 && wait_timeout_ns < I64Max) - { - u64 current_scheduler_cycle = Atomic64Fetch(&g->current_scheduler_cycle.v); - i64 current_scheduler_cycle_period_ns = Atomic64Fetch(&g->current_scheduler_cycle_period_ns.v); - wait_time = current_scheduler_cycle + MaxI64((i64)((f64)wait_timeout_ns / (f64)current_scheduler_cycle_period_ns), 1); - } - - u64 wait_addr_bin_index = (u64)wait_addr % P_W32_NumWaitAddrBins; - u64 wait_time_bin_index = (u64)wait_time % P_W32_NumWaitTimeBins; - P_W32_WaitBin *wait_addr_bin = &g->wait_addr_bins[wait_addr_bin_index]; - P_W32_WaitBin *wait_time_bin = &g->wait_time_bins[wait_time_bin_index]; - - if (wait_addr != 0) P_W32_LockTicketMutex(&wait_addr_bin->lock); - { - if (wait_time != 0) P_W32_LockTicketMutex(&wait_time_bin->lock); - { - //- Load and compare value at address now that wait bins are locked - b32 cancel_wait = wait_addr == 0 && wait_time == 0; - if (wait_addr != 0) - { - switch (wait_size) - { - case 1: cancel_wait = (u8)_InterlockedCompareExchange8(wait_addr, 0, 0) != *(u8 *)wait_cmp; break; - case 2: cancel_wait = (u16)_InterlockedCompareExchange16(wait_addr, 0, 0) != *(u16 *)wait_cmp; break; - case 4: cancel_wait = (u32)_InterlockedCompareExchange(wait_addr, 0, 0) != *(u32 *)wait_cmp; break; - case 8: cancel_wait = (u64)_InterlockedCompareExchange64(wait_addr, 0, 0) != *(u64 *)wait_cmp; break; - default: cancel_wait = 1; Assert(0); break; /* Invalid wait size */ - } - } - if (wait_time != 0 && !cancel_wait) - { - cancel_wait = wait_time <= 
Atomic64Fetch(&g->current_scheduler_cycle.v); - } - if (!cancel_wait) - { - if (wait_addr != 0) - { - //- Search for wait addr list in bin - P_W32_WaitList *wait_addr_list = 0; - for (P_W32_WaitList *tmp = wait_addr_bin->first_wait_list; tmp && !wait_addr_list; tmp = tmp->next_in_bin) - { - if (tmp->value == (u64)wait_addr) - { - wait_addr_list = tmp; - } - } - //- Allocate new wait addr list - if (!wait_addr_list) - { - if (wait_addr_bin->first_free_wait_list) - { - wait_addr_list = wait_addr_bin->first_free_wait_list; - wait_addr_bin->first_free_wait_list = wait_addr_list->next_in_bin; - } - else - { - P_W32_LockTicketMutex(&g->wait_lists_arena_lock); - { - wait_addr_list = PushStructNoZero(g->wait_lists_arena, P_W32_WaitList); - } - P_W32_UnlockTicketMutex(&g->wait_lists_arena_lock); - } - ZeroStruct(wait_addr_list); - wait_addr_list->value = (u64)wait_addr; - if (wait_addr_bin->last_wait_list) - { - wait_addr_bin->last_wait_list->next_in_bin = wait_addr_list; - wait_addr_list->prev_in_bin = wait_addr_bin->last_wait_list; - } - else - { - wait_addr_bin->first_wait_list = wait_addr_list; - } - wait_addr_bin->last_wait_list = wait_addr_list; - } - //- Insert fiber into wait addr list - job_fiber->wait_addr = (u64)wait_addr; - if (wait_addr_list->last_waiter) - { - P_W32_FiberFromId(wait_addr_list->last_waiter)->next_addr_waiter = job_fiber_id; - job_fiber->prev_addr_waiter = wait_addr_list->last_waiter; - } - else - { - wait_addr_list->first_waiter = job_fiber_id; - } - wait_addr_list->last_waiter = job_fiber_id; - ++wait_addr_list->num_waiters; - } - if (wait_time != 0) - { - //- Search for wait time list in bin - P_W32_WaitList *wait_time_list = 0; - for (P_W32_WaitList *tmp = wait_time_bin->first_wait_list; tmp && !wait_time_list; tmp = tmp->next_in_bin) - { - if (tmp->value == (u64)wait_time) - { - wait_time_list = tmp; - } - } - //- Allocate new wait time list - if (!wait_time_list) - { - if (wait_time_bin->first_free_wait_list) - { - wait_time_list = 
wait_time_bin->first_free_wait_list; - wait_time_bin->first_free_wait_list = wait_time_list->next_in_bin; - } - else - { - P_W32_LockTicketMutex(&g->wait_lists_arena_lock); - { - wait_time_list = PushStructNoZero(g->wait_lists_arena, P_W32_WaitList); - } - P_W32_UnlockTicketMutex(&g->wait_lists_arena_lock); - } - ZeroStruct(wait_time_list); - wait_time_list->value = wait_time; - if (wait_time_bin->last_wait_list) - { - wait_time_bin->last_wait_list->next_in_bin = wait_time_list; - wait_time_list->prev_in_bin = wait_time_bin->last_wait_list; - } - else - { - wait_time_bin->first_wait_list = wait_time_list; - } - wait_time_bin->last_wait_list = wait_time_list; - } - //- Insert fiber into wait time list - job_fiber->wait_time = wait_time; - if (wait_time_list->last_waiter) - { - P_W32_FiberFromId(wait_time_list->last_waiter)->next_time_waiter = job_fiber_id; - job_fiber->prev_time_waiter = wait_time_list->last_waiter; - } - else - { - wait_time_list->first_waiter = job_fiber_id; - } - wait_time_list->last_waiter = job_fiber_id; - ++wait_time_list->num_waiters; - } - - //- PopStruct worker's job fiber - job_fiber = 0; - done = 1; - } - } - if (wait_time != 0) P_W32_UnlockTicketMutex(&wait_time_bin->lock); - } - if (wait_addr != 0) P_W32_UnlockTicketMutex(&wait_addr_bin->lock); - } break; - - //- Fiber is finished - case P_W32_YieldKind_Done: - { - done = 1; - } break; - } - } - } - } - - //- Wait for job - i64 num_jobs_in_queue = Atomic64Fetch(&pool->num_jobs_in_queue.v); - shutdown = Atomic32Fetch(&pool->workers_shutdown.v); - if (num_jobs_in_queue <= 0 && !shutdown) - { - //__profnc("Wait for job", Rgb32F(0.75, 0.75, 0)); - P_W32_LockTicketMutex(&pool->workers_wake_lock); - { - num_jobs_in_queue = Atomic64Fetch(&pool->num_jobs_in_queue.v); - shutdown = Atomic32Fetch(&pool->workers_shutdown.v); - while (num_jobs_in_queue <= 0 && !shutdown) - { - { - P_W32_UnlockTicketMutex(&pool->workers_wake_lock); - WaitOnAddress(&pool->num_jobs_in_queue, &num_jobs_in_queue, 
sizeof(num_jobs_in_queue), INFINITE); - P_W32_LockTicketMutex(&pool->workers_wake_lock); - } - shutdown = Atomic32Fetch(&pool->workers_shutdown.v); - num_jobs_in_queue = Atomic64Fetch(&pool->num_jobs_in_queue.v); - } - } - P_W32_UnlockTicketMutex(&pool->workers_wake_lock); - } - } - - //- Worker shutdown - if (job_fiber) - { - P_W32_ReleaseFiber(pool, job_fiber); - } -} - -//////////////////////////////// -//~ Win32 job scheduler - -P_W32_ThreadDef(P_W32_JobSchedulerEntryFunc, UNUSED arg) -{ - struct P_W32_SharedCtx *g = &P_W32_shared_ctx; - - { - i32 priority = THREAD_PRIORITY_TIME_CRITICAL; - b32 success = SetThreadPriority(GetCurrentThread(), priority); - LAX success; - Assert(success); - } - - /* Create high resolution timer */ - HANDLE timer = CreateWaitableTimerExW(0, 0, CREATE_WAITABLE_TIMER_HIGH_RESOLUTION, TIMER_ALL_ACCESS); - if (!timer) - { - P_Panic(Lit("Failed to create high resolution timer")); - } - - /* Create rolling buffer of scheduler cycles initialized to default value */ - i32 periods_index = 0; - i64 periods[P_W32_NumRollingSchedulerPeriods] = ZI; - for (i32 i = 0; i < (i32)countof(periods); ++i) - { - periods[i] = P_W32_DefaultSchedulerPeriodNs; - } - - i64 last_cycle_ns = 0; - while (!Atomic32Fetch(&g->shutdown)) - { - __profn("Job scheduler cycle"); - { - __profn("Job scheduler wait"); - LARGE_INTEGER due = ZI; - due.QuadPart = -1; - //due.QuadPart = -10000; - //due.QuadPart = -32000; - //due.QuadPart = -12000; - //due.QuadPart = -8000; - SetWaitableTimerEx(timer, &due, 0, 0, 0, 0, 0); - WaitForSingleObject(timer, INFINITE); - } - - /* Calculate mean period */ - i64 now_ns = P_TimeNs(); - i64 period_ns = last_cycle_ns == 0 ? 
P_W32_DefaultSchedulerPeriodNs : now_ns - last_cycle_ns; - last_cycle_ns = now_ns; - - /* Calculate mean period */ - { - periods[periods_index++] = period_ns; - if (periods_index == countof(periods)) - { - periods_index = 0; - } - f64 periods_sum_ns = 0; - for (i32 i = 0; i < (i32)countof(periods); ++i) - { - periods_sum_ns += (f64)periods[i]; - } - f64 mean_ns = periods_sum_ns / (f64)countof(periods); - Atomic64FetchSet(&g->current_scheduler_cycle_period_ns.v, RoundF64ToI64(mean_ns)); - } - - { - __profn("Job scheduler run"); - i64 current_cycle = Atomic64FetchAdd(&g->current_scheduler_cycle.v, 1) + 1; - P_W32_WakeByTime((u64)current_cycle); - } - } -} - //////////////////////////////// //~ Win32 time @@ -1254,7 +94,7 @@ P_W32_Window *P_W32_AllocWindow(void) * created and receive a HWND, because on Windows a the event proc must run on * the same thread that created the window. */ P_CounterAdd(&window->ready_fence, 1); - window->window_thread = P_W32_AllocThread(&P_W32_WindowThreadEntryFunc, window, Lit("Window thread"), PROF_THREAD_GROUP_WINDOW); + window->window_thread = P_W32_AllocThread(&W32_WindowThreadEntryFunc, window, Lit("Window thread"), PROF_THREAD_GROUP_WINDOW); P_WaitOnCounter(&window->ready_fence); return window; @@ -1462,7 +302,7 @@ void P_W32_UpdateWindowFromSettings(P_W32_Window *window, P_WindowSettings *sett //////////////////////////////// //~ Win32 window thread -P_W32_ThreadDef(P_W32_WindowThreadEntryFunc, arg) +W32_ThreadDef(W32_WindowThreadEntryFunc, arg) { P_W32_Window *window = (P_W32_Window *)arg; @@ -1936,127 +776,7 @@ P_Address P_W32_PlatformAddressFromWin32Address(P_W32_Address ws_addr) } //////////////////////////////// -//~ Wait / wake - -void P_Wait(volatile void *addr, void *cmp, u32 size, i64 timeout_ns) -{ - P_W32_Fiber *fiber = P_W32_FiberFromId(FiberId()); - i16 parent_id = fiber->parent_id; - if (parent_id != 0) - { - *fiber->yield_param = (P_W32_YieldParam) { - .kind = P_W32_YieldKind_Wait, - .wait = { - .addr = addr, - .cmp 
= cmp, - .size = size, - .timeout_ns = timeout_ns - } - }; - P_W32_YieldFiber(fiber, P_W32_FiberFromId(parent_id)); - } - else - { - i32 timeout_ms = 0; - if (timeout_ns > 10000000000000000ll) - { - timeout_ms = INFINITE; - } - else if (timeout_ns != 0) - { - timeout_ms = timeout_ns / 1000000; - timeout_ms += (timeout_ms == 0) * SignF32(timeout_ns); - } - if (addr == 0) - { - Sleep(timeout_ms); - } - else - { - WaitOnAddress(addr, cmp, size, timeout_ms); - } - } -} - -void P_Wake(void *addr, i32 count) -{ - P_W32_WakeByAddress(addr, count); -} - -//////////////////////////////// -//~ Job - -void P_Run(i32 count, P_JobFunc *func, void *sig, P_Pool pool_kind, P_Priority priority, P_Counter *counter) -{ - __prof; - struct P_W32_SharedCtx *g = &P_W32_shared_ctx; - if (count > 0) - { - if (counter) - { - P_CounterAdd(counter, count); - } - P_W32_Fiber *fiber = P_W32_FiberFromId(FiberId()); - priority = ClampI32(priority, fiber->job_priority, P_Priority_Count - 1); /* A job cannot create a job with a higher priority than itself */ - if (pool_kind == P_Pool_Inherit) - { - pool_kind = fiber->job_pool; - } - P_W32_JobPool *pool = &g->job_pools[pool_kind]; - P_W32_JobQueue *queue = &pool->job_queues[priority]; - P_W32_LockTicketMutex(&queue->lock); - { - P_W32_JobInfo *info = 0; - if (queue->first_free) - { - info = queue->first_free; - queue->first_free = info->next; - } - else - { - info = PushStructNoZero(queue->arena, P_W32_JobInfo); - } - ZeroStruct(info); - info->count = count; - info->func = func; - info->sig = sig; - info->counter = counter; - if (queue->last) - { - queue->last->next = info; - } - else - { - queue->first = info; - } - queue->last = info; - } - P_W32_UnlockTicketMutex(&queue->lock); - - /* Wake workers */ - { - P_W32_LockTicketMutex(&pool->workers_wake_lock); - { - Atomic64FetchAdd(&pool->num_jobs_in_queue.v, count); - if (count >= P_W32_WakeAllThreshold) - { - WakeByAddressAll(&pool->num_jobs_in_queue); - } - else - { - for (i32 i = 0; i < count; 
++i) - { - WakeByAddressSingle(&pool->num_jobs_in_queue); - } - } - } - P_W32_UnlockTicketMutex(&pool->workers_wake_lock); - } - } -} - -//////////////////////////////// -//~ Time +//~ @hookdef Time hooks P_DateTime P_LocalTime(void) { @@ -2075,8 +795,9 @@ i64 P_TimeNs(void) } //////////////////////////////// -//~ File system +//~ @hookdef File system hooks +//- FIle system helpers String P_GetWritePath(Arena *arena) { u16 *p = 0; @@ -2157,6 +878,7 @@ void P_MkDir(String path) EndScratch(scratch); } +//- File creation P_File P_OpenFileRead(String path) { __prof; @@ -2267,6 +989,7 @@ void P_CloseFIle(P_File file) } } +//- File data manipulation String P_ReadFile(Arena *arena, P_File file) { __prof; @@ -2320,6 +1043,7 @@ void P_WriteFile(P_File file, String data) ); } +//- File info u64 P_GetFileSize(P_File file) { LARGE_INTEGER li_file_size; @@ -2365,7 +1089,7 @@ P_FileTime P_GetFileTime(P_File file) } //////////////////////////////// -//~ File map +//~ @hookdef File map hooks P_FileMap P_OpenFileMap(P_File file) { @@ -2433,7 +1157,7 @@ String P_GetFileMapData(P_FileMap map) } //////////////////////////////// -//~ Watch +//~ @hookdef Watch hooks P_Watch *P_AllocWatch(String dir_path) { @@ -2622,7 +1346,7 @@ void P_WakeWatch(P_Watch *dw) } //////////////////////////////// -//~ Window +//~ @hookdef Window hooks P_Window *P_AllocWindow(void) { @@ -2766,7 +1490,7 @@ u64 P_GetInternalWindowHandle(P_Window *p_window) } //////////////////////////////// -//~ Address +//~ @hookdef Address helper hooks P_Address P_AddressFromIpPortCstr(char *ip_cstr, char *port_cstr) { @@ -2968,7 +1692,7 @@ b32 P_AddressIsEqual(P_Address a, P_Address b) } //////////////////////////////// -//~ Sock +//~ @hookdef Sock hooks P_Sock *P_AllocSock(u16 listen_port, u64 sndbuf_size, u64 rcvbuf_size) { @@ -3080,7 +1804,7 @@ void P_WriteSock(P_Sock *sock, P_Address address, String data) } //////////////////////////////// -//~ Util +//~ @hookdef Utility hooks void P_MessageBox(P_MessageBoxKind kind, 
String message) { @@ -3178,7 +1902,7 @@ i64 P_GetCurrentSchedulerPeriodNs(void) } //////////////////////////////// -//~ Sleep +//~ @hookdef Sleep hooks void P_SleepPrecise(i64 sleep_time_ns) { @@ -3204,7 +1928,7 @@ void P_SleepPrecise(i64 sleep_time_ns) __profn("Sleep spin"); while (now_ns < target_ns) { - IxPause(); + _mm_pause(); now_ns = P_TimeNs(); } } @@ -3225,7 +1949,7 @@ void P_SleepFrame(i64 last_frame_time_ns, i64 target_dt_ns) } //////////////////////////////// -//~ Exit +//~ @hookdef Exit hooks void P_OnExit(P_ExitFunc *func) { @@ -3349,7 +2073,7 @@ void P_W32_InitBtnTable(void) g->vk_btn_table[VK_OEM_1] = P_Btn_Semicolon; } -P_JobDef(P_W32_AppStartupJob, UNUSED job) +JobDef(P_W32_AppStartupJob, UNUSED job) { P_W32_SharedCtx *g = &P_W32_shared_ctx; TempArena scratch = BeginScratchNoConflict(); @@ -3361,7 +2085,7 @@ P_JobDef(P_W32_AppStartupJob, UNUSED job) EndScratch(scratch); } -P_JobDef(P_W32_AppShutdownJob, _) +JobDef(P_W32_AppShutdownJob, _) { __prof; P_W32_SharedCtx *g = &P_W32_shared_ctx; @@ -3476,28 +2200,8 @@ int CALLBACK wWinMain(_In_ HINSTANCE instance, _In_opt_ HINSTANCE prev_instance, g->timer_start_qpc = qpc.QuadPart; } - /* Init fibers */ - g->num_fibers = 1; /* Fiber at index 0 always nil */ - g->fiber_names_arena = AllocArena(Gibi(64)); - - /* Init wait lists */ - g->wait_lists_arena = AllocArena(Gibi(64)); - /* Convert main thread to fiber */ - P_W32_AllocFiber(0); - - /* Init job pools */ - for (P_Pool pool_kind = 0; pool_kind < (i32)countof(g->job_pools); ++pool_kind) - { - P_W32_JobPool *pool = &g->job_pools[pool_kind]; - - /* Init queues */ - for (P_Priority priority = 0; priority < (i32)countof(pool->job_queues); ++priority) - { - P_W32_JobQueue *queue = &pool->job_queues[priority]; - queue->arena = AllocArena(Gibi(64)); - } - } + W32_AllocFiber(0); u64 cmdline_len = WstrLen(cmdline_wstr, countof(g->cmdline_args_wstr) - 1); CopyBytes(g->cmdline_args_wstr, cmdline_wstr, cmdline_len * sizeof(*cmdline_wstr)); @@ -3562,82 +2266,12 @@ 
int CALLBACK wWinMain(_In_ HINSTANCE instance, _In_opt_ HINSTANCE prev_instance, WSAStartup(MAKEWORD(2, 2), &g->wsa_data); g->socks_arena = AllocArena(Gibi(64)); - /* Start job scheduler */ - Atomic64FetchSet(&g->current_scheduler_cycle_period_ns.v, P_W32_DefaultSchedulerPeriodNs); - P_W32_Thread *scheduler_thread = P_W32_AllocThread(P_W32_JobSchedulerEntryFunc, 0, Lit("Scheduler thread"), PROF_THREAD_GROUP_SCHEDULER); - - //- Start job workers - /* TODO: Heuristic worker counts & affinities */ - { - __profn("Start job workers"); - for (P_Pool pool_kind = 0; pool_kind < (i32)countof(g->job_pools); ++pool_kind) - { - P_W32_JobPool *pool = &g->job_pools[pool_kind]; - String name_fmt = ZI; - i32 prof_group = PROF_THREAD_GROUP_FIBERS - Mebi(pool_kind); - switch (pool_kind) - { - default: Assert(0); break; - - case P_Pool_Sim: - { - name_fmt = Lit("Sim worker #%F"); - pool->num_worker_threads = 4; - pool->thread_affinity_mask = 0x000000000000000Full; - pool->thread_priority = THREAD_PRIORITY_TIME_CRITICAL; - } break; - - case P_Pool_User: - { - name_fmt = Lit("User worker #%F"); - pool->num_worker_threads = 4; - pool->thread_affinity_mask = 0x00000000000000F0ull; - pool->thread_priority = THREAD_PRIORITY_TIME_CRITICAL; - } break; - - case P_Pool_Audio: - { - name_fmt = Lit("Audio worker #%F"); - pool->num_worker_threads = 2; - pool->thread_affinity_mask = 0x0000000000000300ull; - pool->thread_priority = THREAD_PRIORITY_TIME_CRITICAL; - pool->thread_is_audio = 1; - } break; - - case P_Pool_Background: - { - name_fmt = Lit("Background worker #%F"); - pool->num_worker_threads = 2; - pool->thread_affinity_mask = 0x0000000000000C00ull; - } break; - - case P_Pool_Floating: - { - name_fmt = Lit("Floating worker #%F"); - pool->num_worker_threads = 8; - pool->thread_affinity_mask = 0x0000000000000FFFull; - } break; - } - pool->worker_threads_arena = AllocArena(Gibi(64)); - pool->worker_threads = PushStructs(pool->worker_threads_arena, P_W32_Thread *, pool->num_worker_threads); - 
pool->worker_contexts = PushStructs(pool->worker_threads_arena, P_W32_WorkerCtx, pool->num_worker_threads); - for (i32 i = 0; i < pool->num_worker_threads; ++i) - { - P_W32_WorkerCtx *ctx = &pool->worker_contexts[i]; - ctx->pool_kind = pool_kind; - ctx->id = i; - String name = StringFormat(pool->worker_threads_arena, name_fmt, FmtSint(i)); - pool->worker_threads[i] = P_W32_AllocThread(P_W32_JobWorkerEntryFunc, ctx, name, prof_group + i); - } - } - } - //- App startup /* Run app start job */ if (!Atomic32Fetch(&g->panicking)) { - P_Run(1, P_W32_AppStartupJob, 0, P_Pool_Floating, P_Priority_High, 0); + P_Run(1, P_W32_AppStartupJob, 0, PoolKind_Floating, PriorityKind_High, 0); } /* Wait for startup end or panic */ @@ -3665,7 +2299,7 @@ int CALLBACK wWinMain(_In_ HINSTANCE instance, _In_opt_ HINSTANCE prev_instance, /* Run exit callbacks job */ if (!Atomic32Fetch(&g->panicking)) { - P_Run(1, P_W32_AppShutdownJob, 0, P_Pool_Floating, P_Priority_High, 0); + P_Run(1, P_W32_AppShutdownJob, 0, PoolKind_Floating, PriorityKind_High, 0); } /* Wait for exit end or panic */ @@ -3678,66 +2312,6 @@ int CALLBACK wWinMain(_In_ HINSTANCE instance, _In_opt_ HINSTANCE prev_instance, WaitForMultipleObjects(countof(handles), handles, 0, INFINITE); } - /* Signal shutdown */ - if (!Atomic32Fetch(&g->panicking)) - { - Atomic32FetchSet(&g->shutdown, 1); - for (P_Pool pool_kind = 0; pool_kind < (i32)countof(g->job_pools); ++pool_kind) - { - P_W32_JobPool *pool = &g->job_pools[pool_kind]; - P_W32_LockTicketMutex(&pool->workers_wake_lock); - { - Atomic32FetchSet(&pool->workers_shutdown.v, 1); - Atomic64FetchSet(&pool->num_jobs_in_queue.v, -100000); - WakeByAddressAll(&pool->num_jobs_in_queue); - } - P_W32_UnlockTicketMutex(&pool->workers_wake_lock); - } - } - - /* Wait on worker threads */ - if (!Atomic32Fetch(&g->panicking)) - { - for (P_Pool pool_kind = 0; pool_kind < (i32)countof(g->job_pools); ++pool_kind) - { - P_W32_JobPool *pool = &g->job_pools[pool_kind]; - for (i32 i = 0; i < 
pool->num_worker_threads; ++i) - { - P_W32_Thread *worker_thread = pool->worker_threads[i]; - P_W32_WaitReleaseThread(worker_thread); - } - } - } - - /* Wait on scheduler thread */ - if (!Atomic32Fetch(&g->panicking)) - { - P_W32_WaitReleaseThread(scheduler_thread); - } - - /* Find any dangling threads that haven't exited gracefully by now */ - if (!Atomic32Fetch(&g->panicking)) - { - P_Lock lock = P_LockS(&g->threads_mutex); - if (g->first_thread) - { - TempArena scratch = BeginScratchNoConflict(); - u64 num_dangling_threads = 0; - String threads_msg = ZI; - threads_msg.text = PushDry(scratch.arena, u8); - for (P_W32_Thread *t = g->first_thread; t; t = t->next) - { - String name = StringFromCstr(t->thread_name_cstr, countof(t->thread_name_cstr)); - threads_msg.len += StringFormat(scratch.arena, Lit(" \"%F\"\n"), FmtString(name)).len; - ++num_dangling_threads; - } - threads_msg = StringFormat(scratch.arena, Lit("%F dangling thread(s):\n%F"), FmtUint(num_dangling_threads), FmtString(threads_msg)); - P_Panic(threads_msg); - EndScratch(scratch); - } - P_Unlock(&lock); - } - /* Exit */ i32 exit_code = 0; if (Atomic32Fetch(&g->panicking)) @@ -3749,7 +2323,8 @@ int CALLBACK wWinMain(_In_ HINSTANCE instance, _In_opt_ HINSTANCE prev_instance, return exit_code; } -//- CRT stub +//////////////////////////////// +//~ Crt stub #if !CrtlibIsEnabled diff --git a/src/platform/platform_win32.h b/src/platform/platform_win32.h index a1755548..e734b57a 100644 --- a/src/platform/platform_win32.h +++ b/src/platform/platform_win32.h @@ -17,198 +17,6 @@ # include #pragma warning(pop) -//////////////////////////////// -//~ Ticket mutex types - -Struct(P_W32_TicketMutex) -{ - Atomic64Padded ticket; - Atomic64Padded serving; -}; - -//////////////////////////////// -//~ Thread types - -#define P_W32_ThreadStackSize Kibi(64) -#define P_W32_ThreadDef(name, arg_name) void name(void *arg_name) -typedef P_W32_ThreadDef(P_W32_ThreadFunc, data); - -Struct(P_W32_Thread) -{ - P_W32_ThreadFunc 
*entry_point; - void *thread_data; - char thread_name_cstr[256]; - wchar_t thread_name_wstr[256]; - i32 profiler_group; - - P_W32_Thread *next; - P_W32_Thread *prev; - - HANDLE handle; -}; - -//////////////////////////////// -//~ Wait list types - -AlignedStruct(P_W32_WaitList, 64) -{ - u64 value; - i16 first_waiter; - i16 last_waiter; - i32 num_waiters; - P_W32_WaitList *next_in_bin; - P_W32_WaitList *prev_in_bin; -}; -StaticAssert(alignof(P_W32_WaitList) == 64); /* Avoid false sharing */ - -AlignedStruct(P_W32_WaitBin, 64) -{ - P_W32_WaitList *first_wait_list; - P_W32_WaitList *last_wait_list; - P_W32_WaitList *first_free_wait_list; - P_W32_TicketMutex lock; -}; -StaticAssert(alignof(P_W32_WaitBin) == 64); /* Avoid false sharing */ - -//////////////////////////////// -//~ Fiber types - -#define P_W32_FiberStackSize Mebi(4) -#define P_W32_FiberNamePrefixCstr "Fiber [" -#define P_W32_FiberNameSuffixCstr "]" -#define P_W32_FiberNameMaxSize 64 - -//- Yield param -typedef i32 P_W32_YieldKind; enum -{ - P_W32_YieldKind_None, - P_W32_YieldKind_Done, - P_W32_YieldKind_Wait, - - P_W32_YieldKind_Count -}; - -Struct(P_W32_YieldParam) -{ - P_W32_YieldKind kind; - union - { - struct - { - volatile void *addr; - void *cmp; - u32 size; - i64 timeout_ns; - } wait; - }; -}; - -//- Fiber -AlignedStruct(P_W32_Fiber, 64) -{ - /* ---------------------------------------------------- */ - void *addr; /* 08 bytes */ - /* ---------------------------------------------------- */ - char *name_cstr; /* 08 bytes */ - /* ---------------------------------------------------- */ - Atomic32 wake_lock; /* 04 bytes (4 byte alignment) */ - i16 id; /* 02 bytes */ - i16 parent_id; /* 02 bytes */ - /* ---------------------------------------------------- */ - u64 wait_addr; /* 08 bytes */ - /* ---------------------------------------------------- */ - u64 wait_time; /* 08 bytes */ - /* ---------------------------------------------------- */ - i16 next_addr_waiter; /* 02 bytes */ - i16 prev_addr_waiter; /* 
02 bytes */ - i16 next_time_waiter; /* 02 bytes */ - i16 prev_time_waiter; /* 02 bytes */ - /* ---------------------------------------------------- */ - u8 _pad1[8]; /* 08 bytes (padding) */ - /* ---------------------------------------------------- */ - u8 _pad2[8]; /* 08 bytes (padding) */ - /* ---------------------------------------------------- */ - /* -------------------- Cache line -------------------- */ - /* ---------------------------------------------------- */ - P_JobFunc *job_func; /* 08 bytes */ - /* ---------------------------------------------------- */ - void *job_sig; /* 08 bytes */ - /* ---------------------------------------------------- */ - i32 job_id; /* 04 bytes */ - i16 job_pool; /* 02 bytes */ - i16 job_priority; /* 02 bytes */ - /* ---------------------------------------------------- */ - P_Counter *job_counter; /* 08 bytes */ - /* ---------------------------------------------------- */ - P_W32_YieldParam *yield_param; /* 08 bytes */ - /* ---------------------------------------------------- */ - u8 _pad3[24]; /* 24 bytes (padding) */ - -}; -StaticAssert(sizeof(P_W32_Fiber) == 128); /* Padding validation (increase if necessary) */ -StaticAssert(alignof(P_W32_Fiber) == 64); /* Avoid false sharing */ -StaticAssert(offsetof(P_W32_Fiber, wake_lock) % 4 == 0); /* Atomic must be aligned */ - -//////////////////////////////// -//~ Job queue types - -//- Worker ctx -AlignedStruct(P_W32_WorkerCtx, 64) -{ - P_Pool pool_kind; - i32 id; -}; - -//- Job info -Struct(P_W32_JobInfo) -{ - i32 num_dispatched; - - i32 count; - P_JobFunc *func; - void *sig; - P_Counter *counter; - - i16 fiber_id; /* If the job is being resumed from a yield */ - - P_W32_JobInfo *next; -}; - -//- Job queue -AlignedStruct(P_W32_JobQueue, 64) -{ - P_W32_TicketMutex lock; - Arena *arena; - - P_W32_JobInfo *first; - P_W32_JobInfo *last; - - P_W32_JobInfo *first_free; -}; - -//- Job pool -AlignedStruct(P_W32_JobPool, 64) -{ - /* Jobs */ - P_W32_JobQueue job_queues[P_Priority_Count]; - 
- P_W32_TicketMutex free_fibers_lock; - i16 first_free_fiber_id; - - /* Workers */ - Atomic32Padded workers_shutdown; - Atomic64Padded num_jobs_in_queue; - P_W32_TicketMutex workers_wake_lock; - - i32 num_worker_threads; - i32 thread_priority; - u64 thread_affinity_mask; - b32 thread_is_audio; - Arena *worker_threads_arena; - P_W32_Thread **worker_threads; - P_W32_WorkerCtx *worker_contexts; -}; - //////////////////////////////// //~ Window types @@ -227,7 +35,7 @@ Struct(P_W32_Window) u32 flags; HWND hwnd; - P_Counter ready_fence; + Counter ready_fence; u16 utf16_high_surrogate_last_input; @@ -253,7 +61,7 @@ Struct(P_W32_Window) i32 current_event_arena_index; Arena *event_arenas[2]; - P_W32_Thread *window_thread; + W32_Thread *window_thread; Atomic32 shutdown; P_W32_Window *next_free; @@ -300,18 +108,8 @@ Struct(P_W32_Sock) #define P_W32_WindowClassName L"power_play_window_class" -/* Assume scheduler cycle is 20hz at start to be conservative */ -#define P_W32_DefaultSchedulerPeriodNs 50000000 -#define P_W32_NumRollingSchedulerPeriods 1000 - -#define P_W32_NumWaitAddrBins 16384 -#define P_W32_NumWaitTimeBins 1024 - #define P_W32_MaxOnExitFuncs 1024 -/* Arbitrary threshold for determining when to fall back from a looped WakeByAddressSingle to WakeByAddressAll */ -#define P_W32_WakeAllThreshold 16 - Struct(P_W32_SharedCtx) { SYSTEM_INFO info; @@ -333,13 +131,6 @@ Struct(P_W32_SharedCtx) //- Key lookup table P_Btn vk_btn_table[256]; - //- Worker thread pool - P_Mutex threads_mutex; - Arena *threads_arena; - P_W32_Thread *first_thread; - P_W32_Thread *last_thread; - P_W32_Thread *first_free_thread; - //- Watches pool P_Mutex watches_mutex; Arena *watches_arena; @@ -360,69 +151,10 @@ Struct(P_W32_SharedCtx) //- Exit funcs Atomic32 num_exit_funcs; P_ExitFunc *exit_funcs[P_W32_MaxOnExitFuncs]; - - //- Scheduler - Atomic64Padded current_scheduler_cycle; - Atomic64Padded current_scheduler_cycle_period_ns; - - //- Fibers - P_W32_TicketMutex fibers_lock; - i16 num_fibers; - 
Arena *fiber_names_arena; - P_W32_Fiber fibers[MaxFibers]; - - //- Wait lists - Atomic64Padded waiter_wake_gen; - P_W32_TicketMutex wait_lists_arena_lock; - Arena *wait_lists_arena; - - //- Wait tables - P_W32_WaitBin wait_addr_bins[P_W32_NumWaitAddrBins]; - P_W32_WaitBin wait_time_bins[P_W32_NumWaitTimeBins]; - - //- Job pools - P_W32_JobPool job_pools[P_Pool_Count]; }; extern P_W32_SharedCtx P_W32_shared_ctx; -//////////////////////////////// -//~ Ticket mutex operations - -void P_W32_LockTicketMutex(P_W32_TicketMutex *tm); -void P_W32_UnlockTicketMutex(P_W32_TicketMutex *tm); - -//////////////////////////////// -//~ Thread operations - -DWORD WINAPI P_W32_Win32ThreadProc(LPVOID vt); -P_W32_Thread *P_W32_AllocThread(P_W32_ThreadFunc *entry_point, void *thread_data, String thread_name, i32 profiler_group); -b32 P_W32_TryReleaseThread(P_W32_Thread *thread, f32 timeout_seconds); -void P_W32_WaitReleaseThread(P_W32_Thread *thread); - -//////////////////////////////// -//~ Wait list operations - -void P_W32_WakeLockedFibers(i32 num_fibers, P_W32_Fiber **fibers); -void P_W32_WakeByAddress(void *addr, i32 count); -void P_W32_WakeByTime(u64 time); - -//////////////////////////////// -//~ Fiber operations - -P_W32_Fiber *P_W32_AllocFiber(P_W32_JobPool *pool); -void P_W32_ReleaseFiber(P_W32_JobPool *pool, P_W32_Fiber *fiber); -ForceInline P_W32_Fiber *P_W32_FiberFromId(i16 id); -ForceNoInline void P_W32_FiberResume(P_W32_Fiber *fiber); -void P_W32_YieldFiber(P_W32_Fiber *fiber, P_W32_Fiber *parent_fiber); -void P_W32_FiberEntryPoint(void *id_ptr); - -//////////////////////////////// -//~ Workers - -P_W32_ThreadDef(P_W32_JobWorkerEntryFunc, worker_ctx_arg); -P_W32_ThreadDef(P_W32_JobSchedulerEntryFunc, _); - //////////////////////////////// //~ Time operations @@ -445,7 +177,7 @@ void P_W32_UpdateWindowFromSystem(P_W32_Window *window); void P_W32_UpdateWindowFromSettings(P_W32_Window *window, P_WindowSettings *settings); //- Window thread 
-P_W32_ThreadDef(P_W32_WindowThreadEntryFunc, arg); +W32_ThreadDef(P_W32_WindowThreadEntryFunc, arg); void P_W32_ProcessWindowEvent(P_W32_Window *window, P_WindowEvent event); void P_W32_WakeWindow(P_W32_Window *window); LRESULT CALLBACK P_W32_Win32WindowProc(HWND hwnd, UINT msg, WPARAM wparam, LPARAM lparam); @@ -461,5 +193,5 @@ P_Address P_W32_PlatformAddressFromWin32Address(P_W32_Address ws_addr); //~ Entry point void P_W32_InitBtnTable(void); -P_JobDef(P_W32_AppStartupJob, _); -P_JobDef(P_W32_AppShutdownJob, _); +JobDecl(P_W32_AppStartupJob, EmptySig); +JobDecl(P_W32_AppShutdownJob, EmptySig); diff --git a/src/playback/playback_win32.c b/src/playback/playback_win32.c index 8332e169..5581be39 100644 --- a/src/playback/playback_win32.c +++ b/src/playback/playback_win32.c @@ -16,7 +16,7 @@ PB_StartupReceipt PB_Startup(MIX_StartupReceipt *mixer_sr) LAX mixer_sr; PB_WSP_InitializeWasapi(); /* Start playback job */ - P_Run(1, PB_WSP_PlaybackJob, 0, P_Pool_Audio, P_Priority_High, &g->PB_WSP_PlaybackJob_counter); + P_Run(1, PB_WSP_PlaybackJob, 0, PoolKind_Audio, PriorityKind_High, &g->PB_WSP_PlaybackJob_counter); P_OnExit(&PB_WSP_Shutdown); return (PB_StartupReceipt) { 0 }; @@ -184,7 +184,7 @@ void PB_WSP_EndUpdate(PB_WSP_Buff *wspbuf, MIX_PcmF32 src) //////////////////////////////// //~ Playback job -P_JobDef(PB_WSP_PlaybackJob, _) +JobDef(PB_WSP_PlaybackJob, _) { __prof; PB_WSP_SharedState *g = &PB_WSP_shared_state; diff --git a/src/playback/playback_win32.h b/src/playback/playback_win32.h index 6288deee..cd5cad81 100644 --- a/src/playback/playback_win32.h +++ b/src/playback/playback_win32.h @@ -39,7 +39,7 @@ Struct(PB_WSP_SharedState) IAudioRenderClient *playback; WAVEFORMATEX *buffer_format; u32 buffer_frames; - P_Counter PB_WSP_PlaybackJob_counter; + Counter PB_WSP_PlaybackJob_counter; }; extern PB_WSP_SharedState PB_WSP_shared_state; @@ -49,8 +49,6 @@ extern PB_WSP_SharedState PB_WSP_shared_state; void PB_WSP_InitializeWasapi(void); 
P_ExitFuncDef(PB_WSP_Shutdown); -P_JobDef(PB_WSP_PlaybackJob, _); -P_ExitFuncDef(PB_WSP_Shutdown); //////////////////////////////// //~ Playback update @@ -61,4 +59,4 @@ void PB_WSP_EndUpdate(PB_WSP_Buff *wspbuf, MIX_PcmF32 src); //////////////////////////////// //~ Playback job -P_JobDef(PB_WSP_PlaybackJob, _); +JobDecl(PB_WSP_PlaybackJob, EmptySig); diff --git a/src/pp/pp_core.c b/src/pp/pp_core.c index c006c0d5..1cb33ef3 100644 --- a/src/pp/pp_core.c +++ b/src/pp/pp_core.c @@ -3,11 +3,9 @@ SharedUserState shared_user_state = ZI; //////////////////////////////// //~ Startup -UserStartupReceipt StartupUser(F_StartupReceipt *font_sr, - S_StartupReceipt *sprite_sr, +UserStartupReceipt StartupUser(S_StartupReceipt *sprite_sr, D_StartupReceipt *draw_sr, AC_StartupReceipt *asset_cache_sr, - SND_StartupReceipt *sound_sr, MIX_StartupReceipt *mixer_sr, SimStartupReceipt *sim_sr, String connect_address_str) @@ -28,7 +26,7 @@ UserStartupReceipt StartupUser(F_StartupReceipt *font_sr, g->real_time_ns = P_TimeNs(); /* TODO: Remove this */ - g->connect_address_str = CopyString(g->arena, connect_address_str); + g->connect_address_str = PushString(g->arena, connect_address_str); /* Initialize average dt to a reasonable value */ g->average_local_to_user_snapshot_publish_dt_ns = NsFromSeconds(1) / SIM_TICKS_PER_SECOND; @@ -57,8 +55,8 @@ UserStartupReceipt StartupUser(F_StartupReceipt *font_sr, P_ShowWindow(g->window); /* Start jobs */ - P_Run(1, UpdateUserJob, 0, P_Pool_User, P_Priority_High, &g->shutdown_job_counters); - P_Run(1, SimJob, 0, P_Pool_Sim, P_Priority_High, &g->shutdown_job_counters); + P_Run(1, UpdateUserJob, 0, PoolKind_User, PriorityKind_High, &g->shutdown_job_counters); + P_Run(1, SimJob, 0, PoolKind_Sim, PriorityKind_High, &g->shutdown_job_counters); P_OnExit(&ShutdownUser); return (UserStartupReceipt) { 0 }; @@ -124,7 +122,7 @@ void DrawDebugMovement(Entity *ent) } } -//- Debug string +//- Entity debug string String DebugStringFromEntity(Arena *arena, Entity 
*ent) { TempArena scratch = BeginScratch(arena); @@ -141,29 +139,29 @@ String DebugStringFromEntity(Arena *arena, Entity *ent) b32 receiving = sim_ent_has_prop(ent, SEPROP_SYNC_DST); if (transmitting & receiving) { - result.len += CopyString(arena, Lit(" networked (sending & receiving)")).len; + result.len += PushString(arena, Lit(" networked (sending & receiving)")).len; } else if (transmitting) { - result.len += CopyString(arena, Lit(" networked (sending)")).len; + result.len += PushString(arena, Lit(" networked (sending)")).len; } else if (receiving) { - result.len += CopyString(arena, Lit(" networked (receiving)")).len; + result.len += PushString(arena, Lit(" networked (receiving)")).len; } else { - result.len += CopyString(arena, Lit(" local")).len; + result.len += PushString(arena, Lit(" local")).len; } } - result.len += CopyString(arena, Lit("\n")).len; + result.len += PushString(arena, Lit("\n")).len; result.len += StringFormat(arena, Lit("owner: [%F]\n"), FmtUid(ent->owner.uid)).len; - result.len += CopyString(arena, Lit("\n")).len; + result.len += PushString(arena, Lit("\n")).len; { - result.len += CopyString(arena, Lit("props: 0x")).len; + result.len += PushString(arena, Lit("props: 0x")).len; for (u64 chunk_index = countof(ent->props); chunk_index-- > 0;) { u64 chunk = ent->props[chunk_index]; @@ -178,7 +176,7 @@ String DebugStringFromEntity(Arena *arena, Entity *ent) } } } - result.len += CopyString(arena, Lit("\n")).len; + result.len += PushString(arena, Lit("\n")).len; } if (!sim_ent_id_eq(ent->parent, SIM_ENT_ROOT_ID)) @@ -192,7 +190,7 @@ String DebugStringFromEntity(Arena *arena, Entity *ent) result.len += StringFormat(arena, Lit("next: [%F]\n"), FmtUid(ent->next.uid)).len; } - result.len += CopyString(arena, Lit("\n")).len; + result.len += PushString(arena, Lit("\n")).len; /* Pos */ Xform xf = sim_ent_get_xform(ent); @@ -216,8 +214,8 @@ String DebugStringFromEntity(Arena *arena, Entity *ent) } while (child->valid) { - result.len += 
CopyString(arena, Lit("\n---------------------------------\n")).len; - result.len += CopyString(arena, Lit("CHILD\n")).len; + result.len += PushString(arena, Lit("\n---------------------------------\n")).len; + result.len += PushString(arena, Lit("CHILD\n")).len; String child_text = DebugStringFromEntity(scratch.arena, child); result.len += IndentString(arena, child_text, 4).len; child = sim_ent_from_id(ss, child->next); @@ -240,7 +238,7 @@ P_LogEventCallbackFuncDef(ConsoleLogCallback, log) { ConsoleLog *clog = PushStruct(g->console_logs_arena, ConsoleLog); clog->level = log.level; - clog->msg = CopyString(g->console_logs_arena, log.msg); + clog->msg = PushString(g->console_logs_arena, log.msg); clog->datetime = log.datetime; clog->time_ns = log.time_ns; @@ -1922,94 +1920,94 @@ void UpdateUser(P_Window *window) text.text = PushDry(temp.arena, u8); #if BITBUFF_DEBUG - text.len += CopyString(temp.arena, Lit("(bitbuff debug enabled)")).len; - text.len += CopyString(temp.arena, Lit("\n")).len; + text.len += PushString(temp.arena, Lit("(bitbuff debug enabled)")).len; + text.len += PushString(temp.arena, Lit("\n")).len; #endif text.len += StringFormat(temp.arena, Lit("blended world entities: %F/%F"), FmtUint(g->ss_blended->num_ents_allocated), FmtUint(g->ss_blended->num_ents_reserved)).len; - text.len += CopyString(temp.arena, Lit("\n")).len; + text.len += PushString(temp.arena, Lit("\n")).len; text.len += StringFormat(temp.arena, Lit("blended world tick: %F"), FmtUint(g->ss_blended->tick)).len; - text.len += CopyString(temp.arena, Lit("\n")).len; + text.len += PushString(temp.arena, Lit("\n")).len; text.len += StringFormat(temp.arena, Lit("blended world time: %F"), FmtFloat(SecondsFromNs(g->ss_blended->sim_time_ns))).len; - text.len += CopyString(temp.arena, Lit("\n")).len; - text.len += CopyString(temp.arena, Lit("\n")).len; + text.len += PushString(temp.arena, Lit("\n")).len; + text.len += PushString(temp.arena, Lit("\n")).len; text.len += StringFormat(temp.arena, 
Lit("average local sim publish dt: %F"), FmtFloat(SecondsFromNs(g->average_local_to_user_snapshot_publish_dt_ns))).len; - text.len += CopyString(temp.arena, Lit("\n")).len; + text.len += PushString(temp.arena, Lit("\n")).len; text.len += StringFormat(temp.arena, Lit("local sim last known tick: %F"), FmtUint(g->local_sim_last_known_tick)).len; - text.len += CopyString(temp.arena, Lit("\n")).len; + text.len += PushString(temp.arena, Lit("\n")).len; text.len += StringFormat(temp.arena, Lit("local sim last known time: %F"), FmtFloat(SecondsFromNs(g->local_sim_last_known_time_ns))).len; - text.len += CopyString(temp.arena, Lit("\n")).len; + text.len += PushString(temp.arena, Lit("\n")).len; text.len += StringFormat(temp.arena, Lit("local sim predicted time: %F"), FmtFloat(SecondsFromNs(g->local_sim_predicted_time_ns))).len; - text.len += CopyString(temp.arena, Lit("\n")).len; + text.len += PushString(temp.arena, Lit("\n")).len; text.len += StringFormat(temp.arena, Lit("render time target: %F"), FmtFloat(SecondsFromNs(g->render_time_target_ns))).len; - text.len += CopyString(temp.arena, Lit("\n")).len; + text.len += PushString(temp.arena, Lit("\n")).len; text.len += StringFormat(temp.arena, Lit("render time: %F"), FmtFloat(SecondsFromNs(g->render_time_ns))).len; - text.len += CopyString(temp.arena, Lit("\n")).len; - text.len += CopyString(temp.arena, Lit("\n")).len; + text.len += PushString(temp.arena, Lit("\n")).len; + text.len += PushString(temp.arena, Lit("\n")).len; text.len += StringFormat(temp.arena, Lit("local player: [%F]"), FmtUid(local_player->id.uid)).len; - text.len += CopyString(temp.arena, Lit("\n")).len; - text.len += CopyString(temp.arena, Lit("\n")).len; + text.len += PushString(temp.arena, Lit("\n")).len; + text.len += PushString(temp.arena, Lit("\n")).len; Vec2 world_cursor = g->world_cursor; text.len += StringFormat(temp.arena, Lit("cursor world: %F, %F"), FmtFloat(world_cursor.x), FmtFloat(world_cursor.y)).len; - text.len += CopyString(temp.arena, 
Lit("\n")).len; + text.len += PushString(temp.arena, Lit("\n")).len; Vec2I32 world_tile_cursor = sim_world_tile_index_from_pos(world_cursor); text.len += StringFormat(temp.arena, Lit("cursor world tile: %F, %F"), FmtSint(world_tile_cursor.x), FmtSint(world_tile_cursor.y)).len; - text.len += CopyString(temp.arena, Lit("\n")).len; + text.len += PushString(temp.arena, Lit("\n")).len; Vec2I32 local_tile_cursor = sim_local_tile_index_from_world_tile_index(world_tile_cursor); text.len += StringFormat(temp.arena, Lit("cursor local tile: %F, %F"), FmtSint(local_tile_cursor.x), FmtSint(local_tile_cursor.y)).len; - text.len += CopyString(temp.arena, Lit("\n")).len; + text.len += PushString(temp.arena, Lit("\n")).len; Vec2I32 tile_chunk_cursor = sim_tile_chunk_index_from_world_tile_index(world_tile_cursor); text.len += StringFormat(temp.arena, Lit("cursor tile chunk: %F, %F"), FmtSint(tile_chunk_cursor.x), FmtSint(tile_chunk_cursor.y)).len; - text.len += CopyString(temp.arena, Lit("\n")).len; - text.len += CopyString(temp.arena, Lit("\n")).len; + text.len += PushString(temp.arena, Lit("\n")).len; + text.len += PushString(temp.arena, Lit("\n")).len; text.len += StringFormat(temp.arena, Lit("Network read: %F mbit/s"), FmtFloat((f64)g->net_bytes_read.last_second * 8 / 1000 / 1000)).len; - text.len += CopyString(temp.arena, Lit("\n")).len; + text.len += PushString(temp.arena, Lit("\n")).len; text.len += StringFormat(temp.arena, Lit("Network write: %F mbit/s"), FmtFloat((f64)g->net_bytes_sent.last_second * 8 / 1000 / 1000)).len; - text.len += CopyString(temp.arena, Lit("\n")).len; + text.len += PushString(temp.arena, Lit("\n")).len; text.len += StringFormat(temp.arena, Lit("Ping (real): %F ms"), FmtFloat(SecondsFromNs(local_player->player_last_rtt_ns) * 1000)).len; - text.len += CopyString(temp.arena, Lit("\n")).len; + text.len += PushString(temp.arena, Lit("\n")).len; text.len += StringFormat(temp.arena, Lit("Ping (average): %F ms"), 
FmtFloat(local_player->player_average_rtt_seconds * 1000)).len; - text.len += CopyString(temp.arena, Lit("\n")).len; - text.len += CopyString(temp.arena, Lit("\n")).len; + text.len += PushString(temp.arena, Lit("\n")).len; + text.len += PushString(temp.arena, Lit("\n")).len; text.len += StringFormat(temp.arena, Lit("Memory committed: %F MiB"), FmtFloat((f64)GetGstat(GSTAT_MEMORY_COMMITTED) / 1024 / 1024)).len; - text.len += CopyString(temp.arena, Lit("\n")).len; + text.len += PushString(temp.arena, Lit("\n")).len; text.len += StringFormat(temp.arena, Lit("Virtual memory reserved: %F TiB"), FmtFloat((f64)GetGstat(GSTAT_MEMORY_RESERVED) / 1024 / 1024 / 1024 / 1024)).len; - text.len += CopyString(temp.arena, Lit("\n")).len; + text.len += PushString(temp.arena, Lit("\n")).len; text.len += StringFormat(temp.arena, Lit("Arenas allocated: %F"), FmtUint(GetGstat(GSTAT_NUM_ARENAS))).len; - text.len += CopyString(temp.arena, Lit("\n")).len; - text.len += CopyString(temp.arena, Lit("\n")).len; + text.len += PushString(temp.arena, Lit("\n")).len; + text.len += PushString(temp.arena, Lit("\n")).len; text.len += StringFormat(temp.arena, Lit("Video memory (GPU): %F MiB"), FmtFloat((f64)vram.local_used / 1024 / 1024)).len; - text.len += CopyString(temp.arena, Lit("\n")).len; + text.len += PushString(temp.arena, Lit("\n")).len; text.len += StringFormat(temp.arena, Lit("Video memory (shared): %F MiB"), FmtFloat((f64)vram.non_local_used / 1024 / 1024)).len; - //text.len += CopyString(temp.arena, Lit("\n")).len; - //text.len += CopyString(temp.arena, Lit("\n")).len; + //text.len += PushString(temp.arena, Lit("\n")).len; + //text.len += PushString(temp.arena, Lit("\n")).len; #if RtcIsEnabled - text.len += CopyString(temp.arena, Lit("\n")).len; - text.len += CopyString(temp.arena, Lit("\n")).len; + text.len += PushString(temp.arena, Lit("\n")).len; + text.len += PushString(temp.arena, Lit("\n")).len; text.len += StringFormat(temp.arena, Lit("Debug steps: %F"), 
FmtUint(GetGstat(GSTAT_DEBUG_STEPS))).len; - //text.len += CopyString(temp.arena, Lit("\n")).len; + //text.len += PushString(temp.arena, Lit("\n")).len; #endif //draw_text(g->render_sig, font, pos, StringFormat(temp.arena, Lit("blended world entities: %F/%F"), FmtUint(g->ss_blended->num_ents_allocated), FmtUint(g->ss_blended->num_ents_reserved))); @@ -2068,7 +2066,7 @@ void UpdateUser(P_Window *window) //- User update job -P_JobDef(UpdateUserJob, _) +JobDef(UpdateUserJob, _) { SharedUserState *g = &shared_user_state; i64 time_ns = P_TimeNs(); @@ -2134,7 +2132,7 @@ void GenerateuserInputCmds(Client *user_input_client, u64 tick) //////////////////////////////// //~ Sim update -P_JobDef(SimJob, UNUSED job) +JobDef(SimJob, UNUSED job) { SharedUserState *g = &shared_user_state; #if 0 diff --git a/src/pp/pp_core.h b/src/pp/pp_core.h index 8ef4c076..3619327a 100644 --- a/src/pp/pp_core.h +++ b/src/pp/pp_core.h @@ -153,7 +153,7 @@ Struct(BindState) Struct(SharedUserState) { Atomic32 shutdown; - P_Counter shutdown_job_counters; + Counter shutdown_job_counters; P_Window *window; GPU_Swapchain *swapchain; @@ -253,11 +253,9 @@ extern SharedUserState shared_user_state; //~ Startup Struct(UserStartupReceipt) { i32 _; }; -UserStartupReceipt StartupUser(F_StartupReceipt *font_sr, - S_StartupReceipt *sprite_sr, +UserStartupReceipt StartupUser(S_StartupReceipt *sprite_sr, D_StartupReceipt *draw_sr, AC_StartupReceipt *asset_cache_sr, - SND_StartupReceipt *sound_sr, MIX_StartupReceipt *mixer_sr, SimStartupReceipt *sim_sr, String connect_address_str); @@ -289,7 +287,7 @@ MergesortCompareFuncDef(EntitySortCmp, arg_a, arg_b, _); //~ User update void UpdateUser(P_Window *window); -P_JobDef(UpdateUserJob, _); +JobDecl(UpdateUserJob, EmptySig); //////////////////////////////// //~ User input cmds @@ -299,4 +297,4 @@ void GenerateuserInputCmds(Client *user_input_client, u64 tick); //////////////////////////////// //~ Sim update -P_JobDef(SimJob, _); +JobDecl(SimJob, EmptySig); diff --git 
a/src/pp/pp_sim.h b/src/pp/pp_sim.h index 47e7eb33..b7b2a674 100644 --- a/src/pp/pp_sim.h +++ b/src/pp/pp_sim.h @@ -160,7 +160,7 @@ Struct(ControlData) { Vec2 move; /* Movement direction vector (speed of 0 -> 1) */ Vec2 focus; /* Focus direction vector (where does the controller want to look) */ Vec2 dbg_cursor; /* Where is the user's cursor in the world (used for things like editing the world) */ - u32 flags; + ControlFlag flags; }; typedef i32 CmdKind; enum { diff --git a/src/pp/pp_step.c b/src/pp/pp_step.c index bd3f84d0..2b571fd2 100644 --- a/src/pp/pp_step.c +++ b/src/pp/pp_step.c @@ -993,7 +993,7 @@ void sim_step(SimStepCtx *ctx) ControlData *control = &player->player_control; *control = cmd_ent->cmd_control; { - u32 flags = control->flags; + ControlFlag flags = control->flags; player->player_cursor_pos = control->dbg_cursor; player->player_hovered_ent = cmd_ent->cmd_control_hovered_ent; @@ -1296,7 +1296,7 @@ void sim_step(SimStepCtx *ctx) if (sim_ent_has_prop(ent, SEPROP_CONTROLLED)) { ControlData *control = &ent->control; - u32 flags = control->flags; + ControlFlag flags = control->flags; if (flags & SIM_CONTROL_FLAG_FIRE) { Entity *equipped = sim_ent_from_id(world, ent->equipped); if (equipped->valid) { diff --git a/src/settings/settings_core.c b/src/settings/settings_core.c index c247a933..c9218d37 100644 --- a/src/settings/settings_core.c +++ b/src/settings/settings_core.c @@ -173,7 +173,7 @@ abort: } else { - *error_out = CopyString(arena, error); + *error_out = PushString(arena, error); } } diff --git a/src/sound/sound_core.c b/src/sound/sound_core.c index f5f09f3e..1c0f9fa8 100644 --- a/src/sound/sound_core.c +++ b/src/sound/sound_core.c @@ -1,60 +1,14 @@ -SND_SharedState SND_shared_state = ZI; - -//////////////////////////////// -//~ Startup - -SND_StartupReceipt SND_Startup(AC_StartupReceipt *asset_cache_sr) -{ - __prof; - SND_SharedState *g = &SND_shared_state; - LAX asset_cache_sr; - g->params.arena = AllocArena(Gibi(64)); - return 
(SND_StartupReceipt) { 0 }; -} - -//////////////////////////////// -//~ Job sig store - -SND_LoadAssetJobSig *SND_AllocJobSig(void) -{ - SND_SharedState *g = &SND_shared_state; - SND_LoadAssetJobSig *p = 0; - { - P_Lock lock = P_LockE(&g->params.mutex); - if (g->params.head_free) - { - p = g->params.head_free; - g->params.head_free = p->next_free; - } - else - { - p = PushStruct(g->params.arena, SND_LoadAssetJobSig); - } - P_Unlock(&lock); - } - return p; -} - -void SND_ReleaseJobSig(SND_LoadAssetJobSig *p) -{ - SND_SharedState *g = &SND_shared_state; - P_Lock lock = P_LockE(&g->params.mutex); - p->next_free = g->params.head_free; - g->params.head_free = p; - P_Unlock(&lock); -} - //////////////////////////////// //~ Load job -P_JobDef(SND_LoadAssetJob, job) +JobDef(SND_LoadAssetJob, job) { __prof; SND_LoadAssetJobSig *params = job.sig; TempArena scratch = BeginScratchNoConflict(); String path = STRING(params->path_len, (u8 *)params->path_cstr); AC_Asset *asset = params->asset; - u32 flags = params->flags; + SND_SoundFlag flags = params->flags; P_LogInfoF("Loading sound \"%F\"", FmtString(path)); i64 start_ns = P_TimeNs(); @@ -125,15 +79,13 @@ P_JobDef(SND_LoadAssetJob, job) AC_MarkReady(asset, sound); } - SND_ReleaseJobSig(params); - EndScratch(scratch); } //////////////////////////////// //~ Load sound -AC_Asset *SND_LoadAsset(String path, u32 flags, b32 wait) +AC_Asset *SND_LoadAsset(String path, SND_SoundFlag flags, b32 wait) { __prof; TempArena scratch = BeginScratchNoConflict(); @@ -164,7 +116,7 @@ AC_Asset *SND_LoadAsset(String path, u32 flags, b32 wait) /* PushStruct task */ AC_MarkLoading(asset); - P_Run(1, SND_LoadAssetJob, params, P_Pool_Background, P_Priority_Low, &asset->counter); + P_Run(1, SND_LoadAssetJob, params, PoolKind_Background, PriorityKind_Low, &asset->counter); if (wait) { AC_WaitOnAssetReady(asset); @@ -175,7 +127,7 @@ AC_Asset *SND_LoadAsset(String path, u32 flags, b32 wait) return asset; } -SND_Sound *SND_LoadSoundAsync(String path, u32 
flags) +SND_Sound *SND_LoadSoundAsync(String path, SND_SoundFlag flags) { __prof; AC_Asset *asset = SND_LoadAsset(path, flags, 0); @@ -183,7 +135,7 @@ SND_Sound *SND_LoadSoundAsync(String path, u32 flags) return sound; } -SND_Sound *SND_LoadSoundWait(String path, u32 flags) +SND_Sound *SND_LoadSoundWait(String path, SND_SoundFlag flags) { __prof; AC_Asset *asset = SND_LoadAsset(path, flags, 1); diff --git a/src/sound/sound_core.h b/src/sound/sound_core.h index 1e0561ab..31e6eb9e 100644 --- a/src/sound/sound_core.h +++ b/src/sound/sound_core.h @@ -1,5 +1,5 @@ //////////////////////////////// -//~ Sound structs +//~ Sound types #define SND_SampleRate 48000 @@ -11,57 +11,15 @@ typedef u32 SND_SoundFlag; enum Struct(SND_Sound) { - u32 flags; + SND_SoundFlag flags; u64 samples_count; i16 *samples; }; -//////////////////////////////// -//~ Sound job types - -Struct(SND_LoadAssetJobSig) -{ - SND_LoadAssetJobSig *next_free; - - u32 flags; - AC_Asset *asset; - u64 path_len; - char path_cstr[1024]; -}; - -Struct(SND_LoadAssetJobSigStore) -{ - SND_LoadAssetJobSig *head_free; - Arena *arena; - P_Mutex mutex; -}; - -//////////////////////////////// -//~ Shared state - -Struct(SND_SharedState) -{ - SND_LoadAssetJobSigStore params; -}; - -extern SND_SharedState SND_shared_state; - -//////////////////////////////// -//~ Startup - -Struct(SND_StartupReceipt) { i32 _; }; -SND_StartupReceipt SND_Startup(AC_StartupReceipt *asset_cache_sr); - -//////////////////////////////// -//~ Sound load job - -SND_LoadAssetJobSig *SND_AllocJobSig(void); -void SND_ReleaseJobSig(SND_LoadAssetJobSig *p); - //////////////////////////////// //~ Sound load operations -P_JobDef(SND_LoadAssetJob, job); -AC_Asset *SND_LoadAsset(String path, u32 flags, b32 wait); -SND_Sound *SND_LoadSoundAsync(String path, u32 flags); -SND_Sound *SND_LoadSoundWait(String path, u32 flags); +JobDecl(SND_LoadAssetJob, { SND_SoundFlag flags; AC_Asset *asset; String path; }); +AC_Asset *SND_LoadAsset(String path, SND_SoundFlag 
flags, b32 wait); +SND_Sound *SND_LoadSoundAsync(String path, SND_SoundFlag flags); +SND_Sound *SND_LoadSoundWait(String path, SND_SoundFlag flags); diff --git a/src/sprite/sprite_core.c b/src/sprite/sprite_core.c index 47d6a15b..3c65487e 100644 --- a/src/sprite/sprite_core.c +++ b/src/sprite/sprite_core.c @@ -44,7 +44,7 @@ S_StartupReceipt S_Startup(void) g->scopes_arena = AllocArena(Gibi(64)); - P_Run(1, S_EvictorJob, 0, P_Pool_Background, P_Priority_Low, &g->shutdown_counter); + P_Run(1, S_EvictorJob, 0, PoolKind_Background, PriorityKind_Low, &g->shutdown_counter); P_OnExit(&S_Shutdown); #if RESOURCE_RELOADING @@ -188,7 +188,7 @@ S_Sheet S_SheetFromAseResult(Arena *arena, ASE_DecodedSheet ase) u64 index = 0; for (ASE_Span *ase_span = ase.span_head; ase_span; ase_span = ase_span->next) { - String name = CopyString(arena, ase_span->name); + String name = PushString(arena, ase_span->name); S_Span *span = &sheet.spans[index]; span->name = name; span->start = ase_span->start; @@ -265,7 +265,7 @@ S_Sheet S_SheetFromAseResult(Arena *arena, ASE_DecodedSheet ase) for (struct temp_slice_group_node *temp_slice_group_node = temp_slice_group_head; temp_slice_group_node; temp_slice_group_node = temp_slice_group_node->next) { S_SheetSliceGroup *slice_group = &sheet.slice_groups[index]; - slice_group->name = CopyString(arena, temp_slice_group_node->name); + slice_group->name = PushString(arena, temp_slice_group_node->name); slice_group->per_frame_count = temp_slice_group_node->per_frame_count; slice_group->frame_slices = PushStructs(arena, S_Slice, ase.num_frames * slice_group->per_frame_count); @@ -422,7 +422,7 @@ S_Sheet S_SheetFromAseResult(Arena *arena, ASE_DecodedSheet ase) //~ Load job //- Job def -P_JobDef(S_LoadSpriteJob, job) +JobDef(S_LoadSpriteJob, job) { __prof; S_SharedState *g = &S_shared_state; @@ -483,7 +483,7 @@ void S_PushLoadJob(S_CacheEntryRef ref, S_Tag tag) } /* PushStruct work */ - P_Run(1, S_LoadSpriteJob, cmd, P_Pool_Background, P_Priority_Inherit, 0); 
+ P_Run(1, S_LoadSpriteJob, cmd, PoolKind_Background, PriorityKind_Inherit, 0); } //////////////////////////////// @@ -998,7 +998,7 @@ void *S_DataFromTag(S_Scope *scope, S_Tag tag, S_CacheEntryKind kind, b32 await) { while (Atomic32Fetch(&ref.e->state) != S_CacheEntryState_Loaded) { - IxPause(); + _mm_pause(); } } @@ -1189,7 +1189,7 @@ MergesortCompareFuncDef(S_EvictorSortCmp, arg_a, arg_b, _) * - The cache is over its memory budget and the node's last reference is longer ago than the grace period * - Resource reloading is enabled and the node is out of date due to a change to its original resource file */ -P_JobDef(S_EvictorJob, _) +JobDef(S_EvictorJob, _) { S_SharedState *g = &S_shared_state; b32 shutdown = 0; diff --git a/src/sprite/sprite_core.h b/src/sprite/sprite_core.h index 3ccceaf1..6803ce6e 100644 --- a/src/sprite/sprite_core.h +++ b/src/sprite/sprite_core.h @@ -188,18 +188,6 @@ Struct(S_Scope) S_Scope *next_free; }; -//////////////////////////////// -//~ Cmd - -Struct(S_Cmd) -{ - S_Cmd *next_free; - S_Scope *scope; - S_CacheEntryRef ref; - S_Tag tag; - u8 tag_path_buff[512]; -}; - //////////////////////////////// //~ Evictor @@ -249,11 +237,6 @@ Struct(S_SharedState) /* Cache */ S_Cache cache; - /* Cmds */ - P_Mutex cmds_mutex; - Arena *cmds_arena; - S_Cmd *first_free_cmd; - /* Scopes */ P_Mutex scopes_mutex; Arena *scopes_arena; @@ -261,7 +244,7 @@ Struct(S_SharedState) /* Evictor */ Atomic32Padded evictor_cycle; - P_Counter shutdown_counter; + Counter shutdown_counter; b32 evictor_scheduler_shutdown; P_Mutex evictor_scheduler_mutex; P_Cv evictor_scheduler_shutdown_cv; @@ -302,7 +285,7 @@ S_Sheet S_SheetFromAseResult(Arena *arena, ASE_DecodedSheet ase); //~ Load job void S_PushLoadJob(S_CacheEntryRef ref, S_Tag tag); -P_JobDef(S_LoadSpriteJob, job); +JobDecl(S_LoadSpriteJob, { S_Scope *scope; S_CacheEntryRef ref; S_Tag tag; }); //////////////////////////////// //~ Cache load operations @@ -365,4 +348,4 @@ W_CallbackFuncDef(S_WatchSpriteCallback, name); 
//~ Evictor job MergesortCompareFuncDef(S_EvictorSortCmp, arg_a, arg_b, udata); -P_JobDef(S_EvictorJob, _); +JobDecl(S_EvictorJob, EmptySig); diff --git a/src/watch/watch_core.c b/src/watch/watch_core.c index cb0d60be..2186ff9e 100644 --- a/src/watch/watch_core.c +++ b/src/watch/watch_core.c @@ -10,8 +10,8 @@ void W_Startup(void) g->watch_events_arena = AllocArena(Gibi(64)); - P_Run(1, W_MonitorJob, 0, P_Pool_Floating, P_Priority_Low, &g->watch_jobs_counter); - P_Run(1, W_DispatcherJob, 0, P_Pool_Background, P_Priority_Low, &g->watch_jobs_counter); + P_Run(1, W_MonitorJob, 0, PoolKind_Floating, PriorityKind_Low, &g->watch_jobs_counter); + P_Run(1, W_DispatcherJob, 0, PoolKind_Background, PriorityKind_Low, &g->watch_jobs_counter); P_OnExit(&W_Shutdown); } @@ -49,7 +49,7 @@ void W_RegisterCallback(W_CallbackFunc *callback) P_Unlock(&lock); } -P_JobDef(W_RunCallbackJob, job) +JobDef(W_RunCallbackJob, job) { __prof; W_RunCallbackJobSig *sig = job.sig; @@ -65,7 +65,7 @@ P_JobDef(W_RunCallbackJob, job) * & dispatching watch callbacks into two separate jobs so that we can delay * the dispatch, allowing for deduplication of file modification notifications. 
*/ -P_JobDef(W_MonitorJob, _) +JobDef(W_MonitorJob, _) { TempArena scratch = BeginScratchNoConflict(); W_SharedState *g = &W_shared_state; @@ -98,7 +98,7 @@ P_JobDef(W_MonitorJob, _) if (!ignore) { W_Event *e = PushStruct(g->watch_events_arena, W_Event); - e->name = CopyString(g->watch_events_arena, name_src); + e->name = PushString(g->watch_events_arena, name_src); if (g->last_watch_event) { g->last_watch_event->next = e; @@ -123,7 +123,7 @@ P_JobDef(W_MonitorJob, _) //////////////////////////////// //~ Dispatcher job -P_JobDef(W_DispatcherJob, _) +JobDef(W_DispatcherJob, _) { W_SharedState *g = &W_shared_state; @@ -147,7 +147,7 @@ P_JobDef(W_DispatcherJob, _) for (W_Event *src_event = g->first_watch_event; src_event; src_event = src_event->next) { W_Event *e = PushStruct(scratch.arena, W_Event); - e->name = CopyString(scratch.arena, src_event->name); + e->name = PushString(scratch.arena, src_event->name); if (last_watch_event) { last_watch_event->next = e; @@ -200,8 +200,8 @@ P_JobDef(W_DispatcherJob, _) W_RunCallbackJobSig sig = ZI; sig.name = e->name; sig.callbacks = callbacks; - P_Counter counter = ZI; - P_Run(num_callbacks, W_RunCallbackJob, &sig, P_Pool_Background, P_Priority_Low, &counter); + Counter counter = ZI; + P_Run(num_callbacks, W_RunCallbackJob, &sig, PoolKind_Background, PriorityKind_Low, &counter); P_WaitOnCounter(&counter); } } diff --git a/src/watch/watch_core.h b/src/watch/watch_core.h index 982b3aa0..6aadb871 100644 --- a/src/watch/watch_core.h +++ b/src/watch/watch_core.h @@ -29,7 +29,7 @@ Struct(W_SharedState) { P_Watch *watch; Atomic32 W_Shutdown; - P_Counter watch_jobs_counter; + Counter watch_jobs_counter; P_Mutex watch_dispatcher_mutex; Arena *watch_events_arena; @@ -58,10 +58,10 @@ void W_RegisterCallback(W_CallbackFunc *callback); //////////////////////////////// //~ Callback job -P_JobDef(W_RunCallbackJob, job); +JobDecl(W_RunCallbackJob, EmptySig); //////////////////////////////// //~ Long running jobs -P_JobDef(W_MonitorJob, job); 
-P_JobDef(W_DispatcherJob, job); +JobDecl(W_MonitorJob, { i32 _; }); +JobDecl(W_DispatcherJob, { i32 _; });