diff --git a/src/common.h b/src/common.h index c1c802c4..b261a13c 100644 --- a/src/common.h +++ b/src/common.h @@ -678,10 +678,7 @@ INLINE f64 clamp_f64(f64 v, f64 min, f64 max) { return v < min ? min : v > max ? #include "prof_tracy.h" -#define PROF_THREAD_GROUP_WORKERS_DEDICATED -MEBI(10) -#define PROF_THREAD_GROUP_FIBERS -MEBI(9) -#define PROF_THREAD_GROUP_WORKERS_BACKGROUND -MEBI(8) -#define PROF_THREAD_GROUP_WORKERS_BLOCKING -MEBI(7) +#define PROF_THREAD_GROUP_FIBERS -GIBI(1) #define PROF_THREAD_GROUP_SCHEDULER -MEBI(6) #define PROF_THREAD_GROUP_IO -MEBI(5) #define PROF_THREAD_GROUP_WINDOW -MEBI(4) diff --git a/src/gp_dx12.c b/src/gp_dx12.c index 7de6432d..b83fcd57 100644 --- a/src/gp_dx12.c +++ b/src/gp_dx12.c @@ -437,6 +437,7 @@ INTERNAL void dx12_init_device(void) u32 dxgi_factory_flags = 0; #if DX12_DEBUG { + __profn("Enable debug layer"); ID3D12Debug *debug_controller0 = 0; hr = D3D12GetDebugInterface(&IID_ID3D12Debug, (void **)&debug_controller0); if (FAILED(hr)) { @@ -461,27 +462,35 @@ INTERNAL void dx12_init_device(void) #endif /* Create factory */ - hr = CreateDXGIFactory2(dxgi_factory_flags, &IID_IDXGIFactory6, (void **)&G.factory); - if (FAILED(hr)) { - dx12_init_error(LIT("Failed to initialize DXGI factory")); + { + __profn("Create factory"); + hr = CreateDXGIFactory2(dxgi_factory_flags, &IID_IDXGIFactory6, (void **)&G.factory); + if (FAILED(hr)) { + dx12_init_error(LIT("Failed to initialize DXGI factory")); + } } /* Create device */ { + __profn("Create device"); IDXGIAdapter1 *adapter = 0; ID3D12Device *device = 0; struct string error = LIT("Could not initialize GPU device."); struct string first_gpu_name = ZI; u32 adapter_index = 0; while (1) { - hr = IDXGIFactory6_EnumAdapterByGpuPreference(G.factory, adapter_index, DXGI_GPU_PREFERENCE_HIGH_PERFORMANCE, &IID_IDXGIAdapter1, (void **)&adapter); + { + hr = IDXGIFactory6_EnumAdapterByGpuPreference(G.factory, adapter_index, DXGI_GPU_PREFERENCE_HIGH_PERFORMANCE, &IID_IDXGIAdapter1, (void **)&adapter); + } if (SUCCEEDED(hr)) { DXGI_ADAPTER_DESC1 desc; IDXGIAdapter1_GetDesc1(adapter, &desc); if (first_gpu_name.len == 0) { first_gpu_name = string_from_wstr_no_limit(scratch.arena, desc.Description); } - hr = D3D12CreateDevice((IUnknown *)adapter, D3D_FEATURE_LEVEL_12_0, &IID_ID3D12Device, (void **)&device); + { + hr = D3D12CreateDevice((IUnknown *)adapter, D3D_FEATURE_LEVEL_12_0, &IID_ID3D12Device, (void **)&device); + } if (SUCCEEDED(hr)) { break; } @@ -508,6 +517,7 @@ INTERNAL void dx12_init_device(void) #if DX12_DEBUG /* Enable D3D12 Debug break */ { + __profn("Enable d3d12 debug break"); ID3D12InfoQueue *info = 0; hr = ID3D12Device_QueryInterface(G.device, &IID_ID3D12InfoQueue, (void **)&info); if (FAILED(hr)) { @@ -520,6 +530,7 @@ INTERNAL void dx12_init_device(void) /* Enable DXGI Debug break */ { + __profn("Enable dxgi debug break"); IDXGIInfoQueue *dxgi_info = 0; hr = DXGIGetDebugInterface1(0, &IID_IDXGIInfoQueue, (void **)&dxgi_info); if (FAILED(hr)) { @@ -534,8 +545,8 @@ INTERNAL void dx12_init_device(void) #if PROFILING_D3D /* Enable stable power state */ { - b32 success = 1; __profn("Set stable power state"); + b32 success = 1; HKEY key = 0; success = RegOpenKeyExW(HKEY_LOCAL_MACHINE, L"SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\AppModelUnlock", 0, KEY_READ, &key) == ERROR_SUCCESS; if (success) { diff --git a/src/sprite.c b/src/sprite.c index 41158899..ae8b49a9 100644 --- a/src/sprite.c +++ b/src/sprite.c @@ -735,18 +735,17 @@ INTERNAL void refcount_add(struct cache_entry *e, i32 amount) i32 evictor_cycle = atomic32_fetch(&G.evictor_cycle.v); struct atomic64 *refcount_atomic = &e->refcount_struct.v; u64 old_refcount_uncast = atomic64_fetch(refcount_atomic); - do { + while (1) { struct cache_refcount new_refcount = *(struct cache_refcount *)&old_refcount_uncast; new_refcount.count += amount; new_refcount.last_ref_cycle = evictor_cycle; u64 v = atomic64_fetch_test_set(refcount_atomic, old_refcount_uncast, *(u64 *)&new_refcount); - if (v != old_refcount_uncast) { - old_refcount_uncast = v; - } else { + if (v == old_refcount_uncast) { ASSERT(new_refcount.count >= 0); break; } - } while (1); + old_refcount_uncast = v; + } } INTERNAL struct sprite_scope_cache_ref *scope_ensure_ref_unsafe(struct sprite_scope *scope, struct cache_entry *e) diff --git a/src/sys.h b/src/sys.h index 3fcb3454..313978e6 100644 --- a/src/sys.h +++ b/src/sys.h @@ -43,6 +43,13 @@ void sys_wake(void *addr, i32 count); i16 sys_current_fiber_id(void); +/* When a job reaches a 'sys_wait' statement, by default the fiber will yield to the job pool and potentially resume on a different thread. + * Call this function to disable this behavior for the remainder of the job, and instead force a blocking wait on the current thread. + * + * For example, a job that processes audio and runs until the end of the program can set thread priority and then call this function to + * ensure it will never switch off of the worker thread that it set thread priority for. */ +void sys_make_current_job_unyielding(void); + /* ========================== * * Job * ========================== */ @@ -61,7 +68,7 @@ enum sys_pool { /* This pool contains a large number of floating low priority worker threads with the intent that these threads will only block and do no actual work. * Blocking operations (e.g. opening a file) should be isolated to jobs that get pushed to this pool. They can then be yielded on by jobs in other pools that actually do work. */ - SYS_POOL_BLOCKING = 2, + SYS_POOL_FLOATING = 2, NUM_SYS_POOLS }; diff --git a/src/sys_win32.c b/src/sys_win32.c index 09db1523..47058154 100644 --- a/src/sys_win32.c +++ b/src/sys_win32.c @@ -162,10 +162,10 @@ struct alignas(64) fiber { /* ==================================================== */ char *name_cstr; /* 08 bytes */ /* ==================================================== */ - struct atomic16 wake_lock; /* 02 bytes (aligned) */ + struct atomic16 wake_lock; /* 02 bytes (4 byte alignment) */ i16 id; /* 02 bytes */ i16 parent_id; /* 02 bytes */ - i16 can_yield; /* 02 bytes */ + i16 unyielding; /* 02 bytes */ /* ==================================================== */ u64 wait_addr; /* 08 bytes */ /* ==================================================== */ @@ -179,12 +179,8 @@ struct alignas(64) fiber { u8 _pad0[8]; /* 08 bytes (padding) */ /* ==================================================== */ u8 _pad1[8]; /* 08 bytes (padding) */ - /* ==================================================== */ - /* ==================================================== */ /* ==================== Cache line ==================== */ - /* ==================================================== */ - /* ==================================================== */ struct sys_scratch_ctx scratch_ctx; /* 16 bytes */ /* ==================================================== */ @@ -396,7 +392,8 @@ i64 sys_current_scheduler_period_ns(void) void sys_wait(void *addr, void *cmp, u32 size, i64 timeout_ns) { struct fiber *fiber = fiber_from_id(sys_current_fiber_id()); - if (fiber->can_yield) { + i16 parent_id = fiber->parent_id; + if (parent_id != 0 && !fiber->unyielding) { *fiber->yield_param = (struct yield_param) { .kind = YIELD_KIND_WAIT, .wait = { @@ -406,8 +403,7 @@ void sys_wait(void *addr, void *cmp, u32 size, i64 timeout_ns) .timeout_ns = timeout_ns } }; - ASSERT(fiber->parent_id != 0); - job_fiber_yield(fiber, fiber_from_id(fiber->parent_id)); + job_fiber_yield(fiber, fiber_from_id(parent_id)); } else { i32 timeout_ms = 0; if (timeout_ns == I64_MAX) { @@ -762,11 +758,9 @@ INTERNAL struct fiber *fiber_alloc(struct job_pool *pool) /* Init win32 fiber */ if (pool != 0) { fiber->addr = CreateFiber(FIBER_STACK_SIZE, job_fiber_entry, (void *)(i64)fiber_id); - fiber->can_yield = 1; } else { /* Fiber is not a part of a job pool, convert thread to fiber */ fiber->addr = ConvertThreadToFiber((void *)(i64)fiber_id); - fiber->can_yield = 0; } } MEMZERO_STRUCT(&fiber->wake_lock); @@ -784,6 +778,7 @@ INTERNAL struct fiber *fiber_alloc(struct job_pool *pool) fiber->job_counter = 0; fiber->yield_param = 0; fiber->parent_id = 0; + fiber->unyielding = 0; return fiber; } @@ -815,6 +810,11 @@ i16 sys_current_fiber_id(void) return (i16)(i64)GetFiberData(); } +void sys_make_current_job_unyielding(void) +{ + fiber_from_id(sys_current_fiber_id())->unyielding = 1; +} + void sys_run(i32 count, sys_job_func *func, void *sig, enum sys_pool pool_kind, enum sys_priority priority, struct snc_counter *counter) { if (count > 0) { @@ -876,7 +876,7 @@ INTERNAL void job_fiber_yield(struct fiber *fiber, struct fiber *parent_fiber) MemoryBarrier(); SwitchToFiber(parent_fiber->addr); MemoryBarrier(); - __prof_fiber_enter(fiber->name_cstr, PROF_THREAD_GROUP_FIBERS + fiber->id); + __prof_fiber_enter(fiber->name_cstr, PROF_THREAD_GROUP_FIBERS + MEBI(fiber->job_pool) + fiber->id + 1); } } @@ -895,7 +895,7 @@ INTERNAL void job_fiber_entry(void *id_ptr) { i16 id = (i32)(i64)id_ptr; struct fiber *fiber = fiber_from_id(id); - __prof_fiber_enter(fiber->name_cstr, PROF_THREAD_GROUP_FIBERS + fiber->id); + __prof_fiber_enter(fiber->name_cstr, PROF_THREAD_GROUP_FIBERS + MEBI(fiber->job_pool) + fiber->id + 1); while (1) { /* Run job */ { @@ -1031,13 +1031,14 @@ INTERNAL SYS_THREAD_DEF(job_worker_entry, worker_ctx_arg) __profnc("Run fiber", RGB32_F(0.25, 0.75, 0)); __profvalue(job_fiber->id); struct yield_param yield = ZI; + job_fiber->parent_id = worker_fiber_id; + job_fiber->unyielding = 0; job_fiber->job_func = job_func; job_fiber->job_sig = job_sig; job_fiber->job_id = job_id; job_fiber->job_pool = pool_kind; job_fiber->job_priority = job_priority; job_fiber->job_counter = job_counter; - job_fiber->parent_id = worker_fiber_id; job_fiber->yield_param = &yield; b32 done = 0; while (!done) { @@ -1296,14 +1297,13 @@ INTERNAL SYS_THREAD_DEF(test_entry, _) for (enum sys_pool pool_kind = 0; pool_kind < (i32)countof(G.job_pools); ++pool_kind) { struct job_pool *pool = &G.job_pools[pool_kind]; struct string name_fmt = ZI; - i32 prof_group = 0; + i32 prof_group = PROF_THREAD_GROUP_FIBERS + MEBI(pool_kind); switch (pool_kind) { default: ASSERT(0); break; case SYS_POOL_DEDICATED: { name_fmt = LIT("Dedicated worker #%F"); - prof_group = PROF_THREAD_GROUP_WORKERS_DEDICATED; pool->thread_affinity_mask = 0xFFFFFFFFFFFFFFFFULL; pool->thread_priority = THREAD_PRIORITY_TIME_CRITICAL; pool->num_worker_threads = 4; @@ -1312,16 +1312,14 @@ INTERNAL SYS_THREAD_DEF(test_entry, _) case SYS_POOL_BACKGROUND: { name_fmt = LIT("Background worker #%F"); - prof_group = PROF_THREAD_GROUP_WORKERS_BACKGROUND; pool->thread_affinity_mask = 0xFFFFFFFFFFFFFFFFULL; pool->thread_priority = 0; pool->num_worker_threads = 4; } break; - case SYS_POOL_BLOCKING: + case SYS_POOL_FLOATING: { - name_fmt = LIT("Blocking worker #%F"); - prof_group = PROF_THREAD_GROUP_WORKERS_BLOCKING; + name_fmt = LIT("Floating worker #%F"); pool->thread_affinity_mask = 0xFFFFFFFFFFFFFFFFULL; pool->thread_priority = 0; pool->num_worker_threads = 4;