yield sleep testing

This commit is contained in:
jacob 2025-07-09 22:04:11 -05:00
parent cc51fe29a7
commit 1dd5bf90d0
8 changed files with 63 additions and 223 deletions

View File

@ -2768,8 +2768,6 @@ INTERNAL void present_blit(struct swapchain_buffer *dst, struct dx12_resource *s
void gp_present(struct sys_window *window, struct v2i32 backresolution, struct gp_resource *texture, struct xform texture_xf, i32 vsync)
{
__prof;
//sys_sleep(0.1);
struct swapchain *swapchain = &G.swapchain;
struct swapchain_buffer *swapchain_buffer = update_swapchain(swapchain, window, backresolution);
struct dx12_resource *texture_resource = (struct dx12_resource *)texture;
@ -2777,8 +2775,6 @@ void gp_present(struct sys_window *window, struct v2i32 backresolution, struct g
/* Blit */
present_blit(swapchain_buffer, texture_resource, texture_xf);
//sys_sleep(0.1);
u32 present_flags = 0;
if (!vsync) {
present_flags |= (DXGI_PRESENT_ALLOW_TEARING * DX12_ALLOW_TEARING);

View File

@ -7,7 +7,7 @@
#if PROFILING
#define PROFILING_SYSTEM_TRACE 0
#define PROFILING_SYSTEM_TRACE 1
#define PROFILING_CAPTURE_FRAME_IMAGE 0
#define PROFILING_LOCKS 0
#define PROFILING_D3D 1

View File

@ -247,7 +247,7 @@ INTERNAL SYS_THREAD_DEF(resource_watch_dispatcher_thread_entry_point, _)
{
__profn("Delay");
snc_unlock(&watch_dispatcher_lock);
sys_sleep(WATCH_DISPATCHER_DELAY_SECONDS);
sys_wait(NULL, NULL, 0, NS_FROM_SECONDS(WATCH_DISPATCHER_DELAY_SECONDS));
watch_dispatcher_lock = snc_lock_e(&G.watch_dispatcher_mutex);
}
if (!atomic_i32_fetch(&G.watch_shutdown)) {

View File

@ -152,6 +152,7 @@ GLOBAL struct {
/* Evictor */
struct atomic_i32 evictor_cycle;
struct snc_counter shutdown_counter;
b32 evictor_scheduler_shutdown;
struct snc_mutex evictor_scheduler_mutex;
struct snc_cv evictor_scheduler_shutdown_cv;
@ -248,7 +249,7 @@ struct sprite_startup_receipt sprite_startup(struct gp_startup_receipt *gp_sr,
G.scopes_arena = arena_alloc(GIBI(64));
sys_run(1, sprite_evictor_job, NULL, SYS_PRIORITY_BACKGROUND, NULL);
sys_run(1, sprite_evictor_job, NULL, SYS_PRIORITY_BACKGROUND, &G.shutdown_counter);
app_register_exit_callback(&sprite_shutdown);
resource_register_watch_callback(&sprite_resource_watch_callback);
@ -266,6 +267,8 @@ INTERNAL APP_EXIT_CALLBACK_FUNC_DEF(sprite_shutdown)
snc_cv_broadcast(&G.evictor_scheduler_shutdown_cv);
snc_unlock(&lock);
}
/* Wait for evictor shutdown */
snc_counter_wait(&G.shutdown_counter);
}
/* ========================== *

View File

@ -416,17 +416,6 @@ u32 sys_num_logical_processors(void);
void sys_exit(void);
void sys_panic(struct string msg);
/* ========================== *
* Sleep
* ========================== */
/* Sleep for precisely the amount of time specified (more cpu intensive) */
void sys_sleep_precise(f64 seconds);
/* Sleep for the amount of time specified rounded to the OS scheduler period
* (less cpu intensive) */
void sys_sleep(f64 seconds);
/* ========================== *
* Command line
* ========================== */

View File

@ -109,7 +109,7 @@ struct win32_window {
* NOTE: This is not the actual rate that the scheduler runs at, just the
* minimum amount of time that it can refer to. Smaller values mean that the
* scheduler has to process a greater number of wait lists upon waking up. */
#define SCHEDULER_MIN_INTERVAL_NS (KIBI(256)) /* ~256 microseconds */
#define SCHEDULER_MIN_INTERVAL_NS (KIBI(256)) /* ~262 microseconds */
struct alignas(64) wait_list {
/* =================================================== */
@ -307,7 +307,7 @@ GLOBAL struct {
/* Scheduler */
struct atomic_i64 current_scheduler_interval; /* TODO: Prevent false sharing */
struct atomic_i64 last_scheduler_interval; /* TODO: Prevent false sharing */
/* Wait lists */
struct atomic_u64 waiter_wake_gen; /* TODO: Prevent false sharing */
@ -330,6 +330,7 @@ GLOBAL struct {
/* Workers */
struct atomic_i64 workers_wake_gen; /* TODO: Prevent false sharing */
struct atomic_i64 num_jobs_in_queue; /* TODO: Prevent false sharing */
struct snc_mutex workers_wake_mutex;
struct snc_cv workers_wake_cv;
@ -391,9 +392,13 @@ void sys_wait(void *addr, void *cmp, u32 size, i64 timeout_ns)
timeout_ms = timeout_ns / 1000000;
timeout_ms += (timeout_ms == 0) * math_fsign(timeout_ns);
}
if (addr == NULL) {
Sleep(timeout_ms);
} else {
WaitOnAddress(addr, cmp, size, timeout_ms);
}
}
}
@ -587,6 +592,7 @@ void sys_wake_all(void *addr)
if (num_waiters > 0) {
struct snc_lock lock = snc_lock_e(&G.workers_wake_mutex);
{
atomic_i64_fetch_add(&G.num_jobs_in_queue, num_waiters);
if (atomic_i64_fetch(&G.workers_wake_gen) >= 0) {
atomic_i64_fetch_add(&G.workers_wake_gen, 1);
snc_cv_broadcast(&G.workers_wake_cv);
@ -755,6 +761,7 @@ void sys_run(i32 count, sys_job_func *func, void *sig, enum sys_priority priorit
/* TODO: Only wake necessary amount of workers */
struct snc_lock lock = snc_lock_e(&G.workers_wake_mutex);
{
atomic_i64_fetch_add(&G.num_jobs_in_queue, count);
if (atomic_i64_fetch(&G.workers_wake_gen) >= 0) {
atomic_i64_fetch_add(&G.workers_wake_gen, 1);
snc_cv_broadcast(&G.workers_wake_cv);
@ -870,7 +877,6 @@ INTERNAL SYS_THREAD_DEF(job_worker_entry, worker_ctx_arg)
i64 last_seen_wake_gen = 0;
while (last_seen_wake_gen >= 0) {
/* Pull job from queue */
b32 queues_empty = true;
enum sys_priority job_priority = 0;
i16 job_fiber_id = 0;
i32 job_id = 0;
@ -878,7 +884,7 @@ INTERNAL SYS_THREAD_DEF(job_worker_entry, worker_ctx_arg)
void *job_sig = 0;
struct snc_counter *job_counter = 0;
{
__profnc("Pull job", RGB32_F(0.75, 0.75, 0));
//__profnc("Pull job", RGB32_F(0.75, 0.75, 0));
for (u32 queue_index = 0; queue_index < countof(queues) && !job_func; ++queue_index) {
struct job_queue *queue = queues[queue_index];
if (queue) {
@ -894,6 +900,7 @@ INTERNAL SYS_THREAD_DEF(job_worker_entry, worker_ctx_arg)
job_id = info->num_dispatched++;
if (job_id < info->count) {
/* Pick job */
atomic_i64_fetch_add(&G.num_jobs_in_queue, -1);
job_func = info->func;
job_sig = info->sig;
job_counter = info->counter;
@ -901,21 +908,16 @@ INTERNAL SYS_THREAD_DEF(job_worker_entry, worker_ctx_arg)
/* We're picking up the last dispatch, so dequeue the job */
dequeue = true;
}
if (!next) {
queues_empty = queue_index >= ((i32)countof(queues) - 1);
}
}
} else {
/* This job is to be resumed from a yield */
atomic_i64_fetch_add(&G.num_jobs_in_queue, -1);
job_fiber_id = info->fiber_id;
job_id = info->num_dispatched;
job_func = info->func;
job_sig = info->sig;
job_counter = info->counter;
dequeue = true;
if (!next) {
queues_empty = queue_index >= ((i32)countof(queues) - 1);
}
}
if (dequeue) {
if (!next) {
@ -978,7 +980,7 @@ INTERNAL SYS_THREAD_DEF(job_worker_entry, worker_ctx_arg)
i64 wait_timeout_ns = yield.wait.timeout_ns;
i64 wait_time = 0;
if (wait_timeout_ns > 0 && wait_timeout_ns < I64_MAX) {
wait_time = atomic_i64_fetch(&G.current_scheduler_interval) + (wait_timeout_ns / SCHEDULER_MIN_INTERVAL_NS);
wait_time = (sys_time_ns() + wait_timeout_ns) / SCHEDULER_MIN_INTERVAL_NS - 1;
}
u64 wait_addr_bin_index = (u64)wait_addr % NUM_WAIT_ADDR_BINS;
@ -990,7 +992,7 @@ INTERNAL SYS_THREAD_DEF(job_worker_entry, worker_ctx_arg)
{
if (wait_time != 0) while (atomic_i32_fetch_test_set(&wait_time_bin->lock, 0, 1) != 0) ix_pause();
{
b32 cancel_wait = true;
b32 cancel_wait = wait_addr == 0 && wait_time == 0;
if (wait_addr != 0) {
switch (wait_size) {
case 1: cancel_wait = (u8)_InterlockedCompareExchange8(wait_addr, 0, 0) != *(u8 *)wait_cmp; break;
@ -1001,7 +1003,7 @@ INTERNAL SYS_THREAD_DEF(job_worker_entry, worker_ctx_arg)
}
}
if (wait_time != 0 && !cancel_wait) {
cancel_wait = atomic_i64_fetch(&G.current_scheduler_interval) > wait_time;
cancel_wait = wait_time <= atomic_i64_fetch(&G.last_scheduler_interval);
}
if (!cancel_wait) {
if (wait_addr != 0) {
@ -1095,76 +1097,6 @@ INTERNAL SYS_THREAD_DEF(job_worker_entry, worker_ctx_arg)
if (wait_time != 0) atomic_i32_fetch_set(&wait_time_bin->lock, 0);
}
if (wait_addr != 0) atomic_i32_fetch_set(&wait_addr_bin->lock, 0);
#if 0
while (atomic_i32_fetch_test_set(&wait_addr_bin->lock, 0, 1) != 0) ix_pause();
{
/* Load and compare values now that bin is locked */
b32 cancel_wait;
switch (wait_size) {
case 1: cancel_wait = (u8)_InterlockedCompareExchange8(wait_addr, 0, 0) == *(u8 *)wait_cmp; break;
case 2: cancel_wait = (u16)_InterlockedCompareExchange16(wait_addr, 0, 0) == *(u16 *)wait_cmp; break;
case 4: cancel_wait = (u32)_InterlockedCompareExchange(wait_addr, 0, 0) == *(u32 *)wait_cmp; break;
case 8: cancel_wait = (u64)_InterlockedCompareExchange64(wait_addr, 0, 0) == *(u64 *)wait_cmp; break;
default: cancel_wait = true; ASSERT(false); break; /* Invalid wait size */
}
if (!cancel_wait) {
/* Search addr wait list in bin */
struct wait_list *wait_addr_list = NULL;
for (struct wait_list *tmp = wait_addr_bin->first_wait_list; tmp && !wait_addr_list; tmp = tmp->next_in_bin) {
if (tmp->value == (u64)wait_addr) {
wait_addr_list = tmp;
}
}
/* Allocate new wait list */
if (!wait_addr_list) {
if (wait_addr_bin->first_free_wait_list) {
wait_addr_list = wait_addr_bin->first_free_wait_list;
wait_addr_bin->first_free_wait_list = wait_addr_list->next_in_bin;
} else {
while (atomic_i32_fetch_test_set(&G.wait_lists_arena_lock, 0, 1) != 0) ix_pause();
{
wait_addr_list = arena_push_no_zero(G.wait_lists_arena, struct wait_list);
}
atomic_i32_fetch_set(&G.wait_lists_arena_lock, 0);
}
MEMZERO_STRUCT(wait_addr_list);
wait_addr_list->value = wait_addr;
if (wait_addr_bin->last_wait_list) {
wait_addr_bin->last_wait_list->next_in_bin = wait_addr_list;
wait_addr_list->prev_in_bin = wait_addr_bin->last_wait_list;
} else {
wait_addr_bin->first_wait_list = wait_addr_list;
}
wait_addr_bin->last_wait_list = wait_addr_list;
}
/* Insert fiber into wait list */
job_fiber->wait_addr = wait_addr;
if (wait_addr_list->last_waiter) {
fiber_from_id(wait_addr_list->last_waiter)->next_addr_waiter = job_fiber_id;
job_fiber->prev_addr_waiter = wait_addr_list->last_waiter;
} else {
wait_addr_list->first_waiter = job_fiber_id;
}
wait_addr_list->last_waiter = job_fiber_id;
++wait_addr_list->num_waiters;
/* Pop worker's job fiber */
job_fiber = NULL;
done = true;
}
}
atomic_i32_fetch_set(&wait_addr_bin->lock, 0);
#endif
} break;
case YIELD_KIND_DONE:
@ -1182,7 +1114,7 @@ INTERNAL SYS_THREAD_DEF(job_worker_entry, worker_ctx_arg)
/* Wait */
struct snc_lock wake_lock = snc_lock_s(&G.workers_wake_mutex);
{
if (queues_empty) {
if (atomic_i64_fetch(&G.num_jobs_in_queue) <= 0) {
i64 new_wake_gen = atomic_i64_fetch(&G.workers_wake_gen);
while (new_wake_gen == last_seen_wake_gen) {
__profnc("Wait for job", RGB32_F(0.75, 0.75, 0));
@ -1224,14 +1156,15 @@ INTERNAL SYS_THREAD_DEF(job_scheduler_entry, _)
{
__profn("Job scheduler wait");
LARGE_INTEGER due = ZI;
due.QuadPart = -(SCHEDULER_MIN_INTERVAL_NS / 100);
//due.QuadPart = -(SCHEDULER_MIN_INTERVAL_NS / 100);
due.QuadPart = 0;
SetWaitableTimerEx(timer, &due, 0, NULL, NULL, NULL, 0);
WaitForSingleObject(timer, INFINITE);
}
u64 wake_gen = atomic_u64_fetch_add_u64(&G.waiter_wake_gen, 1);
i64 new_interval = sys_time_ns() / SCHEDULER_MIN_INTERVAL_NS;
atomic_i64_fetch_set(&G.current_scheduler_interval, new_interval);
atomic_i64_fetch_set(&G.last_scheduler_interval, new_interval);
{
__profn("Job scheduler run");
struct arena_temp temp = arena_temp_begin(scratch.arena);
@ -1405,6 +1338,7 @@ INTERNAL SYS_THREAD_DEF(job_scheduler_entry, _)
if (num_waiters > 0) {
struct snc_lock lock = snc_lock_e(&G.workers_wake_mutex);
{
atomic_i64_fetch_add(&G.num_jobs_in_queue, num_waiters);
if (atomic_i64_fetch(&G.workers_wake_gen) >= 0) {
atomic_i64_fetch_add(&G.workers_wake_gen, 1);
snc_cv_broadcast(&G.workers_wake_cv);
@ -1433,9 +1367,10 @@ INTERNAL SYS_THREAD_DEF(test_entry, _)
/* Start scheduler */
struct sys_thread *scheduler_thread = sys_thread_alloc(job_scheduler_entry, NULL, LIT("Scheduler thread"), PROF_THREAD_GROUP_SCHEDULER);
while (atomic_i64_fetch(&G.current_scheduler_interval) == 0) ix_pause();
while (atomic_i64_fetch(&G.last_scheduler_interval) == 0) ix_pause();
/* Start workers */
//G.num_worker_threads = 1;
G.num_worker_threads = 6;
G.worker_threads_arena = arena_alloc(GIBI(64));
G.worker_threads = arena_push_array(G.worker_threads_arena, struct sys_thread *, G.num_worker_threads);
@ -3241,119 +3176,6 @@ void sys_panic(struct string msg)
}
}
/* ========================== *
* Sleep
* ========================== */
/* https://blog.bearcats.nl/perfect-sleep-function/ */
/* Sleep for ~`seconds` using a Win32 waitable timer for the bulk of the wait,
 * then spin with YieldProcessor() for the final stretch. Timer waits stop
 * `tolerance` seconds short of the target so the spin loop can absorb
 * scheduler wake-up jitter; elapsed time is measured with
 * QueryPerformanceCounter. `timer` must be a valid waitable-timer handle
 * (ideally created with CREATE_WAITABLE_TIMER_HIGH_RESOLUTION — see caller). */
INTERNAL void win32_precise_sleep_timer(HANDLE timer, f64 seconds)
{
__prof;
/* TODO: Does the high frequency timer even require setting / scaling of
 * timeBeginPeriod/scheduler_period_ms? There isn't much documentation. */
i64 qpc_per_second = G.qpc_per_second;
i32 scheduler_period_ms = G.scheduler_period_ms;
LARGE_INTEGER qpc;
QueryPerformanceCounter(&qpc);
/* Absolute QPC tick at which the sleep should end */
i64 target_qpc = (i64)(qpc.QuadPart + seconds * qpc_per_second);
/* TODO: Maybe increase tolerance for higher precision but more power usage */
//const f64 tolerance = scheduler_period_ms * 0.001200;
const f64 tolerance = scheduler_period_ms * 0.000520;
//const f64 tolerance = scheduler_period_ms * 1;
/* Cap on a single timer wait, in 100 ns units (9500 * 100ns = 0.95 ms per
 * scheduler-period millisecond, i.e. slightly under one scheduler period) */
i64 max_ticks = (i64)scheduler_period_ms * 9500;
while (true) {
__profn("Sleep part");
/* Break sleep up into parts that are lower than scheduler period */
f64 remaining_seconds = (f64)(target_qpc - qpc.QuadPart) / (f64)qpc_per_second;
/* Remaining time minus tolerance, converted to 100 ns timer ticks */
i64 sleep_ticks = (i64)((remaining_seconds - tolerance) * 10000000);
if (sleep_ticks <= 0) {
break;
}
LARGE_INTEGER due;
/* Negative due time = relative wait (per SetWaitableTimerEx docs) */
due.QuadPart = -(sleep_ticks > max_ticks ? max_ticks : sleep_ticks);
SetWaitableTimerEx(timer, &due, 0, NULL, NULL, NULL, 0);
WaitForSingleObject(timer, INFINITE);
QueryPerformanceCounter(&qpc);
}
/* Spin for any remaining time */
{
__profn("Sleep spin");
while (qpc.QuadPart < target_qpc) {
YieldProcessor();
QueryPerformanceCounter(&qpc);
}
}
}
/* Fallback precise sleep for systems without high-resolution waitable timers:
 * sleep whole scheduler-period slices with Sleep(), then spin with
 * YieldProcessor() until the QueryPerformanceCounter target is reached.
 * `tolerance` (about half a scheduler period) is subtracted from the blocking
 * portion so the spin loop can absorb Sleep()'s wake-up inaccuracy. */
INTERNAL void win32_precise_sleep_legacy(f64 seconds)
{
__prof;
i64 qpc_per_second = G.qpc_per_second;
i32 scheduler_period_ms = G.scheduler_period_ms;
LARGE_INTEGER qpc;
QueryPerformanceCounter(&qpc);
/* Absolute QPC tick at which the sleep should end */
i64 target_qpc = (i64)(qpc.QuadPart + seconds * qpc_per_second);
/* TODO: Calculate tolerance */
/* TODO: Maybe increase tolerance for higher precision but more power usage */
//const double tolerance = 1.02;
const double tolerance = 0.52 * scheduler_period_ms;
/* Sleep */
f64 sleep_ms = (seconds * 1000) - tolerance;
/* Number of whole scheduler periods we can safely block for */
i32 sleep_slices = (i32)(sleep_ms / scheduler_period_ms);
if (sleep_slices > 0) {
__profn("Legacy sleep part");
Sleep((DWORD)sleep_slices * scheduler_period_ms);
}
QueryPerformanceCounter(&qpc);
/* Spin for any remaining time */
{
__profn("Legacy sleep spin");
while (qpc.QuadPart < target_qpc) {
YieldProcessor();
QueryPerformanceCounter(&qpc);
}
}
}
/* Sleep for precisely `seconds` (more CPU intensive than sys_sleep).
 * Intended to dispatch to the high-resolution waitable-timer path when
 * available and fall back to the legacy Sleep()+spin path otherwise, but the
 * timer path is currently compiled out (#if 0) — see FIXME below; only the
 * legacy path runs. */
void sys_sleep_precise(f64 seconds)
{
__prof;
/* FIXME: Enable this */
#if 0
HANDLE timer = ctx->sleep_timer;
if (timer) {
/* Use newer sleeping method */
win32_precise_sleep_timer(timer, seconds);
} else {
/* Fall back to older sleep method if CREATE_WAITABLE_TIMER_HIGH_RESOLUTION
 * is not available due to older windows version */
win32_precise_sleep_legacy(seconds);
}
#else
/* Reference the timer path so it doesn't trigger an unused warning while
 * disabled */
(UNUSED)win32_precise_sleep_timer;
win32_precise_sleep_legacy(seconds);
#endif
}
/* Coarse sleep: rounds the requested duration to whole milliseconds (never
 * below 1 ms) and hands the wait to the OS scheduler via Sleep(). */
void sys_sleep(f64 seconds)
{
	__prof;
	f32 requested_ms = (f32)(seconds * 1000.0);
	u32 sleep_ms = max_u32(1, math_round_to_int(requested_ms));
	Sleep(sleep_ms);
}
/* ========================== *
* Command line
* ========================== */

View File

@ -49,6 +49,7 @@ struct console_log {
GLOBAL struct {
struct atomic_i32 shutdown;
struct snc_counter shutdown_job_counters;
struct sim_ctx *local_sim_ctx;
@ -253,8 +254,8 @@ struct user_startup_receipt user_startup(struct gp_startup_receipt *gp_sr,
sys_window_register_event_callback(G.window, &window_event_callback);
/* Start jobs */
sys_run(1, local_sim_job, NULL, SYS_PRIORITY_HIGH, NULL);
sys_run(1, user_job, NULL, SYS_PRIORITY_HIGH, NULL);
sys_run(1, local_sim_job, NULL, SYS_PRIORITY_HIGH, &G.shutdown_job_counters);
sys_run(1, user_job, NULL, SYS_PRIORITY_HIGH, &G.shutdown_job_counters);
app_register_exit_callback(&user_shutdown);
return (struct user_startup_receipt) { 0 };
@ -263,9 +264,11 @@ struct user_startup_receipt user_startup(struct gp_startup_receipt *gp_sr,
INTERNAL APP_EXIT_CALLBACK_FUNC_DEF(user_shutdown)
{
__prof;
sys_window_unregister_event_callback(G.window, &window_event_callback);
/* Signal shutdown */
atomic_i32_fetch_set(&G.shutdown, true);
/* Wait for jobs shutdown */
snc_counter_wait(&G.shutdown_job_counters);
}
/* ========================== *

View File

@ -261,6 +261,33 @@ INLINE void dict_remove_entry(struct dict *dict, struct dict_entry *entry)
* Sleep frame
* ========================== */
/* Sleep for `sleep_time_ns` nanoseconds with sub-interval accuracy: blocking
 * waits (sys_wait with no address) cover the bulk of the duration, then a
 * pause-loop spin burns the remaining tail for precision. */
INLINE void sleep_precise(i64 sleep_time_ns)
{
	__prof;
	i64 spin_margin_ns = 200000; /* stop blocking this early; spin the rest */
	i64 chunk_ns = 500000;       /* length of each blocking wait */
	i64 t = sys_time_ns();
	i64 deadline_ns = t + sleep_time_ns;
	/* Blocking phase: wait in fixed chunks while far from the deadline */
	for (;;) {
		if (t >= deadline_ns - chunk_ns - spin_margin_ns) {
			break;
		}
		__profn("Sleep part");
		sys_wait(NULL, NULL, 0, chunk_ns);
		t = sys_time_ns();
	}
	/* Spin phase: pause until the deadline passes */
	{
		__profn("Sleep spin");
		while (t < deadline_ns) {
			ix_pause();
			t = sys_time_ns();
		}
	}
}
INLINE void sleep_frame(i64 last_frame_time_ns, i64 target_dt_ns)
{
if (last_frame_time_ns != 0 && target_dt_ns > 0) {
@ -268,7 +295,7 @@ INLINE void sleep_frame(i64 last_frame_time_ns, i64 target_dt_ns)
i64 last_frame_dt_ns = now_ns - last_frame_time_ns;
i64 sleep_time_ns = target_dt_ns - last_frame_dt_ns;
if (sleep_time_ns > 0) {
sys_sleep_precise(SECONDS_FROM_NS(sleep_time_ns));
sleep_precise(sleep_time_ns);
}
}
}