From a397458c727f310b0a28992e30c55670e3a62303 Mon Sep 17 00:00:00 2001 From: jacob Date: Sun, 6 Jul 2025 17:46:10 -0500 Subject: [PATCH] mutex spinning --- /* Review notes, placed in the free-text area after the --- separator so that every hunk below stays byte-identical. Summary of the visible hunks: src/snc.c adds DEFAULT_MUTEX_SPIN (4000), renames snc_lock_e and snc_lock_s to snc_lock_spin_e and snc_lock_spin_s with an extra i32 spin parameter, adds a do/while bounded by spin ahead of the sys_wait fallback in both, and re-adds snc_lock_e and snc_lock_s as wrappers that pass DEFAULT_MUTEX_SPIN. src/snc.h declares the two spin variants and src/sys.h declares sys_wake_single. src/sys_win32.c moves first_free_yielder from struct wait_list to struct wait_bin (shrinking _pad1 from 32 to 24 bytes), adds a sys_wake_single stub whose body is ASSERT(false) and (UNUSED)addr, removes the #if 1 and #endif pair inside sys_wake_all, adds a TODO about heuristic pinning, and makes the yielder free-list code use bin->first_free_yielder. NOTE(review): in snc_lock_spin_s the do/while tests spin_cnt < spin, but no line of that hunk increments spin_cnt or re-reads m->v into v, so if the first read of v has either 0xC0000000 bit set the loop looks unable to terminate - confirm and fix before merging. NOTE(review): in snc_lock_spin_e the test-and-set from 0 to (1 << 31) is issued once before the do/while, so the spin iterations only re-examine the local v and never repeat that acquisition attempt - confirm this is intended. NOTE(review): sys_wake_single is declared in sys.h but would assert if called - presumably a placeholder, verify no caller exists yet. The final sys_win32.c hunk is cut off in this view, so it is not described beyond what is shown. */ src/snc.c | 60 ++++++++++++++++++++++++++++++++++--------------- src/snc.h | 2 ++ src/sys.h | 1 + src/sys_win32.c | 24 ++++++++++++-------- 4 files changed, 60 insertions(+), 27 deletions(-) diff --git a/src/snc.c b/src/snc.c index 949f4802..270381ab 100644 --- a/src/snc.c +++ b/src/snc.c @@ -3,29 +3,38 @@ #include "sys.h" #include "memory.h" +#define DEFAULT_MUTEX_SPIN 4000 + /* ========================== * * Mutex * ========================== */ -struct snc_lock snc_lock_e(struct snc_mutex *m) +struct snc_lock snc_lock_spin_e(struct snc_mutex *m, i32 spin) { __prof; b32 locked = false; while (!locked) { + /* Spin lock */ + i32 spin_cnt = 0; i32 v = atomic_i32_fetch_test_set(&m->v, 0, (1 << 31)); - if (v == 0) { - locked = true; - } else { - /* Set pending */ - if ((v & (1 << 30)) == 0) { - i32 old = atomic_i32_fetch_test_set(&m->v, v | (1 << 30), v); - while (old != v && (old & (1 << 30)) == 0) { + do { + if (v == 0) { + locked = true; + } else { + /* Set pending */ + if ((v & (1 << 30)) == 0) { + i32 old = atomic_i32_fetch_test_set(&m->v, v | (1 << 30), v); + while (old != v && (old & (1 << 30)) == 0) { + v = old; + old = atomic_i32_fetch_test_set(&m->v, v | (1 << 30), v); + } v = old; - old = atomic_i32_fetch_test_set(&m->v, v | (1 << 30), v); } - v = old; } - /* Wait for change */ + ++spin_cnt; + } while (spin_cnt < spin && !locked); + /* Wait if not successful */ + if (!locked) { sys_wait(&m->v, &v, 4); } } @@ -35,19 +44,24 @@ struct snc_lock snc_lock_e(struct snc_mutex *m) return lock; } -struct snc_lock snc_lock_s(struct snc_mutex *m) +struct snc_lock snc_lock_spin_s(struct snc_mutex *m, i32 spin) { __prof; b32 locked = false; while (!locked) { + /* Spin lock */ + i32 spin_cnt = 0; i32 v = atomic_i32_fetch(&m->v); - while (!locked && (v & 0xC0000000) == 0) { - /* Increment shared lock count */ - i32 old 
= atomic_i32_fetch_test_set(&m->v, v, v + 1); - if (v == old) { - locked = true; + do { + while (!locked && (v & 0xC0000000) == 0) { + /* Lock has no exclusive or pending exclusive lock, increment shared count */ + i32 old = atomic_i32_fetch_test_set(&m->v, v, v + 1); + if (v == old) { + locked = true; + } } - } + } while (spin_cnt < spin && !locked); + /* Wait if not successful */ if (!locked) { sys_wait(&m->v, &v, 4); } @@ -57,6 +71,16 @@ struct snc_lock snc_lock_s(struct snc_mutex *m) return lock; } +struct snc_lock snc_lock_e(struct snc_mutex *m) +{ + return snc_lock_spin_e(m, DEFAULT_MUTEX_SPIN); +} + +struct snc_lock snc_lock_s(struct snc_mutex *m) +{ + return snc_lock_spin_s(m, DEFAULT_MUTEX_SPIN); +} + void snc_unlock(struct snc_lock *l) { __prof; diff --git a/src/snc.h b/src/snc.h index 12889d42..c4cfdce1 100644 --- a/src/snc.h +++ b/src/snc.h @@ -14,6 +14,8 @@ struct snc_mutex { struct atomic_i32 v; }; +struct snc_lock snc_lock_spin_e(struct snc_mutex *m, i32 spin); +struct snc_lock snc_lock_spin_s(struct snc_mutex *m, i32 spin); struct snc_lock snc_lock_e(struct snc_mutex *m); struct snc_lock snc_lock_s(struct snc_mutex *m); void snc_unlock(struct snc_lock *lock); diff --git a/src/sys.h b/src/sys.h index 4637aa07..e91392e6 100644 --- a/src/sys.h +++ b/src/sys.h @@ -450,6 +450,7 @@ b32 sys_run_command(struct string cmd); /* Futex-like wait & wake */ void sys_wait(void *addr, void *cmp, u32 size); +void sys_wake_single(void *addr); void sys_wake_all(void *addr); /* ========================== * diff --git a/src/sys_win32.c b/src/sys_win32.c index c27126bd..8bcfba64 100644 --- a/src/sys_win32.c +++ b/src/sys_win32.c @@ -111,7 +111,6 @@ struct alignas(64) wait_list { /* =================================================== */ struct yielder *last_yielder; /* 8 bytes */ /* =================================================== */ - struct yielder *first_free_yielder; /* 8 bytes */ /* =================================================== */ i32 num_yielders; /* 4 
bytes */ u8 _pad0[4]; /* 4 bytes (padding */ @@ -133,10 +132,12 @@ struct alignas(64) wait_bin { /* =================================================== */ struct wait_list *first_free_wait_list; /* 8 bytes */ /* =================================================== */ + struct yielder *first_free_yielder; + /* =================================================== */ struct atomic_i32 lock; /* 4 bytes */ u8 _pad0[4]; /* 4 bytes (padding) */ /* =================================================== */ - u8 _pad1[32]; /* 32 bytes (padding) */ + u8 _pad1[24]; /* 24 bytes (padding) */ }; STATIC_ASSERT(sizeof(struct wait_bin) == 64); /* Assume wait_bin fits in one cache line (increase if necessary) */ STATIC_ASSERT(alignof(struct wait_bin) == 64); /* Avoid false sharing */ @@ -413,9 +414,14 @@ void sys_wait(void *addr, void *cmp, u32 size) #endif } +void sys_wake_single(void *addr) +{ + ASSERT(false); + (UNUSED)addr; +} + void sys_wake_all(void *addr) { -#if 1 u64 wait_bin_index = (u64)addr % NUM_WAIT_BINS; struct wait_bin *bin = &G.wait_bins[wait_bin_index]; @@ -480,8 +486,8 @@ void sys_wake_all(void *addr) } } /* Free yielders */ - wait_list->last_yielder->next = wait_list->first_free_yielder; - wait_list->first_free_yielder = wait_list->first_yielder; + wait_list->last_yielder->next = bin->first_free_yielder; + bin->first_free_yielder = wait_list->first_yielder; wait_list->first_yielder = NULL; wait_list->last_yielder = NULL; wait_list->num_yielders = 0; @@ -490,7 +496,6 @@ void sys_wake_all(void *addr) } } atomic_i32_fetch_set(&bin->lock, 0); -#endif /* Wake blocking waiters */ WakeByAddressAll(addr); @@ -714,6 +719,7 @@ INTERNAL SYS_THREAD_DEF(worker_entry, worker_ctx_arg) (UNUSED)ctx; { + /* TODO: Heuristic pinning */ HANDLE thread_handle = GetCurrentThread(); b32 success = false; (UNUSED)success; @@ -879,9 +885,9 @@ INTERNAL SYS_THREAD_DEF(worker_entry, worker_ctx_arg) /* Allocate new yielder */ struct yielder *yielder = NULL; - if (wait_list->first_free_yielder) { - 
yielder = wait_list->first_free_yielder; - wait_list->first_free_yielder = yielder->next; + if (bin->first_free_yielder) { + yielder = bin->first_free_yielder; + bin->first_free_yielder = yielder->next; } else { while (atomic_i32_fetch_test_set(&G.yielders_arena_lock, 0, 1) != 0) ix_pause(); {