wrap tracy fiber funcs in NO_INLINE

This commit is contained in:
jacob 2025-07-06 13:15:16 -05:00
parent 40dcdb40dc
commit 60613815d7
8 changed files with 84 additions and 105 deletions

View File

@ -231,34 +231,6 @@ void sys_app_entry(struct string args_str)
G.exit_callbacks_arena = arena_alloc(GIGABYTE(64)); G.exit_callbacks_arena = arena_alloc(GIGABYTE(64));
G.arena = arena_alloc(GIGABYTE(64)); G.arena = arena_alloc(GIGABYTE(64));
i32 worker_count;
{
/* FIXME: Switch this on to utilize more cores. Only decreasing worker count for testing purposes. */
#if !PROFILING && !RTC
i32 max_worker_count = JOB_MAX_WORKERS;
i32 min_worker_count = clamp_i32(NUM_APP_DEDICATED_WORKERS + 2, JOB_MIN_WORKERS, max_worker_count);
i32 target_worker_count = (i32)sys_num_logical_processors() * 0.75;
worker_count = clamp_i32(target_worker_count, min_worker_count, max_worker_count);
#else
worker_count = 8;
#endif
}
struct string *worker_names = arena_push_array(scratch.arena, struct string, worker_count);
for (i32 i = 0; i < worker_count; ++i) {
struct string id = string_from_int(scratch.arena, i, 10, 2);
struct string *name = &worker_names[i];
if (i == APP_DEDICATED_WORKER_ID_USER) {
*name = string_format(scratch.arena, LIT("Worker #%F (User)"), FMT_STR(id));
} else if (i == APP_DEDICATED_WORKER_ID_SIM) {
*name = string_format(scratch.arena, LIT("Worker #%F (Sim)"), FMT_STR(id));
} else if (i == APP_DEDICATED_WORKER_ID_AUDIO) {
*name = string_format(scratch.arena, LIT("Worker #%F (Audio)"), FMT_STR(id));
} else {
*name = string_format(scratch.arena, LIT("Worker #%F"), FMT_STR(id));
}
}
G.write_path = initialize_write_directory(G.arena, LIT(WRITE_DIR)); G.write_path = initialize_write_directory(G.arena, LIT(WRITE_DIR));
/* Startup logging */ /* Startup logging */

View File

@ -8,7 +8,7 @@
/* NOTE: Application will exit if arena fails to reserve or commit initial memory. */ /* NOTE: Application will exit if arena fails to reserve or commit initial memory. */
struct arena *arena_alloc(u64 reserve) struct arena *arena_alloc(u64 reserve)
{ {
__prof; //__prof;
reserve += ARENA_HEADER_SIZE; reserve += ARENA_HEADER_SIZE;
/* Round up to nearest block size */ /* Round up to nearest block size */

View File

@ -186,12 +186,7 @@ void __asan_unpoison_memory_region(void const volatile *add, size_t);
# define ZI { } # define ZI { }
#endif #endif
#if 1
#define INLINE static inline #define INLINE static inline
#else
/* TODO: benchmark benefits of forced inlining */
# define INLINE static inline __attribute((always_inline))
#endif
#if COMPILER_MSVC #if COMPILER_MSVC
# define FORCE_INLINE static inline __forceinline # define FORCE_INLINE static inline __forceinline
@ -199,6 +194,12 @@ void __asan_unpoison_memory_region(void const volatile *add, size_t);
# define FORCE_INLINE static inline __attribute((always_inline)) # define FORCE_INLINE static inline __attribute((always_inline))
#endif #endif
#if COMPILER_MSVC
# define NO_INLINE __declspec(noinline)
#else
# define NO_INLINE __attribute__((noinline))
#endif
/* Separate `static` usage into different keywords for easier grepping */ /* Separate `static` usage into different keywords for easier grepping */
#define LOCAL_PERSIST static #define LOCAL_PERSIST static
#define INTERNAL static #define INTERNAL static

View File

@ -2,6 +2,7 @@
#pragma clang diagnostic push #pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything" #pragma clang diagnostic ignored "-Weverything"
#define TRACY_FIBERS
# include TRACY_CLIENT_SRC_PATH # include TRACY_CLIENT_SRC_PATH
#pragma clang diagnostic pop #pragma clang diagnostic pop

View File

@ -10,9 +10,10 @@
#define PROFILING_SYSTEM_TRACE 0 #define PROFILING_SYSTEM_TRACE 0
#define PROFILING_CAPTURE_FRAME_IMAGE 0 #define PROFILING_CAPTURE_FRAME_IMAGE 0
#define PROFILING_LOCKS 0 #define PROFILING_LOCKS 0
#define PROFILING_D3D 1 #define PROFILING_D3D 0
#define PROFILING_FILE_WSTR L".tracy" #define PROFILING_FILE_WSTR L".tracy"
#define PROFILING_CMD_WSTR L"cmd /C start \"\" /wait tracy-capture.exe -o .tracy -a 127.0.0.1 && start \"\" tracy-profiler.exe .tracy" //#define PROFILING_CMD_WSTR L"cmd /C start \"\" /wait tracy-capture.exe -o .tracy -a 127.0.0.1 && start \"\" tracy-profiler.exe .tracy"
#define PROFILING_CMD_WSTR L"tracy-profiler.exe -a 127.0.0.1"
/* Tracy defines */ /* Tracy defines */
#define TRACY_ENABLE #define TRACY_ENABLE
@ -136,11 +137,13 @@ INLINE void __prof_dx12_zone_cleanup_func(TracyCD3D12ZoneCtx *ctx) { ___tracy_d3
#endif /* PROFILING_CAPTURE_FRAME_IMAGE */ #endif /* PROFILING_CAPTURE_FRAME_IMAGE */
#ifdef TRACY_FIBERS #ifdef TRACY_FIBERS
# define __prof_fiber_enter(fiber_name, profiler_group) TracyCFiberEnterWithHint(fiber_name, profiler_group) /* Tracy fiber methods are wrapped in NO_INLINE because otherwise issues arise
# define __prof_fiber_leave TracyCFiberLeave * across fiber context boundaries during optimization */
NO_INLINE INLINE void __prof_fiber_enter(char *fiber_name, i32 profiler_group) { TracyCFiberEnterWithHint(fiber_name, profiler_group); }
NO_INLINE INLINE void __prof_fiber_leave(void) { TracyCFiberLeave; }
#else #else
# define __prof_fiber_enter(fiber_name, profiler_group) # define __prof_fiber_enter(fiber_name, profiler_group)
# define __prof_fiber_leave # define __prof_fiber_leave()
#endif #endif
#endif #endif

View File

@ -35,7 +35,7 @@ struct resource_startup_receipt resource_startup(void);
struct resource resource_open(struct string name); struct resource resource_open(struct string name);
#if RESOURCES_EMBEDDED #if RESOURCES_EMBEDDED
#define resource_close(res_ptr) #define resource_close(res_ptr) (UNUSED)res_ptr
#else #else
void resource_close(struct resource *res_ptr); void resource_close(struct resource *res_ptr);
#endif #endif

View File

@ -492,9 +492,6 @@ void sys_wake_all(void *addr);
i32 sys_current_fiber_id(void); i32 sys_current_fiber_id(void);
/* Cooperative yield to give other jobs of the same priority level a chance to run */
void sys_yield(void);
/* ========================== * /* ========================== *
* Counter * Counter
* ========================== */ * ========================== */

View File

@ -190,7 +190,6 @@ STATIC_ASSERT(alignof(struct counter) == 64); /* Avoid false sharing */
enum yield_kind { enum yield_kind {
YIELD_KIND_NONE, YIELD_KIND_NONE,
YIELD_KIND_DONE, YIELD_KIND_DONE,
YIELD_KIND_COOPERATIVE,
YIELD_KIND_WAIT, YIELD_KIND_WAIT,
NUM_YIELD_KINDS NUM_YIELD_KINDS
@ -381,7 +380,7 @@ GLOBAL struct {
INTERNAL struct fiber *fiber_from_id(i32 id); INTERNAL struct fiber *fiber_from_id(i32 id);
INTERNAL void fiber_yield(struct fiber *fiber, struct fiber *parent_fiber); INTERNAL void job_fiber_yield(struct fiber *fiber, struct fiber *parent_fiber);
@ -404,7 +403,7 @@ INTERNAL void fiber_yield(struct fiber *fiber, struct fiber *parent_fiber);
void sys_wait(void *addr, void *cmp, u32 size) void sys_wait(void *addr, void *cmp, u32 size)
{ {
__prof; //__prof;
#if 0 #if 0
WaitOnAddress(addr, cmp, size, INFINITE); WaitOnAddress(addr, cmp, size, INFINITE);
#else #else
@ -412,7 +411,6 @@ void sys_wait(void *addr, void *cmp, u32 size)
i32 parent_fiber_id = fiber->parent_id; i32 parent_fiber_id = fiber->parent_id;
/* Yield if job fiber, otherwise fall back to windows blocking function */ /* Yield if job fiber, otherwise fall back to windows blocking function */
if (parent_fiber_id > 0) { if (parent_fiber_id > 0) {
#if 1
/* Yield if job fiber */ /* Yield if job fiber */
*fiber->yield_param = (struct yield_param) { *fiber->yield_param = (struct yield_param) {
.kind = YIELD_KIND_WAIT, .kind = YIELD_KIND_WAIT,
@ -423,12 +421,7 @@ void sys_wait(void *addr, void *cmp, u32 size)
} }
}; };
struct fiber *parent_fiber = fiber_from_id(parent_fiber_id); struct fiber *parent_fiber = fiber_from_id(parent_fiber_id);
fiber_yield(fiber, parent_fiber); job_fiber_yield(fiber, parent_fiber);
#else
while (MEMEQ(addr, cmp, size)) {
ix_pause();
}
#endif
} else { } else {
WaitOnAddress(addr, cmp, size, INFINITE); WaitOnAddress(addr, cmp, size, INFINITE);
} }
@ -688,32 +681,6 @@ i32 sys_current_fiber_id(void)
return (i32)(i64)GetFiberData(); return (i32)(i64)GetFiberData();
} }
INTERNAL void fiber_yield(struct fiber *fiber, struct fiber *parent_fiber)
{
ASSERT(fiber->id == sys_current_fiber_id());
ASSERT(parent_fiber->id == fiber->parent_id);
if (parent_fiber->id <= 0) {
sys_panic(LIT("A top level fiber tried to yield"));
}
{
__prof_fiber_leave;
SwitchToFiber(parent_fiber->addr);
__prof_fiber_enter(fiber->name_cstr, PROF_THREAD_GROUP_FIBERS + fiber->id);
}
}
void sys_yield(void)
{
/* TODO: Don't yield if job queue is empty */
struct fiber *fiber = fiber_from_id(sys_current_fiber_id());
i32 parent_id = fiber->parent_id;
if (parent_id > 0) { /* Top level fibers should not yield */
struct fiber *parent_fiber = fiber_from_id(parent_id);
fiber->yield_param->kind = YIELD_KIND_COOPERATIVE;
fiber_yield(fiber, parent_fiber);
}
}
void sys_run(i32 count, sys_job_func *func, void *sig, enum sys_priority priority, struct sys_counter *counter) void sys_run(i32 count, sys_job_func *func, void *sig, enum sys_priority priority, struct sys_counter *counter)
{ {
struct counter *job_counter = (struct counter *)counter; struct counter *job_counter = (struct counter *)counter;
@ -752,36 +719,66 @@ void sys_run(i32 count, sys_job_func *func, void *sig, enum sys_priority priorit
} }
/* ========================== * /* ========================== *
* Job fiber func * Job fiber control
* ========================== */
INTERNAL void job_fiber_yield(struct fiber *fiber, struct fiber *parent_fiber)
{
(UNUSED)fiber;
ASSERT(fiber->id == sys_current_fiber_id());
ASSERT(parent_fiber->id == fiber->parent_id);
ASSERT(parent_fiber->id > 0);
{
__prof_fiber_leave();
MemoryBarrier();
SwitchToFiber(parent_fiber->addr);
MemoryBarrier();
__prof_fiber_enter(fiber->name_cstr, PROF_THREAD_GROUP_FIBERS + fiber->id);
}
}
INTERNAL void job_fiber_resume(struct fiber *fiber)
{
MemoryBarrier();
SwitchToFiber(fiber->addr);
MemoryBarrier();
}
/* ========================== *
* Job fiber entry
* ========================== */ * ========================== */
INTERNAL void job_fiber_entry(void *id_ptr) INTERNAL void job_fiber_entry(void *id_ptr)
{ {
i32 id = (i32)(i64)id_ptr; i32 id = (i32)(i64)id_ptr;
struct fiber *fiber = fiber_from_id(id); struct fiber *fiber = fiber_from_id(id);
while (true) {
__prof_fiber_enter(fiber->name_cstr, PROF_THREAD_GROUP_FIBERS + fiber->id); __prof_fiber_enter(fiber->name_cstr, PROF_THREAD_GROUP_FIBERS + fiber->id);
while (true) {
/* Run job */ /* Run job */
{ {
__profscope(Run job); //__profscope(Run job);
fiber->yield_param->kind = YIELD_KIND_NONE; volatile struct yield_param *yield_param = fiber->yield_param;
yield_param->kind = YIELD_KIND_NONE;
struct sys_job_data data = ZI; struct sys_job_data data = ZI;
data.id = fiber->job_id; data.id = fiber->job_id;
data.sig = fiber->job_sig; data.sig = fiber->job_sig;
fiber->job_func(data);
fiber->yield_param->kind = YIELD_KIND_DONE;
}
__prof_fiber_leave;
{ {
i32 parent_id = fiber->parent_id; MemoryBarrier();
struct fiber *parent_fiber = fiber_from_id(parent_id); fiber->job_func(data);
SwitchToFiber(parent_fiber->addr); MemoryBarrier();
}
}
{
volatile struct yield_param *yield_param = fiber->yield_param;
yield_param->kind = YIELD_KIND_DONE;
struct fiber *parent_fiber = fiber_from_id(fiber->parent_id);
job_fiber_yield(fiber, parent_fiber);
} }
} }
} }
/* ========================== * /* ========================== *
* Test workers * Worker entry
* ========================== */ * ========================== */
INTERNAL SYS_THREAD_DEF(worker_entry, worker_ctx_arg) INTERNAL SYS_THREAD_DEF(worker_entry, worker_ctx_arg)
@ -871,7 +868,7 @@ INTERNAL SYS_THREAD_DEF(worker_entry, worker_ctx_arg)
/* Run fiber */ /* Run fiber */
if (job_func) { if (job_func) {
__profscope(Run fiber); //__profscope(Run fiber);
if (!job_fiber) { if (!job_fiber) {
job_fiber = fiber_alloc(FIBER_KIND_JOB_WORKER); job_fiber = fiber_alloc(FIBER_KIND_JOB_WORKER);
} }
@ -884,7 +881,7 @@ INTERNAL SYS_THREAD_DEF(worker_entry, worker_ctx_arg)
job_fiber->yield_param = &yield; job_fiber->yield_param = &yield;
b32 done = false; b32 done = false;
while (!done) { while (!done) {
SwitchToFiber(job_fiber->addr); job_fiber_resume(job_fiber);
switch (yield.kind) { switch (yield.kind) {
default: default:
{ {
@ -991,20 +988,18 @@ INTERNAL SYS_THREAD_DEF(worker_entry, worker_ctx_arg)
} }
} }
/* ========================== *
* Test entry
* ========================== */
INTERNAL SYS_THREAD_DEF(test_entry, _) INTERNAL SYS_THREAD_DEF(test_entry, _)
{ {
struct arena_temp scratch = scratch_begin_no_conflict(); struct arena_temp scratch = scratch_begin_no_conflict();
(UNUSED)_; (UNUSED)_;
/* Init job queues */
for (u32 i = 0; i < countof(G.job_queues); ++i) {
struct job_queue *queue = &G.job_queues[i];
queue->kind = (enum job_queue_kind)i;
queue->arena = arena_alloc(GIGABYTE(64));
}
/* Start workers */ /* Start workers */
G.num_worker_threads = 6; G.num_worker_threads = 6;
//G.num_worker_threads = 2;
G.worker_threads_arena = arena_alloc(GIGABYTE(64)); G.worker_threads_arena = arena_alloc(GIGABYTE(64));
G.worker_threads = arena_push_array(G.worker_threads_arena, struct sys_thread *, G.num_worker_threads); G.worker_threads = arena_push_array(G.worker_threads_arena, struct sys_thread *, G.num_worker_threads);
G.worker_contexts = arena_push_array(G.worker_threads_arena, struct worker_ctx, G.num_worker_threads); G.worker_contexts = arena_push_array(G.worker_threads_arena, struct worker_ctx, G.num_worker_threads);
@ -1047,7 +1042,7 @@ struct sys_scratch_ctx *sys_scratch_ctx_from_fiber_id(i32 id)
struct fiber_ctx *fiber_ctx = fiber_ctx_from_id(id); struct fiber_ctx *fiber_ctx = fiber_ctx_from_id(id);
struct sys_scratch_ctx *scratch_ctx = &fiber_ctx->scratch_ctx; struct sys_scratch_ctx *scratch_ctx = &fiber_ctx->scratch_ctx;
if (!scratch_ctx->arenas[0]) { if (!scratch_ctx->arenas[0]) {
__profscope(Initialize scratch context); //__profscope(Initialize scratch context);
for (u32 i = 0; i < countof(scratch_ctx->arenas); ++i) { for (u32 i = 0; i < countof(scratch_ctx->arenas); ++i) {
scratch_ctx->arenas[i] = arena_alloc(GIGABYTE(64)); scratch_ctx->arenas[i] = arena_alloc(GIGABYTE(64));
} }
@ -2539,6 +2534,7 @@ void sys_mutex_release(struct sys_mutex *mutex)
struct sys_lock sys_mutex_lock_e(struct sys_mutex *mutex) struct sys_lock sys_mutex_lock_e(struct sys_mutex *mutex)
{ {
__prof;
struct win32_mutex *m = (struct win32_mutex *)mutex; struct win32_mutex *m = (struct win32_mutex *)mutex;
{ {
while (true) { while (true) {
@ -2564,6 +2560,7 @@ struct sys_lock sys_mutex_lock_e(struct sys_mutex *mutex)
struct sys_lock sys_mutex_lock_s(struct sys_mutex *mutex) struct sys_lock sys_mutex_lock_s(struct sys_mutex *mutex)
{ {
__prof;
struct win32_mutex *m = (struct win32_mutex *)mutex; struct win32_mutex *m = (struct win32_mutex *)mutex;
{ {
while (true) { while (true) {
@ -2585,6 +2582,7 @@ struct sys_lock sys_mutex_lock_s(struct sys_mutex *mutex)
void sys_mutex_unlock(struct sys_lock *lock) void sys_mutex_unlock(struct sys_lock *lock)
{ {
__prof;
struct win32_mutex *m = (struct win32_mutex *)lock->mutex; struct win32_mutex *m = (struct win32_mutex *)lock->mutex;
if (lock->exclusive) { if (lock->exclusive) {
//LONG old = m->state; //LONG old = m->state;
@ -2736,13 +2734,13 @@ INTERNAL void win32_thread_release(struct win32_thread *t)
INTERNAL DWORD WINAPI win32_thread_proc(LPVOID vt) INTERNAL DWORD WINAPI win32_thread_proc(LPVOID vt)
{ {
fiber_alloc(FIBER_KIND_CONVERTED_THREAD);
struct win32_thread *t = (struct win32_thread *)vt; struct win32_thread *t = (struct win32_thread *)vt;
__profthread(t->thread_name_cstr, t->profiler_group); __profthread(t->thread_name_cstr, t->profiler_group);
fiber_alloc(FIBER_KIND_CONVERTED_THREAD);
/* Initialize COM */ /* Initialize COM */
CoInitializeEx(NULL, COINIT_APARTMENTTHREADED); CoInitializeEx(NULL, COINIT_MULTITHREADED);
/* Set thread name */ /* Set thread name */
if (t->thread_name_wstr[0] != 0) { if (t->thread_name_wstr[0] != 0) {
@ -2944,7 +2942,7 @@ struct string sys_get_clipboard_text(struct arena *arena)
} }
/* ========================== * /* ========================== *
* RNG * Util
* ========================== */ * ========================== */
void sys_true_rand(struct string b) void sys_true_rand(struct string b)
@ -3193,6 +3191,13 @@ int CALLBACK wWinMain(_In_ HINSTANCE instance, _In_opt_ HINSTANCE prev_instance,
/* Init counters */ /* Init counters */
G.counters_arena = arena_alloc(GIGABYTE(64)); G.counters_arena = arena_alloc(GIGABYTE(64));
/* Init job queues */
for (u32 i = 0; i < countof(G.job_queues); ++i) {
struct job_queue *queue = &G.job_queues[i];
queue->kind = (enum job_queue_kind)i;
queue->arena = arena_alloc(GIGABYTE(64));
}
/* Convert main thread to fiber */ /* Convert main thread to fiber */
fiber_alloc(FIBER_KIND_CONVERTED_THREAD); fiber_alloc(FIBER_KIND_CONVERTED_THREAD);