diff --git a/src/app.c b/src/app.c index b57985fb..f1cc9754 100644 --- a/src/app.c +++ b/src/app.c @@ -231,34 +231,6 @@ void sys_app_entry(struct string args_str) G.exit_callbacks_arena = arena_alloc(GIGABYTE(64)); G.arena = arena_alloc(GIGABYTE(64)); - i32 worker_count; - { - /* FIXME: Switch this on to utilize more cores. Only decreasing worker count for testing purposes. */ -#if !PROFILING && !RTC - i32 max_worker_count = JOB_MAX_WORKERS; - i32 min_worker_count = clamp_i32(NUM_APP_DEDICATED_WORKERS + 2, JOB_MIN_WORKERS, max_worker_count); - i32 target_worker_count = (i32)sys_num_logical_processors() * 0.75; - worker_count = clamp_i32(target_worker_count, min_worker_count, max_worker_count); -#else - worker_count = 8; -#endif - } - - struct string *worker_names = arena_push_array(scratch.arena, struct string, worker_count); - for (i32 i = 0; i < worker_count; ++i) { - struct string id = string_from_int(scratch.arena, i, 10, 2); - struct string *name = &worker_names[i]; - if (i == APP_DEDICATED_WORKER_ID_USER) { - *name = string_format(scratch.arena, LIT("Worker #%F (User)"), FMT_STR(id)); - } else if (i == APP_DEDICATED_WORKER_ID_SIM) { - *name = string_format(scratch.arena, LIT("Worker #%F (Sim)"), FMT_STR(id)); - } else if (i == APP_DEDICATED_WORKER_ID_AUDIO) { - *name = string_format(scratch.arena, LIT("Worker #%F (Audio)"), FMT_STR(id)); - } else { - *name = string_format(scratch.arena, LIT("Worker #%F"), FMT_STR(id)); - } - } - G.write_path = initialize_write_directory(G.arena, LIT(WRITE_DIR)); /* Startup logging */ diff --git a/src/arena.c b/src/arena.c index 3bab294c..6eb81108 100644 --- a/src/arena.c +++ b/src/arena.c @@ -8,7 +8,7 @@ /* NOTE: Application will exit if arena fails to reserve or commit initial memory. 
*/ struct arena *arena_alloc(u64 reserve) { - __prof; + //__prof; reserve += ARENA_HEADER_SIZE; /* Round up to nearest block size */ diff --git a/src/common.h b/src/common.h index ca80639a..45fe3fcc 100644 --- a/src/common.h +++ b/src/common.h @@ -186,12 +186,7 @@ void __asan_unpoison_memory_region(void const volatile *add, size_t); # define ZI { } #endif -#if 1 -# define INLINE static inline -#else -/* TODO: benchmark benefits of forced inlining */ -# define INLINE static inline __attribute((always_inline)) -#endif +#define INLINE static inline #if COMPILER_MSVC # define FORCE_INLINE static inline __forceinline @@ -199,6 +194,12 @@ void __asan_unpoison_memory_region(void const volatile *add, size_t); # define FORCE_INLINE static inline __attribute((always_inline)) #endif +#if COMPILER_MSVC +# define NO_INLINE __declspec(noinline) +#else +# define NO_INLINE __attribute__((noinline)) +#endif + /* Separate `static` usage into different keywords for easier grepping */ #define LOCAL_PERSIST static #define INTERNAL static diff --git a/src/prof_tracy.cpp b/src/prof_tracy.cpp index cf4a6a41..aecad03b 100644 --- a/src/prof_tracy.cpp +++ b/src/prof_tracy.cpp @@ -2,6 +2,7 @@ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Weverything" +#define TRACY_FIBERS # include TRACY_CLIENT_SRC_PATH #pragma clang diagnostic pop diff --git a/src/prof_tracy.h b/src/prof_tracy.h index 1a08dc11..db007017 100644 --- a/src/prof_tracy.h +++ b/src/prof_tracy.h @@ -10,9 +10,10 @@ #define PROFILING_SYSTEM_TRACE 0 #define PROFILING_CAPTURE_FRAME_IMAGE 0 #define PROFILING_LOCKS 0 -#define PROFILING_D3D 1 +#define PROFILING_D3D 0 #define PROFILING_FILE_WSTR L".tracy" -#define PROFILING_CMD_WSTR L"cmd /C start \"\" /wait tracy-capture.exe -o .tracy -a 127.0.0.1 && start \"\" tracy-profiler.exe .tracy" +//#define PROFILING_CMD_WSTR L"cmd /C start \"\" /wait tracy-capture.exe -o .tracy -a 127.0.0.1 && start \"\" tracy-profiler.exe .tracy" +#define PROFILING_CMD_WSTR 
L"tracy-profiler.exe -a 127.0.0.1" /* Tracy defines */ #define TRACY_ENABLE @@ -136,11 +137,13 @@ INLINE void __prof_dx12_zone_cleanup_func(TracyCD3D12ZoneCtx *ctx) { ___tracy_d3 #endif /* PROFILING_CAPTURE_FRAME_IMAGE */ #ifdef TRACY_FIBERS -# define __prof_fiber_enter(fiber_name, profiler_group) TracyCFiberEnterWithHint(fiber_name, profiler_group) -# define __prof_fiber_leave TracyCFiberLeave +/* Tracy fiber methods are wrapped in NO_INLINE because otherwise issues arise + * across fiber context boundaries during optimization */ +NO_INLINE INLINE void __prof_fiber_enter(char *fiber_name, i32 profiler_group) { TracyCFiberEnterWithHint(fiber_name, profiler_group); } +NO_INLINE INLINE void __prof_fiber_leave(void) { TracyCFiberLeave; } #else # define __prof_fiber_enter(fiber_name, profiler_group) # define __prof_fiber_leave() #endif #endif diff --git a/src/resource.h b/src/resource.h index 93f0342f..b5dd1858 100644 --- a/src/resource.h +++ b/src/resource.h @@ -35,7 +35,7 @@ struct resource_startup_receipt resource_startup(void); struct resource resource_open(struct string name); #if RESOURCES_EMBEDDED - #define resource_close(res_ptr) + #define resource_close(res_ptr) (UNUSED)res_ptr #else void resource_close(struct resource *res_ptr); #endif diff --git a/src/sys.h b/src/sys.h index 07296b3e..e0e7e9da 100644 --- a/src/sys.h +++ b/src/sys.h @@ -492,9 +492,6 @@ void sys_wake_all(void *addr); i32 sys_current_fiber_id(void); -/* Cooperative yield to give other jobs of the same priority level a chance to run */ -void sys_yield(void); - /* ========================== * * Counter * ========================== */ diff --git a/src/sys_win32.c b/src/sys_win32.c index 5d5a94a3..710353ad 100644 --- a/src/sys_win32.c +++ b/src/sys_win32.c @@ -190,7 +190,6 @@ STATIC_ASSERT(alignof(struct counter) == 64); /* Avoid false sharing */ enum yield_kind { YIELD_KIND_NONE, YIELD_KIND_DONE, - YIELD_KIND_COOPERATIVE, YIELD_KIND_WAIT, NUM_YIELD_KINDS @@ -381,7
+380,7 @@ GLOBAL struct { INTERNAL struct fiber *fiber_from_id(i32 id); -INTERNAL void fiber_yield(struct fiber *fiber, struct fiber *parent_fiber); +INTERNAL void job_fiber_yield(struct fiber *fiber, struct fiber *parent_fiber); @@ -404,7 +403,7 @@ INTERNAL void fiber_yield(struct fiber *fiber, struct fiber *parent_fiber); void sys_wait(void *addr, void *cmp, u32 size) { - __prof; + //__prof; #if 0 WaitOnAddress(addr, cmp, size, INFINITE); #else @@ -412,7 +411,6 @@ void sys_wait(void *addr, void *cmp, u32 size) i32 parent_fiber_id = fiber->parent_id; /* Yield if job fiber, otherwise fall back to windows blocking function */ if (parent_fiber_id > 0) { -#if 1 /* Yield if job fiber */ *fiber->yield_param = (struct yield_param) { .kind = YIELD_KIND_WAIT, @@ -423,12 +421,7 @@ void sys_wait(void *addr, void *cmp, u32 size) } }; struct fiber *parent_fiber = fiber_from_id(parent_fiber_id); - fiber_yield(fiber, parent_fiber); -#else - while (MEMEQ(addr, cmp, size)) { - ix_pause(); - } -#endif + job_fiber_yield(fiber, parent_fiber); } else { WaitOnAddress(addr, cmp, size, INFINITE); } @@ -688,32 +681,6 @@ i32 sys_current_fiber_id(void) return (i32)(i64)GetFiberData(); } -INTERNAL void fiber_yield(struct fiber *fiber, struct fiber *parent_fiber) -{ - ASSERT(fiber->id == sys_current_fiber_id()); - ASSERT(parent_fiber->id == fiber->parent_id); - if (parent_fiber->id <= 0) { - sys_panic(LIT("A top level fiber tried to yield")); - } - { - __prof_fiber_leave; - SwitchToFiber(parent_fiber->addr); - __prof_fiber_enter(fiber->name_cstr, PROF_THREAD_GROUP_FIBERS + fiber->id); - } -} - -void sys_yield(void) -{ - /* TODO: Don't yield if job queue is empty */ - struct fiber *fiber = fiber_from_id(sys_current_fiber_id()); - i32 parent_id = fiber->parent_id; - if (parent_id > 0) { /* Top level fibers should not yield */ - struct fiber *parent_fiber = fiber_from_id(parent_id); - fiber->yield_param->kind = YIELD_KIND_COOPERATIVE; - fiber_yield(fiber, parent_fiber); - } -} - void sys_run(i32 
count, sys_job_func *func, void *sig, enum sys_priority priority, struct sys_counter *counter) { struct counter *job_counter = (struct counter *)counter; @@ -752,36 +719,66 @@ void sys_run(i32 count, sys_job_func *func, void *sig, enum sys_priority priorit } /* ========================== * - * Job fiber func + * Job fiber control + * ========================== */ + +INTERNAL void job_fiber_yield(struct fiber *fiber, struct fiber *parent_fiber) +{ + (UNUSED)fiber; + ASSERT(fiber->id == sys_current_fiber_id()); + ASSERT(parent_fiber->id == fiber->parent_id); + ASSERT(parent_fiber->id > 0); + { + __prof_fiber_leave(); + MemoryBarrier(); + SwitchToFiber(parent_fiber->addr); + MemoryBarrier(); + __prof_fiber_enter(fiber->name_cstr, PROF_THREAD_GROUP_FIBERS + fiber->id); + } +} + +INTERNAL void job_fiber_resume(struct fiber *fiber) +{ + MemoryBarrier(); + SwitchToFiber(fiber->addr); + MemoryBarrier(); +} + +/* ========================== * + * Job fiber entry * ========================== */ INTERNAL void job_fiber_entry(void *id_ptr) { i32 id = (i32)(i64)id_ptr; struct fiber *fiber = fiber_from_id(id); + __prof_fiber_enter(fiber->name_cstr, PROF_THREAD_GROUP_FIBERS + fiber->id); while (true) { - __prof_fiber_enter(fiber->name_cstr, PROF_THREAD_GROUP_FIBERS + fiber->id); /* Run job */ { - __profscope(Run job); - fiber->yield_param->kind = YIELD_KIND_NONE; + //__profscope(Run job); + volatile struct yield_param *yield_param = fiber->yield_param; + yield_param->kind = YIELD_KIND_NONE; struct sys_job_data data = ZI; data.id = fiber->job_id; data.sig = fiber->job_sig; - fiber->job_func(data); - fiber->yield_param->kind = YIELD_KIND_DONE; + { + MemoryBarrier(); + fiber->job_func(data); + MemoryBarrier(); + } } - __prof_fiber_leave; { - i32 parent_id = fiber->parent_id; - struct fiber *parent_fiber = fiber_from_id(parent_id); - SwitchToFiber(parent_fiber->addr); + volatile struct yield_param *yield_param = fiber->yield_param; + yield_param->kind = YIELD_KIND_DONE; + struct fiber 
*parent_fiber = fiber_from_id(fiber->parent_id); + job_fiber_yield(fiber, parent_fiber); } } } /* ========================== * - * Test workers + * Worker entry * ========================== */ INTERNAL SYS_THREAD_DEF(worker_entry, worker_ctx_arg) @@ -871,7 +868,7 @@ INTERNAL SYS_THREAD_DEF(worker_entry, worker_ctx_arg) /* Run fiber */ if (job_func) { - __profscope(Run fiber); + //__profscope(Run fiber); if (!job_fiber) { job_fiber = fiber_alloc(FIBER_KIND_JOB_WORKER); } @@ -884,7 +881,7 @@ INTERNAL SYS_THREAD_DEF(worker_entry, worker_ctx_arg) job_fiber->yield_param = &yield; b32 done = false; while (!done) { - SwitchToFiber(job_fiber->addr); + job_fiber_resume(job_fiber); switch (yield.kind) { default: { @@ -991,20 +988,18 @@ INTERNAL SYS_THREAD_DEF(worker_entry, worker_ctx_arg) } } +/* ========================== * + * Test entry + * ========================== */ + INTERNAL SYS_THREAD_DEF(test_entry, _) { struct arena_temp scratch = scratch_begin_no_conflict(); (UNUSED)_; - /* Init job queues */ - for (u32 i = 0; i < countof(G.job_queues); ++i) { - struct job_queue *queue = &G.job_queues[i]; - queue->kind = (enum job_queue_kind)i; - queue->arena = arena_alloc(GIGABYTE(64)); - } - /* Start workers */ G.num_worker_threads = 6; + //G.num_worker_threads = 2; G.worker_threads_arena = arena_alloc(GIGABYTE(64)); G.worker_threads = arena_push_array(G.worker_threads_arena, struct sys_thread *, G.num_worker_threads); G.worker_contexts = arena_push_array(G.worker_threads_arena, struct worker_ctx, G.num_worker_threads); @@ -1047,7 +1042,7 @@ struct sys_scratch_ctx *sys_scratch_ctx_from_fiber_id(i32 id) struct fiber_ctx *fiber_ctx = fiber_ctx_from_id(id); struct sys_scratch_ctx *scratch_ctx = &fiber_ctx->scratch_ctx; if (!scratch_ctx->arenas[0]) { - __profscope(Initialize scratch context); + //__profscope(Initialize scratch context); for (u32 i = 0; i < countof(scratch_ctx->arenas); ++i) { scratch_ctx->arenas[i] = arena_alloc(GIGABYTE(64)); } @@ -2539,6 +2534,7 @@ void 
sys_mutex_release(struct sys_mutex *mutex) struct sys_lock sys_mutex_lock_e(struct sys_mutex *mutex) { + __prof; struct win32_mutex *m = (struct win32_mutex *)mutex; { while (true) { @@ -2564,6 +2560,7 @@ struct sys_lock sys_mutex_lock_e(struct sys_mutex *mutex) struct sys_lock sys_mutex_lock_s(struct sys_mutex *mutex) { + __prof; struct win32_mutex *m = (struct win32_mutex *)mutex; { while (true) { @@ -2585,6 +2582,7 @@ struct sys_lock sys_mutex_lock_s(struct sys_mutex *mutex) void sys_mutex_unlock(struct sys_lock *lock) { + __prof; struct win32_mutex *m = (struct win32_mutex *)lock->mutex; if (lock->exclusive) { //LONG old = m->state; @@ -2736,13 +2734,13 @@ INTERNAL void win32_thread_release(struct win32_thread *t) INTERNAL DWORD WINAPI win32_thread_proc(LPVOID vt) { + fiber_alloc(FIBER_KIND_CONVERTED_THREAD); + struct win32_thread *t = (struct win32_thread *)vt; __profthread(t->thread_name_cstr, t->profiler_group); - fiber_alloc(FIBER_KIND_CONVERTED_THREAD); - /* Initialize COM */ - CoInitializeEx(NULL, COINIT_APARTMENTTHREADED); + CoInitializeEx(NULL, COINIT_MULTITHREADED); /* Set thread name */ if (t->thread_name_wstr[0] != 0) { @@ -2944,7 +2942,7 @@ struct string sys_get_clipboard_text(struct arena *arena) } /* ========================== * - * RNG + * Util * ========================== */ void sys_true_rand(struct string b) @@ -3193,6 +3191,13 @@ int CALLBACK wWinMain(_In_ HINSTANCE instance, _In_opt_ HINSTANCE prev_instance, /* Init counters */ G.counters_arena = arena_alloc(GIGABYTE(64)); + /* Init job queues */ + for (u32 i = 0; i < countof(G.job_queues); ++i) { + struct job_queue *queue = &G.job_queues[i]; + queue->kind = (enum job_queue_kind)i; + queue->arena = arena_alloc(GIGABYTE(64)); + } + /* Convert main thread to fiber */ fiber_alloc(FIBER_KIND_CONVERTED_THREAD);