diff --git a/build.c b/build.c index 91867798..9959084b 100644 --- a/build.c +++ b/build.c @@ -22,7 +22,6 @@ Bool arg_crtlib = false; Bool arg_debinfo = false; Bool arg_developer = false; Bool arg_profiling = false; -Bool arg_profiler_sampling = false; Bool arg_unoptimized = false; /* ========================== * @@ -359,7 +358,6 @@ void OnBuild(StringList cli_args) if (StringEqual(arg, Lit("-debinfo"))) arg_debinfo = true; if (StringEqual(arg, Lit("-developer"))) arg_developer = true; if (StringEqual(arg, Lit("-profiling"))) arg_profiling = true; - if (StringEqual(arg, Lit("-sampling"))) arg_profiler_sampling = true; if (StringEqual(arg, Lit("-unoptimized"))) arg_unoptimized = true; } break; } @@ -404,7 +402,6 @@ void OnBuild(StringList cli_args) SH_PrintF(Lit("[%F]\n"), FmtStr(compiler)); if (arg_asan) SH_Print(Lit("[Asan Enabled]\n")); if (arg_profiling) SH_Print(Lit("[Profiling]\n")); - if (arg_profiler_sampling) SH_Print(Lit("[Profiler sampling]\n")); if (arg_developer) SH_Print(Lit("[Developer build]\n")); SH_Print(Lit("------------------------------\n\n")); } @@ -594,12 +591,6 @@ void OnBuild(StringList cli_args) } StringListAppend(&perm, &compile_args, StringF(&perm, Lit("-DTRACY_INCLUDE_PATH=\"%F\""), FmtStr(tracy_include_path))); - /* Tracy flags */ - StringListAppend(&perm, &compile_args, Lit("-DTRACY_ENABLE=1")); - if (!arg_profiler_sampling) { - StringListAppend(&perm, &compile_args, Lit("-DTRACY_NO_SAMPLING -DTRACY_NO_SYSTEM_TRACING -DTRACY_NO_CALLSTACK")); - } - /* Disable compiler warnings when compiling tracy client */ compile_warnings = (StringList) { 0 }; link_warnings = (StringList) { 0 }; @@ -607,14 +598,6 @@ void OnBuild(StringList cli_args) StringListAppend(&perm, &link_warnings, Lit("-Wno-everything")); } - /* Profiler sampling */ - if (arg_profiler_sampling) { - if (!arg_profiling) { - Error(Lit("Profiling must be enabled to use profiler sampling")); - OS_Exit(1); - } - } - if (!arg_msvc) { String incbin_dir = StringReplace(&perm, 
out_inc_dir_path, Lit("\\"), Lit("/")); StringListAppend(&perm, &compile_args, StringF(&perm, Lit("-DINCBIN_DIR_RAW=\"%F\""), FmtStr(incbin_dir))); diff --git a/src/app.c b/src/app.c index 7434b16d..c9dcc024 100644 --- a/src/app.c +++ b/src/app.c @@ -3,10 +3,9 @@ #include "string.h" #include "scratch.h" #include "sys.h" -#include "work.h" +#include "job.h" #include "user.h" #include "sim.h" -#include "sim.h" #include "playback.h" #include "log.h" #include "resource.h" @@ -248,8 +247,8 @@ void app_entry_point(struct string args_str) i32 num_logical_cores = (i32)sys_num_logical_processors(); //num_logical_cores = min(num_logical_cores, 8) + (max(num_logical_cores - 8, 0) / 2); /* Dumb heuristic to try and lessen e-core usage */ - i32 min_worker_count = 2; - i32 max_worker_count = 128; + i32 min_worker_count = JOB_MIN_WORKER_COUNT; + i32 max_worker_count = JOB_MAX_WORKER_COUNT; i32 target_worker_count = num_logical_cores - num_reserved_cores; worker_count = (u32)clamp_i32(target_worker_count, min_worker_count, max_worker_count); #endif @@ -319,20 +318,20 @@ void app_entry_point(struct string args_str) } /* Startup systems */ + job_startup(worker_count); + struct resource_startup_receipt resource_sr = resource_startup(); struct sock_startup_receipt sock_sr = sock_startup(); struct host_startup_receipt host_sr = host_startup(&sock_sr); - struct resource_startup_receipt resource_sr = resource_startup(); - struct work_startup_receipt work_sr = work_startup(worker_count); - struct gp_startup_receipt gp_sr = gp_startup(&work_sr); - struct asset_cache_startup_receipt asset_cache_sr = asset_cache_startup(&work_sr); + struct gp_startup_receipt gp_sr = gp_startup(); + struct asset_cache_startup_receipt asset_cache_sr = asset_cache_startup(); struct ttf_startup_receipt ttf_sr = ttf_startup(); - struct font_startup_receipt font_sr = font_startup(&work_sr, &gp_sr, &asset_cache_sr, &ttf_sr, &resource_sr); + struct font_startup_receipt font_sr = font_startup(&gp_sr, &asset_cache_sr, 
&ttf_sr, &resource_sr); struct sprite_startup_receipt sprite_sr = sprite_startup(&gp_sr, &resource_sr); struct mixer_startup_receipt mixer_sr = mixer_startup(); - struct sound_startup_receipt sound_sr = sound_startup(&work_sr, &asset_cache_sr, &resource_sr); + struct sound_startup_receipt sound_sr = sound_startup(&asset_cache_sr, &resource_sr); struct draw_startup_receipt draw_sr = draw_startup(&gp_sr, &font_sr); struct sim_startup_receipt sim_sr = sim_startup(); - struct user_startup_receipt user_sr = user_startup(&work_sr, &gp_sr, &font_sr, &sprite_sr, &draw_sr, &asset_cache_sr, &sound_sr, &mixer_sr, &host_sr, &sim_sr, connect_address, window); + struct user_startup_receipt user_sr = user_startup(&gp_sr, &font_sr, &sprite_sr, &draw_sr, &asset_cache_sr, &sound_sr, &mixer_sr, &host_sr, &sim_sr, connect_address, window); struct playback_startup_receipt playback_sr = playback_startup(&mixer_sr); (UNUSED)user_sr; diff --git a/src/asset_cache.c b/src/asset_cache.c index 075b7c41..2b15fa6d 100644 --- a/src/asset_cache.c +++ b/src/asset_cache.c @@ -5,8 +5,8 @@ #include "arena.h" #include "scratch.h" #include "util.h" -#include "work.h" #include "log.h" +#include "job.h" /* ========================== * * Global state @@ -35,10 +35,8 @@ GLOBAL struct { * Startup * ========================== */ -struct asset_cache_startup_receipt asset_cache_startup(struct work_startup_receipt *work_sr) +struct asset_cache_startup_receipt asset_cache_startup(void) { - (UNUSED)work_sr; - /* Init lookup */ G.lookup_mutex = sys_mutex_alloc(); /* Init store */ @@ -152,7 +150,7 @@ struct asset *asset_cache_touch(struct string key, u64 hash, b32 *is_first_touch .status = ASSET_STATUS_UNINITIALIZED, .hash = hash, .key = key_stored, - .work_ready_sf = sync_flag_alloc(), + .job_ready_sf = sync_flag_alloc(), .asset_ready_sf = sync_flag_alloc() }; if (is_first_touch) { @@ -173,13 +171,13 @@ struct asset *asset_cache_touch(struct string key, u64 hash, b32 *is_first_touch * Marking * 
========================== */ -/* Call this once asset work has been created */ +/* Call this once asset job has been created */ void asset_cache_mark_loading(struct asset *asset) { asset->status = ASSET_STATUS_LOADING; } -/* Call this once asset work has finished */ +/* Call this once asset job has finished */ void asset_cache_mark_ready(struct asset *asset, void *store_data) { asset->store_data = store_data; @@ -189,24 +187,23 @@ void asset_cache_mark_ready(struct asset *asset, void *store_data) } /* ========================== * - * Work + * Job * ========================== */ -/* NOTE: If an asset doesn't have any load work then call this function with `NULL` */ -void asset_cache_set_work(struct asset *asset, struct work_handle *handle) +void asset_cache_set_job(struct asset *asset, struct job_handle *job) { - asset->work = handle ? *handle : (struct work_handle) { 0 }; - sync_flag_set(&asset->work_ready_sf); + asset->job = job ? *job : (struct job_handle) { 0 }; + sync_flag_set(&asset->job_ready_sf); } void asset_cache_wait(struct asset *asset) { if (asset->status != ASSET_STATUS_READY) { - /* Wait for work to be set */ - sync_flag_wait(&asset->work_ready_sf); - /* Help with work */ - if (asset->work.gen != 0) { - work_help(asset->work); + /* Wait for job to be set */ + sync_flag_wait(&asset->job_ready_sf); + /* Wait on job */ + if (asset->job.gen != 0) { + job_wait(asset->job); } /* Wait for asset to be ready */ sync_flag_wait(&asset->asset_ready_sf); diff --git a/src/asset_cache.h b/src/asset_cache.h index e83cbbab..668432c3 100644 --- a/src/asset_cache.h +++ b/src/asset_cache.h @@ -2,10 +2,8 @@ #define ASSET_CACHE_H #include "sys.h" -#include "work.h" #include "util.h" - -struct work_startup_receipt; +#include "job.h" enum asset_status { ASSET_STATUS_NONE, @@ -21,9 +19,9 @@ struct asset { u64 hash; struct string key; - /* Managed via asset_cache_set_work */ - struct work_handle work; - struct sync_flag work_ready_sf; + /* Managed via asset_cache_set_job */ + 
struct job_handle job; + struct sync_flag job_ready_sf; /* Managed via asset_cache_mark_x functions */ enum asset_status status; @@ -41,14 +39,14 @@ struct asset_cache_store { }; struct asset_cache_startup_receipt { i32 _; }; -struct asset_cache_startup_receipt asset_cache_startup(struct work_startup_receipt *work_sr); +struct asset_cache_startup_receipt asset_cache_startup(void); struct asset *asset_cache_touch(struct string key, u64 hash, b32 *is_first_touch); void asset_cache_mark_loading(struct asset *asset); void asset_cache_mark_ready(struct asset *asset, void *store_data); -void asset_cache_set_work(struct asset *asset, struct work_handle *handle); +void asset_cache_set_job(struct asset *asset, struct job_handle *job); void asset_cache_wait(struct asset *asset); void *asset_cache_get_store_data(struct asset *asset); diff --git a/src/common.h b/src/common.h index 78b34fd2..68cc23f0 100644 --- a/src/common.h +++ b/src/common.h @@ -626,129 +626,18 @@ INLINE i64 clamp_i64(i64 v, i64 min, i64 max) { return v < min ? min : v > max ? INLINE f32 clamp_f32(f32 v, f32 min, f32 max) { return v < min ? min : v > max ? max : v; } INLINE f64 clamp_f64(f64 v, f64 min, f64 max) { return v < min ? min : v > max ? 
max : v; } -/* ========================== * - * Profiling - * ========================== */ - -#if PROFILING - -#include STRINGIZE(TRACY_INCLUDE_PATH) - -#define PROFILING_CAPTURE_FRAME_IMAGE 0 - -/* Clang/GCC cleanup macros */ -#if COMPILER_MSVC -# error "MSVC not supported for profiling (cleanup attributes are required for profiling markup)" -#else -# ifdef TRACY_NO_CALLSTACK -# define __prof static const struct ___tracy_source_location_data CAT(__tracy_source_location,__LINE__) = { NULL, __func__, __FILE__, (uint32_t)__LINE__, 0 }; __attribute((cleanup(__prof_zone_cleanup_func))) TracyCZoneCtx __tracy_zone_ctx = ___tracy_emit_zone_begin( &CAT(__tracy_source_location,__LINE__), true ); -# define __profscope(name) static const struct ___tracy_source_location_data CAT(__tracy_source_location,__LINE__) = { #name, __func__, __FILE__, (uint32_t)__LINE__, 0 }; __attribute((cleanup(__prof_zone_cleanup_func))) TracyCZoneCtx __tracy_zone_ctx = ___tracy_emit_zone_begin( &CAT(__tracy_source_location,__LINE__), true ); -# else -# define __prof static const struct ___tracy_source_location_data CAT(__tracy_source_location,__LINE__) = { NULL, __func__, __FILE__, (uint32_t)__LINE__, 0 }; __attribute((cleanup(__prof_zone_cleanup_func))) TracyCZoneCtx __tracy_zone_ctx = ___tracy_emit_zone_begin_callstack( &CAT(__tracy_source_location,__LINE__), TRACY_CALLSTACK, true ); -# define __profscope(name) static const struct ___tracy_source_location_data CAT(__tracy_source_location,__LINE__) = { #name, __func__, __FILE__, (uint32_t)__LINE__, 0 }; __attribute((cleanup(__prof_zone_cleanup_func))) TracyCZoneCtx __tracy_zone_ctx = ___tracy_emit_zone_begin_callstack( &CAT(__tracy_source_location,__LINE__), TRACY_CALLSTACK, true ); -# endif -# define __profscope_dx11(dx11_ctx, name, color) static const struct ___tracy_source_location_data CAT(__tracy_gpu_d3d11_source_location,__LINE__) = { #name, __func__, __FILE__, (uint32_t)__LINE__, BGR32(color) }; 
__attribute((cleanup(__prof_dx11_zone_cleanup_func))) TracyCD3D11ZoneCtx __tracy_d3d11_zone_ctx; ___tracy_d3d11_emit_zone_begin( dx11_ctx, &__tracy_d3d11_zone_ctx, &CAT(__tracy_gpu_d3d11_source_location,__LINE__), true); -# define __profscope_dx12(dx12_ctx, cmd_list, name, color) static const struct ___tracy_source_location_data CAT(__tracy_gpu_d3d12_source_location,__LINE__) = { #name, __func__, __FILE__, (uint32_t)__LINE__, BGR32(color) }; __attribute((cleanup(__prof_dx12_zone_cleanup_func))) TracyCD3D12ZoneCtx __tracy_d3d12_zone_ctx; ___tracy_d3d12_emit_zone_begin( dx12_ctx, cmd_list, &__tracy_d3d12_zone_ctx, &CAT(__tracy_gpu_d3d12_source_location,__LINE__), true); -#endif -INLINE void __prof_zone_cleanup_func(TracyCZoneCtx *ctx) { TracyCZoneEnd(*ctx); } -INLINE void __prof_dx11_zone_cleanup_func(TracyCD3D11ZoneCtx *ctx) { ___tracy_d3d11_emit_zone_end(*ctx); } -INLINE void __prof_dx12_zone_cleanup_func(TracyCD3D12ZoneCtx *ctx) { ___tracy_d3d12_emit_zone_end(*ctx); } - -#define __profalloc(ptr, size) TracyCAlloc((ptr), (size)) -#define __proffree(ptr) TracyCFree((ptr)) -#define __profmsg(txt, len, col) TracyCMessageC((txt), (len), BGR32(col)); -#define __profframe(name) TracyCFrameMarkNamed((name)) -#define __profthread(name) TracyCSetThreadName((name)) - -#define __proflock_ctx TracyCSharedLockCtx -#define __proflock_alloc(ctx) TracyCSharedLockAnnounce((ctx)) -#define __proflock_release(ctx) TracyCSharedLockTerminate((ctx)) -#define __proflock_before_exclusive_lock(ctx) TracyCSharedLockBeforeExclusiveLock((ctx)) -#define __proflock_after_exclusive_lock(ctx) TracyCSharedLockAfterExclusiveLock((ctx)) -#define __proflock_after_exclusive_unlock(ctx) TracyCSharedLockAfterExclusiveUnlock((ctx)) -#define __proflock_after_try_exclusive_lock(ctx, acquired) TracyCSharedLockAfterTryExclusiveLock((ctx), (acquired)) -#define __proflock_before_shared_lock(ctx) TracyCSharedLockBeforeSharedLock((ctx)) -#define __proflock_after_shared_lock(ctx) 
TracyCSharedLockAfterSharedLock((ctx)) -#define __proflock_after_shared_unlock(ctx) TracyCSharedLockAfterSharedUnlock((ctx)) -#define __proflock_after_try_shared_lock(ctx, acquired) TracyCSharedLockAfterTrySharedLock((ctx), (acquired)) -#define __proflock_mark(ctx) TracyCSharedLockMark((ctx)) -#define __proflock_custom_name(ctx, name, len) TracyCSharedLockCustomName((ctx), (name), (len)) - -#define __prof_dx11_ctx TracyCD3D11Ctx -#define __prof_dx11_ctx_alloc(ctx, device, device_ctx, name, name_len) ctx = ___tracy_d3d11_context_announce(device, device_ctx, name, name_len) -#define __prof_dx11_ctx_release(ctx) ___tracy_d3d11_context_terminate(ctx) -#define __prof_dx11_collect(ctx) ___tracy_d3d11_context_collect(ctx) - -#define __prof_dx12_ctx TracyCD3D12Ctx -#define __prof_dx12_ctx_alloc(ctx, device, queue, name, name_len) ctx = ___tracy_d3d12_context_announce(device, queue, name, name_len) -#define __prof_dx12_ctx_release(ctx) ___tracy_d3d12_context_terminate(ctx) -#define __prof_dx12_new_frame(ctx) ___tracy_d3d12_context_new_frame(ctx) -#define __prof_dx12_collect(ctx) ___tracy_d3d12_context_collect(ctx) - -enum __prof_plot_type { - __prof_plot_type_number = TracyPlotFormatNumber, - __prof_plot_type_memory = TracyPlotFormatMemory, - __prof_plot_type_percentage = TracyPlotFormatPercentage, - __prof_plot_type_watt = TracyPlotFormatWatt -}; -#define __prof_plot_init(name, type, step, fill, color) TracyCPlotConfig(name, type, step, fill, BGR32(color)) -#define __prof_plot(name, val) TracyCPlot(name, val) -#define __prof_plot_i(name, val) TracyCPlotI(name, val) - -#if PROFILING_CAPTURE_FRAME_IMAGE -# define __profframeimage(image, width, height, offset, flipped) TracyCFrameImage((image), (width), (height), (offset), (flipped)); -#else -# define __profframeimage(image, width, height, offset, flipped) -#endif /* PROFILING_CAPTURE_FRAME_IMAGE */ - -#else - -#define PROFILING_CAPTURE_FRAME_IMAGE 0 - -#define __prof -#define __profscope(name) -#define 
__profscope_dx11(dx11_ctx, name, color) -#define __profscope_dx12(dx11_ctx, queue, name, color) -#define __profalloc(ptr, size) -#define __proffree(ptr) -#define __profmsg(txt, len, col) -#define __profframe(name) -#define __profthread(name) -#define __profframeimage(image, width, height, offset, flipped) -#define __proflock_ctx -#define __proflock_alloc(ctx) -#define __proflock_release(ctx) -#define __proflock_before_exclusive_lock(ctx) -#define __proflock_after_exclusive_lock(ctx) -#define __proflock_after_exclusive_unlock(ctx) -#define __proflock_after_try_exclusive_lock(ctx, acquired) -#define __proflock_before_shared_lock(ctx) -#define __proflock_after_shared_lock(ctx) -#define __proflock_after_shared_unlock(ctx) -#define __proflock_after_try_shared_lock(ctx, acquired) -#define __proflock_mark(ctx) -#define __proflock_custom_name(ctx, name, len) -#define __prof_dx11_ctx -#define __prof_dx11_ctx_alloc(ctx, device, device_ctx, name, name_len) -#define __prof_dx11_ctx_release(ctx) -#define __prof_dx11_collect(ctx) -#define __prof_dx12_ctx -#define __prof_dx12_ctx_alloc(ctx, device, queue, name, name_len) -#define __prof_dx12_ctx_release(ctx) -#define __prof_dx12_new_frame(ctx) -#define __prof_dx12_collect(ctx) -#define __prof_plot_init(name, type, step, fill, color) -#define __prof_plot(name, val) -#define __prof_plot_i(name, val) - -#endif /* PROFILING */ - /* ========================== * * Configurable constants * ========================== */ #include "config.h" +/* ========================== * + * Profiling + * ========================== */ + +#include "prof_tracy.h" + #ifdef __cplusplus } #endif diff --git a/src/font.c b/src/font.c index 6ed42bb8..3978cbd8 100644 --- a/src/font.c +++ b/src/font.c @@ -1,7 +1,7 @@ #include "font.h" #include "arena.h" #include "ttf.h" -#include "work.h" +#include "job.h" #include "scratch.h" #include "asset_cache.h" #include "resource.h" @@ -41,13 +41,11 @@ GLOBAL struct { * Startup * ========================== */ -struct 
font_startup_receipt font_startup(struct work_startup_receipt *work_sr, - struct gp_startup_receipt *gp_sr, +struct font_startup_receipt font_startup(struct gp_startup_receipt *gp_sr, struct asset_cache_startup_receipt *asset_cache_sr, struct ttf_startup_receipt *ttf_sr, struct resource_startup_receipt *resource_sr) { - (UNUSED)work_sr; (UNUSED)gp_sr; (UNUSED)asset_cache_sr; (UNUSED)ttf_sr; @@ -91,12 +89,12 @@ INTERNAL void font_task_params_release(struct font_task_params *p) * Load * ========================== */ -INTERNAL WORK_TASK_FUNC_DEF(font_load_asset_task, vparams) +INTERNAL JOB_DEF(font_load_asset_job, job) { __prof; struct arena_temp scratch = scratch_begin_no_conflict(); - struct font_task_params *params = (struct font_task_params *)vparams; + struct font_task_params *params = job.sig; struct string path = STRING(params->path_len, (u8 *)params->path_cstr); f32 point_size = params->point_size; struct asset *asset = params->asset; @@ -163,7 +161,7 @@ INTERNAL WORK_TASK_FUNC_DEF(font_load_asset_task, vparams) } /* Returns the asset from the asset cache */ -struct asset *font_load_asset(struct string path, f32 point_size, b32 help) +struct asset *font_load_asset(struct string path, f32 point_size, b32 wait) { __prof; struct arena_temp scratch = scratch_begin_no_conflict(); @@ -192,13 +190,13 @@ struct asset *font_load_asset(struct string path, f32 point_size, b32 help) /* Push task */ asset_cache_mark_loading(asset); - struct work_handle wh = ZI; - if (help) { - wh = work_push_task_and_help(&font_load_asset_task, params, WORK_PRIORITY_NORMAL); + if (wait) { + job_dispatch_wait(1, font_load_asset_job, params); + asset_cache_set_job(asset, NULL); } else { - wh = work_push_task(&font_load_asset_task, params, WORK_PRIORITY_NORMAL); + struct job_handle job = job_dispatch_async(1, font_load_asset_job, params); + asset_cache_set_job(asset, &job); } - asset_cache_set_work(asset, &wh); } scratch_end(scratch); diff --git a/src/font.h b/src/font.h index 
193fb1c5..cef1c00d 100644 --- a/src/font.h +++ b/src/font.h @@ -5,7 +5,6 @@ #include "gp.h" struct asset; -struct work_startup_receipt; struct gp_startup_receipt; struct asset_cache_startup_receipt; struct ttf_startup_receipt; @@ -31,13 +30,12 @@ struct font { }; struct font_startup_receipt { i32 _; }; -struct font_startup_receipt font_startup(struct work_startup_receipt *work_sr, - struct gp_startup_receipt *gp_sr, +struct font_startup_receipt font_startup(struct gp_startup_receipt *gp_sr, struct asset_cache_startup_receipt *asset_cache_sr, struct ttf_startup_receipt *ttf_sr, struct resource_startup_receipt *resource_sr); -struct asset *font_load_asset(struct string path, f32 point_size, b32 help); +struct asset *font_load_asset(struct string path, f32 point_size, b32 wait); struct font *font_load_async(struct string path, f32 point_size); struct font *font_load(struct string path, f32 point_size); diff --git a/src/gp.h b/src/gp.h index e4dab4f3..97821baa 100644 --- a/src/gp.h +++ b/src/gp.h @@ -2,14 +2,13 @@ #define GP_H struct sys_window; -struct work_startup_receipt; /* ========================== * * Startup * ========================== */ struct gp_startup_receipt { i32 _; }; -struct gp_startup_receipt gp_startup(struct work_startup_receipt *work_sr); +struct gp_startup_receipt gp_startup(void); /* ========================== * * Resource diff --git a/src/gp_dx11.c b/src/gp_dx11.c index 07522aaa..b10760fd 100644 --- a/src/gp_dx11.c +++ b/src/gp_dx11.c @@ -223,8 +223,8 @@ struct dx11_shader_desc { GLOBAL struct { struct arena *arena; -#if PROFILING - struct __prof_dx11_ctx *profiling_ctx; +#if PROFILING_D3D + __prof_dx11_ctx(profiling_ctx); #endif ID3D11Device *dev; @@ -2007,7 +2007,7 @@ void gp_present(struct sys_window *window, struct v2i32 backbuffer_resolution, s * ========================== */ /* FIXME: enable this */ -#if PROFILING && PROFILING_CAPTURE_FRAME_IMAGE +#if PROFILING_CAPTURE_FRAME_IMAGE #define CAP_WIDTH 320 #define CAP_HEIGHT 180 diff --git 
a/src/gp_dx12.c b/src/gp_dx12.c index 65422fcd..16aa3120 100644 --- a/src/gp_dx12.c +++ b/src/gp_dx12.c @@ -7,7 +7,7 @@ #include "string.h" #include "scratch.h" #include "app.h" -#include "work.h" +#include "job.h" #include "log.h" #include "resource.h" #include "atomic.h" @@ -36,7 +36,7 @@ #pragma comment(lib, "dxguid") #pragma comment(lib, "d3dcompiler") -#if PROFILING +#if PROFILING_D3D /* For RegOpenKeyEx */ # include <windows.h> # pragma comment(lib, "advapi32") @@ -61,13 +61,13 @@ #if DX12_MULTI_QUEUE # define DX12_QUEUE_DIRECT 0 # define DX12_QUEUE_COMPUTE 1 -# define DX12_QUEUE_COPY_CRITICAL 2 +# define DX12_QUEUE_COPY 2 # define DX12_QUEUE_COPY_BACKGROUND 3 # define DX12_NUM_QUEUES 4 #else # define DX12_QUEUE_DIRECT 0 # define DX12_QUEUE_COMPUTE 0 -# define DX12_QUEUE_COPY_CRITICAL 0 +# define DX12_QUEUE_COPY 0 # define DX12_QUEUE_COPY_BACKGROUND 0 # define DX12_NUM_QUEUES 1 #endif @@ -142,8 +142,8 @@ struct command_queue { struct command_list_pool *cl_pool; -#if PROFILING - struct __prof_dx12_ctx *prof; +#if PROFILING_D3D + __prof_dx12_ctx(prof); #endif }; @@ -344,17 +344,16 @@ INTERNAL void dx12_init_pipelines(void); INTERNAL struct cpu_descriptor_heap *cpu_descriptor_heap_alloc(enum D3D12_DESCRIPTOR_HEAP_TYPE type); INTERNAL struct command_queue *command_queue_alloc(enum D3D12_COMMAND_LIST_TYPE type, enum D3D12_COMMAND_QUEUE_PRIORITY priority, struct string dbg_name); INTERNAL void command_queue_release(struct command_queue *cq); -INTERNAL SYS_THREAD_ENTRY_POINT_FUNC_DEF(evictor_thread_entry_point, arg); +INTERNAL SYS_THREAD_DEF(evictor_thread_entry_point, arg); INTERNAL void fenced_release(void *data, enum fenced_release_kind kind); #if RESOURCE_RELOADING INTERNAL RESOURCE_WATCH_CALLBACK_FUNC_DEF(pipeline_resource_watch_callback, name); #endif -struct gp_startup_receipt gp_startup(struct work_startup_receipt *work_sr) +struct gp_startup_receipt gp_startup(void) { __prof; - (UNUSED)work_sr; /* Initialize command descriptor heaps pool */ 
G.command_descriptor_heaps_mutex = sys_mutex_alloc(); @@ -535,7 +534,7 @@ INTERNAL void dx12_init_device(void) } #endif -#if PROFILING +#if PROFILING_D3D /* Enable stable power state */ { b32 success = true; @@ -601,8 +600,8 @@ INTERNAL void dx12_init_objects(void) G.command_queues[i] = command_queue_alloc(D3D12_COMMAND_LIST_TYPE_DIRECT, D3D12_COMMAND_QUEUE_PRIORITY_NORMAL, LIT("Direct queue")); } else if (i == DX12_QUEUE_COMPUTE) { G.command_queues[i] = command_queue_alloc(D3D12_COMMAND_LIST_TYPE_COMPUTE, D3D12_COMMAND_QUEUE_PRIORITY_NORMAL, LIT("Compute queue")); - } else if (i == DX12_QUEUE_COPY_CRITICAL) { - G.command_queues[i] = command_queue_alloc(D3D12_COMMAND_LIST_TYPE_COPY, D3D12_COMMAND_QUEUE_PRIORITY_HIGH, LIT("High priority copy queue")); + } else if (i == DX12_QUEUE_COPY) { + G.command_queues[i] = command_queue_alloc(D3D12_COMMAND_LIST_TYPE_COPY, D3D12_COMMAND_QUEUE_PRIORITY_HIGH, LIT("Copy queue")); } else if (i == DX12_QUEUE_COPY_BACKGROUND) { G.command_queues[i] = command_queue_alloc(D3D12_COMMAND_LIST_TYPE_COPY, D3D12_COMMAND_QUEUE_PRIORITY_NORMAL, LIT("Background copy queue")); } @@ -613,7 +612,7 @@ INTERNAL void dx12_init_objects(void) * Dx12 pipeline initialization * ========================== */ -INTERNAL void pipeline_alloc_from_desc(u64 num_pipelines, struct pipeline_desc *descs, struct pipeline **pipelines_out); +INTERNAL void pipeline_alloc(u64 num_pipelines, struct pipeline_desc *descs_in, struct pipeline **pipelines_out); INTERNAL void pipeline_register(u64 num_pipelines, struct pipeline **pipelines); INTERNAL void dx12_init_pipelines(void) @@ -666,7 +665,7 @@ INTERNAL void dx12_init_pipelines(void) ++num_pipelines; } struct pipeline **pipelines = arena_push_array(scratch.arena, struct pipeline *, num_pipelines); - pipeline_alloc_from_desc(num_pipelines, descs, pipelines); + pipeline_alloc(num_pipelines, descs, pipelines); for (u32 i = 0; i < num_pipelines; ++i) { struct pipeline *pipeline = pipelines[i]; if (!pipeline->success) { @@ 
-763,14 +762,14 @@ INTERNAL void dx12_include_handler_release(struct dx12_include_handler *handler) sys_mutex_release(handler->pipeline_mutex); } -enum shader_compile_task_kind { +enum shader_compile_job_kind { SHADER_COMPILE_TASK_KIND_VS, SHADER_COMPILE_TASK_KIND_PS }; -struct shader_compile_task_arg { +struct shader_compile_job_param { /* In */ - enum shader_compile_task_kind kind; + enum shader_compile_job_kind kind; struct pipeline *pipeline; struct shader_desc shader_desc; struct resource *shader_res; @@ -782,15 +781,20 @@ struct shader_compile_task_arg { i64 elapsed; }; -/* TODO: Compile shaders offline w/ dxc for performance & language features like static_assert */ -INTERNAL WORK_TASK_FUNC_DEF(shader_compile_task, comp_arg_raw) +struct shader_compile_job_sig { + struct shader_compile_job_param **params; +}; + +/* TODO: Compile shaders offline w/ dxc for performance & language features */ +INTERNAL JOB_DEF(shader_compile_job, job) { __prof; - struct shader_compile_task_arg *comp_arg = (struct shader_compile_task_arg *)comp_arg_raw; - enum shader_compile_task_kind kind = comp_arg->kind; - struct pipeline *pipeline = comp_arg->pipeline; - struct shader_desc shader_desc = comp_arg->shader_desc; - struct resource *shader_res = comp_arg->shader_res; + struct shader_compile_job_sig *sig = job.sig; + struct shader_compile_job_param *param = sig->params[job.id]; + enum shader_compile_job_kind kind = param->kind; + struct pipeline *pipeline = param->pipeline; + struct shader_desc shader_desc = param->shader_desc; + struct resource *shader_res = param->shader_res; struct arena_temp scratch = scratch_begin_no_conflict(); { @@ -846,10 +850,10 @@ INTERNAL WORK_TASK_FUNC_DEF(shader_compile_task, comp_arg_raw) } #endif - comp_arg->success = success; - comp_arg->blob = blob; - comp_arg->error_blob = error_blob; - comp_arg->elapsed = sys_time_ns() - start_ns; + param->success = success; + param->blob = blob; + param->error_blob = error_blob; + param->elapsed = sys_time_ns() 
- start_ns; } scratch_end(scratch); } @@ -858,11 +862,29 @@ INTERNAL WORK_TASK_FUNC_DEF(shader_compile_task, comp_arg_raw) * Pipeline * ========================== */ -INTERNAL WORK_TASK_FUNC_DEF(pipeline_load_task, load_arg_raw) +struct pipeline_init_job_sig { + struct pipeline_desc *descs_in; + struct pipeline **pipelines_out; +}; + +INTERNAL JOB_DEF(pipeline_init_job, job) { __prof; - struct pipeline *pipeline = (struct pipeline *)load_arg_raw; - struct pipeline_desc *desc = &pipeline->desc; + struct pipeline_init_job_sig *sig = job.sig; + struct pipeline_desc *desc = &sig->descs_in[job.id]; + struct pipeline **pipelines_out = sig->pipelines_out; + + struct pipeline *pipeline = NULL; + { + struct arena *pipeline_arena = arena_alloc(MEGABYTE(64)); + pipeline = arena_push(pipeline_arena, struct pipeline); + pipeline->arena = pipeline_arena; + pipelines_out[job.id] = pipeline; + } + pipeline->desc = *desc; + pipeline->name = string_copy(pipeline->arena, desc->name); + pipeline->hash = hash_fnv64(HASH_FNV64_BASIS, pipeline->name); + pipeline->dependencies = dict_init(pipeline->arena, 64); struct arena_temp scratch = scratch_begin_no_conflict(); { @@ -893,13 +915,13 @@ INTERNAL WORK_TASK_FUNC_DEF(pipeline_load_task, load_arg_raw) } } - struct shader_compile_task_arg vs = ZI; + struct shader_compile_job_param vs = ZI; vs.kind = SHADER_COMPILE_TASK_KIND_VS; vs.pipeline = pipeline; vs.shader_desc = desc->vs; vs.shader_res = &vs_res; - struct shader_compile_task_arg ps = ZI; + struct shader_compile_job_param ps = ZI; ps.kind = SHADER_COMPILE_TASK_KIND_PS; ps.pipeline = pipeline; ps.shader_desc = desc->ps; @@ -907,11 +929,9 @@ INTERNAL WORK_TASK_FUNC_DEF(pipeline_load_task, load_arg_raw) /* Compile shaders */ if (success) { - struct work_slate ws = work_slate_begin(); - work_slate_push_task(&ws, shader_compile_task, &vs); - work_slate_push_task(&ws, shader_compile_task, &ps); - struct work_handle work = work_slate_end_and_help(&ws, WORK_PRIORITY_HIGH); - work_wait(work); + 
struct shader_compile_job_param *params[] = { &vs, &ps }; + struct shader_compile_job_sig comp_sig = { .params = params }; + job_dispatch_wait(ARRAY_COUNT(params), shader_compile_job, &comp_sig); success = vs.success && ps.success; } @@ -1099,27 +1119,11 @@ INTERNAL WORK_TASK_FUNC_DEF(pipeline_load_task, load_arg_raw) scratch_end(scratch); } -INTERNAL void pipeline_alloc_from_desc(u64 num_pipelines, struct pipeline_desc *descs, struct pipeline **pipelines_out) +INTERNAL void pipeline_alloc(u64 num_pipelines, struct pipeline_desc *descs_in, struct pipeline **pipelines_out) { __prof; - struct work_slate ws = work_slate_begin(); - for (u64 i = 0; i < num_pipelines; ++i) { - struct pipeline_desc *desc = &descs[i]; - struct pipeline *pipeline = NULL; - { - struct arena *pipeline_arena = arena_alloc(MEGABYTE(64)); - pipeline = arena_push(pipeline_arena, struct pipeline); - pipeline->arena = pipeline_arena; - pipelines_out[i] = pipeline; - } - pipeline->desc = *desc; - pipeline->name = string_copy(pipeline->arena, desc->name); - pipeline->hash = hash_fnv64(HASH_FNV64_BASIS, pipeline->name); - pipeline->dependencies = dict_init(pipeline->arena, 64); - work_slate_push_task(&ws, pipeline_load_task, pipeline); - } - struct work_handle work = work_slate_end_and_help(&ws, WORK_PRIORITY_HIGH); - work_wait(work); + struct pipeline_init_job_sig sig = { .descs_in = descs_in, .pipelines_out = pipelines_out }; + job_dispatch_wait(num_pipelines, pipeline_init_job, &sig); } INTERNAL void pipeline_release_now(struct pipeline *pipeline) @@ -1262,7 +1266,7 @@ INTERNAL RESOURCE_WATCH_CALLBACK_FUNC_DEF(pipeline_resource_watch_callback, name /* Recompile dirty pipelines */ if (num_pipelines > 0) { struct pipeline **pipelines = arena_push_array(scratch.arena, struct pipeline *, num_pipelines); - pipeline_alloc_from_desc(num_pipelines, pipeline_descs, pipelines); + pipeline_alloc(num_pipelines, pipeline_descs, pipelines); { struct sys_lock lock = sys_mutex_lock_s(G.pipelines_mutex); for (u32 i 
= 0; i < num_pipelines; ++i) { @@ -1539,7 +1543,7 @@ INTERNAL void fenced_release(void *data, enum fenced_release_kind kind) u64 fr_targets[ARRAY_COUNT(G.fenced_release_targets)] = ZI; - /* Read fence values */ + /* Read current fence target values from command queues */ for (u32 i = 0; i < ARRAY_COUNT(G.command_queues); ++i) { struct command_queue *cq = G.command_queues[i]; struct sys_lock lock = sys_mutex_lock_s(cq->submit_fence_mutex); @@ -2803,7 +2807,7 @@ void gp_present(struct sys_window *window, struct v2i32 backresolution, struct g __profframe(0); } -#if PROFILING +#if PROFILING_D3D { __profscope(Mark queue frames); /* Lock because frame marks shouldn't occur while command lists are recording */ @@ -2828,7 +2832,7 @@ void gp_present(struct sys_window *window, struct v2i32 backresolution, struct g * Evictor thread * ========================== */ -INTERNAL SYS_THREAD_ENTRY_POINT_FUNC_DEF(evictor_thread_entry_point, arg) +INTERNAL SYS_THREAD_DEF(evictor_thread_entry_point, arg) { __prof; (UNUSED)arg; diff --git a/src/host.c b/src/host.c index 9672bdd2..2aac9403 100644 --- a/src/host.c +++ b/src/host.c @@ -157,7 +157,7 @@ GLOBAL struct { i32 _; } G = ZI, DEBUG_ALIAS(G, G_host); -INTERNAL SYS_THREAD_ENTRY_POINT_FUNC_DEF(host_receiver_thread_entry_point, arg); +INTERNAL SYS_THREAD_DEF(host_receiver_thread_entry_point, arg); INTERNAL void host_msg_assembler_release(struct host_msg_assembler *ma); /* ========================== * @@ -1061,7 +1061,7 @@ void host_update_end(struct host *host) * Receive thread * ========================== */ -INTERNAL SYS_THREAD_ENTRY_POINT_FUNC_DEF(host_receiver_thread_entry_point, arg) +INTERNAL SYS_THREAD_DEF(host_receiver_thread_entry_point, arg) { u64 read_buff_size = KILOBYTE(64); struct arena *read_buff_arena = arena_alloc(read_buff_size); diff --git a/src/job.c b/src/job.c new file mode 100644 index 00000000..35336e2a --- /dev/null +++ b/src/job.c @@ -0,0 +1,296 @@ +#include "job.h" +#include "sys.h" +#include "arena.h" 
+#include "atomic.h" +#include "string.h" +#include "scratch.h" +#include "app.h" + +#if 0 +/* FIXME: Remove this (replace with sys_ wrappers) */ +#include +#endif + +struct worker_job { + struct sys_mutex *mutex; + i32 num_workers; + i32 num_dispatched; + + i32 count; + job_func *func; + void *sig; + + u64 gen; + struct sys_condition_variable *gen_cv; + + + struct worker_job *prev; + struct worker_job *next; + + struct worker_job *next_free; +}; + +/* ========================== * + * Global state + * ========================== */ + +struct worker_info { + i32 id; +}; + +GLOBAL struct { + struct sys_mutex *free_jobs_mutex; + struct arena *free_jobs_arena; + struct worker_job *first_free_job; + + struct sys_mutex *queued_jobs_mutex; + struct worker_job *first_queued_job; + struct worker_job *last_queued_job; + u64 num_queued_jobs; + + + u32 num_worker_threads; + b32 workers_shutdown; + struct sys_mutex *workers_wake_mutex; + struct sys_condition_variable *workers_wake_cv; + struct sys_thread *worker_threads[JOB_MAX_WORKERS]; +} G = ZI, DEBUG_ALIAS(G, G_job); + +/* ========================== * + * Startup + * ========================== */ + +INTERNAL SYS_THREAD_DEF(worker_thread_entry_point, thread_arg); +INTERNAL APP_EXIT_CALLBACK_FUNC_DEF(job_shutdown); + +void job_startup(i32 num_workers) +{ + __prof; + struct arena_temp scratch = scratch_begin_no_conflict(); + + G.free_jobs_mutex = sys_mutex_alloc(); + G.free_jobs_arena = arena_alloc(GIGABYTE(64)); + + G.queued_jobs_mutex = sys_mutex_alloc(); + + G.workers_wake_mutex = sys_mutex_alloc(); + G.workers_wake_cv = sys_condition_variable_alloc(); + + if (num_workers < JOB_MIN_WORKERS || num_workers > JOB_MAX_WORKERS) { + /* Invalid worker count */ + ASSERT(false); + } + G.num_worker_threads = num_workers; + for (u64 i = 0; i < G.num_worker_threads; ++i) { + u32 prefix = num_workers - i; /* For profiler sorting order */ + struct string name = string_format(scratch.arena, LIT("[P6%F] Worker #%F"), FMT_UINT(prefix), 
FMT_UINT(i)); + G.worker_threads[i] = sys_thread_alloc(worker_thread_entry_point, &i, name); + } + + app_register_exit_callback(job_shutdown); + + scratch_end(scratch); +} + +INTERNAL APP_EXIT_CALLBACK_FUNC_DEF(job_shutdown) +{ + __prof; + { + struct sys_lock lock = sys_mutex_lock_e(G.workers_wake_mutex); + G.workers_shutdown = true; + sys_condition_variable_signal(G.workers_wake_cv, U32_MAX); + sys_mutex_unlock(&lock); + } + for (u32 i = 0; i < G.num_worker_threads; ++i) { + struct sys_thread *thread = G.worker_threads[i]; + sys_thread_wait_release(thread); + } +} + +/* ========================== * + * Job + * ========================== */ + +struct job_handle job_dispatch_async(u32 count, job_func *job_func, void *sig) +{ + __prof; + + /* Allocate job */ + u64 gen = 0; + struct worker_job *job = NULL; + { + struct sys_mutex *old_mutex = NULL; + struct sys_condition_variable *old_cv = NULL; + { + struct sys_lock lock = sys_mutex_lock_e(G.free_jobs_mutex); + if (G.first_free_job) { + job = G.first_free_job; + G.first_free_job = job->next_free; + old_mutex = job->mutex; + old_cv = job->gen_cv; + gen = job->gen + 1; + } else { + job = arena_push_no_zero(G.free_jobs_arena, struct worker_job); + gen = 1; + } + sys_mutex_unlock(&lock); + } + MEMZERO_STRUCT(job); + if (old_mutex) { + job->mutex = old_mutex; + job->gen_cv = old_cv; + } else { + job->mutex = sys_mutex_alloc(); + job->gen_cv = sys_condition_variable_alloc(); + } + } + job->count = count; + job->func = job_func; + job->sig = sig; + job->gen = gen; + + /* Queue job */ + { + struct sys_lock lock = sys_mutex_lock_e(G.queued_jobs_mutex); + if (G.last_queued_job) { + G.last_queued_job->next = job; + } else { + G.first_queued_job = job; + } + G.last_queued_job = job; + sys_mutex_unlock(&lock); + } + + /* Signal workers */ + { + struct sys_lock lock = sys_mutex_lock_e(G.workers_wake_mutex); + sys_condition_variable_signal(G.workers_wake_cv, count); + sys_mutex_unlock(&lock); + } + + struct job_handle handle = ZI; + 
handle.job = job; + handle.gen = gen; + return handle; +} + +void job_dispatch_wait(u32 count, job_func *job_func, void *sig) +{ + __prof; + struct job_handle handle = job_dispatch_async(count, job_func, sig); + job_wait(handle); +} + +void job_wait(struct job_handle handle) +{ + __prof; + if (handle.job) { + struct worker_job *job = handle.job; + while (job->gen == handle.gen) { + struct sys_lock lock = sys_mutex_lock_s(job->mutex); + sys_condition_variable_wait(job->gen_cv, &lock); + sys_mutex_unlock(&lock); + } + } +} + +/* ========================== * + * Worker + * ========================== */ + +INTERNAL SYS_THREAD_DEF(worker_thread_entry_point, thread_arg) +{ + i32 worker_id = *(i32 *)thread_arg; + (UNUSED)worker_id; + + struct sys_lock workers_wake_lock = sys_mutex_lock_s(G.workers_wake_mutex); + while (!G.workers_shutdown) { + sys_mutex_unlock(&workers_wake_lock); + + /* Try to pick job from queue */ + i32 job_id = 0; + i32 job_count = 0; + struct worker_job *job = NULL; + { + struct sys_lock queue_lock = sys_mutex_lock_s(G.queued_jobs_mutex); + for (struct worker_job *tmp = G.first_queued_job; tmp && !job; tmp = tmp->next) { + struct sys_lock job_lock = sys_mutex_lock_e(tmp->mutex); + { + i32 tmp_id = tmp->num_dispatched++; + i32 tmp_count = tmp->count; + if (tmp_id < tmp_count) { + /* Pick job */ + ++tmp->num_workers; + job = tmp; + job_id = tmp_id; + job_count = tmp_count; + } + } + sys_mutex_unlock(&job_lock); + } + sys_mutex_unlock(&queue_lock); + } + + /* Remove job from queue */ + if (job_id == (job_count - 1)) { + struct sys_lock queue_lock = sys_mutex_lock_e(G.queued_jobs_mutex); + { + struct worker_job *prev = job->prev; + struct worker_job *next = job->next; + if (prev) { + prev->next = next; + } else { + G.first_queued_job = next; + } + if (next) { + next->prev = prev; + } else { + G.last_queued_job = prev; + } + --G.num_queued_jobs; + } + sys_mutex_unlock(&queue_lock); + } + + /* Execute job */ + if (job) { + struct job_data data = ZI; + 
data.sig = job->sig; + job_func *func = job->func; + b32 should_release = false; + while (job_id < job_count) { + { + data.id = job_id; + func(data); + } + { + struct sys_lock job_lock = sys_mutex_lock_e(job->mutex); + job_id = job->num_dispatched++; + if (job_id >= job_count) { + i32 num_workers = --job->num_workers; + if (num_workers == 0) { + ++job->gen; + should_release = true; + sys_condition_variable_signal(job->gen_cv, U32_MAX); + } + } + sys_mutex_unlock(&job_lock); + } + } + if (should_release) { + struct sys_lock fj_lock = sys_mutex_lock_e(G.free_jobs_mutex); + { + job->next_free = G.first_free_job; + G.first_free_job = job; + } + sys_mutex_unlock(&fj_lock); + } + } + + workers_wake_lock = sys_mutex_lock_s(G.workers_wake_mutex); + if (!G.workers_shutdown && !G.first_queued_job) { + __profscope(Worker sleep); + sys_condition_variable_wait(G.workers_wake_cv, &workers_wake_lock); + } + } +} diff --git a/src/job.h b/src/job.h new file mode 100644 index 00000000..4a8d0a7e --- /dev/null +++ b/src/job.h @@ -0,0 +1,34 @@ +#ifndef JOB_H +#define JOB_H + +#define JOB_MIN_WORKERS 2 +#define JOB_MAX_WORKERS 64 + +/* ========================== * + * Startup + * ========================== */ + +void job_startup(i32 num_workers); + +/* ========================== * + * Job + * ========================== */ + +struct job_data { + i32 id; + void *sig; +}; + +struct job_handle { + void *job; + u64 gen; +}; + +#define JOB_DEF(job_name, arg_name) void job_name(struct job_data arg_name) +typedef JOB_DEF(job_func, job_data); + +struct job_handle job_dispatch_async(u32 count, job_func *job_func, void *sig); +void job_dispatch_wait(u32 count, job_func *job_func, void *sig); +void job_wait(struct job_handle handle); + +#endif diff --git a/src/playback_wasapi.c b/src/playback_wasapi.c index 48e49d0f..44d07010 100644 --- a/src/playback_wasapi.c +++ b/src/playback_wasapi.c @@ -44,7 +44,6 @@ GLOBAL struct { IAudioRenderClient *playback; WAVEFORMATEX *buffer_format; u32 buffer_frames; 
- HANDLE mmtc_handle; } G = ZI, DEBUG_ALIAS(G, G_playback_wasapi); /* ========================== * @@ -53,7 +52,7 @@ GLOBAL struct { INTERNAL void wasapi_initialize(void); INTERNAL APP_EXIT_CALLBACK_FUNC_DEF(playback_shutdown); -INTERNAL SYS_THREAD_ENTRY_POINT_FUNC_DEF(playback_thread_entry_point, arg); +INTERNAL SYS_THREAD_DEF(playback_thread_entry_point, arg); struct playback_startup_receipt playback_startup(struct mixer_startup_receipt *mixer_sr) { @@ -79,10 +78,6 @@ INTERNAL APP_EXIT_CALLBACK_FUNC_DEF(playback_shutdown) INTERNAL void wasapi_initialize(void) { - /* https://learn.microsoft.com/en-us/windows/win32/procthread/multimedia-class-scheduler-service#registry-settings */ - DWORD task = 0; - G.mmtc_handle = AvSetMmThreadCharacteristicsW(L"Pro Audio", &task); - u64 sample_rate = PLAYBACK_SAMPLE_RATE; u64 channel_count = 2; u32 channel_mask = SPEAKER_FRONT_LEFT | SPEAKER_FRONT_RIGHT; @@ -233,11 +228,15 @@ INTERNAL void wasapi_update_end(struct wasapi_buffer *wspbuf, struct mixed_pcm_f * Playback thread entry * ========================== */ -INTERNAL SYS_THREAD_ENTRY_POINT_FUNC_DEF(playback_thread_entry_point, arg) +INTERNAL SYS_THREAD_DEF(playback_thread_entry_point, arg) { struct arena_temp scratch = scratch_begin_no_conflict(); (UNUSED)arg; + /* https://learn.microsoft.com/en-us/windows/win32/procthread/multimedia-class-scheduler-service#registry-settings */ + DWORD task = 0; + HANDLE mmc_handle = AvSetMmThreadCharacteristicsW(L"Pro Audio", &task); + ASSERT(mmc_handle); /* FIXME: If playback fails at any point and mixer stops advancing, we * need to halt mixer to prevent memory leak when sounds are played. 
*/ diff --git a/src/prof_tracy.h b/src/prof_tracy.h new file mode 100644 index 00000000..45e7d31f --- /dev/null +++ b/src/prof_tracy.h @@ -0,0 +1,139 @@ +#ifndef PROF_H +#define PROF_H + +#if COMPILER_MSVC +# error "MSVC not supported for profiling (cleanup attributes are required for profiling markup)" +#endif + +#if PROFILING + +/* Include tracy client */ +#define TRACY_ENABLE +#define TRACY_MANUAL_LIFETIME +#define TRACY_DELAYED_INIT +#if 1 +/* Disable system tracing (very slow) */ +# define TRACY_NO_CALLSTACK +# define TRACY_NO_SYSTEM_TRACING +#endif +#include STRINGIZE(TRACY_INCLUDE_PATH) + +#define PROFILING_CAPTURE_FRAME_IMAGE 0 +#define PROFILING_LOCKS 0 +#define PROFILING_D3D 1 +#define PROFILING_CMD_WSTR L"tracy-profiler.exe -a 127.0.0.1" + +/* Clang/GCC cleanup macros */ +#define __prof static const struct ___tracy_source_location_data CAT(__tracy_source_location,__LINE__) = { NULL, __func__, __FILE__, (uint32_t)__LINE__, 0 }; __attribute((cleanup(__prof_zone_cleanup_func))) TracyCZoneCtx __tracy_zone_ctx = ___tracy_emit_zone_begin( &CAT(__tracy_source_location,__LINE__), true ); +#define __profscope(name) static const struct ___tracy_source_location_data CAT(__tracy_source_location,__LINE__) = { #name, __func__, __FILE__, (uint32_t)__LINE__, 0 }; __attribute((cleanup(__prof_zone_cleanup_func))) TracyCZoneCtx __tracy_zone_ctx = ___tracy_emit_zone_begin( &CAT(__tracy_source_location,__LINE__), true ); +INLINE void __prof_zone_cleanup_func(TracyCZoneCtx *ctx) { TracyCZoneEnd(*ctx); } + +#define __profalloc(ptr, size) TracyCAlloc((ptr), (size)) +#define __proffree(ptr) TracyCFree((ptr)) +#define __profmsg(txt, len, col) TracyCMessageC((txt), (len), BGR32(col)); +#define __profframe(name) TracyCFrameMarkNamed((name)) +#define __profthread(name) TracyCSetThreadName((name)) + +enum __prof_plot_type { + __prof_plot_type_number = TracyPlotFormatNumber, + __prof_plot_type_memory = TracyPlotFormatMemory, + __prof_plot_type_percentage = TracyPlotFormatPercentage, + 
__prof_plot_type_watt = TracyPlotFormatWatt +}; +#define __prof_plot_init(name, type, step, fill, color) TracyCPlotConfig(name, type, step, fill, BGR32(color)) +#define __prof_plot(name, val) TracyCPlot(name, val) +#define __prof_plot_i(name, val) TracyCPlotI(name, val) +#define __prof_is_connected() ___tracy_connected() + +#else + +#define PROFILING_CAPTURE_FRAME_IMAGE 0 +#define PROFILING_LOCKS 0 +#define PROFILING_D3D 0 + +#define __prof +#define __profscope(name) +#define __profalloc(ptr, size) +#define __proffree(ptr) +#define __profmsg(txt, len, col) +#define __profframe(name) +#define __profthread(name) +#define __prof_plot_init(name, type, step, fill, color) +#define __prof_plot(name, val) +#define __prof_plot_i(name, val) +#define __prof_is_connected() 0 + +#endif /* PROFILING */ + +#if PROFILING_LOCKS +# define __proflock_ctx(name) struct TracyCSharedLockCtx *name +# define __proflock_alloc(ctx) TracyCSharedLockAnnounce((ctx)) +# define __proflock_release(ctx) TracyCSharedLockTerminate((ctx)) +# define __proflock_before_exclusive_lock(ctx) TracyCSharedLockBeforeExclusiveLock((ctx)) +# define __proflock_after_exclusive_lock(ctx) TracyCSharedLockAfterExclusiveLock((ctx)) +# define __proflock_after_exclusive_unlock(ctx) TracyCSharedLockAfterExclusiveUnlock((ctx)) +# define __proflock_after_try_exclusive_lock(ctx, acquired) TracyCSharedLockAfterTryExclusiveLock((ctx), (acquired)) +# define __proflock_before_shared_lock(ctx) TracyCSharedLockBeforeSharedLock((ctx)) +# define __proflock_after_shared_lock(ctx) TracyCSharedLockAfterSharedLock((ctx)) +# define __proflock_after_shared_unlock(ctx) TracyCSharedLockAfterSharedUnlock((ctx)) +# define __proflock_after_try_shared_lock(ctx, acquired) TracyCSharedLockAfterTrySharedLock((ctx), (acquired)) +# define __proflock_mark(ctx) TracyCSharedLockMark((ctx)) +# define __proflock_custom_name(ctx, name, len) TracyCSharedLockCustomName((ctx), (name), (len)) +#else +# define __proflock_alloc(ctx) +# define 
__proflock_release(ctx) +# define __proflock_before_exclusive_lock(ctx) +# define __proflock_after_exclusive_lock(ctx) +# define __proflock_after_exclusive_unlock(ctx) +# define __proflock_after_try_exclusive_lock(ctx, acquired) +# define __proflock_before_shared_lock(ctx) +# define __proflock_after_shared_lock(ctx) +# define __proflock_after_shared_unlock(ctx) +# define __proflock_after_try_shared_lock(ctx, acquired) +# define __proflock_mark(ctx) +# define __proflock_custom_name(ctx, name, len) +#endif /* PROFILING && PROFILING_LOCKS */ + +#if PROFILING_D3D +/* Dx11 */ +INLINE void __prof_dx11_zone_cleanup_func(TracyCD3D11ZoneCtx *ctx) { ___tracy_d3d11_emit_zone_end(*ctx); } +# define __profscope_dx11(dx11_ctx, name, color) static const struct ___tracy_source_location_data CAT(__tracy_gpu_d3d11_source_location,__LINE__) = { #name, __func__, __FILE__, (uint32_t)__LINE__, BGR32(color) }; __attribute((cleanup(__prof_dx11_zone_cleanup_func))) TracyCD3D11ZoneCtx __tracy_d3d11_zone_ctx; ___tracy_d3d11_emit_zone_begin( dx11_ctx, &__tracy_d3d11_zone_ctx, &CAT(__tracy_gpu_d3d11_source_location,__LINE__), true); +# define __prof_dx11_ctx(name) struct TracyCD3D11Ctx *name +# define __prof_dx11_ctx_alloc(ctx, device, device_ctx, name, name_len) ctx = ___tracy_d3d11_context_announce(device, device_ctx, name, name_len) +# define __prof_dx11_ctx_release(ctx) ___tracy_d3d11_context_terminate(ctx) +# define __prof_dx11_collect(ctx) ___tracy_d3d11_context_collect(ctx) +/* Dx12 */ +INLINE void __prof_dx12_zone_cleanup_func(TracyCD3D12ZoneCtx *ctx) { ___tracy_d3d12_emit_zone_end(*ctx); } +# define __profscope_dx12(dx12_ctx, cmd_list, name, color) static const struct ___tracy_source_location_data CAT(__tracy_gpu_d3d12_source_location,__LINE__) = { #name, __func__, __FILE__, (uint32_t)__LINE__, BGR32(color) }; __attribute((cleanup(__prof_dx12_zone_cleanup_func))) TracyCD3D12ZoneCtx __tracy_d3d12_zone_ctx; ___tracy_d3d12_emit_zone_begin( dx12_ctx, cmd_list, &__tracy_d3d12_zone_ctx, 
&CAT(__tracy_gpu_d3d12_source_location,__LINE__), true); +# define __prof_dx12_ctx(name) struct TracyCD3D12Ctx *name +# define __prof_dx12_ctx_alloc(ctx, device, queue, name, name_len) ctx = ___tracy_d3d12_context_announce(device, queue, name, name_len) +# define __prof_dx12_ctx_release(ctx) ___tracy_d3d12_context_terminate(ctx) +# define __prof_dx12_new_frame(ctx) ___tracy_d3d12_context_new_frame(ctx) +# define __prof_dx12_collect(ctx) ___tracy_d3d12_context_collect(ctx) +#else +# define __profscope_dx11(dx11_ctx, name, color) +# define __prof_dx11_ctx_alloc(ctx, device, device_ctx, name, name_len) +# define __prof_dx11_ctx_release(ctx) +# define __prof_dx11_collect(ctx) +# define __profscope_dx12(dx11_ctx, queue, name, color) +# define __prof_dx12_ctx_alloc(ctx, device, queue, name, name_len) +# define __prof_dx12_ctx_release(ctx) +# define __prof_dx12_new_frame(ctx) +# define __prof_dx12_collect(ctx) +#endif /* PROFILING_D3D */ + +#if PROFILING_CAPTURE_FRAME_IMAGE +# define __profframeimage(image, width, height, offset, flipped) TracyCFrameImage((image), (width), (height), (offset), (flipped)); +#else +# define __profframeimage(image, width, height, offset, flipped) +#endif /* PROFILING_CAPTURE_FRAME_IMAGE */ + +#ifdef TRACY_MANUAL_LIFETIME +# define __prof_startup ___tracy_startup_profiler() +# define __prof_shutdown ___tracy_shutdown_profiler() +#else +# define __prof_startup +# define __prof_shutdown +#endif /* TRACY_MANUAL_LIFETIME */ + +#endif diff --git a/src/resource.c b/src/resource.c index 2e1635df..2b2970ea 100644 --- a/src/resource.c +++ b/src/resource.c @@ -45,8 +45,8 @@ GLOBAL struct { * ========================== */ #if RESOURCE_RELOADING -INTERNAL SYS_THREAD_ENTRY_POINT_FUNC_DEF(resource_watch_monitor_thread_entry_point, _); -INTERNAL SYS_THREAD_ENTRY_POINT_FUNC_DEF(resource_watch_dispatcher_thread_entry_point, _); +INTERNAL SYS_THREAD_DEF(resource_watch_monitor_thread_entry_point, _); +INTERNAL 
SYS_THREAD_DEF(resource_watch_dispatcher_thread_entry_point, _); INTERNAL APP_EXIT_CALLBACK_FUNC_DEF(resource_shutdown); #endif @@ -182,7 +182,7 @@ void resource_register_watch_callback(resource_watch_callback *callback) sys_mutex_unlock(&lock); } -INTERNAL SYS_THREAD_ENTRY_POINT_FUNC_DEF(resource_watch_monitor_thread_entry_point, _) +INTERNAL SYS_THREAD_DEF(resource_watch_monitor_thread_entry_point, _) { (UNUSED)_; struct arena_temp scratch = scratch_begin_no_conflict(); @@ -218,7 +218,7 @@ INTERNAL SYS_THREAD_ENTRY_POINT_FUNC_DEF(resource_watch_monitor_thread_entry_poi #define WATCH_DISPATCHER_DELAY_SECONDS 0.050 #define WATCH_DISPATCHER_DEDUP_DICT_BINS 128 -INTERNAL SYS_THREAD_ENTRY_POINT_FUNC_DEF(resource_watch_dispatcher_thread_entry_point, _) +INTERNAL SYS_THREAD_DEF(resource_watch_dispatcher_thread_entry_point, _) { (UNUSED)_; struct arena_temp scratch = scratch_begin_no_conflict(); diff --git a/src/sound.c b/src/sound.c index ee8ec0f3..e98df42e 100644 --- a/src/sound.c +++ b/src/sound.c @@ -6,7 +6,7 @@ #include "resource.h" #include "asset_cache.h" #include "mp3.h" -#include "work.h" +#include "job.h" struct sound_task_params { struct sound_task_params *next_free; @@ -35,11 +35,9 @@ GLOBAL struct { * Startup * ========================== */ -struct sound_startup_receipt sound_startup(struct work_startup_receipt *work_sr, - struct asset_cache_startup_receipt *asset_cache_sr, +struct sound_startup_receipt sound_startup(struct asset_cache_startup_receipt *asset_cache_sr, struct resource_startup_receipt *resource_sr) { - (UNUSED)work_sr; (UNUSED)asset_cache_sr; (UNUSED)resource_sr; @@ -81,10 +79,10 @@ INTERNAL void sound_task_params_release(struct sound_task_params *p) * Load * ========================== */ -INTERNAL WORK_TASK_FUNC_DEF(sound_load_asset_task, vparams) +INTERNAL JOB_DEF(sound_load_asset_job, job) { __prof; - struct sound_task_params *params = (struct sound_task_params *)vparams; + struct sound_task_params *params = job.sig; struct arena_temp 
scratch = scratch_begin_no_conflict(); struct string path = STRING(params->path_len, (u8 *)params->path_cstr); struct asset *asset = params->asset; @@ -156,7 +154,7 @@ INTERNAL WORK_TASK_FUNC_DEF(sound_load_asset_task, vparams) scratch_end(scratch); } -struct asset *sound_load_asset(struct string path, u32 flags, b32 help) +struct asset *sound_load_asset(struct string path, u32 flags, b32 wait) { __prof; struct arena_temp scratch = scratch_begin_no_conflict(); @@ -185,13 +183,13 @@ struct asset *sound_load_asset(struct string path, u32 flags, b32 help) /* Push task */ asset_cache_mark_loading(asset); - struct work_handle wh = ZI; - if (help) { - wh = work_push_task_and_help(&sound_load_asset_task, params, WORK_PRIORITY_NORMAL); + if (wait) { + job_dispatch_wait(1, sound_load_asset_job, params); + asset_cache_set_job(asset, NULL); } else { - wh = work_push_task(&sound_load_asset_task, params, WORK_PRIORITY_NORMAL); + struct job_handle job = job_dispatch_async(1, sound_load_asset_job, params); + asset_cache_set_job(asset, &job); } - asset_cache_set_work(asset, &wh); } scratch_end(scratch); diff --git a/src/sound.h b/src/sound.h index aecf766e..41872b9a 100644 --- a/src/sound.h +++ b/src/sound.h @@ -5,7 +5,6 @@ #define SOUND_FLAG_STEREO 0x1 struct asset; -struct work_startup_receipt; struct asset_cache_startup_receipt; struct resource_startup_receipt; @@ -15,8 +14,7 @@ struct sound { }; struct sound_startup_receipt { i32 _; }; -struct sound_startup_receipt sound_startup(struct work_startup_receipt *work_sr, - struct asset_cache_startup_receipt *asset_cache_sr, +struct sound_startup_receipt sound_startup(struct asset_cache_startup_receipt *asset_cache_sr, struct resource_startup_receipt *resource_sr); struct asset *sound_load_asset(struct string path, u32 flags, b32 wait); diff --git a/src/sprite.c b/src/sprite.c index 732f2b46..a704f937 100644 --- a/src/sprite.c +++ b/src/sprite.c @@ -6,7 +6,7 @@ #include "resource.h" #include "ase.h" #include "util.h" -#include 
"work.h" +#include "job.h" #include "atomic.h" #include "app.h" #include "gp.h" @@ -203,8 +203,8 @@ INTERNAL struct image_rgba generate_purple_black_image(struct arena *arena, u32 * ========================== */ INTERNAL APP_EXIT_CALLBACK_FUNC_DEF(sprite_shutdown); -INTERNAL WORK_TASK_FUNC_DEF(sprite_load_task, arg); -INTERNAL SYS_THREAD_ENTRY_POINT_FUNC_DEF(sprite_evictor_thread_entry_point, arg); +INTERNAL JOB_DEF(sprite_load_job, arg); +INTERNAL SYS_THREAD_DEF(sprite_evictor_thread_entry_point, arg); #if RESOURCE_RELOADING INTERNAL RESOURCE_WATCH_CALLBACK_FUNC_DEF(sprite_resource_watch_callback, info); @@ -312,7 +312,7 @@ INTERNAL struct cache_entry_hash cache_entry_hash_from_tag_hash(u64 tag_hash, en * ========================== */ INTERNAL struct sprite_scope_cache_ref *scope_ensure_ref_from_ref(struct sprite_scope *scope, struct cache_ref ref); -INTERNAL void push_load_task(struct cache_ref ref, struct sprite_tag tag) +INTERNAL void push_load_job(struct cache_ref ref, struct sprite_tag tag) { struct load_cmd *cmd = NULL; { @@ -338,7 +338,7 @@ INTERNAL void push_load_task(struct cache_ref ref, struct sprite_tag tag) } /* Push work */ - work_push_task(&sprite_load_task, cmd, WORK_PRIORITY_NORMAL); + job_dispatch_async(1, sprite_load_job, cmd); } INTERNAL void cache_entry_load_texture(struct cache_ref ref, struct sprite_tag tag) @@ -1013,7 +1013,7 @@ INTERNAL void *data_from_tag_internal(struct sprite_scope *scope, struct sprite_ } } else { /* Allocate cmd */ - push_load_task(ref, tag); + push_load_job(ref, tag); } } } @@ -1141,13 +1141,13 @@ struct sprite_sheet_slice_array sprite_sheet_get_slices(struct sprite_sheet *she } /* ========================== * - * Load task + * Load job * ========================== */ -INTERNAL WORK_TASK_FUNC_DEF(sprite_load_task, arg) +INTERNAL JOB_DEF(sprite_load_job, job) { __prof; - struct load_cmd *cmd = (struct load_cmd *)arg; + struct load_cmd *cmd = job.sig; struct cache_ref ref = cmd->ref; switch (ref.e->kind) { @@ -1190,7 
+1190,7 @@ INTERNAL void reload_if_exists(struct sprite_scope *scope, struct sprite_tag tag if (existing_ref) { logf_info("Sprite resource file \"%F\" has changed for sprite [%F].", FMT_STR(tag.path), FMT_HEX(hash.v)); struct sprite_scope_cache_ref *scope_ref = cache_entry_from_tag(scope, tag, kind, true); - push_load_task(scope_ref->ref, tag); + push_load_job(scope_ref->ref, tag); } } @@ -1240,7 +1240,7 @@ INTERNAL SORT_COMPARE_FUNC_DEF(evict_sort, arg_a, arg_b, udata) * - The cache is over its memory budget and the node's last reference is longer ago than the grace period * - Resource reloading is enabled and the node is out of date due to a change to its original resource file */ -INTERNAL SYS_THREAD_ENTRY_POINT_FUNC_DEF(sprite_evictor_thread_entry_point, arg) +INTERNAL SYS_THREAD_DEF(sprite_evictor_thread_entry_point, arg) { (UNUSED)arg; diff --git a/src/sys.h b/src/sys.h index 530e44a2..11d0ae40 100644 --- a/src/sys.h +++ b/src/sys.h @@ -403,12 +403,12 @@ struct thread_local_store *sys_thread_get_thread_local_store(void); #define SYS_THREAD_STACK_SIZE MEGABYTE(4) -#define SYS_THREAD_ENTRY_POINT_FUNC_DEF(name, arg_name) void name(void *arg_name) -typedef SYS_THREAD_ENTRY_POINT_FUNC_DEF(sys_thread_entry_point_func, data); +#define SYS_THREAD_DEF(name, arg_name) void name(void *arg_name) +typedef SYS_THREAD_DEF(sys_thread_func, data); /* Creates a new thread running in the supplied `entry_point` */ struct sys_thread *sys_thread_alloc( - sys_thread_entry_point_func *entry_point, + sys_thread_func *entry_point, void *thread_data, /* Passed as arg to `entry_point` */ struct string thread_name ); @@ -467,4 +467,10 @@ void sys_sleep_precise(f64 seconds); * (less cpu intensive) */ void sys_sleep(f64 seconds); +/* ========================== * + * Command line + * ========================== */ + +b32 sys_run_command(struct string cmd); + #endif diff --git a/src/sys_win32.c b/src/sys_win32.c index e9540921..b55ea886 100644 --- a/src/sys_win32.c +++ b/src/sys_win32.c @@ 
-5,7 +5,6 @@ #include "arena.h" #include "scratch.h" #include "atomic.h" -#include "work.h" #include "log.h" #include "math.h" #include "util.h" @@ -37,9 +36,10 @@ struct win32_mutex { SRWLOCK srwlock; struct win32_mutex *next_free; -#if PROFILING - struct __proflock_ctx *profiling_ctx; +#if PROFILING_LOCKS + __proflock_ctx(profiling_ctx); #endif + #if RTC u64 owner_tid; struct atomic_i64 count; @@ -55,7 +55,7 @@ struct win32_condition_variable { }; struct win32_thread { - sys_thread_entry_point_func *entry_point; + sys_thread_func *entry_point; void *thread_data; char thread_name_cstr[256]; wchar_t thread_name_wstr[256]; @@ -944,7 +944,7 @@ INTERNAL HWND win32_create_window(struct win32_window *window) return hwnd; } -INTERNAL SYS_THREAD_ENTRY_POINT_FUNC_DEF(window_thread_entry_point, arg) +INTERNAL SYS_THREAD_DEF(window_thread_entry_point, arg) { struct win32_window *window = (struct win32_window *)arg; @@ -1588,12 +1588,12 @@ void sys_window_cursor_disable_clip(struct sys_window *sys_window) INTERNAL void win32_mutex_init(struct win32_mutex *m) { -#if PROFILING - struct __proflock_ctx *profiling_ctx = m->profiling_ctx; +#if PROFILING_LOCKS + __proflock_ctx(profiling_ctx) = m->profiling_ctx; #endif MEMZERO_STRUCT(m); m->srwlock = (SRWLOCK)SRWLOCK_INIT; -#if PROFILING +#if PROFILING_LOCKS if (!profiling_ctx) { __proflock_alloc(profiling_ctx); } @@ -1964,7 +1964,7 @@ INTERNAL DWORD WINAPI win32_thread_proc(LPVOID vt) return 0; } -struct sys_thread *sys_thread_alloc(sys_thread_entry_point_func *entry_point, void *thread_data, struct string thread_name) +struct sys_thread *sys_thread_alloc(sys_thread_func *entry_point, void *thread_data, struct string thread_name) { __prof; struct arena_temp scratch = scratch_begin_no_conflict(); @@ -2324,11 +2324,30 @@ void sys_sleep(f64 seconds) Sleep(ms); } +/* ========================== * + * Command line + * ========================== */ + +b32 sys_run_command(struct string cmd) +{ + b32 success = false; + { + struct arena_temp 
scratch = scratch_begin_no_conflict(); + wchar_t *cmd_wstr = wstr_from_string(scratch.arena, cmd); + STARTUPINFO si = ZI; + si.cb = sizeof(si); + PROCESS_INFORMATION pi = ZI; + success = CreateProcessW(NULL, cmd_wstr, NULL, NULL, FALSE, DETACHED_PROCESS, NULL, NULL, &si, &pi); + scratch_end(scratch); + } + return success; +} + /* ========================== * * Entry point * ========================== */ -INTERNAL SYS_THREAD_ENTRY_POINT_FUNC_DEF(win32_app_thread_entry_point, arg) +INTERNAL SYS_THREAD_DEF(win32_app_thread_entry_point, arg) { (UNUSED)arg; struct arena_temp scratch = scratch_begin_no_conflict(); @@ -2343,6 +2362,7 @@ int CALLBACK wWinMain(_In_ HINSTANCE instance, _In_opt_ HINSTANCE prev_instance, (UNUSED)prev_instance; (UNUSED)cmdline_wstr; (UNUSED)show_code; + __prof_startup; u64 cmdline_len = wstr_len(cmdline_wstr, ARRAY_COUNT(G.cmdline_args_wstr) - 1); MEMCPY(G.cmdline_args_wstr, cmdline_wstr, cmdline_len * sizeof(*cmdline_wstr)); @@ -2532,6 +2552,26 @@ int CALLBACK wWinMain(_In_ HINSTANCE instance, _In_opt_ HINSTANCE prev_instance, return 1; } +#if PROFILING + /* Launch profiler */ + if (!__prof_is_connected()) { + __profscope(Launch profiler); + STARTUPINFO si = { sizeof(si) }; + PROCESS_INFORMATION pi = ZI; + wchar_t cmd[sizeof(PROFILING_CMD_WSTR)] = ZI; + MEMCPY(cmd, PROFILING_CMD_WSTR, sizeof(PROFILING_CMD_WSTR)); + b32 success = CreateProcessW(NULL, cmd, NULL, NULL, FALSE, DETACHED_PROCESS, NULL, NULL, &si, &pi); + if (success) { + while (!__prof_is_connected()) { + ix_pause(); + } + } else { + MessageBoxExW(NULL, L"Failed to launch tracy profiler using command " PROFILING_CMD_WSTR, L"Error", MB_ICONSTOP | MB_SETFOREGROUND | MB_TOPMOST, 0); + } + } +#endif + + __prof_shutdown; return 0; } diff --git a/src/thread_local.c b/src/thread_local.c index 3c001441..19d81d65 100644 --- a/src/thread_local.c +++ b/src/thread_local.c @@ -56,7 +56,7 @@ void thread_local_store_release(struct thread_local_store *t) arena_release(t->arena); } -void 
*_thread_local_var_eval(struct thread_local_var_meta *meta) +volatile void *_thread_local_var_eval(struct thread_local_var_meta *meta) { /* Register var if unregistered */ u64 id; diff --git a/src/thread_local.h b/src/thread_local.h index 3b4bc1b6..1c5ef45e 100644 --- a/src/thread_local.h +++ b/src/thread_local.h @@ -60,6 +60,6 @@ struct thread_local_var_meta { # define thread_local_var_eval(var_ptr) (void *)(_thread_local_var_eval(&(var_ptr)->meta)) #endif -void *_thread_local_var_eval(struct thread_local_var_meta *meta); +volatile void *_thread_local_var_eval(struct thread_local_var_meta *meta); #endif diff --git a/src/ttf_dwrite.cpp b/src/ttf_dwrite.cpp index ed203016..b879c874 100644 --- a/src/ttf_dwrite.cpp +++ b/src/ttf_dwrite.cpp @@ -76,6 +76,7 @@ struct ttf_startup_receipt ttf_startup(void) struct ttf_decode_result ttf_decode(struct arena *arena, struct string encoded, f32 point_size, u32 *cache_codes, u32 cache_codes_count) { + __prof; COLORREF bg_color = RGB32(0,0,0); COLORREF fg_color = RGB32(255,255,255); @@ -173,110 +174,113 @@ struct ttf_decode_result ttf_decode(struct arena *arena, struct string encoded, u32 out_offset_x = 0; u32 out_offset_y = 0; u32 row_height = 0; - for (u16 i = 0; i < glyph_count; ++i) { - /* Render glyph to target */ - DWRITE_GLYPH_RUN glyph_run = ZI; - glyph_run.fontFace = font_face; - glyph_run.fontEmSize = pixel_per_em; - glyph_run.glyphCount = 1; - glyph_run.glyphIndices = &i; + { + __profscope(Build atlas); + for (u16 i = 0; i < glyph_count; ++i) { + /* Render glyph to target */ + DWRITE_GLYPH_RUN glyph_run = ZI; + glyph_run.fontFace = font_face; + glyph_run.fontEmSize = pixel_per_em; + glyph_run.glyphCount = 1; + glyph_run.glyphIndices = &i; - RECT bounding_box = ZI; - error = render_target->DrawGlyphRun( - raster_target_x, - raster_target_y, - DWRITE_MEASURING_MODE_NATURAL, - &glyph_run, - rendering_params, - fg_color, - &bounding_box - ); + RECT bounding_box = ZI; + error = render_target->DrawGlyphRun( + raster_target_x, 
+ raster_target_y, + DWRITE_MEASURING_MODE_NATURAL, + &glyph_run, + rendering_params, + fg_color, + &bounding_box + ); - if (bounding_box.left < 0 - || bounding_box.top < 0 - || bounding_box.right > raster_target_w - || bounding_box.bottom > raster_target_h) { - /* Skip */ - continue; - } - - /* Compute glyph metrics */ - DWRITE_GLYPH_METRICS glyph_metrics = ZI; - - - error = font_face->GetDesignGlyphMetrics(&i, 1, &glyph_metrics, false); - - f32 off_x = (f32)bounding_box.left - raster_target_x; - f32 off_y = (f32)bounding_box.top - raster_target_y; - f32 advance = (f32)glyph_metrics.advanceWidth * pixel_per_design_unit; - i32 tex_w = bounding_box.right - bounding_box.left; - i32 tex_h = bounding_box.bottom - bounding_box.top; - - struct font_glyph *glyph = &glyphs[i]; - glyph->off_x = off_x; - glyph->off_y = off_y; - glyph->advance = round_up(advance); - glyph->width = (f32)tex_w; - glyph->height = (f32)tex_h; - - /* Get the bitmap */ - HBITMAP bitmap = (HBITMAP)GetCurrentObject(dc, OBJ_BITMAP); - DIBSECTION dib = ZI; - GetObject(bitmap, sizeof(dib), &dib); - - /* Start new row if necessary */ - if ((out_offset_x + tex_w) >= atlas_w) { - out_offset_y += row_height; - out_offset_x = 0; - row_height = 0; - } - - /* Grow atlas height */ - if ((out_offset_y + tex_h) > atlas_h) { - u64 diff = (out_offset_y + tex_h) - atlas_h; - /* NOTE: This allocation must be contiguous with the initial atlas - * allocation (IE: No non-atlas arena PUSHes) */ - arena_push_array(arena, u32, diff * atlas_w); - atlas_h += diff; - } - - /* Set bounding box metrics (now that we know atlas x & y) */ - glyph->atlas_rect = ZI; - glyph->atlas_rect.x = (f32)out_offset_x; - glyph->atlas_rect.y = (f32)out_offset_y; - glyph->atlas_rect.width = (f32)tex_w; - glyph->atlas_rect.height = (f32)tex_h; - - /* Fill atlas */ - u64 in_pitch = (u64)dib.dsBm.bmWidthBytes / 4; - u32 *in_data = (u32 *)dib.dsBm.bmBits; - u32 *out_data = atlas_memory; - for (i32 y = 0; y < tex_h; ++y) { - u64 out_y = out_offset_y 
+ y; - u64 in_y = (u64)bounding_box.top + y; - for (i32 x = 0; x < tex_w; ++x) { - u64 out_x = out_offset_x + x; - u64 in_x = (u64)bounding_box.left + x; - u32 *out_pixel = out_data + (out_x + (out_y * atlas_w)); - u32 *in_pixel = in_data + (in_x + (in_y * in_pitch)); - *out_pixel = RGBA32(0xFF, 0xFF, 0xFF, *in_pixel & 0xFF); + if (bounding_box.left < 0 + || bounding_box.top < 0 + || bounding_box.right > raster_target_w + || bounding_box.bottom > raster_target_h) { + /* Skip */ + continue; } - } - out_offset_x += tex_w; - /* Grow row height */ - if ((u32)tex_h > row_height) { - row_height = (u32)tex_h; - } + /* Compute glyph metrics */ + DWRITE_GLYPH_METRICS glyph_metrics = ZI; - /* Clear the render target */ - { - HGDIOBJ original = SelectObject(dc, GetStockObject(DC_PEN)); - SetDCPenColor(dc, bg_color); - SelectObject(dc, GetStockObject(DC_BRUSH)); - SetDCBrushColor(dc, bg_color); - Rectangle(dc, bounding_box.left, bounding_box.top, bounding_box.right, bounding_box.bottom); - SelectObject(dc, original); + + error = font_face->GetDesignGlyphMetrics(&i, 1, &glyph_metrics, false); + + f32 off_x = (f32)bounding_box.left - raster_target_x; + f32 off_y = (f32)bounding_box.top - raster_target_y; + f32 advance = (f32)glyph_metrics.advanceWidth * pixel_per_design_unit; + i32 tex_w = bounding_box.right - bounding_box.left; + i32 tex_h = bounding_box.bottom - bounding_box.top; + + struct font_glyph *glyph = &glyphs[i]; + glyph->off_x = off_x; + glyph->off_y = off_y; + glyph->advance = round_up(advance); + glyph->width = (f32)tex_w; + glyph->height = (f32)tex_h; + + /* Get the bitmap */ + HBITMAP bitmap = (HBITMAP)GetCurrentObject(dc, OBJ_BITMAP); + DIBSECTION dib = ZI; + GetObject(bitmap, sizeof(dib), &dib); + + /* Start new row if necessary */ + if ((out_offset_x + tex_w) >= atlas_w) { + out_offset_y += row_height; + out_offset_x = 0; + row_height = 0; + } + + /* Grow atlas height */ + if ((out_offset_y + tex_h) > atlas_h) { + u64 diff = (out_offset_y + tex_h) - atlas_h; + 
/* NOTE: This allocation must be contiguous with the initial atlas + * allocation (IE: No non-atlas arena PUSHes) */ + arena_push_array(arena, u32, diff * atlas_w); + atlas_h += diff; + } + + /* Set bounding box metrics (now that we know atlas x & y) */ + glyph->atlas_rect = ZI; + glyph->atlas_rect.x = (f32)out_offset_x; + glyph->atlas_rect.y = (f32)out_offset_y; + glyph->atlas_rect.width = (f32)tex_w; + glyph->atlas_rect.height = (f32)tex_h; + + /* Fill atlas */ + u64 in_pitch = (u64)dib.dsBm.bmWidthBytes / 4; + u32 *in_data = (u32 *)dib.dsBm.bmBits; + u32 *out_data = atlas_memory; + for (i32 y = 0; y < tex_h; ++y) { + u64 out_y = out_offset_y + y; + u64 in_y = (u64)bounding_box.top + y; + for (i32 x = 0; x < tex_w; ++x) { + u64 out_x = out_offset_x + x; + u64 in_x = (u64)bounding_box.left + x; + u32 *out_pixel = out_data + (out_x + (out_y * atlas_w)); + u32 *in_pixel = in_data + (in_x + (in_y * in_pitch)); + *out_pixel = RGBA32(0xFF, 0xFF, 0xFF, *in_pixel & 0xFF); + } + } + out_offset_x += tex_w; + + /* Grow row height */ + if ((u32)tex_h > row_height) { + row_height = (u32)tex_h; + } + + /* Clear the render target */ + { + HGDIOBJ original = SelectObject(dc, GetStockObject(DC_PEN)); + SetDCPenColor(dc, bg_color); + SelectObject(dc, GetStockObject(DC_BRUSH)); + SetDCBrushColor(dc, bg_color); + Rectangle(dc, bounding_box.left, bounding_box.top, bounding_box.right, bounding_box.bottom); + SelectObject(dc, original); + } } } diff --git a/src/user.c b/src/user.c index db6fdaf1..92c0604c 100644 --- a/src/user.c +++ b/src/user.c @@ -94,6 +94,7 @@ GLOBAL struct { i32 console_log_color_indices[LOG_LEVEL_COUNT]; f32 console_logs_height; b32 debug_console; + b32 profiler_launched; /* Window -> user */ struct sys_mutex *sys_events_mutex; @@ -175,6 +176,7 @@ GLOBAL READONLY enum user_bind_kind g_binds[SYS_BTN_COUNT] = { [SYS_BTN_F1] = USER_BIND_KIND_DEBUG_PAUSE, [SYS_BTN_F2] = USER_BIND_KIND_DEBUG_CAMERA, [SYS_BTN_F3] = USER_BIND_KIND_DEBUG_DRAW, + [SYS_BTN_F4] = 
USER_BIND_KIND_PROFILER, [SYS_BTN_GRAVE_ACCENT] = USER_BIND_KIND_DEBUG_CONSOLE, [SYS_BTN_F11] = USER_BIND_KIND_FULLSCREEN, [SYS_BTN_MWHEELUP] = USER_BIND_KIND_ZOOM_IN, @@ -195,12 +197,11 @@ GLOBAL READONLY enum user_bind_kind g_binds[SYS_BTN_COUNT] = { INTERNAL APP_EXIT_CALLBACK_FUNC_DEF(user_shutdown); INTERNAL LOG_EVENT_CALLBACK_FUNC_DEF(debug_console_log_callback, log); -INTERNAL SYS_THREAD_ENTRY_POINT_FUNC_DEF(user_thread_entry_point, arg); -INTERNAL SYS_THREAD_ENTRY_POINT_FUNC_DEF(user_local_sim_thread_entry_point, arg); +INTERNAL SYS_THREAD_DEF(user_thread_entry_point, arg); +INTERNAL SYS_THREAD_DEF(user_local_sim_thread_entry_point, arg); INTERNAL SYS_WINDOW_EVENT_CALLBACK_FUNC_DEF(window_event_callback, event); -struct user_startup_receipt user_startup(struct work_startup_receipt *work_sr, - struct gp_startup_receipt *gp_sr, +struct user_startup_receipt user_startup(struct gp_startup_receipt *gp_sr, struct font_startup_receipt *font_sr, struct sprite_startup_receipt *sprite_sr, struct draw_startup_receipt *draw_sr, @@ -212,7 +213,6 @@ struct user_startup_receipt user_startup(struct work_startup_receipt *work_sr, struct string connect_address_str, struct sys_window *window) { - (UNUSED)work_sr; (UNUSED)gp_sr; (UNUSED)font_sr; (UNUSED)sprite_sr; @@ -624,6 +624,7 @@ INTERNAL SORT_COMPARE_FUNC_DEF(ent_draw_order_cmp, arg_a, arg_b, udata) INTERNAL void user_update(void) { + __prof; struct arena_temp scratch = scratch_begin_no_conflict(); /* ========================== * @@ -868,6 +869,26 @@ INTERNAL void user_update(void) if (G.bind_states[USER_BIND_KIND_DEBUG_CAMERA].num_presses > 0) { G.debug_camera = !G.debug_camera; } + if (G.bind_states[USER_BIND_KIND_PROFILER].num_presses > 0) { + if (G.profiler_launched) { + logf_warning("Profiler already launched"); + } else { +#if PROFILING + __profscope(Launch profiler); + struct string cmd = string_from_wstr_no_limit(scratch.arena, PROFILING_CMD_WSTR); + logf_info("Launching profiler with command \"%F\"", 
FMT_STR(cmd)); + b32 success = sys_run_command(cmd); + if (success) { + G.profiler_launched = true; + logf_success("Launched profiler successfully"); + } else { + logf_error("Failed to launch profiler using command \"%F\" (is the executable in your PATH?)", FMT_STR(cmd)); + } +#else + logf_warning("Cannot launch profiler: Program is not in profiling mode"); +#endif + } + } { if (G.bind_states[USER_BIND_KIND_DEBUG_FOLLOW].num_presses > 0) { @@ -2104,15 +2125,17 @@ INTERNAL void user_update(void) * User thread * ========================== */ -INTERNAL SYS_THREAD_ENTRY_POINT_FUNC_DEF(user_thread_entry_point, arg) +INTERNAL SYS_THREAD_DEF(user_thread_entry_point, arg) { (UNUSED)arg; i64 last_frame_ns = 0; i64 target_dt_ns = NS_FROM_SECONDS(USER_FPS_LIMIT > (0) ? (1.0 / USER_FPS_LIMIT) : 0); while (!atomic_i32_eval(&G.user_thread_shutdown)) { - __profscope(user_update_w_sleep); - sleep_frame(last_frame_ns, target_dt_ns); + { + __profscope(User sleep); + sleep_frame(last_frame_ns, target_dt_ns); + } last_frame_ns = sys_time_ns(); user_update(); } @@ -2191,7 +2214,7 @@ struct sim_decode_queue { }; -INTERNAL SYS_THREAD_ENTRY_POINT_FUNC_DEF(user_local_sim_thread_entry_point, arg) +INTERNAL SYS_THREAD_DEF(user_local_sim_thread_entry_point, arg) { #if 0 struct host_listen_address local_listen_addr = host_listen_address_from_local_name(LIT("LOCAL_SIM")); @@ -2268,515 +2291,518 @@ INTERNAL SYS_THREAD_ENTRY_POINT_FUNC_DEF(user_local_sim_thread_entry_point, arg) i64 step_dt_ns = NS_FROM_SECONDS(1) / SIM_TICKS_PER_SECOND; f64 compute_timescale = 1.0; while (!atomic_i32_eval(&G.local_sim_thread_shutdown)) { - __profscope(local_sim_loop); struct arena_temp scratch = scratch_begin_no_conflict(); { - __profscope(local_sim_sleep); + __profscope(Sim sleep); sleep_frame(real_time_ns, step_dt_ns * compute_timescale); } - real_dt_ns = sys_time_ns() - real_time_ns; - real_time_ns += real_dt_ns; - - struct host_event_list host_events = host_update_begin(scratch.arena, host); - - /* Read net 
messages */ - struct sim_decode_queue queue = ZI; { - for (struct host_event *event = host_events.first; event; event = event->next) { - struct host_channel_id channel_id = event->channel_id; - struct sim_client *client = sim_client_from_channel_id(store, channel_id); - switch (event->kind) { - case HOST_EVENT_KIND_CHANNEL_OPENED: - { - if (!client->valid) { - if (is_master) { - /* Create remote client */ - client = sim_client_alloc(store); - sim_client_set_channel_id(client, channel_id); - } else { - /* Create master client */ - if (!master_client->valid) { + __profscope(Sim update); + + real_dt_ns = sys_time_ns() - real_time_ns; + real_time_ns += real_dt_ns; + + struct host_event_list host_events = host_update_begin(scratch.arena, host); + + /* Read net messages */ + struct sim_decode_queue queue = ZI; + { + for (struct host_event *event = host_events.first; event; event = event->next) { + struct host_channel_id channel_id = event->channel_id; + struct sim_client *client = sim_client_from_channel_id(store, channel_id); + switch (event->kind) { + case HOST_EVENT_KIND_CHANNEL_OPENED: + { + if (!client->valid) { + if (is_master) { + /* Create remote client */ client = sim_client_alloc(store); sim_client_set_channel_id(client, channel_id); - master_client = client; - master_blended_client = sim_client_alloc(store); } else { - /* We already have a master client */ - ASSERT(false); + /* Create master client */ + if (!master_client->valid) { + client = sim_client_alloc(store); + sim_client_set_channel_id(client, channel_id); + master_client = client; + master_blended_client = sim_client_alloc(store); + } else { + /* We already have a master client */ + ASSERT(false); + } } } - } - } break; + } break; - case HOST_EVENT_KIND_MSG: - { - if (client->valid) { - struct bitbuff msg_bb = bitbuff_from_string(event->msg); - struct bitbuff_reader msg_br = br_from_bitbuff(&msg_bb); + case HOST_EVENT_KIND_MSG: + { + if (client->valid) { + struct bitbuff msg_bb = 
bitbuff_from_string(event->msg); + struct bitbuff_reader msg_br = br_from_bitbuff(&msg_bb); - u64 ack = br_read_uv(&msg_br); - u64 double_ack = br_read_uv(&msg_br); - if (ack > client->ack) { - client->ack = ack; - } - if (double_ack > client->double_ack) { - client->double_ack = double_ack; - } + u64 ack = br_read_uv(&msg_br); + u64 double_ack = br_read_uv(&msg_br); + if (ack > client->ack) { + client->ack = ack; + } + if (double_ack > client->double_ack) { + client->double_ack = double_ack; + } - /* Read & queue incoming snapshots for decoding */ - u64 tmp_encoded_len = br_read_uv(&msg_br); - while (tmp_encoded_len > 0) { - u8 *tmp_encoded_bytes = br_read_bytes_raw(&msg_br, tmp_encoded_len); - if (!tmp_encoded_bytes) break; + /* Read & queue incoming snapshots for decoding */ + u64 tmp_encoded_len = br_read_uv(&msg_br); + while (tmp_encoded_len > 0) { + u8 *tmp_encoded_bytes = br_read_bytes_raw(&msg_br, tmp_encoded_len); + if (!tmp_encoded_bytes) break; - struct bitbuff decoder_bb = bitbuff_from_string(STRING(tmp_encoded_len, tmp_encoded_bytes)); - struct bitbuff_reader decoder_br = br_from_bitbuff(&decoder_bb); - u64 base_tick = br_read_uv(&decoder_br); - u64 tick = br_read_uv(&decoder_br); + struct bitbuff decoder_bb = bitbuff_from_string(STRING(tmp_encoded_len, tmp_encoded_bytes)); + struct bitbuff_reader decoder_br = br_from_bitbuff(&decoder_bb); + u64 base_tick = br_read_uv(&decoder_br); + u64 tick = br_read_uv(&decoder_br); - struct string tmp_encoded = ZI; - tmp_encoded.len = br_num_bytes_left(&decoder_br); - tmp_encoded.text = br_read_bytes_raw(&decoder_br, tmp_encoded.len); - if (!tmp_encoded.text) tmp_encoded.len = 0; + struct string tmp_encoded = ZI; + tmp_encoded.len = br_num_bytes_left(&decoder_br); + tmp_encoded.text = br_read_bytes_raw(&decoder_br, tmp_encoded.len); + if (!tmp_encoded.text) tmp_encoded.len = 0; - struct sim_snapshot *base_ss = sim_snapshot_from_tick(client, base_tick); - if (base_ss->tick == base_tick) { - if (is_master) { - /* 
Queue incoming slave client snapshot for decoding */ - //b32 should_decode = tick == client->highest_received_tick + 1 || client->highest_received_tick == 0; - b32 should_decode = tick > client->highest_received_tick; - if (should_decode) { - struct sim_ss_decode_node *node = arena_push(scratch.arena, struct sim_ss_decode_node); - node->client = client; - node->tick = tick; - node->base_tick = base_tick; - node->tmp_encoded = tmp_encoded; - if (queue.last) { - queue.last->next = node; - } else { - queue.first = node; + struct sim_snapshot *base_ss = sim_snapshot_from_tick(client, base_tick); + if (base_ss->tick == base_tick) { + if (is_master) { + /* Queue incoming slave client snapshot for decoding */ + //b32 should_decode = tick == client->highest_received_tick + 1 || client->highest_received_tick == 0; + b32 should_decode = tick > client->highest_received_tick; + if (should_decode) { + struct sim_ss_decode_node *node = arena_push(scratch.arena, struct sim_ss_decode_node); + node->client = client; + node->tick = tick; + node->base_tick = base_tick; + node->tmp_encoded = tmp_encoded; + if (queue.last) { + queue.last->next = node; + } else { + queue.first = node; + } + queue.last = node; + if (tick > client->highest_received_tick) { + client->highest_received_tick = tick; + } } - queue.last = node; - if (tick > client->highest_received_tick) { - client->highest_received_tick = tick; + } else { + /* Decode incoming master client snapshots for decoding (only the newest one) */ + b32 should_decode = client == master_client && tick > client->highest_received_tick; + if (should_decode) { + struct sim_ss_decode_node *node = queue.first ? 
queue.first : arena_push(scratch.arena, struct sim_ss_decode_node); + node->client = client; + node->tick = tick; + node->base_tick = base_tick; + node->tmp_encoded = tmp_encoded; + queue.first = node; + queue.last = node; + if (tick > client->highest_received_tick) { + client->highest_received_tick = tick; + if (average_master_receive_dt_ns == 0) { + average_master_receive_dt_ns = NS_FROM_SECONDS(1) / SIM_TICKS_PER_SECOND; + } else { + average_master_receive_dt_ns -= average_master_receive_dt_ns / 50; + average_master_receive_dt_ns += (real_time_ns - last_tick_from_master_received_at_ns) / 50; + } + last_tick_from_master_received_at_ns = real_time_ns; + } } } } else { - /* Decode incoming master client snapshots for decoding (only the newest one) */ - b32 should_decode = client == master_client && tick > client->highest_received_tick; - if (should_decode) { - struct sim_ss_decode_node *node = queue.first ? queue.first : arena_push(scratch.arena, struct sim_ss_decode_node); - node->client = client; - node->tick = tick; - node->base_tick = base_tick; - node->tmp_encoded = tmp_encoded; - queue.first = node; - queue.last = node; - if (tick > client->highest_received_tick) { - client->highest_received_tick = tick; - if (average_master_receive_dt_ns == 0) { - average_master_receive_dt_ns = NS_FROM_SECONDS(1) / SIM_TICKS_PER_SECOND; - } else { - average_master_receive_dt_ns -= average_master_receive_dt_ns / 50; - average_master_receive_dt_ns += (real_time_ns - last_tick_from_master_received_at_ns) / 50; - } - last_tick_from_master_received_at_ns = real_time_ns; - } - } + /* We do not have the tick that the incoming delta is based from */ + ASSERT(false); } - } else { - /* We do not have the tick that the incoming delta is based from */ - ASSERT(false); + + tmp_encoded_len = br_read_uv(&msg_br); } - - tmp_encoded_len = br_read_uv(&msg_br); } - } - } break; + } break; - default: break; - } - } - } - - /* Decode incoming snapshots */ - for (struct sim_ss_decode_node *n = 
queue.first; n; n = n->next) { - struct sim_client *client = n->client; - u64 base_tick = n->base_tick; - u64 tick = n->tick; - struct sim_snapshot *base_ss = sim_snapshot_from_tick(client, base_tick); - if (base_ss->tick == base_tick) { - struct bitbuff bb = bitbuff_from_string(n->tmp_encoded); - struct bitbuff_reader br = br_from_bitbuff(&bb); - - /* Alloc & decode snapshot */ - struct sim_snapshot *ss = sim_snapshot_alloc(client, base_ss, tick); - sim_snapshot_decode(&br, ss); - - /* Assume all incoming ents want to be sync srcs */ - for (u64 i = 0; i < ss->num_ents_reserved; ++i) { - struct sim_ent *ent = &ss->ents[i]; - if (ent->valid && sim_ent_has_prop(ent, SEPROP_SYNC_DST)) { - sim_ent_disable_prop(ent, SEPROP_SYNC_DST); - sim_ent_enable_prop(ent, SEPROP_SYNC_SRC); + default: break; } } - } else { - /* We do not have the tick that the incoming delta is based from. - * This decode should never have been queued in the first place. */ - ASSERT(false); } - } - if (!is_master && !initialized_from_master) { - if (master_client->valid && master_client->last_tick > 0) { - initialized_from_master = true; - } else { + /* Decode incoming snapshots */ + for (struct sim_ss_decode_node *n = queue.first; n; n = n->next) { + struct sim_client *client = n->client; + u64 base_tick = n->base_tick; + u64 tick = n->tick; + struct sim_snapshot *base_ss = sim_snapshot_from_tick(client, base_tick); + if (base_ss->tick == base_tick) { + struct bitbuff bb = bitbuff_from_string(n->tmp_encoded); + struct bitbuff_reader br = br_from_bitbuff(&bb); + + /* Alloc & decode snapshot */ + struct sim_snapshot *ss = sim_snapshot_alloc(client, base_ss, tick); + sim_snapshot_decode(&br, ss); + + /* Assume all incoming ents want to be sync srcs */ + for (u64 i = 0; i < ss->num_ents_reserved; ++i) { + struct sim_ent *ent = &ss->ents[i]; + if (ent->valid && sim_ent_has_prop(ent, SEPROP_SYNC_DST)) { + sim_ent_disable_prop(ent, SEPROP_SYNC_DST); + sim_ent_enable_prop(ent, SEPROP_SYNC_SRC); + } + } + } 
else { + /* We do not have the tick that the incoming delta is based from. + * This decode should never have been queued in the first place. */ + ASSERT(false); + } + } + + if (!is_master && !initialized_from_master) { + if (master_client->valid && master_client->last_tick > 0) { + initialized_from_master = true; + } else { + goto skip_step; + } + } + + b32 should_step = !atomic_i32_eval(&G.user_paused); + if (atomic_i32_eval(&G.user_paused_steps) > 0) { + should_step = true; + atomic_i32_eval_add(&G.user_paused_steps, -1); + } + + if (!should_step) { goto skip_step; } - } - b32 should_step = !atomic_i32_eval(&G.user_paused); - if (atomic_i32_eval(&G.user_paused_steps) > 0) { - should_step = true; - atomic_i32_eval_add(&G.user_paused_steps, -1); - } - - if (!should_step) { - goto skip_step; - } - - /* Update networked clients */ - u64 oldest_client_ack = 0; - for (u64 i = 0; i < store->num_clients_reserved; ++i) { - struct sim_client *client = &store->clients[i]; - if (client->valid && client != local_client && client != publish_client && client != user_input_client && client != master_client) { - client->last_rtt_ns = host_get_channel_last_rtt_ns(host, client->channel_id); - /* Release unneeded received snapshots */ - /* TDOO: Cap how many client snapshots we're willing to retain */ - if (client->double_ack > 0) { - u64 keep_tick = min_u64(client->double_ack, local_client->last_tick); - if (keep_tick > 0) { - sim_snapshot_release_ticks_in_range(client, 0, keep_tick - 1); + /* Update networked clients */ + u64 oldest_client_ack = 0; + for (u64 i = 0; i < store->num_clients_reserved; ++i) { + struct sim_client *client = &store->clients[i]; + if (client->valid && client != local_client && client != publish_client && client != user_input_client && client != master_client) { + client->last_rtt_ns = host_get_channel_last_rtt_ns(host, client->channel_id); + /* Release unneeded received snapshots */ + /* TDOO: Cap how many client snapshots we're willing to retain */ + if 
(client->double_ack > 0) { + u64 keep_tick = min_u64(client->double_ack, local_client->last_tick); + if (keep_tick > 0) { + sim_snapshot_release_ticks_in_range(client, 0, keep_tick - 1); + } + } + if (client->ack < oldest_client_ack || oldest_client_ack == 0) { + oldest_client_ack = client->ack; } } - if (client->ack < oldest_client_ack || oldest_client_ack == 0) { - oldest_client_ack = client->ack; - } } - } - /* Release unneeded published snapshots */ - { - u64 keep_tick = oldest_client_ack; - if (keep_tick == 0 && publish_client->last_tick > 0) { - keep_tick = publish_client->last_tick - 1; - } - if (keep_tick > 0) { - --keep_tick; - } - sim_snapshot_release_ticks_in_range(publish_client, 0, keep_tick); - } - - /* Release old local snapshots */ - { - u64 keep_range = 50; - if (local_client->last_tick > keep_range) { - u64 keep_tick = local_client->last_tick - keep_range; - sim_snapshot_release_ticks_in_range(local_client, 0, keep_tick); - } - } - - /* Release unneeded user input snapshots */ - sim_snapshot_release_ticks_in_range(user_input_client, 0, local_client->first_tick - 1); - - - - - - - - - - if (is_master) { - /* Step master */ - u64 prev_tick = local_client->last_tick; - u64 next_tick = prev_tick + 1; - struct sim_step_ctx ctx = ZI; - ctx.is_master = is_master; - ctx.sim_dt_ns = step_dt_ns; - ctx.accel = &accel; - ctx.user_input_client = user_input_client; - ctx.master_client = master_client; - ctx.publish_client = publish_client; - struct sim_snapshot *prev_world = sim_snapshot_from_tick(local_client, prev_tick); - ctx.world = sim_snapshot_alloc(local_client, prev_world, next_tick); - generate_user_input_cmds(user_input_client, next_tick); - sim_step(&ctx); - } else if (master_client->valid) { - /* Step client */ - - /* TODO: Eventually determine master tick based on a delay to allow for jitter and also interpolation so we can lower snapshot publish frequency */ - - - b32 master_ss_is_blended = false; - struct sim_snapshot *master_ss = 
sim_snapshot_nil(); + /* Release unneeded published snapshots */ { - /* How along are we between master sim ticks (0 = start of tick, 1 = end of tick) */ - f64 tick_progress = 0; - i64 next_tick_expected_ns = last_tick_from_master_received_at_ns + average_master_receive_dt_ns; - if (next_tick_expected_ns > last_tick_from_master_received_at_ns) { - tick_progress = (f64)(real_time_ns - last_tick_from_master_received_at_ns) / (f64)(next_tick_expected_ns - last_tick_from_master_received_at_ns); + u64 keep_tick = oldest_client_ack; + if (keep_tick == 0 && publish_client->last_tick > 0) { + keep_tick = publish_client->last_tick - 1; } - - /* Predict master sim time based on average snapshot publish dt. */ - struct sim_snapshot *newest_snapshot = sim_snapshot_from_tick(master_client, master_client->last_tick); - i64 master_sim_predicted_time_ns = newest_snapshot->sim_time_ns + (newest_snapshot->sim_dt_ns * tick_progress); - - /* Determine blend time */ - i64 master_blend_time_target_ns = master_sim_predicted_time_ns - (SIM_CLIENT_INTERP_RATIO * average_master_receive_dt_ns); - if (average_master_receive_dt_ns > 0) { - master_blend_time_ns += real_dt_ns; + if (keep_tick > 0) { + --keep_tick; } + sim_snapshot_release_ticks_in_range(publish_client, 0, keep_tick); + } - i64 blend_time_target_diff_ns = master_blend_time_target_ns - master_blend_time_ns; - if (blend_time_target_diff_ns > NS_FROM_SECONDS(0.100) || blend_time_target_diff_ns < NS_FROM_SECONDS(-0.100)) { - /* Snap blend time if it gets too far from target blend time */ - master_blend_time_ns = master_blend_time_target_ns; + /* Release old local snapshots */ + { + u64 keep_range = 50; + if (local_client->last_tick > keep_range) { + u64 keep_tick = local_client->last_tick - keep_range; + sim_snapshot_release_ticks_in_range(local_client, 0, keep_tick); } - u64 master_blend_tick = master_blend_time_ns / newest_snapshot->sim_dt_ns; + } - /* Get snapshot nearest to master blend time */ - /* TODO: Blend */ - struct 
sim_snapshot *left_snapshot = sim_snapshot_nil(); - struct sim_snapshot *right_snapshot = newest_snapshot; + /* Release unneeded user input snapshots */ + sim_snapshot_release_ticks_in_range(user_input_client, 0, local_client->first_tick - 1); + + + + + + + + + + if (is_master) { + /* Step master */ + u64 prev_tick = local_client->last_tick; + u64 next_tick = prev_tick + 1; + struct sim_step_ctx ctx = ZI; + ctx.is_master = is_master; + ctx.sim_dt_ns = step_dt_ns; + ctx.accel = &accel; + ctx.user_input_client = user_input_client; + ctx.master_client = master_client; + ctx.publish_client = publish_client; + struct sim_snapshot *prev_world = sim_snapshot_from_tick(local_client, prev_tick); + ctx.world = sim_snapshot_alloc(local_client, prev_world, next_tick); + generate_user_input_cmds(user_input_client, next_tick); + sim_step(&ctx); + } else if (master_client->valid) { + /* Step client */ + + /* TODO: Eventually determine master tick based on a delay to allow for jitter and also interpolation so we can lower snapshot publish frequency */ + + + b32 master_ss_is_blended = false; + struct sim_snapshot *master_ss = sim_snapshot_nil(); { - struct sim_snapshot *ss = sim_snapshot_from_tick(master_client, master_client->first_tick); - while (ss->valid) { - u64 next_tick = ss->next_tick; - i64 ss_time_ns = ss->sim_time_ns; - if (ss_time_ns < master_blend_time_ns && ss_time_ns > left_snapshot->sim_time_ns) { - left_snapshot = ss; - } - if (ss_time_ns > master_blend_time_ns && ss_time_ns < right_snapshot->sim_time_ns) { - right_snapshot = ss; - } - ss = sim_snapshot_from_tick(master_client, next_tick); + /* How along are we between master sim ticks (0 = start of tick, 1 = end of tick) */ + f64 tick_progress = 0; + i64 next_tick_expected_ns = last_tick_from_master_received_at_ns + average_master_receive_dt_ns; + if (next_tick_expected_ns > last_tick_from_master_received_at_ns) { + tick_progress = (f64)(real_time_ns - last_tick_from_master_received_at_ns) / 
(f64)(next_tick_expected_ns - last_tick_from_master_received_at_ns); } - } - /* Create world from blended master snapshots */ - f64 blend = 0; - if (left_snapshot->valid && right_snapshot->valid && right_snapshot->tick > left_snapshot->tick) { - blend = (f64)(master_blend_tick - left_snapshot->tick) / (f64)(right_snapshot->tick - left_snapshot->tick); - f64 epsilon = 0.001; - if (blend < epsilon) { - master_ss_is_blended = false; - master_ss = left_snapshot; - } else if (blend > 1 - epsilon) { - master_ss_is_blended = false; - master_ss = right_snapshot; + /* Predict master sim time based on average snapshot publish dt. */ + struct sim_snapshot *newest_snapshot = sim_snapshot_from_tick(master_client, master_client->last_tick); + i64 master_sim_predicted_time_ns = newest_snapshot->sim_time_ns + (newest_snapshot->sim_dt_ns * tick_progress); + + /* Determine blend time */ + i64 master_blend_time_target_ns = master_sim_predicted_time_ns - (SIM_CLIENT_INTERP_RATIO * average_master_receive_dt_ns); + if (average_master_receive_dt_ns > 0) { + master_blend_time_ns += real_dt_ns; + } + + i64 blend_time_target_diff_ns = master_blend_time_target_ns - master_blend_time_ns; + if (blend_time_target_diff_ns > NS_FROM_SECONDS(0.100) || blend_time_target_diff_ns < NS_FROM_SECONDS(-0.100)) { + /* Snap blend time if it gets too far from target blend time */ + master_blend_time_ns = master_blend_time_target_ns; + } + u64 master_blend_tick = master_blend_time_ns / newest_snapshot->sim_dt_ns; + + /* Get snapshot nearest to master blend time */ + /* TODO: Blend */ + struct sim_snapshot *left_snapshot = sim_snapshot_nil(); + struct sim_snapshot *right_snapshot = newest_snapshot; + { + struct sim_snapshot *ss = sim_snapshot_from_tick(master_client, master_client->first_tick); + while (ss->valid) { + u64 next_tick = ss->next_tick; + i64 ss_time_ns = ss->sim_time_ns; + if (ss_time_ns < master_blend_time_ns && ss_time_ns > left_snapshot->sim_time_ns) { + left_snapshot = ss; + } + if 
(ss_time_ns > master_blend_time_ns && ss_time_ns < right_snapshot->sim_time_ns) { + right_snapshot = ss; + } + ss = sim_snapshot_from_tick(master_client, next_tick); + } + } + + /* Create world from blended master snapshots */ + f64 blend = 0; + if (left_snapshot->valid && right_snapshot->valid && right_snapshot->tick > left_snapshot->tick) { + blend = (f64)(master_blend_tick - left_snapshot->tick) / (f64)(right_snapshot->tick - left_snapshot->tick); + f64 epsilon = 0.001; + if (blend < epsilon) { + master_ss_is_blended = false; + master_ss = left_snapshot; + } else if (blend > 1 - epsilon) { + master_ss_is_blended = false; + master_ss = right_snapshot; + } else { + master_ss_is_blended = true; + master_ss = sim_snapshot_alloc_from_lerp(master_blended_client, left_snapshot, right_snapshot, blend); + + /* Release unneeded blended master snapshots */ + if (master_ss->tick > 0) { + sim_snapshot_release_ticks_in_range(master_blended_client, 0, master_ss->tick - 1); + sim_snapshot_release_ticks_in_range(master_blended_client, master_ss->tick + 1, U64_MAX); + } + } } else { - master_ss_is_blended = true; - master_ss = sim_snapshot_alloc_from_lerp(master_blended_client, left_snapshot, right_snapshot, blend); - - /* Release unneeded blended master snapshots */ - if (master_ss->tick > 0) { - sim_snapshot_release_ticks_in_range(master_blended_client, 0, master_ss->tick - 1); - sim_snapshot_release_ticks_in_range(master_blended_client, master_ss->tick + 1, U64_MAX); - } + master_ss_is_blended = false; + master_ss = left_snapshot->valid ? left_snapshot : right_snapshot; } - } else { - master_ss_is_blended = false; - master_ss = left_snapshot->valid ? 
left_snapshot : right_snapshot; - } - /* Release unneeded master snapshots */ - u64 keep_master_tick = min_u64(left_snapshot->tick, master_client->double_ack); - if (keep_master_tick > 0) { - sim_snapshot_release_ticks_in_range(master_client, 0, keep_master_tick - 1); - } + /* Release unneeded master snapshots */ + u64 keep_master_tick = min_u64(left_snapshot->tick, master_client->double_ack); + if (keep_master_tick > 0) { + sim_snapshot_release_ticks_in_range(master_client, 0, keep_master_tick - 1); + } #if 0 - DEBUGBREAKABLE; - logf_debug("*************************************************"); - logf_debug("local_client->last_tick: %F", FMT_UINT(local_client->last_tick)); - logf_debug("master_sim_predicted_time_ns: %F", FMT_SINT(master_sim_predicted_time_ns)); - logf_debug("tick_progress: %F", FMT_FLOAT(tick_progress)); - logf_debug("sim_publish_timescale: %F", FMT_FLOAT(sim_publish_timescale)); - logf_debug("last_tick_from_master_received_at_ns: %F", FMT_SINT(last_tick_from_master_received_at_ns)); - logf_debug("average_master_receive_dt_ns: %F", FMT_SINT(average_master_receive_dt_ns)); - logf_debug("next_tick_expected_ns: %F", FMT_SINT(next_tick_expected_ns)); - logf_debug("master_blend_time_target_ns: %F", FMT_SINT(master_blend_time_target_ns)); - logf_debug("blend_time_target_diff_ns: %F", FMT_SINT(blend_time_target_diff_ns)); - logf_debug("master_blend_time_ns: %F", FMT_SINT(master_blend_time_ns)); - logf_debug("left_snapshot->tick: %F", FMT_UINT(left_snapshot->tick)); - logf_debug("right_snapshot->tick: %F", FMT_UINT(right_snapshot->tick)); - logf_debug("master_ss->tick: %F", FMT_UINT(master_ss->tick)); + DEBUGBREAKABLE; + logf_debug("*************************************************"); + logf_debug("local_client->last_tick: %F", FMT_UINT(local_client->last_tick)); + logf_debug("master_sim_predicted_time_ns: %F", FMT_SINT(master_sim_predicted_time_ns)); + logf_debug("tick_progress: %F", FMT_FLOAT(tick_progress)); + logf_debug("sim_publish_timescale: %F", 
FMT_FLOAT(sim_publish_timescale)); + logf_debug("last_tick_from_master_received_at_ns: %F", FMT_SINT(last_tick_from_master_received_at_ns)); + logf_debug("average_master_receive_dt_ns: %F", FMT_SINT(average_master_receive_dt_ns)); + logf_debug("next_tick_expected_ns: %F", FMT_SINT(next_tick_expected_ns)); + logf_debug("master_blend_time_target_ns: %F", FMT_SINT(master_blend_time_target_ns)); + logf_debug("blend_time_target_diff_ns: %F", FMT_SINT(blend_time_target_diff_ns)); + logf_debug("master_blend_time_ns: %F", FMT_SINT(master_blend_time_ns)); + logf_debug("left_snapshot->tick: %F", FMT_UINT(left_snapshot->tick)); + logf_debug("right_snapshot->tick: %F", FMT_UINT(right_snapshot->tick)); + logf_debug("master_ss->tick: %F", FMT_UINT(master_ss->tick)); #endif - } - - if (master_ss->valid) { - struct sim_ent *master_player = sim_ent_find_first_match_one(master_ss, SEPROP_PLAYER_IS_MASTER); - - /* Update ent id from master */ - { - user_input_client->player_id = master_ss->local_player; - local_client->player_id = master_ss->local_player; } - /* Check for misprediction */ - u64 mispredicted_tick = 0; - if (!master_ss_is_blended) { - /* TODO: Actually check for misprediction rather than triggering mispredict any time a new master snapshot is received */ - mispredicted_tick = master_ss->tick; - } + if (master_ss->valid) { + struct sim_ent *master_player = sim_ent_find_first_match_one(master_ss, SEPROP_PLAYER_IS_MASTER); - - u64 step_base_tick = local_client->last_tick; - u64 step_end_tick = step_base_tick + 1; - if (mispredicted_tick > 0) { - step_base_tick = mispredicted_tick; - if (step_end_tick <= step_base_tick) { - step_end_tick = step_base_tick + 1; - } - } - - /* We want to simulate the ahead of the server to predict client input. - * How many ticks ahead we want to simulate is a balance between added latency and the server not receiving our inputs on time. 
- * We can take the server's ack minus the server's tick to determine how many cmds of ours the server has buffered. - * - * If this buffer gets too low (because we are lagging behind or the connection is unstable), meaning the server is not getting our input on time: - * - Shorten local compute rate to increase the rate at which we predict ahead & produce cmds, until the server's ack indicates a buffer size within desired range. - * - * If this buffer gets too large (because the client predicts too far ahead), meaning unneeded latency is being introduced: - * - Dilate local compute rate to decrease the rate at which we predict ahead & produce cmds until the server's ack indicates a buffer size within desired range. - */ - { - i64 cmds_ahead_on_master = (i64)master_client->ack - (i64)master_client->last_tick; - if (cmds_ahead_on_master < -3 || cmds_ahead_on_master > 10) { - /* Cmds are too far from master time, snap step end tick */ - i64 rtt_ns = master_client->last_rtt_ns; - f64 rtt_tick_ratio = (f64)(rtt_ns + (step_dt_ns - 1)) / (f64)step_dt_ns; - i64 num_predict_ticks = math_round_to_int64(rtt_tick_ratio) + 5; - step_end_tick = master_client->last_tick + num_predict_ticks; - compute_timescale = 1.1; - } else if (cmds_ahead_on_master > 2) { - /* Slow down simulation to dial back how far ahead we are predicting and bring local sim time closer to master sim time */ - compute_timescale = 1.1; - } else if (cmds_ahead_on_master < 1) { - /* Speed up simulation rate predict more ticks and give master more inputs to work with */ - compute_timescale = 0.9; - } else { - /* Server's cmd buffer is in a healthy range */ - compute_timescale = 1; - } - } - - /* Sync master with local base tick */ - struct sim_snapshot *base_ss = sim_snapshot_from_tick(local_client, step_base_tick); - if (mispredicted_tick) { - if (base_ss->valid) { - sim_snapshot_sync_ents(base_ss, master_ss, master_player->id, 0); - } else { - base_ss = sim_snapshot_alloc(local_client, master_ss, 
step_base_tick); - } - } - - /* Release any existing ticks that are about to be simulated */ - sim_snapshot_release_ticks_in_range(local_client, step_base_tick + 1, U64_MAX); - - /* Step */ - generate_user_input_cmds(user_input_client, step_end_tick); - { - struct sim_step_ctx ctx = ZI; - ctx.is_master = is_master; - ctx.sim_dt_ns = step_dt_ns; - ctx.accel = &accel; - ctx.user_input_client = user_input_client; - ctx.master_client = master_client; - ctx.publish_client = publish_client; - - u64 step_tick = step_base_tick + 1; - struct sim_snapshot *prev_ss = base_ss; - while (step_tick <= step_end_tick) { - ctx.world = sim_snapshot_alloc(local_client, prev_ss, step_tick); - if (!mispredicted_tick && step_tick == step_end_tick) { - sim_snapshot_sync_ents(ctx.world, master_ss, master_player->id, SIM_SYNC_FLAG_NOSYNC_PREDICTABLES); - } - sim_step(&ctx); - prev_ss = ctx.world; - ++step_tick; - } - } - } - } - - /* Publish snapshot to remote clients */ - for (u64 i = 0; i < store->num_clients_reserved; ++i) { - struct sim_client *client = &store->clients[i]; - if (client->valid && client != user_input_client && client != local_client && client != publish_client) { - struct bitbuff_writer msg_bw = bw_from_bitbuff(&msg_writer_bb); - - bw_write_uv(&msg_bw, client->highest_received_tick); /* ack */ - bw_write_uv(&msg_bw, client->ack); /* double ack */ - - struct sim_snapshot *base_ss = sim_snapshot_from_tick(publish_client, client->ack); - struct sim_snapshot *publish_ss; - if (client == master_client) { - /* If sending to master, start sending all snapshots since last ack */ - publish_ss = sim_snapshot_from_closest_tick_gte(publish_client, base_ss->tick + 1); - } else { - /* If sending to slave, only send latest snapshot */ - publish_ss = sim_snapshot_from_tick(publish_client, publish_client->last_tick); - } - - while (publish_ss->valid) { - struct bitbuff_writer snapshot_bw = bw_from_bitbuff(&snapshot_writer_bb); - struct string tmp_snapshot_encoded = ZI; + /* Update ent id 
from master */ { - bw_write_uv(&snapshot_bw, base_ss->tick); - bw_write_uv(&snapshot_bw, publish_ss->tick); - sim_snapshot_encode(&snapshot_bw, client, base_ss, publish_ss); - tmp_snapshot_encoded.len = bw_num_bytes_written(&snapshot_bw); - tmp_snapshot_encoded.text = bw_get_written_raw(&snapshot_bw); + user_input_client->player_id = master_ss->local_player; + local_client->player_id = master_ss->local_player; + } + + /* Check for misprediction */ + u64 mispredicted_tick = 0; + if (!master_ss_is_blended) { + /* TODO: Actually check for misprediction rather than triggering mispredict any time a new master snapshot is received */ + mispredicted_tick = master_ss->tick; + } + + + u64 step_base_tick = local_client->last_tick; + u64 step_end_tick = step_base_tick + 1; + if (mispredicted_tick > 0) { + step_base_tick = mispredicted_tick; + if (step_end_tick <= step_base_tick) { + step_end_tick = step_base_tick + 1; + } + } + + /* We want to simulate the ahead of the server to predict client input. + * How many ticks ahead we want to simulate is a balance between added latency and the server not receiving our inputs on time. + * We can take the server's ack minus the server's tick to determine how many cmds of ours the server has buffered. + * + * If this buffer gets too low (because we are lagging behind or the connection is unstable), meaning the server is not getting our input on time: + * - Shorten local compute rate to increase the rate at which we predict ahead & produce cmds, until the server's ack indicates a buffer size within desired range. + * + * If this buffer gets too large (because the client predicts too far ahead), meaning unneeded latency is being introduced: + * - Dilate local compute rate to decrease the rate at which we predict ahead & produce cmds until the server's ack indicates a buffer size within desired range. 
+ */ + { + i64 cmds_ahead_on_master = (i64)master_client->ack - (i64)master_client->last_tick; + if (cmds_ahead_on_master < -3 || cmds_ahead_on_master > 10) { + /* Cmds are too far from master time, snap step end tick */ + i64 rtt_ns = master_client->last_rtt_ns; + f64 rtt_tick_ratio = (f64)(rtt_ns + (step_dt_ns - 1)) / (f64)step_dt_ns; + i64 num_predict_ticks = math_round_to_int64(rtt_tick_ratio) + 5; + step_end_tick = master_client->last_tick + num_predict_ticks; + compute_timescale = 1.1; + } else if (cmds_ahead_on_master > 2) { + /* Slow down simulation to dial back how far ahead we are predicting and bring local sim time closer to master sim time */ + compute_timescale = 1.1; + } else if (cmds_ahead_on_master < 1) { + /* Speed up simulation rate predict more ticks and give master more inputs to work with */ + compute_timescale = 0.9; + } else { + /* Server's cmd buffer is in a healthy range */ + compute_timescale = 1; + } + } + + /* Sync master with local base tick */ + struct sim_snapshot *base_ss = sim_snapshot_from_tick(local_client, step_base_tick); + if (mispredicted_tick) { + if (base_ss->valid) { + sim_snapshot_sync_ents(base_ss, master_ss, master_player->id, 0); + } else { + base_ss = sim_snapshot_alloc(local_client, master_ss, step_base_tick); + } + } + + /* Release any existing ticks that are about to be simulated */ + sim_snapshot_release_ticks_in_range(local_client, step_base_tick + 1, U64_MAX); + + /* Step */ + generate_user_input_cmds(user_input_client, step_end_tick); + { + struct sim_step_ctx ctx = ZI; + ctx.is_master = is_master; + ctx.sim_dt_ns = step_dt_ns; + ctx.accel = &accel; + ctx.user_input_client = user_input_client; + ctx.master_client = master_client; + ctx.publish_client = publish_client; + + u64 step_tick = step_base_tick + 1; + struct sim_snapshot *prev_ss = base_ss; + while (step_tick <= step_end_tick) { + ctx.world = sim_snapshot_alloc(local_client, prev_ss, step_tick); + if (!mispredicted_tick && step_tick == step_end_tick) { + 
sim_snapshot_sync_ents(ctx.world, master_ss, master_player->id, SIM_SYNC_FLAG_NOSYNC_PREDICTABLES); + } + sim_step(&ctx); + prev_ss = ctx.world; + ++step_tick; + } } - bw_write_uv(&msg_bw, tmp_snapshot_encoded.len); - bw_write_bytes(&msg_bw, tmp_snapshot_encoded); - publish_ss = sim_snapshot_from_tick(publish_client, publish_ss->tick + 1); } - bw_write_uv(&msg_bw, 0); - - struct string encoded = ZI; - encoded.len = bw_num_bytes_written(&msg_bw); - encoded.text = bw_get_written_raw(&msg_bw); - host_queue_write(host, client->channel_id, encoded, 0); } - } - /* Copy local snapshot to user client */ - { - struct sim_snapshot *local_ss = sim_snapshot_from_tick(local_client, local_client->last_tick); - if (local_ss->valid) { - /* TODO: Double buffer */ - struct sys_lock lock = sys_mutex_lock_e(G.local_to_user_client_mutex); - sim_snapshot_alloc(G.local_to_user_client, local_ss, local_ss->tick); - i64 publish_ns = sys_time_ns(); - G.local_to_user_client_publish_dt_ns = publish_ns - last_publish_to_user_ns; - G.local_to_user_client_publish_time_ns = publish_ns; - last_publish_to_user_ns = publish_ns; - sim_snapshot_release_ticks_in_range(G.local_to_user_client, 0, local_ss->tick - 1); - sys_mutex_unlock(&lock); + /* Publish snapshot to remote clients */ + for (u64 i = 0; i < store->num_clients_reserved; ++i) { + struct sim_client *client = &store->clients[i]; + if (client->valid && client != user_input_client && client != local_client && client != publish_client) { + struct bitbuff_writer msg_bw = bw_from_bitbuff(&msg_writer_bb); + + bw_write_uv(&msg_bw, client->highest_received_tick); /* ack */ + bw_write_uv(&msg_bw, client->ack); /* double ack */ + + struct sim_snapshot *base_ss = sim_snapshot_from_tick(publish_client, client->ack); + struct sim_snapshot *publish_ss; + if (client == master_client) { + /* If sending to master, start sending all snapshots since last ack */ + publish_ss = sim_snapshot_from_closest_tick_gte(publish_client, base_ss->tick + 1); + } else { + /* 
If sending to slave, only send latest snapshot */ + publish_ss = sim_snapshot_from_tick(publish_client, publish_client->last_tick); + } + + while (publish_ss->valid) { + struct bitbuff_writer snapshot_bw = bw_from_bitbuff(&snapshot_writer_bb); + struct string tmp_snapshot_encoded = ZI; + { + bw_write_uv(&snapshot_bw, base_ss->tick); + bw_write_uv(&snapshot_bw, publish_ss->tick); + sim_snapshot_encode(&snapshot_bw, client, base_ss, publish_ss); + tmp_snapshot_encoded.len = bw_num_bytes_written(&snapshot_bw); + tmp_snapshot_encoded.text = bw_get_written_raw(&snapshot_bw); + } + bw_write_uv(&msg_bw, tmp_snapshot_encoded.len); + bw_write_bytes(&msg_bw, tmp_snapshot_encoded); + publish_ss = sim_snapshot_from_tick(publish_client, publish_ss->tick + 1); + } + bw_write_uv(&msg_bw, 0); + + struct string encoded = ZI; + encoded.len = bw_num_bytes_written(&msg_bw); + encoded.text = bw_get_written_raw(&msg_bw); + host_queue_write(host, client->channel_id, encoded, 0); + } } + + /* Copy local snapshot to user client */ + { + struct sim_snapshot *local_ss = sim_snapshot_from_tick(local_client, local_client->last_tick); + if (local_ss->valid) { + /* TODO: Double buffer */ + struct sys_lock lock = sys_mutex_lock_e(G.local_to_user_client_mutex); + sim_snapshot_alloc(G.local_to_user_client, local_ss, local_ss->tick); + i64 publish_ns = sys_time_ns(); + G.local_to_user_client_publish_dt_ns = publish_ns - last_publish_to_user_ns; + G.local_to_user_client_publish_time_ns = publish_ns; + last_publish_to_user_ns = publish_ns; + sim_snapshot_release_ticks_in_range(G.local_to_user_client, 0, local_ss->tick - 1); + sys_mutex_unlock(&lock); + } + } + +skip_step: + + /* Send host messages */ + host_update_end(host); + __profframe("Local sim"); + + scratch_end(scratch); } - - skip_step: - - /* Send host messages */ - host_update_end(host); - __profframe("Local sim"); - - scratch_end(scratch); } sim_client_store_release(store); diff --git a/src/user.h b/src/user.h index b5c49ea8..42ce727e 
100644 --- a/src/user.h +++ b/src/user.h @@ -2,7 +2,6 @@ #define USER_H struct sys_window; -struct work_startup_receipt; struct gp_startup_receipt; struct font_startup_receipt; struct sprite_startup_receipt; @@ -36,6 +35,7 @@ enum user_bind_kind { USER_BIND_KIND_DEBUG_WALLS, USER_BIND_KIND_DEBUG_FOLLOW, USER_BIND_KIND_DEBUG_DRAW, + USER_BIND_KIND_PROFILER, USER_BIND_KIND_DEBUG_CONSOLE, USER_BIND_KIND_DEBUG_CAMERA, USER_BIND_KIND_DEBUG_PAUSE, @@ -61,8 +61,7 @@ enum user_bind_kind { }; struct user_startup_receipt { i32 _; }; -struct user_startup_receipt user_startup(struct work_startup_receipt *work_sr, - struct gp_startup_receipt *gp_sr, +struct user_startup_receipt user_startup(struct gp_startup_receipt *gp_sr, struct font_startup_receipt *font_sr, struct sprite_startup_receipt *sprite_sr, struct draw_startup_receipt *draw_sr, diff --git a/src/util.h b/src/util.h index aad39548..51a25376 100644 --- a/src/util.h +++ b/src/util.h @@ -306,7 +306,6 @@ INLINE void sync_flag_wait(struct sync_flag *sf) INLINE void sleep_frame(i64 last_frame_time_ns, i64 target_dt_ns) { - __prof; if (last_frame_time_ns != 0 && target_dt_ns > 0) { i64 now_ns = sys_time_ns(); i64 last_frame_dt_ns = now_ns - last_frame_time_ns; diff --git a/src/work.c b/src/work.c deleted file mode 100644 index fb248144..00000000 --- a/src/work.c +++ /dev/null @@ -1,612 +0,0 @@ -#include "work.h" -#include "intrinsics.h" -#include "sys.h" -#include "arena.h" -#include "scratch.h" -#include "memory.h" -#include "string.h" -#include "log.h" -#include "thread_local.h" -#include "atomic.h" -#include "app.h" - -/* Terminology: - * - * Task: Single unit of stuff to be done (a function with a data pointer) - * - * Work: A group of tasks (doesn't have to be homogeneous) bundled together. - * Work is "complete" when all of its tasks are complete. - * - * Work Slate: A list of tasks used as a building-tool for constructing work. - * - * Worker: A thread that can do work. 
"work_startup" will create a certain - * amount of dedicated worker threads. Note that non-worker threads can - * also do work themselves (IE: callers of "work_wait") - */ - -struct worker { - struct sys_thread *thread; - struct worker *next; -}; - -struct work_task; - -struct work { - enum work_priority priority; - enum work_status status; - u32 workers; - - struct sys_condition_variable *condition_variable_finished; - - struct work *prev_scheduled; - struct work *next_scheduled; - struct work *next_free; - - struct work_task *task_head; /* Unstarted task head */ - u32 tasks_incomplete; - - u64 gen; -}; - -struct work_task { - void *data; - work_task_func *func; - struct work *work; - - struct work_task *next_in_work; - struct work_task *next_free; -}; - -/* ========================== * - * Global state - * ========================== */ - -GLOBAL struct { - struct arena *arena; - - b32 workers_shutdown; - struct sys_mutex *mutex; - struct sys_condition_variable *cv; - - u32 worker_count; - u32 idle_worker_count; - struct worker *worker_head; - - /* TODO: Make below pointers volatile? 
*/ - - struct work_task *free_task_head; - - struct work *free_work_head; - struct work *scheduled_work_head; - - /* Pointers to the last piece of work of each priority in the scheduled - * work list (used for O(1) insertion) */ - struct work *scheduled_work_priority_tails[NUM_WORK_PRIORITIES]; -} G = ZI, DEBUG_ALIAS(G, G_work); - -/* ========================== * - * Thread local state - * ========================== */ - -struct worker_ctx { - b32 is_worker; -}; - -GLOBAL THREAD_LOCAL_VAR_DEF(tl_worker_ctx, struct worker_ctx, NULL, NULL); - -/* ========================== * - * Startup - * ========================== */ - -INTERNAL APP_EXIT_CALLBACK_FUNC_DEF(work_shutdown); -INTERNAL SYS_THREAD_ENTRY_POINT_FUNC_DEF(worker_thread_entry_point, thread_data); - -struct work_startup_receipt work_startup(u32 num_worker_threads) -{ - struct arena_temp scratch = scratch_begin_no_conflict(); - - if (num_worker_threads <= 0) { - sys_panic(LIT("Tried to start up worker pool with 0 threads")); - } - - G.arena = arena_alloc(GIGABYTE(64)); - G.mutex = sys_mutex_alloc(); - G.cv = sys_condition_variable_alloc(); - G.worker_count = num_worker_threads; - G.idle_worker_count = num_worker_threads; - app_register_exit_callback(&work_shutdown); - - /* Initialize threads */ - struct sys_lock lock = sys_mutex_lock_e(G.mutex); - { - struct worker *prev = NULL; - for (u32 i = 0; i < num_worker_threads; ++i) { - struct string thread_name = string_format(scratch.arena, - LIT("[P6] Worker %F"), - FMT_UINT(i)); - - struct worker *worker = arena_push(G.arena, struct worker); - worker->thread = sys_thread_alloc(&worker_thread_entry_point, NULL, thread_name); - if (prev) { - prev->next = worker; - } else { - G.worker_head = worker; - } - prev = worker; - } - } - sys_mutex_unlock(&lock); - - scratch_end(scratch); - - return (struct work_startup_receipt) { 0 }; -} - -INTERNAL APP_EXIT_CALLBACK_FUNC_DEF(work_shutdown) -{ - __prof; - - struct sys_lock lock = sys_mutex_lock_e(G.mutex); - { - 
G.workers_shutdown = true; - sys_condition_variable_broadcast(G.cv); - } - sys_mutex_unlock(&lock); - - for (struct worker *worker = G.worker_head; worker; worker = worker->next) { - sys_thread_wait_release(worker->thread); - } -} - -/* ========================== * - * Internal work / task allocation - * ========================== */ - -INTERNAL struct work *work_alloc_locked(struct sys_lock *lock) -{ - __prof; - sys_assert_locked_e(lock, G.mutex); - (UNUSED)lock; - - struct work *work = NULL; - - /* Allocate work */ - if (G.free_work_head) { - /* Reuse from free list */ - work = G.free_work_head; - G.free_work_head = work->next_free; - *work = (struct work) { - .condition_variable_finished = work->condition_variable_finished, - .gen = work->gen + 1 - }; - } else { - /* Make new */ - work = arena_push_no_zero(G.arena, struct work); - *work = (struct work) { - .condition_variable_finished = sys_condition_variable_alloc(), - .gen = 1 - }; - } - return work; -} - -INTERNAL void work_release_locked(struct sys_lock *lock, struct work *work) -{ - sys_assert_locked_e(lock, G.mutex); - (UNUSED)lock; - - work->next_free = G.free_work_head; - G.free_work_head = work; - ++work->gen; -} - -INTERNAL struct work_handle work_to_handle_locked(struct sys_lock *lock, struct work *work) -{ - sys_assert_locked_e(lock, G.mutex); - (UNUSED)lock; - - return (struct work_handle) { - .work = work, - .gen = work->gen - }; -} - -INTERNAL struct work_task *task_alloc_locked(struct sys_lock *lock) -{ - sys_assert_locked_e(lock, G.mutex); - (UNUSED)lock; - - struct work_task *task = NULL; - - /* Allocate task */ - if (G.free_task_head) { - /* Reuse from free list */ - task = G.free_task_head; - G.free_task_head = task->next_free; - *task = (struct work_task) { 0 }; - } else { - /* Make new */ - task = arena_push(G.arena, struct work_task); - } - - return task; -} - -INTERNAL void task_release_locked(struct sys_lock *lock, struct work_task *task) -{ - sys_assert_locked_e(lock, G.mutex); - 
(UNUSED)lock; - - task->next_free = G.free_task_head; - G.free_task_head = task; -} - -/* ========================== * - * Work scheduling / insertion - * ========================== */ - -INTERNAL void work_schedule_locked(struct sys_lock *lock, struct work *work) -{ - __prof; - sys_assert_locked_e(lock, G.mutex); - (UNUSED)lock; - - enum work_priority priority = work->priority; - - if (G.scheduled_work_head) { - struct work *head = G.scheduled_work_head; - - if (head->priority >= priority) { - /* Head is lower priority, insert work as new head */ - G.scheduled_work_head = work; - work->next_scheduled = head; - head->prev_scheduled = work; - } else { - /* Find higher priority */ - struct work *tail = NULL; - for (i32 i = priority; i >= 0; --i) { - tail = G.scheduled_work_priority_tails[i]; - if (tail) { - break; - } - } - /* Hook work */ - work->next_scheduled = tail->next_scheduled; - work->prev_scheduled = tail; - tail->next_scheduled = work; - } - } else { - G.scheduled_work_head = work; - } - - G.scheduled_work_priority_tails[priority] = work; - - sys_condition_variable_signal(G.cv, work->tasks_incomplete); -} - -INTERNAL void work_unschedule_locked(struct sys_lock *lock, struct work *work) -{ - __prof; - sys_assert_locked_e(lock, G.mutex); - (UNUSED)lock; - - struct work *prev = (struct work *)work->prev_scheduled; - struct work *next = (struct work *)work->next_scheduled; - - /* Remove from priority tails array */ - enum work_priority priority = work->priority; - struct work *priority_tail = G.scheduled_work_priority_tails[priority]; - if (priority_tail == work && (!prev || prev->priority == priority)) { - G.scheduled_work_priority_tails[priority] = prev; - } - - /* Unhook work */ - if (prev) { - prev->next_scheduled = next; - } - if (next) { - next->prev_scheduled = prev; - } - if (work == G.scheduled_work_head) { - G.scheduled_work_head = next; - } -} - -/* ========================== * - * Task dequeuing - * ========================== */ - -INTERNAL struct 
work_task *work_dequeue_task_locked(struct sys_lock *lock, struct work *work) -{ - __prof; - sys_assert_locked_e(lock, G.mutex); - - struct work_task *task = work->task_head; - if (task) { - work->task_head = task->next_in_work; - if (!work->task_head) { - /* Unschedule work if last task */ - work_unschedule_locked(lock, work); - } - } - return task; -} - -/* ========================== * - * Work doing - * ========================== */ - -/* NOTE: This function will release `work` if there are no more tasks once completed. - * Returns `true` if more tasks are still present in the work after completion. */ -INTERNAL b32 work_exec_single_task_maybe_release_locked(struct sys_lock *lock, struct work *work) -{ - __prof; - sys_assert_locked_e(lock, G.mutex); - - struct work_task *task = work_dequeue_task_locked(lock, work); - b32 more_tasks = work->task_head != NULL; - - if (task) { - work->status = WORK_STATUS_IN_PROGRESS; - - ++work->workers; - /* Do task (temporarily unlock) */ - { - sys_mutex_unlock(lock); - task->func(task->data); - *lock = sys_mutex_lock_e(G.mutex); - } - --work->workers; - --work->tasks_incomplete; - task_release_locked(lock, task); - - if (work->tasks_incomplete == 0) { - /* Signal finished */ - work->status = WORK_STATUS_DONE; - sys_condition_variable_broadcast(work->condition_variable_finished); - - /* Release */ - work_release_locked(lock, work); - } - } - - return more_tasks; -} - -INTERNAL void work_exec_remaining_tasks_maybe_release_locked(struct sys_lock *lock, struct work *work) -{ - __prof; - sys_assert_locked_e(lock, G.mutex); - - b32 more_tasks = true; - while (more_tasks) { - more_tasks = work_exec_single_task_maybe_release_locked(lock, work); - } -} - -/* ========================== * - * Work thread proc - * ========================== */ - -INTERNAL SYS_THREAD_ENTRY_POINT_FUNC_DEF(worker_thread_entry_point, thread_data) -{ - (UNUSED)thread_data; - - struct worker_ctx *ctx = thread_local_var_eval(&tl_worker_ctx); - *ctx = (struct 
worker_ctx) { - .is_worker = true - }; - - struct sys_lock lock = sys_mutex_lock_e(G.mutex); - { - while (!G.workers_shutdown) { - struct work *work = G.scheduled_work_head; - if (work) { - __profscope(work_pool_task); - --G.idle_worker_count; - work_exec_single_task_maybe_release_locked(&lock, work); - ++G.idle_worker_count; - } else { - sys_condition_variable_wait(G.cv, &lock); - } - } - } - sys_mutex_unlock(&lock); -} - -/* ========================== * - * Work pushing interface - * ========================== */ - -/* If `help` is true, then the calling thread will start picking up tasks immediately (before other workers can see it) */ -INTERNAL struct work_handle work_push_from_slate_locked(struct sys_lock *lock, struct work_slate *ws, b32 help, enum work_priority priority) -{ - __prof; - sys_assert_locked_e(lock, G.mutex); - - struct work *work = work_alloc_locked(lock); - struct work_handle wh = work_to_handle_locked(lock, work); - - work->priority = priority; - work->status = WORK_STATUS_IN_PROGRESS; - - work->task_head = ws->task_head; - work->tasks_incomplete = ws->num_tasks; - - work_schedule_locked(lock, work); - - if (help) { - work_exec_remaining_tasks_maybe_release_locked(lock, work); - } else { - /* When work is submitted from a worker thread, we want the worker to pick - * up the tasks itself when idle workers = 0 and work.workers = 0 - * (work.workers will always = 0 when work is first pushed). - * - * This is not ideal, however it is necessary to prevent - * a scenario in which all workers are waiting on child work to complete in - * a subtle way (IE: outside of work_wait). Since all workers are waiting, - * there would be no remaining workers to complete the child work, meaning - * there is a deadlock. - * - * By forcing workers to do their own child work in this scenario, we can - * guarantee that this does not occur. However it is not ideal since it - * creates situations in which work is not done asynchronously. 
- */ - struct worker_ctx *ctx = thread_local_var_eval(&tl_worker_ctx); - if (ctx->is_worker) { - b32 work_done = false; - while (!work_done && G.idle_worker_count == 0 && work->workers == 0) { - work_done = !work_exec_single_task_maybe_release_locked(lock, work); - } - } - } - - return wh; -} - -INTERNAL struct work_handle work_push_task_internal(work_task_func *func, void *data, b32 help, enum work_priority priority) -{ - struct work_handle handle; - struct sys_lock lock = sys_mutex_lock_e(G.mutex); - { - struct work_task *task = task_alloc_locked(&lock); - task->data = data; - task->func = func; - - struct work_slate ws = { - .task_head = task, - .task_tail = task, - .num_tasks = 1 - }; - handle = work_push_from_slate_locked(&lock, &ws, help, priority); - } - sys_mutex_unlock(&lock); - return handle; -} - -/* Push work that contains a single task */ -struct work_handle work_push_task(work_task_func *func, void *data, enum work_priority priority) -{ - __prof; - struct work_handle handle = work_push_task_internal(func, data, false, priority); - return handle; -} - -struct work_handle work_push_task_and_help(work_task_func *func, void *data, enum work_priority priority) -{ - __prof; - struct work_handle handle = work_push_task_internal(func, data, true, priority); - return handle; -} - -struct work_slate work_slate_begin(void) -{ - __prof; - struct work_slate ws = ZI; - return ws; -} - -void work_slate_push_task(struct work_slate *ws, work_task_func *func, void *data) -{ - __prof; - - struct work_task *task = NULL; - struct sys_lock lock = sys_mutex_lock_e(G.mutex); - { - task = task_alloc_locked(&lock); - } - sys_mutex_unlock(&lock); - - task->data = data; - task->func = func; - - if (ws->task_tail) { - ws->task_tail->next_in_work = task; - } else { - ws->task_head = task; - } - ws->task_tail = task; - ++ws->num_tasks; - -} - -/* Push work that contains multiple tasks (work slate) */ -struct work_handle work_slate_end(struct work_slate *ws, enum work_priority 
priority) -{ - __prof; - - struct work_handle handle; - struct sys_lock lock = sys_mutex_lock_e(G.mutex); - { - handle = work_push_from_slate_locked(&lock, ws, false, priority); - } - sys_mutex_unlock(&lock); - - return handle; -} - -struct work_handle work_slate_end_and_help(struct work_slate *ws, enum work_priority priority) -{ - __prof; - struct work_handle handle = ZI; - if (ws->num_tasks > 0) { - struct sys_lock lock = sys_mutex_lock_e(G.mutex); - handle = work_push_from_slate_locked(&lock, ws, true, priority); - sys_mutex_unlock(&lock); - } - return handle; -} - -/* ========================== * - * Work intervention interface - * ========================== */ - -INTERNAL struct work *work_from_handle_locked(struct sys_lock *lock, struct work_handle handle) -{ - sys_assert_locked_e(lock, G.mutex); - (UNUSED)lock; - - struct work *work = handle.work; - if (work && work->gen != handle.gen) { - work = NULL; - } - return work; -} - -/* Wait for all tasks in work to be completed. Will also pick up any unstarted - * tasks in the work since the caller will be idle while waiting anyway. 
*/ -void work_wait(struct work_handle handle) -{ - __prof; - struct sys_lock lock = sys_mutex_lock_e(G.mutex); - { - struct work *work = work_from_handle_locked(&lock, handle); - if (work) { - /* Help with tasks */ - work_exec_remaining_tasks_maybe_release_locked(&lock, work); - - /* Wait for work completion */ - work = work_from_handle_locked(&lock, handle); /* Re-checking work is sitll valid here in case work_exec caused work to release */ - if (work) { - while (work->status != WORK_STATUS_DONE) { - sys_condition_variable_wait(work->condition_variable_finished, &lock); - } - } - } - } - sys_mutex_unlock(&lock); -} - -/* Try to pick up any scheduled tasks */ -void work_help(struct work_handle handle) -{ - __prof; - struct sys_lock lock = sys_mutex_lock_e(G.mutex); - { - struct work *work = work_from_handle_locked(&lock, handle); - if (work) { - work_exec_remaining_tasks_maybe_release_locked(&lock, work); - } - } - sys_mutex_unlock(&lock); -} diff --git a/src/work.h b/src/work.h deleted file mode 100644 index 196ae0a4..00000000 --- a/src/work.h +++ /dev/null @@ -1,48 +0,0 @@ -#ifndef WORK_H -#define WORK_H - -enum work_status { - WORK_STATUS_DONE, - WORK_STATUS_SCHEDULED, - WORK_STATUS_IN_PROGRESS -}; - -enum work_priority { - WORK_PRIORITY_HIGH, - WORK_PRIORITY_NORMAL, - - NUM_WORK_PRIORITIES -}; - -#define WORK_TASK_FUNC_DEF(name, arg_name) void name(void *arg_name) -typedef WORK_TASK_FUNC_DEF(work_task_func, data); - -struct work; -struct work_task; - -struct work_handle { - struct work *work; - u64 gen; -}; - -struct work_slate { - struct work_task *task_head; - struct work_task *task_tail; - u32 num_tasks; -}; - -struct work_startup_receipt { i32 _; }; -struct work_startup_receipt work_startup(u32 num_worker_threads); - -struct work_slate work_slate_begin(void); -struct work_handle work_slate_end(struct work_slate *ws, enum work_priority priority); -struct work_handle work_slate_end_and_help(struct work_slate *ws, enum work_priority priority); - -struct 
work_handle work_push_task(work_task_func *func, void *data, enum work_priority priority); -struct work_handle work_push_task_and_help(work_task_func *func, void *data, enum work_priority priority); -void work_slate_push_task(struct work_slate *ws, work_task_func *func, void *data); - -void work_wait(struct work_handle handle); -void work_help(struct work_handle handle); - -#endif