From e503fc9bdf6419980482aa3c9ad298c13fd5d330 Mon Sep 17 00:00:00 2001 From: jacob Date: Wed, 25 Jun 2025 16:17:14 -0500 Subject: [PATCH] d3d12 profiling --- res/sh/common.hlsl | 2 +- src/common.h | 18 ++++++++++++++-- src/gp_dx12.c | 52 +++++++++++++++++++++++++++++++++++++++------- src/log.c | 1 + src/sys_win32.c | 12 ++++++++--- src/user.c | 3 +++ 6 files changed, 75 insertions(+), 13 deletions(-) diff --git a/res/sh/common.hlsl b/res/sh/common.hlsl index 095f1347..992be30f 100644 --- a/res/sh/common.hlsl +++ b/res/sh/common.hlsl @@ -13,7 +13,7 @@ /* Linear color from normalized sRGB */ INLINE float4 linear_from_srgb(float4 srgb) { - return float4(pow(srgb.rgb, 2.2), srgb.a); + return float4(pow(abs(srgb.rgb), 2.2), srgb.a); } /* Linear color from R8G8B8A8 sRGB */ diff --git a/src/common.h b/src/common.h index ef5396f9..78b34fd2 100644 --- a/src/common.h +++ b/src/common.h @@ -648,9 +648,11 @@ INLINE f64 clamp_f64(f64 v, f64 min, f64 max) { return v < min ? min : v > max ? # define __profscope(name) static const struct ___tracy_source_location_data CAT(__tracy_source_location,__LINE__) = { #name, __func__, __FILE__, (uint32_t)__LINE__, 0 }; __attribute((cleanup(__prof_zone_cleanup_func))) TracyCZoneCtx __tracy_zone_ctx = ___tracy_emit_zone_begin_callstack( &CAT(__tracy_source_location,__LINE__), TRACY_CALLSTACK, true ); # endif # define __profscope_dx11(dx11_ctx, name, color) static const struct ___tracy_source_location_data CAT(__tracy_gpu_d3d11_source_location,__LINE__) = { #name, __func__, __FILE__, (uint32_t)__LINE__, BGR32(color) }; __attribute((cleanup(__prof_dx11_zone_cleanup_func))) TracyCD3D11ZoneCtx __tracy_d3d11_zone_ctx; ___tracy_d3d11_emit_zone_begin( dx11_ctx, &__tracy_d3d11_zone_ctx, &CAT(__tracy_gpu_d3d11_source_location,__LINE__), true); +# define __profscope_dx12(dx12_ctx, cmd_list, name, color) static const struct ___tracy_source_location_data CAT(__tracy_gpu_d3d12_source_location,__LINE__) = { #name, __func__, __FILE__, (uint32_t)__LINE__, BGR32(color) }; __attribute((cleanup(__prof_dx12_zone_cleanup_func))) TracyCD3D12ZoneCtx __tracy_d3d12_zone_ctx; ___tracy_d3d12_emit_zone_begin( dx12_ctx, cmd_list, &__tracy_d3d12_zone_ctx, &CAT(__tracy_gpu_d3d12_source_location,__LINE__), true); #endif INLINE void __prof_zone_cleanup_func(TracyCZoneCtx *ctx) { TracyCZoneEnd(*ctx); } INLINE void __prof_dx11_zone_cleanup_func(TracyCD3D11ZoneCtx *ctx) { ___tracy_d3d11_emit_zone_end(*ctx); } +INLINE void __prof_dx12_zone_cleanup_func(TracyCD3D12ZoneCtx *ctx) { ___tracy_d3d12_emit_zone_end(*ctx); } #define __profalloc(ptr, size) TracyCAlloc((ptr), (size)) #define __proffree(ptr) TracyCFree((ptr)) @@ -673,10 +675,16 @@ INLINE void __prof_dx11_zone_cleanup_func(TracyCD3D11ZoneCtx *ctx) { ___tracy_d3 #define __proflock_custom_name(ctx, name, len) TracyCSharedLockCustomName((ctx), (name), (len)) #define __prof_dx11_ctx TracyCD3D11Ctx -#define __prof_dx11_ctx_alloc(ctx, device, devicectx, name, name_len) ctx = ___tracy_d3d11_context_announce(device, devicectx, name, name_len) +#define __prof_dx11_ctx_alloc(ctx, device, device_ctx, name, name_len) ctx = ___tracy_d3d11_context_announce(device, device_ctx, name, name_len) #define __prof_dx11_ctx_release(ctx) ___tracy_d3d11_context_terminate(ctx) #define __prof_dx11_collect(ctx) ___tracy_d3d11_context_collect(ctx) +#define __prof_dx12_ctx TracyCD3D12Ctx +#define __prof_dx12_ctx_alloc(ctx, device, queue, name, name_len) ctx = ___tracy_d3d12_context_announce(device, queue, name, name_len) +#define __prof_dx12_ctx_release(ctx) ___tracy_d3d12_context_terminate(ctx) +#define __prof_dx12_new_frame(ctx) ___tracy_d3d12_context_new_frame(ctx) +#define __prof_dx12_collect(ctx) ___tracy_d3d12_context_collect(ctx) + enum __prof_plot_type { __prof_plot_type_number = TracyPlotFormatNumber, __prof_plot_type_memory = TracyPlotFormatMemory, @@ -700,6 +708,7 @@ enum __prof_plot_type { #define __prof #define __profscope(name) #define __profscope_dx11(dx11_ctx, name, color) +#define __profscope_dx12(dx11_ctx, queue, name, color) #define __profalloc(ptr, size) #define __proffree(ptr) #define __profmsg(txt, len, col) @@ -720,9 +729,14 @@ enum __prof_plot_type { #define __proflock_mark(ctx) #define __proflock_custom_name(ctx, name, len) #define __prof_dx11_ctx -#define __prof_dx11_ctx_alloc(ctx, device, devicectx, name, name_len) +#define __prof_dx11_ctx_alloc(ctx, device, device_ctx, name, name_len) #define __prof_dx11_ctx_release(ctx) #define __prof_dx11_collect(ctx) +#define __prof_dx12_ctx +#define __prof_dx12_ctx_alloc(ctx, device, queue, name, name_len) +#define __prof_dx12_ctx_release(ctx) +#define __prof_dx12_new_frame(ctx) +#define __prof_dx12_collect(ctx) #define __prof_plot_init(name, type, step, fill, color) #define __prof_plot(name, val) #define __prof_plot_i(name, val) diff --git a/src/gp_dx12.c b/src/gp_dx12.c index fcfdb8f4..413bd5bf 100644 --- a/src/gp_dx12.c +++ b/src/gp_dx12.c @@ -117,6 +117,7 @@ struct pipeline_scope { }; struct command_queue { + D3D12_COMMAND_LIST_TYPE type; ID3D12CommandQueue *cq; struct arena *arena; @@ -127,12 +128,17 @@ struct command_queue { struct atomic_u64 fence_target; ID3D12Fence *fence; + +#if PROFILING + struct __prof_dx12_ctx *prof; +#endif }; struct command_list { struct command_queue *cq; struct ID3D12CommandAllocator *ca; struct ID3D12GraphicsCommandList *cl; + struct sys_lock global_lock; struct command_descriptor_heap *first_command_descriptor_heap; struct command_buffer *first_command_buffer; @@ -293,6 +299,7 @@ GLOBAL struct { /* Command queues */ /* TODO: Add optional mode to route everything to direct queue */ + struct sys_mutex *global_command_list_mutex; struct command_queue *cq_direct; struct command_queue *cq_compute; struct command_queue *cq_copy_critical; @@ -315,7 +322,7 @@ INTERNAL void dx12_init_device(void); INTERNAL void dx12_init_objects(void); INTERNAL void dx12_init_pipelines(void); INTERNAL struct cpu_descriptor_heap *cpu_descriptor_heap_alloc(enum D3D12_DESCRIPTOR_HEAP_TYPE type); -INTERNAL struct command_queue *command_queue_alloc(enum D3D12_COMMAND_LIST_TYPE type, enum D3D12_COMMAND_QUEUE_PRIORITY priority); +INTERNAL struct command_queue *command_queue_alloc(enum D3D12_COMMAND_LIST_TYPE type, enum D3D12_COMMAND_QUEUE_PRIORITY priority, struct string dbg_name); INTERNAL void command_queue_release(struct command_queue *cq); INTERNAL void dx12_resource_release(struct dx12_resource *resource); @@ -662,11 +669,12 @@ INTERNAL void dx12_init_objects(void) G.cbv_srv_uav_heap = cpu_descriptor_heap_alloc(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); G.rtv_heap = cpu_descriptor_heap_alloc(D3D12_DESCRIPTOR_HEAP_TYPE_RTV); - /* Create direct command queue */ - G.cq_direct = command_queue_alloc(D3D12_COMMAND_LIST_TYPE_DIRECT, D3D12_COMMAND_QUEUE_PRIORITY_NORMAL); - G.cq_compute = command_queue_alloc(D3D12_COMMAND_LIST_TYPE_COMPUTE, D3D12_COMMAND_QUEUE_PRIORITY_NORMAL); - G.cq_copy_critical = command_queue_alloc(D3D12_COMMAND_LIST_TYPE_COPY, D3D12_COMMAND_QUEUE_PRIORITY_HIGH); - G.cq_copy_background = command_queue_alloc(D3D12_COMMAND_LIST_TYPE_COPY, D3D12_COMMAND_QUEUE_PRIORITY_NORMAL); + /* Create command queues */ + G.global_command_list_mutex = sys_mutex_alloc(); + G.cq_direct = command_queue_alloc(D3D12_COMMAND_LIST_TYPE_DIRECT, D3D12_COMMAND_QUEUE_PRIORITY_NORMAL, LIT("Direct queue")); + G.cq_compute = command_queue_alloc(D3D12_COMMAND_LIST_TYPE_COMPUTE, D3D12_COMMAND_QUEUE_PRIORITY_NORMAL, LIT("Compute queue")); + G.cq_copy_critical = command_queue_alloc(D3D12_COMMAND_LIST_TYPE_COPY, D3D12_COMMAND_QUEUE_PRIORITY_HIGH, LIT("High priority copy queue")); + G.cq_copy_background = command_queue_alloc(D3D12_COMMAND_LIST_TYPE_COPY, D3D12_COMMAND_QUEUE_PRIORITY_NORMAL, LIT("Background copy queue")); } /* ========================== * @@ -1700,7 +1708,7 @@ INTERNAL enum D3D12_RESOURCE_STATES dx12_resource_barrier(ID3D12GraphicsCommandL * Command queue * ========================== */ -INTERNAL struct command_queue *command_queue_alloc(enum D3D12_COMMAND_LIST_TYPE type, enum D3D12_COMMAND_QUEUE_PRIORITY priority) +INTERNAL struct command_queue *command_queue_alloc(enum D3D12_COMMAND_LIST_TYPE type, enum D3D12_COMMAND_QUEUE_PRIORITY priority, struct string dbg_name) { __prof; struct command_queue *cq = NULL; @@ -1726,6 +1734,9 @@ INTERNAL struct command_queue *command_queue_alloc(enum D3D12_COMMAND_LIST_TYPE sys_panic(LIT("Failed to create command queue fence")); } + __prof_dx12_ctx_alloc(cq->prof, G.device, cq->cq, dbg_name.text, dbg_name.len); + (UNUSED)dbg_name; + return cq; } @@ -1781,6 +1792,7 @@ INTERNAL struct command_list *command_list_open(struct command_queue *cq) } MEMZERO_STRUCT(cl); cl->cq = cq; + cl->global_lock = sys_mutex_lock_s(G.global_command_list_mutex); HRESULT hr = 0; /* FIXME: Determine command list type from command queue */ @@ -1870,6 +1882,7 @@ INTERNAL u64 command_list_close(struct command_list *cl) } /* Add command list to submitted list */ + sys_mutex_unlock(&cl->global_lock); cl->submitted_fence_target = target_fence_value; { struct sys_lock lock = sys_mutex_lock_e(cq->mutex); @@ -2285,6 +2298,7 @@ struct gp_handle gp_texture_alloc(enum gp_texture_format format, u32 flags, stru struct command_queue *cq = G.cq_copy_background; struct command_list *cl = command_list_open(cq); { + __profscope_dx12(cl->cq->prof, cl->cl, Upload texture, RGB32_F(0.2, 0.5, 0.2)); D3D12_TEXTURE_COPY_LOCATION dst_loc = { .pResource = r->resource, .Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX, @@ -2347,6 +2361,7 @@ void gp_dispatch(struct gp_dispatch_params params) struct pipeline *shape_pipeline = pipeline_from_name(pipeline_scope, LIT("shape")); struct command_list *cl = command_list_open(G.cq_direct); { + __profscope_dx12(cl->cq->prof, cl->cl, Dispatch, RGB32_F(0.5, 0.2, 0.2)); struct dx12_resource *target = handle_get_data(params.draw_target, DX12_HANDLE_KIND_RESOURCE); struct mat4x4 vp_matrix = calculate_vp(params.draw_target_view, params.draw_target_viewport.width, params.draw_target_viewport.height); @@ -2379,6 +2394,7 @@ void gp_dispatch(struct gp_dispatch_params params) /* Material pass */ if (material_pipeline->success) { __profscope(Material pass); + __profscope_dx12(cl->cq->prof, cl->cl, Material pass, RGB32_F(0.5, 0.2, 0.2)); /* Bind pipeline */ ID3D12GraphicsCommandList_SetPipelineState(cl->cl, material_pipeline->pso); @@ -2419,6 +2435,7 @@ void gp_dispatch(struct gp_dispatch_params params) /* Shape pass */ if (shape_pipeline->success) { __profscope(Shape pass); + __profscope_dx12(cl->cq->prof, cl->cl, Shape pass, RGB32_F(0.5, 0.2, 0.2)); /* Bind pipeline */ ID3D12GraphicsCommandList_SetPipelineState(cl->cl, shape_pipeline->pso); @@ -2571,6 +2588,8 @@ INTERNAL void present_blit(struct dx12_resource *dst, struct dx12_resource *src, if (blit_pipeline->success) { struct command_list *cl = command_list_open(G.cq_direct); { + __profscope_dx12(cl->cq->prof, cl->cl, Blit, RGB32_F(0.5, 0.2, 0.2)); + /* Upload dummmy vert & index buffer */ /* TODO: Make these static */ /* Dummy vertex buffer */ @@ -2654,6 +2673,25 @@ void gp_present(struct sys_window *window, struct v2i32 backbuffer_resolution, s __profframe(0); } +#if PROFILING + { + /* Lock because command shouldn't be recording during a frame mark */ + struct sys_lock lock = sys_mutex_lock_e(G.global_command_list_mutex); + __prof_dx12_new_frame(G.cq_direct->prof); + __prof_dx12_new_frame(G.cq_compute->prof); + __prof_dx12_new_frame(G.cq_copy_critical->prof); + __prof_dx12_new_frame(G.cq_copy_background->prof); + sys_mutex_unlock(&lock); + } + { + __prof_dx12_collect(G.cq_direct->prof); + __prof_dx12_collect(G.cq_compute->prof); + __prof_dx12_collect(G.cq_copy_critical->prof); + __prof_dx12_collect(G.cq_copy_background->prof); + } +#endif + + (UNUSED)backbuffer_resolution; (UNUSED)texture; (UNUSED)texture_xf; diff --git a/src/log.c b/src/log.c index 46a555b4..cdbdfee7 100644 --- a/src/log.c +++ b/src/log.c @@ -216,6 +216,7 @@ void _log(i32 level, struct string msg) struct sys_lock lock = sys_mutex_lock_s(G.callbacks_mutex); for (struct log_event_callback *callback = G.first_callback; callback; callback = callback->next) { if (level <= callback->level) { + __profscope(Run log callback); callback->func(event); } } diff --git a/src/sys_win32.c b/src/sys_win32.c index f8f43bc7..e9540921 100644 --- a/src/sys_win32.c +++ b/src/sys_win32.c @@ -1588,9 +1588,17 @@ void sys_window_cursor_disable_clip(struct sys_window *sys_window) INTERNAL void win32_mutex_init(struct win32_mutex *m) { +#if PROFILING + struct __proflock_ctx *profiling_ctx = m->profiling_ctx; +#endif MEMZERO_STRUCT(m); - __proflock_alloc(m->profiling_ctx); m->srwlock = (SRWLOCK)SRWLOCK_INIT; +#if PROFILING + if (!profiling_ctx) { + __proflock_alloc(profiling_ctx); + } + m->profiling_ctx = profiling_ctx; +#endif } struct sys_mutex *sys_mutex_alloc(void) @@ -1607,7 +1615,6 @@ struct sys_mutex *sys_mutex_alloc(void) } sys_mutex_unlock(&lock); } - MEMZERO_STRUCT(m); win32_mutex_init(m); return (struct sys_mutex *)m; } @@ -1624,7 +1631,6 @@ void sys_mutex_release(struct sys_mutex *mutex) G.first_free_mutex = m; sys_mutex_unlock(&lock); } - __proflock_release(m->profiling_ctx); } struct sys_lock sys_mutex_lock_e(struct sys_mutex *mutex) diff --git a/src/user.c b/src/user.c index d2f19a46..a926dadf 100644 --- a/src/user.c +++ b/src/user.c @@ -462,6 +462,7 @@ INTERNAL struct string get_ent_debug_text(struct arena *arena, struct sim_ent *e INTERNAL LOG_EVENT_CALLBACK_FUNC_DEF(debug_console_log_callback, log) { + __prof; struct sys_lock lock = sys_mutex_lock_e(G.console_logs_mutex); { struct console_log *clog = arena_push(G.console_logs_arena, struct console_log); @@ -487,6 +488,7 @@ INTERNAL LOG_EVENT_CALLBACK_FUNC_DEF(debug_console_log_callback, log) INTERNAL void draw_debug_console(i32 level, b32 minimized) { + __prof; struct arena_temp scratch = scratch_begin_no_conflict(); struct v2 desired_start_pos = V2(10, minimized ? 100 : 600); @@ -1928,6 +1930,7 @@ INTERNAL void user_update(void) * ========================== */ if (G.debug_draw) { + __profscope(Draw debug info); struct font *font = font_load_async(LIT("font/fixedsys.ttf"), 12.0f); if (font) { struct arena_temp temp = arena_temp_begin(scratch.arena);