d3d12 profiling

This commit is contained in:
jacob 2025-06-25 16:17:14 -05:00
parent a0f659959d
commit e503fc9bdf
6 changed files with 75 additions and 13 deletions

View File

@ -13,7 +13,7 @@
/* Linear color from normalized sRGB.
 * Raises the color channels to the power 2.2; alpha is passed through unchanged. */
INLINE float4 linear_from_srgb(float4 srgb)
{
    /* abs() keeps the base of pow() non-negative: pow() with a negative base
     * does not produce a usable result for a fractional exponent. */
    return float4(pow(abs(srgb.rgb), 2.2), srgb.a);
}
/* Linear color from R8G8B8A8 sRGB */

View File

@ -648,9 +648,11 @@ INLINE f64 clamp_f64(f64 v, f64 min, f64 max) { return v < min ? min : v > max ?
# define __profscope(name) static const struct ___tracy_source_location_data CAT(__tracy_source_location,__LINE__) = { #name, __func__, __FILE__, (uint32_t)__LINE__, 0 }; __attribute((cleanup(__prof_zone_cleanup_func))) TracyCZoneCtx __tracy_zone_ctx = ___tracy_emit_zone_begin_callstack( &CAT(__tracy_source_location,__LINE__), TRACY_CALLSTACK, true );
# endif
/* Open a D3D11 GPU profiling zone that lasts until the end of the enclosing scope.
 * Expands to: a static source-location record (stringified `name`, function, file,
 * line, BGR32(color)), a zone-context variable whose `cleanup` attribute runs
 * __prof_dx11_zone_cleanup_func when the variable leaves scope, and the
 * zone-begin call on `dx11_ctx`. The zone variable has a fixed name, so this
 * can appear at most once per scope. */
# define __profscope_dx11(dx11_ctx, name, color) static const struct ___tracy_source_location_data CAT(__tracy_gpu_d3d11_source_location,__LINE__) = { #name, __func__, __FILE__, (uint32_t)__LINE__, BGR32(color) }; __attribute((cleanup(__prof_dx11_zone_cleanup_func))) TracyCD3D11ZoneCtx __tracy_d3d11_zone_ctx; ___tracy_d3d11_emit_zone_begin( dx11_ctx, &__tracy_d3d11_zone_ctx, &CAT(__tracy_gpu_d3d11_source_location,__LINE__), true);
/* Open a D3D12 GPU profiling zone on `cmd_list` that lasts until the end of the
 * enclosing scope.
 * Expands to: a static source-location record (stringified `name`, function, file,
 * line, BGR32(color)), a zone-context variable whose `cleanup` attribute runs
 * __prof_dx12_zone_cleanup_func when the variable leaves scope, and the
 * zone-begin call on `dx12_ctx` / `cmd_list`. The zone variable has a fixed
 * name, so this can appear at most once per scope. */
# define __profscope_dx12(dx12_ctx, cmd_list, name, color) static const struct ___tracy_source_location_data CAT(__tracy_gpu_d3d12_source_location,__LINE__) = { #name, __func__, __FILE__, (uint32_t)__LINE__, BGR32(color) }; __attribute((cleanup(__prof_dx12_zone_cleanup_func))) TracyCD3D12ZoneCtx __tracy_d3d12_zone_ctx; ___tracy_d3d12_emit_zone_begin( dx12_ctx, cmd_list, &__tracy_d3d12_zone_ctx, &CAT(__tracy_gpu_d3d12_source_location,__LINE__), true);
#endif
/* Scope-exit hook for CPU zones: ends the zone stored in `*ctx`. */
INLINE void __prof_zone_cleanup_func(TracyCZoneCtx *ctx)
{
    TracyCZoneEnd(*ctx);
}
/* Scope-exit hook for D3D11 GPU zones: ends the zone stored in `*ctx`. */
INLINE void __prof_dx11_zone_cleanup_func(TracyCD3D11ZoneCtx *ctx)
{
    ___tracy_d3d11_emit_zone_end(*ctx);
}
/* Scope-exit hook for D3D12 GPU zones: ends the zone stored in `*ctx`. */
INLINE void __prof_dx12_zone_cleanup_func(TracyCD3D12ZoneCtx *ctx)
{
    ___tracy_d3d12_emit_zone_end(*ctx);
}
#define __profalloc(ptr, size) TracyCAlloc((ptr), (size))
#define __proffree(ptr) TracyCFree((ptr))
@ -673,10 +675,16 @@ INLINE void __prof_dx11_zone_cleanup_func(TracyCD3D11ZoneCtx *ctx) { ___tracy_d3
#define __proflock_custom_name(ctx, name, len) TracyCSharedLockCustomName((ctx), (name), (len))
#define __prof_dx11_ctx TracyCD3D11Ctx
/* Announce a D3D11 GPU profiling context for `device` / `device_ctx` and assign the result to `ctx`. */
#define __prof_dx11_ctx_alloc(ctx, device, device_ctx, name, name_len) ctx = ___tracy_d3d11_context_announce(device, device_ctx, name, name_len)
#define __prof_dx11_ctx_release(ctx) ___tracy_d3d11_context_terminate(ctx)
#define __prof_dx11_collect(ctx) ___tracy_d3d11_context_collect(ctx)
#define __prof_dx12_ctx TracyCD3D12Ctx
#define __prof_dx12_ctx_alloc(ctx, device, queue, name, name_len) ctx = ___tracy_d3d12_context_announce(device, queue, name, name_len)
#define __prof_dx12_ctx_release(ctx) ___tracy_d3d12_context_terminate(ctx)
#define __prof_dx12_new_frame(ctx) ___tracy_d3d12_context_new_frame(ctx)
#define __prof_dx12_collect(ctx) ___tracy_d3d12_context_collect(ctx)
enum __prof_plot_type {
__prof_plot_type_number = TracyPlotFormatNumber,
__prof_plot_type_memory = TracyPlotFormatMemory,
@ -700,6 +708,7 @@ enum __prof_plot_type {
#define __prof
#define __profscope(name)
#define __profscope_dx11(dx11_ctx, name, color)
/* No-op stub: expands to nothing. Parameter names mirror the Tracy-backed definition. */
#define __profscope_dx12(dx12_ctx, cmd_list, name, color)
#define __profalloc(ptr, size)
#define __proffree(ptr)
#define __profmsg(txt, len, col)
@ -720,9 +729,14 @@ enum __prof_plot_type {
#define __proflock_mark(ctx)
#define __proflock_custom_name(ctx, name, len)
#define __prof_dx11_ctx
/* No-op stub: expands to nothing, so `ctx` is left untouched. */
#define __prof_dx11_ctx_alloc(ctx, device, device_ctx, name, name_len)
#define __prof_dx11_ctx_release(ctx)
#define __prof_dx11_collect(ctx)
#define __prof_dx12_ctx
#define __prof_dx12_ctx_alloc(ctx, device, queue, name, name_len)
#define __prof_dx12_ctx_release(ctx)
#define __prof_dx12_new_frame(ctx)
#define __prof_dx12_collect(ctx)
#define __prof_plot_init(name, type, step, fill, color)
#define __prof_plot(name, val)
#define __prof_plot_i(name, val)

View File

@ -117,6 +117,7 @@ struct pipeline_scope {
};
struct command_queue {
D3D12_COMMAND_LIST_TYPE type;
ID3D12CommandQueue *cq;
struct arena *arena;
@ -127,12 +128,17 @@ struct command_queue {
struct atomic_u64 fence_target;
ID3D12Fence *fence;
#if PROFILING
struct __prof_dx12_ctx *prof;
#endif
};
struct command_list {
struct command_queue *cq;
struct ID3D12CommandAllocator *ca;
struct ID3D12GraphicsCommandList *cl;
struct sys_lock global_lock;
struct command_descriptor_heap *first_command_descriptor_heap;
struct command_buffer *first_command_buffer;
@ -293,6 +299,7 @@ GLOBAL struct {
/* Command queues */
/* TODO: Add optional mode to route everything to direct queue */
struct sys_mutex *global_command_list_mutex;
struct command_queue *cq_direct;
struct command_queue *cq_compute;
struct command_queue *cq_copy_critical;
@ -315,7 +322,7 @@ INTERNAL void dx12_init_device(void);
INTERNAL void dx12_init_objects(void);
INTERNAL void dx12_init_pipelines(void);
INTERNAL struct cpu_descriptor_heap *cpu_descriptor_heap_alloc(enum D3D12_DESCRIPTOR_HEAP_TYPE type);
/* `dbg_name` is the label passed to the queue's GPU profiling context. */
INTERNAL struct command_queue *command_queue_alloc(enum D3D12_COMMAND_LIST_TYPE type, enum D3D12_COMMAND_QUEUE_PRIORITY priority, struct string dbg_name);
INTERNAL void command_queue_release(struct command_queue *cq);
INTERNAL void dx12_resource_release(struct dx12_resource *resource);
@ -662,11 +669,12 @@ INTERNAL void dx12_init_objects(void)
G.cbv_srv_uav_heap = cpu_descriptor_heap_alloc(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
G.rtv_heap = cpu_descriptor_heap_alloc(D3D12_DESCRIPTOR_HEAP_TYPE_RTV);
/* Create direct command queue */
G.cq_direct = command_queue_alloc(D3D12_COMMAND_LIST_TYPE_DIRECT, D3D12_COMMAND_QUEUE_PRIORITY_NORMAL);
G.cq_compute = command_queue_alloc(D3D12_COMMAND_LIST_TYPE_COMPUTE, D3D12_COMMAND_QUEUE_PRIORITY_NORMAL);
G.cq_copy_critical = command_queue_alloc(D3D12_COMMAND_LIST_TYPE_COPY, D3D12_COMMAND_QUEUE_PRIORITY_HIGH);
G.cq_copy_background = command_queue_alloc(D3D12_COMMAND_LIST_TYPE_COPY, D3D12_COMMAND_QUEUE_PRIORITY_NORMAL);
/* Create command queues */
G.global_command_list_mutex = sys_mutex_alloc();
G.cq_direct = command_queue_alloc(D3D12_COMMAND_LIST_TYPE_DIRECT, D3D12_COMMAND_QUEUE_PRIORITY_NORMAL, LIT("Direct queue"));
G.cq_compute = command_queue_alloc(D3D12_COMMAND_LIST_TYPE_COMPUTE, D3D12_COMMAND_QUEUE_PRIORITY_NORMAL, LIT("Compute queue"));
G.cq_copy_critical = command_queue_alloc(D3D12_COMMAND_LIST_TYPE_COPY, D3D12_COMMAND_QUEUE_PRIORITY_HIGH, LIT("High priority copy queue"));
G.cq_copy_background = command_queue_alloc(D3D12_COMMAND_LIST_TYPE_COPY, D3D12_COMMAND_QUEUE_PRIORITY_NORMAL, LIT("Background copy queue"));
}
/* ========================== *
@ -1700,7 +1708,7 @@ INTERNAL enum D3D12_RESOURCE_STATES dx12_resource_barrier(ID3D12GraphicsCommandL
* Command queue
* ========================== */
INTERNAL struct command_queue *command_queue_alloc(enum D3D12_COMMAND_LIST_TYPE type, enum D3D12_COMMAND_QUEUE_PRIORITY priority)
INTERNAL struct command_queue *command_queue_alloc(enum D3D12_COMMAND_LIST_TYPE type, enum D3D12_COMMAND_QUEUE_PRIORITY priority, struct string dbg_name)
{
__prof;
struct command_queue *cq = NULL;
@ -1726,6 +1734,9 @@ INTERNAL struct command_queue *command_queue_alloc(enum D3D12_COMMAND_LIST_TYPE
sys_panic(LIT("Failed to create command queue fence"));
}
__prof_dx12_ctx_alloc(cq->prof, G.device, cq->cq, dbg_name.text, dbg_name.len);
(UNUSED)dbg_name;
return cq;
}
@ -1781,6 +1792,7 @@ INTERNAL struct command_list *command_list_open(struct command_queue *cq)
}
MEMZERO_STRUCT(cl);
cl->cq = cq;
cl->global_lock = sys_mutex_lock_s(G.global_command_list_mutex);
HRESULT hr = 0;
/* FIXME: Determine command list type from command queue */
@ -1870,6 +1882,7 @@ INTERNAL u64 command_list_close(struct command_list *cl)
}
/* Add command list to submitted list */
sys_mutex_unlock(&cl->global_lock);
cl->submitted_fence_target = target_fence_value;
{
struct sys_lock lock = sys_mutex_lock_e(cq->mutex);
@ -2285,6 +2298,7 @@ struct gp_handle gp_texture_alloc(enum gp_texture_format format, u32 flags, stru
struct command_queue *cq = G.cq_copy_background;
struct command_list *cl = command_list_open(cq);
{
__profscope_dx12(cl->cq->prof, cl->cl, Upload texture, RGB32_F(0.2, 0.5, 0.2));
D3D12_TEXTURE_COPY_LOCATION dst_loc = {
.pResource = r->resource,
.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX,
@ -2347,6 +2361,7 @@ void gp_dispatch(struct gp_dispatch_params params)
struct pipeline *shape_pipeline = pipeline_from_name(pipeline_scope, LIT("shape"));
struct command_list *cl = command_list_open(G.cq_direct);
{
__profscope_dx12(cl->cq->prof, cl->cl, Dispatch, RGB32_F(0.5, 0.2, 0.2));
struct dx12_resource *target = handle_get_data(params.draw_target, DX12_HANDLE_KIND_RESOURCE);
struct mat4x4 vp_matrix = calculate_vp(params.draw_target_view, params.draw_target_viewport.width, params.draw_target_viewport.height);
@ -2379,6 +2394,7 @@ void gp_dispatch(struct gp_dispatch_params params)
/* Material pass */
if (material_pipeline->success) {
__profscope(Material pass);
__profscope_dx12(cl->cq->prof, cl->cl, Material pass, RGB32_F(0.5, 0.2, 0.2));
/* Bind pipeline */
ID3D12GraphicsCommandList_SetPipelineState(cl->cl, material_pipeline->pso);
@ -2419,6 +2435,7 @@ void gp_dispatch(struct gp_dispatch_params params)
/* Shape pass */
if (shape_pipeline->success) {
__profscope(Shape pass);
__profscope_dx12(cl->cq->prof, cl->cl, Shape pass, RGB32_F(0.5, 0.2, 0.2));
/* Bind pipeline */
ID3D12GraphicsCommandList_SetPipelineState(cl->cl, shape_pipeline->pso);
@ -2571,6 +2588,8 @@ INTERNAL void present_blit(struct dx12_resource *dst, struct dx12_resource *src,
if (blit_pipeline->success) {
struct command_list *cl = command_list_open(G.cq_direct);
{
__profscope_dx12(cl->cq->prof, cl->cl, Blit, RGB32_F(0.5, 0.2, 0.2));
/* Upload dummy vert & index buffer */
/* TODO: Make these static */
/* Dummy vertex buffer */
@ -2654,6 +2673,25 @@ void gp_present(struct sys_window *window, struct v2i32 backbuffer_resolution, s
__profframe(0);
}
#if PROFILING
{
/* Lock because command lists shouldn't be recording during a frame mark */
struct sys_lock lock = sys_mutex_lock_e(G.global_command_list_mutex);
__prof_dx12_new_frame(G.cq_direct->prof);
__prof_dx12_new_frame(G.cq_compute->prof);
__prof_dx12_new_frame(G.cq_copy_critical->prof);
__prof_dx12_new_frame(G.cq_copy_background->prof);
sys_mutex_unlock(&lock);
}
{
__prof_dx12_collect(G.cq_direct->prof);
__prof_dx12_collect(G.cq_compute->prof);
__prof_dx12_collect(G.cq_copy_critical->prof);
__prof_dx12_collect(G.cq_copy_background->prof);
}
#endif
(UNUSED)backbuffer_resolution;
(UNUSED)texture;
(UNUSED)texture_xf;

View File

@ -216,6 +216,7 @@ void _log(i32 level, struct string msg)
struct sys_lock lock = sys_mutex_lock_s(G.callbacks_mutex);
for (struct log_event_callback *callback = G.first_callback; callback; callback = callback->next) {
if (level <= callback->level) {
__profscope(Run log callback);
callback->func(event);
}
}

View File

@ -1588,9 +1588,17 @@ void sys_window_cursor_disable_clip(struct sys_window *sys_window)
/* (Re)initialize a mutex in place: zero the struct and set up an unlocked SRW lock.
 *
 * With PROFILING enabled, a lock-profiling context that is already attached to
 * `m` is carried across the reset; a new one is allocated only when there was
 * none.
 *
 * NOTE(review): m->profiling_ctx is read before the struct is zeroed, so `m`
 * must point at zeroed memory or at a previously initialized mutex -- confirm
 * this holds for freshly allocated mutexes. */
INTERNAL void win32_mutex_init(struct win32_mutex *m)
{
#if PROFILING
    /* Save the context first; MEMZERO_STRUCT below would otherwise drop it. */
    struct __proflock_ctx *profiling_ctx = m->profiling_ctx;
#endif
    MEMZERO_STRUCT(m);
    m->srwlock = (SRWLOCK)SRWLOCK_INIT;
#if PROFILING
    /* Allocate exactly once per mutex. Allocating unconditionally right after
     * the zeroing would create a context that is immediately overwritten here. */
    if (!profiling_ctx) {
        __proflock_alloc(profiling_ctx);
    }
    m->profiling_ctx = profiling_ctx;
#endif
}
struct sys_mutex *sys_mutex_alloc(void)
@ -1607,7 +1615,6 @@ struct sys_mutex *sys_mutex_alloc(void)
}
sys_mutex_unlock(&lock);
}
MEMZERO_STRUCT(m);
win32_mutex_init(m);
return (struct sys_mutex *)m;
}
@ -1624,7 +1631,6 @@ void sys_mutex_release(struct sys_mutex *mutex)
G.first_free_mutex = m;
sys_mutex_unlock(&lock);
}
__proflock_release(m->profiling_ctx);
}
struct sys_lock sys_mutex_lock_e(struct sys_mutex *mutex)

View File

@ -462,6 +462,7 @@ INTERNAL struct string get_ent_debug_text(struct arena *arena, struct sim_ent *e
INTERNAL LOG_EVENT_CALLBACK_FUNC_DEF(debug_console_log_callback, log)
{
__prof;
struct sys_lock lock = sys_mutex_lock_e(G.console_logs_mutex);
{
struct console_log *clog = arena_push(G.console_logs_arena, struct console_log);
@ -487,6 +488,7 @@ INTERNAL LOG_EVENT_CALLBACK_FUNC_DEF(debug_console_log_callback, log)
INTERNAL void draw_debug_console(i32 level, b32 minimized)
{
__prof;
struct arena_temp scratch = scratch_begin_no_conflict();
struct v2 desired_start_pos = V2(10, minimized ? 100 : 600);
@ -1928,6 +1930,7 @@ INTERNAL void user_update(void)
* ========================== */
if (G.debug_draw) {
__profscope(Draw debug info);
struct font *font = font_load_async(LIT("font/fixedsys.ttf"), 12.0f);
if (font) {
struct arena_temp temp = arena_temp_begin(scratch.arena);