From d9f841dad6ab8b87554e653177a3f19f968f255e Mon Sep 17 00:00:00 2001 From: jacob Date: Wed, 21 May 2025 22:10:30 -0500 Subject: [PATCH] vram profiling --- src/common.h | 13 ++++++++++ src/gstat.h | 4 +++ src/renderer_d3d11.c | 61 +++++++++++++++++++++++++++++++++++++++++++- src/sim_step.c | 5 ++++ src/user.c | 12 ++++++--- 5 files changed, 91 insertions(+), 4 deletions(-) diff --git a/src/common.h b/src/common.h index b7cfe6f9..f05e69b6 100644 --- a/src/common.h +++ b/src/common.h @@ -687,6 +687,16 @@ INLINE void __prof_d3d11_zone_cleanup_func(TracyCD3D11ZoneCtx *ctx) { ___tracy_d #define __prof_d3d11_ctx_release(ctx) ___tracy_d3d11_context_terminate(ctx) #define __prof_d3d11_collect(ctx) ___tracy_d3d11_context_collect(ctx) +enum __prof_plot_type { + __prof_plot_type_number = TracyPlotFormatNumber, + __prof_plot_type_memory = TracyPlotFormatMemory, + __prof_plot_type_percentage = TracyPlotFormatPercentage, + __prof_plot_type_watt = TracyPlotFormatWatt +}; +#define __prof_plot_init(name, type, step, fill, color) TracyCPlotConfig(name, type, step, fill, color) +#define __prof_plot(name, val) TracyCPlot(name, val) +#define __prof_plot_i(name, val) TracyCPlotI(name, val) + #if PROFILING_CAPTURE_FRAME_IMAGE # define __profframeimage(image, width, height, offset, flipped) TracyCFrameImage((image), (width), (height), (offset), (flipped)); #else @@ -723,6 +733,9 @@ INLINE void __prof_d3d11_zone_cleanup_func(TracyCD3D11ZoneCtx *ctx) { ___tracy_d #define __prof_d3d11_ctx_alloc(ctx, device, devicectx, name, name_len) #define __prof_d3d11_ctx_release(ctx) #define __prof_d3d11_collect(ctx) +#define __prof_plot_init(name, type, step, fill, color) +#define __prof_plot(name, val) +#define __prof_plot_i(name, val) #endif /* PROFILING */ diff --git a/src/gstat.h b/src/gstat.h index 2f5a9fe1..b2d2c955 100644 --- a/src/gstat.h +++ b/src/gstat.h @@ -10,6 +10,8 @@ struct _gstats { struct atomic_u64 GSTAT_SOCK_BYTES_SENT; struct atomic_u64 GSTAT_SOCK_BYTES_RECEIVED; + struct atomic_u64 
GSTAT_VRAM_USAGE; + struct atomic_u64 GSTAT_VRAM_BUDGET; struct atomic_u64 GSTAT_MEMORY_COMMITTED; struct atomic_u64 GSTAT_MEMORY_RESERVED; struct atomic_u64 GSTAT_NUM_ARENAS; @@ -17,12 +19,14 @@ struct _gstats { extern struct _gstats _g_gstats; +#define gstat_set(name, v) atomic_u64_eval_exchange(&_g_gstats.name, (v)) #define gstat_add(name, v) atomic_u64_eval_add_u64(&_g_gstats.name, (v)) #define gstat_sub(name, v) atomic_u64_eval_add_i64(&_g_gstats.name, -((i64)(v))) #define gstat_get(name) atomic_u64_eval(&_g_gstats.name) #else +#define gstat_set(name, v) #define gstat_add(name, v) #define gstat_sub(name, v) #define gstat_get(name) 0 diff --git a/src/renderer_d3d11.c b/src/renderer_d3d11.c index 889c823f..8c7adb82 100644 --- a/src/renderer_d3d11.c +++ b/src/renderer_d3d11.c @@ -9,6 +9,7 @@ #include "inc.h" #include "sprite.h" #include "log.h" +#include "gstat.h" #pragma warning(push, 0) # define UNICODE @@ -17,7 +18,7 @@ # include # include # include -# include +# include #pragma warning(pop) #pragma comment(lib, "d3d11") @@ -935,6 +936,42 @@ struct v2i32 renderer_texture_get_size(struct renderer_texture texture) * Backbuffer * ========================== */ +#if GSTAT_ENABLED || PROFILING +INTERNAL struct DXGI_QUERY_VIDEO_MEMORY_INFO get_memory_info(void) +{ + __prof; + + /* Get DXGI device from D3D11 device */ + IDXGIDevice *dxgiDevice; + HRESULT hr = ID3D11Device_QueryInterface(G.dev, &IID_IDXGIDevice, (void **)&dxgiDevice); + (UNUSED)hr; + ASSERT(SUCCEEDED(hr)); + + /* Get DXGI adapter from DXGI device */ + IDXGIAdapter *dxgiAdapter; + hr = IDXGIDevice_GetAdapter(dxgiDevice, &dxgiAdapter); + ASSERT(SUCCEEDED(hr)); + + IDXGIAdapter3 *dxgiAdapter3 = NULL; + hr = IDXGIAdapter_QueryInterface(dxgiAdapter, &IID_IDXGIAdapter3, (void **)&dxgiAdapter3); + ASSERT(SUCCEEDED(hr)); + + struct DXGI_QUERY_VIDEO_MEMORY_INFO info = ZI; + IDXGIAdapter3_QueryVideoMemoryInfo( + dxgiAdapter3, + 0, + DXGI_MEMORY_SEGMENT_GROUP_LOCAL, + &info + ); + + 
IDXGIAdapter_Release(dxgiAdapter3); + IDXGIAdapter_Release(dxgiAdapter); + IDXGIDevice_Release(dxgiDevice); + + return info; +} +#endif + struct renderer_texture renderer_backbuffer_recreate(struct v2i32 size) { struct renderer_texture res = ZI; @@ -962,6 +999,28 @@ void renderer_backbuffer_present(i32 vsync) } #endif +#if GSTAT_ENABLED || PROFILING + { + struct DXGI_QUERY_VIDEO_MEMORY_INFO info = get_memory_info(); + u64 vram = info.CurrentUsage; + u64 budget = info.Budget; + gstat_set(GSTAT_VRAM_USAGE, vram); + gstat_set(GSTAT_VRAM_BUDGET, budget); +# if PROFILING + static char *plot_name = NULL; + static u64 prev_vram = 0; + if (!plot_name) { + plot_name = "Video memory usage"; + __prof_plot_init(plot_name, __prof_plot_type_memory, 1, 1, 0); + } + if (vram != prev_vram) { + __prof_plot_i(plot_name, vram); + } + prev_vram = vram; +# endif + } +#endif + i32 flags = 0; if (vsync == 0) { flags = DXGI_PRESENT_ALLOW_TEARING; diff --git a/src/sim_step.c b/src/sim_step.c index 2e6a241b..1fb53d5f 100644 --- a/src/sim_step.c +++ b/src/sim_step.c @@ -403,6 +403,9 @@ INTERNAL void test_generate_walls(struct sim_snapshot *world) } /* Sort tile chunks */ + /* NOTE: We sort tile chunks from top-left to bottom-right so that cross + * chunk tile checks only have to happen in one direction (because we + * know the left & top chunks have already been processed) */ struct sim_ent **sorted_tile_chunks = arena_dry_push(scratch.arena, struct sim_ent *); u64 sorted_tile_chunks_count = 0; for (u64 ent_index = 0; ent_index < world->num_ents_reserved; ++ent_index) { @@ -482,6 +485,7 @@ INTERNAL void test_generate_walls(struct sim_snapshot *world) u64 start_hash = rand_u64_from_seed(*(u64 *)&start); struct dict_entry *entry = dict_get_entry(&horizontal_ends_dict, start_hash); if (entry) { + /* Existing wall exists across chunk boundary */ node = (struct wall_node *)entry->value; dict_remove_entry(&horizontal_ends_dict, entry); } @@ -554,6 +558,7 @@ INTERNAL void test_generate_walls(struct 
sim_snapshot *world) u64 start_hash = rand_u64_from_seed(*(u64 *)&start); struct dict_entry *entry = dict_get_entry(&vertical_ends_dict, start_hash); if (entry) { + /* Existing wall exists across chunk boundary */ node = (struct wall_node *)entry->value; dict_remove_entry(&vertical_ends_dict, entry); } diff --git a/src/user.c b/src/user.c index dd008c04..bdc7c9e3 100644 --- a/src/user.c +++ b/src/user.c @@ -1849,16 +1849,23 @@ INTERNAL void user_update(void) pos.y += spacing; pos.y += spacing; - draw_text(G.ui_cmd_buffer, font, pos, string_format(temp.arena, LIT("Memory usage: %F MiB"), FMT_FLOAT((f64)gstat_get(GSTAT_MEMORY_COMMITTED) / 1024 / 1024))); + draw_text(G.ui_cmd_buffer, font, pos, string_format(temp.arena, LIT("Memory committed: %F MiB"), FMT_FLOAT((f64)gstat_get(GSTAT_MEMORY_COMMITTED) / 1024 / 1024))); pos.y += spacing; - draw_text(G.ui_cmd_buffer, font, pos, string_format(temp.arena, LIT("Virtual memory usage: %F TiB"), FMT_FLOAT((f64)gstat_get(GSTAT_MEMORY_RESERVED) / 1024 / 1024 / 1024 / 1024))); + draw_text(G.ui_cmd_buffer, font, pos, string_format(temp.arena, LIT("Virtual memory reserved: %F TiB"), FMT_FLOAT((f64)gstat_get(GSTAT_MEMORY_RESERVED) / 1024 / 1024 / 1024 / 1024))); pos.y += spacing; draw_text(G.ui_cmd_buffer, font, pos, string_format(temp.arena, LIT("Arenas allocated: %F"), FMT_UINT(gstat_get(GSTAT_NUM_ARENAS)))); pos.y += spacing; pos.y += spacing; + draw_text(G.ui_cmd_buffer, font, pos, string_format(temp.arena, LIT("Video memory usage: %F MiB"), FMT_FLOAT((f64)gstat_get(GSTAT_VRAM_USAGE) / 1024 / 1024))); + pos.y += spacing; + + draw_text(G.ui_cmd_buffer, font, pos, string_format(temp.arena, LIT("Video memory budget: %F GiB"), FMT_FLOAT((f64)gstat_get(GSTAT_VRAM_BUDGET) / 1024 / 1024 / 1024))); + pos.y += spacing; + pos.y += spacing; + #if 0 draw_text(G.ui_cmd_buffer, font, pos, string_format(temp.arena, LIT("screen_size: (%F, %F)"), FMT_FLOAT((f64)G.screen_size.x), FMT_FLOAT((f64)G.screen_size.y))); pos.y += spacing; @@ -2029,7 
+2036,6 @@ INTERNAL void user_update(void) INTERNAL SYS_THREAD_ENTRY_POINT_FUNC_DEF(user_thread_entry_point, arg) { (UNUSED)arg; - i64 last_frame_ns = 0; i64 target_dt_ns = NS_FROM_SECONDS(USER_FPS_LIMIT > (0) ? (1.0 / USER_FPS_LIMIT) : 0);