vram profiling

This commit is contained in:
jacob 2025-05-21 22:10:30 -05:00
parent a47009f16b
commit d9f841dad6
5 changed files with 91 additions and 4 deletions

View File

@ -687,6 +687,16 @@ INLINE void __prof_d3d11_zone_cleanup_func(TracyCD3D11ZoneCtx *ctx) { ___tracy_d
#define __prof_d3d11_ctx_release(ctx) ___tracy_d3d11_context_terminate(ctx) #define __prof_d3d11_ctx_release(ctx) ___tracy_d3d11_context_terminate(ctx)
#define __prof_d3d11_collect(ctx) ___tracy_d3d11_context_collect(ctx) #define __prof_d3d11_collect(ctx) ___tracy_d3d11_context_collect(ctx)
/* Plot value formats that can be passed as the `type` argument of
 * __prof_plot_init. Each enumerator aliases the TracyPlotFormat* constant of
 * the same name. */
enum __prof_plot_type {
__prof_plot_type_number = TracyPlotFormatNumber,
__prof_plot_type_memory = TracyPlotFormatMemory,
__prof_plot_type_percentage = TracyPlotFormatPercentage,
__prof_plot_type_watt = TracyPlotFormatWatt
};
/* Configures the plot `name`; all arguments are forwarded unchanged to TracyCPlotConfig. */
#define __prof_plot_init(name, type, step, fill, color) TracyCPlotConfig(name, type, step, fill, color)
/* Emits `val` on the plot `name` via TracyCPlot. */
#define __prof_plot(name, val) TracyCPlot(name, val)
/* Emits `val` on the plot `name` via TracyCPlotI. */
#define __prof_plot_i(name, val) TracyCPlotI(name, val)
#if PROFILING_CAPTURE_FRAME_IMAGE #if PROFILING_CAPTURE_FRAME_IMAGE
# define __profframeimage(image, width, height, offset, flipped) TracyCFrameImage((image), (width), (height), (offset), (flipped)); # define __profframeimage(image, width, height, offset, flipped) TracyCFrameImage((image), (width), (height), (offset), (flipped));
#else #else
@ -723,6 +733,9 @@ INLINE void __prof_d3d11_zone_cleanup_func(TracyCD3D11ZoneCtx *ctx) { ___tracy_d
#define __prof_d3d11_ctx_alloc(ctx, device, devicectx, name, name_len) #define __prof_d3d11_ctx_alloc(ctx, device, devicectx, name, name_len)
#define __prof_d3d11_ctx_release(ctx) #define __prof_d3d11_ctx_release(ctx)
#define __prof_d3d11_collect(ctx) #define __prof_d3d11_collect(ctx)
/* Empty expansions of the plot macros: call sites compile to nothing and their
 * arguments are not evaluated. NOTE(review): this appears to be the
 * profiling-disabled branch closed by `#endif` (PROFILING) below -- confirm. */
#define __prof_plot_init(name, type, step, fill, color)
#define __prof_plot(name, val)
#define __prof_plot_i(name, val)
#endif /* PROFILING */ #endif /* PROFILING */

View File

@ -10,6 +10,8 @@
struct _gstats { struct _gstats {
struct atomic_u64 GSTAT_SOCK_BYTES_SENT; struct atomic_u64 GSTAT_SOCK_BYTES_SENT;
struct atomic_u64 GSTAT_SOCK_BYTES_RECEIVED; struct atomic_u64 GSTAT_SOCK_BYTES_RECEIVED;
struct atomic_u64 GSTAT_VRAM_USAGE;
struct atomic_u64 GSTAT_VRAM_BUDGET;
struct atomic_u64 GSTAT_MEMORY_COMMITTED; struct atomic_u64 GSTAT_MEMORY_COMMITTED;
struct atomic_u64 GSTAT_MEMORY_RESERVED; struct atomic_u64 GSTAT_MEMORY_RESERVED;
struct atomic_u64 GSTAT_NUM_ARENAS; struct atomic_u64 GSTAT_NUM_ARENAS;
@ -17,12 +19,14 @@ struct _gstats {
extern struct _gstats _g_gstats; extern struct _gstats _g_gstats;
/* Overwrites the stat `_g_gstats.name` with `v` through atomic_u64_eval_exchange. */
#define gstat_set(name, v) atomic_u64_eval_exchange(&_g_gstats.name, (v))
#define gstat_add(name, v) atomic_u64_eval_add_u64(&_g_gstats.name, (v)) #define gstat_add(name, v) atomic_u64_eval_add_u64(&_g_gstats.name, (v))
#define gstat_sub(name, v) atomic_u64_eval_add_i64(&_g_gstats.name, -((i64)(v))) #define gstat_sub(name, v) atomic_u64_eval_add_i64(&_g_gstats.name, -((i64)(v)))
#define gstat_get(name) atomic_u64_eval(&_g_gstats.name) #define gstat_get(name) atomic_u64_eval(&_g_gstats.name)
#else #else
/* Stub for the #else branch: expands to nothing, so `v` is never evaluated. */
#define gstat_set(name, v)
#define gstat_add(name, v) #define gstat_add(name, v)
#define gstat_sub(name, v) #define gstat_sub(name, v)
#define gstat_get(name) 0 #define gstat_get(name) 0

View File

@ -9,6 +9,7 @@
#include "inc.h" #include "inc.h"
#include "sprite.h" #include "sprite.h"
#include "log.h" #include "log.h"
#include "gstat.h"
#pragma warning(push, 0) #pragma warning(push, 0)
# define UNICODE # define UNICODE
@ -17,7 +18,7 @@
# include <d3d11.h> # include <d3d11.h>
# include <d3dcompiler.h> # include <d3dcompiler.h>
# include <dxgidebug.h> # include <dxgidebug.h>
# include <dxgi1_3.h> # include <dxgi1_4.h>
#pragma warning(pop) #pragma warning(pop)
#pragma comment(lib, "d3d11") #pragma comment(lib, "d3d11")
@ -935,6 +936,42 @@ struct v2i32 renderer_texture_get_size(struct renderer_texture texture)
* Backbuffer * Backbuffer
* ========================== */ * ========================== */
#if GSTAT_ENABLED || PROFILING
INTERNAL struct DXGI_QUERY_VIDEO_MEMORY_INFO get_memory_info(void)
{
__prof;
/* Get DXGI device from D3D11 device */
IDXGIDevice *dxgiDevice;
HRESULT hr = ID3D11Device_QueryInterface(G.dev, &IID_IDXGIDevice, (void **)&dxgiDevice);
(UNUSED)hr;
ASSERT(SUCCEEDED(hr));
/* Get DXGI adapter from DXGI device */
IDXGIAdapter *dxgiAdapter;
hr = IDXGIDevice_GetAdapter(dxgiDevice, &dxgiAdapter);
ASSERT(SUCCEEDED(hr));
IDXGIAdapter3 *dxgiAdapter3 = NULL;
hr = IDXGIAdapter_QueryInterface(dxgiAdapter, &IID_IDXGIAdapter3, (void **)&dxgiAdapter3);
ASSERT(SUCCEEDED(hr));
struct DXGI_QUERY_VIDEO_MEMORY_INFO info = ZI;
IDXGIAdapter3_QueryVideoMemoryInfo(
dxgiAdapter3,
0,
DXGI_MEMORY_SEGMENT_GROUP_LOCAL,
&info
);
IDXGIAdapter_Release(dxgiAdapter3);
IDXGIAdapter_Release(dxgiAdapter);
IDXGIDevice_Release(dxgiDevice);
return info;
}
#endif
struct renderer_texture renderer_backbuffer_recreate(struct v2i32 size) struct renderer_texture renderer_backbuffer_recreate(struct v2i32 size)
{ {
struct renderer_texture res = ZI; struct renderer_texture res = ZI;
@ -962,6 +999,28 @@ void renderer_backbuffer_present(i32 vsync)
} }
#endif #endif
#if GSTAT_ENABLED || PROFILING
    {
        /* Sample video memory on every present and publish it to gstats */
        struct DXGI_QUERY_VIDEO_MEMORY_INFO mem_info = get_memory_info();
        u64 usage_bytes = mem_info.CurrentUsage;
        u64 budget_bytes = mem_info.Budget;
        gstat_set(GSTAT_VRAM_USAGE, usage_bytes);
        gstat_set(GSTAT_VRAM_BUDGET, budget_bytes);
# if PROFILING
        /* The plot is configured lazily on the first pass through here;
         * afterwards a point is only emitted when the usage changes. */
        static char *plot_name = NULL;
        static u64 last_plotted_usage = 0;
        if (plot_name == NULL) {
            plot_name = "Video memory usage";
            __prof_plot_init(plot_name, __prof_plot_type_memory, 1, 1, 0);
        }
        if (usage_bytes != last_plotted_usage) {
            __prof_plot_i(plot_name, usage_bytes);
            last_plotted_usage = usage_bytes;
        }
# endif
    }
#endif
i32 flags = 0; i32 flags = 0;
if (vsync == 0) { if (vsync == 0) {
flags = DXGI_PRESENT_ALLOW_TEARING; flags = DXGI_PRESENT_ALLOW_TEARING;

View File

@ -403,6 +403,9 @@ INTERNAL void test_generate_walls(struct sim_snapshot *world)
} }
/* Sort tile chunks */ /* Sort tile chunks */
/* NOTE: We sort tile chunks from top-left to bottom-right so that
* cross-chunk tile checks only have to happen in one direction (because we
* know the left & top chunks have already been processed) */
struct sim_ent **sorted_tile_chunks = arena_dry_push(scratch.arena, struct sim_ent *); struct sim_ent **sorted_tile_chunks = arena_dry_push(scratch.arena, struct sim_ent *);
u64 sorted_tile_chunks_count = 0; u64 sorted_tile_chunks_count = 0;
for (u64 ent_index = 0; ent_index < world->num_ents_reserved; ++ent_index) { for (u64 ent_index = 0; ent_index < world->num_ents_reserved; ++ent_index) {
@ -482,6 +485,7 @@ INTERNAL void test_generate_walls(struct sim_snapshot *world)
u64 start_hash = rand_u64_from_seed(*(u64 *)&start); u64 start_hash = rand_u64_from_seed(*(u64 *)&start);
struct dict_entry *entry = dict_get_entry(&horizontal_ends_dict, start_hash); struct dict_entry *entry = dict_get_entry(&horizontal_ends_dict, start_hash);
if (entry) { if (entry) {
/* A wall already exists across the chunk boundary */
node = (struct wall_node *)entry->value; node = (struct wall_node *)entry->value;
dict_remove_entry(&horizontal_ends_dict, entry); dict_remove_entry(&horizontal_ends_dict, entry);
} }
@ -554,6 +558,7 @@ INTERNAL void test_generate_walls(struct sim_snapshot *world)
u64 start_hash = rand_u64_from_seed(*(u64 *)&start); u64 start_hash = rand_u64_from_seed(*(u64 *)&start);
struct dict_entry *entry = dict_get_entry(&vertical_ends_dict, start_hash); struct dict_entry *entry = dict_get_entry(&vertical_ends_dict, start_hash);
if (entry) { if (entry) {
/* A wall already exists across the chunk boundary */
node = (struct wall_node *)entry->value; node = (struct wall_node *)entry->value;
dict_remove_entry(&vertical_ends_dict, entry); dict_remove_entry(&vertical_ends_dict, entry);
} }

View File

@ -1849,16 +1849,23 @@ INTERNAL void user_update(void)
pos.y += spacing; pos.y += spacing;
pos.y += spacing; pos.y += spacing;
draw_text(G.ui_cmd_buffer, font, pos, string_format(temp.arena, LIT("Memory usage: %F MiB"), FMT_FLOAT((f64)gstat_get(GSTAT_MEMORY_COMMITTED) / 1024 / 1024))); draw_text(G.ui_cmd_buffer, font, pos, string_format(temp.arena, LIT("Memory committed: %F MiB"), FMT_FLOAT((f64)gstat_get(GSTAT_MEMORY_COMMITTED) / 1024 / 1024)));
pos.y += spacing; pos.y += spacing;
draw_text(G.ui_cmd_buffer, font, pos, string_format(temp.arena, LIT("Virtual memory usage: %F TiB"), FMT_FLOAT((f64)gstat_get(GSTAT_MEMORY_RESERVED) / 1024 / 1024 / 1024 / 1024))); draw_text(G.ui_cmd_buffer, font, pos, string_format(temp.arena, LIT("Virtual memory reserved: %F TiB"), FMT_FLOAT((f64)gstat_get(GSTAT_MEMORY_RESERVED) / 1024 / 1024 / 1024 / 1024)));
pos.y += spacing; pos.y += spacing;
draw_text(G.ui_cmd_buffer, font, pos, string_format(temp.arena, LIT("Arenas allocated: %F"), FMT_UINT(gstat_get(GSTAT_NUM_ARENAS)))); draw_text(G.ui_cmd_buffer, font, pos, string_format(temp.arena, LIT("Arenas allocated: %F"), FMT_UINT(gstat_get(GSTAT_NUM_ARENAS))));
pos.y += spacing; pos.y += spacing;
pos.y += spacing; pos.y += spacing;
draw_text(G.ui_cmd_buffer, font, pos, string_format(temp.arena, LIT("Video memory usage: %F MiB"), FMT_FLOAT((f64)gstat_get(GSTAT_VRAM_USAGE) / 1024 / 1024)));
pos.y += spacing;
draw_text(G.ui_cmd_buffer, font, pos, string_format(temp.arena, LIT("Video memory budget: %F GiB"), FMT_FLOAT((f64)gstat_get(GSTAT_VRAM_BUDGET) / 1024 / 1024 / 1024)));
pos.y += spacing;
pos.y += spacing;
#if 0 #if 0
draw_text(G.ui_cmd_buffer, font, pos, string_format(temp.arena, LIT("screen_size: (%F, %F)"), FMT_FLOAT((f64)G.screen_size.x), FMT_FLOAT((f64)G.screen_size.y))); draw_text(G.ui_cmd_buffer, font, pos, string_format(temp.arena, LIT("screen_size: (%F, %F)"), FMT_FLOAT((f64)G.screen_size.x), FMT_FLOAT((f64)G.screen_size.y)));
pos.y += spacing; pos.y += spacing;
@ -2029,7 +2036,6 @@ INTERNAL void user_update(void)
INTERNAL SYS_THREAD_ENTRY_POINT_FUNC_DEF(user_thread_entry_point, arg) INTERNAL SYS_THREAD_ENTRY_POINT_FUNC_DEF(user_thread_entry_point, arg)
{ {
(UNUSED)arg; (UNUSED)arg;
i64 last_frame_ns = 0; i64 last_frame_ns = 0;
i64 target_dt_ns = NS_FROM_SECONDS(USER_FPS_LIMIT > (0) ? (1.0 / USER_FPS_LIMIT) : 0); i64 target_dt_ns = NS_FROM_SECONDS(USER_FPS_LIMIT > (0) ? (1.0 / USER_FPS_LIMIT) : 0);