allow color & values for profiling

This commit is contained in:
jacob 2025-07-06 14:37:17 -05:00
parent 60613815d7
commit a9bcab1b78
16 changed files with 211 additions and 191 deletions

View File

@ -324,7 +324,7 @@ void sys_app_entry(struct string args_str)
* forcing process exit (to prevent process hanging in the background
* if something gets stuck) */
{
__profscope(Run exit callbacks);
__profn("Run exit callbacks");
struct sys_lock lock = sys_mutex_lock_e(G.exit_callbacks_mutex);
for (struct exit_callback *callback = G.exit_callbacks_head; callback; callback = callback->next) {
callback->func();
@ -334,7 +334,7 @@ void sys_app_entry(struct string args_str)
/* Write window settings to file */
{
__profscope(Write settings file);
__profn("Write settings file");
struct arena_temp temp = arena_temp_begin(scratch.arena);
struct string window_settings_path = app_write_path_cat(temp.arena, settings_file_name);

View File

@ -76,7 +76,7 @@ void *arena_push_bytes_no_zero(struct arena *arena, u64 size, u64 align)
u64 new_pos = aligned_start_pos + size;
if (new_pos > arena->committed) {
__profscope(Arena commit);
__profn("Arena commit");
/* Commit new block(s) */
u64 blocks_needed = (new_pos - arena->committed + ARENA_BLOCK_SIZE - 1) / ARENA_BLOCK_SIZE;
u64 commit_bytes = blocks_needed * ARENA_BLOCK_SIZE;

View File

@ -731,7 +731,7 @@ struct ase_decode_image_result ase_decode_image(struct arena *arena, struct stri
}
{
__profscope(Build image from cels);
__profn("Build image from cels");
/* Assemble image from cels */
for (struct cel *cel = cel_head; cel; cel = cel->next) {

View File

@ -534,7 +534,7 @@ INTERNAL void dx12_init_device(void)
/* Enable stable power state */
{
b32 success = true;
__profscope(Set stable power state);
__profn("Set stable power state");
HKEY key = 0;
success = RegOpenKeyExW(HKEY_LOCAL_MACHINE, L"SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\AppModelUnlock", 0, KEY_READ, &key) == ERROR_SUCCESS;
if (success) {
@ -942,7 +942,7 @@ INTERNAL SYS_JOB_DEF(pipeline_init_job, job)
* root signature exists and matches between shaders. */
ID3D10Blob *rootsig_blob = NULL;
if (success) {
__profscope(Validate root signatures);
__profn("Validate root signatures");
char *vs_rootsig_data = NULL;
char *ps_rootsig_data = NULL;
u32 vs_rootsig_data_len = 0;
@ -979,7 +979,7 @@ INTERNAL SYS_JOB_DEF(pipeline_init_job, job)
/* Create root signature */
ID3D12RootSignature *rootsig = NULL;
if (success) {
__profscope(Create root signature);
__profn("Create root signature");
hr = ID3D12Device_CreateRootSignature(G.device, 0, ID3D10Blob_GetBufferPointer(rootsig_blob), ID3D10Blob_GetBufferSize(rootsig_blob), &IID_ID3D12RootSignature, (void **)&rootsig);
if (FAILED(hr)) {
error_str = LIT("Failed to create root signature");
@ -991,7 +991,7 @@ INTERNAL SYS_JOB_DEF(pipeline_init_job, job)
ID3D12PipelineState *pso = NULL;
if (success) {
/* Default rasterizer state */
__profscope(Create PSO);
__profn("Create PSO");
D3D12_RASTERIZER_DESC raster_desc = {
.FillMode = D3D12_FILL_MODE_SOLID,
.CullMode = D3D12_CULL_MODE_NONE,
@ -1845,7 +1845,7 @@ INTERNAL u64 command_list_close(struct command_list *cl)
/* Close */
{
__profscope(Close DX12 command list);
__profn("Close DX12 command list");
HRESULT hr = ID3D12GraphicsCommandList_Close(cl->cl);
if (FAILED(hr)) {
/* TODO: Don't panic */
@ -1856,7 +1856,7 @@ INTERNAL u64 command_list_close(struct command_list *cl)
/* Submit */
u64 submit_fence_target = 0;
{
__profscope(Execute);
__profn("Execute");
struct sys_lock submit_lock = sys_mutex_lock_s(G.global_submit_mutex);
struct sys_lock fence_lock = sys_mutex_lock_e(cq->submit_fence_mutex);
{
@ -2318,7 +2318,7 @@ struct gp_resource *gp_texture_alloc(enum gp_texture_format format, u32 flags, s
struct command_queue *cq = G.command_queues[DX12_QUEUE_COPY_BACKGROUND];
struct command_list *cl = command_list_open(cq->cl_pool);
{
__profscope_dx12(cl->cq->prof, cl->cl, Upload texture, RGB32_F(0.2, 0.5, 0.2));
__profnc_dx12(cl->cq->prof, cl->cl, "Upload texture", RGB32_F(0.2, 0.5, 0.2));
D3D12_TEXTURE_COPY_LOCATION dst_loc = {
.pResource = r->resource,
.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX,
@ -2340,7 +2340,7 @@ struct gp_resource *gp_texture_alloc(enum gp_texture_format format, u32 flags, s
/* Wait */
/* TODO: Return async waitable to caller */
{
__profscope(Wait for upload);
__profn("Wait for upload");
HANDLE event = CreateEvent(NULL, false, false, NULL);
ID3D12Fence_SetEventOnCompletion(cq->submit_fence, fence_target, event);
WaitForSingleObject(event, INFINITE);
@ -2383,7 +2383,7 @@ void gp_dispatch(struct gp_dispatch_params params)
struct command_queue *cq = G.command_queues[DX12_QUEUE_DIRECT];
struct command_list *cl = command_list_open(cq->cl_pool);
{
__profscope_dx12(cl->cq->prof, cl->cl, Dispatch, RGB32_F(0.5, 0.2, 0.2));
__profnc_dx12(cl->cq->prof, cl->cl, "Dispatch", RGB32_F(0.5, 0.2, 0.2));
struct mat4x4 vp_matrix = calculate_vp(params.draw_target_view, params.draw_target_viewport.width, params.draw_target_viewport.height);
/* Upload dummy vert & index buffer */
@ -2397,11 +2397,11 @@ void gp_dispatch(struct gp_dispatch_params params)
struct sh_material_instance *material_instances = arena_push_array_no_zero(scratch.arena, struct sh_material_instance, flow->num_material_instance_descs);
struct sh_material_grid *grids = arena_push_array_no_zero(scratch.arena, struct sh_material_grid, flow->num_material_grid_descs);
{
__profscope(Process flow data);
__profn("Process flow data");
/* Process material instances */
{
__profscope(Process material instances);
__profn("Process material instances");
for (u32 i = 0; i < flow->num_material_instance_descs; ++i) {
struct material_instance_desc *desc = &((struct material_instance_desc *)arena_base(flow->material_instance_descs_arena))[i];
struct sh_material_instance *instance = &material_instances[i];
@ -2427,7 +2427,7 @@ void gp_dispatch(struct gp_dispatch_params params)
/* Process grids */
{
__profscope(Process grids);
__profn("Process grids");
for (u32 i = 0; i < flow->num_material_grid_descs; ++i) {
struct material_grid_desc *desc = &((struct material_grid_desc *)arena_base(flow->material_grid_descs_arena))[i];
struct sh_material_grid *grid = &grids[i];
@ -2464,8 +2464,8 @@ void gp_dispatch(struct gp_dispatch_params params)
/* Material pass */
if (material_pipeline->success) {
__profscope(Material pass);
__profscope_dx12(cl->cq->prof, cl->cl, Material pass, RGB32_F(0.5, 0.2, 0.2));
__profn("Material pass");
__profnc_dx12(cl->cq->prof, cl->cl, "Material pass", RGB32_F(0.5, 0.2, 0.2));
/* Bind pipeline */
ID3D12GraphicsCommandList_SetPipelineState(cl->cl, material_pipeline->pso);
@ -2505,8 +2505,8 @@ void gp_dispatch(struct gp_dispatch_params params)
/* Shape pass */
if (shape_pipeline->success) {
__profscope(Shape pass);
__profscope_dx12(cl->cq->prof, cl->cl, Shape pass, RGB32_F(0.5, 0.2, 0.2));
__profn("Shape pass");
__profnc_dx12(cl->cq->prof, cl->cl, "Shape pass", RGB32_F(0.5, 0.2, 0.2));
/* Bind pipeline */
ID3D12GraphicsCommandList_SetPipelineState(cl->cl, shape_pipeline->pso);
@ -2696,7 +2696,7 @@ INTERNAL void present_blit(struct swapchain_buffer *dst, struct dx12_resource *s
struct command_queue *cq = G.command_queues[DX12_QUEUE_DIRECT];
struct command_list *cl = command_list_open(cq->cl_pool);
{
__profscope_dx12(cl->cq->prof, cl->cl, Blit, RGB32_F(0.5, 0.2, 0.2));
__profnc_dx12(cl->cq->prof, cl->cl, "Blit", RGB32_F(0.5, 0.2, 0.2));
struct swapchain *swapchain = dst->swapchain;
/* Upload dummy vert & index buffer */
@ -2804,7 +2804,7 @@ void gp_present(struct sys_window *window, struct v2i32 backresolution, struct g
/* Present */
/* FIXME: Resource barrier */
{
__profscope(Present);
__profn("Present");
HRESULT hr = IDXGISwapChain3_Present(swapchain->swapchain, vsync, present_flags);
if (!SUCCEEDED(hr)) {
ASSERT(false);
@ -2814,7 +2814,7 @@ void gp_present(struct sys_window *window, struct v2i32 backresolution, struct g
#if PROFILING_D3D
{
__profscope(Mark queue frames);
__profn("Mark queue frames");
/* Lock because frame marks shouldn't occur while command lists are recording */
struct sys_lock lock = sys_mutex_lock_e(G.global_command_list_record_mutex);
for (u32 i = 0; i < countof(G.command_queues); ++i) {
@ -2824,7 +2824,7 @@ void gp_present(struct sys_window *window, struct v2i32 backresolution, struct g
sys_mutex_unlock(&lock);
}
{
__profscope(Collect queues);
__profn("Collect queues");
for (u32 i = 0; i < countof(G.command_queues); ++i) {
struct command_queue *cq = G.command_queues[i];
__prof_dx12_collect(cq->prof);
@ -2854,7 +2854,7 @@ INTERNAL SYS_THREAD_DEF(evictor_thread_entry_point, arg)
while (!shutdown) {
struct arena_temp temp = arena_temp_begin(scratch.arena);
{
__profscope(Run);
__profn("Run");
u64 targets[countof(completed_targets)] = ZI;
@ -2862,7 +2862,7 @@ INTERNAL SYS_THREAD_DEF(evictor_thread_entry_point, arg)
u32 num_fenced_releases = 0;
struct fenced_release_data *fenced_releases = NULL;
{
__profscope(Copy queued releases);
__profn("Copy queued releases");
struct sys_lock lock = sys_mutex_lock_e(G.fenced_releases_mutex);
num_fenced_releases = G.fenced_releases_arena->pos / sizeof(struct fenced_release_data);
fenced_releases = arena_push_array_no_zero(temp.arena, struct fenced_release_data, num_fenced_releases);
@ -2874,7 +2874,7 @@ INTERNAL SYS_THREAD_DEF(evictor_thread_entry_point, arg)
/* Wait until fences reach target */
{
__profscope(Check fences);
__profn("Check fences");
for (u32 i = 0; i < countof(targets) && !shutdown; ++i) {
while (completed_targets[i] < targets[i] && !shutdown) {
struct command_queue *cq = G.command_queues[i];
@ -2882,7 +2882,7 @@ INTERNAL SYS_THREAD_DEF(evictor_thread_entry_point, arg)
if (completed_targets[i] < targets[i]) {
ID3D12Fence_SetEventOnCompletion(cq->submit_fence, targets[i], event);
{
__profscope(Wait on fence);
__profn("Wait on fence");
WaitForMultipleObjects(2, events, false, INFINITE);
shutdown = atomic_i32_fetch(&G.evictor_thread_shutdown);
}
@ -2893,7 +2893,7 @@ INTERNAL SYS_THREAD_DEF(evictor_thread_entry_point, arg)
/* Process releases */
for (u32 i = 0; i < num_fenced_releases; ++i) {
__profscope(Release);
__profn("Release");
struct fenced_release_data *fr = &fenced_releases[i];
switch (fr->kind) {
default:
@ -2918,7 +2918,7 @@ INTERNAL SYS_THREAD_DEF(evictor_thread_entry_point, arg)
}
arena_temp_end(temp);
{
__profscope(Sleep);
__profn("Sleep");
WaitForSingleObject(G.evictor_thread_wake_event, INFINITE);
shutdown = atomic_i32_fetch(&G.evictor_thread_shutdown);
}

View File

@ -652,7 +652,7 @@ struct host_event_list host_update_begin(struct arena *arena, struct host *host)
i64 now_ns = sys_time_ns();
{
__profscope(Read host packets);
__profn("Read host packets");
struct string read_buff = ZI;
read_buff.len = PACKET_DATA_MAX_LEN;
read_buff.text = arena_push_array_no_zero(scratch.arena, u8, read_buff.len);
@ -828,7 +828,7 @@ struct host_event_list host_update_begin(struct arena *arena, struct host *host)
/* Update channels */
{
__profscope(Update host channels);
__profn("Update host channels");
for (u64 i = 0; i < host->num_channels_reserved; ++i) {
struct host_channel *channel = &host->channels[i];
if (channel->valid) {
@ -902,7 +902,7 @@ void host_update_end(struct host *host)
/* Process cmds into sendable packets */
/* TODO: Unreliable packets don't need to be allocated into unreliable packet queue, should just send them and forget */
{
__profscope(Process host cmds);
__profn("Process host cmds");
for (struct host_cmd *cmd = host->first_cmd; cmd; cmd = cmd->next) {
enum host_cmd_kind kind = cmd->kind;
struct host_channel_id channel_id = cmd->channel_id;
@ -1017,7 +1017,7 @@ void host_update_end(struct host *host)
/* Send packets */
/* TODO: Aggregate small packets */
{
__profscope(Send host packets);
__profn("Send host packets");
for (u64 i = 0; i < host->num_channels_reserved; ++i) {
struct sock *sock = host->sock;
struct host_channel *channel = &host->channels[i];

View File

@ -216,7 +216,7 @@ void _log(i32 level, struct string msg)
struct sys_lock lock = sys_mutex_lock_s(G.callbacks_mutex);
for (struct log_event_callback *callback = G.first_callback; callback; callback = callback->next) {
if (level <= callback->level) {
__profscope(Run log callback);
__profn("Run log callback");
callback->func(event);
}
}

View File

@ -290,7 +290,7 @@ struct mixed_pcm_f32 mixer_update(struct arena *arena, u64 frame_count)
/* Update & read mixes */
mixes = arena_push_array_no_zero(scratch.arena, struct mix *, G.track_playing_count);
for (struct track *track = G.track_first_playing; track; track = track->next) {
__profscope(Prepare track);
__profn("Prepare track");
struct mix *mix = &track->mix;
mix->desc = track->desc;
mixes[mixes_count++] = mix;
@ -300,7 +300,7 @@ struct mixed_pcm_f32 mixer_update(struct arena *arena, u64 frame_count)
}
for (u64 mix_index = 0; mix_index < mixes_count; ++mix_index) {
__profscope(Mix track);
__profn("Mix track");
struct mix *mix = mixes[mix_index];
if (mix->source->pcm.count <= 0) {
@ -353,7 +353,7 @@ struct mixed_pcm_f32 mixer_update(struct arena *arena, u64 frame_count)
/* Transform 16 bit source -> 32 bit stereo at output duration */
{
__profscope(Resample);
__profn("Resample");
f32 *out_samples = mix_pcm.samples;
u64 out_frames_count = mix_pcm.count / 2;
@ -407,7 +407,7 @@ struct mixed_pcm_f32 mixer_update(struct arena *arena, u64 frame_count)
* ========================== */
if (desc.flags & MIXER_FLAG_SPATIALIZE) {
__profscope(Spatialize);
__profn("Spatialize");
/* Algorithm constants */
const f32 rolloff_height = 1.2f;
@ -468,7 +468,7 @@ struct mixed_pcm_f32 mixer_update(struct arena *arena, u64 frame_count)
}
{
__profscope(Update track effect data);
__profn("Update track effect data");
struct sys_lock lock = sys_mutex_lock_e(G.mutex);
for (u64 i = 0; i < mixes_count; ++i) {
struct mix *mix = mixes[i];

View File

@ -1254,7 +1254,7 @@ void phys_step(struct phys_step_ctx *ctx, f32 timestep)
f32 remaining_dt = timestep;
while (remaining_dt > 0) {
__profscope(Step part);
__profn("Step part");
++phys_iteration;
struct arena_temp scratch = scratch_begin_no_conflict();
@ -1282,7 +1282,7 @@ void phys_step(struct phys_step_ctx *ctx, f32 timestep)
f32 substep_dt = step_dt / SIM_PHYSICS_SUBSTEPS;
for (u32 i = 0; i < SIM_PHYSICS_SUBSTEPS; ++i) {
__profscope(Substep);
__profn("Substep");
/* Warm start */
#if SIM_PHYSICS_ENABLE_WARM_STARTING

View File

@ -174,7 +174,7 @@ INTERNAL struct wasapi_buffer wasapi_update_begin(void)
/* Wait */
{
__profscope(wasapi_wait_on_event);
__profn("Wasapi wait");
WaitForSingleObject(G.event, INFINITE);
}

View File

@ -10,10 +10,10 @@
#define PROFILING_SYSTEM_TRACE 0
#define PROFILING_CAPTURE_FRAME_IMAGE 0
#define PROFILING_LOCKS 0
#define PROFILING_D3D 0
#define PROFILING_D3D 1
#define PROFILING_FILE_WSTR L".tracy"
//#define PROFILING_CMD_WSTR L"cmd /C start \"\" /wait tracy-capture.exe -o .tracy -a 127.0.0.1 && start \"\" tracy-profiler.exe .tracy"
#define PROFILING_CMD_WSTR L"tracy-profiler.exe -a 127.0.0.1"
#define PROFILING_CMD_WSTR L"cmd /C start \"\" /wait tracy-capture.exe -o .tracy -a 127.0.0.1 && start \"\" tracy-profiler.exe .tracy"
//#define PROFILING_CMD_WSTR L"tracy-profiler.exe -a 127.0.0.1"
/* Tracy defines */
#define TRACY_ENABLE
@ -31,11 +31,12 @@
#pragma clang diagnostic ignored "-Wincompatible-pointer-types-discards-qualifiers"
#include TRACY_CLIENT_HEADER_PATH
/* Clang/GCC cleanup macros */
#define __prof static const struct ___tracy_source_location_data CAT(__tracy_source_location,__LINE__) = { NULL, __func__, __FILE__, (uint32_t)__LINE__, 0 }; __attribute((cleanup(__prof_zone_cleanup_func))) TracyCZoneCtx __tracy_zone_ctx = ___tracy_emit_zone_begin( &CAT(__tracy_source_location,__LINE__), true )
#define __profscope(name) static const struct ___tracy_source_location_data CAT(__tracy_source_location,__LINE__) = { #name, __func__, __FILE__, (uint32_t)__LINE__, 0 }; __attribute((cleanup(__prof_zone_cleanup_func))) TracyCZoneCtx __tracy_zone_ctx = ___tracy_emit_zone_begin( &CAT(__tracy_source_location,__LINE__), true )
INLINE void __prof_zone_cleanup_func(TracyCZoneCtx *ctx) { TracyCZoneEnd(*ctx) }
#define __profnc(name, color) static const struct ___tracy_source_location_data CAT(__tracy_source_location,__LINE__) = { (name), __func__, __FILE__, (uint32_t)__LINE__, BGR32(color) }; __attribute((cleanup(__prof_zone_cleanup_func))) TracyCZoneCtx __tracy_zone_ctx = ___tracy_emit_zone_begin( &CAT(__tracy_source_location,__LINE__), true )
#define __profn(name) __profnc(name, 0)
#define __prof __profnc(NULL, 0)
#define __profvalue(v) TracyCZoneValue(__tracy_zone_ctx, (v))
#define __profalloc(ptr, size) TracyCAlloc((ptr), (size))
#define __proffree(ptr) TracyCFree((ptr))
#define __profmsg(txt, len, col) TracyCMessageC((txt), (len), BGR32(col))
@ -59,8 +60,11 @@ enum __prof_plot_type {
#define PROFILING_LOCKS 0
#define PROFILING_D3D 0
#define __profnc(name, color)
#define __profn(name)
#define __prof
#define __profscope(name)
#define __profvalue(v)
#define __profalloc(ptr, size)
#define __proffree(ptr)
#define __profmsg(txt, len, col)
@ -105,25 +109,25 @@ enum __prof_plot_type {
#if PROFILING_D3D
/* Dx11 */
INLINE void __prof_dx11_zone_cleanup_func(TracyCD3D11ZoneCtx *ctx) { ___tracy_d3d11_emit_zone_end(*ctx); }
# define __profscope_dx11(dx11_ctx, name, color) static const struct ___tracy_source_location_data CAT(__tracy_gpu_d3d11_source_location,__LINE__) = { #name, __func__, __FILE__, (uint32_t)__LINE__, BGR32(color) }; __attribute((cleanup(__prof_dx11_zone_cleanup_func))) TracyCD3D11ZoneCtx __tracy_d3d11_zone_ctx; ___tracy_d3d11_emit_zone_begin( dx11_ctx, &__tracy_d3d11_zone_ctx, &CAT(__tracy_gpu_d3d11_source_location,__LINE__), true)
# define __profnc_dx11(dx11_ctx, name, color) static const struct ___tracy_source_location_data CAT(__tracy_gpu_d3d11_source_location,__LINE__) = { name, __func__, __FILE__, (uint32_t)__LINE__, BGR32(color) }; __attribute((cleanup(__prof_dx11_zone_cleanup_func))) TracyCD3D11ZoneCtx __tracy_d3d11_zone_ctx; ___tracy_d3d11_emit_zone_begin( dx11_ctx, &__tracy_d3d11_zone_ctx, &CAT(__tracy_gpu_d3d11_source_location,__LINE__), true)
# define __prof_dx11_ctx(name) struct TracyCD3D11Ctx *name
# define __prof_dx11_ctx_alloc(ctx, device, device_ctx, name, name_len) ctx = ___tracy_d3d11_context_announce(device, device_ctx, name, name_len)
# define __prof_dx11_ctx_release(ctx) ___tracy_d3d11_context_terminate(ctx)
# define __prof_dx11_collect(ctx) ___tracy_d3d11_context_collect(ctx)
/* Dx12 */
INLINE void __prof_dx12_zone_cleanup_func(TracyCD3D12ZoneCtx *ctx) { ___tracy_d3d12_emit_zone_end(*ctx); }
# define __profscope_dx12(dx12_ctx, cmd_list, name, color) static const struct ___tracy_source_location_data CAT(__tracy_gpu_d3d12_source_location,__LINE__) = { #name, __func__, __FILE__, (uint32_t)__LINE__, BGR32(color) }; __attribute((cleanup(__prof_dx12_zone_cleanup_func))) TracyCD3D12ZoneCtx __tracy_d3d12_zone_ctx; ___tracy_d3d12_emit_zone_begin( dx12_ctx, cmd_list, &__tracy_d3d12_zone_ctx, &CAT(__tracy_gpu_d3d12_source_location,__LINE__), true)
# define __profnc_dx12(dx12_ctx, cmd_list, name, color) static const struct ___tracy_source_location_data CAT(__tracy_gpu_d3d12_source_location,__LINE__) = { name, __func__, __FILE__, (uint32_t)__LINE__, BGR32(color) }; __attribute((cleanup(__prof_dx12_zone_cleanup_func))) TracyCD3D12ZoneCtx __tracy_d3d12_zone_ctx; ___tracy_d3d12_emit_zone_begin( dx12_ctx, cmd_list, &__tracy_d3d12_zone_ctx, &CAT(__tracy_gpu_d3d12_source_location,__LINE__), true)
# define __prof_dx12_ctx(name) struct TracyCD3D12Ctx *name
# define __prof_dx12_ctx_alloc(ctx, device, queue, name, name_len) ctx = ___tracy_d3d12_context_announce(device, queue, name, name_len)
# define __prof_dx12_ctx_release(ctx) ___tracy_d3d12_context_terminate(ctx)
# define __prof_dx12_new_frame(ctx) ___tracy_d3d12_context_new_frame(ctx)
# define __prof_dx12_collect(ctx) ___tracy_d3d12_context_collect(ctx)
#else
# define __profscope_dx11(dx11_ctx, name, color)
# define __profnc_dx11(dx11_ctx, name, color)
# define __prof_dx11_ctx_alloc(ctx, device, device_ctx, name, name_len)
# define __prof_dx11_ctx_release(ctx)
# define __prof_dx11_collect(ctx)
# define __profscope_dx12(dx11_ctx, queue, name, color)
# define __profnc_dx12(dx11_ctx, queue, name, color)
# define __prof_dx12_ctx_alloc(ctx, device, queue, name, name_len)
# define __prof_dx12_ctx_release(ctx)
# define __prof_dx12_new_frame(ctx)
@ -137,7 +141,7 @@ INLINE void __prof_dx12_zone_cleanup_func(TracyCD3D12ZoneCtx *ctx) { ___tracy_d3
#endif /* PROFILING_CAPTURE_FRAME_IMAGE */
#ifdef TRACY_FIBERS
/* Tracy fiber methods are wrapped in NO_INLINE because otherwise issues arise
/* Tracy fiber methods are wrapped in NO_INLINE because otherwise issues can arise
* across fiber context boundaries during optimization */
NO_INLINE INLINE void __prof_fiber_enter(char *fiber_name, i32 profiler_group) { TracyCFiberEnterWithHint(fiber_name, profiler_group); }
NO_INLINE INLINE void __prof_fiber_leave(void) { TracyCFiberLeave; }

View File

@ -241,10 +241,10 @@ INTERNAL SYS_THREAD_DEF(resource_watch_dispatcher_thread_entry_point, _)
while (!atomic_i32_fetch(&G.watch_shutdown)) {
sys_condition_variable_wait(G.watch_dispatcher_cv, &watch_dispatcher_lock);
if (!atomic_i32_fetch(&G.watch_shutdown) && G.watch_dispatcher_info_arena->pos > 0) {
__profscope(Dispatch resource watch callbacks);
__profn("Dispatch resource watch callbacks");
/* Unlock and sleep a bit so duplicate events pile up */
{
__profscope(Delay);
__profn("Delay");
sys_mutex_unlock(&watch_dispatcher_lock);
sys_sleep(WATCH_DISPATCHER_DELAY_SECONDS);
watch_dispatcher_lock = sys_mutex_lock_e(G.watch_dispatcher_mutex);
@ -275,7 +275,7 @@ INTERNAL SYS_THREAD_DEF(resource_watch_dispatcher_thread_entry_point, _)
{
struct dict *dedup_dict = dict_init(temp.arena, WATCH_DISPATCHER_DEDUP_DICT_BINS);
for (struct sys_watch_info *info = watch_info_list.first; info; info = info->next) {
__profscope(Dispatch);
__profn("Dispatch");
/* Do not run callbacks for the same file more than once */
b32 skip = false;
u64 hash = hash_fnv64(HASH_FNV64_BASIS, info->name);

View File

@ -649,7 +649,7 @@ struct sim_snapshot *sim_snapshot_alloc_from_lerp(struct sim_client *client, str
/* Blend entities */
{
__profscope(Lerp snapshot entities);
__profn("Lerp snapshot entities");
u64 num_entities = min_u64(ss0->num_ents_reserved, ss1->num_ents_reserved);
for (u64 i = 0; i < num_entities; ++i) {
struct sim_ent *e = &ss->ents[i];

View File

@ -427,7 +427,7 @@ INTERNAL struct sprite_sheet init_sheet_from_ase_result(struct arena *arena, str
/* Init frames */
{
__profscope(Init frames);
__profn("Init frames");
sheet.image_size = ase.image_size;
sheet.frame_size = ase.frame_size;
sheet.frames = arena_push_array(arena, struct sprite_sheet_frame, ase.num_frames);
@ -449,7 +449,7 @@ INTERNAL struct sprite_sheet init_sheet_from_ase_result(struct arena *arena, str
/* Init spans */
sheet.spans_count = ase.num_spans;
if (ase.num_spans > 0) {
__profscope(Init spans);
__profn("Init spans");
sheet.spans = arena_push_array(arena, struct sprite_sheet_span, sheet.spans_count);
sheet.spans_dict = dict_init(arena, (u64)(ase.num_spans * SHEET_SPAN_LOOKUP_TABLE_BIN_RATIO));
u64 index = 0;
@ -467,7 +467,7 @@ INTERNAL struct sprite_sheet init_sheet_from_ase_result(struct arena *arena, str
/* Init slices */
if (ase.num_slice_keys > 0) {
__profscope(Init slices);
__profn("Init slices");
struct arena_temp scratch = scratch_begin(arena);
struct temp_ase_slice_key_node {
@ -1241,7 +1241,7 @@ INTERNAL SYS_JOB_DEF(sprite_evictor_job, _)
/* Scan for evictable nodes */
b32 cache_over_budget_threshold = atomic_u64_fetch(&G.cache.memory_usage) > CACHE_MEMORY_BUDGET_THRESHOLD;
if (cache_over_budget_threshold || RESOURCE_RELOADING) {
__profscope(Evictor scan);
__profn("Evictor scan");
for (u64 i = 0; i < CACHE_BINS_COUNT; ++i) {
struct cache_bin *bin = &G.cache.bins[i];
struct sys_lock bin_lock = sys_mutex_lock_s(bin->mutex);
@ -1282,14 +1282,14 @@ INTERNAL SYS_JOB_DEF(sprite_evictor_job, _)
/* Sort evict nodes */
{
__profscope(Evictor sort);
__profn("Evictor sort");
merge_sort(evict_array, evict_array_count, sizeof(*evict_array), evict_sort, NULL);
}
/* Remove evictable nodes from cache until under budget */
struct evict_node *first_evicted = NULL;
{
__profscope(Evictor cache removal);
__profn("Evictor cache removal");
b32 stop_evicting = false;
for (u64 i = 0; i < evict_array_count && !stop_evicting; ++i) {
struct evict_node *en = &evict_array[i];
@ -1335,7 +1335,7 @@ INTERNAL SYS_JOB_DEF(sprite_evictor_job, _)
if (first_evicted) {
/* Release evicted node memory */
{
__profscope(Evictor memory release);
__profn("Evictor memory release");
for (struct evict_node *en = first_evicted; en; en = en->next_evicted) {
struct cache_entry *n = en->cache_entry;
if (n->kind == CACHE_ENTRY_KIND_TEXTURE && n->texture->valid) {
@ -1347,7 +1347,7 @@ INTERNAL SYS_JOB_DEF(sprite_evictor_job, _)
/* Add evicted nodes to free list */
{
__profscope(Evictor free list append);
__profn("Evictor free list append");
struct sys_lock pool_lock = sys_mutex_lock_e(G.cache.entry_pool_mutex);
for (struct evict_node *en = first_evicted; en; en = en->next_evicted) {
struct cache_entry *n = en->cache_entry;

View File

@ -403,7 +403,7 @@ INTERNAL void job_fiber_yield(struct fiber *fiber, struct fiber *parent_fiber);
void sys_wait(void *addr, void *cmp, u32 size)
{
//__prof;
__prof;
#if 0
WaitOnAddress(addr, cmp, size, INFINITE);
#else
@ -756,7 +756,7 @@ INTERNAL void job_fiber_entry(void *id_ptr)
while (true) {
/* Run job */
{
//__profscope(Run job);
__profn("Run job");
volatile struct yield_param *yield_param = fiber->yield_param;
yield_param->kind = YIELD_KIND_NONE;
struct sys_job_data data = ZI;
@ -783,10 +783,23 @@ INTERNAL void job_fiber_entry(void *id_ptr)
INTERNAL SYS_THREAD_DEF(worker_entry, worker_ctx_arg)
{
__prof;
struct worker_ctx *ctx = worker_ctx_arg;
(UNUSED)ctx;
{
HANDLE thread_handle = GetCurrentThread();
b32 success = false;
(UNUSED)success;
i32 priority = THREAD_PRIORITY_TIME_CRITICAL;
success = SetThreadPriority(thread_handle, priority);
ASSERT(success);
u64 affinity_mask = 1 << (ctx->id * 2);
success = !!SetThreadAffinityMask(thread_handle, affinity_mask);
ASSERT(success);
}
i32 worker_fiber_id = sys_current_fiber_id();
struct job_queue *queues[countof(G.job_queues)] = ZI;
@ -806,7 +819,7 @@ INTERNAL SYS_THREAD_DEF(worker_entry, worker_ctx_arg)
void *job_sig = 0;
struct counter *job_counter = 0;
{
//__profscope(Pull job);
//__profnc("Pull job", RGB32_F(0.75, 0.75, 0));
for (u32 queue_index = 0; queue_index < countof(queues) && !job_func; ++queue_index) {
struct job_queue *queue = queues[queue_index];
if (queue) {
@ -868,10 +881,12 @@ INTERNAL SYS_THREAD_DEF(worker_entry, worker_ctx_arg)
/* Run fiber */
if (job_func) {
//__profscope(Run fiber);
if (!job_fiber) {
job_fiber = fiber_alloc(FIBER_KIND_JOB_WORKER);
}
{
__profnc("Run fiber", RGB32_F(0.25, 0.75, 0));
__profvalue(job_fiber->id);
struct yield_param yield = ZI;
job_fiber->job_func = job_func;
job_fiber->job_sig = job_sig;
@ -987,6 +1002,7 @@ INTERNAL SYS_THREAD_DEF(worker_entry, worker_ctx_arg)
}
}
}
}
/* ========================== *
* Test entry
@ -1042,7 +1058,7 @@ struct sys_scratch_ctx *sys_scratch_ctx_from_fiber_id(i32 id)
struct fiber_ctx *fiber_ctx = fiber_ctx_from_id(id);
struct sys_scratch_ctx *scratch_ctx = &fiber_ctx->scratch_ctx;
if (!scratch_ctx->arenas[0]) {
//__profscope(Initialize scratch context);
__profn("Initialize scratch context");
for (u32 i = 0; i < countof(scratch_ctx->arenas); ++i) {
scratch_ctx->arenas[i] = arena_alloc(GIGABYTE(64));
}
@ -1323,7 +1339,7 @@ struct sys_file sys_file_open_read_wait(struct string path)
HANDLE handle;
while ((handle = CreateFileW(path_wstr, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL)) == INVALID_HANDLE_VALUE) {
if (GetLastError() == ERROR_SHARING_VIOLATION) {
__profscope(File share conflict delay);
__profn("File share conflict delay");
Sleep(delay_ms);
if (delay_ms < 1024) {
delay_ms *= 2;
@ -1869,7 +1885,7 @@ INTERNAL SYS_THREAD_DEF(window_thread_entry_point, arg)
GetMessageW(&msg, 0, 0, 0);
}
{
__profscope(Process window message);
__profn("Process window message");
if (atomic_i32_fetch(&window->event_thread_shutdown)) {
break;
}
@ -3035,7 +3051,7 @@ INTERNAL void win32_precise_sleep_timer(HANDLE timer, f64 seconds)
i64 max_ticks = (i64)scheduler_period_ms * 9500;
while (true) {
__profscope(Sleep part);
__profn("Sleep part");
/* Break sleep up into parts that are lower than scheduler period */
f64 remaining_seconds = (f64)(target_qpc - qpc.QuadPart) / (f64)qpc_per_second;
i64 sleep_ticks = (i64)((remaining_seconds - tolerance) * 10000000);
@ -3051,7 +3067,7 @@ INTERNAL void win32_precise_sleep_timer(HANDLE timer, f64 seconds)
/* Spin for any remaining time */
{
__profscope(Sleep spin);
__profn("Sleep spin");
while (qpc.QuadPart < target_qpc) {
YieldProcessor();
QueryPerformanceCounter(&qpc);
@ -3079,14 +3095,14 @@ INTERNAL void win32_precise_sleep_legacy(f64 seconds)
f64 sleep_ms = (seconds * 1000) - tolerance;
i32 sleep_slices = (i32)(sleep_ms / scheduler_period_ms);
if (sleep_slices > 0) {
__profscope(Legacy sleep part);
__profn("Legacy sleep part");
Sleep((DWORD)sleep_slices * scheduler_period_ms);
}
QueryPerformanceCounter(&qpc);
/* Spin for any remaining time */
{
__profscope(Legacy sleep spin);
__profn("Legacy sleep spin");
while (qpc.QuadPart < target_qpc) {
YieldProcessor();
QueryPerformanceCounter(&qpc);
@ -3162,7 +3178,7 @@ int CALLBACK wWinMain(_In_ HINSTANCE instance, _In_opt_ HINSTANCE prev_instance,
#if PROFILING
{
__profscope(Launch profiler);
__profn("Launch profiler");
STARTUPINFO si = ZI;
si.cb = sizeof(si);
PROCESS_INFORMATION pi = ZI;

View File

@ -174,7 +174,7 @@ struct ttf_decode_result ttf_decode(struct arena *arena, struct string encoded,
u32 out_offset_y = 0;
u32 row_height = 0;
{
__profscope(Build atlas);
__profn("Build atlas");
for (u16 i = 0; i < glyph_count; ++i) {
/* Render glyph to target */
DWRITE_GLYPH_RUN glyph_run = ZI;

View File

@ -1147,7 +1147,7 @@ INTERNAL void user_update(void)
{
/* Copy valid entities */
{
__profscope(Build ents list for sorting);
__profn("Build ents list for sorting");
for (u64 ent_index = 0; ent_index < G.ss_blended->num_ents_reserved; ++ent_index) {
struct sim_ent *ent = &G.ss_blended->ents[ent_index];
if (sim_ent_is_valid_and_active(ent)) {
@ -1158,7 +1158,7 @@ INTERNAL void user_update(void)
}
/* Sort */
{
__profscope(Sort ents);
__profn("Sort ents");
merge_sort(sorted, sorted_count, sizeof(*sorted), ent_draw_order_cmp, NULL);
}
}
@ -1168,7 +1168,7 @@ INTERNAL void user_update(void)
* ========================== */
{
__profscope(Draw entities);
__profn("Draw entities");
for (u64 sorted_index = 0; sorted_index < sorted_count; ++sorted_index) {
struct sim_ent *ent = sorted[sorted_index];
if (!sim_ent_is_valid_and_active(ent)) continue;
@ -1694,7 +1694,7 @@ INTERNAL void user_update(void)
/* Draw crosshair or show cursor */
if (!G.debug_camera) {
__profscope(Draw crosshair);
__profn("Draw crosshair");
struct v2 crosshair_pos = G.user_cursor;
struct sprite_tag crosshair = sprite_tag_from_path(LIT("sprite/crosshair.ase"));
struct sprite_texture *t = sprite_texture_from_tag_async(sprite_frame_scope, crosshair);
@ -1706,7 +1706,7 @@ INTERNAL void user_update(void)
/* FIXME: Enable this */
#if 0
{
__profscope(Update window cursor);
__profn("Update window cursor");
if (G.debug_camera) {
sys_window_cursor_disable_clip(G.window);
sys_window_cursor_show(G.window);
@ -1924,7 +1924,7 @@ INTERNAL void user_update(void)
* ========================== */
if (G.debug_draw) {
__profscope(Draw debug info);
__profn("Draw debug info");
struct font *font = font_load_async(LIT("font/fixedsys.ttf"), 12.0f);
if (font) {
struct arena_temp temp = arena_temp_begin(scratch.arena);
@ -2047,7 +2047,7 @@ INTERNAL void user_update(void)
* ========================== */
{
__profscope(Render);
__profn("Render");
struct rect user_viewport = RECT_FROM_V2(V2(0, 0), G.user_size);
struct v2i32 user_resolution = v2_round_to_int(user_viewport.size);
@ -2107,7 +2107,7 @@ INTERNAL SYS_JOB_DEF(user_job, _)
while (!atomic_i32_fetch(&G.shutdown)) {
{
__profscope(User sleep);
__profn("User sleep");
sleep_frame(last_frame_ns, target_dt_ns);
}
last_frame_ns = sys_time_ns();
@ -2268,11 +2268,11 @@ INTERNAL SYS_JOB_DEF(local_sim_job, _)
while (!atomic_i32_fetch(&G.shutdown)) {
struct arena_temp scratch = scratch_begin_no_conflict();
{
__profscope(Sim sleep);
__profn("Sim sleep");
sleep_frame(real_time_ns, step_dt_ns * compute_timescale);
}
{
__profscope(Sim update);
__profn("Sim update");
real_dt_ns = sys_time_ns() - real_time_ns;
real_time_ns += real_dt_ns;