From a9bcab1b78f7f53a4aa09faee3cbf9e18eece480 Mon Sep 17 00:00:00 2001
From: jacob <jacob@cagori.com>
Date: Sun, 6 Jul 2025 14:37:17 -0500
Subject: [PATCH] allow color & values for profiling

---
 src/app.c             |   4 +-
 src/arena.c           |   2 +-
 src/ase.c             |   2 +-
 src/gp_dx12.c         |  52 ++++-----
 src/host.c            |   8 +-
 src/log.c             |   2 +-
 src/mixer.c           |  10 +-
 src/phys.c            |   4 +-
 src/playback_wasapi.c |   2 +-
 src/prof_tracy.h      |  28 ++---
 src/resource.c        |   6 +-
 src/sim.c             |   2 +-
 src/sprite.c          |  16 +--
 src/sys_win32.c       | 242 ++++++++++++++++++++++--------------------
 src/ttf_dwrite.cpp    |   2 +-
 src/user.c            |  20 ++--
 16 files changed, 211 insertions(+), 191 deletions(-)

diff --git a/src/app.c b/src/app.c
index f1cc9754..d380b758 100644
--- a/src/app.c
+++ b/src/app.c
@@ -324,7 +324,7 @@ void sys_app_entry(struct string args_str)
      * forcing process exit (to prevent process hanging in the background
      * if something gets stuck) */
     {
-        __profscope(Run exit callbacks);
+        __profn("Run exit callbacks");
         struct sys_lock lock = sys_mutex_lock_e(G.exit_callbacks_mutex);
         for (struct exit_callback *callback = G.exit_callbacks_head; callback; callback = callback->next) {
             callback->func();
@@ -334,7 +334,7 @@ void sys_app_entry(struct string args_str)
 
     /* Write window settings to file */
     {
-        __profscope(Write settings file);
+        __profn("Write settings file");
         struct arena_temp temp = arena_temp_begin(scratch.arena);
 
         struct string window_settings_path = app_write_path_cat(temp.arena, settings_file_name);
diff --git a/src/arena.c b/src/arena.c
index 6eb81108..9ed6da6d 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -76,7 +76,7 @@ void *arena_push_bytes_no_zero(struct arena *arena, u64 size, u64 align)
 
         u64 new_pos = aligned_start_pos + size;
         if (new_pos > arena->committed) {
-            __profscope(Arena commit);
+            __profn("Arena commit");
             /* Commit new block(s) */
             u64 blocks_needed = (new_pos - arena->committed + ARENA_BLOCK_SIZE - 1) / ARENA_BLOCK_SIZE;
             u64 commit_bytes = blocks_needed * ARENA_BLOCK_SIZE;
diff --git a/src/ase.c b/src/ase.c
index 3c98a275..d0841a07 100644
--- a/src/ase.c
+++ b/src/ase.c
@@ -731,7 +731,7 @@ struct ase_decode_image_result ase_decode_image(struct arena *arena, struct stri
     }
 
     {
-        __profscope(Build image from cels);
+        __profn("Build image from cels");
 
         /* Assemble image from cels */
         for (struct cel *cel = cel_head; cel; cel = cel->next) {
diff --git a/src/gp_dx12.c b/src/gp_dx12.c
index 6a63c44f..21a1cd32 100644
--- a/src/gp_dx12.c
+++ b/src/gp_dx12.c
@@ -534,7 +534,7 @@ INTERNAL void dx12_init_device(void)
     /* Enable stable power state */
     {
         b32 success = true;
-        __profscope(Set stable power state);
+        __profn("Set stable power state");
         HKEY key = 0;
         success = RegOpenKeyExW(HKEY_LOCAL_MACHINE, L"SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\AppModelUnlock", 0, KEY_READ, &key) == ERROR_SUCCESS;
         if (success) {
@@ -942,7 +942,7 @@ INTERNAL SYS_JOB_DEF(pipeline_init_job, job)
          * root signature exists and matches between shaders. */
         ID3D10Blob *rootsig_blob = NULL;
         if (success) {
-            __profscope(Validate root signatures);
+            __profn("Validate root signatures");
             char *vs_rootsig_data = NULL;
             char *ps_rootsig_data = NULL;
             u32 vs_rootsig_data_len = 0;
@@ -979,7 +979,7 @@ INTERNAL SYS_JOB_DEF(pipeline_init_job, job)
         /* Create root signature */
         ID3D12RootSignature *rootsig = NULL;
         if (success) {
-            __profscope(Create root signature);
+            __profn("Create root signature");
             hr = ID3D12Device_CreateRootSignature(G.device, 0, ID3D10Blob_GetBufferPointer(rootsig_blob), ID3D10Blob_GetBufferSize(rootsig_blob), &IID_ID3D12RootSignature, (void **)&rootsig);
             if (FAILED(hr)) {
                 error_str = LIT("Failed to create root signature");
@@ -991,7 +991,7 @@ INTERNAL SYS_JOB_DEF(pipeline_init_job, job)
         ID3D12PipelineState *pso = NULL;
         if (success) {
             /* Default rasterizer state */
-            __profscope(Create PSO);
+            __profn("Create PSO");
             D3D12_RASTERIZER_DESC raster_desc = {
                 .FillMode = D3D12_FILL_MODE_SOLID,
                 .CullMode = D3D12_CULL_MODE_NONE,
@@ -1845,7 +1845,7 @@ INTERNAL u64 command_list_close(struct command_list *cl)
 
     /* Close */
     {
-        __profscope(Close DX12 command list);
+        __profn("Close DX12 command list");
         HRESULT hr = ID3D12GraphicsCommandList_Close(cl->cl);
         if (FAILED(hr)) {
             /* TODO: Don't panic */
@@ -1856,7 +1856,7 @@ INTERNAL u64 command_list_close(struct command_list *cl)
     /* Submit */
     u64 submit_fence_target = 0;
     {
-        __profscope(Execute);
+        __profn("Execute");
         struct sys_lock submit_lock = sys_mutex_lock_s(G.global_submit_mutex);
         struct sys_lock fence_lock = sys_mutex_lock_e(cq->submit_fence_mutex);
         {
@@ -2318,7 +2318,7 @@ struct gp_resource *gp_texture_alloc(enum gp_texture_format format, u32 flags, s
         struct command_queue *cq = G.command_queues[DX12_QUEUE_COPY_BACKGROUND];
         struct command_list *cl = command_list_open(cq->cl_pool);
         {
-            __profscope_dx12(cl->cq->prof, cl->cl, Upload texture, RGB32_F(0.2, 0.5, 0.2));
+            __profnc_dx12(cl->cq->prof, cl->cl, "Upload texture", RGB32_F(0.2, 0.5, 0.2));
             D3D12_TEXTURE_COPY_LOCATION dst_loc = {
                 .pResource = r->resource,
                 .Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX,
@@ -2340,7 +2340,7 @@ struct gp_resource *gp_texture_alloc(enum gp_texture_format format, u32 flags, s
         /* Wait */
         /* TODO: Return async waitable to caller */
         {
-            __profscope(Wait for upload);
+            __profn("Wait for upload");
             HANDLE event = CreateEvent(NULL, false, false, NULL);
             ID3D12Fence_SetEventOnCompletion(cq->submit_fence, fence_target, event);
             WaitForSingleObject(event, INFINITE);
@@ -2383,7 +2383,7 @@ void gp_dispatch(struct gp_dispatch_params params)
     struct command_queue *cq = G.command_queues[DX12_QUEUE_DIRECT];
     struct command_list *cl = command_list_open(cq->cl_pool);
     {
-        __profscope_dx12(cl->cq->prof, cl->cl, Dispatch, RGB32_F(0.5, 0.2, 0.2));
+        __profnc_dx12(cl->cq->prof, cl->cl, "Dispatch", RGB32_F(0.5, 0.2, 0.2));
         struct mat4x4 vp_matrix = calculate_vp(params.draw_target_view, params.draw_target_viewport.width, params.draw_target_viewport.height);
 
         /* Upload dummmy vert & index buffer */
@@ -2397,11 +2397,11 @@ void gp_dispatch(struct gp_dispatch_params params)
         struct sh_material_instance *material_instances = arena_push_array_no_zero(scratch.arena, struct sh_material_instance, flow->num_material_instance_descs);
         struct sh_material_grid *grids = arena_push_array_no_zero(scratch.arena, struct sh_material_grid, flow->num_material_grid_descs);
         {
-            __profscope(Process flow data);
+            __profn("Process flow data");
 
             /* Process material instances */
             {
-                __profscope(Process material instances);
+                __profn("Process material instances");
                 for (u32 i = 0; i < flow->num_material_instance_descs; ++i) {
                     struct material_instance_desc *desc = &((struct material_instance_desc *)arena_base(flow->material_instance_descs_arena))[i];
                     struct sh_material_instance *instance = &material_instances[i];
@@ -2427,7 +2427,7 @@ void gp_dispatch(struct gp_dispatch_params params)
 
             /* Process grids */
             {
-                __profscope(Process grids);
+                __profn("Process grids");
                 for (u32 i = 0; i < flow->num_material_grid_descs; ++i) {
                     struct material_grid_desc *desc = &((struct material_grid_desc *)arena_base(flow->material_grid_descs_arena))[i];
                     struct sh_material_grid *grid = &grids[i];
@@ -2464,8 +2464,8 @@ void gp_dispatch(struct gp_dispatch_params params)
 
         /* Material pass */
         if (material_pipeline->success) {
-            __profscope(Material pass);
-            __profscope_dx12(cl->cq->prof, cl->cl, Material pass, RGB32_F(0.5, 0.2, 0.2));
+            __profn("Material pass");
+            __profnc_dx12(cl->cq->prof, cl->cl, "Material pass", RGB32_F(0.5, 0.2, 0.2));
 
             /* Bind pipeline */
             ID3D12GraphicsCommandList_SetPipelineState(cl->cl, material_pipeline->pso);
@@ -2505,8 +2505,8 @@ void gp_dispatch(struct gp_dispatch_params params)
 
         /* Shape pass */
         if (shape_pipeline->success) {
-            __profscope(Shape pass);
-            __profscope_dx12(cl->cq->prof, cl->cl, Shape pass, RGB32_F(0.5, 0.2, 0.2));
+            __profn("Shape pass");
+            __profnc_dx12(cl->cq->prof, cl->cl, "Shape pass", RGB32_F(0.5, 0.2, 0.2));
 
             /* Bind pipeline */
             ID3D12GraphicsCommandList_SetPipelineState(cl->cl, shape_pipeline->pso);
@@ -2696,7 +2696,7 @@ INTERNAL void present_blit(struct swapchain_buffer *dst, struct dx12_resource *s
         struct command_queue *cq = G.command_queues[DX12_QUEUE_DIRECT];
         struct command_list *cl = command_list_open(cq->cl_pool);
         {
-            __profscope_dx12(cl->cq->prof, cl->cl, Blit, RGB32_F(0.5, 0.2, 0.2));
+            __profnc_dx12(cl->cq->prof, cl->cl, "Blit", RGB32_F(0.5, 0.2, 0.2));
             struct swapchain *swapchain = dst->swapchain;
 
             /* Upload dummmy vert & index buffer */
@@ -2804,7 +2804,7 @@ void gp_present(struct sys_window *window, struct v2i32 backresolution, struct g
     /* Present */
     /* FIXME: Resource barrier */
     {
-        __profscope(Present);
+        __profn("Present");
         HRESULT hr = IDXGISwapChain3_Present(swapchain->swapchain, vsync, present_flags);
         if (!SUCCEEDED(hr)) {
             ASSERT(false);
@@ -2814,7 +2814,7 @@ void gp_present(struct sys_window *window, struct v2i32 backresolution, struct g
 
 #if PROFILING_D3D
     {
-        __profscope(Mark queue frames);
+        __profn("Mark queue frames");
         /* Lock because frame marks shouldn't occur while command lists are recording */
         struct sys_lock lock = sys_mutex_lock_e(G.global_command_list_record_mutex);
         for (u32 i = 0; i < countof(G.command_queues); ++i) {
@@ -2824,7 +2824,7 @@ void gp_present(struct sys_window *window, struct v2i32 backresolution, struct g
         sys_mutex_unlock(&lock);
     }
     {
-        __profscope(Collect queues);
+        __profn("Collect queues");
         for (u32 i = 0; i < countof(G.command_queues); ++i) {
             struct command_queue *cq = G.command_queues[i];
             __prof_dx12_collect(cq->prof);
@@ -2854,7 +2854,7 @@ INTERNAL SYS_THREAD_DEF(evictor_thread_entry_point, arg)
     while (!shutdown) {
         struct arena_temp temp = arena_temp_begin(scratch.arena);
         {
-            __profscope(Run);
+            __profn("Run");
 
             u64 targets[countof(completed_targets)] = ZI;
 
@@ -2862,7 +2862,7 @@ INTERNAL SYS_THREAD_DEF(evictor_thread_entry_point, arg)
             u32 num_fenced_releases = 0;
             struct fenced_release_data *fenced_releases = NULL;
             {
-                __profscope(Copy queued releases);
+                __profn("Copy queued releases");
                 struct sys_lock lock = sys_mutex_lock_e(G.fenced_releases_mutex);
                 num_fenced_releases = G.fenced_releases_arena->pos / sizeof(struct fenced_release_data);
                 fenced_releases = arena_push_array_no_zero(temp.arena, struct fenced_release_data, num_fenced_releases);
@@ -2874,7 +2874,7 @@ INTERNAL SYS_THREAD_DEF(evictor_thread_entry_point, arg)
 
             /* Wait until fences reach target */
             {
-                __profscope(Check fences);
+                __profn("Check fences");
                 for (u32 i = 0; i < countof(targets) && !shutdown; ++i) {
                     while (completed_targets[i] < targets[i] && !shutdown) {
                         struct command_queue *cq = G.command_queues[i];
@@ -2882,7 +2882,7 @@ INTERNAL SYS_THREAD_DEF(evictor_thread_entry_point, arg)
                         if (completed_targets[i] < targets[i]) {
                             ID3D12Fence_SetEventOnCompletion(cq->submit_fence, targets[i], event);
                             {
-                                __profscope(Wait on fence);
+                                __profn("Wait on fence");
                                 WaitForMultipleObjects(2, events, false, INFINITE);
                                 shutdown = atomic_i32_fetch(&G.evictor_thread_shutdown);
                             }
@@ -2893,7 +2893,7 @@ INTERNAL SYS_THREAD_DEF(evictor_thread_entry_point, arg)
 
             /* Process releases */
             for (u32 i = 0; i < num_fenced_releases; ++i) {
-                __profscope(Release);
+                __profn("Release");
                 struct fenced_release_data *fr = &fenced_releases[i];
                 switch (fr->kind) {
                     default:
@@ -2918,7 +2918,7 @@ INTERNAL SYS_THREAD_DEF(evictor_thread_entry_point, arg)
         }
         arena_temp_end(temp);
         {
-            __profscope(Sleep);
+            __profn("Sleep");
             WaitForSingleObject(G.evictor_thread_wake_event, INFINITE);
             shutdown = atomic_i32_fetch(&G.evictor_thread_shutdown);
         }
diff --git a/src/host.c b/src/host.c
index 37af1dca..ea7c2b8e 100644
--- a/src/host.c
+++ b/src/host.c
@@ -652,7 +652,7 @@ struct host_event_list host_update_begin(struct arena *arena, struct host *host)
     i64 now_ns = sys_time_ns();
 
     {
-        __profscope(Read host packets);
+        __profn("Read host packets");
         struct string read_buff = ZI;
         read_buff.len = PACKET_DATA_MAX_LEN;
         read_buff.text = arena_push_array_no_zero(scratch.arena, u8, read_buff.len);
@@ -828,7 +828,7 @@ struct host_event_list host_update_begin(struct arena *arena, struct host *host)
 
     /* Update channels */
     {
-        __profscope(Update host channels);
+        __profn("Update host channels");
         for (u64 i = 0; i < host->num_channels_reserved; ++i) {
             struct host_channel *channel = &host->channels[i];
             if (channel->valid) {
@@ -902,7 +902,7 @@ void host_update_end(struct host *host)
     /* Process cmds into sendable packets */
     /* TODO: Unreliable packets don't need to be allocated into unreliable packet queue, should just send them and forget */
     {
-        __profscope(Process host cmds);
+        __profn("Process host cmds");
         for (struct host_cmd *cmd = host->first_cmd; cmd; cmd = cmd->next) {
             enum host_cmd_kind kind = cmd->kind;
             struct host_channel_id channel_id = cmd->channel_id;
@@ -1017,7 +1017,7 @@ void host_update_end(struct host *host)
     /* Send packets */
     /* TODO: Aggregate small packets */
     {
-        __profscope(Send host packets);
+        __profn("Send host packets");
         for (u64 i = 0; i < host->num_channels_reserved; ++i) {
             struct sock *sock = host->sock;
             struct host_channel *channel = &host->channels[i];
diff --git a/src/log.c b/src/log.c
index 70d20e2f..7c29049e 100644
--- a/src/log.c
+++ b/src/log.c
@@ -216,7 +216,7 @@ void _log(i32 level, struct string msg)
         struct sys_lock lock = sys_mutex_lock_s(G.callbacks_mutex);
         for (struct log_event_callback *callback = G.first_callback; callback; callback = callback->next) {
             if (level <= callback->level) {
-                __profscope(Run log callback);
+                __profn("Run log callback");
                 callback->func(event);
             }
         }
diff --git a/src/mixer.c b/src/mixer.c
index f1de21d7..3ffec65b 100644
--- a/src/mixer.c
+++ b/src/mixer.c
@@ -290,7 +290,7 @@ struct mixed_pcm_f32 mixer_update(struct arena *arena, u64 frame_count)
         /* Update & read mixes */
         mixes = arena_push_array_no_zero(scratch.arena, struct mix *, G.track_playing_count);
         for (struct track *track = G.track_first_playing; track; track = track->next) {
-            __profscope(Prepare track);
+            __profn("Prepare track");
             struct mix *mix = &track->mix;
             mix->desc = track->desc;
             mixes[mixes_count++] = mix;
@@ -300,7 +300,7 @@ struct mixed_pcm_f32 mixer_update(struct arena *arena, u64 frame_count)
     }
 
     for (u64 mix_index = 0; mix_index < mixes_count; ++mix_index) {
-        __profscope(Mix track);
+        __profn("Mix track");
         struct mix *mix = mixes[mix_index];
 
         if (mix->source->pcm.count <= 0) {
@@ -353,7 +353,7 @@ struct mixed_pcm_f32 mixer_update(struct arena *arena, u64 frame_count)
 
         /* Transform 16 bit source -> 32 bit stereo at output duration */
         {
-            __profscope(Resample);
+            __profn("Resample");
             f32 *out_samples = mix_pcm.samples;
 
             u64 out_frames_count = mix_pcm.count / 2;
@@ -407,7 +407,7 @@ struct mixed_pcm_f32 mixer_update(struct arena *arena, u64 frame_count)
          * ========================== */
 
         if (desc.flags & MIXER_FLAG_SPATIALIZE) {
-            __profscope(Spatialize);
+            __profn("Spatialize");
 
             /* Algorithm constants */
             const f32 rolloff_height = 1.2f;
@@ -468,7 +468,7 @@ struct mixed_pcm_f32 mixer_update(struct arena *arena, u64 frame_count)
     }
 
     {
-        __profscope(Update track effect data);
+        __profn("Update track effect data");
         struct sys_lock lock = sys_mutex_lock_e(G.mutex);
         for (u64 i = 0; i < mixes_count; ++i) {
             struct mix *mix = mixes[i];
diff --git a/src/phys.c b/src/phys.c
index b33b5da0..09f92079 100644
--- a/src/phys.c
+++ b/src/phys.c
@@ -1254,7 +1254,7 @@ void phys_step(struct phys_step_ctx *ctx, f32 timestep)
 
     f32 remaining_dt = timestep;
     while (remaining_dt > 0) {
-        __profscope(Step part);
+        __profn("Step part");
         ++phys_iteration;
         struct arena_temp scratch = scratch_begin_no_conflict();
 
@@ -1282,7 +1282,7 @@ void phys_step(struct phys_step_ctx *ctx, f32 timestep)
 
         f32 substep_dt = step_dt / SIM_PHYSICS_SUBSTEPS;
         for (u32 i = 0; i < SIM_PHYSICS_SUBSTEPS; ++i) {
-            __profscope(Substep);
+            __profn("Substep");
 
             /* Warm start */
 #if SIM_PHYSICS_ENABLE_WARM_STARTING
diff --git a/src/playback_wasapi.c b/src/playback_wasapi.c
index b6a86c8b..1796329b 100644
--- a/src/playback_wasapi.c
+++ b/src/playback_wasapi.c
@@ -174,7 +174,7 @@ INTERNAL struct wasapi_buffer wasapi_update_begin(void)
 
     /* Wait */
     {
-        __profscope(wasapi_wait_on_event);
+        __profn("Wasapi wait");
         WaitForSingleObject(G.event, INFINITE);
     }
 
diff --git a/src/prof_tracy.h b/src/prof_tracy.h
index db007017..bc12e952 100644
--- a/src/prof_tracy.h
+++ b/src/prof_tracy.h
@@ -10,10 +10,10 @@
 #define PROFILING_SYSTEM_TRACE 0
 #define PROFILING_CAPTURE_FRAME_IMAGE 0
 #define PROFILING_LOCKS 0
-#define PROFILING_D3D 0
+#define PROFILING_D3D 1
 #define PROFILING_FILE_WSTR L".tracy"
-//#define PROFILING_CMD_WSTR L"cmd /C start \"\" /wait tracy-capture.exe -o .tracy -a 127.0.0.1 && start \"\" tracy-profiler.exe .tracy"
-#define PROFILING_CMD_WSTR L"tracy-profiler.exe -a 127.0.0.1"
+#define PROFILING_CMD_WSTR L"cmd /C start \"\" /wait tracy-capture.exe -o .tracy -a 127.0.0.1 && start \"\" tracy-profiler.exe .tracy"
+//#define PROFILING_CMD_WSTR L"tracy-profiler.exe -a 127.0.0.1"
 
 /* Tracy defines */
 #define TRACY_ENABLE
@@ -31,11 +31,12 @@
 #pragma clang diagnostic ignored "-Wincompatible-pointer-types-discards-qualifiers"
 #include TRACY_CLIENT_HEADER_PATH
 
-/* Clang/GCC cleanup macros */
-#define __prof static const struct ___tracy_source_location_data CAT(__tracy_source_location,__LINE__) = { NULL, __func__,  __FILE__, (uint32_t)__LINE__, 0 }; __attribute((cleanup(__prof_zone_cleanup_func))) TracyCZoneCtx __tracy_zone_ctx = ___tracy_emit_zone_begin( &CAT(__tracy_source_location,__LINE__), true )
-#define __profscope(name) static const struct ___tracy_source_location_data CAT(__tracy_source_location,__LINE__) = { #name, __func__,  __FILE__, (uint32_t)__LINE__, 0 }; __attribute((cleanup(__prof_zone_cleanup_func))) TracyCZoneCtx __tracy_zone_ctx = ___tracy_emit_zone_begin( &CAT(__tracy_source_location,__LINE__), true )
 INLINE void __prof_zone_cleanup_func(TracyCZoneCtx *ctx) { TracyCZoneEnd(*ctx) }
+#define __profnc(name, color) static const struct ___tracy_source_location_data CAT(__tracy_source_location,__LINE__) = { (name), __func__,  __FILE__, (uint32_t)__LINE__, BGR32(color) }; __attribute((cleanup(__prof_zone_cleanup_func))) TracyCZoneCtx __tracy_zone_ctx = ___tracy_emit_zone_begin( &CAT(__tracy_source_location,__LINE__), true )
+#define __profn(name) __profnc(name, 0)
+#define __prof __profnc(NULL, 0)
 
+#define __profvalue(v)                  TracyCZoneValue(__tracy_zone_ctx, (v))
 #define __profalloc(ptr, size)          TracyCAlloc((ptr), (size))
 #define __proffree(ptr)                 TracyCFree((ptr))
 #define __profmsg(txt, len, col)        TracyCMessageC((txt), (len), BGR32(col))
@@ -59,8 +60,11 @@ enum __prof_plot_type {
 #define PROFILING_LOCKS 0
 #define PROFILING_D3D 0
 
+#define __profnc(name, color)
+#define __profn(name)
 #define __prof
-#define __profscope(name)
+
+#define __profvalue(v)
 #define __profalloc(ptr, size)
 #define __proffree(ptr)
 #define __profmsg(txt, len, col)
@@ -105,25 +109,25 @@ enum __prof_plot_type {
 #if PROFILING_D3D
 /* Dx11 */
 INLINE void __prof_dx11_zone_cleanup_func(TracyCD3D11ZoneCtx *ctx) { ___tracy_d3d11_emit_zone_end(*ctx); }
-# define __profscope_dx11(dx11_ctx, name, color) static const struct ___tracy_source_location_data CAT(__tracy_gpu_d3d11_source_location,__LINE__) = { #name, __func__,  __FILE__, (uint32_t)__LINE__, BGR32(color) }; __attribute((cleanup(__prof_dx11_zone_cleanup_func))) TracyCD3D11ZoneCtx __tracy_d3d11_zone_ctx; ___tracy_d3d11_emit_zone_begin( dx11_ctx, &__tracy_d3d11_zone_ctx, &CAT(__tracy_gpu_d3d11_source_location,__LINE__), true)
+# define __profnc_dx11(dx11_ctx, name, color) static const struct ___tracy_source_location_data CAT(__tracy_gpu_d3d11_source_location,__LINE__) = { name, __func__,  __FILE__, (uint32_t)__LINE__, BGR32(color) }; __attribute((cleanup(__prof_dx11_zone_cleanup_func))) TracyCD3D11ZoneCtx __tracy_d3d11_zone_ctx; ___tracy_d3d11_emit_zone_begin( dx11_ctx, &__tracy_d3d11_zone_ctx, &CAT(__tracy_gpu_d3d11_source_location,__LINE__), true)
 # define __prof_dx11_ctx(name) struct TracyCD3D11Ctx *name
 # define __prof_dx11_ctx_alloc(ctx, device, device_ctx, name, name_len) ctx = ___tracy_d3d11_context_announce(device, device_ctx, name, name_len)
 # define __prof_dx11_ctx_release(ctx) ___tracy_d3d11_context_terminate(ctx)
 # define __prof_dx11_collect(ctx) ___tracy_d3d11_context_collect(ctx)
 /* Dx12 */
 INLINE void __prof_dx12_zone_cleanup_func(TracyCD3D12ZoneCtx *ctx) { ___tracy_d3d12_emit_zone_end(*ctx); }
-# define __profscope_dx12(dx12_ctx, cmd_list, name, color) static const struct ___tracy_source_location_data CAT(__tracy_gpu_d3d12_source_location,__LINE__) = { #name, __func__,  __FILE__, (uint32_t)__LINE__, BGR32(color) }; __attribute((cleanup(__prof_dx12_zone_cleanup_func))) TracyCD3D12ZoneCtx __tracy_d3d12_zone_ctx; ___tracy_d3d12_emit_zone_begin( dx12_ctx, cmd_list, &__tracy_d3d12_zone_ctx, &CAT(__tracy_gpu_d3d12_source_location,__LINE__), true)
+# define __profnc_dx12(dx12_ctx, cmd_list, name, color) static const struct ___tracy_source_location_data CAT(__tracy_gpu_d3d12_source_location,__LINE__) = { name, __func__,  __FILE__, (uint32_t)__LINE__, BGR32(color) }; __attribute((cleanup(__prof_dx12_zone_cleanup_func))) TracyCD3D12ZoneCtx __tracy_d3d12_zone_ctx; ___tracy_d3d12_emit_zone_begin( dx12_ctx, cmd_list, &__tracy_d3d12_zone_ctx, &CAT(__tracy_gpu_d3d12_source_location,__LINE__), true)
 # define __prof_dx12_ctx(name) struct TracyCD3D12Ctx *name
 # define __prof_dx12_ctx_alloc(ctx, device, queue, name, name_len) ctx = ___tracy_d3d12_context_announce(device, queue, name, name_len)
 # define __prof_dx12_ctx_release(ctx) ___tracy_d3d12_context_terminate(ctx)
 # define __prof_dx12_new_frame(ctx) ___tracy_d3d12_context_new_frame(ctx)
 # define __prof_dx12_collect(ctx) ___tracy_d3d12_context_collect(ctx)
 #else
-# define __profscope_dx11(dx11_ctx, name, color)
+# define __profnc_dx11(dx11_ctx, name, color)
 # define __prof_dx11_ctx_alloc(ctx, device, device_ctx, name, name_len)
 # define __prof_dx11_ctx_release(ctx)
 # define __prof_dx11_collect(ctx)
-# define __profscope_dx12(dx11_ctx, queue, name, color)
+# define __profnc_dx12(dx11_ctx, queue, name, color)
 # define __prof_dx12_ctx_alloc(ctx, device, queue, name, name_len)
 # define __prof_dx12_ctx_release(ctx)
 # define __prof_dx12_new_frame(ctx)
@@ -137,7 +141,7 @@ INLINE void __prof_dx12_zone_cleanup_func(TracyCD3D12ZoneCtx *ctx) { ___tracy_d3
 #endif  /* PROFILING_CAPTURE_FRAME_IMAGE */
 
 #ifdef TRACY_FIBERS
-/* Tracy fiber methods are wrapped in NO_INLINE because otherwise issues arise
+/* Tracy fiber methods are wrapped in NO_INLINE because otherwise issues can arise
  * accross fiber context boundaries during optimization */
 NO_INLINE INLINE void __prof_fiber_enter(char *fiber_name, i32 profiler_group) { TracyCFiberEnterWithHint(fiber_name, profiler_group); }
 NO_INLINE INLINE void __prof_fiber_leave(void) { TracyCFiberLeave; }
diff --git a/src/resource.c b/src/resource.c
index 6b58a6ce..d7f43a88 100644
--- a/src/resource.c
+++ b/src/resource.c
@@ -241,10 +241,10 @@ INTERNAL SYS_THREAD_DEF(resource_watch_dispatcher_thread_entry_point, _)
     while (!atomic_i32_fetch(&G.watch_shutdown)) {
         sys_condition_variable_wait(G.watch_dispatcher_cv, &watch_dispatcher_lock);
         if (!atomic_i32_fetch(&G.watch_shutdown) && G.watch_dispatcher_info_arena->pos > 0) {
-            __profscope(Dispatch resource watch callbacks);
+            __profn("Dispatch resource watch callbacks");
             /* Unlock and sleep a bit so duplicate events pile up */
             {
-                __profscope(Delay);
+                __profn("Delay");
                 sys_mutex_unlock(&watch_dispatcher_lock);
                 sys_sleep(WATCH_DISPATCHER_DELAY_SECONDS);
                 watch_dispatcher_lock = sys_mutex_lock_e(G.watch_dispatcher_mutex);
@@ -275,7 +275,7 @@ INTERNAL SYS_THREAD_DEF(resource_watch_dispatcher_thread_entry_point, _)
                 {
                     struct dict *dedup_dict = dict_init(temp.arena, WATCH_DISPATCHER_DEDUP_DICT_BINS);
                     for (struct sys_watch_info *info = watch_info_list.first; info; info = info->next) {
-                        __profscope(Dispatch);
+                        __profn("Dispatch");
                         /* Do not run callbacks for the same file more than once */
                         b32 skip = false;
                         u64 hash = hash_fnv64(HASH_FNV64_BASIS, info->name);
diff --git a/src/sim.c b/src/sim.c
index 4898169f..5ad8d717 100644
--- a/src/sim.c
+++ b/src/sim.c
@@ -649,7 +649,7 @@ struct sim_snapshot *sim_snapshot_alloc_from_lerp(struct sim_client *client, str
 
         /* Blend entities */
         {
-            __profscope(Lerp snapshot entities);
+            __profn("Lerp snapshot entities");
             u64 num_entities = min_u64(ss0->num_ents_reserved, ss1->num_ents_reserved);
             for (u64 i = 0; i < num_entities; ++i) {
                 struct sim_ent *e = &ss->ents[i];
diff --git a/src/sprite.c b/src/sprite.c
index 25589a6e..f01817dd 100644
--- a/src/sprite.c
+++ b/src/sprite.c
@@ -427,7 +427,7 @@ INTERNAL struct sprite_sheet init_sheet_from_ase_result(struct arena *arena, str
 
     /* Init frames */
     {
-        __profscope(Init frames);
+        __profn("Init frames");
         sheet.image_size = ase.image_size;
         sheet.frame_size = ase.frame_size;
         sheet.frames = arena_push_array(arena, struct sprite_sheet_frame, ase.num_frames);
@@ -449,7 +449,7 @@ INTERNAL struct sprite_sheet init_sheet_from_ase_result(struct arena *arena, str
     /* Init spans */
     sheet.spans_count = ase.num_spans;
     if (ase.num_spans > 0) {
-        __profscope(Init spans);
+        __profn("Init spans");
         sheet.spans = arena_push_array(arena, struct sprite_sheet_span, sheet.spans_count);
         sheet.spans_dict = dict_init(arena, (u64)(ase.num_spans * SHEET_SPAN_LOOKUP_TABLE_BIN_RATIO));
         u64 index = 0;
@@ -467,7 +467,7 @@ INTERNAL struct sprite_sheet init_sheet_from_ase_result(struct arena *arena, str
 
     /* Init slices */
     if (ase.num_slice_keys > 0) {
-        __profscope(Init slices);
+        __profn("Init slices");
         struct arena_temp scratch = scratch_begin(arena);
 
         struct temp_ase_slice_key_node {
@@ -1241,7 +1241,7 @@ INTERNAL SYS_JOB_DEF(sprite_evictor_job, _)
             /* Scan for evictable nodes */
             b32 cache_over_budget_threshold = atomic_u64_fetch(&G.cache.memory_usage) > CACHE_MEMORY_BUDGET_THRESHOLD;
             if (cache_over_budget_threshold || RESOURCE_RELOADING) {
-                __profscope(Evictor scan);
+                __profn("Evictor scan");
                 for (u64 i = 0; i < CACHE_BINS_COUNT; ++i) {
                     struct cache_bin *bin = &G.cache.bins[i];
                     struct sys_lock bin_lock = sys_mutex_lock_s(bin->mutex);
@@ -1282,14 +1282,14 @@ INTERNAL SYS_JOB_DEF(sprite_evictor_job, _)
 
             /* Sort evict nodes */
             {
-                __profscope(Evictor sort);
+                __profn("Evictor sort");
                 merge_sort(evict_array, evict_array_count, sizeof(*evict_array), evict_sort, NULL);
             }
 
             /* Remove evictable nodes from cache until under budget */
             struct evict_node *first_evicted = NULL;
             {
-                __profscope(Evictor cache removal);
+                __profn("Evictor cache removal");
                 b32 stop_evicting = false;
                 for (u64 i = 0; i < evict_array_count && !stop_evicting; ++i) {
                     struct evict_node *en = &evict_array[i];
@@ -1335,7 +1335,7 @@ INTERNAL SYS_JOB_DEF(sprite_evictor_job, _)
             if (first_evicted) {
                 /* Release evicted node memory */
                 {
-                    __profscope(Evictor memory release);
+                    __profn("Evictor memory release");
                     for (struct evict_node *en = first_evicted; en; en = en->next_evicted) {
                         struct cache_entry *n = en->cache_entry;
                         if (n->kind == CACHE_ENTRY_KIND_TEXTURE && n->texture->valid) {
@@ -1347,7 +1347,7 @@ INTERNAL SYS_JOB_DEF(sprite_evictor_job, _)
 
                 /* Add evicted nodes to free list */
                 {
-                    __profscope(Evictor free list append);
+                    __profn("Evictor free list append");
                     struct sys_lock pool_lock = sys_mutex_lock_e(G.cache.entry_pool_mutex);
                     for (struct evict_node *en = first_evicted; en; en = en->next_evicted) {
                         struct cache_entry *n = en->cache_entry;
diff --git a/src/sys_win32.c b/src/sys_win32.c
index 710353ad..6b0f80d5 100644
--- a/src/sys_win32.c
+++ b/src/sys_win32.c
@@ -403,7 +403,7 @@ INTERNAL void job_fiber_yield(struct fiber *fiber, struct fiber *parent_fiber);
 
 void sys_wait(void *addr, void *cmp, u32 size)
 {
-    //__prof;
+    __prof;
 #if 0
     WaitOnAddress(addr, cmp, size, INFINITE);
 #else
@@ -756,7 +756,7 @@ INTERNAL void job_fiber_entry(void *id_ptr)
     while (true) {
         /* Run job */
         {
-            //__profscope(Run job);
+            __profn("Run job");
             volatile struct yield_param *yield_param = fiber->yield_param;
             yield_param->kind = YIELD_KIND_NONE;
             struct sys_job_data data = ZI;
@@ -783,10 +783,23 @@ INTERNAL void job_fiber_entry(void *id_ptr)
 
 INTERNAL SYS_THREAD_DEF(worker_entry, worker_ctx_arg)
 {
-    __prof;
     struct worker_ctx *ctx = worker_ctx_arg;
     (UNUSED)ctx;
 
+    { /* Raise this worker thread's priority and bind it to a single logical processor */
+        HANDLE thread_handle = GetCurrentThread();
+        b32 success = false;
+        (UNUSED)success;
+
+        i32 priority = THREAD_PRIORITY_TIME_CRITICAL;
+        success = SetThreadPriority(thread_handle, priority);
+        ASSERT(success);
+
+        u64 affinity_mask = (u64)1 << (ctx->id * 2); /* Shift in 64 bits: an int `1` is undefined once the shift count reaches 32 (id >= 16) */
+        success = !!SetThreadAffinityMask(thread_handle, affinity_mask);
+        ASSERT(success);
+    }
+
     i32 worker_fiber_id = sys_current_fiber_id();
 
     struct job_queue *queues[countof(G.job_queues)] = ZI;
@@ -806,7 +819,7 @@ INTERNAL SYS_THREAD_DEF(worker_entry, worker_ctx_arg)
         void *job_sig = 0;
         struct counter *job_counter = 0;
         {
-            //__profscope(Pull job);
+            //__profnc("Pull job", RGB32_F(0.75, 0.75, 0));
             for (u32 queue_index = 0; queue_index < countof(queues) && !job_func; ++queue_index) {
                 struct job_queue *queue = queues[queue_index];
                 if (queue) {
@@ -868,120 +881,123 @@ INTERNAL SYS_THREAD_DEF(worker_entry, worker_ctx_arg)
 
         /* Run fiber */
         if (job_func) {
-            //__profscope(Run fiber);
             if (!job_fiber) {
                 job_fiber = fiber_alloc(FIBER_KIND_JOB_WORKER);
             }
-            struct yield_param yield = ZI;
-            job_fiber->job_func = job_func;
-            job_fiber->job_sig = job_sig;
-            job_fiber->job_id = job_id;
-            job_fiber->job_priority = job_priority;
-            job_fiber->parent_id = worker_fiber_id;
-            job_fiber->yield_param = &yield;
-            b32 done = false;
-            while (!done) {
-                job_fiber_resume(job_fiber);
-                switch (yield.kind) {
-                    default:
-                    {
-                        /* Invalid yield kind */
-                        struct arena_temp scratch = scratch_begin_no_conflict();
-                        sys_panic(string_format(scratch.arena, LIT("Invalid fiber yield kind \"%F\""), FMT_SINT(yield.kind)));
-                        scratch_end(scratch);
-                    } break;
-
-                    case YIELD_KIND_WAIT:
-                    {
-#if 1
-                        void *wait_addr = yield.wait.addr;
-                        void *wait_cmp = yield.wait.cmp;
-                        u32 wait_size = yield.wait.size;
-
-                        u64 wait_bin_index = (u64)wait_addr % NUM_WAIT_BINS;
-                        struct wait_bin *bin = &G.wait_bins[wait_bin_index];
-
-                        while (atomic_i32_fetch_test_set(&bin->lock, 0, 1) != 0) ix_pause();
+            {
+                __profnc("Run fiber", RGB32_F(0.25, 0.75, 0));
+                __profvalue(job_fiber->id);
+                struct yield_param yield = ZI;
+                job_fiber->job_func = job_func;
+                job_fiber->job_sig = job_sig;
+                job_fiber->job_id = job_id;
+                job_fiber->job_priority = job_priority;
+                job_fiber->parent_id = worker_fiber_id;
+                job_fiber->yield_param = &yield;
+                b32 done = false;
+                while (!done) {
+                    job_fiber_resume(job_fiber);
+                    switch (yield.kind) {
+                        default:
                         {
-                            if (MEMEQ(wait_addr, wait_cmp, wait_size)) {
-                                /* Search addr wait list in bin */
-                                struct wait_list *wait_list = NULL;
-                                for (struct wait_list *tmp = bin->first_wait_list; tmp && !wait_list; tmp = tmp->next_in_bin) {
-                                    if (tmp->addr == wait_addr) {
-                                        wait_list = tmp;
-                                    }
-                                }
+                            /* Invalid yield kind */
+                            struct arena_temp scratch = scratch_begin_no_conflict();
+                            sys_panic(string_format(scratch.arena, LIT("Invalid fiber yield kind \"%F\""), FMT_SINT(yield.kind)));
+                            scratch_end(scratch);
+                        } break;
 
-                                /* Allocate new wait list */
-                                if (!wait_list) {
-                                    if (bin->first_free_wait_list) {
-                                        wait_list = bin->first_free_wait_list;
-                                        bin->first_free_wait_list = wait_list->next_in_bin;
-                                    } else {
-                                        while (atomic_i32_fetch_test_set(&G.wait_lists_arena_lock, 0, 1) != 0) ix_pause();
-                                        {
-                                            wait_list = arena_push_no_zero(G.wait_lists_arena, struct wait_list);
+                        case YIELD_KIND_WAIT:
+                        {
+    #if 1
+                            void *wait_addr = yield.wait.addr;
+                            void *wait_cmp = yield.wait.cmp;
+                            u32 wait_size = yield.wait.size;
+
+                            u64 wait_bin_index = (u64)wait_addr % NUM_WAIT_BINS;
+                            struct wait_bin *bin = &G.wait_bins[wait_bin_index];
+
+                            while (atomic_i32_fetch_test_set(&bin->lock, 0, 1) != 0) ix_pause();
+                            {
+                                if (MEMEQ(wait_addr, wait_cmp, wait_size)) {
+                                    /* Search addr wait list in bin */
+                                    struct wait_list *wait_list = NULL;
+                                    for (struct wait_list *tmp = bin->first_wait_list; tmp && !wait_list; tmp = tmp->next_in_bin) {
+                                        if (tmp->addr == wait_addr) {
+                                            wait_list = tmp;
                                         }
-                                        atomic_i32_fetch_set(&G.wait_lists_arena_lock, 0);
                                     }
-                                    MEMZERO_STRUCT(wait_list);
-                                    wait_list->addr = wait_addr;
-                                    if (bin->last_wait_list) {
-                                        bin->last_wait_list->next_in_bin = wait_list;
-                                        wait_list->prev_in_bin = bin->last_wait_list;
+
+                                    /* Allocate new wait list */
+                                    if (!wait_list) {
+                                        if (bin->first_free_wait_list) {
+                                            wait_list = bin->first_free_wait_list;
+                                            bin->first_free_wait_list = wait_list->next_in_bin;
+                                        } else {
+                                            while (atomic_i32_fetch_test_set(&G.wait_lists_arena_lock, 0, 1) != 0) ix_pause();
+                                            {
+                                                wait_list = arena_push_no_zero(G.wait_lists_arena, struct wait_list);
+                                            }
+                                            atomic_i32_fetch_set(&G.wait_lists_arena_lock, 0);
+                                        }
+                                        MEMZERO_STRUCT(wait_list);
+                                        wait_list->addr = wait_addr;
+                                        if (bin->last_wait_list) {
+                                            bin->last_wait_list->next_in_bin = wait_list;
+                                            wait_list->prev_in_bin = bin->last_wait_list;
+                                        } else {
+                                            bin->first_wait_list = wait_list;
+                                        }
+                                        bin->last_wait_list = wait_list;
+                                    }
+
+                                    /* Allocate new yielder */
+                                    struct yielder *yielder = NULL;
+                                    if (wait_list->first_free_yielder) {
+                                        yielder = wait_list->first_free_yielder;
+                                        wait_list->first_free_yielder = yielder->next;
                                     } else {
-                                        bin->first_wait_list = wait_list;
+                                        while (atomic_i32_fetch_test_set(&G.yielders_arena_lock, 0, 1) != 0) ix_pause();
+                                        {
+                                            yielder = arena_push_no_zero(G.yielders_arena, struct yielder);
+                                        }
+                                        atomic_i32_fetch_set(&G.yielders_arena_lock, 0);
                                     }
-                                    bin->last_wait_list = wait_list;
-                                }
-
-                                /* Allocate new yielder */
-                                struct yielder *yielder = NULL;
-                                if (wait_list->first_free_yielder) {
-                                    yielder = wait_list->first_free_yielder;
-                                    wait_list->first_free_yielder = yielder->next;
-                                } else {
-                                    while (atomic_i32_fetch_test_set(&G.yielders_arena_lock, 0, 1) != 0) ix_pause();
-                                    {
-                                        yielder = arena_push_no_zero(G.yielders_arena, struct yielder);
+                                    MEMZERO_STRUCT(yielder);
+                                    yielder->fiber_id = job_fiber->id;
+                                    yielder->job_queue_kind = job_queue_kind;
+                                    yielder->job_func = job_func;
+                                    yielder->job_sig = job_sig;
+                                    yielder->job_counter = job_counter;
+                                    yielder->job_id = job_id;
+                                    if (wait_list->last_yielder) {
+                                        wait_list->last_yielder->next = yielder;
+                                        yielder->prev = wait_list->last_yielder;
+                                    } else {
+                                        wait_list->first_yielder = yielder;
                                     }
-                                    atomic_i32_fetch_set(&G.yielders_arena_lock, 0);
-                                }
-                                MEMZERO_STRUCT(yielder);
-                                yielder->fiber_id = job_fiber->id;
-                                yielder->job_queue_kind = job_queue_kind;
-                                yielder->job_func = job_func;
-                                yielder->job_sig = job_sig;
-                                yielder->job_counter = job_counter;
-                                yielder->job_id = job_id;
-                                if (wait_list->last_yielder) {
-                                    wait_list->last_yielder->next = yielder;
-                                    yielder->prev = wait_list->last_yielder;
-                                } else {
-                                    wait_list->first_yielder = yielder;
-                                }
-                                wait_list->last_yielder = yielder;
-                                ++wait_list->num_yielders;
+                                    wait_list->last_yielder = yielder;
+                                    ++wait_list->num_yielders;
 
-                                /* Pop worker's job fiber */
-                                job_fiber = NULL;
-                                done = true;
+                                    /* Pop worker's job fiber */
+                                    job_fiber = NULL;
+                                    done = true;
+                                }
                             }
-                        }
-                        atomic_i32_fetch_set(&bin->lock, 0);
-#else
-                        (UNUSED)job_queue_kind;
-                        //ASSERT(false);
-#endif
-                    } break;
+                            atomic_i32_fetch_set(&bin->lock, 0);
+    #else
+                            (UNUSED)job_queue_kind;
+                            //ASSERT(false);
+    #endif
+                        } break;
 
-                    case YIELD_KIND_DONE:
-                    {
-                        if (job_counter) {
-                            counter_add(job_counter, -1);
-                        }
-                        done = true;
-                    } break;
+                        case YIELD_KIND_DONE:
+                        {
+                            if (job_counter) {
+                                counter_add(job_counter, -1);
+                            }
+                            done = true;
+                        } break;
+                    }
                 }
             }
         }
@@ -1042,7 +1058,7 @@ struct sys_scratch_ctx *sys_scratch_ctx_from_fiber_id(i32 id)
     struct fiber_ctx *fiber_ctx = fiber_ctx_from_id(id);
     struct sys_scratch_ctx *scratch_ctx = &fiber_ctx->scratch_ctx;
     if (!scratch_ctx->arenas[0]) {
-        //__profscope(Initialize scratch context);
+        __profn("Initialize scratch context");
         for (u32 i = 0; i < countof(scratch_ctx->arenas); ++i) {
             scratch_ctx->arenas[i] = arena_alloc(GIGABYTE(64));
         }
@@ -1323,7 +1339,7 @@ struct sys_file sys_file_open_read_wait(struct string path)
     HANDLE handle;
     while ((handle = CreateFileW(path_wstr, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL)) == INVALID_HANDLE_VALUE) {
         if (GetLastError() == ERROR_SHARING_VIOLATION) {
-            __profscope(File share conflict delay);
+            __profn("File share conflict delay");
             Sleep(delay_ms);
             if (delay_ms < 1024) {
                 delay_ms *= 2;
@@ -1869,7 +1885,7 @@ INTERNAL SYS_THREAD_DEF(window_thread_entry_point, arg)
             GetMessageW(&msg, 0, 0, 0);
         }
         {
-            __profscope(Process window message);
+            __profn("Process window message");
             if (atomic_i32_fetch(&window->event_thread_shutdown)) {
                 break;
             }
@@ -3035,7 +3051,7 @@ INTERNAL void win32_precise_sleep_timer(HANDLE timer, f64 seconds)
 
     i64 max_ticks = (i64)scheduler_period_ms * 9500;
     while (true) {
-        __profscope(Sleep part);
+        __profn("Sleep part");
         /* Break sleep up into parts that are lower than scheduler period */
         f64 remaining_seconds = (f64)(target_qpc - qpc.QuadPart) / (f64)qpc_per_second;
         i64 sleep_ticks = (i64)((remaining_seconds - tolerance) * 10000000);
@@ -3051,7 +3067,7 @@ INTERNAL void win32_precise_sleep_timer(HANDLE timer, f64 seconds)
 
     /* Spin for any remaining time */
     {
-        __profscope(Sleep spin);
+        __profn("Sleep spin");
         while (qpc.QuadPart < target_qpc) {
             YieldProcessor();
             QueryPerformanceCounter(&qpc);
@@ -3079,14 +3095,14 @@ INTERNAL void win32_precise_sleep_legacy(f64 seconds)
     f64 sleep_ms = (seconds * 1000) - tolerance;
     i32 sleep_slices = (i32)(sleep_ms / scheduler_period_ms);
     if (sleep_slices > 0) {
-        __profscope(Legacy sleep part);
+        __profn("Legacy sleep part");
         Sleep((DWORD)sleep_slices * scheduler_period_ms);
     }
     QueryPerformanceCounter(&qpc);
 
     /* Spin for any remaining time */
     {
-        __profscope(Legacy sleep spin);
+        __profn("Legacy sleep spin");
         while (qpc.QuadPart < target_qpc) {
             YieldProcessor();
             QueryPerformanceCounter(&qpc);
@@ -3162,7 +3178,7 @@ int CALLBACK wWinMain(_In_ HINSTANCE instance, _In_opt_ HINSTANCE prev_instance,
 
 #if PROFILING
     {
-        __profscope(Launch profiler);
+        __profn("Launch profiler");
         STARTUPINFO si = ZI;
         si.cb = sizeof(si);
         PROCESS_INFORMATION pi = ZI;
diff --git a/src/ttf_dwrite.cpp b/src/ttf_dwrite.cpp
index 4e61128c..11933a7b 100644
--- a/src/ttf_dwrite.cpp
+++ b/src/ttf_dwrite.cpp
@@ -174,7 +174,7 @@ struct ttf_decode_result ttf_decode(struct arena *arena, struct string encoded,
     u32 out_offset_y = 0;
     u32 row_height = 0;
     {
-        __profscope(Build atlas);
+        __profn("Build atlas");
         for (u16 i = 0; i < glyph_count; ++i) {
             /* Render glyph to target */
             DWRITE_GLYPH_RUN glyph_run = ZI;
diff --git a/src/user.c b/src/user.c
index 831df4e0..e164f188 100644
--- a/src/user.c
+++ b/src/user.c
@@ -1147,7 +1147,7 @@ INTERNAL void user_update(void)
     {
         /* Copy valid entities */
         {
-            __profscope(Build ents list for sorting);
+            __profn("Build ents list for sorting");
             for (u64 ent_index = 0; ent_index < G.ss_blended->num_ents_reserved; ++ent_index) {
                 struct sim_ent *ent = &G.ss_blended->ents[ent_index];
                 if (sim_ent_is_valid_and_active(ent)) {
@@ -1158,7 +1158,7 @@ INTERNAL void user_update(void)
         }
         /* Sort */
         {
-            __profscope(Sort ents);
+            __profn("Sort ents");
             merge_sort(sorted, sorted_count, sizeof(*sorted), ent_draw_order_cmp, NULL);
         }
     }
@@ -1168,7 +1168,7 @@ INTERNAL void user_update(void)
      * ========================== */
 
     {
-        __profscope(Draw entities);
+        __profn("Draw entities");
         for (u64 sorted_index = 0; sorted_index < sorted_count; ++sorted_index) {
             struct sim_ent *ent = sorted[sorted_index];
             if (!sim_ent_is_valid_and_active(ent)) continue;
@@ -1694,7 +1694,7 @@ INTERNAL void user_update(void)
 
     /* Draw crosshair or show cursor */
     if (!G.debug_camera) {
-        __profscope(Draw crosshair);
+        __profn("Draw crosshair");
         struct v2 crosshair_pos = G.user_cursor;
         struct sprite_tag crosshair = sprite_tag_from_path(LIT("sprite/crosshair.ase"));
         struct sprite_texture *t = sprite_texture_from_tag_async(sprite_frame_scope, crosshair);
@@ -1706,7 +1706,7 @@ INTERNAL void user_update(void)
     /* FIXME: Enable this */
 #if 0
     {
-        __profscope(Update window cursor);
+        __profn("Update window cursor");
         if (G.debug_camera) {
             sys_window_cursor_disable_clip(G.window);
             sys_window_cursor_show(G.window);
@@ -1924,7 +1924,7 @@ INTERNAL void user_update(void)
      * ========================== */
 
     if (G.debug_draw) {
-        __profscope(Draw debug info);
+        __profn("Draw debug info");
         struct font *font = font_load_async(LIT("font/fixedsys.ttf"), 12.0f);
         if (font) {
             struct arena_temp temp = arena_temp_begin(scratch.arena);
@@ -2047,7 +2047,7 @@ INTERNAL void user_update(void)
      * ========================== */
 
     {
-        __profscope(Render);
+        __profn("Render");
 
         struct rect user_viewport = RECT_FROM_V2(V2(0, 0), G.user_size);
         struct v2i32 user_resolution = v2_round_to_int(user_viewport.size);
@@ -2107,7 +2107,7 @@ INTERNAL SYS_JOB_DEF(user_job, _)
 
     while (!atomic_i32_fetch(&G.shutdown)) {
         {
-            __profscope(User sleep);
+            __profn("User sleep");
             sleep_frame(last_frame_ns, target_dt_ns);
         }
         last_frame_ns = sys_time_ns();
@@ -2268,11 +2268,11 @@ INTERNAL SYS_JOB_DEF(local_sim_job, _)
     while (!atomic_i32_fetch(&G.shutdown)) {
         struct arena_temp scratch = scratch_begin_no_conflict();
         {
-            __profscope(Sim sleep);
+            __profn("Sim sleep");
             sleep_frame(real_time_ns, step_dt_ns * compute_timescale);
         }
         {
-            __profscope(Sim update);
+            __profn("Sim update");
 
             real_dt_ns = sys_time_ns() - real_time_ns;
             real_time_ns += real_dt_ns;