refactor prof layer

This commit is contained in:
jacob 2025-07-30 20:44:02 -05:00
parent 73f45cd765
commit 6b4aec63f3
9 changed files with 56 additions and 47 deletions

View File

@ -697,8 +697,8 @@ void OnBuild(StringList cli_args)
Error(StringF(&perm, Lit("Profiling is enabled but tracy directory \"%F\" does not exist (set by environment variable \"%F\")"), FmtStr(tracy_src_dir_path), FmtStr(tracy_env_var_name)));
OS_Exit(1);
}
StringListAppend(&perm, &compile_args, StringF(&perm, Lit("-DTRACY_CLIENT_HEADER_PATH=\\\"%F\\\""), FmtStr(tracy_client_header_path)));
StringListAppend(&perm, &compile_args, StringF(&perm, Lit("-DTRACY_CLIENT_SRC_PATH=\\\"%F\\\""), FmtStr(tracy_client_src_path)));
StringListAppend(&perm, &compile_args, StringF(&perm, Lit("-DTracyClientHeaderPath=\\\"%F\\\""), FmtStr(tracy_client_header_path)));
StringListAppend(&perm, &compile_args, StringF(&perm, Lit("-DTracyClientSrcPath=\\\"%F\\\""), FmtStr(tracy_client_src_path)));
}
//- Incbin

View File

@ -19,8 +19,6 @@
#include "intrin.h"
#include "nmmintrin.h" /* SSE4.2 */
#include "../config.h"
#include "../prof/prof.h"
#include "base_core.h"

View File

@ -393,6 +393,11 @@ Global const f64 *_f64_nan = (f64 *)&_f64_nan_u64;
#define IsF32Nan(x) (x != x)
#define IsF64Nan(x) (x != x)
////////////////////////////////
//~ Config
#include "../config.h"
#ifdef __cplusplus
}
#endif

View File

@ -17,7 +17,7 @@
#pragma comment(lib, "dxguid")
#pragma comment(lib, "d3dcompiler")
#if ProfilingIsEnabled_GPU
#if ProfilingGpu
/* For RegOpenKeyEx */
# include <winreg.h>
# pragma comment(lib, "advapi32")
@ -136,7 +136,7 @@ struct command_queue {
struct command_list_pool *cl_pool;
#if ProfilingIsEnabled_GPU
#if ProfilingGpu
__prof_dx12_ctx(prof);
#endif
};
@ -591,7 +591,7 @@ internal void dx12_init_device(void)
}
#endif
#if ProfilingIsEnabled_GPU && ProfilingIsEnabled_GPU_STABLE_POWER_STATE
#if ProfilingGpu && ProfilingGpuStablePowerState
/* Enable stable power state */
{
__profn("Set stable power state");
@ -3464,7 +3464,7 @@ void gp_present(G_Swapchain *gp_swapchain, Vec2I32 backbuffer_resolution, G_Reso
}
}
#if ProfilingIsEnabled_GPU
#if ProfilingGpu
{
__profframe(0);

View File

@ -3391,21 +3391,21 @@ int CALLBACK wWinMain(_In_ HINSTANCE instance, _In_opt_ HINSTANCE prev_instance,
STARTUPINFO si = ZI;
si.cb = sizeof(si);
PROCESS_INFORMATION pi = ZI;
wchar_t cmd[sizeof(ProfilingIsEnabled_CMD_WSTR)] = ZI;
CopyBytes(cmd, ProfilingIsEnabled_CMD_WSTR, sizeof(ProfilingIsEnabled_CMD_WSTR));
DeleteFileW(ProfilingIsEnabled_FILE_WSTR);
wchar_t cmd[sizeof(ProfilingCmdWstr)] = ZI;
CopyBytes(cmd, ProfilingCmdWstr, sizeof(ProfilingCmdWstr));
DeleteFileW(ProfilingOutFileWstr);
b32 success = CreateProcessW(0, cmd, 0, 0, 0, DETACHED_PROCESS, 0, 0, &si, &pi);
if (!success)
{
MessageBoxExW(0, L"Failed to launch profiler using command '" ProfilingIsEnabled_CMD_WSTR L"'.", L"Error", MB_ICONSTOP | MB_SETFOREGROUND | MB_TOPMOST, 0);
MessageBoxExW(0, L"Failed to launch profiler using command '" ProfilingCmdWstr L"'.", L"Error", MB_ICONSTOP | MB_SETFOREGROUND | MB_TOPMOST, 0);
}
}
/* Set internal profiler thread affinities */
{
__profn("Set profiler thread affinities");
wchar_t *prefix_name_wstr = PROFILER_THREAD_PREFIX_WSTR;
u64 prefix_name_wstr_len = ((i32)sizeof(PROFILER_THREAD_PREFIX_WSTR) >> 1) - 1;
if (prefix_name_wstr_len > 0 && PROFILER_THREAD_AFFINITY_MASK != 0)
wchar_t *prefix_name_wstr = ProfilerThreadPrefixWstr;
u64 prefix_name_wstr_len = ((i32)sizeof(ProfilerThreadPrefixWstr) >> 1) - 1;
if (prefix_name_wstr_len > 0 && ProfilerThreadAffinityMask != 0)
{
DWORD proc_id = GetCurrentProcessId();
HANDLE snapshot = CreateToolhelp32Snapshot(TH32CS_SNAPTHREAD, 0);
@ -3431,7 +3431,7 @@ int CALLBACK wWinMain(_In_ HINSTANCE instance, _In_opt_ HINSTANCE prev_instance,
if (thread_name_len >= prefix_name_wstr_len && EqBytes(thread_name_wstr, prefix_name_wstr, prefix_name_wstr_len))
{
__profn("Set profiler thread affinity");
b32 success = SetThreadAffinityMask(thread, PROFILER_THREAD_AFFINITY_MASK) != 0;
b32 success = SetThreadAffinityMask(thread, ProfilerThreadAffinityMask) != 0;
{
/* Retry until external tools can set correct process affinity */
i32 delay_ms = 16;
@ -3439,7 +3439,7 @@ int CALLBACK wWinMain(_In_ HINSTANCE instance, _In_opt_ HINSTANCE prev_instance,
{
__profn("Profiler thread affinity retry");
Sleep(delay_ms);
success = SetThreadAffinityMask(thread, PROFILER_THREAD_AFFINITY_MASK) != 0;
success = SetThreadAffinityMask(thread, ProfilerThreadAffinityMask) != 0;
delay_ms *= 2;
}
}

View File

@ -1,3 +1,3 @@
#include "prof.h"
#include "prof_core_tracy.cpp"
#include "prof_tracy.cpp"

View File

@ -1,6 +1,6 @@
#ifndef PROF_H
#define PROF_H
#include "prof_core_tracy.h"
#include "prof_tracy.h"
#endif

View File

@ -3,7 +3,7 @@
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
#define TRACY_FIBERS
# include TRACY_CLIENT_SRC_PATH
# include TracyClientSrcPath
#pragma clang diagnostic pop
#endif

View File

@ -1,25 +1,28 @@
#if defined(ProfilingIsEnabled) && ProfilingIsEnabled == 1
////////////////////////////////
//~ Profiling enabled
#ifndef __clang__
# error Only clang is supported when compiling with ProfilingIsEnabled=1 (cleanup attributes are required for profiling markup)
#endif
#define ProfilingIsEnabled_SYSTEM_TRACE 0
#define ProfilingIsEnabled_CAPTURE_FRAME_IMAGE 0
#define ProfilingIsEnabled_LOCKS 0
#define ProfilingIsEnabled_GPU 1
#define ProfilingIsEnabled_GPU_STABLE_POWER_STATE 1
//#define PROFILER_THREAD_AFFINITY_MASK 0x000000000000F000ull
#define PROFILER_THREAD_AFFINITY_MASK 0
#define PROFILER_THREAD_PREFIX_WSTR L"Tracy"
#define ProfilingIsEnabled_FILE_WSTR L".tracy"
#define ProfilingIsEnabled_CMD_WSTR L"cmd /C start \"\" /wait tracy-capture.exe -o .tracy -a 127.0.0.1 && start \"\" tracy-profiler.exe .tracy"
//#define ProfilingIsEnabled_CMD_WSTR L"tracy-profiler.exe -a 127.0.0.1"
#define ProfilingSystemTrace 0
#define ProfilingCaptureFrame 0
#define ProfilingLocks 0
#define ProfilingGpu 1
#define ProfilingGpuStablePowerState 1
//#define ProfilerThreadAffinityMask 0x000000000000F000ull
#define ProfilerThreadAffinityMask 0
#define ProfilerThreadPrefixWstr L"Tracy"
#define ProfilingOutFileWstr L".tracy"
#define ProfilingCmdWstr L"cmd /C start \"\" /wait tracy-capture.exe -o .tracy -a 127.0.0.1 && start \"\" tracy-profiler.exe .tracy"
//#define ProfilingCmdWstr L"tracy-profiler.exe -a 127.0.0.1"
/* Tracy defines */
#define TRACY_ENABLE
#define TRACY_FIBERS
#if !ProfilingIsEnabled_SYSTEM_TRACE
#if !ProfilingSystemTrace
# define TRACY_NO_CALLSTACK
# define TRACY_NO_SYSTEM_TRACING
#endif
@ -30,9 +33,9 @@
#pragma clang diagnostic ignored "-Wextra-semi-stmt"
#pragma clang diagnostic ignored "-Wpointer-sign"
#pragma clang diagnostic ignored "-Wincompatible-pointer-types-discards-qualifiers"
#include TRACY_CLIENT_HEADER_PATH
#include TracyClientHeaderPath
Inline void __prof_zone_cleanup_func(TracyCZoneCtx *ctx) { TracyCZoneEnd(*ctx) }
/* Cleanup handler for the zone context declared by __profnc: it is attached via
 * __attribute((cleanup)) and ends the Tracy zone held in *ctx when that variable
 * leaves scope.
 * Declared `static inline` so each translation unit including this header gets its
 * own definition; a plain `inline` definition in C provides no external definition,
 * so a non-inlined call (e.g. unoptimized builds) could fail to link. */
static inline void __prof_zone_cleanup_func(TracyCZoneCtx *ctx) { TracyCZoneEnd(*ctx) }
#define __profnc(name, color) static const struct ___tracy_source_location_data Cat(__tracy_source_location,__LINE__) = { (name), __func__, __FILE__, (uint32_t)__LINE__, Bgr32(color) }; __attribute((cleanup(__prof_zone_cleanup_func))) TracyCZoneCtx __tracy_zone_ctx = ___tracy_emit_zone_begin( &Cat(__tracy_source_location,__LINE__), 1 )
#define __profn(name) __profnc(name, 0)
#define __prof __profnc(0, 0)
@ -57,9 +60,12 @@ enum __prof_plot_type {
#else
#define ProfilingIsEnabled_CAPTURE_FRAME_IMAGE 0
#define ProfilingIsEnabled_LOCKS 0
#define ProfilingIsEnabled_GPU 0
////////////////////////////////
//~ Profiling disabled
#define ProfilingCaptureFrame 0
#define ProfilingLocks 0
#define ProfilingGpu 0
#define __profnc(name, color)
#define __profn(name)
@ -78,7 +84,7 @@ enum __prof_plot_type {
#endif /* ProfilingIsEnabled */
#if ProfilingIsEnabled_LOCKS
#if ProfilingLocks
# define __proflock_ctx(name) struct TracyCSharedLockCtx *name
# define __proflock_alloc(ctx) TracyCSharedLockAnnounce((ctx))
# define __proflock_release(ctx) TracyCSharedLockTerminate((ctx))
@ -105,18 +111,18 @@ enum __prof_plot_type {
# define __proflock_after_try_shared_lock(ctx, acquired)
# define __proflock_mark(ctx)
# define __proflock_custom_name(ctx, name, len)
#endif /* ProfilingIsEnabled && ProfilingIsEnabled_LOCKS */
#endif /* ProfilingIsEnabled && ProfilingLocks */
#if ProfilingIsEnabled_GPU
#if ProfilingGpu
/* Dx11 */
Inline void __prof_dx11_zone_cleanup_func(TracyCD3D11ZoneCtx *ctx) { ___tracy_d3d11_emit_zone_end(*ctx); }
/* Cleanup handler for the D3D11 GPU zone context declared by __profnc_dx11: it is
 * attached via __attribute((cleanup)) and emits the zone end for *ctx when that
 * variable leaves scope.
 * Declared `static inline` so each translation unit including this header gets its
 * own definition; a plain `inline` definition in C provides no external definition,
 * so a non-inlined call could fail to link. */
static inline void __prof_dx11_zone_cleanup_func(TracyCD3D11ZoneCtx *ctx) { ___tracy_d3d11_emit_zone_end(*ctx); }
# define __profnc_dx11(dx11_ctx, name, color) static const struct ___tracy_source_location_data Cat(__tracy_gpu_d3d11_source_location,__LINE__) = { name, __func__, __FILE__, (uint32_t)__LINE__, Bgr32(color) }; __attribute((cleanup(__prof_dx11_zone_cleanup_func))) TracyCD3D11ZoneCtx __tracy_d3d11_zone_ctx; ___tracy_d3d11_emit_zone_begin( dx11_ctx, &__tracy_d3d11_zone_ctx, &Cat(__tracy_gpu_d3d11_source_location,__LINE__), 1)
# define __prof_dx11_ctx(name) struct TracyCD3D11Ctx *name
# define __prof_dx11_ctx_alloc(ctx, device, device_ctx, name, name_len) ctx = ___tracy_d3d11_context_announce(device, device_ctx, name, name_len)
# define __prof_dx11_ctx_release(ctx) ___tracy_d3d11_context_terminate(ctx)
# define __prof_dx11_collect(ctx) ___tracy_d3d11_context_collect(ctx)
/* Dx12 */
Inline void __prof_dx12_zone_cleanup_func(TracyCD3D12ZoneCtx *ctx) { ___tracy_d3d12_emit_zone_end(*ctx); }
/* Cleanup handler for the D3D12 GPU zone context declared by __profnc_dx12: it is
 * attached via __attribute((cleanup)) and emits the zone end for *ctx when that
 * variable leaves scope.
 * Declared `static inline` so each translation unit including this header gets its
 * own definition; a plain `inline` definition in C provides no external definition,
 * so a non-inlined call could fail to link. */
static inline void __prof_dx12_zone_cleanup_func(TracyCD3D12ZoneCtx *ctx) { ___tracy_d3d12_emit_zone_end(*ctx); }
# define __profnc_dx12(dx12_ctx, cmd_list, name, color) static const struct ___tracy_source_location_data Cat(__tracy_gpu_d3d12_source_location,__LINE__) = { name, __func__, __FILE__, (uint32_t)__LINE__, Bgr32(color) }; __attribute((cleanup(__prof_dx12_zone_cleanup_func))) TracyCD3D12ZoneCtx __tracy_d3d12_zone_ctx; ___tracy_d3d12_emit_zone_begin( dx12_ctx, cmd_list, &__tracy_d3d12_zone_ctx, &Cat(__tracy_gpu_d3d12_source_location,__LINE__), 1)
# define __prof_dx12_ctx(name) struct TracyCD3D12Ctx *name
# define __prof_dx12_ctx_alloc(ctx, device, queue, name, name_len) ctx = ___tracy_d3d12_context_announce(device, queue, name, name_len)
@ -133,19 +139,19 @@ Inline void __prof_dx12_zone_cleanup_func(TracyCD3D12ZoneCtx *ctx) { ___tracy_d3
# define __prof_dx12_ctx_release(ctx)
# define __prof_dx12_new_frame(ctx)
# define __prof_dx12_collect(ctx)
#endif /* ProfilingIsEnabled_GPU */
#endif /* ProfilingGpu */
#if ProfilingIsEnabled_CAPTURE_FRAME_IMAGE
#if ProfilingCaptureFrame
# define __profframeimage(image, width, height, offset, flipped) TracyCFrameImage((image), (width), (height), (offset), (flipped))
#else
# define __profframeimage(image, width, height, offset, flipped)
#endif /* ProfilingIsEnabled_CAPTURE_FRAME_IMAGE */
#endif /* ProfilingCaptureFrame */
#ifdef TRACY_FIBERS
/* Tracy fiber methods are marked noinline because otherwise issues can arise
 * across fiber context boundaries during optimization */
ForceNoInline Inline void __prof_fiber_enter(char *fiber_name, i32 profiler_group) { TracyCFiberEnterWithHint(fiber_name, profiler_group); }
ForceNoInline Inline void __prof_fiber_leave(void) { TracyCFiberLeave; }
/* Thin wrappers over the Tracy C fiber macros: __prof_fiber_enter forwards the
 * fiber name and profiler group hint to TracyCFiberEnterWithHint, and
 * __prof_fiber_leave expands TracyCFiberLeave.
 * `static` is required alongside `noinline`: every call is forced out of line, and
 * under C inline semantics a non-static `inline` definition provides no external
 * definition for that call to bind to, which would leave the symbol unresolved at
 * link time. With internal linkage each including translation unit carries its own
 * copy. */
__attribute__((noinline)) static inline void __prof_fiber_enter(char *fiber_name, int profiler_group) { TracyCFiberEnterWithHint(fiber_name, profiler_group); }
__attribute__((noinline)) static inline void __prof_fiber_leave(void) { TracyCFiberLeave; }
#else
# define __prof_fiber_enter(fiber_name, profiler_group)
# define __prof_fiber_leave()