From b9bc22a0193436c0e91f6ae5d51c442eede2a20d Mon Sep 17 00:00:00 2001 From: jacob Date: Sun, 26 Jan 2025 21:29:03 -0600 Subject: [PATCH] profile locks --- profile.bat | 6 +- src/app.c | 2 +- src/common.h | 43 +++- src/game.c | 3 +- src/intrinsics.h | 2 +- src/math.h | 6 +- src/sys.h | 3 + src/sys_win32.c | 47 +++- .../tracy/client/TracyProfiler.cpp | 220 +++++++----------- src/third_party/tracy/common/TracyQueue.hpp | 4 +- src/third_party/tracy/tracy/TracyC.h | 42 ++++ src/user.c | 10 +- 12 files changed, 233 insertions(+), 155 deletions(-) diff --git a/profile.bat b/profile.bat index 21f44cb9..3241f074 100644 --- a/profile.bat +++ b/profile.bat @@ -3,9 +3,9 @@ :: `ping` is being used in place of `TIMEOUT` :: https://www.ibm.com/support/pages/timeout-command-run-batch-job-exits-immediately-and-returns-error-input-redirection-not-supported-exiting-process-immediately -taskkill /im tracy.exe /f 2> nul +taskkill /im tracy-profiler.exe /f 2> nul -start %UserProfile%\Home\apps\tracy\capture.exe -o .tracy -f +start tracy-capture.exe -o .tracy -f echo Launching app... build\clang-user-profiling\bin\powerplay.exe @@ -21,4 +21,4 @@ if NOT %errorlevel% == 0 ( ping -n 2 127.0.0.1 >NUL echo Launching tracy... -start "" "%UserProfile%\Home\apps\tracy\Tracy.exe" ".tracy" +start "" "tracy-profiler.exe" ".tracy" diff --git a/src/app.c b/src/app.c index 4f154dd8..c15e9651 100644 --- a/src/app.c +++ b/src/app.c @@ -129,6 +129,7 @@ void app_entry_point(void) G.exit_sf = sync_flag_alloc(); G.exit_callbacks_mutex = sys_mutex_alloc(); G.exit_callbacks_arena = arena_alloc(GIGABYTE(64)); + G.arena = arena_alloc(GIGABYTE(64)); u32 worker_count = 4; { @@ -147,7 +148,6 @@ void app_entry_point(void) #endif } - G.arena = arena_alloc(GIGABYTE(64)); G.write_path = initialize_write_directory(&G.arena, STR(WRITE_DIR)); /* Startup logging */ diff --git a/src/common.h b/src/common.h index 14f0636a..43dc4891 100644 --- a/src/common.h +++ b/src/common.h @@ -373,6 +373,8 @@ GLOBAL const f64 *_f64_nan = (f64 *)&_f64_nan_u64; typedef __uint128_t u128; INLINE b32 u128_eq(u128 a, u128 b) { return a == b; } +INLINE b32 u128_lt(u128 a, u128 b) { return a < b; } +INLINE b32 u128_gt(u128 a, u128 b) { return a > b; } INLINE u128 u128_xor_u8(u128 a, u8 b) { return a ^ b; } INLINE u128 u128_mul(u128 a, u128 b) { return a * b; } @@ -389,6 +391,8 @@ INLINE u128 u128_mul(u128 a, u128 b) { return a * b; } typedef struct { u64 hi; u64 lo; } u128; INLINE b32 u128_eq(u128 a, u128 b) { return a.hi == b.hi && a.lo == b.lo; } +INLINE b32 u128_lt(u128 a, u128 b) { return ((((a.hi > b.hi) - (a.hi < b.hi)) << 1) + ((a.lo > b.lo) - (a.lo < b.lo))) < 0; } +INLINE b32 u128_gt(u128 a, u128 b) { return ((((a.hi > b.hi) - (a.hi < b.hi)) << 1) + ((a.lo > b.lo) - (a.lo < b.lo))) > 0; } INLINE u128 u128_xor_u8(u128 a, u8 b) { return U128(a.hi, a.lo ^ b); } /* https://www.codeproject.com/Tips/784635/UInt-Bit-Operations */ @@ -685,13 +689,28 @@ INLINE f64 clamp_f64(f64 v, f64 min, f64 max) { return v < min ? min : v > max ? #endif INLINE void __prof_zone_cleanup_func(TracyCZoneCtx *__tracy_ctx) { TracyCZoneEnd(*__tracy_ctx); } -#define __profalloc(ptr, size) TracyCAlloc(ptr, size) -#define __proffree(ptr) TracyCFree(ptr) -#define __profmsg(txt, len, col) TracyCMessageC(txt, len, col); -#define __profframe(name) TracyCFrameMarkNamed(name) +#define __profalloc(ptr, size) TracyCAlloc((ptr), (size)) +#define __proffree(ptr) TracyCFree((ptr)) +#define __profmsg(txt, len, col) TracyCMessageC((txt), (len), (col)); +#define __profframe(name) TracyCFrameMarkNamed((name)) +#define __profthread(name) TracyCSetThreadName((name)) + +#define __proflock_ctx TracyCSharedLockCtx +#define __proflock_alloc(ctx) TracyCSharedLockAnnounce((ctx)) +#define __proflock_release(ctx) TracyCSharedLockTerminate((ctx)) +#define __proflock_before_exclusive_lock(ctx) TracyCSharedLockBeforeExclusiveLock((ctx)) +#define __proflock_after_exclusive_lock(ctx) TracyCSharedLockAfterExclusiveLock((ctx)) +#define __proflock_after_exclusive_unlock(ctx) TracyCSharedLockAfterExclusiveUnlock((ctx)) +#define __proflock_after_try_exclusive_lock(ctx, acquired) TracyCSharedLockAfterTryExclusiveLock((ctx), (acquired)) +#define __proflock_before_shared_lock(ctx) TracyCSharedLockBeforeSharedLock((ctx)) +#define __proflock_after_shared_lock(ctx) TracyCSharedLockAfterSharedLock((ctx)) +#define __proflock_after_shared_unlock(ctx) TracyCSharedLockAfterSharedUnlock((ctx)) +#define __proflock_after_try_shared_lock(ctx, acquired) TracyCSharedLockAfterTrySharedLock((ctx), (acquired)) +#define __proflock_mark(ctx) TracyCSharedLockMark((ctx)) +#define __proflock_custom_name(ctx, name, len) TracyCSharedLockCustomName((ctx), (name), (len)) #if PROFILING_CAPTURE_FRAME_IMAGE -# define __profframeimage(image, width, height, offset, flipped) TracyCFrameImage(image, width, height, offset, flipped); +# define __profframeimage(image, width, height, offset, flipped) TracyCFrameImage((image), (width), (height), (offset), (flipped)); #else # define __profframeimage(image, width, height, offset, flipped) #endif /* PROFILING_CAPTURE_FRAME_IMAGE */ @@ -706,7 +725,21 @@ INLINE void __prof_zone_cleanup_func(TracyCZoneCtx *__tracy_ctx) { TracyCZoneEnd #define __proffree(ptr) #define __profmsg(txt, len, col) #define __profframe(name) +#define __profthread(name) #define __profframeimage(image, width, height, offset, flipped) +#define __proflock_ctx +#define __proflock_alloc(ctx) +#define __proflock_release(ctx) +#define __proflock_before_exclusive_lock(ctx) +#define __proflock_after_exclusive_lock(ctx) +#define __proflock_after_exclusive_unlock(ctx) +#define __proflock_after_try_exclusive_lock(ctx, acquired) +#define __proflock_before_shared_lock(ctx) +#define __proflock_after_shared_lock(ctx) +#define __proflock_after_shared_unlock(ctx) +#define __proflock_after_try_shared_lock(ctx, acquired) +#define __proflock_mark(ctx) +#define __proflock_custom_name(ctx, name, len) #endif /* PROFILING */ diff --git a/src/game.c b/src/game.c index 65890a32..089d575f 100644 --- a/src/game.c +++ b/src/game.c @@ -302,7 +302,8 @@ INTERNAL void spawn_test_entities(void) e->layer = GAME_LAYER_RELATIVE_WEAPON; entity_enable_prop(e, ENTITY_PROP_WEAPON); - e->trigger_delay = 1.0f / 10.0f; + //e->trigger_delay = 1.0f / 10.0f; + e->trigger_delay = 1.0f / 100.0f; player_ent->equipped = e->handle; } diff --git a/src/intrinsics.h b/src/intrinsics.h index fd4c8c01..3dc45654 100644 --- a/src/intrinsics.h +++ b/src/intrinsics.h @@ -115,7 +115,7 @@ INLINE void ix_pause(void) _mm_pause(); } -INLINE i64 ix_clock(void) +INLINE u64 ix_clock(void) { return __rdtsc(); } diff --git a/src/math.h b/src/math.h index 4108dbaf..72d96da4 100644 --- a/src/math.h +++ b/src/math.h @@ -224,7 +224,7 @@ INLINE f32 math_ln(f32 x) LOCAL_PERSIST const f32 ln2_hi = 6.9313812256e-01f; LOCAL_PERSIST const f32 ln2_lo = 9.0580006145e-06f; - i32 x_int = *(u32 *)&x; + i32 x_int = *(i32 *)&x; i32 k = 0; if (x_int < 0x00800000) { @@ -238,7 +238,7 @@ INLINE f32 math_ln(f32 x) } k -= 25; x *= two_p25; - x_int = *(u32 *)&x; + x_int = *(i32 *)&x; } else if (x_int >= 0x7f800000) { return x + x; } @@ -300,7 +300,7 @@ INLINE f32 math_exp(f32 x) LOCAL_PERSIST const f32 two_m100 = 7.8886090522e-31f; u32 x_uint = *(u32 *)&x; - i32 x_sign_bit = (x_uint >> 31) & 1; + u32 x_sign_bit = (x_uint >> 31) & 1; x_uint &= 0x7fffffff; /* Filter out non-finite argument */ diff --git a/src/sys.h b/src/sys.h index d8abef1a..bc05f5fb 100644 --- a/src/sys.h +++ b/src/sys.h @@ -336,6 +336,9 @@ void sys_window_cursor_disable_clip(struct sys_window *sys_window); struct sys_mutex { u64 handle; +#if PROFILING + __proflock_ctx profiling_ctx; +#endif #if RTC u64 owner_tid; struct atomic_i64 count; diff --git a/src/sys_win32.c b/src/sys_win32.c index 9d3a11a6..ee58f879 100644 --- a/src/sys_win32.c +++ b/src/sys_win32.c @@ -679,6 +679,7 @@ INTERNAL void win32_update_window_from_system(struct win32_window *window); INTERNAL void win32_window_process_event(struct win32_window *window, struct sys_event event) { + __prof; struct sys_lock lock = sys_mutex_lock_e(&window->event_callbacks_mutex); for (u64 i = 0; i < window->event_callbacks_count; ++i) { window->event_callbacks[i](event); @@ -1356,10 +1357,12 @@ void sys_window_cursor_disable_clip(struct sys_window *sys_window) struct sys_mutex sys_mutex_alloc(void) { __prof; + struct sys_mutex mutex = ZI; + + __proflock_alloc(mutex.profiling_ctx); SRWLOCK srwlock = SRWLOCK_INIT; - struct sys_mutex mutex = { - .handle = *(u64 *)&srwlock - }; + mutex.handle = *(u64 *)&srwlock; + return mutex; } @@ -1367,6 +1370,7 @@ void sys_mutex_release(struct sys_mutex *mutex) { __prof; (UNUSED)mutex; + __proflock_release(mutex->profiling_ctx); /* Mutex should be unlocked */ ASSERT(atomic_i64_eval(&mutex->count) == 0); } @@ -1374,7 +1378,9 @@ void sys_mutex_release(struct sys_mutex *mutex) struct sys_lock sys_mutex_lock_e(struct sys_mutex *mutex) { __prof; + __proflock_before_exclusive_lock(mutex->profiling_ctx); AcquireSRWLockExclusive((SRWLOCK *)&mutex->handle); + __proflock_after_exclusive_lock(mutex->profiling_ctx); #if RTC mutex->owner_tid = (u64)GetCurrentThreadId(); atomic_i64_inc_eval(&mutex->count); @@ -1388,7 +1394,9 @@ struct sys_lock sys_mutex_lock_e(struct sys_mutex *mutex) struct sys_lock sys_mutex_lock_s(struct sys_mutex *mutex) { __prof; + __proflock_before_shared_lock(mutex->profiling_ctx); AcquireSRWLockShared((SRWLOCK *)&mutex->handle); + __proflock_after_shared_lock(mutex->profiling_ctx); #if RTC atomic_i64_inc_eval(&mutex->count); #endif @@ -1406,8 +1414,10 @@ void sys_mutex_unlock(struct sys_lock *lock) #endif if (lock->exclusive) { ReleaseSRWLockExclusive((SRWLOCK *)&lock->mutex->handle); + __proflock_after_exclusive_unlock(lock->mutex->profiling_ctx); } else { ReleaseSRWLockShared((SRWLOCK *)&lock->mutex->handle); + __proflock_after_shared_unlock(lock->mutex->profiling_ctx); } MEMZERO_STRUCT(lock); } @@ -1489,7 +1499,22 @@ void sys_condition_variable_wait(struct sys_condition_variable *cv, struct sys_l atomic_i64_dec_eval(&mutex->count); #endif struct win32_condition_variable *w32cv = (struct win32_condition_variable *)cv->handle; + + /* TODO: Correct profiling of internal condition variable sleep / wait mutex state */ + if (exclusive) { + __proflock_after_exclusive_unlock(mutex->profiling_ctx); + } else { + __proflock_after_shared_unlock(mutex->profiling_ctx); + } SleepConditionVariableSRW(&w32cv->condition_variable, (SRWLOCK *)&mutex->handle, INFINITE, exclusive ? 0 : CONDITION_VARIABLE_LOCKMODE_SHARED); + if (exclusive) { + __proflock_before_exclusive_lock(mutex->profiling_ctx); + __proflock_after_exclusive_lock(mutex->profiling_ctx); + } else { + __proflock_before_shared_lock(mutex->profiling_ctx); + __proflock_after_shared_lock(mutex->profiling_ctx); + } + #if RTC atomic_i64_inc_eval(&mutex->count); if (exclusive) { @@ -1513,7 +1538,22 @@ void sys_condition_variable_wait_time(struct sys_condition_variable *cv, struct #endif struct win32_condition_variable *w32cv = (struct win32_condition_variable *)cv->handle; u32 ms = (u32)math_round_to_int((f32)seconds * 1000.f); + + /* TODO: Correct profiling of internal condition variable sleep / wait mutex state */ + if (exclusive) { + __proflock_after_exclusive_unlock(mutex->profiling_ctx); + } else { + __proflock_after_shared_unlock(mutex->profiling_ctx); + } SleepConditionVariableSRW(&w32cv->condition_variable, (SRWLOCK *)&mutex->handle, ms, exclusive ? 0 : CONDITION_VARIABLE_LOCKMODE_SHARED); + if (exclusive) { + __proflock_before_exclusive_lock(mutex->profiling_ctx); + __proflock_after_exclusive_lock(mutex->profiling_ctx); + } else { + __proflock_before_shared_lock(mutex->profiling_ctx); + __proflock_after_shared_lock(mutex->profiling_ctx); + } + #if RTC atomic_i64_inc_eval(&mutex->count); if (exclusive) { @@ -1639,6 +1679,7 @@ INTERNAL void win32_thread_release_locked(struct sys_lock *lock, struct win32_th INTERNAL DWORD WINAPI win32_thread_proc(LPVOID vt) { struct win32_thread *t = (struct win32_thread *)vt; + __profthread(t->thread_name_cstr); /* Initialize COM */ CoInitializeEx(NULL, COINIT_APARTMENTTHREADED); diff --git a/src/third_party/tracy/client/TracyProfiler.cpp b/src/third_party/tracy/client/TracyProfiler.cpp index 6fe78680..2fa787a6 100644 --- a/src/third_party/tracy/client/TracyProfiler.cpp +++ b/src/third_party/tracy/client/TracyProfiler.cpp @@ -79,6 +79,7 @@ #include "TracyThread.hpp" #include "TracyArmCpuTable.hpp" #include "TracySysTrace.hpp" +#include "TracyLock.hpp" #include "../tracy/TracyC.h" #if defined TRACY_MANUAL_LIFETIME && !defined(TRACY_DELAYED_INIT) @@ -4792,171 +4793,128 @@ TRACY_API void ___tracy_emit_gpu_time_sync_serial( const struct ___tracy_gpu_tim struct __tracy_lockable_context_data { - uint32_t m_id; -#ifdef TRACY_ON_DEMAND - std::atomic m_lockCount; - std::atomic m_active; -#endif + tracy::LockableCtx ctx; }; TRACY_API struct __tracy_lockable_context_data* ___tracy_announce_lockable_ctx( const struct ___tracy_source_location_data* srcloc ) { - struct __tracy_lockable_context_data *lockdata = (__tracy_lockable_context_data*)tracy::tracy_malloc( sizeof( __tracy_lockable_context_data ) ); - lockdata->m_id =tracy:: GetLockCounter().fetch_add( 1, std::memory_order_relaxed ); -#ifdef TRACY_ON_DEMAND - new(&lockdata->m_lockCount) std::atomic( 0 ); - new(&lockdata->m_active) std::atomic( false ); -#endif - assert( lockdata->m_id != (std::numeric_limits::max)() ); + static_assert(sizeof(struct ___tracy_source_location_data) == sizeof(tracy::SourceLocationData)); // C srcloc struct must fit in cpp srcloc struct + struct __tracy_lockable_context_data *lockdata = (__tracy_lockable_context_data *)tracy::tracy_malloc(sizeof(__tracy_lockable_context_data)); - auto item = tracy::Profiler::QueueSerial(); - tracy::MemWrite( &item->hdr.type, tracy::QueueType::LockAnnounce ); - tracy::MemWrite( &item->lockAnnounce.id, lockdata->m_id ); - tracy::MemWrite( &item->lockAnnounce.time, tracy::Profiler::GetTime() ); - tracy::MemWrite( &item->lockAnnounce.lckloc, (uint64_t)srcloc ); - tracy::MemWrite( &item->lockAnnounce.type, tracy::LockType::Lockable ); -#ifdef TRACY_ON_DEMAND - tracy::GetProfiler().DeferItem( *item ); -#endif - tracy::Profiler::QueueSerialFinish(); + // Because Tracy deletes the "=" operator + tracy::LockableCtx ctx { (tracy::SourceLocationData *)srcloc }; + memcpy(&lockdata->ctx, &ctx, sizeof(ctx)); return lockdata; } TRACY_API void ___tracy_terminate_lockable_ctx( struct __tracy_lockable_context_data* lockdata ) { - auto item = tracy::Profiler::QueueSerial(); - tracy::MemWrite( &item->hdr.type, tracy::QueueType::LockTerminate ); - tracy::MemWrite( &item->lockTerminate.id, lockdata->m_id ); - tracy::MemWrite( &item->lockTerminate.time, tracy::Profiler::GetTime() ); -#ifdef TRACY_ON_DEMAND - tracy::GetProfiler().DeferItem( *item ); -#endif - tracy::Profiler::QueueSerialFinish(); - -#ifdef TRACY_ON_DEMAND - lockdata->m_lockCount.~atomic(); - lockdata->m_active.~atomic(); -#endif + lockdata->ctx.~LockableCtx(); tracy::tracy_free((void*)lockdata); } TRACY_API int32_t ___tracy_before_lock_lockable_ctx( struct __tracy_lockable_context_data* lockdata ) { -#ifdef TRACY_ON_DEMAND - bool queue = false; - const auto locks = lockdata->m_lockCount.fetch_add( 1, std::memory_order_relaxed ); - const auto active = lockdata->m_active.load( std::memory_order_relaxed ); - if( locks == 0 || active ) - { - const bool connected = tracy::GetProfiler().IsConnected(); - if( active != connected ) lockdata->m_active.store( connected, std::memory_order_relaxed ); - if( connected ) queue = true; - } - if( !queue ) return static_cast(false); -#endif - - auto item = tracy::Profiler::QueueSerial(); - tracy::MemWrite( &item->hdr.type, tracy::QueueType::LockWait ); - tracy::MemWrite( &item->lockWait.thread, tracy::GetThreadHandle() ); - tracy::MemWrite( &item->lockWait.id, lockdata->m_id ); - tracy::MemWrite( &item->lockWait.time, tracy::Profiler::GetTime() ); - tracy::Profiler::QueueSerialFinish(); - return static_cast(true); + return lockdata->ctx.BeforeLock(); } TRACY_API void ___tracy_after_lock_lockable_ctx( struct __tracy_lockable_context_data* lockdata ) { - auto item = tracy::Profiler::QueueSerial(); - tracy::MemWrite( &item->hdr.type, tracy::QueueType::LockObtain ); - tracy::MemWrite( &item->lockObtain.thread, tracy::GetThreadHandle() ); - tracy::MemWrite( &item->lockObtain.id, lockdata->m_id ); - tracy::MemWrite( &item->lockObtain.time, tracy::Profiler::GetTime() ); - tracy::Profiler::QueueSerialFinish(); + lockdata->ctx.AfterLock(); } TRACY_API void ___tracy_after_unlock_lockable_ctx( struct __tracy_lockable_context_data* lockdata ) { -#ifdef TRACY_ON_DEMAND - lockdata->m_lockCount.fetch_sub( 1, std::memory_order_relaxed ); - if( !lockdata->m_active.load( std::memory_order_relaxed ) ) return; - if( !tracy::GetProfiler().IsConnected() ) - { - lockdata->m_active.store( false, std::memory_order_relaxed ); - return; - } -#endif - - auto item = tracy::Profiler::QueueSerial(); - tracy::MemWrite( &item->hdr.type, tracy::QueueType::LockRelease ); - tracy::MemWrite( &item->lockRelease.id, lockdata->m_id ); - tracy::MemWrite( &item->lockRelease.time, tracy::Profiler::GetTime() ); - tracy::Profiler::QueueSerialFinish(); + lockdata->ctx.AfterUnlock(); } TRACY_API void ___tracy_after_try_lock_lockable_ctx( struct __tracy_lockable_context_data* lockdata, int32_t acquired ) { -#ifdef TRACY_ON_DEMAND - if( !acquired ) return; - - bool queue = false; - const auto locks = lockdata->m_lockCount.fetch_add( 1, std::memory_order_relaxed ); - const auto active = lockdata->m_active.load( std::memory_order_relaxed ); - if( locks == 0 || active ) - { - const bool connected = tracy::GetProfiler().IsConnected(); - if( active != connected ) lockdata->m_active.store( connected, std::memory_order_relaxed ); - if( connected ) queue = true; - } - if( !queue ) return; -#endif - - if( acquired ) - { - auto item = tracy::Profiler::QueueSerial(); - tracy::MemWrite( &item->hdr.type, tracy::QueueType::LockObtain ); - tracy::MemWrite( &item->lockObtain.thread, tracy::GetThreadHandle() ); - tracy::MemWrite( &item->lockObtain.id, lockdata->m_id ); - tracy::MemWrite( &item->lockObtain.time, tracy::Profiler::GetTime() ); - tracy::Profiler::QueueSerialFinish(); - } + lockdata->ctx.AfterTryLock(acquired); } -TRACY_API void ___tracy_mark_lockable_ctx( struct __tracy_lockable_context_data* lockdata, const struct ___tracy_source_location_data* srcloc ) +TRACY_API void ___tracy_mark_lockable_ctx(struct __tracy_lockable_context_data *lockdata, const struct ___tracy_source_location_data *srcloc) { -#ifdef TRACY_ON_DEMAND - const auto active = lockdata->m_active.load( std::memory_order_relaxed ); - if( !active ) return; - const auto connected = tracy::GetProfiler().IsConnected(); - if( !connected ) - { - if( active ) lockdata->m_active.store( false, std::memory_order_relaxed ); - return; - } -#endif - - auto item = tracy::Profiler::QueueSerial(); - tracy::MemWrite( &item->hdr.type, tracy::QueueType::LockMark ); - tracy::MemWrite( &item->lockMark.thread, tracy::GetThreadHandle() ); - tracy::MemWrite( &item->lockMark.id, lockdata->m_id ); - tracy::MemWrite( &item->lockMark.srcloc, (uint64_t)srcloc ); - tracy::Profiler::QueueSerialFinish(); + lockdata->ctx.Mark((tracy::SourceLocationData *)srcloc); } -TRACY_API void ___tracy_custom_name_lockable_ctx( struct __tracy_lockable_context_data* lockdata, const char* name, size_t nameSz ) +TRACY_API void ___tracy_custom_name_lockable_ctx(struct __tracy_lockable_context_data *lockdata, const char *name, size_t nameSz) { - assert( nameSz < (std::numeric_limits::max)() ); - auto ptr = (char*)tracy::tracy_malloc( nameSz ); - memcpy( ptr, name, nameSz ); - auto item = tracy::Profiler::QueueSerial(); - tracy::MemWrite( &item->hdr.type, tracy::QueueType::LockName ); - tracy::MemWrite( &item->lockNameFat.id, lockdata->m_id ); - tracy::MemWrite( &item->lockNameFat.name, (uint64_t)ptr ); - tracy::MemWrite( &item->lockNameFat.size, (uint16_t)nameSz ); -#ifdef TRACY_ON_DEMAND - tracy::GetProfiler().DeferItem( *item ); -#endif - tracy::Profiler::QueueSerialFinish(); + lockdata->ctx.CustomName(name, nameSz); +} + +struct __tracy_shared_lockable_context_data +{ + tracy::SharedLockableCtx ctx; +}; + +TRACY_API struct __tracy_shared_lockable_context_data* ___tracy_announce_shared_lockable_ctx( const struct ___tracy_source_location_data* srcloc ) +{ + static_assert(sizeof(struct ___tracy_source_location_data) == sizeof(tracy::SourceLocationData)); // C srcloc struct must fit in cpp srcloc struct + struct __tracy_shared_lockable_context_data *lockdata = (__tracy_shared_lockable_context_data *)tracy::tracy_malloc(sizeof(__tracy_shared_lockable_context_data)); + + // Because Tracy deletes the "=" operator + tracy::SharedLockableCtx ctx { (tracy::SourceLocationData *)srcloc }; + memcpy(&lockdata->ctx, &ctx, sizeof(ctx)); + + return lockdata; +} + +TRACY_API void ___tracy_terminate_shared_lockable_ctx( struct __tracy_shared_lockable_context_data* lockdata ) +{ + lockdata->ctx.~SharedLockableCtx(); + tracy::tracy_free((void*)lockdata); +} + +TRACY_API int32_t ___tracy_before_exclusive_lock_shared_lockable_ctx( struct __tracy_shared_lockable_context_data* lockdata ) +{ + return lockdata->ctx.BeforeLock(); +} + +TRACY_API void ___tracy_after_exclusive_lock_shared_lockable_ctx( struct __tracy_shared_lockable_context_data* lockdata ) +{ + lockdata->ctx.AfterLock(); +} + +TRACY_API void ___tracy_after_exclusive_unlock_shared_lockable_ctx( struct __tracy_shared_lockable_context_data* lockdata ) +{ + lockdata->ctx.AfterUnlock(); +} + +TRACY_API void ___tracy_after_try_exclusive_lock_shared_lockable_ctx( struct __tracy_shared_lockable_context_data* lockdata, int32_t acquired ) +{ + lockdata->ctx.AfterTryLock(acquired); +} + +TRACY_API int32_t ___tracy_before_shared_lock_shared_lockable_ctx(struct __tracy_shared_lockable_context_data *lockdata) +{ + return lockdata->ctx.BeforeLockShared(); +} + +TRACY_API void ___tracy_after_shared_lock_shared_lockable_ctx(struct __tracy_shared_lockable_context_data *lockdata) +{ + lockdata->ctx.AfterLockShared(); +} + +TRACY_API void ___tracy_after_try_shared_lock_shared_lockable_ctx(struct __tracy_shared_lockable_context_data *lockdata, int32_t acquired) +{ + lockdata->ctx.AfterTryLockShared(acquired); +} + +TRACY_API void ___tracy_after_shared_unlock_shared_lockable_ctx(struct __tracy_shared_lockable_context_data *lockdata) +{ + lockdata->ctx.AfterUnlockShared(); +} + +TRACY_API void ___tracy_mark_shared_lockable_ctx(struct __tracy_shared_lockable_context_data *lockdata, const struct ___tracy_source_location_data *srcloc) +{ + lockdata->ctx.Mark((tracy::SourceLocationData *)srcloc); +} + +TRACY_API void ___tracy_custom_name_shared_lockable_ctx(struct __tracy_shared_lockable_context_data *lockdata, const char *name, size_t nameSz) +{ + lockdata->ctx.CustomName(name, nameSz); } TRACY_API int32_t ___tracy_connected( void ) diff --git a/src/third_party/tracy/common/TracyQueue.hpp b/src/third_party/tracy/common/TracyQueue.hpp index c681698a..6a5b8727 100644 --- a/src/third_party/tracy/common/TracyQueue.hpp +++ b/src/third_party/tracy/common/TracyQueue.hpp @@ -331,7 +331,7 @@ struct QueuePlotDataInt : public QueuePlotDataBase int64_t val; }; -struct QueuePlotDataFloat : public QueuePlotDataBase +struct QueuePlotDataFloat : public QueuePlotDataBase { float val; }; @@ -466,7 +466,7 @@ struct QueueGpuTimeSync int64_t cpuTime; uint8_t context; }; - + struct QueueGpuContextName { uint8_t context; diff --git a/src/third_party/tracy/tracy/TracyC.h b/src/third_party/tracy/tracy/TracyC.h index 1b1373e0..028f8263 100644 --- a/src/third_party/tracy/tracy/TracyC.h +++ b/src/third_party/tracy/tracy/TracyC.h @@ -39,6 +39,7 @@ TRACY_API void ___tracy_set_thread_name( const char* name ); typedef const void* TracyCZoneCtx; typedef const void* TracyCLockCtx; +typedef const void* TracyCSharedLockCtx; #define TracyCZone(c,x) #define TracyCZoneN(c,x,y) @@ -111,6 +112,20 @@ typedef const void* TracyCLockCtx; #define TracyCLockMark(l) #define TracyCLockCustomName(l,x,y) +#define TracyCSharedLockCtx(l) +#define TracyCSharedLockAnnounce(l) +#define TracyCSharedLockTerminate(l) +#define TracyCSharedLockBeforeExclusiveLock(l) +#define TracyCSharedLockAfterExclusiveLock(l) +#define TracyCSharedLockAfterExclusiveUnl(l) +#define TracyCSharedLockAfterTryExclusiveLock(l,x) +#define TracyCSharedLockBeforeSharedLock(l) +#define TracyCSharedLockAfterSharedLock(l) +#define TracyCSharedLockAfterSharedUnl(l) +#define TracyCSharedLockAfterTrySharedLock(l,x) +#define TracyCSharedLockMark(l) +#define TracyCSharedLockCustomName(l,x,y) + #define TracyCIsConnected 0 #define TracyCIsStarted 0 @@ -200,6 +215,7 @@ struct __tracy_lockable_context_data; typedef /*const*/ struct ___tracy_c_zone_context TracyCZoneCtx; typedef struct __tracy_lockable_context_data* TracyCLockCtx; +typedef struct __tracy_lockable_context_data *TracyCSharedLockCtx; #ifdef TRACY_MANUAL_LIFETIME TRACY_API void ___tracy_startup_profiler(void); @@ -365,6 +381,32 @@ TRACY_API void ___tracy_custom_name_lockable_ctx( struct __tracy_lockable_contex #define TracyCLockMark( lock ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, 0 }; ___tracy_mark_lockable_ctx( lock, &TracyConcat(__tracy_source_location,TracyLine) ); #define TracyCLockCustomName( lock, name, nameSz ) ___tracy_custom_name_lockable_ctx( lock, name, nameSz ); +TRACY_API struct __tracy_shared_lockable_context_data *___tracy_announce_shared_lockable_ctx(const struct ___tracy_source_location_data *srcloc); +TRACY_API void ___tracy_terminate_shared_lockable_ctx(struct __tracy_shared_lockable_context_data *lockdata); +TRACY_API int32_t ___tracy_before_exclusive_lock_shared_lockable_ctx(struct __tracy_shared_lockable_context_data *lockdata); +TRACY_API void ___tracy_after_exclusive_lock_shared_lockable_ctx(struct __tracy_shared_lockable_context_data *lockdata); +TRACY_API void ___tracy_after_exclusive_unlock_shared_lockable_ctx(struct __tracy_shared_lockable_context_data *lockdata); +TRACY_API void ___tracy_after_try_exclusive_lock_shared_lockable_ctx(struct __tracy_shared_lockable_context_data *lockdata, int32_t acquired); +TRACY_API int32_t ___tracy_before_shared_lock_shared_lockable_ctx(struct __tracy_shared_lockable_context_data *lockdata); +TRACY_API void ___tracy_after_shared_lock_shared_lockable_ctx(struct __tracy_shared_lockable_context_data *lockdata); +TRACY_API void ___tracy_after_shared_unlock_shared_lockable_ctx(struct __tracy_shared_lockable_context_data *lockdata); +TRACY_API void ___tracy_after_try_shared_lock_shared_lockable_ctx(struct __tracy_shared_lockable_context_data *lockdata, int32_t acquired); +TRACY_API void ___tracy_mark_shared_lockable_ctx(struct __tracy_shared_lockable_context_data *lockdata, const struct ___tracy_source_location_data *srcloc); +TRACY_API void ___tracy_custom_name_shared_lockable_ctx(struct __tracy_shared_lockable_context_data *lockdata, const char *name, size_t nameSz); + +#define TracyCSharedLockAnnounce( lock ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, 0 }; lock = ___tracy_announce_shared_lockable_ctx( &TracyConcat(__tracy_source_location,TracyLine) ); +#define TracyCSharedLockTerminate( lock ) ___tracy_terminate_shared_lockable_ctx( lock ); +#define TracyCSharedLockBeforeExclusiveLock( lock ) ___tracy_before_exclusive_lock_shared_lockable_ctx( lock ); +#define TracyCSharedLockAfterExclusiveLock( lock ) ___tracy_after_exclusive_lock_shared_lockable_ctx( lock ); +#define TracyCSharedLockAfterExclusiveUnlock( lock ) ___tracy_after_exclusive_unlock_shared_lockable_ctx( lock ); +#define TracyCSharedLockAfterTryExclusiveLock( lock, acquired ) ___tracy_after_try_exclusive_lock_shared_lockable_ctx( lock, acquired ); +#define TracyCSharedLockBeforeSharedLock( lock ) ___tracy_before_shared_lock_shared_lockable_ctx( lock ); +#define TracyCSharedLockAfterSharedLock( lock ) ___tracy_after_shared_lock_shared_lockable_ctx( lock ); +#define TracyCSharedLockAfterSharedUnlock( lock ) ___tracy_after_shared_unlock_shared_lockable_ctx( lock ); +#define TracyCSharedLockAfterTrySharedLock( lock, acquired ) ___tracy_after_try_shared_lock_shared_lockable_ctx( lock, acquired ); +#define TracyCSharedLockMark( lock ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, 0 }; ___tracy_mark_shared_lockable_ctx( lock, &TracyConcat(__tracy_source_location,TracyLine) ); +#define TracyCSharedLockCustomName( lock, name, nameSz ) ___tracy_custom_name_shared_lockable_ctx( lock, name, nameSz ); + #define TracyCIsConnected ___tracy_connected() #ifdef TRACY_FIBERS diff --git a/src/user.c b/src/user.c index b1ba8afe..03387027 100644 --- a/src/user.c +++ b/src/user.c @@ -462,9 +462,9 @@ INTERNAL SORT_COMPARE_FUNC_DEF(entity_draw_order_cmp, arg_a, arg_b, udata) } if (res == 0) { /* Sort by sprite */ - u64 a_cmp = a->sprite.hash; - u64 b_cmp = b->sprite.hash; - res = (a_cmp < b_cmp) - (a_cmp > b_cmp); + u128 a_cmp = a->sprite.hash; + u128 b_cmp = b->sprite.hash; + res = u128_lt(a_cmp, b_cmp) - u128_gt(a_cmp, b_cmp); } if (res == 0) { /* Sort by activation */ @@ -733,7 +733,7 @@ INTERNAL void user_update(void) if (!entity_is_valid_and_active(ent)) continue; /* How much time between camera shakes */ - const f32 frequency = 0.01; + const f32 frequency = 0.01f; f32 shake = ent->shake; if (shake > 0) { u64 basis = hash_fnv64(HASH_FNV64_BASIS, BUFFER_FROM_STRUCT(&ent->handle)); @@ -960,7 +960,7 @@ INTERNAL void user_update(void) f32 opacity_b = clamp_f32(1.f - (v2_dot(vdc, vdb) / v2_len_sq(vdc)), 0, 1); - f32 thickness = 0.01; + f32 thickness = 0.01f; u32 color_start = RGBA_32_F(1, 0.5, 0, opacity_a); u32 color_end = RGBA_32_F(1, 0.8, 0.4, opacity_b);