From e259d5680d3654a1914bbf0eaff20c956be04691 Mon Sep 17 00:00:00 2001 From: jacob Date: Tue, 21 Jan 2025 21:10:15 -0600 Subject: [PATCH] update tracy --- src/third_party/tracy/TracyClient.cpp | 29 +- .../tracy/client/TracyCallstack.cpp | 283 ++++++- .../tracy/client/TracyCallstack.hpp | 31 +- src/third_party/tracy/client/TracyKCore.cpp | 121 +++ src/third_party/tracy/client/TracyKCore.hpp | 37 + .../tracy/client/TracyProfiler.cpp | 720 +++++++++++++++--- .../tracy/client/TracyProfiler.hpp | 307 +++++--- src/third_party/tracy/client/TracyScoped.hpp | 112 +-- .../tracy/client/TracySysPower.cpp | 2 +- .../tracy/client/TracySysTrace.cpp | 25 +- .../tracy/client/tracy_rpmalloc.cpp | 9 +- .../tracy/common/TracyProtocol.hpp | 4 +- src/third_party/tracy/common/TracyQueue.hpp | 27 +- src/third_party/tracy/common/TracySocket.cpp | 3 + src/third_party/tracy/common/TracySystem.cpp | 36 +- src/third_party/tracy/common/TracySystem.hpp | 13 +- src/third_party/tracy/common/TracyVersion.hpp | 4 +- src/third_party/tracy/common/tracy_lz4.cpp | 4 +- src/third_party/tracy/libbacktrace/dwarf.cpp | 106 ++- src/third_party/tracy/libbacktrace/elf.cpp | 272 +++++-- .../tracy/libbacktrace/fileline.cpp | 75 +- .../tracy/libbacktrace/internal.hpp | 43 +- src/third_party/tracy/libbacktrace/macho.cpp | 41 +- src/third_party/tracy/tracy/Tracy.hpp | 166 ++-- src/third_party/tracy/tracy/TracyC.h | 199 ++--- src/third_party/tracy/tracy/TracyD3D11.hpp | 6 +- src/third_party/tracy/tracy/TracyD3D12.hpp | 4 +- src/third_party/tracy/tracy/TracyLua.hpp | 23 +- src/third_party/tracy/tracy/TracyMetal.hmm | 644 ++++++++++++++++ src/third_party/tracy/tracy/TracyOpenCL.hpp | 8 +- src/third_party/tracy/tracy/TracyOpenGL.hpp | 6 +- src/third_party/tracy/tracy/TracyVulkan.hpp | 40 +- 32 files changed, 2664 insertions(+), 736 deletions(-) create mode 100644 src/third_party/tracy/client/TracyKCore.cpp create mode 100644 src/third_party/tracy/client/TracyKCore.hpp create mode 100644 src/third_party/tracy/tracy/TracyMetal.hmm diff --git a/src/third_party/tracy/TracyClient.cpp b/src/third_party/tracy/TracyClient.cpp index 26387b76..6224f48b 100644 --- a/src/third_party/tracy/TracyClient.cpp +++ b/src/third_party/tracy/TracyClient.cpp @@ -30,21 +30,24 @@ #include "client/TracyDxt1.cpp" #include "client/TracyAlloc.cpp" #include "client/TracyOverride.cpp" +#include "client/TracyKCore.cpp" -#if TRACY_HAS_CALLSTACK == 2 || TRACY_HAS_CALLSTACK == 3 || TRACY_HAS_CALLSTACK == 4 || TRACY_HAS_CALLSTACK == 6 -# include "libbacktrace/alloc.cpp" -# include "libbacktrace/dwarf.cpp" -# include "libbacktrace/fileline.cpp" -# include "libbacktrace/mmapio.cpp" -# include "libbacktrace/posix.cpp" -# include "libbacktrace/sort.cpp" -# include "libbacktrace/state.cpp" -# if TRACY_HAS_CALLSTACK == 4 -# include "libbacktrace/macho.cpp" -# else -# include "libbacktrace/elf.cpp" +#if defined(TRACY_HAS_CALLSTACK) +# if TRACY_HAS_CALLSTACK == 2 || TRACY_HAS_CALLSTACK == 3 || TRACY_HAS_CALLSTACK == 4 || TRACY_HAS_CALLSTACK == 6 +# include "libbacktrace/alloc.cpp" +# include "libbacktrace/dwarf.cpp" +# include "libbacktrace/fileline.cpp" +# include "libbacktrace/mmapio.cpp" +# include "libbacktrace/posix.cpp" +# include "libbacktrace/sort.cpp" +# include "libbacktrace/state.cpp" +# if TRACY_HAS_CALLSTACK == 4 +# include "libbacktrace/macho.cpp" +# else +# include "libbacktrace/elf.cpp" +# endif +# include "common/TracyStackFrames.cpp" # endif -# include "common/TracyStackFrames.cpp" #endif #ifdef _MSC_VER diff --git a/src/third_party/tracy/client/TracyCallstack.cpp b/src/third_party/tracy/client/TracyCallstack.cpp index e346647e..946a1972 100644 --- a/src/third_party/tracy/client/TracyCallstack.cpp +++ b/src/third_party/tracy/client/TracyCallstack.cpp @@ -3,10 +3,12 @@ #include #include #include "TracyCallstack.hpp" +#include "TracyDebug.hpp" #include "TracyFastVector.hpp" #include "TracyStringHelpers.hpp" #include "../common/TracyAlloc.hpp" -#include "TracyDebug.hpp" +#include "../common/TracySystem.hpp" + #ifdef TRACY_HAS_CALLSTACK @@ -31,7 +33,6 @@ # include # include # include -# include "TracyFastVector.hpp" #elif TRACY_HAS_CALLSTACK == 5 # include # include @@ -66,7 +67,7 @@ extern "C" extern "C" const char* ___tracy_demangle( const char* mangled ); #ifndef TRACY_DEMANGLE -constexpr size_t ___tracy_demangle_buffer_len = 1024*1024; +constexpr size_t ___tracy_demangle_buffer_len = 1024*1024; char* ___tracy_demangle_buffer; void ___tracy_init_demangle_buffer() @@ -90,9 +91,164 @@ extern "C" const char* ___tracy_demangle( const char* mangled ) #endif #endif +#if TRACY_HAS_CALLSTACK == 3 +# define TRACY_USE_IMAGE_CACHE +# include +#endif + namespace tracy { +#ifdef TRACY_USE_IMAGE_CACHE +// when we have access to dl_iterate_phdr(), we can build a cache of address ranges to image paths +// so we can quickly determine which image an address falls into. +// We refresh this cache only when we hit an address that doesn't fall into any known range. +class ImageCache +{ +public: + struct ImageEntry + { + void* m_startAddress = nullptr; + void* m_endAddress = nullptr; + char* m_name = nullptr; + }; + + ImageCache() + : m_images( 512 ) + { + Refresh(); + } + + ~ImageCache() + { + Clear(); + } + + const ImageEntry* GetImageForAddress( void* address ) + { + const ImageEntry* entry = GetImageForAddressImpl( address ); + if( !entry ) + { + Refresh(); + return GetImageForAddressImpl( address ); + } + return entry; + } + +private: + tracy::FastVector m_images; + bool m_updated = false; + bool m_haveMainImageName = false; + + static int Callback( struct dl_phdr_info* info, size_t size, void* data ) + { + ImageCache* cache = reinterpret_cast( data ); + + const auto startAddress = reinterpret_cast( info->dlpi_addr ); + if( cache->Contains( startAddress ) ) return 0; + + const uint32_t headerCount = info->dlpi_phnum; + assert( headerCount > 0); + const auto endAddress = reinterpret_cast( info->dlpi_addr + + info->dlpi_phdr[info->dlpi_phnum - 1].p_vaddr + info->dlpi_phdr[info->dlpi_phnum - 1].p_memsz); + + ImageEntry* image = cache->m_images.push_next(); + image->m_startAddress = startAddress; + image->m_endAddress = endAddress; + + // the base executable name isn't provided when iterating with dl_iterate_phdr, + // we will have to patch the executable image name outside this callback + if( info->dlpi_name && info->dlpi_name[0] != '\0' ) + { + size_t sz = strlen( info->dlpi_name ) + 1; + image->m_name = (char*)tracy_malloc( sz ); + memcpy( image->m_name, info->dlpi_name, sz ); + } + else + { + image->m_name = nullptr; + } + + cache->m_updated = true; + + return 0; + } + + bool Contains( void* startAddress ) const + { + return std::any_of( m_images.begin(), m_images.end(), [startAddress]( const ImageEntry& entry ) { return startAddress == entry.m_startAddress; } ); + } + + void Refresh() + { + m_updated = false; + dl_iterate_phdr( Callback, this ); + + if( m_updated ) + { + std::sort( m_images.begin(), m_images.end(), + []( const ImageEntry& lhs, const ImageEntry& rhs ) { return lhs.m_startAddress > rhs.m_startAddress; } ); + + // patch the main executable image name here, as calling dl_* functions inside the dl_iterate_phdr callback might cause deadlocks + UpdateMainImageName(); + } + } + + void UpdateMainImageName() + { + if( m_haveMainImageName ) + { + return; + } + + for( ImageEntry& entry : m_images ) + { + if( entry.m_name == nullptr ) + { + Dl_info dlInfo; + if( dladdr( (void *)entry.m_startAddress, &dlInfo ) ) + { + if( dlInfo.dli_fname ) + { + size_t sz = strlen( dlInfo.dli_fname ) + 1; + entry.m_name = (char*)tracy_malloc( sz ); + memcpy( entry.m_name, dlInfo.dli_fname, sz ); + } + } + + // we only expect one entry to be null for the main executable entry + break; + } + } + + m_haveMainImageName = true; + } + + const ImageEntry* GetImageForAddressImpl( void* address ) const + { + auto it = std::lower_bound( m_images.begin(), m_images.end(), address, + []( const ImageEntry& lhs, const void* rhs ) { return lhs.m_startAddress > rhs; } ); + + if( it != m_images.end() && address < it->m_endAddress ) + { + return it; + } + return nullptr; + } + + void Clear() + { + for( ImageEntry& entry : m_images ) + { + tracy_free( entry.m_name ); + } + + m_images.clear(); + m_haveMainImageName = false; + } +}; +#endif //#ifdef TRACY_USE_IMAGE_CACHE + // when "TRACY_SYMBOL_OFFLINE_RESOLVE" is set, instead of fully resolving symbols at runtime, // simply resolve the offset and image name (which will be enough the resolving to be done offline) #ifdef TRACY_SYMBOL_OFFLINE_RESOLVE @@ -190,7 +346,7 @@ ModuleCache* LoadSymbolsForModuleAndCache( const char* imageName, uint32_t image cachedModule->start = baseOfDll; cachedModule->end = baseOfDll + dllSize; - // when doing offline symbol resolution, we must store the full path of the dll for the resolving to work + // when doing offline symbol resolution, we must store the full path of the dll for the resolving to work if( s_shouldResolveSymbolsOffline ) { cachedModule->name = (char*)tracy_malloc_fast(imageNameLength + 1); @@ -229,11 +385,11 @@ void InitCallstack() DBGHELP_LOCK; #endif - // use TRACY_NO_DBHELP_INIT_LOAD=1 to disable preloading of driver + // use TRACY_NO_DBGHELP_INIT_LOAD=1 to disable preloading of driver // and process module symbol loading at startup time - they will be loaded on demand later // Sometimes this process can take a very long time and prevent resolving callstack frames // symbols during that time. - const char* noInitLoadEnv = GetEnvVar( "TRACY_NO_DBHELP_INIT_LOAD" ); + const char* noInitLoadEnv = GetEnvVar( "TRACY_NO_DBGHELP_INIT_LOAD" ); const bool initTimeModuleLoad = !( noInitLoadEnv && noInitLoadEnv[0] == '1' ); if ( !initTimeModuleLoad ) { @@ -305,7 +461,6 @@ void InitCallstack() MODULEINFO info; if( GetModuleInformation( proc, mod[i], &info, sizeof( info ) ) != 0 ) { - const auto base = uint64_t( info.lpBaseOfDll ); char name[1024]; const auto nameLength = GetModuleFileNameA( mod[i], name, 1021 ); if( nameLength > 0 ) @@ -607,6 +762,9 @@ struct backtrace_state* cb_bts = nullptr; int cb_num; CallstackEntry cb_data[MaxCbTrace]; int cb_fixup; +#ifdef TRACY_USE_IMAGE_CACHE +static ImageCache* s_imageCache = nullptr; +#endif //#ifdef TRACY_USE_IMAGE_CACHE #ifdef TRACY_DEBUGINFOD debuginfod_client* s_debuginfod; @@ -619,13 +777,14 @@ struct DebugInfo int fd; }; -FastVector s_di_known( 16 ); +static FastVector* s_di_known; #endif #ifdef __linux struct KernelSymbol { uint64_t addr; + uint32_t size; const char* name; const char* mod; }; @@ -637,10 +796,11 @@ static void InitKernelSymbols() { FILE* f = fopen( "/proc/kallsyms", "rb" ); if( !f ) return; - tracy::FastVector tmpSym( 1024 ); + tracy::FastVector tmpSym( 512 * 1024 ); size_t linelen = 16 * 1024; // linelen must be big enough to prevent reallocs in getline() auto linebuf = (char*)tracy_malloc( linelen ); ssize_t sz; + size_t validCnt = 0; while( ( sz = getline( &linebuf, &linelen, f ) ) != -1 ) { auto ptr = linebuf; @@ -673,7 +833,7 @@ static void InitKernelSymbols() } if( addr == 0 ) continue; ptr++; - if( *ptr != 'T' && *ptr != 't' ) continue; + const bool valid = *ptr == 'T' || *ptr == 't'; ptr += 2; const auto namestart = ptr; while( *ptr != '\t' && *ptr != '\n' ) ptr++; @@ -688,20 +848,28 @@ static void InitKernelSymbols() modend = ptr; } - auto strname = (char*)tracy_malloc_fast( nameend - namestart + 1 ); - memcpy( strname, namestart, nameend - namestart ); - strname[nameend-namestart] = '\0'; - + char* strname = nullptr; char* strmod = nullptr; - if( modstart ) + + if( valid ) { - strmod = (char*)tracy_malloc_fast( modend - modstart + 1 ); - memcpy( strmod, modstart, modend - modstart ); - strmod[modend-modstart] = '\0'; + validCnt++; + + strname = (char*)tracy_malloc_fast( nameend - namestart + 1 ); + memcpy( strname, namestart, nameend - namestart ); + strname[nameend-namestart] = '\0'; + + if( modstart ) + { + strmod = (char*)tracy_malloc_fast( modend - modstart + 1 ); + memcpy( strmod, modstart, modend - modstart ); + strmod[modend-modstart] = '\0'; + } } auto sym = tmpSym.push_next(); sym->addr = addr; + sym->size = 0; sym->name = strname; sym->mod = strmod; } @@ -709,11 +877,22 @@ static void InitKernelSymbols() fclose( f ); if( tmpSym.empty() ) return; - std::sort( tmpSym.begin(), tmpSym.end(), []( const KernelSymbol& lhs, const KernelSymbol& rhs ) { return lhs.addr > rhs.addr; } ); - s_kernelSymCnt = tmpSym.size(); - s_kernelSym = (KernelSymbol*)tracy_malloc_fast( sizeof( KernelSymbol ) * s_kernelSymCnt ); - memcpy( s_kernelSym, tmpSym.data(), sizeof( KernelSymbol ) * s_kernelSymCnt ); - TracyDebug( "Loaded %zu kernel symbols\n", s_kernelSymCnt ); + std::sort( tmpSym.begin(), tmpSym.end(), []( const KernelSymbol& lhs, const KernelSymbol& rhs ) { return lhs.addr < rhs.addr; } ); + for( size_t i=0; i*)tracy_malloc( sizeof( FastVector ) ); + new (s_di_known) FastVector( 16 ); #endif } @@ -831,11 +1018,11 @@ DebugInfo* FindDebugInfo( FastVector& vec, const uint8_t* buildid_dat int GetDebugInfoDescriptor( const char* buildid_data, size_t buildid_size, const char* filename ) { auto buildid = (uint8_t*)buildid_data; - auto it = FindDebugInfo( s_di_known, buildid, buildid_size ); + auto it = FindDebugInfo( *s_di_known, buildid, buildid_size ); if( it ) return it->fd >= 0 ? dup( it->fd ) : -1; int fd = debuginfod_find_debuginfo( s_debuginfod, buildid, buildid_size, nullptr ); - it = s_di_known.push_next(); + it = s_di_known->push_next(); it->buildid_size = buildid_size; it->buildid = (uint8_t*)tracy_malloc( buildid_size ); memcpy( it->buildid, buildid, buildid_size ); @@ -850,7 +1037,7 @@ int GetDebugInfoDescriptor( const char* buildid_data, size_t buildid_size, const const uint8_t* GetBuildIdForImage( const char* image, size_t& size ) { assert( image ); - for( auto& v : s_di_known ) + for( auto& v : *s_di_known ) { if( strcmp( image, v.filename ) == 0 ) { @@ -869,11 +1056,21 @@ debuginfod_client* GetDebuginfodClient() void EndCallstack() { +#ifdef TRACY_USE_IMAGE_CACHE + if( s_imageCache ) + { + s_imageCache->~ImageCache(); + tracy_free( s_imageCache ); + } +#endif //#ifdef TRACY_USE_IMAGE_CACHE #ifndef TRACY_DEMANGLE ___tracy_free_demangle_buffer(); #endif #ifdef TRACY_DEBUGINFOD - ClearDebugInfoVector( s_di_known ); + ClearDebugInfoVector( *s_di_known ); + s_di_known->~FastVector(); + tracy_free( s_di_known ); + debuginfod_end( s_debuginfod ); #endif } @@ -1041,12 +1238,12 @@ void SymInfoError( void* /*data*/, const char* /*msg*/, int /*errnum*/ ) cb_data[cb_num-1].symAddr = 0; } -void GetSymbolForOfflineResolve(void* address, Dl_info& dlinfo, CallstackEntry& cbEntry) +void GetSymbolForOfflineResolve(void* address, uint64_t imageBaseAddress, CallstackEntry& cbEntry) { // tagged with a string that we can identify as an unresolved symbol cbEntry.name = CopyStringFast( "[unresolved]" ); // set .so relative offset so it can be resolved offline - cbEntry.symAddr = (uint64_t)address - (uint64_t)(dlinfo.dli_fbase); + cbEntry.symAddr = (uint64_t)address - imageBaseAddress; cbEntry.symLen = 0x0; cbEntry.file = CopyStringFast( "[unknown]" ); cbEntry.line = 0; @@ -1057,17 +1254,29 @@ CallstackEntryData DecodeCallstackPtr( uint64_t ptr ) InitRpmalloc(); if( ptr >> 63 == 0 ) { - const char* symloc = nullptr; + const char* imageName = nullptr; + uint64_t imageBaseAddress = 0x0; + +#ifdef TRACY_USE_IMAGE_CACHE + const auto* image = s_imageCache->GetImageForAddress((void*)ptr); + if( image ) + { + imageName = image->m_name; + imageBaseAddress = uint64_t(image->m_startAddress); + } +#else Dl_info dlinfo; if( dladdr( (void*)ptr, &dlinfo ) ) { - symloc = dlinfo.dli_fname; + imageName = dlinfo.dli_fname; + imageBaseAddress = uint64_t( dlinfo.dli_fbase ); } +#endif if( s_shouldResolveSymbolsOffline ) { cb_num = 1; - GetSymbolForOfflineResolve( (void*)ptr, dlinfo, cb_data[0] ); + GetSymbolForOfflineResolve( (void*)ptr, imageBaseAddress, cb_data[0] ); } else { @@ -1078,18 +1287,18 @@ CallstackEntryData DecodeCallstackPtr( uint64_t ptr ) backtrace_syminfo( cb_bts, ptr, SymInfoCallback, SymInfoError, nullptr ); } - return { cb_data, uint8_t( cb_num ), symloc ? symloc : "[unknown]" }; + return { cb_data, uint8_t( cb_num ), imageName ? imageName : "[unknown]" }; } #ifdef __linux else if( s_kernelSym ) { - auto it = std::lower_bound( s_kernelSym, s_kernelSym + s_kernelSymCnt, ptr, []( const KernelSymbol& lhs, const uint64_t& rhs ) { return lhs.addr > rhs; } ); + auto it = std::lower_bound( s_kernelSym, s_kernelSym + s_kernelSymCnt, ptr, []( const KernelSymbol& lhs, const uint64_t& rhs ) { return lhs.addr + lhs.size < rhs; } ); if( it != s_kernelSym + s_kernelSymCnt ) { cb_data[0].name = CopyStringFast( it->name ); cb_data[0].file = CopyStringFast( "" ); cb_data[0].line = 0; - cb_data[0].symLen = 0; + cb_data[0].symLen = it->size; cb_data[0].symAddr = it->addr; return { cb_data, 1, it->mod ? it->mod : "" }; } diff --git a/src/third_party/tracy/client/TracyCallstack.hpp b/src/third_party/tracy/client/TracyCallstack.hpp index 63e081f9..12bea967 100644 --- a/src/third_party/tracy/client/TracyCallstack.hpp +++ b/src/third_party/tracy/client/TracyCallstack.hpp @@ -5,10 +5,20 @@ #include "../common/TracyForceInline.hpp" #include "TracyCallstack.h" +#ifndef TRACY_HAS_CALLSTACK + +namespace tracy +{ +static constexpr bool has_callstack() { return false; } +static tracy_force_inline void* Callstack( int32_t /*depth*/ ) { return nullptr; } +} + +#else + #if TRACY_HAS_CALLSTACK == 2 || TRACY_HAS_CALLSTACK == 5 # include #elif TRACY_HAS_CALLSTACK >= 3 -# ifdef TRACE_CLIENT_LIBUNWIND_BACKTRACE +# ifdef TRACY_LIBUNWIND_BACKTRACE // libunwind is, in general, significantly faster than execinfo based backtraces # define UNW_LOCAL_ONLY # include @@ -17,15 +27,6 @@ # endif #endif -#ifndef TRACY_HAS_CALLSTACK - -namespace tracy -{ -static tracy_force_inline void* Callstack( int depth ) { return nullptr; } -} - -#else - #ifdef TRACY_DEBUGINFOD # include #endif @@ -38,6 +39,8 @@ static tracy_force_inline void* Callstack( int depth ) { return nullptr; } namespace tracy { +static constexpr bool has_callstack() { return true; } + struct CallstackSymbolData { const char* file; @@ -83,7 +86,7 @@ extern "C" TRACY_API extern ___tracy_t_RtlWalkFrameChain ___tracy_RtlWalkFrameChain; } -static tracy_force_inline void* Callstack( int depth ) +static tracy_force_inline void* Callstack( int32_t depth ) { assert( depth >= 1 && depth < 63 ); auto trace = (uintptr_t*)tracy_malloc( ( 1 + depth ) * sizeof( uintptr_t ) ); @@ -112,7 +115,7 @@ static _Unwind_Reason_Code tracy_unwind_callback( struct _Unwind_Context* ctx, v return _URC_NO_REASON; } -static tracy_force_inline void* Callstack( int depth ) +static tracy_force_inline void* Callstack( int32_t depth ) { assert( depth >= 1 && depth < 63 ); @@ -127,13 +130,13 @@ static tracy_force_inline void* Callstack( int depth ) #elif TRACY_HAS_CALLSTACK == 3 || TRACY_HAS_CALLSTACK == 4 || TRACY_HAS_CALLSTACK == 6 -static tracy_force_inline void* Callstack( int depth ) +static tracy_force_inline void* Callstack( int32_t depth ) { assert( depth >= 1 ); auto trace = (uintptr_t*)tracy_malloc( ( 1 + (size_t)depth ) * sizeof( uintptr_t ) ); -#ifdef TRACE_CLIENT_LIBUNWIND_BACKTRACE +#ifdef TRACY_LIBUNWIND_BACKTRACE size_t num = unw_backtrace( (void**)(trace+1), depth ); #else const auto num = (size_t)backtrace( (void**)(trace+1), depth ); diff --git a/src/third_party/tracy/client/TracyKCore.cpp b/src/third_party/tracy/client/TracyKCore.cpp new file mode 100644 index 00000000..09d51d11 --- /dev/null +++ b/src/third_party/tracy/client/TracyKCore.cpp @@ -0,0 +1,121 @@ +#ifdef __linux__ + +#include +#include +#include +#include +#include + +#include "TracyDebug.hpp" +#include "TracyKCore.hpp" +#include "../common/TracyAlloc.hpp" + +#if !defined(__GLIBC__) && !defined(__WORDSIZE) +// include __WORDSIZE headers for musl +# include +#endif + +namespace tracy +{ + +using elf_half = uint16_t; +using elf_word = uint32_t; +using elf_sword = int32_t; + +#if __WORDSIZE == 32 + using elf_addr = uint32_t; + using elf_off = uint32_t; + using elf_xword = uint32_t; +#else + using elf_addr = uint64_t; + using elf_off = uint64_t; + using elf_xword = uint64_t; +#endif + +struct elf_ehdr +{ + unsigned char e_ident[16]; + elf_half e_type; + elf_half e_machine; + elf_word e_version; + elf_addr e_entry; + elf_off e_phoff; + elf_off e_shoff; + elf_word e_flags; + elf_half e_ehsize; + elf_half e_phentsize; + elf_half e_phnum; + elf_half e_shentsize; + elf_half e_shnum; + elf_half e_shstrndx; +}; + +struct elf_phdr +{ + elf_word p_type; + elf_word p_flags; + elf_off p_offset; + elf_addr p_vaddr; + elf_addr p_paddr; + elf_xword p_filesz; + elf_xword p_memsz; + uint64_t p_align; // include 32-bit-only flags field for 32-bit compatibility +}; + +KCore::KCore() + : m_offsets( 16 ) +{ + m_fd = open( "/proc/kcore", O_RDONLY ); + if( m_fd == -1 ) return; + + elf_ehdr ehdr; + if( read( m_fd, &ehdr, sizeof( ehdr ) ) != sizeof( ehdr ) ) goto err; + + assert( ehdr.e_phentsize == sizeof( elf_phdr ) ); + + for( elf_half i=0; istart = phdr.p_vaddr; + ptr->size = phdr.p_memsz; + ptr->offset = phdr.p_offset; + } + + std::sort( m_offsets.begin(), m_offsets.end(), []( const Offset& lhs, const Offset& rhs ) { return lhs.start < rhs.start; } ); + TracyDebug( "KCore: %zu segments found\n", m_offsets.size() ); + return; + +err: + close( m_fd ); + m_fd = -1; +} + +KCore::~KCore() +{ + if( m_fd != -1 ) close( m_fd ); +} + +void* KCore::Retrieve( uint64_t addr, uint64_t size ) const +{ + if( m_fd == -1 ) return nullptr; + auto it = std::lower_bound( m_offsets.begin(), m_offsets.end(), addr, []( const Offset& lhs, uint64_t rhs ) { return lhs.start + lhs.size < rhs; } ); + if( it == m_offsets.end() ) return nullptr; + if( addr + size > it->start + it->size ) return nullptr; + if( lseek( m_fd, it->offset + addr - it->start, SEEK_SET ) == -1 ) return nullptr; + auto ptr = tracy_malloc( size ); + if( read( m_fd, ptr, size ) != ssize_t( size ) ) + { + tracy_free( ptr ); + return nullptr; + } + return ptr; +} + +} + +#endif \ No newline at end of file diff --git a/src/third_party/tracy/client/TracyKCore.hpp b/src/third_party/tracy/client/TracyKCore.hpp new file mode 100644 index 00000000..437e172c --- /dev/null +++ b/src/third_party/tracy/client/TracyKCore.hpp @@ -0,0 +1,37 @@ +#ifndef __TRACYKCORE_HPP__ +#define __TRACYKCORE_HPP__ + +#ifdef __linux__ + +#include + +#include "TracyFastVector.hpp" + +namespace tracy +{ + +class KCore +{ + struct Offset + { + uint64_t start; + uint64_t size; + uint64_t offset; + }; + +public: + KCore(); + ~KCore(); + + void* Retrieve( uint64_t addr, uint64_t size ) const; + +private: + int m_fd; + FastVector m_offsets; +}; + +} + +#endif + +#endif diff --git a/src/third_party/tracy/client/TracyProfiler.cpp b/src/third_party/tracy/client/TracyProfiler.cpp index 0f3d5810..6fe78680 100644 --- a/src/third_party/tracy/client/TracyProfiler.cpp +++ b/src/third_party/tracy/client/TracyProfiler.cpp @@ -81,6 +81,10 @@ #include "TracySysTrace.hpp" #include "../tracy/TracyC.h" +#if defined TRACY_MANUAL_LIFETIME && !defined(TRACY_DELAYED_INIT) +# error "TRACY_MANUAL_LIFETIME requires enabled TRACY_DELAYED_INIT" +#endif + #ifdef TRACY_PORT # ifndef TRACY_DATA_PORT # define TRACY_DATA_PORT TRACY_PORT @@ -106,9 +110,12 @@ # include extern "C" typedef LONG (WINAPI *t_RtlGetVersion)( PRTL_OSVERSIONINFOW ); extern "C" typedef BOOL (WINAPI *t_GetLogicalProcessorInformationEx)( LOGICAL_PROCESSOR_RELATIONSHIP, PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, PDWORD ); +extern "C" typedef char* (WINAPI *t_WineGetVersion)(); +extern "C" typedef char* (WINAPI *t_WineGetBuildId)(); #else # include # include +# include #endif #if defined __linux__ # include @@ -271,8 +278,19 @@ static bool EnsureReadable( uintptr_t address ) MappingInfo* mapping = LookUpMapping(address); return mapping && EnsureReadable( *mapping ); } - -#endif // defined __ANDROID__ +#elif defined WIN32 +static bool EnsureReadable( uintptr_t address ) +{ + MEMORY_BASIC_INFORMATION memInfo; + VirtualQuery( reinterpret_cast( address ), &memInfo, sizeof( memInfo ) ); + return memInfo.Protect != PAGE_NOACCESS; +} +#else +static bool EnsureReadable( uintptr_t address ) +{ + return true; +} +#endif #ifndef TRACY_DELAYED_INIT @@ -297,7 +315,7 @@ struct ThreadHandleWrapper static inline void CpuId( uint32_t* regs, uint32_t leaf ) { memset(regs, 0, sizeof(uint32_t) * 4); -#if defined _WIN32 +#if defined _MSC_VER __cpuidex( (int*)regs, leaf, 0 ); #else __get_cpuid( leaf, regs, regs+1, regs+2, regs+3 ); @@ -510,7 +528,16 @@ static const char* GetHostInfo() # ifdef __MINGW32__ ptr += sprintf( ptr, "OS: Windows %i.%i.%i (MingW)\n", (int)ver.dwMajorVersion, (int)ver.dwMinorVersion, (int)ver.dwBuildNumber ); # else - ptr += sprintf( ptr, "OS: Windows %i.%i.%i\n", ver.dwMajorVersion, ver.dwMinorVersion, ver.dwBuildNumber ); + auto WineGetVersion = (t_WineGetVersion)GetProcAddress( GetModuleHandleA( "ntdll.dll" ), "wine_get_version" ); + auto WineGetBuildId = (t_WineGetBuildId)GetProcAddress( GetModuleHandleA( "ntdll.dll" ), "wine_get_build_id" ); + if( WineGetVersion && WineGetBuildId ) + { + ptr += sprintf( ptr, "OS: Windows %lu.%lu.%lu (Wine %s [%s])\n", ver.dwMajorVersion, ver.dwMinorVersion, ver.dwBuildNumber, WineGetVersion(), WineGetBuildId() ); + } + else + { + ptr += sprintf( ptr, "OS: Windows %lu.%lu.%lu\n", ver.dwMajorVersion, ver.dwMinorVersion, ver.dwBuildNumber ); + } # endif } #elif defined __linux__ @@ -1367,6 +1394,8 @@ TRACY_API LuaZoneState& GetLuaZoneState() { return s_luaZoneState; } TRACY_API bool ProfilerAvailable() { return s_instance != nullptr; } TRACY_API bool ProfilerAllocatorAvailable() { return !RpThreadShutdown; } +constexpr static size_t SafeSendBufferSize = 65536; + Profiler::Profiler() : m_timeBegin( 0 ) , m_mainThread( detail::GetThreadHandleImpl() ) @@ -1421,6 +1450,11 @@ Profiler::Profiler() CalibrateDelay(); ReportTopology(); +#ifdef __linux__ + m_kcore = (KCore*)tracy_malloc( sizeof( KCore ) ); + new(m_kcore) KCore(); +#endif + #ifndef TRACY_NO_EXIT const char* noExitEnv = GetEnvVar( "TRACY_NO_EXIT" ); if( noExitEnv && noExitEnv[0] == '1' ) @@ -1435,11 +1469,87 @@ Profiler::Profiler() m_userPort = atoi( userPort ); } + m_safeSendBuffer = (char*)tracy_malloc( SafeSendBufferSize ); + +#ifndef _WIN32 + pipe(m_pipe); +# if defined __APPLE__ || defined BSD + // FreeBSD/XNU don't have F_SETPIPE_SZ, so use the default + m_pipeBufSize = 16384; +# else + m_pipeBufSize = (int)(ptrdiff_t)SafeSendBufferSize; + while( fcntl( m_pipe[0], F_SETPIPE_SZ, m_pipeBufSize ) < 0 && errno == EPERM ) m_pipeBufSize /= 2; // too big; reduce + m_pipeBufSize = fcntl( m_pipe[0], F_GETPIPE_SZ ); +# endif + fcntl( m_pipe[1], F_SETFL, O_NONBLOCK ); +#endif + #if !defined(TRACY_DELAYED_INIT) || !defined(TRACY_MANUAL_LIFETIME) SpawnWorkerThreads(); #endif } +void Profiler::InstallCrashHandler() +{ + +#if defined __linux__ && !defined TRACY_NO_CRASH_HANDLER + struct sigaction threadFreezer = {}; + threadFreezer.sa_handler = ThreadFreezer; + sigaction( TRACY_CRASH_SIGNAL, &threadFreezer, &m_prevSignal.pwr ); + + struct sigaction crashHandler = {}; + crashHandler.sa_sigaction = CrashHandler; + crashHandler.sa_flags = SA_SIGINFO; + sigaction( SIGILL, &crashHandler, &m_prevSignal.ill ); + sigaction( SIGFPE, &crashHandler, &m_prevSignal.fpe ); + sigaction( SIGSEGV, &crashHandler, &m_prevSignal.segv ); + sigaction( SIGPIPE, &crashHandler, &m_prevSignal.pipe ); + sigaction( SIGBUS, &crashHandler, &m_prevSignal.bus ); + sigaction( SIGABRT, &crashHandler, &m_prevSignal.abrt ); +#endif + +#if defined _WIN32 && !defined TRACY_UWP && !defined TRACY_NO_CRASH_HANDLER + // We cannot use Vectored Exception handling because it catches application-wide frame-based SEH blocks. We only + // want to catch unhandled exceptions. + m_prevHandler = SetUnhandledExceptionFilter( CrashFilter ); +#endif + +#ifndef TRACY_NO_CRASH_HANDLER + m_crashHandlerInstalled = true; +#endif + +} + +void Profiler::RemoveCrashHandler() +{ +#if defined _WIN32 && !defined TRACY_UWP && !defined TRACY_NO_CRASH_HANDLER + if( m_crashHandlerInstalled ) + { + auto prev = SetUnhandledExceptionFilter( (LPTOP_LEVEL_EXCEPTION_FILTER)m_prevHandler ); + if( prev != CrashFilter ) SetUnhandledExceptionFilter( prev ); // A different exception filter was installed over ours => put it back + } +#endif + +#if defined __linux__ && !defined TRACY_NO_CRASH_HANDLER + if( m_crashHandlerInstalled ) + { + auto restore = []( int signum, struct sigaction* prev ) { + struct sigaction old; + sigaction( signum, prev, &old ); + if( old.sa_sigaction != CrashHandler ) sigaction( signum, &old, nullptr ); // A different signal handler was installed over ours => put it back + }; + restore( TRACY_CRASH_SIGNAL, &m_prevSignal.pwr ); + restore( SIGILL, &m_prevSignal.ill ); + restore( SIGFPE, &m_prevSignal.fpe ); + restore( SIGSEGV, &m_prevSignal.segv ); + restore( SIGPIPE, &m_prevSignal.pipe ); + restore( SIGBUS, &m_prevSignal.bus ); + restore( SIGABRT, &m_prevSignal.abrt ); + } +#endif + m_crashHandlerInstalled = false; +} + void Profiler::SpawnWorkerThreads() { #ifdef TRACY_HAS_SYSTEM_TRACING @@ -1477,27 +1587,6 @@ void Profiler::SpawnWorkerThreads() # ifdef TRACY_HAS_CALLSTACK s_symbolThreadId = GetThreadId( s_symbolThread->Handle() ); # endif - m_exceptionHandler = AddVectoredExceptionHandler( 1, CrashFilter ); -#endif - -#if defined __linux__ && !defined TRACY_NO_CRASH_HANDLER - struct sigaction threadFreezer = {}; - threadFreezer.sa_handler = ThreadFreezer; - sigaction( TRACY_CRASH_SIGNAL, &threadFreezer, &m_prevSignal.pwr ); - - struct sigaction crashHandler = {}; - crashHandler.sa_sigaction = CrashHandler; - crashHandler.sa_flags = SA_SIGINFO; - sigaction( SIGILL, &crashHandler, &m_prevSignal.ill ); - sigaction( SIGFPE, &crashHandler, &m_prevSignal.fpe ); - sigaction( SIGSEGV, &crashHandler, &m_prevSignal.segv ); - sigaction( SIGPIPE, &crashHandler, &m_prevSignal.pipe ); - sigaction( SIGBUS, &crashHandler, &m_prevSignal.bus ); - sigaction( SIGABRT, &crashHandler, &m_prevSignal.abrt ); -#endif - -#ifndef TRACY_NO_CRASH_HANDLER - m_crashHandlerInstalled = true; #endif #ifdef TRACY_HAS_CALLSTACK @@ -1511,22 +1600,7 @@ Profiler::~Profiler() { m_shutdown.store( true, std::memory_order_relaxed ); -#if defined _WIN32 && !defined TRACY_UWP - if( m_crashHandlerInstalled ) RemoveVectoredExceptionHandler( m_exceptionHandler ); -#endif - -#if defined __linux__ && !defined TRACY_NO_CRASH_HANDLER - if( m_crashHandlerInstalled ) - { - sigaction( TRACY_CRASH_SIGNAL, &m_prevSignal.pwr, nullptr ); - sigaction( SIGILL, &m_prevSignal.ill, nullptr ); - sigaction( SIGFPE, &m_prevSignal.fpe, nullptr ); - sigaction( SIGSEGV, &m_prevSignal.segv, nullptr ); - sigaction( SIGPIPE, &m_prevSignal.pipe, nullptr ); - sigaction( SIGBUS, &m_prevSignal.bus, nullptr ); - sigaction( SIGABRT, &m_prevSignal.abrt, nullptr ); - } -#endif + RemoveCrashHandler(); #ifdef TRACY_HAS_SYSTEM_TRACING if( s_sysTraceThread ) @@ -1554,6 +1628,17 @@ Profiler::~Profiler() EndCallstack(); #endif +#ifdef __linux__ + m_kcore->~KCore(); + tracy_free( m_kcore ); +#endif + +#ifndef _WIN32 + close( m_pipe[0] ); + close( m_pipe[1] ); +#endif + tracy_free( m_safeSendBuffer ); + tracy_free( m_lz4Buf ); tracy_free( m_buffer ); LZ4_freeStream( (LZ4_stream_t*)m_stream ); @@ -1731,10 +1816,12 @@ void Profiler::Worker() new(m_broadcast) UdpBroadcast(); # ifdef TRACY_ONLY_LOCALHOST const char* addr = "127.255.255.255"; +# elif defined TRACY_CLIENT_ADDRESS + const char* addr = TRACY_CLIENT_ADDRESS; # elif defined __QNX__ // global broadcast address of 255.255.255.255 is not well-supported by QNX, // use the interface broadcast address instead, e.g. "const char* addr = 192.168.1.255;" -# error Need to set an appropriate broadcast address for a QNX target. +# error Need to specify TRACY_CLIENT_ADDRESS for a QNX target. # else const char* addr = "255.255.255.255"; # endif @@ -1847,6 +1934,7 @@ void Profiler::Worker() m_connectionId.fetch_add( 1, std::memory_order_release ); #endif m_isConnected.store( true, std::memory_order_release ); + InstallCrashHandler(); HandshakeStatus handshake = HandshakeWelcome; m_sock->Send( &handshake, sizeof( handshake ) ); @@ -1949,6 +2037,8 @@ void Profiler::Worker() if( ShouldExit() ) break; m_isConnected.store( false, std::memory_order_release ); + RemoveCrashHandler(); + #ifdef TRACY_ON_DEMAND m_bufferOffset = 0; m_bufferStart = 0; @@ -2776,6 +2866,15 @@ Profiler::DequeueStatus Profiler::DequeueSerial() MemWrite( &item->memFree.time, dt ); break; } + case QueueType::MemDiscard: + case QueueType::MemDiscardCallstack: + { + int64_t t = MemRead( &item->memDiscard.time ); + int64_t dt = t - refSerial; + refSerial = t; + MemWrite( &item->memDiscard.time, dt ); + break; + } case QueueType::GpuZoneBeginSerial: case QueueType::GpuZoneBeginCallstackSerial: { @@ -3012,6 +3111,62 @@ bool Profiler::CommitData() return ret; } +char* Profiler::SafeCopyProlog( const char* data, size_t size ) +{ + bool success = true; + char* buf = m_safeSendBuffer; +#ifndef NDEBUG + assert( !m_inUse.exchange(true) ); +#endif + + if( size > SafeSendBufferSize ) buf = (char*)tracy_malloc( size ); + +#ifdef _WIN32 + __try + { + memcpy( buf, data, size ); + } + __except( 1 /*EXCEPTION_EXECUTE_HANDLER*/ ) + { + success = false; + } +#else + // Send through the pipe to ensure safe reads + for( size_t offset = 0; offset != size; /*in loop*/ ) + { + size_t sendsize = size - offset; + ssize_t result1, result2; + while( ( result1 = write( m_pipe[1], data + offset, sendsize ) ) < 0 && errno == EINTR ) { /* retry */ } + if( result1 < 0 ) + { + success = false; + break; + } + while( ( result2 = read( m_pipe[0], buf + offset, result1 ) ) < 0 && errno == EINTR ) { /* retry */ } + if( result2 != result1 ) + { + success = false; + break; + } + offset += result1; + } +#endif + + if( success ) return buf; + + SafeCopyEpilog( buf ); + return nullptr; +} + +void Profiler::SafeCopyEpilog( char* buf ) +{ + if( buf != m_safeSendBuffer ) tracy_free( buf ); + +#ifndef NDEBUG + m_inUse.store( false ); +#endif +} + bool Profiler::SendData( const char* data, size_t len ) { const lz4sz_t lz4sz = LZ4_compress_fast_continue( (LZ4_stream_t*)m_stream, data, m_lz4Buf + sizeof( lz4sz_t ), (int)len, LZ4Size, 1 ); @@ -3338,6 +3493,17 @@ void Profiler::HandleSymbolQueueItem( const SymbolQueueItem& si ) } } } +#elif defined __linux__ + void* data = m_kcore->Retrieve( si.ptr, si.extra ); + if( data ) + { + TracyLfqPrepare( QueueType::SymbolCodeMetadata ); + MemWrite( &item->symbolCodeMetadata.symbol, si.ptr ); + MemWrite( &item->symbolCodeMetadata.ptr, (uint64_t)data ); + MemWrite( &item->symbolCodeMetadata.size, (uint32_t)si.extra ); + TracyLfqCommit; + break; + } #endif TracyLfqPrepare( QueueType::AckSymbolCodeNotAvailable ); TracyLfqCommit; @@ -3423,7 +3589,22 @@ bool Profiler::HandleServerQuery() } else { - SendString( ptr, GetThreadName( ptr ), QueueType::ThreadName ); + auto t = GetThreadNameData( (uint32_t)ptr ); + if( t ) + { + SendString( ptr, t->name, QueueType::ThreadName ); + if( t->groupHint != 0 ) + { + TracyLfqPrepare( QueueType::ThreadGroupHint ); + MemWrite( &item->threadGroupHint.thread, (uint32_t)ptr ); + MemWrite( &item->threadGroupHint.groupHint, t->groupHint ); + TracyLfqCommit; + } + } + else + { + SendString( ptr, GetThreadName( (uint32_t)ptr ), QueueType::ThreadName ); + } } break; case ServerQuerySourceLocation: @@ -3661,6 +3842,7 @@ void Profiler::ReportTopology() struct CpuData { uint32_t package; + uint32_t die; uint32_t core; uint32_t thread; }; @@ -3673,23 +3855,55 @@ void Profiler::ReportTopology() # endif if( !_GetLogicalProcessorInformationEx ) return; + SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX* packageInfo = nullptr; + SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX* dieInfo = nullptr; + SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX* coreInfo = nullptr; + DWORD psz = 0; _GetLogicalProcessorInformationEx( RelationProcessorPackage, nullptr, &psz ); - auto packageInfo = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)tracy_malloc( psz ); - auto res = _GetLogicalProcessorInformationEx( RelationProcessorPackage, packageInfo, &psz ); - assert( res ); + if( GetLastError() == ERROR_INSUFFICIENT_BUFFER ) + { + packageInfo = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)tracy_malloc( psz ); + auto res = _GetLogicalProcessorInformationEx( RelationProcessorPackage, packageInfo, &psz ); + assert( res ); + } + else + { + psz = 0; + } + + DWORD dsz = 0; + _GetLogicalProcessorInformationEx( RelationProcessorDie, nullptr, &dsz ); + if( GetLastError() == ERROR_INSUFFICIENT_BUFFER ) + { + dieInfo = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)tracy_malloc( dsz ); + auto res = _GetLogicalProcessorInformationEx( RelationProcessorDie, dieInfo, &dsz ); + assert( res ); + } + else + { + dsz = 0; + } DWORD csz = 0; _GetLogicalProcessorInformationEx( RelationProcessorCore, nullptr, &csz ); - auto coreInfo = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)tracy_malloc( csz ); - res = _GetLogicalProcessorInformationEx( RelationProcessorCore, coreInfo, &csz ); - assert( res ); + if( GetLastError() == ERROR_INSUFFICIENT_BUFFER ) + { + coreInfo = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)tracy_malloc( csz ); + auto res = _GetLogicalProcessorInformationEx( RelationProcessorCore, coreInfo, &csz ); + assert( res ); + } + else + { + csz = 0; + } SYSTEM_INFO sysinfo; GetSystemInfo( &sysinfo ); const uint32_t numcpus = sysinfo.dwNumberOfProcessors; auto cpuData = (CpuData*)tracy_malloc( sizeof( CpuData ) * numcpus ); + memset( cpuData, 0, sizeof( CpuData ) * numcpus ); for( uint32_t i=0; iRelationship == RelationProcessorDie ); + // FIXME account for GroupCount + auto mask = ptr->Processor.GroupMask[0].Mask; + int core = 0; + while( mask != 0 ) + { + if( mask & 1 ) cpuData[core].die = idx; + core++; + mask >>= 1; + } + ptr = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)(((char*)ptr) + ptr->Size); + idx++; + } + idx = 0; ptr = coreInfo; while( (char*)ptr < ((char*)coreInfo) + csz ) @@ -3734,6 +3966,7 @@ void Profiler::ReportTopology() TracyLfqPrepare( QueueType::CpuTopology ); MemWrite( &item->cpuTopology.package, data.package ); + MemWrite( &item->cpuTopology.die, data.die ); MemWrite( &item->cpuTopology.core, data.core ); MemWrite( &item->cpuTopology.thread, data.thread ); @@ -3769,12 +4002,26 @@ void Profiler::ReportTopology() fclose( f ); cpuData[i].package = uint32_t( atoi( buf ) ); cpuData[i].thread = i; + sprintf( path, "%s%i/topology/core_id", basePath, i ); f = fopen( path, "rb" ); - read = fread( buf, 1, 1024, f ); - buf[read] = '\0'; - fclose( f ); - cpuData[i].core = uint32_t( atoi( buf ) ); + if( f ) + { + read = fread( buf, 1, 1024, f ); + buf[read] = '\0'; + fclose( f ); + cpuData[i].core = uint32_t( atoi( buf ) ); + } + + sprintf( path, "%s%i/topology/die_id", basePath, i ); + f = fopen( path, "rb" ); + if( f ) + { + read = fread( buf, 1, 1024, f ); + buf[read] = '\0'; + fclose( f ); + cpuData[i].die = uint32_t( atoi( buf ) ); + } } for( int i=0; icpuTopology.package, data.package ); + MemWrite( &item->cpuTopology.die, data.die ); MemWrite( &item->cpuTopology.core, data.core ); MemWrite( &item->cpuTopology.thread, data.thread ); @@ -3798,7 +4046,7 @@ void Profiler::ReportTopology() #endif } -void Profiler::SendCallstack( int depth, const char* skipBefore ) +void Profiler::SendCallstack( int32_t depth, const char* skipBefore ) { #ifdef TRACY_HAS_CALLSTACK auto ptr = Callstack( depth ); @@ -3873,44 +4121,41 @@ void Profiler::HandleSymbolCodeQuery( uint64_t symbol, uint32_t size ) } else { -#ifdef __ANDROID__ - // On Android it's common for code to be in mappings that are only executable - // but not readable. - if( !EnsureReadable( symbol ) ) - { - AckSymbolCodeNotAvailable(); - return; - } -#endif - SendLongString( symbol, (const char*)symbol, size, QueueType::SymbolCode ); + auto&& lambda = [ this, symbol ]( const char* buf, size_t size ) { + SendLongString( symbol, buf, size, QueueType::SymbolCode ); + }; + + // 'symbol' may have come from a module that has since unloaded, perform a safe copy before sending + if( !WithSafeCopy( (const char*)symbol, size, lambda ) ) AckSymbolCodeNotAvailable(); } } void Profiler::HandleSourceCodeQuery( char* data, char* image, uint32_t id ) { bool ok = false; - struct stat st; - if( stat( data, &st ) == 0 && (uint64_t)st.st_mtime < m_exectime ) + FILE* f = fopen( data, "rb" ); + if( f ) { - if( st.st_size < ( TargetFrameSize - 16 ) ) + struct stat st; + if( fstat( fileno( f ), &st ) == 0 && (uint64_t)st.st_mtime < m_exectime && st.st_size < ( TargetFrameSize - 16 ) ) { - FILE* f = fopen( data, "rb" ); - if( f ) + auto ptr = (char*)tracy_malloc_fast( st.st_size ); + auto rd = fread( ptr, 1, st.st_size, f ); + if( rd == (size_t)st.st_size ) { - auto ptr = (char*)tracy_malloc_fast( st.st_size ); - auto rd = fread( ptr, 1, st.st_size, f ); - fclose( f ); - if( rd == (size_t)st.st_size ) - { - TracyLfqPrepare( QueueType::SourceCodeMetadata ); - MemWrite( &item->sourceCodeMetadata.ptr, (uint64_t)ptr ); - MemWrite( &item->sourceCodeMetadata.size, (uint32_t)rd ); - MemWrite( &item->sourceCodeMetadata.id, id ); - TracyLfqCommit; - ok = true; - } + TracyLfqPrepare( QueueType::SourceCodeMetadata ); + MemWrite( &item->sourceCodeMetadata.ptr, (uint64_t)ptr ); + MemWrite( &item->sourceCodeMetadata.size, (uint32_t)rd ); + MemWrite( &item->sourceCodeMetadata.id, id ); + TracyLfqCommit; + ok = true; + } + else + { + tracy_free_fast( ptr ); } } + fclose( f ); } #ifdef TRACY_DEBUGINFOD @@ -3940,6 +4185,10 @@ void Profiler::HandleSourceCodeQuery( char* data, char* image, uint32_t id ) TracyLfqCommit; ok = true; } + else + { + tracy_free_fast( ptr ); + } } close( d ); } @@ -3966,6 +4215,10 @@ void Profiler::HandleSourceCodeQuery( char* data, char* image, uint32_t id ) TracyLfqCommit; ok = true; } + else + { + tracy_free_fast( ptr ); + } } } @@ -3995,7 +4248,7 @@ int64_t Profiler::GetTimeQpc() extern "C" { #endif -TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin( const struct ___tracy_source_location_data* srcloc, int active ) +TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin( const struct ___tracy_source_location_data* srcloc, int32_t active ) { ___tracy_c_zone_context ctx; #ifdef TRACY_ON_DEMAND @@ -4023,7 +4276,7 @@ TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin( const struct ___tracy_source_l return ctx; } -TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_callstack( const struct ___tracy_source_location_data* srcloc, int depth, int active ) +TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_callstack( const struct ___tracy_source_location_data* srcloc, int32_t depth, int32_t active ) { ___tracy_c_zone_context ctx; #ifdef TRACY_ON_DEMAND @@ -4042,17 +4295,21 @@ TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_callstack( const struct ___trac TracyQueueCommitC( zoneValidationThread ); } #endif - tracy::GetProfiler().SendCallstack( depth ); + auto zoneQueue = tracy::QueueType::ZoneBegin; + if( depth > 0 && tracy::has_callstack() ) { - TracyQueuePrepareC( tracy::QueueType::ZoneBeginCallstack ); - tracy::MemWrite( &item->zoneBegin.time, tracy::Profiler::GetTime() ); - tracy::MemWrite( &item->zoneBegin.srcloc, (uint64_t)srcloc ); - TracyQueueCommitC( zoneBeginThread ); + tracy::GetProfiler().SendCallstack( depth ); + zoneQueue = tracy::QueueType::ZoneBeginCallstack; } + TracyQueuePrepareC( zoneQueue ); + tracy::MemWrite( &item->zoneBegin.time, tracy::Profiler::GetTime() ); + tracy::MemWrite( &item->zoneBegin.srcloc, (uint64_t)srcloc ); + TracyQueueCommitC( zoneBeginThread ); + return ctx; } -TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_alloc( uint64_t srcloc, int active ) +TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_alloc( uint64_t srcloc, int32_t active ) { ___tracy_c_zone_context ctx; #ifdef TRACY_ON_DEMAND @@ -4084,7 +4341,7 @@ TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_alloc( uint64_t srcloc, int act return ctx; } -TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_alloc_callstack( uint64_t srcloc, int depth, int active ) +TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_alloc_callstack( uint64_t srcloc, int32_t depth, int32_t active ) { ___tracy_c_zone_context ctx; #ifdef TRACY_ON_DEMAND @@ -4107,13 +4364,17 @@ TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_alloc_callstack( uint64_t srclo TracyQueueCommitC( zoneValidationThread ); } #endif - tracy::GetProfiler().SendCallstack( depth ); + auto zoneQueue = tracy::QueueType::ZoneBeginAllocSrcLoc; + if( depth > 0 && tracy::has_callstack() ) { - TracyQueuePrepareC( tracy::QueueType::ZoneBeginAllocSrcLocCallstack ); - tracy::MemWrite( &item->zoneBegin.time, tracy::Profiler::GetTime() ); - tracy::MemWrite( &item->zoneBegin.srcloc, srcloc ); - TracyQueueCommitC( zoneBeginThread ); + tracy::GetProfiler().SendCallstack( depth ); + zoneQueue = tracy::QueueType::ZoneBeginAllocSrcLocCallstack; } + TracyQueuePrepareC( zoneQueue ); + tracy::MemWrite( &item->zoneBegin.time, tracy::Profiler::GetTime() ); + tracy::MemWrite( &item->zoneBegin.srcloc, srcloc ); + TracyQueueCommitC( zoneBeginThread ); + return ctx; } @@ -4211,34 +4472,86 @@ TRACY_API void ___tracy_emit_zone_value( TracyCZoneCtx ctx, uint64_t value ) } } -TRACY_API void ___tracy_emit_memory_alloc( const void* ptr, size_t size, int secure ) { tracy::Profiler::MemAlloc( ptr, size, secure != 0 ); } -TRACY_API void ___tracy_emit_memory_alloc_callstack( const void* ptr, size_t size, int depth, int secure ) { tracy::Profiler::MemAllocCallstack( ptr, size, depth, secure != 0 ); } -TRACY_API void ___tracy_emit_memory_free( const void* ptr, int secure ) { tracy::Profiler::MemFree( ptr, secure != 0 ); } -TRACY_API void ___tracy_emit_memory_free_callstack( const void* ptr, int depth, int secure ) { tracy::Profiler::MemFreeCallstack( ptr, depth, secure != 0 ); } -TRACY_API void ___tracy_emit_memory_alloc_named( const void* ptr, size_t size, int secure, const char* name ) { tracy::Profiler::MemAllocNamed( ptr, size, secure != 0, name ); } -TRACY_API void ___tracy_emit_memory_alloc_callstack_named( const void* ptr, size_t size, int depth, int secure, const char* name ) { tracy::Profiler::MemAllocCallstackNamed( ptr, size, depth, secure != 0, name ); } -TRACY_API void ___tracy_emit_memory_free_named( const void* ptr, int secure, const char* name ) { tracy::Profiler::MemFreeNamed( ptr, secure != 0, name ); } -TRACY_API void ___tracy_emit_memory_free_callstack_named( const void* ptr, int depth, int secure, const char* name ) { tracy::Profiler::MemFreeCallstackNamed( ptr, depth, secure != 0, name ); } +TRACY_API void ___tracy_emit_memory_alloc( const void* ptr, size_t size, int32_t secure ) { tracy::Profiler::MemAlloc( ptr, size, secure != 0 ); } +TRACY_API void ___tracy_emit_memory_alloc_callstack( const void* ptr, size_t size, int32_t depth, int32_t secure ) +{ + if( depth > 0 && tracy::has_callstack() ) + { + tracy::Profiler::MemAllocCallstack( ptr, size, depth, secure != 0 ); + } + else + { + tracy::Profiler::MemAlloc( ptr, size, secure != 0 ); + } +} +TRACY_API void ___tracy_emit_memory_free( const void* ptr, int32_t secure ) { tracy::Profiler::MemFree( ptr, secure != 0 ); } +TRACY_API void ___tracy_emit_memory_free_callstack( const void* ptr, int32_t depth, int32_t secure ) +{ + if( depth > 0 && tracy::has_callstack() ) + { + tracy::Profiler::MemFreeCallstack( ptr, depth, secure != 0 ); + } + else + { + tracy::Profiler::MemFree( ptr, secure != 0 ); + } +} +TRACY_API void ___tracy_emit_memory_discard( const char* name, int32_t secure ) { tracy::Profiler::MemDiscard( name, secure != 0 ); } +TRACY_API void ___tracy_emit_memory_discard_callstack( const char* name, int32_t secure, int32_t depth ) +{ + if( depth > 0 && tracy::has_callstack() ) + { + tracy::Profiler::MemDiscardCallstack( name, secure != 0, depth ); + } + else + { + tracy::Profiler::MemDiscard( name, secure != 0 ); + } +} +TRACY_API void ___tracy_emit_memory_alloc_named( const void* ptr, size_t size, int32_t secure, const char* name ) { tracy::Profiler::MemAllocNamed( ptr, size, secure != 0, name ); } +TRACY_API void ___tracy_emit_memory_alloc_callstack_named( const void* ptr, size_t size, int32_t depth, int32_t secure, const char* name ) +{ + if( depth > 0 && tracy::has_callstack() ) + { + tracy::Profiler::MemAllocCallstackNamed( ptr, size, depth, secure != 0, name ); + } + else + { + tracy::Profiler::MemAllocNamed( ptr, size, secure != 0, name ); + } +} +TRACY_API void ___tracy_emit_memory_free_named( const void* ptr, int32_t secure, const char* name ) { tracy::Profiler::MemFreeNamed( ptr, secure != 0, name ); } +TRACY_API void ___tracy_emit_memory_free_callstack_named( const void* ptr, int32_t depth, int32_t secure, const char* name ) +{ + if( depth > 0 && tracy::has_callstack() ) + { + tracy::Profiler::MemFreeCallstackNamed( ptr, depth, secure != 0, name ); + } + else + { + tracy::Profiler::MemFreeNamed( ptr, secure != 0, name ); + } +} TRACY_API void ___tracy_emit_frame_mark( const char* name ) { tracy::Profiler::SendFrameMark( name ); } TRACY_API void ___tracy_emit_frame_mark_start( const char* name ) { tracy::Profiler::SendFrameMark( name, tracy::QueueType::FrameMarkMsgStart ); } TRACY_API void ___tracy_emit_frame_mark_end( const char* name ) { tracy::Profiler::SendFrameMark( name, tracy::QueueType::FrameMarkMsgEnd ); } -TRACY_API void ___tracy_emit_frame_image( const void* image, uint16_t w, uint16_t h, uint8_t offset, int flip ) { tracy::Profiler::SendFrameImage( image, w, h, offset, flip ); } +TRACY_API void ___tracy_emit_frame_image( const void* image, uint16_t w, uint16_t h, uint8_t offset, int32_t flip ) { tracy::Profiler::SendFrameImage( image, w, h, offset, flip != 0 ); } TRACY_API void ___tracy_emit_plot( const char* name, double val ) { tracy::Profiler::PlotData( name, val ); } TRACY_API void ___tracy_emit_plot_float( const char* name, float val ) { tracy::Profiler::PlotData( name, val ); } TRACY_API void ___tracy_emit_plot_int( const char* name, int64_t val ) { tracy::Profiler::PlotData( name, val ); } -TRACY_API void ___tracy_emit_plot_config( const char* name, int type, int step, int fill, uint32_t color ) { tracy::Profiler::ConfigurePlot( name, tracy::PlotFormatType(type), step, fill, color ); } -TRACY_API void ___tracy_emit_message( const char* txt, size_t size, int callstack ) { tracy::Profiler::Message( txt, size, callstack ); } -TRACY_API void ___tracy_emit_messageL( const char* txt, int callstack ) { tracy::Profiler::Message( txt, callstack ); } -TRACY_API void ___tracy_emit_messageC( const char* txt, size_t size, uint32_t color, int callstack ) { tracy::Profiler::MessageColor( txt, size, color, callstack ); } -TRACY_API void ___tracy_emit_messageLC( const char* txt, uint32_t color, int callstack ) { tracy::Profiler::MessageColor( txt, color, callstack ); } +TRACY_API void ___tracy_emit_plot_config( const char* name, int32_t type, int32_t step, int32_t fill, uint32_t color ) { tracy::Profiler::ConfigurePlot( name, tracy::PlotFormatType(type), step != 0, fill != 0, color ); } +TRACY_API void ___tracy_emit_message( const char* txt, size_t size, int32_t callstack_depth ) { tracy::Profiler::Message( txt, size, callstack_depth ); } +TRACY_API void ___tracy_emit_messageL( const char* txt, int32_t callstack_depth ) { tracy::Profiler::Message( txt, callstack_depth ); } +TRACY_API void ___tracy_emit_messageC( const char* txt, size_t size, uint32_t color, int32_t callstack_depth ) { tracy::Profiler::MessageColor( txt, size, color, callstack_depth ); } +TRACY_API void ___tracy_emit_messageLC( const char* txt, uint32_t color, int32_t callstack_depth ) { tracy::Profiler::MessageColor( txt, color, callstack_depth ); } TRACY_API void ___tracy_emit_message_appinfo( const char* txt, size_t size ) { tracy::Profiler::MessageAppInfo( txt, size ); } -TRACY_API uint64_t ___tracy_alloc_srcloc( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz ) { - return tracy::Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz ); +TRACY_API uint64_t ___tracy_alloc_srcloc( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, uint32_t color ) { + return tracy::Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, color ); } -TRACY_API uint64_t ___tracy_alloc_srcloc_name( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz ) { - return tracy::Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz ); +TRACY_API uint64_t ___tracy_alloc_srcloc_name( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, uint32_t color ) { + return tracy::Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz, color ); } TRACY_API void ___tracy_emit_gpu_zone_begin( const struct ___tracy_gpu_zone_begin_data data ) @@ -4477,17 +4790,186 @@ TRACY_API void ___tracy_emit_gpu_time_sync_serial( const struct ___tracy_gpu_tim tracy::Profiler::QueueSerialFinish(); } -TRACY_API int ___tracy_connected( void ) +struct __tracy_lockable_context_data { - return tracy::GetProfiler().IsConnected(); + uint32_t m_id; +#ifdef TRACY_ON_DEMAND + std::atomic m_lockCount; + std::atomic m_active; +#endif +}; + +TRACY_API struct __tracy_lockable_context_data* ___tracy_announce_lockable_ctx( const struct ___tracy_source_location_data* srcloc ) +{ + struct __tracy_lockable_context_data *lockdata = (__tracy_lockable_context_data*)tracy::tracy_malloc( sizeof( __tracy_lockable_context_data ) ); + lockdata->m_id =tracy:: GetLockCounter().fetch_add( 1, std::memory_order_relaxed ); +#ifdef TRACY_ON_DEMAND + new(&lockdata->m_lockCount) std::atomic( 0 ); + new(&lockdata->m_active) std::atomic( false ); +#endif + assert( lockdata->m_id != (std::numeric_limits::max)() ); + + auto item = tracy::Profiler::QueueSerial(); + tracy::MemWrite( &item->hdr.type, tracy::QueueType::LockAnnounce ); + tracy::MemWrite( &item->lockAnnounce.id, lockdata->m_id ); + tracy::MemWrite( &item->lockAnnounce.time, tracy::Profiler::GetTime() ); + tracy::MemWrite( &item->lockAnnounce.lckloc, (uint64_t)srcloc ); + tracy::MemWrite( &item->lockAnnounce.type, tracy::LockType::Lockable ); +#ifdef TRACY_ON_DEMAND + tracy::GetProfiler().DeferItem( *item ); +#endif + tracy::Profiler::QueueSerialFinish(); + + return lockdata; +} + +TRACY_API void ___tracy_terminate_lockable_ctx( struct __tracy_lockable_context_data* lockdata ) +{ + auto item = tracy::Profiler::QueueSerial(); + tracy::MemWrite( &item->hdr.type, tracy::QueueType::LockTerminate ); + tracy::MemWrite( &item->lockTerminate.id, lockdata->m_id ); + tracy::MemWrite( &item->lockTerminate.time, tracy::Profiler::GetTime() ); +#ifdef TRACY_ON_DEMAND + tracy::GetProfiler().DeferItem( *item ); +#endif + tracy::Profiler::QueueSerialFinish(); + +#ifdef TRACY_ON_DEMAND + lockdata->m_lockCount.~atomic(); + lockdata->m_active.~atomic(); +#endif + tracy::tracy_free((void*)lockdata); +} + +TRACY_API int32_t ___tracy_before_lock_lockable_ctx( struct __tracy_lockable_context_data* lockdata ) +{ +#ifdef TRACY_ON_DEMAND + bool queue = false; + const auto locks = lockdata->m_lockCount.fetch_add( 1, std::memory_order_relaxed ); + const auto active = lockdata->m_active.load( std::memory_order_relaxed ); + if( locks == 0 || active ) + { + const bool connected = tracy::GetProfiler().IsConnected(); + if( active != connected ) lockdata->m_active.store( connected, std::memory_order_relaxed ); + if( connected ) queue = true; + } + if( !queue ) return static_cast(false); +#endif + + auto item = tracy::Profiler::QueueSerial(); + tracy::MemWrite( &item->hdr.type, tracy::QueueType::LockWait ); + tracy::MemWrite( &item->lockWait.thread, tracy::GetThreadHandle() ); + tracy::MemWrite( &item->lockWait.id, lockdata->m_id ); + tracy::MemWrite( &item->lockWait.time, tracy::Profiler::GetTime() ); + tracy::Profiler::QueueSerialFinish(); + return static_cast(true); +} + +TRACY_API void ___tracy_after_lock_lockable_ctx( struct __tracy_lockable_context_data* lockdata ) +{ + auto item = tracy::Profiler::QueueSerial(); + tracy::MemWrite( &item->hdr.type, tracy::QueueType::LockObtain ); + tracy::MemWrite( &item->lockObtain.thread, tracy::GetThreadHandle() ); + tracy::MemWrite( &item->lockObtain.id, lockdata->m_id ); + tracy::MemWrite( &item->lockObtain.time, tracy::Profiler::GetTime() ); + tracy::Profiler::QueueSerialFinish(); +} + +TRACY_API void ___tracy_after_unlock_lockable_ctx( struct __tracy_lockable_context_data* lockdata ) +{ +#ifdef TRACY_ON_DEMAND + lockdata->m_lockCount.fetch_sub( 1, std::memory_order_relaxed ); + if( !lockdata->m_active.load( std::memory_order_relaxed ) ) return; + if( !tracy::GetProfiler().IsConnected() ) + { + lockdata->m_active.store( false, std::memory_order_relaxed ); + return; + } +#endif + + auto item = tracy::Profiler::QueueSerial(); + tracy::MemWrite( &item->hdr.type, tracy::QueueType::LockRelease ); + tracy::MemWrite( &item->lockRelease.id, lockdata->m_id ); + tracy::MemWrite( &item->lockRelease.time, tracy::Profiler::GetTime() ); + tracy::Profiler::QueueSerialFinish(); +} + +TRACY_API void ___tracy_after_try_lock_lockable_ctx( struct __tracy_lockable_context_data* lockdata, int32_t acquired ) +{ +#ifdef TRACY_ON_DEMAND + if( !acquired ) return; + + bool queue = false; + const auto locks = lockdata->m_lockCount.fetch_add( 1, std::memory_order_relaxed ); + const auto active = lockdata->m_active.load( std::memory_order_relaxed ); + if( locks == 0 || active ) + { + const bool connected = tracy::GetProfiler().IsConnected(); + if( active != connected ) lockdata->m_active.store( connected, std::memory_order_relaxed ); + if( connected ) queue = true; + } + if( !queue ) return; +#endif + + if( acquired ) + { + auto item = tracy::Profiler::QueueSerial(); + tracy::MemWrite( &item->hdr.type, tracy::QueueType::LockObtain ); + tracy::MemWrite( &item->lockObtain.thread, tracy::GetThreadHandle() ); + tracy::MemWrite( &item->lockObtain.id, lockdata->m_id ); + tracy::MemWrite( &item->lockObtain.time, tracy::Profiler::GetTime() ); + tracy::Profiler::QueueSerialFinish(); + } +} + +TRACY_API void ___tracy_mark_lockable_ctx( struct __tracy_lockable_context_data* lockdata, const struct ___tracy_source_location_data* srcloc ) +{ +#ifdef TRACY_ON_DEMAND + const auto active = lockdata->m_active.load( std::memory_order_relaxed ); + if( !active ) return; + const auto connected = tracy::GetProfiler().IsConnected(); + if( !connected ) + { + if( active ) lockdata->m_active.store( false, std::memory_order_relaxed ); + return; + } +#endif + + auto item = tracy::Profiler::QueueSerial(); + tracy::MemWrite( &item->hdr.type, tracy::QueueType::LockMark ); + tracy::MemWrite( &item->lockMark.thread, tracy::GetThreadHandle() ); + tracy::MemWrite( &item->lockMark.id, lockdata->m_id ); + tracy::MemWrite( &item->lockMark.srcloc, (uint64_t)srcloc ); + tracy::Profiler::QueueSerialFinish(); +} + +TRACY_API void ___tracy_custom_name_lockable_ctx( struct __tracy_lockable_context_data* lockdata, const char* name, size_t nameSz ) +{ + assert( nameSz < (std::numeric_limits::max)() ); + auto ptr = (char*)tracy::tracy_malloc( nameSz ); + memcpy( ptr, name, nameSz ); + auto item = tracy::Profiler::QueueSerial(); + tracy::MemWrite( &item->hdr.type, tracy::QueueType::LockName ); + tracy::MemWrite( &item->lockNameFat.id, lockdata->m_id ); + tracy::MemWrite( &item->lockNameFat.name, (uint64_t)ptr ); + tracy::MemWrite( &item->lockNameFat.size, (uint16_t)nameSz ); +#ifdef TRACY_ON_DEMAND + tracy::GetProfiler().DeferItem( *item ); +#endif + tracy::Profiler::QueueSerialFinish(); +} + +TRACY_API int32_t ___tracy_connected( void ) +{ + return static_cast( tracy::GetProfiler().IsConnected() ); } #ifdef TRACY_FIBERS -TRACY_API void ___tracy_fiber_enter( const char* fiber ){ tracy::Profiler::EnterFiber( fiber ); } +TRACY_API void ___tracy_fiber_enter( const char* fiber ){ tracy::Profiler::EnterFiber( fiber, 0 ); } TRACY_API void ___tracy_fiber_leave( void ){ tracy::Profiler::LeaveFiber(); } #endif -# ifdef TRACY_MANUAL_LIFETIME +# if defined TRACY_MANUAL_LIFETIME && defined TRACY_DELAYED_INIT TRACY_API void ___tracy_startup_profiler( void ) { tracy::StartupProfiler(); @@ -4498,9 +4980,9 @@ TRACY_API void ___tracy_shutdown_profiler( void ) tracy::ShutdownProfiler(); } -TRACY_API int ___tracy_profiler_started( void ) +TRACY_API int32_t ___tracy_profiler_started( void ) { - return tracy::s_isProfilerStarted.load( std::memory_order_seq_cst ); + return static_cast( tracy::s_isProfilerStarted.load( std::memory_order_seq_cst ) ); } # endif diff --git a/src/third_party/tracy/client/TracyProfiler.hpp b/src/third_party/tracy/client/TracyProfiler.hpp index c2bf5b11..8d169058 100644 --- a/src/third_party/tracy/client/TracyProfiler.hpp +++ b/src/third_party/tracy/client/TracyProfiler.hpp @@ -10,6 +10,7 @@ #include "tracy_concurrentqueue.h" #include "tracy_SPSCQueue.h" #include "TracyCallstack.hpp" +#include "TracyKCore.hpp" #include "TracySysPower.hpp" #include "TracySysTime.hpp" #include "TracyFastVector.hpp" @@ -27,7 +28,7 @@ # include #endif -#if ( defined _WIN32 || ( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 ) || ( defined TARGET_OS_IOS && TARGET_OS_IOS == 1 ) ) +#if ( (defined _WIN32 && !(defined _M_ARM64 || defined _M_ARM)) || ( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 ) || ( defined TARGET_OS_IOS && TARGET_OS_IOS == 1 ) ) # define TRACY_HW_TIMER #endif @@ -113,11 +114,11 @@ struct LuaZoneState #define TracyLfqPrepare( _type ) \ - moodycamel::ConcurrentQueueDefaultTraits::index_t __magic; \ - auto __token = GetToken(); \ + tracy::moodycamel::ConcurrentQueueDefaultTraits::index_t __magic; \ + auto __token = tracy::GetToken(); \ auto& __tail = __token->get_tail_index(); \ auto item = __token->enqueue_begin( __magic ); \ - MemWrite( &item->hdr.type, _type ); + tracy::MemWrite( &item->hdr.type, _type ); #define TracyLfqCommit \ __tail.store( __magic + 1, std::memory_order_release ); @@ -135,11 +136,11 @@ struct LuaZoneState #ifdef TRACY_FIBERS # define TracyQueuePrepare( _type ) \ - auto item = Profiler::QueueSerial(); \ - MemWrite( &item->hdr.type, _type ); + auto item = tracy::Profiler::QueueSerial(); \ + tracy::MemWrite( &item->hdr.type, _type ); # define TracyQueueCommit( _name ) \ - MemWrite( &item->_name.thread, GetThreadHandle() ); \ - Profiler::QueueSerialFinish(); + tracy::MemWrite( &item->_name.thread, tracy::GetThreadHandle() ); \ + tracy::Profiler::QueueSerialFinish(); # define TracyQueuePrepareC( _type ) \ auto item = tracy::Profiler::QueueSerial(); \ tracy::MemWrite( &item->hdr.type, _type ); @@ -386,58 +387,58 @@ public: TracyLfqCommit; } - static tracy_force_inline void Message( const char* txt, size_t size, int callstack ) + static tracy_force_inline void Message( const char* txt, size_t size, int32_t callstack_depth ) { assert( size < (std::numeric_limits::max)() ); #ifdef TRACY_ON_DEMAND if( !GetProfiler().IsConnected() ) return; #endif - if( callstack != 0 ) + if( callstack_depth != 0 && has_callstack() ) { - tracy::GetProfiler().SendCallstack( callstack ); + tracy::GetProfiler().SendCallstack( callstack_depth ); } auto ptr = (char*)tracy_malloc( size ); memcpy( ptr, txt, size ); - TracyQueuePrepare( callstack == 0 ? QueueType::Message : QueueType::MessageCallstack ); + TracyQueuePrepare( callstack_depth == 0 ? QueueType::Message : QueueType::MessageCallstack ); MemWrite( &item->messageFat.time, GetTime() ); MemWrite( &item->messageFat.text, (uint64_t)ptr ); MemWrite( &item->messageFat.size, (uint16_t)size ); TracyQueueCommit( messageFatThread ); } - static tracy_force_inline void Message( const char* txt, int callstack ) + static tracy_force_inline void Message( const char* txt, int32_t callstack_depth ) { #ifdef TRACY_ON_DEMAND if( !GetProfiler().IsConnected() ) return; #endif - if( callstack != 0 ) + if( callstack_depth != 0 && has_callstack() ) { - tracy::GetProfiler().SendCallstack( callstack ); + tracy::GetProfiler().SendCallstack( callstack_depth ); } - TracyQueuePrepare( callstack == 0 ? QueueType::MessageLiteral : QueueType::MessageLiteralCallstack ); + TracyQueuePrepare( callstack_depth == 0 ? QueueType::MessageLiteral : QueueType::MessageLiteralCallstack ); MemWrite( &item->messageLiteral.time, GetTime() ); MemWrite( &item->messageLiteral.text, (uint64_t)txt ); TracyQueueCommit( messageLiteralThread ); } - static tracy_force_inline void MessageColor( const char* txt, size_t size, uint32_t color, int callstack ) + static tracy_force_inline void MessageColor( const char* txt, size_t size, uint32_t color, int32_t callstack_depth ) { assert( size < (std::numeric_limits::max)() ); #ifdef TRACY_ON_DEMAND if( !GetProfiler().IsConnected() ) return; #endif - if( callstack != 0 ) + if( callstack_depth != 0 && has_callstack() ) { - tracy::GetProfiler().SendCallstack( callstack ); + tracy::GetProfiler().SendCallstack( callstack_depth ); } auto ptr = (char*)tracy_malloc( size ); memcpy( ptr, txt, size ); - TracyQueuePrepare( callstack == 0 ? QueueType::MessageColor : QueueType::MessageColorCallstack ); + TracyQueuePrepare( callstack_depth == 0 ? QueueType::MessageColor : QueueType::MessageColorCallstack ); MemWrite( &item->messageColorFat.time, GetTime() ); MemWrite( &item->messageColorFat.text, (uint64_t)ptr ); MemWrite( &item->messageColorFat.b, uint8_t( ( color ) & 0xFF ) ); @@ -447,17 +448,17 @@ public: TracyQueueCommit( messageColorFatThread ); } - static tracy_force_inline void MessageColor( const char* txt, uint32_t color, int callstack ) + static tracy_force_inline void MessageColor( const char* txt, uint32_t color, int32_t callstack_depth ) { #ifdef TRACY_ON_DEMAND if( !GetProfiler().IsConnected() ) return; #endif - if( callstack != 0 ) + if( callstack_depth != 0 && has_callstack() ) { - tracy::GetProfiler().SendCallstack( callstack ); + tracy::GetProfiler().SendCallstack( callstack_depth ); } - TracyQueuePrepare( callstack == 0 ? QueueType::MessageLiteralColor : QueueType::MessageLiteralColorCallstack ); + TracyQueuePrepare( callstack_depth == 0 ? QueueType::MessageLiteralColor : QueueType::MessageLiteralColorCallstack ); MemWrite( &item->messageColorLiteral.time, GetTime() ); MemWrite( &item->messageColorLiteral.text, (uint64_t)txt ); MemWrite( &item->messageColorLiteral.b, uint8_t( ( color ) & 0xFF ) ); @@ -509,29 +510,31 @@ public: GetProfiler().m_serialLock.unlock(); } - static tracy_force_inline void MemAllocCallstack( const void* ptr, size_t size, int depth, bool secure ) + static tracy_force_inline void MemAllocCallstack( const void* ptr, size_t size, int32_t depth, bool secure ) { if( secure && !ProfilerAvailable() ) return; -#ifdef TRACY_HAS_CALLSTACK - auto& profiler = GetProfiler(); + if( depth > 0 && has_callstack() ) + { + auto& profiler = GetProfiler(); # ifdef TRACY_ON_DEMAND - if( !profiler.IsConnected() ) return; + if( !profiler.IsConnected() ) return; # endif - const auto thread = GetThreadHandle(); + const auto thread = GetThreadHandle(); - auto callstack = Callstack( depth ); + auto callstack = Callstack( depth ); - profiler.m_serialLock.lock(); - SendCallstackSerial( callstack ); - SendMemAlloc( QueueType::MemAllocCallstack, thread, ptr, size ); - profiler.m_serialLock.unlock(); -#else - static_cast(depth); // unused - MemAlloc( ptr, size, secure ); -#endif + profiler.m_serialLock.lock(); + SendCallstackSerial( callstack ); + SendMemAlloc( QueueType::MemAllocCallstack, thread, ptr, size ); + profiler.m_serialLock.unlock(); + } + else + { + MemAlloc( ptr, size, secure ); + } } - static tracy_force_inline void MemFreeCallstack( const void* ptr, int depth, bool secure ) + static tracy_force_inline void MemFreeCallstack( const void* ptr, int32_t depth, bool secure ) { if( secure && !ProfilerAvailable() ) return; if( !ProfilerAllocatorAvailable() ) @@ -539,23 +542,25 @@ public: MemFree( ptr, secure ); return; } -#ifdef TRACY_HAS_CALLSTACK - auto& profiler = GetProfiler(); + if( depth > 0 && has_callstack() ) + { + auto& profiler = GetProfiler(); # ifdef TRACY_ON_DEMAND - if( !profiler.IsConnected() ) return; + if( !profiler.IsConnected() ) return; # endif - const auto thread = GetThreadHandle(); + const auto thread = GetThreadHandle(); - auto callstack = Callstack( depth ); + auto callstack = Callstack( depth ); - profiler.m_serialLock.lock(); - SendCallstackSerial( callstack ); - SendMemFree( QueueType::MemFreeCallstack, thread, ptr ); - profiler.m_serialLock.unlock(); -#else - static_cast(depth); // unused - MemFree( ptr, secure ); -#endif + profiler.m_serialLock.lock(); + SendCallstackSerial( callstack ); + SendMemFree( QueueType::MemFreeCallstack, thread, ptr ); + profiler.m_serialLock.unlock(); + } + else + { + MemFree( ptr, secure ); + } } static tracy_force_inline void MemAllocNamed( const void* ptr, size_t size, bool secure, const char* name ) @@ -586,64 +591,101 @@ public: GetProfiler().m_serialLock.unlock(); } - static tracy_force_inline void MemAllocCallstackNamed( const void* ptr, size_t size, int depth, bool secure, const char* name ) + static tracy_force_inline void MemAllocCallstackNamed( const void* ptr, size_t size, int32_t depth, bool secure, const char* name ) { if( secure && !ProfilerAvailable() ) return; -#ifdef TRACY_HAS_CALLSTACK - auto& profiler = GetProfiler(); + if( depth > 0 && has_callstack() ) + { + auto& profiler = GetProfiler(); # ifdef TRACY_ON_DEMAND - if( !profiler.IsConnected() ) return; + if( !profiler.IsConnected() ) return; # endif - const auto thread = GetThreadHandle(); + const auto thread = GetThreadHandle(); - auto callstack = Callstack( depth ); + auto callstack = Callstack( depth ); - profiler.m_serialLock.lock(); - SendCallstackSerial( callstack ); - SendMemName( name ); - SendMemAlloc( QueueType::MemAllocCallstackNamed, thread, ptr, size ); - profiler.m_serialLock.unlock(); -#else - static_cast(depth); // unused - static_cast(name); // unused - MemAlloc( ptr, size, secure ); -#endif + profiler.m_serialLock.lock(); + SendCallstackSerial( callstack ); + SendMemName( name ); + SendMemAlloc( QueueType::MemAllocCallstackNamed, thread, ptr, size ); + profiler.m_serialLock.unlock(); + } + else + { + MemAllocNamed( ptr, size, secure, name ); + } } - static tracy_force_inline void MemFreeCallstackNamed( const void* ptr, int depth, bool secure, const char* name ) + static tracy_force_inline void MemFreeCallstackNamed( const void* ptr, int32_t depth, bool secure, const char* name ) { if( secure && !ProfilerAvailable() ) return; -#ifdef TRACY_HAS_CALLSTACK - auto& profiler = GetProfiler(); + if( depth > 0 && has_callstack() ) + { + auto& profiler = GetProfiler(); # ifdef TRACY_ON_DEMAND - if( !profiler.IsConnected() ) return; + if( !profiler.IsConnected() ) return; # endif - const auto thread = GetThreadHandle(); + const auto thread = GetThreadHandle(); - auto callstack = Callstack( depth ); + auto callstack = Callstack( depth ); - profiler.m_serialLock.lock(); - SendCallstackSerial( callstack ); - SendMemName( name ); - SendMemFree( QueueType::MemFreeCallstackNamed, thread, ptr ); - profiler.m_serialLock.unlock(); -#else - static_cast(depth); // unused - static_cast(name); // unused - MemFree( ptr, secure ); -#endif + profiler.m_serialLock.lock(); + SendCallstackSerial( callstack ); + SendMemName( name ); + SendMemFree( QueueType::MemFreeCallstackNamed, thread, ptr ); + profiler.m_serialLock.unlock(); + } + else + { + MemFreeNamed( ptr, secure, name ); + } } - static tracy_force_inline void SendCallstack( int depth ) + static tracy_force_inline void MemDiscard( const char* name, bool secure ) { -#ifdef TRACY_HAS_CALLSTACK - auto ptr = Callstack( depth ); - TracyQueuePrepare( QueueType::Callstack ); - MemWrite( &item->callstackFat.ptr, (uint64_t)ptr ); - TracyQueueCommit( callstackFatThread ); -#else - static_cast(depth); // unused + if( secure && !ProfilerAvailable() ) return; +#ifdef TRACY_ON_DEMAND + if( !GetProfiler().IsConnected() ) return; #endif + const auto thread = GetThreadHandle(); + + GetProfiler().m_serialLock.lock(); + SendMemDiscard( QueueType::MemDiscard, thread, name ); + GetProfiler().m_serialLock.unlock(); + } + + static tracy_force_inline void MemDiscardCallstack( const char* name, bool secure, int32_t depth ) + { + if( secure && !ProfilerAvailable() ) return; + if( depth > 0 && has_callstack() ) + { +# ifdef TRACY_ON_DEMAND + if( !GetProfiler().IsConnected() ) return; +# endif + const auto thread = GetThreadHandle(); + + auto callstack = Callstack( depth ); + + GetProfiler().m_serialLock.lock(); + SendCallstackSerial( callstack ); + SendMemDiscard( QueueType::MemDiscard, thread, name ); + GetProfiler().m_serialLock.unlock(); + } + else + { + MemDiscard( name, secure ); + } + } + + static tracy_force_inline void SendCallstack( int32_t depth ) + { + if( depth > 0 && has_callstack() ) + { + auto ptr = Callstack( depth ); + TracyQueuePrepare( QueueType::Callstack ); + MemWrite( &item->callstackFat.ptr, (uint64_t)ptr ); + TracyQueueCommit( callstackFatThread ); + } } static tracy_force_inline void ParameterRegister( ParameterCallback cb, void* data ) @@ -676,11 +718,12 @@ public: } #ifdef TRACY_FIBERS - static tracy_force_inline void EnterFiber( const char* fiber ) + static tracy_force_inline void EnterFiber( const char* fiber, int32_t groupHint ) { TracyQueuePrepare( QueueType::FiberEnter ); MemWrite( &item->fiberEnter.time, GetTime() ); MemWrite( &item->fiberEnter.fiber, (uint64_t)fiber ); + MemWrite( &item->fiberEnter.groupHint, groupHint ); TracyQueueCommit( fiberEnter ); } @@ -692,7 +735,7 @@ public: } #endif - void SendCallstack( int depth, const char* skipBefore ); + void SendCallstack( int32_t depth, const char* skipBefore ); static void CutCallstack( void* callstack, const char* skipBefore ); static bool ShouldExit(); @@ -745,29 +788,29 @@ public: // 1b null terminator // nsz zone name (optional) - static tracy_force_inline uint64_t AllocSourceLocation( uint32_t line, const char* source, const char* function ) + static tracy_force_inline uint64_t AllocSourceLocation( uint32_t line, const char* source, const char* function, uint32_t color = 0 ) { - return AllocSourceLocation( line, source, function, nullptr, 0 ); + return AllocSourceLocation( line, source, function, nullptr, 0, color ); } - static tracy_force_inline uint64_t AllocSourceLocation( uint32_t line, const char* source, const char* function, const char* name, size_t nameSz ) + static tracy_force_inline uint64_t AllocSourceLocation( uint32_t line, const char* source, const char* function, const char* name, size_t nameSz, uint32_t color = 0 ) { - return AllocSourceLocation( line, source, strlen(source), function, strlen(function), name, nameSz ); + return AllocSourceLocation( line, source, strlen(source), function, strlen(function), name, nameSz, color ); } - static tracy_force_inline uint64_t AllocSourceLocation( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz ) + static tracy_force_inline uint64_t AllocSourceLocation( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, uint32_t color = 0 ) { - return AllocSourceLocation( line, source, sourceSz, function, functionSz, nullptr, 0 ); + return AllocSourceLocation( line, source, sourceSz, function, functionSz, nullptr, 0, color ); } - static tracy_force_inline uint64_t AllocSourceLocation( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz ) + static tracy_force_inline uint64_t AllocSourceLocation( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, uint32_t color = 0 ) { const auto sz32 = uint32_t( 2 + 4 + 4 + functionSz + 1 + sourceSz + 1 + nameSz ); assert( sz32 <= (std::numeric_limits::max)() ); const auto sz = uint16_t( sz32 ); auto ptr = (char*)tracy_malloc( sz ); memcpy( ptr, &sz, 2 ); - memset( ptr + 2, 0, 4 ); + memcpy( ptr + 2, &color, 4 ); memcpy( ptr + 6, &line, 4 ); memcpy( ptr + 10, function, functionSz ); ptr[10 + functionSz] = '\0'; @@ -798,6 +841,9 @@ private: void HandleSymbolQueueItem( const SymbolQueueItem& si ); #endif + void InstallCrashHandler(); + void RemoveCrashHandler(); + void ClearQueues( tracy::moodycamel::ConsumerToken& token ); void ClearSerial(); DequeueStatus Dequeue( tracy::moodycamel::ConsumerToken& token ); @@ -830,6 +876,21 @@ private: m_bufferOffset += int( len ); } + char* SafeCopyProlog( const char* p, size_t size ); + void SafeCopyEpilog( char* buf ); + + template // must be void( const char* buf, size_t size ) + bool WithSafeCopy( const char* p, size_t size, Callable&& callable ) + { + if( char* buf = SafeCopyProlog( p, size ) ) + { + callable( buf, size ); + SafeCopyEpilog( buf ); + return true; + } + return false; + } + bool SendData( const char* data, size_t len ); void SendLongString( uint64_t ptr, const char* str, size_t len, QueueType type ); void SendSourceLocation( uint64_t ptr ); @@ -859,14 +920,13 @@ private: static tracy_force_inline void SendCallstackSerial( void* ptr ) { -#ifdef TRACY_HAS_CALLSTACK - auto item = GetProfiler().m_serialQueue.prepare_next(); - MemWrite( &item->hdr.type, QueueType::CallstackSerial ); - MemWrite( &item->callstackFat.ptr, (uint64_t)ptr ); - GetProfiler().m_serialQueue.commit_next(); -#else - static_cast(ptr); // unused -#endif + if( has_callstack() ) + { + auto item = GetProfiler().m_serialQueue.prepare_next(); + MemWrite( &item->hdr.type, QueueType::CallstackSerial ); + MemWrite( &item->callstackFat.ptr, (uint64_t)ptr ); + GetProfiler().m_serialQueue.commit_next(); + } } static tracy_force_inline void SendMemAlloc( QueueType type, const uint32_t thread, const void* ptr, size_t size ) @@ -904,6 +964,18 @@ private: GetProfiler().m_serialQueue.commit_next(); } + static tracy_force_inline void SendMemDiscard( QueueType type, const uint32_t thread, const char* name ) + { + assert( type == QueueType::MemDiscard || type == QueueType::MemDiscardCallstack ); + + auto item = GetProfiler().m_serialQueue.prepare_next(); + MemWrite( &item->hdr.type, type ); + MemWrite( &item->memDiscard.time, GetTime() ); + MemWrite( &item->memDiscard.thread, thread ); + MemWrite( &item->memDiscard.name, (uint64_t)name ); + GetProfiler().m_serialQueue.commit_next(); + } + static tracy_force_inline void SendMemName( const char* name ) { assert( name ); @@ -987,13 +1059,24 @@ private: char* m_queryData; char* m_queryDataPtr; -#if defined _WIN32 - void* m_exceptionHandler; +#ifndef NDEBUG + // m_safeSendBuffer and m_pipe should only be used by the Tracy Profiler thread; this ensures that in debug builds. + std::atomic_bool m_inUse{ false }; #endif + char* m_safeSendBuffer; + +#if defined _WIN32 + void* m_prevHandler; +#else + int m_pipe[2]; + int m_pipeBufSize; +#endif + #ifdef __linux__ struct { struct sigaction pwr, ill, fpe, segv, pipe, bus, abrt; } m_prevSignal; + KCore* m_kcore; #endif bool m_crashHandlerInstalled; diff --git a/src/third_party/tracy/client/TracyScoped.hpp b/src/third_party/tracy/client/TracyScoped.hpp index d2274e40..7f9256d8 100644 --- a/src/third_party/tracy/client/TracyScoped.hpp +++ b/src/third_party/tracy/client/TracyScoped.hpp @@ -2,6 +2,7 @@ #define __TRACYSCOPED_HPP__ #include +#include #include #include @@ -9,6 +10,7 @@ #include "../common/TracyAlign.hpp" #include "../common/TracyAlloc.hpp" #include "TracyProfiler.hpp" +#include "TracyCallstack.hpp" namespace tracy { @@ -21,7 +23,7 @@ public: ScopedZone& operator=( const ScopedZone& ) = delete; ScopedZone& operator=( ScopedZone&& ) = delete; - tracy_force_inline ScopedZone( const SourceLocationData* srcloc, bool is_active = true ) + tracy_force_inline ScopedZone( const SourceLocationData* srcloc, int32_t depth = -1, bool is_active = true ) #ifdef TRACY_ON_DEMAND : m_active( is_active && GetProfiler().IsConnected() ) #else @@ -32,13 +34,19 @@ public: #ifdef TRACY_ON_DEMAND m_connectionId = GetProfiler().ConnectionId(); #endif - TracyQueuePrepare( QueueType::ZoneBegin ); + auto zoneQueue = QueueType::ZoneBegin; + if( depth > 0 && has_callstack() ) + { + GetProfiler().SendCallstack( depth ); + zoneQueue = QueueType::ZoneBeginCallstack; + } + TracyQueuePrepare( zoneQueue ); MemWrite( &item->zoneBegin.time, Profiler::GetTime() ); MemWrite( &item->zoneBegin.srcloc, (uint64_t)srcloc ); TracyQueueCommit( zoneBeginThread ); } - tracy_force_inline ScopedZone( const SourceLocationData* srcloc, int depth, bool is_active = true ) + tracy_force_inline ScopedZone( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, uint32_t color, int32_t depth = -1, bool is_active = true ) #ifdef TRACY_ON_DEMAND : m_active( is_active && GetProfiler().IsConnected() ) #else @@ -49,51 +57,21 @@ public: #ifdef TRACY_ON_DEMAND m_connectionId = GetProfiler().ConnectionId(); #endif - GetProfiler().SendCallstack( depth ); - - TracyQueuePrepare( QueueType::ZoneBeginCallstack ); - MemWrite( &item->zoneBegin.time, Profiler::GetTime() ); - MemWrite( &item->zoneBegin.srcloc, (uint64_t)srcloc ); - TracyQueueCommit( zoneBeginThread ); - } - - tracy_force_inline ScopedZone( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, bool is_active = true ) -#ifdef TRACY_ON_DEMAND - : m_active( is_active && GetProfiler().IsConnected() ) -#else - : m_active( is_active ) -#endif - { - if( !m_active ) return; -#ifdef TRACY_ON_DEMAND - m_connectionId = GetProfiler().ConnectionId(); -#endif - TracyQueuePrepare( QueueType::ZoneBeginAllocSrcLoc ); - const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz ); + auto zoneQueue = QueueType::ZoneBeginAllocSrcLoc; + if( depth > 0 && has_callstack() ) + { + GetProfiler().SendCallstack( depth ); + zoneQueue = QueueType::ZoneBeginAllocSrcLocCallstack; + } + TracyQueuePrepare( zoneQueue ); + const auto srcloc = + Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz, color ); MemWrite( &item->zoneBegin.time, Profiler::GetTime() ); MemWrite( &item->zoneBegin.srcloc, srcloc ); TracyQueueCommit( zoneBeginThread ); } - tracy_force_inline ScopedZone( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int depth, bool is_active = true ) -#ifdef TRACY_ON_DEMAND - : m_active( is_active && GetProfiler().IsConnected() ) -#else - : m_active( is_active ) -#endif - { - if( !m_active ) return; -#ifdef TRACY_ON_DEMAND - m_connectionId = GetProfiler().ConnectionId(); -#endif - GetProfiler().SendCallstack( depth ); - - TracyQueuePrepare( QueueType::ZoneBeginAllocSrcLocCallstack ); - const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz ); - MemWrite( &item->zoneBegin.time, Profiler::GetTime() ); - MemWrite( &item->zoneBegin.srcloc, srcloc ); - TracyQueueCommit( zoneBeginThread ); - } + tracy_force_inline ScopedZone( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int32_t depth, bool is_active = true ) : ScopedZone( line, source, sourceSz, function, functionSz, name, nameSz, 0, depth, is_active ) {} tracy_force_inline ~ScopedZone() { @@ -121,6 +99,30 @@ public: TracyQueueCommit( zoneTextFatThread ); } + void TextFmt( const char* fmt, ... ) + { + if( !m_active ) return; +#ifdef TRACY_ON_DEMAND + if( GetProfiler().ConnectionId() != m_connectionId ) return; +#endif + va_list args; + va_start( args, fmt ); + auto size = vsnprintf( nullptr, 0, fmt, args ); + va_end( args ); + if( size < 0 ) return; + assert( size < (std::numeric_limits::max)() ); + + char* ptr = (char*)tracy_malloc( size_t( size ) + 1 ); + va_start( args, fmt ); + vsnprintf( ptr, size_t( size ) + 1, fmt, args ); + va_end( args ); + + TracyQueuePrepare( QueueType::ZoneText ); + MemWrite( &item->zoneTextFat.text, (uint64_t)ptr ); + MemWrite( &item->zoneTextFat.size, (uint16_t)size ); + TracyQueueCommit( zoneTextFatThread ); + } + tracy_force_inline void Name( const char* txt, size_t size ) { assert( size < (std::numeric_limits::max)() ); @@ -136,6 +138,30 @@ public: TracyQueueCommit( zoneTextFatThread ); } + void NameFmt( const char* fmt, ... ) + { + if( !m_active ) return; +#ifdef TRACY_ON_DEMAND + if( GetProfiler().ConnectionId() != m_connectionId ) return; +#endif + va_list args; + va_start( args, fmt ); + auto size = vsnprintf( nullptr, 0, fmt, args ); + va_end( args ); + if( size < 0 ) return; + assert( size < (std::numeric_limits::max)() ); + + char* ptr = (char*)tracy_malloc( size_t( size ) + 1 ); + va_start( args, fmt ); + vsnprintf( ptr, size_t( size ) + 1, fmt, args ); + va_end( args ); + + TracyQueuePrepare( QueueType::ZoneName ); + MemWrite( &item->zoneTextFat.text, (uint64_t)ptr ); + MemWrite( &item->zoneTextFat.size, (uint16_t)size ); + TracyQueueCommit( zoneTextFatThread ); + } + tracy_force_inline void Color( uint32_t color ) { if( !m_active ) return; diff --git a/src/third_party/tracy/client/TracySysPower.cpp b/src/third_party/tracy/client/TracySysPower.cpp index bd5939da..6ad1d647 100644 --- a/src/third_party/tracy/client/TracySysPower.cpp +++ b/src/third_party/tracy/client/TracySysPower.cpp @@ -85,7 +85,7 @@ void SysPower::ScanDirectory( const char* path, int parent ) FILE* f = fopen( tmp, "r" ); if( f ) { - fscanf( f, "%" PRIu64, &maxRange ); + (void)fscanf( f, "%" PRIu64, &maxRange ); fclose( f ); } } diff --git a/src/third_party/tracy/client/TracySysTrace.cpp b/src/third_party/tracy/client/TracySysTrace.cpp index af0641fe..5827a992 100644 --- a/src/third_party/tracy/client/TracySysTrace.cpp +++ b/src/third_party/tracy/client/TracySysTrace.cpp @@ -16,16 +16,25 @@ namespace tracy { -static constexpr int GetSamplingFrequency() +static int GetSamplingFrequency() { + int samplingHz = TRACY_SAMPLING_HZ; + + auto env = GetEnvVar( "TRACY_SAMPLING_HZ" ); + if( env ) + { + int val = atoi( env ); + if( val > 0 ) samplingHz = val; + } + #if defined _WIN32 - return TRACY_SAMPLING_HZ > 8000 ? 8000 : ( TRACY_SAMPLING_HZ < 1 ? 1 : TRACY_SAMPLING_HZ ); + return samplingHz > 8000 ? 8000 : ( samplingHz < 1 ? 1 : samplingHz ); #else - return TRACY_SAMPLING_HZ > 1000000 ? 1000000 : ( TRACY_SAMPLING_HZ < 1 ? 1 : TRACY_SAMPLING_HZ ); + return samplingHz > 1000000 ? 1000000 : ( samplingHz < 1 ? 1 : samplingHz ); #endif } -static constexpr int GetSamplingPeriod() +static int GetSamplingPeriod() { return 1000000000 / GetSamplingFrequency(); } @@ -321,7 +330,7 @@ static void SetupVsync() #endif } -static constexpr int GetSamplingInterval() +static int GetSamplingInterval() { return GetSamplingPeriod() / 100; } @@ -489,11 +498,11 @@ void SysTraceGetExternalName( uint64_t thread, const char*& threadName, const ch if( _GetThreadDescription ) { PWSTR tmp; - _GetThreadDescription( hnd, &tmp ); - char buf[256]; - if( tmp ) + if ( SUCCEEDED( _GetThreadDescription( hnd, &tmp ) ) ) { + char buf[256]; auto ret = wcstombs( buf, tmp, 256 ); + LocalFree(tmp); if( ret != 0 ) { threadName = CopyString( buf, ret ); diff --git a/src/third_party/tracy/client/tracy_rpmalloc.cpp b/src/third_party/tracy/client/tracy_rpmalloc.cpp index 711505d2..315a40f9 100644 --- a/src/third_party/tracy/client/tracy_rpmalloc.cpp +++ b/src/third_party/tracy/client/tracy_rpmalloc.cpp @@ -690,7 +690,9 @@ static pthread_key_t _memory_thread_heap; # define _Thread_local __declspec(thread) # define TLS_MODEL # else -# ifndef __HAIKU__ +# if defined(__ANDROID__) && __ANDROID_API__ >= 29 && defined(__NDK_MAJOR__) && __NDK_MAJOR__ >= 26 +# define TLS_MODEL __attribute__((tls_model("local-dynamic"))) +# elif !defined(__HAIKU__) # define TLS_MODEL __attribute__((tls_model("initial-exec"))) # else # define TLS_MODEL @@ -781,7 +783,7 @@ rpmalloc_set_main_thread(void) { static void _rpmalloc_spin(void) { -#if defined(_MSC_VER) +#if defined(_MSC_VER) && !(defined(_M_ARM) || defined(_M_ARM64)) _mm_pause(); #elif defined(__x86_64__) || defined(__i386__) __asm__ volatile("pause" ::: "memory"); @@ -793,8 +795,7 @@ _rpmalloc_spin(void) { #elif defined(__sparc__) __asm__ volatile("rd %ccr, %g0 \n\trd %ccr, %g0 \n\trd %ccr, %g0"); #else - struct timespec ts = {0}; - nanosleep(&ts, 0); + std::this_thread::yield(); #endif } diff --git a/src/third_party/tracy/common/TracyProtocol.hpp b/src/third_party/tracy/common/TracyProtocol.hpp index 51399b09..839c24e8 100644 --- a/src/third_party/tracy/common/TracyProtocol.hpp +++ b/src/third_party/tracy/common/TracyProtocol.hpp @@ -9,7 +9,7 @@ namespace tracy constexpr unsigned Lz4CompressBound( unsigned isize ) { return isize + ( isize / 255 ) + 16; } -enum : uint32_t { ProtocolVersion = 65 }; +enum : uint32_t { ProtocolVersion = 72 }; enum : uint16_t { BroadcastVersion = 3 }; using lz4sz_t = uint32_t; @@ -47,10 +47,10 @@ enum ServerQuery : uint8_t ServerQueryFrameName, ServerQueryParameter, ServerQueryFiberName, + ServerQueryExternalName, // Items above are high priority. Split order must be preserved. See IsQueryPrio(). ServerQueryDisconnect, ServerQueryCallstackFrame, - ServerQueryExternalName, ServerQuerySymbol, ServerQuerySymbolCode, ServerQuerySourceCode, diff --git a/src/third_party/tracy/common/TracyQueue.hpp b/src/third_party/tracy/common/TracyQueue.hpp index df886e41..c681698a 100644 --- a/src/third_party/tracy/common/TracyQueue.hpp +++ b/src/third_party/tracy/common/TracyQueue.hpp @@ -42,6 +42,8 @@ enum class QueueType : uint8_t MemAllocCallstackNamed, MemFreeCallstack, MemFreeCallstackNamed, + MemDiscard, + MemDiscardCallstack, GpuZoneBegin, GpuZoneBeginCallstack, GpuZoneBeginAllocSrcLoc, @@ -108,6 +110,7 @@ enum class QueueType : uint8_t SingleStringData, SecondStringData, MemNamePayload, + ThreadGroupHint, StringData, ThreadName, PlotName, @@ -259,6 +262,7 @@ struct QueueFiberEnter int64_t time; uint64_t fiber; // ptr uint32_t thread; + int32_t groupHint; }; struct QueueFiberLeave @@ -399,7 +403,9 @@ enum class GpuContextType : uint8_t Vulkan, OpenCL, Direct3D12, - Direct3D11 + Direct3D11, + Metal, + Custom }; enum GpuContextFlags : uint8_t @@ -477,6 +483,12 @@ struct QueueMemNamePayload uint64_t name; }; +struct QueueThreadGroupHint +{ + uint32_t thread; + int32_t groupHint; +}; + struct QueueMemAlloc { int64_t time; @@ -492,6 +504,13 @@ struct QueueMemFree uint64_t ptr; }; +struct QueueMemDiscard +{ + int64_t time; + uint32_t thread; + uint64_t name; +}; + struct QueueCallstackFat { uint64_t ptr; @@ -639,6 +658,7 @@ struct QueueSourceCodeNotAvailable struct QueueCpuTopology { uint32_t package; + uint32_t die; uint32_t core; uint32_t thread; }; @@ -731,7 +751,9 @@ struct QueueItem QueueGpuContextNameFat gpuContextNameFat; QueueMemAlloc memAlloc; QueueMemFree memFree; + QueueMemDiscard memDiscard; QueueMemNamePayload memName; + QueueThreadGroupHint threadGroupHint; QueueCallstackFat callstackFat; QueueCallstackFatThread callstackFatThread; QueueCallstackAllocFat callstackAllocFat; @@ -801,6 +823,8 @@ static constexpr size_t QueueDataSize[] = { sizeof( QueueHeader ) + sizeof( QueueMemAlloc ), // callstack, named sizeof( QueueHeader ) + sizeof( QueueMemFree ), // callstack sizeof( QueueHeader ) + sizeof( QueueMemFree ), // callstack, named + sizeof( QueueHeader ) + sizeof( QueueMemDiscard ), + sizeof( QueueHeader ) + sizeof( QueueMemDiscard ), // callstack sizeof( QueueHeader ) + sizeof( QueueGpuZoneBegin ), sizeof( QueueHeader ) + sizeof( QueueGpuZoneBegin ), // callstack sizeof( QueueHeader ) + sizeof( QueueGpuZoneBeginLean ),// allocated source location @@ -868,6 +892,7 @@ static constexpr size_t QueueDataSize[] = { sizeof( QueueHeader ), // single string data sizeof( QueueHeader ), // second string data sizeof( QueueHeader ) + sizeof( QueueMemNamePayload ), + sizeof( QueueHeader ) + sizeof( QueueThreadGroupHint ), // keep all QueueStringTransfer below sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // string data sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // thread name diff --git a/src/third_party/tracy/common/TracySocket.cpp b/src/third_party/tracy/common/TracySocket.cpp index 25967898..bdba3619 100644 --- a/src/third_party/tracy/common/TracySocket.cpp +++ b/src/third_party/tracy/common/TracySocket.cpp @@ -21,6 +21,9 @@ # pragma warning(disable:4267) # endif # define poll WSAPoll +# ifdef _MSC_VER +# pragma comment(lib, "ws2_32.lib") +# endif #else # include # include diff --git a/src/third_party/tracy/common/TracySystem.cpp b/src/third_party/tracy/common/TracySystem.cpp index 0e26aeca..a92a3457 100644 --- a/src/third_party/tracy/common/TracySystem.cpp +++ b/src/third_party/tracy/common/TracySystem.cpp @@ -26,7 +26,9 @@ # include #elif defined __FreeBSD__ # include -#elif defined __NetBSD__ || defined __DragonFly__ +#elif defined __NetBSD__ +# include +#elif defined __DragonFly__ # include #elif defined __QNX__ # include @@ -101,16 +103,10 @@ TRACY_API uint32_t GetThreadHandleImpl() } #ifdef TRACY_ENABLE -struct ThreadNameData -{ - uint32_t id; - const char* name; - ThreadNameData* next; -}; std::atomic& GetThreadNameData(); #endif -#ifdef _MSC_VER +#if defined _MSC_VER && !defined __clang__ # pragma pack( push, 8 ) struct THREADNAME_INFO { @@ -134,6 +130,11 @@ void ThreadNameMsvcMagic( const THREADNAME_INFO& info ) #endif TRACY_API void SetThreadName( const char* name ) +{ + SetThreadNameWithHint( name, 0 ); +} + +TRACY_API void SetThreadNameWithHint( const char* name, int32_t groupHint ) { #if defined _WIN32 # ifdef TRACY_UWP @@ -149,7 +150,7 @@ TRACY_API void SetThreadName( const char* name ) } else { -# if defined _MSC_VER +# if defined _MSC_VER && !defined __clang__ THREADNAME_INFO info; info.dwType = 0x1000; info.szName = name; @@ -205,6 +206,7 @@ TRACY_API void SetThreadName( const char* name ) buf[sz] = '\0'; auto data = (ThreadNameData*)tracy_malloc_fast( sizeof( ThreadNameData ) ); data->id = detail::GetThreadHandleImpl(); + data->groupHint = groupHint; data->name = buf; data->next = GetThreadNameData().load( std::memory_order_relaxed ); while( !GetThreadNameData().compare_exchange_weak( data->next, data, std::memory_order_release, std::memory_order_relaxed ) ) {} @@ -212,6 +214,22 @@ TRACY_API void SetThreadName( const char* name ) #endif } +#ifdef TRACY_ENABLE +ThreadNameData* GetThreadNameData( uint32_t id ) +{ + auto ptr = GetThreadNameData().load( std::memory_order_relaxed ); + while( ptr ) + { + if( ptr->id == id ) + { + return ptr; + } + ptr = ptr->next; + } + return nullptr; +} +#endif + TRACY_API const char* GetThreadName( uint32_t id ) { static char buf[256]; diff --git a/src/third_party/tracy/common/TracySystem.hpp b/src/third_party/tracy/common/TracySystem.hpp index e0040e95..2f565e9a 100644 --- a/src/third_party/tracy/common/TracySystem.hpp +++ b/src/third_party/tracy/common/TracySystem.hpp @@ -14,6 +14,16 @@ TRACY_API uint32_t GetThreadHandleImpl(); } #ifdef TRACY_ENABLE +struct ThreadNameData +{ + uint32_t id; + int32_t groupHint; + const char* name; + ThreadNameData* next; +}; + +ThreadNameData* GetThreadNameData( uint32_t id ); + TRACY_API uint32_t GetThreadHandle(); #else static inline uint32_t GetThreadHandle() @@ -23,9 +33,10 @@ static inline uint32_t GetThreadHandle() #endif TRACY_API void SetThreadName( const char* name ); +TRACY_API void SetThreadNameWithHint( const char* name, int32_t groupHint ); TRACY_API const char* GetThreadName( uint32_t id ); -TRACY_API const char* GetEnvVar(const char* name); +TRACY_API const char* GetEnvVar( const char* name ); } diff --git a/src/third_party/tracy/common/TracyVersion.hpp b/src/third_party/tracy/common/TracyVersion.hpp index 2355279f..12642d65 100644 --- a/src/third_party/tracy/common/TracyVersion.hpp +++ b/src/third_party/tracy/common/TracyVersion.hpp @@ -6,8 +6,8 @@ namespace tracy namespace Version { enum { Major = 0 }; -enum { Minor = 10 }; -enum { Patch = 0 }; +enum { Minor = 11 }; +enum { Patch = 2 }; } } diff --git a/src/third_party/tracy/common/tracy_lz4.cpp b/src/third_party/tracy/common/tracy_lz4.cpp index 6c26639c..15d0990f 100644 --- a/src/third_party/tracy/common/tracy_lz4.cpp +++ b/src/third_party/tracy/common/tracy_lz4.cpp @@ -128,11 +128,11 @@ #endif /* _MSC_VER */ #ifndef LZ4_FORCE_INLINE -# ifdef _MSC_VER /* Visual Studio */ +# if defined (_MSC_VER) && !defined (__clang__) /* MSVC */ # define LZ4_FORCE_INLINE static __forceinline # else # if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ -# ifdef __GNUC__ +# if defined (__GNUC__) || defined (__clang__) # define LZ4_FORCE_INLINE static inline __attribute__((always_inline)) # else # define LZ4_FORCE_INLINE static inline diff --git a/src/third_party/tracy/libbacktrace/dwarf.cpp b/src/third_party/tracy/libbacktrace/dwarf.cpp index f3899cbc..52fa8a8d 100644 --- a/src/third_party/tracy/libbacktrace/dwarf.cpp +++ b/src/third_party/tracy/libbacktrace/dwarf.cpp @@ -725,8 +725,8 @@ struct dwarf_data struct dwarf_data *next; /* The data for .gnu_debugaltlink. */ struct dwarf_data *altlink; - /* The base address for this file. */ - uintptr_t base_address; +/* The base address mapping for this file. */ + struct libbacktrace_base_address base_address; /* A sorted list of address ranges. */ struct unit_addrs *addrs; /* Number of address ranges in list. */ @@ -1947,8 +1947,9 @@ update_pcrange (const struct attr* attr, const struct attr_val* val, static int add_low_high_range (struct backtrace_state *state, const struct dwarf_sections *dwarf_sections, - uintptr_t base_address, int is_bigendian, - struct unit *u, const struct pcrange *pcrange, + struct libbacktrace_base_address base_address, + int is_bigendian, struct unit *u, + const struct pcrange *pcrange, int (*add_range) (struct backtrace_state *state, void *rdata, uintptr_t lowpc, uintptr_t highpc, @@ -1983,8 +1984,8 @@ add_low_high_range (struct backtrace_state *state, /* Add in the base address of the module when recording PC values, so that we can look up the PC directly. */ - lowpc += base_address; - highpc += base_address; + lowpc = libbacktrace_add_base (lowpc, base_address); + highpc = libbacktrace_add_base (highpc, base_address); return add_range (state, rdata, lowpc, highpc, error_callback, data, vec); } @@ -1996,7 +1997,7 @@ static int add_ranges_from_ranges ( struct backtrace_state *state, const struct dwarf_sections *dwarf_sections, - uintptr_t base_address, int is_bigendian, + struct libbacktrace_base_address base_address, int is_bigendian, struct unit *u, uintptr_t base, const struct pcrange *pcrange, int (*add_range) (struct backtrace_state *state, void *rdata, @@ -2042,10 +2043,11 @@ add_ranges_from_ranges ( base = (uintptr_t) high; else { - if (!add_range (state, rdata, - (uintptr_t) low + base + base_address, - (uintptr_t) high + base + base_address, - error_callback, data, vec)) + uintptr_t rl, rh; + + rl = libbacktrace_add_base ((uintptr_t) low + base, base_address); + rh = libbacktrace_add_base ((uintptr_t) high + base, base_address); + if (!add_range (state, rdata, rl, rh, error_callback, data, vec)) return 0; } } @@ -2063,7 +2065,7 @@ static int add_ranges_from_rnglists ( struct backtrace_state *state, const struct dwarf_sections *dwarf_sections, - uintptr_t base_address, int is_bigendian, + struct libbacktrace_base_address base_address, int is_bigendian, struct unit *u, uintptr_t base, const struct pcrange *pcrange, int (*add_range) (struct backtrace_state *state, void *rdata, @@ -2146,9 +2148,10 @@ add_ranges_from_rnglists ( u->addrsize, is_bigendian, index, error_callback, data, &high)) return 0; - if (!add_range (state, rdata, low + base_address, - high + base_address, error_callback, data, - vec)) + if (!add_range (state, rdata, + libbacktrace_add_base (low, base_address), + libbacktrace_add_base (high, base_address), + error_callback, data, vec)) return 0; } break; @@ -2165,7 +2168,7 @@ add_ranges_from_rnglists ( error_callback, data, &low)) return 0; length = read_uleb128 (&rnglists_buf); - low += base_address; + low = libbacktrace_add_base (low, base_address); if (!add_range (state, rdata, low, low + length, error_callback, data, vec)) return 0; @@ -2179,8 +2182,9 @@ add_ranges_from_rnglists ( low = read_uleb128 (&rnglists_buf); high = read_uleb128 (&rnglists_buf); - if (!add_range (state, rdata, low + base + base_address, - high + base + base_address, + if (!add_range (state, rdata, + libbacktrace_add_base (low + base, base_address), + libbacktrace_add_base (high + base, base_address), error_callback, data, vec)) return 0; } @@ -2197,9 +2201,10 @@ add_ranges_from_rnglists ( low = (uintptr_t) read_address (&rnglists_buf, u->addrsize); high = (uintptr_t) read_address (&rnglists_buf, u->addrsize); - if (!add_range (state, rdata, low + base_address, - high + base_address, error_callback, data, - vec)) + if (!add_range (state, rdata, + libbacktrace_add_base (low, base_address), + libbacktrace_add_base (high, base_address), + error_callback, data, vec)) return 0; } break; @@ -2211,7 +2216,7 @@ add_ranges_from_rnglists ( low = (uintptr_t) read_address (&rnglists_buf, u->addrsize); length = (uintptr_t) read_uleb128 (&rnglists_buf); - low += base_address; + low = libbacktrace_add_base (low, base_address); if (!add_range (state, rdata, low, low + length, error_callback, data, vec)) return 0; @@ -2239,7 +2244,7 @@ add_ranges_from_rnglists ( static int add_ranges (struct backtrace_state *state, const struct dwarf_sections *dwarf_sections, - uintptr_t base_address, int is_bigendian, + struct libbacktrace_base_address base_address, int is_bigendian, struct unit *u, uintptr_t base, const struct pcrange *pcrange, int (*add_range) (struct backtrace_state *state, void *rdata, uintptr_t lowpc, uintptr_t highpc, @@ -2275,7 +2280,8 @@ add_ranges (struct backtrace_state *state, read, 0 if there is some error. */ static int -find_address_ranges (struct backtrace_state *state, uintptr_t base_address, +find_address_ranges (struct backtrace_state *state, + struct libbacktrace_base_address base_address, struct dwarf_buf *unit_buf, const struct dwarf_sections *dwarf_sections, int is_bigendian, struct dwarf_data *altlink, @@ -2430,7 +2436,8 @@ find_address_ranges (struct backtrace_state *state, uintptr_t base_address, on success, 0 on failure. */ static int -build_address_map (struct backtrace_state *state, uintptr_t base_address, +build_address_map (struct backtrace_state *state, + struct libbacktrace_base_address base_address, const struct dwarf_sections *dwarf_sections, int is_bigendian, struct dwarf_data *altlink, backtrace_error_callback error_callback, void *data, @@ -2649,7 +2656,7 @@ add_line (struct backtrace_state *state, struct dwarf_data *ddata, /* Add in the base address here, so that we can look up the PC directly. */ - ln->pc = pc + ddata->base_address; + ln->pc = libbacktrace_add_base (pc, ddata->base_address); ln->filename = filename; ln->lineno = lineno; @@ -4251,6 +4258,19 @@ dwarf_lookup_pc (struct backtrace_state *state, struct dwarf_data *ddata, } } +bool dwarf_fileline_dwarf_lookup_pc_in_all_entries(struct backtrace_state *state, uintptr_t pc, + backtrace_full_callback callback, backtrace_error_callback error_callback, void *data, + int& found, int ret) +{ + for (struct dwarf_data* ddata = (struct dwarf_data *)state->fileline_data; + ddata != NULL; + ddata = ddata->next) + { + ret = dwarf_lookup_pc(state, ddata, pc, callback, error_callback, data, &found); + if (ret != 0 || found) return true; + } + return false; +} /* Return the file/line information for a PC using the DWARF mapping we built earlier. */ @@ -4262,20 +4282,30 @@ dwarf_fileline (struct backtrace_state *state, uintptr_t pc, { struct dwarf_data *ddata; int found; - int ret; + int ret = 0; if (!state->threaded) + { + if (dwarf_fileline_dwarf_lookup_pc_in_all_entries(state, pc, callback, error_callback, data, found, ret)) { - for (ddata = (struct dwarf_data *) state->fileline_data; - ddata != NULL; - ddata = ddata->next) - { - ret = dwarf_lookup_pc (state, ddata, pc, callback, error_callback, - data, &found); - if (ret != 0 || found) - return ret; - } + return ret; } + + // if we failed to obtain an entry in range, it can mean that the address map has been changed and new entries + // have been loaded in the meantime. Request a refresh and try again. + if (state->request_known_address_ranges_refresh_fn) + { + int new_range_count = state->request_known_address_ranges_refresh_fn(state, pc); + if (new_range_count > 0) + { + if (dwarf_fileline_dwarf_lookup_pc_in_all_entries(state, pc, callback, error_callback, data, found, ret)) + { + return ret; + } + } + } + + } else { struct dwarf_data **pp; @@ -4306,7 +4336,7 @@ dwarf_fileline (struct backtrace_state *state, uintptr_t pc, static struct dwarf_data * build_dwarf_data (struct backtrace_state *state, - uintptr_t base_address, + struct libbacktrace_base_address base_address, const struct dwarf_sections *dwarf_sections, int is_bigendian, struct dwarf_data *altlink, @@ -4364,7 +4394,7 @@ build_dwarf_data (struct backtrace_state *state, int backtrace_dwarf_add (struct backtrace_state *state, - uintptr_t base_address, + struct libbacktrace_base_address base_address, const struct dwarf_sections *dwarf_sections, int is_bigendian, struct dwarf_data *fileline_altlink, diff --git a/src/third_party/tracy/libbacktrace/elf.cpp b/src/third_party/tracy/libbacktrace/elf.cpp index c65bc4e7..ffe8d702 100644 --- a/src/third_party/tracy/libbacktrace/elf.cpp +++ b/src/third_party/tracy/libbacktrace/elf.cpp @@ -38,6 +38,7 @@ POSSIBILITY OF SUCH DAMAGE. */ #include #include #include +#include #ifdef HAVE_DL_ITERATE_PHDR #include @@ -642,7 +643,7 @@ elf_symbol_search (const void *vkey, const void *ventry) static int elf_initialize_syminfo (struct backtrace_state *state, - uintptr_t base_address, + struct libbacktrace_base_address base_address, const unsigned char *symtab_data, size_t symtab_size, const unsigned char *strtab, size_t strtab_size, backtrace_error_callback error_callback, @@ -708,7 +709,8 @@ elf_initialize_syminfo (struct backtrace_state *state, = *(const b_elf_addr *) (opd->data + (sym->st_value - opd->addr)); else elf_symbols[j].address = sym->st_value; - elf_symbols[j].address += base_address; + elf_symbols[j].address = + libbacktrace_add_base (elf_symbols[j].address, base_address); elf_symbols[j].size = sym->st_size; ++j; } @@ -1199,14 +1201,7 @@ elf_fetch_bits_backward (const unsigned char **ppin, val = *pval; if (unlikely (pin <= pinend)) - { - if (bits == 0) - { - elf_uncompress_failed (); - return 0; - } - return 1; - } + return 1; pin -= 4; @@ -5093,7 +5088,7 @@ elf_uncompress_chdr (struct backtrace_state *state, backtrace_error_callback error_callback, void *data, unsigned char **uncompressed, size_t *uncompressed_size) { - const b_elf_chdr *chdr; + b_elf_chdr chdr; char *alc; size_t alc_len; unsigned char *po; @@ -5105,27 +5100,30 @@ elf_uncompress_chdr (struct backtrace_state *state, if (compressed_size < sizeof (b_elf_chdr)) return 1; - chdr = (const b_elf_chdr *) compressed; + /* The lld linker can misalign a compressed section, so we can't safely read + the fields directly as we can for other ELF sections. See + https://github.com/ianlancetaylor/libbacktrace/pull/120. */ + memcpy (&chdr, compressed, sizeof (b_elf_chdr)); alc = NULL; alc_len = 0; - if (*uncompressed != NULL && *uncompressed_size >= chdr->ch_size) + if (*uncompressed != NULL && *uncompressed_size >= chdr.ch_size) po = *uncompressed; else { - alc_len = chdr->ch_size; + alc_len = chdr.ch_size; alc = (char*)backtrace_alloc (state, alc_len, error_callback, data); if (alc == NULL) return 0; po = (unsigned char *) alc; } - switch (chdr->ch_type) + switch (chdr.ch_type) { case ELFCOMPRESS_ZLIB: if (!elf_zlib_inflate_and_verify (compressed + sizeof (b_elf_chdr), compressed_size - sizeof (b_elf_chdr), - zdebug_table, po, chdr->ch_size)) + zdebug_table, po, chdr.ch_size)) goto skip; break; @@ -5133,7 +5131,7 @@ elf_uncompress_chdr (struct backtrace_state *state, if (!elf_zstd_decompress (compressed + sizeof (b_elf_chdr), compressed_size - sizeof (b_elf_chdr), (unsigned char *)zdebug_table, po, - chdr->ch_size)) + chdr.ch_size)) goto skip; break; @@ -5143,7 +5141,7 @@ elf_uncompress_chdr (struct backtrace_state *state, } *uncompressed = po; - *uncompressed_size = chdr->ch_size; + *uncompressed_size = chdr.ch_size; return 1; @@ -5585,6 +5583,7 @@ elf_uncompress_lzma_block (const unsigned char *compressed, uint64_t header_compressed_size; uint64_t header_uncompressed_size; unsigned char lzma2_properties; + size_t crc_offset; uint32_t computed_crc; uint32_t stream_crc; size_t uncompressed_offset; @@ -5688,28 +5687,29 @@ elf_uncompress_lzma_block (const unsigned char *compressed, /* The properties describe the dictionary size, but we don't care what that is. */ - /* Block header padding. */ - if (unlikely (off + 4 > compressed_size)) + /* Skip to just before CRC, verifying zero bytes in between. */ + crc_offset = block_header_offset + block_header_size - 4; + if (unlikely (crc_offset + 4 > compressed_size)) { elf_uncompress_failed (); return 0; } - - off = (off + 3) &~ (size_t) 3; - - if (unlikely (off + 4 > compressed_size)) + for (; off < crc_offset; off++) { - elf_uncompress_failed (); - return 0; + if (compressed[off] != 0) + { + elf_uncompress_failed (); + return 0; + } } /* Block header CRC. */ computed_crc = elf_crc32 (0, compressed + block_header_offset, block_header_size - 4); - stream_crc = (compressed[off] - | (compressed[off + 1] << 8) - | (compressed[off + 2] << 16) - | (compressed[off + 3] << 24)); + stream_crc = ((uint32_t)compressed[off] + | ((uint32_t)compressed[off + 1] << 8) + | ((uint32_t)compressed[off + 2] << 16) + | ((uint32_t)compressed[off + 3] << 24)); if (unlikely (computed_crc != stream_crc)) { elf_uncompress_failed (); @@ -6216,10 +6216,10 @@ elf_uncompress_lzma_block (const unsigned char *compressed, return 0; } computed_crc = elf_crc32 (0, uncompressed, uncompressed_offset); - stream_crc = (compressed[off] - | (compressed[off + 1] << 8) - | (compressed[off + 2] << 16) - | (compressed[off + 3] << 24)); + stream_crc = ((uint32_t)compressed[off] + | ((uint32_t)compressed[off + 1] << 8) + | ((uint32_t)compressed[off + 2] << 16) + | ((uint32_t)compressed[off + 3] << 24)); if (computed_crc != stream_crc) { elf_uncompress_failed (); @@ -6319,10 +6319,10 @@ elf_uncompress_lzma (struct backtrace_state *state, /* Next comes a CRC of the stream flags. */ computed_crc = elf_crc32 (0, compressed + 6, 2); - stream_crc = (compressed[8] - | (compressed[9] << 8) - | (compressed[10] << 16) - | (compressed[11] << 24)); + stream_crc = ((uint32_t)compressed[8] + | ((uint32_t)compressed[9] << 8) + | ((uint32_t)compressed[10] << 16) + | ((uint32_t)compressed[11] << 24)); if (unlikely (computed_crc != stream_crc)) { elf_uncompress_failed (); @@ -6363,10 +6363,10 @@ elf_uncompress_lzma (struct backtrace_state *state, /* Before that is a footer CRC. */ computed_crc = elf_crc32 (0, compressed + offset, 6); - stream_crc = (compressed[offset - 4] - | (compressed[offset - 3] << 8) - | (compressed[offset - 2] << 16) - | (compressed[offset - 1] << 24)); + stream_crc = ((uint32_t)compressed[offset - 4] + | ((uint32_t)compressed[offset - 3] << 8) + | ((uint32_t)compressed[offset - 2] << 16) + | ((uint32_t)compressed[offset - 1] << 24)); if (unlikely (computed_crc != stream_crc)) { elf_uncompress_failed (); @@ -6422,10 +6422,10 @@ elf_uncompress_lzma (struct backtrace_state *state, /* Next is a CRC of the index. */ computed_crc = elf_crc32 (0, compressed + index_offset, offset - index_offset); - stream_crc = (compressed[offset] - | (compressed[offset + 1] << 8) - | (compressed[offset + 2] << 16) - | (compressed[offset + 3] << 24)); + stream_crc = ((uint32_t)compressed[offset] + | ((uint32_t)compressed[offset + 1] << 8) + | ((uint32_t)compressed[offset + 2] << 16) + | ((uint32_t)compressed[offset + 3] << 24)); if (unlikely (computed_crc != stream_crc)) { elf_uncompress_failed (); @@ -6518,8 +6518,10 @@ backtrace_uncompress_lzma (struct backtrace_state *state, static int elf_add (struct backtrace_state *state, const char *filename, int descriptor, const unsigned char *memory, size_t memory_size, - uintptr_t base_address, backtrace_error_callback error_callback, - void *data, fileline *fileline_fn, int *found_sym, int *found_dwarf, + struct libbacktrace_base_address base_address, + struct elf_ppc64_opd_data *caller_opd, + backtrace_error_callback error_callback, void *data, + fileline *fileline_fn, int *found_sym, int *found_dwarf, struct dwarf_data **fileline_entry, int exe, int debuginfo, const char *with_buildid_data, uint32_t with_buildid_size) { @@ -6574,6 +6576,7 @@ elf_add (struct backtrace_state *state, const char *filename, int descriptor, struct elf_view split_debug_view[DEBUG_MAX]; unsigned char split_debug_view_valid[DEBUG_MAX]; struct elf_ppc64_opd_data opd_data, *opd; + int opd_view_valid; struct dwarf_sections dwarf_sections; struct dwarf_data *fileline_altlink = NULL; @@ -6602,6 +6605,7 @@ elf_add (struct backtrace_state *state, const char *filename, int descriptor, debug_view_valid = 0; memset (&split_debug_view_valid[0], 0, sizeof split_debug_view_valid); opd = NULL; + opd_view_valid = 0; if (!elf_get_view (state, descriptor, memory, memory_size, 0, sizeof ehdr, error_callback, data, &ehdr_view)) @@ -6858,7 +6862,8 @@ elf_add (struct backtrace_state *state, const char *filename, int descriptor, } } - if (!gnu_debugdata_view_valid + if (!debuginfo + && !gnu_debugdata_view_valid && strcmp (name, ".gnu_debugdata") == 0) { if (!elf_get_view (state, descriptor, memory, memory_size, @@ -6885,12 +6890,18 @@ elf_add (struct backtrace_state *state, const char *filename, int descriptor, opd->addr = shdr->sh_addr; opd->data = (const char *) opd_data.view.view.data; opd->size = shdr->sh_size; + opd_view_valid = 1; } } + /* A debuginfo file may not have a useful .opd section, but we can use the + one from the original executable. */ + if (opd == NULL) + opd = caller_opd; + if (symtab_shndx == 0) symtab_shndx = dynsym_shndx; - if (symtab_shndx != 0 && !debuginfo) + if (symtab_shndx != 0) { const b_elf_shdr *symtab_shdr; unsigned int strtab_shndx; @@ -6966,9 +6977,9 @@ elf_add (struct backtrace_state *state, const char *filename, int descriptor, elf_release_view (state, &debuglink_view, error_callback, data); if (debugaltlink_view_valid) elf_release_view (state, &debugaltlink_view, error_callback, data); - ret = elf_add (state, "", d, NULL, 0, base_address, error_callback, - data, fileline_fn, found_sym, found_dwarf, NULL, 0, - 1, NULL, 0); + ret = elf_add (state, "", d, NULL, 0, base_address, opd, + error_callback, data, fileline_fn, found_sym, + found_dwarf, NULL, 0, 1, NULL, 0); if (ret < 0) backtrace_close (d, error_callback, data); else if (descriptor >= 0) @@ -6983,12 +6994,6 @@ elf_add (struct backtrace_state *state, const char *filename, int descriptor, buildid_view_valid = 0; } - if (opd) - { - elf_release_view (state, &opd->view, error_callback, data); - opd = NULL; - } - if (debuglink_name != NULL) { int d; @@ -7003,9 +7008,9 @@ elf_add (struct backtrace_state *state, const char *filename, int descriptor, elf_release_view (state, &debuglink_view, error_callback, data); if (debugaltlink_view_valid) elf_release_view (state, &debugaltlink_view, error_callback, data); - ret = elf_add (state, "", d, NULL, 0, base_address, error_callback, - data, fileline_fn, found_sym, found_dwarf, NULL, 0, - 1, NULL, 0); + ret = elf_add (state, "", d, NULL, 0, base_address, opd, + error_callback, data, fileline_fn, found_sym, + found_dwarf, NULL, 0, 1, NULL, 0); if (ret < 0) backtrace_close (d, error_callback, data); else if (descriptor >= 0) @@ -7030,7 +7035,7 @@ elf_add (struct backtrace_state *state, const char *filename, int descriptor, { int ret; - ret = elf_add (state, filename, d, NULL, 0, base_address, + ret = elf_add (state, filename, d, NULL, 0, base_address, opd, error_callback, data, fileline_fn, found_sym, found_dwarf, &fileline_altlink, 0, 1, debugaltlink_buildid_data, debugaltlink_buildid_size); @@ -7067,7 +7072,7 @@ elf_add (struct backtrace_state *state, const char *filename, int descriptor, if (ret) { ret = elf_add (state, filename, -1, gnu_debugdata_uncompressed, - gnu_debugdata_uncompressed_size, base_address, + gnu_debugdata_uncompressed_size, base_address, opd, error_callback, data, fileline_fn, found_sym, found_dwarf, NULL, 0, 0, NULL, 0); if (ret >= 0 && descriptor >= 0) @@ -7076,6 +7081,13 @@ elf_add (struct backtrace_state *state, const char *filename, int descriptor, } } + if (opd_view_valid) + { + elf_release_view (state, &opd->view, error_callback, data); + opd_view_valid = 0; + opd = NULL; + } + /* Read all the debug sections in a single view, since they are probably adjacent in the file. If any of sections are uncompressed, we never release this view. */ @@ -7322,7 +7334,7 @@ elf_add (struct backtrace_state *state, const char *filename, int descriptor, if (split_debug_view_valid[i]) elf_release_view (state, &split_debug_view[i], error_callback, data); } - if (opd) + if (opd_view_valid) elf_release_view (state, &opd->view, error_callback, data); if (descriptor >= 0) backtrace_close (descriptor, error_callback, data); @@ -7350,13 +7362,37 @@ struct PhdrIterate { char* dlpi_name; ElfW(Addr) dlpi_addr; + ElfW(Addr) dlpi_end_addr; }; FastVector s_phdrData(16); +struct ElfAddrRange +{ + ElfW(Addr) dlpi_addr; + ElfW(Addr) dlpi_end_addr; +}; +FastVector s_sortedKnownElfRanges(16); + +static int address_in_known_elf_ranges(uintptr_t pc) +{ + auto it = std::lower_bound( s_sortedKnownElfRanges.begin(), s_sortedKnownElfRanges.end(), pc, + []( const ElfAddrRange& lhs, const uintptr_t rhs ) { return uintptr_t(lhs.dlpi_addr) > rhs; } ); + if( it != s_sortedKnownElfRanges.end() && pc <= it->dlpi_end_addr ) + { + return true; + } + return false; +} + static int phdr_callback_mock (struct dl_phdr_info *info, size_t size ATTRIBUTE_UNUSED, void *pdata) { + if( address_in_known_elf_ranges(info->dlpi_addr) ) + { + return 0; + } + auto ptr = s_phdrData.push_next(); if (info->dlpi_name) { @@ -7366,6 +7402,12 @@ phdr_callback_mock (struct dl_phdr_info *info, size_t size ATTRIBUTE_UNUSED, } else ptr->dlpi_name = nullptr; ptr->dlpi_addr = info->dlpi_addr; + + // calculate the end address as well, so we can quickly determine if a PC is within the range of this image + ptr->dlpi_end_addr = uintptr_t(info->dlpi_addr) + (info->dlpi_phnum ? uintptr_t( + info->dlpi_phdr[info->dlpi_phnum - 1].p_vaddr + + info->dlpi_phdr[info->dlpi_phnum - 1].p_memsz) : 0); + return 0; } @@ -7379,6 +7421,7 @@ phdr_callback (struct PhdrIterate *info, void *pdata) const char *filename; int descriptor; int does_not_exist; + struct libbacktrace_base_address base_address; fileline elf_fileline_fn; int found_dwarf; @@ -7408,7 +7451,8 @@ phdr_callback (struct PhdrIterate *info, void *pdata) return 0; } - if (elf_add (pd->state, filename, descriptor, NULL, 0, info->dlpi_addr, + base_address.m = info->dlpi_addr; + if (elf_add (pd->state, filename, descriptor, NULL, 0, base_address, NULL, pd->error_callback, pd->data, &elf_fileline_fn, pd->found_sym, &found_dwarf, NULL, 0, 0, NULL, 0)) { @@ -7422,6 +7466,66 @@ phdr_callback (struct PhdrIterate *info, void *pdata) return 0; } +static int elf_iterate_phdr_and_add_new_files(phdr_data *pd) +{ + assert(s_phdrData.empty()); + // dl_iterate_phdr, will only add entries for elf files loaded in a previously unseen range + dl_iterate_phdr(phdr_callback_mock, nullptr); + + if(s_phdrData.size() == 0) + { + return 0; + } + + uint32_t headersAdded = 0; + for (auto &v : s_phdrData) + { + phdr_callback(&v, (void *)pd); + + auto newEntry = s_sortedKnownElfRanges.push_next(); + newEntry->dlpi_addr = v.dlpi_addr; + newEntry->dlpi_end_addr = v.dlpi_end_addr; + + tracy_free(v.dlpi_name); + + headersAdded++; + } + + s_phdrData.clear(); + + std::sort( s_sortedKnownElfRanges.begin(), s_sortedKnownElfRanges.end(), + []( const ElfAddrRange& lhs, const ElfAddrRange& rhs ) { return lhs.dlpi_addr > rhs.dlpi_addr; } ); + + return headersAdded; +} + +#ifdef TRACY_LIBBACKTRACE_ELF_DYNLOAD_SUPPORT +/* Request an elf entry update if the pc passed in is not in any of the known elf ranges. +This could mean that new images were dlopened and we need to add those new elf entries */ +static int elf_refresh_address_ranges_if_needed(struct backtrace_state *state, uintptr_t pc) +{ + if ( address_in_known_elf_ranges(pc) ) + { + return 0; + } + + struct phdr_data pd; + int found_sym = 0; + int found_dwarf = 0; + fileline fileline_fn = nullptr; + pd.state = state; + pd.error_callback = nullptr; + pd.data = nullptr; + pd.fileline_fn = &fileline_fn; + pd.found_sym = &found_sym; + pd.found_dwarf = &found_dwarf; + pd.exe_filename = nullptr; + pd.exe_descriptor = -1; + + return elf_iterate_phdr_and_add_new_files(&pd); +} +#endif //#ifdef TRACY_LIBBACKTRACE_ELF_DYNLOAD_SUPPORT + /* Initialize the backtrace data we need from an ELF executable. At the ELF level, all we need to do is find the debug info sections. */ @@ -7437,11 +7541,21 @@ backtrace_initialize (struct backtrace_state *state, const char *filename, fileline elf_fileline_fn = elf_nodebug; struct phdr_data pd; - ret = elf_add (state, filename, descriptor, NULL, 0, 0, error_callback, data, - &elf_fileline_fn, &found_sym, &found_dwarf, NULL, 1, 0, NULL, - 0); - if (!ret) - return 0; + + /* When using fdpic we must use dl_iterate_phdr for all modules, including + the main executable, so that we can get the right base address + mapping. */ + if (!libbacktrace_using_fdpic ()) + { + struct libbacktrace_base_address zero_base_address; + + memset (&zero_base_address, 0, sizeof zero_base_address); + ret = elf_add (state, filename, descriptor, NULL, 0, zero_base_address, + NULL, error_callback, data, &elf_fileline_fn, &found_sym, + &found_dwarf, NULL, 1, 0, NULL, 0); + if (!ret) + return 0; + } pd.state = state; pd.error_callback = error_callback; @@ -7452,14 +7566,7 @@ backtrace_initialize (struct backtrace_state *state, const char *filename, pd.exe_filename = filename; pd.exe_descriptor = ret < 0 ? descriptor : -1; - assert (s_phdrData.empty()); - dl_iterate_phdr (phdr_callback_mock, nullptr); - for (auto& v : s_phdrData) - { - phdr_callback (&v, (void *) &pd); - tracy_free (v.dlpi_name); - } - s_phdrData.clear(); + elf_iterate_phdr_and_add_new_files(&pd); if (!state->threaded) { @@ -7485,6 +7592,13 @@ backtrace_initialize (struct backtrace_state *state, const char *filename, if (*fileline_fn == NULL || *fileline_fn == elf_nodebug) *fileline_fn = elf_fileline_fn; + // install an address range refresh callback so we can cope with dynamically loaded elf files +#ifdef TRACY_LIBBACKTRACE_ELF_DYNLOAD_SUPPORT + state->request_known_address_ranges_refresh_fn = elf_refresh_address_ranges_if_needed; +#else + state->request_known_address_ranges_refresh_fn = NULL; +#endif + return 1; } diff --git a/src/third_party/tracy/libbacktrace/fileline.cpp b/src/third_party/tracy/libbacktrace/fileline.cpp index 8645d754..5a37ff0c 100644 --- a/src/third_party/tracy/libbacktrace/fileline.cpp +++ b/src/third_party/tracy/libbacktrace/fileline.cpp @@ -47,6 +47,18 @@ POSSIBILITY OF SUCH DAMAGE. */ #include #endif +#ifdef HAVE_WINDOWS_H +#ifndef WIN32_MEAN_AND_LEAN +#define WIN32_MEAN_AND_LEAN +#endif + +#ifndef NOMINMAX +#define NOMINMAX +#endif + +#include +#endif + #include "backtrace.hpp" #include "internal.hpp" @@ -158,6 +170,47 @@ macho_get_executable_path (struct backtrace_state *state, #endif /* !defined (HAVE_MACH_O_DYLD_H) */ +#if HAVE_DECL__PGMPTR + +#define windows_executable_filename() _pgmptr + +#else /* !HAVE_DECL__PGMPTR */ + +#define windows_executable_filename() NULL + +#endif /* !HAVE_DECL__PGMPTR */ + +#ifdef HAVE_WINDOWS_H + +#define FILENAME_BUF_SIZE (MAX_PATH) + +static char * +windows_get_executable_path (char *buf, backtrace_error_callback error_callback, + void *data) +{ + size_t got; + int error; + + got = GetModuleFileNameA (NULL, buf, FILENAME_BUF_SIZE - 1); + error = GetLastError (); + if (got == 0 + || (got == FILENAME_BUF_SIZE - 1 && error == ERROR_INSUFFICIENT_BUFFER)) + { + error_callback (data, + "could not get the filename of the current executable", + error); + return NULL; + } + return buf; +} + +#else /* !defined (HAVE_WINDOWS_H) */ + +#define windows_get_executable_path(buf, error_callback, data) NULL +#define FILENAME_BUF_SIZE 64 + +#endif /* !defined (HAVE_WINDOWS_H) */ + /* Initialize the fileline information from the executable. Returns 1 on success, 0 on failure. */ @@ -171,7 +224,7 @@ fileline_initialize (struct backtrace_state *state, int called_error_callback; int descriptor; const char *filename; - char buf[64]; + char buf[FILENAME_BUF_SIZE]; if (!state->threaded) failed = state->fileline_initialization_failed; @@ -195,7 +248,7 @@ fileline_initialize (struct backtrace_state *state, descriptor = -1; called_error_callback = 0; - for (pass = 0; pass < 8; ++pass) + for (pass = 0; pass < 10; ++pass) { int does_not_exist; @@ -208,25 +261,33 @@ fileline_initialize (struct backtrace_state *state, filename = getexecname (); break; case 2: - filename = "/proc/self/exe"; + /* Test this before /proc/self/exe, as the latter exists but points + to the wine binary (and thus doesn't work). */ + filename = windows_executable_filename (); break; case 3: - filename = "/proc/curproc/file"; + filename = "/proc/self/exe"; break; case 4: + filename = "/proc/curproc/file"; + break; + case 5: snprintf (buf, sizeof (buf), "/proc/%ld/object/a.out", (long) getpid ()); filename = buf; break; - case 5: + case 6: filename = sysctl_exec_name1 (state, error_callback, data); break; - case 6: + case 7: filename = sysctl_exec_name2 (state, error_callback, data); break; - case 7: + case 8: filename = macho_get_executable_path (state, error_callback, data); break; + case 9: + filename = windows_get_executable_path (buf, error_callback, data); + break; default: abort (); } diff --git a/src/third_party/tracy/libbacktrace/internal.hpp b/src/third_party/tracy/libbacktrace/internal.hpp index f871844b..21395975 100644 --- a/src/third_party/tracy/libbacktrace/internal.hpp +++ b/src/third_party/tracy/libbacktrace/internal.hpp @@ -133,6 +133,11 @@ typedef void (*syminfo) (struct backtrace_state *state, uintptr_t pc, backtrace_syminfo_callback callback, backtrace_error_callback error_callback, void *data); +/* The type of the function that will trigger an known address range refresh + (if pc passed in is for an address whichs lies ourtisde of known ranges) */ +typedef int (*request_known_address_ranges_refresh)(struct backtrace_state *state, + uintptr_t pc); + /* What the backtrace state pointer points to. */ struct backtrace_state @@ -159,6 +164,8 @@ struct backtrace_state int lock_alloc; /* The freelist when using mmap. */ struct backtrace_freelist_struct *freelist; + /* Trigger an known address range refresh */ + request_known_address_ranges_refresh request_known_address_ranges_refresh_fn; }; /* Open a file for reading. Returns -1 on error. If DOES_NOT_EXIST @@ -326,10 +333,44 @@ struct dwarf_sections struct dwarf_data; +/* The load address mapping. */ + +#if defined(__FDPIC__) && defined(HAVE_DL_ITERATE_PHDR) && (defined(HAVE_LINK_H) || defined(HAVE_SYS_LINK_H)) + +#ifdef HAVE_LINK_H + #include +#endif +#ifdef HAVE_SYS_LINK_H + #include +#endif + +#define libbacktrace_using_fdpic() (1) + +struct libbacktrace_base_address +{ + struct elf32_fdpic_loadaddr m; +}; + +#define libbacktrace_add_base(pc, base) \ + ((uintptr_t) (__RELOC_POINTER ((pc), (base).m))) + +#else /* not _FDPIC__ */ + +#define libbacktrace_using_fdpic() (0) + +struct libbacktrace_base_address +{ + uintptr_t m; +}; + +#define libbacktrace_add_base(pc, base) ((pc) + (base).m) + +#endif /* not _FDPIC__ */ + /* Add file/line information for a DWARF module. */ extern int backtrace_dwarf_add (struct backtrace_state *state, - uintptr_t base_address, + struct libbacktrace_base_address base_address, const struct dwarf_sections *dwarf_sections, int is_bigendian, struct dwarf_data *fileline_altlink, diff --git a/src/third_party/tracy/libbacktrace/macho.cpp b/src/third_party/tracy/libbacktrace/macho.cpp index 6cccdaba..b9f08456 100644 --- a/src/third_party/tracy/libbacktrace/macho.cpp +++ b/src/third_party/tracy/libbacktrace/macho.cpp @@ -274,12 +274,14 @@ struct macho_nlist_64 /* Value found in nlist n_type field. */ -#define MACH_O_N_EXT 0x01 /* Extern symbol */ -#define MACH_O_N_ABS 0x02 /* Absolute symbol */ -#define MACH_O_N_SECT 0x0e /* Defined in section */ - -#define MACH_O_N_TYPE 0x0e /* Mask for type bits */ #define MACH_O_N_STAB 0xe0 /* Stabs debugging symbol */ +#define MACH_O_N_TYPE 0x0e /* Mask for type bits */ + +/* Values found after masking with MACH_O_N_TYPE. */ +#define MACH_O_N_UNDF 0x00 /* Undefined symbol */ +#define MACH_O_N_ABS 0x02 /* Absolute symbol */ +#define MACH_O_N_SECT 0x0e /* Defined in section from n_sect field */ + /* Information we keep for a Mach-O symbol. */ @@ -316,8 +318,9 @@ static const char * const dwarf_section_names[DEBUG_MAX] = /* Forward declaration. */ static int macho_add (struct backtrace_state *, const char *, int, off_t, - const unsigned char *, uintptr_t, int, - backtrace_error_callback, void *, fileline *, int *); + const unsigned char *, struct libbacktrace_base_address, + int, backtrace_error_callback, void *, fileline *, + int *); /* A dummy callback function used when we can't find any debug info. */ @@ -495,10 +498,10 @@ macho_defined_symbol (uint8_t type) { if ((type & MACH_O_N_STAB) != 0) return 0; - if ((type & MACH_O_N_EXT) != 0) - return 0; switch (type & MACH_O_N_TYPE) { + case MACH_O_N_UNDF: + return 0; case MACH_O_N_ABS: return 1; case MACH_O_N_SECT: @@ -512,7 +515,7 @@ macho_defined_symbol (uint8_t type) static int macho_add_symtab (struct backtrace_state *state, int descriptor, - uintptr_t base_address, int is_64, + struct libbacktrace_base_address base_address, int is_64, off_t symoff, unsigned int nsyms, off_t stroff, unsigned int strsize, backtrace_error_callback error_callback, void *data) @@ -627,7 +630,7 @@ macho_add_symtab (struct backtrace_state *state, int descriptor, if (name[0] == '_') ++name; macho_symbols[j].name = name; - macho_symbols[j].address = value + base_address; + macho_symbols[j].address = libbacktrace_add_base (value, base_address); ++j; } @@ -760,7 +763,8 @@ macho_syminfo (struct backtrace_state *state, uintptr_t addr, static int macho_add_fat (struct backtrace_state *state, const char *filename, int descriptor, int swapped, off_t offset, - const unsigned char *match_uuid, uintptr_t base_address, + const unsigned char *match_uuid, + struct libbacktrace_base_address base_address, int skip_symtab, uint32_t nfat_arch, int is_64, backtrace_error_callback error_callback, void *data, fileline *fileline_fn, int *found_sym) @@ -862,7 +866,8 @@ macho_add_fat (struct backtrace_state *state, const char *filename, static int macho_add_dsym (struct backtrace_state *state, const char *filename, - uintptr_t base_address, const unsigned char *uuid, + struct libbacktrace_base_address base_address, + const unsigned char *uuid, backtrace_error_callback error_callback, void *data, fileline* fileline_fn) { @@ -980,7 +985,7 @@ macho_add_dsym (struct backtrace_state *state, const char *filename, static int macho_add (struct backtrace_state *state, const char *filename, int descriptor, off_t offset, const unsigned char *match_uuid, - uintptr_t base_address, int skip_symtab, + struct libbacktrace_base_address base_address, int skip_symtab, backtrace_error_callback error_callback, void *data, fileline *fileline_fn, int *found_sym) { @@ -1242,7 +1247,7 @@ backtrace_initialize (struct backtrace_state *state, const char *filename, c = _dyld_image_count (); for (i = 0; i < c; ++i) { - uintptr_t base_address; + struct libbacktrace_base_address base_address; const char *name; int d; fileline mff; @@ -1266,7 +1271,7 @@ backtrace_initialize (struct backtrace_state *state, const char *filename, continue; } - base_address = _dyld_get_image_vmaddr_slide (i); + base_address.m = _dyld_get_image_vmaddr_slide (i); mff = macho_nodebug; if (!macho_add (state, name, d, 0, NULL, base_address, 0, @@ -1321,10 +1326,12 @@ backtrace_initialize (struct backtrace_state *state, const char *filename, void *data, fileline *fileline_fn) { fileline macho_fileline_fn; + struct libbacktrace_base_address zero_base_address; int found_sym; macho_fileline_fn = macho_nodebug; - if (!macho_add (state, filename, descriptor, 0, NULL, 0, 0, + memset (&zero_base_address, 0, sizeof zero_base_address); + if (!macho_add (state, filename, descriptor, 0, NULL, zero_base_address, 0, error_callback, data, &macho_fileline_fn, &found_sym)) return 0; diff --git a/src/third_party/tracy/tracy/Tracy.hpp b/src/third_party/tracy/tracy/Tracy.hpp index c557a2c8..bed51179 100644 --- a/src/third_party/tracy/tracy/Tracy.hpp +++ b/src/third_party/tracy/tracy/Tracy.hpp @@ -18,6 +18,8 @@ #ifndef TRACY_ENABLE +#define TracyNoop + #define ZoneNamed(x,y) #define ZoneNamedN(x,y,z) #define ZoneNamedC(x,y,z) @@ -33,8 +35,12 @@ #define ZoneText(x,y) #define ZoneTextV(x,y,z) +#define ZoneTextF(x,...) +#define ZoneTextVF(x,y,...) #define ZoneName(x,y) #define ZoneNameV(x,y,z) +#define ZoneNameF(x,...) +#define ZoneNameVF(x,y,...) #define ZoneColor(x) #define ZoneColorV(x,y) #define ZoneValue(x) @@ -69,8 +75,10 @@ #define TracyAlloc(x,y) #define TracyFree(x) +#define TracyMemoryDiscard(x) #define TracySecureAlloc(x,y) #define TracySecureFree(x) +#define TracySecureMemoryDiscard(x) #define TracyAllocN(x,y,z) #define TracyFreeN(x,y) @@ -92,8 +100,10 @@ #define TracyAllocS(x,y,z) #define TracyFreeS(x,y) +#define TracyMemoryDiscardS(x,y) #define TracySecureAllocS(x,y,z) #define TracySecureFreeS(x,y) +#define TracySecureMemoryDiscardS(x,y) #define TracyAllocNS(x,y,z,w) #define TracyFreeNS(x,y,z) @@ -113,6 +123,7 @@ #define TracySetProgramName(x) #define TracyFiberEnter(x) +#define TracyFiberEnterHint(x,y) #define TracyFiberLeave #else @@ -123,24 +134,21 @@ #include "../client/TracyProfiler.hpp" #include "../client/TracyScoped.hpp" -#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK -# define ZoneNamed( varname, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active ) -# define ZoneNamedN( varname, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active ) -# define ZoneNamedC( varname, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active ) -# define ZoneNamedNC( varname, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active ) - -# define ZoneTransient( varname, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), nullptr, 0, TRACY_CALLSTACK, active ) -# define ZoneTransientN( varname, name, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), TRACY_CALLSTACK, active ) -#else -# define ZoneNamed( varname, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), active ) -# define ZoneNamedN( varname, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), active ) -# define ZoneNamedC( varname, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), active ) -# define ZoneNamedNC( varname, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), active ) - -# define ZoneTransient( varname, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), nullptr, 0, active ) -# define ZoneTransientN( varname, name, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), active ) +#ifndef TRACY_CALLSTACK +#define TRACY_CALLSTACK 0 #endif +#define TracyNoop tracy::ProfilerAvailable() + +#define ZoneNamed( varname, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active ) +#define ZoneNamedN( varname, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active ) +#define ZoneNamedC( varname, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active ) +#define ZoneNamedNC( varname, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active ) + +#define ZoneTransient( varname, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), nullptr, 0, TRACY_CALLSTACK, active ) +#define ZoneTransientN( varname, name, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), TRACY_CALLSTACK, active ) +#define ZoneTransientNC( varname, name, color, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), color, TRACY_CALLSTACK, active ) + #define ZoneScoped ZoneNamed( ___tracy_scoped_zone, true ) #define ZoneScopedN( name ) ZoneNamedN( ___tracy_scoped_zone, name, true ) #define ZoneScopedC( color ) ZoneNamedC( ___tracy_scoped_zone, color, true ) @@ -148,8 +156,12 @@ #define ZoneText( txt, size ) ___tracy_scoped_zone.Text( txt, size ) #define ZoneTextV( varname, txt, size ) varname.Text( txt, size ) +#define ZoneTextF( fmt, ... ) ___tracy_scoped_zone.TextFmt( fmt, ##__VA_ARGS__ ) +#define ZoneTextVF( varname, fmt, ... ) varname.TextFmt( fmt, ##__VA_ARGS__ ) #define ZoneName( txt, size ) ___tracy_scoped_zone.Name( txt, size ) #define ZoneNameV( varname, txt, size ) varname.Name( txt, size ) +#define ZoneNameF( fmt, ... ) ___tracy_scoped_zone.NameFmt( fmt, ##__VA_ARGS__ ) +#define ZoneNameVF( varname, fmt, ... ) varname.NameFmt( fmt, ##__VA_ARGS__ ) #define ZoneColor( color ) ___tracy_scoped_zone.Color( color ) #define ZoneColorV( varname, color ) varname.Color( color ) #define ZoneValue( value ) ___tracy_scoped_zone.Value( value ) @@ -178,95 +190,52 @@ #define TracyAppInfo( txt, size ) tracy::Profiler::MessageAppInfo( txt, size ) -#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK -# define TracyMessage( txt, size ) tracy::Profiler::Message( txt, size, TRACY_CALLSTACK ) -# define TracyMessageL( txt ) tracy::Profiler::Message( txt, TRACY_CALLSTACK ) -# define TracyMessageC( txt, size, color ) tracy::Profiler::MessageColor( txt, size, color, TRACY_CALLSTACK ) -# define TracyMessageLC( txt, color ) tracy::Profiler::MessageColor( txt, color, TRACY_CALLSTACK ) +#define TracyMessage( txt, size ) tracy::Profiler::Message( txt, size, TRACY_CALLSTACK ) +#define TracyMessageL( txt ) tracy::Profiler::Message( txt, TRACY_CALLSTACK ) +#define TracyMessageC( txt, size, color ) tracy::Profiler::MessageColor( txt, size, color, TRACY_CALLSTACK ) +#define TracyMessageLC( txt, color ) tracy::Profiler::MessageColor( txt, color, TRACY_CALLSTACK ) -# define TracyAlloc( ptr, size ) tracy::Profiler::MemAllocCallstack( ptr, size, TRACY_CALLSTACK, false ) -# define TracyFree( ptr ) tracy::Profiler::MemFreeCallstack( ptr, TRACY_CALLSTACK, false ) -# define TracySecureAlloc( ptr, size ) tracy::Profiler::MemAllocCallstack( ptr, size, TRACY_CALLSTACK, true ) -# define TracySecureFree( ptr ) tracy::Profiler::MemFreeCallstack( ptr, TRACY_CALLSTACK, true ) +#define TracyAlloc( ptr, size ) tracy::Profiler::MemAllocCallstack( ptr, size, TRACY_CALLSTACK, false ) +#define TracyFree( ptr ) tracy::Profiler::MemFreeCallstack( ptr, TRACY_CALLSTACK, false ) +#define TracySecureAlloc( ptr, size ) tracy::Profiler::MemAllocCallstack( ptr, size, TRACY_CALLSTACK, true ) +#define TracySecureFree( ptr ) tracy::Profiler::MemFreeCallstack( ptr, TRACY_CALLSTACK, true ) -# define TracyAllocN( ptr, size, name ) tracy::Profiler::MemAllocCallstackNamed( ptr, size, TRACY_CALLSTACK, false, name ) -# define TracyFreeN( ptr, name ) tracy::Profiler::MemFreeCallstackNamed( ptr, TRACY_CALLSTACK, false, name ) -# define TracySecureAllocN( ptr, size, name ) tracy::Profiler::MemAllocCallstackNamed( ptr, size, TRACY_CALLSTACK, true, name ) -# define TracySecureFreeN( ptr, name ) tracy::Profiler::MemFreeCallstackNamed( ptr, TRACY_CALLSTACK, true, name ) -#else -# define TracyMessage( txt, size ) tracy::Profiler::Message( txt, size, 0 ) -# define TracyMessageL( txt ) tracy::Profiler::Message( txt, 0 ) -# define TracyMessageC( txt, size, color ) tracy::Profiler::MessageColor( txt, size, color, 0 ) -# define TracyMessageLC( txt, color ) tracy::Profiler::MessageColor( txt, color, 0 ) +#define TracyAllocN( ptr, size, name ) tracy::Profiler::MemAllocCallstackNamed( ptr, size, TRACY_CALLSTACK, false, name ) +#define TracyFreeN( ptr, name ) tracy::Profiler::MemFreeCallstackNamed( ptr, TRACY_CALLSTACK, false, name ) +#define TracyMemoryDiscard( name ) tracy::Profiler::MemDiscardCallstack( name, false, TRACY_CALLSTACK ) +#define TracySecureAllocN( ptr, size, name ) tracy::Profiler::MemAllocCallstackNamed( ptr, size, TRACY_CALLSTACK, true, name ) +#define TracySecureFreeN( ptr, name ) tracy::Profiler::MemFreeCallstackNamed( ptr, TRACY_CALLSTACK, true, name ) +#define TracySecureMemoryDiscard( name ) tracy::Profiler::MemDiscardCallstack( name, true, TRACY_CALLSTACK ) -# define TracyAlloc( ptr, size ) tracy::Profiler::MemAlloc( ptr, size, false ) -# define TracyFree( ptr ) tracy::Profiler::MemFree( ptr, false ) -# define TracySecureAlloc( ptr, size ) tracy::Profiler::MemAlloc( ptr, size, true ) -# define TracySecureFree( ptr ) tracy::Profiler::MemFree( ptr, true ) +#define ZoneNamedS( varname, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), depth, active ) +#define ZoneNamedNS( varname, name, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), depth, active ) +#define ZoneNamedCS( varname, color, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), depth, active ) +#define ZoneNamedNCS( varname, name, color, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), depth, active ) -# define TracyAllocN( ptr, size, name ) tracy::Profiler::MemAllocNamed( ptr, size, false, name ) -# define TracyFreeN( ptr, name ) tracy::Profiler::MemFreeNamed( ptr, false, name ) -# define TracySecureAllocN( ptr, size, name ) tracy::Profiler::MemAllocNamed( ptr, size, true, name ) -# define TracySecureFreeN( ptr, name ) tracy::Profiler::MemFreeNamed( ptr, true, name ) -#endif +#define ZoneTransientS( varname, depth, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), nullptr, 0, depth, active ) +#define ZoneTransientNS( varname, name, depth, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), depth, active ) -#ifdef TRACY_HAS_CALLSTACK -# define ZoneNamedS( varname, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), depth, active ) -# define ZoneNamedNS( varname, name, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), depth, active ) -# define ZoneNamedCS( varname, color, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), depth, active ) -# define ZoneNamedNCS( varname, name, color, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), depth, active ) +#define ZoneScopedS( depth ) ZoneNamedS( ___tracy_scoped_zone, depth, true ) +#define ZoneScopedNS( name, depth ) ZoneNamedNS( ___tracy_scoped_zone, name, depth, true ) +#define ZoneScopedCS( color, depth ) ZoneNamedCS( ___tracy_scoped_zone, color, depth, true ) +#define ZoneScopedNCS( name, color, depth ) ZoneNamedNCS( ___tracy_scoped_zone, name, color, depth, true ) -# define ZoneTransientS( varname, depth, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), nullptr, 0, depth, active ) -# define ZoneTransientNS( varname, name, depth, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), depth, active ) +#define TracyAllocS( ptr, size, depth ) tracy::Profiler::MemAllocCallstack( ptr, size, depth, false ) +#define TracyFreeS( ptr, depth ) tracy::Profiler::MemFreeCallstack( ptr, depth, false ) +#define TracySecureAllocS( ptr, size, depth ) tracy::Profiler::MemAllocCallstack( ptr, size, depth, true ) +#define TracySecureFreeS( ptr, depth ) tracy::Profiler::MemFreeCallstack( ptr, depth, true ) -# define ZoneScopedS( depth ) ZoneNamedS( ___tracy_scoped_zone, depth, true ) -# define ZoneScopedNS( name, depth ) ZoneNamedNS( ___tracy_scoped_zone, name, depth, true ) -# define ZoneScopedCS( color, depth ) ZoneNamedCS( ___tracy_scoped_zone, color, depth, true ) -# define ZoneScopedNCS( name, color, depth ) ZoneNamedNCS( ___tracy_scoped_zone, name, color, depth, true ) +#define TracyAllocNS( ptr, size, depth, name ) tracy::Profiler::MemAllocCallstackNamed( ptr, size, depth, false, name ) +#define TracyFreeNS( ptr, depth, name ) tracy::Profiler::MemFreeCallstackNamed( ptr, depth, false, name ) +#define TracyMemoryDiscardS( name, depth ) tracy::Profiler::MemDiscardCallstack( name, false, depth ) +#define TracySecureAllocNS( ptr, size, depth, name ) tracy::Profiler::MemAllocCallstackNamed( ptr, size, depth, true, name ) +#define TracySecureFreeNS( ptr, depth, name ) tracy::Profiler::MemFreeCallstackNamed( ptr, depth, true, name ) +#define TracySecureMemoryDiscardS( name, depth ) tracy::Profiler::MemDiscardCallstack( name, true, depth ) -# define TracyAllocS( ptr, size, depth ) tracy::Profiler::MemAllocCallstack( ptr, size, depth, false ) -# define TracyFreeS( ptr, depth ) tracy::Profiler::MemFreeCallstack( ptr, depth, false ) -# define TracySecureAllocS( ptr, size, depth ) tracy::Profiler::MemAllocCallstack( ptr, size, depth, true ) -# define TracySecureFreeS( ptr, depth ) tracy::Profiler::MemFreeCallstack( ptr, depth, true ) - -# define TracyAllocNS( ptr, size, depth, name ) tracy::Profiler::MemAllocCallstackNamed( ptr, size, depth, false, name ) -# define TracyFreeNS( ptr, depth, name ) tracy::Profiler::MemFreeCallstackNamed( ptr, depth, false, name ) -# define TracySecureAllocNS( ptr, size, depth, name ) tracy::Profiler::MemAllocCallstackNamed( ptr, size, depth, true, name ) -# define TracySecureFreeNS( ptr, depth, name ) tracy::Profiler::MemFreeCallstackNamed( ptr, depth, true, name ) - -# define TracyMessageS( txt, size, depth ) tracy::Profiler::Message( txt, size, depth ) -# define TracyMessageLS( txt, depth ) tracy::Profiler::Message( txt, depth ) -# define TracyMessageCS( txt, size, color, depth ) tracy::Profiler::MessageColor( txt, size, color, depth ) -# define TracyMessageLCS( txt, color, depth ) tracy::Profiler::MessageColor( txt, color, depth ) -#else -# define ZoneNamedS( varname, depth, active ) ZoneNamed( varname, active ) -# define ZoneNamedNS( varname, name, depth, active ) ZoneNamedN( varname, name, active ) -# define ZoneNamedCS( varname, color, depth, active ) ZoneNamedC( varname, color, active ) -# define ZoneNamedNCS( varname, name, color, depth, active ) ZoneNamedNC( varname, name, color, active ) - -# define ZoneTransientS( varname, depth, active ) ZoneTransient( varname, active ) -# define ZoneTransientNS( varname, name, depth, active ) ZoneTransientN( varname, name, active ) - -# define ZoneScopedS( depth ) ZoneScoped -# define ZoneScopedNS( name, depth ) ZoneScopedN( name ) -# define ZoneScopedCS( color, depth ) ZoneScopedC( color ) -# define ZoneScopedNCS( name, color, depth ) ZoneScopedNC( name, color ) - -# define TracyAllocS( ptr, size, depth ) TracyAlloc( ptr, size ) -# define TracyFreeS( ptr, depth ) TracyFree( ptr ) -# define TracySecureAllocS( ptr, size, depth ) TracySecureAlloc( ptr, size ) -# define TracySecureFreeS( ptr, depth ) TracySecureFree( ptr ) - -# define TracyAllocNS( ptr, size, depth, name ) TracyAllocN( ptr, size, name ) -# define TracyFreeNS( ptr, depth, name ) TracyFreeN( ptr, name ) -# define TracySecureAllocNS( ptr, size, depth, name ) TracySecureAllocN( ptr, size, name ) -# define TracySecureFreeNS( ptr, depth, name ) TracySecureFreeN( ptr, name ) - -# define TracyMessageS( txt, size, depth ) TracyMessage( txt, size ) -# define TracyMessageLS( txt, depth ) TracyMessageL( txt ) -# define TracyMessageCS( txt, size, color, depth ) TracyMessageC( txt, size, color ) -# define TracyMessageLCS( txt, color, depth ) TracyMessageLC( txt, color ) -#endif +#define TracyMessageS( txt, size, depth ) tracy::Profiler::Message( txt, size, depth ) +#define TracyMessageLS( txt, depth ) tracy::Profiler::Message( txt, depth ) +#define TracyMessageCS( txt, size, color, depth ) tracy::Profiler::MessageColor( txt, size, color, depth ) +#define TracyMessageLCS( txt, color, depth ) tracy::Profiler::MessageColor( txt, color, depth ) #define TracySourceCallbackRegister( cb, data ) tracy::Profiler::SourceCallbackRegister( cb, data ) #define TracyParameterRegister( cb, data ) tracy::Profiler::ParameterRegister( cb, data ) @@ -275,7 +244,8 @@ #define TracySetProgramName( name ) tracy::GetProfiler().SetProgramName( name ); #ifdef TRACY_FIBERS -# define TracyFiberEnter( fiber ) tracy::Profiler::EnterFiber( fiber ) +# define TracyFiberEnter( fiber ) tracy::Profiler::EnterFiber( fiber, 0 ) +# define TracyFiberEnterHint( fiber, groupHint ) tracy::Profiler::EnterFiber( fiber, groupHint ) # define TracyFiberLeave tracy::Profiler::LeaveFiber() #endif diff --git a/src/third_party/tracy/tracy/TracyC.h b/src/third_party/tracy/tracy/TracyC.h index 6e62d56a..1b1373e0 100644 --- a/src/third_party/tracy/tracy/TracyC.h +++ b/src/third_party/tracy/tracy/TracyC.h @@ -4,7 +4,6 @@ #include #include -#include "../client/TracyCallstack.h" #include "../common/TracyApi.h" #ifdef __cplusplus @@ -39,6 +38,8 @@ TRACY_API void ___tracy_set_thread_name( const char* name ); typedef const void* TracyCZoneCtx; +typedef const void* TracyCLockCtx; + #define TracyCZone(c,x) #define TracyCZoneN(c,x,y) #define TracyCZoneC(c,x,y) @@ -51,8 +52,10 @@ typedef const void* TracyCZoneCtx; #define TracyCAlloc(x,y) #define TracyCFree(x) +#define TracyCMemoryDiscard(x) #define TracyCSecureAlloc(x,y) #define TracyCSecureFree(x) +#define TracyCSecureMemoryDiscard(x) #define TracyCAllocN(x,y,z) #define TracyCFreeN(x,y) @@ -83,8 +86,10 @@ typedef const void* TracyCZoneCtx; #define TracyCAllocS(x,y,z) #define TracyCFreeS(x,y) +#define TracyCMemoryDiscardS(x,y) #define TracyCSecureAllocS(x,y,z) #define TracyCSecureFreeS(x,y) +#define TracyCSecureMemoryDiscardS(x,y) #define TracyCAllocNS(x,y,z,w) #define TracyCFreeNS(x,y,z) @@ -96,6 +101,16 @@ typedef const void* TracyCZoneCtx; #define TracyCMessageCS(x,y,z,w) #define TracyCMessageLCS(x,y,z) +#define TracyCLockCtx(l) +#define TracyCLockAnnounce(l) +#define TracyCLockTerminate(l) +#define TracyCLockBeforeLock(l) +#define TracyCLockAfterLock(l) +#define TracyCLockAfterUnlock(l) +#define TracyCLockAfterTryLock(l,x) +#define TracyCLockMark(l) +#define TracyCLockCustomName(l,x,y) + #define TracyCIsConnected 0 #define TracyCIsStarted 0 @@ -125,7 +140,7 @@ struct ___tracy_source_location_data struct ___tracy_c_zone_context { uint32_t id; - int active; + int32_t active; }; struct ___tracy_gpu_time_data @@ -143,7 +158,7 @@ struct ___tracy_gpu_zone_begin_data { struct ___tracy_gpu_zone_begin_callstack_data { uint64_t srcloc; - int depth; + int32_t depth; uint16_t queryId; uint8_t context; }; @@ -178,28 +193,31 @@ struct ___tracy_gpu_time_sync_data { uint8_t context; }; +struct __tracy_lockable_context_data; + // Some containers don't support storing const types. // This struct, as visible to user, is immutable, so treat it as if const was declared here. typedef /*const*/ struct ___tracy_c_zone_context TracyCZoneCtx; +typedef struct __tracy_lockable_context_data* TracyCLockCtx; #ifdef TRACY_MANUAL_LIFETIME TRACY_API void ___tracy_startup_profiler(void); TRACY_API void ___tracy_shutdown_profiler(void); -TRACY_API int ___tracy_profiler_started(void); +TRACY_API int32_t ___tracy_profiler_started(void); # define TracyCIsStarted ___tracy_profiler_started() #else # define TracyCIsStarted 1 #endif -TRACY_API uint64_t ___tracy_alloc_srcloc( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz ); -TRACY_API uint64_t ___tracy_alloc_srcloc_name( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz ); +TRACY_API uint64_t ___tracy_alloc_srcloc( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, uint32_t color ); +TRACY_API uint64_t ___tracy_alloc_srcloc_name( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, uint32_t color ); -TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin( const struct ___tracy_source_location_data* srcloc, int active ); -TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_callstack( const struct ___tracy_source_location_data* srcloc, int depth, int active ); -TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_alloc( uint64_t srcloc, int active ); -TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_alloc_callstack( uint64_t srcloc, int depth, int active ); +TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin( const struct ___tracy_source_location_data* srcloc, int32_t active ); +TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_callstack( const struct ___tracy_source_location_data* srcloc, int32_t depth, int32_t active ); +TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_alloc( uint64_t srcloc, int32_t active ); +TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_alloc_callstack( uint64_t srcloc, int32_t depth, int32_t active ); TRACY_API void ___tracy_emit_zone_end( TracyCZoneCtx ctx ); TRACY_API void ___tracy_emit_zone_text( TracyCZoneCtx ctx, const char* txt, size_t size ); TRACY_API void ___tracy_emit_zone_name( TracyCZoneCtx ctx, const char* txt, size_t size ); @@ -228,20 +246,17 @@ TRACY_API void ___tracy_emit_gpu_context_name_serial( const struct ___tracy_gpu_ TRACY_API void ___tracy_emit_gpu_calibration_serial( const struct ___tracy_gpu_calibration_data ); TRACY_API void ___tracy_emit_gpu_time_sync_serial( const struct ___tracy_gpu_time_sync_data ); -TRACY_API int ___tracy_connected(void); +TRACY_API int32_t ___tracy_connected(void); -#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK -# define TracyCZone( ctx, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active ); -# define TracyCZoneN( ctx, name, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { name, __func__, TracyFile, (uint32_t)TracyLine, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active ); -# define TracyCZoneC( ctx, color, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active ); -# define TracyCZoneNC( ctx, name, color, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { name, __func__, TracyFile, (uint32_t)TracyLine, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active ); -#else -# define TracyCZone( ctx, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin( &TracyConcat(__tracy_source_location,TracyLine), active ); -# define TracyCZoneN( ctx, name, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { name, __func__, TracyFile, (uint32_t)TracyLine, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin( &TracyConcat(__tracy_source_location,TracyLine), active ); -# define TracyCZoneC( ctx, color, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin( &TracyConcat(__tracy_source_location,TracyLine), active ); -# define TracyCZoneNC( ctx, name, color, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { name, __func__, TracyFile, (uint32_t)TracyLine, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin( &TracyConcat(__tracy_source_location,TracyLine), active ); +#ifndef TRACY_CALLSTACK +#define TRACY_CALLSTACK 0 #endif +#define TracyCZone( ctx, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active ); +#define TracyCZoneN( ctx, name, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { name, __func__, TracyFile, (uint32_t)TracyLine, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active ); +#define TracyCZoneC( ctx, color, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active ); +#define TracyCZoneNC( ctx, name, color, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { name, __func__, TracyFile, (uint32_t)TracyLine, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active ); + #define TracyCZoneEnd( ctx ) ___tracy_emit_zone_end( ctx ); #define TracyCZoneText( ctx, txt, size ) ___tracy_emit_zone_text( ctx, txt, size ); @@ -250,57 +265,44 @@ TRACY_API int ___tracy_connected(void); #define TracyCZoneValue( ctx, value ) ___tracy_emit_zone_value( ctx, value ); -TRACY_API void ___tracy_emit_memory_alloc( const void* ptr, size_t size, int secure ); -TRACY_API void ___tracy_emit_memory_alloc_callstack( const void* ptr, size_t size, int depth, int secure ); -TRACY_API void ___tracy_emit_memory_free( const void* ptr, int secure ); -TRACY_API void ___tracy_emit_memory_free_callstack( const void* ptr, int depth, int secure ); -TRACY_API void ___tracy_emit_memory_alloc_named( const void* ptr, size_t size, int secure, const char* name ); -TRACY_API void ___tracy_emit_memory_alloc_callstack_named( const void* ptr, size_t size, int depth, int secure, const char* name ); -TRACY_API void ___tracy_emit_memory_free_named( const void* ptr, int secure, const char* name ); -TRACY_API void ___tracy_emit_memory_free_callstack_named( const void* ptr, int depth, int secure, const char* name ); +TRACY_API void ___tracy_emit_memory_alloc( const void* ptr, size_t size, int32_t secure ); +TRACY_API void ___tracy_emit_memory_alloc_callstack( const void* ptr, size_t size, int32_t depth, int32_t secure ); +TRACY_API void ___tracy_emit_memory_free( const void* ptr, int32_t secure ); +TRACY_API void ___tracy_emit_memory_free_callstack( const void* ptr, int32_t depth, int32_t secure ); +TRACY_API void ___tracy_emit_memory_alloc_named( const void* ptr, size_t size, int32_t secure, const char* name ); +TRACY_API void ___tracy_emit_memory_alloc_callstack_named( const void* ptr, size_t size, int32_t depth, int32_t secure, const char* name ); +TRACY_API void ___tracy_emit_memory_free_named( const void* ptr, int32_t secure, const char* name ); +TRACY_API void ___tracy_emit_memory_free_callstack_named( const void* ptr, int32_t depth, int32_t secure, const char* name ); +TRACY_API void ___tracy_emit_memory_discard( const char* name, int32_t secure ); +TRACY_API void ___tracy_emit_memory_discard_callstack( const char* name, int32_t secure, int32_t depth ); -TRACY_API void ___tracy_emit_message( const char* txt, size_t size, int callstack ); -TRACY_API void ___tracy_emit_messageL( const char* txt, int callstack ); -TRACY_API void ___tracy_emit_messageC( const char* txt, size_t size, uint32_t color, int callstack ); -TRACY_API void ___tracy_emit_messageLC( const char* txt, uint32_t color, int callstack ); +TRACY_API void ___tracy_emit_message( const char* txt, size_t size, int32_t callstack_depth ); +TRACY_API void ___tracy_emit_messageL( const char* txt, int32_t callstack_depth ); +TRACY_API void ___tracy_emit_messageC( const char* txt, size_t size, uint32_t color, int32_t callstack_depth ); +TRACY_API void ___tracy_emit_messageLC( const char* txt, uint32_t color, int32_t callstack_depth ); -#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK -# define TracyCAlloc( ptr, size ) ___tracy_emit_memory_alloc_callstack( ptr, size, TRACY_CALLSTACK, 0 ) -# define TracyCFree( ptr ) ___tracy_emit_memory_free_callstack( ptr, TRACY_CALLSTACK, 0 ) -# define TracyCSecureAlloc( ptr, size ) ___tracy_emit_memory_alloc_callstack( ptr, size, TRACY_CALLSTACK, 1 ) -# define TracyCSecureFree( ptr ) ___tracy_emit_memory_free_callstack( ptr, TRACY_CALLSTACK, 1 ) +#define TracyCAlloc( ptr, size ) ___tracy_emit_memory_alloc_callstack( ptr, size, TRACY_CALLSTACK, 0 ) +#define TracyCFree( ptr ) ___tracy_emit_memory_free_callstack( ptr, TRACY_CALLSTACK, 0 ) +#define TracyCMemoryDiscard( name ) ___tracy_emit_memory_discard_callstack( name, 0, TRACY_CALLSTACK ); +#define TracyCSecureAlloc( ptr, size ) ___tracy_emit_memory_alloc_callstack( ptr, size, TRACY_CALLSTACK, 1 ) +#define TracyCSecureFree( ptr ) ___tracy_emit_memory_free_callstack( ptr, TRACY_CALLSTACK, 1 ) +#define TracyCSecureMemoryDiscard( name ) ___tracy_emit_memory_discard_callstack( name, 1, TRACY_CALLSTACK ); -# define TracyCAllocN( ptr, size, name ) ___tracy_emit_memory_alloc_callstack_named( ptr, size, TRACY_CALLSTACK, 0, name ) -# define TracyCFreeN( ptr, name ) ___tracy_emit_memory_free_callstack_named( ptr, TRACY_CALLSTACK, 0, name ) -# define TracyCSecureAllocN( ptr, size, name ) ___tracy_emit_memory_alloc_callstack_named( ptr, size, TRACY_CALLSTACK, 1, name ) -# define TracyCSecureFreeN( ptr, name ) ___tracy_emit_memory_free_callstack_named( ptr, TRACY_CALLSTACK, 1, name ) +#define TracyCAllocN( ptr, size, name ) ___tracy_emit_memory_alloc_callstack_named( ptr, size, TRACY_CALLSTACK, 0, name ) +#define TracyCFreeN( ptr, name ) ___tracy_emit_memory_free_callstack_named( ptr, TRACY_CALLSTACK, 0, name ) +#define TracyCSecureAllocN( ptr, size, name ) ___tracy_emit_memory_alloc_callstack_named( ptr, size, TRACY_CALLSTACK, 1, name ) +#define TracyCSecureFreeN( ptr, name ) ___tracy_emit_memory_free_callstack_named( ptr, TRACY_CALLSTACK, 1, name ) -# define TracyCMessage( txt, size ) ___tracy_emit_message( txt, size, TRACY_CALLSTACK ); -# define TracyCMessageL( txt ) ___tracy_emit_messageL( txt, TRACY_CALLSTACK ); -# define TracyCMessageC( txt, size, color ) ___tracy_emit_messageC( txt, size, color, TRACY_CALLSTACK ); -# define TracyCMessageLC( txt, color ) ___tracy_emit_messageLC( txt, color, TRACY_CALLSTACK ); -#else -# define TracyCAlloc( ptr, size ) ___tracy_emit_memory_alloc( ptr, size, 0 ); -# define TracyCFree( ptr ) ___tracy_emit_memory_free( ptr, 0 ); -# define TracyCSecureAlloc( ptr, size ) ___tracy_emit_memory_alloc( ptr, size, 1 ); -# define TracyCSecureFree( ptr ) ___tracy_emit_memory_free( ptr, 1 ); - -# define TracyCAllocN( ptr, size, name ) ___tracy_emit_memory_alloc_named( ptr, size, 0, name ); -# define TracyCFreeN( ptr, name ) ___tracy_emit_memory_free_named( ptr, 0, name ); -# define TracyCSecureAllocN( ptr, size, name ) ___tracy_emit_memory_alloc_named( ptr, size, 1, name ); -# define TracyCSecureFreeN( ptr, name ) ___tracy_emit_memory_free_named( ptr, 1, name ); - -# define TracyCMessage( txt, size ) ___tracy_emit_message( txt, size, 0 ); -# define TracyCMessageL( txt ) ___tracy_emit_messageL( txt, 0 ); -# define TracyCMessageC( txt, size, color ) ___tracy_emit_messageC( txt, size, color, 0 ); -# define TracyCMessageLC( txt, color ) ___tracy_emit_messageLC( txt, color, 0 ); -#endif +#define TracyCMessage( txt, size ) ___tracy_emit_message( txt, size, TRACY_CALLSTACK ); +#define TracyCMessageL( txt ) ___tracy_emit_messageL( txt, TRACY_CALLSTACK ); +#define TracyCMessageC( txt, size, color ) ___tracy_emit_messageC( txt, size, color, TRACY_CALLSTACK ); +#define TracyCMessageLC( txt, color ) ___tracy_emit_messageLC( txt, color, TRACY_CALLSTACK ); TRACY_API void ___tracy_emit_frame_mark( const char* name ); TRACY_API void ___tracy_emit_frame_mark_start( const char* name ); TRACY_API void ___tracy_emit_frame_mark_end( const char* name ); -TRACY_API void ___tracy_emit_frame_image( const void* image, uint16_t w, uint16_t h, uint8_t offset, int flip ); +TRACY_API void ___tracy_emit_frame_image( const void* image, uint16_t w, uint16_t h, uint8_t offset, int32_t flip ); #define TracyCFrameMark ___tracy_emit_frame_mark( 0 ); #define TracyCFrameMarkNamed( name ) ___tracy_emit_frame_mark( name ); @@ -312,7 +314,7 @@ TRACY_API void ___tracy_emit_frame_image( const void* image, uint16_t w, uint16_ TRACY_API void ___tracy_emit_plot( const char* name, double val ); TRACY_API void ___tracy_emit_plot_float( const char* name, float val ); TRACY_API void ___tracy_emit_plot_int( const char* name, int64_t val ); -TRACY_API void ___tracy_emit_plot_config( const char* name, int type, int step, int fill, uint32_t color ); +TRACY_API void ___tracy_emit_plot_config( const char* name, int32_t type, int32_t step, int32_t fill, uint32_t color ); TRACY_API void ___tracy_emit_message_appinfo( const char* txt, size_t size ); #define TracyCPlot( name, val ) ___tracy_emit_plot( name, val ); @@ -322,47 +324,46 @@ TRACY_API void ___tracy_emit_message_appinfo( const char* txt, size_t size ); #define TracyCAppInfo( txt, size ) ___tracy_emit_message_appinfo( txt, size ); -#ifdef TRACY_HAS_CALLSTACK -# define TracyCZoneS( ctx, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), depth, active ); -# define TracyCZoneNS( ctx, name, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { name, __func__, TracyFile, (uint32_t)TracyLine, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), depth, active ); -# define TracyCZoneCS( ctx, color, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), depth, active ); -# define TracyCZoneNCS( ctx, name, color, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { name, __func__, TracyFile, (uint32_t)TracyLine, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), depth, active ); +#define TracyCZoneS( ctx, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), depth, active ); +#define TracyCZoneNS( ctx, name, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { name, __func__, TracyFile, (uint32_t)TracyLine, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), depth, active ); +#define TracyCZoneCS( ctx, color, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), depth, active ); +#define TracyCZoneNCS( ctx, name, color, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { name, __func__, TracyFile, (uint32_t)TracyLine, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), depth, active ); -# define TracyCAllocS( ptr, size, depth ) ___tracy_emit_memory_alloc_callstack( ptr, size, depth, 0 ) -# define TracyCFreeS( ptr, depth ) ___tracy_emit_memory_free_callstack( ptr, depth, 0 ) -# define TracyCSecureAllocS( ptr, size, depth ) ___tracy_emit_memory_alloc_callstack( ptr, size, depth, 1 ) -# define TracyCSecureFreeS( ptr, depth ) ___tracy_emit_memory_free_callstack( ptr, depth, 1 ) +#define TracyCAllocS( ptr, size, depth ) ___tracy_emit_memory_alloc_callstack( ptr, size, depth, 0 ) +#define TracyCFreeS( ptr, depth ) ___tracy_emit_memory_free_callstack( ptr, depth, 0 ) +#define TracyCMemoryDiscardS( name, depth ) ___tracy_emit_memory_discard_callstack( name, 0, depth ) +#define TracyCSecureAllocS( ptr, size, depth ) ___tracy_emit_memory_alloc_callstack( ptr, size, depth, 1 ) +#define TracyCSecureFreeS( ptr, depth ) ___tracy_emit_memory_free_callstack( ptr, depth, 1 ) +#define TracyCSecureMemoryDiscardS( name, depth ) ___tracy_emit_memory_discard_callstack( name, 1, depth ) -# define TracyCAllocNS( ptr, size, depth, name ) ___tracy_emit_memory_alloc_callstack_named( ptr, size, depth, 0, name ) -# define TracyCFreeNS( ptr, depth, name ) ___tracy_emit_memory_free_callstack_named( ptr, depth, 0, name ) -# define TracyCSecureAllocNS( ptr, size, depth, name ) ___tracy_emit_memory_alloc_callstack_named( ptr, size, depth, 1, name ) -# define TracyCSecureFreeNS( ptr, depth, name ) ___tracy_emit_memory_free_callstack_named( ptr, depth, 1, name ) +#define TracyCAllocNS( ptr, size, depth, name ) ___tracy_emit_memory_alloc_callstack_named( ptr, size, depth, 0, name ) +#define TracyCFreeNS( ptr, depth, name ) ___tracy_emit_memory_free_callstack_named( ptr, depth, 0, name ) +#define TracyCSecureAllocNS( ptr, size, depth, name ) ___tracy_emit_memory_alloc_callstack_named( ptr, size, depth, 1, name ) +#define TracyCSecureFreeNS( ptr, depth, name ) ___tracy_emit_memory_free_callstack_named( ptr, depth, 1, name ) -# define TracyCMessageS( txt, size, depth ) ___tracy_emit_message( txt, size, depth ); -# define TracyCMessageLS( txt, depth ) ___tracy_emit_messageL( txt, depth ); -# define TracyCMessageCS( txt, size, color, depth ) ___tracy_emit_messageC( txt, size, color, depth ); -# define TracyCMessageLCS( txt, color, depth ) ___tracy_emit_messageLC( txt, color, depth ); -#else -# define TracyCZoneS( ctx, depth, active ) TracyCZone( ctx, active ) -# define TracyCZoneNS( ctx, name, depth, active ) TracyCZoneN( ctx, name, active ) -# define TracyCZoneCS( ctx, color, depth, active ) TracyCZoneC( ctx, color, active ) -# define TracyCZoneNCS( ctx, name, color, depth, active ) TracyCZoneNC( ctx, name, color, active ) +#define TracyCMessageS( txt, size, depth ) ___tracy_emit_message( txt, size, depth ); +#define TracyCMessageLS( txt, depth ) ___tracy_emit_messageL( txt, depth ); +#define TracyCMessageCS( txt, size, color, depth ) ___tracy_emit_messageC( txt, size, color, depth ); +#define TracyCMessageLCS( txt, color, depth ) ___tracy_emit_messageLC( txt, color, depth ); -# define TracyCAllocS( ptr, size, depth ) TracyCAlloc( ptr, size ) -# define TracyCFreeS( ptr, depth ) TracyCFree( ptr ) -# define TracyCSecureAllocS( ptr, size, depth ) TracyCSecureAlloc( ptr, size ) -# define TracyCSecureFreeS( ptr, depth ) TracyCSecureFree( ptr ) -# define TracyCAllocNS( ptr, size, depth, name ) TracyCAllocN( ptr, size, name ) -# define TracyCFreeNS( ptr, depth, name ) TracyCFreeN( ptr, name ) -# define TracyCSecureAllocNS( ptr, size, depth, name ) TracyCSecureAllocN( ptr, size, name ) -# define TracyCSecureFreeNS( ptr, depth, name ) TracyCSecureFreeN( ptr, name ) +TRACY_API struct __tracy_lockable_context_data* ___tracy_announce_lockable_ctx( const struct ___tracy_source_location_data* srcloc ); +TRACY_API void ___tracy_terminate_lockable_ctx( struct __tracy_lockable_context_data* lockdata ); +TRACY_API int32_t ___tracy_before_lock_lockable_ctx( struct __tracy_lockable_context_data* lockdata ); +TRACY_API void ___tracy_after_lock_lockable_ctx( struct __tracy_lockable_context_data* lockdata ); +TRACY_API void ___tracy_after_unlock_lockable_ctx( struct __tracy_lockable_context_data* lockdata ); +TRACY_API void ___tracy_after_try_lock_lockable_ctx( struct __tracy_lockable_context_data* lockdata, int32_t acquired ); +TRACY_API void ___tracy_mark_lockable_ctx( struct __tracy_lockable_context_data* lockdata, const struct ___tracy_source_location_data* srcloc ); +TRACY_API void ___tracy_custom_name_lockable_ctx( struct __tracy_lockable_context_data* lockdata, const char* name, size_t nameSz ); -# define TracyCMessageS( txt, size, depth ) TracyCMessage( txt, size ) -# define TracyCMessageLS( txt, depth ) TracyCMessageL( txt ) -# define TracyCMessageCS( txt, size, color, depth ) TracyCMessageC( txt, size, color ) -# define TracyCMessageLCS( txt, color, depth ) TracyCMessageLC( txt, color ) -#endif +#define TracyCLockAnnounce( lock ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, 0 }; lock = ___tracy_announce_lockable_ctx( &TracyConcat(__tracy_source_location,TracyLine) ); +#define TracyCLockTerminate( lock ) ___tracy_terminate_lockable_ctx( lock ); +#define TracyCLockBeforeLock( lock ) ___tracy_before_lock_lockable_ctx( lock ); +#define TracyCLockAfterLock( lock ) ___tracy_after_lock_lockable_ctx( lock ); +#define TracyCLockAfterUnlock( lock ) ___tracy_after_unlock_lockable_ctx( lock ); +#define TracyCLockAfterTryLock( lock, acquired ) ___tracy_after_try_lock_lockable_ctx( lock, acquired ); +#define TracyCLockMark( lock ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, 0 }; ___tracy_mark_lockable_ctx( lock, &TracyConcat(__tracy_source_location,TracyLine) ); +#define TracyCLockCustomName( lock, name, nameSz ) ___tracy_custom_name_lockable_ctx( lock, name, nameSz ); #define TracyCIsConnected ___tracy_connected() diff --git a/src/third_party/tracy/tracy/TracyD3D11.hpp b/src/third_party/tracy/tracy/TracyD3D11.hpp index 8aebdb26..6e0c756e 100644 --- a/src/third_party/tracy/tracy/TracyD3D11.hpp +++ b/src/third_party/tracy/tracy/TracyD3D11.hpp @@ -307,7 +307,7 @@ public: WriteQueueItem(item, QueueType::GpuZoneBeginSerial, reinterpret_cast(srcloc)); } - tracy_force_inline D3D11ZoneScope( D3D11Ctx* ctx, const SourceLocationData* srcloc, int depth, bool active ) + tracy_force_inline D3D11ZoneScope( D3D11Ctx* ctx, const SourceLocationData* srcloc, int32_t depth, bool active ) : D3D11ZoneScope(ctx, active) { if( !m_active ) return; @@ -327,7 +327,7 @@ public: WriteQueueItem(item, QueueType::GpuZoneBeginAllocSrcLocSerial, sourceLocation); } - tracy_force_inline D3D11ZoneScope(D3D11Ctx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int depth, bool active) + tracy_force_inline D3D11ZoneScope(D3D11Ctx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int32_t depth, bool active) : D3D11ZoneScope(ctx, active) { if( !m_active ) return; @@ -357,7 +357,7 @@ public: private: tracy_force_inline D3D11ZoneScope( D3D11Ctx* ctx, bool active ) #ifdef TRACY_ON_DEMAND - : m_active( is_active && GetProfiler().IsConnected() ) + : m_active( active && GetProfiler().IsConnected() ) #else : m_active( active ) #endif diff --git a/src/third_party/tracy/tracy/TracyD3D12.hpp b/src/third_party/tracy/tracy/TracyD3D12.hpp index 41567937..d36253d7 100644 --- a/src/third_party/tracy/tracy/TracyD3D12.hpp +++ b/src/third_party/tracy/tracy/TracyD3D12.hpp @@ -385,7 +385,7 @@ namespace tracy WriteQueueItem(item, QueueType::GpuZoneBeginSerial, reinterpret_cast(srcLocation)); } - tracy_force_inline D3D12ZoneScope(D3D12QueueCtx* ctx, ID3D12GraphicsCommandList* cmdList, const SourceLocationData* srcLocation, int depth, bool active) + tracy_force_inline D3D12ZoneScope(D3D12QueueCtx* ctx, ID3D12GraphicsCommandList* cmdList, const SourceLocationData* srcLocation, int32_t depth, bool active) : D3D12ZoneScope(ctx, cmdList, active) { if (!m_active) return; @@ -405,7 +405,7 @@ namespace tracy WriteQueueItem(item, QueueType::GpuZoneBeginAllocSrcLocSerial, sourceLocation); } - tracy_force_inline D3D12ZoneScope(D3D12QueueCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, ID3D12GraphicsCommandList* cmdList, int depth, bool active) + tracy_force_inline D3D12ZoneScope(D3D12QueueCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, ID3D12GraphicsCommandList* cmdList, int32_t depth, bool active) : D3D12ZoneScope(ctx, cmdList, active) { if (!m_active) return; diff --git a/src/third_party/tracy/tracy/TracyLua.hpp b/src/third_party/tracy/tracy/TracyLua.hpp index c972ffb2..51dead51 100644 --- a/src/third_party/tracy/tracy/TracyLua.hpp +++ b/src/third_party/tracy/tracy/TracyLua.hpp @@ -188,6 +188,13 @@ static tracy_force_inline void SendLuaCallstack( lua_State* L, uint32_t depth ) TracyQueueCommit( callstackAllocFatThread ); } +static inline void LuaShortenSrc( char* dst, const char* src ) +{ + size_t l = std::min( (size_t)255, strlen( src ) ); + memcpy( dst, src, l ); + dst[l] = 0; +} + static inline int LuaZoneBeginS( lua_State* L ) { #ifdef TRACY_ON_DEMAND @@ -207,7 +214,9 @@ static inline int LuaZoneBeginS( lua_State* L ) lua_Debug dbg; lua_getstack( L, 1, &dbg ); lua_getinfo( L, "Snl", &dbg ); - const auto srcloc = Profiler::AllocSourceLocation( dbg.currentline, dbg.source, dbg.name ? dbg.name : dbg.short_src ); + char src[256]; + LuaShortenSrc( src, dbg.source ); + const auto srcloc = Profiler::AllocSourceLocation( dbg.currentline, src, dbg.name ? dbg.name : dbg.short_src ); TracyQueuePrepare( QueueType::ZoneBeginAllocSrcLocCallstack ); MemWrite( &item->zoneBegin.time, Profiler::GetTime() ); @@ -237,8 +246,10 @@ static inline int LuaZoneBeginNS( lua_State* L ) lua_getstack( L, 1, &dbg ); lua_getinfo( L, "Snl", &dbg ); size_t nsz; + char src[256]; + LuaShortenSrc( src, dbg.source ); const auto name = lua_tolstring( L, 1, &nsz ); - const auto srcloc = Profiler::AllocSourceLocation( dbg.currentline, dbg.source, dbg.name ? dbg.name : dbg.short_src, name, nsz ); + const auto srcloc = Profiler::AllocSourceLocation( dbg.currentline, src, dbg.name ? dbg.name : dbg.short_src, name, nsz ); TracyQueuePrepare( QueueType::ZoneBeginAllocSrcLocCallstack ); MemWrite( &item->zoneBegin.time, Profiler::GetTime() ); @@ -264,7 +275,9 @@ static inline int LuaZoneBegin( lua_State* L ) lua_Debug dbg; lua_getstack( L, 1, &dbg ); lua_getinfo( L, "Snl", &dbg ); - const auto srcloc = Profiler::AllocSourceLocation( dbg.currentline, dbg.source, dbg.name ? dbg.name : dbg.short_src ); + char src[256]; + LuaShortenSrc( src, dbg.source ); + const auto srcloc = Profiler::AllocSourceLocation( dbg.currentline, src, dbg.name ? dbg.name : dbg.short_src ); TracyQueuePrepare( QueueType::ZoneBeginAllocSrcLoc ); MemWrite( &item->zoneBegin.time, Profiler::GetTime() ); @@ -290,8 +303,10 @@ static inline int LuaZoneBeginN( lua_State* L ) lua_getstack( L, 1, &dbg ); lua_getinfo( L, "Snl", &dbg ); size_t nsz; + char src[256]; + LuaShortenSrc( src, dbg.source ); const auto name = lua_tolstring( L, 1, &nsz ); - const auto srcloc = Profiler::AllocSourceLocation( dbg.currentline, dbg.source, dbg.name ? dbg.name : dbg.short_src, name, nsz ); + const auto srcloc = Profiler::AllocSourceLocation( dbg.currentline, src, dbg.name ? dbg.name : dbg.short_src, name, nsz ); TracyQueuePrepare( QueueType::ZoneBeginAllocSrcLoc ); MemWrite( &item->zoneBegin.time, Profiler::GetTime() ); diff --git a/src/third_party/tracy/tracy/TracyMetal.hmm b/src/third_party/tracy/tracy/TracyMetal.hmm new file mode 100644 index 00000000..a4b4cb52 --- /dev/null +++ b/src/third_party/tracy/tracy/TracyMetal.hmm @@ -0,0 +1,644 @@ +#ifndef __TRACYMETAL_HMM__ +#define __TRACYMETAL_HMM__ + +/* This file implements a Metal API back-end for Tracy (it has only been tested on Apple + Silicon devices, but it should also work on Intel-based Macs and older iOS devices). + The Metal back-end in Tracy operates differently than other GPU back-ends like Vulkan, + Direct3D and OpenGL. Specifically, TracyMetalZone() must be placed around the site where + a command encoder is created. This is because not all hardware supports timestamps at + command granularity, and can only provide timestamps around an entire command encoder. + This accommodates for all tiers of hardware; in the future, variants of TracyMetalZone() + will be added to support the habitual command-level granularity of Tracy GPU back-ends. + Metal also imposes a few restrictions that make the process of requesting and collecting + queries more complicated in Tracy: + a) timestamp query buffers are limited to 4096 queries (32KB, where each query is 8 bytes) + b) when a timestamp query buffer is created, Metal initializes all timestamps with zeroes, + and there's no way to reset them back to zero after timestamps get resolved; the only + way to clear the timestamps is by allocating a new timestamp query buffer + c) if a command encoder records no commands and its corresponding command buffer ends up + committed to the command queue, Metal will "optimize-away" the encoder along with any + timestamp queries associated with it (the timestamp will remain as zero and will never + get resolved) + Because of the limitations above, two timestamp buffers are managed internally. Once one + of the buffers fills up with requests, the second buffer can start serving new requests. + Once all requests in a buffer get resolved and collected, the entire buffer is discarded + and a new one allocated for future requests. (Proper cycling through a ring buffer would + require bookkeeping and completion handlers to collect only the known complete queries.) + In the current implementation, there is potential for a race condition when the buffer is + discarded and reallocated. In practice, the race condition will never materialize so long + as TracyMetalCollect() is called frequently to keep the amount of unresolved queries low. + Finally, there's a timeout mechanism during timestamp collection to detect "empty" command + encoders and ensure progress. +*/ + +#ifndef TRACY_ENABLE + +#define TracyMetalContext(device) nullptr +#define TracyMetalDestroy(ctx) +#define TracyMetalContextName(ctx, name, size) + +#define TracyMetalZone(ctx, encoderDesc, name) +#define TracyMetalZoneC(ctx, encoderDesc, name, color) +#define TracyMetalNamedZone(ctx, varname, encoderDesc, name, active) +#define TracyMetalNamedZoneC(ctx, varname, encoderDesc, name, color, active) + +#define TracyMetalCollect(ctx) + +namespace tracy +{ +class MetalZoneScope {}; +} + +using TracyMetalCtx = void; + +#else + +#if not __has_feature(objc_arc) +#error TracyMetal requires ARC to be enabled. +#endif + +#include +#include +#include + +#include "Tracy.hpp" +#include "../client/TracyProfiler.hpp" +#include "../client/TracyCallstack.hpp" +#include "../common/TracyAlign.hpp" +#include "../common/TracyAlloc.hpp" + +// ok to import if in obj-c code +#import + +#define TRACY_METAL_VA_ARGS(...) , ##__VA_ARGS__ + +#define TracyMetalPanic(ret, msg, ...) do { \ + char buffer [1024]; \ + snprintf(buffer, sizeof(buffer), "TracyMetal: " msg TRACY_METAL_VA_ARGS(__VA_ARGS__)); \ + TracyMessageC(buffer, strlen(buffer), tracy::Color::OrangeRed); \ + fprintf(stderr, "%s\n", buffer); \ + ret; \ + } while(false); + +#ifndef TRACY_METAL_TIMESTAMP_COLLECT_TIMEOUT +#define TRACY_METAL_TIMESTAMP_COLLECT_TIMEOUT 0.200f +#endif//TRACY_METAL_TIMESTAMP_COLLECT_TIMEOUT + +#ifndef TRACY_METAL_DEBUG_MASK +#define TRACY_METAL_DEBUG_MASK (0) +#endif//TRACY_METAL_DEBUG_MASK + +#if TRACY_METAL_DEBUG_MASK + #define TracyMetalDebugMasked(mask, ...) if constexpr (mask & TRACY_METAL_DEBUG_MASK) { __VA_ARGS__; } +#else + #define TracyMetalDebugMasked(mask, ...) +#endif + +#if TRACY_METAL_DEBUG_MASK & (1 << 1) + #define TracyMetalDebug_0b00010(...) __VA_ARGS__; +#else + #define TracyMetalDebug_0b00010(...) +#endif + +#if TRACY_METAL_DEBUG_MASK & (1 << 4) + #define TracyMetalDebug_0b10000(...) __VA_ARGS__; +#else + #define TracyMetalDebug_0b10000(...) +#endif + +#ifndef TracyMetalDebugZoneScopeWireTap +#define TracyMetalDebugZoneScopeWireTap +#endif//TracyMetalDebugZoneScopeWireTap + +namespace tracy +{ + +class MetalCtx +{ + friend class MetalZoneScope; + + enum { MaxQueries = 4 * 1024 }; // Metal: between 8 and 32768 _BYTES_... + +public: + static MetalCtx* Create(id device) + { + ZoneScopedNC("tracy::MetalCtx::Create", Color::Red4); + auto ctx = static_cast(tracy_malloc(sizeof(MetalCtx))); + new (ctx) MetalCtx(device); + if (ctx->m_contextId == 255) + { + TracyMetalPanic({assert(false);} return nullptr, "ERROR: unable to create context."); + Destroy(ctx); + } + return ctx; + } + + static void Destroy(MetalCtx* ctx) + { + ZoneScopedNC("tracy::MetalCtx::Destroy", Color::Red4); + ctx->~MetalCtx(); + tracy_free(ctx); + } + + void Name( const char* name, uint16_t len ) + { + auto ptr = (char*)tracy_malloc( len ); + memcpy( ptr, name, len ); + + auto* item = Profiler::QueueSerial(); + MemWrite( &item->hdr.type, QueueType::GpuContextName ); + MemWrite( &item->gpuContextNameFat.context, m_contextId ); + MemWrite( &item->gpuContextNameFat.ptr, (uint64_t)ptr ); + MemWrite( &item->gpuContextNameFat.size, len ); + SubmitQueueItem(item); + } + + bool Collect() + { + ZoneScopedNC("tracy::MetalCtx::Collect", Color::Red4); + +#ifdef TRACY_ON_DEMAND + if (!GetProfiler().IsConnected()) + { + return true; + } +#endif + + // Only one thread is allowed to collect timestamps at any given time + // but there's no need to block contending threads + if (!m_collectionMutex.try_lock()) + { + return true; + } + + std::unique_lock lock (m_collectionMutex, std::adopt_lock); + + uintptr_t begin = m_previousCheckpoint.load(); + uintptr_t latestCheckpoint = m_queryCounter.load(); // TODO: MTLEvent? MTLFence?; + TracyMetalDebugMasked(1<<3, ZoneValue(begin)); + TracyMetalDebugMasked(1<<3, ZoneValue(latestCheckpoint)); + + uint32_t count = RingCount(begin, latestCheckpoint); + if (count == 0) // no pending timestamp queries + { + //uintptr_t nextCheckpoint = m_queryCounter.load(); + //if (nextCheckpoint != latestCheckpoint) + //{ + // // TODO: signal event / fence now? + //} + return true; + } + + // resolve up until the ring buffer boundary and let a subsequenty call + // to Collect handle the wrap-around + bool reallocateBuffer = false; + if (RingIndex(begin) + count >= RingSize()) + { + count = RingSize() - RingIndex(begin); + reallocateBuffer = true; + } + TracyMetalDebugMasked(1<<3, ZoneValue(count)); + + auto buffer_idx = (begin / MaxQueries) % 2; + auto counterSampleBuffer = m_counterSampleBuffers[buffer_idx]; + + if (count >= RingSize()) + { + TracyMetalPanic(return false, "Collect: FULL! too many pending timestamp queries. [%llu, %llu] (%u)", begin, latestCheckpoint, count); + } + + TracyMetalDebugMasked(1<<3, TracyMetalPanic(, "Collect: [%llu, %llu] :: (%u)", begin, latestCheckpoint, count)); + + NSRange range = NSMakeRange(RingIndex(begin), count); + NSData* data = [counterSampleBuffer resolveCounterRange:range]; + NSUInteger numResolvedTimestamps = data.length / sizeof(MTLCounterResultTimestamp); + MTLCounterResultTimestamp* timestamps = (MTLCounterResultTimestamp *)(data.bytes); + if (timestamps == nil) + { + TracyMetalPanic(return false, "Collect: unable to resolve timestamps."); + } + + if (numResolvedTimestamps != count) + { + TracyMetalPanic(, "Collect: numResolvedTimestamps != count : %u != %u", (uint32_t)numResolvedTimestamps, count); + } + + int resolved = 0; + for (auto i = 0; i < numResolvedTimestamps; i += 2) + { + TracyMetalDebug_0b10000( ZoneScopedN("tracy::MetalCtx::Collect::[i]") ); + MTLTimestamp t_start = timestamps[i+0].timestamp; + MTLTimestamp t_end = timestamps[i+1].timestamp; + uint32_t k = RingIndex(begin + i); + TracyMetalDebugMasked(1<<4, TracyMetalPanic(, "Collect: timestamp[%u] = %llu | timestamp[%u] = %llu | diff = %llu\n", k, t_start, k+1, t_end, (t_end - t_start))); + if ((t_start == MTLCounterErrorValue) || (t_end == MTLCounterErrorValue)) + { + TracyMetalPanic(, "Collect: invalid timestamp (MTLCounterErrorValue) at %u.", k); + break; + } + // Metal will initialize timestamp buffer with zeroes; encountering a zero-value + // timestamp means that the timestamp has not been written and resolved yet + if ((t_start == 0) || (t_end == 0)) + { + auto checkTime = std::chrono::high_resolution_clock::now(); + auto requestTime = m_timestampRequestTime[k]; + auto ms_in_flight = std::chrono::duration(checkTime-requestTime).count()*1000.0f; + TracyMetalDebugMasked(1<<4, TracyMetalPanic(, "Collect: invalid timestamp (zero) at %u [%.0fms in flight].", k, ms_in_flight)); + const float timeout_ms = TRACY_METAL_TIMESTAMP_COLLECT_TIMEOUT * 1000.0f; + if (ms_in_flight < timeout_ms) + break; + TracyMetalDebug_0b10000( ZoneScopedN("tracy::MetalCtx::Collect::Drop") ); + TracyMetalPanic(, "Collect: giving up on timestamp at %u [%.0fms in flight].", k, ms_in_flight); + t_start = m_mostRecentTimestamp + 5; + t_end = t_start + 5; + } + TracyMetalDebugMasked(1<<2, TracyFreeN((void*)(uintptr_t)(k+0), "TracyMetalGpuZone")); + TracyMetalDebugMasked(1<<2, TracyFreeN((void*)(uintptr_t)(k+1), "TracyMetalGpuZone")); + { + auto* item = Profiler::QueueSerial(); + MemWrite(&item->hdr.type, QueueType::GpuTime); + MemWrite(&item->gpuTime.gpuTime, static_cast(t_start)); + MemWrite(&item->gpuTime.queryId, static_cast(k)); + MemWrite(&item->gpuTime.context, m_contextId); + Profiler::QueueSerialFinish(); + } + { + auto* item = Profiler::QueueSerial(); + MemWrite(&item->hdr.type, QueueType::GpuTime); + MemWrite(&item->gpuTime.gpuTime, static_cast(t_end)); + MemWrite(&item->gpuTime.queryId, static_cast(k+1)); + MemWrite(&item->gpuTime.context, m_contextId); + Profiler::QueueSerialFinish(); + } + m_mostRecentTimestamp = (t_end > m_mostRecentTimestamp) ? t_end : m_mostRecentTimestamp; + TracyMetalDebugMasked(1<<1, TracyFreeN((void*)(uintptr_t)k, "TracyMetalTimestampQueryId")); + resolved += 2; + } + TracyMetalDebugMasked(1<<3, ZoneValue(RingCount(begin, m_previousCheckpoint.load()))); + + m_previousCheckpoint += resolved; + + // Check whether the timestamp buffer has been fully resolved/collected: + // WARN: there's technically a race condition here: NextQuery() may reference the + // buffer that is being released instead of the new one. In practice, this should + // never happen so long as Collect is called frequently enough to prevent pending + // timestamp query requests from piling up too quickly. + if ((resolved == count) && (m_previousCheckpoint.load() % MaxQueries) == 0) + { + m_counterSampleBuffers[buffer_idx] = NewTimestampSampleBuffer(m_device, MaxQueries); + } + + //RecalibrateClocks(); // to account for drift + + return true; + } + +private: + MetalCtx(id device) + : m_device(device) + { + TracyMetalDebugMasked(1<<0, TracyMetalPanic(, "MTLCounterErrorValue = 0x%llx", MTLCounterErrorValue)); + TracyMetalDebugMasked(1<<0, TracyMetalPanic(, "MTLCounterDontSample = 0x%llx", MTLCounterDontSample)); + + if (m_device == nil) + { + TracyMetalPanic({assert(false);} return, "device is nil."); + } + if (![m_device supportsCounterSampling:MTLCounterSamplingPointAtStageBoundary]) + { + TracyMetalPanic({assert(false);} return, "ERROR: timestamp sampling at pipeline stage boundary is not supported."); + } + if (![m_device supportsCounterSampling:MTLCounterSamplingPointAtDrawBoundary]) + { + TracyMetalDebugMasked(1<<0, fprintf(stderr, "WARNING: timestamp sampling at draw call boundary is not supported.\n")); + } + if (![m_device supportsCounterSampling:MTLCounterSamplingPointAtBlitBoundary]) + { + TracyMetalDebugMasked(1<<0, fprintf(stderr, "WARNING: timestamp sampling at blit boundary is not supported.\n")); + } + if (![m_device supportsCounterSampling:MTLCounterSamplingPointAtDispatchBoundary]) + { + TracyMetalDebugMasked(1<<0, fprintf(stderr, "WARNING: timestamp sampling at compute dispatch boundary is not supported.\n")); + } + if (![m_device supportsCounterSampling:MTLCounterSamplingPointAtTileDispatchBoundary]) + { + TracyMetalDebugMasked(1<<0, fprintf(stderr, "WARNING: timestamp sampling at tile dispatch boundary is not supported.\n")); + } + + m_counterSampleBuffers[0] = NewTimestampSampleBuffer(m_device, MaxQueries); + m_counterSampleBuffers[1] = NewTimestampSampleBuffer(m_device, MaxQueries); + + m_timestampRequestTime.resize(MaxQueries); + + MTLTimestamp cpuTimestamp = 0; + MTLTimestamp gpuTimestamp = 0; + [m_device sampleTimestamps:&cpuTimestamp gpuTimestamp:&gpuTimestamp]; + m_mostRecentTimestamp = gpuTimestamp; + TracyMetalDebugMasked(1<<0, TracyMetalPanic(, "Calibration: CPU timestamp (Metal): %llu", cpuTimestamp)); + TracyMetalDebugMasked(1<<0, TracyMetalPanic(, "Calibration: GPU timestamp (Metal): %llu", gpuTimestamp)); + + cpuTimestamp = Profiler::GetTime(); + TracyMetalDebugMasked(1<<0, TracyMetalPanic(, "Calibration: CPU timestamp (Tracy): %llu", cpuTimestamp)); + + float period = 1.0f; + + m_contextId = GetGpuCtxCounter().fetch_add(1); + + auto* item = Profiler::QueueSerial(); + MemWrite(&item->hdr.type, QueueType::GpuNewContext); + MemWrite(&item->gpuNewContext.cpuTime, int64_t(cpuTimestamp)); + MemWrite(&item->gpuNewContext.gpuTime, int64_t(gpuTimestamp)); + MemWrite(&item->gpuNewContext.thread, uint32_t(0)); // TODO: why not GetThreadHandle()? + MemWrite(&item->gpuNewContext.period, period); + MemWrite(&item->gpuNewContext.context, m_contextId); + //MemWrite(&item->gpuNewContext.flags, GpuContextCalibration); + MemWrite(&item->gpuNewContext.flags, GpuContextFlags(0)); + MemWrite(&item->gpuNewContext.type, GpuContextType::Metal); + SubmitQueueItem(item); + } + + ~MetalCtx() + { + // collect the last remnants of Metal GPU activity... + // TODO: add a timeout to this loop? + while (m_previousCheckpoint.load() != m_queryCounter.load()) + Collect(); + } + + tracy_force_inline void SubmitQueueItem(QueueItem* item) + { +#ifdef TRACY_ON_DEMAND + GetProfiler().DeferItem(*item); +#endif + Profiler::QueueSerialFinish(); + } + + tracy_force_inline uint32_t RingIndex(uintptr_t index) + { + index %= MaxQueries; + return static_cast(index); + } + + tracy_force_inline uint32_t RingCount(uintptr_t begin, uintptr_t end) + { + // wrap-around safe: all unsigned + uintptr_t count = end - begin; + return static_cast(count); + } + + tracy_force_inline uint32_t RingSize() const + { + return MaxQueries; + } + + struct Query { id buffer; uint32_t idx; }; + + tracy_force_inline Query NextQuery() + { + TracyMetalDebug_0b00010( ZoneScopedNC("Tracy::MetalCtx::NextQuery", tracy::Color::LightCoral) ); + auto id = m_queryCounter.fetch_add(2); + TracyMetalDebug_0b00010( ZoneValue(id) ); + auto count = RingCount(m_previousCheckpoint, id); + if (count >= MaxQueries) + { + // TODO: return a proper (hidden) "sentinel" query + Query sentinel = Query{ m_counterSampleBuffers[1], MaxQueries-2 }; + TracyMetalPanic( + return sentinel, + "NextQueryId: FULL! too many pending timestamp queries. Consider calling TracyMetalCollect() more frequently. [%llu, %llu] (%u)", + m_previousCheckpoint.load(), id, count + ); + } + uint32_t buffer_idx = (id / MaxQueries) % 2; + TracyMetalDebug_0b00010( ZoneValue(buffer_idx) ); + auto buffer = m_counterSampleBuffers[buffer_idx]; + if (buffer == nil) + TracyMetalPanic(, "NextQueryId: sample buffer is nil! (id=%llu)", id); + uint32_t idx = RingIndex(id); + TracyMetalDebug_0b00010( ZoneValue(idx) ); + TracyMetalDebug_0b00010( TracyAllocN((void*)(uintptr_t)idx, 2, "TracyMetalTimestampQueryId") ); + m_timestampRequestTime[idx] = std::chrono::high_resolution_clock::now(); + return Query{ buffer, idx }; + } + + tracy_force_inline uint8_t GetContextId() const + { + return m_contextId; + } + + static id NewTimestampSampleBuffer(id device, size_t count) + { + ZoneScopedN("tracy::MetalCtx::NewTimestampSampleBuffer"); + + id timestampCounterSet = nil; + for (id counterSet in device.counterSets) + { + if ([counterSet.name isEqualToString:MTLCommonCounterSetTimestamp]) + { + timestampCounterSet = counterSet; + break; + } + } + if (timestampCounterSet == nil) + { + TracyMetalPanic({assert(false);} return nil, "ERROR: timestamp counters are not supported on the platform."); + } + + MTLCounterSampleBufferDescriptor* sampleDescriptor = [[MTLCounterSampleBufferDescriptor alloc] init]; + sampleDescriptor.counterSet = timestampCounterSet; + sampleDescriptor.sampleCount = MaxQueries; + sampleDescriptor.storageMode = MTLStorageModeShared; + sampleDescriptor.label = @"TracyMetalTimestampPool"; + + NSError* error = nil; + id counterSampleBuffer = [device newCounterSampleBufferWithDescriptor:sampleDescriptor error:&error]; + if (error != nil) + { + //NSLog(@"%@ | %@", error.localizedDescription, error.localizedFailureReason); + TracyMetalPanic({assert(false);} return nil, + "ERROR: unable to create sample buffer for timestamp counters : %s | %s", + [error.localizedDescription cString], [error.localizedFailureReason cString]); + } + + return counterSampleBuffer; + } + + uint8_t m_contextId = 255; + + id m_device = nil; + id m_counterSampleBuffers [2] = {}; + + using atomic_counter = std::atomic; + static_assert(atomic_counter::is_always_lock_free); + atomic_counter m_queryCounter = 0; + + atomic_counter m_previousCheckpoint = 0; + MTLTimestamp m_mostRecentTimestamp = 0; + + std::vector m_timestampRequestTime; + + std::mutex m_collectionMutex; +}; + +class MetalZoneScope +{ +public: + tracy_force_inline MetalZoneScope( MetalCtx* ctx, MTLComputePassDescriptor* desc, const SourceLocationData* srcloc, bool is_active ) +#ifdef TRACY_ON_DEMAND + : m_active( is_active && GetProfiler().IsConnected() ) +#else + : m_active( is_active ) +#endif + { + if ( !m_active ) return; + if (desc == nil) TracyMetalPanic({assert(false);} return, "compute pass descriptor is nil."); + m_ctx = ctx; + + auto& query = m_query = ctx->NextQuery(); + + desc.sampleBufferAttachments[0].sampleBuffer = query.buffer; + desc.sampleBufferAttachments[0].startOfEncoderSampleIndex = query.idx+0; + desc.sampleBufferAttachments[0].endOfEncoderSampleIndex = query.idx+1; + + SubmitZoneBeginGpu(ctx, query.idx + 0, srcloc); + } + + tracy_force_inline MetalZoneScope( MetalCtx* ctx, MTLBlitPassDescriptor* desc, const SourceLocationData* srcloc, bool is_active ) +#ifdef TRACY_ON_DEMAND + : m_active( is_active && GetProfiler().IsConnected() ) +#else + : m_active( is_active ) +#endif + { + if ( !m_active ) return; + if (desc == nil) TracyMetalPanic({assert(false); }return, "blit pass descriptor is nil."); + m_ctx = ctx; + + auto& query = m_query = ctx->NextQuery(); + + desc.sampleBufferAttachments[0].sampleBuffer = query.buffer; + desc.sampleBufferAttachments[0].startOfEncoderSampleIndex = query.idx+0; + desc.sampleBufferAttachments[0].endOfEncoderSampleIndex = query.idx+1; + + SubmitZoneBeginGpu(ctx, query.idx + 0, srcloc); + } + + tracy_force_inline MetalZoneScope( MetalCtx* ctx, MTLRenderPassDescriptor* desc, const SourceLocationData* srcloc, bool is_active ) +#ifdef TRACY_ON_DEMAND + : m_active( is_active && GetProfiler().IsConnected() ) +#else + : m_active( is_active ) +#endif + { + if ( !m_active ) return; + if (desc == nil) TracyMetalPanic({assert(false);} return, "render pass descriptor is nil."); + m_ctx = ctx; + + auto& query = m_query = ctx->NextQuery(); + + desc.sampleBufferAttachments[0].sampleBuffer = query.buffer; + desc.sampleBufferAttachments[0].startOfVertexSampleIndex = query.idx+0; + desc.sampleBufferAttachments[0].endOfVertexSampleIndex = MTLCounterDontSample; + desc.sampleBufferAttachments[0].startOfFragmentSampleIndex = MTLCounterDontSample; + desc.sampleBufferAttachments[0].endOfFragmentSampleIndex = query.idx+1; + + SubmitZoneBeginGpu(ctx, query.idx + 0, srcloc); + } + + /* TODO: implement this constructor interfarce for "command-level" profiling, if the device supports it + tracy_force_inline MetalZoneScope( MetalCtx* ctx, id cmdEncoder, const SourceLocationData* srcloc, bool is_active ) +#ifdef TRACY_ON_DEMAND + : m_active( is_active && GetProfiler().IsConnected() ) +#else + : m_active( is_active ) +#endif + { + if( !m_active ) return; + m_ctx = ctx; + m_cmdEncoder = cmdEncoder; + + auto& query = m_query = ctx->NextQueryId(); + + [m_cmdEncoder sampleCountersInBuffer:m_ctx->m_counterSampleBuffer atSampleIndex:query.idx withBarrier:YES]; + + SubmitZoneBeginGpu(ctx, query.idx, srcloc); + } + */ + + tracy_force_inline ~MetalZoneScope() + { + if( !m_active ) return; + + SubmitZoneEndGpu(m_ctx, m_query.idx + 1); + } + + TracyMetalDebugZoneScopeWireTap; + +private: + const bool m_active; + + MetalCtx* m_ctx; + + /* TODO: declare it for "command-level" profiling + id m_cmdEncoder; + */ + + static void SubmitZoneBeginGpu(MetalCtx* ctx, uint32_t queryId, const SourceLocationData* srcloc) + { + auto* item = Profiler::QueueSerial(); + MemWrite( &item->hdr.type, QueueType::GpuZoneBeginSerial ); + MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() ); + MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc ); + MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() ); + MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) ); + MemWrite( &item->gpuZoneBegin.context, ctx->GetContextId() ); + Profiler::QueueSerialFinish(); + + TracyMetalDebugMasked(1<<2, TracyAllocN((void*)(uintptr_t)queryId, 1, "TracyMetalGpuZone")); + } + + static void SubmitZoneEndGpu(MetalCtx* ctx, uint32_t queryId) + { + auto* item = Profiler::QueueSerial(); + MemWrite( &item->hdr.type, QueueType::GpuZoneEndSerial ); + MemWrite( &item->gpuZoneEnd.cpuTime, Profiler::GetTime() ); + MemWrite( &item->gpuZoneEnd.thread, GetThreadHandle() ); + MemWrite( &item->gpuZoneEnd.queryId, uint16_t( queryId ) ); + MemWrite( &item->gpuZoneEnd.context, ctx->GetContextId() ); + Profiler::QueueSerialFinish(); + + TracyMetalDebugMasked(1<<2, TracyAllocN((void*)(uintptr_t)queryId, 1, "TracyMetalGpuZone")); + } + + MetalCtx::Query m_query = {}; +}; + +} + +using TracyMetalCtx = tracy::MetalCtx; + +#define TracyMetalContext(device) tracy::MetalCtx::Create(device) +#define TracyMetalDestroy(ctx) tracy::MetalCtx::Destroy(ctx) +#define TracyMetalContextName(ctx, name, size) ctx->Name(name, size) + +#define TracyMetalZone( ctx, encoderDesc, name ) TracyMetalNamedZone( ctx, ___tracy_gpu_zone, encoderDesc, name, true ) +#define TracyMetalZoneC( ctx, encoderDesc, name, color ) TracyMetalNamedZoneC( ctx, ___tracy_gpu_zone, encoderDesc, name, color, true ) +#define TracyMetalNamedZone( ctx, varname, encoderDesc, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::MetalZoneScope varname( ctx, encoderDesc, &TracyConcat(__tracy_gpu_source_location,TracyLine), active ); +#define TracyMetalNamedZoneC( ctx, varname, encoderDesc, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::MetalZoneScope varname( ctx, encoderDesc, &TracyConcat(__tracy_gpu_source_location,TracyLine), active ); + +#define TracyMetalCollect( ctx ) ctx->Collect(); + + + +#undef TracyMetalDebug_ZoneScopeWireTap +#undef TracyMetalDebug_0b00010 +#undef TracyMetalDebug_0b10000 +#undef TracyMetalDebugMasked +#undef TRACY_METAL_DEBUG_MASK +#undef TRACY_METAL_TIMESTAMP_COLLECT_TIMEOUT +#undef TracyMetalPanic +#undef TRACY_METAL_VA_ARGS + +#endif + +#endif//__TRACYMETAL_HMM__ diff --git a/src/third_party/tracy/tracy/TracyOpenCL.hpp b/src/third_party/tracy/tracy/TracyOpenCL.hpp index 34466ccc..ede5c461 100644 --- a/src/third_party/tracy/tracy/TracyOpenCL.hpp +++ b/src/third_party/tracy/tracy/TracyOpenCL.hpp @@ -255,7 +255,7 @@ namespace tracy { Profiler::QueueSerialFinish(); } - tracy_force_inline OpenCLCtxScope(OpenCLCtx* ctx, const SourceLocationData* srcLoc, int depth, bool is_active) + tracy_force_inline OpenCLCtxScope(OpenCLCtx* ctx, const SourceLocationData* srcLoc, int32_t depth, bool is_active) #ifdef TRACY_ON_DEMAND : m_active(is_active&& GetProfiler().IsConnected()) #else @@ -304,7 +304,7 @@ namespace tracy { Profiler::QueueSerialFinish(); } - tracy_force_inline OpenCLCtxScope(OpenCLCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int depth, bool is_active) + tracy_force_inline OpenCLCtxScope(OpenCLCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int32_t depth, bool is_active) #ifdef TRACY_ON_DEMAND : m_active(is_active && GetProfiler().IsConnected()) #else @@ -373,9 +373,9 @@ namespace tracy { using TracyCLCtx = tracy::OpenCLCtx*; -#define TracyCLContext(context, device) tracy::CreateCLContext(context, device); +#define TracyCLContext(ctx, device) tracy::CreateCLContext(ctx, device); #define TracyCLDestroy(ctx) tracy::DestroyCLContext(ctx); -#define TracyCLContextName(context, name, size) ctx->Name(name, size); +#define TracyCLContextName(ctx, name, size) ctx->Name(name, size); #if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK # define TracyCLNamedZone(ctx, varname, name, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::OpenCLCtxScope varname(ctx, &TracyConcat(__tracy_gpu_source_location,TracyLine), TRACY_CALLSTACK, active ); # define TracyCLNamedZoneC(ctx, varname, name, color, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::OpenCLCtxScope varname(ctx, &TracyConcat(__tracy_gpu_source_location,TracyLine), TRACY_CALLSTACK, active ); diff --git a/src/third_party/tracy/tracy/TracyOpenGL.hpp b/src/third_party/tracy/tracy/TracyOpenGL.hpp index 3bdadcce..30abd4fd 100644 --- a/src/third_party/tracy/tracy/TracyOpenGL.hpp +++ b/src/third_party/tracy/tracy/TracyOpenGL.hpp @@ -25,7 +25,7 @@ class GpuCtxScope { public: GpuCtxScope( const SourceLocationData*, bool ) {} - GpuCtxScope( const SourceLocationData*, int, bool ) {} + GpuCtxScope( const SourceLocationData*, int32_t, bool ) {} }; } @@ -222,7 +222,7 @@ public: TracyLfqCommit; } - tracy_force_inline GpuCtxScope( const SourceLocationData* srcloc, int depth, bool is_active ) + tracy_force_inline GpuCtxScope( const SourceLocationData* srcloc, int32_t depth, bool is_active ) #ifdef TRACY_ON_DEMAND : m_active( is_active && GetProfiler().IsConnected() ) #else @@ -271,7 +271,7 @@ public: TracyLfqCommit; } - tracy_force_inline GpuCtxScope( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int depth, bool is_active ) + tracy_force_inline GpuCtxScope( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int32_t depth, bool is_active ) #ifdef TRACY_ON_DEMAND : m_active( is_active && GetProfiler().IsConnected() ) #else diff --git a/src/third_party/tracy/tracy/TracyVulkan.hpp b/src/third_party/tracy/tracy/TracyVulkan.hpp index f49437be..9e0973d2 100644 --- a/src/third_party/tracy/tracy/TracyVulkan.hpp +++ b/src/third_party/tracy/tracy/TracyVulkan.hpp @@ -218,7 +218,9 @@ public: WriteInitialItem( physdev, tcpu, tgpu ); - m_res = (int64_t*)tracy_malloc( sizeof( int64_t ) * m_queryCount ); + // We need the buffer to be twice as large for availability values + size_t resSize = sizeof( int64_t ) * m_queryCount * 2; + m_res = (int64_t*)tracy_malloc( resSize ); } #endif @@ -263,7 +265,7 @@ public: } #endif assert( head > m_tail ); - + const unsigned int wrappedTail = (unsigned int)( m_tail % m_queryCount ); unsigned int cnt; @@ -283,17 +285,22 @@ public: } - if( VK_FUNCTION_WRAPPER( vkGetQueryPoolResults( m_device, m_query, wrappedTail, cnt, sizeof( int64_t ) * m_queryCount, m_res, sizeof( int64_t ), VK_QUERY_RESULT_64_BIT ) == VK_NOT_READY ) ) - { - m_oldCnt = cnt; - return; - } + VK_FUNCTION_WRAPPER( vkGetQueryPoolResults( m_device, m_query, wrappedTail, cnt, sizeof( int64_t ) * m_queryCount * 2, m_res, sizeof( int64_t ) * 2, VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WITH_AVAILABILITY_BIT ) ); for( unsigned int idx=0; idxhdr.type, QueueType::GpuTime ); - MemWrite( &item->gpuTime.gpuTime, m_res[idx] ); + MemWrite( &item->gpuTime.gpuTime, m_res[idx * 2] ); MemWrite( &item->gpuTime.queryId, uint16_t( wrappedTail + idx ) ); MemWrite( &item->gpuTime.context, m_context ); Profiler::QueueSerialFinish(); @@ -323,7 +330,6 @@ public: m_tail += cnt; } -private: tracy_force_inline unsigned int NextQueryId() { const uint64_t id = m_head.fetch_add(1, std::memory_order_relaxed); @@ -335,6 +341,12 @@ private: return m_context; } + tracy_force_inline VkQueryPool GetQueryPool() const + { + return m_query; + } + +private: tracy_force_inline void Calibrate( VkDevice device, int64_t& tCpu, int64_t& tGpu ) { assert( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT ); @@ -405,11 +417,11 @@ private: }; uint64_t ts[2]; uint64_t deviation[NumProbes]; - for( int i=0; i deviation[i] ) { minDeviation = deviation[i]; } @@ -476,7 +488,9 @@ private: VkSymbolTable m_symbols; #endif uint64_t m_deviation; +#ifdef _WIN32 int64_t m_qpcToNs; +#endif int64_t m_prevCalibration; uint8_t m_context; @@ -517,7 +531,7 @@ public: Profiler::QueueSerialFinish(); } - tracy_force_inline VkCtxScope( VkCtx* ctx, const SourceLocationData* srcloc, VkCommandBuffer cmdbuf, int depth, bool is_active ) + tracy_force_inline VkCtxScope( VkCtx* ctx, const SourceLocationData* srcloc, VkCommandBuffer cmdbuf, int32_t depth, bool is_active ) #ifdef TRACY_ON_DEMAND : m_active( is_active && GetProfiler().IsConnected() ) #else @@ -566,7 +580,7 @@ public: Profiler::QueueSerialFinish(); } - tracy_force_inline VkCtxScope( VkCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, VkCommandBuffer cmdbuf, int depth, bool is_active ) + tracy_force_inline VkCtxScope( VkCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, VkCommandBuffer cmdbuf, int32_t depth, bool is_active ) #ifdef TRACY_ON_DEMAND : m_active( is_active && GetProfiler().IsConnected() ) #else