diff --git a/src/third_party/tracy/TracyClient.cpp b/src/third_party/tracy/TracyClient.cpp deleted file mode 100644 index 6224f48b..00000000 --- a/src/third_party/tracy/TracyClient.cpp +++ /dev/null @@ -1,61 +0,0 @@ -// -// Tracy profiler -// ---------------- -// -// For fast integration, compile and -// link with this source file (and none -// other) in your executable (or in the -// main DLL / shared object on multi-DLL -// projects). -// - -// Define TRACY_ENABLE to enable profiler. - -#include "common/TracySystem.cpp" - -#ifdef TRACY_ENABLE - -#ifdef _MSC_VER -# pragma warning(push, 0) -#endif - -#include "common/tracy_lz4.cpp" -#include "client/TracyProfiler.cpp" -#include "client/TracyCallstack.cpp" -#include "client/TracySysPower.cpp" -#include "client/TracySysTime.cpp" -#include "client/TracySysTrace.cpp" -#include "common/TracySocket.cpp" -#include "client/tracy_rpmalloc.cpp" -#include "client/TracyDxt1.cpp" -#include "client/TracyAlloc.cpp" -#include "client/TracyOverride.cpp" -#include "client/TracyKCore.cpp" - -#if defined(TRACY_HAS_CALLSTACK) -# if TRACY_HAS_CALLSTACK == 2 || TRACY_HAS_CALLSTACK == 3 || TRACY_HAS_CALLSTACK == 4 || TRACY_HAS_CALLSTACK == 6 -# include "libbacktrace/alloc.cpp" -# include "libbacktrace/dwarf.cpp" -# include "libbacktrace/fileline.cpp" -# include "libbacktrace/mmapio.cpp" -# include "libbacktrace/posix.cpp" -# include "libbacktrace/sort.cpp" -# include "libbacktrace/state.cpp" -# if TRACY_HAS_CALLSTACK == 4 -# include "libbacktrace/macho.cpp" -# else -# include "libbacktrace/elf.cpp" -# endif -# include "common/TracyStackFrames.cpp" -# endif -#endif - -#ifdef _MSC_VER -# pragma comment(lib, "ws2_32.lib") -# pragma comment(lib, "dbghelp.lib") -# pragma comment(lib, "advapi32.lib") -# pragma comment(lib, "user32.lib") -# pragma warning(pop) -#endif - -#endif diff --git a/src/third_party/tracy/client/TracyAlloc.cpp b/src/third_party/tracy/client/TracyAlloc.cpp deleted file mode 100644 index c675b6d3..00000000 --- a/src/third_party/tracy/client/TracyAlloc.cpp +++ /dev/null @@ -1,43 +0,0 @@ -#include "../common/TracyAlloc.hpp" - -#ifdef TRACY_USE_RPMALLOC - -#include - -#include "../common/TracyForceInline.hpp" -#include "../common/TracyYield.hpp" - -namespace tracy -{ - -extern thread_local bool RpThreadInitDone; -extern std::atomic RpInitDone; -extern std::atomic RpInitLock; - -tracy_no_inline static void InitRpmallocPlumbing() -{ - const auto done = RpInitDone.load( std::memory_order_acquire ); - if( !done ) - { - int expected = 0; - while( !RpInitLock.compare_exchange_weak( expected, 1, std::memory_order_release, std::memory_order_relaxed ) ) { expected = 0; YieldThread(); } - const auto done = RpInitDone.load( std::memory_order_acquire ); - if( !done ) - { - rpmalloc_initialize(); - RpInitDone.store( 1, std::memory_order_release ); - } - RpInitLock.store( 0, std::memory_order_release ); - } - rpmalloc_thread_initialize(); - RpThreadInitDone = true; -} - -TRACY_API void InitRpmalloc() -{ - if( !RpThreadInitDone ) InitRpmallocPlumbing(); -} - -} - -#endif diff --git a/src/third_party/tracy/client/TracyArmCpuTable.hpp b/src/third_party/tracy/client/TracyArmCpuTable.hpp deleted file mode 100644 index 2b47c3a6..00000000 --- a/src/third_party/tracy/client/TracyArmCpuTable.hpp +++ /dev/null @@ -1,419 +0,0 @@ -namespace tracy -{ - -#if defined __linux__ && defined __ARM_ARCH - -static const char* DecodeArmImplementer( uint32_t v ) -{ - static char buf[16]; - switch( v ) - { - case 0x41: return "ARM"; - case 0x42: return "Broadcom"; - case 0x43: return "Cavium"; - case 0x44: return "DEC"; - case 0x46: return "Fujitsu"; - case 0x48: return "HiSilicon"; - case 0x49: return "Infineon"; - case 0x4d: return "Motorola"; - case 0x4e: return "Nvidia"; - case 0x50: return "Applied Micro"; - case 0x51: return "Qualcomm"; - case 0x53: return "Samsung"; - case 0x54: return "Texas Instruments"; - case 0x56: return "Marvell"; - case 0x61: return "Apple"; - case 0x66: return "Faraday"; - case 0x68: return "HXT"; - case 0x69: return "Intel"; - case 0xc0: return "Ampere Computing"; - default: break; - } - sprintf( buf, "0x%x", v ); - return buf; -} - -static const char* DecodeArmPart( uint32_t impl, uint32_t part ) -{ - static char buf[16]; - switch( impl ) - { - case 0x41: // ARM - switch( part ) - { - case 0x810: return "810"; - case 0x920: return "920"; - case 0x922: return "922"; - case 0x926: return "926"; - case 0x940: return "940"; - case 0x946: return "946"; - case 0x966: return "966"; - case 0xa20: return "1020"; - case 0xa22: return "1022"; - case 0xa26: return "1026"; - case 0xb02: return "11 MPCore"; - case 0xb36: return "1136"; - case 0xb56: return "1156"; - case 0xb76: return "1176"; - case 0xc05: return " Cortex-A5"; - case 0xc07: return " Cortex-A7"; - case 0xc08: return " Cortex-A8"; - case 0xc09: return " Cortex-A9"; - case 0xc0c: return " Cortex-A12"; - case 0xc0d: return " Rockchip RK3288"; - case 0xc0e: return " Cortex-A17"; - case 0xc0f: return " Cortex-A15"; - case 0xc14: return " Cortex-R4"; - case 0xc15: return " Cortex-R5"; - case 0xc17: return " Cortex-R7"; - case 0xc18: return " Cortex-R8"; - case 0xc20: return " Cortex-M0"; - case 0xc21: return " Cortex-M1"; - case 0xc23: return " Cortex-M3"; - case 0xc24: return " Cortex-M4"; - case 0xc27: return " Cortex-M7"; - case 0xc60: return " Cortex-M0+"; - case 0xd00: return " AArch64 simulator"; - case 0xd01: return " Cortex-A32"; - case 0xd02: return " Cortex-A34"; - case 0xd03: return " Cortex-A53"; - case 0xd04: return " Cortex-A35"; - case 0xd05: return " Cortex-A55"; - case 0xd06: return " Cortex-A65"; - case 0xd07: return " Cortex-A57"; - case 0xd08: return " Cortex-A72"; - case 0xd09: return " Cortex-A73"; - case 0xd0a: return " Cortex-A75"; - case 0xd0b: return " Cortex-A76"; - case 0xd0c: return " Neoverse N1"; - case 0xd0d: return " Cortex-A77"; - case 0xd0e: return " Cortex-A76AE"; - case 0xd0f: return " AEMv8"; - case 0xd13: return " Cortex-R52"; - case 0xd20: return " Cortex-M23"; - case 0xd21: return " Cortex-M33"; - case 0xd22: return " Cortex-M55"; - case 0xd40: return " Neoverse V1"; - case 0xd41: return " Cortex-A78"; - case 0xd42: return " Cortex-A78AE"; - case 0xd43: return " Cortex-A65AE"; - case 0xd44: return " Cortex-X1"; - case 0xd47: return " Cortex-A710"; - case 0xd48: return " Cortex-X2"; - case 0xd49: return " Neoverse N2"; - case 0xd4a: return " Neoverse E1"; - case 0xd4b: return " Cortex-A78C"; - case 0xd4c: return " Cortex-X1C"; - default: break; - } - case 0x42: // Broadcom - switch( part ) - { - case 0xf: return " Brahma B15"; - case 0x100: return " Brahma B53"; - case 0x516: return " ThunderX2"; - default: break; - } - case 0x43: // Cavium - switch( part ) - { - case 0xa0: return " ThunderX"; - case 0xa1: return " ThunderX 88XX"; - case 0xa2: return " ThunderX 81XX"; - case 0xa3: return " ThunderX 83XX"; - case 0xaf: return " ThunderX2 99xx"; - case 0xb0: return " OcteonTX2"; - case 0xb1: return " OcteonTX2 T98"; - case 0xb2: return " OcteonTX2 T96"; - case 0xb3: return " OcteonTX2 F95"; - case 0xb4: return " OcteonTX2 F95N"; - case 0xb5: return " OcteonTX2 F95MM"; - case 0xb6: return " OcteonTX2 F95O"; - case 0xb8: return " ThunderX3 T110"; - default: break; - } - case 0x44: // DEC - switch( part ) - { - case 0xa10: return " SA110"; - case 0xa11: return " SA1100"; - default: break; - } - case 0x46: // Fujitsu - switch( part ) - { - case 0x1: return " A64FX"; - default: break; - } - case 0x48: // HiSilicon - switch( part ) - { - case 0xd01: return " TSV100"; - case 0xd40: return " Kirin 980"; - default: break; - } - case 0x4e: // Nvidia - switch( part ) - { - case 0x0: return " Denver"; - case 0x3: return " Denver 2"; - case 0x4: return " Carmel"; - default: break; - } - case 0x50: // Applied Micro - switch( part ) - { - case 0x0: return " X-Gene"; - default: break; - } - case 0x51: // Qualcomm - switch( part ) - { - case 0xf: return " Scorpion"; - case 0x2d: return " Scorpion"; - case 0x4d: return " Krait"; - case 0x6f: return " Krait"; - case 0x200: return " Kryo"; - case 0x201: return " Kryo Silver (Snapdragon 821)"; - case 0x205: return " Kryo Gold"; - case 0x211: return " Kryo Silver (Snapdragon 820)"; - case 0x800: return " Kryo 260 / 280 Gold"; - case 0x801: return " Kryo 260 / 280 Silver"; - case 0x802: return " Kryo 385 Gold"; - case 0x803: return " Kryo 385 Silver"; - case 0x804: return " Kryo 485 Gold"; - case 0x805: return " Kryo 4xx/5xx Silver"; - case 0xc00: return " Falkor"; - case 0xc01: return " Saphira"; - default: break; - } - case 0x53: // Samsung - switch( part ) - { - case 0x1: return " Exynos M1/M2"; - case 0x2: return " Exynos M3"; - case 0x3: return " Exynos M4"; - case 0x4: return " Exynos M5"; - default: break; - } - case 0x54: // Texas Instruments - switch( part ) - { - case 0x925: return " TI925"; - default: break; - } - case 0x56: // Marvell - switch( part ) - { - case 0x131: return " Feroceon 88FR131"; - case 0x581: return " PJ4 / PJ4B"; - case 0x584: return " PJ4B-MP / PJ4C"; - default: break; - } - case 0x61: // Apple - switch( part ) - { - case 0x1: return " Cyclone"; - case 0x2: return " Typhoon"; - case 0x3: return " Typhoon/Capri"; - case 0x4: return " Twister"; - case 0x5: return " Twister/Elba/Malta"; - case 0x6: return " Hurricane"; - case 0x7: return " Hurricane/Myst"; - case 0x22: return " M1 Icestorm"; - case 0x23: return " M1 Firestorm"; - case 0x24: return " M1 Icestorm Pro"; - case 0x25: return " M1 Firestorm Pro"; - case 0x28: return " M1 Icestorm Max"; - case 0x29: return " M1 Firestorm Max"; - default: break; - } - case 0x66: // Faraday - switch( part ) - { - case 0x526: return " FA526"; - case 0x626: return " FA626"; - default: break; - } - case 0x68: // HXT - switch( part ) - { - case 0x0: return " Phecda"; - default: break; - } - case 0xc0: // Ampere Computing - switch( part ) - { - case 0xac3: return " Ampere1"; - default: break; - } - default: break; - } - sprintf( buf, " 0x%x", part ); - return buf; -} - -#elif defined __APPLE__ && TARGET_OS_IPHONE == 1 - -static const char* DecodeIosDevice( const char* id ) -{ - static const char* DeviceTable[] = { - "i386", "32-bit simulator", - "x86_64", "64-bit simulator", - "iPhone1,1", "iPhone", - "iPhone1,2", "iPhone 3G", - "iPhone2,1", "iPhone 3GS", - "iPhone3,1", "iPhone 4 (GSM)", - "iPhone3,2", "iPhone 4 (GSM)", - "iPhone3,3", "iPhone 4 (CDMA)", - "iPhone4,1", "iPhone 4S", - "iPhone5,1", "iPhone 5 (A1428)", - "iPhone5,2", "iPhone 5 (A1429)", - "iPhone5,3", "iPhone 5c (A1456/A1532)", - "iPhone5,4", "iPhone 5c (A1507/A1516/1526/A1529)", - "iPhone6,1", "iPhone 5s (A1433/A1533)", - "iPhone6,2", "iPhone 5s (A1457/A1518/A1528/A1530)", - "iPhone7,1", "iPhone 6 Plus", - "iPhone7,2", "iPhone 6", - "iPhone8,1", "iPhone 6S", - "iPhone8,2", "iPhone 6S Plus", - "iPhone8,4", "iPhone SE", - "iPhone9,1", "iPhone 7 (CDMA)", - "iPhone9,2", "iPhone 7 Plus (CDMA)", - "iPhone9,3", "iPhone 7 (GSM)", - "iPhone9,4", "iPhone 7 Plus (GSM)", - "iPhone10,1", "iPhone 8 (CDMA)", - "iPhone10,2", "iPhone 8 Plus (CDMA)", - "iPhone10,3", "iPhone X (CDMA)", - "iPhone10,4", "iPhone 8 (GSM)", - "iPhone10,5", "iPhone 8 Plus (GSM)", - "iPhone10,6", "iPhone X (GSM)", - "iPhone11,2", "iPhone XS", - "iPhone11,4", "iPhone XS Max", - "iPhone11,6", "iPhone XS Max China", - "iPhone11,8", "iPhone XR", - "iPhone12,1", "iPhone 11", - "iPhone12,3", "iPhone 11 Pro", - "iPhone12,5", "iPhone 11 Pro Max", - "iPhone12,8", "iPhone SE 2nd Gen", - "iPhone13,1", "iPhone 12 Mini", - "iPhone13,2", "iPhone 12", - "iPhone13,3", "iPhone 12 Pro", - "iPhone13,4", "iPhone 12 Pro Max", - "iPhone14,2", "iPhone 13 Pro", - "iPhone14,3", "iPhone 13 Pro Max", - "iPhone14,4", "iPhone 13 Mini", - "iPhone14,5", "iPhone 13", - "iPhone14,6", "iPhone SE 3rd Gen", - "iPhone14,7", "iPhone 14", - "iPhone14,8", "iPhone 14 Plus", - "iPhone15,2", "iPhone 14 Pro", - "iPhone15,3", "iPhone 14 Pro Max", - "iPhone15,4", "iPhone 15", - "iPhone15,5", "iPhone 15 Plus", - "iPhone16,1", "iPhone 15 Pro", - "iPhone16,2", "iPhone 15 Pro Max", - "iPad1,1", "iPad (A1219/A1337)", - "iPad2,1", "iPad 2 (A1395)", - "iPad2,2", "iPad 2 (A1396)", - "iPad2,3", "iPad 2 (A1397)", - "iPad2,4", "iPad 2 (A1395)", - "iPad2,5", "iPad Mini (A1432)", - "iPad2,6", "iPad Mini (A1454)", - "iPad2,7", "iPad Mini (A1455)", - "iPad3,1", "iPad 3 (A1416)", - "iPad3,2", "iPad 3 (A1403)", - "iPad3,3", "iPad 3 (A1430)", - "iPad3,4", "iPad 4 (A1458)", - "iPad3,5", "iPad 4 (A1459)", - "iPad3,6", "iPad 4 (A1460)", - "iPad4,1", "iPad Air (A1474)", - "iPad4,2", "iPad Air (A1475)", - "iPad4,3", "iPad Air (A1476)", - "iPad4,4", "iPad Mini 2 (A1489)", - "iPad4,5", "iPad Mini 2 (A1490)", - "iPad4,6", "iPad Mini 2 (A1491)", - "iPad4,7", "iPad Mini 3 (A1599)", - "iPad4,8", "iPad Mini 3 (A1600)", - "iPad4,9", "iPad Mini 3 (A1601)", - "iPad5,1", "iPad Mini 4 (A1538)", - "iPad5,2", "iPad Mini 4 (A1550)", - "iPad5,3", "iPad Air 2 (A1566)", - "iPad5,4", "iPad Air 2 (A1567)", - "iPad6,3", "iPad Pro 9.7\" (A1673)", - "iPad6,4", "iPad Pro 9.7\" (A1674)", - "iPad6,5", "iPad Pro 9.7\" (A1675)", - "iPad6,7", "iPad Pro 12.9\" (A1584)", - "iPad6,8", "iPad Pro 12.9\" (A1652)", - "iPad6,11", "iPad 5th gen (A1822)", - "iPad6,12", "iPad 5th gen (A1823)", - "iPad7,1", "iPad Pro 12.9\" 2nd gen (A1670)", - "iPad7,2", "iPad Pro 12.9\" 2nd gen (A1671/A1821)", - "iPad7,3", "iPad Pro 10.5\" (A1701)", - "iPad7,4", "iPad Pro 10.5\" (A1709)", - "iPad7,5", "iPad 6th gen (A1893)", - "iPad7,6", "iPad 6th gen (A1954)", - "iPad7,11", "iPad 7th gen 10.2\" (Wifi)", - "iPad7,12", "iPad 7th gen 10.2\" (Wifi+Cellular)", - "iPad8,1", "iPad Pro 11\" (A1980)", - "iPad8,2", "iPad Pro 11\" (A1980)", - "iPad8,3", "iPad Pro 11\" (A1934/A1979/A2013)", - "iPad8,4", "iPad Pro 11\" (A1934/A1979/A2013)", - "iPad8,5", "iPad Pro 12.9\" 3rd gen (A1876)", - "iPad8,6", "iPad Pro 12.9\" 3rd gen (A1876)", - "iPad8,7", "iPad Pro 12.9\" 3rd gen (A1895/A1983/A2014)", - "iPad8,8", "iPad Pro 12.9\" 3rd gen (A1895/A1983/A2014)", - "iPad8,9", "iPad Pro 11\" 2nd gen (Wifi)", - "iPad8,10", "iPad Pro 11\" 2nd gen (Wifi+Cellular)", - "iPad8,11", "iPad Pro 12.9\" 4th gen (Wifi)", - "iPad8,12", "iPad Pro 12.9\" 4th gen (Wifi+Cellular)", - "iPad11,1", "iPad Mini 5th gen (A2133)", - "iPad11,2", "iPad Mini 5th gen (A2124/A2125/A2126)", - "iPad11,3", "iPad Air 3rd gen (A2152)", - "iPad11,4", "iPad Air 3rd gen (A2123/A2153/A2154)", - "iPad11,6", "iPad 8th gen (WiFi)", - "iPad11,7", "iPad 8th gen (WiFi+Cellular)", - "iPad12,1", "iPad 9th Gen (WiFi)", - "iPad12,2", "iPad 9th Gen (WiFi+Cellular)", - "iPad13,1", "iPad Air 4th gen (WiFi)", - "iPad13,2", "iPad Air 4th gen (WiFi+Cellular)", - "iPad13,4", "iPad Pro 11\" 3rd gen", - "iPad13,5", "iPad Pro 11\" 3rd gen", - "iPad13,6", "iPad Pro 11\" 3rd gen", - "iPad13,7", "iPad Pro 11\" 3rd gen", - "iPad13,8", "iPad Pro 12.9\" 5th gen", - "iPad13,9", "iPad Pro 12.9\" 5th gen", - "iPad13,10", "iPad Pro 12.9\" 5th gen", - "iPad13,11", "iPad Pro 12.9\" 5th gen", - "iPad13,16", "iPad Air 5th Gen (WiFi)", - "iPad13,17", "iPad Air 5th Gen (WiFi+Cellular)", - "iPad13,18", "iPad 10th Gen", - "iPad13,19", "iPad 10th Gen", - "iPad14,1", "iPad mini 6th Gen (WiFi)", - "iPad14,2", "iPad mini 6th Gen (WiFi+Cellular)", - "iPad14,3", "iPad Pro 11\" 4th Gen", - "iPad14,4", "iPad Pro 11\" 4th Gen", - "iPad14,5", "iPad Pro 12.9\" 6th Gen", - "iPad14,6", "iPad Pro 12.9\" 6th Gen", - "iPod1,1", "iPod Touch", - "iPod2,1", "iPod Touch 2nd gen", - "iPod3,1", "iPod Touch 3rd gen", - "iPod4,1", "iPod Touch 4th gen", - "iPod5,1", "iPod Touch 5th gen", - "iPod7,1", "iPod Touch 6th gen", - "iPod9,1", "iPod Touch 7th gen", - nullptr - }; - - auto ptr = DeviceTable; - while( *ptr ) - { - if( strcmp( ptr[0], id ) == 0 ) return ptr[1]; - ptr += 2; - } - return id; -} - -#endif - -} diff --git a/src/third_party/tracy/client/TracyCallstack.cpp b/src/third_party/tracy/client/TracyCallstack.cpp deleted file mode 100644 index 946a1972..00000000 --- a/src/third_party/tracy/client/TracyCallstack.cpp +++ /dev/null @@ -1,1415 +0,0 @@ -#include -#include -#include -#include -#include "TracyCallstack.hpp" -#include "TracyDebug.hpp" -#include "TracyFastVector.hpp" -#include "TracyStringHelpers.hpp" -#include "../common/TracyAlloc.hpp" -#include "../common/TracySystem.hpp" - - -#ifdef TRACY_HAS_CALLSTACK - -#if TRACY_HAS_CALLSTACK == 1 -# ifndef NOMINMAX -# define NOMINMAX -# endif -# include -# include -# include -# ifdef _MSC_VER -# pragma warning( push ) -# pragma warning( disable : 4091 ) -# endif -# include -# ifdef _MSC_VER -# pragma warning( pop ) -# endif -#elif TRACY_HAS_CALLSTACK == 2 || TRACY_HAS_CALLSTACK == 3 || TRACY_HAS_CALLSTACK == 4 || TRACY_HAS_CALLSTACK == 6 -# include "../libbacktrace/backtrace.hpp" -# include -# include -# include -# include -#elif TRACY_HAS_CALLSTACK == 5 -# include -# include -#endif - -#ifdef TRACY_DBGHELP_LOCK -# include "TracyProfiler.hpp" - -# define DBGHELP_INIT TracyConcat( TRACY_DBGHELP_LOCK, Init() ) -# define DBGHELP_LOCK TracyConcat( TRACY_DBGHELP_LOCK, Lock() ); -# define DBGHELP_UNLOCK TracyConcat( TRACY_DBGHELP_LOCK, Unlock() ); - -extern "C" -{ - void DBGHELP_INIT; - void DBGHELP_LOCK; - void DBGHELP_UNLOCK; -}; -#endif - -#if TRACY_HAS_CALLSTACK == 2 || TRACY_HAS_CALLSTACK == 3 || TRACY_HAS_CALLSTACK == 4 || TRACY_HAS_CALLSTACK == 5 || TRACY_HAS_CALLSTACK == 6 -// If you want to use your own demangling functionality (e.g. for another language), -// define TRACY_DEMANGLE and provide your own implementation of the __tracy_demangle -// function. The input parameter is a function name. The demangle function must -// identify whether this name is mangled, and fail if it is not. Failure is indicated -// by returning nullptr. If demangling succeeds, a pointer to the C string containing -// demangled function must be returned. The demangling function is responsible for -// managing memory for this string. It is expected that it will be internally reused. -// When a call to ___tracy_demangle is made, previous contents of the string memory -// do not need to be preserved. Function may return string of any length, but the -// profiler can choose to truncate it. -extern "C" const char* ___tracy_demangle( const char* mangled ); - -#ifndef TRACY_DEMANGLE -constexpr size_t ___tracy_demangle_buffer_len = 1024*1024; -char* ___tracy_demangle_buffer; - -void ___tracy_init_demangle_buffer() -{ - ___tracy_demangle_buffer = (char*)tracy::tracy_malloc( ___tracy_demangle_buffer_len ); -} - -void ___tracy_free_demangle_buffer() -{ - tracy::tracy_free( ___tracy_demangle_buffer ); -} - -extern "C" const char* ___tracy_demangle( const char* mangled ) -{ - if( !mangled || mangled[0] != '_' ) return nullptr; - if( strlen( mangled ) > ___tracy_demangle_buffer_len ) return nullptr; - int status; - size_t len = ___tracy_demangle_buffer_len; - return abi::__cxa_demangle( mangled, ___tracy_demangle_buffer, &len, &status ); -} -#endif -#endif - -#if TRACY_HAS_CALLSTACK == 3 -# define TRACY_USE_IMAGE_CACHE -# include -#endif - -namespace tracy -{ - -#ifdef TRACY_USE_IMAGE_CACHE -// when we have access to dl_iterate_phdr(), we can build a cache of address ranges to image paths -// so we can quickly determine which image an address falls into. -// We refresh this cache only when we hit an address that doesn't fall into any known range. -class ImageCache -{ -public: - struct ImageEntry - { - void* m_startAddress = nullptr; - void* m_endAddress = nullptr; - char* m_name = nullptr; - }; - - ImageCache() - : m_images( 512 ) - { - Refresh(); - } - - ~ImageCache() - { - Clear(); - } - - const ImageEntry* GetImageForAddress( void* address ) - { - const ImageEntry* entry = GetImageForAddressImpl( address ); - if( !entry ) - { - Refresh(); - return GetImageForAddressImpl( address ); - } - return entry; - } - -private: - tracy::FastVector m_images; - bool m_updated = false; - bool m_haveMainImageName = false; - - static int Callback( struct dl_phdr_info* info, size_t size, void* data ) - { - ImageCache* cache = reinterpret_cast( data ); - - const auto startAddress = reinterpret_cast( info->dlpi_addr ); - if( cache->Contains( startAddress ) ) return 0; - - const uint32_t headerCount = info->dlpi_phnum; - assert( headerCount > 0); - const auto endAddress = reinterpret_cast( info->dlpi_addr + - info->dlpi_phdr[info->dlpi_phnum - 1].p_vaddr + info->dlpi_phdr[info->dlpi_phnum - 1].p_memsz); - - ImageEntry* image = cache->m_images.push_next(); - image->m_startAddress = startAddress; - image->m_endAddress = endAddress; - - // the base executable name isn't provided when iterating with dl_iterate_phdr, - // we will have to patch the executable image name outside this callback - if( info->dlpi_name && info->dlpi_name[0] != '\0' ) - { - size_t sz = strlen( info->dlpi_name ) + 1; - image->m_name = (char*)tracy_malloc( sz ); - memcpy( image->m_name, info->dlpi_name, sz ); - } - else - { - image->m_name = nullptr; - } - - cache->m_updated = true; - - return 0; - } - - bool Contains( void* startAddress ) const - { - return std::any_of( m_images.begin(), m_images.end(), [startAddress]( const ImageEntry& entry ) { return startAddress == entry.m_startAddress; } ); - } - - void Refresh() - { - m_updated = false; - dl_iterate_phdr( Callback, this ); - - if( m_updated ) - { - std::sort( m_images.begin(), m_images.end(), - []( const ImageEntry& lhs, const ImageEntry& rhs ) { return lhs.m_startAddress > rhs.m_startAddress; } ); - - // patch the main executable image name here, as calling dl_* functions inside the dl_iterate_phdr callback might cause deadlocks - UpdateMainImageName(); - } - } - - void UpdateMainImageName() - { - if( m_haveMainImageName ) - { - return; - } - - for( ImageEntry& entry : m_images ) - { - if( entry.m_name == nullptr ) - { - Dl_info dlInfo; - if( dladdr( (void *)entry.m_startAddress, &dlInfo ) ) - { - if( dlInfo.dli_fname ) - { - size_t sz = strlen( dlInfo.dli_fname ) + 1; - entry.m_name = (char*)tracy_malloc( sz ); - memcpy( entry.m_name, dlInfo.dli_fname, sz ); - } - } - - // we only expect one entry to be null for the main executable entry - break; - } - } - - m_haveMainImageName = true; - } - - const ImageEntry* GetImageForAddressImpl( void* address ) const - { - auto it = std::lower_bound( m_images.begin(), m_images.end(), address, - []( const ImageEntry& lhs, const void* rhs ) { return lhs.m_startAddress > rhs; } ); - - if( it != m_images.end() && address < it->m_endAddress ) - { - return it; - } - return nullptr; - } - - void Clear() - { - for( ImageEntry& entry : m_images ) - { - tracy_free( entry.m_name ); - } - - m_images.clear(); - m_haveMainImageName = false; - } -}; -#endif //#ifdef TRACY_USE_IMAGE_CACHE - -// when "TRACY_SYMBOL_OFFLINE_RESOLVE" is set, instead of fully resolving symbols at runtime, -// simply resolve the offset and image name (which will be enough the resolving to be done offline) -#ifdef TRACY_SYMBOL_OFFLINE_RESOLVE -constexpr bool s_shouldResolveSymbolsOffline = true; -#else -static bool s_shouldResolveSymbolsOffline = false; -bool ShouldResolveSymbolsOffline() -{ - const char* symbolOfflineResolve = GetEnvVar( "TRACY_SYMBOL_OFFLINE_RESOLVE" ); - return (symbolOfflineResolve && symbolOfflineResolve[0] == '1'); -} -#endif // #ifdef TRACY_SYMBOL_OFFLINE_RESOLVE - -#if TRACY_HAS_CALLSTACK == 1 - -enum { MaxCbTrace = 64 }; -enum { MaxNameSize = 8*1024 }; - -int cb_num; -CallstackEntry cb_data[MaxCbTrace]; - -extern "C" -{ - typedef DWORD (__stdcall *t_SymAddrIncludeInlineTrace)( HANDLE hProcess, DWORD64 Address ); - typedef BOOL (__stdcall *t_SymQueryInlineTrace)( HANDLE hProcess, DWORD64 StartAddress, DWORD StartContext, DWORD64 StartRetAddress, DWORD64 CurAddress, LPDWORD CurContext, LPDWORD CurFrameIndex ); - typedef BOOL (__stdcall *t_SymFromInlineContext)( HANDLE hProcess, DWORD64 Address, ULONG InlineContext, PDWORD64 Displacement, PSYMBOL_INFO Symbol ); - typedef BOOL (__stdcall *t_SymGetLineFromInlineContext)( HANDLE hProcess, DWORD64 qwAddr, ULONG InlineContext, DWORD64 qwModuleBaseAddress, PDWORD pdwDisplacement, PIMAGEHLP_LINE64 Line64 ); - - t_SymAddrIncludeInlineTrace _SymAddrIncludeInlineTrace = 0; - t_SymQueryInlineTrace _SymQueryInlineTrace = 0; - t_SymFromInlineContext _SymFromInlineContext = 0; - t_SymGetLineFromInlineContext _SymGetLineFromInlineContext = 0; - - TRACY_API ___tracy_t_RtlWalkFrameChain ___tracy_RtlWalkFrameChain = 0; -} - -struct ModuleCache -{ - uint64_t start; - uint64_t end; - char* name; -}; - -static FastVector* s_modCache; - - -struct KernelDriver -{ - uint64_t addr; - const char* mod; - const char* path; -}; - -KernelDriver* s_krnlCache = nullptr; -size_t s_krnlCacheCnt; - -void InitCallstackCritical() -{ - ___tracy_RtlWalkFrameChain = (___tracy_t_RtlWalkFrameChain)GetProcAddress( GetModuleHandleA( "ntdll.dll" ), "RtlWalkFrameChain" ); -} - -void DbgHelpInit() -{ - if( s_shouldResolveSymbolsOffline ) return; - - _SymAddrIncludeInlineTrace = (t_SymAddrIncludeInlineTrace)GetProcAddress(GetModuleHandleA("dbghelp.dll"), "SymAddrIncludeInlineTrace"); - _SymQueryInlineTrace = (t_SymQueryInlineTrace)GetProcAddress(GetModuleHandleA("dbghelp.dll"), "SymQueryInlineTrace"); - _SymFromInlineContext = (t_SymFromInlineContext)GetProcAddress(GetModuleHandleA("dbghelp.dll"), "SymFromInlineContext"); - _SymGetLineFromInlineContext = (t_SymGetLineFromInlineContext)GetProcAddress(GetModuleHandleA("dbghelp.dll"), "SymGetLineFromInlineContext"); - -#ifdef TRACY_DBGHELP_LOCK - DBGHELP_INIT; - DBGHELP_LOCK; -#endif - - SymInitialize( GetCurrentProcess(), nullptr, true ); - SymSetOptions( SYMOPT_LOAD_LINES ); - -#ifdef TRACY_DBGHELP_LOCK - DBGHELP_UNLOCK; -#endif -} - -DWORD64 DbgHelpLoadSymbolsForModule( const char* imageName, uint64_t baseOfDll, uint32_t bllSize ) -{ - if( s_shouldResolveSymbolsOffline ) return 0; - return SymLoadModuleEx( GetCurrentProcess(), nullptr, imageName, nullptr, baseOfDll, bllSize, nullptr, 0 ); -} - -ModuleCache* LoadSymbolsForModuleAndCache( const char* imageName, uint32_t imageNameLength, uint64_t baseOfDll, uint32_t dllSize ) -{ - DbgHelpLoadSymbolsForModule( imageName, baseOfDll, dllSize ); - - ModuleCache* cachedModule = s_modCache->push_next(); - cachedModule->start = baseOfDll; - cachedModule->end = baseOfDll + dllSize; - - // when doing offline symbol resolution, we must store the full path of the dll for the resolving to work - if( s_shouldResolveSymbolsOffline ) - { - cachedModule->name = (char*)tracy_malloc_fast(imageNameLength + 1); - memcpy(cachedModule->name, imageName, imageNameLength); - cachedModule->name[imageNameLength] = '\0'; - } - else - { - auto ptr = imageName + imageNameLength; - while (ptr > imageName && *ptr != '\\' && *ptr != '/') ptr--; - if (ptr > imageName) ptr++; - const auto namelen = imageName + imageNameLength - ptr; - cachedModule->name = (char*)tracy_malloc_fast(namelen + 3); - cachedModule->name[0] = '['; - memcpy(cachedModule->name + 1, ptr, namelen); - cachedModule->name[namelen + 1] = ']'; - cachedModule->name[namelen + 2] = '\0'; - } - - return cachedModule; -} - -void InitCallstack() -{ -#ifndef TRACY_SYMBOL_OFFLINE_RESOLVE - s_shouldResolveSymbolsOffline = ShouldResolveSymbolsOffline(); -#endif //#ifndef TRACY_SYMBOL_OFFLINE_RESOLVE - if( s_shouldResolveSymbolsOffline ) - { - TracyDebug("TRACY: enabling offline symbol resolving!\n"); - } - - DbgHelpInit(); - -#ifdef TRACY_DBGHELP_LOCK - DBGHELP_LOCK; -#endif - - // use TRACY_NO_DBGHELP_INIT_LOAD=1 to disable preloading of driver - // and process module symbol loading at startup time - they will be loaded on demand later - // Sometimes this process can take a very long time and prevent resolving callstack frames - // symbols during that time. - const char* noInitLoadEnv = GetEnvVar( "TRACY_NO_DBGHELP_INIT_LOAD" ); - const bool initTimeModuleLoad = !( noInitLoadEnv && noInitLoadEnv[0] == '1' ); - if ( !initTimeModuleLoad ) - { - TracyDebug("TRACY: skipping init time dbghelper module load\n"); - } - - DWORD needed; - LPVOID dev[4096]; - if( initTimeModuleLoad && EnumDeviceDrivers( dev, sizeof(dev), &needed ) != 0 ) - { - char windir[MAX_PATH]; - if( !GetWindowsDirectoryA( windir, sizeof( windir ) ) ) memcpy( windir, "c:\\windows", 11 ); - const auto windirlen = strlen( windir ); - - const auto sz = needed / sizeof( LPVOID ); - s_krnlCache = (KernelDriver*)tracy_malloc( sizeof(KernelDriver) * sz ); - int cnt = 0; - for( size_t i=0; i", 2 ); - s_krnlCache[cnt] = KernelDriver { (uint64_t)dev[i], buf }; - - const auto len = GetDeviceDriverFileNameA( dev[i], fn, sizeof( fn ) ); - if( len != 0 ) - { - char full[MAX_PATH]; - char* path = fn; - - if( memcmp( fn, "\\SystemRoot\\", 12 ) == 0 ) - { - memcpy( full, windir, windirlen ); - strcpy( full + windirlen, fn + 11 ); - path = full; - } - - DbgHelpLoadSymbolsForModule( path, (DWORD64)dev[i], 0 ); - - const auto psz = strlen( path ); - auto pptr = (char*)tracy_malloc_fast( psz+1 ); - memcpy( pptr, path, psz ); - pptr[psz] = '\0'; - s_krnlCache[cnt].path = pptr; - } - - cnt++; - } - } - s_krnlCacheCnt = cnt; - std::sort( s_krnlCache, s_krnlCache + s_krnlCacheCnt, []( const KernelDriver& lhs, const KernelDriver& rhs ) { return lhs.addr > rhs.addr; } ); - } - - s_modCache = (FastVector*)tracy_malloc( sizeof( FastVector ) ); - new(s_modCache) FastVector( 512 ); - - HANDLE proc = GetCurrentProcess(); - HMODULE mod[1024]; - if( initTimeModuleLoad && EnumProcessModules( proc, mod, sizeof( mod ), &needed ) != 0 ) - { - const auto sz = needed / sizeof( HMODULE ); - for( size_t i=0; i 0 ) - { - // This may be a new module loaded since our call to SymInitialize. - // Just in case, force DbgHelp to load its pdb ! - LoadSymbolsForModuleAndCache( name, nameLength, (DWORD64)info.lpBaseOfDll, info.SizeOfImage ); - } - } - } - } - -#ifdef TRACY_DBGHELP_LOCK - DBGHELP_UNLOCK; -#endif -} - -void EndCallstack() -{ -} - -const char* DecodeCallstackPtrFast( uint64_t ptr ) -{ - if( s_shouldResolveSymbolsOffline ) return "[unresolved]"; - - static char ret[MaxNameSize]; - const auto proc = GetCurrentProcess(); - - char buf[sizeof( SYMBOL_INFO ) + MaxNameSize]; - auto si = (SYMBOL_INFO*)buf; - si->SizeOfStruct = sizeof( SYMBOL_INFO ); - si->MaxNameLen = MaxNameSize; - -#ifdef TRACY_DBGHELP_LOCK - DBGHELP_LOCK; -#endif - if( SymFromAddr( proc, ptr, nullptr, si ) == 0 ) - { - *ret = '\0'; - } - else - { - memcpy( ret, si->Name, si->NameLen ); - ret[si->NameLen] = '\0'; - } -#ifdef TRACY_DBGHELP_LOCK - DBGHELP_UNLOCK; -#endif - return ret; -} - -const char* GetKernelModulePath( uint64_t addr ) -{ - assert( addr >> 63 != 0 ); - if( !s_krnlCache ) return nullptr; - auto it = std::lower_bound( s_krnlCache, s_krnlCache + s_krnlCacheCnt, addr, []( const KernelDriver& lhs, const uint64_t& rhs ) { return lhs.addr > rhs; } ); - if( it == s_krnlCache + s_krnlCacheCnt ) return nullptr; - return it->path; -} - -struct ModuleNameAndBaseAddress -{ - const char* name; - uint64_t baseAddr; -}; - -ModuleNameAndBaseAddress GetModuleNameAndPrepareSymbols( uint64_t addr ) -{ - if( ( addr >> 63 ) != 0 ) - { - if( s_krnlCache ) - { - auto it = std::lower_bound( s_krnlCache, s_krnlCache + s_krnlCacheCnt, addr, []( const KernelDriver& lhs, const uint64_t& rhs ) { return lhs.addr > rhs; } ); - if( it != s_krnlCache + s_krnlCacheCnt ) - { - return ModuleNameAndBaseAddress{ it->mod, it->addr }; - } - } - return ModuleNameAndBaseAddress{ "", addr }; - } - - for( auto& v : *s_modCache ) - { - if( addr >= v.start && addr < v.end ) - { - return ModuleNameAndBaseAddress{ v.name, v.start }; - } - } - - HMODULE mod[1024]; - DWORD needed; - HANDLE proc = GetCurrentProcess(); - - InitRpmalloc(); - if( EnumProcessModules( proc, mod, sizeof( mod ), &needed ) != 0 ) - { - const auto sz = needed / sizeof( HMODULE ); - for( size_t i=0; i= base && addr < base + info.SizeOfImage ) - { - char name[1024]; - const auto nameLength = GetModuleFileNameA( mod[i], name, 1021 ); - if( nameLength > 0 ) - { - // since this is the first time we encounter this module, load its symbols (needed for modules loaded after SymInitialize) - ModuleCache* cachedModule = LoadSymbolsForModuleAndCache( name, nameLength, (DWORD64)info.lpBaseOfDll, info.SizeOfImage ); - return ModuleNameAndBaseAddress{ cachedModule->name, cachedModule->start }; - } - } - } - } - } - - return ModuleNameAndBaseAddress{ "[unknown]", 0x0 }; -} - -CallstackSymbolData DecodeSymbolAddress( uint64_t ptr ) -{ - CallstackSymbolData sym; - - if( s_shouldResolveSymbolsOffline ) - { - sym.file = "[unknown]"; - sym.line = 0; - sym.needFree = false; - return sym; - } - - IMAGEHLP_LINE64 line; - DWORD displacement = 0; - line.SizeOfStruct = sizeof(IMAGEHLP_LINE64); -#ifdef TRACY_DBGHELP_LOCK - DBGHELP_LOCK; -#endif - const auto res = SymGetLineFromAddr64( GetCurrentProcess(), ptr, &displacement, &line ); - if( res == 0 || line.LineNumber >= 0xF00000 ) - { - sym.file = "[unknown]"; - sym.line = 0; - sym.needFree = false; - } - else - { - sym.file = CopyString( line.FileName ); - sym.line = line.LineNumber; - sym.needFree = true; - } -#ifdef TRACY_DBGHELP_LOCK - DBGHELP_UNLOCK; -#endif - return sym; -} - -CallstackEntryData DecodeCallstackPtr( uint64_t ptr ) -{ -#ifdef TRACY_DBGHELP_LOCK - DBGHELP_LOCK; -#endif - - InitRpmalloc(); - - const ModuleNameAndBaseAddress moduleNameAndAddress = GetModuleNameAndPrepareSymbols( ptr ); - - if( s_shouldResolveSymbolsOffline ) - { -#ifdef TRACY_DBGHELP_LOCK - DBGHELP_UNLOCK; -#endif - - cb_data[0].symAddr = ptr - moduleNameAndAddress.baseAddr; - cb_data[0].symLen = 0; - - cb_data[0].name = CopyStringFast("[unresolved]"); - cb_data[0].file = CopyStringFast("[unknown]"); - cb_data[0].line = 0; - - return { cb_data, 1, moduleNameAndAddress.name }; - } - - int write; - const auto proc = GetCurrentProcess(); - -#if !defined TRACY_NO_CALLSTACK_INLINES - BOOL doInline = FALSE; - DWORD ctx = 0; - DWORD inlineNum = 0; - if( _SymAddrIncludeInlineTrace ) - { - inlineNum = _SymAddrIncludeInlineTrace( proc, ptr ); - if( inlineNum > MaxCbTrace - 1 ) inlineNum = MaxCbTrace - 1; - DWORD idx; - if( inlineNum != 0 ) doInline = _SymQueryInlineTrace( proc, ptr, 0, ptr, ptr, &ctx, &idx ); - } - if( doInline ) - { - write = inlineNum; - cb_num = 1 + inlineNum; - } - else -#endif - { - write = 0; - cb_num = 1; - } - - char buf[sizeof( SYMBOL_INFO ) + MaxNameSize]; - auto si = (SYMBOL_INFO*)buf; - si->SizeOfStruct = sizeof( SYMBOL_INFO ); - si->MaxNameLen = MaxNameSize; - - const auto symValid = SymFromAddr( proc, ptr, nullptr, si ) != 0; - - IMAGEHLP_LINE64 line; - DWORD displacement = 0; - line.SizeOfStruct = sizeof(IMAGEHLP_LINE64); - - { - const char* filename; - const auto res = SymGetLineFromAddr64( proc, ptr, &displacement, &line ); - if( res == 0 || line.LineNumber >= 0xF00000 ) - { - filename = "[unknown]"; - cb_data[write].line = 0; - } - else - { - filename = line.FileName; - cb_data[write].line = line.LineNumber; - } - - cb_data[write].name = symValid ? CopyStringFast( si->Name, si->NameLen ) : CopyStringFast( moduleNameAndAddress.name ); - cb_data[write].file = CopyStringFast( filename ); - if( symValid ) - { - cb_data[write].symLen = si->Size; - cb_data[write].symAddr = si->Address; - } - else - { - cb_data[write].symLen = 0; - cb_data[write].symAddr = 0; - } - } - -#if !defined TRACY_NO_CALLSTACK_INLINES - if( doInline ) - { - for( DWORD i=0; iName, si->NameLen ) : CopyStringFast( moduleNameAndAddress.name ); - cb.file = CopyStringFast( filename ); - if( symInlineValid ) - { - cb.symLen = si->Size; - cb.symAddr = si->Address; - } - else - { - cb.symLen = 0; - cb.symAddr = 0; - } - - ctx++; - } - } -#endif -#ifdef TRACY_DBGHELP_LOCK - DBGHELP_UNLOCK; -#endif - - return { cb_data, uint8_t( cb_num ), moduleNameAndAddress.name }; -} - -#elif TRACY_HAS_CALLSTACK == 2 || TRACY_HAS_CALLSTACK == 3 || TRACY_HAS_CALLSTACK == 4 || TRACY_HAS_CALLSTACK == 6 - -enum { MaxCbTrace = 64 }; - -struct backtrace_state* cb_bts = nullptr; - -int cb_num; -CallstackEntry cb_data[MaxCbTrace]; -int cb_fixup; -#ifdef TRACY_USE_IMAGE_CACHE -static ImageCache* s_imageCache = nullptr; -#endif //#ifdef TRACY_USE_IMAGE_CACHE - -#ifdef TRACY_DEBUGINFOD -debuginfod_client* s_debuginfod; - -struct DebugInfo -{ - uint8_t* buildid; - size_t buildid_size; - char* filename; - int fd; -}; - -static FastVector* s_di_known; -#endif - -#ifdef __linux -struct KernelSymbol -{ - uint64_t addr; - uint32_t size; - const char* name; - const char* mod; -}; - -KernelSymbol* s_kernelSym = nullptr; -size_t s_kernelSymCnt; - -static void InitKernelSymbols() -{ - FILE* f = fopen( "/proc/kallsyms", "rb" ); - if( !f ) return; - tracy::FastVector tmpSym( 512 * 1024 ); - size_t linelen = 16 * 1024; // linelen must be big enough to prevent reallocs in getline() - auto linebuf = (char*)tracy_malloc( linelen ); - ssize_t sz; - size_t validCnt = 0; - while( ( sz = getline( &linebuf, &linelen, f ) ) != -1 ) - { - auto ptr = linebuf; - uint64_t addr = 0; - while( *ptr != ' ' ) - { - auto v = *ptr; - if( v >= '0' && v <= '9' ) - { - v -= '0'; - } - else if( v >= 'a' && v <= 'f' ) - { - v -= 'a'; - v += 10; - } - else if( v >= 'A' && v <= 'F' ) - { - v -= 'A'; - v += 10; - } - else - { - assert( false ); - } - assert( ( v & ~0xF ) == 0 ); - addr <<= 4; - addr |= v; - ptr++; - } - if( addr == 0 ) continue; - ptr++; - const bool valid = *ptr == 'T' || *ptr == 't'; - ptr += 2; - const auto namestart = ptr; - while( *ptr != '\t' && *ptr != '\n' ) ptr++; - const auto nameend = ptr; - const char* modstart = nullptr; - const char* modend; - if( *ptr == '\t' ) - { - ptr += 2; - modstart = ptr; - while( *ptr != ']' ) ptr++; - modend = ptr; - } - - char* strname = nullptr; - char* strmod = nullptr; - - if( valid ) - { - validCnt++; - - strname = (char*)tracy_malloc_fast( nameend - namestart + 1 ); - memcpy( strname, namestart, nameend - namestart ); - strname[nameend-namestart] = '\0'; - - if( modstart ) - { - strmod = (char*)tracy_malloc_fast( modend - modstart + 1 ); - memcpy( strmod, modstart, modend - modstart ); - strmod[modend-modstart] = '\0'; - } - } - - auto sym = tmpSym.push_next(); - sym->addr = addr; - sym->size = 0; - sym->name = strname; - sym->mod = strmod; - } - tracy_free_fast( linebuf ); - fclose( f ); - if( tmpSym.empty() ) return; - - std::sort( tmpSym.begin(), tmpSym.end(), []( const KernelSymbol& lhs, const KernelSymbol& rhs ) { return lhs.addr < rhs.addr; } ); - for( size_t i=0; i res && *back != '/' ) back--; - rsz = back - res; - ptr = next + 1; - continue; - } - break; - case 1: - if( *ptr == '.' ) - { - ptr = next + 1; - continue; - } - break; - case 0: - ptr = next + 1; - continue; - } - if( rsz != 1 ) res[rsz++] = '/'; - memcpy( res+rsz, ptr, lsz ); - rsz += lsz; - ptr = next + 1; - } - - if( rsz == 0 ) - { - memcpy( res, "/", 2 ); - } - else - { - res[rsz] = '\0'; - } - return res; -} - -void InitCallstackCritical() -{ -} - -void InitCallstack() -{ - InitRpmalloc(); - -#ifdef TRACY_USE_IMAGE_CACHE - s_imageCache = (ImageCache*)tracy_malloc( sizeof( ImageCache ) ); - new(s_imageCache) ImageCache(); -#endif //#ifdef TRACY_USE_IMAGE_CACHE - -#ifndef TRACY_SYMBOL_OFFLINE_RESOLVE - s_shouldResolveSymbolsOffline = ShouldResolveSymbolsOffline(); -#endif //#ifndef TRACY_SYMBOL_OFFLINE_RESOLVE - if( s_shouldResolveSymbolsOffline ) - { - cb_bts = nullptr; // disable use of libbacktrace calls - TracyDebug("TRACY: enabling offline symbol resolving!\n"); - } - else - { - cb_bts = backtrace_create_state( nullptr, 0, nullptr, nullptr ); - } - -#ifndef TRACY_DEMANGLE - ___tracy_init_demangle_buffer(); -#endif - -#ifdef __linux - InitKernelSymbols(); -#endif -#ifdef TRACY_DEBUGINFOD - s_debuginfod = debuginfod_begin(); - s_di_known = (FastVector*)tracy_malloc( sizeof( FastVector ) ); - new (s_di_known) FastVector( 16 ); -#endif -} - -#ifdef TRACY_DEBUGINFOD -void ClearDebugInfoVector( FastVector& vec ) -{ - for( auto& v : vec ) - { - tracy_free( v.buildid ); - tracy_free( v.filename ); - if( v.fd >= 0 ) close( v.fd ); - } - vec.clear(); -} - -DebugInfo* FindDebugInfo( FastVector& vec, const uint8_t* buildid_data, size_t buildid_size ) -{ - for( auto& v : vec ) - { - if( v.buildid_size == buildid_size && memcmp( v.buildid, buildid_data, buildid_size ) == 0 ) - { - return &v; - } - } - return nullptr; -} - -int GetDebugInfoDescriptor( const char* buildid_data, size_t buildid_size, const char* filename ) -{ - auto buildid = (uint8_t*)buildid_data; - auto it = FindDebugInfo( *s_di_known, buildid, buildid_size ); - if( it ) return it->fd >= 0 ? dup( it->fd ) : -1; - - int fd = debuginfod_find_debuginfo( s_debuginfod, buildid, buildid_size, nullptr ); - it = s_di_known->push_next(); - it->buildid_size = buildid_size; - it->buildid = (uint8_t*)tracy_malloc( buildid_size ); - memcpy( it->buildid, buildid, buildid_size ); - const auto fnsz = strlen( filename ) + 1; - it->filename = (char*)tracy_malloc( fnsz ); - memcpy( it->filename, filename, fnsz ); - it->fd = fd >= 0 ? fd : -1; - TracyDebug( "DebugInfo descriptor query: %i, fn: %s\n", fd, filename ); - return it->fd; -} - -const uint8_t* GetBuildIdForImage( const char* image, size_t& size ) -{ - assert( image ); - for( auto& v : *s_di_known ) - { - if( strcmp( image, v.filename ) == 0 ) - { - size = v.buildid_size; - return v.buildid; - } - } - return nullptr; -} - -debuginfod_client* GetDebuginfodClient() -{ - return s_debuginfod; -} -#endif - -void EndCallstack() -{ -#ifdef TRACY_USE_IMAGE_CACHE - if( s_imageCache ) - { - s_imageCache->~ImageCache(); - tracy_free( s_imageCache ); - } -#endif //#ifdef TRACY_USE_IMAGE_CACHE -#ifndef TRACY_DEMANGLE - ___tracy_free_demangle_buffer(); -#endif -#ifdef TRACY_DEBUGINFOD - ClearDebugInfoVector( *s_di_known ); - s_di_known->~FastVector(); - tracy_free( s_di_known ); - - debuginfod_end( s_debuginfod ); -#endif -} - -const char* DecodeCallstackPtrFast( uint64_t ptr ) -{ - static char ret[1024]; - auto vptr = (void*)ptr; - const char* symname = nullptr; - Dl_info dlinfo; - if( dladdr( vptr, &dlinfo ) && dlinfo.dli_sname ) - { - symname = dlinfo.dli_sname; - } - if( symname ) - { - strcpy( ret, symname ); - } - else - { - *ret = '\0'; - } - return ret; -} - -static int SymbolAddressDataCb( void* data, uintptr_t pc, uintptr_t lowaddr, const char* fn, int lineno, const char* function ) -{ - auto& sym = *(CallstackSymbolData*)data; - if( !fn ) - { - sym.file = "[unknown]"; - sym.line = 0; - sym.needFree = false; - } - else - { - sym.file = NormalizePath( fn ); - if( !sym.file ) sym.file = CopyString( fn ); - sym.line = lineno; - sym.needFree = true; - } - - return 1; -} - -static void SymbolAddressErrorCb( void* data, const char* /*msg*/, int /*errnum*/ ) -{ - auto& sym = *(CallstackSymbolData*)data; - sym.file = "[unknown]"; - sym.line = 0; - sym.needFree = false; -} - -CallstackSymbolData DecodeSymbolAddress( uint64_t ptr ) -{ - CallstackSymbolData sym; - if( cb_bts ) - { - backtrace_pcinfo( cb_bts, ptr, SymbolAddressDataCb, SymbolAddressErrorCb, &sym ); - } - else - { - SymbolAddressErrorCb(&sym, nullptr, 0); - } - - return sym; -} - -static int CallstackDataCb( void* /*data*/, uintptr_t pc, uintptr_t lowaddr, const char* fn, int lineno, const char* function ) -{ - cb_data[cb_num].symLen = 0; - cb_data[cb_num].symAddr = (uint64_t)lowaddr; - - if( !fn && !function ) - { - const char* symname = nullptr; - auto vptr = (void*)pc; - ptrdiff_t symoff = 0; - - Dl_info dlinfo; - if( dladdr( vptr, &dlinfo ) ) - { - symname = dlinfo.dli_sname; - symoff = (char*)pc - (char*)dlinfo.dli_saddr; - const char* demangled = ___tracy_demangle( symname ); - if( demangled ) symname = demangled; - } - - if( !symname ) symname = "[unknown]"; - - if( symoff == 0 ) - { - const auto len = std::min( strlen( symname ), std::numeric_limits::max() ); - cb_data[cb_num].name = CopyStringFast( symname, len ); - } - else - { - char buf[32]; - const auto offlen = sprintf( buf, " + %td", symoff ); - const auto namelen = std::min( strlen( symname ), std::numeric_limits::max() - offlen ); - auto name = (char*)tracy_malloc_fast( namelen + offlen + 1 ); - memcpy( name, symname, namelen ); - memcpy( name + namelen, buf, offlen ); - name[namelen + offlen] = '\0'; - cb_data[cb_num].name = name; - } - - cb_data[cb_num].file = CopyStringFast( "[unknown]" ); - cb_data[cb_num].line = 0; - } - else - { - if( !fn ) fn = "[unknown]"; - if( !function ) - { - function = "[unknown]"; - } - else - { - const char* demangled = ___tracy_demangle( function ); - if( demangled ) function = demangled; - } - - const auto len = std::min( strlen( function ), std::numeric_limits::max() ); - cb_data[cb_num].name = CopyStringFast( function, len ); - cb_data[cb_num].file = NormalizePath( fn ); - if( !cb_data[cb_num].file ) cb_data[cb_num].file = CopyStringFast( fn ); - cb_data[cb_num].line = lineno; - } - - if( ++cb_num >= MaxCbTrace ) - { - return 1; - } - else - { - return 0; - } -} - -static void CallstackErrorCb( void* /*data*/, const char* /*msg*/, int /*errnum*/ ) -{ - for( int i=0; i> 63 == 0 ) - { - const char* imageName = nullptr; - uint64_t imageBaseAddress = 0x0; - -#ifdef TRACY_USE_IMAGE_CACHE - const auto* image = s_imageCache->GetImageForAddress((void*)ptr); - if( image ) - { - imageName = image->m_name; - imageBaseAddress = uint64_t(image->m_startAddress); - } -#else - Dl_info dlinfo; - if( dladdr( (void*)ptr, &dlinfo ) ) - { - imageName = dlinfo.dli_fname; - imageBaseAddress = uint64_t( dlinfo.dli_fbase ); - } -#endif - - if( s_shouldResolveSymbolsOffline ) - { - cb_num = 1; - GetSymbolForOfflineResolve( (void*)ptr, imageBaseAddress, cb_data[0] ); - } - else - { - cb_num = 0; - backtrace_pcinfo( cb_bts, ptr, CallstackDataCb, CallstackErrorCb, nullptr ); - assert( cb_num > 0 ); - - backtrace_syminfo( cb_bts, ptr, SymInfoCallback, SymInfoError, nullptr ); - } - - return { cb_data, uint8_t( cb_num ), imageName ? imageName : "[unknown]" }; - } -#ifdef __linux - else if( s_kernelSym ) - { - auto it = std::lower_bound( s_kernelSym, s_kernelSym + s_kernelSymCnt, ptr, []( const KernelSymbol& lhs, const uint64_t& rhs ) { return lhs.addr + lhs.size < rhs; } ); - if( it != s_kernelSym + s_kernelSymCnt ) - { - cb_data[0].name = CopyStringFast( it->name ); - cb_data[0].file = CopyStringFast( "" ); - cb_data[0].line = 0; - cb_data[0].symLen = it->size; - cb_data[0].symAddr = it->addr; - return { cb_data, 1, it->mod ? it->mod : "" }; - } - } -#endif - - cb_data[0].name = CopyStringFast( "[unknown]" ); - cb_data[0].file = CopyStringFast( "" ); - cb_data[0].line = 0; - cb_data[0].symLen = 0; - cb_data[0].symAddr = 0; - return { cb_data, 1, "" }; -} - -#elif TRACY_HAS_CALLSTACK == 5 - -void InitCallstackCritical() -{ -} - -void InitCallstack() -{ - ___tracy_init_demangle_buffer(); -} - -void EndCallstack() -{ - ___tracy_free_demangle_buffer(); -} - -const char* DecodeCallstackPtrFast( uint64_t ptr ) -{ - static char ret[1024]; - auto vptr = (void*)ptr; - const char* symname = nullptr; - Dl_info dlinfo; - if( dladdr( vptr, &dlinfo ) && dlinfo.dli_sname ) - { - symname = dlinfo.dli_sname; - } - if( symname ) - { - strcpy( ret, symname ); - } - else - { - *ret = '\0'; - } - return ret; -} - -CallstackSymbolData DecodeSymbolAddress( uint64_t ptr ) -{ - const char* symloc = nullptr; - Dl_info dlinfo; - if( dladdr( (void*)ptr, &dlinfo ) ) symloc = dlinfo.dli_fname; - if( !symloc ) symloc = "[unknown]"; - return CallstackSymbolData { symloc, 0, false, 0 }; -} - -CallstackEntryData DecodeCallstackPtr( uint64_t ptr ) -{ - static CallstackEntry cb; - cb.line = 0; - - const char* symname = nullptr; - const char* symloc = nullptr; - auto vptr = (void*)ptr; - ptrdiff_t symoff = 0; - void* symaddr = nullptr; - - Dl_info dlinfo; - if( dladdr( vptr, &dlinfo ) ) - { - symloc = dlinfo.dli_fname; - symname = dlinfo.dli_sname; - symoff = (char*)ptr - (char*)dlinfo.dli_saddr; - symaddr = dlinfo.dli_saddr; - const char* demangled = ___tracy_demangle( symname ); - if( demangled ) symname = demangled; - } - - if( !symname ) symname = "[unknown]"; - if( !symloc ) symloc = "[unknown]"; - - if( symoff == 0 ) - { - const auto len = std::min( strlen( symname ), std::numeric_limits::max() ); - cb.name = CopyString( symname, len ); - } - else - { - char buf[32]; - const auto offlen = sprintf( buf, " + %td", symoff ); - const auto namelen = std::min( strlen( symname ), std::numeric_limits::max() - offlen ); - auto name = (char*)tracy_malloc( namelen + offlen + 1 ); - memcpy( name, symname, namelen ); - memcpy( name + namelen, buf, offlen ); - name[namelen + offlen] = '\0'; - cb.name = name; - } - - cb.file = CopyString( "[unknown]" ); - cb.symLen = 0; - cb.symAddr = (uint64_t)symaddr; - - return { &cb, 1, symloc }; -} - -#endif - -} - -#endif diff --git a/src/third_party/tracy/client/TracyCallstack.h b/src/third_party/tracy/client/TracyCallstack.h deleted file mode 100644 index 2c7ecad9..00000000 --- a/src/third_party/tracy/client/TracyCallstack.h +++ /dev/null @@ -1,35 +0,0 @@ -#ifndef __TRACYCALLSTACK_H__ -#define __TRACYCALLSTACK_H__ - -#ifndef TRACY_NO_CALLSTACK - -# if !defined _WIN32 -# include -# endif - -# if defined _WIN32 -# include "../common/TracyUwp.hpp" -# ifndef TRACY_UWP -# define TRACY_HAS_CALLSTACK 1 -# endif -# elif defined __ANDROID__ -# if !defined __arm__ || __ANDROID_API__ >= 21 -# define TRACY_HAS_CALLSTACK 2 -# else -# define TRACY_HAS_CALLSTACK 5 -# endif -# elif defined __linux -# if defined _GNU_SOURCE && defined __GLIBC__ -# define TRACY_HAS_CALLSTACK 3 -# else -# define TRACY_HAS_CALLSTACK 2 -# endif -# elif defined __APPLE__ -# define TRACY_HAS_CALLSTACK 4 -# elif defined BSD -# define TRACY_HAS_CALLSTACK 6 -# endif - -#endif - -#endif diff --git a/src/third_party/tracy/client/TracyCallstack.hpp b/src/third_party/tracy/client/TracyCallstack.hpp deleted file mode 100644 index 12bea967..00000000 --- a/src/third_party/tracy/client/TracyCallstack.hpp +++ /dev/null @@ -1,156 +0,0 @@ -#ifndef __TRACYCALLSTACK_HPP__ -#define __TRACYCALLSTACK_HPP__ - -#include "../common/TracyApi.h" -#include "../common/TracyForceInline.hpp" -#include "TracyCallstack.h" - -#ifndef TRACY_HAS_CALLSTACK - -namespace tracy -{ -static constexpr bool has_callstack() { return false; } -static tracy_force_inline void* Callstack( int32_t /*depth*/ ) { return nullptr; } -} - -#else - -#if TRACY_HAS_CALLSTACK == 2 || TRACY_HAS_CALLSTACK == 5 -# include -#elif TRACY_HAS_CALLSTACK >= 3 -# ifdef TRACY_LIBUNWIND_BACKTRACE - // libunwind is, in general, significantly faster than execinfo based backtraces -# define UNW_LOCAL_ONLY -# include -# else -# include -# endif -#endif - -#ifdef TRACY_DEBUGINFOD -# include -#endif - -#include -#include - -#include "../common/TracyAlloc.hpp" - -namespace tracy -{ - -static constexpr bool has_callstack() { return true; } - -struct CallstackSymbolData -{ - const char* file; - uint32_t line; - bool needFree; - uint64_t symAddr; -}; - -struct CallstackEntry -{ - const char* name; - const char* file; - uint32_t line; - uint32_t symLen; - uint64_t symAddr; -}; - -struct CallstackEntryData -{ - const CallstackEntry* data; - uint8_t size; - const char* imageName; -}; - -CallstackSymbolData DecodeSymbolAddress( uint64_t ptr ); -const char* DecodeCallstackPtrFast( uint64_t ptr ); -CallstackEntryData DecodeCallstackPtr( uint64_t ptr ); -void InitCallstack(); -void InitCallstackCritical(); -void EndCallstack(); -const char* GetKernelModulePath( uint64_t addr ); - -#ifdef TRACY_DEBUGINFOD -const uint8_t* GetBuildIdForImage( const char* image, size_t& size ); -debuginfod_client* GetDebuginfodClient(); -#endif - -#if TRACY_HAS_CALLSTACK == 1 - -extern "C" -{ - typedef unsigned long (__stdcall *___tracy_t_RtlWalkFrameChain)( void**, unsigned long, unsigned long ); - TRACY_API extern ___tracy_t_RtlWalkFrameChain ___tracy_RtlWalkFrameChain; -} - -static tracy_force_inline void* Callstack( int32_t depth ) -{ - assert( depth >= 1 && depth < 63 ); - auto trace = (uintptr_t*)tracy_malloc( ( 1 + depth ) * sizeof( uintptr_t ) ); - const auto num = ___tracy_RtlWalkFrameChain( (void**)( trace + 1 ), depth, 0 ); - *trace = num; - return trace; -} - -#elif TRACY_HAS_CALLSTACK == 2 || TRACY_HAS_CALLSTACK == 5 - -struct BacktraceState -{ - void** current; - void** end; -}; - -static _Unwind_Reason_Code tracy_unwind_callback( struct _Unwind_Context* ctx, void* arg ) -{ - auto state = (BacktraceState*)arg; - uintptr_t pc = _Unwind_GetIP( ctx ); - if( pc ) - { - if( state->current == state->end ) return _URC_END_OF_STACK; - *state->current++ = (void*)pc; - } - return _URC_NO_REASON; -} - -static tracy_force_inline void* Callstack( int32_t depth ) -{ - assert( depth >= 1 && depth < 63 ); - - auto trace = (uintptr_t*)tracy_malloc( ( 1 + depth ) * sizeof( uintptr_t ) ); - BacktraceState state = { (void**)(trace+1), (void**)(trace+1+depth) }; - _Unwind_Backtrace( tracy_unwind_callback, &state ); - - *trace = (uintptr_t*)state.current - trace + 1; - - return trace; -} - -#elif TRACY_HAS_CALLSTACK == 3 || TRACY_HAS_CALLSTACK == 4 || TRACY_HAS_CALLSTACK == 6 - -static tracy_force_inline void* Callstack( int32_t depth ) -{ - assert( depth >= 1 ); - - auto trace = (uintptr_t*)tracy_malloc( ( 1 + (size_t)depth ) * sizeof( uintptr_t ) ); - -#ifdef TRACY_LIBUNWIND_BACKTRACE - size_t num = unw_backtrace( (void**)(trace+1), depth ); -#else - const auto num = (size_t)backtrace( (void**)(trace+1), depth ); -#endif - - *trace = num; - - return trace; -} - -#endif - -} - -#endif - -#endif diff --git a/src/third_party/tracy/client/TracyCpuid.hpp b/src/third_party/tracy/client/TracyCpuid.hpp deleted file mode 100644 index 9820be00..00000000 --- a/src/third_party/tracy/client/TracyCpuid.hpp +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef __TRACYCPUID_HPP__ -#define __TRACYCPUID_HPP__ - -// Prior to GCC 11 the cpuid.h header did not have any include guards and thus -// including it more than once would cause a compiler error due to symbol -// redefinitions. In order to support older GCC versions, we have to wrap this -// include between custom include guards to prevent this issue. -// See also https://github.com/wolfpld/tracy/issues/452 - -#include - -#endif diff --git a/src/third_party/tracy/client/TracyDebug.hpp b/src/third_party/tracy/client/TracyDebug.hpp deleted file mode 100644 index 8723356f..00000000 --- a/src/third_party/tracy/client/TracyDebug.hpp +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef __TRACYPRINT_HPP__ -#define __TRACYPRINT_HPP__ - -#ifdef TRACY_VERBOSE -# include -# define TracyDebug(...) fprintf( stderr, __VA_ARGS__ ); -#else -# define TracyDebug(...) -#endif - -#endif diff --git a/src/third_party/tracy/client/TracyDxt1.cpp b/src/third_party/tracy/client/TracyDxt1.cpp deleted file mode 100644 index 930d0982..00000000 --- a/src/third_party/tracy/client/TracyDxt1.cpp +++ /dev/null @@ -1,644 +0,0 @@ -#include "TracyDxt1.hpp" -#include "../common/TracyForceInline.hpp" - -#include -#include -#include - -#ifdef __ARM_NEON -# include -#endif - -#if defined __AVX__ && !defined __SSE4_1__ -# define __SSE4_1__ -#endif - -#if defined __SSE4_1__ || defined __AVX2__ -# ifdef _MSC_VER -# include -# else -# include -# ifndef _mm256_cvtsi256_si32 -# define _mm256_cvtsi256_si32( v ) ( _mm_cvtsi128_si32( _mm256_castsi256_si128( v ) ) ) -# endif -# endif -#endif - -namespace tracy -{ - -static inline uint16_t to565( uint8_t r, uint8_t g, uint8_t b ) -{ - return ( ( r & 0xF8 ) << 8 ) | ( ( g & 0xFC ) << 3 ) | ( b >> 3 ); -} - -static inline uint16_t to565( uint32_t c ) -{ - return - ( ( c & 0xF80000 ) >> 19 ) | - ( ( c & 0x00FC00 ) >> 5 ) | - ( ( c & 0x0000F8 ) << 8 ); -} - -static const uint16_t DivTable[255*3+1] = { - 0xffff, 0xffff, 0xffff, 0xffff, 0xcccc, 0xaaaa, 0x9249, 0x8000, 0x71c7, 0x6666, 0x5d17, 0x5555, 0x4ec4, 0x4924, 0x4444, 0x4000, - 0x3c3c, 0x38e3, 0x35e5, 0x3333, 0x30c3, 0x2e8b, 0x2c85, 0x2aaa, 0x28f5, 0x2762, 0x25ed, 0x2492, 0x234f, 0x2222, 0x2108, 0x2000, - 0x1f07, 0x1e1e, 0x1d41, 0x1c71, 0x1bac, 0x1af2, 0x1a41, 0x1999, 0x18f9, 0x1861, 0x17d0, 0x1745, 0x16c1, 0x1642, 0x15c9, 0x1555, - 0x14e5, 0x147a, 0x1414, 0x13b1, 0x1352, 0x12f6, 0x129e, 0x1249, 0x11f7, 0x11a7, 0x115b, 0x1111, 0x10c9, 0x1084, 0x1041, 0x1000, - 0x0fc0, 0x0f83, 0x0f48, 0x0f0f, 0x0ed7, 0x0ea0, 0x0e6c, 0x0e38, 0x0e07, 0x0dd6, 0x0da7, 0x0d79, 0x0d4c, 0x0d20, 0x0cf6, 0x0ccc, - 0x0ca4, 0x0c7c, 0x0c56, 0x0c30, 0x0c0c, 0x0be8, 0x0bc5, 0x0ba2, 0x0b81, 0x0b60, 0x0b40, 0x0b21, 0x0b02, 0x0ae4, 0x0ac7, 0x0aaa, - 0x0a8e, 0x0a72, 0x0a57, 0x0a3d, 0x0a23, 0x0a0a, 0x09f1, 0x09d8, 0x09c0, 0x09a9, 0x0991, 0x097b, 0x0964, 0x094f, 0x0939, 0x0924, - 0x090f, 0x08fb, 0x08e7, 0x08d3, 0x08c0, 0x08ad, 0x089a, 0x0888, 0x0876, 0x0864, 0x0853, 0x0842, 0x0831, 0x0820, 0x0810, 0x0800, - 0x07f0, 0x07e0, 0x07d1, 0x07c1, 0x07b3, 0x07a4, 0x0795, 0x0787, 0x0779, 0x076b, 0x075d, 0x0750, 0x0743, 0x0736, 0x0729, 0x071c, - 0x070f, 0x0703, 0x06f7, 0x06eb, 0x06df, 0x06d3, 0x06c8, 0x06bc, 0x06b1, 0x06a6, 0x069b, 0x0690, 0x0685, 0x067b, 0x0670, 0x0666, - 0x065c, 0x0652, 0x0648, 0x063e, 0x0634, 0x062b, 0x0621, 0x0618, 0x060f, 0x0606, 0x05fd, 0x05f4, 0x05eb, 0x05e2, 0x05d9, 0x05d1, - 0x05c9, 0x05c0, 0x05b8, 0x05b0, 0x05a8, 0x05a0, 0x0598, 0x0590, 0x0588, 0x0581, 0x0579, 0x0572, 0x056b, 0x0563, 0x055c, 0x0555, - 0x054e, 0x0547, 0x0540, 0x0539, 0x0532, 0x052b, 0x0525, 0x051e, 0x0518, 0x0511, 0x050b, 0x0505, 0x04fe, 0x04f8, 0x04f2, 0x04ec, - 0x04e6, 0x04e0, 0x04da, 0x04d4, 0x04ce, 0x04c8, 0x04c3, 0x04bd, 0x04b8, 0x04b2, 0x04ad, 0x04a7, 0x04a2, 0x049c, 0x0497, 0x0492, - 0x048d, 0x0487, 0x0482, 0x047d, 0x0478, 0x0473, 0x046e, 0x0469, 0x0465, 0x0460, 0x045b, 0x0456, 0x0452, 0x044d, 0x0448, 0x0444, - 0x043f, 0x043b, 0x0436, 0x0432, 0x042d, 0x0429, 0x0425, 0x0421, 0x041c, 0x0418, 0x0414, 0x0410, 0x040c, 0x0408, 0x0404, 0x0400, - 0x03fc, 0x03f8, 0x03f4, 0x03f0, 0x03ec, 0x03e8, 0x03e4, 0x03e0, 0x03dd, 0x03d9, 0x03d5, 0x03d2, 0x03ce, 0x03ca, 0x03c7, 0x03c3, - 0x03c0, 0x03bc, 0x03b9, 0x03b5, 0x03b2, 0x03ae, 0x03ab, 0x03a8, 0x03a4, 0x03a1, 0x039e, 0x039b, 0x0397, 0x0394, 0x0391, 0x038e, - 0x038b, 0x0387, 0x0384, 0x0381, 0x037e, 0x037b, 0x0378, 0x0375, 0x0372, 0x036f, 0x036c, 0x0369, 0x0366, 0x0364, 0x0361, 0x035e, - 0x035b, 0x0358, 0x0355, 0x0353, 0x0350, 0x034d, 0x034a, 0x0348, 0x0345, 0x0342, 0x0340, 0x033d, 0x033a, 0x0338, 0x0335, 0x0333, - 0x0330, 0x032e, 0x032b, 0x0329, 0x0326, 0x0324, 0x0321, 0x031f, 0x031c, 0x031a, 0x0317, 0x0315, 0x0313, 0x0310, 0x030e, 0x030c, - 0x0309, 0x0307, 0x0305, 0x0303, 0x0300, 0x02fe, 0x02fc, 0x02fa, 0x02f7, 0x02f5, 0x02f3, 0x02f1, 0x02ef, 0x02ec, 0x02ea, 0x02e8, - 0x02e6, 0x02e4, 0x02e2, 0x02e0, 0x02de, 0x02dc, 0x02da, 0x02d8, 0x02d6, 0x02d4, 0x02d2, 0x02d0, 0x02ce, 0x02cc, 0x02ca, 0x02c8, - 0x02c6, 0x02c4, 0x02c2, 0x02c0, 0x02be, 0x02bc, 0x02bb, 0x02b9, 0x02b7, 0x02b5, 0x02b3, 0x02b1, 0x02b0, 0x02ae, 0x02ac, 0x02aa, - 0x02a8, 0x02a7, 0x02a5, 0x02a3, 0x02a1, 0x02a0, 0x029e, 0x029c, 0x029b, 0x0299, 0x0297, 0x0295, 0x0294, 0x0292, 0x0291, 0x028f, - 0x028d, 0x028c, 0x028a, 0x0288, 0x0287, 0x0285, 0x0284, 0x0282, 0x0280, 0x027f, 0x027d, 0x027c, 0x027a, 0x0279, 0x0277, 0x0276, - 0x0274, 0x0273, 0x0271, 0x0270, 0x026e, 0x026d, 0x026b, 0x026a, 0x0268, 0x0267, 0x0265, 0x0264, 0x0263, 0x0261, 0x0260, 0x025e, - 0x025d, 0x025c, 0x025a, 0x0259, 0x0257, 0x0256, 0x0255, 0x0253, 0x0252, 0x0251, 0x024f, 0x024e, 0x024d, 0x024b, 0x024a, 0x0249, - 0x0247, 0x0246, 0x0245, 0x0243, 0x0242, 0x0241, 0x0240, 0x023e, 0x023d, 0x023c, 0x023b, 0x0239, 0x0238, 0x0237, 0x0236, 0x0234, - 0x0233, 0x0232, 0x0231, 0x0230, 0x022e, 0x022d, 0x022c, 0x022b, 0x022a, 0x0229, 0x0227, 0x0226, 0x0225, 0x0224, 0x0223, 0x0222, - 0x0220, 0x021f, 0x021e, 0x021d, 0x021c, 0x021b, 0x021a, 0x0219, 0x0218, 0x0216, 0x0215, 0x0214, 0x0213, 0x0212, 0x0211, 0x0210, - 0x020f, 0x020e, 0x020d, 0x020c, 0x020b, 0x020a, 0x0209, 0x0208, 0x0207, 0x0206, 0x0205, 0x0204, 0x0203, 0x0202, 0x0201, 0x0200, - 0x01ff, 0x01fe, 0x01fd, 0x01fc, 0x01fb, 0x01fa, 0x01f9, 0x01f8, 0x01f7, 0x01f6, 0x01f5, 0x01f4, 0x01f3, 0x01f2, 0x01f1, 0x01f0, - 0x01ef, 0x01ee, 0x01ed, 0x01ec, 0x01eb, 0x01ea, 0x01e9, 0x01e9, 0x01e8, 0x01e7, 0x01e6, 0x01e5, 0x01e4, 0x01e3, 0x01e2, 0x01e1, - 0x01e0, 0x01e0, 0x01df, 0x01de, 0x01dd, 0x01dc, 0x01db, 0x01da, 0x01da, 0x01d9, 0x01d8, 0x01d7, 0x01d6, 0x01d5, 0x01d4, 0x01d4, - 0x01d3, 0x01d2, 0x01d1, 0x01d0, 0x01cf, 0x01cf, 0x01ce, 0x01cd, 0x01cc, 0x01cb, 0x01cb, 0x01ca, 0x01c9, 0x01c8, 0x01c7, 0x01c7, - 0x01c6, 0x01c5, 0x01c4, 0x01c3, 0x01c3, 0x01c2, 0x01c1, 0x01c0, 0x01c0, 0x01bf, 0x01be, 0x01bd, 0x01bd, 0x01bc, 0x01bb, 0x01ba, - 0x01ba, 0x01b9, 0x01b8, 0x01b7, 0x01b7, 0x01b6, 0x01b5, 0x01b4, 0x01b4, 0x01b3, 0x01b2, 0x01b2, 0x01b1, 0x01b0, 0x01af, 0x01af, - 0x01ae, 0x01ad, 0x01ad, 0x01ac, 0x01ab, 0x01aa, 0x01aa, 0x01a9, 0x01a8, 0x01a8, 0x01a7, 0x01a6, 0x01a6, 0x01a5, 0x01a4, 0x01a4, - 0x01a3, 0x01a2, 0x01a2, 0x01a1, 0x01a0, 0x01a0, 0x019f, 0x019e, 0x019e, 0x019d, 0x019c, 0x019c, 0x019b, 0x019a, 0x019a, 0x0199, - 0x0198, 0x0198, 0x0197, 0x0197, 0x0196, 0x0195, 0x0195, 0x0194, 0x0193, 0x0193, 0x0192, 0x0192, 0x0191, 0x0190, 0x0190, 0x018f, - 0x018f, 0x018e, 0x018d, 0x018d, 0x018c, 0x018b, 0x018b, 0x018a, 0x018a, 0x0189, 0x0189, 0x0188, 0x0187, 0x0187, 0x0186, 0x0186, - 0x0185, 0x0184, 0x0184, 0x0183, 0x0183, 0x0182, 0x0182, 0x0181, 0x0180, 0x0180, 0x017f, 0x017f, 0x017e, 0x017e, 0x017d, 0x017d, - 0x017c, 0x017b, 0x017b, 0x017a, 0x017a, 0x0179, 0x0179, 0x0178, 0x0178, 0x0177, 0x0177, 0x0176, 0x0175, 0x0175, 0x0174, 0x0174, - 0x0173, 0x0173, 0x0172, 0x0172, 0x0171, 0x0171, 0x0170, 0x0170, 0x016f, 0x016f, 0x016e, 0x016e, 0x016d, 0x016d, 0x016c, 0x016c, - 0x016b, 0x016b, 0x016a, 0x016a, 0x0169, 0x0169, 0x0168, 0x0168, 0x0167, 0x0167, 0x0166, 0x0166, 0x0165, 0x0165, 0x0164, 0x0164, - 0x0163, 0x0163, 0x0162, 0x0162, 0x0161, 0x0161, 0x0160, 0x0160, 0x015f, 0x015f, 0x015e, 0x015e, 0x015d, 0x015d, 0x015d, 0x015c, - 0x015c, 0x015b, 0x015b, 0x015a, 0x015a, 0x0159, 0x0159, 0x0158, 0x0158, 0x0158, 0x0157, 0x0157, 0x0156, 0x0156 -}; - -#if defined __ARM_NEON && defined __aarch64__ -static const uint16_t DivTableNEON[255*3+1] = { - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x1c71, 0x1af2, 0x1999, 0x1861, 0x1745, 0x1642, 0x1555, 0x147a, 0x13b1, 0x12f6, 0x1249, 0x11a7, 0x1111, 0x1084, 0x1000, - 0x0f83, 0x0f0f, 0x0ea0, 0x0e38, 0x0dd6, 0x0d79, 0x0d20, 0x0ccc, 0x0c7c, 0x0c30, 0x0be8, 0x0ba2, 0x0b60, 0x0b21, 0x0ae4, 0x0aaa, - 0x0a72, 0x0a3d, 0x0a0a, 0x09d8, 0x09a9, 0x097b, 0x094f, 0x0924, 0x08fb, 0x08d3, 0x08ad, 0x0888, 0x0864, 0x0842, 0x0820, 0x0800, - 0x07e0, 0x07c1, 0x07a4, 0x0787, 0x076b, 0x0750, 0x0736, 0x071c, 0x0703, 0x06eb, 0x06d3, 0x06bc, 0x06a6, 0x0690, 0x067b, 0x0666, - 0x0652, 0x063e, 0x062b, 0x0618, 0x0606, 0x05f4, 0x05e2, 0x05d1, 0x05c0, 0x05b0, 0x05a0, 0x0590, 0x0581, 0x0572, 0x0563, 0x0555, - 0x0547, 0x0539, 0x052b, 0x051e, 0x0511, 0x0505, 0x04f8, 0x04ec, 0x04e0, 0x04d4, 0x04c8, 0x04bd, 0x04b2, 0x04a7, 0x049c, 0x0492, - 0x0487, 0x047d, 0x0473, 0x0469, 0x0460, 0x0456, 0x044d, 0x0444, 0x043b, 0x0432, 0x0429, 0x0421, 0x0418, 0x0410, 0x0408, 0x0400, - 0x03f8, 0x03f0, 0x03e8, 0x03e0, 0x03d9, 0x03d2, 0x03ca, 0x03c3, 0x03bc, 0x03b5, 0x03ae, 0x03a8, 0x03a1, 0x039b, 0x0394, 0x038e, - 0x0387, 0x0381, 0x037b, 0x0375, 0x036f, 0x0369, 0x0364, 0x035e, 0x0358, 0x0353, 0x034d, 0x0348, 0x0342, 0x033d, 0x0338, 0x0333, - 0x032e, 0x0329, 0x0324, 0x031f, 0x031a, 0x0315, 0x0310, 0x030c, 0x0307, 0x0303, 0x02fe, 0x02fa, 0x02f5, 0x02f1, 0x02ec, 0x02e8, - 0x02e4, 0x02e0, 0x02dc, 0x02d8, 0x02d4, 0x02d0, 0x02cc, 0x02c8, 0x02c4, 0x02c0, 0x02bc, 0x02b9, 0x02b5, 0x02b1, 0x02ae, 0x02aa, - 0x02a7, 0x02a3, 0x02a0, 0x029c, 0x0299, 0x0295, 0x0292, 0x028f, 0x028c, 0x0288, 0x0285, 0x0282, 0x027f, 0x027c, 0x0279, 0x0276, - 0x0273, 0x0270, 0x026d, 0x026a, 0x0267, 0x0264, 0x0261, 0x025e, 0x025c, 0x0259, 0x0256, 0x0253, 0x0251, 0x024e, 0x024b, 0x0249, - 0x0246, 0x0243, 0x0241, 0x023e, 0x023c, 0x0239, 0x0237, 0x0234, 0x0232, 0x0230, 0x022d, 0x022b, 0x0229, 0x0226, 0x0224, 0x0222, - 0x021f, 0x021d, 0x021b, 0x0219, 0x0216, 0x0214, 0x0212, 0x0210, 0x020e, 0x020c, 0x020a, 0x0208, 0x0206, 0x0204, 0x0202, 0x0200, - 0x01fe, 0x01fc, 0x01fa, 0x01f8, 0x01f6, 0x01f4, 0x01f2, 0x01f0, 0x01ee, 0x01ec, 0x01ea, 0x01e9, 0x01e7, 0x01e5, 0x01e3, 0x01e1, - 0x01e0, 0x01de, 0x01dc, 0x01da, 0x01d9, 0x01d7, 0x01d5, 0x01d4, 0x01d2, 0x01d0, 0x01cf, 0x01cd, 0x01cb, 0x01ca, 0x01c8, 0x01c7, - 0x01c5, 0x01c3, 0x01c2, 0x01c0, 0x01bf, 0x01bd, 0x01bc, 0x01ba, 0x01b9, 0x01b7, 0x01b6, 0x01b4, 0x01b3, 0x01b2, 0x01b0, 0x01af, - 0x01ad, 0x01ac, 0x01aa, 0x01a9, 0x01a8, 0x01a6, 0x01a5, 0x01a4, 0x01a2, 0x01a1, 0x01a0, 0x019e, 0x019d, 0x019c, 0x019a, 0x0199, - 0x0198, 0x0197, 0x0195, 0x0194, 0x0193, 0x0192, 0x0190, 0x018f, 0x018e, 0x018d, 0x018b, 0x018a, 0x0189, 0x0188, 0x0187, 0x0186, - 0x0184, 0x0183, 0x0182, 0x0181, 0x0180, 0x017f, 0x017e, 0x017d, 0x017b, 0x017a, 0x0179, 0x0178, 0x0177, 0x0176, 0x0175, 0x0174, - 0x0173, 0x0172, 0x0171, 0x0170, 0x016f, 0x016e, 0x016d, 0x016c, 0x016b, 0x016a, 0x0169, 0x0168, 0x0167, 0x0166, 0x0165, 0x0164, - 0x0163, 0x0162, 0x0161, 0x0160, 0x015f, 0x015e, 0x015d, 0x015c, 0x015b, 0x015a, 0x0159, 0x0158, 0x0158, 0x0157, 0x0156, 0x0155, - 0x0154, 0x0153, 0x0152, 0x0151, 0x0150, 0x0150, 0x014f, 0x014e, 0x014d, 0x014c, 0x014b, 0x014a, 0x014a, 0x0149, 0x0148, 0x0147, - 0x0146, 0x0146, 0x0145, 0x0144, 0x0143, 0x0142, 0x0142, 0x0141, 0x0140, 0x013f, 0x013e, 0x013e, 0x013d, 0x013c, 0x013b, 0x013b, - 0x013a, 0x0139, 0x0138, 0x0138, 0x0137, 0x0136, 0x0135, 0x0135, 0x0134, 0x0133, 0x0132, 0x0132, 0x0131, 0x0130, 0x0130, 0x012f, - 0x012e, 0x012e, 0x012d, 0x012c, 0x012b, 0x012b, 0x012a, 0x0129, 0x0129, 0x0128, 0x0127, 0x0127, 0x0126, 0x0125, 0x0125, 0x0124, - 0x0123, 0x0123, 0x0122, 0x0121, 0x0121, 0x0120, 0x0120, 0x011f, 0x011e, 0x011e, 0x011d, 0x011c, 0x011c, 0x011b, 0x011b, 0x011a, - 0x0119, 0x0119, 0x0118, 0x0118, 0x0117, 0x0116, 0x0116, 0x0115, 0x0115, 0x0114, 0x0113, 0x0113, 0x0112, 0x0112, 0x0111, 0x0111, - 0x0110, 0x010f, 0x010f, 0x010e, 0x010e, 0x010d, 0x010d, 0x010c, 0x010c, 0x010b, 0x010a, 0x010a, 0x0109, 0x0109, 0x0108, 0x0108, - 0x0107, 0x0107, 0x0106, 0x0106, 0x0105, 0x0105, 0x0104, 0x0104, 0x0103, 0x0103, 0x0102, 0x0102, 0x0101, 0x0101, 0x0100, 0x0100, - 0x00ff, 0x00ff, 0x00fe, 0x00fe, 0x00fd, 0x00fd, 0x00fc, 0x00fc, 0x00fb, 0x00fb, 0x00fa, 0x00fa, 0x00f9, 0x00f9, 0x00f8, 0x00f8, - 0x00f7, 0x00f7, 0x00f6, 0x00f6, 0x00f5, 0x00f5, 0x00f4, 0x00f4, 0x00f4, 0x00f3, 0x00f3, 0x00f2, 0x00f2, 0x00f1, 0x00f1, 0x00f0, - 0x00f0, 0x00f0, 0x00ef, 0x00ef, 0x00ee, 0x00ee, 0x00ed, 0x00ed, 0x00ed, 0x00ec, 0x00ec, 0x00eb, 0x00eb, 0x00ea, 0x00ea, 0x00ea, - 0x00e9, 0x00e9, 0x00e8, 0x00e8, 0x00e7, 0x00e7, 0x00e7, 0x00e6, 0x00e6, 0x00e5, 0x00e5, 0x00e5, 0x00e4, 0x00e4, 0x00e3, 0x00e3, - 0x00e3, 0x00e2, 0x00e2, 0x00e1, 0x00e1, 0x00e1, 0x00e0, 0x00e0, 0x00e0, 0x00df, 0x00df, 0x00de, 0x00de, 0x00de, 0x00dd, 0x00dd, - 0x00dd, 0x00dc, 0x00dc, 0x00db, 0x00db, 0x00db, 0x00da, 0x00da, 0x00da, 0x00d9, 0x00d9, 0x00d9, 0x00d8, 0x00d8, 0x00d7, 0x00d7, - 0x00d7, 0x00d6, 0x00d6, 0x00d6, 0x00d5, 0x00d5, 0x00d5, 0x00d4, 0x00d4, 0x00d4, 0x00d3, 0x00d3, 0x00d3, 0x00d2, 0x00d2, 0x00d2, - 0x00d1, 0x00d1, 0x00d1, 0x00d0, 0x00d0, 0x00d0, 0x00cf, 0x00cf, 0x00cf, 0x00ce, 0x00ce, 0x00ce, 0x00cd, 0x00cd, 0x00cd, 0x00cc, - 0x00cc, 0x00cc, 0x00cb, 0x00cb, 0x00cb, 0x00ca, 0x00ca, 0x00ca, 0x00c9, 0x00c9, 0x00c9, 0x00c9, 0x00c8, 0x00c8, 0x00c8, 0x00c7, - 0x00c7, 0x00c7, 0x00c6, 0x00c6, 0x00c6, 0x00c5, 0x00c5, 0x00c5, 0x00c5, 0x00c4, 0x00c4, 0x00c4, 0x00c3, 0x00c3, 0x00c3, 0x00c3, - 0x00c2, 0x00c2, 0x00c2, 0x00c1, 0x00c1, 0x00c1, 0x00c1, 0x00c0, 0x00c0, 0x00c0, 0x00bf, 0x00bf, 0x00bf, 0x00bf, 0x00be, 0x00be, - 0x00be, 0x00bd, 0x00bd, 0x00bd, 0x00bd, 0x00bc, 0x00bc, 0x00bc, 0x00bc, 0x00bb, 0x00bb, 0x00bb, 0x00ba, 0x00ba, 0x00ba, 0x00ba, - 0x00b9, 0x00b9, 0x00b9, 0x00b9, 0x00b8, 0x00b8, 0x00b8, 0x00b8, 0x00b7, 0x00b7, 0x00b7, 0x00b7, 0x00b6, 0x00b6, 0x00b6, 0x00b6, - 0x00b5, 0x00b5, 0x00b5, 0x00b5, 0x00b4, 0x00b4, 0x00b4, 0x00b4, 0x00b3, 0x00b3, 0x00b3, 0x00b3, 0x00b2, 0x00b2, 0x00b2, 0x00b2, - 0x00b1, 0x00b1, 0x00b1, 0x00b1, 0x00b0, 0x00b0, 0x00b0, 0x00b0, 0x00af, 0x00af, 0x00af, 0x00af, 0x00ae, 0x00ae, 0x00ae, 0x00ae, - 0x00ae, 0x00ad, 0x00ad, 0x00ad, 0x00ad, 0x00ac, 0x00ac, 0x00ac, 0x00ac, 0x00ac, 0x00ab, 0x00ab, 0x00ab, 0x00ab, -}; -#endif - - -static tracy_force_inline uint64_t ProcessRGB( const uint8_t* src ) -{ -#ifdef __SSE4_1__ - __m128i px0 = _mm_loadu_si128(((__m128i*)src) + 0); - __m128i px1 = _mm_loadu_si128(((__m128i*)src) + 1); - __m128i px2 = _mm_loadu_si128(((__m128i*)src) + 2); - __m128i px3 = _mm_loadu_si128(((__m128i*)src) + 3); - - __m128i smask = _mm_set1_epi32( 0xF8FCF8 ); - __m128i sd0 = _mm_and_si128( px0, smask ); - __m128i sd1 = _mm_and_si128( px1, smask ); - __m128i sd2 = _mm_and_si128( px2, smask ); - __m128i sd3 = _mm_and_si128( px3, smask ); - - __m128i sc = _mm_shuffle_epi32(sd0, _MM_SHUFFLE(0, 0, 0, 0)); - - __m128i sc0 = _mm_cmpeq_epi8(sd0, sc); - __m128i sc1 = _mm_cmpeq_epi8(sd1, sc); - __m128i sc2 = _mm_cmpeq_epi8(sd2, sc); - __m128i sc3 = _mm_cmpeq_epi8(sd3, sc); - - __m128i sm0 = _mm_and_si128(sc0, sc1); - __m128i sm1 = _mm_and_si128(sc2, sc3); - __m128i sm = _mm_and_si128(sm0, sm1); - - if( _mm_testc_si128(sm, _mm_set1_epi32(-1)) ) - { - return uint64_t( to565( src[0], src[1], src[2] ) ) << 16; - } - - __m128i amask = _mm_set1_epi32( 0xFFFFFF ); - px0 = _mm_and_si128( px0, amask ); - px1 = _mm_and_si128( px1, amask ); - px2 = _mm_and_si128( px2, amask ); - px3 = _mm_and_si128( px3, amask ); - - __m128i min0 = _mm_min_epu8( px0, px1 ); - __m128i min1 = _mm_min_epu8( px2, px3 ); - __m128i min2 = _mm_min_epu8( min0, min1 ); - - __m128i max0 = _mm_max_epu8( px0, px1 ); - __m128i max1 = _mm_max_epu8( px2, px3 ); - __m128i max2 = _mm_max_epu8( max0, max1 ); - - __m128i min3 = _mm_shuffle_epi32( min2, _MM_SHUFFLE( 2, 3, 0, 1 ) ); - __m128i max3 = _mm_shuffle_epi32( max2, _MM_SHUFFLE( 2, 3, 0, 1 ) ); - __m128i min4 = _mm_min_epu8( min2, min3 ); - __m128i max4 = _mm_max_epu8( max2, max3 ); - - __m128i min5 = _mm_shuffle_epi32( min4, _MM_SHUFFLE( 0, 0, 2, 2 ) ); - __m128i max5 = _mm_shuffle_epi32( max4, _MM_SHUFFLE( 0, 0, 2, 2 ) ); - __m128i rmin = _mm_min_epu8( min4, min5 ); - __m128i rmax = _mm_max_epu8( max4, max5 ); - - __m128i range1 = _mm_subs_epu8( rmax, rmin ); - __m128i range2 = _mm_sad_epu8( rmax, rmin ); - - uint32_t vrange = _mm_cvtsi128_si32( range2 ) >> 1; - __m128i range = _mm_set1_epi16( DivTable[vrange] ); - - __m128i inset1 = _mm_srli_epi16( range1, 4 ); - __m128i inset = _mm_and_si128( inset1, _mm_set1_epi8( 0xF ) ); - __m128i min = _mm_adds_epu8( rmin, inset ); - __m128i max = _mm_subs_epu8( rmax, inset ); - - __m128i c0 = _mm_subs_epu8( px0, rmin ); - __m128i c1 = _mm_subs_epu8( px1, rmin ); - __m128i c2 = _mm_subs_epu8( px2, rmin ); - __m128i c3 = _mm_subs_epu8( px3, rmin ); - - __m128i is0 = _mm_maddubs_epi16( c0, _mm_set1_epi8( 1 ) ); - __m128i is1 = _mm_maddubs_epi16( c1, _mm_set1_epi8( 1 ) ); - __m128i is2 = _mm_maddubs_epi16( c2, _mm_set1_epi8( 1 ) ); - __m128i is3 = _mm_maddubs_epi16( c3, _mm_set1_epi8( 1 ) ); - - __m128i s0 = _mm_hadd_epi16( is0, is1 ); - __m128i s1 = _mm_hadd_epi16( is2, is3 ); - - __m128i m0 = _mm_mulhi_epu16( s0, range ); - __m128i m1 = _mm_mulhi_epu16( s1, range ); - - __m128i p0 = _mm_packus_epi16( m0, m1 ); - - __m128i p1 = _mm_or_si128( _mm_srai_epi32( p0, 6 ), _mm_srai_epi32( p0, 12 ) ); - __m128i p2 = _mm_or_si128( _mm_srai_epi32( p0, 18 ), p0 ); - __m128i p3 = _mm_or_si128( p1, p2 ); - __m128i p =_mm_shuffle_epi8( p3, _mm_set1_epi32( 0x0C080400 ) ); - - uint32_t vmin = _mm_cvtsi128_si32( min ); - uint32_t vmax = _mm_cvtsi128_si32( max ); - uint32_t vp = _mm_cvtsi128_si32( p ); - - return uint64_t( ( uint64_t( to565( vmin ) ) << 16 ) | to565( vmax ) | ( uint64_t( vp ) << 32 ) ); -#elif defined __ARM_NEON -# ifdef __aarch64__ - uint8x16x4_t px = vld4q_u8( src ); - - uint8x16_t lr = px.val[0]; - uint8x16_t lg = px.val[1]; - uint8x16_t lb = px.val[2]; - - uint8_t rmaxr = vmaxvq_u8( lr ); - uint8_t rmaxg = vmaxvq_u8( lg ); - uint8_t rmaxb = vmaxvq_u8( lb ); - - uint8_t rminr = vminvq_u8( lr ); - uint8_t rming = vminvq_u8( lg ); - uint8_t rminb = vminvq_u8( lb ); - - int rr = rmaxr - rminr; - int rg = rmaxg - rming; - int rb = rmaxb - rminb; - - int vrange1 = rr + rg + rb; - uint16_t vrange2 = DivTableNEON[vrange1]; - - uint8_t insetr = rr >> 4; - uint8_t insetg = rg >> 4; - uint8_t insetb = rb >> 4; - - uint8_t minr = rminr + insetr; - uint8_t ming = rming + insetg; - uint8_t minb = rminb + insetb; - - uint8_t maxr = rmaxr - insetr; - uint8_t maxg = rmaxg - insetg; - uint8_t maxb = rmaxb - insetb; - - uint8x16_t cr = vsubq_u8( lr, vdupq_n_u8( rminr ) ); - uint8x16_t cg = vsubq_u8( lg, vdupq_n_u8( rming ) ); - uint8x16_t cb = vsubq_u8( lb, vdupq_n_u8( rminb ) ); - - uint16x8_t is0l = vaddl_u8( vget_low_u8( cr ), vget_low_u8( cg ) ); - uint16x8_t is0h = vaddl_u8( vget_high_u8( cr ), vget_high_u8( cg ) ); - uint16x8_t is1l = vaddw_u8( is0l, vget_low_u8( cb ) ); - uint16x8_t is1h = vaddw_u8( is0h, vget_high_u8( cb ) ); - - int16x8_t range = vdupq_n_s16( vrange2 ); - uint16x8_t m0 = vreinterpretq_u16_s16( vqdmulhq_s16( vreinterpretq_s16_u16( is1l ), range ) ); - uint16x8_t m1 = vreinterpretq_u16_s16( vqdmulhq_s16( vreinterpretq_s16_u16( is1h ), range ) ); - - uint8x8_t p00 = vmovn_u16( m0 ); - uint8x8_t p01 = vmovn_u16( m1 ); - uint8x16_t p0 = vcombine_u8( p00, p01 ); - - uint32x4_t p1 = vaddq_u32( vshrq_n_u32( vreinterpretq_u32_u8( p0 ), 6 ), vshrq_n_u32( vreinterpretq_u32_u8( p0 ), 12 ) ); - uint32x4_t p2 = vaddq_u32( vshrq_n_u32( vreinterpretq_u32_u8( p0 ), 18 ), vreinterpretq_u32_u8( p0 ) ); - uint32x4_t p3 = vaddq_u32( p1, p2 ); - - uint16x4x2_t p4 = vuzp_u16( vget_low_u16( vreinterpretq_u16_u32( p3 ) ), vget_high_u16( vreinterpretq_u16_u32( p3 ) ) ); - uint8x8x2_t p = vuzp_u8( vreinterpret_u8_u16( p4.val[0] ), vreinterpret_u8_u16( p4.val[0] ) ); - - uint32_t vp; - vst1_lane_u32( &vp, vreinterpret_u32_u8( p.val[0] ), 0 ); - - return uint64_t( ( uint64_t( to565( minr, ming, minb ) ) << 16 ) | to565( maxr, maxg, maxb ) | ( uint64_t( vp ) << 32 ) ); -# else - uint32x4_t px0 = vld1q_u32( (uint32_t*)src ); - uint32x4_t px1 = vld1q_u32( (uint32_t*)src + 4 ); - uint32x4_t px2 = vld1q_u32( (uint32_t*)src + 8 ); - uint32x4_t px3 = vld1q_u32( (uint32_t*)src + 12 ); - - uint32x4_t smask = vdupq_n_u32( 0xF8FCF8 ); - uint32x4_t sd0 = vandq_u32( smask, px0 ); - uint32x4_t sd1 = vandq_u32( smask, px1 ); - uint32x4_t sd2 = vandq_u32( smask, px2 ); - uint32x4_t sd3 = vandq_u32( smask, px3 ); - - uint32x4_t sc = vdupq_n_u32( sd0[0] ); - - uint32x4_t sc0 = vceqq_u32( sd0, sc ); - uint32x4_t sc1 = vceqq_u32( sd1, sc ); - uint32x4_t sc2 = vceqq_u32( sd2, sc ); - uint32x4_t sc3 = vceqq_u32( sd3, sc ); - - uint32x4_t sm0 = vandq_u32( sc0, sc1 ); - uint32x4_t sm1 = vandq_u32( sc2, sc3 ); - int64x2_t sm = vreinterpretq_s64_u32( vandq_u32( sm0, sm1 ) ); - - if( sm[0] == -1 && sm[1] == -1 ) - { - return uint64_t( to565( src[0], src[1], src[2] ) ) << 16; - } - - uint32x4_t mask = vdupq_n_u32( 0xFFFFFF ); - uint8x16_t l0 = vreinterpretq_u8_u32( vandq_u32( mask, px0 ) ); - uint8x16_t l1 = vreinterpretq_u8_u32( vandq_u32( mask, px1 ) ); - uint8x16_t l2 = vreinterpretq_u8_u32( vandq_u32( mask, px2 ) ); - uint8x16_t l3 = vreinterpretq_u8_u32( vandq_u32( mask, px3 ) ); - - uint8x16_t min0 = vminq_u8( l0, l1 ); - uint8x16_t min1 = vminq_u8( l2, l3 ); - uint8x16_t min2 = vminq_u8( min0, min1 ); - - uint8x16_t max0 = vmaxq_u8( l0, l1 ); - uint8x16_t max1 = vmaxq_u8( l2, l3 ); - uint8x16_t max2 = vmaxq_u8( max0, max1 ); - - uint8x16_t min3 = vreinterpretq_u8_u32( vrev64q_u32( vreinterpretq_u32_u8( min2 ) ) ); - uint8x16_t max3 = vreinterpretq_u8_u32( vrev64q_u32( vreinterpretq_u32_u8( max2 ) ) ); - - uint8x16_t min4 = vminq_u8( min2, min3 ); - uint8x16_t max4 = vmaxq_u8( max2, max3 ); - - uint8x16_t min5 = vcombine_u8( vget_high_u8( min4 ), vget_low_u8( min4 ) ); - uint8x16_t max5 = vcombine_u8( vget_high_u8( max4 ), vget_low_u8( max4 ) ); - - uint8x16_t rmin = vminq_u8( min4, min5 ); - uint8x16_t rmax = vmaxq_u8( max4, max5 ); - - uint8x16_t range1 = vsubq_u8( rmax, rmin ); - uint8x8_t range2 = vget_low_u8( range1 ); - uint8x8x2_t range3 = vzip_u8( range2, vdup_n_u8( 0 ) ); - uint16x4_t range4 = vreinterpret_u16_u8( range3.val[0] ); - - uint16_t vrange1; - uint16x4_t range5 = vpadd_u16( range4, range4 ); - uint16x4_t range6 = vpadd_u16( range5, range5 ); - vst1_lane_u16( &vrange1, range6, 0 ); - - uint32_t vrange2 = ( 2 << 16 ) / uint32_t( vrange1 + 1 ); - uint16x8_t range = vdupq_n_u16( vrange2 ); - - uint8x16_t inset = vshrq_n_u8( range1, 4 ); - uint8x16_t min = vaddq_u8( rmin, inset ); - uint8x16_t max = vsubq_u8( rmax, inset ); - - uint8x16_t c0 = vsubq_u8( l0, rmin ); - uint8x16_t c1 = vsubq_u8( l1, rmin ); - uint8x16_t c2 = vsubq_u8( l2, rmin ); - uint8x16_t c3 = vsubq_u8( l3, rmin ); - - uint16x8_t is0 = vpaddlq_u8( c0 ); - uint16x8_t is1 = vpaddlq_u8( c1 ); - uint16x8_t is2 = vpaddlq_u8( c2 ); - uint16x8_t is3 = vpaddlq_u8( c3 ); - - uint16x4_t is4 = vpadd_u16( vget_low_u16( is0 ), vget_high_u16( is0 ) ); - uint16x4_t is5 = vpadd_u16( vget_low_u16( is1 ), vget_high_u16( is1 ) ); - uint16x4_t is6 = vpadd_u16( vget_low_u16( is2 ), vget_high_u16( is2 ) ); - uint16x4_t is7 = vpadd_u16( vget_low_u16( is3 ), vget_high_u16( is3 ) ); - - uint16x8_t s0 = vcombine_u16( is4, is5 ); - uint16x8_t s1 = vcombine_u16( is6, is7 ); - - uint16x8_t m0 = vreinterpretq_u16_s16( vqdmulhq_s16( vreinterpretq_s16_u16( s0 ), vreinterpretq_s16_u16( range ) ) ); - uint16x8_t m1 = vreinterpretq_u16_s16( vqdmulhq_s16( vreinterpretq_s16_u16( s1 ), vreinterpretq_s16_u16( range ) ) ); - - uint8x8_t p00 = vmovn_u16( m0 ); - uint8x8_t p01 = vmovn_u16( m1 ); - uint8x16_t p0 = vcombine_u8( p00, p01 ); - - uint32x4_t p1 = vaddq_u32( vshrq_n_u32( vreinterpretq_u32_u8( p0 ), 6 ), vshrq_n_u32( vreinterpretq_u32_u8( p0 ), 12 ) ); - uint32x4_t p2 = vaddq_u32( vshrq_n_u32( vreinterpretq_u32_u8( p0 ), 18 ), vreinterpretq_u32_u8( p0 ) ); - uint32x4_t p3 = vaddq_u32( p1, p2 ); - - uint16x4x2_t p4 = vuzp_u16( vget_low_u16( vreinterpretq_u16_u32( p3 ) ), vget_high_u16( vreinterpretq_u16_u32( p3 ) ) ); - uint8x8x2_t p = vuzp_u8( vreinterpret_u8_u16( p4.val[0] ), vreinterpret_u8_u16( p4.val[0] ) ); - - uint32_t vmin, vmax, vp; - vst1q_lane_u32( &vmin, vreinterpretq_u32_u8( min ), 0 ); - vst1q_lane_u32( &vmax, vreinterpretq_u32_u8( max ), 0 ); - vst1_lane_u32( &vp, vreinterpret_u32_u8( p.val[0] ), 0 ); - - return uint64_t( ( uint64_t( to565( vmin ) ) << 16 ) | to565( vmax ) | ( uint64_t( vp ) << 32 ) ); -# endif -#else - uint32_t ref; - memcpy( &ref, src, 4 ); - uint32_t refMask = ref & 0xF8FCF8; - auto stmp = src + 4; - for( int i=1; i<16; i++ ) - { - uint32_t px; - memcpy( &px, stmp, 4 ); - if( ( px & 0xF8FCF8 ) != refMask ) break; - stmp += 4; - } - if( stmp == src + 64 ) - { - return uint64_t( to565( ref ) ) << 16; - } - - uint8_t min[3] = { src[0], src[1], src[2] }; - uint8_t max[3] = { src[0], src[1], src[2] }; - auto tmp = src + 4; - for( int i=1; i<16; i++ ) - { - for( int j=0; j<3; j++ ) - { - if( tmp[j] < min[j] ) min[j] = tmp[j]; - else if( tmp[j] > max[j] ) max[j] = tmp[j]; - } - tmp += 4; - } - - const uint32_t range = DivTable[max[0] - min[0] + max[1] - min[1] + max[2] - min[2]]; - const uint32_t rmin = min[0] + min[1] + min[2]; - for( int i=0; i<3; i++ ) - { - const uint8_t inset = ( max[i] - min[i] ) >> 4; - min[i] += inset; - max[i] -= inset; - } - - uint32_t data = 0; - for( int i=0; i<16; i++ ) - { - const uint32_t c = src[0] + src[1] + src[2] - rmin; - const uint8_t idx = ( c * range ) >> 16; - data |= idx << (i*2); - src += 4; - } - - return uint64_t( ( uint64_t( to565( min[0], min[1], min[2] ) ) << 16 ) | to565( max[0], max[1], max[2] ) | ( uint64_t( data ) << 32 ) ); -#endif -} - -#ifdef __AVX2__ -static tracy_force_inline void ProcessRGB_AVX( const uint8_t* src, char*& dst ) -{ - __m256i px0 = _mm256_loadu_si256(((__m256i*)src) + 0); - __m256i px1 = _mm256_loadu_si256(((__m256i*)src) + 1); - __m256i px2 = _mm256_loadu_si256(((__m256i*)src) + 2); - __m256i px3 = _mm256_loadu_si256(((__m256i*)src) + 3); - - __m256i smask = _mm256_set1_epi32( 0xF8FCF8 ); - __m256i sd0 = _mm256_and_si256( px0, smask ); - __m256i sd1 = _mm256_and_si256( px1, smask ); - __m256i sd2 = _mm256_and_si256( px2, smask ); - __m256i sd3 = _mm256_and_si256( px3, smask ); - - __m256i sc = _mm256_shuffle_epi32(sd0, _MM_SHUFFLE(0, 0, 0, 0)); - - __m256i sc0 = _mm256_cmpeq_epi8( sd0, sc ); - __m256i sc1 = _mm256_cmpeq_epi8( sd1, sc ); - __m256i sc2 = _mm256_cmpeq_epi8( sd2, sc ); - __m256i sc3 = _mm256_cmpeq_epi8( sd3, sc ); - - __m256i sm0 = _mm256_and_si256( sc0, sc1 ); - __m256i sm1 = _mm256_and_si256( sc2, sc3 ); - __m256i sm = _mm256_and_si256( sm0, sm1 ); - - const int64_t solid0 = 1 - _mm_testc_si128( _mm256_castsi256_si128( sm ), _mm_set1_epi32( -1 ) ); - const int64_t solid1 = 1 - _mm_testc_si128( _mm256_extracti128_si256( sm, 1 ), _mm_set1_epi32( -1 ) ); - - if( solid0 + solid1 == 0 ) - { - const auto c0 = uint64_t( to565( src[0], src[1], src[2] ) ) << 16; - const auto c1 = uint64_t( to565( src[16], src[17], src[18] ) ) << 16; - memcpy( dst, &c0, 8 ); - memcpy( dst+8, &c1, 8 ); - dst += 16; - return; - } - - __m256i amask = _mm256_set1_epi32( 0xFFFFFF ); - px0 = _mm256_and_si256( px0, amask ); - px1 = _mm256_and_si256( px1, amask ); - px2 = _mm256_and_si256( px2, amask ); - px3 = _mm256_and_si256( px3, amask ); - - __m256i min0 = _mm256_min_epu8( px0, px1 ); - __m256i min1 = _mm256_min_epu8( px2, px3 ); - __m256i min2 = _mm256_min_epu8( min0, min1 ); - - __m256i max0 = _mm256_max_epu8( px0, px1 ); - __m256i max1 = _mm256_max_epu8( px2, px3 ); - __m256i max2 = _mm256_max_epu8( max0, max1 ); - - __m256i min3 = _mm256_shuffle_epi32( min2, _MM_SHUFFLE( 2, 3, 0, 1 ) ); - __m256i max3 = _mm256_shuffle_epi32( max2, _MM_SHUFFLE( 2, 3, 0, 1 ) ); - __m256i min4 = _mm256_min_epu8( min2, min3 ); - __m256i max4 = _mm256_max_epu8( max2, max3 ); - - __m256i min5 = _mm256_shuffle_epi32( min4, _MM_SHUFFLE( 0, 0, 2, 2 ) ); - __m256i max5 = _mm256_shuffle_epi32( max4, _MM_SHUFFLE( 0, 0, 2, 2 ) ); - __m256i rmin = _mm256_min_epu8( min4, min5 ); - __m256i rmax = _mm256_max_epu8( max4, max5 ); - - __m256i range1 = _mm256_subs_epu8( rmax, rmin ); - __m256i range2 = _mm256_sad_epu8( rmax, rmin ); - - uint16_t vrange0 = DivTable[_mm256_cvtsi256_si32( range2 ) >> 1]; - uint16_t vrange1 = DivTable[_mm256_extract_epi16( range2, 8 ) >> 1]; - __m256i range00 = _mm256_set1_epi16( vrange0 ); - __m256i range = _mm256_inserti128_si256( range00, _mm_set1_epi16( vrange1 ), 1 ); - - __m256i inset1 = _mm256_srli_epi16( range1, 4 ); - __m256i inset = _mm256_and_si256( inset1, _mm256_set1_epi8( 0xF ) ); - __m256i min = _mm256_adds_epu8( rmin, inset ); - __m256i max = _mm256_subs_epu8( rmax, inset ); - - __m256i c0 = _mm256_subs_epu8( px0, rmin ); - __m256i c1 = _mm256_subs_epu8( px1, rmin ); - __m256i c2 = _mm256_subs_epu8( px2, rmin ); - __m256i c3 = _mm256_subs_epu8( px3, rmin ); - - __m256i is0 = _mm256_maddubs_epi16( c0, _mm256_set1_epi8( 1 ) ); - __m256i is1 = _mm256_maddubs_epi16( c1, _mm256_set1_epi8( 1 ) ); - __m256i is2 = _mm256_maddubs_epi16( c2, _mm256_set1_epi8( 1 ) ); - __m256i is3 = _mm256_maddubs_epi16( c3, _mm256_set1_epi8( 1 ) ); - - __m256i s0 = _mm256_hadd_epi16( is0, is1 ); - __m256i s1 = _mm256_hadd_epi16( is2, is3 ); - - __m256i m0 = _mm256_mulhi_epu16( s0, range ); - __m256i m1 = _mm256_mulhi_epu16( s1, range ); - - __m256i p0 = _mm256_packus_epi16( m0, m1 ); - - __m256i p1 = _mm256_or_si256( _mm256_srai_epi32( p0, 6 ), _mm256_srai_epi32( p0, 12 ) ); - __m256i p2 = _mm256_or_si256( _mm256_srai_epi32( p0, 18 ), p0 ); - __m256i p3 = _mm256_or_si256( p1, p2 ); - __m256i p =_mm256_shuffle_epi8( p3, _mm256_set1_epi32( 0x0C080400 ) ); - - __m256i mm0 = _mm256_unpacklo_epi8( _mm256_setzero_si256(), min ); - __m256i mm1 = _mm256_unpacklo_epi8( _mm256_setzero_si256(), max ); - __m256i mm2 = _mm256_unpacklo_epi64( mm1, mm0 ); - __m256i mmr = _mm256_slli_epi64( _mm256_srli_epi64( mm2, 11 ), 11 ); - __m256i mmg = _mm256_slli_epi64( _mm256_srli_epi64( mm2, 26 ), 5 ); - __m256i mmb = _mm256_srli_epi64( _mm256_slli_epi64( mm2, 16 ), 59 ); - __m256i mm3 = _mm256_or_si256( mmr, mmg ); - __m256i mm4 = _mm256_or_si256( mm3, mmb ); - __m256i mm5 = _mm256_shuffle_epi8( mm4, _mm256_set1_epi32( 0x09080100 ) ); - - __m256i d0 = _mm256_unpacklo_epi32( mm5, p ); - __m256i d1 = _mm256_permute4x64_epi64( d0, _MM_SHUFFLE( 3, 2, 2, 0 ) ); - __m128i d2 = _mm256_castsi256_si128( d1 ); - - __m128i mask = _mm_set_epi64x( 0xFFFF0000 | -solid1, 0xFFFF0000 | -solid0 ); - __m128i d3 = _mm_and_si128( d2, mask ); - _mm_storeu_si128( (__m128i*)dst, d3 ); - dst += 16; -} -#endif - -void CompressImageDxt1( const char* src, char* dst, int w, int h ) -{ - assert( (w % 4) == 0 && (h % 4) == 0 ); - -#ifdef __AVX2__ - if( w%8 == 0 ) - { - uint32_t buf[8*4]; - int i = 0; - - auto blocks = w * h / 32; - do - { - auto tmp = (char*)buf; - memcpy( tmp, src, 8*4 ); - memcpy( tmp + 8*4, src + w * 4, 8*4 ); - memcpy( tmp + 16*4, src + w * 8, 8*4 ); - memcpy( tmp + 24*4, src + w * 12, 8*4 ); - src += 8*4; - if( ++i == w/8 ) - { - src += w * 3 * 4; - i = 0; - } - - ProcessRGB_AVX( (uint8_t*)buf, dst ); - } - while( --blocks ); - } - else -#endif - { - uint32_t buf[4*4]; - int i = 0; - - auto ptr = dst; - auto blocks = w * h / 16; - do - { - auto tmp = (char*)buf; - memcpy( tmp, src, 4*4 ); - memcpy( tmp + 4*4, src + w * 4, 4*4 ); - memcpy( tmp + 8*4, src + w * 8, 4*4 ); - memcpy( tmp + 12*4, src + w * 12, 4*4 ); - src += 4*4; - if( ++i == w/4 ) - { - src += w * 3 * 4; - i = 0; - } - - const auto c = ProcessRGB( (uint8_t*)buf ); - memcpy( ptr, &c, sizeof( uint64_t ) ); - ptr += sizeof( uint64_t ); - } - while( --blocks ); - } -} - -} diff --git a/src/third_party/tracy/client/TracyDxt1.hpp b/src/third_party/tracy/client/TracyDxt1.hpp deleted file mode 100644 index c2313542..00000000 --- a/src/third_party/tracy/client/TracyDxt1.hpp +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef __TRACYDXT1_HPP__ -#define __TRACYDXT1_HPP__ - -namespace tracy -{ - -void CompressImageDxt1( const char* src, char* dst, int w, int h ); - -} - -#endif diff --git a/src/third_party/tracy/client/TracyFastVector.hpp b/src/third_party/tracy/client/TracyFastVector.hpp deleted file mode 100644 index 38accc92..00000000 --- a/src/third_party/tracy/client/TracyFastVector.hpp +++ /dev/null @@ -1,118 +0,0 @@ -#ifndef __TRACYFASTVECTOR_HPP__ -#define __TRACYFASTVECTOR_HPP__ - -#include -#include - -#include "../common/TracyAlloc.hpp" -#include "../common/TracyForceInline.hpp" - -namespace tracy -{ - -template -class FastVector -{ -public: - using iterator = T*; - using const_iterator = const T*; - - FastVector( size_t capacity ) - : m_ptr( (T*)tracy_malloc( sizeof( T ) * capacity ) ) - , m_write( m_ptr ) - , m_end( m_ptr + capacity ) - { - assert( capacity != 0 ); - } - - FastVector( const FastVector& ) = delete; - FastVector( FastVector&& ) = delete; - - ~FastVector() - { - tracy_free( m_ptr ); - } - - FastVector& operator=( const FastVector& ) = delete; - FastVector& operator=( FastVector&& ) = delete; - - bool empty() const { return m_ptr == m_write; } - size_t size() const { return m_write - m_ptr; } - - T* data() { return m_ptr; } - const T* data() const { return m_ptr; }; - - T* begin() { return m_ptr; } - const T* begin() const { return m_ptr; } - T* end() { return m_write; } - const T* end() const { return m_write; } - - T& front() { assert( !empty() ); return m_ptr[0]; } - const T& front() const { assert( !empty() ); return m_ptr[0]; } - - T& back() { assert( !empty() ); return m_write[-1]; } - const T& back() const { assert( !empty() ); return m_write[-1]; } - - T& operator[]( size_t idx ) { return m_ptr[idx]; } - const T& operator[]( size_t idx ) const { return m_ptr[idx]; } - - T* push_next() - { - if( m_write == m_end ) AllocMore(); - return m_write++; - } - - T* prepare_next() - { - if( m_write == m_end ) AllocMore(); - return m_write; - } - - void commit_next() - { - m_write++; - } - - void clear() - { - m_write = m_ptr; - } - - void swap( FastVector& vec ) - { - const auto ptr1 = m_ptr; - const auto ptr2 = vec.m_ptr; - const auto write1 = m_write; - const auto write2 = vec.m_write; - const auto end1 = m_end; - const auto end2 = vec.m_end; - - m_ptr = ptr2; - vec.m_ptr = ptr1; - m_write = write2; - vec.m_write = write1; - m_end = end2; - vec.m_end = end1; - } - -private: - tracy_no_inline void AllocMore() - { - const auto cap = size_t( m_end - m_ptr ) * 2; - const auto size = size_t( m_write - m_ptr ); - T* ptr = (T*)tracy_malloc( sizeof( T ) * cap ); - memcpy( ptr, m_ptr, size * sizeof( T ) ); - tracy_free_fast( m_ptr ); - m_ptr = ptr; - m_write = m_ptr + size; - m_end = m_ptr + cap; - } - - T* m_ptr; - T* m_write; - T* m_end; -}; - -} - -#endif diff --git a/src/third_party/tracy/client/TracyKCore.cpp b/src/third_party/tracy/client/TracyKCore.cpp deleted file mode 100644 index 09d51d11..00000000 --- a/src/third_party/tracy/client/TracyKCore.cpp +++ /dev/null @@ -1,121 +0,0 @@ -#ifdef __linux__ - -#include -#include -#include -#include -#include - -#include "TracyDebug.hpp" -#include "TracyKCore.hpp" -#include "../common/TracyAlloc.hpp" - -#if !defined(__GLIBC__) && !defined(__WORDSIZE) -// include __WORDSIZE headers for musl -# include -#endif - -namespace tracy -{ - -using elf_half = uint16_t; -using elf_word = uint32_t; -using elf_sword = int32_t; - -#if __WORDSIZE == 32 - using elf_addr = uint32_t; - using elf_off = uint32_t; - using elf_xword = uint32_t; -#else - using elf_addr = uint64_t; - using elf_off = uint64_t; - using elf_xword = uint64_t; -#endif - -struct elf_ehdr -{ - unsigned char e_ident[16]; - elf_half e_type; - elf_half e_machine; - elf_word e_version; - elf_addr e_entry; - elf_off e_phoff; - elf_off e_shoff; - elf_word e_flags; - elf_half e_ehsize; - elf_half e_phentsize; - elf_half e_phnum; - elf_half e_shentsize; - elf_half e_shnum; - elf_half e_shstrndx; -}; - -struct elf_phdr -{ - elf_word p_type; - elf_word p_flags; - elf_off p_offset; - elf_addr p_vaddr; - elf_addr p_paddr; - elf_xword p_filesz; - elf_xword p_memsz; - uint64_t p_align; // include 32-bit-only flags field for 32-bit compatibility -}; - -KCore::KCore() - : m_offsets( 16 ) -{ - m_fd = open( "/proc/kcore", O_RDONLY ); - if( m_fd == -1 ) return; - - elf_ehdr ehdr; - if( read( m_fd, &ehdr, sizeof( ehdr ) ) != sizeof( ehdr ) ) goto err; - - assert( ehdr.e_phentsize == sizeof( elf_phdr ) ); - - for( elf_half i=0; istart = phdr.p_vaddr; - ptr->size = phdr.p_memsz; - ptr->offset = phdr.p_offset; - } - - std::sort( m_offsets.begin(), m_offsets.end(), []( const Offset& lhs, const Offset& rhs ) { return lhs.start < rhs.start; } ); - TracyDebug( "KCore: %zu segments found\n", m_offsets.size() ); - return; - -err: - close( m_fd ); - m_fd = -1; -} - -KCore::~KCore() -{ - if( m_fd != -1 ) close( m_fd ); -} - -void* KCore::Retrieve( uint64_t addr, uint64_t size ) const -{ - if( m_fd == -1 ) return nullptr; - auto it = std::lower_bound( m_offsets.begin(), m_offsets.end(), addr, []( const Offset& lhs, uint64_t rhs ) { return lhs.start + lhs.size < rhs; } ); - if( it == m_offsets.end() ) return nullptr; - if( addr + size > it->start + it->size ) return nullptr; - if( lseek( m_fd, it->offset + addr - it->start, SEEK_SET ) == -1 ) return nullptr; - auto ptr = tracy_malloc( size ); - if( read( m_fd, ptr, size ) != ssize_t( size ) ) - { - tracy_free( ptr ); - return nullptr; - } - return ptr; -} - -} - -#endif \ No newline at end of file diff --git a/src/third_party/tracy/client/TracyKCore.hpp b/src/third_party/tracy/client/TracyKCore.hpp deleted file mode 100644 index 437e172c..00000000 --- a/src/third_party/tracy/client/TracyKCore.hpp +++ /dev/null @@ -1,37 +0,0 @@ -#ifndef __TRACYKCORE_HPP__ -#define __TRACYKCORE_HPP__ - -#ifdef __linux__ - -#include - -#include "TracyFastVector.hpp" - -namespace tracy -{ - -class KCore -{ - struct Offset - { - uint64_t start; - uint64_t size; - uint64_t offset; - }; - -public: - KCore(); - ~KCore(); - - void* Retrieve( uint64_t addr, uint64_t size ) const; - -private: - int m_fd; - FastVector m_offsets; -}; - -} - -#endif - -#endif diff --git a/src/third_party/tracy/client/TracyLock.hpp b/src/third_party/tracy/client/TracyLock.hpp deleted file mode 100644 index d12a3c16..00000000 --- a/src/third_party/tracy/client/TracyLock.hpp +++ /dev/null @@ -1,546 +0,0 @@ -#ifndef __TRACYLOCK_HPP__ -#define __TRACYLOCK_HPP__ - -#include -#include - -#include "../common/TracySystem.hpp" -#include "../common/TracyAlign.hpp" -#include "TracyProfiler.hpp" - -namespace tracy -{ - -class LockableCtx -{ -public: - tracy_force_inline LockableCtx( const SourceLocationData* srcloc ) - : m_id( GetLockCounter().fetch_add( 1, std::memory_order_relaxed ) ) -#ifdef TRACY_ON_DEMAND - , m_lockCount( 0 ) - , m_active( false ) -#endif - { - assert( m_id != (std::numeric_limits::max)() ); - - auto item = Profiler::QueueSerial(); - MemWrite( &item->hdr.type, QueueType::LockAnnounce ); - MemWrite( &item->lockAnnounce.id, m_id ); - MemWrite( &item->lockAnnounce.time, Profiler::GetTime() ); - MemWrite( &item->lockAnnounce.lckloc, (uint64_t)srcloc ); - MemWrite( &item->lockAnnounce.type, LockType::Lockable ); -#ifdef TRACY_ON_DEMAND - GetProfiler().DeferItem( *item ); -#endif - Profiler::QueueSerialFinish(); - } - - LockableCtx( const LockableCtx& ) = delete; - LockableCtx& operator=( const LockableCtx& ) = delete; - - tracy_force_inline ~LockableCtx() - { - auto item = Profiler::QueueSerial(); - MemWrite( &item->hdr.type, QueueType::LockTerminate ); - MemWrite( &item->lockTerminate.id, m_id ); - MemWrite( &item->lockTerminate.time, Profiler::GetTime() ); -#ifdef TRACY_ON_DEMAND - GetProfiler().DeferItem( *item ); -#endif - Profiler::QueueSerialFinish(); - } - - tracy_force_inline bool BeforeLock() - { -#ifdef TRACY_ON_DEMAND - bool queue = false; - const auto locks = m_lockCount.fetch_add( 1, std::memory_order_relaxed ); - const auto active = m_active.load( std::memory_order_relaxed ); - if( locks == 0 || active ) - { - const bool connected = GetProfiler().IsConnected(); - if( active != connected ) m_active.store( connected, std::memory_order_relaxed ); - if( connected ) queue = true; - } - if( !queue ) return false; -#endif - - auto item = Profiler::QueueSerial(); - MemWrite( &item->hdr.type, QueueType::LockWait ); - MemWrite( &item->lockWait.thread, GetThreadHandle() ); - MemWrite( &item->lockWait.id, m_id ); - MemWrite( &item->lockWait.time, Profiler::GetTime() ); - Profiler::QueueSerialFinish(); - return true; - } - - tracy_force_inline void AfterLock() - { - auto item = Profiler::QueueSerial(); - MemWrite( &item->hdr.type, QueueType::LockObtain ); - MemWrite( &item->lockObtain.thread, GetThreadHandle() ); - MemWrite( &item->lockObtain.id, m_id ); - MemWrite( &item->lockObtain.time, Profiler::GetTime() ); - Profiler::QueueSerialFinish(); - } - - tracy_force_inline void AfterUnlock() - { -#ifdef TRACY_ON_DEMAND - m_lockCount.fetch_sub( 1, std::memory_order_relaxed ); - if( !m_active.load( std::memory_order_relaxed ) ) return; - if( !GetProfiler().IsConnected() ) - { - m_active.store( false, std::memory_order_relaxed ); - return; - } -#endif - - auto item = Profiler::QueueSerial(); - MemWrite( &item->hdr.type, QueueType::LockRelease ); - MemWrite( &item->lockRelease.id, m_id ); - MemWrite( &item->lockRelease.time, Profiler::GetTime() ); - Profiler::QueueSerialFinish(); - } - - tracy_force_inline void AfterTryLock( bool acquired ) - { -#ifdef TRACY_ON_DEMAND - if( !acquired ) return; - - bool queue = false; - const auto locks = m_lockCount.fetch_add( 1, std::memory_order_relaxed ); - const auto active = m_active.load( std::memory_order_relaxed ); - if( locks == 0 || active ) - { - const bool connected = GetProfiler().IsConnected(); - if( active != connected ) m_active.store( connected, std::memory_order_relaxed ); - if( connected ) queue = true; - } - if( !queue ) return; -#endif - - if( acquired ) - { - auto item = Profiler::QueueSerial(); - MemWrite( &item->hdr.type, QueueType::LockObtain ); - MemWrite( &item->lockObtain.thread, GetThreadHandle() ); - MemWrite( &item->lockObtain.id, m_id ); - MemWrite( &item->lockObtain.time, Profiler::GetTime() ); - Profiler::QueueSerialFinish(); - } - } - - tracy_force_inline void Mark( const SourceLocationData* srcloc ) - { -#ifdef TRACY_ON_DEMAND - const auto active = m_active.load( std::memory_order_relaxed ); - if( !active ) return; - const auto connected = GetProfiler().IsConnected(); - if( !connected ) - { - if( active ) m_active.store( false, std::memory_order_relaxed ); - return; - } -#endif - - auto item = Profiler::QueueSerial(); - MemWrite( &item->hdr.type, QueueType::LockMark ); - MemWrite( &item->lockMark.thread, GetThreadHandle() ); - MemWrite( &item->lockMark.id, m_id ); - MemWrite( &item->lockMark.srcloc, (uint64_t)srcloc ); - Profiler::QueueSerialFinish(); - } - - tracy_force_inline void CustomName( const char* name, size_t size ) - { - assert( size < (std::numeric_limits::max)() ); - auto ptr = (char*)tracy_malloc( size ); - memcpy( ptr, name, size ); - auto item = Profiler::QueueSerial(); - MemWrite( &item->hdr.type, QueueType::LockName ); - MemWrite( &item->lockNameFat.id, m_id ); - MemWrite( &item->lockNameFat.name, (uint64_t)ptr ); - MemWrite( &item->lockNameFat.size, (uint16_t)size ); -#ifdef TRACY_ON_DEMAND - GetProfiler().DeferItem( *item ); -#endif - Profiler::QueueSerialFinish(); - } - -private: - uint32_t m_id; - -#ifdef TRACY_ON_DEMAND - std::atomic m_lockCount; - std::atomic m_active; -#endif -}; - -template -class Lockable -{ -public: - tracy_force_inline Lockable( const SourceLocationData* srcloc ) - : m_ctx( srcloc ) - { - } - - Lockable( const Lockable& ) = delete; - Lockable& operator=( const Lockable& ) = delete; - - tracy_force_inline void lock() - { - const auto runAfter = m_ctx.BeforeLock(); - m_lockable.lock(); - if( runAfter ) m_ctx.AfterLock(); - } - - tracy_force_inline void unlock() - { - m_lockable.unlock(); - m_ctx.AfterUnlock(); - } - - tracy_force_inline bool try_lock() - { - const auto acquired = m_lockable.try_lock(); - m_ctx.AfterTryLock( acquired ); - return acquired; - } - - tracy_force_inline void Mark( const SourceLocationData* srcloc ) - { - m_ctx.Mark( srcloc ); - } - - tracy_force_inline void CustomName( const char* name, size_t size ) - { - m_ctx.CustomName( name, size ); - } - -private: - T m_lockable; - LockableCtx m_ctx; -}; - - -class SharedLockableCtx -{ -public: - tracy_force_inline SharedLockableCtx( const SourceLocationData* srcloc ) - : m_id( GetLockCounter().fetch_add( 1, std::memory_order_relaxed ) ) -#ifdef TRACY_ON_DEMAND - , m_lockCount( 0 ) - , m_active( false ) -#endif - { - assert( m_id != (std::numeric_limits::max)() ); - - auto item = Profiler::QueueSerial(); - MemWrite( &item->hdr.type, QueueType::LockAnnounce ); - MemWrite( &item->lockAnnounce.id, m_id ); - MemWrite( &item->lockAnnounce.time, Profiler::GetTime() ); - MemWrite( &item->lockAnnounce.lckloc, (uint64_t)srcloc ); - MemWrite( &item->lockAnnounce.type, LockType::SharedLockable ); -#ifdef TRACY_ON_DEMAND - GetProfiler().DeferItem( *item ); -#endif - Profiler::QueueSerialFinish(); - } - - SharedLockableCtx( const SharedLockableCtx& ) = delete; - SharedLockableCtx& operator=( const SharedLockableCtx& ) = delete; - - tracy_force_inline ~SharedLockableCtx() - { - auto item = Profiler::QueueSerial(); - MemWrite( &item->hdr.type, QueueType::LockTerminate ); - MemWrite( &item->lockTerminate.id, m_id ); - MemWrite( &item->lockTerminate.time, Profiler::GetTime() ); -#ifdef TRACY_ON_DEMAND - GetProfiler().DeferItem( *item ); -#endif - Profiler::QueueSerialFinish(); - } - - tracy_force_inline bool BeforeLock() - { -#ifdef TRACY_ON_DEMAND - bool queue = false; - const auto locks = m_lockCount.fetch_add( 1, std::memory_order_relaxed ); - const auto active = m_active.load( std::memory_order_relaxed ); - if( locks == 0 || active ) - { - const bool connected = GetProfiler().IsConnected(); - if( active != connected ) m_active.store( connected, std::memory_order_relaxed ); - if( connected ) queue = true; - } - if( !queue ) return false; -#endif - - auto item = Profiler::QueueSerial(); - MemWrite( &item->hdr.type, QueueType::LockWait ); - MemWrite( &item->lockWait.thread, GetThreadHandle() ); - MemWrite( &item->lockWait.id, m_id ); - MemWrite( &item->lockWait.time, Profiler::GetTime() ); - Profiler::QueueSerialFinish(); - return true; - } - - tracy_force_inline void AfterLock() - { - auto item = Profiler::QueueSerial(); - MemWrite( &item->hdr.type, QueueType::LockObtain ); - MemWrite( &item->lockObtain.thread, GetThreadHandle() ); - MemWrite( &item->lockObtain.id, m_id ); - MemWrite( &item->lockObtain.time, Profiler::GetTime() ); - Profiler::QueueSerialFinish(); - } - - tracy_force_inline void AfterUnlock() - { -#ifdef TRACY_ON_DEMAND - m_lockCount.fetch_sub( 1, std::memory_order_relaxed ); - if( !m_active.load( std::memory_order_relaxed ) ) return; - if( !GetProfiler().IsConnected() ) - { - m_active.store( false, std::memory_order_relaxed ); - return; - } -#endif - - auto item = Profiler::QueueSerial(); - MemWrite( &item->hdr.type, QueueType::LockRelease ); - MemWrite( &item->lockRelease.id, m_id ); - MemWrite( &item->lockRelease.time, Profiler::GetTime() ); - Profiler::QueueSerialFinish(); - } - - tracy_force_inline void AfterTryLock( bool acquired ) - { -#ifdef TRACY_ON_DEMAND - if( !acquired ) return; - - bool queue = false; - const auto locks = m_lockCount.fetch_add( 1, std::memory_order_relaxed ); - const auto active = m_active.load( std::memory_order_relaxed ); - if( locks == 0 || active ) - { - const bool connected = GetProfiler().IsConnected(); - if( active != connected ) m_active.store( connected, std::memory_order_relaxed ); - if( connected ) queue = true; - } - if( !queue ) return; -#endif - - if( acquired ) - { - auto item = Profiler::QueueSerial(); - MemWrite( &item->hdr.type, QueueType::LockObtain ); - MemWrite( &item->lockObtain.thread, GetThreadHandle() ); - MemWrite( &item->lockObtain.id, m_id ); - MemWrite( &item->lockObtain.time, Profiler::GetTime() ); - Profiler::QueueSerialFinish(); - } - } - - tracy_force_inline bool BeforeLockShared() - { -#ifdef TRACY_ON_DEMAND - bool queue = false; - const auto locks = m_lockCount.fetch_add( 1, std::memory_order_relaxed ); - const auto active = m_active.load( std::memory_order_relaxed ); - if( locks == 0 || active ) - { - const bool connected = GetProfiler().IsConnected(); - if( active != connected ) m_active.store( connected, std::memory_order_relaxed ); - if( connected ) queue = true; - } - if( !queue ) return false; -#endif - - auto item = Profiler::QueueSerial(); - MemWrite( &item->hdr.type, QueueType::LockSharedWait ); - MemWrite( &item->lockWait.thread, GetThreadHandle() ); - MemWrite( &item->lockWait.id, m_id ); - MemWrite( &item->lockWait.time, Profiler::GetTime() ); - Profiler::QueueSerialFinish(); - return true; - } - - tracy_force_inline void AfterLockShared() - { - auto item = Profiler::QueueSerial(); - MemWrite( &item->hdr.type, QueueType::LockSharedObtain ); - MemWrite( &item->lockObtain.thread, GetThreadHandle() ); - MemWrite( &item->lockObtain.id, m_id ); - MemWrite( &item->lockObtain.time, Profiler::GetTime() ); - Profiler::QueueSerialFinish(); - } - - tracy_force_inline void AfterUnlockShared() - { -#ifdef TRACY_ON_DEMAND - m_lockCount.fetch_sub( 1, std::memory_order_relaxed ); - if( !m_active.load( std::memory_order_relaxed ) ) return; - if( !GetProfiler().IsConnected() ) - { - m_active.store( false, std::memory_order_relaxed ); - return; - } -#endif - - auto item = Profiler::QueueSerial(); - MemWrite( &item->hdr.type, QueueType::LockSharedRelease ); - MemWrite( &item->lockReleaseShared.thread, GetThreadHandle() ); - MemWrite( &item->lockReleaseShared.id, m_id ); - MemWrite( &item->lockReleaseShared.time, Profiler::GetTime() ); - Profiler::QueueSerialFinish(); - } - - tracy_force_inline void AfterTryLockShared( bool acquired ) - { -#ifdef TRACY_ON_DEMAND - if( !acquired ) return; - - bool queue = false; - const auto locks = m_lockCount.fetch_add( 1, std::memory_order_relaxed ); - const auto active = m_active.load( std::memory_order_relaxed ); - if( locks == 0 || active ) - { - const bool connected = GetProfiler().IsConnected(); - if( active != connected ) m_active.store( connected, std::memory_order_relaxed ); - if( connected ) queue = true; - } - if( !queue ) return; -#endif - - if( acquired ) - { - auto item = Profiler::QueueSerial(); - MemWrite( &item->hdr.type, QueueType::LockSharedObtain ); - MemWrite( &item->lockObtain.thread, GetThreadHandle() ); - MemWrite( &item->lockObtain.id, m_id ); - MemWrite( &item->lockObtain.time, Profiler::GetTime() ); - Profiler::QueueSerialFinish(); - } - } - - tracy_force_inline void Mark( const SourceLocationData* srcloc ) - { -#ifdef TRACY_ON_DEMAND - const auto active = m_active.load( std::memory_order_relaxed ); - if( !active ) return; - const auto connected = GetProfiler().IsConnected(); - if( !connected ) - { - if( active ) m_active.store( false, std::memory_order_relaxed ); - return; - } -#endif - - auto item = Profiler::QueueSerial(); - MemWrite( &item->hdr.type, QueueType::LockMark ); - MemWrite( &item->lockMark.thread, GetThreadHandle() ); - MemWrite( &item->lockMark.id, m_id ); - MemWrite( &item->lockMark.srcloc, (uint64_t)srcloc ); - Profiler::QueueSerialFinish(); - } - - tracy_force_inline void CustomName( const char* name, size_t size ) - { - assert( size < (std::numeric_limits::max)() ); - auto ptr = (char*)tracy_malloc( size ); - memcpy( ptr, name, size ); - auto item = Profiler::QueueSerial(); - MemWrite( &item->hdr.type, QueueType::LockName ); - MemWrite( &item->lockNameFat.id, m_id ); - MemWrite( &item->lockNameFat.name, (uint64_t)ptr ); - MemWrite( &item->lockNameFat.size, (uint16_t)size ); -#ifdef TRACY_ON_DEMAND - GetProfiler().DeferItem( *item ); -#endif - Profiler::QueueSerialFinish(); - } - -private: - uint32_t m_id; - -#ifdef TRACY_ON_DEMAND - std::atomic m_lockCount; - std::atomic m_active; -#endif -}; - -template -class SharedLockable -{ -public: - tracy_force_inline SharedLockable( const SourceLocationData* srcloc ) - : m_ctx( srcloc ) - { - } - - SharedLockable( const SharedLockable& ) = delete; - SharedLockable& operator=( const SharedLockable& ) = delete; - - tracy_force_inline void lock() - { - const auto runAfter = m_ctx.BeforeLock(); - m_lockable.lock(); - if( runAfter ) m_ctx.AfterLock(); - } - - tracy_force_inline void unlock() - { - m_lockable.unlock(); - m_ctx.AfterUnlock(); - } - - tracy_force_inline bool try_lock() - { - const auto acquired = m_lockable.try_lock(); - m_ctx.AfterTryLock( acquired ); - return acquired; - } - - tracy_force_inline void lock_shared() - { - const auto runAfter = m_ctx.BeforeLockShared(); - m_lockable.lock_shared(); - if( runAfter ) m_ctx.AfterLockShared(); - } - - tracy_force_inline void unlock_shared() - { - m_lockable.unlock_shared(); - m_ctx.AfterUnlockShared(); - } - - tracy_force_inline bool try_lock_shared() - { - const auto acquired = m_lockable.try_lock_shared(); - m_ctx.AfterTryLockShared( acquired ); - return acquired; - } - - tracy_force_inline void Mark( const SourceLocationData* srcloc ) - { - m_ctx.Mark( srcloc ); - } - - tracy_force_inline void CustomName( const char* name, size_t size ) - { - m_ctx.CustomName( name, size ); - } - -private: - T m_lockable; - SharedLockableCtx m_ctx; -}; - - -} - -#endif diff --git a/src/third_party/tracy/client/TracyOverride.cpp b/src/third_party/tracy/client/TracyOverride.cpp deleted file mode 100644 index 591508a7..00000000 --- a/src/third_party/tracy/client/TracyOverride.cpp +++ /dev/null @@ -1,26 +0,0 @@ -#ifdef TRACY_ENABLE -# ifdef __linux__ -# include "TracyDebug.hpp" -# ifdef TRACY_VERBOSE -# include -# include -# endif - -extern "C" int dlclose( void* hnd ) -{ -#ifdef TRACY_VERBOSE - struct link_map* lm; - if( dlinfo( hnd, RTLD_DI_LINKMAP, &lm ) == 0 ) - { - TracyDebug( "Overriding dlclose for %s\n", lm->l_name ); - } - else - { - TracyDebug( "Overriding dlclose for unknown object (%s)\n", dlerror() ); - } -#endif - return 0; -} - -# endif -#endif diff --git a/src/third_party/tracy/client/TracyProfiler.cpp b/src/third_party/tracy/client/TracyProfiler.cpp deleted file mode 100644 index 02a9bbb7..00000000 --- a/src/third_party/tracy/client/TracyProfiler.cpp +++ /dev/null @@ -1,5013 +0,0 @@ -#ifdef TRACY_ENABLE - -#ifdef _WIN32 -# ifndef NOMINMAX -# define NOMINMAX -# endif -# include -# include -# include -# include -# include -# include "../common/TracyUwp.hpp" -#else -# include -# include -#endif - -#ifdef _GNU_SOURCE -# include -#endif - -#ifdef __linux__ -# include -# include -# include -# include -#endif - -#if defined __APPLE__ || defined BSD -# include -# include -#endif - -#if defined __APPLE__ -# include "TargetConditionals.h" -# include -#endif - -#ifdef __ANDROID__ -# include -# include -# include -# include -# include -# include -#endif - -#ifdef __QNX__ -# include -# include -# include -# include -# include -#endif - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "../common/TracyAlign.hpp" -#include "../common/TracyAlloc.hpp" -#include "../common/TracySocket.hpp" -#include "../common/TracySystem.hpp" -#include "../common/TracyYield.hpp" -#include "../common/tracy_lz4.hpp" -#include "tracy_rpmalloc.hpp" -#include "TracyCallstack.hpp" -#include "TracyDebug.hpp" -#include "TracyDxt1.hpp" -#include "TracyScoped.hpp" -#include "TracyProfiler.hpp" -#include "TracyThread.hpp" -#include "TracyArmCpuTable.hpp" -#include "TracySysTrace.hpp" -#include "TracyLock.hpp" -#include "../tracy/TracyC.h" - -#if defined TRACY_MANUAL_LIFETIME && !defined(TRACY_DELAYED_INIT) -# error "TRACY_MANUAL_LIFETIME requires enabled TRACY_DELAYED_INIT" -#endif - -#ifdef TRACY_PORT -# ifndef TRACY_DATA_PORT -# define TRACY_DATA_PORT TRACY_PORT -# endif -# ifndef TRACY_BROADCAST_PORT -# define TRACY_BROADCAST_PORT TRACY_PORT -# endif -#endif - -#ifdef __APPLE__ -# ifndef TRACY_DELAYED_INIT -# define TRACY_DELAYED_INIT -# endif -#else -# ifdef __GNUC__ -# define init_order( val ) __attribute__ ((init_priority(val))) -# else -# define init_order(x) -# endif -#endif - -#if defined _WIN32 -# include -extern "C" typedef LONG (WINAPI *t_RtlGetVersion)( PRTL_OSVERSIONINFOW ); -extern "C" typedef BOOL (WINAPI *t_GetLogicalProcessorInformationEx)( LOGICAL_PROCESSOR_RELATIONSHIP, PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, PDWORD ); -extern "C" typedef char* (WINAPI *t_WineGetVersion)(); -extern "C" typedef char* (WINAPI *t_WineGetBuildId)(); -#else -# include -# include -# include -#endif -#if defined __linux__ -# include -# include -#endif - -#if !defined _WIN32 && ( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 ) -# include "TracyCpuid.hpp" -#endif - -#if !( ( defined _WIN32 && _WIN32_WINNT >= _WIN32_WINNT_VISTA ) || defined __linux__ ) -# include -#endif - -#ifdef __QNX__ -extern char* __progname; -#endif - -namespace tracy -{ - -#ifdef __ANDROID__ -// Implementation helpers of EnsureReadable(address). -// This is so far only needed on Android, where it is common for libraries to be mapped -// with only executable, not readable, permissions. Typical example (line from /proc/self/maps): -/* -746b63b000-746b6dc000 --xp 00042000 07:48 35 /apex/com.android.runtime/lib64/bionic/libc.so -*/ -// See https://github.com/wolfpld/tracy/issues/125 . -// To work around this, we parse /proc/self/maps and we use mprotect to set read permissions -// on any mappings that contain symbols addresses hit by HandleSymbolCodeQuery. - -namespace { -// Holds some information about a single memory mapping. -struct MappingInfo { - // Start of address range. Inclusive. - uintptr_t start_address; - // End of address range. Exclusive, so the mapping is the half-open interval - // [start, end) and its length in bytes is `end - start`. As in /proc/self/maps. - uintptr_t end_address; - // Read/Write/Executable permissions. - bool perm_r, perm_w, perm_x; -}; -} // anonymous namespace - - // Internal implementation helper for LookUpMapping(address). - // - // Parses /proc/self/maps returning a vector. - // /proc/self/maps is assumed to be sorted by ascending address, so the resulting - // vector is sorted by ascending address too. -static std::vector ParseMappings() -{ - std::vector result; - FILE* file = fopen( "/proc/self/maps", "r" ); - if( !file ) return result; - char line[1024]; - while( fgets( line, sizeof( line ), file ) ) - { - uintptr_t start_addr; - uintptr_t end_addr; -#if defined(__LP64__) - if( sscanf( line, "%lx-%lx", &start_addr, &end_addr ) != 2 ) continue; -#else - if (sscanf( line, "%dx-%dx", &start_addr, &end_addr ) != 2 ) continue; -#endif - char* first_space = strchr( line, ' ' ); - if( !first_space ) continue; - char* perm = first_space + 1; - char* second_space = strchr( perm, ' ' ); - if( !second_space || second_space - perm != 4 ) continue; - result.emplace_back(); - auto& mapping = result.back(); - mapping.start_address = start_addr; - mapping.end_address = end_addr; - mapping.perm_r = perm[0] == 'r'; - mapping.perm_w = perm[1] == 'w'; - mapping.perm_x = perm[2] == 'x'; - } - fclose( file ); - return result; -} - -// Internal implementation helper for LookUpMapping(address). -// -// Takes as input an `address` and a known vector `mappings`, assumed to be -// sorted by increasing addresses, as /proc/self/maps seems to be. -// Returns a pointer to the MappingInfo describing the mapping that this -// address belongs to, or nullptr if the address isn't in `mappings`. -static MappingInfo* LookUpMapping(std::vector& mappings, uintptr_t address) -{ - // Comparison function for std::lower_bound. Returns true if all addresses in `m1` - // are lower than `addr`. - auto Compare = []( const MappingInfo& m1, uintptr_t addr ) { - // '<=' because the address ranges are half-open intervals, [start, end). - return m1.end_address <= addr; - }; - auto iter = std::lower_bound( mappings.begin(), mappings.end(), address, Compare ); - if( iter == mappings.end() || iter->start_address > address) { - return nullptr; - } - return &*iter; -} - -// Internal implementation helper for EnsureReadable(address). -// -// Takes as input an `address` and returns a pointer to a MappingInfo -// describing the mapping that this address belongs to, or nullptr if -// the address isn't in any known mapping. -// -// This function is stateful and not reentrant (assumes to be called from -// only one thread). It holds a vector of mappings parsed from /proc/self/maps. -// -// Attempts to react to mappings changes by re-parsing /proc/self/maps. -static MappingInfo* LookUpMapping(uintptr_t address) -{ - // Static state managed by this function. Not constant, we mutate that state as - // we turn some mappings readable. Initially parsed once here, updated as needed below. - static std::vector s_mappings = ParseMappings(); - MappingInfo* mapping = LookUpMapping( s_mappings, address ); - if( mapping ) return mapping; - - // This address isn't in any known mapping. Try parsing again, maybe - // mappings changed. - s_mappings = ParseMappings(); - return LookUpMapping( s_mappings, address ); -} - -// Internal implementation helper for EnsureReadable(address). -// -// Attempts to make the specified `mapping` readable if it isn't already. -// Returns true if and only if the mapping is readable. -static bool EnsureReadable( MappingInfo& mapping ) -{ - if( mapping.perm_r ) - { - // The mapping is already readable. - return true; - } - int prot = PROT_READ; - if( mapping.perm_w ) prot |= PROT_WRITE; - if( mapping.perm_x ) prot |= PROT_EXEC; - if( mprotect( reinterpret_cast( mapping.start_address ), - mapping.end_address - mapping.start_address, prot ) == -1 ) - { - // Failed to make the mapping readable. Shouldn't happen, hasn't - // been observed yet. If it happened in practice, we should consider - // adding a bool to MappingInfo to track this to avoid retrying mprotect - // everytime on such mappings. - return false; - } - // The mapping is now readable. Update `mapping` so the next call will be fast. - mapping.perm_r = true; - return true; -} - -// Attempts to set the read permission on the entire mapping containing the -// specified address. Returns true if and only if the mapping is now readable. -static bool EnsureReadable( uintptr_t address ) -{ - MappingInfo* mapping = LookUpMapping(address); - return mapping && EnsureReadable( *mapping ); -} -#elif defined WIN32 -static bool EnsureReadable( uintptr_t address ) -{ - MEMORY_BASIC_INFORMATION memInfo; - VirtualQuery( reinterpret_cast( address ), &memInfo, sizeof( memInfo ) ); - return memInfo.Protect != PAGE_NOACCESS; -} -#else -static bool EnsureReadable( uintptr_t address ) -{ - return true; -} -#endif - -#ifndef TRACY_DELAYED_INIT - -struct InitTimeWrapper -{ - int64_t val; -}; - -struct ProducerWrapper -{ - tracy::moodycamel::ConcurrentQueue::ExplicitProducer* ptr; -}; - -struct ThreadHandleWrapper -{ - uint32_t val; -}; -#endif - - -#if defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 -static inline void CpuId( uint32_t* regs, uint32_t leaf ) -{ - memset(regs, 0, sizeof(uint32_t) * 4); -#if defined _MSC_VER - __cpuidex( (int*)regs, leaf, 0 ); -#else - __get_cpuid( leaf, regs, regs+1, regs+2, regs+3 ); -#endif -} - -static void InitFailure( const char* msg ) -{ -#if defined _WIN32 - bool hasConsole = false; - bool reopen = false; - const auto attached = AttachConsole( ATTACH_PARENT_PROCESS ); - if( attached ) - { - hasConsole = true; - reopen = true; - } - else - { - const auto err = GetLastError(); - if( err == ERROR_ACCESS_DENIED ) - { - hasConsole = true; - } - } - if( hasConsole ) - { - fprintf( stderr, "Tracy Profiler initialization failure: %s\n", msg ); - if( reopen ) - { - freopen( "CONOUT$", "w", stderr ); - fprintf( stderr, "Tracy Profiler initialization failure: %s\n", msg ); - } - } - else - { -# ifndef TRACY_UWP - MessageBoxA( nullptr, msg, "Tracy Profiler initialization failure", MB_ICONSTOP ); -# endif - } -#else - fprintf( stderr, "Tracy Profiler initialization failure: %s\n", msg ); -#endif - exit( 1 ); -} - -static bool CheckHardwareSupportsInvariantTSC() -{ - const char* noCheck = GetEnvVar( "TRACY_NO_INVARIANT_CHECK" ); - if( noCheck && noCheck[0] == '1' ) return true; - - uint32_t regs[4]; - CpuId( regs, 1 ); - if( !( regs[3] & ( 1 << 4 ) ) ) - { -#if !defined TRACY_TIMER_QPC && !defined TRACY_TIMER_FALLBACK - InitFailure( "CPU doesn't support RDTSC instruction." ); -#else - return false; -#endif - } - CpuId( regs, 0x80000007 ); - if( regs[3] & ( 1 << 8 ) ) return true; - - return false; -} - -#if defined TRACY_TIMER_FALLBACK && defined TRACY_HW_TIMER -bool HardwareSupportsInvariantTSC() -{ - static bool cachedResult = CheckHardwareSupportsInvariantTSC(); - return cachedResult; -} -#endif - -static int64_t SetupHwTimer() -{ -#if !defined TRACY_TIMER_QPC && !defined TRACY_TIMER_FALLBACK - if( !CheckHardwareSupportsInvariantTSC() ) - { -#if defined _WIN32 - InitFailure( "CPU doesn't support invariant TSC.\nDefine TRACY_NO_INVARIANT_CHECK=1 to ignore this error, *if you know what you are doing*.\nAlternatively you may rebuild the application with the TRACY_TIMER_QPC or TRACY_TIMER_FALLBACK define to use lower resolution timer." ); -#else - InitFailure( "CPU doesn't support invariant TSC.\nDefine TRACY_NO_INVARIANT_CHECK=1 to ignore this error, *if you know what you are doing*.\nAlternatively you may rebuild the application with the TRACY_TIMER_FALLBACK define to use lower resolution timer." ); -#endif - } -#endif - - return Profiler::GetTime(); -} -#else -static int64_t SetupHwTimer() -{ - return Profiler::GetTime(); -} -#endif - -static const char* GetProcessName() -{ - const char* processName = "unknown"; -#ifdef _WIN32 - static char buf[_MAX_PATH]; - GetModuleFileNameA( nullptr, buf, _MAX_PATH ); - const char* ptr = buf; - while( *ptr != '\0' ) ptr++; - while( ptr > buf && *ptr != '\\' && *ptr != '/' ) ptr--; - if( ptr > buf ) ptr++; - processName = ptr; -#elif defined __ANDROID__ -# if __ANDROID_API__ >= 21 - auto buf = getprogname(); - if( buf ) processName = buf; -# endif -#elif defined __linux__ && defined _GNU_SOURCE - if( program_invocation_short_name ) processName = program_invocation_short_name; -#elif defined __APPLE__ || defined BSD - auto buf = getprogname(); - if( buf ) processName = buf; -#elif defined __QNX__ - processName = __progname; -#endif - return processName; -} - -static const char* GetProcessExecutablePath() -{ -#ifdef _WIN32 - static char buf[_MAX_PATH]; - GetModuleFileNameA( nullptr, buf, _MAX_PATH ); - return buf; -#elif defined __ANDROID__ - return nullptr; -#elif defined __linux__ && defined _GNU_SOURCE - return program_invocation_name; -#elif defined __APPLE__ - static char buf[1024]; - uint32_t size = 1024; - _NSGetExecutablePath( buf, &size ); - return buf; -#elif defined __DragonFly__ - static char buf[1024]; - readlink( "/proc/curproc/file", buf, 1024 ); - return buf; -#elif defined __FreeBSD__ - static char buf[1024]; - int mib[4]; - mib[0] = CTL_KERN; - mib[1] = KERN_PROC; - mib[2] = KERN_PROC_PATHNAME; - mib[3] = -1; - size_t cb = 1024; - sysctl( mib, 4, buf, &cb, nullptr, 0 ); - return buf; -#elif defined __NetBSD__ - static char buf[1024]; - readlink( "/proc/curproc/exe", buf, 1024 ); - return buf; -#elif defined __QNX__ - static char buf[_PC_PATH_MAX + 1]; - _cmdname(buf); - return buf; -#else - return nullptr; -#endif -} - -#if defined __linux__ && defined __ARM_ARCH -static uint32_t GetHex( char*& ptr, int skip ) -{ - uint32_t ret; - ptr += skip; - char* end; - if( ptr[0] == '0' && ptr[1] == 'x' ) - { - ptr += 2; - ret = strtol( ptr, &end, 16 ); - } - else - { - ret = strtol( ptr, &end, 10 ); - } - ptr = end; - return ret; -} -#endif - -static const char* GetHostInfo() -{ - static char buf[1024]; - auto ptr = buf; -#if defined _WIN32 -# ifdef TRACY_UWP - auto GetVersion = &::GetVersionEx; -# else - auto GetVersion = (t_RtlGetVersion)GetProcAddress( GetModuleHandleA( "ntdll.dll" ), "RtlGetVersion" ); -# endif - if( !GetVersion ) - { -# ifdef __MINGW32__ - ptr += sprintf( ptr, "OS: Windows (MingW)\n" ); -# else - ptr += sprintf( ptr, "OS: Windows\n" ); -# endif - } - else - { - RTL_OSVERSIONINFOW ver = { sizeof( RTL_OSVERSIONINFOW ) }; - GetVersion( &ver ); - -# ifdef __MINGW32__ - ptr += sprintf( ptr, "OS: Windows %i.%i.%i (MingW)\n", (int)ver.dwMajorVersion, (int)ver.dwMinorVersion, (int)ver.dwBuildNumber ); -# else - auto WineGetVersion = (t_WineGetVersion)GetProcAddress( GetModuleHandleA( "ntdll.dll" ), "wine_get_version" ); - auto WineGetBuildId = (t_WineGetBuildId)GetProcAddress( GetModuleHandleA( "ntdll.dll" ), "wine_get_build_id" ); - if( WineGetVersion && WineGetBuildId ) - { - ptr += sprintf( ptr, "OS: Windows %lu.%lu.%lu (Wine %s [%s])\n", ver.dwMajorVersion, ver.dwMinorVersion, ver.dwBuildNumber, WineGetVersion(), WineGetBuildId() ); - } - else - { - ptr += sprintf( ptr, "OS: Windows %lu.%lu.%lu\n", ver.dwMajorVersion, ver.dwMinorVersion, ver.dwBuildNumber ); - } -# endif - } -#elif defined __linux__ - struct utsname utsName; - uname( &utsName ); -# if defined __ANDROID__ - ptr += sprintf( ptr, "OS: Linux %s (Android)\n", utsName.release ); -# else - ptr += sprintf( ptr, "OS: Linux %s\n", utsName.release ); -# endif -#elif defined __APPLE__ -# if TARGET_OS_IPHONE == 1 - ptr += sprintf( ptr, "OS: Darwin (iOS)\n" ); -# elif TARGET_OS_MAC == 1 - ptr += sprintf( ptr, "OS: Darwin (OSX)\n" ); -# else - ptr += sprintf( ptr, "OS: Darwin (unknown)\n" ); -# endif -#elif defined __DragonFly__ - ptr += sprintf( ptr, "OS: BSD (DragonFly)\n" ); -#elif defined __FreeBSD__ - ptr += sprintf( ptr, "OS: BSD (FreeBSD)\n" ); -#elif defined __NetBSD__ - ptr += sprintf( ptr, "OS: BSD (NetBSD)\n" ); -#elif defined __OpenBSD__ - ptr += sprintf( ptr, "OS: BSD (OpenBSD)\n" ); -#elif defined __QNX__ - ptr += sprintf( ptr, "OS: QNX\n" ); -#else - ptr += sprintf( ptr, "OS: unknown\n" ); -#endif - -#if defined _MSC_VER -# if defined __clang__ - ptr += sprintf( ptr, "Compiler: MSVC clang-cl %i.%i.%i\n", __clang_major__, __clang_minor__, __clang_patchlevel__ ); -# else - ptr += sprintf( ptr, "Compiler: MSVC %i\n", _MSC_VER ); -# endif -#elif defined __clang__ - ptr += sprintf( ptr, "Compiler: clang %i.%i.%i\n", __clang_major__, __clang_minor__, __clang_patchlevel__ ); -#elif defined __GNUC__ - ptr += sprintf( ptr, "Compiler: gcc %i.%i.%i\n", __GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__ ); -#else - ptr += sprintf( ptr, "Compiler: unknown\n" ); -#endif - -#if defined _WIN32 - InitWinSock(); - - char hostname[512]; - gethostname( hostname, 512 ); - -# ifdef TRACY_UWP - const char* user = ""; -# else - DWORD userSz = UNLEN+1; - char user[UNLEN+1]; - GetUserNameA( user, &userSz ); -# endif - - ptr += sprintf( ptr, "User: %s@%s\n", user, hostname ); -#else - char hostname[_POSIX_HOST_NAME_MAX]{}; - char user[_POSIX_LOGIN_NAME_MAX]{}; - - gethostname( hostname, _POSIX_HOST_NAME_MAX ); -# if defined __ANDROID__ - const auto login = getlogin(); - if( login ) - { - strcpy( user, login ); - } - else - { - memcpy( user, "(?)", 4 ); - } -# else - getlogin_r( user, _POSIX_LOGIN_NAME_MAX ); -# endif - - ptr += sprintf( ptr, "User: %s@%s\n", user, hostname ); -#endif - -#if defined __i386 || defined _M_IX86 - ptr += sprintf( ptr, "Arch: x86\n" ); -#elif defined __x86_64__ || defined _M_X64 - ptr += sprintf( ptr, "Arch: x64\n" ); -#elif defined __aarch64__ - ptr += sprintf( ptr, "Arch: ARM64\n" ); -#elif defined __ARM_ARCH - ptr += sprintf( ptr, "Arch: ARM\n" ); -#else - ptr += sprintf( ptr, "Arch: unknown\n" ); -#endif - -#if defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 - uint32_t regs[4]; - char cpuModel[4*4*3+1] = {}; - auto modelPtr = cpuModel; - for( uint32_t i=0x80000002; i<0x80000005; ++i ) - { - CpuId( regs, i ); - memcpy( modelPtr, regs, sizeof( regs ) ); modelPtr += sizeof( regs ); - } - - ptr += sprintf( ptr, "CPU: %s\n", cpuModel ); -#elif defined __linux__ && defined __ARM_ARCH - bool cpuFound = false; - FILE* fcpuinfo = fopen( "/proc/cpuinfo", "rb" ); - if( fcpuinfo ) - { - enum { BufSize = 4*1024 }; - char buf[BufSize]; - const auto sz = fread( buf, 1, BufSize, fcpuinfo ); - fclose( fcpuinfo ); - const auto end = buf + sz; - auto cptr = buf; - - uint32_t impl = 0; - uint32_t var = 0; - uint32_t part = 0; - uint32_t rev = 0; - - while( end - cptr > 20 ) - { - while( end - cptr > 20 && memcmp( cptr, "CPU ", 4 ) != 0 ) - { - cptr += 4; - while( end - cptr > 20 && *cptr != '\n' ) cptr++; - cptr++; - } - if( end - cptr <= 20 ) break; - cptr += 4; - if( memcmp( cptr, "implementer\t: ", 14 ) == 0 ) - { - if( impl != 0 ) break; - impl = GetHex( cptr, 14 ); - } - else if( memcmp( cptr, "variant\t: ", 10 ) == 0 ) var = GetHex( cptr, 10 ); - else if( memcmp( cptr, "part\t: ", 7 ) == 0 ) part = GetHex( cptr, 7 ); - else if( memcmp( cptr, "revision\t: ", 11 ) == 0 ) rev = GetHex( cptr, 11 ); - while( *cptr != '\n' && *cptr != '\0' ) cptr++; - cptr++; - } - - if( impl != 0 || var != 0 || part != 0 || rev != 0 ) - { - cpuFound = true; - ptr += sprintf( ptr, "CPU: %s%s r%ip%i\n", DecodeArmImplementer( impl ), DecodeArmPart( impl, part ), var, rev ); - } - } - if( !cpuFound ) - { - ptr += sprintf( ptr, "CPU: unknown\n" ); - } -#elif defined __APPLE__ && TARGET_OS_IPHONE == 1 - { - size_t sz; - sysctlbyname( "hw.machine", nullptr, &sz, nullptr, 0 ); - auto str = (char*)tracy_malloc( sz ); - sysctlbyname( "hw.machine", str, &sz, nullptr, 0 ); - ptr += sprintf( ptr, "Device: %s\n", DecodeIosDevice( str ) ); - tracy_free( str ); - } -#else - ptr += sprintf( ptr, "CPU: unknown\n" ); -#endif -#ifdef __ANDROID__ - char deviceModel[PROP_VALUE_MAX+1]; - char deviceManufacturer[PROP_VALUE_MAX+1]; - __system_property_get( "ro.product.model", deviceModel ); - __system_property_get( "ro.product.manufacturer", deviceManufacturer ); - ptr += sprintf( ptr, "Device: %s %s\n", deviceManufacturer, deviceModel ); -#endif - - ptr += sprintf( ptr, "CPU cores: %i\n", std::thread::hardware_concurrency() ); - -#if defined _WIN32 - MEMORYSTATUSEX statex; - statex.dwLength = sizeof( statex ); - GlobalMemoryStatusEx( &statex ); -# ifdef _MSC_VER - ptr += sprintf( ptr, "RAM: %I64u MB\n", statex.ullTotalPhys / 1024 / 1024 ); -# else - ptr += sprintf( ptr, "RAM: %llu MB\n", statex.ullTotalPhys / 1024 / 1024 ); -# endif -#elif defined __linux__ - struct sysinfo sysInfo; - sysinfo( &sysInfo ); - ptr += sprintf( ptr, "RAM: %lu MB\n", sysInfo.totalram / 1024 / 1024 ); -#elif defined __APPLE__ - size_t memSize; - size_t sz = sizeof( memSize ); - sysctlbyname( "hw.memsize", &memSize, &sz, nullptr, 0 ); - ptr += sprintf( ptr, "RAM: %zu MB\n", memSize / 1024 / 1024 ); -#elif defined BSD - size_t memSize; - size_t sz = sizeof( memSize ); - sysctlbyname( "hw.physmem", &memSize, &sz, nullptr, 0 ); - ptr += sprintf( ptr, "RAM: %zu MB\n", memSize / 1024 / 1024 ); -#elif defined __QNX__ - struct asinfo_entry *entries = SYSPAGE_ENTRY(asinfo); - size_t count = SYSPAGE_ENTRY_SIZE(asinfo) / sizeof(struct asinfo_entry); - char *strings = SYSPAGE_ENTRY(strings)->data; - - uint64_t memSize = 0; - size_t i; - for (i = 0; i < count; i++) { - struct asinfo_entry *entry = &entries[i]; - if (strcmp(strings + entry->name, "ram") == 0) { - memSize += entry->end - entry->start + 1; - } - } - memSize = memSize / 1024 / 1024; - ptr += sprintf( ptr, "RAM: %llu MB\n", memSize); -#else - ptr += sprintf( ptr, "RAM: unknown\n" ); -#endif - - return buf; -} - -static uint64_t GetPid() -{ -#if defined _WIN32 - return uint64_t( GetCurrentProcessId() ); -#else - return uint64_t( getpid() ); -#endif -} - -void Profiler::AckServerQuery() -{ - QueueItem item; - MemWrite( &item.hdr.type, QueueType::AckServerQueryNoop ); - NeedDataSize( QueueDataSize[(int)QueueType::AckServerQueryNoop] ); - AppendDataUnsafe( &item, QueueDataSize[(int)QueueType::AckServerQueryNoop] ); -} - -void Profiler::AckSymbolCodeNotAvailable() -{ - QueueItem item; - MemWrite( &item.hdr.type, QueueType::AckSymbolCodeNotAvailable ); - NeedDataSize( QueueDataSize[(int)QueueType::AckSymbolCodeNotAvailable] ); - AppendDataUnsafe( &item, QueueDataSize[(int)QueueType::AckSymbolCodeNotAvailable] ); -} - -static BroadcastMessage& GetBroadcastMessage( const char* procname, size_t pnsz, int& len, int port ) -{ - static BroadcastMessage msg; - - msg.broadcastVersion = BroadcastVersion; - msg.protocolVersion = ProtocolVersion; - msg.listenPort = port; - msg.pid = GetPid(); - - memcpy( msg.programName, procname, pnsz ); - memset( msg.programName + pnsz, 0, WelcomeMessageProgramNameSize - pnsz ); - - len = int( offsetof( BroadcastMessage, programName ) + pnsz + 1 ); - return msg; -} - -#if defined _WIN32 && !defined TRACY_UWP && !defined TRACY_NO_CRASH_HANDLER -static DWORD s_profilerThreadId = 0; -static DWORD s_symbolThreadId = 0; -static char s_crashText[1024]; - -LONG WINAPI CrashFilter( PEXCEPTION_POINTERS pExp ) -{ - if( !GetProfiler().IsConnected() ) return EXCEPTION_CONTINUE_SEARCH; - - const unsigned ec = pExp->ExceptionRecord->ExceptionCode; - auto msgPtr = s_crashText; - switch( ec ) - { - case EXCEPTION_ACCESS_VIOLATION: - msgPtr += sprintf( msgPtr, "Exception EXCEPTION_ACCESS_VIOLATION (0x%x). ", ec ); - switch( pExp->ExceptionRecord->ExceptionInformation[0] ) - { - case 0: - msgPtr += sprintf( msgPtr, "Read violation at address 0x%" PRIxPTR ".", pExp->ExceptionRecord->ExceptionInformation[1] ); - break; - case 1: - msgPtr += sprintf( msgPtr, "Write violation at address 0x%" PRIxPTR ".", pExp->ExceptionRecord->ExceptionInformation[1] ); - break; - case 8: - msgPtr += sprintf( msgPtr, "DEP violation at address 0x%" PRIxPTR ".", pExp->ExceptionRecord->ExceptionInformation[1] ); - break; - default: - break; - } - break; - case EXCEPTION_ARRAY_BOUNDS_EXCEEDED: - msgPtr += sprintf( msgPtr, "Exception EXCEPTION_ARRAY_BOUNDS_EXCEEDED (0x%x). ", ec ); - break; - case EXCEPTION_DATATYPE_MISALIGNMENT: - msgPtr += sprintf( msgPtr, "Exception EXCEPTION_DATATYPE_MISALIGNMENT (0x%x). ", ec ); - break; - case EXCEPTION_FLT_DIVIDE_BY_ZERO: - msgPtr += sprintf( msgPtr, "Exception EXCEPTION_FLT_DIVIDE_BY_ZERO (0x%x). ", ec ); - break; - case EXCEPTION_ILLEGAL_INSTRUCTION: - msgPtr += sprintf( msgPtr, "Exception EXCEPTION_ILLEGAL_INSTRUCTION (0x%x). ", ec ); - break; - case EXCEPTION_IN_PAGE_ERROR: - msgPtr += sprintf( msgPtr, "Exception EXCEPTION_IN_PAGE_ERROR (0x%x). ", ec ); - break; - case EXCEPTION_INT_DIVIDE_BY_ZERO: - msgPtr += sprintf( msgPtr, "Exception EXCEPTION_INT_DIVIDE_BY_ZERO (0x%x). ", ec ); - break; - case EXCEPTION_PRIV_INSTRUCTION: - msgPtr += sprintf( msgPtr, "Exception EXCEPTION_PRIV_INSTRUCTION (0x%x). ", ec ); - break; - case EXCEPTION_STACK_OVERFLOW: - msgPtr += sprintf( msgPtr, "Exception EXCEPTION_STACK_OVERFLOW (0x%x). ", ec ); - break; - default: - return EXCEPTION_CONTINUE_SEARCH; - } - - { - GetProfiler().SendCallstack( 60, "KiUserExceptionDispatcher" ); - - TracyQueuePrepare( QueueType::CrashReport ); - item->crashReport.time = Profiler::GetTime(); - item->crashReport.text = (uint64_t)s_crashText; - TracyQueueCommit( crashReportThread ); - } - - HANDLE h = CreateToolhelp32Snapshot( TH32CS_SNAPTHREAD, 0 ); - if( h == INVALID_HANDLE_VALUE ) return EXCEPTION_CONTINUE_SEARCH; - - THREADENTRY32 te = { sizeof( te ) }; - if( !Thread32First( h, &te ) ) - { - CloseHandle( h ); - return EXCEPTION_CONTINUE_SEARCH; - } - - const auto pid = GetCurrentProcessId(); - const auto tid = GetCurrentThreadId(); - - do - { - if( te.th32OwnerProcessID == pid && te.th32ThreadID != tid && te.th32ThreadID != s_profilerThreadId && te.th32ThreadID != s_symbolThreadId ) - { - HANDLE th = OpenThread( THREAD_SUSPEND_RESUME, FALSE, te.th32ThreadID ); - if( th != INVALID_HANDLE_VALUE ) - { - SuspendThread( th ); - CloseHandle( th ); - } - } - } - while( Thread32Next( h, &te ) ); - CloseHandle( h ); - - { - TracyLfqPrepare( QueueType::Crash ); - TracyLfqCommit; - } - - std::this_thread::sleep_for( std::chrono::milliseconds( 500 ) ); - GetProfiler().RequestShutdown(); - while( !GetProfiler().HasShutdownFinished() ) { std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) ); }; - - return EXCEPTION_CONTINUE_SEARCH; -} -#endif - -static Profiler* s_instance = nullptr; -static Thread* s_thread; -#ifndef TRACY_NO_FRAME_IMAGE -static Thread* s_compressThread; -#endif -#ifdef TRACY_HAS_CALLSTACK -static Thread* s_symbolThread; -std::atomic s_symbolThreadGone { false }; -#endif -#ifdef TRACY_HAS_SYSTEM_TRACING -static Thread* s_sysTraceThread = nullptr; -#endif - -#if defined __linux__ && !defined TRACY_NO_CRASH_HANDLER -# ifndef TRACY_CRASH_SIGNAL -# define TRACY_CRASH_SIGNAL SIGPWR -# endif - -static long s_profilerTid = 0; -static long s_symbolTid = 0; -static char s_crashText[1024]; -static std::atomic s_alreadyCrashed( false ); - -static void ThreadFreezer( int /*signal*/ ) -{ - for(;;) sleep( 1000 ); -} - -static inline void HexPrint( char*& ptr, uint64_t val ) -{ - if( val == 0 ) - { - *ptr++ = '0'; - return; - } - - static const char HexTable[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' }; - char buf[16]; - auto bptr = buf; - - do - { - *bptr++ = HexTable[val%16]; - val /= 16; - } - while( val > 0 ); - - do - { - *ptr++ = *--bptr; - } - while( bptr != buf ); -} - -static void CrashHandler( int signal, siginfo_t* info, void* /*ucontext*/ ) -{ - bool expected = false; - if( !s_alreadyCrashed.compare_exchange_strong( expected, true ) ) ThreadFreezer( signal ); - - struct sigaction act = {}; - act.sa_handler = SIG_DFL; - sigaction( SIGABRT, &act, nullptr ); - - auto msgPtr = s_crashText; - switch( signal ) - { - case SIGILL: - strcpy( msgPtr, "Illegal Instruction.\n" ); - while( *msgPtr ) msgPtr++; - switch( info->si_code ) - { - case ILL_ILLOPC: - strcpy( msgPtr, "Illegal opcode.\n" ); - break; - case ILL_ILLOPN: - strcpy( msgPtr, "Illegal operand.\n" ); - break; - case ILL_ILLADR: - strcpy( msgPtr, "Illegal addressing mode.\n" ); - break; - case ILL_ILLTRP: - strcpy( msgPtr, "Illegal trap.\n" ); - break; - case ILL_PRVOPC: - strcpy( msgPtr, "Privileged opcode.\n" ); - break; - case ILL_PRVREG: - strcpy( msgPtr, "Privileged register.\n" ); - break; - case ILL_COPROC: - strcpy( msgPtr, "Coprocessor error.\n" ); - break; - case ILL_BADSTK: - strcpy( msgPtr, "Internal stack error.\n" ); - break; - default: - break; - } - break; - case SIGFPE: - strcpy( msgPtr, "Floating-point exception.\n" ); - while( *msgPtr ) msgPtr++; - switch( info->si_code ) - { - case FPE_INTDIV: - strcpy( msgPtr, "Integer divide by zero.\n" ); - break; - case FPE_INTOVF: - strcpy( msgPtr, "Integer overflow.\n" ); - break; - case FPE_FLTDIV: - strcpy( msgPtr, "Floating-point divide by zero.\n" ); - break; - case FPE_FLTOVF: - strcpy( msgPtr, "Floating-point overflow.\n" ); - break; - case FPE_FLTUND: - strcpy( msgPtr, "Floating-point underflow.\n" ); - break; - case FPE_FLTRES: - strcpy( msgPtr, "Floating-point inexact result.\n" ); - break; - case FPE_FLTINV: - strcpy( msgPtr, "Floating-point invalid operation.\n" ); - break; - case FPE_FLTSUB: - strcpy( msgPtr, "Subscript out of range.\n" ); - break; - default: - break; - } - break; - case SIGSEGV: - strcpy( msgPtr, "Invalid memory reference.\n" ); - while( *msgPtr ) msgPtr++; - switch( info->si_code ) - { - case SEGV_MAPERR: - strcpy( msgPtr, "Address not mapped to object.\n" ); - break; - case SEGV_ACCERR: - strcpy( msgPtr, "Invalid permissions for mapped object.\n" ); - break; -# ifdef SEGV_BNDERR - case SEGV_BNDERR: - strcpy( msgPtr, "Failed address bound checks.\n" ); - break; -# endif -# ifdef SEGV_PKUERR - case SEGV_PKUERR: - strcpy( msgPtr, "Access was denied by memory protection keys.\n" ); - break; -# endif - default: - break; - } - break; - case SIGPIPE: - strcpy( msgPtr, "Broken pipe.\n" ); - while( *msgPtr ) msgPtr++; - break; - case SIGBUS: - strcpy( msgPtr, "Bus error.\n" ); - while( *msgPtr ) msgPtr++; - switch( info->si_code ) - { - case BUS_ADRALN: - strcpy( msgPtr, "Invalid address alignment.\n" ); - break; - case BUS_ADRERR: - strcpy( msgPtr, "Nonexistent physical address.\n" ); - break; - case BUS_OBJERR: - strcpy( msgPtr, "Object-specific hardware error.\n" ); - break; -# ifdef BUS_MCEERR_AR - case BUS_MCEERR_AR: - strcpy( msgPtr, "Hardware memory error consumed on a machine check; action required.\n" ); - break; -# endif -# ifdef BUS_MCEERR_AO - case BUS_MCEERR_AO: - strcpy( msgPtr, "Hardware memory error detected in process but not consumed; action optional.\n" ); - break; -# endif - default: - break; - } - break; - case SIGABRT: - strcpy( msgPtr, "Abort signal from abort().\n" ); - break; - default: - abort(); - } - while( *msgPtr ) msgPtr++; - - if( signal != SIGPIPE ) - { - strcpy( msgPtr, "Fault address: 0x" ); - while( *msgPtr ) msgPtr++; - HexPrint( msgPtr, uint64_t( info->si_addr ) ); - *msgPtr++ = '\n'; - } - - { - GetProfiler().SendCallstack( 60, "__kernel_rt_sigreturn" ); - - TracyQueuePrepare( QueueType::CrashReport ); - item->crashReport.time = Profiler::GetTime(); - item->crashReport.text = (uint64_t)s_crashText; - TracyQueueCommit( crashReportThread ); - } - - DIR* dp = opendir( "/proc/self/task" ); - if( !dp ) abort(); - - const auto selfTid = syscall( SYS_gettid ); - - struct dirent* ep; - while( ( ep = readdir( dp ) ) != nullptr ) - { - if( ep->d_name[0] == '.' ) continue; - int tid = atoi( ep->d_name ); - if( tid != selfTid && tid != s_profilerTid && tid != s_symbolTid ) - { - syscall( SYS_tkill, tid, TRACY_CRASH_SIGNAL ); - } - } - closedir( dp ); - -#ifdef TRACY_HAS_CALLSTACK - if( selfTid == s_symbolTid ) s_symbolThreadGone.store( true, std::memory_order_release ); -#endif - - TracyLfqPrepare( QueueType::Crash ); - TracyLfqCommit; - - std::this_thread::sleep_for( std::chrono::milliseconds( 500 ) ); - GetProfiler().RequestShutdown(); - while( !GetProfiler().HasShutdownFinished() ) { std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) ); }; - - abort(); -} -#endif - - -enum { QueuePrealloc = 256 * 1024 }; - -TRACY_API int64_t GetFrequencyQpc() -{ -#if defined _WIN32 - LARGE_INTEGER t; - QueryPerformanceFrequency( &t ); - return t.QuadPart; -#else - return 0; -#endif -} - -#ifdef TRACY_DELAYED_INIT -struct ThreadNameData; -TRACY_API moodycamel::ConcurrentQueue& GetQueue(); - -struct ProfilerData -{ - int64_t initTime = SetupHwTimer(); - moodycamel::ConcurrentQueue queue; - Profiler profiler; - std::atomic lockCounter { 0 }; - std::atomic gpuCtxCounter { 0 }; - std::atomic threadNameData { nullptr }; -}; - -struct ProducerWrapper -{ - ProducerWrapper( ProfilerData& data ) : detail( data.queue ), ptr( data.queue.get_explicit_producer( detail ) ) {} - moodycamel::ProducerToken detail; - tracy::moodycamel::ConcurrentQueue::ExplicitProducer* ptr; -}; - -struct ProfilerThreadData -{ - ProfilerThreadData( ProfilerData& data ) : token( data ), gpuCtx( { nullptr } ) {} - ProducerWrapper token; - GpuCtxWrapper gpuCtx; -# ifdef TRACY_ON_DEMAND - LuaZoneState luaZoneState; -# endif -}; - -std::atomic RpInitDone { 0 }; -std::atomic RpInitLock { 0 }; -thread_local bool RpThreadInitDone = false; -thread_local bool RpThreadShutdown = false; - -# ifdef TRACY_MANUAL_LIFETIME -ProfilerData* s_profilerData = nullptr; -static ProfilerThreadData& GetProfilerThreadData(); -static std::atomic s_isProfilerStarted { false }; -TRACY_API void StartupProfiler() -{ - s_profilerData = (ProfilerData*)tracy_malloc( sizeof( ProfilerData ) ); - new (s_profilerData) ProfilerData(); - s_profilerData->profiler.SpawnWorkerThreads(); - GetProfilerThreadData().token = ProducerWrapper( *s_profilerData ); - s_isProfilerStarted.store( true, std::memory_order_seq_cst ); -} -static ProfilerData& GetProfilerData() -{ - assert( s_profilerData ); - return *s_profilerData; -} -TRACY_API void ShutdownProfiler() -{ - s_isProfilerStarted.store( false, std::memory_order_seq_cst ); - s_profilerData->~ProfilerData(); - tracy_free( s_profilerData ); - s_profilerData = nullptr; - rpmalloc_finalize(); - RpThreadInitDone = false; - RpInitDone.store( 0, std::memory_order_release ); -} -TRACY_API bool IsProfilerStarted() -{ - return s_isProfilerStarted.load( std::memory_order_seq_cst ); -} -# else -static std::atomic profilerDataLock { 0 }; -static std::atomic profilerData { nullptr }; - -static ProfilerData& GetProfilerData() -{ - auto ptr = profilerData.load( std::memory_order_acquire ); - if( !ptr ) - { - int expected = 0; - while( !profilerDataLock.compare_exchange_weak( expected, 1, std::memory_order_release, std::memory_order_relaxed ) ) { expected = 0; YieldThread(); } - ptr = profilerData.load( std::memory_order_acquire ); - if( !ptr ) - { - ptr = (ProfilerData*)tracy_malloc( sizeof( ProfilerData ) ); - new (ptr) ProfilerData(); - profilerData.store( ptr, std::memory_order_release ); - } - profilerDataLock.store( 0, std::memory_order_release ); - } - return *ptr; -} -# endif - -// GCC prior to 8.4 had a bug with function-inline thread_local variables. Versions of glibc beginning with -// 2.18 may attempt to work around this issue, which manifests as a crash while running static destructors -// if this function is compiled into a shared object. Unfortunately, centos7 ships with glibc 2.17. If running -// on old GCC, use the old-fashioned way as a workaround -// See: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85400 -#if !defined(__clang__) && defined(__GNUC__) && ((__GNUC__ < 8) || ((__GNUC__ == 8) && (__GNUC_MINOR__ < 4))) -struct ProfilerThreadDataKey -{ -public: - ProfilerThreadDataKey() - { - int val = pthread_key_create(&m_key, sDestructor); - static_cast(val); // unused - assert(val == 0); - } - ~ProfilerThreadDataKey() - { - int val = pthread_key_delete(m_key); - static_cast(val); // unused - assert(val == 0); - } - ProfilerThreadData& get() - { - void* p = pthread_getspecific(m_key); - if (!p) - { - p = (ProfilerThreadData*)tracy_malloc( sizeof( ProfilerThreadData ) ); - new (p) ProfilerThreadData(GetProfilerData()); - pthread_setspecific(m_key, p); - } - return *static_cast(p); - } -private: - pthread_key_t m_key; - - static void sDestructor(void* p) - { - ((ProfilerThreadData*)p)->~ProfilerThreadData(); - tracy_free(p); - } -}; - -static ProfilerThreadData& GetProfilerThreadData() -{ - static ProfilerThreadDataKey key; - return key.get(); -} -#else -static ProfilerThreadData& GetProfilerThreadData() -{ - thread_local ProfilerThreadData data( GetProfilerData() ); - return data; -} -#endif - -TRACY_API moodycamel::ConcurrentQueue::ExplicitProducer* GetToken() { return GetProfilerThreadData().token.ptr; } -TRACY_API Profiler& GetProfiler() { return GetProfilerData().profiler; } -TRACY_API moodycamel::ConcurrentQueue& GetQueue() { return GetProfilerData().queue; } -TRACY_API int64_t GetInitTime() { return GetProfilerData().initTime; } -TRACY_API std::atomic& GetLockCounter() { return GetProfilerData().lockCounter; } -TRACY_API std::atomic& GetGpuCtxCounter() { return GetProfilerData().gpuCtxCounter; } -TRACY_API GpuCtxWrapper& GetGpuCtx() { return GetProfilerThreadData().gpuCtx; } -TRACY_API uint32_t GetThreadHandle() { return detail::GetThreadHandleImpl(); } -std::atomic& GetThreadNameData() { return GetProfilerData().threadNameData; } - -# ifdef TRACY_ON_DEMAND -TRACY_API LuaZoneState& GetLuaZoneState() { return GetProfilerThreadData().luaZoneState; } -# endif - -# ifndef TRACY_MANUAL_LIFETIME -namespace -{ - const auto& __profiler_init = GetProfiler(); -} -# endif - -#else - -// MSVC static initialization order solution. gcc/clang uses init_order() to avoid all this. - -// 1a. But s_queue is needed for initialization of variables in point 2. -extern moodycamel::ConcurrentQueue s_queue; - -// 2. If these variables would be in the .CRT$XCB section, they would be initialized only in main thread. -thread_local moodycamel::ProducerToken init_order(107) s_token_detail( s_queue ); -thread_local ProducerWrapper init_order(108) s_token { s_queue.get_explicit_producer( s_token_detail ) }; -thread_local ThreadHandleWrapper init_order(104) s_threadHandle { detail::GetThreadHandleImpl() }; - -# ifdef _MSC_VER -// 1. Initialize these static variables before all other variables. -# pragma warning( disable : 4075 ) -# pragma init_seg( ".CRT$XCB" ) -# endif - -static InitTimeWrapper init_order(101) s_initTime { SetupHwTimer() }; -std::atomic init_order(102) RpInitDone( 0 ); -std::atomic init_order(102) RpInitLock( 0 ); -thread_local bool RpThreadInitDone = false; -thread_local bool RpThreadShutdown = false; -moodycamel::ConcurrentQueue init_order(103) s_queue( QueuePrealloc ); -std::atomic init_order(104) s_lockCounter( 0 ); -std::atomic init_order(104) s_gpuCtxCounter( 0 ); - -thread_local GpuCtxWrapper init_order(104) s_gpuCtx { nullptr }; - -struct ThreadNameData; -static std::atomic init_order(104) s_threadNameDataInstance( nullptr ); -std::atomic& s_threadNameData = s_threadNameDataInstance; - -# ifdef TRACY_ON_DEMAND -thread_local LuaZoneState init_order(104) s_luaZoneState { 0, false }; -# endif - -static Profiler init_order(105) s_profiler; - -TRACY_API moodycamel::ConcurrentQueue::ExplicitProducer* GetToken() { return s_token.ptr; } -TRACY_API Profiler& GetProfiler() { return s_profiler; } -TRACY_API moodycamel::ConcurrentQueue& GetQueue() { return s_queue; } -TRACY_API int64_t GetInitTime() { return s_initTime.val; } -TRACY_API std::atomic& GetLockCounter() { return s_lockCounter; } -TRACY_API std::atomic& GetGpuCtxCounter() { return s_gpuCtxCounter; } -TRACY_API GpuCtxWrapper& GetGpuCtx() { return s_gpuCtx; } -TRACY_API uint32_t GetThreadHandle() { return s_threadHandle.val; } - -std::atomic& GetThreadNameData() { return s_threadNameData; } - -# ifdef TRACY_ON_DEMAND -TRACY_API LuaZoneState& GetLuaZoneState() { return s_luaZoneState; } -# endif -#endif - -TRACY_API bool ProfilerAvailable() { return s_instance != nullptr; } -TRACY_API bool ProfilerAllocatorAvailable() { return !RpThreadShutdown; } - -constexpr static size_t SafeSendBufferSize = 65536; - -Profiler::Profiler() - : m_timeBegin( 0 ) - , m_mainThread( detail::GetThreadHandleImpl() ) - , m_epoch( std::chrono::duration_cast( std::chrono::system_clock::now().time_since_epoch() ).count() ) - , m_shutdown( false ) - , m_shutdownManual( false ) - , m_shutdownFinished( false ) - , m_sock( nullptr ) - , m_broadcast( nullptr ) - , m_noExit( false ) - , m_userPort( 0 ) - , m_zoneId( 1 ) - , m_samplingPeriod( 0 ) - , m_stream( LZ4_createStream() ) - , m_buffer( (char*)tracy_malloc( TargetFrameSize*3 ) ) - , m_bufferOffset( 0 ) - , m_bufferStart( 0 ) - , m_lz4Buf( (char*)tracy_malloc( LZ4Size + sizeof( lz4sz_t ) ) ) - , m_serialQueue( 1024*1024 ) - , m_serialDequeue( 1024*1024 ) -#ifndef TRACY_NO_FRAME_IMAGE - , m_fiQueue( 16 ) - , m_fiDequeue( 16 ) -#endif - , m_symbolQueue( 8*1024 ) - , m_frameCount( 0 ) - , m_isConnected( false ) -#ifdef TRACY_ON_DEMAND - , m_connectionId( 0 ) - , m_deferredQueue( 64*1024 ) -#endif - , m_paramCallback( nullptr ) - , m_sourceCallback( nullptr ) - , m_queryImage( nullptr ) - , m_queryData( nullptr ) - , m_crashHandlerInstalled( false ) - , m_programName( nullptr ) -{ - assert( !s_instance ); - s_instance = this; - -#ifndef TRACY_DELAYED_INIT -# ifdef _MSC_VER - // 3. But these variables need to be initialized in main thread within the .CRT$XCB section. Do it here. - s_token_detail = moodycamel::ProducerToken( s_queue ); - s_token = ProducerWrapper { s_queue.get_explicit_producer( s_token_detail ) }; - s_threadHandle = ThreadHandleWrapper { m_mainThread }; -# endif -#endif - - CalibrateTimer(); - CalibrateDelay(); - ReportTopology(); - -#ifdef __linux__ - m_kcore = (KCore*)tracy_malloc( sizeof( KCore ) ); - new(m_kcore) KCore(); -#endif - -#ifndef TRACY_NO_EXIT - const char* noExitEnv = GetEnvVar( "TRACY_NO_EXIT" ); - if( noExitEnv && noExitEnv[0] == '1' ) - { - m_noExit = true; - } -#endif - - const char* userPort = GetEnvVar( "TRACY_PORT" ); - if( userPort ) - { - m_userPort = atoi( userPort ); - } - - m_safeSendBuffer = (char*)tracy_malloc( SafeSendBufferSize ); - -#ifndef _WIN32 - pipe(m_pipe); -# if defined __APPLE__ || defined BSD - // FreeBSD/XNU don't have F_SETPIPE_SZ, so use the default - m_pipeBufSize = 16384; -# else - m_pipeBufSize = (int)(ptrdiff_t)SafeSendBufferSize; - while( fcntl( m_pipe[0], F_SETPIPE_SZ, m_pipeBufSize ) < 0 && errno == EPERM ) m_pipeBufSize /= 2; // too big; reduce - m_pipeBufSize = fcntl( m_pipe[0], F_GETPIPE_SZ ); -# endif - fcntl( m_pipe[1], F_SETFL, O_NONBLOCK ); -#endif - -#if !defined(TRACY_DELAYED_INIT) || !defined(TRACY_MANUAL_LIFETIME) - SpawnWorkerThreads(); -#endif -} - -void Profiler::InstallCrashHandler() -{ - -#if defined __linux__ && !defined TRACY_NO_CRASH_HANDLER - struct sigaction threadFreezer = {}; - threadFreezer.sa_handler = ThreadFreezer; - sigaction( TRACY_CRASH_SIGNAL, &threadFreezer, &m_prevSignal.pwr ); - - struct sigaction crashHandler = {}; - crashHandler.sa_sigaction = CrashHandler; - crashHandler.sa_flags = SA_SIGINFO; - sigaction( SIGILL, &crashHandler, &m_prevSignal.ill ); - sigaction( SIGFPE, &crashHandler, &m_prevSignal.fpe ); - sigaction( SIGSEGV, &crashHandler, &m_prevSignal.segv ); - sigaction( SIGPIPE, &crashHandler, &m_prevSignal.pipe ); - sigaction( SIGBUS, &crashHandler, &m_prevSignal.bus ); - sigaction( SIGABRT, &crashHandler, &m_prevSignal.abrt ); -#endif - -#if defined _WIN32 && !defined TRACY_UWP && !defined TRACY_NO_CRASH_HANDLER - // We cannot use Vectored Exception handling because it catches application-wide frame-based SEH blocks. We only - // want to catch unhandled exceptions. - m_prevHandler = SetUnhandledExceptionFilter( CrashFilter ); -#endif - -#ifndef TRACY_NO_CRASH_HANDLER - m_crashHandlerInstalled = true; -#endif - -} - -void Profiler::RemoveCrashHandler() -{ -#if defined _WIN32 && !defined TRACY_UWP && !defined TRACY_NO_CRASH_HANDLER - if( m_crashHandlerInstalled ) - { - auto prev = SetUnhandledExceptionFilter( (LPTOP_LEVEL_EXCEPTION_FILTER)m_prevHandler ); - if( prev != CrashFilter ) SetUnhandledExceptionFilter( prev ); // A different exception filter was installed over ours => put it back - } -#endif - -#if defined __linux__ && !defined TRACY_NO_CRASH_HANDLER - if( m_crashHandlerInstalled ) - { - auto restore = []( int signum, struct sigaction* prev ) { - struct sigaction old; - sigaction( signum, prev, &old ); - if( old.sa_sigaction != CrashHandler ) sigaction( signum, &old, nullptr ); // A different signal handler was installed over ours => put it back - }; - restore( TRACY_CRASH_SIGNAL, &m_prevSignal.pwr ); - restore( SIGILL, &m_prevSignal.ill ); - restore( SIGFPE, &m_prevSignal.fpe ); - restore( SIGSEGV, &m_prevSignal.segv ); - restore( SIGPIPE, &m_prevSignal.pipe ); - restore( SIGBUS, &m_prevSignal.bus ); - restore( SIGABRT, &m_prevSignal.abrt ); - } -#endif - m_crashHandlerInstalled = false; -} - -void Profiler::SpawnWorkerThreads() -{ -#ifdef TRACY_HAS_SYSTEM_TRACING - // use TRACY_NO_SYS_TRACE=1 to force disabling sys tracing (even if available in the underlying system) - // as it can have significant impact on the size of the traces - const char* noSysTrace = GetEnvVar( "TRACY_NO_SYS_TRACE" ); - const bool disableSystrace = (noSysTrace && noSysTrace[0] == '1'); - if( disableSystrace ) - { - TracyDebug("TRACY: Sys Trace was disabled by 'TRACY_NO_SYS_TRACE=1'\n"); - } - else if( SysTraceStart( m_samplingPeriod ) ) - { - s_sysTraceThread = (Thread*)tracy_malloc( sizeof( Thread ) ); - new(s_sysTraceThread) Thread( SysTraceWorker, nullptr ); - std::this_thread::sleep_for( std::chrono::milliseconds( 1 ) ); - } -#endif - - s_thread = (Thread*)tracy_malloc( sizeof( Thread ) ); - new(s_thread) Thread( LaunchWorker, this ); - -#ifndef TRACY_NO_FRAME_IMAGE - s_compressThread = (Thread*)tracy_malloc( sizeof( Thread ) ); - new(s_compressThread) Thread( LaunchCompressWorker, this ); -#endif - -#ifdef TRACY_HAS_CALLSTACK - s_symbolThread = (Thread*)tracy_malloc( sizeof( Thread ) ); - new(s_symbolThread) Thread( LaunchSymbolWorker, this ); -#endif - -#if defined _WIN32 && !defined TRACY_UWP && !defined TRACY_NO_CRASH_HANDLER - s_profilerThreadId = GetThreadId( s_thread->Handle() ); -# ifdef TRACY_HAS_CALLSTACK - s_symbolThreadId = GetThreadId( s_symbolThread->Handle() ); -# endif -#endif - -#ifdef TRACY_HAS_CALLSTACK - InitCallstackCritical(); -#endif - - m_timeBegin.store( GetTime(), std::memory_order_relaxed ); -} - -Profiler::~Profiler() -{ - m_shutdown.store( true, std::memory_order_relaxed ); - - RemoveCrashHandler(); - -#ifdef TRACY_HAS_SYSTEM_TRACING - if( s_sysTraceThread ) - { - SysTraceStop(); - s_sysTraceThread->~Thread(); - tracy_free( s_sysTraceThread ); - } -#endif - -#ifdef TRACY_HAS_CALLSTACK - s_symbolThread->~Thread(); - tracy_free( s_symbolThread ); -#endif - -#ifndef TRACY_NO_FRAME_IMAGE - s_compressThread->~Thread(); - tracy_free( s_compressThread ); -#endif - - s_thread->~Thread(); - tracy_free( s_thread ); - -#ifdef TRACY_HAS_CALLSTACK - EndCallstack(); -#endif - -#ifdef __linux__ - m_kcore->~KCore(); - tracy_free( m_kcore ); -#endif - -#ifndef _WIN32 - close( m_pipe[0] ); - close( m_pipe[1] ); -#endif - tracy_free( m_safeSendBuffer ); - - tracy_free( m_lz4Buf ); - tracy_free( m_buffer ); - LZ4_freeStream( (LZ4_stream_t*)m_stream ); - - if( m_sock ) - { - m_sock->~Socket(); - tracy_free( m_sock ); - } - - if( m_broadcast ) - { - m_broadcast->~UdpBroadcast(); - tracy_free( m_broadcast ); - } - - assert( s_instance ); - s_instance = nullptr; -} - -bool Profiler::ShouldExit() -{ - return s_instance->m_shutdown.load( std::memory_order_relaxed ); -} - -void Profiler::Worker() -{ -#if defined __linux__ && !defined TRACY_NO_CRASH_HANDLER - s_profilerTid = syscall( SYS_gettid ); -#endif - - ThreadExitHandler threadExitHandler; - - SetThreadName( "Tracy Profiler" ); - -#ifdef TRACY_DATA_PORT - const bool dataPortSearch = false; - auto dataPort = m_userPort != 0 ? m_userPort : TRACY_DATA_PORT; -#else - const bool dataPortSearch = m_userPort == 0; - auto dataPort = m_userPort != 0 ? m_userPort : 8086; -#endif -#ifdef TRACY_BROADCAST_PORT - const auto broadcastPort = TRACY_BROADCAST_PORT; -#else - const auto broadcastPort = 8086; -#endif - - while( m_timeBegin.load( std::memory_order_relaxed ) == 0 ) std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) ); - -#ifdef TRACY_USE_RPMALLOC - rpmalloc_thread_initialize(); -#endif - - m_exectime = 0; - const auto execname = GetProcessExecutablePath(); - if( execname ) - { - struct stat st; - if( stat( execname, &st ) == 0 ) - { - m_exectime = (uint64_t)st.st_mtime; - } - } - - const auto procname = GetProcessName(); - const auto pnsz = std::min( strlen( procname ), WelcomeMessageProgramNameSize - 1 ); - - const auto hostinfo = GetHostInfo(); - const auto hisz = std::min( strlen( hostinfo ), WelcomeMessageHostInfoSize - 1 ); - - const uint64_t pid = GetPid(); - - uint8_t flags = 0; - -#ifdef TRACY_ON_DEMAND - flags |= WelcomeFlag::OnDemand; -#endif -#ifdef __APPLE__ - flags |= WelcomeFlag::IsApple; -#endif -#ifndef TRACY_NO_CODE_TRANSFER - flags |= WelcomeFlag::CodeTransfer; -#endif -#ifdef _WIN32 - flags |= WelcomeFlag::CombineSamples; -# ifndef TRACY_NO_CONTEXT_SWITCH - flags |= WelcomeFlag::IdentifySamples; -# endif -#endif - -#if defined __i386 || defined _M_IX86 - uint8_t cpuArch = CpuArchX86; -#elif defined __x86_64__ || defined _M_X64 - uint8_t cpuArch = CpuArchX64; -#elif defined __aarch64__ - uint8_t cpuArch = CpuArchArm64; -#elif defined __ARM_ARCH - uint8_t cpuArch = CpuArchArm32; -#else - uint8_t cpuArch = CpuArchUnknown; -#endif - -#if defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 - uint32_t regs[4]; - char manufacturer[12]; - CpuId( regs, 0 ); - memcpy( manufacturer, regs+1, 4 ); - memcpy( manufacturer+4, regs+3, 4 ); - memcpy( manufacturer+8, regs+2, 4 ); - - CpuId( regs, 1 ); - uint32_t cpuId = ( regs[0] & 0xFFF ) | ( ( regs[0] & 0xFFF0000 ) >> 4 ); -#else - const char manufacturer[12] = {}; - uint32_t cpuId = 0; -#endif - - WelcomeMessage welcome; - MemWrite( &welcome.timerMul, m_timerMul ); - MemWrite( &welcome.initBegin, GetInitTime() ); - MemWrite( &welcome.initEnd, m_timeBegin.load( std::memory_order_relaxed ) ); - MemWrite( &welcome.delay, m_delay ); - MemWrite( &welcome.resolution, m_resolution ); - MemWrite( &welcome.epoch, m_epoch ); - MemWrite( &welcome.exectime, m_exectime ); - MemWrite( &welcome.pid, pid ); - MemWrite( &welcome.samplingPeriod, m_samplingPeriod ); - MemWrite( &welcome.flags, flags ); - MemWrite( &welcome.cpuArch, cpuArch ); - memcpy( welcome.cpuManufacturer, manufacturer, 12 ); - MemWrite( &welcome.cpuId, cpuId ); - memcpy( welcome.programName, procname, pnsz ); - memset( welcome.programName + pnsz, 0, WelcomeMessageProgramNameSize - pnsz ); - memcpy( welcome.hostInfo, hostinfo, hisz ); - memset( welcome.hostInfo + hisz, 0, WelcomeMessageHostInfoSize - hisz ); - - moodycamel::ConsumerToken token( GetQueue() ); - - ListenSocket listen; - bool isListening = false; - if( !dataPortSearch ) - { - isListening = listen.Listen( dataPort, 4 ); - } - else - { - for( uint32_t i=0; i<20; i++ ) - { - if( listen.Listen( dataPort+i, 4 ) ) - { - dataPort += i; - isListening = true; - break; - } - } - } - if( !isListening ) - { - for(;;) - { - if( ShouldExit() ) - { - m_shutdownFinished.store( true, std::memory_order_relaxed ); - return; - } - - ClearQueues( token ); - std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) ); - } - } - -#ifndef TRACY_NO_BROADCAST - m_broadcast = (UdpBroadcast*)tracy_malloc( sizeof( UdpBroadcast ) ); - new(m_broadcast) UdpBroadcast(); -# ifdef TRACY_ONLY_LOCALHOST - const char* addr = "127.255.255.255"; -# elif defined TRACY_CLIENT_ADDRESS - const char* addr = TRACY_CLIENT_ADDRESS; -# elif defined __QNX__ - // global broadcast address of 255.255.255.255 is not well-supported by QNX, - // use the interface broadcast address instead, e.g. "const char* addr = 192.168.1.255;" -# error Need to specify TRACY_CLIENT_ADDRESS for a QNX target. -# else - const char* addr = "255.255.255.255"; -# endif - if( !m_broadcast->Open( addr, broadcastPort ) ) - { - m_broadcast->~UdpBroadcast(); - tracy_free( m_broadcast ); - m_broadcast = nullptr; - } -#endif - - int broadcastLen = 0; - auto& broadcastMsg = GetBroadcastMessage( procname, pnsz, broadcastLen, dataPort ); - uint64_t lastBroadcast = 0; - - // Connections loop. - // Each iteration of the loop handles whole connection. Multiple iterations will only - // happen in the on-demand mode or when handshake fails. - for(;;) - { - // Wait for incoming connection - for(;;) - { -#ifndef TRACY_NO_EXIT - if( !m_noExit && ShouldExit() ) - { - if( m_broadcast ) - { - broadcastMsg.activeTime = -1; - m_broadcast->Send( broadcastPort, &broadcastMsg, broadcastLen ); - } - m_shutdownFinished.store( true, std::memory_order_relaxed ); - return; - } -#endif - m_sock = listen.Accept(); - if( m_sock ) break; -#ifndef TRACY_ON_DEMAND - ProcessSysTime(); -# ifdef TRACY_HAS_SYSPOWER - m_sysPower.Tick(); -# endif -#endif - - if( m_broadcast ) - { - const auto t = std::chrono::high_resolution_clock::now().time_since_epoch().count(); - if( t - lastBroadcast > 3000000000 ) // 3s - { - m_programNameLock.lock(); - if( m_programName ) - { - broadcastMsg = GetBroadcastMessage( m_programName, strlen( m_programName ), broadcastLen, dataPort ); - m_programName = nullptr; - } - m_programNameLock.unlock(); - - lastBroadcast = t; - const auto ts = std::chrono::duration_cast( std::chrono::system_clock::now().time_since_epoch() ).count(); - broadcastMsg.activeTime = int32_t( ts - m_epoch ); - assert( broadcastMsg.activeTime >= 0 ); - m_broadcast->Send( broadcastPort, &broadcastMsg, broadcastLen ); - } - } - } - - if( m_broadcast ) - { - lastBroadcast = 0; - broadcastMsg.activeTime = -1; - m_broadcast->Send( broadcastPort, &broadcastMsg, broadcastLen ); - } - - // Handshake - { - char shibboleth[HandshakeShibbolethSize]; - auto res = m_sock->ReadRaw( shibboleth, HandshakeShibbolethSize, 2000 ); - if( !res || memcmp( shibboleth, HandshakeShibboleth, HandshakeShibbolethSize ) != 0 ) - { - m_sock->~Socket(); - tracy_free( m_sock ); - m_sock = nullptr; - continue; - } - - uint32_t protocolVersion; - res = m_sock->ReadRaw( &protocolVersion, sizeof( protocolVersion ), 2000 ); - if( !res ) - { - m_sock->~Socket(); - tracy_free( m_sock ); - m_sock = nullptr; - continue; - } - - if( protocolVersion != ProtocolVersion ) - { - HandshakeStatus status = HandshakeProtocolMismatch; - m_sock->Send( &status, sizeof( status ) ); - m_sock->~Socket(); - tracy_free( m_sock ); - m_sock = nullptr; - continue; - } - } - -#ifdef TRACY_ON_DEMAND - const auto currentTime = GetTime(); - ClearQueues( token ); - m_connectionId.fetch_add( 1, std::memory_order_release ); -#endif - m_isConnected.store( true, std::memory_order_release ); - InstallCrashHandler(); - - HandshakeStatus handshake = HandshakeWelcome; - m_sock->Send( &handshake, sizeof( handshake ) ); - - LZ4_resetStream( (LZ4_stream_t*)m_stream ); - m_sock->Send( &welcome, sizeof( welcome ) ); - - m_threadCtx = 0; - m_refTimeSerial = 0; - m_refTimeCtx = 0; - m_refTimeGpu = 0; - -#ifdef TRACY_ON_DEMAND - OnDemandPayloadMessage onDemand; - onDemand.frames = m_frameCount.load( std::memory_order_relaxed ); - onDemand.currentTime = currentTime; - - m_sock->Send( &onDemand, sizeof( onDemand ) ); - - m_deferredLock.lock(); - for( auto& item : m_deferredQueue ) - { - uint64_t ptr; - uint16_t size; - const auto idx = MemRead( &item.hdr.idx ); - switch( (QueueType)idx ) - { - case QueueType::MessageAppInfo: - ptr = MemRead( &item.messageFat.text ); - size = MemRead( &item.messageFat.size ); - SendSingleString( (const char*)ptr, size ); - break; - case QueueType::LockName: - ptr = MemRead( &item.lockNameFat.name ); - size = MemRead( &item.lockNameFat.size ); - SendSingleString( (const char*)ptr, size ); - break; - case QueueType::GpuContextName: - ptr = MemRead( &item.gpuContextNameFat.ptr ); - size = MemRead( &item.gpuContextNameFat.size ); - SendSingleString( (const char*)ptr, size ); - break; - default: - break; - } - AppendData( &item, QueueDataSize[idx] ); - } - m_deferredLock.unlock(); -#endif - - // Main communications loop - int keepAlive = 0; - for(;;) - { - ProcessSysTime(); -#ifdef TRACY_HAS_SYSPOWER - m_sysPower.Tick(); -#endif - const auto status = Dequeue( token ); - const auto serialStatus = DequeueSerial(); - if( status == DequeueStatus::ConnectionLost || serialStatus == DequeueStatus::ConnectionLost ) - { - break; - } - else if( status == DequeueStatus::QueueEmpty && serialStatus == DequeueStatus::QueueEmpty ) - { - if( ShouldExit() ) break; - if( m_bufferOffset != m_bufferStart ) - { - if( !CommitData() ) break; - } - if( keepAlive == 500 ) - { - QueueItem ka; - ka.hdr.type = QueueType::KeepAlive; - AppendData( &ka, QueueDataSize[ka.hdr.idx] ); - if( !CommitData() ) break; - - keepAlive = 0; - } - else if( !m_sock->HasData() ) - { - keepAlive++; - std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) ); - } - } - else - { - keepAlive = 0; - } - - bool connActive = true; - while( m_sock->HasData() ) - { - connActive = HandleServerQuery(); - if( !connActive ) break; - } - if( !connActive ) break; - } - if( ShouldExit() ) break; - - m_isConnected.store( false, std::memory_order_release ); - RemoveCrashHandler(); - -#ifdef TRACY_ON_DEMAND - m_bufferOffset = 0; - m_bufferStart = 0; -#endif - - m_sock->~Socket(); - tracy_free( m_sock ); - m_sock = nullptr; - -#ifndef TRACY_ON_DEMAND - // Client is no longer available here. Accept incoming connections, but reject handshake. - for(;;) - { - if( ShouldExit() ) - { - m_shutdownFinished.store( true, std::memory_order_relaxed ); - return; - } - - ClearQueues( token ); - - m_sock = listen.Accept(); - if( m_sock ) - { - char shibboleth[HandshakeShibbolethSize]; - auto res = m_sock->ReadRaw( shibboleth, HandshakeShibbolethSize, 1000 ); - if( !res || memcmp( shibboleth, HandshakeShibboleth, HandshakeShibbolethSize ) != 0 ) - { - m_sock->~Socket(); - tracy_free( m_sock ); - m_sock = nullptr; - continue; - } - - uint32_t protocolVersion; - res = m_sock->ReadRaw( &protocolVersion, sizeof( protocolVersion ), 1000 ); - if( !res ) - { - m_sock->~Socket(); - tracy_free( m_sock ); - m_sock = nullptr; - continue; - } - - HandshakeStatus status = HandshakeNotAvailable; - m_sock->Send( &status, sizeof( status ) ); - m_sock->~Socket(); - tracy_free( m_sock ); - } - } -#endif - } - // End of connections loop - - // Wait for symbols thread to terminate. Symbol resolution will continue in this thread. -#ifdef TRACY_HAS_CALLSTACK - while( s_symbolThreadGone.load() == false ) { YieldThread(); } -#endif - - // Client is exiting. Send items remaining in queues. - for(;;) - { - const auto status = Dequeue( token ); - const auto serialStatus = DequeueSerial(); - if( status == DequeueStatus::ConnectionLost || serialStatus == DequeueStatus::ConnectionLost ) - { - m_shutdownFinished.store( true, std::memory_order_relaxed ); - return; - } - else if( status == DequeueStatus::QueueEmpty && serialStatus == DequeueStatus::QueueEmpty ) - { - if( m_bufferOffset != m_bufferStart ) CommitData(); - break; - } - - while( m_sock->HasData() ) - { - if( !HandleServerQuery() ) - { - m_shutdownFinished.store( true, std::memory_order_relaxed ); - return; - } - } - -#ifdef TRACY_HAS_CALLSTACK - for(;;) - { - auto si = m_symbolQueue.front(); - if( !si ) break; - HandleSymbolQueueItem( *si ); - m_symbolQueue.pop(); - } -#endif - } - - // Send client termination notice to the server - QueueItem terminate; - MemWrite( &terminate.hdr.type, QueueType::Terminate ); - if( !SendData( (const char*)&terminate, 1 ) ) - { - m_shutdownFinished.store( true, std::memory_order_relaxed ); - return; - } - // Handle remaining server queries - for(;;) - { - while( m_sock->HasData() ) - { - if( !HandleServerQuery() ) - { - m_shutdownFinished.store( true, std::memory_order_relaxed ); - return; - } - } -#ifdef TRACY_HAS_CALLSTACK - for(;;) - { - auto si = m_symbolQueue.front(); - if( !si ) break; - HandleSymbolQueueItem( *si ); - m_symbolQueue.pop(); - } -#endif - const auto status = Dequeue( token ); - const auto serialStatus = DequeueSerial(); - if( status == DequeueStatus::ConnectionLost || serialStatus == DequeueStatus::ConnectionLost ) - { - m_shutdownFinished.store( true, std::memory_order_relaxed ); - return; - } - if( m_bufferOffset != m_bufferStart ) - { - if( !CommitData() ) - { - m_shutdownFinished.store( true, std::memory_order_relaxed ); - return; - } - } - } -} - -#ifndef TRACY_NO_FRAME_IMAGE -void Profiler::CompressWorker() -{ - ThreadExitHandler threadExitHandler; - SetThreadName( "Tracy DXT1" ); - while( m_timeBegin.load( std::memory_order_relaxed ) == 0 ) std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) ); - -#ifdef TRACY_USE_RPMALLOC - rpmalloc_thread_initialize(); -#endif - - for(;;) - { - const auto shouldExit = ShouldExit(); - - { - bool lockHeld = true; - while( !m_fiLock.try_lock() ) - { - if( m_shutdownManual.load( std::memory_order_relaxed ) ) - { - lockHeld = false; - break; - } - } - if( !m_fiQueue.empty() ) m_fiQueue.swap( m_fiDequeue ); - if( lockHeld ) - { - m_fiLock.unlock(); - } - } - - const auto sz = m_fiDequeue.size(); - if( sz > 0 ) - { - auto fi = m_fiDequeue.data(); - auto end = fi + sz; - while( fi != end ) - { - const auto w = fi->w; - const auto h = fi->h; - const auto csz = size_t( w * h / 2 ); - auto etc1buf = (char*)tracy_malloc( csz ); - CompressImageDxt1( (const char*)fi->image, etc1buf, w, h ); - tracy_free( fi->image ); - - TracyLfqPrepare( QueueType::FrameImage ); - MemWrite( &item->frameImageFat.image, (uint64_t)etc1buf ); - MemWrite( &item->frameImageFat.frame, fi->frame ); - MemWrite( &item->frameImageFat.w, w ); - MemWrite( &item->frameImageFat.h, h ); - uint8_t flip = fi->flip; - MemWrite( &item->frameImageFat.flip, flip ); - TracyLfqCommit; - - fi++; - } - m_fiDequeue.clear(); - } - else - { - std::this_thread::sleep_for( std::chrono::milliseconds( 20 ) ); - } - - if( shouldExit ) - { - return; - } - } -} -#endif - -static void FreeAssociatedMemory( const QueueItem& item ) -{ - if( item.hdr.idx >= (int)QueueType::Terminate ) return; - - uint64_t ptr; - switch( item.hdr.type ) - { - case QueueType::ZoneText: - case QueueType::ZoneName: - ptr = MemRead( &item.zoneTextFat.text ); - tracy_free( (void*)ptr ); - break; - case QueueType::MessageColor: - case QueueType::MessageColorCallstack: - ptr = MemRead( &item.messageColorFat.text ); - tracy_free( (void*)ptr ); - break; - case QueueType::Message: - case QueueType::MessageCallstack: -#ifndef TRACY_ON_DEMAND - case QueueType::MessageAppInfo: -#endif - ptr = MemRead( &item.messageFat.text ); - tracy_free( (void*)ptr ); - break; - case QueueType::ZoneBeginAllocSrcLoc: - case QueueType::ZoneBeginAllocSrcLocCallstack: - ptr = MemRead( &item.zoneBegin.srcloc ); - tracy_free( (void*)ptr ); - break; - case QueueType::GpuZoneBeginAllocSrcLoc: - case QueueType::GpuZoneBeginAllocSrcLocCallstack: - case QueueType::GpuZoneBeginAllocSrcLocSerial: - case QueueType::GpuZoneBeginAllocSrcLocCallstackSerial: - ptr = MemRead( &item.gpuZoneBegin.srcloc ); - tracy_free( (void*)ptr ); - break; - case QueueType::CallstackSerial: - case QueueType::Callstack: - ptr = MemRead( &item.callstackFat.ptr ); - tracy_free( (void*)ptr ); - break; - case QueueType::CallstackAlloc: - ptr = MemRead( &item.callstackAllocFat.nativePtr ); - tracy_free( (void*)ptr ); - ptr = MemRead( &item.callstackAllocFat.ptr ); - tracy_free( (void*)ptr ); - break; - case QueueType::CallstackSample: - case QueueType::CallstackSampleContextSwitch: - ptr = MemRead( &item.callstackSampleFat.ptr ); - tracy_free( (void*)ptr ); - break; - case QueueType::FrameImage: - ptr = MemRead( &item.frameImageFat.image ); - tracy_free( (void*)ptr ); - break; -#ifdef TRACY_HAS_CALLSTACK - case QueueType::CallstackFrameSize: - { - InitRpmalloc(); - auto size = MemRead( &item.callstackFrameSizeFat.size ); - auto data = (const CallstackEntry*)MemRead( &item.callstackFrameSizeFat.data ); - for( uint8_t i=0; i( &item.symbolInformationFat.needFree ); - if( needFree ) - { - ptr = MemRead( &item.symbolInformationFat.fileString ); - tracy_free( (void*)ptr ); - } - break; - } - case QueueType::SymbolCodeMetadata: - ptr = MemRead( &item.symbolCodeMetadata.ptr ); - tracy_free( (void*)ptr ); - break; -#endif -#ifndef TRACY_ON_DEMAND - case QueueType::LockName: - ptr = MemRead( &item.lockNameFat.name ); - tracy_free( (void*)ptr ); - break; - case QueueType::GpuContextName: - ptr = MemRead( &item.gpuContextNameFat.ptr ); - tracy_free( (void*)ptr ); - break; -#endif -#ifdef TRACY_ON_DEMAND - case QueueType::MessageAppInfo: - case QueueType::GpuContextName: - // Don't free memory associated with deferred messages. - break; -#endif -#ifdef TRACY_HAS_SYSTEM_TRACING - case QueueType::ExternalNameMetadata: - ptr = MemRead( &item.externalNameMetadata.name ); - tracy_free( (void*)ptr ); - ptr = MemRead( &item.externalNameMetadata.threadName ); - tracy_free_fast( (void*)ptr ); - break; -#endif - case QueueType::SourceCodeMetadata: - ptr = MemRead( &item.sourceCodeMetadata.ptr ); - tracy_free( (void*)ptr ); - break; - default: - break; - } -} - -void Profiler::ClearQueues( moodycamel::ConsumerToken& token ) -{ - for(;;) - { - const auto sz = GetQueue().try_dequeue_bulk_single( token, [](const uint64_t&){}, []( QueueItem* item, size_t sz ) { assert( sz > 0 ); while( sz-- > 0 ) FreeAssociatedMemory( *item++ ); } ); - if( sz == 0 ) break; - } - - ClearSerial(); -} - -void Profiler::ClearSerial() -{ - bool lockHeld = true; - while( !m_serialLock.try_lock() ) - { - if( m_shutdownManual.load( std::memory_order_relaxed ) ) - { - lockHeld = false; - break; - } - } - for( auto& v : m_serialQueue ) FreeAssociatedMemory( v ); - m_serialQueue.clear(); - if( lockHeld ) - { - m_serialLock.unlock(); - } - - for( auto& v : m_serialDequeue ) FreeAssociatedMemory( v ); - m_serialDequeue.clear(); -} - -Profiler::DequeueStatus Profiler::Dequeue( moodycamel::ConsumerToken& token ) -{ - bool connectionLost = false; - const auto sz = GetQueue().try_dequeue_bulk_single( token, - [this, &connectionLost] ( const uint32_t& threadId ) - { - if( ThreadCtxCheck( threadId ) == ThreadCtxStatus::ConnectionLost ) connectionLost = true; - }, - [this, &connectionLost] ( QueueItem* item, size_t sz ) - { - if( connectionLost ) return; - InitRpmalloc(); - assert( sz > 0 ); - int64_t refThread = m_refTimeThread; - int64_t refCtx = m_refTimeCtx; - int64_t refGpu = m_refTimeGpu; - while( sz-- > 0 ) - { - uint64_t ptr; - uint16_t size; - auto idx = MemRead( &item->hdr.idx ); - if( idx < (int)QueueType::Terminate ) - { - switch( (QueueType)idx ) - { - case QueueType::ZoneText: - case QueueType::ZoneName: - ptr = MemRead( &item->zoneTextFat.text ); - size = MemRead( &item->zoneTextFat.size ); - SendSingleString( (const char*)ptr, size ); - tracy_free_fast( (void*)ptr ); - break; - case QueueType::Message: - case QueueType::MessageCallstack: - ptr = MemRead( &item->messageFat.text ); - size = MemRead( &item->messageFat.size ); - SendSingleString( (const char*)ptr, size ); - tracy_free_fast( (void*)ptr ); - break; - case QueueType::MessageColor: - case QueueType::MessageColorCallstack: - ptr = MemRead( &item->messageColorFat.text ); - size = MemRead( &item->messageColorFat.size ); - SendSingleString( (const char*)ptr, size ); - tracy_free_fast( (void*)ptr ); - break; - case QueueType::MessageAppInfo: - ptr = MemRead( &item->messageFat.text ); - size = MemRead( &item->messageFat.size ); - SendSingleString( (const char*)ptr, size ); -#ifndef TRACY_ON_DEMAND - tracy_free_fast( (void*)ptr ); -#endif - break; - case QueueType::ZoneBeginAllocSrcLoc: - case QueueType::ZoneBeginAllocSrcLocCallstack: - { - int64_t t = MemRead( &item->zoneBegin.time ); - int64_t dt = t - refThread; - refThread = t; - MemWrite( &item->zoneBegin.time, dt ); - ptr = MemRead( &item->zoneBegin.srcloc ); - SendSourceLocationPayload( ptr ); - tracy_free_fast( (void*)ptr ); - break; - } - case QueueType::Callstack: - ptr = MemRead( &item->callstackFat.ptr ); - SendCallstackPayload( ptr ); - tracy_free_fast( (void*)ptr ); - break; - case QueueType::CallstackAlloc: - ptr = MemRead( &item->callstackAllocFat.nativePtr ); - if( ptr != 0 ) - { - CutCallstack( (void*)ptr, "lua_pcall" ); - SendCallstackPayload( ptr ); - tracy_free_fast( (void*)ptr ); - } - ptr = MemRead( &item->callstackAllocFat.ptr ); - SendCallstackAlloc( ptr ); - tracy_free_fast( (void*)ptr ); - break; - case QueueType::CallstackSample: - case QueueType::CallstackSampleContextSwitch: - { - ptr = MemRead( &item->callstackSampleFat.ptr ); - SendCallstackPayload64( ptr ); - tracy_free_fast( (void*)ptr ); - int64_t t = MemRead( &item->callstackSampleFat.time ); - int64_t dt = t - refCtx; - refCtx = t; - MemWrite( &item->callstackSampleFat.time, dt ); - break; - } - case QueueType::FrameImage: - { - ptr = MemRead( &item->frameImageFat.image ); - const auto w = MemRead( &item->frameImageFat.w ); - const auto h = MemRead( &item->frameImageFat.h ); - const auto csz = size_t( w * h / 2 ); - SendLongString( ptr, (const char*)ptr, csz, QueueType::FrameImageData ); - tracy_free_fast( (void*)ptr ); - break; - } - case QueueType::ZoneBegin: - case QueueType::ZoneBeginCallstack: - { - int64_t t = MemRead( &item->zoneBegin.time ); - int64_t dt = t - refThread; - refThread = t; - MemWrite( &item->zoneBegin.time, dt ); - break; - } - case QueueType::ZoneEnd: - { - int64_t t = MemRead( &item->zoneEnd.time ); - int64_t dt = t - refThread; - refThread = t; - MemWrite( &item->zoneEnd.time, dt ); - break; - } - case QueueType::GpuZoneBegin: - case QueueType::GpuZoneBeginCallstack: - { - int64_t t = MemRead( &item->gpuZoneBegin.cpuTime ); - int64_t dt = t - refThread; - refThread = t; - MemWrite( &item->gpuZoneBegin.cpuTime, dt ); - break; - } - case QueueType::GpuZoneBeginAllocSrcLoc: - case QueueType::GpuZoneBeginAllocSrcLocCallstack: - { - int64_t t = MemRead( &item->gpuZoneBegin.cpuTime ); - int64_t dt = t - refThread; - refThread = t; - MemWrite( &item->gpuZoneBegin.cpuTime, dt ); - ptr = MemRead( &item->gpuZoneBegin.srcloc ); - SendSourceLocationPayload( ptr ); - tracy_free_fast( (void*)ptr ); - break; - } - case QueueType::GpuZoneEnd: - { - int64_t t = MemRead( &item->gpuZoneEnd.cpuTime ); - int64_t dt = t - refThread; - refThread = t; - MemWrite( &item->gpuZoneEnd.cpuTime, dt ); - break; - } - case QueueType::GpuContextName: - ptr = MemRead( &item->gpuContextNameFat.ptr ); - size = MemRead( &item->gpuContextNameFat.size ); - SendSingleString( (const char*)ptr, size ); -#ifndef TRACY_ON_DEMAND - tracy_free_fast( (void*)ptr ); -#endif - break; - case QueueType::PlotDataInt: - case QueueType::PlotDataFloat: - case QueueType::PlotDataDouble: - { - int64_t t = MemRead( &item->plotDataInt.time ); - int64_t dt = t - refThread; - refThread = t; - MemWrite( &item->plotDataInt.time, dt ); - break; - } - case QueueType::ContextSwitch: - { - int64_t t = MemRead( &item->contextSwitch.time ); - int64_t dt = t - refCtx; - refCtx = t; - MemWrite( &item->contextSwitch.time, dt ); - break; - } - case QueueType::ThreadWakeup: - { - int64_t t = MemRead( &item->threadWakeup.time ); - int64_t dt = t - refCtx; - refCtx = t; - MemWrite( &item->threadWakeup.time, dt ); - break; - } - case QueueType::GpuTime: - { - int64_t t = MemRead( &item->gpuTime.gpuTime ); - int64_t dt = t - refGpu; - refGpu = t; - MemWrite( &item->gpuTime.gpuTime, dt ); - break; - } -#ifdef TRACY_HAS_CALLSTACK - case QueueType::CallstackFrameSize: - { - auto data = (const CallstackEntry*)MemRead( &item->callstackFrameSizeFat.data ); - auto datasz = MemRead( &item->callstackFrameSizeFat.size ); - auto imageName = (const char*)MemRead( &item->callstackFrameSizeFat.imageName ); - SendSingleString( imageName ); - AppendData( item++, QueueDataSize[idx] ); - - for( uint8_t i=0; i( &item->symbolInformationFat.fileString ); - auto needFree = MemRead( &item->symbolInformationFat.needFree ); - SendSingleString( fileString ); - if( needFree ) tracy_free_fast( (void*)fileString ); - break; - } - case QueueType::SymbolCodeMetadata: - { - auto symbol = MemRead( &item->symbolCodeMetadata.symbol ); - auto ptr = (const char*)MemRead( &item->symbolCodeMetadata.ptr ); - auto size = MemRead( &item->symbolCodeMetadata.size ); - SendLongString( symbol, ptr, size, QueueType::SymbolCode ); - tracy_free_fast( (void*)ptr ); - ++item; - continue; - } -#endif -#ifdef TRACY_HAS_SYSTEM_TRACING - case QueueType::ExternalNameMetadata: - { - auto thread = MemRead( &item->externalNameMetadata.thread ); - auto name = (const char*)MemRead( &item->externalNameMetadata.name ); - auto threadName = (const char*)MemRead( &item->externalNameMetadata.threadName ); - SendString( thread, threadName, QueueType::ExternalThreadName ); - SendString( thread, name, QueueType::ExternalName ); - tracy_free_fast( (void*)threadName ); - tracy_free_fast( (void*)name ); - ++item; - continue; - } -#endif - case QueueType::SourceCodeMetadata: - { - auto ptr = (const char*)MemRead( &item->sourceCodeMetadata.ptr ); - auto size = MemRead( &item->sourceCodeMetadata.size ); - auto id = MemRead( &item->sourceCodeMetadata.id ); - SendLongString( (uint64_t)id, ptr, size, QueueType::SourceCode ); - tracy_free_fast( (void*)ptr ); - ++item; - continue; - } - default: - assert( false ); - break; - } - } - if( !AppendData( item++, QueueDataSize[idx] ) ) - { - connectionLost = true; - m_refTimeThread = refThread; - m_refTimeCtx = refCtx; - m_refTimeGpu = refGpu; - return; - } - } - m_refTimeThread = refThread; - m_refTimeCtx = refCtx; - m_refTimeGpu = refGpu; - } - ); - if( connectionLost ) return DequeueStatus::ConnectionLost; - return sz > 0 ? DequeueStatus::DataDequeued : DequeueStatus::QueueEmpty; -} - -Profiler::DequeueStatus Profiler::DequeueContextSwitches( tracy::moodycamel::ConsumerToken& token, int64_t& timeStop ) -{ - const auto sz = GetQueue().try_dequeue_bulk_single( token, [] ( const uint64_t& ) {}, - [this, &timeStop] ( QueueItem* item, size_t sz ) - { - assert( sz > 0 ); - int64_t refCtx = m_refTimeCtx; - while( sz-- > 0 ) - { - FreeAssociatedMemory( *item ); - if( timeStop < 0 ) return; - const auto idx = MemRead( &item->hdr.idx ); - if( idx == (uint8_t)QueueType::ContextSwitch ) - { - const auto csTime = MemRead( &item->contextSwitch.time ); - if( csTime > timeStop ) - { - timeStop = -1; - m_refTimeCtx = refCtx; - return; - } - int64_t dt = csTime - refCtx; - refCtx = csTime; - MemWrite( &item->contextSwitch.time, dt ); - if( !AppendData( item, QueueDataSize[(int)QueueType::ContextSwitch] ) ) - { - timeStop = -2; - m_refTimeCtx = refCtx; - return; - } - } - else if( idx == (uint8_t)QueueType::ThreadWakeup ) - { - const auto csTime = MemRead( &item->threadWakeup.time ); - if( csTime > timeStop ) - { - timeStop = -1; - m_refTimeCtx = refCtx; - return; - } - int64_t dt = csTime - refCtx; - refCtx = csTime; - MemWrite( &item->threadWakeup.time, dt ); - if( !AppendData( item, QueueDataSize[(int)QueueType::ThreadWakeup] ) ) - { - timeStop = -2; - m_refTimeCtx = refCtx; - return; - } - } - item++; - } - m_refTimeCtx = refCtx; - } - ); - - if( timeStop == -2 ) return DequeueStatus::ConnectionLost; - return ( timeStop == -1 || sz > 0 ) ? DequeueStatus::DataDequeued : DequeueStatus::QueueEmpty; -} - -#define ThreadCtxCheckSerial( _name ) \ - uint32_t thread = MemRead( &item->_name.thread ); \ - switch( ThreadCtxCheck( thread ) ) \ - { \ - case ThreadCtxStatus::Same: break; \ - case ThreadCtxStatus::Changed: assert( m_refTimeThread == 0 ); refThread = 0; break; \ - case ThreadCtxStatus::ConnectionLost: return DequeueStatus::ConnectionLost; \ - default: assert( false ); break; \ - } - -Profiler::DequeueStatus Profiler::DequeueSerial() -{ - { - bool lockHeld = true; - while( !m_serialLock.try_lock() ) - { - if( m_shutdownManual.load( std::memory_order_relaxed ) ) - { - lockHeld = false; - break; - } - } - if( !m_serialQueue.empty() ) m_serialQueue.swap( m_serialDequeue ); - if( lockHeld ) - { - m_serialLock.unlock(); - } - } - - const auto sz = m_serialDequeue.size(); - if( sz > 0 ) - { - InitRpmalloc(); - int64_t refSerial = m_refTimeSerial; - int64_t refGpu = m_refTimeGpu; -#ifdef TRACY_FIBERS - int64_t refThread = m_refTimeThread; -#endif - auto item = m_serialDequeue.data(); - auto end = item + sz; - while( item != end ) - { - uint64_t ptr; - auto idx = MemRead( &item->hdr.idx ); - if( idx < (int)QueueType::Terminate ) - { - switch( (QueueType)idx ) - { - case QueueType::CallstackSerial: - ptr = MemRead( &item->callstackFat.ptr ); - SendCallstackPayload( ptr ); - tracy_free_fast( (void*)ptr ); - break; - case QueueType::LockWait: - case QueueType::LockSharedWait: - { - int64_t t = MemRead( &item->lockWait.time ); - int64_t dt = t - refSerial; - refSerial = t; - MemWrite( &item->lockWait.time, dt ); - break; - } - case QueueType::LockObtain: - case QueueType::LockSharedObtain: - { - int64_t t = MemRead( &item->lockObtain.time ); - int64_t dt = t - refSerial; - refSerial = t; - MemWrite( &item->lockObtain.time, dt ); - break; - } - case QueueType::LockRelease: - case QueueType::LockSharedRelease: - { - int64_t t = MemRead( &item->lockRelease.time ); - int64_t dt = t - refSerial; - refSerial = t; - MemWrite( &item->lockRelease.time, dt ); - break; - } - case QueueType::LockName: - { - ptr = MemRead( &item->lockNameFat.name ); - uint16_t size = MemRead( &item->lockNameFat.size ); - SendSingleString( (const char*)ptr, size ); -#ifndef TRACY_ON_DEMAND - tracy_free_fast( (void*)ptr ); -#endif - break; - } - case QueueType::MemAlloc: - case QueueType::MemAllocNamed: - case QueueType::MemAllocCallstack: - case QueueType::MemAllocCallstackNamed: - { - int64_t t = MemRead( &item->memAlloc.time ); - int64_t dt = t - refSerial; - refSerial = t; - MemWrite( &item->memAlloc.time, dt ); - break; - } - case QueueType::MemFree: - case QueueType::MemFreeNamed: - case QueueType::MemFreeCallstack: - case QueueType::MemFreeCallstackNamed: - { - int64_t t = MemRead( &item->memFree.time ); - int64_t dt = t - refSerial; - refSerial = t; - MemWrite( &item->memFree.time, dt ); - break; - } - case QueueType::MemDiscard: - case QueueType::MemDiscardCallstack: - { - int64_t t = MemRead( &item->memDiscard.time ); - int64_t dt = t - refSerial; - refSerial = t; - MemWrite( &item->memDiscard.time, dt ); - break; - } - case QueueType::GpuZoneBeginSerial: - case QueueType::GpuZoneBeginCallstackSerial: - { - int64_t t = MemRead( &item->gpuZoneBegin.cpuTime ); - int64_t dt = t - refSerial; - refSerial = t; - MemWrite( &item->gpuZoneBegin.cpuTime, dt ); - break; - } - case QueueType::GpuZoneBeginAllocSrcLocSerial: - case QueueType::GpuZoneBeginAllocSrcLocCallstackSerial: - { - int64_t t = MemRead( &item->gpuZoneBegin.cpuTime ); - int64_t dt = t - refSerial; - refSerial = t; - MemWrite( &item->gpuZoneBegin.cpuTime, dt ); - ptr = MemRead( &item->gpuZoneBegin.srcloc ); - SendSourceLocationPayload( ptr ); - tracy_free_fast( (void*)ptr ); - break; - } - case QueueType::GpuZoneEndSerial: - { - int64_t t = MemRead( &item->gpuZoneEnd.cpuTime ); - int64_t dt = t - refSerial; - refSerial = t; - MemWrite( &item->gpuZoneEnd.cpuTime, dt ); - break; - } - case QueueType::GpuTime: - { - int64_t t = MemRead( &item->gpuTime.gpuTime ); - int64_t dt = t - refGpu; - refGpu = t; - MemWrite( &item->gpuTime.gpuTime, dt ); - break; - } - case QueueType::GpuContextName: - { - ptr = MemRead( &item->gpuContextNameFat.ptr ); - uint16_t size = MemRead( &item->gpuContextNameFat.size ); - SendSingleString( (const char*)ptr, size ); -#ifndef TRACY_ON_DEMAND - tracy_free_fast( (void*)ptr ); -#endif - break; - } -#ifdef TRACY_FIBERS - case QueueType::ZoneBegin: - case QueueType::ZoneBeginCallstack: - { - ThreadCtxCheckSerial( zoneBeginThread ); - int64_t t = MemRead( &item->zoneBegin.time ); - int64_t dt = t - refThread; - refThread = t; - MemWrite( &item->zoneBegin.time, dt ); - break; - } - case QueueType::ZoneBeginAllocSrcLoc: - case QueueType::ZoneBeginAllocSrcLocCallstack: - { - ThreadCtxCheckSerial( zoneBeginThread ); - int64_t t = MemRead( &item->zoneBegin.time ); - int64_t dt = t - refThread; - refThread = t; - MemWrite( &item->zoneBegin.time, dt ); - ptr = MemRead( &item->zoneBegin.srcloc ); - SendSourceLocationPayload( ptr ); - tracy_free_fast( (void*)ptr ); - break; - } - case QueueType::ZoneEnd: - { - ThreadCtxCheckSerial( zoneEndThread ); - int64_t t = MemRead( &item->zoneEnd.time ); - int64_t dt = t - refThread; - refThread = t; - MemWrite( &item->zoneEnd.time, dt ); - break; - } - case QueueType::ZoneText: - case QueueType::ZoneName: - { - ThreadCtxCheckSerial( zoneTextFatThread ); - ptr = MemRead( &item->zoneTextFat.text ); - uint16_t size = MemRead( &item->zoneTextFat.size ); - SendSingleString( (const char*)ptr, size ); - tracy_free_fast( (void*)ptr ); - break; - } - case QueueType::Message: - case QueueType::MessageCallstack: - { - ThreadCtxCheckSerial( messageFatThread ); - ptr = MemRead( &item->messageFat.text ); - uint16_t size = MemRead( &item->messageFat.size ); - SendSingleString( (const char*)ptr, size ); - tracy_free_fast( (void*)ptr ); - break; - } - case QueueType::MessageColor: - case QueueType::MessageColorCallstack: - { - ThreadCtxCheckSerial( messageColorFatThread ); - ptr = MemRead( &item->messageColorFat.text ); - uint16_t size = MemRead( &item->messageColorFat.size ); - SendSingleString( (const char*)ptr, size ); - tracy_free_fast( (void*)ptr ); - break; - } - case QueueType::Callstack: - { - ThreadCtxCheckSerial( callstackFatThread ); - ptr = MemRead( &item->callstackFat.ptr ); - SendCallstackPayload( ptr ); - tracy_free_fast( (void*)ptr ); - break; - } - case QueueType::CallstackAlloc: - { - ThreadCtxCheckSerial( callstackAllocFatThread ); - ptr = MemRead( &item->callstackAllocFat.nativePtr ); - if( ptr != 0 ) - { - CutCallstack( (void*)ptr, "lua_pcall" ); - SendCallstackPayload( ptr ); - tracy_free_fast( (void*)ptr ); - } - ptr = MemRead( &item->callstackAllocFat.ptr ); - SendCallstackAlloc( ptr ); - tracy_free_fast( (void*)ptr ); - break; - } - case QueueType::FiberEnter: - { - ThreadCtxCheckSerial( fiberEnter ); - int64_t t = MemRead( &item->fiberEnter.time ); - int64_t dt = t - refThread; - refThread = t; - MemWrite( &item->fiberEnter.time, dt ); - break; - } - case QueueType::FiberLeave: - { - ThreadCtxCheckSerial( fiberLeave ); - int64_t t = MemRead( &item->fiberLeave.time ); - int64_t dt = t - refThread; - refThread = t; - MemWrite( &item->fiberLeave.time, dt ); - break; - } -#endif - default: - assert( false ); - break; - } - } -#ifdef TRACY_FIBERS - else - { - switch( (QueueType)idx ) - { - case QueueType::ZoneColor: - { - ThreadCtxCheckSerial( zoneColorThread ); - break; - } - case QueueType::ZoneValue: - { - ThreadCtxCheckSerial( zoneValueThread ); - break; - } - case QueueType::ZoneValidation: - { - ThreadCtxCheckSerial( zoneValidationThread ); - break; - } - case QueueType::MessageLiteral: - case QueueType::MessageLiteralCallstack: - { - ThreadCtxCheckSerial( messageLiteralThread ); - break; - } - case QueueType::MessageLiteralColor: - case QueueType::MessageLiteralColorCallstack: - { - ThreadCtxCheckSerial( messageColorLiteralThread ); - break; - } - case QueueType::CrashReport: - { - ThreadCtxCheckSerial( crashReportThread ); - break; - } - default: - break; - } - } -#endif - if( !AppendData( item, QueueDataSize[idx] ) ) return DequeueStatus::ConnectionLost; - item++; - } - m_refTimeSerial = refSerial; - m_refTimeGpu = refGpu; -#ifdef TRACY_FIBERS - m_refTimeThread = refThread; -#endif - m_serialDequeue.clear(); - } - else - { - return DequeueStatus::QueueEmpty; - } - return DequeueStatus::DataDequeued; -} - -Profiler::ThreadCtxStatus Profiler::ThreadCtxCheck( uint32_t threadId ) -{ - if( m_threadCtx == threadId ) return ThreadCtxStatus::Same; - QueueItem item; - MemWrite( &item.hdr.type, QueueType::ThreadContext ); - MemWrite( &item.threadCtx.thread, threadId ); - if( !AppendData( &item, QueueDataSize[(int)QueueType::ThreadContext] ) ) return ThreadCtxStatus::ConnectionLost; - m_threadCtx = threadId; - m_refTimeThread = 0; - return ThreadCtxStatus::Changed; -} - -bool Profiler::CommitData() -{ - bool ret = SendData( m_buffer + m_bufferStart, m_bufferOffset - m_bufferStart ); - if( m_bufferOffset > TargetFrameSize * 2 ) m_bufferOffset = 0; - m_bufferStart = m_bufferOffset; - return ret; -} - -char* Profiler::SafeCopyProlog( const char* data, size_t size ) -{ - bool success = true; - char* buf = m_safeSendBuffer; -#ifndef NDEBUG - assert( !m_inUse.exchange(true) ); -#endif - - if( size > SafeSendBufferSize ) buf = (char*)tracy_malloc( size ); - -#ifdef _WIN32 - __try - { - memcpy( buf, data, size ); - } - __except( 1 /*EXCEPTION_EXECUTE_HANDLER*/ ) - { - success = false; - } -#else - // Send through the pipe to ensure safe reads - for( size_t offset = 0; offset != size; /*in loop*/ ) - { - size_t sendsize = size - offset; - ssize_t result1, result2; - while( ( result1 = write( m_pipe[1], data + offset, sendsize ) ) < 0 && errno == EINTR ) { /* retry */ } - if( result1 < 0 ) - { - success = false; - break; - } - while( ( result2 = read( m_pipe[0], buf + offset, result1 ) ) < 0 && errno == EINTR ) { /* retry */ } - if( result2 != result1 ) - { - success = false; - break; - } - offset += result1; - } -#endif - - if( success ) return buf; - - SafeCopyEpilog( buf ); - return nullptr; -} - -void Profiler::SafeCopyEpilog( char* buf ) -{ - if( buf != m_safeSendBuffer ) tracy_free( buf ); - -#ifndef NDEBUG - m_inUse.store( false ); -#endif -} - -bool Profiler::SendData( const char* data, size_t len ) -{ - const lz4sz_t lz4sz = LZ4_compress_fast_continue( (LZ4_stream_t*)m_stream, data, m_lz4Buf + sizeof( lz4sz_t ), (int)len, LZ4Size, 1 ); - memcpy( m_lz4Buf, &lz4sz, sizeof( lz4sz ) ); - return m_sock->Send( m_lz4Buf, lz4sz + sizeof( lz4sz_t ) ) != -1; -} - -void Profiler::SendString( uint64_t str, const char* ptr, size_t len, QueueType type ) -{ - assert( type == QueueType::StringData || - type == QueueType::ThreadName || - type == QueueType::PlotName || - type == QueueType::FrameName || - type == QueueType::ExternalName || - type == QueueType::ExternalThreadName || - type == QueueType::FiberName ); - - QueueItem item; - MemWrite( &item.hdr.type, type ); - MemWrite( &item.stringTransfer.ptr, str ); - - assert( len <= std::numeric_limits::max() ); - auto l16 = uint16_t( len ); - - NeedDataSize( QueueDataSize[(int)type] + sizeof( l16 ) + l16 ); - - AppendDataUnsafe( &item, QueueDataSize[(int)type] ); - AppendDataUnsafe( &l16, sizeof( l16 ) ); - AppendDataUnsafe( ptr, l16 ); -} - -void Profiler::SendSingleString( const char* ptr, size_t len ) -{ - QueueItem item; - MemWrite( &item.hdr.type, QueueType::SingleStringData ); - - assert( len <= std::numeric_limits::max() ); - auto l16 = uint16_t( len ); - - NeedDataSize( QueueDataSize[(int)QueueType::SingleStringData] + sizeof( l16 ) + l16 ); - - AppendDataUnsafe( &item, QueueDataSize[(int)QueueType::SingleStringData] ); - AppendDataUnsafe( &l16, sizeof( l16 ) ); - AppendDataUnsafe( ptr, l16 ); -} - -void Profiler::SendSecondString( const char* ptr, size_t len ) -{ - QueueItem item; - MemWrite( &item.hdr.type, QueueType::SecondStringData ); - - assert( len <= std::numeric_limits::max() ); - auto l16 = uint16_t( len ); - - NeedDataSize( QueueDataSize[(int)QueueType::SecondStringData] + sizeof( l16 ) + l16 ); - - AppendDataUnsafe( &item, QueueDataSize[(int)QueueType::SecondStringData] ); - AppendDataUnsafe( &l16, sizeof( l16 ) ); - AppendDataUnsafe( ptr, l16 ); -} - -void Profiler::SendLongString( uint64_t str, const char* ptr, size_t len, QueueType type ) -{ - assert( type == QueueType::FrameImageData || - type == QueueType::SymbolCode || - type == QueueType::SourceCode ); - - QueueItem item; - MemWrite( &item.hdr.type, type ); - MemWrite( &item.stringTransfer.ptr, str ); - - assert( len <= std::numeric_limits::max() ); - assert( QueueDataSize[(int)type] + sizeof( uint32_t ) + len <= TargetFrameSize ); - auto l32 = uint32_t( len ); - - NeedDataSize( QueueDataSize[(int)type] + sizeof( l32 ) + l32 ); - - AppendDataUnsafe( &item, QueueDataSize[(int)type] ); - AppendDataUnsafe( &l32, sizeof( l32 ) ); - AppendDataUnsafe( ptr, l32 ); -} - -void Profiler::SendSourceLocation( uint64_t ptr ) -{ - auto srcloc = (const SourceLocationData*)ptr; - QueueItem item; - MemWrite( &item.hdr.type, QueueType::SourceLocation ); - MemWrite( &item.srcloc.name, (uint64_t)srcloc->name ); - MemWrite( &item.srcloc.file, (uint64_t)srcloc->file ); - MemWrite( &item.srcloc.function, (uint64_t)srcloc->function ); - MemWrite( &item.srcloc.line, srcloc->line ); - MemWrite( &item.srcloc.b, uint8_t( ( srcloc->color ) & 0xFF ) ); - MemWrite( &item.srcloc.g, uint8_t( ( srcloc->color >> 8 ) & 0xFF ) ); - MemWrite( &item.srcloc.r, uint8_t( ( srcloc->color >> 16 ) & 0xFF ) ); - AppendData( &item, QueueDataSize[(int)QueueType::SourceLocation] ); -} - -void Profiler::SendSourceLocationPayload( uint64_t _ptr ) -{ - auto ptr = (const char*)_ptr; - - QueueItem item; - MemWrite( &item.hdr.type, QueueType::SourceLocationPayload ); - MemWrite( &item.stringTransfer.ptr, _ptr ); - - uint16_t len; - memcpy( &len, ptr, sizeof( len ) ); - assert( len > 2 ); - len -= 2; - ptr += 2; - - NeedDataSize( QueueDataSize[(int)QueueType::SourceLocationPayload] + sizeof( len ) + len ); - - AppendDataUnsafe( &item, QueueDataSize[(int)QueueType::SourceLocationPayload] ); - AppendDataUnsafe( &len, sizeof( len ) ); - AppendDataUnsafe( ptr, len ); -} - -void Profiler::SendCallstackPayload( uint64_t _ptr ) -{ - auto ptr = (uintptr_t*)_ptr; - - QueueItem item; - MemWrite( &item.hdr.type, QueueType::CallstackPayload ); - MemWrite( &item.stringTransfer.ptr, _ptr ); - - const auto sz = *ptr++; - const auto len = sz * sizeof( uint64_t ); - const auto l16 = uint16_t( len ); - - NeedDataSize( QueueDataSize[(int)QueueType::CallstackPayload] + sizeof( l16 ) + l16 ); - - AppendDataUnsafe( &item, QueueDataSize[(int)QueueType::CallstackPayload] ); - AppendDataUnsafe( &l16, sizeof( l16 ) ); - - if( compile_time_condition::value ) - { - AppendDataUnsafe( ptr, sizeof( uint64_t ) * sz ); - } - else - { - for( uintptr_t i=0; i> 63 != 0 ) - { - SendSingleString( "" ); - QueueItem item; - MemWrite( &item.hdr.type, QueueType::SymbolInformation ); - MemWrite( &item.symbolInformation.line, 0 ); - MemWrite( &item.symbolInformation.symAddr, symbol ); - AppendData( &item, QueueDataSize[(int)QueueType::SymbolInformation] ); - } - else - { - m_symbolQueue.emplace( SymbolQueueItem { SymbolQueueItemType::SymbolQuery, symbol } ); - } -#else - AckServerQuery(); -#endif -} - -void Profiler::QueueExternalName( uint64_t ptr ) -{ -#ifdef TRACY_HAS_SYSTEM_TRACING - m_symbolQueue.emplace( SymbolQueueItem { SymbolQueueItemType::ExternalName, ptr } ); -#endif -} - -void Profiler::QueueKernelCode( uint64_t symbol, uint32_t size ) -{ - assert( symbol >> 63 != 0 ); -#ifdef TRACY_HAS_CALLSTACK - m_symbolQueue.emplace( SymbolQueueItem { SymbolQueueItemType::KernelCode, symbol, size } ); -#else - AckSymbolCodeNotAvailable(); -#endif -} - -void Profiler::QueueSourceCodeQuery( uint32_t id ) -{ - assert( m_exectime != 0 ); - assert( m_queryData ); - m_symbolQueue.emplace( SymbolQueueItem { SymbolQueueItemType::SourceCode, uint64_t( m_queryData ), uint64_t( m_queryImage ), id } ); - m_queryData = nullptr; - m_queryImage = nullptr; -} - -#ifdef TRACY_HAS_CALLSTACK -void Profiler::HandleSymbolQueueItem( const SymbolQueueItem& si ) -{ - switch( si.type ) - { - case SymbolQueueItemType::CallstackFrame: - { - const auto frameData = DecodeCallstackPtr( si.ptr ); - auto data = tracy_malloc_fast( sizeof( CallstackEntry ) * frameData.size ); - memcpy( data, frameData.data, sizeof( CallstackEntry ) * frameData.size ); - TracyLfqPrepare( QueueType::CallstackFrameSize ); - MemWrite( &item->callstackFrameSizeFat.ptr, si.ptr ); - MemWrite( &item->callstackFrameSizeFat.size, frameData.size ); - MemWrite( &item->callstackFrameSizeFat.data, (uint64_t)data ); - MemWrite( &item->callstackFrameSizeFat.imageName, (uint64_t)frameData.imageName ); - TracyLfqCommit; - break; - } - case SymbolQueueItemType::SymbolQuery: - { -#ifdef __ANDROID__ - // On Android it's common for code to be in mappings that are only executable - // but not readable. - if( !EnsureReadable( si.ptr ) ) - { - TracyLfqPrepare( QueueType::AckServerQueryNoop ); - TracyLfqCommit; - break; - } -#endif - const auto sym = DecodeSymbolAddress( si.ptr ); - TracyLfqPrepare( QueueType::SymbolInformation ); - MemWrite( &item->symbolInformationFat.line, sym.line ); - MemWrite( &item->symbolInformationFat.symAddr, si.ptr ); - MemWrite( &item->symbolInformationFat.fileString, (uint64_t)sym.file ); - MemWrite( &item->symbolInformationFat.needFree, (uint8_t)sym.needFree ); - TracyLfqCommit; - break; - } -#ifdef TRACY_HAS_SYSTEM_TRACING - case SymbolQueueItemType::ExternalName: - { - const char* threadName; - const char* name; - SysTraceGetExternalName( si.ptr, threadName, name ); - TracyLfqPrepare( QueueType::ExternalNameMetadata ); - MemWrite( &item->externalNameMetadata.thread, si.ptr ); - MemWrite( &item->externalNameMetadata.name, (uint64_t)name ); - MemWrite( &item->externalNameMetadata.threadName, (uint64_t)threadName ); - TracyLfqCommit; - break; - } -#endif - case SymbolQueueItemType::KernelCode: - { -#ifdef _WIN32 - auto mod = GetKernelModulePath( si.ptr ); - if( mod ) - { - auto fn = DecodeCallstackPtrFast( si.ptr ); - if( *fn ) - { - auto hnd = LoadLibraryExA( mod, nullptr, DONT_RESOLVE_DLL_REFERENCES ); - if( hnd ) - { - auto ptr = (const void*)GetProcAddress( hnd, fn ); - if( ptr ) - { - auto buf = (char*)tracy_malloc( si.extra ); - memcpy( buf, ptr, si.extra ); - FreeLibrary( hnd ); - TracyLfqPrepare( QueueType::SymbolCodeMetadata ); - MemWrite( &item->symbolCodeMetadata.symbol, si.ptr ); - MemWrite( &item->symbolCodeMetadata.ptr, (uint64_t)buf ); - MemWrite( &item->symbolCodeMetadata.size, (uint32_t)si.extra ); - TracyLfqCommit; - break; - } - FreeLibrary( hnd ); - } - } - } -#elif defined __linux__ - void* data = m_kcore->Retrieve( si.ptr, si.extra ); - if( data ) - { - TracyLfqPrepare( QueueType::SymbolCodeMetadata ); - MemWrite( &item->symbolCodeMetadata.symbol, si.ptr ); - MemWrite( &item->symbolCodeMetadata.ptr, (uint64_t)data ); - MemWrite( &item->symbolCodeMetadata.size, (uint32_t)si.extra ); - TracyLfqCommit; - break; - } -#endif - TracyLfqPrepare( QueueType::AckSymbolCodeNotAvailable ); - TracyLfqCommit; - break; - } - case SymbolQueueItemType::SourceCode: - HandleSourceCodeQuery( (char*)si.ptr, (char*)si.extra, si.id ); - break; - default: - assert( false ); - break; - } -} - -void Profiler::SymbolWorker() -{ -#if defined __linux__ && !defined TRACY_NO_CRASH_HANDLER - s_symbolTid = syscall( SYS_gettid ); -#endif - - ThreadExitHandler threadExitHandler; - SetThreadName( "Tracy Symbol Worker" ); -#ifdef TRACY_USE_RPMALLOC - InitRpmalloc(); -#endif - InitCallstack(); - while( m_timeBegin.load( std::memory_order_relaxed ) == 0 ) std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) ); - - for(;;) - { - const auto shouldExit = ShouldExit(); -#ifdef TRACY_ON_DEMAND - if( !IsConnected() ) - { - if( shouldExit ) - { - s_symbolThreadGone.store( true, std::memory_order_release ); - return; - } - while( m_symbolQueue.front() ) m_symbolQueue.pop(); - std::this_thread::sleep_for( std::chrono::milliseconds( 20 ) ); - continue; - } -#endif - auto si = m_symbolQueue.front(); - if( si ) - { - HandleSymbolQueueItem( *si ); - m_symbolQueue.pop(); - } - else - { - if( shouldExit ) - { - s_symbolThreadGone.store( true, std::memory_order_release ); - return; - } - std::this_thread::sleep_for( std::chrono::milliseconds( 20 ) ); - } - } -} -#endif - -bool Profiler::HandleServerQuery() -{ - ServerQueryPacket payload; - if( !m_sock->Read( &payload, sizeof( payload ), 10 ) ) return false; - - uint8_t type; - uint64_t ptr; - memcpy( &type, &payload.type, sizeof( payload.type ) ); - memcpy( &ptr, &payload.ptr, sizeof( payload.ptr ) ); - - switch( type ) - { - case ServerQueryString: - SendString( ptr, (const char*)ptr, QueueType::StringData ); - break; - case ServerQueryThreadString: - if( ptr == m_mainThread ) - { - SendString( ptr, "Main thread", 11, QueueType::ThreadName ); - } - else - { - auto t = GetThreadNameData( (uint32_t)ptr ); - if( t ) - { - SendString( ptr, t->name, QueueType::ThreadName ); - if( t->groupHint != 0 ) - { - TracyLfqPrepare( QueueType::ThreadGroupHint ); - MemWrite( &item->threadGroupHint.thread, (uint32_t)ptr ); - MemWrite( &item->threadGroupHint.groupHint, t->groupHint ); - TracyLfqCommit; - } - } - else - { - SendString( ptr, GetThreadName( (uint32_t)ptr ), QueueType::ThreadName ); - } - } - break; - case ServerQuerySourceLocation: - SendSourceLocation( ptr ); - break; - case ServerQueryPlotName: - SendString( ptr, (const char*)ptr, QueueType::PlotName ); - break; - case ServerQueryTerminate: - return false; - case ServerQueryCallstackFrame: - QueueCallstackFrame( ptr ); - break; - case ServerQueryFrameName: - SendString( ptr, (const char*)ptr, QueueType::FrameName ); - break; - case ServerQueryDisconnect: - HandleDisconnect(); - return false; -#ifdef TRACY_HAS_SYSTEM_TRACING - case ServerQueryExternalName: - QueueExternalName( ptr ); - break; -#endif - case ServerQueryParameter: - HandleParameter( ptr ); - break; - case ServerQuerySymbol: - QueueSymbolQuery( ptr ); - break; -#ifndef TRACY_NO_CODE_TRANSFER - case ServerQuerySymbolCode: - HandleSymbolCodeQuery( ptr, payload.extra ); - break; -#endif - case ServerQuerySourceCode: - QueueSourceCodeQuery( uint32_t( ptr ) ); - break; - case ServerQueryDataTransfer: - if( m_queryData ) - { - assert( !m_queryImage ); - m_queryImage = m_queryData; - } - m_queryDataPtr = m_queryData = (char*)tracy_malloc( ptr + 11 ); - AckServerQuery(); - break; - case ServerQueryDataTransferPart: - memcpy( m_queryDataPtr, &ptr, 8 ); - memcpy( m_queryDataPtr+8, &payload.extra, 4 ); - m_queryDataPtr += 12; - AckServerQuery(); - break; -#ifdef TRACY_FIBERS - case ServerQueryFiberName: - SendString( ptr, (const char*)ptr, QueueType::FiberName ); - break; -#endif - default: - assert( false ); - break; - } - - return true; -} - -void Profiler::HandleDisconnect() -{ - moodycamel::ConsumerToken token( GetQueue() ); - -#ifdef TRACY_HAS_SYSTEM_TRACING - if( s_sysTraceThread ) - { - auto timestamp = GetTime(); - for(;;) - { - const auto status = DequeueContextSwitches( token, timestamp ); - if( status == DequeueStatus::ConnectionLost ) - { - return; - } - else if( status == DequeueStatus::QueueEmpty ) - { - if( m_bufferOffset != m_bufferStart ) - { - if( !CommitData() ) return; - } - } - if( timestamp < 0 ) - { - if( m_bufferOffset != m_bufferStart ) - { - if( !CommitData() ) return; - } - break; - } - ClearSerial(); - if( m_sock->HasData() ) - { - while( m_sock->HasData() ) - { - if( !HandleServerQuery() ) return; - } - if( m_bufferOffset != m_bufferStart ) - { - if( !CommitData() ) return; - } - } - else - { - if( m_bufferOffset != m_bufferStart ) - { - if( !CommitData() ) return; - } - std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) ); - } - } - } -#endif - - QueueItem terminate; - MemWrite( &terminate.hdr.type, QueueType::Terminate ); - if( !SendData( (const char*)&terminate, 1 ) ) return; - for(;;) - { - ClearQueues( token ); - if( m_sock->HasData() ) - { - while( m_sock->HasData() ) - { - if( !HandleServerQuery() ) return; - } - if( m_bufferOffset != m_bufferStart ) - { - if( !CommitData() ) return; - } - } - else - { - if( m_bufferOffset != m_bufferStart ) - { - if( !CommitData() ) return; - } - std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) ); - } - } -} - -void Profiler::CalibrateTimer() -{ - m_timerMul = 1.; - -#ifdef TRACY_HW_TIMER - -# if !defined TRACY_TIMER_QPC && defined TRACY_TIMER_FALLBACK - const bool needCalibration = HardwareSupportsInvariantTSC(); -# else - const bool needCalibration = true; -# endif - if( needCalibration ) - { - std::atomic_signal_fence( std::memory_order_acq_rel ); - const auto t0 = std::chrono::high_resolution_clock::now(); - const auto r0 = GetTime(); - std::atomic_signal_fence( std::memory_order_acq_rel ); - std::this_thread::sleep_for( std::chrono::milliseconds( 200 ) ); - std::atomic_signal_fence( std::memory_order_acq_rel ); - const auto t1 = std::chrono::high_resolution_clock::now(); - const auto r1 = GetTime(); - std::atomic_signal_fence( std::memory_order_acq_rel ); - - const auto dt = std::chrono::duration_cast( t1 - t0 ).count(); - const auto dr = r1 - r0; - - m_timerMul = double( dt ) / double( dr ); - } -#endif -} - -void Profiler::CalibrateDelay() -{ - constexpr int Iterations = 50000; - - auto mindiff = std::numeric_limits::max(); - for( int i=0; i 0 && dti < mindiff ) mindiff = dti; - } - m_resolution = mindiff; - -#ifdef TRACY_DELAYED_INIT - m_delay = m_resolution; -#else - constexpr int Events = Iterations * 2; // start + end - static_assert( Events < QueuePrealloc, "Delay calibration loop will allocate memory in queue" ); - - static const tracy::SourceLocationData __tracy_source_location { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; - const auto t0 = GetTime(); - for( int i=0; izoneBegin.time, Profiler::GetTime() ); - MemWrite( &item->zoneBegin.srcloc, (uint64_t)&__tracy_source_location ); - TracyLfqCommit; - } - { - TracyLfqPrepare( QueueType::ZoneEnd ); - MemWrite( &item->zoneEnd.time, GetTime() ); - TracyLfqCommit; - } - } - const auto t1 = GetTime(); - const auto dt = t1 - t0; - m_delay = dt / Events; - - moodycamel::ConsumerToken token( GetQueue() ); - int left = Events; - while( left != 0 ) - { - const auto sz = GetQueue().try_dequeue_bulk_single( token, [](const uint64_t&){}, [](QueueItem* item, size_t sz){} ); - assert( sz > 0 ); - left -= (int)sz; - } - assert( GetQueue().size_approx() == 0 ); -#endif -} - -void Profiler::ReportTopology() -{ -#ifndef TRACY_DELAYED_INIT - struct CpuData - { - uint32_t package; - uint32_t die; - uint32_t core; - uint32_t thread; - }; - -#if defined _WIN32 -# ifdef TRACY_UWP - t_GetLogicalProcessorInformationEx _GetLogicalProcessorInformationEx = &::GetLogicalProcessorInformationEx; -# else - t_GetLogicalProcessorInformationEx _GetLogicalProcessorInformationEx = (t_GetLogicalProcessorInformationEx)GetProcAddress( GetModuleHandleA( "kernel32.dll" ), "GetLogicalProcessorInformationEx" ); -# endif - if( !_GetLogicalProcessorInformationEx ) return; - - SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX* packageInfo = nullptr; - SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX* dieInfo = nullptr; - SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX* coreInfo = nullptr; - - DWORD psz = 0; - _GetLogicalProcessorInformationEx( RelationProcessorPackage, nullptr, &psz ); - if( GetLastError() == ERROR_INSUFFICIENT_BUFFER ) - { - packageInfo = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)tracy_malloc( psz ); - auto res = _GetLogicalProcessorInformationEx( RelationProcessorPackage, packageInfo, &psz ); - assert( res ); - } - else - { - psz = 0; - } - - DWORD dsz = 0; - _GetLogicalProcessorInformationEx( RelationProcessorDie, nullptr, &dsz ); - if( GetLastError() == ERROR_INSUFFICIENT_BUFFER ) - { - dieInfo = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)tracy_malloc( dsz ); - auto res = _GetLogicalProcessorInformationEx( RelationProcessorDie, dieInfo, &dsz ); - assert( res ); - } - else - { - dsz = 0; - } - - DWORD csz = 0; - _GetLogicalProcessorInformationEx( RelationProcessorCore, nullptr, &csz ); - if( GetLastError() == ERROR_INSUFFICIENT_BUFFER ) - { - coreInfo = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)tracy_malloc( csz ); - auto res = _GetLogicalProcessorInformationEx( RelationProcessorCore, coreInfo, &csz ); - assert( res ); - } - else - { - csz = 0; - } - - SYSTEM_INFO sysinfo; - GetSystemInfo( &sysinfo ); - const uint32_t numcpus = sysinfo.dwNumberOfProcessors; - - auto cpuData = (CpuData*)tracy_malloc( sizeof( CpuData ) * numcpus ); - memset( cpuData, 0, sizeof( CpuData ) * numcpus ); - for( uint32_t i=0; iRelationship == RelationProcessorPackage ); - // FIXME account for GroupCount - auto mask = ptr->Processor.GroupMask[0].Mask; - int core = 0; - while( mask != 0 ) - { - if( mask & 1 ) cpuData[core].package = idx; - core++; - mask >>= 1; - } - ptr = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)(((char*)ptr) + ptr->Size); - idx++; - } - - idx = 0; - ptr = dieInfo; - while( (char*)ptr < ((char*)dieInfo) + dsz ) - { - assert( ptr->Relationship == RelationProcessorDie ); - // FIXME account for GroupCount - auto mask = ptr->Processor.GroupMask[0].Mask; - int core = 0; - while( mask != 0 ) - { - if( mask & 1 ) cpuData[core].die = idx; - core++; - mask >>= 1; - } - ptr = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)(((char*)ptr) + ptr->Size); - idx++; - } - - idx = 0; - ptr = coreInfo; - while( (char*)ptr < ((char*)coreInfo) + csz ) - { - assert( ptr->Relationship == RelationProcessorCore ); - // FIXME account for GroupCount - auto mask = ptr->Processor.GroupMask[0].Mask; - int core = 0; - while( mask != 0 ) - { - if( mask & 1 ) cpuData[core].core = idx; - core++; - mask >>= 1; - } - ptr = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)(((char*)ptr) + ptr->Size); - idx++; - } - - for( uint32_t i=0; icpuTopology.package, data.package ); - MemWrite( &item->cpuTopology.die, data.die ); - MemWrite( &item->cpuTopology.core, data.core ); - MemWrite( &item->cpuTopology.thread, data.thread ); - -#ifdef TRACY_ON_DEMAND - DeferItem( *item ); -#endif - - TracyLfqCommit; - } - - tracy_free( cpuData ); - tracy_free( coreInfo ); - tracy_free( packageInfo ); -#elif defined __linux__ - const int numcpus = std::thread::hardware_concurrency(); - auto cpuData = (CpuData*)tracy_malloc( sizeof( CpuData ) * numcpus ); - memset( cpuData, 0, sizeof( CpuData ) * numcpus ); - - const char* basePath = "/sys/devices/system/cpu/cpu"; - for( int i=0; icpuTopology.package, data.package ); - MemWrite( &item->cpuTopology.die, data.die ); - MemWrite( &item->cpuTopology.core, data.core ); - MemWrite( &item->cpuTopology.thread, data.thread ); - -#ifdef TRACY_ON_DEMAND - DeferItem( *item ); -#endif - - TracyLfqCommit; - } - - tracy_free( cpuData ); -#endif -#endif -} - -void Profiler::SendCallstack( int32_t depth, const char* skipBefore ) -{ -#ifdef TRACY_HAS_CALLSTACK - auto ptr = Callstack( depth ); - CutCallstack( ptr, skipBefore ); - - TracyQueuePrepare( QueueType::Callstack ); - MemWrite( &item->callstackFat.ptr, (uint64_t)ptr ); - TracyQueueCommit( callstackFatThread ); -#endif -} - -void Profiler::CutCallstack( void* callstack, const char* skipBefore ) -{ -#ifdef TRACY_HAS_CALLSTACK - auto data = (uintptr_t*)callstack; - const auto sz = *data++; - uintptr_t i; - for( i=0; i 100000000 ) // 100 ms - { - auto sysTime = m_sysTime.Get(); - if( sysTime >= 0 ) - { - m_sysTimeLast = t; - - TracyLfqPrepare( QueueType::SysTimeReport ); - MemWrite( &item->sysTime.time, GetTime() ); - MemWrite( &item->sysTime.sysTime, sysTime ); - TracyLfqCommit; - } - } -} -#endif - -void Profiler::HandleParameter( uint64_t payload ) -{ - assert( m_paramCallback ); - const auto idx = uint32_t( payload >> 32 ); - const auto val = int32_t( payload & 0xFFFFFFFF ); - m_paramCallback( m_paramCallbackData, idx, val ); - AckServerQuery(); -} - -void Profiler::HandleSymbolCodeQuery( uint64_t symbol, uint32_t size ) -{ - if( symbol >> 63 != 0 ) - { - QueueKernelCode( symbol, size ); - } - else - { - auto&& lambda = [ this, symbol ]( const char* buf, size_t size ) { - SendLongString( symbol, buf, size, QueueType::SymbolCode ); - }; - - // 'symbol' may have come from a module that has since unloaded, perform a safe copy before sending - if( !WithSafeCopy( (const char*)symbol, size, lambda ) ) AckSymbolCodeNotAvailable(); - } -} - -void Profiler::HandleSourceCodeQuery( char* data, char* image, uint32_t id ) -{ - bool ok = false; - FILE* f = fopen( data, "rb" ); - if( f ) - { - struct stat st; - if( fstat( fileno( f ), &st ) == 0 && (uint64_t)st.st_mtime < m_exectime && st.st_size < ( TargetFrameSize - 16 ) ) - { - auto ptr = (char*)tracy_malloc_fast( st.st_size ); - auto rd = fread( ptr, 1, st.st_size, f ); - if( rd == (size_t)st.st_size ) - { - TracyLfqPrepare( QueueType::SourceCodeMetadata ); - MemWrite( &item->sourceCodeMetadata.ptr, (uint64_t)ptr ); - MemWrite( &item->sourceCodeMetadata.size, (uint32_t)rd ); - MemWrite( &item->sourceCodeMetadata.id, id ); - TracyLfqCommit; - ok = true; - } - else - { - tracy_free_fast( ptr ); - } - } - fclose( f ); - } - -#ifdef TRACY_DEBUGINFOD - else if( image && data[0] == '/' ) - { - size_t size; - auto buildid = GetBuildIdForImage( image, size ); - if( buildid ) - { - auto d = debuginfod_find_source( GetDebuginfodClient(), buildid, size, data, nullptr ); - TracyDebug( "DebugInfo source query: %s, fn: %s, image: %s\n", d >= 0 ? " ok " : "fail", data, image ); - if( d >= 0 ) - { - struct stat st; - fstat( d, &st ); - if( st.st_size < ( TargetFrameSize - 16 ) ) - { - lseek( d, 0, SEEK_SET ); - auto ptr = (char*)tracy_malloc_fast( st.st_size ); - auto rd = read( d, ptr, st.st_size ); - if( rd == (size_t)st.st_size ) - { - TracyLfqPrepare( QueueType::SourceCodeMetadata ); - MemWrite( &item->sourceCodeMetadata.ptr, (uint64_t)ptr ); - MemWrite( &item->sourceCodeMetadata.size, (uint32_t)rd ); - MemWrite( &item->sourceCodeMetadata.id, id ); - TracyLfqCommit; - ok = true; - } - else - { - tracy_free_fast( ptr ); - } - } - close( d ); - } - } - } - else - { - TracyDebug( "DebugInfo invalid query fn: %s, image: %s\n", data, image ); - } -#endif - - if( !ok && m_sourceCallback ) - { - size_t sz; - char* ptr = m_sourceCallback( m_sourceCallbackData, data, sz ); - if( ptr ) - { - if( sz < ( TargetFrameSize - 16 ) ) - { - TracyLfqPrepare( QueueType::SourceCodeMetadata ); - MemWrite( &item->sourceCodeMetadata.ptr, (uint64_t)ptr ); - MemWrite( &item->sourceCodeMetadata.size, (uint32_t)sz ); - MemWrite( &item->sourceCodeMetadata.id, id ); - TracyLfqCommit; - ok = true; - } - else - { - tracy_free_fast( ptr ); - } - } - } - - if( !ok ) - { - TracyLfqPrepare( QueueType::AckSourceCodeNotAvailable ); - MemWrite( &item->sourceCodeNotAvailable, id ); - TracyLfqCommit; - } - - tracy_free_fast( data ); - tracy_free_fast( image ); -} - -#if defined _WIN32 && defined TRACY_TIMER_QPC -int64_t Profiler::GetTimeQpc() -{ - LARGE_INTEGER t; - QueryPerformanceCounter( &t ); - return t.QuadPart; -} -#endif - -} - -#ifdef __cplusplus -extern "C" { -#endif - -TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin( const struct ___tracy_source_location_data* srcloc, int32_t active ) -{ - ___tracy_c_zone_context ctx; -#ifdef TRACY_ON_DEMAND - ctx.active = active && tracy::GetProfiler().IsConnected(); -#else - ctx.active = active; -#endif - if( !ctx.active ) return ctx; - const auto id = tracy::GetProfiler().GetNextZoneId(); - ctx.id = id; - -#ifndef TRACY_NO_VERIFY - { - TracyQueuePrepareC( tracy::QueueType::ZoneValidation ); - tracy::MemWrite( &item->zoneValidation.id, id ); - TracyQueueCommitC( zoneValidationThread ); - } -#endif - { - TracyQueuePrepareC( tracy::QueueType::ZoneBegin ); - tracy::MemWrite( &item->zoneBegin.time, tracy::Profiler::GetTime() ); - tracy::MemWrite( &item->zoneBegin.srcloc, (uint64_t)srcloc ); - TracyQueueCommitC( zoneBeginThread ); - } - return ctx; -} - -TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_callstack( const struct ___tracy_source_location_data* srcloc, int32_t depth, int32_t active ) -{ - ___tracy_c_zone_context ctx; -#ifdef TRACY_ON_DEMAND - ctx.active = active && tracy::GetProfiler().IsConnected(); -#else - ctx.active = active; -#endif - if( !ctx.active ) return ctx; - const auto id = tracy::GetProfiler().GetNextZoneId(); - ctx.id = id; - -#ifndef TRACY_NO_VERIFY - { - TracyQueuePrepareC( tracy::QueueType::ZoneValidation ); - tracy::MemWrite( &item->zoneValidation.id, id ); - TracyQueueCommitC( zoneValidationThread ); - } -#endif - auto zoneQueue = tracy::QueueType::ZoneBegin; - if( depth > 0 && tracy::has_callstack() ) - { - tracy::GetProfiler().SendCallstack( depth ); - zoneQueue = tracy::QueueType::ZoneBeginCallstack; - } - TracyQueuePrepareC( zoneQueue ); - tracy::MemWrite( &item->zoneBegin.time, tracy::Profiler::GetTime() ); - tracy::MemWrite( &item->zoneBegin.srcloc, (uint64_t)srcloc ); - TracyQueueCommitC( zoneBeginThread ); - - return ctx; -} - -TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_alloc( uint64_t srcloc, int32_t active ) -{ - ___tracy_c_zone_context ctx; -#ifdef TRACY_ON_DEMAND - ctx.active = active && tracy::GetProfiler().IsConnected(); -#else - ctx.active = active; -#endif - if( !ctx.active ) - { - tracy::tracy_free( (void*)srcloc ); - return ctx; - } - const auto id = tracy::GetProfiler().GetNextZoneId(); - ctx.id = id; - -#ifndef TRACY_NO_VERIFY - { - TracyQueuePrepareC( tracy::QueueType::ZoneValidation ); - tracy::MemWrite( &item->zoneValidation.id, id ); - TracyQueueCommitC( zoneValidationThread ); - } -#endif - { - TracyQueuePrepareC( tracy::QueueType::ZoneBeginAllocSrcLoc ); - tracy::MemWrite( &item->zoneBegin.time, tracy::Profiler::GetTime() ); - tracy::MemWrite( &item->zoneBegin.srcloc, srcloc ); - TracyQueueCommitC( zoneBeginThread ); - } - return ctx; -} - -TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_alloc_callstack( uint64_t srcloc, int32_t depth, int32_t active ) -{ - ___tracy_c_zone_context ctx; -#ifdef TRACY_ON_DEMAND - ctx.active = active && tracy::GetProfiler().IsConnected(); -#else - ctx.active = active; -#endif - if( !ctx.active ) - { - tracy::tracy_free( (void*)srcloc ); - return ctx; - } - const auto id = tracy::GetProfiler().GetNextZoneId(); - ctx.id = id; - -#ifndef TRACY_NO_VERIFY - { - TracyQueuePrepareC( tracy::QueueType::ZoneValidation ); - tracy::MemWrite( &item->zoneValidation.id, id ); - TracyQueueCommitC( zoneValidationThread ); - } -#endif - auto zoneQueue = tracy::QueueType::ZoneBeginAllocSrcLoc; - if( depth > 0 && tracy::has_callstack() ) - { - tracy::GetProfiler().SendCallstack( depth ); - zoneQueue = tracy::QueueType::ZoneBeginAllocSrcLocCallstack; - } - TracyQueuePrepareC( zoneQueue ); - tracy::MemWrite( &item->zoneBegin.time, tracy::Profiler::GetTime() ); - tracy::MemWrite( &item->zoneBegin.srcloc, srcloc ); - TracyQueueCommitC( zoneBeginThread ); - - return ctx; -} - -TRACY_API void ___tracy_emit_zone_end( TracyCZoneCtx ctx ) -{ - if( !ctx.active ) return; -#ifndef TRACY_NO_VERIFY - { - TracyQueuePrepareC( tracy::QueueType::ZoneValidation ); - tracy::MemWrite( &item->zoneValidation.id, ctx.id ); - TracyQueueCommitC( zoneValidationThread ); - } -#endif - { - TracyQueuePrepareC( tracy::QueueType::ZoneEnd ); - tracy::MemWrite( &item->zoneEnd.time, tracy::Profiler::GetTime() ); - TracyQueueCommitC( zoneEndThread ); - } -} - -TRACY_API void ___tracy_emit_zone_text( TracyCZoneCtx ctx, const char* txt, size_t size ) -{ - assert( size < std::numeric_limits::max() ); - if( !ctx.active ) return; - auto ptr = (char*)tracy::tracy_malloc( size ); - memcpy( ptr, txt, size ); -#ifndef TRACY_NO_VERIFY - { - TracyQueuePrepareC( tracy::QueueType::ZoneValidation ); - tracy::MemWrite( &item->zoneValidation.id, ctx.id ); - TracyQueueCommitC( zoneValidationThread ); - } -#endif - { - TracyQueuePrepareC( tracy::QueueType::ZoneText ); - tracy::MemWrite( &item->zoneTextFat.text, (uint64_t)ptr ); - tracy::MemWrite( &item->zoneTextFat.size, (uint16_t)size ); - TracyQueueCommitC( zoneTextFatThread ); - } -} - -TRACY_API void ___tracy_emit_zone_name( TracyCZoneCtx ctx, const char* txt, size_t size ) -{ - assert( size < std::numeric_limits::max() ); - if( !ctx.active ) return; - auto ptr = (char*)tracy::tracy_malloc( size ); - memcpy( ptr, txt, size ); -#ifndef TRACY_NO_VERIFY - { - TracyQueuePrepareC( tracy::QueueType::ZoneValidation ); - tracy::MemWrite( &item->zoneValidation.id, ctx.id ); - TracyQueueCommitC( zoneValidationThread ); - } -#endif - { - TracyQueuePrepareC( tracy::QueueType::ZoneName ); - tracy::MemWrite( &item->zoneTextFat.text, (uint64_t)ptr ); - tracy::MemWrite( &item->zoneTextFat.size, (uint16_t)size ); - TracyQueueCommitC( zoneTextFatThread ); - } -} - -TRACY_API void ___tracy_emit_zone_color( TracyCZoneCtx ctx, uint32_t color ) { - if( !ctx.active ) return; -#ifndef TRACY_NO_VERIFY - { - TracyQueuePrepareC( tracy::QueueType::ZoneValidation ); - tracy::MemWrite( &item->zoneValidation.id, ctx.id ); - TracyQueueCommitC( zoneValidationThread ); - } -#endif - { - TracyQueuePrepareC( tracy::QueueType::ZoneColor ); - tracy::MemWrite( &item->zoneColor.b, uint8_t( ( color ) & 0xFF ) ); - tracy::MemWrite( &item->zoneColor.g, uint8_t( ( color >> 8 ) & 0xFF ) ); - tracy::MemWrite( &item->zoneColor.r, uint8_t( ( color >> 16 ) & 0xFF ) ); - TracyQueueCommitC( zoneColorThread ); - } -} - -TRACY_API void ___tracy_emit_zone_value( TracyCZoneCtx ctx, uint64_t value ) -{ - if( !ctx.active ) return; -#ifndef TRACY_NO_VERIFY - { - TracyQueuePrepareC( tracy::QueueType::ZoneValidation ); - tracy::MemWrite( &item->zoneValidation.id, ctx.id ); - TracyQueueCommitC( zoneValidationThread ); - } -#endif - { - TracyQueuePrepareC( tracy::QueueType::ZoneValue ); - tracy::MemWrite( &item->zoneValue.value, value ); - TracyQueueCommitC( zoneValueThread ); - } -} - -TRACY_API void ___tracy_emit_memory_alloc( const void* ptr, size_t size, int32_t secure ) { tracy::Profiler::MemAlloc( ptr, size, secure != 0 ); } -TRACY_API void ___tracy_emit_memory_alloc_callstack( const void* ptr, size_t size, int32_t depth, int32_t secure ) -{ - if( depth > 0 && tracy::has_callstack() ) - { - tracy::Profiler::MemAllocCallstack( ptr, size, depth, secure != 0 ); - } - else - { - tracy::Profiler::MemAlloc( ptr, size, secure != 0 ); - } -} -TRACY_API void ___tracy_emit_memory_free( const void* ptr, int32_t secure ) { tracy::Profiler::MemFree( ptr, secure != 0 ); } -TRACY_API void ___tracy_emit_memory_free_callstack( const void* ptr, int32_t depth, int32_t secure ) -{ - if( depth > 0 && tracy::has_callstack() ) - { - tracy::Profiler::MemFreeCallstack( ptr, depth, secure != 0 ); - } - else - { - tracy::Profiler::MemFree( ptr, secure != 0 ); - } -} -TRACY_API void ___tracy_emit_memory_discard( const char* name, int32_t secure ) { tracy::Profiler::MemDiscard( name, secure != 0 ); } -TRACY_API void ___tracy_emit_memory_discard_callstack( const char* name, int32_t secure, int32_t depth ) -{ - if( depth > 0 && tracy::has_callstack() ) - { - tracy::Profiler::MemDiscardCallstack( name, secure != 0, depth ); - } - else - { - tracy::Profiler::MemDiscard( name, secure != 0 ); - } -} -TRACY_API void ___tracy_emit_memory_alloc_named( const void* ptr, size_t size, int32_t secure, const char* name ) { tracy::Profiler::MemAllocNamed( ptr, size, secure != 0, name ); } -TRACY_API void ___tracy_emit_memory_alloc_callstack_named( const void* ptr, size_t size, int32_t depth, int32_t secure, const char* name ) -{ - if( depth > 0 && tracy::has_callstack() ) - { - tracy::Profiler::MemAllocCallstackNamed( ptr, size, depth, secure != 0, name ); - } - else - { - tracy::Profiler::MemAllocNamed( ptr, size, secure != 0, name ); - } -} -TRACY_API void ___tracy_emit_memory_free_named( const void* ptr, int32_t secure, const char* name ) { tracy::Profiler::MemFreeNamed( ptr, secure != 0, name ); } -TRACY_API void ___tracy_emit_memory_free_callstack_named( const void* ptr, int32_t depth, int32_t secure, const char* name ) -{ - if( depth > 0 && tracy::has_callstack() ) - { - tracy::Profiler::MemFreeCallstackNamed( ptr, depth, secure != 0, name ); - } - else - { - tracy::Profiler::MemFreeNamed( ptr, secure != 0, name ); - } -} -TRACY_API void ___tracy_emit_frame_mark( const char* name ) { tracy::Profiler::SendFrameMark( name ); } -TRACY_API void ___tracy_emit_frame_mark_start( const char* name ) { tracy::Profiler::SendFrameMark( name, tracy::QueueType::FrameMarkMsgStart ); } -TRACY_API void ___tracy_emit_frame_mark_end( const char* name ) { tracy::Profiler::SendFrameMark( name, tracy::QueueType::FrameMarkMsgEnd ); } -TRACY_API void ___tracy_emit_frame_image( const void* image, uint16_t w, uint16_t h, uint8_t offset, int32_t flip ) { tracy::Profiler::SendFrameImage( image, w, h, offset, flip != 0 ); } -TRACY_API void ___tracy_emit_plot( const char* name, double val ) { tracy::Profiler::PlotData( name, val ); } -TRACY_API void ___tracy_emit_plot_float( const char* name, float val ) { tracy::Profiler::PlotData( name, val ); } -TRACY_API void ___tracy_emit_plot_int( const char* name, int64_t val ) { tracy::Profiler::PlotData( name, val ); } -TRACY_API void ___tracy_emit_plot_config( const char* name, int32_t type, int32_t step, int32_t fill, uint32_t color ) { tracy::Profiler::ConfigurePlot( name, tracy::PlotFormatType(type), step != 0, fill != 0, color ); } -TRACY_API void ___tracy_emit_message( const char* txt, size_t size, int32_t callstack_depth ) { tracy::Profiler::Message( txt, size, callstack_depth ); } -TRACY_API void ___tracy_emit_messageL( const char* txt, int32_t callstack_depth ) { tracy::Profiler::Message( txt, callstack_depth ); } -TRACY_API void ___tracy_emit_messageC( const char* txt, size_t size, uint32_t color, int32_t callstack_depth ) { tracy::Profiler::MessageColor( txt, size, color, callstack_depth ); } -TRACY_API void ___tracy_emit_messageLC( const char* txt, uint32_t color, int32_t callstack_depth ) { tracy::Profiler::MessageColor( txt, color, callstack_depth ); } -TRACY_API void ___tracy_emit_message_appinfo( const char* txt, size_t size ) { tracy::Profiler::MessageAppInfo( txt, size ); } - -TRACY_API uint64_t ___tracy_alloc_srcloc( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, uint32_t color ) { - return tracy::Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, color ); -} - -TRACY_API uint64_t ___tracy_alloc_srcloc_name( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, uint32_t color ) { - return tracy::Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz, color ); -} - -TRACY_API void ___tracy_emit_gpu_zone_begin( const struct ___tracy_gpu_zone_begin_data data ) -{ - TracyLfqPrepareC( tracy::QueueType::GpuZoneBegin ); - tracy::MemWrite( &item->gpuZoneBegin.cpuTime, tracy::Profiler::GetTime() ); - tracy::MemWrite( &item->gpuZoneBegin.thread, tracy::GetThreadHandle() ); - tracy::MemWrite( &item->gpuZoneBegin.srcloc, data.srcloc ); - tracy::MemWrite( &item->gpuZoneBegin.queryId, data.queryId ); - tracy::MemWrite( &item->gpuZoneBegin.context, data.context ); - TracyLfqCommitC; -} - -TRACY_API void ___tracy_emit_gpu_zone_begin_callstack( const struct ___tracy_gpu_zone_begin_callstack_data data ) -{ - tracy::GetProfiler().SendCallstack( data.depth ); - TracyLfqPrepareC( tracy::QueueType::GpuZoneBeginCallstack ); - tracy::MemWrite( &item->gpuZoneBegin.thread, tracy::GetThreadHandle() ); - tracy::MemWrite( &item->gpuZoneBegin.cpuTime, tracy::Profiler::GetTime() ); - tracy::MemWrite( &item->gpuZoneBegin.queryId, data.queryId ); - tracy::MemWrite( &item->gpuZoneBegin.context, data.context ); - tracy::MemWrite( &item->gpuZoneBegin.srcloc, data.srcloc ); - TracyLfqCommitC; -} - -TRACY_API void ___tracy_emit_gpu_zone_begin_alloc( const struct ___tracy_gpu_zone_begin_data data ) -{ - TracyLfqPrepareC( tracy::QueueType::GpuZoneBeginAllocSrcLoc ); - tracy::MemWrite( &item->gpuZoneBegin.cpuTime, tracy::Profiler::GetTime() ); - tracy::MemWrite( &item->gpuZoneBegin.thread, tracy::GetThreadHandle() ); - tracy::MemWrite( &item->gpuZoneBegin.srcloc, data.srcloc ); - tracy::MemWrite( &item->gpuZoneBegin.queryId, data.queryId ); - tracy::MemWrite( &item->gpuZoneBegin.context, data.context ); - TracyLfqCommitC; -} - -TRACY_API void ___tracy_emit_gpu_zone_begin_alloc_callstack( const struct ___tracy_gpu_zone_begin_callstack_data data ) -{ - tracy::GetProfiler().SendCallstack( data.depth ); - TracyLfqPrepareC( tracy::QueueType::GpuZoneBeginAllocSrcLocCallstack ); - tracy::MemWrite( &item->gpuZoneBegin.cpuTime, tracy::Profiler::GetTime() ); - tracy::MemWrite( &item->gpuZoneBegin.thread, tracy::GetThreadHandle() ); - tracy::MemWrite( &item->gpuZoneBegin.srcloc, data.srcloc ); - tracy::MemWrite( &item->gpuZoneBegin.queryId, data.queryId ); - tracy::MemWrite( &item->gpuZoneBegin.context, data.context ); - TracyLfqCommitC; -} - -TRACY_API void ___tracy_emit_gpu_time( const struct ___tracy_gpu_time_data data ) -{ - TracyLfqPrepareC( tracy::QueueType::GpuTime ); - tracy::MemWrite( &item->gpuTime.gpuTime, data.gpuTime ); - tracy::MemWrite( &item->gpuTime.queryId, data.queryId ); - tracy::MemWrite( &item->gpuTime.context, data.context ); - TracyLfqCommitC; -} - -TRACY_API void ___tracy_emit_gpu_zone_end( const struct ___tracy_gpu_zone_end_data data ) -{ - TracyLfqPrepareC( tracy::QueueType::GpuZoneEnd ); - tracy::MemWrite( &item->gpuZoneEnd.cpuTime, tracy::Profiler::GetTime() ); - memset( &item->gpuZoneEnd.thread, 0, sizeof( item->gpuZoneEnd.thread ) ); - tracy::MemWrite( &item->gpuZoneEnd.queryId, data.queryId ); - tracy::MemWrite( &item->gpuZoneEnd.context, data.context ); - TracyLfqCommitC; -} - -TRACY_API void ___tracy_emit_gpu_new_context( ___tracy_gpu_new_context_data data ) -{ - TracyLfqPrepareC( tracy::QueueType::GpuNewContext ); - tracy::MemWrite( &item->gpuNewContext.cpuTime, tracy::Profiler::GetTime() ); - tracy::MemWrite( &item->gpuNewContext.thread, tracy::GetThreadHandle() ); - tracy::MemWrite( &item->gpuNewContext.gpuTime, data.gpuTime ); - tracy::MemWrite( &item->gpuNewContext.period, data.period ); - tracy::MemWrite( &item->gpuNewContext.context, data.context ); - tracy::MemWrite( &item->gpuNewContext.flags, data.flags ); - tracy::MemWrite( &item->gpuNewContext.type, data.type ); - -#ifdef TRACY_ON_DEMAND - tracy::GetProfiler().DeferItem( *item ); -#endif - - TracyLfqCommitC; -} - -TRACY_API void ___tracy_emit_gpu_context_name( const struct ___tracy_gpu_context_name_data data ) -{ - auto ptr = (char*)tracy::tracy_malloc( data.len ); - memcpy( ptr, data.name, data.len ); - - TracyLfqPrepareC( tracy::QueueType::GpuContextName ); - tracy::MemWrite( &item->gpuContextNameFat.context, data.context ); - tracy::MemWrite( &item->gpuContextNameFat.ptr, (uint64_t)ptr ); - tracy::MemWrite( &item->gpuContextNameFat.size, data.len ); - -#ifdef TRACY_ON_DEMAND - tracy::GetProfiler().DeferItem( *item ); -#endif - - TracyLfqCommitC; -} - -TRACY_API void ___tracy_emit_gpu_calibration( const struct ___tracy_gpu_calibration_data data ) -{ - TracyLfqPrepareC( tracy::QueueType::GpuCalibration ); - tracy::MemWrite( &item->gpuCalibration.cpuTime, tracy::Profiler::GetTime() ); - tracy::MemWrite( &item->gpuCalibration.gpuTime, data.gpuTime ); - tracy::MemWrite( &item->gpuCalibration.cpuDelta, data.cpuDelta ); - tracy::MemWrite( &item->gpuCalibration.context, data.context ); - TracyLfqCommitC; -} - -TRACY_API void ___tracy_emit_gpu_time_sync( const struct ___tracy_gpu_time_sync_data data ) -{ - TracyLfqPrepareC( tracy::QueueType::GpuTimeSync ); - tracy::MemWrite( &item->gpuTimeSync.cpuTime, tracy::Profiler::GetTime() ); - tracy::MemWrite( &item->gpuTimeSync.gpuTime, data.gpuTime ); - tracy::MemWrite( &item->gpuTimeSync.context, data.context ); - TracyLfqCommitC; -} - -TRACY_API void ___tracy_emit_gpu_zone_begin_serial( const struct ___tracy_gpu_zone_begin_data data ) -{ - auto item = tracy::Profiler::QueueSerial(); - tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuZoneBeginSerial ); - tracy::MemWrite( &item->gpuZoneBegin.cpuTime, tracy::Profiler::GetTime() ); - tracy::MemWrite( &item->gpuZoneBegin.srcloc, data.srcloc ); - tracy::MemWrite( &item->gpuZoneBegin.thread, tracy::GetThreadHandle() ); - tracy::MemWrite( &item->gpuZoneBegin.queryId, data.queryId ); - tracy::MemWrite( &item->gpuZoneBegin.context, data.context ); - tracy::Profiler::QueueSerialFinish(); -} - -TRACY_API void ___tracy_emit_gpu_zone_begin_callstack_serial( const struct ___tracy_gpu_zone_begin_callstack_data data ) -{ - auto item = tracy::Profiler::QueueSerialCallstack( tracy::Callstack( data.depth ) ); - tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuZoneBeginCallstackSerial ); - tracy::MemWrite( &item->gpuZoneBegin.cpuTime, tracy::Profiler::GetTime() ); - tracy::MemWrite( &item->gpuZoneBegin.srcloc, data.srcloc ); - tracy::MemWrite( &item->gpuZoneBegin.thread, tracy::GetThreadHandle() ); - tracy::MemWrite( &item->gpuZoneBegin.queryId, data.queryId ); - tracy::MemWrite( &item->gpuZoneBegin.context, data.context ); - tracy::Profiler::QueueSerialFinish(); -} - -TRACY_API void ___tracy_emit_gpu_zone_begin_alloc_serial( const struct ___tracy_gpu_zone_begin_data data ) -{ - auto item = tracy::Profiler::QueueSerial(); - tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuZoneBeginAllocSrcLocSerial ); - tracy::MemWrite( &item->gpuZoneBegin.cpuTime, tracy::Profiler::GetTime() ); - tracy::MemWrite( &item->gpuZoneBegin.thread, tracy::GetThreadHandle() ); - tracy::MemWrite( &item->gpuZoneBegin.srcloc, data.srcloc ); - tracy::MemWrite( &item->gpuZoneBegin.queryId, data.queryId ); - tracy::MemWrite( &item->gpuZoneBegin.context, data.context ); - tracy::Profiler::QueueSerialFinish(); -} - -TRACY_API void ___tracy_emit_gpu_zone_begin_alloc_callstack_serial( const struct ___tracy_gpu_zone_begin_callstack_data data ) -{ - auto item = tracy::Profiler::QueueSerialCallstack( tracy::Callstack( data.depth ) ); - tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuZoneBeginAllocSrcLocCallstackSerial ); - tracy::MemWrite( &item->gpuZoneBegin.cpuTime, tracy::Profiler::GetTime() ); - tracy::MemWrite( &item->gpuZoneBegin.thread, tracy::GetThreadHandle() ); - tracy::MemWrite( &item->gpuZoneBegin.srcloc, data.srcloc ); - tracy::MemWrite( &item->gpuZoneBegin.queryId, data.queryId ); - tracy::MemWrite( &item->gpuZoneBegin.context, data.context ); - tracy::Profiler::QueueSerialFinish(); -} - -TRACY_API void ___tracy_emit_gpu_time_serial( const struct ___tracy_gpu_time_data data ) -{ - auto item = tracy::Profiler::QueueSerial(); - tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuTime ); - tracy::MemWrite( &item->gpuTime.gpuTime, data.gpuTime ); - tracy::MemWrite( &item->gpuTime.queryId, data.queryId ); - tracy::MemWrite( &item->gpuTime.context, data.context ); - tracy::Profiler::QueueSerialFinish(); -} - -TRACY_API void ___tracy_emit_gpu_zone_end_serial( const struct ___tracy_gpu_zone_end_data data ) -{ - auto item = tracy::Profiler::QueueSerial(); - tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuZoneEndSerial ); - tracy::MemWrite( &item->gpuZoneEnd.cpuTime, tracy::Profiler::GetTime() ); - memset( &item->gpuZoneEnd.thread, 0, sizeof( item->gpuZoneEnd.thread ) ); - tracy::MemWrite( &item->gpuZoneEnd.queryId, data.queryId ); - tracy::MemWrite( &item->gpuZoneEnd.context, data.context ); - tracy::Profiler::QueueSerialFinish(); -} - -TRACY_API void ___tracy_emit_gpu_new_context_serial( ___tracy_gpu_new_context_data data ) -{ - auto item = tracy::Profiler::QueueSerial(); - tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuNewContext ); - tracy::MemWrite( &item->gpuNewContext.cpuTime, tracy::Profiler::GetTime() ); - tracy::MemWrite( &item->gpuNewContext.thread, tracy::GetThreadHandle() ); - tracy::MemWrite( &item->gpuNewContext.gpuTime, data.gpuTime ); - tracy::MemWrite( &item->gpuNewContext.period, data.period ); - tracy::MemWrite( &item->gpuNewContext.context, data.context ); - tracy::MemWrite( &item->gpuNewContext.flags, data.flags ); - tracy::MemWrite( &item->gpuNewContext.type, data.type ); - tracy::Profiler::QueueSerialFinish(); -} - -TRACY_API void ___tracy_emit_gpu_context_name_serial( const struct ___tracy_gpu_context_name_data data ) -{ - auto ptr = (char*)tracy::tracy_malloc( data.len ); - memcpy( ptr, data.name, data.len ); - - auto item = tracy::Profiler::QueueSerial(); - tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuContextName ); - tracy::MemWrite( &item->gpuContextNameFat.context, data.context ); - tracy::MemWrite( &item->gpuContextNameFat.ptr, (uint64_t)ptr ); - tracy::MemWrite( &item->gpuContextNameFat.size, data.len ); - tracy::Profiler::QueueSerialFinish(); -} - -TRACY_API void ___tracy_emit_gpu_calibration_serial( const struct ___tracy_gpu_calibration_data data ) -{ - auto item = tracy::Profiler::QueueSerial(); - tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuCalibration ); - tracy::MemWrite( &item->gpuCalibration.cpuTime, tracy::Profiler::GetTime() ); - tracy::MemWrite( &item->gpuCalibration.gpuTime, data.gpuTime ); - tracy::MemWrite( &item->gpuCalibration.cpuDelta, data.cpuDelta ); - tracy::MemWrite( &item->gpuCalibration.context, data.context ); - tracy::Profiler::QueueSerialFinish(); -} - -TRACY_API void ___tracy_emit_gpu_time_sync_serial( const struct ___tracy_gpu_time_sync_data data ) -{ - auto item = tracy::Profiler::QueueSerial(); - tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuTimeSync ); - tracy::MemWrite( &item->gpuTimeSync.cpuTime, tracy::Profiler::GetTime() ); - tracy::MemWrite( &item->gpuTimeSync.gpuTime, data.gpuTime ); - tracy::MemWrite( &item->gpuTimeSync.context, data.context ); - tracy::Profiler::QueueSerialFinish(); -} - -/* ========================== * - * Lock - * ========================== */ - -TRACY_API struct TracyCLockCtx *___tracy_announce_lockable_ctx( const struct ___tracy_source_location_data* srcloc ) -{ - static_assert(sizeof(struct ___tracy_source_location_data) == sizeof(tracy::SourceLocationData)); // C srcloc struct must fit in cpp srcloc struct - tracy::LockableCtx *ctx = (tracy::LockableCtx *)tracy::tracy_malloc(sizeof(tracy::LockableCtx)); - new(ctx) tracy::LockableCtx((tracy::SourceLocationData *)srcloc); - return (struct TracyCLockCtx *)ctx; -} - -TRACY_API void ___tracy_terminate_lockable_ctx( struct TracyCLockCtx *lockdata ) -{ - tracy::LockableCtx *ctx = (tracy::LockableCtx *)lockdata; - ctx->~LockableCtx(); - tracy::tracy_free((void*)lockdata); -} - -TRACY_API int32_t ___tracy_before_lock_lockable_ctx( struct TracyCLockCtx *lockdata ) -{ - tracy::LockableCtx *ctx = (tracy::LockableCtx *)lockdata; - return ctx->BeforeLock(); -} - -TRACY_API void ___tracy_after_lock_lockable_ctx( struct TracyCLockCtx *lockdata ) -{ - tracy::LockableCtx *ctx = (tracy::LockableCtx *)lockdata; - ctx->AfterLock(); -} - -TRACY_API void ___tracy_after_unlock_lockable_ctx( struct TracyCLockCtx *lockdata ) -{ - tracy::LockableCtx *ctx = (tracy::LockableCtx *)lockdata; - ctx->AfterUnlock(); -} - -TRACY_API void ___tracy_after_try_lock_lockable_ctx( struct TracyCLockCtx *lockdata, int32_t acquired ) -{ - tracy::LockableCtx *ctx = (tracy::LockableCtx *)lockdata; - ctx->AfterTryLock(acquired); -} - -TRACY_API void ___tracy_mark_lockable_ctx(struct TracyCLockCtx *lockdata, const struct ___tracy_source_location_data *srcloc) -{ - tracy::LockableCtx *ctx = (tracy::LockableCtx *)lockdata; - ctx->Mark((tracy::SourceLocationData *)srcloc); -} - -TRACY_API void ___tracy_custom_name_lockable_ctx(struct TracyCLockCtx *lockdata, const char *name, size_t nameSz) -{ - tracy::LockableCtx *ctx = (tracy::LockableCtx *)lockdata; - ctx->CustomName(name, nameSz); -} - -/* ========================== * - * Shared lock - * ========================== */ - -TRACY_API struct TracyCSharedLockCtx *___tracy_announce_shared_lockable_ctx(const struct ___tracy_source_location_data *srcloc) -{ - static_assert(sizeof(struct ___tracy_source_location_data) == sizeof(tracy::SourceLocationData)); // C srcloc struct must fit in cpp srcloc struct - tracy::SharedLockableCtx *ctx = (tracy::SharedLockableCtx *)tracy::tracy_malloc(sizeof(tracy::SharedLockableCtx)); - new(ctx) tracy::SharedLockableCtx((tracy::SourceLocationData *)srcloc); - return (struct TracyCSharedLockCtx *)ctx; -} - -TRACY_API void ___tracy_terminate_shared_lockable_ctx(struct TracyCSharedLockCtx *lockdata) -{ - tracy::SharedLockableCtx *ctx = (tracy::SharedLockableCtx *)lockdata; - ctx->~SharedLockableCtx(); - tracy::tracy_free((void *)lockdata); -} - -TRACY_API int32_t ___tracy_before_exclusive_lock_shared_lockable_ctx(struct TracyCSharedLockCtx *lockdata) -{ - tracy::SharedLockableCtx *ctx = (tracy::SharedLockableCtx *)lockdata; - return ctx->BeforeLock(); -} - -TRACY_API void ___tracy_after_exclusive_lock_shared_lockable_ctx(struct TracyCSharedLockCtx *lockdata) -{ - tracy::SharedLockableCtx *ctx = (tracy::SharedLockableCtx *)lockdata; - ctx->AfterLock(); -} - -TRACY_API void ___tracy_after_exclusive_unlock_shared_lockable_ctx(struct TracyCSharedLockCtx *lockdata) -{ - tracy::SharedLockableCtx *ctx = (tracy::SharedLockableCtx *)lockdata; - ctx->AfterUnlock(); -} - -TRACY_API void ___tracy_after_try_exclusive_lock_shared_lockable_ctx(struct TracyCSharedLockCtx *lockdata, int32_t acquired) -{ - tracy::SharedLockableCtx *ctx = (tracy::SharedLockableCtx *)lockdata; - ctx->AfterTryLock(acquired); -} - -TRACY_API int32_t ___tracy_before_shared_lock_shared_lockable_ctx(struct TracyCSharedLockCtx *lockdata) -{ - tracy::SharedLockableCtx *ctx = (tracy::SharedLockableCtx *)lockdata; - return ctx->BeforeLockShared(); -} - -TRACY_API void ___tracy_after_shared_lock_shared_lockable_ctx(struct TracyCSharedLockCtx *lockdata) -{ - tracy::SharedLockableCtx *ctx = (tracy::SharedLockableCtx *)lockdata; - ctx->AfterLockShared(); -} - -TRACY_API void ___tracy_after_try_shared_lock_shared_lockable_ctx(struct TracyCSharedLockCtx *lockdata, int32_t acquired) -{ - tracy::SharedLockableCtx *ctx = (tracy::SharedLockableCtx *)lockdata; - ctx->AfterTryLockShared(acquired); -} - -TRACY_API void ___tracy_after_shared_unlock_shared_lockable_ctx(struct TracyCSharedLockCtx *lockdata) -{ - tracy::SharedLockableCtx *ctx = (tracy::SharedLockableCtx *)lockdata; - ctx->AfterUnlockShared(); -} - -TRACY_API void ___tracy_mark_shared_lockable_ctx(struct TracyCSharedLockCtx *lockdata, const struct ___tracy_source_location_data *srcloc) -{ - tracy::SharedLockableCtx *ctx = (tracy::SharedLockableCtx *)lockdata; - ctx->Mark((tracy::SourceLocationData *)srcloc); -} - -TRACY_API void ___tracy_custom_name_shared_lockable_ctx(struct TracyCSharedLockCtx *lockdata, const char *name, size_t nameSz) -{ - tracy::SharedLockableCtx *ctx = (tracy::SharedLockableCtx *)lockdata; - ctx->CustomName(name, nameSz); -} - -/* ========================== * - * D3D11 - * ========================== */ - -// Compilation error if D3D11_NO_HELPERS not defined -#define D3D11_NO_HELPERS -# include "../tracy/TracyD3D11.hpp" -#undef D3D11_NO_HELPERS - -TRACY_API struct TracyCD3D11Ctx *___tracy_d3d11_context_announce(struct ID3D11Device *device, struct ID3D11DeviceContext *devicectx, char *name, int name_size) -{ - tracy::D3D11Ctx *ctx = tracy::CreateD3D11Context(device, devicectx); - TracyD3D11ContextName(ctx, name, name_size) ctx->Name(name, name_size); - return (struct TracyCD3D11Ctx *)ctx; -} - -TRACY_API void ___tracy_d3d11_context_terminate(struct TracyCD3D11Ctx *d3d11_ctx) -{ - tracy::D3D11Ctx *ctx = (tracy::D3D11Ctx *)d3d11_ctx; - tracy::DestroyD3D11Context(ctx); -} - -TRACY_API void ___tracy_d3d11_context_collect(struct TracyCD3D11Ctx *d3d11_ctx) -{ - tracy::D3D11Ctx *ctx = (tracy::D3D11Ctx *)d3d11_ctx; - TracyD3D11Collect(ctx); -} - -TRACY_API void ___tracy_d3d11_emit_zone_begin(struct TracyCD3D11Ctx *d3d11_ctx, TracyCD3D11ZoneCtx *zone_ctx, struct ___tracy_source_location_data *srcloc, int32_t active) -{ - tracy::D3D11Ctx *ctx = (tracy::D3D11Ctx *)d3d11_ctx; - - // Cpp zone class must fit in C zone struct - static_assert(sizeof(TracyCD3D11ZoneCtx) == sizeof(tracy::D3D11ZoneScope)); - static_assert(alignof(TracyCD3D11ZoneCtx) == alignof(tracy::D3D11ZoneScope)); - -#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK - new(&zone_ctx->opaque) tracy::D3D11ZoneScope(ctx, (tracy::SourceLocationData *)srcloc, active); -#else - static_assert(TRACY_CALLSTACK == 0); - new(&zone_ctx->opaque) tracy::D3D11ZoneScope(ctx, (tracy::SourceLocationData *)srcloc, TRACY_CALLSTACK, active); -#endif -} - -TRACY_API void ___tracy_d3d11_emit_zone_end(TracyCD3D11ZoneCtx zone_ctx) -{ - tracy::D3D11ZoneScope *ctx = (tracy::D3D11ZoneScope *)&zone_ctx.opaque; - ctx->~D3D11ZoneScope(); -} - -/* ========================== * - * Other - * ========================== */ - -TRACY_API int32_t ___tracy_connected( void ) -{ - return static_cast( tracy::GetProfiler().IsConnected() ); -} - -#ifdef TRACY_FIBERS -TRACY_API void ___tracy_fiber_enter( const char* fiber ){ tracy::Profiler::EnterFiber( fiber, 0 ); } -TRACY_API void ___tracy_fiber_leave( void ){ tracy::Profiler::LeaveFiber(); } -#endif - -# if defined TRACY_MANUAL_LIFETIME && defined TRACY_DELAYED_INIT -TRACY_API void ___tracy_startup_profiler( void ) -{ - tracy::StartupProfiler(); -} - -TRACY_API void ___tracy_shutdown_profiler( void ) -{ - tracy::ShutdownProfiler(); -} - -TRACY_API int32_t ___tracy_profiler_started( void ) -{ - return static_cast( tracy::s_isProfilerStarted.load( std::memory_order_seq_cst ) ); -} -# endif - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/third_party/tracy/client/TracyProfiler.hpp b/src/third_party/tracy/client/TracyProfiler.hpp deleted file mode 100644 index 8d169058..00000000 --- a/src/third_party/tracy/client/TracyProfiler.hpp +++ /dev/null @@ -1,1089 +0,0 @@ -#ifndef __TRACYPROFILER_HPP__ -#define __TRACYPROFILER_HPP__ - -#include -#include -#include -#include -#include - -#include "tracy_concurrentqueue.h" -#include "tracy_SPSCQueue.h" -#include "TracyCallstack.hpp" -#include "TracyKCore.hpp" -#include "TracySysPower.hpp" -#include "TracySysTime.hpp" -#include "TracyFastVector.hpp" -#include "../common/TracyQueue.hpp" -#include "../common/TracyAlign.hpp" -#include "../common/TracyAlloc.hpp" -#include "../common/TracyMutex.hpp" -#include "../common/TracyProtocol.hpp" - -#if defined _WIN32 -# include -#endif -#ifdef __APPLE__ -# include -# include -#endif - -#if ( (defined _WIN32 && !(defined _M_ARM64 || defined _M_ARM)) || ( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 ) || ( defined TARGET_OS_IOS && TARGET_OS_IOS == 1 ) ) -# define TRACY_HW_TIMER -#endif - -#ifdef __linux__ -# include -#endif - -#if defined TRACY_TIMER_FALLBACK || !defined TRACY_HW_TIMER -# include -#endif - -#ifndef TracyConcat -# define TracyConcat(x,y) TracyConcatIndirect(x,y) -#endif -#ifndef TracyConcatIndirect -# define TracyConcatIndirect(x,y) x##y -#endif - -namespace tracy -{ -#if defined(TRACY_DELAYED_INIT) && defined(TRACY_MANUAL_LIFETIME) -TRACY_API void StartupProfiler(); -TRACY_API void ShutdownProfiler(); -TRACY_API bool IsProfilerStarted(); -# define TracyIsStarted tracy::IsProfilerStarted() -#else -# define TracyIsStarted true -#endif - -class GpuCtx; -class Profiler; -class Socket; -class UdpBroadcast; - -struct GpuCtxWrapper -{ - GpuCtx* ptr; -}; - -TRACY_API moodycamel::ConcurrentQueue::ExplicitProducer* GetToken(); -TRACY_API Profiler& GetProfiler(); -TRACY_API std::atomic& GetLockCounter(); -TRACY_API std::atomic& GetGpuCtxCounter(); -TRACY_API GpuCtxWrapper& GetGpuCtx(); -TRACY_API uint32_t GetThreadHandle(); -TRACY_API bool ProfilerAvailable(); -TRACY_API bool ProfilerAllocatorAvailable(); -TRACY_API int64_t GetFrequencyQpc(); - -#if defined TRACY_TIMER_FALLBACK && defined TRACY_HW_TIMER && ( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 ) -TRACY_API bool HardwareSupportsInvariantTSC(); // check, if we need fallback scenario -#else -# if defined TRACY_HW_TIMER -tracy_force_inline bool HardwareSupportsInvariantTSC() -{ - return true; // this is checked at startup -} -# else -tracy_force_inline bool HardwareSupportsInvariantTSC() -{ - return false; -} -# endif -#endif - - -struct SourceLocationData -{ - const char* name; - const char* function; - const char* file; - uint32_t line; - uint32_t color; -}; - -#ifdef TRACY_ON_DEMAND -struct LuaZoneState -{ - uint32_t counter; - bool active; -}; -#endif - - -#define TracyLfqPrepare( _type ) \ - tracy::moodycamel::ConcurrentQueueDefaultTraits::index_t __magic; \ - auto __token = tracy::GetToken(); \ - auto& __tail = __token->get_tail_index(); \ - auto item = __token->enqueue_begin( __magic ); \ - tracy::MemWrite( &item->hdr.type, _type ); - -#define TracyLfqCommit \ - __tail.store( __magic + 1, std::memory_order_release ); - -#define TracyLfqPrepareC( _type ) \ - tracy::moodycamel::ConcurrentQueueDefaultTraits::index_t __magic; \ - auto __token = tracy::GetToken(); \ - auto& __tail = __token->get_tail_index(); \ - auto item = __token->enqueue_begin( __magic ); \ - tracy::MemWrite( &item->hdr.type, _type ); - -#define TracyLfqCommitC \ - __tail.store( __magic + 1, std::memory_order_release ); - - -#ifdef TRACY_FIBERS -# define TracyQueuePrepare( _type ) \ - auto item = tracy::Profiler::QueueSerial(); \ - tracy::MemWrite( &item->hdr.type, _type ); -# define TracyQueueCommit( _name ) \ - tracy::MemWrite( &item->_name.thread, tracy::GetThreadHandle() ); \ - tracy::Profiler::QueueSerialFinish(); -# define TracyQueuePrepareC( _type ) \ - auto item = tracy::Profiler::QueueSerial(); \ - tracy::MemWrite( &item->hdr.type, _type ); -# define TracyQueueCommitC( _name ) \ - tracy::MemWrite( &item->_name.thread, tracy::GetThreadHandle() ); \ - tracy::Profiler::QueueSerialFinish(); -#else -# define TracyQueuePrepare( _type ) TracyLfqPrepare( _type ) -# define TracyQueueCommit( _name ) TracyLfqCommit -# define TracyQueuePrepareC( _type ) TracyLfqPrepareC( _type ) -# define TracyQueueCommitC( _name ) TracyLfqCommitC -#endif - - -typedef void(*ParameterCallback)( void* data, uint32_t idx, int32_t val ); -typedef char*(*SourceContentsCallback)( void* data, const char* filename, size_t& size ); - -class Profiler -{ - struct FrameImageQueueItem - { - void* image; - uint32_t frame; - uint16_t w; - uint16_t h; - bool flip; - }; - - enum class SymbolQueueItemType - { - CallstackFrame, - SymbolQuery, - ExternalName, - KernelCode, - SourceCode - }; - - struct SymbolQueueItem - { - SymbolQueueItemType type; - uint64_t ptr; - uint64_t extra; - uint32_t id; - }; - -public: - Profiler(); - ~Profiler(); - - void SpawnWorkerThreads(); - - static tracy_force_inline int64_t GetTime() - { -#ifdef TRACY_HW_TIMER -# if defined TARGET_OS_IOS && TARGET_OS_IOS == 1 - if( HardwareSupportsInvariantTSC() ) return mach_absolute_time(); -# elif defined _WIN32 -# ifdef TRACY_TIMER_QPC - return GetTimeQpc(); -# else - if( HardwareSupportsInvariantTSC() ) return int64_t( __rdtsc() ); -# endif -# elif defined __i386 || defined _M_IX86 - if( HardwareSupportsInvariantTSC() ) - { - uint32_t eax, edx; - asm volatile ( "rdtsc" : "=a" (eax), "=d" (edx) ); - return ( uint64_t( edx ) << 32 ) + uint64_t( eax ); - } -# elif defined __x86_64__ || defined _M_X64 - if( HardwareSupportsInvariantTSC() ) - { - uint64_t rax, rdx; -#ifdef TRACY_PATCHABLE_NOPSLEDS - // Some external tooling (such as rr) wants to patch our rdtsc and replace it by a - // branch to control the external input seen by a program. This kind of patching is - // not generally possible depending on the surrounding code and can lead to significant - // slowdowns if the compiler generated unlucky code and rr and tracy are used together. - // To avoid this, use the rr-safe `nopl 0(%rax, %rax, 1); rdtsc` instruction sequence, - // which rr promises will be patchable independent of the surrounding code. - asm volatile ( - // This is nopl 0(%rax, %rax, 1), but assemblers are inconsistent about whether - // they emit that as a 4 or 5 byte sequence and we need to be guaranteed to use - // the 5 byte one. - ".byte 0x0f, 0x1f, 0x44, 0x00, 0x00\n\t" - "rdtsc" : "=a" (rax), "=d" (rdx) ); -#else - asm volatile ( "rdtsc" : "=a" (rax), "=d" (rdx) ); -#endif - return (int64_t)(( rdx << 32 ) + rax); - } -# else -# error "TRACY_HW_TIMER detection logic needs fixing" -# endif -#endif - -#if !defined TRACY_HW_TIMER || defined TRACY_TIMER_FALLBACK -# if defined __linux__ && defined CLOCK_MONOTONIC_RAW - struct timespec ts; - clock_gettime( CLOCK_MONOTONIC_RAW, &ts ); - return int64_t( ts.tv_sec ) * 1000000000ll + int64_t( ts.tv_nsec ); -# else - return std::chrono::duration_cast( std::chrono::high_resolution_clock::now().time_since_epoch() ).count(); -# endif -#endif - -#if !defined TRACY_TIMER_FALLBACK - return 0; // unreachable branch -#endif - } - - tracy_force_inline uint32_t GetNextZoneId() - { - return m_zoneId.fetch_add( 1, std::memory_order_relaxed ); - } - - static tracy_force_inline QueueItem* QueueSerial() - { - auto& p = GetProfiler(); - p.m_serialLock.lock(); - return p.m_serialQueue.prepare_next(); - } - - static tracy_force_inline QueueItem* QueueSerialCallstack( void* ptr ) - { - auto& p = GetProfiler(); - p.m_serialLock.lock(); - p.SendCallstackSerial( ptr ); - return p.m_serialQueue.prepare_next(); - } - - static tracy_force_inline void QueueSerialFinish() - { - auto& p = GetProfiler(); - p.m_serialQueue.commit_next(); - p.m_serialLock.unlock(); - } - - static tracy_force_inline void SendFrameMark( const char* name ) - { - if( !name ) GetProfiler().m_frameCount.fetch_add( 1, std::memory_order_relaxed ); -#ifdef TRACY_ON_DEMAND - if( !GetProfiler().IsConnected() ) return; -#endif - auto item = QueueSerial(); - MemWrite( &item->hdr.type, QueueType::FrameMarkMsg ); - MemWrite( &item->frameMark.time, GetTime() ); - MemWrite( &item->frameMark.name, uint64_t( name ) ); - QueueSerialFinish(); - } - - static tracy_force_inline void SendFrameMark( const char* name, QueueType type ) - { - assert( type == QueueType::FrameMarkMsgStart || type == QueueType::FrameMarkMsgEnd ); -#ifdef TRACY_ON_DEMAND - if( !GetProfiler().IsConnected() ) return; -#endif - auto item = QueueSerial(); - MemWrite( &item->hdr.type, type ); - MemWrite( &item->frameMark.time, GetTime() ); - MemWrite( &item->frameMark.name, uint64_t( name ) ); - QueueSerialFinish(); - } - - static tracy_force_inline void SendFrameImage( const void* image, uint16_t w, uint16_t h, uint8_t offset, bool flip ) - { -#ifndef TRACY_NO_FRAME_IMAGE - auto& profiler = GetProfiler(); - assert( profiler.m_frameCount.load( std::memory_order_relaxed ) < (std::numeric_limits::max)() ); -# ifdef TRACY_ON_DEMAND - if( !profiler.IsConnected() ) return; -# endif - const auto sz = size_t( w ) * size_t( h ) * 4; - auto ptr = (char*)tracy_malloc( sz ); - memcpy( ptr, image, sz ); - - profiler.m_fiLock.lock(); - auto fi = profiler.m_fiQueue.prepare_next(); - fi->image = ptr; - fi->frame = uint32_t( profiler.m_frameCount.load( std::memory_order_relaxed ) - offset ); - fi->w = w; - fi->h = h; - fi->flip = flip; - profiler.m_fiQueue.commit_next(); - profiler.m_fiLock.unlock(); -#else - static_cast(image); // unused - static_cast(w); // unused - static_cast(h); // unused - static_cast(offset); // unused - static_cast(flip); // unused -#endif - } - - static tracy_force_inline void PlotData( const char* name, int64_t val ) - { -#ifdef TRACY_ON_DEMAND - if( !GetProfiler().IsConnected() ) return; -#endif - TracyLfqPrepare( QueueType::PlotDataInt ); - MemWrite( &item->plotDataInt.name, (uint64_t)name ); - MemWrite( &item->plotDataInt.time, GetTime() ); - MemWrite( &item->plotDataInt.val, val ); - TracyLfqCommit; - } - - static tracy_force_inline void PlotData( const char* name, float val ) - { -#ifdef TRACY_ON_DEMAND - if( !GetProfiler().IsConnected() ) return; -#endif - TracyLfqPrepare( QueueType::PlotDataFloat ); - MemWrite( &item->plotDataFloat.name, (uint64_t)name ); - MemWrite( &item->plotDataFloat.time, GetTime() ); - MemWrite( &item->plotDataFloat.val, val ); - TracyLfqCommit; - } - - static tracy_force_inline void PlotData( const char* name, double val ) - { -#ifdef TRACY_ON_DEMAND - if( !GetProfiler().IsConnected() ) return; -#endif - TracyLfqPrepare( QueueType::PlotDataDouble ); - MemWrite( &item->plotDataDouble.name, (uint64_t)name ); - MemWrite( &item->plotDataDouble.time, GetTime() ); - MemWrite( &item->plotDataDouble.val, val ); - TracyLfqCommit; - } - - static tracy_force_inline void ConfigurePlot( const char* name, PlotFormatType type, bool step, bool fill, uint32_t color ) - { - TracyLfqPrepare( QueueType::PlotConfig ); - MemWrite( &item->plotConfig.name, (uint64_t)name ); - MemWrite( &item->plotConfig.type, (uint8_t)type ); - MemWrite( &item->plotConfig.step, (uint8_t)step ); - MemWrite( &item->plotConfig.fill, (uint8_t)fill ); - MemWrite( &item->plotConfig.color, color ); - -#ifdef TRACY_ON_DEMAND - GetProfiler().DeferItem( *item ); -#endif - - TracyLfqCommit; - } - - static tracy_force_inline void Message( const char* txt, size_t size, int32_t callstack_depth ) - { - assert( size < (std::numeric_limits::max)() ); -#ifdef TRACY_ON_DEMAND - if( !GetProfiler().IsConnected() ) return; -#endif - if( callstack_depth != 0 && has_callstack() ) - { - tracy::GetProfiler().SendCallstack( callstack_depth ); - } - - auto ptr = (char*)tracy_malloc( size ); - memcpy( ptr, txt, size ); - - TracyQueuePrepare( callstack_depth == 0 ? QueueType::Message : QueueType::MessageCallstack ); - MemWrite( &item->messageFat.time, GetTime() ); - MemWrite( &item->messageFat.text, (uint64_t)ptr ); - MemWrite( &item->messageFat.size, (uint16_t)size ); - TracyQueueCommit( messageFatThread ); - } - - static tracy_force_inline void Message( const char* txt, int32_t callstack_depth ) - { -#ifdef TRACY_ON_DEMAND - if( !GetProfiler().IsConnected() ) return; -#endif - if( callstack_depth != 0 && has_callstack() ) - { - tracy::GetProfiler().SendCallstack( callstack_depth ); - } - - TracyQueuePrepare( callstack_depth == 0 ? QueueType::MessageLiteral : QueueType::MessageLiteralCallstack ); - MemWrite( &item->messageLiteral.time, GetTime() ); - MemWrite( &item->messageLiteral.text, (uint64_t)txt ); - TracyQueueCommit( messageLiteralThread ); - } - - static tracy_force_inline void MessageColor( const char* txt, size_t size, uint32_t color, int32_t callstack_depth ) - { - assert( size < (std::numeric_limits::max)() ); -#ifdef TRACY_ON_DEMAND - if( !GetProfiler().IsConnected() ) return; -#endif - if( callstack_depth != 0 && has_callstack() ) - { - tracy::GetProfiler().SendCallstack( callstack_depth ); - } - - auto ptr = (char*)tracy_malloc( size ); - memcpy( ptr, txt, size ); - - TracyQueuePrepare( callstack_depth == 0 ? QueueType::MessageColor : QueueType::MessageColorCallstack ); - MemWrite( &item->messageColorFat.time, GetTime() ); - MemWrite( &item->messageColorFat.text, (uint64_t)ptr ); - MemWrite( &item->messageColorFat.b, uint8_t( ( color ) & 0xFF ) ); - MemWrite( &item->messageColorFat.g, uint8_t( ( color >> 8 ) & 0xFF ) ); - MemWrite( &item->messageColorFat.r, uint8_t( ( color >> 16 ) & 0xFF ) ); - MemWrite( &item->messageColorFat.size, (uint16_t)size ); - TracyQueueCommit( messageColorFatThread ); - } - - static tracy_force_inline void MessageColor( const char* txt, uint32_t color, int32_t callstack_depth ) - { -#ifdef TRACY_ON_DEMAND - if( !GetProfiler().IsConnected() ) return; -#endif - if( callstack_depth != 0 && has_callstack() ) - { - tracy::GetProfiler().SendCallstack( callstack_depth ); - } - - TracyQueuePrepare( callstack_depth == 0 ? QueueType::MessageLiteralColor : QueueType::MessageLiteralColorCallstack ); - MemWrite( &item->messageColorLiteral.time, GetTime() ); - MemWrite( &item->messageColorLiteral.text, (uint64_t)txt ); - MemWrite( &item->messageColorLiteral.b, uint8_t( ( color ) & 0xFF ) ); - MemWrite( &item->messageColorLiteral.g, uint8_t( ( color >> 8 ) & 0xFF ) ); - MemWrite( &item->messageColorLiteral.r, uint8_t( ( color >> 16 ) & 0xFF ) ); - TracyQueueCommit( messageColorLiteralThread ); - } - - static tracy_force_inline void MessageAppInfo( const char* txt, size_t size ) - { - assert( size < (std::numeric_limits::max)() ); - auto ptr = (char*)tracy_malloc( size ); - memcpy( ptr, txt, size ); - TracyLfqPrepare( QueueType::MessageAppInfo ); - MemWrite( &item->messageFat.time, GetTime() ); - MemWrite( &item->messageFat.text, (uint64_t)ptr ); - MemWrite( &item->messageFat.size, (uint16_t)size ); - -#ifdef TRACY_ON_DEMAND - GetProfiler().DeferItem( *item ); -#endif - - TracyLfqCommit; - } - - static tracy_force_inline void MemAlloc( const void* ptr, size_t size, bool secure ) - { - if( secure && !ProfilerAvailable() ) return; -#ifdef TRACY_ON_DEMAND - if( !GetProfiler().IsConnected() ) return; -#endif - const auto thread = GetThreadHandle(); - - GetProfiler().m_serialLock.lock(); - SendMemAlloc( QueueType::MemAlloc, thread, ptr, size ); - GetProfiler().m_serialLock.unlock(); - } - - static tracy_force_inline void MemFree( const void* ptr, bool secure ) - { - if( secure && !ProfilerAvailable() ) return; -#ifdef TRACY_ON_DEMAND - if( !GetProfiler().IsConnected() ) return; -#endif - const auto thread = GetThreadHandle(); - - GetProfiler().m_serialLock.lock(); - SendMemFree( QueueType::MemFree, thread, ptr ); - GetProfiler().m_serialLock.unlock(); - } - - static tracy_force_inline void MemAllocCallstack( const void* ptr, size_t size, int32_t depth, bool secure ) - { - if( secure && !ProfilerAvailable() ) return; - if( depth > 0 && has_callstack() ) - { - auto& profiler = GetProfiler(); -# ifdef TRACY_ON_DEMAND - if( !profiler.IsConnected() ) return; -# endif - const auto thread = GetThreadHandle(); - - auto callstack = Callstack( depth ); - - profiler.m_serialLock.lock(); - SendCallstackSerial( callstack ); - SendMemAlloc( QueueType::MemAllocCallstack, thread, ptr, size ); - profiler.m_serialLock.unlock(); - } - else - { - MemAlloc( ptr, size, secure ); - } - } - - static tracy_force_inline void MemFreeCallstack( const void* ptr, int32_t depth, bool secure ) - { - if( secure && !ProfilerAvailable() ) return; - if( !ProfilerAllocatorAvailable() ) - { - MemFree( ptr, secure ); - return; - } - if( depth > 0 && has_callstack() ) - { - auto& profiler = GetProfiler(); -# ifdef TRACY_ON_DEMAND - if( !profiler.IsConnected() ) return; -# endif - const auto thread = GetThreadHandle(); - - auto callstack = Callstack( depth ); - - profiler.m_serialLock.lock(); - SendCallstackSerial( callstack ); - SendMemFree( QueueType::MemFreeCallstack, thread, ptr ); - profiler.m_serialLock.unlock(); - } - else - { - MemFree( ptr, secure ); - } - } - - static tracy_force_inline void MemAllocNamed( const void* ptr, size_t size, bool secure, const char* name ) - { - if( secure && !ProfilerAvailable() ) return; -#ifdef TRACY_ON_DEMAND - if( !GetProfiler().IsConnected() ) return; -#endif - const auto thread = GetThreadHandle(); - - GetProfiler().m_serialLock.lock(); - SendMemName( name ); - SendMemAlloc( QueueType::MemAllocNamed, thread, ptr, size ); - GetProfiler().m_serialLock.unlock(); - } - - static tracy_force_inline void MemFreeNamed( const void* ptr, bool secure, const char* name ) - { - if( secure && !ProfilerAvailable() ) return; -#ifdef TRACY_ON_DEMAND - if( !GetProfiler().IsConnected() ) return; -#endif - const auto thread = GetThreadHandle(); - - GetProfiler().m_serialLock.lock(); - SendMemName( name ); - SendMemFree( QueueType::MemFreeNamed, thread, ptr ); - GetProfiler().m_serialLock.unlock(); - } - - static tracy_force_inline void MemAllocCallstackNamed( const void* ptr, size_t size, int32_t depth, bool secure, const char* name ) - { - if( secure && !ProfilerAvailable() ) return; - if( depth > 0 && has_callstack() ) - { - auto& profiler = GetProfiler(); -# ifdef TRACY_ON_DEMAND - if( !profiler.IsConnected() ) return; -# endif - const auto thread = GetThreadHandle(); - - auto callstack = Callstack( depth ); - - profiler.m_serialLock.lock(); - SendCallstackSerial( callstack ); - SendMemName( name ); - SendMemAlloc( QueueType::MemAllocCallstackNamed, thread, ptr, size ); - profiler.m_serialLock.unlock(); - } - else - { - MemAllocNamed( ptr, size, secure, name ); - } - } - - static tracy_force_inline void MemFreeCallstackNamed( const void* ptr, int32_t depth, bool secure, const char* name ) - { - if( secure && !ProfilerAvailable() ) return; - if( depth > 0 && has_callstack() ) - { - auto& profiler = GetProfiler(); -# ifdef TRACY_ON_DEMAND - if( !profiler.IsConnected() ) return; -# endif - const auto thread = GetThreadHandle(); - - auto callstack = Callstack( depth ); - - profiler.m_serialLock.lock(); - SendCallstackSerial( callstack ); - SendMemName( name ); - SendMemFree( QueueType::MemFreeCallstackNamed, thread, ptr ); - profiler.m_serialLock.unlock(); - } - else - { - MemFreeNamed( ptr, secure, name ); - } - } - - static tracy_force_inline void MemDiscard( const char* name, bool secure ) - { - if( secure && !ProfilerAvailable() ) return; -#ifdef TRACY_ON_DEMAND - if( !GetProfiler().IsConnected() ) return; -#endif - const auto thread = GetThreadHandle(); - - GetProfiler().m_serialLock.lock(); - SendMemDiscard( QueueType::MemDiscard, thread, name ); - GetProfiler().m_serialLock.unlock(); - } - - static tracy_force_inline void MemDiscardCallstack( const char* name, bool secure, int32_t depth ) - { - if( secure && !ProfilerAvailable() ) return; - if( depth > 0 && has_callstack() ) - { -# ifdef TRACY_ON_DEMAND - if( !GetProfiler().IsConnected() ) return; -# endif - const auto thread = GetThreadHandle(); - - auto callstack = Callstack( depth ); - - GetProfiler().m_serialLock.lock(); - SendCallstackSerial( callstack ); - SendMemDiscard( QueueType::MemDiscard, thread, name ); - GetProfiler().m_serialLock.unlock(); - } - else - { - MemDiscard( name, secure ); - } - } - - static tracy_force_inline void SendCallstack( int32_t depth ) - { - if( depth > 0 && has_callstack() ) - { - auto ptr = Callstack( depth ); - TracyQueuePrepare( QueueType::Callstack ); - MemWrite( &item->callstackFat.ptr, (uint64_t)ptr ); - TracyQueueCommit( callstackFatThread ); - } - } - - static tracy_force_inline void ParameterRegister( ParameterCallback cb, void* data ) - { - auto& profiler = GetProfiler(); - profiler.m_paramCallback = cb; - profiler.m_paramCallbackData = data; - } - - static tracy_force_inline void ParameterSetup( uint32_t idx, const char* name, bool isBool, int32_t val ) - { - TracyLfqPrepare( QueueType::ParamSetup ); - tracy::MemWrite( &item->paramSetup.idx, idx ); - tracy::MemWrite( &item->paramSetup.name, (uint64_t)name ); - tracy::MemWrite( &item->paramSetup.isBool, (uint8_t)isBool ); - tracy::MemWrite( &item->paramSetup.val, val ); - -#ifdef TRACY_ON_DEMAND - GetProfiler().DeferItem( *item ); -#endif - - TracyLfqCommit; - } - - static tracy_force_inline void SourceCallbackRegister( SourceContentsCallback cb, void* data ) - { - auto& profiler = GetProfiler(); - profiler.m_sourceCallback = cb; - profiler.m_sourceCallbackData = data; - } - -#ifdef TRACY_FIBERS - static tracy_force_inline void EnterFiber( const char* fiber, int32_t groupHint ) - { - TracyQueuePrepare( QueueType::FiberEnter ); - MemWrite( &item->fiberEnter.time, GetTime() ); - MemWrite( &item->fiberEnter.fiber, (uint64_t)fiber ); - MemWrite( &item->fiberEnter.groupHint, groupHint ); - TracyQueueCommit( fiberEnter ); - } - - static tracy_force_inline void LeaveFiber() - { - TracyQueuePrepare( QueueType::FiberLeave ); - MemWrite( &item->fiberLeave.time, GetTime() ); - TracyQueueCommit( fiberLeave ); - } -#endif - - void SendCallstack( int32_t depth, const char* skipBefore ); - static void CutCallstack( void* callstack, const char* skipBefore ); - - static bool ShouldExit(); - - tracy_force_inline bool IsConnected() const - { - return m_isConnected.load( std::memory_order_acquire ); - } - - tracy_force_inline void SetProgramName( const char* name ) - { - m_programNameLock.lock(); - m_programName = name; - m_programNameLock.unlock(); - } - -#ifdef TRACY_ON_DEMAND - tracy_force_inline uint64_t ConnectionId() const - { - return m_connectionId.load( std::memory_order_acquire ); - } - - tracy_force_inline void DeferItem( const QueueItem& item ) - { - m_deferredLock.lock(); - auto dst = m_deferredQueue.push_next(); - memcpy( dst, &item, sizeof( item ) ); - m_deferredLock.unlock(); - } -#endif - - void RequestShutdown() { m_shutdown.store( true, std::memory_order_relaxed ); m_shutdownManual.store( true, std::memory_order_relaxed ); } - bool HasShutdownFinished() const { return m_shutdownFinished.load( std::memory_order_relaxed ); } - - void SendString( uint64_t str, const char* ptr, QueueType type ) { SendString( str, ptr, strlen( ptr ), type ); } - void SendString( uint64_t str, const char* ptr, size_t len, QueueType type ); - void SendSingleString( const char* ptr ) { SendSingleString( ptr, strlen( ptr ) ); } - void SendSingleString( const char* ptr, size_t len ); - void SendSecondString( const char* ptr ) { SendSecondString( ptr, strlen( ptr ) ); } - void SendSecondString( const char* ptr, size_t len ); - - - // Allocated source location data layout: - // 2b payload size - // 4b color - // 4b source line - // fsz function name - // 1b null terminator - // ssz source file name - // 1b null terminator - // nsz zone name (optional) - - static tracy_force_inline uint64_t AllocSourceLocation( uint32_t line, const char* source, const char* function, uint32_t color = 0 ) - { - return AllocSourceLocation( line, source, function, nullptr, 0, color ); - } - - static tracy_force_inline uint64_t AllocSourceLocation( uint32_t line, const char* source, const char* function, const char* name, size_t nameSz, uint32_t color = 0 ) - { - return AllocSourceLocation( line, source, strlen(source), function, strlen(function), name, nameSz, color ); - } - - static tracy_force_inline uint64_t AllocSourceLocation( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, uint32_t color = 0 ) - { - return AllocSourceLocation( line, source, sourceSz, function, functionSz, nullptr, 0, color ); - } - - static tracy_force_inline uint64_t AllocSourceLocation( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, uint32_t color = 0 ) - { - const auto sz32 = uint32_t( 2 + 4 + 4 + functionSz + 1 + sourceSz + 1 + nameSz ); - assert( sz32 <= (std::numeric_limits::max)() ); - const auto sz = uint16_t( sz32 ); - auto ptr = (char*)tracy_malloc( sz ); - memcpy( ptr, &sz, 2 ); - memcpy( ptr + 2, &color, 4 ); - memcpy( ptr + 6, &line, 4 ); - memcpy( ptr + 10, function, functionSz ); - ptr[10 + functionSz] = '\0'; - memcpy( ptr + 10 + functionSz + 1, source, sourceSz ); - ptr[10 + functionSz + 1 + sourceSz] = '\0'; - if( nameSz != 0 ) - { - memcpy( ptr + 10 + functionSz + 1 + sourceSz + 1, name, nameSz ); - } - return uint64_t( ptr ); - } - -private: - enum class DequeueStatus { DataDequeued, ConnectionLost, QueueEmpty }; - enum class ThreadCtxStatus { Same, Changed, ConnectionLost }; - - static void LaunchWorker( void* ptr ) { ((Profiler*)ptr)->Worker(); } - void Worker(); - -#ifndef TRACY_NO_FRAME_IMAGE - static void LaunchCompressWorker( void* ptr ) { ((Profiler*)ptr)->CompressWorker(); } - void CompressWorker(); -#endif - -#ifdef TRACY_HAS_CALLSTACK - static void LaunchSymbolWorker( void* ptr ) { ((Profiler*)ptr)->SymbolWorker(); } - void SymbolWorker(); - void HandleSymbolQueueItem( const SymbolQueueItem& si ); -#endif - - void InstallCrashHandler(); - void RemoveCrashHandler(); - - void ClearQueues( tracy::moodycamel::ConsumerToken& token ); - void ClearSerial(); - DequeueStatus Dequeue( tracy::moodycamel::ConsumerToken& token ); - DequeueStatus DequeueContextSwitches( tracy::moodycamel::ConsumerToken& token, int64_t& timeStop ); - DequeueStatus DequeueSerial(); - ThreadCtxStatus ThreadCtxCheck( uint32_t threadId ); - bool CommitData(); - - tracy_force_inline bool AppendData( const void* data, size_t len ) - { - const auto ret = NeedDataSize( len ); - AppendDataUnsafe( data, len ); - return ret; - } - - tracy_force_inline bool NeedDataSize( size_t len ) - { - assert( len <= TargetFrameSize ); - bool ret = true; - if( m_bufferOffset - m_bufferStart + (int)len > TargetFrameSize ) - { - ret = CommitData(); - } - return ret; - } - - tracy_force_inline void AppendDataUnsafe( const void* data, size_t len ) - { - memcpy( m_buffer + m_bufferOffset, data, len ); - m_bufferOffset += int( len ); - } - - char* SafeCopyProlog( const char* p, size_t size ); - void SafeCopyEpilog( char* buf ); - - template // must be void( const char* buf, size_t size ) - bool WithSafeCopy( const char* p, size_t size, Callable&& callable ) - { - if( char* buf = SafeCopyProlog( p, size ) ) - { - callable( buf, size ); - SafeCopyEpilog( buf ); - return true; - } - return false; - } - - bool SendData( const char* data, size_t len ); - void SendLongString( uint64_t ptr, const char* str, size_t len, QueueType type ); - void SendSourceLocation( uint64_t ptr ); - void SendSourceLocationPayload( uint64_t ptr ); - void SendCallstackPayload( uint64_t ptr ); - void SendCallstackPayload64( uint64_t ptr ); - void SendCallstackAlloc( uint64_t ptr ); - - void QueueCallstackFrame( uint64_t ptr ); - void QueueSymbolQuery( uint64_t symbol ); - void QueueExternalName( uint64_t ptr ); - void QueueKernelCode( uint64_t symbol, uint32_t size ); - void QueueSourceCodeQuery( uint32_t id ); - - bool HandleServerQuery(); - void HandleDisconnect(); - void HandleParameter( uint64_t payload ); - void HandleSymbolCodeQuery( uint64_t symbol, uint32_t size ); - void HandleSourceCodeQuery( char* data, char* image, uint32_t id ); - - void AckServerQuery(); - void AckSymbolCodeNotAvailable(); - - void CalibrateTimer(); - void CalibrateDelay(); - void ReportTopology(); - - static tracy_force_inline void SendCallstackSerial( void* ptr ) - { - if( has_callstack() ) - { - auto item = GetProfiler().m_serialQueue.prepare_next(); - MemWrite( &item->hdr.type, QueueType::CallstackSerial ); - MemWrite( &item->callstackFat.ptr, (uint64_t)ptr ); - GetProfiler().m_serialQueue.commit_next(); - } - } - - static tracy_force_inline void SendMemAlloc( QueueType type, const uint32_t thread, const void* ptr, size_t size ) - { - assert( type == QueueType::MemAlloc || type == QueueType::MemAllocCallstack || type == QueueType::MemAllocNamed || type == QueueType::MemAllocCallstackNamed ); - - auto item = GetProfiler().m_serialQueue.prepare_next(); - MemWrite( &item->hdr.type, type ); - MemWrite( &item->memAlloc.time, GetTime() ); - MemWrite( &item->memAlloc.thread, thread ); - MemWrite( &item->memAlloc.ptr, (uint64_t)ptr ); - if( compile_time_condition::value ) - { - memcpy( &item->memAlloc.size, &size, 4 ); - memset( &item->memAlloc.size + 4, 0, 2 ); - } - else - { - assert( sizeof( size ) == 8 ); - memcpy( &item->memAlloc.size, &size, 4 ); - memcpy( ((char*)&item->memAlloc.size)+4, ((char*)&size)+4, 2 ); - } - GetProfiler().m_serialQueue.commit_next(); - } - - static tracy_force_inline void SendMemFree( QueueType type, const uint32_t thread, const void* ptr ) - { - assert( type == QueueType::MemFree || type == QueueType::MemFreeCallstack || type == QueueType::MemFreeNamed || type == QueueType::MemFreeCallstackNamed ); - - auto item = GetProfiler().m_serialQueue.prepare_next(); - MemWrite( &item->hdr.type, type ); - MemWrite( &item->memFree.time, GetTime() ); - MemWrite( &item->memFree.thread, thread ); - MemWrite( &item->memFree.ptr, (uint64_t)ptr ); - GetProfiler().m_serialQueue.commit_next(); - } - - static tracy_force_inline void SendMemDiscard( QueueType type, const uint32_t thread, const char* name ) - { - assert( type == QueueType::MemDiscard || type == QueueType::MemDiscardCallstack ); - - auto item = GetProfiler().m_serialQueue.prepare_next(); - MemWrite( &item->hdr.type, type ); - MemWrite( &item->memDiscard.time, GetTime() ); - MemWrite( &item->memDiscard.thread, thread ); - MemWrite( &item->memDiscard.name, (uint64_t)name ); - GetProfiler().m_serialQueue.commit_next(); - } - - static tracy_force_inline void SendMemName( const char* name ) - { - assert( name ); - auto item = GetProfiler().m_serialQueue.prepare_next(); - MemWrite( &item->hdr.type, QueueType::MemNamePayload ); - MemWrite( &item->memName.name, (uint64_t)name ); - GetProfiler().m_serialQueue.commit_next(); - } - -#if defined _WIN32 && defined TRACY_TIMER_QPC - static int64_t GetTimeQpc(); -#endif - - double m_timerMul; - uint64_t m_resolution; - uint64_t m_delay; - std::atomic m_timeBegin; - uint32_t m_mainThread; - uint64_t m_epoch, m_exectime; - std::atomic m_shutdown; - std::atomic m_shutdownManual; - std::atomic m_shutdownFinished; - Socket* m_sock; - UdpBroadcast* m_broadcast; - bool m_noExit; - uint32_t m_userPort; - std::atomic m_zoneId; - int64_t m_samplingPeriod; - - uint32_t m_threadCtx; - int64_t m_refTimeThread; - int64_t m_refTimeSerial; - int64_t m_refTimeCtx; - int64_t m_refTimeGpu; - - void* m_stream; // LZ4_stream_t* - char* m_buffer; - int m_bufferOffset; - int m_bufferStart; - - char* m_lz4Buf; - - FastVector m_serialQueue, m_serialDequeue; - TracyMutex m_serialLock; - -#ifndef TRACY_NO_FRAME_IMAGE - FastVector m_fiQueue, m_fiDequeue; - TracyMutex m_fiLock; -#endif - - SPSCQueue m_symbolQueue; - - std::atomic m_frameCount; - std::atomic m_isConnected; -#ifdef TRACY_ON_DEMAND - std::atomic m_connectionId; - - TracyMutex m_deferredLock; - FastVector m_deferredQueue; -#endif - -#ifdef TRACY_HAS_SYSTIME - void ProcessSysTime(); - - SysTime m_sysTime; - uint64_t m_sysTimeLast = 0; -#else - void ProcessSysTime() {} -#endif - -#ifdef TRACY_HAS_SYSPOWER - SysPower m_sysPower; -#endif - - ParameterCallback m_paramCallback; - void* m_paramCallbackData; - SourceContentsCallback m_sourceCallback; - void* m_sourceCallbackData; - - char* m_queryImage; - char* m_queryData; - char* m_queryDataPtr; - -#ifndef NDEBUG - // m_safeSendBuffer and m_pipe should only be used by the Tracy Profiler thread; this ensures that in debug builds. - std::atomic_bool m_inUse{ false }; -#endif - char* m_safeSendBuffer; - -#if defined _WIN32 - void* m_prevHandler; -#else - int m_pipe[2]; - int m_pipeBufSize; -#endif - -#ifdef __linux__ - struct { - struct sigaction pwr, ill, fpe, segv, pipe, bus, abrt; - } m_prevSignal; - KCore* m_kcore; -#endif - bool m_crashHandlerInstalled; - - const char* m_programName; - TracyMutex m_programNameLock; -}; - -} - -#endif diff --git a/src/third_party/tracy/client/TracyRingBuffer.hpp b/src/third_party/tracy/client/TracyRingBuffer.hpp deleted file mode 100644 index e9100e2d..00000000 --- a/src/third_party/tracy/client/TracyRingBuffer.hpp +++ /dev/null @@ -1,141 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "TracyDebug.hpp" - -namespace tracy -{ - -class RingBuffer -{ -public: - RingBuffer( unsigned int size, int fd, int id, int cpu = -1 ) - : m_size( size ) - , m_id( id ) - , m_cpu( cpu ) - , m_fd( fd ) - { - const auto pageSize = uint32_t( getpagesize() ); - assert( size >= pageSize ); - assert( __builtin_popcount( size ) == 1 ); - m_mapSize = size + pageSize; - auto mapAddr = mmap( nullptr, m_mapSize, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0 ); - if( mapAddr == MAP_FAILED ) - { - TracyDebug( "mmap failed: errno %i (%s)\n", errno, strerror( errno ) ); - m_fd = 0; - m_metadata = nullptr; - close( fd ); - return; - } - m_metadata = (perf_event_mmap_page*)mapAddr; - assert( m_metadata->data_offset == pageSize ); - m_buffer = ((char*)mapAddr) + pageSize; - m_tail = m_metadata->data_tail; - } - - ~RingBuffer() - { - if( m_metadata ) munmap( m_metadata, m_mapSize ); - if( m_fd ) close( m_fd ); - } - - RingBuffer( const RingBuffer& ) = delete; - RingBuffer& operator=( const RingBuffer& ) = delete; - - RingBuffer( RingBuffer&& other ) - { - memcpy( (char*)&other, (char*)this, sizeof( RingBuffer ) ); - m_metadata = nullptr; - m_fd = 0; - } - - RingBuffer& operator=( RingBuffer&& other ) - { - memcpy( (char*)&other, (char*)this, sizeof( RingBuffer ) ); - m_metadata = nullptr; - m_fd = 0; - return *this; - } - - bool IsValid() const { return m_metadata != nullptr; } - int GetId() const { return m_id; } - int GetCpu() const { return m_cpu; } - - void Enable() - { - ioctl( m_fd, PERF_EVENT_IOC_ENABLE, 0 ); - } - - void Read( void* dst, uint64_t offset, uint64_t cnt ) - { - const auto size = m_size; - auto src = ( m_tail + offset ) % size; - if( src + cnt <= size ) - { - memcpy( dst, m_buffer + src, cnt ); - } - else - { - const auto s0 = size - src; - const auto buf = m_buffer; - memcpy( dst, buf + src, s0 ); - memcpy( (char*)dst + s0, buf, cnt - s0 ); - } - } - - void Advance( uint64_t cnt ) - { - m_tail += cnt; - StoreTail(); - } - - bool CheckTscCaps() const - { - return m_metadata->cap_user_time_zero; - } - - int64_t ConvertTimeToTsc( int64_t timestamp ) const - { - if( !m_metadata->cap_user_time_zero ) return 0; - const auto time = timestamp - m_metadata->time_zero; - const auto quot = time / m_metadata->time_mult; - const auto rem = time % m_metadata->time_mult; - return ( quot << m_metadata->time_shift ) + ( rem << m_metadata->time_shift ) / m_metadata->time_mult; - } - - uint64_t LoadHead() const - { - return std::atomic_load_explicit( (const volatile std::atomic*)&m_metadata->data_head, std::memory_order_acquire ); - } - - uint64_t GetTail() const - { - return m_tail; - } - -private: - void StoreTail() - { - std::atomic_store_explicit( (volatile std::atomic*)&m_metadata->data_tail, m_tail, std::memory_order_release ); - } - - unsigned int m_size; - uint64_t m_tail; - char* m_buffer; - int m_id; - int m_cpu; - perf_event_mmap_page* m_metadata; - - size_t m_mapSize; - int m_fd; -}; - -} diff --git a/src/third_party/tracy/client/TracyScoped.hpp b/src/third_party/tracy/client/TracyScoped.hpp deleted file mode 100644 index 7f9256d8..00000000 --- a/src/third_party/tracy/client/TracyScoped.hpp +++ /dev/null @@ -1,201 +0,0 @@ -#ifndef __TRACYSCOPED_HPP__ -#define __TRACYSCOPED_HPP__ - -#include -#include -#include -#include - -#include "../common/TracySystem.hpp" -#include "../common/TracyAlign.hpp" -#include "../common/TracyAlloc.hpp" -#include "TracyProfiler.hpp" -#include "TracyCallstack.hpp" - -namespace tracy -{ - -class ScopedZone -{ -public: - ScopedZone( const ScopedZone& ) = delete; - ScopedZone( ScopedZone&& ) = delete; - ScopedZone& operator=( const ScopedZone& ) = delete; - ScopedZone& operator=( ScopedZone&& ) = delete; - - tracy_force_inline ScopedZone( const SourceLocationData* srcloc, int32_t depth = -1, bool is_active = true ) -#ifdef TRACY_ON_DEMAND - : m_active( is_active && GetProfiler().IsConnected() ) -#else - : m_active( is_active ) -#endif - { - if( !m_active ) return; -#ifdef TRACY_ON_DEMAND - m_connectionId = GetProfiler().ConnectionId(); -#endif - auto zoneQueue = QueueType::ZoneBegin; - if( depth > 0 && has_callstack() ) - { - GetProfiler().SendCallstack( depth ); - zoneQueue = QueueType::ZoneBeginCallstack; - } - TracyQueuePrepare( zoneQueue ); - MemWrite( &item->zoneBegin.time, Profiler::GetTime() ); - MemWrite( &item->zoneBegin.srcloc, (uint64_t)srcloc ); - TracyQueueCommit( zoneBeginThread ); - } - - tracy_force_inline ScopedZone( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, uint32_t color, int32_t depth = -1, bool is_active = true ) -#ifdef TRACY_ON_DEMAND - : m_active( is_active && GetProfiler().IsConnected() ) -#else - : m_active( is_active ) -#endif - { - if( !m_active ) return; -#ifdef TRACY_ON_DEMAND - m_connectionId = GetProfiler().ConnectionId(); -#endif - auto zoneQueue = QueueType::ZoneBeginAllocSrcLoc; - if( depth > 0 && has_callstack() ) - { - GetProfiler().SendCallstack( depth ); - zoneQueue = QueueType::ZoneBeginAllocSrcLocCallstack; - } - TracyQueuePrepare( zoneQueue ); - const auto srcloc = - Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz, color ); - MemWrite( &item->zoneBegin.time, Profiler::GetTime() ); - MemWrite( &item->zoneBegin.srcloc, srcloc ); - TracyQueueCommit( zoneBeginThread ); - } - - tracy_force_inline ScopedZone( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int32_t depth, bool is_active = true ) : ScopedZone( line, source, sourceSz, function, functionSz, name, nameSz, 0, depth, is_active ) {} - - tracy_force_inline ~ScopedZone() - { - if( !m_active ) return; -#ifdef TRACY_ON_DEMAND - if( GetProfiler().ConnectionId() != m_connectionId ) return; -#endif - TracyQueuePrepare( QueueType::ZoneEnd ); - MemWrite( &item->zoneEnd.time, Profiler::GetTime() ); - TracyQueueCommit( zoneEndThread ); - } - - tracy_force_inline void Text( const char* txt, size_t size ) - { - assert( size < (std::numeric_limits::max)() ); - if( !m_active ) return; -#ifdef TRACY_ON_DEMAND - if( GetProfiler().ConnectionId() != m_connectionId ) return; -#endif - auto ptr = (char*)tracy_malloc( size ); - memcpy( ptr, txt, size ); - TracyQueuePrepare( QueueType::ZoneText ); - MemWrite( &item->zoneTextFat.text, (uint64_t)ptr ); - MemWrite( &item->zoneTextFat.size, (uint16_t)size ); - TracyQueueCommit( zoneTextFatThread ); - } - - void TextFmt( const char* fmt, ... ) - { - if( !m_active ) return; -#ifdef TRACY_ON_DEMAND - if( GetProfiler().ConnectionId() != m_connectionId ) return; -#endif - va_list args; - va_start( args, fmt ); - auto size = vsnprintf( nullptr, 0, fmt, args ); - va_end( args ); - if( size < 0 ) return; - assert( size < (std::numeric_limits::max)() ); - - char* ptr = (char*)tracy_malloc( size_t( size ) + 1 ); - va_start( args, fmt ); - vsnprintf( ptr, size_t( size ) + 1, fmt, args ); - va_end( args ); - - TracyQueuePrepare( QueueType::ZoneText ); - MemWrite( &item->zoneTextFat.text, (uint64_t)ptr ); - MemWrite( &item->zoneTextFat.size, (uint16_t)size ); - TracyQueueCommit( zoneTextFatThread ); - } - - tracy_force_inline void Name( const char* txt, size_t size ) - { - assert( size < (std::numeric_limits::max)() ); - if( !m_active ) return; -#ifdef TRACY_ON_DEMAND - if( GetProfiler().ConnectionId() != m_connectionId ) return; -#endif - auto ptr = (char*)tracy_malloc( size ); - memcpy( ptr, txt, size ); - TracyQueuePrepare( QueueType::ZoneName ); - MemWrite( &item->zoneTextFat.text, (uint64_t)ptr ); - MemWrite( &item->zoneTextFat.size, (uint16_t)size ); - TracyQueueCommit( zoneTextFatThread ); - } - - void NameFmt( const char* fmt, ... ) - { - if( !m_active ) return; -#ifdef TRACY_ON_DEMAND - if( GetProfiler().ConnectionId() != m_connectionId ) return; -#endif - va_list args; - va_start( args, fmt ); - auto size = vsnprintf( nullptr, 0, fmt, args ); - va_end( args ); - if( size < 0 ) return; - assert( size < (std::numeric_limits::max)() ); - - char* ptr = (char*)tracy_malloc( size_t( size ) + 1 ); - va_start( args, fmt ); - vsnprintf( ptr, size_t( size ) + 1, fmt, args ); - va_end( args ); - - TracyQueuePrepare( QueueType::ZoneName ); - MemWrite( &item->zoneTextFat.text, (uint64_t)ptr ); - MemWrite( &item->zoneTextFat.size, (uint16_t)size ); - TracyQueueCommit( zoneTextFatThread ); - } - - tracy_force_inline void Color( uint32_t color ) - { - if( !m_active ) return; -#ifdef TRACY_ON_DEMAND - if( GetProfiler().ConnectionId() != m_connectionId ) return; -#endif - TracyQueuePrepare( QueueType::ZoneColor ); - MemWrite( &item->zoneColor.b, uint8_t( ( color ) & 0xFF ) ); - MemWrite( &item->zoneColor.g, uint8_t( ( color >> 8 ) & 0xFF ) ); - MemWrite( &item->zoneColor.r, uint8_t( ( color >> 16 ) & 0xFF ) ); - TracyQueueCommit( zoneColorThread ); - } - - tracy_force_inline void Value( uint64_t value ) - { - if( !m_active ) return; -#ifdef TRACY_ON_DEMAND - if( GetProfiler().ConnectionId() != m_connectionId ) return; -#endif - TracyQueuePrepare( QueueType::ZoneValue ); - MemWrite( &item->zoneValue.value, value ); - TracyQueueCommit( zoneValueThread ); - } - - tracy_force_inline bool IsActive() const { return m_active; } - -private: - const bool m_active; - -#ifdef TRACY_ON_DEMAND - uint64_t m_connectionId = 0; -#endif -}; - -} - -#endif diff --git a/src/third_party/tracy/client/TracyStringHelpers.hpp b/src/third_party/tracy/client/TracyStringHelpers.hpp deleted file mode 100644 index 977be6a3..00000000 --- a/src/third_party/tracy/client/TracyStringHelpers.hpp +++ /dev/null @@ -1,41 +0,0 @@ -#ifndef __TRACYSTRINGHELPERS_HPP__ -#define __TRACYSTRINGHELPERS_HPP__ - -#include -#include - -#include "../common/TracyAlloc.hpp" -#include "../common/TracyForceInline.hpp" - -namespace tracy -{ - -static tracy_force_inline char* CopyString( const char* src, size_t sz ) -{ - auto dst = (char*)tracy_malloc( sz + 1 ); - memcpy( dst, src, sz ); - dst[sz] = '\0'; - return dst; -} - -static tracy_force_inline char* CopyString( const char* src ) -{ - return CopyString( src, strlen( src ) ); -} - -static tracy_force_inline char* CopyStringFast( const char* src, size_t sz ) -{ - auto dst = (char*)tracy_malloc_fast( sz + 1 ); - memcpy( dst, src, sz ); - dst[sz] = '\0'; - return dst; -} - -static tracy_force_inline char* CopyStringFast( const char* src ) -{ - return CopyStringFast( src, strlen( src ) ); -} - -} - -#endif diff --git a/src/third_party/tracy/client/TracySysPower.cpp b/src/third_party/tracy/client/TracySysPower.cpp deleted file mode 100644 index 6ad1d647..00000000 --- a/src/third_party/tracy/client/TracySysPower.cpp +++ /dev/null @@ -1,164 +0,0 @@ -#include "TracySysPower.hpp" - -#ifdef TRACY_HAS_SYSPOWER - -#include -#include -#include -#include -#include -#include - -#include "TracyDebug.hpp" -#include "TracyProfiler.hpp" -#include "../common/TracyAlloc.hpp" - -namespace tracy -{ - -SysPower::SysPower() - : m_domains( 4 ) - , m_lastTime( 0 ) -{ - ScanDirectory( "/sys/devices/virtual/powercap/intel-rapl", -1 ); -} - -SysPower::~SysPower() -{ - for( auto& v : m_domains ) - { - fclose( v.handle ); - // Do not release v.name, as it may be still needed - } -} - -void SysPower::Tick() -{ - auto t = std::chrono::high_resolution_clock::now().time_since_epoch().count(); - if( t - m_lastTime > 10000000 ) // 10 ms - { - m_lastTime = t; - for( auto& v : m_domains ) - { - char tmp[32]; - if( fread( tmp, 1, 32, v.handle ) > 0 ) - { - rewind( v.handle ); - auto p = (uint64_t)atoll( tmp ); - uint64_t delta; - if( p >= v.value ) - { - delta = p - v.value; - } - else - { - delta = v.overflow - v.value + p; - } - v.value = p; - - TracyLfqPrepare( QueueType::SysPowerReport ); - MemWrite( &item->sysPower.time, Profiler::GetTime() ); - MemWrite( &item->sysPower.delta, delta ); - MemWrite( &item->sysPower.name, (uint64_t)v.name ); - TracyLfqCommit; - } - } - } -} - -void SysPower::ScanDirectory( const char* path, int parent ) -{ - DIR* dir = opendir( path ); - if( !dir ) return; - struct dirent* ent; - uint64_t maxRange = 0; - char* name = nullptr; - FILE* handle = nullptr; - while( ( ent = readdir( dir ) ) ) - { - if( ent->d_type == DT_REG ) - { - if( strcmp( ent->d_name, "max_energy_range_uj" ) == 0 ) - { - char tmp[PATH_MAX]; - snprintf( tmp, PATH_MAX, "%s/max_energy_range_uj", path ); - FILE* f = fopen( tmp, "r" ); - if( f ) - { - (void)fscanf( f, "%" PRIu64, &maxRange ); - fclose( f ); - } - } - else if( strcmp( ent->d_name, "name" ) == 0 ) - { - char tmp[PATH_MAX]; - snprintf( tmp, PATH_MAX, "%s/name", path ); - FILE* f = fopen( tmp, "r" ); - if( f ) - { - char ntmp[128]; - if( fgets( ntmp, 128, f ) ) - { - // Last character is newline, skip it - const auto sz = strlen( ntmp ) - 1; - if( parent < 0 ) - { - name = (char*)tracy_malloc( sz + 1 ); - memcpy( name, ntmp, sz ); - name[sz] = '\0'; - } - else - { - const auto p = m_domains[parent]; - const auto psz = strlen( p.name ); - name = (char*)tracy_malloc( psz + sz + 2 ); - memcpy( name, p.name, psz ); - name[psz] = ':'; - memcpy( name+psz+1, ntmp, sz ); - name[psz+sz+1] = '\0'; - } - } - fclose( f ); - } - } - else if( strcmp( ent->d_name, "energy_uj" ) == 0 ) - { - char tmp[PATH_MAX]; - snprintf( tmp, PATH_MAX, "%s/energy_uj", path ); - handle = fopen( tmp, "r" ); - } - } - if( name && handle && maxRange > 0 ) break; - } - if( name && handle && maxRange > 0 ) - { - parent = (int)m_domains.size(); - Domain* domain = m_domains.push_next(); - domain->value = 0; - domain->overflow = maxRange; - domain->handle = handle; - domain->name = name; - TracyDebug( "Power domain id %i, %s found at %s\n", parent, name, path ); - } - else - { - if( name ) tracy_free( name ); - if( handle ) fclose( handle ); - } - - rewinddir( dir ); - while( ( ent = readdir( dir ) ) ) - { - if( ent->d_type == DT_DIR && strncmp( ent->d_name, "intel-rapl:", 11 ) == 0 ) - { - char tmp[PATH_MAX]; - snprintf( tmp, PATH_MAX, "%s/%s", path, ent->d_name ); - ScanDirectory( tmp, parent ); - } - } - closedir( dir ); -} - -} - -#endif diff --git a/src/third_party/tracy/client/TracySysPower.hpp b/src/third_party/tracy/client/TracySysPower.hpp deleted file mode 100644 index 210123bc..00000000 --- a/src/third_party/tracy/client/TracySysPower.hpp +++ /dev/null @@ -1,44 +0,0 @@ -#ifndef __TRACYSYSPOWER_HPP__ -#define __TRACYSYSPOWER_HPP__ - -#if defined __linux__ -# define TRACY_HAS_SYSPOWER -#endif - -#ifdef TRACY_HAS_SYSPOWER - -#include -#include - -#include "TracyFastVector.hpp" - -namespace tracy -{ - -class SysPower -{ - struct Domain - { - uint64_t value; - uint64_t overflow; - FILE* handle; - const char* name; - }; - -public: - SysPower(); - ~SysPower(); - - void Tick(); - -private: - void ScanDirectory( const char* path, int parent ); - - FastVector m_domains; - uint64_t m_lastTime; -}; - -} -#endif - -#endif diff --git a/src/third_party/tracy/client/TracySysTime.cpp b/src/third_party/tracy/client/TracySysTime.cpp deleted file mode 100644 index b690a911..00000000 --- a/src/third_party/tracy/client/TracySysTime.cpp +++ /dev/null @@ -1,108 +0,0 @@ -#include "TracySysTime.hpp" - -#ifdef TRACY_HAS_SYSTIME - -# if defined _WIN32 -# include -# elif defined __linux__ -# include -# include -# elif defined __APPLE__ -# include -# include -# elif defined BSD -# include -# include -# endif - -namespace tracy -{ - -# if defined _WIN32 - -static inline uint64_t ConvertTime( const FILETIME& t ) -{ - return ( uint64_t( t.dwHighDateTime ) << 32 ) | uint64_t( t.dwLowDateTime ); -} - -void SysTime::ReadTimes() -{ - FILETIME idleTime; - FILETIME kernelTime; - FILETIME userTime; - - GetSystemTimes( &idleTime, &kernelTime, &userTime ); - - idle = ConvertTime( idleTime ); - const auto kernel = ConvertTime( kernelTime ); - const auto user = ConvertTime( userTime ); - used = kernel + user; -} - -# elif defined __linux__ - -void SysTime::ReadTimes() -{ - uint64_t user, nice, system; - FILE* f = fopen( "/proc/stat", "r" ); - if( f ) - { - int read = fscanf( f, "cpu %" PRIu64 " %" PRIu64 " %" PRIu64" %" PRIu64, &user, &nice, &system, &idle ); - fclose( f ); - if (read == 4) - { - used = user + nice + system; - } - } -} - -# elif defined __APPLE__ - -void SysTime::ReadTimes() -{ - host_cpu_load_info_data_t info; - mach_msg_type_number_t cnt = HOST_CPU_LOAD_INFO_COUNT; - host_statistics( mach_host_self(), HOST_CPU_LOAD_INFO, reinterpret_cast( &info ), &cnt ); - used = info.cpu_ticks[CPU_STATE_USER] + info.cpu_ticks[CPU_STATE_NICE] + info.cpu_ticks[CPU_STATE_SYSTEM]; - idle = info.cpu_ticks[CPU_STATE_IDLE]; -} - -# elif defined BSD - -void SysTime::ReadTimes() -{ - u_long data[5]; - size_t sz = sizeof( data ); - sysctlbyname( "kern.cp_time", &data, &sz, nullptr, 0 ); - used = data[0] + data[1] + data[2] + data[3]; - idle = data[4]; -} - -#endif - -SysTime::SysTime() -{ - ReadTimes(); -} - -float SysTime::Get() -{ - const auto oldUsed = used; - const auto oldIdle = idle; - - ReadTimes(); - - const auto diffIdle = idle - oldIdle; - const auto diffUsed = used - oldUsed; - -#if defined _WIN32 - return diffUsed == 0 ? -1 : ( diffUsed - diffIdle ) * 100.f / diffUsed; -#elif defined __linux__ || defined __APPLE__ || defined BSD - const auto total = diffUsed + diffIdle; - return total == 0 ? -1 : diffUsed * 100.f / total; -#endif -} - -} - -#endif diff --git a/src/third_party/tracy/client/TracySysTime.hpp b/src/third_party/tracy/client/TracySysTime.hpp deleted file mode 100644 index cb5ebe73..00000000 --- a/src/third_party/tracy/client/TracySysTime.hpp +++ /dev/null @@ -1,36 +0,0 @@ -#ifndef __TRACYSYSTIME_HPP__ -#define __TRACYSYSTIME_HPP__ - -#if defined _WIN32 || defined __linux__ || defined __APPLE__ -# define TRACY_HAS_SYSTIME -#else -# include -#endif - -#ifdef BSD -# define TRACY_HAS_SYSTIME -#endif - -#ifdef TRACY_HAS_SYSTIME - -#include - -namespace tracy -{ - -class SysTime -{ -public: - SysTime(); - float Get(); - - void ReadTimes(); - -private: - uint64_t idle, used; -}; - -} -#endif - -#endif diff --git a/src/third_party/tracy/client/TracySysTrace.cpp b/src/third_party/tracy/client/TracySysTrace.cpp deleted file mode 100644 index 5827a992..00000000 --- a/src/third_party/tracy/client/TracySysTrace.cpp +++ /dev/null @@ -1,1611 +0,0 @@ -#include "TracyDebug.hpp" -#include "TracyStringHelpers.hpp" -#include "TracySysTrace.hpp" -#include "../common/TracySystem.hpp" - -#ifdef TRACY_HAS_SYSTEM_TRACING - -#ifndef TRACY_SAMPLING_HZ -# if defined _WIN32 -# define TRACY_SAMPLING_HZ 8000 -# elif defined __linux__ -# define TRACY_SAMPLING_HZ 10000 -# endif -#endif - -namespace tracy -{ - -static int GetSamplingFrequency() -{ - int samplingHz = TRACY_SAMPLING_HZ; - - auto env = GetEnvVar( "TRACY_SAMPLING_HZ" ); - if( env ) - { - int val = atoi( env ); - if( val > 0 ) samplingHz = val; - } - -#if defined _WIN32 - return samplingHz > 8000 ? 8000 : ( samplingHz < 1 ? 1 : samplingHz ); -#else - return samplingHz > 1000000 ? 1000000 : ( samplingHz < 1 ? 1 : samplingHz ); -#endif -} - -static int GetSamplingPeriod() -{ - return 1000000000 / GetSamplingFrequency(); -} - -} - -# if defined _WIN32 - -# ifndef NOMINMAX -# define NOMINMAX -# endif - -# define INITGUID -# include -# include -# include -# include -# include -# include -# include -# include - -# include "../common/TracyAlloc.hpp" -# include "../common/TracySystem.hpp" -# include "TracyProfiler.hpp" -# include "TracyThread.hpp" - -namespace tracy -{ - -static const GUID PerfInfoGuid = { 0xce1dbfb4, 0x137e, 0x4da6, { 0x87, 0xb0, 0x3f, 0x59, 0xaa, 0x10, 0x2c, 0xbc } }; -static const GUID DxgKrnlGuid = { 0x802ec45a, 0x1e99, 0x4b83, { 0x99, 0x20, 0x87, 0xc9, 0x82, 0x77, 0xba, 0x9d } }; -static const GUID ThreadV2Guid = { 0x3d6fa8d1, 0xfe05, 0x11d0, { 0x9d, 0xda, 0x00, 0xc0, 0x4f, 0xd7, 0xba, 0x7c } }; - - -static TRACEHANDLE s_traceHandle; -static TRACEHANDLE s_traceHandle2; -static EVENT_TRACE_PROPERTIES* s_prop; -static DWORD s_pid; - -static EVENT_TRACE_PROPERTIES* s_propVsync; -static TRACEHANDLE s_traceHandleVsync; -static TRACEHANDLE s_traceHandleVsync2; -Thread* s_threadVsync = nullptr; - -struct CSwitch -{ - uint32_t newThreadId; - uint32_t oldThreadId; - int8_t newThreadPriority; - int8_t oldThreadPriority; - uint8_t previousCState; - int8_t spareByte; - int8_t oldThreadWaitReason; - int8_t oldThreadWaitMode; - int8_t oldThreadState; - int8_t oldThreadWaitIdealProcessor; - uint32_t newThreadWaitTime; - uint32_t reserved; -}; - -struct ReadyThread -{ - uint32_t threadId; - int8_t adjustReason; - int8_t adjustIncrement; - int8_t flag; - int8_t reserverd; -}; - -struct ThreadTrace -{ - uint32_t processId; - uint32_t threadId; - uint32_t stackBase; - uint32_t stackLimit; - uint32_t userStackBase; - uint32_t userStackLimit; - uint32_t startAddr; - uint32_t win32StartAddr; - uint32_t tebBase; - uint32_t subProcessTag; -}; - -struct StackWalkEvent -{ - uint64_t eventTimeStamp; - uint32_t stackProcess; - uint32_t stackThread; - uint64_t stack[192]; -}; - -struct VSyncInfo -{ - void* dxgAdapter; - uint32_t vidPnTargetId; - uint64_t scannedPhysicalAddress; - uint32_t vidPnSourceId; - uint32_t frameNumber; - int64_t frameQpcTime; - void* hFlipDevice; - uint32_t flipType; - uint64_t flipFenceId; -}; - -extern "C" typedef NTSTATUS (WINAPI *t_NtQueryInformationThread)( HANDLE, THREADINFOCLASS, PVOID, ULONG, PULONG ); -extern "C" typedef BOOL (WINAPI *t_EnumProcessModules)( HANDLE, HMODULE*, DWORD, LPDWORD ); -extern "C" typedef BOOL (WINAPI *t_GetModuleInformation)( HANDLE, HMODULE, LPMODULEINFO, DWORD ); -extern "C" typedef DWORD (WINAPI *t_GetModuleBaseNameA)( HANDLE, HMODULE, LPSTR, DWORD ); -extern "C" typedef HRESULT (WINAPI *t_GetThreadDescription)( HANDLE, PWSTR* ); - -t_NtQueryInformationThread NtQueryInformationThread = (t_NtQueryInformationThread)GetProcAddress( GetModuleHandleA( "ntdll.dll" ), "NtQueryInformationThread" ); -t_EnumProcessModules _EnumProcessModules = (t_EnumProcessModules)GetProcAddress( GetModuleHandleA( "kernel32.dll" ), "K32EnumProcessModules" ); -t_GetModuleInformation _GetModuleInformation = (t_GetModuleInformation)GetProcAddress( GetModuleHandleA( "kernel32.dll" ), "K32GetModuleInformation" ); -t_GetModuleBaseNameA _GetModuleBaseNameA = (t_GetModuleBaseNameA)GetProcAddress( GetModuleHandleA( "kernel32.dll" ), "K32GetModuleBaseNameA" ); - -static t_GetThreadDescription _GetThreadDescription = 0; - - -void WINAPI EventRecordCallback( PEVENT_RECORD record ) -{ -#ifdef TRACY_ON_DEMAND - if( !GetProfiler().IsConnected() ) return; -#endif - - const auto& hdr = record->EventHeader; - switch( hdr.ProviderId.Data1 ) - { - case 0x3d6fa8d1: // Thread Guid - if( hdr.EventDescriptor.Opcode == 36 ) - { - const auto cswitch = (const CSwitch*)record->UserData; - - TracyLfqPrepare( QueueType::ContextSwitch ); - MemWrite( &item->contextSwitch.time, hdr.TimeStamp.QuadPart ); - MemWrite( &item->contextSwitch.oldThread, cswitch->oldThreadId ); - MemWrite( &item->contextSwitch.newThread, cswitch->newThreadId ); - MemWrite( &item->contextSwitch.cpu, record->BufferContext.ProcessorNumber ); - MemWrite( &item->contextSwitch.reason, cswitch->oldThreadWaitReason ); - MemWrite( &item->contextSwitch.state, cswitch->oldThreadState ); - TracyLfqCommit; - } - else if( hdr.EventDescriptor.Opcode == 50 ) - { - const auto rt = (const ReadyThread*)record->UserData; - - TracyLfqPrepare( QueueType::ThreadWakeup ); - MemWrite( &item->threadWakeup.time, hdr.TimeStamp.QuadPart ); - MemWrite( &item->threadWakeup.thread, rt->threadId ); - TracyLfqCommit; - } - else if( hdr.EventDescriptor.Opcode == 1 || hdr.EventDescriptor.Opcode == 3 ) - { - const auto tt = (const ThreadTrace*)record->UserData; - - uint64_t tid = tt->threadId; - if( tid == 0 ) return; - uint64_t pid = tt->processId; - TracyLfqPrepare( QueueType::TidToPid ); - MemWrite( &item->tidToPid.tid, tid ); - MemWrite( &item->tidToPid.pid, pid ); - TracyLfqCommit; - } - break; - case 0xdef2fe46: // StackWalk Guid - if( hdr.EventDescriptor.Opcode == 32 ) - { - const auto sw = (const StackWalkEvent*)record->UserData; - if( sw->stackProcess == s_pid ) - { - const uint64_t sz = ( record->UserDataLength - 16 ) / 8; - if( sz > 0 ) - { - auto trace = (uint64_t*)tracy_malloc( ( 1 + sz ) * sizeof( uint64_t ) ); - memcpy( trace, &sz, sizeof( uint64_t ) ); - memcpy( trace+1, sw->stack, sizeof( uint64_t ) * sz ); - TracyLfqPrepare( QueueType::CallstackSample ); - MemWrite( &item->callstackSampleFat.time, sw->eventTimeStamp ); - MemWrite( &item->callstackSampleFat.thread, sw->stackThread ); - MemWrite( &item->callstackSampleFat.ptr, (uint64_t)trace ); - TracyLfqCommit; - } - } - } - break; - default: - break; - } -} - -void WINAPI EventRecordCallbackVsync( PEVENT_RECORD record ) -{ -#ifdef TRACY_ON_DEMAND - if( !GetProfiler().IsConnected() ) return; -#endif - - const auto& hdr = record->EventHeader; - assert( hdr.ProviderId.Data1 == 0x802EC45A ); - assert( hdr.EventDescriptor.Id == 0x0011 ); - - const auto vs = (const VSyncInfo*)record->UserData; - - TracyLfqPrepare( QueueType::FrameVsync ); - MemWrite( &item->frameVsync.time, hdr.TimeStamp.QuadPart ); - MemWrite( &item->frameVsync.id, vs->vidPnTargetId ); - TracyLfqCommit; -} - -static void SetupVsync() -{ -#if _WIN32_WINNT >= _WIN32_WINNT_WINBLUE && !defined(__MINGW32__) - const auto psz = sizeof( EVENT_TRACE_PROPERTIES ) + MAX_PATH; - s_propVsync = (EVENT_TRACE_PROPERTIES*)tracy_malloc( psz ); - memset( s_propVsync, 0, sizeof( EVENT_TRACE_PROPERTIES ) ); - s_propVsync->LogFileMode = EVENT_TRACE_REAL_TIME_MODE; - s_propVsync->Wnode.BufferSize = psz; -#ifdef TRACY_TIMER_QPC - s_propVsync->Wnode.ClientContext = 1; -#else - s_propVsync->Wnode.ClientContext = 3; -#endif - s_propVsync->LoggerNameOffset = sizeof( EVENT_TRACE_PROPERTIES ); - strcpy( ((char*)s_propVsync) + sizeof( EVENT_TRACE_PROPERTIES ), "TracyVsync" ); - - auto backup = tracy_malloc( psz ); - memcpy( backup, s_propVsync, psz ); - - const auto controlStatus = ControlTraceA( 0, "TracyVsync", s_propVsync, EVENT_TRACE_CONTROL_STOP ); - if( controlStatus != ERROR_SUCCESS && controlStatus != ERROR_WMI_INSTANCE_NOT_FOUND ) - { - tracy_free( backup ); - tracy_free( s_propVsync ); - return; - } - - memcpy( s_propVsync, backup, psz ); - tracy_free( backup ); - - const auto startStatus = StartTraceA( &s_traceHandleVsync, "TracyVsync", s_propVsync ); - if( startStatus != ERROR_SUCCESS ) - { - tracy_free( s_propVsync ); - return; - } - - EVENT_FILTER_EVENT_ID fe = {}; - fe.FilterIn = TRUE; - fe.Count = 1; - fe.Events[0] = 0x0011; // VSyncDPC_Info - - EVENT_FILTER_DESCRIPTOR desc = {}; - desc.Ptr = (ULONGLONG)&fe; - desc.Size = sizeof( fe ); - desc.Type = EVENT_FILTER_TYPE_EVENT_ID; - - ENABLE_TRACE_PARAMETERS params = {}; - params.Version = ENABLE_TRACE_PARAMETERS_VERSION_2; - params.EnableProperty = EVENT_ENABLE_PROPERTY_IGNORE_KEYWORD_0; - params.SourceId = s_propVsync->Wnode.Guid; - params.EnableFilterDesc = &desc; - params.FilterDescCount = 1; - - uint64_t mask = 0x4000000000000001; // Microsoft_Windows_DxgKrnl_Performance | Base - if( EnableTraceEx2( s_traceHandleVsync, &DxgKrnlGuid, EVENT_CONTROL_CODE_ENABLE_PROVIDER, TRACE_LEVEL_INFORMATION, mask, mask, 0, ¶ms ) != ERROR_SUCCESS ) - { - tracy_free( s_propVsync ); - return; - } - - char loggerName[MAX_PATH]; - strcpy( loggerName, "TracyVsync" ); - - EVENT_TRACE_LOGFILEA log = {}; - log.LoggerName = loggerName; - log.ProcessTraceMode = PROCESS_TRACE_MODE_REAL_TIME | PROCESS_TRACE_MODE_EVENT_RECORD | PROCESS_TRACE_MODE_RAW_TIMESTAMP; - log.EventRecordCallback = EventRecordCallbackVsync; - - s_traceHandleVsync2 = OpenTraceA( &log ); - if( s_traceHandleVsync2 == (TRACEHANDLE)INVALID_HANDLE_VALUE ) - { - CloseTrace( s_traceHandleVsync ); - tracy_free( s_propVsync ); - return; - } - - s_threadVsync = (Thread*)tracy_malloc( sizeof( Thread ) ); - new(s_threadVsync) Thread( [] (void*) { - ThreadExitHandler threadExitHandler; - SetThreadPriority( GetCurrentThread(), THREAD_PRIORITY_TIME_CRITICAL ); - SetThreadName( "Tracy Vsync" ); - ProcessTrace( &s_traceHandleVsync2, 1, nullptr, nullptr ); - }, nullptr ); -#endif -} - -static int GetSamplingInterval() -{ - return GetSamplingPeriod() / 100; -} - -bool SysTraceStart( int64_t& samplingPeriod ) -{ - if( !_GetThreadDescription ) _GetThreadDescription = (t_GetThreadDescription)GetProcAddress( GetModuleHandleA( "kernel32.dll" ), "GetThreadDescription" ); - - s_pid = GetCurrentProcessId(); - -#if defined _WIN64 - constexpr bool isOs64Bit = true; -#else - BOOL _iswow64; - IsWow64Process( GetCurrentProcess(), &_iswow64 ); - const bool isOs64Bit = _iswow64; -#endif - - TOKEN_PRIVILEGES priv = {}; - priv.PrivilegeCount = 1; - priv.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED; - if( LookupPrivilegeValue( nullptr, SE_SYSTEM_PROFILE_NAME, &priv.Privileges[0].Luid ) == 0 ) return false; - - HANDLE pt; - if( OpenProcessToken( GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES, &pt ) == 0 ) return false; - const auto adjust = AdjustTokenPrivileges( pt, FALSE, &priv, 0, nullptr, nullptr ); - CloseHandle( pt ); - if( adjust == 0 ) return false; - const auto status = GetLastError(); - if( status != ERROR_SUCCESS ) return false; - - if( isOs64Bit ) - { - TRACE_PROFILE_INTERVAL interval = {}; - interval.Interval = GetSamplingInterval(); - const auto intervalStatus = TraceSetInformation( 0, TraceSampledProfileIntervalInfo, &interval, sizeof( interval ) ); - if( intervalStatus != ERROR_SUCCESS ) return false; - samplingPeriod = GetSamplingPeriod(); - } - - const auto psz = sizeof( EVENT_TRACE_PROPERTIES ) + sizeof( KERNEL_LOGGER_NAME ); - s_prop = (EVENT_TRACE_PROPERTIES*)tracy_malloc( psz ); - memset( s_prop, 0, sizeof( EVENT_TRACE_PROPERTIES ) ); - ULONG flags = 0; -#ifndef TRACY_NO_CONTEXT_SWITCH - flags = EVENT_TRACE_FLAG_CSWITCH | EVENT_TRACE_FLAG_DISPATCHER | EVENT_TRACE_FLAG_THREAD; -#endif -#ifndef TRACY_NO_SAMPLING - if( isOs64Bit ) flags |= EVENT_TRACE_FLAG_PROFILE; -#endif - s_prop->EnableFlags = flags; - s_prop->LogFileMode = EVENT_TRACE_REAL_TIME_MODE; - s_prop->Wnode.BufferSize = psz; - s_prop->Wnode.Flags = WNODE_FLAG_TRACED_GUID; -#ifdef TRACY_TIMER_QPC - s_prop->Wnode.ClientContext = 1; -#else - s_prop->Wnode.ClientContext = 3; -#endif - s_prop->Wnode.Guid = SystemTraceControlGuid; - s_prop->BufferSize = 1024; - s_prop->MinimumBuffers = std::thread::hardware_concurrency() * 4; - s_prop->MaximumBuffers = std::thread::hardware_concurrency() * 6; - s_prop->LoggerNameOffset = sizeof( EVENT_TRACE_PROPERTIES ); - memcpy( ((char*)s_prop) + sizeof( EVENT_TRACE_PROPERTIES ), KERNEL_LOGGER_NAME, sizeof( KERNEL_LOGGER_NAME ) ); - - auto backup = tracy_malloc( psz ); - memcpy( backup, s_prop, psz ); - - const auto controlStatus = ControlTrace( 0, KERNEL_LOGGER_NAME, s_prop, EVENT_TRACE_CONTROL_STOP ); - if( controlStatus != ERROR_SUCCESS && controlStatus != ERROR_WMI_INSTANCE_NOT_FOUND ) - { - tracy_free( backup ); - tracy_free( s_prop ); - return false; - } - - memcpy( s_prop, backup, psz ); - tracy_free( backup ); - - const auto startStatus = StartTrace( &s_traceHandle, KERNEL_LOGGER_NAME, s_prop ); - if( startStatus != ERROR_SUCCESS ) - { - tracy_free( s_prop ); - return false; - } - -#ifndef TRACY_NO_SAMPLING - if( isOs64Bit ) - { - CLASSIC_EVENT_ID stackId[2] = {}; - stackId[0].EventGuid = PerfInfoGuid; - stackId[0].Type = 46; - stackId[1].EventGuid = ThreadV2Guid; - stackId[1].Type = 36; - const auto stackStatus = TraceSetInformation( s_traceHandle, TraceStackTracingInfo, &stackId, sizeof( stackId ) ); - if( stackStatus != ERROR_SUCCESS ) - { - tracy_free( s_prop ); - return false; - } - } -#endif - -#ifdef UNICODE - WCHAR KernelLoggerName[sizeof( KERNEL_LOGGER_NAME )]; -#else - char KernelLoggerName[sizeof( KERNEL_LOGGER_NAME )]; -#endif - memcpy( KernelLoggerName, KERNEL_LOGGER_NAME, sizeof( KERNEL_LOGGER_NAME ) ); - EVENT_TRACE_LOGFILE log = {}; - log.LoggerName = KernelLoggerName; - log.ProcessTraceMode = PROCESS_TRACE_MODE_REAL_TIME | PROCESS_TRACE_MODE_EVENT_RECORD | PROCESS_TRACE_MODE_RAW_TIMESTAMP; - log.EventRecordCallback = EventRecordCallback; - - s_traceHandle2 = OpenTrace( &log ); - if( s_traceHandle2 == (TRACEHANDLE)INVALID_HANDLE_VALUE ) - { - CloseTrace( s_traceHandle ); - tracy_free( s_prop ); - return false; - } - -#ifndef TRACY_NO_VSYNC_CAPTURE - SetupVsync(); -#endif - - return true; -} - -void SysTraceStop() -{ - if( s_threadVsync ) - { - CloseTrace( s_traceHandleVsync2 ); - CloseTrace( s_traceHandleVsync ); - s_threadVsync->~Thread(); - tracy_free( s_threadVsync ); - } - - CloseTrace( s_traceHandle2 ); - CloseTrace( s_traceHandle ); -} - -void SysTraceWorker( void* ptr ) -{ - ThreadExitHandler threadExitHandler; - SetThreadPriority( GetCurrentThread(), THREAD_PRIORITY_TIME_CRITICAL ); - SetThreadName( "Tracy SysTrace" ); - ProcessTrace( &s_traceHandle2, 1, 0, 0 ); - ControlTrace( 0, KERNEL_LOGGER_NAME, s_prop, EVENT_TRACE_CONTROL_STOP ); - tracy_free( s_prop ); -} - -void SysTraceGetExternalName( uint64_t thread, const char*& threadName, const char*& name ) -{ - bool threadSent = false; - auto hnd = OpenThread( THREAD_QUERY_INFORMATION, FALSE, DWORD( thread ) ); - if( hnd == 0 ) - { - hnd = OpenThread( THREAD_QUERY_LIMITED_INFORMATION, FALSE, DWORD( thread ) ); - } - if( hnd != 0 ) - { - if( _GetThreadDescription ) - { - PWSTR tmp; - if ( SUCCEEDED( _GetThreadDescription( hnd, &tmp ) ) ) - { - char buf[256]; - auto ret = wcstombs( buf, tmp, 256 ); - LocalFree(tmp); - if( ret != 0 ) - { - threadName = CopyString( buf, ret ); - threadSent = true; - } - } - } - const auto pid = GetProcessIdOfThread( hnd ); - if( !threadSent && NtQueryInformationThread && _EnumProcessModules && _GetModuleInformation && _GetModuleBaseNameA ) - { - void* ptr; - ULONG retlen; - auto status = NtQueryInformationThread( hnd, (THREADINFOCLASS)9 /*ThreadQuerySetWin32StartAddress*/, &ptr, sizeof( &ptr ), &retlen ); - if( status == 0 ) - { - const auto phnd = OpenProcess( PROCESS_QUERY_INFORMATION | PROCESS_VM_READ, FALSE, pid ); - if( phnd != INVALID_HANDLE_VALUE ) - { - HMODULE modules[1024]; - DWORD needed; - if( _EnumProcessModules( phnd, modules, 1024 * sizeof( HMODULE ), &needed ) != 0 ) - { - const auto sz = std::min( DWORD( needed / sizeof( HMODULE ) ), DWORD( 1024 ) ); - for( DWORD i=0; i= (uint64_t)info.lpBaseOfDll && (uint64_t)ptr <= (uint64_t)info.lpBaseOfDll + (uint64_t)info.SizeOfImage ) - { - char buf2[1024]; - const auto modlen = _GetModuleBaseNameA( phnd, modules[i], buf2, 1024 ); - if( modlen != 0 ) - { - threadName = CopyString( buf2, modlen ); - threadSent = true; - } - } - } - } - } - CloseHandle( phnd ); - } - } - } - CloseHandle( hnd ); - if( !threadSent ) - { - threadName = CopyString( "???", 3 ); - threadSent = true; - } - if( pid != 0 ) - { - { - uint64_t _pid = pid; - TracyLfqPrepare( QueueType::TidToPid ); - MemWrite( &item->tidToPid.tid, thread ); - MemWrite( &item->tidToPid.pid, _pid ); - TracyLfqCommit; - } - if( pid == 4 ) - { - name = CopyStringFast( "System", 6 ); - return; - } - else - { - const auto phnd = OpenProcess( PROCESS_QUERY_LIMITED_INFORMATION, FALSE, pid ); - if( phnd != INVALID_HANDLE_VALUE ) - { - char buf2[1024]; - const auto sz = GetProcessImageFileNameA( phnd, buf2, 1024 ); - CloseHandle( phnd ); - if( sz != 0 ) - { - auto ptr = buf2 + sz - 1; - while( ptr > buf2 && *ptr != '\\' ) ptr--; - if( *ptr == '\\' ) ptr++; - name = CopyStringFast( ptr ); - return; - } - } - } - } - } - - if( !threadSent ) - { - threadName = CopyString( "???", 3 ); - } - name = CopyStringFast( "???", 3 ); -} - -} - -# elif defined __linux__ - -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include - -# if defined __i386 || defined __x86_64__ -# include "TracyCpuid.hpp" -# endif - -# include "TracyProfiler.hpp" -# include "TracyRingBuffer.hpp" -# include "TracyThread.hpp" - -namespace tracy -{ - -static std::atomic traceActive { false }; -static int s_numCpus = 0; -static int s_numBuffers = 0; -static int s_ctxBufferIdx = 0; - -static RingBuffer* s_ring = nullptr; - -static const int ThreadHashSize = 4 * 1024; -static uint32_t s_threadHash[ThreadHashSize] = {}; - -static bool CurrentProcOwnsThread( uint32_t tid ) -{ - const auto hash = tid & ( ThreadHashSize-1 ); - const auto hv = s_threadHash[hash]; - if( hv == tid ) return true; - if( hv == -tid ) return false; - - char path[256]; - sprintf( path, "/proc/self/task/%d", tid ); - struct stat st; - if( stat( path, &st ) == 0 ) - { - s_threadHash[hash] = tid; - return true; - } - else - { - s_threadHash[hash] = -tid; - return false; - } -} - -static int perf_event_open( struct perf_event_attr* hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags ) -{ - return syscall( __NR_perf_event_open, hw_event, pid, cpu, group_fd, flags ); -} - -enum TraceEventId -{ - EventCallstack, - EventCpuCycles, - EventInstructionsRetired, - EventCacheReference, - EventCacheMiss, - EventBranchRetired, - EventBranchMiss, - EventVsync, - EventContextSwitch, - EventWakeup, -}; - -static void ProbePreciseIp( perf_event_attr& pe, unsigned long long config0, unsigned long long config1, pid_t pid ) -{ - pe.config = config1; - pe.precise_ip = 3; - while( pe.precise_ip != 0 ) - { - const int fd = perf_event_open( &pe, pid, 0, -1, PERF_FLAG_FD_CLOEXEC ); - if( fd != -1 ) - { - close( fd ); - break; - } - pe.precise_ip--; - } - pe.config = config0; - while( pe.precise_ip != 0 ) - { - const int fd = perf_event_open( &pe, pid, 0, -1, PERF_FLAG_FD_CLOEXEC ); - if( fd != -1 ) - { - close( fd ); - break; - } - pe.precise_ip--; - } - TracyDebug( " Probed precise_ip: %i\n", pe.precise_ip ); -} - -static void ProbePreciseIp( perf_event_attr& pe, pid_t pid ) -{ - pe.precise_ip = 3; - while( pe.precise_ip != 0 ) - { - const int fd = perf_event_open( &pe, pid, 0, -1, PERF_FLAG_FD_CLOEXEC ); - if( fd != -1 ) - { - close( fd ); - break; - } - pe.precise_ip--; - } - TracyDebug( " Probed precise_ip: %i\n", pe.precise_ip ); -} - -static bool IsGenuineIntel() -{ -#if defined __i386 || defined __x86_64__ - uint32_t regs[4] = {}; - __get_cpuid( 0, regs, regs+1, regs+2, regs+3 ); - char manufacturer[12]; - memcpy( manufacturer, regs+1, 4 ); - memcpy( manufacturer+4, regs+3, 4 ); - memcpy( manufacturer+8, regs+2, 4 ); - return memcmp( manufacturer, "GenuineIntel", 12 ) == 0; -#else - return false; -#endif -} - -static const char* ReadFile( const char* path ) -{ - int fd = open( path, O_RDONLY ); - if( fd < 0 ) return nullptr; - - static char tmp[64]; - const auto cnt = read( fd, tmp, 63 ); - close( fd ); - if( cnt < 0 ) return nullptr; - tmp[cnt] = '\0'; - return tmp; -} - -bool SysTraceStart( int64_t& samplingPeriod ) -{ -#ifndef CLOCK_MONOTONIC_RAW - return false; -#endif - - const auto paranoidLevelStr = ReadFile( "/proc/sys/kernel/perf_event_paranoid" ); - if( !paranoidLevelStr ) return false; -#ifdef TRACY_VERBOSE - int paranoidLevel = 2; - paranoidLevel = atoi( paranoidLevelStr ); - TracyDebug( "perf_event_paranoid: %i\n", paranoidLevel ); -#endif - - int switchId = -1, wakeupId = -1, vsyncId = -1; - const auto switchIdStr = ReadFile( "/sys/kernel/debug/tracing/events/sched/sched_switch/id" ); - if( switchIdStr ) switchId = atoi( switchIdStr ); - const auto wakeupIdStr = ReadFile( "/sys/kernel/debug/tracing/events/sched/sched_wakeup/id" ); - if( wakeupIdStr ) wakeupId = atoi( wakeupIdStr ); - const auto vsyncIdStr = ReadFile( "/sys/kernel/debug/tracing/events/drm/drm_vblank_event/id" ); - if( vsyncIdStr ) vsyncId = atoi( vsyncIdStr ); - - TracyDebug( "sched_switch id: %i\n", switchId ); - TracyDebug( "sched_wakeup id: %i\n", wakeupId ); - TracyDebug( "drm_vblank_event id: %i\n", vsyncId ); - -#ifdef TRACY_NO_SAMPLING - const bool noSoftwareSampling = true; -#else - const char* noSoftwareSamplingEnv = GetEnvVar( "TRACY_NO_SAMPLING" ); - const bool noSoftwareSampling = noSoftwareSamplingEnv && noSoftwareSamplingEnv[0] == '1'; -#endif - -#ifdef TRACY_NO_SAMPLE_RETIREMENT - const bool noRetirement = true; -#else - const char* noRetirementEnv = GetEnvVar( "TRACY_NO_SAMPLE_RETIREMENT" ); - const bool noRetirement = noRetirementEnv && noRetirementEnv[0] == '1'; -#endif - -#ifdef TRACY_NO_SAMPLE_CACHE - const bool noCache = true; -#else - const char* noCacheEnv = GetEnvVar( "TRACY_NO_SAMPLE_CACHE" ); - const bool noCache = noCacheEnv && noCacheEnv[0] == '1'; -#endif - -#ifdef TRACY_NO_SAMPLE_BRANCH - const bool noBranch = true; -#else - const char* noBranchEnv = GetEnvVar( "TRACY_NO_SAMPLE_BRANCH" ); - const bool noBranch = noBranchEnv && noBranchEnv[0] == '1'; -#endif - -#ifdef TRACY_NO_CONTEXT_SWITCH - const bool noCtxSwitch = true; -#else - const char* noCtxSwitchEnv = GetEnvVar( "TRACY_NO_CONTEXT_SWITCH" ); - const bool noCtxSwitch = noCtxSwitchEnv && noCtxSwitchEnv[0] == '1'; -#endif - -#ifdef TRACY_NO_VSYNC_CAPTURE - const bool noVsync = true; -#else - const char* noVsyncEnv = GetEnvVar( "TRACY_NO_VSYNC_CAPTURE" ); - const bool noVsync = noVsyncEnv && noVsyncEnv[0] == '1'; -#endif - - samplingPeriod = GetSamplingPeriod(); - uint32_t currentPid = (uint32_t)getpid(); - - s_numCpus = (int)std::thread::hardware_concurrency(); - - const auto maxNumBuffers = s_numCpus * ( - 1 + // software sampling - 2 + // CPU cycles + instructions retired - 2 + // cache reference + miss - 2 + // branch retired + miss - 2 + // context switches + wakeups - 1 // vsync - ); - s_ring = (RingBuffer*)tracy_malloc( sizeof( RingBuffer ) * maxNumBuffers ); - s_numBuffers = 0; - - // software sampling - perf_event_attr pe = {}; - pe.type = PERF_TYPE_SOFTWARE; - pe.size = sizeof( perf_event_attr ); - pe.config = PERF_COUNT_SW_CPU_CLOCK; - pe.sample_freq = GetSamplingFrequency(); - pe.sample_type = PERF_SAMPLE_TID | PERF_SAMPLE_TIME | PERF_SAMPLE_CALLCHAIN; -#if LINUX_VERSION_CODE >= KERNEL_VERSION( 4, 8, 0 ) - pe.sample_max_stack = 127; -#endif - pe.disabled = 1; - pe.freq = 1; - pe.inherit = 1; -#if !defined TRACY_HW_TIMER || !( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 ) - pe.use_clockid = 1; - pe.clockid = CLOCK_MONOTONIC_RAW; -#endif - - if( !noSoftwareSampling ) - { - TracyDebug( "Setup software sampling\n" ); - ProbePreciseIp( pe, currentPid ); - for( int i=0; i= KERNEL_VERSION( 4, 8, 0 ) - pe.sample_max_stack = 127; -#endif - pe.disabled = 1; - pe.inherit = 1; - pe.config = switchId; -#if !defined TRACY_HW_TIMER || !( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 ) - pe.use_clockid = 1; - pe.clockid = CLOCK_MONOTONIC_RAW; -#endif - - TracyDebug( "Setup context switch capture\n" ); - for( int i=0; i> 63; - const auto m2 = test >> 47; - if( m1 == m2 ) break; - } - while( --cnt > 0 ); - for( uint64_t j=1; j> 63; - const auto m2 = test >> 47; - if( m1 != m2 ) trace[j] = 0; - } -#endif - - for( uint64_t j=1; j<=cnt; j++ ) - { - if( trace[j] >= (uint64_t)-4095 ) // PERF_CONTEXT_MAX - { - memmove( trace+j, trace+j+1, sizeof( uint64_t ) * ( cnt - j ) ); - cnt--; - } - } - - memcpy( trace, &cnt, sizeof( uint64_t ) ); - return trace; -} - -void SysTraceWorker( void* ptr ) -{ - ThreadExitHandler threadExitHandler; - SetThreadName( "Tracy Sampling" ); - InitRpmalloc(); - sched_param sp = { 99 }; - if( pthread_setschedparam( pthread_self(), SCHED_FIFO, &sp ) != 0 ) TracyDebug( "Failed to increase SysTraceWorker thread priority!\n" ); - auto ctxBufferIdx = s_ctxBufferIdx; - auto ringArray = s_ring; - auto numBuffers = s_numBuffers; - for( int i=0; i tail ); - hadData = true; - - const auto id = ring.GetId(); - assert( id != EventContextSwitch ); - const auto end = head - tail; - uint64_t pos = 0; - if( id == EventCallstack ) - { - while( pos < end ) - { - perf_event_header hdr; - ring.Read( &hdr, pos, sizeof( perf_event_header ) ); - if( hdr.type == PERF_RECORD_SAMPLE ) - { - auto offset = pos + sizeof( perf_event_header ); - - // Layout: - // u32 pid, tid - // u64 time - // u64 cnt - // u64 ip[cnt] - - uint32_t tid; - uint64_t t0; - uint64_t cnt; - - offset += sizeof( uint32_t ); - ring.Read( &tid, offset, sizeof( uint32_t ) ); - offset += sizeof( uint32_t ); - ring.Read( &t0, offset, sizeof( uint64_t ) ); - offset += sizeof( uint64_t ); - ring.Read( &cnt, offset, sizeof( uint64_t ) ); - offset += sizeof( uint64_t ); - - if( cnt > 0 ) - { -#if defined TRACY_HW_TIMER && ( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 ) - t0 = ring.ConvertTimeToTsc( t0 ); -#endif - auto trace = GetCallstackBlock( cnt, ring, offset ); - - TracyLfqPrepare( QueueType::CallstackSample ); - MemWrite( &item->callstackSampleFat.time, t0 ); - MemWrite( &item->callstackSampleFat.thread, tid ); - MemWrite( &item->callstackSampleFat.ptr, (uint64_t)trace ); - TracyLfqCommit; - } - } - pos += hdr.size; - } - } - else - { - while( pos < end ) - { - perf_event_header hdr; - ring.Read( &hdr, pos, sizeof( perf_event_header ) ); - if( hdr.type == PERF_RECORD_SAMPLE ) - { - auto offset = pos + sizeof( perf_event_header ); - - // Layout: - // u64 ip - // u64 time - - uint64_t ip, t0; - ring.Read( &ip, offset, sizeof( uint64_t ) ); - offset += sizeof( uint64_t ); - ring.Read( &t0, offset, sizeof( uint64_t ) ); - -#if defined TRACY_HW_TIMER && ( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 ) - t0 = ring.ConvertTimeToTsc( t0 ); -#endif - QueueType type; - switch( id ) - { - case EventCpuCycles: - type = QueueType::HwSampleCpuCycle; - break; - case EventInstructionsRetired: - type = QueueType::HwSampleInstructionRetired; - break; - case EventCacheReference: - type = QueueType::HwSampleCacheReference; - break; - case EventCacheMiss: - type = QueueType::HwSampleCacheMiss; - break; - case EventBranchRetired: - type = QueueType::HwSampleBranchRetired; - break; - case EventBranchMiss: - type = QueueType::HwSampleBranchMiss; - break; - default: - abort(); - } - - TracyLfqPrepare( type ); - MemWrite( &item->hwSample.ip, ip ); - MemWrite( &item->hwSample.time, t0 ); - TracyLfqCommit; - } - pos += hdr.size; - } - } - assert( pos == end ); - ring.Advance( end ); - } - if( !traceActive.load( std::memory_order_relaxed ) ) break; - - if( ctxBufferIdx != numBuffers ) - { - const auto ctxBufNum = numBuffers - ctxBufferIdx; - - int activeNum = 0; - uint16_t active[512]; - uint32_t end[512]; - uint32_t pos[512]; - for( int i=0; i 0 ) - { - hadData = true; - while( activeNum > 0 ) - { - int sel = -1; - int selPos; - int64_t t0 = std::numeric_limits::max(); - for( int i=0; i= 0 ) - { - auto& ring = ringArray[ctxBufferIdx + sel]; - auto rbPos = pos[sel]; - auto offset = rbPos; - perf_event_header hdr; - ring.Read( &hdr, offset, sizeof( perf_event_header ) ); - -#if defined TRACY_HW_TIMER && ( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 ) - t0 = ring.ConvertTimeToTsc( t0 ); -#endif - - const auto rid = ring.GetId(); - if( rid == EventContextSwitch ) - { - // Layout: - // u64 time - // u64 cnt - // u64 ip[cnt] - // u32 size - // u8 data[size] - // Data (not ABI stable, but has not changed since it was added, in 2009): - // u8 hdr[8] - // u8 prev_comm[16] - // u32 prev_pid - // u32 prev_prio - // lng prev_state - // u8 next_comm[16] - // u32 next_pid - // u32 next_prio - - offset += sizeof( perf_event_header ) + sizeof( uint64_t ); - - uint64_t cnt; - ring.Read( &cnt, offset, sizeof( uint64_t ) ); - offset += sizeof( uint64_t ); - const auto traceOffset = offset; - offset += sizeof( uint64_t ) * cnt + sizeof( uint32_t ) + 8 + 16; - - uint32_t prev_pid, next_pid; - long prev_state; - - ring.Read( &prev_pid, offset, sizeof( uint32_t ) ); - offset += sizeof( uint32_t ) + sizeof( uint32_t ); - ring.Read( &prev_state, offset, sizeof( long ) ); - offset += sizeof( long ) + 16; - ring.Read( &next_pid, offset, sizeof( uint32_t ) ); - - uint8_t reason = 100; - uint8_t state; - - if( prev_state & 0x0001 ) state = 104; - else if( prev_state & 0x0002 ) state = 101; - else if( prev_state & 0x0004 ) state = 105; - else if( prev_state & 0x0008 ) state = 106; - else if( prev_state & 0x0010 ) state = 108; - else if( prev_state & 0x0020 ) state = 109; - else if( prev_state & 0x0040 ) state = 110; - else if( prev_state & 0x0080 ) state = 102; - else state = 103; - - TracyLfqPrepare( QueueType::ContextSwitch ); - MemWrite( &item->contextSwitch.time, t0 ); - MemWrite( &item->contextSwitch.oldThread, prev_pid ); - MemWrite( &item->contextSwitch.newThread, next_pid ); - MemWrite( &item->contextSwitch.cpu, uint8_t( ring.GetCpu() ) ); - MemWrite( &item->contextSwitch.reason, reason ); - MemWrite( &item->contextSwitch.state, state ); - TracyLfqCommit; - - if( cnt > 0 && prev_pid != 0 && CurrentProcOwnsThread( prev_pid ) ) - { - auto trace = GetCallstackBlock( cnt, ring, traceOffset ); - - TracyLfqPrepare( QueueType::CallstackSampleContextSwitch ); - MemWrite( &item->callstackSampleFat.time, t0 ); - MemWrite( &item->callstackSampleFat.thread, prev_pid ); - MemWrite( &item->callstackSampleFat.ptr, (uint64_t)trace ); - TracyLfqCommit; - } - } - else if( rid == EventWakeup ) - { - // Layout: - // u64 time - // u32 size - // u8 data[size] - // Data: - // u8 hdr[8] - // u8 comm[16] - // u32 pid - // u32 prio - // u64 target_cpu - - offset += sizeof( perf_event_header ) + sizeof( uint64_t ) + sizeof( uint32_t ) + 8 + 16; - - uint32_t pid; - ring.Read( &pid, offset, sizeof( uint32_t ) ); - - TracyLfqPrepare( QueueType::ThreadWakeup ); - MemWrite( &item->threadWakeup.time, t0 ); - MemWrite( &item->threadWakeup.thread, pid ); - TracyLfqCommit; - } - else - { - assert( rid == EventVsync ); - // Layout: - // u64 time - // u32 size - // u8 data[size] - // Data (not ABI stable): - // u8 hdr[8] - // i32 crtc - // u32 seq - // i64 ktime - // u8 high precision - - offset += sizeof( perf_event_header ) + sizeof( uint64_t ) + sizeof( uint32_t ) + 8; - - int32_t crtc; - ring.Read( &crtc, offset, sizeof( int32_t ) ); - - // Note: The timestamp value t0 might be off by a number of microseconds from the - // true hardware vblank event. The ktime value should be used instead, but it is - // measured in CLOCK_MONOTONIC time. Tracy only supports the timestamp counter - // register (TSC) or CLOCK_MONOTONIC_RAW clock. -#if 0 - offset += sizeof( uint32_t ) * 2; - int64_t ktime; - ring.Read( &ktime, offset, sizeof( int64_t ) ); -#endif - - TracyLfqPrepare( QueueType::FrameVsync ); - MemWrite( &item->frameVsync.id, crtc ); - MemWrite( &item->frameVsync.time, t0 ); - TracyLfqCommit; - } - - rbPos += hdr.size; - if( rbPos == end[sel] ) - { - memmove( active+selPos, active+selPos+1, sizeof(*active) * ( activeNum - selPos - 1 ) ); - activeNum--; - } - else - { - pos[sel] = rbPos; - } - } - } - for( int i=0; i 0 && buf[sz-1] == '\n' ) buf[sz-1] = '\0'; - threadName = CopyString( buf ); - fclose( f ); - } - else - { - threadName = CopyString( "???", 3 ); - } - - sprintf( fn, "/proc/%" PRIu64 "/status", thread ); - f = fopen( fn, "rb" ); - if( f ) - { - char* tmp = (char*)tracy_malloc_fast( 8*1024 ); - const auto fsz = (ptrdiff_t)fread( tmp, 1, 8*1024, f ); - fclose( f ); - - int pid = -1; - auto line = tmp; - for(;;) - { - if( memcmp( "Tgid:\t", line, 6 ) == 0 ) - { - pid = atoi( line + 6 ); - break; - } - while( line - tmp < fsz && *line != '\n' ) line++; - if( *line != '\n' ) break; - line++; - } - tracy_free_fast( tmp ); - - if( pid >= 0 ) - { - { - uint64_t _pid = pid; - TracyLfqPrepare( QueueType::TidToPid ); - MemWrite( &item->tidToPid.tid, thread ); - MemWrite( &item->tidToPid.pid, _pid ); - TracyLfqCommit; - } - sprintf( fn, "/proc/%i/comm", pid ); - f = fopen( fn, "rb" ); - if( f ) - { - char buf[256]; - const auto sz = fread( buf, 1, 256, f ); - if( sz > 0 && buf[sz-1] == '\n' ) buf[sz-1] = '\0'; - name = CopyStringFast( buf ); - fclose( f ); - return; - } - } - } - name = CopyStringFast( "???", 3 ); -} - -} - -# endif - -#endif diff --git a/src/third_party/tracy/client/TracySysTrace.hpp b/src/third_party/tracy/client/TracySysTrace.hpp deleted file mode 100644 index 8c663cd7..00000000 --- a/src/third_party/tracy/client/TracySysTrace.hpp +++ /dev/null @@ -1,28 +0,0 @@ -#ifndef __TRACYSYSTRACE_HPP__ -#define __TRACYSYSTRACE_HPP__ - -#if !defined TRACY_NO_SYSTEM_TRACING && ( defined _WIN32 || defined __linux__ ) -# include "../common/TracyUwp.hpp" -# ifndef TRACY_UWP -# define TRACY_HAS_SYSTEM_TRACING -# endif -#endif - -#ifdef TRACY_HAS_SYSTEM_TRACING - -#include - -namespace tracy -{ - -bool SysTraceStart( int64_t& samplingPeriod ); -void SysTraceStop(); -void SysTraceWorker( void* ptr ); - -void SysTraceGetExternalName( uint64_t thread, const char*& threadName, const char*& name ); - -} - -#endif - -#endif diff --git a/src/third_party/tracy/client/TracyThread.hpp b/src/third_party/tracy/client/TracyThread.hpp deleted file mode 100644 index 5638756a..00000000 --- a/src/third_party/tracy/client/TracyThread.hpp +++ /dev/null @@ -1,90 +0,0 @@ -#ifndef __TRACYTHREAD_HPP__ -#define __TRACYTHREAD_HPP__ - -#if defined _WIN32 -# include -#else -# include -#endif - -#ifdef TRACY_MANUAL_LIFETIME -# include "tracy_rpmalloc.hpp" -#endif - -namespace tracy -{ - -#ifdef TRACY_MANUAL_LIFETIME -extern thread_local bool RpThreadInitDone; -#endif - -class ThreadExitHandler -{ -public: - ~ThreadExitHandler() - { -#ifdef TRACY_MANUAL_LIFETIME - rpmalloc_thread_finalize( 1 ); - RpThreadInitDone = false; -#endif - } -}; - -#if defined _WIN32 - -class Thread -{ -public: - Thread( void(*func)( void* ptr ), void* ptr ) - : m_func( func ) - , m_ptr( ptr ) - , m_hnd( CreateThread( nullptr, 0, Launch, this, 0, nullptr ) ) - {} - - ~Thread() - { - WaitForSingleObject( m_hnd, INFINITE ); - CloseHandle( m_hnd ); - } - - HANDLE Handle() const { return m_hnd; } - -private: - static DWORD WINAPI Launch( void* ptr ) { ((Thread*)ptr)->m_func( ((Thread*)ptr)->m_ptr ); return 0; } - - void(*m_func)( void* ptr ); - void* m_ptr; - HANDLE m_hnd; -}; - -#else - -class Thread -{ -public: - Thread( void(*func)( void* ptr ), void* ptr ) - : m_func( func ) - , m_ptr( ptr ) - { - pthread_create( &m_thread, nullptr, Launch, this ); - } - - ~Thread() - { - pthread_join( m_thread, nullptr ); - } - - pthread_t Handle() const { return m_thread; } - -private: - static void* Launch( void* ptr ) { ((Thread*)ptr)->m_func( ((Thread*)ptr)->m_ptr ); return nullptr; } - void(*m_func)( void* ptr ); - void* m_ptr; - pthread_t m_thread; -}; - -#endif - -} - -#endif diff --git a/src/third_party/tracy/client/tracy_SPSCQueue.h b/src/third_party/tracy/client/tracy_SPSCQueue.h deleted file mode 100644 index 7f1752b5..00000000 --- a/src/third_party/tracy/client/tracy_SPSCQueue.h +++ /dev/null @@ -1,148 +0,0 @@ -/* -Copyright (c) 2020 Erik Rigtorp - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. - */ - -#pragma once - -#include -#include -#include -#include -#include // std::enable_if, std::is_*_constructible - -#include "../common/TracyAlloc.hpp" - -#if defined (_MSC_VER) -#pragma warning(push) -#pragma warning(disable:4324) -#endif - -namespace tracy { - -template class SPSCQueue { -public: - explicit SPSCQueue(const size_t capacity) - : capacity_(capacity) { - capacity_++; // Needs one slack element - slots_ = (T*)tracy_malloc(sizeof(T) * (capacity_ + 2 * kPadding)); - - static_assert(alignof(SPSCQueue) == kCacheLineSize, ""); - static_assert(sizeof(SPSCQueue) >= 3 * kCacheLineSize, ""); - assert(reinterpret_cast(&readIdx_) - - reinterpret_cast(&writeIdx_) >= - static_cast(kCacheLineSize)); - } - - ~SPSCQueue() { - while (front()) { - pop(); - } - tracy_free(slots_); - } - - // non-copyable and non-movable - SPSCQueue(const SPSCQueue &) = delete; - SPSCQueue &operator=(const SPSCQueue &) = delete; - - template - void emplace(Args &&...args) noexcept( - std::is_nothrow_constructible::value) { - static_assert(std::is_constructible::value, - "T must be constructible with Args&&..."); - auto const writeIdx = writeIdx_.load(std::memory_order_relaxed); - auto nextWriteIdx = writeIdx + 1; - if (nextWriteIdx == capacity_) { - nextWriteIdx = 0; - } - while (nextWriteIdx == readIdxCache_) { - readIdxCache_ = readIdx_.load(std::memory_order_acquire); - } - new (&slots_[writeIdx + kPadding]) T(std::forward(args)...); - writeIdx_.store(nextWriteIdx, std::memory_order_release); - } - - T *front() noexcept { - auto const readIdx = readIdx_.load(std::memory_order_relaxed); - if (readIdx == writeIdxCache_) { - writeIdxCache_ = writeIdx_.load(std::memory_order_acquire); - if (writeIdxCache_ == readIdx) { - return nullptr; - } - } - return &slots_[readIdx + kPadding]; - } - - void pop() noexcept { - static_assert(std::is_nothrow_destructible::value, - "T must be nothrow destructible"); - auto const readIdx = readIdx_.load(std::memory_order_relaxed); - assert(writeIdx_.load(std::memory_order_acquire) != readIdx); - slots_[readIdx + kPadding].~T(); - auto nextReadIdx = readIdx + 1; - if (nextReadIdx == capacity_) { - nextReadIdx = 0; - } - readIdx_.store(nextReadIdx, std::memory_order_release); - } - - size_t size() const noexcept { - std::ptrdiff_t diff = writeIdx_.load(std::memory_order_acquire) - - readIdx_.load(std::memory_order_acquire); - if (diff < 0) { - diff += capacity_; - } - return static_cast(diff); - } - - bool empty() const noexcept { - return writeIdx_.load(std::memory_order_acquire) == - readIdx_.load(std::memory_order_acquire); - } - - size_t capacity() const noexcept { return capacity_ - 1; } - -private: - static constexpr size_t kCacheLineSize = 64; - - // Padding to avoid false sharing between slots_ and adjacent allocations - static constexpr size_t kPadding = (kCacheLineSize - 1) / sizeof(T) + 1; - -private: - size_t capacity_; - T *slots_; - - // Align to cache line size in order to avoid false sharing - // readIdxCache_ and writeIdxCache_ is used to reduce the amount of cache - // coherency traffic - alignas(kCacheLineSize) std::atomic writeIdx_ = {0}; - alignas(kCacheLineSize) size_t readIdxCache_ = 0; - alignas(kCacheLineSize) std::atomic readIdx_ = {0}; - alignas(kCacheLineSize) size_t writeIdxCache_ = 0; - - // Padding to avoid adjacent allocations to share cache line with - // writeIdxCache_ - char padding_[kCacheLineSize - sizeof(SPSCQueue::writeIdxCache_)]; -}; -} // namespace rigtorp - -#if defined (_MSC_VER) -#pragma warning(pop) -#endif diff --git a/src/third_party/tracy/client/tracy_concurrentqueue.h b/src/third_party/tracy/client/tracy_concurrentqueue.h deleted file mode 100644 index 4178d39e..00000000 --- a/src/third_party/tracy/client/tracy_concurrentqueue.h +++ /dev/null @@ -1,1441 +0,0 @@ -// Provides a C++11 implementation of a multi-producer, multi-consumer lock-free queue. -// An overview, including benchmark results, is provided here: -// http://moodycamel.com/blog/2014/a-fast-general-purpose-lock-free-queue-for-c++ -// The full design is also described in excruciating detail at: -// http://moodycamel.com/blog/2014/detailed-design-of-a-lock-free-queue - -// Simplified BSD license: -// Copyright (c) 2013-2016, Cameron Desrochers. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// - Redistributions of source code must retain the above copyright notice, this list of -// conditions and the following disclaimer. -// - Redistributions in binary form must reproduce the above copyright notice, this list of -// conditions and the following disclaimer in the documentation and/or other materials -// provided with the distribution. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL -// THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT -// OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR -// TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, -// EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - -#pragma once - -#include "../common/TracyAlloc.hpp" -#include "../common/TracyForceInline.hpp" -#include "../common/TracySystem.hpp" - -#if defined(__GNUC__) -// Disable -Wconversion warnings (spuriously triggered when Traits::size_t and -// Traits::index_t are set to < 32 bits, causing integer promotion, causing warnings -// upon assigning any computed values) -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wconversion" -#endif - -#if defined(__APPLE__) -#include "TargetConditionals.h" -#endif - -#include // Requires C++11. Sorry VS2010. -#include -#include // for max_align_t -#include -#include -#include -#include -#include -#include -#include // for CHAR_BIT -#include -#include // partly for __WINPTHREADS_VERSION if on MinGW-w64 w/ POSIX threading - -namespace tracy -{ - -// Compiler-specific likely/unlikely hints -namespace moodycamel { namespace details { -#if defined(__GNUC__) - inline bool cqLikely(bool x) { return __builtin_expect((x), true); } - inline bool cqUnlikely(bool x) { return __builtin_expect((x), false); } -#else - inline bool cqLikely(bool x) { return x; } - inline bool cqUnlikely(bool x) { return x; } -#endif -} } - -namespace -{ - // to avoid MSVC warning 4127: conditional expression is constant - template - struct compile_time_condition - { - static const bool value = false; - }; - template <> - struct compile_time_condition - { - static const bool value = true; - }; -} - -namespace moodycamel { -namespace details { - template - struct const_numeric_max { - static_assert(std::is_integral::value, "const_numeric_max can only be used with integers"); - static const T value = std::numeric_limits::is_signed - ? (static_cast(1) << (sizeof(T) * CHAR_BIT - 1)) - static_cast(1) - : static_cast(-1); - }; - -#if defined(__GLIBCXX__) - typedef ::max_align_t std_max_align_t; // libstdc++ forgot to add it to std:: for a while -#else - typedef std::max_align_t std_max_align_t; // Others (e.g. MSVC) insist it can *only* be accessed via std:: -#endif - - // Some platforms have incorrectly set max_align_t to a type with <8 bytes alignment even while supporting - // 8-byte aligned scalar values (*cough* 32-bit iOS). Work around this with our own union. See issue #64. - typedef union { - std_max_align_t x; - long long y; - void* z; - } max_align_t; -} - -// Default traits for the ConcurrentQueue. To change some of the -// traits without re-implementing all of them, inherit from this -// struct and shadow the declarations you wish to be different; -// since the traits are used as a template type parameter, the -// shadowed declarations will be used where defined, and the defaults -// otherwise. -struct ConcurrentQueueDefaultTraits -{ - // General-purpose size type. std::size_t is strongly recommended. - typedef std::size_t size_t; - - // The type used for the enqueue and dequeue indices. Must be at least as - // large as size_t. Should be significantly larger than the number of elements - // you expect to hold at once, especially if you have a high turnover rate; - // for example, on 32-bit x86, if you expect to have over a hundred million - // elements or pump several million elements through your queue in a very - // short space of time, using a 32-bit type *may* trigger a race condition. - // A 64-bit int type is recommended in that case, and in practice will - // prevent a race condition no matter the usage of the queue. Note that - // whether the queue is lock-free with a 64-int type depends on the whether - // std::atomic is lock-free, which is platform-specific. - typedef std::size_t index_t; - - // Internally, all elements are enqueued and dequeued from multi-element - // blocks; this is the smallest controllable unit. If you expect few elements - // but many producers, a smaller block size should be favoured. For few producers - // and/or many elements, a larger block size is preferred. A sane default - // is provided. Must be a power of 2. - static const size_t BLOCK_SIZE = 64*1024; - - // For explicit producers (i.e. when using a producer token), the block is - // checked for being empty by iterating through a list of flags, one per element. - // For large block sizes, this is too inefficient, and switching to an atomic - // counter-based approach is faster. The switch is made for block sizes strictly - // larger than this threshold. - static const size_t EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD = 32; - - // How many full blocks can be expected for a single explicit producer? This should - // reflect that number's maximum for optimal performance. Must be a power of 2. - static const size_t EXPLICIT_INITIAL_INDEX_SIZE = 32; - - // Controls the number of items that an explicit consumer (i.e. one with a token) - // must consume before it causes all consumers to rotate and move on to the next - // internal queue. - static const std::uint32_t EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE = 256; - - // The maximum number of elements (inclusive) that can be enqueued to a sub-queue. - // Enqueue operations that would cause this limit to be surpassed will fail. Note - // that this limit is enforced at the block level (for performance reasons), i.e. - // it's rounded up to the nearest block size. - static const size_t MAX_SUBQUEUE_SIZE = details::const_numeric_max::value; - - - // Memory allocation can be customized if needed. - // malloc should return nullptr on failure, and handle alignment like std::malloc. -#if defined(malloc) || defined(free) - // Gah, this is 2015, stop defining macros that break standard code already! - // Work around malloc/free being special macros: - static inline void* WORKAROUND_malloc(size_t size) { return malloc(size); } - static inline void WORKAROUND_free(void* ptr) { return free(ptr); } - static inline void* (malloc)(size_t size) { return WORKAROUND_malloc(size); } - static inline void (free)(void* ptr) { return WORKAROUND_free(ptr); } -#else - static inline void* malloc(size_t size) { return tracy::tracy_malloc(size); } - static inline void free(void* ptr) { return tracy::tracy_free(ptr); } -#endif -}; - - -// When producing or consuming many elements, the most efficient way is to: -// 1) Use one of the bulk-operation methods of the queue with a token -// 2) Failing that, use the bulk-operation methods without a token -// 3) Failing that, create a token and use that with the single-item methods -// 4) Failing that, use the single-parameter methods of the queue -// Having said that, don't create tokens willy-nilly -- ideally there should be -// a maximum of one token per thread (of each kind). -struct ProducerToken; -struct ConsumerToken; - -template class ConcurrentQueue; - - -namespace details -{ - struct ConcurrentQueueProducerTypelessBase - { - ConcurrentQueueProducerTypelessBase* next; - std::atomic inactive; - ProducerToken* token; - uint32_t threadId; - - ConcurrentQueueProducerTypelessBase() - : next(nullptr), inactive(false), token(nullptr), threadId(0) - { - } - }; - - template - static inline bool circular_less_than(T a, T b) - { - static_assert(std::is_integral::value && !std::numeric_limits::is_signed, "circular_less_than is intended to be used only with unsigned integer types"); - return static_cast(a - b) > static_cast(static_cast(1) << (static_cast(sizeof(T) * CHAR_BIT - 1))); - // Note: extra parens around rhs of operator<< is MSVC bug: https://developercommunity2.visualstudio.com/t/C4554-triggers-when-both-lhs-and-rhs-is/10034931 - // silencing the bug requires #pragma warning(disable: 4554) around the calling code and has no effect when done here. - } - - template - static inline char* align_for(char* ptr) - { - const std::size_t alignment = std::alignment_of::value; - return ptr + (alignment - (reinterpret_cast(ptr) % alignment)) % alignment; - } - - template - static inline T ceil_to_pow_2(T x) - { - static_assert(std::is_integral::value && !std::numeric_limits::is_signed, "ceil_to_pow_2 is intended to be used only with unsigned integer types"); - - // Adapted from http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 - --x; - x |= x >> 1; - x |= x >> 2; - x |= x >> 4; - for (std::size_t i = 1; i < sizeof(T); i <<= 1) { - x |= x >> (i << 3); - } - ++x; - return x; - } - - template - static inline void swap_relaxed(std::atomic& left, std::atomic& right) - { - T temp = std::move(left.load(std::memory_order_relaxed)); - left.store(std::move(right.load(std::memory_order_relaxed)), std::memory_order_relaxed); - right.store(std::move(temp), std::memory_order_relaxed); - } - - template - static inline T const& nomove(T const& x) - { - return x; - } - - template - struct nomove_if - { - template - static inline T const& eval(T const& x) - { - return x; - } - }; - - template<> - struct nomove_if - { - template - static inline auto eval(U&& x) - -> decltype(std::forward(x)) - { - return std::forward(x); - } - }; - - template - static inline auto deref_noexcept(It& it) noexcept -> decltype(*it) - { - return *it; - } - -#if defined(__clang__) || !defined(__GNUC__) || __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8) - template struct is_trivially_destructible : std::is_trivially_destructible { }; -#else - template struct is_trivially_destructible : std::has_trivial_destructor { }; -#endif - - template struct static_is_lock_free_num { enum { value = 0 }; }; - template<> struct static_is_lock_free_num { enum { value = ATOMIC_CHAR_LOCK_FREE }; }; - template<> struct static_is_lock_free_num { enum { value = ATOMIC_SHORT_LOCK_FREE }; }; - template<> struct static_is_lock_free_num { enum { value = ATOMIC_INT_LOCK_FREE }; }; - template<> struct static_is_lock_free_num { enum { value = ATOMIC_LONG_LOCK_FREE }; }; - template<> struct static_is_lock_free_num { enum { value = ATOMIC_LLONG_LOCK_FREE }; }; - template struct static_is_lock_free : static_is_lock_free_num::type> { }; - template<> struct static_is_lock_free { enum { value = ATOMIC_BOOL_LOCK_FREE }; }; - template struct static_is_lock_free { enum { value = ATOMIC_POINTER_LOCK_FREE }; }; -} - - -struct ProducerToken -{ - template - explicit ProducerToken(ConcurrentQueue& queue); - - ProducerToken(ProducerToken&& other) noexcept - : producer(other.producer) - { - other.producer = nullptr; - if (producer != nullptr) { - producer->token = this; - } - } - - inline ProducerToken& operator=(ProducerToken&& other) noexcept - { - swap(other); - return *this; - } - - void swap(ProducerToken& other) noexcept - { - std::swap(producer, other.producer); - if (producer != nullptr) { - producer->token = this; - } - if (other.producer != nullptr) { - other.producer->token = &other; - } - } - - // A token is always valid unless: - // 1) Memory allocation failed during construction - // 2) It was moved via the move constructor - // (Note: assignment does a swap, leaving both potentially valid) - // 3) The associated queue was destroyed - // Note that if valid() returns true, that only indicates - // that the token is valid for use with a specific queue, - // but not which one; that's up to the user to track. - inline bool valid() const { return producer != nullptr; } - - ~ProducerToken() - { - if (producer != nullptr) { - producer->token = nullptr; - producer->inactive.store(true, std::memory_order_release); - } - } - - // Disable copying and assignment - ProducerToken(ProducerToken const&) = delete; - ProducerToken& operator=(ProducerToken const&) = delete; - -private: - template friend class ConcurrentQueue; - -protected: - details::ConcurrentQueueProducerTypelessBase* producer; -}; - - -struct ConsumerToken -{ - template - explicit ConsumerToken(ConcurrentQueue& q); - - ConsumerToken(ConsumerToken&& other) noexcept - : initialOffset(other.initialOffset), lastKnownGlobalOffset(other.lastKnownGlobalOffset), itemsConsumedFromCurrent(other.itemsConsumedFromCurrent), currentProducer(other.currentProducer), desiredProducer(other.desiredProducer) - { - } - - inline ConsumerToken& operator=(ConsumerToken&& other) noexcept - { - swap(other); - return *this; - } - - void swap(ConsumerToken& other) noexcept - { - std::swap(initialOffset, other.initialOffset); - std::swap(lastKnownGlobalOffset, other.lastKnownGlobalOffset); - std::swap(itemsConsumedFromCurrent, other.itemsConsumedFromCurrent); - std::swap(currentProducer, other.currentProducer); - std::swap(desiredProducer, other.desiredProducer); - } - - // Disable copying and assignment - ConsumerToken(ConsumerToken const&) = delete; - ConsumerToken& operator=(ConsumerToken const&) = delete; - -private: - template friend class ConcurrentQueue; - -private: // but shared with ConcurrentQueue - std::uint32_t initialOffset; - std::uint32_t lastKnownGlobalOffset; - std::uint32_t itemsConsumedFromCurrent; - details::ConcurrentQueueProducerTypelessBase* currentProducer; - details::ConcurrentQueueProducerTypelessBase* desiredProducer; -}; - - -template -class ConcurrentQueue -{ -public: - struct ExplicitProducer; - - typedef moodycamel::ProducerToken producer_token_t; - typedef moodycamel::ConsumerToken consumer_token_t; - - typedef typename Traits::index_t index_t; - typedef typename Traits::size_t size_t; - - static const size_t BLOCK_SIZE = static_cast(Traits::BLOCK_SIZE); - static const size_t EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD = static_cast(Traits::EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD); - static const size_t EXPLICIT_INITIAL_INDEX_SIZE = static_cast(Traits::EXPLICIT_INITIAL_INDEX_SIZE); - static const std::uint32_t EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE = static_cast(Traits::EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE); -#ifdef _MSC_VER -#pragma warning(push) -#pragma warning(disable: 4307) // + integral constant overflow (that's what the ternary expression is for!) -#pragma warning(disable: 4309) // static_cast: Truncation of constant value -#endif - static const size_t MAX_SUBQUEUE_SIZE = (details::const_numeric_max::value - static_cast(Traits::MAX_SUBQUEUE_SIZE) < BLOCK_SIZE) ? details::const_numeric_max::value : ((static_cast(Traits::MAX_SUBQUEUE_SIZE) + (BLOCK_SIZE - 1)) / BLOCK_SIZE * BLOCK_SIZE); -#ifdef _MSC_VER -#pragma warning(pop) -#endif - - static_assert(!std::numeric_limits::is_signed && std::is_integral::value, "Traits::size_t must be an unsigned integral type"); - static_assert(!std::numeric_limits::is_signed && std::is_integral::value, "Traits::index_t must be an unsigned integral type"); - static_assert(sizeof(index_t) >= sizeof(size_t), "Traits::index_t must be at least as wide as Traits::size_t"); - static_assert((BLOCK_SIZE > 1) && !(BLOCK_SIZE & (BLOCK_SIZE - 1)), "Traits::BLOCK_SIZE must be a power of 2 (and at least 2)"); - static_assert((EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD > 1) && !(EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD & (EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD - 1)), "Traits::EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD must be a power of 2 (and greater than 1)"); - static_assert((EXPLICIT_INITIAL_INDEX_SIZE > 1) && !(EXPLICIT_INITIAL_INDEX_SIZE & (EXPLICIT_INITIAL_INDEX_SIZE - 1)), "Traits::EXPLICIT_INITIAL_INDEX_SIZE must be a power of 2 (and greater than 1)"); - -public: - // Creates a queue with at least `capacity` element slots; note that the - // actual number of elements that can be inserted without additional memory - // allocation depends on the number of producers and the block size (e.g. if - // the block size is equal to `capacity`, only a single block will be allocated - // up-front, which means only a single producer will be able to enqueue elements - // without an extra allocation -- blocks aren't shared between producers). - // This method is not thread safe -- it is up to the user to ensure that the - // queue is fully constructed before it starts being used by other threads (this - // includes making the memory effects of construction visible, possibly with a - // memory barrier). - explicit ConcurrentQueue(size_t capacity = 6 * BLOCK_SIZE) - : producerListTail(nullptr), - producerCount(0), - initialBlockPoolIndex(0), - nextExplicitConsumerId(0), - globalExplicitConsumerOffset(0) - { - populate_initial_block_list(capacity / BLOCK_SIZE + ((capacity & (BLOCK_SIZE - 1)) == 0 ? 0 : 1)); - } - - // Computes the correct amount of pre-allocated blocks for you based - // on the minimum number of elements you want available at any given - // time, and the maximum concurrent number of each type of producer. - ConcurrentQueue(size_t minCapacity, size_t maxExplicitProducers) - : producerListTail(nullptr), - producerCount(0), - initialBlockPoolIndex(0), - nextExplicitConsumerId(0), - globalExplicitConsumerOffset(0) - { - size_t blocks = (((minCapacity + BLOCK_SIZE - 1) / BLOCK_SIZE) - 1) * (maxExplicitProducers + 1) + 2 * (maxExplicitProducers); - populate_initial_block_list(blocks); - } - - // Note: The queue should not be accessed concurrently while it's - // being deleted. It's up to the user to synchronize this. - // This method is not thread safe. - ~ConcurrentQueue() - { - // Destroy producers - auto ptr = producerListTail.load(std::memory_order_relaxed); - while (ptr != nullptr) { - auto next = ptr->next_prod(); - if (ptr->token != nullptr) { - ptr->token->producer = nullptr; - } - destroy(ptr); - ptr = next; - } - - // Destroy global free list - auto block = freeList.head_unsafe(); - while (block != nullptr) { - auto next = block->freeListNext.load(std::memory_order_relaxed); - if (block->dynamicallyAllocated) { - destroy(block); - } - block = next; - } - - // Destroy initial free list - destroy_array(initialBlockPool, initialBlockPoolSize); - } - - // Disable copying and copy assignment - ConcurrentQueue(ConcurrentQueue const&) = delete; - ConcurrentQueue(ConcurrentQueue&& other) = delete; - ConcurrentQueue& operator=(ConcurrentQueue const&) = delete; - ConcurrentQueue& operator=(ConcurrentQueue&& other) = delete; - -public: - tracy_force_inline T* enqueue_begin(producer_token_t const& token, index_t& currentTailIndex) - { - return static_cast(token.producer)->ConcurrentQueue::ExplicitProducer::enqueue_begin(currentTailIndex); - } - - template - size_t try_dequeue_bulk_single(consumer_token_t& token, NotifyThread notifyThread, ProcessData processData ) - { - if (token.desiredProducer == nullptr || token.lastKnownGlobalOffset != globalExplicitConsumerOffset.load(std::memory_order_relaxed)) { - if (!update_current_producer_after_rotation(token)) { - return 0; - } - } - - size_t count = static_cast(token.currentProducer)->dequeue_bulk(notifyThread, processData); - token.itemsConsumedFromCurrent += static_cast(count); - - auto tail = producerListTail.load(std::memory_order_acquire); - auto ptr = static_cast(token.currentProducer)->next_prod(); - if (ptr == nullptr) { - ptr = tail; - } - if( count == 0 ) - { - while (ptr != static_cast(token.currentProducer)) { - auto dequeued = ptr->dequeue_bulk(notifyThread, processData); - if (dequeued != 0) { - token.currentProducer = ptr; - token.itemsConsumedFromCurrent = static_cast(dequeued); - return dequeued; - } - ptr = ptr->next_prod(); - if (ptr == nullptr) { - ptr = tail; - } - } - return 0; - } - else - { - token.currentProducer = ptr; - token.itemsConsumedFromCurrent = 0; - return count; - } - } - - - // Returns an estimate of the total number of elements currently in the queue. This - // estimate is only accurate if the queue has completely stabilized before it is called - // (i.e. all enqueue and dequeue operations have completed and their memory effects are - // visible on the calling thread, and no further operations start while this method is - // being called). - // Thread-safe. - size_t size_approx() const - { - size_t size = 0; - for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) { - size += ptr->size_approx(); - } - return size; - } - - - // Returns true if the underlying atomic variables used by - // the queue are lock-free (they should be on most platforms). - // Thread-safe. - static bool is_lock_free() - { - return - details::static_is_lock_free::value == 2 && - details::static_is_lock_free::value == 2 && - details::static_is_lock_free::value == 2 && - details::static_is_lock_free::value == 2 && - details::static_is_lock_free::value == 2; - } - - -private: - friend struct ProducerToken; - friend struct ConsumerToken; - friend struct ExplicitProducer; - - - /////////////////////////////// - // Queue methods - /////////////////////////////// - - inline bool update_current_producer_after_rotation(consumer_token_t& token) - { - // Ah, there's been a rotation, figure out where we should be! - auto tail = producerListTail.load(std::memory_order_acquire); - if (token.desiredProducer == nullptr && tail == nullptr) { - return false; - } - auto prodCount = producerCount.load(std::memory_order_relaxed); - auto globalOffset = globalExplicitConsumerOffset.load(std::memory_order_relaxed); - if (details::cqUnlikely(token.desiredProducer == nullptr)) { - // Aha, first time we're dequeueing anything. - // Figure out our local position - // Note: offset is from start, not end, but we're traversing from end -- subtract from count first - std::uint32_t offset = prodCount - 1 - (token.initialOffset % prodCount); - token.desiredProducer = tail; - for (std::uint32_t i = 0; i != offset; ++i) { - token.desiredProducer = static_cast(token.desiredProducer)->next_prod(); - if (token.desiredProducer == nullptr) { - token.desiredProducer = tail; - } - } - } - - std::uint32_t delta = globalOffset - token.lastKnownGlobalOffset; - if (delta >= prodCount) { - delta = delta % prodCount; - } - for (std::uint32_t i = 0; i != delta; ++i) { - token.desiredProducer = static_cast(token.desiredProducer)->next_prod(); - if (token.desiredProducer == nullptr) { - token.desiredProducer = tail; - } - } - - token.lastKnownGlobalOffset = globalOffset; - token.currentProducer = token.desiredProducer; - token.itemsConsumedFromCurrent = 0; - return true; - } - - - /////////////////////////// - // Free list - /////////////////////////// - - template - struct FreeListNode - { - FreeListNode() : freeListRefs(0), freeListNext(nullptr) { } - - std::atomic freeListRefs; - std::atomic freeListNext; - }; - - // A simple CAS-based lock-free free list. Not the fastest thing in the world under heavy contention, but - // simple and correct (assuming nodes are never freed until after the free list is destroyed), and fairly - // speedy under low contention. - template // N must inherit FreeListNode or have the same fields (and initialization of them) - struct FreeList - { - FreeList() : freeListHead(nullptr) { } - FreeList(FreeList&& other) : freeListHead(other.freeListHead.load(std::memory_order_relaxed)) { other.freeListHead.store(nullptr, std::memory_order_relaxed); } - void swap(FreeList& other) { details::swap_relaxed(freeListHead, other.freeListHead); } - - FreeList(FreeList const&) = delete; - FreeList& operator=(FreeList const&) = delete; - - inline void add(N* node) - { - // We know that the should-be-on-freelist bit is 0 at this point, so it's safe to - // set it using a fetch_add - if (node->freeListRefs.fetch_add(SHOULD_BE_ON_FREELIST, std::memory_order_acq_rel) == 0) { - // Oh look! We were the last ones referencing this node, and we know - // we want to add it to the free list, so let's do it! - add_knowing_refcount_is_zero(node); - } - } - - inline N* try_get() - { - auto head = freeListHead.load(std::memory_order_acquire); - while (head != nullptr) { - auto prevHead = head; - auto refs = head->freeListRefs.load(std::memory_order_relaxed); - if ((refs & REFS_MASK) == 0 || !head->freeListRefs.compare_exchange_strong(refs, refs + 1, std::memory_order_acquire, std::memory_order_relaxed)) { - head = freeListHead.load(std::memory_order_acquire); - continue; - } - - // Good, reference count has been incremented (it wasn't at zero), which means we can read the - // next and not worry about it changing between now and the time we do the CAS - auto next = head->freeListNext.load(std::memory_order_relaxed); - if (freeListHead.compare_exchange_strong(head, next, std::memory_order_acquire, std::memory_order_relaxed)) { - // Yay, got the node. This means it was on the list, which means shouldBeOnFreeList must be false no - // matter the refcount (because nobody else knows it's been taken off yet, it can't have been put back on). - assert((head->freeListRefs.load(std::memory_order_relaxed) & SHOULD_BE_ON_FREELIST) == 0); - - // Decrease refcount twice, once for our ref, and once for the list's ref - head->freeListRefs.fetch_sub(2, std::memory_order_release); - return head; - } - - // OK, the head must have changed on us, but we still need to decrease the refcount we increased. - // Note that we don't need to release any memory effects, but we do need to ensure that the reference - // count decrement happens-after the CAS on the head. - refs = prevHead->freeListRefs.fetch_sub(1, std::memory_order_acq_rel); - if (refs == SHOULD_BE_ON_FREELIST + 1) { - add_knowing_refcount_is_zero(prevHead); - } - } - - return nullptr; - } - - // Useful for traversing the list when there's no contention (e.g. to destroy remaining nodes) - N* head_unsafe() const { return freeListHead.load(std::memory_order_relaxed); } - - private: - inline void add_knowing_refcount_is_zero(N* node) - { - // Since the refcount is zero, and nobody can increase it once it's zero (except us, and we run - // only one copy of this method per node at a time, i.e. the single thread case), then we know - // we can safely change the next pointer of the node; however, once the refcount is back above - // zero, then other threads could increase it (happens under heavy contention, when the refcount - // goes to zero in between a load and a refcount increment of a node in try_get, then back up to - // something non-zero, then the refcount increment is done by the other thread) -- so, if the CAS - // to add the node to the actual list fails, decrease the refcount and leave the add operation to - // the next thread who puts the refcount back at zero (which could be us, hence the loop). - auto head = freeListHead.load(std::memory_order_relaxed); - while (true) { - node->freeListNext.store(head, std::memory_order_relaxed); - node->freeListRefs.store(1, std::memory_order_release); - if (!freeListHead.compare_exchange_strong(head, node, std::memory_order_release, std::memory_order_relaxed)) { - // Hmm, the add failed, but we can only try again when the refcount goes back to zero - if (node->freeListRefs.fetch_add(SHOULD_BE_ON_FREELIST - 1, std::memory_order_release) == 1) { - continue; - } - } - return; - } - } - - private: - // Implemented like a stack, but where node order doesn't matter (nodes are inserted out of order under contention) - std::atomic freeListHead; - - static const std::uint32_t REFS_MASK = 0x7FFFFFFF; - static const std::uint32_t SHOULD_BE_ON_FREELIST = 0x80000000; - }; - - - /////////////////////////// - // Block - /////////////////////////// - - struct Block - { - Block() - : next(nullptr), elementsCompletelyDequeued(0), freeListRefs(0), freeListNext(nullptr), shouldBeOnFreeList(false), dynamicallyAllocated(true) - { - } - - inline bool is_empty() const - { - if (compile_time_condition::value) { - // Check flags - for (size_t i = 0; i < BLOCK_SIZE; ++i) { - if (!emptyFlags[i].load(std::memory_order_relaxed)) { - return false; - } - } - - // Aha, empty; make sure we have all other memory effects that happened before the empty flags were set - std::atomic_thread_fence(std::memory_order_acquire); - return true; - } - else { - // Check counter - if (elementsCompletelyDequeued.load(std::memory_order_relaxed) == BLOCK_SIZE) { - std::atomic_thread_fence(std::memory_order_acquire); - return true; - } - assert(elementsCompletelyDequeued.load(std::memory_order_relaxed) <= BLOCK_SIZE); - return false; - } - } - - // Returns true if the block is now empty (does not apply in explicit context) - inline bool set_empty(index_t i) - { - if (BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) { - // Set flag - assert(!emptyFlags[BLOCK_SIZE - 1 - static_cast(i & static_cast(BLOCK_SIZE - 1))].load(std::memory_order_relaxed)); - emptyFlags[BLOCK_SIZE - 1 - static_cast(i & static_cast(BLOCK_SIZE - 1))].store(true, std::memory_order_release); - return false; - } - else { - // Increment counter - auto prevVal = elementsCompletelyDequeued.fetch_add(1, std::memory_order_release); - assert(prevVal < BLOCK_SIZE); - return prevVal == BLOCK_SIZE - 1; - } - } - - // Sets multiple contiguous item statuses to 'empty' (assumes no wrapping and count > 0). - // Returns true if the block is now empty (does not apply in explicit context). - inline bool set_many_empty(index_t i, size_t count) - { - if (compile_time_condition::value) { - // Set flags - std::atomic_thread_fence(std::memory_order_release); - i = BLOCK_SIZE - 1 - static_cast(i & static_cast(BLOCK_SIZE - 1)) - count + 1; - for (size_t j = 0; j != count; ++j) { - assert(!emptyFlags[i + j].load(std::memory_order_relaxed)); - emptyFlags[i + j].store(true, std::memory_order_relaxed); - } - return false; - } - else { - // Increment counter - auto prevVal = elementsCompletelyDequeued.fetch_add(count, std::memory_order_release); - assert(prevVal + count <= BLOCK_SIZE); - return prevVal + count == BLOCK_SIZE; - } - } - - inline void set_all_empty() - { - if (BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) { - // Set all flags - for (size_t i = 0; i != BLOCK_SIZE; ++i) { - emptyFlags[i].store(true, std::memory_order_relaxed); - } - } - else { - // Reset counter - elementsCompletelyDequeued.store(BLOCK_SIZE, std::memory_order_relaxed); - } - } - - inline void reset_empty() - { - if (compile_time_condition::value) { - // Reset flags - for (size_t i = 0; i != BLOCK_SIZE; ++i) { - emptyFlags[i].store(false, std::memory_order_relaxed); - } - } - else { - // Reset counter - elementsCompletelyDequeued.store(0, std::memory_order_relaxed); - } - } - - inline T* operator[](index_t idx) noexcept { return static_cast(static_cast(elements)) + static_cast(idx & static_cast(BLOCK_SIZE - 1)); } - inline T const* operator[](index_t idx) const noexcept { return static_cast(static_cast(elements)) + static_cast(idx & static_cast(BLOCK_SIZE - 1)); } - - private: - // IMPORTANT: This must be the first member in Block, so that if T depends on the alignment of - // addresses returned by malloc, that alignment will be preserved. Apparently clang actually - // generates code that uses this assumption for AVX instructions in some cases. Ideally, we - // should also align Block to the alignment of T in case it's higher than malloc's 16-byte - // alignment, but this is hard to do in a cross-platform way. Assert for this case: - static_assert(std::alignment_of::value <= std::alignment_of::value, "The queue does not support super-aligned types at this time"); - // Additionally, we need the alignment of Block itself to be a multiple of max_align_t since - // otherwise the appropriate padding will not be added at the end of Block in order to make - // arrays of Blocks all be properly aligned (not just the first one). We use a union to force - // this. - union { - char elements[sizeof(T) * BLOCK_SIZE]; - details::max_align_t dummy; - }; - public: - Block* next; - std::atomic elementsCompletelyDequeued; - std::atomic emptyFlags[BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD ? BLOCK_SIZE : 1]; - public: - std::atomic freeListRefs; - std::atomic freeListNext; - std::atomic shouldBeOnFreeList; - bool dynamicallyAllocated; // Perhaps a better name for this would be 'isNotPartOfInitialBlockPool' - }; - static_assert(std::alignment_of::value >= std::alignment_of::value, "Internal error: Blocks must be at least as aligned as the type they are wrapping"); - - - /////////////////////////// - // Producer base - /////////////////////////// - - struct ProducerBase : public details::ConcurrentQueueProducerTypelessBase - { - ProducerBase(ConcurrentQueue* parent_) : - tailIndex(0), - headIndex(0), - dequeueOptimisticCount(0), - dequeueOvercommit(0), - tailBlock(nullptr), - parent(parent_) - { - } - - virtual ~ProducerBase() { }; - - template - inline size_t dequeue_bulk(NotifyThread notifyThread, ProcessData processData) - { - return static_cast(this)->dequeue_bulk(notifyThread, processData); - } - - inline ProducerBase* next_prod() const { return static_cast(next); } - - inline size_t size_approx() const - { - auto tail = tailIndex.load(std::memory_order_relaxed); - auto head = headIndex.load(std::memory_order_relaxed); - return details::circular_less_than(head, tail) ? static_cast(tail - head) : 0; - } - - inline index_t getTail() const { return tailIndex.load(std::memory_order_relaxed); } - protected: - std::atomic tailIndex; // Where to enqueue to next - std::atomic headIndex; // Where to dequeue from next - - std::atomic dequeueOptimisticCount; - std::atomic dequeueOvercommit; - - Block* tailBlock; - - public: - ConcurrentQueue* parent; - }; - - - public: - /////////////////////////// - // Explicit queue - /////////////////////////// - struct ExplicitProducer : public ProducerBase - { - explicit ExplicitProducer(ConcurrentQueue* _parent) : - ProducerBase(_parent), - blockIndex(nullptr), - pr_blockIndexSlotsUsed(0), - pr_blockIndexSize(EXPLICIT_INITIAL_INDEX_SIZE >> 1), - pr_blockIndexFront(0), - pr_blockIndexEntries(nullptr), - pr_blockIndexRaw(nullptr) - { - size_t poolBasedIndexSize = details::ceil_to_pow_2(_parent->initialBlockPoolSize) >> 1; - if (poolBasedIndexSize > pr_blockIndexSize) { - pr_blockIndexSize = poolBasedIndexSize; - } - - new_block_index(0); // This creates an index with double the number of current entries, i.e. EXPLICIT_INITIAL_INDEX_SIZE - } - - ~ExplicitProducer() - { - // Destruct any elements not yet dequeued. - // Since we're in the destructor, we can assume all elements - // are either completely dequeued or completely not (no halfways). - if (this->tailBlock != nullptr) { // Note this means there must be a block index too - // First find the block that's partially dequeued, if any - Block* halfDequeuedBlock = nullptr; - if ((this->headIndex.load(std::memory_order_relaxed) & static_cast(BLOCK_SIZE - 1)) != 0) { - // The head's not on a block boundary, meaning a block somewhere is partially dequeued - // (or the head block is the tail block and was fully dequeued, but the head/tail are still not on a boundary) - size_t i = (pr_blockIndexFront - pr_blockIndexSlotsUsed) & (pr_blockIndexSize - 1); - while (details::circular_less_than(pr_blockIndexEntries[i].base + BLOCK_SIZE, this->headIndex.load(std::memory_order_relaxed))) { - i = (i + 1) & (pr_blockIndexSize - 1); - } - assert(details::circular_less_than(pr_blockIndexEntries[i].base, this->headIndex.load(std::memory_order_relaxed))); - halfDequeuedBlock = pr_blockIndexEntries[i].block; - } - - // Start at the head block (note the first line in the loop gives us the head from the tail on the first iteration) - auto block = this->tailBlock; - do { - block = block->next; - if (block->ConcurrentQueue::Block::is_empty()) { - continue; - } - - size_t i = 0; // Offset into block - if (block == halfDequeuedBlock) { - i = static_cast(this->headIndex.load(std::memory_order_relaxed) & static_cast(BLOCK_SIZE - 1)); - } - - // Walk through all the items in the block; if this is the tail block, we need to stop when we reach the tail index - auto lastValidIndex = (this->tailIndex.load(std::memory_order_relaxed) & static_cast(BLOCK_SIZE - 1)) == 0 ? BLOCK_SIZE : static_cast(this->tailIndex.load(std::memory_order_relaxed) & static_cast(BLOCK_SIZE - 1)); - while (i != BLOCK_SIZE && (block != this->tailBlock || i != lastValidIndex)) { - (*block)[i++]->~T(); - } - } while (block != this->tailBlock); - } - - // Destroy all blocks that we own - if (this->tailBlock != nullptr) { - auto block = this->tailBlock; - do { - auto nextBlock = block->next; - if (block->dynamicallyAllocated) { - destroy(block); - } - else { - this->parent->add_block_to_free_list(block); - } - block = nextBlock; - } while (block != this->tailBlock); - } - - // Destroy the block indices - auto header = static_cast(pr_blockIndexRaw); - while (header != nullptr) { - auto prev = static_cast(header->prev); - header->~BlockIndexHeader(); - (Traits::free)(header); - header = prev; - } - } - - inline void enqueue_begin_alloc(index_t currentTailIndex) - { - // We reached the end of a block, start a new one - if (this->tailBlock != nullptr && this->tailBlock->next->ConcurrentQueue::Block::is_empty()) { - // We can re-use the block ahead of us, it's empty! - this->tailBlock = this->tailBlock->next; - this->tailBlock->ConcurrentQueue::Block::reset_empty(); - - // We'll put the block on the block index (guaranteed to be room since we're conceptually removing the - // last block from it first -- except instead of removing then adding, we can just overwrite). - // Note that there must be a valid block index here, since even if allocation failed in the ctor, - // it would have been re-attempted when adding the first block to the queue; since there is such - // a block, a block index must have been successfully allocated. - } - else { - // We're going to need a new block; check that the block index has room - if (pr_blockIndexRaw == nullptr || pr_blockIndexSlotsUsed == pr_blockIndexSize) { - // Hmm, the circular block index is already full -- we'll need - // to allocate a new index. Note pr_blockIndexRaw can only be nullptr if - // the initial allocation failed in the constructor. - new_block_index(pr_blockIndexSlotsUsed); - } - - // Insert a new block in the circular linked list - auto newBlock = this->parent->ConcurrentQueue::requisition_block(); - newBlock->ConcurrentQueue::Block::reset_empty(); - if (this->tailBlock == nullptr) { - newBlock->next = newBlock; - } - else { - newBlock->next = this->tailBlock->next; - this->tailBlock->next = newBlock; - } - this->tailBlock = newBlock; - ++pr_blockIndexSlotsUsed; - } - - // Add block to block index - auto& entry = blockIndex.load(std::memory_order_relaxed)->entries[pr_blockIndexFront]; - entry.base = currentTailIndex; - entry.block = this->tailBlock; - blockIndex.load(std::memory_order_relaxed)->front.store(pr_blockIndexFront, std::memory_order_release); - pr_blockIndexFront = (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1); - } - - tracy_force_inline T* enqueue_begin(index_t& currentTailIndex) - { - currentTailIndex = this->tailIndex.load(std::memory_order_relaxed); - if (details::cqUnlikely((currentTailIndex & static_cast(BLOCK_SIZE - 1)) == 0)) { - this->enqueue_begin_alloc(currentTailIndex); - } - return (*this->tailBlock)[currentTailIndex]; - } - - tracy_force_inline std::atomic& get_tail_index() - { - return this->tailIndex; - } - - template - size_t dequeue_bulk(NotifyThread notifyThread, ProcessData processData) - { - auto tail = this->tailIndex.load(std::memory_order_relaxed); - auto overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed); - auto desiredCount = static_cast(tail - (this->dequeueOptimisticCount.load(std::memory_order_relaxed) - overcommit)); - if (details::circular_less_than(0, desiredCount)) { - desiredCount = desiredCount < 8192 ? desiredCount : 8192; - std::atomic_thread_fence(std::memory_order_acquire); - - auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(desiredCount, std::memory_order_relaxed); - assert(overcommit <= myDequeueCount); - - tail = this->tailIndex.load(std::memory_order_acquire); - auto actualCount = static_cast(tail - (myDequeueCount - overcommit)); - if (details::circular_less_than(0, actualCount)) { - actualCount = desiredCount < actualCount ? desiredCount : actualCount; - if (actualCount < desiredCount) { - this->dequeueOvercommit.fetch_add(desiredCount - actualCount, std::memory_order_release); - } - - // Get the first index. Note that since there's guaranteed to be at least actualCount elements, this - // will never exceed tail. - auto firstIndex = this->headIndex.fetch_add(actualCount, std::memory_order_acq_rel); - - // Determine which block the first element is in - auto localBlockIndex = blockIndex.load(std::memory_order_acquire); - auto localBlockIndexHead = localBlockIndex->front.load(std::memory_order_acquire); - - auto headBase = localBlockIndex->entries[localBlockIndexHead].base; - auto firstBlockBaseIndex = firstIndex & ~static_cast(BLOCK_SIZE - 1); - auto offset = static_cast(static_cast::type>(firstBlockBaseIndex - headBase) / BLOCK_SIZE); - auto indexIndex = (localBlockIndexHead + offset) & (localBlockIndex->size - 1); - - notifyThread( this->threadId ); - - // Iterate the blocks and dequeue - auto index = firstIndex; - do { - auto firstIndexInBlock = index; - auto endIndex = (index & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); - endIndex = details::circular_less_than(firstIndex + static_cast(actualCount), endIndex) ? firstIndex + static_cast(actualCount) : endIndex; - auto block = localBlockIndex->entries[indexIndex].block; - - const auto sz = endIndex - index; - processData( (*block)[index], sz ); - index += sz; - - block->ConcurrentQueue::Block::set_many_empty(firstIndexInBlock, static_cast(endIndex - firstIndexInBlock)); - indexIndex = (indexIndex + 1) & (localBlockIndex->size - 1); - } while (index != firstIndex + actualCount); - - return actualCount; - } - else { - // Wasn't anything to dequeue after all; make the effective dequeue count eventually consistent - this->dequeueOvercommit.fetch_add(desiredCount, std::memory_order_release); - } - } - - return 0; - } - - private: - struct BlockIndexEntry - { - index_t base; - Block* block; - }; - - struct BlockIndexHeader - { - size_t size; - std::atomic front; // Current slot (not next, like pr_blockIndexFront) - BlockIndexEntry* entries; - void* prev; - }; - - - bool new_block_index(size_t numberOfFilledSlotsToExpose) - { - auto prevBlockSizeMask = pr_blockIndexSize - 1; - - // Create the new block - pr_blockIndexSize <<= 1; - auto newRawPtr = static_cast((Traits::malloc)(sizeof(BlockIndexHeader) + std::alignment_of::value - 1 + sizeof(BlockIndexEntry) * pr_blockIndexSize)); - if (newRawPtr == nullptr) { - pr_blockIndexSize >>= 1; // Reset to allow graceful retry - return false; - } - - auto newBlockIndexEntries = reinterpret_cast(details::align_for(newRawPtr + sizeof(BlockIndexHeader))); - - // Copy in all the old indices, if any - size_t j = 0; - if (pr_blockIndexSlotsUsed != 0) { - auto i = (pr_blockIndexFront - pr_blockIndexSlotsUsed) & prevBlockSizeMask; - do { - newBlockIndexEntries[j++] = pr_blockIndexEntries[i]; - i = (i + 1) & prevBlockSizeMask; - } while (i != pr_blockIndexFront); - } - - // Update everything - auto header = new (newRawPtr) BlockIndexHeader; - header->size = pr_blockIndexSize; - header->front.store(numberOfFilledSlotsToExpose - 1, std::memory_order_relaxed); - header->entries = newBlockIndexEntries; - header->prev = pr_blockIndexRaw; // we link the new block to the old one so we can free it later - - pr_blockIndexFront = j; - pr_blockIndexEntries = newBlockIndexEntries; - pr_blockIndexRaw = newRawPtr; - blockIndex.store(header, std::memory_order_release); - - return true; - } - - private: - std::atomic blockIndex; - - // To be used by producer only -- consumer must use the ones in referenced by blockIndex - size_t pr_blockIndexSlotsUsed; - size_t pr_blockIndexSize; - size_t pr_blockIndexFront; // Next slot (not current) - BlockIndexEntry* pr_blockIndexEntries; - void* pr_blockIndexRaw; - }; - - ExplicitProducer* get_explicit_producer(producer_token_t const& token) - { - return static_cast(token.producer); - } - - private: - - ////////////////////////////////// - // Block pool manipulation - ////////////////////////////////// - - void populate_initial_block_list(size_t blockCount) - { - initialBlockPoolSize = blockCount; - if (initialBlockPoolSize == 0) { - initialBlockPool = nullptr; - return; - } - - initialBlockPool = create_array(blockCount); - if (initialBlockPool == nullptr) { - initialBlockPoolSize = 0; - } - for (size_t i = 0; i < initialBlockPoolSize; ++i) { - initialBlockPool[i].dynamicallyAllocated = false; - } - } - - inline Block* try_get_block_from_initial_pool() - { - if (initialBlockPoolIndex.load(std::memory_order_relaxed) >= initialBlockPoolSize) { - return nullptr; - } - - auto index = initialBlockPoolIndex.fetch_add(1, std::memory_order_relaxed); - - return index < initialBlockPoolSize ? (initialBlockPool + index) : nullptr; - } - - inline void add_block_to_free_list(Block* block) - { - freeList.add(block); - } - - inline void add_blocks_to_free_list(Block* block) - { - while (block != nullptr) { - auto next = block->next; - add_block_to_free_list(block); - block = next; - } - } - - inline Block* try_get_block_from_free_list() - { - return freeList.try_get(); - } - - // Gets a free block from one of the memory pools, or allocates a new one (if applicable) - Block* requisition_block() - { - auto block = try_get_block_from_initial_pool(); - if (block != nullptr) { - return block; - } - - block = try_get_block_from_free_list(); - if (block != nullptr) { - return block; - } - - return create(); - } - - - ////////////////////////////////// - // Producer list manipulation - ////////////////////////////////// - - ProducerBase* recycle_or_create_producer() - { - bool recycled; - return recycle_or_create_producer(recycled); - } - - ProducerBase* recycle_or_create_producer(bool& recycled) - { - // Try to re-use one first - for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) { - if (ptr->inactive.load(std::memory_order_relaxed)) { - if( ptr->size_approx() == 0 ) - { - bool expected = true; - if (ptr->inactive.compare_exchange_strong(expected, /* desired */ false, std::memory_order_acquire, std::memory_order_relaxed)) { - // We caught one! It's been marked as activated, the caller can have it - recycled = true; - return ptr; - } - } - } - } - - recycled = false; - return add_producer(static_cast(create(this))); - } - - ProducerBase* add_producer(ProducerBase* producer) - { - // Handle failed memory allocation - if (producer == nullptr) { - return nullptr; - } - - producerCount.fetch_add(1, std::memory_order_relaxed); - - // Add it to the lock-free list - auto prevTail = producerListTail.load(std::memory_order_relaxed); - do { - producer->next = prevTail; - } while (!producerListTail.compare_exchange_weak(prevTail, producer, std::memory_order_release, std::memory_order_relaxed)); - - return producer; - } - - void reown_producers() - { - // After another instance is moved-into/swapped-with this one, all the - // producers we stole still think their parents are the other queue. - // So fix them up! - for (auto ptr = producerListTail.load(std::memory_order_relaxed); ptr != nullptr; ptr = ptr->next_prod()) { - ptr->parent = this; - } - } - - ////////////////////////////////// - // Utility functions - ////////////////////////////////// - - template - static inline U* create_array(size_t count) - { - assert(count > 0); - return static_cast((Traits::malloc)(sizeof(U) * count)); - } - - template - static inline void destroy_array(U* p, size_t count) - { - ((void)count); - if (p != nullptr) { - assert(count > 0); - (Traits::free)(p); - } - } - - template - static inline U* create() - { - auto p = (Traits::malloc)(sizeof(U)); - return new (p) U; - } - - template - static inline U* create(A1&& a1) - { - auto p = (Traits::malloc)(sizeof(U)); - return new (p) U(std::forward(a1)); - } - - template - static inline void destroy(U* p) - { - if (p != nullptr) { - p->~U(); - } - (Traits::free)(p); - } - -private: - std::atomic producerListTail; - std::atomic producerCount; - - std::atomic initialBlockPoolIndex; - Block* initialBlockPool; - size_t initialBlockPoolSize; - - FreeList freeList; - - std::atomic nextExplicitConsumerId; - std::atomic globalExplicitConsumerOffset; -}; - - -template -ProducerToken::ProducerToken(ConcurrentQueue& queue) - : producer(queue.recycle_or_create_producer()) -{ - if (producer != nullptr) { - producer->token = this; - producer->threadId = detail::GetThreadHandleImpl(); - } -} - -template -ConsumerToken::ConsumerToken(ConcurrentQueue& queue) - : itemsConsumedFromCurrent(0), currentProducer(nullptr), desiredProducer(nullptr) -{ - initialOffset = queue.nextExplicitConsumerId.fetch_add(1, std::memory_order_release); - lastKnownGlobalOffset = static_cast(-1); -} - -template -inline void swap(ConcurrentQueue& a, ConcurrentQueue& b) noexcept -{ - a.swap(b); -} - -inline void swap(ProducerToken& a, ProducerToken& b) noexcept -{ - a.swap(b); -} - -inline void swap(ConsumerToken& a, ConsumerToken& b) noexcept -{ - a.swap(b); -} - -} - -} /* namespace tracy */ - -#if defined(__GNUC__) -#pragma GCC diagnostic pop -#endif diff --git a/src/third_party/tracy/client/tracy_rpmalloc.cpp b/src/third_party/tracy/client/tracy_rpmalloc.cpp deleted file mode 100644 index 315a40f9..00000000 --- a/src/third_party/tracy/client/tracy_rpmalloc.cpp +++ /dev/null @@ -1,3519 +0,0 @@ -#ifdef TRACY_ENABLE - -/* rpmalloc.c - Memory allocator - Public Domain - 2016-2020 Mattias Jansson - * - * This library provides a cross-platform lock free thread caching malloc implementation in C11. - * The latest source code is always available at - * - * https://github.com/mjansson/rpmalloc - * - * This library is put in the public domain; you can redistribute it and/or modify it without any restrictions. - * - */ - -#include "tracy_rpmalloc.hpp" - -#define BUILD_DYNAMIC_LINK 1 - -//////////// -/// -/// Build time configurable limits -/// -////// - -#if defined(__clang__) -#pragma clang diagnostic ignored "-Wunused-macros" -#pragma clang diagnostic ignored "-Wunused-function" -#if __has_warning("-Wreserved-identifier") -#pragma clang diagnostic ignored "-Wreserved-identifier" -#endif -#elif defined(__GNUC__) -#pragma GCC diagnostic ignored "-Wunused-macros" -#pragma GCC diagnostic ignored "-Wunused-function" -#pragma GCC diagnostic ignored "-Warray-bounds" -#endif - -#ifndef HEAP_ARRAY_SIZE -//! Size of heap hashmap -#define HEAP_ARRAY_SIZE 47 -#endif -#ifndef ENABLE_THREAD_CACHE -//! Enable per-thread cache -#define ENABLE_THREAD_CACHE 1 -#endif -#ifndef ENABLE_GLOBAL_CACHE -//! Enable global cache shared between all threads, requires thread cache -#define ENABLE_GLOBAL_CACHE 1 -#endif -#ifndef ENABLE_VALIDATE_ARGS -//! Enable validation of args to public entry points -#define ENABLE_VALIDATE_ARGS 0 -#endif -#ifndef ENABLE_STATISTICS -//! Enable statistics collection -#define ENABLE_STATISTICS 0 -#endif -#ifndef ENABLE_ASSERTS -//! Enable asserts -#define ENABLE_ASSERTS 0 -#endif -#ifndef ENABLE_OVERRIDE -//! Override standard library malloc/free and new/delete entry points -#define ENABLE_OVERRIDE 0 -#endif -#ifndef ENABLE_PRELOAD -//! Support preloading -#define ENABLE_PRELOAD 0 -#endif -#ifndef DISABLE_UNMAP -//! Disable unmapping memory pages (also enables unlimited cache) -#define DISABLE_UNMAP 0 -#endif -#ifndef ENABLE_UNLIMITED_CACHE -//! Enable unlimited global cache (no unmapping until finalization) -#define ENABLE_UNLIMITED_CACHE 0 -#endif -#ifndef ENABLE_ADAPTIVE_THREAD_CACHE -//! Enable adaptive thread cache size based on use heuristics -#define ENABLE_ADAPTIVE_THREAD_CACHE 0 -#endif -#ifndef DEFAULT_SPAN_MAP_COUNT -//! Default number of spans to map in call to map more virtual memory (default values yield 4MiB here) -#define DEFAULT_SPAN_MAP_COUNT 64 -#endif -#ifndef GLOBAL_CACHE_MULTIPLIER -//! Multiplier for global cache -#define GLOBAL_CACHE_MULTIPLIER 8 -#endif - -#if DISABLE_UNMAP && !ENABLE_GLOBAL_CACHE -#error Must use global cache if unmap is disabled -#endif - -#if DISABLE_UNMAP -#undef ENABLE_UNLIMITED_CACHE -#define ENABLE_UNLIMITED_CACHE 1 -#endif - -#if !ENABLE_GLOBAL_CACHE -#undef ENABLE_UNLIMITED_CACHE -#define ENABLE_UNLIMITED_CACHE 0 -#endif - -#if !ENABLE_THREAD_CACHE -#undef ENABLE_ADAPTIVE_THREAD_CACHE -#define ENABLE_ADAPTIVE_THREAD_CACHE 0 -#endif - -#if defined(_WIN32) || defined(__WIN32__) || defined(_WIN64) -# define PLATFORM_WINDOWS 1 -# define PLATFORM_POSIX 0 -#else -# define PLATFORM_WINDOWS 0 -# define PLATFORM_POSIX 1 -#endif - -/// Platform and arch specifics -#if defined(_MSC_VER) && !defined(__clang__) -# pragma warning (disable: 5105) -# ifndef FORCEINLINE -# define FORCEINLINE inline __forceinline -# endif -#else -# ifndef FORCEINLINE -# define FORCEINLINE inline __attribute__((__always_inline__)) -# endif -#endif -#if PLATFORM_WINDOWS -# ifndef WIN32_LEAN_AND_MEAN -# define WIN32_LEAN_AND_MEAN -# endif -# include -# if ENABLE_VALIDATE_ARGS -# include -# endif -#else -# include -# include -# include -# include -# if defined(__linux__) || defined(__ANDROID__) -# include -# if !defined(PR_SET_VMA) -# define PR_SET_VMA 0x53564d41 -# define PR_SET_VMA_ANON_NAME 0 -# endif -# endif -# if defined(__APPLE__) -# include -# if !TARGET_OS_IPHONE && !TARGET_OS_SIMULATOR -# include -# include -# endif -# include -# endif -# if defined(__HAIKU__) || defined(__TINYC__) -# include -# endif -#endif - -#include -#include -#include - -#if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK) -#include -static DWORD fls_key; -#endif - -#if PLATFORM_POSIX -# include -# include -# ifdef __FreeBSD__ -# include -# define MAP_HUGETLB MAP_ALIGNED_SUPER -# ifndef PROT_MAX -# define PROT_MAX(f) 0 -# endif -# else -# define PROT_MAX(f) 0 -# endif -# ifdef __sun -extern int madvise(caddr_t, size_t, int); -# endif -# ifndef MAP_UNINITIALIZED -# define MAP_UNINITIALIZED 0 -# endif -#endif -#include - -#if ENABLE_ASSERTS -# undef NDEBUG -# if defined(_MSC_VER) && !defined(_DEBUG) -# define _DEBUG -# endif -# include -#define RPMALLOC_TOSTRING_M(x) #x -#define RPMALLOC_TOSTRING(x) RPMALLOC_TOSTRING_M(x) -#define rpmalloc_assert(truth, message) \ - do { \ - if (!(truth)) { \ - if (_memory_config.error_callback) { \ - _memory_config.error_callback( \ - message " (" RPMALLOC_TOSTRING(truth) ") at " __FILE__ ":" RPMALLOC_TOSTRING(__LINE__)); \ - } else { \ - assert((truth) && message); \ - } \ - } \ - } while (0) -#else -# define rpmalloc_assert(truth, message) do {} while(0) -#endif -#if ENABLE_STATISTICS -# include -#endif - -////// -/// -/// Atomic access abstraction (since MSVC does not do C11 yet) -/// -////// - -#include - -typedef std::atomic atomic32_t; -typedef std::atomic atomic64_t; -typedef std::atomic atomicptr_t; - -static FORCEINLINE int32_t atomic_load32(atomic32_t* src) { return std::atomic_load_explicit(src, std::memory_order_relaxed); } -static FORCEINLINE void atomic_store32(atomic32_t* dst, int32_t val) { std::atomic_store_explicit(dst, val, std::memory_order_relaxed); } -static FORCEINLINE int32_t atomic_incr32(atomic32_t* val) { return std::atomic_fetch_add_explicit(val, 1, std::memory_order_relaxed) + 1; } -static FORCEINLINE int32_t atomic_decr32(atomic32_t* val) { return std::atomic_fetch_add_explicit(val, -1, std::memory_order_relaxed) - 1; } -static FORCEINLINE int32_t atomic_add32(atomic32_t* val, int32_t add) { return std::atomic_fetch_add_explicit(val, add, std::memory_order_relaxed) + add; } -static FORCEINLINE int atomic_cas32_acquire(atomic32_t* dst, int32_t val, int32_t ref) { return std::atomic_compare_exchange_weak_explicit(dst, &ref, val, std::memory_order_acquire, std::memory_order_relaxed); } -static FORCEINLINE void atomic_store32_release(atomic32_t* dst, int32_t val) { std::atomic_store_explicit(dst, val, std::memory_order_release); } -static FORCEINLINE int64_t atomic_load64(atomic64_t* val) { return std::atomic_load_explicit(val, std::memory_order_relaxed); } -static FORCEINLINE int64_t atomic_add64(atomic64_t* val, int64_t add) { return std::atomic_fetch_add_explicit(val, add, std::memory_order_relaxed) + add; } -static FORCEINLINE void* atomic_load_ptr(atomicptr_t* src) { return std::atomic_load_explicit(src, std::memory_order_relaxed); } -static FORCEINLINE void atomic_store_ptr(atomicptr_t* dst, void* val) { std::atomic_store_explicit(dst, val, std::memory_order_relaxed); } -static FORCEINLINE void atomic_store_ptr_release(atomicptr_t* dst, void* val) { std::atomic_store_explicit(dst, val, std::memory_order_release); } -static FORCEINLINE void* atomic_exchange_ptr_acquire(atomicptr_t* dst, void* val) { return std::atomic_exchange_explicit(dst, val, std::memory_order_acquire); } -static FORCEINLINE int atomic_cas_ptr(atomicptr_t* dst, void* val, void* ref) { return std::atomic_compare_exchange_weak_explicit(dst, &ref, val, std::memory_order_relaxed, std::memory_order_relaxed); } - -#if defined(_MSC_VER) && !defined(__clang__) - -#define EXPECTED(x) (x) -#define UNEXPECTED(x) (x) - -#else - -#define EXPECTED(x) __builtin_expect((x), 1) -#define UNEXPECTED(x) __builtin_expect((x), 0) - -#endif - -//////////// -/// -/// Statistics related functions (evaluate to nothing when statistics not enabled) -/// -////// - -#if ENABLE_STATISTICS -# define _rpmalloc_stat_inc(counter) atomic_incr32(counter) -# define _rpmalloc_stat_dec(counter) atomic_decr32(counter) -# define _rpmalloc_stat_add(counter, value) atomic_add32(counter, (int32_t)(value)) -# define _rpmalloc_stat_add64(counter, value) atomic_add64(counter, (int64_t)(value)) -# define _rpmalloc_stat_add_peak(counter, value, peak) do { int32_t _cur_count = atomic_add32(counter, (int32_t)(value)); if (_cur_count > (peak)) peak = _cur_count; } while (0) -# define _rpmalloc_stat_sub(counter, value) atomic_add32(counter, -(int32_t)(value)) -# define _rpmalloc_stat_inc_alloc(heap, class_idx) do { \ - int32_t alloc_current = atomic_incr32(&heap->size_class_use[class_idx].alloc_current); \ - if (alloc_current > heap->size_class_use[class_idx].alloc_peak) \ - heap->size_class_use[class_idx].alloc_peak = alloc_current; \ - atomic_incr32(&heap->size_class_use[class_idx].alloc_total); \ -} while(0) -# define _rpmalloc_stat_inc_free(heap, class_idx) do { \ - atomic_decr32(&heap->size_class_use[class_idx].alloc_current); \ - atomic_incr32(&heap->size_class_use[class_idx].free_total); \ -} while(0) -#else -# define _rpmalloc_stat_inc(counter) do {} while(0) -# define _rpmalloc_stat_dec(counter) do {} while(0) -# define _rpmalloc_stat_add(counter, value) do {} while(0) -# define _rpmalloc_stat_add64(counter, value) do {} while(0) -# define _rpmalloc_stat_add_peak(counter, value, peak) do {} while (0) -# define _rpmalloc_stat_sub(counter, value) do {} while(0) -# define _rpmalloc_stat_inc_alloc(heap, class_idx) do {} while(0) -# define _rpmalloc_stat_inc_free(heap, class_idx) do {} while(0) -#endif - - -/// -/// Preconfigured limits and sizes -/// - -//! Granularity of a small allocation block (must be power of two) -#define SMALL_GRANULARITY 16 -//! Small granularity shift count -#define SMALL_GRANULARITY_SHIFT 4 -//! Number of small block size classes -#define SMALL_CLASS_COUNT 65 -//! Maximum size of a small block -#define SMALL_SIZE_LIMIT (SMALL_GRANULARITY * (SMALL_CLASS_COUNT - 1)) -//! Granularity of a medium allocation block -#define MEDIUM_GRANULARITY 512 -//! Medium granularity shift count -#define MEDIUM_GRANULARITY_SHIFT 9 -//! Number of medium block size classes -#define MEDIUM_CLASS_COUNT 61 -//! Total number of small + medium size classes -#define SIZE_CLASS_COUNT (SMALL_CLASS_COUNT + MEDIUM_CLASS_COUNT) -//! Number of large block size classes -#define LARGE_CLASS_COUNT 63 -//! Maximum size of a medium block -#define MEDIUM_SIZE_LIMIT (SMALL_SIZE_LIMIT + (MEDIUM_GRANULARITY * MEDIUM_CLASS_COUNT)) -//! Maximum size of a large block -#define LARGE_SIZE_LIMIT ((LARGE_CLASS_COUNT * _memory_span_size) - SPAN_HEADER_SIZE) -//! Size of a span header (must be a multiple of SMALL_GRANULARITY and a power of two) -#define SPAN_HEADER_SIZE 128 -//! Number of spans in thread cache -#define MAX_THREAD_SPAN_CACHE 400 -//! Number of spans to transfer between thread and global cache -#define THREAD_SPAN_CACHE_TRANSFER 64 -//! Number of spans in thread cache for large spans (must be greater than LARGE_CLASS_COUNT / 2) -#define MAX_THREAD_SPAN_LARGE_CACHE 100 -//! Number of spans to transfer between thread and global cache for large spans -#define THREAD_SPAN_LARGE_CACHE_TRANSFER 6 - -static_assert((SMALL_GRANULARITY & (SMALL_GRANULARITY - 1)) == 0, "Small granularity must be power of two"); -static_assert((SPAN_HEADER_SIZE & (SPAN_HEADER_SIZE - 1)) == 0, "Span header size must be power of two"); - -#if ENABLE_VALIDATE_ARGS -//! Maximum allocation size to avoid integer overflow -#undef MAX_ALLOC_SIZE -#define MAX_ALLOC_SIZE (((size_t)-1) - _memory_span_size) -#endif - -#define pointer_offset(ptr, ofs) (void*)((char*)(ptr) + (ptrdiff_t)(ofs)) -#define pointer_diff(first, second) (ptrdiff_t)((const char*)(first) - (const char*)(second)) - -#define INVALID_POINTER ((void*)((uintptr_t)-1)) - -#define SIZE_CLASS_LARGE SIZE_CLASS_COUNT -#define SIZE_CLASS_HUGE ((uint32_t)-1) - -//////////// -/// -/// Data types -/// -////// - -namespace tracy -{ - -//! A memory heap, per thread -typedef struct heap_t heap_t; -//! Span of memory pages -typedef struct span_t span_t; -//! Span list -typedef struct span_list_t span_list_t; -//! Span active data -typedef struct span_active_t span_active_t; -//! Size class definition -typedef struct size_class_t size_class_t; -//! Global cache -typedef struct global_cache_t global_cache_t; - -//! Flag indicating span is the first (master) span of a split superspan -#define SPAN_FLAG_MASTER 1U -//! Flag indicating span is a secondary (sub) span of a split superspan -#define SPAN_FLAG_SUBSPAN 2U -//! Flag indicating span has blocks with increased alignment -#define SPAN_FLAG_ALIGNED_BLOCKS 4U -//! Flag indicating an unmapped master span -#define SPAN_FLAG_UNMAPPED_MASTER 8U - -#if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS -struct span_use_t { - //! Current number of spans used (actually used, not in cache) - atomic32_t current; - //! High water mark of spans used - atomic32_t high; -#if ENABLE_STATISTICS - //! Number of spans in deferred list - atomic32_t spans_deferred; - //! Number of spans transitioned to global cache - atomic32_t spans_to_global; - //! Number of spans transitioned from global cache - atomic32_t spans_from_global; - //! Number of spans transitioned to thread cache - atomic32_t spans_to_cache; - //! Number of spans transitioned from thread cache - atomic32_t spans_from_cache; - //! Number of spans transitioned to reserved state - atomic32_t spans_to_reserved; - //! Number of spans transitioned from reserved state - atomic32_t spans_from_reserved; - //! Number of raw memory map calls - atomic32_t spans_map_calls; -#endif -}; -typedef struct span_use_t span_use_t; -#endif - -#if ENABLE_STATISTICS -struct size_class_use_t { - //! Current number of allocations - atomic32_t alloc_current; - //! Peak number of allocations - int32_t alloc_peak; - //! Total number of allocations - atomic32_t alloc_total; - //! Total number of frees - atomic32_t free_total; - //! Number of spans in use - atomic32_t spans_current; - //! Number of spans transitioned to cache - int32_t spans_peak; - //! Number of spans transitioned to cache - atomic32_t spans_to_cache; - //! Number of spans transitioned from cache - atomic32_t spans_from_cache; - //! Number of spans transitioned from reserved state - atomic32_t spans_from_reserved; - //! Number of spans mapped - atomic32_t spans_map_calls; - int32_t unused; -}; -typedef struct size_class_use_t size_class_use_t; -#endif - -// A span can either represent a single span of memory pages with size declared by span_map_count configuration variable, -// or a set of spans in a continuous region, a super span. Any reference to the term "span" usually refers to both a single -// span or a super span. A super span can further be divided into multiple spans (or this, super spans), where the first -// (super)span is the master and subsequent (super)spans are subspans. The master span keeps track of how many subspans -// that are still alive and mapped in virtual memory, and once all subspans and master have been unmapped the entire -// superspan region is released and unmapped (on Windows for example, the entire superspan range has to be released -// in the same call to release the virtual memory range, but individual subranges can be decommitted individually -// to reduce physical memory use). -struct span_t { - //! Free list - void* free_list; - //! Total block count of size class - uint32_t block_count; - //! Size class - uint32_t size_class; - //! Index of last block initialized in free list - uint32_t free_list_limit; - //! Number of used blocks remaining when in partial state - uint32_t used_count; - //! Deferred free list - atomicptr_t free_list_deferred; - //! Size of deferred free list, or list of spans when part of a cache list - uint32_t list_size; - //! Size of a block - uint32_t block_size; - //! Flags and counters - uint32_t flags; - //! Number of spans - uint32_t span_count; - //! Total span counter for master spans - uint32_t total_spans; - //! Offset from master span for subspans - uint32_t offset_from_master; - //! Remaining span counter, for master spans - atomic32_t remaining_spans; - //! Alignment offset - uint32_t align_offset; - //! Owning heap - heap_t* heap; - //! Next span - span_t* next; - //! Previous span - span_t* prev; -}; -static_assert(sizeof(span_t) <= SPAN_HEADER_SIZE, "span size mismatch"); - -struct span_cache_t { - size_t count; - span_t* span[MAX_THREAD_SPAN_CACHE]; -}; -typedef struct span_cache_t span_cache_t; - -struct span_large_cache_t { - size_t count; - span_t* span[MAX_THREAD_SPAN_LARGE_CACHE]; -}; -typedef struct span_large_cache_t span_large_cache_t; - -struct heap_size_class_t { - //! Free list of active span - void* free_list; - //! Double linked list of partially used spans with free blocks. - // Previous span pointer in head points to tail span of list. - span_t* partial_span; - //! Early level cache of fully free spans - span_t* cache; -}; -typedef struct heap_size_class_t heap_size_class_t; - -// Control structure for a heap, either a thread heap or a first class heap if enabled -struct heap_t { - //! Owning thread ID - uintptr_t owner_thread; - //! Free lists for each size class - heap_size_class_t size_class[SIZE_CLASS_COUNT]; -#if ENABLE_THREAD_CACHE - //! Arrays of fully freed spans, single span - span_cache_t span_cache; -#endif - //! List of deferred free spans (single linked list) - atomicptr_t span_free_deferred; - //! Number of full spans - size_t full_span_count; - //! Mapped but unused spans - span_t* span_reserve; - //! Master span for mapped but unused spans - span_t* span_reserve_master; - //! Number of mapped but unused spans - uint32_t spans_reserved; - //! Child count - atomic32_t child_count; - //! Next heap in id list - heap_t* next_heap; - //! Next heap in orphan list - heap_t* next_orphan; - //! Heap ID - int32_t id; - //! Finalization state flag - int finalize; - //! Master heap owning the memory pages - heap_t* master_heap; -#if ENABLE_THREAD_CACHE - //! Arrays of fully freed spans, large spans with > 1 span count - span_large_cache_t span_large_cache[LARGE_CLASS_COUNT - 1]; -#endif -#if RPMALLOC_FIRST_CLASS_HEAPS - //! Double linked list of fully utilized spans with free blocks for each size class. - // Previous span pointer in head points to tail span of list. - span_t* full_span[SIZE_CLASS_COUNT]; - //! Double linked list of large and huge spans allocated by this heap - span_t* large_huge_span; -#endif -#if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS - //! Current and high water mark of spans used per span count - span_use_t span_use[LARGE_CLASS_COUNT]; -#endif -#if ENABLE_STATISTICS - //! Allocation stats per size class - size_class_use_t size_class_use[SIZE_CLASS_COUNT + 1]; - //! Number of bytes transitioned thread -> global - atomic64_t thread_to_global; - //! Number of bytes transitioned global -> thread - atomic64_t global_to_thread; -#endif -}; - -// Size class for defining a block size bucket -struct size_class_t { - //! Size of blocks in this class - uint32_t block_size; - //! Number of blocks in each chunk - uint16_t block_count; - //! Class index this class is merged with - uint16_t class_idx; -}; -static_assert(sizeof(size_class_t) == 8, "Size class size mismatch"); - -struct global_cache_t { - //! Cache lock - atomic32_t lock; - //! Cache count - uint32_t count; -#if ENABLE_STATISTICS - //! Insert count - size_t insert_count; - //! Extract count - size_t extract_count; -#endif - //! Cached spans - span_t* span[GLOBAL_CACHE_MULTIPLIER * MAX_THREAD_SPAN_CACHE]; - //! Unlimited cache overflow - span_t* overflow; -}; - -//////////// -/// -/// Global data -/// -////// - -//! Default span size (64KiB) -#define _memory_default_span_size (64 * 1024) -#define _memory_default_span_size_shift 16 -#define _memory_default_span_mask (~((uintptr_t)(_memory_span_size - 1))) - -//! Initialized flag -static int _rpmalloc_initialized; -//! Main thread ID -static uintptr_t _rpmalloc_main_thread_id; -//! Configuration -static rpmalloc_config_t _memory_config; -//! Memory page size -static size_t _memory_page_size; -//! Shift to divide by page size -static size_t _memory_page_size_shift; -//! Granularity at which memory pages are mapped by OS -static size_t _memory_map_granularity; -#if RPMALLOC_CONFIGURABLE -//! Size of a span of memory pages -static size_t _memory_span_size; -//! Shift to divide by span size -static size_t _memory_span_size_shift; -//! Mask to get to start of a memory span -static uintptr_t _memory_span_mask; -#else -//! Hardwired span size -#define _memory_span_size _memory_default_span_size -#define _memory_span_size_shift _memory_default_span_size_shift -#define _memory_span_mask _memory_default_span_mask -#endif -//! Number of spans to map in each map call -static size_t _memory_span_map_count; -//! Number of spans to keep reserved in each heap -static size_t _memory_heap_reserve_count; -//! Global size classes -static size_class_t _memory_size_class[SIZE_CLASS_COUNT]; -//! Run-time size limit of medium blocks -static size_t _memory_medium_size_limit; -//! Heap ID counter -static atomic32_t _memory_heap_id; -//! Huge page support -static int _memory_huge_pages; -#if ENABLE_GLOBAL_CACHE -//! Global span cache -static global_cache_t _memory_span_cache[LARGE_CLASS_COUNT]; -#endif -//! Global reserved spans -static span_t* _memory_global_reserve; -//! Global reserved count -static size_t _memory_global_reserve_count; -//! Global reserved master -static span_t* _memory_global_reserve_master; -//! All heaps -static heap_t* _memory_heaps[HEAP_ARRAY_SIZE]; -//! Used to restrict access to mapping memory for huge pages -static atomic32_t _memory_global_lock; -//! Orphaned heaps -static heap_t* _memory_orphan_heaps; -#if RPMALLOC_FIRST_CLASS_HEAPS -//! Orphaned heaps (first class heaps) -static heap_t* _memory_first_class_orphan_heaps; -#endif -#if ENABLE_STATISTICS -//! Allocations counter -static atomic64_t _allocation_counter; -//! Deallocations counter -static atomic64_t _deallocation_counter; -//! Active heap count -static atomic32_t _memory_active_heaps; -//! Number of currently mapped memory pages -static atomic32_t _mapped_pages; -//! Peak number of concurrently mapped memory pages -static int32_t _mapped_pages_peak; -//! Number of mapped master spans -static atomic32_t _master_spans; -//! Number of unmapped dangling master spans -static atomic32_t _unmapped_master_spans; -//! Running counter of total number of mapped memory pages since start -static atomic32_t _mapped_total; -//! Running counter of total number of unmapped memory pages since start -static atomic32_t _unmapped_total; -//! Number of currently mapped memory pages in OS calls -static atomic32_t _mapped_pages_os; -//! Number of currently allocated pages in huge allocations -static atomic32_t _huge_pages_current; -//! Peak number of currently allocated pages in huge allocations -static int32_t _huge_pages_peak; -#endif - -//////////// -/// -/// Thread local heap and ID -/// -////// - -//! Current thread heap -#if ((defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD) || defined(__TINYC__) -static pthread_key_t _memory_thread_heap; -#else -# ifdef _MSC_VER -# define _Thread_local __declspec(thread) -# define TLS_MODEL -# else -# if defined(__ANDROID__) && __ANDROID_API__ >= 29 && defined(__NDK_MAJOR__) && __NDK_MAJOR__ >= 26 -# define TLS_MODEL __attribute__((tls_model("local-dynamic"))) -# elif !defined(__HAIKU__) -# define TLS_MODEL __attribute__((tls_model("initial-exec"))) -# else -# define TLS_MODEL -# endif -# if !defined(__clang__) && defined(__GNUC__) -# define _Thread_local __thread -# endif -# endif -static _Thread_local heap_t* _memory_thread_heap TLS_MODEL; -#endif - -static inline heap_t* -get_thread_heap_raw(void) { -#if (defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD - return pthread_getspecific(_memory_thread_heap); -#else - return _memory_thread_heap; -#endif -} - -//! Get the current thread heap -static inline heap_t* -get_thread_heap(void) { - heap_t* heap = get_thread_heap_raw(); -#if ENABLE_PRELOAD - if (EXPECTED(heap != 0)) - return heap; - rpmalloc_initialize(); - return get_thread_heap_raw(); -#else - return heap; -#endif -} - -//! Fast thread ID -static inline uintptr_t -get_thread_id(void) { -#if defined(_WIN32) - return (uintptr_t)((void*)NtCurrentTeb()); -#elif (defined(__GNUC__) || defined(__clang__)) && !defined(__CYGWIN__) - uintptr_t tid; -# if defined(__i386__) - __asm__("movl %%gs:0, %0" : "=r" (tid) : : ); -# elif defined(__x86_64__) -# if defined(__MACH__) - __asm__("movq %%gs:0, %0" : "=r" (tid) : : ); -# else - __asm__("movq %%fs:0, %0" : "=r" (tid) : : ); -# endif -# elif defined(__arm__) - __asm__ volatile ("mrc p15, 0, %0, c13, c0, 3" : "=r" (tid)); -# elif defined(__aarch64__) -# if defined(__MACH__) - // tpidr_el0 likely unused, always return 0 on iOS - __asm__ volatile ("mrs %0, tpidrro_el0" : "=r" (tid)); -# else - __asm__ volatile ("mrs %0, tpidr_el0" : "=r" (tid)); -# endif -# else - tid = (uintptr_t)((void*)get_thread_heap_raw()); -# endif - return tid; -#else - return (uintptr_t)((void*)get_thread_heap_raw()); -#endif -} - -//! Set the current thread heap -static void -set_thread_heap(heap_t* heap) { -#if ((defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD) || defined(__TINYC__) - pthread_setspecific(_memory_thread_heap, heap); -#else - _memory_thread_heap = heap; -#endif - if (heap) - heap->owner_thread = get_thread_id(); -} - -//! Set main thread ID -extern void -rpmalloc_set_main_thread(void); - -void -rpmalloc_set_main_thread(void) { - _rpmalloc_main_thread_id = get_thread_id(); -} - -static void -_rpmalloc_spin(void) { -#if defined(_MSC_VER) && !(defined(_M_ARM) || defined(_M_ARM64)) - _mm_pause(); -#elif defined(__x86_64__) || defined(__i386__) - __asm__ volatile("pause" ::: "memory"); -#elif defined(__aarch64__) || (defined(__arm__) && __ARM_ARCH >= 7) - __asm__ volatile("yield" ::: "memory"); -#elif defined(__powerpc__) || defined(__powerpc64__) - // No idea if ever been compiled in such archs but ... as precaution - __asm__ volatile("or 27,27,27"); -#elif defined(__sparc__) - __asm__ volatile("rd %ccr, %g0 \n\trd %ccr, %g0 \n\trd %ccr, %g0"); -#else - std::this_thread::yield(); -#endif -} - -#if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK) -static void NTAPI -_rpmalloc_thread_destructor(void* value) { -#if ENABLE_OVERRIDE - // If this is called on main thread it means rpmalloc_finalize - // has not been called and shutdown is forced (through _exit) or unclean - if (get_thread_id() == _rpmalloc_main_thread_id) - return; -#endif - if (value) - rpmalloc_thread_finalize(1); -} -#endif - - -//////////// -/// -/// Low level memory map/unmap -/// -////// - -static void -_rpmalloc_set_name(void* address, size_t size) { -#if defined(__linux__) || defined(__ANDROID__) - const char *name = _memory_huge_pages ? _memory_config.huge_page_name : _memory_config.page_name; - if (address == MAP_FAILED || !name) - return; - // If the kernel does not support CONFIG_ANON_VMA_NAME or if the call fails - // (e.g. invalid name) it is a no-op basically. - (void)prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, (uintptr_t)address, size, (uintptr_t)name); -#else - (void)sizeof(size); - (void)sizeof(address); -#endif -} - - -//! Map more virtual memory -// size is number of bytes to map -// offset receives the offset in bytes from start of mapped region -// returns address to start of mapped region to use -static void* -_rpmalloc_mmap(size_t size, size_t* offset) { - rpmalloc_assert(!(size % _memory_page_size), "Invalid mmap size"); - rpmalloc_assert(size >= _memory_page_size, "Invalid mmap size"); - void* address = _memory_config.memory_map(size, offset); - if (EXPECTED(address != 0)) { - _rpmalloc_stat_add_peak(&_mapped_pages, (size >> _memory_page_size_shift), _mapped_pages_peak); - _rpmalloc_stat_add(&_mapped_total, (size >> _memory_page_size_shift)); - } - return address; -} - -//! Unmap virtual memory -// address is the memory address to unmap, as returned from _memory_map -// size is the number of bytes to unmap, which might be less than full region for a partial unmap -// offset is the offset in bytes to the actual mapped region, as set by _memory_map -// release is set to 0 for partial unmap, or size of entire range for a full unmap -static void -_rpmalloc_unmap(void* address, size_t size, size_t offset, size_t release) { - rpmalloc_assert(!release || (release >= size), "Invalid unmap size"); - rpmalloc_assert(!release || (release >= _memory_page_size), "Invalid unmap size"); - if (release) { - rpmalloc_assert(!(release % _memory_page_size), "Invalid unmap size"); - _rpmalloc_stat_sub(&_mapped_pages, (release >> _memory_page_size_shift)); - _rpmalloc_stat_add(&_unmapped_total, (release >> _memory_page_size_shift)); - } - _memory_config.memory_unmap(address, size, offset, release); -} - -//! Default implementation to map new pages to virtual memory -static void* -_rpmalloc_mmap_os(size_t size, size_t* offset) { - //Either size is a heap (a single page) or a (multiple) span - we only need to align spans, and only if larger than map granularity - size_t padding = ((size >= _memory_span_size) && (_memory_span_size > _memory_map_granularity)) ? _memory_span_size : 0; - rpmalloc_assert(size >= _memory_page_size, "Invalid mmap size"); -#if PLATFORM_WINDOWS - //Ok to MEM_COMMIT - according to MSDN, "actual physical pages are not allocated unless/until the virtual addresses are actually accessed" - void* ptr = VirtualAlloc(0, size + padding, (_memory_huge_pages ? MEM_LARGE_PAGES : 0) | MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE); - if (!ptr) { - if (_memory_config.map_fail_callback) { - if (_memory_config.map_fail_callback(size + padding)) - return _rpmalloc_mmap_os(size, offset); - } else { - rpmalloc_assert(ptr, "Failed to map virtual memory block"); - } - return 0; - } -#else - int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_UNINITIALIZED; -# if defined(__APPLE__) && !TARGET_OS_IPHONE && !TARGET_OS_SIMULATOR - int fd = (int)VM_MAKE_TAG(240U); - if (_memory_huge_pages) - fd |= VM_FLAGS_SUPERPAGE_SIZE_2MB; - void* ptr = mmap(0, size + padding, PROT_READ | PROT_WRITE, flags, fd, 0); -# elif defined(MAP_HUGETLB) - void* ptr = mmap(0, size + padding, PROT_READ | PROT_WRITE | PROT_MAX(PROT_READ | PROT_WRITE), (_memory_huge_pages ? MAP_HUGETLB : 0) | flags, -1, 0); -# if defined(MADV_HUGEPAGE) - // In some configurations, huge pages allocations might fail thus - // we fallback to normal allocations and promote the region as transparent huge page - if ((ptr == MAP_FAILED || !ptr) && _memory_huge_pages) { - ptr = mmap(0, size + padding, PROT_READ | PROT_WRITE, flags, -1, 0); - if (ptr && ptr != MAP_FAILED) { - int prm = madvise(ptr, size + padding, MADV_HUGEPAGE); - (void)prm; - rpmalloc_assert((prm == 0), "Failed to promote the page to THP"); - } - } -# endif - _rpmalloc_set_name(ptr, size + padding); -# elif defined(MAP_ALIGNED) - const size_t align = (sizeof(size_t) * 8) - (size_t)(__builtin_clzl(size - 1)); - void* ptr = mmap(0, size + padding, PROT_READ | PROT_WRITE, (_memory_huge_pages ? MAP_ALIGNED(align) : 0) | flags, -1, 0); -# elif defined(MAP_ALIGN) - caddr_t base = (_memory_huge_pages ? (caddr_t)(4 << 20) : 0); - void* ptr = mmap(base, size + padding, PROT_READ | PROT_WRITE, (_memory_huge_pages ? MAP_ALIGN : 0) | flags, -1, 0); -# else - void* ptr = mmap(0, size + padding, PROT_READ | PROT_WRITE, flags, -1, 0); -# endif - if ((ptr == MAP_FAILED) || !ptr) { - if (_memory_config.map_fail_callback) { - if (_memory_config.map_fail_callback(size + padding)) - return _rpmalloc_mmap_os(size, offset); - } else if (errno != ENOMEM) { - rpmalloc_assert((ptr != MAP_FAILED) && ptr, "Failed to map virtual memory block"); - } - return 0; - } -#endif - _rpmalloc_stat_add(&_mapped_pages_os, (int32_t)((size + padding) >> _memory_page_size_shift)); - if (padding) { - size_t final_padding = padding - ((uintptr_t)ptr & ~_memory_span_mask); - rpmalloc_assert(final_padding <= _memory_span_size, "Internal failure in padding"); - rpmalloc_assert(final_padding <= padding, "Internal failure in padding"); - rpmalloc_assert(!(final_padding % 8), "Internal failure in padding"); - ptr = pointer_offset(ptr, final_padding); - *offset = final_padding >> 3; - } - rpmalloc_assert((size < _memory_span_size) || !((uintptr_t)ptr & ~_memory_span_mask), "Internal failure in padding"); - return ptr; -} - -//! Default implementation to unmap pages from virtual memory -static void -_rpmalloc_unmap_os(void* address, size_t size, size_t offset, size_t release) { - rpmalloc_assert(release || (offset == 0), "Invalid unmap size"); - rpmalloc_assert(!release || (release >= _memory_page_size), "Invalid unmap size"); - rpmalloc_assert(size >= _memory_page_size, "Invalid unmap size"); - if (release && offset) { - offset <<= 3; - address = pointer_offset(address, -(int32_t)offset); - if ((release >= _memory_span_size) && (_memory_span_size > _memory_map_granularity)) { - //Padding is always one span size - release += _memory_span_size; - } - } -#if !DISABLE_UNMAP -#if PLATFORM_WINDOWS - if (!VirtualFree(address, release ? 0 : size, release ? MEM_RELEASE : MEM_DECOMMIT)) { - rpmalloc_assert(0, "Failed to unmap virtual memory block"); - } -#else - if (release) { - if (munmap(address, release)) { - rpmalloc_assert(0, "Failed to unmap virtual memory block"); - } - } else { -#if defined(MADV_FREE_REUSABLE) - int ret; - while ((ret = madvise(address, size, MADV_FREE_REUSABLE)) == -1 && (errno == EAGAIN)) - errno = 0; - if ((ret == -1) && (errno != 0)) { -#elif defined(MADV_DONTNEED) - if (madvise(address, size, MADV_DONTNEED)) { -#elif defined(MADV_PAGEOUT) - if (madvise(address, size, MADV_PAGEOUT)) { -#elif defined(MADV_FREE) - if (madvise(address, size, MADV_FREE)) { -#else - if (posix_madvise(address, size, POSIX_MADV_DONTNEED)) { -#endif - rpmalloc_assert(0, "Failed to madvise virtual memory block as free"); - } - } -#endif -#endif - if (release) - _rpmalloc_stat_sub(&_mapped_pages_os, release >> _memory_page_size_shift); -} - -static void -_rpmalloc_span_mark_as_subspan_unless_master(span_t* master, span_t* subspan, size_t span_count); - -//! Use global reserved spans to fulfill a memory map request (reserve size must be checked by caller) -static span_t* -_rpmalloc_global_get_reserved_spans(size_t span_count) { - span_t* span = _memory_global_reserve; - _rpmalloc_span_mark_as_subspan_unless_master(_memory_global_reserve_master, span, span_count); - _memory_global_reserve_count -= span_count; - if (_memory_global_reserve_count) - _memory_global_reserve = (span_t*)pointer_offset(span, span_count << _memory_span_size_shift); - else - _memory_global_reserve = 0; - return span; -} - -//! Store the given spans as global reserve (must only be called from within new heap allocation, not thread safe) -static void -_rpmalloc_global_set_reserved_spans(span_t* master, span_t* reserve, size_t reserve_span_count) { - _memory_global_reserve_master = master; - _memory_global_reserve_count = reserve_span_count; - _memory_global_reserve = reserve; -} - - -//////////// -/// -/// Span linked list management -/// -////// - -//! Add a span to double linked list at the head -static void -_rpmalloc_span_double_link_list_add(span_t** head, span_t* span) { - if (*head) - (*head)->prev = span; - span->next = *head; - *head = span; -} - -//! Pop head span from double linked list -static void -_rpmalloc_span_double_link_list_pop_head(span_t** head, span_t* span) { - rpmalloc_assert(*head == span, "Linked list corrupted"); - span = *head; - *head = span->next; -} - -//! Remove a span from double linked list -static void -_rpmalloc_span_double_link_list_remove(span_t** head, span_t* span) { - rpmalloc_assert(*head, "Linked list corrupted"); - if (*head == span) { - *head = span->next; - } else { - span_t* next_span = span->next; - span_t* prev_span = span->prev; - prev_span->next = next_span; - if (EXPECTED(next_span != 0)) - next_span->prev = prev_span; - } -} - - -//////////// -/// -/// Span control -/// -////// - -static void -_rpmalloc_heap_cache_insert(heap_t* heap, span_t* span); - -static void -_rpmalloc_heap_finalize(heap_t* heap); - -static void -_rpmalloc_heap_set_reserved_spans(heap_t* heap, span_t* master, span_t* reserve, size_t reserve_span_count); - -//! Declare the span to be a subspan and store distance from master span and span count -static void -_rpmalloc_span_mark_as_subspan_unless_master(span_t* master, span_t* subspan, size_t span_count) { - rpmalloc_assert((subspan != master) || (subspan->flags & SPAN_FLAG_MASTER), "Span master pointer and/or flag mismatch"); - if (subspan != master) { - subspan->flags = SPAN_FLAG_SUBSPAN; - subspan->offset_from_master = (uint32_t)((uintptr_t)pointer_diff(subspan, master) >> _memory_span_size_shift); - subspan->align_offset = 0; - } - subspan->span_count = (uint32_t)span_count; -} - -//! Use reserved spans to fulfill a memory map request (reserve size must be checked by caller) -static span_t* -_rpmalloc_span_map_from_reserve(heap_t* heap, size_t span_count) { - //Update the heap span reserve - span_t* span = heap->span_reserve; - heap->span_reserve = (span_t*)pointer_offset(span, span_count * _memory_span_size); - heap->spans_reserved -= (uint32_t)span_count; - - _rpmalloc_span_mark_as_subspan_unless_master(heap->span_reserve_master, span, span_count); - if (span_count <= LARGE_CLASS_COUNT) - _rpmalloc_stat_inc(&heap->span_use[span_count - 1].spans_from_reserved); - - return span; -} - -//! Get the aligned number of spans to map in based on wanted count, configured mapping granularity and the page size -static size_t -_rpmalloc_span_align_count(size_t span_count) { - size_t request_count = (span_count > _memory_span_map_count) ? span_count : _memory_span_map_count; - if ((_memory_page_size > _memory_span_size) && ((request_count * _memory_span_size) % _memory_page_size)) - request_count += _memory_span_map_count - (request_count % _memory_span_map_count); - return request_count; -} - -//! Setup a newly mapped span -static void -_rpmalloc_span_initialize(span_t* span, size_t total_span_count, size_t span_count, size_t align_offset) { - span->total_spans = (uint32_t)total_span_count; - span->span_count = (uint32_t)span_count; - span->align_offset = (uint32_t)align_offset; - span->flags = SPAN_FLAG_MASTER; - atomic_store32(&span->remaining_spans, (int32_t)total_span_count); -} - -static void -_rpmalloc_span_unmap(span_t* span); - -//! Map an aligned set of spans, taking configured mapping granularity and the page size into account -static span_t* -_rpmalloc_span_map_aligned_count(heap_t* heap, size_t span_count) { - //If we already have some, but not enough, reserved spans, release those to heap cache and map a new - //full set of spans. Otherwise we would waste memory if page size > span size (huge pages) - size_t aligned_span_count = _rpmalloc_span_align_count(span_count); - size_t align_offset = 0; - span_t* span = (span_t*)_rpmalloc_mmap(aligned_span_count * _memory_span_size, &align_offset); - if (!span) - return 0; - _rpmalloc_span_initialize(span, aligned_span_count, span_count, align_offset); - _rpmalloc_stat_inc(&_master_spans); - if (span_count <= LARGE_CLASS_COUNT) - _rpmalloc_stat_inc(&heap->span_use[span_count - 1].spans_map_calls); - if (aligned_span_count > span_count) { - span_t* reserved_spans = (span_t*)pointer_offset(span, span_count * _memory_span_size); - size_t reserved_count = aligned_span_count - span_count; - if (heap->spans_reserved) { - _rpmalloc_span_mark_as_subspan_unless_master(heap->span_reserve_master, heap->span_reserve, heap->spans_reserved); - _rpmalloc_heap_cache_insert(heap, heap->span_reserve); - } - if (reserved_count > _memory_heap_reserve_count) { - // If huge pages or eager spam map count, the global reserve spin lock is held by caller, _rpmalloc_span_map - rpmalloc_assert(atomic_load32(&_memory_global_lock) == 1, "Global spin lock not held as expected"); - size_t remain_count = reserved_count - _memory_heap_reserve_count; - reserved_count = _memory_heap_reserve_count; - span_t* remain_span = (span_t*)pointer_offset(reserved_spans, reserved_count * _memory_span_size); - if (_memory_global_reserve) { - _rpmalloc_span_mark_as_subspan_unless_master(_memory_global_reserve_master, _memory_global_reserve, _memory_global_reserve_count); - _rpmalloc_span_unmap(_memory_global_reserve); - } - _rpmalloc_global_set_reserved_spans(span, remain_span, remain_count); - } - _rpmalloc_heap_set_reserved_spans(heap, span, reserved_spans, reserved_count); - } - return span; -} - -//! Map in memory pages for the given number of spans (or use previously reserved pages) -static span_t* -_rpmalloc_span_map(heap_t* heap, size_t span_count) { - if (span_count <= heap->spans_reserved) - return _rpmalloc_span_map_from_reserve(heap, span_count); - span_t* span = 0; - int use_global_reserve = (_memory_page_size > _memory_span_size) || (_memory_span_map_count > _memory_heap_reserve_count); - if (use_global_reserve) { - // If huge pages, make sure only one thread maps more memory to avoid bloat - while (!atomic_cas32_acquire(&_memory_global_lock, 1, 0)) - _rpmalloc_spin(); - if (_memory_global_reserve_count >= span_count) { - size_t reserve_count = (!heap->spans_reserved ? _memory_heap_reserve_count : span_count); - if (_memory_global_reserve_count < reserve_count) - reserve_count = _memory_global_reserve_count; - span = _rpmalloc_global_get_reserved_spans(reserve_count); - if (span) { - if (reserve_count > span_count) { - span_t* reserved_span = (span_t*)pointer_offset(span, span_count << _memory_span_size_shift); - _rpmalloc_heap_set_reserved_spans(heap, _memory_global_reserve_master, reserved_span, reserve_count - span_count); - } - // Already marked as subspan in _rpmalloc_global_get_reserved_spans - span->span_count = (uint32_t)span_count; - } - } - } - if (!span) - span = _rpmalloc_span_map_aligned_count(heap, span_count); - if (use_global_reserve) - atomic_store32_release(&_memory_global_lock, 0); - return span; -} - -//! Unmap memory pages for the given number of spans (or mark as unused if no partial unmappings) -static void -_rpmalloc_span_unmap(span_t* span) { - rpmalloc_assert((span->flags & SPAN_FLAG_MASTER) || (span->flags & SPAN_FLAG_SUBSPAN), "Span flag corrupted"); - rpmalloc_assert(!(span->flags & SPAN_FLAG_MASTER) || !(span->flags & SPAN_FLAG_SUBSPAN), "Span flag corrupted"); - - int is_master = !!(span->flags & SPAN_FLAG_MASTER); - span_t* master = is_master ? span : ((span_t*)pointer_offset(span, -(intptr_t)((uintptr_t)span->offset_from_master * _memory_span_size))); - rpmalloc_assert(is_master || (span->flags & SPAN_FLAG_SUBSPAN), "Span flag corrupted"); - rpmalloc_assert(master->flags & SPAN_FLAG_MASTER, "Span flag corrupted"); - - size_t span_count = span->span_count; - if (!is_master) { - //Directly unmap subspans (unless huge pages, in which case we defer and unmap entire page range with master) - rpmalloc_assert(span->align_offset == 0, "Span align offset corrupted"); - if (_memory_span_size >= _memory_page_size) - _rpmalloc_unmap(span, span_count * _memory_span_size, 0, 0); - } else { - //Special double flag to denote an unmapped master - //It must be kept in memory since span header must be used - span->flags |= SPAN_FLAG_MASTER | SPAN_FLAG_SUBSPAN | SPAN_FLAG_UNMAPPED_MASTER; - _rpmalloc_stat_add(&_unmapped_master_spans, 1); - } - - if (atomic_add32(&master->remaining_spans, -(int32_t)span_count) <= 0) { - //Everything unmapped, unmap the master span with release flag to unmap the entire range of the super span - rpmalloc_assert(!!(master->flags & SPAN_FLAG_MASTER) && !!(master->flags & SPAN_FLAG_SUBSPAN), "Span flag corrupted"); - size_t unmap_count = master->span_count; - if (_memory_span_size < _memory_page_size) - unmap_count = master->total_spans; - _rpmalloc_stat_sub(&_master_spans, 1); - _rpmalloc_stat_sub(&_unmapped_master_spans, 1); - _rpmalloc_unmap(master, unmap_count * _memory_span_size, master->align_offset, (size_t)master->total_spans * _memory_span_size); - } -} - -//! Move the span (used for small or medium allocations) to the heap thread cache -static void -_rpmalloc_span_release_to_cache(heap_t* heap, span_t* span) { - rpmalloc_assert(heap == span->heap, "Span heap pointer corrupted"); - rpmalloc_assert(span->size_class < SIZE_CLASS_COUNT, "Invalid span size class"); - rpmalloc_assert(span->span_count == 1, "Invalid span count"); -#if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS - atomic_decr32(&heap->span_use[0].current); -#endif - _rpmalloc_stat_dec(&heap->size_class_use[span->size_class].spans_current); - if (!heap->finalize) { - _rpmalloc_stat_inc(&heap->span_use[0].spans_to_cache); - _rpmalloc_stat_inc(&heap->size_class_use[span->size_class].spans_to_cache); - if (heap->size_class[span->size_class].cache) - _rpmalloc_heap_cache_insert(heap, heap->size_class[span->size_class].cache); - heap->size_class[span->size_class].cache = span; - } else { - _rpmalloc_span_unmap(span); - } -} - -//! Initialize a (partial) free list up to next system memory page, while reserving the first block -//! as allocated, returning number of blocks in list -static uint32_t -free_list_partial_init(void** list, void** first_block, void* page_start, void* block_start, uint32_t block_count, uint32_t block_size) { - rpmalloc_assert(block_count, "Internal failure"); - *first_block = block_start; - if (block_count > 1) { - void* free_block = pointer_offset(block_start, block_size); - void* block_end = pointer_offset(block_start, (size_t)block_size * block_count); - //If block size is less than half a memory page, bound init to next memory page boundary - if (block_size < (_memory_page_size >> 1)) { - void* page_end = pointer_offset(page_start, _memory_page_size); - if (page_end < block_end) - block_end = page_end; - } - *list = free_block; - block_count = 2; - void* next_block = pointer_offset(free_block, block_size); - while (next_block < block_end) { - *((void**)free_block) = next_block; - free_block = next_block; - ++block_count; - next_block = pointer_offset(next_block, block_size); - } - *((void**)free_block) = 0; - } else { - *list = 0; - } - return block_count; -} - -//! Initialize an unused span (from cache or mapped) to be new active span, putting the initial free list in heap class free list -static void* -_rpmalloc_span_initialize_new(heap_t* heap, heap_size_class_t* heap_size_class, span_t* span, uint32_t class_idx) { - rpmalloc_assert(span->span_count == 1, "Internal failure"); - size_class_t* size_class = _memory_size_class + class_idx; - span->size_class = class_idx; - span->heap = heap; - span->flags &= ~SPAN_FLAG_ALIGNED_BLOCKS; - span->block_size = size_class->block_size; - span->block_count = size_class->block_count; - span->free_list = 0; - span->list_size = 0; - atomic_store_ptr_release(&span->free_list_deferred, 0); - - //Setup free list. Only initialize one system page worth of free blocks in list - void* block; - span->free_list_limit = free_list_partial_init(&heap_size_class->free_list, &block, - span, pointer_offset(span, SPAN_HEADER_SIZE), size_class->block_count, size_class->block_size); - //Link span as partial if there remains blocks to be initialized as free list, or full if fully initialized - if (span->free_list_limit < span->block_count) { - _rpmalloc_span_double_link_list_add(&heap_size_class->partial_span, span); - span->used_count = span->free_list_limit; - } else { -#if RPMALLOC_FIRST_CLASS_HEAPS - _rpmalloc_span_double_link_list_add(&heap->full_span[class_idx], span); -#endif - ++heap->full_span_count; - span->used_count = span->block_count; - } - return block; -} - -static void -_rpmalloc_span_extract_free_list_deferred(span_t* span) { - // We need acquire semantics on the CAS operation since we are interested in the list size - // Refer to _rpmalloc_deallocate_defer_small_or_medium for further comments on this dependency - do { - span->free_list = atomic_exchange_ptr_acquire(&span->free_list_deferred, INVALID_POINTER); - } while (span->free_list == INVALID_POINTER); - span->used_count -= span->list_size; - span->list_size = 0; - atomic_store_ptr_release(&span->free_list_deferred, 0); -} - -static int -_rpmalloc_span_is_fully_utilized(span_t* span) { - rpmalloc_assert(span->free_list_limit <= span->block_count, "Span free list corrupted"); - return !span->free_list && (span->free_list_limit >= span->block_count); -} - -static int -_rpmalloc_span_finalize(heap_t* heap, size_t iclass, span_t* span, span_t** list_head) { - void* free_list = heap->size_class[iclass].free_list; - span_t* class_span = (span_t*)((uintptr_t)free_list & _memory_span_mask); - if (span == class_span) { - // Adopt the heap class free list back into the span free list - void* block = span->free_list; - void* last_block = 0; - while (block) { - last_block = block; - block = *((void**)block); - } - uint32_t free_count = 0; - block = free_list; - while (block) { - ++free_count; - block = *((void**)block); - } - if (last_block) { - *((void**)last_block) = free_list; - } else { - span->free_list = free_list; - } - heap->size_class[iclass].free_list = 0; - span->used_count -= free_count; - } - //If this assert triggers you have memory leaks - rpmalloc_assert(span->list_size == span->used_count, "Memory leak detected"); - if (span->list_size == span->used_count) { - _rpmalloc_stat_dec(&heap->span_use[0].current); - _rpmalloc_stat_dec(&heap->size_class_use[iclass].spans_current); - // This function only used for spans in double linked lists - if (list_head) - _rpmalloc_span_double_link_list_remove(list_head, span); - _rpmalloc_span_unmap(span); - return 1; - } - return 0; -} - - -//////////// -/// -/// Global cache -/// -////// - -#if ENABLE_GLOBAL_CACHE - -//! Finalize a global cache -static void -_rpmalloc_global_cache_finalize(global_cache_t* cache) { - while (!atomic_cas32_acquire(&cache->lock, 1, 0)) - _rpmalloc_spin(); - - for (size_t ispan = 0; ispan < cache->count; ++ispan) - _rpmalloc_span_unmap(cache->span[ispan]); - cache->count = 0; - - while (cache->overflow) { - span_t* span = cache->overflow; - cache->overflow = span->next; - _rpmalloc_span_unmap(span); - } - - atomic_store32_release(&cache->lock, 0); -} - -static void -_rpmalloc_global_cache_insert_spans(span_t** span, size_t span_count, size_t count) { - const size_t cache_limit = (span_count == 1) ? - GLOBAL_CACHE_MULTIPLIER * MAX_THREAD_SPAN_CACHE : - GLOBAL_CACHE_MULTIPLIER * (MAX_THREAD_SPAN_LARGE_CACHE - (span_count >> 1)); - - global_cache_t* cache = &_memory_span_cache[span_count - 1]; - - size_t insert_count = count; - while (!atomic_cas32_acquire(&cache->lock, 1, 0)) - _rpmalloc_spin(); - -#if ENABLE_STATISTICS - cache->insert_count += count; -#endif - if ((cache->count + insert_count) > cache_limit) - insert_count = cache_limit - cache->count; - - memcpy(cache->span + cache->count, span, sizeof(span_t*) * insert_count); - cache->count += (uint32_t)insert_count; - -#if ENABLE_UNLIMITED_CACHE - while (insert_count < count) { -#else - // Enable unlimited cache if huge pages, or we will leak since it is unlikely that an entire huge page - // will be unmapped, and we're unable to partially decommit a huge page - while ((_memory_page_size > _memory_span_size) && (insert_count < count)) { -#endif - span_t* current_span = span[insert_count++]; - current_span->next = cache->overflow; - cache->overflow = current_span; - } - atomic_store32_release(&cache->lock, 0); - - span_t* keep = 0; - for (size_t ispan = insert_count; ispan < count; ++ispan) { - span_t* current_span = span[ispan]; - // Keep master spans that has remaining subspans to avoid dangling them - if ((current_span->flags & SPAN_FLAG_MASTER) && - (atomic_load32(¤t_span->remaining_spans) > (int32_t)current_span->span_count)) { - current_span->next = keep; - keep = current_span; - } else { - _rpmalloc_span_unmap(current_span); - } - } - - if (keep) { - while (!atomic_cas32_acquire(&cache->lock, 1, 0)) - _rpmalloc_spin(); - - size_t islot = 0; - while (keep) { - for (; islot < cache->count; ++islot) { - span_t* current_span = cache->span[islot]; - if (!(current_span->flags & SPAN_FLAG_MASTER) || ((current_span->flags & SPAN_FLAG_MASTER) && - (atomic_load32(¤t_span->remaining_spans) <= (int32_t)current_span->span_count))) { - _rpmalloc_span_unmap(current_span); - cache->span[islot] = keep; - break; - } - } - if (islot == cache->count) - break; - keep = keep->next; - } - - if (keep) { - span_t* tail = keep; - while (tail->next) - tail = tail->next; - tail->next = cache->overflow; - cache->overflow = keep; - } - - atomic_store32_release(&cache->lock, 0); - } -} - -static size_t -_rpmalloc_global_cache_extract_spans(span_t** span, size_t span_count, size_t count) { - global_cache_t* cache = &_memory_span_cache[span_count - 1]; - - size_t extract_count = 0; - while (!atomic_cas32_acquire(&cache->lock, 1, 0)) - _rpmalloc_spin(); - -#if ENABLE_STATISTICS - cache->extract_count += count; -#endif - size_t want = count - extract_count; - if (want > cache->count) - want = cache->count; - - memcpy(span + extract_count, cache->span + (cache->count - want), sizeof(span_t*) * want); - cache->count -= (uint32_t)want; - extract_count += want; - - while ((extract_count < count) && cache->overflow) { - span_t* current_span = cache->overflow; - span[extract_count++] = current_span; - cache->overflow = current_span->next; - } - -#if ENABLE_ASSERTS - for (size_t ispan = 0; ispan < extract_count; ++ispan) { - assert(span[ispan]->span_count == span_count); - } -#endif - - atomic_store32_release(&cache->lock, 0); - - return extract_count; -} - -#endif - -//////////// -/// -/// Heap control -/// -////// - -static void _rpmalloc_deallocate_huge(span_t*); - -//! Store the given spans as reserve in the given heap -static void -_rpmalloc_heap_set_reserved_spans(heap_t* heap, span_t* master, span_t* reserve, size_t reserve_span_count) { - heap->span_reserve_master = master; - heap->span_reserve = reserve; - heap->spans_reserved = (uint32_t)reserve_span_count; -} - -//! Adopt the deferred span cache list, optionally extracting the first single span for immediate re-use -static void -_rpmalloc_heap_cache_adopt_deferred(heap_t* heap, span_t** single_span) { - span_t* span = (span_t*)((void*)atomic_exchange_ptr_acquire(&heap->span_free_deferred, 0)); - while (span) { - span_t* next_span = (span_t*)span->free_list; - rpmalloc_assert(span->heap == heap, "Span heap pointer corrupted"); - if (EXPECTED(span->size_class < SIZE_CLASS_COUNT)) { - rpmalloc_assert(heap->full_span_count, "Heap span counter corrupted"); - --heap->full_span_count; - _rpmalloc_stat_dec(&heap->span_use[0].spans_deferred); -#if RPMALLOC_FIRST_CLASS_HEAPS - _rpmalloc_span_double_link_list_remove(&heap->full_span[span->size_class], span); -#endif - _rpmalloc_stat_dec(&heap->span_use[0].current); - _rpmalloc_stat_dec(&heap->size_class_use[span->size_class].spans_current); - if (single_span && !*single_span) - *single_span = span; - else - _rpmalloc_heap_cache_insert(heap, span); - } else { - if (span->size_class == SIZE_CLASS_HUGE) { - _rpmalloc_deallocate_huge(span); - } else { - rpmalloc_assert(span->size_class == SIZE_CLASS_LARGE, "Span size class invalid"); - rpmalloc_assert(heap->full_span_count, "Heap span counter corrupted"); - --heap->full_span_count; -#if RPMALLOC_FIRST_CLASS_HEAPS - _rpmalloc_span_double_link_list_remove(&heap->large_huge_span, span); -#endif - uint32_t idx = span->span_count - 1; - _rpmalloc_stat_dec(&heap->span_use[idx].spans_deferred); - _rpmalloc_stat_dec(&heap->span_use[idx].current); - if (!idx && single_span && !*single_span) - *single_span = span; - else - _rpmalloc_heap_cache_insert(heap, span); - } - } - span = next_span; - } -} - -static void -_rpmalloc_heap_unmap(heap_t* heap) { - if (!heap->master_heap) { - if ((heap->finalize > 1) && !atomic_load32(&heap->child_count)) { - span_t* span = (span_t*)((uintptr_t)heap & _memory_span_mask); - _rpmalloc_span_unmap(span); - } - } else { - if (atomic_decr32(&heap->master_heap->child_count) == 0) { - _rpmalloc_heap_unmap(heap->master_heap); - } - } -} - -static void -_rpmalloc_heap_global_finalize(heap_t* heap) { - if (heap->finalize++ > 1) { - --heap->finalize; - return; - } - - _rpmalloc_heap_finalize(heap); - -#if ENABLE_THREAD_CACHE - for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) { - span_cache_t* span_cache; - if (!iclass) - span_cache = &heap->span_cache; - else - span_cache = (span_cache_t*)(heap->span_large_cache + (iclass - 1)); - for (size_t ispan = 0; ispan < span_cache->count; ++ispan) - _rpmalloc_span_unmap(span_cache->span[ispan]); - span_cache->count = 0; - } -#endif - - if (heap->full_span_count) { - --heap->finalize; - return; - } - - for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) { - if (heap->size_class[iclass].free_list || heap->size_class[iclass].partial_span) { - --heap->finalize; - return; - } - } - //Heap is now completely free, unmap and remove from heap list - size_t list_idx = (size_t)heap->id % HEAP_ARRAY_SIZE; - heap_t* list_heap = _memory_heaps[list_idx]; - if (list_heap == heap) { - _memory_heaps[list_idx] = heap->next_heap; - } else { - while (list_heap->next_heap != heap) - list_heap = list_heap->next_heap; - list_heap->next_heap = heap->next_heap; - } - - _rpmalloc_heap_unmap(heap); -} - -//! Insert a single span into thread heap cache, releasing to global cache if overflow -static void -_rpmalloc_heap_cache_insert(heap_t* heap, span_t* span) { - if (UNEXPECTED(heap->finalize != 0)) { - _rpmalloc_span_unmap(span); - _rpmalloc_heap_global_finalize(heap); - return; - } -#if ENABLE_THREAD_CACHE - size_t span_count = span->span_count; - _rpmalloc_stat_inc(&heap->span_use[span_count - 1].spans_to_cache); - if (span_count == 1) { - span_cache_t* span_cache = &heap->span_cache; - span_cache->span[span_cache->count++] = span; - if (span_cache->count == MAX_THREAD_SPAN_CACHE) { - const size_t remain_count = MAX_THREAD_SPAN_CACHE - THREAD_SPAN_CACHE_TRANSFER; -#if ENABLE_GLOBAL_CACHE - _rpmalloc_stat_add64(&heap->thread_to_global, THREAD_SPAN_CACHE_TRANSFER * _memory_span_size); - _rpmalloc_stat_add(&heap->span_use[span_count - 1].spans_to_global, THREAD_SPAN_CACHE_TRANSFER); - _rpmalloc_global_cache_insert_spans(span_cache->span + remain_count, span_count, THREAD_SPAN_CACHE_TRANSFER); -#else - for (size_t ispan = 0; ispan < THREAD_SPAN_CACHE_TRANSFER; ++ispan) - _rpmalloc_span_unmap(span_cache->span[remain_count + ispan]); -#endif - span_cache->count = remain_count; - } - } else { - size_t cache_idx = span_count - 2; - span_large_cache_t* span_cache = heap->span_large_cache + cache_idx; - span_cache->span[span_cache->count++] = span; - const size_t cache_limit = (MAX_THREAD_SPAN_LARGE_CACHE - (span_count >> 1)); - if (span_cache->count == cache_limit) { - const size_t transfer_limit = 2 + (cache_limit >> 2); - const size_t transfer_count = (THREAD_SPAN_LARGE_CACHE_TRANSFER <= transfer_limit ? THREAD_SPAN_LARGE_CACHE_TRANSFER : transfer_limit); - const size_t remain_count = cache_limit - transfer_count; -#if ENABLE_GLOBAL_CACHE - _rpmalloc_stat_add64(&heap->thread_to_global, transfer_count * span_count * _memory_span_size); - _rpmalloc_stat_add(&heap->span_use[span_count - 1].spans_to_global, transfer_count); - _rpmalloc_global_cache_insert_spans(span_cache->span + remain_count, span_count, transfer_count); -#else - for (size_t ispan = 0; ispan < transfer_count; ++ispan) - _rpmalloc_span_unmap(span_cache->span[remain_count + ispan]); -#endif - span_cache->count = remain_count; - } - } -#else - (void)sizeof(heap); - _rpmalloc_span_unmap(span); -#endif -} - -//! Extract the given number of spans from the different cache levels -static span_t* -_rpmalloc_heap_thread_cache_extract(heap_t* heap, size_t span_count) { - span_t* span = 0; -#if ENABLE_THREAD_CACHE - span_cache_t* span_cache; - if (span_count == 1) - span_cache = &heap->span_cache; - else - span_cache = (span_cache_t*)(heap->span_large_cache + (span_count - 2)); - if (span_cache->count) { - _rpmalloc_stat_inc(&heap->span_use[span_count - 1].spans_from_cache); - return span_cache->span[--span_cache->count]; - } -#endif - return span; -} - -static span_t* -_rpmalloc_heap_thread_cache_deferred_extract(heap_t* heap, size_t span_count) { - span_t* span = 0; - if (span_count == 1) { - _rpmalloc_heap_cache_adopt_deferred(heap, &span); - } else { - _rpmalloc_heap_cache_adopt_deferred(heap, 0); - span = _rpmalloc_heap_thread_cache_extract(heap, span_count); - } - return span; -} - -static span_t* -_rpmalloc_heap_reserved_extract(heap_t* heap, size_t span_count) { - if (heap->spans_reserved >= span_count) - return _rpmalloc_span_map(heap, span_count); - return 0; -} - -//! Extract a span from the global cache -static span_t* -_rpmalloc_heap_global_cache_extract(heap_t* heap, size_t span_count) { -#if ENABLE_GLOBAL_CACHE -#if ENABLE_THREAD_CACHE - span_cache_t* span_cache; - size_t wanted_count; - if (span_count == 1) { - span_cache = &heap->span_cache; - wanted_count = THREAD_SPAN_CACHE_TRANSFER; - } else { - span_cache = (span_cache_t*)(heap->span_large_cache + (span_count - 2)); - wanted_count = THREAD_SPAN_LARGE_CACHE_TRANSFER; - } - span_cache->count = _rpmalloc_global_cache_extract_spans(span_cache->span, span_count, wanted_count); - if (span_cache->count) { - _rpmalloc_stat_add64(&heap->global_to_thread, span_count * span_cache->count * _memory_span_size); - _rpmalloc_stat_add(&heap->span_use[span_count - 1].spans_from_global, span_cache->count); - return span_cache->span[--span_cache->count]; - } -#else - span_t* span = 0; - size_t count = _rpmalloc_global_cache_extract_spans(&span, span_count, 1); - if (count) { - _rpmalloc_stat_add64(&heap->global_to_thread, span_count * count * _memory_span_size); - _rpmalloc_stat_add(&heap->span_use[span_count - 1].spans_from_global, count); - return span; - } -#endif -#endif - (void)sizeof(heap); - (void)sizeof(span_count); - return 0; -} - -static void -_rpmalloc_inc_span_statistics(heap_t* heap, size_t span_count, uint32_t class_idx) { - (void)sizeof(heap); - (void)sizeof(span_count); - (void)sizeof(class_idx); -#if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS - uint32_t idx = (uint32_t)span_count - 1; - uint32_t current_count = (uint32_t)atomic_incr32(&heap->span_use[idx].current); - if (current_count > (uint32_t)atomic_load32(&heap->span_use[idx].high)) - atomic_store32(&heap->span_use[idx].high, (int32_t)current_count); - _rpmalloc_stat_add_peak(&heap->size_class_use[class_idx].spans_current, 1, heap->size_class_use[class_idx].spans_peak); -#endif -} - -//! Get a span from one of the cache levels (thread cache, reserved, global cache) or fallback to mapping more memory -static span_t* -_rpmalloc_heap_extract_new_span(heap_t* heap, heap_size_class_t* heap_size_class, size_t span_count, uint32_t class_idx) { - span_t* span; -#if ENABLE_THREAD_CACHE - if (heap_size_class && heap_size_class->cache) { - span = heap_size_class->cache; - heap_size_class->cache = (heap->span_cache.count ? heap->span_cache.span[--heap->span_cache.count] : 0); - _rpmalloc_inc_span_statistics(heap, span_count, class_idx); - return span; - } -#endif - (void)sizeof(class_idx); - // Allow 50% overhead to increase cache hits - size_t base_span_count = span_count; - size_t limit_span_count = (span_count > 2) ? (span_count + (span_count >> 1)) : span_count; - if (limit_span_count > LARGE_CLASS_COUNT) - limit_span_count = LARGE_CLASS_COUNT; - do { - span = _rpmalloc_heap_thread_cache_extract(heap, span_count); - if (EXPECTED(span != 0)) { - _rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_from_cache); - _rpmalloc_inc_span_statistics(heap, span_count, class_idx); - return span; - } - span = _rpmalloc_heap_thread_cache_deferred_extract(heap, span_count); - if (EXPECTED(span != 0)) { - _rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_from_cache); - _rpmalloc_inc_span_statistics(heap, span_count, class_idx); - return span; - } - span = _rpmalloc_heap_reserved_extract(heap, span_count); - if (EXPECTED(span != 0)) { - _rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_from_reserved); - _rpmalloc_inc_span_statistics(heap, span_count, class_idx); - return span; - } - span = _rpmalloc_heap_global_cache_extract(heap, span_count); - if (EXPECTED(span != 0)) { - _rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_from_cache); - _rpmalloc_inc_span_statistics(heap, span_count, class_idx); - return span; - } - ++span_count; - } while (span_count <= limit_span_count); - //Final fallback, map in more virtual memory - span = _rpmalloc_span_map(heap, base_span_count); - _rpmalloc_inc_span_statistics(heap, base_span_count, class_idx); - _rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_map_calls); - return span; -} - -static void -_rpmalloc_heap_initialize(heap_t* heap) { - memset((void*)heap, 0, sizeof(heap_t)); - //Get a new heap ID - heap->id = 1 + atomic_incr32(&_memory_heap_id); - - //Link in heap in heap ID map - size_t list_idx = (size_t)heap->id % HEAP_ARRAY_SIZE; - heap->next_heap = _memory_heaps[list_idx]; - _memory_heaps[list_idx] = heap; -} - -static void -_rpmalloc_heap_orphan(heap_t* heap, int first_class) { - heap->owner_thread = (uintptr_t)-1; -#if RPMALLOC_FIRST_CLASS_HEAPS - heap_t** heap_list = (first_class ? &_memory_first_class_orphan_heaps : &_memory_orphan_heaps); -#else - (void)sizeof(first_class); - heap_t** heap_list = &_memory_orphan_heaps; -#endif - heap->next_orphan = *heap_list; - *heap_list = heap; -} - -//! Allocate a new heap from newly mapped memory pages -static heap_t* -_rpmalloc_heap_allocate_new(void) { - // Map in pages for a 16 heaps. If page size is greater than required size for this, map a page and - // use first part for heaps and remaining part for spans for allocations. Adds a lot of complexity, - // but saves a lot of memory on systems where page size > 64 spans (4MiB) - size_t heap_size = sizeof(heap_t); - size_t aligned_heap_size = 16 * ((heap_size + 15) / 16); - size_t request_heap_count = 16; - size_t heap_span_count = ((aligned_heap_size * request_heap_count) + sizeof(span_t) + _memory_span_size - 1) / _memory_span_size; - size_t block_size = _memory_span_size * heap_span_count; - size_t span_count = heap_span_count; - span_t* span = 0; - // If there are global reserved spans, use these first - if (_memory_global_reserve_count >= heap_span_count) { - span = _rpmalloc_global_get_reserved_spans(heap_span_count); - } - if (!span) { - if (_memory_page_size > block_size) { - span_count = _memory_page_size / _memory_span_size; - block_size = _memory_page_size; - // If using huge pages, make sure to grab enough heaps to avoid reallocating a huge page just to serve new heaps - size_t possible_heap_count = (block_size - sizeof(span_t)) / aligned_heap_size; - if (possible_heap_count >= (request_heap_count * 16)) - request_heap_count *= 16; - else if (possible_heap_count < request_heap_count) - request_heap_count = possible_heap_count; - heap_span_count = ((aligned_heap_size * request_heap_count) + sizeof(span_t) + _memory_span_size - 1) / _memory_span_size; - } - - size_t align_offset = 0; - span = (span_t*)_rpmalloc_mmap(block_size, &align_offset); - if (!span) - return 0; - - // Master span will contain the heaps - _rpmalloc_stat_inc(&_master_spans); - _rpmalloc_span_initialize(span, span_count, heap_span_count, align_offset); - } - - size_t remain_size = _memory_span_size - sizeof(span_t); - heap_t* heap = (heap_t*)pointer_offset(span, sizeof(span_t)); - _rpmalloc_heap_initialize(heap); - - // Put extra heaps as orphans - size_t num_heaps = remain_size / aligned_heap_size; - if (num_heaps < request_heap_count) - num_heaps = request_heap_count; - atomic_store32(&heap->child_count, (int32_t)num_heaps - 1); - heap_t* extra_heap = (heap_t*)pointer_offset(heap, aligned_heap_size); - while (num_heaps > 1) { - _rpmalloc_heap_initialize(extra_heap); - extra_heap->master_heap = heap; - _rpmalloc_heap_orphan(extra_heap, 1); - extra_heap = (heap_t*)pointer_offset(extra_heap, aligned_heap_size); - --num_heaps; - } - - if (span_count > heap_span_count) { - // Cap reserved spans - size_t remain_count = span_count - heap_span_count; - size_t reserve_count = (remain_count > _memory_heap_reserve_count ? _memory_heap_reserve_count : remain_count); - span_t* remain_span = (span_t*)pointer_offset(span, heap_span_count * _memory_span_size); - _rpmalloc_heap_set_reserved_spans(heap, span, remain_span, reserve_count); - - if (remain_count > reserve_count) { - // Set to global reserved spans - remain_span = (span_t*)pointer_offset(remain_span, reserve_count * _memory_span_size); - reserve_count = remain_count - reserve_count; - _rpmalloc_global_set_reserved_spans(span, remain_span, reserve_count); - } - } - - return heap; -} - -static heap_t* -_rpmalloc_heap_extract_orphan(heap_t** heap_list) { - heap_t* heap = *heap_list; - *heap_list = (heap ? heap->next_orphan : 0); - return heap; -} - -//! Allocate a new heap, potentially reusing a previously orphaned heap -static heap_t* -_rpmalloc_heap_allocate(int first_class) { - heap_t* heap = 0; - while (!atomic_cas32_acquire(&_memory_global_lock, 1, 0)) - _rpmalloc_spin(); - if (first_class == 0) - heap = _rpmalloc_heap_extract_orphan(&_memory_orphan_heaps); -#if RPMALLOC_FIRST_CLASS_HEAPS - if (!heap) - heap = _rpmalloc_heap_extract_orphan(&_memory_first_class_orphan_heaps); -#endif - if (!heap) - heap = _rpmalloc_heap_allocate_new(); - atomic_store32_release(&_memory_global_lock, 0); - _rpmalloc_heap_cache_adopt_deferred(heap, 0); - return heap; -} - -extern thread_local bool RpThreadShutdown; - -static void -_rpmalloc_heap_release(void* heapptr, int first_class, int release_cache) { - heap_t* heap = (heap_t*)heapptr; - if (!heap) - return; - RpThreadShutdown = true; - //Release thread cache spans back to global cache - _rpmalloc_heap_cache_adopt_deferred(heap, 0); - if (release_cache || heap->finalize) { -#if ENABLE_THREAD_CACHE - for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) { - span_cache_t* span_cache; - if (!iclass) - span_cache = &heap->span_cache; - else - span_cache = (span_cache_t*)(heap->span_large_cache + (iclass - 1)); - if (!span_cache->count) - continue; -#if ENABLE_GLOBAL_CACHE - if (heap->finalize) { - for (size_t ispan = 0; ispan < span_cache->count; ++ispan) - _rpmalloc_span_unmap(span_cache->span[ispan]); - } else { - _rpmalloc_stat_add64(&heap->thread_to_global, span_cache->count * (iclass + 1) * _memory_span_size); - _rpmalloc_stat_add(&heap->span_use[iclass].spans_to_global, span_cache->count); - _rpmalloc_global_cache_insert_spans(span_cache->span, iclass + 1, span_cache->count); - } -#else - for (size_t ispan = 0; ispan < span_cache->count; ++ispan) - _rpmalloc_span_unmap(span_cache->span[ispan]); -#endif - span_cache->count = 0; - } -#endif - } - - if (get_thread_heap_raw() == heap) - set_thread_heap(0); - -#if ENABLE_STATISTICS - atomic_decr32(&_memory_active_heaps); - rpmalloc_assert(atomic_load32(&_memory_active_heaps) >= 0, "Still active heaps during finalization"); -#endif - - // If we are forcibly terminating with _exit the state of the - // lock atomic is unknown and it's best to just go ahead and exit - if (get_thread_id() != _rpmalloc_main_thread_id) { - while (!atomic_cas32_acquire(&_memory_global_lock, 1, 0)) - _rpmalloc_spin(); - } - _rpmalloc_heap_orphan(heap, first_class); - atomic_store32_release(&_memory_global_lock, 0); -} - -static void -_rpmalloc_heap_release_raw(void* heapptr, int release_cache) { - _rpmalloc_heap_release(heapptr, 0, release_cache); -} - -static void -_rpmalloc_heap_release_raw_fc(void* heapptr) { - _rpmalloc_heap_release_raw(heapptr, 1); -} - -static void -_rpmalloc_heap_finalize(heap_t* heap) { - if (heap->spans_reserved) { - span_t* span = _rpmalloc_span_map(heap, heap->spans_reserved); - _rpmalloc_span_unmap(span); - heap->spans_reserved = 0; - } - - _rpmalloc_heap_cache_adopt_deferred(heap, 0); - - for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) { - if (heap->size_class[iclass].cache) - _rpmalloc_span_unmap(heap->size_class[iclass].cache); - heap->size_class[iclass].cache = 0; - span_t* span = heap->size_class[iclass].partial_span; - while (span) { - span_t* next = span->next; - _rpmalloc_span_finalize(heap, iclass, span, &heap->size_class[iclass].partial_span); - span = next; - } - // If class still has a free list it must be a full span - if (heap->size_class[iclass].free_list) { - span_t* class_span = (span_t*)((uintptr_t)heap->size_class[iclass].free_list & _memory_span_mask); - span_t** list = 0; -#if RPMALLOC_FIRST_CLASS_HEAPS - list = &heap->full_span[iclass]; -#endif - --heap->full_span_count; - if (!_rpmalloc_span_finalize(heap, iclass, class_span, list)) { - if (list) - _rpmalloc_span_double_link_list_remove(list, class_span); - _rpmalloc_span_double_link_list_add(&heap->size_class[iclass].partial_span, class_span); - } - } - } - -#if ENABLE_THREAD_CACHE - for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) { - span_cache_t* span_cache; - if (!iclass) - span_cache = &heap->span_cache; - else - span_cache = (span_cache_t*)(heap->span_large_cache + (iclass - 1)); - for (size_t ispan = 0; ispan < span_cache->count; ++ispan) - _rpmalloc_span_unmap(span_cache->span[ispan]); - span_cache->count = 0; - } -#endif - rpmalloc_assert(!atomic_load_ptr(&heap->span_free_deferred), "Heaps still active during finalization"); -} - - -//////////// -/// -/// Allocation entry points -/// -////// - -//! Pop first block from a free list -static void* -free_list_pop(void** list) { - void* block = *list; - *list = *((void**)block); - return block; -} - -//! Allocate a small/medium sized memory block from the given heap -static void* -_rpmalloc_allocate_from_heap_fallback(heap_t* heap, heap_size_class_t* heap_size_class, uint32_t class_idx) { - span_t* span = heap_size_class->partial_span; - if (EXPECTED(span != 0)) { - rpmalloc_assert(span->block_count == _memory_size_class[span->size_class].block_count, "Span block count corrupted"); - rpmalloc_assert(!_rpmalloc_span_is_fully_utilized(span), "Internal failure"); - void* block; - if (span->free_list) { - //Span local free list is not empty, swap to size class free list - block = free_list_pop(&span->free_list); - heap_size_class->free_list = span->free_list; - span->free_list = 0; - } else { - //If the span did not fully initialize free list, link up another page worth of blocks - void* block_start = pointer_offset(span, SPAN_HEADER_SIZE + ((size_t)span->free_list_limit * span->block_size)); - span->free_list_limit += free_list_partial_init(&heap_size_class->free_list, &block, - (void*)((uintptr_t)block_start & ~(_memory_page_size - 1)), block_start, - span->block_count - span->free_list_limit, span->block_size); - } - rpmalloc_assert(span->free_list_limit <= span->block_count, "Span block count corrupted"); - span->used_count = span->free_list_limit; - - //Swap in deferred free list if present - if (atomic_load_ptr(&span->free_list_deferred)) - _rpmalloc_span_extract_free_list_deferred(span); - - //If span is still not fully utilized keep it in partial list and early return block - if (!_rpmalloc_span_is_fully_utilized(span)) - return block; - - //The span is fully utilized, unlink from partial list and add to fully utilized list - _rpmalloc_span_double_link_list_pop_head(&heap_size_class->partial_span, span); -#if RPMALLOC_FIRST_CLASS_HEAPS - _rpmalloc_span_double_link_list_add(&heap->full_span[class_idx], span); -#endif - ++heap->full_span_count; - return block; - } - - //Find a span in one of the cache levels - span = _rpmalloc_heap_extract_new_span(heap, heap_size_class, 1, class_idx); - if (EXPECTED(span != 0)) { - //Mark span as owned by this heap and set base data, return first block - return _rpmalloc_span_initialize_new(heap, heap_size_class, span, class_idx); - } - - return 0; -} - -//! Allocate a small sized memory block from the given heap -static void* -_rpmalloc_allocate_small(heap_t* heap, size_t size) { - rpmalloc_assert(heap, "No thread heap"); - //Small sizes have unique size classes - const uint32_t class_idx = (uint32_t)((size + (SMALL_GRANULARITY - 1)) >> SMALL_GRANULARITY_SHIFT); - heap_size_class_t* heap_size_class = heap->size_class + class_idx; - _rpmalloc_stat_inc_alloc(heap, class_idx); - if (EXPECTED(heap_size_class->free_list != 0)) - return free_list_pop(&heap_size_class->free_list); - return _rpmalloc_allocate_from_heap_fallback(heap, heap_size_class, class_idx); -} - -//! Allocate a medium sized memory block from the given heap -static void* -_rpmalloc_allocate_medium(heap_t* heap, size_t size) { - rpmalloc_assert(heap, "No thread heap"); - //Calculate the size class index and do a dependent lookup of the final class index (in case of merged classes) - const uint32_t base_idx = (uint32_t)(SMALL_CLASS_COUNT + ((size - (SMALL_SIZE_LIMIT + 1)) >> MEDIUM_GRANULARITY_SHIFT)); - const uint32_t class_idx = _memory_size_class[base_idx].class_idx; - heap_size_class_t* heap_size_class = heap->size_class + class_idx; - _rpmalloc_stat_inc_alloc(heap, class_idx); - if (EXPECTED(heap_size_class->free_list != 0)) - return free_list_pop(&heap_size_class->free_list); - return _rpmalloc_allocate_from_heap_fallback(heap, heap_size_class, class_idx); -} - -//! Allocate a large sized memory block from the given heap -static void* -_rpmalloc_allocate_large(heap_t* heap, size_t size) { - rpmalloc_assert(heap, "No thread heap"); - //Calculate number of needed max sized spans (including header) - //Since this function is never called if size > LARGE_SIZE_LIMIT - //the span_count is guaranteed to be <= LARGE_CLASS_COUNT - size += SPAN_HEADER_SIZE; - size_t span_count = size >> _memory_span_size_shift; - if (size & (_memory_span_size - 1)) - ++span_count; - - //Find a span in one of the cache levels - span_t* span = _rpmalloc_heap_extract_new_span(heap, 0, span_count, SIZE_CLASS_LARGE); - if (!span) - return span; - - //Mark span as owned by this heap and set base data - rpmalloc_assert(span->span_count >= span_count, "Internal failure"); - span->size_class = SIZE_CLASS_LARGE; - span->heap = heap; - -#if RPMALLOC_FIRST_CLASS_HEAPS - _rpmalloc_span_double_link_list_add(&heap->large_huge_span, span); -#endif - ++heap->full_span_count; - - return pointer_offset(span, SPAN_HEADER_SIZE); -} - -//! Allocate a huge block by mapping memory pages directly -static void* -_rpmalloc_allocate_huge(heap_t* heap, size_t size) { - rpmalloc_assert(heap, "No thread heap"); - _rpmalloc_heap_cache_adopt_deferred(heap, 0); - size += SPAN_HEADER_SIZE; - size_t num_pages = size >> _memory_page_size_shift; - if (size & (_memory_page_size - 1)) - ++num_pages; - size_t align_offset = 0; - span_t* span = (span_t*)_rpmalloc_mmap(num_pages * _memory_page_size, &align_offset); - if (!span) - return span; - - //Store page count in span_count - span->size_class = SIZE_CLASS_HUGE; - span->span_count = (uint32_t)num_pages; - span->align_offset = (uint32_t)align_offset; - span->heap = heap; - _rpmalloc_stat_add_peak(&_huge_pages_current, num_pages, _huge_pages_peak); - -#if RPMALLOC_FIRST_CLASS_HEAPS - _rpmalloc_span_double_link_list_add(&heap->large_huge_span, span); -#endif - ++heap->full_span_count; - - return pointer_offset(span, SPAN_HEADER_SIZE); -} - -//! Allocate a block of the given size -static void* -_rpmalloc_allocate(heap_t* heap, size_t size) { - _rpmalloc_stat_add64(&_allocation_counter, 1); - if (EXPECTED(size <= SMALL_SIZE_LIMIT)) - return _rpmalloc_allocate_small(heap, size); - else if (size <= _memory_medium_size_limit) - return _rpmalloc_allocate_medium(heap, size); - else if (size <= LARGE_SIZE_LIMIT) - return _rpmalloc_allocate_large(heap, size); - return _rpmalloc_allocate_huge(heap, size); -} - -static void* -_rpmalloc_aligned_allocate(heap_t* heap, size_t alignment, size_t size) { - if (alignment <= SMALL_GRANULARITY) - return _rpmalloc_allocate(heap, size); - -#if ENABLE_VALIDATE_ARGS - if ((size + alignment) < size) { - errno = EINVAL; - return 0; - } - if (alignment & (alignment - 1)) { - errno = EINVAL; - return 0; - } -#endif - - if ((alignment <= SPAN_HEADER_SIZE) && (size < _memory_medium_size_limit)) { - // If alignment is less or equal to span header size (which is power of two), - // and size aligned to span header size multiples is less than size + alignment, - // then use natural alignment of blocks to provide alignment - size_t multiple_size = size ? (size + (SPAN_HEADER_SIZE - 1)) & ~(uintptr_t)(SPAN_HEADER_SIZE - 1) : SPAN_HEADER_SIZE; - rpmalloc_assert(!(multiple_size % SPAN_HEADER_SIZE), "Failed alignment calculation"); - if (multiple_size <= (size + alignment)) - return _rpmalloc_allocate(heap, multiple_size); - } - - void* ptr = 0; - size_t align_mask = alignment - 1; - if (alignment <= _memory_page_size) { - ptr = _rpmalloc_allocate(heap, size + alignment); - if ((uintptr_t)ptr & align_mask) { - ptr = (void*)(((uintptr_t)ptr & ~(uintptr_t)align_mask) + alignment); - //Mark as having aligned blocks - span_t* span = (span_t*)((uintptr_t)ptr & _memory_span_mask); - span->flags |= SPAN_FLAG_ALIGNED_BLOCKS; - } - return ptr; - } - - // Fallback to mapping new pages for this request. Since pointers passed - // to rpfree must be able to reach the start of the span by bitmasking of - // the address with the span size, the returned aligned pointer from this - // function must be with a span size of the start of the mapped area. - // In worst case this requires us to loop and map pages until we get a - // suitable memory address. It also means we can never align to span size - // or greater, since the span header will push alignment more than one - // span size away from span start (thus causing pointer mask to give us - // an invalid span start on free) - if (alignment & align_mask) { - errno = EINVAL; - return 0; - } - if (alignment >= _memory_span_size) { - errno = EINVAL; - return 0; - } - - size_t extra_pages = alignment / _memory_page_size; - - // Since each span has a header, we will at least need one extra memory page - size_t num_pages = 1 + (size / _memory_page_size); - if (size & (_memory_page_size - 1)) - ++num_pages; - - if (extra_pages > num_pages) - num_pages = 1 + extra_pages; - - size_t original_pages = num_pages; - size_t limit_pages = (_memory_span_size / _memory_page_size) * 2; - if (limit_pages < (original_pages * 2)) - limit_pages = original_pages * 2; - - size_t mapped_size, align_offset; - span_t* span; - -retry: - align_offset = 0; - mapped_size = num_pages * _memory_page_size; - - span = (span_t*)_rpmalloc_mmap(mapped_size, &align_offset); - if (!span) { - errno = ENOMEM; - return 0; - } - ptr = pointer_offset(span, SPAN_HEADER_SIZE); - - if ((uintptr_t)ptr & align_mask) - ptr = (void*)(((uintptr_t)ptr & ~(uintptr_t)align_mask) + alignment); - - if (((size_t)pointer_diff(ptr, span) >= _memory_span_size) || - (pointer_offset(ptr, size) > pointer_offset(span, mapped_size)) || - (((uintptr_t)ptr & _memory_span_mask) != (uintptr_t)span)) { - _rpmalloc_unmap(span, mapped_size, align_offset, mapped_size); - ++num_pages; - if (num_pages > limit_pages) { - errno = EINVAL; - return 0; - } - goto retry; - } - - //Store page count in span_count - span->size_class = SIZE_CLASS_HUGE; - span->span_count = (uint32_t)num_pages; - span->align_offset = (uint32_t)align_offset; - span->heap = heap; - _rpmalloc_stat_add_peak(&_huge_pages_current, num_pages, _huge_pages_peak); - -#if RPMALLOC_FIRST_CLASS_HEAPS - _rpmalloc_span_double_link_list_add(&heap->large_huge_span, span); -#endif - ++heap->full_span_count; - - _rpmalloc_stat_add64(&_allocation_counter, 1); - - return ptr; -} - - -//////////// -/// -/// Deallocation entry points -/// -////// - -//! Deallocate the given small/medium memory block in the current thread local heap -static void -_rpmalloc_deallocate_direct_small_or_medium(span_t* span, void* block) { - heap_t* heap = span->heap; - rpmalloc_assert(heap->owner_thread == get_thread_id() || !heap->owner_thread || heap->finalize, "Internal failure"); - //Add block to free list - if (UNEXPECTED(_rpmalloc_span_is_fully_utilized(span))) { - span->used_count = span->block_count; -#if RPMALLOC_FIRST_CLASS_HEAPS - _rpmalloc_span_double_link_list_remove(&heap->full_span[span->size_class], span); -#endif - _rpmalloc_span_double_link_list_add(&heap->size_class[span->size_class].partial_span, span); - --heap->full_span_count; - } - *((void**)block) = span->free_list; - --span->used_count; - span->free_list = block; - if (UNEXPECTED(span->used_count == span->list_size)) { - // If there are no used blocks it is guaranteed that no other external thread is accessing the span - if (span->used_count) { - // Make sure we have synchronized the deferred list and list size by using acquire semantics - // and guarantee that no external thread is accessing span concurrently - void* free_list; - do { - free_list = atomic_exchange_ptr_acquire(&span->free_list_deferred, INVALID_POINTER); - } while (free_list == INVALID_POINTER); - atomic_store_ptr_release(&span->free_list_deferred, free_list); - } - _rpmalloc_span_double_link_list_remove(&heap->size_class[span->size_class].partial_span, span); - _rpmalloc_span_release_to_cache(heap, span); - } -} - -static void -_rpmalloc_deallocate_defer_free_span(heap_t* heap, span_t* span) { - if (span->size_class != SIZE_CLASS_HUGE) - _rpmalloc_stat_inc(&heap->span_use[span->span_count - 1].spans_deferred); - //This list does not need ABA protection, no mutable side state - do { - span->free_list = (void*)atomic_load_ptr(&heap->span_free_deferred); - } while (!atomic_cas_ptr(&heap->span_free_deferred, span, span->free_list)); -} - -//! Put the block in the deferred free list of the owning span -static void -_rpmalloc_deallocate_defer_small_or_medium(span_t* span, void* block) { - // The memory ordering here is a bit tricky, to avoid having to ABA protect - // the deferred free list to avoid desynchronization of list and list size - // we need to have acquire semantics on successful CAS of the pointer to - // guarantee the list_size variable validity + release semantics on pointer store - void* free_list; - do { - free_list = atomic_exchange_ptr_acquire(&span->free_list_deferred, INVALID_POINTER); - } while (free_list == INVALID_POINTER); - *((void**)block) = free_list; - uint32_t free_count = ++span->list_size; - int all_deferred_free = (free_count == span->block_count); - atomic_store_ptr_release(&span->free_list_deferred, block); - if (all_deferred_free) { - // Span was completely freed by this block. Due to the INVALID_POINTER spin lock - // no other thread can reach this state simultaneously on this span. - // Safe to move to owner heap deferred cache - _rpmalloc_deallocate_defer_free_span(span->heap, span); - } -} - -static void -_rpmalloc_deallocate_small_or_medium(span_t* span, void* p) { - _rpmalloc_stat_inc_free(span->heap, span->size_class); - if (span->flags & SPAN_FLAG_ALIGNED_BLOCKS) { - //Realign pointer to block start - void* blocks_start = pointer_offset(span, SPAN_HEADER_SIZE); - uint32_t block_offset = (uint32_t)pointer_diff(p, blocks_start); - p = pointer_offset(p, -(int32_t)(block_offset % span->block_size)); - } - //Check if block belongs to this heap or if deallocation should be deferred -#if RPMALLOC_FIRST_CLASS_HEAPS - int defer = (span->heap->owner_thread && (span->heap->owner_thread != get_thread_id()) && !span->heap->finalize); -#else - int defer = ((span->heap->owner_thread != get_thread_id()) && !span->heap->finalize); -#endif - if (!defer) - _rpmalloc_deallocate_direct_small_or_medium(span, p); - else - _rpmalloc_deallocate_defer_small_or_medium(span, p); -} - -//! Deallocate the given large memory block to the current heap -static void -_rpmalloc_deallocate_large(span_t* span) { - rpmalloc_assert(span->size_class == SIZE_CLASS_LARGE, "Bad span size class"); - rpmalloc_assert(!(span->flags & SPAN_FLAG_MASTER) || !(span->flags & SPAN_FLAG_SUBSPAN), "Span flag corrupted"); - rpmalloc_assert((span->flags & SPAN_FLAG_MASTER) || (span->flags & SPAN_FLAG_SUBSPAN), "Span flag corrupted"); - //We must always defer (unless finalizing) if from another heap since we cannot touch the list or counters of another heap -#if RPMALLOC_FIRST_CLASS_HEAPS - int defer = (span->heap->owner_thread && (span->heap->owner_thread != get_thread_id()) && !span->heap->finalize); -#else - int defer = ((span->heap->owner_thread != get_thread_id()) && !span->heap->finalize); -#endif - if (defer) { - _rpmalloc_deallocate_defer_free_span(span->heap, span); - return; - } - rpmalloc_assert(span->heap->full_span_count, "Heap span counter corrupted"); - --span->heap->full_span_count; -#if RPMALLOC_FIRST_CLASS_HEAPS - _rpmalloc_span_double_link_list_remove(&span->heap->large_huge_span, span); -#endif -#if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS - //Decrease counter - size_t idx = span->span_count - 1; - atomic_decr32(&span->heap->span_use[idx].current); -#endif - heap_t* heap = span->heap; - rpmalloc_assert(heap, "No thread heap"); -#if ENABLE_THREAD_CACHE - const int set_as_reserved = ((span->span_count > 1) && (heap->span_cache.count == 0) && !heap->finalize && !heap->spans_reserved); -#else - const int set_as_reserved = ((span->span_count > 1) && !heap->finalize && !heap->spans_reserved); -#endif - if (set_as_reserved) { - heap->span_reserve = span; - heap->spans_reserved = span->span_count; - if (span->flags & SPAN_FLAG_MASTER) { - heap->span_reserve_master = span; - } else { //SPAN_FLAG_SUBSPAN - span_t* master = (span_t*)pointer_offset(span, -(intptr_t)((size_t)span->offset_from_master * _memory_span_size)); - heap->span_reserve_master = master; - rpmalloc_assert(master->flags & SPAN_FLAG_MASTER, "Span flag corrupted"); - rpmalloc_assert(atomic_load32(&master->remaining_spans) >= (int32_t)span->span_count, "Master span count corrupted"); - } - _rpmalloc_stat_inc(&heap->span_use[idx].spans_to_reserved); - } else { - //Insert into cache list - _rpmalloc_heap_cache_insert(heap, span); - } -} - -//! Deallocate the given huge span -static void -_rpmalloc_deallocate_huge(span_t* span) { - rpmalloc_assert(span->heap, "No span heap"); -#if RPMALLOC_FIRST_CLASS_HEAPS - int defer = (span->heap->owner_thread && (span->heap->owner_thread != get_thread_id()) && !span->heap->finalize); -#else - int defer = ((span->heap->owner_thread != get_thread_id()) && !span->heap->finalize); -#endif - if (defer) { - _rpmalloc_deallocate_defer_free_span(span->heap, span); - return; - } - rpmalloc_assert(span->heap->full_span_count, "Heap span counter corrupted"); - --span->heap->full_span_count; -#if RPMALLOC_FIRST_CLASS_HEAPS - _rpmalloc_span_double_link_list_remove(&span->heap->large_huge_span, span); -#endif - - //Oversized allocation, page count is stored in span_count - size_t num_pages = span->span_count; - _rpmalloc_unmap(span, num_pages * _memory_page_size, span->align_offset, num_pages * _memory_page_size); - _rpmalloc_stat_sub(&_huge_pages_current, num_pages); -} - -//! Deallocate the given block -static void -_rpmalloc_deallocate(void* p) { - _rpmalloc_stat_add64(&_deallocation_counter, 1); - //Grab the span (always at start of span, using span alignment) - span_t* span = (span_t*)((uintptr_t)p & _memory_span_mask); - if (UNEXPECTED(!span)) - return; - if (EXPECTED(span->size_class < SIZE_CLASS_COUNT)) - _rpmalloc_deallocate_small_or_medium(span, p); - else if (span->size_class == SIZE_CLASS_LARGE) - _rpmalloc_deallocate_large(span); - else - _rpmalloc_deallocate_huge(span); -} - -//////////// -/// -/// Reallocation entry points -/// -////// - -static size_t -_rpmalloc_usable_size(void* p); - -//! Reallocate the given block to the given size -static void* -_rpmalloc_reallocate(heap_t* heap, void* p, size_t size, size_t oldsize, unsigned int flags) { - if (p) { - //Grab the span using guaranteed span alignment - span_t* span = (span_t*)((uintptr_t)p & _memory_span_mask); - if (EXPECTED(span->size_class < SIZE_CLASS_COUNT)) { - //Small/medium sized block - rpmalloc_assert(span->span_count == 1, "Span counter corrupted"); - void* blocks_start = pointer_offset(span, SPAN_HEADER_SIZE); - uint32_t block_offset = (uint32_t)pointer_diff(p, blocks_start); - uint32_t block_idx = block_offset / span->block_size; - void* block = pointer_offset(blocks_start, (size_t)block_idx * span->block_size); - if (!oldsize) - oldsize = (size_t)((ptrdiff_t)span->block_size - pointer_diff(p, block)); - if ((size_t)span->block_size >= size) { - //Still fits in block, never mind trying to save memory, but preserve data if alignment changed - if ((p != block) && !(flags & RPMALLOC_NO_PRESERVE)) - memmove(block, p, oldsize); - return block; - } - } else if (span->size_class == SIZE_CLASS_LARGE) { - //Large block - size_t total_size = size + SPAN_HEADER_SIZE; - size_t num_spans = total_size >> _memory_span_size_shift; - if (total_size & (_memory_span_mask - 1)) - ++num_spans; - size_t current_spans = span->span_count; - void* block = pointer_offset(span, SPAN_HEADER_SIZE); - if (!oldsize) - oldsize = (current_spans * _memory_span_size) - (size_t)pointer_diff(p, block) - SPAN_HEADER_SIZE; - if ((current_spans >= num_spans) && (total_size >= (oldsize / 2))) { - //Still fits in block, never mind trying to save memory, but preserve data if alignment changed - if ((p != block) && !(flags & RPMALLOC_NO_PRESERVE)) - memmove(block, p, oldsize); - return block; - } - } else { - //Oversized block - size_t total_size = size + SPAN_HEADER_SIZE; - size_t num_pages = total_size >> _memory_page_size_shift; - if (total_size & (_memory_page_size - 1)) - ++num_pages; - //Page count is stored in span_count - size_t current_pages = span->span_count; - void* block = pointer_offset(span, SPAN_HEADER_SIZE); - if (!oldsize) - oldsize = (current_pages * _memory_page_size) - (size_t)pointer_diff(p, block) - SPAN_HEADER_SIZE; - if ((current_pages >= num_pages) && (num_pages >= (current_pages / 2))) { - //Still fits in block, never mind trying to save memory, but preserve data if alignment changed - if ((p != block) && !(flags & RPMALLOC_NO_PRESERVE)) - memmove(block, p, oldsize); - return block; - } - } - } else { - oldsize = 0; - } - - if (!!(flags & RPMALLOC_GROW_OR_FAIL)) - return 0; - - //Size is greater than block size, need to allocate a new block and deallocate the old - //Avoid hysteresis by overallocating if increase is small (below 37%) - size_t lower_bound = oldsize + (oldsize >> 2) + (oldsize >> 3); - size_t new_size = (size > lower_bound) ? size : ((size > oldsize) ? lower_bound : size); - void* block = _rpmalloc_allocate(heap, new_size); - if (p && block) { - if (!(flags & RPMALLOC_NO_PRESERVE)) - memcpy(block, p, oldsize < new_size ? oldsize : new_size); - _rpmalloc_deallocate(p); - } - - return block; -} - -static void* -_rpmalloc_aligned_reallocate(heap_t* heap, void* ptr, size_t alignment, size_t size, size_t oldsize, - unsigned int flags) { - if (alignment <= SMALL_GRANULARITY) - return _rpmalloc_reallocate(heap, ptr, size, oldsize, flags); - - int no_alloc = !!(flags & RPMALLOC_GROW_OR_FAIL); - size_t usablesize = (ptr ? _rpmalloc_usable_size(ptr) : 0); - if ((usablesize >= size) && !((uintptr_t)ptr & (alignment - 1))) { - if (no_alloc || (size >= (usablesize / 2))) - return ptr; - } - // Aligned alloc marks span as having aligned blocks - void* block = (!no_alloc ? _rpmalloc_aligned_allocate(heap, alignment, size) : 0); - if (EXPECTED(block != 0)) { - if (!(flags & RPMALLOC_NO_PRESERVE) && ptr) { - if (!oldsize) - oldsize = usablesize; - memcpy(block, ptr, oldsize < size ? oldsize : size); - } - _rpmalloc_deallocate(ptr); - } - return block; -} - - -//////////// -/// -/// Initialization, finalization and utility -/// -////// - -//! Get the usable size of the given block -static size_t -_rpmalloc_usable_size(void* p) { - //Grab the span using guaranteed span alignment - span_t* span = (span_t*)((uintptr_t)p & _memory_span_mask); - if (span->size_class < SIZE_CLASS_COUNT) { - //Small/medium block - void* blocks_start = pointer_offset(span, SPAN_HEADER_SIZE); - return span->block_size - ((size_t)pointer_diff(p, blocks_start) % span->block_size); - } - if (span->size_class == SIZE_CLASS_LARGE) { - //Large block - size_t current_spans = span->span_count; - return (current_spans * _memory_span_size) - (size_t)pointer_diff(p, span); - } - //Oversized block, page count is stored in span_count - size_t current_pages = span->span_count; - return (current_pages * _memory_page_size) - (size_t)pointer_diff(p, span); -} - -//! Adjust and optimize the size class properties for the given class -static void -_rpmalloc_adjust_size_class(size_t iclass) { - size_t block_size = _memory_size_class[iclass].block_size; - size_t block_count = (_memory_span_size - SPAN_HEADER_SIZE) / block_size; - - _memory_size_class[iclass].block_count = (uint16_t)block_count; - _memory_size_class[iclass].class_idx = (uint16_t)iclass; - - //Check if previous size classes can be merged - if (iclass >= SMALL_CLASS_COUNT) { - size_t prevclass = iclass; - while (prevclass > 0) { - --prevclass; - //A class can be merged if number of pages and number of blocks are equal - if (_memory_size_class[prevclass].block_count == _memory_size_class[iclass].block_count) - memcpy(_memory_size_class + prevclass, _memory_size_class + iclass, sizeof(_memory_size_class[iclass])); - else - break; - } - } -} - -//! Initialize the allocator and setup global data -TRACY_API int -rpmalloc_initialize(void) { - if (_rpmalloc_initialized) { - rpmalloc_thread_initialize(); - return 0; - } - return rpmalloc_initialize_config(0); -} - -int -rpmalloc_initialize_config(const rpmalloc_config_t* config) { - if (_rpmalloc_initialized) { - rpmalloc_thread_initialize(); - return 0; - } - _rpmalloc_initialized = 1; - - if (config) - memcpy(&_memory_config, config, sizeof(rpmalloc_config_t)); - else - memset(&_memory_config, 0, sizeof(rpmalloc_config_t)); - - if (!_memory_config.memory_map || !_memory_config.memory_unmap) { - _memory_config.memory_map = _rpmalloc_mmap_os; - _memory_config.memory_unmap = _rpmalloc_unmap_os; - } - -#if PLATFORM_WINDOWS - SYSTEM_INFO system_info; - memset(&system_info, 0, sizeof(system_info)); - GetSystemInfo(&system_info); - _memory_map_granularity = system_info.dwAllocationGranularity; -#else - _memory_map_granularity = (size_t)sysconf(_SC_PAGESIZE); -#endif - -#if RPMALLOC_CONFIGURABLE - _memory_page_size = _memory_config.page_size; -#else - _memory_page_size = 0; -#endif - _memory_huge_pages = 0; - if (!_memory_page_size) { -#if PLATFORM_WINDOWS - _memory_page_size = system_info.dwPageSize; -#else - _memory_page_size = _memory_map_granularity; - if (_memory_config.enable_huge_pages) { -#if defined(__linux__) - size_t huge_page_size = 0; - FILE* meminfo = fopen("/proc/meminfo", "r"); - if (meminfo) { - char line[128]; - while (!huge_page_size && fgets(line, sizeof(line) - 1, meminfo)) { - line[sizeof(line) - 1] = 0; - if (strstr(line, "Hugepagesize:")) - huge_page_size = (size_t)strtol(line + 13, 0, 10) * 1024; - } - fclose(meminfo); - } - if (huge_page_size) { - _memory_huge_pages = 1; - _memory_page_size = huge_page_size; - _memory_map_granularity = huge_page_size; - } -#elif defined(__FreeBSD__) - int rc; - size_t sz = sizeof(rc); - - if (sysctlbyname("vm.pmap.pg_ps_enabled", &rc, &sz, NULL, 0) == 0 && rc == 1) { - _memory_huge_pages = 1; - _memory_page_size = 2 * 1024 * 1024; - _memory_map_granularity = _memory_page_size; - } -#elif defined(__APPLE__) || defined(__NetBSD__) - _memory_huge_pages = 1; - _memory_page_size = 2 * 1024 * 1024; - _memory_map_granularity = _memory_page_size; -#endif - } -#endif - } else { - if (_memory_config.enable_huge_pages) - _memory_huge_pages = 1; - } - -#if PLATFORM_WINDOWS - if (_memory_config.enable_huge_pages) { - HANDLE token = 0; - size_t large_page_minimum = GetLargePageMinimum(); - if (large_page_minimum) - OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &token); - if (token) { - LUID luid; - if (LookupPrivilegeValue(0, SE_LOCK_MEMORY_NAME, &luid)) { - TOKEN_PRIVILEGES token_privileges; - memset(&token_privileges, 0, sizeof(token_privileges)); - token_privileges.PrivilegeCount = 1; - token_privileges.Privileges[0].Luid = luid; - token_privileges.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED; - if (AdjustTokenPrivileges(token, FALSE, &token_privileges, 0, 0, 0)) { - if (GetLastError() == ERROR_SUCCESS) - _memory_huge_pages = 1; - } - } - CloseHandle(token); - } - if (_memory_huge_pages) { - if (large_page_minimum > _memory_page_size) - _memory_page_size = large_page_minimum; - if (large_page_minimum > _memory_map_granularity) - _memory_map_granularity = large_page_minimum; - } - } -#endif - - size_t min_span_size = 256; - size_t max_page_size; -#if UINTPTR_MAX > 0xFFFFFFFF - max_page_size = 4096ULL * 1024ULL * 1024ULL; -#else - max_page_size = 4 * 1024 * 1024; -#endif - if (_memory_page_size < min_span_size) - _memory_page_size = min_span_size; - if (_memory_page_size > max_page_size) - _memory_page_size = max_page_size; - _memory_page_size_shift = 0; - size_t page_size_bit = _memory_page_size; - while (page_size_bit != 1) { - ++_memory_page_size_shift; - page_size_bit >>= 1; - } - _memory_page_size = ((size_t)1 << _memory_page_size_shift); - -#if RPMALLOC_CONFIGURABLE - if (!_memory_config.span_size) { - _memory_span_size = _memory_default_span_size; - _memory_span_size_shift = _memory_default_span_size_shift; - _memory_span_mask = _memory_default_span_mask; - } else { - size_t span_size = _memory_config.span_size; - if (span_size > (256 * 1024)) - span_size = (256 * 1024); - _memory_span_size = 4096; - _memory_span_size_shift = 12; - while (_memory_span_size < span_size) { - _memory_span_size <<= 1; - ++_memory_span_size_shift; - } - _memory_span_mask = ~(uintptr_t)(_memory_span_size - 1); - } -#endif - - _memory_span_map_count = ( _memory_config.span_map_count ? _memory_config.span_map_count : DEFAULT_SPAN_MAP_COUNT); - if ((_memory_span_size * _memory_span_map_count) < _memory_page_size) - _memory_span_map_count = (_memory_page_size / _memory_span_size); - if ((_memory_page_size >= _memory_span_size) && ((_memory_span_map_count * _memory_span_size) % _memory_page_size)) - _memory_span_map_count = (_memory_page_size / _memory_span_size); - _memory_heap_reserve_count = (_memory_span_map_count > DEFAULT_SPAN_MAP_COUNT) ? DEFAULT_SPAN_MAP_COUNT : _memory_span_map_count; - - _memory_config.page_size = _memory_page_size; - _memory_config.span_size = _memory_span_size; - _memory_config.span_map_count = _memory_span_map_count; - _memory_config.enable_huge_pages = _memory_huge_pages; - -#if ((defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD) || defined(__TINYC__) - if (pthread_key_create(&_memory_thread_heap, _rpmalloc_heap_release_raw_fc)) - return -1; -#endif -#if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK) - fls_key = FlsAlloc(&_rpmalloc_thread_destructor); -#endif - - //Setup all small and medium size classes - size_t iclass = 0; - _memory_size_class[iclass].block_size = SMALL_GRANULARITY; - _rpmalloc_adjust_size_class(iclass); - for (iclass = 1; iclass < SMALL_CLASS_COUNT; ++iclass) { - size_t size = iclass * SMALL_GRANULARITY; - _memory_size_class[iclass].block_size = (uint32_t)size; - _rpmalloc_adjust_size_class(iclass); - } - //At least two blocks per span, then fall back to large allocations - _memory_medium_size_limit = (_memory_span_size - SPAN_HEADER_SIZE) >> 1; - if (_memory_medium_size_limit > MEDIUM_SIZE_LIMIT) - _memory_medium_size_limit = MEDIUM_SIZE_LIMIT; - for (iclass = 0; iclass < MEDIUM_CLASS_COUNT; ++iclass) { - size_t size = SMALL_SIZE_LIMIT + ((iclass + 1) * MEDIUM_GRANULARITY); - if (size > _memory_medium_size_limit) - break; - _memory_size_class[SMALL_CLASS_COUNT + iclass].block_size = (uint32_t)size; - _rpmalloc_adjust_size_class(SMALL_CLASS_COUNT + iclass); - } - - _memory_orphan_heaps = 0; -#if RPMALLOC_FIRST_CLASS_HEAPS - _memory_first_class_orphan_heaps = 0; -#endif -#if ENABLE_STATISTICS - atomic_store32(&_memory_active_heaps, 0); - atomic_store32(&_mapped_pages, 0); - _mapped_pages_peak = 0; - atomic_store32(&_master_spans, 0); - atomic_store32(&_mapped_total, 0); - atomic_store32(&_unmapped_total, 0); - atomic_store32(&_mapped_pages_os, 0); - atomic_store32(&_huge_pages_current, 0); - _huge_pages_peak = 0; -#endif - memset(_memory_heaps, 0, sizeof(_memory_heaps)); - atomic_store32_release(&_memory_global_lock, 0); - - //Initialize this thread - rpmalloc_thread_initialize(); - return 0; -} - -//! Finalize the allocator -TRACY_API void -rpmalloc_finalize(void) { - rpmalloc_thread_finalize(1); - //rpmalloc_dump_statistics(stdout); - - if (_memory_global_reserve) { - atomic_add32(&_memory_global_reserve_master->remaining_spans, -(int32_t)_memory_global_reserve_count); - _memory_global_reserve_master = 0; - _memory_global_reserve_count = 0; - _memory_global_reserve = 0; - } - atomic_store32_release(&_memory_global_lock, 0); - - //Free all thread caches and fully free spans - for (size_t list_idx = 0; list_idx < HEAP_ARRAY_SIZE; ++list_idx) { - heap_t* heap = _memory_heaps[list_idx]; - while (heap) { - heap_t* next_heap = heap->next_heap; - heap->finalize = 1; - _rpmalloc_heap_global_finalize(heap); - heap = next_heap; - } - } - -#if ENABLE_GLOBAL_CACHE - //Free global caches - for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) - _rpmalloc_global_cache_finalize(&_memory_span_cache[iclass]); -#endif - -#if (defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD - pthread_key_delete(_memory_thread_heap); -#endif -#if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK) - FlsFree(fls_key); - fls_key = 0; -#endif -#if ENABLE_STATISTICS - //If you hit these asserts you probably have memory leaks (perhaps global scope data doing dynamic allocations) or double frees in your code - rpmalloc_assert(atomic_load32(&_mapped_pages) == 0, "Memory leak detected"); - rpmalloc_assert(atomic_load32(&_mapped_pages_os) == 0, "Memory leak detected"); -#endif - - _rpmalloc_initialized = 0; -} - -//! Initialize thread, assign heap -TRACY_API void -rpmalloc_thread_initialize(void) { - if (!get_thread_heap_raw()) { - heap_t* heap = _rpmalloc_heap_allocate(0); - if (heap) { - _rpmalloc_stat_inc(&_memory_active_heaps); - set_thread_heap(heap); -#if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK) - FlsSetValue(fls_key, heap); -#endif - } - } -} - -//! Finalize thread, orphan heap -TRACY_API void -rpmalloc_thread_finalize(int release_caches) { - heap_t* heap = get_thread_heap_raw(); - if (heap) - _rpmalloc_heap_release_raw(heap, release_caches); - set_thread_heap(0); -#if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK) - FlsSetValue(fls_key, 0); -#endif -} - -int -rpmalloc_is_thread_initialized(void) { - return (get_thread_heap_raw() != 0) ? 1 : 0; -} - -const rpmalloc_config_t* -rpmalloc_config(void) { - return &_memory_config; -} - -// Extern interface - -TRACY_API RPMALLOC_ALLOCATOR void* -rpmalloc(size_t size) { -#if ENABLE_VALIDATE_ARGS - if (size >= MAX_ALLOC_SIZE) { - errno = EINVAL; - return 0; - } -#endif - heap_t* heap = get_thread_heap(); - return _rpmalloc_allocate(heap, size); -} - -TRACY_API void -rpfree(void* ptr) { - _rpmalloc_deallocate(ptr); -} - -extern inline RPMALLOC_ALLOCATOR void* -rpcalloc(size_t num, size_t size) { - size_t total; -#if ENABLE_VALIDATE_ARGS -#if PLATFORM_WINDOWS - int err = SizeTMult(num, size, &total); - if ((err != S_OK) || (total >= MAX_ALLOC_SIZE)) { - errno = EINVAL; - return 0; - } -#else - int err = __builtin_umull_overflow(num, size, &total); - if (err || (total >= MAX_ALLOC_SIZE)) { - errno = EINVAL; - return 0; - } -#endif -#else - total = num * size; -#endif - heap_t* heap = get_thread_heap(); - void* block = _rpmalloc_allocate(heap, total); - if (block) - memset(block, 0, total); - return block; -} - -TRACY_API RPMALLOC_ALLOCATOR void* -rprealloc(void* ptr, size_t size) { -#if ENABLE_VALIDATE_ARGS - if (size >= MAX_ALLOC_SIZE) { - errno = EINVAL; - return ptr; - } -#endif - heap_t* heap = get_thread_heap(); - return _rpmalloc_reallocate(heap, ptr, size, 0, 0); -} - -extern RPMALLOC_ALLOCATOR void* -rpaligned_realloc(void* ptr, size_t alignment, size_t size, size_t oldsize, - unsigned int flags) { -#if ENABLE_VALIDATE_ARGS - if ((size + alignment < size) || (alignment > _memory_page_size)) { - errno = EINVAL; - return 0; - } -#endif - heap_t* heap = get_thread_heap(); - return _rpmalloc_aligned_reallocate(heap, ptr, alignment, size, oldsize, flags); -} - -extern RPMALLOC_ALLOCATOR void* -rpaligned_alloc(size_t alignment, size_t size) { - heap_t* heap = get_thread_heap(); - return _rpmalloc_aligned_allocate(heap, alignment, size); -} - -extern inline RPMALLOC_ALLOCATOR void* -rpaligned_calloc(size_t alignment, size_t num, size_t size) { - size_t total; -#if ENABLE_VALIDATE_ARGS -#if PLATFORM_WINDOWS - int err = SizeTMult(num, size, &total); - if ((err != S_OK) || (total >= MAX_ALLOC_SIZE)) { - errno = EINVAL; - return 0; - } -#else - int err = __builtin_umull_overflow(num, size, &total); - if (err || (total >= MAX_ALLOC_SIZE)) { - errno = EINVAL; - return 0; - } -#endif -#else - total = num * size; -#endif - void* block = rpaligned_alloc(alignment, total); - if (block) - memset(block, 0, total); - return block; -} - -extern inline RPMALLOC_ALLOCATOR void* -rpmemalign(size_t alignment, size_t size) { - return rpaligned_alloc(alignment, size); -} - -extern inline int -rpposix_memalign(void **memptr, size_t alignment, size_t size) { - if (memptr) - *memptr = rpaligned_alloc(alignment, size); - else - return EINVAL; - return *memptr ? 0 : ENOMEM; -} - -extern inline size_t -rpmalloc_usable_size(void* ptr) { - return (ptr ? _rpmalloc_usable_size(ptr) : 0); -} - -extern inline void -rpmalloc_thread_collect(void) { -} - -void -rpmalloc_thread_statistics(rpmalloc_thread_statistics_t* stats) { - memset(stats, 0, sizeof(rpmalloc_thread_statistics_t)); - heap_t* heap = get_thread_heap_raw(); - if (!heap) - return; - - for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) { - size_class_t* size_class = _memory_size_class + iclass; - span_t* span = heap->size_class[iclass].partial_span; - while (span) { - size_t free_count = span->list_size; - size_t block_count = size_class->block_count; - if (span->free_list_limit < block_count) - block_count = span->free_list_limit; - free_count += (block_count - span->used_count); - stats->sizecache = free_count * size_class->block_size; - span = span->next; - } - } - -#if ENABLE_THREAD_CACHE - for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) { - span_cache_t* span_cache; - if (!iclass) - span_cache = &heap->span_cache; - else - span_cache = (span_cache_t*)(heap->span_large_cache + (iclass - 1)); - stats->spancache = span_cache->count * (iclass + 1) * _memory_span_size; - } -#endif - - span_t* deferred = (span_t*)atomic_load_ptr(&heap->span_free_deferred); - while (deferred) { - if (deferred->size_class != SIZE_CLASS_HUGE) - stats->spancache = (size_t)deferred->span_count * _memory_span_size; - deferred = (span_t*)deferred->free_list; - } - -#if ENABLE_STATISTICS - stats->thread_to_global = (size_t)atomic_load64(&heap->thread_to_global); - stats->global_to_thread = (size_t)atomic_load64(&heap->global_to_thread); - - for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) { - stats->span_use[iclass].current = (size_t)atomic_load32(&heap->span_use[iclass].current); - stats->span_use[iclass].peak = (size_t)atomic_load32(&heap->span_use[iclass].high); - stats->span_use[iclass].to_global = (size_t)atomic_load32(&heap->span_use[iclass].spans_to_global); - stats->span_use[iclass].from_global = (size_t)atomic_load32(&heap->span_use[iclass].spans_from_global); - stats->span_use[iclass].to_cache = (size_t)atomic_load32(&heap->span_use[iclass].spans_to_cache); - stats->span_use[iclass].from_cache = (size_t)atomic_load32(&heap->span_use[iclass].spans_from_cache); - stats->span_use[iclass].to_reserved = (size_t)atomic_load32(&heap->span_use[iclass].spans_to_reserved); - stats->span_use[iclass].from_reserved = (size_t)atomic_load32(&heap->span_use[iclass].spans_from_reserved); - stats->span_use[iclass].map_calls = (size_t)atomic_load32(&heap->span_use[iclass].spans_map_calls); - } - for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) { - stats->size_use[iclass].alloc_current = (size_t)atomic_load32(&heap->size_class_use[iclass].alloc_current); - stats->size_use[iclass].alloc_peak = (size_t)heap->size_class_use[iclass].alloc_peak; - stats->size_use[iclass].alloc_total = (size_t)atomic_load32(&heap->size_class_use[iclass].alloc_total); - stats->size_use[iclass].free_total = (size_t)atomic_load32(&heap->size_class_use[iclass].free_total); - stats->size_use[iclass].spans_to_cache = (size_t)atomic_load32(&heap->size_class_use[iclass].spans_to_cache); - stats->size_use[iclass].spans_from_cache = (size_t)atomic_load32(&heap->size_class_use[iclass].spans_from_cache); - stats->size_use[iclass].spans_from_reserved = (size_t)atomic_load32(&heap->size_class_use[iclass].spans_from_reserved); - stats->size_use[iclass].map_calls = (size_t)atomic_load32(&heap->size_class_use[iclass].spans_map_calls); - } -#endif -} - -void -rpmalloc_global_statistics(rpmalloc_global_statistics_t* stats) { - memset(stats, 0, sizeof(rpmalloc_global_statistics_t)); -#if ENABLE_STATISTICS - stats->mapped = (size_t)atomic_load32(&_mapped_pages) * _memory_page_size; - stats->mapped_peak = (size_t)_mapped_pages_peak * _memory_page_size; - stats->mapped_total = (size_t)atomic_load32(&_mapped_total) * _memory_page_size; - stats->unmapped_total = (size_t)atomic_load32(&_unmapped_total) * _memory_page_size; - stats->huge_alloc = (size_t)atomic_load32(&_huge_pages_current) * _memory_page_size; - stats->huge_alloc_peak = (size_t)_huge_pages_peak * _memory_page_size; -#endif -#if ENABLE_GLOBAL_CACHE - for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) - stats->cached += _memory_span_cache[iclass].count * (iclass + 1) * _memory_span_size; -#endif -} - -#if ENABLE_STATISTICS - -static void -_memory_heap_dump_statistics(heap_t* heap, void* file) { - fprintf(file, "Heap %d stats:\n", heap->id); - fprintf(file, "Class CurAlloc PeakAlloc TotAlloc TotFree BlkSize BlkCount SpansCur SpansPeak PeakAllocMiB ToCacheMiB FromCacheMiB FromReserveMiB MmapCalls\n"); - for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) { - if (!atomic_load32(&heap->size_class_use[iclass].alloc_total)) - continue; - fprintf(file, "%3u: %10u %10u %10u %10u %8u %8u %8d %9d %13zu %11zu %12zu %14zu %9u\n", (uint32_t)iclass, - atomic_load32(&heap->size_class_use[iclass].alloc_current), - heap->size_class_use[iclass].alloc_peak, - atomic_load32(&heap->size_class_use[iclass].alloc_total), - atomic_load32(&heap->size_class_use[iclass].free_total), - _memory_size_class[iclass].block_size, - _memory_size_class[iclass].block_count, - atomic_load32(&heap->size_class_use[iclass].spans_current), - heap->size_class_use[iclass].spans_peak, - ((size_t)heap->size_class_use[iclass].alloc_peak * (size_t)_memory_size_class[iclass].block_size) / (size_t)(1024 * 1024), - ((size_t)atomic_load32(&heap->size_class_use[iclass].spans_to_cache) * _memory_span_size) / (size_t)(1024 * 1024), - ((size_t)atomic_load32(&heap->size_class_use[iclass].spans_from_cache) * _memory_span_size) / (size_t)(1024 * 1024), - ((size_t)atomic_load32(&heap->size_class_use[iclass].spans_from_reserved) * _memory_span_size) / (size_t)(1024 * 1024), - atomic_load32(&heap->size_class_use[iclass].spans_map_calls)); - } - fprintf(file, "Spans Current Peak Deferred PeakMiB Cached ToCacheMiB FromCacheMiB ToReserveMiB FromReserveMiB ToGlobalMiB FromGlobalMiB MmapCalls\n"); - for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) { - if (!atomic_load32(&heap->span_use[iclass].high) && !atomic_load32(&heap->span_use[iclass].spans_map_calls)) - continue; - fprintf(file, "%4u: %8d %8u %8u %8zu %7u %11zu %12zu %12zu %14zu %11zu %13zu %10u\n", (uint32_t)(iclass + 1), - atomic_load32(&heap->span_use[iclass].current), - atomic_load32(&heap->span_use[iclass].high), - atomic_load32(&heap->span_use[iclass].spans_deferred), - ((size_t)atomic_load32(&heap->span_use[iclass].high) * (size_t)_memory_span_size * (iclass + 1)) / (size_t)(1024 * 1024), -#if ENABLE_THREAD_CACHE - (unsigned int)(!iclass ? heap->span_cache.count : heap->span_large_cache[iclass - 1].count), - ((size_t)atomic_load32(&heap->span_use[iclass].spans_to_cache) * (iclass + 1) * _memory_span_size) / (size_t)(1024 * 1024), - ((size_t)atomic_load32(&heap->span_use[iclass].spans_from_cache) * (iclass + 1) * _memory_span_size) / (size_t)(1024 * 1024), -#else - 0, (size_t)0, (size_t)0, -#endif - ((size_t)atomic_load32(&heap->span_use[iclass].spans_to_reserved) * (iclass + 1) * _memory_span_size) / (size_t)(1024 * 1024), - ((size_t)atomic_load32(&heap->span_use[iclass].spans_from_reserved) * (iclass + 1) * _memory_span_size) / (size_t)(1024 * 1024), - ((size_t)atomic_load32(&heap->span_use[iclass].spans_to_global) * (size_t)_memory_span_size * (iclass + 1)) / (size_t)(1024 * 1024), - ((size_t)atomic_load32(&heap->span_use[iclass].spans_from_global) * (size_t)_memory_span_size * (iclass + 1)) / (size_t)(1024 * 1024), - atomic_load32(&heap->span_use[iclass].spans_map_calls)); - } - fprintf(file, "Full spans: %zu\n", heap->full_span_count); - fprintf(file, "ThreadToGlobalMiB GlobalToThreadMiB\n"); - fprintf(file, "%17zu %17zu\n", (size_t)atomic_load64(&heap->thread_to_global) / (size_t)(1024 * 1024), (size_t)atomic_load64(&heap->global_to_thread) / (size_t)(1024 * 1024)); -} - -#endif - -void -rpmalloc_dump_statistics(void* file) { -#if ENABLE_STATISTICS - for (size_t list_idx = 0; list_idx < HEAP_ARRAY_SIZE; ++list_idx) { - heap_t* heap = _memory_heaps[list_idx]; - while (heap) { - int need_dump = 0; - for (size_t iclass = 0; !need_dump && (iclass < SIZE_CLASS_COUNT); ++iclass) { - if (!atomic_load32(&heap->size_class_use[iclass].alloc_total)) { - rpmalloc_assert(!atomic_load32(&heap->size_class_use[iclass].free_total), "Heap statistics counter mismatch"); - rpmalloc_assert(!atomic_load32(&heap->size_class_use[iclass].spans_map_calls), "Heap statistics counter mismatch"); - continue; - } - need_dump = 1; - } - for (size_t iclass = 0; !need_dump && (iclass < LARGE_CLASS_COUNT); ++iclass) { - if (!atomic_load32(&heap->span_use[iclass].high) && !atomic_load32(&heap->span_use[iclass].spans_map_calls)) - continue; - need_dump = 1; - } - if (need_dump) - _memory_heap_dump_statistics(heap, file); - heap = heap->next_heap; - } - } - fprintf(file, "Global stats:\n"); - size_t huge_current = (size_t)atomic_load32(&_huge_pages_current) * _memory_page_size; - size_t huge_peak = (size_t)_huge_pages_peak * _memory_page_size; - fprintf(file, "HugeCurrentMiB HugePeakMiB\n"); - fprintf(file, "%14zu %11zu\n", huge_current / (size_t)(1024 * 1024), huge_peak / (size_t)(1024 * 1024)); - - fprintf(file, "GlobalCacheMiB\n"); - for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) { - global_cache_t* cache = _memory_span_cache + iclass; - size_t global_cache = (size_t)cache->count * iclass * _memory_span_size; - - size_t global_overflow_cache = 0; - span_t* span = cache->overflow; - while (span) { - global_overflow_cache += iclass * _memory_span_size; - span = span->next; - } - if (global_cache || global_overflow_cache || cache->insert_count || cache->extract_count) - fprintf(file, "%4zu: %8zuMiB (%8zuMiB overflow) %14zu insert %14zu extract\n", iclass + 1, global_cache / (size_t)(1024 * 1024), global_overflow_cache / (size_t)(1024 * 1024), cache->insert_count, cache->extract_count); - } - - size_t mapped = (size_t)atomic_load32(&_mapped_pages) * _memory_page_size; - size_t mapped_os = (size_t)atomic_load32(&_mapped_pages_os) * _memory_page_size; - size_t mapped_peak = (size_t)_mapped_pages_peak * _memory_page_size; - size_t mapped_total = (size_t)atomic_load32(&_mapped_total) * _memory_page_size; - size_t unmapped_total = (size_t)atomic_load32(&_unmapped_total) * _memory_page_size; - fprintf(file, "MappedMiB MappedOSMiB MappedPeakMiB MappedTotalMiB UnmappedTotalMiB\n"); - fprintf(file, "%9zu %11zu %13zu %14zu %16zu\n", - mapped / (size_t)(1024 * 1024), - mapped_os / (size_t)(1024 * 1024), - mapped_peak / (size_t)(1024 * 1024), - mapped_total / (size_t)(1024 * 1024), - unmapped_total / (size_t)(1024 * 1024)); - - fprintf(file, "\n"); -#if 0 - int64_t allocated = atomic_load64(&_allocation_counter); - int64_t deallocated = atomic_load64(&_deallocation_counter); - fprintf(file, "Allocation count: %lli\n", allocated); - fprintf(file, "Deallocation count: %lli\n", deallocated); - fprintf(file, "Current allocations: %lli\n", (allocated - deallocated)); - fprintf(file, "Master spans: %d\n", atomic_load32(&_master_spans)); - fprintf(file, "Dangling master spans: %d\n", atomic_load32(&_unmapped_master_spans)); -#endif -#endif - (void)sizeof(file); -} - -#if RPMALLOC_FIRST_CLASS_HEAPS - -extern inline rpmalloc_heap_t* -rpmalloc_heap_acquire(void) { - // Must be a pristine heap from newly mapped memory pages, or else memory blocks - // could already be allocated from the heap which would (wrongly) be released when - // heap is cleared with rpmalloc_heap_free_all(). Also heaps guaranteed to be - // pristine from the dedicated orphan list can be used. - heap_t* heap = _rpmalloc_heap_allocate(1); - heap->owner_thread = 0; - _rpmalloc_stat_inc(&_memory_active_heaps); - return heap; -} - -extern inline void -rpmalloc_heap_release(rpmalloc_heap_t* heap) { - if (heap) - _rpmalloc_heap_release(heap, 1, 1); -} - -extern inline RPMALLOC_ALLOCATOR void* -rpmalloc_heap_alloc(rpmalloc_heap_t* heap, size_t size) { -#if ENABLE_VALIDATE_ARGS - if (size >= MAX_ALLOC_SIZE) { - errno = EINVAL; - return 0; - } -#endif - return _rpmalloc_allocate(heap, size); -} - -extern inline RPMALLOC_ALLOCATOR void* -rpmalloc_heap_aligned_alloc(rpmalloc_heap_t* heap, size_t alignment, size_t size) { -#if ENABLE_VALIDATE_ARGS - if (size >= MAX_ALLOC_SIZE) { - errno = EINVAL; - return 0; - } -#endif - return _rpmalloc_aligned_allocate(heap, alignment, size); -} - -extern inline RPMALLOC_ALLOCATOR void* -rpmalloc_heap_calloc(rpmalloc_heap_t* heap, size_t num, size_t size) { - return rpmalloc_heap_aligned_calloc(heap, 0, num, size); -} - -extern inline RPMALLOC_ALLOCATOR void* -rpmalloc_heap_aligned_calloc(rpmalloc_heap_t* heap, size_t alignment, size_t num, size_t size) { - size_t total; -#if ENABLE_VALIDATE_ARGS -#if PLATFORM_WINDOWS - int err = SizeTMult(num, size, &total); - if ((err != S_OK) || (total >= MAX_ALLOC_SIZE)) { - errno = EINVAL; - return 0; - } -#else - int err = __builtin_umull_overflow(num, size, &total); - if (err || (total >= MAX_ALLOC_SIZE)) { - errno = EINVAL; - return 0; - } -#endif -#else - total = num * size; -#endif - void* block = _rpmalloc_aligned_allocate(heap, alignment, total); - if (block) - memset(block, 0, total); - return block; -} - -extern inline RPMALLOC_ALLOCATOR void* -rpmalloc_heap_realloc(rpmalloc_heap_t* heap, void* ptr, size_t size, unsigned int flags) { -#if ENABLE_VALIDATE_ARGS - if (size >= MAX_ALLOC_SIZE) { - errno = EINVAL; - return ptr; - } -#endif - return _rpmalloc_reallocate(heap, ptr, size, 0, flags); -} - -extern inline RPMALLOC_ALLOCATOR void* -rpmalloc_heap_aligned_realloc(rpmalloc_heap_t* heap, void* ptr, size_t alignment, size_t size, unsigned int flags) { -#if ENABLE_VALIDATE_ARGS - if ((size + alignment < size) || (alignment > _memory_page_size)) { - errno = EINVAL; - return 0; - } -#endif - return _rpmalloc_aligned_reallocate(heap, ptr, alignment, size, 0, flags); -} - -extern inline void -rpmalloc_heap_free(rpmalloc_heap_t* heap, void* ptr) { - (void)sizeof(heap); - _rpmalloc_deallocate(ptr); -} - -extern inline void -rpmalloc_heap_free_all(rpmalloc_heap_t* heap) { - span_t* span; - span_t* next_span; - - _rpmalloc_heap_cache_adopt_deferred(heap, 0); - - for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) { - span = heap->size_class[iclass].partial_span; - while (span) { - next_span = span->next; - _rpmalloc_heap_cache_insert(heap, span); - span = next_span; - } - heap->size_class[iclass].partial_span = 0; - span = heap->full_span[iclass]; - while (span) { - next_span = span->next; - _rpmalloc_heap_cache_insert(heap, span); - span = next_span; - } - } - memset(heap->size_class, 0, sizeof(heap->size_class)); - memset(heap->full_span, 0, sizeof(heap->full_span)); - - span = heap->large_huge_span; - while (span) { - next_span = span->next; - if (UNEXPECTED(span->size_class == SIZE_CLASS_HUGE)) - _rpmalloc_deallocate_huge(span); - else - _rpmalloc_heap_cache_insert(heap, span); - span = next_span; - } - heap->large_huge_span = 0; - heap->full_span_count = 0; - -#if ENABLE_THREAD_CACHE - for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) { - span_cache_t* span_cache; - if (!iclass) - span_cache = &heap->span_cache; - else - span_cache = (span_cache_t*)(heap->span_large_cache + (iclass - 1)); - if (!span_cache->count) - continue; -#if ENABLE_GLOBAL_CACHE - _rpmalloc_stat_add64(&heap->thread_to_global, span_cache->count * (iclass + 1) * _memory_span_size); - _rpmalloc_stat_add(&heap->span_use[iclass].spans_to_global, span_cache->count); - _rpmalloc_global_cache_insert_spans(span_cache->span, iclass + 1, span_cache->count); -#else - for (size_t ispan = 0; ispan < span_cache->count; ++ispan) - _rpmalloc_span_unmap(span_cache->span[ispan]); -#endif - span_cache->count = 0; - } -#endif - -#if ENABLE_STATISTICS - for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) { - atomic_store32(&heap->size_class_use[iclass].alloc_current, 0); - atomic_store32(&heap->size_class_use[iclass].spans_current, 0); - } - for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) { - atomic_store32(&heap->span_use[iclass].current, 0); - } -#endif -} - -extern inline void -rpmalloc_heap_thread_set_current(rpmalloc_heap_t* heap) { - heap_t* prev_heap = get_thread_heap_raw(); - if (prev_heap != heap) { - set_thread_heap(heap); - if (prev_heap) - rpmalloc_heap_release(prev_heap); - } -} - -#endif - -} - -#endif diff --git a/src/third_party/tracy/client/tracy_rpmalloc.hpp b/src/third_party/tracy/client/tracy_rpmalloc.hpp deleted file mode 100644 index 51216a21..00000000 --- a/src/third_party/tracy/client/tracy_rpmalloc.hpp +++ /dev/null @@ -1,363 +0,0 @@ -/* rpmalloc.h - Memory allocator - Public Domain - 2016 Mattias Jansson - * - * This library provides a cross-platform lock free thread caching malloc implementation in C11. - * The latest source code is always available at - * - * https://github.com/mjansson/rpmalloc - * - * This library is put in the public domain; you can redistribute it and/or modify it without any restrictions. - * - */ - -#pragma once - -#include -#include "../common/TracyApi.h" - -namespace tracy -{ - -#if defined(__clang__) || defined(__GNUC__) -# define RPMALLOC_EXPORT __attribute__((visibility("default"))) -# define RPMALLOC_ALLOCATOR -# if (defined(__clang_major__) && (__clang_major__ < 4)) || (defined(__GNUC__) && defined(ENABLE_PRELOAD) && ENABLE_PRELOAD) -# define RPMALLOC_ATTRIB_MALLOC -# define RPMALLOC_ATTRIB_ALLOC_SIZE(size) -# define RPMALLOC_ATTRIB_ALLOC_SIZE2(count, size) -# else -# define RPMALLOC_ATTRIB_MALLOC __attribute__((__malloc__)) -# define RPMALLOC_ATTRIB_ALLOC_SIZE(size) __attribute__((alloc_size(size))) -# define RPMALLOC_ATTRIB_ALLOC_SIZE2(count, size) __attribute__((alloc_size(count, size))) -# endif -# define RPMALLOC_CDECL -#elif defined(_MSC_VER) -# define RPMALLOC_EXPORT -# define RPMALLOC_ALLOCATOR __declspec(allocator) __declspec(restrict) -# define RPMALLOC_ATTRIB_MALLOC -# define RPMALLOC_ATTRIB_ALLOC_SIZE(size) -# define RPMALLOC_ATTRIB_ALLOC_SIZE2(count,size) -# define RPMALLOC_CDECL __cdecl -#else -# define RPMALLOC_EXPORT -# define RPMALLOC_ALLOCATOR -# define RPMALLOC_ATTRIB_MALLOC -# define RPMALLOC_ATTRIB_ALLOC_SIZE(size) -# define RPMALLOC_ATTRIB_ALLOC_SIZE2(count,size) -# define RPMALLOC_CDECL -#endif - -//! Define RPMALLOC_CONFIGURABLE to enable configuring sizes. Will introduce -// a very small overhead due to some size calculations not being compile time constants -#ifndef RPMALLOC_CONFIGURABLE -#define RPMALLOC_CONFIGURABLE 0 -#endif - -//! Define RPMALLOC_FIRST_CLASS_HEAPS to enable heap based API (rpmalloc_heap_* functions). -// Will introduce a very small overhead to track fully allocated spans in heaps -#ifndef RPMALLOC_FIRST_CLASS_HEAPS -#define RPMALLOC_FIRST_CLASS_HEAPS 0 -#endif - -//! Flag to rpaligned_realloc to not preserve content in reallocation -#define RPMALLOC_NO_PRESERVE 1 -//! Flag to rpaligned_realloc to fail and return null pointer if grow cannot be done in-place, -// in which case the original pointer is still valid (just like a call to realloc which failes to allocate -// a new block). -#define RPMALLOC_GROW_OR_FAIL 2 - -typedef struct rpmalloc_global_statistics_t { - //! Current amount of virtual memory mapped, all of which might not have been committed (only if ENABLE_STATISTICS=1) - size_t mapped; - //! Peak amount of virtual memory mapped, all of which might not have been committed (only if ENABLE_STATISTICS=1) - size_t mapped_peak; - //! Current amount of memory in global caches for small and medium sizes (<32KiB) - size_t cached; - //! Current amount of memory allocated in huge allocations, i.e larger than LARGE_SIZE_LIMIT which is 2MiB by default (only if ENABLE_STATISTICS=1) - size_t huge_alloc; - //! Peak amount of memory allocated in huge allocations, i.e larger than LARGE_SIZE_LIMIT which is 2MiB by default (only if ENABLE_STATISTICS=1) - size_t huge_alloc_peak; - //! Total amount of memory mapped since initialization (only if ENABLE_STATISTICS=1) - size_t mapped_total; - //! Total amount of memory unmapped since initialization (only if ENABLE_STATISTICS=1) - size_t unmapped_total; -} rpmalloc_global_statistics_t; - -typedef struct rpmalloc_thread_statistics_t { - //! Current number of bytes available in thread size class caches for small and medium sizes (<32KiB) - size_t sizecache; - //! Current number of bytes available in thread span caches for small and medium sizes (<32KiB) - size_t spancache; - //! Total number of bytes transitioned from thread cache to global cache (only if ENABLE_STATISTICS=1) - size_t thread_to_global; - //! Total number of bytes transitioned from global cache to thread cache (only if ENABLE_STATISTICS=1) - size_t global_to_thread; - //! Per span count statistics (only if ENABLE_STATISTICS=1) - struct { - //! Currently used number of spans - size_t current; - //! High water mark of spans used - size_t peak; - //! Number of spans transitioned to global cache - size_t to_global; - //! Number of spans transitioned from global cache - size_t from_global; - //! Number of spans transitioned to thread cache - size_t to_cache; - //! Number of spans transitioned from thread cache - size_t from_cache; - //! Number of spans transitioned to reserved state - size_t to_reserved; - //! Number of spans transitioned from reserved state - size_t from_reserved; - //! Number of raw memory map calls (not hitting the reserve spans but resulting in actual OS mmap calls) - size_t map_calls; - } span_use[64]; - //! Per size class statistics (only if ENABLE_STATISTICS=1) - struct { - //! Current number of allocations - size_t alloc_current; - //! Peak number of allocations - size_t alloc_peak; - //! Total number of allocations - size_t alloc_total; - //! Total number of frees - size_t free_total; - //! Number of spans transitioned to cache - size_t spans_to_cache; - //! Number of spans transitioned from cache - size_t spans_from_cache; - //! Number of spans transitioned from reserved state - size_t spans_from_reserved; - //! Number of raw memory map calls (not hitting the reserve spans but resulting in actual OS mmap calls) - size_t map_calls; - } size_use[128]; -} rpmalloc_thread_statistics_t; - -typedef struct rpmalloc_config_t { - //! Map memory pages for the given number of bytes. The returned address MUST be - // aligned to the rpmalloc span size, which will always be a power of two. - // Optionally the function can store an alignment offset in the offset variable - // in case it performs alignment and the returned pointer is offset from the - // actual start of the memory region due to this alignment. The alignment offset - // will be passed to the memory unmap function. The alignment offset MUST NOT be - // larger than 65535 (storable in an uint16_t), if it is you must use natural - // alignment to shift it into 16 bits. If you set a memory_map function, you - // must also set a memory_unmap function or else the default implementation will - // be used for both. This function must be thread safe, it can be called by - // multiple threads simultaneously. - void* (*memory_map)(size_t size, size_t* offset); - //! Unmap the memory pages starting at address and spanning the given number of bytes. - // If release is set to non-zero, the unmap is for an entire span range as returned by - // a previous call to memory_map and that the entire range should be released. The - // release argument holds the size of the entire span range. If release is set to 0, - // the unmap is a partial decommit of a subset of the mapped memory range. - // If you set a memory_unmap function, you must also set a memory_map function or - // else the default implementation will be used for both. This function must be thread - // safe, it can be called by multiple threads simultaneously. - void (*memory_unmap)(void* address, size_t size, size_t offset, size_t release); - //! Called when an assert fails, if asserts are enabled. Will use the standard assert() - // if this is not set. - void (*error_callback)(const char* message); - //! Called when a call to map memory pages fails (out of memory). If this callback is - // not set or returns zero the library will return a null pointer in the allocation - // call. If this callback returns non-zero the map call will be retried. The argument - // passed is the number of bytes that was requested in the map call. Only used if - // the default system memory map function is used (memory_map callback is not set). - int (*map_fail_callback)(size_t size); - //! Size of memory pages. The page size MUST be a power of two. All memory mapping - // requests to memory_map will be made with size set to a multiple of the page size. - // Used if RPMALLOC_CONFIGURABLE is defined to 1, otherwise system page size is used. - size_t page_size; - //! Size of a span of memory blocks. MUST be a power of two, and in [4096,262144] - // range (unless 0 - set to 0 to use the default span size). Used if RPMALLOC_CONFIGURABLE - // is defined to 1. - size_t span_size; - //! Number of spans to map at each request to map new virtual memory blocks. This can - // be used to minimize the system call overhead at the cost of virtual memory address - // space. The extra mapped pages will not be written until actually used, so physical - // committed memory should not be affected in the default implementation. Will be - // aligned to a multiple of spans that match memory page size in case of huge pages. - size_t span_map_count; - //! Enable use of large/huge pages. If this flag is set to non-zero and page size is - // zero, the allocator will try to enable huge pages and auto detect the configuration. - // If this is set to non-zero and page_size is also non-zero, the allocator will - // assume huge pages have been configured and enabled prior to initializing the - // allocator. - // For Windows, see https://docs.microsoft.com/en-us/windows/desktop/memory/large-page-support - // For Linux, see https://www.kernel.org/doc/Documentation/vm/hugetlbpage.txt - int enable_huge_pages; - //! Respectively allocated pages and huge allocated pages names for systems - // supporting it to be able to distinguish among anonymous regions. - const char *page_name; - const char *huge_page_name; -} rpmalloc_config_t; - -//! Initialize allocator with default configuration -TRACY_API int -rpmalloc_initialize(void); - -//! Initialize allocator with given configuration -RPMALLOC_EXPORT int -rpmalloc_initialize_config(const rpmalloc_config_t* config); - -//! Get allocator configuration -RPMALLOC_EXPORT const rpmalloc_config_t* -rpmalloc_config(void); - -//! Finalize allocator -TRACY_API void -rpmalloc_finalize(void); - -//! Initialize allocator for calling thread -TRACY_API void -rpmalloc_thread_initialize(void); - -//! Finalize allocator for calling thread -TRACY_API void -rpmalloc_thread_finalize(int release_caches); - -//! Perform deferred deallocations pending for the calling thread heap -RPMALLOC_EXPORT void -rpmalloc_thread_collect(void); - -//! Query if allocator is initialized for calling thread -RPMALLOC_EXPORT int -rpmalloc_is_thread_initialized(void); - -//! Get per-thread statistics -RPMALLOC_EXPORT void -rpmalloc_thread_statistics(rpmalloc_thread_statistics_t* stats); - -//! Get global statistics -RPMALLOC_EXPORT void -rpmalloc_global_statistics(rpmalloc_global_statistics_t* stats); - -//! Dump all statistics in human readable format to file (should be a FILE*) -RPMALLOC_EXPORT void -rpmalloc_dump_statistics(void* file); - -//! Allocate a memory block of at least the given size -TRACY_API RPMALLOC_ALLOCATOR void* -rpmalloc(size_t size) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE(1); - -//! Free the given memory block -TRACY_API void -rpfree(void* ptr); - -//! Allocate a memory block of at least the given size and zero initialize it -RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void* -rpcalloc(size_t num, size_t size) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE2(1, 2); - -//! Reallocate the given block to at least the given size -TRACY_API RPMALLOC_ALLOCATOR void* -rprealloc(void* ptr, size_t size) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE(2); - -//! Reallocate the given block to at least the given size and alignment, -// with optional control flags (see RPMALLOC_NO_PRESERVE). -// Alignment must be a power of two and a multiple of sizeof(void*), -// and should ideally be less than memory page size. A caveat of rpmalloc -// internals is that this must also be strictly less than the span size (default 64KiB) -RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void* -rpaligned_realloc(void* ptr, size_t alignment, size_t size, size_t oldsize, unsigned int flags) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE(3); - -//! Allocate a memory block of at least the given size and alignment. -// Alignment must be a power of two and a multiple of sizeof(void*), -// and should ideally be less than memory page size. A caveat of rpmalloc -// internals is that this must also be strictly less than the span size (default 64KiB) -RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void* -rpaligned_alloc(size_t alignment, size_t size) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE(2); - -//! Allocate a memory block of at least the given size and alignment, and zero initialize it. -// Alignment must be a power of two and a multiple of sizeof(void*), -// and should ideally be less than memory page size. A caveat of rpmalloc -// internals is that this must also be strictly less than the span size (default 64KiB) -RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void* -rpaligned_calloc(size_t alignment, size_t num, size_t size) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE2(2, 3); - -//! Allocate a memory block of at least the given size and alignment. -// Alignment must be a power of two and a multiple of sizeof(void*), -// and should ideally be less than memory page size. A caveat of rpmalloc -// internals is that this must also be strictly less than the span size (default 64KiB) -RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void* -rpmemalign(size_t alignment, size_t size) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE(2); - -//! Allocate a memory block of at least the given size and alignment. -// Alignment must be a power of two and a multiple of sizeof(void*), -// and should ideally be less than memory page size. A caveat of rpmalloc -// internals is that this must also be strictly less than the span size (default 64KiB) -RPMALLOC_EXPORT int -rpposix_memalign(void** memptr, size_t alignment, size_t size); - -//! Query the usable size of the given memory block (from given pointer to the end of block) -RPMALLOC_EXPORT size_t -rpmalloc_usable_size(void* ptr); - -#if RPMALLOC_FIRST_CLASS_HEAPS - -//! Heap type -typedef struct heap_t rpmalloc_heap_t; - -//! Acquire a new heap. Will reuse existing released heaps or allocate memory for a new heap -// if none available. Heap API is implemented with the strict assumption that only one single -// thread will call heap functions for a given heap at any given time, no functions are thread safe. -RPMALLOC_EXPORT rpmalloc_heap_t* -rpmalloc_heap_acquire(void); - -//! Release a heap (does NOT free the memory allocated by the heap, use rpmalloc_heap_free_all before destroying the heap). -// Releasing a heap will enable it to be reused by other threads. Safe to pass a null pointer. -RPMALLOC_EXPORT void -rpmalloc_heap_release(rpmalloc_heap_t* heap); - -//! Allocate a memory block of at least the given size using the given heap. -RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void* -rpmalloc_heap_alloc(rpmalloc_heap_t* heap, size_t size) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE(2); - -//! Allocate a memory block of at least the given size using the given heap. The returned -// block will have the requested alignment. Alignment must be a power of two and a multiple of sizeof(void*), -// and should ideally be less than memory page size. A caveat of rpmalloc -// internals is that this must also be strictly less than the span size (default 64KiB). -RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void* -rpmalloc_heap_aligned_alloc(rpmalloc_heap_t* heap, size_t alignment, size_t size) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE(3); - -//! Allocate a memory block of at least the given size using the given heap and zero initialize it. -RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void* -rpmalloc_heap_calloc(rpmalloc_heap_t* heap, size_t num, size_t size) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE2(2, 3); - -//! Allocate a memory block of at least the given size using the given heap and zero initialize it. The returned -// block will have the requested alignment. Alignment must either be zero, or a power of two and a multiple of sizeof(void*), -// and should ideally be less than memory page size. A caveat of rpmalloc -// internals is that this must also be strictly less than the span size (default 64KiB). -RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void* -rpmalloc_heap_aligned_calloc(rpmalloc_heap_t* heap, size_t alignment, size_t num, size_t size) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE2(2, 3); - -//! Reallocate the given block to at least the given size. The memory block MUST be allocated -// by the same heap given to this function. -RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void* -rpmalloc_heap_realloc(rpmalloc_heap_t* heap, void* ptr, size_t size, unsigned int flags) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE(3); - -//! Reallocate the given block to at least the given size. The memory block MUST be allocated -// by the same heap given to this function. The returned block will have the requested alignment. -// Alignment must be either zero, or a power of two and a multiple of sizeof(void*), and should ideally be -// less than memory page size. A caveat of rpmalloc internals is that this must also be strictly less than -// the span size (default 64KiB). -RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void* -rpmalloc_heap_aligned_realloc(rpmalloc_heap_t* heap, void* ptr, size_t alignment, size_t size, unsigned int flags) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE(4); - -//! Free the given memory block from the given heap. The memory block MUST be allocated -// by the same heap given to this function. -RPMALLOC_EXPORT void -rpmalloc_heap_free(rpmalloc_heap_t* heap, void* ptr); - -//! Free all memory allocated by the heap -RPMALLOC_EXPORT void -rpmalloc_heap_free_all(rpmalloc_heap_t* heap); - -//! Set the given heap as the current heap for the calling thread. A heap MUST only be current heap -// for a single thread, a heap can never be shared between multiple threads. The previous -// current heap for the calling thread is released to be reused by other threads. -RPMALLOC_EXPORT void -rpmalloc_heap_thread_set_current(rpmalloc_heap_t* heap); - -#endif - -} diff --git a/src/third_party/tracy/common/TracyAlign.hpp b/src/third_party/tracy/common/TracyAlign.hpp deleted file mode 100644 index c3531ba0..00000000 --- a/src/third_party/tracy/common/TracyAlign.hpp +++ /dev/null @@ -1,27 +0,0 @@ -#ifndef __TRACYALIGN_HPP__ -#define __TRACYALIGN_HPP__ - -#include - -#include "TracyForceInline.hpp" - -namespace tracy -{ - -template -tracy_force_inline T MemRead( const void* ptr ) -{ - T val; - memcpy( &val, ptr, sizeof( T ) ); - return val; -} - -template -tracy_force_inline void MemWrite( void* ptr, T val ) -{ - memcpy( ptr, &val, sizeof( T ) ); -} - -} - -#endif diff --git a/src/third_party/tracy/common/TracyAlloc.hpp b/src/third_party/tracy/common/TracyAlloc.hpp deleted file mode 100644 index ddb0e5df..00000000 --- a/src/third_party/tracy/common/TracyAlloc.hpp +++ /dev/null @@ -1,72 +0,0 @@ -#ifndef __TRACYALLOC_HPP__ -#define __TRACYALLOC_HPP__ - -#include - -#if defined TRACY_ENABLE && !defined __EMSCRIPTEN__ -# include "TracyApi.h" -# include "TracyForceInline.hpp" -# include "../client/tracy_rpmalloc.hpp" -# define TRACY_USE_RPMALLOC -#endif - -namespace tracy -{ - -#ifdef TRACY_USE_RPMALLOC -TRACY_API void InitRpmalloc(); -#else -static inline void InitRpmalloc() {} -#endif - -static inline void* tracy_malloc( size_t size ) -{ -#ifdef TRACY_USE_RPMALLOC - InitRpmalloc(); - return rpmalloc( size ); -#else - return malloc( size ); -#endif -} - -static inline void* tracy_malloc_fast( size_t size ) -{ -#ifdef TRACY_USE_RPMALLOC - return rpmalloc( size ); -#else - return malloc( size ); -#endif -} - -static inline void tracy_free( void* ptr ) -{ -#ifdef TRACY_USE_RPMALLOC - InitRpmalloc(); - rpfree( ptr ); -#else - free( ptr ); -#endif -} - -static inline void tracy_free_fast( void* ptr ) -{ -#ifdef TRACY_USE_RPMALLOC - rpfree( ptr ); -#else - free( ptr ); -#endif -} - -static inline void* tracy_realloc( void* ptr, size_t size ) -{ -#ifdef TRACY_USE_RPMALLOC - InitRpmalloc(); - return rprealloc( ptr, size ); -#else - return realloc( ptr, size ); -#endif -} - -} - -#endif diff --git a/src/third_party/tracy/common/TracyApi.h b/src/third_party/tracy/common/TracyApi.h deleted file mode 100644 index f396ce0c..00000000 --- a/src/third_party/tracy/common/TracyApi.h +++ /dev/null @@ -1,16 +0,0 @@ -#ifndef __TRACYAPI_H__ -#define __TRACYAPI_H__ - -#if defined _WIN32 -# if defined TRACY_EXPORTS -# define TRACY_API __declspec(dllexport) -# elif defined TRACY_IMPORTS -# define TRACY_API __declspec(dllimport) -# else -# define TRACY_API -# endif -#else -# define TRACY_API __attribute__((visibility("default"))) -#endif - -#endif // __TRACYAPI_H__ diff --git a/src/third_party/tracy/common/TracyColor.hpp b/src/third_party/tracy/common/TracyColor.hpp deleted file mode 100644 index 4825c0fb..00000000 --- a/src/third_party/tracy/common/TracyColor.hpp +++ /dev/null @@ -1,690 +0,0 @@ -#ifndef __TRACYCOLOR_HPP__ -#define __TRACYCOLOR_HPP__ - -namespace tracy -{ -struct Color -{ -enum ColorType -{ - Snow = 0xfffafa, - GhostWhite = 0xf8f8ff, - WhiteSmoke = 0xf5f5f5, - Gainsboro = 0xdcdcdc, - FloralWhite = 0xfffaf0, - OldLace = 0xfdf5e6, - Linen = 0xfaf0e6, - AntiqueWhite = 0xfaebd7, - PapayaWhip = 0xffefd5, - BlanchedAlmond = 0xffebcd, - Bisque = 0xffe4c4, - PeachPuff = 0xffdab9, - NavajoWhite = 0xffdead, - Moccasin = 0xffe4b5, - Cornsilk = 0xfff8dc, - Ivory = 0xfffff0, - LemonChiffon = 0xfffacd, - Seashell = 0xfff5ee, - Honeydew = 0xf0fff0, - MintCream = 0xf5fffa, - Azure = 0xf0ffff, - AliceBlue = 0xf0f8ff, - Lavender = 0xe6e6fa, - LavenderBlush = 0xfff0f5, - MistyRose = 0xffe4e1, - White = 0xffffff, - Black = 0x000000, - DarkSlateGray = 0x2f4f4f, - DarkSlateGrey = 0x2f4f4f, - DimGray = 0x696969, - DimGrey = 0x696969, - SlateGray = 0x708090, - SlateGrey = 0x708090, - LightSlateGray = 0x778899, - LightSlateGrey = 0x778899, - Gray = 0xbebebe, - Grey = 0xbebebe, - X11Gray = 0xbebebe, - X11Grey = 0xbebebe, - WebGray = 0x808080, - WebGrey = 0x808080, - LightGrey = 0xd3d3d3, - LightGray = 0xd3d3d3, - MidnightBlue = 0x191970, - Navy = 0x000080, - NavyBlue = 0x000080, - CornflowerBlue = 0x6495ed, - DarkSlateBlue = 0x483d8b, - SlateBlue = 0x6a5acd, - MediumSlateBlue = 0x7b68ee, - LightSlateBlue = 0x8470ff, - MediumBlue = 0x0000cd, - RoyalBlue = 0x4169e1, - Blue = 0x0000ff, - DodgerBlue = 0x1e90ff, - DeepSkyBlue = 0x00bfff, - SkyBlue = 0x87ceeb, - LightSkyBlue = 0x87cefa, - SteelBlue = 0x4682b4, - LightSteelBlue = 0xb0c4de, - LightBlue = 0xadd8e6, - PowderBlue = 0xb0e0e6, - PaleTurquoise = 0xafeeee, - DarkTurquoise = 0x00ced1, - MediumTurquoise = 0x48d1cc, - Turquoise = 0x40e0d0, - Cyan = 0x00ffff, - Aqua = 0x00ffff, - LightCyan = 0xe0ffff, - CadetBlue = 0x5f9ea0, - MediumAquamarine = 0x66cdaa, - Aquamarine = 0x7fffd4, - DarkGreen = 0x006400, - DarkOliveGreen = 0x556b2f, - DarkSeaGreen = 0x8fbc8f, - SeaGreen = 0x2e8b57, - MediumSeaGreen = 0x3cb371, - LightSeaGreen = 0x20b2aa, - PaleGreen = 0x98fb98, - SpringGreen = 0x00ff7f, - LawnGreen = 0x7cfc00, - Green = 0x00ff00, - Lime = 0x00ff00, - X11Green = 0x00ff00, - WebGreen = 0x008000, - Chartreuse = 0x7fff00, - MediumSpringGreen = 0x00fa9a, - GreenYellow = 0xadff2f, - LimeGreen = 0x32cd32, - YellowGreen = 0x9acd32, - ForestGreen = 0x228b22, - OliveDrab = 0x6b8e23, - DarkKhaki = 0xbdb76b, - Khaki = 0xf0e68c, - PaleGoldenrod = 0xeee8aa, - LightGoldenrodYellow = 0xfafad2, - LightYellow = 0xffffe0, - Yellow = 0xffff00, - Gold = 0xffd700, - LightGoldenrod = 0xeedd82, - Goldenrod = 0xdaa520, - DarkGoldenrod = 0xb8860b, - RosyBrown = 0xbc8f8f, - IndianRed = 0xcd5c5c, - SaddleBrown = 0x8b4513, - Sienna = 0xa0522d, - Peru = 0xcd853f, - Burlywood = 0xdeb887, - Beige = 0xf5f5dc, - Wheat = 0xf5deb3, - SandyBrown = 0xf4a460, - Tan = 0xd2b48c, - Chocolate = 0xd2691e, - Firebrick = 0xb22222, - Brown = 0xa52a2a, - DarkSalmon = 0xe9967a, - Salmon = 0xfa8072, - LightSalmon = 0xffa07a, - Orange = 0xffa500, - DarkOrange = 0xff8c00, - Coral = 0xff7f50, - LightCoral = 0xf08080, - Tomato = 0xff6347, - OrangeRed = 0xff4500, - Red = 0xff0000, - HotPink = 0xff69b4, - DeepPink = 0xff1493, - Pink = 0xffc0cb, - LightPink = 0xffb6c1, - PaleVioletRed = 0xdb7093, - Maroon = 0xb03060, - X11Maroon = 0xb03060, - WebMaroon = 0x800000, - MediumVioletRed = 0xc71585, - VioletRed = 0xd02090, - Magenta = 0xff00ff, - Fuchsia = 0xff00ff, - Violet = 0xee82ee, - Plum = 0xdda0dd, - Orchid = 0xda70d6, - MediumOrchid = 0xba55d3, - DarkOrchid = 0x9932cc, - DarkViolet = 0x9400d3, - BlueViolet = 0x8a2be2, - Purple = 0xa020f0, - X11Purple = 0xa020f0, - WebPurple = 0x800080, - MediumPurple = 0x9370db, - Thistle = 0xd8bfd8, - Snow1 = 0xfffafa, - Snow2 = 0xeee9e9, - Snow3 = 0xcdc9c9, - Snow4 = 0x8b8989, - Seashell1 = 0xfff5ee, - Seashell2 = 0xeee5de, - Seashell3 = 0xcdc5bf, - Seashell4 = 0x8b8682, - AntiqueWhite1 = 0xffefdb, - AntiqueWhite2 = 0xeedfcc, - AntiqueWhite3 = 0xcdc0b0, - AntiqueWhite4 = 0x8b8378, - Bisque1 = 0xffe4c4, - Bisque2 = 0xeed5b7, - Bisque3 = 0xcdb79e, - Bisque4 = 0x8b7d6b, - PeachPuff1 = 0xffdab9, - PeachPuff2 = 0xeecbad, - PeachPuff3 = 0xcdaf95, - PeachPuff4 = 0x8b7765, - NavajoWhite1 = 0xffdead, - NavajoWhite2 = 0xeecfa1, - NavajoWhite3 = 0xcdb38b, - NavajoWhite4 = 0x8b795e, - LemonChiffon1 = 0xfffacd, - LemonChiffon2 = 0xeee9bf, - LemonChiffon3 = 0xcdc9a5, - LemonChiffon4 = 0x8b8970, - Cornsilk1 = 0xfff8dc, - Cornsilk2 = 0xeee8cd, - Cornsilk3 = 0xcdc8b1, - Cornsilk4 = 0x8b8878, - Ivory1 = 0xfffff0, - Ivory2 = 0xeeeee0, - Ivory3 = 0xcdcdc1, - Ivory4 = 0x8b8b83, - Honeydew1 = 0xf0fff0, - Honeydew2 = 0xe0eee0, - Honeydew3 = 0xc1cdc1, - Honeydew4 = 0x838b83, - LavenderBlush1 = 0xfff0f5, - LavenderBlush2 = 0xeee0e5, - LavenderBlush3 = 0xcdc1c5, - LavenderBlush4 = 0x8b8386, - MistyRose1 = 0xffe4e1, - MistyRose2 = 0xeed5d2, - MistyRose3 = 0xcdb7b5, - MistyRose4 = 0x8b7d7b, - Azure1 = 0xf0ffff, - Azure2 = 0xe0eeee, - Azure3 = 0xc1cdcd, - Azure4 = 0x838b8b, - SlateBlue1 = 0x836fff, - SlateBlue2 = 0x7a67ee, - SlateBlue3 = 0x6959cd, - SlateBlue4 = 0x473c8b, - RoyalBlue1 = 0x4876ff, - RoyalBlue2 = 0x436eee, - RoyalBlue3 = 0x3a5fcd, - RoyalBlue4 = 0x27408b, - Blue1 = 0x0000ff, - Blue2 = 0x0000ee, - Blue3 = 0x0000cd, - Blue4 = 0x00008b, - DodgerBlue1 = 0x1e90ff, - DodgerBlue2 = 0x1c86ee, - DodgerBlue3 = 0x1874cd, - DodgerBlue4 = 0x104e8b, - SteelBlue1 = 0x63b8ff, - SteelBlue2 = 0x5cacee, - SteelBlue3 = 0x4f94cd, - SteelBlue4 = 0x36648b, - DeepSkyBlue1 = 0x00bfff, - DeepSkyBlue2 = 0x00b2ee, - DeepSkyBlue3 = 0x009acd, - DeepSkyBlue4 = 0x00688b, - SkyBlue1 = 0x87ceff, - SkyBlue2 = 0x7ec0ee, - SkyBlue3 = 0x6ca6cd, - SkyBlue4 = 0x4a708b, - LightSkyBlue1 = 0xb0e2ff, - LightSkyBlue2 = 0xa4d3ee, - LightSkyBlue3 = 0x8db6cd, - LightSkyBlue4 = 0x607b8b, - SlateGray1 = 0xc6e2ff, - SlateGray2 = 0xb9d3ee, - SlateGray3 = 0x9fb6cd, - SlateGray4 = 0x6c7b8b, - LightSteelBlue1 = 0xcae1ff, - LightSteelBlue2 = 0xbcd2ee, - LightSteelBlue3 = 0xa2b5cd, - LightSteelBlue4 = 0x6e7b8b, - LightBlue1 = 0xbfefff, - LightBlue2 = 0xb2dfee, - LightBlue3 = 0x9ac0cd, - LightBlue4 = 0x68838b, - LightCyan1 = 0xe0ffff, - LightCyan2 = 0xd1eeee, - LightCyan3 = 0xb4cdcd, - LightCyan4 = 0x7a8b8b, - PaleTurquoise1 = 0xbbffff, - PaleTurquoise2 = 0xaeeeee, - PaleTurquoise3 = 0x96cdcd, - PaleTurquoise4 = 0x668b8b, - CadetBlue1 = 0x98f5ff, - CadetBlue2 = 0x8ee5ee, - CadetBlue3 = 0x7ac5cd, - CadetBlue4 = 0x53868b, - Turquoise1 = 0x00f5ff, - Turquoise2 = 0x00e5ee, - Turquoise3 = 0x00c5cd, - Turquoise4 = 0x00868b, - Cyan1 = 0x00ffff, - Cyan2 = 0x00eeee, - Cyan3 = 0x00cdcd, - Cyan4 = 0x008b8b, - DarkSlateGray1 = 0x97ffff, - DarkSlateGray2 = 0x8deeee, - DarkSlateGray3 = 0x79cdcd, - DarkSlateGray4 = 0x528b8b, - Aquamarine1 = 0x7fffd4, - Aquamarine2 = 0x76eec6, - Aquamarine3 = 0x66cdaa, - Aquamarine4 = 0x458b74, - DarkSeaGreen1 = 0xc1ffc1, - DarkSeaGreen2 = 0xb4eeb4, - DarkSeaGreen3 = 0x9bcd9b, - DarkSeaGreen4 = 0x698b69, - SeaGreen1 = 0x54ff9f, - SeaGreen2 = 0x4eee94, - SeaGreen3 = 0x43cd80, - SeaGreen4 = 0x2e8b57, - PaleGreen1 = 0x9aff9a, - PaleGreen2 = 0x90ee90, - PaleGreen3 = 0x7ccd7c, - PaleGreen4 = 0x548b54, - SpringGreen1 = 0x00ff7f, - SpringGreen2 = 0x00ee76, - SpringGreen3 = 0x00cd66, - SpringGreen4 = 0x008b45, - Green1 = 0x00ff00, - Green2 = 0x00ee00, - Green3 = 0x00cd00, - Green4 = 0x008b00, - Chartreuse1 = 0x7fff00, - Chartreuse2 = 0x76ee00, - Chartreuse3 = 0x66cd00, - Chartreuse4 = 0x458b00, - OliveDrab1 = 0xc0ff3e, - OliveDrab2 = 0xb3ee3a, - OliveDrab3 = 0x9acd32, - OliveDrab4 = 0x698b22, - DarkOliveGreen1 = 0xcaff70, - DarkOliveGreen2 = 0xbcee68, - DarkOliveGreen3 = 0xa2cd5a, - DarkOliveGreen4 = 0x6e8b3d, - Khaki1 = 0xfff68f, - Khaki2 = 0xeee685, - Khaki3 = 0xcdc673, - Khaki4 = 0x8b864e, - LightGoldenrod1 = 0xffec8b, - LightGoldenrod2 = 0xeedc82, - LightGoldenrod3 = 0xcdbe70, - LightGoldenrod4 = 0x8b814c, - LightYellow1 = 0xffffe0, - LightYellow2 = 0xeeeed1, - LightYellow3 = 0xcdcdb4, - LightYellow4 = 0x8b8b7a, - Yellow1 = 0xffff00, - Yellow2 = 0xeeee00, - Yellow3 = 0xcdcd00, - Yellow4 = 0x8b8b00, - Gold1 = 0xffd700, - Gold2 = 0xeec900, - Gold3 = 0xcdad00, - Gold4 = 0x8b7500, - Goldenrod1 = 0xffc125, - Goldenrod2 = 0xeeb422, - Goldenrod3 = 0xcd9b1d, - Goldenrod4 = 0x8b6914, - DarkGoldenrod1 = 0xffb90f, - DarkGoldenrod2 = 0xeead0e, - DarkGoldenrod3 = 0xcd950c, - DarkGoldenrod4 = 0x8b6508, - RosyBrown1 = 0xffc1c1, - RosyBrown2 = 0xeeb4b4, - RosyBrown3 = 0xcd9b9b, - RosyBrown4 = 0x8b6969, - IndianRed1 = 0xff6a6a, - IndianRed2 = 0xee6363, - IndianRed3 = 0xcd5555, - IndianRed4 = 0x8b3a3a, - Sienna1 = 0xff8247, - Sienna2 = 0xee7942, - Sienna3 = 0xcd6839, - Sienna4 = 0x8b4726, - Burlywood1 = 0xffd39b, - Burlywood2 = 0xeec591, - Burlywood3 = 0xcdaa7d, - Burlywood4 = 0x8b7355, - Wheat1 = 0xffe7ba, - Wheat2 = 0xeed8ae, - Wheat3 = 0xcdba96, - Wheat4 = 0x8b7e66, - Tan1 = 0xffa54f, - Tan2 = 0xee9a49, - Tan3 = 0xcd853f, - Tan4 = 0x8b5a2b, - Chocolate1 = 0xff7f24, - Chocolate2 = 0xee7621, - Chocolate3 = 0xcd661d, - Chocolate4 = 0x8b4513, - Firebrick1 = 0xff3030, - Firebrick2 = 0xee2c2c, - Firebrick3 = 0xcd2626, - Firebrick4 = 0x8b1a1a, - Brown1 = 0xff4040, - Brown2 = 0xee3b3b, - Brown3 = 0xcd3333, - Brown4 = 0x8b2323, - Salmon1 = 0xff8c69, - Salmon2 = 0xee8262, - Salmon3 = 0xcd7054, - Salmon4 = 0x8b4c39, - LightSalmon1 = 0xffa07a, - LightSalmon2 = 0xee9572, - LightSalmon3 = 0xcd8162, - LightSalmon4 = 0x8b5742, - Orange1 = 0xffa500, - Orange2 = 0xee9a00, - Orange3 = 0xcd8500, - Orange4 = 0x8b5a00, - DarkOrange1 = 0xff7f00, - DarkOrange2 = 0xee7600, - DarkOrange3 = 0xcd6600, - DarkOrange4 = 0x8b4500, - Coral1 = 0xff7256, - Coral2 = 0xee6a50, - Coral3 = 0xcd5b45, - Coral4 = 0x8b3e2f, - Tomato1 = 0xff6347, - Tomato2 = 0xee5c42, - Tomato3 = 0xcd4f39, - Tomato4 = 0x8b3626, - OrangeRed1 = 0xff4500, - OrangeRed2 = 0xee4000, - OrangeRed3 = 0xcd3700, - OrangeRed4 = 0x8b2500, - Red1 = 0xff0000, - Red2 = 0xee0000, - Red3 = 0xcd0000, - Red4 = 0x8b0000, - DeepPink1 = 0xff1493, - DeepPink2 = 0xee1289, - DeepPink3 = 0xcd1076, - DeepPink4 = 0x8b0a50, - HotPink1 = 0xff6eb4, - HotPink2 = 0xee6aa7, - HotPink3 = 0xcd6090, - HotPink4 = 0x8b3a62, - Pink1 = 0xffb5c5, - Pink2 = 0xeea9b8, - Pink3 = 0xcd919e, - Pink4 = 0x8b636c, - LightPink1 = 0xffaeb9, - LightPink2 = 0xeea2ad, - LightPink3 = 0xcd8c95, - LightPink4 = 0x8b5f65, - PaleVioletRed1 = 0xff82ab, - PaleVioletRed2 = 0xee799f, - PaleVioletRed3 = 0xcd6889, - PaleVioletRed4 = 0x8b475d, - Maroon1 = 0xff34b3, - Maroon2 = 0xee30a7, - Maroon3 = 0xcd2990, - Maroon4 = 0x8b1c62, - VioletRed1 = 0xff3e96, - VioletRed2 = 0xee3a8c, - VioletRed3 = 0xcd3278, - VioletRed4 = 0x8b2252, - Magenta1 = 0xff00ff, - Magenta2 = 0xee00ee, - Magenta3 = 0xcd00cd, - Magenta4 = 0x8b008b, - Orchid1 = 0xff83fa, - Orchid2 = 0xee7ae9, - Orchid3 = 0xcd69c9, - Orchid4 = 0x8b4789, - Plum1 = 0xffbbff, - Plum2 = 0xeeaeee, - Plum3 = 0xcd96cd, - Plum4 = 0x8b668b, - MediumOrchid1 = 0xe066ff, - MediumOrchid2 = 0xd15fee, - MediumOrchid3 = 0xb452cd, - MediumOrchid4 = 0x7a378b, - DarkOrchid1 = 0xbf3eff, - DarkOrchid2 = 0xb23aee, - DarkOrchid3 = 0x9a32cd, - DarkOrchid4 = 0x68228b, - Purple1 = 0x9b30ff, - Purple2 = 0x912cee, - Purple3 = 0x7d26cd, - Purple4 = 0x551a8b, - MediumPurple1 = 0xab82ff, - MediumPurple2 = 0x9f79ee, - MediumPurple3 = 0x8968cd, - MediumPurple4 = 0x5d478b, - Thistle1 = 0xffe1ff, - Thistle2 = 0xeed2ee, - Thistle3 = 0xcdb5cd, - Thistle4 = 0x8b7b8b, - Gray0 = 0x000000, - Grey0 = 0x000000, - Gray1 = 0x030303, - Grey1 = 0x030303, - Gray2 = 0x050505, - Grey2 = 0x050505, - Gray3 = 0x080808, - Grey3 = 0x080808, - Gray4 = 0x0a0a0a, - Grey4 = 0x0a0a0a, - Gray5 = 0x0d0d0d, - Grey5 = 0x0d0d0d, - Gray6 = 0x0f0f0f, - Grey6 = 0x0f0f0f, - Gray7 = 0x121212, - Grey7 = 0x121212, - Gray8 = 0x141414, - Grey8 = 0x141414, - Gray9 = 0x171717, - Grey9 = 0x171717, - Gray10 = 0x1a1a1a, - Grey10 = 0x1a1a1a, - Gray11 = 0x1c1c1c, - Grey11 = 0x1c1c1c, - Gray12 = 0x1f1f1f, - Grey12 = 0x1f1f1f, - Gray13 = 0x212121, - Grey13 = 0x212121, - Gray14 = 0x242424, - Grey14 = 0x242424, - Gray15 = 0x262626, - Grey15 = 0x262626, - Gray16 = 0x292929, - Grey16 = 0x292929, - Gray17 = 0x2b2b2b, - Grey17 = 0x2b2b2b, - Gray18 = 0x2e2e2e, - Grey18 = 0x2e2e2e, - Gray19 = 0x303030, - Grey19 = 0x303030, - Gray20 = 0x333333, - Grey20 = 0x333333, - Gray21 = 0x363636, - Grey21 = 0x363636, - Gray22 = 0x383838, - Grey22 = 0x383838, - Gray23 = 0x3b3b3b, - Grey23 = 0x3b3b3b, - Gray24 = 0x3d3d3d, - Grey24 = 0x3d3d3d, - Gray25 = 0x404040, - Grey25 = 0x404040, - Gray26 = 0x424242, - Grey26 = 0x424242, - Gray27 = 0x454545, - Grey27 = 0x454545, - Gray28 = 0x474747, - Grey28 = 0x474747, - Gray29 = 0x4a4a4a, - Grey29 = 0x4a4a4a, - Gray30 = 0x4d4d4d, - Grey30 = 0x4d4d4d, - Gray31 = 0x4f4f4f, - Grey31 = 0x4f4f4f, - Gray32 = 0x525252, - Grey32 = 0x525252, - Gray33 = 0x545454, - Grey33 = 0x545454, - Gray34 = 0x575757, - Grey34 = 0x575757, - Gray35 = 0x595959, - Grey35 = 0x595959, - Gray36 = 0x5c5c5c, - Grey36 = 0x5c5c5c, - Gray37 = 0x5e5e5e, - Grey37 = 0x5e5e5e, - Gray38 = 0x616161, - Grey38 = 0x616161, - Gray39 = 0x636363, - Grey39 = 0x636363, - Gray40 = 0x666666, - Grey40 = 0x666666, - Gray41 = 0x696969, - Grey41 = 0x696969, - Gray42 = 0x6b6b6b, - Grey42 = 0x6b6b6b, - Gray43 = 0x6e6e6e, - Grey43 = 0x6e6e6e, - Gray44 = 0x707070, - Grey44 = 0x707070, - Gray45 = 0x737373, - Grey45 = 0x737373, - Gray46 = 0x757575, - Grey46 = 0x757575, - Gray47 = 0x787878, - Grey47 = 0x787878, - Gray48 = 0x7a7a7a, - Grey48 = 0x7a7a7a, - Gray49 = 0x7d7d7d, - Grey49 = 0x7d7d7d, - Gray50 = 0x7f7f7f, - Grey50 = 0x7f7f7f, - Gray51 = 0x828282, - Grey51 = 0x828282, - Gray52 = 0x858585, - Grey52 = 0x858585, - Gray53 = 0x878787, - Grey53 = 0x878787, - Gray54 = 0x8a8a8a, - Grey54 = 0x8a8a8a, - Gray55 = 0x8c8c8c, - Grey55 = 0x8c8c8c, - Gray56 = 0x8f8f8f, - Grey56 = 0x8f8f8f, - Gray57 = 0x919191, - Grey57 = 0x919191, - Gray58 = 0x949494, - Grey58 = 0x949494, - Gray59 = 0x969696, - Grey59 = 0x969696, - Gray60 = 0x999999, - Grey60 = 0x999999, - Gray61 = 0x9c9c9c, - Grey61 = 0x9c9c9c, - Gray62 = 0x9e9e9e, - Grey62 = 0x9e9e9e, - Gray63 = 0xa1a1a1, - Grey63 = 0xa1a1a1, - Gray64 = 0xa3a3a3, - Grey64 = 0xa3a3a3, - Gray65 = 0xa6a6a6, - Grey65 = 0xa6a6a6, - Gray66 = 0xa8a8a8, - Grey66 = 0xa8a8a8, - Gray67 = 0xababab, - Grey67 = 0xababab, - Gray68 = 0xadadad, - Grey68 = 0xadadad, - Gray69 = 0xb0b0b0, - Grey69 = 0xb0b0b0, - Gray70 = 0xb3b3b3, - Grey70 = 0xb3b3b3, - Gray71 = 0xb5b5b5, - Grey71 = 0xb5b5b5, - Gray72 = 0xb8b8b8, - Grey72 = 0xb8b8b8, - Gray73 = 0xbababa, - Grey73 = 0xbababa, - Gray74 = 0xbdbdbd, - Grey74 = 0xbdbdbd, - Gray75 = 0xbfbfbf, - Grey75 = 0xbfbfbf, - Gray76 = 0xc2c2c2, - Grey76 = 0xc2c2c2, - Gray77 = 0xc4c4c4, - Grey77 = 0xc4c4c4, - Gray78 = 0xc7c7c7, - Grey78 = 0xc7c7c7, - Gray79 = 0xc9c9c9, - Grey79 = 0xc9c9c9, - Gray80 = 0xcccccc, - Grey80 = 0xcccccc, - Gray81 = 0xcfcfcf, - Grey81 = 0xcfcfcf, - Gray82 = 0xd1d1d1, - Grey82 = 0xd1d1d1, - Gray83 = 0xd4d4d4, - Grey83 = 0xd4d4d4, - Gray84 = 0xd6d6d6, - Grey84 = 0xd6d6d6, - Gray85 = 0xd9d9d9, - Grey85 = 0xd9d9d9, - Gray86 = 0xdbdbdb, - Grey86 = 0xdbdbdb, - Gray87 = 0xdedede, - Grey87 = 0xdedede, - Gray88 = 0xe0e0e0, - Grey88 = 0xe0e0e0, - Gray89 = 0xe3e3e3, - Grey89 = 0xe3e3e3, - Gray90 = 0xe5e5e5, - Grey90 = 0xe5e5e5, - Gray91 = 0xe8e8e8, - Grey91 = 0xe8e8e8, - Gray92 = 0xebebeb, - Grey92 = 0xebebeb, - Gray93 = 0xededed, - Grey93 = 0xededed, - Gray94 = 0xf0f0f0, - Grey94 = 0xf0f0f0, - Gray95 = 0xf2f2f2, - Grey95 = 0xf2f2f2, - Gray96 = 0xf5f5f5, - Grey96 = 0xf5f5f5, - Gray97 = 0xf7f7f7, - Grey97 = 0xf7f7f7, - Gray98 = 0xfafafa, - Grey98 = 0xfafafa, - Gray99 = 0xfcfcfc, - Grey99 = 0xfcfcfc, - Gray100 = 0xffffff, - Grey100 = 0xffffff, - DarkGrey = 0xa9a9a9, - DarkGray = 0xa9a9a9, - DarkBlue = 0x00008b, - DarkCyan = 0x008b8b, - DarkMagenta = 0x8b008b, - DarkRed = 0x8b0000, - LightGreen = 0x90ee90, - Crimson = 0xdc143c, - Indigo = 0x4b0082, - Olive = 0x808000, - RebeccaPurple = 0x663399, - Silver = 0xc0c0c0, - Teal = 0x008080, -}; -}; -} - -#endif diff --git a/src/third_party/tracy/common/TracyForceInline.hpp b/src/third_party/tracy/common/TracyForceInline.hpp deleted file mode 100644 index b6a5833e..00000000 --- a/src/third_party/tracy/common/TracyForceInline.hpp +++ /dev/null @@ -1,20 +0,0 @@ -#ifndef __TRACYFORCEINLINE_HPP__ -#define __TRACYFORCEINLINE_HPP__ - -#if defined(__GNUC__) -# define tracy_force_inline __attribute__((always_inline)) inline -#elif defined(_MSC_VER) -# define tracy_force_inline __forceinline -#else -# define tracy_force_inline inline -#endif - -#if defined(__GNUC__) -# define tracy_no_inline __attribute__((noinline)) -#elif defined(_MSC_VER) -# define tracy_no_inline __declspec(noinline) -#else -# define tracy_no_inline -#endif - -#endif diff --git a/src/third_party/tracy/common/TracyMutex.hpp b/src/third_party/tracy/common/TracyMutex.hpp deleted file mode 100644 index 57fb01a0..00000000 --- a/src/third_party/tracy/common/TracyMutex.hpp +++ /dev/null @@ -1,24 +0,0 @@ -#ifndef __TRACYMUTEX_HPP__ -#define __TRACYMUTEX_HPP__ - -#if defined _MSC_VER - -# include - -namespace tracy -{ -using TracyMutex = std::shared_mutex; -} - -#else - -#include - -namespace tracy -{ -using TracyMutex = std::mutex; -} - -#endif - -#endif diff --git a/src/third_party/tracy/common/TracyProtocol.hpp b/src/third_party/tracy/common/TracyProtocol.hpp deleted file mode 100644 index 839c24e8..00000000 --- a/src/third_party/tracy/common/TracyProtocol.hpp +++ /dev/null @@ -1,169 +0,0 @@ -#ifndef __TRACYPROTOCOL_HPP__ -#define __TRACYPROTOCOL_HPP__ - -#include -#include - -namespace tracy -{ - -constexpr unsigned Lz4CompressBound( unsigned isize ) { return isize + ( isize / 255 ) + 16; } - -enum : uint32_t { ProtocolVersion = 72 }; -enum : uint16_t { BroadcastVersion = 3 }; - -using lz4sz_t = uint32_t; - -enum { TargetFrameSize = 256 * 1024 }; -enum { LZ4Size = Lz4CompressBound( TargetFrameSize ) }; -static_assert( LZ4Size <= (std::numeric_limits::max)(), "LZ4Size greater than lz4sz_t" ); -static_assert( TargetFrameSize * 2 >= 64 * 1024, "Not enough space for LZ4 stream buffer" ); - -enum { HandshakeShibbolethSize = 8 }; -static const char HandshakeShibboleth[HandshakeShibbolethSize] = { 'T', 'r', 'a', 'c', 'y', 'P', 'r', 'f' }; - -enum HandshakeStatus : uint8_t -{ - HandshakePending, - HandshakeWelcome, - HandshakeProtocolMismatch, - HandshakeNotAvailable, - HandshakeDropped -}; - -enum { WelcomeMessageProgramNameSize = 64 }; -enum { WelcomeMessageHostInfoSize = 1024 }; - -#pragma pack( push, 1 ) - -// Must increase left query space after handling! -enum ServerQuery : uint8_t -{ - ServerQueryTerminate, - ServerQueryString, - ServerQueryThreadString, - ServerQuerySourceLocation, - ServerQueryPlotName, - ServerQueryFrameName, - ServerQueryParameter, - ServerQueryFiberName, - ServerQueryExternalName, - // Items above are high priority. Split order must be preserved. See IsQueryPrio(). - ServerQueryDisconnect, - ServerQueryCallstackFrame, - ServerQuerySymbol, - ServerQuerySymbolCode, - ServerQuerySourceCode, - ServerQueryDataTransfer, - ServerQueryDataTransferPart -}; - -struct ServerQueryPacket -{ - ServerQuery type; - uint64_t ptr; - uint32_t extra; -}; - -enum { ServerQueryPacketSize = sizeof( ServerQueryPacket ) }; - - -enum CpuArchitecture : uint8_t -{ - CpuArchUnknown, - CpuArchX86, - CpuArchX64, - CpuArchArm32, - CpuArchArm64 -}; - - -struct WelcomeFlag -{ - enum _t : uint8_t - { - OnDemand = 1 << 0, - IsApple = 1 << 1, - CodeTransfer = 1 << 2, - CombineSamples = 1 << 3, - IdentifySamples = 1 << 4, - }; -}; - -struct WelcomeMessage -{ - double timerMul; - int64_t initBegin; - int64_t initEnd; - uint64_t delay; - uint64_t resolution; - uint64_t epoch; - uint64_t exectime; - uint64_t pid; - int64_t samplingPeriod; - uint8_t flags; - uint8_t cpuArch; - char cpuManufacturer[12]; - uint32_t cpuId; - char programName[WelcomeMessageProgramNameSize]; - char hostInfo[WelcomeMessageHostInfoSize]; -}; - -enum { WelcomeMessageSize = sizeof( WelcomeMessage ) }; - - -struct OnDemandPayloadMessage -{ - uint64_t frames; - uint64_t currentTime; -}; - -enum { OnDemandPayloadMessageSize = sizeof( OnDemandPayloadMessage ) }; - - -struct BroadcastMessage -{ - uint16_t broadcastVersion; - uint16_t listenPort; - uint32_t protocolVersion; - uint64_t pid; - int32_t activeTime; // in seconds - char programName[WelcomeMessageProgramNameSize]; -}; - -struct BroadcastMessage_v2 -{ - uint16_t broadcastVersion; - uint16_t listenPort; - uint32_t protocolVersion; - int32_t activeTime; - char programName[WelcomeMessageProgramNameSize]; -}; - -struct BroadcastMessage_v1 -{ - uint32_t broadcastVersion; - uint32_t protocolVersion; - uint32_t listenPort; - uint32_t activeTime; - char programName[WelcomeMessageProgramNameSize]; -}; - -struct BroadcastMessage_v0 -{ - uint32_t broadcastVersion; - uint32_t protocolVersion; - uint32_t activeTime; - char programName[WelcomeMessageProgramNameSize]; -}; - -enum { BroadcastMessageSize = sizeof( BroadcastMessage ) }; -enum { BroadcastMessageSize_v2 = sizeof( BroadcastMessage_v2 ) }; -enum { BroadcastMessageSize_v1 = sizeof( BroadcastMessage_v1 ) }; -enum { BroadcastMessageSize_v0 = sizeof( BroadcastMessage_v0 ) }; - -#pragma pack( pop ) - -} - -#endif diff --git a/src/third_party/tracy/common/TracyQueue.hpp b/src/third_party/tracy/common/TracyQueue.hpp deleted file mode 100644 index 6a5b8727..00000000 --- a/src/third_party/tracy/common/TracyQueue.hpp +++ /dev/null @@ -1,919 +0,0 @@ -#ifndef __TRACYQUEUE_HPP__ -#define __TRACYQUEUE_HPP__ - -#include -#include - -namespace tracy -{ - -enum class QueueType : uint8_t -{ - ZoneText, - ZoneName, - Message, - MessageColor, - MessageCallstack, - MessageColorCallstack, - MessageAppInfo, - ZoneBeginAllocSrcLoc, - ZoneBeginAllocSrcLocCallstack, - CallstackSerial, - Callstack, - CallstackAlloc, - CallstackSample, - CallstackSampleContextSwitch, - FrameImage, - ZoneBegin, - ZoneBeginCallstack, - ZoneEnd, - LockWait, - LockObtain, - LockRelease, - LockSharedWait, - LockSharedObtain, - LockSharedRelease, - LockName, - MemAlloc, - MemAllocNamed, - MemFree, - MemFreeNamed, - MemAllocCallstack, - MemAllocCallstackNamed, - MemFreeCallstack, - MemFreeCallstackNamed, - MemDiscard, - MemDiscardCallstack, - GpuZoneBegin, - GpuZoneBeginCallstack, - GpuZoneBeginAllocSrcLoc, - GpuZoneBeginAllocSrcLocCallstack, - GpuZoneEnd, - GpuZoneBeginSerial, - GpuZoneBeginCallstackSerial, - GpuZoneBeginAllocSrcLocSerial, - GpuZoneBeginAllocSrcLocCallstackSerial, - GpuZoneEndSerial, - PlotDataInt, - PlotDataFloat, - PlotDataDouble, - ContextSwitch, - ThreadWakeup, - GpuTime, - GpuContextName, - CallstackFrameSize, - SymbolInformation, - ExternalNameMetadata, - SymbolCodeMetadata, - SourceCodeMetadata, - FiberEnter, - FiberLeave, - Terminate, - KeepAlive, - ThreadContext, - GpuCalibration, - GpuTimeSync, - Crash, - CrashReport, - ZoneValidation, - ZoneColor, - ZoneValue, - FrameMarkMsg, - FrameMarkMsgStart, - FrameMarkMsgEnd, - FrameVsync, - SourceLocation, - LockAnnounce, - LockTerminate, - LockMark, - MessageLiteral, - MessageLiteralColor, - MessageLiteralCallstack, - MessageLiteralColorCallstack, - GpuNewContext, - CallstackFrame, - SysTimeReport, - SysPowerReport, - TidToPid, - HwSampleCpuCycle, - HwSampleInstructionRetired, - HwSampleCacheReference, - HwSampleCacheMiss, - HwSampleBranchRetired, - HwSampleBranchMiss, - PlotConfig, - ParamSetup, - AckServerQueryNoop, - AckSourceCodeNotAvailable, - AckSymbolCodeNotAvailable, - CpuTopology, - SingleStringData, - SecondStringData, - MemNamePayload, - ThreadGroupHint, - StringData, - ThreadName, - PlotName, - SourceLocationPayload, - CallstackPayload, - CallstackAllocPayload, - FrameName, - FrameImageData, - ExternalName, - ExternalThreadName, - SymbolCode, - SourceCode, - FiberName, - NUM_TYPES -}; - -#pragma pack( push, 1 ) - -struct QueueThreadContext -{ - uint32_t thread; -}; - -struct QueueZoneBeginLean -{ - int64_t time; -}; - -struct QueueZoneBegin : public QueueZoneBeginLean -{ - uint64_t srcloc; // ptr -}; - -struct QueueZoneBeginThread : public QueueZoneBegin -{ - uint32_t thread; -}; - -struct QueueZoneEnd -{ - int64_t time; -}; - -struct QueueZoneEndThread : public QueueZoneEnd -{ - uint32_t thread; -}; - -struct QueueZoneValidation -{ - uint32_t id; -}; - -struct QueueZoneValidationThread : public QueueZoneValidation -{ - uint32_t thread; -}; - -struct QueueZoneColor -{ - uint8_t b; - uint8_t g; - uint8_t r; -}; - -struct QueueZoneColorThread : public QueueZoneColor -{ - uint32_t thread; -}; - -struct QueueZoneValue -{ - uint64_t value; -}; - -struct QueueZoneValueThread : public QueueZoneValue -{ - uint32_t thread; -}; - -struct QueueStringTransfer -{ - uint64_t ptr; -}; - -struct QueueFrameMark -{ - int64_t time; - uint64_t name; // ptr -}; - -struct QueueFrameVsync -{ - int64_t time; - uint32_t id; -}; - -struct QueueFrameImage -{ - uint32_t frame; - uint16_t w; - uint16_t h; - uint8_t flip; -}; - -struct QueueFrameImageFat : public QueueFrameImage -{ - uint64_t image; // ptr -}; - -struct QueueSourceLocation -{ - uint64_t name; - uint64_t function; // ptr - uint64_t file; // ptr - uint32_t line; - uint8_t b; - uint8_t g; - uint8_t r; -}; - -struct QueueZoneTextFat -{ - uint64_t text; // ptr - uint16_t size; -}; - -struct QueueZoneTextFatThread : public QueueZoneTextFat -{ - uint32_t thread; -}; - -enum class LockType : uint8_t -{ - Lockable, - SharedLockable -}; - -struct QueueLockAnnounce -{ - uint32_t id; - int64_t time; - uint64_t lckloc; // ptr - LockType type; -}; - -struct QueueFiberEnter -{ - int64_t time; - uint64_t fiber; // ptr - uint32_t thread; - int32_t groupHint; -}; - -struct QueueFiberLeave -{ - int64_t time; - uint32_t thread; -}; - -struct QueueLockTerminate -{ - uint32_t id; - int64_t time; -}; - -struct QueueLockWait -{ - uint32_t thread; - uint32_t id; - int64_t time; -}; - -struct QueueLockObtain -{ - uint32_t thread; - uint32_t id; - int64_t time; -}; - -struct QueueLockRelease -{ - uint32_t id; - int64_t time; -}; - -struct QueueLockReleaseShared : public QueueLockRelease -{ - uint32_t thread; -}; - -struct QueueLockMark -{ - uint32_t thread; - uint32_t id; - uint64_t srcloc; // ptr -}; - -struct QueueLockName -{ - uint32_t id; -}; - -struct QueueLockNameFat : public QueueLockName -{ - uint64_t name; // ptr - uint16_t size; -}; - -struct QueuePlotDataBase -{ - uint64_t name; // ptr - int64_t time; -}; - -struct QueuePlotDataInt : public QueuePlotDataBase -{ - int64_t val; -}; - -struct QueuePlotDataFloat : public QueuePlotDataBase -{ - float val; -}; - -struct QueuePlotDataDouble : public QueuePlotDataBase -{ - double val; -}; - -struct QueueMessage -{ - int64_t time; -}; - -struct QueueMessageColor : public QueueMessage -{ - uint8_t b; - uint8_t g; - uint8_t r; -}; - -struct QueueMessageLiteral : public QueueMessage -{ - uint64_t text; // ptr -}; - -struct QueueMessageLiteralThread : public QueueMessageLiteral -{ - uint32_t thread; -}; - -struct QueueMessageColorLiteral : public QueueMessageColor -{ - uint64_t text; // ptr -}; - -struct QueueMessageColorLiteralThread : public QueueMessageColorLiteral -{ - uint32_t thread; -}; - -struct QueueMessageFat : public QueueMessage -{ - uint64_t text; // ptr - uint16_t size; -}; - -struct QueueMessageFatThread : public QueueMessageFat -{ - uint32_t thread; -}; - -struct QueueMessageColorFat : public QueueMessageColor -{ - uint64_t text; // ptr - uint16_t size; -}; - -struct QueueMessageColorFatThread : public QueueMessageColorFat -{ - uint32_t thread; -}; - -// Don't change order, only add new entries at the end, this is also used on trace dumps! -enum class GpuContextType : uint8_t -{ - Invalid, - OpenGl, - Vulkan, - OpenCL, - Direct3D12, - Direct3D11, - Metal, - Custom -}; - -enum GpuContextFlags : uint8_t -{ - GpuContextCalibration = 1 << 0 -}; - -struct QueueGpuNewContext -{ - int64_t cpuTime; - int64_t gpuTime; - uint32_t thread; - float period; - uint8_t context; - GpuContextFlags flags; - GpuContextType type; -}; - -struct QueueGpuZoneBeginLean -{ - int64_t cpuTime; - uint32_t thread; - uint16_t queryId; - uint8_t context; -}; - -struct QueueGpuZoneBegin : public QueueGpuZoneBeginLean -{ - uint64_t srcloc; -}; - -struct QueueGpuZoneEnd -{ - int64_t cpuTime; - uint32_t thread; - uint16_t queryId; - uint8_t context; -}; - -struct QueueGpuTime -{ - int64_t gpuTime; - uint16_t queryId; - uint8_t context; -}; - -struct QueueGpuCalibration -{ - int64_t gpuTime; - int64_t cpuTime; - int64_t cpuDelta; - uint8_t context; -}; - -struct QueueGpuTimeSync -{ - int64_t gpuTime; - int64_t cpuTime; - uint8_t context; -}; - -struct QueueGpuContextName -{ - uint8_t context; -}; - -struct QueueGpuContextNameFat : public QueueGpuContextName -{ - uint64_t ptr; - uint16_t size; -}; - -struct QueueMemNamePayload -{ - uint64_t name; -}; - -struct QueueThreadGroupHint -{ - uint32_t thread; - int32_t groupHint; -}; - -struct QueueMemAlloc -{ - int64_t time; - uint32_t thread; - uint64_t ptr; - char size[6]; -}; - -struct QueueMemFree -{ - int64_t time; - uint32_t thread; - uint64_t ptr; -}; - -struct QueueMemDiscard -{ - int64_t time; - uint32_t thread; - uint64_t name; -}; - -struct QueueCallstackFat -{ - uint64_t ptr; -}; - -struct QueueCallstackFatThread : public QueueCallstackFat -{ - uint32_t thread; -}; - -struct QueueCallstackAllocFat -{ - uint64_t ptr; - uint64_t nativePtr; -}; - -struct QueueCallstackAllocFatThread : public QueueCallstackAllocFat -{ - uint32_t thread; -}; - -struct QueueCallstackSample -{ - int64_t time; - uint32_t thread; -}; - -struct QueueCallstackSampleFat : public QueueCallstackSample -{ - uint64_t ptr; -}; - -struct QueueCallstackFrameSize -{ - uint64_t ptr; - uint8_t size; -}; - -struct QueueCallstackFrameSizeFat : public QueueCallstackFrameSize -{ - uint64_t data; - uint64_t imageName; -}; - -struct QueueCallstackFrame -{ - uint32_t line; - uint64_t symAddr; - uint32_t symLen; -}; - -struct QueueSymbolInformation -{ - uint32_t line; - uint64_t symAddr; -}; - -struct QueueSymbolInformationFat : public QueueSymbolInformation -{ - uint64_t fileString; - uint8_t needFree; -}; - -struct QueueCrashReport -{ - int64_t time; - uint64_t text; // ptr -}; - -struct QueueCrashReportThread -{ - uint32_t thread; -}; - -struct QueueSysTime -{ - int64_t time; - float sysTime; -}; - -struct QueueSysPower -{ - int64_t time; - uint64_t delta; - uint64_t name; // ptr -}; - -struct QueueContextSwitch -{ - int64_t time; - uint32_t oldThread; - uint32_t newThread; - uint8_t cpu; - uint8_t reason; - uint8_t state; -}; - -struct QueueThreadWakeup -{ - int64_t time; - uint32_t thread; -}; - -struct QueueTidToPid -{ - uint64_t tid; - uint64_t pid; -}; - -struct QueueHwSample -{ - uint64_t ip; - int64_t time; -}; - -enum class PlotFormatType : uint8_t -{ - Number, - Memory, - Percentage -}; - -struct QueuePlotConfig -{ - uint64_t name; // ptr - uint8_t type; - uint8_t step; - uint8_t fill; - uint32_t color; -}; - -struct QueueParamSetup -{ - uint32_t idx; - uint64_t name; // ptr - uint8_t isBool; - int32_t val; -}; - -struct QueueSourceCodeNotAvailable -{ - uint32_t id; -}; - -struct QueueCpuTopology -{ - uint32_t package; - uint32_t die; - uint32_t core; - uint32_t thread; -}; - -struct QueueExternalNameMetadata -{ - uint64_t thread; - uint64_t name; - uint64_t threadName; -}; - -struct QueueSymbolCodeMetadata -{ - uint64_t symbol; - uint64_t ptr; - uint32_t size; -}; - -struct QueueSourceCodeMetadata -{ - uint64_t ptr; - uint32_t size; - uint32_t id; -}; - -struct QueueHeader -{ - union - { - QueueType type; - uint8_t idx; - }; -}; - -struct QueueItem -{ - QueueHeader hdr; - union - { - QueueThreadContext threadCtx; - QueueZoneBegin zoneBegin; - QueueZoneBeginLean zoneBeginLean; - QueueZoneBeginThread zoneBeginThread; - QueueZoneEnd zoneEnd; - QueueZoneEndThread zoneEndThread; - QueueZoneValidation zoneValidation; - QueueZoneValidationThread zoneValidationThread; - QueueZoneColor zoneColor; - QueueZoneColorThread zoneColorThread; - QueueZoneValue zoneValue; - QueueZoneValueThread zoneValueThread; - QueueStringTransfer stringTransfer; - QueueFrameMark frameMark; - QueueFrameVsync frameVsync; - QueueFrameImage frameImage; - QueueFrameImageFat frameImageFat; - QueueSourceLocation srcloc; - QueueZoneTextFat zoneTextFat; - QueueZoneTextFatThread zoneTextFatThread; - QueueLockAnnounce lockAnnounce; - QueueLockTerminate lockTerminate; - QueueLockWait lockWait; - QueueLockObtain lockObtain; - QueueLockRelease lockRelease; - QueueLockReleaseShared lockReleaseShared; - QueueLockMark lockMark; - QueueLockName lockName; - QueueLockNameFat lockNameFat; - QueuePlotDataInt plotDataInt; - QueuePlotDataFloat plotDataFloat; - QueuePlotDataDouble plotDataDouble; - QueueMessage message; - QueueMessageColor messageColor; - QueueMessageLiteral messageLiteral; - QueueMessageLiteralThread messageLiteralThread; - QueueMessageColorLiteral messageColorLiteral; - QueueMessageColorLiteralThread messageColorLiteralThread; - QueueMessageFat messageFat; - QueueMessageFatThread messageFatThread; - QueueMessageColorFat messageColorFat; - QueueMessageColorFatThread messageColorFatThread; - QueueGpuNewContext gpuNewContext; - QueueGpuZoneBegin gpuZoneBegin; - QueueGpuZoneBeginLean gpuZoneBeginLean; - QueueGpuZoneEnd gpuZoneEnd; - QueueGpuTime gpuTime; - QueueGpuCalibration gpuCalibration; - QueueGpuTimeSync gpuTimeSync; - QueueGpuContextName gpuContextName; - QueueGpuContextNameFat gpuContextNameFat; - QueueMemAlloc memAlloc; - QueueMemFree memFree; - QueueMemDiscard memDiscard; - QueueMemNamePayload memName; - QueueThreadGroupHint threadGroupHint; - QueueCallstackFat callstackFat; - QueueCallstackFatThread callstackFatThread; - QueueCallstackAllocFat callstackAllocFat; - QueueCallstackAllocFatThread callstackAllocFatThread; - QueueCallstackSample callstackSample; - QueueCallstackSampleFat callstackSampleFat; - QueueCallstackFrameSize callstackFrameSize; - QueueCallstackFrameSizeFat callstackFrameSizeFat; - QueueCallstackFrame callstackFrame; - QueueSymbolInformation symbolInformation; - QueueSymbolInformationFat symbolInformationFat; - QueueCrashReport crashReport; - QueueCrashReportThread crashReportThread; - QueueSysTime sysTime; - QueueSysPower sysPower; - QueueContextSwitch contextSwitch; - QueueThreadWakeup threadWakeup; - QueueTidToPid tidToPid; - QueueHwSample hwSample; - QueuePlotConfig plotConfig; - QueueParamSetup paramSetup; - QueueCpuTopology cpuTopology; - QueueExternalNameMetadata externalNameMetadata; - QueueSymbolCodeMetadata symbolCodeMetadata; - QueueSourceCodeMetadata sourceCodeMetadata; - QueueSourceCodeNotAvailable sourceCodeNotAvailable; - QueueFiberEnter fiberEnter; - QueueFiberLeave fiberLeave; - }; -}; -#pragma pack( pop ) - - -enum { QueueItemSize = sizeof( QueueItem ) }; - -static constexpr size_t QueueDataSize[] = { - sizeof( QueueHeader ), // zone text - sizeof( QueueHeader ), // zone name - sizeof( QueueHeader ) + sizeof( QueueMessage ), - sizeof( QueueHeader ) + sizeof( QueueMessageColor ), - sizeof( QueueHeader ) + sizeof( QueueMessage ), // callstack - sizeof( QueueHeader ) + sizeof( QueueMessageColor ), // callstack - sizeof( QueueHeader ) + sizeof( QueueMessage ), // app info - sizeof( QueueHeader ) + sizeof( QueueZoneBeginLean ), // allocated source location - sizeof( QueueHeader ) + sizeof( QueueZoneBeginLean ), // allocated source location, callstack - sizeof( QueueHeader ), // callstack memory - sizeof( QueueHeader ), // callstack - sizeof( QueueHeader ), // callstack alloc - sizeof( QueueHeader ) + sizeof( QueueCallstackSample ), - sizeof( QueueHeader ) + sizeof( QueueCallstackSample ), // context switch - sizeof( QueueHeader ) + sizeof( QueueFrameImage ), - sizeof( QueueHeader ) + sizeof( QueueZoneBegin ), - sizeof( QueueHeader ) + sizeof( QueueZoneBegin ), // callstack - sizeof( QueueHeader ) + sizeof( QueueZoneEnd ), - sizeof( QueueHeader ) + sizeof( QueueLockWait ), - sizeof( QueueHeader ) + sizeof( QueueLockObtain ), - sizeof( QueueHeader ) + sizeof( QueueLockRelease ), - sizeof( QueueHeader ) + sizeof( QueueLockWait ), // shared - sizeof( QueueHeader ) + sizeof( QueueLockObtain ), // shared - sizeof( QueueHeader ) + sizeof( QueueLockReleaseShared ), - sizeof( QueueHeader ) + sizeof( QueueLockName ), - sizeof( QueueHeader ) + sizeof( QueueMemAlloc ), - sizeof( QueueHeader ) + sizeof( QueueMemAlloc ), // named - sizeof( QueueHeader ) + sizeof( QueueMemFree ), - sizeof( QueueHeader ) + sizeof( QueueMemFree ), // named - sizeof( QueueHeader ) + sizeof( QueueMemAlloc ), // callstack - sizeof( QueueHeader ) + sizeof( QueueMemAlloc ), // callstack, named - sizeof( QueueHeader ) + sizeof( QueueMemFree ), // callstack - sizeof( QueueHeader ) + sizeof( QueueMemFree ), // callstack, named - sizeof( QueueHeader ) + sizeof( QueueMemDiscard ), - sizeof( QueueHeader ) + sizeof( QueueMemDiscard ), // callstack - sizeof( QueueHeader ) + sizeof( QueueGpuZoneBegin ), - sizeof( QueueHeader ) + sizeof( QueueGpuZoneBegin ), // callstack - sizeof( QueueHeader ) + sizeof( QueueGpuZoneBeginLean ),// allocated source location - sizeof( QueueHeader ) + sizeof( QueueGpuZoneBeginLean ),// allocated source location, callstack - sizeof( QueueHeader ) + sizeof( QueueGpuZoneEnd ), - sizeof( QueueHeader ) + sizeof( QueueGpuZoneBegin ), // serial - sizeof( QueueHeader ) + sizeof( QueueGpuZoneBegin ), // serial, callstack - sizeof( QueueHeader ) + sizeof( QueueGpuZoneBeginLean ),// serial, allocated source location - sizeof( QueueHeader ) + sizeof( QueueGpuZoneBeginLean ),// serial, allocated source location, callstack - sizeof( QueueHeader ) + sizeof( QueueGpuZoneEnd ), // serial - sizeof( QueueHeader ) + sizeof( QueuePlotDataInt ), - sizeof( QueueHeader ) + sizeof( QueuePlotDataFloat ), - sizeof( QueueHeader ) + sizeof( QueuePlotDataDouble ), - sizeof( QueueHeader ) + sizeof( QueueContextSwitch ), - sizeof( QueueHeader ) + sizeof( QueueThreadWakeup ), - sizeof( QueueHeader ) + sizeof( QueueGpuTime ), - sizeof( QueueHeader ) + sizeof( QueueGpuContextName ), - sizeof( QueueHeader ) + sizeof( QueueCallstackFrameSize ), - sizeof( QueueHeader ) + sizeof( QueueSymbolInformation ), - sizeof( QueueHeader ), // ExternalNameMetadata - not for wire transfer - sizeof( QueueHeader ), // SymbolCodeMetadata - not for wire transfer - sizeof( QueueHeader ), // SourceCodeMetadata - not for wire transfer - sizeof( QueueHeader ) + sizeof( QueueFiberEnter ), - sizeof( QueueHeader ) + sizeof( QueueFiberLeave ), - // above items must be first - sizeof( QueueHeader ), // terminate - sizeof( QueueHeader ), // keep alive - sizeof( QueueHeader ) + sizeof( QueueThreadContext ), - sizeof( QueueHeader ) + sizeof( QueueGpuCalibration ), - sizeof( QueueHeader ) + sizeof( QueueGpuTimeSync ), - sizeof( QueueHeader ), // crash - sizeof( QueueHeader ) + sizeof( QueueCrashReport ), - sizeof( QueueHeader ) + sizeof( QueueZoneValidation ), - sizeof( QueueHeader ) + sizeof( QueueZoneColor ), - sizeof( QueueHeader ) + sizeof( QueueZoneValue ), - sizeof( QueueHeader ) + sizeof( QueueFrameMark ), // continuous frames - sizeof( QueueHeader ) + sizeof( QueueFrameMark ), // start - sizeof( QueueHeader ) + sizeof( QueueFrameMark ), // end - sizeof( QueueHeader ) + sizeof( QueueFrameVsync ), - sizeof( QueueHeader ) + sizeof( QueueSourceLocation ), - sizeof( QueueHeader ) + sizeof( QueueLockAnnounce ), - sizeof( QueueHeader ) + sizeof( QueueLockTerminate ), - sizeof( QueueHeader ) + sizeof( QueueLockMark ), - sizeof( QueueHeader ) + sizeof( QueueMessageLiteral ), - sizeof( QueueHeader ) + sizeof( QueueMessageColorLiteral ), - sizeof( QueueHeader ) + sizeof( QueueMessageLiteral ), // callstack - sizeof( QueueHeader ) + sizeof( QueueMessageColorLiteral ), // callstack - sizeof( QueueHeader ) + sizeof( QueueGpuNewContext ), - sizeof( QueueHeader ) + sizeof( QueueCallstackFrame ), - sizeof( QueueHeader ) + sizeof( QueueSysTime ), - sizeof( QueueHeader ) + sizeof( QueueSysPower ), - sizeof( QueueHeader ) + sizeof( QueueTidToPid ), - sizeof( QueueHeader ) + sizeof( QueueHwSample ), // cpu cycle - sizeof( QueueHeader ) + sizeof( QueueHwSample ), // instruction retired - sizeof( QueueHeader ) + sizeof( QueueHwSample ), // cache reference - sizeof( QueueHeader ) + sizeof( QueueHwSample ), // cache miss - sizeof( QueueHeader ) + sizeof( QueueHwSample ), // branch retired - sizeof( QueueHeader ) + sizeof( QueueHwSample ), // branch miss - sizeof( QueueHeader ) + sizeof( QueuePlotConfig ), - sizeof( QueueHeader ) + sizeof( QueueParamSetup ), - sizeof( QueueHeader ), // server query acknowledgement - sizeof( QueueHeader ) + sizeof( QueueSourceCodeNotAvailable ), - sizeof( QueueHeader ), // symbol code not available - sizeof( QueueHeader ) + sizeof( QueueCpuTopology ), - sizeof( QueueHeader ), // single string data - sizeof( QueueHeader ), // second string data - sizeof( QueueHeader ) + sizeof( QueueMemNamePayload ), - sizeof( QueueHeader ) + sizeof( QueueThreadGroupHint ), - // keep all QueueStringTransfer below - sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // string data - sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // thread name - sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // plot name - sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // allocated source location payload - sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // callstack payload - sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // callstack alloc payload - sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // frame name - sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // frame image data - sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // external name - sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // external thread name - sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // symbol code - sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // source code - sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // fiber name -}; - -static_assert( QueueItemSize == 32, "Queue item size not 32 bytes" ); -static_assert( sizeof( QueueDataSize ) / sizeof( size_t ) == (uint8_t)QueueType::NUM_TYPES, "QueueDataSize mismatch" ); -static_assert( sizeof( void* ) <= sizeof( uint64_t ), "Pointer size > 8 bytes" ); -static_assert( sizeof( void* ) == sizeof( uintptr_t ), "Pointer size != uintptr_t" ); - -} - -#endif diff --git a/src/third_party/tracy/common/TracySocket.cpp b/src/third_party/tracy/common/TracySocket.cpp deleted file mode 100644 index bdba3619..00000000 --- a/src/third_party/tracy/common/TracySocket.cpp +++ /dev/null @@ -1,752 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include - -#include "TracyAlloc.hpp" -#include "TracySocket.hpp" -#include "TracySystem.hpp" - -#ifdef _WIN32 -# ifndef NOMINMAX -# define NOMINMAX -# endif -# include -# include -# ifdef _MSC_VER -# pragma warning(disable:4244) -# pragma warning(disable:4267) -# endif -# define poll WSAPoll -# ifdef _MSC_VER -# pragma comment(lib, "ws2_32.lib") -# endif -#else -# include -# include -# include -# include -# include -# include -# include -# include -# include -#endif - -#ifndef MSG_NOSIGNAL -# define MSG_NOSIGNAL 0 -#endif - -namespace tracy -{ - -#ifdef _WIN32 -typedef SOCKET socket_t; -#else -typedef int socket_t; -#endif - -#ifdef _WIN32 -struct __wsinit -{ - __wsinit() - { - WSADATA wsaData; - if( WSAStartup( MAKEWORD( 2, 2 ), &wsaData ) != 0 ) - { - fprintf( stderr, "Cannot init winsock.\n" ); - exit( 1 ); - } - } -}; - -void InitWinSock() -{ - static __wsinit init; -} -#endif - - -enum { BufSize = 128 * 1024 }; - -Socket::Socket() - : m_buf( (char*)tracy_malloc( BufSize ) ) - , m_bufPtr( nullptr ) - , m_sock( -1 ) - , m_bufLeft( 0 ) - , m_ptr( nullptr ) -{ -#ifdef _WIN32 - InitWinSock(); -#endif -} - -Socket::Socket( int sock ) - : m_buf( (char*)tracy_malloc( BufSize ) ) - , m_bufPtr( nullptr ) - , m_sock( sock ) - , m_bufLeft( 0 ) - , m_ptr( nullptr ) -{ -} - -Socket::~Socket() -{ - tracy_free( m_buf ); - if( m_sock.load( std::memory_order_relaxed ) != -1 ) - { - Close(); - } - if( m_ptr ) - { - freeaddrinfo( m_res ); -#ifdef _WIN32 - closesocket( m_connSock ); -#else - close( m_connSock ); -#endif - } -} - -bool Socket::Connect( const char* addr, uint16_t port ) -{ - assert( !IsValid() ); - - if( m_ptr ) - { - const auto c = connect( m_connSock, m_ptr->ai_addr, m_ptr->ai_addrlen ); - if( c == -1 ) - { -#if defined _WIN32 - const auto err = WSAGetLastError(); - if( err == WSAEALREADY || err == WSAEINPROGRESS ) return false; - if( err != WSAEISCONN ) - { - freeaddrinfo( m_res ); - closesocket( m_connSock ); - m_ptr = nullptr; - return false; - } -#else - const auto err = errno; - if( err == EALREADY || err == EINPROGRESS ) return false; - if( err != EISCONN ) - { - freeaddrinfo( m_res ); - close( m_connSock ); - m_ptr = nullptr; - return false; - } -#endif - } - -#if defined _WIN32 - u_long nonblocking = 0; - ioctlsocket( m_connSock, FIONBIO, &nonblocking ); -#else - int flags = fcntl( m_connSock, F_GETFL, 0 ); - fcntl( m_connSock, F_SETFL, flags & ~O_NONBLOCK ); -#endif - m_sock.store( m_connSock, std::memory_order_relaxed ); - freeaddrinfo( m_res ); - m_ptr = nullptr; - return true; - } - - struct addrinfo hints; - struct addrinfo *res, *ptr; - - memset( &hints, 0, sizeof( hints ) ); - hints.ai_family = AF_UNSPEC; - hints.ai_socktype = SOCK_STREAM; - - char portbuf[32]; - sprintf( portbuf, "%" PRIu16, port ); - - if( getaddrinfo( addr, portbuf, &hints, &res ) != 0 ) return false; - int sock = 0; - for( ptr = res; ptr; ptr = ptr->ai_next ) - { - if( ( sock = socket( ptr->ai_family, ptr->ai_socktype, ptr->ai_protocol ) ) == -1 ) continue; -#if defined __APPLE__ - int val = 1; - setsockopt( sock, SOL_SOCKET, SO_NOSIGPIPE, &val, sizeof( val ) ); -#endif -#if defined _WIN32 - u_long nonblocking = 1; - ioctlsocket( sock, FIONBIO, &nonblocking ); -#else - int flags = fcntl( sock, F_GETFL, 0 ); - fcntl( sock, F_SETFL, flags | O_NONBLOCK ); -#endif - if( connect( sock, ptr->ai_addr, ptr->ai_addrlen ) == 0 ) - { - break; - } - else - { -#if defined _WIN32 - const auto err = WSAGetLastError(); - if( err != WSAEWOULDBLOCK ) - { - closesocket( sock ); - continue; - } -#else - if( errno != EINPROGRESS ) - { - close( sock ); - continue; - } -#endif - } - m_res = res; - m_ptr = ptr; - m_connSock = sock; - return false; - } - freeaddrinfo( res ); - if( !ptr ) return false; - -#if defined _WIN32 - u_long nonblocking = 0; - ioctlsocket( sock, FIONBIO, &nonblocking ); -#else - int flags = fcntl( sock, F_GETFL, 0 ); - fcntl( sock, F_SETFL, flags & ~O_NONBLOCK ); -#endif - - m_sock.store( sock, std::memory_order_relaxed ); - return true; -} - -bool Socket::ConnectBlocking( const char* addr, uint16_t port ) -{ - assert( !IsValid() ); - assert( !m_ptr ); - - struct addrinfo hints; - struct addrinfo *res, *ptr; - - memset( &hints, 0, sizeof( hints ) ); - hints.ai_family = AF_UNSPEC; - hints.ai_socktype = SOCK_STREAM; - - char portbuf[32]; - sprintf( portbuf, "%" PRIu16, port ); - - if( getaddrinfo( addr, portbuf, &hints, &res ) != 0 ) return false; - int sock = 0; - for( ptr = res; ptr; ptr = ptr->ai_next ) - { - if( ( sock = socket( ptr->ai_family, ptr->ai_socktype, ptr->ai_protocol ) ) == -1 ) continue; -#if defined __APPLE__ - int val = 1; - setsockopt( sock, SOL_SOCKET, SO_NOSIGPIPE, &val, sizeof( val ) ); -#endif - if( connect( sock, ptr->ai_addr, ptr->ai_addrlen ) == -1 ) - { -#ifdef _WIN32 - closesocket( sock ); -#else - close( sock ); -#endif - continue; - } - break; - } - freeaddrinfo( res ); - if( !ptr ) return false; - - m_sock.store( sock, std::memory_order_relaxed ); - return true; -} - -void Socket::Close() -{ - const auto sock = m_sock.load( std::memory_order_relaxed ); - assert( sock != -1 ); -#ifdef _WIN32 - closesocket( sock ); -#else - close( sock ); -#endif - m_sock.store( -1, std::memory_order_relaxed ); -} - -int Socket::Send( const void* _buf, int len ) -{ - const auto sock = m_sock.load( std::memory_order_relaxed ); - auto buf = (const char*)_buf; - assert( sock != -1 ); - auto start = buf; - while( len > 0 ) - { - auto ret = send( sock, buf, len, MSG_NOSIGNAL ); - if( ret == -1 ) return -1; - len -= ret; - buf += ret; - } - return int( buf - start ); -} - -int Socket::GetSendBufSize() -{ - const auto sock = m_sock.load( std::memory_order_relaxed ); - int bufSize; -#if defined _WIN32 - int sz = sizeof( bufSize ); - getsockopt( sock, SOL_SOCKET, SO_SNDBUF, (char*)&bufSize, &sz ); -#else - socklen_t sz = sizeof( bufSize ); - getsockopt( sock, SOL_SOCKET, SO_SNDBUF, &bufSize, &sz ); -#endif - return bufSize; -} - -int Socket::RecvBuffered( void* buf, int len, int timeout ) -{ - if( len <= m_bufLeft ) - { - memcpy( buf, m_bufPtr, len ); - m_bufPtr += len; - m_bufLeft -= len; - return len; - } - - if( m_bufLeft > 0 ) - { - memcpy( buf, m_bufPtr, m_bufLeft ); - const auto ret = m_bufLeft; - m_bufLeft = 0; - return ret; - } - - if( len >= BufSize ) return Recv( buf, len, timeout ); - - m_bufLeft = Recv( m_buf, BufSize, timeout ); - if( m_bufLeft <= 0 ) return m_bufLeft; - - const auto sz = len < m_bufLeft ? len : m_bufLeft; - memcpy( buf, m_buf, sz ); - m_bufPtr = m_buf + sz; - m_bufLeft -= sz; - return sz; -} - -int Socket::Recv( void* _buf, int len, int timeout ) -{ - const auto sock = m_sock.load( std::memory_order_relaxed ); - auto buf = (char*)_buf; - - struct pollfd fd; - fd.fd = (socket_t)sock; - fd.events = POLLIN; - - if( poll( &fd, 1, timeout ) > 0 ) - { - return recv( sock, buf, len, 0 ); - } - else - { - return -1; - } -} - -int Socket::ReadUpTo( void* _buf, int len ) -{ - const auto sock = m_sock.load( std::memory_order_relaxed ); - auto buf = (char*)_buf; - - int rd = 0; - while( len > 0 ) - { - const auto res = recv( sock, buf, len, 0 ); - if( res == 0 ) break; - if( res == -1 ) return -1; - len -= res; - rd += res; - buf += res; - } - return rd; -} - -bool Socket::Read( void* buf, int len, int timeout ) -{ - auto cbuf = (char*)buf; - while( len > 0 ) - { - if( !ReadImpl( cbuf, len, timeout ) ) return false; - } - return true; -} - -bool Socket::ReadImpl( char*& buf, int& len, int timeout ) -{ - const auto sz = RecvBuffered( buf, len, timeout ); - switch( sz ) - { - case 0: - return false; - case -1: -#ifdef _WIN32 - { - auto err = WSAGetLastError(); - if( err == WSAECONNABORTED || err == WSAECONNRESET ) return false; - } -#endif - break; - default: - len -= sz; - buf += sz; - break; - } - return true; -} - -bool Socket::ReadRaw( void* _buf, int len, int timeout ) -{ - auto buf = (char*)_buf; - while( len > 0 ) - { - const auto sz = Recv( buf, len, timeout ); - if( sz <= 0 ) return false; - len -= sz; - buf += sz; - } - return true; -} - -bool Socket::HasData() -{ - const auto sock = m_sock.load( std::memory_order_relaxed ); - if( m_bufLeft > 0 ) return true; - - struct pollfd fd; - fd.fd = (socket_t)sock; - fd.events = POLLIN; - - return poll( &fd, 1, 0 ) > 0; -} - -bool Socket::IsValid() const -{ - return m_sock.load( std::memory_order_relaxed ) >= 0; -} - - -ListenSocket::ListenSocket() - : m_sock( -1 ) -{ -#ifdef _WIN32 - InitWinSock(); -#endif -} - -ListenSocket::~ListenSocket() -{ - if( m_sock != -1 ) Close(); -} - -static int addrinfo_and_socket_for_family( uint16_t port, int ai_family, struct addrinfo** res ) -{ - struct addrinfo hints; - memset( &hints, 0, sizeof( hints ) ); - hints.ai_family = ai_family; - hints.ai_socktype = SOCK_STREAM; -#ifndef TRACY_ONLY_LOCALHOST - const char* onlyLocalhost = GetEnvVar( "TRACY_ONLY_LOCALHOST" ); - if( !onlyLocalhost || onlyLocalhost[0] != '1' ) - { - hints.ai_flags = AI_PASSIVE; - } -#endif - char portbuf[32]; - sprintf( portbuf, "%" PRIu16, port ); - if( getaddrinfo( nullptr, portbuf, &hints, res ) != 0 ) return -1; - int sock = socket( (*res)->ai_family, (*res)->ai_socktype, (*res)->ai_protocol ); - if (sock == -1) freeaddrinfo( *res ); - return sock; -} - -bool ListenSocket::Listen( uint16_t port, int backlog ) -{ - assert( m_sock == -1 ); - - struct addrinfo* res = nullptr; - -#if !defined TRACY_ONLY_IPV4 && !defined TRACY_ONLY_LOCALHOST - const char* onlyIPv4 = GetEnvVar( "TRACY_ONLY_IPV4" ); - if( !onlyIPv4 || onlyIPv4[0] != '1' ) - { - m_sock = addrinfo_and_socket_for_family( port, AF_INET6, &res ); - } -#endif - if (m_sock == -1) - { - // IPV6 protocol may not be available/is disabled. Try to create a socket - // with the IPV4 protocol - m_sock = addrinfo_and_socket_for_family( port, AF_INET, &res ); - if( m_sock == -1 ) return false; - } -#if defined _WIN32 - unsigned long val = 0; - setsockopt( m_sock, IPPROTO_IPV6, IPV6_V6ONLY, (const char*)&val, sizeof( val ) ); -#elif defined BSD - int val = 0; - setsockopt( m_sock, IPPROTO_IPV6, IPV6_V6ONLY, (const char*)&val, sizeof( val ) ); - val = 1; - setsockopt( m_sock, SOL_SOCKET, SO_REUSEADDR, &val, sizeof( val ) ); -#else - int val = 1; - setsockopt( m_sock, SOL_SOCKET, SO_REUSEADDR, &val, sizeof( val ) ); -#endif - if( bind( m_sock, res->ai_addr, res->ai_addrlen ) == -1 ) { freeaddrinfo( res ); Close(); return false; } - if( listen( m_sock, backlog ) == -1 ) { freeaddrinfo( res ); Close(); return false; } - freeaddrinfo( res ); - return true; -} - -Socket* ListenSocket::Accept() -{ - struct sockaddr_storage remote; - socklen_t sz = sizeof( remote ); - - struct pollfd fd; - fd.fd = (socket_t)m_sock; - fd.events = POLLIN; - - if( poll( &fd, 1, 10 ) > 0 ) - { - int sock = accept( m_sock, (sockaddr*)&remote, &sz); - if( sock == -1 ) return nullptr; - -#if defined __APPLE__ - int val = 1; - setsockopt( sock, SOL_SOCKET, SO_NOSIGPIPE, &val, sizeof( val ) ); -#endif - - auto ptr = (Socket*)tracy_malloc( sizeof( Socket ) ); - new(ptr) Socket( sock ); - return ptr; - } - else - { - return nullptr; - } -} - -void ListenSocket::Close() -{ - assert( m_sock != -1 ); -#ifdef _WIN32 - closesocket( m_sock ); -#else - close( m_sock ); -#endif - m_sock = -1; -} - -UdpBroadcast::UdpBroadcast() - : m_sock( -1 ) -{ -#ifdef _WIN32 - InitWinSock(); -#endif -} - -UdpBroadcast::~UdpBroadcast() -{ - if( m_sock != -1 ) Close(); -} - -bool UdpBroadcast::Open( const char* addr, uint16_t port ) -{ - assert( m_sock == -1 ); - - struct addrinfo hints; - struct addrinfo *res, *ptr; - - memset( &hints, 0, sizeof( hints ) ); - hints.ai_family = AF_INET; - hints.ai_socktype = SOCK_DGRAM; - - char portbuf[32]; - sprintf( portbuf, "%" PRIu16, port ); - - if( getaddrinfo( addr, portbuf, &hints, &res ) != 0 ) return false; - int sock = 0; - for( ptr = res; ptr; ptr = ptr->ai_next ) - { - if( ( sock = socket( ptr->ai_family, ptr->ai_socktype, ptr->ai_protocol ) ) == -1 ) continue; -#if defined __APPLE__ - int val = 1; - setsockopt( sock, SOL_SOCKET, SO_NOSIGPIPE, &val, sizeof( val ) ); -#endif -#if defined _WIN32 - unsigned long broadcast = 1; - if( setsockopt( sock, SOL_SOCKET, SO_BROADCAST, (const char*)&broadcast, sizeof( broadcast ) ) == -1 ) -#else - int broadcast = 1; - if( setsockopt( sock, SOL_SOCKET, SO_BROADCAST, &broadcast, sizeof( broadcast ) ) == -1 ) -#endif - { -#ifdef _WIN32 - closesocket( sock ); -#else - close( sock ); -#endif - continue; - } - break; - } - freeaddrinfo( res ); - if( !ptr ) return false; - - m_sock = sock; - inet_pton( AF_INET, addr, &m_addr ); - return true; -} - -void UdpBroadcast::Close() -{ - assert( m_sock != -1 ); -#ifdef _WIN32 - closesocket( m_sock ); -#else - close( m_sock ); -#endif - m_sock = -1; -} - -int UdpBroadcast::Send( uint16_t port, const void* data, int len ) -{ - assert( m_sock != -1 ); - struct sockaddr_in addr; - addr.sin_family = AF_INET; - addr.sin_port = htons( port ); - addr.sin_addr.s_addr = m_addr; - return sendto( m_sock, (const char*)data, len, MSG_NOSIGNAL, (sockaddr*)&addr, sizeof( addr ) ); -} - -IpAddress::IpAddress() - : m_number( 0 ) -{ - *m_text = '\0'; -} - -IpAddress::~IpAddress() -{ -} - -void IpAddress::Set( const struct sockaddr& addr ) -{ -#if defined _WIN32 && ( !defined NTDDI_WIN10 || NTDDI_VERSION < NTDDI_WIN10 ) - struct sockaddr_in tmp; - memcpy( &tmp, &addr, sizeof( tmp ) ); - auto ai = &tmp; -#else - auto ai = (const struct sockaddr_in*)&addr; -#endif - inet_ntop( AF_INET, &ai->sin_addr, m_text, 17 ); - m_number = ai->sin_addr.s_addr; -} - -UdpListen::UdpListen() - : m_sock( -1 ) -{ -#ifdef _WIN32 - InitWinSock(); -#endif -} - -UdpListen::~UdpListen() -{ - if( m_sock != -1 ) Close(); -} - -bool UdpListen::Listen( uint16_t port ) -{ - assert( m_sock == -1 ); - - int sock; - if( ( sock = socket( AF_INET, SOCK_DGRAM, 0 ) ) == -1 ) return false; - -#if defined __APPLE__ - int val = 1; - setsockopt( sock, SOL_SOCKET, SO_NOSIGPIPE, &val, sizeof( val ) ); -#endif -#if defined _WIN32 - unsigned long reuse = 1; - setsockopt( sock, SOL_SOCKET, SO_REUSEADDR, (const char*)&reuse, sizeof( reuse ) ); -#else - int reuse = 1; - setsockopt( sock, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof( reuse ) ); -#endif -#if defined _WIN32 - unsigned long broadcast = 1; - if( setsockopt( sock, SOL_SOCKET, SO_BROADCAST, (const char*)&broadcast, sizeof( broadcast ) ) == -1 ) -#else - int broadcast = 1; - if( setsockopt( sock, SOL_SOCKET, SO_BROADCAST, &broadcast, sizeof( broadcast ) ) == -1 ) -#endif - { -#ifdef _WIN32 - closesocket( sock ); -#else - close( sock ); -#endif - return false; - } - - struct sockaddr_in addr; - addr.sin_family = AF_INET; - addr.sin_port = htons( port ); - addr.sin_addr.s_addr = INADDR_ANY; - - if( bind( sock, (sockaddr*)&addr, sizeof( addr ) ) == -1 ) - { -#ifdef _WIN32 - closesocket( sock ); -#else - close( sock ); -#endif - return false; - } - - m_sock = sock; - return true; -} - -void UdpListen::Close() -{ - assert( m_sock != -1 ); -#ifdef _WIN32 - closesocket( m_sock ); -#else - close( m_sock ); -#endif - m_sock = -1; -} - -const char* UdpListen::Read( size_t& len, IpAddress& addr, int timeout ) -{ - static char buf[2048]; - - struct pollfd fd; - fd.fd = (socket_t)m_sock; - fd.events = POLLIN; - if( poll( &fd, 1, timeout ) <= 0 ) return nullptr; - - sockaddr sa; - socklen_t salen = sizeof( struct sockaddr ); - len = (size_t)recvfrom( m_sock, buf, 2048, 0, &sa, &salen ); - addr.Set( sa ); - - return buf; -} - -} diff --git a/src/third_party/tracy/common/TracySocket.hpp b/src/third_party/tracy/common/TracySocket.hpp deleted file mode 100644 index f7713aac..00000000 --- a/src/third_party/tracy/common/TracySocket.hpp +++ /dev/null @@ -1,155 +0,0 @@ -#ifndef __TRACYSOCKET_HPP__ -#define __TRACYSOCKET_HPP__ - -#include -#include -#include - -struct addrinfo; -struct sockaddr; - -namespace tracy -{ - -#ifdef _WIN32 -void InitWinSock(); -#endif - -class Socket -{ -public: - Socket(); - Socket( int sock ); - ~Socket(); - - bool Connect( const char* addr, uint16_t port ); - bool ConnectBlocking( const char* addr, uint16_t port ); - void Close(); - - int Send( const void* buf, int len ); - int GetSendBufSize(); - - int ReadUpTo( void* buf, int len ); - bool Read( void* buf, int len, int timeout ); - - template - bool Read( void* buf, int len, int timeout, ShouldExit exitCb ) - { - auto cbuf = (char*)buf; - while( len > 0 ) - { - if( exitCb() ) return false; - if( !ReadImpl( cbuf, len, timeout ) ) return false; - } - return true; - } - - bool ReadRaw( void* buf, int len, int timeout ); - bool HasData(); - bool IsValid() const; - - Socket( const Socket& ) = delete; - Socket( Socket&& ) = delete; - Socket& operator=( const Socket& ) = delete; - Socket& operator=( Socket&& ) = delete; - -private: - int RecvBuffered( void* buf, int len, int timeout ); - int Recv( void* buf, int len, int timeout ); - - bool ReadImpl( char*& buf, int& len, int timeout ); - - char* m_buf; - char* m_bufPtr; - std::atomic m_sock; - int m_bufLeft; - - struct addrinfo *m_res; - struct addrinfo *m_ptr; - int m_connSock; -}; - -class ListenSocket -{ -public: - ListenSocket(); - ~ListenSocket(); - - bool Listen( uint16_t port, int backlog ); - Socket* Accept(); - void Close(); - - ListenSocket( const ListenSocket& ) = delete; - ListenSocket( ListenSocket&& ) = delete; - ListenSocket& operator=( const ListenSocket& ) = delete; - ListenSocket& operator=( ListenSocket&& ) = delete; - -private: - int m_sock; -}; - -class UdpBroadcast -{ -public: - UdpBroadcast(); - ~UdpBroadcast(); - - bool Open( const char* addr, uint16_t port ); - void Close(); - - int Send( uint16_t port, const void* data, int len ); - - UdpBroadcast( const UdpBroadcast& ) = delete; - UdpBroadcast( UdpBroadcast&& ) = delete; - UdpBroadcast& operator=( const UdpBroadcast& ) = delete; - UdpBroadcast& operator=( UdpBroadcast&& ) = delete; - -private: - int m_sock; - uint32_t m_addr; -}; - -class IpAddress -{ -public: - IpAddress(); - ~IpAddress(); - - void Set( const struct sockaddr& addr ); - - uint32_t GetNumber() const { return m_number; } - const char* GetText() const { return m_text; } - - IpAddress( const IpAddress& ) = delete; - IpAddress( IpAddress&& ) = delete; - IpAddress& operator=( const IpAddress& ) = delete; - IpAddress& operator=( IpAddress&& ) = delete; - -private: - uint32_t m_number; - char m_text[17]; -}; - -class UdpListen -{ -public: - UdpListen(); - ~UdpListen(); - - bool Listen( uint16_t port ); - void Close(); - - const char* Read( size_t& len, IpAddress& addr, int timeout ); - - UdpListen( const UdpListen& ) = delete; - UdpListen( UdpListen&& ) = delete; - UdpListen& operator=( const UdpListen& ) = delete; - UdpListen& operator=( UdpListen&& ) = delete; - -private: - int m_sock; -}; - -} - -#endif diff --git a/src/third_party/tracy/common/TracyStackFrames.cpp b/src/third_party/tracy/common/TracyStackFrames.cpp deleted file mode 100644 index 7b0abace..00000000 --- a/src/third_party/tracy/common/TracyStackFrames.cpp +++ /dev/null @@ -1,122 +0,0 @@ -#include "TracyStackFrames.hpp" - -namespace tracy -{ - -const char* s_tracyStackFrames_[] = { - "tracy::Callstack", - "tracy::Callstack(int)", - "tracy::GpuCtxScope::{ctor}", - "tracy::Profiler::SendCallstack", - "tracy::Profiler::SendCallstack(int)", - "tracy::Profiler::SendCallstack(int, unsigned long)", - "tracy::Profiler::MemAllocCallstack", - "tracy::Profiler::MemAllocCallstack(void const*, unsigned long, int)", - "tracy::Profiler::MemFreeCallstack", - "tracy::Profiler::MemFreeCallstack(void const*, int)", - "tracy::ScopedZone::{ctor}", - "tracy::ScopedZone::ScopedZone(tracy::SourceLocationData const*, int, bool)", - "tracy::Profiler::Message", - nullptr -}; - -const char** s_tracyStackFrames = s_tracyStackFrames_; - -const StringMatch s_tracySkipSubframes_[] = { - { "/include/arm_neon.h", 19 }, - { "/include/adxintrin.h", 20 }, - { "/include/ammintrin.h", 20 }, - { "/include/amxbf16intrin.h", 24 }, - { "/include/amxint8intrin.h", 24 }, - { "/include/amxtileintrin.h", 24 }, - { "/include/avx2intrin.h", 21 }, - { "/include/avx5124fmapsintrin.h", 29 }, - { "/include/avx5124vnniwintrin.h", 29 }, - { "/include/avx512bf16intrin.h", 27 }, - { "/include/avx512bf16vlintrin.h", 29 }, - { "/include/avx512bitalgintrin.h", 29 }, - { "/include/avx512bwintrin.h", 25 }, - { "/include/avx512cdintrin.h", 25 }, - { "/include/avx512dqintrin.h", 25 }, - { "/include/avx512erintrin.h", 25 }, - { "/include/avx512fintrin.h", 24 }, - { "/include/avx512ifmaintrin.h", 27 }, - { "/include/avx512ifmavlintrin.h", 29 }, - { "/include/avx512pfintrin.h", 25 }, - { "/include/avx512vbmi2intrin.h", 28 }, - { "/include/avx512vbmi2vlintrin.h", 30 }, - { "/include/avx512vbmiintrin.h", 27 }, - { "/include/avx512vbmivlintrin.h", 29 }, - { "/include/avx512vlbwintrin.h", 27 }, - { "/include/avx512vldqintrin.h", 27 }, - { "/include/avx512vlintrin.h", 25 }, - { "/include/avx512vnniintrin.h", 27 }, - { "/include/avx512vnnivlintrin.h", 29 }, - { "/include/avx512vp2intersectintrin.h", 35 }, - { "/include/avx512vp2intersectvlintrin.h", 37 }, - { "/include/avx512vpopcntdqintrin.h", 32 }, - { "/include/avx512vpopcntdqvlintrin.h", 34 }, - { "/include/avxintrin.h", 20 }, - { "/include/avxvnniintrin.h", 24 }, - { "/include/bmi2intrin.h", 21 }, - { "/include/bmiintrin.h", 20 }, - { "/include/bmmintrin.h", 20 }, - { "/include/cetintrin.h", 20 }, - { "/include/cldemoteintrin.h", 25 }, - { "/include/clflushoptintrin.h", 27 }, - { "/include/clwbintrin.h", 21 }, - { "/include/clzerointrin.h", 23 }, - { "/include/emmintrin.h", 20 }, - { "/include/enqcmdintrin.h", 23 }, - { "/include/f16cintrin.h", 21 }, - { "/include/fma4intrin.h", 21 }, - { "/include/fmaintrin.h", 20 }, - { "/include/fxsrintrin.h", 21 }, - { "/include/gfniintrin.h", 21 }, - { "/include/hresetintrin.h", 23 }, - { "/include/ia32intrin.h", 21 }, - { "/include/immintrin.h", 20 }, - { "/include/keylockerintrin.h", 26 }, - { "/include/lwpintrin.h", 20 }, - { "/include/lzcntintrin.h", 22 }, - { "/include/mmintrin.h", 19 }, - { "/include/movdirintrin.h", 23 }, - { "/include/mwaitxintrin.h", 23 }, - { "/include/nmmintrin.h", 20 }, - { "/include/pconfigintrin.h", 24 }, - { "/include/pkuintrin.h", 20 }, - { "/include/pmmintrin.h", 20 }, - { "/include/popcntintrin.h", 23 }, - { "/include/prfchwintrin.h", 23 }, - { "/include/rdseedintrin.h", 23 }, - { "/include/rtmintrin.h", 20 }, - { "/include/serializeintrin.h", 26 }, - { "/include/sgxintrin.h", 20 }, - { "/include/shaintrin.h", 20 }, - { "/include/smmintrin.h", 20 }, - { "/include/tbmintrin.h", 20 }, - { "/include/tmmintrin.h", 20 }, - { "/include/tsxldtrkintrin.h", 25 }, - { "/include/uintrintrin.h", 22 }, - { "/include/vaesintrin.h", 21 }, - { "/include/vpclmulqdqintrin.h", 27 }, - { "/include/waitpkgintrin.h", 24 }, - { "/include/wbnoinvdintrin.h", 25 }, - { "/include/wmmintrin.h", 20 }, - { "/include/x86gprintrin.h", 23 }, - { "/include/x86intrin.h", 20 }, - { "/include/xmmintrin.h", 20 }, - { "/include/xopintrin.h", 20 }, - { "/include/xsavecintrin.h", 23 }, - { "/include/xsaveintrin.h", 22 }, - { "/include/xsaveoptintrin.h", 25 }, - { "/include/xsavesintrin.h", 23 }, - { "/include/xtestintrin.h", 22 }, - { "/bits/atomic_base.h", 19 }, - { "/atomic", 7 }, - {} -}; - -const StringMatch* s_tracySkipSubframes = s_tracySkipSubframes_; - -} diff --git a/src/third_party/tracy/common/TracyStackFrames.hpp b/src/third_party/tracy/common/TracyStackFrames.hpp deleted file mode 100644 index 9d4262c0..00000000 --- a/src/third_party/tracy/common/TracyStackFrames.hpp +++ /dev/null @@ -1,22 +0,0 @@ -#ifndef __TRACYSTACKFRAMES_HPP__ -#define __TRACYSTACKFRAMES_HPP__ - -#include - -namespace tracy -{ - -struct StringMatch -{ - const char* str; - size_t len; -}; - -extern const char** s_tracyStackFrames; -extern const StringMatch* s_tracySkipSubframes; - -static constexpr int s_tracySkipSubframesMinLen = 7; - -} - -#endif diff --git a/src/third_party/tracy/common/TracySystem.cpp b/src/third_party/tracy/common/TracySystem.cpp deleted file mode 100644 index a92a3457..00000000 --- a/src/third_party/tracy/common/TracySystem.cpp +++ /dev/null @@ -1,349 +0,0 @@ -#ifdef _MSC_VER -# pragma warning(disable:4996) -#endif -#if defined _WIN32 -# ifndef WIN32_LEAN_AND_MEAN -# define WIN32_LEAN_AND_MEAN -# endif -# ifndef NOMINMAX -# define NOMINMAX -# endif -# include -# include -# include "TracyUwp.hpp" -#else -# include -# include -# include -#endif - -#ifdef __linux__ -# ifdef __ANDROID__ -# include -# else -# include -# endif -# include -#elif defined __FreeBSD__ -# include -#elif defined __NetBSD__ -# include -#elif defined __DragonFly__ -# include -#elif defined __QNX__ -# include -# include -#endif - -#ifdef __MINGW32__ -# define __STDC_FORMAT_MACROS -#endif -#include -#include -#include - -#include "TracySystem.hpp" - -#if defined _WIN32 -extern "C" typedef HRESULT (WINAPI *t_SetThreadDescription)( HANDLE, PCWSTR ); -extern "C" typedef HRESULT (WINAPI *t_GetThreadDescription)( HANDLE, PWSTR* ); -#endif - -#ifdef TRACY_ENABLE -# include -# include "TracyAlloc.hpp" -#endif - -namespace tracy -{ - -namespace detail -{ - -TRACY_API uint32_t GetThreadHandleImpl() -{ -#if defined _WIN32 - static_assert( sizeof( decltype( GetCurrentThreadId() ) ) <= sizeof( uint32_t ), "Thread handle too big to fit in protocol" ); - return uint32_t( GetCurrentThreadId() ); -#elif defined __APPLE__ - uint64_t id; - pthread_threadid_np( pthread_self(), &id ); - return uint32_t( id ); -#elif defined __ANDROID__ - return (uint32_t)gettid(); -#elif defined __linux__ - return (uint32_t)syscall( SYS_gettid ); -#elif defined __FreeBSD__ - long id; - thr_self( &id ); - return id; -#elif defined __NetBSD__ - return _lwp_self(); -#elif defined __DragonFly__ - return lwp_gettid(); -#elif defined __OpenBSD__ - return getthrid(); -#elif defined __QNX__ - return (uint32_t) gettid(); -#elif defined __EMSCRIPTEN__ - // Not supported, but let it compile. - return 0; -#else - // To add support for a platform, retrieve and return the kernel thread identifier here. - // - // Note that pthread_t (as for example returned by pthread_self()) is *not* a kernel - // thread identifier. It is a pointer to a library-allocated data structure instead. - // Such pointers will be reused heavily, making the pthread_t non-unique. Additionally - // a 64-bit pointer cannot be reliably truncated to 32 bits. - #error "Unsupported platform!" -#endif - -} - -} - -#ifdef TRACY_ENABLE -std::atomic& GetThreadNameData(); -#endif - -#if defined _MSC_VER && !defined __clang__ -# pragma pack( push, 8 ) -struct THREADNAME_INFO -{ - DWORD dwType; - LPCSTR szName; - DWORD dwThreadID; - DWORD dwFlags; -}; -# pragma pack( pop ) - -void ThreadNameMsvcMagic( const THREADNAME_INFO& info ) -{ - __try - { - RaiseException( 0x406D1388, 0, sizeof(info)/sizeof(ULONG_PTR), (ULONG_PTR*)&info ); - } - __except(EXCEPTION_EXECUTE_HANDLER) - { - } -} -#endif - -TRACY_API void SetThreadName( const char* name ) -{ - SetThreadNameWithHint( name, 0 ); -} - -TRACY_API void SetThreadNameWithHint( const char* name, int32_t groupHint ) -{ -#if defined _WIN32 -# ifdef TRACY_UWP - static auto _SetThreadDescription = &::SetThreadDescription; -# else - static auto _SetThreadDescription = (t_SetThreadDescription)GetProcAddress( GetModuleHandleA( "kernel32.dll" ), "SetThreadDescription" ); -# endif - if( _SetThreadDescription ) - { - wchar_t buf[256]; - mbstowcs( buf, name, 256 ); - _SetThreadDescription( GetCurrentThread(), buf ); - } - else - { -# if defined _MSC_VER && !defined __clang__ - THREADNAME_INFO info; - info.dwType = 0x1000; - info.szName = name; - info.dwThreadID = GetCurrentThreadId(); - info.dwFlags = 0; - ThreadNameMsvcMagic( info ); -# endif - } -#elif defined _GNU_SOURCE && !defined __EMSCRIPTEN__ - { - const auto sz = strlen( name ); - if( sz <= 15 ) - { -#if defined __APPLE__ - pthread_setname_np( name ); -#else - pthread_setname_np( pthread_self(), name ); -#endif - } - else - { - char buf[16]; - memcpy( buf, name, 15 ); - buf[15] = '\0'; -#if defined __APPLE__ - pthread_setname_np( buf ); -#else - pthread_setname_np( pthread_self(), buf ); -#endif - } - } -#elif defined __QNX__ - { - const auto sz = strlen( name ); - if( sz <= _NTO_THREAD_NAME_MAX ) - { - pthread_setname_np( pthread_self(), name ); - } - else - { - char buf[_NTO_THREAD_NAME_MAX + 1]; - memcpy( buf, name, _NTO_THREAD_NAME_MAX ); - buf[_NTO_THREAD_NAME_MAX] = '\0'; - pthread_setname_np( pthread_self(), buf ); - } - }; -#endif -#ifdef TRACY_ENABLE - { - const auto sz = strlen( name ); - char* buf = (char*)tracy_malloc( sz+1 ); - memcpy( buf, name, sz ); - buf[sz] = '\0'; - auto data = (ThreadNameData*)tracy_malloc_fast( sizeof( ThreadNameData ) ); - data->id = detail::GetThreadHandleImpl(); - data->groupHint = groupHint; - data->name = buf; - data->next = GetThreadNameData().load( std::memory_order_relaxed ); - while( !GetThreadNameData().compare_exchange_weak( data->next, data, std::memory_order_release, std::memory_order_relaxed ) ) {} - } -#endif -} - -#ifdef TRACY_ENABLE -ThreadNameData* GetThreadNameData( uint32_t id ) -{ - auto ptr = GetThreadNameData().load( std::memory_order_relaxed ); - while( ptr ) - { - if( ptr->id == id ) - { - return ptr; - } - ptr = ptr->next; - } - return nullptr; -} -#endif - -TRACY_API const char* GetThreadName( uint32_t id ) -{ - static char buf[256]; -#ifdef TRACY_ENABLE - auto ptr = GetThreadNameData().load( std::memory_order_relaxed ); - while( ptr ) - { - if( ptr->id == id ) - { - return ptr->name; - } - ptr = ptr->next; - } -#endif - -#if defined _WIN32 -# ifdef TRACY_UWP - static auto _GetThreadDescription = &::GetThreadDescription; -# else - static auto _GetThreadDescription = (t_GetThreadDescription)GetProcAddress( GetModuleHandleA( "kernel32.dll" ), "GetThreadDescription" ); -# endif - if( _GetThreadDescription ) - { - auto hnd = OpenThread( THREAD_QUERY_LIMITED_INFORMATION, FALSE, (DWORD)id ); - if( hnd != 0 ) - { - PWSTR tmp; - if( SUCCEEDED( _GetThreadDescription( hnd, &tmp ) ) ) - { - auto ret = wcstombs( buf, tmp, 256 ); - CloseHandle( hnd ); - LocalFree( tmp ); - if( ret != static_cast( -1 ) ) - { - return buf; - } - } - } - } -#elif defined __linux__ - int cs, fd; - char path[32]; - snprintf( path, sizeof( path ), "/proc/self/task/%d/comm", id ); - sprintf( buf, "%" PRIu32, id ); -# ifndef __ANDROID__ - pthread_setcancelstate( PTHREAD_CANCEL_DISABLE, &cs ); -# endif - if ( ( fd = open( path, O_RDONLY ) ) > 0) { - int len = read( fd, buf, 255 ); - if( len > 0 ) - { - buf[len] = 0; - if( len > 1 && buf[len-1] == '\n' ) - { - buf[len-1] = 0; - } - } - close( fd ); - } -# ifndef __ANDROID__ - pthread_setcancelstate( cs, 0 ); -# endif - return buf; -#elif defined __QNX__ - static char qnxNameBuf[_NTO_THREAD_NAME_MAX + 1] = {0}; - if (pthread_getname_np(static_cast(id), qnxNameBuf, _NTO_THREAD_NAME_MAX) == 0) { - return qnxNameBuf; - }; -#endif - - sprintf( buf, "%" PRIu32, id ); - return buf; -} - -TRACY_API const char* GetEnvVar( const char* name ) -{ -#if defined _WIN32 - // unfortunately getenv() on Windows is just fundamentally broken. It caches the entire - // environment block once on startup, then never refreshes it again. If any environment - // strings are added or modified after startup of the CRT, those changes will not be - // seen by getenv(). This removes the possibility of an app using this SDK from - // programmatically setting any of the behaviour controlling envvars here. - // - // To work around this, we'll instead go directly to the Win32 environment strings APIs - // to get the current value. - static char buffer[1024]; - DWORD const kBufferSize = DWORD(sizeof(buffer) / sizeof(buffer[0])); - DWORD count = GetEnvironmentVariableA(name, buffer, kBufferSize); - - if( count == 0 ) - return nullptr; - - if( count >= kBufferSize ) - { - char* buf = reinterpret_cast(_alloca(count + 1)); - count = GetEnvironmentVariableA(name, buf, count + 1); - memcpy(buffer, buf, kBufferSize); - buffer[kBufferSize - 1] = 0; - } - - return buffer; -#else - return getenv(name); -#endif -} - -} - -#ifdef __cplusplus -extern "C" { -#endif - -TRACY_API void ___tracy_set_thread_name( const char* name ) { tracy::SetThreadName( name ); } - -#ifdef __cplusplus -} -#endif diff --git a/src/third_party/tracy/common/TracySystem.hpp b/src/third_party/tracy/common/TracySystem.hpp deleted file mode 100644 index 2f565e9a..00000000 --- a/src/third_party/tracy/common/TracySystem.hpp +++ /dev/null @@ -1,43 +0,0 @@ -#ifndef __TRACYSYSTEM_HPP__ -#define __TRACYSYSTEM_HPP__ - -#include - -#include "TracyApi.h" - -namespace tracy -{ - -namespace detail -{ -TRACY_API uint32_t GetThreadHandleImpl(); -} - -#ifdef TRACY_ENABLE -struct ThreadNameData -{ - uint32_t id; - int32_t groupHint; - const char* name; - ThreadNameData* next; -}; - -ThreadNameData* GetThreadNameData( uint32_t id ); - -TRACY_API uint32_t GetThreadHandle(); -#else -static inline uint32_t GetThreadHandle() -{ - return detail::GetThreadHandleImpl(); -} -#endif - -TRACY_API void SetThreadName( const char* name ); -TRACY_API void SetThreadNameWithHint( const char* name, int32_t groupHint ); -TRACY_API const char* GetThreadName( uint32_t id ); - -TRACY_API const char* GetEnvVar( const char* name ); - -} - -#endif diff --git a/src/third_party/tracy/common/TracyUwp.hpp b/src/third_party/tracy/common/TracyUwp.hpp deleted file mode 100644 index 7dce96b9..00000000 --- a/src/third_party/tracy/common/TracyUwp.hpp +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef __TRACYUWP_HPP__ -#define __TRACYUWP_HPP__ - -#ifdef _WIN32 -# include -# if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP) && !WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) -# define TRACY_UWP -# endif -#endif - -#endif diff --git a/src/third_party/tracy/common/TracyVersion.hpp b/src/third_party/tracy/common/TracyVersion.hpp deleted file mode 100644 index 12642d65..00000000 --- a/src/third_party/tracy/common/TracyVersion.hpp +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef __TRACYVERSION_HPP__ -#define __TRACYVERSION_HPP__ - -namespace tracy -{ -namespace Version -{ -enum { Major = 0 }; -enum { Minor = 11 }; -enum { Patch = 2 }; -} -} - -#endif diff --git a/src/third_party/tracy/common/TracyYield.hpp b/src/third_party/tracy/common/TracyYield.hpp deleted file mode 100644 index 035836cd..00000000 --- a/src/third_party/tracy/common/TracyYield.hpp +++ /dev/null @@ -1,28 +0,0 @@ -#ifndef __TRACYYIELD_HPP__ -#define __TRACYYIELD_HPP__ - -#if defined __SSE2__ || defined _M_AMD64 || (defined _M_IX86_FP && _M_IX86_FP == 2) -# include -#else -# include -#endif - -#include "TracyForceInline.hpp" - -namespace tracy -{ - -static tracy_force_inline void YieldThread() -{ -#if defined __SSE2__ || defined _M_AMD64 || (defined _M_IX86_FP && _M_IX86_FP == 2) - _mm_pause(); -#elif defined __aarch64__ - asm volatile( "isb" : : ); -#else - std::this_thread::yield(); -#endif -} - -} - -#endif diff --git a/src/third_party/tracy/common/tracy_lz4.cpp b/src/third_party/tracy/common/tracy_lz4.cpp deleted file mode 100644 index 15d0990f..00000000 --- a/src/third_party/tracy/common/tracy_lz4.cpp +++ /dev/null @@ -1,2720 +0,0 @@ -/* - LZ4 - Fast LZ compression algorithm - Copyright (C) 2011-2020, Yann Collet. - - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - You can contact the author at : - - LZ4 homepage : http://www.lz4.org - - LZ4 source repository : https://github.com/lz4/lz4 -*/ - -/*-************************************ -* Tuning parameters -**************************************/ -/* - * LZ4_HEAPMODE : - * Select how default compression functions will allocate memory for their hash table, - * in memory stack (0:default, fastest), or in memory heap (1:requires malloc()). - */ -#ifndef LZ4_HEAPMODE -# define LZ4_HEAPMODE 0 -#endif - -/* - * LZ4_ACCELERATION_DEFAULT : - * Select "acceleration" for LZ4_compress_fast() when parameter value <= 0 - */ -#define LZ4_ACCELERATION_DEFAULT 1 -/* - * LZ4_ACCELERATION_MAX : - * Any "acceleration" value higher than this threshold - * get treated as LZ4_ACCELERATION_MAX instead (fix #876) - */ -#define LZ4_ACCELERATION_MAX 65537 - - -/*-************************************ -* CPU Feature Detection -**************************************/ -/* LZ4_FORCE_MEMORY_ACCESS - * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. - * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. - * The below switch allow to select different access method for improved performance. - * Method 0 (default) : use `memcpy()`. Safe and portable. - * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). - * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. - * Method 2 : direct access. This method is portable but violate C standard. - * It can generate buggy code on targets which assembly generation depends on alignment. - * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) - * See https://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details. - * Prefer these methods in priority order (0 > 1 > 2) - */ -#ifndef LZ4_FORCE_MEMORY_ACCESS /* can be defined externally */ -# if defined(__GNUC__) && \ - ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) \ - || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) -# define LZ4_FORCE_MEMORY_ACCESS 2 -# elif (defined(__INTEL_COMPILER) && !defined(_WIN32)) || defined(__GNUC__) -# define LZ4_FORCE_MEMORY_ACCESS 1 -# endif -#endif - -/* - * LZ4_FORCE_SW_BITCOUNT - * Define this parameter if your target system or compiler does not support hardware bit count - */ -#if defined(_MSC_VER) && defined(_WIN32_WCE) /* Visual Studio for WinCE doesn't support Hardware bit count */ -# undef LZ4_FORCE_SW_BITCOUNT /* avoid double def */ -# define LZ4_FORCE_SW_BITCOUNT -#endif - - - -/*-************************************ -* Dependency -**************************************/ -/* - * LZ4_SRC_INCLUDED: - * Amalgamation flag, whether lz4.c is included - */ -#ifndef LZ4_SRC_INCLUDED -# define LZ4_SRC_INCLUDED 1 -#endif - -#ifndef LZ4_STATIC_LINKING_ONLY -#define LZ4_STATIC_LINKING_ONLY -#endif - -#ifndef LZ4_DISABLE_DEPRECATE_WARNINGS -#define LZ4_DISABLE_DEPRECATE_WARNINGS /* due to LZ4_decompress_safe_withPrefix64k */ -#endif - -#define LZ4_STATIC_LINKING_ONLY /* LZ4_DISTANCE_MAX */ -#include "tracy_lz4.hpp" -/* see also "memory routines" below */ - - -/*-************************************ -* Compiler Options -**************************************/ -#if defined(_MSC_VER) && (_MSC_VER >= 1400) /* Visual Studio 2005+ */ -# include /* only present in VS2005+ */ -# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ -# pragma warning(disable : 6237) /* disable: C6237: conditional expression is always 0 */ -#endif /* _MSC_VER */ - -#ifndef LZ4_FORCE_INLINE -# if defined (_MSC_VER) && !defined (__clang__) /* MSVC */ -# define LZ4_FORCE_INLINE static __forceinline -# else -# if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ -# if defined (__GNUC__) || defined (__clang__) -# define LZ4_FORCE_INLINE static inline __attribute__((always_inline)) -# else -# define LZ4_FORCE_INLINE static inline -# endif -# else -# define LZ4_FORCE_INLINE static -# endif /* __STDC_VERSION__ */ -# endif /* _MSC_VER */ -#endif /* LZ4_FORCE_INLINE */ - -/* LZ4_FORCE_O2 and LZ4_FORCE_INLINE - * gcc on ppc64le generates an unrolled SIMDized loop for LZ4_wildCopy8, - * together with a simple 8-byte copy loop as a fall-back path. - * However, this optimization hurts the decompression speed by >30%, - * because the execution does not go to the optimized loop - * for typical compressible data, and all of the preamble checks - * before going to the fall-back path become useless overhead. - * This optimization happens only with the -O3 flag, and -O2 generates - * a simple 8-byte copy loop. - * With gcc on ppc64le, all of the LZ4_decompress_* and LZ4_wildCopy8 - * functions are annotated with __attribute__((optimize("O2"))), - * and also LZ4_wildCopy8 is forcibly inlined, so that the O2 attribute - * of LZ4_wildCopy8 does not affect the compression speed. - */ -#if defined(__PPC64__) && defined(__LITTLE_ENDIAN__) && defined(__GNUC__) && !defined(__clang__) -# define LZ4_FORCE_O2 __attribute__((optimize("O2"))) -# undef LZ4_FORCE_INLINE -# define LZ4_FORCE_INLINE static __inline __attribute__((optimize("O2"),always_inline)) -#else -# define LZ4_FORCE_O2 -#endif - -#if (defined(__GNUC__) && (__GNUC__ >= 3)) || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) || defined(__clang__) -# define expect(expr,value) (__builtin_expect ((expr),(value)) ) -#else -# define expect(expr,value) (expr) -#endif - -#ifndef likely -#define likely(expr) expect((expr) != 0, 1) -#endif -#ifndef unlikely -#define unlikely(expr) expect((expr) != 0, 0) -#endif - -/* Should the alignment test prove unreliable, for some reason, - * it can be disabled by setting LZ4_ALIGN_TEST to 0 */ -#ifndef LZ4_ALIGN_TEST /* can be externally provided */ -# define LZ4_ALIGN_TEST 1 -#endif - - -/*-************************************ -* Memory routines -**************************************/ - -/*! LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION : - * Disable relatively high-level LZ4/HC functions that use dynamic memory - * allocation functions (malloc(), calloc(), free()). - * - * Note that this is a compile-time switch. And since it disables - * public/stable LZ4 v1 API functions, we don't recommend using this - * symbol to generate a library for distribution. - * - * The following public functions are removed when this symbol is defined. - * - lz4 : LZ4_createStream, LZ4_freeStream, - * LZ4_createStreamDecode, LZ4_freeStreamDecode, LZ4_create (deprecated) - * - lz4hc : LZ4_createStreamHC, LZ4_freeStreamHC, - * LZ4_createHC (deprecated), LZ4_freeHC (deprecated) - * - lz4frame, lz4file : All LZ4F_* functions - */ -#if defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION) -# define ALLOC(s) lz4_error_memory_allocation_is_disabled -# define ALLOC_AND_ZERO(s) lz4_error_memory_allocation_is_disabled -# define FREEMEM(p) lz4_error_memory_allocation_is_disabled -#elif defined(LZ4_USER_MEMORY_FUNCTIONS) -/* memory management functions can be customized by user project. - * Below functions must exist somewhere in the Project - * and be available at link time */ -void* LZ4_malloc(size_t s); -void* LZ4_calloc(size_t n, size_t s); -void LZ4_free(void* p); -# define ALLOC(s) LZ4_malloc(s) -# define ALLOC_AND_ZERO(s) LZ4_calloc(1,s) -# define FREEMEM(p) LZ4_free(p) -#else -# include /* malloc, calloc, free */ -# define ALLOC(s) malloc(s) -# define ALLOC_AND_ZERO(s) calloc(1,s) -# define FREEMEM(p) free(p) -#endif - -#if ! LZ4_FREESTANDING -# include /* memset, memcpy */ -#endif -#if !defined(LZ4_memset) -# define LZ4_memset(p,v,s) memset((p),(v),(s)) -#endif -#define MEM_INIT(p,v,s) LZ4_memset((p),(v),(s)) - - -/*-************************************ -* Common Constants -**************************************/ -#define MINMATCH 4 - -#define WILDCOPYLENGTH 8 -#define LASTLITERALS 5 /* see ../doc/lz4_Block_format.md#parsing-restrictions */ -#define MFLIMIT 12 /* see ../doc/lz4_Block_format.md#parsing-restrictions */ -#define MATCH_SAFEGUARD_DISTANCE ((2*WILDCOPYLENGTH) - MINMATCH) /* ensure it's possible to write 2 x wildcopyLength without overflowing output buffer */ -#define FASTLOOP_SAFE_DISTANCE 64 -static const int LZ4_minLength = (MFLIMIT+1); - -#define KB *(1 <<10) -#define MB *(1 <<20) -#define GB *(1U<<30) - -#define LZ4_DISTANCE_ABSOLUTE_MAX 65535 -#if (LZ4_DISTANCE_MAX > LZ4_DISTANCE_ABSOLUTE_MAX) /* max supported by LZ4 format */ -# error "LZ4_DISTANCE_MAX is too big : must be <= 65535" -#endif - -#define ML_BITS 4 -#define ML_MASK ((1U<=1) -# include -#else -# ifndef assert -# define assert(condition) ((void)0) -# endif -#endif - -#define LZ4_STATIC_ASSERT(c) { enum { LZ4_static_assert = 1/(int)(!!(c)) }; } /* use after variable declarations */ - -#if defined(LZ4_DEBUG) && (LZ4_DEBUG>=2) -# include - static int g_debuglog_enable = 1; -# define DEBUGLOG(l, ...) { \ - if ((g_debuglog_enable) && (l<=LZ4_DEBUG)) { \ - fprintf(stderr, __FILE__ ": "); \ - fprintf(stderr, __VA_ARGS__); \ - fprintf(stderr, " \n"); \ - } } -#else -# define DEBUGLOG(l, ...) {} /* disabled */ -#endif - -static int LZ4_isAligned(const void* ptr, size_t alignment) -{ - return ((size_t)ptr & (alignment -1)) == 0; -} - - -/*-************************************ -* Types -**************************************/ -#include -#if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) -# include - typedef uint8_t BYTE; - typedef uint16_t U16; - typedef uint32_t U32; - typedef int32_t S32; - typedef uint64_t U64; - typedef uintptr_t uptrval; -#else -# if UINT_MAX != 4294967295UL -# error "LZ4 code (when not C++ or C99) assumes that sizeof(int) == 4" -# endif - typedef unsigned char BYTE; - typedef unsigned short U16; - typedef unsigned int U32; - typedef signed int S32; - typedef unsigned long long U64; - typedef size_t uptrval; /* generally true, except OpenVMS-64 */ -#endif - -#if defined(__x86_64__) - typedef U64 reg_t; /* 64-bits in x32 mode */ -#else - typedef size_t reg_t; /* 32-bits in x32 mode */ -#endif - -typedef enum { - notLimited = 0, - limitedOutput = 1, - fillOutput = 2 -} limitedOutput_directive; - -namespace tracy -{ - -/*-************************************ -* Reading and writing into memory -**************************************/ - -/** - * LZ4 relies on memcpy with a constant size being inlined. In freestanding - * environments, the compiler can't assume the implementation of memcpy() is - * standard compliant, so it can't apply its specialized memcpy() inlining - * logic. When possible, use __builtin_memcpy() to tell the compiler to analyze - * memcpy() as if it were standard compliant, so it can inline it in freestanding - * environments. This is needed when decompressing the Linux Kernel, for example. - */ -#if !defined(LZ4_memcpy) -# if defined(__GNUC__) && (__GNUC__ >= 4) -# define LZ4_memcpy(dst, src, size) __builtin_memcpy(dst, src, size) -# else -# define LZ4_memcpy(dst, src, size) memcpy(dst, src, size) -# endif -#endif - -#if !defined(LZ4_memmove) -# if defined(__GNUC__) && (__GNUC__ >= 4) -# define LZ4_memmove __builtin_memmove -# else -# define LZ4_memmove memmove -# endif -#endif - -static unsigned LZ4_isLittleEndian(void) -{ - const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */ - return one.c[0]; -} - - -#if defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==2) -/* lie to the compiler about data alignment; use with caution */ - -static U16 LZ4_read16(const void* memPtr) { return *(const U16*) memPtr; } -static U32 LZ4_read32(const void* memPtr) { return *(const U32*) memPtr; } -static reg_t LZ4_read_ARCH(const void* memPtr) { return *(const reg_t*) memPtr; } - -static void LZ4_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; } -static void LZ4_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; } - -#elif defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==1) - -/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ -/* currently only defined for gcc and icc */ -typedef union { U16 u16; U32 u32; reg_t uArch; } __attribute__((packed)) LZ4_unalign; - -static U16 LZ4_read16(const void* ptr) { return ((const LZ4_unalign*)ptr)->u16; } -static U32 LZ4_read32(const void* ptr) { return ((const LZ4_unalign*)ptr)->u32; } -static reg_t LZ4_read_ARCH(const void* ptr) { return ((const LZ4_unalign*)ptr)->uArch; } - -static void LZ4_write16(void* memPtr, U16 value) { ((LZ4_unalign*)memPtr)->u16 = value; } -static void LZ4_write32(void* memPtr, U32 value) { ((LZ4_unalign*)memPtr)->u32 = value; } - -#else /* safe and portable access using memcpy() */ - -static U16 LZ4_read16(const void* memPtr) -{ - U16 val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val; -} - -static U32 LZ4_read32(const void* memPtr) -{ - U32 val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val; -} - -static reg_t LZ4_read_ARCH(const void* memPtr) -{ - reg_t val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val; -} - -static void LZ4_write16(void* memPtr, U16 value) -{ - LZ4_memcpy(memPtr, &value, sizeof(value)); -} - -static void LZ4_write32(void* memPtr, U32 value) -{ - LZ4_memcpy(memPtr, &value, sizeof(value)); -} - -#endif /* LZ4_FORCE_MEMORY_ACCESS */ - - -static U16 LZ4_readLE16(const void* memPtr) -{ - if (LZ4_isLittleEndian()) { - return LZ4_read16(memPtr); - } else { - const BYTE* p = (const BYTE*)memPtr; - return (U16)((U16)p[0] + (p[1]<<8)); - } -} - -static void LZ4_writeLE16(void* memPtr, U16 value) -{ - if (LZ4_isLittleEndian()) { - LZ4_write16(memPtr, value); - } else { - BYTE* p = (BYTE*)memPtr; - p[0] = (BYTE) value; - p[1] = (BYTE)(value>>8); - } -} - -/* customized variant of memcpy, which can overwrite up to 8 bytes beyond dstEnd */ -LZ4_FORCE_INLINE -void LZ4_wildCopy8(void* dstPtr, const void* srcPtr, void* dstEnd) -{ - BYTE* d = (BYTE*)dstPtr; - const BYTE* s = (const BYTE*)srcPtr; - BYTE* const e = (BYTE*)dstEnd; - - do { LZ4_memcpy(d,s,8); d+=8; s+=8; } while (d= 16. */ -LZ4_FORCE_INLINE void -LZ4_wildCopy32(void* dstPtr, const void* srcPtr, void* dstEnd) -{ - BYTE* d = (BYTE*)dstPtr; - const BYTE* s = (const BYTE*)srcPtr; - BYTE* const e = (BYTE*)dstEnd; - - do { LZ4_memcpy(d,s,16); LZ4_memcpy(d+16,s+16,16); d+=32; s+=32; } while (d= dstPtr + MINMATCH - * - there is at least 8 bytes available to write after dstEnd */ -LZ4_FORCE_INLINE void -LZ4_memcpy_using_offset(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset) -{ - BYTE v[8]; - - assert(dstEnd >= dstPtr + MINMATCH); - - switch(offset) { - case 1: - MEM_INIT(v, *srcPtr, 8); - break; - case 2: - LZ4_memcpy(v, srcPtr, 2); - LZ4_memcpy(&v[2], srcPtr, 2); -#if defined(_MSC_VER) && (_MSC_VER <= 1933) /* MSVC 2022 ver 17.3 or earlier */ -# pragma warning(push) -# pragma warning(disable : 6385) /* warning C6385: Reading invalid data from 'v'. */ -#endif - LZ4_memcpy(&v[4], v, 4); -#if defined(_MSC_VER) && (_MSC_VER <= 1933) /* MSVC 2022 ver 17.3 or earlier */ -# pragma warning(pop) -#endif - break; - case 4: - LZ4_memcpy(v, srcPtr, 4); - LZ4_memcpy(&v[4], srcPtr, 4); - break; - default: - LZ4_memcpy_using_offset_base(dstPtr, srcPtr, dstEnd, offset); - return; - } - - LZ4_memcpy(dstPtr, v, 8); - dstPtr += 8; - while (dstPtr < dstEnd) { - LZ4_memcpy(dstPtr, v, 8); - dstPtr += 8; - } -} -#endif - - -/*-************************************ -* Common functions -**************************************/ -LZ4_FORCE_INLINE unsigned LZ4_NbCommonBytes (reg_t val) -{ - assert(val != 0); - if (LZ4_isLittleEndian()) { - if (sizeof(val) == 8) { -# if defined(_MSC_VER) && (_MSC_VER >= 1800) && (defined(_M_AMD64) && !defined(_M_ARM64EC)) && !defined(LZ4_FORCE_SW_BITCOUNT) -/*-************************************************************************************************* -* ARM64EC is a Microsoft-designed ARM64 ABI compatible with AMD64 applications on ARM64 Windows 11. -* The ARM64EC ABI does not support AVX/AVX2/AVX512 instructions, nor their relevant intrinsics -* including _tzcnt_u64. Therefore, we need to neuter the _tzcnt_u64 code path for ARM64EC. -****************************************************************************************************/ -# if defined(__clang__) && (__clang_major__ < 10) - /* Avoid undefined clang-cl intrinsics issue. - * See https://github.com/lz4/lz4/pull/1017 for details. */ - return (unsigned)__builtin_ia32_tzcnt_u64(val) >> 3; -# else - /* x64 CPUS without BMI support interpret `TZCNT` as `REP BSF` */ - return (unsigned)_tzcnt_u64(val) >> 3; -# endif -# elif defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT) - unsigned long r = 0; - _BitScanForward64(&r, (U64)val); - return (unsigned)r >> 3; -# elif (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \ - ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \ - !defined(LZ4_FORCE_SW_BITCOUNT) - return (unsigned)__builtin_ctzll((U64)val) >> 3; -# else - const U64 m = 0x0101010101010101ULL; - val ^= val - 1; - return (unsigned)(((U64)((val & (m - 1)) * m)) >> 56); -# endif - } else /* 32 bits */ { -# if defined(_MSC_VER) && (_MSC_VER >= 1400) && !defined(LZ4_FORCE_SW_BITCOUNT) - unsigned long r; - _BitScanForward(&r, (U32)val); - return (unsigned)r >> 3; -# elif (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \ - ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \ - !defined(__TINYC__) && !defined(LZ4_FORCE_SW_BITCOUNT) - return (unsigned)__builtin_ctz((U32)val) >> 3; -# else - const U32 m = 0x01010101; - return (unsigned)((((val - 1) ^ val) & (m - 1)) * m) >> 24; -# endif - } - } else /* Big Endian CPU */ { - if (sizeof(val)==8) { -# if (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \ - ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \ - !defined(__TINYC__) && !defined(LZ4_FORCE_SW_BITCOUNT) - return (unsigned)__builtin_clzll((U64)val) >> 3; -# else -#if 1 - /* this method is probably faster, - * but adds a 128 bytes lookup table */ - static const unsigned char ctz7_tab[128] = { - 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - }; - U64 const mask = 0x0101010101010101ULL; - U64 const t = (((val >> 8) - mask) | val) & mask; - return ctz7_tab[(t * 0x0080402010080402ULL) >> 57]; -#else - /* this method doesn't consume memory space like the previous one, - * but it contains several branches, - * that may end up slowing execution */ - static const U32 by32 = sizeof(val)*4; /* 32 on 64 bits (goal), 16 on 32 bits. - Just to avoid some static analyzer complaining about shift by 32 on 32-bits target. - Note that this code path is never triggered in 32-bits mode. */ - unsigned r; - if (!(val>>by32)) { r=4; } else { r=0; val>>=by32; } - if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } - r += (!val); - return r; -#endif -# endif - } else /* 32 bits */ { -# if (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \ - ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \ - !defined(LZ4_FORCE_SW_BITCOUNT) - return (unsigned)__builtin_clz((U32)val) >> 3; -# else - val >>= 8; - val = ((((val + 0x00FFFF00) | 0x00FFFFFF) + val) | - (val + 0x00FF0000)) >> 24; - return (unsigned)val ^ 3; -# endif - } - } -} - - -#define STEPSIZE sizeof(reg_t) -LZ4_FORCE_INLINE -unsigned LZ4_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit) -{ - const BYTE* const pStart = pIn; - - if (likely(pIn < pInLimit-(STEPSIZE-1))) { - reg_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn); - if (!diff) { - pIn+=STEPSIZE; pMatch+=STEPSIZE; - } else { - return LZ4_NbCommonBytes(diff); - } } - - while (likely(pIn < pInLimit-(STEPSIZE-1))) { - reg_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn); - if (!diff) { pIn+=STEPSIZE; pMatch+=STEPSIZE; continue; } - pIn += LZ4_NbCommonBytes(diff); - return (unsigned)(pIn - pStart); - } - - if ((STEPSIZE==8) && (pIn<(pInLimit-3)) && (LZ4_read32(pMatch) == LZ4_read32(pIn))) { pIn+=4; pMatch+=4; } - if ((pIn<(pInLimit-1)) && (LZ4_read16(pMatch) == LZ4_read16(pIn))) { pIn+=2; pMatch+=2; } - if ((pIn compression run slower on incompressible data */ - - -/*-************************************ -* Local Structures and types -**************************************/ -typedef enum { clearedTable = 0, byPtr, byU32, byU16 } tableType_t; - -/** - * This enum distinguishes several different modes of accessing previous - * content in the stream. - * - * - noDict : There is no preceding content. - * - withPrefix64k : Table entries up to ctx->dictSize before the current blob - * blob being compressed are valid and refer to the preceding - * content (of length ctx->dictSize), which is available - * contiguously preceding in memory the content currently - * being compressed. - * - usingExtDict : Like withPrefix64k, but the preceding content is somewhere - * else in memory, starting at ctx->dictionary with length - * ctx->dictSize. - * - usingDictCtx : Everything concerning the preceding content is - * in a separate context, pointed to by ctx->dictCtx. - * ctx->dictionary, ctx->dictSize, and table entries - * in the current context that refer to positions - * preceding the beginning of the current compression are - * ignored. Instead, ctx->dictCtx->dictionary and ctx->dictCtx - * ->dictSize describe the location and size of the preceding - * content, and matches are found by looking in the ctx - * ->dictCtx->hashTable. - */ -typedef enum { noDict = 0, withPrefix64k, usingExtDict, usingDictCtx } dict_directive; -typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive; - - -/*-************************************ -* Local Utils -**************************************/ -int LZ4_versionNumber (void) { return LZ4_VERSION_NUMBER; } -const char* LZ4_versionString(void) { return LZ4_VERSION_STRING; } -int LZ4_compressBound(int isize) { return LZ4_COMPRESSBOUND(isize); } -int LZ4_sizeofState(void) { return sizeof(LZ4_stream_t); } - - -/*-**************************************** -* Internal Definitions, used only in Tests -*******************************************/ - -int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char* dest, int srcSize); - -int LZ4_decompress_safe_forceExtDict(const char* source, char* dest, - int compressedSize, int maxOutputSize, - const void* dictStart, size_t dictSize); -int LZ4_decompress_safe_partial_forceExtDict(const char* source, char* dest, - int compressedSize, int targetOutputSize, int dstCapacity, - const void* dictStart, size_t dictSize); - -/*-****************************** -* Compression functions -********************************/ -LZ4_FORCE_INLINE U32 LZ4_hash4(U32 sequence, tableType_t const tableType) -{ - if (tableType == byU16) - return ((sequence * 2654435761U) >> ((MINMATCH*8)-(LZ4_HASHLOG+1))); - else - return ((sequence * 2654435761U) >> ((MINMATCH*8)-LZ4_HASHLOG)); -} - -LZ4_FORCE_INLINE U32 LZ4_hash5(U64 sequence, tableType_t const tableType) -{ - const U32 hashLog = (tableType == byU16) ? LZ4_HASHLOG+1 : LZ4_HASHLOG; - if (LZ4_isLittleEndian()) { - const U64 prime5bytes = 889523592379ULL; - return (U32)(((sequence << 24) * prime5bytes) >> (64 - hashLog)); - } else { - const U64 prime8bytes = 11400714785074694791ULL; - return (U32)(((sequence >> 24) * prime8bytes) >> (64 - hashLog)); - } -} - -LZ4_FORCE_INLINE U32 LZ4_hashPosition(const void* const p, tableType_t const tableType) -{ - if ((sizeof(reg_t)==8) && (tableType != byU16)) return LZ4_hash5(LZ4_read_ARCH(p), tableType); - return LZ4_hash4(LZ4_read32(p), tableType); -} - -LZ4_FORCE_INLINE void LZ4_clearHash(U32 h, void* tableBase, tableType_t const tableType) -{ - switch (tableType) - { - default: /* fallthrough */ - case clearedTable: { /* illegal! */ assert(0); return; } - case byPtr: { const BYTE** hashTable = (const BYTE**)tableBase; hashTable[h] = NULL; return; } - case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = 0; return; } - case byU16: { U16* hashTable = (U16*) tableBase; hashTable[h] = 0; return; } - } -} - -LZ4_FORCE_INLINE void LZ4_putIndexOnHash(U32 idx, U32 h, void* tableBase, tableType_t const tableType) -{ - switch (tableType) - { - default: /* fallthrough */ - case clearedTable: /* fallthrough */ - case byPtr: { /* illegal! */ assert(0); return; } - case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = idx; return; } - case byU16: { U16* hashTable = (U16*) tableBase; assert(idx < 65536); hashTable[h] = (U16)idx; return; } - } -} - -LZ4_FORCE_INLINE void LZ4_putPositionOnHash(const BYTE* p, U32 h, - void* tableBase, tableType_t const tableType, - const BYTE* srcBase) -{ - switch (tableType) - { - case clearedTable: { /* illegal! */ assert(0); return; } - case byPtr: { const BYTE** hashTable = (const BYTE**)tableBase; hashTable[h] = p; return; } - case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = (U32)(p-srcBase); return; } - case byU16: { U16* hashTable = (U16*) tableBase; hashTable[h] = (U16)(p-srcBase); return; } - } -} - -LZ4_FORCE_INLINE void LZ4_putPosition(const BYTE* p, void* tableBase, tableType_t tableType, const BYTE* srcBase) -{ - U32 const h = LZ4_hashPosition(p, tableType); - LZ4_putPositionOnHash(p, h, tableBase, tableType, srcBase); -} - -/* LZ4_getIndexOnHash() : - * Index of match position registered in hash table. - * hash position must be calculated by using base+index, or dictBase+index. - * Assumption 1 : only valid if tableType == byU32 or byU16. - * Assumption 2 : h is presumed valid (within limits of hash table) - */ -LZ4_FORCE_INLINE U32 LZ4_getIndexOnHash(U32 h, const void* tableBase, tableType_t tableType) -{ - LZ4_STATIC_ASSERT(LZ4_MEMORY_USAGE > 2); - if (tableType == byU32) { - const U32* const hashTable = (const U32*) tableBase; - assert(h < (1U << (LZ4_MEMORY_USAGE-2))); - return hashTable[h]; - } - if (tableType == byU16) { - const U16* const hashTable = (const U16*) tableBase; - assert(h < (1U << (LZ4_MEMORY_USAGE-1))); - return hashTable[h]; - } - assert(0); return 0; /* forbidden case */ -} - -static const BYTE* LZ4_getPositionOnHash(U32 h, const void* tableBase, tableType_t tableType, const BYTE* srcBase) -{ - if (tableType == byPtr) { const BYTE* const* hashTable = (const BYTE* const*) tableBase; return hashTable[h]; } - if (tableType == byU32) { const U32* const hashTable = (const U32*) tableBase; return hashTable[h] + srcBase; } - { const U16* const hashTable = (const U16*) tableBase; return hashTable[h] + srcBase; } /* default, to ensure a return */ -} - -LZ4_FORCE_INLINE const BYTE* -LZ4_getPosition(const BYTE* p, - const void* tableBase, tableType_t tableType, - const BYTE* srcBase) -{ - U32 const h = LZ4_hashPosition(p, tableType); - return LZ4_getPositionOnHash(h, tableBase, tableType, srcBase); -} - -LZ4_FORCE_INLINE void -LZ4_prepareTable(LZ4_stream_t_internal* const cctx, - const int inputSize, - const tableType_t tableType) { - /* If the table hasn't been used, it's guaranteed to be zeroed out, and is - * therefore safe to use no matter what mode we're in. Otherwise, we figure - * out if it's safe to leave as is or whether it needs to be reset. - */ - if ((tableType_t)cctx->tableType != clearedTable) { - assert(inputSize >= 0); - if ((tableType_t)cctx->tableType != tableType - || ((tableType == byU16) && cctx->currentOffset + (unsigned)inputSize >= 0xFFFFU) - || ((tableType == byU32) && cctx->currentOffset > 1 GB) - || tableType == byPtr - || inputSize >= 4 KB) - { - DEBUGLOG(4, "LZ4_prepareTable: Resetting table in %p", cctx); - MEM_INIT(cctx->hashTable, 0, LZ4_HASHTABLESIZE); - cctx->currentOffset = 0; - cctx->tableType = (U32)clearedTable; - } else { - DEBUGLOG(4, "LZ4_prepareTable: Re-use hash table (no reset)"); - } - } - - /* Adding a gap, so all previous entries are > LZ4_DISTANCE_MAX back, - * is faster than compressing without a gap. - * However, compressing with currentOffset == 0 is faster still, - * so we preserve that case. - */ - if (cctx->currentOffset != 0 && tableType == byU32) { - DEBUGLOG(5, "LZ4_prepareTable: adding 64KB to currentOffset"); - cctx->currentOffset += 64 KB; - } - - /* Finally, clear history */ - cctx->dictCtx = NULL; - cctx->dictionary = NULL; - cctx->dictSize = 0; -} - -/** LZ4_compress_generic() : - * inlined, to ensure branches are decided at compilation time. - * Presumed already validated at this stage: - * - source != NULL - * - inputSize > 0 - */ -LZ4_FORCE_INLINE int LZ4_compress_generic_validated( - LZ4_stream_t_internal* const cctx, - const char* const source, - char* const dest, - const int inputSize, - int* inputConsumed, /* only written when outputDirective == fillOutput */ - const int maxOutputSize, - const limitedOutput_directive outputDirective, - const tableType_t tableType, - const dict_directive dictDirective, - const dictIssue_directive dictIssue, - const int acceleration) -{ - int result; - const BYTE* ip = (const BYTE*) source; - - U32 const startIndex = cctx->currentOffset; - const BYTE* base = (const BYTE*) source - startIndex; - const BYTE* lowLimit; - - const LZ4_stream_t_internal* dictCtx = (const LZ4_stream_t_internal*) cctx->dictCtx; - const BYTE* const dictionary = - dictDirective == usingDictCtx ? dictCtx->dictionary : cctx->dictionary; - const U32 dictSize = - dictDirective == usingDictCtx ? dictCtx->dictSize : cctx->dictSize; - const U32 dictDelta = (dictDirective == usingDictCtx) ? startIndex - dictCtx->currentOffset : 0; /* make indexes in dictCtx comparable with index in current context */ - - int const maybe_extMem = (dictDirective == usingExtDict) || (dictDirective == usingDictCtx); - U32 const prefixIdxLimit = startIndex - dictSize; /* used when dictDirective == dictSmall */ - const BYTE* const dictEnd = dictionary ? dictionary + dictSize : dictionary; - const BYTE* anchor = (const BYTE*) source; - const BYTE* const iend = ip + inputSize; - const BYTE* const mflimitPlusOne = iend - MFLIMIT + 1; - const BYTE* const matchlimit = iend - LASTLITERALS; - - /* the dictCtx currentOffset is indexed on the start of the dictionary, - * while a dictionary in the current context precedes the currentOffset */ - const BYTE* dictBase = (dictionary == NULL) ? NULL : - (dictDirective == usingDictCtx) ? - dictionary + dictSize - dictCtx->currentOffset : - dictionary + dictSize - startIndex; - - BYTE* op = (BYTE*) dest; - BYTE* const olimit = op + maxOutputSize; - - U32 offset = 0; - U32 forwardH; - - DEBUGLOG(5, "LZ4_compress_generic_validated: srcSize=%i, tableType=%u", inputSize, tableType); - assert(ip != NULL); - /* If init conditions are not met, we don't have to mark stream - * as having dirty context, since no action was taken yet */ - if (outputDirective == fillOutput && maxOutputSize < 1) { return 0; } /* Impossible to store anything */ - if ((tableType == byU16) && (inputSize>=LZ4_64Klimit)) { return 0; } /* Size too large (not within 64K limit) */ - if (tableType==byPtr) assert(dictDirective==noDict); /* only supported use case with byPtr */ - assert(acceleration >= 1); - - lowLimit = (const BYTE*)source - (dictDirective == withPrefix64k ? dictSize : 0); - - /* Update context state */ - if (dictDirective == usingDictCtx) { - /* Subsequent linked blocks can't use the dictionary. */ - /* Instead, they use the block we just compressed. */ - cctx->dictCtx = NULL; - cctx->dictSize = (U32)inputSize; - } else { - cctx->dictSize += (U32)inputSize; - } - cctx->currentOffset += (U32)inputSize; - cctx->tableType = (U32)tableType; - - if (inputSizehashTable, tableType, base); - ip++; forwardH = LZ4_hashPosition(ip, tableType); - - /* Main Loop */ - for ( ; ; ) { - const BYTE* match; - BYTE* token; - const BYTE* filledIp; - - /* Find a match */ - if (tableType == byPtr) { - const BYTE* forwardIp = ip; - int step = 1; - int searchMatchNb = acceleration << LZ4_skipTrigger; - do { - U32 const h = forwardH; - ip = forwardIp; - forwardIp += step; - step = (searchMatchNb++ >> LZ4_skipTrigger); - - if (unlikely(forwardIp > mflimitPlusOne)) goto _last_literals; - assert(ip < mflimitPlusOne); - - match = LZ4_getPositionOnHash(h, cctx->hashTable, tableType, base); - forwardH = LZ4_hashPosition(forwardIp, tableType); - LZ4_putPositionOnHash(ip, h, cctx->hashTable, tableType, base); - - } while ( (match+LZ4_DISTANCE_MAX < ip) - || (LZ4_read32(match) != LZ4_read32(ip)) ); - - } else { /* byU32, byU16 */ - - const BYTE* forwardIp = ip; - int step = 1; - int searchMatchNb = acceleration << LZ4_skipTrigger; - do { - U32 const h = forwardH; - U32 const current = (U32)(forwardIp - base); - U32 matchIndex = LZ4_getIndexOnHash(h, cctx->hashTable, tableType); - assert(matchIndex <= current); - assert(forwardIp - base < (ptrdiff_t)(2 GB - 1)); - ip = forwardIp; - forwardIp += step; - step = (searchMatchNb++ >> LZ4_skipTrigger); - - if (unlikely(forwardIp > mflimitPlusOne)) goto _last_literals; - assert(ip < mflimitPlusOne); - - if (dictDirective == usingDictCtx) { - if (matchIndex < startIndex) { - /* there was no match, try the dictionary */ - assert(tableType == byU32); - matchIndex = LZ4_getIndexOnHash(h, dictCtx->hashTable, byU32); - match = dictBase + matchIndex; - matchIndex += dictDelta; /* make dictCtx index comparable with current context */ - lowLimit = dictionary; - } else { - match = base + matchIndex; - lowLimit = (const BYTE*)source; - } - } else if (dictDirective == usingExtDict) { - if (matchIndex < startIndex) { - DEBUGLOG(7, "extDict candidate: matchIndex=%5u < startIndex=%5u", matchIndex, startIndex); - assert(startIndex - matchIndex >= MINMATCH); - assert(dictBase); - match = dictBase + matchIndex; - lowLimit = dictionary; - } else { - match = base + matchIndex; - lowLimit = (const BYTE*)source; - } - } else { /* single continuous memory segment */ - match = base + matchIndex; - } - forwardH = LZ4_hashPosition(forwardIp, tableType); - LZ4_putIndexOnHash(current, h, cctx->hashTable, tableType); - - DEBUGLOG(7, "candidate at pos=%u (offset=%u \n", matchIndex, current - matchIndex); - if ((dictIssue == dictSmall) && (matchIndex < prefixIdxLimit)) { continue; } /* match outside of valid area */ - assert(matchIndex < current); - if ( ((tableType != byU16) || (LZ4_DISTANCE_MAX < LZ4_DISTANCE_ABSOLUTE_MAX)) - && (matchIndex+LZ4_DISTANCE_MAX < current)) { - continue; - } /* too far */ - assert((current - matchIndex) <= LZ4_DISTANCE_MAX); /* match now expected within distance */ - - if (LZ4_read32(match) == LZ4_read32(ip)) { - if (maybe_extMem) offset = current - matchIndex; - break; /* match found */ - } - - } while(1); - } - - /* Catch up */ - filledIp = ip; - while (((ip>anchor) & (match > lowLimit)) && (unlikely(ip[-1]==match[-1]))) { ip--; match--; } - - /* Encode Literals */ - { unsigned const litLength = (unsigned)(ip - anchor); - token = op++; - if ((outputDirective == limitedOutput) && /* Check output buffer overflow */ - (unlikely(op + litLength + (2 + 1 + LASTLITERALS) + (litLength/255) > olimit)) ) { - return 0; /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */ - } - if ((outputDirective == fillOutput) && - (unlikely(op + (litLength+240)/255 /* litlen */ + litLength /* literals */ + 2 /* offset */ + 1 /* token */ + MFLIMIT - MINMATCH /* min last literals so last match is <= end - MFLIMIT */ > olimit))) { - op--; - goto _last_literals; - } - if (litLength >= RUN_MASK) { - int len = (int)(litLength - RUN_MASK); - *token = (RUN_MASK<= 255 ; len-=255) *op++ = 255; - *op++ = (BYTE)len; - } - else *token = (BYTE)(litLength< olimit)) { - /* the match was too close to the end, rewind and go to last literals */ - op = token; - goto _last_literals; - } - - /* Encode Offset */ - if (maybe_extMem) { /* static test */ - DEBUGLOG(6, " with offset=%u (ext if > %i)", offset, (int)(ip - (const BYTE*)source)); - assert(offset <= LZ4_DISTANCE_MAX && offset > 0); - LZ4_writeLE16(op, (U16)offset); op+=2; - } else { - DEBUGLOG(6, " with offset=%u (same segment)", (U32)(ip - match)); - assert(ip-match <= LZ4_DISTANCE_MAX); - LZ4_writeLE16(op, (U16)(ip - match)); op+=2; - } - - /* Encode MatchLength */ - { unsigned matchCode; - - if ( (dictDirective==usingExtDict || dictDirective==usingDictCtx) - && (lowLimit==dictionary) /* match within extDict */ ) { - const BYTE* limit = ip + (dictEnd-match); - assert(dictEnd > match); - if (limit > matchlimit) limit = matchlimit; - matchCode = LZ4_count(ip+MINMATCH, match+MINMATCH, limit); - ip += (size_t)matchCode + MINMATCH; - if (ip==limit) { - unsigned const more = LZ4_count(limit, (const BYTE*)source, matchlimit); - matchCode += more; - ip += more; - } - DEBUGLOG(6, " with matchLength=%u starting in extDict", matchCode+MINMATCH); - } else { - matchCode = LZ4_count(ip+MINMATCH, match+MINMATCH, matchlimit); - ip += (size_t)matchCode + MINMATCH; - DEBUGLOG(6, " with matchLength=%u", matchCode+MINMATCH); - } - - if ((outputDirective) && /* Check output buffer overflow */ - (unlikely(op + (1 + LASTLITERALS) + (matchCode+240)/255 > olimit)) ) { - if (outputDirective == fillOutput) { - /* Match description too long : reduce it */ - U32 newMatchCode = 15 /* in token */ - 1 /* to avoid needing a zero byte */ + ((U32)(olimit - op) - 1 - LASTLITERALS) * 255; - ip -= matchCode - newMatchCode; - assert(newMatchCode < matchCode); - matchCode = newMatchCode; - if (unlikely(ip <= filledIp)) { - /* We have already filled up to filledIp so if ip ends up less than filledIp - * we have positions in the hash table beyond the current position. This is - * a problem if we reuse the hash table. So we have to remove these positions - * from the hash table. - */ - const BYTE* ptr; - DEBUGLOG(5, "Clearing %u positions", (U32)(filledIp - ip)); - for (ptr = ip; ptr <= filledIp; ++ptr) { - U32 const h = LZ4_hashPosition(ptr, tableType); - LZ4_clearHash(h, cctx->hashTable, tableType); - } - } - } else { - assert(outputDirective == limitedOutput); - return 0; /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */ - } - } - if (matchCode >= ML_MASK) { - *token += ML_MASK; - matchCode -= ML_MASK; - LZ4_write32(op, 0xFFFFFFFF); - while (matchCode >= 4*255) { - op+=4; - LZ4_write32(op, 0xFFFFFFFF); - matchCode -= 4*255; - } - op += matchCode / 255; - *op++ = (BYTE)(matchCode % 255); - } else - *token += (BYTE)(matchCode); - } - /* Ensure we have enough space for the last literals. */ - assert(!(outputDirective == fillOutput && op + 1 + LASTLITERALS > olimit)); - - anchor = ip; - - /* Test end of chunk */ - if (ip >= mflimitPlusOne) break; - - /* Fill table */ - LZ4_putPosition(ip-2, cctx->hashTable, tableType, base); - - /* Test next position */ - if (tableType == byPtr) { - - match = LZ4_getPosition(ip, cctx->hashTable, tableType, base); - LZ4_putPosition(ip, cctx->hashTable, tableType, base); - if ( (match+LZ4_DISTANCE_MAX >= ip) - && (LZ4_read32(match) == LZ4_read32(ip)) ) - { token=op++; *token=0; goto _next_match; } - - } else { /* byU32, byU16 */ - - U32 const h = LZ4_hashPosition(ip, tableType); - U32 const current = (U32)(ip-base); - U32 matchIndex = LZ4_getIndexOnHash(h, cctx->hashTable, tableType); - assert(matchIndex < current); - if (dictDirective == usingDictCtx) { - if (matchIndex < startIndex) { - /* there was no match, try the dictionary */ - matchIndex = LZ4_getIndexOnHash(h, dictCtx->hashTable, byU32); - match = dictBase + matchIndex; - lowLimit = dictionary; /* required for match length counter */ - matchIndex += dictDelta; - } else { - match = base + matchIndex; - lowLimit = (const BYTE*)source; /* required for match length counter */ - } - } else if (dictDirective==usingExtDict) { - if (matchIndex < startIndex) { - assert(dictBase); - match = dictBase + matchIndex; - lowLimit = dictionary; /* required for match length counter */ - } else { - match = base + matchIndex; - lowLimit = (const BYTE*)source; /* required for match length counter */ - } - } else { /* single memory segment */ - match = base + matchIndex; - } - LZ4_putIndexOnHash(current, h, cctx->hashTable, tableType); - assert(matchIndex < current); - if ( ((dictIssue==dictSmall) ? (matchIndex >= prefixIdxLimit) : 1) - && (((tableType==byU16) && (LZ4_DISTANCE_MAX == LZ4_DISTANCE_ABSOLUTE_MAX)) ? 1 : (matchIndex+LZ4_DISTANCE_MAX >= current)) - && (LZ4_read32(match) == LZ4_read32(ip)) ) { - token=op++; - *token=0; - if (maybe_extMem) offset = current - matchIndex; - DEBUGLOG(6, "seq.start:%i, literals=%u, match.start:%i", - (int)(anchor-(const BYTE*)source), 0, (int)(ip-(const BYTE*)source)); - goto _next_match; - } - } - - /* Prepare next loop */ - forwardH = LZ4_hashPosition(++ip, tableType); - - } - -_last_literals: - /* Encode Last Literals */ - { size_t lastRun = (size_t)(iend - anchor); - if ( (outputDirective) && /* Check output buffer overflow */ - (op + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > olimit)) { - if (outputDirective == fillOutput) { - /* adapt lastRun to fill 'dst' */ - assert(olimit >= op); - lastRun = (size_t)(olimit-op) - 1/*token*/; - lastRun -= (lastRun + 256 - RUN_MASK) / 256; /*additional length tokens*/ - } else { - assert(outputDirective == limitedOutput); - return 0; /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */ - } - } - DEBUGLOG(6, "Final literal run : %i literals", (int)lastRun); - if (lastRun >= RUN_MASK) { - size_t accumulator = lastRun - RUN_MASK; - *op++ = RUN_MASK << ML_BITS; - for(; accumulator >= 255 ; accumulator-=255) *op++ = 255; - *op++ = (BYTE) accumulator; - } else { - *op++ = (BYTE)(lastRun< 0); - DEBUGLOG(5, "LZ4_compress_generic: compressed %i bytes into %i bytes", inputSize, result); - return result; -} - -/** LZ4_compress_generic() : - * inlined, to ensure branches are decided at compilation time; - * takes care of src == (NULL, 0) - * and forward the rest to LZ4_compress_generic_validated */ -LZ4_FORCE_INLINE int LZ4_compress_generic( - LZ4_stream_t_internal* const cctx, - const char* const src, - char* const dst, - const int srcSize, - int *inputConsumed, /* only written when outputDirective == fillOutput */ - const int dstCapacity, - const limitedOutput_directive outputDirective, - const tableType_t tableType, - const dict_directive dictDirective, - const dictIssue_directive dictIssue, - const int acceleration) -{ - DEBUGLOG(5, "LZ4_compress_generic: srcSize=%i, dstCapacity=%i", - srcSize, dstCapacity); - - if ((U32)srcSize > (U32)LZ4_MAX_INPUT_SIZE) { return 0; } /* Unsupported srcSize, too large (or negative) */ - if (srcSize == 0) { /* src == NULL supported if srcSize == 0 */ - if (outputDirective != notLimited && dstCapacity <= 0) return 0; /* no output, can't write anything */ - DEBUGLOG(5, "Generating an empty block"); - assert(outputDirective == notLimited || dstCapacity >= 1); - assert(dst != NULL); - dst[0] = 0; - if (outputDirective == fillOutput) { - assert (inputConsumed != NULL); - *inputConsumed = 0; - } - return 1; - } - assert(src != NULL); - - return LZ4_compress_generic_validated(cctx, src, dst, srcSize, - inputConsumed, /* only written into if outputDirective == fillOutput */ - dstCapacity, outputDirective, - tableType, dictDirective, dictIssue, acceleration); -} - - -int LZ4_compress_fast_extState(void* state, const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration) -{ - LZ4_stream_t_internal* const ctx = & LZ4_initStream(state, sizeof(LZ4_stream_t)) -> internal_donotuse; - assert(ctx != NULL); - if (acceleration < 1) acceleration = LZ4_ACCELERATION_DEFAULT; - if (acceleration > LZ4_ACCELERATION_MAX) acceleration = LZ4_ACCELERATION_MAX; - if (maxOutputSize >= LZ4_compressBound(inputSize)) { - if (inputSize < LZ4_64Klimit) { - return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, 0, notLimited, byU16, noDict, noDictIssue, acceleration); - } else { - const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)source > LZ4_DISTANCE_MAX)) ? byPtr : byU32; - return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration); - } - } else { - if (inputSize < LZ4_64Klimit) { - return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, byU16, noDict, noDictIssue, acceleration); - } else { - const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)source > LZ4_DISTANCE_MAX)) ? byPtr : byU32; - return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, noDict, noDictIssue, acceleration); - } - } -} - -/** - * LZ4_compress_fast_extState_fastReset() : - * A variant of LZ4_compress_fast_extState(). - * - * Using this variant avoids an expensive initialization step. It is only safe - * to call if the state buffer is known to be correctly initialized already - * (see comment in lz4.h on LZ4_resetStream_fast() for a definition of - * "correctly initialized"). - */ -int LZ4_compress_fast_extState_fastReset(void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration) -{ - LZ4_stream_t_internal* ctx = &((LZ4_stream_t*)state)->internal_donotuse; - if (acceleration < 1) acceleration = LZ4_ACCELERATION_DEFAULT; - if (acceleration > LZ4_ACCELERATION_MAX) acceleration = LZ4_ACCELERATION_MAX; - - if (dstCapacity >= LZ4_compressBound(srcSize)) { - if (srcSize < LZ4_64Klimit) { - const tableType_t tableType = byU16; - LZ4_prepareTable(ctx, srcSize, tableType); - if (ctx->currentOffset) { - return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, dictSmall, acceleration); - } else { - return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration); - } - } else { - const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? byPtr : byU32; - LZ4_prepareTable(ctx, srcSize, tableType); - return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration); - } - } else { - if (srcSize < LZ4_64Klimit) { - const tableType_t tableType = byU16; - LZ4_prepareTable(ctx, srcSize, tableType); - if (ctx->currentOffset) { - return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, dictSmall, acceleration); - } else { - return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, noDictIssue, acceleration); - } - } else { - const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? byPtr : byU32; - LZ4_prepareTable(ctx, srcSize, tableType); - return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, noDictIssue, acceleration); - } - } -} - - -int LZ4_compress_fast(const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration) -{ - int result; -#if (LZ4_HEAPMODE) - LZ4_stream_t* ctxPtr = (LZ4_stream_t*)ALLOC(sizeof(LZ4_stream_t)); /* malloc-calloc always properly aligned */ - if (ctxPtr == NULL) return 0; -#else - LZ4_stream_t ctx; - LZ4_stream_t* const ctxPtr = &ctx; -#endif - result = LZ4_compress_fast_extState(ctxPtr, source, dest, inputSize, maxOutputSize, acceleration); - -#if (LZ4_HEAPMODE) - FREEMEM(ctxPtr); -#endif - return result; -} - - -int LZ4_compress_default(const char* src, char* dst, int srcSize, int maxOutputSize) -{ - return LZ4_compress_fast(src, dst, srcSize, maxOutputSize, 1); -} - - -/* Note!: This function leaves the stream in an unclean/broken state! - * It is not safe to subsequently use the same state with a _fastReset() or - * _continue() call without resetting it. */ -static int LZ4_compress_destSize_extState (LZ4_stream_t* state, const char* src, char* dst, int* srcSizePtr, int targetDstSize) -{ - void* const s = LZ4_initStream(state, sizeof (*state)); - assert(s != NULL); (void)s; - - if (targetDstSize >= LZ4_compressBound(*srcSizePtr)) { /* compression success is guaranteed */ - return LZ4_compress_fast_extState(state, src, dst, *srcSizePtr, targetDstSize, 1); - } else { - if (*srcSizePtr < LZ4_64Klimit) { - return LZ4_compress_generic(&state->internal_donotuse, src, dst, *srcSizePtr, srcSizePtr, targetDstSize, fillOutput, byU16, noDict, noDictIssue, 1); - } else { - tableType_t const addrMode = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? byPtr : byU32; - return LZ4_compress_generic(&state->internal_donotuse, src, dst, *srcSizePtr, srcSizePtr, targetDstSize, fillOutput, addrMode, noDict, noDictIssue, 1); - } } -} - - -int LZ4_compress_destSize(const char* src, char* dst, int* srcSizePtr, int targetDstSize) -{ -#if (LZ4_HEAPMODE) - LZ4_stream_t* ctx = (LZ4_stream_t*)ALLOC(sizeof(LZ4_stream_t)); /* malloc-calloc always properly aligned */ - if (ctx == NULL) return 0; -#else - LZ4_stream_t ctxBody; - LZ4_stream_t* ctx = &ctxBody; -#endif - - int result = LZ4_compress_destSize_extState(ctx, src, dst, srcSizePtr, targetDstSize); - -#if (LZ4_HEAPMODE) - FREEMEM(ctx); -#endif - return result; -} - - - -/*-****************************** -* Streaming functions -********************************/ - -#if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION) -LZ4_stream_t* LZ4_createStream(void) -{ - LZ4_stream_t* const lz4s = (LZ4_stream_t*)ALLOC(sizeof(LZ4_stream_t)); - LZ4_STATIC_ASSERT(sizeof(LZ4_stream_t) >= sizeof(LZ4_stream_t_internal)); - DEBUGLOG(4, "LZ4_createStream %p", lz4s); - if (lz4s == NULL) return NULL; - LZ4_initStream(lz4s, sizeof(*lz4s)); - return lz4s; -} -#endif - -static size_t LZ4_stream_t_alignment(void) -{ -#if LZ4_ALIGN_TEST - typedef struct { char c; LZ4_stream_t t; } t_a; - return sizeof(t_a) - sizeof(LZ4_stream_t); -#else - return 1; /* effectively disabled */ -#endif -} - -LZ4_stream_t* LZ4_initStream (void* buffer, size_t size) -{ - DEBUGLOG(5, "LZ4_initStream"); - if (buffer == NULL) { return NULL; } - if (size < sizeof(LZ4_stream_t)) { return NULL; } - if (!LZ4_isAligned(buffer, LZ4_stream_t_alignment())) return NULL; - MEM_INIT(buffer, 0, sizeof(LZ4_stream_t_internal)); - return (LZ4_stream_t*)buffer; -} - -/* resetStream is now deprecated, - * prefer initStream() which is more general */ -void LZ4_resetStream (LZ4_stream_t* LZ4_stream) -{ - DEBUGLOG(5, "LZ4_resetStream (ctx:%p)", LZ4_stream); - MEM_INIT(LZ4_stream, 0, sizeof(LZ4_stream_t_internal)); -} - -void LZ4_resetStream_fast(LZ4_stream_t* ctx) { - LZ4_prepareTable(&(ctx->internal_donotuse), 0, byU32); -} - -#if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION) -int LZ4_freeStream (LZ4_stream_t* LZ4_stream) -{ - if (!LZ4_stream) return 0; /* support free on NULL */ - DEBUGLOG(5, "LZ4_freeStream %p", LZ4_stream); - FREEMEM(LZ4_stream); - return (0); -} -#endif - - -#define HASH_UNIT sizeof(reg_t) -int LZ4_loadDict (LZ4_stream_t* LZ4_dict, const char* dictionary, int dictSize) -{ - LZ4_stream_t_internal* dict = &LZ4_dict->internal_donotuse; - const tableType_t tableType = byU32; - const BYTE* p = (const BYTE*)dictionary; - const BYTE* const dictEnd = p + dictSize; - const BYTE* base; - - DEBUGLOG(4, "LZ4_loadDict (%i bytes from %p into %p)", dictSize, dictionary, LZ4_dict); - - /* It's necessary to reset the context, - * and not just continue it with prepareTable() - * to avoid any risk of generating overflowing matchIndex - * when compressing using this dictionary */ - LZ4_resetStream(LZ4_dict); - - /* We always increment the offset by 64 KB, since, if the dict is longer, - * we truncate it to the last 64k, and if it's shorter, we still want to - * advance by a whole window length so we can provide the guarantee that - * there are only valid offsets in the window, which allows an optimization - * in LZ4_compress_fast_continue() where it uses noDictIssue even when the - * dictionary isn't a full 64k. */ - dict->currentOffset += 64 KB; - - if (dictSize < (int)HASH_UNIT) { - return 0; - } - - if ((dictEnd - p) > 64 KB) p = dictEnd - 64 KB; - base = dictEnd - dict->currentOffset; - dict->dictionary = p; - dict->dictSize = (U32)(dictEnd - p); - dict->tableType = (U32)tableType; - - while (p <= dictEnd-HASH_UNIT) { - LZ4_putPosition(p, dict->hashTable, tableType, base); - p+=3; - } - - return (int)dict->dictSize; -} - -void LZ4_attach_dictionary(LZ4_stream_t* workingStream, const LZ4_stream_t* dictionaryStream) -{ - const LZ4_stream_t_internal* dictCtx = (dictionaryStream == NULL) ? NULL : - &(dictionaryStream->internal_donotuse); - - DEBUGLOG(4, "LZ4_attach_dictionary (%p, %p, size %u)", - workingStream, dictionaryStream, - dictCtx != NULL ? dictCtx->dictSize : 0); - - if (dictCtx != NULL) { - /* If the current offset is zero, we will never look in the - * external dictionary context, since there is no value a table - * entry can take that indicate a miss. In that case, we need - * to bump the offset to something non-zero. - */ - if (workingStream->internal_donotuse.currentOffset == 0) { - workingStream->internal_donotuse.currentOffset = 64 KB; - } - - /* Don't actually attach an empty dictionary. - */ - if (dictCtx->dictSize == 0) { - dictCtx = NULL; - } - } - workingStream->internal_donotuse.dictCtx = dictCtx; -} - - -static void LZ4_renormDictT(LZ4_stream_t_internal* LZ4_dict, int nextSize) -{ - assert(nextSize >= 0); - if (LZ4_dict->currentOffset + (unsigned)nextSize > 0x80000000) { /* potential ptrdiff_t overflow (32-bits mode) */ - /* rescale hash table */ - U32 const delta = LZ4_dict->currentOffset - 64 KB; - const BYTE* dictEnd = LZ4_dict->dictionary + LZ4_dict->dictSize; - int i; - DEBUGLOG(4, "LZ4_renormDictT"); - for (i=0; ihashTable[i] < delta) LZ4_dict->hashTable[i]=0; - else LZ4_dict->hashTable[i] -= delta; - } - LZ4_dict->currentOffset = 64 KB; - if (LZ4_dict->dictSize > 64 KB) LZ4_dict->dictSize = 64 KB; - LZ4_dict->dictionary = dictEnd - LZ4_dict->dictSize; - } -} - - -int LZ4_compress_fast_continue (LZ4_stream_t* LZ4_stream, - const char* source, char* dest, - int inputSize, int maxOutputSize, - int acceleration) -{ - const tableType_t tableType = byU32; - LZ4_stream_t_internal* const streamPtr = &LZ4_stream->internal_donotuse; - const char* dictEnd = streamPtr->dictSize ? (const char*)streamPtr->dictionary + streamPtr->dictSize : NULL; - - DEBUGLOG(5, "LZ4_compress_fast_continue (inputSize=%i, dictSize=%u)", inputSize, streamPtr->dictSize); - - LZ4_renormDictT(streamPtr, inputSize); /* fix index overflow */ - if (acceleration < 1) acceleration = LZ4_ACCELERATION_DEFAULT; - if (acceleration > LZ4_ACCELERATION_MAX) acceleration = LZ4_ACCELERATION_MAX; - - /* invalidate tiny dictionaries */ - if ( (streamPtr->dictSize < 4) /* tiny dictionary : not enough for a hash */ - && (dictEnd != source) /* prefix mode */ - && (inputSize > 0) /* tolerance : don't lose history, in case next invocation would use prefix mode */ - && (streamPtr->dictCtx == NULL) /* usingDictCtx */ - ) { - DEBUGLOG(5, "LZ4_compress_fast_continue: dictSize(%u) at addr:%p is too small", streamPtr->dictSize, streamPtr->dictionary); - /* remove dictionary existence from history, to employ faster prefix mode */ - streamPtr->dictSize = 0; - streamPtr->dictionary = (const BYTE*)source; - dictEnd = source; - } - - /* Check overlapping input/dictionary space */ - { const char* const sourceEnd = source + inputSize; - if ((sourceEnd > (const char*)streamPtr->dictionary) && (sourceEnd < dictEnd)) { - streamPtr->dictSize = (U32)(dictEnd - sourceEnd); - if (streamPtr->dictSize > 64 KB) streamPtr->dictSize = 64 KB; - if (streamPtr->dictSize < 4) streamPtr->dictSize = 0; - streamPtr->dictionary = (const BYTE*)dictEnd - streamPtr->dictSize; - } - } - - /* prefix mode : source data follows dictionary */ - if (dictEnd == source) { - if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset)) - return LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, withPrefix64k, dictSmall, acceleration); - else - return LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, withPrefix64k, noDictIssue, acceleration); - } - - /* external dictionary mode */ - { int result; - if (streamPtr->dictCtx) { - /* We depend here on the fact that dictCtx'es (produced by - * LZ4_loadDict) guarantee that their tables contain no references - * to offsets between dictCtx->currentOffset - 64 KB and - * dictCtx->currentOffset - dictCtx->dictSize. This makes it safe - * to use noDictIssue even when the dict isn't a full 64 KB. - */ - if (inputSize > 4 KB) { - /* For compressing large blobs, it is faster to pay the setup - * cost to copy the dictionary's tables into the active context, - * so that the compression loop is only looking into one table. - */ - LZ4_memcpy(streamPtr, streamPtr->dictCtx, sizeof(*streamPtr)); - result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, noDictIssue, acceleration); - } else { - result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingDictCtx, noDictIssue, acceleration); - } - } else { /* small data <= 4 KB */ - if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset)) { - result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, dictSmall, acceleration); - } else { - result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, noDictIssue, acceleration); - } - } - streamPtr->dictionary = (const BYTE*)source; - streamPtr->dictSize = (U32)inputSize; - return result; - } -} - - -/* Hidden debug function, to force-test external dictionary mode */ -int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char* dest, int srcSize) -{ - LZ4_stream_t_internal* streamPtr = &LZ4_dict->internal_donotuse; - int result; - - LZ4_renormDictT(streamPtr, srcSize); - - if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset)) { - result = LZ4_compress_generic(streamPtr, source, dest, srcSize, NULL, 0, notLimited, byU32, usingExtDict, dictSmall, 1); - } else { - result = LZ4_compress_generic(streamPtr, source, dest, srcSize, NULL, 0, notLimited, byU32, usingExtDict, noDictIssue, 1); - } - - streamPtr->dictionary = (const BYTE*)source; - streamPtr->dictSize = (U32)srcSize; - - return result; -} - - -/*! LZ4_saveDict() : - * If previously compressed data block is not guaranteed to remain available at its memory location, - * save it into a safer place (char* safeBuffer). - * Note : no need to call LZ4_loadDict() afterwards, dictionary is immediately usable, - * one can therefore call LZ4_compress_fast_continue() right after. - * @return : saved dictionary size in bytes (necessarily <= dictSize), or 0 if error. - */ -int LZ4_saveDict (LZ4_stream_t* LZ4_dict, char* safeBuffer, int dictSize) -{ - LZ4_stream_t_internal* const dict = &LZ4_dict->internal_donotuse; - - DEBUGLOG(5, "LZ4_saveDict : dictSize=%i, safeBuffer=%p", dictSize, safeBuffer); - - if ((U32)dictSize > 64 KB) { dictSize = 64 KB; } /* useless to define a dictionary > 64 KB */ - if ((U32)dictSize > dict->dictSize) { dictSize = (int)dict->dictSize; } - - if (safeBuffer == NULL) assert(dictSize == 0); - if (dictSize > 0) { - const BYTE* const previousDictEnd = dict->dictionary + dict->dictSize; - assert(dict->dictionary); - LZ4_memmove(safeBuffer, previousDictEnd - dictSize, (size_t)dictSize); - } - - dict->dictionary = (const BYTE*)safeBuffer; - dict->dictSize = (U32)dictSize; - - return dictSize; -} - - - -/*-******************************* - * Decompression functions - ********************************/ - -typedef enum { decode_full_block = 0, partial_decode = 1 } earlyEnd_directive; - -#undef MIN -#define MIN(a,b) ( (a) < (b) ? (a) : (b) ) - - -/* variant for decompress_unsafe() - * does not know end of input - * presumes input is well formed - * note : will consume at least one byte */ -size_t read_long_length_no_check(const BYTE** pp) -{ - size_t b, l = 0; - do { b = **pp; (*pp)++; l += b; } while (b==255); - DEBUGLOG(6, "read_long_length_no_check: +length=%zu using %zu input bytes", l, l/255 + 1) - return l; -} - -/* core decoder variant for LZ4_decompress_fast*() - * for legacy support only : these entry points are deprecated. - * - Presumes input is correctly formed (no defense vs malformed inputs) - * - Does not know input size (presume input buffer is "large enough") - * - Decompress a full block (only) - * @return : nb of bytes read from input. - * Note : this variant is not optimized for speed, just for maintenance. - * the goal is to remove support of decompress_fast*() variants by v2.0 -**/ -LZ4_FORCE_INLINE int -LZ4_decompress_unsafe_generic( - const BYTE* const istart, - BYTE* const ostart, - int decompressedSize, - - size_t prefixSize, - const BYTE* const dictStart, /* only if dict==usingExtDict */ - const size_t dictSize /* note: =0 if dictStart==NULL */ - ) -{ - const BYTE* ip = istart; - BYTE* op = (BYTE*)ostart; - BYTE* const oend = ostart + decompressedSize; - const BYTE* const prefixStart = ostart - prefixSize; - - DEBUGLOG(5, "LZ4_decompress_unsafe_generic"); - if (dictStart == NULL) assert(dictSize == 0); - - while (1) { - /* start new sequence */ - unsigned token = *ip++; - - /* literals */ - { size_t ll = token >> ML_BITS; - if (ll==15) { - /* long literal length */ - ll += read_long_length_no_check(&ip); - } - if ((size_t)(oend-op) < ll) return -1; /* output buffer overflow */ - LZ4_memmove(op, ip, ll); /* support in-place decompression */ - op += ll; - ip += ll; - if ((size_t)(oend-op) < MFLIMIT) { - if (op==oend) break; /* end of block */ - DEBUGLOG(5, "invalid: literals end at distance %zi from end of block", oend-op); - /* incorrect end of block : - * last match must start at least MFLIMIT==12 bytes before end of output block */ - return -1; - } } - - /* match */ - { size_t ml = token & 15; - size_t const offset = LZ4_readLE16(ip); - ip+=2; - - if (ml==15) { - /* long literal length */ - ml += read_long_length_no_check(&ip); - } - ml += MINMATCH; - - if ((size_t)(oend-op) < ml) return -1; /* output buffer overflow */ - - { const BYTE* match = op - offset; - - /* out of range */ - if (offset > (size_t)(op - prefixStart) + dictSize) { - DEBUGLOG(6, "offset out of range"); - return -1; - } - - /* check special case : extDict */ - if (offset > (size_t)(op - prefixStart)) { - /* extDict scenario */ - const BYTE* const dictEnd = dictStart + dictSize; - const BYTE* extMatch = dictEnd - (offset - (size_t)(op-prefixStart)); - size_t const extml = (size_t)(dictEnd - extMatch); - if (extml > ml) { - /* match entirely within extDict */ - LZ4_memmove(op, extMatch, ml); - op += ml; - ml = 0; - } else { - /* match split between extDict & prefix */ - LZ4_memmove(op, extMatch, extml); - op += extml; - ml -= extml; - } - match = prefixStart; - } - - /* match copy - slow variant, supporting overlap copy */ - { size_t u; - for (u=0; u= ipmax before start of loop. Returns initial_error if so. - * @error (output) - error code. Must be set to 0 before call. -**/ -typedef size_t Rvl_t; -static const Rvl_t rvl_error = (Rvl_t)(-1); -LZ4_FORCE_INLINE Rvl_t -read_variable_length(const BYTE** ip, const BYTE* ilimit, - int initial_check) -{ - Rvl_t s, length = 0; - assert(ip != NULL); - assert(*ip != NULL); - assert(ilimit != NULL); - if (initial_check && unlikely((*ip) >= ilimit)) { /* read limit reached */ - return rvl_error; - } - do { - s = **ip; - (*ip)++; - length += s; - if (unlikely((*ip) > ilimit)) { /* read limit reached */ - return rvl_error; - } - /* accumulator overflow detection (32-bit mode only) */ - if ((sizeof(length)<8) && unlikely(length > ((Rvl_t)(-1)/2)) ) { - return rvl_error; - } - } while (s==255); - - return length; -} - -/*! LZ4_decompress_generic() : - * This generic decompression function covers all use cases. - * It shall be instantiated several times, using different sets of directives. - * Note that it is important for performance that this function really get inlined, - * in order to remove useless branches during compilation optimization. - */ -LZ4_FORCE_INLINE int -LZ4_decompress_generic( - const char* const src, - char* const dst, - int srcSize, - int outputSize, /* If endOnInput==endOnInputSize, this value is `dstCapacity` */ - - earlyEnd_directive partialDecoding, /* full, partial */ - dict_directive dict, /* noDict, withPrefix64k, usingExtDict */ - const BYTE* const lowPrefix, /* always <= dst, == dst when no prefix */ - const BYTE* const dictStart, /* only if dict==usingExtDict */ - const size_t dictSize /* note : = 0 if noDict */ - ) -{ - if ((src == NULL) || (outputSize < 0)) { return -1; } - - { const BYTE* ip = (const BYTE*) src; - const BYTE* const iend = ip + srcSize; - - BYTE* op = (BYTE*) dst; - BYTE* const oend = op + outputSize; - BYTE* cpy; - - const BYTE* const dictEnd = (dictStart == NULL) ? NULL : dictStart + dictSize; - - const int checkOffset = (dictSize < (int)(64 KB)); - - - /* Set up the "end" pointers for the shortcut. */ - const BYTE* const shortiend = iend - 14 /*maxLL*/ - 2 /*offset*/; - const BYTE* const shortoend = oend - 14 /*maxLL*/ - 18 /*maxML*/; - - const BYTE* match; - size_t offset; - unsigned token; - size_t length; - - - DEBUGLOG(5, "LZ4_decompress_generic (srcSize:%i, dstSize:%i)", srcSize, outputSize); - - /* Special cases */ - assert(lowPrefix <= op); - if (unlikely(outputSize==0)) { - /* Empty output buffer */ - if (partialDecoding) return 0; - return ((srcSize==1) && (*ip==0)) ? 0 : -1; - } - if (unlikely(srcSize==0)) { return -1; } - - /* LZ4_FAST_DEC_LOOP: - * designed for modern OoO performance cpus, - * where copying reliably 32-bytes is preferable to an unpredictable branch. - * note : fast loop may show a regression for some client arm chips. */ -#if LZ4_FAST_DEC_LOOP - if ((oend - op) < FASTLOOP_SAFE_DISTANCE) { - DEBUGLOG(6, "skip fast decode loop"); - goto safe_decode; - } - - /* Fast loop : decode sequences as long as output < oend-FASTLOOP_SAFE_DISTANCE */ - while (1) { - /* Main fastloop assertion: We can always wildcopy FASTLOOP_SAFE_DISTANCE */ - assert(oend - op >= FASTLOOP_SAFE_DISTANCE); - assert(ip < iend); - token = *ip++; - length = token >> ML_BITS; /* literal length */ - - /* decode literal length */ - if (length == RUN_MASK) { - size_t const addl = read_variable_length(&ip, iend-RUN_MASK, 1); - if (addl == rvl_error) { goto _output_error; } - length += addl; - if (unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */ - if (unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow detection */ - - /* copy literals */ - cpy = op+length; - LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH); - if ((cpy>oend-32) || (ip+length>iend-32)) { goto safe_literal_copy; } - LZ4_wildCopy32(op, ip, cpy); - ip += length; op = cpy; - } else { - cpy = op+length; - DEBUGLOG(7, "copy %u bytes in a 16-bytes stripe", (unsigned)length); - /* We don't need to check oend, since we check it once for each loop below */ - if (ip > iend-(16 + 1/*max lit + offset + nextToken*/)) { goto safe_literal_copy; } - /* Literals can only be <= 14, but hope compilers optimize better when copy by a register size */ - LZ4_memcpy(op, ip, 16); - ip += length; op = cpy; - } - - /* get offset */ - offset = LZ4_readLE16(ip); ip+=2; - match = op - offset; - assert(match <= op); /* overflow check */ - - /* get matchlength */ - length = token & ML_MASK; - - if (length == ML_MASK) { - size_t const addl = read_variable_length(&ip, iend - LASTLITERALS + 1, 0); - if (addl == rvl_error) { goto _output_error; } - length += addl; - length += MINMATCH; - if (unlikely((uptrval)(op)+length<(uptrval)op)) { goto _output_error; } /* overflow detection */ - if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) { goto _output_error; } /* Error : offset outside buffers */ - if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) { - goto safe_match_copy; - } - } else { - length += MINMATCH; - if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) { - goto safe_match_copy; - } - - /* Fastpath check: skip LZ4_wildCopy32 when true */ - if ((dict == withPrefix64k) || (match >= lowPrefix)) { - if (offset >= 8) { - assert(match >= lowPrefix); - assert(match <= op); - assert(op + 18 <= oend); - - LZ4_memcpy(op, match, 8); - LZ4_memcpy(op+8, match+8, 8); - LZ4_memcpy(op+16, match+16, 2); - op += length; - continue; - } } } - - if (checkOffset && (unlikely(match + dictSize < lowPrefix))) { goto _output_error; } /* Error : offset outside buffers */ - /* match starting within external dictionary */ - if ((dict==usingExtDict) && (match < lowPrefix)) { - assert(dictEnd != NULL); - if (unlikely(op+length > oend-LASTLITERALS)) { - if (partialDecoding) { - DEBUGLOG(7, "partialDecoding: dictionary match, close to dstEnd"); - length = MIN(length, (size_t)(oend-op)); - } else { - goto _output_error; /* end-of-block condition violated */ - } } - - if (length <= (size_t)(lowPrefix-match)) { - /* match fits entirely within external dictionary : just copy */ - LZ4_memmove(op, dictEnd - (lowPrefix-match), length); - op += length; - } else { - /* match stretches into both external dictionary and current block */ - size_t const copySize = (size_t)(lowPrefix - match); - size_t const restSize = length - copySize; - LZ4_memcpy(op, dictEnd - copySize, copySize); - op += copySize; - if (restSize > (size_t)(op - lowPrefix)) { /* overlap copy */ - BYTE* const endOfMatch = op + restSize; - const BYTE* copyFrom = lowPrefix; - while (op < endOfMatch) { *op++ = *copyFrom++; } - } else { - LZ4_memcpy(op, lowPrefix, restSize); - op += restSize; - } } - continue; - } - - /* copy match within block */ - cpy = op + length; - - assert((op <= oend) && (oend-op >= 32)); - if (unlikely(offset<16)) { - LZ4_memcpy_using_offset(op, match, cpy, offset); - } else { - LZ4_wildCopy32(op, match, cpy); - } - - op = cpy; /* wildcopy correction */ - } - safe_decode: -#endif - - /* Main Loop : decode remaining sequences where output < FASTLOOP_SAFE_DISTANCE */ - while (1) { - assert(ip < iend); - token = *ip++; - length = token >> ML_BITS; /* literal length */ - - /* A two-stage shortcut for the most common case: - * 1) If the literal length is 0..14, and there is enough space, - * enter the shortcut and copy 16 bytes on behalf of the literals - * (in the fast mode, only 8 bytes can be safely copied this way). - * 2) Further if the match length is 4..18, copy 18 bytes in a similar - * manner; but we ensure that there's enough space in the output for - * those 18 bytes earlier, upon entering the shortcut (in other words, - * there is a combined check for both stages). - */ - if ( (length != RUN_MASK) - /* strictly "less than" on input, to re-enter the loop with at least one byte */ - && likely((ip < shortiend) & (op <= shortoend)) ) { - /* Copy the literals */ - LZ4_memcpy(op, ip, 16); - op += length; ip += length; - - /* The second stage: prepare for match copying, decode full info. - * If it doesn't work out, the info won't be wasted. */ - length = token & ML_MASK; /* match length */ - offset = LZ4_readLE16(ip); ip += 2; - match = op - offset; - assert(match <= op); /* check overflow */ - - /* Do not deal with overlapping matches. */ - if ( (length != ML_MASK) - && (offset >= 8) - && (dict==withPrefix64k || match >= lowPrefix) ) { - /* Copy the match. */ - LZ4_memcpy(op + 0, match + 0, 8); - LZ4_memcpy(op + 8, match + 8, 8); - LZ4_memcpy(op +16, match +16, 2); - op += length + MINMATCH; - /* Both stages worked, load the next token. */ - continue; - } - - /* The second stage didn't work out, but the info is ready. - * Propel it right to the point of match copying. */ - goto _copy_match; - } - - /* decode literal length */ - if (length == RUN_MASK) { - size_t const addl = read_variable_length(&ip, iend-RUN_MASK, 1); - if (addl == rvl_error) { goto _output_error; } - length += addl; - if (unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */ - if (unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow detection */ - } - - /* copy literals */ - cpy = op+length; -#if LZ4_FAST_DEC_LOOP - safe_literal_copy: -#endif - LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH); - if ((cpy>oend-MFLIMIT) || (ip+length>iend-(2+1+LASTLITERALS))) { - /* We've either hit the input parsing restriction or the output parsing restriction. - * In the normal scenario, decoding a full block, it must be the last sequence, - * otherwise it's an error (invalid input or dimensions). - * In partialDecoding scenario, it's necessary to ensure there is no buffer overflow. - */ - if (partialDecoding) { - /* Since we are partial decoding we may be in this block because of the output parsing - * restriction, which is not valid since the output buffer is allowed to be undersized. - */ - DEBUGLOG(7, "partialDecoding: copying literals, close to input or output end") - DEBUGLOG(7, "partialDecoding: literal length = %u", (unsigned)length); - DEBUGLOG(7, "partialDecoding: remaining space in dstBuffer : %i", (int)(oend - op)); - DEBUGLOG(7, "partialDecoding: remaining space in srcBuffer : %i", (int)(iend - ip)); - /* Finishing in the middle of a literals segment, - * due to lack of input. - */ - if (ip+length > iend) { - length = (size_t)(iend-ip); - cpy = op + length; - } - /* Finishing in the middle of a literals segment, - * due to lack of output space. - */ - if (cpy > oend) { - cpy = oend; - assert(op<=oend); - length = (size_t)(oend-op); - } - } else { - /* We must be on the last sequence (or invalid) because of the parsing limitations - * so check that we exactly consume the input and don't overrun the output buffer. - */ - if ((ip+length != iend) || (cpy > oend)) { - DEBUGLOG(6, "should have been last run of literals") - DEBUGLOG(6, "ip(%p) + length(%i) = %p != iend (%p)", ip, (int)length, ip+length, iend); - DEBUGLOG(6, "or cpy(%p) > oend(%p)", cpy, oend); - goto _output_error; - } - } - LZ4_memmove(op, ip, length); /* supports overlapping memory regions, for in-place decompression scenarios */ - ip += length; - op += length; - /* Necessarily EOF when !partialDecoding. - * When partialDecoding, it is EOF if we've either - * filled the output buffer or - * can't proceed with reading an offset for following match. - */ - if (!partialDecoding || (cpy == oend) || (ip >= (iend-2))) { - break; - } - } else { - LZ4_wildCopy8(op, ip, cpy); /* can overwrite up to 8 bytes beyond cpy */ - ip += length; op = cpy; - } - - /* get offset */ - offset = LZ4_readLE16(ip); ip+=2; - match = op - offset; - - /* get matchlength */ - length = token & ML_MASK; - - _copy_match: - if (length == ML_MASK) { - size_t const addl = read_variable_length(&ip, iend - LASTLITERALS + 1, 0); - if (addl == rvl_error) { goto _output_error; } - length += addl; - if (unlikely((uptrval)(op)+length<(uptrval)op)) goto _output_error; /* overflow detection */ - } - length += MINMATCH; - -#if LZ4_FAST_DEC_LOOP - safe_match_copy: -#endif - if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) goto _output_error; /* Error : offset outside buffers */ - /* match starting within external dictionary */ - if ((dict==usingExtDict) && (match < lowPrefix)) { - assert(dictEnd != NULL); - if (unlikely(op+length > oend-LASTLITERALS)) { - if (partialDecoding) length = MIN(length, (size_t)(oend-op)); - else goto _output_error; /* doesn't respect parsing restriction */ - } - - if (length <= (size_t)(lowPrefix-match)) { - /* match fits entirely within external dictionary : just copy */ - LZ4_memmove(op, dictEnd - (lowPrefix-match), length); - op += length; - } else { - /* match stretches into both external dictionary and current block */ - size_t const copySize = (size_t)(lowPrefix - match); - size_t const restSize = length - copySize; - LZ4_memcpy(op, dictEnd - copySize, copySize); - op += copySize; - if (restSize > (size_t)(op - lowPrefix)) { /* overlap copy */ - BYTE* const endOfMatch = op + restSize; - const BYTE* copyFrom = lowPrefix; - while (op < endOfMatch) *op++ = *copyFrom++; - } else { - LZ4_memcpy(op, lowPrefix, restSize); - op += restSize; - } } - continue; - } - assert(match >= lowPrefix); - - /* copy match within block */ - cpy = op + length; - - /* partialDecoding : may end anywhere within the block */ - assert(op<=oend); - if (partialDecoding && (cpy > oend-MATCH_SAFEGUARD_DISTANCE)) { - size_t const mlen = MIN(length, (size_t)(oend-op)); - const BYTE* const matchEnd = match + mlen; - BYTE* const copyEnd = op + mlen; - if (matchEnd > op) { /* overlap copy */ - while (op < copyEnd) { *op++ = *match++; } - } else { - LZ4_memcpy(op, match, mlen); - } - op = copyEnd; - if (op == oend) { break; } - continue; - } - - if (unlikely(offset<8)) { - LZ4_write32(op, 0); /* silence msan warning when offset==0 */ - op[0] = match[0]; - op[1] = match[1]; - op[2] = match[2]; - op[3] = match[3]; - match += inc32table[offset]; - LZ4_memcpy(op+4, match, 4); - match -= dec64table[offset]; - } else { - LZ4_memcpy(op, match, 8); - match += 8; - } - op += 8; - - if (unlikely(cpy > oend-MATCH_SAFEGUARD_DISTANCE)) { - BYTE* const oCopyLimit = oend - (WILDCOPYLENGTH-1); - if (cpy > oend-LASTLITERALS) { goto _output_error; } /* Error : last LASTLITERALS bytes must be literals (uncompressed) */ - if (op < oCopyLimit) { - LZ4_wildCopy8(op, match, oCopyLimit); - match += oCopyLimit - op; - op = oCopyLimit; - } - while (op < cpy) { *op++ = *match++; } - } else { - LZ4_memcpy(op, match, 8); - if (length > 16) { LZ4_wildCopy8(op+8, match+8, cpy); } - } - op = cpy; /* wildcopy correction */ - } - - /* end of decoding */ - DEBUGLOG(5, "decoded %i bytes", (int) (((char*)op)-dst)); - return (int) (((char*)op)-dst); /* Nb of output bytes decoded */ - - /* Overflow error detected */ - _output_error: - return (int) (-(((const char*)ip)-src))-1; - } -} - - -/*===== Instantiate the API decoding functions. =====*/ - -LZ4_FORCE_O2 -int LZ4_decompress_safe(const char* source, char* dest, int compressedSize, int maxDecompressedSize) -{ - return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize, - decode_full_block, noDict, - (BYTE*)dest, NULL, 0); -} - -LZ4_FORCE_O2 -int LZ4_decompress_safe_partial(const char* src, char* dst, int compressedSize, int targetOutputSize, int dstCapacity) -{ - dstCapacity = MIN(targetOutputSize, dstCapacity); - return LZ4_decompress_generic(src, dst, compressedSize, dstCapacity, - partial_decode, - noDict, (BYTE*)dst, NULL, 0); -} - -LZ4_FORCE_O2 -int LZ4_decompress_fast(const char* source, char* dest, int originalSize) -{ - DEBUGLOG(5, "LZ4_decompress_fast"); - return LZ4_decompress_unsafe_generic( - (const BYTE*)source, (BYTE*)dest, originalSize, - 0, NULL, 0); -} - -/*===== Instantiate a few more decoding cases, used more than once. =====*/ - -LZ4_FORCE_O2 /* Exported, an obsolete API function. */ -int LZ4_decompress_safe_withPrefix64k(const char* source, char* dest, int compressedSize, int maxOutputSize) -{ - return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, - decode_full_block, withPrefix64k, - (BYTE*)dest - 64 KB, NULL, 0); -} - -LZ4_FORCE_O2 -static int LZ4_decompress_safe_partial_withPrefix64k(const char* source, char* dest, int compressedSize, int targetOutputSize, int dstCapacity) -{ - dstCapacity = MIN(targetOutputSize, dstCapacity); - return LZ4_decompress_generic(source, dest, compressedSize, dstCapacity, - partial_decode, withPrefix64k, - (BYTE*)dest - 64 KB, NULL, 0); -} - -/* Another obsolete API function, paired with the previous one. */ -int LZ4_decompress_fast_withPrefix64k(const char* source, char* dest, int originalSize) -{ - return LZ4_decompress_unsafe_generic( - (const BYTE*)source, (BYTE*)dest, originalSize, - 64 KB, NULL, 0); -} - -LZ4_FORCE_O2 -static int LZ4_decompress_safe_withSmallPrefix(const char* source, char* dest, int compressedSize, int maxOutputSize, - size_t prefixSize) -{ - return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, - decode_full_block, noDict, - (BYTE*)dest-prefixSize, NULL, 0); -} - -LZ4_FORCE_O2 -static int LZ4_decompress_safe_partial_withSmallPrefix(const char* source, char* dest, int compressedSize, int targetOutputSize, int dstCapacity, - size_t prefixSize) -{ - dstCapacity = MIN(targetOutputSize, dstCapacity); - return LZ4_decompress_generic(source, dest, compressedSize, dstCapacity, - partial_decode, noDict, - (BYTE*)dest-prefixSize, NULL, 0); -} - -LZ4_FORCE_O2 -int LZ4_decompress_safe_forceExtDict(const char* source, char* dest, - int compressedSize, int maxOutputSize, - const void* dictStart, size_t dictSize) -{ - return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, - decode_full_block, usingExtDict, - (BYTE*)dest, (const BYTE*)dictStart, dictSize); -} - -LZ4_FORCE_O2 -int LZ4_decompress_safe_partial_forceExtDict(const char* source, char* dest, - int compressedSize, int targetOutputSize, int dstCapacity, - const void* dictStart, size_t dictSize) -{ - dstCapacity = MIN(targetOutputSize, dstCapacity); - return LZ4_decompress_generic(source, dest, compressedSize, dstCapacity, - partial_decode, usingExtDict, - (BYTE*)dest, (const BYTE*)dictStart, dictSize); -} - -LZ4_FORCE_O2 -static int LZ4_decompress_fast_extDict(const char* source, char* dest, int originalSize, - const void* dictStart, size_t dictSize) -{ - return LZ4_decompress_unsafe_generic( - (const BYTE*)source, (BYTE*)dest, originalSize, - 0, (const BYTE*)dictStart, dictSize); -} - -/* The "double dictionary" mode, for use with e.g. ring buffers: the first part - * of the dictionary is passed as prefix, and the second via dictStart + dictSize. - * These routines are used only once, in LZ4_decompress_*_continue(). - */ -LZ4_FORCE_INLINE -int LZ4_decompress_safe_doubleDict(const char* source, char* dest, int compressedSize, int maxOutputSize, - size_t prefixSize, const void* dictStart, size_t dictSize) -{ - return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, - decode_full_block, usingExtDict, - (BYTE*)dest-prefixSize, (const BYTE*)dictStart, dictSize); -} - -/*===== streaming decompression functions =====*/ - -#if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION) -LZ4_streamDecode_t* LZ4_createStreamDecode(void) -{ - LZ4_STATIC_ASSERT(sizeof(LZ4_streamDecode_t) >= sizeof(LZ4_streamDecode_t_internal)); - return (LZ4_streamDecode_t*) ALLOC_AND_ZERO(sizeof(LZ4_streamDecode_t)); -} - -int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream) -{ - if (LZ4_stream == NULL) { return 0; } /* support free on NULL */ - FREEMEM(LZ4_stream); - return 0; -} -#endif - -/*! LZ4_setStreamDecode() : - * Use this function to instruct where to find the dictionary. - * This function is not necessary if previous data is still available where it was decoded. - * Loading a size of 0 is allowed (same effect as no dictionary). - * @return : 1 if OK, 0 if error - */ -int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize) -{ - LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse; - lz4sd->prefixSize = (size_t)dictSize; - if (dictSize) { - assert(dictionary != NULL); - lz4sd->prefixEnd = (const BYTE*) dictionary + dictSize; - } else { - lz4sd->prefixEnd = (const BYTE*) dictionary; - } - lz4sd->externalDict = NULL; - lz4sd->extDictSize = 0; - return 1; -} - -/*! LZ4_decoderRingBufferSize() : - * when setting a ring buffer for streaming decompression (optional scenario), - * provides the minimum size of this ring buffer - * to be compatible with any source respecting maxBlockSize condition. - * Note : in a ring buffer scenario, - * blocks are presumed decompressed next to each other. - * When not enough space remains for next block (remainingSize < maxBlockSize), - * decoding resumes from beginning of ring buffer. - * @return : minimum ring buffer size, - * or 0 if there is an error (invalid maxBlockSize). - */ -int LZ4_decoderRingBufferSize(int maxBlockSize) -{ - if (maxBlockSize < 0) return 0; - if (maxBlockSize > LZ4_MAX_INPUT_SIZE) return 0; - if (maxBlockSize < 16) maxBlockSize = 16; - return LZ4_DECODER_RING_BUFFER_SIZE(maxBlockSize); -} - -/* -*_continue() : - These decoding functions allow decompression of multiple blocks in "streaming" mode. - Previously decoded blocks must still be available at the memory position where they were decoded. - If it's not possible, save the relevant part of decoded data into a safe buffer, - and indicate where it stands using LZ4_setStreamDecode() -*/ -LZ4_FORCE_O2 -int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxOutputSize) -{ - LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse; - int result; - - if (lz4sd->prefixSize == 0) { - /* The first call, no dictionary yet. */ - assert(lz4sd->extDictSize == 0); - result = LZ4_decompress_safe(source, dest, compressedSize, maxOutputSize); - if (result <= 0) return result; - lz4sd->prefixSize = (size_t)result; - lz4sd->prefixEnd = (BYTE*)dest + result; - } else if (lz4sd->prefixEnd == (BYTE*)dest) { - /* They're rolling the current segment. */ - if (lz4sd->prefixSize >= 64 KB - 1) - result = LZ4_decompress_safe_withPrefix64k(source, dest, compressedSize, maxOutputSize); - else if (lz4sd->extDictSize == 0) - result = LZ4_decompress_safe_withSmallPrefix(source, dest, compressedSize, maxOutputSize, - lz4sd->prefixSize); - else - result = LZ4_decompress_safe_doubleDict(source, dest, compressedSize, maxOutputSize, - lz4sd->prefixSize, lz4sd->externalDict, lz4sd->extDictSize); - if (result <= 0) return result; - lz4sd->prefixSize += (size_t)result; - lz4sd->prefixEnd += result; - } else { - /* The buffer wraps around, or they're switching to another buffer. */ - lz4sd->extDictSize = lz4sd->prefixSize; - lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize; - result = LZ4_decompress_safe_forceExtDict(source, dest, compressedSize, maxOutputSize, - lz4sd->externalDict, lz4sd->extDictSize); - if (result <= 0) return result; - lz4sd->prefixSize = (size_t)result; - lz4sd->prefixEnd = (BYTE*)dest + result; - } - - return result; -} - -LZ4_FORCE_O2 int -LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, - const char* source, char* dest, int originalSize) -{ - LZ4_streamDecode_t_internal* const lz4sd = - (assert(LZ4_streamDecode!=NULL), &LZ4_streamDecode->internal_donotuse); - int result; - - DEBUGLOG(5, "LZ4_decompress_fast_continue (toDecodeSize=%i)", originalSize); - assert(originalSize >= 0); - - if (lz4sd->prefixSize == 0) { - DEBUGLOG(5, "first invocation : no prefix nor extDict"); - assert(lz4sd->extDictSize == 0); - result = LZ4_decompress_fast(source, dest, originalSize); - if (result <= 0) return result; - lz4sd->prefixSize = (size_t)originalSize; - lz4sd->prefixEnd = (BYTE*)dest + originalSize; - } else if (lz4sd->prefixEnd == (BYTE*)dest) { - DEBUGLOG(5, "continue using existing prefix"); - result = LZ4_decompress_unsafe_generic( - (const BYTE*)source, (BYTE*)dest, originalSize, - lz4sd->prefixSize, - lz4sd->externalDict, lz4sd->extDictSize); - if (result <= 0) return result; - lz4sd->prefixSize += (size_t)originalSize; - lz4sd->prefixEnd += originalSize; - } else { - DEBUGLOG(5, "prefix becomes extDict"); - lz4sd->extDictSize = lz4sd->prefixSize; - lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize; - result = LZ4_decompress_fast_extDict(source, dest, originalSize, - lz4sd->externalDict, lz4sd->extDictSize); - if (result <= 0) return result; - lz4sd->prefixSize = (size_t)originalSize; - lz4sd->prefixEnd = (BYTE*)dest + originalSize; - } - - return result; -} - - -/* -Advanced decoding functions : -*_usingDict() : - These decoding functions work the same as "_continue" ones, - the dictionary must be explicitly provided within parameters -*/ - -int LZ4_decompress_safe_usingDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const char* dictStart, int dictSize) -{ - if (dictSize==0) - return LZ4_decompress_safe(source, dest, compressedSize, maxOutputSize); - if (dictStart+dictSize == dest) { - if (dictSize >= 64 KB - 1) { - return LZ4_decompress_safe_withPrefix64k(source, dest, compressedSize, maxOutputSize); - } - assert(dictSize >= 0); - return LZ4_decompress_safe_withSmallPrefix(source, dest, compressedSize, maxOutputSize, (size_t)dictSize); - } - assert(dictSize >= 0); - return LZ4_decompress_safe_forceExtDict(source, dest, compressedSize, maxOutputSize, dictStart, (size_t)dictSize); -} - -int LZ4_decompress_safe_partial_usingDict(const char* source, char* dest, int compressedSize, int targetOutputSize, int dstCapacity, const char* dictStart, int dictSize) -{ - if (dictSize==0) - return LZ4_decompress_safe_partial(source, dest, compressedSize, targetOutputSize, dstCapacity); - if (dictStart+dictSize == dest) { - if (dictSize >= 64 KB - 1) { - return LZ4_decompress_safe_partial_withPrefix64k(source, dest, compressedSize, targetOutputSize, dstCapacity); - } - assert(dictSize >= 0); - return LZ4_decompress_safe_partial_withSmallPrefix(source, dest, compressedSize, targetOutputSize, dstCapacity, (size_t)dictSize); - } - assert(dictSize >= 0); - return LZ4_decompress_safe_partial_forceExtDict(source, dest, compressedSize, targetOutputSize, dstCapacity, dictStart, (size_t)dictSize); -} - -int LZ4_decompress_fast_usingDict(const char* source, char* dest, int originalSize, const char* dictStart, int dictSize) -{ - if (dictSize==0 || dictStart+dictSize == dest) - return LZ4_decompress_unsafe_generic( - (const BYTE*)source, (BYTE*)dest, originalSize, - (size_t)dictSize, NULL, 0); - assert(dictSize >= 0); - return LZ4_decompress_fast_extDict(source, dest, originalSize, dictStart, (size_t)dictSize); -} - - -/*=************************************************* -* Obsolete Functions -***************************************************/ -/* obsolete compression functions */ -int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize) -{ - return LZ4_compress_default(source, dest, inputSize, maxOutputSize); -} -int LZ4_compress(const char* src, char* dest, int srcSize) -{ - return LZ4_compress_default(src, dest, srcSize, LZ4_compressBound(srcSize)); -} -int LZ4_compress_limitedOutput_withState (void* state, const char* src, char* dst, int srcSize, int dstSize) -{ - return LZ4_compress_fast_extState(state, src, dst, srcSize, dstSize, 1); -} -int LZ4_compress_withState (void* state, const char* src, char* dst, int srcSize) -{ - return LZ4_compress_fast_extState(state, src, dst, srcSize, LZ4_compressBound(srcSize), 1); -} -int LZ4_compress_limitedOutput_continue (LZ4_stream_t* LZ4_stream, const char* src, char* dst, int srcSize, int dstCapacity) -{ - return LZ4_compress_fast_continue(LZ4_stream, src, dst, srcSize, dstCapacity, 1); -} -int LZ4_compress_continue (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize) -{ - return LZ4_compress_fast_continue(LZ4_stream, source, dest, inputSize, LZ4_compressBound(inputSize), 1); -} - -/* -These decompression functions are deprecated and should no longer be used. -They are only provided here for compatibility with older user programs. -- LZ4_uncompress is totally equivalent to LZ4_decompress_fast -- LZ4_uncompress_unknownOutputSize is totally equivalent to LZ4_decompress_safe -*/ -int LZ4_uncompress (const char* source, char* dest, int outputSize) -{ - return LZ4_decompress_fast(source, dest, outputSize); -} -int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize) -{ - return LZ4_decompress_safe(source, dest, isize, maxOutputSize); -} - -/* Obsolete Streaming functions */ - -int LZ4_sizeofStreamState(void) { return sizeof(LZ4_stream_t); } - -int LZ4_resetStreamState(void* state, char* inputBuffer) -{ - (void)inputBuffer; - LZ4_resetStream((LZ4_stream_t*)state); - return 0; -} - -#if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION) -void* LZ4_create (char* inputBuffer) -{ - (void)inputBuffer; - return LZ4_createStream(); -} -#endif - -char* LZ4_slideInputBuffer (void* state) -{ - /* avoid const char * -> char * conversion warning */ - return (char *)(uptrval)((LZ4_stream_t*)state)->internal_donotuse.dictionary; -} - -#endif /* LZ4_COMMONDEFS_ONLY */ - -} diff --git a/src/third_party/tracy/common/tracy_lz4.hpp b/src/third_party/tracy/common/tracy_lz4.hpp deleted file mode 100644 index 672c2feb..00000000 --- a/src/third_party/tracy/common/tracy_lz4.hpp +++ /dev/null @@ -1,847 +0,0 @@ -/* - * LZ4 - Fast LZ compression algorithm - * Header File - * Copyright (C) 2011-2020, Yann Collet. - - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - You can contact the author at : - - LZ4 homepage : http://www.lz4.org - - LZ4 source repository : https://github.com/lz4/lz4 -*/ - -#ifndef TRACY_LZ4_H_2983827168210 -#define TRACY_LZ4_H_2983827168210 - -/* --- Dependency --- */ -#include /* size_t */ -#include - - -/** - Introduction - - LZ4 is lossless compression algorithm, providing compression speed >500 MB/s per core, - scalable with multi-cores CPU. It features an extremely fast decoder, with speed in - multiple GB/s per core, typically reaching RAM speed limits on multi-core systems. - - The LZ4 compression library provides in-memory compression and decompression functions. - It gives full buffer control to user. - Compression can be done in: - - a single step (described as Simple Functions) - - a single step, reusing a context (described in Advanced Functions) - - unbounded multiple steps (described as Streaming compression) - - lz4.h generates and decodes LZ4-compressed blocks (doc/lz4_Block_format.md). - Decompressing such a compressed block requires additional metadata. - Exact metadata depends on exact decompression function. - For the typical case of LZ4_decompress_safe(), - metadata includes block's compressed size, and maximum bound of decompressed size. - Each application is free to encode and pass such metadata in whichever way it wants. - - lz4.h only handle blocks, it can not generate Frames. - - Blocks are different from Frames (doc/lz4_Frame_format.md). - Frames bundle both blocks and metadata in a specified manner. - Embedding metadata is required for compressed data to be self-contained and portable. - Frame format is delivered through a companion API, declared in lz4frame.h. - The `lz4` CLI can only manage frames. -*/ - -/*^*************************************************************** -* Export parameters -*****************************************************************/ -/* -* LZ4_DLL_EXPORT : -* Enable exporting of functions when building a Windows DLL -* LZ4LIB_VISIBILITY : -* Control library symbols visibility. -*/ -#ifndef LZ4LIB_VISIBILITY -# if defined(__GNUC__) && (__GNUC__ >= 4) -# define LZ4LIB_VISIBILITY __attribute__ ((visibility ("default"))) -# else -# define LZ4LIB_VISIBILITY -# endif -#endif -#if defined(LZ4_DLL_EXPORT) && (LZ4_DLL_EXPORT==1) -# define LZ4LIB_API __declspec(dllexport) LZ4LIB_VISIBILITY -#elif defined(LZ4_DLL_IMPORT) && (LZ4_DLL_IMPORT==1) -# define LZ4LIB_API __declspec(dllimport) LZ4LIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/ -#else -# define LZ4LIB_API LZ4LIB_VISIBILITY -#endif - -/*! LZ4_FREESTANDING : - * When this macro is set to 1, it enables "freestanding mode" that is - * suitable for typical freestanding environment which doesn't support - * standard C library. - * - * - LZ4_FREESTANDING is a compile-time switch. - * - It requires the following macros to be defined: - * LZ4_memcpy, LZ4_memmove, LZ4_memset. - * - It only enables LZ4/HC functions which don't use heap. - * All LZ4F_* functions are not supported. - * - See tests/freestanding.c to check its basic setup. - */ -#if defined(LZ4_FREESTANDING) && (LZ4_FREESTANDING == 1) -# define LZ4_HEAPMODE 0 -# define LZ4HC_HEAPMODE 0 -# define LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION 1 -# if !defined(LZ4_memcpy) -# error "LZ4_FREESTANDING requires macro 'LZ4_memcpy'." -# endif -# if !defined(LZ4_memset) -# error "LZ4_FREESTANDING requires macro 'LZ4_memset'." -# endif -# if !defined(LZ4_memmove) -# error "LZ4_FREESTANDING requires macro 'LZ4_memmove'." -# endif -#elif ! defined(LZ4_FREESTANDING) -# define LZ4_FREESTANDING 0 -#endif - - -/*------ Version ------*/ -#define LZ4_VERSION_MAJOR 1 /* for breaking interface changes */ -#define LZ4_VERSION_MINOR 9 /* for new (non-breaking) interface capabilities */ -#define LZ4_VERSION_RELEASE 4 /* for tweaks, bug-fixes, or development */ - -#define LZ4_VERSION_NUMBER (LZ4_VERSION_MAJOR *100*100 + LZ4_VERSION_MINOR *100 + LZ4_VERSION_RELEASE) - -#define LZ4_LIB_VERSION LZ4_VERSION_MAJOR.LZ4_VERSION_MINOR.LZ4_VERSION_RELEASE -#define LZ4_QUOTE(str) #str -#define LZ4_EXPAND_AND_QUOTE(str) LZ4_QUOTE(str) -#define LZ4_VERSION_STRING LZ4_EXPAND_AND_QUOTE(LZ4_LIB_VERSION) /* requires v1.7.3+ */ - -namespace tracy -{ - -LZ4LIB_API int LZ4_versionNumber (void); /**< library version number; useful to check dll version; requires v1.3.0+ */ -LZ4LIB_API const char* LZ4_versionString (void); /**< library version string; useful to check dll version; requires v1.7.5+ */ - - -/*-************************************ -* Tuning parameter -**************************************/ -#define LZ4_MEMORY_USAGE_MIN 10 -#define LZ4_MEMORY_USAGE_DEFAULT 14 -#define LZ4_MEMORY_USAGE_MAX 20 - -/*! - * LZ4_MEMORY_USAGE : - * Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; ) - * Increasing memory usage improves compression ratio, at the cost of speed. - * Reduced memory usage may improve speed at the cost of ratio, thanks to better cache locality. - * Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache - */ -#ifndef LZ4_MEMORY_USAGE -# define LZ4_MEMORY_USAGE LZ4_MEMORY_USAGE_DEFAULT -#endif - -#if (LZ4_MEMORY_USAGE < LZ4_MEMORY_USAGE_MIN) -# error "LZ4_MEMORY_USAGE is too small !" -#endif - -#if (LZ4_MEMORY_USAGE > LZ4_MEMORY_USAGE_MAX) -# error "LZ4_MEMORY_USAGE is too large !" -#endif - -/*-************************************ -* Simple Functions -**************************************/ -/*! LZ4_compress_default() : - * Compresses 'srcSize' bytes from buffer 'src' - * into already allocated 'dst' buffer of size 'dstCapacity'. - * Compression is guaranteed to succeed if 'dstCapacity' >= LZ4_compressBound(srcSize). - * It also runs faster, so it's a recommended setting. - * If the function cannot compress 'src' into a more limited 'dst' budget, - * compression stops *immediately*, and the function result is zero. - * In which case, 'dst' content is undefined (invalid). - * srcSize : max supported value is LZ4_MAX_INPUT_SIZE. - * dstCapacity : size of buffer 'dst' (which must be already allocated) - * @return : the number of bytes written into buffer 'dst' (necessarily <= dstCapacity) - * or 0 if compression fails - * Note : This function is protected against buffer overflow scenarios (never writes outside 'dst' buffer, nor read outside 'source' buffer). - */ -LZ4LIB_API int LZ4_compress_default(const char* src, char* dst, int srcSize, int dstCapacity); - -/*! LZ4_decompress_safe() : - * compressedSize : is the exact complete size of the compressed block. - * dstCapacity : is the size of destination buffer (which must be already allocated), presumed an upper bound of decompressed size. - * @return : the number of bytes decompressed into destination buffer (necessarily <= dstCapacity) - * If destination buffer is not large enough, decoding will stop and output an error code (negative value). - * If the source stream is detected malformed, the function will stop decoding and return a negative result. - * Note 1 : This function is protected against malicious data packets : - * it will never writes outside 'dst' buffer, nor read outside 'source' buffer, - * even if the compressed block is maliciously modified to order the decoder to do these actions. - * In such case, the decoder stops immediately, and considers the compressed block malformed. - * Note 2 : compressedSize and dstCapacity must be provided to the function, the compressed block does not contain them. - * The implementation is free to send / store / derive this information in whichever way is most beneficial. - * If there is a need for a different format which bundles together both compressed data and its metadata, consider looking at lz4frame.h instead. - */ -LZ4LIB_API int LZ4_decompress_safe (const char* src, char* dst, int compressedSize, int dstCapacity); - - -/*-************************************ -* Advanced Functions -**************************************/ -#define LZ4_MAX_INPUT_SIZE 0x7E000000 /* 2 113 929 216 bytes */ -#define LZ4_COMPRESSBOUND(isize) ((unsigned)(isize) > (unsigned)LZ4_MAX_INPUT_SIZE ? 0 : (isize) + ((isize)/255) + 16) - -/*! LZ4_compressBound() : - Provides the maximum size that LZ4 compression may output in a "worst case" scenario (input data not compressible) - This function is primarily useful for memory allocation purposes (destination buffer size). - Macro LZ4_COMPRESSBOUND() is also provided for compilation-time evaluation (stack memory allocation for example). - Note that LZ4_compress_default() compresses faster when dstCapacity is >= LZ4_compressBound(srcSize) - inputSize : max supported value is LZ4_MAX_INPUT_SIZE - return : maximum output size in a "worst case" scenario - or 0, if input size is incorrect (too large or negative) -*/ -LZ4LIB_API int LZ4_compressBound(int inputSize); - -/*! LZ4_compress_fast() : - Same as LZ4_compress_default(), but allows selection of "acceleration" factor. - The larger the acceleration value, the faster the algorithm, but also the lesser the compression. - It's a trade-off. It can be fine tuned, with each successive value providing roughly +~3% to speed. - An acceleration value of "1" is the same as regular LZ4_compress_default() - Values <= 0 will be replaced by LZ4_ACCELERATION_DEFAULT (currently == 1, see lz4.c). - Values > LZ4_ACCELERATION_MAX will be replaced by LZ4_ACCELERATION_MAX (currently == 65537, see lz4.c). -*/ -LZ4LIB_API int LZ4_compress_fast (const char* src, char* dst, int srcSize, int dstCapacity, int acceleration); - - -/*! LZ4_compress_fast_extState() : - * Same as LZ4_compress_fast(), using an externally allocated memory space for its state. - * Use LZ4_sizeofState() to know how much memory must be allocated, - * and allocate it on 8-bytes boundaries (using `malloc()` typically). - * Then, provide this buffer as `void* state` to compression function. - */ -LZ4LIB_API int LZ4_sizeofState(void); -LZ4LIB_API int LZ4_compress_fast_extState (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration); - - -/*! LZ4_compress_destSize() : - * Reverse the logic : compresses as much data as possible from 'src' buffer - * into already allocated buffer 'dst', of size >= 'targetDestSize'. - * This function either compresses the entire 'src' content into 'dst' if it's large enough, - * or fill 'dst' buffer completely with as much data as possible from 'src'. - * note: acceleration parameter is fixed to "default". - * - * *srcSizePtr : will be modified to indicate how many bytes where read from 'src' to fill 'dst'. - * New value is necessarily <= input value. - * @return : Nb bytes written into 'dst' (necessarily <= targetDestSize) - * or 0 if compression fails. - * - * Note : from v1.8.2 to v1.9.1, this function had a bug (fixed un v1.9.2+): - * the produced compressed content could, in specific circumstances, - * require to be decompressed into a destination buffer larger - * by at least 1 byte than the content to decompress. - * If an application uses `LZ4_compress_destSize()`, - * it's highly recommended to update liblz4 to v1.9.2 or better. - * If this can't be done or ensured, - * the receiving decompression function should provide - * a dstCapacity which is > decompressedSize, by at least 1 byte. - * See https://github.com/lz4/lz4/issues/859 for details - */ -LZ4LIB_API int LZ4_compress_destSize (const char* src, char* dst, int* srcSizePtr, int targetDstSize); - - -/*! LZ4_decompress_safe_partial() : - * Decompress an LZ4 compressed block, of size 'srcSize' at position 'src', - * into destination buffer 'dst' of size 'dstCapacity'. - * Up to 'targetOutputSize' bytes will be decoded. - * The function stops decoding on reaching this objective. - * This can be useful to boost performance - * whenever only the beginning of a block is required. - * - * @return : the number of bytes decoded in `dst` (necessarily <= targetOutputSize) - * If source stream is detected malformed, function returns a negative result. - * - * Note 1 : @return can be < targetOutputSize, if compressed block contains less data. - * - * Note 2 : targetOutputSize must be <= dstCapacity - * - * Note 3 : this function effectively stops decoding on reaching targetOutputSize, - * so dstCapacity is kind of redundant. - * This is because in older versions of this function, - * decoding operation would still write complete sequences. - * Therefore, there was no guarantee that it would stop writing at exactly targetOutputSize, - * it could write more bytes, though only up to dstCapacity. - * Some "margin" used to be required for this operation to work properly. - * Thankfully, this is no longer necessary. - * The function nonetheless keeps the same signature, in an effort to preserve API compatibility. - * - * Note 4 : If srcSize is the exact size of the block, - * then targetOutputSize can be any value, - * including larger than the block's decompressed size. - * The function will, at most, generate block's decompressed size. - * - * Note 5 : If srcSize is _larger_ than block's compressed size, - * then targetOutputSize **MUST** be <= block's decompressed size. - * Otherwise, *silent corruption will occur*. - */ -LZ4LIB_API int LZ4_decompress_safe_partial (const char* src, char* dst, int srcSize, int targetOutputSize, int dstCapacity); - - -/*-********************************************* -* Streaming Compression Functions -***********************************************/ -typedef union LZ4_stream_u LZ4_stream_t; /* incomplete type (defined later) */ - -/** - Note about RC_INVOKED - - - RC_INVOKED is predefined symbol of rc.exe (the resource compiler which is part of MSVC/Visual Studio). - https://docs.microsoft.com/en-us/windows/win32/menurc/predefined-macros - - - Since rc.exe is a legacy compiler, it truncates long symbol (> 30 chars) - and reports warning "RC4011: identifier truncated". - - - To eliminate the warning, we surround long preprocessor symbol with - "#if !defined(RC_INVOKED) ... #endif" block that means - "skip this block when rc.exe is trying to read it". -*/ -#if !defined(RC_INVOKED) /* https://docs.microsoft.com/en-us/windows/win32/menurc/predefined-macros */ -#if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION) -LZ4LIB_API LZ4_stream_t* LZ4_createStream(void); -LZ4LIB_API int LZ4_freeStream (LZ4_stream_t* streamPtr); -#endif /* !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION) */ -#endif - -/*! LZ4_resetStream_fast() : v1.9.0+ - * Use this to prepare an LZ4_stream_t for a new chain of dependent blocks - * (e.g., LZ4_compress_fast_continue()). - * - * An LZ4_stream_t must be initialized once before usage. - * This is automatically done when created by LZ4_createStream(). - * However, should the LZ4_stream_t be simply declared on stack (for example), - * it's necessary to initialize it first, using LZ4_initStream(). - * - * After init, start any new stream with LZ4_resetStream_fast(). - * A same LZ4_stream_t can be re-used multiple times consecutively - * and compress multiple streams, - * provided that it starts each new stream with LZ4_resetStream_fast(). - * - * LZ4_resetStream_fast() is much faster than LZ4_initStream(), - * but is not compatible with memory regions containing garbage data. - * - * Note: it's only useful to call LZ4_resetStream_fast() - * in the context of streaming compression. - * The *extState* functions perform their own resets. - * Invoking LZ4_resetStream_fast() before is redundant, and even counterproductive. - */ -LZ4LIB_API void LZ4_resetStream_fast (LZ4_stream_t* streamPtr); - -/*! LZ4_loadDict() : - * Use this function to reference a static dictionary into LZ4_stream_t. - * The dictionary must remain available during compression. - * LZ4_loadDict() triggers a reset, so any previous data will be forgotten. - * The same dictionary will have to be loaded on decompression side for successful decoding. - * Dictionary are useful for better compression of small data (KB range). - * While LZ4 accept any input as dictionary, - * results are generally better when using Zstandard's Dictionary Builder. - * Loading a size of 0 is allowed, and is the same as reset. - * @return : loaded dictionary size, in bytes (necessarily <= 64 KB) - */ -LZ4LIB_API int LZ4_loadDict (LZ4_stream_t* streamPtr, const char* dictionary, int dictSize); - -/*! LZ4_compress_fast_continue() : - * Compress 'src' content using data from previously compressed blocks, for better compression ratio. - * 'dst' buffer must be already allocated. - * If dstCapacity >= LZ4_compressBound(srcSize), compression is guaranteed to succeed, and runs faster. - * - * @return : size of compressed block - * or 0 if there is an error (typically, cannot fit into 'dst'). - * - * Note 1 : Each invocation to LZ4_compress_fast_continue() generates a new block. - * Each block has precise boundaries. - * Each block must be decompressed separately, calling LZ4_decompress_*() with relevant metadata. - * It's not possible to append blocks together and expect a single invocation of LZ4_decompress_*() to decompress them together. - * - * Note 2 : The previous 64KB of source data is __assumed__ to remain present, unmodified, at same address in memory ! - * - * Note 3 : When input is structured as a double-buffer, each buffer can have any size, including < 64 KB. - * Make sure that buffers are separated, by at least one byte. - * This construction ensures that each block only depends on previous block. - * - * Note 4 : If input buffer is a ring-buffer, it can have any size, including < 64 KB. - * - * Note 5 : After an error, the stream status is undefined (invalid), it can only be reset or freed. - */ -LZ4LIB_API int LZ4_compress_fast_continue (LZ4_stream_t* streamPtr, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration); - -/*! LZ4_saveDict() : - * If last 64KB data cannot be guaranteed to remain available at its current memory location, - * save it into a safer place (char* safeBuffer). - * This is schematically equivalent to a memcpy() followed by LZ4_loadDict(), - * but is much faster, because LZ4_saveDict() doesn't need to rebuild tables. - * @return : saved dictionary size in bytes (necessarily <= maxDictSize), or 0 if error. - */ -LZ4LIB_API int LZ4_saveDict (LZ4_stream_t* streamPtr, char* safeBuffer, int maxDictSize); - - -/*-********************************************** -* Streaming Decompression Functions -* Bufferless synchronous API -************************************************/ -typedef union LZ4_streamDecode_u LZ4_streamDecode_t; /* tracking context */ - -/*! LZ4_createStreamDecode() and LZ4_freeStreamDecode() : - * creation / destruction of streaming decompression tracking context. - * A tracking context can be re-used multiple times. - */ -#if !defined(RC_INVOKED) /* https://docs.microsoft.com/en-us/windows/win32/menurc/predefined-macros */ -#if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION) -LZ4LIB_API LZ4_streamDecode_t* LZ4_createStreamDecode(void); -LZ4LIB_API int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream); -#endif /* !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION) */ -#endif - -/*! LZ4_setStreamDecode() : - * An LZ4_streamDecode_t context can be allocated once and re-used multiple times. - * Use this function to start decompression of a new stream of blocks. - * A dictionary can optionally be set. Use NULL or size 0 for a reset order. - * Dictionary is presumed stable : it must remain accessible and unmodified during next decompression. - * @return : 1 if OK, 0 if error - */ -LZ4LIB_API int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize); - -/*! LZ4_decoderRingBufferSize() : v1.8.2+ - * Note : in a ring buffer scenario (optional), - * blocks are presumed decompressed next to each other - * up to the moment there is not enough remaining space for next block (remainingSize < maxBlockSize), - * at which stage it resumes from beginning of ring buffer. - * When setting such a ring buffer for streaming decompression, - * provides the minimum size of this ring buffer - * to be compatible with any source respecting maxBlockSize condition. - * @return : minimum ring buffer size, - * or 0 if there is an error (invalid maxBlockSize). - */ -LZ4LIB_API int LZ4_decoderRingBufferSize(int maxBlockSize); -#define LZ4_DECODER_RING_BUFFER_SIZE(maxBlockSize) (65536 + 14 + (maxBlockSize)) /* for static allocation; maxBlockSize presumed valid */ - -/*! LZ4_decompress_*_continue() : - * These decoding functions allow decompression of consecutive blocks in "streaming" mode. - * A block is an unsplittable entity, it must be presented entirely to a decompression function. - * Decompression functions only accepts one block at a time. - * The last 64KB of previously decoded data *must* remain available and unmodified at the memory position where they were decoded. - * If less than 64KB of data has been decoded, all the data must be present. - * - * Special : if decompression side sets a ring buffer, it must respect one of the following conditions : - * - Decompression buffer size is _at least_ LZ4_decoderRingBufferSize(maxBlockSize). - * maxBlockSize is the maximum size of any single block. It can have any value > 16 bytes. - * In which case, encoding and decoding buffers do not need to be synchronized. - * Actually, data can be produced by any source compliant with LZ4 format specification, and respecting maxBlockSize. - * - Synchronized mode : - * Decompression buffer size is _exactly_ the same as compression buffer size, - * and follows exactly same update rule (block boundaries at same positions), - * and decoding function is provided with exact decompressed size of each block (exception for last block of the stream), - * _then_ decoding & encoding ring buffer can have any size, including small ones ( < 64 KB). - * - Decompression buffer is larger than encoding buffer, by a minimum of maxBlockSize more bytes. - * In which case, encoding and decoding buffers do not need to be synchronized, - * and encoding ring buffer can have any size, including small ones ( < 64 KB). - * - * Whenever these conditions are not possible, - * save the last 64KB of decoded data into a safe buffer where it can't be modified during decompression, - * then indicate where this data is saved using LZ4_setStreamDecode(), before decompressing next block. -*/ -LZ4LIB_API int -LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, - const char* src, char* dst, - int srcSize, int dstCapacity); - - -/*! LZ4_decompress_*_usingDict() : - * These decoding functions work the same as - * a combination of LZ4_setStreamDecode() followed by LZ4_decompress_*_continue() - * They are stand-alone, and don't need an LZ4_streamDecode_t structure. - * Dictionary is presumed stable : it must remain accessible and unmodified during decompression. - * Performance tip : Decompression speed can be substantially increased - * when dst == dictStart + dictSize. - */ -LZ4LIB_API int -LZ4_decompress_safe_usingDict(const char* src, char* dst, - int srcSize, int dstCapacity, - const char* dictStart, int dictSize); - -LZ4LIB_API int -LZ4_decompress_safe_partial_usingDict(const char* src, char* dst, - int compressedSize, - int targetOutputSize, int maxOutputSize, - const char* dictStart, int dictSize); - -} - -#endif /* LZ4_H_2983827168210 */ - - -/*^************************************* - * !!!!!! STATIC LINKING ONLY !!!!!! - ***************************************/ - -/*-**************************************************************************** - * Experimental section - * - * Symbols declared in this section must be considered unstable. Their - * signatures or semantics may change, or they may be removed altogether in the - * future. They are therefore only safe to depend on when the caller is - * statically linked against the library. - * - * To protect against unsafe usage, not only are the declarations guarded, - * the definitions are hidden by default - * when building LZ4 as a shared/dynamic library. - * - * In order to access these declarations, - * define LZ4_STATIC_LINKING_ONLY in your application - * before including LZ4's headers. - * - * In order to make their implementations accessible dynamically, you must - * define LZ4_PUBLISH_STATIC_FUNCTIONS when building the LZ4 library. - ******************************************************************************/ - -#ifdef LZ4_STATIC_LINKING_ONLY - -#ifndef TRACY_LZ4_STATIC_3504398509 -#define TRACY_LZ4_STATIC_3504398509 - -#ifdef LZ4_PUBLISH_STATIC_FUNCTIONS -#define LZ4LIB_STATIC_API LZ4LIB_API -#else -#define LZ4LIB_STATIC_API -#endif - -namespace tracy -{ - -/*! LZ4_compress_fast_extState_fastReset() : - * A variant of LZ4_compress_fast_extState(). - * - * Using this variant avoids an expensive initialization step. - * It is only safe to call if the state buffer is known to be correctly initialized already - * (see above comment on LZ4_resetStream_fast() for a definition of "correctly initialized"). - * From a high level, the difference is that - * this function initializes the provided state with a call to something like LZ4_resetStream_fast() - * while LZ4_compress_fast_extState() starts with a call to LZ4_resetStream(). - */ -LZ4LIB_STATIC_API int LZ4_compress_fast_extState_fastReset (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration); - -/*! LZ4_attach_dictionary() : - * This is an experimental API that allows - * efficient use of a static dictionary many times. - * - * Rather than re-loading the dictionary buffer into a working context before - * each compression, or copying a pre-loaded dictionary's LZ4_stream_t into a - * working LZ4_stream_t, this function introduces a no-copy setup mechanism, - * in which the working stream references the dictionary stream in-place. - * - * Several assumptions are made about the state of the dictionary stream. - * Currently, only streams which have been prepared by LZ4_loadDict() should - * be expected to work. - * - * Alternatively, the provided dictionaryStream may be NULL, - * in which case any existing dictionary stream is unset. - * - * If a dictionary is provided, it replaces any pre-existing stream history. - * The dictionary contents are the only history that can be referenced and - * logically immediately precede the data compressed in the first subsequent - * compression call. - * - * The dictionary will only remain attached to the working stream through the - * first compression call, at the end of which it is cleared. The dictionary - * stream (and source buffer) must remain in-place / accessible / unchanged - * through the completion of the first compression call on the stream. - */ -LZ4LIB_STATIC_API void -LZ4_attach_dictionary(LZ4_stream_t* workingStream, - const LZ4_stream_t* dictionaryStream); - - -/*! In-place compression and decompression - * - * It's possible to have input and output sharing the same buffer, - * for highly constrained memory environments. - * In both cases, it requires input to lay at the end of the buffer, - * and decompression to start at beginning of the buffer. - * Buffer size must feature some margin, hence be larger than final size. - * - * |<------------------------buffer--------------------------------->| - * |<-----------compressed data--------->| - * |<-----------decompressed size------------------>| - * |<----margin---->| - * - * This technique is more useful for decompression, - * since decompressed size is typically larger, - * and margin is short. - * - * In-place decompression will work inside any buffer - * which size is >= LZ4_DECOMPRESS_INPLACE_BUFFER_SIZE(decompressedSize). - * This presumes that decompressedSize > compressedSize. - * Otherwise, it means compression actually expanded data, - * and it would be more efficient to store such data with a flag indicating it's not compressed. - * This can happen when data is not compressible (already compressed, or encrypted). - * - * For in-place compression, margin is larger, as it must be able to cope with both - * history preservation, requiring input data to remain unmodified up to LZ4_DISTANCE_MAX, - * and data expansion, which can happen when input is not compressible. - * As a consequence, buffer size requirements are much higher, - * and memory savings offered by in-place compression are more limited. - * - * There are ways to limit this cost for compression : - * - Reduce history size, by modifying LZ4_DISTANCE_MAX. - * Note that it is a compile-time constant, so all compressions will apply this limit. - * Lower values will reduce compression ratio, except when input_size < LZ4_DISTANCE_MAX, - * so it's a reasonable trick when inputs are known to be small. - * - Require the compressor to deliver a "maximum compressed size". - * This is the `dstCapacity` parameter in `LZ4_compress*()`. - * When this size is < LZ4_COMPRESSBOUND(inputSize), then compression can fail, - * in which case, the return code will be 0 (zero). - * The caller must be ready for these cases to happen, - * and typically design a backup scheme to send data uncompressed. - * The combination of both techniques can significantly reduce - * the amount of margin required for in-place compression. - * - * In-place compression can work in any buffer - * which size is >= (maxCompressedSize) - * with maxCompressedSize == LZ4_COMPRESSBOUND(srcSize) for guaranteed compression success. - * LZ4_COMPRESS_INPLACE_BUFFER_SIZE() depends on both maxCompressedSize and LZ4_DISTANCE_MAX, - * so it's possible to reduce memory requirements by playing with them. - */ - -#define LZ4_DECOMPRESS_INPLACE_MARGIN(compressedSize) (((compressedSize) >> 8) + 32) -#define LZ4_DECOMPRESS_INPLACE_BUFFER_SIZE(decompressedSize) ((decompressedSize) + LZ4_DECOMPRESS_INPLACE_MARGIN(decompressedSize)) /**< note: presumes that compressedSize < decompressedSize. note2: margin is overestimated a bit, since it could use compressedSize instead */ - -#ifndef LZ4_DISTANCE_MAX /* history window size; can be user-defined at compile time */ -# define LZ4_DISTANCE_MAX 65535 /* set to maximum value by default */ -#endif - -#define LZ4_COMPRESS_INPLACE_MARGIN (LZ4_DISTANCE_MAX + 32) /* LZ4_DISTANCE_MAX can be safely replaced by srcSize when it's smaller */ -#define LZ4_COMPRESS_INPLACE_BUFFER_SIZE(maxCompressedSize) ((maxCompressedSize) + LZ4_COMPRESS_INPLACE_MARGIN) /**< maxCompressedSize is generally LZ4_COMPRESSBOUND(inputSize), but can be set to any lower value, with the risk that compression can fail (return code 0(zero)) */ - -} - -#endif /* LZ4_STATIC_3504398509 */ -#endif /* LZ4_STATIC_LINKING_ONLY */ - - - -#ifndef TRACY_LZ4_H_98237428734687 -#define TRACY_LZ4_H_98237428734687 - -namespace tracy -{ - -/*-************************************************************ - * Private Definitions - ************************************************************** - * Do not use these definitions directly. - * They are only exposed to allow static allocation of `LZ4_stream_t` and `LZ4_streamDecode_t`. - * Accessing members will expose user code to API and/or ABI break in future versions of the library. - **************************************************************/ -#define LZ4_HASHLOG (LZ4_MEMORY_USAGE-2) -#define LZ4_HASHTABLESIZE (1 << LZ4_MEMORY_USAGE) -#define LZ4_HASH_SIZE_U32 (1 << LZ4_HASHLOG) /* required as macro for static allocation */ - -#if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) - typedef int8_t LZ4_i8; - typedef uint8_t LZ4_byte; - typedef uint16_t LZ4_u16; - typedef uint32_t LZ4_u32; -#else - typedef signed char LZ4_i8; - typedef unsigned char LZ4_byte; - typedef unsigned short LZ4_u16; - typedef unsigned int LZ4_u32; -#endif - -/*! LZ4_stream_t : - * Never ever use below internal definitions directly ! - * These definitions are not API/ABI safe, and may change in future versions. - * If you need static allocation, declare or allocate an LZ4_stream_t object. -**/ - -typedef struct LZ4_stream_t_internal LZ4_stream_t_internal; -struct LZ4_stream_t_internal { - LZ4_u32 hashTable[LZ4_HASH_SIZE_U32]; - const LZ4_byte* dictionary; - const LZ4_stream_t_internal* dictCtx; - LZ4_u32 currentOffset; - LZ4_u32 tableType; - LZ4_u32 dictSize; - /* Implicit padding to ensure structure is aligned */ -}; - -#define LZ4_STREAM_MINSIZE ((1UL << LZ4_MEMORY_USAGE) + 32) /* static size, for inter-version compatibility */ -union LZ4_stream_u { - char minStateSize[LZ4_STREAM_MINSIZE]; - LZ4_stream_t_internal internal_donotuse; -}; /* previously typedef'd to LZ4_stream_t */ - - -/*! LZ4_initStream() : v1.9.0+ - * An LZ4_stream_t structure must be initialized at least once. - * This is automatically done when invoking LZ4_createStream(), - * but it's not when the structure is simply declared on stack (for example). - * - * Use LZ4_initStream() to properly initialize a newly declared LZ4_stream_t. - * It can also initialize any arbitrary buffer of sufficient size, - * and will @return a pointer of proper type upon initialization. - * - * Note : initialization fails if size and alignment conditions are not respected. - * In which case, the function will @return NULL. - * Note2: An LZ4_stream_t structure guarantees correct alignment and size. - * Note3: Before v1.9.0, use LZ4_resetStream() instead -**/ -LZ4LIB_API LZ4_stream_t* LZ4_initStream (void* buffer, size_t size); - - -/*! LZ4_streamDecode_t : - * Never ever use below internal definitions directly ! - * These definitions are not API/ABI safe, and may change in future versions. - * If you need static allocation, declare or allocate an LZ4_streamDecode_t object. -**/ -typedef struct { - const LZ4_byte* externalDict; - const LZ4_byte* prefixEnd; - size_t extDictSize; - size_t prefixSize; -} LZ4_streamDecode_t_internal; - -#define LZ4_STREAMDECODE_MINSIZE 32 -union LZ4_streamDecode_u { - char minStateSize[LZ4_STREAMDECODE_MINSIZE]; - LZ4_streamDecode_t_internal internal_donotuse; -} ; /* previously typedef'd to LZ4_streamDecode_t */ - - - -/*-************************************ -* Obsolete Functions -**************************************/ - -/*! Deprecation warnings - * - * Deprecated functions make the compiler generate a warning when invoked. - * This is meant to invite users to update their source code. - * Should deprecation warnings be a problem, it is generally possible to disable them, - * typically with -Wno-deprecated-declarations for gcc - * or _CRT_SECURE_NO_WARNINGS in Visual. - * - * Another method is to define LZ4_DISABLE_DEPRECATE_WARNINGS - * before including the header file. - */ -#ifdef LZ4_DISABLE_DEPRECATE_WARNINGS -# define LZ4_DEPRECATED(message) /* disable deprecation warnings */ -#else -# if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */ -# define LZ4_DEPRECATED(message) [[deprecated(message)]] -# elif defined(_MSC_VER) -# define LZ4_DEPRECATED(message) __declspec(deprecated(message)) -# elif defined(__clang__) || (defined(__GNUC__) && (__GNUC__ * 10 + __GNUC_MINOR__ >= 45)) -# define LZ4_DEPRECATED(message) __attribute__((deprecated(message))) -# elif defined(__GNUC__) && (__GNUC__ * 10 + __GNUC_MINOR__ >= 31) -# define LZ4_DEPRECATED(message) __attribute__((deprecated)) -# else -# pragma message("WARNING: LZ4_DEPRECATED needs custom implementation for this compiler") -# define LZ4_DEPRECATED(message) /* disabled */ -# endif -#endif /* LZ4_DISABLE_DEPRECATE_WARNINGS */ - -/*! Obsolete compression functions (since v1.7.3) */ -LZ4_DEPRECATED("use LZ4_compress_default() instead") LZ4LIB_API int LZ4_compress (const char* src, char* dest, int srcSize); -LZ4_DEPRECATED("use LZ4_compress_default() instead") LZ4LIB_API int LZ4_compress_limitedOutput (const char* src, char* dest, int srcSize, int maxOutputSize); -LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") LZ4LIB_API int LZ4_compress_withState (void* state, const char* source, char* dest, int inputSize); -LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") LZ4LIB_API int LZ4_compress_limitedOutput_withState (void* state, const char* source, char* dest, int inputSize, int maxOutputSize); -LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") LZ4LIB_API int LZ4_compress_continue (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize); -LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") LZ4LIB_API int LZ4_compress_limitedOutput_continue (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize, int maxOutputSize); - -/*! Obsolete decompression functions (since v1.8.0) */ -LZ4_DEPRECATED("use LZ4_decompress_fast() instead") LZ4LIB_API int LZ4_uncompress (const char* source, char* dest, int outputSize); -LZ4_DEPRECATED("use LZ4_decompress_safe() instead") LZ4LIB_API int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize); - -/* Obsolete streaming functions (since v1.7.0) - * degraded functionality; do not use! - * - * In order to perform streaming compression, these functions depended on data - * that is no longer tracked in the state. They have been preserved as well as - * possible: using them will still produce a correct output. However, they don't - * actually retain any history between compression calls. The compression ratio - * achieved will therefore be no better than compressing each chunk - * independently. - */ -LZ4_DEPRECATED("Use LZ4_createStream() instead") LZ4LIB_API void* LZ4_create (char* inputBuffer); -LZ4_DEPRECATED("Use LZ4_createStream() instead") LZ4LIB_API int LZ4_sizeofStreamState(void); -LZ4_DEPRECATED("Use LZ4_resetStream() instead") LZ4LIB_API int LZ4_resetStreamState(void* state, char* inputBuffer); -LZ4_DEPRECATED("Use LZ4_saveDict() instead") LZ4LIB_API char* LZ4_slideInputBuffer (void* state); - -/*! Obsolete streaming decoding functions (since v1.7.0) */ -LZ4_DEPRECATED("use LZ4_decompress_safe_usingDict() instead") LZ4LIB_API int LZ4_decompress_safe_withPrefix64k (const char* src, char* dst, int compressedSize, int maxDstSize); -LZ4_DEPRECATED("use LZ4_decompress_fast_usingDict() instead") LZ4LIB_API int LZ4_decompress_fast_withPrefix64k (const char* src, char* dst, int originalSize); - -/*! Obsolete LZ4_decompress_fast variants (since v1.9.0) : - * These functions used to be faster than LZ4_decompress_safe(), - * but this is no longer the case. They are now slower. - * This is because LZ4_decompress_fast() doesn't know the input size, - * and therefore must progress more cautiously into the input buffer to not read beyond the end of block. - * On top of that `LZ4_decompress_fast()` is not protected vs malformed or malicious inputs, making it a security liability. - * As a consequence, LZ4_decompress_fast() is strongly discouraged, and deprecated. - * - * The last remaining LZ4_decompress_fast() specificity is that - * it can decompress a block without knowing its compressed size. - * Such functionality can be achieved in a more secure manner - * by employing LZ4_decompress_safe_partial(). - * - * Parameters: - * originalSize : is the uncompressed size to regenerate. - * `dst` must be already allocated, its size must be >= 'originalSize' bytes. - * @return : number of bytes read from source buffer (== compressed size). - * The function expects to finish at block's end exactly. - * If the source stream is detected malformed, the function stops decoding and returns a negative result. - * note : LZ4_decompress_fast*() requires originalSize. Thanks to this information, it never writes past the output buffer. - * However, since it doesn't know its 'src' size, it may read an unknown amount of input, past input buffer bounds. - * Also, since match offsets are not validated, match reads from 'src' may underflow too. - * These issues never happen if input (compressed) data is correct. - * But they may happen if input data is invalid (error or intentional tampering). - * As a consequence, use these functions in trusted environments with trusted data **only**. - */ -LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe() instead") -LZ4LIB_API int LZ4_decompress_fast (const char* src, char* dst, int originalSize); -LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe_continue() instead") -LZ4LIB_API int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* src, char* dst, int originalSize); -LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe_usingDict() instead") -LZ4LIB_API int LZ4_decompress_fast_usingDict (const char* src, char* dst, int originalSize, const char* dictStart, int dictSize); - -/*! LZ4_resetStream() : - * An LZ4_stream_t structure must be initialized at least once. - * This is done with LZ4_initStream(), or LZ4_resetStream(). - * Consider switching to LZ4_initStream(), - * invoking LZ4_resetStream() will trigger deprecation warnings in the future. - */ -LZ4LIB_API void LZ4_resetStream (LZ4_stream_t* streamPtr); - -} - -#endif /* LZ4_H_98237428734687 */ diff --git a/src/third_party/tracy/common/tracy_lz4hc.cpp b/src/third_party/tracy/common/tracy_lz4hc.cpp deleted file mode 100644 index eec7239e..00000000 --- a/src/third_party/tracy/common/tracy_lz4hc.cpp +++ /dev/null @@ -1,1636 +0,0 @@ -/* - LZ4 HC - High Compression Mode of LZ4 - Copyright (C) 2011-2020, Yann Collet. - - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - You can contact the author at : - - LZ4 source repository : https://github.com/lz4/lz4 - - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c -*/ -/* note : lz4hc is not an independent module, it requires lz4.h/lz4.c for proper compilation */ - - -/* ************************************* -* Tuning Parameter -***************************************/ - -/*! HEAPMODE : - * Select how default compression function will allocate workplace memory, - * in stack (0:fastest), or in heap (1:requires malloc()). - * Since workplace is rather large, heap mode is recommended. -**/ -#ifndef LZ4HC_HEAPMODE -# define LZ4HC_HEAPMODE 1 -#endif - - -/*=== Dependency ===*/ -#define LZ4_HC_STATIC_LINKING_ONLY -#include "tracy_lz4hc.hpp" - - -/*=== Common definitions ===*/ -#if defined(__GNUC__) -# pragma GCC diagnostic ignored "-Wunused-function" -#endif -#if defined (__clang__) -# pragma clang diagnostic ignored "-Wunused-function" -#endif - -#define LZ4_COMMONDEFS_ONLY -#ifndef LZ4_SRC_INCLUDED -#include "tracy_lz4.cpp" /* LZ4_count, constants, mem */ -#endif - - -/*=== Enums ===*/ -typedef enum { noDictCtx, usingDictCtxHc } dictCtx_directive; - - -/*=== Constants ===*/ -#define OPTIMAL_ML (int)((ML_MASK-1)+MINMATCH) -#define LZ4_OPT_NUM (1<<12) - - -/*=== Macros ===*/ -#define MIN(a,b) ( (a) < (b) ? (a) : (b) ) -#define MAX(a,b) ( (a) > (b) ? (a) : (b) ) -#define HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH*8)-LZ4HC_HASH_LOG)) -#define DELTANEXTMAXD(p) chainTable[(p) & LZ4HC_MAXD_MASK] /* flexible, LZ4HC_MAXD dependent */ -#define DELTANEXTU16(table, pos) table[(U16)(pos)] /* faster */ -/* Make fields passed to, and updated by LZ4HC_encodeSequence explicit */ -#define UPDATABLE(ip, op, anchor) &ip, &op, &anchor - -namespace tracy -{ - -static U32 LZ4HC_hashPtr(const void* ptr) { return HASH_FUNCTION(LZ4_read32(ptr)); } - - -/************************************** -* HC Compression -**************************************/ -static void LZ4HC_clearTables (LZ4HC_CCtx_internal* hc4) -{ - MEM_INIT(hc4->hashTable, 0, sizeof(hc4->hashTable)); - MEM_INIT(hc4->chainTable, 0xFF, sizeof(hc4->chainTable)); -} - -static void LZ4HC_init_internal (LZ4HC_CCtx_internal* hc4, const BYTE* start) -{ - size_t const bufferSize = (size_t)(hc4->end - hc4->prefixStart); - size_t newStartingOffset = bufferSize + hc4->dictLimit; - assert(newStartingOffset >= bufferSize); /* check overflow */ - if (newStartingOffset > 1 GB) { - LZ4HC_clearTables(hc4); - newStartingOffset = 0; - } - newStartingOffset += 64 KB; - hc4->nextToUpdate = (U32)newStartingOffset; - hc4->prefixStart = start; - hc4->end = start; - hc4->dictStart = start; - hc4->dictLimit = (U32)newStartingOffset; - hc4->lowLimit = (U32)newStartingOffset; -} - - -/* Update chains up to ip (excluded) */ -LZ4_FORCE_INLINE void LZ4HC_Insert (LZ4HC_CCtx_internal* hc4, const BYTE* ip) -{ - U16* const chainTable = hc4->chainTable; - U32* const hashTable = hc4->hashTable; - const BYTE* const prefixPtr = hc4->prefixStart; - U32 const prefixIdx = hc4->dictLimit; - U32 const target = (U32)(ip - prefixPtr) + prefixIdx; - U32 idx = hc4->nextToUpdate; - assert(ip >= prefixPtr); - assert(target >= prefixIdx); - - while (idx < target) { - U32 const h = LZ4HC_hashPtr(prefixPtr+idx-prefixIdx); - size_t delta = idx - hashTable[h]; - if (delta>LZ4_DISTANCE_MAX) delta = LZ4_DISTANCE_MAX; - DELTANEXTU16(chainTable, idx) = (U16)delta; - hashTable[h] = idx; - idx++; - } - - hc4->nextToUpdate = target; -} - -/** LZ4HC_countBack() : - * @return : negative value, nb of common bytes before ip/match */ -LZ4_FORCE_INLINE -int LZ4HC_countBack(const BYTE* const ip, const BYTE* const match, - const BYTE* const iMin, const BYTE* const mMin) -{ - int back = 0; - int const min = (int)MAX(iMin - ip, mMin - match); - assert(min <= 0); - assert(ip >= iMin); assert((size_t)(ip-iMin) < (1U<<31)); - assert(match >= mMin); assert((size_t)(match - mMin) < (1U<<31)); - while ( (back > min) - && (ip[back-1] == match[back-1]) ) - back--; - return back; -} - -#if defined(_MSC_VER) -# define LZ4HC_rotl32(x,r) _rotl(x,r) -#else -# define LZ4HC_rotl32(x,r) ((x << r) | (x >> (32 - r))) -#endif - - -static U32 LZ4HC_rotatePattern(size_t const rotate, U32 const pattern) -{ - size_t const bitsToRotate = (rotate & (sizeof(pattern) - 1)) << 3; - if (bitsToRotate == 0) return pattern; - return LZ4HC_rotl32(pattern, (int)bitsToRotate); -} - -/* LZ4HC_countPattern() : - * pattern32 must be a sample of repetitive pattern of length 1, 2 or 4 (but not 3!) */ -static unsigned -LZ4HC_countPattern(const BYTE* ip, const BYTE* const iEnd, U32 const pattern32) -{ - const BYTE* const iStart = ip; - reg_t const pattern = (sizeof(pattern)==8) ? - (reg_t)pattern32 + (((reg_t)pattern32) << (sizeof(pattern)*4)) : pattern32; - - while (likely(ip < iEnd-(sizeof(pattern)-1))) { - reg_t const diff = LZ4_read_ARCH(ip) ^ pattern; - if (!diff) { ip+=sizeof(pattern); continue; } - ip += LZ4_NbCommonBytes(diff); - return (unsigned)(ip - iStart); - } - - if (LZ4_isLittleEndian()) { - reg_t patternByte = pattern; - while ((ip>= 8; - } - } else { /* big endian */ - U32 bitOffset = (sizeof(pattern)*8) - 8; - while (ip < iEnd) { - BYTE const byte = (BYTE)(pattern >> bitOffset); - if (*ip != byte) break; - ip ++; bitOffset -= 8; - } } - - return (unsigned)(ip - iStart); -} - -/* LZ4HC_reverseCountPattern() : - * pattern must be a sample of repetitive pattern of length 1, 2 or 4 (but not 3!) - * read using natural platform endianness */ -static unsigned -LZ4HC_reverseCountPattern(const BYTE* ip, const BYTE* const iLow, U32 pattern) -{ - const BYTE* const iStart = ip; - - while (likely(ip >= iLow+4)) { - if (LZ4_read32(ip-4) != pattern) break; - ip -= 4; - } - { const BYTE* bytePtr = (const BYTE*)(&pattern) + 3; /* works for any endianness */ - while (likely(ip>iLow)) { - if (ip[-1] != *bytePtr) break; - ip--; bytePtr--; - } } - return (unsigned)(iStart - ip); -} - -/* LZ4HC_protectDictEnd() : - * Checks if the match is in the last 3 bytes of the dictionary, so reading the - * 4 byte MINMATCH would overflow. - * @returns true if the match index is okay. - */ -static int LZ4HC_protectDictEnd(U32 const dictLimit, U32 const matchIndex) -{ - return ((U32)((dictLimit - 1) - matchIndex) >= 3); -} - -typedef enum { rep_untested, rep_not, rep_confirmed } repeat_state_e; -typedef enum { favorCompressionRatio=0, favorDecompressionSpeed } HCfavor_e; - -LZ4_FORCE_INLINE int -LZ4HC_InsertAndGetWiderMatch ( - LZ4HC_CCtx_internal* const hc4, - const BYTE* const ip, - const BYTE* const iLowLimit, const BYTE* const iHighLimit, - int longest, - const BYTE** matchpos, - const BYTE** startpos, - const int maxNbAttempts, - const int patternAnalysis, const int chainSwap, - const dictCtx_directive dict, - const HCfavor_e favorDecSpeed) -{ - U16* const chainTable = hc4->chainTable; - U32* const HashTable = hc4->hashTable; - const LZ4HC_CCtx_internal * const dictCtx = hc4->dictCtx; - const BYTE* const prefixPtr = hc4->prefixStart; - const U32 prefixIdx = hc4->dictLimit; - const U32 ipIndex = (U32)(ip - prefixPtr) + prefixIdx; - const int withinStartDistance = (hc4->lowLimit + (LZ4_DISTANCE_MAX + 1) > ipIndex); - const U32 lowestMatchIndex = (withinStartDistance) ? hc4->lowLimit : ipIndex - LZ4_DISTANCE_MAX; - const BYTE* const dictStart = hc4->dictStart; - const U32 dictIdx = hc4->lowLimit; - const BYTE* const dictEnd = dictStart + prefixIdx - dictIdx; - int const lookBackLength = (int)(ip-iLowLimit); - int nbAttempts = maxNbAttempts; - U32 matchChainPos = 0; - U32 const pattern = LZ4_read32(ip); - U32 matchIndex; - repeat_state_e repeat = rep_untested; - size_t srcPatternLength = 0; - - DEBUGLOG(7, "LZ4HC_InsertAndGetWiderMatch"); - /* First Match */ - LZ4HC_Insert(hc4, ip); - matchIndex = HashTable[LZ4HC_hashPtr(ip)]; - DEBUGLOG(7, "First match at index %u / %u (lowestMatchIndex)", - matchIndex, lowestMatchIndex); - - while ((matchIndex>=lowestMatchIndex) && (nbAttempts>0)) { - int matchLength=0; - nbAttempts--; - assert(matchIndex < ipIndex); - if (favorDecSpeed && (ipIndex - matchIndex < 8)) { - /* do nothing */ - } else if (matchIndex >= prefixIdx) { /* within current Prefix */ - const BYTE* const matchPtr = prefixPtr + matchIndex - prefixIdx; - assert(matchPtr < ip); - assert(longest >= 1); - if (LZ4_read16(iLowLimit + longest - 1) == LZ4_read16(matchPtr - lookBackLength + longest - 1)) { - if (LZ4_read32(matchPtr) == pattern) { - int const back = lookBackLength ? LZ4HC_countBack(ip, matchPtr, iLowLimit, prefixPtr) : 0; - matchLength = MINMATCH + (int)LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, iHighLimit); - matchLength -= back; - if (matchLength > longest) { - longest = matchLength; - *matchpos = matchPtr + back; - *startpos = ip + back; - } } } - } else { /* lowestMatchIndex <= matchIndex < dictLimit */ - const BYTE* const matchPtr = dictStart + (matchIndex - dictIdx); - assert(matchIndex >= dictIdx); - if ( likely(matchIndex <= prefixIdx - 4) - && (LZ4_read32(matchPtr) == pattern) ) { - int back = 0; - const BYTE* vLimit = ip + (prefixIdx - matchIndex); - if (vLimit > iHighLimit) vLimit = iHighLimit; - matchLength = (int)LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, vLimit) + MINMATCH; - if ((ip+matchLength == vLimit) && (vLimit < iHighLimit)) - matchLength += LZ4_count(ip+matchLength, prefixPtr, iHighLimit); - back = lookBackLength ? LZ4HC_countBack(ip, matchPtr, iLowLimit, dictStart) : 0; - matchLength -= back; - if (matchLength > longest) { - longest = matchLength; - *matchpos = prefixPtr - prefixIdx + matchIndex + back; /* virtual pos, relative to ip, to retrieve offset */ - *startpos = ip + back; - } } } - - if (chainSwap && matchLength==longest) { /* better match => select a better chain */ - assert(lookBackLength==0); /* search forward only */ - if (matchIndex + (U32)longest <= ipIndex) { - int const kTrigger = 4; - U32 distanceToNextMatch = 1; - int const end = longest - MINMATCH + 1; - int step = 1; - int accel = 1 << kTrigger; - int pos; - for (pos = 0; pos < end; pos += step) { - U32 const candidateDist = DELTANEXTU16(chainTable, matchIndex + (U32)pos); - step = (accel++ >> kTrigger); - if (candidateDist > distanceToNextMatch) { - distanceToNextMatch = candidateDist; - matchChainPos = (U32)pos; - accel = 1 << kTrigger; - } } - if (distanceToNextMatch > 1) { - if (distanceToNextMatch > matchIndex) break; /* avoid overflow */ - matchIndex -= distanceToNextMatch; - continue; - } } } - - { U32 const distNextMatch = DELTANEXTU16(chainTable, matchIndex); - if (patternAnalysis && distNextMatch==1 && matchChainPos==0) { - U32 const matchCandidateIdx = matchIndex-1; - /* may be a repeated pattern */ - if (repeat == rep_untested) { - if ( ((pattern & 0xFFFF) == (pattern >> 16)) - & ((pattern & 0xFF) == (pattern >> 24)) ) { - repeat = rep_confirmed; - srcPatternLength = LZ4HC_countPattern(ip+sizeof(pattern), iHighLimit, pattern) + sizeof(pattern); - } else { - repeat = rep_not; - } } - if ( (repeat == rep_confirmed) && (matchCandidateIdx >= lowestMatchIndex) - && LZ4HC_protectDictEnd(prefixIdx, matchCandidateIdx) ) { - const int extDict = matchCandidateIdx < prefixIdx; - const BYTE* const matchPtr = (extDict ? dictStart - dictIdx : prefixPtr - prefixIdx) + matchCandidateIdx; - if (LZ4_read32(matchPtr) == pattern) { /* good candidate */ - const BYTE* const iLimit = extDict ? dictEnd : iHighLimit; - size_t forwardPatternLength = LZ4HC_countPattern(matchPtr+sizeof(pattern), iLimit, pattern) + sizeof(pattern); - if (extDict && matchPtr + forwardPatternLength == iLimit) { - U32 const rotatedPattern = LZ4HC_rotatePattern(forwardPatternLength, pattern); - forwardPatternLength += LZ4HC_countPattern(prefixPtr, iHighLimit, rotatedPattern); - } - { const BYTE* const lowestMatchPtr = extDict ? dictStart : prefixPtr; - size_t backLength = LZ4HC_reverseCountPattern(matchPtr, lowestMatchPtr, pattern); - size_t currentSegmentLength; - if (!extDict - && matchPtr - backLength == prefixPtr - && dictIdx < prefixIdx) { - U32 const rotatedPattern = LZ4HC_rotatePattern((U32)(-(int)backLength), pattern); - backLength += LZ4HC_reverseCountPattern(dictEnd, dictStart, rotatedPattern); - } - /* Limit backLength not go further than lowestMatchIndex */ - backLength = matchCandidateIdx - MAX(matchCandidateIdx - (U32)backLength, lowestMatchIndex); - assert(matchCandidateIdx - backLength >= lowestMatchIndex); - currentSegmentLength = backLength + forwardPatternLength; - /* Adjust to end of pattern if the source pattern fits, otherwise the beginning of the pattern */ - if ( (currentSegmentLength >= srcPatternLength) /* current pattern segment large enough to contain full srcPatternLength */ - && (forwardPatternLength <= srcPatternLength) ) { /* haven't reached this position yet */ - U32 const newMatchIndex = matchCandidateIdx + (U32)forwardPatternLength - (U32)srcPatternLength; /* best position, full pattern, might be followed by more match */ - if (LZ4HC_protectDictEnd(prefixIdx, newMatchIndex)) - matchIndex = newMatchIndex; - else { - /* Can only happen if started in the prefix */ - assert(newMatchIndex >= prefixIdx - 3 && newMatchIndex < prefixIdx && !extDict); - matchIndex = prefixIdx; - } - } else { - U32 const newMatchIndex = matchCandidateIdx - (U32)backLength; /* farthest position in current segment, will find a match of length currentSegmentLength + maybe some back */ - if (!LZ4HC_protectDictEnd(prefixIdx, newMatchIndex)) { - assert(newMatchIndex >= prefixIdx - 3 && newMatchIndex < prefixIdx && !extDict); - matchIndex = prefixIdx; - } else { - matchIndex = newMatchIndex; - if (lookBackLength==0) { /* no back possible */ - size_t const maxML = MIN(currentSegmentLength, srcPatternLength); - if ((size_t)longest < maxML) { - assert(prefixPtr - prefixIdx + matchIndex != ip); - if ((size_t)(ip - prefixPtr) + prefixIdx - matchIndex > LZ4_DISTANCE_MAX) break; - assert(maxML < 2 GB); - longest = (int)maxML; - *matchpos = prefixPtr - prefixIdx + matchIndex; /* virtual pos, relative to ip, to retrieve offset */ - *startpos = ip; - } - { U32 const distToNextPattern = DELTANEXTU16(chainTable, matchIndex); - if (distToNextPattern > matchIndex) break; /* avoid overflow */ - matchIndex -= distToNextPattern; - } } } } } - continue; - } } - } } /* PA optimization */ - - /* follow current chain */ - matchIndex -= DELTANEXTU16(chainTable, matchIndex + matchChainPos); - - } /* while ((matchIndex>=lowestMatchIndex) && (nbAttempts)) */ - - if ( dict == usingDictCtxHc - && nbAttempts > 0 - && ipIndex - lowestMatchIndex < LZ4_DISTANCE_MAX) { - size_t const dictEndOffset = (size_t)(dictCtx->end - dictCtx->prefixStart) + dictCtx->dictLimit; - U32 dictMatchIndex = dictCtx->hashTable[LZ4HC_hashPtr(ip)]; - assert(dictEndOffset <= 1 GB); - matchIndex = dictMatchIndex + lowestMatchIndex - (U32)dictEndOffset; - while (ipIndex - matchIndex <= LZ4_DISTANCE_MAX && nbAttempts--) { - const BYTE* const matchPtr = dictCtx->prefixStart - dictCtx->dictLimit + dictMatchIndex; - - if (LZ4_read32(matchPtr) == pattern) { - int mlt; - int back = 0; - const BYTE* vLimit = ip + (dictEndOffset - dictMatchIndex); - if (vLimit > iHighLimit) vLimit = iHighLimit; - mlt = (int)LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, vLimit) + MINMATCH; - back = lookBackLength ? LZ4HC_countBack(ip, matchPtr, iLowLimit, dictCtx->prefixStart) : 0; - mlt -= back; - if (mlt > longest) { - longest = mlt; - *matchpos = prefixPtr - prefixIdx + matchIndex + back; - *startpos = ip + back; - } } - - { U32 const nextOffset = DELTANEXTU16(dictCtx->chainTable, dictMatchIndex); - dictMatchIndex -= nextOffset; - matchIndex -= nextOffset; - } } } - - return longest; -} - -LZ4_FORCE_INLINE int -LZ4HC_InsertAndFindBestMatch(LZ4HC_CCtx_internal* const hc4, /* Index table will be updated */ - const BYTE* const ip, const BYTE* const iLimit, - const BYTE** matchpos, - const int maxNbAttempts, - const int patternAnalysis, - const dictCtx_directive dict) -{ - const BYTE* uselessPtr = ip; - /* note : LZ4HC_InsertAndGetWiderMatch() is able to modify the starting position of a match (*startpos), - * but this won't be the case here, as we define iLowLimit==ip, - * so LZ4HC_InsertAndGetWiderMatch() won't be allowed to search past ip */ - return LZ4HC_InsertAndGetWiderMatch(hc4, ip, ip, iLimit, MINMATCH-1, matchpos, &uselessPtr, maxNbAttempts, patternAnalysis, 0 /*chainSwap*/, dict, favorCompressionRatio); -} - -/* LZ4HC_encodeSequence() : - * @return : 0 if ok, - * 1 if buffer issue detected */ -LZ4_FORCE_INLINE int LZ4HC_encodeSequence ( - const BYTE** _ip, - BYTE** _op, - const BYTE** _anchor, - int matchLength, - const BYTE* const match, - limitedOutput_directive limit, - BYTE* oend) -{ -#define ip (*_ip) -#define op (*_op) -#define anchor (*_anchor) - - size_t length; - BYTE* const token = op++; - -#if defined(LZ4_DEBUG) && (LZ4_DEBUG >= 6) - static const BYTE* start = NULL; - static U32 totalCost = 0; - U32 const pos = (start==NULL) ? 0 : (U32)(anchor - start); - U32 const ll = (U32)(ip - anchor); - U32 const llAdd = (ll>=15) ? ((ll-15) / 255) + 1 : 0; - U32 const mlAdd = (matchLength>=19) ? ((matchLength-19) / 255) + 1 : 0; - U32 const cost = 1 + llAdd + ll + 2 + mlAdd; - if (start==NULL) start = anchor; /* only works for single segment */ - /* g_debuglog_enable = (pos >= 2228) & (pos <= 2262); */ - DEBUGLOG(6, "pos:%7u -- literals:%4u, match:%4i, offset:%5u, cost:%4u + %5u", - pos, - (U32)(ip - anchor), matchLength, (U32)(ip-match), - cost, totalCost); - totalCost += cost; -#endif - - /* Encode Literal length */ - length = (size_t)(ip - anchor); - LZ4_STATIC_ASSERT(notLimited == 0); - /* Check output limit */ - if (limit && ((op + (length / 255) + length + (2 + 1 + LASTLITERALS)) > oend)) { - DEBUGLOG(6, "Not enough room to write %i literals (%i bytes remaining)", - (int)length, (int)(oend - op)); - return 1; - } - if (length >= RUN_MASK) { - size_t len = length - RUN_MASK; - *token = (RUN_MASK << ML_BITS); - for(; len >= 255 ; len -= 255) *op++ = 255; - *op++ = (BYTE)len; - } else { - *token = (BYTE)(length << ML_BITS); - } - - /* Copy Literals */ - LZ4_wildCopy8(op, anchor, op + length); - op += length; - - /* Encode Offset */ - assert( (ip - match) <= LZ4_DISTANCE_MAX ); /* note : consider providing offset as a value, rather than as a pointer difference */ - LZ4_writeLE16(op, (U16)(ip - match)); op += 2; - - /* Encode MatchLength */ - assert(matchLength >= MINMATCH); - length = (size_t)matchLength - MINMATCH; - if (limit && (op + (length / 255) + (1 + LASTLITERALS) > oend)) { - DEBUGLOG(6, "Not enough room to write match length"); - return 1; /* Check output limit */ - } - if (length >= ML_MASK) { - *token += ML_MASK; - length -= ML_MASK; - for(; length >= 510 ; length -= 510) { *op++ = 255; *op++ = 255; } - if (length >= 255) { length -= 255; *op++ = 255; } - *op++ = (BYTE)length; - } else { - *token += (BYTE)(length); - } - - /* Prepare next loop */ - ip += matchLength; - anchor = ip; - - return 0; -} -#undef ip -#undef op -#undef anchor - -LZ4_FORCE_INLINE int LZ4HC_compress_hashChain ( - LZ4HC_CCtx_internal* const ctx, - const char* const source, - char* const dest, - int* srcSizePtr, - int const maxOutputSize, - int maxNbAttempts, - const limitedOutput_directive limit, - const dictCtx_directive dict - ) -{ - const int inputSize = *srcSizePtr; - const int patternAnalysis = (maxNbAttempts > 128); /* levels 9+ */ - - const BYTE* ip = (const BYTE*) source; - const BYTE* anchor = ip; - const BYTE* const iend = ip + inputSize; - const BYTE* const mflimit = iend - MFLIMIT; - const BYTE* const matchlimit = (iend - LASTLITERALS); - - BYTE* optr = (BYTE*) dest; - BYTE* op = (BYTE*) dest; - BYTE* oend = op + maxOutputSize; - - int ml0, ml, ml2, ml3; - const BYTE* start0; - const BYTE* ref0; - const BYTE* ref = NULL; - const BYTE* start2 = NULL; - const BYTE* ref2 = NULL; - const BYTE* start3 = NULL; - const BYTE* ref3 = NULL; - - /* init */ - *srcSizePtr = 0; - if (limit == fillOutput) oend -= LASTLITERALS; /* Hack for support LZ4 format restriction */ - if (inputSize < LZ4_minLength) goto _last_literals; /* Input too small, no compression (all literals) */ - - /* Main Loop */ - while (ip <= mflimit) { - ml = LZ4HC_InsertAndFindBestMatch(ctx, ip, matchlimit, &ref, maxNbAttempts, patternAnalysis, dict); - if (ml encode ML1 */ - optr = op; - if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ref, limit, oend)) goto _dest_overflow; - continue; - } - - if (start0 < ip) { /* first match was skipped at least once */ - if (start2 < ip + ml0) { /* squeezing ML1 between ML0(original ML1) and ML2 */ - ip = start0; ref = ref0; ml = ml0; /* restore initial ML1 */ - } } - - /* Here, start0==ip */ - if ((start2 - ip) < 3) { /* First Match too small : removed */ - ml = ml2; - ip = start2; - ref =ref2; - goto _Search2; - } - -_Search3: - /* At this stage, we have : - * ml2 > ml1, and - * ip1+3 <= ip2 (usually < ip1+ml1) */ - if ((start2 - ip) < OPTIMAL_ML) { - int correction; - int new_ml = ml; - if (new_ml > OPTIMAL_ML) new_ml = OPTIMAL_ML; - if (ip+new_ml > start2 + ml2 - MINMATCH) new_ml = (int)(start2 - ip) + ml2 - MINMATCH; - correction = new_ml - (int)(start2 - ip); - if (correction > 0) { - start2 += correction; - ref2 += correction; - ml2 -= correction; - } - } - /* Now, we have start2 = ip+new_ml, with new_ml = min(ml, OPTIMAL_ML=18) */ - - if (start2 + ml2 <= mflimit) { - ml3 = LZ4HC_InsertAndGetWiderMatch(ctx, - start2 + ml2 - 3, start2, matchlimit, ml2, &ref3, &start3, - maxNbAttempts, patternAnalysis, 0, dict, favorCompressionRatio); - } else { - ml3 = ml2; - } - - if (ml3 == ml2) { /* No better match => encode ML1 and ML2 */ - /* ip & ref are known; Now for ml */ - if (start2 < ip+ml) ml = (int)(start2 - ip); - /* Now, encode 2 sequences */ - optr = op; - if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ref, limit, oend)) goto _dest_overflow; - ip = start2; - optr = op; - if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml2, ref2, limit, oend)) { - ml = ml2; - ref = ref2; - goto _dest_overflow; - } - continue; - } - - if (start3 < ip+ml+3) { /* Not enough space for match 2 : remove it */ - if (start3 >= (ip+ml)) { /* can write Seq1 immediately ==> Seq2 is removed, so Seq3 becomes Seq1 */ - if (start2 < ip+ml) { - int correction = (int)(ip+ml - start2); - start2 += correction; - ref2 += correction; - ml2 -= correction; - if (ml2 < MINMATCH) { - start2 = start3; - ref2 = ref3; - ml2 = ml3; - } - } - - optr = op; - if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ref, limit, oend)) goto _dest_overflow; - ip = start3; - ref = ref3; - ml = ml3; - - start0 = start2; - ref0 = ref2; - ml0 = ml2; - goto _Search2; - } - - start2 = start3; - ref2 = ref3; - ml2 = ml3; - goto _Search3; - } - - /* - * OK, now we have 3 ascending matches; - * let's write the first one ML1. - * ip & ref are known; Now decide ml. - */ - if (start2 < ip+ml) { - if ((start2 - ip) < OPTIMAL_ML) { - int correction; - if (ml > OPTIMAL_ML) ml = OPTIMAL_ML; - if (ip + ml > start2 + ml2 - MINMATCH) ml = (int)(start2 - ip) + ml2 - MINMATCH; - correction = ml - (int)(start2 - ip); - if (correction > 0) { - start2 += correction; - ref2 += correction; - ml2 -= correction; - } - } else { - ml = (int)(start2 - ip); - } - } - optr = op; - if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ref, limit, oend)) goto _dest_overflow; - - /* ML2 becomes ML1 */ - ip = start2; ref = ref2; ml = ml2; - - /* ML3 becomes ML2 */ - start2 = start3; ref2 = ref3; ml2 = ml3; - - /* let's find a new ML3 */ - goto _Search3; - } - -_last_literals: - /* Encode Last Literals */ - { size_t lastRunSize = (size_t)(iend - anchor); /* literals */ - size_t llAdd = (lastRunSize + 255 - RUN_MASK) / 255; - size_t const totalSize = 1 + llAdd + lastRunSize; - if (limit == fillOutput) oend += LASTLITERALS; /* restore correct value */ - if (limit && (op + totalSize > oend)) { - if (limit == limitedOutput) return 0; - /* adapt lastRunSize to fill 'dest' */ - lastRunSize = (size_t)(oend - op) - 1 /*token*/; - llAdd = (lastRunSize + 256 - RUN_MASK) / 256; - lastRunSize -= llAdd; - } - DEBUGLOG(6, "Final literal run : %i literals", (int)lastRunSize); - ip = anchor + lastRunSize; /* can be != iend if limit==fillOutput */ - - if (lastRunSize >= RUN_MASK) { - size_t accumulator = lastRunSize - RUN_MASK; - *op++ = (RUN_MASK << ML_BITS); - for(; accumulator >= 255 ; accumulator -= 255) *op++ = 255; - *op++ = (BYTE) accumulator; - } else { - *op++ = (BYTE)(lastRunSize << ML_BITS); - } - LZ4_memcpy(op, anchor, lastRunSize); - op += lastRunSize; - } - - /* End */ - *srcSizePtr = (int) (((const char*)ip) - source); - return (int) (((char*)op)-dest); - -_dest_overflow: - if (limit == fillOutput) { - /* Assumption : ip, anchor, ml and ref must be set correctly */ - size_t const ll = (size_t)(ip - anchor); - size_t const ll_addbytes = (ll + 240) / 255; - size_t const ll_totalCost = 1 + ll_addbytes + ll; - BYTE* const maxLitPos = oend - 3; /* 2 for offset, 1 for token */ - DEBUGLOG(6, "Last sequence overflowing"); - op = optr; /* restore correct out pointer */ - if (op + ll_totalCost <= maxLitPos) { - /* ll validated; now adjust match length */ - size_t const bytesLeftForMl = (size_t)(maxLitPos - (op+ll_totalCost)); - size_t const maxMlSize = MINMATCH + (ML_MASK-1) + (bytesLeftForMl * 255); - assert(maxMlSize < INT_MAX); assert(ml >= 0); - if ((size_t)ml > maxMlSize) ml = (int)maxMlSize; - if ((oend + LASTLITERALS) - (op + ll_totalCost + 2) - 1 + ml >= MFLIMIT) { - LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ref, notLimited, oend); - } } - goto _last_literals; - } - /* compression failed */ - return 0; -} - - -static int LZ4HC_compress_optimal( LZ4HC_CCtx_internal* ctx, - const char* const source, char* dst, - int* srcSizePtr, int dstCapacity, - int const nbSearches, size_t sufficient_len, - const limitedOutput_directive limit, int const fullUpdate, - const dictCtx_directive dict, - const HCfavor_e favorDecSpeed); - - -LZ4_FORCE_INLINE int LZ4HC_compress_generic_internal ( - LZ4HC_CCtx_internal* const ctx, - const char* const src, - char* const dst, - int* const srcSizePtr, - int const dstCapacity, - int cLevel, - const limitedOutput_directive limit, - const dictCtx_directive dict - ) -{ - typedef enum { lz4hc, lz4opt } lz4hc_strat_e; - typedef struct { - lz4hc_strat_e strat; - int nbSearches; - U32 targetLength; - } cParams_t; - static const cParams_t clTable[LZ4HC_CLEVEL_MAX+1] = { - { lz4hc, 2, 16 }, /* 0, unused */ - { lz4hc, 2, 16 }, /* 1, unused */ - { lz4hc, 2, 16 }, /* 2, unused */ - { lz4hc, 4, 16 }, /* 3 */ - { lz4hc, 8, 16 }, /* 4 */ - { lz4hc, 16, 16 }, /* 5 */ - { lz4hc, 32, 16 }, /* 6 */ - { lz4hc, 64, 16 }, /* 7 */ - { lz4hc, 128, 16 }, /* 8 */ - { lz4hc, 256, 16 }, /* 9 */ - { lz4opt, 96, 64 }, /*10==LZ4HC_CLEVEL_OPT_MIN*/ - { lz4opt, 512,128 }, /*11 */ - { lz4opt,16384,LZ4_OPT_NUM }, /* 12==LZ4HC_CLEVEL_MAX */ - }; - - DEBUGLOG(4, "LZ4HC_compress_generic(ctx=%p, src=%p, srcSize=%d, limit=%d)", - ctx, src, *srcSizePtr, limit); - - if (limit == fillOutput && dstCapacity < 1) return 0; /* Impossible to store anything */ - if ((U32)*srcSizePtr > (U32)LZ4_MAX_INPUT_SIZE) return 0; /* Unsupported input size (too large or negative) */ - - ctx->end += *srcSizePtr; - if (cLevel < 1) cLevel = LZ4HC_CLEVEL_DEFAULT; /* note : convention is different from lz4frame, maybe something to review */ - cLevel = MIN(LZ4HC_CLEVEL_MAX, cLevel); - { cParams_t const cParam = clTable[cLevel]; - HCfavor_e const favor = ctx->favorDecSpeed ? favorDecompressionSpeed : favorCompressionRatio; - int result; - - if (cParam.strat == lz4hc) { - result = LZ4HC_compress_hashChain(ctx, - src, dst, srcSizePtr, dstCapacity, - cParam.nbSearches, limit, dict); - } else { - assert(cParam.strat == lz4opt); - result = LZ4HC_compress_optimal(ctx, - src, dst, srcSizePtr, dstCapacity, - cParam.nbSearches, cParam.targetLength, limit, - cLevel == LZ4HC_CLEVEL_MAX, /* ultra mode */ - dict, favor); - } - if (result <= 0) ctx->dirty = 1; - return result; - } -} - -static void LZ4HC_setExternalDict(LZ4HC_CCtx_internal* ctxPtr, const BYTE* newBlock); - -static int -LZ4HC_compress_generic_noDictCtx ( - LZ4HC_CCtx_internal* const ctx, - const char* const src, - char* const dst, - int* const srcSizePtr, - int const dstCapacity, - int cLevel, - limitedOutput_directive limit - ) -{ - assert(ctx->dictCtx == NULL); - return LZ4HC_compress_generic_internal(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit, noDictCtx); -} - -static int -LZ4HC_compress_generic_dictCtx ( - LZ4HC_CCtx_internal* const ctx, - const char* const src, - char* const dst, - int* const srcSizePtr, - int const dstCapacity, - int cLevel, - limitedOutput_directive limit - ) -{ - const size_t position = (size_t)(ctx->end - ctx->prefixStart) + (ctx->dictLimit - ctx->lowLimit); - assert(ctx->dictCtx != NULL); - if (position >= 64 KB) { - ctx->dictCtx = NULL; - return LZ4HC_compress_generic_noDictCtx(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit); - } else if (position == 0 && *srcSizePtr > 4 KB) { - LZ4_memcpy(ctx, ctx->dictCtx, sizeof(LZ4HC_CCtx_internal)); - LZ4HC_setExternalDict(ctx, (const BYTE *)src); - ctx->compressionLevel = (short)cLevel; - return LZ4HC_compress_generic_noDictCtx(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit); - } else { - return LZ4HC_compress_generic_internal(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit, usingDictCtxHc); - } -} - -static int -LZ4HC_compress_generic ( - LZ4HC_CCtx_internal* const ctx, - const char* const src, - char* const dst, - int* const srcSizePtr, - int const dstCapacity, - int cLevel, - limitedOutput_directive limit - ) -{ - if (ctx->dictCtx == NULL) { - return LZ4HC_compress_generic_noDictCtx(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit); - } else { - return LZ4HC_compress_generic_dictCtx(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit); - } -} - - -int LZ4_sizeofStateHC(void) { return (int)sizeof(LZ4_streamHC_t); } - -static size_t LZ4_streamHC_t_alignment(void) -{ -#if LZ4_ALIGN_TEST - typedef struct { char c; LZ4_streamHC_t t; } t_a; - return sizeof(t_a) - sizeof(LZ4_streamHC_t); -#else - return 1; /* effectively disabled */ -#endif -} - -/* state is presumed correctly initialized, - * in which case its size and alignment have already been validate */ -int LZ4_compress_HC_extStateHC_fastReset (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel) -{ - LZ4HC_CCtx_internal* const ctx = &((LZ4_streamHC_t*)state)->internal_donotuse; - if (!LZ4_isAligned(state, LZ4_streamHC_t_alignment())) return 0; - LZ4_resetStreamHC_fast((LZ4_streamHC_t*)state, compressionLevel); - LZ4HC_init_internal (ctx, (const BYTE*)src); - if (dstCapacity < LZ4_compressBound(srcSize)) - return LZ4HC_compress_generic (ctx, src, dst, &srcSize, dstCapacity, compressionLevel, limitedOutput); - else - return LZ4HC_compress_generic (ctx, src, dst, &srcSize, dstCapacity, compressionLevel, notLimited); -} - -int LZ4_compress_HC_extStateHC (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel) -{ - LZ4_streamHC_t* const ctx = LZ4_initStreamHC(state, sizeof(*ctx)); - if (ctx==NULL) return 0; /* init failure */ - return LZ4_compress_HC_extStateHC_fastReset(state, src, dst, srcSize, dstCapacity, compressionLevel); -} - -int LZ4_compress_HC(const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel) -{ - int cSize; -#if defined(LZ4HC_HEAPMODE) && LZ4HC_HEAPMODE==1 - LZ4_streamHC_t* const statePtr = (LZ4_streamHC_t*)ALLOC(sizeof(LZ4_streamHC_t)); - if (statePtr==NULL) return 0; -#else - LZ4_streamHC_t state; - LZ4_streamHC_t* const statePtr = &state; -#endif - cSize = LZ4_compress_HC_extStateHC(statePtr, src, dst, srcSize, dstCapacity, compressionLevel); -#if defined(LZ4HC_HEAPMODE) && LZ4HC_HEAPMODE==1 - FREEMEM(statePtr); -#endif - return cSize; -} - -/* state is presumed sized correctly (>= sizeof(LZ4_streamHC_t)) */ -int LZ4_compress_HC_destSize(void* state, const char* source, char* dest, int* sourceSizePtr, int targetDestSize, int cLevel) -{ - LZ4_streamHC_t* const ctx = LZ4_initStreamHC(state, sizeof(*ctx)); - if (ctx==NULL) return 0; /* init failure */ - LZ4HC_init_internal(&ctx->internal_donotuse, (const BYTE*) source); - LZ4_setCompressionLevel(ctx, cLevel); - return LZ4HC_compress_generic(&ctx->internal_donotuse, source, dest, sourceSizePtr, targetDestSize, cLevel, fillOutput); -} - - - -/************************************** -* Streaming Functions -**************************************/ -/* allocation */ -#if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION) -LZ4_streamHC_t* LZ4_createStreamHC(void) -{ - LZ4_streamHC_t* const state = - (LZ4_streamHC_t*)ALLOC_AND_ZERO(sizeof(LZ4_streamHC_t)); - if (state == NULL) return NULL; - LZ4_setCompressionLevel(state, LZ4HC_CLEVEL_DEFAULT); - return state; -} - -int LZ4_freeStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr) -{ - DEBUGLOG(4, "LZ4_freeStreamHC(%p)", LZ4_streamHCPtr); - if (!LZ4_streamHCPtr) return 0; /* support free on NULL */ - FREEMEM(LZ4_streamHCPtr); - return 0; -} -#endif - - -LZ4_streamHC_t* LZ4_initStreamHC (void* buffer, size_t size) -{ - LZ4_streamHC_t* const LZ4_streamHCPtr = (LZ4_streamHC_t*)buffer; - DEBUGLOG(4, "LZ4_initStreamHC(%p, %u)", buffer, (unsigned)size); - /* check conditions */ - if (buffer == NULL) return NULL; - if (size < sizeof(LZ4_streamHC_t)) return NULL; - if (!LZ4_isAligned(buffer, LZ4_streamHC_t_alignment())) return NULL; - /* init */ - { LZ4HC_CCtx_internal* const hcstate = &(LZ4_streamHCPtr->internal_donotuse); - MEM_INIT(hcstate, 0, sizeof(*hcstate)); } - LZ4_setCompressionLevel(LZ4_streamHCPtr, LZ4HC_CLEVEL_DEFAULT); - return LZ4_streamHCPtr; -} - -/* just a stub */ -void LZ4_resetStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel) -{ - LZ4_initStreamHC(LZ4_streamHCPtr, sizeof(*LZ4_streamHCPtr)); - LZ4_setCompressionLevel(LZ4_streamHCPtr, compressionLevel); -} - -void LZ4_resetStreamHC_fast (LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel) -{ - DEBUGLOG(4, "LZ4_resetStreamHC_fast(%p, %d)", LZ4_streamHCPtr, compressionLevel); - if (LZ4_streamHCPtr->internal_donotuse.dirty) { - LZ4_initStreamHC(LZ4_streamHCPtr, sizeof(*LZ4_streamHCPtr)); - } else { - /* preserve end - prefixStart : can trigger clearTable's threshold */ - if (LZ4_streamHCPtr->internal_donotuse.end != NULL) { - LZ4_streamHCPtr->internal_donotuse.end -= (uptrval)LZ4_streamHCPtr->internal_donotuse.prefixStart; - } else { - assert(LZ4_streamHCPtr->internal_donotuse.prefixStart == NULL); - } - LZ4_streamHCPtr->internal_donotuse.prefixStart = NULL; - LZ4_streamHCPtr->internal_donotuse.dictCtx = NULL; - } - LZ4_setCompressionLevel(LZ4_streamHCPtr, compressionLevel); -} - -void LZ4_setCompressionLevel(LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel) -{ - DEBUGLOG(5, "LZ4_setCompressionLevel(%p, %d)", LZ4_streamHCPtr, compressionLevel); - if (compressionLevel < 1) compressionLevel = LZ4HC_CLEVEL_DEFAULT; - if (compressionLevel > LZ4HC_CLEVEL_MAX) compressionLevel = LZ4HC_CLEVEL_MAX; - LZ4_streamHCPtr->internal_donotuse.compressionLevel = (short)compressionLevel; -} - -void LZ4_favorDecompressionSpeed(LZ4_streamHC_t* LZ4_streamHCPtr, int favor) -{ - LZ4_streamHCPtr->internal_donotuse.favorDecSpeed = (favor!=0); -} - -/* LZ4_loadDictHC() : - * LZ4_streamHCPtr is presumed properly initialized */ -int LZ4_loadDictHC (LZ4_streamHC_t* LZ4_streamHCPtr, - const char* dictionary, int dictSize) -{ - LZ4HC_CCtx_internal* const ctxPtr = &LZ4_streamHCPtr->internal_donotuse; - DEBUGLOG(4, "LZ4_loadDictHC(ctx:%p, dict:%p, dictSize:%d)", LZ4_streamHCPtr, dictionary, dictSize); - assert(LZ4_streamHCPtr != NULL); - if (dictSize > 64 KB) { - dictionary += (size_t)dictSize - 64 KB; - dictSize = 64 KB; - } - /* need a full initialization, there are bad side-effects when using resetFast() */ - { int const cLevel = ctxPtr->compressionLevel; - LZ4_initStreamHC(LZ4_streamHCPtr, sizeof(*LZ4_streamHCPtr)); - LZ4_setCompressionLevel(LZ4_streamHCPtr, cLevel); - } - LZ4HC_init_internal (ctxPtr, (const BYTE*)dictionary); - ctxPtr->end = (const BYTE*)dictionary + dictSize; - if (dictSize >= 4) LZ4HC_Insert (ctxPtr, ctxPtr->end-3); - return dictSize; -} - -void LZ4_attach_HC_dictionary(LZ4_streamHC_t *working_stream, const LZ4_streamHC_t *dictionary_stream) { - working_stream->internal_donotuse.dictCtx = dictionary_stream != NULL ? &(dictionary_stream->internal_donotuse) : NULL; -} - -/* compression */ - -static void LZ4HC_setExternalDict(LZ4HC_CCtx_internal* ctxPtr, const BYTE* newBlock) -{ - DEBUGLOG(4, "LZ4HC_setExternalDict(%p, %p)", ctxPtr, newBlock); - if (ctxPtr->end >= ctxPtr->prefixStart + 4) - LZ4HC_Insert (ctxPtr, ctxPtr->end-3); /* Referencing remaining dictionary content */ - - /* Only one memory segment for extDict, so any previous extDict is lost at this stage */ - ctxPtr->lowLimit = ctxPtr->dictLimit; - ctxPtr->dictStart = ctxPtr->prefixStart; - ctxPtr->dictLimit += (U32)(ctxPtr->end - ctxPtr->prefixStart); - ctxPtr->prefixStart = newBlock; - ctxPtr->end = newBlock; - ctxPtr->nextToUpdate = ctxPtr->dictLimit; /* match referencing will resume from there */ - - /* cannot reference an extDict and a dictCtx at the same time */ - ctxPtr->dictCtx = NULL; -} - -static int -LZ4_compressHC_continue_generic (LZ4_streamHC_t* LZ4_streamHCPtr, - const char* src, char* dst, - int* srcSizePtr, int dstCapacity, - limitedOutput_directive limit) -{ - LZ4HC_CCtx_internal* const ctxPtr = &LZ4_streamHCPtr->internal_donotuse; - DEBUGLOG(5, "LZ4_compressHC_continue_generic(ctx=%p, src=%p, srcSize=%d, limit=%d)", - LZ4_streamHCPtr, src, *srcSizePtr, limit); - assert(ctxPtr != NULL); - /* auto-init if forgotten */ - if (ctxPtr->prefixStart == NULL) LZ4HC_init_internal (ctxPtr, (const BYTE*) src); - - /* Check overflow */ - if ((size_t)(ctxPtr->end - ctxPtr->prefixStart) + ctxPtr->dictLimit > 2 GB) { - size_t dictSize = (size_t)(ctxPtr->end - ctxPtr->prefixStart); - if (dictSize > 64 KB) dictSize = 64 KB; - LZ4_loadDictHC(LZ4_streamHCPtr, (const char*)(ctxPtr->end) - dictSize, (int)dictSize); - } - - /* Check if blocks follow each other */ - if ((const BYTE*)src != ctxPtr->end) - LZ4HC_setExternalDict(ctxPtr, (const BYTE*)src); - - /* Check overlapping input/dictionary space */ - { const BYTE* sourceEnd = (const BYTE*) src + *srcSizePtr; - const BYTE* const dictBegin = ctxPtr->dictStart; - const BYTE* const dictEnd = ctxPtr->dictStart + (ctxPtr->dictLimit - ctxPtr->lowLimit); - if ((sourceEnd > dictBegin) && ((const BYTE*)src < dictEnd)) { - if (sourceEnd > dictEnd) sourceEnd = dictEnd; - ctxPtr->lowLimit += (U32)(sourceEnd - ctxPtr->dictStart); - ctxPtr->dictStart += (U32)(sourceEnd - ctxPtr->dictStart); - if (ctxPtr->dictLimit - ctxPtr->lowLimit < 4) { - ctxPtr->lowLimit = ctxPtr->dictLimit; - ctxPtr->dictStart = ctxPtr->prefixStart; - } } } - - return LZ4HC_compress_generic (ctxPtr, src, dst, srcSizePtr, dstCapacity, ctxPtr->compressionLevel, limit); -} - -int LZ4_compress_HC_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* src, char* dst, int srcSize, int dstCapacity) -{ - if (dstCapacity < LZ4_compressBound(srcSize)) - return LZ4_compressHC_continue_generic (LZ4_streamHCPtr, src, dst, &srcSize, dstCapacity, limitedOutput); - else - return LZ4_compressHC_continue_generic (LZ4_streamHCPtr, src, dst, &srcSize, dstCapacity, notLimited); -} - -int LZ4_compress_HC_continue_destSize (LZ4_streamHC_t* LZ4_streamHCPtr, const char* src, char* dst, int* srcSizePtr, int targetDestSize) -{ - return LZ4_compressHC_continue_generic(LZ4_streamHCPtr, src, dst, srcSizePtr, targetDestSize, fillOutput); -} - - - -/* LZ4_saveDictHC : - * save history content - * into a user-provided buffer - * which is then used to continue compression - */ -int LZ4_saveDictHC (LZ4_streamHC_t* LZ4_streamHCPtr, char* safeBuffer, int dictSize) -{ - LZ4HC_CCtx_internal* const streamPtr = &LZ4_streamHCPtr->internal_donotuse; - int const prefixSize = (int)(streamPtr->end - streamPtr->prefixStart); - DEBUGLOG(5, "LZ4_saveDictHC(%p, %p, %d)", LZ4_streamHCPtr, safeBuffer, dictSize); - assert(prefixSize >= 0); - if (dictSize > 64 KB) dictSize = 64 KB; - if (dictSize < 4) dictSize = 0; - if (dictSize > prefixSize) dictSize = prefixSize; - if (safeBuffer == NULL) assert(dictSize == 0); - if (dictSize > 0) - LZ4_memmove(safeBuffer, streamPtr->end - dictSize, dictSize); - { U32 const endIndex = (U32)(streamPtr->end - streamPtr->prefixStart) + streamPtr->dictLimit; - streamPtr->end = (const BYTE*)safeBuffer + dictSize; - streamPtr->prefixStart = streamPtr->end - dictSize; - streamPtr->dictLimit = endIndex - (U32)dictSize; - streamPtr->lowLimit = endIndex - (U32)dictSize; - streamPtr->dictStart = streamPtr->prefixStart; - if (streamPtr->nextToUpdate < streamPtr->dictLimit) - streamPtr->nextToUpdate = streamPtr->dictLimit; - } - return dictSize; -} - - -/*************************************************** -* Deprecated Functions -***************************************************/ - -/* These functions currently generate deprecation warnings */ - -/* Wrappers for deprecated compression functions */ -int LZ4_compressHC(const char* src, char* dst, int srcSize) { return LZ4_compress_HC (src, dst, srcSize, LZ4_compressBound(srcSize), 0); } -int LZ4_compressHC_limitedOutput(const char* src, char* dst, int srcSize, int maxDstSize) { return LZ4_compress_HC(src, dst, srcSize, maxDstSize, 0); } -int LZ4_compressHC2(const char* src, char* dst, int srcSize, int cLevel) { return LZ4_compress_HC (src, dst, srcSize, LZ4_compressBound(srcSize), cLevel); } -int LZ4_compressHC2_limitedOutput(const char* src, char* dst, int srcSize, int maxDstSize, int cLevel) { return LZ4_compress_HC(src, dst, srcSize, maxDstSize, cLevel); } -int LZ4_compressHC_withStateHC (void* state, const char* src, char* dst, int srcSize) { return LZ4_compress_HC_extStateHC (state, src, dst, srcSize, LZ4_compressBound(srcSize), 0); } -int LZ4_compressHC_limitedOutput_withStateHC (void* state, const char* src, char* dst, int srcSize, int maxDstSize) { return LZ4_compress_HC_extStateHC (state, src, dst, srcSize, maxDstSize, 0); } -int LZ4_compressHC2_withStateHC (void* state, const char* src, char* dst, int srcSize, int cLevel) { return LZ4_compress_HC_extStateHC(state, src, dst, srcSize, LZ4_compressBound(srcSize), cLevel); } -int LZ4_compressHC2_limitedOutput_withStateHC (void* state, const char* src, char* dst, int srcSize, int maxDstSize, int cLevel) { return LZ4_compress_HC_extStateHC(state, src, dst, srcSize, maxDstSize, cLevel); } -int LZ4_compressHC_continue (LZ4_streamHC_t* ctx, const char* src, char* dst, int srcSize) { return LZ4_compress_HC_continue (ctx, src, dst, srcSize, LZ4_compressBound(srcSize)); } -int LZ4_compressHC_limitedOutput_continue (LZ4_streamHC_t* ctx, const char* src, char* dst, int srcSize, int maxDstSize) { return LZ4_compress_HC_continue (ctx, src, dst, srcSize, maxDstSize); } - - -/* Deprecated streaming functions */ -int LZ4_sizeofStreamStateHC(void) { return sizeof(LZ4_streamHC_t); } - -/* state is presumed correctly sized, aka >= sizeof(LZ4_streamHC_t) - * @return : 0 on success, !=0 if error */ -int LZ4_resetStreamStateHC(void* state, char* inputBuffer) -{ - LZ4_streamHC_t* const hc4 = LZ4_initStreamHC(state, sizeof(*hc4)); - if (hc4 == NULL) return 1; /* init failed */ - LZ4HC_init_internal (&hc4->internal_donotuse, (const BYTE*)inputBuffer); - return 0; -} - -#if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION) -void* LZ4_createHC (const char* inputBuffer) -{ - LZ4_streamHC_t* const hc4 = LZ4_createStreamHC(); - if (hc4 == NULL) return NULL; /* not enough memory */ - LZ4HC_init_internal (&hc4->internal_donotuse, (const BYTE*)inputBuffer); - return hc4; -} - -int LZ4_freeHC (void* LZ4HC_Data) -{ - if (!LZ4HC_Data) return 0; /* support free on NULL */ - FREEMEM(LZ4HC_Data); - return 0; -} -#endif - -int LZ4_compressHC2_continue (void* LZ4HC_Data, const char* src, char* dst, int srcSize, int cLevel) -{ - return LZ4HC_compress_generic (&((LZ4_streamHC_t*)LZ4HC_Data)->internal_donotuse, src, dst, &srcSize, 0, cLevel, notLimited); -} - -int LZ4_compressHC2_limitedOutput_continue (void* LZ4HC_Data, const char* src, char* dst, int srcSize, int dstCapacity, int cLevel) -{ - return LZ4HC_compress_generic (&((LZ4_streamHC_t*)LZ4HC_Data)->internal_donotuse, src, dst, &srcSize, dstCapacity, cLevel, limitedOutput); -} - -char* LZ4_slideInputBufferHC(void* LZ4HC_Data) -{ - LZ4_streamHC_t* const ctx = (LZ4_streamHC_t*)LZ4HC_Data; - const BYTE* bufferStart = ctx->internal_donotuse.prefixStart - ctx->internal_donotuse.dictLimit + ctx->internal_donotuse.lowLimit; - LZ4_resetStreamHC_fast(ctx, ctx->internal_donotuse.compressionLevel); - /* avoid const char * -> char * conversion warning :( */ - return (char*)(uptrval)bufferStart; -} - - -/* ================================================ - * LZ4 Optimal parser (levels [LZ4HC_CLEVEL_OPT_MIN - LZ4HC_CLEVEL_MAX]) - * ===============================================*/ -typedef struct { - int price; - int off; - int mlen; - int litlen; -} LZ4HC_optimal_t; - -/* price in bytes */ -LZ4_FORCE_INLINE int LZ4HC_literalsPrice(int const litlen) -{ - int price = litlen; - assert(litlen >= 0); - if (litlen >= (int)RUN_MASK) - price += 1 + ((litlen-(int)RUN_MASK) / 255); - return price; -} - - -/* requires mlen >= MINMATCH */ -LZ4_FORCE_INLINE int LZ4HC_sequencePrice(int litlen, int mlen) -{ - int price = 1 + 2 ; /* token + 16-bit offset */ - assert(litlen >= 0); - assert(mlen >= MINMATCH); - - price += LZ4HC_literalsPrice(litlen); - - if (mlen >= (int)(ML_MASK+MINMATCH)) - price += 1 + ((mlen-(int)(ML_MASK+MINMATCH)) / 255); - - return price; -} - - -typedef struct { - int off; - int len; -} LZ4HC_match_t; - -LZ4_FORCE_INLINE LZ4HC_match_t -LZ4HC_FindLongerMatch(LZ4HC_CCtx_internal* const ctx, - const BYTE* ip, const BYTE* const iHighLimit, - int minLen, int nbSearches, - const dictCtx_directive dict, - const HCfavor_e favorDecSpeed) -{ - LZ4HC_match_t match = { 0 , 0 }; - const BYTE* matchPtr = NULL; - /* note : LZ4HC_InsertAndGetWiderMatch() is able to modify the starting position of a match (*startpos), - * but this won't be the case here, as we define iLowLimit==ip, - * so LZ4HC_InsertAndGetWiderMatch() won't be allowed to search past ip */ - int matchLength = LZ4HC_InsertAndGetWiderMatch(ctx, ip, ip, iHighLimit, minLen, &matchPtr, &ip, nbSearches, 1 /*patternAnalysis*/, 1 /*chainSwap*/, dict, favorDecSpeed); - if (matchLength <= minLen) return match; - if (favorDecSpeed) { - if ((matchLength>18) & (matchLength<=36)) matchLength=18; /* favor shortcut */ - } - match.len = matchLength; - match.off = (int)(ip-matchPtr); - return match; -} - - -static int LZ4HC_compress_optimal ( LZ4HC_CCtx_internal* ctx, - const char* const source, - char* dst, - int* srcSizePtr, - int dstCapacity, - int const nbSearches, - size_t sufficient_len, - const limitedOutput_directive limit, - int const fullUpdate, - const dictCtx_directive dict, - const HCfavor_e favorDecSpeed) -{ - int retval = 0; -#define TRAILING_LITERALS 3 -#if defined(LZ4HC_HEAPMODE) && LZ4HC_HEAPMODE==1 - LZ4HC_optimal_t* const opt = (LZ4HC_optimal_t*)ALLOC(sizeof(LZ4HC_optimal_t) * (LZ4_OPT_NUM + TRAILING_LITERALS)); -#else - LZ4HC_optimal_t opt[LZ4_OPT_NUM + TRAILING_LITERALS]; /* ~64 KB, which is a bit large for stack... */ -#endif - - const BYTE* ip = (const BYTE*) source; - const BYTE* anchor = ip; - const BYTE* const iend = ip + *srcSizePtr; - const BYTE* const mflimit = iend - MFLIMIT; - const BYTE* const matchlimit = iend - LASTLITERALS; - BYTE* op = (BYTE*) dst; - BYTE* opSaved = (BYTE*) dst; - BYTE* oend = op + dstCapacity; - int ovml = MINMATCH; /* overflow - last sequence */ - const BYTE* ovref = NULL; - - /* init */ -#if defined(LZ4HC_HEAPMODE) && LZ4HC_HEAPMODE==1 - if (opt == NULL) goto _return_label; -#endif - DEBUGLOG(5, "LZ4HC_compress_optimal(dst=%p, dstCapa=%u)", dst, (unsigned)dstCapacity); - *srcSizePtr = 0; - if (limit == fillOutput) oend -= LASTLITERALS; /* Hack for support LZ4 format restriction */ - if (sufficient_len >= LZ4_OPT_NUM) sufficient_len = LZ4_OPT_NUM-1; - - /* Main Loop */ - while (ip <= mflimit) { - int const llen = (int)(ip - anchor); - int best_mlen, best_off; - int cur, last_match_pos = 0; - - LZ4HC_match_t const firstMatch = LZ4HC_FindLongerMatch(ctx, ip, matchlimit, MINMATCH-1, nbSearches, dict, favorDecSpeed); - if (firstMatch.len==0) { ip++; continue; } - - if ((size_t)firstMatch.len > sufficient_len) { - /* good enough solution : immediate encoding */ - int const firstML = firstMatch.len; - const BYTE* const matchPos = ip - firstMatch.off; - opSaved = op; - if ( LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), firstML, matchPos, limit, oend) ) { /* updates ip, op and anchor */ - ovml = firstML; - ovref = matchPos; - goto _dest_overflow; - } - continue; - } - - /* set prices for first positions (literals) */ - { int rPos; - for (rPos = 0 ; rPos < MINMATCH ; rPos++) { - int const cost = LZ4HC_literalsPrice(llen + rPos); - opt[rPos].mlen = 1; - opt[rPos].off = 0; - opt[rPos].litlen = llen + rPos; - opt[rPos].price = cost; - DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i) -- initial setup", - rPos, cost, opt[rPos].litlen); - } } - /* set prices using initial match */ - { int mlen = MINMATCH; - int const matchML = firstMatch.len; /* necessarily < sufficient_len < LZ4_OPT_NUM */ - int const offset = firstMatch.off; - assert(matchML < LZ4_OPT_NUM); - for ( ; mlen <= matchML ; mlen++) { - int const cost = LZ4HC_sequencePrice(llen, mlen); - opt[mlen].mlen = mlen; - opt[mlen].off = offset; - opt[mlen].litlen = llen; - opt[mlen].price = cost; - DEBUGLOG(7, "rPos:%3i => price:%3i (matchlen=%i) -- initial setup", - mlen, cost, mlen); - } } - last_match_pos = firstMatch.len; - { int addLit; - for (addLit = 1; addLit <= TRAILING_LITERALS; addLit ++) { - opt[last_match_pos+addLit].mlen = 1; /* literal */ - opt[last_match_pos+addLit].off = 0; - opt[last_match_pos+addLit].litlen = addLit; - opt[last_match_pos+addLit].price = opt[last_match_pos].price + LZ4HC_literalsPrice(addLit); - DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i) -- initial setup", - last_match_pos+addLit, opt[last_match_pos+addLit].price, addLit); - } } - - /* check further positions */ - for (cur = 1; cur < last_match_pos; cur++) { - const BYTE* const curPtr = ip + cur; - LZ4HC_match_t newMatch; - - if (curPtr > mflimit) break; - DEBUGLOG(7, "rPos:%u[%u] vs [%u]%u", - cur, opt[cur].price, opt[cur+1].price, cur+1); - if (fullUpdate) { - /* not useful to search here if next position has same (or lower) cost */ - if ( (opt[cur+1].price <= opt[cur].price) - /* in some cases, next position has same cost, but cost rises sharply after, so a small match would still be beneficial */ - && (opt[cur+MINMATCH].price < opt[cur].price + 3/*min seq price*/) ) - continue; - } else { - /* not useful to search here if next position has same (or lower) cost */ - if (opt[cur+1].price <= opt[cur].price) continue; - } - - DEBUGLOG(7, "search at rPos:%u", cur); - if (fullUpdate) - newMatch = LZ4HC_FindLongerMatch(ctx, curPtr, matchlimit, MINMATCH-1, nbSearches, dict, favorDecSpeed); - else - /* only test matches of minimum length; slightly faster, but misses a few bytes */ - newMatch = LZ4HC_FindLongerMatch(ctx, curPtr, matchlimit, last_match_pos - cur, nbSearches, dict, favorDecSpeed); - if (!newMatch.len) continue; - - if ( ((size_t)newMatch.len > sufficient_len) - || (newMatch.len + cur >= LZ4_OPT_NUM) ) { - /* immediate encoding */ - best_mlen = newMatch.len; - best_off = newMatch.off; - last_match_pos = cur + 1; - goto encode; - } - - /* before match : set price with literals at beginning */ - { int const baseLitlen = opt[cur].litlen; - int litlen; - for (litlen = 1; litlen < MINMATCH; litlen++) { - int const price = opt[cur].price - LZ4HC_literalsPrice(baseLitlen) + LZ4HC_literalsPrice(baseLitlen+litlen); - int const pos = cur + litlen; - if (price < opt[pos].price) { - opt[pos].mlen = 1; /* literal */ - opt[pos].off = 0; - opt[pos].litlen = baseLitlen+litlen; - opt[pos].price = price; - DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i)", - pos, price, opt[pos].litlen); - } } } - - /* set prices using match at position = cur */ - { int const matchML = newMatch.len; - int ml = MINMATCH; - - assert(cur + newMatch.len < LZ4_OPT_NUM); - for ( ; ml <= matchML ; ml++) { - int const pos = cur + ml; - int const offset = newMatch.off; - int price; - int ll; - DEBUGLOG(7, "testing price rPos %i (last_match_pos=%i)", - pos, last_match_pos); - if (opt[cur].mlen == 1) { - ll = opt[cur].litlen; - price = ((cur > ll) ? opt[cur - ll].price : 0) - + LZ4HC_sequencePrice(ll, ml); - } else { - ll = 0; - price = opt[cur].price + LZ4HC_sequencePrice(0, ml); - } - - assert((U32)favorDecSpeed <= 1); - if (pos > last_match_pos+TRAILING_LITERALS - || price <= opt[pos].price - (int)favorDecSpeed) { - DEBUGLOG(7, "rPos:%3i => price:%3i (matchlen=%i)", - pos, price, ml); - assert(pos < LZ4_OPT_NUM); - if ( (ml == matchML) /* last pos of last match */ - && (last_match_pos < pos) ) - last_match_pos = pos; - opt[pos].mlen = ml; - opt[pos].off = offset; - opt[pos].litlen = ll; - opt[pos].price = price; - } } } - /* complete following positions with literals */ - { int addLit; - for (addLit = 1; addLit <= TRAILING_LITERALS; addLit ++) { - opt[last_match_pos+addLit].mlen = 1; /* literal */ - opt[last_match_pos+addLit].off = 0; - opt[last_match_pos+addLit].litlen = addLit; - opt[last_match_pos+addLit].price = opt[last_match_pos].price + LZ4HC_literalsPrice(addLit); - DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i)", last_match_pos+addLit, opt[last_match_pos+addLit].price, addLit); - } } - } /* for (cur = 1; cur <= last_match_pos; cur++) */ - - assert(last_match_pos < LZ4_OPT_NUM + TRAILING_LITERALS); - best_mlen = opt[last_match_pos].mlen; - best_off = opt[last_match_pos].off; - cur = last_match_pos - best_mlen; - -encode: /* cur, last_match_pos, best_mlen, best_off must be set */ - assert(cur < LZ4_OPT_NUM); - assert(last_match_pos >= 1); /* == 1 when only one candidate */ - DEBUGLOG(6, "reverse traversal, looking for shortest path (last_match_pos=%i)", last_match_pos); - { int candidate_pos = cur; - int selected_matchLength = best_mlen; - int selected_offset = best_off; - while (1) { /* from end to beginning */ - int const next_matchLength = opt[candidate_pos].mlen; /* can be 1, means literal */ - int const next_offset = opt[candidate_pos].off; - DEBUGLOG(7, "pos %i: sequence length %i", candidate_pos, selected_matchLength); - opt[candidate_pos].mlen = selected_matchLength; - opt[candidate_pos].off = selected_offset; - selected_matchLength = next_matchLength; - selected_offset = next_offset; - if (next_matchLength > candidate_pos) break; /* last match elected, first match to encode */ - assert(next_matchLength > 0); /* can be 1, means literal */ - candidate_pos -= next_matchLength; - } } - - /* encode all recorded sequences in order */ - { int rPos = 0; /* relative position (to ip) */ - while (rPos < last_match_pos) { - int const ml = opt[rPos].mlen; - int const offset = opt[rPos].off; - if (ml == 1) { ip++; rPos++; continue; } /* literal; note: can end up with several literals, in which case, skip them */ - rPos += ml; - assert(ml >= MINMATCH); - assert((offset >= 1) && (offset <= LZ4_DISTANCE_MAX)); - opSaved = op; - if ( LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ip - offset, limit, oend) ) { /* updates ip, op and anchor */ - ovml = ml; - ovref = ip - offset; - goto _dest_overflow; - } } } - } /* while (ip <= mflimit) */ - -_last_literals: - /* Encode Last Literals */ - { size_t lastRunSize = (size_t)(iend - anchor); /* literals */ - size_t llAdd = (lastRunSize + 255 - RUN_MASK) / 255; - size_t const totalSize = 1 + llAdd + lastRunSize; - if (limit == fillOutput) oend += LASTLITERALS; /* restore correct value */ - if (limit && (op + totalSize > oend)) { - if (limit == limitedOutput) { /* Check output limit */ - retval = 0; - goto _return_label; - } - /* adapt lastRunSize to fill 'dst' */ - lastRunSize = (size_t)(oend - op) - 1 /*token*/; - llAdd = (lastRunSize + 256 - RUN_MASK) / 256; - lastRunSize -= llAdd; - } - DEBUGLOG(6, "Final literal run : %i literals", (int)lastRunSize); - ip = anchor + lastRunSize; /* can be != iend if limit==fillOutput */ - - if (lastRunSize >= RUN_MASK) { - size_t accumulator = lastRunSize - RUN_MASK; - *op++ = (RUN_MASK << ML_BITS); - for(; accumulator >= 255 ; accumulator -= 255) *op++ = 255; - *op++ = (BYTE) accumulator; - } else { - *op++ = (BYTE)(lastRunSize << ML_BITS); - } - LZ4_memcpy(op, anchor, lastRunSize); - op += lastRunSize; - } - - /* End */ - *srcSizePtr = (int) (((const char*)ip) - source); - retval = (int) ((char*)op-dst); - goto _return_label; - -_dest_overflow: -if (limit == fillOutput) { - /* Assumption : ip, anchor, ovml and ovref must be set correctly */ - size_t const ll = (size_t)(ip - anchor); - size_t const ll_addbytes = (ll + 240) / 255; - size_t const ll_totalCost = 1 + ll_addbytes + ll; - BYTE* const maxLitPos = oend - 3; /* 2 for offset, 1 for token */ - DEBUGLOG(6, "Last sequence overflowing (only %i bytes remaining)", (int)(oend-1-opSaved)); - op = opSaved; /* restore correct out pointer */ - if (op + ll_totalCost <= maxLitPos) { - /* ll validated; now adjust match length */ - size_t const bytesLeftForMl = (size_t)(maxLitPos - (op+ll_totalCost)); - size_t const maxMlSize = MINMATCH + (ML_MASK-1) + (bytesLeftForMl * 255); - assert(maxMlSize < INT_MAX); assert(ovml >= 0); - if ((size_t)ovml > maxMlSize) ovml = (int)maxMlSize; - if ((oend + LASTLITERALS) - (op + ll_totalCost + 2) - 1 + ovml >= MFLIMIT) { - DEBUGLOG(6, "Space to end : %i + ml (%i)", (int)((oend + LASTLITERALS) - (op + ll_totalCost + 2) - 1), ovml); - DEBUGLOG(6, "Before : ip = %p, anchor = %p", ip, anchor); - LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ovml, ovref, notLimited, oend); - DEBUGLOG(6, "After : ip = %p, anchor = %p", ip, anchor); - } } - goto _last_literals; -} -_return_label: -#if defined(LZ4HC_HEAPMODE) && LZ4HC_HEAPMODE==1 - FREEMEM(opt); -#endif - return retval; -} - -} diff --git a/src/third_party/tracy/common/tracy_lz4hc.hpp b/src/third_party/tracy/common/tracy_lz4hc.hpp deleted file mode 100644 index 460cbae7..00000000 --- a/src/third_party/tracy/common/tracy_lz4hc.hpp +++ /dev/null @@ -1,405 +0,0 @@ -/* - LZ4 HC - High Compression Mode of LZ4 - Header File - Copyright (C) 2011-2020, Yann Collet. - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - You can contact the author at : - - LZ4 source repository : https://github.com/lz4/lz4 - - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c -*/ -#ifndef TRACY_LZ4_HC_H_19834876238432 -#define TRACY_LZ4_HC_H_19834876238432 - -/* --- Dependency --- */ -/* note : lz4hc requires lz4.h/lz4.c for compilation */ -#include "tracy_lz4.hpp" /* stddef, LZ4LIB_API, LZ4_DEPRECATED */ - - -/* --- Useful constants --- */ -#define LZ4HC_CLEVEL_MIN 3 -#define LZ4HC_CLEVEL_DEFAULT 9 -#define LZ4HC_CLEVEL_OPT_MIN 10 -#define LZ4HC_CLEVEL_MAX 12 - -namespace tracy -{ - -/*-************************************ - * Block Compression - **************************************/ -/*! LZ4_compress_HC() : - * Compress data from `src` into `dst`, using the powerful but slower "HC" algorithm. - * `dst` must be already allocated. - * Compression is guaranteed to succeed if `dstCapacity >= LZ4_compressBound(srcSize)` (see "lz4.h") - * Max supported `srcSize` value is LZ4_MAX_INPUT_SIZE (see "lz4.h") - * `compressionLevel` : any value between 1 and LZ4HC_CLEVEL_MAX will work. - * Values > LZ4HC_CLEVEL_MAX behave the same as LZ4HC_CLEVEL_MAX. - * @return : the number of bytes written into 'dst' - * or 0 if compression fails. - */ -LZ4LIB_API int LZ4_compress_HC (const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel); - - -/* Note : - * Decompression functions are provided within "lz4.h" (BSD license) - */ - - -/*! LZ4_compress_HC_extStateHC() : - * Same as LZ4_compress_HC(), but using an externally allocated memory segment for `state`. - * `state` size is provided by LZ4_sizeofStateHC(). - * Memory segment must be aligned on 8-bytes boundaries (which a normal malloc() should do properly). - */ -LZ4LIB_API int LZ4_sizeofStateHC(void); -LZ4LIB_API int LZ4_compress_HC_extStateHC(void* stateHC, const char* src, char* dst, int srcSize, int maxDstSize, int compressionLevel); - - -/*! LZ4_compress_HC_destSize() : v1.9.0+ - * Will compress as much data as possible from `src` - * to fit into `targetDstSize` budget. - * Result is provided in 2 parts : - * @return : the number of bytes written into 'dst' (necessarily <= targetDstSize) - * or 0 if compression fails. - * `srcSizePtr` : on success, *srcSizePtr is updated to indicate how much bytes were read from `src` - */ -LZ4LIB_API int LZ4_compress_HC_destSize(void* stateHC, - const char* src, char* dst, - int* srcSizePtr, int targetDstSize, - int compressionLevel); - - -/*-************************************ - * Streaming Compression - * Bufferless synchronous API - **************************************/ - typedef union LZ4_streamHC_u LZ4_streamHC_t; /* incomplete type (defined later) */ - -/*! LZ4_createStreamHC() and LZ4_freeStreamHC() : - * These functions create and release memory for LZ4 HC streaming state. - * Newly created states are automatically initialized. - * A same state can be used multiple times consecutively, - * starting with LZ4_resetStreamHC_fast() to start a new stream of blocks. - */ -LZ4LIB_API LZ4_streamHC_t* LZ4_createStreamHC(void); -LZ4LIB_API int LZ4_freeStreamHC (LZ4_streamHC_t* streamHCPtr); - -/* - These functions compress data in successive blocks of any size, - using previous blocks as dictionary, to improve compression ratio. - One key assumption is that previous blocks (up to 64 KB) remain read-accessible while compressing next blocks. - There is an exception for ring buffers, which can be smaller than 64 KB. - Ring-buffer scenario is automatically detected and handled within LZ4_compress_HC_continue(). - - Before starting compression, state must be allocated and properly initialized. - LZ4_createStreamHC() does both, though compression level is set to LZ4HC_CLEVEL_DEFAULT. - - Selecting the compression level can be done with LZ4_resetStreamHC_fast() (starts a new stream) - or LZ4_setCompressionLevel() (anytime, between blocks in the same stream) (experimental). - LZ4_resetStreamHC_fast() only works on states which have been properly initialized at least once, - which is automatically the case when state is created using LZ4_createStreamHC(). - - After reset, a first "fictional block" can be designated as initial dictionary, - using LZ4_loadDictHC() (Optional). - - Invoke LZ4_compress_HC_continue() to compress each successive block. - The number of blocks is unlimited. - Previous input blocks, including initial dictionary when present, - must remain accessible and unmodified during compression. - - It's allowed to update compression level anytime between blocks, - using LZ4_setCompressionLevel() (experimental). - - 'dst' buffer should be sized to handle worst case scenarios - (see LZ4_compressBound(), it ensures compression success). - In case of failure, the API does not guarantee recovery, - so the state _must_ be reset. - To ensure compression success - whenever `dst` buffer size cannot be made >= LZ4_compressBound(), - consider using LZ4_compress_HC_continue_destSize(). - - Whenever previous input blocks can't be preserved unmodified in-place during compression of next blocks, - it's possible to copy the last blocks into a more stable memory space, using LZ4_saveDictHC(). - Return value of LZ4_saveDictHC() is the size of dictionary effectively saved into 'safeBuffer' (<= 64 KB) - - After completing a streaming compression, - it's possible to start a new stream of blocks, using the same LZ4_streamHC_t state, - just by resetting it, using LZ4_resetStreamHC_fast(). -*/ - -LZ4LIB_API void LZ4_resetStreamHC_fast(LZ4_streamHC_t* streamHCPtr, int compressionLevel); /* v1.9.0+ */ -LZ4LIB_API int LZ4_loadDictHC (LZ4_streamHC_t* streamHCPtr, const char* dictionary, int dictSize); - -LZ4LIB_API int LZ4_compress_HC_continue (LZ4_streamHC_t* streamHCPtr, - const char* src, char* dst, - int srcSize, int maxDstSize); - -/*! LZ4_compress_HC_continue_destSize() : v1.9.0+ - * Similar to LZ4_compress_HC_continue(), - * but will read as much data as possible from `src` - * to fit into `targetDstSize` budget. - * Result is provided into 2 parts : - * @return : the number of bytes written into 'dst' (necessarily <= targetDstSize) - * or 0 if compression fails. - * `srcSizePtr` : on success, *srcSizePtr will be updated to indicate how much bytes were read from `src`. - * Note that this function may not consume the entire input. - */ -LZ4LIB_API int LZ4_compress_HC_continue_destSize(LZ4_streamHC_t* LZ4_streamHCPtr, - const char* src, char* dst, - int* srcSizePtr, int targetDstSize); - -LZ4LIB_API int LZ4_saveDictHC (LZ4_streamHC_t* streamHCPtr, char* safeBuffer, int maxDictSize); - - - -/*^********************************************** - * !!!!!! STATIC LINKING ONLY !!!!!! - ***********************************************/ - -/*-****************************************************************** - * PRIVATE DEFINITIONS : - * Do not use these definitions directly. - * They are merely exposed to allow static allocation of `LZ4_streamHC_t`. - * Declare an `LZ4_streamHC_t` directly, rather than any type below. - * Even then, only do so in the context of static linking, as definitions may change between versions. - ********************************************************************/ - -#define LZ4HC_DICTIONARY_LOGSIZE 16 -#define LZ4HC_MAXD (1<= LZ4HC_CLEVEL_OPT_MIN. - */ -LZ4LIB_STATIC_API void LZ4_favorDecompressionSpeed( - LZ4_streamHC_t* LZ4_streamHCPtr, int favor); - -/*! LZ4_resetStreamHC_fast() : v1.9.0+ - * When an LZ4_streamHC_t is known to be in a internally coherent state, - * it can often be prepared for a new compression with almost no work, only - * sometimes falling back to the full, expensive reset that is always required - * when the stream is in an indeterminate state (i.e., the reset performed by - * LZ4_resetStreamHC()). - * - * LZ4_streamHCs are guaranteed to be in a valid state when: - * - returned from LZ4_createStreamHC() - * - reset by LZ4_resetStreamHC() - * - memset(stream, 0, sizeof(LZ4_streamHC_t)) - * - the stream was in a valid state and was reset by LZ4_resetStreamHC_fast() - * - the stream was in a valid state and was then used in any compression call - * that returned success - * - the stream was in an indeterminate state and was used in a compression - * call that fully reset the state (LZ4_compress_HC_extStateHC()) and that - * returned success - * - * Note: - * A stream that was last used in a compression call that returned an error - * may be passed to this function. However, it will be fully reset, which will - * clear any existing history and settings from the context. - */ -LZ4LIB_STATIC_API void LZ4_resetStreamHC_fast( - LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel); - -/*! LZ4_compress_HC_extStateHC_fastReset() : - * A variant of LZ4_compress_HC_extStateHC(). - * - * Using this variant avoids an expensive initialization step. It is only safe - * to call if the state buffer is known to be correctly initialized already - * (see above comment on LZ4_resetStreamHC_fast() for a definition of - * "correctly initialized"). From a high level, the difference is that this - * function initializes the provided state with a call to - * LZ4_resetStreamHC_fast() while LZ4_compress_HC_extStateHC() starts with a - * call to LZ4_resetStreamHC(). - */ -LZ4LIB_STATIC_API int LZ4_compress_HC_extStateHC_fastReset ( - void* state, - const char* src, char* dst, - int srcSize, int dstCapacity, - int compressionLevel); - -/*! LZ4_attach_HC_dictionary() : - * This is an experimental API that allows for the efficient use of a - * static dictionary many times. - * - * Rather than re-loading the dictionary buffer into a working context before - * each compression, or copying a pre-loaded dictionary's LZ4_streamHC_t into a - * working LZ4_streamHC_t, this function introduces a no-copy setup mechanism, - * in which the working stream references the dictionary stream in-place. - * - * Several assumptions are made about the state of the dictionary stream. - * Currently, only streams which have been prepared by LZ4_loadDictHC() should - * be expected to work. - * - * Alternatively, the provided dictionary stream pointer may be NULL, in which - * case any existing dictionary stream is unset. - * - * A dictionary should only be attached to a stream without any history (i.e., - * a stream that has just been reset). - * - * The dictionary will remain attached to the working stream only for the - * current stream session. Calls to LZ4_resetStreamHC(_fast) will remove the - * dictionary context association from the working stream. The dictionary - * stream (and source buffer) must remain in-place / accessible / unchanged - * through the lifetime of the stream session. - */ -LZ4LIB_STATIC_API void LZ4_attach_HC_dictionary( - LZ4_streamHC_t *working_stream, - const LZ4_streamHC_t *dictionary_stream); - -} - -#endif /* LZ4_HC_SLO_098092834 */ -#endif /* LZ4_HC_STATIC_LINKING_ONLY */ diff --git a/src/third_party/tracy/libbacktrace/LICENSE b/src/third_party/tracy/libbacktrace/LICENSE deleted file mode 100644 index 097d2774..00000000 --- a/src/third_party/tracy/libbacktrace/LICENSE +++ /dev/null @@ -1,29 +0,0 @@ -# Copyright (C) 2012-2016 Free Software Foundation, Inc. - -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: - -# (1) Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. - -# (2) Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in -# the documentation and/or other materials provided with the -# distribution. - -# (3) The name of the author may not be used to -# endorse or promote products derived from this software without -# specific prior written permission. - -# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR -# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, -# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING -# IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -# POSSIBILITY OF SUCH DAMAGE. diff --git a/src/third_party/tracy/libbacktrace/alloc.cpp b/src/third_party/tracy/libbacktrace/alloc.cpp deleted file mode 100644 index a365a486..00000000 --- a/src/third_party/tracy/libbacktrace/alloc.cpp +++ /dev/null @@ -1,174 +0,0 @@ -/* alloc.c -- Memory allocation without mmap. - Copyright (C) 2012-2021 Free Software Foundation, Inc. - Written by Ian Lance Taylor, Google. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - (1) Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - (2) Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in - the documentation and/or other materials provided with the - distribution. - - (3) The name of the author may not be used to - endorse or promote products derived from this software without - specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR -IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, -INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING -IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. */ - -#include "config.h" - -#include -#include -#include - -#include "backtrace.hpp" -#include "internal.hpp" - -#include "../common/TracyAlloc.hpp" - -namespace tracy -{ - -/* Allocation routines to use on systems that do not support anonymous - mmap. This implementation just uses malloc, which means that the - backtrace functions may not be safely invoked from a signal - handler. */ - -/* Allocate memory like malloc. If ERROR_CALLBACK is NULL, don't - report an error. */ - -void * -backtrace_alloc (struct backtrace_state *state ATTRIBUTE_UNUSED, - size_t size, backtrace_error_callback error_callback, - void *data) -{ - void *ret; - - ret = tracy_malloc (size); - if (ret == NULL) - { - if (error_callback) - error_callback (data, "malloc", errno); - } - return ret; -} - -/* Free memory. */ - -void -backtrace_free (struct backtrace_state *state ATTRIBUTE_UNUSED, - void *p, size_t size ATTRIBUTE_UNUSED, - backtrace_error_callback error_callback ATTRIBUTE_UNUSED, - void *data ATTRIBUTE_UNUSED) -{ - tracy_free (p); -} - -/* Grow VEC by SIZE bytes. */ - -void * -backtrace_vector_grow (struct backtrace_state *state ATTRIBUTE_UNUSED, - size_t size, backtrace_error_callback error_callback, - void *data, struct backtrace_vector *vec) -{ - void *ret; - - if (size > vec->alc) - { - size_t alc; - void *base; - - if (vec->size == 0) - alc = 32 * size; - else if (vec->size >= 4096) - alc = vec->size + 4096; - else - alc = 2 * vec->size; - - if (alc < vec->size + size) - alc = vec->size + size; - - base = tracy_realloc (vec->base, alc); - if (base == NULL) - { - error_callback (data, "realloc", errno); - return NULL; - } - - vec->base = base; - vec->alc = alc - vec->size; - } - - ret = (char *) vec->base + vec->size; - vec->size += size; - vec->alc -= size; - return ret; -} - -/* Finish the current allocation on VEC. */ - -void * -backtrace_vector_finish (struct backtrace_state *state, - struct backtrace_vector *vec, - backtrace_error_callback error_callback, - void *data) -{ - void *ret; - - /* With this allocator we call realloc in backtrace_vector_grow, - which means we can't easily reuse the memory here. So just - release it. */ - if (!backtrace_vector_release (state, vec, error_callback, data)) - return NULL; - ret = vec->base; - vec->base = NULL; - vec->size = 0; - vec->alc = 0; - return ret; -} - -/* Release any extra space allocated for VEC. */ - -int -backtrace_vector_release (struct backtrace_state *state ATTRIBUTE_UNUSED, - struct backtrace_vector *vec, - backtrace_error_callback error_callback, - void *data) -{ - vec->alc = 0; - - if (vec->size == 0) - { - /* As of C17, realloc with size 0 is marked as an obsolescent feature, use - free instead. */ - tracy_free (vec->base); - vec->base = NULL; - return 1; - } - - vec->base = tracy_realloc (vec->base, vec->size); - if (vec->base == NULL) - { - error_callback (data, "realloc", errno); - return 0; - } - - return 1; -} - -} diff --git a/src/third_party/tracy/libbacktrace/backtrace.hpp b/src/third_party/tracy/libbacktrace/backtrace.hpp deleted file mode 100644 index e4be297a..00000000 --- a/src/third_party/tracy/libbacktrace/backtrace.hpp +++ /dev/null @@ -1,186 +0,0 @@ -/* backtrace.h -- Public header file for stack backtrace library. - Copyright (C) 2012-2021 Free Software Foundation, Inc. - Written by Ian Lance Taylor, Google. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - (1) Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - (2) Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in - the documentation and/or other materials provided with the - distribution. - - (3) The name of the author may not be used to - endorse or promote products derived from this software without - specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR -IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, -INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING -IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. */ - -#ifndef BACKTRACE_H -#define BACKTRACE_H - -#include -#include -#include - -namespace tracy -{ - -/* The backtrace state. This struct is intentionally not defined in - the public interface. */ - -struct backtrace_state; - -/* The type of the error callback argument to backtrace functions. - This function, if not NULL, will be called for certain error cases. - The DATA argument is passed to the function that calls this one. - The MSG argument is an error message. The ERRNUM argument, if - greater than 0, holds an errno value. The MSG buffer may become - invalid after this function returns. - - As a special case, the ERRNUM argument will be passed as -1 if no - debug info can be found for the executable, or if the debug info - exists but has an unsupported version, but the function requires - debug info (e.g., backtrace_full, backtrace_pcinfo). The MSG in - this case will be something along the lines of "no debug info". - Similarly, ERRNUM will be passed as -1 if there is no symbol table, - but the function requires a symbol table (e.g., backtrace_syminfo). - This may be used as a signal that some other approach should be - tried. */ - -typedef void (*backtrace_error_callback) (void *data, const char *msg, - int errnum); - -/* Create state information for the backtrace routines. This must be - called before any of the other routines, and its return value must - be passed to all of the other routines. FILENAME is the path name - of the executable file; if it is NULL the library will try - system-specific path names. If not NULL, FILENAME must point to a - permanent buffer. If THREADED is non-zero the state may be - accessed by multiple threads simultaneously, and the library will - use appropriate atomic operations. If THREADED is zero the state - may only be accessed by one thread at a time. This returns a state - pointer on success, NULL on error. If an error occurs, this will - call the ERROR_CALLBACK routine. - - Calling this function allocates resources that cannot be freed. - There is no backtrace_free_state function. The state is used to - cache information that is expensive to recompute. Programs are - expected to call this function at most once and to save the return - value for all later calls to backtrace functions. */ - -extern struct backtrace_state *backtrace_create_state ( - const char *filename, int threaded, - backtrace_error_callback error_callback, void *data); - -/* The type of the callback argument to the backtrace_full function. - DATA is the argument passed to backtrace_full. PC is the program - counter. FILENAME is the name of the file containing PC, or NULL - if not available. LINENO is the line number in FILENAME containing - PC, or 0 if not available. FUNCTION is the name of the function - containing PC, or NULL if not available. This should return 0 to - continuing tracing. The FILENAME and FUNCTION buffers may become - invalid after this function returns. */ - -typedef int (*backtrace_full_callback) (void *data, uintptr_t pc, uintptr_t lowaddr, - const char *filename, int lineno, - const char *function); - -/* Get a full stack backtrace. SKIP is the number of frames to skip; - passing 0 will start the trace with the function calling - backtrace_full. DATA is passed to the callback routine. If any - call to CALLBACK returns a non-zero value, the stack backtrace - stops, and backtrace returns that value; this may be used to limit - the number of stack frames desired. If all calls to CALLBACK - return 0, backtrace returns 0. The backtrace_full function will - make at least one call to either CALLBACK or ERROR_CALLBACK. This - function requires debug info for the executable. */ - -extern int backtrace_full (struct backtrace_state *state, int skip, - backtrace_full_callback callback, - backtrace_error_callback error_callback, - void *data); - -/* The type of the callback argument to the backtrace_simple function. - DATA is the argument passed to simple_backtrace. PC is the program - counter. This should return 0 to continue tracing. */ - -typedef int (*backtrace_simple_callback) (void *data, uintptr_t pc); - -/* Get a simple backtrace. SKIP is the number of frames to skip, as - in backtrace. DATA is passed to the callback routine. If any call - to CALLBACK returns a non-zero value, the stack backtrace stops, - and backtrace_simple returns that value. Otherwise - backtrace_simple returns 0. The backtrace_simple function will - make at least one call to either CALLBACK or ERROR_CALLBACK. This - function does not require any debug info for the executable. */ - -extern int backtrace_simple (struct backtrace_state *state, int skip, - backtrace_simple_callback callback, - backtrace_error_callback error_callback, - void *data); - -/* Print the current backtrace in a user readable format to a FILE. - SKIP is the number of frames to skip, as in backtrace_full. Any - error messages are printed to stderr. This function requires debug - info for the executable. */ - -extern void backtrace_print (struct backtrace_state *state, int skip, FILE *); - -/* Given PC, a program counter in the current program, call the - callback function with filename, line number, and function name - information. This will normally call the callback function exactly - once. However, if the PC happens to describe an inlined call, and - the debugging information contains the necessary information, then - this may call the callback function multiple times. This will make - at least one call to either CALLBACK or ERROR_CALLBACK. This - returns the first non-zero value returned by CALLBACK, or 0. */ - -extern int backtrace_pcinfo (struct backtrace_state *state, uintptr_t pc, - backtrace_full_callback callback, - backtrace_error_callback error_callback, - void *data); - -/* The type of the callback argument to backtrace_syminfo. DATA and - PC are the arguments passed to backtrace_syminfo. SYMNAME is the - name of the symbol for the corresponding code. SYMVAL is the - value and SYMSIZE is the size of the symbol. SYMNAME will be NULL - if no error occurred but the symbol could not be found. */ - -typedef void (*backtrace_syminfo_callback) (void *data, uintptr_t pc, - const char *symname, - uintptr_t symval, - uintptr_t symsize); - -/* Given ADDR, an address or program counter in the current program, - call the callback information with the symbol name and value - describing the function or variable in which ADDR may be found. - This will call either CALLBACK or ERROR_CALLBACK exactly once. - This returns 1 on success, 0 on failure. This function requires - the symbol table but does not require the debug info. Note that if - the symbol table is present but ADDR could not be found in the - table, CALLBACK will be called with a NULL SYMNAME argument. - Returns 1 on success, 0 on error. */ - -extern int backtrace_syminfo (struct backtrace_state *state, uintptr_t addr, - backtrace_syminfo_callback callback, - backtrace_error_callback error_callback, - void *data); - -} - -#endif diff --git a/src/third_party/tracy/libbacktrace/config.h b/src/third_party/tracy/libbacktrace/config.h deleted file mode 100644 index 87e38a95..00000000 --- a/src/third_party/tracy/libbacktrace/config.h +++ /dev/null @@ -1,26 +0,0 @@ -#include -#if defined(__linux__) && !defined(__GLIBC__) && !defined(__WORDSIZE) -// include __WORDSIZE headers for musl -# include -#endif -#if __WORDSIZE == 64 -# define BACKTRACE_ELF_SIZE 64 -#else -# define BACKTRACE_ELF_SIZE 32 -#endif - -#define HAVE_DLFCN_H 1 -#define HAVE_FCNTL 1 -#define HAVE_INTTYPES_H 1 -#define HAVE_LSTAT 1 -#define HAVE_READLINK 1 -#define HAVE_DL_ITERATE_PHDR 1 -#define HAVE_ATOMIC_FUNCTIONS 1 -#define HAVE_DECL_STRNLEN 1 - -#ifdef __APPLE__ -# define HAVE_MACH_O_DYLD_H 1 -#elif defined BSD -# define HAVE_KERN_PROC 1 -# define HAVE_KERN_PROC_ARGS 1 -#endif diff --git a/src/third_party/tracy/libbacktrace/dwarf.cpp b/src/third_party/tracy/libbacktrace/dwarf.cpp deleted file mode 100644 index 52fa8a8d..00000000 --- a/src/third_party/tracy/libbacktrace/dwarf.cpp +++ /dev/null @@ -1,4455 +0,0 @@ -/* dwarf.c -- Get file/line information from DWARF for backtraces. - Copyright (C) 2012-2021 Free Software Foundation, Inc. - Written by Ian Lance Taylor, Google. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - (1) Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - (2) Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in - the documentation and/or other materials provided with the - distribution. - - (3) The name of the author may not be used to - endorse or promote products derived from this software without - specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR -IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, -INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING -IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. */ - -#include "config.h" - -#include -#include -#include -#include - -#include "filenames.hpp" - -#include "backtrace.hpp" -#include "internal.hpp" - -namespace tracy -{ - -/* DWARF constants. */ - -enum dwarf_tag { - DW_TAG_entry_point = 0x3, - DW_TAG_compile_unit = 0x11, - DW_TAG_inlined_subroutine = 0x1d, - DW_TAG_subprogram = 0x2e, - DW_TAG_skeleton_unit = 0x4a, -}; - -enum dwarf_form { - DW_FORM_addr = 0x01, - DW_FORM_block2 = 0x03, - DW_FORM_block4 = 0x04, - DW_FORM_data2 = 0x05, - DW_FORM_data4 = 0x06, - DW_FORM_data8 = 0x07, - DW_FORM_string = 0x08, - DW_FORM_block = 0x09, - DW_FORM_block1 = 0x0a, - DW_FORM_data1 = 0x0b, - DW_FORM_flag = 0x0c, - DW_FORM_sdata = 0x0d, - DW_FORM_strp = 0x0e, - DW_FORM_udata = 0x0f, - DW_FORM_ref_addr = 0x10, - DW_FORM_ref1 = 0x11, - DW_FORM_ref2 = 0x12, - DW_FORM_ref4 = 0x13, - DW_FORM_ref8 = 0x14, - DW_FORM_ref_udata = 0x15, - DW_FORM_indirect = 0x16, - DW_FORM_sec_offset = 0x17, - DW_FORM_exprloc = 0x18, - DW_FORM_flag_present = 0x19, - DW_FORM_ref_sig8 = 0x20, - DW_FORM_strx = 0x1a, - DW_FORM_addrx = 0x1b, - DW_FORM_ref_sup4 = 0x1c, - DW_FORM_strp_sup = 0x1d, - DW_FORM_data16 = 0x1e, - DW_FORM_line_strp = 0x1f, - DW_FORM_implicit_const = 0x21, - DW_FORM_loclistx = 0x22, - DW_FORM_rnglistx = 0x23, - DW_FORM_ref_sup8 = 0x24, - DW_FORM_strx1 = 0x25, - DW_FORM_strx2 = 0x26, - DW_FORM_strx3 = 0x27, - DW_FORM_strx4 = 0x28, - DW_FORM_addrx1 = 0x29, - DW_FORM_addrx2 = 0x2a, - DW_FORM_addrx3 = 0x2b, - DW_FORM_addrx4 = 0x2c, - DW_FORM_GNU_addr_index = 0x1f01, - DW_FORM_GNU_str_index = 0x1f02, - DW_FORM_GNU_ref_alt = 0x1f20, - DW_FORM_GNU_strp_alt = 0x1f21 -}; - -enum dwarf_attribute { - DW_AT_sibling = 0x01, - DW_AT_location = 0x02, - DW_AT_name = 0x03, - DW_AT_ordering = 0x09, - DW_AT_subscr_data = 0x0a, - DW_AT_byte_size = 0x0b, - DW_AT_bit_offset = 0x0c, - DW_AT_bit_size = 0x0d, - DW_AT_element_list = 0x0f, - DW_AT_stmt_list = 0x10, - DW_AT_low_pc = 0x11, - DW_AT_high_pc = 0x12, - DW_AT_language = 0x13, - DW_AT_member = 0x14, - DW_AT_discr = 0x15, - DW_AT_discr_value = 0x16, - DW_AT_visibility = 0x17, - DW_AT_import = 0x18, - DW_AT_string_length = 0x19, - DW_AT_common_reference = 0x1a, - DW_AT_comp_dir = 0x1b, - DW_AT_const_value = 0x1c, - DW_AT_containing_type = 0x1d, - DW_AT_default_value = 0x1e, - DW_AT_inline = 0x20, - DW_AT_is_optional = 0x21, - DW_AT_lower_bound = 0x22, - DW_AT_producer = 0x25, - DW_AT_prototyped = 0x27, - DW_AT_return_addr = 0x2a, - DW_AT_start_scope = 0x2c, - DW_AT_bit_stride = 0x2e, - DW_AT_upper_bound = 0x2f, - DW_AT_abstract_origin = 0x31, - DW_AT_accessibility = 0x32, - DW_AT_address_class = 0x33, - DW_AT_artificial = 0x34, - DW_AT_base_types = 0x35, - DW_AT_calling_convention = 0x36, - DW_AT_count = 0x37, - DW_AT_data_member_location = 0x38, - DW_AT_decl_column = 0x39, - DW_AT_decl_file = 0x3a, - DW_AT_decl_line = 0x3b, - DW_AT_declaration = 0x3c, - DW_AT_discr_list = 0x3d, - DW_AT_encoding = 0x3e, - DW_AT_external = 0x3f, - DW_AT_frame_base = 0x40, - DW_AT_friend = 0x41, - DW_AT_identifier_case = 0x42, - DW_AT_macro_info = 0x43, - DW_AT_namelist_items = 0x44, - DW_AT_priority = 0x45, - DW_AT_segment = 0x46, - DW_AT_specification = 0x47, - DW_AT_static_link = 0x48, - DW_AT_type = 0x49, - DW_AT_use_location = 0x4a, - DW_AT_variable_parameter = 0x4b, - DW_AT_virtuality = 0x4c, - DW_AT_vtable_elem_location = 0x4d, - DW_AT_allocated = 0x4e, - DW_AT_associated = 0x4f, - DW_AT_data_location = 0x50, - DW_AT_byte_stride = 0x51, - DW_AT_entry_pc = 0x52, - DW_AT_use_UTF8 = 0x53, - DW_AT_extension = 0x54, - DW_AT_ranges = 0x55, - DW_AT_trampoline = 0x56, - DW_AT_call_column = 0x57, - DW_AT_call_file = 0x58, - DW_AT_call_line = 0x59, - DW_AT_description = 0x5a, - DW_AT_binary_scale = 0x5b, - DW_AT_decimal_scale = 0x5c, - DW_AT_small = 0x5d, - DW_AT_decimal_sign = 0x5e, - DW_AT_digit_count = 0x5f, - DW_AT_picture_string = 0x60, - DW_AT_mutable = 0x61, - DW_AT_threads_scaled = 0x62, - DW_AT_explicit = 0x63, - DW_AT_object_pointer = 0x64, - DW_AT_endianity = 0x65, - DW_AT_elemental = 0x66, - DW_AT_pure = 0x67, - DW_AT_recursive = 0x68, - DW_AT_signature = 0x69, - DW_AT_main_subprogram = 0x6a, - DW_AT_data_bit_offset = 0x6b, - DW_AT_const_expr = 0x6c, - DW_AT_enum_class = 0x6d, - DW_AT_linkage_name = 0x6e, - DW_AT_string_length_bit_size = 0x6f, - DW_AT_string_length_byte_size = 0x70, - DW_AT_rank = 0x71, - DW_AT_str_offsets_base = 0x72, - DW_AT_addr_base = 0x73, - DW_AT_rnglists_base = 0x74, - DW_AT_dwo_name = 0x76, - DW_AT_reference = 0x77, - DW_AT_rvalue_reference = 0x78, - DW_AT_macros = 0x79, - DW_AT_call_all_calls = 0x7a, - DW_AT_call_all_source_calls = 0x7b, - DW_AT_call_all_tail_calls = 0x7c, - DW_AT_call_return_pc = 0x7d, - DW_AT_call_value = 0x7e, - DW_AT_call_origin = 0x7f, - DW_AT_call_parameter = 0x80, - DW_AT_call_pc = 0x81, - DW_AT_call_tail_call = 0x82, - DW_AT_call_target = 0x83, - DW_AT_call_target_clobbered = 0x84, - DW_AT_call_data_location = 0x85, - DW_AT_call_data_value = 0x86, - DW_AT_noreturn = 0x87, - DW_AT_alignment = 0x88, - DW_AT_export_symbols = 0x89, - DW_AT_deleted = 0x8a, - DW_AT_defaulted = 0x8b, - DW_AT_loclists_base = 0x8c, - DW_AT_lo_user = 0x2000, - DW_AT_hi_user = 0x3fff, - DW_AT_MIPS_fde = 0x2001, - DW_AT_MIPS_loop_begin = 0x2002, - DW_AT_MIPS_tail_loop_begin = 0x2003, - DW_AT_MIPS_epilog_begin = 0x2004, - DW_AT_MIPS_loop_unroll_factor = 0x2005, - DW_AT_MIPS_software_pipeline_depth = 0x2006, - DW_AT_MIPS_linkage_name = 0x2007, - DW_AT_MIPS_stride = 0x2008, - DW_AT_MIPS_abstract_name = 0x2009, - DW_AT_MIPS_clone_origin = 0x200a, - DW_AT_MIPS_has_inlines = 0x200b, - DW_AT_HP_block_index = 0x2000, - DW_AT_HP_unmodifiable = 0x2001, - DW_AT_HP_prologue = 0x2005, - DW_AT_HP_epilogue = 0x2008, - DW_AT_HP_actuals_stmt_list = 0x2010, - DW_AT_HP_proc_per_section = 0x2011, - DW_AT_HP_raw_data_ptr = 0x2012, - DW_AT_HP_pass_by_reference = 0x2013, - DW_AT_HP_opt_level = 0x2014, - DW_AT_HP_prof_version_id = 0x2015, - DW_AT_HP_opt_flags = 0x2016, - DW_AT_HP_cold_region_low_pc = 0x2017, - DW_AT_HP_cold_region_high_pc = 0x2018, - DW_AT_HP_all_variables_modifiable = 0x2019, - DW_AT_HP_linkage_name = 0x201a, - DW_AT_HP_prof_flags = 0x201b, - DW_AT_HP_unit_name = 0x201f, - DW_AT_HP_unit_size = 0x2020, - DW_AT_HP_widened_byte_size = 0x2021, - DW_AT_HP_definition_points = 0x2022, - DW_AT_HP_default_location = 0x2023, - DW_AT_HP_is_result_param = 0x2029, - DW_AT_sf_names = 0x2101, - DW_AT_src_info = 0x2102, - DW_AT_mac_info = 0x2103, - DW_AT_src_coords = 0x2104, - DW_AT_body_begin = 0x2105, - DW_AT_body_end = 0x2106, - DW_AT_GNU_vector = 0x2107, - DW_AT_GNU_guarded_by = 0x2108, - DW_AT_GNU_pt_guarded_by = 0x2109, - DW_AT_GNU_guarded = 0x210a, - DW_AT_GNU_pt_guarded = 0x210b, - DW_AT_GNU_locks_excluded = 0x210c, - DW_AT_GNU_exclusive_locks_required = 0x210d, - DW_AT_GNU_shared_locks_required = 0x210e, - DW_AT_GNU_odr_signature = 0x210f, - DW_AT_GNU_template_name = 0x2110, - DW_AT_GNU_call_site_value = 0x2111, - DW_AT_GNU_call_site_data_value = 0x2112, - DW_AT_GNU_call_site_target = 0x2113, - DW_AT_GNU_call_site_target_clobbered = 0x2114, - DW_AT_GNU_tail_call = 0x2115, - DW_AT_GNU_all_tail_call_sites = 0x2116, - DW_AT_GNU_all_call_sites = 0x2117, - DW_AT_GNU_all_source_call_sites = 0x2118, - DW_AT_GNU_macros = 0x2119, - DW_AT_GNU_deleted = 0x211a, - DW_AT_GNU_dwo_name = 0x2130, - DW_AT_GNU_dwo_id = 0x2131, - DW_AT_GNU_ranges_base = 0x2132, - DW_AT_GNU_addr_base = 0x2133, - DW_AT_GNU_pubnames = 0x2134, - DW_AT_GNU_pubtypes = 0x2135, - DW_AT_GNU_discriminator = 0x2136, - DW_AT_GNU_locviews = 0x2137, - DW_AT_GNU_entry_view = 0x2138, - DW_AT_VMS_rtnbeg_pd_address = 0x2201, - DW_AT_use_GNAT_descriptive_type = 0x2301, - DW_AT_GNAT_descriptive_type = 0x2302, - DW_AT_GNU_numerator = 0x2303, - DW_AT_GNU_denominator = 0x2304, - DW_AT_GNU_bias = 0x2305, - DW_AT_upc_threads_scaled = 0x3210, - DW_AT_PGI_lbase = 0x3a00, - DW_AT_PGI_soffset = 0x3a01, - DW_AT_PGI_lstride = 0x3a02, - DW_AT_APPLE_optimized = 0x3fe1, - DW_AT_APPLE_flags = 0x3fe2, - DW_AT_APPLE_isa = 0x3fe3, - DW_AT_APPLE_block = 0x3fe4, - DW_AT_APPLE_major_runtime_vers = 0x3fe5, - DW_AT_APPLE_runtime_class = 0x3fe6, - DW_AT_APPLE_omit_frame_ptr = 0x3fe7, - DW_AT_APPLE_property_name = 0x3fe8, - DW_AT_APPLE_property_getter = 0x3fe9, - DW_AT_APPLE_property_setter = 0x3fea, - DW_AT_APPLE_property_attribute = 0x3feb, - DW_AT_APPLE_objc_complete_type = 0x3fec, - DW_AT_APPLE_property = 0x3fed -}; - -enum dwarf_line_number_op { - DW_LNS_extended_op = 0x0, - DW_LNS_copy = 0x1, - DW_LNS_advance_pc = 0x2, - DW_LNS_advance_line = 0x3, - DW_LNS_set_file = 0x4, - DW_LNS_set_column = 0x5, - DW_LNS_negate_stmt = 0x6, - DW_LNS_set_basic_block = 0x7, - DW_LNS_const_add_pc = 0x8, - DW_LNS_fixed_advance_pc = 0x9, - DW_LNS_set_prologue_end = 0xa, - DW_LNS_set_epilogue_begin = 0xb, - DW_LNS_set_isa = 0xc, -}; - -enum dwarf_extended_line_number_op { - DW_LNE_end_sequence = 0x1, - DW_LNE_set_address = 0x2, - DW_LNE_define_file = 0x3, - DW_LNE_set_discriminator = 0x4, -}; - -enum dwarf_line_number_content_type { - DW_LNCT_path = 0x1, - DW_LNCT_directory_index = 0x2, - DW_LNCT_timestamp = 0x3, - DW_LNCT_size = 0x4, - DW_LNCT_MD5 = 0x5, - DW_LNCT_lo_user = 0x2000, - DW_LNCT_hi_user = 0x3fff -}; - -enum dwarf_range_list_entry { - DW_RLE_end_of_list = 0x00, - DW_RLE_base_addressx = 0x01, - DW_RLE_startx_endx = 0x02, - DW_RLE_startx_length = 0x03, - DW_RLE_offset_pair = 0x04, - DW_RLE_base_address = 0x05, - DW_RLE_start_end = 0x06, - DW_RLE_start_length = 0x07 -}; - -enum dwarf_unit_type { - DW_UT_compile = 0x01, - DW_UT_type = 0x02, - DW_UT_partial = 0x03, - DW_UT_skeleton = 0x04, - DW_UT_split_compile = 0x05, - DW_UT_split_type = 0x06, - DW_UT_lo_user = 0x80, - DW_UT_hi_user = 0xff -}; - -#if !defined(HAVE_DECL_STRNLEN) || !HAVE_DECL_STRNLEN - -/* If strnlen is not declared, provide our own version. */ - -static size_t -xstrnlen (const char *s, size_t maxlen) -{ - size_t i; - - for (i = 0; i < maxlen; ++i) - if (s[i] == '\0') - break; - return i; -} - -#define strnlen xstrnlen - -#endif - -/* A buffer to read DWARF info. */ - -struct dwarf_buf -{ - /* Buffer name for error messages. */ - const char *name; - /* Start of the buffer. */ - const unsigned char *start; - /* Next byte to read. */ - const unsigned char *buf; - /* The number of bytes remaining. */ - size_t left; - /* Whether the data is big-endian. */ - int is_bigendian; - /* Error callback routine. */ - backtrace_error_callback error_callback; - /* Data for error_callback. */ - void *data; - /* Non-zero if we've reported an underflow error. */ - int reported_underflow; -}; - -/* A single attribute in a DWARF abbreviation. */ - -struct attr -{ - /* The attribute name. */ - enum dwarf_attribute name; - /* The attribute form. */ - enum dwarf_form form; - /* The attribute value, for DW_FORM_implicit_const. */ - int64_t val; -}; - -/* A single DWARF abbreviation. */ - -struct abbrev -{ - /* The abbrev code--the number used to refer to the abbrev. */ - uint64_t code; - /* The entry tag. */ - enum dwarf_tag tag; - /* Non-zero if this abbrev has child entries. */ - int has_children; - /* The number of attributes. */ - size_t num_attrs; - /* The attributes. */ - struct attr *attrs; -}; - -/* The DWARF abbreviations for a compilation unit. This structure - only exists while reading the compilation unit. Most DWARF readers - seem to a hash table to map abbrev ID's to abbrev entries. - However, we primarily care about GCC, and GCC simply issues ID's in - numerical order starting at 1. So we simply keep a sorted vector, - and try to just look up the code. */ - -struct abbrevs -{ - /* The number of abbrevs in the vector. */ - size_t num_abbrevs; - /* The abbrevs, sorted by the code field. */ - struct abbrev *abbrevs; -}; - -/* The different kinds of attribute values. */ - -enum attr_val_encoding -{ - /* No attribute value. */ - ATTR_VAL_NONE, - /* An address. */ - ATTR_VAL_ADDRESS, - /* An index into the .debug_addr section, whose value is relative to - the DW_AT_addr_base attribute of the compilation unit. */ - ATTR_VAL_ADDRESS_INDEX, - /* A unsigned integer. */ - ATTR_VAL_UINT, - /* A sigd integer. */ - ATTR_VAL_SINT, - /* A string. */ - ATTR_VAL_STRING, - /* An index into the .debug_str_offsets section. */ - ATTR_VAL_STRING_INDEX, - /* An offset to other data in the containing unit. */ - ATTR_VAL_REF_UNIT, - /* An offset to other data within the .debug_info section. */ - ATTR_VAL_REF_INFO, - /* An offset to other data within the alt .debug_info section. */ - ATTR_VAL_REF_ALT_INFO, - /* An offset to data in some other section. */ - ATTR_VAL_REF_SECTION, - /* A type signature. */ - ATTR_VAL_REF_TYPE, - /* An index into the .debug_rnglists section. */ - ATTR_VAL_RNGLISTS_INDEX, - /* A block of data (not represented). */ - ATTR_VAL_BLOCK, - /* An expression (not represented). */ - ATTR_VAL_EXPR, -}; - -/* An attribute value. */ - -struct attr_val -{ - /* How the value is stored in the field u. */ - enum attr_val_encoding encoding; - union - { - /* ATTR_VAL_ADDRESS*, ATTR_VAL_UINT, ATTR_VAL_REF*. */ - uint64_t uint; - /* ATTR_VAL_SINT. */ - int64_t sint; - /* ATTR_VAL_STRING. */ - const char *string; - /* ATTR_VAL_BLOCK not stored. */ - } u; -}; - -/* The line number program header. */ - -struct line_header -{ - /* The version of the line number information. */ - int version; - /* Address size. */ - int addrsize; - /* The minimum instruction length. */ - unsigned int min_insn_len; - /* The maximum number of ops per instruction. */ - unsigned int max_ops_per_insn; - /* The line base for special opcodes. */ - int line_base; - /* The line range for special opcodes. */ - unsigned int line_range; - /* The opcode base--the first special opcode. */ - unsigned int opcode_base; - /* Opcode lengths, indexed by opcode - 1. */ - const unsigned char *opcode_lengths; - /* The number of directory entries. */ - size_t dirs_count; - /* The directory entries. */ - const char **dirs; - /* The number of filenames. */ - size_t filenames_count; - /* The filenames. */ - const char **filenames; -}; - -/* A format description from a line header. */ - -struct line_header_format -{ - int lnct; /* LNCT code. */ - enum dwarf_form form; /* Form of entry data. */ -}; - -/* Map a single PC value to a file/line. We will keep a vector of - these sorted by PC value. Each file/line will be correct from the - PC up to the PC of the next entry if there is one. We allocate one - extra entry at the end so that we can use bsearch. */ - -struct line -{ - /* PC. */ - uintptr_t pc; - /* File name. Many entries in the array are expected to point to - the same file name. */ - const char *filename; - /* Line number. */ - int lineno; - /* Index of the object in the original array read from the DWARF - section, before it has been sorted. The index makes it possible - to use Quicksort and maintain stability. */ - int idx; -}; - -/* A growable vector of line number information. This is used while - reading the line numbers. */ - -struct line_vector -{ - /* Memory. This is an array of struct line. */ - struct backtrace_vector vec; - /* Number of valid mappings. */ - size_t count; -}; - -/* A function described in the debug info. */ - -struct function -{ - /* The name of the function. */ - const char *name; - /* If this is an inlined function, the filename of the call - site. */ - const char *caller_filename; - /* If this is an inlined function, the line number of the call - site. */ - int caller_lineno; - /* Map PC ranges to inlined functions. */ - struct function_addrs *function_addrs; - size_t function_addrs_count; -}; - -/* An address range for a function. This maps a PC value to a - specific function. */ - -struct function_addrs -{ - /* Range is LOW <= PC < HIGH. */ - uintptr_t low; - uintptr_t high; - /* Function for this address range. */ - struct function *function; -}; - -/* A growable vector of function address ranges. */ - -struct function_vector -{ - /* Memory. This is an array of struct function_addrs. */ - struct backtrace_vector vec; - /* Number of address ranges present. */ - size_t count; -}; - -/* A DWARF compilation unit. This only holds the information we need - to map a PC to a file and line. */ - -struct unit -{ - /* The first entry for this compilation unit. */ - const unsigned char *unit_data; - /* The length of the data for this compilation unit. */ - size_t unit_data_len; - /* The offset of UNIT_DATA from the start of the information for - this compilation unit. */ - size_t unit_data_offset; - /* Offset of the start of the compilation unit from the start of the - .debug_info section. */ - size_t low_offset; - /* Offset of the end of the compilation unit from the start of the - .debug_info section. */ - size_t high_offset; - /* DWARF version. */ - int version; - /* Whether unit is DWARF64. */ - int is_dwarf64; - /* Address size. */ - int addrsize; - /* Offset into line number information. */ - off_t lineoff; - /* Offset of compilation unit in .debug_str_offsets. */ - uint64_t str_offsets_base; - /* Offset of compilation unit in .debug_addr. */ - uint64_t addr_base; - /* Offset of compilation unit in .debug_rnglists. */ - uint64_t rnglists_base; - /* Primary source file. */ - const char *filename; - /* Compilation command working directory. */ - const char *comp_dir; - /* Absolute file name, only set if needed. */ - const char *abs_filename; - /* The abbreviations for this unit. */ - struct abbrevs abbrevs; - - /* The fields above this point are read in during initialization and - may be accessed freely. The fields below this point are read in - as needed, and therefore require care, as different threads may - try to initialize them simultaneously. */ - - /* PC to line number mapping. This is NULL if the values have not - been read. This is (struct line *) -1 if there was an error - reading the values. */ - struct line *lines; - /* Number of entries in lines. */ - size_t lines_count; - /* PC ranges to function. */ - struct function_addrs *function_addrs; - size_t function_addrs_count; -}; - -/* An address range for a compilation unit. This maps a PC value to a - specific compilation unit. Note that we invert the representation - in DWARF: instead of listing the units and attaching a list of - ranges, we list the ranges and have each one point to the unit. - This lets us do a binary search to find the unit. */ - -struct unit_addrs -{ - /* Range is LOW <= PC < HIGH. */ - uintptr_t low; - uintptr_t high; - /* Compilation unit for this address range. */ - struct unit *u; -}; - -/* A growable vector of compilation unit address ranges. */ - -struct unit_addrs_vector -{ - /* Memory. This is an array of struct unit_addrs. */ - struct backtrace_vector vec; - /* Number of address ranges present. */ - size_t count; -}; - -/* A growable vector of compilation unit pointer. */ - -struct unit_vector -{ - struct backtrace_vector vec; - size_t count; -}; - -/* The information we need to map a PC to a file and line. */ - -struct dwarf_data -{ - /* The data for the next file we know about. */ - struct dwarf_data *next; - /* The data for .gnu_debugaltlink. */ - struct dwarf_data *altlink; -/* The base address mapping for this file. */ - struct libbacktrace_base_address base_address; - /* A sorted list of address ranges. */ - struct unit_addrs *addrs; - /* Number of address ranges in list. */ - size_t addrs_count; - /* A sorted list of units. */ - struct unit **units; - /* Number of units in the list. */ - size_t units_count; - /* The unparsed DWARF debug data. */ - struct dwarf_sections dwarf_sections; - /* Whether the data is big-endian or not. */ - int is_bigendian; - /* A vector used for function addresses. We keep this here so that - we can grow the vector as we read more functions. */ - struct function_vector fvec; -}; - -/* Report an error for a DWARF buffer. */ - -static void -dwarf_buf_error (struct dwarf_buf *buf, const char *msg, int errnum) -{ - char b[200]; - - snprintf (b, sizeof b, "%s in %s at %d", - msg, buf->name, (int) (buf->buf - buf->start)); - buf->error_callback (buf->data, b, errnum); -} - -/* Require at least COUNT bytes in BUF. Return 1 if all is well, 0 on - error. */ - -static int -require (struct dwarf_buf *buf, size_t count) -{ - if (buf->left >= count) - return 1; - - if (!buf->reported_underflow) - { - dwarf_buf_error (buf, "DWARF underflow", 0); - buf->reported_underflow = 1; - } - - return 0; -} - -/* Advance COUNT bytes in BUF. Return 1 if all is well, 0 on - error. */ - -static int -advance (struct dwarf_buf *buf, size_t count) -{ - if (!require (buf, count)) - return 0; - buf->buf += count; - buf->left -= count; - return 1; -} - -/* Read one zero-terminated string from BUF and advance past the string. */ - -static const char * -read_string (struct dwarf_buf *buf) -{ - const char *p = (const char *)buf->buf; - size_t len = strnlen (p, buf->left); - - /* - If len == left, we ran out of buffer before finding the zero terminator. - Generate an error by advancing len + 1. - - If len < left, advance by len + 1 to skip past the zero terminator. */ - size_t count = len + 1; - - if (!advance (buf, count)) - return NULL; - - return p; -} - -/* Read one byte from BUF and advance 1 byte. */ - -static unsigned char -read_byte (struct dwarf_buf *buf) -{ - const unsigned char *p = buf->buf; - - if (!advance (buf, 1)) - return 0; - return p[0]; -} - -/* Read a signed char from BUF and advance 1 byte. */ - -static signed char -read_sbyte (struct dwarf_buf *buf) -{ - const unsigned char *p = buf->buf; - - if (!advance (buf, 1)) - return 0; - return (*p ^ 0x80) - 0x80; -} - -/* Read a uint16 from BUF and advance 2 bytes. */ - -static uint16_t -read_uint16 (struct dwarf_buf *buf) -{ - const unsigned char *p = buf->buf; - - if (!advance (buf, 2)) - return 0; - if (buf->is_bigendian) - return ((uint16_t) p[0] << 8) | (uint16_t) p[1]; - else - return ((uint16_t) p[1] << 8) | (uint16_t) p[0]; -} - -/* Read a 24 bit value from BUF and advance 3 bytes. */ - -static uint32_t -read_uint24 (struct dwarf_buf *buf) -{ - const unsigned char *p = buf->buf; - - if (!advance (buf, 3)) - return 0; - if (buf->is_bigendian) - return (((uint32_t) p[0] << 16) | ((uint32_t) p[1] << 8) - | (uint32_t) p[2]); - else - return (((uint32_t) p[2] << 16) | ((uint32_t) p[1] << 8) - | (uint32_t) p[0]); -} - -/* Read a uint32 from BUF and advance 4 bytes. */ - -static uint32_t -read_uint32 (struct dwarf_buf *buf) -{ - const unsigned char *p = buf->buf; - - if (!advance (buf, 4)) - return 0; - if (buf->is_bigendian) - return (((uint32_t) p[0] << 24) | ((uint32_t) p[1] << 16) - | ((uint32_t) p[2] << 8) | (uint32_t) p[3]); - else - return (((uint32_t) p[3] << 24) | ((uint32_t) p[2] << 16) - | ((uint32_t) p[1] << 8) | (uint32_t) p[0]); -} - -/* Read a uint64 from BUF and advance 8 bytes. */ - -static uint64_t -read_uint64 (struct dwarf_buf *buf) -{ - const unsigned char *p = buf->buf; - - if (!advance (buf, 8)) - return 0; - if (buf->is_bigendian) - return (((uint64_t) p[0] << 56) | ((uint64_t) p[1] << 48) - | ((uint64_t) p[2] << 40) | ((uint64_t) p[3] << 32) - | ((uint64_t) p[4] << 24) | ((uint64_t) p[5] << 16) - | ((uint64_t) p[6] << 8) | (uint64_t) p[7]); - else - return (((uint64_t) p[7] << 56) | ((uint64_t) p[6] << 48) - | ((uint64_t) p[5] << 40) | ((uint64_t) p[4] << 32) - | ((uint64_t) p[3] << 24) | ((uint64_t) p[2] << 16) - | ((uint64_t) p[1] << 8) | (uint64_t) p[0]); -} - -/* Read an offset from BUF and advance the appropriate number of - bytes. */ - -static uint64_t -read_offset (struct dwarf_buf *buf, int is_dwarf64) -{ - if (is_dwarf64) - return read_uint64 (buf); - else - return read_uint32 (buf); -} - -/* Read an address from BUF and advance the appropriate number of - bytes. */ - -static uint64_t -read_address (struct dwarf_buf *buf, int addrsize) -{ - switch (addrsize) - { - case 1: - return read_byte (buf); - case 2: - return read_uint16 (buf); - case 4: - return read_uint32 (buf); - case 8: - return read_uint64 (buf); - default: - dwarf_buf_error (buf, "unrecognized address size", 0); - return 0; - } -} - -/* Return whether a value is the highest possible address, given the - address size. */ - -static int -is_highest_address (uint64_t address, int addrsize) -{ - switch (addrsize) - { - case 1: - return address == (unsigned char) -1; - case 2: - return address == (uint16_t) -1; - case 4: - return address == (uint32_t) -1; - case 8: - return address == (uint64_t) -1; - default: - return 0; - } -} - -/* Read an unsigned LEB128 number. */ - -static uint64_t -read_uleb128 (struct dwarf_buf *buf) -{ - uint64_t ret; - unsigned int shift; - int overflow; - unsigned char b; - - ret = 0; - shift = 0; - overflow = 0; - do - { - const unsigned char *p; - - p = buf->buf; - if (!advance (buf, 1)) - return 0; - b = *p; - if (shift < 64) - ret |= ((uint64_t) (b & 0x7f)) << shift; - else if (!overflow) - { - dwarf_buf_error (buf, "LEB128 overflows uint64_t", 0); - overflow = 1; - } - shift += 7; - } - while ((b & 0x80) != 0); - - return ret; -} - -/* Read a signed LEB128 number. */ - -static int64_t -read_sleb128 (struct dwarf_buf *buf) -{ - uint64_t val; - unsigned int shift; - int overflow; - unsigned char b; - - val = 0; - shift = 0; - overflow = 0; - do - { - const unsigned char *p; - - p = buf->buf; - if (!advance (buf, 1)) - return 0; - b = *p; - if (shift < 64) - val |= ((uint64_t) (b & 0x7f)) << shift; - else if (!overflow) - { - dwarf_buf_error (buf, "signed LEB128 overflows uint64_t", 0); - overflow = 1; - } - shift += 7; - } - while ((b & 0x80) != 0); - - if ((b & 0x40) != 0 && shift < 64) - val |= ((uint64_t) -1) << shift; - - return (int64_t) val; -} - -/* Return the length of an LEB128 number. */ - -static size_t -leb128_len (const unsigned char *p) -{ - size_t ret; - - ret = 1; - while ((*p & 0x80) != 0) - { - ++p; - ++ret; - } - return ret; -} - -/* Read initial_length from BUF and advance the appropriate number of bytes. */ - -static uint64_t -read_initial_length (struct dwarf_buf *buf, int *is_dwarf64) -{ - uint64_t len; - - len = read_uint32 (buf); - if (len == 0xffffffff) - { - len = read_uint64 (buf); - *is_dwarf64 = 1; - } - else - *is_dwarf64 = 0; - - return len; -} - -/* Free an abbreviations structure. */ - -static void -free_abbrevs (struct backtrace_state *state, struct abbrevs *abbrevs, - backtrace_error_callback error_callback, void *data) -{ - size_t i; - - for (i = 0; i < abbrevs->num_abbrevs; ++i) - backtrace_free (state, abbrevs->abbrevs[i].attrs, - abbrevs->abbrevs[i].num_attrs * sizeof (struct attr), - error_callback, data); - backtrace_free (state, abbrevs->abbrevs, - abbrevs->num_abbrevs * sizeof (struct abbrev), - error_callback, data); - abbrevs->num_abbrevs = 0; - abbrevs->abbrevs = NULL; -} - -/* Read an attribute value. Returns 1 on success, 0 on failure. If - the value can be represented as a uint64_t, sets *VAL and sets - *IS_VALID to 1. We don't try to store the value of other attribute - forms, because we don't care about them. */ - -static int -read_attribute (enum dwarf_form form, uint64_t implicit_val, - struct dwarf_buf *buf, int is_dwarf64, int version, - int addrsize, const struct dwarf_sections *dwarf_sections, - struct dwarf_data *altlink, struct attr_val *val) -{ - /* Avoid warnings about val.u.FIELD may be used uninitialized if - this function is inlined. The warnings aren't valid but can - occur because the different fields are set and used - conditionally. */ - memset (val, 0, sizeof *val); - - switch (form) - { - case DW_FORM_addr: - val->encoding = ATTR_VAL_ADDRESS; - val->u.uint = read_address (buf, addrsize); - return 1; - case DW_FORM_block2: - val->encoding = ATTR_VAL_BLOCK; - return advance (buf, read_uint16 (buf)); - case DW_FORM_block4: - val->encoding = ATTR_VAL_BLOCK; - return advance (buf, read_uint32 (buf)); - case DW_FORM_data2: - val->encoding = ATTR_VAL_UINT; - val->u.uint = read_uint16 (buf); - return 1; - case DW_FORM_data4: - val->encoding = ATTR_VAL_UINT; - val->u.uint = read_uint32 (buf); - return 1; - case DW_FORM_data8: - val->encoding = ATTR_VAL_UINT; - val->u.uint = read_uint64 (buf); - return 1; - case DW_FORM_data16: - val->encoding = ATTR_VAL_BLOCK; - return advance (buf, 16); - case DW_FORM_string: - val->encoding = ATTR_VAL_STRING; - val->u.string = read_string (buf); - return val->u.string == NULL ? 0 : 1; - case DW_FORM_block: - val->encoding = ATTR_VAL_BLOCK; - return advance (buf, read_uleb128 (buf)); - case DW_FORM_block1: - val->encoding = ATTR_VAL_BLOCK; - return advance (buf, read_byte (buf)); - case DW_FORM_data1: - val->encoding = ATTR_VAL_UINT; - val->u.uint = read_byte (buf); - return 1; - case DW_FORM_flag: - val->encoding = ATTR_VAL_UINT; - val->u.uint = read_byte (buf); - return 1; - case DW_FORM_sdata: - val->encoding = ATTR_VAL_SINT; - val->u.sint = read_sleb128 (buf); - return 1; - case DW_FORM_strp: - { - uint64_t offset; - - offset = read_offset (buf, is_dwarf64); - if (offset >= dwarf_sections->size[DEBUG_STR]) - { - dwarf_buf_error (buf, "DW_FORM_strp out of range", 0); - return 0; - } - val->encoding = ATTR_VAL_STRING; - val->u.string = - (const char *) dwarf_sections->data[DEBUG_STR] + offset; - return 1; - } - case DW_FORM_line_strp: - { - uint64_t offset; - - offset = read_offset (buf, is_dwarf64); - if (offset >= dwarf_sections->size[DEBUG_LINE_STR]) - { - dwarf_buf_error (buf, "DW_FORM_line_strp out of range", 0); - return 0; - } - val->encoding = ATTR_VAL_STRING; - val->u.string = - (const char *) dwarf_sections->data[DEBUG_LINE_STR] + offset; - return 1; - } - case DW_FORM_udata: - val->encoding = ATTR_VAL_UINT; - val->u.uint = read_uleb128 (buf); - return 1; - case DW_FORM_ref_addr: - val->encoding = ATTR_VAL_REF_INFO; - if (version == 2) - val->u.uint = read_address (buf, addrsize); - else - val->u.uint = read_offset (buf, is_dwarf64); - return 1; - case DW_FORM_ref1: - val->encoding = ATTR_VAL_REF_UNIT; - val->u.uint = read_byte (buf); - return 1; - case DW_FORM_ref2: - val->encoding = ATTR_VAL_REF_UNIT; - val->u.uint = read_uint16 (buf); - return 1; - case DW_FORM_ref4: - val->encoding = ATTR_VAL_REF_UNIT; - val->u.uint = read_uint32 (buf); - return 1; - case DW_FORM_ref8: - val->encoding = ATTR_VAL_REF_UNIT; - val->u.uint = read_uint64 (buf); - return 1; - case DW_FORM_ref_udata: - val->encoding = ATTR_VAL_REF_UNIT; - val->u.uint = read_uleb128 (buf); - return 1; - case DW_FORM_indirect: - { - uint64_t form; - - form = read_uleb128 (buf); - if (form == DW_FORM_implicit_const) - { - dwarf_buf_error (buf, - "DW_FORM_indirect to DW_FORM_implicit_const", - 0); - return 0; - } - return read_attribute ((enum dwarf_form) form, 0, buf, is_dwarf64, - version, addrsize, dwarf_sections, altlink, - val); - } - case DW_FORM_sec_offset: - val->encoding = ATTR_VAL_REF_SECTION; - val->u.uint = read_offset (buf, is_dwarf64); - return 1; - case DW_FORM_exprloc: - val->encoding = ATTR_VAL_EXPR; - return advance (buf, read_uleb128 (buf)); - case DW_FORM_flag_present: - val->encoding = ATTR_VAL_UINT; - val->u.uint = 1; - return 1; - case DW_FORM_ref_sig8: - val->encoding = ATTR_VAL_REF_TYPE; - val->u.uint = read_uint64 (buf); - return 1; - case DW_FORM_strx: case DW_FORM_strx1: case DW_FORM_strx2: - case DW_FORM_strx3: case DW_FORM_strx4: - { - uint64_t offset; - - switch (form) - { - case DW_FORM_strx: - offset = read_uleb128 (buf); - break; - case DW_FORM_strx1: - offset = read_byte (buf); - break; - case DW_FORM_strx2: - offset = read_uint16 (buf); - break; - case DW_FORM_strx3: - offset = read_uint24 (buf); - break; - case DW_FORM_strx4: - offset = read_uint32 (buf); - break; - default: - /* This case can't happen. */ - return 0; - } - val->encoding = ATTR_VAL_STRING_INDEX; - val->u.uint = offset; - return 1; - } - case DW_FORM_addrx: case DW_FORM_addrx1: case DW_FORM_addrx2: - case DW_FORM_addrx3: case DW_FORM_addrx4: - { - uint64_t offset; - - switch (form) - { - case DW_FORM_addrx: - offset = read_uleb128 (buf); - break; - case DW_FORM_addrx1: - offset = read_byte (buf); - break; - case DW_FORM_addrx2: - offset = read_uint16 (buf); - break; - case DW_FORM_addrx3: - offset = read_uint24 (buf); - break; - case DW_FORM_addrx4: - offset = read_uint32 (buf); - break; - default: - /* This case can't happen. */ - return 0; - } - val->encoding = ATTR_VAL_ADDRESS_INDEX; - val->u.uint = offset; - return 1; - } - case DW_FORM_ref_sup4: - val->encoding = ATTR_VAL_REF_SECTION; - val->u.uint = read_uint32 (buf); - return 1; - case DW_FORM_ref_sup8: - val->encoding = ATTR_VAL_REF_SECTION; - val->u.uint = read_uint64 (buf); - return 1; - case DW_FORM_implicit_const: - val->encoding = ATTR_VAL_UINT; - val->u.uint = implicit_val; - return 1; - case DW_FORM_loclistx: - /* We don't distinguish this from DW_FORM_sec_offset. It - * shouldn't matter since we don't care about loclists. */ - val->encoding = ATTR_VAL_REF_SECTION; - val->u.uint = read_uleb128 (buf); - return 1; - case DW_FORM_rnglistx: - val->encoding = ATTR_VAL_RNGLISTS_INDEX; - val->u.uint = read_uleb128 (buf); - return 1; - case DW_FORM_GNU_addr_index: - val->encoding = ATTR_VAL_REF_SECTION; - val->u.uint = read_uleb128 (buf); - return 1; - case DW_FORM_GNU_str_index: - val->encoding = ATTR_VAL_REF_SECTION; - val->u.uint = read_uleb128 (buf); - return 1; - case DW_FORM_GNU_ref_alt: - val->u.uint = read_offset (buf, is_dwarf64); - if (altlink == NULL) - { - val->encoding = ATTR_VAL_NONE; - return 1; - } - val->encoding = ATTR_VAL_REF_ALT_INFO; - return 1; - case DW_FORM_strp_sup: case DW_FORM_GNU_strp_alt: - { - uint64_t offset; - - offset = read_offset (buf, is_dwarf64); - if (altlink == NULL) - { - val->encoding = ATTR_VAL_NONE; - return 1; - } - if (offset >= altlink->dwarf_sections.size[DEBUG_STR]) - { - dwarf_buf_error (buf, "DW_FORM_strp_sup out of range", 0); - return 0; - } - val->encoding = ATTR_VAL_STRING; - val->u.string = - (const char *) altlink->dwarf_sections.data[DEBUG_STR] + offset; - return 1; - } - default: - dwarf_buf_error (buf, "unrecognized DWARF form", -1); - return 0; - } -} - -/* If we can determine the value of a string attribute, set *STRING to - point to the string. Return 1 on success, 0 on error. If we don't - know the value, we consider that a success, and we don't change - *STRING. An error is only reported for some sort of out of range - offset. */ - -static int -resolve_string (const struct dwarf_sections *dwarf_sections, int is_dwarf64, - int is_bigendian, uint64_t str_offsets_base, - const struct attr_val *val, - backtrace_error_callback error_callback, void *data, - const char **string) -{ - switch (val->encoding) - { - case ATTR_VAL_STRING: - *string = val->u.string; - return 1; - - case ATTR_VAL_STRING_INDEX: - { - uint64_t offset; - struct dwarf_buf offset_buf; - - offset = val->u.uint * (is_dwarf64 ? 8 : 4) + str_offsets_base; - if (offset + (is_dwarf64 ? 8 : 4) - > dwarf_sections->size[DEBUG_STR_OFFSETS]) - { - error_callback (data, "DW_FORM_strx value out of range", 0); - return 0; - } - - offset_buf.name = ".debug_str_offsets"; - offset_buf.start = dwarf_sections->data[DEBUG_STR_OFFSETS]; - offset_buf.buf = dwarf_sections->data[DEBUG_STR_OFFSETS] + offset; - offset_buf.left = dwarf_sections->size[DEBUG_STR_OFFSETS] - offset; - offset_buf.is_bigendian = is_bigendian; - offset_buf.error_callback = error_callback; - offset_buf.data = data; - offset_buf.reported_underflow = 0; - - offset = read_offset (&offset_buf, is_dwarf64); - if (offset >= dwarf_sections->size[DEBUG_STR]) - { - dwarf_buf_error (&offset_buf, - "DW_FORM_strx offset out of range", - 0); - return 0; - } - *string = (const char *) dwarf_sections->data[DEBUG_STR] + offset; - return 1; - } - - default: - return 1; - } -} - -/* Set *ADDRESS to the real address for a ATTR_VAL_ADDRESS_INDEX. - Return 1 on success, 0 on error. */ - -static int -resolve_addr_index (const struct dwarf_sections *dwarf_sections, - uint64_t addr_base, int addrsize, int is_bigendian, - uint64_t addr_index, - backtrace_error_callback error_callback, void *data, - uintptr_t *address) -{ - uint64_t offset; - struct dwarf_buf addr_buf; - - offset = addr_index * addrsize + addr_base; - if (offset + addrsize > dwarf_sections->size[DEBUG_ADDR]) - { - error_callback (data, "DW_FORM_addrx value out of range", 0); - return 0; - } - - addr_buf.name = ".debug_addr"; - addr_buf.start = dwarf_sections->data[DEBUG_ADDR]; - addr_buf.buf = dwarf_sections->data[DEBUG_ADDR] + offset; - addr_buf.left = dwarf_sections->size[DEBUG_ADDR] - offset; - addr_buf.is_bigendian = is_bigendian; - addr_buf.error_callback = error_callback; - addr_buf.data = data; - addr_buf.reported_underflow = 0; - - *address = (uintptr_t) read_address (&addr_buf, addrsize); - return 1; -} - -/* Compare a unit offset against a unit for bsearch. */ - -static int -units_search (const void *vkey, const void *ventry) -{ - const size_t *key = (const size_t *) vkey; - const struct unit *entry = *((const struct unit *const *) ventry); - size_t offset; - - offset = *key; - if (offset < entry->low_offset) - return -1; - else if (offset >= entry->high_offset) - return 1; - else - return 0; -} - -/* Find a unit in PU containing OFFSET. */ - -static struct unit * -find_unit (struct unit **pu, size_t units_count, size_t offset) -{ - struct unit **u; - u = (struct unit**)bsearch (&offset, pu, units_count, sizeof (struct unit *), units_search); - return u == NULL ? NULL : *u; -} - -/* Compare function_addrs for qsort. When ranges are nested, make the - smallest one sort last. */ - -static int -function_addrs_compare (const void *v1, const void *v2) -{ - const struct function_addrs *a1 = (const struct function_addrs *) v1; - const struct function_addrs *a2 = (const struct function_addrs *) v2; - - if (a1->low < a2->low) - return -1; - if (a1->low > a2->low) - return 1; - if (a1->high < a2->high) - return 1; - if (a1->high > a2->high) - return -1; - return strcmp (a1->function->name, a2->function->name); -} - -/* Compare a PC against a function_addrs for bsearch. We always - allocate an entra entry at the end of the vector, so that this - routine can safely look at the next entry. Note that if there are - multiple ranges containing PC, which one will be returned is - unpredictable. We compensate for that in dwarf_fileline. */ - -static int -function_addrs_search (const void *vkey, const void *ventry) -{ - const uintptr_t *key = (const uintptr_t *) vkey; - const struct function_addrs *entry = (const struct function_addrs *) ventry; - uintptr_t pc; - - pc = *key; - if (pc < entry->low) - return -1; - else if (pc > (entry + 1)->low) - return 1; - else - return 0; -} - -/* Add a new compilation unit address range to a vector. This is - called via add_ranges. Returns 1 on success, 0 on failure. */ - -static int -add_unit_addr (struct backtrace_state *state, void *rdata, - uintptr_t lowpc, uintptr_t highpc, - backtrace_error_callback error_callback, void *data, - void *pvec) -{ - struct unit *u = (struct unit *) rdata; - struct unit_addrs_vector *vec = (struct unit_addrs_vector *) pvec; - struct unit_addrs *p; - - /* Try to merge with the last entry. */ - if (vec->count > 0) - { - p = (struct unit_addrs *) vec->vec.base + (vec->count - 1); - if ((lowpc == p->high || lowpc == p->high + 1) - && u == p->u) - { - if (highpc > p->high) - p->high = highpc; - return 1; - } - } - - p = ((struct unit_addrs *) - backtrace_vector_grow (state, sizeof (struct unit_addrs), - error_callback, data, &vec->vec)); - if (p == NULL) - return 0; - - p->low = lowpc; - p->high = highpc; - p->u = u; - - ++vec->count; - - return 1; -} - -/* Compare unit_addrs for qsort. When ranges are nested, make the - smallest one sort last. */ - -static int -unit_addrs_compare (const void *v1, const void *v2) -{ - const struct unit_addrs *a1 = (const struct unit_addrs *) v1; - const struct unit_addrs *a2 = (const struct unit_addrs *) v2; - - if (a1->low < a2->low) - return -1; - if (a1->low > a2->low) - return 1; - if (a1->high < a2->high) - return 1; - if (a1->high > a2->high) - return -1; - if (a1->u->lineoff < a2->u->lineoff) - return -1; - if (a1->u->lineoff > a2->u->lineoff) - return 1; - return 0; -} - -/* Compare a PC against a unit_addrs for bsearch. We always allocate - an entry entry at the end of the vector, so that this routine can - safely look at the next entry. Note that if there are multiple - ranges containing PC, which one will be returned is unpredictable. - We compensate for that in dwarf_fileline. */ - -static int -unit_addrs_search (const void *vkey, const void *ventry) -{ - const uintptr_t *key = (const uintptr_t *) vkey; - const struct unit_addrs *entry = (const struct unit_addrs *) ventry; - uintptr_t pc; - - pc = *key; - if (pc < entry->low) - return -1; - else if (pc > (entry + 1)->low) - return 1; - else - return 0; -} - -/* Sort the line vector by PC. We want a stable sort here to maintain - the order of lines for the same PC values. Since the sequence is - being sorted in place, their addresses cannot be relied on to - maintain stability. That is the purpose of the index member. */ - -static int -line_compare (const void *v1, const void *v2) -{ - const struct line *ln1 = (const struct line *) v1; - const struct line *ln2 = (const struct line *) v2; - - if (ln1->pc < ln2->pc) - return -1; - else if (ln1->pc > ln2->pc) - return 1; - else if (ln1->idx < ln2->idx) - return -1; - else if (ln1->idx > ln2->idx) - return 1; - else - return 0; -} - -/* Find a PC in a line vector. We always allocate an extra entry at - the end of the lines vector, so that this routine can safely look - at the next entry. Note that when there are multiple mappings for - the same PC value, this will return the last one. */ - -static int -line_search (const void *vkey, const void *ventry) -{ - const uintptr_t *key = (const uintptr_t *) vkey; - const struct line *entry = (const struct line *) ventry; - uintptr_t pc; - - pc = *key; - if (pc < entry->pc) - return -1; - else if (pc >= (entry + 1)->pc) - return 1; - else - return 0; -} - -/* Sort the abbrevs by the abbrev code. This function is passed to - both qsort and bsearch. */ - -static int -abbrev_compare (const void *v1, const void *v2) -{ - const struct abbrev *a1 = (const struct abbrev *) v1; - const struct abbrev *a2 = (const struct abbrev *) v2; - - if (a1->code < a2->code) - return -1; - else if (a1->code > a2->code) - return 1; - else - { - /* This really shouldn't happen. It means there are two - different abbrevs with the same code, and that means we don't - know which one lookup_abbrev should return. */ - return 0; - } -} - -/* Read the abbreviation table for a compilation unit. Returns 1 on - success, 0 on failure. */ - -static int -read_abbrevs (struct backtrace_state *state, uint64_t abbrev_offset, - const unsigned char *dwarf_abbrev, size_t dwarf_abbrev_size, - int is_bigendian, backtrace_error_callback error_callback, - void *data, struct abbrevs *abbrevs) -{ - struct dwarf_buf abbrev_buf; - struct dwarf_buf count_buf; - size_t num_abbrevs; - - abbrevs->num_abbrevs = 0; - abbrevs->abbrevs = NULL; - - if (abbrev_offset >= dwarf_abbrev_size) - { - error_callback (data, "abbrev offset out of range", 0); - return 0; - } - - abbrev_buf.name = ".debug_abbrev"; - abbrev_buf.start = dwarf_abbrev; - abbrev_buf.buf = dwarf_abbrev + abbrev_offset; - abbrev_buf.left = dwarf_abbrev_size - abbrev_offset; - abbrev_buf.is_bigendian = is_bigendian; - abbrev_buf.error_callback = error_callback; - abbrev_buf.data = data; - abbrev_buf.reported_underflow = 0; - - /* Count the number of abbrevs in this list. */ - - count_buf = abbrev_buf; - num_abbrevs = 0; - while (read_uleb128 (&count_buf) != 0) - { - if (count_buf.reported_underflow) - return 0; - ++num_abbrevs; - // Skip tag. - read_uleb128 (&count_buf); - // Skip has_children. - read_byte (&count_buf); - // Skip attributes. - while (read_uleb128 (&count_buf) != 0) - { - uint64_t form; - - form = read_uleb128 (&count_buf); - if ((enum dwarf_form) form == DW_FORM_implicit_const) - read_sleb128 (&count_buf); - } - // Skip form of last attribute. - read_uleb128 (&count_buf); - } - - if (count_buf.reported_underflow) - return 0; - - if (num_abbrevs == 0) - return 1; - - abbrevs->abbrevs = ((struct abbrev *) - backtrace_alloc (state, - num_abbrevs * sizeof (struct abbrev), - error_callback, data)); - if (abbrevs->abbrevs == NULL) - return 0; - abbrevs->num_abbrevs = num_abbrevs; - memset (abbrevs->abbrevs, 0, num_abbrevs * sizeof (struct abbrev)); - - num_abbrevs = 0; - while (1) - { - uint64_t code; - struct abbrev a; - size_t num_attrs; - struct attr *attrs; - - if (abbrev_buf.reported_underflow) - goto fail; - - code = read_uleb128 (&abbrev_buf); - if (code == 0) - break; - - a.code = code; - a.tag = (enum dwarf_tag) read_uleb128 (&abbrev_buf); - a.has_children = read_byte (&abbrev_buf); - - count_buf = abbrev_buf; - num_attrs = 0; - while (read_uleb128 (&count_buf) != 0) - { - uint64_t form; - - ++num_attrs; - form = read_uleb128 (&count_buf); - if ((enum dwarf_form) form == DW_FORM_implicit_const) - read_sleb128 (&count_buf); - } - - if (num_attrs == 0) - { - attrs = NULL; - read_uleb128 (&abbrev_buf); - read_uleb128 (&abbrev_buf); - } - else - { - attrs = ((struct attr *) - backtrace_alloc (state, num_attrs * sizeof *attrs, - error_callback, data)); - if (attrs == NULL) - goto fail; - num_attrs = 0; - while (1) - { - uint64_t name; - uint64_t form; - - name = read_uleb128 (&abbrev_buf); - form = read_uleb128 (&abbrev_buf); - if (name == 0) - break; - attrs[num_attrs].name = (enum dwarf_attribute) name; - attrs[num_attrs].form = (enum dwarf_form) form; - if ((enum dwarf_form) form == DW_FORM_implicit_const) - attrs[num_attrs].val = read_sleb128 (&abbrev_buf); - else - attrs[num_attrs].val = 0; - ++num_attrs; - } - } - - a.num_attrs = num_attrs; - a.attrs = attrs; - - abbrevs->abbrevs[num_abbrevs] = a; - ++num_abbrevs; - } - - backtrace_qsort (abbrevs->abbrevs, abbrevs->num_abbrevs, - sizeof (struct abbrev), abbrev_compare); - - return 1; - - fail: - free_abbrevs (state, abbrevs, error_callback, data); - return 0; -} - -/* Return the abbrev information for an abbrev code. */ - -static const struct abbrev * -lookup_abbrev (struct abbrevs *abbrevs, uint64_t code, - backtrace_error_callback error_callback, void *data) -{ - struct abbrev key; - void *p; - - /* With GCC, where abbrevs are simply numbered in order, we should - be able to just look up the entry. */ - if (code - 1 < abbrevs->num_abbrevs - && abbrevs->abbrevs[code - 1].code == code) - return &abbrevs->abbrevs[code - 1]; - - /* Otherwise we have to search. */ - memset (&key, 0, sizeof key); - key.code = code; - p = bsearch (&key, abbrevs->abbrevs, abbrevs->num_abbrevs, - sizeof (struct abbrev), abbrev_compare); - if (p == NULL) - { - error_callback (data, "invalid abbreviation code", 0); - return NULL; - } - return (const struct abbrev *) p; -} - -/* This struct is used to gather address range information while - reading attributes. We use this while building a mapping from - address ranges to compilation units and then again while mapping - from address ranges to function entries. Normally either - lowpc/highpc is set or ranges is set. */ - -struct pcrange { - uintptr_t lowpc; /* The low PC value. */ - int have_lowpc; /* Whether a low PC value was found. */ - int lowpc_is_addr_index; /* Whether lowpc is in .debug_addr. */ - uintptr_t highpc; /* The high PC value. */ - int have_highpc; /* Whether a high PC value was found. */ - int highpc_is_relative; /* Whether highpc is relative to lowpc. */ - int highpc_is_addr_index; /* Whether highpc is in .debug_addr. */ - uint64_t ranges; /* Offset in ranges section. */ - int have_ranges; /* Whether ranges is valid. */ - int ranges_is_index; /* Whether ranges is DW_FORM_rnglistx. */ -}; - -/* Update PCRANGE from an attribute value. */ - -static void -update_pcrange (const struct attr* attr, const struct attr_val* val, - struct pcrange *pcrange) -{ - switch (attr->name) - { - case DW_AT_low_pc: - if (val->encoding == ATTR_VAL_ADDRESS) - { - pcrange->lowpc = (uintptr_t) val->u.uint; - pcrange->have_lowpc = 1; - } - else if (val->encoding == ATTR_VAL_ADDRESS_INDEX) - { - pcrange->lowpc = (uintptr_t) val->u.uint; - pcrange->have_lowpc = 1; - pcrange->lowpc_is_addr_index = 1; - } - break; - - case DW_AT_high_pc: - if (val->encoding == ATTR_VAL_ADDRESS) - { - pcrange->highpc = (uintptr_t) val->u.uint; - pcrange->have_highpc = 1; - } - else if (val->encoding == ATTR_VAL_UINT) - { - pcrange->highpc = (uintptr_t) val->u.uint; - pcrange->have_highpc = 1; - pcrange->highpc_is_relative = 1; - } - else if (val->encoding == ATTR_VAL_ADDRESS_INDEX) - { - pcrange->highpc = (uintptr_t) val->u.uint; - pcrange->have_highpc = 1; - pcrange->highpc_is_addr_index = 1; - } - break; - - case DW_AT_ranges: - if (val->encoding == ATTR_VAL_UINT - || val->encoding == ATTR_VAL_REF_SECTION) - { - pcrange->ranges = val->u.uint; - pcrange->have_ranges = 1; - } - else if (val->encoding == ATTR_VAL_RNGLISTS_INDEX) - { - pcrange->ranges = val->u.uint; - pcrange->have_ranges = 1; - pcrange->ranges_is_index = 1; - } - break; - - default: - break; - } -} - -/* Call ADD_RANGE for a low/high PC pair. Returns 1 on success, 0 on - error. */ - -static int -add_low_high_range (struct backtrace_state *state, - const struct dwarf_sections *dwarf_sections, - struct libbacktrace_base_address base_address, - int is_bigendian, struct unit *u, - const struct pcrange *pcrange, - int (*add_range) (struct backtrace_state *state, - void *rdata, uintptr_t lowpc, - uintptr_t highpc, - backtrace_error_callback error_callback, - void *data, void *vec), - void *rdata, - backtrace_error_callback error_callback, void *data, - void *vec) -{ - uintptr_t lowpc; - uintptr_t highpc; - - lowpc = pcrange->lowpc; - if (pcrange->lowpc_is_addr_index) - { - if (!resolve_addr_index (dwarf_sections, u->addr_base, u->addrsize, - is_bigendian, lowpc, error_callback, data, - &lowpc)) - return 0; - } - - highpc = pcrange->highpc; - if (pcrange->highpc_is_addr_index) - { - if (!resolve_addr_index (dwarf_sections, u->addr_base, u->addrsize, - is_bigendian, highpc, error_callback, data, - &highpc)) - return 0; - } - if (pcrange->highpc_is_relative) - highpc += lowpc; - - /* Add in the base address of the module when recording PC values, - so that we can look up the PC directly. */ - lowpc = libbacktrace_add_base (lowpc, base_address); - highpc = libbacktrace_add_base (highpc, base_address); - - return add_range (state, rdata, lowpc, highpc, error_callback, data, vec); -} - -/* Call ADD_RANGE for each range read from .debug_ranges, as used in - DWARF versions 2 through 4. */ - -static int -add_ranges_from_ranges ( - struct backtrace_state *state, - const struct dwarf_sections *dwarf_sections, - struct libbacktrace_base_address base_address, int is_bigendian, - struct unit *u, uintptr_t base, - const struct pcrange *pcrange, - int (*add_range) (struct backtrace_state *state, void *rdata, - uintptr_t lowpc, uintptr_t highpc, - backtrace_error_callback error_callback, void *data, - void *vec), - void *rdata, - backtrace_error_callback error_callback, void *data, - void *vec) -{ - struct dwarf_buf ranges_buf; - - if (pcrange->ranges >= dwarf_sections->size[DEBUG_RANGES]) - { - error_callback (data, "ranges offset out of range", 0); - return 0; - } - - ranges_buf.name = ".debug_ranges"; - ranges_buf.start = dwarf_sections->data[DEBUG_RANGES]; - ranges_buf.buf = dwarf_sections->data[DEBUG_RANGES] + pcrange->ranges; - ranges_buf.left = dwarf_sections->size[DEBUG_RANGES] - pcrange->ranges; - ranges_buf.is_bigendian = is_bigendian; - ranges_buf.error_callback = error_callback; - ranges_buf.data = data; - ranges_buf.reported_underflow = 0; - - while (1) - { - uint64_t low; - uint64_t high; - - if (ranges_buf.reported_underflow) - return 0; - - low = read_address (&ranges_buf, u->addrsize); - high = read_address (&ranges_buf, u->addrsize); - - if (low == 0 && high == 0) - break; - - if (is_highest_address (low, u->addrsize)) - base = (uintptr_t) high; - else - { - uintptr_t rl, rh; - - rl = libbacktrace_add_base ((uintptr_t) low + base, base_address); - rh = libbacktrace_add_base ((uintptr_t) high + base, base_address); - if (!add_range (state, rdata, rl, rh, error_callback, data, vec)) - return 0; - } - } - - if (ranges_buf.reported_underflow) - return 0; - - return 1; -} - -/* Call ADD_RANGE for each range read from .debug_rnglists, as used in - DWARF version 5. */ - -static int -add_ranges_from_rnglists ( - struct backtrace_state *state, - const struct dwarf_sections *dwarf_sections, - struct libbacktrace_base_address base_address, int is_bigendian, - struct unit *u, uintptr_t base, - const struct pcrange *pcrange, - int (*add_range) (struct backtrace_state *state, void *rdata, - uintptr_t lowpc, uintptr_t highpc, - backtrace_error_callback error_callback, void *data, - void *vec), - void *rdata, - backtrace_error_callback error_callback, void *data, - void *vec) -{ - uint64_t offset; - struct dwarf_buf rnglists_buf; - - if (!pcrange->ranges_is_index) - offset = pcrange->ranges; - else - offset = u->rnglists_base + pcrange->ranges * (u->is_dwarf64 ? 8 : 4); - if (offset >= dwarf_sections->size[DEBUG_RNGLISTS]) - { - error_callback (data, "rnglists offset out of range", 0); - return 0; - } - - rnglists_buf.name = ".debug_rnglists"; - rnglists_buf.start = dwarf_sections->data[DEBUG_RNGLISTS]; - rnglists_buf.buf = dwarf_sections->data[DEBUG_RNGLISTS] + offset; - rnglists_buf.left = dwarf_sections->size[DEBUG_RNGLISTS] - offset; - rnglists_buf.is_bigendian = is_bigendian; - rnglists_buf.error_callback = error_callback; - rnglists_buf.data = data; - rnglists_buf.reported_underflow = 0; - - if (pcrange->ranges_is_index) - { - offset = read_offset (&rnglists_buf, u->is_dwarf64); - offset += u->rnglists_base; - if (offset >= dwarf_sections->size[DEBUG_RNGLISTS]) - { - error_callback (data, "rnglists index offset out of range", 0); - return 0; - } - rnglists_buf.buf = dwarf_sections->data[DEBUG_RNGLISTS] + offset; - rnglists_buf.left = dwarf_sections->size[DEBUG_RNGLISTS] - offset; - } - - while (1) - { - unsigned char rle; - - rle = read_byte (&rnglists_buf); - if (rle == DW_RLE_end_of_list) - break; - switch (rle) - { - case DW_RLE_base_addressx: - { - uint64_t index; - - index = read_uleb128 (&rnglists_buf); - if (!resolve_addr_index (dwarf_sections, u->addr_base, - u->addrsize, is_bigendian, index, - error_callback, data, &base)) - return 0; - } - break; - - case DW_RLE_startx_endx: - { - uint64_t index; - uintptr_t low; - uintptr_t high; - - index = read_uleb128 (&rnglists_buf); - if (!resolve_addr_index (dwarf_sections, u->addr_base, - u->addrsize, is_bigendian, index, - error_callback, data, &low)) - return 0; - index = read_uleb128 (&rnglists_buf); - if (!resolve_addr_index (dwarf_sections, u->addr_base, - u->addrsize, is_bigendian, index, - error_callback, data, &high)) - return 0; - if (!add_range (state, rdata, - libbacktrace_add_base (low, base_address), - libbacktrace_add_base (high, base_address), - error_callback, data, vec)) - return 0; - } - break; - - case DW_RLE_startx_length: - { - uint64_t index; - uintptr_t low; - uintptr_t length; - - index = read_uleb128 (&rnglists_buf); - if (!resolve_addr_index (dwarf_sections, u->addr_base, - u->addrsize, is_bigendian, index, - error_callback, data, &low)) - return 0; - length = read_uleb128 (&rnglists_buf); - low = libbacktrace_add_base (low, base_address); - if (!add_range (state, rdata, low, low + length, - error_callback, data, vec)) - return 0; - } - break; - - case DW_RLE_offset_pair: - { - uint64_t low; - uint64_t high; - - low = read_uleb128 (&rnglists_buf); - high = read_uleb128 (&rnglists_buf); - if (!add_range (state, rdata, - libbacktrace_add_base (low + base, base_address), - libbacktrace_add_base (high + base, base_address), - error_callback, data, vec)) - return 0; - } - break; - - case DW_RLE_base_address: - base = (uintptr_t) read_address (&rnglists_buf, u->addrsize); - break; - - case DW_RLE_start_end: - { - uintptr_t low; - uintptr_t high; - - low = (uintptr_t) read_address (&rnglists_buf, u->addrsize); - high = (uintptr_t) read_address (&rnglists_buf, u->addrsize); - if (!add_range (state, rdata, - libbacktrace_add_base (low, base_address), - libbacktrace_add_base (high, base_address), - error_callback, data, vec)) - return 0; - } - break; - - case DW_RLE_start_length: - { - uintptr_t low; - uintptr_t length; - - low = (uintptr_t) read_address (&rnglists_buf, u->addrsize); - length = (uintptr_t) read_uleb128 (&rnglists_buf); - low = libbacktrace_add_base (low, base_address); - if (!add_range (state, rdata, low, low + length, - error_callback, data, vec)) - return 0; - } - break; - - default: - dwarf_buf_error (&rnglists_buf, "unrecognized DW_RLE value", -1); - return 0; - } - } - - if (rnglists_buf.reported_underflow) - return 0; - - return 1; -} - -/* Call ADD_RANGE for each lowpc/highpc pair in PCRANGE. RDATA is - passed to ADD_RANGE, and is either a struct unit * or a struct - function *. VEC is the vector we are adding ranges to, and is - either a struct unit_addrs_vector * or a struct function_vector *. - Returns 1 on success, 0 on error. */ - -static int -add_ranges (struct backtrace_state *state, - const struct dwarf_sections *dwarf_sections, - struct libbacktrace_base_address base_address, int is_bigendian, - struct unit *u, uintptr_t base, const struct pcrange *pcrange, - int (*add_range) (struct backtrace_state *state, void *rdata, - uintptr_t lowpc, uintptr_t highpc, - backtrace_error_callback error_callback, - void *data, void *vec), - void *rdata, - backtrace_error_callback error_callback, void *data, - void *vec) -{ - if (pcrange->have_lowpc && pcrange->have_highpc) - return add_low_high_range (state, dwarf_sections, base_address, - is_bigendian, u, pcrange, add_range, rdata, - error_callback, data, vec); - - if (!pcrange->have_ranges) - { - /* Did not find any address ranges to add. */ - return 1; - } - - if (u->version < 5) - return add_ranges_from_ranges (state, dwarf_sections, base_address, - is_bigendian, u, base, pcrange, add_range, - rdata, error_callback, data, vec); - else - return add_ranges_from_rnglists (state, dwarf_sections, base_address, - is_bigendian, u, base, pcrange, add_range, - rdata, error_callback, data, vec); -} - -/* Find the address range covered by a compilation unit, reading from - UNIT_BUF and adding values to U. Returns 1 if all data could be - read, 0 if there is some error. */ - -static int -find_address_ranges (struct backtrace_state *state, - struct libbacktrace_base_address base_address, - struct dwarf_buf *unit_buf, - const struct dwarf_sections *dwarf_sections, - int is_bigendian, struct dwarf_data *altlink, - backtrace_error_callback error_callback, void *data, - struct unit *u, struct unit_addrs_vector *addrs, - enum dwarf_tag *unit_tag) -{ - while (unit_buf->left > 0) - { - uint64_t code; - const struct abbrev *abbrev; - struct pcrange pcrange; - struct attr_val name_val; - int have_name_val; - struct attr_val comp_dir_val; - int have_comp_dir_val; - size_t i; - - code = read_uleb128 (unit_buf); - if (code == 0) - return 1; - - abbrev = lookup_abbrev (&u->abbrevs, code, error_callback, data); - if (abbrev == NULL) - return 0; - - if (unit_tag != NULL) - *unit_tag = abbrev->tag; - - memset (&pcrange, 0, sizeof pcrange); - memset (&name_val, 0, sizeof name_val); - have_name_val = 0; - memset (&comp_dir_val, 0, sizeof comp_dir_val); - have_comp_dir_val = 0; - for (i = 0; i < abbrev->num_attrs; ++i) - { - struct attr_val val; - - if (!read_attribute (abbrev->attrs[i].form, abbrev->attrs[i].val, - unit_buf, u->is_dwarf64, u->version, - u->addrsize, dwarf_sections, altlink, &val)) - return 0; - - switch (abbrev->attrs[i].name) - { - case DW_AT_low_pc: case DW_AT_high_pc: case DW_AT_ranges: - update_pcrange (&abbrev->attrs[i], &val, &pcrange); - break; - - case DW_AT_stmt_list: - if ((abbrev->tag == DW_TAG_compile_unit - || abbrev->tag == DW_TAG_skeleton_unit) - && (val.encoding == ATTR_VAL_UINT - || val.encoding == ATTR_VAL_REF_SECTION)) - u->lineoff = val.u.uint; - break; - - case DW_AT_name: - if (abbrev->tag == DW_TAG_compile_unit - || abbrev->tag == DW_TAG_skeleton_unit) - { - name_val = val; - have_name_val = 1; - } - break; - - case DW_AT_comp_dir: - if (abbrev->tag == DW_TAG_compile_unit - || abbrev->tag == DW_TAG_skeleton_unit) - { - comp_dir_val = val; - have_comp_dir_val = 1; - } - break; - - case DW_AT_str_offsets_base: - if ((abbrev->tag == DW_TAG_compile_unit - || abbrev->tag == DW_TAG_skeleton_unit) - && val.encoding == ATTR_VAL_REF_SECTION) - u->str_offsets_base = val.u.uint; - break; - - case DW_AT_addr_base: - if ((abbrev->tag == DW_TAG_compile_unit - || abbrev->tag == DW_TAG_skeleton_unit) - && val.encoding == ATTR_VAL_REF_SECTION) - u->addr_base = val.u.uint; - break; - - case DW_AT_rnglists_base: - if ((abbrev->tag == DW_TAG_compile_unit - || abbrev->tag == DW_TAG_skeleton_unit) - && val.encoding == ATTR_VAL_REF_SECTION) - u->rnglists_base = val.u.uint; - break; - - default: - break; - } - } - - // Resolve strings after we're sure that we have seen - // DW_AT_str_offsets_base. - if (have_name_val) - { - if (!resolve_string (dwarf_sections, u->is_dwarf64, is_bigendian, - u->str_offsets_base, &name_val, - error_callback, data, &u->filename)) - return 0; - } - if (have_comp_dir_val) - { - if (!resolve_string (dwarf_sections, u->is_dwarf64, is_bigendian, - u->str_offsets_base, &comp_dir_val, - error_callback, data, &u->comp_dir)) - return 0; - } - - if (abbrev->tag == DW_TAG_compile_unit - || abbrev->tag == DW_TAG_subprogram - || abbrev->tag == DW_TAG_skeleton_unit) - { - if (!add_ranges (state, dwarf_sections, base_address, - is_bigendian, u, pcrange.lowpc, &pcrange, - add_unit_addr, (void *) u, error_callback, data, - (void *) addrs)) - return 0; - - /* If we found the PC range in the DW_TAG_compile_unit or - DW_TAG_skeleton_unit, we can stop now. */ - if ((abbrev->tag == DW_TAG_compile_unit - || abbrev->tag == DW_TAG_skeleton_unit) - && (pcrange.have_ranges - || (pcrange.have_lowpc && pcrange.have_highpc))) - return 1; - } - - if (abbrev->has_children) - { - if (!find_address_ranges (state, base_address, unit_buf, - dwarf_sections, is_bigendian, altlink, - error_callback, data, u, addrs, NULL)) - return 0; - } - } - - return 1; -} - -/* Build a mapping from address ranges to the compilation units where - the line number information for that range can be found. Returns 1 - on success, 0 on failure. */ - -static int -build_address_map (struct backtrace_state *state, - struct libbacktrace_base_address base_address, - const struct dwarf_sections *dwarf_sections, - int is_bigendian, struct dwarf_data *altlink, - backtrace_error_callback error_callback, void *data, - struct unit_addrs_vector *addrs, - struct unit_vector *unit_vec) -{ - struct dwarf_buf info; - struct backtrace_vector units; - size_t units_count; - size_t i; - struct unit **pu; - size_t unit_offset = 0; - struct unit_addrs *pa; - - memset (&addrs->vec, 0, sizeof addrs->vec); - memset (&unit_vec->vec, 0, sizeof unit_vec->vec); - addrs->count = 0; - unit_vec->count = 0; - - /* Read through the .debug_info section. FIXME: Should we use the - .debug_aranges section? gdb and addr2line don't use it, but I'm - not sure why. */ - - info.name = ".debug_info"; - info.start = dwarf_sections->data[DEBUG_INFO]; - info.buf = info.start; - info.left = dwarf_sections->size[DEBUG_INFO]; - info.is_bigendian = is_bigendian; - info.error_callback = error_callback; - info.data = data; - info.reported_underflow = 0; - - memset (&units, 0, sizeof units); - units_count = 0; - - while (info.left > 0) - { - const unsigned char *unit_data_start; - uint64_t len; - int is_dwarf64; - struct dwarf_buf unit_buf; - int version; - int unit_type; - uint64_t abbrev_offset; - int addrsize; - struct unit *u; - enum dwarf_tag unit_tag; - - if (info.reported_underflow) - goto fail; - - unit_data_start = info.buf; - - len = read_initial_length (&info, &is_dwarf64); - unit_buf = info; - unit_buf.left = len; - - if (!advance (&info, len)) - goto fail; - - version = read_uint16 (&unit_buf); - if (version < 2 || version > 5) - { - dwarf_buf_error (&unit_buf, "unrecognized DWARF version", -1); - goto fail; - } - - if (version < 5) - unit_type = 0; - else - { - unit_type = read_byte (&unit_buf); - if (unit_type == DW_UT_type || unit_type == DW_UT_split_type) - { - /* This unit doesn't have anything we need. */ - continue; - } - } - - pu = ((struct unit **) - backtrace_vector_grow (state, sizeof (struct unit *), - error_callback, data, &units)); - if (pu == NULL) - goto fail; - - u = ((struct unit *) - backtrace_alloc (state, sizeof *u, error_callback, data)); - if (u == NULL) - goto fail; - - *pu = u; - ++units_count; - - if (version < 5) - addrsize = 0; /* Set below. */ - else - addrsize = read_byte (&unit_buf); - - memset (&u->abbrevs, 0, sizeof u->abbrevs); - abbrev_offset = read_offset (&unit_buf, is_dwarf64); - if (!read_abbrevs (state, abbrev_offset, - dwarf_sections->data[DEBUG_ABBREV], - dwarf_sections->size[DEBUG_ABBREV], - is_bigendian, error_callback, data, &u->abbrevs)) - goto fail; - - if (version < 5) - addrsize = read_byte (&unit_buf); - - switch (unit_type) - { - case 0: - break; - case DW_UT_compile: case DW_UT_partial: - break; - case DW_UT_skeleton: case DW_UT_split_compile: - read_uint64 (&unit_buf); /* dwo_id */ - break; - default: - break; - } - - u->low_offset = unit_offset; - unit_offset += len + (is_dwarf64 ? 12 : 4); - u->high_offset = unit_offset; - u->unit_data = unit_buf.buf; - u->unit_data_len = unit_buf.left; - u->unit_data_offset = unit_buf.buf - unit_data_start; - u->version = version; - u->is_dwarf64 = is_dwarf64; - u->addrsize = addrsize; - u->filename = NULL; - u->comp_dir = NULL; - u->abs_filename = NULL; - u->lineoff = 0; - u->str_offsets_base = 0; - u->addr_base = 0; - u->rnglists_base = 0; - - /* The actual line number mappings will be read as needed. */ - u->lines = NULL; - u->lines_count = 0; - u->function_addrs = NULL; - u->function_addrs_count = 0; - - if (!find_address_ranges (state, base_address, &unit_buf, dwarf_sections, - is_bigendian, altlink, error_callback, data, - u, addrs, &unit_tag)) - goto fail; - - if (unit_buf.reported_underflow) - goto fail; - } - if (info.reported_underflow) - goto fail; - - /* Add a trailing addrs entry, but don't include it in addrs->count. */ - pa = ((struct unit_addrs *) - backtrace_vector_grow (state, sizeof (struct unit_addrs), - error_callback, data, &addrs->vec)); - if (pa == NULL) - goto fail; - pa->low = 0; - --pa->low; - pa->high = pa->low; - pa->u = NULL; - - unit_vec->vec = units; - unit_vec->count = units_count; - return 1; - - fail: - if (units_count > 0) - { - pu = (struct unit **) units.base; - for (i = 0; i < units_count; i++) - { - free_abbrevs (state, &pu[i]->abbrevs, error_callback, data); - backtrace_free (state, pu[i], sizeof **pu, error_callback, data); - } - backtrace_vector_free (state, &units, error_callback, data); - } - if (addrs->count > 0) - { - backtrace_vector_free (state, &addrs->vec, error_callback, data); - addrs->count = 0; - } - return 0; -} - -/* Add a new mapping to the vector of line mappings that we are - building. Returns 1 on success, 0 on failure. */ - -static int -add_line (struct backtrace_state *state, struct dwarf_data *ddata, - uintptr_t pc, const char *filename, int lineno, - backtrace_error_callback error_callback, void *data, - struct line_vector *vec) -{ - struct line *ln; - - /* If we are adding the same mapping, ignore it. This can happen - when using discriminators. */ - if (vec->count > 0) - { - ln = (struct line *) vec->vec.base + (vec->count - 1); - if (pc == ln->pc && filename == ln->filename && lineno == ln->lineno) - return 1; - } - - ln = ((struct line *) - backtrace_vector_grow (state, sizeof (struct line), error_callback, - data, &vec->vec)); - if (ln == NULL) - return 0; - - /* Add in the base address here, so that we can look up the PC - directly. */ - ln->pc = libbacktrace_add_base (pc, ddata->base_address); - - ln->filename = filename; - ln->lineno = lineno; - ln->idx = vec->count; - - ++vec->count; - - return 1; -} - -/* Free the line header information. */ - -static void -free_line_header (struct backtrace_state *state, struct line_header *hdr, - backtrace_error_callback error_callback, void *data) -{ - if (hdr->dirs_count != 0) - backtrace_free (state, hdr->dirs, hdr->dirs_count * sizeof (const char *), - error_callback, data); - backtrace_free (state, hdr->filenames, - hdr->filenames_count * sizeof (char *), - error_callback, data); -} - -/* Read the directories and file names for a line header for version - 2, setting fields in HDR. Return 1 on success, 0 on failure. */ - -static int -read_v2_paths (struct backtrace_state *state, struct unit *u, - struct dwarf_buf *hdr_buf, struct line_header *hdr) -{ - const unsigned char *p; - const unsigned char *pend; - size_t i; - - /* Count the number of directory entries. */ - hdr->dirs_count = 0; - p = hdr_buf->buf; - pend = p + hdr_buf->left; - while (p < pend && *p != '\0') - { - p += strnlen((const char *) p, pend - p) + 1; - ++hdr->dirs_count; - } - - /* The index of the first entry in the list of directories is 1. Index 0 is - used for the current directory of the compilation. To simplify index - handling, we set entry 0 to the compilation unit directory. */ - ++hdr->dirs_count; - hdr->dirs = ((const char **) - backtrace_alloc (state, - hdr->dirs_count * sizeof (const char *), - hdr_buf->error_callback, - hdr_buf->data)); - if (hdr->dirs == NULL) - return 0; - - hdr->dirs[0] = u->comp_dir; - i = 1; - while (*hdr_buf->buf != '\0') - { - if (hdr_buf->reported_underflow) - return 0; - - hdr->dirs[i] = read_string (hdr_buf); - if (hdr->dirs[i] == NULL) - return 0; - ++i; - } - if (!advance (hdr_buf, 1)) - return 0; - - /* Count the number of file entries. */ - hdr->filenames_count = 0; - p = hdr_buf->buf; - pend = p + hdr_buf->left; - while (p < pend && *p != '\0') - { - p += strnlen ((const char *) p, pend - p) + 1; - p += leb128_len (p); - p += leb128_len (p); - p += leb128_len (p); - ++hdr->filenames_count; - } - - /* The index of the first entry in the list of file names is 1. Index 0 is - used for the DW_AT_name of the compilation unit. To simplify index - handling, we set entry 0 to the compilation unit file name. */ - ++hdr->filenames_count; - hdr->filenames = ((const char **) - backtrace_alloc (state, - hdr->filenames_count * sizeof (char *), - hdr_buf->error_callback, - hdr_buf->data)); - if (hdr->filenames == NULL) - return 0; - hdr->filenames[0] = u->filename; - i = 1; - while (*hdr_buf->buf != '\0') - { - const char *filename; - uint64_t dir_index; - - if (hdr_buf->reported_underflow) - return 0; - - filename = read_string (hdr_buf); - if (filename == NULL) - return 0; - dir_index = read_uleb128 (hdr_buf); - if (IS_ABSOLUTE_PATH (filename) - || (dir_index < hdr->dirs_count && hdr->dirs[dir_index] == NULL)) - hdr->filenames[i] = filename; - else - { - const char *dir; - size_t dir_len; - size_t filename_len; - char *s; - - if (dir_index < hdr->dirs_count) - dir = hdr->dirs[dir_index]; - else - { - dwarf_buf_error (hdr_buf, - ("invalid directory index in " - "line number program header"), - 0); - return 0; - } - dir_len = strlen (dir); - filename_len = strlen (filename); - s = ((char *) backtrace_alloc (state, dir_len + filename_len + 2, - hdr_buf->error_callback, - hdr_buf->data)); - if (s == NULL) - return 0; - memcpy (s, dir, dir_len); - /* FIXME: If we are on a DOS-based file system, and the - directory or the file name use backslashes, then we - should use a backslash here. */ - s[dir_len] = '/'; - memcpy (s + dir_len + 1, filename, filename_len + 1); - hdr->filenames[i] = s; - } - - /* Ignore the modification time and size. */ - read_uleb128 (hdr_buf); - read_uleb128 (hdr_buf); - - ++i; - } - - return 1; -} - -/* Read a single version 5 LNCT entry for a directory or file name in a - line header. Sets *STRING to the resulting name, ignoring other - data. Return 1 on success, 0 on failure. */ - -static int -read_lnct (struct backtrace_state *state, struct dwarf_data *ddata, - struct unit *u, struct dwarf_buf *hdr_buf, - const struct line_header *hdr, size_t formats_count, - const struct line_header_format *formats, const char **string) -{ - size_t i; - const char *dir; - const char *path; - - dir = NULL; - path = NULL; - for (i = 0; i < formats_count; i++) - { - struct attr_val val; - - if (!read_attribute (formats[i].form, 0, hdr_buf, u->is_dwarf64, - u->version, hdr->addrsize, &ddata->dwarf_sections, - ddata->altlink, &val)) - return 0; - switch (formats[i].lnct) - { - case DW_LNCT_path: - if (!resolve_string (&ddata->dwarf_sections, u->is_dwarf64, - ddata->is_bigendian, u->str_offsets_base, - &val, hdr_buf->error_callback, hdr_buf->data, - &path)) - return 0; - break; - case DW_LNCT_directory_index: - if (val.encoding == ATTR_VAL_UINT) - { - if (val.u.uint >= hdr->dirs_count) - { - dwarf_buf_error (hdr_buf, - ("invalid directory index in " - "line number program header"), - 0); - return 0; - } - dir = hdr->dirs[val.u.uint]; - } - break; - default: - /* We don't care about timestamps or sizes or hashes. */ - break; - } - } - - if (path == NULL) - { - dwarf_buf_error (hdr_buf, - "missing file name in line number program header", - 0); - return 0; - } - - if (dir == NULL) - *string = path; - else - { - size_t dir_len; - size_t path_len; - char *s; - - dir_len = strlen (dir); - path_len = strlen (path); - s = (char *) backtrace_alloc (state, dir_len + path_len + 2, - hdr_buf->error_callback, hdr_buf->data); - if (s == NULL) - return 0; - memcpy (s, dir, dir_len); - /* FIXME: If we are on a DOS-based file system, and the - directory or the path name use backslashes, then we should - use a backslash here. */ - s[dir_len] = '/'; - memcpy (s + dir_len + 1, path, path_len + 1); - *string = s; - } - - return 1; -} - -/* Read a set of DWARF 5 line header format entries, setting *PCOUNT - and *PPATHS. Return 1 on success, 0 on failure. */ - -static int -read_line_header_format_entries (struct backtrace_state *state, - struct dwarf_data *ddata, - struct unit *u, - struct dwarf_buf *hdr_buf, - struct line_header *hdr, - size_t *pcount, - const char ***ppaths) -{ - size_t formats_count; - struct line_header_format *formats; - size_t paths_count; - const char **paths; - size_t i; - int ret; - - formats_count = read_byte (hdr_buf); - if (formats_count == 0) - formats = NULL; - else - { - formats = ((struct line_header_format *) - backtrace_alloc (state, - (formats_count - * sizeof (struct line_header_format)), - hdr_buf->error_callback, - hdr_buf->data)); - if (formats == NULL) - return 0; - - for (i = 0; i < formats_count; i++) - { - formats[i].lnct = (int) read_uleb128(hdr_buf); - formats[i].form = (enum dwarf_form) read_uleb128 (hdr_buf); - } - } - - paths_count = read_uleb128 (hdr_buf); - if (paths_count == 0) - { - *pcount = 0; - *ppaths = NULL; - ret = 1; - goto exit; - } - - paths = ((const char **) - backtrace_alloc (state, paths_count * sizeof (const char *), - hdr_buf->error_callback, hdr_buf->data)); - if (paths == NULL) - { - ret = 0; - goto exit; - } - for (i = 0; i < paths_count; i++) - { - if (!read_lnct (state, ddata, u, hdr_buf, hdr, formats_count, - formats, &paths[i])) - { - backtrace_free (state, paths, - paths_count * sizeof (const char *), - hdr_buf->error_callback, hdr_buf->data); - ret = 0; - goto exit; - } - } - - *pcount = paths_count; - *ppaths = paths; - - ret = 1; - - exit: - if (formats != NULL) - backtrace_free (state, formats, - formats_count * sizeof (struct line_header_format), - hdr_buf->error_callback, hdr_buf->data); - - return ret; -} - -/* Read the line header. Return 1 on success, 0 on failure. */ - -static int -read_line_header (struct backtrace_state *state, struct dwarf_data *ddata, - struct unit *u, int is_dwarf64, struct dwarf_buf *line_buf, - struct line_header *hdr) -{ - uint64_t hdrlen; - struct dwarf_buf hdr_buf; - - hdr->version = read_uint16 (line_buf); - if (hdr->version < 2 || hdr->version > 5) - { - dwarf_buf_error (line_buf, "unsupported line number version", -1); - return 0; - } - - if (hdr->version < 5) - hdr->addrsize = u->addrsize; - else - { - hdr->addrsize = read_byte (line_buf); - /* We could support a non-zero segment_selector_size but I doubt - we'll ever see it. */ - if (read_byte (line_buf) != 0) - { - dwarf_buf_error (line_buf, - "non-zero segment_selector_size not supported", - -1); - return 0; - } - } - - hdrlen = read_offset (line_buf, is_dwarf64); - - hdr_buf = *line_buf; - hdr_buf.left = hdrlen; - - if (!advance (line_buf, hdrlen)) - return 0; - - hdr->min_insn_len = read_byte (&hdr_buf); - if (hdr->version < 4) - hdr->max_ops_per_insn = 1; - else - hdr->max_ops_per_insn = read_byte (&hdr_buf); - - /* We don't care about default_is_stmt. */ - read_byte (&hdr_buf); - - hdr->line_base = read_sbyte (&hdr_buf); - hdr->line_range = read_byte (&hdr_buf); - - hdr->opcode_base = read_byte (&hdr_buf); - hdr->opcode_lengths = hdr_buf.buf; - if (!advance (&hdr_buf, hdr->opcode_base - 1)) - return 0; - - if (hdr->version < 5) - { - if (!read_v2_paths (state, u, &hdr_buf, hdr)) - return 0; - } - else - { - if (!read_line_header_format_entries (state, ddata, u, &hdr_buf, hdr, - &hdr->dirs_count, - &hdr->dirs)) - return 0; - if (!read_line_header_format_entries (state, ddata, u, &hdr_buf, hdr, - &hdr->filenames_count, - &hdr->filenames)) - return 0; - } - - if (hdr_buf.reported_underflow) - return 0; - - return 1; -} - -/* Read the line program, adding line mappings to VEC. Return 1 on - success, 0 on failure. */ - -static int -read_line_program (struct backtrace_state *state, struct dwarf_data *ddata, - const struct line_header *hdr, struct dwarf_buf *line_buf, - struct line_vector *vec) -{ - uint64_t address; - unsigned int op_index; - const char *reset_filename; - const char *filename; - int lineno; - - address = 0; - op_index = 0; - if (hdr->filenames_count > 1) - reset_filename = hdr->filenames[1]; - else - reset_filename = ""; - filename = reset_filename; - lineno = 1; - while (line_buf->left > 0) - { - unsigned int op; - - op = read_byte (line_buf); - if (op >= hdr->opcode_base) - { - unsigned int advance; - - /* Special opcode. */ - op -= hdr->opcode_base; - advance = op / hdr->line_range; - address += (hdr->min_insn_len * (op_index + advance) - / hdr->max_ops_per_insn); - op_index = (op_index + advance) % hdr->max_ops_per_insn; - lineno += hdr->line_base + (int) (op % hdr->line_range); - add_line (state, ddata, address, filename, lineno, - line_buf->error_callback, line_buf->data, vec); - } - else if (op == DW_LNS_extended_op) - { - uint64_t len; - - len = read_uleb128 (line_buf); - op = read_byte (line_buf); - switch (op) - { - case DW_LNE_end_sequence: - /* FIXME: Should we mark the high PC here? It seems - that we already have that information from the - compilation unit. */ - address = 0; - op_index = 0; - filename = reset_filename; - lineno = 1; - break; - case DW_LNE_set_address: - address = read_address (line_buf, hdr->addrsize); - break; - case DW_LNE_define_file: - { - const char *f; - unsigned int dir_index; - - f = read_string (line_buf); - if (f == NULL) - return 0; - dir_index = read_uleb128 (line_buf); - /* Ignore that time and length. */ - read_uleb128 (line_buf); - read_uleb128 (line_buf); - if (IS_ABSOLUTE_PATH (f)) - filename = f; - else - { - const char *dir; - size_t dir_len; - size_t f_len; - char *p; - - if (dir_index < hdr->dirs_count) - dir = hdr->dirs[dir_index]; - else - { - dwarf_buf_error (line_buf, - ("invalid directory index " - "in line number program"), - 0); - return 0; - } - dir_len = strlen (dir); - f_len = strlen (f); - p = ((char *) - backtrace_alloc (state, dir_len + f_len + 2, - line_buf->error_callback, - line_buf->data)); - if (p == NULL) - return 0; - memcpy (p, dir, dir_len); - /* FIXME: If we are on a DOS-based file system, - and the directory or the file name use - backslashes, then we should use a backslash - here. */ - p[dir_len] = '/'; - memcpy (p + dir_len + 1, f, f_len + 1); - filename = p; - } - } - break; - case DW_LNE_set_discriminator: - /* We don't care about discriminators. */ - read_uleb128 (line_buf); - break; - default: - if (!advance (line_buf, len - 1)) - return 0; - break; - } - } - else - { - switch (op) - { - case DW_LNS_copy: - add_line (state, ddata, address, filename, lineno, - line_buf->error_callback, line_buf->data, vec); - break; - case DW_LNS_advance_pc: - { - uint64_t advance; - - advance = read_uleb128 (line_buf); - address += (hdr->min_insn_len * (op_index + advance) - / hdr->max_ops_per_insn); - op_index = (op_index + advance) % hdr->max_ops_per_insn; - } - break; - case DW_LNS_advance_line: - lineno += (int) read_sleb128 (line_buf); - break; - case DW_LNS_set_file: - { - uint64_t fileno; - - fileno = read_uleb128 (line_buf); - if (fileno >= hdr->filenames_count) - { - dwarf_buf_error (line_buf, - ("invalid file number in " - "line number program"), - 0); - return 0; - } - filename = hdr->filenames[fileno]; - } - break; - case DW_LNS_set_column: - read_uleb128 (line_buf); - break; - case DW_LNS_negate_stmt: - break; - case DW_LNS_set_basic_block: - break; - case DW_LNS_const_add_pc: - { - unsigned int advance; - - op = 255 - hdr->opcode_base; - advance = op / hdr->line_range; - address += (hdr->min_insn_len * (op_index + advance) - / hdr->max_ops_per_insn); - op_index = (op_index + advance) % hdr->max_ops_per_insn; - } - break; - case DW_LNS_fixed_advance_pc: - address += read_uint16 (line_buf); - op_index = 0; - break; - case DW_LNS_set_prologue_end: - break; - case DW_LNS_set_epilogue_begin: - break; - case DW_LNS_set_isa: - read_uleb128 (line_buf); - break; - default: - { - unsigned int i; - - for (i = hdr->opcode_lengths[op - 1]; i > 0; --i) - read_uleb128 (line_buf); - } - break; - } - } - } - - return 1; -} - -/* Read the line number information for a compilation unit. Returns 1 - on success, 0 on failure. */ - -static int -read_line_info (struct backtrace_state *state, struct dwarf_data *ddata, - backtrace_error_callback error_callback, void *data, - struct unit *u, struct line_header *hdr, struct line **lines, - size_t *lines_count) -{ - struct line_vector vec; - struct dwarf_buf line_buf; - uint64_t len; - int is_dwarf64; - struct line *ln; - - memset (&vec.vec, 0, sizeof vec.vec); - vec.count = 0; - - memset (hdr, 0, sizeof *hdr); - - if (u->lineoff != (off_t) (size_t) u->lineoff - || (size_t) u->lineoff >= ddata->dwarf_sections.size[DEBUG_LINE]) - { - error_callback (data, "unit line offset out of range", 0); - goto fail; - } - - line_buf.name = ".debug_line"; - line_buf.start = ddata->dwarf_sections.data[DEBUG_LINE]; - line_buf.buf = ddata->dwarf_sections.data[DEBUG_LINE] + u->lineoff; - line_buf.left = ddata->dwarf_sections.size[DEBUG_LINE] - u->lineoff; - line_buf.is_bigendian = ddata->is_bigendian; - line_buf.error_callback = error_callback; - line_buf.data = data; - line_buf.reported_underflow = 0; - - len = read_initial_length (&line_buf, &is_dwarf64); - line_buf.left = len; - - if (!read_line_header (state, ddata, u, is_dwarf64, &line_buf, hdr)) - goto fail; - - if (!read_line_program (state, ddata, hdr, &line_buf, &vec)) - goto fail; - - if (line_buf.reported_underflow) - goto fail; - - if (vec.count == 0) - { - /* This is not a failure in the sense of a generating an error, - but it is a failure in that sense that we have no useful - information. */ - goto fail; - } - - /* Allocate one extra entry at the end. */ - ln = ((struct line *) - backtrace_vector_grow (state, sizeof (struct line), error_callback, - data, &vec.vec)); - if (ln == NULL) - goto fail; - ln->pc = (uintptr_t) -1; - ln->filename = NULL; - ln->lineno = 0; - ln->idx = 0; - - if (!backtrace_vector_release (state, &vec.vec, error_callback, data)) - goto fail; - - ln = (struct line *) vec.vec.base; - backtrace_qsort (ln, vec.count, sizeof (struct line), line_compare); - - *lines = ln; - *lines_count = vec.count; - - return 1; - - fail: - backtrace_vector_free (state, &vec.vec, error_callback, data); - free_line_header (state, hdr, error_callback, data); - *lines = (struct line *) (uintptr_t) -1; - *lines_count = 0; - return 0; -} - -static const char *read_referenced_name (struct dwarf_data *, struct unit *, - uint64_t, backtrace_error_callback, - void *); - -/* Read the name of a function from a DIE referenced by ATTR with VAL. */ - -static const char * -read_referenced_name_from_attr (struct dwarf_data *ddata, struct unit *u, - struct attr *attr, struct attr_val *val, - backtrace_error_callback error_callback, - void *data) -{ - switch (attr->name) - { - case DW_AT_abstract_origin: - case DW_AT_specification: - break; - default: - return NULL; - } - - if (attr->form == DW_FORM_ref_sig8) - return NULL; - - if (val->encoding == ATTR_VAL_REF_INFO) - { - struct unit *unit - = find_unit (ddata->units, ddata->units_count, - val->u.uint); - if (unit == NULL) - return NULL; - - uint64_t offset = val->u.uint - unit->low_offset; - return read_referenced_name (ddata, unit, offset, error_callback, data); - } - - if (val->encoding == ATTR_VAL_UINT - || val->encoding == ATTR_VAL_REF_UNIT) - return read_referenced_name (ddata, u, val->u.uint, error_callback, data); - - if (val->encoding == ATTR_VAL_REF_ALT_INFO) - { - struct unit *alt_unit - = find_unit (ddata->altlink->units, ddata->altlink->units_count, - val->u.uint); - if (alt_unit == NULL) - return NULL; - - uint64_t offset = val->u.uint - alt_unit->low_offset; - return read_referenced_name (ddata->altlink, alt_unit, offset, - error_callback, data); - } - - return NULL; -} - -/* Read the name of a function from a DIE referenced by a - DW_AT_abstract_origin or DW_AT_specification tag. OFFSET is within - the same compilation unit. */ - -static const char * -read_referenced_name (struct dwarf_data *ddata, struct unit *u, - uint64_t offset, backtrace_error_callback error_callback, - void *data) -{ - struct dwarf_buf unit_buf; - uint64_t code; - const struct abbrev *abbrev; - const char *ret; - size_t i; - - /* OFFSET is from the start of the data for this compilation unit. - U->unit_data is the data, but it starts U->unit_data_offset bytes - from the beginning. */ - - if (offset < u->unit_data_offset - || offset - u->unit_data_offset >= u->unit_data_len) - { - error_callback (data, - "abstract origin or specification out of range", - 0); - return NULL; - } - - offset -= u->unit_data_offset; - - unit_buf.name = ".debug_info"; - unit_buf.start = ddata->dwarf_sections.data[DEBUG_INFO]; - unit_buf.buf = u->unit_data + offset; - unit_buf.left = u->unit_data_len - offset; - unit_buf.is_bigendian = ddata->is_bigendian; - unit_buf.error_callback = error_callback; - unit_buf.data = data; - unit_buf.reported_underflow = 0; - - code = read_uleb128 (&unit_buf); - if (code == 0) - { - dwarf_buf_error (&unit_buf, - "invalid abstract origin or specification", - 0); - return NULL; - } - - abbrev = lookup_abbrev (&u->abbrevs, code, error_callback, data); - if (abbrev == NULL) - return NULL; - - ret = NULL; - for (i = 0; i < abbrev->num_attrs; ++i) - { - struct attr_val val; - - if (!read_attribute (abbrev->attrs[i].form, abbrev->attrs[i].val, - &unit_buf, u->is_dwarf64, u->version, u->addrsize, - &ddata->dwarf_sections, ddata->altlink, &val)) - return NULL; - - switch (abbrev->attrs[i].name) - { - case DW_AT_name: - /* Third name preference: don't override. A name we found in some - other way, will normally be more useful -- e.g., this name is - normally not mangled. */ - if (ret != NULL) - break; - if (!resolve_string (&ddata->dwarf_sections, u->is_dwarf64, - ddata->is_bigendian, u->str_offsets_base, - &val, error_callback, data, &ret)) - return NULL; - break; - - case DW_AT_linkage_name: - case DW_AT_MIPS_linkage_name: - /* First name preference: override all. */ - { - const char *s; - - s = NULL; - if (!resolve_string (&ddata->dwarf_sections, u->is_dwarf64, - ddata->is_bigendian, u->str_offsets_base, - &val, error_callback, data, &s)) - return NULL; - if (s != NULL) - return s; - } - break; - - case DW_AT_specification: - /* Second name preference: override DW_AT_name, don't override - DW_AT_linkage_name. */ - { - const char *name; - - name = read_referenced_name_from_attr (ddata, u, &abbrev->attrs[i], - &val, error_callback, data); - if (name != NULL) - ret = name; - } - break; - - default: - break; - } - } - - return ret; -} - -/* Add a range to a unit that maps to a function. This is called via - add_ranges. Returns 1 on success, 0 on error. */ - -static int -add_function_range (struct backtrace_state *state, void *rdata, - uintptr_t lowpc, uintptr_t highpc, - backtrace_error_callback error_callback, void *data, - void *pvec) -{ - struct function *function = (struct function *) rdata; - struct function_vector *vec = (struct function_vector *) pvec; - struct function_addrs *p; - - if (vec->count > 0) - { - p = (struct function_addrs *) vec->vec.base + (vec->count - 1); - if ((lowpc == p->high || lowpc == p->high + 1) - && function == p->function) - { - if (highpc > p->high) - p->high = highpc; - return 1; - } - } - - p = ((struct function_addrs *) - backtrace_vector_grow (state, sizeof (struct function_addrs), - error_callback, data, &vec->vec)); - if (p == NULL) - return 0; - - p->low = lowpc; - p->high = highpc; - p->function = function; - - ++vec->count; - - return 1; -} - -/* Read one entry plus all its children. Add function addresses to - VEC. Returns 1 on success, 0 on error. */ - -static int -read_function_entry (struct backtrace_state *state, struct dwarf_data *ddata, - struct unit *u, uintptr_t base, struct dwarf_buf *unit_buf, - const struct line_header *lhdr, - backtrace_error_callback error_callback, void *data, - struct function_vector *vec_function, - struct function_vector *vec_inlined) -{ - while (unit_buf->left > 0) - { - uint64_t code; - const struct abbrev *abbrev; - int is_function; - struct function *function; - struct function_vector *vec; - size_t i; - struct pcrange pcrange; - int have_linkage_name; - - code = read_uleb128 (unit_buf); - if (code == 0) - return 1; - - abbrev = lookup_abbrev (&u->abbrevs, code, error_callback, data); - if (abbrev == NULL) - return 0; - - is_function = (abbrev->tag == DW_TAG_subprogram - || abbrev->tag == DW_TAG_entry_point - || abbrev->tag == DW_TAG_inlined_subroutine); - - if (abbrev->tag == DW_TAG_inlined_subroutine) - vec = vec_inlined; - else - vec = vec_function; - - function = NULL; - if (is_function) - { - function = ((struct function *) - backtrace_alloc (state, sizeof *function, - error_callback, data)); - if (function == NULL) - return 0; - memset (function, 0, sizeof *function); - } - - memset (&pcrange, 0, sizeof pcrange); - have_linkage_name = 0; - for (i = 0; i < abbrev->num_attrs; ++i) - { - struct attr_val val; - - if (!read_attribute (abbrev->attrs[i].form, abbrev->attrs[i].val, - unit_buf, u->is_dwarf64, u->version, - u->addrsize, &ddata->dwarf_sections, - ddata->altlink, &val)) - return 0; - - /* The compile unit sets the base address for any address - ranges in the function entries. */ - if ((abbrev->tag == DW_TAG_compile_unit - || abbrev->tag == DW_TAG_skeleton_unit) - && abbrev->attrs[i].name == DW_AT_low_pc) - { - if (val.encoding == ATTR_VAL_ADDRESS) - base = (uintptr_t) val.u.uint; - else if (val.encoding == ATTR_VAL_ADDRESS_INDEX) - { - if (!resolve_addr_index (&ddata->dwarf_sections, - u->addr_base, u->addrsize, - ddata->is_bigendian, val.u.uint, - error_callback, data, &base)) - return 0; - } - } - - if (is_function) - { - switch (abbrev->attrs[i].name) - { - case DW_AT_call_file: - if (val.encoding == ATTR_VAL_UINT) - { - if (val.u.uint >= lhdr->filenames_count) - { - dwarf_buf_error (unit_buf, - ("invalid file number in " - "DW_AT_call_file attribute"), - 0); - return 0; - } - function->caller_filename = lhdr->filenames[val.u.uint]; - } - break; - - case DW_AT_call_line: - if (val.encoding == ATTR_VAL_UINT) - function->caller_lineno = val.u.uint; - break; - - case DW_AT_abstract_origin: - case DW_AT_specification: - /* Second name preference: override DW_AT_name, don't override - DW_AT_linkage_name. */ - if (have_linkage_name) - break; - { - const char *name; - - name - = read_referenced_name_from_attr (ddata, u, - &abbrev->attrs[i], &val, - error_callback, data); - if (name != NULL) - function->name = name; - } - break; - - case DW_AT_name: - /* Third name preference: don't override. */ - if (function->name != NULL) - break; - if (!resolve_string (&ddata->dwarf_sections, u->is_dwarf64, - ddata->is_bigendian, - u->str_offsets_base, &val, - error_callback, data, &function->name)) - return 0; - break; - - case DW_AT_linkage_name: - case DW_AT_MIPS_linkage_name: - /* First name preference: override all. */ - { - const char *s; - - s = NULL; - if (!resolve_string (&ddata->dwarf_sections, u->is_dwarf64, - ddata->is_bigendian, - u->str_offsets_base, &val, - error_callback, data, &s)) - return 0; - if (s != NULL) - { - function->name = s; - have_linkage_name = 1; - } - } - break; - - case DW_AT_low_pc: case DW_AT_high_pc: case DW_AT_ranges: - update_pcrange (&abbrev->attrs[i], &val, &pcrange); - break; - - default: - break; - } - } - } - - /* If we couldn't find a name for the function, we have no use - for it. */ - if (is_function && function->name == NULL) - { - backtrace_free (state, function, sizeof *function, - error_callback, data); - is_function = 0; - } - - if (is_function) - { - if (pcrange.have_ranges - || (pcrange.have_lowpc && pcrange.have_highpc)) - { - if (!add_ranges (state, &ddata->dwarf_sections, - ddata->base_address, ddata->is_bigendian, - u, base, &pcrange, add_function_range, - (void *) function, error_callback, data, - (void *) vec)) - return 0; - } - else - { - backtrace_free (state, function, sizeof *function, - error_callback, data); - is_function = 0; - } - } - - if (abbrev->has_children) - { - if (!is_function) - { - if (!read_function_entry (state, ddata, u, base, unit_buf, lhdr, - error_callback, data, vec_function, - vec_inlined)) - return 0; - } - else - { - struct function_vector fvec; - - /* Gather any information for inlined functions in - FVEC. */ - - memset (&fvec, 0, sizeof fvec); - - if (!read_function_entry (state, ddata, u, base, unit_buf, lhdr, - error_callback, data, vec_function, - &fvec)) - return 0; - - if (fvec.count > 0) - { - struct function_addrs *p; - struct function_addrs *faddrs; - - /* Allocate a trailing entry, but don't include it - in fvec.count. */ - p = ((struct function_addrs *) - backtrace_vector_grow (state, - sizeof (struct function_addrs), - error_callback, data, - &fvec.vec)); - if (p == NULL) - return 0; - p->low = 0; - --p->low; - p->high = p->low; - p->function = NULL; - - if (!backtrace_vector_release (state, &fvec.vec, - error_callback, data)) - return 0; - - faddrs = (struct function_addrs *) fvec.vec.base; - backtrace_qsort (faddrs, fvec.count, - sizeof (struct function_addrs), - function_addrs_compare); - - function->function_addrs = faddrs; - function->function_addrs_count = fvec.count; - } - } - } - } - - return 1; -} - -/* Read function name information for a compilation unit. We look - through the whole unit looking for function tags. */ - -static void -read_function_info (struct backtrace_state *state, struct dwarf_data *ddata, - const struct line_header *lhdr, - backtrace_error_callback error_callback, void *data, - struct unit *u, struct function_vector *fvec, - struct function_addrs **ret_addrs, - size_t *ret_addrs_count) -{ - struct function_vector lvec; - struct function_vector *pfvec; - struct dwarf_buf unit_buf; - struct function_addrs *p; - struct function_addrs *addrs; - size_t addrs_count; - - /* Use FVEC if it is not NULL. Otherwise use our own vector. */ - if (fvec != NULL) - pfvec = fvec; - else - { - memset (&lvec, 0, sizeof lvec); - pfvec = &lvec; - } - - unit_buf.name = ".debug_info"; - unit_buf.start = ddata->dwarf_sections.data[DEBUG_INFO]; - unit_buf.buf = u->unit_data; - unit_buf.left = u->unit_data_len; - unit_buf.is_bigendian = ddata->is_bigendian; - unit_buf.error_callback = error_callback; - unit_buf.data = data; - unit_buf.reported_underflow = 0; - - while (unit_buf.left > 0) - { - if (!read_function_entry (state, ddata, u, 0, &unit_buf, lhdr, - error_callback, data, pfvec, pfvec)) - return; - } - - if (pfvec->count == 0) - return; - - /* Allocate a trailing entry, but don't include it in - pfvec->count. */ - p = ((struct function_addrs *) - backtrace_vector_grow (state, sizeof (struct function_addrs), - error_callback, data, &pfvec->vec)); - if (p == NULL) - return; - p->low = 0; - --p->low; - p->high = p->low; - p->function = NULL; - - addrs_count = pfvec->count; - - if (fvec == NULL) - { - if (!backtrace_vector_release (state, &lvec.vec, error_callback, data)) - return; - addrs = (struct function_addrs *) pfvec->vec.base; - } - else - { - /* Finish this list of addresses, but leave the remaining space in - the vector available for the next function unit. */ - addrs = ((struct function_addrs *) - backtrace_vector_finish (state, &fvec->vec, - error_callback, data)); - if (addrs == NULL) - return; - fvec->count = 0; - } - - backtrace_qsort (addrs, addrs_count, sizeof (struct function_addrs), - function_addrs_compare); - - *ret_addrs = addrs; - *ret_addrs_count = addrs_count; -} - -/* See if PC is inlined in FUNCTION. If it is, print out the inlined - information, and update FILENAME and LINENO for the caller. - Returns whatever CALLBACK returns, or 0 to keep going. */ - -static int -report_inlined_functions (uintptr_t pc, struct function *function, const char* comp_dir, - backtrace_full_callback callback, void *data, - const char **filename, int *lineno) -{ - struct function_addrs *p; - struct function_addrs *match; - struct function *inlined; - int ret; - - if (function->function_addrs_count == 0) - return 0; - - /* Our search isn't safe if pc == -1, as that is the sentinel - value. */ - if (pc + 1 == 0) - return 0; - - p = ((struct function_addrs *) - bsearch (&pc, function->function_addrs, - function->function_addrs_count, - sizeof (struct function_addrs), - function_addrs_search)); - if (p == NULL) - return 0; - - /* Here pc >= p->low && pc < (p + 1)->low. The function_addrs are - sorted by low, so if pc > p->low we are at the end of a range of - function_addrs with the same low value. If pc == p->low walk - forward to the end of the range with that low value. Then walk - backward and use the first range that includes pc. */ - while (pc == (p + 1)->low) - ++p; - match = NULL; - while (1) - { - if (pc < p->high) - { - match = p; - break; - } - if (p == function->function_addrs) - break; - if ((p - 1)->low < p->low) - break; - --p; - } - if (match == NULL) - return 0; - - /* We found an inlined call. */ - - inlined = match->function; - - /* Report any calls inlined into this one. */ - ret = report_inlined_functions (pc, inlined, comp_dir, callback, data, - filename, lineno); - if (ret != 0) - return ret; - - /* Report this inlined call. */ - if (*filename[0] != '/' && comp_dir) - { - char buf[1024]; - snprintf (buf, 1024, "%s/%s", comp_dir, *filename); - ret = callback (data, pc, match->low, buf, *lineno, inlined->name); - } - else - { - ret = callback (data, pc, match->low, *filename, *lineno, inlined->name); - } - if (ret != 0) - return ret; - - /* Our caller will report the caller of the inlined function; tell - it the appropriate filename and line number. */ - *filename = inlined->caller_filename; - *lineno = inlined->caller_lineno; - - return 0; -} - -/* Look for a PC in the DWARF mapping for one module. On success, - call CALLBACK and return whatever it returns. On error, call - ERROR_CALLBACK and return 0. Sets *FOUND to 1 if the PC is found, - 0 if not. */ - -static int -dwarf_lookup_pc (struct backtrace_state *state, struct dwarf_data *ddata, - uintptr_t pc, backtrace_full_callback callback, - backtrace_error_callback error_callback, void *data, - int *found) -{ - struct unit_addrs *entry; - int found_entry; - struct unit *u; - int new_data; - struct line *lines; - struct line *ln; - struct function_addrs *p; - struct function_addrs *fmatch; - struct function *function; - const char *filename; - int lineno; - int ret; - - *found = 1; - - /* Find an address range that includes PC. Our search isn't safe if - PC == -1, as we use that as a sentinel value, so skip the search - in that case. */ - entry = (ddata->addrs_count == 0 || pc + 1 == 0 - ? NULL - : (struct unit_addrs*)bsearch (&pc, ddata->addrs, ddata->addrs_count, - sizeof (struct unit_addrs), unit_addrs_search)); - - if (entry == NULL) - { - *found = 0; - return 0; - } - - /* Here pc >= entry->low && pc < (entry + 1)->low. The unit_addrs - are sorted by low, so if pc > p->low we are at the end of a range - of unit_addrs with the same low value. If pc == p->low walk - forward to the end of the range with that low value. Then walk - backward and use the first range that includes pc. */ - while (pc == (entry + 1)->low) - ++entry; - found_entry = 0; - while (1) - { - if (pc < entry->high) - { - found_entry = 1; - break; - } - if (entry == ddata->addrs) - break; - if ((entry - 1)->low < entry->low) - break; - --entry; - } - if (!found_entry) - { - *found = 0; - return 0; - } - - /* We need the lines, lines_count, function_addrs, - function_addrs_count fields of u. If they are not set, we need - to set them. When running in threaded mode, we need to allow for - the possibility that some other thread is setting them - simultaneously. */ - - u = entry->u; - lines = u->lines; - - /* Skip units with no useful line number information by walking - backward. Useless line number information is marked by setting - lines == -1. */ - while (entry > ddata->addrs - && pc >= (entry - 1)->low - && pc < (entry - 1)->high) - { - if (state->threaded) - lines = (struct line *) backtrace_atomic_load_pointer (&u->lines); - - if (lines != (struct line *) (uintptr_t) -1) - break; - - --entry; - - u = entry->u; - lines = u->lines; - } - - if (state->threaded) - lines = backtrace_atomic_load_pointer (&u->lines); - - new_data = 0; - if (lines == NULL) - { - struct function_addrs *function_addrs; - size_t function_addrs_count; - struct line_header lhdr; - size_t count; - - /* We have never read the line information for this unit. Read - it now. */ - - function_addrs = NULL; - function_addrs_count = 0; - if (read_line_info (state, ddata, error_callback, data, entry->u, &lhdr, - &lines, &count)) - { - struct function_vector *pfvec; - - /* If not threaded, reuse DDATA->FVEC for better memory - consumption. */ - if (state->threaded) - pfvec = NULL; - else - pfvec = &ddata->fvec; - read_function_info (state, ddata, &lhdr, error_callback, data, - entry->u, pfvec, &function_addrs, - &function_addrs_count); - free_line_header (state, &lhdr, error_callback, data); - new_data = 1; - } - - /* Atomically store the information we just read into the unit. - If another thread is simultaneously writing, it presumably - read the same information, and we don't care which one we - wind up with; we just leak the other one. We do have to - write the lines field last, so that the acquire-loads above - ensure that the other fields are set. */ - - if (!state->threaded) - { - u->lines_count = count; - u->function_addrs = function_addrs; - u->function_addrs_count = function_addrs_count; - u->lines = lines; - } - else - { - backtrace_atomic_store_size_t (&u->lines_count, count); - backtrace_atomic_store_pointer (&u->function_addrs, function_addrs); - backtrace_atomic_store_size_t (&u->function_addrs_count, - function_addrs_count); - backtrace_atomic_store_pointer (&u->lines, lines); - } - } - - /* Now all fields of U have been initialized. */ - - if (lines == (struct line *) (uintptr_t) -1) - { - /* If reading the line number information failed in some way, - try again to see if there is a better compilation unit for - this PC. */ - if (new_data) - return dwarf_lookup_pc (state, ddata, pc, callback, error_callback, - data, found); - return callback (data, pc, 0, NULL, 0, NULL); - } - - /* Search for PC within this unit. */ - - ln = (struct line *) bsearch (&pc, lines, entry->u->lines_count, - sizeof (struct line), line_search); - if (ln == NULL) - { - /* The PC is between the low_pc and high_pc attributes of the - compilation unit, but no entry in the line table covers it. - This implies that the start of the compilation unit has no - line number information. */ - - if (entry->u->abs_filename == NULL) - { - const char *filename; - - filename = entry->u->filename; - if (filename != NULL - && !IS_ABSOLUTE_PATH (filename) - && entry->u->comp_dir != NULL) - { - size_t filename_len; - const char *dir; - size_t dir_len; - char *s; - - filename_len = strlen (filename); - dir = entry->u->comp_dir; - dir_len = strlen (dir); - s = (char *) backtrace_alloc (state, dir_len + filename_len + 2, - error_callback, data); - if (s == NULL) - { - *found = 0; - return 0; - } - memcpy (s, dir, dir_len); - /* FIXME: Should use backslash if DOS file system. */ - s[dir_len] = '/'; - memcpy (s + dir_len + 1, filename, filename_len + 1); - filename = s; - } - entry->u->abs_filename = filename; - } - - return callback (data, pc, 0, entry->u->abs_filename, 0, NULL); - } - - /* Search for function name within this unit. */ - - if (entry->u->function_addrs_count == 0) - return callback (data, pc, 0, ln->filename, ln->lineno, NULL); - - p = ((struct function_addrs *) - bsearch (&pc, entry->u->function_addrs, - entry->u->function_addrs_count, - sizeof (struct function_addrs), - function_addrs_search)); - if (p == NULL) - return callback (data, pc, 0, ln->filename, ln->lineno, NULL); - - /* Here pc >= p->low && pc < (p + 1)->low. The function_addrs are - sorted by low, so if pc > p->low we are at the end of a range of - function_addrs with the same low value. If pc == p->low walk - forward to the end of the range with that low value. Then walk - backward and use the first range that includes pc. */ - while (pc == (p + 1)->low) - ++p; - fmatch = NULL; - while (1) - { - if (pc < p->high) - { - fmatch = p; - break; - } - if (p == entry->u->function_addrs) - break; - if ((p - 1)->low < p->low) - break; - --p; - } - if (fmatch == NULL) - return callback (data, pc, 0, ln->filename, ln->lineno, NULL); - - function = fmatch->function; - - filename = ln->filename; - lineno = ln->lineno; - - ret = report_inlined_functions (pc, function, entry->u->comp_dir, callback, data, - &filename, &lineno); - if (ret != 0) - return ret; - - if (filename[0] != '/' && entry->u->comp_dir) - { - char buf[1024]; - snprintf (buf, 1024, "%s/%s", entry->u->comp_dir, filename); - return callback (data, pc, fmatch->low, buf, lineno, function->name); - } - else - { - return callback (data, pc, fmatch->low, filename, lineno, function->name); - } -} - -bool dwarf_fileline_dwarf_lookup_pc_in_all_entries(struct backtrace_state *state, uintptr_t pc, - backtrace_full_callback callback, backtrace_error_callback error_callback, void *data, - int& found, int ret) -{ - for (struct dwarf_data* ddata = (struct dwarf_data *)state->fileline_data; - ddata != NULL; - ddata = ddata->next) - { - ret = dwarf_lookup_pc(state, ddata, pc, callback, error_callback, data, &found); - if (ret != 0 || found) return true; - } - return false; -} - -/* Return the file/line information for a PC using the DWARF mapping - we built earlier. */ - -static int -dwarf_fileline (struct backtrace_state *state, uintptr_t pc, - backtrace_full_callback callback, - backtrace_error_callback error_callback, void *data) -{ - struct dwarf_data *ddata; - int found; - int ret = 0; - - if (!state->threaded) - { - if (dwarf_fileline_dwarf_lookup_pc_in_all_entries(state, pc, callback, error_callback, data, found, ret)) - { - return ret; - } - - // if we failed to obtain an entry in range, it can mean that the address map has been changed and new entries - // have been loaded in the meantime. Request a refresh and try again. - if (state->request_known_address_ranges_refresh_fn) - { - int new_range_count = state->request_known_address_ranges_refresh_fn(state, pc); - if (new_range_count > 0) - { - if (dwarf_fileline_dwarf_lookup_pc_in_all_entries(state, pc, callback, error_callback, data, found, ret)) - { - return ret; - } - } - } - - } - else - { - struct dwarf_data **pp; - - pp = (struct dwarf_data **) (void *) &state->fileline_data; - while (1) - { - ddata = backtrace_atomic_load_pointer (pp); - if (ddata == NULL) - break; - - ret = dwarf_lookup_pc (state, ddata, pc, callback, error_callback, - data, &found); - if (ret != 0 || found) - return ret; - - pp = &ddata->next; - } - } - - /* FIXME: See if any libraries have been dlopen'ed. */ - - return callback (data, pc, 0, NULL, 0, NULL); -} - -/* Initialize our data structures from the DWARF debug info for a - file. Return NULL on failure. */ - -static struct dwarf_data * -build_dwarf_data (struct backtrace_state *state, - struct libbacktrace_base_address base_address, - const struct dwarf_sections *dwarf_sections, - int is_bigendian, - struct dwarf_data *altlink, - backtrace_error_callback error_callback, - void *data) -{ - struct unit_addrs_vector addrs_vec; - struct unit_addrs *addrs; - size_t addrs_count; - struct unit_vector units_vec; - struct unit **units; - size_t units_count; - struct dwarf_data *fdata; - - if (!build_address_map (state, base_address, dwarf_sections, is_bigendian, - altlink, error_callback, data, &addrs_vec, - &units_vec)) - return NULL; - - if (!backtrace_vector_release (state, &addrs_vec.vec, error_callback, data)) - return NULL; - if (!backtrace_vector_release (state, &units_vec.vec, error_callback, data)) - return NULL; - addrs = (struct unit_addrs *) addrs_vec.vec.base; - units = (struct unit **) units_vec.vec.base; - addrs_count = addrs_vec.count; - units_count = units_vec.count; - backtrace_qsort (addrs, addrs_count, sizeof (struct unit_addrs), - unit_addrs_compare); - /* No qsort for units required, already sorted. */ - - fdata = ((struct dwarf_data *) - backtrace_alloc (state, sizeof (struct dwarf_data), - error_callback, data)); - if (fdata == NULL) - return NULL; - - fdata->next = NULL; - fdata->altlink = altlink; - fdata->base_address = base_address; - fdata->addrs = addrs; - fdata->addrs_count = addrs_count; - fdata->units = units; - fdata->units_count = units_count; - fdata->dwarf_sections = *dwarf_sections; - fdata->is_bigendian = is_bigendian; - memset (&fdata->fvec, 0, sizeof fdata->fvec); - - return fdata; -} - -/* Build our data structures from the DWARF sections for a module. - Set FILELINE_FN and STATE->FILELINE_DATA. Return 1 on success, 0 - on failure. */ - -int -backtrace_dwarf_add (struct backtrace_state *state, - struct libbacktrace_base_address base_address, - const struct dwarf_sections *dwarf_sections, - int is_bigendian, - struct dwarf_data *fileline_altlink, - backtrace_error_callback error_callback, - void *data, fileline *fileline_fn, - struct dwarf_data **fileline_entry) -{ - struct dwarf_data *fdata; - - fdata = build_dwarf_data (state, base_address, dwarf_sections, is_bigendian, - fileline_altlink, error_callback, data); - if (fdata == NULL) - return 0; - - if (fileline_entry != NULL) - *fileline_entry = fdata; - - if (!state->threaded) - { - struct dwarf_data **pp; - - for (pp = (struct dwarf_data **) (void *) &state->fileline_data; - *pp != NULL; - pp = &(*pp)->next) - ; - *pp = fdata; - } - else - { - while (1) - { - struct dwarf_data **pp; - - pp = (struct dwarf_data **) (void *) &state->fileline_data; - - while (1) - { - struct dwarf_data *p; - - p = backtrace_atomic_load_pointer (pp); - - if (p == NULL) - break; - - pp = &p->next; - } - - if (__sync_bool_compare_and_swap (pp, NULL, fdata)) - break; - } - } - - *fileline_fn = dwarf_fileline; - - return 1; -} - -} diff --git a/src/third_party/tracy/libbacktrace/elf.cpp b/src/third_party/tracy/libbacktrace/elf.cpp deleted file mode 100644 index ffe8d702..00000000 --- a/src/third_party/tracy/libbacktrace/elf.cpp +++ /dev/null @@ -1,7605 +0,0 @@ -/* elf.c -- Get debug data from an ELF file for backtraces. - Copyright (C) 2012-2021 Free Software Foundation, Inc. - Written by Ian Lance Taylor, Google. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - (1) Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - (2) Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in - the documentation and/or other materials provided with the - distribution. - - (3) The name of the author may not be used to - endorse or promote products derived from this software without - specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR -IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, -INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING -IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. */ - -#include "config.h" - -#include -#include -#include -#include -#include -#include -#include - -#ifdef HAVE_DL_ITERATE_PHDR -#include -#endif - -#include "backtrace.hpp" -#include "internal.hpp" - -#include "../client/TracyFastVector.hpp" -#include "../common/TracyAlloc.hpp" - -#ifndef S_ISLNK - #ifndef S_IFLNK - #define S_IFLNK 0120000 - #endif - #ifndef S_IFMT - #define S_IFMT 0170000 - #endif - #define S_ISLNK(m) (((m) & S_IFMT) == S_IFLNK) -#endif - -#ifndef __GNUC__ -#define __builtin_prefetch(p, r, l) -#ifndef unlikely -#define unlikely(x) (x) -#endif -#else -#ifndef unlikely -#define unlikely(x) __builtin_expect(!!(x), 0) -#endif -#endif - -namespace tracy -{ - -#ifdef TRACY_DEBUGINFOD -int GetDebugInfoDescriptor( const char* buildid_data, size_t buildid_size ); -#endif - -#if !defined(HAVE_DECL_STRNLEN) || !HAVE_DECL_STRNLEN - -/* If strnlen is not declared, provide our own version. */ - -static size_t -xstrnlen (const char *s, size_t maxlen) -{ - size_t i; - - for (i = 0; i < maxlen; ++i) - if (s[i] == '\0') - break; - return i; -} - -#define strnlen xstrnlen - -#endif - -#ifndef HAVE_LSTAT - -/* Dummy version of lstat for systems that don't have it. */ - -static int -xlstat (const char *path ATTRIBUTE_UNUSED, struct stat *st ATTRIBUTE_UNUSED) -{ - return -1; -} - -#define lstat xlstat - -#endif - -#ifndef HAVE_READLINK - -/* Dummy version of readlink for systems that don't have it. */ - -static ssize_t -xreadlink (const char *path ATTRIBUTE_UNUSED, char *buf ATTRIBUTE_UNUSED, - size_t bufsz ATTRIBUTE_UNUSED) -{ - return -1; -} - -#define readlink xreadlink - -#endif - -#ifndef HAVE_DL_ITERATE_PHDR - -/* Dummy version of dl_iterate_phdr for systems that don't have it. */ - -#define dl_phdr_info x_dl_phdr_info -#define dl_iterate_phdr x_dl_iterate_phdr - -struct dl_phdr_info -{ - uintptr_t dlpi_addr; - const char *dlpi_name; -}; - -static int -dl_iterate_phdr (int (*callback) (struct dl_phdr_info *, - size_t, void *) ATTRIBUTE_UNUSED, - void *data ATTRIBUTE_UNUSED) -{ - return 0; -} - -#endif /* ! defined (HAVE_DL_ITERATE_PHDR) */ - -/* The configure script must tell us whether we are 32-bit or 64-bit - ELF. We could make this code test and support either possibility, - but there is no point. This code only works for the currently - running executable, which means that we know the ELF mode at - configure time. */ - -#if BACKTRACE_ELF_SIZE != 32 && BACKTRACE_ELF_SIZE != 64 -#error "Unknown BACKTRACE_ELF_SIZE" -#endif - -/* might #include which might define our constants - with slightly different values. Undefine them to be safe. */ - -#undef EI_NIDENT -#undef EI_MAG0 -#undef EI_MAG1 -#undef EI_MAG2 -#undef EI_MAG3 -#undef EI_CLASS -#undef EI_DATA -#undef EI_VERSION -#undef ELF_MAG0 -#undef ELF_MAG1 -#undef ELF_MAG2 -#undef ELF_MAG3 -#undef ELFCLASS32 -#undef ELFCLASS64 -#undef ELFDATA2LSB -#undef ELFDATA2MSB -#undef EV_CURRENT -#undef ET_DYN -#undef EM_PPC64 -#undef EF_PPC64_ABI -#undef SHN_LORESERVE -#undef SHN_XINDEX -#undef SHN_UNDEF -#undef SHT_PROGBITS -#undef SHT_SYMTAB -#undef SHT_STRTAB -#undef SHT_DYNSYM -#undef SHF_COMPRESSED -#undef STT_OBJECT -#undef STT_FUNC -#undef NT_GNU_BUILD_ID -#undef ELFCOMPRESS_ZLIB -#undef ELFCOMPRESS_ZSTD - -/* Basic types. */ - -typedef uint16_t b_elf_half; /* Elf_Half. */ -typedef uint32_t b_elf_word; /* Elf_Word. */ -typedef int32_t b_elf_sword; /* Elf_Sword. */ - -#if BACKTRACE_ELF_SIZE == 32 - -typedef uint32_t b_elf_addr; /* Elf_Addr. */ -typedef uint32_t b_elf_off; /* Elf_Off. */ - -typedef uint32_t b_elf_wxword; /* 32-bit Elf_Word, 64-bit ELF_Xword. */ - -#else - -typedef uint64_t b_elf_addr; /* Elf_Addr. */ -typedef uint64_t b_elf_off; /* Elf_Off. */ -typedef uint64_t b_elf_xword; /* Elf_Xword. */ -typedef int64_t b_elf_sxword; /* Elf_Sxword. */ - -typedef uint64_t b_elf_wxword; /* 32-bit Elf_Word, 64-bit ELF_Xword. */ - -#endif - -/* Data structures and associated constants. */ - -#define EI_NIDENT 16 - -typedef struct { - unsigned char e_ident[EI_NIDENT]; /* ELF "magic number" */ - b_elf_half e_type; /* Identifies object file type */ - b_elf_half e_machine; /* Specifies required architecture */ - b_elf_word e_version; /* Identifies object file version */ - b_elf_addr e_entry; /* Entry point virtual address */ - b_elf_off e_phoff; /* Program header table file offset */ - b_elf_off e_shoff; /* Section header table file offset */ - b_elf_word e_flags; /* Processor-specific flags */ - b_elf_half e_ehsize; /* ELF header size in bytes */ - b_elf_half e_phentsize; /* Program header table entry size */ - b_elf_half e_phnum; /* Program header table entry count */ - b_elf_half e_shentsize; /* Section header table entry size */ - b_elf_half e_shnum; /* Section header table entry count */ - b_elf_half e_shstrndx; /* Section header string table index */ -} b_elf_ehdr; /* Elf_Ehdr. */ - -#define EI_MAG0 0 -#define EI_MAG1 1 -#define EI_MAG2 2 -#define EI_MAG3 3 -#define EI_CLASS 4 -#define EI_DATA 5 -#define EI_VERSION 6 - -#define ELFMAG0 0x7f -#define ELFMAG1 'E' -#define ELFMAG2 'L' -#define ELFMAG3 'F' - -#define ELFCLASS32 1 -#define ELFCLASS64 2 - -#define ELFDATA2LSB 1 -#define ELFDATA2MSB 2 - -#define EV_CURRENT 1 - -#define ET_DYN 3 - -#define EM_PPC64 21 -#define EF_PPC64_ABI 3 - -typedef struct { - b_elf_word sh_name; /* Section name, index in string tbl */ - b_elf_word sh_type; /* Type of section */ - b_elf_wxword sh_flags; /* Miscellaneous section attributes */ - b_elf_addr sh_addr; /* Section virtual addr at execution */ - b_elf_off sh_offset; /* Section file offset */ - b_elf_wxword sh_size; /* Size of section in bytes */ - b_elf_word sh_link; /* Index of another section */ - b_elf_word sh_info; /* Additional section information */ - b_elf_wxword sh_addralign; /* Section alignment */ - b_elf_wxword sh_entsize; /* Entry size if section holds table */ -} b_elf_shdr; /* Elf_Shdr. */ - -#define SHN_UNDEF 0x0000 /* Undefined section */ -#define SHN_LORESERVE 0xFF00 /* Begin range of reserved indices */ -#define SHN_XINDEX 0xFFFF /* Section index is held elsewhere */ - -#define SHT_PROGBITS 1 -#define SHT_SYMTAB 2 -#define SHT_STRTAB 3 -#define SHT_DYNSYM 11 - -#define SHF_COMPRESSED 0x800 - -#if BACKTRACE_ELF_SIZE == 32 - -typedef struct -{ - b_elf_word st_name; /* Symbol name, index in string tbl */ - b_elf_addr st_value; /* Symbol value */ - b_elf_word st_size; /* Symbol size */ - unsigned char st_info; /* Symbol binding and type */ - unsigned char st_other; /* Visibility and other data */ - b_elf_half st_shndx; /* Symbol section index */ -} b_elf_sym; /* Elf_Sym. */ - -#else /* BACKTRACE_ELF_SIZE != 32 */ - -typedef struct -{ - b_elf_word st_name; /* Symbol name, index in string tbl */ - unsigned char st_info; /* Symbol binding and type */ - unsigned char st_other; /* Visibility and other data */ - b_elf_half st_shndx; /* Symbol section index */ - b_elf_addr st_value; /* Symbol value */ - b_elf_xword st_size; /* Symbol size */ -} b_elf_sym; /* Elf_Sym. */ - -#endif /* BACKTRACE_ELF_SIZE != 32 */ - -#define STT_OBJECT 1 -#define STT_FUNC 2 - -typedef struct -{ - uint32_t namesz; - uint32_t descsz; - uint32_t type; - char name[1]; -} b_elf_note; - -#define NT_GNU_BUILD_ID 3 - -#if BACKTRACE_ELF_SIZE == 32 - -typedef struct -{ - b_elf_word ch_type; /* Compresstion algorithm */ - b_elf_word ch_size; /* Uncompressed size */ - b_elf_word ch_addralign; /* Alignment for uncompressed data */ -} b_elf_chdr; /* Elf_Chdr */ - -#else /* BACKTRACE_ELF_SIZE != 32 */ - -typedef struct -{ - b_elf_word ch_type; /* Compression algorithm */ - b_elf_word ch_reserved; /* Reserved */ - b_elf_xword ch_size; /* Uncompressed size */ - b_elf_xword ch_addralign; /* Alignment for uncompressed data */ -} b_elf_chdr; /* Elf_Chdr */ - -#endif /* BACKTRACE_ELF_SIZE != 32 */ - -#define ELFCOMPRESS_ZLIB 1 -#define ELFCOMPRESS_ZSTD 2 - -/* Names of sections, indexed by enum dwarf_section in internal.h. */ - -static const char * const dwarf_section_names[DEBUG_MAX] = -{ - ".debug_info", - ".debug_line", - ".debug_abbrev", - ".debug_ranges", - ".debug_str", - ".debug_addr", - ".debug_str_offsets", - ".debug_line_str", - ".debug_rnglists" -}; - -/* Information we gather for the sections we care about. */ - -struct debug_section_info -{ - /* Section file offset. */ - off_t offset; - /* Section size. */ - size_t size; - /* Section contents, after read from file. */ - const unsigned char *data; - /* Whether the SHF_COMPRESSED flag is set for the section. */ - int compressed; -}; - -/* Information we keep for an ELF symbol. */ - -struct elf_symbol -{ - /* The name of the symbol. */ - const char *name; - /* The address of the symbol. */ - uintptr_t address; - /* The size of the symbol. */ - size_t size; -}; - -/* Information to pass to elf_syminfo. */ - -struct elf_syminfo_data -{ - /* Symbols for the next module. */ - struct elf_syminfo_data *next; - /* The ELF symbols, sorted by address. */ - struct elf_symbol *symbols; - /* The number of symbols. */ - size_t count; -}; - -/* A view that works for either a file or memory. */ - -struct elf_view -{ - struct backtrace_view view; - int release; /* If non-zero, must call backtrace_release_view. */ -}; - -/* Information about PowerPC64 ELFv1 .opd section. */ - -struct elf_ppc64_opd_data -{ - /* Address of the .opd section. */ - b_elf_addr addr; - /* Section data. */ - const char *data; - /* Size of the .opd section. */ - size_t size; - /* Corresponding section view. */ - struct elf_view view; -}; - -/* Create a view of SIZE bytes from DESCRIPTOR/MEMORY at OFFSET. */ - -static int -elf_get_view (struct backtrace_state *state, int descriptor, - const unsigned char *memory, size_t memory_size, off_t offset, - uint64_t size, backtrace_error_callback error_callback, - void *data, struct elf_view *view) -{ - if (memory == NULL) - { - view->release = 1; - return backtrace_get_view (state, descriptor, offset, size, - error_callback, data, &view->view); - } - else - { - if ((uint64_t) offset + size > (uint64_t) memory_size) - { - error_callback (data, "out of range for in-memory file", 0); - return 0; - } - view->view.data = (const void *) (memory + offset); - view->view.base = NULL; - view->view.len = size; - view->release = 0; - return 1; - } -} - -/* Release a view read by elf_get_view. */ - -static void -elf_release_view (struct backtrace_state *state, struct elf_view *view, - backtrace_error_callback error_callback, void *data) -{ - if (view->release) - backtrace_release_view (state, &view->view, error_callback, data); -} - -/* Compute the CRC-32 of BUF/LEN. This uses the CRC used for - .gnu_debuglink files. */ - -static uint32_t -elf_crc32 (uint32_t crc, const unsigned char *buf, size_t len) -{ - static const uint32_t crc32_table[256] = - { - 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, - 0x706af48f, 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, - 0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, - 0x90bf1d91, 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de, - 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, 0x136c9856, - 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9, - 0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4, - 0xa2677172, 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, - 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, - 0x45df5c75, 0xdcd60dcf, 0xabd13d59, 0x26d930ac, 0x51de003a, - 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, 0xcfba9599, - 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, - 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190, - 0x01db7106, 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, - 0x9fbfe4a5, 0xe8b8d433, 0x7807c9a2, 0x0f00f934, 0x9609a88e, - 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01, - 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, 0x6c0695ed, - 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950, - 0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, - 0xfbd44c65, 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, - 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, - 0x346ed9fc, 0xad678846, 0xda60b8d0, 0x44042d73, 0x33031de5, - 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa, 0xbe0b1010, - 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, - 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, - 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, - 0x03b6e20c, 0x74b1d29a, 0xead54739, 0x9dd277af, 0x04db2615, - 0x73dc1683, 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8, - 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, 0xf00f9344, - 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb, - 0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, - 0x67dd4acc, 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, - 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, - 0xa6bc5767, 0x3fb506dd, 0x48b2364b, 0xd80d2bda, 0xaf0a1b4c, - 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, 0x316e8eef, - 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, - 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, - 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, - 0x2cd99e8b, 0x5bdeae1d, 0x9b64c2b0, 0xec63f226, 0x756aa39c, - 0x026d930a, 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713, - 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, 0x92d28e9b, - 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242, - 0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, - 0x18b74777, 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, - 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, 0xa00ae278, - 0xd70dd2ee, 0x4e048354, 0x3903b3c2, 0xa7672661, 0xd06016f7, - 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, 0x40df0b66, - 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, - 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, - 0xcdd70693, 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, - 0x5d681b02, 0x2a6f2b94, 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, - 0x2d02ef8d - }; - const unsigned char *end; - - crc = ~crc; - for (end = buf + len; buf < end; ++ buf) - crc = crc32_table[(crc ^ *buf) & 0xff] ^ (crc >> 8); - return ~crc; -} - -/* Return the CRC-32 of the entire file open at DESCRIPTOR. */ - -static uint32_t -elf_crc32_file (struct backtrace_state *state, int descriptor, - backtrace_error_callback error_callback, void *data) -{ - struct stat st; - struct backtrace_view file_view; - uint32_t ret; - - if (fstat (descriptor, &st) < 0) - { - error_callback (data, "fstat", errno); - return 0; - } - - if (!backtrace_get_view (state, descriptor, 0, st.st_size, error_callback, - data, &file_view)) - return 0; - - ret = elf_crc32 (0, (const unsigned char *) file_view.data, st.st_size); - - backtrace_release_view (state, &file_view, error_callback, data); - - return ret; -} - -/* A dummy callback function used when we can't find a symbol - table. */ - -static void -elf_nosyms (struct backtrace_state *state ATTRIBUTE_UNUSED, - uintptr_t addr ATTRIBUTE_UNUSED, - backtrace_syminfo_callback callback ATTRIBUTE_UNUSED, - backtrace_error_callback error_callback, void *data) -{ - error_callback (data, "no symbol table in ELF executable", -1); -} - -/* A callback function used when we can't find any debug info. */ - -static int -elf_nodebug (struct backtrace_state *state, uintptr_t pc, - backtrace_full_callback callback, - backtrace_error_callback error_callback, void *data) -{ - if (state->syminfo_fn != NULL && state->syminfo_fn != elf_nosyms) - { - struct backtrace_call_full bdata; - - /* Fetch symbol information so that we can least get the - function name. */ - - bdata.full_callback = callback; - bdata.full_error_callback = error_callback; - bdata.full_data = data; - bdata.ret = 0; - state->syminfo_fn (state, pc, backtrace_syminfo_to_full_callback, - backtrace_syminfo_to_full_error_callback, &bdata); - return bdata.ret; - } - - error_callback (data, "no debug info in ELF executable", -1); - return 0; -} - -/* Compare struct elf_symbol for qsort. */ - -static int -elf_symbol_compare (const void *v1, const void *v2) -{ - const struct elf_symbol *e1 = (const struct elf_symbol *) v1; - const struct elf_symbol *e2 = (const struct elf_symbol *) v2; - - if (e1->address < e2->address) - return -1; - else if (e1->address > e2->address) - return 1; - else - return 0; -} - -/* Compare an ADDR against an elf_symbol for bsearch. We allocate one - extra entry in the array so that this can look safely at the next - entry. */ - -static int -elf_symbol_search (const void *vkey, const void *ventry) -{ - const uintptr_t *key = (const uintptr_t *) vkey; - const struct elf_symbol *entry = (const struct elf_symbol *) ventry; - uintptr_t addr; - - addr = *key; - if (addr < entry->address) - return -1; - else if (addr >= entry->address + entry->size) - return 1; - else - return 0; -} - -/* Initialize the symbol table info for elf_syminfo. */ - -static int -elf_initialize_syminfo (struct backtrace_state *state, - struct libbacktrace_base_address base_address, - const unsigned char *symtab_data, size_t symtab_size, - const unsigned char *strtab, size_t strtab_size, - backtrace_error_callback error_callback, - void *data, struct elf_syminfo_data *sdata, - struct elf_ppc64_opd_data *opd) -{ - size_t sym_count; - const b_elf_sym *sym; - size_t elf_symbol_count; - size_t elf_symbol_size; - struct elf_symbol *elf_symbols; - size_t i; - unsigned int j; - - sym_count = symtab_size / sizeof (b_elf_sym); - - /* We only care about function symbols. Count them. */ - sym = (const b_elf_sym *) symtab_data; - elf_symbol_count = 0; - for (i = 0; i < sym_count; ++i, ++sym) - { - int info; - - info = sym->st_info & 0xf; - if ((info == STT_FUNC || info == STT_OBJECT) - && sym->st_shndx != SHN_UNDEF) - ++elf_symbol_count; - } - - elf_symbol_size = elf_symbol_count * sizeof (struct elf_symbol); - elf_symbols = ((struct elf_symbol *) - backtrace_alloc (state, elf_symbol_size, error_callback, - data)); - if (elf_symbols == NULL) - return 0; - - sym = (const b_elf_sym *) symtab_data; - j = 0; - for (i = 0; i < sym_count; ++i, ++sym) - { - int info; - - info = sym->st_info & 0xf; - if (info != STT_FUNC && info != STT_OBJECT) - continue; - if (sym->st_shndx == SHN_UNDEF) - continue; - if (sym->st_name >= strtab_size) - { - error_callback (data, "symbol string index out of range", 0); - backtrace_free (state, elf_symbols, elf_symbol_size, error_callback, - data); - return 0; - } - elf_symbols[j].name = (const char *) strtab + sym->st_name; - /* Special case PowerPC64 ELFv1 symbols in .opd section, if the symbol - is a function descriptor, read the actual code address from the - descriptor. */ - if (opd - && sym->st_value >= opd->addr - && sym->st_value < opd->addr + opd->size) - elf_symbols[j].address - = *(const b_elf_addr *) (opd->data + (sym->st_value - opd->addr)); - else - elf_symbols[j].address = sym->st_value; - elf_symbols[j].address = - libbacktrace_add_base (elf_symbols[j].address, base_address); - elf_symbols[j].size = sym->st_size; - ++j; - } - - backtrace_qsort (elf_symbols, elf_symbol_count, sizeof (struct elf_symbol), - elf_symbol_compare); - - sdata->next = NULL; - sdata->symbols = elf_symbols; - sdata->count = elf_symbol_count; - - return 1; -} - -/* Add EDATA to the list in STATE. */ - -static void -elf_add_syminfo_data (struct backtrace_state *state, - struct elf_syminfo_data *edata) -{ - if (!state->threaded) - { - struct elf_syminfo_data **pp; - - for (pp = (struct elf_syminfo_data **) (void *) &state->syminfo_data; - *pp != NULL; - pp = &(*pp)->next) - ; - *pp = edata; - } - else - { - while (1) - { - struct elf_syminfo_data **pp; - - pp = (struct elf_syminfo_data **) (void *) &state->syminfo_data; - - while (1) - { - struct elf_syminfo_data *p; - - p = backtrace_atomic_load_pointer (pp); - - if (p == NULL) - break; - - pp = &p->next; - } - - if (__sync_bool_compare_and_swap (pp, NULL, edata)) - break; - } - } -} - -/* Return the symbol name and value for an ADDR. */ - -static void -elf_syminfo (struct backtrace_state *state, uintptr_t addr, - backtrace_syminfo_callback callback, - backtrace_error_callback error_callback ATTRIBUTE_UNUSED, - void *data) -{ - struct elf_syminfo_data *edata; - struct elf_symbol *sym = NULL; - - if (!state->threaded) - { - for (edata = (struct elf_syminfo_data *) state->syminfo_data; - edata != NULL; - edata = edata->next) - { - sym = ((struct elf_symbol *) - bsearch (&addr, edata->symbols, edata->count, - sizeof (struct elf_symbol), elf_symbol_search)); - if (sym != NULL) - break; - } - } - else - { - struct elf_syminfo_data **pp; - - pp = (struct elf_syminfo_data **) (void *) &state->syminfo_data; - while (1) - { - edata = backtrace_atomic_load_pointer (pp); - if (edata == NULL) - break; - - sym = ((struct elf_symbol *) - bsearch (&addr, edata->symbols, edata->count, - sizeof (struct elf_symbol), elf_symbol_search)); - if (sym != NULL) - break; - - pp = &edata->next; - } - } - - if (sym == NULL) - callback (data, addr, NULL, 0, 0); - else - callback (data, addr, sym->name, sym->address, sym->size); -} - -/* Return whether FILENAME is a symlink. */ - -static int -elf_is_symlink (const char *filename) -{ - struct stat st; - - if (lstat (filename, &st) < 0) - return 0; - return S_ISLNK (st.st_mode); -} - -/* Return the results of reading the symlink FILENAME in a buffer - allocated by backtrace_alloc. Return the length of the buffer in - *LEN. */ - -static char * -elf_readlink (struct backtrace_state *state, const char *filename, - backtrace_error_callback error_callback, void *data, - size_t *plen) -{ - size_t len; - char *buf; - - len = 128; - while (1) - { - ssize_t rl; - - buf = (char*)backtrace_alloc (state, len, error_callback, data); - if (buf == NULL) - return NULL; - rl = readlink (filename, buf, len); - if (rl < 0) - { - backtrace_free (state, buf, len, error_callback, data); - return NULL; - } - if ((size_t) rl < len - 1) - { - buf[rl] = '\0'; - *plen = len; - return buf; - } - backtrace_free (state, buf, len, error_callback, data); - len *= 2; - } -} - -#define SYSTEM_BUILD_ID_DIR "/usr/lib/debug/.build-id/" - -/* Open a separate debug info file, using the build ID to find it. - Returns an open file descriptor, or -1. - - The GDB manual says that the only place gdb looks for a debug file - when the build ID is known is in /usr/lib/debug/.build-id. */ - -static int -elf_open_debugfile_by_buildid (struct backtrace_state *state, - const char *buildid_data, size_t buildid_size, - const char *filename, - backtrace_error_callback error_callback, - void *data) -{ - const char * const prefix = SYSTEM_BUILD_ID_DIR; - const size_t prefix_len = strlen (prefix); - const char * const suffix = ".debug"; - const size_t suffix_len = strlen (suffix); - size_t len; - char *bd_filename; - char *t; - size_t i; - int ret; - int does_not_exist; - - len = prefix_len + buildid_size * 2 + suffix_len + 2; - bd_filename = (char*)backtrace_alloc (state, len, error_callback, data); - if (bd_filename == NULL) - return -1; - - t = bd_filename; - memcpy (t, prefix, prefix_len); - t += prefix_len; - for (i = 0; i < buildid_size; i++) - { - unsigned char b; - unsigned char nib; - - b = (unsigned char) buildid_data[i]; - nib = (b & 0xf0) >> 4; - *t++ = nib < 10 ? '0' + nib : 'a' + nib - 10; - nib = b & 0x0f; - *t++ = nib < 10 ? '0' + nib : 'a' + nib - 10; - if (i == 0) - *t++ = '/'; - } - memcpy (t, suffix, suffix_len); - t[suffix_len] = '\0'; - - ret = backtrace_open (bd_filename, error_callback, data, &does_not_exist); - - backtrace_free (state, bd_filename, len, error_callback, data); - - /* gdb checks that the debuginfo file has the same build ID note. - That seems kind of pointless to me--why would it have the right - name but not the right build ID?--so skipping the check. */ - -#ifdef TRACY_DEBUGINFOD - if (ret == -1) - return GetDebugInfoDescriptor( buildid_data, buildid_size, filename ); - else - return ret; -#else - return ret; -#endif -} - -/* Try to open a file whose name is PREFIX (length PREFIX_LEN) - concatenated with PREFIX2 (length PREFIX2_LEN) concatenated with - DEBUGLINK_NAME. Returns an open file descriptor, or -1. */ - -static int -elf_try_debugfile (struct backtrace_state *state, const char *prefix, - size_t prefix_len, const char *prefix2, size_t prefix2_len, - const char *debuglink_name, - backtrace_error_callback error_callback, void *data) -{ - size_t debuglink_len; - size_t try_len; - char *Try; - int does_not_exist; - int ret; - - debuglink_len = strlen (debuglink_name); - try_len = prefix_len + prefix2_len + debuglink_len + 1; - Try = (char*)backtrace_alloc (state, try_len, error_callback, data); - if (Try == NULL) - return -1; - - memcpy (Try, prefix, prefix_len); - memcpy (Try + prefix_len, prefix2, prefix2_len); - memcpy (Try + prefix_len + prefix2_len, debuglink_name, debuglink_len); - Try[prefix_len + prefix2_len + debuglink_len] = '\0'; - - ret = backtrace_open (Try, error_callback, data, &does_not_exist); - - backtrace_free (state, Try, try_len, error_callback, data); - - return ret; -} - -/* Find a separate debug info file, using the debuglink section data - to find it. Returns an open file descriptor, or -1. */ - -static int -elf_find_debugfile_by_debuglink (struct backtrace_state *state, - const char *filename, - const char *debuglink_name, - backtrace_error_callback error_callback, - void *data) -{ - int ret; - char *alc; - size_t alc_len; - const char *slash; - int ddescriptor; - const char *prefix; - size_t prefix_len; - - /* Resolve symlinks in FILENAME. Since FILENAME is fairly likely to - be /proc/self/exe, symlinks are common. We don't try to resolve - the whole path name, just the base name. */ - ret = -1; - alc = NULL; - alc_len = 0; - while (elf_is_symlink (filename)) - { - char *new_buf; - size_t new_len; - - new_buf = elf_readlink (state, filename, error_callback, data, &new_len); - if (new_buf == NULL) - break; - - if (new_buf[0] == '/') - filename = new_buf; - else - { - slash = strrchr (filename, '/'); - if (slash == NULL) - filename = new_buf; - else - { - size_t clen; - char *c; - - slash++; - clen = slash - filename + strlen (new_buf) + 1; - c = (char*)backtrace_alloc (state, clen, error_callback, data); - if (c == NULL) - goto done; - - memcpy (c, filename, slash - filename); - memcpy (c + (slash - filename), new_buf, strlen (new_buf)); - c[slash - filename + strlen (new_buf)] = '\0'; - backtrace_free (state, new_buf, new_len, error_callback, data); - filename = c; - new_buf = c; - new_len = clen; - } - } - - if (alc != NULL) - backtrace_free (state, alc, alc_len, error_callback, data); - alc = new_buf; - alc_len = new_len; - } - - /* Look for DEBUGLINK_NAME in the same directory as FILENAME. */ - - slash = strrchr (filename, '/'); - if (slash == NULL) - { - prefix = ""; - prefix_len = 0; - } - else - { - slash++; - prefix = filename; - prefix_len = slash - filename; - } - - ddescriptor = elf_try_debugfile (state, prefix, prefix_len, "", 0, - debuglink_name, error_callback, data); - if (ddescriptor >= 0) - { - ret = ddescriptor; - goto done; - } - - /* Look for DEBUGLINK_NAME in a .debug subdirectory of FILENAME. */ - - ddescriptor = elf_try_debugfile (state, prefix, prefix_len, ".debug/", - strlen (".debug/"), debuglink_name, - error_callback, data); - if (ddescriptor >= 0) - { - ret = ddescriptor; - goto done; - } - - /* Look for DEBUGLINK_NAME in /usr/lib/debug. */ - - ddescriptor = elf_try_debugfile (state, "/usr/lib/debug/", - strlen ("/usr/lib/debug/"), prefix, - prefix_len, debuglink_name, - error_callback, data); - if (ddescriptor >= 0) - ret = ddescriptor; - - done: - if (alc != NULL && alc_len > 0) - backtrace_free (state, alc, alc_len, error_callback, data); - return ret; -} - -/* Open a separate debug info file, using the debuglink section data - to find it. Returns an open file descriptor, or -1. */ - -static int -elf_open_debugfile_by_debuglink (struct backtrace_state *state, - const char *filename, - const char *debuglink_name, - uint32_t debuglink_crc, - backtrace_error_callback error_callback, - void *data) -{ - int ddescriptor; - - ddescriptor = elf_find_debugfile_by_debuglink (state, filename, - debuglink_name, - error_callback, data); - if (ddescriptor < 0) - return -1; - - if (debuglink_crc != 0) - { - uint32_t got_crc; - - got_crc = elf_crc32_file (state, ddescriptor, error_callback, data); - if (got_crc != debuglink_crc) - { - backtrace_close (ddescriptor, error_callback, data); - return -1; - } - } - - return ddescriptor; -} - -/* A function useful for setting a breakpoint for an inflation failure - when this code is compiled with -g. */ - -static void -elf_uncompress_failed(void) -{ -} - -/* *PVAL is the current value being read from the stream, and *PBITS - is the number of valid bits. Ensure that *PVAL holds at least 15 - bits by reading additional bits from *PPIN, up to PINEND, as - needed. Updates *PPIN, *PVAL and *PBITS. Returns 1 on success, 0 - on error. */ - -static int -elf_fetch_bits (const unsigned char **ppin, const unsigned char *pinend, - uint64_t *pval, unsigned int *pbits) -{ - unsigned int bits; - const unsigned char *pin; - uint64_t val; - uint32_t next; - - bits = *pbits; - if (bits >= 15) - return 1; - pin = *ppin; - val = *pval; - - if (unlikely (pinend - pin < 4)) - { - elf_uncompress_failed (); - return 0; - } - -#if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) \ - && defined(__ORDER_BIG_ENDIAN__) \ - && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ \ - || __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) - /* We've ensured that PIN is aligned. */ - next = *(const uint32_t *)pin; - -#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - next = __builtin_bswap32 (next); -#endif -#else - next = pin[0] | (pin[1] << 8) | (pin[2] << 16) | (pin[3] << 24); -#endif - - val |= (uint64_t)next << bits; - bits += 32; - pin += 4; - - /* We will need the next four bytes soon. */ - __builtin_prefetch (pin, 0, 0); - - *ppin = pin; - *pval = val; - *pbits = bits; - return 1; -} - -/* This is like elf_fetch_bits, but it fetchs the bits backward, and ensures at - least 16 bits. This is for zstd. */ - -static int -elf_fetch_bits_backward (const unsigned char **ppin, - const unsigned char *pinend, - uint64_t *pval, unsigned int *pbits) -{ - unsigned int bits; - const unsigned char *pin; - uint64_t val; - uint32_t next; - - bits = *pbits; - if (bits >= 16) - return 1; - pin = *ppin; - val = *pval; - - if (unlikely (pin <= pinend)) - return 1; - - pin -= 4; - -#if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) \ - && defined(__ORDER_BIG_ENDIAN__) \ - && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ \ - || __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) - /* We've ensured that PIN is aligned. */ - next = *(const uint32_t *)pin; - -#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - next = __builtin_bswap32 (next); -#endif -#else - next = pin[0] | (pin[1] << 8) | (pin[2] << 16) | (pin[3] << 24); -#endif - - val <<= 32; - val |= next; - bits += 32; - - if (unlikely (pin < pinend)) - { - val >>= (pinend - pin) * 8; - bits -= (pinend - pin) * 8; - } - - *ppin = pin; - *pval = val; - *pbits = bits; - return 1; -} - -/* Initialize backward fetching when the bitstream starts with a 1 bit in the - last byte in memory (which is the first one that we read). This is used by - zstd decompression. Returns 1 on success, 0 on error. */ - -static int -elf_fetch_backward_init (const unsigned char **ppin, - const unsigned char *pinend, - uint64_t *pval, unsigned int *pbits) -{ - const unsigned char *pin; - unsigned int stream_start; - uint64_t val; - unsigned int bits; - - pin = *ppin; - stream_start = (unsigned int)*pin; - if (unlikely (stream_start == 0)) - { - elf_uncompress_failed (); - return 0; - } - val = 0; - bits = 0; - - /* Align to a 32-bit boundary. */ - while ((((uintptr_t)pin) & 3) != 0) - { - val <<= 8; - val |= (uint64_t)*pin; - bits += 8; - --pin; - } - - val <<= 8; - val |= (uint64_t)*pin; - bits += 8; - - *ppin = pin; - *pval = val; - *pbits = bits; - if (!elf_fetch_bits_backward (ppin, pinend, pval, pbits)) - return 0; - - *pbits -= __builtin_clz (stream_start) - (sizeof (unsigned int) - 1) * 8 + 1; - - if (!elf_fetch_bits_backward (ppin, pinend, pval, pbits)) - return 0; - - return 1; -} - -/* Huffman code tables, like the rest of the zlib format, are defined - by RFC 1951. We store a Huffman code table as a series of tables - stored sequentially in memory. Each entry in a table is 16 bits. - The first, main, table has 256 entries. It is followed by a set of - secondary tables of length 2 to 128 entries. The maximum length of - a code sequence in the deflate format is 15 bits, so that is all we - need. Each secondary table has an index, which is the offset of - the table in the overall memory storage. - - The deflate format says that all codes of a given bit length are - lexicographically consecutive. Perhaps we could have 130 values - that require a 15-bit code, perhaps requiring three secondary - tables of size 128. I don't know if this is actually possible, but - it suggests that the maximum size required for secondary tables is - 3 * 128 + 3 * 64 ... == 768. The zlib enough program reports 660 - as the maximum. We permit 768, since in addition to the 256 for - the primary table, with two bytes per entry, and with the two - tables we need, that gives us a page. - - A single table entry needs to store a value or (for the main table - only) the index and size of a secondary table. Values range from 0 - to 285, inclusive. Secondary table indexes, per above, range from - 0 to 510. For a value we need to store the number of bits we need - to determine that value (one value may appear multiple times in the - table), which is 1 to 8. For a secondary table we need to store - the number of bits used to index into the table, which is 1 to 7. - And of course we need 1 bit to decide whether we have a value or a - secondary table index. So each entry needs 9 bits for value/table - index, 3 bits for size, 1 bit what it is. For simplicity we use 16 - bits per entry. */ - -/* Number of entries we allocate to for one code table. We get a page - for the two code tables we need. */ - -#define ZLIB_HUFFMAN_TABLE_SIZE (1024) - -/* Bit masks and shifts for the values in the table. */ - -#define ZLIB_HUFFMAN_VALUE_MASK 0x01ff -#define ZLIB_HUFFMAN_BITS_SHIFT 9 -#define ZLIB_HUFFMAN_BITS_MASK 0x7 -#define ZLIB_HUFFMAN_SECONDARY_SHIFT 12 - -/* For working memory while inflating we need two code tables, we need - an array of code lengths (max value 15, so we use unsigned char), - and an array of unsigned shorts used while building a table. The - latter two arrays must be large enough to hold the maximum number - of code lengths, which RFC 1951 defines as 286 + 30. */ - -#define ZLIB_TABLE_SIZE \ - (2 * ZLIB_HUFFMAN_TABLE_SIZE * sizeof (uint16_t) \ - + (286 + 30) * sizeof (uint16_t) \ - + (286 + 30) * sizeof (unsigned char)) - -#define ZLIB_TABLE_CODELEN_OFFSET \ - (2 * ZLIB_HUFFMAN_TABLE_SIZE * sizeof (uint16_t) \ - + (286 + 30) * sizeof (uint16_t)) - -#define ZLIB_TABLE_WORK_OFFSET \ - (2 * ZLIB_HUFFMAN_TABLE_SIZE * sizeof (uint16_t)) - -#ifdef BACKTRACE_GENERATE_FIXED_HUFFMAN_TABLE - -/* Used by the main function that generates the fixed table to learn - the table size. */ -static size_t final_next_secondary; - -#endif - -/* Build a Huffman code table from an array of lengths in CODES of - length CODES_LEN. The table is stored into *TABLE. ZDEBUG_TABLE - is the same as for elf_zlib_inflate, used to find some work space. - Returns 1 on success, 0 on error. */ - -static int -elf_zlib_inflate_table (unsigned char *codes, size_t codes_len, - uint16_t *zdebug_table, uint16_t *table) -{ - uint16_t count[16]; - uint16_t start[16]; - uint16_t prev[16]; - uint16_t firstcode[7]; - uint16_t *next; - size_t i; - size_t j; - unsigned int code; - size_t next_secondary; - - /* Count the number of code of each length. Set NEXT[val] to be the - next value after VAL with the same bit length. */ - - next = (uint16_t *) (((unsigned char *) zdebug_table) - + ZLIB_TABLE_WORK_OFFSET); - - memset (&count[0], 0, 16 * sizeof (uint16_t)); - for (i = 0; i < codes_len; ++i) - { - if (unlikely (codes[i] >= 16)) - { - elf_uncompress_failed (); - return 0; - } - - if (count[codes[i]] == 0) - { - start[codes[i]] = i; - prev[codes[i]] = i; - } - else - { - next[prev[codes[i]]] = i; - prev[codes[i]] = i; - } - - ++count[codes[i]]; - } - - /* For each length, fill in the table for the codes of that - length. */ - - memset (table, 0, ZLIB_HUFFMAN_TABLE_SIZE * sizeof (uint16_t)); - - /* Handle the values that do not require a secondary table. */ - - code = 0; - for (j = 1; j <= 8; ++j) - { - unsigned int jcnt; - unsigned int val; - - jcnt = count[j]; - if (jcnt == 0) - continue; - - if (unlikely (jcnt > (1U << j))) - { - elf_uncompress_failed (); - return 0; - } - - /* There are JCNT values that have this length, the values - starting from START[j] continuing through NEXT[VAL]. Those - values are assigned consecutive values starting at CODE. */ - - val = start[j]; - for (i = 0; i < jcnt; ++i) - { - uint16_t tval; - size_t ind; - unsigned int incr; - - /* In the compressed bit stream, the value VAL is encoded as - J bits with the value C. */ - - if (unlikely ((val & ~ZLIB_HUFFMAN_VALUE_MASK) != 0)) - { - elf_uncompress_failed (); - return 0; - } - - tval = val | ((j - 1) << ZLIB_HUFFMAN_BITS_SHIFT); - - /* The table lookup uses 8 bits. If J is less than 8, we - don't know what the other bits will be. We need to fill - in all possibilities in the table. Since the Huffman - code is unambiguous, those entries can't be used for any - other code. */ - - for (ind = code; ind < 0x100; ind += 1 << j) - { - if (unlikely (table[ind] != 0)) - { - elf_uncompress_failed (); - return 0; - } - table[ind] = tval; - } - - /* Advance to the next value with this length. */ - if (i + 1 < jcnt) - val = next[val]; - - /* The Huffman codes are stored in the bitstream with the - most significant bit first, as is required to make them - unambiguous. The effect is that when we read them from - the bitstream we see the bit sequence in reverse order: - the most significant bit of the Huffman code is the least - significant bit of the value we read from the bitstream. - That means that to make our table lookups work, we need - to reverse the bits of CODE. Since reversing bits is - tedious and in general requires using a table, we instead - increment CODE in reverse order. That is, if the number - of bits we are currently using, here named J, is 3, we - count as 000, 100, 010, 110, 001, 101, 011, 111, which is - to say the numbers from 0 to 7 but with the bits - reversed. Going to more bits, aka incrementing J, - effectively just adds more zero bits as the beginning, - and as such does not change the numeric value of CODE. - - To increment CODE of length J in reverse order, find the - most significant zero bit and set it to one while - clearing all higher bits. In other words, add 1 modulo - 2^J, only reversed. */ - - incr = 1U << (j - 1); - while ((code & incr) != 0) - incr >>= 1; - if (incr == 0) - code = 0; - else - { - code &= incr - 1; - code += incr; - } - } - } - - /* Handle the values that require a secondary table. */ - - /* Set FIRSTCODE, the number at which the codes start, for each - length. */ - - for (j = 9; j < 16; j++) - { - unsigned int jcnt; - unsigned int k; - - jcnt = count[j]; - if (jcnt == 0) - continue; - - /* There are JCNT values that have this length, the values - starting from START[j]. Those values are assigned - consecutive values starting at CODE. */ - - firstcode[j - 9] = code; - - /* Reverse add JCNT to CODE modulo 2^J. */ - for (k = 0; k < j; ++k) - { - if ((jcnt & (1U << k)) != 0) - { - unsigned int m; - unsigned int bit; - - bit = 1U << (j - k - 1); - for (m = 0; m < j - k; ++m, bit >>= 1) - { - if ((code & bit) == 0) - { - code += bit; - break; - } - code &= ~bit; - } - jcnt &= ~(1U << k); - } - } - if (unlikely (jcnt != 0)) - { - elf_uncompress_failed (); - return 0; - } - } - - /* For J from 9 to 15, inclusive, we store COUNT[J] consecutive - values starting at START[J] with consecutive codes starting at - FIRSTCODE[J - 9]. In the primary table we need to point to the - secondary table, and the secondary table will be indexed by J - 9 - bits. We count down from 15 so that we install the larger - secondary tables first, as the smaller ones may be embedded in - the larger ones. */ - - next_secondary = 0; /* Index of next secondary table (after primary). */ - for (j = 15; j >= 9; j--) - { - unsigned int jcnt; - unsigned int val; - size_t primary; /* Current primary index. */ - size_t secondary; /* Offset to current secondary table. */ - size_t secondary_bits; /* Bit size of current secondary table. */ - - jcnt = count[j]; - if (jcnt == 0) - continue; - - val = start[j]; - code = firstcode[j - 9]; - primary = 0x100; - secondary = 0; - secondary_bits = 0; - for (i = 0; i < jcnt; ++i) - { - uint16_t tval; - size_t ind; - unsigned int incr; - - if ((code & 0xff) != primary) - { - uint16_t tprimary; - - /* Fill in a new primary table entry. */ - - primary = code & 0xff; - - tprimary = table[primary]; - if (tprimary == 0) - { - /* Start a new secondary table. */ - - if (unlikely ((next_secondary & ZLIB_HUFFMAN_VALUE_MASK) - != next_secondary)) - { - elf_uncompress_failed (); - return 0; - } - - secondary = next_secondary; - secondary_bits = j - 8; - next_secondary += 1 << secondary_bits; - table[primary] = (secondary - + ((j - 8) << ZLIB_HUFFMAN_BITS_SHIFT) - + (1U << ZLIB_HUFFMAN_SECONDARY_SHIFT)); - } - else - { - /* There is an existing entry. It had better be a - secondary table with enough bits. */ - if (unlikely ((tprimary - & (1U << ZLIB_HUFFMAN_SECONDARY_SHIFT)) - == 0)) - { - elf_uncompress_failed (); - return 0; - } - secondary = tprimary & ZLIB_HUFFMAN_VALUE_MASK; - secondary_bits = ((tprimary >> ZLIB_HUFFMAN_BITS_SHIFT) - & ZLIB_HUFFMAN_BITS_MASK); - if (unlikely (secondary_bits < j - 8)) - { - elf_uncompress_failed (); - return 0; - } - } - } - - /* Fill in secondary table entries. */ - - tval = val | ((j - 8) << ZLIB_HUFFMAN_BITS_SHIFT); - - for (ind = code >> 8; - ind < (1U << secondary_bits); - ind += 1U << (j - 8)) - { - if (unlikely (table[secondary + 0x100 + ind] != 0)) - { - elf_uncompress_failed (); - return 0; - } - table[secondary + 0x100 + ind] = tval; - } - - if (i + 1 < jcnt) - val = next[val]; - - incr = 1U << (j - 1); - while ((code & incr) != 0) - incr >>= 1; - if (incr == 0) - code = 0; - else - { - code &= incr - 1; - code += incr; - } - } - } - -#ifdef BACKTRACE_GENERATE_FIXED_HUFFMAN_TABLE - final_next_secondary = next_secondary; -#endif - - return 1; -} - -#ifdef BACKTRACE_GENERATE_FIXED_HUFFMAN_TABLE - -/* Used to generate the fixed Huffman table for block type 1. */ - -#include - -static uint16_t table[ZLIB_TABLE_SIZE]; -static unsigned char codes[288]; - -int -main () -{ - size_t i; - - for (i = 0; i <= 143; ++i) - codes[i] = 8; - for (i = 144; i <= 255; ++i) - codes[i] = 9; - for (i = 256; i <= 279; ++i) - codes[i] = 7; - for (i = 280; i <= 287; ++i) - codes[i] = 8; - if (!elf_zlib_inflate_table (&codes[0], 288, &table[0], &table[0])) - { - fprintf (stderr, "elf_zlib_inflate_table failed\n"); - exit (EXIT_FAILURE); - } - - printf ("static const uint16_t elf_zlib_default_table[%#zx] =\n", - final_next_secondary + 0x100); - printf ("{\n"); - for (i = 0; i < final_next_secondary + 0x100; i += 8) - { - size_t j; - - printf (" "); - for (j = i; j < final_next_secondary + 0x100 && j < i + 8; ++j) - printf (" %#x,", table[j]); - printf ("\n"); - } - printf ("};\n"); - printf ("\n"); - - for (i = 0; i < 32; ++i) - codes[i] = 5; - if (!elf_zlib_inflate_table (&codes[0], 32, &table[0], &table[0])) - { - fprintf (stderr, "elf_zlib_inflate_table failed\n"); - exit (EXIT_FAILURE); - } - - printf ("static const uint16_t elf_zlib_default_dist_table[%#zx] =\n", - final_next_secondary + 0x100); - printf ("{\n"); - for (i = 0; i < final_next_secondary + 0x100; i += 8) - { - size_t j; - - printf (" "); - for (j = i; j < final_next_secondary + 0x100 && j < i + 8; ++j) - printf (" %#x,", table[j]); - printf ("\n"); - } - printf ("};\n"); - - return 0; -} - -#endif - -/* The fixed tables generated by the #ifdef'ed out main function - above. */ - -static const uint16_t elf_zlib_default_table[0x170] = -{ - 0xd00, 0xe50, 0xe10, 0xf18, 0xd10, 0xe70, 0xe30, 0x1230, - 0xd08, 0xe60, 0xe20, 0x1210, 0xe00, 0xe80, 0xe40, 0x1250, - 0xd04, 0xe58, 0xe18, 0x1200, 0xd14, 0xe78, 0xe38, 0x1240, - 0xd0c, 0xe68, 0xe28, 0x1220, 0xe08, 0xe88, 0xe48, 0x1260, - 0xd02, 0xe54, 0xe14, 0xf1c, 0xd12, 0xe74, 0xe34, 0x1238, - 0xd0a, 0xe64, 0xe24, 0x1218, 0xe04, 0xe84, 0xe44, 0x1258, - 0xd06, 0xe5c, 0xe1c, 0x1208, 0xd16, 0xe7c, 0xe3c, 0x1248, - 0xd0e, 0xe6c, 0xe2c, 0x1228, 0xe0c, 0xe8c, 0xe4c, 0x1268, - 0xd01, 0xe52, 0xe12, 0xf1a, 0xd11, 0xe72, 0xe32, 0x1234, - 0xd09, 0xe62, 0xe22, 0x1214, 0xe02, 0xe82, 0xe42, 0x1254, - 0xd05, 0xe5a, 0xe1a, 0x1204, 0xd15, 0xe7a, 0xe3a, 0x1244, - 0xd0d, 0xe6a, 0xe2a, 0x1224, 0xe0a, 0xe8a, 0xe4a, 0x1264, - 0xd03, 0xe56, 0xe16, 0xf1e, 0xd13, 0xe76, 0xe36, 0x123c, - 0xd0b, 0xe66, 0xe26, 0x121c, 0xe06, 0xe86, 0xe46, 0x125c, - 0xd07, 0xe5e, 0xe1e, 0x120c, 0xd17, 0xe7e, 0xe3e, 0x124c, - 0xd0f, 0xe6e, 0xe2e, 0x122c, 0xe0e, 0xe8e, 0xe4e, 0x126c, - 0xd00, 0xe51, 0xe11, 0xf19, 0xd10, 0xe71, 0xe31, 0x1232, - 0xd08, 0xe61, 0xe21, 0x1212, 0xe01, 0xe81, 0xe41, 0x1252, - 0xd04, 0xe59, 0xe19, 0x1202, 0xd14, 0xe79, 0xe39, 0x1242, - 0xd0c, 0xe69, 0xe29, 0x1222, 0xe09, 0xe89, 0xe49, 0x1262, - 0xd02, 0xe55, 0xe15, 0xf1d, 0xd12, 0xe75, 0xe35, 0x123a, - 0xd0a, 0xe65, 0xe25, 0x121a, 0xe05, 0xe85, 0xe45, 0x125a, - 0xd06, 0xe5d, 0xe1d, 0x120a, 0xd16, 0xe7d, 0xe3d, 0x124a, - 0xd0e, 0xe6d, 0xe2d, 0x122a, 0xe0d, 0xe8d, 0xe4d, 0x126a, - 0xd01, 0xe53, 0xe13, 0xf1b, 0xd11, 0xe73, 0xe33, 0x1236, - 0xd09, 0xe63, 0xe23, 0x1216, 0xe03, 0xe83, 0xe43, 0x1256, - 0xd05, 0xe5b, 0xe1b, 0x1206, 0xd15, 0xe7b, 0xe3b, 0x1246, - 0xd0d, 0xe6b, 0xe2b, 0x1226, 0xe0b, 0xe8b, 0xe4b, 0x1266, - 0xd03, 0xe57, 0xe17, 0xf1f, 0xd13, 0xe77, 0xe37, 0x123e, - 0xd0b, 0xe67, 0xe27, 0x121e, 0xe07, 0xe87, 0xe47, 0x125e, - 0xd07, 0xe5f, 0xe1f, 0x120e, 0xd17, 0xe7f, 0xe3f, 0x124e, - 0xd0f, 0xe6f, 0xe2f, 0x122e, 0xe0f, 0xe8f, 0xe4f, 0x126e, - 0x290, 0x291, 0x292, 0x293, 0x294, 0x295, 0x296, 0x297, - 0x298, 0x299, 0x29a, 0x29b, 0x29c, 0x29d, 0x29e, 0x29f, - 0x2a0, 0x2a1, 0x2a2, 0x2a3, 0x2a4, 0x2a5, 0x2a6, 0x2a7, - 0x2a8, 0x2a9, 0x2aa, 0x2ab, 0x2ac, 0x2ad, 0x2ae, 0x2af, - 0x2b0, 0x2b1, 0x2b2, 0x2b3, 0x2b4, 0x2b5, 0x2b6, 0x2b7, - 0x2b8, 0x2b9, 0x2ba, 0x2bb, 0x2bc, 0x2bd, 0x2be, 0x2bf, - 0x2c0, 0x2c1, 0x2c2, 0x2c3, 0x2c4, 0x2c5, 0x2c6, 0x2c7, - 0x2c8, 0x2c9, 0x2ca, 0x2cb, 0x2cc, 0x2cd, 0x2ce, 0x2cf, - 0x2d0, 0x2d1, 0x2d2, 0x2d3, 0x2d4, 0x2d5, 0x2d6, 0x2d7, - 0x2d8, 0x2d9, 0x2da, 0x2db, 0x2dc, 0x2dd, 0x2de, 0x2df, - 0x2e0, 0x2e1, 0x2e2, 0x2e3, 0x2e4, 0x2e5, 0x2e6, 0x2e7, - 0x2e8, 0x2e9, 0x2ea, 0x2eb, 0x2ec, 0x2ed, 0x2ee, 0x2ef, - 0x2f0, 0x2f1, 0x2f2, 0x2f3, 0x2f4, 0x2f5, 0x2f6, 0x2f7, - 0x2f8, 0x2f9, 0x2fa, 0x2fb, 0x2fc, 0x2fd, 0x2fe, 0x2ff, -}; - -static const uint16_t elf_zlib_default_dist_table[0x100] = -{ - 0x800, 0x810, 0x808, 0x818, 0x804, 0x814, 0x80c, 0x81c, - 0x802, 0x812, 0x80a, 0x81a, 0x806, 0x816, 0x80e, 0x81e, - 0x801, 0x811, 0x809, 0x819, 0x805, 0x815, 0x80d, 0x81d, - 0x803, 0x813, 0x80b, 0x81b, 0x807, 0x817, 0x80f, 0x81f, - 0x800, 0x810, 0x808, 0x818, 0x804, 0x814, 0x80c, 0x81c, - 0x802, 0x812, 0x80a, 0x81a, 0x806, 0x816, 0x80e, 0x81e, - 0x801, 0x811, 0x809, 0x819, 0x805, 0x815, 0x80d, 0x81d, - 0x803, 0x813, 0x80b, 0x81b, 0x807, 0x817, 0x80f, 0x81f, - 0x800, 0x810, 0x808, 0x818, 0x804, 0x814, 0x80c, 0x81c, - 0x802, 0x812, 0x80a, 0x81a, 0x806, 0x816, 0x80e, 0x81e, - 0x801, 0x811, 0x809, 0x819, 0x805, 0x815, 0x80d, 0x81d, - 0x803, 0x813, 0x80b, 0x81b, 0x807, 0x817, 0x80f, 0x81f, - 0x800, 0x810, 0x808, 0x818, 0x804, 0x814, 0x80c, 0x81c, - 0x802, 0x812, 0x80a, 0x81a, 0x806, 0x816, 0x80e, 0x81e, - 0x801, 0x811, 0x809, 0x819, 0x805, 0x815, 0x80d, 0x81d, - 0x803, 0x813, 0x80b, 0x81b, 0x807, 0x817, 0x80f, 0x81f, - 0x800, 0x810, 0x808, 0x818, 0x804, 0x814, 0x80c, 0x81c, - 0x802, 0x812, 0x80a, 0x81a, 0x806, 0x816, 0x80e, 0x81e, - 0x801, 0x811, 0x809, 0x819, 0x805, 0x815, 0x80d, 0x81d, - 0x803, 0x813, 0x80b, 0x81b, 0x807, 0x817, 0x80f, 0x81f, - 0x800, 0x810, 0x808, 0x818, 0x804, 0x814, 0x80c, 0x81c, - 0x802, 0x812, 0x80a, 0x81a, 0x806, 0x816, 0x80e, 0x81e, - 0x801, 0x811, 0x809, 0x819, 0x805, 0x815, 0x80d, 0x81d, - 0x803, 0x813, 0x80b, 0x81b, 0x807, 0x817, 0x80f, 0x81f, - 0x800, 0x810, 0x808, 0x818, 0x804, 0x814, 0x80c, 0x81c, - 0x802, 0x812, 0x80a, 0x81a, 0x806, 0x816, 0x80e, 0x81e, - 0x801, 0x811, 0x809, 0x819, 0x805, 0x815, 0x80d, 0x81d, - 0x803, 0x813, 0x80b, 0x81b, 0x807, 0x817, 0x80f, 0x81f, - 0x800, 0x810, 0x808, 0x818, 0x804, 0x814, 0x80c, 0x81c, - 0x802, 0x812, 0x80a, 0x81a, 0x806, 0x816, 0x80e, 0x81e, - 0x801, 0x811, 0x809, 0x819, 0x805, 0x815, 0x80d, 0x81d, - 0x803, 0x813, 0x80b, 0x81b, 0x807, 0x817, 0x80f, 0x81f, -}; - -/* Inflate a zlib stream from PIN/SIN to POUT/SOUT. Return 1 on - success, 0 on some error parsing the stream. */ - -static int -elf_zlib_inflate (const unsigned char *pin, size_t sin, uint16_t *zdebug_table, - unsigned char *pout, size_t sout) -{ - unsigned char *porigout; - const unsigned char *pinend; - unsigned char *poutend; - - /* We can apparently see multiple zlib streams concatenated - together, so keep going as long as there is something to read. - The last 4 bytes are the checksum. */ - porigout = pout; - pinend = pin + sin; - poutend = pout + sout; - while ((pinend - pin) > 4) - { - uint64_t val; - unsigned int bits; - int last; - - /* Read the two byte zlib header. */ - - if (unlikely ((pin[0] & 0xf) != 8)) /* 8 is zlib encoding. */ - { - /* Unknown compression method. */ - elf_uncompress_failed (); - return 0; - } - if (unlikely ((pin[0] >> 4) > 7)) - { - /* Window size too large. Other than this check, we don't - care about the window size. */ - elf_uncompress_failed (); - return 0; - } - if (unlikely ((pin[1] & 0x20) != 0)) - { - /* Stream expects a predefined dictionary, but we have no - dictionary. */ - elf_uncompress_failed (); - return 0; - } - val = (pin[0] << 8) | pin[1]; - if (unlikely (val % 31 != 0)) - { - /* Header check failure. */ - elf_uncompress_failed (); - return 0; - } - pin += 2; - - /* Align PIN to a 32-bit boundary. */ - - val = 0; - bits = 0; - while ((((uintptr_t) pin) & 3) != 0) - { - val |= (uint64_t)*pin << bits; - bits += 8; - ++pin; - } - - /* Read blocks until one is marked last. */ - - last = 0; - - while (!last) - { - unsigned int type; - const uint16_t *tlit; - const uint16_t *tdist; - - if (!elf_fetch_bits (&pin, pinend, &val, &bits)) - return 0; - - last = val & 1; - type = (val >> 1) & 3; - val >>= 3; - bits -= 3; - - if (unlikely (type == 3)) - { - /* Invalid block type. */ - elf_uncompress_failed (); - return 0; - } - - if (type == 0) - { - uint16_t len; - uint16_t lenc; - - /* An uncompressed block. */ - - /* If we've read ahead more than a byte, back up. */ - while (bits >= 8) - { - --pin; - bits -= 8; - } - - val = 0; - bits = 0; - if (unlikely ((pinend - pin) < 4)) - { - /* Missing length. */ - elf_uncompress_failed (); - return 0; - } - len = pin[0] | (pin[1] << 8); - lenc = pin[2] | (pin[3] << 8); - pin += 4; - lenc = ~lenc; - if (unlikely (len != lenc)) - { - /* Corrupt data. */ - elf_uncompress_failed (); - return 0; - } - if (unlikely (len > (unsigned int) (pinend - pin) - || len > (unsigned int) (poutend - pout))) - { - /* Not enough space in buffers. */ - elf_uncompress_failed (); - return 0; - } - memcpy (pout, pin, len); - pout += len; - pin += len; - - /* Align PIN. */ - while ((((uintptr_t) pin) & 3) != 0) - { - val |= (uint64_t)*pin << bits; - bits += 8; - ++pin; - } - - /* Go around to read the next block. */ - continue; - } - - if (type == 1) - { - tlit = elf_zlib_default_table; - tdist = elf_zlib_default_dist_table; - } - else - { - unsigned int nlit; - unsigned int ndist; - unsigned int nclen; - unsigned char codebits[19]; - unsigned char *plenbase; - unsigned char *plen; - unsigned char *plenend; - - /* Read a Huffman encoding table. The various magic - numbers here are from RFC 1951. */ - - if (!elf_fetch_bits (&pin, pinend, &val, &bits)) - return 0; - - nlit = (val & 0x1f) + 257; - val >>= 5; - ndist = (val & 0x1f) + 1; - val >>= 5; - nclen = (val & 0xf) + 4; - val >>= 4; - bits -= 14; - if (unlikely (nlit > 286 || ndist > 30)) - { - /* Values out of range. */ - elf_uncompress_failed (); - return 0; - } - - /* Read and build the table used to compress the - literal, length, and distance codes. */ - - memset(&codebits[0], 0, 19); - - /* There are always at least 4 elements in the - table. */ - - if (!elf_fetch_bits (&pin, pinend, &val, &bits)) - return 0; - - codebits[16] = val & 7; - codebits[17] = (val >> 3) & 7; - codebits[18] = (val >> 6) & 7; - codebits[0] = (val >> 9) & 7; - val >>= 12; - bits -= 12; - - if (nclen == 4) - goto codebitsdone; - - codebits[8] = val & 7; - val >>= 3; - bits -= 3; - - if (nclen == 5) - goto codebitsdone; - - if (!elf_fetch_bits (&pin, pinend, &val, &bits)) - return 0; - - codebits[7] = val & 7; - val >>= 3; - bits -= 3; - - if (nclen == 6) - goto codebitsdone; - - codebits[9] = val & 7; - val >>= 3; - bits -= 3; - - if (nclen == 7) - goto codebitsdone; - - codebits[6] = val & 7; - val >>= 3; - bits -= 3; - - if (nclen == 8) - goto codebitsdone; - - codebits[10] = val & 7; - val >>= 3; - bits -= 3; - - if (nclen == 9) - goto codebitsdone; - - codebits[5] = val & 7; - val >>= 3; - bits -= 3; - - if (nclen == 10) - goto codebitsdone; - - if (!elf_fetch_bits (&pin, pinend, &val, &bits)) - return 0; - - codebits[11] = val & 7; - val >>= 3; - bits -= 3; - - if (nclen == 11) - goto codebitsdone; - - codebits[4] = val & 7; - val >>= 3; - bits -= 3; - - if (nclen == 12) - goto codebitsdone; - - codebits[12] = val & 7; - val >>= 3; - bits -= 3; - - if (nclen == 13) - goto codebitsdone; - - codebits[3] = val & 7; - val >>= 3; - bits -= 3; - - if (nclen == 14) - goto codebitsdone; - - codebits[13] = val & 7; - val >>= 3; - bits -= 3; - - if (nclen == 15) - goto codebitsdone; - - if (!elf_fetch_bits (&pin, pinend, &val, &bits)) - return 0; - - codebits[2] = val & 7; - val >>= 3; - bits -= 3; - - if (nclen == 16) - goto codebitsdone; - - codebits[14] = val & 7; - val >>= 3; - bits -= 3; - - if (nclen == 17) - goto codebitsdone; - - codebits[1] = val & 7; - val >>= 3; - bits -= 3; - - if (nclen == 18) - goto codebitsdone; - - codebits[15] = val & 7; - val >>= 3; - bits -= 3; - - codebitsdone: - - if (!elf_zlib_inflate_table (codebits, 19, zdebug_table, - zdebug_table)) - return 0; - - /* Read the compressed bit lengths of the literal, - length, and distance codes. We have allocated space - at the end of zdebug_table to hold them. */ - - plenbase = (((unsigned char *) zdebug_table) - + ZLIB_TABLE_CODELEN_OFFSET); - plen = plenbase; - plenend = plen + nlit + ndist; - while (plen < plenend) - { - uint16_t t; - unsigned int b; - uint16_t v; - - if (!elf_fetch_bits (&pin, pinend, &val, &bits)) - return 0; - - t = zdebug_table[val & 0xff]; - - /* The compression here uses bit lengths up to 7, so - a secondary table is never necessary. */ - if (unlikely ((t & (1U << ZLIB_HUFFMAN_SECONDARY_SHIFT)) - != 0)) - { - elf_uncompress_failed (); - return 0; - } - - b = (t >> ZLIB_HUFFMAN_BITS_SHIFT) & ZLIB_HUFFMAN_BITS_MASK; - val >>= b + 1; - bits -= b + 1; - - v = t & ZLIB_HUFFMAN_VALUE_MASK; - if (v < 16) - *plen++ = v; - else if (v == 16) - { - unsigned int c; - unsigned int prev; - - /* Copy previous entry 3 to 6 times. */ - - if (unlikely (plen == plenbase)) - { - elf_uncompress_failed (); - return 0; - } - - /* We used up to 7 bits since the last - elf_fetch_bits, so we have at least 8 bits - available here. */ - - c = 3 + (val & 0x3); - val >>= 2; - bits -= 2; - if (unlikely ((unsigned int) (plenend - plen) < c)) - { - elf_uncompress_failed (); - return 0; - } - - prev = plen[-1]; - switch (c) - { - case 6: - *plen++ = prev; - ATTRIBUTE_FALLTHROUGH; - case 5: - *plen++ = prev; - ATTRIBUTE_FALLTHROUGH; - case 4: - *plen++ = prev; - } - *plen++ = prev; - *plen++ = prev; - *plen++ = prev; - } - else if (v == 17) - { - unsigned int c; - - /* Store zero 3 to 10 times. */ - - /* We used up to 7 bits since the last - elf_fetch_bits, so we have at least 8 bits - available here. */ - - c = 3 + (val & 0x7); - val >>= 3; - bits -= 3; - if (unlikely ((unsigned int) (plenend - plen) < c)) - { - elf_uncompress_failed (); - return 0; - } - - switch (c) - { - case 10: - *plen++ = 0; - ATTRIBUTE_FALLTHROUGH; - case 9: - *plen++ = 0; - ATTRIBUTE_FALLTHROUGH; - case 8: - *plen++ = 0; - ATTRIBUTE_FALLTHROUGH; - case 7: - *plen++ = 0; - ATTRIBUTE_FALLTHROUGH; - case 6: - *plen++ = 0; - ATTRIBUTE_FALLTHROUGH; - case 5: - *plen++ = 0; - ATTRIBUTE_FALLTHROUGH; - case 4: - *plen++ = 0; - } - *plen++ = 0; - *plen++ = 0; - *plen++ = 0; - } - else if (v == 18) - { - unsigned int c; - - /* Store zero 11 to 138 times. */ - - /* We used up to 7 bits since the last - elf_fetch_bits, so we have at least 8 bits - available here. */ - - c = 11 + (val & 0x7f); - val >>= 7; - bits -= 7; - if (unlikely ((unsigned int) (plenend - plen) < c)) - { - elf_uncompress_failed (); - return 0; - } - - memset (plen, 0, c); - plen += c; - } - else - { - elf_uncompress_failed (); - return 0; - } - } - - /* Make sure that the stop code can appear. */ - - plen = plenbase; - if (unlikely (plen[256] == 0)) - { - elf_uncompress_failed (); - return 0; - } - - /* Build the decompression tables. */ - - if (!elf_zlib_inflate_table (plen, nlit, zdebug_table, - zdebug_table)) - return 0; - if (!elf_zlib_inflate_table (plen + nlit, ndist, zdebug_table, - (zdebug_table - + ZLIB_HUFFMAN_TABLE_SIZE))) - return 0; - tlit = zdebug_table; - tdist = zdebug_table + ZLIB_HUFFMAN_TABLE_SIZE; - } - - /* Inflate values until the end of the block. This is the - main loop of the inflation code. */ - - while (1) - { - uint16_t t; - unsigned int b; - uint16_t v; - unsigned int lit; - - if (!elf_fetch_bits (&pin, pinend, &val, &bits)) - return 0; - - t = tlit[val & 0xff]; - b = (t >> ZLIB_HUFFMAN_BITS_SHIFT) & ZLIB_HUFFMAN_BITS_MASK; - v = t & ZLIB_HUFFMAN_VALUE_MASK; - - if ((t & (1U << ZLIB_HUFFMAN_SECONDARY_SHIFT)) == 0) - { - lit = v; - val >>= b + 1; - bits -= b + 1; - } - else - { - t = tlit[v + 0x100 + ((val >> 8) & ((1U << b) - 1))]; - b = (t >> ZLIB_HUFFMAN_BITS_SHIFT) & ZLIB_HUFFMAN_BITS_MASK; - lit = t & ZLIB_HUFFMAN_VALUE_MASK; - val >>= b + 8; - bits -= b + 8; - } - - if (lit < 256) - { - if (unlikely (pout == poutend)) - { - elf_uncompress_failed (); - return 0; - } - - *pout++ = lit; - - /* We will need to write the next byte soon. We ask - for high temporal locality because we will write - to the whole cache line soon. */ - __builtin_prefetch (pout, 1, 3); - } - else if (lit == 256) - { - /* The end of the block. */ - break; - } - else - { - unsigned int dist; - unsigned int len; - - /* Convert lit into a length. */ - - if (lit < 265) - len = lit - 257 + 3; - else if (lit == 285) - len = 258; - else if (unlikely (lit > 285)) - { - elf_uncompress_failed (); - return 0; - } - else - { - unsigned int extra; - - if (!elf_fetch_bits (&pin, pinend, &val, &bits)) - return 0; - - /* This is an expression for the table of length - codes in RFC 1951 3.2.5. */ - lit -= 265; - extra = (lit >> 2) + 1; - len = (lit & 3) << extra; - len += 11; - len += ((1U << (extra - 1)) - 1) << 3; - len += val & ((1U << extra) - 1); - val >>= extra; - bits -= extra; - } - - if (!elf_fetch_bits (&pin, pinend, &val, &bits)) - return 0; - - t = tdist[val & 0xff]; - b = (t >> ZLIB_HUFFMAN_BITS_SHIFT) & ZLIB_HUFFMAN_BITS_MASK; - v = t & ZLIB_HUFFMAN_VALUE_MASK; - - if ((t & (1U << ZLIB_HUFFMAN_SECONDARY_SHIFT)) == 0) - { - dist = v; - val >>= b + 1; - bits -= b + 1; - } - else - { - t = tdist[v + 0x100 + ((val >> 8) & ((1U << b) - 1))]; - b = ((t >> ZLIB_HUFFMAN_BITS_SHIFT) - & ZLIB_HUFFMAN_BITS_MASK); - dist = t & ZLIB_HUFFMAN_VALUE_MASK; - val >>= b + 8; - bits -= b + 8; - } - - /* Convert dist to a distance. */ - - if (dist == 0) - { - /* A distance of 1. A common case, meaning - repeat the last character LEN times. */ - - if (unlikely (pout == porigout)) - { - elf_uncompress_failed (); - return 0; - } - - if (unlikely ((unsigned int) (poutend - pout) < len)) - { - elf_uncompress_failed (); - return 0; - } - - memset (pout, pout[-1], len); - pout += len; - } - else if (unlikely (dist > 29)) - { - elf_uncompress_failed (); - return 0; - } - else - { - if (dist < 4) - dist = dist + 1; - else - { - unsigned int extra; - - if (!elf_fetch_bits (&pin, pinend, &val, &bits)) - return 0; - - /* This is an expression for the table of - distance codes in RFC 1951 3.2.5. */ - dist -= 4; - extra = (dist >> 1) + 1; - dist = (dist & 1) << extra; - dist += 5; - dist += ((1U << (extra - 1)) - 1) << 2; - dist += val & ((1U << extra) - 1); - val >>= extra; - bits -= extra; - } - - /* Go back dist bytes, and copy len bytes from - there. */ - - if (unlikely ((unsigned int) (pout - porigout) < dist)) - { - elf_uncompress_failed (); - return 0; - } - - if (unlikely ((unsigned int) (poutend - pout) < len)) - { - elf_uncompress_failed (); - return 0; - } - - if (dist >= len) - { - memcpy (pout, pout - dist, len); - pout += len; - } - else - { - while (len > 0) - { - unsigned int copy; - - copy = len < dist ? len : dist; - memcpy (pout, pout - dist, copy); - len -= copy; - pout += copy; - } - } - } - } - } - } - } - - /* We should have filled the output buffer. */ - if (unlikely (pout != poutend)) - { - elf_uncompress_failed (); - return 0; - } - - return 1; -} - -/* Verify the zlib checksum. The checksum is in the 4 bytes at - CHECKBYTES, and the uncompressed data is at UNCOMPRESSED / - UNCOMPRESSED_SIZE. Returns 1 on success, 0 on failure. */ - -static int -elf_zlib_verify_checksum (const unsigned char *checkbytes, - const unsigned char *uncompressed, - size_t uncompressed_size) -{ - unsigned int i; - unsigned int cksum; - const unsigned char *p; - uint32_t s1; - uint32_t s2; - size_t hsz; - - cksum = 0; - for (i = 0; i < 4; i++) - cksum = (cksum << 8) | checkbytes[i]; - - s1 = 1; - s2 = 0; - - /* Minimize modulo operations. */ - - p = uncompressed; - hsz = uncompressed_size; - while (hsz >= 5552) - { - for (i = 0; i < 5552; i += 16) - { - /* Manually unroll loop 16 times. */ - s1 = s1 + *p++; - s2 = s2 + s1; - s1 = s1 + *p++; - s2 = s2 + s1; - s1 = s1 + *p++; - s2 = s2 + s1; - s1 = s1 + *p++; - s2 = s2 + s1; - s1 = s1 + *p++; - s2 = s2 + s1; - s1 = s1 + *p++; - s2 = s2 + s1; - s1 = s1 + *p++; - s2 = s2 + s1; - s1 = s1 + *p++; - s2 = s2 + s1; - s1 = s1 + *p++; - s2 = s2 + s1; - s1 = s1 + *p++; - s2 = s2 + s1; - s1 = s1 + *p++; - s2 = s2 + s1; - s1 = s1 + *p++; - s2 = s2 + s1; - s1 = s1 + *p++; - s2 = s2 + s1; - s1 = s1 + *p++; - s2 = s2 + s1; - s1 = s1 + *p++; - s2 = s2 + s1; - s1 = s1 + *p++; - s2 = s2 + s1; - } - hsz -= 5552; - s1 %= 65521; - s2 %= 65521; - } - - while (hsz >= 16) - { - /* Manually unroll loop 16 times. */ - s1 = s1 + *p++; - s2 = s2 + s1; - s1 = s1 + *p++; - s2 = s2 + s1; - s1 = s1 + *p++; - s2 = s2 + s1; - s1 = s1 + *p++; - s2 = s2 + s1; - s1 = s1 + *p++; - s2 = s2 + s1; - s1 = s1 + *p++; - s2 = s2 + s1; - s1 = s1 + *p++; - s2 = s2 + s1; - s1 = s1 + *p++; - s2 = s2 + s1; - s1 = s1 + *p++; - s2 = s2 + s1; - s1 = s1 + *p++; - s2 = s2 + s1; - s1 = s1 + *p++; - s2 = s2 + s1; - s1 = s1 + *p++; - s2 = s2 + s1; - s1 = s1 + *p++; - s2 = s2 + s1; - s1 = s1 + *p++; - s2 = s2 + s1; - s1 = s1 + *p++; - s2 = s2 + s1; - s1 = s1 + *p++; - s2 = s2 + s1; - - hsz -= 16; - } - - for (i = 0; i < hsz; ++i) - { - s1 = s1 + *p++; - s2 = s2 + s1; - } - - s1 %= 65521; - s2 %= 65521; - - if (unlikely ((s2 << 16) + s1 != cksum)) - { - elf_uncompress_failed (); - return 0; - } - - return 1; -} - -/* Inflate a zlib stream from PIN/SIN to POUT/SOUT, and verify the - checksum. Return 1 on success, 0 on error. */ - -static int -elf_zlib_inflate_and_verify (const unsigned char *pin, size_t sin, - uint16_t *zdebug_table, unsigned char *pout, - size_t sout) -{ - if (!elf_zlib_inflate (pin, sin, zdebug_table, pout, sout)) - return 0; - if (!elf_zlib_verify_checksum (pin + sin - 4, pout, sout)) - return 0; - return 1; -} - -/* For working memory during zstd compression, we need - - a literal length FSE table: 512 64-bit values == 4096 bytes - - a match length FSE table: 512 64-bit values == 4096 bytes - - a offset FSE table: 256 64-bit values == 2048 bytes - - a Huffman tree: 2048 uint16_t values == 4096 bytes - - scratch space, one of - - to build an FSE table: 512 uint16_t values == 1024 bytes - - to build a Huffman tree: 512 uint16_t + 256 uint32_t == 2048 bytes -*/ - -#define ZSTD_TABLE_SIZE \ - (2 * 512 * sizeof (struct elf_zstd_fse_baseline_entry) \ - + 256 * sizeof (struct elf_zstd_fse_baseline_entry) \ - + 2048 * sizeof (uint16_t) \ - + 512 * sizeof (uint16_t) + 256 * sizeof (uint32_t)) - -#define ZSTD_TABLE_LITERAL_FSE_OFFSET (0) - -#define ZSTD_TABLE_MATCH_FSE_OFFSET \ - (512 * sizeof (struct elf_zstd_fse_baseline_entry)) - -#define ZSTD_TABLE_OFFSET_FSE_OFFSET \ - (ZSTD_TABLE_MATCH_FSE_OFFSET \ - + 512 * sizeof (struct elf_zstd_fse_baseline_entry)) - -#define ZSTD_TABLE_HUFFMAN_OFFSET \ - (ZSTD_TABLE_OFFSET_FSE_OFFSET \ - + 256 * sizeof (struct elf_zstd_fse_baseline_entry)) - -#define ZSTD_TABLE_WORK_OFFSET \ - (ZSTD_TABLE_HUFFMAN_OFFSET + 2048 * sizeof (uint16_t)) - -/* An entry in a zstd FSE table. */ - -struct elf_zstd_fse_entry -{ - /* The value that this FSE entry represents. */ - unsigned char symbol; - /* The number of bits to read to determine the next state. */ - unsigned char bits; - /* Add the bits to this base to get the next state. */ - uint16_t base; -}; - -static int -elf_zstd_build_fse (const int16_t *, int, uint16_t *, int, - struct elf_zstd_fse_entry *); - -/* Read a zstd FSE table and build the decoding table in *TABLE, updating *PPIN - as it reads. ZDEBUG_TABLE is scratch space; it must be enough for 512 - uint16_t values (1024 bytes). MAXIDX is the maximum number of symbols - permitted. *TABLE_BITS is the maximum number of bits for symbols in the - table: the size of *TABLE is at least 1 << *TABLE_BITS. This updates - *TABLE_BITS to the actual number of bits. Returns 1 on success, 0 on - error. */ - -static int -elf_zstd_read_fse (const unsigned char **ppin, const unsigned char *pinend, - uint16_t *zdebug_table, int maxidx, - struct elf_zstd_fse_entry *table, int *table_bits) -{ - const unsigned char *pin; - int16_t *norm; - uint16_t *next; - uint64_t val; - unsigned int bits; - int accuracy_log; - uint32_t remaining; - uint32_t threshold; - int bits_needed; - int idx; - int prev0; - - pin = *ppin; - - norm = (int16_t *) zdebug_table; - next = zdebug_table + 256; - - if (unlikely (pin + 3 >= pinend)) - { - elf_uncompress_failed (); - return 0; - } - - /* Align PIN to a 32-bit boundary. */ - - val = 0; - bits = 0; - while ((((uintptr_t) pin) & 3) != 0) - { - val |= (uint64_t)*pin << bits; - bits += 8; - ++pin; - } - - if (!elf_fetch_bits (&pin, pinend, &val, &bits)) - return 0; - - accuracy_log = (val & 0xf) + 5; - if (accuracy_log > *table_bits) - { - elf_uncompress_failed (); - return 0; - } - *table_bits = accuracy_log; - val >>= 4; - bits -= 4; - - /* This code is mostly copied from the reference implementation. */ - - /* The number of remaining probabilities, plus 1. This sets the number of - bits that need to be read for the next value. */ - remaining = (1 << accuracy_log) + 1; - - /* The current difference between small and large values, which depends on - the number of remaining values. Small values use one less bit. */ - threshold = 1 << accuracy_log; - - /* The number of bits used to compute threshold. */ - bits_needed = accuracy_log + 1; - - /* The next character value. */ - idx = 0; - - /* Whether the last count was 0. */ - prev0 = 0; - - while (remaining > 1 && idx <= maxidx) - { - uint32_t max; - int32_t count; - - if (!elf_fetch_bits (&pin, pinend, &val, &bits)) - return 0; - - if (prev0) - { - int zidx; - - /* Previous count was 0, so there is a 2-bit repeat flag. If the - 2-bit flag is 0b11, it adds 3 and then there is another repeat - flag. */ - zidx = idx; - while ((val & 0xfff) == 0xfff) - { - zidx += 3 * 6; - val >>= 12; - bits -= 12; - if (!elf_fetch_bits (&pin, pinend, &val, &bits)) - return 0; - } - while ((val & 3) == 3) - { - zidx += 3; - val >>= 2; - bits -= 2; - if (!elf_fetch_bits (&pin, pinend, &val, &bits)) - return 0; - } - /* We have at least 13 bits here, don't need to fetch. */ - zidx += val & 3; - val >>= 2; - bits -= 2; - - if (unlikely (zidx > maxidx)) - { - elf_uncompress_failed (); - return 0; - } - - for (; idx < zidx; idx++) - norm[idx] = 0; - - prev0 = 0; - continue; - } - - max = (2 * threshold - 1) - remaining; - if ((val & (threshold - 1)) < max) - { - /* A small value. */ - count = (int32_t) ((uint32_t) val & (threshold - 1)); - val >>= bits_needed - 1; - bits -= bits_needed - 1; - } - else - { - /* A large value. */ - count = (int32_t) ((uint32_t) val & (2 * threshold - 1)); - if (count >= (int32_t) threshold) - count -= (int32_t) max; - val >>= bits_needed; - bits -= bits_needed; - } - - count--; - if (count >= 0) - remaining -= count; - else - remaining--; - if (unlikely (idx >= 256)) - { - elf_uncompress_failed (); - return 0; - } - norm[idx] = (int16_t) count; - ++idx; - - prev0 = count == 0; - - while (remaining < threshold) - { - bits_needed--; - threshold >>= 1; - } - } - - if (unlikely (remaining != 1)) - { - elf_uncompress_failed (); - return 0; - } - - /* If we've read ahead more than a byte, back up. */ - while (bits >= 8) - { - --pin; - bits -= 8; - } - - *ppin = pin; - - for (; idx <= maxidx; idx++) - norm[idx] = 0; - - return elf_zstd_build_fse (norm, idx, next, *table_bits, table); -} - -/* Build the FSE decoding table from a list of probabilities. This reads from - NORM of length IDX, uses NEXT as scratch space, and writes to *TABLE, whose - size is TABLE_BITS. */ - -static int -elf_zstd_build_fse (const int16_t *norm, int idx, uint16_t *next, - int table_bits, struct elf_zstd_fse_entry *table) -{ - int table_size; - int high_threshold; - int i; - int pos; - int step; - int mask; - - table_size = 1 << table_bits; - high_threshold = table_size - 1; - for (i = 0; i < idx; i++) - { - int16_t n; - - n = norm[i]; - if (n >= 0) - next[i] = (uint16_t) n; - else - { - table[high_threshold].symbol = (unsigned char) i; - high_threshold--; - next[i] = 1; - } - } - - pos = 0; - step = (table_size >> 1) + (table_size >> 3) + 3; - mask = table_size - 1; - for (i = 0; i < idx; i++) - { - int n; - int j; - - n = (int) norm[i]; - for (j = 0; j < n; j++) - { - table[pos].symbol = (unsigned char) i; - pos = (pos + step) & mask; - while (unlikely (pos > high_threshold)) - pos = (pos + step) & mask; - } - } - if (unlikely (pos != 0)) - { - elf_uncompress_failed (); - return 0; - } - - for (i = 0; i < table_size; i++) - { - unsigned char sym; - uint16_t next_state; - int high_bit; - int bits; - - sym = table[i].symbol; - next_state = next[sym]; - ++next[sym]; - - if (next_state == 0) - { - elf_uncompress_failed (); - return 0; - } - high_bit = 31 - __builtin_clz (next_state); - - bits = table_bits - high_bit; - table[i].bits = (unsigned char) bits; - table[i].base = (uint16_t) ((next_state << bits) - table_size); - } - - return 1; -} - -/* Encode the baseline and bits into a single 32-bit value. */ - -#define ZSTD_ENCODE_BASELINE_BITS(baseline, basebits) \ - ((uint32_t)(baseline) | ((uint32_t)(basebits) << 24)) - -#define ZSTD_DECODE_BASELINE(baseline_basebits) \ - ((uint32_t)(baseline_basebits) & 0xffffff) - -#define ZSTD_DECODE_BASEBITS(baseline_basebits) \ - ((uint32_t)(baseline_basebits) >> 24) - -/* Given a literal length code, we need to read a number of bits and add that - to a baseline. For states 0 to 15 the baseline is the state and the number - of bits is zero. */ - -#define ZSTD_LITERAL_LENGTH_BASELINE_OFFSET (16) - -static const uint32_t elf_zstd_literal_length_base[] = -{ - ZSTD_ENCODE_BASELINE_BITS(16, 1), - ZSTD_ENCODE_BASELINE_BITS(18, 1), - ZSTD_ENCODE_BASELINE_BITS(20, 1), - ZSTD_ENCODE_BASELINE_BITS(22, 1), - ZSTD_ENCODE_BASELINE_BITS(24, 2), - ZSTD_ENCODE_BASELINE_BITS(28, 2), - ZSTD_ENCODE_BASELINE_BITS(32, 3), - ZSTD_ENCODE_BASELINE_BITS(40, 3), - ZSTD_ENCODE_BASELINE_BITS(48, 4), - ZSTD_ENCODE_BASELINE_BITS(64, 6), - ZSTD_ENCODE_BASELINE_BITS(128, 7), - ZSTD_ENCODE_BASELINE_BITS(256, 8), - ZSTD_ENCODE_BASELINE_BITS(512, 9), - ZSTD_ENCODE_BASELINE_BITS(1024, 10), - ZSTD_ENCODE_BASELINE_BITS(2048, 11), - ZSTD_ENCODE_BASELINE_BITS(4096, 12), - ZSTD_ENCODE_BASELINE_BITS(8192, 13), - ZSTD_ENCODE_BASELINE_BITS(16384, 14), - ZSTD_ENCODE_BASELINE_BITS(32768, 15), - ZSTD_ENCODE_BASELINE_BITS(65536, 16) -}; - -/* The same applies to match length codes. For states 0 to 31 the baseline is - the state + 3 and the number of bits is zero. */ - -#define ZSTD_MATCH_LENGTH_BASELINE_OFFSET (32) - -static const uint32_t elf_zstd_match_length_base[] = -{ - ZSTD_ENCODE_BASELINE_BITS(35, 1), - ZSTD_ENCODE_BASELINE_BITS(37, 1), - ZSTD_ENCODE_BASELINE_BITS(39, 1), - ZSTD_ENCODE_BASELINE_BITS(41, 1), - ZSTD_ENCODE_BASELINE_BITS(43, 2), - ZSTD_ENCODE_BASELINE_BITS(47, 2), - ZSTD_ENCODE_BASELINE_BITS(51, 3), - ZSTD_ENCODE_BASELINE_BITS(59, 3), - ZSTD_ENCODE_BASELINE_BITS(67, 4), - ZSTD_ENCODE_BASELINE_BITS(83, 4), - ZSTD_ENCODE_BASELINE_BITS(99, 5), - ZSTD_ENCODE_BASELINE_BITS(131, 7), - ZSTD_ENCODE_BASELINE_BITS(259, 8), - ZSTD_ENCODE_BASELINE_BITS(515, 9), - ZSTD_ENCODE_BASELINE_BITS(1027, 10), - ZSTD_ENCODE_BASELINE_BITS(2051, 11), - ZSTD_ENCODE_BASELINE_BITS(4099, 12), - ZSTD_ENCODE_BASELINE_BITS(8195, 13), - ZSTD_ENCODE_BASELINE_BITS(16387, 14), - ZSTD_ENCODE_BASELINE_BITS(32771, 15), - ZSTD_ENCODE_BASELINE_BITS(65539, 16) -}; - -/* An entry in an FSE table used for literal/match/length values. For these we - have to map the symbol to a baseline value, and we have to read zero or more - bits and add that value to the baseline value. Rather than look the values - up in a separate table, we grow the FSE table so that we get better memory - caching. */ - -struct elf_zstd_fse_baseline_entry -{ - /* The baseline for the value that this FSE entry represents.. */ - uint32_t baseline; - /* The number of bits to read to add to the baseline. */ - unsigned char basebits; - /* The number of bits to read to determine the next state. */ - unsigned char bits; - /* Add the bits to this base to get the next state. */ - uint16_t base; -}; - -/* Convert the literal length FSE table FSE_TABLE to an FSE baseline table at - BASELINE_TABLE. Note that FSE_TABLE and BASELINE_TABLE will overlap. */ - -static int -elf_zstd_make_literal_baseline_fse ( - const struct elf_zstd_fse_entry *fse_table, - int table_bits, - struct elf_zstd_fse_baseline_entry *baseline_table) -{ - size_t count; - const struct elf_zstd_fse_entry *pfse; - struct elf_zstd_fse_baseline_entry *pbaseline; - - /* Convert backward to avoid overlap. */ - - count = 1U << table_bits; - pfse = fse_table + count; - pbaseline = baseline_table + count; - while (pfse > fse_table) - { - unsigned char symbol; - unsigned char bits; - uint16_t base; - - --pfse; - --pbaseline; - symbol = pfse->symbol; - bits = pfse->bits; - base = pfse->base; - if (symbol < ZSTD_LITERAL_LENGTH_BASELINE_OFFSET) - { - pbaseline->baseline = (uint32_t)symbol; - pbaseline->basebits = 0; - } - else - { - unsigned int idx; - uint32_t basebits; - - if (unlikely (symbol > 35)) - { - elf_uncompress_failed (); - return 0; - } - idx = symbol - ZSTD_LITERAL_LENGTH_BASELINE_OFFSET; - basebits = elf_zstd_literal_length_base[idx]; - pbaseline->baseline = ZSTD_DECODE_BASELINE(basebits); - pbaseline->basebits = ZSTD_DECODE_BASEBITS(basebits); - } - pbaseline->bits = bits; - pbaseline->base = base; - } - - return 1; -} - -/* Convert the offset length FSE table FSE_TABLE to an FSE baseline table at - BASELINE_TABLE. Note that FSE_TABLE and BASELINE_TABLE will overlap. */ - -static int -elf_zstd_make_offset_baseline_fse ( - const struct elf_zstd_fse_entry *fse_table, - int table_bits, - struct elf_zstd_fse_baseline_entry *baseline_table) -{ - size_t count; - const struct elf_zstd_fse_entry *pfse; - struct elf_zstd_fse_baseline_entry *pbaseline; - - /* Convert backward to avoid overlap. */ - - count = 1U << table_bits; - pfse = fse_table + count; - pbaseline = baseline_table + count; - while (pfse > fse_table) - { - unsigned char symbol; - unsigned char bits; - uint16_t base; - - --pfse; - --pbaseline; - symbol = pfse->symbol; - bits = pfse->bits; - base = pfse->base; - if (unlikely (symbol > 31)) - { - elf_uncompress_failed (); - return 0; - } - - /* The simple way to write this is - - pbaseline->baseline = (uint32_t)1 << symbol; - pbaseline->basebits = symbol; - - That will give us an offset value that corresponds to the one - described in the RFC. However, for offset values > 3, we have to - subtract 3. And for offset values 1, 2, 3 we use a repeated offset. - The baseline is always a power of 2, and is never 0, so for these low - values we will see one entry that is baseline 1, basebits 0, and one - entry that is baseline 2, basebits 1. All other entries will have - baseline >= 4 and basebits >= 2. - - So we can check for RFC offset <= 3 by checking for basebits <= 1. - And that means that we can subtract 3 here and not worry about doing - it in the hot loop. */ - - pbaseline->baseline = (uint32_t)1 << symbol; - if (symbol >= 2) - pbaseline->baseline -= 3; - pbaseline->basebits = symbol; - pbaseline->bits = bits; - pbaseline->base = base; - } - - return 1; -} - -/* Convert the match length FSE table FSE_TABLE to an FSE baseline table at - BASELINE_TABLE. Note that FSE_TABLE and BASELINE_TABLE will overlap. */ - -static int -elf_zstd_make_match_baseline_fse ( - const struct elf_zstd_fse_entry *fse_table, - int table_bits, - struct elf_zstd_fse_baseline_entry *baseline_table) -{ - size_t count; - const struct elf_zstd_fse_entry *pfse; - struct elf_zstd_fse_baseline_entry *pbaseline; - - /* Convert backward to avoid overlap. */ - - count = 1U << table_bits; - pfse = fse_table + count; - pbaseline = baseline_table + count; - while (pfse > fse_table) - { - unsigned char symbol; - unsigned char bits; - uint16_t base; - - --pfse; - --pbaseline; - symbol = pfse->symbol; - bits = pfse->bits; - base = pfse->base; - if (symbol < ZSTD_MATCH_LENGTH_BASELINE_OFFSET) - { - pbaseline->baseline = (uint32_t)symbol + 3; - pbaseline->basebits = 0; - } - else - { - unsigned int idx; - uint32_t basebits; - - if (unlikely (symbol > 52)) - { - elf_uncompress_failed (); - return 0; - } - idx = symbol - ZSTD_MATCH_LENGTH_BASELINE_OFFSET; - basebits = elf_zstd_match_length_base[idx]; - pbaseline->baseline = ZSTD_DECODE_BASELINE(basebits); - pbaseline->basebits = ZSTD_DECODE_BASEBITS(basebits); - } - pbaseline->bits = bits; - pbaseline->base = base; - } - - return 1; -} - -#ifdef BACKTRACE_GENERATE_ZSTD_FSE_TABLES - -/* Used to generate the predefined FSE decoding tables for zstd. */ - -#include - -/* These values are straight from RFC 8878. */ - -static int16_t lit[36] = -{ - 4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 1, 1, 1, 1, 1, - -1,-1,-1,-1 -}; - -static int16_t match[53] = -{ - 1, 4, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1, - -1,-1,-1,-1,-1 -}; - -static int16_t offset[29] = -{ - 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1 -}; - -static uint16_t next[256]; - -static void -print_table (const struct elf_zstd_fse_baseline_entry *table, size_t size) -{ - size_t i; - - printf ("{\n"); - for (i = 0; i < size; i += 3) - { - int j; - - printf (" "); - for (j = 0; j < 3 && i + j < size; ++j) - printf (" { %u, %d, %d, %d },", table[i + j].baseline, - table[i + j].basebits, table[i + j].bits, - table[i + j].base); - printf ("\n"); - } - printf ("};\n"); -} - -int -main () -{ - struct elf_zstd_fse_entry lit_table[64]; - struct elf_zstd_fse_baseline_entry lit_baseline[64]; - struct elf_zstd_fse_entry match_table[64]; - struct elf_zstd_fse_baseline_entry match_baseline[64]; - struct elf_zstd_fse_entry offset_table[32]; - struct elf_zstd_fse_baseline_entry offset_baseline[32]; - - if (!elf_zstd_build_fse (lit, sizeof lit / sizeof lit[0], next, - 6, lit_table)) - { - fprintf (stderr, "elf_zstd_build_fse failed\n"); - exit (EXIT_FAILURE); - } - - if (!elf_zstd_make_literal_baseline_fse (lit_table, 6, lit_baseline)) - { - fprintf (stderr, "elf_zstd_make_literal_baseline_fse failed\n"); - exit (EXIT_FAILURE); - } - - printf ("static const struct elf_zstd_fse_baseline_entry " - "elf_zstd_lit_table[64] =\n"); - print_table (lit_baseline, - sizeof lit_baseline / sizeof lit_baseline[0]); - printf ("\n"); - - if (!elf_zstd_build_fse (match, sizeof match / sizeof match[0], next, - 6, match_table)) - { - fprintf (stderr, "elf_zstd_build_fse failed\n"); - exit (EXIT_FAILURE); - } - - if (!elf_zstd_make_match_baseline_fse (match_table, 6, match_baseline)) - { - fprintf (stderr, "elf_zstd_make_match_baseline_fse failed\n"); - exit (EXIT_FAILURE); - } - - printf ("static const struct elf_zstd_fse_baseline_entry " - "elf_zstd_match_table[64] =\n"); - print_table (match_baseline, - sizeof match_baseline / sizeof match_baseline[0]); - printf ("\n"); - - if (!elf_zstd_build_fse (offset, sizeof offset / sizeof offset[0], next, - 5, offset_table)) - { - fprintf (stderr, "elf_zstd_build_fse failed\n"); - exit (EXIT_FAILURE); - } - - if (!elf_zstd_make_offset_baseline_fse (offset_table, 5, offset_baseline)) - { - fprintf (stderr, "elf_zstd_make_offset_baseline_fse failed\n"); - exit (EXIT_FAILURE); - } - - printf ("static const struct elf_zstd_fse_baseline_entry " - "elf_zstd_offset_table[32] =\n"); - print_table (offset_baseline, - sizeof offset_baseline / sizeof offset_baseline[0]); - printf ("\n"); - - return 0; -} - -#endif - -/* The fixed tables generated by the #ifdef'ed out main function - above. */ - -static const struct elf_zstd_fse_baseline_entry elf_zstd_lit_table[64] = -{ - { 0, 0, 4, 0 }, { 0, 0, 4, 16 }, { 1, 0, 5, 32 }, - { 3, 0, 5, 0 }, { 4, 0, 5, 0 }, { 6, 0, 5, 0 }, - { 7, 0, 5, 0 }, { 9, 0, 5, 0 }, { 10, 0, 5, 0 }, - { 12, 0, 5, 0 }, { 14, 0, 6, 0 }, { 16, 1, 5, 0 }, - { 20, 1, 5, 0 }, { 22, 1, 5, 0 }, { 28, 2, 5, 0 }, - { 32, 3, 5, 0 }, { 48, 4, 5, 0 }, { 64, 6, 5, 32 }, - { 128, 7, 5, 0 }, { 256, 8, 6, 0 }, { 1024, 10, 6, 0 }, - { 4096, 12, 6, 0 }, { 0, 0, 4, 32 }, { 1, 0, 4, 0 }, - { 2, 0, 5, 0 }, { 4, 0, 5, 32 }, { 5, 0, 5, 0 }, - { 7, 0, 5, 32 }, { 8, 0, 5, 0 }, { 10, 0, 5, 32 }, - { 11, 0, 5, 0 }, { 13, 0, 6, 0 }, { 16, 1, 5, 32 }, - { 18, 1, 5, 0 }, { 22, 1, 5, 32 }, { 24, 2, 5, 0 }, - { 32, 3, 5, 32 }, { 40, 3, 5, 0 }, { 64, 6, 4, 0 }, - { 64, 6, 4, 16 }, { 128, 7, 5, 32 }, { 512, 9, 6, 0 }, - { 2048, 11, 6, 0 }, { 0, 0, 4, 48 }, { 1, 0, 4, 16 }, - { 2, 0, 5, 32 }, { 3, 0, 5, 32 }, { 5, 0, 5, 32 }, - { 6, 0, 5, 32 }, { 8, 0, 5, 32 }, { 9, 0, 5, 32 }, - { 11, 0, 5, 32 }, { 12, 0, 5, 32 }, { 15, 0, 6, 0 }, - { 18, 1, 5, 32 }, { 20, 1, 5, 32 }, { 24, 2, 5, 32 }, - { 28, 2, 5, 32 }, { 40, 3, 5, 32 }, { 48, 4, 5, 32 }, - { 65536, 16, 6, 0 }, { 32768, 15, 6, 0 }, { 16384, 14, 6, 0 }, - { 8192, 13, 6, 0 }, -}; - -static const struct elf_zstd_fse_baseline_entry elf_zstd_match_table[64] = -{ - { 3, 0, 6, 0 }, { 4, 0, 4, 0 }, { 5, 0, 5, 32 }, - { 6, 0, 5, 0 }, { 8, 0, 5, 0 }, { 9, 0, 5, 0 }, - { 11, 0, 5, 0 }, { 13, 0, 6, 0 }, { 16, 0, 6, 0 }, - { 19, 0, 6, 0 }, { 22, 0, 6, 0 }, { 25, 0, 6, 0 }, - { 28, 0, 6, 0 }, { 31, 0, 6, 0 }, { 34, 0, 6, 0 }, - { 37, 1, 6, 0 }, { 41, 1, 6, 0 }, { 47, 2, 6, 0 }, - { 59, 3, 6, 0 }, { 83, 4, 6, 0 }, { 131, 7, 6, 0 }, - { 515, 9, 6, 0 }, { 4, 0, 4, 16 }, { 5, 0, 4, 0 }, - { 6, 0, 5, 32 }, { 7, 0, 5, 0 }, { 9, 0, 5, 32 }, - { 10, 0, 5, 0 }, { 12, 0, 6, 0 }, { 15, 0, 6, 0 }, - { 18, 0, 6, 0 }, { 21, 0, 6, 0 }, { 24, 0, 6, 0 }, - { 27, 0, 6, 0 }, { 30, 0, 6, 0 }, { 33, 0, 6, 0 }, - { 35, 1, 6, 0 }, { 39, 1, 6, 0 }, { 43, 2, 6, 0 }, - { 51, 3, 6, 0 }, { 67, 4, 6, 0 }, { 99, 5, 6, 0 }, - { 259, 8, 6, 0 }, { 4, 0, 4, 32 }, { 4, 0, 4, 48 }, - { 5, 0, 4, 16 }, { 7, 0, 5, 32 }, { 8, 0, 5, 32 }, - { 10, 0, 5, 32 }, { 11, 0, 5, 32 }, { 14, 0, 6, 0 }, - { 17, 0, 6, 0 }, { 20, 0, 6, 0 }, { 23, 0, 6, 0 }, - { 26, 0, 6, 0 }, { 29, 0, 6, 0 }, { 32, 0, 6, 0 }, - { 65539, 16, 6, 0 }, { 32771, 15, 6, 0 }, { 16387, 14, 6, 0 }, - { 8195, 13, 6, 0 }, { 4099, 12, 6, 0 }, { 2051, 11, 6, 0 }, - { 1027, 10, 6, 0 }, -}; - -static const struct elf_zstd_fse_baseline_entry elf_zstd_offset_table[32] = -{ - { 1, 0, 5, 0 }, { 61, 6, 4, 0 }, { 509, 9, 5, 0 }, - { 32765, 15, 5, 0 }, { 2097149, 21, 5, 0 }, { 5, 3, 5, 0 }, - { 125, 7, 4, 0 }, { 4093, 12, 5, 0 }, { 262141, 18, 5, 0 }, - { 8388605, 23, 5, 0 }, { 29, 5, 5, 0 }, { 253, 8, 4, 0 }, - { 16381, 14, 5, 0 }, { 1048573, 20, 5, 0 }, { 1, 2, 5, 0 }, - { 125, 7, 4, 16 }, { 2045, 11, 5, 0 }, { 131069, 17, 5, 0 }, - { 4194301, 22, 5, 0 }, { 13, 4, 5, 0 }, { 253, 8, 4, 16 }, - { 8189, 13, 5, 0 }, { 524285, 19, 5, 0 }, { 2, 1, 5, 0 }, - { 61, 6, 4, 16 }, { 1021, 10, 5, 0 }, { 65533, 16, 5, 0 }, - { 268435453, 28, 5, 0 }, { 134217725, 27, 5, 0 }, { 67108861, 26, 5, 0 }, - { 33554429, 25, 5, 0 }, { 16777213, 24, 5, 0 }, -}; - -/* Read a zstd Huffman table and build the decoding table in *TABLE, reading - and updating *PPIN. This sets *PTABLE_BITS to the number of bits of the - table, such that the table length is 1 << *TABLE_BITS. ZDEBUG_TABLE is - scratch space; it must be enough for 512 uint16_t values + 256 32-bit values - (2048 bytes). Returns 1 on success, 0 on error. */ - -static int -elf_zstd_read_huff (const unsigned char **ppin, const unsigned char *pinend, - uint16_t *zdebug_table, uint16_t *table, int *ptable_bits) -{ - const unsigned char *pin; - unsigned char hdr; - unsigned char *weights; - size_t count; - uint32_t *weight_mark; - size_t i; - uint32_t weight_mask; - size_t table_bits; - - pin = *ppin; - if (unlikely (pin >= pinend)) - { - elf_uncompress_failed (); - return 0; - } - hdr = *pin; - ++pin; - - weights = (unsigned char *) zdebug_table; - - if (hdr < 128) - { - /* Table is compressed using FSE. */ - - struct elf_zstd_fse_entry *fse_table; - int fse_table_bits; - uint16_t *scratch; - const unsigned char *pfse; - const unsigned char *pback; - uint64_t val; - unsigned int bits; - unsigned int state1, state2; - - /* SCRATCH is used temporarily by elf_zstd_read_fse. It overlaps - WEIGHTS. */ - scratch = zdebug_table; - fse_table = (struct elf_zstd_fse_entry *) (scratch + 512); - fse_table_bits = 6; - - pfse = pin; - if (!elf_zstd_read_fse (&pfse, pinend, scratch, 255, fse_table, - &fse_table_bits)) - return 0; - - if (unlikely (pin + hdr > pinend)) - { - elf_uncompress_failed (); - return 0; - } - - /* We no longer need SCRATCH. Start recording weights. We need up to - 256 bytes of weights and 64 bytes of rank counts, so it won't overlap - FSE_TABLE. */ - - pback = pin + hdr - 1; - - if (!elf_fetch_backward_init (&pback, pfse, &val, &bits)) - return 0; - - bits -= fse_table_bits; - state1 = (val >> bits) & ((1U << fse_table_bits) - 1); - bits -= fse_table_bits; - state2 = (val >> bits) & ((1U << fse_table_bits) - 1); - - /* There are two independent FSE streams, tracked by STATE1 and STATE2. - We decode them alternately. */ - - count = 0; - while (1) - { - struct elf_zstd_fse_entry *pt; - uint64_t v; - - pt = &fse_table[state1]; - - if (unlikely (pin < pinend) && bits < pt->bits) - { - if (unlikely (count >= 254)) - { - elf_uncompress_failed (); - return 0; - } - weights[count] = (unsigned char) pt->symbol; - weights[count + 1] = (unsigned char) fse_table[state2].symbol; - count += 2; - break; - } - - if (unlikely (pt->bits == 0)) - v = 0; - else - { - if (!elf_fetch_bits_backward (&pback, pfse, &val, &bits)) - return 0; - - bits -= pt->bits; - v = (val >> bits) & (((uint64_t)1 << pt->bits) - 1); - } - - state1 = pt->base + v; - - if (unlikely (count >= 255)) - { - elf_uncompress_failed (); - return 0; - } - - weights[count] = pt->symbol; - ++count; - - pt = &fse_table[state2]; - - if (unlikely (pin < pinend && bits < pt->bits)) - { - if (unlikely (count >= 254)) - { - elf_uncompress_failed (); - return 0; - } - weights[count] = (unsigned char) pt->symbol; - weights[count + 1] = (unsigned char) fse_table[state1].symbol; - count += 2; - break; - } - - if (unlikely (pt->bits == 0)) - v = 0; - else - { - if (!elf_fetch_bits_backward (&pback, pfse, &val, &bits)) - return 0; - - bits -= pt->bits; - v = (val >> bits) & (((uint64_t)1 << pt->bits) - 1); - } - - state2 = pt->base + v; - - if (unlikely (count >= 255)) - { - elf_uncompress_failed (); - return 0; - } - - weights[count] = pt->symbol; - ++count; - } - - pin += hdr; - } - else - { - /* Table is not compressed. Each weight is 4 bits. */ - - count = hdr - 127; - if (unlikely (pin + ((count + 1) / 2) >= pinend)) - { - elf_uncompress_failed (); - return 0; - } - for (i = 0; i < count; i += 2) - { - unsigned char b; - - b = *pin; - ++pin; - weights[i] = b >> 4; - weights[i + 1] = b & 0xf; - } - } - - weight_mark = (uint32_t *) (weights + 256); - memset (weight_mark, 0, 13 * sizeof (uint32_t)); - weight_mask = 0; - for (i = 0; i < count; ++i) - { - unsigned char w; - - w = weights[i]; - if (unlikely (w > 12)) - { - elf_uncompress_failed (); - return 0; - } - ++weight_mark[w]; - if (w > 0) - weight_mask += 1U << (w - 1); - } - if (unlikely (weight_mask == 0)) - { - elf_uncompress_failed (); - return 0; - } - - table_bits = 32 - __builtin_clz (weight_mask); - if (unlikely (table_bits > 11)) - { - elf_uncompress_failed (); - return 0; - } - - /* Work out the last weight value, which is omitted because the weights must - sum to a power of two. */ - { - uint32_t left; - uint32_t high_bit; - - left = ((uint32_t)1 << table_bits) - weight_mask; - if (left == 0) - { - elf_uncompress_failed (); - return 0; - } - high_bit = 31 - __builtin_clz (left); - if (((uint32_t)1 << high_bit) != left) - { - elf_uncompress_failed (); - return 0; - } - - if (unlikely (count >= 256)) - { - elf_uncompress_failed (); - return 0; - } - - weights[count] = high_bit + 1; - ++count; - ++weight_mark[high_bit + 1]; - } - - if (weight_mark[1] < 2 || (weight_mark[1] & 1) != 0) - { - elf_uncompress_failed (); - return 0; - } - - /* Change WEIGHT_MARK from a count of weights to the index of the first - symbol for that weight. We shift the indexes to also store how many we - have seen so far, below. */ - { - uint32_t next; - - next = 0; - for (i = 0; i < table_bits; ++i) - { - uint32_t cur; - - cur = next; - next += weight_mark[i + 1] << i; - weight_mark[i + 1] = cur; - } - } - - for (i = 0; i < count; ++i) - { - unsigned char weight; - uint32_t length; - uint16_t tval; - size_t start; - uint32_t j; - - weight = weights[i]; - if (weight == 0) - continue; - - length = 1U << (weight - 1); - tval = (i << 8) | (table_bits + 1 - weight); - start = weight_mark[weight]; - for (j = 0; j < length; ++j) - table[start + j] = tval; - weight_mark[weight] += length; - } - - *ppin = pin; - *ptable_bits = (int)table_bits; - - return 1; -} - -/* Read and decompress the literals and store them ending at POUTEND. This - works because we are going to use all the literals in the output, so they - must fit into the output buffer. HUFFMAN_TABLE, and PHUFFMAN_TABLE_BITS - store the Huffman table across calls. SCRATCH is used to read a Huffman - table. Store the start of the decompressed literals in *PPLIT. Update - *PPIN. Return 1 on success, 0 on error. */ - -static int -elf_zstd_read_literals (const unsigned char **ppin, - const unsigned char *pinend, - unsigned char *pout, - unsigned char *poutend, - uint16_t *scratch, - uint16_t *huffman_table, - int *phuffman_table_bits, - unsigned char **pplit) -{ - const unsigned char *pin; - unsigned char *plit; - unsigned char hdr; - uint32_t regenerated_size; - uint32_t compressed_size; - int streams; - uint32_t total_streams_size; - unsigned int huffman_table_bits; - uint64_t huffman_mask; - - pin = *ppin; - if (unlikely (pin >= pinend)) - { - elf_uncompress_failed (); - return 0; - } - hdr = *pin; - ++pin; - - if ((hdr & 3) == 0 || (hdr & 3) == 1) - { - int raw; - - /* Raw_Literals_Block or RLE_Literals_Block */ - - raw = (hdr & 3) == 0; - - switch ((hdr >> 2) & 3) - { - case 0: case 2: - regenerated_size = hdr >> 3; - break; - case 1: - if (unlikely (pin >= pinend)) - { - elf_uncompress_failed (); - return 0; - } - regenerated_size = (hdr >> 4) + ((uint32_t)(*pin) << 4); - ++pin; - break; - case 3: - if (unlikely (pin + 1 >= pinend)) - { - elf_uncompress_failed (); - return 0; - } - regenerated_size = ((hdr >> 4) - + ((uint32_t)*pin << 4) - + ((uint32_t)pin[1] << 12)); - pin += 2; - break; - default: - elf_uncompress_failed (); - return 0; - } - - if (unlikely ((size_t)(poutend - pout) < regenerated_size)) - { - elf_uncompress_failed (); - return 0; - } - - plit = poutend - regenerated_size; - - if (raw) - { - if (unlikely (pin + regenerated_size >= pinend)) - { - elf_uncompress_failed (); - return 0; - } - memcpy (plit, pin, regenerated_size); - pin += regenerated_size; - } - else - { - if (pin >= pinend) - { - elf_uncompress_failed (); - return 0; - } - memset (plit, *pin, regenerated_size); - ++pin; - } - - *ppin = pin; - *pplit = plit; - - return 1; - } - - /* Compressed_Literals_Block or Treeless_Literals_Block */ - - switch ((hdr >> 2) & 3) - { - case 0: case 1: - if (unlikely (pin + 1 >= pinend)) - { - elf_uncompress_failed (); - return 0; - } - regenerated_size = (hdr >> 4) | ((uint32_t)(*pin & 0x3f) << 4); - compressed_size = (uint32_t)*pin >> 6 | ((uint32_t)pin[1] << 2); - pin += 2; - streams = ((hdr >> 2) & 3) == 0 ? 1 : 4; - break; - case 2: - if (unlikely (pin + 2 >= pinend)) - { - elf_uncompress_failed (); - return 0; - } - regenerated_size = (((uint32_t)hdr >> 4) - | ((uint32_t)*pin << 4) - | (((uint32_t)pin[1] & 3) << 12)); - compressed_size = (((uint32_t)pin[1] >> 2) - | ((uint32_t)pin[2] << 6)); - pin += 3; - streams = 4; - break; - case 3: - if (unlikely (pin + 3 >= pinend)) - { - elf_uncompress_failed (); - return 0; - } - regenerated_size = (((uint32_t)hdr >> 4) - | ((uint32_t)*pin << 4) - | (((uint32_t)pin[1] & 0x3f) << 12)); - compressed_size = (((uint32_t)pin[1] >> 6) - | ((uint32_t)pin[2] << 2) - | ((uint32_t)pin[3] << 10)); - pin += 4; - streams = 4; - break; - default: - elf_uncompress_failed (); - return 0; - } - - if (unlikely (pin + compressed_size > pinend)) - { - elf_uncompress_failed (); - return 0; - } - - pinend = pin + compressed_size; - *ppin = pinend; - - if (unlikely ((size_t)(poutend - pout) < regenerated_size)) - { - elf_uncompress_failed (); - return 0; - } - - plit = poutend - regenerated_size; - - *pplit = plit; - - total_streams_size = compressed_size; - if ((hdr & 3) == 2) - { - const unsigned char *ptable; - - /* Compressed_Literals_Block. Read Huffman tree. */ - - ptable = pin; - if (!elf_zstd_read_huff (&ptable, pinend, scratch, huffman_table, - phuffman_table_bits)) - return 0; - - if (unlikely (total_streams_size < (size_t)(ptable - pin))) - { - elf_uncompress_failed (); - return 0; - } - - total_streams_size -= ptable - pin; - pin = ptable; - } - else - { - /* Treeless_Literals_Block. Reuse previous Huffman tree. */ - if (unlikely (*phuffman_table_bits == 0)) - { - elf_uncompress_failed (); - return 0; - } - } - - /* Decompress COMPRESSED_SIZE bytes of data at PIN using the huffman table, - storing REGENERATED_SIZE bytes of decompressed data at PLIT. */ - - huffman_table_bits = (unsigned int)*phuffman_table_bits; - huffman_mask = ((uint64_t)1 << huffman_table_bits) - 1; - - if (streams == 1) - { - const unsigned char *pback; - const unsigned char *pbackend; - uint64_t val; - unsigned int bits; - uint32_t i; - - pback = pin + total_streams_size - 1; - pbackend = pin; - if (!elf_fetch_backward_init (&pback, pbackend, &val, &bits)) - return 0; - - /* This is one of the inner loops of the decompression algorithm, so we - put some effort into optimization. We can't get more than 64 bytes - from a single call to elf_fetch_bits_backward, and we can't subtract - more than 11 bits at a time. */ - - if (regenerated_size >= 64) - { - unsigned char *plitstart; - unsigned char *plitstop; - - plitstart = plit; - plitstop = plit + regenerated_size - 64; - while (plit < plitstop) - { - uint16_t t; - - if (!elf_fetch_bits_backward (&pback, pbackend, &val, &bits)) - return 0; - - if (bits < 16) - break; - - while (bits >= 33) - { - t = huffman_table[(val >> (bits - huffman_table_bits)) - & huffman_mask]; - *plit = t >> 8; - ++plit; - bits -= t & 0xff; - - t = huffman_table[(val >> (bits - huffman_table_bits)) - & huffman_mask]; - *plit = t >> 8; - ++plit; - bits -= t & 0xff; - - t = huffman_table[(val >> (bits - huffman_table_bits)) - & huffman_mask]; - *plit = t >> 8; - ++plit; - bits -= t & 0xff; - } - - while (bits > 11) - { - t = huffman_table[(val >> (bits - huffman_table_bits)) - & huffman_mask]; - *plit = t >> 8; - ++plit; - bits -= t & 0xff; - } - } - - regenerated_size -= plit - plitstart; - } - - for (i = 0; i < regenerated_size; ++i) - { - uint16_t t; - - if (!elf_fetch_bits_backward (&pback, pbackend, &val, &bits)) - return 0; - - if (unlikely (bits < huffman_table_bits)) - { - t = huffman_table[(val << (huffman_table_bits - bits)) - & huffman_mask]; - if (unlikely (bits < (t & 0xff))) - { - elf_uncompress_failed (); - return 0; - } - } - else - t = huffman_table[(val >> (bits - huffman_table_bits)) - & huffman_mask]; - - *plit = t >> 8; - ++plit; - bits -= t & 0xff; - } - - return 1; - } - - { - uint32_t stream_size1, stream_size2, stream_size3, stream_size4; - uint32_t tot; - const unsigned char *pback1, *pback2, *pback3, *pback4; - const unsigned char *pbackend1, *pbackend2, *pbackend3, *pbackend4; - uint64_t val1, val2, val3, val4; - unsigned int bits1, bits2, bits3, bits4; - unsigned char *plit1, *plit2, *plit3, *plit4; - uint32_t regenerated_stream_size; - uint32_t regenerated_stream_size4; - uint16_t t1, t2, t3, t4; - uint32_t i; - uint32_t limit; - - /* Read jump table. */ - if (unlikely (pin + 5 >= pinend)) - { - elf_uncompress_failed (); - return 0; - } - stream_size1 = (uint32_t)*pin | ((uint32_t)pin[1] << 8); - pin += 2; - stream_size2 = (uint32_t)*pin | ((uint32_t)pin[1] << 8); - pin += 2; - stream_size3 = (uint32_t)*pin | ((uint32_t)pin[1] << 8); - pin += 2; - tot = stream_size1 + stream_size2 + stream_size3; - if (unlikely (tot > total_streams_size - 6)) - { - elf_uncompress_failed (); - return 0; - } - stream_size4 = total_streams_size - 6 - tot; - - pback1 = pin + stream_size1 - 1; - pbackend1 = pin; - - pback2 = pback1 + stream_size2; - pbackend2 = pback1 + 1; - - pback3 = pback2 + stream_size3; - pbackend3 = pback2 + 1; - - pback4 = pback3 + stream_size4; - pbackend4 = pback3 + 1; - - if (!elf_fetch_backward_init (&pback1, pbackend1, &val1, &bits1)) - return 0; - if (!elf_fetch_backward_init (&pback2, pbackend2, &val2, &bits2)) - return 0; - if (!elf_fetch_backward_init (&pback3, pbackend3, &val3, &bits3)) - return 0; - if (!elf_fetch_backward_init (&pback4, pbackend4, &val4, &bits4)) - return 0; - - regenerated_stream_size = (regenerated_size + 3) / 4; - - plit1 = plit; - plit2 = plit1 + regenerated_stream_size; - plit3 = plit2 + regenerated_stream_size; - plit4 = plit3 + regenerated_stream_size; - - regenerated_stream_size4 = regenerated_size - regenerated_stream_size * 3; - - /* We can't get more than 64 literal bytes from a single call to - elf_fetch_bits_backward. The fourth stream can be up to 3 bytes less, - so use as the limit. */ - - limit = regenerated_stream_size4 <= 64 ? 0 : regenerated_stream_size4 - 64; - i = 0; - while (i < limit) - { - if (!elf_fetch_bits_backward (&pback1, pbackend1, &val1, &bits1)) - return 0; - if (!elf_fetch_bits_backward (&pback2, pbackend2, &val2, &bits2)) - return 0; - if (!elf_fetch_bits_backward (&pback3, pbackend3, &val3, &bits3)) - return 0; - if (!elf_fetch_bits_backward (&pback4, pbackend4, &val4, &bits4)) - return 0; - - /* We can't subtract more than 11 bits at a time. */ - - do - { - t1 = huffman_table[(val1 >> (bits1 - huffman_table_bits)) - & huffman_mask]; - t2 = huffman_table[(val2 >> (bits2 - huffman_table_bits)) - & huffman_mask]; - t3 = huffman_table[(val3 >> (bits3 - huffman_table_bits)) - & huffman_mask]; - t4 = huffman_table[(val4 >> (bits4 - huffman_table_bits)) - & huffman_mask]; - - *plit1 = t1 >> 8; - ++plit1; - bits1 -= t1 & 0xff; - - *plit2 = t2 >> 8; - ++plit2; - bits2 -= t2 & 0xff; - - *plit3 = t3 >> 8; - ++plit3; - bits3 -= t3 & 0xff; - - *plit4 = t4 >> 8; - ++plit4; - bits4 -= t4 & 0xff; - - ++i; - } - while (bits1 > 11 && bits2 > 11 && bits3 > 11 && bits4 > 11); - } - - while (i < regenerated_stream_size) - { - int use4; - - use4 = i < regenerated_stream_size4; - - if (!elf_fetch_bits_backward (&pback1, pbackend1, &val1, &bits1)) - return 0; - if (!elf_fetch_bits_backward (&pback2, pbackend2, &val2, &bits2)) - return 0; - if (!elf_fetch_bits_backward (&pback3, pbackend3, &val3, &bits3)) - return 0; - if (use4) - { - if (!elf_fetch_bits_backward (&pback4, pbackend4, &val4, &bits4)) - return 0; - } - - if (unlikely (bits1 < huffman_table_bits)) - { - t1 = huffman_table[(val1 << (huffman_table_bits - bits1)) - & huffman_mask]; - if (unlikely (bits1 < (t1 & 0xff))) - { - elf_uncompress_failed (); - return 0; - } - } - else - t1 = huffman_table[(val1 >> (bits1 - huffman_table_bits)) - & huffman_mask]; - - if (unlikely (bits2 < huffman_table_bits)) - { - t2 = huffman_table[(val2 << (huffman_table_bits - bits2)) - & huffman_mask]; - if (unlikely (bits2 < (t2 & 0xff))) - { - elf_uncompress_failed (); - return 0; - } - } - else - t2 = huffman_table[(val2 >> (bits2 - huffman_table_bits)) - & huffman_mask]; - - if (unlikely (bits3 < huffman_table_bits)) - { - t3 = huffman_table[(val3 << (huffman_table_bits - bits3)) - & huffman_mask]; - if (unlikely (bits3 < (t3 & 0xff))) - { - elf_uncompress_failed (); - return 0; - } - } - else - t3 = huffman_table[(val3 >> (bits3 - huffman_table_bits)) - & huffman_mask]; - - if (use4) - { - if (unlikely (bits4 < huffman_table_bits)) - { - t4 = huffman_table[(val4 << (huffman_table_bits - bits4)) - & huffman_mask]; - if (unlikely (bits4 < (t4 & 0xff))) - { - elf_uncompress_failed (); - return 0; - } - } - else - t4 = huffman_table[(val4 >> (bits4 - huffman_table_bits)) - & huffman_mask]; - - *plit4 = t4 >> 8; - ++plit4; - bits4 -= t4 & 0xff; - } - - *plit1 = t1 >> 8; - ++plit1; - bits1 -= t1 & 0xff; - - *plit2 = t2 >> 8; - ++plit2; - bits2 -= t2 & 0xff; - - *plit3 = t3 >> 8; - ++plit3; - bits3 -= t3 & 0xff; - - ++i; - } - } - - return 1; -} - -/* The information used to decompress a sequence code, which can be a literal - length, an offset, or a match length. */ - -struct elf_zstd_seq_decode -{ - const struct elf_zstd_fse_baseline_entry *table; - int table_bits; -}; - -/* Unpack a sequence code compression mode. */ - -static int -elf_zstd_unpack_seq_decode (int mode, - const unsigned char **ppin, - const unsigned char *pinend, - const struct elf_zstd_fse_baseline_entry *predef, - int predef_bits, - uint16_t *scratch, - int maxidx, - struct elf_zstd_fse_baseline_entry *table, - int table_bits, - int (*conv)(const struct elf_zstd_fse_entry *, - int, - struct elf_zstd_fse_baseline_entry *), - struct elf_zstd_seq_decode *decode) -{ - switch (mode) - { - case 0: - decode->table = predef; - decode->table_bits = predef_bits; - break; - - case 1: - { - struct elf_zstd_fse_entry entry; - - if (unlikely (*ppin >= pinend)) - { - elf_uncompress_failed (); - return 0; - } - entry.symbol = **ppin; - ++*ppin; - entry.bits = 0; - entry.base = 0; - decode->table_bits = 0; - if (!conv (&entry, 0, table)) - return 0; - } - break; - - case 2: - { - struct elf_zstd_fse_entry *fse_table; - - /* We use the same space for the simple FSE table and the baseline - table. */ - fse_table = (struct elf_zstd_fse_entry *)table; - decode->table_bits = table_bits; - if (!elf_zstd_read_fse (ppin, pinend, scratch, maxidx, fse_table, - &decode->table_bits)) - return 0; - if (!conv (fse_table, decode->table_bits, table)) - return 0; - decode->table = table; - } - break; - - case 3: - if (unlikely (decode->table_bits == -1)) - { - elf_uncompress_failed (); - return 0; - } - break; - - default: - elf_uncompress_failed (); - return 0; - } - - return 1; -} - -/* Decompress a zstd stream from PIN/SIN to POUT/SOUT. Code based on RFC 8878. - Return 1 on success, 0 on error. */ - -static int -elf_zstd_decompress (const unsigned char *pin, size_t sin, - unsigned char *zdebug_table, unsigned char *pout, - size_t sout) -{ - const unsigned char *pinend; - unsigned char *poutstart; - unsigned char *poutend; - struct elf_zstd_seq_decode literal_decode; - struct elf_zstd_fse_baseline_entry *literal_fse_table; - struct elf_zstd_seq_decode match_decode; - struct elf_zstd_fse_baseline_entry *match_fse_table; - struct elf_zstd_seq_decode offset_decode; - struct elf_zstd_fse_baseline_entry *offset_fse_table; - uint16_t *huffman_table; - int huffman_table_bits; - uint32_t repeated_offset1; - uint32_t repeated_offset2; - uint32_t repeated_offset3; - uint16_t *scratch; - unsigned char hdr; - int has_checksum; - uint64_t content_size; - int last_block; - - pinend = pin + sin; - poutstart = pout; - poutend = pout + sout; - - literal_decode.table = NULL; - literal_decode.table_bits = -1; - literal_fse_table = ((struct elf_zstd_fse_baseline_entry *) - (zdebug_table + ZSTD_TABLE_LITERAL_FSE_OFFSET)); - - match_decode.table = NULL; - match_decode.table_bits = -1; - match_fse_table = ((struct elf_zstd_fse_baseline_entry *) - (zdebug_table + ZSTD_TABLE_MATCH_FSE_OFFSET)); - - offset_decode.table = NULL; - offset_decode.table_bits = -1; - offset_fse_table = ((struct elf_zstd_fse_baseline_entry *) - (zdebug_table + ZSTD_TABLE_OFFSET_FSE_OFFSET)); - huffman_table = ((uint16_t *) - (zdebug_table + ZSTD_TABLE_HUFFMAN_OFFSET)); - huffman_table_bits = 0; - scratch = ((uint16_t *) - (zdebug_table + ZSTD_TABLE_WORK_OFFSET)); - - repeated_offset1 = 1; - repeated_offset2 = 4; - repeated_offset3 = 8; - - if (unlikely (sin < 4)) - { - elf_uncompress_failed (); - return 0; - } - - /* These values are the zstd magic number. */ - if (unlikely (pin[0] != 0x28 - || pin[1] != 0xb5 - || pin[2] != 0x2f - || pin[3] != 0xfd)) - { - elf_uncompress_failed (); - return 0; - } - - pin += 4; - - if (unlikely (pin >= pinend)) - { - elf_uncompress_failed (); - return 0; - } - - hdr = *pin++; - - /* We expect a single frame. */ - if (unlikely ((hdr & (1 << 5)) == 0)) - { - elf_uncompress_failed (); - return 0; - } - /* Reserved bit must be zero. */ - if (unlikely ((hdr & (1 << 3)) != 0)) - { - elf_uncompress_failed (); - return 0; - } - /* We do not expect a dictionary. */ - if (unlikely ((hdr & 3) != 0)) - { - elf_uncompress_failed (); - return 0; - } - has_checksum = (hdr & (1 << 2)) != 0; - switch (hdr >> 6) - { - case 0: - if (unlikely (pin >= pinend)) - { - elf_uncompress_failed (); - return 0; - } - content_size = (uint64_t) *pin++; - break; - case 1: - if (unlikely (pin + 1 >= pinend)) - { - elf_uncompress_failed (); - return 0; - } - content_size = (((uint64_t) pin[0]) | (((uint64_t) pin[1]) << 8)) + 256; - pin += 2; - break; - case 2: - if (unlikely (pin + 3 >= pinend)) - { - elf_uncompress_failed (); - return 0; - } - content_size = ((uint64_t) pin[0] - | (((uint64_t) pin[1]) << 8) - | (((uint64_t) pin[2]) << 16) - | (((uint64_t) pin[3]) << 24)); - pin += 4; - break; - case 3: - if (unlikely (pin + 7 >= pinend)) - { - elf_uncompress_failed (); - return 0; - } - content_size = ((uint64_t) pin[0] - | (((uint64_t) pin[1]) << 8) - | (((uint64_t) pin[2]) << 16) - | (((uint64_t) pin[3]) << 24) - | (((uint64_t) pin[4]) << 32) - | (((uint64_t) pin[5]) << 40) - | (((uint64_t) pin[6]) << 48) - | (((uint64_t) pin[7]) << 56)); - pin += 8; - break; - default: - elf_uncompress_failed (); - return 0; - } - - if (unlikely (content_size != (size_t) content_size - || (size_t) content_size != sout)) - { - elf_uncompress_failed (); - return 0; - } - - last_block = 0; - while (!last_block) - { - uint32_t block_hdr; - int block_type; - uint32_t block_size; - - if (unlikely (pin + 2 >= pinend)) - { - elf_uncompress_failed (); - return 0; - } - block_hdr = ((uint32_t) pin[0] - | (((uint32_t) pin[1]) << 8) - | (((uint32_t) pin[2]) << 16)); - pin += 3; - - last_block = block_hdr & 1; - block_type = (block_hdr >> 1) & 3; - block_size = block_hdr >> 3; - - switch (block_type) - { - case 0: - /* Raw_Block */ - if (unlikely ((size_t) block_size > (size_t) (pinend - pin))) - { - elf_uncompress_failed (); - return 0; - } - if (unlikely ((size_t) block_size > (size_t) (poutend - pout))) - { - elf_uncompress_failed (); - return 0; - } - memcpy (pout, pin, block_size); - pout += block_size; - pin += block_size; - break; - - case 1: - /* RLE_Block */ - if (unlikely (pin >= pinend)) - { - elf_uncompress_failed (); - return 0; - } - if (unlikely ((size_t) block_size > (size_t) (poutend - pout))) - { - elf_uncompress_failed (); - return 0; - } - memset (pout, *pin, block_size); - pout += block_size; - pin++; - break; - - case 2: - { - const unsigned char *pblockend; - unsigned char *plitstack; - unsigned char *plit; - uint32_t literal_count; - unsigned char seq_hdr; - size_t seq_count; - size_t seq; - const unsigned char *pback; - uint64_t val; - unsigned int bits; - unsigned int literal_state; - unsigned int offset_state; - unsigned int match_state; - - /* Compressed_Block */ - if (unlikely ((size_t) block_size > (size_t) (pinend - pin))) - { - elf_uncompress_failed (); - return 0; - } - - pblockend = pin + block_size; - - /* Read the literals into the end of the output space, and leave - PLIT pointing at them. */ - - if (!elf_zstd_read_literals (&pin, pblockend, pout, poutend, - scratch, huffman_table, - &huffman_table_bits, - &plitstack)) - return 0; - plit = plitstack; - literal_count = poutend - plit; - - seq_hdr = *pin; - pin++; - if (seq_hdr < 128) - seq_count = seq_hdr; - else if (seq_hdr < 255) - { - if (unlikely (pin >= pinend)) - { - elf_uncompress_failed (); - return 0; - } - seq_count = ((seq_hdr - 128) << 8) + *pin; - pin++; - } - else - { - if (unlikely (pin + 1 >= pinend)) - { - elf_uncompress_failed (); - return 0; - } - seq_count = *pin + (pin[1] << 8) + 0x7f00; - pin += 2; - } - - if (seq_count > 0) - { - int (*pfn)(const struct elf_zstd_fse_entry *, - int, struct elf_zstd_fse_baseline_entry *); - - if (unlikely (pin >= pinend)) - { - elf_uncompress_failed (); - return 0; - } - seq_hdr = *pin; - ++pin; - - pfn = elf_zstd_make_literal_baseline_fse; - if (!elf_zstd_unpack_seq_decode ((seq_hdr >> 6) & 3, - &pin, pinend, - &elf_zstd_lit_table[0], 6, - scratch, 35, - literal_fse_table, 9, pfn, - &literal_decode)) - return 0; - - pfn = elf_zstd_make_offset_baseline_fse; - if (!elf_zstd_unpack_seq_decode ((seq_hdr >> 4) & 3, - &pin, pinend, - &elf_zstd_offset_table[0], 5, - scratch, 31, - offset_fse_table, 8, pfn, - &offset_decode)) - return 0; - - pfn = elf_zstd_make_match_baseline_fse; - if (!elf_zstd_unpack_seq_decode ((seq_hdr >> 2) & 3, - &pin, pinend, - &elf_zstd_match_table[0], 6, - scratch, 52, - match_fse_table, 9, pfn, - &match_decode)) - return 0; - } - - pback = pblockend - 1; - if (!elf_fetch_backward_init (&pback, pin, &val, &bits)) - return 0; - - bits -= literal_decode.table_bits; - literal_state = ((val >> bits) - & ((1U << literal_decode.table_bits) - 1)); - - if (!elf_fetch_bits_backward (&pback, pin, &val, &bits)) - return 0; - bits -= offset_decode.table_bits; - offset_state = ((val >> bits) - & ((1U << offset_decode.table_bits) - 1)); - - if (!elf_fetch_bits_backward (&pback, pin, &val, &bits)) - return 0; - bits -= match_decode.table_bits; - match_state = ((val >> bits) - & ((1U << match_decode.table_bits) - 1)); - - seq = 0; - while (1) - { - const struct elf_zstd_fse_baseline_entry *pt; - uint32_t offset_basebits; - uint32_t offset_baseline; - uint32_t offset_bits; - uint32_t offset_base; - uint32_t offset; - uint32_t match_baseline; - uint32_t match_bits; - uint32_t match_base; - uint32_t match; - uint32_t literal_baseline; - uint32_t literal_bits; - uint32_t literal_base; - uint32_t literal; - uint32_t need; - uint32_t add; - - pt = &offset_decode.table[offset_state]; - offset_basebits = pt->basebits; - offset_baseline = pt->baseline; - offset_bits = pt->bits; - offset_base = pt->base; - - /* This case can be more than 16 bits, which is all that - elf_fetch_bits_backward promises. */ - need = offset_basebits; - add = 0; - if (unlikely (need > 16)) - { - if (!elf_fetch_bits_backward (&pback, pin, &val, &bits)) - return 0; - bits -= 16; - add = (val >> bits) & ((1U << 16) - 1); - need -= 16; - add <<= need; - } - if (need > 0) - { - if (!elf_fetch_bits_backward (&pback, pin, &val, &bits)) - return 0; - bits -= need; - add += (val >> bits) & ((1U << need) - 1); - } - - offset = offset_baseline + add; - - pt = &match_decode.table[match_state]; - need = pt->basebits; - match_baseline = pt->baseline; - match_bits = pt->bits; - match_base = pt->base; - - add = 0; - if (need > 0) - { - if (!elf_fetch_bits_backward (&pback, pin, &val, &bits)) - return 0; - bits -= need; - add = (val >> bits) & ((1U << need) - 1); - } - - match = match_baseline + add; - - pt = &literal_decode.table[literal_state]; - need = pt->basebits; - literal_baseline = pt->baseline; - literal_bits = pt->bits; - literal_base = pt->base; - - add = 0; - if (need > 0) - { - if (!elf_fetch_bits_backward (&pback, pin, &val, &bits)) - return 0; - bits -= need; - add = (val >> bits) & ((1U << need) - 1); - } - - literal = literal_baseline + add; - - /* See the comment in elf_zstd_make_offset_baseline_fse. */ - if (offset_basebits > 1) - { - repeated_offset3 = repeated_offset2; - repeated_offset2 = repeated_offset1; - repeated_offset1 = offset; - } - else - { - if (unlikely (literal == 0)) - ++offset; - switch (offset) - { - case 1: - offset = repeated_offset1; - break; - case 2: - offset = repeated_offset2; - repeated_offset2 = repeated_offset1; - repeated_offset1 = offset; - break; - case 3: - offset = repeated_offset3; - repeated_offset3 = repeated_offset2; - repeated_offset2 = repeated_offset1; - repeated_offset1 = offset; - break; - case 4: - offset = repeated_offset1 - 1; - repeated_offset3 = repeated_offset2; - repeated_offset2 = repeated_offset1; - repeated_offset1 = offset; - break; - } - } - - ++seq; - if (seq < seq_count) - { - uint32_t v; - - /* Update the three states. */ - - if (!elf_fetch_bits_backward (&pback, pin, &val, &bits)) - return 0; - - need = literal_bits; - bits -= need; - v = (val >> bits) & (((uint32_t)1 << need) - 1); - - literal_state = literal_base + v; - - if (!elf_fetch_bits_backward (&pback, pin, &val, &bits)) - return 0; - - need = match_bits; - bits -= need; - v = (val >> bits) & (((uint32_t)1 << need) - 1); - - match_state = match_base + v; - - if (!elf_fetch_bits_backward (&pback, pin, &val, &bits)) - return 0; - - need = offset_bits; - bits -= need; - v = (val >> bits) & (((uint32_t)1 << need) - 1); - - offset_state = offset_base + v; - } - - /* The next sequence is now in LITERAL, OFFSET, MATCH. */ - - /* Copy LITERAL bytes from the literals. */ - - if (unlikely ((size_t)(poutend - pout) < literal)) - { - elf_uncompress_failed (); - return 0; - } - - if (unlikely (literal_count < literal)) - { - elf_uncompress_failed (); - return 0; - } - - literal_count -= literal; - - /* Often LITERAL is small, so handle small cases quickly. */ - switch (literal) - { - case 8: - *pout++ = *plit++; - /* FALLTHROUGH */ - case 7: - *pout++ = *plit++; - /* FALLTHROUGH */ - case 6: - *pout++ = *plit++; - /* FALLTHROUGH */ - case 5: - *pout++ = *plit++; - /* FALLTHROUGH */ - case 4: - *pout++ = *plit++; - /* FALLTHROUGH */ - case 3: - *pout++ = *plit++; - /* FALLTHROUGH */ - case 2: - *pout++ = *plit++; - /* FALLTHROUGH */ - case 1: - *pout++ = *plit++; - break; - - case 0: - break; - - default: - if (unlikely ((size_t)(plit - pout) < literal)) - { - uint32_t move; - - move = plit - pout; - while (literal > move) - { - memcpy (pout, plit, move); - pout += move; - plit += move; - literal -= move; - } - } - - memcpy (pout, plit, literal); - pout += literal; - plit += literal; - } - - if (match > 0) - { - /* Copy MATCH bytes from the decoded output at OFFSET. */ - - if (unlikely ((size_t)(poutend - pout) < match)) - { - elf_uncompress_failed (); - return 0; - } - - if (unlikely ((size_t)(pout - poutstart) < offset)) - { - elf_uncompress_failed (); - return 0; - } - - if (offset >= match) - { - memcpy (pout, pout - offset, match); - pout += match; - } - else - { - while (match > 0) - { - uint32_t copy; - - copy = match < offset ? match : offset; - memcpy (pout, pout - offset, copy); - match -= copy; - pout += copy; - } - } - } - - if (unlikely (seq >= seq_count)) - { - /* Copy remaining literals. */ - if (literal_count > 0 && plit != pout) - { - if (unlikely ((size_t)(poutend - pout) - < literal_count)) - { - elf_uncompress_failed (); - return 0; - } - - if ((size_t)(plit - pout) < literal_count) - { - uint32_t move; - - move = plit - pout; - while (literal_count > move) - { - memcpy (pout, plit, move); - pout += move; - plit += move; - literal_count -= move; - } - } - - memcpy (pout, plit, literal_count); - } - - pout += literal_count; - - break; - } - } - - pin = pblockend; - } - break; - - case 3: - default: - elf_uncompress_failed (); - return 0; - } - } - - if (has_checksum) - { - if (unlikely (pin + 4 > pinend)) - { - elf_uncompress_failed (); - return 0; - } - - /* We don't currently verify the checksum. Currently running GNU ld with - --compress-debug-sections=zstd does not seem to generate a - checksum. */ - - pin += 4; - } - - if (pin != pinend) - { - elf_uncompress_failed (); - return 0; - } - - return 1; -} - -#define ZDEBUG_TABLE_SIZE \ - (ZLIB_TABLE_SIZE > ZSTD_TABLE_SIZE ? ZLIB_TABLE_SIZE : ZSTD_TABLE_SIZE) - -/* Uncompress the old compressed debug format, the one emitted by - --compress-debug-sections=zlib-gnu. The compressed data is in - COMPRESSED / COMPRESSED_SIZE, and the function writes to - *UNCOMPRESSED / *UNCOMPRESSED_SIZE. ZDEBUG_TABLE is work space to - hold Huffman tables. Returns 0 on error, 1 on successful - decompression or if something goes wrong. In general we try to - carry on, by returning 1, even if we can't decompress. */ - -static int -elf_uncompress_zdebug (struct backtrace_state *state, - const unsigned char *compressed, size_t compressed_size, - uint16_t *zdebug_table, - backtrace_error_callback error_callback, void *data, - unsigned char **uncompressed, size_t *uncompressed_size) -{ - size_t sz; - size_t i; - unsigned char *po; - - *uncompressed = NULL; - *uncompressed_size = 0; - - /* The format starts with the four bytes ZLIB, followed by the 8 - byte length of the uncompressed data in big-endian order, - followed by a zlib stream. */ - - if (compressed_size < 12 || memcmp (compressed, "ZLIB", 4) != 0) - return 1; - - sz = 0; - for (i = 0; i < 8; i++) - sz = (sz << 8) | compressed[i + 4]; - - if (*uncompressed != NULL && *uncompressed_size >= sz) - po = *uncompressed; - else - { - po = (unsigned char *) backtrace_alloc (state, sz, error_callback, data); - if (po == NULL) - return 0; - } - - if (!elf_zlib_inflate_and_verify (compressed + 12, compressed_size - 12, - zdebug_table, po, sz)) - return 1; - - *uncompressed = po; - *uncompressed_size = sz; - - return 1; -} - -/* Uncompress the new compressed debug format, the official standard - ELF approach emitted by --compress-debug-sections=zlib-gabi. The - compressed data is in COMPRESSED / COMPRESSED_SIZE, and the - function writes to *UNCOMPRESSED / *UNCOMPRESSED_SIZE. - ZDEBUG_TABLE is work space as for elf_uncompress_zdebug. Returns 0 - on error, 1 on successful decompression or if something goes wrong. - In general we try to carry on, by returning 1, even if we can't - decompress. */ - -static int -elf_uncompress_chdr (struct backtrace_state *state, - const unsigned char *compressed, size_t compressed_size, - uint16_t *zdebug_table, - backtrace_error_callback error_callback, void *data, - unsigned char **uncompressed, size_t *uncompressed_size) -{ - b_elf_chdr chdr; - char *alc; - size_t alc_len; - unsigned char *po; - - *uncompressed = NULL; - *uncompressed_size = 0; - - /* The format starts with an ELF compression header. */ - if (compressed_size < sizeof (b_elf_chdr)) - return 1; - - /* The lld linker can misalign a compressed section, so we can't safely read - the fields directly as we can for other ELF sections. See - https://github.com/ianlancetaylor/libbacktrace/pull/120. */ - memcpy (&chdr, compressed, sizeof (b_elf_chdr)); - - alc = NULL; - alc_len = 0; - if (*uncompressed != NULL && *uncompressed_size >= chdr.ch_size) - po = *uncompressed; - else - { - alc_len = chdr.ch_size; - alc = (char*)backtrace_alloc (state, alc_len, error_callback, data); - if (alc == NULL) - return 0; - po = (unsigned char *) alc; - } - - switch (chdr.ch_type) - { - case ELFCOMPRESS_ZLIB: - if (!elf_zlib_inflate_and_verify (compressed + sizeof (b_elf_chdr), - compressed_size - sizeof (b_elf_chdr), - zdebug_table, po, chdr.ch_size)) - goto skip; - break; - - case ELFCOMPRESS_ZSTD: - if (!elf_zstd_decompress (compressed + sizeof (b_elf_chdr), - compressed_size - sizeof (b_elf_chdr), - (unsigned char *)zdebug_table, po, - chdr.ch_size)) - goto skip; - break; - - default: - /* Unsupported compression algorithm. */ - goto skip; - } - - *uncompressed = po; - *uncompressed_size = chdr.ch_size; - - return 1; - - skip: - if (alc != NULL && alc_len > 0) - backtrace_free (state, alc, alc_len, error_callback, data); - return 1; -} - -/* This function is a hook for testing the zlib support. It is only - used by tests. */ - -int -backtrace_uncompress_zdebug (struct backtrace_state *state, - const unsigned char *compressed, - size_t compressed_size, - backtrace_error_callback error_callback, - void *data, unsigned char **uncompressed, - size_t *uncompressed_size) -{ - uint16_t *zdebug_table; - int ret; - - zdebug_table = ((uint16_t *) backtrace_alloc (state, ZDEBUG_TABLE_SIZE, - error_callback, data)); - if (zdebug_table == NULL) - return 0; - ret = elf_uncompress_zdebug (state, compressed, compressed_size, - zdebug_table, error_callback, data, - uncompressed, uncompressed_size); - backtrace_free (state, zdebug_table, ZDEBUG_TABLE_SIZE, - error_callback, data); - return ret; -} - -/* This function is a hook for testing the zstd support. It is only used by - tests. */ - -int -backtrace_uncompress_zstd (struct backtrace_state *state, - const unsigned char *compressed, - size_t compressed_size, - backtrace_error_callback error_callback, - void *data, unsigned char *uncompressed, - size_t uncompressed_size) -{ - unsigned char *zdebug_table; - int ret; - - zdebug_table = ((unsigned char *) backtrace_alloc (state, ZDEBUG_TABLE_SIZE, - error_callback, data)); - if (zdebug_table == NULL) - return 0; - ret = elf_zstd_decompress (compressed, compressed_size, - zdebug_table, uncompressed, uncompressed_size); - backtrace_free (state, zdebug_table, ZDEBUG_TABLE_SIZE, - error_callback, data); - return ret; -} - -/* Number of LZMA states. */ -#define LZMA_STATES (12) - -/* Number of LZMA position states. The pb value of the property byte - is the number of bits to include in these states, and the maximum - value of pb is 4. */ -#define LZMA_POS_STATES (16) - -/* Number of LZMA distance states. These are used match distances - with a short match length: up to 4 bytes. */ -#define LZMA_DIST_STATES (4) - -/* Number of LZMA distance slots. LZMA uses six bits to encode larger - match lengths, so 1 << 6 possible probabilities. */ -#define LZMA_DIST_SLOTS (64) - -/* LZMA distances 0 to 3 are encoded directly, larger values use a - probability model. */ -#define LZMA_DIST_MODEL_START (4) - -/* The LZMA probability model ends at 14. */ -#define LZMA_DIST_MODEL_END (14) - -/* LZMA distance slots for distances less than 127. */ -#define LZMA_FULL_DISTANCES (128) - -/* LZMA uses four alignment bits. */ -#define LZMA_ALIGN_SIZE (16) - -/* LZMA match length is encoded with 4, 5, or 10 bits, some of which - are already known. */ -#define LZMA_LEN_LOW_SYMBOLS (8) -#define LZMA_LEN_MID_SYMBOLS (8) -#define LZMA_LEN_HIGH_SYMBOLS (256) - -/* LZMA literal encoding. */ -#define LZMA_LITERAL_CODERS_MAX (16) -#define LZMA_LITERAL_CODER_SIZE (0x300) - -/* LZMA is based on a large set of probabilities, each managed - independently. Each probability is an 11 bit number that we store - in a uint16_t. We use a single large array of probabilities. */ - -/* Lengths of entries in the LZMA probabilities array. The names used - here are copied from the Linux kernel implementation. */ - -#define LZMA_PROB_IS_MATCH_LEN (LZMA_STATES * LZMA_POS_STATES) -#define LZMA_PROB_IS_REP_LEN LZMA_STATES -#define LZMA_PROB_IS_REP0_LEN LZMA_STATES -#define LZMA_PROB_IS_REP1_LEN LZMA_STATES -#define LZMA_PROB_IS_REP2_LEN LZMA_STATES -#define LZMA_PROB_IS_REP0_LONG_LEN (LZMA_STATES * LZMA_POS_STATES) -#define LZMA_PROB_DIST_SLOT_LEN (LZMA_DIST_STATES * LZMA_DIST_SLOTS) -#define LZMA_PROB_DIST_SPECIAL_LEN (LZMA_FULL_DISTANCES - LZMA_DIST_MODEL_END) -#define LZMA_PROB_DIST_ALIGN_LEN LZMA_ALIGN_SIZE -#define LZMA_PROB_MATCH_LEN_CHOICE_LEN 1 -#define LZMA_PROB_MATCH_LEN_CHOICE2_LEN 1 -#define LZMA_PROB_MATCH_LEN_LOW_LEN (LZMA_POS_STATES * LZMA_LEN_LOW_SYMBOLS) -#define LZMA_PROB_MATCH_LEN_MID_LEN (LZMA_POS_STATES * LZMA_LEN_MID_SYMBOLS) -#define LZMA_PROB_MATCH_LEN_HIGH_LEN LZMA_LEN_HIGH_SYMBOLS -#define LZMA_PROB_REP_LEN_CHOICE_LEN 1 -#define LZMA_PROB_REP_LEN_CHOICE2_LEN 1 -#define LZMA_PROB_REP_LEN_LOW_LEN (LZMA_POS_STATES * LZMA_LEN_LOW_SYMBOLS) -#define LZMA_PROB_REP_LEN_MID_LEN (LZMA_POS_STATES * LZMA_LEN_MID_SYMBOLS) -#define LZMA_PROB_REP_LEN_HIGH_LEN LZMA_LEN_HIGH_SYMBOLS -#define LZMA_PROB_LITERAL_LEN \ - (LZMA_LITERAL_CODERS_MAX * LZMA_LITERAL_CODER_SIZE) - -/* Offsets into the LZMA probabilities array. This is mechanically - generated from the above lengths. */ - -#define LZMA_PROB_IS_MATCH_OFFSET 0 -#define LZMA_PROB_IS_REP_OFFSET \ - (LZMA_PROB_IS_MATCH_OFFSET + LZMA_PROB_IS_MATCH_LEN) -#define LZMA_PROB_IS_REP0_OFFSET \ - (LZMA_PROB_IS_REP_OFFSET + LZMA_PROB_IS_REP_LEN) -#define LZMA_PROB_IS_REP1_OFFSET \ - (LZMA_PROB_IS_REP0_OFFSET + LZMA_PROB_IS_REP0_LEN) -#define LZMA_PROB_IS_REP2_OFFSET \ - (LZMA_PROB_IS_REP1_OFFSET + LZMA_PROB_IS_REP1_LEN) -#define LZMA_PROB_IS_REP0_LONG_OFFSET \ - (LZMA_PROB_IS_REP2_OFFSET + LZMA_PROB_IS_REP2_LEN) -#define LZMA_PROB_DIST_SLOT_OFFSET \ - (LZMA_PROB_IS_REP0_LONG_OFFSET + LZMA_PROB_IS_REP0_LONG_LEN) -#define LZMA_PROB_DIST_SPECIAL_OFFSET \ - (LZMA_PROB_DIST_SLOT_OFFSET + LZMA_PROB_DIST_SLOT_LEN) -#define LZMA_PROB_DIST_ALIGN_OFFSET \ - (LZMA_PROB_DIST_SPECIAL_OFFSET + LZMA_PROB_DIST_SPECIAL_LEN) -#define LZMA_PROB_MATCH_LEN_CHOICE_OFFSET \ - (LZMA_PROB_DIST_ALIGN_OFFSET + LZMA_PROB_DIST_ALIGN_LEN) -#define LZMA_PROB_MATCH_LEN_CHOICE2_OFFSET \ - (LZMA_PROB_MATCH_LEN_CHOICE_OFFSET + LZMA_PROB_MATCH_LEN_CHOICE_LEN) -#define LZMA_PROB_MATCH_LEN_LOW_OFFSET \ - (LZMA_PROB_MATCH_LEN_CHOICE2_OFFSET + LZMA_PROB_MATCH_LEN_CHOICE2_LEN) -#define LZMA_PROB_MATCH_LEN_MID_OFFSET \ - (LZMA_PROB_MATCH_LEN_LOW_OFFSET + LZMA_PROB_MATCH_LEN_LOW_LEN) -#define LZMA_PROB_MATCH_LEN_HIGH_OFFSET \ - (LZMA_PROB_MATCH_LEN_MID_OFFSET + LZMA_PROB_MATCH_LEN_MID_LEN) -#define LZMA_PROB_REP_LEN_CHOICE_OFFSET \ - (LZMA_PROB_MATCH_LEN_HIGH_OFFSET + LZMA_PROB_MATCH_LEN_HIGH_LEN) -#define LZMA_PROB_REP_LEN_CHOICE2_OFFSET \ - (LZMA_PROB_REP_LEN_CHOICE_OFFSET + LZMA_PROB_REP_LEN_CHOICE_LEN) -#define LZMA_PROB_REP_LEN_LOW_OFFSET \ - (LZMA_PROB_REP_LEN_CHOICE2_OFFSET + LZMA_PROB_REP_LEN_CHOICE2_LEN) -#define LZMA_PROB_REP_LEN_MID_OFFSET \ - (LZMA_PROB_REP_LEN_LOW_OFFSET + LZMA_PROB_REP_LEN_LOW_LEN) -#define LZMA_PROB_REP_LEN_HIGH_OFFSET \ - (LZMA_PROB_REP_LEN_MID_OFFSET + LZMA_PROB_REP_LEN_MID_LEN) -#define LZMA_PROB_LITERAL_OFFSET \ - (LZMA_PROB_REP_LEN_HIGH_OFFSET + LZMA_PROB_REP_LEN_HIGH_LEN) - -#define LZMA_PROB_TOTAL_COUNT \ - (LZMA_PROB_LITERAL_OFFSET + LZMA_PROB_LITERAL_LEN) - -/* Check that the number of LZMA probabilities is the same as the - Linux kernel implementation. */ - -#if LZMA_PROB_TOTAL_COUNT != 1846 + (1 << 4) * 0x300 - #error Wrong number of LZMA probabilities -#endif - -/* Expressions for the offset in the LZMA probabilities array of a - specific probability. */ - -#define LZMA_IS_MATCH(state, pos) \ - (LZMA_PROB_IS_MATCH_OFFSET + (state) * LZMA_POS_STATES + (pos)) -#define LZMA_IS_REP(state) \ - (LZMA_PROB_IS_REP_OFFSET + (state)) -#define LZMA_IS_REP0(state) \ - (LZMA_PROB_IS_REP0_OFFSET + (state)) -#define LZMA_IS_REP1(state) \ - (LZMA_PROB_IS_REP1_OFFSET + (state)) -#define LZMA_IS_REP2(state) \ - (LZMA_PROB_IS_REP2_OFFSET + (state)) -#define LZMA_IS_REP0_LONG(state, pos) \ - (LZMA_PROB_IS_REP0_LONG_OFFSET + (state) * LZMA_POS_STATES + (pos)) -#define LZMA_DIST_SLOT(dist, slot) \ - (LZMA_PROB_DIST_SLOT_OFFSET + (dist) * LZMA_DIST_SLOTS + (slot)) -#define LZMA_DIST_SPECIAL(dist) \ - (LZMA_PROB_DIST_SPECIAL_OFFSET + (dist)) -#define LZMA_DIST_ALIGN(dist) \ - (LZMA_PROB_DIST_ALIGN_OFFSET + (dist)) -#define LZMA_MATCH_LEN_CHOICE \ - LZMA_PROB_MATCH_LEN_CHOICE_OFFSET -#define LZMA_MATCH_LEN_CHOICE2 \ - LZMA_PROB_MATCH_LEN_CHOICE2_OFFSET -#define LZMA_MATCH_LEN_LOW(pos, sym) \ - (LZMA_PROB_MATCH_LEN_LOW_OFFSET + (pos) * LZMA_LEN_LOW_SYMBOLS + (sym)) -#define LZMA_MATCH_LEN_MID(pos, sym) \ - (LZMA_PROB_MATCH_LEN_MID_OFFSET + (pos) * LZMA_LEN_MID_SYMBOLS + (sym)) -#define LZMA_MATCH_LEN_HIGH(sym) \ - (LZMA_PROB_MATCH_LEN_HIGH_OFFSET + (sym)) -#define LZMA_REP_LEN_CHOICE \ - LZMA_PROB_REP_LEN_CHOICE_OFFSET -#define LZMA_REP_LEN_CHOICE2 \ - LZMA_PROB_REP_LEN_CHOICE2_OFFSET -#define LZMA_REP_LEN_LOW(pos, sym) \ - (LZMA_PROB_REP_LEN_LOW_OFFSET + (pos) * LZMA_LEN_LOW_SYMBOLS + (sym)) -#define LZMA_REP_LEN_MID(pos, sym) \ - (LZMA_PROB_REP_LEN_MID_OFFSET + (pos) * LZMA_LEN_MID_SYMBOLS + (sym)) -#define LZMA_REP_LEN_HIGH(sym) \ - (LZMA_PROB_REP_LEN_HIGH_OFFSET + (sym)) -#define LZMA_LITERAL(code, size) \ - (LZMA_PROB_LITERAL_OFFSET + (code) * LZMA_LITERAL_CODER_SIZE + (size)) - -/* Read an LZMA varint from BUF, reading and updating *POFFSET, - setting *VAL. Returns 0 on error, 1 on success. */ - -static int -elf_lzma_varint (const unsigned char *compressed, size_t compressed_size, - size_t *poffset, uint64_t *val) -{ - size_t off; - int i; - uint64_t v; - unsigned char b; - - off = *poffset; - i = 0; - v = 0; - while (1) - { - if (unlikely (off >= compressed_size)) - { - elf_uncompress_failed (); - return 0; - } - b = compressed[off]; - v |= (b & 0x7f) << (i * 7); - ++off; - if ((b & 0x80) == 0) - { - *poffset = off; - *val = v; - return 1; - } - ++i; - if (unlikely (i >= 9)) - { - elf_uncompress_failed (); - return 0; - } - } -} - -/* Normalize the LZMA range decoder, pulling in an extra input byte if - needed. */ - -static void -elf_lzma_range_normalize (const unsigned char *compressed, - size_t compressed_size, size_t *poffset, - uint32_t *prange, uint32_t *pcode) -{ - if (*prange < (1U << 24)) - { - if (unlikely (*poffset >= compressed_size)) - { - /* We assume this will be caught elsewhere. */ - elf_uncompress_failed (); - return; - } - *prange <<= 8; - *pcode <<= 8; - *pcode += compressed[*poffset]; - ++*poffset; - } -} - -/* Read and return a single bit from the LZMA stream, reading and - updating *PROB. Each bit comes from the range coder. */ - -static int -elf_lzma_bit (const unsigned char *compressed, size_t compressed_size, - uint16_t *prob, size_t *poffset, uint32_t *prange, - uint32_t *pcode) -{ - uint32_t bound; - - elf_lzma_range_normalize (compressed, compressed_size, poffset, - prange, pcode); - bound = (*prange >> 11) * (uint32_t) *prob; - if (*pcode < bound) - { - *prange = bound; - *prob += ((1U << 11) - *prob) >> 5; - return 0; - } - else - { - *prange -= bound; - *pcode -= bound; - *prob -= *prob >> 5; - return 1; - } -} - -/* Read an integer of size BITS from the LZMA stream, most significant - bit first. The bits are predicted using PROBS. */ - -static uint32_t -elf_lzma_integer (const unsigned char *compressed, size_t compressed_size, - uint16_t *probs, uint32_t bits, size_t *poffset, - uint32_t *prange, uint32_t *pcode) -{ - uint32_t sym; - uint32_t i; - - sym = 1; - for (i = 0; i < bits; i++) - { - int bit; - - bit = elf_lzma_bit (compressed, compressed_size, probs + sym, poffset, - prange, pcode); - sym <<= 1; - sym += bit; - } - return sym - (1 << bits); -} - -/* Read an integer of size BITS from the LZMA stream, least - significant bit first. The bits are predicted using PROBS. */ - -static uint32_t -elf_lzma_reverse_integer (const unsigned char *compressed, - size_t compressed_size, uint16_t *probs, - uint32_t bits, size_t *poffset, uint32_t *prange, - uint32_t *pcode) -{ - uint32_t sym; - uint32_t val; - uint32_t i; - - sym = 1; - val = 0; - for (i = 0; i < bits; i++) - { - int bit; - - bit = elf_lzma_bit (compressed, compressed_size, probs + sym, poffset, - prange, pcode); - sym <<= 1; - sym += bit; - val += bit << i; - } - return val; -} - -/* Read a length from the LZMA stream. IS_REP picks either LZMA_MATCH - or LZMA_REP probabilities. */ - -static uint32_t -elf_lzma_len (const unsigned char *compressed, size_t compressed_size, - uint16_t *probs, int is_rep, unsigned int pos_state, - size_t *poffset, uint32_t *prange, uint32_t *pcode) -{ - uint16_t *probs_choice; - uint16_t *probs_sym; - uint32_t bits; - uint32_t len; - - probs_choice = probs + (is_rep - ? LZMA_REP_LEN_CHOICE - : LZMA_MATCH_LEN_CHOICE); - if (elf_lzma_bit (compressed, compressed_size, probs_choice, poffset, - prange, pcode)) - { - probs_choice = probs + (is_rep - ? LZMA_REP_LEN_CHOICE2 - : LZMA_MATCH_LEN_CHOICE2); - if (elf_lzma_bit (compressed, compressed_size, probs_choice, - poffset, prange, pcode)) - { - probs_sym = probs + (is_rep - ? LZMA_REP_LEN_HIGH (0) - : LZMA_MATCH_LEN_HIGH (0)); - bits = 8; - len = 2 + 8 + 8; - } - else - { - probs_sym = probs + (is_rep - ? LZMA_REP_LEN_MID (pos_state, 0) - : LZMA_MATCH_LEN_MID (pos_state, 0)); - bits = 3; - len = 2 + 8; - } - } - else - { - probs_sym = probs + (is_rep - ? LZMA_REP_LEN_LOW (pos_state, 0) - : LZMA_MATCH_LEN_LOW (pos_state, 0)); - bits = 3; - len = 2; - } - - len += elf_lzma_integer (compressed, compressed_size, probs_sym, bits, - poffset, prange, pcode); - return len; -} - -/* Uncompress one LZMA block from a minidebug file. The compressed - data is at COMPRESSED + *POFFSET. Update *POFFSET. Store the data - into the memory at UNCOMPRESSED, size UNCOMPRESSED_SIZE. CHECK is - the stream flag from the xz header. Return 1 on successful - decompression. */ - -static int -elf_uncompress_lzma_block (const unsigned char *compressed, - size_t compressed_size, unsigned char check, - uint16_t *probs, unsigned char *uncompressed, - size_t uncompressed_size, size_t *poffset) -{ - size_t off; - size_t block_header_offset; - size_t block_header_size; - unsigned char block_flags; - uint64_t header_compressed_size; - uint64_t header_uncompressed_size; - unsigned char lzma2_properties; - size_t crc_offset; - uint32_t computed_crc; - uint32_t stream_crc; - size_t uncompressed_offset; - size_t dict_start_offset; - unsigned int lc; - unsigned int lp; - unsigned int pb; - uint32_t range; - uint32_t code; - uint32_t lstate; - uint32_t dist[4]; - - off = *poffset; - block_header_offset = off; - - /* Block header size is a single byte. */ - if (unlikely (off >= compressed_size)) - { - elf_uncompress_failed (); - return 0; - } - block_header_size = (compressed[off] + 1) * 4; - if (unlikely (off + block_header_size > compressed_size)) - { - elf_uncompress_failed (); - return 0; - } - - /* Block flags. */ - block_flags = compressed[off + 1]; - if (unlikely ((block_flags & 0x3c) != 0)) - { - elf_uncompress_failed (); - return 0; - } - - off += 2; - - /* Optional compressed size. */ - header_compressed_size = 0; - if ((block_flags & 0x40) != 0) - { - *poffset = off; - if (!elf_lzma_varint (compressed, compressed_size, poffset, - &header_compressed_size)) - return 0; - off = *poffset; - } - - /* Optional uncompressed size. */ - header_uncompressed_size = 0; - if ((block_flags & 0x80) != 0) - { - *poffset = off; - if (!elf_lzma_varint (compressed, compressed_size, poffset, - &header_uncompressed_size)) - return 0; - off = *poffset; - } - - /* The recipe for creating a minidebug file is to run the xz program - with no arguments, so we expect exactly one filter: lzma2. */ - - if (unlikely ((block_flags & 0x3) != 0)) - { - elf_uncompress_failed (); - return 0; - } - - if (unlikely (off + 2 >= block_header_offset + block_header_size)) - { - elf_uncompress_failed (); - return 0; - } - - /* The filter ID for LZMA2 is 0x21. */ - if (unlikely (compressed[off] != 0x21)) - { - elf_uncompress_failed (); - return 0; - } - ++off; - - /* The size of the filter properties for LZMA2 is 1. */ - if (unlikely (compressed[off] != 1)) - { - elf_uncompress_failed (); - return 0; - } - ++off; - - lzma2_properties = compressed[off]; - ++off; - - if (unlikely (lzma2_properties > 40)) - { - elf_uncompress_failed (); - return 0; - } - - /* The properties describe the dictionary size, but we don't care - what that is. */ - - /* Skip to just before CRC, verifying zero bytes in between. */ - crc_offset = block_header_offset + block_header_size - 4; - if (unlikely (crc_offset + 4 > compressed_size)) - { - elf_uncompress_failed (); - return 0; - } - for (; off < crc_offset; off++) - { - if (compressed[off] != 0) - { - elf_uncompress_failed (); - return 0; - } - } - - /* Block header CRC. */ - computed_crc = elf_crc32 (0, compressed + block_header_offset, - block_header_size - 4); - stream_crc = ((uint32_t)compressed[off] - | ((uint32_t)compressed[off + 1] << 8) - | ((uint32_t)compressed[off + 2] << 16) - | ((uint32_t)compressed[off + 3] << 24)); - if (unlikely (computed_crc != stream_crc)) - { - elf_uncompress_failed (); - return 0; - } - off += 4; - - /* Read a sequence of LZMA2 packets. */ - - uncompressed_offset = 0; - dict_start_offset = 0; - lc = 0; - lp = 0; - pb = 0; - lstate = 0; - while (off < compressed_size) - { - unsigned char control; - - range = 0xffffffff; - code = 0; - - control = compressed[off]; - ++off; - if (unlikely (control == 0)) - { - /* End of packets. */ - break; - } - - if (control == 1 || control >= 0xe0) - { - /* Reset dictionary to empty. */ - dict_start_offset = uncompressed_offset; - } - - if (control < 0x80) - { - size_t chunk_size; - - /* The only valid values here are 1 or 2. A 1 means to - reset the dictionary (done above). Then we see an - uncompressed chunk. */ - - if (unlikely (control > 2)) - { - elf_uncompress_failed (); - return 0; - } - - /* An uncompressed chunk is a two byte size followed by - data. */ - - if (unlikely (off + 2 > compressed_size)) - { - elf_uncompress_failed (); - return 0; - } - - chunk_size = compressed[off] << 8; - chunk_size += compressed[off + 1]; - ++chunk_size; - - off += 2; - - if (unlikely (off + chunk_size > compressed_size)) - { - elf_uncompress_failed (); - return 0; - } - if (unlikely (uncompressed_offset + chunk_size > uncompressed_size)) - { - elf_uncompress_failed (); - return 0; - } - - memcpy (uncompressed + uncompressed_offset, compressed + off, - chunk_size); - uncompressed_offset += chunk_size; - off += chunk_size; - } - else - { - size_t uncompressed_chunk_start; - size_t uncompressed_chunk_size; - size_t compressed_chunk_size; - size_t limit; - - /* An LZMA chunk. This starts with an uncompressed size and - a compressed size. */ - - if (unlikely (off + 4 >= compressed_size)) - { - elf_uncompress_failed (); - return 0; - } - - uncompressed_chunk_start = uncompressed_offset; - - uncompressed_chunk_size = (control & 0x1f) << 16; - uncompressed_chunk_size += compressed[off] << 8; - uncompressed_chunk_size += compressed[off + 1]; - ++uncompressed_chunk_size; - - compressed_chunk_size = compressed[off + 2] << 8; - compressed_chunk_size += compressed[off + 3]; - ++compressed_chunk_size; - - off += 4; - - /* Bit 7 (0x80) is set. - Bits 6 and 5 (0x40 and 0x20) are as follows: - 0: don't reset anything - 1: reset state - 2: reset state, read properties - 3: reset state, read properties, reset dictionary (done above) */ - - if (control >= 0xc0) - { - unsigned char props; - - /* Bit 6 is set, read properties. */ - - if (unlikely (off >= compressed_size)) - { - elf_uncompress_failed (); - return 0; - } - props = compressed[off]; - ++off; - if (unlikely (props > (4 * 5 + 4) * 9 + 8)) - { - elf_uncompress_failed (); - return 0; - } - pb = 0; - while (props >= 9 * 5) - { - props -= 9 * 5; - ++pb; - } - lp = 0; - while (props > 9) - { - props -= 9; - ++lp; - } - lc = props; - if (unlikely (lc + lp > 4)) - { - elf_uncompress_failed (); - return 0; - } - } - - if (control >= 0xa0) - { - size_t i; - - /* Bit 5 or 6 is set, reset LZMA state. */ - - lstate = 0; - memset (&dist, 0, sizeof dist); - for (i = 0; i < LZMA_PROB_TOTAL_COUNT; i++) - probs[i] = 1 << 10; - range = 0xffffffff; - code = 0; - } - - /* Read the range code. */ - - if (unlikely (off + 5 > compressed_size)) - { - elf_uncompress_failed (); - return 0; - } - - /* The byte at compressed[off] is ignored for some - reason. */ - - code = ((compressed[off + 1] << 24) - + (compressed[off + 2] << 16) - + (compressed[off + 3] << 8) - + compressed[off + 4]); - off += 5; - - /* This is the main LZMA decode loop. */ - - limit = off + compressed_chunk_size; - *poffset = off; - while (*poffset < limit) - { - unsigned int pos_state; - - if (unlikely (uncompressed_offset - == (uncompressed_chunk_start - + uncompressed_chunk_size))) - { - /* We've decompressed all the expected bytes. */ - break; - } - - pos_state = ((uncompressed_offset - dict_start_offset) - & ((1 << pb) - 1)); - - if (elf_lzma_bit (compressed, compressed_size, - probs + LZMA_IS_MATCH (lstate, pos_state), - poffset, &range, &code)) - { - uint32_t len; - - if (elf_lzma_bit (compressed, compressed_size, - probs + LZMA_IS_REP (lstate), - poffset, &range, &code)) - { - int short_rep; - uint32_t next_dist; - - /* Repeated match. */ - - short_rep = 0; - if (elf_lzma_bit (compressed, compressed_size, - probs + LZMA_IS_REP0 (lstate), - poffset, &range, &code)) - { - if (elf_lzma_bit (compressed, compressed_size, - probs + LZMA_IS_REP1 (lstate), - poffset, &range, &code)) - { - if (elf_lzma_bit (compressed, compressed_size, - probs + LZMA_IS_REP2 (lstate), - poffset, &range, &code)) - { - next_dist = dist[3]; - dist[3] = dist[2]; - } - else - { - next_dist = dist[2]; - } - dist[2] = dist[1]; - } - else - { - next_dist = dist[1]; - } - - dist[1] = dist[0]; - dist[0] = next_dist; - } - else - { - if (!elf_lzma_bit (compressed, compressed_size, - (probs - + LZMA_IS_REP0_LONG (lstate, - pos_state)), - poffset, &range, &code)) - short_rep = 1; - } - - if (lstate < 7) - lstate = short_rep ? 9 : 8; - else - lstate = 11; - - if (short_rep) - len = 1; - else - len = elf_lzma_len (compressed, compressed_size, - probs, 1, pos_state, poffset, - &range, &code); - } - else - { - uint32_t dist_state; - uint32_t dist_slot; - uint16_t *probs_dist; - - /* Match. */ - - if (lstate < 7) - lstate = 7; - else - lstate = 10; - dist[3] = dist[2]; - dist[2] = dist[1]; - dist[1] = dist[0]; - len = elf_lzma_len (compressed, compressed_size, - probs, 0, pos_state, poffset, - &range, &code); - - if (len < 4 + 2) - dist_state = len - 2; - else - dist_state = 3; - probs_dist = probs + LZMA_DIST_SLOT (dist_state, 0); - dist_slot = elf_lzma_integer (compressed, - compressed_size, - probs_dist, 6, - poffset, &range, - &code); - if (dist_slot < LZMA_DIST_MODEL_START) - dist[0] = dist_slot; - else - { - uint32_t limit; - - limit = (dist_slot >> 1) - 1; - dist[0] = 2 + (dist_slot & 1); - if (dist_slot < LZMA_DIST_MODEL_END) - { - dist[0] <<= limit; - probs_dist = (probs - + LZMA_DIST_SPECIAL(dist[0] - - dist_slot - - 1)); - dist[0] += - elf_lzma_reverse_integer (compressed, - compressed_size, - probs_dist, - limit, poffset, - &range, &code); - } - else - { - uint32_t dist0; - uint32_t i; - - dist0 = dist[0]; - for (i = 0; i < limit - 4; i++) - { - uint32_t mask; - - elf_lzma_range_normalize (compressed, - compressed_size, - poffset, - &range, &code); - range >>= 1; - code -= range; - mask = -(code >> 31); - code += range & mask; - dist0 <<= 1; - dist0 += mask + 1; - } - dist0 <<= 4; - probs_dist = probs + LZMA_DIST_ALIGN (0); - dist0 += - elf_lzma_reverse_integer (compressed, - compressed_size, - probs_dist, 4, - poffset, - &range, &code); - dist[0] = dist0; - } - } - } - - if (unlikely (uncompressed_offset - - dict_start_offset < dist[0] + 1)) - { - elf_uncompress_failed (); - return 0; - } - if (unlikely (uncompressed_offset + len > uncompressed_size)) - { - elf_uncompress_failed (); - return 0; - } - - if (dist[0] == 0) - { - /* A common case, meaning repeat the last - character LEN times. */ - memset (uncompressed + uncompressed_offset, - uncompressed[uncompressed_offset - 1], - len); - uncompressed_offset += len; - } - else if (dist[0] + 1 >= len) - { - memcpy (uncompressed + uncompressed_offset, - uncompressed + uncompressed_offset - dist[0] - 1, - len); - uncompressed_offset += len; - } - else - { - while (len > 0) - { - uint32_t copy; - - copy = len < dist[0] + 1 ? len : dist[0] + 1; - memcpy (uncompressed + uncompressed_offset, - (uncompressed + uncompressed_offset - - dist[0] - 1), - copy); - len -= copy; - uncompressed_offset += copy; - } - } - } - else - { - unsigned char prev; - unsigned char low; - size_t high; - uint16_t *lit_probs; - unsigned int sym; - - /* Literal value. */ - - if (uncompressed_offset > 0) - prev = uncompressed[uncompressed_offset - 1]; - else - prev = 0; - low = prev >> (8 - lc); - high = (((uncompressed_offset - dict_start_offset) - & ((1 << lp) - 1)) - << lc); - lit_probs = probs + LZMA_LITERAL (low + high, 0); - if (lstate < 7) - sym = elf_lzma_integer (compressed, compressed_size, - lit_probs, 8, poffset, &range, - &code); - else - { - unsigned int match; - unsigned int bit; - unsigned int match_bit; - unsigned int idx; - - sym = 1; - if (uncompressed_offset >= dist[0] + 1) - match = uncompressed[uncompressed_offset - dist[0] - 1]; - else - match = 0; - match <<= 1; - bit = 0x100; - do - { - match_bit = match & bit; - match <<= 1; - idx = bit + match_bit + sym; - sym <<= 1; - if (elf_lzma_bit (compressed, compressed_size, - lit_probs + idx, poffset, - &range, &code)) - { - ++sym; - bit &= match_bit; - } - else - { - bit &= ~ match_bit; - } - } - while (sym < 0x100); - } - - if (unlikely (uncompressed_offset >= uncompressed_size)) - { - elf_uncompress_failed (); - return 0; - } - - uncompressed[uncompressed_offset] = (unsigned char) sym; - ++uncompressed_offset; - if (lstate <= 3) - lstate = 0; - else if (lstate <= 9) - lstate -= 3; - else - lstate -= 6; - } - } - - elf_lzma_range_normalize (compressed, compressed_size, poffset, - &range, &code); - - off = *poffset; - } - } - - /* We have reached the end of the block. Pad to four byte - boundary. */ - off = (off + 3) &~ (size_t) 3; - if (unlikely (off > compressed_size)) - { - elf_uncompress_failed (); - return 0; - } - - switch (check) - { - case 0: - /* No check. */ - break; - - case 1: - /* CRC32 */ - if (unlikely (off + 4 > compressed_size)) - { - elf_uncompress_failed (); - return 0; - } - computed_crc = elf_crc32 (0, uncompressed, uncompressed_offset); - stream_crc = ((uint32_t)compressed[off] - | ((uint32_t)compressed[off + 1] << 8) - | ((uint32_t)compressed[off + 2] << 16) - | ((uint32_t)compressed[off + 3] << 24)); - if (computed_crc != stream_crc) - { - elf_uncompress_failed (); - return 0; - } - off += 4; - break; - - case 4: - /* CRC64. We don't bother computing a CRC64 checksum. */ - if (unlikely (off + 8 > compressed_size)) - { - elf_uncompress_failed (); - return 0; - } - off += 8; - break; - - case 10: - /* SHA. We don't bother computing a SHA checksum. */ - if (unlikely (off + 32 > compressed_size)) - { - elf_uncompress_failed (); - return 0; - } - off += 32; - break; - - default: - elf_uncompress_failed (); - return 0; - } - - *poffset = off; - - return 1; -} - -/* Uncompress LZMA data found in a minidebug file. The minidebug - format is described at - https://sourceware.org/gdb/current/onlinedocs/gdb/MiniDebugInfo.html. - Returns 0 on error, 1 on successful decompression. For this - function we return 0 on failure to decompress, as the calling code - will carry on in that case. */ - -static int -elf_uncompress_lzma (struct backtrace_state *state, - const unsigned char *compressed, size_t compressed_size, - backtrace_error_callback error_callback, void *data, - unsigned char **uncompressed, size_t *uncompressed_size) -{ - size_t header_size; - size_t footer_size; - unsigned char check; - uint32_t computed_crc; - uint32_t stream_crc; - size_t offset; - size_t index_size; - size_t footer_offset; - size_t index_offset; - uint64_t index_compressed_size; - uint64_t index_uncompressed_size; - unsigned char *mem; - uint16_t *probs; - size_t compressed_block_size; - - /* The format starts with a stream header and ends with a stream - footer. */ - header_size = 12; - footer_size = 12; - if (unlikely (compressed_size < header_size + footer_size)) - { - elf_uncompress_failed (); - return 0; - } - - /* The stream header starts with a magic string. */ - if (unlikely (memcmp (compressed, "\375" "7zXZ\0", 6) != 0)) - { - elf_uncompress_failed (); - return 0; - } - - /* Next come stream flags. The first byte is zero, the second byte - is the check. */ - if (unlikely (compressed[6] != 0)) - { - elf_uncompress_failed (); - return 0; - } - check = compressed[7]; - if (unlikely ((check & 0xf8) != 0)) - { - elf_uncompress_failed (); - return 0; - } - - /* Next comes a CRC of the stream flags. */ - computed_crc = elf_crc32 (0, compressed + 6, 2); - stream_crc = ((uint32_t)compressed[8] - | ((uint32_t)compressed[9] << 8) - | ((uint32_t)compressed[10] << 16) - | ((uint32_t)compressed[11] << 24)); - if (unlikely (computed_crc != stream_crc)) - { - elf_uncompress_failed (); - return 0; - } - - /* Now that we've parsed the header, parse the footer, so that we - can get the uncompressed size. */ - - /* The footer ends with two magic bytes. */ - - offset = compressed_size; - if (unlikely (memcmp (compressed + offset - 2, "YZ", 2) != 0)) - { - elf_uncompress_failed (); - return 0; - } - offset -= 2; - - /* Before that are the stream flags, which should be the same as the - flags in the header. */ - if (unlikely (compressed[offset - 2] != 0 - || compressed[offset - 1] != check)) - { - elf_uncompress_failed (); - return 0; - } - offset -= 2; - - /* Before that is the size of the index field, which precedes the - footer. */ - index_size = (compressed[offset - 4] - | (compressed[offset - 3] << 8) - | (compressed[offset - 2] << 16) - | (compressed[offset - 1] << 24)); - index_size = (index_size + 1) * 4; - offset -= 4; - - /* Before that is a footer CRC. */ - computed_crc = elf_crc32 (0, compressed + offset, 6); - stream_crc = ((uint32_t)compressed[offset - 4] - | ((uint32_t)compressed[offset - 3] << 8) - | ((uint32_t)compressed[offset - 2] << 16) - | ((uint32_t)compressed[offset - 1] << 24)); - if (unlikely (computed_crc != stream_crc)) - { - elf_uncompress_failed (); - return 0; - } - offset -= 4; - - /* The index comes just before the footer. */ - if (unlikely (offset < index_size + header_size)) - { - elf_uncompress_failed (); - return 0; - } - - footer_offset = offset; - offset -= index_size; - index_offset = offset; - - /* The index starts with a zero byte. */ - if (unlikely (compressed[offset] != 0)) - { - elf_uncompress_failed (); - return 0; - } - ++offset; - - /* Next is the number of blocks. We expect zero blocks for an empty - stream, and otherwise a single block. */ - if (unlikely (compressed[offset] == 0)) - { - *uncompressed = NULL; - *uncompressed_size = 0; - return 1; - } - if (unlikely (compressed[offset] != 1)) - { - elf_uncompress_failed (); - return 0; - } - ++offset; - - /* Next is the compressed size and the uncompressed size. */ - if (!elf_lzma_varint (compressed, compressed_size, &offset, - &index_compressed_size)) - return 0; - if (!elf_lzma_varint (compressed, compressed_size, &offset, - &index_uncompressed_size)) - return 0; - - /* Pad to a four byte boundary. */ - offset = (offset + 3) &~ (size_t) 3; - - /* Next is a CRC of the index. */ - computed_crc = elf_crc32 (0, compressed + index_offset, - offset - index_offset); - stream_crc = ((uint32_t)compressed[offset] - | ((uint32_t)compressed[offset + 1] << 8) - | ((uint32_t)compressed[offset + 2] << 16) - | ((uint32_t)compressed[offset + 3] << 24)); - if (unlikely (computed_crc != stream_crc)) - { - elf_uncompress_failed (); - return 0; - } - offset += 4; - - /* We should now be back at the footer. */ - if (unlikely (offset != footer_offset)) - { - elf_uncompress_failed (); - return 0; - } - - /* Allocate space to hold the uncompressed data. If we succeed in - uncompressing the LZMA data, we never free this memory. */ - mem = (unsigned char *) backtrace_alloc (state, index_uncompressed_size, - error_callback, data); - if (unlikely (mem == NULL)) - return 0; - *uncompressed = mem; - *uncompressed_size = index_uncompressed_size; - - /* Allocate space for probabilities. */ - probs = ((uint16_t *) - backtrace_alloc (state, - LZMA_PROB_TOTAL_COUNT * sizeof (uint16_t), - error_callback, data)); - if (unlikely (probs == NULL)) - { - backtrace_free (state, mem, index_uncompressed_size, error_callback, - data); - return 0; - } - - /* Uncompress the block, which follows the header. */ - offset = 12; - if (!elf_uncompress_lzma_block (compressed, compressed_size, check, probs, - mem, index_uncompressed_size, &offset)) - { - backtrace_free (state, mem, index_uncompressed_size, error_callback, - data); - return 0; - } - - compressed_block_size = offset - 12; - if (unlikely (compressed_block_size - != ((index_compressed_size + 3) &~ (size_t) 3))) - { - elf_uncompress_failed (); - backtrace_free (state, mem, index_uncompressed_size, error_callback, - data); - return 0; - } - - offset = (offset + 3) &~ (size_t) 3; - if (unlikely (offset != index_offset)) - { - elf_uncompress_failed (); - backtrace_free (state, mem, index_uncompressed_size, error_callback, - data); - return 0; - } - - return 1; -} - -/* This function is a hook for testing the LZMA support. It is only - used by tests. */ - -int -backtrace_uncompress_lzma (struct backtrace_state *state, - const unsigned char *compressed, - size_t compressed_size, - backtrace_error_callback error_callback, - void *data, unsigned char **uncompressed, - size_t *uncompressed_size) -{ - return elf_uncompress_lzma (state, compressed, compressed_size, - error_callback, data, uncompressed, - uncompressed_size); -} - -/* Add the backtrace data for one ELF file. Returns 1 on success, - 0 on failure (in both cases descriptor is closed) or -1 if exe - is non-zero and the ELF file is ET_DYN, which tells the caller that - elf_add will need to be called on the descriptor again after - base_address is determined. */ - -static int -elf_add (struct backtrace_state *state, const char *filename, int descriptor, - const unsigned char *memory, size_t memory_size, - struct libbacktrace_base_address base_address, - struct elf_ppc64_opd_data *caller_opd, - backtrace_error_callback error_callback, void *data, - fileline *fileline_fn, int *found_sym, int *found_dwarf, - struct dwarf_data **fileline_entry, int exe, int debuginfo, - const char *with_buildid_data, uint32_t with_buildid_size) -{ - struct elf_view ehdr_view; - b_elf_ehdr ehdr; - off_t shoff; - unsigned int shnum; - unsigned int shstrndx; - struct elf_view shdrs_view; - int shdrs_view_valid; - const b_elf_shdr *shdrs; - const b_elf_shdr *shstrhdr; - size_t shstr_size; - off_t shstr_off; - struct elf_view names_view; - int names_view_valid; - const char *names; - unsigned int symtab_shndx; - unsigned int dynsym_shndx; - unsigned int i; - struct debug_section_info sections[DEBUG_MAX]; - struct debug_section_info zsections[DEBUG_MAX]; - struct elf_view symtab_view; - int symtab_view_valid; - struct elf_view strtab_view; - int strtab_view_valid; - struct elf_view buildid_view; - int buildid_view_valid; - const char *buildid_data; - uint32_t buildid_size; - struct elf_view debuglink_view; - int debuglink_view_valid; - const char *debuglink_name; - uint32_t debuglink_crc; - struct elf_view debugaltlink_view; - int debugaltlink_view_valid; - const char *debugaltlink_name; - const char *debugaltlink_buildid_data; - uint32_t debugaltlink_buildid_size; - struct elf_view gnu_debugdata_view; - int gnu_debugdata_view_valid; - size_t gnu_debugdata_size; - unsigned char *gnu_debugdata_uncompressed; - size_t gnu_debugdata_uncompressed_size; - off_t min_offset; - off_t max_offset; - off_t debug_size; - struct elf_view debug_view; - int debug_view_valid; - unsigned int using_debug_view; - uint16_t *zdebug_table; - struct elf_view split_debug_view[DEBUG_MAX]; - unsigned char split_debug_view_valid[DEBUG_MAX]; - struct elf_ppc64_opd_data opd_data, *opd; - int opd_view_valid; - struct dwarf_sections dwarf_sections; - struct dwarf_data *fileline_altlink = NULL; - - if (!debuginfo) - { - *found_sym = 0; - *found_dwarf = 0; - } - - shdrs_view_valid = 0; - names_view_valid = 0; - symtab_view_valid = 0; - strtab_view_valid = 0; - buildid_view_valid = 0; - buildid_data = NULL; - buildid_size = 0; - debuglink_view_valid = 0; - debuglink_name = NULL; - debuglink_crc = 0; - debugaltlink_view_valid = 0; - debugaltlink_name = NULL; - debugaltlink_buildid_data = NULL; - debugaltlink_buildid_size = 0; - gnu_debugdata_view_valid = 0; - gnu_debugdata_size = 0; - debug_view_valid = 0; - memset (&split_debug_view_valid[0], 0, sizeof split_debug_view_valid); - opd = NULL; - opd_view_valid = 0; - - if (!elf_get_view (state, descriptor, memory, memory_size, 0, sizeof ehdr, - error_callback, data, &ehdr_view)) - goto fail; - - memcpy (&ehdr, ehdr_view.view.data, sizeof ehdr); - - elf_release_view (state, &ehdr_view, error_callback, data); - - if (ehdr.e_ident[EI_MAG0] != ELFMAG0 - || ehdr.e_ident[EI_MAG1] != ELFMAG1 - || ehdr.e_ident[EI_MAG2] != ELFMAG2 - || ehdr.e_ident[EI_MAG3] != ELFMAG3) - { - error_callback (data, "executable file is not ELF", 0); - goto fail; - } - if (ehdr.e_ident[EI_VERSION] != EV_CURRENT) - { - error_callback (data, "executable file is unrecognized ELF version", 0); - goto fail; - } - -#if BACKTRACE_ELF_SIZE == 32 -#define BACKTRACE_ELFCLASS ELFCLASS32 -#else -#define BACKTRACE_ELFCLASS ELFCLASS64 -#endif - - if (ehdr.e_ident[EI_CLASS] != BACKTRACE_ELFCLASS) - { - error_callback (data, "executable file is unexpected ELF class", 0); - goto fail; - } - - if (ehdr.e_ident[EI_DATA] != ELFDATA2LSB - && ehdr.e_ident[EI_DATA] != ELFDATA2MSB) - { - error_callback (data, "executable file has unknown endianness", 0); - goto fail; - } - - /* If the executable is ET_DYN, it is either a PIE, or we are running - directly a shared library with .interp. We need to wait for - dl_iterate_phdr in that case to determine the actual base_address. */ - if (exe && ehdr.e_type == ET_DYN) - return -1; - - shoff = ehdr.e_shoff; - shnum = ehdr.e_shnum; - shstrndx = ehdr.e_shstrndx; - - if ((shnum == 0 || shstrndx == SHN_XINDEX) - && shoff != 0) - { - struct elf_view shdr_view; - const b_elf_shdr *shdr; - - if (!elf_get_view (state, descriptor, memory, memory_size, shoff, - sizeof shdr, error_callback, data, &shdr_view)) - goto fail; - - shdr = (const b_elf_shdr *) shdr_view.view.data; - - if (shnum == 0) - shnum = shdr->sh_size; - - if (shstrndx == SHN_XINDEX) - { - shstrndx = shdr->sh_link; - - /* Versions of the GNU binutils between 2.12 and 2.18 did - not handle objects with more than SHN_LORESERVE sections - correctly. All large section indexes were offset by - 0x100. There is more information at - http://sourceware.org/bugzilla/show_bug.cgi?id-5900 . - Fortunately these object files are easy to detect, as the - GNU binutils always put the section header string table - near the end of the list of sections. Thus if the - section header string table index is larger than the - number of sections, then we know we have to subtract - 0x100 to get the real section index. */ - if (shstrndx >= shnum && shstrndx >= SHN_LORESERVE + 0x100) - shstrndx -= 0x100; - } - - elf_release_view (state, &shdr_view, error_callback, data); - } - - if (shnum == 0 || shstrndx == 0) - goto fail; - - /* To translate PC to file/line when using DWARF, we need to find - the .debug_info and .debug_line sections. */ - - /* Read the section headers, skipping the first one. */ - - if (!elf_get_view (state, descriptor, memory, memory_size, - shoff + sizeof (b_elf_shdr), - (shnum - 1) * sizeof (b_elf_shdr), - error_callback, data, &shdrs_view)) - goto fail; - shdrs_view_valid = 1; - shdrs = (const b_elf_shdr *) shdrs_view.view.data; - - /* Read the section names. */ - - shstrhdr = &shdrs[shstrndx - 1]; - shstr_size = shstrhdr->sh_size; - shstr_off = shstrhdr->sh_offset; - - if (!elf_get_view (state, descriptor, memory, memory_size, shstr_off, - shstrhdr->sh_size, error_callback, data, &names_view)) - goto fail; - names_view_valid = 1; - names = (const char *) names_view.view.data; - - symtab_shndx = 0; - dynsym_shndx = 0; - - memset (sections, 0, sizeof sections); - memset (zsections, 0, sizeof zsections); - - /* Look for the symbol table. */ - for (i = 1; i < shnum; ++i) - { - const b_elf_shdr *shdr; - unsigned int sh_name; - const char *name; - int j; - - shdr = &shdrs[i - 1]; - - if (shdr->sh_type == SHT_SYMTAB) - symtab_shndx = i; - else if (shdr->sh_type == SHT_DYNSYM) - dynsym_shndx = i; - - sh_name = shdr->sh_name; - if (sh_name >= shstr_size) - { - error_callback (data, "ELF section name out of range", 0); - goto fail; - } - - name = names + sh_name; - - for (j = 0; j < (int) DEBUG_MAX; ++j) - { - if (strcmp (name, dwarf_section_names[j]) == 0) - { - sections[j].offset = shdr->sh_offset; - sections[j].size = shdr->sh_size; - sections[j].compressed = (shdr->sh_flags & SHF_COMPRESSED) != 0; - break; - } - } - - if (name[0] == '.' && name[1] == 'z') - { - for (j = 0; j < (int) DEBUG_MAX; ++j) - { - if (strcmp (name + 2, dwarf_section_names[j] + 1) == 0) - { - zsections[j].offset = shdr->sh_offset; - zsections[j].size = shdr->sh_size; - break; - } - } - } - - /* Read the build ID if present. This could check for any - SHT_NOTE section with the right note name and type, but gdb - looks for a specific section name. */ - if ((!debuginfo || with_buildid_data != NULL) - && !buildid_view_valid - && strcmp (name, ".note.gnu.build-id") == 0) - { - const b_elf_note *note; - - if (!elf_get_view (state, descriptor, memory, memory_size, - shdr->sh_offset, shdr->sh_size, error_callback, - data, &buildid_view)) - goto fail; - - buildid_view_valid = 1; - note = (const b_elf_note *) buildid_view.view.data; - if (note->type == NT_GNU_BUILD_ID - && note->namesz == 4 - && strncmp (note->name, "GNU", 4) == 0 - && shdr->sh_size <= 12 + ((note->namesz + 3) & ~ 3) + note->descsz) - { - buildid_data = ¬e->name[0] + ((note->namesz + 3) & ~ 3); - buildid_size = note->descsz; - } - - if (with_buildid_size != 0) - { - if (buildid_size != with_buildid_size) - goto fail; - - if (memcmp (buildid_data, with_buildid_data, buildid_size) != 0) - goto fail; - } - } - - /* Read the debuglink file if present. */ - if (!debuginfo - && !debuglink_view_valid - && strcmp (name, ".gnu_debuglink") == 0) - { - const char *debuglink_data; - size_t crc_offset; - - if (!elf_get_view (state, descriptor, memory, memory_size, - shdr->sh_offset, shdr->sh_size, error_callback, - data, &debuglink_view)) - goto fail; - - debuglink_view_valid = 1; - debuglink_data = (const char *) debuglink_view.view.data; - crc_offset = strnlen (debuglink_data, shdr->sh_size); - crc_offset = (crc_offset + 3) & ~3; - if (crc_offset + 4 <= shdr->sh_size) - { - debuglink_name = debuglink_data; - debuglink_crc = *(const uint32_t*)(debuglink_data + crc_offset); - } - } - - if (!debugaltlink_view_valid - && strcmp (name, ".gnu_debugaltlink") == 0) - { - const char *debugaltlink_data; - size_t debugaltlink_name_len; - - if (!elf_get_view (state, descriptor, memory, memory_size, - shdr->sh_offset, shdr->sh_size, error_callback, - data, &debugaltlink_view)) - goto fail; - - debugaltlink_view_valid = 1; - debugaltlink_data = (const char *) debugaltlink_view.view.data; - debugaltlink_name = debugaltlink_data; - debugaltlink_name_len = strnlen (debugaltlink_data, shdr->sh_size); - if (debugaltlink_name_len < shdr->sh_size) - { - /* Include terminating zero. */ - debugaltlink_name_len += 1; - - debugaltlink_buildid_data - = debugaltlink_data + debugaltlink_name_len; - debugaltlink_buildid_size = shdr->sh_size - debugaltlink_name_len; - } - } - - if (!debuginfo - && !gnu_debugdata_view_valid - && strcmp (name, ".gnu_debugdata") == 0) - { - if (!elf_get_view (state, descriptor, memory, memory_size, - shdr->sh_offset, shdr->sh_size, error_callback, - data, &gnu_debugdata_view)) - goto fail; - - gnu_debugdata_size = shdr->sh_size; - gnu_debugdata_view_valid = 1; - } - - /* Read the .opd section on PowerPC64 ELFv1. */ - if (ehdr.e_machine == EM_PPC64 - && (ehdr.e_flags & EF_PPC64_ABI) < 2 - && shdr->sh_type == SHT_PROGBITS - && strcmp (name, ".opd") == 0) - { - if (!elf_get_view (state, descriptor, memory, memory_size, - shdr->sh_offset, shdr->sh_size, error_callback, - data, &opd_data.view)) - goto fail; - - opd = &opd_data; - opd->addr = shdr->sh_addr; - opd->data = (const char *) opd_data.view.view.data; - opd->size = shdr->sh_size; - opd_view_valid = 1; - } - } - - /* A debuginfo file may not have a useful .opd section, but we can use the - one from the original executable. */ - if (opd == NULL) - opd = caller_opd; - - if (symtab_shndx == 0) - symtab_shndx = dynsym_shndx; - if (symtab_shndx != 0) - { - const b_elf_shdr *symtab_shdr; - unsigned int strtab_shndx; - const b_elf_shdr *strtab_shdr; - struct elf_syminfo_data *sdata; - - symtab_shdr = &shdrs[symtab_shndx - 1]; - strtab_shndx = symtab_shdr->sh_link; - if (strtab_shndx >= shnum) - { - error_callback (data, - "ELF symbol table strtab link out of range", 0); - goto fail; - } - strtab_shdr = &shdrs[strtab_shndx - 1]; - - if (!elf_get_view (state, descriptor, memory, memory_size, - symtab_shdr->sh_offset, symtab_shdr->sh_size, - error_callback, data, &symtab_view)) - goto fail; - symtab_view_valid = 1; - - if (!elf_get_view (state, descriptor, memory, memory_size, - strtab_shdr->sh_offset, strtab_shdr->sh_size, - error_callback, data, &strtab_view)) - goto fail; - strtab_view_valid = 1; - - sdata = ((struct elf_syminfo_data *) - backtrace_alloc (state, sizeof *sdata, error_callback, data)); - if (sdata == NULL) - goto fail; - - if (!elf_initialize_syminfo (state, base_address, - (const unsigned char*)symtab_view.view.data, symtab_shdr->sh_size, - (const unsigned char*)strtab_view.view.data, strtab_shdr->sh_size, - error_callback, data, sdata, opd)) - { - backtrace_free (state, sdata, sizeof *sdata, error_callback, data); - goto fail; - } - - /* We no longer need the symbol table, but we hold on to the - string table permanently. */ - elf_release_view (state, &symtab_view, error_callback, data); - symtab_view_valid = 0; - strtab_view_valid = 0; - - *found_sym = 1; - - elf_add_syminfo_data (state, sdata); - } - - elf_release_view (state, &shdrs_view, error_callback, data); - shdrs_view_valid = 0; - elf_release_view (state, &names_view, error_callback, data); - names_view_valid = 0; - - /* If the debug info is in a separate file, read that one instead. */ - - if (buildid_data != NULL) - { - int d; - - d = elf_open_debugfile_by_buildid (state, buildid_data, buildid_size, - filename, error_callback, data); - if (d >= 0) - { - int ret; - - elf_release_view (state, &buildid_view, error_callback, data); - if (debuglink_view_valid) - elf_release_view (state, &debuglink_view, error_callback, data); - if (debugaltlink_view_valid) - elf_release_view (state, &debugaltlink_view, error_callback, data); - ret = elf_add (state, "", d, NULL, 0, base_address, opd, - error_callback, data, fileline_fn, found_sym, - found_dwarf, NULL, 0, 1, NULL, 0); - if (ret < 0) - backtrace_close (d, error_callback, data); - else if (descriptor >= 0) - backtrace_close (descriptor, error_callback, data); - return ret; - } - } - - if (buildid_view_valid) - { - elf_release_view (state, &buildid_view, error_callback, data); - buildid_view_valid = 0; - } - - if (debuglink_name != NULL) - { - int d; - - d = elf_open_debugfile_by_debuglink (state, filename, debuglink_name, - debuglink_crc, error_callback, - data); - if (d >= 0) - { - int ret; - - elf_release_view (state, &debuglink_view, error_callback, data); - if (debugaltlink_view_valid) - elf_release_view (state, &debugaltlink_view, error_callback, data); - ret = elf_add (state, "", d, NULL, 0, base_address, opd, - error_callback, data, fileline_fn, found_sym, - found_dwarf, NULL, 0, 1, NULL, 0); - if (ret < 0) - backtrace_close (d, error_callback, data); - else if (descriptor >= 0) - backtrace_close(descriptor, error_callback, data); - return ret; - } - } - - if (debuglink_view_valid) - { - elf_release_view (state, &debuglink_view, error_callback, data); - debuglink_view_valid = 0; - } - - if (debugaltlink_name != NULL) - { - int d; - - d = elf_open_debugfile_by_debuglink (state, filename, debugaltlink_name, - 0, error_callback, data); - if (d >= 0) - { - int ret; - - ret = elf_add (state, filename, d, NULL, 0, base_address, opd, - error_callback, data, fileline_fn, found_sym, - found_dwarf, &fileline_altlink, 0, 1, - debugaltlink_buildid_data, debugaltlink_buildid_size); - elf_release_view (state, &debugaltlink_view, error_callback, data); - debugaltlink_view_valid = 0; - if (ret < 0) - { - backtrace_close (d, error_callback, data); - return ret; - } - } - } - - if (debugaltlink_view_valid) - { - elf_release_view (state, &debugaltlink_view, error_callback, data); - debugaltlink_view_valid = 0; - } - - if (gnu_debugdata_view_valid) - { - int ret; - - ret = elf_uncompress_lzma (state, - ((const unsigned char *) - gnu_debugdata_view.view.data), - gnu_debugdata_size, error_callback, data, - &gnu_debugdata_uncompressed, - &gnu_debugdata_uncompressed_size); - - elf_release_view (state, &gnu_debugdata_view, error_callback, data); - gnu_debugdata_view_valid = 0; - - if (ret) - { - ret = elf_add (state, filename, -1, gnu_debugdata_uncompressed, - gnu_debugdata_uncompressed_size, base_address, opd, - error_callback, data, fileline_fn, found_sym, - found_dwarf, NULL, 0, 0, NULL, 0); - if (ret >= 0 && descriptor >= 0) - backtrace_close(descriptor, error_callback, data); - return ret; - } - } - - if (opd_view_valid) - { - elf_release_view (state, &opd->view, error_callback, data); - opd_view_valid = 0; - opd = NULL; - } - - /* Read all the debug sections in a single view, since they are - probably adjacent in the file. If any of sections are - uncompressed, we never release this view. */ - - min_offset = 0; - max_offset = 0; - debug_size = 0; - for (i = 0; i < (int) DEBUG_MAX; ++i) - { - off_t end; - - if (sections[i].size != 0) - { - if (min_offset == 0 || sections[i].offset < min_offset) - min_offset = sections[i].offset; - end = sections[i].offset + sections[i].size; - if (end > max_offset) - max_offset = end; - debug_size += sections[i].size; - } - if (zsections[i].size != 0) - { - if (min_offset == 0 || zsections[i].offset < min_offset) - min_offset = zsections[i].offset; - end = zsections[i].offset + zsections[i].size; - if (end > max_offset) - max_offset = end; - debug_size += zsections[i].size; - } - } - if (min_offset == 0 || max_offset == 0) - { - if (descriptor >= 0) - { - if (!backtrace_close (descriptor, error_callback, data)) - goto fail; - } - return 1; - } - - /* If the total debug section size is large, assume that there are - gaps between the sections, and read them individually. */ - - if (max_offset - min_offset < 0x20000000 - || max_offset - min_offset < debug_size + 0x10000) - { - if (!elf_get_view (state, descriptor, memory, memory_size, min_offset, - max_offset - min_offset, error_callback, data, - &debug_view)) - goto fail; - debug_view_valid = 1; - } - else - { - memset (&split_debug_view[0], 0, sizeof split_debug_view); - for (i = 0; i < (int) DEBUG_MAX; ++i) - { - struct debug_section_info *dsec; - - if (sections[i].size != 0) - dsec = §ions[i]; - else if (zsections[i].size != 0) - dsec = &zsections[i]; - else - continue; - - if (!elf_get_view (state, descriptor, memory, memory_size, - dsec->offset, dsec->size, error_callback, data, - &split_debug_view[i])) - goto fail; - split_debug_view_valid[i] = 1; - - if (sections[i].size != 0) - sections[i].data = ((const unsigned char *) - split_debug_view[i].view.data); - else - zsections[i].data = ((const unsigned char *) - split_debug_view[i].view.data); - } - } - - /* We've read all we need from the executable. */ - if (descriptor >= 0) - { - if (!backtrace_close (descriptor, error_callback, data)) - goto fail; - descriptor = -1; - } - - using_debug_view = 0; - if (debug_view_valid) - { - for (i = 0; i < (int) DEBUG_MAX; ++i) - { - if (sections[i].size == 0) - sections[i].data = NULL; - else - { - sections[i].data = ((const unsigned char *) debug_view.view.data - + (sections[i].offset - min_offset)); - ++using_debug_view; - } - - if (zsections[i].size == 0) - zsections[i].data = NULL; - else - zsections[i].data = ((const unsigned char *) debug_view.view.data - + (zsections[i].offset - min_offset)); - } - } - - /* Uncompress the old format (--compress-debug-sections=zlib-gnu). */ - - zdebug_table = NULL; - for (i = 0; i < (int) DEBUG_MAX; ++i) - { - if (sections[i].size == 0 && zsections[i].size > 0) - { - unsigned char *uncompressed_data; - size_t uncompressed_size; - - if (zdebug_table == NULL) - { - zdebug_table = ((uint16_t *) - backtrace_alloc (state, ZLIB_TABLE_SIZE, - error_callback, data)); - if (zdebug_table == NULL) - goto fail; - } - - uncompressed_data = NULL; - uncompressed_size = 0; - if (!elf_uncompress_zdebug (state, zsections[i].data, - zsections[i].size, zdebug_table, - error_callback, data, - &uncompressed_data, &uncompressed_size)) - goto fail; - sections[i].data = uncompressed_data; - sections[i].size = uncompressed_size; - sections[i].compressed = 0; - - if (split_debug_view_valid[i]) - { - elf_release_view (state, &split_debug_view[i], - error_callback, data); - split_debug_view_valid[i] = 0; - } - } - } - - if (zdebug_table != NULL) - { - backtrace_free (state, zdebug_table, ZLIB_TABLE_SIZE, - error_callback, data); - zdebug_table = NULL; - } - - /* Uncompress the official ELF format - (--compress-debug-sections=zlib-gabi, --compress-debug-sections=zstd). */ - for (i = 0; i < (int) DEBUG_MAX; ++i) - { - unsigned char *uncompressed_data; - size_t uncompressed_size; - - if (sections[i].size == 0 || !sections[i].compressed) - continue; - - if (zdebug_table == NULL) - { - zdebug_table = ((uint16_t *) - backtrace_alloc (state, ZDEBUG_TABLE_SIZE, - error_callback, data)); - if (zdebug_table == NULL) - goto fail; - } - - uncompressed_data = NULL; - uncompressed_size = 0; - if (!elf_uncompress_chdr (state, sections[i].data, sections[i].size, - zdebug_table, error_callback, data, - &uncompressed_data, &uncompressed_size)) - goto fail; - sections[i].data = uncompressed_data; - sections[i].size = uncompressed_size; - sections[i].compressed = 0; - - if (debug_view_valid) - --using_debug_view; - else if (split_debug_view_valid[i]) - { - elf_release_view (state, &split_debug_view[i], error_callback, data); - split_debug_view_valid[i] = 0; - } - } - - if (zdebug_table != NULL) - backtrace_free (state, zdebug_table, ZDEBUG_TABLE_SIZE, - error_callback, data); - - if (debug_view_valid && using_debug_view == 0) - { - elf_release_view (state, &debug_view, error_callback, data); - debug_view_valid = 0; - } - - for (i = 0; i < (int) DEBUG_MAX; ++i) - { - dwarf_sections.data[i] = sections[i].data; - dwarf_sections.size[i] = sections[i].size; - } - - if (!backtrace_dwarf_add (state, base_address, &dwarf_sections, - ehdr.e_ident[EI_DATA] == ELFDATA2MSB, - fileline_altlink, - error_callback, data, fileline_fn, - fileline_entry)) - goto fail; - - *found_dwarf = 1; - - return 1; - - fail: - if (shdrs_view_valid) - elf_release_view (state, &shdrs_view, error_callback, data); - if (names_view_valid) - elf_release_view (state, &names_view, error_callback, data); - if (symtab_view_valid) - elf_release_view (state, &symtab_view, error_callback, data); - if (strtab_view_valid) - elf_release_view (state, &strtab_view, error_callback, data); - if (debuglink_view_valid) - elf_release_view (state, &debuglink_view, error_callback, data); - if (debugaltlink_view_valid) - elf_release_view (state, &debugaltlink_view, error_callback, data); - if (gnu_debugdata_view_valid) - elf_release_view (state, &gnu_debugdata_view, error_callback, data); - if (buildid_view_valid) - elf_release_view (state, &buildid_view, error_callback, data); - if (debug_view_valid) - elf_release_view (state, &debug_view, error_callback, data); - for (i = 0; i < (int) DEBUG_MAX; ++i) - { - if (split_debug_view_valid[i]) - elf_release_view (state, &split_debug_view[i], error_callback, data); - } - if (opd_view_valid) - elf_release_view (state, &opd->view, error_callback, data); - if (descriptor >= 0) - backtrace_close (descriptor, error_callback, data); - return 0; -} - -/* Data passed to phdr_callback. */ - -struct phdr_data -{ - struct backtrace_state *state; - backtrace_error_callback error_callback; - void *data; - fileline *fileline_fn; - int *found_sym; - int *found_dwarf; - const char *exe_filename; - int exe_descriptor; -}; - -/* Callback passed to dl_iterate_phdr. Load debug info from shared - libraries. */ - -struct PhdrIterate -{ - char* dlpi_name; - ElfW(Addr) dlpi_addr; - ElfW(Addr) dlpi_end_addr; -}; -FastVector s_phdrData(16); - -struct ElfAddrRange -{ - ElfW(Addr) dlpi_addr; - ElfW(Addr) dlpi_end_addr; -}; -FastVector s_sortedKnownElfRanges(16); - -static int address_in_known_elf_ranges(uintptr_t pc) -{ - auto it = std::lower_bound( s_sortedKnownElfRanges.begin(), s_sortedKnownElfRanges.end(), pc, - []( const ElfAddrRange& lhs, const uintptr_t rhs ) { return uintptr_t(lhs.dlpi_addr) > rhs; } ); - if( it != s_sortedKnownElfRanges.end() && pc <= it->dlpi_end_addr ) - { - return true; - } - return false; -} - -static int -phdr_callback_mock (struct dl_phdr_info *info, size_t size ATTRIBUTE_UNUSED, - void *pdata) -{ - if( address_in_known_elf_ranges(info->dlpi_addr) ) - { - return 0; - } - - auto ptr = s_phdrData.push_next(); - if (info->dlpi_name) - { - size_t sz = strlen (info->dlpi_name) + 1; - ptr->dlpi_name = (char*)tracy_malloc (sz); - memcpy (ptr->dlpi_name, info->dlpi_name, sz); - } - else ptr->dlpi_name = nullptr; - ptr->dlpi_addr = info->dlpi_addr; - - // calculate the end address as well, so we can quickly determine if a PC is within the range of this image - ptr->dlpi_end_addr = uintptr_t(info->dlpi_addr) + (info->dlpi_phnum ? uintptr_t( - info->dlpi_phdr[info->dlpi_phnum - 1].p_vaddr + - info->dlpi_phdr[info->dlpi_phnum - 1].p_memsz) : 0); - - return 0; -} - -static int -#ifdef __i386__ -__attribute__ ((__force_align_arg_pointer__)) -#endif -phdr_callback (struct PhdrIterate *info, void *pdata) -{ - struct phdr_data *pd = (struct phdr_data *) pdata; - const char *filename; - int descriptor; - int does_not_exist; - struct libbacktrace_base_address base_address; - fileline elf_fileline_fn; - int found_dwarf; - - /* There is not much we can do if we don't have the module name, - unless executable is ET_DYN, where we expect the very first - phdr_callback to be for the PIE. */ - if (info->dlpi_name == NULL || info->dlpi_name[0] == '\0') - { - if (pd->exe_descriptor == -1) - return 0; - filename = pd->exe_filename; - descriptor = pd->exe_descriptor; - pd->exe_descriptor = -1; - } - else - { - if (pd->exe_descriptor != -1) - { - backtrace_close (pd->exe_descriptor, pd->error_callback, pd->data); - pd->exe_descriptor = -1; - } - - filename = info->dlpi_name; - descriptor = backtrace_open (info->dlpi_name, pd->error_callback, - pd->data, &does_not_exist); - if (descriptor < 0) - return 0; - } - - base_address.m = info->dlpi_addr; - if (elf_add (pd->state, filename, descriptor, NULL, 0, base_address, NULL, - pd->error_callback, pd->data, &elf_fileline_fn, pd->found_sym, - &found_dwarf, NULL, 0, 0, NULL, 0)) - { - if (found_dwarf) - { - *pd->found_dwarf = 1; - *pd->fileline_fn = elf_fileline_fn; - } - } - - return 0; -} - -static int elf_iterate_phdr_and_add_new_files(phdr_data *pd) -{ - assert(s_phdrData.empty()); - // dl_iterate_phdr, will only add entries for elf files loaded in a previously unseen range - dl_iterate_phdr(phdr_callback_mock, nullptr); - - if(s_phdrData.size() == 0) - { - return 0; - } - - uint32_t headersAdded = 0; - for (auto &v : s_phdrData) - { - phdr_callback(&v, (void *)pd); - - auto newEntry = s_sortedKnownElfRanges.push_next(); - newEntry->dlpi_addr = v.dlpi_addr; - newEntry->dlpi_end_addr = v.dlpi_end_addr; - - tracy_free(v.dlpi_name); - - headersAdded++; - } - - s_phdrData.clear(); - - std::sort( s_sortedKnownElfRanges.begin(), s_sortedKnownElfRanges.end(), - []( const ElfAddrRange& lhs, const ElfAddrRange& rhs ) { return lhs.dlpi_addr > rhs.dlpi_addr; } ); - - return headersAdded; -} - -#ifdef TRACY_LIBBACKTRACE_ELF_DYNLOAD_SUPPORT -/* Request an elf entry update if the pc passed in is not in any of the known elf ranges. -This could mean that new images were dlopened and we need to add those new elf entries */ -static int elf_refresh_address_ranges_if_needed(struct backtrace_state *state, uintptr_t pc) -{ - if ( address_in_known_elf_ranges(pc) ) - { - return 0; - } - - struct phdr_data pd; - int found_sym = 0; - int found_dwarf = 0; - fileline fileline_fn = nullptr; - pd.state = state; - pd.error_callback = nullptr; - pd.data = nullptr; - pd.fileline_fn = &fileline_fn; - pd.found_sym = &found_sym; - pd.found_dwarf = &found_dwarf; - pd.exe_filename = nullptr; - pd.exe_descriptor = -1; - - return elf_iterate_phdr_and_add_new_files(&pd); -} -#endif //#ifdef TRACY_LIBBACKTRACE_ELF_DYNLOAD_SUPPORT - -/* Initialize the backtrace data we need from an ELF executable. At - the ELF level, all we need to do is find the debug info - sections. */ - -int -backtrace_initialize (struct backtrace_state *state, const char *filename, - int descriptor, backtrace_error_callback error_callback, - void *data, fileline *fileline_fn) -{ - int ret; - int found_sym; - int found_dwarf; - fileline elf_fileline_fn = elf_nodebug; - struct phdr_data pd; - - - /* When using fdpic we must use dl_iterate_phdr for all modules, including - the main executable, so that we can get the right base address - mapping. */ - if (!libbacktrace_using_fdpic ()) - { - struct libbacktrace_base_address zero_base_address; - - memset (&zero_base_address, 0, sizeof zero_base_address); - ret = elf_add (state, filename, descriptor, NULL, 0, zero_base_address, - NULL, error_callback, data, &elf_fileline_fn, &found_sym, - &found_dwarf, NULL, 1, 0, NULL, 0); - if (!ret) - return 0; - } - - pd.state = state; - pd.error_callback = error_callback; - pd.data = data; - pd.fileline_fn = &elf_fileline_fn; - pd.found_sym = &found_sym; - pd.found_dwarf = &found_dwarf; - pd.exe_filename = filename; - pd.exe_descriptor = ret < 0 ? descriptor : -1; - - elf_iterate_phdr_and_add_new_files(&pd); - - if (!state->threaded) - { - if (found_sym) - state->syminfo_fn = elf_syminfo; - else if (state->syminfo_fn == NULL) - state->syminfo_fn = elf_nosyms; - } - else - { - if (found_sym) - backtrace_atomic_store_pointer (&state->syminfo_fn, &elf_syminfo); - else - (void) __sync_bool_compare_and_swap (&state->syminfo_fn, NULL, - elf_nosyms); - } - - if (!state->threaded) - *fileline_fn = state->fileline_fn; - else - *fileline_fn = backtrace_atomic_load_pointer (&state->fileline_fn); - - if (*fileline_fn == NULL || *fileline_fn == elf_nodebug) - *fileline_fn = elf_fileline_fn; - - // install an address range refresh callback so we can cope with dynamically loaded elf files -#ifdef TRACY_LIBBACKTRACE_ELF_DYNLOAD_SUPPORT - state->request_known_address_ranges_refresh_fn = elf_refresh_address_ranges_if_needed; -#else - state->request_known_address_ranges_refresh_fn = NULL; -#endif - - return 1; -} - -} diff --git a/src/third_party/tracy/libbacktrace/fileline.cpp b/src/third_party/tracy/libbacktrace/fileline.cpp deleted file mode 100644 index 5a37ff0c..00000000 --- a/src/third_party/tracy/libbacktrace/fileline.cpp +++ /dev/null @@ -1,412 +0,0 @@ -/* fileline.c -- Get file and line number information in a backtrace. - Copyright (C) 2012-2021 Free Software Foundation, Inc. - Written by Ian Lance Taylor, Google. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - (1) Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - (2) Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in - the documentation and/or other materials provided with the - distribution. - - (3) The name of the author may not be used to - endorse or promote products derived from this software without - specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR -IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, -INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING -IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. */ - -#include "config.h" - -#include -#include -#include -#include -#include -#include - -#if defined (HAVE_KERN_PROC_ARGS) || defined (HAVE_KERN_PROC) -#include -#endif - -#ifdef HAVE_MACH_O_DYLD_H -#include -#endif - -#ifdef HAVE_WINDOWS_H -#ifndef WIN32_MEAN_AND_LEAN -#define WIN32_MEAN_AND_LEAN -#endif - -#ifndef NOMINMAX -#define NOMINMAX -#endif - -#include -#endif - -#include "backtrace.hpp" -#include "internal.hpp" - -#ifndef HAVE_GETEXECNAME -#define getexecname() NULL -#endif - -namespace tracy -{ - -#if !defined (HAVE_KERN_PROC_ARGS) && !defined (HAVE_KERN_PROC) - -#define sysctl_exec_name1(state, error_callback, data) NULL -#define sysctl_exec_name2(state, error_callback, data) NULL - -#else /* defined (HAVE_KERN_PROC_ARGS) || |defined (HAVE_KERN_PROC) */ - -static char * -sysctl_exec_name (struct backtrace_state *state, - int mib0, int mib1, int mib2, int mib3, - backtrace_error_callback error_callback, void *data) -{ - int mib[4]; - size_t len; - char *name; - size_t rlen; - - mib[0] = mib0; - mib[1] = mib1; - mib[2] = mib2; - mib[3] = mib3; - - if (sysctl (mib, 4, NULL, &len, NULL, 0) < 0) - return NULL; - name = (char *) backtrace_alloc (state, len, error_callback, data); - if (name == NULL) - return NULL; - rlen = len; - if (sysctl (mib, 4, name, &rlen, NULL, 0) < 0) - { - backtrace_free (state, name, len, error_callback, data); - return NULL; - } - return name; -} - -#ifdef HAVE_KERN_PROC_ARGS - -static char * -sysctl_exec_name1 (struct backtrace_state *state, - backtrace_error_callback error_callback, void *data) -{ - /* This variant is used on NetBSD. */ - return sysctl_exec_name (state, CTL_KERN, KERN_PROC_ARGS, -1, - KERN_PROC_PATHNAME, error_callback, data); -} - -#else - -#define sysctl_exec_name1(state, error_callback, data) NULL - -#endif - -#ifdef HAVE_KERN_PROC - -static char * -sysctl_exec_name2 (struct backtrace_state *state, - backtrace_error_callback error_callback, void *data) -{ - /* This variant is used on FreeBSD. */ - return sysctl_exec_name (state, CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1, - error_callback, data); -} - -#else - -#define sysctl_exec_name2(state, error_callback, data) NULL - -#endif - -#endif /* defined (HAVE_KERN_PROC_ARGS) || |defined (HAVE_KERN_PROC) */ - -#ifdef HAVE_MACH_O_DYLD_H - -static char * -macho_get_executable_path (struct backtrace_state *state, - backtrace_error_callback error_callback, void *data) -{ - uint32_t len; - char *name; - - len = 0; - if (_NSGetExecutablePath (NULL, &len) == 0) - return NULL; - name = (char *) backtrace_alloc (state, len, error_callback, data); - if (name == NULL) - return NULL; - if (_NSGetExecutablePath (name, &len) != 0) - { - backtrace_free (state, name, len, error_callback, data); - return NULL; - } - return name; -} - -#else /* !defined (HAVE_MACH_O_DYLD_H) */ - -#define macho_get_executable_path(state, error_callback, data) NULL - -#endif /* !defined (HAVE_MACH_O_DYLD_H) */ - -#if HAVE_DECL__PGMPTR - -#define windows_executable_filename() _pgmptr - -#else /* !HAVE_DECL__PGMPTR */ - -#define windows_executable_filename() NULL - -#endif /* !HAVE_DECL__PGMPTR */ - -#ifdef HAVE_WINDOWS_H - -#define FILENAME_BUF_SIZE (MAX_PATH) - -static char * -windows_get_executable_path (char *buf, backtrace_error_callback error_callback, - void *data) -{ - size_t got; - int error; - - got = GetModuleFileNameA (NULL, buf, FILENAME_BUF_SIZE - 1); - error = GetLastError (); - if (got == 0 - || (got == FILENAME_BUF_SIZE - 1 && error == ERROR_INSUFFICIENT_BUFFER)) - { - error_callback (data, - "could not get the filename of the current executable", - error); - return NULL; - } - return buf; -} - -#else /* !defined (HAVE_WINDOWS_H) */ - -#define windows_get_executable_path(buf, error_callback, data) NULL -#define FILENAME_BUF_SIZE 64 - -#endif /* !defined (HAVE_WINDOWS_H) */ - -/* Initialize the fileline information from the executable. Returns 1 - on success, 0 on failure. */ - -static int -fileline_initialize (struct backtrace_state *state, - backtrace_error_callback error_callback, void *data) -{ - int failed; - fileline fileline_fn; - int pass; - int called_error_callback; - int descriptor; - const char *filename; - char buf[FILENAME_BUF_SIZE]; - - if (!state->threaded) - failed = state->fileline_initialization_failed; - else - failed = backtrace_atomic_load_int (&state->fileline_initialization_failed); - - if (failed) - { - error_callback (data, "failed to read executable information", -1); - return 0; - } - - if (!state->threaded) - fileline_fn = state->fileline_fn; - else - fileline_fn = backtrace_atomic_load_pointer (&state->fileline_fn); - if (fileline_fn != NULL) - return 1; - - /* We have not initialized the information. Do it now. */ - - descriptor = -1; - called_error_callback = 0; - for (pass = 0; pass < 10; ++pass) - { - int does_not_exist; - - switch (pass) - { - case 0: - filename = state->filename; - break; - case 1: - filename = getexecname (); - break; - case 2: - /* Test this before /proc/self/exe, as the latter exists but points - to the wine binary (and thus doesn't work). */ - filename = windows_executable_filename (); - break; - case 3: - filename = "/proc/self/exe"; - break; - case 4: - filename = "/proc/curproc/file"; - break; - case 5: - snprintf (buf, sizeof (buf), "/proc/%ld/object/a.out", - (long) getpid ()); - filename = buf; - break; - case 6: - filename = sysctl_exec_name1 (state, error_callback, data); - break; - case 7: - filename = sysctl_exec_name2 (state, error_callback, data); - break; - case 8: - filename = macho_get_executable_path (state, error_callback, data); - break; - case 9: - filename = windows_get_executable_path (buf, error_callback, data); - break; - default: - abort (); - } - - if (filename == NULL) - continue; - - descriptor = backtrace_open (filename, error_callback, data, - &does_not_exist); - if (descriptor < 0 && !does_not_exist) - { - called_error_callback = 1; - break; - } - if (descriptor >= 0) - break; - } - - if (descriptor < 0) - { - if (!called_error_callback) - { - if (state->filename != NULL) - error_callback (data, state->filename, ENOENT); - else - error_callback (data, - "libbacktrace could not find executable to open", - 0); - } - failed = 1; - } - - if (!failed) - { - if (!backtrace_initialize (state, filename, descriptor, error_callback, - data, &fileline_fn)) - failed = 1; - } - - if (failed) - { - if (!state->threaded) - state->fileline_initialization_failed = 1; - else - backtrace_atomic_store_int (&state->fileline_initialization_failed, 1); - return 0; - } - - if (!state->threaded) - state->fileline_fn = fileline_fn; - else - { - backtrace_atomic_store_pointer (&state->fileline_fn, fileline_fn); - - /* Note that if two threads initialize at once, one of the data - sets may be leaked. */ - } - - return 1; -} - -/* Given a PC, find the file name, line number, and function name. */ - -int -backtrace_pcinfo (struct backtrace_state *state, uintptr_t pc, - backtrace_full_callback callback, - backtrace_error_callback error_callback, void *data) -{ - if (!fileline_initialize (state, error_callback, data)) - return 0; - - if (state->fileline_initialization_failed) - return 0; - - return state->fileline_fn (state, pc, callback, error_callback, data); -} - -/* Given a PC, find the symbol for it, and its value. */ - -int -backtrace_syminfo (struct backtrace_state *state, uintptr_t pc, - backtrace_syminfo_callback callback, - backtrace_error_callback error_callback, void *data) -{ - if (!fileline_initialize (state, error_callback, data)) - return 0; - - if (state->fileline_initialization_failed) - return 0; - - state->syminfo_fn (state, pc, callback, error_callback, data); - return 1; -} - -/* A backtrace_syminfo_callback that can call into a - backtrace_full_callback, used when we have a symbol table but no - debug info. */ - -void -backtrace_syminfo_to_full_callback (void *data, uintptr_t pc, - const char *symname, - uintptr_t symval ATTRIBUTE_UNUSED, - uintptr_t symsize ATTRIBUTE_UNUSED) -{ - struct backtrace_call_full *bdata = (struct backtrace_call_full *) data; - - bdata->ret = bdata->full_callback (bdata->full_data, pc, 0, NULL, 0, symname); -} - -/* An error callback that corresponds to - backtrace_syminfo_to_full_callback. */ - -void -backtrace_syminfo_to_full_error_callback (void *data, const char *msg, - int errnum) -{ - struct backtrace_call_full *bdata = (struct backtrace_call_full *) data; - - bdata->full_error_callback (bdata->full_data, msg, errnum); -} - -} diff --git a/src/third_party/tracy/libbacktrace/filenames.hpp b/src/third_party/tracy/libbacktrace/filenames.hpp deleted file mode 100644 index aa7bd7ad..00000000 --- a/src/third_party/tracy/libbacktrace/filenames.hpp +++ /dev/null @@ -1,52 +0,0 @@ -/* btest.c -- Filename header for libbacktrace library - Copyright (C) 2012-2018 Free Software Foundation, Inc. - Written by Ian Lance Taylor, Google. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - (1) Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - (2) Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in - the documentation and/or other materials provided with the - distribution. - - (3) The name of the author may not be used to - endorse or promote products derived from this software without - specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR -IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, -INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING -IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. */ - -#ifndef GCC_VERSION -# define GCC_VERSION (__GNUC__ * 1000 + __GNUC_MINOR__) -#endif - -#if (GCC_VERSION < 2007) -# define __attribute__(x) -#endif - -#ifndef ATTRIBUTE_UNUSED -# define ATTRIBUTE_UNUSED __attribute__ ((__unused__)) -#endif - -#if defined(__MSDOS__) || defined(_WIN32) || defined(__OS2__) || defined (__CYGWIN__) -# define IS_DIR_SEPARATOR(c) ((c) == '/' || (c) == '\\') -# define HAS_DRIVE_SPEC(f) ((f)[0] != '\0' && (f)[1] == ':') -# define IS_ABSOLUTE_PATH(f) (IS_DIR_SEPARATOR((f)[0]) || HAS_DRIVE_SPEC(f)) -#else -# define IS_DIR_SEPARATOR(c) ((c) == '/') -# define IS_ABSOLUTE_PATH(f) (IS_DIR_SEPARATOR((f)[0])) -#endif diff --git a/src/third_party/tracy/libbacktrace/internal.hpp b/src/third_party/tracy/libbacktrace/internal.hpp deleted file mode 100644 index 21395975..00000000 --- a/src/third_party/tracy/libbacktrace/internal.hpp +++ /dev/null @@ -1,435 +0,0 @@ -/* internal.h -- Internal header file for stack backtrace library. - Copyright (C) 2012-2021 Free Software Foundation, Inc. - Written by Ian Lance Taylor, Google. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - (1) Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - (2) Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in - the documentation and/or other materials provided with the - distribution. - - (3) The name of the author may not be used to - endorse or promote products derived from this software without - specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR -IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, -INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING -IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. */ - -#ifndef BACKTRACE_INTERNAL_H -#define BACKTRACE_INTERNAL_H - -/* We assume that and "backtrace.h" have already been - included. */ - -#ifndef GCC_VERSION -# define GCC_VERSION (__GNUC__ * 1000 + __GNUC_MINOR__) -#endif - -#if (GCC_VERSION < 2007) -# define __attribute__(x) -#endif - -#ifndef ATTRIBUTE_UNUSED -# define ATTRIBUTE_UNUSED __attribute__ ((__unused__)) -#endif - -#ifndef ATTRIBUTE_MALLOC -# if (GCC_VERSION >= 2096) -# define ATTRIBUTE_MALLOC __attribute__ ((__malloc__)) -# else -# define ATTRIBUTE_MALLOC -# endif -#endif - -#ifndef ATTRIBUTE_FALLTHROUGH -# if (GCC_VERSION >= 7000) -# define ATTRIBUTE_FALLTHROUGH __attribute__ ((__fallthrough__)) -# else -# define ATTRIBUTE_FALLTHROUGH -# endif -#endif - -#ifndef HAVE_SYNC_FUNCTIONS - -/* Define out the sync functions. These should never be called if - they are not available. */ - -#define __sync_bool_compare_and_swap(A, B, C) (abort(), 1) -#define __sync_lock_test_and_set(A, B) (abort(), 0) -#define __sync_lock_release(A) abort() - -#endif /* !defined (HAVE_SYNC_FUNCTIONS) */ - -#ifdef HAVE_ATOMIC_FUNCTIONS - -/* We have the atomic builtin functions. */ - -#define backtrace_atomic_load_pointer(p) \ - __atomic_load_n ((p), __ATOMIC_ACQUIRE) -#define backtrace_atomic_load_int(p) \ - __atomic_load_n ((p), __ATOMIC_ACQUIRE) -#define backtrace_atomic_store_pointer(p, v) \ - __atomic_store_n ((p), (v), __ATOMIC_RELEASE) -#define backtrace_atomic_store_size_t(p, v) \ - __atomic_store_n ((p), (v), __ATOMIC_RELEASE) -#define backtrace_atomic_store_int(p, v) \ - __atomic_store_n ((p), (v), __ATOMIC_RELEASE) - -#else /* !defined (HAVE_ATOMIC_FUNCTIONS) */ -#ifdef HAVE_SYNC_FUNCTIONS - -/* We have the sync functions but not the atomic functions. Define - the atomic ones in terms of the sync ones. */ - -extern void *backtrace_atomic_load_pointer (void *); -extern int backtrace_atomic_load_int (int *); -extern void backtrace_atomic_store_pointer (void *, void *); -extern void backtrace_atomic_store_size_t (size_t *, size_t); -extern void backtrace_atomic_store_int (int *, int); - -#else /* !defined (HAVE_SYNC_FUNCTIONS) */ - -/* We have neither the sync nor the atomic functions. These will - never be called. */ - -#define backtrace_atomic_load_pointer(p) (abort(), (void *) NULL) -#define backtrace_atomic_load_int(p) (abort(), 0) -#define backtrace_atomic_store_pointer(p, v) abort() -#define backtrace_atomic_store_size_t(p, v) abort() -#define backtrace_atomic_store_int(p, v) abort() - -#endif /* !defined (HAVE_SYNC_FUNCTIONS) */ -#endif /* !defined (HAVE_ATOMIC_FUNCTIONS) */ - -namespace tracy -{ - -/* The type of the function that collects file/line information. This - is like backtrace_pcinfo. */ - -typedef int (*fileline) (struct backtrace_state *state, uintptr_t pc, - backtrace_full_callback callback, - backtrace_error_callback error_callback, void *data); - -/* The type of the function that collects symbol information. This is - like backtrace_syminfo. */ - -typedef void (*syminfo) (struct backtrace_state *state, uintptr_t pc, - backtrace_syminfo_callback callback, - backtrace_error_callback error_callback, void *data); - -/* The type of the function that will trigger an known address range refresh - (if pc passed in is for an address whichs lies ourtisde of known ranges) */ -typedef int (*request_known_address_ranges_refresh)(struct backtrace_state *state, - uintptr_t pc); - -/* What the backtrace state pointer points to. */ - -struct backtrace_state -{ - /* The name of the executable. */ - const char *filename; - /* Non-zero if threaded. */ - int threaded; - /* The master lock for fileline_fn, fileline_data, syminfo_fn, - syminfo_data, fileline_initialization_failed and everything the - data pointers point to. */ - void *lock; - /* The function that returns file/line information. */ - fileline fileline_fn; - /* The data to pass to FILELINE_FN. */ - void *fileline_data; - /* The function that returns symbol information. */ - syminfo syminfo_fn; - /* The data to pass to SYMINFO_FN. */ - void *syminfo_data; - /* Whether initializing the file/line information failed. */ - int fileline_initialization_failed; - /* The lock for the freelist. */ - int lock_alloc; - /* The freelist when using mmap. */ - struct backtrace_freelist_struct *freelist; - /* Trigger an known address range refresh */ - request_known_address_ranges_refresh request_known_address_ranges_refresh_fn; -}; - -/* Open a file for reading. Returns -1 on error. If DOES_NOT_EXIST - is not NULL, *DOES_NOT_EXIST will be set to 0 normally and set to 1 - if the file does not exist. If the file does not exist and - DOES_NOT_EXIST is not NULL, the function will return -1 and will - not call ERROR_CALLBACK. On other errors, or if DOES_NOT_EXIST is - NULL, the function will call ERROR_CALLBACK before returning. */ -extern int backtrace_open (const char *filename, - backtrace_error_callback error_callback, - void *data, - int *does_not_exist); - -/* A view of the contents of a file. This supports mmap when - available. A view will remain in memory even after backtrace_close - is called on the file descriptor from which the view was - obtained. */ - -struct backtrace_view -{ - /* The data that the caller requested. */ - const void *data; - /* The base of the view. */ - void *base; - /* The total length of the view. */ - size_t len; -}; - -/* Create a view of SIZE bytes from DESCRIPTOR at OFFSET. Store the - result in *VIEW. Returns 1 on success, 0 on error. */ -extern int backtrace_get_view (struct backtrace_state *state, int descriptor, - off_t offset, uint64_t size, - backtrace_error_callback error_callback, - void *data, struct backtrace_view *view); - -/* Release a view created by backtrace_get_view. */ -extern void backtrace_release_view (struct backtrace_state *state, - struct backtrace_view *view, - backtrace_error_callback error_callback, - void *data); - -/* Close a file opened by backtrace_open. Returns 1 on success, 0 on - error. */ - -extern int backtrace_close (int descriptor, - backtrace_error_callback error_callback, - void *data); - -/* Sort without using memory. */ - -extern void backtrace_qsort (void *base, size_t count, size_t size, - int (*compar) (const void *, const void *)); - -/* Allocate memory. This is like malloc. If ERROR_CALLBACK is NULL, - this does not report an error, it just returns NULL. */ - -extern void *backtrace_alloc (struct backtrace_state *state, size_t size, - backtrace_error_callback error_callback, - void *data) ATTRIBUTE_MALLOC; - -/* Free memory allocated by backtrace_alloc. If ERROR_CALLBACK is - NULL, this does not report an error. */ - -extern void backtrace_free (struct backtrace_state *state, void *mem, - size_t size, - backtrace_error_callback error_callback, - void *data); - -/* A growable vector of some struct. This is used for more efficient - allocation when we don't know the final size of some group of data - that we want to represent as an array. */ - -struct backtrace_vector -{ - /* The base of the vector. */ - void *base; - /* The number of bytes in the vector. */ - size_t size; - /* The number of bytes available at the current allocation. */ - size_t alc; -}; - -/* Grow VEC by SIZE bytes. Return a pointer to the newly allocated - bytes. Note that this may move the entire vector to a new memory - location. Returns NULL on failure. */ - -extern void *backtrace_vector_grow (struct backtrace_state *state, size_t size, - backtrace_error_callback error_callback, - void *data, - struct backtrace_vector *vec); - -/* Finish the current allocation on VEC. Prepare to start a new - allocation. The finished allocation will never be freed. Returns - a pointer to the base of the finished entries, or NULL on - failure. */ - -extern void* backtrace_vector_finish (struct backtrace_state *state, - struct backtrace_vector *vec, - backtrace_error_callback error_callback, - void *data); - -/* Release any extra space allocated for VEC. This may change - VEC->base. Returns 1 on success, 0 on failure. */ - -extern int backtrace_vector_release (struct backtrace_state *state, - struct backtrace_vector *vec, - backtrace_error_callback error_callback, - void *data); - -/* Free the space managed by VEC. This will reset VEC. */ - -static inline void -backtrace_vector_free (struct backtrace_state *state, - struct backtrace_vector *vec, - backtrace_error_callback error_callback, void *data) -{ - vec->alc += vec->size; - vec->size = 0; - backtrace_vector_release (state, vec, error_callback, data); -} - -/* Read initial debug data from a descriptor, and set the - fileline_data, syminfo_fn, and syminfo_data fields of STATE. - Return the fileln_fn field in *FILELN_FN--this is done this way so - that the synchronization code is only implemented once. This is - called after the descriptor has first been opened. It will close - the descriptor if it is no longer needed. Returns 1 on success, 0 - on error. There will be multiple implementations of this function, - for different file formats. Each system will compile the - appropriate one. */ - -extern int backtrace_initialize (struct backtrace_state *state, - const char *filename, - int descriptor, - backtrace_error_callback error_callback, - void *data, - fileline *fileline_fn); - -/* An enum for the DWARF sections we care about. */ - -enum dwarf_section -{ - DEBUG_INFO, - DEBUG_LINE, - DEBUG_ABBREV, - DEBUG_RANGES, - DEBUG_STR, - DEBUG_ADDR, - DEBUG_STR_OFFSETS, - DEBUG_LINE_STR, - DEBUG_RNGLISTS, - - DEBUG_MAX -}; - -/* Data for the DWARF sections we care about. */ - -struct dwarf_sections -{ - const unsigned char *data[DEBUG_MAX]; - size_t size[DEBUG_MAX]; -}; - -/* DWARF data read from a file, used for .gnu_debugaltlink. */ - -struct dwarf_data; - -/* The load address mapping. */ - -#if defined(__FDPIC__) && defined(HAVE_DL_ITERATE_PHDR) && (defined(HAVE_LINK_H) || defined(HAVE_SYS_LINK_H)) - -#ifdef HAVE_LINK_H - #include -#endif -#ifdef HAVE_SYS_LINK_H - #include -#endif - -#define libbacktrace_using_fdpic() (1) - -struct libbacktrace_base_address -{ - struct elf32_fdpic_loadaddr m; -}; - -#define libbacktrace_add_base(pc, base) \ - ((uintptr_t) (__RELOC_POINTER ((pc), (base).m))) - -#else /* not _FDPIC__ */ - -#define libbacktrace_using_fdpic() (0) - -struct libbacktrace_base_address -{ - uintptr_t m; -}; - -#define libbacktrace_add_base(pc, base) ((pc) + (base).m) - -#endif /* not _FDPIC__ */ - -/* Add file/line information for a DWARF module. */ - -extern int backtrace_dwarf_add (struct backtrace_state *state, - struct libbacktrace_base_address base_address, - const struct dwarf_sections *dwarf_sections, - int is_bigendian, - struct dwarf_data *fileline_altlink, - backtrace_error_callback error_callback, - void *data, fileline *fileline_fn, - struct dwarf_data **fileline_entry); - -/* A data structure to pass to backtrace_syminfo_to_full. */ - -struct backtrace_call_full -{ - backtrace_full_callback full_callback; - backtrace_error_callback full_error_callback; - void *full_data; - int ret; -}; - -/* A backtrace_syminfo_callback that can call into a - backtrace_full_callback, used when we have a symbol table but no - debug info. */ - -extern void backtrace_syminfo_to_full_callback (void *data, uintptr_t pc, - const char *symname, - uintptr_t symval, - uintptr_t symsize); - -/* An error callback that corresponds to - backtrace_syminfo_to_full_callback. */ - -extern void backtrace_syminfo_to_full_error_callback (void *, const char *, - int); - -/* A test-only hook for elf_uncompress_zdebug. */ - -extern int backtrace_uncompress_zdebug (struct backtrace_state *, - const unsigned char *compressed, - size_t compressed_size, - backtrace_error_callback, void *data, - unsigned char **uncompressed, - size_t *uncompressed_size); - -/* A test-only hook for elf_zstd_decompress. */ - -extern int backtrace_uncompress_zstd (struct backtrace_state *, - const unsigned char *compressed, - size_t compressed_size, - backtrace_error_callback, void *data, - unsigned char *uncompressed, - size_t uncompressed_size); - -/* A test-only hook for elf_uncompress_lzma. */ - -extern int backtrace_uncompress_lzma (struct backtrace_state *, - const unsigned char *compressed, - size_t compressed_size, - backtrace_error_callback, void *data, - unsigned char **uncompressed, - size_t *uncompressed_size); - -} - -#endif diff --git a/src/third_party/tracy/libbacktrace/macho.cpp b/src/third_party/tracy/libbacktrace/macho.cpp deleted file mode 100644 index b9f08456..00000000 --- a/src/third_party/tracy/libbacktrace/macho.cpp +++ /dev/null @@ -1,1367 +0,0 @@ -/* elf.c -- Get debug data from a Mach-O file for backtraces. - Copyright (C) 2020-2021 Free Software Foundation, Inc. - Written by Ian Lance Taylor, Google. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - (1) Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - (2) Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in - the documentation and/or other materials provided with the - distribution. - - (3) The name of the author may not be used to - endorse or promote products derived from this software without - specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR -IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, -INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING -IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. */ - -#include "config.h" - -#include -#include -#include -#include - -#ifdef HAVE_MACH_O_DYLD_H -#include -#endif - -#include "backtrace.hpp" -#include "internal.hpp" - -namespace tracy -{ - -/* Mach-O file header for a 32-bit executable. */ - -struct macho_header_32 -{ - uint32_t magic; /* Magic number (MACH_O_MAGIC_32) */ - uint32_t cputype; /* CPU type */ - uint32_t cpusubtype; /* CPU subtype */ - uint32_t filetype; /* Type of file (object, executable) */ - uint32_t ncmds; /* Number of load commands */ - uint32_t sizeofcmds; /* Total size of load commands */ - uint32_t flags; /* Flags for special features */ -}; - -/* Mach-O file header for a 64-bit executable. */ - -struct macho_header_64 -{ - uint32_t magic; /* Magic number (MACH_O_MAGIC_64) */ - uint32_t cputype; /* CPU type */ - uint32_t cpusubtype; /* CPU subtype */ - uint32_t filetype; /* Type of file (object, executable) */ - uint32_t ncmds; /* Number of load commands */ - uint32_t sizeofcmds; /* Total size of load commands */ - uint32_t flags; /* Flags for special features */ - uint32_t reserved; /* Reserved */ -}; - -/* Mach-O file header for a fat executable. */ - -struct macho_header_fat -{ - uint32_t magic; /* Magic number (MACH_O_MH_(MAGIC|CIGAM)_FAT(_64)?) */ - uint32_t nfat_arch; /* Number of components */ -}; - -/* Values for the header magic field. */ - -#define MACH_O_MH_MAGIC_32 0xfeedface -#define MACH_O_MH_MAGIC_64 0xfeedfacf -#define MACH_O_MH_MAGIC_FAT 0xcafebabe -#define MACH_O_MH_CIGAM_FAT 0xbebafeca -#define MACH_O_MH_MAGIC_FAT_64 0xcafebabf -#define MACH_O_MH_CIGAM_FAT_64 0xbfbafeca - -/* Value for the header filetype field. */ - -#define MACH_O_MH_EXECUTE 0x02 -#define MACH_O_MH_DYLIB 0x06 -#define MACH_O_MH_DSYM 0x0a - -/* A component of a fat file. A fat file starts with a - macho_header_fat followed by nfat_arch instances of this - struct. */ - -struct macho_fat_arch -{ - uint32_t cputype; /* CPU type */ - uint32_t cpusubtype; /* CPU subtype */ - uint32_t offset; /* File offset of this entry */ - uint32_t size; /* Size of this entry */ - uint32_t align; /* Alignment of this entry */ -}; - -/* A component of a 64-bit fat file. This is used if the magic field - is MAGIC_FAT_64. This is only used when some file size or file - offset is too large to represent in the 32-bit format. */ - -struct macho_fat_arch_64 -{ - uint32_t cputype; /* CPU type */ - uint32_t cpusubtype; /* CPU subtype */ - uint64_t offset; /* File offset of this entry */ - uint64_t size; /* Size of this entry */ - uint32_t align; /* Alignment of this entry */ - uint32_t reserved; /* Reserved */ -}; - -/* Values for the fat_arch cputype field (and the header cputype - field). */ - -#define MACH_O_CPU_ARCH_ABI64 0x01000000 - -#define MACH_O_CPU_TYPE_X86 7 -#define MACH_O_CPU_TYPE_ARM 12 -#define MACH_O_CPU_TYPE_PPC 18 - -#define MACH_O_CPU_TYPE_X86_64 (MACH_O_CPU_TYPE_X86 | MACH_O_CPU_ARCH_ABI64) -#define MACH_O_CPU_TYPE_ARM64 (MACH_O_CPU_TYPE_ARM | MACH_O_CPU_ARCH_ABI64) -#define MACH_O_CPU_TYPE_PPC64 (MACH_O_CPU_TYPE_PPC | MACH_O_CPU_ARCH_ABI64) - -/* The header of a load command. */ - -struct macho_load_command -{ - uint32_t cmd; /* The type of load command */ - uint32_t cmdsize; /* Size in bytes of the entire command */ -}; - -/* Values for the load_command cmd field. */ - -#define MACH_O_LC_SEGMENT 0x01 -#define MACH_O_LC_SYMTAB 0x02 -#define MACH_O_LC_SEGMENT_64 0x19 -#define MACH_O_LC_UUID 0x1b - -/* The length of a section of segment name. */ - -#define MACH_O_NAMELEN (16) - -/* LC_SEGMENT load command. */ - -struct macho_segment_command -{ - uint32_t cmd; /* The type of load command (LC_SEGMENT) */ - uint32_t cmdsize; /* Size in bytes of the entire command */ - char segname[MACH_O_NAMELEN]; /* Segment name */ - uint32_t vmaddr; /* Virtual memory address */ - uint32_t vmsize; /* Virtual memory size */ - uint32_t fileoff; /* Offset of data to be mapped */ - uint32_t filesize; /* Size of data in file */ - uint32_t maxprot; /* Maximum permitted virtual protection */ - uint32_t initprot; /* Initial virtual memory protection */ - uint32_t nsects; /* Number of sections in this segment */ - uint32_t flags; /* Flags */ -}; - -/* LC_SEGMENT_64 load command. */ - -struct macho_segment_64_command -{ - uint32_t cmd; /* The type of load command (LC_SEGMENT) */ - uint32_t cmdsize; /* Size in bytes of the entire command */ - char segname[MACH_O_NAMELEN]; /* Segment name */ - uint64_t vmaddr; /* Virtual memory address */ - uint64_t vmsize; /* Virtual memory size */ - uint64_t fileoff; /* Offset of data to be mapped */ - uint64_t filesize; /* Size of data in file */ - uint32_t maxprot; /* Maximum permitted virtual protection */ - uint32_t initprot; /* Initial virtual memory protection */ - uint32_t nsects; /* Number of sections in this segment */ - uint32_t flags; /* Flags */ -}; - -/* LC_SYMTAB load command. */ - -struct macho_symtab_command -{ - uint32_t cmd; /* The type of load command (LC_SEGMENT) */ - uint32_t cmdsize; /* Size in bytes of the entire command */ - uint32_t symoff; /* File offset of symbol table */ - uint32_t nsyms; /* Number of symbols */ - uint32_t stroff; /* File offset of string table */ - uint32_t strsize; /* String table size */ -}; - -/* The length of a Mach-O uuid. */ - -#define MACH_O_UUID_LEN (16) - -/* LC_UUID load command. */ - -struct macho_uuid_command -{ - uint32_t cmd; /* Type of load command (LC_UUID) */ - uint32_t cmdsize; /* Size in bytes of command */ - unsigned char uuid[MACH_O_UUID_LEN]; /* UUID */ -}; - -/* 32-bit section header within a LC_SEGMENT segment. */ - -struct macho_section -{ - char sectname[MACH_O_NAMELEN]; /* Section name */ - char segment[MACH_O_NAMELEN]; /* Segment of this section */ - uint32_t addr; /* Address in memory */ - uint32_t size; /* Section size */ - uint32_t offset; /* File offset */ - uint32_t align; /* Log2 of section alignment */ - uint32_t reloff; /* File offset of relocations */ - uint32_t nreloc; /* Number of relocs for this section */ - uint32_t flags; /* Flags */ - uint32_t reserved1; - uint32_t reserved2; -}; - -/* 64-bit section header within a LC_SEGMENT_64 segment. */ - -struct macho_section_64 -{ - char sectname[MACH_O_NAMELEN]; /* Section name */ - char segment[MACH_O_NAMELEN]; /* Segment of this section */ - uint64_t addr; /* Address in memory */ - uint64_t size; /* Section size */ - uint32_t offset; /* File offset */ - uint32_t align; /* Log2 of section alignment */ - uint32_t reloff; /* File offset of section relocations */ - uint32_t nreloc; /* Number of relocs for this section */ - uint32_t flags; /* Flags */ - uint32_t reserved1; - uint32_t reserved2; - uint32_t reserved3; -}; - -/* 32-bit symbol data. */ - -struct macho_nlist -{ - uint32_t n_strx; /* Index of name in string table */ - uint8_t n_type; /* Type flag */ - uint8_t n_sect; /* Section number */ - uint16_t n_desc; /* Stabs description field */ - uint32_t n_value; /* Value */ -}; - -/* 64-bit symbol data. */ - -struct macho_nlist_64 -{ - uint32_t n_strx; /* Index of name in string table */ - uint8_t n_type; /* Type flag */ - uint8_t n_sect; /* Section number */ - uint16_t n_desc; /* Stabs description field */ - uint64_t n_value; /* Value */ -}; - -/* Value found in nlist n_type field. */ - -#define MACH_O_N_STAB 0xe0 /* Stabs debugging symbol */ -#define MACH_O_N_TYPE 0x0e /* Mask for type bits */ - -/* Values found after masking with MACH_O_N_TYPE. */ -#define MACH_O_N_UNDF 0x00 /* Undefined symbol */ -#define MACH_O_N_ABS 0x02 /* Absolute symbol */ -#define MACH_O_N_SECT 0x0e /* Defined in section from n_sect field */ - - -/* Information we keep for a Mach-O symbol. */ - -struct macho_symbol -{ - const char *name; /* Symbol name */ - uintptr_t address; /* Symbol address */ -}; - -/* Information to pass to macho_syminfo. */ - -struct macho_syminfo_data -{ - struct macho_syminfo_data *next; /* Next module */ - struct macho_symbol *symbols; /* Symbols sorted by address */ - size_t count; /* Number of symbols */ -}; - -/* Names of sections, indexed by enum dwarf_section in internal.h. */ - -static const char * const dwarf_section_names[DEBUG_MAX] = -{ - "__debug_info", - "__debug_line", - "__debug_abbrev", - "__debug_ranges", - "__debug_str", - "", /* DEBUG_ADDR */ - "__debug_str_offs", - "", /* DEBUG_LINE_STR */ - "__debug_rnglists" -}; - -/* Forward declaration. */ - -static int macho_add (struct backtrace_state *, const char *, int, off_t, - const unsigned char *, struct libbacktrace_base_address, - int, backtrace_error_callback, void *, fileline *, - int *); - -/* A dummy callback function used when we can't find any debug info. */ - -static int -macho_nodebug (struct backtrace_state *state ATTRIBUTE_UNUSED, - uintptr_t pc ATTRIBUTE_UNUSED, - backtrace_full_callback callback ATTRIBUTE_UNUSED, - backtrace_error_callback error_callback, void *data) -{ - error_callback (data, "no debug info in Mach-O executable", -1); - return 0; -} - -/* A dummy callback function used when we can't find a symbol - table. */ - -static void -macho_nosyms (struct backtrace_state *state ATTRIBUTE_UNUSED, - uintptr_t addr ATTRIBUTE_UNUSED, - backtrace_syminfo_callback callback ATTRIBUTE_UNUSED, - backtrace_error_callback error_callback, void *data) -{ - error_callback (data, "no symbol table in Mach-O executable", -1); -} - -/* Add a single DWARF section to DWARF_SECTIONS, if we need the - section. Returns 1 on success, 0 on failure. */ - -static int -macho_add_dwarf_section (struct backtrace_state *state, int descriptor, - const char *sectname, uint32_t offset, uint64_t size, - backtrace_error_callback error_callback, void *data, - struct dwarf_sections *dwarf_sections) -{ - int i; - - for (i = 0; i < (int) DEBUG_MAX; ++i) - { - if (dwarf_section_names[i][0] != '\0' - && strncmp (sectname, dwarf_section_names[i], MACH_O_NAMELEN) == 0) - { - struct backtrace_view section_view; - - /* FIXME: Perhaps it would be better to try to use a single - view to read all the DWARF data, as we try to do for - ELF. */ - - if (!backtrace_get_view (state, descriptor, offset, size, - error_callback, data, §ion_view)) - return 0; - dwarf_sections->data[i] = (const unsigned char *) section_view.data; - dwarf_sections->size[i] = size; - break; - } - } - return 1; -} - -/* Collect DWARF sections from a DWARF segment. Returns 1 on success, - 0 on failure. */ - -static int -macho_add_dwarf_segment (struct backtrace_state *state, int descriptor, - off_t offset, unsigned int cmd, const char *psecs, - size_t sizesecs, unsigned int nsects, - backtrace_error_callback error_callback, void *data, - struct dwarf_sections *dwarf_sections) -{ - size_t sec_header_size; - size_t secoffset; - unsigned int i; - - switch (cmd) - { - case MACH_O_LC_SEGMENT: - sec_header_size = sizeof (struct macho_section); - break; - case MACH_O_LC_SEGMENT_64: - sec_header_size = sizeof (struct macho_section_64); - break; - default: - abort (); - } - - secoffset = 0; - for (i = 0; i < nsects; ++i) - { - if (secoffset + sec_header_size > sizesecs) - { - error_callback (data, "section overflow withing segment", 0); - return 0; - } - - switch (cmd) - { - case MACH_O_LC_SEGMENT: - { - struct macho_section section; - - memcpy (§ion, psecs + secoffset, sizeof section); - macho_add_dwarf_section (state, descriptor, section.sectname, - offset + section.offset, section.size, - error_callback, data, dwarf_sections); - } - break; - - case MACH_O_LC_SEGMENT_64: - { - struct macho_section_64 section; - - memcpy (§ion, psecs + secoffset, sizeof section); - macho_add_dwarf_section (state, descriptor, section.sectname, - offset + section.offset, section.size, - error_callback, data, dwarf_sections); - } - break; - - default: - abort (); - } - - secoffset += sec_header_size; - } - - return 1; -} - -/* Compare struct macho_symbol for qsort. */ - -static int -macho_symbol_compare (const void *v1, const void *v2) -{ - const struct macho_symbol *m1 = (const struct macho_symbol *) v1; - const struct macho_symbol *m2 = (const struct macho_symbol *) v2; - - if (m1->address < m2->address) - return -1; - else if (m1->address > m2->address) - return 1; - else - return 0; -} - -/* Compare an address against a macho_symbol for bsearch. We allocate - one extra entry in the array so that this can safely look at the - next entry. */ - -static int -macho_symbol_search (const void *vkey, const void *ventry) -{ - const uintptr_t *key = (const uintptr_t *) vkey; - const struct macho_symbol *entry = (const struct macho_symbol *) ventry; - uintptr_t addr; - - addr = *key; - if (addr < entry->address) - return -1; - else if (entry->name[0] == '\0' - && entry->address == ~(uintptr_t) 0) - return -1; - else if ((entry + 1)->name[0] == '\0' - && (entry + 1)->address == ~(uintptr_t) 0) - return -1; - else if (addr >= (entry + 1)->address) - return 1; - else - return 0; -} - -/* Return whether the symbol type field indicates a symbol table entry - that we care about: a function or data symbol. */ - -static int -macho_defined_symbol (uint8_t type) -{ - if ((type & MACH_O_N_STAB) != 0) - return 0; - switch (type & MACH_O_N_TYPE) - { - case MACH_O_N_UNDF: - return 0; - case MACH_O_N_ABS: - return 1; - case MACH_O_N_SECT: - return 1; - default: - return 0; - } -} - -/* Add symbol table information for a Mach-O file. */ - -static int -macho_add_symtab (struct backtrace_state *state, int descriptor, - struct libbacktrace_base_address base_address, int is_64, - off_t symoff, unsigned int nsyms, off_t stroff, - unsigned int strsize, - backtrace_error_callback error_callback, void *data) -{ - size_t symsize; - struct backtrace_view sym_view; - int sym_view_valid; - struct backtrace_view str_view; - int str_view_valid; - size_t ndefs; - size_t symtaboff; - unsigned int i; - size_t macho_symbol_size; - struct macho_symbol *macho_symbols; - unsigned int j; - struct macho_syminfo_data *sdata; - - sym_view_valid = 0; - str_view_valid = 0; - macho_symbol_size = 0; - macho_symbols = NULL; - - if (is_64) - symsize = sizeof (struct macho_nlist_64); - else - symsize = sizeof (struct macho_nlist); - - if (!backtrace_get_view (state, descriptor, symoff, nsyms * symsize, - error_callback, data, &sym_view)) - goto fail; - sym_view_valid = 1; - - if (!backtrace_get_view (state, descriptor, stroff, strsize, - error_callback, data, &str_view)) - return 0; - str_view_valid = 1; - - ndefs = 0; - symtaboff = 0; - for (i = 0; i < nsyms; ++i, symtaboff += symsize) - { - if (is_64) - { - struct macho_nlist_64 nlist; - - memcpy (&nlist, (const char *) sym_view.data + symtaboff, - sizeof nlist); - if (macho_defined_symbol (nlist.n_type)) - ++ndefs; - } - else - { - struct macho_nlist nlist; - - memcpy (&nlist, (const char *) sym_view.data + symtaboff, - sizeof nlist); - if (macho_defined_symbol (nlist.n_type)) - ++ndefs; - } - } - - /* Add 1 to ndefs to make room for a sentinel. */ - macho_symbol_size = (ndefs + 1) * sizeof (struct macho_symbol); - macho_symbols = ((struct macho_symbol *) - backtrace_alloc (state, macho_symbol_size, error_callback, - data)); - if (macho_symbols == NULL) - goto fail; - - j = 0; - symtaboff = 0; - for (i = 0; i < nsyms; ++i, symtaboff += symsize) - { - uint32_t strx; - uint64_t value; - const char *name; - - strx = 0; - value = 0; - if (is_64) - { - struct macho_nlist_64 nlist; - - memcpy (&nlist, (const char *) sym_view.data + symtaboff, - sizeof nlist); - if (!macho_defined_symbol (nlist.n_type)) - continue; - - strx = nlist.n_strx; - value = nlist.n_value; - } - else - { - struct macho_nlist nlist; - - memcpy (&nlist, (const char *) sym_view.data + symtaboff, - sizeof nlist); - if (!macho_defined_symbol (nlist.n_type)) - continue; - - strx = nlist.n_strx; - value = nlist.n_value; - } - - if (strx >= strsize) - { - error_callback (data, "symbol string index out of range", 0); - goto fail; - } - - name = (const char *) str_view.data + strx; - if (name[0] == '_') - ++name; - macho_symbols[j].name = name; - macho_symbols[j].address = libbacktrace_add_base (value, base_address); - ++j; - } - - sdata = ((struct macho_syminfo_data *) - backtrace_alloc (state, sizeof *sdata, error_callback, data)); - if (sdata == NULL) - goto fail; - - /* We need to keep the string table since it holds the names, but we - can release the symbol table. */ - - backtrace_release_view (state, &sym_view, error_callback, data); - sym_view_valid = 0; - str_view_valid = 0; - - /* Add a trailing sentinel symbol. */ - macho_symbols[j].name = ""; - macho_symbols[j].address = ~(uintptr_t) 0; - - backtrace_qsort (macho_symbols, ndefs + 1, sizeof (struct macho_symbol), - macho_symbol_compare); - - sdata->next = NULL; - sdata->symbols = macho_symbols; - sdata->count = ndefs; - - if (!state->threaded) - { - struct macho_syminfo_data **pp; - - for (pp = (struct macho_syminfo_data **) (void *) &state->syminfo_data; - *pp != NULL; - pp = &(*pp)->next) - ; - *pp = sdata; - } - else - { - while (1) - { - struct macho_syminfo_data **pp; - - pp = (struct macho_syminfo_data **) (void *) &state->syminfo_data; - - while (1) - { - struct macho_syminfo_data *p; - - p = backtrace_atomic_load_pointer (pp); - - if (p == NULL) - break; - - pp = &p->next; - } - - if (__sync_bool_compare_and_swap (pp, NULL, sdata)) - break; - } - } - - return 1; - - fail: - if (macho_symbols != NULL) - backtrace_free (state, macho_symbols, macho_symbol_size, - error_callback, data); - if (sym_view_valid) - backtrace_release_view (state, &sym_view, error_callback, data); - if (str_view_valid) - backtrace_release_view (state, &str_view, error_callback, data); - return 0; -} - -/* Return the symbol name and value for an ADDR. */ - -static void -macho_syminfo (struct backtrace_state *state, uintptr_t addr, - backtrace_syminfo_callback callback, - backtrace_error_callback error_callback ATTRIBUTE_UNUSED, - void *data) -{ - struct macho_syminfo_data *sdata; - struct macho_symbol *sym; - - sym = NULL; - if (!state->threaded) - { - for (sdata = (struct macho_syminfo_data *) state->syminfo_data; - sdata != NULL; - sdata = sdata->next) - { - sym = ((struct macho_symbol *) - bsearch (&addr, sdata->symbols, sdata->count, - sizeof (struct macho_symbol), macho_symbol_search)); - if (sym != NULL) - break; - } - } - else - { - struct macho_syminfo_data **pp; - - pp = (struct macho_syminfo_data **) (void *) &state->syminfo_data; - while (1) - { - sdata = backtrace_atomic_load_pointer (pp); - if (sdata == NULL) - break; - - sym = ((struct macho_symbol *) - bsearch (&addr, sdata->symbols, sdata->count, - sizeof (struct macho_symbol), macho_symbol_search)); - if (sym != NULL) - break; - - pp = &sdata->next; - } - } - - if (sym == NULL) - callback (data, addr, NULL, 0, 0); - else - callback (data, addr, sym->name, sym->address, 0); -} - -/* Look through a fat file to find the relevant executable. Returns 1 - on success, 0 on failure (in both cases descriptor is closed). */ - -static int -macho_add_fat (struct backtrace_state *state, const char *filename, - int descriptor, int swapped, off_t offset, - const unsigned char *match_uuid, - struct libbacktrace_base_address base_address, - int skip_symtab, uint32_t nfat_arch, int is_64, - backtrace_error_callback error_callback, void *data, - fileline *fileline_fn, int *found_sym) -{ - int arch_view_valid; - unsigned int cputype; - size_t arch_size; - struct backtrace_view arch_view; - unsigned int i; - - arch_view_valid = 0; - -#if defined (__x86_64__) - cputype = MACH_O_CPU_TYPE_X86_64; -#elif defined (__i386__) - cputype = MACH_O_CPU_TYPE_X86; -#elif defined (__aarch64__) - cputype = MACH_O_CPU_TYPE_ARM64; -#elif defined (__arm__) - cputype = MACH_O_CPU_TYPE_ARM; -#elif defined (__ppc__) - cputype = MACH_O_CPU_TYPE_PPC; -#elif defined (__ppc64__) - cputype = MACH_O_CPU_TYPE_PPC64; -#else - error_callback (data, "unknown Mach-O architecture", 0); - goto fail; -#endif - - if (is_64) - arch_size = sizeof (struct macho_fat_arch_64); - else - arch_size = sizeof (struct macho_fat_arch); - - if (!backtrace_get_view (state, descriptor, offset, - nfat_arch * arch_size, - error_callback, data, &arch_view)) - goto fail; - - for (i = 0; i < nfat_arch; ++i) - { - uint32_t fcputype; - uint64_t foffset; - - if (is_64) - { - struct macho_fat_arch_64 fat_arch_64; - - memcpy (&fat_arch_64, - (const char *) arch_view.data + i * arch_size, - arch_size); - fcputype = fat_arch_64.cputype; - foffset = fat_arch_64.offset; - if (swapped) - { - fcputype = __builtin_bswap32 (fcputype); - foffset = __builtin_bswap64 (foffset); - } - } - else - { - struct macho_fat_arch fat_arch_32; - - memcpy (&fat_arch_32, - (const char *) arch_view.data + i * arch_size, - arch_size); - fcputype = fat_arch_32.cputype; - foffset = (uint64_t) fat_arch_32.offset; - if (swapped) - { - fcputype = __builtin_bswap32 (fcputype); - foffset = (uint64_t) __builtin_bswap32 ((uint32_t) foffset); - } - } - - if (fcputype == cputype) - { - /* FIXME: What about cpusubtype? */ - backtrace_release_view (state, &arch_view, error_callback, data); - return macho_add (state, filename, descriptor, foffset, match_uuid, - base_address, skip_symtab, error_callback, data, - fileline_fn, found_sym); - } - } - - error_callback (data, "could not find executable in fat file", 0); - - fail: - if (arch_view_valid) - backtrace_release_view (state, &arch_view, error_callback, data); - if (descriptor != -1) - backtrace_close (descriptor, error_callback, data); - return 0; -} - -/* Look for the dsym file for FILENAME. This is called if FILENAME - does not have debug info or a symbol table. Returns 1 on success, - 0 on failure. */ - -static int -macho_add_dsym (struct backtrace_state *state, const char *filename, - struct libbacktrace_base_address base_address, - const unsigned char *uuid, - backtrace_error_callback error_callback, void *data, - fileline* fileline_fn) -{ - const char *p; - const char *dirname; - char *diralc; - size_t dirnamelen; - const char *basename; - size_t basenamelen; - const char *dsymsuffixdir; - size_t dsymsuffixdirlen; - size_t dsymlen; - char *dsym; - char *ps; - int d; - int does_not_exist; - int dummy_found_sym; - - diralc = NULL; - dirnamelen = 0; - dsym = NULL; - dsymlen = 0; - - p = strrchr (filename, '/'); - if (p == NULL) - { - dirname = "."; - dirnamelen = 1; - basename = filename; - basenamelen = strlen (basename); - diralc = NULL; - } - else - { - dirnamelen = p - filename; - diralc = (char*)backtrace_alloc (state, dirnamelen + 1, error_callback, data); - if (diralc == NULL) - goto fail; - memcpy (diralc, filename, dirnamelen); - diralc[dirnamelen] = '\0'; - dirname = diralc; - basename = p + 1; - basenamelen = strlen (basename); - } - - dsymsuffixdir = ".dSYM/Contents/Resources/DWARF/"; - dsymsuffixdirlen = strlen (dsymsuffixdir); - - dsymlen = (dirnamelen - + 1 - + basenamelen - + dsymsuffixdirlen - + basenamelen - + 1); - dsym = (char*)backtrace_alloc (state, dsymlen, error_callback, data); - if (dsym == NULL) - goto fail; - - ps = dsym; - memcpy (ps, dirname, dirnamelen); - ps += dirnamelen; - *ps++ = '/'; - memcpy (ps, basename, basenamelen); - ps += basenamelen; - memcpy (ps, dsymsuffixdir, dsymsuffixdirlen); - ps += dsymsuffixdirlen; - memcpy (ps, basename, basenamelen); - ps += basenamelen; - *ps = '\0'; - - if (diralc != NULL) - { - backtrace_free (state, diralc, dirnamelen + 1, error_callback, data); - diralc = NULL; - } - - d = backtrace_open (dsym, error_callback, data, &does_not_exist); - if (d < 0) - { - /* The file does not exist, so we can't read the debug info. - Just return success. */ - backtrace_free (state, dsym, dsymlen, error_callback, data); - return 1; - } - - if (!macho_add (state, dsym, d, 0, uuid, base_address, 1, - error_callback, data, fileline_fn, &dummy_found_sym)) - goto fail; - - backtrace_free (state, dsym, dsymlen, error_callback, data); - - return 1; - - fail: - if (dsym != NULL) - backtrace_free (state, dsym, dsymlen, error_callback, data); - if (diralc != NULL) - backtrace_free (state, diralc, dirnamelen, error_callback, data); - return 0; -} - -/* Add the backtrace data for a Macho-O file. Returns 1 on success, 0 - on failure (in both cases descriptor is closed). - - FILENAME: the name of the executable. - DESCRIPTOR: an open descriptor for the executable, closed here. - OFFSET: the offset within the file of this executable, for fat files. - MATCH_UUID: if not NULL, UUID that must match. - BASE_ADDRESS: the load address of the executable. - SKIP_SYMTAB: if non-zero, ignore the symbol table; used for dSYM files. - FILELINE_FN: set to the fileline function, by backtrace_dwarf_add. - FOUND_SYM: set to non-zero if we found the symbol table. -*/ - -static int -macho_add (struct backtrace_state *state, const char *filename, int descriptor, - off_t offset, const unsigned char *match_uuid, - struct libbacktrace_base_address base_address, int skip_symtab, - backtrace_error_callback error_callback, void *data, - fileline *fileline_fn, int *found_sym) -{ - struct backtrace_view header_view; - struct macho_header_32 header; - off_t hdroffset; - int is_64; - struct backtrace_view cmds_view; - int cmds_view_valid; - struct dwarf_sections dwarf_sections; - int have_dwarf; - unsigned char uuid[MACH_O_UUID_LEN]; - int have_uuid; - size_t cmdoffset; - unsigned int i; - - *found_sym = 0; - - cmds_view_valid = 0; - - /* The 32-bit and 64-bit file headers start out the same, so we can - just always read the 32-bit version. A fat header is shorter but - it will always be followed by data, so it's OK to read extra. */ - - if (!backtrace_get_view (state, descriptor, offset, - sizeof (struct macho_header_32), - error_callback, data, &header_view)) - goto fail; - - memcpy (&header, header_view.data, sizeof header); - - backtrace_release_view (state, &header_view, error_callback, data); - - switch (header.magic) - { - case MACH_O_MH_MAGIC_32: - is_64 = 0; - hdroffset = offset + sizeof (struct macho_header_32); - break; - case MACH_O_MH_MAGIC_64: - is_64 = 1; - hdroffset = offset + sizeof (struct macho_header_64); - break; - case MACH_O_MH_MAGIC_FAT: - case MACH_O_MH_MAGIC_FAT_64: - { - struct macho_header_fat fat_header; - - hdroffset = offset + sizeof (struct macho_header_fat); - memcpy (&fat_header, &header, sizeof fat_header); - return macho_add_fat (state, filename, descriptor, 0, hdroffset, - match_uuid, base_address, skip_symtab, - fat_header.nfat_arch, - header.magic == MACH_O_MH_MAGIC_FAT_64, - error_callback, data, fileline_fn, found_sym); - } - case MACH_O_MH_CIGAM_FAT: - case MACH_O_MH_CIGAM_FAT_64: - { - struct macho_header_fat fat_header; - uint32_t nfat_arch; - - hdroffset = offset + sizeof (struct macho_header_fat); - memcpy (&fat_header, &header, sizeof fat_header); - nfat_arch = __builtin_bswap32 (fat_header.nfat_arch); - return macho_add_fat (state, filename, descriptor, 1, hdroffset, - match_uuid, base_address, skip_symtab, - nfat_arch, - header.magic == MACH_O_MH_CIGAM_FAT_64, - error_callback, data, fileline_fn, found_sym); - } - default: - error_callback (data, "executable file is not in Mach-O format", 0); - goto fail; - } - - switch (header.filetype) - { - case MACH_O_MH_EXECUTE: - case MACH_O_MH_DYLIB: - case MACH_O_MH_DSYM: - break; - default: - error_callback (data, "executable file is not an executable", 0); - goto fail; - } - - if (!backtrace_get_view (state, descriptor, hdroffset, header.sizeofcmds, - error_callback, data, &cmds_view)) - goto fail; - cmds_view_valid = 1; - - memset (&dwarf_sections, 0, sizeof dwarf_sections); - have_dwarf = 0; - memset (&uuid, 0, sizeof uuid); - have_uuid = 0; - - cmdoffset = 0; - for (i = 0; i < header.ncmds; ++i) - { - const char *pcmd; - struct macho_load_command load_command; - - if (cmdoffset + sizeof load_command > header.sizeofcmds) - break; - - pcmd = (const char *) cmds_view.data + cmdoffset; - memcpy (&load_command, pcmd, sizeof load_command); - - switch (load_command.cmd) - { - case MACH_O_LC_SEGMENT: - { - struct macho_segment_command segcmd; - - memcpy (&segcmd, pcmd, sizeof segcmd); - if (memcmp (segcmd.segname, - "__DWARF\0\0\0\0\0\0\0\0\0", - MACH_O_NAMELEN) == 0) - { - if (!macho_add_dwarf_segment (state, descriptor, offset, - load_command.cmd, - pcmd + sizeof segcmd, - (load_command.cmdsize - - sizeof segcmd), - segcmd.nsects, error_callback, - data, &dwarf_sections)) - goto fail; - have_dwarf = 1; - } - } - break; - - case MACH_O_LC_SEGMENT_64: - { - struct macho_segment_64_command segcmd; - - memcpy (&segcmd, pcmd, sizeof segcmd); - if (memcmp (segcmd.segname, - "__DWARF\0\0\0\0\0\0\0\0\0", - MACH_O_NAMELEN) == 0) - { - if (!macho_add_dwarf_segment (state, descriptor, offset, - load_command.cmd, - pcmd + sizeof segcmd, - (load_command.cmdsize - - sizeof segcmd), - segcmd.nsects, error_callback, - data, &dwarf_sections)) - goto fail; - have_dwarf = 1; - } - } - break; - - case MACH_O_LC_SYMTAB: - if (!skip_symtab) - { - struct macho_symtab_command symcmd; - - memcpy (&symcmd, pcmd, sizeof symcmd); - if (!macho_add_symtab (state, descriptor, base_address, is_64, - offset + symcmd.symoff, symcmd.nsyms, - offset + symcmd.stroff, symcmd.strsize, - error_callback, data)) - goto fail; - - *found_sym = 1; - } - break; - - case MACH_O_LC_UUID: - { - struct macho_uuid_command uuidcmd; - - memcpy (&uuidcmd, pcmd, sizeof uuidcmd); - memcpy (&uuid[0], &uuidcmd.uuid[0], MACH_O_UUID_LEN); - have_uuid = 1; - } - break; - - default: - break; - } - - cmdoffset += load_command.cmdsize; - } - - if (!backtrace_close (descriptor, error_callback, data)) - goto fail; - descriptor = -1; - - backtrace_release_view (state, &cmds_view, error_callback, data); - cmds_view_valid = 0; - - if (match_uuid != NULL) - { - /* If we don't have a UUID, or it doesn't match, just ignore - this file. */ - if (!have_uuid - || memcmp (match_uuid, &uuid[0], MACH_O_UUID_LEN) != 0) - return 1; - } - - if (have_dwarf) - { - int is_big_endian; - - is_big_endian = 0; -#if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) -#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - is_big_endian = 1; -#endif -#endif - - if (!backtrace_dwarf_add (state, base_address, &dwarf_sections, - is_big_endian, NULL, error_callback, data, - fileline_fn, NULL)) - goto fail; - } - - if (!have_dwarf && have_uuid) - { - if (!macho_add_dsym (state, filename, base_address, &uuid[0], - error_callback, data, fileline_fn)) - goto fail; - } - - return 1; - - fail: - if (cmds_view_valid) - backtrace_release_view (state, &cmds_view, error_callback, data); - if (descriptor != -1) - backtrace_close (descriptor, error_callback, data); - return 0; -} - -#ifdef HAVE_MACH_O_DYLD_H - -/* Initialize the backtrace data we need from a Mach-O executable - using the dyld support functions. This closes descriptor. */ - -int -backtrace_initialize (struct backtrace_state *state, const char *filename, - int descriptor, backtrace_error_callback error_callback, - void *data, fileline *fileline_fn) -{ - uint32_t c; - uint32_t i; - int closed_descriptor; - int found_sym; - fileline macho_fileline_fn; - - closed_descriptor = 0; - found_sym = 0; - macho_fileline_fn = macho_nodebug; - - c = _dyld_image_count (); - for (i = 0; i < c; ++i) - { - struct libbacktrace_base_address base_address; - const char *name; - int d; - fileline mff; - int mfs; - - name = _dyld_get_image_name (i); - if (name == NULL) - continue; - - if (strcmp (name, filename) == 0 && !closed_descriptor) - { - d = descriptor; - closed_descriptor = 1; - } - else - { - int does_not_exist; - - d = backtrace_open (name, error_callback, data, &does_not_exist); - if (d < 0) - continue; - } - - base_address.m = _dyld_get_image_vmaddr_slide (i); - - mff = macho_nodebug; - if (!macho_add (state, name, d, 0, NULL, base_address, 0, - error_callback, data, &mff, &mfs)) - continue; - - if (mff != macho_nodebug) - macho_fileline_fn = mff; - if (mfs) - found_sym = 1; - } - - if (!closed_descriptor) - backtrace_close (descriptor, error_callback, data); - - if (!state->threaded) - { - if (found_sym) - state->syminfo_fn = macho_syminfo; - else if (state->syminfo_fn == NULL) - state->syminfo_fn = macho_nosyms; - } - else - { - if (found_sym) - backtrace_atomic_store_pointer (&state->syminfo_fn, &macho_syminfo); - else - (void) __sync_bool_compare_and_swap (&state->syminfo_fn, NULL, - macho_nosyms); - } - - if (!state->threaded) - *fileline_fn = state->fileline_fn; - else - *fileline_fn = backtrace_atomic_load_pointer (&state->fileline_fn); - - if (*fileline_fn == NULL || *fileline_fn == macho_nodebug) - *fileline_fn = macho_fileline_fn; - - return 1; -} - -#else /* !defined (HAVE_MACH_O_DYLD_H) */ - -/* Initialize the backtrace data we need from a Mach-O executable - without using the dyld support functions. This closes - descriptor. */ - -int -backtrace_initialize (struct backtrace_state *state, const char *filename, - int descriptor, backtrace_error_callback error_callback, - void *data, fileline *fileline_fn) -{ - fileline macho_fileline_fn; - struct libbacktrace_base_address zero_base_address; - int found_sym; - - macho_fileline_fn = macho_nodebug; - memset (&zero_base_address, 0, sizeof zero_base_address); - if (!macho_add (state, filename, descriptor, 0, NULL, zero_base_address, 0, - error_callback, data, &macho_fileline_fn, &found_sym)) - return 0; - - if (!state->threaded) - { - if (found_sym) - state->syminfo_fn = macho_syminfo; - else if (state->syminfo_fn == NULL) - state->syminfo_fn = macho_nosyms; - } - else - { - if (found_sym) - backtrace_atomic_store_pointer (&state->syminfo_fn, &macho_syminfo); - else - (void) __sync_bool_compare_and_swap (&state->syminfo_fn, NULL, - macho_nosyms); - } - - if (!state->threaded) - *fileline_fn = state->fileline_fn; - else - *fileline_fn = backtrace_atomic_load_pointer (&state->fileline_fn); - - if (*fileline_fn == NULL || *fileline_fn == macho_nodebug) - *fileline_fn = macho_fileline_fn; - - return 1; -} - -#endif /* !defined (HAVE_MACH_O_DYLD_H) */ - -} diff --git a/src/third_party/tracy/libbacktrace/mmapio.cpp b/src/third_party/tracy/libbacktrace/mmapio.cpp deleted file mode 100644 index 0e8f599b..00000000 --- a/src/third_party/tracy/libbacktrace/mmapio.cpp +++ /dev/null @@ -1,115 +0,0 @@ -/* mmapio.c -- File views using mmap. - Copyright (C) 2012-2021 Free Software Foundation, Inc. - Written by Ian Lance Taylor, Google. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - (1) Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - (2) Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in - the documentation and/or other materials provided with the - distribution. - - (3) The name of the author may not be used to - endorse or promote products derived from this software without - specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR -IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, -INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING -IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. */ - -#include "config.h" - -#include -#include -#include -#include - -#include "backtrace.hpp" -#include "internal.hpp" - -#ifndef HAVE_DECL_GETPAGESIZE -extern int getpagesize (void); -#endif - -#ifndef MAP_FAILED -#define MAP_FAILED ((void *)-1) -#endif - -namespace tracy -{ - -/* This file implements file views and memory allocation when mmap is - available. */ - -/* Create a view of SIZE bytes from DESCRIPTOR at OFFSET. */ - -int -backtrace_get_view (struct backtrace_state *state ATTRIBUTE_UNUSED, - int descriptor, off_t offset, uint64_t size, - backtrace_error_callback error_callback, - void *data, struct backtrace_view *view) -{ - size_t pagesize; - unsigned int inpage; - off_t pageoff; - void *map; - - if ((uint64_t) (size_t) size != size) - { - error_callback (data, "file size too large", 0); - return 0; - } - - pagesize = getpagesize (); - inpage = offset % pagesize; - pageoff = offset - inpage; - - size += inpage; - size = (size + (pagesize - 1)) & ~ (pagesize - 1); - - map = mmap (NULL, size, PROT_READ, MAP_PRIVATE, descriptor, pageoff); - if (map == MAP_FAILED) - { - error_callback (data, "mmap", errno); - return 0; - } - - view->data = (char *) map + inpage; - view->base = map; - view->len = size; - - return 1; -} - -/* Release a view read by backtrace_get_view. */ - -void -backtrace_release_view (struct backtrace_state *state ATTRIBUTE_UNUSED, - struct backtrace_view *view, - backtrace_error_callback error_callback, - void *data) -{ - union { - const void *cv; - void *v; - } cc; - - cc.cv = view->base; - if (munmap (cc.v, view->len) < 0) - error_callback (data, "munmap", errno); -} - -} diff --git a/src/third_party/tracy/libbacktrace/posix.cpp b/src/third_party/tracy/libbacktrace/posix.cpp deleted file mode 100644 index 8233a8ea..00000000 --- a/src/third_party/tracy/libbacktrace/posix.cpp +++ /dev/null @@ -1,109 +0,0 @@ -/* posix.c -- POSIX file I/O routines for the backtrace library. - Copyright (C) 2012-2021 Free Software Foundation, Inc. - Written by Ian Lance Taylor, Google. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - (1) Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - (2) Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in - the documentation and/or other materials provided with the - distribution. - - (3) The name of the author may not be used to - endorse or promote products derived from this software without - specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR -IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, -INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING -IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. */ - -#include "config.h" - -#include -#include -#include -#include -#include - -#include "backtrace.hpp" -#include "internal.hpp" - -#ifndef O_BINARY -#define O_BINARY 0 -#endif - -#ifndef O_CLOEXEC -#define O_CLOEXEC 0 -#endif - -#ifndef FD_CLOEXEC -#define FD_CLOEXEC 1 -#endif - -namespace tracy -{ - -/* Open a file for reading. */ - -int -backtrace_open (const char *filename, backtrace_error_callback error_callback, - void *data, int *does_not_exist) -{ - int descriptor; - - if (does_not_exist != NULL) - *does_not_exist = 0; - - descriptor = open (filename, (int) (O_RDONLY | O_BINARY | O_CLOEXEC)); - if (descriptor < 0) - { - /* If DOES_NOT_EXIST is not NULL, then don't call ERROR_CALLBACK - if the file does not exist. We treat lacking permission to - open the file as the file not existing; this case arises when - running the libgo syscall package tests as root. */ - if (does_not_exist != NULL && (errno == ENOENT || errno == EACCES)) - *does_not_exist = 1; - else - error_callback (data, filename, errno); - return -1; - } - -#ifdef HAVE_FCNTL - /* Set FD_CLOEXEC just in case the kernel does not support - O_CLOEXEC. It doesn't matter if this fails for some reason. - FIXME: At some point it should be safe to only do this if - O_CLOEXEC == 0. */ - fcntl (descriptor, F_SETFD, FD_CLOEXEC); -#endif - - return descriptor; -} - -/* Close DESCRIPTOR. */ - -int -backtrace_close (int descriptor, backtrace_error_callback error_callback, - void *data) -{ - if (close (descriptor) < 0) - { - error_callback (data, "close", errno); - return 0; - } - return 1; -} - -} diff --git a/src/third_party/tracy/libbacktrace/sort.cpp b/src/third_party/tracy/libbacktrace/sort.cpp deleted file mode 100644 index 6daee0a6..00000000 --- a/src/third_party/tracy/libbacktrace/sort.cpp +++ /dev/null @@ -1,113 +0,0 @@ -/* sort.c -- Sort without allocating memory - Copyright (C) 2012-2021 Free Software Foundation, Inc. - Written by Ian Lance Taylor, Google. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - (1) Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - (2) Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in - the documentation and/or other materials provided with the - distribution. - - (3) The name of the author may not be used to - endorse or promote products derived from this software without - specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR -IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, -INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING -IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. */ - -#include "config.h" - -#include -#include - -#include "backtrace.hpp" -#include "internal.hpp" - -namespace tracy -{ - -/* The GNU glibc version of qsort allocates memory, which we must not - do if we are invoked by a signal handler. So provide our own - sort. */ - -static void -swap (char *a, char *b, size_t size) -{ - size_t i; - - for (i = 0; i < size; i++, a++, b++) - { - char t; - - t = *a; - *a = *b; - *b = t; - } -} - -void -backtrace_qsort (void *basearg, size_t count, size_t size, - int (*compar) (const void *, const void *)) -{ - char *base = (char *) basearg; - size_t i; - size_t mid; - - tail_recurse: - if (count < 2) - return; - - /* The symbol table and DWARF tables, which is all we use this - routine for, tend to be roughly sorted. Pick the middle element - in the array as our pivot point, so that we are more likely to - cut the array in half for each recursion step. */ - swap (base, base + (count / 2) * size, size); - - mid = 0; - for (i = 1; i < count; i++) - { - if ((*compar) (base, base + i * size) > 0) - { - ++mid; - if (i != mid) - swap (base + mid * size, base + i * size, size); - } - } - - if (mid > 0) - swap (base, base + mid * size, size); - - /* Recurse with the smaller array, loop with the larger one. That - ensures that our maximum stack depth is log count. */ - if (2 * mid < count) - { - backtrace_qsort (base, mid, size, compar); - base += (mid + 1) * size; - count -= mid + 1; - goto tail_recurse; - } - else - { - backtrace_qsort (base + (mid + 1) * size, count - (mid + 1), - size, compar); - count = mid; - goto tail_recurse; - } -} - -} diff --git a/src/third_party/tracy/libbacktrace/state.cpp b/src/third_party/tracy/libbacktrace/state.cpp deleted file mode 100644 index ea3c137c..00000000 --- a/src/third_party/tracy/libbacktrace/state.cpp +++ /dev/null @@ -1,76 +0,0 @@ -/* state.c -- Create the backtrace state. - Copyright (C) 2012-2021 Free Software Foundation, Inc. - Written by Ian Lance Taylor, Google. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - (1) Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - (2) Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in - the documentation and/or other materials provided with the - distribution. - - (3) The name of the author may not be used to - endorse or promote products derived from this software without - specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR -IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, -INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING -IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. */ - -#include "config.h" - -#include -#include - -#include "backtrace.hpp" -#include "internal.hpp" - -namespace tracy -{ - -/* Create the backtrace state. This will then be passed to all the - other routines. */ - -struct backtrace_state * -backtrace_create_state (const char *filename, int threaded, - backtrace_error_callback error_callback, - void *data) -{ - struct backtrace_state init_state; - struct backtrace_state *state; - -#ifndef HAVE_SYNC_FUNCTIONS - if (threaded) - { - error_callback (data, "backtrace library does not support threads", 0); - return NULL; - } -#endif - - memset (&init_state, 0, sizeof init_state); - init_state.filename = filename; - init_state.threaded = threaded; - - state = ((struct backtrace_state *) - backtrace_alloc (&init_state, sizeof *state, error_callback, data)); - if (state == NULL) - return NULL; - *state = init_state; - - return state; -} - -} diff --git a/src/third_party/tracy/tracy/Tracy.hpp b/src/third_party/tracy/tracy/Tracy.hpp deleted file mode 100644 index bed51179..00000000 --- a/src/third_party/tracy/tracy/Tracy.hpp +++ /dev/null @@ -1,254 +0,0 @@ -#ifndef __TRACY_HPP__ -#define __TRACY_HPP__ - -#include "../common/TracyColor.hpp" -#include "../common/TracySystem.hpp" - -#ifndef TracyFunction -# define TracyFunction __FUNCTION__ -#endif - -#ifndef TracyFile -# define TracyFile __FILE__ -#endif - -#ifndef TracyLine -# define TracyLine __LINE__ -#endif - -#ifndef TRACY_ENABLE - -#define TracyNoop - -#define ZoneNamed(x,y) -#define ZoneNamedN(x,y,z) -#define ZoneNamedC(x,y,z) -#define ZoneNamedNC(x,y,z,w) - -#define ZoneTransient(x,y) -#define ZoneTransientN(x,y,z) - -#define ZoneScoped -#define ZoneScopedN(x) -#define ZoneScopedC(x) -#define ZoneScopedNC(x,y) - -#define ZoneText(x,y) -#define ZoneTextV(x,y,z) -#define ZoneTextF(x,...) -#define ZoneTextVF(x,y,...) -#define ZoneName(x,y) -#define ZoneNameV(x,y,z) -#define ZoneNameF(x,...) -#define ZoneNameVF(x,y,...) -#define ZoneColor(x) -#define ZoneColorV(x,y) -#define ZoneValue(x) -#define ZoneValueV(x,y) -#define ZoneIsActive false -#define ZoneIsActiveV(x) false - -#define FrameMark -#define FrameMarkNamed(x) -#define FrameMarkStart(x) -#define FrameMarkEnd(x) - -#define FrameImage(x,y,z,w,a) - -#define TracyLockable( type, varname ) type varname -#define TracyLockableN( type, varname, desc ) type varname -#define TracySharedLockable( type, varname ) type varname -#define TracySharedLockableN( type, varname, desc ) type varname -#define LockableBase( type ) type -#define SharedLockableBase( type ) type -#define LockMark(x) (void)x -#define LockableName(x,y,z) - -#define TracyPlot(x,y) -#define TracyPlotConfig(x,y,z,w,a) - -#define TracyMessage(x,y) -#define TracyMessageL(x) -#define TracyMessageC(x,y,z) -#define TracyMessageLC(x,y) -#define TracyAppInfo(x,y) - -#define TracyAlloc(x,y) -#define TracyFree(x) -#define TracyMemoryDiscard(x) -#define TracySecureAlloc(x,y) -#define TracySecureFree(x) -#define TracySecureMemoryDiscard(x) - -#define TracyAllocN(x,y,z) -#define TracyFreeN(x,y) -#define TracySecureAllocN(x,y,z) -#define TracySecureFreeN(x,y) - -#define ZoneNamedS(x,y,z) -#define ZoneNamedNS(x,y,z,w) -#define ZoneNamedCS(x,y,z,w) -#define ZoneNamedNCS(x,y,z,w,a) - -#define ZoneTransientS(x,y,z) -#define ZoneTransientNS(x,y,z,w) - -#define ZoneScopedS(x) -#define ZoneScopedNS(x,y) -#define ZoneScopedCS(x,y) -#define ZoneScopedNCS(x,y,z) - -#define TracyAllocS(x,y,z) -#define TracyFreeS(x,y) -#define TracyMemoryDiscardS(x,y) -#define TracySecureAllocS(x,y,z) -#define TracySecureFreeS(x,y) -#define TracySecureMemoryDiscardS(x,y) - -#define TracyAllocNS(x,y,z,w) -#define TracyFreeNS(x,y,z) -#define TracySecureAllocNS(x,y,z,w) -#define TracySecureFreeNS(x,y,z) - -#define TracyMessageS(x,y,z) -#define TracyMessageLS(x,y) -#define TracyMessageCS(x,y,z,w) -#define TracyMessageLCS(x,y,z) - -#define TracySourceCallbackRegister(x,y) -#define TracyParameterRegister(x,y) -#define TracyParameterSetup(x,y,z,w) -#define TracyIsConnected false -#define TracyIsStarted false -#define TracySetProgramName(x) - -#define TracyFiberEnter(x) -#define TracyFiberEnterHint(x,y) -#define TracyFiberLeave - -#else - -#include - -#include "../client/TracyLock.hpp" -#include "../client/TracyProfiler.hpp" -#include "../client/TracyScoped.hpp" - -#ifndef TRACY_CALLSTACK -#define TRACY_CALLSTACK 0 -#endif - -#define TracyNoop tracy::ProfilerAvailable() - -#define ZoneNamed( varname, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active ) -#define ZoneNamedN( varname, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active ) -#define ZoneNamedC( varname, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active ) -#define ZoneNamedNC( varname, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active ) - -#define ZoneTransient( varname, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), nullptr, 0, TRACY_CALLSTACK, active ) -#define ZoneTransientN( varname, name, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), TRACY_CALLSTACK, active ) -#define ZoneTransientNC( varname, name, color, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), color, TRACY_CALLSTACK, active ) - -#define ZoneScoped ZoneNamed( ___tracy_scoped_zone, true ) -#define ZoneScopedN( name ) ZoneNamedN( ___tracy_scoped_zone, name, true ) -#define ZoneScopedC( color ) ZoneNamedC( ___tracy_scoped_zone, color, true ) -#define ZoneScopedNC( name, color ) ZoneNamedNC( ___tracy_scoped_zone, name, color, true ) - -#define ZoneText( txt, size ) ___tracy_scoped_zone.Text( txt, size ) -#define ZoneTextV( varname, txt, size ) varname.Text( txt, size ) -#define ZoneTextF( fmt, ... ) ___tracy_scoped_zone.TextFmt( fmt, ##__VA_ARGS__ ) -#define ZoneTextVF( varname, fmt, ... ) varname.TextFmt( fmt, ##__VA_ARGS__ ) -#define ZoneName( txt, size ) ___tracy_scoped_zone.Name( txt, size ) -#define ZoneNameV( varname, txt, size ) varname.Name( txt, size ) -#define ZoneNameF( fmt, ... ) ___tracy_scoped_zone.NameFmt( fmt, ##__VA_ARGS__ ) -#define ZoneNameVF( varname, fmt, ... ) varname.NameFmt( fmt, ##__VA_ARGS__ ) -#define ZoneColor( color ) ___tracy_scoped_zone.Color( color ) -#define ZoneColorV( varname, color ) varname.Color( color ) -#define ZoneValue( value ) ___tracy_scoped_zone.Value( value ) -#define ZoneValueV( varname, value ) varname.Value( value ) -#define ZoneIsActive ___tracy_scoped_zone.IsActive() -#define ZoneIsActiveV( varname ) varname.IsActive() - -#define FrameMark tracy::Profiler::SendFrameMark( nullptr ) -#define FrameMarkNamed( name ) tracy::Profiler::SendFrameMark( name ) -#define FrameMarkStart( name ) tracy::Profiler::SendFrameMark( name, tracy::QueueType::FrameMarkMsgStart ) -#define FrameMarkEnd( name ) tracy::Profiler::SendFrameMark( name, tracy::QueueType::FrameMarkMsgEnd ) - -#define FrameImage( image, width, height, offset, flip ) tracy::Profiler::SendFrameImage( image, width, height, offset, flip ) - -#define TracyLockable( type, varname ) tracy::Lockable varname { [] () -> const tracy::SourceLocationData* { static constexpr tracy::SourceLocationData srcloc { nullptr, #type " " #varname, TracyFile, TracyLine, 0 }; return &srcloc; }() } -#define TracyLockableN( type, varname, desc ) tracy::Lockable varname { [] () -> const tracy::SourceLocationData* { static constexpr tracy::SourceLocationData srcloc { nullptr, desc, TracyFile, TracyLine, 0 }; return &srcloc; }() } -#define TracySharedLockable( type, varname ) tracy::SharedLockable varname { [] () -> const tracy::SourceLocationData* { static constexpr tracy::SourceLocationData srcloc { nullptr, #type " " #varname, TracyFile, TracyLine, 0 }; return &srcloc; }() } -#define TracySharedLockableN( type, varname, desc ) tracy::SharedLockable varname { [] () -> const tracy::SourceLocationData* { static constexpr tracy::SourceLocationData srcloc { nullptr, desc, TracyFile, TracyLine, 0 }; return &srcloc; }() } -#define LockableBase( type ) tracy::Lockable -#define SharedLockableBase( type ) tracy::SharedLockable -#define LockMark( varname ) static constexpr tracy::SourceLocationData __tracy_lock_location_##varname { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; varname.Mark( &__tracy_lock_location_##varname ) -#define LockableName( varname, txt, size ) varname.CustomName( txt, size ) - -#define TracyPlot( name, val ) tracy::Profiler::PlotData( name, val ) -#define TracyPlotConfig( name, type, step, fill, color ) tracy::Profiler::ConfigurePlot( name, type, step, fill, color ) - -#define TracyAppInfo( txt, size ) tracy::Profiler::MessageAppInfo( txt, size ) - -#define TracyMessage( txt, size ) tracy::Profiler::Message( txt, size, TRACY_CALLSTACK ) -#define TracyMessageL( txt ) tracy::Profiler::Message( txt, TRACY_CALLSTACK ) -#define TracyMessageC( txt, size, color ) tracy::Profiler::MessageColor( txt, size, color, TRACY_CALLSTACK ) -#define TracyMessageLC( txt, color ) tracy::Profiler::MessageColor( txt, color, TRACY_CALLSTACK ) - -#define TracyAlloc( ptr, size ) tracy::Profiler::MemAllocCallstack( ptr, size, TRACY_CALLSTACK, false ) -#define TracyFree( ptr ) tracy::Profiler::MemFreeCallstack( ptr, TRACY_CALLSTACK, false ) -#define TracySecureAlloc( ptr, size ) tracy::Profiler::MemAllocCallstack( ptr, size, TRACY_CALLSTACK, true ) -#define TracySecureFree( ptr ) tracy::Profiler::MemFreeCallstack( ptr, TRACY_CALLSTACK, true ) - -#define TracyAllocN( ptr, size, name ) tracy::Profiler::MemAllocCallstackNamed( ptr, size, TRACY_CALLSTACK, false, name ) -#define TracyFreeN( ptr, name ) tracy::Profiler::MemFreeCallstackNamed( ptr, TRACY_CALLSTACK, false, name ) -#define TracyMemoryDiscard( name ) tracy::Profiler::MemDiscardCallstack( name, false, TRACY_CALLSTACK ) -#define TracySecureAllocN( ptr, size, name ) tracy::Profiler::MemAllocCallstackNamed( ptr, size, TRACY_CALLSTACK, true, name ) -#define TracySecureFreeN( ptr, name ) tracy::Profiler::MemFreeCallstackNamed( ptr, TRACY_CALLSTACK, true, name ) -#define TracySecureMemoryDiscard( name ) tracy::Profiler::MemDiscardCallstack( name, true, TRACY_CALLSTACK ) - -#define ZoneNamedS( varname, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), depth, active ) -#define ZoneNamedNS( varname, name, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), depth, active ) -#define ZoneNamedCS( varname, color, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), depth, active ) -#define ZoneNamedNCS( varname, name, color, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), depth, active ) - -#define ZoneTransientS( varname, depth, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), nullptr, 0, depth, active ) -#define ZoneTransientNS( varname, name, depth, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), depth, active ) - -#define ZoneScopedS( depth ) ZoneNamedS( ___tracy_scoped_zone, depth, true ) -#define ZoneScopedNS( name, depth ) ZoneNamedNS( ___tracy_scoped_zone, name, depth, true ) -#define ZoneScopedCS( color, depth ) ZoneNamedCS( ___tracy_scoped_zone, color, depth, true ) -#define ZoneScopedNCS( name, color, depth ) ZoneNamedNCS( ___tracy_scoped_zone, name, color, depth, true ) - -#define TracyAllocS( ptr, size, depth ) tracy::Profiler::MemAllocCallstack( ptr, size, depth, false ) -#define TracyFreeS( ptr, depth ) tracy::Profiler::MemFreeCallstack( ptr, depth, false ) -#define TracySecureAllocS( ptr, size, depth ) tracy::Profiler::MemAllocCallstack( ptr, size, depth, true ) -#define TracySecureFreeS( ptr, depth ) tracy::Profiler::MemFreeCallstack( ptr, depth, true ) - -#define TracyAllocNS( ptr, size, depth, name ) tracy::Profiler::MemAllocCallstackNamed( ptr, size, depth, false, name ) -#define TracyFreeNS( ptr, depth, name ) tracy::Profiler::MemFreeCallstackNamed( ptr, depth, false, name ) -#define TracyMemoryDiscardS( name, depth ) tracy::Profiler::MemDiscardCallstack( name, false, depth ) -#define TracySecureAllocNS( ptr, size, depth, name ) tracy::Profiler::MemAllocCallstackNamed( ptr, size, depth, true, name ) -#define TracySecureFreeNS( ptr, depth, name ) tracy::Profiler::MemFreeCallstackNamed( ptr, depth, true, name ) -#define TracySecureMemoryDiscardS( name, depth ) tracy::Profiler::MemDiscardCallstack( name, true, depth ) - -#define TracyMessageS( txt, size, depth ) tracy::Profiler::Message( txt, size, depth ) -#define TracyMessageLS( txt, depth ) tracy::Profiler::Message( txt, depth ) -#define TracyMessageCS( txt, size, color, depth ) tracy::Profiler::MessageColor( txt, size, color, depth ) -#define TracyMessageLCS( txt, color, depth ) tracy::Profiler::MessageColor( txt, color, depth ) - -#define TracySourceCallbackRegister( cb, data ) tracy::Profiler::SourceCallbackRegister( cb, data ) -#define TracyParameterRegister( cb, data ) tracy::Profiler::ParameterRegister( cb, data ) -#define TracyParameterSetup( idx, name, isBool, val ) tracy::Profiler::ParameterSetup( idx, name, isBool, val ) -#define TracyIsConnected tracy::GetProfiler().IsConnected() -#define TracySetProgramName( name ) tracy::GetProfiler().SetProgramName( name ); - -#ifdef TRACY_FIBERS -# define TracyFiberEnter( fiber ) tracy::Profiler::EnterFiber( fiber, 0 ) -# define TracyFiberEnterHint( fiber, groupHint ) tracy::Profiler::EnterFiber( fiber, groupHint ) -# define TracyFiberLeave tracy::Profiler::LeaveFiber() -#endif - -#endif - -#endif diff --git a/src/third_party/tracy/tracy/TracyC.h b/src/third_party/tracy/tracy/TracyC.h deleted file mode 100644 index 58d3e56d..00000000 --- a/src/third_party/tracy/tracy/TracyC.h +++ /dev/null @@ -1,441 +0,0 @@ -#ifndef __TRACYC_HPP__ -#define __TRACYC_HPP__ - -#include -#include - -#include "../common/TracyApi.h" - -#ifdef __cplusplus -extern "C" { -#endif - -enum TracyPlotFormatEnum -{ - TracyPlotFormatNumber, - TracyPlotFormatMemory, - TracyPlotFormatPercentage, - TracyPlotFormatWatt -}; - -TRACY_API void ___tracy_set_thread_name( const char* name ); - -#define TracyCSetThreadName( name ) ___tracy_set_thread_name( name ); - -#ifndef TracyFunction -# define TracyFunction __FUNCTION__ -#endif - -#ifndef TracyFile -# define TracyFile __FILE__ -#endif - -#ifndef TracyLine -# define TracyLine __LINE__ -#endif - -#ifndef TRACY_ENABLE - -typedef const void* TracyCZoneCtx; - -struct TracyCLockCtx; -struct TracyCSharedLockCtx; -struct TracyCD3D11Ctx; - -#define TracyCZone(c,x) -#define TracyCZoneN(c,x,y) -#define TracyCZoneC(c,x,y) -#define TracyCZoneNC(c,x,y,z) -#define TracyCZoneEnd(c) -#define TracyCZoneText(c,x,y) -#define TracyCZoneName(c,x,y) -#define TracyCZoneColor(c,x) -#define TracyCZoneValue(c,x) - -#define TracyCAlloc(x,y) -#define TracyCFree(x) -#define TracyCMemoryDiscard(x) -#define TracyCSecureAlloc(x,y) -#define TracyCSecureFree(x) -#define TracyCSecureMemoryDiscard(x) - -#define TracyCAllocN(x,y,z) -#define TracyCFreeN(x,y) -#define TracyCSecureAllocN(x,y,z) -#define TracyCSecureFreeN(x,y) - -#define TracyCFrameMark -#define TracyCFrameMarkNamed(x) -#define TracyCFrameMarkStart(x) -#define TracyCFrameMarkEnd(x) -#define TracyCFrameImage(x,y,z,w,a) - -#define TracyCPlot(x,y) -#define TracyCPlotF(x,y) -#define TracyCPlotI(x,y) -#define TracyCPlotConfig(x,y,z,w,a) - -#define TracyCMessage(x,y) -#define TracyCMessageL(x) -#define TracyCMessageC(x,y,z) -#define TracyCMessageLC(x,y) -#define TracyCAppInfo(x,y) - -#define TracyCZoneS(x,y,z) -#define TracyCZoneNS(x,y,z,w) -#define TracyCZoneCS(x,y,z,w) -#define TracyCZoneNCS(x,y,z,w,a) - -#define TracyCAllocS(x,y,z) -#define TracyCFreeS(x,y) -#define TracyCMemoryDiscardS(x,y) -#define TracyCSecureAllocS(x,y,z) -#define TracyCSecureFreeS(x,y) -#define TracyCSecureMemoryDiscardS(x,y) - -#define TracyCAllocNS(x,y,z,w) -#define TracyCFreeNS(x,y,z) -#define TracyCSecureAllocNS(x,y,z,w) -#define TracyCSecureFreeNS(x,y,z) - -#define TracyCMessageS(x,y,z) -#define TracyCMessageLS(x,y) -#define TracyCMessageCS(x,y,z,w) -#define TracyCMessageLCS(x,y,z) - -#define TracyCLockCtx(l) -#define TracyCLockAnnounce(l) -#define TracyCLockTerminate(l) -#define TracyCLockBeforeLock(l) -#define TracyCLockAfterLock(l) -#define TracyCLockAfterUnlock(l) -#define TracyCLockAfterTryLock(l,x) -#define TracyCLockMark(l) -#define TracyCLockCustomName(l,x,y) - -#define TracyCSharedLockCtx(l) -#define TracyCSharedLockAnnounce(l) -#define TracyCSharedLockTerminate(l) -#define TracyCSharedLockBeforeExclusiveLock(l) -#define TracyCSharedLockAfterExclusiveLock(l) -#define TracyCSharedLockAfterExclusiveUnl(l) -#define TracyCSharedLockAfterTryExclusiveLock(l,x) -#define TracyCSharedLockBeforeSharedLock(l) -#define TracyCSharedLockAfterSharedLock(l) -#define TracyCSharedLockAfterSharedUnl(l) -#define TracyCSharedLockAfterTrySharedLock(l,x) -#define TracyCSharedLockMark(l) -#define TracyCSharedLockCustomName(l,x,y) - -#define TracyCIsConnected 0 -#define TracyCIsStarted 0 - -#ifdef TRACY_FIBERS -# define TracyCFiberEnter(fiber) -# define TracyCFiberLeave -#endif - -#else - -#ifndef TracyConcat -# define TracyConcat(x,y) TracyConcatIndirect(x,y) -#endif -#ifndef TracyConcatIndirect -# define TracyConcatIndirect(x,y) x##y -#endif - -struct ___tracy_source_location_data -{ - const char* name; - const char* function; - const char* file; - uint32_t line; - uint32_t color; -}; - -struct ___tracy_c_zone_context -{ - uint32_t id; - int32_t active; -}; - -struct ___tracy_gpu_time_data -{ - int64_t gpuTime; - uint16_t queryId; - uint8_t context; -}; - -struct ___tracy_gpu_zone_begin_data { - uint64_t srcloc; - uint16_t queryId; - uint8_t context; -}; - -struct ___tracy_gpu_zone_begin_callstack_data { - uint64_t srcloc; - int32_t depth; - uint16_t queryId; - uint8_t context; -}; - -struct ___tracy_gpu_zone_end_data { - uint16_t queryId; - uint8_t context; -}; - -struct ___tracy_gpu_new_context_data { - int64_t gpuTime; - float period; - uint8_t context; - uint8_t flags; - uint8_t type; -}; - -struct ___tracy_gpu_context_name_data { - uint8_t context; - const char* name; - uint16_t len; -}; - -struct ___tracy_gpu_calibration_data { - int64_t gpuTime; - int64_t cpuDelta; - uint8_t context; -}; - -struct ___tracy_gpu_time_sync_data { - int64_t gpuTime; - uint8_t context; -}; - -#define TRACY_C_D3D11_ZONE_CONTEXT_SIZE 16 -#define TRACY_C_D3D11_ZONE_CONTEXT_ALIGN 8 -struct ___tracy_c_d3d11_zone_context { - _Alignas(TRACY_C_D3D11_ZONE_CONTEXT_ALIGN) char opaque[TRACY_C_D3D11_ZONE_CONTEXT_SIZE]; -}; - -// Some containers don't support storing const types. -// This struct, as visible to user, is immutable, so treat it as if const was declared here. -typedef /*const*/ struct ___tracy_c_zone_context TracyCZoneCtx; -typedef /*const*/ struct ___tracy_c_d3d11_zone_context TracyCD3D11ZoneCtx; - -struct TracyCLockCtx; -struct TracyCSharedLockCtx; -struct TracyCD3D11Ctx; - -#ifdef TRACY_MANUAL_LIFETIME -TRACY_API void ___tracy_startup_profiler(void); -TRACY_API void ___tracy_shutdown_profiler(void); -TRACY_API int32_t ___tracy_profiler_started(void); - -# define TracyCIsStarted ___tracy_profiler_started() -#else -# define TracyCIsStarted 1 -#endif - -TRACY_API uint64_t ___tracy_alloc_srcloc( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, uint32_t color ); -TRACY_API uint64_t ___tracy_alloc_srcloc_name( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, uint32_t color ); - -TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin( const struct ___tracy_source_location_data* srcloc, int32_t active ); -TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_callstack( const struct ___tracy_source_location_data* srcloc, int32_t depth, int32_t active ); -TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_alloc( uint64_t srcloc, int32_t active ); -TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_alloc_callstack( uint64_t srcloc, int32_t depth, int32_t active ); -TRACY_API void ___tracy_emit_zone_end( TracyCZoneCtx ctx ); -TRACY_API void ___tracy_emit_zone_text( TracyCZoneCtx ctx, const char* txt, size_t size ); -TRACY_API void ___tracy_emit_zone_name( TracyCZoneCtx ctx, const char* txt, size_t size ); -TRACY_API void ___tracy_emit_zone_color( TracyCZoneCtx ctx, uint32_t color ); -TRACY_API void ___tracy_emit_zone_value( TracyCZoneCtx ctx, uint64_t value ); - -TRACY_API void ___tracy_emit_gpu_zone_begin( const struct ___tracy_gpu_zone_begin_data ); -TRACY_API void ___tracy_emit_gpu_zone_begin_callstack( const struct ___tracy_gpu_zone_begin_callstack_data ); -TRACY_API void ___tracy_emit_gpu_zone_begin_alloc( const struct ___tracy_gpu_zone_begin_data ); -TRACY_API void ___tracy_emit_gpu_zone_begin_alloc_callstack( const struct ___tracy_gpu_zone_begin_callstack_data ); -TRACY_API void ___tracy_emit_gpu_zone_end( const struct ___tracy_gpu_zone_end_data data ); -TRACY_API void ___tracy_emit_gpu_time( const struct ___tracy_gpu_time_data ); -TRACY_API void ___tracy_emit_gpu_new_context( const struct ___tracy_gpu_new_context_data ); -TRACY_API void ___tracy_emit_gpu_context_name( const struct ___tracy_gpu_context_name_data ); -TRACY_API void ___tracy_emit_gpu_calibration( const struct ___tracy_gpu_calibration_data ); -TRACY_API void ___tracy_emit_gpu_time_sync( const struct ___tracy_gpu_time_sync_data ); - -TRACY_API void ___tracy_emit_gpu_zone_begin_serial( const struct ___tracy_gpu_zone_begin_data ); -TRACY_API void ___tracy_emit_gpu_zone_begin_callstack_serial( const struct ___tracy_gpu_zone_begin_callstack_data ); -TRACY_API void ___tracy_emit_gpu_zone_begin_alloc_serial( const struct ___tracy_gpu_zone_begin_data ); -TRACY_API void ___tracy_emit_gpu_zone_begin_alloc_callstack_serial( const struct ___tracy_gpu_zone_begin_callstack_data ); -TRACY_API void ___tracy_emit_gpu_zone_end_serial( const struct ___tracy_gpu_zone_end_data data ); -TRACY_API void ___tracy_emit_gpu_time_serial( const struct ___tracy_gpu_time_data ); -TRACY_API void ___tracy_emit_gpu_new_context_serial( const struct ___tracy_gpu_new_context_data ); -TRACY_API void ___tracy_emit_gpu_context_name_serial( const struct ___tracy_gpu_context_name_data ); -TRACY_API void ___tracy_emit_gpu_calibration_serial( const struct ___tracy_gpu_calibration_data ); -TRACY_API void ___tracy_emit_gpu_time_sync_serial( const struct ___tracy_gpu_time_sync_data ); - -TRACY_API int32_t ___tracy_connected(void); - -#ifndef TRACY_CALLSTACK -#define TRACY_CALLSTACK 0 -#endif - -#define TracyCZone( ctx, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active ); -#define TracyCZoneN( ctx, name, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { name, __func__, TracyFile, (uint32_t)TracyLine, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active ); -#define TracyCZoneC( ctx, color, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active ); -#define TracyCZoneNC( ctx, name, color, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { name, __func__, TracyFile, (uint32_t)TracyLine, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active ); - -#define TracyCZoneEnd( ctx ) ___tracy_emit_zone_end( ctx ); - -#define TracyCZoneText( ctx, txt, size ) ___tracy_emit_zone_text( ctx, txt, size ); -#define TracyCZoneName( ctx, txt, size ) ___tracy_emit_zone_name( ctx, txt, size ); -#define TracyCZoneColor( ctx, color ) ___tracy_emit_zone_color( ctx, color ); -#define TracyCZoneValue( ctx, value ) ___tracy_emit_zone_value( ctx, value ); - - -TRACY_API void ___tracy_emit_memory_alloc( const void* ptr, size_t size, int32_t secure ); -TRACY_API void ___tracy_emit_memory_alloc_callstack( const void* ptr, size_t size, int32_t depth, int32_t secure ); -TRACY_API void ___tracy_emit_memory_free( const void* ptr, int32_t secure ); -TRACY_API void ___tracy_emit_memory_free_callstack( const void* ptr, int32_t depth, int32_t secure ); -TRACY_API void ___tracy_emit_memory_alloc_named( const void* ptr, size_t size, int32_t secure, const char* name ); -TRACY_API void ___tracy_emit_memory_alloc_callstack_named( const void* ptr, size_t size, int32_t depth, int32_t secure, const char* name ); -TRACY_API void ___tracy_emit_memory_free_named( const void* ptr, int32_t secure, const char* name ); -TRACY_API void ___tracy_emit_memory_free_callstack_named( const void* ptr, int32_t depth, int32_t secure, const char* name ); -TRACY_API void ___tracy_emit_memory_discard( const char* name, int32_t secure ); -TRACY_API void ___tracy_emit_memory_discard_callstack( const char* name, int32_t secure, int32_t depth ); - -TRACY_API void ___tracy_emit_message( const char* txt, size_t size, int32_t callstack_depth ); -TRACY_API void ___tracy_emit_messageL( const char* txt, int32_t callstack_depth ); -TRACY_API void ___tracy_emit_messageC( const char* txt, size_t size, uint32_t color, int32_t callstack_depth ); -TRACY_API void ___tracy_emit_messageLC( const char* txt, uint32_t color, int32_t callstack_depth ); - -#define TracyCAlloc( ptr, size ) ___tracy_emit_memory_alloc_callstack( ptr, size, TRACY_CALLSTACK, 0 ) -#define TracyCFree( ptr ) ___tracy_emit_memory_free_callstack( ptr, TRACY_CALLSTACK, 0 ) -#define TracyCMemoryDiscard( name ) ___tracy_emit_memory_discard_callstack( name, 0, TRACY_CALLSTACK ); -#define TracyCSecureAlloc( ptr, size ) ___tracy_emit_memory_alloc_callstack( ptr, size, TRACY_CALLSTACK, 1 ) -#define TracyCSecureFree( ptr ) ___tracy_emit_memory_free_callstack( ptr, TRACY_CALLSTACK, 1 ) -#define TracyCSecureMemoryDiscard( name ) ___tracy_emit_memory_discard_callstack( name, 1, TRACY_CALLSTACK ); - -#define TracyCAllocN( ptr, size, name ) ___tracy_emit_memory_alloc_callstack_named( ptr, size, TRACY_CALLSTACK, 0, name ) -#define TracyCFreeN( ptr, name ) ___tracy_emit_memory_free_callstack_named( ptr, TRACY_CALLSTACK, 0, name ) -#define TracyCSecureAllocN( ptr, size, name ) ___tracy_emit_memory_alloc_callstack_named( ptr, size, TRACY_CALLSTACK, 1, name ) -#define TracyCSecureFreeN( ptr, name ) ___tracy_emit_memory_free_callstack_named( ptr, TRACY_CALLSTACK, 1, name ) - -#define TracyCMessage( txt, size ) ___tracy_emit_message( txt, size, TRACY_CALLSTACK ); -#define TracyCMessageL( txt ) ___tracy_emit_messageL( txt, TRACY_CALLSTACK ); -#define TracyCMessageC( txt, size, color ) ___tracy_emit_messageC( txt, size, color, TRACY_CALLSTACK ); -#define TracyCMessageLC( txt, color ) ___tracy_emit_messageLC( txt, color, TRACY_CALLSTACK ); - - -TRACY_API void ___tracy_emit_frame_mark( const char* name ); -TRACY_API void ___tracy_emit_frame_mark_start( const char* name ); -TRACY_API void ___tracy_emit_frame_mark_end( const char* name ); -TRACY_API void ___tracy_emit_frame_image( const void* image, uint16_t w, uint16_t h, uint8_t offset, int32_t flip ); - -#define TracyCFrameMark ___tracy_emit_frame_mark( 0 ); -#define TracyCFrameMarkNamed( name ) ___tracy_emit_frame_mark( name ); -#define TracyCFrameMarkStart( name ) ___tracy_emit_frame_mark_start( name ); -#define TracyCFrameMarkEnd( name ) ___tracy_emit_frame_mark_end( name ); -#define TracyCFrameImage( image, width, height, offset, flip ) ___tracy_emit_frame_image( image, width, height, offset, flip ); - - -TRACY_API void ___tracy_emit_plot( const char* name, double val ); -TRACY_API void ___tracy_emit_plot_float( const char* name, float val ); -TRACY_API void ___tracy_emit_plot_int( const char* name, int64_t val ); -TRACY_API void ___tracy_emit_plot_config( const char* name, int32_t type, int32_t step, int32_t fill, uint32_t color ); -TRACY_API void ___tracy_emit_message_appinfo( const char* txt, size_t size ); - -#define TracyCPlot( name, val ) ___tracy_emit_plot( name, val ); -#define TracyCPlotF( name, val ) ___tracy_emit_plot_float( name, val ); -#define TracyCPlotI( name, val ) ___tracy_emit_plot_int( name, val ); -#define TracyCPlotConfig( name, type, step, fill, color ) ___tracy_emit_plot_config( name, type, step, fill, color ); -#define TracyCAppInfo( txt, size ) ___tracy_emit_message_appinfo( txt, size ); - - -#define TracyCZoneS( ctx, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), depth, active ); -#define TracyCZoneNS( ctx, name, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { name, __func__, TracyFile, (uint32_t)TracyLine, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), depth, active ); -#define TracyCZoneCS( ctx, color, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), depth, active ); -#define TracyCZoneNCS( ctx, name, color, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { name, __func__, TracyFile, (uint32_t)TracyLine, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), depth, active ); - -#define TracyCAllocS( ptr, size, depth ) ___tracy_emit_memory_alloc_callstack( ptr, size, depth, 0 ) -#define TracyCFreeS( ptr, depth ) ___tracy_emit_memory_free_callstack( ptr, depth, 0 ) -#define TracyCMemoryDiscardS( name, depth ) ___tracy_emit_memory_discard_callstack( name, 0, depth ) -#define TracyCSecureAllocS( ptr, size, depth ) ___tracy_emit_memory_alloc_callstack( ptr, size, depth, 1 ) -#define TracyCSecureFreeS( ptr, depth ) ___tracy_emit_memory_free_callstack( ptr, depth, 1 ) -#define TracyCSecureMemoryDiscardS( name, depth ) ___tracy_emit_memory_discard_callstack( name, 1, depth ) - -#define TracyCAllocNS( ptr, size, depth, name ) ___tracy_emit_memory_alloc_callstack_named( ptr, size, depth, 0, name ) -#define TracyCFreeNS( ptr, depth, name ) ___tracy_emit_memory_free_callstack_named( ptr, depth, 0, name ) -#define TracyCSecureAllocNS( ptr, size, depth, name ) ___tracy_emit_memory_alloc_callstack_named( ptr, size, depth, 1, name ) -#define TracyCSecureFreeNS( ptr, depth, name ) ___tracy_emit_memory_free_callstack_named( ptr, depth, 1, name ) - -#define TracyCMessageS( txt, size, depth ) ___tracy_emit_message( txt, size, depth ); -#define TracyCMessageLS( txt, depth ) ___tracy_emit_messageL( txt, depth ); -#define TracyCMessageCS( txt, size, color, depth ) ___tracy_emit_messageC( txt, size, color, depth ); -#define TracyCMessageLCS( txt, color, depth ) ___tracy_emit_messageLC( txt, color, depth ); - - -TRACY_API struct TracyCLockCtx *___tracy_announce_lockable_ctx(const struct ___tracy_source_location_data *srcloc); -TRACY_API void ___tracy_terminate_lockable_ctx(struct TracyCLockCtx *lockdata); -TRACY_API int32_t ___tracy_before_lock_lockable_ctx(struct TracyCLockCtx *lockdata); -TRACY_API void ___tracy_after_lock_lockable_ctx(struct TracyCLockCtx *lockdata); -TRACY_API void ___tracy_after_unlock_lockable_ctx(struct TracyCLockCtx *lockdata); -TRACY_API void ___tracy_after_try_lock_lockable_ctx(struct TracyCLockCtx *lockdata, int32_t acquired); -TRACY_API void ___tracy_mark_lockable_ctx(struct TracyCLockCtx *lockdata, const struct ___tracy_source_location_data *srcloc); -TRACY_API void ___tracy_custom_name_lockable_ctx(struct TracyCLockCtx *lockdata, const char* name, size_t nameSz ); - -#define TracyCLockAnnounce( lock ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, 0 }; lock = ___tracy_announce_lockable_ctx( &TracyConcat(__tracy_source_location,TracyLine) ); -#define TracyCLockTerminate( lock ) ___tracy_terminate_lockable_ctx( lock ); -#define TracyCLockBeforeLock( lock ) ___tracy_before_lock_lockable_ctx( lock ); -#define TracyCLockAfterLock( lock ) ___tracy_after_lock_lockable_ctx( lock ); -#define TracyCLockAfterUnlock( lock ) ___tracy_after_unlock_lockable_ctx( lock ); -#define TracyCLockAfterTryLock( lock, acquired ) ___tracy_after_try_lock_lockable_ctx( lock, acquired ); -#define TracyCLockMark( lock ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, 0 }; ___tracy_mark_lockable_ctx( lock, &TracyConcat(__tracy_source_location,TracyLine) ); -#define TracyCLockCustomName( lock, name, nameSz ) ___tracy_custom_name_lockable_ctx( lock, name, nameSz ); - -TRACY_API struct TracyCSharedLockCtx *___tracy_announce_shared_lockable_ctx(const struct ___tracy_source_location_data *srcloc); -TRACY_API void ___tracy_terminate_shared_lockable_ctx(struct TracyCSharedLockCtx *lockdata); -TRACY_API int32_t ___tracy_before_exclusive_lock_shared_lockable_ctx(struct TracyCSharedLockCtx *lockdata); -TRACY_API void ___tracy_after_exclusive_lock_shared_lockable_ctx(struct TracyCSharedLockCtx *lockdata); -TRACY_API void ___tracy_after_exclusive_unlock_shared_lockable_ctx(struct TracyCSharedLockCtx *lockdata); -TRACY_API void ___tracy_after_try_exclusive_lock_shared_lockable_ctx(struct TracyCSharedLockCtx *lockdata, int32_t acquired); -TRACY_API int32_t ___tracy_before_shared_lock_shared_lockable_ctx(struct TracyCSharedLockCtx *lockdata); -TRACY_API void ___tracy_after_shared_lock_shared_lockable_ctx(struct TracyCSharedLockCtx *lockdata); -TRACY_API void ___tracy_after_shared_unlock_shared_lockable_ctx(struct TracyCSharedLockCtx *lockdata); -TRACY_API void ___tracy_after_try_shared_lock_shared_lockable_ctx(struct TracyCSharedLockCtx *lockdata, int32_t acquired); -TRACY_API void ___tracy_mark_shared_lockable_ctx(struct TracyCSharedLockCtx *lockdata, const struct ___tracy_source_location_data *srcloc); -TRACY_API void ___tracy_custom_name_shared_lockable_ctx(struct TracyCSharedLockCtx *lockdata, const char *name, size_t nameSz); - -#define TracyCSharedLockAnnounce( lock ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, 0 }; lock = ___tracy_announce_shared_lockable_ctx( &TracyConcat(__tracy_source_location,TracyLine) ); -#define TracyCSharedLockTerminate( lock ) ___tracy_terminate_shared_lockable_ctx( lock ); -#define TracyCSharedLockBeforeExclusiveLock( lock ) ___tracy_before_exclusive_lock_shared_lockable_ctx( lock ); -#define TracyCSharedLockAfterExclusiveLock( lock ) ___tracy_after_exclusive_lock_shared_lockable_ctx( lock ); -#define TracyCSharedLockAfterExclusiveUnlock( lock ) ___tracy_after_exclusive_unlock_shared_lockable_ctx( lock ); -#define TracyCSharedLockAfterTryExclusiveLock( lock, acquired ) ___tracy_after_try_exclusive_lock_shared_lockable_ctx( lock, acquired ); -#define TracyCSharedLockBeforeSharedLock( lock ) ___tracy_before_shared_lock_shared_lockable_ctx( lock ); -#define TracyCSharedLockAfterSharedLock( lock ) ___tracy_after_shared_lock_shared_lockable_ctx( lock ); -#define TracyCSharedLockAfterSharedUnlock( lock ) ___tracy_after_shared_unlock_shared_lockable_ctx( lock ); -#define TracyCSharedLockAfterTrySharedLock( lock, acquired ) ___tracy_after_try_shared_lock_shared_lockable_ctx( lock, acquired ); -#define TracyCSharedLockMark( lock ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, 0 }; ___tracy_mark_shared_lockable_ctx( lock, &TracyConcat(__tracy_source_location,TracyLine) ); -#define TracyCSharedLockCustomName( lock, name, nameSz ) ___tracy_custom_name_shared_lockable_ctx( lock, name, nameSz ); - -struct ID3D11Device; -struct ID3D11DeviceContext; -TRACY_API struct TracyCD3D11Ctx *___tracy_d3d11_context_announce(struct ID3D11Device *device, struct ID3D11DeviceContext *devicectx, char *name, int name_size); -TRACY_API void ___tracy_d3d11_context_terminate(struct TracyCD3D11Ctx *d3d11_ctx); -TRACY_API void ___tracy_d3d11_context_collect(struct TracyCD3D11Ctx *d3d11_ctx); -TRACY_API void ___tracy_d3d11_emit_zone_begin(struct TracyCD3D11Ctx *d3d11_ctx, TracyCD3D11ZoneCtx *zone_ctx, struct ___tracy_source_location_data *srcloc, int32_t active); -TRACY_API void ___tracy_d3d11_emit_zone_end(TracyCD3D11ZoneCtx zone_ctx); - -#define TracyCIsConnected ___tracy_connected() - -#ifdef TRACY_FIBERS -TRACY_API void ___tracy_fiber_enter( const char* fiber ); -TRACY_API void ___tracy_fiber_leave( void ); - -# define TracyCFiberEnter( fiber ) ___tracy_fiber_enter( fiber ); -# define TracyCFiberLeave ___tracy_fiber_leave(); -#endif - -#endif - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/third_party/tracy/tracy/TracyD3D11.hpp b/src/third_party/tracy/tracy/TracyD3D11.hpp deleted file mode 100644 index 6e0c756e..00000000 --- a/src/third_party/tracy/tracy/TracyD3D11.hpp +++ /dev/null @@ -1,446 +0,0 @@ -#ifndef __TRACYD3D11_HPP__ -#define __TRACYD3D11_HPP__ - -#ifndef TRACY_ENABLE - -#define TracyD3D11Context(device,queue) nullptr -#define TracyD3D11Destroy(ctx) -#define TracyD3D11ContextName(ctx, name, size) - -#define TracyD3D11NewFrame(ctx) - -#define TracyD3D11Zone(ctx, name) -#define TracyD3D11ZoneC(ctx, name, color) -#define TracyD3D11NamedZone(ctx, varname, name, active) -#define TracyD3D11NamedZoneC(ctx, varname, name, color, active) -#define TracyD3D11ZoneTransient(ctx, varname, name, active) - -#define TracyD3D11ZoneS(ctx, name, depth) -#define TracyD3D11ZoneCS(ctx, name, color, depth) -#define TracyD3D11NamedZoneS(ctx, varname, name, depth, active) -#define TracyD3D11NamedZoneCS(ctx, varname, name, color, depth, active) -#define TracyD3D11ZoneTransientS(ctx, varname, name, depth, active) - -#define TracyD3D11Collect(ctx) - -namespace tracy -{ -class D3D11ZoneScope {}; -} - -using TracyD3D11Ctx = void*; - -#else - -#include -#include -#include - -#include "Tracy.hpp" -#include "../client/TracyProfiler.hpp" -#include "../client/TracyCallstack.hpp" -#include "../common/TracyYield.hpp" - -#include - -#define TracyD3D11Panic(msg, ...) do { assert(false && "TracyD3D11: " msg); TracyMessageLC("TracyD3D11: " msg, tracy::Color::Red4); __VA_ARGS__; } while(false); - -namespace tracy -{ - -class D3D11Ctx -{ - friend class D3D11ZoneScope; - - static constexpr uint32_t MaxQueries = 64 * 1024; - - enum CollectMode { POLL, BLOCK }; - -public: - D3D11Ctx( ID3D11Device* device, ID3D11DeviceContext* devicectx ) - { - // TODO: consider calling ID3D11Device::GetImmediateContext() instead of passing it as an argument - m_device = device; - device->AddRef(); - m_immediateDevCtx = devicectx; - devicectx->AddRef(); - - { - D3D11_QUERY_DESC desc = { }; - desc.Query = D3D11_QUERY_TIMESTAMP_DISJOINT; - if (FAILED(m_device->CreateQuery(&desc, &m_disjointQuery))) - { - TracyD3D11Panic("unable to create disjoint timestamp query.", return); - } - } - - for (ID3D11Query*& query : m_queries) - { - D3D11_QUERY_DESC desc = { }; - desc.Query = D3D11_QUERY_TIMESTAMP; - if (FAILED(m_device->CreateQuery(&desc, &query))) - { - TracyD3D11Panic("unable to create timestamp query.", return); - } - } - - // Calibrate CPU and GPU timestamps - int64_t tcpu = 0; - int64_t tgpu = 0; - for (int attempts = 0; attempts < 50; attempts++) - { - m_immediateDevCtx->Begin(m_disjointQuery); - m_immediateDevCtx->End(m_queries[0]); - m_immediateDevCtx->End(m_disjointQuery); - - int64_t tcpu0 = Profiler::GetTime(); - WaitForQuery(m_disjointQuery); - int64_t tcpu1 = Profiler::GetTime(); - - D3D11_QUERY_DATA_TIMESTAMP_DISJOINT disjoint = { }; - if (m_immediateDevCtx->GetData(m_disjointQuery, &disjoint, sizeof(disjoint), 0) != S_OK) - { - TracyMessageLC("TracyD3D11: unable to query GPU timestamp; retrying...", tracy::Color::Tomato); - continue; - } - - if (disjoint.Disjoint) - continue; - - UINT64 timestamp = 0; - if (m_immediateDevCtx->GetData(m_queries[0], ×tamp, sizeof(timestamp), 0) != S_OK) - continue; // this should never happen, since the enclosing disjoint query succeeded - - tcpu = tcpu0 + (tcpu1 - tcpu0) * 1 / 2; - tgpu = timestamp * (1000000000 / disjoint.Frequency); - break; - } - - // ready to roll - m_contextId = GetGpuCtxCounter().fetch_add(1); - m_immediateDevCtx->Begin(m_disjointQuery); - m_previousCheckpoint = m_nextCheckpoint = 0; - - auto* item = Profiler::QueueSerial(); - MemWrite( &item->hdr.type, QueueType::GpuNewContext ); - MemWrite( &item->gpuNewContext.cpuTime, tcpu ); - MemWrite( &item->gpuNewContext.gpuTime, tgpu ); - MemWrite( &item->gpuNewContext.thread, uint32_t(0) ); // #TODO: why not GetThreadHandle()? - MemWrite( &item->gpuNewContext.period, 1.0f ); - MemWrite( &item->gpuNewContext.context, m_contextId); - MemWrite( &item->gpuNewContext.flags, uint8_t(0) ); - MemWrite( &item->gpuNewContext.type, GpuContextType::Direct3D11 ); - -#ifdef TRACY_ON_DEMAND - GetProfiler().DeferItem( *item ); -#endif - - Profiler::QueueSerialFinish(); - } - - ~D3D11Ctx() - { - // collect all pending timestamps before destroying everything - do - { - Collect(BLOCK); - } while (m_previousCheckpoint != m_queryCounter); - - for (ID3D11Query* query : m_queries) - { - query->Release(); - } - m_immediateDevCtx->End(m_disjointQuery); - m_disjointQuery->Release(); - m_immediateDevCtx->Release(); - m_device->Release(); - } - - void Name( const char* name, uint16_t len ) - { - auto ptr = (char*)tracy_malloc( len ); - memcpy( ptr, name, len ); - - auto item = Profiler::QueueSerial(); - MemWrite( &item->hdr.type, QueueType::GpuContextName ); - MemWrite( &item->gpuContextNameFat.context, m_contextId ); - MemWrite( &item->gpuContextNameFat.ptr, (uint64_t)ptr ); - MemWrite( &item->gpuContextNameFat.size, len ); -#ifdef TRACY_ON_DEMAND - GetProfiler().DeferItem( *item ); -#endif - Profiler::QueueSerialFinish(); - } - - void Collect(CollectMode mode = POLL) - { - ZoneScopedC( Color::Red4 ); - -#ifdef TRACY_ON_DEMAND - if( !GetProfiler().IsConnected() ) - { - m_previousCheckpoint = m_nextCheckpoint = m_queryCounter; - return; - } -#endif - - if (m_previousCheckpoint == m_nextCheckpoint) - { - uintptr_t nextCheckpoint = m_queryCounter; - if (nextCheckpoint == m_nextCheckpoint) - { - return; - } - m_nextCheckpoint = nextCheckpoint; - m_immediateDevCtx->End(m_disjointQuery); - } - - if (mode == CollectMode::BLOCK) - { - WaitForQuery(m_disjointQuery); - } - - D3D11_QUERY_DATA_TIMESTAMP_DISJOINT disjoint = { }; - if (m_immediateDevCtx->GetData(m_disjointQuery, &disjoint, sizeof(disjoint), D3D11_ASYNC_GETDATA_DONOTFLUSH) != S_OK) - { - return; - } - - if (disjoint.Disjoint == TRUE) - { - m_previousCheckpoint = m_nextCheckpoint; - TracyD3D11Panic("disjoint timestamps detected; dropping."); - return; - } - - auto begin = m_previousCheckpoint; - auto end = m_nextCheckpoint; - for (auto i = begin; i != end; ++i) - { - uint32_t k = RingIndex(i); - UINT64 timestamp = 0; - if (m_immediateDevCtx->GetData(m_queries[k], ×tamp, sizeof(timestamp), 0) != S_OK) - { - TracyD3D11Panic("timestamp expected to be ready, but it was not!"); - break; - } - timestamp *= (1000000000ull / disjoint.Frequency); - auto* item = Profiler::QueueSerial(); - MemWrite(&item->hdr.type, QueueType::GpuTime); - MemWrite(&item->gpuTime.gpuTime, static_cast(timestamp)); - MemWrite(&item->gpuTime.queryId, static_cast(k)); - MemWrite(&item->gpuTime.context, m_contextId); - Profiler::QueueSerialFinish(); - } - - // disjoint timestamp queries should only be invoked once per frame or less - // https://learn.microsoft.com/en-us/windows/win32/api/d3d11/ne-d3d11-d3d11_query - m_immediateDevCtx->Begin(m_disjointQuery); - m_previousCheckpoint = m_nextCheckpoint; - } - -private: - tracy_force_inline uint32_t RingIndex(uintptr_t index) - { - index %= MaxQueries; - return static_cast(index); - } - - tracy_force_inline uint32_t RingCount(uintptr_t begin, uintptr_t end) - { - // wrap-around safe: all unsigned - uintptr_t count = end - begin; - return static_cast(count); - } - - tracy_force_inline uint32_t NextQueryId() - { - auto id = m_queryCounter++; - if (RingCount(m_previousCheckpoint, id) >= MaxQueries) - { - TracyD3D11Panic("too many pending timestamp queries."); - // #TODO: return some sentinel value; ideally a "hidden" query index - } - return RingIndex(id); - } - - tracy_force_inline ID3D11Query* GetQueryObjectFromId(uint32_t id) - { - return m_queries[id]; - } - - tracy_force_inline void WaitForQuery(ID3D11Query* query) - { - m_immediateDevCtx->Flush(); - while (m_immediateDevCtx->GetData(query, nullptr, 0, 0) != S_OK) - YieldThread(); // busy-wait :-( attempt to reduce power usage with _mm_pause() & friends... - } - - tracy_force_inline uint8_t GetContextId() const - { - return m_contextId; - } - - ID3D11Device* m_device = nullptr; - ID3D11DeviceContext* m_immediateDevCtx = nullptr; - - ID3D11Query* m_queries[MaxQueries]; - ID3D11Query* m_disjointQuery = nullptr; - - uint8_t m_contextId = 255; // NOTE: apparently, 255 means invalid id; is this documented anywhere? - - uintptr_t m_queryCounter = 0; - - uintptr_t m_previousCheckpoint = 0; - uintptr_t m_nextCheckpoint = 0; -}; - -class D3D11ZoneScope -{ -public: - tracy_force_inline D3D11ZoneScope( D3D11Ctx* ctx, const SourceLocationData* srcloc, bool active ) - : D3D11ZoneScope(ctx, active) - { - if( !m_active ) return; - - auto* item = Profiler::QueueSerial(); - WriteQueueItem(item, QueueType::GpuZoneBeginSerial, reinterpret_cast(srcloc)); - } - - tracy_force_inline D3D11ZoneScope( D3D11Ctx* ctx, const SourceLocationData* srcloc, int32_t depth, bool active ) - : D3D11ZoneScope(ctx, active) - { - if( !m_active ) return; - - auto* item = Profiler::QueueSerialCallstack(Callstack(depth)); - WriteQueueItem(item, QueueType::GpuZoneBeginCallstackSerial, reinterpret_cast(srcloc)); - } - - tracy_force_inline D3D11ZoneScope(D3D11Ctx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, bool active) - : D3D11ZoneScope(ctx, active) - { - if( !m_active ) return; - - const auto sourceLocation = Profiler::AllocSourceLocation(line, source, sourceSz, function, functionSz, name, nameSz); - - auto* item = Profiler::QueueSerial(); - WriteQueueItem(item, QueueType::GpuZoneBeginAllocSrcLocSerial, sourceLocation); - } - - tracy_force_inline D3D11ZoneScope(D3D11Ctx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int32_t depth, bool active) - : D3D11ZoneScope(ctx, active) - { - if( !m_active ) return; - - const auto sourceLocation = Profiler::AllocSourceLocation(line, source, sourceSz, function, functionSz, name, nameSz); - - auto* item = Profiler::QueueSerialCallstack(Callstack(depth)); - WriteQueueItem(item, QueueType::GpuZoneBeginAllocSrcLocCallstackSerial, sourceLocation); - } - - tracy_force_inline ~D3D11ZoneScope() - { - if( !m_active ) return; - - const auto queryId = m_ctx->NextQueryId(); - m_ctx->m_immediateDevCtx->End(m_ctx->GetQueryObjectFromId(queryId)); - - auto* item = Profiler::QueueSerial(); - MemWrite( &item->hdr.type, QueueType::GpuZoneEndSerial ); - MemWrite( &item->gpuZoneEnd.cpuTime, Profiler::GetTime() ); - MemWrite( &item->gpuZoneEnd.thread, GetThreadHandle() ); - MemWrite( &item->gpuZoneEnd.queryId, uint16_t( queryId ) ); - MemWrite( &item->gpuZoneEnd.context, m_ctx->GetContextId() ); - Profiler::QueueSerialFinish(); - } - -private: - tracy_force_inline D3D11ZoneScope( D3D11Ctx* ctx, bool active ) -#ifdef TRACY_ON_DEMAND - : m_active( active && GetProfiler().IsConnected() ) -#else - : m_active( active ) -#endif - { - if( !m_active ) return; - m_ctx = ctx; - } - - void WriteQueueItem(tracy::QueueItem* item, tracy::QueueType queueItemType, uint64_t sourceLocation) - { - const auto queryId = m_ctx->NextQueryId(); - m_ctx->m_immediateDevCtx->End(m_ctx->GetQueryObjectFromId(queryId)); - - MemWrite( &item->hdr.type, queueItemType); - MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() ); - MemWrite( &item->gpuZoneBegin.srcloc, sourceLocation ); - MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() ); - MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) ); - MemWrite( &item->gpuZoneBegin.context, m_ctx->GetContextId() ); - Profiler::QueueSerialFinish(); - } - - const bool m_active; - - D3D11Ctx* m_ctx; -}; - -static inline D3D11Ctx* CreateD3D11Context( ID3D11Device* device, ID3D11DeviceContext* devicectx ) -{ - auto ctx = (D3D11Ctx*)tracy_malloc( sizeof( D3D11Ctx ) ); - new(ctx) D3D11Ctx( device, devicectx ); - return ctx; -} - -static inline void DestroyD3D11Context( D3D11Ctx* ctx ) -{ - ctx->~D3D11Ctx(); - tracy_free( ctx ); -} -} - -#undef TracyD3D11Panic - -using TracyD3D11Ctx = tracy::D3D11Ctx*; - -#define TracyD3D11Context( device, devicectx ) tracy::CreateD3D11Context( device, devicectx ); -#define TracyD3D11Destroy(ctx) tracy::DestroyD3D11Context(ctx); -#define TracyD3D11ContextName(ctx, name, size) ctx->Name(name, size); - -#define TracyD3D11UnnamedZone ___tracy_gpu_d3d11_zone -#define TracyD3D11SrcLocSymbol TracyConcat(__tracy_gpu_d3d11_source_location,TracyLine) -#define TracyD3D11SrcLocObject(name, color) static constexpr tracy::SourceLocationData TracyD3D11SrcLocSymbol { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; - -#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK -# define TracyD3D11Zone( ctx, name ) TracyD3D11NamedZoneS( ctx, TracyD3D11UnnamedZone, name, TRACY_CALLSTACK, true ) -# define TracyD3D11ZoneC( ctx, name, color ) TracyD3D11NamedZoneCS( ctx, TracyD3D11UnnamedZone, name, color, TRACY_CALLSTACK, true ) -# define TracyD3D11NamedZone( ctx, varname, name, active ) TracyD3D11SrcLocObject(name, 0); tracy::D3D11ZoneScope varname( ctx, &TracyD3D11SrcLocSymbol, TRACY_CALLSTACK, active ); -# define TracyD3D11NamedZoneC( ctx, varname, name, color, active ) TracyD3D11SrcLocObject(name, color); tracy::D3D11ZoneScope varname( ctx, &TracyD3D11SrcLocSymbol, TRACY_CALLSTACK, active ); -# define TracyD3D11ZoneTransient(ctx, varname, name, active) TracyD3D11ZoneTransientS(ctx, varname, cmdList, name, TRACY_CALLSTACK, active) -#else -# define TracyD3D11Zone( ctx, name ) TracyD3D11NamedZone( ctx, TracyD3D11UnnamedZone, name, true ) -# define TracyD3D11ZoneC( ctx, name, color ) TracyD3D11NamedZoneC( ctx, TracyD3D11UnnamedZone, name, color, true ) -# define TracyD3D11NamedZone( ctx, varname, name, active ) TracyD3D11SrcLocObject(name, 0); tracy::D3D11ZoneScope varname( ctx, &TracyD3D11SrcLocSymbol, active ); -# define TracyD3D11NamedZoneC( ctx, varname, name, color, active ) TracyD3D11SrcLocObject(name, color); tracy::D3D11ZoneScope varname( ctx, &TracyD3D11SrcLocSymbol, active ); -# define TracyD3D11ZoneTransient(ctx, varname, name, active) tracy::D3D11ZoneScope varname{ ctx, TracyLine, TracyFile, strlen(TracyFile), TracyFunction, strlen(TracyFunction), name, strlen(name), active }; -#endif - -#ifdef TRACY_HAS_CALLSTACK -# define TracyD3D11ZoneS( ctx, name, depth ) TracyD3D11NamedZoneS( ctx, TracyD3D11UnnamedZone, name, depth, true ) -# define TracyD3D11ZoneCS( ctx, name, color, depth ) TracyD3D11NamedZoneCS( ctx, TracyD3D11UnnamedZone, name, color, depth, true ) -# define TracyD3D11NamedZoneS( ctx, varname, name, depth, active ) TracyD3D11SrcLocObject(name, 0); tracy::D3D11ZoneScope varname( ctx, &TracyD3D11SrcLocSymbol, depth, active ); -# define TracyD3D11NamedZoneCS( ctx, varname, name, color, depth, active ) TracyD3D11SrcLocObject(name, color); tracy::D3D11ZoneScope varname( ctx, &TracyD3D11SrcLocSymbol, depth, active ); -# define TracyD3D11ZoneTransientS(ctx, varname, name, depth, active) tracy::D3D11ZoneScope varname{ ctx, TracyLine, TracyFile, strlen(TracyFile), TracyFunction, strlen(TracyFunction), name, strlen(name), depth, active }; -#else -# define TracyD3D11ZoneS( ctx, name, depth, active ) TracyD3D11Zone( ctx, name ) -# define TracyD3D11ZoneCS( ctx, name, color, depth, active ) TracyD3D11ZoneC( name, color ) -# define TracyD3D11NamedZoneS( ctx, varname, name, depth, active ) TracyD3D11NamedZone( ctx, varname, name, active ) -# define TracyD3D11NamedZoneCS( ctx, varname, name, color, depth, active ) TracyD3D11NamedZoneC( ctx, varname, name, color, active ) -# define TracyD3D11ZoneTransientS(ctx, varname, name, depth, active) TracyD3D11ZoneTransient(ctx, varname, name, active) -#endif - -#define TracyD3D11Collect( ctx ) ctx->Collect(); - -#endif - -#endif diff --git a/src/third_party/tracy/tracy/TracyD3D12.hpp b/src/third_party/tracy/tracy/TracyD3D12.hpp deleted file mode 100644 index d36253d7..00000000 --- a/src/third_party/tracy/tracy/TracyD3D12.hpp +++ /dev/null @@ -1,500 +0,0 @@ -#ifndef __TRACYD3D12_HPP__ -#define __TRACYD3D12_HPP__ - -#ifndef TRACY_ENABLE - -#define TracyD3D12Context(device, queue) nullptr -#define TracyD3D12Destroy(ctx) -#define TracyD3D12ContextName(ctx, name, size) - -#define TracyD3D12NewFrame(ctx) - -#define TracyD3D12Zone(ctx, cmdList, name) -#define TracyD3D12ZoneC(ctx, cmdList, name, color) -#define TracyD3D12NamedZone(ctx, varname, cmdList, name, active) -#define TracyD3D12NamedZoneC(ctx, varname, cmdList, name, color, active) -#define TracyD3D12ZoneTransient(ctx, varname, cmdList, name, active) - -#define TracyD3D12ZoneS(ctx, cmdList, name, depth) -#define TracyD3D12ZoneCS(ctx, cmdList, name, color, depth) -#define TracyD3D12NamedZoneS(ctx, varname, cmdList, name, depth, active) -#define TracyD3D12NamedZoneCS(ctx, varname, cmdList, name, color, depth, active) -#define TracyD3D12ZoneTransientS(ctx, varname, cmdList, name, depth, active) - -#define TracyD3D12Collect(ctx) - -namespace tracy -{ - class D3D12ZoneScope {}; -} - -using TracyD3D12Ctx = void*; - -#else - -#include "Tracy.hpp" -#include "../client/TracyProfiler.hpp" -#include "../client/TracyCallstack.hpp" - -#include -#include -#include -#include -#include - -#define TracyD3D12Panic(msg, ...) do { assert(false && "TracyD3D12: " msg); TracyMessageLC("TracyD3D12: " msg, tracy::Color::Red4); __VA_ARGS__; } while(false); - -namespace tracy -{ - - struct D3D12QueryPayload - { - uint32_t m_queryIdStart = 0; - uint32_t m_queryCount = 0; - }; - - // Command queue context. - class D3D12QueueCtx - { - friend class D3D12ZoneScope; - - ID3D12Device* m_device = nullptr; - ID3D12CommandQueue* m_queue = nullptr; - uint8_t m_contextId = 255; // TODO: apparently, 255 means "invalid id"; is this documented somewhere? - ID3D12QueryHeap* m_queryHeap = nullptr; - ID3D12Resource* m_readbackBuffer = nullptr; - - // In-progress payload. - uint32_t m_queryLimit = 0; - std::atomic m_queryCounter = 0; - uint32_t m_previousQueryCounter = 0; - - uint32_t m_activePayload = 0; - ID3D12Fence* m_payloadFence = nullptr; - std::queue m_payloadQueue; - - UINT64 m_prevCalibrationTicksCPU = 0; - - void RecalibrateClocks() - { - UINT64 cpuTimestamp; - UINT64 gpuTimestamp; - if (FAILED(m_queue->GetClockCalibration(&gpuTimestamp, &cpuTimestamp))) - { - TracyD3D12Panic("failed to obtain queue clock calibration counters.", return); - } - - int64_t cpuDeltaTicks = cpuTimestamp - m_prevCalibrationTicksCPU; - if (cpuDeltaTicks > 0) - { - static const int64_t nanosecodsPerTick = int64_t(1000000000) / GetFrequencyQpc(); - int64_t cpuDeltaNS = cpuDeltaTicks * nanosecodsPerTick; - // Save the device cpu timestamp, not the Tracy profiler timestamp: - m_prevCalibrationTicksCPU = cpuTimestamp; - - cpuTimestamp = Profiler::GetTime(); - - auto* item = Profiler::QueueSerial(); - MemWrite(&item->hdr.type, QueueType::GpuCalibration); - MemWrite(&item->gpuCalibration.gpuTime, gpuTimestamp); - MemWrite(&item->gpuCalibration.cpuTime, cpuTimestamp); - MemWrite(&item->gpuCalibration.cpuDelta, cpuDeltaNS); - MemWrite(&item->gpuCalibration.context, GetId()); - SubmitQueueItem(item); - } - } - - tracy_force_inline void SubmitQueueItem(tracy::QueueItem* item) - { -#ifdef TRACY_ON_DEMAND - GetProfiler().DeferItem(*item); -#endif - Profiler::QueueSerialFinish(); - } - - public: - D3D12QueueCtx(ID3D12Device* device, ID3D12CommandQueue* queue) - : m_device(device) - , m_queue(queue) - { - // Verify we support timestamp queries on this queue. - - if (queue->GetDesc().Type == D3D12_COMMAND_LIST_TYPE_COPY) - { - D3D12_FEATURE_DATA_D3D12_OPTIONS3 featureData{}; - - HRESULT hr = device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS3, &featureData, sizeof(featureData)); - if (FAILED(hr) || (featureData.CopyQueueTimestampQueriesSupported == FALSE)) - { - TracyD3D12Panic("Platform does not support profiling of copy queues.", return); - } - } - - static constexpr uint32_t MaxQueries = 64 * 1024; // Must be even, because queries are (begin, end) pairs - m_queryLimit = MaxQueries; - - D3D12_QUERY_HEAP_DESC heapDesc{}; - heapDesc.Type = queue->GetDesc().Type == D3D12_COMMAND_LIST_TYPE_COPY ? D3D12_QUERY_HEAP_TYPE_COPY_QUEUE_TIMESTAMP : D3D12_QUERY_HEAP_TYPE_TIMESTAMP; - heapDesc.Count = m_queryLimit; - heapDesc.NodeMask = 0; // #TODO: Support multiple adapters. - - while (FAILED(device->CreateQueryHeap(&heapDesc, IID_PPV_ARGS(&m_queryHeap)))) - { - m_queryLimit /= 2; - heapDesc.Count = m_queryLimit; - } - - // Create a readback buffer, which will be used as a destination for the query data. - - D3D12_RESOURCE_DESC readbackBufferDesc{}; - readbackBufferDesc.Alignment = 0; - readbackBufferDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; - readbackBufferDesc.Width = m_queryLimit * sizeof(uint64_t); - readbackBufferDesc.Height = 1; - readbackBufferDesc.DepthOrArraySize = 1; - readbackBufferDesc.Format = DXGI_FORMAT_UNKNOWN; - readbackBufferDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; // Buffers are always row major. - readbackBufferDesc.MipLevels = 1; - readbackBufferDesc.SampleDesc.Count = 1; - readbackBufferDesc.SampleDesc.Quality = 0; - readbackBufferDesc.Flags = D3D12_RESOURCE_FLAG_NONE; - - D3D12_HEAP_PROPERTIES readbackHeapProps{}; - readbackHeapProps.Type = D3D12_HEAP_TYPE_READBACK; - readbackHeapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; - readbackHeapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; - readbackHeapProps.CreationNodeMask = 0; - readbackHeapProps.VisibleNodeMask = 0; // #TODO: Support multiple adapters. - - if (FAILED(device->CreateCommittedResource(&readbackHeapProps, D3D12_HEAP_FLAG_NONE, &readbackBufferDesc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr, IID_PPV_ARGS(&m_readbackBuffer)))) - { - TracyD3D12Panic("Failed to create query readback buffer.", return); - } - - if (FAILED(device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&m_payloadFence)))) - { - TracyD3D12Panic("Failed to create payload fence.", return); - } - - float period = [queue]() - { - uint64_t timestampFrequency; - if (FAILED(queue->GetTimestampFrequency(×tampFrequency))) - { - return 0.0f; - } - return static_cast( 1E+09 / static_cast(timestampFrequency) ); - }(); - - if (period == 0.0f) - { - TracyD3D12Panic("Failed to get timestamp frequency.", return); - } - - uint64_t cpuTimestamp; - uint64_t gpuTimestamp; - if (FAILED(queue->GetClockCalibration(&gpuTimestamp, &cpuTimestamp))) - { - TracyD3D12Panic("Failed to get queue clock calibration.", return); - } - - // Save the device cpu timestamp, not the profiler's timestamp. - m_prevCalibrationTicksCPU = cpuTimestamp; - - cpuTimestamp = Profiler::GetTime(); - - // all checked: ready to roll - m_contextId = GetGpuCtxCounter().fetch_add(1); - - auto* item = Profiler::QueueSerial(); - MemWrite(&item->hdr.type, QueueType::GpuNewContext); - MemWrite(&item->gpuNewContext.cpuTime, cpuTimestamp); - MemWrite(&item->gpuNewContext.gpuTime, gpuTimestamp); - MemWrite(&item->gpuNewContext.thread, decltype(item->gpuNewContext.thread)(0)); // #TODO: why 0 instead of GetThreadHandle()? - MemWrite(&item->gpuNewContext.period, period); - MemWrite(&item->gpuNewContext.context, GetId()); - MemWrite(&item->gpuNewContext.flags, GpuContextCalibration); - MemWrite(&item->gpuNewContext.type, GpuContextType::Direct3D12); - SubmitQueueItem(item); - } - - ~D3D12QueueCtx() - { - ZoneScopedC(Color::Red4); - // collect all pending timestamps - while (m_payloadFence->GetCompletedValue() != m_activePayload) - /* busy-wait ... */; - Collect(); - m_payloadFence->Release(); - m_readbackBuffer->Release(); - m_queryHeap->Release(); - } - - - void NewFrame() - { - uint32_t queryCounter = m_queryCounter.exchange(0); - m_payloadQueue.emplace(D3D12QueryPayload{ m_previousQueryCounter, queryCounter }); - m_previousQueryCounter += queryCounter; - - if (m_previousQueryCounter >= m_queryLimit) - { - m_previousQueryCounter -= m_queryLimit; - } - - m_queue->Signal(m_payloadFence, ++m_activePayload); - } - - void Name( const char* name, uint16_t len ) - { - auto ptr = (char*)tracy_malloc( len ); - memcpy( ptr, name, len ); - - auto item = Profiler::QueueSerial(); - MemWrite( &item->hdr.type, QueueType::GpuContextName ); - MemWrite( &item->gpuContextNameFat.context, GetId()); - MemWrite( &item->gpuContextNameFat.ptr, (uint64_t)ptr ); - MemWrite( &item->gpuContextNameFat.size, len ); - SubmitQueueItem(item); - } - - void Collect() - { - ZoneScopedC(Color::Red4); - -#ifdef TRACY_ON_DEMAND - if (!GetProfiler().IsConnected()) - { - m_queryCounter = 0; - - return; - } -#endif - - // Find out what payloads are available. - const auto newestReadyPayload = m_payloadFence->GetCompletedValue(); - const auto payloadCount = m_payloadQueue.size() - (m_activePayload - newestReadyPayload); - - if (!payloadCount) - { - return; // No payloads are available yet, exit out. - } - - D3D12_RANGE mapRange{ 0, m_queryLimit * sizeof(uint64_t) }; - - // Map the readback buffer so we can fetch the query data from the GPU. - void* readbackBufferMapping = nullptr; - - if (FAILED(m_readbackBuffer->Map(0, &mapRange, &readbackBufferMapping))) - { - TracyD3D12Panic("Failed to map readback buffer.", return); - } - - auto* timestampData = static_cast(readbackBufferMapping); - - for (uint32_t i = 0; i < payloadCount; ++i) - { - const auto& payload = m_payloadQueue.front(); - - for (uint32_t j = 0; j < payload.m_queryCount; ++j) - { - const auto counter = (payload.m_queryIdStart + j) % m_queryLimit; - const auto timestamp = timestampData[counter]; - const auto queryId = counter; - - auto* item = Profiler::QueueSerial(); - MemWrite(&item->hdr.type, QueueType::GpuTime); - MemWrite(&item->gpuTime.gpuTime, timestamp); - MemWrite(&item->gpuTime.queryId, static_cast(queryId)); - MemWrite(&item->gpuTime.context, GetId()); - - Profiler::QueueSerialFinish(); - } - - m_payloadQueue.pop(); - } - - m_readbackBuffer->Unmap(0, nullptr); - - // Recalibrate to account for drift. - RecalibrateClocks(); - } - - private: - tracy_force_inline uint32_t NextQueryId() - { - uint32_t queryCounter = m_queryCounter.fetch_add(2); - if (queryCounter >= m_queryLimit) - { - TracyD3D12Panic("Submitted too many GPU queries! Consider increasing MaxQueries."); - // #TODO: consider returning an invalid id or sentinel value here - } - - const uint32_t id = (m_previousQueryCounter + queryCounter) % m_queryLimit; - - return id; - } - - tracy_force_inline uint8_t GetId() const - { - return m_contextId; - } - }; - - class D3D12ZoneScope - { - const bool m_active; - D3D12QueueCtx* m_ctx = nullptr; - ID3D12GraphicsCommandList* m_cmdList = nullptr; - uint32_t m_queryId = 0; // Used for tracking in nested zones. - - tracy_force_inline void WriteQueueItem(QueueItem* item, QueueType type, uint64_t srcLocation) - { - MemWrite(&item->hdr.type, type); - MemWrite(&item->gpuZoneBegin.cpuTime, Profiler::GetTime()); - MemWrite(&item->gpuZoneBegin.srcloc, srcLocation); - MemWrite(&item->gpuZoneBegin.thread, GetThreadHandle()); - MemWrite(&item->gpuZoneBegin.queryId, static_cast(m_queryId)); - MemWrite(&item->gpuZoneBegin.context, m_ctx->GetId()); - Profiler::QueueSerialFinish(); - } - - tracy_force_inline D3D12ZoneScope(D3D12QueueCtx* ctx, ID3D12GraphicsCommandList* cmdList, bool active) -#ifdef TRACY_ON_DEMAND - : m_active(active&& GetProfiler().IsConnected()) -#else - : m_active(active) -#endif - { - if (!m_active) return; - - m_ctx = ctx; - m_cmdList = cmdList; - - m_queryId = m_ctx->NextQueryId(); - m_cmdList->EndQuery(m_ctx->m_queryHeap, D3D12_QUERY_TYPE_TIMESTAMP, m_queryId); - } - - public: - tracy_force_inline D3D12ZoneScope(D3D12QueueCtx* ctx, ID3D12GraphicsCommandList* cmdList, const SourceLocationData* srcLocation, bool active) - : D3D12ZoneScope(ctx, cmdList, active) - { - if (!m_active) return; - - auto* item = Profiler::QueueSerial(); - WriteQueueItem(item, QueueType::GpuZoneBeginSerial, reinterpret_cast(srcLocation)); - } - - tracy_force_inline D3D12ZoneScope(D3D12QueueCtx* ctx, ID3D12GraphicsCommandList* cmdList, const SourceLocationData* srcLocation, int32_t depth, bool active) - : D3D12ZoneScope(ctx, cmdList, active) - { - if (!m_active) return; - - auto* item = Profiler::QueueSerialCallstack(Callstack(depth)); - WriteQueueItem(item, QueueType::GpuZoneBeginCallstackSerial, reinterpret_cast(srcLocation)); - } - - tracy_force_inline D3D12ZoneScope(D3D12QueueCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, ID3D12GraphicsCommandList* cmdList, bool active) - : D3D12ZoneScope(ctx, cmdList, active) - { - if (!m_active) return; - - const auto sourceLocation = Profiler::AllocSourceLocation(line, source, sourceSz, function, functionSz, name, nameSz); - - auto* item = Profiler::QueueSerial(); - WriteQueueItem(item, QueueType::GpuZoneBeginAllocSrcLocSerial, sourceLocation); - } - - tracy_force_inline D3D12ZoneScope(D3D12QueueCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, ID3D12GraphicsCommandList* cmdList, int32_t depth, bool active) - : D3D12ZoneScope(ctx, cmdList, active) - { - if (!m_active) return; - - const auto sourceLocation = Profiler::AllocSourceLocation(line, source, sourceSz, function, functionSz, name, nameSz); - - auto* item = Profiler::QueueSerialCallstack(Callstack(depth)); - WriteQueueItem(item, QueueType::GpuZoneBeginAllocSrcLocCallstackSerial, sourceLocation); - } - - tracy_force_inline ~D3D12ZoneScope() - { - if (!m_active) return; - - const auto queryId = m_queryId + 1; // Our end query slot is immediately after the begin slot. - m_cmdList->EndQuery(m_ctx->m_queryHeap, D3D12_QUERY_TYPE_TIMESTAMP, queryId); - - auto* item = Profiler::QueueSerial(); - MemWrite(&item->hdr.type, QueueType::GpuZoneEndSerial); - MemWrite(&item->gpuZoneEnd.cpuTime, Profiler::GetTime()); - MemWrite(&item->gpuZoneEnd.thread, GetThreadHandle()); - MemWrite(&item->gpuZoneEnd.queryId, static_cast(queryId)); - MemWrite(&item->gpuZoneEnd.context, m_ctx->GetId()); - Profiler::QueueSerialFinish(); - - m_cmdList->ResolveQueryData(m_ctx->m_queryHeap, D3D12_QUERY_TYPE_TIMESTAMP, m_queryId, 2, m_ctx->m_readbackBuffer, m_queryId * sizeof(uint64_t)); - } - }; - - static inline D3D12QueueCtx* CreateD3D12Context(ID3D12Device* device, ID3D12CommandQueue* queue) - { - auto* ctx = static_cast(tracy_malloc(sizeof(D3D12QueueCtx))); - new (ctx) D3D12QueueCtx{ device, queue }; - - return ctx; - } - - static inline void DestroyD3D12Context(D3D12QueueCtx* ctx) - { - ctx->~D3D12QueueCtx(); - tracy_free(ctx); - } - -} - -#undef TracyD3D12Panic - -using TracyD3D12Ctx = tracy::D3D12QueueCtx*; - -#define TracyD3D12Context(device, queue) tracy::CreateD3D12Context(device, queue); -#define TracyD3D12Destroy(ctx) tracy::DestroyD3D12Context(ctx); -#define TracyD3D12ContextName(ctx, name, size) ctx->Name(name, size); - -#define TracyD3D12NewFrame(ctx) ctx->NewFrame(); - -#define TracyD3D12UnnamedZone ___tracy_gpu_d3d12_zone -#define TracyD3D12SrcLocSymbol TracyConcat(__tracy_d3d12_source_location,TracyLine) -#define TracyD3D12SrcLocObject(name, color) static constexpr tracy::SourceLocationData TracyD3D12SrcLocSymbol { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; - -#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK -# define TracyD3D12Zone(ctx, cmdList, name) TracyD3D12NamedZoneS(ctx, TracyD3D12UnnamedZone, cmdList, name, TRACY_CALLSTACK, true) -# define TracyD3D12ZoneC(ctx, cmdList, name, color) TracyD3D12NamedZoneCS(ctx, TracyD3D12UnnamedZone, cmdList, name, color, TRACY_CALLSTACK, true) -# define TracyD3D12NamedZone(ctx, varname, cmdList, name, active) TracyD3D12SrcLocObject(name, 0); tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyD3D12SrcLocSymbol, TRACY_CALLSTACK, active }; -# define TracyD3D12NamedZoneC(ctx, varname, cmdList, name, color, active) TracyD3D12SrcLocObject(name, color); tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyD3D12SrcLocSymbol, TRACY_CALLSTACK, active }; -# define TracyD3D12ZoneTransient(ctx, varname, cmdList, name, active) TracyD3D12ZoneTransientS(ctx, varname, cmdList, name, TRACY_CALLSTACK, active) -#else -# define TracyD3D12Zone(ctx, cmdList, name) TracyD3D12NamedZone(ctx, TracyD3D12UnnamedZone, cmdList, name, true) -# define TracyD3D12ZoneC(ctx, cmdList, name, color) TracyD3D12NamedZoneC(ctx, TracyD3D12UnnamedZone, cmdList, name, color, true) -# define TracyD3D12NamedZone(ctx, varname, cmdList, name, active) TracyD3D12SrcLocObject(name, 0); tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyD3D12SrcLocSymbol, active }; -# define TracyD3D12NamedZoneC(ctx, varname, cmdList, name, color, active) TracyD3D12SrcLocObject(name, color); tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyD3D12SrcLocSymbol, active }; -# define TracyD3D12ZoneTransient(ctx, varname, cmdList, name, active) tracy::D3D12ZoneScope varname{ ctx, TracyLine, TracyFile, strlen(TracyFile), TracyFunction, strlen(TracyFunction), name, strlen(name), cmdList, active }; -#endif - -#ifdef TRACY_HAS_CALLSTACK -# define TracyD3D12ZoneS(ctx, cmdList, name, depth) TracyD3D12NamedZoneS(ctx, TracyD3D12UnnamedZone, cmdList, name, depth, true) -# define TracyD3D12ZoneCS(ctx, cmdList, name, color, depth) TracyD3D12NamedZoneCS(ctx, TracyD3D12UnnamedZone, cmdList, name, color, depth, true) -# define TracyD3D12NamedZoneS(ctx, varname, cmdList, name, depth, active) TracyD3D12SrcLocObject(name, 0); tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyD3D12SrcLocSymbol, depth, active }; -# define TracyD3D12NamedZoneCS(ctx, varname, cmdList, name, color, depth, active) TracyD3D12SrcLocObject(name, color); tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyD3D12SrcLocSymbol, depth, active }; -# define TracyD3D12ZoneTransientS(ctx, varname, cmdList, name, depth, active) tracy::D3D12ZoneScope varname{ ctx, TracyLine, TracyFile, strlen(TracyFile), TracyFunction, strlen(TracyFunction), name, strlen(name), cmdList, depth, active }; -#else -# define TracyD3D12ZoneS(ctx, cmdList, name, depth) TracyD3D12Zone(ctx, cmdList, name) -# define TracyD3D12ZoneCS(ctx, cmdList, name, color, depth) TracyD3D12Zone(ctx, cmdList, name, color) -# define TracyD3D12NamedZoneS(ctx, varname, cmdList, name, depth, active) TracyD3D12NamedZone(ctx, varname, cmdList, name, active) -# define TracyD3D12NamedZoneCS(ctx, varname, cmdList, name, color, depth, active) TracyD3D12NamedZoneC(ctx, varname, cmdList, name, color, active) -# define TracyD3D12ZoneTransientS(ctx, varname, cmdList, name, depth, active) TracyD3D12ZoneTransient(ctx, varname, cmdList, name, active) -#endif - -#define TracyD3D12Collect(ctx) ctx->Collect(); - -#endif - -#endif diff --git a/src/third_party/tracy/tracy/TracyLua.hpp b/src/third_party/tracy/tracy/TracyLua.hpp deleted file mode 100644 index 51dead51..00000000 --- a/src/third_party/tracy/tracy/TracyLua.hpp +++ /dev/null @@ -1,446 +0,0 @@ -#ifndef __TRACYLUA_HPP__ -#define __TRACYLUA_HPP__ - -// Include this file after you include lua headers. - -#ifndef TRACY_ENABLE - -#include - -namespace tracy -{ - -namespace detail -{ -static inline int noop( lua_State* L ) { return 0; } -} - -static inline void LuaRegister( lua_State* L ) -{ - lua_newtable( L ); - lua_pushcfunction( L, detail::noop ); - lua_setfield( L, -2, "ZoneBegin" ); - lua_pushcfunction( L, detail::noop ); - lua_setfield( L, -2, "ZoneBeginN" ); - lua_pushcfunction( L, detail::noop ); - lua_setfield( L, -2, "ZoneBeginS" ); - lua_pushcfunction( L, detail::noop ); - lua_setfield( L, -2, "ZoneBeginNS" ); - lua_pushcfunction( L, detail::noop ); - lua_setfield( L, -2, "ZoneEnd" ); - lua_pushcfunction( L, detail::noop ); - lua_setfield( L, -2, "ZoneText" ); - lua_pushcfunction( L, detail::noop ); - lua_setfield( L, -2, "ZoneName" ); - lua_pushcfunction( L, detail::noop ); - lua_setfield( L, -2, "Message" ); - lua_setglobal( L, "tracy" ); -} - -static inline char* FindEnd( char* ptr ) -{ - unsigned int cnt = 1; - while( cnt != 0 ) - { - if( *ptr == '(' ) cnt++; - else if( *ptr == ')' ) cnt--; - ptr++; - } - return ptr; -} - -static inline void LuaRemove( char* script ) -{ - while( *script ) - { - if( strncmp( script, "tracy.", 6 ) == 0 ) - { - if( strncmp( script + 6, "Zone", 4 ) == 0 ) - { - if( strncmp( script + 10, "End()", 5 ) == 0 ) - { - memset( script, ' ', 15 ); - script += 15; - } - else if( strncmp( script + 10, "Begin()", 7 ) == 0 ) - { - memset( script, ' ', 17 ); - script += 17; - } - else if( strncmp( script + 10, "Text(", 5 ) == 0 ) - { - auto end = FindEnd( script + 15 ); - memset( script, ' ', end - script ); - script = end; - } - else if( strncmp( script + 10, "Name(", 5 ) == 0 ) - { - auto end = FindEnd( script + 15 ); - memset( script, ' ', end - script ); - script = end; - } - else if( strncmp( script + 10, "BeginN(", 7 ) == 0 ) - { - auto end = FindEnd( script + 17 ); - memset( script, ' ', end - script ); - script = end; - } - else if( strncmp( script + 10, "BeginS(", 7 ) == 0 ) - { - auto end = FindEnd( script + 17 ); - memset( script, ' ', end - script ); - script = end; - } - else if( strncmp( script + 10, "BeginNS(", 8 ) == 0 ) - { - auto end = FindEnd( script + 18 ); - memset( script, ' ', end - script ); - script = end; - } - else - { - script += 10; - } - } - else if( strncmp( script + 6, "Message(", 8 ) == 0 ) - { - auto end = FindEnd( script + 14 ); - memset( script, ' ', end - script ); - script = end; - } - else - { - script += 6; - } - } - else - { - script++; - } - } -} - -} - -#else - -#include -#include - -#include "../common/TracyColor.hpp" -#include "../common/TracyAlign.hpp" -#include "../common/TracyForceInline.hpp" -#include "../common/TracySystem.hpp" -#include "../client/TracyProfiler.hpp" - -namespace tracy -{ - -#ifdef TRACY_ON_DEMAND -TRACY_API LuaZoneState& GetLuaZoneState(); -#endif - -namespace detail -{ - -#ifdef TRACY_HAS_CALLSTACK -static tracy_force_inline void SendLuaCallstack( lua_State* L, uint32_t depth ) -{ - assert( depth <= 64 ); - lua_Debug dbg[64]; - const char* func[64]; - uint32_t fsz[64]; - uint32_t ssz[64]; - - uint8_t cnt; - uint16_t spaceNeeded = sizeof( cnt ); - for( cnt=0; cnt::max)() ); - memcpy( dst, fsz+i, 2 ); dst += 2; - memcpy( dst, func[i], fsz[i] ); dst += fsz[i]; - assert( ssz[i] <= (std::numeric_limits::max)() ); - memcpy( dst, ssz+i, 2 ); dst += 2; - memcpy( dst, dbg[i].source, ssz[i] ), dst += ssz[i]; - } - assert( dst - ptr == spaceNeeded + 2 ); - - TracyQueuePrepare( QueueType::CallstackAlloc ); - MemWrite( &item->callstackAllocFat.ptr, (uint64_t)ptr ); - MemWrite( &item->callstackAllocFat.nativePtr, (uint64_t)Callstack( depth ) ); - TracyQueueCommit( callstackAllocFatThread ); -} - -static inline void LuaShortenSrc( char* dst, const char* src ) -{ - size_t l = std::min( (size_t)255, strlen( src ) ); - memcpy( dst, src, l ); - dst[l] = 0; -} - -static inline int LuaZoneBeginS( lua_State* L ) -{ -#ifdef TRACY_ON_DEMAND - const auto zoneCnt = GetLuaZoneState().counter++; - if( zoneCnt != 0 && !GetLuaZoneState().active ) return 0; - GetLuaZoneState().active = GetProfiler().IsConnected(); - if( !GetLuaZoneState().active ) return 0; -#endif - -#ifdef TRACY_CALLSTACK - const uint32_t depth = TRACY_CALLSTACK; -#else - const auto depth = uint32_t( lua_tointeger( L, 1 ) ); -#endif - SendLuaCallstack( L, depth ); - - lua_Debug dbg; - lua_getstack( L, 1, &dbg ); - lua_getinfo( L, "Snl", &dbg ); - char src[256]; - LuaShortenSrc( src, dbg.source ); - const auto srcloc = Profiler::AllocSourceLocation( dbg.currentline, src, dbg.name ? dbg.name : dbg.short_src ); - - TracyQueuePrepare( QueueType::ZoneBeginAllocSrcLocCallstack ); - MemWrite( &item->zoneBegin.time, Profiler::GetTime() ); - MemWrite( &item->zoneBegin.srcloc, srcloc ); - TracyQueueCommit( zoneBeginThread ); - - return 0; -} - -static inline int LuaZoneBeginNS( lua_State* L ) -{ -#ifdef TRACY_ON_DEMAND - const auto zoneCnt = GetLuaZoneState().counter++; - if( zoneCnt != 0 && !GetLuaZoneState().active ) return 0; - GetLuaZoneState().active = GetProfiler().IsConnected(); - if( !GetLuaZoneState().active ) return 0; -#endif - -#ifdef TRACY_CALLSTACK - const uint32_t depth = TRACY_CALLSTACK; -#else - const auto depth = uint32_t( lua_tointeger( L, 2 ) ); -#endif - SendLuaCallstack( L, depth ); - - lua_Debug dbg; - lua_getstack( L, 1, &dbg ); - lua_getinfo( L, "Snl", &dbg ); - size_t nsz; - char src[256]; - LuaShortenSrc( src, dbg.source ); - const auto name = lua_tolstring( L, 1, &nsz ); - const auto srcloc = Profiler::AllocSourceLocation( dbg.currentline, src, dbg.name ? dbg.name : dbg.short_src, name, nsz ); - - TracyQueuePrepare( QueueType::ZoneBeginAllocSrcLocCallstack ); - MemWrite( &item->zoneBegin.time, Profiler::GetTime() ); - MemWrite( &item->zoneBegin.srcloc, srcloc ); - TracyQueueCommit( zoneBeginThread ); - - return 0; -} -#endif - -static inline int LuaZoneBegin( lua_State* L ) -{ -#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK - return LuaZoneBeginS( L ); -#else -#ifdef TRACY_ON_DEMAND - const auto zoneCnt = GetLuaZoneState().counter++; - if( zoneCnt != 0 && !GetLuaZoneState().active ) return 0; - GetLuaZoneState().active = GetProfiler().IsConnected(); - if( !GetLuaZoneState().active ) return 0; -#endif - - lua_Debug dbg; - lua_getstack( L, 1, &dbg ); - lua_getinfo( L, "Snl", &dbg ); - char src[256]; - LuaShortenSrc( src, dbg.source ); - const auto srcloc = Profiler::AllocSourceLocation( dbg.currentline, src, dbg.name ? dbg.name : dbg.short_src ); - - TracyQueuePrepare( QueueType::ZoneBeginAllocSrcLoc ); - MemWrite( &item->zoneBegin.time, Profiler::GetTime() ); - MemWrite( &item->zoneBegin.srcloc, srcloc ); - TracyQueueCommit( zoneBeginThread ); - return 0; -#endif -} - -static inline int LuaZoneBeginN( lua_State* L ) -{ -#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK - return LuaZoneBeginNS( L ); -#else -#ifdef TRACY_ON_DEMAND - const auto zoneCnt = GetLuaZoneState().counter++; - if( zoneCnt != 0 && !GetLuaZoneState().active ) return 0; - GetLuaZoneState().active = GetProfiler().IsConnected(); - if( !GetLuaZoneState().active ) return 0; -#endif - - lua_Debug dbg; - lua_getstack( L, 1, &dbg ); - lua_getinfo( L, "Snl", &dbg ); - size_t nsz; - char src[256]; - LuaShortenSrc( src, dbg.source ); - const auto name = lua_tolstring( L, 1, &nsz ); - const auto srcloc = Profiler::AllocSourceLocation( dbg.currentline, src, dbg.name ? dbg.name : dbg.short_src, name, nsz ); - - TracyQueuePrepare( QueueType::ZoneBeginAllocSrcLoc ); - MemWrite( &item->zoneBegin.time, Profiler::GetTime() ); - MemWrite( &item->zoneBegin.srcloc, srcloc ); - TracyQueueCommit( zoneBeginThread ); - return 0; -#endif -} - -static inline int LuaZoneEnd( lua_State* L ) -{ -#ifdef TRACY_ON_DEMAND - assert( GetLuaZoneState().counter != 0 ); - GetLuaZoneState().counter--; - if( !GetLuaZoneState().active ) return 0; - if( !GetProfiler().IsConnected() ) - { - GetLuaZoneState().active = false; - return 0; - } -#endif - - TracyQueuePrepare( QueueType::ZoneEnd ); - MemWrite( &item->zoneEnd.time, Profiler::GetTime() ); - TracyQueueCommit( zoneEndThread ); - return 0; -} - -static inline int LuaZoneText( lua_State* L ) -{ -#ifdef TRACY_ON_DEMAND - if( !GetLuaZoneState().active ) return 0; - if( !GetProfiler().IsConnected() ) - { - GetLuaZoneState().active = false; - return 0; - } -#endif - - auto txt = lua_tostring( L, 1 ); - const auto size = strlen( txt ); - assert( size < (std::numeric_limits::max)() ); - - auto ptr = (char*)tracy_malloc( size ); - memcpy( ptr, txt, size ); - - TracyQueuePrepare( QueueType::ZoneText ); - MemWrite( &item->zoneTextFat.text, (uint64_t)ptr ); - MemWrite( &item->zoneTextFat.size, (uint16_t)size ); - TracyQueueCommit( zoneTextFatThread ); - return 0; -} - -static inline int LuaZoneName( lua_State* L ) -{ -#ifdef TRACY_ON_DEMAND - if( !GetLuaZoneState().active ) return 0; - if( !GetProfiler().IsConnected() ) - { - GetLuaZoneState().active = false; - return 0; - } -#endif - - auto txt = lua_tostring( L, 1 ); - const auto size = strlen( txt ); - assert( size < (std::numeric_limits::max)() ); - - auto ptr = (char*)tracy_malloc( size ); - memcpy( ptr, txt, size ); - - TracyQueuePrepare( QueueType::ZoneName ); - MemWrite( &item->zoneTextFat.text, (uint64_t)ptr ); - MemWrite( &item->zoneTextFat.size, (uint16_t)size ); - TracyQueueCommit( zoneTextFatThread ); - return 0; -} - -static inline int LuaMessage( lua_State* L ) -{ -#ifdef TRACY_ON_DEMAND - if( !GetProfiler().IsConnected() ) return 0; -#endif - - auto txt = lua_tostring( L, 1 ); - const auto size = strlen( txt ); - assert( size < (std::numeric_limits::max)() ); - - auto ptr = (char*)tracy_malloc( size ); - memcpy( ptr, txt, size ); - - TracyQueuePrepare( QueueType::Message ); - MemWrite( &item->messageFat.time, Profiler::GetTime() ); - MemWrite( &item->messageFat.text, (uint64_t)ptr ); - MemWrite( &item->messageFat.size, (uint16_t)size ); - TracyQueueCommit( messageFatThread ); - return 0; -} - -} - -static inline void LuaRegister( lua_State* L ) -{ - lua_newtable( L ); - lua_pushcfunction( L, detail::LuaZoneBegin ); - lua_setfield( L, -2, "ZoneBegin" ); - lua_pushcfunction( L, detail::LuaZoneBeginN ); - lua_setfield( L, -2, "ZoneBeginN" ); -#ifdef TRACY_HAS_CALLSTACK - lua_pushcfunction( L, detail::LuaZoneBeginS ); - lua_setfield( L, -2, "ZoneBeginS" ); - lua_pushcfunction( L, detail::LuaZoneBeginNS ); - lua_setfield( L, -2, "ZoneBeginNS" ); -#else - lua_pushcfunction( L, detail::LuaZoneBegin ); - lua_setfield( L, -2, "ZoneBeginS" ); - lua_pushcfunction( L, detail::LuaZoneBeginN ); - lua_setfield( L, -2, "ZoneBeginNS" ); -#endif - lua_pushcfunction( L, detail::LuaZoneEnd ); - lua_setfield( L, -2, "ZoneEnd" ); - lua_pushcfunction( L, detail::LuaZoneText ); - lua_setfield( L, -2, "ZoneText" ); - lua_pushcfunction( L, detail::LuaZoneName ); - lua_setfield( L, -2, "ZoneName" ); - lua_pushcfunction( L, detail::LuaMessage ); - lua_setfield( L, -2, "Message" ); - lua_setglobal( L, "tracy" ); -} - -static inline void LuaRemove( char* script ) {} - -} - -#endif - -#endif diff --git a/src/third_party/tracy/tracy/TracyMetal.hmm b/src/third_party/tracy/tracy/TracyMetal.hmm deleted file mode 100644 index a4b4cb52..00000000 --- a/src/third_party/tracy/tracy/TracyMetal.hmm +++ /dev/null @@ -1,644 +0,0 @@ -#ifndef __TRACYMETAL_HMM__ -#define __TRACYMETAL_HMM__ - -/* This file implements a Metal API back-end for Tracy (it has only been tested on Apple - Silicon devices, but it should also work on Intel-based Macs and older iOS devices). - The Metal back-end in Tracy operates differently than other GPU back-ends like Vulkan, - Direct3D and OpenGL. Specifically, TracyMetalZone() must be placed around the site where - a command encoder is created. This is because not all hardware supports timestamps at - command granularity, and can only provide timestamps around an entire command encoder. - This accommodates for all tiers of hardware; in the future, variants of TracyMetalZone() - will be added to support the habitual command-level granularity of Tracy GPU back-ends. - Metal also imposes a few restrictions that make the process of requesting and collecting - queries more complicated in Tracy: - a) timestamp query buffers are limited to 4096 queries (32KB, where each query is 8 bytes) - b) when a timestamp query buffer is created, Metal initializes all timestamps with zeroes, - and there's no way to reset them back to zero after timestamps get resolved; the only - way to clear the timestamps is by allocating a new timestamp query buffer - c) if a command encoder records no commands and its corresponding command buffer ends up - committed to the command queue, Metal will "optimize-away" the encoder along with any - timestamp queries associated with it (the timestamp will remain as zero and will never - get resolved) - Because of the limitations above, two timestamp buffers are managed internally. Once one - of the buffers fills up with requests, the second buffer can start serving new requests. - Once all requests in a buffer get resolved and collected, the entire buffer is discarded - and a new one allocated for future requests. (Proper cycling through a ring buffer would - require bookkeeping and completion handlers to collect only the known complete queries.) - In the current implementation, there is potential for a race condition when the buffer is - discarded and reallocated. In practice, the race condition will never materialize so long - as TracyMetalCollect() is called frequently to keep the amount of unresolved queries low. - Finally, there's a timeout mechanism during timestamp collection to detect "empty" command - encoders and ensure progress. -*/ - -#ifndef TRACY_ENABLE - -#define TracyMetalContext(device) nullptr -#define TracyMetalDestroy(ctx) -#define TracyMetalContextName(ctx, name, size) - -#define TracyMetalZone(ctx, encoderDesc, name) -#define TracyMetalZoneC(ctx, encoderDesc, name, color) -#define TracyMetalNamedZone(ctx, varname, encoderDesc, name, active) -#define TracyMetalNamedZoneC(ctx, varname, encoderDesc, name, color, active) - -#define TracyMetalCollect(ctx) - -namespace tracy -{ -class MetalZoneScope {}; -} - -using TracyMetalCtx = void; - -#else - -#if not __has_feature(objc_arc) -#error TracyMetal requires ARC to be enabled. -#endif - -#include -#include -#include - -#include "Tracy.hpp" -#include "../client/TracyProfiler.hpp" -#include "../client/TracyCallstack.hpp" -#include "../common/TracyAlign.hpp" -#include "../common/TracyAlloc.hpp" - -// ok to import if in obj-c code -#import - -#define TRACY_METAL_VA_ARGS(...) , ##__VA_ARGS__ - -#define TracyMetalPanic(ret, msg, ...) do { \ - char buffer [1024]; \ - snprintf(buffer, sizeof(buffer), "TracyMetal: " msg TRACY_METAL_VA_ARGS(__VA_ARGS__)); \ - TracyMessageC(buffer, strlen(buffer), tracy::Color::OrangeRed); \ - fprintf(stderr, "%s\n", buffer); \ - ret; \ - } while(false); - -#ifndef TRACY_METAL_TIMESTAMP_COLLECT_TIMEOUT -#define TRACY_METAL_TIMESTAMP_COLLECT_TIMEOUT 0.200f -#endif//TRACY_METAL_TIMESTAMP_COLLECT_TIMEOUT - -#ifndef TRACY_METAL_DEBUG_MASK -#define TRACY_METAL_DEBUG_MASK (0) -#endif//TRACY_METAL_DEBUG_MASK - -#if TRACY_METAL_DEBUG_MASK - #define TracyMetalDebugMasked(mask, ...) if constexpr (mask & TRACY_METAL_DEBUG_MASK) { __VA_ARGS__; } -#else - #define TracyMetalDebugMasked(mask, ...) -#endif - -#if TRACY_METAL_DEBUG_MASK & (1 << 1) - #define TracyMetalDebug_0b00010(...) __VA_ARGS__; -#else - #define TracyMetalDebug_0b00010(...) -#endif - -#if TRACY_METAL_DEBUG_MASK & (1 << 4) - #define TracyMetalDebug_0b10000(...) __VA_ARGS__; -#else - #define TracyMetalDebug_0b10000(...) -#endif - -#ifndef TracyMetalDebugZoneScopeWireTap -#define TracyMetalDebugZoneScopeWireTap -#endif//TracyMetalDebugZoneScopeWireTap - -namespace tracy -{ - -class MetalCtx -{ - friend class MetalZoneScope; - - enum { MaxQueries = 4 * 1024 }; // Metal: between 8 and 32768 _BYTES_... - -public: - static MetalCtx* Create(id device) - { - ZoneScopedNC("tracy::MetalCtx::Create", Color::Red4); - auto ctx = static_cast(tracy_malloc(sizeof(MetalCtx))); - new (ctx) MetalCtx(device); - if (ctx->m_contextId == 255) - { - TracyMetalPanic({assert(false);} return nullptr, "ERROR: unable to create context."); - Destroy(ctx); - } - return ctx; - } - - static void Destroy(MetalCtx* ctx) - { - ZoneScopedNC("tracy::MetalCtx::Destroy", Color::Red4); - ctx->~MetalCtx(); - tracy_free(ctx); - } - - void Name( const char* name, uint16_t len ) - { - auto ptr = (char*)tracy_malloc( len ); - memcpy( ptr, name, len ); - - auto* item = Profiler::QueueSerial(); - MemWrite( &item->hdr.type, QueueType::GpuContextName ); - MemWrite( &item->gpuContextNameFat.context, m_contextId ); - MemWrite( &item->gpuContextNameFat.ptr, (uint64_t)ptr ); - MemWrite( &item->gpuContextNameFat.size, len ); - SubmitQueueItem(item); - } - - bool Collect() - { - ZoneScopedNC("tracy::MetalCtx::Collect", Color::Red4); - -#ifdef TRACY_ON_DEMAND - if (!GetProfiler().IsConnected()) - { - return true; - } -#endif - - // Only one thread is allowed to collect timestamps at any given time - // but there's no need to block contending threads - if (!m_collectionMutex.try_lock()) - { - return true; - } - - std::unique_lock lock (m_collectionMutex, std::adopt_lock); - - uintptr_t begin = m_previousCheckpoint.load(); - uintptr_t latestCheckpoint = m_queryCounter.load(); // TODO: MTLEvent? MTLFence?; - TracyMetalDebugMasked(1<<3, ZoneValue(begin)); - TracyMetalDebugMasked(1<<3, ZoneValue(latestCheckpoint)); - - uint32_t count = RingCount(begin, latestCheckpoint); - if (count == 0) // no pending timestamp queries - { - //uintptr_t nextCheckpoint = m_queryCounter.load(); - //if (nextCheckpoint != latestCheckpoint) - //{ - // // TODO: signal event / fence now? - //} - return true; - } - - // resolve up until the ring buffer boundary and let a subsequenty call - // to Collect handle the wrap-around - bool reallocateBuffer = false; - if (RingIndex(begin) + count >= RingSize()) - { - count = RingSize() - RingIndex(begin); - reallocateBuffer = true; - } - TracyMetalDebugMasked(1<<3, ZoneValue(count)); - - auto buffer_idx = (begin / MaxQueries) % 2; - auto counterSampleBuffer = m_counterSampleBuffers[buffer_idx]; - - if (count >= RingSize()) - { - TracyMetalPanic(return false, "Collect: FULL! too many pending timestamp queries. [%llu, %llu] (%u)", begin, latestCheckpoint, count); - } - - TracyMetalDebugMasked(1<<3, TracyMetalPanic(, "Collect: [%llu, %llu] :: (%u)", begin, latestCheckpoint, count)); - - NSRange range = NSMakeRange(RingIndex(begin), count); - NSData* data = [counterSampleBuffer resolveCounterRange:range]; - NSUInteger numResolvedTimestamps = data.length / sizeof(MTLCounterResultTimestamp); - MTLCounterResultTimestamp* timestamps = (MTLCounterResultTimestamp *)(data.bytes); - if (timestamps == nil) - { - TracyMetalPanic(return false, "Collect: unable to resolve timestamps."); - } - - if (numResolvedTimestamps != count) - { - TracyMetalPanic(, "Collect: numResolvedTimestamps != count : %u != %u", (uint32_t)numResolvedTimestamps, count); - } - - int resolved = 0; - for (auto i = 0; i < numResolvedTimestamps; i += 2) - { - TracyMetalDebug_0b10000( ZoneScopedN("tracy::MetalCtx::Collect::[i]") ); - MTLTimestamp t_start = timestamps[i+0].timestamp; - MTLTimestamp t_end = timestamps[i+1].timestamp; - uint32_t k = RingIndex(begin + i); - TracyMetalDebugMasked(1<<4, TracyMetalPanic(, "Collect: timestamp[%u] = %llu | timestamp[%u] = %llu | diff = %llu\n", k, t_start, k+1, t_end, (t_end - t_start))); - if ((t_start == MTLCounterErrorValue) || (t_end == MTLCounterErrorValue)) - { - TracyMetalPanic(, "Collect: invalid timestamp (MTLCounterErrorValue) at %u.", k); - break; - } - // Metal will initialize timestamp buffer with zeroes; encountering a zero-value - // timestamp means that the timestamp has not been written and resolved yet - if ((t_start == 0) || (t_end == 0)) - { - auto checkTime = std::chrono::high_resolution_clock::now(); - auto requestTime = m_timestampRequestTime[k]; - auto ms_in_flight = std::chrono::duration(checkTime-requestTime).count()*1000.0f; - TracyMetalDebugMasked(1<<4, TracyMetalPanic(, "Collect: invalid timestamp (zero) at %u [%.0fms in flight].", k, ms_in_flight)); - const float timeout_ms = TRACY_METAL_TIMESTAMP_COLLECT_TIMEOUT * 1000.0f; - if (ms_in_flight < timeout_ms) - break; - TracyMetalDebug_0b10000( ZoneScopedN("tracy::MetalCtx::Collect::Drop") ); - TracyMetalPanic(, "Collect: giving up on timestamp at %u [%.0fms in flight].", k, ms_in_flight); - t_start = m_mostRecentTimestamp + 5; - t_end = t_start + 5; - } - TracyMetalDebugMasked(1<<2, TracyFreeN((void*)(uintptr_t)(k+0), "TracyMetalGpuZone")); - TracyMetalDebugMasked(1<<2, TracyFreeN((void*)(uintptr_t)(k+1), "TracyMetalGpuZone")); - { - auto* item = Profiler::QueueSerial(); - MemWrite(&item->hdr.type, QueueType::GpuTime); - MemWrite(&item->gpuTime.gpuTime, static_cast(t_start)); - MemWrite(&item->gpuTime.queryId, static_cast(k)); - MemWrite(&item->gpuTime.context, m_contextId); - Profiler::QueueSerialFinish(); - } - { - auto* item = Profiler::QueueSerial(); - MemWrite(&item->hdr.type, QueueType::GpuTime); - MemWrite(&item->gpuTime.gpuTime, static_cast(t_end)); - MemWrite(&item->gpuTime.queryId, static_cast(k+1)); - MemWrite(&item->gpuTime.context, m_contextId); - Profiler::QueueSerialFinish(); - } - m_mostRecentTimestamp = (t_end > m_mostRecentTimestamp) ? t_end : m_mostRecentTimestamp; - TracyMetalDebugMasked(1<<1, TracyFreeN((void*)(uintptr_t)k, "TracyMetalTimestampQueryId")); - resolved += 2; - } - TracyMetalDebugMasked(1<<3, ZoneValue(RingCount(begin, m_previousCheckpoint.load()))); - - m_previousCheckpoint += resolved; - - // Check whether the timestamp buffer has been fully resolved/collected: - // WARN: there's technically a race condition here: NextQuery() may reference the - // buffer that is being released instead of the new one. In practice, this should - // never happen so long as Collect is called frequently enough to prevent pending - // timestamp query requests from piling up too quickly. - if ((resolved == count) && (m_previousCheckpoint.load() % MaxQueries) == 0) - { - m_counterSampleBuffers[buffer_idx] = NewTimestampSampleBuffer(m_device, MaxQueries); - } - - //RecalibrateClocks(); // to account for drift - - return true; - } - -private: - MetalCtx(id device) - : m_device(device) - { - TracyMetalDebugMasked(1<<0, TracyMetalPanic(, "MTLCounterErrorValue = 0x%llx", MTLCounterErrorValue)); - TracyMetalDebugMasked(1<<0, TracyMetalPanic(, "MTLCounterDontSample = 0x%llx", MTLCounterDontSample)); - - if (m_device == nil) - { - TracyMetalPanic({assert(false);} return, "device is nil."); - } - if (![m_device supportsCounterSampling:MTLCounterSamplingPointAtStageBoundary]) - { - TracyMetalPanic({assert(false);} return, "ERROR: timestamp sampling at pipeline stage boundary is not supported."); - } - if (![m_device supportsCounterSampling:MTLCounterSamplingPointAtDrawBoundary]) - { - TracyMetalDebugMasked(1<<0, fprintf(stderr, "WARNING: timestamp sampling at draw call boundary is not supported.\n")); - } - if (![m_device supportsCounterSampling:MTLCounterSamplingPointAtBlitBoundary]) - { - TracyMetalDebugMasked(1<<0, fprintf(stderr, "WARNING: timestamp sampling at blit boundary is not supported.\n")); - } - if (![m_device supportsCounterSampling:MTLCounterSamplingPointAtDispatchBoundary]) - { - TracyMetalDebugMasked(1<<0, fprintf(stderr, "WARNING: timestamp sampling at compute dispatch boundary is not supported.\n")); - } - if (![m_device supportsCounterSampling:MTLCounterSamplingPointAtTileDispatchBoundary]) - { - TracyMetalDebugMasked(1<<0, fprintf(stderr, "WARNING: timestamp sampling at tile dispatch boundary is not supported.\n")); - } - - m_counterSampleBuffers[0] = NewTimestampSampleBuffer(m_device, MaxQueries); - m_counterSampleBuffers[1] = NewTimestampSampleBuffer(m_device, MaxQueries); - - m_timestampRequestTime.resize(MaxQueries); - - MTLTimestamp cpuTimestamp = 0; - MTLTimestamp gpuTimestamp = 0; - [m_device sampleTimestamps:&cpuTimestamp gpuTimestamp:&gpuTimestamp]; - m_mostRecentTimestamp = gpuTimestamp; - TracyMetalDebugMasked(1<<0, TracyMetalPanic(, "Calibration: CPU timestamp (Metal): %llu", cpuTimestamp)); - TracyMetalDebugMasked(1<<0, TracyMetalPanic(, "Calibration: GPU timestamp (Metal): %llu", gpuTimestamp)); - - cpuTimestamp = Profiler::GetTime(); - TracyMetalDebugMasked(1<<0, TracyMetalPanic(, "Calibration: CPU timestamp (Tracy): %llu", cpuTimestamp)); - - float period = 1.0f; - - m_contextId = GetGpuCtxCounter().fetch_add(1); - - auto* item = Profiler::QueueSerial(); - MemWrite(&item->hdr.type, QueueType::GpuNewContext); - MemWrite(&item->gpuNewContext.cpuTime, int64_t(cpuTimestamp)); - MemWrite(&item->gpuNewContext.gpuTime, int64_t(gpuTimestamp)); - MemWrite(&item->gpuNewContext.thread, uint32_t(0)); // TODO: why not GetThreadHandle()? - MemWrite(&item->gpuNewContext.period, period); - MemWrite(&item->gpuNewContext.context, m_contextId); - //MemWrite(&item->gpuNewContext.flags, GpuContextCalibration); - MemWrite(&item->gpuNewContext.flags, GpuContextFlags(0)); - MemWrite(&item->gpuNewContext.type, GpuContextType::Metal); - SubmitQueueItem(item); - } - - ~MetalCtx() - { - // collect the last remnants of Metal GPU activity... - // TODO: add a timeout to this loop? - while (m_previousCheckpoint.load() != m_queryCounter.load()) - Collect(); - } - - tracy_force_inline void SubmitQueueItem(QueueItem* item) - { -#ifdef TRACY_ON_DEMAND - GetProfiler().DeferItem(*item); -#endif - Profiler::QueueSerialFinish(); - } - - tracy_force_inline uint32_t RingIndex(uintptr_t index) - { - index %= MaxQueries; - return static_cast(index); - } - - tracy_force_inline uint32_t RingCount(uintptr_t begin, uintptr_t end) - { - // wrap-around safe: all unsigned - uintptr_t count = end - begin; - return static_cast(count); - } - - tracy_force_inline uint32_t RingSize() const - { - return MaxQueries; - } - - struct Query { id buffer; uint32_t idx; }; - - tracy_force_inline Query NextQuery() - { - TracyMetalDebug_0b00010( ZoneScopedNC("Tracy::MetalCtx::NextQuery", tracy::Color::LightCoral) ); - auto id = m_queryCounter.fetch_add(2); - TracyMetalDebug_0b00010( ZoneValue(id) ); - auto count = RingCount(m_previousCheckpoint, id); - if (count >= MaxQueries) - { - // TODO: return a proper (hidden) "sentinel" query - Query sentinel = Query{ m_counterSampleBuffers[1], MaxQueries-2 }; - TracyMetalPanic( - return sentinel, - "NextQueryId: FULL! too many pending timestamp queries. Consider calling TracyMetalCollect() more frequently. [%llu, %llu] (%u)", - m_previousCheckpoint.load(), id, count - ); - } - uint32_t buffer_idx = (id / MaxQueries) % 2; - TracyMetalDebug_0b00010( ZoneValue(buffer_idx) ); - auto buffer = m_counterSampleBuffers[buffer_idx]; - if (buffer == nil) - TracyMetalPanic(, "NextQueryId: sample buffer is nil! (id=%llu)", id); - uint32_t idx = RingIndex(id); - TracyMetalDebug_0b00010( ZoneValue(idx) ); - TracyMetalDebug_0b00010( TracyAllocN((void*)(uintptr_t)idx, 2, "TracyMetalTimestampQueryId") ); - m_timestampRequestTime[idx] = std::chrono::high_resolution_clock::now(); - return Query{ buffer, idx }; - } - - tracy_force_inline uint8_t GetContextId() const - { - return m_contextId; - } - - static id NewTimestampSampleBuffer(id device, size_t count) - { - ZoneScopedN("tracy::MetalCtx::NewTimestampSampleBuffer"); - - id timestampCounterSet = nil; - for (id counterSet in device.counterSets) - { - if ([counterSet.name isEqualToString:MTLCommonCounterSetTimestamp]) - { - timestampCounterSet = counterSet; - break; - } - } - if (timestampCounterSet == nil) - { - TracyMetalPanic({assert(false);} return nil, "ERROR: timestamp counters are not supported on the platform."); - } - - MTLCounterSampleBufferDescriptor* sampleDescriptor = [[MTLCounterSampleBufferDescriptor alloc] init]; - sampleDescriptor.counterSet = timestampCounterSet; - sampleDescriptor.sampleCount = MaxQueries; - sampleDescriptor.storageMode = MTLStorageModeShared; - sampleDescriptor.label = @"TracyMetalTimestampPool"; - - NSError* error = nil; - id counterSampleBuffer = [device newCounterSampleBufferWithDescriptor:sampleDescriptor error:&error]; - if (error != nil) - { - //NSLog(@"%@ | %@", error.localizedDescription, error.localizedFailureReason); - TracyMetalPanic({assert(false);} return nil, - "ERROR: unable to create sample buffer for timestamp counters : %s | %s", - [error.localizedDescription cString], [error.localizedFailureReason cString]); - } - - return counterSampleBuffer; - } - - uint8_t m_contextId = 255; - - id m_device = nil; - id m_counterSampleBuffers [2] = {}; - - using atomic_counter = std::atomic; - static_assert(atomic_counter::is_always_lock_free); - atomic_counter m_queryCounter = 0; - - atomic_counter m_previousCheckpoint = 0; - MTLTimestamp m_mostRecentTimestamp = 0; - - std::vector m_timestampRequestTime; - - std::mutex m_collectionMutex; -}; - -class MetalZoneScope -{ -public: - tracy_force_inline MetalZoneScope( MetalCtx* ctx, MTLComputePassDescriptor* desc, const SourceLocationData* srcloc, bool is_active ) -#ifdef TRACY_ON_DEMAND - : m_active( is_active && GetProfiler().IsConnected() ) -#else - : m_active( is_active ) -#endif - { - if ( !m_active ) return; - if (desc == nil) TracyMetalPanic({assert(false);} return, "compute pass descriptor is nil."); - m_ctx = ctx; - - auto& query = m_query = ctx->NextQuery(); - - desc.sampleBufferAttachments[0].sampleBuffer = query.buffer; - desc.sampleBufferAttachments[0].startOfEncoderSampleIndex = query.idx+0; - desc.sampleBufferAttachments[0].endOfEncoderSampleIndex = query.idx+1; - - SubmitZoneBeginGpu(ctx, query.idx + 0, srcloc); - } - - tracy_force_inline MetalZoneScope( MetalCtx* ctx, MTLBlitPassDescriptor* desc, const SourceLocationData* srcloc, bool is_active ) -#ifdef TRACY_ON_DEMAND - : m_active( is_active && GetProfiler().IsConnected() ) -#else - : m_active( is_active ) -#endif - { - if ( !m_active ) return; - if (desc == nil) TracyMetalPanic({assert(false); }return, "blit pass descriptor is nil."); - m_ctx = ctx; - - auto& query = m_query = ctx->NextQuery(); - - desc.sampleBufferAttachments[0].sampleBuffer = query.buffer; - desc.sampleBufferAttachments[0].startOfEncoderSampleIndex = query.idx+0; - desc.sampleBufferAttachments[0].endOfEncoderSampleIndex = query.idx+1; - - SubmitZoneBeginGpu(ctx, query.idx + 0, srcloc); - } - - tracy_force_inline MetalZoneScope( MetalCtx* ctx, MTLRenderPassDescriptor* desc, const SourceLocationData* srcloc, bool is_active ) -#ifdef TRACY_ON_DEMAND - : m_active( is_active && GetProfiler().IsConnected() ) -#else - : m_active( is_active ) -#endif - { - if ( !m_active ) return; - if (desc == nil) TracyMetalPanic({assert(false);} return, "render pass descriptor is nil."); - m_ctx = ctx; - - auto& query = m_query = ctx->NextQuery(); - - desc.sampleBufferAttachments[0].sampleBuffer = query.buffer; - desc.sampleBufferAttachments[0].startOfVertexSampleIndex = query.idx+0; - desc.sampleBufferAttachments[0].endOfVertexSampleIndex = MTLCounterDontSample; - desc.sampleBufferAttachments[0].startOfFragmentSampleIndex = MTLCounterDontSample; - desc.sampleBufferAttachments[0].endOfFragmentSampleIndex = query.idx+1; - - SubmitZoneBeginGpu(ctx, query.idx + 0, srcloc); - } - - /* TODO: implement this constructor interfarce for "command-level" profiling, if the device supports it - tracy_force_inline MetalZoneScope( MetalCtx* ctx, id cmdEncoder, const SourceLocationData* srcloc, bool is_active ) -#ifdef TRACY_ON_DEMAND - : m_active( is_active && GetProfiler().IsConnected() ) -#else - : m_active( is_active ) -#endif - { - if( !m_active ) return; - m_ctx = ctx; - m_cmdEncoder = cmdEncoder; - - auto& query = m_query = ctx->NextQueryId(); - - [m_cmdEncoder sampleCountersInBuffer:m_ctx->m_counterSampleBuffer atSampleIndex:query.idx withBarrier:YES]; - - SubmitZoneBeginGpu(ctx, query.idx, srcloc); - } - */ - - tracy_force_inline ~MetalZoneScope() - { - if( !m_active ) return; - - SubmitZoneEndGpu(m_ctx, m_query.idx + 1); - } - - TracyMetalDebugZoneScopeWireTap; - -private: - const bool m_active; - - MetalCtx* m_ctx; - - /* TODO: declare it for "command-level" profiling - id m_cmdEncoder; - */ - - static void SubmitZoneBeginGpu(MetalCtx* ctx, uint32_t queryId, const SourceLocationData* srcloc) - { - auto* item = Profiler::QueueSerial(); - MemWrite( &item->hdr.type, QueueType::GpuZoneBeginSerial ); - MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() ); - MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc ); - MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() ); - MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) ); - MemWrite( &item->gpuZoneBegin.context, ctx->GetContextId() ); - Profiler::QueueSerialFinish(); - - TracyMetalDebugMasked(1<<2, TracyAllocN((void*)(uintptr_t)queryId, 1, "TracyMetalGpuZone")); - } - - static void SubmitZoneEndGpu(MetalCtx* ctx, uint32_t queryId) - { - auto* item = Profiler::QueueSerial(); - MemWrite( &item->hdr.type, QueueType::GpuZoneEndSerial ); - MemWrite( &item->gpuZoneEnd.cpuTime, Profiler::GetTime() ); - MemWrite( &item->gpuZoneEnd.thread, GetThreadHandle() ); - MemWrite( &item->gpuZoneEnd.queryId, uint16_t( queryId ) ); - MemWrite( &item->gpuZoneEnd.context, ctx->GetContextId() ); - Profiler::QueueSerialFinish(); - - TracyMetalDebugMasked(1<<2, TracyAllocN((void*)(uintptr_t)queryId, 1, "TracyMetalGpuZone")); - } - - MetalCtx::Query m_query = {}; -}; - -} - -using TracyMetalCtx = tracy::MetalCtx; - -#define TracyMetalContext(device) tracy::MetalCtx::Create(device) -#define TracyMetalDestroy(ctx) tracy::MetalCtx::Destroy(ctx) -#define TracyMetalContextName(ctx, name, size) ctx->Name(name, size) - -#define TracyMetalZone( ctx, encoderDesc, name ) TracyMetalNamedZone( ctx, ___tracy_gpu_zone, encoderDesc, name, true ) -#define TracyMetalZoneC( ctx, encoderDesc, name, color ) TracyMetalNamedZoneC( ctx, ___tracy_gpu_zone, encoderDesc, name, color, true ) -#define TracyMetalNamedZone( ctx, varname, encoderDesc, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::MetalZoneScope varname( ctx, encoderDesc, &TracyConcat(__tracy_gpu_source_location,TracyLine), active ); -#define TracyMetalNamedZoneC( ctx, varname, encoderDesc, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::MetalZoneScope varname( ctx, encoderDesc, &TracyConcat(__tracy_gpu_source_location,TracyLine), active ); - -#define TracyMetalCollect( ctx ) ctx->Collect(); - - - -#undef TracyMetalDebug_ZoneScopeWireTap -#undef TracyMetalDebug_0b00010 -#undef TracyMetalDebug_0b10000 -#undef TracyMetalDebugMasked -#undef TRACY_METAL_DEBUG_MASK -#undef TRACY_METAL_TIMESTAMP_COLLECT_TIMEOUT -#undef TracyMetalPanic -#undef TRACY_METAL_VA_ARGS - -#endif - -#endif//__TRACYMETAL_HMM__ diff --git a/src/third_party/tracy/tracy/TracyOpenCL.hpp b/src/third_party/tracy/tracy/TracyOpenCL.hpp deleted file mode 100644 index ede5c461..00000000 --- a/src/third_party/tracy/tracy/TracyOpenCL.hpp +++ /dev/null @@ -1,414 +0,0 @@ -#ifndef __TRACYOPENCL_HPP__ -#define __TRACYOPENCL_HPP__ - -#if !defined TRACY_ENABLE - -#define TracyCLContext(c, x) nullptr -#define TracyCLDestroy(c) -#define TracyCLContextName(c, x, y) - -#define TracyCLNamedZone(c, x, y, z) -#define TracyCLNamedZoneC(c, x, y, z, w) -#define TracyCLZone(c, x) -#define TracyCLZoneC(c, x, y) -#define TracyCLZoneTransient(c,x,y,z) - -#define TracyCLNamedZoneS(c, x, y, z, w) -#define TracyCLNamedZoneCS(c, x, y, z, w, v) -#define TracyCLZoneS(c, x, y) -#define TracyCLZoneCS(c, x, y, z) -#define TracyCLZoneTransientS(c,x,y,z,w) - -#define TracyCLNamedZoneSetEvent(x, e) -#define TracyCLZoneSetEvent(e) - -#define TracyCLCollect(c) - -namespace tracy -{ - class OpenCLCtxScope {}; -} - -using TracyCLCtx = void*; - -#else - -#include - -#include -#include -#include - -#include "Tracy.hpp" -#include "../client/TracyCallstack.hpp" -#include "../client/TracyProfiler.hpp" -#include "../common/TracyAlloc.hpp" - -#define TRACY_CL_TO_STRING_INDIRECT(T) #T -#define TRACY_CL_TO_STRING(T) TRACY_CL_TO_STRING_INDIRECT(T) -#define TRACY_CL_ASSERT(p) if(!(p)) { \ - TracyMessageL( "TRACY_CL_ASSERT failed on " TracyFile ":" TRACY_CL_TO_STRING(TracyLine) ); \ - assert(false && "TRACY_CL_ASSERT failed"); \ -} -#define TRACY_CL_CHECK_ERROR(err) if(err != CL_SUCCESS) { \ - std::ostringstream oss; \ - oss << "TRACY_CL_CHECK_ERROR failed on " << TracyFile << ":" << TracyLine \ - << ": error code " << err; \ - auto msg = oss.str(); \ - TracyMessage(msg.data(), msg.size()); \ - assert(false && "TRACY_CL_CHECK_ERROR failed"); \ -} - -namespace tracy { - - enum class EventPhase : uint8_t - { - Begin, - End - }; - - struct EventInfo - { - cl_event event; - EventPhase phase; - }; - - class OpenCLCtx - { - public: - enum { QueryCount = 64 * 1024 }; - - OpenCLCtx(cl_context context, cl_device_id device) - : m_contextId(GetGpuCtxCounter().fetch_add(1, std::memory_order_relaxed)) - , m_head(0) - , m_tail(0) - { - int64_t tcpu, tgpu; - TRACY_CL_ASSERT(m_contextId != 255); - - cl_int err = CL_SUCCESS; - cl_command_queue queue = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &err); - TRACY_CL_CHECK_ERROR(err) - uint32_t dummyValue = 42; - cl_mem dummyBuffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(uint32_t), nullptr, &err); - TRACY_CL_CHECK_ERROR(err) - cl_event writeBufferEvent; - TRACY_CL_CHECK_ERROR(clEnqueueWriteBuffer(queue, dummyBuffer, CL_FALSE, 0, sizeof(uint32_t), &dummyValue, 0, nullptr, &writeBufferEvent)); - TRACY_CL_CHECK_ERROR(clWaitForEvents(1, &writeBufferEvent)); - - tcpu = Profiler::GetTime(); - - cl_int eventStatus; - TRACY_CL_CHECK_ERROR(clGetEventInfo(writeBufferEvent, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &eventStatus, nullptr)); - TRACY_CL_ASSERT(eventStatus == CL_COMPLETE); - TRACY_CL_CHECK_ERROR(clGetEventProfilingInfo(writeBufferEvent, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &tgpu, nullptr)); - TRACY_CL_CHECK_ERROR(clReleaseEvent(writeBufferEvent)); - TRACY_CL_CHECK_ERROR(clReleaseMemObject(dummyBuffer)); - TRACY_CL_CHECK_ERROR(clReleaseCommandQueue(queue)); - - auto item = Profiler::QueueSerial(); - MemWrite(&item->hdr.type, QueueType::GpuNewContext); - MemWrite(&item->gpuNewContext.cpuTime, tcpu); - MemWrite(&item->gpuNewContext.gpuTime, tgpu); - memset(&item->gpuNewContext.thread, 0, sizeof(item->gpuNewContext.thread)); - MemWrite(&item->gpuNewContext.period, 1.0f); - MemWrite(&item->gpuNewContext.type, GpuContextType::OpenCL); - MemWrite(&item->gpuNewContext.context, (uint8_t) m_contextId); - MemWrite(&item->gpuNewContext.flags, (uint8_t)0); -#ifdef TRACY_ON_DEMAND - GetProfiler().DeferItem(*item); -#endif - Profiler::QueueSerialFinish(); - } - - void Name( const char* name, uint16_t len ) - { - auto ptr = (char*)tracy_malloc( len ); - memcpy( ptr, name, len ); - - auto item = Profiler::QueueSerial(); - MemWrite( &item->hdr.type, QueueType::GpuContextName ); - MemWrite( &item->gpuContextNameFat.context, (uint8_t)m_contextId ); - MemWrite( &item->gpuContextNameFat.ptr, (uint64_t)ptr ); - MemWrite( &item->gpuContextNameFat.size, len ); -#ifdef TRACY_ON_DEMAND - GetProfiler().DeferItem( *item ); -#endif - Profiler::QueueSerialFinish(); - } - - void Collect() - { - ZoneScopedC(Color::Red4); - - if (m_tail == m_head) return; - -#ifdef TRACY_ON_DEMAND - if (!GetProfiler().IsConnected()) - { - m_head = m_tail = 0; - } -#endif - - for (; m_tail != m_head; m_tail = (m_tail + 1) % QueryCount) - { - EventInfo eventInfo = GetQuery(m_tail); - cl_int eventStatus; - cl_int err = clGetEventInfo(eventInfo.event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &eventStatus, nullptr); - if (err != CL_SUCCESS) - { - std::ostringstream oss; - oss << "clGetEventInfo falied with error code " << err << ", on event " << eventInfo.event << ", skipping..."; - auto msg = oss.str(); - TracyMessage(msg.data(), msg.size()); - if (eventInfo.event == nullptr) { - TracyMessageL("A TracyCLZone must be paird with a TracyCLZoneSetEvent, check your code!"); - } - assert(false && "clGetEventInfo failed, maybe a TracyCLZone is not paired with TracyCLZoneSetEvent"); - continue; - } - if (eventStatus != CL_COMPLETE) return; - - cl_int eventInfoQuery = (eventInfo.phase == EventPhase::Begin) - ? CL_PROFILING_COMMAND_START - : CL_PROFILING_COMMAND_END; - - cl_ulong eventTimeStamp = 0; - err = clGetEventProfilingInfo(eventInfo.event, eventInfoQuery, sizeof(cl_ulong), &eventTimeStamp, nullptr); - if (err == CL_PROFILING_INFO_NOT_AVAILABLE) - { - TracyMessageL("command queue is not created with CL_QUEUE_PROFILING_ENABLE flag, check your code!"); - assert(false && "command queue is not created with CL_QUEUE_PROFILING_ENABLE flag"); - } - else - TRACY_CL_CHECK_ERROR(err); - - TRACY_CL_ASSERT(eventTimeStamp != 0); - - auto item = Profiler::QueueSerial(); - MemWrite(&item->hdr.type, QueueType::GpuTime); - MemWrite(&item->gpuTime.gpuTime, (int64_t)eventTimeStamp); - MemWrite(&item->gpuTime.queryId, (uint16_t)m_tail); - MemWrite(&item->gpuTime.context, m_contextId); - Profiler::QueueSerialFinish(); - - if (eventInfo.phase == EventPhase::End) - { - // Done with the event, so release it - TRACY_CL_CHECK_ERROR(clReleaseEvent(eventInfo.event)); - } - } - } - - tracy_force_inline uint8_t GetId() const - { - return m_contextId; - } - - tracy_force_inline unsigned int NextQueryId(EventInfo eventInfo) - { - const auto id = m_head; - m_head = (m_head + 1) % QueryCount; - TRACY_CL_ASSERT(m_head != m_tail); - m_query[id] = eventInfo; - return id; - } - - tracy_force_inline EventInfo& GetQuery(unsigned int id) - { - TRACY_CL_ASSERT(id < QueryCount); - return m_query[id]; - } - - private: - - unsigned int m_contextId; - - EventInfo m_query[QueryCount]; - unsigned int m_head; // index at which a new event should be inserted - unsigned int m_tail; // oldest event - - }; - - class OpenCLCtxScope { - public: - tracy_force_inline OpenCLCtxScope(OpenCLCtx* ctx, const SourceLocationData* srcLoc, bool is_active) -#ifdef TRACY_ON_DEMAND - : m_active(is_active&& GetProfiler().IsConnected()) -#else - : m_active(is_active) -#endif - , m_ctx(ctx) - , m_event(nullptr) - { - if (!m_active) return; - - m_beginQueryId = ctx->NextQueryId(EventInfo{ nullptr, EventPhase::Begin }); - - auto item = Profiler::QueueSerial(); - MemWrite(&item->hdr.type, QueueType::GpuZoneBeginSerial); - MemWrite(&item->gpuZoneBegin.cpuTime, Profiler::GetTime()); - MemWrite(&item->gpuZoneBegin.srcloc, (uint64_t)srcLoc); - MemWrite(&item->gpuZoneBegin.thread, GetThreadHandle()); - MemWrite(&item->gpuZoneBegin.queryId, (uint16_t)m_beginQueryId); - MemWrite(&item->gpuZoneBegin.context, ctx->GetId()); - Profiler::QueueSerialFinish(); - } - - tracy_force_inline OpenCLCtxScope(OpenCLCtx* ctx, const SourceLocationData* srcLoc, int32_t depth, bool is_active) -#ifdef TRACY_ON_DEMAND - : m_active(is_active&& GetProfiler().IsConnected()) -#else - : m_active(is_active) -#endif - , m_ctx(ctx) - , m_event(nullptr) - { - if (!m_active) return; - - m_beginQueryId = ctx->NextQueryId(EventInfo{ nullptr, EventPhase::Begin }); - - GetProfiler().SendCallstack(depth); - - auto item = Profiler::QueueSerial(); - MemWrite(&item->hdr.type, QueueType::GpuZoneBeginCallstackSerial); - MemWrite(&item->gpuZoneBegin.cpuTime, Profiler::GetTime()); - MemWrite(&item->gpuZoneBegin.srcloc, (uint64_t)srcLoc); - MemWrite(&item->gpuZoneBegin.thread, GetThreadHandle()); - MemWrite(&item->gpuZoneBegin.queryId, (uint16_t)m_beginQueryId); - MemWrite(&item->gpuZoneBegin.context, ctx->GetId()); - Profiler::QueueSerialFinish(); - } - - tracy_force_inline OpenCLCtxScope(OpenCLCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, bool is_active) -#ifdef TRACY_ON_DEMAND - : m_active(is_active && GetProfiler().IsConnected()) -#else - : m_active(is_active) -#endif - , m_ctx(ctx) - , m_event(nullptr) - { - if (!m_active) return; - - m_beginQueryId = ctx->NextQueryId(EventInfo{ nullptr, EventPhase::Begin }); - - const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz ); - auto item = Profiler::QueueSerial(); - MemWrite( &item->hdr.type, QueueType::GpuZoneBeginAllocSrcLocSerial ); - MemWrite(&item->gpuZoneBegin.cpuTime, Profiler::GetTime()); - MemWrite(&item->gpuZoneBegin.srcloc, srcloc); - MemWrite(&item->gpuZoneBegin.thread, GetThreadHandle()); - MemWrite(&item->gpuZoneBegin.queryId, (uint16_t)m_beginQueryId); - MemWrite(&item->gpuZoneBegin.context, ctx->GetId()); - Profiler::QueueSerialFinish(); - } - - tracy_force_inline OpenCLCtxScope(OpenCLCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int32_t depth, bool is_active) -#ifdef TRACY_ON_DEMAND - : m_active(is_active && GetProfiler().IsConnected()) -#else - : m_active(is_active) -#endif - , m_ctx(ctx) - , m_event(nullptr) - { - if (!m_active) return; - - m_beginQueryId = ctx->NextQueryId(EventInfo{ nullptr, EventPhase::Begin }); - - const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz ); - auto item = Profiler::QueueSerialCallstack( Callstack( depth ) ); - MemWrite(&item->hdr.type, QueueType::GpuZoneBeginAllocSrcLocCallstackSerial); - MemWrite(&item->gpuZoneBegin.cpuTime, Profiler::GetTime()); - MemWrite(&item->gpuZoneBegin.srcloc, srcloc); - MemWrite(&item->gpuZoneBegin.thread, GetThreadHandle()); - MemWrite(&item->gpuZoneBegin.queryId, (uint16_t)m_beginQueryId); - MemWrite(&item->gpuZoneBegin.context, ctx->GetId()); - Profiler::QueueSerialFinish(); - } - - tracy_force_inline void SetEvent(cl_event event) - { - if (!m_active) return; - m_event = event; - TRACY_CL_CHECK_ERROR(clRetainEvent(m_event)); - m_ctx->GetQuery(m_beginQueryId).event = m_event; - } - - tracy_force_inline ~OpenCLCtxScope() - { - if (!m_active) return; - const auto queryId = m_ctx->NextQueryId(EventInfo{ m_event, EventPhase::End }); - - auto item = Profiler::QueueSerial(); - MemWrite(&item->hdr.type, QueueType::GpuZoneEndSerial); - MemWrite(&item->gpuZoneEnd.cpuTime, Profiler::GetTime()); - MemWrite(&item->gpuZoneEnd.thread, GetThreadHandle()); - MemWrite(&item->gpuZoneEnd.queryId, (uint16_t)queryId); - MemWrite(&item->gpuZoneEnd.context, m_ctx->GetId()); - Profiler::QueueSerialFinish(); - } - - const bool m_active; - OpenCLCtx* m_ctx; - cl_event m_event; - unsigned int m_beginQueryId; - }; - - static inline OpenCLCtx* CreateCLContext(cl_context context, cl_device_id device) - { - auto ctx = (OpenCLCtx*)tracy_malloc(sizeof(OpenCLCtx)); - new (ctx) OpenCLCtx(context, device); - return ctx; - } - - static inline void DestroyCLContext(OpenCLCtx* ctx) - { - ctx->~OpenCLCtx(); - tracy_free(ctx); - } - -} // namespace tracy - -using TracyCLCtx = tracy::OpenCLCtx*; - -#define TracyCLContext(ctx, device) tracy::CreateCLContext(ctx, device); -#define TracyCLDestroy(ctx) tracy::DestroyCLContext(ctx); -#define TracyCLContextName(ctx, name, size) ctx->Name(name, size); -#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK -# define TracyCLNamedZone(ctx, varname, name, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::OpenCLCtxScope varname(ctx, &TracyConcat(__tracy_gpu_source_location,TracyLine), TRACY_CALLSTACK, active ); -# define TracyCLNamedZoneC(ctx, varname, name, color, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::OpenCLCtxScope varname(ctx, &TracyConcat(__tracy_gpu_source_location,TracyLine), TRACY_CALLSTACK, active ); -# define TracyCLZone(ctx, name) TracyCLNamedZoneS(ctx, __tracy_gpu_zone, name, TRACY_CALLSTACK, true) -# define TracyCLZoneC(ctx, name, color) TracyCLNamedZoneCS(ctx, __tracy_gpu_zone, name, color, TRACY_CALLSTACK, true) -# define TracyCLZoneTransient( ctx, varname, name, active ) tracy::OpenCLCtxScope varname( ctx, TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), TRACY_CALLSTACK, active ); -#else -# define TracyCLNamedZone(ctx, varname, name, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine){ name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::OpenCLCtxScope varname(ctx, &TracyConcat(__tracy_gpu_source_location,TracyLine), active); -# define TracyCLNamedZoneC(ctx, varname, name, color, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine){ name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::OpenCLCtxScope varname(ctx, &TracyConcat(__tracy_gpu_source_location,TracyLine), active); -# define TracyCLZone(ctx, name) TracyCLNamedZone(ctx, __tracy_gpu_zone, name, true) -# define TracyCLZoneC(ctx, name, color) TracyCLNamedZoneC(ctx, __tracy_gpu_zone, name, color, true ) -# define TracyCLZoneTransient( ctx, varname, name, active ) tracy::OpenCLCtxScope varname( ctx, TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), active ); -#endif - -#ifdef TRACY_HAS_CALLSTACK -# define TracyCLNamedZoneS(ctx, varname, name, depth, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine){ name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::OpenCLCtxScope varname(ctx, &TracyConcat(__tracy_gpu_source_location,TracyLine), depth, active); -# define TracyCLNamedZoneCS(ctx, varname, name, color, depth, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine){ name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::OpenCLCtxScope varname(ctx, &TracyConcat(__tracy_gpu_source_location,TracyLine), depth, active); -# define TracyCLZoneS(ctx, name, depth) TracyCLNamedZoneS(ctx, __tracy_gpu_zone, name, depth, true) -# define TracyCLZoneCS(ctx, name, color, depth) TracyCLNamedZoneCS(ctx, __tracy_gpu_zone, name, color, depth, true) -# define TracyCLZoneTransientS( ctx, varname, name, depth, active ) tracy::OpenCLCtxScope varname( ctx, TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), depth, active ); -#else -# define TracyCLNamedZoneS(ctx, varname, name, depth, active) TracyCLNamedZone(ctx, varname, name, active) -# define TracyCLNamedZoneCS(ctx, varname, name, color, depth, active) TracyCLNamedZoneC(ctx, varname, name, color, active) -# define TracyCLZoneS(ctx, name, depth) TracyCLZone(ctx, name) -# define TracyCLZoneCS(ctx, name, color, depth) TracyCLZoneC(ctx, name, color) -# define TracyCLZoneTransientS( ctx, varname, name, depth, active ) TracyCLZoneTransient( ctx, varname, name, active ) -#endif - -#define TracyCLNamedZoneSetEvent(varname, event) varname.SetEvent(event) -#define TracyCLZoneSetEvent(event) __tracy_gpu_zone.SetEvent(event) - -#define TracyCLCollect(ctx) ctx->Collect() - -#endif - -#endif diff --git a/src/third_party/tracy/tracy/TracyOpenGL.hpp b/src/third_party/tracy/tracy/TracyOpenGL.hpp deleted file mode 100644 index 30abd4fd..00000000 --- a/src/third_party/tracy/tracy/TracyOpenGL.hpp +++ /dev/null @@ -1,325 +0,0 @@ -#ifndef __TRACYOPENGL_HPP__ -#define __TRACYOPENGL_HPP__ - -#if !defined TRACY_ENABLE || defined __APPLE__ - -#define TracyGpuContext -#define TracyGpuContextName(x,y) -#define TracyGpuNamedZone(x,y,z) -#define TracyGpuNamedZoneC(x,y,z,w) -#define TracyGpuZone(x) -#define TracyGpuZoneC(x,y) -#define TracyGpuZoneTransient(x,y,z) -#define TracyGpuCollect - -#define TracyGpuNamedZoneS(x,y,z,w) -#define TracyGpuNamedZoneCS(x,y,z,w,a) -#define TracyGpuZoneS(x,y) -#define TracyGpuZoneCS(x,y,z) -#define TracyGpuZoneTransientS(x,y,z,w) - -namespace tracy -{ -struct SourceLocationData; -class GpuCtxScope -{ -public: - GpuCtxScope( const SourceLocationData*, bool ) {} - GpuCtxScope( const SourceLocationData*, int32_t, bool ) {} -}; -} - -#else - -#include -#include -#include - -#include "Tracy.hpp" -#include "../client/TracyProfiler.hpp" -#include "../client/TracyCallstack.hpp" -#include "../common/TracyAlign.hpp" -#include "../common/TracyAlloc.hpp" - -#if !defined GL_TIMESTAMP && defined GL_TIMESTAMP_EXT -# define GL_TIMESTAMP GL_TIMESTAMP_EXT -# define GL_QUERY_COUNTER_BITS GL_QUERY_COUNTER_BITS_EXT -# define glGetQueryObjectiv glGetQueryObjectivEXT -# define glGetQueryObjectui64v glGetQueryObjectui64vEXT -# define glQueryCounter glQueryCounterEXT -#endif - -#define TracyGpuContext tracy::GetGpuCtx().ptr = (tracy::GpuCtx*)tracy::tracy_malloc( sizeof( tracy::GpuCtx ) ); new(tracy::GetGpuCtx().ptr) tracy::GpuCtx; -#define TracyGpuContextName( name, size ) tracy::GetGpuCtx().ptr->Name( name, size ); -#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK -# define TracyGpuNamedZone( varname, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,TracyLine), TRACY_CALLSTACK, active ); -# define TracyGpuNamedZoneC( varname, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,TracyLine), TRACY_CALLSTACK, active ); -# define TracyGpuZone( name ) TracyGpuNamedZoneS( ___tracy_gpu_zone, name, TRACY_CALLSTACK, true ) -# define TracyGpuZoneC( name, color ) TracyGpuNamedZoneCS( ___tracy_gpu_zone, name, color, TRACY_CALLSTACK, true ) -# define TracyGpuZoneTransient( varname, name, active ) tracy::GpuCtxScope varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), TRACY_CALLSTACK, active ); -#else -# define TracyGpuNamedZone( varname, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,TracyLine), active ); -# define TracyGpuNamedZoneC( varname, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,TracyLine), active ); -# define TracyGpuZone( name ) TracyGpuNamedZone( ___tracy_gpu_zone, name, true ) -# define TracyGpuZoneC( name, color ) TracyGpuNamedZoneC( ___tracy_gpu_zone, name, color, true ) -# define TracyGpuZoneTransient( varname, name, active ) tracy::GpuCtxScope varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), active ); -#endif -#define TracyGpuCollect tracy::GetGpuCtx().ptr->Collect(); - -#ifdef TRACY_HAS_CALLSTACK -# define TracyGpuNamedZoneS( varname, name, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,TracyLine), depth, active ); -# define TracyGpuNamedZoneCS( varname, name, color, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,TracyLine), depth, active ); -# define TracyGpuZoneS( name, depth ) TracyGpuNamedZoneS( ___tracy_gpu_zone, name, depth, true ) -# define TracyGpuZoneCS( name, color, depth ) TracyGpuNamedZoneCS( ___tracy_gpu_zone, name, color, depth, true ) -# define TracyGpuZoneTransientS( varname, name, depth, active ) tracy::GpuCtxScope varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), depth, active ); -#else -# define TracyGpuNamedZoneS( varname, name, depth, active ) TracyGpuNamedZone( varname, name, active ) -# define TracyGpuNamedZoneCS( varname, name, color, depth, active ) TracyGpuNamedZoneC( varname, name, color, active ) -# define TracyGpuZoneS( name, depth ) TracyGpuZone( name ) -# define TracyGpuZoneCS( name, color, depth ) TracyGpuZoneC( name, color ) -# define TracyGpuZoneTransientS( varname, name, depth, active ) TracyGpuZoneTransient( varname, name, active ) -#endif - -namespace tracy -{ - -class GpuCtx -{ - friend class GpuCtxScope; - - enum { QueryCount = 64 * 1024 }; - -public: - GpuCtx() - : m_context( GetGpuCtxCounter().fetch_add( 1, std::memory_order_relaxed ) ) - , m_head( 0 ) - , m_tail( 0 ) - { - assert( m_context != 255 ); - - glGenQueries( QueryCount, m_query ); - - int64_t tgpu; - glGetInteger64v( GL_TIMESTAMP, &tgpu ); - int64_t tcpu = Profiler::GetTime(); - - GLint bits; - glGetQueryiv( GL_TIMESTAMP, GL_QUERY_COUNTER_BITS, &bits ); - - const float period = 1.f; - const auto thread = GetThreadHandle(); - TracyLfqPrepare( QueueType::GpuNewContext ); - MemWrite( &item->gpuNewContext.cpuTime, tcpu ); - MemWrite( &item->gpuNewContext.gpuTime, tgpu ); - MemWrite( &item->gpuNewContext.thread, thread ); - MemWrite( &item->gpuNewContext.period, period ); - MemWrite( &item->gpuNewContext.context, m_context ); - MemWrite( &item->gpuNewContext.flags, uint8_t( 0 ) ); - MemWrite( &item->gpuNewContext.type, GpuContextType::OpenGl ); - -#ifdef TRACY_ON_DEMAND - GetProfiler().DeferItem( *item ); -#endif - - TracyLfqCommit; - } - - void Name( const char* name, uint16_t len ) - { - auto ptr = (char*)tracy_malloc( len ); - memcpy( ptr, name, len ); - - TracyLfqPrepare( QueueType::GpuContextName ); - MemWrite( &item->gpuContextNameFat.context, m_context ); - MemWrite( &item->gpuContextNameFat.ptr, (uint64_t)ptr ); - MemWrite( &item->gpuContextNameFat.size, len ); -#ifdef TRACY_ON_DEMAND - GetProfiler().DeferItem( *item ); -#endif - TracyLfqCommit; - } - - void Collect() - { - ZoneScopedC( Color::Red4 ); - - if( m_tail == m_head ) return; - -#ifdef TRACY_ON_DEMAND - if( !GetProfiler().IsConnected() ) - { - m_head = m_tail = 0; - return; - } -#endif - - while( m_tail != m_head ) - { - GLint available; - glGetQueryObjectiv( m_query[m_tail], GL_QUERY_RESULT_AVAILABLE, &available ); - if( !available ) return; - - uint64_t time; - glGetQueryObjectui64v( m_query[m_tail], GL_QUERY_RESULT, &time ); - - TracyLfqPrepare( QueueType::GpuTime ); - MemWrite( &item->gpuTime.gpuTime, (int64_t)time ); - MemWrite( &item->gpuTime.queryId, (uint16_t)m_tail ); - MemWrite( &item->gpuTime.context, m_context ); - TracyLfqCommit; - - m_tail = ( m_tail + 1 ) % QueryCount; - } - } - -private: - tracy_force_inline unsigned int NextQueryId() - { - const auto id = m_head; - m_head = ( m_head + 1 ) % QueryCount; - assert( m_head != m_tail ); - return id; - } - - tracy_force_inline unsigned int TranslateOpenGlQueryId( unsigned int id ) - { - return m_query[id]; - } - - tracy_force_inline uint8_t GetId() const - { - return m_context; - } - - unsigned int m_query[QueryCount]; - uint8_t m_context; - - unsigned int m_head; - unsigned int m_tail; -}; - -class GpuCtxScope -{ -public: - tracy_force_inline GpuCtxScope( const SourceLocationData* srcloc, bool is_active ) -#ifdef TRACY_ON_DEMAND - : m_active( is_active && GetProfiler().IsConnected() ) -#else - : m_active( is_active ) -#endif - { - if( !m_active ) return; - - const auto queryId = GetGpuCtx().ptr->NextQueryId(); - glQueryCounter( GetGpuCtx().ptr->TranslateOpenGlQueryId( queryId ), GL_TIMESTAMP ); - - TracyLfqPrepare( QueueType::GpuZoneBegin ); - MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() ); - memset( &item->gpuZoneBegin.thread, 0, sizeof( item->gpuZoneBegin.thread ) ); - MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) ); - MemWrite( &item->gpuZoneBegin.context, GetGpuCtx().ptr->GetId() ); - MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc ); - TracyLfqCommit; - } - - tracy_force_inline GpuCtxScope( const SourceLocationData* srcloc, int32_t depth, bool is_active ) -#ifdef TRACY_ON_DEMAND - : m_active( is_active && GetProfiler().IsConnected() ) -#else - : m_active( is_active ) -#endif - { - if( !m_active ) return; - - const auto queryId = GetGpuCtx().ptr->NextQueryId(); - glQueryCounter( GetGpuCtx().ptr->TranslateOpenGlQueryId( queryId ), GL_TIMESTAMP ); - -#ifdef TRACY_FIBERS - TracyLfqPrepare( QueueType::GpuZoneBegin ); - memset( &item->gpuZoneBegin.thread, 0, sizeof( item->gpuZoneBegin.thread ) ); -#else - GetProfiler().SendCallstack( depth ); - TracyLfqPrepare( QueueType::GpuZoneBeginCallstack ); - MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() ); -#endif - MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() ); - MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) ); - MemWrite( &item->gpuZoneBegin.context, GetGpuCtx().ptr->GetId() ); - MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc ); - TracyLfqCommit; - } - - tracy_force_inline GpuCtxScope( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, bool is_active ) -#ifdef TRACY_ON_DEMAND - : m_active( is_active && GetProfiler().IsConnected() ) -#else - : m_active( is_active ) -#endif - { - if( !m_active ) return; - - const auto queryId = GetGpuCtx().ptr->NextQueryId(); - glQueryCounter( GetGpuCtx().ptr->TranslateOpenGlQueryId( queryId ), GL_TIMESTAMP ); - - TracyLfqPrepare( QueueType::GpuZoneBeginAllocSrcLoc ); - const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz ); - MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() ); - memset( &item->gpuZoneBegin.thread, 0, sizeof( item->gpuZoneBegin.thread ) ); - MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) ); - MemWrite( &item->gpuZoneBegin.context, GetGpuCtx().ptr->GetId() ); - MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc ); - TracyLfqCommit; - } - - tracy_force_inline GpuCtxScope( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int32_t depth, bool is_active ) -#ifdef TRACY_ON_DEMAND - : m_active( is_active && GetProfiler().IsConnected() ) -#else - : m_active( is_active ) -#endif - { - if( !m_active ) return; - - const auto queryId = GetGpuCtx().ptr->NextQueryId(); - glQueryCounter( GetGpuCtx().ptr->TranslateOpenGlQueryId( queryId ), GL_TIMESTAMP ); - -#ifdef TRACY_FIBERS - TracyLfqPrepare( QueueType::GpuZoneBeginAllocSrcLoc ); - memset( &item->gpuZoneBegin.thread, 0, sizeof( item->gpuZoneBegin.thread ) ); -#else - GetProfiler().SendCallstack( depth ); - TracyLfqPrepare( QueueType::GpuZoneBeginAllocSrcLocCallstack ); - MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() ); -#endif - const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz ); - MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() ); - MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) ); - MemWrite( &item->gpuZoneBegin.context, GetGpuCtx().ptr->GetId() ); - MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc ); - TracyLfqCommit; - } - - tracy_force_inline ~GpuCtxScope() - { - if( !m_active ) return; - - const auto queryId = GetGpuCtx().ptr->NextQueryId(); - glQueryCounter( GetGpuCtx().ptr->TranslateOpenGlQueryId( queryId ), GL_TIMESTAMP ); - - TracyLfqPrepare( QueueType::GpuZoneEnd ); - MemWrite( &item->gpuZoneEnd.cpuTime, Profiler::GetTime() ); - memset( &item->gpuZoneEnd.thread, 0, sizeof( item->gpuZoneEnd.thread ) ); - MemWrite( &item->gpuZoneEnd.queryId, uint16_t( queryId ) ); - MemWrite( &item->gpuZoneEnd.context, GetGpuCtx().ptr->GetId() ); - TracyLfqCommit; - } - -private: - const bool m_active; -}; - -} - -#endif - -#endif diff --git a/src/third_party/tracy/tracy/TracyVulkan.hpp b/src/third_party/tracy/tracy/TracyVulkan.hpp deleted file mode 100644 index 9e0973d2..00000000 --- a/src/third_party/tracy/tracy/TracyVulkan.hpp +++ /dev/null @@ -1,723 +0,0 @@ -#ifndef __TRACYVULKAN_HPP__ -#define __TRACYVULKAN_HPP__ - -#if !defined TRACY_ENABLE - -#define TracyVkContext(x,y,z,w) nullptr -#define TracyVkContextCalibrated(x,y,z,w,a,b) nullptr -#if defined VK_EXT_host_query_reset -#define TracyVkContextHostCalibrated(x,y,z,w,a) nullptr -#endif -#define TracyVkDestroy(x) -#define TracyVkContextName(c,x,y) -#define TracyVkNamedZone(c,x,y,z,w) -#define TracyVkNamedZoneC(c,x,y,z,w,a) -#define TracyVkZone(c,x,y) -#define TracyVkZoneC(c,x,y,z) -#define TracyVkZoneTransient(c,x,y,z,w) -#define TracyVkCollect(c,x) - -#define TracyVkNamedZoneS(c,x,y,z,w,a) -#define TracyVkNamedZoneCS(c,x,y,z,w,v,a) -#define TracyVkZoneS(c,x,y,z) -#define TracyVkZoneCS(c,x,y,z,w) -#define TracyVkZoneTransientS(c,x,y,z,w,a) - -namespace tracy -{ -class VkCtxScope {}; -} - -using TracyVkCtx = void*; - -#else - -#if !defined VK_NULL_HANDLE -# error "You must include Vulkan headers before including TracyVulkan.hpp" -#endif - -#include -#include -#include "Tracy.hpp" -#include "../client/TracyProfiler.hpp" -#include "../client/TracyCallstack.hpp" - -#include - -namespace tracy -{ - -#if defined TRACY_VK_USE_SYMBOL_TABLE -#define LoadVkDeviceCoreSymbols(Operation) \ - Operation(vkBeginCommandBuffer) \ - Operation(vkCmdResetQueryPool) \ - Operation(vkCmdWriteTimestamp) \ - Operation(vkCreateQueryPool) \ - Operation(vkDestroyQueryPool) \ - Operation(vkEndCommandBuffer) \ - Operation(vkGetQueryPoolResults) \ - Operation(vkQueueSubmit) \ - Operation(vkQueueWaitIdle) \ - Operation(vkResetQueryPool) - -#define LoadVkDeviceExtensionSymbols(Operation) \ - Operation(vkGetCalibratedTimestampsEXT) - -#define LoadVkInstanceExtensionSymbols(Operation) \ - Operation(vkGetPhysicalDeviceCalibrateableTimeDomainsEXT) - -#define LoadVkInstanceCoreSymbols(Operation) \ - Operation(vkGetPhysicalDeviceProperties) - -struct VkSymbolTable -{ -#define MAKE_PFN(name) PFN_##name name; - LoadVkDeviceCoreSymbols(MAKE_PFN) - LoadVkDeviceExtensionSymbols(MAKE_PFN) - LoadVkInstanceExtensionSymbols(MAKE_PFN) - LoadVkInstanceCoreSymbols(MAKE_PFN) -#undef MAKE_PFN -}; - -#define VK_FUNCTION_WRAPPER(callSignature) m_symbols.callSignature -#define CONTEXT_VK_FUNCTION_WRAPPER(callSignature) m_ctx->m_symbols.callSignature -#else -#define VK_FUNCTION_WRAPPER(callSignature) callSignature -#define CONTEXT_VK_FUNCTION_WRAPPER(callSignature) callSignature -#endif - -class VkCtx -{ - friend class VkCtxScope; - - enum { QueryCount = 64 * 1024 }; - -public: -#if defined TRACY_VK_USE_SYMBOL_TABLE - VkCtx( VkInstance instance, VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf, PFN_vkGetInstanceProcAddr instanceProcAddr, PFN_vkGetDeviceProcAddr deviceProcAddr, bool calibrated ) -#else - VkCtx( VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT vkGetPhysicalDeviceCalibrateableTimeDomainsEXT, PFN_vkGetCalibratedTimestampsEXT vkGetCalibratedTimestampsEXT) -#endif - : m_device( device ) - , m_timeDomain( VK_TIME_DOMAIN_DEVICE_EXT ) - , m_context( GetGpuCtxCounter().fetch_add( 1, std::memory_order_relaxed ) ) - , m_head( 0 ) - , m_tail( 0 ) - , m_oldCnt( 0 ) - , m_queryCount( QueryCount ) -#if !defined TRACY_VK_USE_SYMBOL_TABLE - , m_vkGetCalibratedTimestampsEXT( vkGetCalibratedTimestampsEXT ) -#endif - { - assert( m_context != 255 ); - -#if defined TRACY_VK_USE_SYMBOL_TABLE - PopulateSymbolTable(instance, instanceProcAddr, deviceProcAddr); - if ( calibrated ) - { - m_vkGetCalibratedTimestampsEXT = m_symbols.vkGetCalibratedTimestampsEXT; - } - -#endif - - if( VK_FUNCTION_WRAPPER( vkGetPhysicalDeviceCalibrateableTimeDomainsEXT ) && m_vkGetCalibratedTimestampsEXT ) - { - FindAvailableTimeDomains( physdev, VK_FUNCTION_WRAPPER( vkGetPhysicalDeviceCalibrateableTimeDomainsEXT ) ); - } - - CreateQueryPool(); - - VkCommandBufferBeginInfo beginInfo = {}; - beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; - beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; - - VkSubmitInfo submitInfo = {}; - submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; - submitInfo.commandBufferCount = 1; - submitInfo.pCommandBuffers = &cmdbuf; - - VK_FUNCTION_WRAPPER( vkBeginCommandBuffer( cmdbuf, &beginInfo ) ); - VK_FUNCTION_WRAPPER( vkCmdResetQueryPool( cmdbuf, m_query, 0, m_queryCount ) ); - VK_FUNCTION_WRAPPER( vkEndCommandBuffer( cmdbuf ) ); - VK_FUNCTION_WRAPPER( vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE ) ); - VK_FUNCTION_WRAPPER( vkQueueWaitIdle( queue ) ); - - int64_t tcpu, tgpu; - if( m_timeDomain == VK_TIME_DOMAIN_DEVICE_EXT ) - { - VK_FUNCTION_WRAPPER( vkBeginCommandBuffer( cmdbuf, &beginInfo ) ); - VK_FUNCTION_WRAPPER( vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, m_query, 0 ) ); - VK_FUNCTION_WRAPPER( vkEndCommandBuffer( cmdbuf ) ); - VK_FUNCTION_WRAPPER( vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE ) ); - VK_FUNCTION_WRAPPER( vkQueueWaitIdle( queue ) ); - - tcpu = Profiler::GetTime(); - VK_FUNCTION_WRAPPER( vkGetQueryPoolResults( device, m_query, 0, 1, sizeof( tgpu ), &tgpu, sizeof( tgpu ), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT ) ); - - VK_FUNCTION_WRAPPER( vkBeginCommandBuffer( cmdbuf, &beginInfo ) ); - VK_FUNCTION_WRAPPER( vkCmdResetQueryPool( cmdbuf, m_query, 0, 1 ) ); - VK_FUNCTION_WRAPPER( vkEndCommandBuffer( cmdbuf ) ); - VK_FUNCTION_WRAPPER( vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE ) ); - VK_FUNCTION_WRAPPER( vkQueueWaitIdle( queue ) ); - } - else - { - FindCalibratedTimestampDeviation(); - Calibrate( device, m_prevCalibration, tgpu ); - tcpu = Profiler::GetTime(); - } - - WriteInitialItem( physdev, tcpu, tgpu ); - - m_res = (int64_t*)tracy_malloc( sizeof( int64_t ) * m_queryCount ); - } - -#if defined VK_EXT_host_query_reset - /** - * This alternative constructor does not use command buffers and instead uses functionality from - * VK_EXT_host_query_reset (core with 1.2 and non-optional) and VK_EXT_calibrated_timestamps. This requires - * the physical device to have another time domain apart from DEVICE to be calibrateable. - */ -#if defined TRACY_VK_USE_SYMBOL_TABLE - VkCtx( VkInstance instance, VkPhysicalDevice physdev, VkDevice device, PFN_vkGetInstanceProcAddr instanceProcAddr, PFN_vkGetDeviceProcAddr deviceProcAddr ) -#else - VkCtx( VkPhysicalDevice physdev, VkDevice device, PFN_vkResetQueryPoolEXT vkResetQueryPool, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT vkGetPhysicalDeviceCalibrateableTimeDomainsEXT, PFN_vkGetCalibratedTimestampsEXT vkGetCalibratedTimestampsEXT ) -#endif - : m_device( device ) - , m_timeDomain( VK_TIME_DOMAIN_DEVICE_EXT ) - , m_context( GetGpuCtxCounter().fetch_add(1, std::memory_order_relaxed) ) - , m_head( 0 ) - , m_tail( 0 ) - , m_oldCnt( 0 ) - , m_queryCount( QueryCount ) -#if !defined TRACY_VK_USE_SYMBOL_TABLE - , m_vkGetCalibratedTimestampsEXT( vkGetCalibratedTimestampsEXT ) -#endif - { - assert( m_context != 255); - -#if defined TRACY_VK_USE_SYMBOL_TABLE - PopulateSymbolTable(instance, instanceProcAddr, deviceProcAddr); - m_vkGetCalibratedTimestampsEXT = m_symbols.vkGetCalibratedTimestampsEXT; -#endif - - assert( VK_FUNCTION_WRAPPER( vkResetQueryPool ) != nullptr ); - assert( VK_FUNCTION_WRAPPER( vkGetPhysicalDeviceCalibrateableTimeDomainsEXT ) != nullptr ); - assert( VK_FUNCTION_WRAPPER( vkGetCalibratedTimestampsEXT ) != nullptr ); - - FindAvailableTimeDomains( physdev, VK_FUNCTION_WRAPPER( vkGetPhysicalDeviceCalibrateableTimeDomainsEXT ) ); - - // We require a host time domain to be available to properly calibrate. - FindCalibratedTimestampDeviation(); - int64_t tgpu; - Calibrate( device, m_prevCalibration, tgpu ); - int64_t tcpu = Profiler::GetTime(); - - CreateQueryPool(); - VK_FUNCTION_WRAPPER( vkResetQueryPool( device, m_query, 0, m_queryCount ) ); - - WriteInitialItem( physdev, tcpu, tgpu ); - - // We need the buffer to be twice as large for availability values - size_t resSize = sizeof( int64_t ) * m_queryCount * 2; - m_res = (int64_t*)tracy_malloc( resSize ); - } -#endif - - ~VkCtx() - { - tracy_free( m_res ); - VK_FUNCTION_WRAPPER( vkDestroyQueryPool( m_device, m_query, nullptr ) ); - } - - void Name( const char* name, uint16_t len ) - { - auto ptr = (char*)tracy_malloc( len ); - memcpy( ptr, name, len ); - - auto item = Profiler::QueueSerial(); - MemWrite( &item->hdr.type, QueueType::GpuContextName ); - MemWrite( &item->gpuContextNameFat.context, m_context ); - MemWrite( &item->gpuContextNameFat.ptr, (uint64_t)ptr ); - MemWrite( &item->gpuContextNameFat.size, len ); -#ifdef TRACY_ON_DEMAND - GetProfiler().DeferItem( *item ); -#endif - Profiler::QueueSerialFinish(); - } - - void Collect( VkCommandBuffer cmdbuf ) - { - ZoneScopedC( Color::Red4 ); - - const uint64_t head = m_head.load(std::memory_order_relaxed); - if( m_tail == head ) return; - -#ifdef TRACY_ON_DEMAND - if( !GetProfiler().IsConnected() ) - { - VK_FUNCTION_WRAPPER( vkCmdResetQueryPool( cmdbuf, m_query, 0, m_queryCount ) ); - m_tail = head; - m_oldCnt = 0; - int64_t tgpu; - if( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT ) Calibrate( m_device, m_prevCalibration, tgpu ); - return; - } -#endif - assert( head > m_tail ); - - const unsigned int wrappedTail = (unsigned int)( m_tail % m_queryCount ); - - unsigned int cnt; - if( m_oldCnt != 0 ) - { - cnt = m_oldCnt; - m_oldCnt = 0; - } - else - { - cnt = (unsigned int)( head - m_tail ); - assert( cnt <= m_queryCount ); - if( wrappedTail + cnt > m_queryCount ) - { - cnt = m_queryCount - wrappedTail; - } - } - - - VK_FUNCTION_WRAPPER( vkGetQueryPoolResults( m_device, m_query, wrappedTail, cnt, sizeof( int64_t ) * m_queryCount * 2, m_res, sizeof( int64_t ) * 2, VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WITH_AVAILABILITY_BIT ) ); - - for( unsigned int idx=0; idxhdr.type, QueueType::GpuTime ); - MemWrite( &item->gpuTime.gpuTime, m_res[idx * 2] ); - MemWrite( &item->gpuTime.queryId, uint16_t( wrappedTail + idx ) ); - MemWrite( &item->gpuTime.context, m_context ); - Profiler::QueueSerialFinish(); - } - - if( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT ) - { - int64_t tgpu, tcpu; - Calibrate( m_device, tcpu, tgpu ); - const auto refCpu = Profiler::GetTime(); - const auto delta = tcpu - m_prevCalibration; - if( delta > 0 ) - { - m_prevCalibration = tcpu; - auto item = Profiler::QueueSerial(); - MemWrite( &item->hdr.type, QueueType::GpuCalibration ); - MemWrite( &item->gpuCalibration.gpuTime, tgpu ); - MemWrite( &item->gpuCalibration.cpuTime, refCpu ); - MemWrite( &item->gpuCalibration.cpuDelta, delta ); - MemWrite( &item->gpuCalibration.context, m_context ); - Profiler::QueueSerialFinish(); - } - } - - VK_FUNCTION_WRAPPER( vkCmdResetQueryPool( cmdbuf, m_query, wrappedTail, cnt ) ); - - m_tail += cnt; - } - - tracy_force_inline unsigned int NextQueryId() - { - const uint64_t id = m_head.fetch_add(1, std::memory_order_relaxed); - return id % m_queryCount; - } - - tracy_force_inline uint8_t GetId() const - { - return m_context; - } - - tracy_force_inline VkQueryPool GetQueryPool() const - { - return m_query; - } - -private: - tracy_force_inline void Calibrate( VkDevice device, int64_t& tCpu, int64_t& tGpu ) - { - assert( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT ); - VkCalibratedTimestampInfoEXT spec[2] = { - { VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, VK_TIME_DOMAIN_DEVICE_EXT }, - { VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, m_timeDomain }, - }; - uint64_t ts[2]; - uint64_t deviation; - do - { - m_vkGetCalibratedTimestampsEXT( device, 2, spec, ts, &deviation ); - } - while( deviation > m_deviation ); - -#if defined _WIN32 - tGpu = ts[0]; - tCpu = ts[1] * m_qpcToNs; -#elif defined __linux__ && defined CLOCK_MONOTONIC_RAW - tGpu = ts[0]; - tCpu = ts[1]; -#else - assert( false ); -#endif - } - - tracy_force_inline void CreateQueryPool() - { - VkQueryPoolCreateInfo poolInfo = {}; - poolInfo.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO; - poolInfo.queryCount = m_queryCount; - poolInfo.queryType = VK_QUERY_TYPE_TIMESTAMP; - while ( VK_FUNCTION_WRAPPER( vkCreateQueryPool( m_device, &poolInfo, nullptr, &m_query ) != VK_SUCCESS ) ) - { - m_queryCount /= 2; - poolInfo.queryCount = m_queryCount; - } - } - - tracy_force_inline void FindAvailableTimeDomains( VkPhysicalDevice physicalDevice, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT _vkGetPhysicalDeviceCalibrateableTimeDomainsEXT ) - { - uint32_t num; - _vkGetPhysicalDeviceCalibrateableTimeDomainsEXT( physicalDevice, &num, nullptr ); - if(num > 4) num = 4; - VkTimeDomainEXT data[4]; - _vkGetPhysicalDeviceCalibrateableTimeDomainsEXT( physicalDevice, &num, data ); - VkTimeDomainEXT supportedDomain = (VkTimeDomainEXT)-1; -#if defined _WIN32 - supportedDomain = VK_TIME_DOMAIN_QUERY_PERFORMANCE_COUNTER_EXT; -#elif defined __linux__ && defined CLOCK_MONOTONIC_RAW - supportedDomain = VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT; -#endif - for( uint32_t i=0; i deviation[i] ) { - minDeviation = deviation[i]; - } - } - m_deviation = minDeviation * 3 / 2; - -#if defined _WIN32 - m_qpcToNs = int64_t( 1000000000. / GetFrequencyQpc() ); -#endif - } - - tracy_force_inline void WriteInitialItem( VkPhysicalDevice physdev, int64_t tcpu, int64_t tgpu ) - { - uint8_t flags = 0; - if( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT ) flags |= GpuContextCalibration; - - VkPhysicalDeviceProperties prop; - VK_FUNCTION_WRAPPER( vkGetPhysicalDeviceProperties( physdev, &prop ) ); - const float period = prop.limits.timestampPeriod; - - auto item = Profiler::QueueSerial(); - MemWrite( &item->hdr.type, QueueType::GpuNewContext ); - MemWrite( &item->gpuNewContext.cpuTime, tcpu ); - MemWrite( &item->gpuNewContext.gpuTime, tgpu ); - memset( &item->gpuNewContext.thread, 0, sizeof( item->gpuNewContext.thread ) ); - MemWrite( &item->gpuNewContext.period, period ); - MemWrite( &item->gpuNewContext.context, m_context ); - MemWrite( &item->gpuNewContext.flags, flags ); - MemWrite( &item->gpuNewContext.type, GpuContextType::Vulkan ); - -#ifdef TRACY_ON_DEMAND - GetProfiler().DeferItem( *item ); -#endif - Profiler::QueueSerialFinish(); - } - -#if defined TRACY_VK_USE_SYMBOL_TABLE - void PopulateSymbolTable( VkInstance instance, PFN_vkGetInstanceProcAddr instanceProcAddr, PFN_vkGetDeviceProcAddr deviceProcAddr ) - { -#define VK_GET_DEVICE_SYMBOL( name ) \ - (PFN_##name)deviceProcAddr( m_device, #name ); -#define VK_LOAD_DEVICE_SYMBOL( name ) \ - m_symbols.name = VK_GET_DEVICE_SYMBOL( name ); -#define VK_GET_INSTANCE_SYMBOL( name ) \ - (PFN_##name)instanceProcAddr( instance, #name ); -#define VK_LOAD_INSTANCE_SYMBOL( name ) \ - m_symbols.name = VK_GET_INSTANCE_SYMBOL( name ); - - LoadVkDeviceCoreSymbols( VK_LOAD_DEVICE_SYMBOL ) - LoadVkDeviceExtensionSymbols( VK_LOAD_DEVICE_SYMBOL ) - LoadVkInstanceExtensionSymbols( VK_LOAD_INSTANCE_SYMBOL ) - LoadVkInstanceCoreSymbols( VK_LOAD_INSTANCE_SYMBOL ) -#undef VK_GET_DEVICE_SYMBOL -#undef VK_LOAD_DEVICE_SYMBOL -#undef VK_GET_INSTANCE_SYMBOL -#undef VK_LOAD_INSTANCE_SYMBOL - } -#endif - - VkDevice m_device; - VkQueryPool m_query; - VkTimeDomainEXT m_timeDomain; -#if defined TRACY_VK_USE_SYMBOL_TABLE - VkSymbolTable m_symbols; -#endif - uint64_t m_deviation; -#ifdef _WIN32 - int64_t m_qpcToNs; -#endif - int64_t m_prevCalibration; - uint8_t m_context; - - std::atomic m_head; - uint64_t m_tail; - unsigned int m_oldCnt; - unsigned int m_queryCount; - - int64_t* m_res; - - PFN_vkGetCalibratedTimestampsEXT m_vkGetCalibratedTimestampsEXT; -}; - -class VkCtxScope -{ -public: - tracy_force_inline VkCtxScope( VkCtx* ctx, const SourceLocationData* srcloc, VkCommandBuffer cmdbuf, bool is_active ) -#ifdef TRACY_ON_DEMAND - : m_active( is_active && GetProfiler().IsConnected() ) -#else - : m_active( is_active ) -#endif - { - if( !m_active ) return; - m_cmdbuf = cmdbuf; - m_ctx = ctx; - - const auto queryId = ctx->NextQueryId(); - CONTEXT_VK_FUNCTION_WRAPPER( vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, ctx->m_query, queryId ) ); - - auto item = Profiler::QueueSerial(); - MemWrite( &item->hdr.type, QueueType::GpuZoneBeginSerial ); - MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() ); - MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc ); - MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() ); - MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) ); - MemWrite( &item->gpuZoneBegin.context, ctx->GetId() ); - Profiler::QueueSerialFinish(); - } - - tracy_force_inline VkCtxScope( VkCtx* ctx, const SourceLocationData* srcloc, VkCommandBuffer cmdbuf, int32_t depth, bool is_active ) -#ifdef TRACY_ON_DEMAND - : m_active( is_active && GetProfiler().IsConnected() ) -#else - : m_active( is_active ) -#endif - { - if( !m_active ) return; - m_cmdbuf = cmdbuf; - m_ctx = ctx; - - const auto queryId = ctx->NextQueryId(); - CONTEXT_VK_FUNCTION_WRAPPER( vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, ctx->m_query, queryId ) ); - - auto item = Profiler::QueueSerialCallstack( Callstack( depth ) ); - MemWrite( &item->hdr.type, QueueType::GpuZoneBeginCallstackSerial ); - MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() ); - MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc ); - MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() ); - MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) ); - MemWrite( &item->gpuZoneBegin.context, ctx->GetId() ); - Profiler::QueueSerialFinish(); - } - - tracy_force_inline VkCtxScope( VkCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, VkCommandBuffer cmdbuf, bool is_active ) -#ifdef TRACY_ON_DEMAND - : m_active( is_active && GetProfiler().IsConnected() ) -#else - : m_active( is_active ) -#endif - { - if( !m_active ) return; - m_cmdbuf = cmdbuf; - m_ctx = ctx; - - const auto queryId = ctx->NextQueryId(); - CONTEXT_VK_FUNCTION_WRAPPER( vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, ctx->m_query, queryId ) ); - - const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz ); - auto item = Profiler::QueueSerial(); - MemWrite( &item->hdr.type, QueueType::GpuZoneBeginAllocSrcLocSerial ); - MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() ); - MemWrite( &item->gpuZoneBegin.srcloc, srcloc ); - MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() ); - MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) ); - MemWrite( &item->gpuZoneBegin.context, ctx->GetId() ); - Profiler::QueueSerialFinish(); - } - - tracy_force_inline VkCtxScope( VkCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, VkCommandBuffer cmdbuf, int32_t depth, bool is_active ) -#ifdef TRACY_ON_DEMAND - : m_active( is_active && GetProfiler().IsConnected() ) -#else - : m_active( is_active ) -#endif - { - if( !m_active ) return; - m_cmdbuf = cmdbuf; - m_ctx = ctx; - - const auto queryId = ctx->NextQueryId(); - CONTEXT_VK_FUNCTION_WRAPPER( vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, ctx->m_query, queryId ) ); - - const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz ); - auto item = Profiler::QueueSerialCallstack( Callstack( depth ) ); - MemWrite( &item->hdr.type, QueueType::GpuZoneBeginAllocSrcLocCallstackSerial ); - MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() ); - MemWrite( &item->gpuZoneBegin.srcloc, srcloc ); - MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() ); - MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) ); - MemWrite( &item->gpuZoneBegin.context, ctx->GetId() ); - Profiler::QueueSerialFinish(); - } - - tracy_force_inline ~VkCtxScope() - { - if( !m_active ) return; - - const auto queryId = m_ctx->NextQueryId(); - CONTEXT_VK_FUNCTION_WRAPPER( vkCmdWriteTimestamp( m_cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, m_ctx->m_query, queryId ) ); - - auto item = Profiler::QueueSerial(); - MemWrite( &item->hdr.type, QueueType::GpuZoneEndSerial ); - MemWrite( &item->gpuZoneEnd.cpuTime, Profiler::GetTime() ); - MemWrite( &item->gpuZoneEnd.thread, GetThreadHandle() ); - MemWrite( &item->gpuZoneEnd.queryId, uint16_t( queryId ) ); - MemWrite( &item->gpuZoneEnd.context, m_ctx->GetId() ); - Profiler::QueueSerialFinish(); - } - -private: - const bool m_active; - - VkCommandBuffer m_cmdbuf; - VkCtx* m_ctx; -}; - -#if defined TRACY_VK_USE_SYMBOL_TABLE -static inline VkCtx* CreateVkContext( VkInstance instance, VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf, PFN_vkGetInstanceProcAddr instanceProcAddr, PFN_vkGetDeviceProcAddr getDeviceProcAddr, bool calibrated = false ) -#else -static inline VkCtx* CreateVkContext( VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT gpdctd, PFN_vkGetCalibratedTimestampsEXT gct ) -#endif -{ - auto ctx = (VkCtx*)tracy_malloc( sizeof( VkCtx ) ); -#if defined TRACY_VK_USE_SYMBOL_TABLE - new(ctx) VkCtx( instance, physdev, device, queue, cmdbuf, instanceProcAddr, getDeviceProcAddr, calibrated ); -#else - new(ctx) VkCtx( physdev, device, queue, cmdbuf, gpdctd, gct ); -#endif - return ctx; -} - -#if defined VK_EXT_host_query_reset -#if defined TRACY_VK_USE_SYMBOL_TABLE -static inline VkCtx* CreateVkContext( VkInstance instance, VkPhysicalDevice physdev, VkDevice device, PFN_vkGetInstanceProcAddr instanceProcAddr, PFN_vkGetDeviceProcAddr getDeviceProcAddr ) -#else -static inline VkCtx* CreateVkContext( VkPhysicalDevice physdev, VkDevice device, PFN_vkResetQueryPoolEXT qpreset, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT gpdctd, PFN_vkGetCalibratedTimestampsEXT gct ) -#endif -{ - auto ctx = (VkCtx*)tracy_malloc( sizeof( VkCtx ) ); -#if defined TRACY_VK_USE_SYMBOL_TABLE - new(ctx) VkCtx( instance, physdev, device, instanceProcAddr, getDeviceProcAddr ); -#else - new(ctx) VkCtx( physdev, device, qpreset, gpdctd, gct ); -#endif - return ctx; -} -#endif - -static inline void DestroyVkContext( VkCtx* ctx ) -{ - ctx->~VkCtx(); - tracy_free( ctx ); -} - -} - -using TracyVkCtx = tracy::VkCtx*; - -#if defined TRACY_VK_USE_SYMBOL_TABLE -#define TracyVkContext( instance, physdev, device, queue, cmdbuf, instanceProcAddr, deviceProcAddr ) tracy::CreateVkContext( instance, physdev, device, queue, cmdbuf, instanceProcAddr, deviceProcAddr ); -#else -#define TracyVkContext( physdev, device, queue, cmdbuf ) tracy::CreateVkContext( physdev, device, queue, cmdbuf, nullptr, nullptr ); -#endif -#if defined TRACY_VK_USE_SYMBOL_TABLE -#define TracyVkContextCalibrated( instance, physdev, device, queue, cmdbuf, instanceProcAddr, deviceProcAddr ) tracy::CreateVkContext( instance, physdev, device, queue, cmdbuf, instanceProcAddr, deviceProcAddr, true ); -#else -#define TracyVkContextCalibrated( physdev, device, queue, cmdbuf, gpdctd, gct ) tracy::CreateVkContext( physdev, device, queue, cmdbuf, gpdctd, gct ); -#endif -#if defined VK_EXT_host_query_reset -#if defined TRACY_VK_USE_SYMBOL_TABLE -#define TracyVkContextHostCalibrated( instance, physdev, device, instanceProcAddr, deviceProcAddr ) tracy::CreateVkContext( instance, physdev, device, instanceProcAddr, deviceProcAddr ); -#else -#define TracyVkContextHostCalibrated( physdev, device, qpreset, gpdctd, gct ) tracy::CreateVkContext( physdev, device, qpreset, gpdctd, gct ); -#endif -#endif -#define TracyVkDestroy( ctx ) tracy::DestroyVkContext( ctx ); -#define TracyVkContextName( ctx, name, size ) ctx->Name( name, size ); -#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK -# define TracyVkNamedZone( ctx, varname, cmdbuf, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,TracyLine), cmdbuf, TRACY_CALLSTACK, active ); -# define TracyVkNamedZoneC( ctx, varname, cmdbuf, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,TracyLine), cmdbuf, TRACY_CALLSTACK, active ); -# define TracyVkZone( ctx, cmdbuf, name ) TracyVkNamedZoneS( ctx, ___tracy_gpu_zone, cmdbuf, name, TRACY_CALLSTACK, true ) -# define TracyVkZoneC( ctx, cmdbuf, name, color ) TracyVkNamedZoneCS( ctx, ___tracy_gpu_zone, cmdbuf, name, color, TRACY_CALLSTACK, true ) -# define TracyVkZoneTransient( ctx, varname, cmdbuf, name, active ) TracyVkZoneTransientS( ctx, varname, cmdbuf, name, TRACY_CALLSTACK, active ) -#else -# define TracyVkNamedZone( ctx, varname, cmdbuf, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,TracyLine), cmdbuf, active ); -# define TracyVkNamedZoneC( ctx, varname, cmdbuf, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,TracyLine), cmdbuf, active ); -# define TracyVkZone( ctx, cmdbuf, name ) TracyVkNamedZone( ctx, ___tracy_gpu_zone, cmdbuf, name, true ) -# define TracyVkZoneC( ctx, cmdbuf, name, color ) TracyVkNamedZoneC( ctx, ___tracy_gpu_zone, cmdbuf, name, color, true ) -# define TracyVkZoneTransient( ctx, varname, cmdbuf, name, active ) tracy::VkCtxScope varname( ctx, TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), cmdbuf, active ); -#endif -#define TracyVkCollect( ctx, cmdbuf ) ctx->Collect( cmdbuf ); - -#ifdef TRACY_HAS_CALLSTACK -# define TracyVkNamedZoneS( ctx, varname, cmdbuf, name, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,TracyLine), cmdbuf, depth, active ); -# define TracyVkNamedZoneCS( ctx, varname, cmdbuf, name, color, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,TracyLine), cmdbuf, depth, active ); -# define TracyVkZoneS( ctx, cmdbuf, name, depth ) TracyVkNamedZoneS( ctx, ___tracy_gpu_zone, cmdbuf, name, depth, true ) -# define TracyVkZoneCS( ctx, cmdbuf, name, color, depth ) TracyVkNamedZoneCS( ctx, ___tracy_gpu_zone, cmdbuf, name, color, depth, true ) -# define TracyVkZoneTransientS( ctx, varname, cmdbuf, name, depth, active ) tracy::VkCtxScope varname( ctx, TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), cmdbuf, depth, active ); -#else -# define TracyVkNamedZoneS( ctx, varname, cmdbuf, name, depth, active ) TracyVkNamedZone( ctx, varname, cmdbuf, name, active ) -# define TracyVkNamedZoneCS( ctx, varname, cmdbuf, name, color, depth, active ) TracyVkNamedZoneC( ctx, varname, cmdbuf, name, color, active ) -# define TracyVkZoneS( ctx, cmdbuf, name, depth ) TracyVkZone( ctx, cmdbuf, name ) -# define TracyVkZoneCS( ctx, cmdbuf, name, color, depth ) TracyVkZoneC( ctx, cmdbuf, name, color ) -# define TracyVkZoneTransientS( ctx, varname, cmdbuf, name, depth, active ) TracyVkZoneTransient( ctx, varname, cmdbuf, name, active ) -#endif - -#endif - -#endif