IsLanguageC/G -> IsCpu/Gpu. CachelineSize -> IsolationSize.

This commit is contained in:
jacob 2026-02-05 18:59:05 -06:00
parent 0c9e8ae79a
commit e7ade79ea5
17 changed files with 111 additions and 110 deletions

View File

@ -51,17 +51,21 @@
#error Unknown compiler
#endif
//- Language
//- Device
#if defined(__HLSL_VERSION)
#define IsLanguageC 0
#define IsLanguageG 1
#define IsCpu 0
#define IsGpu 1
#else
#define IsLanguageC 1
#define IsLanguageG 0
#define IsCpu 1
#define IsGpu 0
#endif
//- Platform system
#if defined(_WIN32)
//- Platform
#if IsGpu
#define IsPlatformWindows 0
#define IsPlatformMac 0
#define IsPlatformLinux 0
#elif defined(_WIN32)
#define IsPlatformWindows 1
#define IsPlatformMac 0
#define IsPlatformLinux 0
@ -73,31 +77,27 @@
#define IsPlatformWindows 0
#define IsPlatformMac 0
#define IsPlatformLinux 1
#elif IsLanguageG
#define IsPlatformWindows 0
#define IsPlatformMac 0
#define IsPlatformLinux 0
#else
#error Unknown platform
#endif
//- Architecture
#if defined(_M_AMD64) || defined(__amd64__)
#if IsGpu
#define IsArchX64 0
#define IsArchArm64 0
#elif defined(_M_AMD64) || defined(__amd64__)
#define IsArchX64 1
#define IsArchArm64 0
#elif defined(_M_ARM64) || defined(__aarch64__)
#define IsArchX64 0
#define IsArchArm64 1
#elif IsLanguageG
#define IsArchX64 0
#define IsArchArm64 0
#else
#error Unknown architecture
#endif
//- Cache line size
// TODO: Just hard-code to something like 128 or 256 if Apple silicon is ever supported
#define CachelineSize 64
//- False-sharing prevention
// TODO: Eventually hard-code to something like 128 if Apple silicon is ever supported
#define IsolationSize 64
//- Windows NTDDI version
// TODO: Remove this
@ -114,7 +114,7 @@
////////////////////////////////////////////////////////////
//~ C headers
#if IsLanguageC
#if IsCpu
// C standard library
#include <stdint.h>
#include <stdarg.h>
@ -176,7 +176,7 @@
//~ Common utility macros
//- Zero initialization
#if IsLanguageC
#if IsCpu
#define Zi {0}
#else
#define Zi {}
@ -417,7 +417,7 @@
#define Union(name) typedef union name name; union name
//- Enum
#if IsLanguageC
#if IsCpu
#define Enum(name) typedef enum name name; enum name
#else
#define Enum(name) enum name
@ -427,7 +427,7 @@
#define Embed(type, name) union { type; type name; }
//- alignof
#if IsLanguageC && (IsCompilerMsvc || __STDC_VERSION__ < 202311L)
#if IsCpu && (IsCompilerMsvc || __STDC_VERSION__ < 202311L)
#define alignof(type) __alignof(type)
#endif
@ -435,7 +435,7 @@
#define sizeof_field(type, field) sizeof(((type *)0)->field)
//- countof
#if IsLanguageC
#if IsCpu
#define countof(a) (sizeof(a) / sizeof((a)[0]))
#endif
@ -454,12 +454,12 @@
#define Packed(s) __pragma(pack(push, 1)) s __pragma(pack(pop))
#elif IsCompilerClang
#define Packed(s) s __attribute((__packed__))
#elif IsLanguageG
#elif IsGpu
#define Packed(s) s
#endif
//- alignas
#if (IsCompilerMsvc && IsLanguageC) || (IsLanguageC && __STDC_VERSION__ < 202311L)
#if (IsCompilerMsvc && IsCpu) || (IsCpu && __STDC_VERSION__ < 202311L)
#if IsCompilerMsvc
#define alignas(n) __declspec(align(n))
#else
@ -470,7 +470,7 @@
////////////////////////////////////////////////////////////
//~ Scalar types
#if IsLanguageC
#if IsCpu
typedef int8_t i8;
typedef int16_t i16;
typedef int32_t i32;
@ -483,7 +483,7 @@
typedef double f64;
typedef i8 b8;
typedef u32 b32;
#elif IsLanguageG
#elif IsGpu
typedef int i32;
typedef int64_t i64;
typedef uint u32;
@ -509,10 +509,10 @@
#define I64Min ((i64)0x8000000000000000LL)
//- Float infinity / nan
#if IsLanguageC
#if IsCpu
#define Inf INFINITY
#define Nan NAN
#elif IsLanguageG
#elif IsGpu
#define Inf (1.0 / 0.0)
#define Nan (0.0 / 0.0)
#endif
@ -527,7 +527,7 @@ Inline b32 MatchU128(u128 a, u128 b) { return a.lo == b.lo && a.hi == b.hi; }
////////////////////////////////////////////////////////////
//~ Atomics
#if IsLanguageC
#if IsCpu
//- Atomic types
Struct(Atomic8) { volatile i8 _v; };
Struct(Atomic16) { volatile i16 _v; };
@ -535,14 +535,10 @@ Inline b32 MatchU128(u128 a, u128 b) { return a.lo == b.lo && a.hi == b.hi; }
Struct(Atomic64) { volatile i64 _v; };
//- Cache-line isolated aligned atomic types
AlignedStruct(Atomic8Padded, CachelineSize) { Atomic8 v; };
AlignedStruct(Atomic16Padded, CachelineSize) { Atomic16 v; };
AlignedStruct(Atomic32Padded, CachelineSize) { Atomic32 v; };
AlignedStruct(Atomic64Padded, CachelineSize) { Atomic64 v; };
StaticAssert(alignof(Atomic8Padded) == CachelineSize && sizeof(Atomic8Padded) % CachelineSize == 0);
StaticAssert(alignof(Atomic16Padded) == CachelineSize && sizeof(Atomic16Padded) % CachelineSize == 0);
StaticAssert(alignof(Atomic32Padded) == CachelineSize && sizeof(Atomic32Padded) % CachelineSize == 0);
StaticAssert(alignof(Atomic64Padded) == CachelineSize && sizeof(Atomic64Padded) % CachelineSize == 0);
AlignedStruct(IsolatedAtomic8, IsolationSize) { Atomic8 v; };
AlignedStruct(IsolatedAtomic16, IsolationSize) { Atomic16 v; };
AlignedStruct(IsolatedAtomic32, IsolationSize) { Atomic32 v; };
AlignedStruct(IsolatedAtomic64, IsolationSize) { Atomic64 v; };
#if IsPlatformWindows && IsArchX64
//- 8 bit atomic ops
@ -579,11 +575,11 @@ Inline b32 MatchU128(u128 a, u128 b) { return a.lo == b.lo && a.hi == b.hi; }
////////////////////////////////////////////////////////////
//~ Ticket mutex
#if IsLanguageC
#if IsCpu
Struct(TicketMutex)
{
Atomic64Padded ticket;
Atomic64Padded serving;
IsolatedAtomic64 ticket;
IsolatedAtomic64 serving;
};
ForceInline void LockTicketMutex(TicketMutex *tm)
@ -605,7 +601,7 @@ Inline b32 MatchU128(u128 a, u128 b) { return a.lo == b.lo && a.hi == b.hi; }
////////////////////////////////////////////////////////////
//~ String types
#if IsLanguageC
#if IsCpu
#define STRING(size, data) ((String) { (size), (data) })
#define Zstr ((String) { 0, 0})
#define Lit(cstr_lit) (String) { (sizeof((cstr_lit)) - 1), (u8 *)(cstr_lit) }
@ -663,7 +659,7 @@ Inline b32 MatchU128(u128 a, u128 b) { return a.lo == b.lo && a.hi == b.hi; }
////////////////////////////////////////////////////////////
//~ Arena types
#if IsLanguageC
#if IsCpu
Struct(Arena)
{
u64 pos;
@ -681,7 +677,7 @@ Inline b32 MatchU128(u128 a, u128 b) { return a.lo == b.lo && a.hi == b.hi; }
////////////////////////////////////////////////////////////
//~ Resource types
#if IsLanguageC
#if IsCpu
#define ResourceEmbeddedMagic 0xfc060937194f4406
Struct(ResourceStore)
@ -698,7 +694,7 @@ Inline b32 MatchU128(u128 a, u128 b) { return a.lo == b.lo && a.hi == b.hi; }
////////////////////////////////////////////////////////////
//~ Cpu topology types
#if IsLanguageC
#if IsCpu
Struct(CpuTopologyInfo)
{
i32 num_logical_cores; // Includes P cores, Non-P cores, SMT siblings
@ -711,7 +707,7 @@ Inline b32 MatchU128(u128 a, u128 b) { return a.lo == b.lo && a.hi == b.hi; }
////////////////////////////////////////////////////////////
//~ Debug types
#if IsLanguageC
#if IsCpu
Struct(Callstack)
{
u64 count;
@ -722,11 +718,11 @@ Inline b32 MatchU128(u128 a, u128 b) { return a.lo == b.lo && a.hi == b.hi; }
////////////////////////////////////////////////////////////
//~ Shader linkage types
#if IsLanguageC
#if IsCpu
Struct(VertexShader) { ResourceKey resource; };
Struct(PixelShader) { ResourceKey resource; };
Struct(ComputeShader) { ResourceKey resource; };
#elif IsLanguageG
#elif IsGpu
#define Semantic(t, n) t n : n
#define ComputeShader(name, x) [numthreads(x, 1, 1)] void name(Semantic(u32, SV_DispatchThreadID))
#define ComputeShader2D(name, x, y) [numthreads(x, y, 1)] void name(Semantic(Vec2U32, SV_DispatchThreadID))
@ -738,7 +734,7 @@ Inline b32 MatchU128(u128 a, u128 b) { return a.lo == b.lo && a.hi == b.hi; }
////////////////////////////////////////////////////////////
//~ Exit callback types
#if IsLanguageC
#if IsCpu
#define ExitFuncDef(name) void name(void)
typedef ExitFuncDef(ExitFunc);
#endif
@ -765,7 +761,7 @@ Inline u64 MixU64s(u64 seed_a, u64 seed_b)
////////////////////////////////////////////////////////////
//~ @hookdecl Core api
#if IsLanguageC
#if IsCpu
StringList GetRawCommandline(void);
String GetAppDirectory(void);
void Echo(String msg);
@ -781,7 +777,7 @@ Inline u64 MixU64s(u64 seed_a, u64 seed_b)
////////////////////////////////////////////////////////////
//~ @hookdecl Swap
#if IsLanguageC
#if IsCpu
b32 IsSwappedIn(void);
b32 IsSwappingOut(void);
@ -792,7 +788,7 @@ Inline u64 MixU64s(u64 seed_a, u64 seed_b)
////////////////////////////////////////////////////////////
//~ @hookdecl Exit
#if IsLanguageC
#if IsCpu
void OnExit(ExitFunc *func);
void SignalExit(i32 code);
void ExitNow(i32 code);
@ -801,7 +797,7 @@ Inline u64 MixU64s(u64 seed_a, u64 seed_b)
////////////////////////////////////////////////////////////
//~ @hookdecl Bootstrap layers
#if IsLanguageC
#if IsCpu
void BootstrapLayers(void);
#endif

View File

@ -36,7 +36,7 @@ Struct(AsyncCtx)
AsyncTickCallbackNode *last_callback_node;
AsyncWorkerCtx worker;
Atomic64Padded signal;
IsolatedAtomic64 signal;
};
////////////////////////////////////////////////////////////

View File

@ -5,13 +5,13 @@
Struct(GstatCtx)
{
Atomic64Padded SockBytesSent;
Atomic64Padded SockBytesReceived;
Atomic64Padded DebugSteps;
IsolatedAtomic64 SockBytesSent;
IsolatedAtomic64 SockBytesReceived;
IsolatedAtomic64 DebugSteps;
Atomic64Padded NumArenas;
Atomic64Padded ArenaMemoryCommitted;
Atomic64Padded ArenaMemoryReserved;
IsolatedAtomic64 NumArenas;
IsolatedAtomic64 ArenaMemoryCommitted;
IsolatedAtomic64 ArenaMemoryReserved;
};

View File

@ -4,7 +4,7 @@
//- Api
#include "base.cgh"
#if IsLanguageC
#if IsCpu
#include "base_memory.h"
#include "base_arena.h"
#include "base_futex.h"
@ -28,12 +28,12 @@
#include "base_crum.h"
#include "base_tweak.h"
#include "base_state.h"
#elif IsLanguageG
#else
#include "base_shader.gh"
#endif
//- Impl
#if IsLanguageC
#if IsCpu
#include "base_tweak.c"
#include "base_arena.c"
#include "base_sync.c"
@ -51,10 +51,9 @@
#include "base_crum.c"
#include "base_async.c"
#include "base_state.c"
#else
#endif
//- Include base_win32
#if IsLanguageC && IsPlatformWindows
#if IsCpu && IsPlatformWindows
#include "base_win32/base_win32_inc.h"
#endif

View File

@ -54,9 +54,10 @@ Lock ExclusiveLockEx(Mutex *m, i32 spin)
}
}
#if IsRtcEnabled
Atomic32Set(&m->exclusive_thread_id, ThreadId());
#endif
if (IsRtcEnabled)
{
Atomic32Set(&m->exclusive_thread_id, ThreadId());
}
Lock lock = Zi;
lock.exclusive = 1;
@ -119,9 +120,10 @@ void Unlock(Lock *l)
Mutex *m = l->mutex;
if (l->exclusive)
{
#if IsRtcEnabled
Atomic32Set(&m->exclusive_thread_id, 0);
#endif
if (IsRtcEnabled)
{
Atomic32Set(&m->exclusive_thread_id, 0);
}
Atomic32Set(&m->v, 0);
}
else

View File

@ -3,18 +3,14 @@
#define DefaultMutexSpin 4000
AlignedStruct(Mutex, CachelineSize)
AlignedStruct(Mutex, IsolationSize)
{
// Bit 31 = Exclusive lock is held
// Bit 30 = Exclusive lock is pending
// Bit 0-30 = Shared locks count
Atomic32 v;
#if IsRtcEnabled
Atomic32 exclusive_thread_id;
#endif
};
StaticAssert(alignof(Mutex) == CachelineSize && sizeof(Mutex) % CachelineSize == 0);
Struct(Lock)
{
@ -25,18 +21,17 @@ Struct(Lock)
////////////////////////////////////////////////////////////
//~ Condition variable types
AlignedStruct(Cv, CachelineSize)
AlignedStruct(Cv, IsolationSize)
{
Atomic64 wake_gen;
};
StaticAssert(alignof(Cv) == CachelineSize && sizeof(Cv) % CachelineSize == 0);
////////////////////////////////////////////////////////////
//~ Fence types
Struct(Fence)
{
Atomic64Padded v;
IsolatedAtomic64 v;
};
////////////////////////////////////////////////////////////
@ -47,7 +42,7 @@ Struct(LazyInitBarrier)
// 0 = untouched
// 1 = initializing
// 2 = initialized
Atomic32Padded v;
IsolatedAtomic32 v;
};
////////////////////////////////////////////////////////////

View File

@ -27,7 +27,7 @@ String TweakEx(Arena *arena, TweakVar desc, b32 update_existing)
}
if (!e)
{
PushAlign(perm, CachelineSize);
PushAlign(perm, IsolationSize);
e = PushStruct(perm, TweakVarEntry);
e->hash = hash;
{
@ -43,7 +43,7 @@ String TweakEx(Arena *arena, TweakVar desc, b32 update_existing)
v->value = store_value;
v->initial = store_initial;
}
PushAlign(perm, CachelineSize);
PushAlign(perm, IsolationSize);
SllQueuePushN(bin->first, bin->last, e, next_in_bin);
SllQueuePushN(Base.tweak.first_entry, Base.tweak.last_entry, e, next_in_list);
Base.tweak.entries_count += 1;

View File

@ -4,23 +4,23 @@
#define MaxThreads 256
#define DefaultWaveLaneSpinCount 500
AlignedStruct(WaveCtx, CachelineSize)
AlignedStruct(WaveCtx, IsolationSize)
{
i32 lanes_count;
void *udata;
// Sync barrier
Atomic64Padded sync_gen;
Atomic32Padded sync_count;
IsolatedAtomic64 sync_gen;
IsolatedAtomic32 sync_count;
// Broadcast barrier
void *broadcast_data;
Atomic64Padded broadcast_gen;
Atomic64Padded ack_gen;
Atomic32Padded ack_count;
IsolatedAtomic64 broadcast_gen;
IsolatedAtomic64 ack_gen;
IsolatedAtomic32 ack_count;
};
AlignedStruct(WaveLaneCtx, CachelineSize)
AlignedStruct(WaveLaneCtx, IsolationSize)
{
i32 idx;
WaveCtx *wave;

View File

@ -556,14 +556,14 @@ G_D12_Pipeline *G_D12_PipelineFromDesc(G_D12_PipelineDesc desc)
if (!pipeline)
{
Arena *perm = PermArena();
PushAlign(perm, CachelineSize);
PushAlign(perm, IsolationSize);
{
pipeline = PushStruct(perm, G_D12_Pipeline);
pipeline->desc = desc;
pipeline->hash = hash;
is_pipeline_new = 1;
}
PushAlign(perm, CachelineSize);
PushAlign(perm, IsolationSize);
SllStackPushN(bin->first, pipeline, next_in_bin);
}
Unlock(&lock);
@ -817,9 +817,9 @@ G_D12_RawCommandList *G_D12_PrepareRawCommandList(G_QueueKind queue_kind)
{
Arena *perm = PermArena();
{
PushAlign(perm, CachelineSize);
PushAlign(perm, IsolationSize);
cl = PushStruct(perm, G_D12_RawCommandList);
PushAlign(perm, CachelineSize);
PushAlign(perm, IsolationSize);
}
cl->queue = queue;
@ -925,9 +925,9 @@ G_ArenaHandle G_AcquireArena(void)
G_D12_Arena *gpu_arena = 0;
{
Arena *perm = PermArena();
PushAlign(perm, CachelineSize);
PushAlign(perm, IsolationSize);
gpu_arena = PushStruct(perm, G_D12_Arena);
PushAlign(perm, CachelineSize);
PushAlign(perm, IsolationSize);
}
gpu_arena->arena = AcquireArena(Gibi(1));

View File

@ -438,7 +438,7 @@ Struct(G_D12_AsyncCtx)
Struct(G_D12_Ctx)
{
Atomic64Padded resource_creation_gen;
IsolatedAtomic64 resource_creation_gen;
// Stats
Atomic64 arenas_count;

View File

@ -42,7 +42,7 @@ Struct(G_SamplerStateRef) { u32 v; };
#define G_NumReservedConstants (4) // Constants reserved for internal usage by the GPU layer
#define G_NumConstants (G_NumGeneralPurposeConstants + G_NumReservedConstants)
#if IsLanguageC
#if IsCpu
#define G_ForceDeclConstant(type, name, slot) \
Enum(name##__shaderconstantenum) { name = slot }; \
Struct(name##__shaderconstanttype) { type v; }
@ -50,7 +50,7 @@ Struct(G_SamplerStateRef) { u32 v; };
StaticAssert(sizeof(type) <= 4); \
StaticAssert(slot < G_NumGeneralPurposeConstants); \
G_ForceDeclConstant(type, name, slot)
#elif IsLanguageG
#else
#define G_ForceDeclConstant(type, name, slot) cbuffer name : register(b##slot) { type name; }
#define G_DeclConstant(type, name, slot) G_ForceDeclConstant(type, name, slot)
#endif
@ -66,7 +66,7 @@ G_ForceDeclConstant(G_RWByteAddressBufferRef, G_ShaderConst_PrintBufferRef, 8)
G_ForceDeclConstant(b32, G_ShaderConst_TweakB32, 9);
G_ForceDeclConstant(f32, G_ShaderConst_TweakF32, 10);
#if IsLanguageG
#if IsGpu
#define G_TweakBool G_ShaderConst_TweakB32
#define G_TweakFloat G_ShaderConst_TweakF32
#endif
@ -74,7 +74,7 @@ G_ForceDeclConstant(f32, G_ShaderConst_TweakF32, 10
////////////////////////////////////////////////////////////
//~ Resource dereference
#if IsLanguageG
#if IsGpu
// TODO: Non-uniform resource access currently is assumed as the default
// behavior. We may want to add explicit "uniform" variants for
// optimization on AMD hardware in the future.
@ -98,7 +98,7 @@ G_ForceDeclConstant(f32, G_ShaderConst_TweakF32, 10
////////////////////////////////////////////////////////////
//~ Resource countof
#if IsLanguageG
#if IsGpu
template<typename T> u32 countof(StructuredBuffer<T> buff) { u32 result; buff.GetDimensions(result); return result; }
template<typename T> u32 countof(RWStructuredBuffer<T> buff) { u32 result; buff.GetDimensions(result); return result; }
u32 countof(ByteAddressBuffer buff) { u32 result; buff.GetDimensions(result); return result; }
@ -152,7 +152,7 @@ Struct(G_FmtArg)
Vec4U32 v;
};
#if IsLanguageG && GPU_SHADER_PRINT
#if IsGpu && GPU_SHADER_PRINT
G_FmtArg G_Fmt(u32 v) { G_FmtArg result; result.kind = G_FmtArgKind_Uint; result.v.x = v; return result; }
G_FmtArg G_Fmt(Vec2U32 v) { G_FmtArg result; result.kind = G_FmtArgKind_Uint2; result.v.xy = v.xy; return result; }
G_FmtArg G_Fmt(Vec3U32 v) { G_FmtArg result; result.kind = G_FmtArgKind_Uint3; result.v.xyz = v.xyz; return result; }

View File

@ -261,11 +261,11 @@ NET_PipeHandle NET_AcquirePipe(void)
pipe->peer_bins_count = Kibi(1);
pipe->peer_bins = PushStructs(perm, NET_W32_PeerBin, pipe->peer_bins_count);
{
PushAlign(perm, CachelineSize);
PushAlign(perm, IsolationSize);
i32 buff_len = Kibi(2);
pipe->recv_wsabuff.buf = PushStructsNoZero(perm, char, buff_len);
pipe->recv_wsabuff.len = buff_len;
PushAlign(perm, CachelineSize);
PushAlign(perm, IsolationSize);
}
}
// Insert pipe

View File

@ -24,7 +24,7 @@ Struct(PLT_W32_Ctx)
//- Timer
Fence timer_fence;
Atomic64Padded average_timer_period_ns;
IsolatedAtomic64 average_timer_period_ns;
};
extern PLT_W32_Ctx PLT_W32;

View File

@ -9,7 +9,7 @@ i32 P_TileIdxFromTilePos(Vec2 p)
return result;
}
#if IsLanguageC
#if IsCpu
String P_TileNameFromKind(P_TileKind kind)
{
PERSIST Readonly String tile_names[P_TileKind_COUNT] = {
@ -29,7 +29,7 @@ i32 P_TileIdxFromTilePos(Vec2 p)
////////////////////////////////////////////////////////////
//~ Prefab helpers
#if IsLanguageC
#if IsCpu
String P_PrefabNameFromKind(P_PrefabKind kind)
{
PERSIST Readonly String prefab_names[P_PrefabKind_COUNT] = {

View File

@ -53,13 +53,13 @@ Enum(P_PrefabKind)
i32 P_TileIdxFromTilePos(Vec2 p);
#if IsLanguageC
#if IsCpu
String P_TileNameFromKind(P_TileKind kind);
#endif
////////////////////////////////////////////////////////////
//~ Prefab helpers
#if IsLanguageC
#if IsCpu
String P_PrefabNameFromKind(P_PrefabKind kind);
#endif

View File

@ -39,6 +39,15 @@ ComputeShader2D(V_PrepareCellsCS, 8, 8)
RWTexture2D<Vec4> cells = G_Dereference<Vec4>(frame.cells);
RWTexture2D<f32> drynesses = G_Dereference<f32>(frame.drynesses);
if (all(SV_DispatchThreadID == 0))
{
G_PrintF(
"IsGpu: %F, IsCompilerClang: %F",
G_Fmt((i32)IsGpu),
G_Fmt((i32)IsCompilerClang)
);
}
Vec2 cells_pos = SV_DispatchThreadID + 0.5;
if (all(cells_pos < countof(cells)))
{

View File

@ -205,7 +205,7 @@ Struct(V_Particle)
Vec4 color;
};
#if IsLanguageC
#if IsCpu
Struct(V_EmitterNode)
{
V_EmitterNode *next;