wave-enabled meta build

This commit is contained in:
jacob 2025-12-06 04:27:19 -06:00
parent 9099c8981e
commit a8d79cee4c
6 changed files with 629 additions and 1274 deletions

View File

@ -693,6 +693,19 @@
};
#endif
////////////////////////////////////////////////////////////
//~ Cpu topology types
#if IsLanguageC
/* Core counts for the host machine, as filled in by GetCpuTopologyInfo(). */
Struct(CpuTopologyInfo)
{
i32 num_logical_cores; /* Every physical core (P and non-P) plus one extra for each core that reports an SMT sibling */
i32 num_physical_cores; /* Physical cores only (P cores + non-P cores); SMT siblings are not counted */
i32 num_physical_performance_cores; /* Physical P cores only */
i32 num_physical_non_performance_cores; /* Physical non-P cores only */
};
#endif
////////////////////////////////////////////////////////////
//~ C <-> Shader interop types
@ -750,9 +763,10 @@ Struct(SamplerStateHandle) { u32 v; };
#endif
////////////////////////////////////////////////////////////
//~ @hookdecl Api hooks
//~ @hookdecl Core api hooks
#if IsLanguageC
//- Core hooks
StringList GetRawCommandline(void);
void Echo(String msg);
@ -765,6 +779,7 @@ Struct(SamplerStateHandle) { u32 v; };
void SignalExit(i32 code);
void ExitNow(i32 code);
//- Meta hooks
void StartupLayers(void);
#endif

View File

@ -1,7 +1,56 @@
////////////////////////////////////////////////////////////
//~ Wave sync ops
/* Wave-wide barrier: returns once every lane of the caller's wave has called
 * WaveSyncEx for the current barrier generation.
 *
 * lane       - Calling lane; lane->wave holds the shared barrier state.
 * spin_count - Number of busy-wait iterations (_mm_pause) to attempt before
 *              falling back to FutexYieldNeq on the generation counter.
 *
 * The last lane to arrive resets the arrival counter, advances the generation
 * counter and wakes the lanes waiting on it. Does nothing when the wave
 * reports no lanes.
 */
void WaveSyncEx(WaveLaneCtx *lane, u64 spin_count)
{
    WaveCtx *wave = lane->wave;
    i32 lanes_count = wave->lanes_count;
    if (lanes_count > 0)
    {
        /* Sample the generation before registering arrival so that a release
         * happening right after our arrival is always observed as a change. */
        i64 barrier_gen = Atomic64Fetch(&wave->barrier_gen.v);
        i32 blocked_count = Atomic32FetchAdd(&wave->barrier_blocked_count, 1) + 1;
        if (blocked_count == lanes_count)
        {
            /* Last arrival: reset the counter before releasing, so lanes that
             * immediately re-enter the barrier start counting from zero. */
            Atomic32Set(&wave->barrier_blocked_count, 0);
            /* Advance the generation by exactly one. Adding (barrier_gen + 1)
             * made the counter grow as 2^n - 1; after 64 releases it stuck at
             * -1 with an increment of 0, so waiters never saw a change. */
            Atomic64FetchAdd(&wave->barrier_gen.v, 1);
            FutexWakeNeq(&wave->barrier_gen.v);
        }
        else
        {
            /* Spin for up to spin_count iterations, then yield on the futex
             * until the generation differs from the sampled value. */
            u64 remaining_spins = spin_count;
            while (Atomic64Fetch(&wave->barrier_gen.v) == barrier_gen)
            {
                if (remaining_spins > 0)
                {
                    --remaining_spins;
                    _mm_pause();
                }
                else
                {
                    FutexYieldNeq(&wave->barrier_gen.v, &barrier_gen, sizeof(barrier_gen));
                }
            }
        }
    }
}
/* Copies broadcast_size bytes from the broadcasting lane's broadcast_ptr into
 * every other lane's broadcast_ptr.
 *
 * lane               - Calling lane.
 * broadcast_lane_idx - Index of the lane whose data is distributed.
 * broadcast_ptr      - Source on the broadcasting lane, destination elsewhere.
 * broadcast_size     - Number of bytes to copy.
 * spin_count         - Forwarded to WaveSyncEx for both barriers.
 */
void WaveSyncBroadcastEx_(WaveLaneCtx *lane, u32 broadcast_lane_idx, void *broadcast_ptr, u64 broadcast_size, u64 spin_count)
{
    u32 self_idx = lane->idx;
    WaveCtx *shared = lane->wave;

    /* Phase 1: the source lane publishes its pointer; the barrier makes it
     * visible to every lane before anyone reads it. */
    if (self_idx == broadcast_lane_idx)
    {
        shared->barrier_broadcast_data = broadcast_ptr;
    }
    WaveSyncEx(lane, spin_count);

    /* Phase 2: receivers copy from the published pointer; the second barrier
     * keeps the source lane from moving on while copies are in flight. */
    if (self_idx != broadcast_lane_idx)
    {
        CopyBytes(broadcast_ptr, shared->barrier_broadcast_data, broadcast_size);
    }
    WaveSyncEx(lane, spin_count);
}
/* Sets the lane's default spin count, i.e. the value the WaveSync and
 * WaveSyncBroadcast macros pass as spin_count for this lane. */
void SetWaveLaneDefaultSpin(WaveLaneCtx *lane, u64 n)
{
lane->default_spin_count = n;
}

View File

@ -1,15 +1,23 @@
////////////////////////////////////////////////////////////
//~ Wave types
/* Spin count assigned to each lane's default_spin_count when a wave is dispatched. */
#define DefaultWaveLaneSpinCount 500

/* State shared by every lane of a single wave. */
Struct(WaveCtx)
{
    /* Number of lanes that must arrive before the barrier releases.
     * Declared once, as i32, matching how WaveSyncEx reads and compares it. */
    i32 lanes_count;

    /* Barrier */
    void *barrier_broadcast_data;   /* Pointer published by the broadcasting lane in WaveSyncBroadcastEx_ */
    Atomic32 barrier_blocked_count; /* Lanes that have arrived at the current barrier */
    Atomic64Padded barrier_gen;     /* Barrier generation; changes each time the barrier releases */
};
/* Per-lane context handed to a lane's entry function. */
Struct(WaveLaneCtx)
{
    i32 idx;                /* Index of this lane within its wave (declared once) */
    WaveCtx *wave;          /* Wave this lane belongs to */
    u64 default_spin_count; /* Spin count used by WaveSync / WaveSyncBroadcast for this lane */
};

/* Entry point executed by each lane of a dispatched wave. */
typedef void WaveLaneEntryFunc(WaveLaneCtx *lane, void *udata);
@ -17,13 +25,22 @@ typedef void WaveLaneEntryFunc(WaveLaneCtx *lane, void *udata);
////////////////////////////////////////////////////////////
//~ Wave sync ops
/* Barrier across all lanes of lane->wave. spin_count is the number of
 * busy-wait iterations attempted before yielding. */
void WaveSyncEx(WaveLaneCtx *lane, u64 spin_count);

/* WaveSyncEx using the lane's default spin count. */
#define WaveSync(lane) \
    WaveSyncEx((lane), (lane)->default_spin_count)

/* Copies broadcast_size bytes from the broadcasting lane's broadcast_ptr into
 * the broadcast_ptr of every other lane. Prefer the macros below, which derive
 * the size from the pointee type. */
void WaveSyncBroadcastEx_(WaveLaneCtx *lane, u32 broadcast_lane_idx, void *broadcast_ptr, u64 broadcast_size, u64 spin_count);

/* Broadcast *broadcast_ptr with an explicit spin count. */
#define WaveSyncBroadcastEx(lane, broadcast_lane_idx, broadcast_ptr, spin_count) \
    WaveSyncBroadcastEx_((lane), (broadcast_lane_idx), (broadcast_ptr), sizeof(*(broadcast_ptr)), (spin_count))

/* Broadcast *broadcast_ptr using the lane's default spin count.
 * This is the only definition of WaveSyncBroadcast; the former expansion to
 * WaveSyncBroadcast_ is gone because that function has no implementation. */
#define WaveSyncBroadcast(lane, broadcast_lane_idx, broadcast_ptr) \
    WaveSyncBroadcastEx_((lane), (broadcast_lane_idx), (broadcast_ptr), sizeof(*(broadcast_ptr)), (lane)->default_spin_count)

/* Sets the lane's default spin count. */
void SetWaveLaneDefaultSpin(WaveLaneCtx *lane, u64 n);
////////////////////////////////////////////////////////////
//~ @hookdecl Dispatch
/* Dispatches a wave of num_lanes lanes, each running entry(lane, udata).
 *
 * name      - Label for the wave's threads. The Win32 implementation names a
 *             thread "<name>:<lane index>" when num_lanes > 1, otherwise just
 *             <name>.
 * num_lanes - Number of lanes to start.
 * entry     - Function executed by every lane.
 * udata     - Opaque pointer passed unchanged to entry.
 */
void DispatchWave(String name, u32 num_lanes, WaveLaneEntryFunc *entry, void *udata);
////////////////////////////////////////////////////////////
//~ @hookdecl Thread

View File

@ -36,7 +36,7 @@ BOOL W32_FindEmbeddedRcData(HMODULE module, LPCWSTR type, LPWSTR wstr_entry_name
}
////////////////////////////////////////////////////////////
//~ @hookimpl Core hooks
//~ @hookimpl Core api hooks
StringList GetRawCommandline(void)
{
@ -109,6 +109,67 @@ void TrueRand(String buffer)
BCryptGenRandom(BCRYPT_RNG_ALG_HANDLE, (u8 *)buffer.text, buffer.len, 0);
}
CpuTopologyInfo GetCpuTopologyInfo(void)
{
TempArena scratch = BeginScratchNoConflict();
CpuTopologyInfo res = ZI;
{
DWORD infos_buff_size = 0;
u8 *infos_buff = 0;
b32 ok = 0;
{
GetLogicalProcessorInformationEx(RelationProcessorCore, 0, &infos_buff_size);
infos_buff = PushStructsNoZero(scratch.arena, u8, infos_buff_size);
ok = GetLogicalProcessorInformationEx(RelationProcessorCore, (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)infos_buff, &infos_buff_size);
}
if (ok)
{
/* Determine max efficiency class */
i32 max_efficiency_class = 0;
{
DWORD pos = 0;
while (pos < infos_buff_size)
{
SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *info = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)&infos_buff[pos];
max_efficiency_class = MaxI32(max_efficiency_class, info->Processor.EfficiencyClass);
pos += info->Size;
}
}
/* Generate physical core info */
{
DWORD pos = 0;
while (pos < infos_buff_size)
{
SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *info = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)&infos_buff[pos];
++res.num_physical_cores;
++res.num_logical_cores;
if (info->Processor.Flags & LTP_PC_SMT)
{
/* Core has SMT sibling */
++res.num_logical_cores;
}
if (info->Processor.EfficiencyClass == max_efficiency_class)
{
/* Core is P-core */
++res.num_physical_performance_cores;
}
else
{
/* Core is not a P-core */
++res.num_physical_non_performance_cores;
}
pos += info->Size;
}
}
}
}
res.num_logical_cores = MaxI32(res.num_logical_cores, 1);
res.num_physical_cores = MaxI32(res.num_physical_cores, 1);
res.num_physical_performance_cores = MaxI32(res.num_physical_performance_cores, 1);
EndScratch(scratch);
return res;
}
////////////////////////////////////////////////////////////
//~ @hookimpl Swap hooks
@ -226,7 +287,7 @@ i32 W32_Main(void)
GetSystemInfo(&g->info);
/* Init main thread */
W32_InitCurrentThread(Lit("Main thread"));
W32_InitCurrentThread(Lit("Main"));
/* Get raw args from command line */
{
@ -252,6 +313,7 @@ i32 W32_Main(void)
/* Init log system */
/* FIXME: Remove hardcoded log path */
InitLogSystem(Lit("log.log"));
LogInfoF("Main thread ID: %F", FmtUint(ThreadId()));
/* Init resources */
{

View File

@ -16,8 +16,8 @@ void W32_InitCurrentThread(String name)
Arena *perm = PermArena();
/* Fixme: Set thread name */
// SetThreadDescription(GetCurrentThread(), thread_name_wstr);
//LogInfoF("New thread \"%F\" created with ID %F", FmtString(StringFromCstrNoLimit(t->thread_name_cstr)), FmtUint(ThreadId()));
wchar_t *thread_name_wstr = WstrFromString(perm, name);
SetThreadDescription(GetCurrentThread(), thread_name_wstr);
/* Initialize COM */
CoInitializeEx(0, COINIT_MULTITHREADED);
@ -27,6 +27,7 @@ DWORD WINAPI W32_ThreadProc(LPVOID thread_args_vp)
{
W32_ThreadArgs *thread_args = (W32_ThreadArgs *)thread_args_vp;
W32_InitCurrentThread(thread_args->name);
LogInfoF("New thread \"%F\" created with ID %F", FmtString(thread_args->name), FmtUint(ThreadId()));
thread_args->entry(thread_args->lane, thread_args->udata);
return 0;
}
@ -34,7 +35,7 @@ DWORD WINAPI W32_ThreadProc(LPVOID thread_args_vp)
////////////////////////////////////////////////////////////
//~ @hookimpl Dispatch
void DispatchWave(u32 num_lanes, WaveLaneEntryFunc *entry, void *udata)
void DispatchWave(String name, u32 num_lanes, WaveLaneEntryFunc *entry, void *udata)
{
/* FIXME: Impl */
@ -56,11 +57,23 @@ void DispatchWave(u32 num_lanes, WaveLaneEntryFunc *entry, void *udata)
lane_ctx->idx = lane_idx;
lane_ctx->wave = wave_ctx;
lane_ctx->default_spin_count = DefaultWaveLaneSpinCount;
String thread_name = ZI;
if (num_lanes > 1)
{
thread_name = StringF(perm, "%F:%F", FmtString(name), FmtUint(lane_idx));
}
else
{
thread_name = PushString(perm, name);
}
W32_ThreadArgs *thread_args = PushStruct(perm, W32_ThreadArgs);
thread_args->lane = lane_ctx;
thread_args->udata = udata;
thread_args->entry = entry;
thread_args->name = thread_name;
HANDLE handle = CreateThread(0, Mebi(4), W32_ThreadProc, thread_args, 0, 0);
if (!handle)

File diff suppressed because it is too large Load Diff