wave-enabled meta build

This commit is contained in:
jacob 2025-12-06 04:27:19 -06:00
parent 9099c8981e
commit a8d79cee4c
6 changed files with 629 additions and 1274 deletions

View File

@ -693,6 +693,19 @@
}; };
#endif #endif
////////////////////////////////////////////////////////////
//~ Cpu topology types
#if IsLanguageC
/* Core counts for the current machine, as produced by GetCpuTopologyInfo(). */
Struct(CpuTopologyInfo)
{
i32 num_logical_cores; /* One per physical core (P and non-P), plus one more for each core flagged as having an SMT sibling */
i32 num_physical_cores; /* Every physical core: P cores and non-P cores */
i32 num_physical_performance_cores; /* Physical P cores only, i.e. cores in the highest reported efficiency class */
i32 num_physical_non_performance_cores; /* Physical cores below the highest efficiency class; may be 0 */
};
#endif
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ C <-> Shader interop types //~ C <-> Shader interop types
@ -750,9 +763,10 @@ Struct(SamplerStateHandle) { u32 v; };
#endif #endif
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ @hookdecl Api hooks //~ @hookdecl Core api hooks
#if IsLanguageC #if IsLanguageC
//- Core hooks //- Core hooks
StringList GetRawCommandline(void); StringList GetRawCommandline(void);
void Echo(String msg); void Echo(String msg);
@ -765,6 +779,7 @@ Struct(SamplerStateHandle) { u32 v; };
void SignalExit(i32 code); void SignalExit(i32 code);
void ExitNow(i32 code); void ExitNow(i32 code);
//- Meta hooks //- Meta hooks
void StartupLayers(void); void StartupLayers(void);
#endif #endif

View File

@ -1,7 +1,56 @@
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ Wave sync ops //~ Wave sync ops
/*
 * Barrier across the lanes of a wave: a lane returns from this call only after
 * every lane of `lane->wave` has entered it for the current barrier generation.
 *
 * lane:       the calling lane; the shared barrier state lives in lane->wave.
 * spin_count: how many _mm_pause() spins a waiting lane performs before it
 *             falls back to FutexYieldNeq.
 *
 * Does nothing when the wave reports no lanes (lanes_count <= 0).
 */
void WaveSyncEx(WaveLaneCtx *lane, u64 spin_count)
{
    WaveCtx *wave = lane->wave;
    i32 lanes_count = wave->lanes_count;
    if (lanes_count > 0)
    {
        /* Snapshot the generation before registering as blocked, so the release
         * performed by the last lane is always observed as a change. */
        i64 barrier_gen = Atomic64Fetch(&wave->barrier_gen.v);
        i32 blocked_count = Atomic32FetchAdd(&wave->barrier_blocked_count, 1) + 1;
        if (blocked_count == lanes_count)
        {
            /* Last lane to arrive: re-arm the counter for the next barrier, then
             * release the waiters by advancing the generation. */
            Atomic32Set(&wave->barrier_blocked_count, 0);
            /* Advance by exactly one. Adding `barrier_gen + 1` made the generation
             * run 0, 1, 3, 7, ... until it overflowed to -1, after which the
             * addend was 0, the generation never changed and waiters hung. */
            Atomic64FetchAdd(&wave->barrier_gen.v, 1);
            FutexWakeNeq(&wave->barrier_gen.v);
        }
        else
        {
            /* Wait for the generation to move: spin first, then yield. */
            u64 remaining_spins = spin_count;
            while (Atomic64Fetch(&wave->barrier_gen.v) == barrier_gen)
            {
                if (remaining_spins > 0)
                {
                    --remaining_spins;
                    _mm_pause();
                }
                else
                {
                    /* Presumably blocks until the value differs from barrier_gen;
                     * the loop condition re-checks either way. */
                    FutexYieldNeq(&wave->barrier_gen.v, &barrier_gen, sizeof(barrier_gen));
                }
            }
        }
    }
}
/*
 * Broadcasts `broadcast_size` bytes from one lane to all other lanes of the wave.
 *
 * lane:               the calling lane.
 * broadcast_lane_idx: index of the lane whose data is the source.
 * broadcast_ptr:      on the source lane, the data to publish; on every other
 *                     lane, the buffer that receives the copy.
 * broadcast_size:     number of bytes handed to CopyBytes.
 * spin_count:         forwarded to both WaveSyncEx calls.
 *
 * Every lane of the wave must call this, since it synchronizes twice.
 */
void WaveSyncBroadcastEx_(WaveLaneCtx *lane, u32 broadcast_lane_idx, void *broadcast_ptr, u64 broadcast_size, u64 spin_count)
{
    WaveCtx *shared = lane->wave;
    u32 self_idx = lane->idx;
    b32 is_source = (self_idx == broadcast_lane_idx);

    /* Source lane publishes its pointer before the first barrier */
    if (is_source)
    {
        shared->barrier_broadcast_data = broadcast_ptr;
    }
    WaveSyncEx(lane, spin_count);

    /* All other lanes pull from the published pointer (CopyBytes presumably takes dst, src, size) */
    if (!is_source)
    {
        CopyBytes(broadcast_ptr, shared->barrier_broadcast_data, broadcast_size);
    }

    /* Second barrier: no lane returns while another may still be copying */
    WaveSyncEx(lane, spin_count);
}
/*
 * Stores `n` as the lane's default spin count, the value the WaveSync and
 * WaveSyncBroadcast macros pass as `spin_count` for this lane.
 */
void SetWaveLaneDefaultSpin(WaveLaneCtx *lane, u64 n)
{
    lane->default_spin_count = n;
}

View File

@ -1,15 +1,23 @@
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ Wave types //~ Wave types
#define DefaultWaveLaneSpinCount 500
Struct(WaveCtx) Struct(WaveCtx)
{ {
u32 lanes_count; i32 lanes_count;
/* Barrier */
void *barrier_broadcast_data;
Atomic32 barrier_blocked_count;
Atomic64Padded barrier_gen;
}; };
Struct(WaveLaneCtx) Struct(WaveLaneCtx)
{ {
u32 idx; i32 idx;
WaveCtx *wave; WaveCtx *wave;
u64 default_spin_count;
}; };
typedef void WaveLaneEntryFunc(WaveLaneCtx *lane, void *udata); typedef void WaveLaneEntryFunc(WaveLaneCtx *lane, void *udata);
@ -17,13 +25,22 @@ typedef void WaveLaneEntryFunc(WaveLaneCtx *lane, void *udata);
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ Wave sync ops //~ Wave sync ops
#define WaveSyncBroadcast(lane_ctx, broadcast_lane_idx, broadcast_ptr) WaveSyncBroadcast_((lane_ctx), (broadcast_lane_idx), (broadcast_ptr), sizeof(*(broadcast_ptr))) void WaveSyncEx(WaveLaneCtx *lane, u64 spin_count);
void WaveSyncBroadcast_(WaveLaneCtx *lane_ctx, i32 broadcast_lane_idx, void *broadcast_ptr, u64 broadcast_size); #define WaveSync(lane) \
WaveSyncEx((lane), (lane)->default_spin_count)
void WaveSyncBroadcastEx_(WaveLaneCtx *lane, u32 broadcast_lane_idx, void *broadcast_ptr, u64 broadcast_size, u64 spin_count);
#define WaveSyncBroadcastEx(lane, broadcast_lane_idx, broadcast_ptr, spin_count) \
WaveSyncBroadcastEx_((lane), (broadcast_lane_idx), (broadcast_ptr), sizeof(*(broadcast_ptr)), (spin_count))
#define WaveSyncBroadcast(lane, broadcast_lane_idx, broadcast_ptr) \
WaveSyncBroadcastEx_((lane), (broadcast_lane_idx), (broadcast_ptr), sizeof(*(broadcast_ptr)), (lane)->default_spin_count)
void SetWaveLaneDefaultSpin(WaveLaneCtx *lane, u64 n);
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ @hookdecl Dispatch //~ @hookdecl Dispatch
void DispatchWave(u32 num_lanes, WaveLaneEntryFunc *entry, void *udata); void DispatchWave(String name, u32 num_lanes, WaveLaneEntryFunc *entry, void *udata);
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ @hookdecl Thread //~ @hookdecl Thread

View File

@ -36,7 +36,7 @@ BOOL W32_FindEmbeddedRcData(HMODULE module, LPCWSTR type, LPWSTR wstr_entry_name
} }
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ @hookimpl Core hooks //~ @hookimpl Core api hooks
StringList GetRawCommandline(void) StringList GetRawCommandline(void)
{ {
@ -109,6 +109,67 @@ void TrueRand(String buffer)
BCryptGenRandom(BCRYPT_RNG_ALG_HANDLE, (u8 *)buffer.text, buffer.len, 0); BCryptGenRandom(BCRYPT_RNG_ALG_HANDLE, (u8 *)buffer.text, buffer.len, 0);
} }
CpuTopologyInfo GetCpuTopologyInfo(void)
{
TempArena scratch = BeginScratchNoConflict();
CpuTopologyInfo res = ZI;
{
DWORD infos_buff_size = 0;
u8 *infos_buff = 0;
b32 ok = 0;
{
GetLogicalProcessorInformationEx(RelationProcessorCore, 0, &infos_buff_size);
infos_buff = PushStructsNoZero(scratch.arena, u8, infos_buff_size);
ok = GetLogicalProcessorInformationEx(RelationProcessorCore, (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)infos_buff, &infos_buff_size);
}
if (ok)
{
/* Determine max efficiency class */
i32 max_efficiency_class = 0;
{
DWORD pos = 0;
while (pos < infos_buff_size)
{
SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *info = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)&infos_buff[pos];
max_efficiency_class = MaxI32(max_efficiency_class, info->Processor.EfficiencyClass);
pos += info->Size;
}
}
/* Generate physical core info */
{
DWORD pos = 0;
while (pos < infos_buff_size)
{
SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *info = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)&infos_buff[pos];
++res.num_physical_cores;
++res.num_logical_cores;
if (info->Processor.Flags & LTP_PC_SMT)
{
/* Core has SMT sibling */
++res.num_logical_cores;
}
if (info->Processor.EfficiencyClass == max_efficiency_class)
{
/* Core is P-core */
++res.num_physical_performance_cores;
}
else
{
/* Core is not a P-core */
++res.num_physical_non_performance_cores;
}
pos += info->Size;
}
}
}
}
res.num_logical_cores = MaxI32(res.num_logical_cores, 1);
res.num_physical_cores = MaxI32(res.num_physical_cores, 1);
res.num_physical_performance_cores = MaxI32(res.num_physical_performance_cores, 1);
EndScratch(scratch);
return res;
}
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ @hookimpl Swap hooks //~ @hookimpl Swap hooks
@ -226,7 +287,7 @@ i32 W32_Main(void)
GetSystemInfo(&g->info); GetSystemInfo(&g->info);
/* Init main thread */ /* Init main thread */
W32_InitCurrentThread(Lit("Main thread")); W32_InitCurrentThread(Lit("Main"));
/* Get raw args from command line */ /* Get raw args from command line */
{ {
@ -252,6 +313,7 @@ i32 W32_Main(void)
/* Init log system */ /* Init log system */
/* FIXME: Remove hardcoded log path */ /* FIXME: Remove hardcoded log path */
InitLogSystem(Lit("log.log")); InitLogSystem(Lit("log.log"));
LogInfoF("Main thread ID: %F", FmtUint(ThreadId()));
/* Init resources */ /* Init resources */
{ {

View File

@ -16,8 +16,8 @@ void W32_InitCurrentThread(String name)
Arena *perm = PermArena(); Arena *perm = PermArena();
/* Fixme: Set thread name */ /* Fixme: Set thread name */
// SetThreadDescription(GetCurrentThread(), thread_name_wstr); wchar_t *thread_name_wstr = WstrFromString(perm, name);
//LogInfoF("New thread \"%F\" created with ID %F", FmtString(StringFromCstrNoLimit(t->thread_name_cstr)), FmtUint(ThreadId())); SetThreadDescription(GetCurrentThread(), thread_name_wstr);
/* Initialize COM */ /* Initialize COM */
CoInitializeEx(0, COINIT_MULTITHREADED); CoInitializeEx(0, COINIT_MULTITHREADED);
@ -27,6 +27,7 @@ DWORD WINAPI W32_ThreadProc(LPVOID thread_args_vp)
{ {
W32_ThreadArgs *thread_args = (W32_ThreadArgs *)thread_args_vp; W32_ThreadArgs *thread_args = (W32_ThreadArgs *)thread_args_vp;
W32_InitCurrentThread(thread_args->name); W32_InitCurrentThread(thread_args->name);
LogInfoF("New thread \"%F\" created with ID %F", FmtString(thread_args->name), FmtUint(ThreadId()));
thread_args->entry(thread_args->lane, thread_args->udata); thread_args->entry(thread_args->lane, thread_args->udata);
return 0; return 0;
} }
@ -34,7 +35,7 @@ DWORD WINAPI W32_ThreadProc(LPVOID thread_args_vp)
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ @hookimpl Dispatch //~ @hookimpl Dispatch
void DispatchWave(u32 num_lanes, WaveLaneEntryFunc *entry, void *udata) void DispatchWave(String name, u32 num_lanes, WaveLaneEntryFunc *entry, void *udata)
{ {
/* FIXME: Impl */ /* FIXME: Impl */
@ -56,11 +57,23 @@ void DispatchWave(u32 num_lanes, WaveLaneEntryFunc *entry, void *udata)
lane_ctx->idx = lane_idx; lane_ctx->idx = lane_idx;
lane_ctx->wave = wave_ctx; lane_ctx->wave = wave_ctx;
lane_ctx->default_spin_count = DefaultWaveLaneSpinCount;
String thread_name = ZI;
if (num_lanes > 1)
{
thread_name = StringF(perm, "%F:%F", FmtString(name), FmtUint(lane_idx));
}
else
{
thread_name = PushString(perm, name);
}
W32_ThreadArgs *thread_args = PushStruct(perm, W32_ThreadArgs); W32_ThreadArgs *thread_args = PushStruct(perm, W32_ThreadArgs);
thread_args->lane = lane_ctx; thread_args->lane = lane_ctx;
thread_args->udata = udata; thread_args->udata = udata;
thread_args->entry = entry; thread_args->entry = entry;
thread_args->name = thread_name;
HANDLE handle = CreateThread(0, Mebi(4), W32_ThreadProc, thread_args, 0, 0); HANDLE handle = CreateThread(0, Mebi(4), W32_ThreadProc, thread_args, 0, 0);
if (!handle) if (!handle)

File diff suppressed because it is too large Load Diff