wave-enabled meta build
This commit is contained in:
parent
9099c8981e
commit
a8d79cee4c
@ -693,6 +693,19 @@
|
||||
};
|
||||
#endif
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Cpu topology types
|
||||
|
||||
#if IsLanguageC
|
||||
/* Summary of the machine's CPU core counts, as returned by GetCpuTopologyInfo().
 * NOTE(review): the Win32 implementation clamps the logical, physical and
 * performance counts to at least 1; the non-performance count may be 0. */
Struct(CpuTopologyInfo)
|
||||
{
|
||||
i32 num_logical_cores; /* Every schedulable core: P cores, non-P cores, plus one extra per core that reports an SMT sibling */
|
||||
i32 num_physical_cores; /* Physical cores of any kind: P cores and non-P cores */
|
||||
i32 num_physical_performance_cores; /* Physical cores in the highest efficiency class (P cores) */
|
||||
i32 num_physical_non_performance_cores; /* Physical cores below the highest efficiency class (non-P cores) */
|
||||
};
|
||||
#endif
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ C <-> Shader interop types
|
||||
|
||||
@ -750,9 +763,10 @@ Struct(SamplerStateHandle) { u32 v; };
|
||||
#endif
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ @hookdecl Api hooks
|
||||
//~ @hookdecl Core api hooks
|
||||
|
||||
#if IsLanguageC
|
||||
|
||||
//- Core hooks
|
||||
StringList GetRawCommandline(void);
|
||||
void Echo(String msg);
|
||||
@ -765,6 +779,7 @@ Struct(SamplerStateHandle) { u32 v; };
|
||||
void SignalExit(i32 code);
|
||||
void ExitNow(i32 code);
|
||||
|
||||
|
||||
//- Meta hooks
|
||||
void StartupLayers(void);
|
||||
#endif
|
||||
|
||||
@ -1,7 +1,56 @@
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Wave sync ops
|
||||
|
||||
void WaveSyncBroadcast_(WaveLaneCtx *lane_ctx, i32 broadcast_lane_idx, void *broadcast_ptr, u64 broadcast_size)
|
||||
/* Barrier: blocks the calling lane until every lane of its wave has arrived.
 *
 * lane:       Calling lane; `lane->wave` holds the shared barrier state.
 * spin_count: Number of busy-wait iterations (each issuing a pause hint) to
 *             try before falling back to FutexYieldNeq on the generation.
 *
 * Each lane snapshots `barrier_gen`, then increments `barrier_blocked_count`.
 * The lane whose increment completes the count resets the counter, advances
 * the generation by one and wakes the waiters; every other lane waits until
 * the generation differs from its snapshot. Does nothing when the wave's
 * `lanes_count` is not positive.
 */
void WaveSyncEx(WaveLaneCtx *lane, u64 spin_count)
{
  WaveCtx *wave = lane->wave;
  i32 lanes_count = wave->lanes_count;
  if (lanes_count > 0)
  {
    /* The snapshot must be taken before announcing arrival: the generation
     * cannot advance until this lane's own increment below has been counted. */
    i64 barrier_gen = Atomic64Fetch(&wave->barrier_gen.v);
    i32 blocked_count = Atomic32FetchAdd(&wave->barrier_blocked_count, 1) + 1;
    if (blocked_count == lanes_count)
    {
      /* Last lane to arrive: re-arm the counter before releasing anyone so
       * that lanes racing into the next barrier start counting from zero. */
      Atomic32Set(&wave->barrier_blocked_count, 0);
      /* Advance by exactly one generation. Adding `barrier_gen + 1` made the
       * counter run 0, 1, 3, 7, ... 2^k - 1: the addend overflows at INT64_MAX
       * and becomes 0 once the counter reaches -1, after which the generation
       * never changes again and waiting lanes are never released. */
      Atomic64FetchAdd(&wave->barrier_gen.v, 1);
      FutexWakeNeq(&wave->barrier_gen.v);
    }
    else
    {
      u64 remaining_spins = spin_count;
      while (Atomic64Fetch(&wave->barrier_gen.v) == barrier_gen)
      {
        if (remaining_spins > 0)
        {
          /* Cheap path: spin with a pause hint while the budget lasts */
          --remaining_spins;
          _mm_pause();
        }
        else
        {
          /* Spin budget exhausted.
           * NOTE(review): presumably blocks until the value differs from
           * `barrier_gen`; the loop re-checks the generation either way. */
          FutexYieldNeq(&wave->barrier_gen.v, &barrier_gen, sizeof(barrier_gen));
        }
      }
    }
  }
}
|
||||
|
||||
/* Copies `broadcast_size` bytes from the broadcasting lane's `broadcast_ptr`
 * into every other lane's `broadcast_ptr`.
 *
 * lane:               Calling lane.
 * broadcast_lane_idx: Index of the lane whose data is the source.
 * broadcast_ptr:      Source buffer on the broadcasting lane, destination
 *                     buffer on all other lanes.
 * broadcast_size:     Number of bytes to copy.
 * spin_count:         Forwarded to both WaveSyncEx barriers.
 *
 * Two barriers are used: the first makes the published pointer visible before
 * anyone reads it, the second holds the source lane until every copy is done.
 */
void WaveSyncBroadcastEx_(WaveLaneCtx *lane, u32 broadcast_lane_idx, void *broadcast_ptr, u64 broadcast_size, u64 spin_count)
{
  WaveCtx *shared = lane->wave;
  u32 self_idx = lane->idx;
  b32 is_source = (self_idx == broadcast_lane_idx);

  /* Publish the source pointer */
  if (is_source)
  {
    shared->barrier_broadcast_data = broadcast_ptr;
  }
  WaveSyncEx(lane, spin_count);

  /* Receive into the local buffer */
  if (!is_source)
  {
    CopyBytes(broadcast_ptr, shared->barrier_broadcast_data, broadcast_size);
  }
  WaveSyncEx(lane, spin_count);
}
|
||||
|
||||
/* Sets the lane's default spin count to `n`. The WaveSync and WaveSyncBroadcast
 * macros pass `lane->default_spin_count` as the `spin_count` argument. */
void SetWaveLaneDefaultSpin(WaveLaneCtx *lane, u64 n)
|
||||
{
|
||||
lane->default_spin_count = n;
|
||||
}
|
||||
|
||||
@ -1,15 +1,23 @@
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Wave types
|
||||
|
||||
#define DefaultWaveLaneSpinCount 500
|
||||
|
||||
Struct(WaveCtx)
|
||||
{
|
||||
u32 lanes_count;
|
||||
i32 lanes_count;
|
||||
|
||||
/* Barrier */
|
||||
void *barrier_broadcast_data;
|
||||
Atomic32 barrier_blocked_count;
|
||||
Atomic64Padded barrier_gen;
|
||||
};
|
||||
|
||||
Struct(WaveLaneCtx)
|
||||
{
|
||||
u32 idx;
|
||||
i32 idx;
|
||||
WaveCtx *wave;
|
||||
u64 default_spin_count;
|
||||
};
|
||||
|
||||
typedef void WaveLaneEntryFunc(WaveLaneCtx *lane, void *udata);
|
||||
@ -17,13 +25,22 @@ typedef void WaveLaneEntryFunc(WaveLaneCtx *lane, void *udata);
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Wave sync ops
|
||||
|
||||
#define WaveSyncBroadcast(lane_ctx, broadcast_lane_idx, broadcast_ptr) WaveSyncBroadcast_((lane_ctx), (broadcast_lane_idx), (broadcast_ptr), sizeof(*(broadcast_ptr)))
|
||||
void WaveSyncBroadcast_(WaveLaneCtx *lane_ctx, i32 broadcast_lane_idx, void *broadcast_ptr, u64 broadcast_size);
|
||||
void WaveSyncEx(WaveLaneCtx *lane, u64 spin_count);
|
||||
#define WaveSync(lane) \
|
||||
WaveSyncEx((lane), (lane)->default_spin_count)
|
||||
|
||||
void WaveSyncBroadcastEx_(WaveLaneCtx *lane, u32 broadcast_lane_idx, void *broadcast_ptr, u64 broadcast_size, u64 spin_count);
|
||||
#define WaveSyncBroadcastEx(lane, broadcast_lane_idx, broadcast_ptr, spin_count) \
|
||||
WaveSyncBroadcastEx_((lane), (broadcast_lane_idx), (broadcast_ptr), sizeof(*(broadcast_ptr)), (spin_count))
|
||||
#define WaveSyncBroadcast(lane, broadcast_lane_idx, broadcast_ptr) \
|
||||
WaveSyncBroadcastEx_((lane), (broadcast_lane_idx), (broadcast_ptr), sizeof(*(broadcast_ptr)), (lane)->default_spin_count)
|
||||
|
||||
void SetWaveLaneDefaultSpin(WaveLaneCtx *lane, u64 n);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ @hookdecl Dispatch
|
||||
|
||||
void DispatchWave(u32 num_lanes, WaveLaneEntryFunc *entry, void *udata);
|
||||
void DispatchWave(String name, u32 num_lanes, WaveLaneEntryFunc *entry, void *udata);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ @hookdecl Thread
|
||||
|
||||
@ -36,7 +36,7 @@ BOOL W32_FindEmbeddedRcData(HMODULE module, LPCWSTR type, LPWSTR wstr_entry_name
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ @hookimpl Core hooks
|
||||
//~ @hookimpl Core api hooks
|
||||
|
||||
StringList GetRawCommandline(void)
|
||||
{
|
||||
@ -109,6 +109,67 @@ void TrueRand(String buffer)
|
||||
BCryptGenRandom(BCRYPT_RNG_ALG_HANDLE, (u8 *)buffer.text, buffer.len, 0);
|
||||
}
|
||||
|
||||
CpuTopologyInfo GetCpuTopologyInfo(void)
|
||||
{
|
||||
TempArena scratch = BeginScratchNoConflict();
|
||||
CpuTopologyInfo res = ZI;
|
||||
{
|
||||
DWORD infos_buff_size = 0;
|
||||
u8 *infos_buff = 0;
|
||||
b32 ok = 0;
|
||||
{
|
||||
GetLogicalProcessorInformationEx(RelationProcessorCore, 0, &infos_buff_size);
|
||||
infos_buff = PushStructsNoZero(scratch.arena, u8, infos_buff_size);
|
||||
ok = GetLogicalProcessorInformationEx(RelationProcessorCore, (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)infos_buff, &infos_buff_size);
|
||||
}
|
||||
if (ok)
|
||||
{
|
||||
/* Determine max efficiency class */
|
||||
i32 max_efficiency_class = 0;
|
||||
{
|
||||
DWORD pos = 0;
|
||||
while (pos < infos_buff_size)
|
||||
{
|
||||
SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *info = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)&infos_buff[pos];
|
||||
max_efficiency_class = MaxI32(max_efficiency_class, info->Processor.EfficiencyClass);
|
||||
pos += info->Size;
|
||||
}
|
||||
}
|
||||
/* Generate physical core info */
|
||||
{
|
||||
DWORD pos = 0;
|
||||
while (pos < infos_buff_size)
|
||||
{
|
||||
SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *info = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)&infos_buff[pos];
|
||||
++res.num_physical_cores;
|
||||
++res.num_logical_cores;
|
||||
if (info->Processor.Flags & LTP_PC_SMT)
|
||||
{
|
||||
/* Core has SMT sibling */
|
||||
++res.num_logical_cores;
|
||||
}
|
||||
if (info->Processor.EfficiencyClass == max_efficiency_class)
|
||||
{
|
||||
/* Core is P-core */
|
||||
++res.num_physical_performance_cores;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Core is not a P-core */
|
||||
++res.num_physical_non_performance_cores;
|
||||
}
|
||||
pos += info->Size;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
res.num_logical_cores = MaxI32(res.num_logical_cores, 1);
|
||||
res.num_physical_cores = MaxI32(res.num_physical_cores, 1);
|
||||
res.num_physical_performance_cores = MaxI32(res.num_physical_performance_cores, 1);
|
||||
EndScratch(scratch);
|
||||
return res;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ @hookimpl Swap hooks
|
||||
|
||||
@ -226,7 +287,7 @@ i32 W32_Main(void)
|
||||
GetSystemInfo(&g->info);
|
||||
|
||||
/* Init main thread */
|
||||
W32_InitCurrentThread(Lit("Main thread"));
|
||||
W32_InitCurrentThread(Lit("Main"));
|
||||
|
||||
/* Get raw args from command line */
|
||||
{
|
||||
@ -252,6 +313,7 @@ i32 W32_Main(void)
|
||||
/* Init log system */
|
||||
/* FIXME: Remove hardcoded log path */
|
||||
InitLogSystem(Lit("log.log"));
|
||||
LogInfoF("Main thread ID: %F", FmtUint(ThreadId()));
|
||||
|
||||
/* Init resources */
|
||||
{
|
||||
|
||||
@ -16,8 +16,8 @@ void W32_InitCurrentThread(String name)
|
||||
Arena *perm = PermArena();
|
||||
|
||||
/* Fixme: Set thread name */
|
||||
// SetThreadDescription(GetCurrentThread(), thread_name_wstr);
|
||||
//LogInfoF("New thread \"%F\" created with ID %F", FmtString(StringFromCstrNoLimit(t->thread_name_cstr)), FmtUint(ThreadId()));
|
||||
wchar_t *thread_name_wstr = WstrFromString(perm, name);
|
||||
SetThreadDescription(GetCurrentThread(), thread_name_wstr);
|
||||
|
||||
/* Initialize COM */
|
||||
CoInitializeEx(0, COINIT_MULTITHREADED);
|
||||
@ -27,6 +27,7 @@ DWORD WINAPI W32_ThreadProc(LPVOID thread_args_vp)
|
||||
{
|
||||
W32_ThreadArgs *thread_args = (W32_ThreadArgs *)thread_args_vp;
|
||||
W32_InitCurrentThread(thread_args->name);
|
||||
LogInfoF("New thread \"%F\" created with ID %F", FmtString(thread_args->name), FmtUint(ThreadId()));
|
||||
thread_args->entry(thread_args->lane, thread_args->udata);
|
||||
return 0;
|
||||
}
|
||||
@ -34,7 +35,7 @@ DWORD WINAPI W32_ThreadProc(LPVOID thread_args_vp)
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ @hookimpl Dispatch
|
||||
|
||||
void DispatchWave(u32 num_lanes, WaveLaneEntryFunc *entry, void *udata)
|
||||
void DispatchWave(String name, u32 num_lanes, WaveLaneEntryFunc *entry, void *udata)
|
||||
{
|
||||
/* FIXME: Impl */
|
||||
|
||||
@ -56,11 +57,23 @@ void DispatchWave(u32 num_lanes, WaveLaneEntryFunc *entry, void *udata)
|
||||
|
||||
lane_ctx->idx = lane_idx;
|
||||
lane_ctx->wave = wave_ctx;
|
||||
lane_ctx->default_spin_count = DefaultWaveLaneSpinCount;
|
||||
|
||||
String thread_name = ZI;
|
||||
if (num_lanes > 1)
|
||||
{
|
||||
thread_name = StringF(perm, "%F:%F", FmtString(name), FmtUint(lane_idx));
|
||||
}
|
||||
else
|
||||
{
|
||||
thread_name = PushString(perm, name);
|
||||
}
|
||||
|
||||
W32_ThreadArgs *thread_args = PushStruct(perm, W32_ThreadArgs);
|
||||
thread_args->lane = lane_ctx;
|
||||
thread_args->udata = udata;
|
||||
thread_args->entry = entry;
|
||||
thread_args->name = thread_name;
|
||||
|
||||
HANDLE handle = CreateThread(0, Mebi(4), W32_ThreadProc, thread_args, 0, 0);
|
||||
if (!handle)
|
||||
|
||||
1721
src/meta/meta.c
1721
src/meta/meta.c
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user