power_play/src/base/base_wave.c
2025-12-06 04:27:19 -06:00

57 lines
1.7 KiB
C

////////////////////////////////////////////////////////////
//~ Wave sync ops
// Barrier across all lanes of `lane->wave`: the calling lane does not return
// until `wave->lanes_count` lanes have entered this function.
//
// lane:       the calling lane; only `lane->wave` is read here.
// spin_count: number of `_mm_pause` iterations a waiting lane performs before
//             it starts calling FutexYieldNeq on the generation counter.
//
// Does nothing when `lanes_count` is not positive.
void WaveSyncEx(WaveLaneCtx *lane, u64 spin_count)
{
  WaveCtx *wave = lane->wave;
  i32 lanes_count = wave->lanes_count;
  if (lanes_count > 0)
  {
    // Snapshot the generation before registering arrival; waiters are released
    // once the live generation no longer equals this snapshot.
    i64 barrier_gen = Atomic64Fetch(&wave->barrier_gen.v);
    // The +1 turns the fetched value into this lane's arrival number
    // (assumes Atomic32FetchAdd returns the pre-add value, as the name suggests).
    i32 blocked_count = Atomic32FetchAdd(&wave->barrier_blocked_count, 1) + 1;
    if (blocked_count == lanes_count)
    {
      // Last lane to arrive: reset the arrival counter for the next barrier,
      // then publish a new generation and wake any lanes parked on it.
      Atomic32Set(&wave->barrier_blocked_count, 0);
      // Advance by exactly one. Adding `barrier_gen + 1` would roughly double
      // the counter on every barrier (0, 1, 3, 7, ...), overflow the signed
      // snapshot arithmetic, and eventually add 0 once the counter is all
      // ones, leaving waiters stuck on an unchanged generation.
      Atomic64FetchAdd(&wave->barrier_gen.v, 1);
      FutexWakeNeq(&wave->barrier_gen.v);
    }
    else
    {
      // Spin for up to `spin_count` iterations, then fall back to the futex.
      u64 remaining_spins = spin_count;
      while (Atomic64Fetch(&wave->barrier_gen.v) == barrier_gen)
      {
        if (remaining_spins > 0)
        {
          --remaining_spins;
          _mm_pause();
        }
        else
        {
          // Presumably blocks while the counter still equals the snapshot;
          // the loop condition re-checks the generation after every return.
          FutexYieldNeq(&wave->barrier_gen.v, &barrier_gen, sizeof(barrier_gen));
        }
      }
    }
  }
}
// Broadcasts `broadcast_size` bytes from one lane to every other lane in the wave.
//
// lane:               the calling lane.
// broadcast_lane_idx: index of the lane whose `broadcast_ptr` is the source.
// broadcast_ptr:      on the broadcasting lane, the data to share; on every
//                     other lane, the buffer that receives the copy
//                     (assumes CopyBytes takes destination first - confirm).
// broadcast_size:     number of bytes passed to CopyBytes.
// spin_count:         forwarded to both WaveSyncEx calls.
void WaveSyncBroadcastEx_(WaveLaneCtx *lane, u32 broadcast_lane_idx, void *broadcast_ptr, u64 broadcast_size, u64 spin_count)
{
  WaveCtx *shared = lane->wave;
  u32 self_idx = lane->idx;
  u32 is_source = (self_idx == broadcast_lane_idx);

  // Phase 1: the source lane publishes its pointer through the wave.
  if (is_source)
  {
    shared->barrier_broadcast_data = broadcast_ptr;
  }
  WaveSyncEx(lane, spin_count);

  // Phase 2: every other lane copies from the published pointer.
  if (!is_source)
  {
    CopyBytes(broadcast_ptr, shared->barrier_broadcast_data, broadcast_size);
  }

  // No lane returns until all lanes have passed this second barrier,
  // i.e. until every copy above has completed.
  WaveSyncEx(lane, spin_count);
}
// Stores `n` in `lane->default_spin_count`.
void SetWaveLaneDefaultSpin(WaveLaneCtx *lane, u64 n)
{
  u64 spin_count = n;
  lane->default_spin_count = spin_count;
}