57 lines
1.7 KiB
C
57 lines
1.7 KiB
C
////////////////////////////////////////////////////////////
//~ Wave sync ops
|
|
|
|
// Barrier across all lanes of a wave: the calling lane does not return until
// `wave->lanes_count` lanes have entered this function.
//
// lane:       the calling lane; `lane->wave` holds the shared barrier state.
// spin_count: how many busy-wait iterations (each issuing `_mm_pause`) a
//             waiting lane performs before falling back to `FutexYieldNeq`.
//
// Does nothing when `lanes_count` is not positive.
void WaveSyncEx(WaveLaneCtx *lane, u64 spin_count)
{
  WaveCtx *wave = lane->wave;
  i32 lanes_count = wave->lanes_count;
  if (lanes_count > 0)
  {
    // Snapshot the generation before registering arrival; waiters leave the
    // barrier once the shared generation no longer matches this snapshot.
    i64 barrier_gen = Atomic64Fetch(&wave->barrier_gen.v);
    // The `+ 1` assumes Atomic32FetchAdd returns the pre-add value - TODO confirm.
    i32 blocked_count = Atomic32FetchAdd(&wave->barrier_blocked_count, 1) + 1;
    if (blocked_count == lanes_count)
    {
      // Last lane to arrive: reset the arrival counter for the next barrier
      // before releasing anyone.
      Atomic32Set(&wave->barrier_blocked_count, 0);
      // Advance the generation by exactly one. Adding `barrier_gen + 1` would
      // make the generation run 0, 1, 3, 7, ... until it hits -1, at which
      // point the addend is 0, the generation stops changing, and every
      // waiting lane stays blocked forever.
      Atomic64FetchAdd(&wave->barrier_gen.v, 1);
      FutexWakeNeq(&wave->barrier_gen.v);
    }
    else
    {
      u64 remaining_spins = spin_count;
      while (Atomic64Fetch(&wave->barrier_gen.v) == barrier_gen)
      {
        if (remaining_spins > 0)
        {
          // Spin phase: cheap polling while the spin budget lasts.
          --remaining_spins;
          _mm_pause();
        }
        else
        {
          // Spin budget exhausted. Presumably this blocks until the value at
          // the address differs from `barrier_gen` - confirm against the
          // futex helpers. The loop condition re-checks either way.
          FutexYieldNeq(&wave->barrier_gen.v, &barrier_gen, sizeof(barrier_gen));
        }
      }
    }
  }
}
|
|
|
|
// Shares one lane's data with the other lanes of the wave.
//
// lane:               the calling lane.
// broadcast_lane_idx: index of the lane whose data is shared.
// broadcast_ptr:      on the broadcasting lane, the pointer that gets
//                     published; on every other lane, the first argument
//                     handed to CopyBytes (presumably the destination -
//                     confirm against CopyBytes).
// broadcast_size:     byte count passed to CopyBytes.
// spin_count:         forwarded to both WaveSyncEx calls.
void WaveSyncBroadcastEx_(WaveLaneCtx *lane, u32 broadcast_lane_idx, void *broadcast_ptr, u64 broadcast_size, u64 spin_count)
{
  u32 self_idx = lane->idx;
  WaveCtx *shared = lane->wave;
  int is_source_lane = (self_idx == broadcast_lane_idx);

  // Phase 1: the broadcasting lane publishes its pointer, then all lanes sync
  // so the pointer is in place before anyone reads it.
  if (is_source_lane)
  {
    shared->barrier_broadcast_data = broadcast_ptr;
  }
  WaveSyncEx(lane, spin_count);

  // Phase 2: every other lane copies through the published pointer, then all
  // lanes sync again before returning.
  if (!is_source_lane)
  {
    CopyBytes(broadcast_ptr, shared->barrier_broadcast_data, broadcast_size);
  }
  WaveSyncEx(lane, spin_count);
}
|
|
|
|
// Stores `n` in the lane's `default_spin_count` field.
void SetWaveLaneDefaultSpin(WaveLaneCtx *lane, u64 n)
{
  lane->default_spin_count = n;
}
|