//////////////////////////////////////////////////////////// //~ Wave sync ops void WaveSyncEx(WaveLaneCtx *lane, u64 spin_count) { WaveCtx *wave = lane->wave; i32 lanes_count = wave->lanes_count; if (lanes_count > 0) { i64 barrier_gen = Atomic64Fetch(&wave->barrier_gen.v); i32 blocked_count = Atomic32FetchAdd(&wave->barrier_blocked_count, 1) + 1; if (blocked_count == lanes_count) { Atomic32Set(&wave->barrier_blocked_count, 0); Atomic64FetchAdd(&wave->barrier_gen.v, barrier_gen + 1); FutexWakeNeq(&wave->barrier_gen.v); } else { u64 remaining_spins = spin_count; while (Atomic64Fetch(&wave->barrier_gen.v) == barrier_gen) { if (remaining_spins > 0) { --remaining_spins; _mm_pause(); } else { FutexYieldNeq(&wave->barrier_gen.v, &barrier_gen, sizeof(barrier_gen)); } } } } } void WaveSyncBroadcastEx_(WaveLaneCtx *lane, u32 broadcast_lane_idx, void *broadcast_ptr, u64 broadcast_size, u64 spin_count) { WaveCtx *wave = lane->wave; u32 lane_idx = lane->idx; if (lane_idx == broadcast_lane_idx) { wave->barrier_broadcast_data = broadcast_ptr; } WaveSyncEx(lane, spin_count); if (lane_idx != broadcast_lane_idx) { CopyBytes(broadcast_ptr, wave->barrier_broadcast_data, broadcast_size); } WaveSyncEx(lane, spin_count); } void SetWaveLaneDefaultSpin(WaveLaneCtx *lane, u64 n) { lane->default_spin_count = n; }