//////////////////////////////// //~ Sqrt intrinsics Inline f32 IxSqrtF32(f32 f) { __m128 n = _mm_set_ss(f); n = _mm_sqrt_ss(n); return _mm_cvtss_f32(n); } Inline f64 IxSqrtF64(f64 f) { __m128d n = _mm_set_sd(f); n = _mm_sqrt_sd(_mm_setzero_pd(), n); return _mm_cvtsd_f64(n); } Inline f32 IxRsqrtF32(f32 f) { __m128 n = _mm_set_ss(f); n = _mm_rsqrt_ss(n); return _mm_cvtss_f32(n); } //////////////////////////////// //~ Round intrinsics Inline i32 IxRoundF32ToI32(f32 f) { return _mm_cvtss_si32(_mm_round_ss(_mm_setzero_ps(), _mm_set_ss(f), _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC)); } Inline f32 IxRoundF32ToF32(f32 f) { return _mm_cvtss_f32(_mm_round_ss(_mm_setzero_ps(), _mm_set_ss(f), _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC)); } Inline i64 IxRoundF64ToI64(f64 f) { return _mm_cvtsd_si64(_mm_round_sd(_mm_setzero_pd(), _mm_set_sd(f), _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC)); } Inline f64 IxRoundF64ToF64(f64 f) { return _mm_cvtsd_f64(_mm_round_sd(_mm_setzero_pd(), _mm_set_sd(f), _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC)); } //////////////////////////////// //~ Floor intrinsics Inline i32 IxFloorF32ToI32(f32 f) { return _mm_cvtss_si32(_mm_floor_ss(_mm_setzero_ps(), _mm_set_ss(f))); } Inline f32 IxFloorF32ToF32(f32 f) { return _mm_cvtss_f32(_mm_floor_ss(_mm_setzero_ps(), _mm_set_ss(f))); } Inline i64 IxFloorF64ToI64(f64 f) { return _mm_cvtsd_si64(_mm_floor_sd(_mm_setzero_pd(), _mm_set_sd(f))); } Inline f64 IxFloorF64ToF64(f64 f) { return _mm_cvtsd_f64(_mm_floor_sd(_mm_setzero_pd(), _mm_set_sd(f))); } //////////////////////////////// //~ Ceil intrinsics Inline i32 IxCeilF32ToI32(f32 f) { return _mm_cvtss_si32(_mm_ceil_ss(_mm_setzero_ps(), _mm_set_ss(f))); } Inline f32 IxCeilF32ToF32(f32 f) { return _mm_cvtss_f32(_mm_ceil_ss(_mm_setzero_ps(), _mm_set_ss(f))); } Inline i64 IxCeilF64ToI64(f64 f) { return _mm_cvtsd_si64(_mm_ceil_sd(_mm_setzero_pd(), _mm_set_sd(f))); } Inline f64 IxCeilF64ToF64(f64 f) { return _mm_cvtsd_f64(_mm_ceil_sd(_mm_setzero_pd(), _mm_set_sd(f))); } //////////////////////////////// //~ Truncate intrinsics Inline f32 IxTruncF32ToF32(f32 f) { return _mm_cvtss_f32(_mm_round_ss(_mm_setzero_ps(), _mm_set_ss(f), _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC)); } Inline f64 IxTruncF64ToF64(f64 f) { return _mm_cvtsd_f64(_mm_round_sd(_mm_setzero_pd(), _mm_set_sd(f), _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC)); } //////////////////////////////// //~ Utility intrinsics Inline void IxPause(void) { _mm_pause(); } Inline u64 IxClock(void) { return __rdtsc(); }