#ifndef INTRINSICS_H #define INTRINSICS_H /* ========================== * * Math * ========================== */ /* Sqrt */ INLINE f32 ix_sqrt_f32(f32 f) { __m128 n = _mm_set_ss(f); n = _mm_sqrt_ss(n); return _mm_cvtss_f32(n); } INLINE f64 ix_sqrt_f64(f64 f) { __m128d n = _mm_set_sd(f); n = _mm_sqrt_sd(_mm_setzero_pd(), n); return _mm_cvtsd_f64(n); } INLINE f32 ix_rsqrt_f32(f32 f) { __m128 n = _mm_set_ss(f); n = _mm_rsqrt_ss(n); return _mm_cvtss_f32(n); } /* Round */ INLINE i32 ix_round_f32_to_i32(f32 f) { return _mm_cvtss_si32(_mm_round_ss(_mm_setzero_ps(), _mm_set_ss(f), _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC)); } INLINE f32 ix_round_f32_to_f32(f32 f) { return _mm_cvtss_f32(_mm_round_ss(_mm_setzero_ps(), _mm_set_ss(f), _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC)); } INLINE i64 ix_round_f64_to_i64(f64 f) { return _mm_cvtsd_si64(_mm_round_sd(_mm_setzero_pd(), _mm_set_sd(f), _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC)); } INLINE f64 ix_round_f64_to_f64(f64 f) { return _mm_cvtsd_f64(_mm_round_sd(_mm_setzero_pd(), _mm_set_sd(f), _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC)); } /* Floor */ INLINE i32 ix_floor_f32_to_i32(f32 f) { return _mm_cvtss_si32(_mm_floor_ss(_mm_setzero_ps(), _mm_set_ss(f))); } INLINE f32 ix_floor_f32_to_f32(f32 f) { return _mm_cvtss_f32(_mm_floor_ss(_mm_setzero_ps(), _mm_set_ss(f))); } INLINE i64 ix_floor_f64_to_i64(f64 f) { return _mm_cvtsd_si64(_mm_floor_sd(_mm_setzero_pd(), _mm_set_sd(f))); } INLINE f64 ix_floor_f64_to_f64(f64 f) { return _mm_cvtsd_f64(_mm_floor_sd(_mm_setzero_pd(), _mm_set_sd(f))); } /* Ceil */ INLINE i32 ix_ceil_f32_to_i32(f32 f) { return _mm_cvtss_si32(_mm_ceil_ss(_mm_setzero_ps(), _mm_set_ss(f))); } INLINE f32 ix_ceil_f32_to_f32(f32 f) { return _mm_cvtss_f32(_mm_ceil_ss(_mm_setzero_ps(), _mm_set_ss(f))); } INLINE i64 ix_ceil_f64_to_i64(f64 f) { return _mm_cvtsd_si64(_mm_ceil_sd(_mm_setzero_pd(), _mm_set_sd(f))); } INLINE f64 ix_ceil_f64_to_f64(f64 f) { return _mm_cvtsd_f64(_mm_ceil_sd(_mm_setzero_pd(), _mm_set_sd(f))); } /* Truncate */ INLINE f32 ix_trunc_f32_to_f32(f32 f) { return _mm_cvtss_f32(_mm_round_ss(_mm_setzero_ps(), _mm_set_ss(f), _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC)); } INLINE f64 ix_trunc_f64_to_f64(f64 f) { return _mm_cvtsd_f64(_mm_round_sd(_mm_setzero_pd(), _mm_set_sd(f), _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC)); } /* ========================== * * Util * ========================== */ INLINE void ix_pause(void) { _mm_pause(); } INLINE i64 ix_clock(void) { return __rdtsc(); } #endif