/* Inline wrappers around x86 SIMD intrinsics: scalar math helpers and utilities. */
#ifndef INTRINSICS_H
|
|
#define INTRINSICS_H
|
|
|
|
/* ========================== *
 * Math
 * ========================== */
|
|
|
|
/* Sqrt */
|
|
|
|
/* Square root of a single f32, computed with the scalar SSE intrinsic.
 *
 * f is loaded into the low lane of a vector (_mm_set_ss), passed through
 * _mm_sqrt_ss, and the low lane of the result is returned. */
INLINE f32 ix_sqrt_f32(f32 f)
{
    return _mm_cvtss_f32(_mm_sqrt_ss(_mm_set_ss(f)));
}
|
|
|
|
/* Reciprocal square root of a single f32 via the scalar SSE intrinsic.
 *
 * f is loaded into the low lane of a vector (_mm_set_ss), passed through
 * _mm_rsqrt_ss, and the low lane of the result is returned.
 * NOTE(review): _mm_rsqrt_ss is documented by Intel as an approximation,
 * not an exact 1/sqrt(f) -- confirm the precision is acceptable to callers. */
INLINE f32 ix_rsqrt_f32(f32 f)
{
    return _mm_cvtss_f32(_mm_rsqrt_ss(_mm_set_ss(f)));
}
|
|
|
|
/* Round */
|
|
|
|
/* Round f to the nearest integer and return it as an i32.
 *
 * The rounding step uses _MM_FROUND_TO_NEAREST_INT with _MM_FROUND_NO_EXC;
 * the already-rounded low lane is then converted with _mm_cvtss_si32.
 * NOTE(review): tie-breaking and the result for values outside the i32
 * range follow the underlying instructions -- confirm against the Intel
 * intrinsics reference if callers depend on them. */
INLINE i32 ix_round_f32_to_i32(f32 f)
{
    const __m128 value = _mm_set_ss(f);
    const __m128 rounded = _mm_round_ss(_mm_setzero_ps(), value,
                                        _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    return _mm_cvtss_si32(rounded);
}
|
|
|
|
/* Round f to the nearest integer, keeping the result as an f32.
 *
 * Uses _mm_round_ss with _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC and
 * returns the low lane of the rounded vector.
 * NOTE(review): tie-breaking follows the underlying instruction -- confirm
 * against the Intel intrinsics reference if callers depend on it. */
INLINE f32 ix_round_f32_to_f32(f32 f)
{
    const __m128 value = _mm_set_ss(f);
    const __m128 rounded = _mm_round_ss(_mm_setzero_ps(), value,
                                        _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    return _mm_cvtss_f32(rounded);
}
|
|
|
|
/* Round f to the nearest integer and return it as an i64.
 *
 * The rounding step uses _MM_FROUND_TO_NEAREST_INT with _MM_FROUND_NO_EXC;
 * the already-rounded low lane is then converted with _mm_cvtsd_si64.
 * NOTE(review): tie-breaking and the result for values outside the i64
 * range follow the underlying instructions -- confirm against the Intel
 * intrinsics reference if callers depend on them. */
INLINE i64 ix_round_f64_to_i64(f64 f)
{
    const __m128d value = _mm_set_sd(f);
    const __m128d rounded = _mm_round_sd(_mm_setzero_pd(), value,
                                         _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    return _mm_cvtsd_si64(rounded);
}
|
|
|
|
/* Round f to the nearest integer, keeping the result as an f64.
 *
 * Uses _mm_round_sd with _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC and
 * returns the low lane of the rounded vector.
 * NOTE(review): tie-breaking follows the underlying instruction -- confirm
 * against the Intel intrinsics reference if callers depend on it. */
INLINE f64 ix_round_f64_to_f64(f64 f)
{
    const __m128d value = _mm_set_sd(f);
    const __m128d rounded = _mm_round_sd(_mm_setzero_pd(), value,
                                         _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    return _mm_cvtsd_f64(rounded);
}
|
|
|
|
/* Floor */
|
|
|
|
/* Floor of f, returned as an i32.
 *
 * f is floored with _mm_floor_ss and the low lane of the result is then
 * converted with _mm_cvtss_si32.
 * NOTE(review): the result for values outside the i32 range follows
 * _mm_cvtss_si32 -- confirm callers stay in range. */
INLINE i32 ix_floor_f32_to_i32(f32 f)
{
    const __m128 value = _mm_set_ss(f);
    const __m128 floored = _mm_floor_ss(_mm_setzero_ps(), value);
    return _mm_cvtss_si32(floored);
}
|
|
|
|
/* Floor of f, kept as an f32.
 *
 * f is floored with _mm_floor_ss and the low lane of the result is
 * returned. */
INLINE f32 ix_floor_f32_to_f32(f32 f)
{
    const __m128 value = _mm_set_ss(f);
    const __m128 floored = _mm_floor_ss(_mm_setzero_ps(), value);
    return _mm_cvtss_f32(floored);
}
|
|
|
|
/* Floor of f, returned as an i64.
 *
 * f is floored with _mm_floor_sd and the low lane of the result is then
 * converted with _mm_cvtsd_si64.
 * NOTE(review): the result for values outside the i64 range follows
 * _mm_cvtsd_si64 -- confirm callers stay in range. */
INLINE i64 ix_floor_f64_to_i64(f64 f)
{
    const __m128d value = _mm_set_sd(f);
    const __m128d floored = _mm_floor_sd(_mm_setzero_pd(), value);
    return _mm_cvtsd_si64(floored);
}
|
|
|
|
/* Floor of f, kept as an f64.
 *
 * f is floored with _mm_floor_sd and the low lane of the result is
 * returned. */
INLINE f64 ix_floor_f64_to_f64(f64 f)
{
    const __m128d value = _mm_set_sd(f);
    const __m128d floored = _mm_floor_sd(_mm_setzero_pd(), value);
    return _mm_cvtsd_f64(floored);
}
|
|
|
|
/* Ceil */
|
|
|
|
/* Ceiling of f, returned as an i32.
 *
 * f is rounded up with _mm_ceil_ss and the low lane of the result is then
 * converted with _mm_cvtss_si32.
 * NOTE(review): the result for values outside the i32 range follows
 * _mm_cvtss_si32 -- confirm callers stay in range. */
INLINE i32 ix_ceil_f32_to_i32(f32 f)
{
    const __m128 value = _mm_set_ss(f);
    const __m128 raised = _mm_ceil_ss(_mm_setzero_ps(), value);
    return _mm_cvtss_si32(raised);
}
|
|
|
|
/* Ceiling of f, kept as an f32.
 *
 * f is rounded up with _mm_ceil_ss and the low lane of the result is
 * returned. */
INLINE f32 ix_ceil_f32_to_f32(f32 f)
{
    const __m128 value = _mm_set_ss(f);
    const __m128 raised = _mm_ceil_ss(_mm_setzero_ps(), value);
    return _mm_cvtss_f32(raised);
}
|
|
|
|
/* Ceiling of f, returned as an i64.
 *
 * f is rounded up with _mm_ceil_sd and the low lane of the result is then
 * converted with _mm_cvtsd_si64.
 * NOTE(review): the result for values outside the i64 range follows
 * _mm_cvtsd_si64 -- confirm callers stay in range. */
INLINE i64 ix_ceil_f64_to_i64(f64 f)
{
    const __m128d value = _mm_set_sd(f);
    const __m128d raised = _mm_ceil_sd(_mm_setzero_pd(), value);
    return _mm_cvtsd_si64(raised);
}
|
|
|
|
/* Ceiling of f, kept as an f64.
 *
 * f is rounded up with _mm_ceil_sd and the low lane of the result is
 * returned. */
INLINE f64 ix_ceil_f64_to_f64(f64 f)
{
    const __m128d value = _mm_set_sd(f);
    const __m128d raised = _mm_ceil_sd(_mm_setzero_pd(), value);
    return _mm_cvtsd_f64(raised);
}
|
|
|
|
/* Truncate */
|
|
|
|
/* Truncate f toward zero, keeping the result as an f32.
 *
 * Uses _mm_round_ss with _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC and returns
 * the low lane of the rounded vector. */
INLINE f32 ix_trunc_f32_to_f32(f32 f)
{
    const __m128 value = _mm_set_ss(f);
    const __m128 truncated = _mm_round_ss(_mm_setzero_ps(), value,
                                          _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
    return _mm_cvtss_f32(truncated);
}
|
|
|
|
/* Truncate f toward zero, keeping the result as an f64.
 *
 * Uses _mm_round_sd with _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC and returns
 * the low lane of the rounded vector. */
INLINE f64 ix_trunc_f64_to_f64(f64 f)
{
    const __m128d value = _mm_set_sd(f);
    const __m128d truncated = _mm_round_sd(_mm_setzero_pd(), value,
                                           _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
    return _mm_cvtsd_f64(truncated);
}
|
|
|
|
/* ========================== *
 * Util
 * ========================== */
|
|
|
|
/* Issue the _mm_pause() intrinsic once.
 *
 * NOTE(review): presumably meant as a spin-wait hint inside busy loops --
 * confirm against the call sites. */
INLINE void ix_pause(void)
{
    _mm_pause();
}
|
|
|
|
/* Read the counter returned by __rdtsc() and hand it back as an i64.
 *
 * NOTE(review): the value is in time-stamp-counter ticks, not a wall-clock
 * unit -- confirm callers do not assume a fixed frequency. */
INLINE i64 ix_clock(void)
{
    const i64 ticks = (i64)__rdtsc();
    return ticks;
}
|
|
|
|
#endif
|