power_play/src/intrinsics.h

117 lines
2.4 KiB
C

#ifndef INTRINSICS_H
#define INTRINSICS_H
/* ========================== *
* Math
* ========================== */
/* Sqrt */
/* Square root of `f`, computed with the scalar SSE sqrt instruction. */
INLINE f32 ix_sqrt_f32(f32 f)
{
    /* Load into lane 0, take the scalar sqrt, read lane 0 back out. */
    return _mm_cvtss_f32(_mm_sqrt_ss(_mm_set_ss(f)));
}
/* Reciprocal square root (1 / sqrt(f)) via the scalar SSE rsqrt instruction.
 * This instruction yields a fast approximation, not a correctly rounded
 * result. */
INLINE f32 ix_rsqrt_f32(f32 f)
{
    /* Load into lane 0, approximate 1/sqrt, read lane 0 back out. */
    return _mm_cvtss_f32(_mm_rsqrt_ss(_mm_set_ss(f)));
}
/* Round */
/* Rounds `f` to the nearest integer and returns it as an i32.
 * The rounding mode is given explicitly (_MM_FROUND_TO_NEAREST_INT) with
 * exceptions suppressed (_MM_FROUND_NO_EXC). No range check is performed
 * here for values that do not fit in an i32. */
INLINE i32 ix_round_f32_to_i32(f32 f)
{
    const __m128 input = _mm_set_ss(f);
    /* Only lane 0 of the result is consumed; the zero vector fills the rest. */
    const __m128 rounded = _mm_round_ss(_mm_setzero_ps(), input, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    return _mm_cvtss_si32(rounded);
}
/* Rounds `f` to the nearest integral value, returned as an f32.
 * Uses _MM_FROUND_TO_NEAREST_INT with exceptions suppressed
 * (_MM_FROUND_NO_EXC). */
INLINE f32 ix_round_f32_to_f32(f32 f)
{
    const __m128 input = _mm_set_ss(f);
    /* Only lane 0 of the result is consumed; the zero vector fills the rest. */
    const __m128 rounded = _mm_round_ss(_mm_setzero_ps(), input, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    return _mm_cvtss_f32(rounded);
}
/* Rounds `f` to the nearest integer and returns it as an i64.
 * The rounding mode is given explicitly (_MM_FROUND_TO_NEAREST_INT) with
 * exceptions suppressed (_MM_FROUND_NO_EXC). No range check is performed
 * here for values that do not fit in an i64. */
INLINE i64 ix_round_f64_to_i64(f64 f)
{
    const __m128d input = _mm_set_sd(f);
    /* Only lane 0 of the result is consumed; the zero vector fills the rest. */
    const __m128d rounded = _mm_round_sd(_mm_setzero_pd(), input, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    return _mm_cvtsd_si64(rounded);
}
/* Rounds `f` to the nearest integral value, returned as an f64.
 * Uses _MM_FROUND_TO_NEAREST_INT with exceptions suppressed
 * (_MM_FROUND_NO_EXC). */
INLINE f64 ix_round_f64_to_f64(f64 f)
{
    const __m128d input = _mm_set_sd(f);
    /* Only lane 0 of the result is consumed; the zero vector fills the rest. */
    const __m128d rounded = _mm_round_sd(_mm_setzero_pd(), input, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    return _mm_cvtsd_f64(rounded);
}
/* Floor */
/* Rounds `f` down (toward negative infinity) and returns the result as an
 * i32. No range check is performed here for values that do not fit in an
 * i32. */
INLINE i32 ix_floor_f32_to_i32(f32 f)
{
    const __m128 input = _mm_set_ss(f);
    /* Only lane 0 of the result is consumed; the zero vector fills the rest. */
    const __m128 floored = _mm_floor_ss(_mm_setzero_ps(), input);
    return _mm_cvtss_si32(floored);
}
/* Rounds `f` down (toward negative infinity) to an integral value, returned
 * as an f32. */
INLINE f32 ix_floor_f32_to_f32(f32 f)
{
    const __m128 input = _mm_set_ss(f);
    /* Only lane 0 of the result is consumed; the zero vector fills the rest. */
    const __m128 floored = _mm_floor_ss(_mm_setzero_ps(), input);
    return _mm_cvtss_f32(floored);
}
/* Rounds `f` down (toward negative infinity) and returns the result as an
 * i64. No range check is performed here for values that do not fit in an
 * i64. */
INLINE i64 ix_floor_f64_to_i64(f64 f)
{
    const __m128d input = _mm_set_sd(f);
    /* Only lane 0 of the result is consumed; the zero vector fills the rest. */
    const __m128d floored = _mm_floor_sd(_mm_setzero_pd(), input);
    return _mm_cvtsd_si64(floored);
}
/* Rounds `f` down (toward negative infinity) to an integral value, returned
 * as an f64. */
INLINE f64 ix_floor_f64_to_f64(f64 f)
{
    const __m128d input = _mm_set_sd(f);
    /* Only lane 0 of the result is consumed; the zero vector fills the rest. */
    const __m128d floored = _mm_floor_sd(_mm_setzero_pd(), input);
    return _mm_cvtsd_f64(floored);
}
/* Ceil */
/* Rounds `f` up (toward positive infinity) and returns the result as an
 * i32. No range check is performed here for values that do not fit in an
 * i32. */
INLINE i32 ix_ceil_f32_to_i32(f32 f)
{
    const __m128 input = _mm_set_ss(f);
    /* Only lane 0 of the result is consumed; the zero vector fills the rest. */
    const __m128 raised = _mm_ceil_ss(_mm_setzero_ps(), input);
    return _mm_cvtss_si32(raised);
}
/* Rounds `f` up (toward positive infinity) to an integral value, returned
 * as an f32. */
INLINE f32 ix_ceil_f32_to_f32(f32 f)
{
    const __m128 input = _mm_set_ss(f);
    /* Only lane 0 of the result is consumed; the zero vector fills the rest. */
    const __m128 raised = _mm_ceil_ss(_mm_setzero_ps(), input);
    return _mm_cvtss_f32(raised);
}
/* Rounds `f` up (toward positive infinity) and returns the result as an
 * i64. No range check is performed here for values that do not fit in an
 * i64. */
INLINE i64 ix_ceil_f64_to_i64(f64 f)
{
    const __m128d input = _mm_set_sd(f);
    /* Only lane 0 of the result is consumed; the zero vector fills the rest. */
    const __m128d raised = _mm_ceil_sd(_mm_setzero_pd(), input);
    return _mm_cvtsd_si64(raised);
}
/* Rounds `f` up (toward positive infinity) to an integral value, returned
 * as an f64. */
INLINE f64 ix_ceil_f64_to_f64(f64 f)
{
    const __m128d input = _mm_set_sd(f);
    /* Only lane 0 of the result is consumed; the zero vector fills the rest. */
    const __m128d raised = _mm_ceil_sd(_mm_setzero_pd(), input);
    return _mm_cvtsd_f64(raised);
}
/* Truncate */
/* Truncates `f` toward zero (drops the fractional part), returned as an f32.
 * Uses _MM_FROUND_TO_ZERO with exceptions suppressed (_MM_FROUND_NO_EXC). */
INLINE f32 ix_trunc_f32_to_f32(f32 f)
{
    const __m128 input = _mm_set_ss(f);
    /* Only lane 0 of the result is consumed; the zero vector fills the rest. */
    const __m128 truncated = _mm_round_ss(_mm_setzero_ps(), input, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
    return _mm_cvtss_f32(truncated);
}
/* Truncates `f` toward zero (drops the fractional part), returned as an f64.
 * Uses _MM_FROUND_TO_ZERO with exceptions suppressed (_MM_FROUND_NO_EXC). */
INLINE f64 ix_trunc_f64_to_f64(f64 f)
{
    const __m128d input = _mm_set_sd(f);
    /* Only lane 0 of the result is consumed; the zero vector fills the rest. */
    const __m128d truncated = _mm_round_sd(_mm_setzero_pd(), input, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
    return _mm_cvtsd_f64(truncated);
}
/* ========================== *
* Util
* ========================== */
/* Issues the x86 PAUSE instruction via _mm_pause(). Takes no arguments and
 * returns nothing.
 * NOTE(review): presumably intended for spin-wait loops - confirm against
 * callers. */
INLINE void ix_pause(void)
{
    _mm_pause();
}
/* Reads the processor time-stamp counter via __rdtsc() and returns it as an
 * i64. The intrinsic yields an unsigned 64-bit count; the conversion to the
 * return type is spelled out explicitly. */
INLINE i64 ix_clock(void)
{
    const i64 ticks = (i64)__rdtsc();
    return ticks;
}
#endif