power_play/src/base/base_intrinsics.h

119 lines
2.6 KiB
C

////////////////////////////////
//~ Sqrt intrinsics
// Returns the square root of `f`, computed with the scalar single-precision
// SSE sqrt intrinsic on the low lane of a vector register.
Inline f32 IxSqrtF32(f32 f)
{
    return _mm_cvtss_f32(_mm_sqrt_ss(_mm_set_ss(f)));
}
// Returns the square root of `f`, computed with the scalar double-precision
// SSE2 sqrt intrinsic.
Inline f64 IxSqrtF64(f64 f)
{
    // _mm_sqrt_sd takes the root of the low lane of its second operand; the
    // first operand only supplies the upper lane, which is discarded here.
    __m128d upper = _mm_setzero_pd();
    __m128d value = _mm_set_sd(f);
    return _mm_cvtsd_f64(_mm_sqrt_sd(upper, value));
}
// Returns an approximation of 1/sqrt(f) using the scalar SSE reciprocal
// square root intrinsic. The result is a hardware estimate, not a correctly
// rounded value (Intel documents a maximum relative error of 1.5 * 2^-12).
Inline f32 IxRsqrtF32(f32 f)
{
    return _mm_cvtss_f32(_mm_rsqrt_ss(_mm_set_ss(f)));
}
////////////////////////////////
//~ Round intrinsics
// Rounds `f` to the nearest integer (ties go to the even integer) and returns
// it as an i32. No range check is performed before the integer conversion.
Inline i32 IxRoundF32ToI32(f32 f)
{
    // Step 1: round in floating point without raising a precision exception.
    __m128 rounded = _mm_round_ss(_mm_setzero_ps(), _mm_set_ss(f),
                                  _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    // Step 2: convert the already-integral value to a 32-bit integer.
    return _mm_cvtss_si32(rounded);
}
// Rounds `f` to the nearest integral value (ties go to the even integer) and
// returns it as an f32. The precision exception is suppressed.
Inline f32 IxRoundF32ToF32(f32 f)
{
    __m128 rounded = _mm_round_ss(_mm_setzero_ps(), _mm_set_ss(f),
                                  _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    return _mm_cvtss_f32(rounded);
}
// Rounds `f` to the nearest integer (ties go to the even integer) and returns
// it as an i64. No range check is performed before the integer conversion.
Inline i64 IxRoundF64ToI64(f64 f)
{
    // Step 1: round in floating point without raising a precision exception.
    __m128d rounded = _mm_round_sd(_mm_setzero_pd(), _mm_set_sd(f),
                                   _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    // Step 2: convert the already-integral value to a 64-bit integer.
    return _mm_cvtsd_si64(rounded);
}
// Rounds `f` to the nearest integral value (ties go to the even integer) and
// returns it as an f64. The precision exception is suppressed.
Inline f64 IxRoundF64ToF64(f64 f)
{
    __m128d rounded = _mm_round_sd(_mm_setzero_pd(), _mm_set_sd(f),
                                   _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    return _mm_cvtsd_f64(rounded);
}
////////////////////////////////
//~ Floor intrinsics
// Rounds `f` down toward negative infinity and returns the result as an i32.
// No range check is performed before the integer conversion.
Inline i32 IxFloorF32ToI32(f32 f)
{
    __m128 floored = _mm_floor_ss(_mm_setzero_ps(), _mm_set_ss(f));
    // The value is already integral, so this conversion does no further rounding.
    return _mm_cvtss_si32(floored);
}
// Rounds `f` down toward negative infinity and returns the integral result
// as an f32.
Inline f32 IxFloorF32ToF32(f32 f)
{
    __m128 floored = _mm_floor_ss(_mm_setzero_ps(), _mm_set_ss(f));
    return _mm_cvtss_f32(floored);
}
// Rounds `f` down toward negative infinity and returns the result as an i64.
// No range check is performed before the integer conversion.
Inline i64 IxFloorF64ToI64(f64 f)
{
    __m128d floored = _mm_floor_sd(_mm_setzero_pd(), _mm_set_sd(f));
    // The value is already integral, so this conversion does no further rounding.
    return _mm_cvtsd_si64(floored);
}
// Rounds `f` down toward negative infinity and returns the integral result
// as an f64.
Inline f64 IxFloorF64ToF64(f64 f)
{
    __m128d floored = _mm_floor_sd(_mm_setzero_pd(), _mm_set_sd(f));
    return _mm_cvtsd_f64(floored);
}
////////////////////////////////
//~ Ceil intrinsics
// Rounds `f` up toward positive infinity and returns the result as an i32.
// No range check is performed before the integer conversion.
Inline i32 IxCeilF32ToI32(f32 f)
{
    __m128 ceiled = _mm_ceil_ss(_mm_setzero_ps(), _mm_set_ss(f));
    // The value is already integral, so this conversion does no further rounding.
    return _mm_cvtss_si32(ceiled);
}
// Rounds `f` up toward positive infinity and returns the integral result
// as an f32.
Inline f32 IxCeilF32ToF32(f32 f)
{
    __m128 ceiled = _mm_ceil_ss(_mm_setzero_ps(), _mm_set_ss(f));
    return _mm_cvtss_f32(ceiled);
}
// Rounds `f` up toward positive infinity and returns the result as an i64.
// No range check is performed before the integer conversion.
Inline i64 IxCeilF64ToI64(f64 f)
{
    __m128d ceiled = _mm_ceil_sd(_mm_setzero_pd(), _mm_set_sd(f));
    // The value is already integral, so this conversion does no further rounding.
    return _mm_cvtsd_si64(ceiled);
}
// Rounds `f` up toward positive infinity and returns the integral result
// as an f64.
Inline f64 IxCeilF64ToF64(f64 f)
{
    __m128d ceiled = _mm_ceil_sd(_mm_setzero_pd(), _mm_set_sd(f));
    return _mm_cvtsd_f64(ceiled);
}
////////////////////////////////
//~ Truncate intrinsics
// Discards the fractional part of `f` (rounds toward zero) and returns the
// integral result as an f32. The precision exception is suppressed.
Inline f32 IxTruncF32ToF32(f32 f)
{
    __m128 truncated = _mm_round_ss(_mm_setzero_ps(), _mm_set_ss(f),
                                    _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
    return _mm_cvtss_f32(truncated);
}
// Discards the fractional part of `f` (rounds toward zero) and returns the
// integral result as an f64. The precision exception is suppressed.
Inline f64 IxTruncF64ToF64(f64 f)
{
    __m128d truncated = _mm_round_sd(_mm_setzero_pd(), _mm_set_sd(f),
                                     _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
    return _mm_cvtsd_f64(truncated);
}
////////////////////////////////
//~ Utility intrinsics
// Thin wrapper over _mm_pause(), which emits the x86 PAUSE instruction.
// Intel documents PAUSE as a hint for spin-wait loops; how callers in this
// project use it is not visible from this file.
Inline void IxPause(void)
{
_mm_pause();
}
// Returns the current value of the processor time-stamp counter as read by
// __rdtsc(). The value is a raw tick count, not a wall-clock unit.
Inline u64 IxClock(void)
{
    u64 ticks = __rdtsc();
    return ticks;
}