replace trig functions with cephes approximations

This commit is contained in:
jacob 2024-08-28 14:33:33 -05:00
parent 2453ceb239
commit 36f3a3dbb6

View File

@ -391,63 +391,166 @@ INLINE f32 math_rsqrt(f32 x)
return ix_rsqrt_f32(x); return ix_rsqrt_f32(x);
} }
/* From Quake III -
* https://github.com/id-Software/Quake-III-Arena/blob/dbe4ddb10315479fc00086f08e25d968b4b43c49/code/game/q_math.c#L552
*/
INLINE f32 math_rsqrt_fast(f32 x)
{
const f32 three_halfs = 1.5f;
f32 x2 = x * 0.5f;
f32 y = x;
i32 i = *(i32 *)&y;
i = 0x5f3759df - (i >> 1);
y = *(f32 *)&i;
y *= three_halfs - (x2 * y * y);
return y;
}
/* ========================== * /* ========================== *
* Trig * Trig
* ========================== */ * ========================== */
/* Sine approximation using a parabola adjusted to minimize error, as described in /* Some functions based on Cephes implementation (https://www.netlib.org/cephes/):
* https://web.archive.org/web/20080228213915/http://www.devmaster.net/forums/showthread.php?t=5784 * - math_sin_approx
* * - math_cos_approx
* https://www.desmos.com/calculator/gbtjvt2we8 * - math_reduce_positive_to_pio4
* c: adjustment weight * - math_atan
* f(x): original parabola
* g(x): adjusted parabola
* h(x): error
*/ */
/* TODO: Vectorize */
/* Approximate sin in range [0, PI/4] */
INLINE f32 math_sin_approx(f32 x)
{
f32 x_sq = x * x;
return (((-1.9515295891E-4 * x_sq + 8.3321608736E-3) * x_sq - 1.6666654611E-1) * x_sq * x) + x;
}
/* Approximate cos in range [0, PI/4] */
INLINE f32 math_cos_approx(f32 x)
{
f32 x_sq = x * x;
return (((2.443315711809948E-005 * x_sq - 1.388731625493765E-003) * x_sq + 4.166664568298827E-002) * x_sq * x_sq) - (x_sq * 0.5) + 1;
}
/* Reduce postive x to range [0, PI/4] (Cody-Waite argument reduction).
* Returns 0 if x > (2^24 - 1);
* Sets octant_out=-1 on error. */
INLINE f32 math_reduce_positive_to_pio4(f32 x, i32 *octant_out)
{
i32 octant = -1;
if (x <= ((1 << 24) - 1)) {
octant = x * (4 / PI); /* Integer part of x/(PI/4) */
f32 y = octant;
if (octant & 1) {
octant += 1;
y += 1.0;
}
/* Modulo 360 degrees */
octant &= 7;
if (x > 8192) {
x = x - y * (PI / 4);
} else {
/* Extended precision modular arithmetic */
x = ((x - y * 0.78515625) - y * 2.4187564849853515625e-4) - y * 3.77489497744594108e-8;
}
}
*octant_out = octant;
return x;
}
INLINE f32 math_sin(f32 x) INLINE f32 math_sin(f32 x)
{ {
const f32 c = 0.225f; i32 sign = 1;
x -= (TAU * math_trunc(x / TAU)); /* [0, TAU] */
x += (TAU * (x < -PI)) - (TAU * (x > PI)); /* [-PI, PI] */ if (F32_IS_NAN(x)) {
f32 y = (4.0f/PI) * x + (-4.0f/(PI*PI)) * x * math_fabs(x); return 0;
y = c * (y * math_fabs(y) - y) + y; } else if (x < 0) {
return y; sign = -1;
x = -x;
}
i32 octant;
x = math_reduce_positive_to_pio4(x, &octant);
/* Reflect in x axis */
if (octant > 3) {
sign = -sign;
octant -= 4;
}
switch (octant) {
case -1: return 0;
case 1: case 2: return math_cos_approx(x) * sign;
default: return math_sin_approx(x) * sign;
}
} }
INLINE f32 math_cos(f32 x) INLINE f32 math_cos(f32 x)
{ {
return math_sin(x + (PI / 2.0f)); i32 sign = 1;
if (F32_IS_NAN(x)) {
return 0;
} else if (x < 0) {
x = -x;
}
i32 octant;
x = math_reduce_positive_to_pio4(x, &octant);
/* Reflect in x axis */
if (octant > 3) {
sign = -sign;
octant -= 4;
}
if (octant > 1) {
sign = -sign;
}
switch (octant) {
case -1: return 0;
case 1: case 2: return math_sin_approx(x) * sign;
default: return math_cos_approx(x) * sign;
}
}
INLINE f32 math_atan(f32 x)
{
i32 sign = 1;
if (x < 0) {
sign = -1;
x = -x;
}
/* Reduce range */
f32 y;
if (x > 2.414213562373095) { /* tan((PI / 8) * 3) */
y = PI / 2;
x = -(1.f / x);
} else if (x > 0.4142135623730950) { /* tan(PI / 8) */
y = PI / 4;
x = (x - 1.f) / (x + 1.f);
} else {
y = 0;
}
f32 x_sq = x * x;
y += ((((8.05374449538e-2 * x_sq - 1.38776856032E-1) * x_sq + 1.99777106478E-1) * x_sq - 3.33329491539E-1) * x_sq * x + x);
return y * sign;
} }
/* http://math.stackexchange.com/questions/1098487/atan2-faster-approximation */
/* FIXME: Verify quadrants */
INLINE f32 math_atan2(f32 y, f32 x) INLINE f32 math_atan2(f32 y, f32 x)
{ {
f32 x_abs = math_fabs(x); if (x == 0) {
f32 y_abs = math_fabs(y); if (y < 0) {
b32 swap = y_abs > x_abs; return -PI / 2;
f32 a = swap ? x_abs / y_abs : y_abs / x_abs; } else if (y > 0) {
f32 s = a * a; return PI / 2;
f32 r = ((-0.0464964749 * s + 0.15931422) * s - 0.327622764) * s * a + a; } else {
if (swap) { r = (PI / 2) - r; } return 0;
if (x < 0) { r = PI - r; } }
if (y < 0) { r = -r; } } else if (y == 0) {
return r; if (x < 0) {
return PI;
} else {
return 0;
}
} else {
f32 offset = (x < 0) * (PI - (2 * PI * (y < 0)));
return math_atan(y / x) + offset;
}
} }
INLINE f32 math_asin(f32 x) INLINE f32 math_asin(f32 x)
@ -555,15 +658,6 @@ INLINE struct v2 v2_norm(struct v2 a)
return a; return a;
} }
INLINE struct v2 v2_norm_fast(struct v2 a)
{
f32 l = v2_len_squared(a);
f32 r_sqrt = math_rsqrt_fast(l);
a.x *= r_sqrt;
a.y *= r_sqrt;
return a;
}
INLINE struct v2 v2_round(struct v2 a) INLINE struct v2 v2_round(struct v2 a)
{ {
return V2( return V2(
@ -627,8 +721,7 @@ INLINE struct v2 v2_rotated(struct v2 v, f32 a)
INLINE struct v2 v2_from_angle(f32 a) INLINE struct v2 v2_from_angle(f32 a)
{ {
/* TODO: More precise trig functions to avoid unnecessary normalization */ return V2(math_cos(a), math_sin(a));
return v2_norm(V2(math_cos(a), math_sin(a)));
} }
INLINE f32 v2_angle(struct v2 v) INLINE f32 v2_angle(struct v2 v)