diff --git a/src/math.h b/src/math.h index 20f8a888..beb8eb04 100644 --- a/src/math.h +++ b/src/math.h @@ -391,63 +391,166 @@ INLINE f32 math_rsqrt(f32 x) return ix_rsqrt_f32(x); } -/* From Quake III - - * https://github.com/id-Software/Quake-III-Arena/blob/dbe4ddb10315479fc00086f08e25d968b4b43c49/code/game/q_math.c#L552 - */ -INLINE f32 math_rsqrt_fast(f32 x) -{ - const f32 three_halfs = 1.5f; - f32 x2 = x * 0.5f; - f32 y = x; - i32 i = *(i32 *)&y; - i = 0x5f3759df - (i >> 1); - y = *(f32 *)&i; - y *= three_halfs - (x2 * y * y); - return y; -} - /* ========================== * * Trig * ========================== */ -/* Sine approximation using a parabola adjusted to minimize error, as described in - * https://web.archive.org/web/20080228213915/http://www.devmaster.net/forums/showthread.php?t=5784 - * - * https://www.desmos.com/calculator/gbtjvt2we8 - * c: adjustment weight - * f(x): original parabola - * g(x): adjusted parabola - * h(x): error +/* Some functions based on Cephes implementation (https://www.netlib.org/cephes/): + * - math_sin_approx + * - math_cos_approx + * - math_reduce_positive_to_pio4 + * - math_atan */ + +/* TODO: Vectorize */ + +/* Approximate sin in range [0, PI/4] */ +INLINE f32 math_sin_approx(f32 x) +{ + f32 x_sq = x * x; + return (((-1.9515295891E-4 * x_sq + 8.3321608736E-3) * x_sq - 1.6666654611E-1) * x_sq * x) + x; +} + +/* Approximate cos in range [0, PI/4] */ +INLINE f32 math_cos_approx(f32 x) +{ + f32 x_sq = x * x; + return (((2.443315711809948E-005 * x_sq - 1.388731625493765E-003) * x_sq + 4.166664568298827E-002) * x_sq * x_sq) - (x_sq * 0.5) + 1; +} + +/* Reduce postive x to range [0, PI/4] (Cody-Waite argument reduction). + * Returns 0 if x > (2^24 - 1); + * Sets octant_out=-1 on error. */ +INLINE f32 math_reduce_positive_to_pio4(f32 x, i32 *octant_out) +{ + i32 octant = -1; + + if (x <= ((1 << 24) - 1)) { + octant = x * (4 / PI); /* Integer part of x/(PI/4) */ + f32 y = octant; + if (octant & 1) { + octant += 1; + y += 1.0; + } + + /* Modulo 360 degrees */ + octant &= 7; + + if (x > 8192) { + x = x - y * (PI / 4); + } else { + /* Extended precision modular arithmetic */ + x = ((x - y * 0.78515625) - y * 2.4187564849853515625e-4) - y * 3.77489497744594108e-8; + } + } + + *octant_out = octant; + return x; +} + INLINE f32 math_sin(f32 x) { - const f32 c = 0.225f; - x -= (TAU * math_trunc(x / TAU)); /* [0, TAU] */ - x += (TAU * (x < -PI)) - (TAU * (x > PI)); /* [-PI, PI] */ - f32 y = (4.0f/PI) * x + (-4.0f/(PI*PI)) * x * math_fabs(x); - y = c * (y * math_fabs(y) - y) + y; - return y; + i32 sign = 1; + + if (F32_IS_NAN(x)) { + return 0; + } else if (x < 0) { + sign = -1; + x = -x; + } + + i32 octant; + x = math_reduce_positive_to_pio4(x, &octant); + + /* Reflect in x axis */ + if (octant > 3) { + sign = -sign; + octant -= 4; + } + + switch (octant) { + case -1: return 0; + case 1: case 2: return math_cos_approx(x) * sign; + default: return math_sin_approx(x) * sign; + } } INLINE f32 math_cos(f32 x) { - return math_sin(x + (PI / 2.0f)); + i32 sign = 1; + + if (F32_IS_NAN(x)) { + return 0; + } else if (x < 0) { + x = -x; + } + + i32 octant; + x = math_reduce_positive_to_pio4(x, &octant); + + /* Reflect in x axis */ + if (octant > 3) { + sign = -sign; + octant -= 4; + } + + if (octant > 1) { + sign = -sign; + } + + switch (octant) { + case -1: return 0; + case 1: case 2: return math_sin_approx(x) * sign; + default: return math_cos_approx(x) * sign; + } +} + +INLINE f32 math_atan(f32 x) +{ + i32 sign = 1; + if (x < 0) { + sign = -1; + x = -x; + } + + /* Reduce range */ + f32 y; + if (x > 2.414213562373095) { /* tan((PI / 8) * 3) */ + y = PI / 2; + x = -(1.f / x); + } else if (x > 0.4142135623730950) { /* tan(PI / 8) */ + y = PI / 4; + x = (x - 1.f) / (x + 1.f); + } else { + y = 0; + } + + f32 x_sq = x * x; + y += ((((8.05374449538e-2 * x_sq - 1.38776856032E-1) * x_sq + 1.99777106478E-1) * x_sq - 3.33329491539E-1) * x_sq * x + x); + + return y * sign; } -/* http://math.stackexchange.com/questions/1098487/atan2-faster-approximation */ -/* FIXME: Verify quadrants */ INLINE f32 math_atan2(f32 y, f32 x) { - f32 x_abs = math_fabs(x); - f32 y_abs = math_fabs(y); - b32 swap = y_abs > x_abs; - f32 a = swap ? x_abs / y_abs : y_abs / x_abs; - f32 s = a * a; - f32 r = ((-0.0464964749 * s + 0.15931422) * s - 0.327622764) * s * a + a; - if (swap) { r = (PI / 2) - r; } - if (x < 0) { r = PI - r; } - if (y < 0) { r = -r; } - return r; + if (x == 0) { + if (y < 0) { + return -PI / 2; + } else if (y > 0) { + return PI / 2; + } else { + return 0; + } + } else if (y == 0) { + if (x < 0) { + return PI; + } else { + return 0; + } + } else { + f32 offset = (x < 0) * (PI - (2 * PI * (y < 0))); + return math_atan(y / x) + offset; + } } INLINE f32 math_asin(f32 x) @@ -555,15 +658,6 @@ INLINE struct v2 v2_norm(struct v2 a) return a; } -INLINE struct v2 v2_norm_fast(struct v2 a) -{ - f32 l = v2_len_squared(a); - f32 r_sqrt = math_rsqrt_fast(l); - a.x *= r_sqrt; - a.y *= r_sqrt; - return a; -} - INLINE struct v2 v2_round(struct v2 a) { return V2( @@ -627,8 +721,7 @@ INLINE struct v2 v2_rotated(struct v2 v, f32 a) INLINE struct v2 v2_from_angle(f32 a) { - /* TODO: More precise trig functions to avoid unnecessary normalization */ - return v2_norm(V2(math_cos(a), math_sin(a))); + return V2(math_cos(a), math_sin(a)); } INLINE f32 v2_angle(struct v2 v)