replace trig functions with cephes approximations

2024-08-28 14:33:33 -05:00 · 2024-08-28 14:33:33 -05:00 · 36f3a3dbb6
commit 36f3a3dbb6
parent 2453ceb239
1 changed files with 146 additions and 53 deletions
--- a/src/math.h
+++ b/src/math.h
@ -391,63 +391,166 @@ INLINE f32 math_rsqrt(f32 x)
    return ix_rsqrt_f32(x);
 }

-/* From Quake III -
- * https://github.com/id-Software/Quake-III-Arena/blob/dbe4ddb10315479fc00086f08e25d968b4b43c49/code/game/q_math.c#L552
- */
-INLINE f32 math_rsqrt_fast(f32 x)
-{
-    const f32 three_halfs = 1.5f;
-    f32 x2 = x * 0.5f;
-    f32 y = x;
-    i32 i = *(i32 *)&y;
-    i = 0x5f3759df - (i >> 1);
-    y = *(f32 *)&i;
-    y *= three_halfs - (x2 * y * y);
-    return y;
-}
-
 /* ========================== *
 * Trig
 * ========================== */

-/* Sine approximation using a parabola adjusted to minimize error, as described in
- * https://web.archive.org/web/20080228213915/http://www.devmaster.net/forums/showthread.php?t=5784
- *
- * https://www.desmos.com/calculator/gbtjvt2we8
- * c: adjustment weight
- * f(x): original parabola
- * g(x): adjusted parabola
- * h(x): error
+/* Some functions based on Cephes implementation (https://www.netlib.org/cephes/):
+ * - math_sin_approx
+ * - math_cos_approx
+ * - math_reduce_positive_to_pio4
+ * - math_atan
 */
+
+/* TODO: Vectorize */
+
+/* Approximate sin in range [0, PI/4] */
+INLINE f32 math_sin_approx(f32 x)
+{
+    f32 x_sq = x * x;
+    return (((-1.9515295891E-4 * x_sq + 8.3321608736E-3) * x_sq - 1.6666654611E-1) * x_sq * x) + x;
+}
+
+/* Approximate cos in range [0, PI/4] */
+INLINE f32 math_cos_approx(f32 x)
+{
+    f32 x_sq = x * x;
+    return (((2.443315711809948E-005 * x_sq - 1.388731625493765E-003) * x_sq + 4.166664568298827E-002) * x_sq * x_sq) - (x_sq * 0.5) + 1;
+}
+
+/* Reduce postive x to range [0, PI/4] (Cody-Waite argument reduction).
+ * Returns 0 if x > (2^24 - 1);
+ * Sets octant_out=-1 on error. */
+INLINE f32 math_reduce_positive_to_pio4(f32 x, i32 *octant_out)
+{
+    i32 octant = -1;
+
+    if (x <= ((1 << 24) - 1)) {
+        octant = x * (4 / PI);  /* Integer part of x/(PI/4) */
+        f32 y = octant;
+        if (octant & 1) {
+            octant += 1;
+            y += 1.0;
+        }
+
+        /* Modulo 360 degrees */
+        octant &= 7;
+
+        if (x > 8192) {
+            x = x - y * (PI / 4);
+        } else {
+            /* Extended precision modular arithmetic */
+            x = ((x - y * 0.78515625) - y * 2.4187564849853515625e-4) - y * 3.77489497744594108e-8;
+        }
+    }
+
+    *octant_out = octant;
+    return x;
+}
+
 INLINE f32 math_sin(f32 x)
 {
-    const f32 c = 0.225f;
-    x -= (TAU * math_trunc(x / TAU));  /* [0, TAU] */
-    x += (TAU * (x < -PI)) - (TAU * (x > PI));  /* [-PI, PI] */
-    f32 y = (4.0f/PI) * x + (-4.0f/(PI*PI)) * x * math_fabs(x);
-    y = c * (y * math_fabs(y) - y) + y;
-    return y;
+    i32 sign = 1;
+
+    if (F32_IS_NAN(x)) {
+        return 0;
+    } else if (x < 0) {
+        sign = -1;
+        x = -x;
+    }
+
+    i32 octant;
+    x = math_reduce_positive_to_pio4(x, &octant);
+
+    /* Reflect in x axis */
+    if (octant > 3) {
+        sign = -sign;
+        octant -= 4;
+    }
+
+    switch (octant) {
+        case -1:            return 0;
+        case 1: case 2:     return math_cos_approx(x) * sign;
+        default:            return math_sin_approx(x) * sign;
+    }
 }

 INLINE f32 math_cos(f32 x)
 {
-    return math_sin(x + (PI / 2.0f));
+    i32 sign = 1;
+
+    if (F32_IS_NAN(x)) {
+        return 0;
+    } else if (x < 0) {
+        x = -x;
+    }
+
+    i32 octant;
+    x = math_reduce_positive_to_pio4(x, &octant);
+
+    /* Reflect in x axis */
+    if (octant > 3) {
+        sign = -sign;
+        octant -= 4;
+    }
+
+    if (octant > 1) {
+        sign = -sign;
+    }
+
+    switch (octant) {
+        case -1:            return 0;
+        case 1: case 2:     return math_sin_approx(x) * sign;
+        default:            return math_cos_approx(x) * sign;
+    }
+}
+
+INLINE f32 math_atan(f32 x)
+{
+    i32 sign = 1;
+    if (x < 0) {
+        sign = -1;
+        x = -x;
+    }
+
+    /* Reduce range */
+    f32 y;
+    if (x > 2.414213562373095) {  /* tan((PI / 8) * 3) */
+        y = PI / 2;
+        x = -(1.f / x);
+    } else if (x > 0.4142135623730950) {  /* tan(PI / 8) */
+        y = PI / 4;
+        x = (x - 1.f) / (x + 1.f);
+    } else {
+        y = 0;
+    }
+
+    f32 x_sq = x * x;
+    y += ((((8.05374449538e-2 * x_sq - 1.38776856032E-1) * x_sq + 1.99777106478E-1) * x_sq - 3.33329491539E-1) * x_sq * x + x);
+
+    return y * sign;
 }

-/* http://math.stackexchange.com/questions/1098487/atan2-faster-approximation */
-/* FIXME: Verify quadrants */
 INLINE f32 math_atan2(f32 y, f32 x)
 {
-    f32 x_abs = math_fabs(x);
-    f32 y_abs = math_fabs(y);
-    b32 swap = y_abs > x_abs;
-    f32 a = swap ? x_abs / y_abs : y_abs / x_abs;
-    f32 s = a * a;
-    f32 r = ((-0.0464964749 * s + 0.15931422) * s - 0.327622764) * s * a + a;
-    if (swap) { r = (PI / 2) - r; }
-    if (x < 0) { r = PI - r; }
-    if (y < 0) { r = -r; }
-    return r;
+    if (x == 0) {
+        if (y < 0) {
+            return -PI / 2;
+        } else if (y > 0) {
+            return PI / 2;
+        } else {
+            return 0;
+        }
+    } else if (y == 0) {
+        if (x < 0) {
+            return PI;
+        } else {
+            return 0;
+        }
+    } else {
+        f32 offset = (x < 0) * (PI - (2 * PI * (y < 0)));
+        return math_atan(y / x) + offset;
+    }
 }

 INLINE f32 math_asin(f32 x)
@ -555,15 +658,6 @@ INLINE struct v2 v2_norm(struct v2 a)
    return a;
 }

-INLINE struct v2 v2_norm_fast(struct v2 a)
-{
-    f32 l = v2_len_squared(a);
-    f32 r_sqrt = math_rsqrt_fast(l);
-    a.x *= r_sqrt;
-    a.y *= r_sqrt;
-    return a;
-}
-
 INLINE struct v2 v2_round(struct v2 a)
 {
    return V2(
@ -627,8 +721,7 @@ INLINE struct v2 v2_rotated(struct v2 v, f32 a)

 INLINE struct v2 v2_from_angle(f32 a)
 {
-    /* TODO: More precise trig functions to avoid unnecessary normalization */
-    return v2_norm(V2(math_cos(a), math_sin(a)));
+    return V2(math_cos(a), math_sin(a));
 }

 INLINE f32 v2_angle(struct v2 v)