replace trig functions with cephes approximations

2024-08-28 14:33:33 -05:00 · 2024-08-28 14:33:33 -05:00 · 36f3a3dbb6
commit 36f3a3dbb6
parent 2453ceb239
1 changed files with 146 additions and 53 deletions
--- a/src/math.h
+++ b/src/math.h
@ -391,63 +391,166 @@ INLINE f32 math_rsqrt(f32 x)
    return ix_rsqrt_f32(x);
 }
 /* From Quake III -
 * https://github.com/id-Software/Quake-III-Arena/blob/dbe4ddb10315479fc00086f08e25d968b4b43c49/code/game/q_math.c#L552
 */
 INLINE f32 math_rsqrt_fast(f32 x)
 {
    const f32 three_halfs = 1.5f;
    f32 x2 = x * 0.5f;
    f32 y = x;
    i32 i = *(i32 *)&y;
    i = 0x5f3759df - (i >> 1);
    y = *(f32 *)&i;
    y *= three_halfs - (x2 * y * y);
    return y;
 }
 /* ========================== *
 * Trig
 * ========================== */
-/* Sine approximation using a parabola adjusted to minimize error, as described in
+/* Some functions based on Cephes implementation (https://www.netlib.org/cephes/):
- * https://web.archive.org/web/20080228213915/http://www.devmaster.net/forums/showthread.php?t=5784
+ * - math_sin_approx
- *
+ * - math_cos_approx
- * https://www.desmos.com/calculator/gbtjvt2we8
+ * - math_reduce_positive_to_pio4
- * c: adjustment weight
+ * - math_atan
 * f(x): original parabola
 * g(x): adjusted parabola
 * h(x): error
 */
 /* TODO: Vectorize */
 /* Approximate sin in range [0, PI/4] */
 INLINE f32 math_sin_approx(f32 x)
 {
    f32 x_sq = x * x;
    return (((-1.9515295891E-4 * x_sq + 8.3321608736E-3) * x_sq - 1.6666654611E-1) * x_sq * x) + x;
 }
 /* Approximate cos in range [0, PI/4] */
 INLINE f32 math_cos_approx(f32 x)
 {
    f32 x_sq = x * x;
    return (((2.443315711809948E-005 * x_sq - 1.388731625493765E-003) * x_sq + 4.166664568298827E-002) * x_sq * x_sq) - (x_sq * 0.5) + 1;
 }
 /* Reduce postive x to range [0, PI/4] (Cody-Waite argument reduction).
 * Returns 0 if x > (2^24 - 1);
 * Sets octant_out=-1 on error. */
 INLINE f32 math_reduce_positive_to_pio4(f32 x, i32 *octant_out)
 {
    i32 octant = -1;
    if (x <= ((1 << 24) - 1)) {
        octant = x * (4 / PI);  /* Integer part of x/(PI/4) */
        f32 y = octant;
        if (octant & 1) {
            octant += 1;
            y += 1.0;
        }
        /* Modulo 360 degrees */
        octant &= 7;
        if (x > 8192) {
            x = x - y * (PI / 4);
        } else {
            /* Extended precision modular arithmetic */
            x = ((x - y * 0.78515625) - y * 2.4187564849853515625e-4) - y * 3.77489497744594108e-8;
        }
    }
    *octant_out = octant;
    return x;
 }
 INLINE f32 math_sin(f32 x)
 {
-    const f32 c = 0.225f;
+    i32 sign = 1;
-    x -= (TAU * math_trunc(x / TAU));  /* [0, TAU] */
+
-    x += (TAU * (x < -PI)) - (TAU * (x > PI));  /* [-PI, PI] */
+    if (F32_IS_NAN(x)) {
-    f32 y = (4.0f/PI) * x + (-4.0f/(PI*PI)) * x * math_fabs(x);
+        return 0;
-    y = c * (y * math_fabs(y) - y) + y;
+    } else if (x < 0) {
-    return y;
+        sign = -1;
        x = -x;
    }
    i32 octant;
    x = math_reduce_positive_to_pio4(x, &octant);
    /* Reflect in x axis */
    if (octant > 3) {
        sign = -sign;
        octant -= 4;
    }
    switch (octant) {
        case -1:            return 0;
        case 1: case 2:     return math_cos_approx(x) * sign;
        default:            return math_sin_approx(x) * sign;
    }
 }
 INLINE f32 math_cos(f32 x)
 {
-    return math_sin(x + (PI / 2.0f));
+    i32 sign = 1;
    if (F32_IS_NAN(x)) {
        return 0;
    } else if (x < 0) {
        x = -x;
    }
    i32 octant;
    x = math_reduce_positive_to_pio4(x, &octant);
    /* Reflect in x axis */
    if (octant > 3) {
        sign = -sign;
        octant -= 4;
    }
    if (octant > 1) {
        sign = -sign;
    }
    switch (octant) {
        case -1:            return 0;
        case 1: case 2:     return math_sin_approx(x) * sign;
        default:            return math_cos_approx(x) * sign;
    }
 }
 INLINE f32 math_atan(f32 x)
 {
    i32 sign = 1;
    if (x < 0) {
        sign = -1;
        x = -x;
    }
    /* Reduce range */
    f32 y;
    if (x > 2.414213562373095) {  /* tan((PI / 8) * 3) */
        y = PI / 2;
        x = -(1.f / x);
    } else if (x > 0.4142135623730950) {  /* tan(PI / 8) */
        y = PI / 4;
        x = (x - 1.f) / (x + 1.f);
    } else {
        y = 0;
    }
    f32 x_sq = x * x;
    y += ((((8.05374449538e-2 * x_sq - 1.38776856032E-1) * x_sq + 1.99777106478E-1) * x_sq - 3.33329491539E-1) * x_sq * x + x);
    return y * sign;
 }
 /* http://math.stackexchange.com/questions/1098487/atan2-faster-approximation */
 /* FIXME: Verify quadrants */
 INLINE f32 math_atan2(f32 y, f32 x)
 {
-    f32 x_abs = math_fabs(x);
+    if (x == 0) {
-    f32 y_abs = math_fabs(y);
+        if (y < 0) {
-    b32 swap = y_abs > x_abs;
+            return -PI / 2;
-    f32 a = swap ? x_abs / y_abs : y_abs / x_abs;
+        } else if (y > 0) {
-    f32 s = a * a;
+            return PI / 2;
-    f32 r = ((-0.0464964749 * s + 0.15931422) * s - 0.327622764) * s * a + a;
+        } else {
-    if (swap) { r = (PI / 2) - r; }
+            return 0;
-    if (x < 0) { r = PI - r; }
+        }
-    if (y < 0) { r = -r; }
+    } else if (y == 0) {
-    return r;
+        if (x < 0) {
            return PI;
        } else {
            return 0;
        }
    } else {
        f32 offset = (x < 0) * (PI - (2 * PI * (y < 0)));
        return math_atan(y / x) + offset;
    }
 }
 INLINE f32 math_asin(f32 x)
@ -555,15 +658,6 @@ INLINE struct v2 v2_norm(struct v2 a)
    return a;
 }
 INLINE struct v2 v2_norm_fast(struct v2 a)
 {
    f32 l = v2_len_squared(a);
    f32 r_sqrt = math_rsqrt_fast(l);
    a.x *= r_sqrt;
    a.y *= r_sqrt;
    return a;
 }
 INLINE struct v2 v2_round(struct v2 a)
 {
    return V2(
@ -627,8 +721,7 @@ INLINE struct v2 v2_rotated(struct v2 v, f32 a)
 INLINE struct v2 v2_from_angle(f32 a)
 {
-    /* TODO: More precise trig functions to avoid unnecessary normalization */
+    return V2(math_cos(a), math_sin(a));
    return v2_norm(V2(math_cos(a), math_sin(a)));
 }
 INLINE f32 v2_angle(struct v2 v)