replace trig functions with cephes approximations
This commit is contained in:
parent
2453ceb239
commit
36f3a3dbb6
199
src/math.h
199
src/math.h
@ -391,63 +391,166 @@ INLINE f32 math_rsqrt(f32 x)
|
||||
return ix_rsqrt_f32(x);
|
||||
}
|
||||
|
||||
/* From Quake III -
|
||||
* https://github.com/id-Software/Quake-III-Arena/blob/dbe4ddb10315479fc00086f08e25d968b4b43c49/code/game/q_math.c#L552
|
||||
*/
|
||||
INLINE f32 math_rsqrt_fast(f32 x)
|
||||
{
|
||||
const f32 three_halfs = 1.5f;
|
||||
f32 x2 = x * 0.5f;
|
||||
f32 y = x;
|
||||
i32 i = *(i32 *)&y;
|
||||
i = 0x5f3759df - (i >> 1);
|
||||
y = *(f32 *)&i;
|
||||
y *= three_halfs - (x2 * y * y);
|
||||
return y;
|
||||
}
|
||||
|
||||
/* ========================== *
|
||||
* Trig
|
||||
* ========================== */
|
||||
|
||||
/* Sine approximation using a parabola adjusted to minimize error, as described in
|
||||
* https://web.archive.org/web/20080228213915/http://www.devmaster.net/forums/showthread.php?t=5784
|
||||
*
|
||||
* https://www.desmos.com/calculator/gbtjvt2we8
|
||||
* c: adjustment weight
|
||||
* f(x): original parabola
|
||||
* g(x): adjusted parabola
|
||||
* h(x): error
|
||||
/* Some functions based on Cephes implementation (https://www.netlib.org/cephes/):
|
||||
* - math_sin_approx
|
||||
* - math_cos_approx
|
||||
* - math_reduce_positive_to_pio4
|
||||
* - math_atan
|
||||
*/
|
||||
|
||||
/* TODO: Vectorize */
|
||||
|
||||
/* Approximate sin in range [0, PI/4] */
|
||||
INLINE f32 math_sin_approx(f32 x)
|
||||
{
|
||||
f32 x_sq = x * x;
|
||||
return (((-1.9515295891E-4 * x_sq + 8.3321608736E-3) * x_sq - 1.6666654611E-1) * x_sq * x) + x;
|
||||
}
|
||||
|
||||
/* Approximate cos in range [0, PI/4] */
|
||||
INLINE f32 math_cos_approx(f32 x)
|
||||
{
|
||||
f32 x_sq = x * x;
|
||||
return (((2.443315711809948E-005 * x_sq - 1.388731625493765E-003) * x_sq + 4.166664568298827E-002) * x_sq * x_sq) - (x_sq * 0.5) + 1;
|
||||
}
|
||||
|
||||
/* Reduce postive x to range [0, PI/4] (Cody-Waite argument reduction).
|
||||
* Returns 0 if x > (2^24 - 1);
|
||||
* Sets octant_out=-1 on error. */
|
||||
INLINE f32 math_reduce_positive_to_pio4(f32 x, i32 *octant_out)
|
||||
{
|
||||
i32 octant = -1;
|
||||
|
||||
if (x <= ((1 << 24) - 1)) {
|
||||
octant = x * (4 / PI); /* Integer part of x/(PI/4) */
|
||||
f32 y = octant;
|
||||
if (octant & 1) {
|
||||
octant += 1;
|
||||
y += 1.0;
|
||||
}
|
||||
|
||||
/* Modulo 360 degrees */
|
||||
octant &= 7;
|
||||
|
||||
if (x > 8192) {
|
||||
x = x - y * (PI / 4);
|
||||
} else {
|
||||
/* Extended precision modular arithmetic */
|
||||
x = ((x - y * 0.78515625) - y * 2.4187564849853515625e-4) - y * 3.77489497744594108e-8;
|
||||
}
|
||||
}
|
||||
|
||||
*octant_out = octant;
|
||||
return x;
|
||||
}
|
||||
|
||||
INLINE f32 math_sin(f32 x)
|
||||
{
|
||||
const f32 c = 0.225f;
|
||||
x -= (TAU * math_trunc(x / TAU)); /* [0, TAU] */
|
||||
x += (TAU * (x < -PI)) - (TAU * (x > PI)); /* [-PI, PI] */
|
||||
f32 y = (4.0f/PI) * x + (-4.0f/(PI*PI)) * x * math_fabs(x);
|
||||
y = c * (y * math_fabs(y) - y) + y;
|
||||
return y;
|
||||
i32 sign = 1;
|
||||
|
||||
if (F32_IS_NAN(x)) {
|
||||
return 0;
|
||||
} else if (x < 0) {
|
||||
sign = -1;
|
||||
x = -x;
|
||||
}
|
||||
|
||||
i32 octant;
|
||||
x = math_reduce_positive_to_pio4(x, &octant);
|
||||
|
||||
/* Reflect in x axis */
|
||||
if (octant > 3) {
|
||||
sign = -sign;
|
||||
octant -= 4;
|
||||
}
|
||||
|
||||
switch (octant) {
|
||||
case -1: return 0;
|
||||
case 1: case 2: return math_cos_approx(x) * sign;
|
||||
default: return math_sin_approx(x) * sign;
|
||||
}
|
||||
}
|
||||
|
||||
INLINE f32 math_cos(f32 x)
|
||||
{
|
||||
return math_sin(x + (PI / 2.0f));
|
||||
i32 sign = 1;
|
||||
|
||||
if (F32_IS_NAN(x)) {
|
||||
return 0;
|
||||
} else if (x < 0) {
|
||||
x = -x;
|
||||
}
|
||||
|
||||
i32 octant;
|
||||
x = math_reduce_positive_to_pio4(x, &octant);
|
||||
|
||||
/* Reflect in x axis */
|
||||
if (octant > 3) {
|
||||
sign = -sign;
|
||||
octant -= 4;
|
||||
}
|
||||
|
||||
if (octant > 1) {
|
||||
sign = -sign;
|
||||
}
|
||||
|
||||
switch (octant) {
|
||||
case -1: return 0;
|
||||
case 1: case 2: return math_sin_approx(x) * sign;
|
||||
default: return math_cos_approx(x) * sign;
|
||||
}
|
||||
}
|
||||
|
||||
INLINE f32 math_atan(f32 x)
|
||||
{
|
||||
i32 sign = 1;
|
||||
if (x < 0) {
|
||||
sign = -1;
|
||||
x = -x;
|
||||
}
|
||||
|
||||
/* Reduce range */
|
||||
f32 y;
|
||||
if (x > 2.414213562373095) { /* tan((PI / 8) * 3) */
|
||||
y = PI / 2;
|
||||
x = -(1.f / x);
|
||||
} else if (x > 0.4142135623730950) { /* tan(PI / 8) */
|
||||
y = PI / 4;
|
||||
x = (x - 1.f) / (x + 1.f);
|
||||
} else {
|
||||
y = 0;
|
||||
}
|
||||
|
||||
f32 x_sq = x * x;
|
||||
y += ((((8.05374449538e-2 * x_sq - 1.38776856032E-1) * x_sq + 1.99777106478E-1) * x_sq - 3.33329491539E-1) * x_sq * x + x);
|
||||
|
||||
return y * sign;
|
||||
}
|
||||
|
||||
/* http://math.stackexchange.com/questions/1098487/atan2-faster-approximation */
|
||||
/* FIXME: Verify quadrants */
|
||||
INLINE f32 math_atan2(f32 y, f32 x)
|
||||
{
|
||||
f32 x_abs = math_fabs(x);
|
||||
f32 y_abs = math_fabs(y);
|
||||
b32 swap = y_abs > x_abs;
|
||||
f32 a = swap ? x_abs / y_abs : y_abs / x_abs;
|
||||
f32 s = a * a;
|
||||
f32 r = ((-0.0464964749 * s + 0.15931422) * s - 0.327622764) * s * a + a;
|
||||
if (swap) { r = (PI / 2) - r; }
|
||||
if (x < 0) { r = PI - r; }
|
||||
if (y < 0) { r = -r; }
|
||||
return r;
|
||||
if (x == 0) {
|
||||
if (y < 0) {
|
||||
return -PI / 2;
|
||||
} else if (y > 0) {
|
||||
return PI / 2;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
} else if (y == 0) {
|
||||
if (x < 0) {
|
||||
return PI;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
} else {
|
||||
f32 offset = (x < 0) * (PI - (2 * PI * (y < 0)));
|
||||
return math_atan(y / x) + offset;
|
||||
}
|
||||
}
|
||||
|
||||
INLINE f32 math_asin(f32 x)
|
||||
@ -555,15 +658,6 @@ INLINE struct v2 v2_norm(struct v2 a)
|
||||
return a;
|
||||
}
|
||||
|
||||
INLINE struct v2 v2_norm_fast(struct v2 a)
|
||||
{
|
||||
f32 l = v2_len_squared(a);
|
||||
f32 r_sqrt = math_rsqrt_fast(l);
|
||||
a.x *= r_sqrt;
|
||||
a.y *= r_sqrt;
|
||||
return a;
|
||||
}
|
||||
|
||||
INLINE struct v2 v2_round(struct v2 a)
|
||||
{
|
||||
return V2(
|
||||
@ -627,8 +721,7 @@ INLINE struct v2 v2_rotated(struct v2 v, f32 a)
|
||||
|
||||
INLINE struct v2 v2_from_angle(f32 a)
|
||||
{
|
||||
/* TODO: More precise trig functions to avoid unnecessary normalization */
|
||||
return v2_norm(V2(math_cos(a), math_sin(a)));
|
||||
return V2(math_cos(a), math_sin(a));
|
||||
}
|
||||
|
||||
INLINE f32 v2_angle(struct v2 v)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user