#ifndef MATH_H #define MATH_H #include "intrinsics.h" #define PI ((f32)3.14159265358979323846) #define TAU ((f32)6.28318530717958647693) INLINE struct trs trs_from_mat3x3(struct mat3x3 m); INLINE struct trs trs_lerp(struct trs a, struct trs b, f32 t); /* ========================== * * Rounding * ========================== */ /* TODO: Don't use intrinsics for these. */ INLINE i32 math_round_f32(f32 f) { return ix_round_f32_to_i32(f); } INLINE i32 math_floor_f32(f32 f) { return ix_floor_f32_to_i32(f); } INLINE i32 math_ceil_f32(f32 f) { return ix_ceil_f32_to_i32(f); } INLINE i64 math_round_f64(f64 f) { return ix_round_f64_to_i64(f); } INLINE i64 math_floor_f64(f64 f) { return ix_floor_f64_to_i64(f); } INLINE i64 math_ceil_f64(f64 f) { return ix_ceil_f64_to_i64(f); } INLINE f32 math_mod_f32(f32 x, f32 m) { return x - m * (i32)(x / m); } INLINE f32 math_abs_f32(f32 f) { u32 truncated = *(u32 *)&f & 0x7FFFFFFF; return *(f32 *)&truncated; } INLINE f64 math_abs_f64(f64 f) { u64 truncated = *(u64 *)&f & 0x7FFFFFFFFFFFFFFF; return *(f64 *)&truncated; } INLINE i32 math_sign_f32(f32 f) { u32 bits = *(u32 *)&f; i32 sign_bit = bits & ((u32)1 << 31); return 1 + (sign_bit * -2); } INLINE i32 math_sign_f64(f64 f) { u64 bits = *(u64 *)&f; i32 sign_bit = bits & ((u64)1 << 31); return 1 + (sign_bit * -2); } /* ========================== * * Exponential * ========================== */ /* Taken from https://gist.github.com/orlp/3551590 */ INLINE u64 math_pow_u64(u64 base, u8 exp) { LOCAL_PERSIST const u8 highest_bit_set[] = { 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 255, /* Anything past 63 is a guaranteed overflow with base > 1 */ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }; u64 result = 1; switch (highest_bit_set[exp]) { case 255: { /* 255 = overflow, return 0 */ if (base == 1) { return 1; } // if (base == -1) { // return 1 - 2 * (exp & 1); // } return 0; } break; case 6: { if (exp & 1) result *= base; exp >>= 1; base *= base; } FALLTHROUGH; case 5: { if (exp & 1) result *= base; exp >>= 1; base *= base; } FALLTHROUGH; case 4: { if (exp & 1) result *= base; exp >>= 1; base *= base; } FALLTHROUGH; case 3: { if (exp & 1) result *= base; exp >>= 1; base *= base; } FALLTHROUGH; case 2: { if (exp & 1) result *= base; exp >>= 1; base *= base; } FALLTHROUGH; case 1: { if (exp & 1) result *= base; } FALLTHROUGH; default: return result; } } /* From Quake III - * https://github.com/id-Software/Quake-III-Arena/blob/dbe4ddb10315479fc00086f08e25d968b4b43c49/code/game/q_math.c#L552 */ INLINE f32 math_rsqrt(f32 x) { const f32 three_halfs = 1.5f; f32 x2 = x * 0.5f; f32 y = x; i32 i = *(i32 *)&y; i = 0x5f3759df - (i >> 1); y = *(f32 *)&i; y *= three_halfs - (x2 * y * y); /* 1st iteration */ return y; } INLINE f32 math_sqrt(f32 x) { return x * math_rsqrt(x); } /* ========================== * * Lerp * ========================== */ INLINE f32 math_lerp_f32(f32 val0, f32 val1, f32 t) { return val0 + ((val1 - val0) * t); } INLINE f64 math_lerp_f64(f64 val0, f64 val1, f64 t) { return val0 + ((val1 - val0) * t); } INLINE f32 math_lerp_angle(f32 a, f32 b, f32 t) { f32 diff = math_mod_f32(b - a, TAU); diff = math_mod_f32(2.0f * diff, TAU) - diff; return a + diff * t; } /* ========================== * * Trig * ========================== */ /* Sine approximation using a parabola adjusted to minimize error, as described in * https://web.archive.org/web/20080228213915/http://www.devmaster.net/forums/showthread.php?t=5784 * * https://www.desmos.com/calculator/gbtjvt2we8 * c: adjustment weight * f(x): original parabola * g(x): adjusted parabola * h(x): error */ INLINE f32 math_sin(f32 x) { const f32 c = 0.225; x -= (TAU * (f32)math_floor_f32(x / TAU)); /* [0, TAU] */ x += (TAU * (x < -PI)) - (TAU * (x > PI)); /* [-PI, PI] */ f32 y = (4.0f/PI) * x + (-4.0f/(PI*PI)) * x * math_abs_f32(x); y = c * (y * math_abs_f32(y) - y) + y; return y; } INLINE f32 math_cos(f32 x) { return math_sin(x + (PI / 2.0f)); } /* https://mazzo.li/posts/vectorized-atan2.html */ INLINE f32 math_atan2(f32 x, f32 y) { const f32 a1 = 0.99997726f; const f32 a3 = -0.33262347f; const f32 a5 = 0.19354346f; const f32 a7 = -0.11643287f; const f32 a9 = 0.05265332f; const f32 a11 = -0.01172120f; /* Ensure input is in [-1, +1] */ b32 swap = math_abs_f32(x) < math_abs_f32(y); f32 s = (swap ? x : y) / (swap ? y : x); /* Approximate atan */ f32 s_sq = s*s; f32 res = s * (a1 + s_sq * (a3 + s_sq * (a5 + s_sq * (a7 + s_sq * (a9 + s_sq * a11))))); res = swap ? (s >= 0.0f ? (PI / 2.f) : -(PI / 2.f)) - res : res; /* Adjust quadrants */ if (x < 0.0f && y >= 0.0f) { res = PI + res; } /* 2nd quadrant */ else if (x <= 0.0f && y < 0.0f) { res = -PI + res; } /* 3rd quadrant */ return res; } INLINE f32 math_asin(f32 x) { /* TODO: Dedicated arcsin approximation */ return (PI / 2.0f) - math_atan2(x, math_sqrt(1.0f - (x*x))); } INLINE f32 math_acos(f32 x) { /* TODO: Dedicated arccos approximation */ return math_atan2(x, math_sqrt(1.0f - (x*x))); } /* ========================== * * V2 * ========================== */ INLINE struct v2 v2_mul(struct v2 a, f32 s) { return V2(a.x * s, a.y * s); } INLINE struct v2 v2_mul_v2(struct v2 a, struct v2 b) { return V2(a.x * b.x, a.y * b.y); } INLINE struct v2 v2_div(struct v2 a, f32 s) { f32 d = 1 / s; return V2(a.x * d, a.y * d); } INLINE struct v2 v2_div_v2(struct v2 a, struct v2 b) { return V2(a.x * (1 / b.x), a.y * (1 / b.y)); } INLINE struct v2 v2_neg(struct v2 a) { return V2(-a.x, -a.y); } INLINE struct v2 v2_add(struct v2 a, struct v2 b) { return V2(a.x + b.x, a.y + b.y); } INLINE struct v2 v2_sub(struct v2 a, struct v2 b) { return V2(a.x - b.x, a.y - b.y); } INLINE f32 v2_len(struct v2 a) { return math_sqrt(a.x * a.x + a.y * a.y); } INLINE f32 v2_len_squared(struct v2 a) { return a.x * a.x + a.y * a.y; } INLINE struct v2 v2_perp(struct v2 a) { return V2(-a.y, a.x); } INLINE struct v2 v2_norm(struct v2 a) { f32 len_squared = v2_len_squared(a); f32 r_sqrt = math_rsqrt(len_squared); a.x *= r_sqrt; a.y *= r_sqrt; return a; } INLINE f32 v2_dot(struct v2 a, struct v2 b) { return a.x * b.x + a.y * b.y; } INLINE f32 v2_wedge(struct v2 a, struct v2 b) { return a.x * b.y - a.y * b.x; } INLINE f32 v2_distance(struct v2 a, struct v2 b) { f32 dx = b.x - a.x; f32 dy = b.y - a.y; return math_sqrt(dx * dx + dy * dy); } INLINE b32 v2_eq(struct v2 a, struct v2 b) { return a.x == b.x && a.y == b.y; } INLINE struct v2 v2_lerp(struct v2 val0, struct v2 val1, f32 t) { struct v2 res; res.x = math_lerp_f32(val0.x, val1.x, t); res.y = math_lerp_f32(val0.y, val1.y, t); return res; } /* ========================== * * Mat3x3 * ========================== */ INLINE struct mat3x3 mat3x3_ident(void) { return (struct mat3x3) { .e = { { 1, 0, 0 }, { 0, 1, 0 }, { 0, 0, 1 } } }; } INLINE struct mat3x3 mat3x3_mul(struct mat3x3 a, struct mat3x3 b) { f32 a00 = a.e[0][0], a01 = a.e[0][1], a02 = a.e[0][2], a10 = a.e[1][0], a11 = a.e[1][1], a12 = a.e[1][2], a20 = a.e[2][0], a21 = a.e[2][1], a22 = a.e[2][2], b00 = b.e[0][0], b01 = b.e[0][1], b02 = b.e[0][2], b10 = b.e[1][0], b11 = b.e[1][1], b12 = b.e[1][2], b20 = b.e[2][0], b21 = b.e[2][1], b22 = b.e[2][2]; struct mat3x3 res; res.e[0][0] = a00 * b00 + a10 * b01 + a20 * b02; res.e[0][1] = a01 * b00 + a11 * b01 + a21 * b02; res.e[0][2] = a02 * b00 + a12 * b01 + a22 * b02; res.e[1][0] = a00 * b10 + a10 * b11 + a20 * b12; res.e[1][1] = a01 * b10 + a11 * b11 + a21 * b12; res.e[1][2] = a02 * b10 + a12 * b11 + a22 * b12; res.e[2][0] = a00 * b20 + a10 * b21 + a20 * b22; res.e[2][1] = a01 * b20 + a11 * b21 + a21 * b22; res.e[2][2] = a02 * b20 + a12 * b21 + a22 * b22; return res; } INLINE struct mat3x3 mat3x3_from_translate(struct v2 v) { return (struct mat3x3) { .e = { {1, 0, 0}, {0, 1, 0}, {v.x, v.y, 1} } }; } INLINE struct mat3x3 mat3x3_translate(struct mat3x3 m, struct v2 v) { m.e[2][0] = m.e[0][0] * v.x + m.e[1][0] * v.y + m.e[2][0]; m.e[2][1] = m.e[0][1] * v.x + m.e[1][1] * v.y + m.e[2][1]; m.e[2][2] = m.e[0][2] * v.x + m.e[1][2] * v.y + m.e[2][2]; return m; } INLINE struct mat3x3 mat3x3_rotate(struct mat3x3 m, f32 angle) { f32 c = math_cos(angle); f32 s = math_sin(angle); struct mat3x3 res = m; f32 m00 = m.e[0][0], m10 = m.e[1][0], m01 = m.e[0][1], m11 = m.e[1][1], m02 = m.e[0][2], m12 = m.e[1][2]; res.e[0][0] = m00 * c + m10 * s; res.e[0][1] = m01 * c + m11 * s; res.e[0][2] = m02 * c + m12 * s; res.e[1][0] = m00 * -s + m10 * c; res.e[1][1] = m01 * -s + m11 * c; res.e[1][2] = m02 * -s + m12 * c; return res; } INLINE struct mat3x3 mat3x3_scale(struct mat3x3 m, struct v3 v) { m.e[0][0] *= v.x; m.e[0][1] *= v.x; m.e[0][2] *= v.x; m.e[1][0] *= v.y; m.e[1][1] *= v.y; m.e[1][2] *= v.y; m.e[2][0] *= v.z; m.e[2][1] *= v.z; m.e[2][2] *= v.z; return m; } INLINE struct mat3x3 mat3x3_from_trs(struct trs trs) { struct mat3x3 m = mat3x3_from_translate(trs.t); m = mat3x3_rotate(m, trs.r); m = mat3x3_scale(m, V3(trs.s.x, trs.s.y, 1)); return m; } INLINE struct mat3x3 mat3x3_trs(struct mat3x3 m, struct trs trs) { m = mat3x3_translate(m, trs.t); m = mat3x3_rotate(m, trs.r); m = mat3x3_scale(m, V3(trs.s.x, trs.s.y, 1)); return m; } INLINE struct mat3x3 mat3x3_trs_pivot_r(struct mat3x3 m, struct trs trs, struct v2 pivot) { m = mat3x3_translate(m, trs.t); m = mat3x3_rotate(m, trs.r); m = mat3x3_translate(m, v2_neg(pivot)); m = mat3x3_scale(m, V3(trs.s.x, trs.s.y, 1)); return m; } INLINE struct mat3x3 mat3x3_trs_pivot_rs(struct mat3x3 m, struct trs trs, struct v2 pivot) { m = mat3x3_translate(m, trs.t); m = mat3x3_rotate(m, trs.r); m = mat3x3_scale(m, V3(trs.s.x, trs.s.y, 1)); m = mat3x3_translate(m, v2_neg(pivot)); return m; } INLINE struct v3 mat3x3_mul_v3(struct mat3x3 m, struct v3 v) { struct v3 res; res.x = m.e[0][0] * v.x + m.e[1][0] * v.y + m.e[2][0] * v.z; res.y = m.e[0][1] * v.x + m.e[1][1] * v.y + m.e[2][1] * v.z; res.z = m.e[0][2] * v.x + m.e[1][2] * v.y + m.e[2][2] * v.z; return res; } /* Equivalent to multiplying by V3(v.x, v.y, 1.0) */ INLINE struct v2 mat3x3_mul_v2(struct mat3x3 m, struct v2 v) { struct v2 res; res.x = m.e[0][0] * v.x + m.e[1][0] * v.y + m.e[2][0]; res.y = m.e[0][1] * v.x + m.e[1][1] * v.y + m.e[2][1]; return res; } INLINE struct mat3x3 mat3x3_inverse(struct mat3x3 m) { f32 a = m.e[0][0], b = m.e[0][1], c = m.e[0][2], d = m.e[1][0], e = m.e[1][1], f = m.e[1][2], g = m.e[2][0], h = m.e[2][1], i = m.e[2][2]; struct mat3x3 res; res.e[0][0] = e * i - f * h; res.e[0][1] = -(b * i - h * c); res.e[0][2] = b * f - e * c; res.e[1][0] = -(d * i - g * f); res.e[1][1] = a * i - c * g; res.e[1][2] = -(a * f - d * c); res.e[2][0] = d * h - g * e; res.e[2][1] = -(a * h - g * b); res.e[2][2] = a * e - b * d; f32 det = 1.0f / (a * res.e[0][0] + b * res.e[1][0] + c * res.e[2][0]); res = mat3x3_scale(res, V3(det, det, det)); return res; } INLINE struct v2 mat3x3_get_right(struct mat3x3 m) { return V2(m.e[0][0], m.e[0][1]); } INLINE struct v2 mat3x3_get_left(struct mat3x3 m) { return V2(-m.e[0][0], -m.e[0][1]); } INLINE struct v2 mat3x3_get_up(struct mat3x3 m) { return V2(-m.e[1][0], -m.e[1][1]); } INLINE struct v2 mat3x3_get_down(struct mat3x3 m) { return V2(m.e[1][0], m.e[1][1]); } INLINE struct v2 mat3x3_get_pos(struct mat3x3 m) { return V2(m.e[2][0], m.e[2][1]); } INLINE f32 mat3x3_get_determinant(struct mat3x3 m) { return m.e[0][0] * m.e[1][1] - m.e[0][1] * m.e[1][0]; } INLINE f32 mat3x3_get_rot(struct mat3x3 m) { return math_atan2(m.e[0][0], m.e[0][1]); } INLINE struct v2 mat3x3_get_scale(struct mat3x3 m) { f32 det_sign = math_sign_f32(mat3x3_get_determinant(m)); struct v2 bx = V2(m.e[0][0], m.e[0][1]); struct v2 by = V2(m.e[1][0], m.e[1][1]); return V2(v2_len(bx), det_sign * v2_len(by)); } INLINE f32 mat3x3_get_skew(struct mat3x3 m) { f32 det = mat3x3_get_determinant(m); i32 det_sign = math_sign_f32(det); struct v2 bx_norm = v2_norm(V2(m.e[0][0], m.e[0][1])); struct v2 by_norm = v2_norm(V2(m.e[1][0], m.e[1][1])); by_norm = v2_mul(by_norm, det_sign); f32 dot = v2_dot(bx_norm, by_norm); return math_acos(dot) - (PI * 0.5f); } INLINE struct mat3x3 mat3x3_lerp(struct mat3x3 a, struct mat3x3 b, f32 t) { struct trs trs_a = trs_from_mat3x3(a); struct trs trs_b = trs_from_mat3x3(b); struct trs trs = trs_lerp(trs_a, trs_b, t); return mat3x3_from_trs(trs); } /* ========================== * * Mat4x4 * ========================== */ /* NOTE: Mat4x4 only used for projection matrix */ INLINE struct mat4x4 mat4x4_from_ortho(f32 left, f32 right, f32 bottom, f32 top, f32 near, f32 far) { struct mat4x4 m = {0}; f32 rl = 1.0f / (right - left); f32 tb = 1.0f / (top - bottom); f32 fn = -1.0f / (far - near); m.e[0][0] = 2.0f * rl; m.e[1][1] = 2.0f * tb; m.e[2][2] = 2.0f * fn; m.e[3][0] = -(right + left) * rl; m.e[3][1] = -(top + bottom) * tb; m.e[3][2] = (far + near) * fn; m.e[3][3] = 1.0f; return m; } INLINE struct mat4x4 mat4x4_mul(struct mat4x4 m1, struct mat4x4 m2) { f32 a00 = m1.e[0][0], a01 = m1.e[0][1], a02 = m1.e[0][2], a03 = m1.e[0][3], a10 = m1.e[1][0], a11 = m1.e[1][1], a12 = m1.e[1][2], a13 = m1.e[1][3], a20 = m1.e[2][0], a21 = m1.e[2][1], a22 = m1.e[2][2], a23 = m1.e[2][3], a30 = m1.e[3][0], a31 = m1.e[3][1], a32 = m1.e[3][2], a33 = m1.e[3][3], b00 = m2.e[0][0], b01 = m2.e[0][1], b02 = m2.e[0][2], b03 = m2.e[0][3], b10 = m2.e[1][0], b11 = m2.e[1][1], b12 = m2.e[1][2], b13 = m2.e[1][3], b20 = m2.e[2][0], b21 = m2.e[2][1], b22 = m2.e[2][2], b23 = m2.e[2][3], b30 = m2.e[3][0], b31 = m2.e[3][1], b32 = m2.e[3][2], b33 = m2.e[3][3]; struct mat4x4 res; res.e[0][0] = a00 * b00 + a10 * b01 + a20 * b02 + a30 * b03; res.e[0][1] = a01 * b00 + a11 * b01 + a21 * b02 + a31 * b03; res.e[0][2] = a02 * b00 + a12 * b01 + a22 * b02 + a32 * b03; res.e[0][3] = a03 * b00 + a13 * b01 + a23 * b02 + a33 * b03; res.e[1][0] = a00 * b10 + a10 * b11 + a20 * b12 + a30 * b13; res.e[1][1] = a01 * b10 + a11 * b11 + a21 * b12 + a31 * b13; res.e[1][2] = a02 * b10 + a12 * b11 + a22 * b12 + a32 * b13; res.e[1][3] = a03 * b10 + a13 * b11 + a23 * b12 + a33 * b13; res.e[2][0] = a00 * b20 + a10 * b21 + a20 * b22 + a30 * b23; res.e[2][1] = a01 * b20 + a11 * b21 + a21 * b22 + a31 * b23; res.e[2][2] = a02 * b20 + a12 * b21 + a22 * b22 + a32 * b23; res.e[2][3] = a03 * b20 + a13 * b21 + a23 * b22 + a33 * b23; res.e[3][0] = a00 * b30 + a10 * b31 + a20 * b32 + a30 * b33; res.e[3][1] = a01 * b30 + a11 * b31 + a21 * b32 + a31 * b33; res.e[3][2] = a02 * b30 + a12 * b31 + a22 * b32 + a32 * b33; res.e[3][3] = a03 * b30 + a13 * b31 + a23 * b32 + a33 * b33; return res; } /* ========================== * * Trs * ========================== */ INLINE struct trs trs_lerp(struct trs a, struct trs b, f32 t) { struct trs res; res.t = v2_lerp(a.t, b.t, t); res.r = math_lerp_angle(a.r, b.r, t); res.s = v2_lerp(a.s, b.s, t); return res; } INLINE struct trs trs_from_mat3x3(struct mat3x3 m) { struct trs trs = { 0 }; trs.t = mat3x3_get_pos(m); trs.r = mat3x3_get_rot(m); trs.s = mat3x3_get_scale(m); return trs; } /* ========================== * * Quad * ========================== */ INLINE struct quad quad_from_rect(struct rect rect) { return (struct quad) { (struct v2) { rect.x, rect.y }, /* Top left */ (struct v2) { rect.x + rect.width, rect.y }, /* Top right */ (struct v2) { rect.x + rect.width, rect.y + rect.height }, /* Bottom right */ (struct v2) { rect.x, rect.y + rect.height }, /* Bottom left */ }; } INLINE struct quad quad_from_line(struct v2 start, struct v2 end, f32 thickness) { f32 width = thickness / 2.f; struct v2 rel = v2_sub(end, start); struct v2 dir = v2_norm(rel); struct v2 dir_perp = v2_perp(dir); struct v2 left = v2_mul(dir_perp, -width); struct v2 right = v2_mul(dir_perp, width); return (struct quad) { .p1 = v2_add(start, left), .p2 = v2_add(start, right), .p3 = v2_add(end, right), .p4 = v2_add(end, left) }; } INLINE struct quad quad_from_ray(struct v2 pos, struct v2 rel, f32 thickness) { struct v2 end = v2_add(pos, rel); return quad_from_line(pos, end, thickness); } INLINE struct quad quad_scale(struct quad q, f32 s) { q.p1 = v2_mul(q.p1, s); q.p2 = v2_mul(q.p2, s); q.p3 = v2_mul(q.p3, s); q.p4 = v2_mul(q.p4, s); return q; } INLINE struct quad quad_mul_mat3x3(struct quad quad, struct mat3x3 m) { return (struct quad) { mat3x3_mul_v2(m, quad.p1), mat3x3_mul_v2(m, quad.p2), mat3x3_mul_v2(m, quad.p3), mat3x3_mul_v2(m, quad.p4) }; } #endif