#ifndef MATH_H
#define MATH_H

/* Math functions are default 32 bit (f32, i32, etc) unless specified */

#include "intrinsics.h"

INLINE f32 math_sqrt(f32 x);

/* ========================== *
 * Float rounding
 * ========================== */

/* TODO: Don't use intrinsics for these. */

/* Round */

INLINE f32 math_round(f32 f)
{
    return ix_round_f32_to_f32(f);
}

INLINE f64 math_round64(f64 f)
{
    return ix_round_f64_to_f64(f);
}

INLINE i32 math_round_to_int(f32 f)
{
    return ix_round_f32_to_i32(f);
}

INLINE i64 math_round_to_int64(f64 f)
{
    return ix_round_f64_to_i64(f);
}

/* Floor */

INLINE f32 math_floor(f32 f)
{
    return ix_floor_f32_to_f32(f);
}

INLINE f64 math_floor64(f64 f)
{
    return ix_floor_f64_to_f64(f);
}

INLINE i32 math_floor_to_int(f32 f)
{
    return ix_floor_f32_to_i32(f);
}


INLINE i64 math_floor_to_int64(f64 f)
{
    return ix_floor_f64_to_i64(f);
}

/* Ceil */

INLINE f32 math_ceil(f32 f)
{
    return ix_ceil_f32_to_f32(f);
}

INLINE f64 math_ceil64(f64 f)
{
    return ix_ceil_f64_to_f64(f);
}

INLINE i32 math_ceil_to_int(f32 f)
{
    return ix_ceil_f32_to_i32(f);
}

INLINE i64 math_ceil_to_int64(f64 f)
{
    return ix_ceil_f64_to_i64(f);
}

/* Truncate */

INLINE f32 math_trunc(f32 f)
{
    return ix_trunc_f32_to_f32(f);
}

INLINE f64 math_trunc64(f64 f)
{
    return ix_trunc_f64_to_f64(f);
}

/* ========================== *
 * Float properties
 * ========================== */

INLINE f32 math_fmod(f32 x, f32 m)
{
    return x - m * math_trunc(x / m);
}

INLINE f64 math_fmod64(f64 x, f64 m)
{
    return x - m * math_trunc64(x / m);
}

INLINE f32 math_fabs(f32 f)
{
    u32 truncated = *(u32 *)&f & 0x7FFFFFFF;
    return *(f32 *)&truncated;
}

INLINE f64 math_fabs64(f64 f)
{
    u64 truncated = *(u64 *)&f & 0x7FFFFFFFFFFFFFFFULL;
    return *(f64 *)&truncated;
}

INLINE i32 math_fsign(f32 f)
{
    u32 sign_bit = (*(u32 *)&f >> 31) & 1;
    return 1 + -((i32)(sign_bit << 1));
}

INLINE i64 math_fsign64(f64 f)
{
    u64 sign_bit = (*(u64 *)&f >> 63) & 1;
    return 1 + -((i64)(sign_bit << 1));
}

/* ========================== *
 * Integer abs
 * ========================== */

INLINE u32 math_abs_i32(i32 v)
{
    return v * ((v >= 0) - (v < 0));
}

INLINE u64 math_abs_i64(i64 v)
{
    return v * ((v >= 0) - (v < 0));
}

/* ========================== *
 * Exponential
 * ========================== */

/* Taken from https://gist.github.com/orlp/3551590 */
INLINE u64 math_pow_u64(u64 base, u8 exp)
{
    LOCAL_PERSIST const u8 highest_bit_set[] = {
        0, 1, 2, 2, 3, 3, 3, 3,
        4, 4, 4, 4, 4, 4, 4, 4,
        5, 5, 5, 5, 5, 5, 5, 5,
        5, 5, 5, 5, 5, 5, 5, 5,
        6, 6, 6, 6, 6, 6, 6, 6,
        6, 6, 6, 6, 6, 6, 6, 6,
        6, 6, 6, 6, 6, 6, 6, 6,
        6, 6, 6, 6, 6, 6, 6, 255, /* Anything past 63 is a guaranteed overflow with base > 1 */
        255, 255, 255, 255, 255, 255, 255, 255,
        255, 255, 255, 255, 255, 255, 255, 255,
        255, 255, 255, 255, 255, 255, 255, 255,
        255, 255, 255, 255, 255, 255, 255, 255,
        255, 255, 255, 255, 255, 255, 255, 255,
        255, 255, 255, 255, 255, 255, 255, 255,
        255, 255, 255, 255, 255, 255, 255, 255,
        255, 255, 255, 255, 255, 255, 255, 255,
        255, 255, 255, 255, 255, 255, 255, 255,
        255, 255, 255, 255, 255, 255, 255, 255,
        255, 255, 255, 255, 255, 255, 255, 255,
        255, 255, 255, 255, 255, 255, 255, 255,
        255, 255, 255, 255, 255, 255, 255, 255,
        255, 255, 255, 255, 255, 255, 255, 255,
        255, 255, 255, 255, 255, 255, 255, 255,
        255, 255, 255, 255, 255, 255, 255, 255,
        255, 255, 255, 255, 255, 255, 255, 255,
        255, 255, 255, 255, 255, 255, 255, 255,
        255, 255, 255, 255, 255, 255, 255, 255,
        255, 255, 255, 255, 255, 255, 255, 255,
        255, 255, 255, 255, 255, 255, 255, 255,
        255, 255, 255, 255, 255, 255, 255, 255,
        255, 255, 255, 255, 255, 255, 255, 255,
        255, 255, 255, 255, 255, 255, 255, 255,
    };

    u64 result = 1;

    switch (highest_bit_set[exp]) {
        case 255: {
            /* 255 = overflow, return 0 */
            if (base == 1) {
                return 1;
            }
            // if (base == -1) {
            //     return 1 - 2 * (exp & 1);
            // }
            return 0;
        } break;
        case 6: {
            if (exp & 1) result *= base;
            exp >>= 1;
            base *= base;
        } FALLTHROUGH;
        case 5: {
            if (exp & 1) result *= base;
            exp >>= 1;
            base *= base;
        } FALLTHROUGH;
        case 4: {
            if (exp & 1) result *= base;
            exp >>= 1;
            base *= base;
        } FALLTHROUGH;
        case 3: {
            if (exp & 1) result *= base;
            exp >>= 1;
            base *= base;
        } FALLTHROUGH;
        case 2: {
            if (exp & 1) result *= base;
            exp >>= 1;
            base *= base;
        } FALLTHROUGH;
        case 1: {
            if (exp & 1) result *= base;
        } FALLTHROUGH;
        default: return result;
    }
}

/* Based on FreeBSD's implementation
 * https://github.com/freebsd/freebsd-src/blob/main/lib/msun/src/e_logf.c */
INLINE f32 math_ln(f32 x)
{
    LOCAL_PERSIST const f32 ln2_hi = 6.9313812256e-01f;
    LOCAL_PERSIST const f32 ln2_lo = 9.0580006145e-06f;

    i32 x_int = *(i32 *)&x;

    i32 k = 0;
    if (x_int < 0x00800000) {
        f32 two_p25 = 3.3554432000e+07;
        if ((x_int & 0x7fffffff) == 0) {
            /* Return -inf if x is 0 */
            return -two_p25 / 0;
        } else if (x_int < 0) {
            /* Return NaN if x is negative */
            return (x - x) / 0;
        }
        k -= 25;
        x *= two_p25;
        x_int = *(i32 *)&x;
    } else if (x_int >= 0x7f800000) {
        return x + x;
    }

    k += (x_int >> 23) - 127;
    x_int &= 0x007fffff;
    i32 i = (x_int + (0x95f64 << 3)) & 0x800000;
    i32 x_int_normalized = x_int | (i ^ 0x3f800000);
    x = *(f32 *)&x_int_normalized - 1.0f;
    k += (i >> 23);

    if ((0x007fffff & (0x8000 + x_int)) < 0xc000) {
        if (x == 0) {
            if (k == 0) {
                return 0;
            } else {
                return (f32)k * ln2_hi + (f32)k * ln2_lo;
            }
        }
        f32 r = x * x * (0.5f - 0.33333333333333333f * x);
        if (k == 0) {
            return x - r;
        } else {
            return (f32)k * ln2_hi - ((r - (f32)k * ln2_lo) - x);
        }
    }

    f32 s = x / (2.0f + x);
    f32 z = s * s;
    f32 w = z * z;
    f32 r = (z * (0.66666662693f + w * 0.28498786688f)) + (w * (0.40000972152f + w * 0.24279078841f));
    if (((x_int - (0x6147a << 3)) | ((0x6b851 << 3) - x_int)) > 0) {
        f32 hfsq = 0.5f * x * x;
        if (k == 0) {
            return x - (hfsq - s * (hfsq + r));
        } else {
            return (f32)k * ln2_hi - ((hfsq - (s * (hfsq + r) + (f32)k * ln2_lo)) - x);
        }
    } else {
        if (k == 0) {
            return x - s * (x - r);
        } else {
            return (f32)k * ln2_hi - ((s * (x - r) - (f32)k * ln2_lo) - x);
        }
    }
}

/* Based on FreeBSD's implementation
 * https://github.com/freebsd/freebsd-src/blob/main/lib/msun/src/e_expf.c */
INLINE f32 math_exp(f32 x)
{
    LOCAL_PERSIST const f32 half[2] = { 0.5, -0.5 };
    LOCAL_PERSIST const f32 o_threshold = 8.8721679688e+01f;
    LOCAL_PERSIST const f32 u_threshold = -1.0397208405e+02f;
    LOCAL_PERSIST const f32 ln2_hi[2] = { 6.9314575195e-01f, -6.9314575195e-01f };
    LOCAL_PERSIST const f32 ln2_lo[2] = { 1.4286067653e-06f, -1.4286067653e-06f };
    LOCAL_PERSIST const f32 inv_ln2 = 1.4426950216e+00f;
    LOCAL_PERSIST const f32 huge = 1.0e+30f;
    LOCAL_PERSIST const f32 two_m100 = 7.8886090522e-31f;

    u32 x_uint = *(u32 *)&x;
    i32 x_sign_bit = (i32)((x_uint >> 31) & 1);
    x_uint &= 0x7fffffff;

    /* Filter out non-finite argument */
    if (x_uint >= 0x42b17218) { /* if |x|>=88.721... */
        if (x_uint > 0x7f800000) {
            return x + x;  /* NaN */
        } else if (x_uint == 0x7f800000) {
            return (x_sign_bit == 0) ? x : 0;
        }
        if (x > o_threshold) {
            /* Overflow */
            return F32_INFINITY;
        } else if (x < u_threshold) {
            /* Underflow */
            return two_m100 * two_m100;
        }
    }

    /* Argument reduction */
    i32 k = 0;
    f32 hi = 0;
    f32 lo = 0;
    if (x_uint > 0x3eb17218) {
        if (x_uint < 0x3F851592) {
            hi = x - ln2_hi[x_sign_bit];
            lo = ln2_lo[x_sign_bit];
            k = 1 - x_sign_bit - x_sign_bit;
        } else {
            k = (i32)(inv_ln2 * x + half[x_sign_bit]);
            hi = x - (f32)k * ln2_hi[0];
            lo = (f32)k * ln2_lo[0];
        }
        x = hi - lo;
    } else if (x_uint < 0x39000000)  {
        if (huge + x > 1.0f) {
            return 1.0f + x;
        }
    } else {
        k = 0;
    }

    f32 two_pk;
    if (k >= -125) {
        u32 temp = ((u32)(0x7f + k)) << 23;
        two_pk = *(f32 *)&temp;
    } else {
        u32 temp = ((u32)(0x7f + (k + 100))) << 23;
        two_pk = *(f32 *)&temp;
    }

    f32 t = x * x;
    f32 c = x - t * (1.6666625440e-1f + t * -2.7667332906e-3f);
    if (k == 0) {
        return 1.0f - ((x * c) / (c - 2.0f) - x);
    } else {
        f32 y = 1.0f - ((lo - (x * c)/(2.0f - c))-hi);
        if (k >= -125) {
            if (k==128) {
                u32 temp = 0x7f800000;
                return y * 2.0f * (*(f32 *)&temp);
            }
            return y * two_pk;
        } else {
            return y * two_pk * two_m100;
        }
    }
}

INLINE f32 math_pow(f32 a, f32 b)
{
    if (a >= 0) {
        /* a is positive */
        return math_exp(math_ln(a) * b);
    } else {
        /* a is negative */
        f32 res_sign = math_round_to_int(b) % 2 == 0 ? 1 : -1;
        return math_exp(math_ln(-a) * b) * res_sign;
    }
}

INLINE f32 math_sqrt(f32 x)
{
    return ix_sqrt_f32(x);
}

INLINE f32 math_sqrt64(f32 x)
{
    return ix_sqrt_f64(x);
}

INLINE f32 math_rsqrt(f32 x)
{
    return ix_rsqrt_f32(x);
}

/* ========================== *
 * Trig
 * ========================== */

/* Functions based on Cephes implementation (https://www.netlib.org/cephes/):
 * - math_sin_approx
 * - math_cos_approx
 * - math_reduce_positive_to_pio4
 * - math_atan
 */

/* TODO: Vectorize */

/* Approximate sin in range [0, PI/4] */
INLINE f32 math_sin_approx(f32 x)
{
    f32 x_sq = x * x;
    return (((-1.9515295891E-4 * x_sq + 8.3321608736E-3) * x_sq - 1.6666654611E-1) * x_sq * x) + x;
}

/* Approximate cos in range [0, PI/4] */
INLINE f32 math_cos_approx(f32 x)
{
    f32 x_sq = x * x;
    return (((2.443315711809948E-005 * x_sq - 1.388731625493765E-003) * x_sq + 4.166664568298827E-002) * x_sq * x_sq) - (x_sq * 0.5) + 1;
}

/* Reduce postive x to range [0, PI/4] (Cody-Waite argument reduction).
 * Returns 0 if x > (2^24 - 1);
 * Sets octant_out=-1 on error. */
INLINE f32 math_reduce_positive_to_pio4(f32 x, i32 *octant_out)
{
    i32 octant = -1;

    if (x <= ((1 << 24) - 1)) {
        octant = x * (4 / PI);  /* Integer part of x/(PI/4) */
        f32 y = octant;
        if (octant & 1) {
            octant += 1;
            y += 1.0;
        }

        /* Modulo 360 degrees */
        octant &= 7;

        if (x > 8192) {
            x = x - y * (PI / 4);
        } else {
            /* Extended precision modular arithmetic */
            x = ((x - y * 0.78515625) - y * 2.4187564849853515625e-4) - y * 3.77489497744594108e-8;
        }
    }

    *octant_out = octant;
    return x;
}

INLINE f32 math_sin(f32 x)
{
    f32 sign = 1;

    if (F32_IS_NAN(x)) {
        return 0;
    } else if (x < 0) {
        sign = -1;
        x = -x;
    }

    i32 octant;
    x = math_reduce_positive_to_pio4(x, &octant);

    /* Reflect in x axis */
    if (octant > 3) {
        sign = -sign;
        octant -= 4;
    }

    switch (octant) {
        case -1:            return 0;
        case 1: case 2:     return math_cos_approx(x) * sign;
        default:            return math_sin_approx(x) * sign;
    }
}

INLINE f32 math_cos(f32 x)
{
    f32 sign = 1;

    if (F32_IS_NAN(x)) {
        return 0;
    } else if (x < 0) {
        x = -x;
    }

    i32 octant;
    x = math_reduce_positive_to_pio4(x, &octant);

    /* Reflect in x axis */
    if (octant > 3) {
        sign = -sign;
        octant -= 4;
    }

    if (octant > 1) {
        sign = -sign;
    }

    switch (octant) {
        case -1:            return 0;
        case 1: case 2:     return math_sin_approx(x) * sign;
        default:            return math_cos_approx(x) * sign;
    }
}

INLINE f32 math_atan(f32 x)
{
    f32 sign = 1;
    if (x < 0) {
        sign = -1;
        x = -x;
    }

    /* Reduce range */
    f32 y;
    if (x > 2.414213562373095) {  /* tan((PI / 8) * 3) */
        y = PI / 2;
        x = -(1.f / x);
    } else if (x > 0.4142135623730950) {  /* tan(PI / 8) */
        y = PI / 4;
        x = (x - 1.f) / (x + 1.f);
    } else {
        y = 0;
    }

    f32 x_sq = x * x;
    y += ((((8.05374449538e-2 * x_sq - 1.38776856032E-1) * x_sq + 1.99777106478E-1) * x_sq - 3.33329491539E-1) * x_sq * x + x);

    return y * sign;
}

INLINE f32 math_atan2(f32 y, f32 x)
{
    f32 res;
    if (x == 0) {
        if (y < 0) {
            res = 3 * PI / 2;
        } else if (y == 0) {
            res = 0;
        } else {
            res = PI / 2;
        }
    } else if (y == 0) {
        if (x < 0) {
            res = PI;
        } else {
            res = 0;
        }
    } else {
        f32 offset;
        if (x < 0) {
            offset = PI;
        } else if (y < 0) {
            offset = PI * 2;
        } else {
            offset = 0;
        }
        res = math_atan(y / x) + offset;
    }
    return res;
}

INLINE f32 math_asin(f32 x)
{
    /* TODO: Dedicated arcsin approximation */
    return math_atan2(x, math_sqrt(1.0f - (x * x)));
}

INLINE f32 math_acos(f32 x)
{
    /* TODO: Dedicated arccos approximation */
    return (PI / 2.0f) - math_atan2(x, math_sqrt(1.0f - (x * x)));
}

/* Returns angle in range [-PI, PI] */
INLINE f32 math_unwind_angle(f32 a)
{
    f32 d = math_fmod(a, TAU);
    return math_fmod(2.0f * d, TAU) - d;
}

/* ========================== *
 * Lerp
 * ========================== */

INLINE f32 math_lerp_f32(f32 val0, f32 val1, f32 t)
{
    return val0 + ((val1 - val0) * t);
}

INLINE f64 math_lerp_f64(f64 val0, f64 val1, f64 t)
{
    return val0 + ((val1 - val0) * t);
}

INLINE i32 math_lerp_i32(i32 val0, i32 val1, f32 t)
{
    return val0 + math_round_to_int(((f32)(val1 - val0) * t));
}

INLINE i64 math_lerp_i64(i64 val0, i64 val1, f64 t)
{
    return val0 + math_round_to_int64(((f64)(val1 - val0) * t));
}

INLINE f32 math_lerp_angle(f32 a, f32 b, f32 t) {
    f32 diff = math_unwind_angle(b - a);
    return a + diff * t;
}

/* ========================== *
 * V2
 * ========================== */

INLINE struct v2 v2_mul(struct v2 a, f32 s)
{
    return V2(a.x * s, a.y * s);
}

INLINE struct v2 v2_mul_v2(struct v2 a, struct v2 b)
{
    return V2(a.x * b.x, a.y * b.y);
}

INLINE struct v2 v2_div(struct v2 a, f32 s)
{
    f32 d = 1 / s;
    return V2(a.x * d, a.y * d);
}

INLINE struct v2 v2_div_v2(struct v2 a, struct v2 b)
{
    return V2(a.x * (1 / b.x), a.y * (1 / b.y));
}

INLINE struct v2 v2_neg(struct v2 a)
{
    return V2(-a.x, -a.y);
}

INLINE struct v2 v2_add(struct v2 a, struct v2 b)
{
    return V2(a.x + b.x, a.y + b.y);
}

INLINE struct v2 v2_sub(struct v2 a, struct v2 b)
{
    return V2(a.x - b.x, a.y - b.y);
}

INLINE f32 v2_len(struct v2 a)
{
    return math_sqrt(a.x * a.x + a.y * a.y);
}

INLINE f32 v2_len_sq(struct v2 a)
{
    return a.x * a.x + a.y * a.y;
}

INLINE f32 v2_dot(struct v2 a, struct v2 b)
{
    return a.x * b.x + a.y * b.y;
}

/* Returns signed area between vectors (positive in clockwise direction)
 * AKA perpendicular dot product
 * AKA 2d cross-product */
INLINE f32 v2_wedge(struct v2 a, struct v2 b)
{
    return a.x * b.y - a.y * b.x;
}

/* Clockwise (right) perpendicular vector */
INLINE struct v2 v2_perp(struct v2 a)
{
    return V2(-a.y, a.x);
}

/* Clockwise (right) perpendicular vector scaled by s */
INLINE struct v2 v2_perp_mul(struct v2 a, f32 s)
{
    return V2(s * -a.y, s * a.x);
}

INLINE struct v2 v2_perp_towards_dir(struct v2 v, struct v2 dir)
{
    f32 wedge = v2_wedge(v, dir);
    return v2_perp_mul(v, (wedge >= 0) - (wedge < 0));
}

/* Returns 1 if winding between vectors a & b is clockwise or straight, -1 if counter-clockwise */
INLINE i32 v2_winding(struct v2 a, struct v2 b)
{
    f32 w = v2_wedge(a, b);
    return (w >= 0) - (w < 0);
}

INLINE struct v2 v2_with_len(struct v2 a, f32 len)
{
    f32 l_sq = a.x * a.x + a.y * a.y;
    if (l_sq != 0) {
        f32 denom = len / math_sqrt(l_sq);
        a.x *= denom;
        a.y *= denom;
    }
    return a;
}

INLINE struct v2 v2_clamp_len(struct v2 a, f32 max)
{
    f32 l_sq = a.x * a.x + a.y * a.y;
    if (l_sq > max * max) {
        f32 denom = max / math_sqrt(l_sq);
        a.x *= denom;
        a.y *= denom;
    }
    return a;
}

INLINE struct v2 v2_norm(struct v2 a)
{
    return v2_with_len(a, 1.f);
}

INLINE struct v2 v2_round(struct v2 a)
{
    return V2(math_round(a.x), math_round(a.y));
}

INLINE struct v2i32 v2_round_to_int(struct v2 a)
{
    return V2I32(math_round_to_int(a.x), math_round_to_int(a.y));
}

INLINE struct v2 v2_floor(struct v2 a)
{
    return V2(math_floor(a.x), math_floor(a.y));
}

INLINE struct v2 v2_ceil(struct v2 a)
{
    return V2(math_ceil(a.x), math_ceil(a.y));
}

INLINE f32 v2_distance(struct v2 a, struct v2 b)
{
    f32 dx = b.x - a.x;
    f32 dy = b.y - a.y;
    return math_sqrt(dx * dx + dy * dy);
}

INLINE b32 v2_eq(struct v2 a, struct v2 b)
{
    return a.x == b.x && a.y == b.y;
}

INLINE b32 v2_is_zero(struct v2 a)
{
    return a.x == 0 && a.y == 0;
}

INLINE struct v2 v2_rotated(struct v2 v, f32 a)
{
    f32 c = math_cos(a);
    f32 s = math_sin(a);
    return V2(v.x * c - v.y * s, v.x * s + v.y * c);
}

INLINE struct v2 v2_from_angle(f32 a)
{
    return V2(math_cos(a), math_sin(a));
}

INLINE f32 v2_angle(struct v2 v)
{
    return math_atan2(v.y, v.x);
}

INLINE f32 v2_angle_from_dirs(struct v2 dir1, struct v2 dir2)
{
    return math_atan2(v2_wedge(dir1, dir2), v2_dot(dir1, dir2));
}

INLINE f32 v2_angle_from_points(struct v2 pt1, struct v2 pt2)
{
    return v2_angle(v2_sub(pt2, pt1));
}

INLINE struct v2 v2_closest_point_ray(struct v2 ray_pos, struct v2 ray_dir_norm, struct v2 p)
{
    struct v2 ray_p_dir = v2_sub(p, ray_pos);
    f32 dot = v2_dot(ray_dir_norm, ray_p_dir);
    struct v2 ray_dir_closest = v2_mul(ray_dir_norm, dot);
    return v2_add(ray_pos, ray_dir_closest);
}

/* Interpolate position vectors */
INLINE struct v2 v2_lerp(struct v2 val0, struct v2 val1, f32 t)
{
    return V2(math_lerp_f32(val0.x, val1.x, t), math_lerp_f32(val0.y, val1.y, t));
}

/* Interpolate direction vectors (spherical lerp) */
INLINE struct v2 v2_slerp(struct v2 val0, struct v2 val1, f32 t)
{
    f32 rot = math_lerp_angle(v2_angle(val0), v2_angle(val1), t);
    f32 len = math_lerp_f32(v2_len(val0), v2_len(val1), t);
    return v2_mul(v2_from_angle(rot), len);
}

/* ========================== *
 * V2I32
 * ========================== */

INLINE b32 v2i32_eq(struct v2i32 a, struct v2i32 b)
{
    return a.x == b.x && a.y == b.y;
}

/* ========================== *
 * Mat4x4
 * ========================== */

INLINE struct mat4x4 mat4x4_from_xform(struct xform xf)
{
    return CPPCOMPAT_INITLIST_TYPE(struct mat4x4) {
        .e = {
            {xf.bx.x, xf.bx.y, 0, 0},
            {xf.by.x, xf.by.y, 0, 0},
            {0,       0,       1, 0},
            {xf.og.x, xf.og.y, 0, 1},
        }
    };
}

INLINE struct mat4x4 mat4x4_from_ortho(f32 left, f32 right, f32 bottom, f32 top, f32 near_z, f32 far_z)
{
    struct mat4x4 m = ZI;

    f32 rl =  1.0f / (right - left);
    f32 tb =  1.0f / (top - bottom);
    f32 fn = -1.0f / (far_z - near_z);

    m.e[0][0] = 2.0f * rl;
    m.e[1][1] = 2.0f * tb;
    m.e[2][2] = 2.0f * fn;
    m.e[3][0] = -(right + left) * rl;
    m.e[3][1] = -(top + bottom) * tb;
    m.e[3][2] = (far_z + near_z) * fn;
    m.e[3][3] = 1.0f;

    return m;
}

INLINE struct mat4x4 mat4x4_mul(struct mat4x4 m1, struct mat4x4 m2)
{
    f32 a00 = m1.e[0][0], a01 = m1.e[0][1], a02 = m1.e[0][2], a03 = m1.e[0][3],
        a10 = m1.e[1][0], a11 = m1.e[1][1], a12 = m1.e[1][2], a13 = m1.e[1][3],
        a20 = m1.e[2][0], a21 = m1.e[2][1], a22 = m1.e[2][2], a23 = m1.e[2][3],
        a30 = m1.e[3][0], a31 = m1.e[3][1], a32 = m1.e[3][2], a33 = m1.e[3][3],

        b00 = m2.e[0][0], b01 = m2.e[0][1], b02 = m2.e[0][2], b03 = m2.e[0][3],
        b10 = m2.e[1][0], b11 = m2.e[1][1], b12 = m2.e[1][2], b13 = m2.e[1][3],
        b20 = m2.e[2][0], b21 = m2.e[2][1], b22 = m2.e[2][2], b23 = m2.e[2][3],
        b30 = m2.e[3][0], b31 = m2.e[3][1], b32 = m2.e[3][2], b33 = m2.e[3][3];

    struct mat4x4 res;
    res.e[0][0] = a00 * b00 + a10 * b01 + a20 * b02 + a30 * b03;
    res.e[0][1] = a01 * b00 + a11 * b01 + a21 * b02 + a31 * b03;
    res.e[0][2] = a02 * b00 + a12 * b01 + a22 * b02 + a32 * b03;
    res.e[0][3] = a03 * b00 + a13 * b01 + a23 * b02 + a33 * b03;
    res.e[1][0] = a00 * b10 + a10 * b11 + a20 * b12 + a30 * b13;
    res.e[1][1] = a01 * b10 + a11 * b11 + a21 * b12 + a31 * b13;
    res.e[1][2] = a02 * b10 + a12 * b11 + a22 * b12 + a32 * b13;
    res.e[1][3] = a03 * b10 + a13 * b11 + a23 * b12 + a33 * b13;
    res.e[2][0] = a00 * b20 + a10 * b21 + a20 * b22 + a30 * b23;
    res.e[2][1] = a01 * b20 + a11 * b21 + a21 * b22 + a31 * b23;
    res.e[2][2] = a02 * b20 + a12 * b21 + a22 * b22 + a32 * b23;
    res.e[2][3] = a03 * b20 + a13 * b21 + a23 * b22 + a33 * b23;
    res.e[3][0] = a00 * b30 + a10 * b31 + a20 * b32 + a30 * b33;
    res.e[3][1] = a01 * b30 + a11 * b31 + a21 * b32 + a31 * b33;
    res.e[3][2] = a02 * b30 + a12 * b31 + a22 * b32 + a32 * b33;
    res.e[3][3] = a03 * b30 + a13 * b31 + a23 * b32 + a33 * b33;

    return res;
}

/* ========================== *
 * Xform
 * ========================== */

/* Construct identity xform */
#define XFORM_IDENT CPPCOMPAT_INITLIST_TYPE(struct xform) { .bx = V2(1, 0), .by = V2(0, 1) }
#define XFORM_IDENT_NOCAST { .bx = V2(1, 0), .by = V2(0, 1) }

#define XFORM_POS(p) CPPCOMPAT_INITLIST_TYPE(struct xform) { .bx = V2(1, 0), .by = V2(0, 1), .og = (p) }

/* Takes a translation, rotation, and scale as optional parameters for constructing an xform */
#define XFORM_TRS(...) xform_from_trs((struct trs) { .t = V2(0,0), .s = V2(1, 1), .r = 0, __VA_ARGS__ })

INLINE struct xform xform_mul(struct xform a, struct xform b);
INLINE struct xform xform_rotated(struct xform xf, f32 angle);
INLINE struct xform xform_scaled(struct xform xf, struct v2 v);
INLINE struct v2 xform_basis_mul_v2(struct xform xf, struct v2 v);
INLINE struct v2 xform_mul_v2(struct xform xf, struct v2 v);
INLINE f32 xform_get_determinant(struct xform xf);
INLINE struct v2 xform_get_scale(struct xform xf);
INLINE f32 xform_get_rotation(struct xform xf);

INLINE b32 xform_eq(struct xform xf1, struct xform xf2)
{
    return v2_eq(xf1.og, xf2.og) && v2_eq(xf1.bx, xf2.bx) && v2_eq(xf1.by, xf2.by);
}

INLINE struct xform xform_from_pos(struct v2 v)
{
    struct xform xf;
    xf.bx = V2(1, 0);
    xf.by = V2(0, 1);
    xf.og = v;
    return xf;
}

INLINE struct xform xform_from_rotation(f32 r)
{
    struct xform res;
    f32 c = math_cos(r);
    f32 s = math_sin(r);
    res.bx = V2(c, s);
    res.by = V2(-s, c);
    res.og = V2(0, 0);
    return res;
}

INLINE struct xform xform_from_scale(struct v2 scale)
{
    struct xform res;
    res.bx = V2(scale.x, 0);
    res.by = V2(0, scale.y);
    res.og = V2(0, 0);
    return res;
}

INLINE struct xform xform_from_trs(struct trs trs)
{
    struct xform xf = XFORM_POS(trs.t);
    xf = xform_rotated(xf, trs.r);
    xf = xform_scaled(xf, trs.s);
    return xf;
}

INLINE struct xform xform_translated(struct xform xf, struct v2 v)
{
    xf.og = V2(xf.bx.x * v.x + xf.by.x * v.y + xf.og.x, xf.bx.y * v.x + xf.by.y * v.y + xf.og.y);
    return xf;
}

INLINE struct xform xform_translated_world(struct xform xf, struct v2 v)
{
    xf.og = v2_add(xf.og, v);
    return xf;
}

INLINE struct xform xform_rotated(struct xform xf, f32 r)
{
    return xform_mul(xf, xform_from_rotation(r));
}

INLINE struct xform xform_rotated_world(struct xform xf, f32 r)
{
    return xform_mul(xform_from_rotation(r), xf);
}

INLINE struct xform xform_basis_rotated_world(struct xform xf, f32 r)
{
    f32 diff = r;
    f32 c = math_cos(diff);
    f32 s = math_sin(diff);
    xf.bx = V2(xf.bx.x * c - xf.bx.y * s, xf.bx.x * s + xf.bx.y * c);
    xf.by = V2(xf.by.x * c - xf.by.y * s, xf.by.x * s + xf.by.y * c);
    return xf;
}

INLINE struct xform xform_basis_with_rotation_world(struct xform xf, f32 r)
{
    return xform_basis_rotated_world(xf, r - xform_get_rotation(xf));
}

INLINE struct xform xform_scaled(struct xform xf, struct v2 scale)
{
    xf.bx = v2_mul(xf.bx, scale.x);
    xf.by = v2_mul(xf.by, scale.y);
    return xf;
}

INLINE struct xform xform_scaled_world(struct xform xf, struct v2 scale)
{
    struct xform res;
    res.bx = v2_mul_v2(xf.bx, scale);
    res.by = v2_mul_v2(xf.by, scale);
    res.og = v2_mul_v2(xf.og, scale);
    return res;
}

INLINE struct xform xform_lerp(struct xform a, struct xform b, f32 t)
{
    struct xform res;
    res.bx = v2_slerp(a.bx, b.bx, t);
    res.by = v2_slerp(a.by, b.by, t);
    res.og = v2_lerp(a.og, b.og, t);
    return res;
}

INLINE struct xform xform_invert(struct xform xf)
{
    f32 det = xform_get_determinant(xf);
    f32 inv_det = 1.0f / det;

    f32 old_bx_x = xf.bx.x;
    xf.bx.x = xf.by.y;
    xf.by.y = old_bx_x;

    xf.bx = v2_mul_v2(xf.bx, V2(inv_det, -inv_det));
    xf.by = v2_mul_v2(xf.by, V2(-inv_det, inv_det));

    xf.og = xform_basis_mul_v2(xf, v2_neg(xf.og));

    return xf;
}

INLINE struct xform xform_mul(struct xform a, struct xform b)
{
    struct xform res;
    res.bx.x = a.bx.x * b.bx.x + a.by.x * b.bx.y;
    res.bx.y = a.bx.y * b.bx.x + a.by.y * b.bx.y;
    res.by.x = a.bx.x * b.by.x + a.by.x * b.by.y;
    res.by.y = a.bx.y * b.by.x + a.by.y * b.by.y;
    res.og = xform_mul_v2(a, b.og);
    return res;
}

INLINE struct v2 xform_basis_mul_v2(struct xform xf, struct v2 v)
{
    return V2(
        xf.bx.x * v.x + xf.by.x * v.y,
        xf.bx.y * v.x + xf.by.y * v.y
    );
}

INLINE struct v2 xform_mul_v2(struct xform xf, struct v2 v)
{
    struct v2 res = xform_basis_mul_v2(xf, v);
    res = v2_add(res, xf.og);
    return res;
}

INLINE struct v2 xform_basis_invert_mul_v2(struct xform xf, struct v2 v)
{
    struct xform inv = xform_invert(xf);
    struct v2 res = xform_basis_mul_v2(inv, v);
    return res;
}

/* TODO: Get rid of this? Just force caller to use invert manually since it's expensive. */
INLINE struct v2 xform_invert_mul_v2(struct xform xf, struct v2 v)
{
    struct xform inv = xform_invert(xf);
    return xform_mul_v2(inv, v);
}

INLINE struct quad xform_mul_quad(struct xform xf, struct quad quad)
{
    struct quad res;
    res.p0 = xform_mul_v2(xf, quad.p0);
    res.p1 = xform_mul_v2(xf, quad.p1);
    res.p2 = xform_mul_v2(xf, quad.p2);
    res.p3 = xform_mul_v2(xf, quad.p3);
    return res;
}

INLINE f32 xform_get_determinant(struct xform xf)
{
    return v2_wedge(xf.bx, xf.by);
}

INLINE struct v2 xform_get_right(struct xform xf)
{
    return xf.bx;
}

INLINE struct v2 xform_get_left(struct xform xf)
{
    return v2_neg(xf.bx);
}

INLINE struct v2 xform_get_up(struct xform xf)
{
    return v2_neg(xf.by);
}

INLINE struct v2 xform_get_down(struct xform xf)
{
    return xf.by;
}

INLINE f32 xform_get_rotation(struct xform xf)
{
    return v2_angle(xf.bx);
}

INLINE struct v2 xform_get_scale(struct xform xf)
{
    f32 det_sign = math_fsign(xform_get_determinant(xf));
    return V2(v2_len(xf.bx), det_sign * v2_len(xf.by));
}

/* ========================== *
 * Quad
 * ========================== */

INLINE struct quad quad_from_rect(struct rect rect)
{
    struct quad res;
    res.p0 = V2(rect.x, rect.y);                             /* Top left */
    res.p1 = V2(rect.x + rect.width, rect.y);                /* Top right */
    res.p2 = V2(rect.x + rect.width, rect.y + rect.height);  /* Bottom right */
    res.p3 = V2(rect.x, rect.y + rect.height);               /* Bottom left */
    return res;
}

INLINE struct quad quad_from_aabb(struct aabb aabb)
{
    struct quad res;
    res.p0 = V2(aabb.p0.x, aabb.p0.y);  /* Top left */
    res.p0 = V2(aabb.p1.x, aabb.p0.y);  /* Top right */
    res.p0 = V2(aabb.p1.x, aabb.p1.y);  /* Bottom right */
    res.p0 = V2(aabb.p0.x, aabb.p1.y);  /* Bottom left */
    return res;
}

INLINE struct quad quad_from_line(struct v2 start, struct v2 end, f32 thickness)
{
    f32 width = thickness / 2.f;

    struct v2 dir = v2_norm(v2_sub(end, start));
    struct v2 dir_perp = v2_perp(dir);

    struct v2 left = v2_mul(dir_perp, -width);
    struct v2 right = v2_mul(dir_perp, width);

    struct quad res;
    res.p0 = v2_add(start, left);
    res.p1 = v2_add(start, right);
    res.p2 = v2_add(end, right);
    res.p3 = v2_add(end, left);
    return res;
}

INLINE struct quad quad_from_ray(struct v2 pos, struct v2 rel, f32 thickness)
{
    struct v2 end = v2_add(pos, rel);
    return quad_from_line(pos, end, thickness);
}

INLINE struct quad quad_scale(struct quad q, f32 s)
{
    q.p0 = v2_mul(q.p0, s);
    q.p1 = v2_mul(q.p1, s);
    q.p2 = v2_mul(q.p2, s);
    q.p3 = v2_mul(q.p3, s);
    return q;
}

INLINE struct quad quad_round(struct quad quad)
{
    struct quad res;
    res.p0 = v2_round(quad.p0);
    res.p0 = v2_round(quad.p1);
    res.p0 = v2_round(quad.p2);
    res.p0 = v2_round(quad.p3);
    return res;
}

INLINE struct quad quad_floor(struct quad quad)
{
    struct quad res;
    res.p0 = v2_floor(quad.p0);
    res.p0 = v2_floor(quad.p1);
    res.p0 = v2_floor(quad.p2);
    res.p0 = v2_floor(quad.p3);
    return res;
}

/* ========================== *
 * Collider
 * ========================== */

INLINE struct collider_shape collider_from_quad(struct quad quad)
{
    struct collider_shape res;
    res.points[0] = quad.p0;
    res.points[1] = quad.p1;
    res.points[2] = quad.p2;
    res.points[3] = quad.p3;
    res.count = 4;
    res.radius = 0;
    return res;
}

/* ========================== *
 * Convex polygon
 * ========================== */

INLINE struct v2 math_poly_center(struct v2_array a)
{
    struct v2 sum = V2(0, 0);
    for (u64 i = 0; i < a.count; ++i) {
        sum = v2_add(sum, a.points[i]);
    }
    return v2_div(sum, a.count);
}

/* ========================== *
 * Other
 * ========================== */

/* https://box2d.org/files/ErinCatto_SoftConstraints_GDC2011.pdf */
struct math_spring_result { f32 bias_rate; f32 mass_scale; f32 impulse_scale; };
INLINE struct math_spring_result math_spring(f32 hertz, f32 damping_ratio, f32 dt)
{
    struct math_spring_result res;
    if (hertz == 0.0f) {
        res.bias_rate = 0;
        res.mass_scale = 1;
        res.impulse_scale = 0;
    } else {
        f32 angular_frequency = TAU * hertz;
        f32 a = 2 * damping_ratio + angular_frequency * dt;
        f32 b = angular_frequency * a * dt;
        f32 c = 1 / (b + 1);
        res.bias_rate = angular_frequency / a;
        res.mass_scale = b * c;
        res.impulse_scale = c;
    }
    return res;
}

#endif