diff --git a/include/dolphin/ppc_math.h b/include/dolphin/ppc_math.h
new file mode 100644
index 0000000..db12db5
--- /dev/null
+++ b/include/dolphin/ppc_math.h
@@ -0,0 +1,136 @@
+#ifndef DOLPHIN_PPC_MATH_H
+#define DOLPHIN_PPC_MATH_H
+
+#include <math.h>
+#include <stdint.h>
+
+// frsqrte matching courtesy of Geotale, with reference to https://achurch.org/cpu-tests/ppc750cl.s
+
+struct BaseAndDec32 {
+    uint32_t base;
+    int32_t dec;
+};
+
+struct BaseAndDec64 {
+    uint64_t base;
+    int64_t dec;
+};
+
+union c32 {
+    uint32_t u;
+    float f;
+};
+
+union c64 {
+    uint64_t u;
+    double f;
+};
+
+#define EXPONENT_SHIFT_F64 ((uint64_t)52)
+#define MANTISSA_MASK_F64  ((uint64_t)0x000fffffffffffffULL)
+#define EXPONENT_MASK_F64  ((uint64_t)0x7ff0000000000000ULL)
+#define SIGN_MASK_F64      ((uint64_t)0x8000000000000000ULL)
+
+static const struct BaseAndDec64 RSQRTE_TABLE[32] = {
+    {0x69fa000000000ULL, -0x15a0000000LL},
+    {0x5f2e000000000ULL, -0x13cc000000LL},
+    {0x554a000000000ULL, -0x1234000000LL},
+    {0x4c30000000000ULL, -0x10d4000000LL},
+    {0x43c8000000000ULL, -0x0f9c000000LL},
+    {0x3bfc000000000ULL, -0x0e88000000LL},
+    {0x34b8000000000ULL, -0x0d94000000LL},
+    {0x2df0000000000ULL, -0x0cb8000000LL},
+    {0x2794000000000ULL, -0x0bf0000000LL},
+    {0x219c000000000ULL, -0x0b40000000LL},
+    {0x1bfc000000000ULL, -0x0aa0000000LL},
+    {0x16ae000000000ULL, -0x0a0c000000LL},
+    {0x11a8000000000ULL, -0x0984000000LL},
+    {0x0ce6000000000ULL, -0x090c000000LL},
+    {0x0862000000000ULL, -0x0898000000LL},
+    {0x0416000000000ULL, -0x082c000000LL},
+    {0xffe8000000000ULL, -0x1e90000000LL},
+    {0xf0a4000000000ULL, -0x1c00000000LL},
+    {0xe2a8000000000ULL, -0x19c0000000LL},
+    {0xd5c8000000000ULL, -0x17c8000000LL},
+    {0xc9e4000000000ULL, -0x1610000000LL},
+    {0xbedc000000000ULL, -0x1490000000LL},
+    {0xb498000000000ULL, -0x1330000000LL},
+    {0xab00000000000ULL, -0x11f8000000LL},
+    {0xa204000000000ULL, -0x10e8000000LL},
+    {0x9994000000000ULL, -0x0fe8000000LL},
+    {0x91a0000000000ULL, -0x0f08000000LL},
+    {0x8a1c000000000ULL, -0x0e38000000LL},
+    {0x8304000000000ULL, -0x0d78000000LL},
+    {0x7c48000000000ULL, -0x0cc8000000LL},
+    {0x75e4000000000ULL, -0x0c28000000LL},
+    {0x6fd0000000000ULL, -0x0b98000000LL},
+};
+
+#ifdef _MSC_VER
+#include <intrin.h>
+static inline uint32_t ppc_clz64(uint64_t x) {
+    unsigned long idx;
+    _BitScanReverse64(&idx, x);
+    return 63u - (uint32_t)idx;
+}
+#else
+static inline uint32_t ppc_clz64(uint64_t x) {
+    return (uint32_t)__builtin_clzll(x);
+}
+#endif
+
+static inline double frsqrte(double val) {
+    union c64 bits;
+    uint64_t mantissa;
+    int64_t exponent;
+    int sign;
+    uint32_t key;
+    uint64_t new_exp;
+    const struct BaseAndDec64 *entry;
+    union c64 result;
+
+    bits.f = val;
+    mantissa = bits.u & MANTISSA_MASK_F64;
+    exponent = (int64_t)(bits.u & EXPONENT_MASK_F64);
+    sign = (bits.u & SIGN_MASK_F64) != 0;
+
+    if (mantissa == 0 && exponent == 0) {
+        return copysign(INFINITY, bits.f);
+    }
+
+    if ((uint64_t)exponent == EXPONENT_MASK_F64) {
+        if (mantissa == 0) {
+            return sign ? NAN : 0.0;
+        }
+        return val;
+    }
+
+    if (sign) {
+        return NAN;
+    }
+
+    if (exponent == 0) {
+        uint32_t shift = ppc_clz64(mantissa) - (uint32_t)(63 - EXPONENT_SHIFT_F64);
+        mantissa <<= shift;
+        mantissa &= MANTISSA_MASK_F64;
+        exponent -= (int64_t)(shift - 1) << EXPONENT_SHIFT_F64;
+    }
+
+    key = (uint32_t)(((uint64_t)exponent | mantissa) >> 37);
+    new_exp = ((uint64_t)((0xbfcLL << EXPONENT_SHIFT_F64) - exponent) >> 1) & EXPONENT_MASK_F64;
+
+    entry = &RSQRTE_TABLE[0x1fu & (key >> 11)];
+    result.u = new_exp | (uint64_t)(entry->base + entry->dec * (int64_t)(key & 0x7ffu));
+    return result.f;
+}
+
+// One Newton-Raphson step
+static inline float ppc_rsqrte(float x) {
+    double rsqrt_d = frsqrte((double)x);
+    float nwork0 = (float)(rsqrt_d * rsqrt_d);
+    float nwork1 = (float)(rsqrt_d * 0.5);
+    nwork0 = fmaf(-nwork0, x, 3.0f);
+    return nwork0 * nwork1;
+}
+
+#endif // DOLPHIN_PPC_MATH_H
diff --git a/lib/dolphin/mtx/vec.c b/lib/dolphin/mtx/vec.c
index 9651ca3..7100d7a 100644
--- a/lib/dolphin/mtx/vec.c
+++ b/lib/dolphin/mtx/vec.c
@@ -2,6 +2,10 @@
 #include <math.h>
 #include <dolphin/mtx.h>
 
+#if EMULATE_PS_MATH
+#include <dolphin/ppc_math.h>
+#endif
+
 void C_VECAdd(const Vec* a, const Vec* b, Vec* ab) {
   assert(a && "VECAdd():  NULL VecPtr 'a' ");
   assert(b && "VECAdd():  NULL VecPtr 'b' ");
@@ -29,31 +33,49 @@ void C_VECScale(const Vec* src, Vec* dst, f32 scale) {
 }
 
 void C_VECNormalize(const Vec* src, Vec* unit) {
-  f32 mag;
+  f32 sqsum;
+  f32 rsqrt;
 
   assert(src && "VECNormalize():  NULL VecPtr 'src' ");
   assert(unit && "VECNormalize():  NULL VecPtr 'unit' ");
 
-  mag = (src->z * src->z) + ((src->x * src->x) + (src->y * src->y));
-  assert(0.0f != mag && "VECNormalize():  zero magnitude vector ");
-
-  mag = 1.0f/ sqrtf(mag);
-  unit->x = src->x * mag;
-  unit->y = src->y * mag;
-  unit->z = src->z * mag;
+#if EMULATE_PS_MATH
+  sqsum = (src->z * src->z + src->x * src->x) + src->y * src->y;
+#else
+  sqsum = (src->z * src->z) + ((src->x * src->x) + (src->y * src->y));
+#endif
+  assert(0.0f != sqsum && "VECNormalize():  zero magnitude vector ");
+
+#if EMULATE_PS_MATH
+  rsqrt = ppc_rsqrte(sqsum);
+#else
+  rsqrt = 1.0f / sqrtf(sqsum);
+#endif
+  unit->x = src->x * rsqrt;
+  unit->y = src->y * rsqrt;
+  unit->z = src->z * rsqrt;
 }
 
 f32 C_VECSquareMag(const Vec* v) {
-  f32 sqmag;
-
   assert(v && "VECMag():  NULL VecPtr 'v' ");
-
-  sqmag = v->z * v->z + ((v->x * v->x) + (v->y * v->y));
-  return sqmag;
+#if EMULATE_PS_MATH
+  return (v->z * v->z + v->x * v->x) + v->y * v->y;
+#else
+  return v->z * v->z + ((v->x * v->x) + (v->y * v->y));
+#endif
 }
 
 f32 C_VECMag(const Vec* v) {
+#if EMULATE_PS_MATH
+  f32 sqmag;
+  sqmag = C_VECSquareMag(v);
+  if (sqmag == 0.0f) {
+    return 0.0f;
+  }
+  return sqmag * ppc_rsqrte(sqmag);
+#else
   return sqrtf(C_VECSquareMag(v));
+#endif
 }
 
 f32 C_VECDotProduct(const Vec* a, const Vec* b) {
@@ -131,14 +153,25 @@ void C_VECReflect(const Vec* src, const Vec* normal, Vec* dst) {
 }
 
 f32 C_VECSquareDistance(const Vec* a, const Vec* b) {
-  Vec diff;
-
-  diff.x = a->x - b->x;
-  diff.y = a->y - b->y;
-  diff.z = a->z - b->z;
-  return (diff.z * diff.z) + ((diff.x * diff.x) + (diff.y * diff.y));
+  f32 dx = a->x - b->x;
+  f32 dy = a->y - b->y;
+  f32 dz = a->z - b->z;
+#if EMULATE_PS_MATH
+  return (dx * dx + dy * dy) + dz * dz;
+#else
+  return (dz * dz) + ((dx * dx) + (dy * dy));
+#endif
 }
 
 f32 C_VECDistance(const Vec* a, const Vec* b) {
+#if EMULATE_PS_MATH
+  f32 sqdist;
+  sqdist = C_VECSquareDistance(a, b);
+  if (sqdist == 0.0f) {
+    return 0.0f;
+  }
+  return sqdist * ppc_rsqrte(sqdist);
+#else
   return sqrtf(C_VECSquareDistance(a, b));
+#endif
 }