Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
136 changes: 136 additions & 0 deletions include/dolphin/ppc_math.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
#ifndef DOLPHIN_PPC_MATH_H
#define DOLPHIN_PPC_MATH_H

#include <math.h>
#include <stdint.h>

// frsqrte matching courtesy of Geotale, with reference to https://achurch.org/cpu-tests/ppc750cl.s

struct BaseAndDec32 {
uint32_t base;
int32_t dec;
};

struct BaseAndDec64 {
uint64_t base;
int64_t dec;
};

union c32 {
uint32_t u;
float f;
};

union c64 {
uint64_t u;
double f;
};

#define EXPONENT_SHIFT_F64 ((uint64_t)52)
#define MANTISSA_MASK_F64 ((uint64_t)0x000fffffffffffffULL)
#define EXPONENT_MASK_F64 ((uint64_t)0x7ff0000000000000ULL)
#define SIGN_MASK_F64 ((uint64_t)0x8000000000000000ULL)

static const struct BaseAndDec64 RSQRTE_TABLE[32] = {
{0x69fa000000000ULL, -0x15a0000000LL},
{0x5f2e000000000ULL, -0x13cc000000LL},
{0x554a000000000ULL, -0x1234000000LL},
{0x4c30000000000ULL, -0x10d4000000LL},
{0x43c8000000000ULL, -0x0f9c000000LL},
{0x3bfc000000000ULL, -0x0e88000000LL},
{0x34b8000000000ULL, -0x0d94000000LL},
{0x2df0000000000ULL, -0x0cb8000000LL},
{0x2794000000000ULL, -0x0bf0000000LL},
{0x219c000000000ULL, -0x0b40000000LL},
{0x1bfc000000000ULL, -0x0aa0000000LL},
{0x16ae000000000ULL, -0x0a0c000000LL},
{0x11a8000000000ULL, -0x0984000000LL},
{0x0ce6000000000ULL, -0x090c000000LL},
{0x0862000000000ULL, -0x0898000000LL},
{0x0416000000000ULL, -0x082c000000LL},
{0xffe8000000000ULL, -0x1e90000000LL},
{0xf0a4000000000ULL, -0x1c00000000LL},
{0xe2a8000000000ULL, -0x19c0000000LL},
{0xd5c8000000000ULL, -0x17c8000000LL},
{0xc9e4000000000ULL, -0x1610000000LL},
{0xbedc000000000ULL, -0x1490000000LL},
{0xb498000000000ULL, -0x1330000000LL},
{0xab00000000000ULL, -0x11f8000000LL},
{0xa204000000000ULL, -0x10e8000000LL},
{0x9994000000000ULL, -0x0fe8000000LL},
{0x91a0000000000ULL, -0x0f08000000LL},
{0x8a1c000000000ULL, -0x0e38000000LL},
{0x8304000000000ULL, -0x0d78000000LL},
{0x7c48000000000ULL, -0x0cc8000000LL},
{0x75e4000000000ULL, -0x0c28000000LL},
{0x6fd0000000000ULL, -0x0b98000000LL},
};

#ifdef _MSC_VER
#include <intrin.h>
static inline uint32_t ppc_clz64(uint64_t x) {
unsigned long idx;
_BitScanReverse64(&idx, x);
return 63u - (uint32_t)idx;
}
#else
static inline uint32_t ppc_clz64(uint64_t x) {
return (uint32_t)__builtin_clzll(x);
}
#endif

static inline double frsqrte(double val) {
union c64 bits;
uint64_t mantissa;
int64_t exponent;
int sign;
uint32_t key;
uint64_t new_exp;
const struct BaseAndDec64 *entry;
union c64 result;

bits.f = val;
mantissa = bits.u & MANTISSA_MASK_F64;
exponent = (int64_t)(bits.u & EXPONENT_MASK_F64);
sign = (bits.u & SIGN_MASK_F64) != 0;

if (mantissa == 0 && exponent == 0) {
return copysign(INFINITY, bits.f);
}

if ((uint64_t)exponent == EXPONENT_MASK_F64) {
if (mantissa == 0) {
return sign ? NAN : 0.0;
}
return val;
}

if (sign) {
return NAN;
}

if (exponent == 0) {
uint32_t shift = ppc_clz64(mantissa) - (uint32_t)(63 - EXPONENT_SHIFT_F64);
mantissa <<= shift;
mantissa &= MANTISSA_MASK_F64;
exponent -= (int64_t)(shift - 1) << EXPONENT_SHIFT_F64;
}

key = (uint32_t)(((uint64_t)exponent | mantissa) >> 37);
new_exp = ((uint64_t)((0xbfcLL << EXPONENT_SHIFT_F64) - exponent) >> 1) & EXPONENT_MASK_F64;

entry = &RSQRTE_TABLE[0x1fu & (key >> 11)];
result.u = new_exp | (uint64_t)(entry->base + entry->dec * (int64_t)(key & 0x7ffu));
return result.f;
}

// One Newton-Raphson step
static inline float ppc_rsqrte(float x) {
double rsqrt_d = frsqrte((double)x);
float nwork0 = (float)(rsqrt_d * rsqrt_d);
float nwork1 = (float)(rsqrt_d * 0.5);
nwork0 = fmaf(-nwork0, x, 3.0f);
return nwork0 * nwork1;
}

#endif // DOLPHIN_PPC_MATH_H
71 changes: 52 additions & 19 deletions lib/dolphin/mtx/vec.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@
#include <math.h>
#include <dolphin/mtx.h>

#if EMULATE_PS_MATH
#include <dolphin/ppc_math.h>
#endif

void C_VECAdd(const Vec* a, const Vec* b, Vec* ab) {
assert(a && "VECAdd(): NULL VecPtr 'a' ");
assert(b && "VECAdd(): NULL VecPtr 'b' ");
Expand Down Expand Up @@ -29,31 +33,49 @@ void C_VECScale(const Vec* src, Vec* dst, f32 scale) {
}

void C_VECNormalize(const Vec* src, Vec* unit) {
f32 mag;
f32 sqsum;
f32 rsqrt;

assert(src && "VECNormalize(): NULL VecPtr 'src' ");
assert(unit && "VECNormalize(): NULL VecPtr 'unit' ");

mag = (src->z * src->z) + ((src->x * src->x) + (src->y * src->y));
assert(0.0f != mag && "VECNormalize(): zero magnitude vector ");

mag = 1.0f/ sqrtf(mag);
unit->x = src->x * mag;
unit->y = src->y * mag;
unit->z = src->z * mag;
#if EMULATE_PS_MATH
sqsum = (src->z * src->z + src->x * src->x) + src->y * src->y;
#else
sqsum = (src->z * src->z) + ((src->x * src->x) + (src->y * src->y));
#endif
assert(0.0f != sqsum && "VECNormalize(): zero magnitude vector ");

#if EMULATE_PS_MATH
rsqrt = ppc_rsqrte(sqsum);
#else
rsqrt = 1.0f / sqrtf(sqsum);
#endif
unit->x = src->x * rsqrt;
unit->y = src->y * rsqrt;
unit->z = src->z * rsqrt;
}

f32 C_VECSquareMag(const Vec* v) {
f32 sqmag;

assert(v && "VECMag(): NULL VecPtr 'v' ");

sqmag = v->z * v->z + ((v->x * v->x) + (v->y * v->y));
return sqmag;
#if EMULATE_PS_MATH
return (v->z * v->z + v->x * v->x) + v->y * v->y;
#else
return v->z * v->z + ((v->x * v->x) + (v->y * v->y));
#endif
}

f32 C_VECMag(const Vec* v) {
#if EMULATE_PS_MATH
f32 sqmag;
sqmag = C_VECSquareMag(v);
if (sqmag == 0.0f) {
return 0.0f;
}
return sqmag * ppc_rsqrte(sqmag);
#else
return sqrtf(C_VECSquareMag(v));
#endif
}

f32 C_VECDotProduct(const Vec* a, const Vec* b) {
Expand Down Expand Up @@ -131,14 +153,25 @@ void C_VECReflect(const Vec* src, const Vec* normal, Vec* dst) {
}

f32 C_VECSquareDistance(const Vec* a, const Vec* b) {
Vec diff;

diff.x = a->x - b->x;
diff.y = a->y - b->y;
diff.z = a->z - b->z;
return (diff.z * diff.z) + ((diff.x * diff.x) + (diff.y * diff.y));
f32 dx = a->x - b->x;
f32 dy = a->y - b->y;
f32 dz = a->z - b->z;
#if EMULATE_PS_MATH
return (dx * dx + dy * dy) + dz * dz;
#else
return (dz * dz) + ((dx * dx) + (dy * dy));
#endif
}

f32 C_VECDistance(const Vec* a, const Vec* b) {
#if EMULATE_PS_MATH
f32 sqdist;
sqdist = C_VECSquareDistance(a, b);
if (sqdist == 0.0f) {
return 0.0f;
}
return sqdist * ppc_rsqrte(sqdist);
#else
return sqrtf(C_VECSquareDistance(a, b));
#endif
}
Loading