diff --git a/desmume/src/types.h b/desmume/src/types.h
index bbfa15dde..f51d04dcf 100644
--- a/desmume/src/types.h
+++ b/desmume/src/types.h
@@ -467,13 +467,13 @@ template<typename T> inline void reconstruct(T* t) {
 
 //-------------fixed point speedup macros
 
-#ifdef _WIN32
+#ifdef _MSC_VER
 #include <intrin.h>
 #endif
 
 FORCEINLINE s64 fx32_mul(const s32 a, const s32 b)
 {
-#ifdef _WIN32
+#ifdef _MSC_VER
 	return __emul(a,b);
 #else
 	return ((s64)a)*((s64)b);
@@ -482,16 +482,26 @@ FORCEINLINE s64 fx32_mul(const s32 a, const s32 b)
 
 FORCEINLINE s32 fx32_shiftdown(const s64 a)
 {
-#ifdef _WIN32
-	return (s32)__ll_rshift(a,12);
+	s64 shifted;
+#ifdef _MSC_VER
+	shifted = __ll_rshift(a,12);
 #else
-	return (s32)(a>>12);
+	shifted = (a>>12);
 #endif
+	//either matrix math is happening at higher precision (an extra bit would suffice, I think), or the sums sent to this are saturated.
+	//tested by: spectrobes beyond the portals excavation blower
+	//(it sets very large +x,+y in the modelview matrix to push things offscreen, but the +y will overflow and become negative if we're not careful)
+	//I didnt think very hard about what would be fastest here on 32bit systems
+	//NOTE: this was intended for use in MatrixMultVec4x4_M2; it may not be appropriate for other uses of fx32_shiftdown.
+	//if this causes problems we should refactor the math routines a bit to take care of saturating in another function
+	if(shifted>(s32)0x7FFFFFFF) return 0x7FFFFFFF;
+	else if(shifted<=(s32)0x80000000) return 0x80000000;
+	else return shifted;
 }
 
 FORCEINLINE s64 fx32_shiftup(const s32 a)
 {
-#ifdef _WIN32
+#ifdef _MSC_VER
 	return __ll_lshift(a,12);
 #else
 	return ((s64)a)<<12;