Use libretro-common's fxp.h header
This commit is contained in:
parent
d1dfb067bf
commit
f54d68405f
|
@ -621,11 +621,11 @@ inline float vec3dot(float* a, float* b) {
|
|||
|
||||
/* Multiplies a by b with fx32_mul and returns that product after it has been
 * passed through a shiftdown helper.
 * NOTE(review): this is a diff rendering, so the two return lines below appear
 * to be the before/after forms of one statement (fx32_shiftdown replaced by
 * sfx32_shiftdown); only one of them exists in any real revision. */
FORCEINLINE s32 mul_fixed32(s32 a, s32 b)
|
||||
{
|
||||
return fx32_shiftdown(fx32_mul(a,b));
|
||||
return sfx32_shiftdown(fx32_mul(a,b));
|
||||
}
|
||||
|
||||
/* Adds the three fx32_mul products a[0]*b[0], a[1]*b[1] and a[2]*b[2], then
 * returns that sum after it has been passed through a shiftdown helper.
 * Only elements 0..2 of a and b are read.
 * NOTE(review): this is a diff rendering, so the two return lines below appear
 * to be the before/after forms of one statement (fx32_shiftdown replaced by
 * sfx32_shiftdown); only one of them exists in any real revision. */
FORCEINLINE s32 vec3dot_fixed32(const s32* a, const s32* b) {
|
||||
return fx32_shiftdown(fx32_mul(a[0],b[0]) + fx32_mul(a[1],b[1]) + fx32_mul(a[2],b[2]));
|
||||
return sfx32_shiftdown(fx32_mul(a[0],b[0]) + fx32_mul(a[1],b[1]) + fx32_mul(a[2],b[2]));
|
||||
}
|
||||
|
||||
#define SUBMITVERTEX(ii, nn) polylist->list[polylist->count].vertIndexes[ii] = tempVertInfo.map[nn];
|
||||
|
|
|
@ -44,10 +44,10 @@ void MatrixMultVec4x4 (const s32 *matrix, s32 *vecPtr)
|
|||
const s32 z = vecPtr[2];
|
||||
const s32 w = vecPtr[3];
|
||||
|
||||
vecPtr[0] = fx32_shiftdown(fx32_mul(x,matrix[0]) + fx32_mul(y,matrix[4]) + fx32_mul(z,matrix [8]) + fx32_mul(w,matrix[12]));
|
||||
vecPtr[1] = fx32_shiftdown(fx32_mul(x,matrix[1]) + fx32_mul(y,matrix[5]) + fx32_mul(z,matrix[ 9]) + fx32_mul(w,matrix[13]));
|
||||
vecPtr[2] = fx32_shiftdown(fx32_mul(x,matrix[2]) + fx32_mul(y,matrix[6]) + fx32_mul(z,matrix[10]) + fx32_mul(w,matrix[14]));
|
||||
vecPtr[3] = fx32_shiftdown(fx32_mul(x,matrix[3]) + fx32_mul(y,matrix[7]) + fx32_mul(z,matrix[11]) + fx32_mul(w,matrix[15]));
|
||||
vecPtr[0] = sfx32_shiftdown(fx32_mul(x,matrix[0]) + fx32_mul(y,matrix[4]) + fx32_mul(z,matrix [8]) + fx32_mul(w,matrix[12]));
|
||||
vecPtr[1] = sfx32_shiftdown(fx32_mul(x,matrix[1]) + fx32_mul(y,matrix[5]) + fx32_mul(z,matrix[ 9]) + fx32_mul(w,matrix[13]));
|
||||
vecPtr[2] = sfx32_shiftdown(fx32_mul(x,matrix[2]) + fx32_mul(y,matrix[6]) + fx32_mul(z,matrix[10]) + fx32_mul(w,matrix[14]));
|
||||
vecPtr[3] = sfx32_shiftdown(fx32_mul(x,matrix[3]) + fx32_mul(y,matrix[7]) + fx32_mul(z,matrix[11]) + fx32_mul(w,matrix[15]));
|
||||
}
|
||||
|
||||
void MatrixMultVec3x3_fixed(const s32 *matrix, s32 *vecPtr)
|
||||
|
@ -56,9 +56,9 @@ void MatrixMultVec3x3_fixed(const s32 *matrix, s32 *vecPtr)
|
|||
const s32 y = vecPtr[1];
|
||||
const s32 z = vecPtr[2];
|
||||
|
||||
vecPtr[0] = fx32_shiftdown(fx32_mul(x,matrix[0]) + fx32_mul(y,matrix[4]) + fx32_mul(z,matrix[8]));
|
||||
vecPtr[1] = fx32_shiftdown(fx32_mul(x,matrix[1]) + fx32_mul(y,matrix[5]) + fx32_mul(z,matrix[9]));
|
||||
vecPtr[2] = fx32_shiftdown(fx32_mul(x,matrix[2]) + fx32_mul(y,matrix[6]) + fx32_mul(z,matrix[10]));
|
||||
vecPtr[0] = sfx32_shiftdown(fx32_mul(x,matrix[0]) + fx32_mul(y,matrix[4]) + fx32_mul(z,matrix[8]));
|
||||
vecPtr[1] = sfx32_shiftdown(fx32_mul(x,matrix[1]) + fx32_mul(y,matrix[5]) + fx32_mul(z,matrix[9]));
|
||||
vecPtr[2] = sfx32_shiftdown(fx32_mul(x,matrix[2]) + fx32_mul(y,matrix[6]) + fx32_mul(z,matrix[10]));
|
||||
}
|
||||
|
||||
//-------------------------
|
||||
|
@ -384,25 +384,25 @@ void MatrixMultiply (s32 *matrix, const s32 *rightMatrix)
|
|||
{
|
||||
s32 tmpMatrix[16];
|
||||
|
||||
tmpMatrix[0] = fx32_shiftdown(fx32_mul(matrix[0],rightMatrix[0])+fx32_mul(matrix[4],rightMatrix[1])+fx32_mul(matrix[8],rightMatrix[2])+fx32_mul(matrix[12],rightMatrix[3]));
|
||||
tmpMatrix[1] = fx32_shiftdown(fx32_mul(matrix[1],rightMatrix[0])+fx32_mul(matrix[5],rightMatrix[1])+fx32_mul(matrix[9],rightMatrix[2])+fx32_mul(matrix[13],rightMatrix[3]));
|
||||
tmpMatrix[2] = fx32_shiftdown(fx32_mul(matrix[2],rightMatrix[0])+fx32_mul(matrix[6],rightMatrix[1])+fx32_mul(matrix[10],rightMatrix[2])+fx32_mul(matrix[14],rightMatrix[3]));
|
||||
tmpMatrix[3] = fx32_shiftdown(fx32_mul(matrix[3],rightMatrix[0])+fx32_mul(matrix[7],rightMatrix[1])+fx32_mul(matrix[11],rightMatrix[2])+fx32_mul(matrix[15],rightMatrix[3]));
|
||||
tmpMatrix[0] = sfx32_shiftdown(fx32_mul(matrix[0],rightMatrix[0])+fx32_mul(matrix[4],rightMatrix[1])+fx32_mul(matrix[8],rightMatrix[2])+fx32_mul(matrix[12],rightMatrix[3]));
|
||||
tmpMatrix[1] = sfx32_shiftdown(fx32_mul(matrix[1],rightMatrix[0])+fx32_mul(matrix[5],rightMatrix[1])+fx32_mul(matrix[9],rightMatrix[2])+fx32_mul(matrix[13],rightMatrix[3]));
|
||||
tmpMatrix[2] = sfx32_shiftdown(fx32_mul(matrix[2],rightMatrix[0])+fx32_mul(matrix[6],rightMatrix[1])+fx32_mul(matrix[10],rightMatrix[2])+fx32_mul(matrix[14],rightMatrix[3]));
|
||||
tmpMatrix[3] = sfx32_shiftdown(fx32_mul(matrix[3],rightMatrix[0])+fx32_mul(matrix[7],rightMatrix[1])+fx32_mul(matrix[11],rightMatrix[2])+fx32_mul(matrix[15],rightMatrix[3]));
|
||||
|
||||
tmpMatrix[4] = fx32_shiftdown(fx32_mul(matrix[0],rightMatrix[4])+fx32_mul(matrix[4],rightMatrix[5])+fx32_mul(matrix[8],rightMatrix[6])+fx32_mul(matrix[12],rightMatrix[7]));
|
||||
tmpMatrix[5] = fx32_shiftdown(fx32_mul(matrix[1],rightMatrix[4])+fx32_mul(matrix[5],rightMatrix[5])+fx32_mul(matrix[9],rightMatrix[6])+fx32_mul(matrix[13],rightMatrix[7]));
|
||||
tmpMatrix[6] = fx32_shiftdown(fx32_mul(matrix[2],rightMatrix[4])+fx32_mul(matrix[6],rightMatrix[5])+fx32_mul(matrix[10],rightMatrix[6])+fx32_mul(matrix[14],rightMatrix[7]));
|
||||
tmpMatrix[7] = fx32_shiftdown(fx32_mul(matrix[3],rightMatrix[4])+fx32_mul(matrix[7],rightMatrix[5])+fx32_mul(matrix[11],rightMatrix[6])+fx32_mul(matrix[15],rightMatrix[7]));
|
||||
tmpMatrix[4] = sfx32_shiftdown(fx32_mul(matrix[0],rightMatrix[4])+fx32_mul(matrix[4],rightMatrix[5])+fx32_mul(matrix[8],rightMatrix[6])+fx32_mul(matrix[12],rightMatrix[7]));
|
||||
tmpMatrix[5] = sfx32_shiftdown(fx32_mul(matrix[1],rightMatrix[4])+fx32_mul(matrix[5],rightMatrix[5])+fx32_mul(matrix[9],rightMatrix[6])+fx32_mul(matrix[13],rightMatrix[7]));
|
||||
tmpMatrix[6] = sfx32_shiftdown(fx32_mul(matrix[2],rightMatrix[4])+fx32_mul(matrix[6],rightMatrix[5])+fx32_mul(matrix[10],rightMatrix[6])+fx32_mul(matrix[14],rightMatrix[7]));
|
||||
tmpMatrix[7] = sfx32_shiftdown(fx32_mul(matrix[3],rightMatrix[4])+fx32_mul(matrix[7],rightMatrix[5])+fx32_mul(matrix[11],rightMatrix[6])+fx32_mul(matrix[15],rightMatrix[7]));
|
||||
|
||||
tmpMatrix[8] = fx32_shiftdown(fx32_mul(matrix[0],rightMatrix[8])+fx32_mul(matrix[4],rightMatrix[9])+fx32_mul(matrix[8],rightMatrix[10])+fx32_mul(matrix[12],rightMatrix[11]));
|
||||
tmpMatrix[9] = fx32_shiftdown(fx32_mul(matrix[1],rightMatrix[8])+fx32_mul(matrix[5],rightMatrix[9])+fx32_mul(matrix[9],rightMatrix[10])+fx32_mul(matrix[13],rightMatrix[11]));
|
||||
tmpMatrix[10] = fx32_shiftdown(fx32_mul(matrix[2],rightMatrix[8])+fx32_mul(matrix[6],rightMatrix[9])+fx32_mul(matrix[10],rightMatrix[10])+fx32_mul(matrix[14],rightMatrix[11]));
|
||||
tmpMatrix[11] = fx32_shiftdown(fx32_mul(matrix[3],rightMatrix[8])+fx32_mul(matrix[7],rightMatrix[9])+fx32_mul(matrix[11],rightMatrix[10])+fx32_mul(matrix[15],rightMatrix[11]));
|
||||
tmpMatrix[8] = sfx32_shiftdown(fx32_mul(matrix[0],rightMatrix[8])+fx32_mul(matrix[4],rightMatrix[9])+fx32_mul(matrix[8],rightMatrix[10])+fx32_mul(matrix[12],rightMatrix[11]));
|
||||
tmpMatrix[9] = sfx32_shiftdown(fx32_mul(matrix[1],rightMatrix[8])+fx32_mul(matrix[5],rightMatrix[9])+fx32_mul(matrix[9],rightMatrix[10])+fx32_mul(matrix[13],rightMatrix[11]));
|
||||
tmpMatrix[10] = sfx32_shiftdown(fx32_mul(matrix[2],rightMatrix[8])+fx32_mul(matrix[6],rightMatrix[9])+fx32_mul(matrix[10],rightMatrix[10])+fx32_mul(matrix[14],rightMatrix[11]));
|
||||
tmpMatrix[11] = sfx32_shiftdown(fx32_mul(matrix[3],rightMatrix[8])+fx32_mul(matrix[7],rightMatrix[9])+fx32_mul(matrix[11],rightMatrix[10])+fx32_mul(matrix[15],rightMatrix[11]));
|
||||
|
||||
tmpMatrix[12] = fx32_shiftdown(fx32_mul(matrix[0],rightMatrix[12])+fx32_mul(matrix[4],rightMatrix[13])+fx32_mul(matrix[8],rightMatrix[14])+fx32_mul(matrix[12],rightMatrix[15]));
|
||||
tmpMatrix[13] = fx32_shiftdown(fx32_mul(matrix[1],rightMatrix[12])+fx32_mul(matrix[5],rightMatrix[13])+fx32_mul(matrix[9],rightMatrix[14])+fx32_mul(matrix[13],rightMatrix[15]));
|
||||
tmpMatrix[14] = fx32_shiftdown(fx32_mul(matrix[2],rightMatrix[12])+fx32_mul(matrix[6],rightMatrix[13])+fx32_mul(matrix[10],rightMatrix[14])+fx32_mul(matrix[14],rightMatrix[15]));
|
||||
tmpMatrix[15] = fx32_shiftdown(fx32_mul(matrix[3],rightMatrix[12])+fx32_mul(matrix[7],rightMatrix[13])+fx32_mul(matrix[11],rightMatrix[14])+fx32_mul(matrix[15],rightMatrix[15]));
|
||||
tmpMatrix[12] = sfx32_shiftdown(fx32_mul(matrix[0],rightMatrix[12])+fx32_mul(matrix[4],rightMatrix[13])+fx32_mul(matrix[8],rightMatrix[14])+fx32_mul(matrix[12],rightMatrix[15]));
|
||||
tmpMatrix[13] = sfx32_shiftdown(fx32_mul(matrix[1],rightMatrix[12])+fx32_mul(matrix[5],rightMatrix[13])+fx32_mul(matrix[9],rightMatrix[14])+fx32_mul(matrix[13],rightMatrix[15]));
|
||||
tmpMatrix[14] = sfx32_shiftdown(fx32_mul(matrix[2],rightMatrix[12])+fx32_mul(matrix[6],rightMatrix[13])+fx32_mul(matrix[10],rightMatrix[14])+fx32_mul(matrix[14],rightMatrix[15]));
|
||||
tmpMatrix[15] = sfx32_shiftdown(fx32_mul(matrix[3],rightMatrix[12])+fx32_mul(matrix[7],rightMatrix[13])+fx32_mul(matrix[11],rightMatrix[14])+fx32_mul(matrix[15],rightMatrix[15]));
|
||||
|
||||
memcpy(matrix,tmpMatrix,sizeof(s32)*16);
|
||||
}
|
||||
|
@ -411,7 +411,7 @@ void MatrixScale(s32 *matrix, const s32 *ptr)
|
|||
{
|
||||
//zero 21-sep-2010 - verified unrolling seems faster on my cpu
|
||||
MACRODO_N(12,
|
||||
matrix[X] = fx32_shiftdown(fx32_mul(matrix[X],ptr[X>>2]))
|
||||
matrix[X] = sfx32_shiftdown(fx32_mul(matrix[X],ptr[X>>2]))
|
||||
);
|
||||
}
|
||||
|
||||
|
@ -423,7 +423,7 @@ void MatrixTranslate(s32 *matrix, const s32 *ptr)
|
|||
temp += fx32_mul(matrix[X+0],ptr[0]);
|
||||
temp += fx32_mul(matrix[X+4],ptr[1]);
|
||||
temp += fx32_mul(matrix[X+8],ptr[2]);
|
||||
matrix[X+12] = fx32_shiftdown(temp);
|
||||
matrix[X+12] = sfx32_shiftdown(temp);
|
||||
});
|
||||
}
|
||||
|
||||
|
|
|
@ -21,6 +21,7 @@
|
|||
|
||||
#include <retro_miscellaneous.h>
|
||||
#include <retro_inline.h>
|
||||
#include <math/fxp.h>
|
||||
|
||||
//analyze microsoft compilers
|
||||
#ifdef _MSC_VER
|
||||
|
@ -458,47 +459,30 @@ template<typename T> inline void reconstruct(T* t) {
|
|||
new(t) T();
|
||||
}
|
||||
|
||||
//-------------fixed point speedup macros
|
||||
/* fixed point speedup macros */
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#include <intrin.h>
|
||||
#endif
|
||||
|
||||
FORCEINLINE s64 fx32_mul(const s32 a, const s32 b)
|
||||
FORCEINLINE s32 sfx32_shiftdown(const s64 a)
|
||||
{
|
||||
#ifdef _MSC_VER
|
||||
return __emul(a,b);
|
||||
#else
|
||||
return ((s64)a)*((s64)b);
|
||||
#endif
|
||||
}
|
||||
s64 shifted = fx32_shiftdown(a);
|
||||
|
||||
FORCEINLINE s32 fx32_shiftdown(const s64 a)
|
||||
{
|
||||
s64 shifted;
|
||||
#ifdef _MSC_VER
|
||||
shifted = __ll_rshift(a,12);
|
||||
#else
|
||||
shifted = (a>>12);
|
||||
#endif
|
||||
//either matrix math is happening at higher precision (an extra bit would suffice, I think), or the sums sent to this are saturated.
|
||||
//tested by: spectrobes beyond the portals excavation blower
|
||||
//(it sets very large +x,+y in the modelview matrix to push things offscreen, but the +y will overflow and become negative if we're not careful)
|
||||
//I didn't think very hard about what would be fastest here on 32-bit systems
|
||||
//NOTE: this was intended for use in MatrixMultVec4x4_M2; it may not be appropriate for other uses of fx32_shiftdown.
|
||||
//if this causes problems we should refactor the math routines a bit to take care of saturating in another function
|
||||
if(shifted>(s32)0x7FFFFFFF) return 0x7FFFFFFF;
|
||||
else if(shifted<=(s32)0x80000000) return 0x80000000;
|
||||
else return shifted;
|
||||
}
|
||||
|
||||
FORCEINLINE s64 fx32_shiftup(const s32 a)
|
||||
{
|
||||
#ifdef _MSC_VER
|
||||
return __ll_lshift(a,12);
|
||||
#else
|
||||
return ((s64)a)<<12;
|
||||
#endif
|
||||
/*either matrix math is happening at higher precision (an extra bit would suffice,
|
||||
* I think), or the sums sent to this are saturated.
|
||||
*
|
||||
*tested by: spectrobes beyond the portals excavation blower
|
||||
*(it sets very large +x,+y in the modelview matrix to push things offscreen,
|
||||
*but the +y will overflow and become negative if we're not careful)
|
||||
*
|
||||
*I didn't think very hard about what would be fastest here on 32-bit systems
|
||||
*NOTE: this was intended for use in MatrixMultVec4x4_M2; it may not be appropriate for
|
||||
* other uses of sfx32_shiftdown.
|
||||
*if this causes problems we should refactor the math routines a bit to take care of
|
||||
* saturating in another function
|
||||
*/
|
||||
if(shifted>(s32)0x7FFFFFFF)
|
||||
return 0x7FFFFFFF;
|
||||
if(shifted<=(s32)0x80000000)
|
||||
return 0x80000000;
|
||||
return shifted;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
Loading…
Reference in New Issue