Changed MulShr32 to a naive implementation; the code the compiler generates for it is already optimal.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@3076 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
sudonim1 2010-05-25 08:24:04 +00:00
parent 774f56297a
commit 3e81a87c90
1 changed file with 1 addition and 26 deletions

View File

@ -45,36 +45,11 @@ static const s32 tbl_XA_Factor[5][2] =
// caller to extend the inputs so that they make use of all 32 bits of
// precision.
//
#ifdef _MSC_VER
// Returns the high 32 bits of the signed 64-bit product srcval * mulval,
// i.e. (srcval * mulval) >> 32 computed in 64-bit arithmetic.
// The naive expression below compiles to a single widening imul plus a
// register move, so no hand-optimization is needed. The old trick of
// reading the high word through an s32* into a temporary violated strict
// aliasing and assumed little-endian layout; it also left an unreachable
// second return statement behind.
// NOTE: >> on a negative s64 is implementation-defined (arithmetic shift
// on every compiler/target this project supports).
__forceinline s32 MulShr32( s32 srcval, s32 mulval )
{
	return (s64)srcval * mulval >> 32;
}
#else
// GCC build: returns the high 32 bits of the signed 64-bit product
// srcval * mulval, i.e. (srcval * mulval) >> 32 in 64-bit arithmetic.
// The previous version hand-wrote an x86 `imull` in inline asm and had to
// fake the eax clobber with a dummy output operand; that was non-portable
// (x86-only) and no faster — GCC emits the same single widening multiply
// for the plain C expression on any target.
s32 MulShr32( s32 srcval, s32 mulval )
{
	return (s64)srcval * mulval >> 32;
}
#endif
__forceinline s32 clamp_mix( s32 x, u8 bitshift )
{
return GetClamped( x, -0x8000<<bitshift, 0x7fff<<bitshift );