Lots of work on improving the floating point emulation. Note that most changes affect only the interpreter mode.

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@4314 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
LinesPrower 2009-09-24 17:43:46 +00:00
parent ebfceb16b9
commit 7d74e55109
8 changed files with 801 additions and 294 deletions

View File

@ -1030,6 +1030,10 @@
/> />
</FileConfiguration> </FileConfiguration>
</File> </File>
<File
RelativePath=".\Src\PowerPC\Interpreter\Interpreter_FPUtils.h"
>
</File>
<File <File
RelativePath=".\Src\PowerPC\Interpreter\Interpreter_Integer.cpp" RelativePath=".\Src\PowerPC\Interpreter\Interpreter_Integer.cpp"
> >

View File

@ -307,7 +307,7 @@ namespace Interpreter
// paired helper // paired helper
float Helper_Dequantize(const u32 _Addr, const EQuantizeType _quantizeType, const unsigned int _uScale); float Helper_Dequantize(const u32 _Addr, const EQuantizeType _quantizeType, const unsigned int _uScale);
void Helper_Quantize (const u32 _Addr, const float _fValue, const EQuantizeType _quantizeType, const unsigned _uScale); void Helper_Quantize (const u32 _Addr, const double _fValue, const EQuantizeType _quantizeType, const unsigned _uScale);
// other helper // other helper
u32 Helper_Mask(int mb, int me); u32 Helper_Mask(int mb, int me);

View File

@ -0,0 +1,243 @@
// Copyright (C) 2009 Dolphin Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
#include "../../Core.h"
#include "Interpreter.h"
#include "MathUtil.h"
using namespace MathUtil;
// warning! very slow!
//#define VERY_ACCURATE_FP
// Raw 64-bit (double format) bit patterns of the finite extremes of the
// single-precision range: MIN_SINGLE is the most negative finite single,
// MAX_SINGLE the largest finite single. The two differ only in the sign bit.
// They are meant to be stored through the integer view of a register
// (riPS0 / riPS1), not converted from an integer value.
#define MIN_SINGLE 0xc7efffffe0000000ull
#define MAX_SINGLE 0x47efffffe0000000ull
// FPSCR exception flags.
// The flags are numbered from the most significant bit of the 32-bit
// register, so the mask for flag bit n is 0x80000000 >> n.
const u32 FPSCR_OX     = (u32)0x80000000 >> 3;
const u32 FPSCR_UX     = (u32)0x80000000 >> 4;
const u32 FPSCR_ZX     = (u32)0x80000000 >> 5;
// ! XX shouldn't be set to 1 by accessing it directly. Use SetFI() instead !
const u32 FPSCR_XX     = (u32)0x80000000 >> 6;
const u32 FPSCR_VXSNAN = (u32)0x80000000 >> 7;
const u32 FPSCR_VXISI  = (u32)0x80000000 >> 8;
const u32 FPSCR_VXIDI  = (u32)0x80000000 >> 9;
const u32 FPSCR_VXZDZ  = (u32)0x80000000 >> 10;
const u32 FPSCR_VXIMZ  = (u32)0x80000000 >> 11;
const u32 FPSCR_VXVC   = (u32)0x80000000 >> 12;
const u32 FPSCR_VXSOFT = (u32)0x80000000 >> 21;
const u32 FPSCR_VXSQRT = (u32)0x80000000 >> 22;
const u32 FPSCR_VXCVI  = (u32)0x80000000 >> 23;

// Union of every individual VX* flag above.
const u32 FPSCR_VX_ANY =
	FPSCR_VXSNAN | FPSCR_VXISI  | FPSCR_VXIDI  |
	FPSCR_VXZDZ  | FPSCR_VXIMZ  | FPSCR_VXVC   |
	FPSCR_VXSOFT | FPSCR_VXSQRT | FPSCR_VXCVI;

// Union of every exception flag defined in this header.
const u32 FPSCR_ANY_X =
	FPSCR_OX | FPSCR_UX | FPSCR_ZX | FPSCR_XX | FPSCR_VX_ANY;
// Bit pattern of the NaN returned by the helpers in this header when an
// operation has to generate one: sign clear, every exponent bit set, and
// only the top fraction bit set.
const u64 PPC_NAN_U64 = 0x7ff8000000000000ull;
// The same 64 bits viewed as a double (reinterpreted in place, not converted).
const double PPC_NAN = *reinterpret_cast<const double*>(&PPC_NAN_U64);
inline bool IsINF(double x)
{
return ((*(u64*)&x) & ~DOUBLE_SIGN) == DOUBLE_EXP;
}
// Raises the exception flag(s) given in mask (a combination of the FPSCR_*
// constants) in FPSCR.
// FX is set as well, but only if at least one of the requested flags was
// not already set; if every flag in mask is already present, FX is left alone.
inline void SetFPException(u32 mask)
{
	const bool all_already_set = (FPSCR.Hex & mask) == mask;
	if (!all_already_set)
	{
		FPSCR.FX = 1;
	}
	FPSCR.Hex |= mask;
}
// Writes FI into FPSCR.FI. A nonzero FI also raises the XX exception through
// SetFPException(), which is why XX should not be set to 1 directly.
inline void SetFI(int FI)
{
	if (FI != 0)
		SetFPException(FPSCR_XX);

	FPSCR.FI = FI;
}
// Recomputes the summary bits of FPSCR:
//  - VX becomes 1 if any of the individual VX* flags is set, 0 otherwise
//  - FEX is cleared
inline void UpdateFPSCR()
{
	const bool any_vx_flag = (FPSCR.Hex & FPSCR_VX_ANY) != 0;
	FPSCR.VX = any_vx_flag;

	// we assume that "?E" bits are always 0
	FPSCR.FEX = 0;
}
// Rounds _x to single precision and returns the result widened back to double.
// When FPSCR.NI is set, the value is first passed through
// FlushToZeroAsFloat().
inline double ForceSingle(double _x)
{
	// disabled diagnostic for non-default rounding modes:
	//if (FPSCR.RN != 0)
	//	PanicAlert("RN = %d at %x", (int)FPSCR.RN, PC);

	const double input = FPSCR.NI ? FlushToZeroAsFloat(_x) : _x;
	const float narrowed = static_cast<float>(input);
	return narrowed;
}
// Double-precision counterpart of ForceSingle().
// At the moment this is the identity: d is returned unchanged. The rounding
// mode diagnostic and the NI handling below are kept for reference but are
// disabled.
inline double ForceDouble(double d)
{
	//if (FPSCR.RN != 0)
	//	PanicAlert("RN = %d at %x", (int)FPSCR.RN, PC);

	//if (FPSCR.NI)
	//{
	//	IntDouble x; x.d = d;
	//	if ((x.i & DOUBLE_EXP) == 0)
	//		x.i &= DOUBLE_SIGN;  // turn into signed zero
	//	return x.d;
	//}

	return d;
}
// these functions make it possible to modify the behaviour of the operations globally
// they may also be used to set flags like FR, FI, OX, UX
// Multiplies a by b.
// Default build: plain host multiplication.
// With VERY_ACCURATE_FP: a NaN operand is returned as-is (a takes priority
// over b); if two non-NaN operands still produce a NaN, FPSCR_VXIMZ is raised
// and PPC_NAN is returned instead of the host result.
inline double NI_mul(const double a, const double b)
{
#ifdef VERY_ACCURATE_FP
	if (a != a) return a;
	if (b != b) return b;

	const double product = a * b;
	if (product == product)
		return product;

	// the operation itself generated the NaN
	SetFPException(FPSCR_VXIMZ);
	return PPC_NAN;
#else
	return a * b;
#endif
}
// Adds a and b.
// Default build: plain host addition.
// With VERY_ACCURATE_FP: a NaN operand is returned as-is (a takes priority
// over b); if two non-NaN operands still produce a NaN, FPSCR_VXISI is raised
// and PPC_NAN is returned instead of the host result.
inline double NI_add(const double a, const double b)
{
#ifdef VERY_ACCURATE_FP
	if (a != a) return a;
	if (b != b) return b;

	const double sum = a + b;
	if (sum == sum)
		return sum;

	// the operation itself generated the NaN
	SetFPException(FPSCR_VXISI);
	return PPC_NAN;
#else
	return a + b;
#endif
}
// Subtracts b from a.
// Default build: plain host subtraction.
// With VERY_ACCURATE_FP: a NaN operand is returned as-is (a takes priority
// over b); if two non-NaN operands still produce a NaN, FPSCR_VXISI is raised
// and PPC_NAN is returned instead of the host result.
inline double NI_sub(const double a, const double b)
{
#ifdef VERY_ACCURATE_FP
	if (a != a) return a;
	if (b != b) return b;

	const double difference = a - b;
	if (difference == difference)
		return difference;

	// the operation itself generated the NaN
	SetFPException(FPSCR_VXISI);
	return PPC_NAN;
#else
	return a - b;
#endif
}
// Computes (a * b) + c as two separate steps: multiply, then add.
// Default build: delegates to NI_mul() and NI_add().
// With VERY_ACCURATE_FP: a NaN operand is returned as-is, checked in the
// order a, c, b. A NaN appearing in the multiplication raises FPSCR_VXIMZ,
// one appearing in the addition raises FPSCR_VXISI; both return PPC_NAN.
inline double NI_madd(const double a, const double b, const double c)
{
#ifdef VERY_ACCURATE_FP
	if (a != a) return a;
	if (c != c) return c;
	if (b != b) return b;

	const double product = a * b;
	if (product != product)
	{
		SetFPException(FPSCR_VXIMZ);
		return PPC_NAN;
	}

	const double sum = product + c;
	if (sum != sum)
	{
		SetFPException(FPSCR_VXISI);
		return PPC_NAN;
	}
	return sum;
#else
	const double product = NI_mul(a, b);
	return NI_add(product, c);
#endif
}
// Computes (a * b) - c as two separate steps: multiply, then subtract.
// Default build: delegates to NI_mul() and NI_sub().
// With VERY_ACCURATE_FP: a NaN operand is returned as-is, checked in the
// order a, c, b. A NaN appearing in the multiplication raises FPSCR_VXIMZ,
// one appearing in the subtraction raises FPSCR_VXISI; both return PPC_NAN.
inline double NI_msub(const double a, const double b, const double c)
{
#ifdef VERY_ACCURATE_FP
	if (a != a) return a;
	if (c != c) return c;
	if (b != b) return b;

	const double product = a * b;
	if (product != product)
	{
		SetFPException(FPSCR_VXIMZ);
		return PPC_NAN;
	}

	const double difference = product - c;
	if (difference != difference)
	{
		SetFPException(FPSCR_VXISI);
		return PPC_NAN;
	}
	return difference;
#else
	const double product = NI_mul(a, b);
	return NI_sub(product, c);
#endif
}
// used by stfsXX instructions and ps_rsqrte
//
// Converts the raw bits of a double (x) into the raw bits of a single
// without going through the host FPU.
//  - Biased exponent in 874..896 (and x not a zero): the value is
//    denormalized by hand. The leading one is made explicit above the top
//    fraction bits, the whole thing is shifted right according to the
//    exponent, and the sign bit is put back.
//  - Everything else (exponent above 896, either zero, or exponent below
//    874): the bits are simply repacked, keeping the top two bits of x and
//    the 30 bits that follow the three bits after them.
//    For exponents below 874 this is said to be undefined; the repacking
//    there is based on hardware tests.
inline u32 ConvertToSingle(u64 x)
{
	const u32 biased_exp = (x >> 52) & 0x7ff;
	const bool is_zero = (x & ~DOUBLE_SIGN) == 0;

	if (!is_zero && biased_exp >= 874 && biased_exp <= 896)
	{
		u32 mantissa = (u32)(0x80000000 | ((x & DOUBLE_FRAC) >> 21));
		mantissa >>= (905 - biased_exp);
		return mantissa | ((x >> 32) & 0x80000000);
	}

	return ((x >> 32) & 0xc0000000) | ((x >> 29) & 0x3fffffff);
}
// used by psq_stXX operations.
//
// Flush-to-zero variant of ConvertToSingle(): converts the raw bits of a
// double (x) into the raw bits of a single. Values with a biased exponent
// above 896, and both zeros, are repacked bit for bit; anything else is
// replaced by a zero that keeps the sign of x.
inline u32 ConvertToSingleFTZ(u64 x)
{
	const u32 biased_exp = (x >> 52) & 0x7ff;
	const bool repackable = biased_exp > 896 || (x & ~DOUBLE_SIGN) == 0;

	if (!repackable)
	{
		// keep only the sign bit
		return (x >> 32) & 0x80000000;
	}

	return ((x >> 32) & 0xc0000000) | ((x >> 29) & 0x3fffffff);
}

View File

@ -35,13 +35,13 @@
#include "../../Core.h" #include "../../Core.h"
#include "Interpreter.h" #include "Interpreter.h"
#include "MathUtil.h" #include "MathUtil.h"
#include "Interpreter_FPUtils.h"
using namespace MathUtil; using namespace MathUtil;
namespace Interpreter namespace Interpreter
{ {
void UpdateFPSCR(UReg_FPSCR fp);
void UpdateSSEState(); void UpdateSSEState();
// Extremely rare - actually, never seen. // Extremely rare - actually, never seen.
@ -53,8 +53,6 @@ void Helper_UpdateCR1(double _fValue)
void fcmpo(UGeckoInstruction _inst) void fcmpo(UGeckoInstruction _inst)
{ {
// Use FlushToZeroAsFloat() to fix a couple of games - but seriously,
// the real problem should be fixed instead.
double fa = rPS0(_inst.FA); double fa = rPS0(_inst.FA);
double fb = rPS0(_inst.FB); double fb = rPS0(_inst.FB);
@ -68,9 +66,16 @@ void fcmpo(UGeckoInstruction _inst)
FPSCR.FX = 1; FPSCR.FX = 1;
compareResult = 1; compareResult = 1;
if (IsSNAN(fa) || IsSNAN(fb)) if (IsSNAN(fa) || IsSNAN(fb))
FPSCR.VXSNAN = 1; {
if (!FPSCR.FEX || IsQNAN(fa) || IsQNAN(fb)) SetFPException(FPSCR_VXSNAN);
FPSCR.VXVC = 1; if (FPSCR.VE == 0)
SetFPException(FPSCR_VXVC);
}
else
{
//if (IsQNAN(fa) || IsQNAN(fb)) // this is always true
SetFPException(FPSCR_VXVC);
}
} }
FPSCR.FPRF = compareResult; FPSCR.FPRF = compareResult;
@ -79,8 +84,6 @@ void fcmpo(UGeckoInstruction _inst)
void fcmpu(UGeckoInstruction _inst) void fcmpu(UGeckoInstruction _inst)
{ {
// Use FlushToZeroAsFloat() to fix a couple of games - but seriously,
// the real problem should be fixed instead.
double fa = rPS0(_inst.FA); double fa = rPS0(_inst.FA);
double fb = rPS0(_inst.FB); double fb = rPS0(_inst.FB);
@ -91,19 +94,18 @@ void fcmpu(UGeckoInstruction _inst)
else if (fa == fb) compareResult = 2; else if (fa == fb) compareResult = 2;
else else
{ {
FPSCR.FX = 1;
compareResult = 1; compareResult = 1;
if (IsSNAN(fa) || IsSNAN(fb)) if (IsSNAN(fa) || IsSNAN(fb))
{ {
FPSCR.VXSNAN = 1; SetFPException(FPSCR_VXSNAN);
} }
} }
FPSCR.FPRF = compareResult; FPSCR.FPRF = compareResult;
SetCRField(_inst.CRFD, compareResult); SetCRField(_inst.CRFD, compareResult);
} }
// Apply current rounding mode // Apply current rounding mode
// need to investigate this instruction.
void fctiwx(UGeckoInstruction _inst) void fctiwx(UGeckoInstruction _inst)
{ {
const double b = rPS0(_inst.FB); const double b = rPS0(_inst.FB);
@ -111,24 +113,55 @@ void fctiwx(UGeckoInstruction _inst)
if (b > (double)0x7fffffff) if (b > (double)0x7fffffff)
{ {
value = 0x7fffffff; value = 0x7fffffff;
FPSCR.VXCVI = 1; SetFPException(FPSCR_VXCVI);
FPSCR.FI = 0;
FPSCR.FR = 0;
} }
else if (b < -(double)0x7fffffff) else if (b < -(double)0x80000000)
{ {
value = 0x80000000; value = 0x80000000;
FPSCR.VXCVI = 1; SetFPException(FPSCR_VXCVI);
FPSCR.FI = 0;
FPSCR.FR = 0;
} }
else else
{ {
value = (u32)(s32)_mm_cvtsd_si32(_mm_set_sd(b)); // obey current rounding mode s32 i;
// double d_value = (double)value; switch (FPSCR.RN)
// bool inexact = (d_value != b); {
// FPSCR.FI = inexact ? 1 : 0; case 0: // nearest
// FPSCR.XX |= FPSCR.FI; {
// FPSCR.FR = fabs(d_value) > fabs(b); double t = b + 0.5;
i = (s32)t;
if (t - i < 0) i--;
break;
}
case 1: // zero
i = (s32)b;
break;
case 2: // +inf
i = (s32)b;
if (b - i > 0) i++;
break;
case 3: // -inf
i = (s32)b;
if (b - i < 0) i--;
break;
}
value = (u32)i;
double di = i;
if (di == b)
{
FPSCR.FI = 0;
FPSCR.FR = 0;
}
else
{
SetFI(1);
FPSCR.FR = fabs(di) > fabs(b);
}
} }
//TODO: FR
//FPRF undefined //FPRF undefined
riPS0(_inst.FD) = (u64)value; // zero extend riPS0(_inst.FD) = (u64)value; // zero extend
@ -149,21 +182,32 @@ void fctiwzx(UGeckoInstruction _inst)
if (b > (double)0x7fffffff) if (b > (double)0x7fffffff)
{ {
value = 0x7fffffff; value = 0x7fffffff;
FPSCR.VXCVI = 1; SetFPException(FPSCR_VXCVI);
FPSCR.FI = 0;
FPSCR.FR = 0;
} }
else if (b < -(double)0x7fffffff) else if (b < -(double)0x80000000)
{ {
value = 0x80000000; value = 0x80000000;
FPSCR.VXCVI = 1; SetFPException(FPSCR_VXCVI);
FPSCR.FI = 0;
FPSCR.FR = 0;
} }
else else
{ {
value = (u32)(s32)_mm_cvttsd_si32(_mm_set_sd(b)); // truncate s32 i = (s32)b;
// double d_value = (double)value; double di = i;
// bool inexact = (d_value != b); if (di == b)
// FPSCR.FI = inexact ? 1 : 0; {
// FPSCR.XX |= FPSCR.FI; FPSCR.FI = 0;
// FPSCR.FR = 1; //fabs(d_value) > fabs(b); FPSCR.FR = 0;
}
else
{
SetFI(1);
FPSCR.FR = fabs(di) > fabs(b);
}
value = (u32)i;
} }
riPS0(_inst.FD) = (u64)value; riPS0(_inst.FD) = (u64)value;
@ -206,15 +250,15 @@ void fselx(UGeckoInstruction _inst)
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
} }
// !!! warning !!! // !!! warning !!!
// PS1 must be set to the value of PS0 or DragonballZ will be f**ked up // PS1 must be set to the value of PS0 or DragonballZ will be f**ked up
// PS1 is said to be undefined // PS1 is said to be undefined
void frspx(UGeckoInstruction _inst) // round to single void frspx(UGeckoInstruction _inst) // round to single
{ {
double b = rPS0(_inst.FB); double b = rPS0(_inst.FB);
double rounded = (double)(float)b; double rounded = ForceSingle(b);
//FPSCR.FI = b != rounded; SetFI(b != rounded);
FPSCR.FR = fabs(rounded) > fabs(b);
UpdateFPRF(rounded); UpdateFPRF(rounded);
rPS0(_inst.FD) = rPS1(_inst.FD) = rounded; rPS0(_inst.FD) = rPS1(_inst.FD) = rounded;
return; return;
@ -223,24 +267,26 @@ void frspx(UGeckoInstruction _inst) // round to single
void fmulx(UGeckoInstruction _inst) void fmulx(UGeckoInstruction _inst)
{ {
rPS0(_inst.FD) = rPS0(_inst.FA) * rPS0(_inst.FC); rPS0(_inst.FD) = ForceDouble(NI_mul(rPS0(_inst.FA), rPS0(_inst.FC)));
FPSCR.FI = 0; FPSCR.FI = 0; // are these flags important?
FPSCR.FR = 1; FPSCR.FR = 0;
UpdateFPRF(rPS0(_inst.FD));
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
} }
void fmulsx(UGeckoInstruction _inst) void fmulsx(UGeckoInstruction _inst)
{ {
double d_value = rPS0(_inst.FA) * rPS0(_inst.FC); double d_value = NI_mul(rPS0(_inst.FA), rPS0(_inst.FC));
rPS0(_inst.FD) = rPS1(_inst.FD) = static_cast<float>(d_value); rPS0(_inst.FD) = rPS1(_inst.FD) = ForceSingle(d_value);
FPSCR.FI = d_value != rPS0(_inst.FD); //FPSCR.FI = d_value != rPS0(_inst.FD);
FPSCR.FI = 0;
FPSCR.FR = 0;
UpdateFPRF(rPS0(_inst.FD)); UpdateFPRF(rPS0(_inst.FD));
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
} }
void fmaddx(UGeckoInstruction _inst) void fmaddx(UGeckoInstruction _inst)
{ {
double result = (rPS0(_inst.FA) * rPS0(_inst.FC)) + rPS0(_inst.FB); double result = ForceDouble(NI_madd( rPS0(_inst.FA), rPS0(_inst.FC), rPS0(_inst.FB) ));
rPS0(_inst.FD) = result; rPS0(_inst.FD) = result;
UpdateFPRF(result); UpdateFPRF(result);
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
@ -248,8 +294,8 @@ void fmaddx(UGeckoInstruction _inst)
void fmaddsx(UGeckoInstruction _inst) void fmaddsx(UGeckoInstruction _inst)
{ {
double d_value = (rPS0(_inst.FA) * rPS0(_inst.FC)) + rPS0(_inst.FB); double d_value = NI_madd( rPS0(_inst.FA), rPS0(_inst.FC), rPS0(_inst.FB) );
rPS0(_inst.FD) = rPS1(_inst.FD) = static_cast<float>(d_value); rPS0(_inst.FD) = rPS1(_inst.FD) = ForceSingle(d_value);
FPSCR.FI = d_value != rPS0(_inst.FD); FPSCR.FI = d_value != rPS0(_inst.FD);
FPSCR.FR = 0; FPSCR.FR = 0;
UpdateFPRF(rPS0(_inst.FD)); UpdateFPRF(rPS0(_inst.FD));
@ -259,44 +305,77 @@ void fmaddsx(UGeckoInstruction _inst)
void faddx(UGeckoInstruction _inst) void faddx(UGeckoInstruction _inst)
{ {
rPS0(_inst.FD) = rPS0(_inst.FA) + rPS0(_inst.FB); rPS0(_inst.FD) = ForceDouble(NI_add(rPS0(_inst.FA), rPS0(_inst.FB)));
UpdateFPRF(rPS0(_inst.FD)); UpdateFPRF(rPS0(_inst.FD));
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
} }
void faddsx(UGeckoInstruction _inst) void faddsx(UGeckoInstruction _inst)
{ {
rPS0(_inst.FD) = rPS1(_inst.FD) = static_cast<float>(rPS0(_inst.FA) + rPS0(_inst.FB)); rPS0(_inst.FD) = rPS1(_inst.FD) = ForceSingle(NI_add(rPS0(_inst.FA), rPS0(_inst.FB)));
UpdateFPRF(rPS0(_inst.FD)); UpdateFPRF(rPS0(_inst.FD));
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
} }
void fdivx(UGeckoInstruction _inst) void fdivx(UGeckoInstruction _inst)
{ {
double a = rPS0(_inst.FA); double a = rPS0(_inst.FA);
double b = rPS0(_inst.FB); double b = rPS0(_inst.FB);
rPS0(_inst.FD) = a / b; if (a != a) rPS0(_inst.FD) = a;
if (b == 0.0) { else if (b != b) rPS0(_inst.FD) = b;
if (!FPSCR.ZX) else
FPSCR.FX = 1; {
FPSCR.ZX = 1; rPS0(_inst.FD) = ForceDouble(a / b);
FPSCR.XX = 1; if (b == 0.0)
{
if (a == 0.0)
{
SetFPException(FPSCR_VXZDZ);
rPS0(_inst.FD) = PPC_NAN;
}
SetFPException(FPSCR_ZX);
}
else
{
if (IsINF(a) && IsINF(b))
{
SetFPException(FPSCR_VXIDI);
rPS0(_inst.FD) = PPC_NAN;
}
}
} }
UpdateFPRF(rPS0(_inst.FD)); UpdateFPRF(rPS0(_inst.FD));
// FR,FI,OX,UX???
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
} }
void fdivsx(UGeckoInstruction _inst) void fdivsx(UGeckoInstruction _inst)
{ {
double a = rPS0(_inst.FA); double a = rPS0(_inst.FA);
double b = rPS0(_inst.FB); double b = rPS0(_inst.FB);
rPS0(_inst.FD) = rPS1(_inst.FD) = (float)(a / b); double res;
if (a != a) res = a;
else if (b != b) res = b;
else
{
res = ForceSingle(a / b);
if (b == 0.0) if (b == 0.0)
{ {
if (!FPSCR.ZX) if (a == 0.0)
FPSCR.FX = 1; {
FPSCR.ZX = 1; SetFPException(FPSCR_VXZDZ);
FPSCR.XX = 1; res = PPC_NAN;
} }
SetFPException(FPSCR_ZX);
}
else
{
if (IsINF(a) && IsINF(b))
{
SetFPException(FPSCR_VXIDI);
res = PPC_NAN;
}
}
}
rPS0(_inst.FD) = rPS1(_inst.FD) = res;
UpdateFPRF(rPS0(_inst.FD)); UpdateFPRF(rPS0(_inst.FD));
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
} }
@ -304,15 +383,23 @@ void fdivsx(UGeckoInstruction _inst)
// Single precision only. // Single precision only.
void fresx(UGeckoInstruction _inst) void fresx(UGeckoInstruction _inst)
{ {
float b = (float)rPS0(_inst.FB); double b = rPS0(_inst.FB);
float one_over = 1.0f / b; double one_over = ForceSingle(1.0 / b);
// this is based on the real hardware tests
if (b != 0.0 && IsINF(one_over))
{
if (one_over > 0)
riPS0(_inst.FD) = riPS1(_inst.FD) = MAX_SINGLE;
else
riPS0(_inst.FD) = riPS1(_inst.FD) = MIN_SINGLE;
}
else
{
rPS0(_inst.FD) = rPS1(_inst.FD) = one_over; rPS0(_inst.FD) = rPS1(_inst.FD) = one_over;
}
if (b == 0.0) if (b == 0.0)
{ {
if (!FPSCR.ZX) SetFPException(FPSCR_ZX);
FPSCR.FX = 1;
FPSCR.ZX = 1;
FPSCR.XX = 1;
} }
UpdateFPRF(rPS0(_inst.FD)); UpdateFPRF(rPS0(_inst.FD));
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
@ -320,13 +407,75 @@ void fresx(UGeckoInstruction _inst)
void frsqrtex(UGeckoInstruction _inst) void frsqrtex(UGeckoInstruction _inst)
{ {
float b = (float)rPS0(_inst.FB); double b = rPS0(_inst.FB);
if (b < 0.0) { if (b < 0.0)
FPSCR.VXSQRT = 1; {
} else if (b == 0) { SetFPException(FPSCR_VXSQRT);
FPSCR.ZX = 1; rPS0(_inst.FD) = PPC_NAN;
} }
rPS0(_inst.FD) = 1.0f / sqrtf(b); else
{
if (b == 0.0) SetFPException(FPSCR_ZX);
rPS0(_inst.FD) = ForceDouble(1.0 / sqrt(b));
}
UpdateFPRF(rPS0(_inst.FD));
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
void fmsubx(UGeckoInstruction _inst)
{
rPS0(_inst.FD) = ForceDouble(NI_msub( rPS0(_inst.FA), rPS0(_inst.FC), rPS0(_inst.FB) ));
UpdateFPRF(rPS0(_inst.FD));
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
void fmsubsx(UGeckoInstruction _inst)
{
rPS0(_inst.FD) = rPS1(_inst.FD) =
ForceSingle( NI_msub(rPS0(_inst.FA), rPS0(_inst.FC), rPS0(_inst.FB) ));
UpdateFPRF(rPS0(_inst.FD));
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
void fnmaddx(UGeckoInstruction _inst)
{
rPS0(_inst.FD) = ForceDouble(0.0-NI_madd(rPS0(_inst.FA), rPS0(_inst.FC), rPS0(_inst.FB)));
UpdateFPRF(rPS0(_inst.FD));
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
void fnmaddsx(UGeckoInstruction _inst)
{
rPS0(_inst.FD) = rPS1(_inst.FD) =
ForceSingle(0.0-NI_madd(rPS0(_inst.FA), rPS0(_inst.FC), rPS0(_inst.FB)));
UpdateFPRF(rPS0(_inst.FD));
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
void fnmsubx(UGeckoInstruction _inst)
{
rPS0(_inst.FD) = ForceDouble(0.0-NI_msub(rPS0(_inst.FA), rPS0(_inst.FC), rPS0(_inst.FB)));
UpdateFPRF(rPS0(_inst.FD));
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
void fnmsubsx(UGeckoInstruction _inst)
{
rPS0(_inst.FD) = rPS1(_inst.FD) =
ForceSingle(0.0-NI_msub(rPS0(_inst.FA), rPS0(_inst.FC), rPS0(_inst.FB)));
UpdateFPRF(rPS0(_inst.FD));
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
void fsubx(UGeckoInstruction _inst)
{
rPS0(_inst.FD) = ForceDouble(NI_sub(rPS0(_inst.FA), rPS0(_inst.FB)));
UpdateFPRF(rPS0(_inst.FD));
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
void fsubsx(UGeckoInstruction _inst)
{
rPS0(_inst.FD) = rPS1(_inst.FD) = ForceSingle(NI_sub(rPS0(_inst.FA), rPS0(_inst.FB)));
UpdateFPRF(rPS0(_inst.FD)); UpdateFPRF(rPS0(_inst.FD));
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
} }
@ -344,63 +493,4 @@ void fsqrtx(UGeckoInstruction _inst)
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
} }
void fmsubx(UGeckoInstruction _inst)
{
rPS0(_inst.FD) = (rPS0(_inst.FA) * rPS0(_inst.FC)) - rPS0(_inst.FB);
UpdateFPRF(rPS0(_inst.FD));
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
void fmsubsx(UGeckoInstruction _inst)
{
rPS0(_inst.FD) = rPS1(_inst.FD) =
static_cast<float>((rPS0(_inst.FA) * rPS0(_inst.FC)) - rPS0(_inst.FB));
UpdateFPRF(rPS0(_inst.FD));
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
void fnmaddx(UGeckoInstruction _inst)
{
rPS0(_inst.FD) = -((rPS0(_inst.FA) * rPS0(_inst.FC)) + rPS0(_inst.FB));
UpdateFPRF(rPS0(_inst.FD));
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
void fnmaddsx(UGeckoInstruction _inst)
{
rPS0(_inst.FD) = rPS1(_inst.FD) =
static_cast<float>(-((rPS0(_inst.FA) * rPS0(_inst.FC)) + rPS0(_inst.FB)));
UpdateFPRF(rPS0(_inst.FD));
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
void fnmsubx(UGeckoInstruction _inst)
{
rPS0(_inst.FD) = -((rPS0(_inst.FA) * rPS0(_inst.FC)) - rPS0(_inst.FB));
UpdateFPRF(rPS0(_inst.FD));
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
void fnmsubsx(UGeckoInstruction _inst)
{
rPS0(_inst.FD) = rPS1(_inst.FD) =
static_cast<float>(-((rPS0(_inst.FA) * rPS0(_inst.FC)) - rPS0(_inst.FB)));
UpdateFPRF(rPS0(_inst.FD));
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
void fsubx(UGeckoInstruction _inst)
{
rPS0(_inst.FD) = rPS0(_inst.FA) - rPS0(_inst.FB);
UpdateFPRF(rPS0(_inst.FD));
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
void fsubsx(UGeckoInstruction _inst)
{
rPS0(_inst.FD) = rPS1(_inst.FD) = static_cast<float>(rPS0(_inst.FA) - rPS0(_inst.FB));
UpdateFPRF(rPS0(_inst.FD));
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
} // namespace } // namespace

View File

@ -28,6 +28,8 @@
#include "../Jit64/Jit.h" #include "../Jit64/Jit.h"
#include "../JitCommon/JitCache.h" #include "../JitCommon/JitCache.h"
#include "Interpreter_FPUtils.h"
namespace Interpreter namespace Interpreter
{ {
@ -234,16 +236,16 @@ void stfdu(UGeckoInstruction _inst)
void stfs(UGeckoInstruction _inst) void stfs(UGeckoInstruction _inst)
{ {
double value = rPS0(_inst.FS); //double value = rPS0(_inst.FS);
float fTemp = (float)value; //float fTemp = (float)value;
Memory::Write_U32(*(u32*)&fTemp, Helper_Get_EA(_inst)); //Memory::Write_U32(*(u32*)&fTemp, Helper_Get_EA(_inst));
Memory::Write_U32(ConvertToSingle(riPS0(_inst.FS)), Helper_Get_EA(_inst));
} }
void stfsu(UGeckoInstruction _inst) void stfsu(UGeckoInstruction _inst)
{ {
float fTemp = (float)rPS0(_inst.FS);
u32 uAddress = Helper_Get_EA_U(_inst); u32 uAddress = Helper_Get_EA_U(_inst);
Memory::Write_U32(*(u32*)&fTemp, uAddress); Memory::Write_U32(ConvertToSingle(riPS0(_inst.FS)), uAddress);
m_GPR[_inst.RA] = uAddress; m_GPR[_inst.RA] = uAddress;
} }
@ -466,18 +468,14 @@ void stfiwx(UGeckoInstruction _inst)
void stfsux(UGeckoInstruction _inst) void stfsux(UGeckoInstruction _inst)
{ {
double value = rPS0(_inst.FS);
float fTemp = (float)value;
u32 uAddress = Helper_Get_EA_UX(_inst); u32 uAddress = Helper_Get_EA_UX(_inst);
Memory::Write_U32(*(u32*)&fTemp, uAddress); Memory::Write_U32(ConvertToSingle(riPS0(_inst.FS)), uAddress);
m_GPR[_inst.RA] = uAddress; m_GPR[_inst.RA] = uAddress;
} }
void stfsx(UGeckoInstruction _inst) void stfsx(UGeckoInstruction _inst)
{ {
double value = rPS0(_inst.FS); Memory::Write_U32(ConvertToSingle(riPS0(_inst.FS)), Helper_Get_EA_X(_inst));
float fTemp = (float)value;
Memory::Write_U32(*(u32 *)&fTemp, Helper_Get_EA_X(_inst));
} }
void sthbrx(UGeckoInstruction _inst) void sthbrx(UGeckoInstruction _inst)

View File

@ -19,6 +19,8 @@
#include "Interpreter.h" #include "Interpreter.h"
#include "../../HW/Memmap.h" #include "../../HW/Memmap.h"
#include "Interpreter_FPUtils.h"
namespace Interpreter namespace Interpreter
{ {
@ -71,40 +73,40 @@ inline T CLAMP(T a, T bottom, T top) {
return a; return a;
} }
void Helper_Quantize(const u32 _Addr, const float _fValue, void Helper_Quantize(const u32 _Addr, const double _fValue,
const EQuantizeType _quantizeType, const unsigned int _uScale) const EQuantizeType _quantizeType, const unsigned int _uScale)
{ {
switch(_quantizeType) switch(_quantizeType)
{ {
case QUANTIZE_FLOAT: case QUANTIZE_FLOAT:
Memory::Write_U32(*(u32*)&_fValue,_Addr); Memory::Write_U32( ConvertToSingleFTZ( *(u64*)&_fValue ), _Addr );
break; break;
// used for THP player // used for THP player
case QUANTIZE_U8: case QUANTIZE_U8:
{ {
float fResult = CLAMP(_fValue * m_quantizeTable[_uScale], 0.0f, 255.0f); float fResult = CLAMP((float)_fValue * m_quantizeTable[_uScale], 0.0f, 255.0f);
Memory::Write_U8((u8)fResult, _Addr); Memory::Write_U8((u8)fResult, _Addr);
} }
break; break;
case QUANTIZE_U16: case QUANTIZE_U16:
{ {
float fResult = CLAMP(_fValue * m_quantizeTable[_uScale], 0.0f, 65535.0f); float fResult = CLAMP((float)_fValue * m_quantizeTable[_uScale], 0.0f, 65535.0f);
Memory::Write_U16((u16)fResult, _Addr); Memory::Write_U16((u16)fResult, _Addr);
} }
break; break;
case QUANTIZE_S8: case QUANTIZE_S8:
{ {
float fResult = CLAMP(_fValue * m_quantizeTable[_uScale], -128.0f, 127.0f); float fResult = CLAMP((float)_fValue * m_quantizeTable[_uScale], -128.0f, 127.0f);
Memory::Write_U8((u8)(s8)fResult, _Addr); Memory::Write_U8((u8)(s8)fResult, _Addr);
} }
break; break;
case QUANTIZE_S16: case QUANTIZE_S16:
{ {
float fResult = CLAMP(_fValue * m_quantizeTable[_uScale], -32768.0f, 32767.0f); float fResult = CLAMP((float)_fValue * m_quantizeTable[_uScale], -32768.0f, 32767.0f);
Memory::Write_U16((u16)(s16)fResult, _Addr); Memory::Write_U16((u16)(s16)fResult, _Addr);
} }
break; break;
@ -215,8 +217,8 @@ void psq_st(UGeckoInstruction _inst)
if (_inst.W == 0) if (_inst.W == 0)
{ {
Helper_Quantize( EA, (float)rPS0(_inst.RS), stType, stScale ); Helper_Quantize( EA, rPS0(_inst.RS), stType, stScale );
Helper_Quantize( EA+c, (float)rPS1(_inst.RS), stType, stScale ); Helper_Quantize( EA+c, rPS1(_inst.RS), stType, stScale );
} }
else else
{ {
@ -237,12 +239,12 @@ void psq_stu(UGeckoInstruction _inst)
if (_inst.W == 0) if (_inst.W == 0)
{ {
Helper_Quantize(EA, (float)rPS0(_inst.RS), stType, stScale); Helper_Quantize(EA, rPS0(_inst.RS), stType, stScale);
Helper_Quantize(EA+c, (float)rPS1(_inst.RS), stType, stScale); Helper_Quantize(EA+c, rPS1(_inst.RS), stType, stScale);
} }
else else
{ {
Helper_Quantize(EA, (float)rPS0(_inst.RS), stType, stScale); Helper_Quantize(EA, rPS0(_inst.RS), stType, stScale);
} }
m_GPR[_inst.RA] = EA; m_GPR[_inst.RA] = EA;
} }
@ -283,12 +285,12 @@ void psq_stx(UGeckoInstruction _inst)
if (_inst.Wx == 0) if (_inst.Wx == 0)
{ {
Helper_Quantize(EA, (float)rPS0(_inst.RS), stType, stScale); Helper_Quantize(EA, rPS0(_inst.RS), stType, stScale);
Helper_Quantize(EA+c, (float)rPS1(_inst.RS), stType, stScale); Helper_Quantize(EA+c, rPS1(_inst.RS), stType, stScale);
} }
else else
{ {
Helper_Quantize(EA, (float)rPS0(_inst.RS), stType, stScale); Helper_Quantize(EA, rPS0(_inst.RS), stType, stScale);
} }
} }
@ -329,12 +331,12 @@ void psq_stux(UGeckoInstruction _inst)
if (_inst.Wx == 0) if (_inst.Wx == 0)
{ {
Helper_Quantize(EA, (float)rPS0(_inst.RS), stType, stScale); Helper_Quantize(EA, rPS0(_inst.RS), stType, stScale);
Helper_Quantize(EA+c, (float)rPS1(_inst.RS), stType, stScale); Helper_Quantize(EA+c, rPS1(_inst.RS), stType, stScale);
} }
else else
{ {
Helper_Quantize(EA, (float)rPS0(_inst.RS), stType, stScale); Helper_Quantize(EA, rPS0(_inst.RS), stType, stScale);
} }
m_GPR[_inst.RA] = EA; m_GPR[_inst.RA] = EA;

View File

@ -21,6 +21,8 @@
#include "Interpreter.h" #include "Interpreter.h"
#include "../../HW/Memmap.h" #include "../../HW/Memmap.h"
#include "Interpreter_FPUtils.h"
using namespace MathUtil; using namespace MathUtil;
namespace Interpreter namespace Interpreter
@ -99,140 +101,272 @@ void ps_merge11(UGeckoInstruction _inst)
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
} }
// From here on, the real deal. // From here on, the real deal.
void ps_div(UGeckoInstruction _inst) void ps_div(UGeckoInstruction _inst)
{ {
rPS0(_inst.FD) = static_cast<float>(rPS0(_inst.FA) / rPS0(_inst.FB)); u32 ex_mask = 0;
rPS1(_inst.FD) = static_cast<float>(rPS1(_inst.FA) / rPS1(_inst.FB));
if (fabs(rPS0(_inst.FB)) == 0.0) { // PS0
FPSCR.ZX = 1; {
double a = rPS0(_inst.FA);
double b = rPS0(_inst.FB);
double &res = rPS0(_inst.FD);
if (a != a) res = a;
else if (b != b) res = b;
else
{
if (b == 0.0)
{
ex_mask |= FPSCR_ZX;
if (rPS0(_inst.FA) == 0.0)
{
ex_mask |= FPSCR_VXZDZ;
res = PPC_NAN;
} }
else
{
res = ForceSingle(a / b);
}
}
else
{
if (IsINF(a) && IsINF(b))
{
ex_mask |= FPSCR_VXIDI;
res = PPC_NAN;
}
else
{
res = ForceSingle(a / b);
}
}
}
}
// PS1
{
double a = rPS1(_inst.FA);
double b = rPS1(_inst.FB);
double &res = rPS1(_inst.FD);
if (a != a) res = a;
else if (b != b) res = b;
else
{
if (b == 0.0)
{
ex_mask |= FPSCR_ZX;
if (rPS0(_inst.FA) == 0.0)
{
ex_mask |= FPSCR_VXZDZ;
res = PPC_NAN;
}
else
{
res = ForceSingle(a / b);
}
}
else
{
if (IsINF(a) && IsINF(b))
{
ex_mask |= FPSCR_VXIDI;
res = PPC_NAN;
}
else
{
res = ForceSingle(a / b);
}
}
}
}
SetFPException(ex_mask);
UpdateFPRF(rPS0(_inst.FD));
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
} }
void ps_res(UGeckoInstruction _inst) void ps_res(UGeckoInstruction _inst)
{ {
rPS0(_inst.FD) = 1.0f / static_cast<float>(rPS0(_inst.FB)); // this code is based on the real hardware tests
rPS1(_inst.FD) = 1.0f / static_cast<float>(rPS1(_inst.FB)); double a = rPS0(_inst.FB);
if (fabs(rPS0(_inst.FB)) == 0.0) { double b = rPS1(_inst.FB);
FPSCR.ZX = 1; if (a == 0.0 || b == 0.0)
{
SetFPException(FPSCR_ZX);
} }
rPS0(_inst.FD) = ForceSingle(1.0 / a);
if (a != 0.0 && IsINF(rPS0(_inst.FD)))
{
if (rPS0(_inst.FD) > 0)
riPS0(_inst.FD) = MAX_SINGLE; // largest finite single
else
riPS0(_inst.FD) = MIN_SINGLE; // most negative finite single
}
rPS1(_inst.FD) = ForceSingle(1.0 / b);
if (b != 0.0 && IsINF(rPS1(_inst.FD)))
{
if (rPS1(_inst.FD) > 0)
riPS1(_inst.FD) = MAX_SINGLE;
else
riPS1(_inst.FD) = MIN_SINGLE;
}
UpdateFPRF(rPS0(_inst.FD));
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
} }
void ps_rsqrte(UGeckoInstruction _inst) void ps_rsqrte(UGeckoInstruction _inst)
{ {
rPS0(_inst.FD) = static_cast<double>(1.0f / sqrtf((float)rPS0(_inst.FB))); // this code is based on the real hardware tests
rPS1(_inst.FD) = static_cast<double>(1.0f / sqrtf((float)rPS1(_inst.FB))); if (rPS0(_inst.FB) == 0.0 || rPS1(_inst.FB) == 0.0)
if (fabs(rPS0(_inst.FB)) == 0.0) { {
FPSCR.ZX = 1; SetFPException(FPSCR_ZX);
} }
// PS0
if (rPS0(_inst.FB) < 0.0)
{
SetFPException(FPSCR_VXSQRT);
rPS0(_inst.FD) = PPC_NAN;
}
else
{
rPS0(_inst.FD) = 1.0 / sqrt(rPS0(_inst.FB));
u32 t = ConvertToSingle(riPS0(_inst.FD));
rPS0(_inst.FD) = *(float*)&t;
}
// PS1
if (rPS1(_inst.FB) < 0.0)
{
SetFPException(FPSCR_VXSQRT);
rPS1(_inst.FD) = PPC_NAN;
}
else
{
rPS1(_inst.FD) = 1.0 / sqrt(rPS1(_inst.FB));
u32 t = ConvertToSingle(riPS1(_inst.FD));
rPS1(_inst.FD) = *(float*)&t;
}
UpdateFPRF(rPS0(_inst.FD));
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
} }
void ps_sub(UGeckoInstruction _inst) void ps_sub(UGeckoInstruction _inst)
{ {
rPS0(_inst.FD) = static_cast<float>(rPS0(_inst.FA) - rPS0(_inst.FB)); rPS0(_inst.FD) = ForceSingle(NI_sub(rPS0(_inst.FA), rPS0(_inst.FB)));
rPS1(_inst.FD) = static_cast<float>(rPS1(_inst.FA) - rPS1(_inst.FB)); rPS1(_inst.FD) = ForceSingle(NI_sub(rPS1(_inst.FA), rPS1(_inst.FB)));
UpdateFPRF(rPS0(_inst.FD));
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
} }
void ps_add(UGeckoInstruction _inst) void ps_add(UGeckoInstruction _inst)
{ {
rPS0(_inst.FD) = static_cast<float>(rPS0(_inst.FA) + rPS0(_inst.FB)); rPS0(_inst.FD) = ForceSingle(NI_add(rPS0(_inst.FA), rPS0(_inst.FB)));
rPS1(_inst.FD) = static_cast<float>(rPS1(_inst.FA) + rPS1(_inst.FB)); rPS1(_inst.FD) = ForceSingle(NI_add(rPS1(_inst.FA), rPS1(_inst.FB)));
UpdateFPRF(rPS0(_inst.FD));
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
} }
void ps_mul(UGeckoInstruction _inst) void ps_mul(UGeckoInstruction _inst)
{ {
rPS0(_inst.FD) = static_cast<float>(rPS0(_inst.FA) * rPS0(_inst.FC)); rPS0(_inst.FD) = ForceSingle(NI_mul(rPS0(_inst.FA), rPS0(_inst.FC)));
rPS1(_inst.FD) = static_cast<float>(rPS1(_inst.FA) * rPS1(_inst.FC)); rPS1(_inst.FD) = ForceSingle(NI_mul(rPS1(_inst.FA), rPS1(_inst.FC)));
UpdateFPRF(rPS0(_inst.FD));
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
} }
// ps_msub: paired-single multiply-subtract; each lane of FD receives (FA * FC) - FB.
// Each line in this span shows the pre-change text followed by the post-change text.
//   before: the expression was evaluated in double and narrowed with static_cast<float>.
//   after:  NI_msub(FA, FC, FB) supplies the value and ForceSingle() narrows it.
// NOTE(review): NI_msub/ForceSingle are declared outside this view -- confirm that
// NI_msub's argument order really means (a * c) - b.
void ps_msub(UGeckoInstruction _inst) void ps_msub(UGeckoInstruction _inst)
{ {
rPS0(_inst.FD) = static_cast<float>((rPS0(_inst.FA) * rPS0(_inst.FC)) - rPS0(_inst.FB)); rPS0(_inst.FD) = ForceSingle(NI_msub(rPS0(_inst.FA), rPS0(_inst.FC), rPS0(_inst.FB)));
rPS1(_inst.FD) = static_cast<float>((rPS1(_inst.FA) * rPS1(_inst.FC)) - rPS1(_inst.FB)); rPS1(_inst.FD) = ForceSingle(NI_msub(rPS1(_inst.FA), rPS1(_inst.FC), rPS1(_inst.FB)));
// Present on one side of the change only; passes lane 0 of the result.
UpdateFPRF(rPS0(_inst.FD));
// With the Rc bit set, CR1 is updated from lane 0 of FD.
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
} }
// ps_madd: paired-single multiply-add; each lane of FD receives (FA * FC) + FB.
// Each line in this span shows the pre-change text followed by the post-change text.
//   before: the expression was evaluated in double and narrowed with static_cast<float>.
//   after:  NI_madd(FA, FC, FB) supplies the value and ForceSingle() narrows it.
// NOTE(review): NI_madd/ForceSingle are declared outside this view -- confirm that
// NI_madd's argument order really means (a * c) + b.
void ps_madd(UGeckoInstruction _inst) void ps_madd(UGeckoInstruction _inst)
{ {
rPS0(_inst.FD) = static_cast<float>((rPS0(_inst.FA) * rPS0(_inst.FC)) + rPS0(_inst.FB)); rPS0(_inst.FD) = ForceSingle(NI_madd(rPS0(_inst.FA), rPS0(_inst.FC), rPS0(_inst.FB)));
rPS1(_inst.FD) = static_cast<float>((rPS1(_inst.FA) * rPS1(_inst.FC)) + rPS1(_inst.FB)); rPS1(_inst.FD) = ForceSingle(NI_madd(rPS1(_inst.FA), rPS1(_inst.FC), rPS1(_inst.FB)));
// Present on one side of the change only; passes lane 0 of the result.
UpdateFPRF(rPS0(_inst.FD));
// With the Rc bit set, CR1 is updated from lane 0 of FD.
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
} }
// ps_nmsub: paired-single negated multiply-subtract; each lane of FD receives
// the negation of ((FA * FC) - FB).
// Each line in this span shows the pre-change text followed by the post-change text.
//   before: unary minus applied to the double expression, then static_cast<float>.
//   after:  0.0 - NI_msub(FA, FC, FB), narrowed by ForceSingle().
// NOTE(review): "0.0 - x" is not the same as "-x" when x is +0.0: under
// round-to-nearest it yields +0.0, whereas unary minus yields -0.0. Confirm which
// sign of zero the emulated hardware produces here.
// NOTE(review): NI_msub/ForceSingle are declared outside this view.
void ps_nmsub(UGeckoInstruction _inst) void ps_nmsub(UGeckoInstruction _inst)
{ {
rPS0(_inst.FD) = static_cast<float>(-(rPS0(_inst.FA) * rPS0(_inst.FC) - rPS0(_inst.FB))); rPS0(_inst.FD) = ForceSingle( 0.0-NI_msub( rPS0(_inst.FA), rPS0(_inst.FC), rPS0(_inst.FB) ) );
rPS1(_inst.FD) = static_cast<float>(-(rPS1(_inst.FA) * rPS1(_inst.FC) - rPS1(_inst.FB))); rPS1(_inst.FD) = ForceSingle( 0.0-NI_msub( rPS1(_inst.FA), rPS1(_inst.FC), rPS1(_inst.FB) ) );
// Present on one side of the change only; passes lane 0 of the result.
UpdateFPRF(rPS0(_inst.FD));
// With the Rc bit set, CR1 is updated from lane 0 of FD.
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
} }
// ps_nmadd: paired-single negated multiply-add; each lane of FD receives
// the negation of ((FA * FC) + FB).
// Each line in this span shows the pre-change text followed by the post-change text.
//   before: unary minus applied to the double expression, then static_cast<float>.
//   after:  0.0 - NI_madd(FA, FC, FB), narrowed by ForceSingle().
// NOTE(review): "0.0 - x" is not the same as "-x" when x is +0.0: under
// round-to-nearest it yields +0.0, whereas unary minus yields -0.0. Confirm which
// sign of zero the emulated hardware produces here.
// NOTE(review): NI_madd/ForceSingle are declared outside this view.
void ps_nmadd(UGeckoInstruction _inst) void ps_nmadd(UGeckoInstruction _inst)
{ {
rPS0(_inst.FD) = static_cast<float>(-(rPS0(_inst.FA) * rPS0(_inst.FC) + rPS0(_inst.FB))); rPS0(_inst.FD) = ForceSingle( 0.0-NI_madd( rPS0(_inst.FA), rPS0(_inst.FC), rPS0(_inst.FB) ) );
rPS1(_inst.FD) = static_cast<float>(-(rPS1(_inst.FA) * rPS1(_inst.FC) + rPS1(_inst.FB))); rPS1(_inst.FD) = ForceSingle( 0.0-NI_madd( rPS1(_inst.FA), rPS1(_inst.FC), rPS1(_inst.FB) ) );
// Present on one side of the change only; passes lane 0 of the result.
UpdateFPRF(rPS0(_inst.FD));
// With the Rc bit set, CR1 is updated from lane 0 of FD.
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
} }
// ps_sum0: cross-lane sum into lane 0.
//   FD.ps0 = FA.ps0 + FB.ps1
//   FD.ps1 = FC.ps1
// Each line in this span shows the pre-change text followed by the post-change text.
//   before: both values were narrowed with a C-style (float) cast.
//   after:  the sum comes from NI_add() and both values go through ForceSingle().
// Both results are computed into locals before either lane of FD is written;
// presumably this keeps the inputs intact when FD names a source register.
void ps_sum0(UGeckoInstruction _inst) void ps_sum0(UGeckoInstruction _inst)
{ {
double p0 = (float)(rPS0(_inst.FA) + rPS1(_inst.FB)); double p0 = ForceSingle(NI_add(rPS0(_inst.FA), rPS1(_inst.FB)));
double p1 = (float)(rPS1(_inst.FC)); double p1 = ForceSingle(rPS1(_inst.FC));
rPS0(_inst.FD) = p0; rPS0(_inst.FD) = p0;
rPS1(_inst.FD) = p1; rPS1(_inst.FD) = p1;
// Present on one side of the change only; passes lane 0 (the summed lane).
UpdateFPRF(rPS0(_inst.FD));
// With the Rc bit set, CR1 is updated from lane 0 of FD.
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
} }
// ps_sum1: cross-lane sum into lane 1.
//   FD.ps0 = FC.ps0
//   FD.ps1 = FA.ps0 + FB.ps1
// Each line in this span shows the pre-change text followed by the post-change text.
//   before: values were kept as plain doubles with no narrowing to single.
//   after:  the sum comes from NI_add() and both values go through ForceSingle().
// Both results are computed into locals before either lane of FD is written;
// presumably this keeps the inputs intact when FD names a source register.
void ps_sum1(UGeckoInstruction _inst) void ps_sum1(UGeckoInstruction _inst)
{ {
double p0 = rPS0(_inst.FC); double p0 = ForceSingle(rPS0(_inst.FC));
double p1 = rPS0(_inst.FA) + rPS1(_inst.FB); double p1 = ForceSingle(NI_add(rPS0(_inst.FA), rPS1(_inst.FB)));
rPS0(_inst.FD) = p0; rPS0(_inst.FD) = p0;
rPS1(_inst.FD) = p1; rPS1(_inst.FD) = p1;
// Present on one side of the change only. Unlike the sibling paired ops, this
// passes lane 1 (the summed lane), while the CR1 update below still uses lane 0.
UpdateFPRF(rPS1(_inst.FD));
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
} }
// ps_muls0: multiply both lanes of FA by lane 0 of FC.
//   FD.ps0 = FA.ps0 * FC.ps0
//   FD.ps1 = FA.ps1 * FC.ps0
// Each line in this span shows the pre-change text followed by the post-change text.
//   before: plain double products with no narrowing to single.
//   after:  products come from NI_mul() and are narrowed by ForceSingle().
// Results are staged in locals before FD is written; presumably so that FD may
// name FA or FC without corrupting the second product.
void ps_muls0(UGeckoInstruction _inst) void ps_muls0(UGeckoInstruction _inst)
{ {
double p0 = rPS0(_inst.FA) * rPS0(_inst.FC); double p0 = ForceSingle(NI_mul(rPS0(_inst.FA), rPS0(_inst.FC)));
double p1 = rPS1(_inst.FA) * rPS0(_inst.FC); double p1 = ForceSingle(NI_mul(rPS1(_inst.FA), rPS0(_inst.FC)));
rPS0(_inst.FD) = p0; rPS0(_inst.FD) = p0;
rPS1(_inst.FD) = p1; rPS1(_inst.FD) = p1;
// Present on one side of the change only; passes lane 0 of the result.
UpdateFPRF(rPS0(_inst.FD));
// With the Rc bit set, CR1 is updated from lane 0 of FD.
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
} }
// ps_muls1: multiply both lanes of FA by lane 1 of FC.
//   FD.ps0 = FA.ps0 * FC.ps1
//   FD.ps1 = FA.ps1 * FC.ps1
// Each line in this span shows the pre-change text followed by the post-change text.
//   before: plain double products with no narrowing to single.
//   after:  products come from NI_mul() and are narrowed by ForceSingle().
// Results are staged in locals before FD is written; presumably so that FD may
// name FA or FC without corrupting the second product.
void ps_muls1(UGeckoInstruction _inst) void ps_muls1(UGeckoInstruction _inst)
{ {
double p0 = rPS0(_inst.FA) * rPS1(_inst.FC); double p0 = ForceSingle(NI_mul(rPS0(_inst.FA), rPS1(_inst.FC)));
double p1 = rPS1(_inst.FA) * rPS1(_inst.FC); double p1 = ForceSingle(NI_mul(rPS1(_inst.FA), rPS1(_inst.FC)));
rPS0(_inst.FD) = p0; rPS0(_inst.FD) = p0;
rPS1(_inst.FD) = p1; rPS1(_inst.FD) = p1;
// Present on one side of the change only; passes lane 0 of the result.
UpdateFPRF(rPS0(_inst.FD));
// With the Rc bit set, CR1 is updated from lane 0 of FD.
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
} }
// ps_madds0: multiply-add using lane 0 of FC as the multiplier for both lanes.
//   FD.ps0 = FA.ps0 * FC.ps0 + FB.ps0
//   FD.ps1 = FA.ps1 * FC.ps0 + FB.ps1
// Each line in this span shows the pre-change text followed by the post-change text.
//   before: plain double arithmetic with no narrowing to single.
//   after:  NI_madd(a, c, b) supplies each value and ForceSingle() narrows it.
// Results are staged in locals before FD is written; presumably so that FD may
// name a source register without corrupting the second lane's inputs.
void ps_madds0(UGeckoInstruction _inst) void ps_madds0(UGeckoInstruction _inst)
{ {
double p0 = (rPS0(_inst.FA) * rPS0(_inst.FC)) + rPS0(_inst.FB); double p0 = ForceSingle( NI_madd( rPS0(_inst.FA), rPS0(_inst.FC), rPS0(_inst.FB)) );
double p1 = (rPS1(_inst.FA) * rPS0(_inst.FC)) + rPS1(_inst.FB); double p1 = ForceSingle( NI_madd( rPS1(_inst.FA), rPS0(_inst.FC), rPS1(_inst.FB)) );
rPS0(_inst.FD) = p0; rPS0(_inst.FD) = p0;
rPS1(_inst.FD) = p1; rPS1(_inst.FD) = p1;
// Present on one side of the change only; passes lane 0 of the result.
UpdateFPRF(rPS0(_inst.FD));
// With the Rc bit set, CR1 is updated from lane 0 of FD.
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
} }
// ps_madds1: multiply-add using lane 1 of FC as the multiplier for both lanes.
//   FD.ps0 = FA.ps0 * FC.ps1 + FB.ps0
//   FD.ps1 = FA.ps1 * FC.ps1 + FB.ps1
// Each line in this span shows the pre-change text followed by the post-change text.
//   before: plain double arithmetic with no narrowing to single.
//   after:  NI_madd(a, c, b) supplies each value and ForceSingle() narrows it.
// Results are staged in locals before FD is written; presumably so that FD may
// name a source register without corrupting the second lane's inputs.
void ps_madds1(UGeckoInstruction _inst) void ps_madds1(UGeckoInstruction _inst)
{ {
double p0 = (rPS0(_inst.FA) * rPS1(_inst.FC)) + rPS0(_inst.FB); double p0 = ForceSingle( NI_madd( rPS0(_inst.FA), rPS1(_inst.FC), rPS0(_inst.FB)) );
double p1 = (rPS1(_inst.FA) * rPS1(_inst.FC)) + rPS1(_inst.FB); double p1 = ForceSingle( NI_madd( rPS1(_inst.FA), rPS1(_inst.FC), rPS1(_inst.FB)) );
rPS0(_inst.FD) = p0; rPS0(_inst.FD) = p0;
rPS1(_inst.FD) = p1; rPS1(_inst.FD) = p1;
// Present on one side of the change only; passes lane 0 of the result.
UpdateFPRF(rPS0(_inst.FD));
// With the Rc bit set, CR1 is updated from lane 0 of FD.
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
} }
@ -250,17 +384,15 @@ void ps_cmpu0(UGeckoInstruction _inst)
compareResult = 1; compareResult = 1;
if (IsSNAN(fa) || IsSNAN(fb)) if (IsSNAN(fa) || IsSNAN(fb))
{ {
FPSCR.VXSNAN = 1; SetFPException(FPSCR_VXSNAN);
} }
} }
FPSCR.FPRF = compareResult;
SetCRField(_inst.CRFD, compareResult); SetCRField(_inst.CRFD, compareResult);
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
} }
void ps_cmpo0(UGeckoInstruction _inst) void ps_cmpo0(UGeckoInstruction _inst)
{ {
// Ector, please check
//ps_cmpu0(_inst);
double fa = rPS0(_inst.FA); double fa = rPS0(_inst.FA);
double fb = rPS0(_inst.FB); double fb = rPS0(_inst.FB);
int compareResult; int compareResult;
@ -273,14 +405,18 @@ void ps_cmpo0(UGeckoInstruction _inst)
compareResult = 1; compareResult = 1;
if (IsSNAN(fa) || IsSNAN(fb)) if (IsSNAN(fa) || IsSNAN(fb))
{ {
FPSCR.VXSNAN = 1; SetFPException(FPSCR_VXSNAN);
if (!FPSCR.FEX) FPSCR.VXVC = 1; if (!FPSCR.VE)
SetFPException(FPSCR_VXVC);
} }
else if (IsQNAN(fa) || IsQNAN(fb)) else
FPSCR.VXVC = 1; {
//if (IsQNAN(fa) || IsQNAN(fb)) // this is always true
SetFPException(FPSCR_VXVC);
} }
}
FPSCR.FPRF = compareResult;
SetCRField(_inst.CRFD, compareResult); SetCRField(_inst.CRFD, compareResult);
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
} }
void ps_cmpu1(UGeckoInstruction _inst) void ps_cmpu1(UGeckoInstruction _inst)
@ -297,17 +433,15 @@ void ps_cmpu1(UGeckoInstruction _inst)
compareResult = 1; compareResult = 1;
if (IsSNAN(fa) || IsSNAN(fb)) if (IsSNAN(fa) || IsSNAN(fb))
{ {
FPSCR.VXSNAN = 1; SetFPException(FPSCR_VXSNAN);
} }
} }
FPSCR.FPRF = compareResult;
SetCRField(_inst.CRFD, compareResult); SetCRField(_inst.CRFD, compareResult);
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
} }
void ps_cmpo1(UGeckoInstruction _inst) void ps_cmpo1(UGeckoInstruction _inst)
{ {
// Ector, please check
//ps_cmpu1(_inst);
double fa = rPS1(_inst.FA); double fa = rPS1(_inst.FA);
double fb = rPS1(_inst.FB); double fb = rPS1(_inst.FB);
int compareResult; int compareResult;
@ -320,14 +454,18 @@ void ps_cmpo1(UGeckoInstruction _inst)
compareResult = 1; compareResult = 1;
if (IsSNAN(fa) || IsSNAN(fb)) if (IsSNAN(fa) || IsSNAN(fb))
{ {
FPSCR.VXSNAN = 1; SetFPException(FPSCR_VXSNAN);
if (!FPSCR.FEX) FPSCR.VXVC = 1; if (!FPSCR.VE)
SetFPException(FPSCR_VXVC);
} }
else if (IsQNAN(fa) || IsQNAN(fb)) else
FPSCR.VXVC = 1; {
//if (IsQNAN(fa) || IsQNAN(fb)) // this is always true
SetFPException(FPSCR_VXVC);
} }
}
FPSCR.FPRF = compareResult;
SetCRField(_inst.CRFD, compareResult); SetCRField(_inst.CRFD, compareResult);
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
} }
// __________________________________________________________________________________________________ // __________________________________________________________________________________________________

View File

@ -43,6 +43,8 @@ static const unsigned short FPU_ROUND_MASK = 3 << 10;
#include "../../Core.h" #include "../../Core.h"
#include "Interpreter.h" #include "Interpreter.h"
#include "Interpreter_FPUtils.h"
/* /*
Most of these are together with fctiwx Most of these are together with fctiwx
@ -95,8 +97,8 @@ void FPSCRtoFPUSettings(UReg_FPSCR fp)
#endif #endif
if (fp.VE || fp.OE || fp.UE || fp.ZE || fp.XE) if (fp.VE || fp.OE || fp.UE || fp.ZE || fp.XE)
{ {
//PanicAlert("FPSCR - exceptions enabled. Please report. VE=%i OE=%i UE=%i ZE=%i XE=%i", PanicAlert("FPSCR - exceptions enabled. Please report. VE=%i OE=%i UE=%i ZE=%i XE=%i",
// fp.VE, fp.OE, fp.UE, fp.ZE, fp.XE); fp.VE, fp.OE, fp.UE, fp.ZE, fp.XE);
// Pokemon Colosseum does this. Gah. // Pokemon Colosseum does this. Gah.
} }
@ -119,60 +121,29 @@ void FPSCRtoFPUSettings(UReg_FPSCR fp)
_mm_setcsr(csr); _mm_setcsr(csr);
} }
// mcrfs: copy one 4-bit FPSCR field into a CR field, then clear the exception
// bits that belong to that field.
// These lines appear on one side of the diff only; this looks like the copy the
// change removes from this position (a definition with the same name appears
// again later in the file).
void mcrfs(UGeckoInstruction _inst)
{
// Field 0 is the most-significant nibble of FPSCR.Hex, hence the (7 - CRFS) shift.
// The value is captured before any bit is cleared below.
u32 fpflags = ((FPSCR.Hex >> (4*(7 - _inst.CRFS))) & 0xF);
// Only fields 0, 1, 2, 3 and 5 have bits cleared; fields 4, 6 and 7 are left untouched.
switch (_inst.CRFS) {
case 0:
FPSCR.FX = 0;
FPSCR.OX = 0;
break;
case 1:
FPSCR.UX = 0;
FPSCR.ZX = 0;
FPSCR.XX = 0;
FPSCR.VXSNAN = 0;
break;
case 2:
FPSCR.VXISI = 0;
FPSCR.VXIDI = 0;
FPSCR.VXZDZ = 0;
FPSCR.VXIMZ = 0;
break;
case 3:
FPSCR.VXVC = 0;
break;
case 5:
FPSCR.VXSOFT = 0;
FPSCR.VXSQRT = 0;
FPSCR.VXCVI = 0;
break;
}
// Publish the pre-clear field value to the destination CR field.
SetCRField(_inst.CRFD, fpflags);
// Re-apply the FPSCR to the host FPU state after the modification.
FPSCRtoFPUSettings(FPSCR);
}
// mffsx: move from FPSCR -- writes the 32-bit FPSCR value, widened to 64 bits,
// into the integer view of lane 0 of FD.
// These lines appear on one side of the diff only; this looks like the copy the
// change removes from this position (a definition with the same name appears
// again later in the file).
void mffsx(UGeckoInstruction _inst)
{
// load from FPSCR
// This may or may not be accurate - but better than nothing, I guess
// TODO(ector): grab all overflow flags etc and set them in FPSCR
riPS0(_inst.FD) = (u64)FPSCR.Hex;
// The record (Rc) form is not implemented; it only raises an alert.
if (_inst.Rc) PanicAlert("mffsx: inst_.Rc");
}
// mtfsb0x: clear a single FPSCR bit selected by CRBD (bit 0 is the most-significant
// bit, hence 0x80000000 >> CRBD), then push the updated FPSCR to the host FPU settings.
// Each two-column line shows the pre-change text followed by the post-change text;
// single-column lines exist on one side of the change only.
//   before: mask built and applied in one expression.
//   after:  mask kept in local "b" (also referenced by the disabled diagnostic) and applied separately.
void mtfsb0x(UGeckoInstruction _inst) void mtfsb0x(UGeckoInstruction _inst)
{ {
FPSCR.Hex &= (~(0x80000000 >> _inst.CRBD)); u32 b = 0x80000000 >> _inst.CRBD;
/*if (b & 0x9ff80700)
PanicAlert("mtfsb0 clears bit %d, PC=%x", _inst.CRBD, PC);*/
FPSCR.Hex &= ~b;
FPSCRtoFPUSettings(FPSCR); FPSCRtoFPUSettings(FPSCR);
// The record (Rc) form is not implemented; it only raises an alert.
if (_inst.Rc) PanicAlert("mtfsb0x: inst_.Rc"); if (_inst.Rc) PanicAlert("mtfsb0x: inst_.Rc");
} }
// mtfsb1x: set a single FPSCR bit selected by CRBD (bit 0 is the most-significant
// bit, hence 0x80000000 >> CRBD), then push the updated FPSCR to the host FPU settings.
// Each two-column line shows the pre-change text followed by the post-change text;
// single-column lines exist on one side of the change only.
//   before: the bit was always OR-ed straight into FPSCR.Hex.
//   after:  bits inside FPSCR_ANY_X are raised through SetFPException(); all other
//           bits are OR-ed in directly.
// NOTE(review): SetFPException and FPSCR_ANY_X are declared outside this view; the
// inline comment suggests the helper also maintains FX -- confirm.
void mtfsb1x(UGeckoInstruction _inst) void mtfsb1x(UGeckoInstruction _inst)
{ {
FPSCR.Hex |= 0x80000000 >> _inst.CRBD; // this instruction can affect FX
u32 b = 0x80000000 >> _inst.CRBD;
if (b & FPSCR_ANY_X)
SetFPException(b);
else
FPSCR.Hex |= b;
FPSCRtoFPUSettings(FPSCR); FPSCRtoFPUSettings(FPSCR);
// The record (Rc) form is not implemented; it only raises an alert.
if (_inst.Rc) PanicAlert("mtfsb1x: inst_.Rc"); if (_inst.Rc) PanicAlert("mtfsb1x: inst_.Rc");
} }
@ -180,8 +151,15 @@ void mtfsfix(UGeckoInstruction _inst)
{ {
u32 mask = (0xF0000000 >> (4 * _inst.CRFD)); u32 mask = (0xF0000000 >> (4 * _inst.CRFD));
u32 imm = (_inst.hex << 16) & 0xF0000000; u32 imm = (_inst.hex << 16) & 0xF0000000;
/*u32 cleared = ~(imm >> (4 * _inst.CRFD)) & FPSCR.Hex & mask;
if (cleared & 0x9ff80700)
PanicAlert("mtfsfi clears %08x, PC=%x", cleared, PC);*/
FPSCR.Hex = (FPSCR.Hex & ~mask) | (imm >> (4 * _inst.CRFD)); FPSCR.Hex = (FPSCR.Hex & ~mask) | (imm >> (4 * _inst.CRFD));
FPSCRtoFPUSettings(FPSCR); FPSCRtoFPUSettings(FPSCR);
if (_inst.Rc) PanicAlert("mtfsfix: inst_.Rc"); if (_inst.Rc) PanicAlert("mtfsfix: inst_.Rc");
} }
@ -189,14 +167,19 @@ void mtfsfx(UGeckoInstruction _inst)
{ {
u32 fm = _inst.FM; u32 fm = _inst.FM;
u32 m = 0; u32 m = 0;
for (int i = 0; i < 8; i++) //7?? todo check for (int i = 0; i < 8; i++)
{ {
if (fm & (1 << i)) if (fm & (1 << i))
m |= (0xF << (i * 4)); m |= (0xF << (i * 4));
} }
/*u32 cleared = ~((u32)(riPS0(_inst.FB))) & FPSCR.Hex & m;
if (cleared & 0x9ff80700)
PanicAlert("mtfsf clears %08x, PC=%x", cleared, PC);*/
FPSCR.Hex = (FPSCR.Hex & ~m) | ((u32)(riPS0(_inst.FB)) & m); FPSCR.Hex = (FPSCR.Hex & ~m) | ((u32)(riPS0(_inst.FB)) & m);
FPSCRtoFPUSettings(FPSCR); FPSCRtoFPUSettings(FPSCR);
if (_inst.Rc) PanicAlert("mtfsfx: inst_.Rc"); if (_inst.Rc) PanicAlert("mtfsfx: inst_.Rc");
} }
@ -458,4 +441,53 @@ void isync(UGeckoInstruction _inst)
//shouldnt do anything //shouldnt do anything
} }
// the following commands read from FPSCR
// mcrfs: copy one 4-bit FPSCR field into a CR field, then clear the exception
// bits that belong to that field.
// These lines appear on one side of the diff only; this looks like the copy the
// change adds at this position.
void mcrfs(UGeckoInstruction _inst)
{
//if (_inst.CRFS != 3 && _inst.CRFS != 4)
// PanicAlert("msrfs at %x, CRFS = %d, CRFD = %d", PC, (int)_inst.CRFS, (int)_inst.CRFD);
// NOTE(review): UpdateFPSCR() is declared outside this view; presumably it brings
// FPSCR up to date before it is read -- confirm.
UpdateFPSCR();
// Field 0 is the most-significant nibble of FPSCR.Hex, hence the (7 - CRFS) shift.
// The value is captured before any bit is cleared below.
u32 fpflags = ((FPSCR.Hex >> (4*(7 - _inst.CRFS))) & 0xF);
// Only fields 0, 1, 2, 3 and 5 have bits cleared; fields 4, 6 and 7 are left untouched.
switch (_inst.CRFS) {
case 0:
FPSCR.FX = 0;
FPSCR.OX = 0;
break;
case 1:
FPSCR.UX = 0;
FPSCR.ZX = 0;
FPSCR.XX = 0;
FPSCR.VXSNAN = 0;
break;
case 2:
FPSCR.VXISI = 0;
FPSCR.VXIDI = 0;
FPSCR.VXZDZ = 0;
FPSCR.VXIMZ = 0;
break;
case 3:
FPSCR.VXVC = 0;
break;
case 5:
FPSCR.VXSOFT = 0;
FPSCR.VXSQRT = 0;
FPSCR.VXCVI = 0;
break;
}
// Publish the pre-clear field value to the destination CR field.
SetCRField(_inst.CRFD, fpflags);
}
// mffsx: move from FPSCR -- writes the 32-bit FPSCR value, widened to 64 bits,
// into the integer view of lane 0 of FD.
// These lines appear on one side of the diff only; this looks like the copy the
// change adds at this position.
void mffsx(UGeckoInstruction _inst)
{
// load from FPSCR
// This may or may not be accurate - but better than nothing, I guess
// TODO(ector): grab all overflow flags etc and set them in FPSCR
// NOTE(review): UpdateFPSCR() is declared outside this view; presumably it brings
// FPSCR up to date before it is read -- confirm.
UpdateFPSCR();
riPS0(_inst.FD) = (u64)FPSCR.Hex;
// The record (Rc) form is not implemented; it only raises an alert.
if (_inst.Rc) PanicAlert("mffsx: inst_.Rc");
}
} // namespace } // namespace