Improve accuracy of FPU emulation slightly - still no F-Zero improvements :(

Generic code cleanup.

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@3458 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
hrydgard 2009-06-15 21:10:11 +00:00
parent dcae21f692
commit 4dba267775
16 changed files with 355 additions and 383 deletions

View File

@ -32,7 +32,7 @@ static const u32 default_sse_state = _mm_getcsr();
namespace MathUtil namespace MathUtil
{ {
int ClassifyFP(double dvalue) int ClassifyDouble(double dvalue)
{ {
// TODO: Optimize the below to be as fast as possible. // TODO: Optimize the below to be as fast as possible.
IntDouble value; IntDouble value;
@ -79,6 +79,53 @@ int ClassifyFP(double dvalue)
return 0x4; return 0x4;
} }
int ClassifyFloat(float fvalue)
{
// TODO: Optimize the below to be as fast as possible.
IntFloat value;
value.f = fvalue;
// 5 bits (C, <, >, =, ?)
// easy cases first
if (value.i == 0) {
// positive zero
return 0x2;
} else if (value.i == 0x80000000) {
// negative zero
return 0x12;
} else if (value.i == 0x7F800000) {
// positive inf
return 0x5;
} else if (value.i == 0xFF800000) {
// negative inf
return 0x9;
} else {
// OK let's dissect this thing.
int sign = value.i >> 31;
int exp = (int)((value.i >> 23) & 0xFF);
if (exp >= 1 && exp <= 254) {
// Nice normalized number.
if (sign) {
return 0x8; // negative
} else {
return 0x4; // positive
}
}
u64 mantissa = value.i & 0x007FFFFF;
if (exp == 0 && mantissa) {
// Denormalized number.
if (sign) {
return 0x18;
} else {
return 0x14;
}
} else if (exp == 0xFF && mantissa /* && mantissa_top*/) {
return 0x11; // Quiet NAN
}
}
return 0x4;
}
} // namespace } // namespace
void LoadDefaultSSEState() void LoadDefaultSSEState()

View File

@ -98,12 +98,9 @@ enum PPCFpClass
// Uses PowerPC conventions for the return value, so it can be easily // Uses PowerPC conventions for the return value, so it can be easily
// used directly in CPU emulation. // used directly in CPU emulation.
int ClassifyFP(double dvalue); int ClassifyDouble(double dvalue);
// More efficient float version.
// TODO: More efficient float version. int ClassifyFloat(float fvalue);
// Single-precision convenience overload: widens the argument to double and
// returns the classification produced by the double overload.
inline int ClassifyFP(float fvalue) {
	// The result must be returned explicitly; flowing off the end of a
	// non-void function is undefined behavior and discarded the class code.
	return ClassifyFP((double)fvalue);
}
} // namespace MathUtil } // namespace MathUtil

View File

@ -745,16 +745,16 @@ void Callback_VideoCopiedToXFB()
*/ */
/**/ /**/
if (FPS_To_VPS_Rate > 0 && FPS_To_VPS_Rate < ((1.0/3.0 + 1.0/2.0)/2)) FPS_To_VPS_Rate = 1.0/3.0; if (FPS_To_VPS_Rate > 0 && FPS_To_VPS_Rate < ((1.0f/3.0f + 1.0f/2.0f)/2)) FPS_To_VPS_Rate = 1.0f/3.0f;
else if (FPS_To_VPS_Rate > ((1.0/3.0 + 1.0/2.0)/2) && FPS_To_VPS_Rate < ((1.0/2.0 + 1.0/1.0)/2)) FPS_To_VPS_Rate = 1.0/2.0; else if (FPS_To_VPS_Rate > ((1.0f/3.0f + 1.0f/2.0f)/2) && FPS_To_VPS_Rate < ((1.0f/2.0f + 1.0f/1.0f)/2)) FPS_To_VPS_Rate = 1.0/2.0;
else FPS_To_VPS_Rate = 1.0; else FPS_To_VPS_Rate = 1.0;
// PAL patch adjustment // PAL patch adjustment
if (VideoInterface::TargetRefreshRate == 50) FPS_To_VPS_Rate = FPS_To_VPS_Rate * 1.2; if (VideoInterface::TargetRefreshRate == 50) FPS_To_VPS_Rate = FPS_To_VPS_Rate * 1.2f;
float TargetFPS = FPS_To_VPS_Rate * (float)VideoInterface::TargetRefreshRate; float TargetFPS = FPS_To_VPS_Rate * (float)VideoInterface::TargetRefreshRate;
float FPSPercentage = (FPS / TargetFPS) * 100.0; float FPSPercentage = (FPS / TargetFPS) * 100.0f;
float VPSPercentage = (VideoInterface::ActualRefreshRate / (float)VideoInterface::TargetRefreshRate) * 100.0; float VPSPercentage = (VideoInterface::ActualRefreshRate / (float)VideoInterface::TargetRefreshRate) * 100.0f;
// Settings are shown the same for both extended and summary info // Settings are shown the same for both extended and summary info
std::string SSettings = StringFromFormat(" | Core: %s %s", std::string SSettings = StringFromFormat(" | Core: %s %s",

View File

@ -338,7 +338,9 @@ static u32 LineCount = 0;
static u32 LinesPerField = 0; static u32 LinesPerField = 0;
static u64 LastTime = 0; static u64 LastTime = 0;
static u32 NextXFBRender = 0; static u32 NextXFBRender = 0;
int TargetRefreshRate = 0, SyncTicksProgress = 0; float ActualRefreshRate = 0.0; int TargetRefreshRate = 0;
s64 SyncTicksProgress = 0;
float ActualRefreshRate = 0.0;
void DoState(PointerWrap &p) void DoState(PointerWrap &p)
{ {
@ -1042,23 +1044,24 @@ void UpdateTiming()
// Run when: This is run 7200 times per second on full speed // Run when: This is run 7200 times per second on full speed
void Update() void Update()
{ {
// Update the target refresh rate // Update the target refresh rate
TargetRefreshRate = (m_DisplayControlRegister.FMT == 0 || m_DisplayControlRegister.FMT == 2) TargetRefreshRate = (m_DisplayControlRegister.FMT == 0 || m_DisplayControlRegister.FMT == 2)
? 60 : 50; ? 60 : 50;
// Calculate actual refresh rate // Calculate actual refresh rate
static u64 LastTick = 0; static u64 LastTick = 0;
static int UpdateCheck = timeGetTime() + 1000, TickProgress = 0; static s64 UpdateCheck = timeGetTime() + 1000, TickProgress = 0;
if (UpdateCheck < (int)timeGetTime()) if (UpdateCheck < (int)timeGetTime())
{ {
UpdateCheck = timeGetTime() + 1000; UpdateCheck = timeGetTime() + 1000;
TickProgress = CoreTiming::GetTicks() - LastTick; TickProgress = CoreTiming::GetTicks() - LastTick;
// Calculated CPU-GPU synced ticks for the dual core mode too // Calculated CPU-GPU synced ticks for the dual core mode too
NOTICE_LOG(VIDEO, "Removed: %s Mhz", ThS(SyncTicksProgress / 1000000, false).c_str()); // NOTICE_LOG(VIDEO, "Removed: %s Mhz", ThS(SyncTicksProgress / 1000000, false).c_str());
SyncTicksProgress += TickProgress; SyncTicksProgress += TickProgress;
// Multiplied by two because of the way TicksPerFrame is calculated (divided by 25 and 30 // Multiplied by two because of the way TicksPerFrame is calculated (divided by 25 and 30
// rather than 50 and 60) // rather than 50 and 60)
// TODO : Feed the FPS estimate into Iulius' framelimiter.
ActualRefreshRate = ((float)SyncTicksProgress / (float)TicksPerFrame) * 2.0; ActualRefreshRate = ((float)SyncTicksProgress / (float)TicksPerFrame) * 2.0;
LastTick = CoreTiming::GetTicks(); LastTick = CoreTiming::GetTicks();
SyncTicksProgress = 0; SyncTicksProgress = 0;

View File

@ -52,7 +52,11 @@ namespace VideoInterface
// Update and draw framebuffer(s) // Update and draw framebuffer(s)
void Update(); void Update();
extern float ActualRefreshRate; extern int TargetRefreshRate, SyncTicksProgress;
// urgh, ugly externs.
extern float ActualRefreshRate;
extern int TargetRefreshRate;
extern s64 SyncTicksProgress;
// UpdateInterrupts: check if we have to generate a new VI Interrupt // UpdateInterrupts: check if we have to generate a new VI Interrupt
void UpdateInterrupts(); void UpdateInterrupts();

View File

@ -311,7 +311,6 @@ namespace Interpreter
// other helper // other helper
u32 Helper_Mask(int mb, int me); u32 Helper_Mask(int mb, int me);
inline bool IsNAN(double _dValue);
extern _interpreterInstruction m_opTable[64]; extern _interpreterInstruction m_opTable[64];
extern _interpreterInstruction m_opTable4[1024]; extern _interpreterInstruction m_opTable4[1024];

View File

@ -34,36 +34,16 @@
#include "../../Core.h" #include "../../Core.h"
#include "Interpreter.h" #include "Interpreter.h"
#include "MathUtil.h"
// SUPER MONKEY BALL IS BEING A ROYAL PAIN // F-ZERO IS BEING A ROYAL PAIN
// We are missing the caller of 800070ec
// POSSIBLE APPROACHES: // POSSIBLE APPROACHES:
// * Full SW FPU. Urgh. // * Full SW FPU. Urgh.
// * Partial SW FPU, emulate just as much as necessary for monkey ball. Feasible but a lot of work. // * Partial SW FPU, emulate just as much as necessary for f-zero. Feasible, I guess.
// * HLE hacking. Figure out what all the evil functions really do and fake them. DONE (well, works okay-ish) // * HLE hacking. Figure out what all the evil functions really do and fake them.
// This worked well for Monkey Ball, not so much for F-Zero.
// Interesting places in Super Monkey Ball: using namespace MathUtil;
// 80036654: fctwixz stuff
// 80007e08:
// -98: Various entry points that loads various odd fp values into f1
// 800070b0: Estimate inverse square root.
// 800070ec: Examine f1. Reads a value out of locked cache into f2 (fixed address). Some cases causes us to call the above thing.
// If all goes well, jump to 70b0, which estimates the inverse square root.
// Then multiply the loaded variable with the original value of f1. Result should be the square root. (1 / sqrt(x)) * x = x / sqrt(x) = sqrt(x)
// 8000712c: Similar, but does not do the multiply at the end, just an frspx.
// 8000716c: Sort of similar, but has extra junk at the end.
//
//
// 800072a4 - nightmare of nightmares
// Fun stuff used:
// bso+
// mcrfs (ARGH pulls stuff out of .. FPSCR). it uses this to check the result of frsp mostly (!!!!)
// crclr
// crset
// crxor
// fnabs
// Super Monkey Ball reads FPRF & friends after fmadds, fmuls, frspx
// WHY do the FR & FI flags affect it so much?
namespace Interpreter namespace Interpreter
{ {
@ -71,112 +51,68 @@ namespace Interpreter
void UpdateFPSCR(UReg_FPSCR fp); void UpdateFPSCR(UReg_FPSCR fp);
void UpdateSSEState(); void UpdateSSEState();
// Extremely rare - actually, never seen.
// start of unit test - Dolphin needs more of these!
/*
void TestFPRF()
{
UpdateFPRF(1.0);
if (FPSCR.FPRF != 0x4)
PanicAlert("Error 1");
UpdateFPRF(-1.0);
if (FPSCR.FPRF != 0x8)
PanicAlert("Error 2");
PanicAlert("Test done");
}*/
// extremely rare
void Helper_UpdateCR1(double _fValue) void Helper_UpdateCR1(double _fValue)
{ {
// Should just update exception flags, not do any compares. // Should just update exception flags, not do any compares.
PanicAlert("CR1"); PanicAlert("CR1");
} }
// Returns true when _dValue is a NaN. Uses the property that a NaN never
// compares equal to anything, including itself.
inline bool IsNAN(double _dValue)
{
	const bool equals_itself = (_dValue == _dValue);
	return !equals_itself;
}
// Single-precision NaN test. Relies on a NaN being unequal to itself rather
// than on inspecting the exponent and mantissa bits.
inline bool _IsNAN(float x) {
	if (x == x)
		return false;  // any ordinary value (including infinities) equals itself
	return true;
}
void fcmpo(UGeckoInstruction _inst) void fcmpo(UGeckoInstruction _inst)
{ {
/* // Use FlushToZeroAsFloat() to fix a couple of games - but seriously,
float fa = static_cast<float>(rPS0(_inst.FA)); // the real problem should be fixed instead.
float fb = static_cast<float>(rPS0(_inst.FB)); double fa = rPS0(_inst.FA);
// normalize double fb = rPS0(_inst.FB);
if (((*(u32*)&fa) & 0x7f800000UL) == 0) (*(u32*)&fa) &= 0x80000000UL;
if (((*(u32*)&fb) & 0x7f800000UL) == 0) (*(u32*)&fb) &= 0x80000000UL;
*/
// normalize if conversion to float gives denormalized number int compareResult;
if ((riPS0(_inst.FA) & 0x7ff0000000000000ULL) < 0x3800000000000000ULL) if (IsNAN(fa) || IsNAN(fb))
riPS0(_inst.FA) &= 0x8000000000000000ULL; {
if ((riPS0(_inst.FB) & 0x7ff0000000000000ULL) < 0x3800000000000000ULL) FPSCR.FX = 1;
riPS0(_inst.FB) &= 0x8000000000000000ULL; compareResult = 1;
double fa = rPS0(_inst.FA); if (IsSNAN(fa) || IsSNAN(fb))
double fb = rPS0(_inst.FB); {
FPSCR.VXSNAN = 1;
u32 compareResult; if (!FPSCR.FEX || IsQNAN(fa) || IsQNAN(fb))
if (IsNAN(fa) || IsNAN(fb)) compareResult = 1; FPSCR.VXVC = 1;
else if (fa < fb) compareResult = 8; }
else if (fa > fb) compareResult = 4; }
else compareResult = 2; else if (fa < fb) compareResult = 8;
else if (fa > fb) compareResult = 4;
else compareResult = 2;
FPSCR.FPRF = compareResult; FPSCR.FPRF = compareResult;
SetCRField(_inst.CRFD, compareResult); SetCRField(_inst.CRFD, compareResult);
/* missing part
if ((frA) is an SNaN or (frB) is an SNaN )
then VXSNAN ¬ 1
if VE = 0
then VXVC ¬ 1
else if ((frA) is a QNaN or (frB) is a QNaN )
then VXVC ¬ 1 */
} }
void fcmpu(UGeckoInstruction _inst) void fcmpu(UGeckoInstruction _inst)
{ {
// Use FlushToZeroAsFloat() to fix a couple of games - but seriously,
// the real problem should be fixed instead.
double fa = rPS0(_inst.FA);
double fb = rPS0(_inst.FB);
int compareResult;
/* if (IsNAN(fa) || IsNAN(fb))
float fa = static_cast<float>(rPS0(_inst.FA)); {
float fb = static_cast<float>(rPS0(_inst.FB)); FPSCR.FX = 1;
// normalize compareResult = 1;
if (((*(u32*)&fa) & 0x7f800000UL) == 0) (*(u32*)&fa) &= 0x80000000UL; if (IsSNAN(fa) || IsSNAN(fb))
if (((*(u32*)&fb) & 0x7f800000UL) == 0) (*(u32*)&fb) &= 0x80000000UL; {
*/ FPSCR.VXSNAN = 1;
}
// normalize if conversion to float gives denormalized number }
if ((riPS0(_inst.FA) & 0x7ff0000000000000ULL) < 0x3800000000000000ULL)
riPS0(_inst.FA) &= 0x8000000000000000ULL;
if ((riPS0(_inst.FB) & 0x7ff0000000000000ULL) < 0x3800000000000000ULL)
riPS0(_inst.FB) &= 0x8000000000000000ULL;
double fa = rPS0(_inst.FA);
double fb = rPS0(_inst.FB);
u32 compareResult;
if (IsNAN(fa) || IsNAN(fb)) compareResult = 1;
else if (fa < fb) compareResult = 8; else if (fa < fb) compareResult = 8;
else if (fa > fb) compareResult = 4; else if (fa > fb) compareResult = 4;
else compareResult = 2; else compareResult = 2;
FPSCR.FPRF = compareResult; FPSCR.FPRF = compareResult;
SetCRField(_inst.CRFD, compareResult); SetCRField(_inst.CRFD, compareResult);
/* missing part
if ((frA) is an SNaN or (frB) is an SNaN)
then VXSNAN ¬ 1 */
} }
// Apply current rounding mode // Apply current rounding mode
void fctiwx(UGeckoInstruction _inst) void fctiwx(UGeckoInstruction _inst)
{ {
//UpdateSSEState();
const double b = rPS0(_inst.FB); const double b = rPS0(_inst.FB);
u32 value; u32 value;
if (b > (double)0x7fffffff) if (b > (double)0x7fffffff)
@ -215,7 +151,6 @@ largest representable int on PowerPC. */
// Always round toward zero // Always round toward zero
void fctiwzx(UGeckoInstruction _inst) void fctiwzx(UGeckoInstruction _inst)
{ {
//UpdateSSEState();
const double b = rPS0(_inst.FB); const double b = rPS0(_inst.FB);
u32 value; u32 value;
if (b > (double)0x7fffffff) if (b > (double)0x7fffffff)
@ -282,76 +217,14 @@ void fselx(UGeckoInstruction _inst)
// !!! warning !!! // !!! warning !!!
// PS1 must be set to the value of PS0 or DragonballZ will be f**ked up // PS1 must be set to the value of PS0 or DragonballZ will be f**ked up
// PS1 is said to be undefined // PS1 is said to be undefined
// Super Monkey Ball is using this to do wacky tricks so we need 100% correct emulation.
void frspx(UGeckoInstruction _inst) // round to single void frspx(UGeckoInstruction _inst) // round to single
{ {
if (true || FPSCR.RN != 0) double b = rPS0(_inst.FB);
{ double rounded = (double)(float)b;
// Not used in Super Monkey Ball //FPSCR.FI = b != rounded;
// UpdateSSEState(); UpdateFPRF(rounded);
double b = rPS0(_inst.FB); rPS0(_inst.FD) = rPS1(_inst.FD) = rounded;
double rounded = (double)(float)b; return;
//FPSCR.FI = b != rounded; // changing both of these affect Super Monkey Ball behaviour greatly.
if (Core::g_CoreStartupParameter.bEnableFPRF)
UpdateFPRF(rounded);
rPS0(_inst.FD) = rPS1(_inst.FD) = rounded;
return;
// PanicAlert("frspx: FPSCR.RN=%i", FPSCR.RN);
}
// OK, let's try it in 100% software! Not yet working right.
union {
double d;
u64 i;
} in, out;
in.d = rPS0(_inst.FB);
out = in;
int sign = (int)(in.i >> 63);
int exp = (int)((in.i >> 52) & 0x7FF);
u64 mantissa = in.i & 0x000FFFFFFFFFFFFFULL;
u64 mantissa_single = mantissa & 0x000FFFFFE0000000ULL;
u64 leftover_single = mantissa & 0x000000001FFFFFFFULL;
// OK. First make sure that we have a "normal" number.
if (exp >= 1 && exp <= 2046) {
// OK. Check for overflow. TODO
FPSCR.FI = leftover_single != 0; // Inexact
if (leftover_single >= 0x10000000ULL) {
//PanicAlert("rounding up");
FPSCR.FR = 1;
mantissa_single += 0x20000000;
if (mantissa_single & 0x0010000000000000ULL) {
// PanicAlert("renormalizing");
mantissa_single >>= 1;
exp += 1;
// if (exp > 2046) { OVERFLOW }
}
}
out.i = ((u64)sign << 63) | ((u64)exp << 52) | mantissa_single;
} else {
if (!exp && !mantissa) {
// Positive or negative Zero. All is well.
FPSCR.FI = 0;
FPSCR.FR = 0;
} else if (exp == 0 && mantissa) {
// Denormalized number.
PanicAlert("denorm");
} else if (exp == 2047 && !mantissa) {
// Infinite.
//PanicAlert("infinite");
FPSCR.FI = 1;
FPSCR.FR = 1;
// FPSCR.OX = 1;
} else {
//PanicAlert("NAN %08x %08x", in.i >> 32, in.i);
}
}
UpdateFPRF(out.d);
rPS0(_inst.FD) = rPS1(_inst.FD) = out.d;
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
} }
@ -394,11 +267,13 @@ void fmaddsx(UGeckoInstruction _inst)
void faddx(UGeckoInstruction _inst) void faddx(UGeckoInstruction _inst)
{ {
rPS0(_inst.FD) = rPS0(_inst.FA) + rPS0(_inst.FB); rPS0(_inst.FD) = rPS0(_inst.FA) + rPS0(_inst.FB);
UpdateFPRF(rPS0(_inst.FD));
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
} }
void faddsx(UGeckoInstruction _inst) void faddsx(UGeckoInstruction _inst)
{ {
rPS0(_inst.FD) = rPS1(_inst.FD) = static_cast<float>(rPS0(_inst.FA) + rPS0(_inst.FB)); rPS0(_inst.FD) = rPS1(_inst.FD) = static_cast<float>(rPS0(_inst.FA) + rPS0(_inst.FB));
UpdateFPRF(rPS0(_inst.FD));
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
} }
@ -407,51 +282,79 @@ void fdivx(UGeckoInstruction _inst)
{ {
double a = rPS0(_inst.FA); double a = rPS0(_inst.FA);
double b = rPS0(_inst.FB); double b = rPS0(_inst.FB);
if (a == 0.0f && b == 0.0f) rPS0(_inst.FD) = a / b;
rPS0(_inst.FD) = rPS1(_inst.FD) = 0.0; // NAN?
else
rPS0(_inst.FD) = rPS1(_inst.FD) = a / b;
if (fabs(rPS0(_inst.FB)) == 0.0) {
if (!FPSCR.ZX)
FPSCR.FX = 1;
FPSCR.ZX = 1;
FPSCR.XX = 1;
}
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
void fdivsx(UGeckoInstruction _inst)
{
float a = rPS0(_inst.FA);
float b = rPS0(_inst.FB);
if (a != a || b != b)
rPS0(_inst.FD) = rPS1(_inst.FD) = 0.0; // NAN?
else
rPS0(_inst.FD) = rPS1(_inst.FD) = a / b;
if (b == 0.0) { if (b == 0.0) {
if (!FPSCR.ZX) if (!FPSCR.ZX)
FPSCR.FX = 1; FPSCR.FX = 1;
FPSCR.ZX = 1; FPSCR.ZX = 1;
FPSCR.XX = 1; FPSCR.XX = 1;
} }
UpdateFPRF(rPS0(_inst.FD));
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
} }
void fresx(UGeckoInstruction _inst) void fdivsx(UGeckoInstruction _inst)
{ {
double b = rPS0(_inst.FB); float a = (float)rPS0(_inst.FA);
rPS0(_inst.FD) = rPS1(_inst.FD) = 1.0 / b; float b = (float)rPS0(_inst.FB);
if (fabs(rPS0(_inst.FB)) == 0.0) { rPS0(_inst.FD) = rPS1(_inst.FD) = a / b;
if (b == 0.0)
{
if (!FPSCR.ZX) if (!FPSCR.ZX)
FPSCR.FX = 1; FPSCR.FX = 1;
FPSCR.ZX = 1; FPSCR.ZX = 1;
FPSCR.XX = 1; FPSCR.XX = 1;
} }
UpdateFPRF(rPS0(_inst.FD));
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
} }
// fresx: reciprocal of PS0 of FB. Single precision only: the input is
// narrowed to float, 1.0f / input is computed in float, and the result is
// written to both PS0 and PS1 of FD.
void fresx(UGeckoInstruction _inst)
{
	const float divisor = (float)rPS0(_inst.FB);
	const bool divide_by_zero = (divisor == 0.0);

	const float reciprocal = 1.0f / divisor;
	rPS0(_inst.FD) = rPS1(_inst.FD) = reciprocal;

	if (divide_by_zero)
	{
		// FX is raised only when ZX was not already set.
		if (FPSCR.ZX == 0)
			FPSCR.FX = 1;
		FPSCR.ZX = 1;
		// NOTE(review): XX is also set on a zero divisor - confirm this is
		// intended; kept because the divide handlers in this file do the same.
		FPSCR.XX = 1;
	}

	UpdateFPRF(rPS0(_inst.FD));
	if (_inst.Rc)
	{
		Helper_UpdateCR1(rPS0(_inst.FD));
	}
}
// frsqrtex: writes 1.0f / sqrtf(x) to PS0 of FD, where x is PS0 of FB
// narrowed to float. Sets VXSQRT for negative inputs and ZX for zero inputs;
// the result is computed and stored in either case.
// NOTE(review): unlike fresx, neither FX nor PS1 of FD is written here -
// confirm whether that difference is intended.
void frsqrtex(UGeckoInstruction _inst)
{
	const float operand = (float)rPS0(_inst.FB);

	// The two conditions are mutually exclusive (-0.0 counts as zero, NaN
	// matches neither), so the order of the checks does not matter.
	if (operand == 0)
		FPSCR.ZX = 1;
	else if (operand < 0.0)
		FPSCR.VXSQRT = 1;

	const float root = sqrtf(operand);
	rPS0(_inst.FD) = 1.0f / root;

	UpdateFPRF(rPS0(_inst.FD));
	if (_inst.Rc)
	{
		Helper_UpdateCR1(rPS0(_inst.FD));
	}
}
// fsqrtx: writes sqrt(PS0 of FB) to PS0 of FD in double precision and sets
// VXSQRT when the input is negative.
void fsqrtx(UGeckoInstruction _inst)
{
	// GEKKO is not supposed to support this instruction.
	// PanicAlert("fsqrtx");
	const double radicand = rPS0(_inst.FB);

	const bool negative_input = radicand < 0.0;
	if (negative_input)
		FPSCR.VXSQRT = 1;

	// The square root is still computed and stored for negative inputs.
	rPS0(_inst.FD) = sqrt(radicand);

	UpdateFPRF(rPS0(_inst.FD));
	if (_inst.Rc)
	{
		Helper_UpdateCR1(rPS0(_inst.FD));
	}
}
void fmsubx(UGeckoInstruction _inst) void fmsubx(UGeckoInstruction _inst)
{ {
rPS0(_inst.FD) = (rPS0(_inst.FA) * rPS0(_inst.FC)) - rPS0(_inst.FB); rPS0(_inst.FD) = (rPS0(_inst.FA) * rPS0(_inst.FC)) - rPS0(_inst.FB);
UpdateFPRF(rPS0(_inst.FD));
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
} }
@ -459,6 +362,7 @@ void fmsubsx(UGeckoInstruction _inst)
{ {
rPS0(_inst.FD) = rPS1(_inst.FD) = rPS0(_inst.FD) = rPS1(_inst.FD) =
static_cast<float>((rPS0(_inst.FA) * rPS0(_inst.FC)) - rPS0(_inst.FB)); static_cast<float>((rPS0(_inst.FA) * rPS0(_inst.FC)) - rPS0(_inst.FB));
UpdateFPRF(rPS0(_inst.FD));
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
} }
@ -466,12 +370,14 @@ void fmsubsx(UGeckoInstruction _inst)
void fnmaddx(UGeckoInstruction _inst) void fnmaddx(UGeckoInstruction _inst)
{ {
rPS0(_inst.FD) = -((rPS0(_inst.FA) * rPS0(_inst.FC)) + rPS0(_inst.FB)); rPS0(_inst.FD) = -((rPS0(_inst.FA) * rPS0(_inst.FC)) + rPS0(_inst.FB));
UpdateFPRF(rPS0(_inst.FD));
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
} }
void fnmaddsx(UGeckoInstruction _inst) void fnmaddsx(UGeckoInstruction _inst)
{ {
rPS0(_inst.FD) = rPS1(_inst.FD) = rPS0(_inst.FD) = rPS1(_inst.FD) =
static_cast<float>(-((rPS0(_inst.FA) * rPS0(_inst.FC)) + rPS0(_inst.FB))); static_cast<float>(-((rPS0(_inst.FA) * rPS0(_inst.FC)) + rPS0(_inst.FB)));
UpdateFPRF(rPS0(_inst.FD));
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
} }
@ -479,12 +385,14 @@ void fnmaddsx(UGeckoInstruction _inst)
void fnmsubx(UGeckoInstruction _inst) void fnmsubx(UGeckoInstruction _inst)
{ {
rPS0(_inst.FD) = -((rPS0(_inst.FA) * rPS0(_inst.FC)) - rPS0(_inst.FB)); rPS0(_inst.FD) = -((rPS0(_inst.FA) * rPS0(_inst.FC)) - rPS0(_inst.FB));
UpdateFPRF(rPS0(_inst.FD));
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
} }
void fnmsubsx(UGeckoInstruction _inst) void fnmsubsx(UGeckoInstruction _inst)
{ {
rPS0(_inst.FD) = rPS1(_inst.FD) = rPS0(_inst.FD) = rPS1(_inst.FD) =
static_cast<float>(-((rPS0(_inst.FA) * rPS0(_inst.FC)) - rPS0(_inst.FB))); static_cast<float>(-((rPS0(_inst.FA) * rPS0(_inst.FC)) - rPS0(_inst.FB)));
UpdateFPRF(rPS0(_inst.FD));
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
} }
@ -492,32 +400,13 @@ void fnmsubsx(UGeckoInstruction _inst)
void fsubx(UGeckoInstruction _inst) void fsubx(UGeckoInstruction _inst)
{ {
rPS0(_inst.FD) = rPS0(_inst.FA) - rPS0(_inst.FB); rPS0(_inst.FD) = rPS0(_inst.FA) - rPS0(_inst.FB);
UpdateFPRF(rPS0(_inst.FD));
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
} }
void fsubsx(UGeckoInstruction _inst) void fsubsx(UGeckoInstruction _inst)
{ {
rPS0(_inst.FD) = rPS1(_inst.FD) = static_cast<float>(rPS0(_inst.FA) - rPS0(_inst.FB)); rPS0(_inst.FD) = rPS1(_inst.FD) = static_cast<float>(rPS0(_inst.FA) - rPS0(_inst.FB));
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); UpdateFPRF(rPS0(_inst.FD));
}
// frsqrtex: writes the reciprocal square root of PS0 of FB to PS0 of FD.
// Inputs that compare <= 0.0 yield 0.0 instead of a division result; a NaN
// input fails that comparison and goes through the division path.
void frsqrtex(UGeckoInstruction _inst)
{
	const double operand = rPS0(_inst.FB);

	// Same comparison as before (<= 0.0) so NaN handling is unchanged.
	rPS0(_inst.FD) = (operand <= 0.0) ? 0.0 : 1.0f / (sqrt(operand));

	if (_inst.Rc)
	{
		Helper_UpdateCR1(rPS0(_inst.FD));
	}
}
void fsqrtx(UGeckoInstruction _inst)
{
double b = rPS0(_inst.FB);
if (b < 0.0)
{
FPSCR.VXSQRT = 1;
}
rPS0(_inst.FD) = sqrt(b);
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
} }

View File

@ -493,7 +493,7 @@ void divwux(UGeckoInstruction _inst)
u32 a = m_GPR[_inst.RA]; u32 a = m_GPR[_inst.RA];
u32 b = m_GPR[_inst.RB]; u32 b = m_GPR[_inst.RB];
if (b == 0 || (a == 0x80000000 && b == 0xFFFFFFFF)) if (b == 0) // || (a == 0x80000000 && b == 0xFFFFFFFF))
{ {
if (_inst.OE) if (_inst.OE)
PanicAlert("OE: divwux"); PanicAlert("OE: divwux");

View File

@ -15,6 +15,9 @@
// Official SVN repository and contact information can be found at // Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/ // http://code.google.com/p/dolphin-emu/
#include "Common.h"
#include "MathUtil.h"
#include "../../HW/Memmap.h" #include "../../HW/Memmap.h"
#include "../../HW/CommandProcessor.h" #include "../../HW/CommandProcessor.h"
#include "../../HW/PixelEngine.h" #include "../../HW/PixelEngine.h"
@ -92,16 +95,18 @@ void lfdx(UGeckoInstruction _inst)
void lfs(UGeckoInstruction _inst) void lfs(UGeckoInstruction _inst)
{ {
u32 uTemp = Memory::Read_U32(Helper_Get_EA(_inst)); u32 uTemp = Memory::Read_U32(Helper_Get_EA(_inst));
rPS0(_inst.FD) = *(float*)&uTemp; double value = *(float*)&uTemp;
rPS1(_inst.FD) = rPS0(_inst.FD); rPS0(_inst.FD) = value;
rPS1(_inst.FD) = value;
} }
void lfsu(UGeckoInstruction _inst) void lfsu(UGeckoInstruction _inst)
{ {
u32 uAddress = Helper_Get_EA_U(_inst); u32 uAddress = Helper_Get_EA_U(_inst);
u32 uTemp = Memory::Read_U32(uAddress); u32 uTemp = Memory::Read_U32(uAddress);
rPS0(_inst.FD) = *(float*)&uTemp; double value = *(float*)&uTemp;
rPS1(_inst.FD) = rPS0(_inst.FD); rPS0(_inst.FD) = value;
rPS1(_inst.FD) = value;
m_GPR[_inst.RA] = uAddress; m_GPR[_inst.RA] = uAddress;
} }
@ -109,16 +114,18 @@ void lfsux(UGeckoInstruction _inst)
{ {
u32 uAddress = Helper_Get_EA_UX(_inst); u32 uAddress = Helper_Get_EA_UX(_inst);
u32 uTemp = Memory::Read_U32(uAddress); u32 uTemp = Memory::Read_U32(uAddress);
rPS0(_inst.FD) = *(float*)&uTemp; double value = *(float*)&uTemp;
rPS1(_inst.FD) = rPS0(_inst.FD); rPS0(_inst.FD) = value;
rPS1(_inst.FD) = value;
m_GPR[_inst.RA] = uAddress; m_GPR[_inst.RA] = uAddress;
} }
void lfsx(UGeckoInstruction _inst) void lfsx(UGeckoInstruction _inst)
{ {
u32 uTemp = Memory::Read_U32(Helper_Get_EA_X(_inst)); u32 uTemp = Memory::Read_U32(Helper_Get_EA_X(_inst));
rPS0(_inst.FD) = *(float*)&uTemp; double value = *(float*)&uTemp;
rPS1(_inst.FD) = rPS0(_inst.FD); rPS0(_inst.FD) = value;
rPS1(_inst.FD) = value;
} }
void lha(UGeckoInstruction _inst) void lha(UGeckoInstruction _inst)
@ -227,7 +234,8 @@ void stfdu(UGeckoInstruction _inst)
void stfs(UGeckoInstruction _inst) void stfs(UGeckoInstruction _inst)
{ {
float fTemp = (float)rPS0(_inst.FS); double value = rPS0(_inst.FS);
float fTemp = (float)value;
Memory::Write_U32(*(u32*)&fTemp, Helper_Get_EA(_inst)); Memory::Write_U32(*(u32*)&fTemp, Helper_Get_EA(_inst));
} }
@ -453,27 +461,20 @@ void stfiwx(UGeckoInstruction _inst)
Memory::Write_U32((u32)riPS0(_inst.FS), uAddress); Memory::Write_U32((u32)riPS0(_inst.FS), uAddress);
} }
// __________________________________________________________________________________________________
// stfsux
//
// no paired ??
//
void stfsux(UGeckoInstruction _inst) void stfsux(UGeckoInstruction _inst)
{ {
float fTemp = (float)rPS0(_inst.FS); double value = rPS0(_inst.FS);
float fTemp = (float)value;
u32 uAddress = Helper_Get_EA_UX(_inst); u32 uAddress = Helper_Get_EA_UX(_inst);
Memory::Write_U32(*(u32*)&fTemp, uAddress); Memory::Write_U32(*(u32*)&fTemp, uAddress);
m_GPR[_inst.RA] = uAddress; m_GPR[_inst.RA] = uAddress;
} }
// __________________________________________________________________________________________________
// stfsx
//
// no paired ??
//
void stfsx(UGeckoInstruction _inst) void stfsx(UGeckoInstruction _inst)
{ {
float fTemp = (float)rPS0(_inst.FS); double value = rPS0(_inst.FS);
float fTemp = (float)value;
Memory::Write_U32(*(u32 *)&fTemp, Helper_Get_EA_X(_inst)); Memory::Write_U32(*(u32 *)&fTemp, Helper_Get_EA_X(_inst));
} }

View File

@ -16,41 +16,52 @@
// http://code.google.com/p/dolphin-emu/ // http://code.google.com/p/dolphin-emu/
#include <math.h> #include <math.h>
#include "Common.h"
#include "MathUtil.h"
#include "Interpreter.h" #include "Interpreter.h"
#include "../../HW/Memmap.h" #include "../../HW/Memmap.h"
using namespace MathUtil;
namespace Interpreter namespace Interpreter
{ {
// These "binary instructions" do not alter FPSCR. // These "binary instructions" do not alter FPSCR.
void ps_sel(UGeckoInstruction _inst) void ps_sel(UGeckoInstruction _inst)
{ {
rPS0(_inst.FD) = static_cast<float>((rPS0(_inst.FA) >= -0.0) ? rPS0(_inst.FC) : rPS0(_inst.FB)); rPS0(_inst.FD) = !IsNAN(rPS0(_inst.FA)) && rPS0(_inst.FA) >= -0.0 ?
rPS1(_inst.FD) = static_cast<float>((rPS1(_inst.FA) >= -0.0) ? rPS1(_inst.FC) : rPS1(_inst.FB)); rPS0(_inst.FC) : rPS0(_inst.FB);
rPS1(_inst.FD) = !IsNAN(rPS1(_inst.FA)) && rPS1(_inst.FA) >= -0.0 ?
rPS1(_inst.FC) : rPS1(_inst.FB);
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
} }
void ps_neg(UGeckoInstruction _inst) void ps_neg(UGeckoInstruction _inst)
{ {
riPS0(_inst.FD) = riPS0(_inst.FB) ^ (1ULL << 63); riPS0(_inst.FD) = riPS0(_inst.FB) ^ (1ULL << 63);
riPS1(_inst.FD) = riPS1(_inst.FB) ^ (1ULL << 63); riPS1(_inst.FD) = riPS1(_inst.FB) ^ (1ULL << 63);
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
} }
void ps_mr(UGeckoInstruction _inst) void ps_mr(UGeckoInstruction _inst)
{ {
rPS0(_inst.FD) = rPS0(_inst.FB); rPS0(_inst.FD) = rPS0(_inst.FB);
rPS1(_inst.FD) = rPS1(_inst.FB); rPS1(_inst.FD) = rPS1(_inst.FB);
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
} }
void ps_nabs(UGeckoInstruction _inst) void ps_nabs(UGeckoInstruction _inst)
{ {
riPS0(_inst.FD) = riPS0(_inst.FB) | (1ULL << 63); riPS0(_inst.FD) = riPS0(_inst.FB) | (1ULL << 63);
riPS1(_inst.FD) = riPS1(_inst.FB) | (1ULL << 63); riPS1(_inst.FD) = riPS1(_inst.FB) | (1ULL << 63);
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
} }
void ps_abs(UGeckoInstruction _inst) void ps_abs(UGeckoInstruction _inst)
{ {
riPS0(_inst.FD) = riPS0(_inst.FB) &~ (1ULL << 63); riPS0(_inst.FD) = riPS0(_inst.FB) &~ (1ULL << 63);
riPS1(_inst.FD) = riPS1(_inst.FB) &~ (1ULL << 63); riPS1(_inst.FD) = riPS1(_inst.FB) &~ (1ULL << 63);
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
} }
// These are just moves, double is OK. // These are just moves, double is OK.
@ -60,6 +71,7 @@ void ps_merge00(UGeckoInstruction _inst)
double p1 = rPS0(_inst.FB); double p1 = rPS0(_inst.FB);
rPS0(_inst.FD) = p0; rPS0(_inst.FD) = p0;
rPS1(_inst.FD) = p1; rPS1(_inst.FD) = p1;
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
} }
void ps_merge01(UGeckoInstruction _inst) void ps_merge01(UGeckoInstruction _inst)
@ -68,6 +80,7 @@ void ps_merge01(UGeckoInstruction _inst)
double p1 = rPS1(_inst.FB); double p1 = rPS1(_inst.FB);
rPS0(_inst.FD) = p0; rPS0(_inst.FD) = p0;
rPS1(_inst.FD) = p1; rPS1(_inst.FD) = p1;
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
} }
void ps_merge10(UGeckoInstruction _inst) void ps_merge10(UGeckoInstruction _inst)
@ -76,6 +89,7 @@ void ps_merge10(UGeckoInstruction _inst)
double p1 = rPS0(_inst.FB); double p1 = rPS0(_inst.FB);
rPS0(_inst.FD) = p0; rPS0(_inst.FD) = p0;
rPS1(_inst.FD) = p1; rPS1(_inst.FD) = p1;
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
} }
void ps_merge11(UGeckoInstruction _inst) void ps_merge11(UGeckoInstruction _inst)
@ -84,6 +98,7 @@ void ps_merge11(UGeckoInstruction _inst)
double p1 = rPS1(_inst.FB); double p1 = rPS1(_inst.FB);
rPS0(_inst.FD) = p0; rPS0(_inst.FD) = p0;
rPS1(_inst.FD) = p1; rPS1(_inst.FD) = p1;
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
} }
@ -97,63 +112,75 @@ void ps_div(UGeckoInstruction _inst)
if (fabs(rPS0(_inst.FB)) == 0.0) { if (fabs(rPS0(_inst.FB)) == 0.0) {
FPSCR.ZX = 1; FPSCR.ZX = 1;
} }
} if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
// ps_sub: per-slot subtraction FA - FB for PS0 and PS1. Each difference is
// rounded to single precision before being stored in the matching slot of FD.
void ps_sub(UGeckoInstruction _inst)
{
	// Slot 0 is computed and stored before slot 1 is read, as before.
	const float diff0 = static_cast<float>(rPS0(_inst.FA) - rPS0(_inst.FB));
	rPS0(_inst.FD) = diff0;

	const float diff1 = static_cast<float>(rPS1(_inst.FA) - rPS1(_inst.FB));
	rPS1(_inst.FD) = diff1;
}
void ps_add(UGeckoInstruction _inst)
{
rPS0(_inst.FD) = static_cast<float>(rPS0(_inst.FA) + rPS0(_inst.FB));
rPS1(_inst.FD) = static_cast<float>(rPS1(_inst.FA) + rPS1(_inst.FB));
} }
void ps_res(UGeckoInstruction _inst) void ps_res(UGeckoInstruction _inst)
{ {
rPS0(_inst.FD) = 1.0f / static_cast<float>(rPS0(_inst.FB)); rPS0(_inst.FD) = 1.0f / static_cast<float>(rPS0(_inst.FB));
rPS1(_inst.FD) = 1.0f / static_cast<float>(rPS1(_inst.FB)); rPS1(_inst.FD) = 1.0f / static_cast<float>(rPS1(_inst.FB));
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
// ps_rsqrte: paired-single reciprocal square root.
// For each of PS0 and PS1, narrows the FB value to float, computes
// 1.0f / sqrtf(x) and stores the widened result in the same slot of FD.
// ZX is set when the PS0 input is zero (either sign).
void ps_rsqrte(UGeckoInstruction _inst)
{
	// PanicAlert("ps_rsqrte");

	// Capture the PS0 input before FD is written. If FD and FB select the
	// same register, the store below replaces the input with the result, and
	// a zero check performed afterwards would test the result (infinity)
	// instead of the input, so ZX would never be set.
	const double input0 = rPS0(_inst.FB);

	rPS0(_inst.FD) = static_cast<double>(1.0f / sqrtf((float)input0));
	rPS1(_inst.FD) = static_cast<double>(1.0f / sqrtf((float)rPS1(_inst.FB)));
	if (fabs(input0) == 0.0) {
		FPSCR.ZX = 1;
	}
	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
// ps_sub: per-slot subtraction FA - FB for PS0 and PS1. Each difference is
// rounded to single precision before being stored in the matching slot of FD.
// When the Rc bit is set, Helper_UpdateCR1 is called with the PS0 result.
void ps_sub(UGeckoInstruction _inst)
{
	// Slot 0 is computed and stored before slot 1 is read, as before.
	const float diff0 = static_cast<float>(rPS0(_inst.FA) - rPS0(_inst.FB));
	rPS0(_inst.FD) = diff0;

	const float diff1 = static_cast<float>(rPS1(_inst.FA) - rPS1(_inst.FB));
	rPS1(_inst.FD) = diff1;

	if (_inst.Rc)
	{
		Helper_UpdateCR1(rPS0(_inst.FD));
	}
}
void ps_add(UGeckoInstruction _inst)
{
rPS0(_inst.FD) = static_cast<float>(rPS0(_inst.FA) + rPS0(_inst.FB));
rPS1(_inst.FD) = static_cast<float>(rPS1(_inst.FA) + rPS1(_inst.FB));
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
} }
// ps_mul: paired-single multiply, FD = FA * FC per lane, rounded to single.
void ps_mul(UGeckoInstruction _inst)
{
	// Lane 0 is computed and stored before lane 1 is evaluated.
	const double prod0 = rPS0(_inst.FA) * rPS0(_inst.FC);
	rPS0(_inst.FD) = static_cast<float>(prod0);

	const double prod1 = rPS1(_inst.FA) * rPS1(_inst.FC);
	rPS1(_inst.FD) = static_cast<float>(prod1);

	// Record form: CR1 is updated from lane 0 of the result.
	if (_inst.Rc)
	{
		Helper_UpdateCR1(rPS0(_inst.FD));
	}
}
void ps_rsqrte(UGeckoInstruction _inst)
{
rPS0(_inst.FD) = static_cast<double>(1.0f / sqrtf((float)rPS0(_inst.FB)));
rPS1(_inst.FD) = static_cast<double>(1.0f / sqrtf((float)rPS1(_inst.FB)));
if (fabs(rPS0(_inst.FB)) == 0.0) {
FPSCR.ZX = 1;
}
}
// ps_msub: paired-single multiply-subtract, FD = (FA * FC) - FB per lane,
// rounded to single.
void ps_msub(UGeckoInstruction _inst)
{
	// Lane 0 is computed and stored before lane 1 is evaluated.
	const double res0 = (rPS0(_inst.FA) * rPS0(_inst.FC)) - rPS0(_inst.FB);
	rPS0(_inst.FD) = static_cast<float>(res0);

	const double res1 = (rPS1(_inst.FA) * rPS1(_inst.FC)) - rPS1(_inst.FB);
	rPS1(_inst.FD) = static_cast<float>(res1);

	// Record form: CR1 is updated from lane 0 of the result.
	if (_inst.Rc)
	{
		Helper_UpdateCR1(rPS0(_inst.FD));
	}
}
// ps_madd: paired-single multiply-add, FD = (FA * FC) + FB per lane,
// rounded to single.
void ps_madd(UGeckoInstruction _inst)
{
	// Lane 0 is computed and stored before lane 1 is evaluated.
	const double res0 = (rPS0(_inst.FA) * rPS0(_inst.FC)) + rPS0(_inst.FB);
	rPS0(_inst.FD) = static_cast<float>(res0);

	const double res1 = (rPS1(_inst.FA) * rPS1(_inst.FC)) + rPS1(_inst.FB);
	rPS1(_inst.FD) = static_cast<float>(res1);

	// Record form: CR1 is updated from lane 0 of the result.
	if (_inst.Rc)
	{
		Helper_UpdateCR1(rPS0(_inst.FD));
	}
}
// ps_nmsub: paired-single negated multiply-subtract,
// FD = -((FA * FC) - FB) per lane, rounded to single.
void ps_nmsub(UGeckoInstruction _inst)
{
	// Lane 0 is computed and stored before lane 1 is evaluated.
	const double res0 = -(rPS0(_inst.FA) * rPS0(_inst.FC) - rPS0(_inst.FB));
	rPS0(_inst.FD) = static_cast<float>(res0);

	const double res1 = -(rPS1(_inst.FA) * rPS1(_inst.FC) - rPS1(_inst.FB));
	rPS1(_inst.FD) = static_cast<float>(res1);

	// Record form: CR1 is updated from lane 0 of the result.
	if (_inst.Rc)
	{
		Helper_UpdateCR1(rPS0(_inst.FD));
	}
}
// ps_nmadd: paired-single negated multiply-add,
// FD = -((FA * FC) + FB) per lane, rounded to single.
void ps_nmadd(UGeckoInstruction _inst)
{
	// Lane 0 is computed and stored before lane 1 is evaluated.
	const double res0 = -(rPS0(_inst.FA) * rPS0(_inst.FC) + rPS0(_inst.FB));
	rPS0(_inst.FD) = static_cast<float>(res0);

	const double res1 = -(rPS1(_inst.FA) * rPS1(_inst.FC) + rPS1(_inst.FB));
	rPS1(_inst.FD) = static_cast<float>(res1);

	// Record form: CR1 is updated from lane 0 of the result.
	if (_inst.Rc)
	{
		Helper_UpdateCR1(rPS0(_inst.FD));
	}
}
void ps_sum0(UGeckoInstruction _inst) void ps_sum0(UGeckoInstruction _inst)
@ -162,6 +189,7 @@ void ps_sum0(UGeckoInstruction _inst)
double p1 = (float)(rPS1(_inst.FC)); double p1 = (float)(rPS1(_inst.FC));
rPS0(_inst.FD) = p0; rPS0(_inst.FD) = p0;
rPS1(_inst.FD) = p1; rPS1(_inst.FD) = p1;
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
} }
void ps_sum1(UGeckoInstruction _inst) void ps_sum1(UGeckoInstruction _inst)
@ -170,6 +198,7 @@ void ps_sum1(UGeckoInstruction _inst)
double p1 = rPS0(_inst.FA) + rPS1(_inst.FB); double p1 = rPS0(_inst.FA) + rPS1(_inst.FB);
rPS0(_inst.FD) = p0; rPS0(_inst.FD) = p0;
rPS1(_inst.FD) = p1; rPS1(_inst.FD) = p1;
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
} }
void ps_muls0(UGeckoInstruction _inst) void ps_muls0(UGeckoInstruction _inst)
@ -178,6 +207,7 @@ void ps_muls0(UGeckoInstruction _inst)
double p1 = rPS1(_inst.FA) * rPS0(_inst.FC); double p1 = rPS1(_inst.FA) * rPS0(_inst.FC);
rPS0(_inst.FD) = p0; rPS0(_inst.FD) = p0;
rPS1(_inst.FD) = p1; rPS1(_inst.FD) = p1;
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
} }
void ps_muls1(UGeckoInstruction _inst) void ps_muls1(UGeckoInstruction _inst)
@ -186,6 +216,7 @@ void ps_muls1(UGeckoInstruction _inst)
double p1 = rPS1(_inst.FA) * rPS1(_inst.FC); double p1 = rPS1(_inst.FA) * rPS1(_inst.FC);
rPS0(_inst.FD) = p0; rPS0(_inst.FD) = p0;
rPS1(_inst.FD) = p1; rPS1(_inst.FD) = p1;
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
} }
void ps_madds0(UGeckoInstruction _inst) void ps_madds0(UGeckoInstruction _inst)
@ -194,6 +225,7 @@ void ps_madds0(UGeckoInstruction _inst)
double p1 = (rPS1(_inst.FA) * rPS0(_inst.FC)) + rPS1(_inst.FB); double p1 = (rPS1(_inst.FA) * rPS0(_inst.FC)) + rPS1(_inst.FB);
rPS0(_inst.FD) = p0; rPS0(_inst.FD) = p0;
rPS1(_inst.FD) = p1; rPS1(_inst.FD) = p1;
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
} }
void ps_madds1(UGeckoInstruction _inst) void ps_madds1(UGeckoInstruction _inst)
@ -202,6 +234,7 @@ void ps_madds1(UGeckoInstruction _inst)
double p1 = (rPS1(_inst.FA) * rPS1(_inst.FC)) + rPS1(_inst.FB); double p1 = (rPS1(_inst.FA) * rPS1(_inst.FC)) + rPS1(_inst.FB);
rPS0(_inst.FD) = p0; rPS0(_inst.FD) = p0;
rPS1(_inst.FD) = p1; rPS1(_inst.FD) = p1;
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
} }
void ps_cmpu0(UGeckoInstruction _inst) void ps_cmpu0(UGeckoInstruction _inst)
@ -209,10 +242,12 @@ void ps_cmpu0(UGeckoInstruction _inst)
double fa = rPS0(_inst.FA); double fa = rPS0(_inst.FA);
double fb = rPS0(_inst.FB); double fb = rPS0(_inst.FB);
int compareResult; int compareResult;
if (fa < fb) compareResult = 8; if (IsNAN(fa) || IsNAN(fb)) compareResult = 1;
else if (fa > fb) compareResult = 4; else if (fa < fb) compareResult = 8;
else compareResult = 2; else if (fa > fb) compareResult = 4;
else compareResult = 2;
SetCRField(_inst.CRFD, compareResult); SetCRField(_inst.CRFD, compareResult);
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
} }
void ps_cmpo0(UGeckoInstruction _inst) void ps_cmpo0(UGeckoInstruction _inst)
@ -226,10 +261,12 @@ void ps_cmpu1(UGeckoInstruction _inst)
double fa = rPS1(_inst.FA); double fa = rPS1(_inst.FA);
double fb = rPS1(_inst.FB); double fb = rPS1(_inst.FB);
int compareResult; int compareResult;
if (fa < fb) compareResult = 8; if (IsNAN(fa) || IsNAN(fb)) compareResult = 1;
else if (fa > fb) compareResult = 4; else if (fa < fb) compareResult = 8;
else compareResult = 2; else if (fa > fb) compareResult = 4;
else compareResult = 2;
SetCRField(_inst.CRFD, compareResult); SetCRField(_inst.CRFD, compareResult);
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
} }
void ps_cmpo1(UGeckoInstruction _inst) void ps_cmpo1(UGeckoInstruction _inst)

View File

@ -35,6 +35,7 @@ static const unsigned short FPU_ROUND_MASK = 3 << 10;
#include <xmmintrin.h> #include <xmmintrin.h>
#endif #endif
#include "CPUDetect.h"
#include "../../CoreTiming.h" #include "../../CoreTiming.h"
#include "../../HW/Memmap.h" #include "../../HW/Memmap.h"
#include "../../HW/GPFifo.h" #include "../../HW/GPFifo.h"
@ -60,37 +61,11 @@ mffsx: 80036650 (huh?)
namespace Interpreter namespace Interpreter
{ {
void UpdateSSEState() const u32 MASKS = 0x1F80; // mask away the interrupts.
{ const u32 DAZ = 0x40;
u32 csr = _mm_getcsr(); const u32 FTZ = 0x8000;
const int ssetable[4] = void FPSCRtoFPUSettings(UReg_FPSCR fp)
{
0,
3,
2,
1,
};
csr = csr & 0x9FFF;
csr |= ssetable[FPSCR.RN] << 13;
// Also handle denormals as zero (FZ + DAZ)
csr &= ~0x8020;
// SETTING FTZ+DAZ KILLS BEYOND GOOD AND EVIL
//if (daz)
// csr |= 0x20; // Only set DAZ //0x8020;
_mm_setcsr(csr);
}
void RestoreSSEState()
{
// A reasonable default
_mm_setcsr(0x1fa0);
}
void UpdateFPSCR(UReg_FPSCR fp)
{ {
// Set FPU rounding mode to mimic the PowerPC's // Set FPU rounding mode to mimic the PowerPC's
#ifdef _M_IX86 #ifdef _M_IX86
@ -120,12 +95,28 @@ void UpdateFPSCR(UReg_FPSCR fp)
#endif #endif
if (fp.VE || fp.OE || fp.UE || fp.ZE || fp.XE) if (fp.VE || fp.OE || fp.UE || fp.ZE || fp.XE)
{ {
// PanicAlert("FPSCR - exceptions enabled. Please report."); //PanicAlert("FPSCR - exceptions enabled. Please report. VE=%i OE=%i UE=%i ZE=%i XE=%i",
// fp.VE, fp.OE, fp.UE, fp.ZE, fp.XE);
// Pokemon Colosseum does this. Gah. // Pokemon Colosseum does this. Gah.
} }
// Also corresponding SSE rounding mode setting // Also corresponding SSE rounding mode setting
UpdateSSEState(); static const u32 ssetable[4] =
{
(0 << 13) | MASKS,
(3 << 13) | MASKS,
(2 << 13) | MASKS,
(1 << 13) | MASKS,
};
u32 csr = ssetable[FPSCR.RN];
if (FPSCR.NI)
{
// Either one of these two breaks Beyond Good & Evil.
// if (cpu_info.bSSSE3)
// csr |= DAZ;
// csr |= FTZ;
}
_mm_setcsr(csr);
} }
void mcrfs(UGeckoInstruction _inst) void mcrfs(UGeckoInstruction _inst)
@ -158,25 +149,9 @@ void mcrfs(UGeckoInstruction _inst)
break; break;
} }
SetCRField(_inst.CRFD, fpflags); SetCRField(_inst.CRFD, fpflags);
UpdateFPSCR(FPSCR); FPSCRtoFPUSettings(FPSCR);
} }
#define MXCSR_IE 1
#define MXCSR_DE 2 // denormal
#define MXCSR_ZE 4 // divide by zero, sticky
#define MXCSR_OE 8 // overflow
#define MXCSR_UE 16 // underflow
#define MXCSR_PE 32 // precision
#define MXCSR_DAZ 64
#define MXCSR_IM 128
#define MXCSR_DM 256
#define MXCSR_ZM 512
#define MXCSR_OM 1024
#define MXCSR_UM 2048
#define MXCSR_PM 4096
#define MXCSR_ROUND (16384|8192)
#define MXCSR_FLUSH 32768
void mffsx(UGeckoInstruction _inst) void mffsx(UGeckoInstruction _inst)
{ {
// load from FPSCR // load from FPSCR
@ -190,14 +165,14 @@ void mffsx(UGeckoInstruction _inst)
// mtfsb0x: clear a single FPSCR bit, then re-apply the host FPU settings.
void mtfsb0x(UGeckoInstruction _inst)
{
	// CRBD indexes bits starting from the most significant one.
	const u32 selected_bit = 0x80000000 >> _inst.CRBD;
	FPSCR.Hex &= ~selected_bit;
	FPSCRtoFPUSettings(FPSCR);

	// The record form is not implemented; it only raises an alert.
	if (_inst.Rc)
	{
		PanicAlert("mtfsb0x: inst_.Rc");
	}
}
// mtfsb1x: set a single FPSCR bit, then re-apply the host FPU settings.
void mtfsb1x(UGeckoInstruction _inst)
{
	// CRBD indexes bits starting from the most significant one.
	const u32 selected_bit = 0x80000000 >> _inst.CRBD;
	FPSCR.Hex |= selected_bit;
	FPSCRtoFPUSettings(FPSCR);

	// The record form is not implemented; it only raises an alert.
	if (_inst.Rc)
	{
		PanicAlert("mtfsb1x: inst_.Rc");
	}
}
@ -206,7 +181,7 @@ void mtfsfix(UGeckoInstruction _inst)
u32 mask = (0xF0000000 >> (4 * _inst.CRFD)); u32 mask = (0xF0000000 >> (4 * _inst.CRFD));
u32 imm = (_inst.hex << 16) & 0xF0000000; u32 imm = (_inst.hex << 16) & 0xF0000000;
FPSCR.Hex = (FPSCR.Hex & ~mask) | (imm >> (4 * _inst.CRFD)); FPSCR.Hex = (FPSCR.Hex & ~mask) | (imm >> (4 * _inst.CRFD));
UpdateFPSCR(FPSCR); FPSCRtoFPUSettings(FPSCR);
if (_inst.Rc) PanicAlert("mtfsfix: inst_.Rc"); if (_inst.Rc) PanicAlert("mtfsfix: inst_.Rc");
} }
@ -214,13 +189,14 @@ void mtfsfx(UGeckoInstruction _inst)
{
	// Expand the FM field into a 32-bit mask: FM bit i selects the 4-bit
	// FPSCR group located at bit position i * 4.
	const u32 fm = _inst.FM;
	u32 m = 0;
	for (int i = 0; i < 8; i++) //7?? todo check
	{
		// Unsigned literals: for i == 7 the nibble mask reaches bit 31,
		// which a signed int shift cannot represent.
		if (fm & (1u << i))
			m |= (0xFu << (i * 4));
	}

	// Replace only the selected groups with the matching bits of FB's
	// integer view, truncated to u32; everything else is kept.
	FPSCR.Hex = (FPSCR.Hex & ~m) | (static_cast<u32>(riPS0(_inst.FB)) & m);
	FPSCRtoFPUSettings(FPSCR);
	// The record form is not implemented; it only raises an alert.
	if (_inst.Rc) PanicAlert("mtfsfx: inst_.Rc");
}

View File

@ -363,7 +363,7 @@ void OnIdleIL()
void UpdateFPRF(double dvalue) void UpdateFPRF(double dvalue)
{ {
FPSCR.FPRF = MathUtil::ClassifyFP(dvalue); FPSCR.FPRF = MathUtil::ClassifyDouble(dvalue);
//if (FPSCR.FPRF == 0x11) //if (FPSCR.FPRF == 0x11)
// PanicAlert("QNAN alert"); // PanicAlert("QNAN alert");
} }

View File

@ -122,6 +122,10 @@
RelativePath=".\ConsoleHelper.h" RelativePath=".\ConsoleHelper.h"
> >
</File> </File>
<File
RelativePath=".\tests\dsp_base.inc"
>
</File>
<File <File
RelativePath=".\dsp_interface.cpp" RelativePath=".\dsp_interface.cpp"
> >

View File

@ -285,8 +285,8 @@ void WmReadData(u16 _channelID, wm_read_data* rd)
size, address, (address & 0xffff), Tmp.c_str());*/ size, address, (address & 0xffff), Tmp.c_str());*/
break; break;
default: default:
ERROR_LOG(WII_IPC_WIIMOTE, "WmWriteData: bad register block!"); ERROR_LOG(WII_IPC_WIIMOTE, "WmReadData: bad register block!");
PanicAlert("WmWriteData: bad register block!"); PanicAlert("WmReadData: bad register block!");
return; return;
} }
@ -340,6 +340,7 @@ void WmReadData(u16 _channelID, wm_read_data* rd)
INFO_LOG(WII_IPC_WIIMOTE, "==========================================================="); INFO_LOG(WII_IPC_WIIMOTE, "===========================================================");
} }
// =================================================== // ===================================================
/* Here we produce the actual 0x21 Input report that we send to the Wii. The message /* Here we produce the actual 0x21 Input report that we send to the Wii. The message
is divided into 16 bytes pieces and sent piece by piece. There will be five formatting is divided into 16 bytes pieces and sent piece by piece. There will be five formatting

View File

@ -55,21 +55,35 @@ void CoreTests()
void MathTests() void MathTests()
{ {
// Tests that our fp classifier is correct. // Tests that our fp classifier is correct.
EXPECT_EQ(MathUtil::ClassifyFP(1.0), MathUtil::PPC_FPCLASS_PN); EXPECT_EQ(MathUtil::ClassifyDouble(1.0), MathUtil::PPC_FPCLASS_PN);
EXPECT_EQ(MathUtil::ClassifyFP(-1.0), 0x8); EXPECT_EQ(MathUtil::ClassifyDouble(-1.0), MathUtil::PPC_FPCLASS_NN);
EXPECT_EQ(MathUtil::ClassifyFP(1235223.0), 0x4); EXPECT_EQ(MathUtil::ClassifyDouble(1235223.0), MathUtil::PPC_FPCLASS_PN);
EXPECT_EQ(MathUtil::ClassifyFP(-126323521.0), 0x8); EXPECT_EQ(MathUtil::ClassifyDouble(-1263221.0), MathUtil::PPC_FPCLASS_NN);
EXPECT_EQ(MathUtil::ClassifyFP(1.0E-308), 0x14); EXPECT_EQ(MathUtil::ClassifyDouble(1.0E-308), MathUtil::PPC_FPCLASS_PD);
EXPECT_EQ(MathUtil::ClassifyFP(-1.0E-308), 0x18); EXPECT_EQ(MathUtil::ClassifyDouble(-1.0E-308), MathUtil::PPC_FPCLASS_ND);
EXPECT_EQ(MathUtil::ClassifyFP(0.0), 0x2); EXPECT_EQ(MathUtil::ClassifyDouble(0.0), MathUtil::PPC_FPCLASS_PZ);
EXPECT_EQ(MathUtil::ClassifyFP(-0.0), 0x12); EXPECT_EQ(MathUtil::ClassifyDouble(-0.0), MathUtil::PPC_FPCLASS_NZ);
EXPECT_EQ(MathUtil::ClassifyFP(HUGE_VAL), 0x5); // weird #define for infinity EXPECT_EQ(MathUtil::ClassifyDouble(HUGE_VAL), MathUtil::PPC_FPCLASS_PINF); // weird #define for infinity
EXPECT_EQ(MathUtil::ClassifyFP(-HUGE_VAL), 0x9); EXPECT_EQ(MathUtil::ClassifyDouble(-HUGE_VAL), MathUtil::PPC_FPCLASS_NINF);
EXPECT_EQ(MathUtil::ClassifyFP(sqrt(-1.0)), 0x11); // SNAN EXPECT_EQ(MathUtil::ClassifyDouble(sqrt(-1.0)), MathUtil::PPC_FPCLASS_QNAN);
// Float version
EXPECT_EQ(MathUtil::ClassifyFloat(1.0f), MathUtil::PPC_FPCLASS_PN);
EXPECT_EQ(MathUtil::ClassifyFloat(-1.0f), MathUtil::PPC_FPCLASS_NN);
EXPECT_EQ(MathUtil::ClassifyFloat(1235223.0f), MathUtil::PPC_FPCLASS_PN);
EXPECT_EQ(MathUtil::ClassifyFloat(-1263221.0f), MathUtil::PPC_FPCLASS_NN);
EXPECT_EQ(MathUtil::ClassifyFloat(1.0E-43f), MathUtil::PPC_FPCLASS_PD);
EXPECT_EQ(MathUtil::ClassifyFloat(-1.0E-43f), MathUtil::PPC_FPCLASS_ND);
EXPECT_EQ(MathUtil::ClassifyFloat(0.0f), MathUtil::PPC_FPCLASS_PZ);
EXPECT_EQ(MathUtil::ClassifyFloat(-0.0f), MathUtil::PPC_FPCLASS_NZ);
EXPECT_EQ(MathUtil::ClassifyFloat((float)HUGE_VAL), MathUtil::PPC_FPCLASS_PINF); // weird #define for infinity
EXPECT_EQ(MathUtil::ClassifyFloat((float)-HUGE_VAL), MathUtil::PPC_FPCLASS_NINF);
EXPECT_EQ(MathUtil::ClassifyFloat(sqrtf(-1.0f)), MathUtil::PPC_FPCLASS_QNAN);
EXPECT_FALSE(MathUtil::IsNAN(1.0)); EXPECT_FALSE(MathUtil::IsNAN(1.0));
EXPECT_TRUE(MathUtil::IsNAN(sqrt(-1.0))); EXPECT_TRUE(MathUtil::IsNAN(sqrt(-1.0)));
EXPECT_FALSE(MathUtil::IsSNAN(sqrt(-1.0))); EXPECT_FALSE(MathUtil::IsSNAN(sqrt(-1.0)));
// EXPECT_TRUE(MathUtil::IsQNAN(sqrt(-1.0))); // Hmm... // EXPECT_TRUE(MathUtil::IsQNAN(sqrt(-1.0))); // Hmm...
EXPECT_EQ(pow2(2.0), 4.0); EXPECT_EQ(pow2(2.0), 4.0);
EXPECT_EQ(pow2(-2.0), 4.0); EXPECT_EQ(pow2(-2.0), 4.0);