diff --git a/Source/Core/Common/Src/x64Emitter.cpp b/Source/Core/Common/Src/x64Emitter.cpp index afc90206ed..768fae5451 100644 --- a/Source/Core/Common/Src/x64Emitter.cpp +++ b/Source/Core/Common/Src/x64Emitter.cpp @@ -1030,6 +1030,8 @@ enum NormalSSEOps } void XEmitter::MOVQ_xmm(OpArg arg, X64Reg src) { + if (arg.IsSimpleReg()) + PanicAlert("Emitter: MOVQ_xmm doesn't support single registers as destination"); if (src > 7) { // Alternate encoding diff --git a/Source/Core/Core/Src/CoreParameter.cpp b/Source/Core/Core/Src/CoreParameter.cpp index b6bed0b5e7..f6c4462180 100644 --- a/Source/Core/Core/Src/CoreParameter.cpp +++ b/Source/Core/Core/Src/CoreParameter.cpp @@ -43,6 +43,7 @@ void SCoreStartupParameter::LoadDefaults() bRunCompareServer = false; bDSPThread = true; bLockThreads = true; + bEnableFPRF = false; bWii = false; SelectedLanguage = 0; iTLBHack = 0; diff --git a/Source/Core/Core/Src/CoreParameter.h b/Source/Core/Core/Src/CoreParameter.h index 923713f873..37da0f7ddc 100644 --- a/Source/Core/Core/Src/CoreParameter.h +++ b/Source/Core/Core/Src/CoreParameter.h @@ -65,6 +65,7 @@ struct SCoreStartupParameter bool bUseFastMem; bool bLockThreads; bool bOptimizeQuantizers; + bool bEnableFPRF; bool bEnableCheats; bool bEnableIsoCache; diff --git a/Source/Core/Core/Src/HLE/HLE.cpp b/Source/Core/Core/Src/HLE/HLE.cpp index 1dc236df5b..448a8e97f1 100644 --- a/Source/Core/Core/Src/HLE/HLE.cpp +++ b/Source/Core/Core/Src/HLE/HLE.cpp @@ -68,19 +68,21 @@ static const SPatch OSPatches[] = // wii only { "__OSInitAudioSystem", HLE_Misc::UnimplementedFunction }, - // Super Monkey Ball - { ".evil_vec_cosine", HLE_Misc::SMB_EvilVecCosine }, - { ".evil_normalize", HLE_Misc::SMB_EvilNormalize }, - { ".evil_vec_setlength", HLE_Misc::SMB_evil_vec_setlength }, - { ".evil_vec_something", HLE_Misc::FZero_evil_vec_normalize }, - { "PanicAlert", HLE_Misc::HLEPanicAlert }, - { ".sqrt_internal_needs_cr1", HLE_Misc::SMB_sqrt_internal }, - { ".rsqrt_internal_needs_cr1", HLE_Misc::SMB_rsqrt_internal }, - { ".atan2", HLE_Misc::SMB_atan2}, - { ".sqrt_fz", HLE_Misc::FZ_sqrt}, + // Super Monkey Ball - no longer needed. + //{ ".evil_vec_cosine", HLE_Misc::SMB_EvilVecCosine }, + //{ ".evil_normalize", HLE_Misc::SMB_EvilNormalize }, + //{ ".evil_vec_setlength", HLE_Misc::SMB_evil_vec_setlength }, + //{ ".evil_vec_something", HLE_Misc::FZero_evil_vec_normalize }, + { "PanicAlert", HLE_Misc::HLEPanicAlert }, + //{ ".sqrt_internal_needs_cr1", HLE_Misc::SMB_sqrt_internal }, + //{ ".rsqrt_internal_needs_cr1", HLE_Misc::SMB_rsqrt_internal }, + //{ ".atan2", HLE_Misc::SMB_atan2}, + //{ ".sqrt_fz", HLE_Misc::FZ_sqrt}, - { ".sqrt_internal_fz", HLE_Misc::FZ_sqrt_internal }, - { ".rsqrt_internal_fz", HLE_Misc::FZ_rsqrt_internal }, + // F-zero still isn't working correctly, but these aren't really helping. + + //{ ".sqrt_internal_fz", HLE_Misc::FZ_sqrt_internal }, + //{ ".rsqrt_internal_fz", HLE_Misc::FZ_rsqrt_internal }, //{ ".kill_infinites", HLE_Misc::FZero_kill_infinites }, // special diff --git a/Source/Core/Core/Src/HLE/HLE_Misc.cpp b/Source/Core/Core/Src/HLE/HLE_Misc.cpp index ff4cadf6c6..96eb2350c5 100644 --- a/Source/Core/Core/Src/HLE/HLE_Misc.cpp +++ b/Source/Core/Core/Src/HLE/HLE_Misc.cpp @@ -110,7 +110,12 @@ void SMB_EvilNormalize() float x = F(r3); float y = F(r3 + 4); float z = F(r3 + 8); - float inv_len = 1.0f / sqrtf(x*x + y*y + z*z); + float len = x*x + y*y + z*z; + float inv_len; + if (len <= 0) + inv_len = 0; + else + inv_len = 1.0f / sqrtf(len); x *= inv_len; y *= inv_len; z *= inv_len; diff --git a/Source/Core/Core/Src/HW/PeripheralInterface.h b/Source/Core/Core/Src/HW/PeripheralInterface.h index e8cbc14088..2e3e6cbd1e 100644 --- a/Source/Core/Core/Src/HW/PeripheralInterface.h +++ b/Source/Core/Core/Src/HW/PeripheralInterface.h @@ -100,6 +100,9 @@ public: static void Init(); static void DoState(PointerWrap &p); + static u32 GetMask() { return m_InterruptMask; } + static u32 GetCause() { return m_InterruptCause; } + static void SetInterrupt(InterruptCause _causemask, bool _bSet=true); // Read32 diff --git a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_FloatingPoint.cpp b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_FloatingPoint.cpp index 1e98a06702..0f169ac878 100644 --- a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_FloatingPoint.cpp +++ b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_FloatingPoint.cpp @@ -71,67 +71,25 @@ namespace Interpreter void UpdateFPSCR(UReg_FPSCR fp); void UpdateSSEState(); -void UpdateFPRF(double value) + +// start of unit test - Dolphin needs more of these! +/* +void TestFPRF() { - u64 ivalue = *((u64*)&value); - // 5 bits (C, <, >, =, ?) - // top: class descriptor - FPSCR.FPRF = 4; - // easy cases first - if (ivalue == 0) { - // positive zero - FPSCR.FPRF = 0x2; - } else if (ivalue == 0x8000000000000000ULL) { - // negative zero - FPSCR.FPRF = 0x12; - } else if (ivalue == 0x7FF0000000000000ULL) { - // positive inf - FPSCR.FPRF = 0x5; - } else if (ivalue == 0xFFF0000000000000ULL) { - // negative inf - FPSCR.FPRF = 0x9; - } else { - // OK let's dissect this thing. - int sign = (int)(ivalue >> 63); - int exp = (int)((ivalue >> 52) & 0x7FF); - if (exp >= 1 && exp <= 2046) { - // Nice normalized number. - if (sign) { - FPSCR.FPRF = 0x8; // negative - } else { - FPSCR.FPRF = 0x4; // positive - } - return; - } - u64 mantissa = ivalue & 0x000FFFFFFFFFFFFFULL; - // int mantissa_top = (int)(mantissa >> 51); - if (exp == 0 && mantissa) { - // Denormalized number. - if (sign) { - FPSCR.FPRF = 0x18; - } else { - FPSCR.FPRF = 0x14; - } - } else if (exp == 0x7FF && mantissa /* && mantissa_top*/) { - FPSCR.FPRF = 0x11; // Quiet NAN - return; - } - } -} + UpdateFPRF(1.0); + if (FPSCR.FPRF != 0x4) + PanicAlert("Error 1"); + UpdateFPRF(-1.0); + if (FPSCR.FPRF != 0x8) + PanicAlert("Error 2"); + PanicAlert("Test done"); +}*/ // extremely rare void Helper_UpdateCR1(double _fValue) { - FPSCR.FPRF = 0; - if (_fValue == 0.0 || _fValue == -0.0) - FPSCR.FPRF |= 2; - if (_fValue > 0.0) - FPSCR.FPRF |= 4; - if (_fValue < 0.0) - FPSCR.FPRF |= 8; - SetCRField(1, (FPSCR.Hex & 0x0000F000) >> 12); - + // Should just update exception flags, not do any compares. PanicAlert("CR1"); } @@ -218,7 +176,7 @@ void fcmpu(UGeckoInstruction _inst) // Apply current rounding mode void fctiwx(UGeckoInstruction _inst) { - UpdateSSEState(); + //UpdateSSEState(); const double b = rPS0(_inst.FB); u32 value; if (b > (double)0x7fffffff) @@ -257,7 +215,7 @@ largest representable int on PowerPC. */ // Always round toward zero void fctiwzx(UGeckoInstruction _inst) { - //UpdateFPSCR(FPSCR); + //UpdateSSEState(); const double b = rPS0(_inst.FB); u32 value; if (b > (double)0x7fffffff) @@ -279,7 +237,6 @@ void fctiwzx(UGeckoInstruction _inst) // FPSCR.XX |= FPSCR.FI; // FPSCR.FR = 1; //fabs(d_value) > fabs(b); } - //FPRF undefined riPS0(_inst.FD) = (u64)value; if (_inst.Rc) @@ -305,7 +262,7 @@ void fnabsx(UGeckoInstruction _inst) riPS0(_inst.FD) = riPS0(_inst.FB) | (1ULL << 63); // This is a binary instruction. Does not alter FPSCR if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); -} +} void fnegx(UGeckoInstruction _inst) { @@ -331,11 +288,12 @@ void frspx(UGeckoInstruction _inst) // round to single if (true || FPSCR.RN != 0) { // Not used in Super Monkey Ball - UpdateSSEState(); + // UpdateSSEState(); double b = rPS0(_inst.FB); double rounded = (double)(float)b; - FPSCR.FI = b != rounded; // changing both of these affect Super Monkey Ball behaviour greatly. - FPSCR.FR = 1; // WHY? fabs(rounded) > fabs(b); + //FPSCR.FI = b != rounded; // changing both of these affect Super Monkey Ball behaviour greatly. + if (Core::g_CoreStartupParameter.bEnableFPRF) + UpdateFPRF(rounded); rPS0(_inst.FD) = rPS1(_inst.FD) = rounded; return; // PanicAlert("frspx: FPSCR.RN=%i", FPSCR.RN); @@ -389,8 +347,8 @@ void frspx(UGeckoInstruction _inst) // round to single //PanicAlert("NAN %08x %08x", in.i >> 32, in.i); } } + UpdateFPRF(out.d); - FPSCR.FR = 1; // SUPER MONKEY BALL HACK rPS0(_inst.FD) = rPS1(_inst.FD) = out.d; if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); @@ -416,19 +374,19 @@ void fmulsx(UGeckoInstruction _inst) void fmaddx(UGeckoInstruction _inst) { - rPS0(_inst.FD) = (rPS0(_inst.FA) * rPS0(_inst.FC)) + rPS0(_inst.FB); - FPSCR.FI = 0; - FPSCR.FR = 0; + double result = (rPS0(_inst.FA) * rPS0(_inst.FC)) + rPS0(_inst.FB); + rPS0(_inst.FD) = result; + UpdateFPRF(result); if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); } + void fmaddsx(UGeckoInstruction _inst) { double d_value = (rPS0(_inst.FA) * rPS0(_inst.FC)) + rPS0(_inst.FB); - rPS0(_inst.FD) = rPS1(_inst.FD) = - static_cast(d_value); + rPS0(_inst.FD) = rPS1(_inst.FD) = static_cast(d_value); FPSCR.FI = d_value != rPS0(_inst.FD); FPSCR.FR = 0; - UpdateFPRF(rPS0(_inst.FD)); + UpdateFPRF(rPS0(_inst.FD)); if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); } @@ -436,16 +394,11 @@ void fmaddsx(UGeckoInstruction _inst) void faddx(UGeckoInstruction _inst) { rPS0(_inst.FD) = rPS0(_inst.FA) + rPS0(_inst.FB); -// FPSCR.FI = 0; -// FPSCR.FR = 1; if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); } void faddsx(UGeckoInstruction _inst) { rPS0(_inst.FD) = rPS1(_inst.FD) = static_cast(rPS0(_inst.FA) + rPS0(_inst.FB)); -// FPSCR.FI = 0; -// FPSCR.FR = 1; -// FPSCR.Hex = (rand() ^ (rand() << 8) ^ (rand() << 16)) & ~(0x000000F8); if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); } @@ -453,8 +406,6 @@ void faddsx(UGeckoInstruction _inst) void fdivx(UGeckoInstruction _inst) { rPS0(_inst.FD) = rPS0(_inst.FA) / rPS0(_inst.FB); -// FPSCR.FI = 0; -// FPSCR.FR = 1; if (fabs(rPS0(_inst.FB)) == 0.0) { FPSCR.ZX = 1; } @@ -463,8 +414,6 @@ void fdivx(UGeckoInstruction _inst) void fdivsx(UGeckoInstruction _inst) { rPS0(_inst.FD) = rPS1(_inst.FD) = static_cast(rPS0(_inst.FA) / rPS0(_inst.FB)); -// FPSCR.FI = 0; -// FPSCR.FR = 1; if (fabs(rPS0(_inst.FB)) == 0.0) { FPSCR.ZX = 1; } @@ -473,8 +422,6 @@ void fdivsx(UGeckoInstruction _inst) void fresx(UGeckoInstruction _inst) { rPS0(_inst.FD) = rPS1(_inst.FD) = static_cast(1.0f / rPS0(_inst.FB)); -// FPSCR.FI = 0; -// FPSCR.FR = 1; if (fabs(rPS0(_inst.FB)) == 0.0) { FPSCR.ZX = 1; } @@ -485,8 +432,6 @@ void fresx(UGeckoInstruction _inst) void fmsubx(UGeckoInstruction _inst) { rPS0(_inst.FD) = (rPS0(_inst.FA) * rPS0(_inst.FC)) - rPS0(_inst.FB); -// FPSCR.FI = 0; -// FPSCR.FR = 0; if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); } @@ -494,8 +439,6 @@ void fmsubsx(UGeckoInstruction _inst) { rPS0(_inst.FD) = rPS1(_inst.FD) = static_cast((rPS0(_inst.FA) * rPS0(_inst.FC)) - rPS0(_inst.FB)); -// FPSCR.FI = 0; -// FPSCR.FR = 0; if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); } @@ -503,16 +446,12 @@ void fmsubsx(UGeckoInstruction _inst) void fnmaddx(UGeckoInstruction _inst) { rPS0(_inst.FD) = -((rPS0(_inst.FA) * rPS0(_inst.FC)) + rPS0(_inst.FB)); -// FPSCR.FI = 0; -// FPSCR.FR = 0; if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); } void fnmaddsx(UGeckoInstruction _inst) { rPS0(_inst.FD) = rPS1(_inst.FD) = static_cast(-((rPS0(_inst.FA) * rPS0(_inst.FC)) + rPS0(_inst.FB))); -// FPSCR.FI = 0; -// FPSCR.FR = 0; if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); } @@ -520,16 +459,12 @@ void fnmaddsx(UGeckoInstruction _inst) void fnmsubx(UGeckoInstruction _inst) { rPS0(_inst.FD) = -((rPS0(_inst.FA) * rPS0(_inst.FC)) - rPS0(_inst.FB)); -// FPSCR.FI = 0; -// FPSCR.FR = 0; if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); } void fnmsubsx(UGeckoInstruction _inst) { rPS0(_inst.FD) = rPS1(_inst.FD) = static_cast(-((rPS0(_inst.FA) * rPS0(_inst.FC)) - rPS0(_inst.FB))); -// FPSCR.FI = 0; -// FPSCR.FR = 0; if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); } @@ -537,15 +472,11 @@ void fnmsubsx(UGeckoInstruction _inst) void fsubx(UGeckoInstruction _inst) { rPS0(_inst.FD) = rPS0(_inst.FA) - rPS0(_inst.FB); -// FPSCR.FI = 0; -// FPSCR.FR = 0; if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); } void fsubsx(UGeckoInstruction _inst) { rPS0(_inst.FD) = rPS1(_inst.FD) = static_cast(rPS0(_inst.FA) - rPS0(_inst.FB)); -// FPSCR.FI = 0; -// FPSCR.FR = 0; if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); } @@ -553,17 +484,12 @@ void fsubsx(UGeckoInstruction _inst) void frsqrtex(UGeckoInstruction _inst) { rPS0(_inst.FD) = 1.0f / (sqrt(rPS0(_inst.FB))); -// FPSCR.FI = 0; -// FPSCR.FR = 0; if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); } void fsqrtx(UGeckoInstruction _inst) { rPS0(_inst.FD) = sqrt(rPS0(_inst.FB)); -// FPSCR.FI = 0; -// FPSCR.FR = 0; - if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); } diff --git a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp index f4f8426fdc..2b5245a6d8 100644 --- a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp +++ b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp @@ -130,7 +130,7 @@ void UpdateFPSCR(UReg_FPSCR fp) void mcrfs(UGeckoInstruction _inst) { - u32 fpflags = ((FPSCR.Hex >> (4*(_inst.CRFS))) & 0xF); + u32 fpflags = ((FPSCR.Hex >> (4*(7 - _inst.CRFS))) & 0xF); switch (_inst.CRFS) { case 0: FPSCR.FX = 0; @@ -216,7 +216,7 @@ void mtfsfx(UGeckoInstruction _inst) u32 m = 0; for (int i = 0; i < 8; i++) { //7?? todo check if (fm & (1 << i)) - m |= (0xf << (i*4)); + m |= (0xF << (i*4)); } FPSCR.Hex = (FPSCR.Hex & ~m) | ((u32)(riPS0(_inst.FB)) & m); @@ -238,12 +238,15 @@ void mfcr(UGeckoInstruction _inst) void mtcrf(UGeckoInstruction _inst) { - u32 mask = 0; u32 crm = _inst.CRM; - if (crm == 0xFF) { + if (crm == 0xFF) + { SetCR(m_GPR[_inst.RS]); - } else { + } + else + { //TODO: use lookup table? probably not worth it + u32 mask = 0; for (int i = 0; i < 8; i++) { if (crm & (1 << i)) mask |= 0xF << (i*4); @@ -470,10 +473,8 @@ void crxor(UGeckoInstruction _inst) void mcrf(UGeckoInstruction _inst) { - u32 cr = GetCR(); - u32 crmask = ~(0xF0000000 >> (4*_inst.CRFD)); - u32 flags = ((cr << (4*_inst.CRFS)) & 0xF0000000) >> (4*_inst.CRFD); - SetCR((cr & crmask) | flags); + int cr_f = GetCRField(_inst.CRFS); + SetCRField(_inst.CRFD, cr_f); } void isync(UGeckoInstruction _inst) diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_FloatingPoint.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_FloatingPoint.cpp index 78b7407f83..1c47a2eb12 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_FloatingPoint.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_FloatingPoint.cpp @@ -25,196 +25,209 @@ #include "Jit.h" #include "JitRegCache.h" - const u64 GC_ALIGNED16(psSignBits2[2]) = {0x8000000000000000ULL, 0x8000000000000000ULL}; - const u64 GC_ALIGNED16(psAbsMask2[2]) = {0x7FFFFFFFFFFFFFFFULL, 0x7FFFFFFFFFFFFFFFULL}; - const double GC_ALIGNED16(psOneOne2[2]) = {1.0, 1.0}; +const u64 GC_ALIGNED16(psSignBits2[2]) = {0x8000000000000000ULL, 0x8000000000000000ULL}; +const u64 GC_ALIGNED16(psAbsMask2[2]) = {0x7FFFFFFFFFFFFFFFULL, 0x7FFFFFFFFFFFFFFFULL}; +const double GC_ALIGNED16(psOneOne2[2]) = {1.0, 1.0}; - void Jit64::fp_tri_op(int d, int a, int b, bool reversible, bool dupe, void (XEmitter::*op)(Gen::X64Reg, Gen::OpArg)) +void Jit64::fp_tri_op(int d, int a, int b, bool reversible, bool dupe, void (XEmitter::*op)(Gen::X64Reg, Gen::OpArg)) +{ + fpr.Lock(d, a, b); + if (d == a) { - fpr.Lock(d, a, b); - if (d == a) - { - fpr.LoadToX64(d, true); - (this->*op)(fpr.RX(d), fpr.R(b)); - } - else if (d == b && reversible) - { - fpr.LoadToX64(d, true); - (this->*op)(fpr.RX(d), fpr.R(a)); - } - else if (a != d && b != d) - { - // Sources different from d, can use rather quick solution - fpr.LoadToX64(d, !dupe); - MOVSD(fpr.RX(d), fpr.R(a)); - (this->*op)(fpr.RX(d), fpr.R(b)); - } - else if (b != d) - { - fpr.LoadToX64(d, !dupe); - MOVSD(XMM0, fpr.R(b)); - MOVSD(fpr.RX(d), fpr.R(a)); - (this->*op)(fpr.RX(d), Gen::R(XMM0)); - } - else // Other combo, must use two temps :( - { - MOVSD(XMM0, fpr.R(a)); - MOVSD(XMM1, fpr.R(b)); - fpr.LoadToX64(d, !dupe); - (this->*op)(XMM0, Gen::R(XMM1)); - MOVSD(fpr.RX(d), Gen::R(XMM0)); - } - if (dupe) { - ForceSinglePrecisionS(fpr.RX(d)); - MOVDDUP(fpr.RX(d), fpr.R(d)); - } - fpr.UnlockAll(); + fpr.LoadToX64(d, true); + (this->*op)(fpr.RX(d), fpr.R(b)); } - - void Jit64::fp_arith_s(UGeckoInstruction inst) + else if (d == b && reversible) { - if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITFloatingPointOff) - {Default(inst); return;} // turn off from debugger - INSTRUCTION_START; - if (inst.Rc) { - Default(inst); return; - } - bool dupe = inst.OPCD == 59; - switch (inst.SUBOP5) - { - case 18: fp_tri_op(inst.FD, inst.FA, inst.FB, false, dupe, &XEmitter::DIVSD); break; //div - case 20: fp_tri_op(inst.FD, inst.FA, inst.FB, false, dupe, &XEmitter::SUBSD); break; //sub - case 21: fp_tri_op(inst.FD, inst.FA, inst.FB, true, dupe, &XEmitter::ADDSD); break; //add - case 23: //sel - Default(inst); - break; - case 24: //res - Default(inst); - break; - case 25: fp_tri_op(inst.FD, inst.FA, inst.FC, true, dupe, &XEmitter::MULSD); break; //mul - default: - _assert_msg_(DYNA_REC, 0, "fp_arith_s WTF!!!"); - } + fpr.LoadToX64(d, true); + (this->*op)(fpr.RX(d), fpr.R(a)); } - - void Jit64::fmaddXX(UGeckoInstruction inst) + else if (a != d && b != d) + { + // Sources different from d, can use rather quick solution + fpr.LoadToX64(d, !dupe); + MOVSD(fpr.RX(d), fpr.R(a)); + (this->*op)(fpr.RX(d), fpr.R(b)); + } + else if (b != d) + { + fpr.LoadToX64(d, !dupe); + MOVSD(XMM0, fpr.R(b)); + MOVSD(fpr.RX(d), fpr.R(a)); + (this->*op)(fpr.RX(d), Gen::R(XMM0)); + } + else // Other combo, must use two temps :( { - if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITFloatingPointOff) - {Default(inst); return;} // turn off from debugger - INSTRUCTION_START; - if (inst.Rc) { - Default(inst); return; - } - - bool single_precision = inst.OPCD == 59; - - int a = inst.FA; - int b = inst.FB; - int c = inst.FC; - int d = inst.FD; - - fpr.Lock(a, b, c, d); MOVSD(XMM0, fpr.R(a)); - switch (inst.SUBOP5) - { - case 28: //msub - MULSD(XMM0, fpr.R(c)); - SUBSD(XMM0, fpr.R(b)); - break; - case 29: //madd - MULSD(XMM0, fpr.R(c)); - ADDSD(XMM0, fpr.R(b)); - break; - case 30: //nmsub - MULSD(XMM0, fpr.R(c)); - SUBSD(XMM0, fpr.R(b)); - XORPD(XMM0, M((void*)&psSignBits2)); - break; - case 31: //nmadd - MULSD(XMM0, fpr.R(c)); - ADDSD(XMM0, fpr.R(b)); - XORPD(XMM0, M((void*)&psSignBits2)); - break; - } - fpr.LoadToX64(d, false); - //YES it is necessary to dupe the result :( - //TODO : analysis - does the top reg get used? If so, dupe, if not, don't. - if (single_precision) { - ForceSinglePrecisionS(XMM0); - MOVDDUP(fpr.RX(d), R(XMM0)); - } else { - MOVSD(fpr.RX(d), R(XMM0)); - } - fpr.UnlockAll(); + MOVSD(XMM1, fpr.R(b)); + fpr.LoadToX64(d, !dupe); + (this->*op)(XMM0, Gen::R(XMM1)); + MOVSD(fpr.RX(d), Gen::R(XMM0)); } - - void Jit64::fmrx(UGeckoInstruction inst) - { - if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITFloatingPointOff) - {Default(inst); return;} // turn off from debugger - INSTRUCTION_START; - if (inst.Rc) { - Default(inst); return; - } - int d = inst.FD; - int b = inst.FB; - fpr.LoadToX64(d, true); // we don't want to destroy the high bit - MOVSD(fpr.RX(d), fpr.R(b)); + if (dupe) { + ForceSinglePrecisionS(fpr.RX(d)); + MOVDDUP(fpr.RX(d), fpr.R(d)); + } + fpr.UnlockAll(); +} + +void Jit64::fp_arith_s(UGeckoInstruction inst) +{ + if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITFloatingPointOff) + {Default(inst); return;} // turn off from debugger + INSTRUCTION_START; + if (inst.Rc) { + Default(inst); return; } - void Jit64::fcmpx(UGeckoInstruction inst) + // Only the interpreter has "proper" support for (some) FP flags + if (inst.SUBOP5 == 25 && Core::g_CoreStartupParameter.bEnableFPRF) { + Default(inst); return; + } + + bool dupe = inst.OPCD == 59; + switch (inst.SUBOP5) { - if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITFloatingPointOff) - {Default(inst); return;} // turn off from debugger - INSTRUCTION_START; - if (jo.fpAccurateFlags) - { - Default(inst); - return; - } - bool ordered = inst.SUBOP10 == 32; - /* - double fa = rPS0(_inst.FA); - double fb = rPS0(_inst.FB); - u32 compareResult; + case 18: fp_tri_op(inst.FD, inst.FA, inst.FB, false, dupe, &XEmitter::DIVSD); break; //div + case 20: fp_tri_op(inst.FD, inst.FA, inst.FB, false, dupe, &XEmitter::SUBSD); break; //sub + case 21: fp_tri_op(inst.FD, inst.FA, inst.FB, true, dupe, &XEmitter::ADDSD); break; //add + case 23: //sel + Default(inst); + break; + case 24: //res + Default(inst); + break; + case 25: fp_tri_op(inst.FD, inst.FA, inst.FC, true, dupe, &XEmitter::MULSD); break; //mul + default: + _assert_msg_(DYNA_REC, 0, "fp_arith_s WTF!!!"); + } +} - if(IsNAN(fa) || IsNAN(fb)) compareResult = 1; - else if(fa < fb) compareResult = 8; - else if(fa > fb) compareResult = 4; - else compareResult = 2; +void Jit64::fmaddXX(UGeckoInstruction inst) +{ + if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITFloatingPointOff) + {Default(inst); return;} // turn off from debugger + INSTRUCTION_START; + if (inst.Rc) { + Default(inst); return; + } + // Only the interpreter has "proper" support for (some) FP flags + if (inst.SUBOP5 == 29 && Core::g_CoreStartupParameter.bEnableFPRF) { + Default(inst); return; + } - FPSCR.FPRF = compareResult; - CR = (CR & (~(0xf0000000 >> (_inst.CRFD * 4)))) | (compareResult << ((7 - _inst.CRFD) * 4)); + bool single_precision = inst.OPCD == 59; + + int a = inst.FA; + int b = inst.FB; + int c = inst.FC; + int d = inst.FD; + + fpr.Lock(a, b, c, d); + MOVSD(XMM0, fpr.R(a)); + switch (inst.SUBOP5) + { + case 28: //msub + MULSD(XMM0, fpr.R(c)); + SUBSD(XMM0, fpr.R(b)); + break; + case 29: //madd + MULSD(XMM0, fpr.R(c)); + ADDSD(XMM0, fpr.R(b)); + break; + case 30: //nmsub + MULSD(XMM0, fpr.R(c)); + SUBSD(XMM0, fpr.R(b)); + XORPD(XMM0, M((void*)&psSignBits2)); + break; + case 31: //nmadd + MULSD(XMM0, fpr.R(c)); + ADDSD(XMM0, fpr.R(b)); + XORPD(XMM0, M((void*)&psSignBits2)); + break; + } + fpr.LoadToX64(d, false); + //YES it is necessary to dupe the result :( + //TODO : analysis - does the top reg get used? If so, dupe, if not, don't. + if (single_precision) { + ForceSinglePrecisionS(XMM0); + MOVDDUP(fpr.RX(d), R(XMM0)); + } else { + MOVSD(fpr.RX(d), R(XMM0)); + } + // SMB checks flags after this op. Let's lie. + //AND(32, M(&PowerPC::ppcState.fpscr), Imm32(~((0x80000000 >> 19) | (0x80000000 >> 15)))); + //OR(32, M(&PowerPC::ppcState.fpscr), Imm32((0x80000000 >> 16))); + fpr.UnlockAll(); +} + +void Jit64::fmrx(UGeckoInstruction inst) +{ + if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITFloatingPointOff) + {Default(inst); return;} // turn off from debugger + INSTRUCTION_START; + if (inst.Rc) { + Default(inst); return; + } + int d = inst.FD; + int b = inst.FB; + fpr.LoadToX64(d, true); // we don't want to destroy the high bit + MOVSD(fpr.RX(d), fpr.R(b)); +} + +void Jit64::fcmpx(UGeckoInstruction inst) +{ + if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITFloatingPointOff) + {Default(inst); return;} // turn off from debugger + INSTRUCTION_START; + if (jo.fpAccurateFlags) + { + Default(inst); + return; + } + bool ordered = inst.SUBOP10 == 32; +/* +double fa = rPS0(_inst.FA); +double fb = rPS0(_inst.FB); +u32 compareResult; + +if(IsNAN(fa) || IsNAN(fb)) compareResult = 1; +else if(fa < fb) compareResult = 8; +else if(fa > fb) compareResult = 4; +else compareResult = 2; + +FPSCR.FPRF = compareResult; +CR = (CR & (~(0xf0000000 >> (_inst.CRFD * 4)))) | (compareResult << ((7 - _inst.CRFD) * 4)); */ - int a = inst.FA; - int b = inst.FB; - int crf = inst.CRFD; - int shift = crf * 4; - //FPSCR - //XOR(32,R(EAX),R(EAX)); + int a = inst.FA; + int b = inst.FB; + int crf = inst.CRFD; + int shift = crf * 4; + //FPSCR + //XOR(32,R(EAX),R(EAX)); - fpr.Lock(a,b); - if (a != b) - fpr.LoadToX64(a, true); + fpr.Lock(a,b); + if (a != b) + fpr.LoadToX64(a, true); - // USES_CR - if (ordered) - COMISD(fpr.R(a).GetSimpleReg(), fpr.R(b)); - else - UCOMISD(fpr.R(a).GetSimpleReg(), fpr.R(b)); - FixupBranch pLesser = J_CC(CC_B); - FixupBranch pGreater = J_CC(CC_A); - // _x86Reg == 0 - MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x2)); - FixupBranch continue1 = J(); - // _x86Reg > 0 - SetJumpTarget(pGreater); - MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x4)); - FixupBranch continue2 = J(); - // _x86Reg < 0 - SetJumpTarget(pLesser); - MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x8)); - SetJumpTarget(continue1); - SetJumpTarget(continue2); - fpr.UnlockAll(); - } + // USES_CR + if (ordered) + COMISD(fpr.R(a).GetSimpleReg(), fpr.R(b)); + else + UCOMISD(fpr.R(a).GetSimpleReg(), fpr.R(b)); + FixupBranch pLesser = J_CC(CC_B); + FixupBranch pGreater = J_CC(CC_A); + // _x86Reg == 0 + MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x2)); + FixupBranch continue1 = J(); + // _x86Reg > 0 + SetJumpTarget(pGreater); + MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x4)); + FixupBranch continue2 = J(); + // _x86Reg < 0 + SetJumpTarget(pLesser); + MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x8)); + SetJumpTarget(continue1); + SetJumpTarget(continue2); + fpr.UnlockAll(); +} diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/IR.cpp b/Source/Core/Core/Src/PowerPC/Jit64IL/IR.cpp index 65cd34deb4..12efc22d1e 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/IR.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64IL/IR.cpp @@ -1878,7 +1878,7 @@ static void DoWriteCode(IRBuilder* ibuild, Jit64* Jit, bool UseProfile, bool Mak case StorePaired: { regSpill(RI, EAX); regSpill(RI, EDX); - unsigned quantreg = *I >> 24; + u32 quantreg = *I >> 24; Jit->MOVZX(32, 16, EAX, M(&PowerPC::ppcState.spr[SPR_GQR0 + quantreg])); Jit->MOVZX(32, 8, EDX, R(AL)); // FIXME: Fix ModR/M encoding to allow [EDX*4+disp32]! diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/Jit_LoadStorePaired.cpp b/Source/Core/Core/Src/PowerPC/Jit64IL/Jit_LoadStorePaired.cpp index 102fe399a5..74c4745ed9 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/Jit_LoadStorePaired.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64IL/Jit_LoadStorePaired.cpp @@ -39,8 +39,6 @@ //#define INSTRUCTION_START Default(inst); return; #define INSTRUCTION_START -// The big problem is likely instructions that set the quantizers in the same block. -// We will have to break block after quantizers are written to. void Jit64::psq_st(UGeckoInstruction inst) { INSTRUCTION_START diff --git a/Source/Core/Core/Src/PowerPC/PPCTables.cpp b/Source/Core/Core/Src/PowerPC/PPCTables.cpp index cf9d4f9db1..547eacf9c7 100644 --- a/Source/Core/Core/Src/PowerPC/PPCTables.cpp +++ b/Source/Core/Core/Src/PowerPC/PPCTables.cpp @@ -687,6 +687,7 @@ void InitTables() } #define OPLOG +#define OP_TO_LOG "mcrfs" #ifdef OPLOG namespace { @@ -700,7 +701,7 @@ void CompileInstruction(UGeckoInstruction _inst) GekkoOPInfo *info = GetOpInfo(_inst); if (info) { #ifdef OPLOG - if (!strcmp(info->opname, "mffsx")) { ///"mcrfs" + if (!strcmp(info->opname, OP_TO_LOG)){ ///"mcrfs" rsplocations.push_back(jit.js.compilerPC); } #endif @@ -764,9 +765,9 @@ void LogCompiledInstructions() } fclose(f); #ifdef OPLOG - f = fopen(StringFromFormat(FULL_LOGS_DIR "mcrfs_at.txt", time).c_str(), "w"); + f = fopen(StringFromFormat(FULL_LOGS_DIR OP_TO_LOG "_at.txt", time).c_str(), "w"); for (size_t i = 0; i < rsplocations.size(); i++) { - fprintf(f, "mffsx: %08x\n", rsplocations[i]); + fprintf(f, OP_TO_LOG ": %08x\n", rsplocations[i]); } fclose(f); #endif diff --git a/Source/Core/Core/Src/PowerPC/PowerPC.cpp b/Source/Core/Core/Src/PowerPC/PowerPC.cpp index 2e55b7f007..80d1a4592e 100644 --- a/Source/Core/Core/Src/PowerPC/PowerPC.cpp +++ b/Source/Core/Core/Src/PowerPC/PowerPC.cpp @@ -43,20 +43,22 @@ static CoreMode mode; void CompactCR() { - ppcState.cr = 0; - for (int i = 0; i < 8; i++) { - ppcState.cr |= ppcState.cr_fast[i] << (28 - i * 4); + u32 new_cr = ppcState.cr_fast[0] << 28; + for (int i = 1; i < 8; i++) + { + new_cr |= ppcState.cr_fast[i] << (28 - i * 4); } + ppcState.cr = new_cr; } void ExpandCR() { - for (int i = 0; i < 8; i++) { + for (int i = 0; i < 8; i++) + { ppcState.cr_fast[i] = (ppcState.cr >> (28 - i * 4)) & 0xF; } } - void DoState(PointerWrap &p) { p.Do(ppcState); @@ -304,7 +306,7 @@ void CheckExceptions() ppcState.Exceptions &= ~EXCEPTION_ALIGNMENT; } - // EXTERNAL INTTERUPT + // EXTERNAL INTERRUPT else if (MSR & 0x0008000) //hacky...the exception shouldn't be generated if EE isn't set... { if (ppcState.Exceptions & EXCEPTION_EXTERNAL_INT) @@ -353,4 +355,78 @@ void OnIdleIL() CoreTiming::Idle(); } -} // namespace \ No newline at end of file +int PPCFPClass(double dvalue) +{ +#ifdef _WIN32 + switch (_fpclass(dvalue)) + { + case _FPCLASS_SNAN: + case _FPCLASS_QNAN: return 0x11; + case _FPCLASS_NINF: return 0x9; + case _FPCLASS_NN: return 0x8; + case _FPCLASS_ND: return 0x18; + case _FPCLASS_NZ: return 0x12; + case _FPCLASS_PZ: return 0x2; + case _FPCLASS_PD: return 0x14; + case _FPCLASS_PN: return 0x4; + case _FPCLASS_PINF: return 0x5; + default: return 0x4; + } +#else + // TODO: Make sure the below is equivalent to the above - then switch win32 implementation to it. + union { + double d; + u64 i; + } value; + value.d = dvalue; + // 5 bits (C, <, >, =, ?) + // top: class descriptor + FPSCR.FPRF = 4; + // easy cases first + if (value.i == 0) { + // positive zero + FPSCR.FPRF = 0x2; + } else if (value.i == 0x8000000000000000ULL) { + // negative zero + FPSCR.FPRF = 0x12; + } else if (value.i == 0x7FF0000000000000ULL) { + // positive inf + FPSCR.FPRF = 0x5; + } else if (value.i == 0xFFF0000000000000ULL) { + // negative inf + FPSCR.FPRF = 0x9; + } else { + // OK let's dissect this thing. + int sign = (int)(value.i & 0x8000000000000000ULL) ? 1 : 0; + int exp = (int)((value.i >> 52) & 0x7FF); + if (exp >= 1 && exp <= 2046) { + // Nice normalized number. + if (sign) { + FPSCR.FPRF = 0x8; // negative + } else { + FPSCR.FPRF = 0x4; // positive + } + return; + } + u64 mantissa = value.i & 0x000FFFFFFFFFFFFFULL; + if (exp == 0 && mantissa) { + // Denormalized number. + if (sign) { + FPSCR.FPRF = 0x18; + } else { + FPSCR.FPRF = 0x14; + } + } else if (exp == 0x7FF && mantissa /* && mantissa_top*/) { + FPSCR.FPRF = 0x11; // Quiet NAN + return; + } + } +#endif +} + +} // namespace + +void UpdateFPRF(double dvalue) +{ + FPSCR.FPRF = PowerPC::PPCFPClass(dvalue); +} \ No newline at end of file diff --git a/Source/Core/Core/Src/PowerPC/PowerPC.h b/Source/Core/Core/Src/PowerPC/PowerPC.h index c025660903..1540c72aa4 100644 --- a/Source/Core/Core/Src/PowerPC/PowerPC.h +++ b/Source/Core/Core/Src/PowerPC/PowerPC.h @@ -127,9 +127,8 @@ void OnIdleIL(); } // namespace -// Wrappers to make it easier to in the future completely replace the storage of CR and Carry bits -// to something more x86-friendly. These are not used 100% consistently yet - and if we do this, we -// need the corresponding stuff on the JIT side too. +// Fast CR system - store them in single bytes instead of nibbles to not have to +// mask/shift them out. // These are intended to stay fast, probably become faster, and are not likely to slow down much if at all. inline void SetCRField(int cr_field, int value) { @@ -187,4 +186,6 @@ inline void SetXER_SO(int value) { ((UReg_XER&)PowerPC::ppcState.spr[SPR_XER]).SO = value; } +void UpdateFPRF(double dvalue); + #endif diff --git a/Source/Core/DebuggerWX/Src/RegisterView.cpp b/Source/Core/DebuggerWX/Src/RegisterView.cpp index 2444e98b96..3a4bd1124a 100644 --- a/Source/Core/DebuggerWX/Src/RegisterView.cpp +++ b/Source/Core/DebuggerWX/Src/RegisterView.cpp @@ -18,6 +18,7 @@ #include "Debugger.h" #include "RegisterView.h" #include "PowerPC/PowerPC.h" +#include "HW/PeripheralInterface.h" // F-zero 80005e60 wtf?? @@ -25,7 +26,7 @@ extern const char* GetGPRName(unsigned int index); extern const char* GetFPRName(unsigned int index); static const char *special_reg_names[] = { - "PC", "LR", "CTR", "CR", "FPSCR", "SRR0", "SRR1", + "PC", "LR", "CTR", "CR", "FPSCR", "SRR0", "SRR1", "Exceptions", "Int Mask", "Int Cause", }; static u32 GetSpecialRegValue(int reg) { @@ -37,7 +38,10 @@ static u32 GetSpecialRegValue(int reg) { case 4: return PowerPC::ppcState.fpscr; case 5: return PowerPC::ppcState.spr[SPR_SRR0]; case 6: return PowerPC::ppcState.spr[SPR_SRR1]; - default: return 0; + case 7: return PowerPC::ppcState.Exceptions; + case 8: return CPeripheralInterface::GetMask(); + case 9: return CPeripheralInterface::GetCause(); + default: return 0; } } diff --git a/Source/Core/DebuggerWX/Src/RegisterView.h b/Source/Core/DebuggerWX/Src/RegisterView.h index 5807af6a7b..d023ff7198 100644 --- a/Source/Core/DebuggerWX/Src/RegisterView.h +++ b/Source/Core/DebuggerWX/Src/RegisterView.h @@ -29,14 +29,16 @@ // PC (specials) // LR // CTR -// CR0 +// CR0-7 +// FPSCR // SRR0 // SRR1 +// Exceptions class CRegTable : public wxGridTableBase { enum { - NUM_SPECIALS = 7, + NUM_SPECIALS = 10, }; public: diff --git a/Source/Core/DolphinWX/Src/BootManager.cpp b/Source/Core/DolphinWX/Src/BootManager.cpp index f3f692e585..2cf50f2830 100644 --- a/Source/Core/DolphinWX/Src/BootManager.cpp +++ b/Source/Core/DolphinWX/Src/BootManager.cpp @@ -131,6 +131,7 @@ bool BootCore(const std::string& _rFilename) ini->Get("Core", "UseDualCore", &StartUp.bUseDualCore, StartUp.bUseDualCore); ini->Get("Core", "SkipIdle", &StartUp.bSkipIdle, StartUp.bSkipIdle); ini->Get("Core", "OptimizeQuantizers", &StartUp.bOptimizeQuantizers, StartUp.bOptimizeQuantizers); + ini->Get("Core", "EnableFPRF", &StartUp.bEnableFPRF, StartUp.bEnableFPRF); ini->Get("Core", "TLBHack", &StartUp.iTLBHack, StartUp.iTLBHack); // ------------------------------------------------