diff --git a/Source/Core/Common/Src/x64Emitter.cpp b/Source/Core/Common/Src/x64Emitter.cpp
index bd2042a7ce..403d854207 100644
--- a/Source/Core/Common/Src/x64Emitter.cpp
+++ b/Source/Core/Common/Src/x64Emitter.cpp
@@ -669,6 +669,10 @@ namespace Gen
     void MOVSX(int dbits, int sbits, X64Reg dest, OpArg src)
     {
         if (src.IsImm()) _assert_msg_(DYNA_REC, 0, "MOVSX - Imm argument");
+        if (dbits == sbits) {
+            MOV(dbits, R(dest), src);
+            return;
+        }
         src.operandReg = (u8)dest;
         if (dbits == 16) Write8(0x66);
         src.WriteRex(dbits == 64);
@@ -696,6 +700,10 @@ namespace Gen
     void MOVZX(int dbits, int sbits, X64Reg dest, OpArg src)
     {
         if (src.IsImm()) _assert_msg_(DYNA_REC, 0, "MOVZX - Imm argument");
+        if (dbits == sbits) {
+            MOV(dbits, R(dest), src);
+            return;
+        }
         src.operandReg = (u8)dest;
         if (dbits == 16) Write8(0x66);
         src.WriteRex(dbits == 64);
diff --git a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_Integer.cpp b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_Integer.cpp
index 75b76015dc..66ce7b5ec9 100644
--- a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_Integer.cpp
+++ b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_Integer.cpp
@@ -346,7 +346,7 @@ void CInterpreter::srawx(UGeckoInstruction _inst)
     }
     else
     {
-        int amount = rb&0x1f;
+        int amount = rb & 0x1f;
         if (amount == 0)
         {
             m_GPR[_inst.RA] = m_GPR[_inst.RS];
diff --git a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_LoadStore.cpp b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_LoadStore.cpp
index b02eb17e59..9d893c1fbe 100644
--- a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_LoadStore.cpp
+++ b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_LoadStore.cpp
@@ -115,13 +115,13 @@ void CInterpreter::lfsx(UGeckoInstruction _inst)
 
 void CInterpreter::lha(UGeckoInstruction _inst)
 {
-    m_GPR[_inst.RD] = (u32)(s16)Memory::Read_U16(Helper_Get_EA(_inst));
+    m_GPR[_inst.RD] = (u32)(s32)(s16)Memory::Read_U16(Helper_Get_EA(_inst));
 }
 
 void CInterpreter::lhau(UGeckoInstruction _inst)
 {
     u32 uAddress = Helper_Get_EA_U(_inst);
-    m_GPR[_inst.RD] = (u32)(s16)Memory::Read_U16(uAddress);
+    m_GPR[_inst.RD] = (u32)(s32)(s16)Memory::Read_U16(uAddress);
     m_GPR[_inst.RA] = uAddress;
 }
diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp
index fbb9b20550..45bcee2529 100644
--- a/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp
+++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp
@@ -188,6 +188,7 @@ detect immediates in stb stw sth
 
 TODO
 lha
+srawx
 addic_rc
 addex
 subfcx
diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit.h b/Source/Core/Core/Src/PowerPC/Jit64/Jit.h
index 15c791ac67..11620c4b12 100644
--- a/Source/Core/Core/Src/PowerPC/Jit64/Jit.h
+++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit.h
@@ -86,8 +86,11 @@ namespace Jit64
 
     void FlushRegCaches();
 
-    void SafeLoadRegToEAX(Gen::X64Reg reg, int accessSize, s32 offset);
-
+    void UnsafeLoadRegToReg(Gen::X64Reg reg_addr, Gen::X64Reg reg_value, int accessSize, s32 offset = 0, bool signExtend = false);
+    void UnsafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset = 0);
+    void SafeLoadRegToEAX(Gen::X64Reg reg, int accessSize, s32 offset, bool signExtend = false);
+    void SafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset);
+
     void addx(UGeckoInstruction inst);
     void orx(UGeckoInstruction inst);
     void andx(UGeckoInstruction inst);
@@ -144,6 +147,7 @@ namespace Jit64
     void fmaddXX(UGeckoInstruction inst);
     void stX(UGeckoInstruction inst); //stw sth stb
     void lXz(UGeckoInstruction inst);
+    void lha(UGeckoInstruction inst);
     void rlwinmx(UGeckoInstruction inst);
     void rlwimix(UGeckoInstruction inst);
     void rlwnmx(UGeckoInstruction inst);
@@ -153,6 +157,7 @@ namespace Jit64
     void dcbz(UGeckoInstruction inst);
     void lfsx(UGeckoInstruction inst);
    void subfic(UGeckoInstruction inst);
+    void subfcx(UGeckoInstruction inst);
     void subfx(UGeckoInstruction inst);
     void lbzx(UGeckoInstruction inst);
diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_Integer.cpp
index ab3ad5640d..ae09fc056b 100644
--- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_Integer.cpp
+++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_Integer.cpp
@@ -29,45 +29,56 @@ namespace Jit64
 {
+    // Assumes that the flags were just set through an addition.
+    void GenerateCarry(X64Reg temp_reg) {
+        SETcc(CC_C, R(temp_reg));
+        AND(32, M(&XER), Imm32(~(1 << 29)));
+        SHL(32, R(temp_reg), Imm8(29));
+        OR(32, M(&XER), R(temp_reg));
+    }
+
     typedef u32 (*Operation)(u32 a, u32 b);
     u32 Add(u32 a, u32 b) {return a + b;}
     u32 Or (u32 a, u32 b) {return a | b;}
     u32 And(u32 a, u32 b) {return a & b;}
     u32 Xor(u32 a, u32 b) {return a ^ b;}
 
-    void regimmop(int d, int a, bool binary, u32 value, Operation doop, void(*op)(int, const OpArg&, const OpArg&), bool Rc = false)
+    void regimmop(int d, int a, bool binary, u32 value, Operation doop, void(*op)(int, const OpArg&, const OpArg&), bool Rc = false, bool carry = false)
     {
         gpr.Lock(d,a);
-        if (a || binary)
+        if (a || binary || carry)  // yeh nasty special case addic
         {
             if (a == d)
             {
-                if (gpr.R(d).IsImm())
+                if (gpr.R(d).IsImm() && !carry)
                 {
                     gpr.SetImmediate32(d, doop((u32)gpr.R(d).offset, value));
                 }
                 else
                 {
                     if (gpr.R(d).IsImm())
-                        gpr.LoadToX64(d,false);
-
+                        gpr.LoadToX64(d, false);
                     op(32, gpr.R(d), Imm32(value)); //m_GPR[d] = m_GPR[_inst.RA] + _inst.SIMM_16;
+                    if (carry)
+                        GenerateCarry(EAX);
                 }
             }
             else
             {
-                gpr.LoadToX64(d,false);
+                gpr.LoadToX64(d, false);
                 MOV(32, gpr.R(d), gpr.R(a));
                 op(32, gpr.R(d), Imm32(value)); //m_GPR[d] = m_GPR[_inst.RA] + _inst.SIMM_16;
+                if (carry)
+                    GenerateCarry(EAX);
             }
         }
-        else if (doop == Add)
+        else if (doop == Add && !carry)
         {
             gpr.SetImmediate32(d, value);
         }
         else
         {
-            _assert_msg_(DYNA_REC, 0, "WTF");
+            _assert_msg_(DYNA_REC, 0, "WTF regimmop");
         }
         if (Rc)
         {
@@ -95,8 +106,8 @@ namespace Jit64
         case 29: regimmop(a, s, true, inst.UIMM << 16, And, AND, true); break;
         case 26: regimmop(a, s, true, inst.UIMM, Xor, XOR, false); break; //xori
         case 27: regimmop(a, s, true, inst.UIMM << 16, Xor, XOR, false); break; //xoris
-        case 12: //addic
-        case 13: //addic_rc
+        case 12: //regimmop(d, a, false, (u32)(s32)inst.SIMM_16, Add, ADD, false, true); //addic
+        case 13: //regimmop(d, a, true, (u32)(s32)inst.SIMM_16, Add, ADD, true, true); //addic_rc
         default:
             Default(inst);
             break;
@@ -314,20 +325,32 @@ namespace Jit64
         else
             gpr.LoadToX64(a, true, true);
         int imm = inst.SIMM_16;
-        // XOR(32, R(ECX), R(ECX));
         MOV(32, R(EAX), gpr.R(a));
         NOT(32, R(EAX));
         ADD(32, R(EAX), Imm32(imm+1));
         MOV(32, gpr.R(d), R(EAX));
-        SETcc(CC_C, R(ECX));
-        AND(32, M(&XER), Imm32(~(1 << 29)));
-        SHL(32, R(ECX), Imm8(29));
-        OR(32, M(&XER), R(ECX));
+        GenerateCarry(ECX);
         gpr.UnlockAll();
         gpr.UnlockAllX();
         // This instruction has no RC flag
     }
 
+    void subfcx(UGeckoInstruction inst)
+    {
+        INSTRUCTION_START;
+        Default(inst);
+        return;
+        /*
+        u32 a = m_GPR[_inst.RA];
+        u32 b = m_GPR[_inst.RB];
+        m_GPR[_inst.RD] = b - a;
+        SetCarry(a == 0 || Helper_Carry(b, 0-a));
+
+        if (_inst.OE) PanicAlert("OE: subfcx");
+        if (_inst.Rc) Helper_UpdateCR0(m_GPR[_inst.RD]);
+        */
+    }
+
     void subfx(UGeckoInstruction inst)
     {
         INSTRUCTION_START;
@@ -447,9 +470,6 @@ namespace Jit64
         }
     }
 
-    // __________________________________________________________________________________________________
-    // Helper_Mask
-    //
     u32 Helper_Mask(u8 mb, u8 me)
     {
         return (((mb > me) ?
@@ -459,7 +479,6 @@ namespace Jit64
             );
     }
 
-
     void addx(UGeckoInstruction inst)
     {
         INSTRUCTION_START;
@@ -527,10 +546,7 @@ namespace Jit64
         MOV(32, R(EAX), gpr.R(a));
         ADC(32, R(EAX), gpr.R(b));
         MOV(32, gpr.R(d), R(EAX));
-        SETcc(CC_C, R(ECX)); // store away the resulting carry flag
-        AND(32, M(&XER), Imm32(~(1 << 29)));
-        SHL(32, R(ECX), Imm8(29));
-        OR(32, M(&XER), R(ECX));
+        GenerateCarry(ECX);
         gpr.UnlockAll();
         gpr.UnlockAllX();
         if (inst.Rc)
diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp
index d5681638ca..fa792f54b6 100644
--- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp
+++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp
@@ -49,27 +49,35 @@ namespace Jit64
     static u64 GC_ALIGNED16(temp64);
     static u32 GC_ALIGNED16(temp32);
 
-    void SafeLoadRegToEAX(X64Reg reg, int accessSize, s32 offset)
+    void UnsafeLoadRegToReg(X64Reg reg_addr, X64Reg reg_value, int accessSize, s32 offset, bool signExtend)
     {
-        if (offset)
-            ADD(32, R(reg), Imm32((u32)offset));
-        TEST(32, R(reg), Imm32(0x0C000000));
-        FixupBranch argh = J_CC(CC_NZ);
-        if (accessSize != 32)
-            XOR(32, R(EAX), R(EAX));
 #ifdef _M_IX86
-        AND(32, R(reg), Imm32(Memory::MEMVIEW32_MASK));
-        MOV(accessSize, R(EAX), MDisp(reg, (u32)Memory::base));
+        AND(32, R(reg_addr), Imm32(Memory::MEMVIEW32_MASK));
+        MOVZX(32, accessSize, reg_value, MDisp(reg_addr, (u32)Memory::base + offset));
 #else
-        MOV(accessSize, R(EAX), MComplex(RBX, reg, SCALE_1, 0));
+        MOVZX(32, accessSize, reg_value, MComplex(RBX, reg_addr, SCALE_1, offset));
 #endif
         if (accessSize == 32)
+        {
            BSWAP(32, EAX);
+        }
         else if (accessSize == 16)
         {
            BSWAP(32, EAX);
            SHR(32, R(EAX), Imm8(16));
         }
+        if (signExtend && accessSize < 32) {
+            MOVSX(32, accessSize, EAX, R(EAX));
+        }
+    }
+
+    void SafeLoadRegToEAX(X64Reg reg, int accessSize, s32 offset, bool signExtend)
+    {
+        if (offset)
+            ADD(32, R(reg), Imm32((u32)offset));
+        TEST(32, R(reg), Imm32(0x0C000000));
+        FixupBranch argh = J_CC(CC_NZ);
+        UnsafeLoadRegToReg(reg, EAX, accessSize, 0, signExtend);
         FixupBranch arg2 = J();
         SetJumpTarget(argh);
         switch (accessSize)
@@ -81,6 +89,34 @@ namespace Jit64
         SetJumpTarget(arg2);
     }
 
+    void UnsafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset)
+    {
+        if (accessSize != 32) {
+            PanicAlert("UnsafeWriteRegToReg can't handle %i byte accesses", accessSize);
+        }
+        BSWAP(32, reg_value);
+#ifdef _M_IX86
+        AND(32, R(reg_addr), Imm32(Memory::MEMVIEW32_MASK));
+        MOV(accessSize, MDisp(reg_addr, (u32)Memory::base), R(reg_value));
+#else
+        MOV(accessSize, MComplex(RBX, reg_addr, SCALE_1, 0), R(reg_value));
+#endif
+    }
+
+    // Destroys both arg registers
+    void SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset)
+    {
+        if (offset)
+            ADD(32, R(reg_addr), Imm32(offset));
+        TEST(32, R(reg_addr), Imm32(0x0C000000));
+        FixupBranch argh = J_CC(CC_NZ);
+        UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, 0);
+        FixupBranch arg2 = J();
+        SetJumpTarget(argh);
+        ABI_CallFunctionRR((void *)&Memory::Write_U32, ABI_PARAM1, ABI_PARAM2);
+        SetJumpTarget(arg2);
+    }
+
     void lbzx(UGeckoInstruction inst)
     {
         INSTRUCTION_START;
@@ -146,6 +182,7 @@ namespace Jit64
 #endif
         // Safe and boring
         gpr.Flush(FLUSH_VOLATILE);
+        fpr.Flush(FLUSH_VOLATILE);
         gpr.Lock(d, a);
         MOV(32, R(ABI_PARAM1), gpr.R(a));
         SafeLoadRegToEAX(ABI_PARAM1, accessSize, offset);
@@ -173,6 +210,24 @@ namespace Jit64
         gpr.UnlockAll();
     }
 
+    void lha(UGeckoInstruction inst)
+    {
+        INSTRUCTION_START;
+        int d = inst.RD;
+        int a = inst.RA;
+        s32 offset = (s32)(s16)inst.SIMM_16;
+        // Safe and boring
+        gpr.Flush(FLUSH_VOLATILE);
+        fpr.Flush(FLUSH_VOLATILE);
+        gpr.Lock(d, a);
+        MOV(32, R(ABI_PARAM1), gpr.R(a));
+        SafeLoadRegToEAX(ABI_PARAM1, 16, offset, true);
+        gpr.LoadToX64(d, false, true);
+        MOV(32, gpr.R(d), R(EAX));
+        gpr.UnlockAll();
+        return;
+    }
+
     // Zero cache line.
     void dcbz(UGeckoInstruction inst)
     {
diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStoreFloating.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStoreFloating.cpp
index f49f395b2b..8277a7b4f7 100644
--- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStoreFloating.cpp
+++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStoreFloating.cpp
@@ -70,17 +70,11 @@ void lfs(UGeckoInstruction inst)
     gpr.Lock(d, a);
     MOV(32, R(ABI_PARAM1), gpr.R(a));
 
-#ifdef _M_X64
     if (!jo.noAssumeFPLoadFromMem)
     {
-        MOV(32, R(EAX), MComplex(RBX, ABI_PARAM1, SCALE_1, offset));
-//#else
-//        MOV(32, R(EAX), MDisp(ABI_PARAM1, (u32)Memory::GetMainRAMPtr() + (u32)offset));
-//#endif
-        BSWAP(32, EAX);
+        UnsafeLoadRegToReg(ABI_PARAM1, EAX, 32, offset, false);
     }
     else
-#endif
     {
         SafeLoadRegToEAX(ABI_PARAM1, 32, offset);
     }
@@ -145,7 +139,6 @@ void stfd(UGeckoInstruction inst)
 void stfs(UGeckoInstruction inst)
 {
     INSTRUCTION_START;
-    DISABLE_32BIT;
     bool update = inst.OPCD & 1;
     int s = inst.RS;
     int a = inst.RA;
@@ -156,9 +149,8 @@ void stfs(UGeckoInstruction inst)
         gpr.Flush(FLUSH_VOLATILE);
         gpr.Lock(a);
         fpr.Lock(s);
+        gpr.LockX(ABI_PARAM1, ABI_PARAM2);
         MOV(32, R(ABI_PARAM2), gpr.R(a));
-        if (offset)
-            ADD(32, R(ABI_PARAM2), Imm32((u32)offset));
         if (update && offset)
         {
             MOV(32, gpr.R(a), R(ABI_PARAM2));
@@ -167,15 +159,9 @@ void stfs(UGeckoInstruction inst)
         MOVSS(M(&temp32), XMM0);
         MOV(32, R(ABI_PARAM1), M(&temp32));
 
-        TEST(32, R(ABI_PARAM2), Imm32(0x0C000000));
-        FixupBranch argh = J_CC(CC_NZ);
-        BSWAP(32, ABI_PARAM1);
-        MOV(32, MComplex(RBX, ABI_PARAM2, SCALE_1, 0), R(ABI_PARAM1));
-        FixupBranch arg2 = J();
-        SetJumpTarget(argh);
-        CALL((void *)&Memory::Write_U32);
-        SetJumpTarget(arg2);
+        SafeWriteRegToReg(ABI_PARAM1, ABI_PARAM2, 32, offset);
         gpr.UnlockAll();
+        gpr.UnlockAllX();
         fpr.UnlockAll();
     }
     else
@@ -187,14 +173,12 @@ void stfs(UGeckoInstruction inst)
 void lfsx(UGeckoInstruction inst)
 {
     INSTRUCTION_START;
-    DISABLE_32BIT;
     fpr.Lock(inst.RS);
     fpr.LoadToX64(inst.RS, false, true);
     MOV(32, R(EAX), gpr.R(inst.RB));
     if (inst.RA)
         ADD(32, R(EAX), gpr.R(inst.RA));
-    MOV(32, R(EAX), MComplex(RBX, EAX, SCALE_1, 0));
-    BSWAP(32, EAX);
+    UnsafeLoadRegToReg(EAX, EAX, 32, false);
     MOV(32, M(&temp32), R(EAX));
     CVTSS2SD(XMM0, M(&temp32));
     MOVDDUP(fpr.R(inst.RS).GetSimpleReg(), R(XMM0));
diff --git a/Source/Core/Core/Src/PowerPC/PPCTables.cpp b/Source/Core/Core/Src/PowerPC/PPCTables.cpp
index a16fc46bf0..2f8bad26de 100644
--- a/Source/Core/Core/Src/PowerPC/PPCTables.cpp
+++ b/Source/Core/Core/Src/PowerPC/PPCTables.cpp
@@ -144,8 +144,8 @@ GekkoOPTemplate primarytable[] =
     {8,  CInterpreter::subfic,   Jit64::subfic,  {"subfic",   OPTYPE_INTEGER, FL_SET_CA}},
     {10, CInterpreter::cmpli,    Jit64::cmpli,   {"cmpli",    OPTYPE_INTEGER, FL_SET_CRn}},
     {11, CInterpreter::cmpi,     Jit64::cmpi,    {"cmpi",     OPTYPE_INTEGER, FL_SET_CRn}},
-    {12, CInterpreter::addic,    Jit64::Default, {"addic",    OPTYPE_INTEGER, FL_SET_CA}},
-    {13, CInterpreter::addic_rc, Jit64::Default, {"addic_rc", OPTYPE_INTEGER, FL_SET_CR0}},
+    {12, CInterpreter::addic,    Jit64::reg_imm, {"addic",    OPTYPE_INTEGER, FL_SET_CA}},
+    {13, CInterpreter::addic_rc, Jit64::reg_imm, {"addic_rc", OPTYPE_INTEGER, FL_SET_CR0}},
     {14, CInterpreter::addi,     Jit64::reg_imm, {"addi",     OPTYPE_INTEGER, 0}},
     {15, CInterpreter::addis,    Jit64::reg_imm, {"addis",    OPTYPE_INTEGER, 0}},
@@ -166,7 +166,7 @@ GekkoOPTemplate primarytable[] =
     {35, CInterpreter::lbzu, Jit64::Default, {"lbzu", OPTYPE_LOAD, 0}},
     {40, CInterpreter::lhz,  Jit64::lXz,     {"lhz",  OPTYPE_LOAD, 0}},
     {41, CInterpreter::lhzu, Jit64::Default, {"lhzu", OPTYPE_LOAD, 0}},
-    {42, CInterpreter::lha,  Jit64::Default, {"lha",  OPTYPE_LOAD, 0}},
+    {42, CInterpreter::lha,  Jit64::lha,     {"lha",  OPTYPE_LOAD, 0}},
     {43, CInterpreter::lhau, Jit64::Default, {"lhau", OPTYPE_LOAD, 0}},
 
     {48, CInterpreter::lfs,  Jit64::lfs,     {"lfs",  OPTYPE_LOADFP, 0}},
@@ -398,7 +398,7 @@ GekkoOPTemplate table31_2[] =
     {235, CInterpreter::mullwx,  Jit64::mullwx,  {"mullwx",  OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_IN_B | FL_RC_BIT, 4}},
     {104, CInterpreter::negx,    Jit64::negx,    {"negx",    OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_IN_B | FL_RC_BIT}},
     {40,  CInterpreter::subfx,   Jit64::subfx,   {"subfx",   OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_IN_B | FL_RC_BIT}},
-    {8,   CInterpreter::subfcx,  Jit64::Default, {"subfcx",  OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_IN_B | FL_SET_CA | FL_RC_BIT}},
+    {8,   CInterpreter::subfcx,  Jit64::subfcx,  {"subfcx",  OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_IN_B | FL_SET_CA | FL_RC_BIT}},
     {136, CInterpreter::subfex,  Jit64::Default, {"subfex",  OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_IN_B | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
     {232, CInterpreter::subfmex, Jit64::Default, {"subfmex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_IN_B | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
     {200, CInterpreter::subfzex, Jit64::Default, {"subfzex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_IN_B | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
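
Note on the carry handling introduced above: GenerateCarry() captures the host carry flag left by the immediately preceding ADD/ADC/NOT+ADD and stores it into bit 29 of XER, which is the PowerPC CA bit; subfic, addex, and the (still commented-out) addic path in regimmop all funnel through it. Below is a minimal C++ sketch of that semantics, separate from the patch itself; the names XER_shadow, SetCarryFromHostFlag, and AddRecordingCarry are hypothetical stand-ins for illustration, not real emulator symbols.

#include <cstdint>

static uint32_t XER_shadow = 0;  // hypothetical stand-in for the emulated XER register

// Mirrors the emitted sequence: SETcc(CC_C, tmp); AND(XER, ~(1 << 29)); SHL(tmp, 29); OR(XER, tmp);
static void SetCarryFromHostFlag(bool host_carry)
{
    XER_shadow &= ~(1u << 29);                    // clear CA (bit 29)
    XER_shadow |= (host_carry ? 1u : 0u) << 29;   // write the captured carry into CA
}

// Example of the addic-style path that regimmop(..., carry = true) would compile:
// perform the 32-bit add, then record whether it carried out of bit 31.
static uint32_t AddRecordingCarry(uint32_t a, uint32_t imm)
{
    uint64_t wide = (uint64_t)a + imm;            // 33-bit result exposes the carry
    SetCarryFromHostFlag((wide >> 32) != 0);
    return (uint32_t)wide;
}

The JIT gets the same bit for free from the x86 flags register, which is why GenerateCarry() has to run right after the arithmetic instruction that defines the carry, as its comment in the patch says.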