From cc2fa4a00393d46a1917371854895e945cbcbd85 Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Tue, 3 Sep 2013 04:11:25 +0000 Subject: [PATCH] [ARM] Merge all store instructions in to one. This disables fastmem on stores currently. Hit isn't noticeable since I've also implemented 5 more store instructions with this. --- Source/Core/Core/Src/PowerPC/JitArm32/Jit.h | 13 +- .../Src/PowerPC/JitArm32/JitArm_BackPatch.cpp | 21 +- .../Src/PowerPC/JitArm32/JitArm_LoadStore.cpp | 440 ++++++------------ .../Src/PowerPC/JitArm32/JitArm_Tables.cpp | 24 +- 4 files changed, 170 insertions(+), 328 deletions(-) diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/Jit.h b/Source/Core/Core/Src/PowerPC/JitArm32/Jit.h index 9481502fa8..55215f4cdb 100644 --- a/Source/Core/Core/Src/PowerPC/JitArm32/Jit.h +++ b/Source/Core/Core/Src/PowerPC/JitArm32/Jit.h @@ -124,9 +124,11 @@ public: void FinalizeCarry(ARMReg reg); // TODO: This shouldn't be here - void StoreFromReg(ARMReg dest, ARMReg value, int accessSize, s32 offset); + void UnsafeStoreFromReg(ARMReg dest, ARMReg value, int accessSize, s32 offset); + void SafeStoreFromReg(bool fastmem, s32 dest, u32 value, s32 offsetReg, int accessSize, s32 offset); void LoadToReg(ARMReg dest, ARMReg addr, int accessSize, s32 offset); + // OPCODES void unknown_instruction(UGeckoInstruction _inst); void Default(UGeckoInstruction _inst); @@ -173,6 +175,8 @@ public: void mftb(UGeckoInstruction _inst); // LoadStore + void stX(UGeckoInstruction _inst); + void icbi(UGeckoInstruction _inst); void dcbst(UGeckoInstruction _inst); void lbz(UGeckoInstruction _inst); @@ -180,13 +184,6 @@ public: void lha(UGeckoInstruction _inst); void lwz(UGeckoInstruction _inst); void lwzx(UGeckoInstruction _inst); - void stb(UGeckoInstruction _inst); - void stbu(UGeckoInstruction _inst); - void sth(UGeckoInstruction _inst); - void sthu(UGeckoInstruction _inst); - void stw(UGeckoInstruction _inst); - void stwu(UGeckoInstruction _inst); - void stwx(UGeckoInstruction _inst); // Floating point void fabsx(UGeckoInstruction _inst); diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_BackPatch.cpp b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_BackPatch.cpp index 45b39ecd5b..a8ea4d929a 100644 --- a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_BackPatch.cpp +++ b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_BackPatch.cpp @@ -24,25 +24,6 @@ #include "../JitCommon/JitBackpatch.h" #include "StringUtil.h" -#ifdef _M_X64 -static void BackPatchError(const std::string &text, u8 *codePtr, u32 emAddress) { - u64 code_addr = (u64)codePtr; - disassembler disasm; - char disbuf[256]; - memset(disbuf, 0, 256); -#ifdef _M_IX86 - disasm.disasm32(0, code_addr, codePtr, disbuf); -#else - disasm.disasm64(0, code_addr, codePtr, disbuf); -#endif - PanicAlert("%s\n\n" - "Error encountered accessing emulated address %08x.\n" - "Culprit instruction: \n%s\nat %#llx", - text.c_str(), emAddress, disbuf, code_addr); - return; -} -#endif - // This generates some fairly heavy trampolines, but: // 1) It's really necessary. We don't know anything about the context. // 2) It doesn't really hurt. Only instructions that access I/O will get these, and there won't be @@ -96,7 +77,7 @@ bool DisamLoadStore(const u32 inst, ARMReg &rD, u8 &accessSize, bool &Store) } return true; } -const u8 *JitArm::BackPatch(u8 *codePtr, u32 emAddress, void *ctx_void) +const u8 *JitArm::BackPatch(u8 *codePtr, u32, void *ctx_void) { // TODO: This ctx needs to be filled with our information CONTEXT *ctx = (CONTEXT *)ctx_void; diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_LoadStore.cpp b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_LoadStore.cpp index 505b7a2044..5691727690 100644 --- a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_LoadStore.cpp +++ b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_LoadStore.cpp @@ -31,294 +31,9 @@ #include "JitRegCache.h" #include "JitAsm.h" -#ifdef ANDROID -#define FASTMEM 1 -#else -#define FASTMEM 1 -#endif -void JitArm::stb(UGeckoInstruction inst) +void JitArm::UnsafeStoreFromReg(ARMReg dest, ARMReg value, int accessSize, s32 offset) { - INSTRUCTION_START - JITDISABLE(LoadStore) - - ARMReg RS = gpr.R(inst.RS); -#if 0 // FASTMEM - // R10 contains the dest address - ARMReg Value = R11; - ARMReg RA; - if (inst.RA) - RA = gpr.R(inst.RA); - MOV(Value, RS); - if (inst.RA) - { - MOVI2R(R10, inst.SIMM_16, false); - ADD(R10, R10, RA); - } - else - { - MOVI2R(R10, (u32)inst.SIMM_16, false); - NOP(1); - } - StoreFromReg(R10, Value, 16, 0); -#else - ARMReg ValueReg = gpr.GetReg(); - ARMReg Addr = gpr.GetReg(); - ARMReg Function = gpr.GetReg(); - - MOV(ValueReg, RS); - if (inst.RA) - { - MOVI2R(Addr, inst.SIMM_16); - ARMReg RA = gpr.R(inst.RA); - ADD(Addr, Addr, RA); - } - else - MOVI2R(Addr, (u32)inst.SIMM_16); - - MOVI2R(Function, (u32)&Memory::Write_U8); - PUSH(4, R0, R1, R2, R3); - MOV(R0, ValueReg); - MOV(R1, Addr); - BL(Function); - POP(4, R0, R1, R2, R3); - gpr.Unlock(ValueReg, Addr, Function); -#endif -} - -void JitArm::stbu(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(LoadStore) - ARMReg RA = gpr.R(inst.RA); - ARMReg RS = gpr.R(inst.RS); - ARMReg ValueReg = gpr.GetReg(); - ARMReg Addr = gpr.GetReg(); - ARMReg Function = gpr.GetReg(); - - MOVI2R(Addr, inst.SIMM_16); - ADD(Addr, Addr, RA); - - // Check for DSI exception prior to writing back address - LDR(Function, R9, PPCSTATE_OFF(Exceptions)); - CMP(Function, EXCEPTION_DSI); - FixupBranch DoNotWrite = B_CC(CC_EQ); - MOV(RA, Addr); - SetJumpTarget(DoNotWrite); - - MOV(ValueReg, RS); - - MOVI2R(Function, (u32)&Memory::Write_U8); - PUSH(4, R0, R1, R2, R3); - MOV(R0, ValueReg); - MOV(R1, Addr); - BL(Function); - POP(4, R0, R1, R2, R3); - - gpr.Unlock(ValueReg, Addr, Function); -} -void JitArm::sth(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(LoadStore) - - ARMReg RS = gpr.R(inst.RS); -#if 0 // FASTMEM - // R10 contains the dest address - ARMReg Value = R11; - ARMReg RA; - if (inst.RA) - RA = gpr.R(inst.RA); - MOV(Value, RS); - if (inst.RA) - { - MOVI2R(R10, inst.SIMM_16, false); - ADD(R10, R10, RA); - } - else - { - MOVI2R(R10, (u32)inst.SIMM_16, false); - NOP(1); - } - StoreFromReg(R10, Value, 16, 0); -#else - ARMReg ValueReg = gpr.GetReg(); - ARMReg Addr = gpr.GetReg(); - ARMReg Function = gpr.GetReg(); - - MOV(ValueReg, RS); - if (inst.RA) - { - MOVI2R(Addr, inst.SIMM_16); - ARMReg RA = gpr.R(inst.RA); - ADD(Addr, Addr, RA); - } - else - MOVI2R(Addr, (u32)inst.SIMM_16); - - MOVI2R(Function, (u32)&Memory::Write_U16); - PUSH(4, R0, R1, R2, R3); - MOV(R0, ValueReg); - MOV(R1, Addr); - BL(Function); - POP(4, R0, R1, R2, R3); - gpr.Unlock(ValueReg, Addr, Function); -#endif -} -void JitArm::sthu(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(LoadStore) - - ARMReg RA = gpr.R(inst.RA); - ARMReg RS = gpr.R(inst.RS); - ARMReg ValueReg = gpr.GetReg(); - ARMReg Addr = gpr.GetReg(); - ARMReg Function = gpr.GetReg(); - - MOVI2R(Addr, inst.SIMM_16); - ADD(Addr, Addr, RA); - - // Check for DSI exception prior to writing back address - LDR(Function, R9, PPCSTATE_OFF(Exceptions)); - CMP(Function, EXCEPTION_DSI); - FixupBranch DoNotWrite = B_CC(CC_EQ); - MOV(RA, Addr); - SetJumpTarget(DoNotWrite); - - MOV(ValueReg, RS); - - MOVI2R(Function, (u32)&Memory::Write_U16); - PUSH(4, R0, R1, R2, R3); - MOV(R0, ValueReg); - MOV(R1, Addr); - BL(Function); - POP(4, R0, R1, R2, R3); - - gpr.Unlock(ValueReg, Addr, Function); -} - -void JitArm::stw(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(LoadStore) - - ARMReg RS = gpr.R(inst.RS); -#if FASTMEM - // R10 contains the dest address - if (Core::g_CoreStartupParameter.bFastmem) - { - ARMReg Value = R11; - ARMReg RA; - if (inst.RA) - RA = gpr.R(inst.RA); - MOV(Value, RS); - if (inst.RA) - { - MOVI2R(R10, inst.SIMM_16, false); - ADD(R10, R10, RA); - } - else - { - MOVI2R(R10, (u32)inst.SIMM_16, false); - NOP(1); - } - StoreFromReg(R10, Value, 32, 0); - } - else -#endif - { - ARMReg ValueReg = gpr.GetReg(); - ARMReg Addr = gpr.GetReg(); - ARMReg Function = gpr.GetReg(); - - MOV(ValueReg, RS); - if (inst.RA) - { - MOVI2R(Addr, inst.SIMM_16); - ARMReg RA = gpr.R(inst.RA); - ADD(Addr, Addr, RA); - } - else - MOVI2R(Addr, (u32)inst.SIMM_16); - - MOVI2R(Function, (u32)&Memory::Write_U32); - PUSH(4, R0, R1, R2, R3); - MOV(R0, ValueReg); - MOV(R1, Addr); - BL(Function); - POP(4, R0, R1, R2, R3); - gpr.Unlock(ValueReg, Addr, Function); - } -} -void JitArm::stwu(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(LoadStore) - - ARMReg RA = gpr.R(inst.RA); - ARMReg RS = gpr.R(inst.RS); - ARMReg ValueReg = gpr.GetReg(); - ARMReg Addr = gpr.GetReg(); - ARMReg Function = gpr.GetReg(); - - MOVI2R(Addr, inst.SIMM_16); - ADD(Addr, Addr, RA); - - // Check for DSI exception prior to writing back address - LDR(Function, R9, PPCSTATE_OFF(Exceptions)); - CMP(Function, EXCEPTION_DSI); - FixupBranch DoNotWrite = B_CC(CC_EQ); - MOV(RA, Addr); - SetJumpTarget(DoNotWrite); - - MOV(ValueReg, RS); - - MOVI2R(Function, (u32)&Memory::Write_U32); - PUSH(4, R0, R1, R2, R3); - MOV(R0, ValueReg); - MOV(R1, Addr); - BL(Function); - POP(4, R0, R1, R2, R3); - - gpr.Unlock(ValueReg, Addr, Function); -} -void JitArm::stwx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(LoadStore) - u32 a = inst.RA, b = inst.RB, s = inst.RB; - - if (a) { - // Doesn't work - Default(inst); return; - } - - ARMReg RB = gpr.R(b); - ARMReg RS = gpr.R(s); - ARMReg ValueReg = gpr.GetReg(); - ARMReg Addr = gpr.GetReg(); - ARMReg Function = gpr.GetReg(); - - if (a) - ADD(Addr, gpr.R(a), RB); - else - MOV(Addr, RB); - - MOV(ValueReg, RS); - fpr.Flush(); - MOVI2R(Function, (u32)&Memory::Write_U32); - PUSH(4, R0, R1, R2, R3); - MOV(R0, ValueReg); - MOV(R1, Addr); - BL(Function); - POP(4, R0, R1, R2, R3); - - gpr.Unlock(ValueReg, Addr, Function); -} - -void JitArm::StoreFromReg(ARMReg dest, ARMReg value, int accessSize, s32 offset) -{ - ARMReg rA = gpr.GetReg(); + ARMReg rA = R11; // All this gets replaced on backpatch MOVI2R(rA, Memory::MEMVIEW32_MASK, false); // 1-2 @@ -349,8 +64,157 @@ void JitArm::StoreFromReg(ARMReg dest, ARMReg value, int accessSize, s32 offset) STRB(value, dest); break; } - gpr.Unlock(rA); } + +void JitArm::SafeStoreFromReg(bool fastmem, s32 dest, u32 value, s32 regOffset, int accessSize, s32 offset) +{ + if (Core::g_CoreStartupParameter.bFastmem && fastmem) + { + ARMReg rA = R10; + ARMReg rB = R12; + ARMReg RA; + ARMReg RB; + ARMReg RS = gpr.R(value); + + if (regOffset != -1) + { + RB = gpr.R(regOffset); + MOV(rA, RB); + } + else + MOVI2R(rA, offset); + + if (dest != -1) + { + RA = gpr.R(dest); + ADD(rA, rA, RA); + } + + MOV(rB, RS); + UnsafeStoreFromReg(rA, rB, accessSize, 0); + return; + } + ARMReg rA = gpr.GetReg(); + ARMReg rB = gpr.GetReg(); + ARMReg rC = gpr.GetReg(); + ARMReg RA; + ARMReg RB; + if (dest != -1) + RA = gpr.R(dest); + if (regOffset != -1) + RB = gpr.R(regOffset); + ARMReg RS = gpr.R(value); + switch(accessSize) + { + case 32: + MOVI2R(rA, (u32)&Memory::Write_U32); + break; + case 16: + MOVI2R(rA, (u32)&Memory::Write_U16); + break; + case 8: + MOVI2R(rA, (u32)&Memory::Write_U8); + break; + } + MOV(rB, RS); + if (regOffset == -1) + MOVI2R(rC, offset); + else + MOV(rC, RB); + if (dest != -1) + ADD(rC, rC, RA); + + PUSH(4, R0, R1, R2, R3); + MOV(R0, rB); + MOV(R1, rC); + BL(rA); + POP(4, R0, R1, R2, R3); + gpr.Unlock(rA, rB, rC); +} + +void JitArm::stX(UGeckoInstruction inst) +{ + u32 a = inst.RA, b = inst.RB, s = inst.RS; + s32 offset = inst.SIMM_16; + u32 accessSize = 0; + s32 regOffset = -1; + bool zeroA = true; + bool update = false; + bool fastmem = false; + switch(inst.OPCD) + { + case 45: // sthu + update = true; + case 44: // sth + accessSize = 16; + break; + case 31: + switch (inst.SUBOP10) + { + case 183: // stwux + zeroA = false; + update = true; + case 151: // stwx + accessSize = 32; + regOffset = b; + break; + case 247: // stbux + zeroA = false; + update = true; + case 215: // stbx + accessSize = 8; + regOffset = b; + break; + case 439: // sthux + zeroA = false; + update = true; + case 407: // sthx + accessSize = 16; + regOffset = b; + break; + } + break; + case 37: // stwu + update = true; + case 36: // stw + accessSize = 32; + break; + case 39: // stbu + update = true; + case 38: // stb + accessSize = 8; + break; + } + SafeStoreFromReg(fastmem, zeroA ? a ? a : -1 : a, s, regOffset, accessSize, offset); + if (update) + { + ARMReg rA = gpr.GetReg(); + ARMReg RB; + ARMReg RA = gpr.R(a); + if (regOffset != -1) + RB = gpr.R(regOffset); + // Check for DSI exception prior to writing back address + LDR(rA, R9, PPCSTATE_OFF(Exceptions)); + CMP(rA, EXCEPTION_DSI); + FixupBranch DoNotWrite = B_CC(CC_EQ); + if (a) + { + if (regOffset == -1) + MOVI2R(rA, offset); + else + MOV(rA, RB); + ADD(RA, RA, rA); + } + else + if (regOffset == -1) + MOVI2R(RA, (u32)offset); + else + MOV(RA, RB); + SetJumpTarget(DoNotWrite); + gpr.Unlock(rA); + } +} + void JitArm::LoadToReg(ARMReg dest, ARMReg addr, int accessSize, s32 offset) { ARMReg rA = gpr.GetReg(); diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Tables.cpp b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Tables.cpp index de67e3c06e..5a4d33ea32 100644 --- a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Tables.cpp +++ b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Tables.cpp @@ -87,12 +87,12 @@ static GekkoOPTemplate primarytable[] = {42, &JitArm::lha}, //"lha", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}}, {43, &JitArm::Default}, //"lhau", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}}, - {44, &JitArm::sth}, //"sth", OPTYPE_STORE, FL_IN_A | FL_IN_S}}, - {45, &JitArm::sthu}, //"sthu", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_S}}, - {36, &JitArm::stw}, //"stw", OPTYPE_STORE, FL_IN_A | FL_IN_S}}, - {37, &JitArm::stwu}, //"stwu", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_S}}, - {38, &JitArm::stb}, //"stb", OPTYPE_STORE, FL_IN_A | FL_IN_S}}, - {39, &JitArm::stbu}, //"stbu", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_S}}, + {44, &JitArm::stX}, //"sth", OPTYPE_STORE, FL_IN_A | FL_IN_S}}, + {45, &JitArm::stX}, //"sthu", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_S}}, + {36, &JitArm::stX}, //"stw", OPTYPE_STORE, FL_IN_A | FL_IN_S}}, + {37, &JitArm::stX}, //"stwu", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_S}}, + {38, &JitArm::stX}, //"stb", OPTYPE_STORE, FL_IN_A | FL_IN_S}}, + {39, &JitArm::stX}, //"stbu", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_S}}, {46, &JitArm::Default}, //"lmw", OPTYPE_SYSTEM, FL_EVIL, 10}}, {47, &JitArm::Default}, //"stmw", OPTYPE_SYSTEM, FL_EVIL, 10}}, @@ -249,16 +249,16 @@ static GekkoOPTemplate table31[] = {597, &JitArm::Default}, //"lswi", OPTYPE_LOAD, FL_EVIL | FL_IN_AB | FL_OUT_D}}, //store word - {151, &JitArm::stwx}, //"stwx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}}, - {183, &JitArm::Default}, //"stwux", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_B}}, + {151, &JitArm::stX}, //"stwx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}}, + {183, &JitArm::stX}, //"stwux", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_B}}, //store halfword - {407, &JitArm::Default}, //"sthx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}}, - {439, &JitArm::Default}, //"sthux", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_B}}, + {407, &JitArm::stX}, //"sthx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}}, + {439, &JitArm::stX}, //"sthux", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_B}}, //store byte - {215, &JitArm::Default}, //"stbx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}}, - {247, &JitArm::Default}, //"stbux", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_B}}, + {215, &JitArm::stX}, //"stbx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}}, + {247, &JitArm::stX}, //"stbux", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_B}}, //store bytereverse {662, &JitArm::Default}, //"stwbrx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}},