From 32dc105aa3647e3424164d252212ad67b356d699 Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Sat, 29 Nov 2014 05:27:39 +0000 Subject: [PATCH] [ARM32] Eat a register to store our memory base. This saves at least two instructions per fastmem operation. --- .../PowerPC/JitArm32/JitArm_BackPatch.cpp | 5 +-- .../PowerPC/JitArm32/JitArm_LoadStore.cpp | 22 ++++------ Source/Core/Core/PowerPC/JitArm32/JitAsm.cpp | 43 +++++++------------ .../Core/PowerPC/JitArm32/JitRegCache.cpp | 2 +- 4 files changed, 27 insertions(+), 45 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm32/JitArm_BackPatch.cpp b/Source/Core/Core/PowerPC/JitArm32/JitArm_BackPatch.cpp index c703e5a88d..f095d1c145 100644 --- a/Source/Core/Core/PowerPC/JitArm32/JitArm_BackPatch.cpp +++ b/Source/Core/Core/PowerPC/JitArm32/JitArm_BackPatch.cpp @@ -225,9 +225,8 @@ u32 JitArm::EmitBackpatchRoutine(ARMXEmitter* emit, u32 flags, bool fastmem, boo { ARMReg temp2 = R10; Operand2 mask(2, 1); // ~(Memory::MEMVIEW32_MASK) - emit->BIC(temp, addr, mask); // 1 - emit->MOVI2R(temp2, (u32)Memory::base); // 2-3 - emit->ADD(temp, temp, temp2); // 4 + emit->BIC(temp, addr, mask); + emit->ADD(temp, temp, R8); if (flags & BackPatchInfo::FLAG_STORE && flags & (BackPatchInfo::FLAG_SIZE_F32 | BackPatchInfo::FLAG_SIZE_F64)) diff --git a/Source/Core/Core/PowerPC/JitArm32/JitArm_LoadStore.cpp b/Source/Core/Core/PowerPC/JitArm32/JitArm_LoadStore.cpp index 95623d7639..1682fe5b57 100644 --- a/Source/Core/Core/PowerPC/JitArm32/JitArm_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/JitArm32/JitArm_LoadStore.cpp @@ -148,7 +148,7 @@ void JitArm::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, int accessSize else if (Memory::IsRAMAddress(imm_addr)) { MOVI2R(rA, imm_addr); - EmitBackpatchRoutine(this, flags, SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem, false, RS); + EmitBackpatchRoutine(this, flags, SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem, true, RS); } else { @@ -487,14 +487,12 @@ void JitArm::lmw(UGeckoInstruction inst) u32 a = inst.RA; ARMReg rA = gpr.GetReg(); - ARMReg rB = gpr.GetReg(); MOVI2R(rA, inst.SIMM_16); if (a) ADD(rA, rA, gpr.R(a)); Operand2 mask(2, 1); // ~(Memory::MEMVIEW32_MASK) - BIC(rA, rA, mask); // 3 - MOVI2R(rB, (u32)Memory::base, false); // 4-5 - ADD(rA, rA, rB); // 6 + BIC(rA, rA, mask); + ADD(rA, rA, R8); for (int i = inst.RD; i < 32; i++) { @@ -502,7 +500,7 @@ void JitArm::lmw(UGeckoInstruction inst) LDR(RX, rA, (i - inst.RD) * 4); REV(RX, RX); } - gpr.Unlock(rA, rB); + gpr.Unlock(rA); } void JitArm::stmw(UGeckoInstruction inst) @@ -514,22 +512,20 @@ void JitArm::stmw(UGeckoInstruction inst) u32 a = inst.RA; ARMReg rA = gpr.GetReg(); ARMReg rB = gpr.GetReg(); - ARMReg rC = gpr.GetReg(); MOVI2R(rA, inst.SIMM_16); if (a) ADD(rA, rA, gpr.R(a)); Operand2 mask(2, 1); // ~(Memory::MEMVIEW32_MASK) - BIC(rA, rA, mask); // 3 - MOVI2R(rB, (u32)Memory::base, false); // 4-5 - ADD(rA, rA, rB); // 6 + BIC(rA, rA, mask); + ADD(rA, rA, R8); for (int i = inst.RD; i < 32; i++) { ARMReg RX = gpr.R(i); - REV(rC, RX); - STR(rC, rA, (i - inst.RD) * 4); + REV(rB, RX); + STR(rB, rA, (i - inst.RD) * 4); } - gpr.Unlock(rA, rB, rC); + gpr.Unlock(rA, rB); } void JitArm::dcbst(UGeckoInstruction inst) diff --git a/Source/Core/Core/PowerPC/JitArm32/JitAsm.cpp b/Source/Core/Core/PowerPC/JitArm32/JitAsm.cpp index 9e630810d0..4ece379575 100644 --- a/Source/Core/Core/PowerPC/JitArm32/JitAsm.cpp +++ b/Source/Core/Core/PowerPC/JitArm32/JitAsm.cpp @@ -96,6 +96,7 @@ void JitArmAsmRoutineManager::Generate() SUB(_SP, _SP, 4); MOVI2R(R9, (u32)&PowerPC::ppcState.spr[0]); + MOVI2R(R8, (u32)Memory::base); FixupBranch skipToRealDispatcher = B(); dispatcher = GetCodePtr(); @@ -203,8 +204,7 @@ void JitArmAsmRoutineManager::GenerateCommon() const u8* loadPairedFloatTwo = GetCodePtr(); { BIC(R10, R10, mask); - MOVI2R(R12, (u32)Memory::base); - ADD(R10, R10, R12); + ADD(R10, R10, R8); nemit.VLD1(I_32, D0, R10); nemit.VREV32(I_8, D0, D0); @@ -214,8 +214,7 @@ void JitArmAsmRoutineManager::GenerateCommon() const u8* loadPairedFloatOne = GetCodePtr(); { BIC(R10, R10, mask); - MOVI2R(R12, (u32)Memory::base); - ADD(R10, R10, R12); + ADD(R10, R10, R8); nemit.VLD1(I_32, D0, R10); nemit.VREV32(I_8, D0, D0); @@ -225,8 +224,7 @@ void JitArmAsmRoutineManager::GenerateCommon() const u8* loadPairedU8Two = GetCodePtr(); { BIC(R10, R10, mask); - MOVI2R(R12, (u32)Memory::base); - ADD(R10, R10, R12); + ADD(R10, R10, R8); LDRH(R12, R10); SXTB(R12, R12); @@ -251,8 +249,7 @@ void JitArmAsmRoutineManager::GenerateCommon() const u8* loadPairedU8One = GetCodePtr(); { BIC(R10, R10, mask); - MOVI2R(R12, (u32)Memory::base); - ADD(R10, R10, R12); + ADD(R10, R10, R8); LDRB(R12, R10); SXTB(R12, R12); @@ -271,8 +268,7 @@ void JitArmAsmRoutineManager::GenerateCommon() const u8* loadPairedS8Two = GetCodePtr(); { BIC(R10, R10, mask); - MOVI2R(R12, (u32)Memory::base); - ADD(R10, R10, R12); + ADD(R10, R10, R8); LDRH(R12, R10); SXTB(R12, R12); @@ -297,8 +293,7 @@ void JitArmAsmRoutineManager::GenerateCommon() const u8* loadPairedS8One = GetCodePtr(); { BIC(R10, R10, mask); - MOVI2R(R12, (u32)Memory::base); - ADD(R10, R10, R12); + ADD(R10, R10, R8); LDRB(R12, R10); SXTB(R12, R12); @@ -317,8 +312,7 @@ void JitArmAsmRoutineManager::GenerateCommon() const u8* loadPairedU16Two = GetCodePtr(); { BIC(R10, R10, mask); - MOVI2R(R12, (u32)Memory::base); - ADD(R10, R10, R12); + ADD(R10, R10, R8); LDRH(R12, R10); REV16(R12, R12); @@ -345,8 +339,7 @@ void JitArmAsmRoutineManager::GenerateCommon() const u8* loadPairedU16One = GetCodePtr(); { BIC(R10, R10, mask); - MOVI2R(R12, (u32)Memory::base); - ADD(R10, R10, R12); + ADD(R10, R10, R8); LDRH(R12, R10); REV16(R12, R12); @@ -364,8 +357,7 @@ void JitArmAsmRoutineManager::GenerateCommon() const u8* loadPairedS16Two = GetCodePtr(); { BIC(R10, R10, mask); - MOVI2R(R12, (u32)Memory::base); - ADD(R10, R10, R12); + ADD(R10, R10, R8); LDRH(R12, R10); REV16(R12, R12); @@ -392,8 +384,7 @@ void JitArmAsmRoutineManager::GenerateCommon() const u8* loadPairedS16One = GetCodePtr(); { BIC(R10, R10, mask); - MOVI2R(R12, (u32)Memory::base); - ADD(R10, R10, R12); + ADD(R10, R10, R8); LDRH(R12, R10); @@ -439,8 +430,7 @@ void JitArmAsmRoutineManager::GenerateCommon() TST(R10, arghmask); FixupBranch argh = B_CC(CC_NEQ); BIC(R10, R10, mask); - MOVI2R(R12, (u32)Memory::base); - ADD(R10, R10, R12); + ADD(R10, R10, R8); nemit.VREV32(I_8, D0, D0); nemit.VST1(I_32, D0, R10); @@ -511,8 +501,7 @@ void JitArmAsmRoutineManager::GenerateCommon() TST(R10, arghmask); FixupBranch argh = B_CC(CC_NEQ); BIC(R10, R10, mask); - MOVI2R(R12, (u32)Memory::base); - ADD(R10, R10, R12); + ADD(R10, R10, R8); VMOV(R12, S0); REV(R12, R12); @@ -540,8 +529,7 @@ void JitArmAsmRoutineManager::GenerateCommon() TST(R10, arghmask); FixupBranch argh = B_CC(CC_NEQ); BIC(R10, R10, mask); - MOVI2R(R12, (u32)Memory::base); - ADD(R10, R10, R12); + ADD(R10, R10, R8); VCVT(S0, S0, TO_INT | ROUND_TO_ZERO); VMOV(R12, S0); @@ -568,8 +556,7 @@ void JitArmAsmRoutineManager::GenerateCommon() TST(R10, arghmask); FixupBranch argh = B_CC(CC_NEQ); BIC(R10, R10, mask); - MOVI2R(R12, (u32)Memory::base); - ADD(R10, R10, R12); + ADD(R10, R10, R8); VCVT(S0, S0, TO_INT | ROUND_TO_ZERO); VMOV(R12, S0); diff --git a/Source/Core/Core/PowerPC/JitArm32/JitRegCache.cpp b/Source/Core/Core/PowerPC/JitArm32/JitRegCache.cpp index 8a7c2990a9..8379b6aa9a 100644 --- a/Source/Core/Core/PowerPC/JitArm32/JitRegCache.cpp +++ b/Source/Core/Core/PowerPC/JitArm32/JitRegCache.cpp @@ -54,7 +54,7 @@ ARMReg *ArmRegCache::GetPPCAllocationOrder(int &count) // the ppc side. static ARMReg allocationOrder[] = { - R0, R1, R2, R3, R4, R5, R6, R7, R8 + R0, R1, R2, R3, R4, R5, R6, R7 }; count = sizeof(allocationOrder) / sizeof(const int); return allocationOrder;