Remove the inaccurately named ABI_PushAllCalleeSavedRegsAndAdjustStack (it didn't preserve FPRs!) and replace with ABI_PushRegistersAndAdjustStack.

To avoid FPRs being pushed unnecessarily, I checked the uses: DSPEmitter
doesn't use FPRs, and VertexLoader doesn't use anything but RAX, so I
specified the register list accordingly.  The regular JIT, however, does
use FPRs, and as far as I can tell, it was incorrect not to save them in
the outer routine.  Since the dispatcher loop is only exited when
pausing or stopping, this should have no noticeable performance impact.
This commit is contained in:
comex 2014-09-07 14:21:16 -04:00
parent 2dafbfb3ef
commit c5c0b36046
4 changed files with 10 additions and 67 deletions

View File

@ -453,63 +453,3 @@ void XEmitter::ABI_CallFunctionA(void *func, const Gen::OpArg &arg1)
ABI_RestoreStack(0); ABI_RestoreStack(0);
} }
#ifdef _WIN32
// Win64 Specific Code
// Emits the Win64 entry sequence for generated code: establishes an RBP
// frame, pushes RBX, RSI, RDI and R12-R15, then reserves 0x28 bytes of stack.
// Only general-purpose registers are saved; no XMM register is touched.
// The matching teardown is ABI_PopAllCalleeSavedRegsAndAdjustStack(), which
// must undo these steps in exactly the reverse order.
void XEmitter::ABI_PushAllCalleeSavedRegsAndAdjustStack()
{
//we only want to do this once
// Save the caller's frame pointer and make RBP point at the new frame.
PUSH(RBP);
MOV(64, R(RBP), R(RSP));
// Remaining general-purpose registers; the pop routine restores them in
// the opposite order.
PUSH(RBX);
PUSH(RSI);
PUSH(RDI);
PUSH(R12);
PUSH(R13);
PUSH(R14);
PUSH(R15);
// 0x28 = 40 bytes. Presumably 32 bytes of Win64 shadow space for callees plus
// 8 bytes so RSP is 16-byte aligned after the return address and the eight
// pushes above -- TODO confirm against the callers' expectations.
SUB(64, R(RSP), Imm8(0x28));
//TODO: Also preserve XMM6-XMM15? Those are the callee-saved XMM registers
// under the Microsoft x64 calling convention (XMM0-XMM5 are volatile), and
// none of them are saved here.
}
// Emits the Win64 exit sequence that undoes
// ABI_PushAllCalleeSavedRegsAndAdjustStack(): releases the 0x28 bytes of
// reserved stack and pops the saved registers in reverse push order.
// RSP is not reloaded from RBP, so the generated code must have left RSP
// where the push routine put it.
void XEmitter::ABI_PopAllCalleeSavedRegsAndAdjustStack()
{
// Release the stack space reserved by the push routine.
ADD(64, R(RSP), Imm8(0x28));
// Exact mirror of the push order: R15 was pushed last, RBP first.
POP(R15);
POP(R14);
POP(R13);
POP(R12);
POP(RDI);
POP(RSI);
POP(RBX);
POP(RBP);
}
#else
// Unix64 Specific Code
// Emits the Unix64 entry sequence for generated code: establishes an RBP
// frame, pushes RBX and R12-R15, then reserves 8 bytes of stack.
// Only general-purpose registers are saved; no XMM register is touched.
// The matching teardown is ABI_PopAllCalleeSavedRegsAndAdjustStack(), which
// must undo these steps in exactly the reverse order.
void XEmitter::ABI_PushAllCalleeSavedRegsAndAdjustStack()
{
// Save the caller's frame pointer and make RBP point at the new frame.
PUSH(RBP);
MOV(64, R(RBP), R(RSP));
// Remaining general-purpose registers; unlike the Win64 variant, RSI and RDI
// are not pushed here.
PUSH(RBX);
PUSH(R12);
PUSH(R13);
PUSH(R14);
PUSH(R15);
// Presumably padding so RSP is 16-byte aligned after the return address and
// the six pushes above (8 + 48 + 8 = 64 bytes) -- TODO confirm.
SUB(64, R(RSP), Imm8(8));
}
// Emits the Unix64 exit sequence that undoes
// ABI_PushAllCalleeSavedRegsAndAdjustStack(): releases the 8 bytes of
// reserved stack and pops the saved registers in reverse push order.
// RSP is not reloaded from RBP, so the generated code must have left RSP
// where the push routine put it.
void XEmitter::ABI_PopAllCalleeSavedRegsAndAdjustStack()
{
// Release the stack space reserved by the push routine.
ADD(64, R(RSP), Imm8(8));
// Exact mirror of the push order: R15 was pushed last, RBP first.
POP(R15);
POP(R14);
POP(R13);
POP(R12);
POP(RBX);
POP(RBP);
}
#endif // WIN32

View File

@ -384,7 +384,9 @@ const u8 *DSPEmitter::CompileStub()
void DSPEmitter::CompileDispatcher() void DSPEmitter::CompileDispatcher()
{ {
enterDispatcher = AlignCode16(); enterDispatcher = AlignCode16();
ABI_PushAllCalleeSavedRegsAndAdjustStack(); // We don't use floating point (high 16 bits).
u32 registers_used = ABI_ALL_CALLEE_SAVED & 0xffff;
ABI_PushRegistersAndAdjustStack(registers_used, 8);
const u8 *dispatcherLoop = GetCodePtr(); const u8 *dispatcherLoop = GetCodePtr();
@ -419,6 +421,6 @@ void DSPEmitter::CompileDispatcher()
SetJumpTarget(exceptionExit); SetJumpTarget(exceptionExit);
} }
//MOV(32, M(&cyclesLeft), Imm32(0)); //MOV(32, M(&cyclesLeft), Imm32(0));
ABI_PopAllCalleeSavedRegsAndAdjustStack(); ABI_PopRegistersAndAdjustStack(registers_used, 8);
RET(); RET();
} }

View File

@ -16,7 +16,7 @@ using namespace Gen;
void Jit64AsmRoutineManager::Generate() void Jit64AsmRoutineManager::Generate()
{ {
enterCode = AlignCode16(); enterCode = AlignCode16();
ABI_PushAllCalleeSavedRegsAndAdjustStack(); ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8);
// Two statically allocated registers. // Two statically allocated registers.
MOV(64, R(RMEM), Imm64((u64)Memory::base)); MOV(64, R(RMEM), Imm64((u64)Memory::base));
@ -39,7 +39,7 @@ void Jit64AsmRoutineManager::Generate()
ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckBreakPoints)); ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckBreakPoints));
TEST(32, M((void*)PowerPC::GetStatePtr()), Imm32(0xFFFFFFFF)); TEST(32, M((void*)PowerPC::GetStatePtr()), Imm32(0xFFFFFFFF));
FixupBranch noBreakpoint = J_CC(CC_Z); FixupBranch noBreakpoint = J_CC(CC_Z);
ABI_PopAllCalleeSavedRegsAndAdjustStack(); ABI_PopRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8);
RET(); RET();
SetJumpTarget(noBreakpoint); SetJumpTarget(noBreakpoint);
SetJumpTarget(notStepping); SetJumpTarget(notStepping);
@ -126,7 +126,7 @@ void Jit64AsmRoutineManager::Generate()
J_CC(CC_Z, outerLoop); J_CC(CC_Z, outerLoop);
//Landing pad for drec space //Landing pad for drec space
ABI_PopAllCalleeSavedRegsAndAdjustStack(); ABI_PopRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8);
RET(); RET();
GenerateCommon(); GenerateCommon();

View File

@ -584,7 +584,8 @@ void VertexLoader::CompileVertexTranslator()
PanicAlert("Trying to recompile a vertex translator"); PanicAlert("Trying to recompile a vertex translator");
m_compiledCode = GetCodePtr(); m_compiledCode = GetCodePtr();
ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8); // We don't use any callee saved registers or anything but RAX.
ABI_PushRegistersAndAdjustStack(0, 8);
// Start loop here // Start loop here
const u8 *loop_start = GetCodePtr(); const u8 *loop_start = GetCodePtr();
@ -845,7 +846,7 @@ void VertexLoader::CompileVertexTranslator()
SUB(32, MatR(RAX), Imm8(1)); SUB(32, MatR(RAX), Imm8(1));
J_CC(CC_NZ, loop_start); J_CC(CC_NZ, loop_start);
ABI_PopRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8); ABI_PopRegistersAndAdjustStack(0, 8);
RET(); RET();
#endif #endif
} }