From 2dafbfb3efb03823096b7717044a584105ee1e1e Mon Sep 17 00:00:00 2001 From: comex Date: Sun, 7 Sep 2014 14:06:48 -0400 Subject: [PATCH 1/3] Improve code and clarify parameters to ABI_Push/PopRegistersAndAdjustStack. - Factor common work into a helper function. - Replace confusingly named "noProlog" with "rsp_alignment". Now that x86 is not supported, we can just specify it explicitly as 8 for clarity. - Add the option to reserve additional frame space, which I'll need later. - Revert a change by magumagu in March which replaced MOVAPD with MOVUPD on account of 32-bit Windows, which is no longer supported. True, apparently recent processors don't execute the former any faster if the pointer is, in fact, aligned, but there's no point using MOVUPD for something that's guaranteed to be aligned... (I discovered that GenFrsqrte and GenFres were incorrectly passing false to noProlog - they were, in fact, functions without prologs, the original meaning of the parameter - which caused the previous change to break. This is now fixed.) 
--- Source/Core/Common/x64ABI.cpp | 93 +++++++++++-------- Source/Core/Common/x64ABI.h | 2 + Source/Core/Common/x64Emitter.h | 10 +- Source/Core/Core/PowerPC/Jit64/Jit.cpp | 4 +- .../Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp | 12 +-- .../Core/PowerPC/JitCommon/JitAsmCommon.cpp | 13 +-- .../Core/PowerPC/JitCommon/JitBackpatch.cpp | 12 +-- .../Core/Core/PowerPC/JitCommon/Jit_Util.cpp | 18 ++-- Source/Core/VideoCommon/VertexLoader.cpp | 4 +- 9 files changed, 96 insertions(+), 72 deletions(-) diff --git a/Source/Core/Common/x64ABI.cpp b/Source/Core/Common/x64ABI.cpp index 45465619bd..046d90e509 100644 --- a/Source/Core/Common/x64ABI.cpp +++ b/Source/Core/Common/x64ABI.cpp @@ -36,67 +36,86 @@ void XEmitter::ABI_RestoreStack(unsigned int frameSize, bool noProlog) } } -void XEmitter::ABI_PushRegistersAndAdjustStack(u32 mask, bool noProlog) +void XEmitter::ABI_CalculateFrameSize(u32 mask, size_t rsp_alignment, size_t needed_frame_size, size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp) { - int regSize = 8; - int shadow = 0; + size_t shadow = 0; #if defined(_WIN32) shadow = 0x20; #endif + int count = 0; for (int r = 0; r < 16; r++) { if (mask & (1 << r)) - { - PUSH((X64Reg) r); count++; - } } - int size = ((noProlog ? -regSize : 0) - (count * regSize)) & 0xf; + rsp_alignment -= count * 8; + size_t subtraction = 0; + if (mask & 0xffff0000) + { + // If we have any XMMs to save, we must align the stack here. + subtraction = rsp_alignment & 0xf; + } for (int x = 0; x < 16; x++) { if (mask & (1 << (16 + x))) - size += 16; - } - size += shadow; - if (size) - SUB(regSize * 8, R(RSP), size >= 0x80 ? Imm32(size) : Imm8(size)); - int offset = shadow; - for (int x = 0; x < 16; x++) - { - if (mask & (1 << (16 + x))) - { - MOVUPD(MDisp(RSP, offset), (X64Reg) x); - offset += 16; - } + subtraction += 16; } + size_t xmm_base_subtraction = subtraction; + subtraction += needed_frame_size; + subtraction += shadow; + // Final alignment. 
+ rsp_alignment -= subtraction; + subtraction += rsp_alignment & 0xf; + + *shadowp = shadow; + *subtractionp = subtraction; + *xmm_offsetp = subtraction - xmm_base_subtraction; } -void XEmitter::ABI_PopRegistersAndAdjustStack(u32 mask, bool noProlog) +size_t XEmitter::ABI_PushRegistersAndAdjustStack(u32 mask, size_t rsp_alignment, size_t needed_frame_size) { - int regSize = 8; - int size = 0; -#if defined(_WIN32) - size += 0x20; -#endif + size_t shadow, subtraction, xmm_offset; + ABI_CalculateFrameSize(mask, rsp_alignment, needed_frame_size, &shadow, &subtraction, &xmm_offset); + + for (int r = 0; r < 16; r++) + { + if (mask & (1 << r)) + PUSH((X64Reg) r); + } + + if (subtraction) + SUB(64, R(RSP), subtraction >= 0x80 ? Imm32((u32)subtraction) : Imm8((u8)subtraction)); + for (int x = 0; x < 16; x++) { if (mask & (1 << (16 + x))) { - MOVUPD((X64Reg) x, MDisp(RSP, size)); - size += 16; + MOVAPD(MDisp(RSP, (int)xmm_offset), (X64Reg) x); + xmm_offset += 16; } } - int count = 0; - for (int r = 0; r < 16; r++) - { - if (mask & (1 << r)) - count++; - } - size += ((noProlog ? -regSize : 0) - (count * regSize)) & 0xf; - if (size) - ADD(regSize * 8, R(RSP), size >= 0x80 ? Imm32(size) : Imm8(size)); + return shadow; +} + +void XEmitter::ABI_PopRegistersAndAdjustStack(u32 mask, size_t rsp_alignment, size_t needed_frame_size) +{ + size_t shadow, subtraction, xmm_offset; + ABI_CalculateFrameSize(mask, rsp_alignment, needed_frame_size, &shadow, &subtraction, &xmm_offset); + + for (int x = 0; x < 16; x++) + { + if (mask & (1 << (16 + x))) + { + MOVAPD((X64Reg) x, MDisp(RSP, (int)xmm_offset)); + xmm_offset += 16; + } + } + + if (subtraction) + ADD(64, R(RSP), subtraction >= 0x80 ? 
Imm32((u32)subtraction) : Imm8((u8)subtraction)); + for (int r = 15; r >= 0; r--) { if (mask & (1 << r)) diff --git a/Source/Core/Common/x64ABI.h b/Source/Core/Common/x64ABI.h index abc9236ef7..bf058bc04a 100644 --- a/Source/Core/Common/x64ABI.h +++ b/Source/Core/Common/x64ABI.h @@ -53,5 +53,7 @@ #endif // WIN32 +#define ABI_ALL_CALLEE_SAVED ((u32) ~ABI_ALL_CALLER_SAVED) + #define ABI_RETURN RAX diff --git a/Source/Core/Common/x64Emitter.h b/Source/Core/Common/x64Emitter.h index d6f0699e84..22c6857da0 100644 --- a/Source/Core/Common/x64Emitter.h +++ b/Source/Core/Common/x64Emitter.h @@ -281,6 +281,8 @@ private: void WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, OpArg arg); void WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg &a1, const OpArg &a2); + void ABI_CalculateFrameSize(u32 mask, size_t rsp_alignment, size_t needed_frame_size, size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp); + protected: inline void Write8(u8 value) {*code++ = value;} inline void Write16(u16 value) {*(u16*)code = (value); code += 2;} @@ -761,9 +763,11 @@ public: void ABI_PushAllCalleeSavedRegsAndAdjustStack(); void ABI_PopAllCalleeSavedRegsAndAdjustStack(); - // A more flexible version of the above. - void ABI_PushRegistersAndAdjustStack(u32 mask, bool noProlog); - void ABI_PopRegistersAndAdjustStack(u32 mask, bool noProlog); + // Saves/restores the registers and adjusts the stack to be aligned as + // required by the ABI, where the previous alignment was as specified. + // Push returns the size of the shadow space, i.e. the offset of the frame. 
+ size_t ABI_PushRegistersAndAdjustStack(u32 mask, size_t rsp_alignment, size_t needed_frame_size = 0); + void ABI_PopRegistersAndAdjustStack(u32 mask, size_t rsp_alignment, size_t needed_frame_size = 0); unsigned int ABI_GetAlignedFrameSize(unsigned int frameSize, bool noProlog = false); void ABI_AlignStack(unsigned int frameSize, bool noProlog = false); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/PowerPC/Jit64/Jit.cpp index a3707dbbe1..a69d8e0f82 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit.cpp @@ -495,9 +495,9 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc js.fifoBytesThisBlock -= 32; MOV(32, PPCSTATE(pc), Imm32(jit->js.compilerPC)); // Helps external systems know which instruction triggered the write u32 registersInUse = CallerSavedRegistersInUse(); - ABI_PushRegistersAndAdjustStack(registersInUse, false); + ABI_PushRegistersAndAdjustStack(registersInUse, 0); ABI_CallFunction((void *)&GPFifo::CheckGatherPipe); - ABI_PopRegistersAndAdjustStack(registersInUse, false); + ABI_PopRegistersAndAdjustStack(registersInUse, 0); } u32 function = HLE::GetFunctionIndex(ops[i].address); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp index ba9cf8b293..0c25191736 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp @@ -116,11 +116,11 @@ void Jit64::lXXx(UGeckoInstruction inst) FixupBranch noIdle = J_CC(CC_NZ); u32 registersInUse = CallerSavedRegistersInUse(); - ABI_PushRegistersAndAdjustStack(registersInUse, false); + ABI_PushRegistersAndAdjustStack(registersInUse, 0); ABI_CallFunctionC((void *)&PowerPC::OnIdle, PowerPC::ppcState.gpr[a] + (s32)(s16)inst.SIMM_16); - ABI_PopRegistersAndAdjustStack(registersInUse, false); + ABI_PopRegistersAndAdjustStack(registersInUse, 0); // ! 
we must continue executing of the loop after exception handling, maybe there is still 0 in r0 //MOV(32, PPCSTATE(pc), Imm32(js.compilerPC)); @@ -285,9 +285,9 @@ void Jit64::dcbz(UGeckoInstruction inst) // supposedly there are, at least for some MMU titles. Let's be careful and support it to be sure. MOV(32, M(&PC), Imm32(jit->js.compilerPC)); u32 registersInUse = CallerSavedRegistersInUse(); - ABI_PushRegistersAndAdjustStack(registersInUse, false); + ABI_PushRegistersAndAdjustStack(registersInUse, 0); ABI_CallFunctionR((void *)&Memory::ClearCacheLine, RSCRATCH); - ABI_PopRegistersAndAdjustStack(registersInUse, false); + ABI_PopRegistersAndAdjustStack(registersInUse, 0); FixupBranch exit = J(); SetJumpTarget(fast); @@ -374,7 +374,7 @@ void Jit64::stX(UGeckoInstruction inst) MOV(32, PPCSTATE(pc), Imm32(jit->js.compilerPC)); u32 registersInUse = CallerSavedRegistersInUse(); - ABI_PushRegistersAndAdjustStack(registersInUse, false); + ABI_PushRegistersAndAdjustStack(registersInUse, 0); switch (accessSize) { case 32: @@ -387,7 +387,7 @@ void Jit64::stX(UGeckoInstruction inst) ABI_CallFunctionAC((void *)&Memory::Write_U8, gpr.R(s), addr); break; } - ABI_PopRegistersAndAdjustStack(registersInUse, false); + ABI_PopRegistersAndAdjustStack(registersInUse, 0); if (update) gpr.SetImmediate32(a, addr); return; diff --git a/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.cpp b/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.cpp index d5cce9882e..dbce5dfb85 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.cpp @@ -110,9 +110,9 @@ void CommonAsmRoutines::GenFrsqrte() SetJumpTarget(complex1); SetJumpTarget(complex2); SetJumpTarget(complex3); - ABI_PushRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, false); + ABI_PushRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, 8); ABI_CallFunction((void *)&MathUtil::ApproximateReciprocalSquareRoot); - ABI_PopRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, false); + 
ABI_PopRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, 8); RET(); } @@ -169,9 +169,9 @@ void CommonAsmRoutines::GenFres() SetJumpTarget(complex1); SetJumpTarget(complex2); - ABI_PushRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, false); + ABI_PushRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, 8); ABI_CallFunction((void *)&MathUtil::ApproximateReciprocal); - ABI_PopRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, false); + ABI_PopRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, 8); RET(); } @@ -258,9 +258,10 @@ void CommonAsmRoutines::GenQuantizedStores() SwapAndStore(64, MComplex(RMEM, RSCRATCH_EXTRA, SCALE_1, 0), RSCRATCH); FixupBranch skip_complex = J(true); SetJumpTarget(too_complex); - ABI_PushRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, true); + // RSP alignment here is 8 due to the call. + ABI_PushRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, 8); ABI_CallFunctionR((void *)&WriteDual32, RSCRATCH_EXTRA); - ABI_PopRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, true); + ABI_PopRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, 8); SetJumpTarget(skip_complex); RET(); diff --git a/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp b/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp index c1a6436e62..26b8b50d60 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp @@ -56,10 +56,8 @@ const u8 *TrampolineCache::GetReadTrampoline(const InstructionInfo &info, u32 re X64Reg dataReg = (X64Reg)info.regOperandReg; // It's a read. Easy. - // It ought to be necessary to align the stack here. Since it seems to not - // affect anybody, I'm not going to add it just to be completely safe about - // performance. - ABI_PushRegistersAndAdjustStack(registersInUse, true); + // RSP alignment here is 8 due to the call. 
+ ABI_PushRegistersAndAdjustStack(registersInUse, 8); if (addrReg != ABI_PARAM1) MOV(32, R(ABI_PARAM1), R((X64Reg)addrReg)); @@ -91,7 +89,7 @@ const u8 *TrampolineCache::GetReadTrampoline(const InstructionInfo &info, u32 re MOV(32, R(dataReg), R(ABI_RETURN)); } - ABI_PopRegistersAndAdjustStack(registersInUse, true); + ABI_PopRegistersAndAdjustStack(registersInUse, 8); RET(); return trampoline; } @@ -115,7 +113,7 @@ const u8 *TrampolineCache::GetWriteTrampoline(const InstructionInfo &info, u32 r // PC is used by memory watchpoints (if enabled) or to print accurate PC locations in debug logs MOV(32, PPCSTATE(pc), Imm32(pc)); - ABI_PushRegistersAndAdjustStack(registersInUse, true); + ABI_PushRegistersAndAdjustStack(registersInUse, 8); MOVTwo(64, ABI_PARAM1, dataReg, ABI_PARAM2, addrReg, ABI_PARAM3); @@ -140,7 +138,7 @@ const u8 *TrampolineCache::GetWriteTrampoline(const InstructionInfo &info, u32 r break; } - ABI_PopRegistersAndAdjustStack(registersInUse, true); + ABI_PopRegistersAndAdjustStack(registersInUse, 8); RET(); return trampoline; diff --git a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp index be43680e88..0eb5f63d98 100644 --- a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp @@ -204,9 +204,9 @@ private: void CallLambda(int sbits, const std::function* lambda) { - m_code->ABI_PushRegistersAndAdjustStack(m_registers_in_use, false); + m_code->ABI_PushRegistersAndAdjustStack(m_registers_in_use, 0); m_code->ABI_CallLambdaC(lambda, m_address); - m_code->ABI_PopRegistersAndAdjustStack(m_registers_in_use, false); + m_code->ABI_PopRegistersAndAdjustStack(m_registers_in_use, 0); MoveOpArgToReg(sbits, R(ABI_RETURN)); } @@ -305,7 +305,7 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress, } else { - ABI_PushRegistersAndAdjustStack(registersInUse, false); + ABI_PushRegistersAndAdjustStack(registersInUse, 0); switch (accessSize) { case 
64: ABI_CallFunctionC((void *)&Memory::Read_U64, address); break; @@ -313,7 +313,7 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress, case 16: ABI_CallFunctionC((void *)&Memory::Read_U16_ZX, address); break; case 8: ABI_CallFunctionC((void *)&Memory::Read_U8_ZX, address); break; } - ABI_PopRegistersAndAdjustStack(registersInUse, false); + ABI_PopRegistersAndAdjustStack(registersInUse, 0); MEMCHECK_START @@ -350,7 +350,7 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress, FixupBranch fast = J_CC(CC_Z, true); - ABI_PushRegistersAndAdjustStack(registersInUse, false); + ABI_PushRegistersAndAdjustStack(registersInUse, 0); switch (accessSize) { case 64: @@ -366,7 +366,7 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress, ABI_CallFunctionA((void *)&Memory::Read_U8_ZX, addr_loc); break; } - ABI_PopRegistersAndAdjustStack(registersInUse, false); + ABI_PopRegistersAndAdjustStack(registersInUse, 0); MEMCHECK_START @@ -470,9 +470,9 @@ void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acce FixupBranch fast = J_CC(CC_Z, true); // PC is used by memory watchpoints (if enabled) or to print accurate PC locations in debug logs MOV(32, PPCSTATE(pc), Imm32(jit->js.compilerPC)); - bool noProlog = (0 != (flags & SAFE_LOADSTORE_NO_PROLOG)); + size_t rsp_alignment = (flags & SAFE_LOADSTORE_NO_PROLOG) ? 
8 : 0; bool swap = !(flags & SAFE_LOADSTORE_NO_SWAP); - ABI_PushRegistersAndAdjustStack(registersInUse, noProlog); + ABI_PushRegistersAndAdjustStack(registersInUse, rsp_alignment); switch (accessSize) { case 64: @@ -488,7 +488,7 @@ void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acce ABI_CallFunctionRR((void *)&Memory::Write_U8, reg_value, reg_addr, false); break; } - ABI_PopRegistersAndAdjustStack(registersInUse, noProlog); + ABI_PopRegistersAndAdjustStack(registersInUse, rsp_alignment); FixupBranch exit = J(); SetJumpTarget(fast); UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, 0, swap); diff --git a/Source/Core/VideoCommon/VertexLoader.cpp b/Source/Core/VideoCommon/VertexLoader.cpp index a227b6da52..e57dc134c6 100644 --- a/Source/Core/VideoCommon/VertexLoader.cpp +++ b/Source/Core/VideoCommon/VertexLoader.cpp @@ -584,7 +584,7 @@ void VertexLoader::CompileVertexTranslator() PanicAlert("Trying to recompile a vertex translator"); m_compiledCode = GetCodePtr(); - ABI_PushAllCalleeSavedRegsAndAdjustStack(); + ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8); // Start loop here const u8 *loop_start = GetCodePtr(); @@ -845,7 +845,7 @@ void VertexLoader::CompileVertexTranslator() SUB(32, MatR(RAX), Imm8(1)); J_CC(CC_NZ, loop_start); - ABI_PopAllCalleeSavedRegsAndAdjustStack(); + ABI_PopRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8); RET(); #endif } From c5c0b36046c67438c21471c0e5d10670cc22d960 Mon Sep 17 00:00:00 2001 From: comex Date: Sun, 7 Sep 2014 14:21:16 -0400 Subject: [PATCH 2/3] Remove the inaccurately named ABI_PushAllCalleeSavedRegsAndAdjustStack (it didn't preserve FPRs!) and replace with ABI_PushRegistersAndAdjustStack. To avoid FPRs being pushed unnecessarily, I checked the uses: DSPEmitter doesn't use FPRs, and VertexLoader doesn't use anything but RAX, so I specified the register list accordingly. 
The regular JIT, however, does use FPRs, and as far as I can tell, it was incorrect not to save them in the outer routine. Since the dispatcher loop is only exited when pausing or stopping, this should have no noticeable performance impact. --- Source/Core/Common/x64ABI.cpp | 60 ----------------------- Source/Core/Core/DSP/DSPEmitter.cpp | 6 ++- Source/Core/Core/PowerPC/Jit64/JitAsm.cpp | 6 +-- Source/Core/VideoCommon/VertexLoader.cpp | 5 +- 4 files changed, 10 insertions(+), 67 deletions(-) diff --git a/Source/Core/Common/x64ABI.cpp b/Source/Core/Common/x64ABI.cpp index 046d90e509..707d549e76 100644 --- a/Source/Core/Common/x64ABI.cpp +++ b/Source/Core/Common/x64ABI.cpp @@ -453,63 +453,3 @@ void XEmitter::ABI_CallFunctionA(void *func, const Gen::OpArg &arg1) ABI_RestoreStack(0); } -#ifdef _WIN32 -// Win64 Specific Code - -void XEmitter::ABI_PushAllCalleeSavedRegsAndAdjustStack() -{ - //we only want to do this once - PUSH(RBP); - MOV(64, R(RBP), R(RSP)); - PUSH(RBX); - PUSH(RSI); - PUSH(RDI); - PUSH(R12); - PUSH(R13); - PUSH(R14); - PUSH(R15); - SUB(64, R(RSP), Imm8(0x28)); - //TODO: Also preserve XMM0-3? 
-} - -void XEmitter::ABI_PopAllCalleeSavedRegsAndAdjustStack() -{ - ADD(64, R(RSP), Imm8(0x28)); - POP(R15); - POP(R14); - POP(R13); - POP(R12); - POP(RDI); - POP(RSI); - POP(RBX); - POP(RBP); -} - -#else -// Unix64 Specific Code - -void XEmitter::ABI_PushAllCalleeSavedRegsAndAdjustStack() -{ - PUSH(RBP); - MOV(64, R(RBP), R(RSP)); - PUSH(RBX); - PUSH(R12); - PUSH(R13); - PUSH(R14); - PUSH(R15); - SUB(64, R(RSP), Imm8(8)); -} - -void XEmitter::ABI_PopAllCalleeSavedRegsAndAdjustStack() -{ - ADD(64, R(RSP), Imm8(8)); - POP(R15); - POP(R14); - POP(R13); - POP(R12); - POP(RBX); - POP(RBP); -} - -#endif // WIN32 - diff --git a/Source/Core/Core/DSP/DSPEmitter.cpp b/Source/Core/Core/DSP/DSPEmitter.cpp index a7eec8a17e..188dfcaf2c 100644 --- a/Source/Core/Core/DSP/DSPEmitter.cpp +++ b/Source/Core/Core/DSP/DSPEmitter.cpp @@ -384,7 +384,9 @@ const u8 *DSPEmitter::CompileStub() void DSPEmitter::CompileDispatcher() { enterDispatcher = AlignCode16(); - ABI_PushAllCalleeSavedRegsAndAdjustStack(); + // We don't use floating point (high 16 bits). + u32 registers_used = ABI_ALL_CALLEE_SAVED & 0xffff; + ABI_PushRegistersAndAdjustStack(registers_used, 8); const u8 *dispatcherLoop = GetCodePtr(); @@ -419,6 +421,6 @@ void DSPEmitter::CompileDispatcher() SetJumpTarget(exceptionExit); } //MOV(32, M(&cyclesLeft), Imm32(0)); - ABI_PopAllCalleeSavedRegsAndAdjustStack(); + ABI_PopRegistersAndAdjustStack(registers_used, 8); RET(); } diff --git a/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp b/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp index 1c5b78666f..40eb726db7 100644 --- a/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp +++ b/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp @@ -16,7 +16,7 @@ using namespace Gen; void Jit64AsmRoutineManager::Generate() { enterCode = AlignCode16(); - ABI_PushAllCalleeSavedRegsAndAdjustStack(); + ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8); // Two statically allocated registers. 
MOV(64, R(RMEM), Imm64((u64)Memory::base)); @@ -39,7 +39,7 @@ void Jit64AsmRoutineManager::Generate() ABI_CallFunction(reinterpret_cast(&PowerPC::CheckBreakPoints)); TEST(32, M((void*)PowerPC::GetStatePtr()), Imm32(0xFFFFFFFF)); FixupBranch noBreakpoint = J_CC(CC_Z); - ABI_PopAllCalleeSavedRegsAndAdjustStack(); + ABI_PopRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8); RET(); SetJumpTarget(noBreakpoint); SetJumpTarget(notStepping); @@ -126,7 +126,7 @@ void Jit64AsmRoutineManager::Generate() J_CC(CC_Z, outerLoop); //Landing pad for drec space - ABI_PopAllCalleeSavedRegsAndAdjustStack(); + ABI_PopRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8); RET(); GenerateCommon(); diff --git a/Source/Core/VideoCommon/VertexLoader.cpp b/Source/Core/VideoCommon/VertexLoader.cpp index e57dc134c6..3f509e8d32 100644 --- a/Source/Core/VideoCommon/VertexLoader.cpp +++ b/Source/Core/VideoCommon/VertexLoader.cpp @@ -584,7 +584,8 @@ void VertexLoader::CompileVertexTranslator() PanicAlert("Trying to recompile a vertex translator"); m_compiledCode = GetCodePtr(); - ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8); + // We don't use any callee saved registers or anything but RAX. + ABI_PushRegistersAndAdjustStack(0, 8); // Start loop here const u8 *loop_start = GetCodePtr(); @@ -845,7 +846,7 @@ void VertexLoader::CompileVertexTranslator() SUB(32, MatR(RAX), Imm8(1)); J_CC(CC_NZ, loop_start); - ABI_PopRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8); + ABI_PopRegistersAndAdjustStack(0, 8); RET(); #endif } From 4dc090643d34917ce369d84032fc9530de46c7f0 Mon Sep 17 00:00:00 2001 From: comex Date: Sun, 7 Sep 2014 14:29:51 -0400 Subject: [PATCH 3/3] Remove ABI_AlignStack/ABI_RestoreStack and the noProlog option to ABI_CallFunctionRR. The latter being true was the only case where the former would do anything, and it was never true. They became obsolete with x86's removal. 
--- Source/Core/Common/x64ABI.cpp | 58 +------------------ Source/Core/Common/x64Emitter.h | 11 +--- .../PowerPC/Jit64/Jit_LoadStorePaired.cpp | 2 - .../Core/Core/PowerPC/JitCommon/Jit_Util.cpp | 8 +-- 4 files changed, 6 insertions(+), 73 deletions(-) diff --git a/Source/Core/Common/x64ABI.cpp b/Source/Core/Common/x64ABI.cpp index 707d549e76..d489301c4b 100644 --- a/Source/Core/Common/x64ABI.cpp +++ b/Source/Core/Common/x64ABI.cpp @@ -10,32 +10,6 @@ using namespace Gen; // Shared code between Win64 and Unix64 -unsigned int XEmitter::ABI_GetAlignedFrameSize(unsigned int frameSize, bool noProlog) -{ - frameSize = noProlog ? 0x28 : 0; - return frameSize; -} - -void XEmitter::ABI_AlignStack(unsigned int frameSize, bool noProlog) -{ - unsigned int fillSize = ABI_GetAlignedFrameSize(frameSize, noProlog) - frameSize; - - if (fillSize != 0) - { - SUB(64, R(RSP), Imm8(fillSize)); - } -} - -void XEmitter::ABI_RestoreStack(unsigned int frameSize, bool noProlog) -{ - unsigned int alignedSize = ABI_GetAlignedFrameSize(frameSize, noProlog); - - if (alignedSize != 0) - { - ADD(64, R(RSP), Imm8(alignedSize)); - } -} - void XEmitter::ABI_CalculateFrameSize(u32 mask, size_t rsp_alignment, size_t needed_frame_size, size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp) { size_t shadow = 0; @@ -128,7 +102,6 @@ void XEmitter::ABI_PopRegistersAndAdjustStack(u32 mask, size_t rsp_alignment, si // Common functions void XEmitter::ABI_CallFunction(void *func) { - ABI_AlignStack(0); u64 distance = u64(func) - (u64(code) + 5); if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) @@ -141,12 +114,10 @@ void XEmitter::ABI_CallFunction(void *func) { CALL(func); } - ABI_RestoreStack(0); } void XEmitter::ABI_CallFunctionC16(void *func, u16 param1) { - ABI_AlignStack(0); MOV(32, R(ABI_PARAM1), Imm32((u32)param1)); u64 distance = u64(func) - (u64(code) + 5); if (distance >= 0x0000000080000000ULL && @@ -160,12 +131,10 @@ void XEmitter::ABI_CallFunctionC16(void *func, u16 
param1) { CALL(func); } - ABI_RestoreStack(0); } void XEmitter::ABI_CallFunctionCC16(void *func, u32 param1, u16 param2) { - ABI_AlignStack(0); MOV(32, R(ABI_PARAM1), Imm32(param1)); MOV(32, R(ABI_PARAM2), Imm32((u32)param2)); u64 distance = u64(func) - (u64(code) + 5); @@ -180,12 +149,10 @@ void XEmitter::ABI_CallFunctionCC16(void *func, u32 param1, u16 param2) { CALL(func); } - ABI_RestoreStack(0); } void XEmitter::ABI_CallFunctionC(void *func, u32 param1) { - ABI_AlignStack(0); MOV(32, R(ABI_PARAM1), Imm32(param1)); u64 distance = u64(func) - (u64(code) + 5); if (distance >= 0x0000000080000000ULL && @@ -199,12 +166,10 @@ void XEmitter::ABI_CallFunctionC(void *func, u32 param1) { CALL(func); } - ABI_RestoreStack(0); } void XEmitter::ABI_CallFunctionCC(void *func, u32 param1, u32 param2) { - ABI_AlignStack(0); MOV(32, R(ABI_PARAM1), Imm32(param1)); MOV(32, R(ABI_PARAM2), Imm32(param2)); u64 distance = u64(func) - (u64(code) + 5); @@ -219,12 +184,10 @@ void XEmitter::ABI_CallFunctionCC(void *func, u32 param1, u32 param2) { CALL(func); } - ABI_RestoreStack(0); } void XEmitter::ABI_CallFunctionCP(void *func, u32 param1, void *param2) { - ABI_AlignStack(0); MOV(32, R(ABI_PARAM1), Imm32(param1)); MOV(64, R(ABI_PARAM2), Imm64((u64)param2)); u64 distance = u64(func) - (u64(code) + 5); @@ -239,12 +202,10 @@ void XEmitter::ABI_CallFunctionCP(void *func, u32 param1, void *param2) { CALL(func); } - ABI_RestoreStack(0); } void XEmitter::ABI_CallFunctionCCC(void *func, u32 param1, u32 param2, u32 param3) { - ABI_AlignStack(0); MOV(32, R(ABI_PARAM1), Imm32(param1)); MOV(32, R(ABI_PARAM2), Imm32(param2)); MOV(32, R(ABI_PARAM3), Imm32(param3)); @@ -260,12 +221,10 @@ void XEmitter::ABI_CallFunctionCCC(void *func, u32 param1, u32 param2, u32 param { CALL(func); } - ABI_RestoreStack(0); } void XEmitter::ABI_CallFunctionCCP(void *func, u32 param1, u32 param2, void *param3) { - ABI_AlignStack(0); MOV(32, R(ABI_PARAM1), Imm32(param1)); MOV(32, R(ABI_PARAM2), Imm32(param2)); MOV(64, 
R(ABI_PARAM3), Imm64((u64)param3)); @@ -281,12 +240,10 @@ void XEmitter::ABI_CallFunctionCCP(void *func, u32 param1, u32 param2, void *par { CALL(func); } - ABI_RestoreStack(0); } void XEmitter::ABI_CallFunctionCCCP(void *func, u32 param1, u32 param2, u32 param3, void *param4) { - ABI_AlignStack(0); MOV(32, R(ABI_PARAM1), Imm32(param1)); MOV(32, R(ABI_PARAM2), Imm32(param2)); MOV(32, R(ABI_PARAM3), Imm32(param3)); @@ -303,12 +260,10 @@ void XEmitter::ABI_CallFunctionCCCP(void *func, u32 param1, u32 param2, u32 para { CALL(func); } - ABI_RestoreStack(0); } void XEmitter::ABI_CallFunctionPC(void *func, void *param1, u32 param2) { - ABI_AlignStack(0); MOV(64, R(ABI_PARAM1), Imm64((u64)param1)); MOV(32, R(ABI_PARAM2), Imm32(param2)); u64 distance = u64(func) - (u64(code) + 5); @@ -323,12 +278,10 @@ void XEmitter::ABI_CallFunctionPC(void *func, void *param1, u32 param2) { CALL(func); } - ABI_RestoreStack(0); } void XEmitter::ABI_CallFunctionPPC(void *func, void *param1, void *param2, u32 param3) { - ABI_AlignStack(0); MOV(64, R(ABI_PARAM1), Imm64((u64)param1)); MOV(64, R(ABI_PARAM2), Imm64((u64)param2)); MOV(32, R(ABI_PARAM3), Imm32(param3)); @@ -344,13 +297,11 @@ void XEmitter::ABI_CallFunctionPPC(void *func, void *param1, void *param2, u32 p { CALL(func); } - ABI_RestoreStack(0); } // Pass a register as a parameter. void XEmitter::ABI_CallFunctionR(void *func, X64Reg reg1) { - ABI_AlignStack(0); if (reg1 != ABI_PARAM1) MOV(32, R(ABI_PARAM1), R(reg1)); u64 distance = u64(func) - (u64(code) + 5); @@ -365,13 +316,11 @@ void XEmitter::ABI_CallFunctionR(void *func, X64Reg reg1) { CALL(func); } - ABI_RestoreStack(0); } // Pass two registers as parameters. 
-void XEmitter::ABI_CallFunctionRR(void *func, X64Reg reg1, X64Reg reg2, bool noProlog) +void XEmitter::ABI_CallFunctionRR(void *func, X64Reg reg1, X64Reg reg2) { - ABI_AlignStack(0, noProlog); MOVTwo(64, ABI_PARAM1, reg1, ABI_PARAM2, reg2, ABI_PARAM3); u64 distance = u64(func) - (u64(code) + 5); if (distance >= 0x0000000080000000ULL && @@ -385,7 +334,6 @@ void XEmitter::ABI_CallFunctionRR(void *func, X64Reg reg1, X64Reg reg2, bool noP { CALL(func); } - ABI_RestoreStack(0, noProlog); } void XEmitter::MOVTwo(int bits, Gen::X64Reg dst1, Gen::X64Reg src1, Gen::X64Reg dst2, Gen::X64Reg src2, X64Reg temp) @@ -414,7 +362,6 @@ void XEmitter::MOVTwo(int bits, Gen::X64Reg dst1, Gen::X64Reg src1, Gen::X64Reg void XEmitter::ABI_CallFunctionAC(void *func, const Gen::OpArg &arg1, u32 param2) { - ABI_AlignStack(0); if (!arg1.IsSimpleReg(ABI_PARAM1)) MOV(32, R(ABI_PARAM1), arg1); MOV(32, R(ABI_PARAM2), Imm32(param2)); @@ -430,12 +377,10 @@ void XEmitter::ABI_CallFunctionAC(void *func, const Gen::OpArg &arg1, u32 param2 { CALL(func); } - ABI_RestoreStack(0); } void XEmitter::ABI_CallFunctionA(void *func, const Gen::OpArg &arg1) { - ABI_AlignStack(0); if (!arg1.IsSimpleReg(ABI_PARAM1)) MOV(32, R(ABI_PARAM1), arg1); u64 distance = u64(func) - (u64(code) + 5); @@ -450,6 +395,5 @@ void XEmitter::ABI_CallFunctionA(void *func, const Gen::OpArg &arg1) { CALL(func); } - ABI_RestoreStack(0); } diff --git a/Source/Core/Common/x64Emitter.h b/Source/Core/Common/x64Emitter.h index 22c6857da0..f229c1a1c3 100644 --- a/Source/Core/Common/x64Emitter.h +++ b/Source/Core/Common/x64Emitter.h @@ -753,26 +753,17 @@ public: // Pass a register as a parameter. void ABI_CallFunctionR(void *func, X64Reg reg1); - void ABI_CallFunctionRR(void *func, X64Reg reg1, X64Reg reg2, bool noProlog = false); + void ABI_CallFunctionRR(void *func, X64Reg reg1, X64Reg reg2); // Helper method for the above, or can be used separately. 
void MOVTwo(int bits, Gen::X64Reg dst1, Gen::X64Reg src1, Gen::X64Reg dst2, Gen::X64Reg src2, Gen::X64Reg temp); - // A function that doesn't have any control over what it will do to regs, - // such as the dispatcher, should be surrounded by these. - void ABI_PushAllCalleeSavedRegsAndAdjustStack(); - void ABI_PopAllCalleeSavedRegsAndAdjustStack(); - // Saves/restores the registers and adjusts the stack to be aligned as // required by the ABI, where the previous alignment was as specified. // Push returns the size of the shadow space, i.e. the offset of the frame. size_t ABI_PushRegistersAndAdjustStack(u32 mask, size_t rsp_alignment, size_t needed_frame_size = 0); void ABI_PopRegistersAndAdjustStack(u32 mask, size_t rsp_alignment, size_t needed_frame_size = 0); - unsigned int ABI_GetAlignedFrameSize(unsigned int frameSize, bool noProlog = false); - void ABI_AlignStack(unsigned int frameSize, bool noProlog = false); - void ABI_RestoreStack(unsigned int frameSize, bool noProlog = false); - inline int ABI_GetNumXMMRegs() { return 16; } // Strange call wrappers. 
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp index 2630395630..dd751c27bf 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp @@ -87,9 +87,7 @@ void Jit64::psq_l(UGeckoInstruction inst) if (inst.W) OR(32, R(RSCRATCH2), Imm8(8)); - ABI_AlignStack(0); CALLptr(MScaled(RSCRATCH2, SCALE_8, (u32)(u64)asm_routines.pairedLoadQuantized)); - ABI_RestoreStack(0); // MEMCHECK_START // FIXME: MMU does not work here because of unsafe memory access diff --git a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp index 0eb5f63d98..6b80fd853d 100644 --- a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp @@ -476,16 +476,16 @@ void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acce switch (accessSize) { case 64: - ABI_CallFunctionRR(swap ? ((void *)&Memory::Write_U64) : ((void *)&Memory::Write_U64_Swap), reg_value, reg_addr, false); + ABI_CallFunctionRR(swap ? ((void *)&Memory::Write_U64) : ((void *)&Memory::Write_U64_Swap), reg_value, reg_addr); break; case 32: - ABI_CallFunctionRR(swap ? ((void *)&Memory::Write_U32) : ((void *)&Memory::Write_U32_Swap), reg_value, reg_addr, false); + ABI_CallFunctionRR(swap ? ((void *)&Memory::Write_U32) : ((void *)&Memory::Write_U32_Swap), reg_value, reg_addr); break; case 16: - ABI_CallFunctionRR(swap ? ((void *)&Memory::Write_U16) : ((void *)&Memory::Write_U16_Swap), reg_value, reg_addr, false); + ABI_CallFunctionRR(swap ? ((void *)&Memory::Write_U16) : ((void *)&Memory::Write_U16_Swap), reg_value, reg_addr); break; case 8: - ABI_CallFunctionRR((void *)&Memory::Write_U8, reg_value, reg_addr, false); + ABI_CallFunctionRR((void *)&Memory::Write_U8, reg_value, reg_addr); break; } ABI_PopRegistersAndAdjustStack(registersInUse, rsp_alignment);