diff --git a/Source/Core/Common/x64ABI.cpp b/Source/Core/Common/x64ABI.cpp index 0f958a4a6d..f51b760ced 100644 --- a/Source/Core/Common/x64ABI.cpp +++ b/Source/Core/Common/x64ABI.cpp @@ -10,31 +10,23 @@ using namespace Gen; // Shared code between Win64 and Unix64 -void XEmitter::ABI_CalculateFrameSize(u32 mask, size_t rsp_alignment, size_t needed_frame_size, size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp) +void XEmitter::ABI_CalculateFrameSize(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size, size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp) { size_t shadow = 0; #if defined(_WIN32) shadow = 0x20; #endif - int count = 0; - for (int r = 0; r < 16; r++) - { - if (mask & (1 << r)) - count++; - } + int count = (mask & ABI_ALL_GPRS).Count(); rsp_alignment -= count * 8; size_t subtraction = 0; - if (mask & 0xffff0000) + int fpr_count = (mask & ABI_ALL_FPRS).Count(); + if (fpr_count) { // If we have any XMMs to save, we must align the stack here. subtraction = rsp_alignment & 0xf; } - for (int x = 0; x < 16; x++) - { - if (mask & (1 << (16 + x))) - subtraction += 16; - } + subtraction += 16 * fpr_count; size_t xmm_base_subtraction = subtraction; subtraction += needed_frame_size; subtraction += shadow; @@ -47,44 +39,35 @@ void XEmitter::ABI_CalculateFrameSize(u32 mask, size_t rsp_alignment, size_t nee *xmm_offsetp = subtraction - xmm_base_subtraction; } -size_t XEmitter::ABI_PushRegistersAndAdjustStack(u32 mask, size_t rsp_alignment, size_t needed_frame_size) +size_t XEmitter::ABI_PushRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size) { size_t shadow, subtraction, xmm_offset; ABI_CalculateFrameSize(mask, rsp_alignment, needed_frame_size, &shadow, &subtraction, &xmm_offset); - for (int r = 0; r < 16; r++) - { - if (mask & (1 << r)) - PUSH((X64Reg) r); - } + for (int r : mask & ABI_ALL_GPRS) + PUSH((X64Reg) r); if (subtraction) SUB(64, R(RSP), subtraction >= 0x80 ? Imm32((u32)subtraction) : Imm8((u8)subtraction)); - for (int x = 0; x < 16; x++) + for (int x : mask & ABI_ALL_FPRS) { - if (mask & (1 << (16 + x))) - { - MOVAPD(MDisp(RSP, (int)xmm_offset), (X64Reg) x); - xmm_offset += 16; - } + MOVAPD(MDisp(RSP, (int)xmm_offset), (X64Reg) (x - 16)); + xmm_offset += 16; } return shadow; } -void XEmitter::ABI_PopRegistersAndAdjustStack(u32 mask, size_t rsp_alignment, size_t needed_frame_size) +void XEmitter::ABI_PopRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size) { size_t shadow, subtraction, xmm_offset; ABI_CalculateFrameSize(mask, rsp_alignment, needed_frame_size, &shadow, &subtraction, &xmm_offset); - for (int x = 0; x < 16; x++) + for (int x : mask & ABI_ALL_FPRS) { - if (mask & (1 << (16 + x))) - { - MOVAPD((X64Reg) x, MDisp(RSP, (int)xmm_offset)); - xmm_offset += 16; - } + MOVAPD((X64Reg) (x - 16), MDisp(RSP, (int)xmm_offset)); + xmm_offset += 16; } if (subtraction) @@ -92,10 +75,8 @@ void XEmitter::ABI_PopRegistersAndAdjustStack(u32 mask, size_t rsp_alignment, si for (int r = 15; r >= 0; r--) { - if (mask & (1 << r)) - { + if (mask[r]) POP((X64Reg) r); - } } } diff --git a/Source/Core/Common/x64ABI.h b/Source/Core/Common/x64ABI.h index bf058bc04a..c76759b31c 100644 --- a/Source/Core/Common/x64ABI.h +++ b/Source/Core/Common/x64ABI.h @@ -4,6 +4,7 @@ #pragma once +#include "Common/BitSet.h" #include "Common/x64Emitter.h" // x64 ABI:s, and helpers to help follow them when JIT-ing code. @@ -23,6 +24,9 @@ // Callee-save: RBX RBP R12 R13 R14 R15 // Parameters: RDI RSI RDX RCX R8 R9 +#define ABI_ALL_FPRS BitSet32(0xffff0000) +#define ABI_ALL_GPRS BitSet32(0x0000ffff) + #ifdef _WIN32 // 64-bit Windows - the really exotic calling convention #define ABI_PARAM1 RCX @@ -31,11 +35,9 @@ #define ABI_PARAM4 R9 // xmm0-xmm15 use the upper 16 bits in the functions that push/pop registers. -#define ABI_ALL_CALLER_SAVED ((1 << RAX) | (1 << RCX) | (1 << RDX) | (1 << R8) | \ - (1 << R9) | (1 << R10) | (1 << R11) | \ - (1 << (XMM0+16)) | (1 << (XMM1+16)) | (1 << (XMM2+16)) | (1 << (XMM3+16)) | \ - (1 << (XMM4+16)) | (1 << (XMM5+16))) - +#define ABI_ALL_CALLER_SAVED \ + (BitSet32 { RAX, RCX, RDX, R8, R9, R10, R11, \ + XMM0+16, XMM1+16, XMM2+16, XMM3+16, XMM4+16, XMM5+16 }) #else //64-bit Unix / OS X #define ABI_PARAM1 RDI @@ -47,13 +49,12 @@ // FIXME: avoid pushing all 16 XMM registers when possible? most functions we call probably // don't actually clobber them. -#define ABI_ALL_CALLER_SAVED ((1 << RAX) | (1 << RCX) | (1 << RDX) | (1 << RDI) | \ - (1 << RSI) | (1 << R8) | (1 << R9) | (1 << R10) | (1 << R11) | \ - 0xffff0000 /* xmm0..15 */) - +#define ABI_ALL_CALLER_SAVED \ + (BitSet32 { RAX, RCX, RDX, RDI, RSI, R8, R9, R10, R11 } | \ + ABI_ALL_FPRS) #endif // WIN32 -#define ABI_ALL_CALLEE_SAVED ((u32) ~ABI_ALL_CALLER_SAVED) +#define ABI_ALL_CALLEE_SAVED (~ABI_ALL_CALLER_SAVED) #define ABI_RETURN RAX diff --git a/Source/Core/Common/x64Emitter.h b/Source/Core/Common/x64Emitter.h index ed0250e8d0..7f98497456 100644 --- a/Source/Core/Common/x64Emitter.h +++ b/Source/Core/Common/x64Emitter.h @@ -10,6 +10,7 @@ #include #include +#include "Common/BitSet.h" #include "Common/CodeBlock.h" #include "Common/CommonTypes.h" @@ -302,7 +303,7 @@ private: void WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, OpArg arg); void WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg &a1, const OpArg &a2); - void ABI_CalculateFrameSize(u32 mask, size_t rsp_alignment, size_t needed_frame_size, size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp); + void ABI_CalculateFrameSize(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size, size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp); protected: inline void Write8(u8 value) {*code++ = value;} @@ -883,8 +884,8 @@ public: // Saves/restores the registers and adjusts the stack to be aligned as // required by the ABI, where the previous alignment was as specified. // Push returns the size of the shadow space, i.e. the offset of the frame. - size_t ABI_PushRegistersAndAdjustStack(u32 mask, size_t rsp_alignment, size_t needed_frame_size = 0); - void ABI_PopRegistersAndAdjustStack(u32 mask, size_t rsp_alignment, size_t needed_frame_size = 0); + size_t ABI_PushRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size = 0); + void ABI_PopRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size = 0); inline int ABI_GetNumXMMRegs() { return 16; } diff --git a/Source/Core/Core/DSP/DSPEmitter.cpp b/Source/Core/Core/DSP/DSPEmitter.cpp index 188dfcaf2c..fec47aea93 100644 --- a/Source/Core/Core/DSP/DSPEmitter.cpp +++ b/Source/Core/Core/DSP/DSPEmitter.cpp @@ -385,7 +385,7 @@ void DSPEmitter::CompileDispatcher() { enterDispatcher = AlignCode16(); // We don't use floating point (high 16 bits). - u32 registers_used = ABI_ALL_CALLEE_SAVED & 0xffff; + BitSet32 registers_used = ABI_ALL_CALLEE_SAVED & BitSet32(0xffff); ABI_PushRegistersAndAdjustStack(registers_used, 8); const u8 *dispatcherLoop = GetCodePtr(); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/PowerPC/Jit64/Jit.cpp index 99633dc57d..06a27a5c2d 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit.cpp @@ -248,9 +248,9 @@ void Jit64::WriteCallInterpreter(UGeckoInstruction inst) MOV(32, PPCSTATE(npc), Imm32(js.compilerPC + 4)); } Interpreter::_interpreterInstruction instr = GetInterpreterOp(inst); - ABI_PushRegistersAndAdjustStack(0, 0); + ABI_PushRegistersAndAdjustStack({}, 0); ABI_CallFunctionC((void*)instr, inst.hex); - ABI_PopRegistersAndAdjustStack(0, 0); + ABI_PopRegistersAndAdjustStack({}, 0); } void Jit64::unknown_instruction(UGeckoInstruction inst) @@ -267,9 +267,9 @@ void Jit64::HLEFunction(UGeckoInstruction _inst) { gpr.Flush(); fpr.Flush(); - ABI_PushRegistersAndAdjustStack(0, 0); + ABI_PushRegistersAndAdjustStack({}, 0); ABI_CallFunctionCC((void*)&HLE::Execute, js.compilerPC, _inst.hex); - ABI_PopRegistersAndAdjustStack(0, 0); + ABI_PopRegistersAndAdjustStack({}, 0); } void Jit64::DoNothing(UGeckoInstruction _inst) @@ -307,18 +307,18 @@ bool Jit64::Cleanup() if (jo.optimizeGatherPipe && js.fifoBytesThisBlock > 0) { - ABI_PushRegistersAndAdjustStack(0, 0); + ABI_PushRegistersAndAdjustStack({}, 0); ABI_CallFunction((void *)&GPFifo::CheckGatherPipe); - ABI_PopRegistersAndAdjustStack(0, 0); + ABI_PopRegistersAndAdjustStack({}, 0); did_something = true; } // SPEED HACK: MMCR0/MMCR1 should be checked at run-time, not at compile time. if (MMCR0.Hex || MMCR1.Hex) { - ABI_PushRegistersAndAdjustStack(0, 0); + ABI_PushRegistersAndAdjustStack({}, 0); ABI_CallFunctionCCC((void *)&PowerPC::UpdatePerformanceMonitor, js.downcountAmount, jit->js.numLoadStoreInst, jit->js.numFloatingPointInst); - ABI_PopRegistersAndAdjustStack(0, 0); + ABI_PopRegistersAndAdjustStack({}, 0); did_something = true; } @@ -433,9 +433,9 @@ void Jit64::WriteRfiExitDestInRSCRATCH() MOV(32, PPCSTATE(pc), R(RSCRATCH)); MOV(32, PPCSTATE(npc), R(RSCRATCH)); Cleanup(); - ABI_PushRegistersAndAdjustStack(0, 0); + ABI_PushRegistersAndAdjustStack({}, 0); ABI_CallFunction(reinterpret_cast(&PowerPC::CheckExceptions)); - ABI_PopRegistersAndAdjustStack(0, 0); + ABI_PopRegistersAndAdjustStack({}, 0); SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount)); JMP(asm_routines.dispatcher, true); } @@ -445,9 +445,9 @@ void Jit64::WriteExceptionExit() Cleanup(); MOV(32, R(RSCRATCH), PPCSTATE(pc)); MOV(32, PPCSTATE(npc), R(RSCRATCH)); - ABI_PushRegistersAndAdjustStack(0, 0); + ABI_PushRegistersAndAdjustStack({}, 0); ABI_CallFunction(reinterpret_cast(&PowerPC::CheckExceptions)); - ABI_PopRegistersAndAdjustStack(0, 0); + ABI_PopRegistersAndAdjustStack({}, 0); SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount)); JMP(asm_routines.dispatcher, true); } @@ -457,9 +457,9 @@ void Jit64::WriteExternalExceptionExit() Cleanup(); MOV(32, R(RSCRATCH), PPCSTATE(pc)); MOV(32, PPCSTATE(npc), R(RSCRATCH)); - ABI_PushRegistersAndAdjustStack(0, 0); + ABI_PushRegistersAndAdjustStack({}, 0); ABI_CallFunction(reinterpret_cast(&PowerPC::CheckExternalExceptions)); - ABI_PopRegistersAndAdjustStack(0, 0); + ABI_PopRegistersAndAdjustStack({}, 0); SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount)); JMP(asm_routines.dispatcher, true); } @@ -560,9 +560,9 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc if (ImHereDebug) { - ABI_PushRegistersAndAdjustStack(0, 0); + ABI_PushRegistersAndAdjustStack({}, 0); ABI_CallFunction((void *)&ImHere); //Used to get a trace of the last few blocks before a crash, sometimes VERY useful - ABI_PopRegistersAndAdjustStack(0, 0); + ABI_PopRegistersAndAdjustStack({}, 0); } // Conditionally add profiling code. @@ -637,7 +637,7 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc { js.fifoBytesThisBlock -= 32; MOV(32, PPCSTATE(pc), Imm32(jit->js.compilerPC)); // Helps external systems know which instruction triggered the write - u32 registersInUse = CallerSavedRegistersInUse(); + BitSet32 registersInUse = CallerSavedRegistersInUse(); ABI_PushRegistersAndAdjustStack(registersInUse, 0); ABI_CallFunction((void *)&GPFifo::CheckGatherPipe); ABI_PopRegistersAndAdjustStack(registersInUse, 0); @@ -719,9 +719,9 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc fpr.Flush(); MOV(32, PPCSTATE(pc), Imm32(ops[i].address)); - ABI_PushRegistersAndAdjustStack(0, 0); + ABI_PushRegistersAndAdjustStack({}, 0); ABI_CallFunction(reinterpret_cast(&PowerPC::CheckBreakPoints)); - ABI_PopRegistersAndAdjustStack(0, 0); + ABI_PopRegistersAndAdjustStack({}, 0); TEST(32, M((void*)PowerPC::GetStatePtr()), Imm32(0xFFFFFFFF)); FixupBranch noBreakpoint = J_CC(CC_Z); @@ -843,15 +843,15 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc return normalEntry; } -u32 Jit64::CallerSavedRegistersInUse() +BitSet32 Jit64::CallerSavedRegistersInUse() { - u32 result = 0; + BitSet32 result; for (int i = 0; i < NUMXREGS; i++) { if (!gpr.IsFreeX(i)) - result |= (1 << i); + result[i] = true; if (!fpr.IsFreeX(i)) - result |= (1 << (16 + i)); + result[16 + i] = true; } return result & ABI_ALL_CALLER_SAVED; } diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.h b/Source/Core/Core/PowerPC/Jit64/Jit.h index 4dfbe56eb8..bb49e9288d 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.h +++ b/Source/Core/Core/PowerPC/Jit64/Jit.h @@ -73,7 +73,7 @@ public: void Jit(u32 em_address) override; const u8* DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buffer, JitBlock *b); - u32 CallerSavedRegistersInUse(); + BitSet32 CallerSavedRegistersInUse(); JitBlockCache *GetBlockCache() override { return &blocks; } diff --git a/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp b/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp index d895ae76db..7adb93d3b1 100644 --- a/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp +++ b/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp @@ -43,9 +43,9 @@ void Jit64AsmRoutineManager::Generate() MOV(64, R(RPPCSTATE), Imm64((u64)&PowerPC::ppcState + 0x80)); const u8* outerLoop = GetCodePtr(); - ABI_PushRegistersAndAdjustStack(0, 0); + ABI_PushRegistersAndAdjustStack({}, 0); ABI_CallFunction(reinterpret_cast(&CoreTiming::Advance)); - ABI_PopRegistersAndAdjustStack(0, 0); + ABI_PopRegistersAndAdjustStack({}, 0); FixupBranch skipToRealDispatch = J(SConfig::GetInstance().m_LocalCoreStartupParameter.bEnableDebugging); //skip the sync and compare first time dispatcherMispredictedBLR = GetCodePtr(); @@ -71,9 +71,9 @@ void Jit64AsmRoutineManager::Generate() { TEST(32, M((void*)PowerPC::GetStatePtr()), Imm32(PowerPC::CPU_STEPPING)); FixupBranch notStepping = J_CC(CC_Z); - ABI_PushRegistersAndAdjustStack(0, 0); + ABI_PushRegistersAndAdjustStack({}, 0); ABI_CallFunction(reinterpret_cast(&PowerPC::CheckBreakPoints)); - ABI_PopRegistersAndAdjustStack(0, 0); + ABI_PopRegistersAndAdjustStack({}, 0); TEST(32, M((void*)PowerPC::GetStatePtr()), Imm32(0xFFFFFFFF)); dbg_exit = J_CC(CC_NZ, true); SetJumpTarget(notStepping); @@ -129,9 +129,9 @@ void Jit64AsmRoutineManager::Generate() SetJumpTarget(notfound); //Ok, no block, let's jit - ABI_PushRegistersAndAdjustStack(0, 0); + ABI_PushRegistersAndAdjustStack({}, 0); ABI_CallFunctionA((void *)&Jit, PPCSTATE(pc)); - ABI_PopRegistersAndAdjustStack(0, 0); + ABI_PopRegistersAndAdjustStack({}, 0); // Jit might have cleared the code cache ResetStack(); @@ -146,9 +146,9 @@ void Jit64AsmRoutineManager::Generate() FixupBranch noExtException = J_CC(CC_Z); MOV(32, R(RSCRATCH), PPCSTATE(pc)); MOV(32, PPCSTATE(npc), R(RSCRATCH)); - ABI_PushRegistersAndAdjustStack(0, 0); + ABI_PushRegistersAndAdjustStack({}, 0); ABI_CallFunction(reinterpret_cast(&PowerPC::CheckExternalExceptions)); - ABI_PopRegistersAndAdjustStack(0, 0); + ABI_PopRegistersAndAdjustStack({}, 0); SetJumpTarget(noExtException); TEST(32, M((void*)PowerPC::GetStatePtr()), Imm32(0xFFFFFFFF)); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp index a2abade26b..cb4c6521a3 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp @@ -130,7 +130,7 @@ void Jit64::lXXx(UGeckoInstruction inst) TEST(32, gpr.R(d), gpr.R(d)); FixupBranch noIdle = J_CC(CC_NZ); - u32 registersInUse = CallerSavedRegistersInUse(); + BitSet32 registersInUse = CallerSavedRegistersInUse(); ABI_PushRegistersAndAdjustStack(registersInUse, 0); ABI_CallFunctionC((void *)&PowerPC::OnIdle, PowerPC::ppcState.gpr[a] + (s32)(s16)inst.SIMM_16); @@ -242,11 +242,11 @@ void Jit64::lXXx(UGeckoInstruction inst) gpr.Lock(a, b, d); gpr.BindToRegister(d, js.memcheck, true); - u32 registersInUse = CallerSavedRegistersInUse(); + BitSet32 registersInUse = CallerSavedRegistersInUse(); if (update && storeAddress) { // We need to save the (usually scratch) address register for the update. - registersInUse |= (1 << RSCRATCH2); + registersInUse[RSCRATCH2] = true; } SafeLoadToReg(gpr.RX(d), opAddress, accessSize, loadOffset, registersInUse, signExtend); @@ -310,7 +310,7 @@ void Jit64::dcbz(UGeckoInstruction inst) SwitchToFarCode(); SetJumpTarget(slow); MOV(32, M(&PC), Imm32(jit->js.compilerPC)); - u32 registersInUse = CallerSavedRegistersInUse(); + BitSet32 registersInUse = CallerSavedRegistersInUse(); ABI_PushRegistersAndAdjustStack(registersInUse, 0); ABI_CallFunctionR((void *)&Memory::ClearCacheLine, RSCRATCH); ABI_PopRegistersAndAdjustStack(registersInUse, 0); @@ -399,7 +399,7 @@ void Jit64::stX(UGeckoInstruction inst) // Helps external systems know which instruction triggered the write MOV(32, PPCSTATE(pc), Imm32(jit->js.compilerPC)); - u32 registersInUse = CallerSavedRegistersInUse(); + BitSet32 registersInUse = CallerSavedRegistersInUse(); ABI_PushRegistersAndAdjustStack(registersInUse, 0); switch (accessSize) { @@ -551,7 +551,7 @@ void Jit64::lmw(UGeckoInstruction inst) ADD(32, R(RSCRATCH2), gpr.R(inst.RA)); for (int i = inst.RD; i < 32; i++) { - SafeLoadToReg(RSCRATCH, R(RSCRATCH2), 32, (i - inst.RD) * 4, CallerSavedRegistersInUse() | (1 << RSCRATCH_EXTRA), false); + SafeLoadToReg(RSCRATCH, R(RSCRATCH2), 32, (i - inst.RD) * 4, CallerSavedRegistersInUse() | BitSet32 { RSCRATCH_EXTRA }, false); gpr.BindToRegister(i, false, true); MOV(32, gpr.R(i), R(RSCRATCH)); } diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStoreFloating.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStoreFloating.cpp index de5097ceb2..2b158b5948 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStoreFloating.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStoreFloating.cpp @@ -65,9 +65,9 @@ void Jit64::lfXXX(UGeckoInstruction inst) offset = (s16)inst.SIMM_16; } - u32 registersInUse = CallerSavedRegistersInUse(); + BitSet32 registersInUse = CallerSavedRegistersInUse(); if (update && js.memcheck) - registersInUse |= (1 << RSCRATCH2); + registersInUse[RSCRATCH2] = true; SafeLoadToReg(RSCRATCH, addr, single ? 32 : 64, offset, registersInUse, false); fpr.Lock(d); fpr.BindToRegister(d, js.memcheck || !single); diff --git a/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp b/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp index 88c96c327d..854dd0a50a 100644 --- a/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp +++ b/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp @@ -26,6 +26,7 @@ The register allocation is linear scan allocation. #include +#include "Common/BitSet.h" #include "Common/CPUDetect.h" #include "Common/MathUtil.h" #include "Core/HW/ProcessorInterface.h" @@ -60,15 +61,15 @@ struct RegInfo RegInfo(RegInfo&); // DO NOT IMPLEMENT }; -static u32 regsInUse(RegInfo& R) +static BitSet32 regsInUse(RegInfo& R) { - u32 result = 0; + BitSet32 result; for (unsigned i = 0; i < MAX_NUMBER_OF_REGS; i++) { if (R.regs[i] != nullptr) - result |= (1 << i); + result[i] = true; if (R.fregs[i] != nullptr) - result |= (1 << (16 + i)); + result[16 + i] = true; } return result; } diff --git a/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.cpp b/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.cpp index a4036ab172..c47198865e 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.cpp @@ -10,14 +10,11 @@ #include "Core/PowerPC/JitCommon/JitBase.h" #define QUANTIZED_REGS_TO_SAVE \ - (ABI_ALL_CALLER_SAVED & ~(\ - (1 << RSCRATCH) | \ - (1 << RSCRATCH2) | \ - (1 << RSCRATCH_EXTRA)| \ - (1 << (XMM0+16)) | \ - (1 << (XMM1+16)))) + (ABI_ALL_CALLER_SAVED & ~BitSet32 { \ + RSCRATCH, RSCRATCH2, RSCRATCH_EXTRA, XMM0+16, XMM1+16 \ + }) -#define QUANTIZED_REGS_TO_SAVE_LOAD (QUANTIZED_REGS_TO_SAVE | (1 << RSCRATCH2)) +#define QUANTIZED_REGS_TO_SAVE_LOAD (QUANTIZED_REGS_TO_SAVE | BitSet32 { RSCRATCH2 }) using namespace Gen; diff --git a/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp b/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp index ea921817b8..64fd24ba73 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp @@ -72,7 +72,7 @@ bool Jitx86Base::BackPatch(u32 emAddress, SContext* ctx) return false; } - u32 registersInUse = it->second; + BitSet32 registersInUse = it->second; if (!info.isMemoryWrite) { @@ -98,14 +98,14 @@ bool Jitx86Base::BackPatch(u32 emAddress, SContext* ctx) else { // TODO: special case FIFO writes. Also, support 32-bit mode. - it = pcAtLoc.find(codePtr); - if (it == pcAtLoc.end()) + auto it2 = pcAtLoc.find(codePtr); + if (it2 == pcAtLoc.end()) { PanicAlert("BackPatch: no pc entry for address %p", codePtr); return nullptr; } - u32 pc = it->second; + u32 pc = it2->second; u8 *start; if (info.byteSwap || info.hasImmediate) diff --git a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp index 0e9853948a..c2dc1475a8 100644 --- a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp @@ -137,7 +137,7 @@ template class MMIOReadCodeGenerator : public MMIO::ReadHandlingMethodVisitor { public: - MMIOReadCodeGenerator(Gen::X64CodeBlock* code, u32 registers_in_use, + MMIOReadCodeGenerator(Gen::X64CodeBlock* code, BitSet32 registers_in_use, Gen::X64Reg dst_reg, u32 address, bool sign_extend) : m_code(code), m_registers_in_use(registers_in_use), m_dst_reg(dst_reg), m_address(address), m_sign_extend(sign_extend) @@ -214,14 +214,14 @@ private: } Gen::X64CodeBlock* m_code; - u32 m_registers_in_use; + BitSet32 m_registers_in_use; Gen::X64Reg m_dst_reg; u32 m_address; bool m_sign_extend; }; void EmuCodeBlock::MMIOLoadToReg(MMIO::Mapping* mmio, Gen::X64Reg reg_value, - u32 registers_in_use, u32 address, + BitSet32 registers_in_use, u32 address, int access_size, bool sign_extend) { switch (access_size) @@ -250,17 +250,17 @@ void EmuCodeBlock::MMIOLoadToReg(MMIO::Mapping* mmio, Gen::X64Reg reg_value, } } -FixupBranch EmuCodeBlock::CheckIfSafeAddress(OpArg reg_value, X64Reg reg_addr, u32 registers_in_use, u32 mem_mask) +FixupBranch EmuCodeBlock::CheckIfSafeAddress(OpArg reg_value, X64Reg reg_addr, BitSet32 registers_in_use, u32 mem_mask) { - registers_in_use |= (1 << reg_addr); + registers_in_use[reg_addr] = true; if (reg_value.IsSimpleReg()) - registers_in_use |= (1 << reg_value.GetSimpleReg()); + registers_in_use[reg_value.GetSimpleReg()] = true; // Get ourselves a free register; try to pick one that doesn't involve pushing, if we can. X64Reg scratch = RSCRATCH; - if (!(registers_in_use & (1 << RSCRATCH))) + if (!registers_in_use[RSCRATCH]) scratch = RSCRATCH; - else if (!(registers_in_use & (1 << RSCRATCH_EXTRA))) + else if (!registers_in_use[RSCRATCH_EXTRA]) scratch = RSCRATCH_EXTRA; else scratch = reg_addr; @@ -290,11 +290,11 @@ FixupBranch EmuCodeBlock::CheckIfSafeAddress(OpArg reg_value, X64Reg reg_addr, u } } -void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress, int accessSize, s32 offset, u32 registersInUse, bool signExtend, int flags) +void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress, int accessSize, s32 offset, BitSet32 registersInUse, bool signExtend, int flags) { if (!jit->js.memcheck) { - registersInUse &= ~(1 << reg_value); + registersInUse[reg_value] = false; } if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bMMU && SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem && @@ -468,7 +468,7 @@ u8 *EmuCodeBlock::UnsafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int acce return result; } -void EmuCodeBlock::SafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int accessSize, s32 offset, u32 registersInUse, int flags) +void EmuCodeBlock::SafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int accessSize, s32 offset, BitSet32 registersInUse, int flags) { // set the correct immediate format if (reg_value.IsImm()) @@ -580,7 +580,7 @@ void EmuCodeBlock::SafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int acces } // Destroys the same as SafeWrite plus RSCRATCH. TODO: see if we can avoid temporaries here -void EmuCodeBlock::SafeWriteF32ToReg(X64Reg xmm_value, X64Reg reg_addr, s32 offset, u32 registersInUse, int flags) +void EmuCodeBlock::SafeWriteF32ToReg(X64Reg xmm_value, X64Reg reg_addr, s32 offset, BitSet32 registersInUse, int flags) { // TODO: PSHUFB might be faster if fastmem supported MOVSS. MOVD_xmm(R(RSCRATCH), xmm_value); diff --git a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.h b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.h index 43b54debd9..68f3ced898 100644 --- a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.h +++ b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.h @@ -6,6 +6,7 @@ #include +#include "Common/BitSet.h" #include "Common/CPUDetect.h" #include "Common/x64Emitter.h" @@ -76,7 +77,7 @@ public: void LoadAndSwap(int size, Gen::X64Reg dst, const Gen::OpArg& src); void SwapAndStore(int size, const Gen::OpArg& dst, Gen::X64Reg src); - Gen::FixupBranch CheckIfSafeAddress(Gen::OpArg reg_value, Gen::X64Reg reg_addr, u32 registers_in_use, u32 mem_mask); + Gen::FixupBranch CheckIfSafeAddress(Gen::OpArg reg_value, Gen::X64Reg reg_addr, BitSet32 registers_in_use, u32 mem_mask); void UnsafeLoadRegToReg(Gen::X64Reg reg_addr, Gen::X64Reg reg_value, int accessSize, s32 offset = 0, bool signExtend = false); void UnsafeLoadRegToRegNoSwap(Gen::X64Reg reg_addr, Gen::X64Reg reg_value, int accessSize, s32 offset, bool signExtend = false); // these return the address of the MOV, for backpatching @@ -89,7 +90,7 @@ public: // Generate a load/write from the MMIO handler for a given address. Only // call for known addresses in MMIO range (MMIO::IsMMIOAddress). - void MMIOLoadToReg(MMIO::Mapping* mmio, Gen::X64Reg reg_value, u32 registers_in_use, u32 address, int access_size, bool sign_extend); + void MMIOLoadToReg(MMIO::Mapping* mmio, Gen::X64Reg reg_value, BitSet32 registers_in_use, u32 address, int access_size, bool sign_extend); enum SafeLoadStoreFlags { @@ -99,12 +100,12 @@ public: SAFE_LOADSTORE_CLOBBER_RSCRATCH_INSTEAD_OF_ADDR = 8 }; - void SafeLoadToReg(Gen::X64Reg reg_value, const Gen::OpArg & opAddress, int accessSize, s32 offset, u32 registersInUse, bool signExtend, int flags = 0); + void SafeLoadToReg(Gen::X64Reg reg_value, const Gen::OpArg & opAddress, int accessSize, s32 offset, BitSet32 registersInUse, bool signExtend, int flags = 0); // Clobbers RSCRATCH or reg_addr depending on the relevant flag. Preserves // reg_value if the load fails and js.memcheck is enabled. // Works with immediate inputs and simple registers only. - void SafeWriteRegToReg(Gen::OpArg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset, u32 registersInUse, int flags = 0); - void SafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset, u32 registersInUse, int flags = 0) + void SafeWriteRegToReg(Gen::OpArg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset, BitSet32 registersInUse, int flags = 0); + void SafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset, BitSet32 registersInUse, int flags = 0) { SafeWriteRegToReg(R(reg_value), reg_addr, accessSize, offset, registersInUse, flags); } @@ -115,7 +116,7 @@ public: return swap && !cpu_info.bMOVBE && accessSize > 8; } - void SafeWriteF32ToReg(Gen::X64Reg xmm_value, Gen::X64Reg reg_addr, s32 offset, u32 registersInUse, int flags = 0); + void SafeWriteF32ToReg(Gen::X64Reg xmm_value, Gen::X64Reg reg_addr, s32 offset, BitSet32 registersInUse, int flags = 0); void WriteToConstRamAddress(int accessSize, Gen::X64Reg arg, u32 address, bool swap = false); void JitGetAndClearCAOV(bool oe); @@ -137,6 +138,6 @@ public: void ConvertDoubleToSingle(Gen::X64Reg dst, Gen::X64Reg src); void SetFPRF(Gen::X64Reg xmm); protected: - std::unordered_map registersInUseAtLoc; + std::unordered_map registersInUseAtLoc; std::unordered_map pcAtLoc; }; diff --git a/Source/Core/Core/PowerPC/JitCommon/TrampolineCache.cpp b/Source/Core/Core/PowerPC/JitCommon/TrampolineCache.cpp index 7b23ac1427..63fbd20fdc 100644 --- a/Source/Core/Core/PowerPC/JitCommon/TrampolineCache.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/TrampolineCache.cpp @@ -36,7 +36,7 @@ void TrampolineCache::Shutdown() cachedTrampolines.clear(); } -const u8* TrampolineCache::GetReadTrampoline(const InstructionInfo &info, u32 registersInUse) +const u8* TrampolineCache::GetReadTrampoline(const InstructionInfo &info, BitSet32 registersInUse) { TrampolineCacheKey key = { registersInUse, 0, info }; @@ -49,7 +49,7 @@ const u8* TrampolineCache::GetReadTrampoline(const InstructionInfo &info, u32 re return trampoline; } -const u8* TrampolineCache::GenerateReadTrampoline(const InstructionInfo &info, u32 registersInUse) +const u8* TrampolineCache::GenerateReadTrampoline(const InstructionInfo &info, BitSet32 registersInUse) { if (GetSpaceLeft() < 1024) PanicAlert("Trampoline cache full"); @@ -97,7 +97,7 @@ const u8* TrampolineCache::GenerateReadTrampoline(const InstructionInfo &info, u return trampoline; } -const u8* TrampolineCache::GetWriteTrampoline(const InstructionInfo &info, u32 registersInUse, u32 pc) +const u8* TrampolineCache::GetWriteTrampoline(const InstructionInfo &info, BitSet32 registersInUse, u32 pc) { TrampolineCacheKey key = { registersInUse, pc, info }; @@ -110,7 +110,7 @@ const u8* TrampolineCache::GetWriteTrampoline(const InstructionInfo &info, u32 r return trampoline; } -const u8* TrampolineCache::GenerateWriteTrampoline(const InstructionInfo &info, u32 registersInUse, u32 pc) +const u8* TrampolineCache::GenerateWriteTrampoline(const InstructionInfo &info, BitSet32 registersInUse, u32 pc) { if (GetSpaceLeft() < 1024) PanicAlert("Trampoline cache full"); @@ -184,7 +184,7 @@ const u8* TrampolineCache::GenerateWriteTrampoline(const InstructionInfo &info, size_t TrampolineCacheKeyHasher::operator()(const TrampolineCacheKey& k) const { - size_t res = std::hash()(k.registersInUse); + size_t res = std::hash()(k.registersInUse.m_val); res ^= std::hash()(k.info.operandSize) >> 1; res ^= std::hash()(k.info.regOperandReg) >> 2; res ^= std::hash()(k.info.scaledReg) >> 3; diff --git a/Source/Core/Core/PowerPC/JitCommon/TrampolineCache.h b/Source/Core/Core/PowerPC/JitCommon/TrampolineCache.h index cb9fee2978..16e293bce0 100644 --- a/Source/Core/Core/PowerPC/JitCommon/TrampolineCache.h +++ b/Source/Core/Core/PowerPC/JitCommon/TrampolineCache.h @@ -6,6 +6,7 @@ #include +#include "Common/BitSet.h" #include "Common/CommonTypes.h" #include "Common/x64Analyzer.h" #include "Common/x64Emitter.h" @@ -15,7 +16,7 @@ const int BACKPATCH_SIZE = 5; struct TrampolineCacheKey { - u32 registersInUse; + BitSet32 registersInUse; u32 pc; InstructionInfo info; @@ -33,13 +34,13 @@ public: void Init(); void Shutdown(); - const u8* GetReadTrampoline(const InstructionInfo &info, u32 registersInUse); - const u8* GetWriteTrampoline(const InstructionInfo &info, u32 registersInUse, u32 pc); + const u8* GetReadTrampoline(const InstructionInfo &info, BitSet32 registersInUse); + const u8* GetWriteTrampoline(const InstructionInfo &info, BitSet32 registersInUse, u32 pc); void ClearCodeSpace(); private: - const u8* GenerateReadTrampoline(const InstructionInfo &info, u32 registersInUse); - const u8* GenerateWriteTrampoline(const InstructionInfo &info, u32 registersInUse, u32 pc); + const u8* GenerateReadTrampoline(const InstructionInfo &info, BitSet32 registersInUse); + const u8* GenerateWriteTrampoline(const InstructionInfo &info, BitSet32 registersInUse, u32 pc); std::unordered_map cachedTrampolines; }; diff --git a/Source/Core/Core/PowerPC/Profiler.h b/Source/Core/Core/PowerPC/Profiler.h index 83c323ad9d..7b51589225 100644 --- a/Source/Core/Core/PowerPC/Profiler.h +++ b/Source/Core/Core/PowerPC/Profiler.h @@ -23,7 +23,7 @@ MOV(64, M(pdt), R(RSCRATCH)); #define PROFILER_VPUSH \ - u32 registersInUse = CallerSavedRegistersInUse(); \ + BitSet32 registersInUse = CallerSavedRegistersInUse(); \ ABI_PushRegistersAndAdjustStack(registersInUse, 0); #define PROFILER_VPOP \ diff --git a/Source/Core/VideoCommon/VertexLoader.cpp b/Source/Core/VideoCommon/VertexLoader.cpp index 291bb94c62..e4974a8b2b 100644 --- a/Source/Core/VideoCommon/VertexLoader.cpp +++ b/Source/Core/VideoCommon/VertexLoader.cpp @@ -141,7 +141,7 @@ void VertexLoader::CompileVertexTranslator() m_compiledCode = GetCodePtr(); // We only use RAX (caller saved) and RBX (callee saved). - ABI_PushRegistersAndAdjustStack(1 << RBX, 8); + ABI_PushRegistersAndAdjustStack({RBX}, 8); // save count MOV(64, R(RBX), R(ABI_PARAM1)); @@ -402,7 +402,7 @@ void VertexLoader::CompileVertexTranslator() SUB(64, R(RBX), Imm8(1)); J_CC(CC_NZ, loop_start); - ABI_PopRegistersAndAdjustStack(1 << RBX, 8); + ABI_PopRegistersAndAdjustStack({RBX}, 8); RET(); #endif }