Convert registersInUse to BitSet.
This commit is contained in:
parent
b6a7438053
commit
eb7f4dac50
|
@ -10,31 +10,23 @@ using namespace Gen;
|
|||
|
||||
// Shared code between Win64 and Unix64
|
||||
|
||||
void XEmitter::ABI_CalculateFrameSize(u32 mask, size_t rsp_alignment, size_t needed_frame_size, size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp)
|
||||
void XEmitter::ABI_CalculateFrameSize(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size, size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp)
|
||||
{
|
||||
size_t shadow = 0;
|
||||
#if defined(_WIN32)
|
||||
shadow = 0x20;
|
||||
#endif
|
||||
|
||||
int count = 0;
|
||||
for (int r = 0; r < 16; r++)
|
||||
{
|
||||
if (mask & (1 << r))
|
||||
count++;
|
||||
}
|
||||
int count = (mask & ABI_ALL_GPRS).Count();
|
||||
rsp_alignment -= count * 8;
|
||||
size_t subtraction = 0;
|
||||
if (mask & 0xffff0000)
|
||||
int fpr_count = (mask & ABI_ALL_FPRS).Count();
|
||||
if (fpr_count)
|
||||
{
|
||||
// If we have any XMMs to save, we must align the stack here.
|
||||
subtraction = rsp_alignment & 0xf;
|
||||
}
|
||||
for (int x = 0; x < 16; x++)
|
||||
{
|
||||
if (mask & (1 << (16 + x)))
|
||||
subtraction += 16;
|
||||
}
|
||||
subtraction += 16 * fpr_count;
|
||||
size_t xmm_base_subtraction = subtraction;
|
||||
subtraction += needed_frame_size;
|
||||
subtraction += shadow;
|
||||
|
@ -47,44 +39,35 @@ void XEmitter::ABI_CalculateFrameSize(u32 mask, size_t rsp_alignment, size_t nee
|
|||
*xmm_offsetp = subtraction - xmm_base_subtraction;
|
||||
}
|
||||
|
||||
// Emits the prologue that saves the registers selected by `mask` and reserves
// the stack frame computed by ABI_CalculateFrameSize.
//
// NOTE: this is a diff view. Lines using `u32 mask` / `mask & (1 << r)` are
// the pre-change version; lines using `BitSet32 mask` and the range-for loops
// are their replacements. Only one of each pair exists in the real file.
//
// mask              - registers to save; bits 0-15 select GPRs and bits 16-31
//                     select XMM0-XMM15 (see ABI_ALL_GPRS / ABI_ALL_FPRS).
// rsp_alignment     - forwarded unchanged to ABI_CalculateFrameSize.
// needed_frame_size - forwarded unchanged to ABI_CalculateFrameSize.
// Returns the `shadow` value produced by ABI_CalculateFrameSize.
size_t XEmitter::ABI_PushRegistersAndAdjustStack(u32 mask, size_t rsp_alignment, size_t needed_frame_size)
|
||||
size_t XEmitter::ABI_PushRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size)
|
||||
{
|
||||
size_t shadow, subtraction, xmm_offset;
|
||||
// All three locals are out-parameters filled in by this call.
ABI_CalculateFrameSize(mask, rsp_alignment, needed_frame_size, &shadow, &subtraction, &xmm_offset);
|
||||
|
||||
// Old form: test each of the 16 GPR bits in ascending order and PUSH the set ones.
for (int r = 0; r < 16; r++)
|
||||
{
|
||||
if (mask & (1 << r))
|
||||
PUSH((X64Reg) r);
|
||||
}
|
||||
// New form: iterate only over the set GPR bits.
// NOTE(review): presumably BitSet32 iterates in ascending bit order like the
// loop it replaces - confirm in Common/BitSet.h.
for (int r : mask & ABI_ALL_GPRS)
|
||||
PUSH((X64Reg) r);
|
||||
|
||||
// Reserve the rest of the frame; the 8-bit immediate form is used only for
// amounts below 0x80, otherwise a 32-bit immediate is emitted.
if (subtraction)
|
||||
SUB(64, R(RSP), subtraction >= 0x80 ? Imm32((u32)subtraction) : Imm8((u8)subtraction));
|
||||
|
||||
// Old form: x is the XMM register number; its mask bit is 16 + x.
for (int x = 0; x < 16; x++)
|
||||
// New form: x is the mask bit index (16..31), hence the `x - 16` below.
for (int x : mask & ABI_ALL_FPRS)
|
||||
{
|
||||
if (mask & (1 << (16 + x)))
|
||||
{
|
||||
MOVAPD(MDisp(RSP, (int)xmm_offset), (X64Reg) x);
|
||||
xmm_offset += 16;
|
||||
}
|
||||
// Store the XMM register at RSP + xmm_offset, then advance one 16-byte slot.
MOVAPD(MDisp(RSP, (int)xmm_offset), (X64Reg) (x - 16));
|
||||
xmm_offset += 16;
|
||||
}
|
||||
|
||||
return shadow;
|
||||
}
|
||||
|
||||
void XEmitter::ABI_PopRegistersAndAdjustStack(u32 mask, size_t rsp_alignment, size_t needed_frame_size)
|
||||
void XEmitter::ABI_PopRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size)
|
||||
{
|
||||
size_t shadow, subtraction, xmm_offset;
|
||||
ABI_CalculateFrameSize(mask, rsp_alignment, needed_frame_size, &shadow, &subtraction, &xmm_offset);
|
||||
|
||||
for (int x = 0; x < 16; x++)
|
||||
for (int x : mask & ABI_ALL_FPRS)
|
||||
{
|
||||
if (mask & (1 << (16 + x)))
|
||||
{
|
||||
MOVAPD((X64Reg) x, MDisp(RSP, (int)xmm_offset));
|
||||
xmm_offset += 16;
|
||||
}
|
||||
MOVAPD((X64Reg) (x - 16), MDisp(RSP, (int)xmm_offset));
|
||||
xmm_offset += 16;
|
||||
}
|
||||
|
||||
if (subtraction)
|
||||
|
@ -92,10 +75,8 @@ void XEmitter::ABI_PopRegistersAndAdjustStack(u32 mask, size_t rsp_alignment, si
|
|||
|
||||
for (int r = 15; r >= 0; r--)
|
||||
{
|
||||
if (mask & (1 << r))
|
||||
{
|
||||
if (mask[r])
|
||||
POP((X64Reg) r);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include "Common/BitSet.h"
|
||||
#include "Common/x64Emitter.h"
|
||||
|
||||
// x64 ABIs, and helpers to help follow them when JIT-ing code.
|
||||
|
@ -23,6 +24,9 @@
|
|||
// Callee-save: RBX RBP R12 R13 R14 R15
|
||||
// Parameters: RDI RSI RDX RCX R8 R9
|
||||
|
||||
#define ABI_ALL_FPRS BitSet32(0xffff0000)
|
||||
#define ABI_ALL_GPRS BitSet32(0x0000ffff)
|
||||
|
||||
#ifdef _WIN32 // 64-bit Windows - the really exotic calling convention
|
||||
|
||||
#define ABI_PARAM1 RCX
|
||||
|
@ -31,11 +35,9 @@
|
|||
#define ABI_PARAM4 R9
|
||||
|
||||
// xmm0-xmm15 use the upper 16 bits in the functions that push/pop registers.
|
||||
#define ABI_ALL_CALLER_SAVED ((1 << RAX) | (1 << RCX) | (1 << RDX) | (1 << R8) | \
|
||||
(1 << R9) | (1 << R10) | (1 << R11) | \
|
||||
(1 << (XMM0+16)) | (1 << (XMM1+16)) | (1 << (XMM2+16)) | (1 << (XMM3+16)) | \
|
||||
(1 << (XMM4+16)) | (1 << (XMM5+16)))
|
||||
|
||||
#define ABI_ALL_CALLER_SAVED \
|
||||
(BitSet32 { RAX, RCX, RDX, R8, R9, R10, R11, \
|
||||
XMM0+16, XMM1+16, XMM2+16, XMM3+16, XMM4+16, XMM5+16 })
|
||||
#else //64-bit Unix / OS X
|
||||
|
||||
#define ABI_PARAM1 RDI
|
||||
|
@ -47,13 +49,12 @@
|
|||
|
||||
// FIXME: avoid pushing all 16 XMM registers when possible? most functions we call probably
|
||||
// don't actually clobber them.
|
||||
#define ABI_ALL_CALLER_SAVED ((1 << RAX) | (1 << RCX) | (1 << RDX) | (1 << RDI) | \
|
||||
(1 << RSI) | (1 << R8) | (1 << R9) | (1 << R10) | (1 << R11) | \
|
||||
0xffff0000 /* xmm0..15 */)
|
||||
|
||||
#define ABI_ALL_CALLER_SAVED \
|
||||
(BitSet32 { RAX, RCX, RDX, RDI, RSI, R8, R9, R10, R11 } | \
|
||||
ABI_ALL_FPRS)
|
||||
#endif // WIN32
|
||||
|
||||
#define ABI_ALL_CALLEE_SAVED ((u32) ~ABI_ALL_CALLER_SAVED)
|
||||
#define ABI_ALL_CALLEE_SAVED (~ABI_ALL_CALLER_SAVED)
|
||||
|
||||
#define ABI_RETURN RAX
|
||||
|
||||
|
|
|
@ -10,6 +10,7 @@
|
|||
#include <cstring>
|
||||
#include <functional>
|
||||
|
||||
#include "Common/BitSet.h"
|
||||
#include "Common/CodeBlock.h"
|
||||
#include "Common/CommonTypes.h"
|
||||
|
||||
|
@ -302,7 +303,7 @@ private:
|
|||
void WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, OpArg arg);
|
||||
void WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg &a1, const OpArg &a2);
|
||||
|
||||
void ABI_CalculateFrameSize(u32 mask, size_t rsp_alignment, size_t needed_frame_size, size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp);
|
||||
void ABI_CalculateFrameSize(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size, size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp);
|
||||
|
||||
protected:
|
||||
inline void Write8(u8 value) {*code++ = value;}
|
||||
|
@ -883,8 +884,8 @@ public:
|
|||
// Saves/restores the registers and adjusts the stack to be aligned as
|
||||
// required by the ABI, where the previous alignment was as specified.
|
||||
// Push returns the size of the shadow space, i.e. the offset of the frame.
|
||||
size_t ABI_PushRegistersAndAdjustStack(u32 mask, size_t rsp_alignment, size_t needed_frame_size = 0);
|
||||
void ABI_PopRegistersAndAdjustStack(u32 mask, size_t rsp_alignment, size_t needed_frame_size = 0);
|
||||
size_t ABI_PushRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size = 0);
|
||||
void ABI_PopRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size = 0);
|
||||
|
||||
inline int ABI_GetNumXMMRegs() { return 16; }
|
||||
|
||||
|
|
|
@ -385,7 +385,7 @@ void DSPEmitter::CompileDispatcher()
|
|||
{
|
||||
enterDispatcher = AlignCode16();
|
||||
// We don't use floating point (high 16 bits).
|
||||
u32 registers_used = ABI_ALL_CALLEE_SAVED & 0xffff;
|
||||
BitSet32 registers_used = ABI_ALL_CALLEE_SAVED & BitSet32(0xffff);
|
||||
ABI_PushRegistersAndAdjustStack(registers_used, 8);
|
||||
|
||||
const u8 *dispatcherLoop = GetCodePtr();
|
||||
|
|
|
@ -248,9 +248,9 @@ void Jit64::WriteCallInterpreter(UGeckoInstruction inst)
|
|||
MOV(32, PPCSTATE(npc), Imm32(js.compilerPC + 4));
|
||||
}
|
||||
Interpreter::_interpreterInstruction instr = GetInterpreterOp(inst);
|
||||
ABI_PushRegistersAndAdjustStack(0, 0);
|
||||
ABI_PushRegistersAndAdjustStack({}, 0);
|
||||
ABI_CallFunctionC((void*)instr, inst.hex);
|
||||
ABI_PopRegistersAndAdjustStack(0, 0);
|
||||
ABI_PopRegistersAndAdjustStack({}, 0);
|
||||
}
|
||||
|
||||
void Jit64::unknown_instruction(UGeckoInstruction inst)
|
||||
|
@ -267,9 +267,9 @@ void Jit64::HLEFunction(UGeckoInstruction _inst)
|
|||
{
|
||||
gpr.Flush();
|
||||
fpr.Flush();
|
||||
ABI_PushRegistersAndAdjustStack(0, 0);
|
||||
ABI_PushRegistersAndAdjustStack({}, 0);
|
||||
ABI_CallFunctionCC((void*)&HLE::Execute, js.compilerPC, _inst.hex);
|
||||
ABI_PopRegistersAndAdjustStack(0, 0);
|
||||
ABI_PopRegistersAndAdjustStack({}, 0);
|
||||
}
|
||||
|
||||
void Jit64::DoNothing(UGeckoInstruction _inst)
|
||||
|
@ -307,18 +307,18 @@ bool Jit64::Cleanup()
|
|||
|
||||
if (jo.optimizeGatherPipe && js.fifoBytesThisBlock > 0)
|
||||
{
|
||||
ABI_PushRegistersAndAdjustStack(0, 0);
|
||||
ABI_PushRegistersAndAdjustStack({}, 0);
|
||||
ABI_CallFunction((void *)&GPFifo::CheckGatherPipe);
|
||||
ABI_PopRegistersAndAdjustStack(0, 0);
|
||||
ABI_PopRegistersAndAdjustStack({}, 0);
|
||||
did_something = true;
|
||||
}
|
||||
|
||||
// SPEED HACK: MMCR0/MMCR1 should be checked at run-time, not at compile time.
|
||||
if (MMCR0.Hex || MMCR1.Hex)
|
||||
{
|
||||
ABI_PushRegistersAndAdjustStack(0, 0);
|
||||
ABI_PushRegistersAndAdjustStack({}, 0);
|
||||
ABI_CallFunctionCCC((void *)&PowerPC::UpdatePerformanceMonitor, js.downcountAmount, jit->js.numLoadStoreInst, jit->js.numFloatingPointInst);
|
||||
ABI_PopRegistersAndAdjustStack(0, 0);
|
||||
ABI_PopRegistersAndAdjustStack({}, 0);
|
||||
did_something = true;
|
||||
}
|
||||
|
||||
|
@ -433,9 +433,9 @@ void Jit64::WriteRfiExitDestInRSCRATCH()
|
|||
MOV(32, PPCSTATE(pc), R(RSCRATCH));
|
||||
MOV(32, PPCSTATE(npc), R(RSCRATCH));
|
||||
Cleanup();
|
||||
ABI_PushRegistersAndAdjustStack(0, 0);
|
||||
ABI_PushRegistersAndAdjustStack({}, 0);
|
||||
ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckExceptions));
|
||||
ABI_PopRegistersAndAdjustStack(0, 0);
|
||||
ABI_PopRegistersAndAdjustStack({}, 0);
|
||||
SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount));
|
||||
JMP(asm_routines.dispatcher, true);
|
||||
}
|
||||
|
@ -445,9 +445,9 @@ void Jit64::WriteExceptionExit()
|
|||
Cleanup();
|
||||
MOV(32, R(RSCRATCH), PPCSTATE(pc));
|
||||
MOV(32, PPCSTATE(npc), R(RSCRATCH));
|
||||
ABI_PushRegistersAndAdjustStack(0, 0);
|
||||
ABI_PushRegistersAndAdjustStack({}, 0);
|
||||
ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckExceptions));
|
||||
ABI_PopRegistersAndAdjustStack(0, 0);
|
||||
ABI_PopRegistersAndAdjustStack({}, 0);
|
||||
SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount));
|
||||
JMP(asm_routines.dispatcher, true);
|
||||
}
|
||||
|
@ -457,9 +457,9 @@ void Jit64::WriteExternalExceptionExit()
|
|||
Cleanup();
|
||||
MOV(32, R(RSCRATCH), PPCSTATE(pc));
|
||||
MOV(32, PPCSTATE(npc), R(RSCRATCH));
|
||||
ABI_PushRegistersAndAdjustStack(0, 0);
|
||||
ABI_PushRegistersAndAdjustStack({}, 0);
|
||||
ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckExternalExceptions));
|
||||
ABI_PopRegistersAndAdjustStack(0, 0);
|
||||
ABI_PopRegistersAndAdjustStack({}, 0);
|
||||
SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount));
|
||||
JMP(asm_routines.dispatcher, true);
|
||||
}
|
||||
|
@ -560,9 +560,9 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
|
|||
|
||||
if (ImHereDebug)
|
||||
{
|
||||
ABI_PushRegistersAndAdjustStack(0, 0);
|
||||
ABI_PushRegistersAndAdjustStack({}, 0);
|
||||
ABI_CallFunction((void *)&ImHere); //Used to get a trace of the last few blocks before a crash, sometimes VERY useful
|
||||
ABI_PopRegistersAndAdjustStack(0, 0);
|
||||
ABI_PopRegistersAndAdjustStack({}, 0);
|
||||
}
|
||||
|
||||
// Conditionally add profiling code.
|
||||
|
@ -637,7 +637,7 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
|
|||
{
|
||||
js.fifoBytesThisBlock -= 32;
|
||||
MOV(32, PPCSTATE(pc), Imm32(jit->js.compilerPC)); // Helps external systems know which instruction triggered the write
|
||||
u32 registersInUse = CallerSavedRegistersInUse();
|
||||
BitSet32 registersInUse = CallerSavedRegistersInUse();
|
||||
ABI_PushRegistersAndAdjustStack(registersInUse, 0);
|
||||
ABI_CallFunction((void *)&GPFifo::CheckGatherPipe);
|
||||
ABI_PopRegistersAndAdjustStack(registersInUse, 0);
|
||||
|
@ -719,9 +719,9 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
|
|||
fpr.Flush();
|
||||
|
||||
MOV(32, PPCSTATE(pc), Imm32(ops[i].address));
|
||||
ABI_PushRegistersAndAdjustStack(0, 0);
|
||||
ABI_PushRegistersAndAdjustStack({}, 0);
|
||||
ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckBreakPoints));
|
||||
ABI_PopRegistersAndAdjustStack(0, 0);
|
||||
ABI_PopRegistersAndAdjustStack({}, 0);
|
||||
TEST(32, M((void*)PowerPC::GetStatePtr()), Imm32(0xFFFFFFFF));
|
||||
FixupBranch noBreakpoint = J_CC(CC_Z);
|
||||
|
||||
|
@ -843,15 +843,15 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
|
|||
return normalEntry;
|
||||
}
|
||||
|
||||
// Builds the set of host registers that the GPR and FPR register caches
// report as not free, restricted to the ABI's caller-saved registers.
//
// NOTE: this is a diff view. The `u32` lines are the pre-change version and
// the `BitSet32` lines are their replacements.
//
// Bit layout of the result: bit i is host GPR i, bit 16 + i is XMM register i.
u32 Jit64::CallerSavedRegistersInUse()
|
||||
BitSet32 Jit64::CallerSavedRegistersInUse()
|
||||
{
|
||||
u32 result = 0;
|
||||
// NOTE(review): presumably default-constructs to the empty set, matching the
// `= 0` it replaces - confirm in Common/BitSet.h.
BitSet32 result;
|
||||
for (int i = 0; i < NUMXREGS; i++)
|
||||
{
|
||||
// A GPR slot that is not free is marked in the low half of the set.
if (!gpr.IsFreeX(i))
|
||||
result |= (1 << i);
|
||||
result[i] = true;
|
||||
// An XMM slot that is not free is marked in the high half of the set.
if (!fpr.IsFreeX(i))
|
||||
result |= (1 << (16 + i));
|
||||
result[16 + i] = true;
|
||||
}
|
||||
// Drop every register that is not in ABI_ALL_CALLER_SAVED.
return result & ABI_ALL_CALLER_SAVED;
|
||||
}
|
||||
|
|
|
@ -73,7 +73,7 @@ public:
|
|||
void Jit(u32 em_address) override;
|
||||
const u8* DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buffer, JitBlock *b);
|
||||
|
||||
u32 CallerSavedRegistersInUse();
|
||||
BitSet32 CallerSavedRegistersInUse();
|
||||
|
||||
JitBlockCache *GetBlockCache() override { return &blocks; }
|
||||
|
||||
|
|
|
@ -43,9 +43,9 @@ void Jit64AsmRoutineManager::Generate()
|
|||
MOV(64, R(RPPCSTATE), Imm64((u64)&PowerPC::ppcState + 0x80));
|
||||
|
||||
const u8* outerLoop = GetCodePtr();
|
||||
ABI_PushRegistersAndAdjustStack(0, 0);
|
||||
ABI_PushRegistersAndAdjustStack({}, 0);
|
||||
ABI_CallFunction(reinterpret_cast<void *>(&CoreTiming::Advance));
|
||||
ABI_PopRegistersAndAdjustStack(0, 0);
|
||||
ABI_PopRegistersAndAdjustStack({}, 0);
|
||||
FixupBranch skipToRealDispatch = J(SConfig::GetInstance().m_LocalCoreStartupParameter.bEnableDebugging); //skip the sync and compare first time
|
||||
dispatcherMispredictedBLR = GetCodePtr();
|
||||
|
||||
|
@ -71,9 +71,9 @@ void Jit64AsmRoutineManager::Generate()
|
|||
{
|
||||
TEST(32, M((void*)PowerPC::GetStatePtr()), Imm32(PowerPC::CPU_STEPPING));
|
||||
FixupBranch notStepping = J_CC(CC_Z);
|
||||
ABI_PushRegistersAndAdjustStack(0, 0);
|
||||
ABI_PushRegistersAndAdjustStack({}, 0);
|
||||
ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckBreakPoints));
|
||||
ABI_PopRegistersAndAdjustStack(0, 0);
|
||||
ABI_PopRegistersAndAdjustStack({}, 0);
|
||||
TEST(32, M((void*)PowerPC::GetStatePtr()), Imm32(0xFFFFFFFF));
|
||||
dbg_exit = J_CC(CC_NZ, true);
|
||||
SetJumpTarget(notStepping);
|
||||
|
@ -129,9 +129,9 @@ void Jit64AsmRoutineManager::Generate()
|
|||
SetJumpTarget(notfound);
|
||||
|
||||
//Ok, no block, let's jit
|
||||
ABI_PushRegistersAndAdjustStack(0, 0);
|
||||
ABI_PushRegistersAndAdjustStack({}, 0);
|
||||
ABI_CallFunctionA((void *)&Jit, PPCSTATE(pc));
|
||||
ABI_PopRegistersAndAdjustStack(0, 0);
|
||||
ABI_PopRegistersAndAdjustStack({}, 0);
|
||||
|
||||
// Jit might have cleared the code cache
|
||||
ResetStack();
|
||||
|
@ -146,9 +146,9 @@ void Jit64AsmRoutineManager::Generate()
|
|||
FixupBranch noExtException = J_CC(CC_Z);
|
||||
MOV(32, R(RSCRATCH), PPCSTATE(pc));
|
||||
MOV(32, PPCSTATE(npc), R(RSCRATCH));
|
||||
ABI_PushRegistersAndAdjustStack(0, 0);
|
||||
ABI_PushRegistersAndAdjustStack({}, 0);
|
||||
ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckExternalExceptions));
|
||||
ABI_PopRegistersAndAdjustStack(0, 0);
|
||||
ABI_PopRegistersAndAdjustStack({}, 0);
|
||||
SetJumpTarget(noExtException);
|
||||
|
||||
TEST(32, M((void*)PowerPC::GetStatePtr()), Imm32(0xFFFFFFFF));
|
||||
|
|
|
@ -130,7 +130,7 @@ void Jit64::lXXx(UGeckoInstruction inst)
|
|||
TEST(32, gpr.R(d), gpr.R(d));
|
||||
FixupBranch noIdle = J_CC(CC_NZ);
|
||||
|
||||
u32 registersInUse = CallerSavedRegistersInUse();
|
||||
BitSet32 registersInUse = CallerSavedRegistersInUse();
|
||||
ABI_PushRegistersAndAdjustStack(registersInUse, 0);
|
||||
|
||||
ABI_CallFunctionC((void *)&PowerPC::OnIdle, PowerPC::ppcState.gpr[a] + (s32)(s16)inst.SIMM_16);
|
||||
|
@ -242,11 +242,11 @@ void Jit64::lXXx(UGeckoInstruction inst)
|
|||
|
||||
gpr.Lock(a, b, d);
|
||||
gpr.BindToRegister(d, js.memcheck, true);
|
||||
u32 registersInUse = CallerSavedRegistersInUse();
|
||||
BitSet32 registersInUse = CallerSavedRegistersInUse();
|
||||
if (update && storeAddress)
|
||||
{
|
||||
// We need to save the (usually scratch) address register for the update.
|
||||
registersInUse |= (1 << RSCRATCH2);
|
||||
registersInUse[RSCRATCH2] = true;
|
||||
}
|
||||
SafeLoadToReg(gpr.RX(d), opAddress, accessSize, loadOffset, registersInUse, signExtend);
|
||||
|
||||
|
@ -310,7 +310,7 @@ void Jit64::dcbz(UGeckoInstruction inst)
|
|||
SwitchToFarCode();
|
||||
SetJumpTarget(slow);
|
||||
MOV(32, M(&PC), Imm32(jit->js.compilerPC));
|
||||
u32 registersInUse = CallerSavedRegistersInUse();
|
||||
BitSet32 registersInUse = CallerSavedRegistersInUse();
|
||||
ABI_PushRegistersAndAdjustStack(registersInUse, 0);
|
||||
ABI_CallFunctionR((void *)&Memory::ClearCacheLine, RSCRATCH);
|
||||
ABI_PopRegistersAndAdjustStack(registersInUse, 0);
|
||||
|
@ -399,7 +399,7 @@ void Jit64::stX(UGeckoInstruction inst)
|
|||
// Helps external systems know which instruction triggered the write
|
||||
MOV(32, PPCSTATE(pc), Imm32(jit->js.compilerPC));
|
||||
|
||||
u32 registersInUse = CallerSavedRegistersInUse();
|
||||
BitSet32 registersInUse = CallerSavedRegistersInUse();
|
||||
ABI_PushRegistersAndAdjustStack(registersInUse, 0);
|
||||
switch (accessSize)
|
||||
{
|
||||
|
@ -551,7 +551,7 @@ void Jit64::lmw(UGeckoInstruction inst)
|
|||
ADD(32, R(RSCRATCH2), gpr.R(inst.RA));
|
||||
for (int i = inst.RD; i < 32; i++)
|
||||
{
|
||||
SafeLoadToReg(RSCRATCH, R(RSCRATCH2), 32, (i - inst.RD) * 4, CallerSavedRegistersInUse() | (1 << RSCRATCH_EXTRA), false);
|
||||
SafeLoadToReg(RSCRATCH, R(RSCRATCH2), 32, (i - inst.RD) * 4, CallerSavedRegistersInUse() | BitSet32 { RSCRATCH_EXTRA }, false);
|
||||
gpr.BindToRegister(i, false, true);
|
||||
MOV(32, gpr.R(i), R(RSCRATCH));
|
||||
}
|
||||
|
|
|
@ -65,9 +65,9 @@ void Jit64::lfXXX(UGeckoInstruction inst)
|
|||
offset = (s16)inst.SIMM_16;
|
||||
}
|
||||
|
||||
u32 registersInUse = CallerSavedRegistersInUse();
|
||||
BitSet32 registersInUse = CallerSavedRegistersInUse();
|
||||
if (update && js.memcheck)
|
||||
registersInUse |= (1 << RSCRATCH2);
|
||||
registersInUse[RSCRATCH2] = true;
|
||||
SafeLoadToReg(RSCRATCH, addr, single ? 32 : 64, offset, registersInUse, false);
|
||||
fpr.Lock(d);
|
||||
fpr.BindToRegister(d, js.memcheck || !single);
|
||||
|
|
|
@ -26,6 +26,7 @@ The register allocation is linear scan allocation.
|
|||
|
||||
#include <algorithm>
|
||||
|
||||
#include "Common/BitSet.h"
|
||||
#include "Common/CPUDetect.h"
|
||||
#include "Common/MathUtil.h"
|
||||
#include "Core/HW/ProcessorInterface.h"
|
||||
|
@ -60,15 +61,15 @@ struct RegInfo
|
|||
RegInfo(RegInfo&); // DO NOT IMPLEMENT
|
||||
};
|
||||
|
||||
// Builds the set of register slots in `R` that currently hold a value, i.e.
// whose entry in R.regs / R.fregs is non-null.
//
// NOTE: this is a diff view. The `u32` lines are the pre-change version and
// the `BitSet32` lines are their replacements.
//
// Bit layout of the result: bit i is R.regs[i], bit 16 + i is R.fregs[i].
// NOTE(review): this layout assumes MAX_NUMBER_OF_REGS <= 16 - confirm.
static u32 regsInUse(RegInfo& R)
|
||||
static BitSet32 regsInUse(RegInfo& R)
|
||||
{
|
||||
u32 result = 0;
|
||||
// NOTE(review): presumably default-constructs to the empty set, matching the
// `= 0` it replaces - confirm in Common/BitSet.h.
BitSet32 result;
|
||||
for (unsigned i = 0; i < MAX_NUMBER_OF_REGS; i++)
|
||||
{
|
||||
// Occupied integer register slot -> low half of the set.
if (R.regs[i] != nullptr)
|
||||
result |= (1 << i);
|
||||
result[i] = true;
|
||||
// Occupied floating-point register slot -> high half of the set.
if (R.fregs[i] != nullptr)
|
||||
result |= (1 << (16 + i));
|
||||
result[16 + i] = true;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
|
|
@ -10,14 +10,11 @@
|
|||
#include "Core/PowerPC/JitCommon/JitBase.h"
|
||||
|
||||
// Register sets for the quantized load/store routines.
//
// NOTE: this is a diff view. The `(1 << REG)` lines are the pre-change u32
// bitmask form and the `BitSet32 { ... }` lines are their replacements.
//
// QUANTIZED_REGS_TO_SAVE      - every caller-saved register except RSCRATCH,
//                               RSCRATCH2, RSCRATCH_EXTRA, XMM0 and XMM1
//                               (XMM registers use bit index reg + 16).
// QUANTIZED_REGS_TO_SAVE_LOAD - the same set with RSCRATCH2 added back in.
#define QUANTIZED_REGS_TO_SAVE \
|
||||
(ABI_ALL_CALLER_SAVED & ~(\
|
||||
(1 << RSCRATCH) | \
|
||||
(1 << RSCRATCH2) | \
|
||||
(1 << RSCRATCH_EXTRA)| \
|
||||
(1 << (XMM0+16)) | \
|
||||
(1 << (XMM1+16))))
|
||||
(ABI_ALL_CALLER_SAVED & ~BitSet32 { \
|
||||
RSCRATCH, RSCRATCH2, RSCRATCH_EXTRA, XMM0+16, XMM1+16 \
|
||||
})
|
||||
|
||||
#define QUANTIZED_REGS_TO_SAVE_LOAD (QUANTIZED_REGS_TO_SAVE | (1 << RSCRATCH2))
|
||||
#define QUANTIZED_REGS_TO_SAVE_LOAD (QUANTIZED_REGS_TO_SAVE | BitSet32 { RSCRATCH2 })
|
||||
|
||||
using namespace Gen;
|
||||
|
||||
|
|
|
@ -72,7 +72,7 @@ bool Jitx86Base::BackPatch(u32 emAddress, SContext* ctx)
|
|||
return false;
|
||||
}
|
||||
|
||||
u32 registersInUse = it->second;
|
||||
BitSet32 registersInUse = it->second;
|
||||
|
||||
if (!info.isMemoryWrite)
|
||||
{
|
||||
|
@ -98,14 +98,14 @@ bool Jitx86Base::BackPatch(u32 emAddress, SContext* ctx)
|
|||
else
|
||||
{
|
||||
// TODO: special case FIFO writes. Also, support 32-bit mode.
|
||||
it = pcAtLoc.find(codePtr);
|
||||
if (it == pcAtLoc.end())
|
||||
auto it2 = pcAtLoc.find(codePtr);
|
||||
if (it2 == pcAtLoc.end())
|
||||
{
|
||||
PanicAlert("BackPatch: no pc entry for address %p", codePtr);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
u32 pc = it->second;
|
||||
u32 pc = it2->second;
|
||||
|
||||
u8 *start;
|
||||
if (info.byteSwap || info.hasImmediate)
|
||||
|
|
|
@ -137,7 +137,7 @@ template <typename T>
|
|||
class MMIOReadCodeGenerator : public MMIO::ReadHandlingMethodVisitor<T>
|
||||
{
|
||||
public:
|
||||
MMIOReadCodeGenerator(Gen::X64CodeBlock* code, u32 registers_in_use,
|
||||
MMIOReadCodeGenerator(Gen::X64CodeBlock* code, BitSet32 registers_in_use,
|
||||
Gen::X64Reg dst_reg, u32 address, bool sign_extend)
|
||||
: m_code(code), m_registers_in_use(registers_in_use), m_dst_reg(dst_reg),
|
||||
m_address(address), m_sign_extend(sign_extend)
|
||||
|
@ -214,14 +214,14 @@ private:
|
|||
}
|
||||
|
||||
Gen::X64CodeBlock* m_code;
|
||||
u32 m_registers_in_use;
|
||||
BitSet32 m_registers_in_use;
|
||||
Gen::X64Reg m_dst_reg;
|
||||
u32 m_address;
|
||||
bool m_sign_extend;
|
||||
};
|
||||
|
||||
void EmuCodeBlock::MMIOLoadToReg(MMIO::Mapping* mmio, Gen::X64Reg reg_value,
|
||||
u32 registers_in_use, u32 address,
|
||||
BitSet32 registers_in_use, u32 address,
|
||||
int access_size, bool sign_extend)
|
||||
{
|
||||
switch (access_size)
|
||||
|
@ -250,17 +250,17 @@ void EmuCodeBlock::MMIOLoadToReg(MMIO::Mapping* mmio, Gen::X64Reg reg_value,
|
|||
}
|
||||
}
|
||||
|
||||
FixupBranch EmuCodeBlock::CheckIfSafeAddress(OpArg reg_value, X64Reg reg_addr, u32 registers_in_use, u32 mem_mask)
|
||||
FixupBranch EmuCodeBlock::CheckIfSafeAddress(OpArg reg_value, X64Reg reg_addr, BitSet32 registers_in_use, u32 mem_mask)
|
||||
{
|
||||
registers_in_use |= (1 << reg_addr);
|
||||
registers_in_use[reg_addr] = true;
|
||||
if (reg_value.IsSimpleReg())
|
||||
registers_in_use |= (1 << reg_value.GetSimpleReg());
|
||||
registers_in_use[reg_value.GetSimpleReg()] = true;
|
||||
|
||||
// Get ourselves a free register; try to pick one that doesn't involve pushing, if we can.
|
||||
X64Reg scratch = RSCRATCH;
|
||||
if (!(registers_in_use & (1 << RSCRATCH)))
|
||||
if (!registers_in_use[RSCRATCH])
|
||||
scratch = RSCRATCH;
|
||||
else if (!(registers_in_use & (1 << RSCRATCH_EXTRA)))
|
||||
else if (!registers_in_use[RSCRATCH_EXTRA])
|
||||
scratch = RSCRATCH_EXTRA;
|
||||
else
|
||||
scratch = reg_addr;
|
||||
|
@ -290,11 +290,11 @@ FixupBranch EmuCodeBlock::CheckIfSafeAddress(OpArg reg_value, X64Reg reg_addr, u
|
|||
}
|
||||
}
|
||||
|
||||
void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress, int accessSize, s32 offset, u32 registersInUse, bool signExtend, int flags)
|
||||
void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress, int accessSize, s32 offset, BitSet32 registersInUse, bool signExtend, int flags)
|
||||
{
|
||||
if (!jit->js.memcheck)
|
||||
{
|
||||
registersInUse &= ~(1 << reg_value);
|
||||
registersInUse[reg_value] = false;
|
||||
}
|
||||
if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bMMU &&
|
||||
SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem &&
|
||||
|
@ -468,7 +468,7 @@ u8 *EmuCodeBlock::UnsafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int acce
|
|||
return result;
|
||||
}
|
||||
|
||||
void EmuCodeBlock::SafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int accessSize, s32 offset, u32 registersInUse, int flags)
|
||||
void EmuCodeBlock::SafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int accessSize, s32 offset, BitSet32 registersInUse, int flags)
|
||||
{
|
||||
// set the correct immediate format
|
||||
if (reg_value.IsImm())
|
||||
|
@ -580,7 +580,7 @@ void EmuCodeBlock::SafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int acces
|
|||
}
|
||||
|
||||
// Destroys the same as SafeWrite plus RSCRATCH. TODO: see if we can avoid temporaries here
|
||||
void EmuCodeBlock::SafeWriteF32ToReg(X64Reg xmm_value, X64Reg reg_addr, s32 offset, u32 registersInUse, int flags)
|
||||
void EmuCodeBlock::SafeWriteF32ToReg(X64Reg xmm_value, X64Reg reg_addr, s32 offset, BitSet32 registersInUse, int flags)
|
||||
{
|
||||
// TODO: PSHUFB might be faster if fastmem supported MOVSS.
|
||||
MOVD_xmm(R(RSCRATCH), xmm_value);
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
|
||||
#include <unordered_map>
|
||||
|
||||
#include "Common/BitSet.h"
|
||||
#include "Common/CPUDetect.h"
|
||||
#include "Common/x64Emitter.h"
|
||||
|
||||
|
@ -76,7 +77,7 @@ public:
|
|||
void LoadAndSwap(int size, Gen::X64Reg dst, const Gen::OpArg& src);
|
||||
void SwapAndStore(int size, const Gen::OpArg& dst, Gen::X64Reg src);
|
||||
|
||||
Gen::FixupBranch CheckIfSafeAddress(Gen::OpArg reg_value, Gen::X64Reg reg_addr, u32 registers_in_use, u32 mem_mask);
|
||||
Gen::FixupBranch CheckIfSafeAddress(Gen::OpArg reg_value, Gen::X64Reg reg_addr, BitSet32 registers_in_use, u32 mem_mask);
|
||||
void UnsafeLoadRegToReg(Gen::X64Reg reg_addr, Gen::X64Reg reg_value, int accessSize, s32 offset = 0, bool signExtend = false);
|
||||
void UnsafeLoadRegToRegNoSwap(Gen::X64Reg reg_addr, Gen::X64Reg reg_value, int accessSize, s32 offset, bool signExtend = false);
|
||||
// these return the address of the MOV, for backpatching
|
||||
|
@ -89,7 +90,7 @@ public:
|
|||
|
||||
// Generate a load/write from the MMIO handler for a given address. Only
|
||||
// call for known addresses in MMIO range (MMIO::IsMMIOAddress).
|
||||
void MMIOLoadToReg(MMIO::Mapping* mmio, Gen::X64Reg reg_value, u32 registers_in_use, u32 address, int access_size, bool sign_extend);
|
||||
void MMIOLoadToReg(MMIO::Mapping* mmio, Gen::X64Reg reg_value, BitSet32 registers_in_use, u32 address, int access_size, bool sign_extend);
|
||||
|
||||
enum SafeLoadStoreFlags
|
||||
{
|
||||
|
@ -99,12 +100,12 @@ public:
|
|||
SAFE_LOADSTORE_CLOBBER_RSCRATCH_INSTEAD_OF_ADDR = 8
|
||||
};
|
||||
|
||||
void SafeLoadToReg(Gen::X64Reg reg_value, const Gen::OpArg & opAddress, int accessSize, s32 offset, u32 registersInUse, bool signExtend, int flags = 0);
|
||||
void SafeLoadToReg(Gen::X64Reg reg_value, const Gen::OpArg & opAddress, int accessSize, s32 offset, BitSet32 registersInUse, bool signExtend, int flags = 0);
|
||||
// Clobbers RSCRATCH or reg_addr depending on the relevant flag. Preserves
|
||||
// reg_value if the load fails and js.memcheck is enabled.
|
||||
// Works with immediate inputs and simple registers only.
|
||||
void SafeWriteRegToReg(Gen::OpArg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset, u32 registersInUse, int flags = 0);
|
||||
void SafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset, u32 registersInUse, int flags = 0)
|
||||
void SafeWriteRegToReg(Gen::OpArg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset, BitSet32 registersInUse, int flags = 0);
|
||||
void SafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset, BitSet32 registersInUse, int flags = 0)
|
||||
{
|
||||
SafeWriteRegToReg(R(reg_value), reg_addr, accessSize, offset, registersInUse, flags);
|
||||
}
|
||||
|
@ -115,7 +116,7 @@ public:
|
|||
return swap && !cpu_info.bMOVBE && accessSize > 8;
|
||||
}
|
||||
|
||||
void SafeWriteF32ToReg(Gen::X64Reg xmm_value, Gen::X64Reg reg_addr, s32 offset, u32 registersInUse, int flags = 0);
|
||||
void SafeWriteF32ToReg(Gen::X64Reg xmm_value, Gen::X64Reg reg_addr, s32 offset, BitSet32 registersInUse, int flags = 0);
|
||||
|
||||
void WriteToConstRamAddress(int accessSize, Gen::X64Reg arg, u32 address, bool swap = false);
|
||||
void JitGetAndClearCAOV(bool oe);
|
||||
|
@ -137,6 +138,6 @@ public:
|
|||
void ConvertDoubleToSingle(Gen::X64Reg dst, Gen::X64Reg src);
|
||||
void SetFPRF(Gen::X64Reg xmm);
|
||||
protected:
|
||||
std::unordered_map<u8 *, u32> registersInUseAtLoc;
|
||||
std::unordered_map<u8 *, BitSet32> registersInUseAtLoc;
|
||||
std::unordered_map<u8 *, u32> pcAtLoc;
|
||||
};
|
||||
|
|
|
@ -36,7 +36,7 @@ void TrampolineCache::Shutdown()
|
|||
cachedTrampolines.clear();
|
||||
}
|
||||
|
||||
const u8* TrampolineCache::GetReadTrampoline(const InstructionInfo &info, u32 registersInUse)
|
||||
const u8* TrampolineCache::GetReadTrampoline(const InstructionInfo &info, BitSet32 registersInUse)
|
||||
{
|
||||
TrampolineCacheKey key = { registersInUse, 0, info };
|
||||
|
||||
|
@ -49,7 +49,7 @@ const u8* TrampolineCache::GetReadTrampoline(const InstructionInfo &info, u32 re
|
|||
return trampoline;
|
||||
}
|
||||
|
||||
const u8* TrampolineCache::GenerateReadTrampoline(const InstructionInfo &info, u32 registersInUse)
|
||||
const u8* TrampolineCache::GenerateReadTrampoline(const InstructionInfo &info, BitSet32 registersInUse)
|
||||
{
|
||||
if (GetSpaceLeft() < 1024)
|
||||
PanicAlert("Trampoline cache full");
|
||||
|
@ -97,7 +97,7 @@ const u8* TrampolineCache::GenerateReadTrampoline(const InstructionInfo &info, u
|
|||
return trampoline;
|
||||
}
|
||||
|
||||
const u8* TrampolineCache::GetWriteTrampoline(const InstructionInfo &info, u32 registersInUse, u32 pc)
|
||||
const u8* TrampolineCache::GetWriteTrampoline(const InstructionInfo &info, BitSet32 registersInUse, u32 pc)
|
||||
{
|
||||
TrampolineCacheKey key = { registersInUse, pc, info };
|
||||
|
||||
|
@ -110,7 +110,7 @@ const u8* TrampolineCache::GetWriteTrampoline(const InstructionInfo &info, u32 r
|
|||
return trampoline;
|
||||
}
|
||||
|
||||
const u8* TrampolineCache::GenerateWriteTrampoline(const InstructionInfo &info, u32 registersInUse, u32 pc)
|
||||
const u8* TrampolineCache::GenerateWriteTrampoline(const InstructionInfo &info, BitSet32 registersInUse, u32 pc)
|
||||
{
|
||||
if (GetSpaceLeft() < 1024)
|
||||
PanicAlert("Trampoline cache full");
|
||||
|
@ -184,7 +184,7 @@ const u8* TrampolineCache::GenerateWriteTrampoline(const InstructionInfo &info,
|
|||
|
||||
size_t TrampolineCacheKeyHasher::operator()(const TrampolineCacheKey& k) const
|
||||
{
|
||||
size_t res = std::hash<int>()(k.registersInUse);
|
||||
size_t res = std::hash<int>()(k.registersInUse.m_val);
|
||||
res ^= std::hash<int>()(k.info.operandSize) >> 1;
|
||||
res ^= std::hash<int>()(k.info.regOperandReg) >> 2;
|
||||
res ^= std::hash<int>()(k.info.scaledReg) >> 3;
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
|
||||
#include <unordered_map>
|
||||
|
||||
#include "Common/BitSet.h"
|
||||
#include "Common/CommonTypes.h"
|
||||
#include "Common/x64Analyzer.h"
|
||||
#include "Common/x64Emitter.h"
|
||||
|
@ -15,7 +16,7 @@ const int BACKPATCH_SIZE = 5;
|
|||
|
||||
struct TrampolineCacheKey
|
||||
{
|
||||
u32 registersInUse;
|
||||
BitSet32 registersInUse;
|
||||
u32 pc;
|
||||
InstructionInfo info;
|
||||
|
||||
|
@ -33,13 +34,13 @@ public:
|
|||
void Init();
|
||||
void Shutdown();
|
||||
|
||||
const u8* GetReadTrampoline(const InstructionInfo &info, u32 registersInUse);
|
||||
const u8* GetWriteTrampoline(const InstructionInfo &info, u32 registersInUse, u32 pc);
|
||||
const u8* GetReadTrampoline(const InstructionInfo &info, BitSet32 registersInUse);
|
||||
const u8* GetWriteTrampoline(const InstructionInfo &info, BitSet32 registersInUse, u32 pc);
|
||||
void ClearCodeSpace();
|
||||
|
||||
private:
|
||||
const u8* GenerateReadTrampoline(const InstructionInfo &info, u32 registersInUse);
|
||||
const u8* GenerateWriteTrampoline(const InstructionInfo &info, u32 registersInUse, u32 pc);
|
||||
const u8* GenerateReadTrampoline(const InstructionInfo &info, BitSet32 registersInUse);
|
||||
const u8* GenerateWriteTrampoline(const InstructionInfo &info, BitSet32 registersInUse, u32 pc);
|
||||
|
||||
std::unordered_map<TrampolineCacheKey, const u8*, TrampolineCacheKeyHasher> cachedTrampolines;
|
||||
};
|
||||
|
|
|
@ -23,7 +23,7 @@
|
|||
MOV(64, M(pdt), R(RSCRATCH));
|
||||
|
||||
#define PROFILER_VPUSH \
|
||||
u32 registersInUse = CallerSavedRegistersInUse(); \
|
||||
BitSet32 registersInUse = CallerSavedRegistersInUse(); \
|
||||
ABI_PushRegistersAndAdjustStack(registersInUse, 0);
|
||||
|
||||
#define PROFILER_VPOP \
|
||||
|
|
|
@ -141,7 +141,7 @@ void VertexLoader::CompileVertexTranslator()
|
|||
|
||||
m_compiledCode = GetCodePtr();
|
||||
// We only use RAX (caller saved) and RBX (callee saved).
|
||||
ABI_PushRegistersAndAdjustStack(1 << RBX, 8);
|
||||
ABI_PushRegistersAndAdjustStack({RBX}, 8);
|
||||
|
||||
// save count
|
||||
MOV(64, R(RBX), R(ABI_PARAM1));
|
||||
|
@ -402,7 +402,7 @@ void VertexLoader::CompileVertexTranslator()
|
|||
SUB(64, R(RBX), Imm8(1));
|
||||
|
||||
J_CC(CC_NZ, loop_start);
|
||||
ABI_PopRegistersAndAdjustStack(1 << RBX, 8);
|
||||
ABI_PopRegistersAndAdjustStack({RBX}, 8);
|
||||
RET();
|
||||
#endif
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue