Rationalize temporary register usage.

Rather than using a variety of registers including RSI, ABI_PARAM1
(RCX on Windows, RDI on Unix), RCX, and RDX, the rule is now:

- RDI and RSI are never used.  This allows them to be allocated on Unix,
bringing parity with Windows.

- RDX is a permanent temporary register along with RAX (and thus never
needs to be reserved with FlushLockX).  It's used frequently enough that
allocating it would probably be a bad idea, as it would constantly get
flushed.

- RCX is allocatable, but is flushed in two situations:
    - Non-immediate shifts (e.g. rlwnm), because x86 requires a variable
    shift count to be in CL, the low byte of RCX.
    - Paired single loads and stores, because they require three
    temporary registers: the helper functions take two integer
    arguments, and another register is used as an index to get the
    function address.
These should be relatively rare.

While we're at it, in stores, use the registers directly where possible
rather than always using temporaries (by making SafeWriteRegToReg
clobber less).  The address doesn't need to be clobbered in the usual
case, and on CPUs with MOVBE, neither does the value.

Oh, and get rid of a useless MEMCHECK.

This commit does not actually add new registers to the allocation order;
that step is deliberately left out so that any performance or correctness
issues caused by these changes can be tested for separately.
This commit is contained in:
comex 2014-09-02 18:54:46 -04:00
parent 67cdb6e07a
commit 8dea26762d
13 changed files with 179 additions and 172 deletions

View File

@ -107,10 +107,9 @@ public:
void GenerateRC(); void GenerateRC();
void ComputeRC(const Gen::OpArg & arg); void ComputeRC(const Gen::OpArg & arg);
// Reads a given bit of a given CR register part. Clobbers ABI_PARAM1, // Reads a given bit of a given CR register part.
// don't forget to xlock it before.
void GetCRFieldBit(int field, int bit, Gen::X64Reg out, bool negate = false); void GetCRFieldBit(int field, int bit, Gen::X64Reg out, bool negate = false);
// Clobbers ABI_PARAM1, xlock it before. // Clobbers RDX.
void SetCRFieldBit(int field, int bit, Gen::X64Reg in); void SetCRFieldBit(int field, int bit, Gen::X64Reg in);
// Generates a branch that will check if a given bit of a CR register part // Generates a branch that will check if a given bit of a CR register part

View File

@ -9,13 +9,12 @@
using namespace Gen; using namespace Gen;
//GLOBAL STATIC ALLOCATIONS x86 // GLOBAL STATIC ALLOCATIONS x64
//EAX - ubiquitous scratch register - EVERYBODY scratches this // RAX - ubiquitous scratch register - EVERYBODY scratches this
// RDX - second scratch register
//GLOBAL STATIC ALLOCATIONS x64 // RBX - Base pointer of memory
//EAX - ubiquitous scratch register - EVERYBODY scratches this // R15 - Pointer to array of block pointers
//RBX - Base pointer of memory // RBP - Pointer to ppcState+0x80
//R15 - Pointer to array of block pointers
// PLAN: no more block numbers - crazy opcodes just contain offset within // PLAN: no more block numbers - crazy opcodes just contain offset within
// dynarec buffer // dynarec buffer
@ -73,8 +72,8 @@ void Jit64AsmRoutineManager::Generate()
no_mem = J_CC(CC_NZ); no_mem = J_CC(CC_NZ);
} }
AND(32, R(EAX), Imm32(JIT_ICACHE_MASK)); AND(32, R(EAX), Imm32(JIT_ICACHE_MASK));
MOV(64, R(RSI), Imm64((u64)jit->GetBlockCache()->iCache)); MOV(64, R(RDX), Imm64((u64)jit->GetBlockCache()->iCache));
MOV(32, R(EAX), MComplex(RSI, EAX, SCALE_1, 0)); MOV(32, R(EAX), MComplex(RDX, EAX, SCALE_1, 0));
if (Core::g_CoreStartupParameter.bWii || Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.bTLBHack) if (Core::g_CoreStartupParameter.bWii || Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.bTLBHack)
{ {
@ -86,8 +85,8 @@ void Jit64AsmRoutineManager::Generate()
TEST(32, R(EAX), Imm32(JIT_ICACHE_VMEM_BIT)); TEST(32, R(EAX), Imm32(JIT_ICACHE_VMEM_BIT));
FixupBranch no_vmem = J_CC(CC_Z); FixupBranch no_vmem = J_CC(CC_Z);
AND(32, R(EAX), Imm32(JIT_ICACHE_MASK)); AND(32, R(EAX), Imm32(JIT_ICACHE_MASK));
MOV(64, R(RSI), Imm64((u64)jit->GetBlockCache()->iCacheVMEM)); MOV(64, R(RDX), Imm64((u64)jit->GetBlockCache()->iCacheVMEM));
MOV(32, R(EAX), MComplex(RSI, EAX, SCALE_1, 0)); MOV(32, R(EAX), MComplex(RDX, EAX, SCALE_1, 0));
if (Core::g_CoreStartupParameter.bWii) exit_vmem = J(); if (Core::g_CoreStartupParameter.bWii) exit_vmem = J();
SetJumpTarget(no_vmem); SetJumpTarget(no_vmem);
@ -97,8 +96,8 @@ void Jit64AsmRoutineManager::Generate()
TEST(32, R(EAX), Imm32(JIT_ICACHE_EXRAM_BIT)); TEST(32, R(EAX), Imm32(JIT_ICACHE_EXRAM_BIT));
FixupBranch no_exram = J_CC(CC_Z); FixupBranch no_exram = J_CC(CC_Z);
AND(32, R(EAX), Imm32(JIT_ICACHEEX_MASK)); AND(32, R(EAX), Imm32(JIT_ICACHEEX_MASK));
MOV(64, R(RSI), Imm64((u64)jit->GetBlockCache()->iCacheEx)); MOV(64, R(RDX), Imm64((u64)jit->GetBlockCache()->iCacheEx));
MOV(32, R(EAX), MComplex(RSI, EAX, SCALE_1, 0)); MOV(32, R(EAX), MComplex(RDX, EAX, SCALE_1, 0));
SetJumpTarget(no_exram); SetJumpTarget(no_exram);
} }

View File

@ -442,8 +442,8 @@ void Jit64::cmpXX(UGeckoInstruction inst)
if (!comparand.IsImm()) if (!comparand.IsImm())
{ {
MOVSX(64, 32, ABI_PARAM1, comparand); MOVSX(64, 32, RDX, comparand);
comparand = R(ABI_PARAM1); comparand = R(RDX);
} }
} }
else else
@ -454,11 +454,11 @@ void Jit64::cmpXX(UGeckoInstruction inst)
MOVZX(64, 32, RAX, gpr.R(a)); MOVZX(64, 32, RAX, gpr.R(a));
if (comparand.IsImm()) if (comparand.IsImm())
MOV(32, R(ABI_PARAM1), comparand); MOV(32, R(RDX), comparand);
else else
MOVZX(64, 32, ABI_PARAM1, comparand); MOVZX(64, 32, RDX, comparand);
comparand = R(ABI_PARAM1); comparand = R(RDX);
} }
SUB(64, R(RAX), comparand); SUB(64, R(RAX), comparand);
MOV(64, PPCSTATE(cr_val[crf]), R(RAX)); MOV(64, PPCSTATE(cr_val[crf]), R(RAX));
@ -1170,7 +1170,6 @@ void Jit64::mulhwXx(UGeckoInstruction inst)
} }
else else
{ {
gpr.FlushLockX(EDX);
gpr.Lock(a, b, d); gpr.Lock(a, b, d);
gpr.BindToRegister(d, (d == a || d == b), true); gpr.BindToRegister(d, (d == a || d == b), true);
if (gpr.RX(d) == EDX) if (gpr.RX(d) == EDX)
@ -1288,7 +1287,6 @@ void Jit64::divwux(UGeckoInstruction inst)
} }
else else
{ {
gpr.FlushLockX(EDX);
gpr.Lock(a, b, d); gpr.Lock(a, b, d);
gpr.BindToRegister(d, (d == a || d == b), true); gpr.BindToRegister(d, (d == a || d == b), true);
MOV(32, R(EAX), gpr.R(a)); MOV(32, R(EAX), gpr.R(a));
@ -1349,7 +1347,6 @@ void Jit64::divwx(UGeckoInstruction inst)
} }
else else
{ {
gpr.FlushLockX(EDX);
gpr.Lock(a, b, d); gpr.Lock(a, b, d);
gpr.BindToRegister(d, (d == a || d == b), true); gpr.BindToRegister(d, (d == a || d == b), true);
MOV(32, R(EAX), gpr.R(a)); MOV(32, R(EAX), gpr.R(a));
@ -1881,8 +1878,8 @@ void Jit64::srawx(UGeckoInstruction inst)
int a = inst.RA; int a = inst.RA;
int b = inst.RB; int b = inst.RB;
int s = inst.RS; int s = inst.RS;
gpr.Lock(a, s, b);
gpr.FlushLockX(ECX); gpr.FlushLockX(ECX);
gpr.Lock(a, s, b);
gpr.BindToRegister(a, (a == s || a == b), true); gpr.BindToRegister(a, (a == s || a == b), true);
JitClearCA(); JitClearCA();
MOV(32, R(ECX), gpr.R(b)); MOV(32, R(ECX), gpr.R(b));

View File

@ -197,14 +197,13 @@ void Jit64::lXXx(UGeckoInstruction inst)
else else
{ {
// In this case we need an extra temporary register. // In this case we need an extra temporary register.
gpr.FlushLockX(ABI_PARAM1); opAddress = R(RDX);
opAddress = R(ABI_PARAM1);
storeAddress = true; storeAddress = true;
if (use_constant_offset) if (use_constant_offset)
{ {
if (gpr.R(a).IsSimpleReg() && offset != 0) if (gpr.R(a).IsSimpleReg() && offset != 0)
{ {
LEA(32, ABI_PARAM1, MDisp(gpr.RX(a), offset)); LEA(32, RDX, MDisp(gpr.RX(a), offset));
} }
else else
{ {
@ -215,7 +214,7 @@ void Jit64::lXXx(UGeckoInstruction inst)
} }
else if (gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg()) else if (gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg())
{ {
LEA(32, ABI_PARAM1, MComplex(gpr.RX(a), gpr.RX(b), SCALE_1, 0)); LEA(32, RDX, MComplex(gpr.RX(a), gpr.RX(b), SCALE_1, 0));
} }
else else
{ {
@ -232,7 +231,7 @@ void Jit64::lXXx(UGeckoInstruction inst)
if (update && storeAddress) if (update && storeAddress)
{ {
// We need to save the (usually scratch) address register for the update. // We need to save the (usually scratch) address register for the update.
registersInUse |= (1 << ABI_PARAM1); registersInUse |= (1 << RDX);
} }
SafeLoadToReg(gpr.RX(d), opAddress, accessSize, loadOffset, registersInUse, signExtend); SafeLoadToReg(gpr.RX(d), opAddress, accessSize, loadOffset, registersInUse, signExtend);
@ -339,8 +338,7 @@ void Jit64::stX(UGeckoInstruction inst)
// Helps external systems know which instruction triggered the write // Helps external systems know which instruction triggered the write
MOV(32, PPCSTATE(pc), Imm32(jit->js.compilerPC)); MOV(32, PPCSTATE(pc), Imm32(jit->js.compilerPC));
gpr.FlushLockX(ABI_PARAM1); MOV(32, R(EDX), gpr.R(s));
MOV(32, R(ABI_PARAM1), gpr.R(s));
if (update) if (update)
gpr.SetImmediate32(a, addr); gpr.SetImmediate32(a, addr);
@ -396,24 +394,31 @@ void Jit64::stX(UGeckoInstruction inst)
} }
} }
gpr.FlushLockX(ECX, EDX); gpr.Lock(a, s);
gpr.Lock(s, a); gpr.BindToRegister(a, true, false);
MOV(32, R(EDX), gpr.R(a)); X64Reg reg_value;
MOV(32, R(ECX), gpr.R(s)); if (WriteClobbersRegValue(accessSize, /* swap */ true))
SafeWriteRegToReg(ECX, EDX, accessSize, offset, CallerSavedRegistersInUse()); {
MOV(32, R(EDX), gpr.R(s));
reg_value = EDX;
}
else
{
gpr.BindToRegister(s, true, false);
reg_value = gpr.RX(s);
}
SafeWriteRegToReg(reg_value, gpr.RX(a), accessSize, offset, CallerSavedRegistersInUse(), SAFE_LOADSTORE_CLOBBER_EAX_INSTEAD_OF_ADDR);
if (update && offset) if (update && offset)
{ {
gpr.KillImmediate(a, true, true);
MEMCHECK_START MEMCHECK_START
gpr.KillImmediate(a, true, true);
ADD(32, gpr.R(a), Imm32((u32)offset)); ADD(32, gpr.R(a), Imm32((u32)offset));
MEMCHECK_END MEMCHECK_END
} }
gpr.UnlockAll(); gpr.UnlockAll();
gpr.UnlockAllX();
} }
else else
{ {
@ -430,15 +435,12 @@ void Jit64::stXx(UGeckoInstruction inst)
FALLBACK_IF(!a || a == s || a == b); FALLBACK_IF(!a || a == s || a == b);
gpr.Lock(a, b, s); gpr.Lock(a, b, s);
gpr.FlushLockX(ECX, EDX);
if (inst.SUBOP10 & 32) if (inst.SUBOP10 & 32)
{ {
MEMCHECK_START
gpr.BindToRegister(a, true, true); gpr.BindToRegister(a, true, true);
ADD(32, gpr.R(a), gpr.R(b)); ADD(32, gpr.R(a), gpr.R(b));
MOV(32, R(EDX), gpr.R(a)); MOV(32, R(EDX), gpr.R(a));
MEMCHECK_END
} }
else if (gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg()) else if (gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg())
{ {
@ -468,8 +470,18 @@ void Jit64::stXx(UGeckoInstruction inst)
break; break;
} }
MOV(32, R(ECX), gpr.R(s)); X64Reg reg_value;
SafeWriteRegToReg(ECX, EDX, accessSize, 0, CallerSavedRegistersInUse()); if (WriteClobbersRegValue(accessSize, /* swap */ true))
{
MOV(32, R(EAX), gpr.R(s));
reg_value = EAX;
}
else
{
gpr.BindToRegister(s, true, false);
reg_value = gpr.RX(s);
}
SafeWriteRegToReg(reg_value, EDX, accessSize, 0, CallerSavedRegistersInUse());
gpr.UnlockAll(); gpr.UnlockAll();
gpr.UnlockAllX(); gpr.UnlockAllX();
@ -482,13 +494,12 @@ void Jit64::lmw(UGeckoInstruction inst)
JITDISABLE(bJITLoadStoreOff); JITDISABLE(bJITLoadStoreOff);
// TODO: This doesn't handle rollback on DSI correctly // TODO: This doesn't handle rollback on DSI correctly
gpr.FlushLockX(ECX); MOV(32, R(EDX), Imm32((u32)(s32)inst.SIMM_16));
MOV(32, R(ECX), Imm32((u32)(s32)inst.SIMM_16));
if (inst.RA) if (inst.RA)
ADD(32, R(ECX), gpr.R(inst.RA)); ADD(32, R(EDX), gpr.R(inst.RA));
for (int i = inst.RD; i < 32; i++) for (int i = inst.RD; i < 32; i++)
{ {
SafeLoadToReg(EAX, R(ECX), 32, (i - inst.RD) * 4, CallerSavedRegistersInUse() | (1 << ECX), false); SafeLoadToReg(EAX, R(EDX), 32, (i - inst.RD) * 4, CallerSavedRegistersInUse() | (1 << ECX), false);
gpr.BindToRegister(i, false, true); gpr.BindToRegister(i, false, true);
MOV(32, gpr.R(i), R(EAX)); MOV(32, gpr.R(i), R(EAX));
} }
@ -501,15 +512,14 @@ void Jit64::stmw(UGeckoInstruction inst)
JITDISABLE(bJITLoadStoreOff); JITDISABLE(bJITLoadStoreOff);
// TODO: This doesn't handle rollback on DSI correctly // TODO: This doesn't handle rollback on DSI correctly
gpr.FlushLockX(ECX);
for (int i = inst.RD; i < 32; i++) for (int i = inst.RD; i < 32; i++)
{ {
if (inst.RA) if (inst.RA)
MOV(32, R(EAX), gpr.R(inst.RA)); MOV(32, R(EAX), gpr.R(inst.RA));
else else
XOR(32, R(EAX), R(EAX)); XOR(32, R(EAX), R(EAX));
MOV(32, R(ECX), gpr.R(i)); MOV(32, R(EDX), gpr.R(i));
SafeWriteRegToReg(ECX, EAX, 32, (i - inst.RD) * 4 + (u32)(s32)inst.SIMM_16, CallerSavedRegistersInUse()); SafeWriteRegToReg(EDX, EAX, 32, (i - inst.RD) * 4 + (u32)(s32)inst.SIMM_16, CallerSavedRegistersInUse());
} }
gpr.UnlockAllX(); gpr.UnlockAllX();
} }

View File

@ -96,24 +96,23 @@ void Jit64::stfXXX(UGeckoInstruction inst)
FALLBACK_IF(!indexed && !a); FALLBACK_IF(!indexed && !a);
s32 offset = 0; s32 offset = 0;
gpr.FlushLockX(ABI_PARAM1);
if (indexed) if (indexed)
{ {
if (update) if (update)
{ {
gpr.BindToRegister(a, true, true); gpr.BindToRegister(a, true, true);
ADD(32, gpr.R(a), gpr.R(b)); ADD(32, gpr.R(a), gpr.R(b));
MOV(32, R(ABI_PARAM1), gpr.R(a)); MOV(32, R(RDX), gpr.R(a));
} }
else else
{ {
if (a && gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg()) if (a && gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg())
LEA(32, ABI_PARAM1, MComplex(gpr.RX(a), gpr.RX(b), SCALE_1, 0)); LEA(32, RDX, MComplex(gpr.RX(a), gpr.RX(b), SCALE_1, 0));
else else
{ {
MOV(32, R(ABI_PARAM1), gpr.R(b)); MOV(32, R(RDX), gpr.R(b));
if (a) if (a)
ADD(32, R(ABI_PARAM1), gpr.R(a)); ADD(32, R(RDX), gpr.R(a));
} }
} }
} }
@ -128,14 +127,14 @@ void Jit64::stfXXX(UGeckoInstruction inst)
{ {
offset = (s32)(s16)inst.SIMM_16; offset = (s32)(s16)inst.SIMM_16;
} }
MOV(32, R(ABI_PARAM1), gpr.R(a)); MOV(32, R(RDX), gpr.R(a));
} }
if (single) if (single)
{ {
fpr.BindToRegister(s, true, false); fpr.BindToRegister(s, true, false);
ConvertDoubleToSingle(XMM0, fpr.RX(s)); ConvertDoubleToSingle(XMM0, fpr.RX(s));
SafeWriteF32ToReg(XMM0, ABI_PARAM1, offset, CallerSavedRegistersInUse()); SafeWriteF32ToReg(XMM0, RDX, offset, CallerSavedRegistersInUse());
fpr.UnlockAll(); fpr.UnlockAll();
} }
else else
@ -144,7 +143,7 @@ void Jit64::stfXXX(UGeckoInstruction inst)
MOVQ_xmm(R(RAX), fpr.RX(s)); MOVQ_xmm(R(RAX), fpr.RX(s));
else else
MOV(64, R(RAX), fpr.R(s)); MOV(64, R(RAX), fpr.R(s));
SafeWriteRegToReg(RAX, ABI_PARAM1, 64, offset, CallerSavedRegistersInUse()); SafeWriteRegToReg(RAX, RDX, 64, offset, CallerSavedRegistersInUse());
} }
gpr.UnlockAll(); gpr.UnlockAll();
gpr.UnlockAllX(); gpr.UnlockAllX();
@ -160,15 +159,14 @@ void Jit64::stfiwx(UGeckoInstruction inst)
int a = inst.RA; int a = inst.RA;
int b = inst.RB; int b = inst.RB;
gpr.FlushLockX(ABI_PARAM1); MOV(32, R(RDX), gpr.R(b));
MOV(32, R(ABI_PARAM1), gpr.R(b));
if (a) if (a)
ADD(32, R(ABI_PARAM1), gpr.R(a)); ADD(32, R(RDX), gpr.R(a));
if (fpr.R(s).IsSimpleReg()) if (fpr.R(s).IsSimpleReg())
MOVD_xmm(R(EAX), fpr.RX(s)); MOVD_xmm(R(EAX), fpr.RX(s));
else else
MOV(32, R(EAX), fpr.R(s)); MOV(32, R(EAX), fpr.R(s));
SafeWriteRegToReg(EAX, ABI_PARAM1, 32, 0, CallerSavedRegistersInUse()); SafeWriteRegToReg(EAX, RDX, 32, 0, CallerSavedRegistersInUse());
gpr.UnlockAllX(); gpr.UnlockAllX();
} }

View File

@ -28,8 +28,7 @@ void Jit64::psq_st(UGeckoInstruction inst)
int a = inst.RA; int a = inst.RA;
int s = inst.RS; // Fp numbers int s = inst.RS; // Fp numbers
gpr.FlushLockX(EAX, EDX); gpr.FlushLockX(EAX, ECX);
gpr.FlushLockX(ECX);
if (update) if (update)
gpr.BindToRegister(inst.RA, true, true); gpr.BindToRegister(inst.RA, true, true);
fpr.BindToRegister(inst.RS, true, false); fpr.BindToRegister(inst.RS, true, false);
@ -73,8 +72,7 @@ void Jit64::psq_l(UGeckoInstruction inst)
bool update = inst.OPCD == 57; bool update = inst.OPCD == 57;
int offset = inst.SIMM_12; int offset = inst.SIMM_12;
gpr.FlushLockX(EAX, EDX); gpr.FlushLockX(EAX, ECX);
gpr.FlushLockX(ECX);
gpr.BindToRegister(inst.RA, true, update && offset); gpr.BindToRegister(inst.RA, true, update && offset);
fpr.BindToRegister(inst.RS, false, true); fpr.BindToRegister(inst.RS, false, true);
if (offset) if (offset)

View File

@ -42,40 +42,40 @@ void Jit64::GetCRFieldBit(int field, int bit, Gen::X64Reg out, bool negate)
void Jit64::SetCRFieldBit(int field, int bit, Gen::X64Reg in) void Jit64::SetCRFieldBit(int field, int bit, Gen::X64Reg in)
{ {
MOV(64, R(ABI_PARAM1), PPCSTATE(cr_val[field])); MOV(64, R(RDX), PPCSTATE(cr_val[field]));
MOVZX(32, 8, in, R(in)); MOVZX(32, 8, in, R(in));
switch (bit) switch (bit)
{ {
case CR_SO_BIT: // set bit 61 to input case CR_SO_BIT: // set bit 61 to input
BTR(64, R(ABI_PARAM1), Imm8(61)); BTR(64, R(RDX), Imm8(61));
SHL(64, R(in), Imm8(61)); SHL(64, R(in), Imm8(61));
OR(64, R(ABI_PARAM1), R(in)); OR(64, R(RDX), R(in));
break; break;
case CR_EQ_BIT: // clear low 32 bits, set bit 0 to !input case CR_EQ_BIT: // clear low 32 bits, set bit 0 to !input
SHR(64, R(ABI_PARAM1), Imm8(32)); SHR(64, R(RDX), Imm8(32));
SHL(64, R(ABI_PARAM1), Imm8(32)); SHL(64, R(RDX), Imm8(32));
XOR(32, R(in), Imm8(1)); XOR(32, R(in), Imm8(1));
OR(64, R(ABI_PARAM1), R(in)); OR(64, R(RDX), R(in));
break; break;
case CR_GT_BIT: // set bit 63 to !input case CR_GT_BIT: // set bit 63 to !input
BTR(64, R(ABI_PARAM1), Imm8(63)); BTR(64, R(RDX), Imm8(63));
NOT(32, R(in)); NOT(32, R(in));
SHL(64, R(in), Imm8(63)); SHL(64, R(in), Imm8(63));
OR(64, R(ABI_PARAM1), R(in)); OR(64, R(RDX), R(in));
break; break;
case CR_LT_BIT: // set bit 62 to input case CR_LT_BIT: // set bit 62 to input
BTR(64, R(ABI_PARAM1), Imm8(62)); BTR(64, R(RDX), Imm8(62));
SHL(64, R(in), Imm8(62)); SHL(64, R(in), Imm8(62));
OR(64, R(ABI_PARAM1), R(in)); OR(64, R(RDX), R(in));
break; break;
} }
BTS(64, R(ABI_PARAM1), Imm8(32)); BTS(64, R(RDX), Imm8(32));
MOV(64, PPCSTATE(cr_val[field]), R(ABI_PARAM1)); MOV(64, PPCSTATE(cr_val[field]), R(RDX));
} }
FixupBranch Jit64::JumpIfCRFieldBit(int field, int bit, bool jump_if_set) FixupBranch Jit64::JumpIfCRFieldBit(int field, int bit, bool jump_if_set)
@ -308,8 +308,7 @@ void Jit64::mfcr(UGeckoInstruction inst)
gpr.BindToRegister(d, false, true); gpr.BindToRegister(d, false, true);
XOR(32, gpr.R(d), gpr.R(d)); XOR(32, gpr.R(d), gpr.R(d));
gpr.FlushLockX(ABI_PARAM1); X64Reg cr_val = RDX;
X64Reg cr_val = ABI_PARAM1;
// we only need to zero the high bits of EAX once // we only need to zero the high bits of EAX once
XOR(32, R(EAX), R(EAX)); XOR(32, R(EAX), R(EAX));
for (int i = 0; i < 8; i++) for (int i = 0; i < 8; i++)
@ -439,9 +438,8 @@ void Jit64::crXXX(UGeckoInstruction inst)
// crnand or crnor // crnand or crnor
bool negateB = inst.SUBOP10 == 225 || inst.SUBOP10 == 33; bool negateB = inst.SUBOP10 == 225 || inst.SUBOP10 == 33;
gpr.FlushLockX(ABI_PARAM1); GetCRFieldBit(inst.CRBA >> 2, 3 - (inst.CRBA & 3), DL, negateA);
GetCRFieldBit(inst.CRBA >> 2, 3 - (inst.CRBA & 3), ABI_PARAM1, negateA); GetCRFieldBit(inst.CRBB >> 2, 3 - (inst.CRBB & 3), AL, negateB);
GetCRFieldBit(inst.CRBB >> 2, 3 - (inst.CRBB & 3), EAX, negateB);
// Compute combined bit // Compute combined bit
switch (inst.SUBOP10) switch (inst.SUBOP10)
@ -449,23 +447,23 @@ void Jit64::crXXX(UGeckoInstruction inst)
case 33: // crnor: ~(A || B) == (~A && ~B) case 33: // crnor: ~(A || B) == (~A && ~B)
case 129: // crandc case 129: // crandc
case 257: // crand case 257: // crand
AND(8, R(EAX), R(ABI_PARAM1)); AND(8, R(AL), R(DL));
break; break;
case 193: // crxor case 193: // crxor
case 289: // creqv case 289: // creqv
XOR(8, R(EAX), R(ABI_PARAM1)); XOR(8, R(AL), R(DL));
break; break;
case 225: // crnand: ~(A && B) == (~A || ~B) case 225: // crnand: ~(A && B) == (~A || ~B)
case 417: // crorc case 417: // crorc
case 449: // cror case 449: // cror
OR(8, R(EAX), R(ABI_PARAM1)); OR(8, R(AL), R(DL));
break; break;
} }
// Store result bit in CRBD // Store result bit in CRBD
SetCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3), EAX); SetCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3), AL);
gpr.UnlockAllX(); gpr.UnlockAllX();
} }

View File

@ -157,7 +157,9 @@ static void fregSpill(RegInfo& RI, X64Reg reg)
RI.fregs[reg] = nullptr; RI.fregs[reg] = nullptr;
} }
// ECX is scratch, so we don't allocate it // RAX and RDX are scratch, so we don't allocate them
// (TODO: if we could lock RCX here too then we could allocate it - needed for
// shifts)
// 64-bit - calling conventions differ between linux & windows, so... // 64-bit - calling conventions differ between linux & windows, so...
#ifdef _WIN32 #ifdef _WIN32
@ -602,9 +604,9 @@ static void regEmitMemStore(RegInfo& RI, InstLoc I, unsigned Size)
{ {
auto info = regBuildMemAddress(RI, I, getOp2(I), 2, Size, nullptr); auto info = regBuildMemAddress(RI, I, getOp2(I), 2, Size, nullptr);
if (info.first.IsImm()) if (info.first.IsImm())
RI.Jit->MOV(32, R(ECX), info.first); RI.Jit->MOV(32, R(EDX), info.first);
else else
RI.Jit->LEA(32, ECX, MDisp(info.first.GetSimpleReg(), info.second)); RI.Jit->LEA(32, EDX, MDisp(info.first.GetSimpleReg(), info.second));
regSpill(RI, EAX); regSpill(RI, EAX);
@ -617,7 +619,7 @@ static void regEmitMemStore(RegInfo& RI, InstLoc I, unsigned Size)
RI.Jit->MOV(32, R(EAX), regLocForInst(RI, getOp1(I))); RI.Jit->MOV(32, R(EAX), regLocForInst(RI, getOp1(I)));
} }
RI.Jit->SafeWriteRegToReg(EAX, ECX, Size, 0, regsInUse(RI), EmuCodeBlock::SAFE_LOADSTORE_NO_FASTMEM); RI.Jit->SafeWriteRegToReg(EAX, EDX, Size, 0, regsInUse(RI), EmuCodeBlock::SAFE_LOADSTORE_NO_FASTMEM);
if (RI.IInfo[I - RI.FirstI] & 4) if (RI.IInfo[I - RI.FirstI] & 4)
regClearInst(RI, getOp1(I)); regClearInst(RI, getOp1(I));
} }
@ -675,9 +677,9 @@ static void regEmitCmp(RegInfo& RI, InstLoc I)
static void regEmitICmpInst(RegInfo& RI, InstLoc I, CCFlags flag) static void regEmitICmpInst(RegInfo& RI, InstLoc I, CCFlags flag)
{ {
regEmitCmp(RI, I); regEmitCmp(RI, I);
RI.Jit->SETcc(flag, R(ECX)); // Caution: SETCC uses 8-bit regs! RI.Jit->SETcc(flag, R(EDX)); // Caution: SETCC uses 8-bit regs!
X64Reg reg = regBinReg(RI, I); X64Reg reg = regBinReg(RI, I);
RI.Jit->MOVZX(32, 8, reg, R(ECX)); RI.Jit->MOVZX(32, 8, reg, R(EDX));
RI.regs[reg] = I; RI.regs[reg] = I;
regNormalRegClear(RI, I); regNormalRegClear(RI, I);
} }
@ -1111,11 +1113,11 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
} }
case StoreFPRF: case StoreFPRF:
{ {
Jit->MOV(32, R(ECX), regLocForInst(RI, getOp1(I))); Jit->MOV(32, R(EDX), regLocForInst(RI, getOp1(I)));
Jit->AND(32, R(ECX), Imm8(0x1F)); Jit->AND(32, R(EDX), Imm8(0x1F));
Jit->SHL(32, R(ECX), Imm8(12)); Jit->SHL(32, R(EDX), Imm8(12));
Jit->AND(32, PPCSTATE(fpscr), Imm32(~(0x1F << 12))); Jit->AND(32, PPCSTATE(fpscr), Imm32(~(0x1F << 12)));
Jit->OR(32, PPCSTATE(fpscr), R(ECX)); Jit->OR(32, PPCSTATE(fpscr), R(EDX));
regNormalRegClear(RI, I); regNormalRegClear(RI, I);
break; break;
} }
@ -1155,8 +1157,8 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
break; break;
X64Reg reg = regUReg(RI, I); X64Reg reg = regUReg(RI, I);
Jit->MOV(32, R(ECX), regLocForInst(RI, getOp1(I))); Jit->MOV(32, R(EDX), regLocForInst(RI, getOp1(I)));
Jit->MOVSX(32, 8, reg, R(ECX)); Jit->MOVSX(32, 8, reg, R(EDX));
RI.regs[reg] = I; RI.regs[reg] = I;
regNormalRegClear(RI, I); regNormalRegClear(RI, I);
break; break;
@ -1178,9 +1180,9 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
break; break;
X64Reg reg = regUReg(RI, I); X64Reg reg = regUReg(RI, I);
Jit->MOV(32, R(ECX), Imm32(63)); Jit->MOV(32, R(EDX), Imm32(63));
Jit->BSR(32, reg, regLocForInst(RI, getOp1(I))); Jit->BSR(32, reg, regLocForInst(RI, getOp1(I)));
Jit->CMOVcc(32, reg, R(ECX), CC_Z); Jit->CMOVcc(32, reg, R(EDX), CC_Z);
Jit->XOR(32, R(reg), Imm8(31)); Jit->XOR(32, R(reg), Imm8(31));
RI.regs[reg] = I; RI.regs[reg] = I;
regNormalRegClear(RI, I); regNormalRegClear(RI, I);
@ -1422,30 +1424,30 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
Jit->XOR(32, R(EAX), R(EAX)); Jit->XOR(32, R(EAX), R(EAX));
// SO: Bit 61 set. // SO: Bit 61 set.
Jit->MOV(64, R(RCX), R(cr_val)); Jit->MOV(64, R(RDX), R(cr_val));
Jit->SHR(64, R(RCX), Imm8(61)); Jit->SHR(64, R(RDX), Imm8(61));
Jit->AND(32, R(ECX), Imm8(1)); Jit->AND(32, R(EDX), Imm8(1));
Jit->OR(32, R(EAX), R(ECX)); Jit->OR(32, R(EAX), R(EDX));
// EQ: Bits 31-0 == 0. // EQ: Bits 31-0 == 0.
Jit->XOR(32, R(ECX), R(ECX)); Jit->XOR(32, R(EDX), R(EDX));
Jit->TEST(32, R(cr_val), R(cr_val)); Jit->TEST(32, R(cr_val), R(cr_val));
Jit->SETcc(CC_Z, R(ECX)); Jit->SETcc(CC_Z, R(EDX));
Jit->SHL(32, R(ECX), Imm8(1)); Jit->SHL(32, R(EDX), Imm8(1));
Jit->OR(32, R(EAX), R(ECX)); Jit->OR(32, R(EAX), R(EDX));
// GT: Value > 0. // GT: Value > 0.
Jit->XOR(32, R(ECX), R(ECX)); Jit->XOR(32, R(EDX), R(EDX));
Jit->TEST(64, R(cr_val), R(cr_val)); Jit->TEST(64, R(cr_val), R(cr_val));
Jit->SETcc(CC_G, R(ECX)); Jit->SETcc(CC_G, R(EDX));
Jit->SHL(32, R(ECX), Imm8(2)); Jit->SHL(32, R(EDX), Imm8(2));
Jit->OR(32, R(EAX), R(ECX)); Jit->OR(32, R(EAX), R(EDX));
// LT: Bit 62 set. // LT: Bit 62 set.
Jit->MOV(64, R(ECX), R(cr_val)); Jit->MOV(64, R(EDX), R(cr_val));
Jit->SHR(64, R(ECX), Imm8(62 - 3)); Jit->SHR(64, R(EDX), Imm8(62 - 3));
Jit->AND(32, R(ECX), Imm8(0x8)); Jit->AND(32, R(EDX), Imm8(0x8));
Jit->OR(32, R(EAX), R(ECX)); Jit->OR(32, R(EAX), R(EDX));
Jit->MOV(32, R(cr_val), R(EAX)); Jit->MOV(32, R(cr_val), R(EAX));
RI.regs[cr_val] = I; RI.regs[cr_val] = I;
@ -1460,34 +1462,34 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
X64Reg cr_val = regUReg(RI, I); X64Reg cr_val = regUReg(RI, I);
Jit->MOV(64, R(cr_val), regLocForInst(RI, getOp1(I))); Jit->MOV(64, R(cr_val), regLocForInst(RI, getOp1(I)));
Jit->MOV(64, R(RCX), Imm64(1ull << 32)); Jit->MOV(64, R(RDX), Imm64(1ull << 32));
// SO // SO
Jit->MOV(64, R(RAX), R(cr_val)); Jit->MOV(64, R(RAX), R(cr_val));
Jit->SHL(64, R(RAX), Imm8(63)); Jit->SHL(64, R(RAX), Imm8(63));
Jit->SHR(64, R(RAX), Imm8(63 - 61)); Jit->SHR(64, R(RAX), Imm8(63 - 61));
Jit->OR(64, R(RCX), R(RAX)); Jit->OR(64, R(RDX), R(RAX));
// EQ // EQ
Jit->MOV(64, R(RAX), R(cr_val)); Jit->MOV(64, R(RAX), R(cr_val));
Jit->NOT(64, R(RAX)); Jit->NOT(64, R(RAX));
Jit->AND(64, R(RAX), Imm8(CR_EQ)); Jit->AND(64, R(RAX), Imm8(CR_EQ));
Jit->OR(64, R(RCX), R(RAX)); Jit->OR(64, R(RDX), R(RAX));
// GT // GT
Jit->MOV(64, R(RAX), R(cr_val)); Jit->MOV(64, R(RAX), R(cr_val));
Jit->NOT(64, R(RAX)); Jit->NOT(64, R(RAX));
Jit->AND(64, R(RAX), Imm8(CR_GT)); Jit->AND(64, R(RAX), Imm8(CR_GT));
Jit->SHL(64, R(RAX), Imm8(63 - 2)); Jit->SHL(64, R(RAX), Imm8(63 - 2));
Jit->OR(64, R(RCX), R(RAX)); Jit->OR(64, R(RDX), R(RAX));
// LT // LT
Jit->MOV(64, R(RAX), R(cr_val)); Jit->MOV(64, R(RAX), R(cr_val));
Jit->AND(64, R(RAX), Imm8(CR_LT)); Jit->AND(64, R(RAX), Imm8(CR_LT));
Jit->SHL(64, R(RAX), Imm8(62 - 3)); Jit->SHL(64, R(RAX), Imm8(62 - 3));
Jit->OR(64, R(RCX), R(RAX)); Jit->OR(64, R(RDX), R(RAX));
Jit->MOV(64, R(cr_val), R(RCX)); Jit->MOV(64, R(cr_val), R(RDX));
RI.regs[cr_val] = I; RI.regs[cr_val] = I;
regNormalRegClear(RI, I); regNormalRegClear(RI, I);
@ -1553,9 +1555,9 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
break; break;
X64Reg reg = fregFindFreeReg(RI); X64Reg reg = fregFindFreeReg(RI);
Jit->MOV(32, R(ECX), regLocForInst(RI, getOp1(I))); Jit->MOV(32, R(EDX), regLocForInst(RI, getOp1(I)));
RI.Jit->SafeLoadToReg(ECX, R(ECX), 32, 0, regsInUse(RI), false, EmuCodeBlock::SAFE_LOADSTORE_NO_FASTMEM); RI.Jit->SafeLoadToReg(EDX, R(EDX), 32, 0, regsInUse(RI), false, EmuCodeBlock::SAFE_LOADSTORE_NO_FASTMEM);
Jit->MOVD_xmm(reg, R(ECX)); Jit->MOVD_xmm(reg, R(EDX));
RI.fregs[reg] = I; RI.fregs[reg] = I;
regNormalRegClear(RI, I); regNormalRegClear(RI, I);
break; break;
@ -1567,9 +1569,9 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
X64Reg reg = fregFindFreeReg(RI); X64Reg reg = fregFindFreeReg(RI);
const OpArg loc = regLocForInst(RI, getOp1(I)); const OpArg loc = regLocForInst(RI, getOp1(I));
Jit->MOV(32, R(ECX), loc); Jit->MOV(32, R(EDX), loc);
RI.Jit->SafeLoadToReg(RCX, R(ECX), 64, 0, regsInUse(RI), false, EmuCodeBlock::SAFE_LOADSTORE_NO_FASTMEM); RI.Jit->SafeLoadToReg(RDX, R(EDX), 64, 0, regsInUse(RI), false, EmuCodeBlock::SAFE_LOADSTORE_NO_FASTMEM);
Jit->MOVQ_xmm(reg, R(RCX)); Jit->MOVQ_xmm(reg, R(RDX));
RI.fregs[reg] = I; RI.fregs[reg] = I;
regNormalRegClear(RI, I); regNormalRegClear(RI, I);
break; break;
@ -1591,11 +1593,10 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
// 0b0011111100000111, or 0x3F07. // 0b0011111100000111, or 0x3F07.
Jit->MOV(32, R(EAX), Imm32(0x3F07)); Jit->MOV(32, R(EAX), Imm32(0x3F07));
Jit->AND(32, R(EAX), M(((char *)&GQR(quantreg)) + 2)); Jit->AND(32, R(EAX), M(((char *)&GQR(quantreg)) + 2));
Jit->MOVZX(32, 8, EDX, R(AL)); Jit->OR(32, R(EAX), Imm8(w << 3));
Jit->OR(32, R(EDX), Imm8(w << 3));
Jit->MOV(32, R(ECX), regLocForInst(RI, getOp1(I))); Jit->MOV(32, R(EDX), regLocForInst(RI, getOp1(I)));
Jit->CALLptr(MScaled(EDX, SCALE_8, (u32)(u64)(((JitIL *)jit)->asm_routines.pairedLoadQuantized))); Jit->CALLptr(MScaled(EAX, SCALE_8, (u32)(u64)(((JitIL *)jit)->asm_routines.pairedLoadQuantized)));
Jit->MOVAPD(reg, R(XMM0)); Jit->MOVAPD(reg, R(XMM0));
RI.fregs[reg] = I; RI.fregs[reg] = I;
regNormalRegClear(RI, I); regNormalRegClear(RI, I);
@ -1610,8 +1611,8 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
else else
Jit->MOV(32, R(EAX), loc1); Jit->MOV(32, R(EAX), loc1);
Jit->MOV(32, R(ECX), regLocForInst(RI, getOp2(I))); Jit->MOV(32, R(EDX), regLocForInst(RI, getOp2(I)));
RI.Jit->SafeWriteRegToReg(EAX, ECX, 32, 0, regsInUse(RI), EmuCodeBlock::SAFE_LOADSTORE_NO_FASTMEM); RI.Jit->SafeWriteRegToReg(EAX, EDX, 32, 0, regsInUse(RI), EmuCodeBlock::SAFE_LOADSTORE_NO_FASTMEM);
if (RI.IInfo[I - RI.FirstI] & 4) if (RI.IInfo[I - RI.FirstI] & 4)
fregClearInst(RI, getOp1(I)); fregClearInst(RI, getOp1(I));
if (RI.IInfo[I - RI.FirstI] & 8) if (RI.IInfo[I - RI.FirstI] & 8)
@ -1626,8 +1627,8 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
OpArg address = regLocForInst(RI, getOp2(I)); OpArg address = regLocForInst(RI, getOp2(I));
Jit->MOVAPD(XMM0, value); Jit->MOVAPD(XMM0, value);
Jit->MOVQ_xmm(R(RAX), XMM0); Jit->MOVQ_xmm(R(RAX), XMM0);
Jit->MOV(32, R(ECX), address); Jit->MOV(32, R(EDX), address);
RI.Jit->SafeWriteRegToReg(RAX, ECX, 64, 0, regsInUse(RI), EmuCodeBlock::SAFE_LOADSTORE_NO_FASTMEM); RI.Jit->SafeWriteRegToReg(RAX, EDX, 64, 0, regsInUse(RI), EmuCodeBlock::SAFE_LOADSTORE_NO_FASTMEM);
if (RI.IInfo[I - RI.FirstI] & 4) if (RI.IInfo[I - RI.FirstI] & 4)
fregClearInst(RI, getOp1(I)); fregClearInst(RI, getOp1(I));
@ -1644,7 +1645,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
Jit->AND(32, R(EAX), PPCSTATE(spr[SPR_GQR0 + quantreg])); Jit->AND(32, R(EAX), PPCSTATE(spr[SPR_GQR0 + quantreg]));
Jit->MOVZX(32, 8, EDX, R(AL)); Jit->MOVZX(32, 8, EDX, R(AL));
Jit->MOV(32, R(ECX), regLocForInst(RI, getOp2(I))); Jit->MOV(32, R(EDX), regLocForInst(RI, getOp2(I)));
Jit->MOVAPD(XMM0, fregLocForInst(RI, getOp1(I))); Jit->MOVAPD(XMM0, fregLocForInst(RI, getOp1(I)));
Jit->CALLptr(MScaled(EDX, SCALE_8, (u32)(u64)(((JitIL *)jit)->asm_routines.pairedStoreQuantized))); Jit->CALLptr(MScaled(EDX, SCALE_8, (u32)(u64)(((JitIL *)jit)->asm_routines.pairedStoreQuantized)));
if (RI.IInfo[I - RI.FirstI] & 4) if (RI.IInfo[I - RI.FirstI] & 4)
@ -1790,9 +1791,9 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
X64Reg reg = fregFindFreeReg(RI); X64Reg reg = fregFindFreeReg(RI);
unsigned ppcreg = *I >> 8; unsigned ppcreg = *I >> 8;
char *p = (char*)&(PowerPC::ppcState.ps[ppcreg][0]); char *p = (char*)&(PowerPC::ppcState.ps[ppcreg][0]);
Jit->MOV(32, R(ECX), M(p+4)); Jit->MOV(32, R(EDX), M(p+4));
Jit->AND(32, R(ECX), Imm32(0x7ff00000)); Jit->AND(32, R(EDX), Imm32(0x7ff00000));
Jit->CMP(32, R(ECX), Imm32(0x38000000)); Jit->CMP(32, R(EDX), Imm32(0x38000000));
FixupBranch ok = Jit->J_CC(CC_AE); FixupBranch ok = Jit->J_CC(CC_AE);
Jit->AND(32, M(p+4), Imm32(0x80000000)); Jit->AND(32, M(p+4), Imm32(0x80000000));
Jit->MOV(32, M(p), Imm32(0)); Jit->MOV(32, M(p), Imm32(0));
@ -2204,10 +2205,10 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
const u32 mask = 0x87C0FFFF; const u32 mask = 0x87C0FFFF;
// MSR = (MSR & ~mask) | (SRR1 & mask); // MSR = (MSR & ~mask) | (SRR1 & mask);
Jit->MOV(32, R(EAX), PPCSTATE(msr)); Jit->MOV(32, R(EAX), PPCSTATE(msr));
Jit->MOV(32, R(ECX), PPCSTATE_SRR1); Jit->MOV(32, R(EDX), PPCSTATE_SRR1);
Jit->AND(32, R(EAX), Imm32(~mask)); Jit->AND(32, R(EAX), Imm32(~mask));
Jit->AND(32, R(ECX), Imm32(mask)); Jit->AND(32, R(EDX), Imm32(mask));
Jit->OR(32, R(EAX), R(ECX)); Jit->OR(32, R(EAX), R(EDX));
// MSR &= 0xFFFBFFFF; // Mask used to clear the bit MSR[13] // MSR &= 0xFFFBFFFF; // Mask used to clear the bit MSR[13]
Jit->AND(32, R(EAX), Imm32(0xFFFBFFFF)); Jit->AND(32, R(EAX), Imm32(0xFFFBFFFF));
Jit->MOV(32, PPCSTATE(msr), R(EAX)); Jit->MOV(32, PPCSTATE(msr), R(EAX));

View File

@ -9,7 +9,7 @@
#include "Core/PowerPC/JitCommon/JitAsmCommon.h" #include "Core/PowerPC/JitCommon/JitAsmCommon.h"
#include "Core/PowerPC/JitCommon/JitBase.h" #include "Core/PowerPC/JitCommon/JitBase.h"
#define QUANTIZED_REGS_TO_SAVE (ABI_ALL_CALLER_SAVED & ~((1 << RAX) | (1 << RCX) | (1 << RDX) | \ #define QUANTIZED_REGS_TO_SAVE (ABI_ALL_CALLER_SAVED & ~((1 << RAX) | (1 << RCX) | \
(1 << (XMM0+16)) | (1 << (XMM1+16)))) (1 << (XMM0+16)) | (1 << (XMM1+16))))
using namespace Gen; using namespace Gen;
@ -18,19 +18,15 @@ static int temp32;
void CommonAsmRoutines::GenFifoWrite(int size) void CommonAsmRoutines::GenFifoWrite(int size)
{ {
// Assume value in ABI_PARAM1 // Assume value in EDX
PUSH(ESI); PUSH(ESI);
if (size != 32)
PUSH(EDX);
MOV(32, R(EAX), Imm32((u32)(u64)GPFifo::m_gatherPipe)); MOV(32, R(EAX), Imm32((u32)(u64)GPFifo::m_gatherPipe));
MOV(32, R(ESI), M(&GPFifo::m_gatherPipeCount)); MOV(32, R(ESI), M(&GPFifo::m_gatherPipeCount));
SwapAndStore(size, MComplex(RAX, RSI, 1, 0), ABI_PARAM1); SwapAndStore(size, MComplex(RAX, RSI, 1, 0), EDX);
ADD(32, R(ESI), Imm8(size >> 3)); ADD(32, R(ESI), Imm8(size >> 3));
MOV(32, M(&GPFifo::m_gatherPipeCount), R(ESI)); MOV(32, M(&GPFifo::m_gatherPipeCount), R(ESI));
if (size != 32)
POP(EDX);
POP(ESI); POP(ESI);
RET(); RET();
} }
@ -39,7 +35,6 @@ void CommonAsmRoutines::GenFifoFloatWrite()
{ {
// Assume value in XMM0 // Assume value in XMM0
PUSH(ESI); PUSH(ESI);
PUSH(EDX);
MOVSS(M(&temp32), XMM0); MOVSS(M(&temp32), XMM0);
MOV(32, R(EDX), M(&temp32)); MOV(32, R(EDX), M(&temp32));
MOV(32, R(EAX), Imm32((u32)(u64)GPFifo::m_gatherPipe)); MOV(32, R(EAX), Imm32((u32)(u64)GPFifo::m_gatherPipe));
@ -47,7 +42,6 @@ void CommonAsmRoutines::GenFifoFloatWrite()
SwapAndStore(32, MComplex(RAX, RSI, 1, 0), EDX); SwapAndStore(32, MComplex(RAX, RSI, 1, 0), EDX);
ADD(32, R(ESI), Imm8(4)); ADD(32, R(ESI), Imm8(4));
MOV(32, M(&GPFifo::m_gatherPipeCount), R(ESI)); MOV(32, M(&GPFifo::m_gatherPipeCount), R(ESI));
POP(EDX);
POP(ESI); POP(ESI);
RET(); RET();
} }

View File

@ -59,6 +59,7 @@ const u8 *TrampolineCache::GetReadTrampoline(const InstructionInfo &info, u32 re
// It ought to be necessary to align the stack here. Since it seems to not // It ought to be necessary to align the stack here. Since it seems to not
// affect anybody, I'm not going to add it just to be completely safe about // affect anybody, I'm not going to add it just to be completely safe about
// performance. // performance.
ABI_PushRegistersAndAdjustStack(registersInUse, true);
if (addrReg != ABI_PARAM1) if (addrReg != ABI_PARAM1)
MOV(32, R(ABI_PARAM1), R((X64Reg)addrReg)); MOV(32, R(ABI_PARAM1), R((X64Reg)addrReg));
@ -66,7 +67,6 @@ const u8 *TrampolineCache::GetReadTrampoline(const InstructionInfo &info, u32 re
if (info.displacement) if (info.displacement)
ADD(32, R(ABI_PARAM1), Imm32(info.displacement)); ADD(32, R(ABI_PARAM1), Imm32(info.displacement));
ABI_PushRegistersAndAdjustStack(registersInUse, true);
switch (info.operandSize) switch (info.operandSize)
{ {
case 4: case 4:
@ -115,6 +115,8 @@ const u8 *TrampolineCache::GetWriteTrampoline(const InstructionInfo &info, u32 r
// PC is used by memory watchpoints (if enabled) or to print accurate PC locations in debug logs // PC is used by memory watchpoints (if enabled) or to print accurate PC locations in debug logs
MOV(32, PPCSTATE(pc), Imm32(pc)); MOV(32, PPCSTATE(pc), Imm32(pc));
ABI_PushRegistersAndAdjustStack(registersInUse, true);
MOVTwo(64, ABI_PARAM1, dataReg, ABI_PARAM2, addrReg, ABI_PARAM3); MOVTwo(64, ABI_PARAM1, dataReg, ABI_PARAM2, addrReg, ABI_PARAM3);
if (info.displacement) if (info.displacement)
@ -122,7 +124,6 @@ const u8 *TrampolineCache::GetWriteTrampoline(const InstructionInfo &info, u32 r
ADD(32, R(ABI_PARAM2), Imm32(info.displacement)); ADD(32, R(ABI_PARAM2), Imm32(info.displacement));
} }
ABI_PushRegistersAndAdjustStack(registersInUse, true);
switch (info.operandSize) switch (info.operandSize)
{ {
case 8: case 8:

View File

@ -5,7 +5,6 @@
#include <emmintrin.h> #include <emmintrin.h>
#include "Common/Common.h" #include "Common/Common.h"
#include "Common/CPUDetect.h"
#include "Common/MathUtil.h" #include "Common/MathUtil.h"
#include "Core/HW/MMIO.h" #include "Core/HW/MMIO.h"
@ -248,13 +247,11 @@ void EmuCodeBlock::MMIOLoadToReg(MMIO::Mapping* mmio, Gen::X64Reg reg_value,
} }
} }
// Always clobbers EAX. Preserves the address.
// Preserves the value if the load fails and js.memcheck is enabled.
void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress, int accessSize, s32 offset, u32 registersInUse, bool signExtend, int flags) void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress, int accessSize, s32 offset, u32 registersInUse, bool signExtend, int flags)
{ {
if (!jit->js.memcheck) if (!jit->js.memcheck)
{ {
registersInUse &= ~(1 << RAX | 1 << reg_value); registersInUse &= ~(1 << reg_value);
} }
if (!Core::g_CoreStartupParameter.bMMU && if (!Core::g_CoreStartupParameter.bMMU &&
Core::g_CoreStartupParameter.bFastmem && Core::g_CoreStartupParameter.bFastmem &&
@ -395,11 +392,6 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress,
u8 *EmuCodeBlock::UnsafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset, bool swap) u8 *EmuCodeBlock::UnsafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset, bool swap)
{ {
if (accessSize == 8 && reg_value >= 4)
{
PanicAlert("WARNING: likely incorrect use of UnsafeWriteRegToReg!");
}
u8* result = GetWritableCodePtr(); u8* result = GetWritableCodePtr();
OpArg dest = MComplex(RBX, reg_addr, SCALE_1, offset); OpArg dest = MComplex(RBX, reg_addr, SCALE_1, offset);
if (swap) if (swap)
@ -410,6 +402,7 @@ u8 *EmuCodeBlock::UnsafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acc
} }
else else
{ {
if (accessSize > 8)
BSWAP(accessSize, reg_value); BSWAP(accessSize, reg_value);
result = GetWritableCodePtr(); result = GetWritableCodePtr();
MOV(accessSize, dest, R(reg_value)); MOV(accessSize, dest, R(reg_value));
@ -423,10 +416,8 @@ u8 *EmuCodeBlock::UnsafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acc
return result; return result;
} }
// Destroys both arg registers
void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset, u32 registersInUse, int flags) void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset, u32 registersInUse, int flags)
{ {
registersInUse &= ~(1 << RAX);
if (!Core::g_CoreStartupParameter.bMMU && if (!Core::g_CoreStartupParameter.bMMU &&
Core::g_CoreStartupParameter.bFastmem && Core::g_CoreStartupParameter.bFastmem &&
!(flags & (SAFE_LOADSTORE_NO_SWAP | SAFE_LOADSTORE_NO_FASTMEM)) !(flags & (SAFE_LOADSTORE_NO_SWAP | SAFE_LOADSTORE_NO_FASTMEM))
@ -449,7 +440,17 @@ void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acce
} }
if (offset) if (offset)
{
if (flags & SAFE_LOADSTORE_CLOBBER_EAX_INSTEAD_OF_ADDR)
{
LEA(32, EAX, MDisp(reg_addr, (u32)offset));
reg_addr = EAX;
}
else
{
ADD(32, R(reg_addr), Imm32((u32)offset)); ADD(32, R(reg_addr), Imm32((u32)offset));
}
}
u32 mem_mask = Memory::ADDR_MASK_HW_ACCESS; u32 mem_mask = Memory::ADDR_MASK_HW_ACCESS;

View File

@ -6,6 +6,7 @@
#include <unordered_map> #include <unordered_map>
#include "Common/CPUDetect.h"
#include "Common/x64Emitter.h" #include "Common/x64Emitter.h"
namespace MMIO { class Mapping; } namespace MMIO { class Mapping; }
@ -52,11 +53,21 @@ public:
{ {
SAFE_LOADSTORE_NO_SWAP = 1, SAFE_LOADSTORE_NO_SWAP = 1,
SAFE_LOADSTORE_NO_PROLOG = 2, SAFE_LOADSTORE_NO_PROLOG = 2,
SAFE_LOADSTORE_NO_FASTMEM = 4 SAFE_LOADSTORE_NO_FASTMEM = 4,
SAFE_LOADSTORE_CLOBBER_EAX_INSTEAD_OF_ADDR = 8
}; };
void SafeLoadToReg(Gen::X64Reg reg_value, const Gen::OpArg & opAddress, int accessSize, s32 offset, u32 registersInUse, bool signExtend, int flags = 0); void SafeLoadToReg(Gen::X64Reg reg_value, const Gen::OpArg & opAddress, int accessSize, s32 offset, u32 registersInUse, bool signExtend, int flags = 0);
// Clobbers EAX if SAFE_LOADSTORE_CLOBBER_EAX_INSTEAD_OF_ADDR is set, otherwise
// reg_addr. Preserves reg_value if the store fails and js.memcheck is enabled.
void SafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset, u32 registersInUse, int flags = 0); void SafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset, u32 registersInUse, int flags = 0);
// Reports whether a WriteRegToReg call (safe or unsafe variant) will
// overwrite the register holding the value being stored.
// That is the case only for a byte-swapped store wider than 8 bits on a
// CPU without MOVBE, where the value is byte-swapped in its own register
// before the store.
bool WriteClobbersRegValue(int accessSize, bool swap)
{
	// Unswapped and single-byte stores never touch the value register.
	if (!swap || accessSize <= 8)
		return false;
	// With MOVBE the swap happens as part of the store itself.
	return !cpu_info.bMOVBE;
}
void SafeWriteF32ToReg(Gen::X64Reg xmm_value, Gen::X64Reg reg_addr, s32 offset, u32 registersInUse, int flags = 0); void SafeWriteF32ToReg(Gen::X64Reg xmm_value, Gen::X64Reg reg_addr, s32 offset, u32 registersInUse, int flags = 0);
void WriteToConstRamAddress(int accessSize, Gen::X64Reg arg, u32 address, bool swap = false); void WriteToConstRamAddress(int accessSize, Gen::X64Reg arg, u32 address, bool swap = false);

View File

@ -40,7 +40,7 @@ instruction and generates code. Dead code elimination works in this step,
by simply skipping unused instructions. The register allocator is a dumb, by simply skipping unused instructions. The register allocator is a dumb,
greedy allocator: at the moment, it's really a bit too dumb, but it's greedy allocator: at the moment, it's really a bit too dumb, but it's
actually not as bad as it looks: unless a block is relatively long, spills actually not as bad as it looks: unless a block is relatively long, spills
are rarely needed. ECX is used as a scratch register: requiring a scratch are rarely needed. EDX is used as a scratch register: requiring a scratch
register isn't ideal, but the register allocator is too dumb to handle register isn't ideal, but the register allocator is too dumb to handle
instructions that need a specific register at the moment. instructions that need a specific register at the moment.