Merge pull request #1947 from Sonicadvance1/AArch64_tu_tl_merge
[AArch64] Implement TU/TL merging.
This commit is contained in:
commit
c340a324bc
|
@ -1010,9 +1010,9 @@ void ARM64XEmitter::SMSUBL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra)
|
||||||
{
|
{
|
||||||
EncodeData3SrcInst(3, Rd, Rn, Rm, Ra);
|
EncodeData3SrcInst(3, Rd, Rn, Rm, Ra);
|
||||||
}
|
}
|
||||||
void ARM64XEmitter::SMULH(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra)
|
void ARM64XEmitter::SMULH(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
|
||||||
{
|
{
|
||||||
EncodeData3SrcInst(4, Rd, Rn, Rm, Ra);
|
EncodeData3SrcInst(4, Rd, Rn, Rm, SP);
|
||||||
}
|
}
|
||||||
void ARM64XEmitter::UMADDL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra)
|
void ARM64XEmitter::UMADDL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra)
|
||||||
{
|
{
|
||||||
|
@ -1022,9 +1022,9 @@ void ARM64XEmitter::UMSUBL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra)
|
||||||
{
|
{
|
||||||
EncodeData3SrcInst(6, Rd, Rn, Rm, Ra);
|
EncodeData3SrcInst(6, Rd, Rn, Rm, Ra);
|
||||||
}
|
}
|
||||||
void ARM64XEmitter::UMULH(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra)
|
void ARM64XEmitter::UMULH(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
|
||||||
{
|
{
|
||||||
EncodeData3SrcInst(7, Rd, Rn, Rm, Ra);
|
EncodeData3SrcInst(7, Rd, Rn, Rm, SP);
|
||||||
}
|
}
|
||||||
void ARM64XEmitter::MUL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
|
void ARM64XEmitter::MUL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
|
||||||
{
|
{
|
||||||
|
|
|
@ -478,10 +478,10 @@ public:
|
||||||
void MSUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
|
void MSUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
|
||||||
void SMADDL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
|
void SMADDL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
|
||||||
void SMSUBL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
|
void SMSUBL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
|
||||||
void SMULH(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
|
void SMULH(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
|
||||||
void UMADDL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
|
void UMADDL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
|
||||||
void UMSUBL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
|
void UMSUBL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
|
||||||
void UMULH(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
|
void UMULH(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
|
||||||
void MUL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
|
void MUL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
|
||||||
void MNEG(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
|
void MNEG(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
|
||||||
|
|
||||||
|
|
|
@ -115,7 +115,6 @@ public:
|
||||||
void GenerateConstantOverflow(bool overflow);
|
void GenerateConstantOverflow(bool overflow);
|
||||||
void GenerateConstantOverflow(s64 val);
|
void GenerateConstantOverflow(s64 val);
|
||||||
void GenerateOverflow();
|
void GenerateOverflow();
|
||||||
bool MergeAllowedNextInstructions(int count);
|
|
||||||
void FinalizeCarryOverflow(bool oe, bool inv = false);
|
void FinalizeCarryOverflow(bool oe, bool inv = false);
|
||||||
void FinalizeCarry(Gen::CCFlags cond);
|
void FinalizeCarry(Gen::CCFlags cond);
|
||||||
void FinalizeCarry(bool ca);
|
void FinalizeCarry(bool ca);
|
||||||
|
|
|
@ -50,22 +50,6 @@ void Jit64::GenerateOverflow()
|
||||||
SetJumpTarget(exit);
|
SetJumpTarget(exit);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Decides whether the ops js.op[1] .. js.op[count] may be merged into the
// instruction currently being compiled.
// Returns false when the CPU state is CPU_STEPPING, when js.instructionsLeft
// is smaller than count, when debugging is enabled and one of those ops sits
// on an address breakpoint, or when one of those ops is a branch target.
// Returns true otherwise.
bool Jit64::MergeAllowedNextInstructions(int count)
|
|
||||||
{
|
|
||||||
// Early out: stepping, or not enough instructions left to cover count ops.
if (PowerPC::GetState() == PowerPC::CPU_STEPPING || js.instructionsLeft < count)
|
|
||||||
return false;
|
|
||||||
// Be careful: a breakpoint kills flags in between instructions
|
|
||||||
// Index starts at 1, so js.op[0] is never inspected here.
for (int i = 1; i <= count; i++)
|
|
||||||
{
|
|
||||||
// The breakpoint lookup is only consulted when bEnableDebugging is set.
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bEnableDebugging &&
|
|
||||||
PowerPC::breakpoints.IsAddressBreakPoint(js.op[i].address))
|
|
||||||
return false;
|
|
||||||
// An op flagged as a branch target also blocks the merge.
if (js.op[i].isBranchTarget)
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
void Jit64::FinalizeCarry(CCFlags cond)
|
void Jit64::FinalizeCarry(CCFlags cond)
|
||||||
{
|
{
|
||||||
js.carryFlagSet = false;
|
js.carryFlagSet = false;
|
||||||
|
|
|
@ -320,6 +320,9 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitB
|
||||||
BRK(0x666);
|
BRK(0x666);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
i += js.skipInstructions;
|
||||||
|
js.skipInstructions = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (code_block.m_memory_exception)
|
if (code_block.m_memory_exception)
|
||||||
|
|
|
@ -203,12 +203,80 @@ void JitArm64::mfspr(UGeckoInstruction inst)
|
||||||
JITDISABLE(bJITSystemRegistersOff);
|
JITDISABLE(bJITSystemRegistersOff);
|
||||||
|
|
||||||
u32 iIndex = (inst.SPRU << 5) | (inst.SPRL & 0x1F);
|
u32 iIndex = (inst.SPRU << 5) | (inst.SPRL & 0x1F);
|
||||||
|
int d = inst.RD;
|
||||||
switch (iIndex)
|
switch (iIndex)
|
||||||
{
|
{
|
||||||
|
case SPR_TL:
|
||||||
|
case SPR_TU:
|
||||||
|
{
|
||||||
|
ARM64Reg WA = gpr.GetReg();
|
||||||
|
ARM64Reg WB = gpr.GetReg();
|
||||||
|
ARM64Reg XA = EncodeRegTo64(WA);
|
||||||
|
ARM64Reg XB = EncodeRegTo64(WB);
|
||||||
|
|
||||||
|
// An inline implementation of CoreTiming::GetFakeTimeBase, since in timer-heavy games the
|
||||||
|
// cost of calling out to C for this is actually significant.
|
||||||
|
MOVI2R(XA, (u64)&CoreTiming::globalTimer);
|
||||||
|
LDR(INDEX_UNSIGNED, XA, XA, 0);
|
||||||
|
MOVI2R(XB, (u64)&CoreTiming::fakeTBStartTicks);
|
||||||
|
LDR(INDEX_UNSIGNED, XB, XB, 0);
|
||||||
|
SUB(XA, XA, XB);
|
||||||
|
|
||||||
|
// It might seem convenient to correct the timer for the block position here for even more accurate
|
||||||
|
// timing, but at present this can break games. If we end up reading a time *after* the time
|
||||||
|
// at which an interrupt was supposed to occur, e.g. because we're 100 cycles into a block with only
|
||||||
|
// 50 downcount remaining, some games don't function correctly, such as Karaoke Party Revolution,
|
||||||
|
// which won't get past the loading screen.
|
||||||
|
// a / 12 = (a * 0xAAAAAAAAAAAAAAAB) >> 67
|
||||||
|
ORR(XB, SP, 1, 60);
|
||||||
|
ADD(XB, XB, 1);
|
||||||
|
UMULH(XA, XA, XB);
|
||||||
|
|
||||||
|
MOVI2R(XB, (u64)&CoreTiming::fakeTBStartValue);
|
||||||
|
LDR(INDEX_UNSIGNED, XB, XB, 0);
|
||||||
|
ADD(XA, XB, XA, ArithOption(XA, ST_LSR, 3));
|
||||||
|
STR(INDEX_UNSIGNED, XA, X29, PPCSTATE_OFF(spr[SPR_TL]));
|
||||||
|
|
||||||
|
if (MergeAllowedNextInstructions(1))
|
||||||
|
{
|
||||||
|
const UGeckoInstruction& next = js.op[1].inst;
|
||||||
|
// Two calls of TU/TL next to each other are extremely common in typical usage, so merge them
|
||||||
|
// if we can.
|
||||||
|
u32 nextIndex = (next.SPRU << 5) | (next.SPRL & 0x1F);
|
||||||
|
// Be careful; the actual opcode is for mftb (371), not mfspr (339)
|
||||||
|
int n = next.RD;
|
||||||
|
if (next.OPCD == 31 && next.SUBOP10 == 371 && (nextIndex == SPR_TU || nextIndex == SPR_TL) && n != d)
|
||||||
|
{
|
||||||
|
js.downcountAmount++;
|
||||||
|
js.skipInstructions = 1;
|
||||||
|
gpr.BindToRegister(d, false);
|
||||||
|
gpr.BindToRegister(n, false);
|
||||||
|
if (iIndex == SPR_TL)
|
||||||
|
MOV(gpr.R(d), WA);
|
||||||
|
else
|
||||||
|
ORR(EncodeRegTo64(gpr.R(d)), SP, XA, ArithOption(XA, ST_LSR, 32));
|
||||||
|
|
||||||
|
if (nextIndex == SPR_TL)
|
||||||
|
MOV(gpr.R(n), WA);
|
||||||
|
else
|
||||||
|
ORR(EncodeRegTo64(gpr.R(n)), SP, XA, ArithOption(XA, ST_LSR, 32));
|
||||||
|
|
||||||
|
gpr.Unlock(WA, WB);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
gpr.BindToRegister(d, false);
|
||||||
|
if (iIndex == SPR_TU)
|
||||||
|
ORR(EncodeRegTo64(gpr.R(d)), SP, XA, ArithOption(XA, ST_LSR, 32));
|
||||||
|
else
|
||||||
|
MOV(gpr.R(d), WA);
|
||||||
|
gpr.Unlock(WA, WB);
|
||||||
|
}
|
||||||
|
break;
|
||||||
case SPR_XER:
|
case SPR_XER:
|
||||||
{
|
{
|
||||||
gpr.BindToRegister(inst.RD, false);
|
gpr.BindToRegister(d, false);
|
||||||
ARM64Reg RD = gpr.R(inst.RD);
|
ARM64Reg RD = gpr.R(d);
|
||||||
ARM64Reg WA = gpr.GetReg();
|
ARM64Reg WA = gpr.GetReg();
|
||||||
LDRH(INDEX_UNSIGNED, RD, X29, PPCSTATE_OFF(xer_stringctrl));
|
LDRH(INDEX_UNSIGNED, RD, X29, PPCSTATE_OFF(xer_stringctrl));
|
||||||
LDRB(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(xer_ca));
|
LDRB(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(xer_ca));
|
||||||
|
@ -220,12 +288,10 @@ void JitArm64::mfspr(UGeckoInstruction inst)
|
||||||
break;
|
break;
|
||||||
case SPR_WPAR:
|
case SPR_WPAR:
|
||||||
case SPR_DEC:
|
case SPR_DEC:
|
||||||
case SPR_TL:
|
|
||||||
case SPR_TU:
|
|
||||||
FALLBACK_IF(true);
|
FALLBACK_IF(true);
|
||||||
default:
|
default:
|
||||||
gpr.BindToRegister(inst.RD, false);
|
gpr.BindToRegister(d, false);
|
||||||
ARM64Reg RD = gpr.R(inst.RD);
|
ARM64Reg RD = gpr.R(d);
|
||||||
LDR(INDEX_UNSIGNED, RD, X29, PPCSTATE_OFF(spr) + iIndex * 4);
|
LDR(INDEX_UNSIGNED, RD, X29, PPCSTATE_OFF(spr) + iIndex * 4);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
|
@ -66,3 +66,19 @@ void LogGeneratedX86(int size, PPCAnalyst::CodeBuffer *code_buffer, const u8 *no
|
||||||
DEBUG_LOG(DYNA_REC,"IR_X86 bin: %s\n\n\n", ss.str().c_str());
|
DEBUG_LOG(DYNA_REC,"IR_X86 bin: %s\n\n\n", ss.str().c_str());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Decides whether the ops js.op[1] .. js.op[count] may be merged into the
// instruction currently being compiled.
// Returns false when the CPU state is CPU_STEPPING, when js.instructionsLeft
// is smaller than count, when debugging is enabled and one of those ops sits
// on an address breakpoint, or when one of those ops is a branch target.
// Returns true otherwise.
bool JitBase::MergeAllowedNextInstructions(int count)
|
||||||
|
{
|
||||||
|
// Early out: stepping, or not enough instructions left to cover count ops.
if (PowerPC::GetState() == PowerPC::CPU_STEPPING || js.instructionsLeft < count)
|
||||||
|
return false;
|
||||||
|
// Be careful: a breakpoint kills flags in between instructions
|
||||||
|
// Index starts at 1, so js.op[0] is never inspected here.
for (int i = 1; i <= count; i++)
|
||||||
|
{
|
||||||
|
// The breakpoint lookup is only consulted when bEnableDebugging is set.
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bEnableDebugging &&
|
||||||
|
PowerPC::breakpoints.IsAddressBreakPoint(js.op[i].address))
|
||||||
|
return false;
|
||||||
|
// An op flagged as a branch target also blocks the merge.
if (js.op[i].isBranchTarget)
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
|
@ -107,6 +107,8 @@ protected:
|
||||||
PPCAnalyst::CodeBlock code_block;
|
PPCAnalyst::CodeBlock code_block;
|
||||||
PPCAnalyst::PPCAnalyzer analyzer;
|
PPCAnalyst::PPCAnalyzer analyzer;
|
||||||
|
|
||||||
|
bool MergeAllowedNextInstructions(int count);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
// This should probably be removed from public:
|
// This should probably be removed from public:
|
||||||
JitOptions jo;
|
JitOptions jo;
|
||||||
|
|
Loading…
Reference in New Issue