JitArm64: Stop using hand-encoded logical immediates

This commit is contained in:
JosJuice 2021-07-12 12:05:34 +02:00
parent 88fd9fd577
commit 8af5095ff4
8 changed files with 40 additions and 64 deletions

View File

@ -560,23 +560,18 @@ void ARM64XEmitter::EncodeAddSubImmInst(u32 op, bool flags, u32 shift, u32 imm,
(DecodeReg(Rn) << 5) | DecodeReg(Rd));
}
void ARM64XEmitter::EncodeLogicalImmInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms,
int n)
void ARM64XEmitter::EncodeLogicalImmInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, LogicalImm imm)
{
ASSERT_MSG(DYNAREC, imm.valid, "Invalid logical immediate");
// Sometimes Rd is fixed to SP, but can still be 32bit or 64bit.
// Use Rn to determine bitness here.
bool b64Bit = Is64Bit(Rn);
ASSERT_MSG(DYNAREC, b64Bit || !n, "64-bit logical immediate does not fit in 32-bit register");
ASSERT_MSG(DYNAREC, b64Bit || !imm.n, "64-bit logical immediate does not fit in 32-bit register");
Write32((b64Bit << 31) | (op << 29) | (0x24 << 23) | (n << 22) | (immr << 16) | (imms << 10) |
(DecodeReg(Rn) << 5) | DecodeReg(Rd));
}
void ARM64XEmitter::EncodeLogicalImmInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, LogicalImm imm)
{
ASSERT_MSG(DYNAREC, imm.valid, "Invalid logical immediate");
EncodeLogicalImmInst(op, Rd, Rn, imm.r, imm.s, imm.n);
Write32((b64Bit << 31) | (op << 29) | (0x24 << 23) | (imm.n << 22) | (imm.r << 16) |
(imm.s << 10) | (DecodeReg(Rn) << 5) | DecodeReg(Rd));
}
void ARM64XEmitter::EncodeLoadStorePair(u32 op, u32 load, IndexType type, ARM64Reg Rt, ARM64Reg Rt2,
@ -1336,42 +1331,22 @@ void ARM64XEmitter::ROR(ARM64Reg Rd, ARM64Reg Rm, int shift)
}
// Logical (immediate)
// Emits a logical-immediate AND from raw, hand-encoded immediate fields.
// Forwards to EncodeLogicalImmInst with op = 0 (placed at bit 29 of the instruction word).
// NOTE: despite its name, `invert` is passed as the encoder's `n` argument, which ends up in
// bit 22 of the instruction word; the encoder asserts that it is zero for 32-bit registers.
void ARM64XEmitter::AND(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, bool invert)
{
EncodeLogicalImmInst(0, Rd, Rn, immr, imms, invert);
}
// Emits a logical-immediate AND: Rd = Rn & imm.
// Forwards to EncodeLogicalImmInst with op = 0; the encoder asserts imm.valid and writes the
// immediate's n/r/s fields into the instruction word.
void ARM64XEmitter::AND(ARM64Reg Rd, ARM64Reg Rn, LogicalImm imm)
{
EncodeLogicalImmInst(0, Rd, Rn, imm);
}
// Emits a logical-immediate ANDS from raw, hand-encoded immediate fields.
// Forwards to EncodeLogicalImmInst with op = 3 (placed at bit 29 of the instruction word).
// NOTE: despite its name, `invert` is passed as the encoder's `n` argument, which ends up in
// bit 22 of the instruction word; the encoder asserts that it is zero for 32-bit registers.
void ARM64XEmitter::ANDS(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, bool invert)
{
EncodeLogicalImmInst(3, Rd, Rn, immr, imms, invert);
}
// Emits a logical-immediate ANDS (AND that also sets the condition flags).
// Forwards to EncodeLogicalImmInst with op = 3; the encoder asserts imm.valid and writes the
// immediate's n/r/s fields into the instruction word.
void ARM64XEmitter::ANDS(ARM64Reg Rd, ARM64Reg Rn, LogicalImm imm)
{
EncodeLogicalImmInst(3, Rd, Rn, imm);
}
// Emits a logical-immediate EOR from raw, hand-encoded immediate fields.
// Forwards to EncodeLogicalImmInst with op = 2 (placed at bit 29 of the instruction word).
// NOTE: despite its name, `invert` is passed as the encoder's `n` argument, which ends up in
// bit 22 of the instruction word; the encoder asserts that it is zero for 32-bit registers.
void ARM64XEmitter::EOR(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, bool invert)
{
EncodeLogicalImmInst(2, Rd, Rn, immr, imms, invert);
}
// Emits a logical-immediate EOR: Rd = Rn ^ imm.
// Forwards to EncodeLogicalImmInst with op = 2; the encoder asserts imm.valid and writes the
// immediate's n/r/s fields into the instruction word.
void ARM64XEmitter::EOR(ARM64Reg Rd, ARM64Reg Rn, LogicalImm imm)
{
EncodeLogicalImmInst(2, Rd, Rn, imm);
}
// Emits a logical-immediate ORR from raw, hand-encoded immediate fields.
// Forwards to EncodeLogicalImmInst with op = 1 (placed at bit 29 of the instruction word).
// NOTE: despite its name, `invert` is passed as the encoder's `n` argument, which ends up in
// bit 22 of the instruction word; the encoder asserts that it is zero for 32-bit registers.
void ARM64XEmitter::ORR(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, bool invert)
{
EncodeLogicalImmInst(1, Rd, Rn, immr, imms, invert);
}
// Emits a logical-immediate ORR: Rd = Rn | imm.
// Forwards to EncodeLogicalImmInst with op = 1; the encoder asserts imm.valid and writes the
// immediate's n/r/s fields into the instruction word.
void ARM64XEmitter::ORR(ARM64Reg Rd, ARM64Reg Rn, LogicalImm imm)
{
EncodeLogicalImmInst(1, Rd, Rn, imm);
}
// Emits a TST from raw, hand-encoded immediate fields.
// Uses the same op value (3) as ANDS, but with the zero register as the destination so only
// the flags are kept; ZR or WZR is chosen to match the width of Rn.
// NOTE: despite its name, `invert` is passed as the encoder's `n` argument, which ends up in
// bit 22 of the instruction word; the encoder asserts that it is zero for 32-bit registers.
void ARM64XEmitter::TST(ARM64Reg Rn, u32 immr, u32 imms, bool invert)
{
EncodeLogicalImmInst(3, Is64Bit(Rn) ? ARM64Reg::ZR : ARM64Reg::WZR, Rn, immr, imms, invert);
}
void ARM64XEmitter::TST(ARM64Reg Rn, LogicalImm imm)
{
EncodeLogicalImmInst(3, Is64Bit(Rn) ? ARM64Reg::ZR : ARM64Reg::WZR, Rn, imm);

View File

@ -754,7 +754,6 @@ private:
void EncodeBitfieldMOVInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms);
void EncodeLoadStoreRegisterOffset(u32 size, u32 opc, ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
void EncodeAddSubImmInst(u32 op, bool flags, u32 shift, u32 imm, ARM64Reg Rn, ARM64Reg Rd);
void EncodeLogicalImmInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, int n);
void EncodeLogicalImmInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, LogicalImm imm);
void EncodeLoadStorePair(u32 op, u32 load, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn,
s32 imm);
@ -996,15 +995,10 @@ public:
void ROR(ARM64Reg Rd, ARM64Reg Rm, int shift);
// Logical (immediate)
void AND(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, bool invert = false);
void AND(ARM64Reg Rd, ARM64Reg Rn, LogicalImm imm);
void ANDS(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, bool invert = false);
void ANDS(ARM64Reg Rd, ARM64Reg Rn, LogicalImm imm);
void EOR(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, bool invert = false);
void EOR(ARM64Reg Rd, ARM64Reg Rn, LogicalImm imm);
void ORR(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, bool invert = false);
void ORR(ARM64Reg Rd, ARM64Reg Rn, LogicalImm imm);
void TST(ARM64Reg Rn, u32 immr, u32 imms, bool invert = false);
void TST(ARM64Reg Rn, LogicalImm imm);
// Add/subtract (immediate)
void ADD(ARM64Reg Rd, ARM64Reg Rn, u32 imm, bool shift = false);

View File

@ -735,7 +735,10 @@ void JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
TBZ(ARM64Reg::W30, 15, done_here); // MSR.EE
MOVP2R(ARM64Reg::X30, &ProcessorInterface::m_InterruptCause);
LDR(IndexType::Unsigned, ARM64Reg::W30, ARM64Reg::X30, 0);
TST(ARM64Reg::W30, 23, 2);
constexpr u32 cause_mask = ProcessorInterface::INT_CAUSE_CP |
ProcessorInterface::INT_CAUSE_PE_TOKEN |
ProcessorInterface::INT_CAUSE_PE_FINISH;
TST(ARM64Reg::W30, LogicalImm(cause_mask, 32));
B(CC_EQ, done_here);
gpr.Flush(FlushMode::MaintainState);
@ -767,7 +770,10 @@ void JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
TBZ(WA, 15, done_here); // MSR.EE
MOVP2R(XA, &ProcessorInterface::m_InterruptCause);
LDR(IndexType::Unsigned, WA, XA, 0);
TST(WA, 23, 2);
constexpr u32 cause_mask = ProcessorInterface::INT_CAUSE_CP |
ProcessorInterface::INT_CAUSE_PE_TOKEN |
ProcessorInterface::INT_CAUSE_PE_FINISH;
TST(WA, LogicalImm(cause_mask, 32));
B(CC_EQ, done_here);
gpr.Unlock(WA);

View File

@ -219,7 +219,7 @@ void JitArm64::bcctrx(UGeckoInstruction inst)
ARM64Reg WA = gpr.GetReg();
LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF_SPR(SPR_CTR));
AND(WA, WA, 30, 29); // Wipe the bottom 2 bits.
AND(WA, WA, LogicalImm(~0x3, 32));
WriteExit(WA, inst.LK_3, js.compilerPC + 4);
@ -265,7 +265,7 @@ void JitArm64::bclrx(UGeckoInstruction inst)
}
LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF_SPR(SPR_LR));
AND(WA, WA, 30, 29); // Wipe the bottom 2 bits.
AND(WA, WA, LogicalImm(~0x3, 32));
if (inst.LK)
{

View File

@ -436,7 +436,7 @@ void JitArm64::FloatCompare(UGeckoInstruction inst, bool upper)
FixupBranch pNaN, pLesser, pGreater;
FixupBranch continue1, continue2, continue3;
ORR(XA, ARM64Reg::ZR, 32, 0, true);
MOVI2R(XA, 1ULL << 32);
if (a != b)
{
@ -449,7 +449,7 @@ void JitArm64::FloatCompare(UGeckoInstruction inst, bool upper)
pNaN = B(CC_VS);
// A == B
ORR(XA, XA, 64 - 63, 0, true);
ORR(XA, XA, LogicalImm(1ULL << 63, 64));
if (fprf)
ORR(fpscr_reg, fpscr_reg, LogicalImm(PowerPC::CR_EQ << FPRF_SHIFT, 32));
@ -466,15 +466,15 @@ void JitArm64::FloatCompare(UGeckoInstruction inst, bool upper)
continue2 = B();
SetJumpTarget(pGreater);
ORR(XA, XA, 0, 0, true);
ORR(XA, XA, LogicalImm(1, 64));
if (fprf)
ORR(fpscr_reg, fpscr_reg, LogicalImm(PowerPC::CR_GT << FPRF_SHIFT, 32));
continue3 = B();
SetJumpTarget(pLesser);
ORR(XA, XA, 64 - 62, 1, true);
ORR(XA, XA, 0, 0, true);
ORR(XA, XA, LogicalImm(0xC000'0000'0000'0000, 64));
ORR(XA, XA, LogicalImm(1, 64));
if (fprf)
ORR(fpscr_reg, fpscr_reg, LogicalImm(PowerPC::CR_LT << FPRF_SHIFT, 32));

View File

@ -200,7 +200,7 @@ void JitArm64::psq_st(UGeckoInstruction inst)
// Inline address check
// FIXME: This doesn't correctly account for the BAT configuration.
TST(addr_reg, 6, 1);
TST(addr_reg, LogicalImm(0x0c000000, 32));
FixupBranch pass = B(CC_EQ);
FixupBranch fail = B();

View File

@ -42,7 +42,7 @@ void JitArm64::FixGTBeforeSettingCRFieldBit(Arm64Gen::ARM64Reg reg)
// intending to. This can break actual games, so fix it up.
ARM64Reg WA = gpr.GetReg();
ARM64Reg XA = EncodeRegTo64(WA);
ORR(XA, reg, 64 - 63, 0, true); // XB | 1<<63
ORR(XA, reg, LogicalImm(1ULL << 63, 64));
CMP(reg, ARM64Reg::ZR);
CSEL(reg, reg, XA, CC_NEQ);
gpr.Unlock(WA);
@ -405,7 +405,7 @@ void JitArm64::mtspr(UGeckoInstruction inst)
{
ARM64Reg RD = gpr.R(inst.RD);
ARM64Reg WA = gpr.GetReg();
AND(WA, RD, 24, 30);
AND(WA, RD, LogicalImm(0xFFFFFF7F, 32));
STRH(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(xer_stringctrl));
UBFM(WA, RD, XER_CA_SHIFT, XER_CA_SHIFT + 1);
STRB(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(xer_ca));
@ -521,7 +521,7 @@ void JitArm64::crXXX(UGeckoInstruction inst)
case PowerPC::CR_SO_BIT: // check bit 59 set
UBFX(out, XC, PowerPC::CR_EMU_SO_BIT, 1);
if (negate)
EOR(out, out, 0, 0, true); // XC ^ 1
EOR(out, out, LogicalImm(1, 64));
break;
case PowerPC::CR_EQ_BIT: // check bits 31-0 == 0
@ -537,7 +537,7 @@ void JitArm64::crXXX(UGeckoInstruction inst)
case PowerPC::CR_LT_BIT: // check bit 62 set
UBFX(out, XC, PowerPC::CR_EMU_LT_BIT, 1);
if (negate)
EOR(out, out, 0, 0, true); // XC ^ 1
EOR(out, out, LogicalImm(1, 64));
break;
default:
@ -584,14 +584,14 @@ void JitArm64::crXXX(UGeckoInstruction inst)
BFI(XB, XA, PowerPC::CR_EMU_SO_BIT, 1);
break;
case PowerPC::CR_EQ_BIT: // clear low 32 bits, set bit 0 to !input
AND(XB, XB, 32, 31, true); // Clear lower 32bits
EOR(XA, XA, 0, 0); // XA ^ 1<<0
case PowerPC::CR_EQ_BIT: // clear low 32 bits, set bit 0 to !input
AND(XB, XB, LogicalImm(0xFFFF'FFFF'0000'0000, 64));
EOR(XA, XA, LogicalImm(1, 64));
ORR(XB, XB, XA);
break;
case PowerPC::CR_GT_BIT: // set bit 63 to !input
EOR(XA, XA, 0, 0); // XA ^ 1<<0
EOR(XA, XA, LogicalImm(1, 64));
BFI(XB, XA, 63, 1);
break;
@ -600,7 +600,7 @@ void JitArm64::crXXX(UGeckoInstruction inst)
break;
}
ORR(XB, XB, 32, 0, true); // XB | 1<<32
ORR(XB, XB, LogicalImm(1ULL << 32, 64));
gpr.Unlock(WA);
}
@ -639,12 +639,12 @@ void JitArm64::mfcr(UGeckoInstruction inst)
}
// EQ
ORR(WC, WA, 32 - PowerPC::CR_EQ_BIT, 0);
ORR(WC, WA, LogicalImm(1 << PowerPC::CR_EQ_BIT, 32));
CMP(WCR, ARM64Reg::WZR);
CSEL(WA, WC, WA, CC_EQ);
// GT
ORR(WC, WA, 32 - PowerPC::CR_GT_BIT, 0);
ORR(WC, WA, LogicalImm(1 << PowerPC::CR_GT_BIT, 32));
CMP(CR, ARM64Reg::ZR);
CSEL(WA, WC, WA, CC_GT);
}
@ -748,7 +748,7 @@ void JitArm64::mffsx(UGeckoInstruction inst)
STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(fpscr));
// Vd = FPSCR.Hex | 0xFFF8'0000'0000'0000;
ORR(XA, XA, 13, 12, true);
ORR(XA, XA, LogicalImm(0xFFF8'0000'0000'0000, 64));
m_float_emit.FMOV(EncodeRegToDouble(VD), XA);
gpr.Unlock(WA);

View File

@ -97,8 +97,9 @@ void VertexLoaderARM64::GetVertexAddr(int array, VertexComponentFormat attribute
if (array == ARRAY_POSITION)
{
EOR(scratch2_reg, scratch1_reg, 0,
attribute == VertexComponentFormat::Index8 ? 7 : 15); // 0xFF : 0xFFFF
EOR(scratch2_reg, scratch1_reg,
attribute == VertexComponentFormat::Index8 ? LogicalImm(0xFF, 32) :
LogicalImm(0xFFFF, 32));
m_skip_vertex = CBZ(scratch2_reg);
}
@ -262,7 +263,7 @@ void VertexLoaderARM64::ReadColor(VertexComponentFormat attribute, ColorFormat f
REV16(scratch3_reg, scratch3_reg);
// B
AND(scratch2_reg, scratch3_reg, 32, 4);
AND(scratch2_reg, scratch3_reg, LogicalImm(0x1F, 32));
ORR(scratch2_reg, ARM64Reg::WSP, scratch2_reg, ArithOption(scratch2_reg, ShiftType::LSL, 3));
ORR(scratch2_reg, scratch2_reg, scratch2_reg, ArithOption(scratch2_reg, ShiftType::LSR, 5));
ORR(scratch1_reg, ARM64Reg::WSP, scratch2_reg, ArithOption(scratch2_reg, ShiftType::LSL, 16));
@ -300,7 +301,7 @@ void VertexLoaderARM64::ReadColor(VertexComponentFormat attribute, ColorFormat f
UBFM(scratch1_reg, scratch3_reg, 4, 7);
// G
AND(scratch2_reg, scratch3_reg, 32, 3);
AND(scratch2_reg, scratch3_reg, LogicalImm(0xF, 32));
ORR(scratch1_reg, scratch1_reg, scratch2_reg, ArithOption(scratch2_reg, ShiftType::LSL, 8));
// B
@ -413,7 +414,7 @@ void VertexLoaderARM64::GenerateVertexLoader()
if (m_VtxDesc.low.PosMatIdx)
{
LDRB(IndexType::Unsigned, scratch1_reg, src_reg, m_src_ofs);
AND(scratch1_reg, scratch1_reg, 0, 5);
AND(scratch1_reg, scratch1_reg, LogicalImm(0x3F, 32));
STR(IndexType::Unsigned, scratch1_reg, dst_reg, m_dst_ofs);
// Z-Freeze