JitArm64: Stop using hand-encoded logical immediates

This commit is contained in:
JosJuice 2021-07-12 12:05:34 +02:00
parent 88fd9fd577
commit 8af5095ff4
8 changed files with 40 additions and 64 deletions

View File

@ -560,23 +560,18 @@ void ARM64XEmitter::EncodeAddSubImmInst(u32 op, bool flags, u32 shift, u32 imm,
(DecodeReg(Rn) << 5) | DecodeReg(Rd)); (DecodeReg(Rn) << 5) | DecodeReg(Rd));
} }
void ARM64XEmitter::EncodeLogicalImmInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, void ARM64XEmitter::EncodeLogicalImmInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, LogicalImm imm)
int n)
{ {
ASSERT_MSG(DYNAREC, imm.valid, "Invalid logical immediate");
// Sometimes Rd is fixed to SP, but can still be 32bit or 64bit. // Sometimes Rd is fixed to SP, but can still be 32bit or 64bit.
// Use Rn to determine bitness here. // Use Rn to determine bitness here.
bool b64Bit = Is64Bit(Rn); bool b64Bit = Is64Bit(Rn);
ASSERT_MSG(DYNAREC, b64Bit || !n, "64-bit logical immediate does not fit in 32-bit register"); ASSERT_MSG(DYNAREC, b64Bit || !imm.n, "64-bit logical immediate does not fit in 32-bit register");
Write32((b64Bit << 31) | (op << 29) | (0x24 << 23) | (n << 22) | (immr << 16) | (imms << 10) | Write32((b64Bit << 31) | (op << 29) | (0x24 << 23) | (imm.n << 22) | (imm.r << 16) |
(DecodeReg(Rn) << 5) | DecodeReg(Rd)); (imm.s << 10) | (DecodeReg(Rn) << 5) | DecodeReg(Rd));
}
void ARM64XEmitter::EncodeLogicalImmInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, LogicalImm imm)
{
ASSERT_MSG(DYNAREC, imm.valid, "Invalid logical immediate");
EncodeLogicalImmInst(op, Rd, Rn, imm.r, imm.s, imm.n);
} }
void ARM64XEmitter::EncodeLoadStorePair(u32 op, u32 load, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, void ARM64XEmitter::EncodeLoadStorePair(u32 op, u32 load, IndexType type, ARM64Reg Rt, ARM64Reg Rt2,
@ -1336,42 +1331,22 @@ void ARM64XEmitter::ROR(ARM64Reg Rd, ARM64Reg Rm, int shift)
} }
// Logical (immediate) // Logical (immediate)
void ARM64XEmitter::AND(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, bool invert)
{
EncodeLogicalImmInst(0, Rd, Rn, immr, imms, invert);
}
void ARM64XEmitter::AND(ARM64Reg Rd, ARM64Reg Rn, LogicalImm imm) void ARM64XEmitter::AND(ARM64Reg Rd, ARM64Reg Rn, LogicalImm imm)
{ {
EncodeLogicalImmInst(0, Rd, Rn, imm); EncodeLogicalImmInst(0, Rd, Rn, imm);
} }
void ARM64XEmitter::ANDS(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, bool invert)
{
EncodeLogicalImmInst(3, Rd, Rn, immr, imms, invert);
}
void ARM64XEmitter::ANDS(ARM64Reg Rd, ARM64Reg Rn, LogicalImm imm) void ARM64XEmitter::ANDS(ARM64Reg Rd, ARM64Reg Rn, LogicalImm imm)
{ {
EncodeLogicalImmInst(3, Rd, Rn, imm); EncodeLogicalImmInst(3, Rd, Rn, imm);
} }
void ARM64XEmitter::EOR(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, bool invert)
{
EncodeLogicalImmInst(2, Rd, Rn, immr, imms, invert);
}
void ARM64XEmitter::EOR(ARM64Reg Rd, ARM64Reg Rn, LogicalImm imm) void ARM64XEmitter::EOR(ARM64Reg Rd, ARM64Reg Rn, LogicalImm imm)
{ {
EncodeLogicalImmInst(2, Rd, Rn, imm); EncodeLogicalImmInst(2, Rd, Rn, imm);
} }
void ARM64XEmitter::ORR(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, bool invert)
{
EncodeLogicalImmInst(1, Rd, Rn, immr, imms, invert);
}
void ARM64XEmitter::ORR(ARM64Reg Rd, ARM64Reg Rn, LogicalImm imm) void ARM64XEmitter::ORR(ARM64Reg Rd, ARM64Reg Rn, LogicalImm imm)
{ {
EncodeLogicalImmInst(1, Rd, Rn, imm); EncodeLogicalImmInst(1, Rd, Rn, imm);
} }
void ARM64XEmitter::TST(ARM64Reg Rn, u32 immr, u32 imms, bool invert)
{
EncodeLogicalImmInst(3, Is64Bit(Rn) ? ARM64Reg::ZR : ARM64Reg::WZR, Rn, immr, imms, invert);
}
void ARM64XEmitter::TST(ARM64Reg Rn, LogicalImm imm) void ARM64XEmitter::TST(ARM64Reg Rn, LogicalImm imm)
{ {
EncodeLogicalImmInst(3, Is64Bit(Rn) ? ARM64Reg::ZR : ARM64Reg::WZR, Rn, imm); EncodeLogicalImmInst(3, Is64Bit(Rn) ? ARM64Reg::ZR : ARM64Reg::WZR, Rn, imm);

View File

@ -754,7 +754,6 @@ private:
void EncodeBitfieldMOVInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms); void EncodeBitfieldMOVInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms);
void EncodeLoadStoreRegisterOffset(u32 size, u32 opc, ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm); void EncodeLoadStoreRegisterOffset(u32 size, u32 opc, ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
void EncodeAddSubImmInst(u32 op, bool flags, u32 shift, u32 imm, ARM64Reg Rn, ARM64Reg Rd); void EncodeAddSubImmInst(u32 op, bool flags, u32 shift, u32 imm, ARM64Reg Rn, ARM64Reg Rd);
void EncodeLogicalImmInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, int n);
void EncodeLogicalImmInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, LogicalImm imm); void EncodeLogicalImmInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, LogicalImm imm);
void EncodeLoadStorePair(u32 op, u32 load, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, void EncodeLoadStorePair(u32 op, u32 load, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn,
s32 imm); s32 imm);
@ -996,15 +995,10 @@ public:
void ROR(ARM64Reg Rd, ARM64Reg Rm, int shift); void ROR(ARM64Reg Rd, ARM64Reg Rm, int shift);
// Logical (immediate) // Logical (immediate)
void AND(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, bool invert = false);
void AND(ARM64Reg Rd, ARM64Reg Rn, LogicalImm imm); void AND(ARM64Reg Rd, ARM64Reg Rn, LogicalImm imm);
void ANDS(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, bool invert = false);
void ANDS(ARM64Reg Rd, ARM64Reg Rn, LogicalImm imm); void ANDS(ARM64Reg Rd, ARM64Reg Rn, LogicalImm imm);
void EOR(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, bool invert = false);
void EOR(ARM64Reg Rd, ARM64Reg Rn, LogicalImm imm); void EOR(ARM64Reg Rd, ARM64Reg Rn, LogicalImm imm);
void ORR(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, bool invert = false);
void ORR(ARM64Reg Rd, ARM64Reg Rn, LogicalImm imm); void ORR(ARM64Reg Rd, ARM64Reg Rn, LogicalImm imm);
void TST(ARM64Reg Rn, u32 immr, u32 imms, bool invert = false);
void TST(ARM64Reg Rn, LogicalImm imm); void TST(ARM64Reg Rn, LogicalImm imm);
// Add/subtract (immediate) // Add/subtract (immediate)
void ADD(ARM64Reg Rd, ARM64Reg Rn, u32 imm, bool shift = false); void ADD(ARM64Reg Rd, ARM64Reg Rn, u32 imm, bool shift = false);

View File

@ -735,7 +735,10 @@ void JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
TBZ(ARM64Reg::W30, 15, done_here); // MSR.EE TBZ(ARM64Reg::W30, 15, done_here); // MSR.EE
MOVP2R(ARM64Reg::X30, &ProcessorInterface::m_InterruptCause); MOVP2R(ARM64Reg::X30, &ProcessorInterface::m_InterruptCause);
LDR(IndexType::Unsigned, ARM64Reg::W30, ARM64Reg::X30, 0); LDR(IndexType::Unsigned, ARM64Reg::W30, ARM64Reg::X30, 0);
TST(ARM64Reg::W30, 23, 2); constexpr u32 cause_mask = ProcessorInterface::INT_CAUSE_CP |
ProcessorInterface::INT_CAUSE_PE_TOKEN |
ProcessorInterface::INT_CAUSE_PE_FINISH;
TST(ARM64Reg::W30, LogicalImm(cause_mask, 32));
B(CC_EQ, done_here); B(CC_EQ, done_here);
gpr.Flush(FlushMode::MaintainState); gpr.Flush(FlushMode::MaintainState);
@ -767,7 +770,10 @@ void JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
TBZ(WA, 15, done_here); // MSR.EE TBZ(WA, 15, done_here); // MSR.EE
MOVP2R(XA, &ProcessorInterface::m_InterruptCause); MOVP2R(XA, &ProcessorInterface::m_InterruptCause);
LDR(IndexType::Unsigned, WA, XA, 0); LDR(IndexType::Unsigned, WA, XA, 0);
TST(WA, 23, 2); constexpr u32 cause_mask = ProcessorInterface::INT_CAUSE_CP |
ProcessorInterface::INT_CAUSE_PE_TOKEN |
ProcessorInterface::INT_CAUSE_PE_FINISH;
TST(WA, LogicalImm(cause_mask, 32));
B(CC_EQ, done_here); B(CC_EQ, done_here);
gpr.Unlock(WA); gpr.Unlock(WA);

View File

@ -219,7 +219,7 @@ void JitArm64::bcctrx(UGeckoInstruction inst)
ARM64Reg WA = gpr.GetReg(); ARM64Reg WA = gpr.GetReg();
LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF_SPR(SPR_CTR)); LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF_SPR(SPR_CTR));
AND(WA, WA, 30, 29); // Wipe the bottom 2 bits. AND(WA, WA, LogicalImm(~0x3, 32));
WriteExit(WA, inst.LK_3, js.compilerPC + 4); WriteExit(WA, inst.LK_3, js.compilerPC + 4);
@ -265,7 +265,7 @@ void JitArm64::bclrx(UGeckoInstruction inst)
} }
LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF_SPR(SPR_LR)); LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF_SPR(SPR_LR));
AND(WA, WA, 30, 29); // Wipe the bottom 2 bits. AND(WA, WA, LogicalImm(~0x3, 32));
if (inst.LK) if (inst.LK)
{ {

View File

@ -436,7 +436,7 @@ void JitArm64::FloatCompare(UGeckoInstruction inst, bool upper)
FixupBranch pNaN, pLesser, pGreater; FixupBranch pNaN, pLesser, pGreater;
FixupBranch continue1, continue2, continue3; FixupBranch continue1, continue2, continue3;
ORR(XA, ARM64Reg::ZR, 32, 0, true); MOVI2R(XA, 1ULL << 32);
if (a != b) if (a != b)
{ {
@ -449,7 +449,7 @@ void JitArm64::FloatCompare(UGeckoInstruction inst, bool upper)
pNaN = B(CC_VS); pNaN = B(CC_VS);
// A == B // A == B
ORR(XA, XA, 64 - 63, 0, true); ORR(XA, XA, LogicalImm(1ULL << 63, 64));
if (fprf) if (fprf)
ORR(fpscr_reg, fpscr_reg, LogicalImm(PowerPC::CR_EQ << FPRF_SHIFT, 32)); ORR(fpscr_reg, fpscr_reg, LogicalImm(PowerPC::CR_EQ << FPRF_SHIFT, 32));
@ -466,15 +466,15 @@ void JitArm64::FloatCompare(UGeckoInstruction inst, bool upper)
continue2 = B(); continue2 = B();
SetJumpTarget(pGreater); SetJumpTarget(pGreater);
ORR(XA, XA, 0, 0, true); ORR(XA, XA, LogicalImm(1, 64));
if (fprf) if (fprf)
ORR(fpscr_reg, fpscr_reg, LogicalImm(PowerPC::CR_GT << FPRF_SHIFT, 32)); ORR(fpscr_reg, fpscr_reg, LogicalImm(PowerPC::CR_GT << FPRF_SHIFT, 32));
continue3 = B(); continue3 = B();
SetJumpTarget(pLesser); SetJumpTarget(pLesser);
ORR(XA, XA, 64 - 62, 1, true); ORR(XA, XA, LogicalImm(0xC000'0000'0000'0000, 64));
ORR(XA, XA, 0, 0, true); ORR(XA, XA, LogicalImm(1, 64));
if (fprf) if (fprf)
ORR(fpscr_reg, fpscr_reg, LogicalImm(PowerPC::CR_LT << FPRF_SHIFT, 32)); ORR(fpscr_reg, fpscr_reg, LogicalImm(PowerPC::CR_LT << FPRF_SHIFT, 32));

View File

@ -200,7 +200,7 @@ void JitArm64::psq_st(UGeckoInstruction inst)
// Inline address check // Inline address check
// FIXME: This doesn't correctly account for the BAT configuration. // FIXME: This doesn't correctly account for the BAT configuration.
TST(addr_reg, 6, 1); TST(addr_reg, LogicalImm(0x0c000000, 32));
FixupBranch pass = B(CC_EQ); FixupBranch pass = B(CC_EQ);
FixupBranch fail = B(); FixupBranch fail = B();

View File

@ -42,7 +42,7 @@ void JitArm64::FixGTBeforeSettingCRFieldBit(Arm64Gen::ARM64Reg reg)
// intending to. This can break actual games, so fix it up. // intending to. This can break actual games, so fix it up.
ARM64Reg WA = gpr.GetReg(); ARM64Reg WA = gpr.GetReg();
ARM64Reg XA = EncodeRegTo64(WA); ARM64Reg XA = EncodeRegTo64(WA);
ORR(XA, reg, 64 - 63, 0, true); // XB | 1<<63 ORR(XA, reg, LogicalImm(1ULL << 63, 64));
CMP(reg, ARM64Reg::ZR); CMP(reg, ARM64Reg::ZR);
CSEL(reg, reg, XA, CC_NEQ); CSEL(reg, reg, XA, CC_NEQ);
gpr.Unlock(WA); gpr.Unlock(WA);
@ -405,7 +405,7 @@ void JitArm64::mtspr(UGeckoInstruction inst)
{ {
ARM64Reg RD = gpr.R(inst.RD); ARM64Reg RD = gpr.R(inst.RD);
ARM64Reg WA = gpr.GetReg(); ARM64Reg WA = gpr.GetReg();
AND(WA, RD, 24, 30); AND(WA, RD, LogicalImm(0xFFFFFF7F, 32));
STRH(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(xer_stringctrl)); STRH(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(xer_stringctrl));
UBFM(WA, RD, XER_CA_SHIFT, XER_CA_SHIFT + 1); UBFM(WA, RD, XER_CA_SHIFT, XER_CA_SHIFT + 1);
STRB(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(xer_ca)); STRB(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(xer_ca));
@ -521,7 +521,7 @@ void JitArm64::crXXX(UGeckoInstruction inst)
case PowerPC::CR_SO_BIT: // check bit 59 set case PowerPC::CR_SO_BIT: // check bit 59 set
UBFX(out, XC, PowerPC::CR_EMU_SO_BIT, 1); UBFX(out, XC, PowerPC::CR_EMU_SO_BIT, 1);
if (negate) if (negate)
EOR(out, out, 0, 0, true); // XC ^ 1 EOR(out, out, LogicalImm(1, 64));
break; break;
case PowerPC::CR_EQ_BIT: // check bits 31-0 == 0 case PowerPC::CR_EQ_BIT: // check bits 31-0 == 0
@ -537,7 +537,7 @@ void JitArm64::crXXX(UGeckoInstruction inst)
case PowerPC::CR_LT_BIT: // check bit 62 set case PowerPC::CR_LT_BIT: // check bit 62 set
UBFX(out, XC, PowerPC::CR_EMU_LT_BIT, 1); UBFX(out, XC, PowerPC::CR_EMU_LT_BIT, 1);
if (negate) if (negate)
EOR(out, out, 0, 0, true); // XC ^ 1 EOR(out, out, LogicalImm(1, 64));
break; break;
default: default:
@ -585,13 +585,13 @@ void JitArm64::crXXX(UGeckoInstruction inst)
break; break;
case PowerPC::CR_EQ_BIT: // clear low 32 bits, set bit 0 to !input case PowerPC::CR_EQ_BIT: // clear low 32 bits, set bit 0 to !input
AND(XB, XB, 32, 31, true); // Clear lower 32bits AND(XB, XB, LogicalImm(0xFFFF'FFFF'0000'0000, 64));
EOR(XA, XA, 0, 0); // XA ^ 1<<0 EOR(XA, XA, LogicalImm(1, 64));
ORR(XB, XB, XA); ORR(XB, XB, XA);
break; break;
case PowerPC::CR_GT_BIT: // set bit 63 to !input case PowerPC::CR_GT_BIT: // set bit 63 to !input
EOR(XA, XA, 0, 0); // XA ^ 1<<0 EOR(XA, XA, LogicalImm(1, 64));
BFI(XB, XA, 63, 1); BFI(XB, XA, 63, 1);
break; break;
@ -600,7 +600,7 @@ void JitArm64::crXXX(UGeckoInstruction inst)
break; break;
} }
ORR(XB, XB, 32, 0, true); // XB | 1<<32 ORR(XB, XB, LogicalImm(1ULL << 32, 64));
gpr.Unlock(WA); gpr.Unlock(WA);
} }
@ -639,12 +639,12 @@ void JitArm64::mfcr(UGeckoInstruction inst)
} }
// EQ // EQ
ORR(WC, WA, 32 - PowerPC::CR_EQ_BIT, 0); ORR(WC, WA, LogicalImm(1 << PowerPC::CR_EQ_BIT, 32));
CMP(WCR, ARM64Reg::WZR); CMP(WCR, ARM64Reg::WZR);
CSEL(WA, WC, WA, CC_EQ); CSEL(WA, WC, WA, CC_EQ);
// GT // GT
ORR(WC, WA, 32 - PowerPC::CR_GT_BIT, 0); ORR(WC, WA, LogicalImm(1 << PowerPC::CR_GT_BIT, 32));
CMP(CR, ARM64Reg::ZR); CMP(CR, ARM64Reg::ZR);
CSEL(WA, WC, WA, CC_GT); CSEL(WA, WC, WA, CC_GT);
} }
@ -748,7 +748,7 @@ void JitArm64::mffsx(UGeckoInstruction inst)
STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(fpscr)); STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(fpscr));
// Vd = FPSCR.Hex | 0xFFF8'0000'0000'0000; // Vd = FPSCR.Hex | 0xFFF8'0000'0000'0000;
ORR(XA, XA, 13, 12, true); ORR(XA, XA, LogicalImm(0xFFF8'0000'0000'0000, 64));
m_float_emit.FMOV(EncodeRegToDouble(VD), XA); m_float_emit.FMOV(EncodeRegToDouble(VD), XA);
gpr.Unlock(WA); gpr.Unlock(WA);

View File

@ -97,8 +97,9 @@ void VertexLoaderARM64::GetVertexAddr(int array, VertexComponentFormat attribute
if (array == ARRAY_POSITION) if (array == ARRAY_POSITION)
{ {
EOR(scratch2_reg, scratch1_reg, 0, EOR(scratch2_reg, scratch1_reg,
attribute == VertexComponentFormat::Index8 ? 7 : 15); // 0xFF : 0xFFFF attribute == VertexComponentFormat::Index8 ? LogicalImm(0xFF, 32) :
LogicalImm(0xFFFF, 32));
m_skip_vertex = CBZ(scratch2_reg); m_skip_vertex = CBZ(scratch2_reg);
} }
@ -262,7 +263,7 @@ void VertexLoaderARM64::ReadColor(VertexComponentFormat attribute, ColorFormat f
REV16(scratch3_reg, scratch3_reg); REV16(scratch3_reg, scratch3_reg);
// B // B
AND(scratch2_reg, scratch3_reg, 32, 4); AND(scratch2_reg, scratch3_reg, LogicalImm(0x1F, 32));
ORR(scratch2_reg, ARM64Reg::WSP, scratch2_reg, ArithOption(scratch2_reg, ShiftType::LSL, 3)); ORR(scratch2_reg, ARM64Reg::WSP, scratch2_reg, ArithOption(scratch2_reg, ShiftType::LSL, 3));
ORR(scratch2_reg, scratch2_reg, scratch2_reg, ArithOption(scratch2_reg, ShiftType::LSR, 5)); ORR(scratch2_reg, scratch2_reg, scratch2_reg, ArithOption(scratch2_reg, ShiftType::LSR, 5));
ORR(scratch1_reg, ARM64Reg::WSP, scratch2_reg, ArithOption(scratch2_reg, ShiftType::LSL, 16)); ORR(scratch1_reg, ARM64Reg::WSP, scratch2_reg, ArithOption(scratch2_reg, ShiftType::LSL, 16));
@ -300,7 +301,7 @@ void VertexLoaderARM64::ReadColor(VertexComponentFormat attribute, ColorFormat f
UBFM(scratch1_reg, scratch3_reg, 4, 7); UBFM(scratch1_reg, scratch3_reg, 4, 7);
// G // G
AND(scratch2_reg, scratch3_reg, 32, 3); AND(scratch2_reg, scratch3_reg, LogicalImm(0xF, 32));
ORR(scratch1_reg, scratch1_reg, scratch2_reg, ArithOption(scratch2_reg, ShiftType::LSL, 8)); ORR(scratch1_reg, scratch1_reg, scratch2_reg, ArithOption(scratch2_reg, ShiftType::LSL, 8));
// B // B
@ -413,7 +414,7 @@ void VertexLoaderARM64::GenerateVertexLoader()
if (m_VtxDesc.low.PosMatIdx) if (m_VtxDesc.low.PosMatIdx)
{ {
LDRB(IndexType::Unsigned, scratch1_reg, src_reg, m_src_ofs); LDRB(IndexType::Unsigned, scratch1_reg, src_reg, m_src_ofs);
AND(scratch1_reg, scratch1_reg, 0, 5); AND(scratch1_reg, scratch1_reg, LogicalImm(0x3F, 32));
STR(IndexType::Unsigned, scratch1_reg, dst_reg, m_dst_ofs); STR(IndexType::Unsigned, scratch1_reg, dst_reg, m_dst_ofs);
// Z-Freeze // Z-Freeze