JitArm64_Integer: Add optimizations for rlwimix

* Check for the case when the destination field is at the LSB (mask ends at bit 0)
* Use BFXIL if possible
* Avoid ROR where possible
This commit is contained in:
MerryMage 2020-12-27 20:23:03 +00:00
parent 8aa2013a2d
commit a0b8956f22
1 changed file with 25 additions and 13 deletions

View File

@ -1443,6 +1443,10 @@ void JitArm64::rlwimix(UGeckoInstruction inst)
const int a = inst.RA, s = inst.RS; const int a = inst.RA, s = inst.RS;
const u32 mask = MakeRotationMask(inst.MB, inst.ME); const u32 mask = MakeRotationMask(inst.MB, inst.ME);
const u32 lsb = 31 - inst.ME;
const u32 width = inst.ME - inst.MB + 1;
const u32 rot_dist = inst.SH ? 32 - inst.SH : 0;
if (gpr.IsImm(a) && gpr.IsImm(s)) if (gpr.IsImm(a) && gpr.IsImm(s))
{ {
u32 res = (gpr.GetImm(a) & ~mask) | (Common::RotateLeft(gpr.GetImm(s), inst.SH) & mask); u32 res = (gpr.GetImm(a) & ~mask) | (Common::RotateLeft(gpr.GetImm(s), inst.SH) & mask);
@ -1462,17 +1466,22 @@ void JitArm64::rlwimix(UGeckoInstruction inst)
gpr.BindToRegister(a, a == s); gpr.BindToRegister(a, a == s);
if (inst.SH) if (inst.SH)
ROR(gpr.R(a), gpr.R(s), 32 - inst.SH); ROR(gpr.R(a), gpr.R(s), rot_dist);
else if (a != s) else if (a != s)
MOV(gpr.R(a), gpr.R(s)); MOV(gpr.R(a), gpr.R(s));
} }
else if (lsb == 0 && inst.MB <= inst.ME && rot_dist + width <= 32)
{
// Destination is in least significant position
// No mask inversion
// Source field pre-rotation is contiguous
gpr.BindToRegister(a, true);
BFXIL(gpr.R(a), gpr.R(s), rot_dist, width);
}
else if (inst.SH == 0 && inst.MB <= inst.ME) else if (inst.SH == 0 && inst.MB <= inst.ME)
{ {
// No rotation // No rotation
// No mask inversion // No mask inversion
u32 lsb = 31 - inst.ME;
u32 width = inst.ME - inst.MB + 1;
gpr.BindToRegister(a, true); gpr.BindToRegister(a, true);
ARM64Reg WA = gpr.GetReg(); ARM64Reg WA = gpr.GetReg();
UBFX(WA, gpr.R(s), lsb, width); UBFX(WA, gpr.R(s), lsb, width);
@ -1482,16 +1491,19 @@ void JitArm64::rlwimix(UGeckoInstruction inst)
else if (inst.SH && inst.MB <= inst.ME) else if (inst.SH && inst.MB <= inst.ME)
{ {
// No mask inversion // No mask inversion
u32 lsb = 31 - inst.ME;
u32 width = inst.ME - inst.MB + 1;
gpr.BindToRegister(a, true); gpr.BindToRegister(a, true);
if ((rot_dist + lsb) % 32 == 0)
{
BFI(gpr.R(a), gpr.R(s), lsb, width);
}
else
{
ARM64Reg WA = gpr.GetReg(); ARM64Reg WA = gpr.GetReg();
ROR(WA, gpr.R(s), 32 - inst.SH); ROR(WA, gpr.R(s), (rot_dist + lsb) % 32);
UBFX(WA, WA, lsb, width);
BFI(gpr.R(a), WA, lsb, width); BFI(gpr.R(a), WA, lsb, width);
gpr.Unlock(WA); gpr.Unlock(WA);
} }
}
else else
{ {
gpr.BindToRegister(a, true); gpr.BindToRegister(a, true);
@ -1500,7 +1512,7 @@ void JitArm64::rlwimix(UGeckoInstruction inst)
MOVI2R(WA, mask); MOVI2R(WA, mask);
BIC(WB, gpr.R(a), WA); BIC(WB, gpr.R(a), WA);
AND(WA, WA, gpr.R(s), ArithOption(gpr.R(s), ShiftType::ROR, 32 - inst.SH)); AND(WA, WA, gpr.R(s), ArithOption(gpr.R(s), ShiftType::ROR, rot_dist));
ORR(gpr.R(a), WB, WA); ORR(gpr.R(a), WB, WA);
gpr.Unlock(WA, WB); gpr.Unlock(WA, WB);