[AArch64] Fix the bugs in the cr instructions

Makes it a bit more efficient in the process.
Author: Ryan Houdek
Date: 2015-08-21 22:44:39 -05:00
parent e9ade0abe1
commit d74eb0ea58

1 changed file with 38 additions and 43 deletions


@@ -379,8 +379,6 @@ void JitArm64::crXXX(UGeckoInstruction inst)
   INSTRUCTION_START
   JITDISABLE(bJITSystemRegistersOff);
-  FALLBACK_IF(1);
-
   // Special case: crclr
   if (inst.CRBA == inst.CRBB && inst.CRBA == inst.CRBD && inst.SUBOP10 == 193)
   {
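
The removed FALLBACK_IF(1) had been punting every cr instruction back to the
interpreter; with the fixes below, the JIT path is live again. The crclr
detection it then reaches works because crclr is an assembler alias, as this
sketch illustrates (IsCrclr is a hypothetical helper, not a Dolphin function):

#include <cstdint>

// crclr n is shorthand for crxor n, n, n (extended opcode 193), and
// x ^ x == 0, so identical operands mean the bit is unconditionally cleared.
bool IsCrclr(uint32_t crbd, uint32_t crba, uint32_t crbb, uint32_t subop10)
{
  return crba == crbb && crba == crbd && subop10 == 193;
}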
@@ -394,19 +392,19 @@ void JitArm64::crXXX(UGeckoInstruction inst)
   switch (bit)
   {
   case CR_SO_BIT:
-    AND(XA, XA, 61, 62); // XA & ~(1<<61)
+    AND(XA, XA, 64 - 62, 62, true); // XA & ~(1<<61)
     break;
   case CR_EQ_BIT:
-    ORR(XA, XA, 1, 0); // XA | 1<<0
+    ORR(XA, XA, 0, 0, true); // XA | 1<<0
     break;
   case CR_GT_BIT:
-    ORR(XA, XA, 63, 0); // XA | 1<<63
+    ORR(XA, XA, 64 - 63, 0, true); // XA | 1<<63
     break;
   case CR_LT_BIT:
-    AND(XA, XA, 62, 62); // XA & ~(1<<62)
+    AND(XA, XA, 64 - 63, 62, true); // XA & ~(1<<62)
     break;
   }
   STR(INDEX_UNSIGNED, XA, X29, PPCSTATE_OFF(cr_val) + 8 * field);
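
The actual bug these hunks fix: Dolphin's AArch64 emitter takes logical
immediates as the raw (immr, imms) encoding fields, with a final flag that
appears to select the 64-bit (N = 1) pattern, but the old calls passed plain
bit positions. A minimal decoder sketch for the single-run 64-bit case,
showing that the corrected operands produce the masks the comments claim
(DecodeLogicalImm64 is a hypothetical helper, not the emitter's own):

#include <cstdint>
#include <cstdio>

// With N = 1 the element size is 64: imms + 1 is the length of the run of
// ones, and immr rotates that run right. Real decoders also handle N = 0
// sub-patterns and reject invalid encodings.
uint64_t DecodeLogicalImm64(unsigned immr, unsigned imms)
{
  unsigned ones = imms + 1;
  uint64_t mask = (ones == 64) ? ~0ULL : (1ULL << ones) - 1;
  return (mask >> immr) | (mask << ((64 - immr) & 63)); // rotate right by immr
}

int main()
{
  printf("%016llx\n", (unsigned long long)DecodeLogicalImm64(64 - 62, 62)); // ~(1<<61)
  printf("%016llx\n", (unsigned long long)DecodeLogicalImm64(64 - 63, 0));  // 1<<63
  printf("%016llx\n", (unsigned long long)DecodeLogicalImm64(32, 31));      // clear low 32
}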
@@ -427,33 +425,34 @@ void JitArm64::crXXX(UGeckoInstruction inst)
   if (bit != CR_GT_BIT)
   {
-    ANDS(ZR, XA, XA);
-    FixupBranch dont_clear_gt = B(CC_NEQ);
-    ORR(XA, XA, 63, 0); // XA | 1<<63
-    SetJumpTarget(dont_clear_gt);
+    ARM64Reg WB = gpr.GetReg();
+    ARM64Reg XB = EncodeRegTo64(WB);
+    ORR(XB, XA, 64 - 63, 0, true); // XA | 1<<63
+    CMP(XA, ZR);
+    CSEL(XA, XA, XB, CC_NEQ);
+    gpr.Unlock(WB);
   }
   switch (bit)
   {
   case CR_SO_BIT:
-    ORR(XA, XA, 61, 0); // XA | 1<<61
+    ORR(XA, XA, 64 - 61, 0, true); // XA | 1<<61
     break;
   case CR_EQ_BIT:
-    LSR(XA, XA, 32);
-    LSL(XA, XA, 32);
+    AND(XA, XA, 32, 31, true); // Clear lower 32bits
     break;
   case CR_GT_BIT:
-    AND(XA, XA, 63, 62); // XA & ~(1<<63)
+    AND(XA, XA, 0, 62, true); // XA & ~(1<<63)
     break;
   case CR_LT_BIT:
-    ORR(XA, XA, 62, 0); // XA | 1<<62
+    ORR(XA, XA, 64 - 62, 0, true); // XA | 1<<62
     break;
   }
-  ORR(XA, XA, 32, 0); // XA | 1<<32
+  ORR(XA, XA, 32, 0, true); // XA | 1<<32
   STR(INDEX_UNSIGNED, XA, X29, PPCSTATE_OFF(cr_val) + 8 * field);
   gpr.Unlock(WA);
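
The GT fixup also changes shape: the old compare-and-branch around an ORR
becomes compute-both-then-CSEL, which avoids a branch in JIT-emitted code. In
C terms it does roughly this (a sketch assuming Dolphin's CR layout, where GT
reads as (s64)value > 0):

#include <cstdint>

// If the field's value is zero, force bit 63 so GT still reads false after
// bit 32 is OR'd in; otherwise keep the value. Mirrors the ORR/CMP/CSEL trio.
uint64_t FixupGTBit(uint64_t cr_val)
{
  uint64_t with_bit63 = cr_val | (1ULL << 63); // candidate in the scratch reg
  return cr_val != 0 ? cr_val : with_bit63;    // CSEL(XA, XA, XB, CC_NEQ)
}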
@@ -484,23 +483,25 @@ void JitArm64::crXXX(UGeckoInstruction inst)
   switch (bit)
   {
   case CR_SO_BIT: // check bit 61 set
-    ANDS(ZR, XC, 61, 62); // XC & ~(1<<61)
-    CSINC(out, ZR, ZR, negate ? CC_NEQ : CC_EQ);
+    UBFX(out, XC, 61, 1);
+    if (negate)
+      EOR(out, out, 0, 0, true); // XC ^ 1
     break;
   case CR_EQ_BIT: // check bits 31-0 == 0
-    ANDS(ZR, WC, WC);
-    CSINC(out, ZR, ZR, negate ? CC_NEQ : CC_EQ);
+    CMP(WC, WZR);
+    CSET(out, negate ? CC_NEQ : CC_EQ);
     break;
   case CR_GT_BIT: // check val > 0
-    ANDS(ZR, XC, XC);
-    CSINC(out, ZR, ZR, negate ? CC_NEQ : CC_EQ);
+    CMP(XC, ZR);
+    CSET(out, negate ? CC_LE : CC_GT);
     break;
   case CR_LT_BIT: // check bit 62 set
-    ANDS(ZR, XC, 62, 62); // XC & ~(1<<62)
-    CSINC(out, ZR, ZR, negate ? CC_NEQ : CC_EQ);
+    UBFX(out, XC, 62, 1);
+    if (negate)
+      EOR(out, out, 0, 0, true); // XC ^ 1
     break;
   default:
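
The extraction cases now map straight onto the internal CR layout: SO lives in
bit 61, LT in bit 62, GT is "signed value > 0", and EQ is "low 32 bits are
zero". A hedged C equivalent of what the four rewritten cases compute (the
enum and its ordering here are illustrative, not copied from Dolphin's
headers):

#include <cstdint>

enum CRBit { CR_SO_BIT, CR_EQ_BIT, CR_GT_BIT, CR_LT_BIT }; // illustrative

bool ExtractCRBit(uint64_t cr_val, CRBit bit, bool negate)
{
  bool result = false;
  switch (bit)
  {
  case CR_SO_BIT: result = (cr_val >> 61) & 1; break;    // UBFX(out, XC, 61, 1)
  case CR_EQ_BIT: result = (uint32_t)cr_val == 0; break; // CMP(WC, WZR); CSET EQ
  case CR_GT_BIT: result = (int64_t)cr_val > 0; break;   // CMP(XC, ZR); CSET GT
  case CR_LT_BIT: result = (cr_val >> 62) & 1; break;    // UBFX(out, XC, 62, 1)
  }
  return negate ? !result : result;
}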
@@ -530,7 +531,6 @@ void JitArm64::crXXX(UGeckoInstruction inst)
     ORR(XA, XA, XB);
     break;
   }
-  AND(XA, XA, 0, 63-8); // A & 0xff

   // Store result bit in CRBD
   int field = inst.CRBD >> 2;
@@ -543,42 +543,37 @@ void JitArm64::crXXX(UGeckoInstruction inst)
   // intending to. This can break actual games, so fix it up.
   if (bit != CR_GT_BIT)
   {
-    ANDS(ZR, XB, XB);
-    FixupBranch dont_clear_gt = B(CC_NEQ);
-    ORR(XB, XB, 63, 0); // XA | 1<<63
-    SetJumpTarget(dont_clear_gt);
+    ARM64Reg WC = gpr.GetReg();
+    ARM64Reg XC = EncodeRegTo64(WC);
+    ORR(XC, XB, 64 - 63, 0, true); // XB | 1<<63
+    CMP(XB, ZR);
+    CSEL(XB, XB, XC, CC_NEQ);
+    gpr.Unlock(WC);
   }
   switch (bit)
   {
   case CR_SO_BIT: // set bit 61 to input
-    AND(XB, XB, 61, 62); // XB & ~(1<<61)
-    LSL(XA, XA, 61);
-    ORR(XB, XB, XA);
+    BFI(XB, XA, 61, 1);
     break;
   case CR_EQ_BIT: // clear low 32 bits, set bit 0 to !input
-    LSR(XB, XB, 32);
-    LSL(XB, XB, 32);
-    EOR(XA, XA, 1, 0); // XA ^ 1<<0
+    AND(XB, XB, 32, 31, true); // Clear lower 32bits
+    EOR(XA, XA, 0, 0); // XA ^ 1<<0
     ORR(XB, XB, XA);
     break;
   case CR_GT_BIT: // set bit 63 to !input
-    AND(XB, XB, 63, 62); // XB & ~(1<<63)
-    NEG(XA, XA);
-    LSL(XA, XA, 63);
-    ORR(XB, XB, XA);
+    EOR(XA, XA, 0, 0); // XA ^ 1<<0
+    BFI(XB, XA, 63, 1);
     break;
   case CR_LT_BIT: // set bit 62 to input
-    AND(XB, XB, 62, 62); // XB & ~(1<<62)
-    LSL(XA, XA, 62);
-    ORR(XB, XB, XA);
+    BFI(XB, XA, 62, 1);
     break;
   }
-  ORR(XB, XB, 32, 0); // XB | 1<<32
+  ORR(XA, XA, 32, 0, true); // XA | 1<<32
   STR(INDEX_UNSIGNED, XB, X29, PPCSTATE_OFF(cr_val) + 8 * field);
   gpr.Unlock(WA);
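
On the store side, BFI collapses each mask/shift/OR sequence into a single
bitfield insert. A C-level sketch of what the rewritten cases intend for the
stored field value, with in holding the computed bit as 0 or 1 (same
illustrative enum as above; InsertCRBit is hypothetical):

#include <cstdint>

enum CRBit { CR_SO_BIT, CR_EQ_BIT, CR_GT_BIT, CR_LT_BIT }; // illustrative

uint64_t InsertCRBit(uint64_t cr_val, CRBit bit, uint64_t in)
{
  switch (bit)
  {
  case CR_SO_BIT: // BFI(XB, XA, 61, 1)
    cr_val = (cr_val & ~(1ULL << 61)) | (in << 61);
    break;
  case CR_EQ_BIT: // clear low 32 bits, set bit 0 to !in
    cr_val = (cr_val & 0xFFFFFFFF00000000ULL) | (in ^ 1);
    break;
  case CR_GT_BIT: // BFI of !in into bit 63
    cr_val = (cr_val & ~(1ULL << 63)) | ((in ^ 1) << 63);
    break;
  case CR_LT_BIT: // BFI(XB, XA, 62, 1)
    cr_val = (cr_val & ~(1ULL << 62)) | (in << 62);
    break;
  }
  return cr_val | (1ULL << 32); // keep the value non-zero so GT stays encodable
}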