From 2ca1ac337040a6dd9f0f1a18a7dbdc2aa027d21f Mon Sep 17 00:00:00 2001 From: Lioncash Date: Tue, 22 May 2018 10:25:45 -0400 Subject: [PATCH 1/3] Interpreter_Integer: Correct negative overflow handling for divw Previously, given cases such as 0x80000000 / 0xFFFFFFFF we'd incorrectly set the destination register value to zero. If the dividend is negative, then the destination should be set to -1 (0xFFFFFFFF), however if the dividend is positive, then the destination should be set to 0. Note that the 750CL documents state that: "If an attempt is made to perform either of the divisions -- 0x80000000 / -1 or <anything> / 0, then the contents of rD are undefined, as are the contents of the LT, GT, and EQ bits of the CR0 field (if Rc = 1). In this case, if OE = 1 then OV is set." So this is a particular behavior of the hardware itself. --- .../Core/Core/PowerPC/Interpreter/Interpreter_Integer.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_Integer.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_Integer.cpp index 5801b527c1..5b5d4450a6 100644 --- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_Integer.cpp +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_Integer.cpp @@ -501,18 +501,18 @@ void Interpreter::divwx(UGeckoInstruction inst) { const s32 a = rGPR[inst.RA]; const s32 b = rGPR[inst.RB]; - const bool overflow = b == 0 || ((u32)a == 0x80000000 && b == -1); + const bool overflow = b == 0 || (static_cast<u32>(a) == 0x80000000 && b == -1); if (overflow) { - if (((u32)a & 0x80000000) && b == 0) + if (a < 0) rGPR[inst.RD] = UINT32_MAX; else rGPR[inst.RD] = 0; } else { - rGPR[inst.RD] = (u32)(a / b); + rGPR[inst.RD] = static_cast<u32>(a / b); } if (inst.OE) From 5abe6c264a80b3c8c4b2cf1c9ac9d5e95bd61be0 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Tue, 22 May 2018 11:38:49 -0400 Subject: [PATCH 2/3] Jit64: Correct negative overflow handling for divw --- .../Core/Core/PowerPC/Jit64/Jit_Integer.cpp | 40
+++++++++---------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp index db14b184ac..4eb13c9344 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp @@ -1223,7 +1223,8 @@ void Jit64::divwx(UGeckoInstruction inst) s32 i = gpr.R(a).SImm32(), j = gpr.R(b).SImm32(); if (j == 0 || (i == (s32)0x80000000 && j == -1)) { - gpr.SetImmediate32(d, (i >> 31) ^ j); + const u32 result = i < 0 ? 0xFFFFFFFF : 0x00000000; + gpr.SetImmediate32(d, result); if (inst.OE) GenerateConstantOverflow(true); } @@ -1241,38 +1242,37 @@ void Jit64::divwx(UGeckoInstruction inst) gpr.FlushLockX(EAX, EDX); gpr.BindToRegister(d, (d == a || d == b), true); MOV(32, R(EAX), gpr.R(a)); - CDQ(); gpr.BindToRegister(b, true, false); + TEST(32, gpr.R(b), gpr.R(b)); - FixupBranch not_div_by_zero = J_CC(CC_NZ); - MOV(32, gpr.R(d), R(EDX)); - if (inst.OE) - { - GenerateConstantOverflow(true); - } - FixupBranch end1 = J(); - SetJumpTarget(not_div_by_zero); - CMP(32, gpr.R(b), R(EDX)); - FixupBranch not_div_by_neg_one = J_CC(CC_NZ); + const FixupBranch overflow = J_CC(CC_E); + + CMP(32, R(EAX), Imm32(0x80000000)); + const FixupBranch normal_path1 = J_CC(CC_NE); + + CMP(32, gpr.R(b), Imm32(0xFFFFFFFF)); + const FixupBranch normal_path2 = J_CC(CC_NE); + + SetJumpTarget(overflow); + SAR(32, R(EAX), Imm8(31)); MOV(32, gpr.R(d), R(EAX)); - NEG(32, gpr.R(d)); - FixupBranch no_overflow = J_CC(CC_NO); - XOR(32, gpr.R(d), gpr.R(d)); if (inst.OE) { GenerateConstantOverflow(true); } - FixupBranch end2 = J(); - SetJumpTarget(not_div_by_neg_one); + const FixupBranch done = J(); + + SetJumpTarget(normal_path1); + SetJumpTarget(normal_path2); + + CDQ(); IDIV(32, gpr.R(b)); MOV(32, gpr.R(d), R(EAX)); - SetJumpTarget(no_overflow); if (inst.OE) { GenerateConstantOverflow(false); } - SetJumpTarget(end1); - SetJumpTarget(end2); + SetJumpTarget(done); } if 
(inst.Rc) ComputeRC(gpr.R(d)); From 6a4f12d7852594564039bf5ac6a53c49da61df4e Mon Sep 17 00:00:00 2001 From: Lioncash Date: Tue, 22 May 2018 12:32:20 -0400 Subject: [PATCH 3/3] JitArm64: Correct negative overflow handling for divw --- .../Core/PowerPC/JitArm64/JitArm64_Integer.cpp | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp index a620e4b9ad..c1fa56a4a1 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp @@ -1164,17 +1164,17 @@ void JitArm64::divwx(UGeckoInstruction inst) { s32 imm_a = gpr.GetImm(a); s32 imm_b = gpr.GetImm(b); - s32 imm_d; - if (imm_b == 0 || ((u32)imm_a == 0x80000000 && imm_b == -1)) + u32 imm_d; + if (imm_b == 0 || (static_cast<u32>(imm_a) == 0x80000000 && imm_b == -1)) { - if (((u32)imm_a & 0x80000000) && imm_b == 0) - imm_d = -1; + if (imm_a < 0) + imm_d = 0xFFFFFFFF; else imm_d = 0; } else { - imm_d = (u32)(imm_a / imm_b); + imm_d = static_cast<u32>(imm_a / imm_b); } gpr.SetImmediate(d, imm_d); @@ -1217,9 +1217,7 @@ void JitArm64::divwx(UGeckoInstruction inst) SetJumpTarget(slow1); SetJumpTarget(slow2); - CMP(RB, 0); - CCMP(RA, 0, 0, CC_EQ); - CSETM(RD, CC_LT); + ASR(RD, RA, 31); SetJumpTarget(done);