diff --git a/src/ARMJIT_x64/ARMJIT_ALU.cpp b/src/ARMJIT_x64/ARMJIT_ALU.cpp index 14c223bb..43b94b63 100644 --- a/src/ARMJIT_x64/ARMJIT_ALU.cpp +++ b/src/ARMJIT_x64/ARMJIT_ALU.cpp @@ -301,10 +301,11 @@ void Compiler::A_Comp_MUL_MLA() Comp_MulOp(S, add, rd, rm, rs, rn); } -void Compiler::A_Comp_SMULL_SMLAL() +void Compiler::A_Comp_Mul_Long() { bool S = CurInstr.Instr & (1 << 20); bool add = CurInstr.Instr & (1 << 21); + bool sign = CurInstr.Instr & (1 << 22); OpArg rd = MapReg(CurInstr.A_Reg(16)); OpArg rm = MapReg(CurInstr.A_Reg(0)); OpArg rs = MapReg(CurInstr.A_Reg(8)); @@ -318,18 +319,34 @@ void Compiler::A_Comp_SMULL_SMLAL() MOV(32, R(RSCRATCH3), rs); TEST(32, R(RSCRATCH3), R(RSCRATCH3)); FixupBranch zeroBSR = J_CC(CC_Z); - BSR(32, RSCRATCH2, R(RSCRATCH3)); - NOT(32, R(RSCRATCH3)); - BSR(32, RSCRATCH, R(RSCRATCH3)); - CMP(32, R(RSCRATCH2), R(RSCRATCH)); - CMOVcc(32, RSCRATCH, R(RSCRATCH2), CC_L); + if (sign) + { + BSR(32, RSCRATCH2, R(RSCRATCH3)); + NOT(32, R(RSCRATCH3)); + BSR(32, RSCRATCH, R(RSCRATCH3)); + CMP(32, R(RSCRATCH2), R(RSCRATCH)); + CMOVcc(32, RSCRATCH, R(RSCRATCH2), CC_L); + } + else + { + BSR(32, RSCRATCH, R(RSCRATCH3)); + } + SHR(32, R(RSCRATCH), Imm8(3)); SetJumpTarget(zeroBSR); // fortunately that's even right Comp_AddCycles_CI(RSCRATCH, 2); } - MOVSX(64, 32, RSCRATCH2, rm); - MOVSX(64, 32, RSCRATCH3, rs); + if (sign) + { + MOVSX(64, 32, RSCRATCH2, rm); + MOVSX(64, 32, RSCRATCH3, rs); + } + else + { + MOV(32, R(RSCRATCH2), rm); + MOV(32, R(RSCRATCH3), rs); + } if (add) { MOV(32, R(RSCRATCH), rd); diff --git a/src/ARMJIT_x64/ARMJIT_Compiler.cpp b/src/ARMJIT_x64/ARMJIT_Compiler.cpp index be3709e7..1b2d312a 100644 --- a/src/ARMJIT_x64/ARMJIT_Compiler.cpp +++ b/src/ARMJIT_x64/ARMJIT_Compiler.cpp @@ -300,7 +300,7 @@ const Compiler::CompileFunc A_Comp[ARMInstrInfo::ak_Count] = // CMN F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), // Mul - F(A_Comp_MUL_MLA), F(A_Comp_MUL_MLA), NULL, NULL, NULL, F(A_Comp_SMULL_SMLAL), NULL, NULL, NULL, NULL, NULL, + F(A_Comp_MUL_MLA), F(A_Comp_MUL_MLA), F(A_Comp_Mul_Long), F(A_Comp_Mul_Long), F(A_Comp_Mul_Long), F(A_Comp_Mul_Long), NULL, NULL, NULL, NULL, NULL, // ARMv5 stuff F(A_Comp_CLZ), NULL, NULL, NULL, NULL, // STR @@ -628,7 +628,7 @@ void Compiler::Comp_AddCycles_CI(Gen::X64Reg i, int add) } else { - ConstantCycles += i + cycles; + ConstantCycles += cycles; SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), R(i)); } } diff --git a/src/ARMJIT_x64/ARMJIT_Compiler.h b/src/ARMJIT_x64/ARMJIT_Compiler.h index b428c33b..a448b6de 100644 --- a/src/ARMJIT_x64/ARMJIT_Compiler.h +++ b/src/ARMJIT_x64/ARMJIT_Compiler.h @@ -89,7 +89,7 @@ public: void A_Comp_CmpOp(); void A_Comp_MUL_MLA(); - void A_Comp_SMULL_SMLAL(); + void A_Comp_Mul_Long(); void A_Comp_CLZ(); diff --git a/src/ARMJIT_x64/ARMJIT_LoadStore.cpp b/src/ARMJIT_x64/ARMJIT_LoadStore.cpp index 4cafc1c9..7f6fa531 100644 --- a/src/ARMJIT_x64/ARMJIT_LoadStore.cpp +++ b/src/ARMJIT_x64/ARMJIT_LoadStore.cpp @@ -423,7 +423,8 @@ void Compiler::Comp_MemAccess(int rd, int rn, const ComplexOperand& op2, int siz if (flags & memop_SubtractOffset) { - MOV(32, R(finalAddr), rnMapped); + if (R(finalAddr) != rnMapped) + MOV(32, R(finalAddr), rnMapped); if (!offset.IsZero()) SUB(32, R(finalAddr), offset); }