JitIL: Added a new IR instruction, MulHighUnsigned, which computes the upper 32 bits of the product of two unsigned 32-bit integers. Rewrote mulhwux to use MulHighUnsigned.
git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@6145 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
parent
e1d1a1eba0
commit
a3df65bd02
|
@ -617,6 +617,36 @@ InstLoc IRBuilder::FoldMul(InstLoc Op1, InstLoc Op2) {
|
||||||
return EmitBiOp(Mul, Op1, Op2);
|
return EmitBiOp(Mul, Op1, Op2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Folds MulHighUnsigned, i.e. the upper 32 bits of the 64-bit unsigned
// product Op1 * Op2 ((Op1 * Op2) >> 32).
//
// Simplifications, in order:
//   - both operands immediate: the result is computed here and emitted as a constant;
//   - only Op1 immediate: operands are swapped so the immediate ends up in Op2;
//   - Op2 is 0 or 1: the high word is always 0;
//   - Op2 is a power of two 2^k (1 <= k <= 31): replaced by a logical right shift;
//   - otherwise a MulHighUnsigned instruction is emitted.
InstLoc IRBuilder::FoldMulHighUnsigned(InstLoc Op1, InstLoc Op2) {
    // (i0 * i1) >> 32
    if (isImm(*Op1) && isImm(*Op2)) {
        const u64 product = static_cast<u64>(GetImmValue(Op1)) * static_cast<u64>(GetImmValue(Op2));
        return EmitIntConst(static_cast<u32>(product >> 32));
    }

    // Canonicalize: keep the immediate (if any) on the right-hand side.
    if (isImm(*Op1) && !isImm(*Op2)) {
        return FoldMulHighUnsigned(Op2, Op1);
    }

    if (isImm(*Op2)) {
        const unsigned imm = GetImmValue(Op2);

        // (x * 0) >> 32 => 0
        // (x * 1) >> 32 => 0
        // The multiplier 1 must not go through the shift rewrite below: it
        // would need a shift count of 32, and IR shifts only honor the low
        // 5 bits of the count, so the "shift" would return x unchanged.
        if (imm <= 1) {
            return EmitIntConst(0);
        }

        // (x * (1 << i0)) >> 32 => x >> (32 - i0), for 1 <= i0 <= 31
        // One "shr" is faster than one "mul".
        for (unsigned i0 = 1; i0 < 32; ++i0) {
            if (imm == (1U << i0)) {
                return FoldShrl(Op1, EmitIntConst(32 - i0));
            }
        }
    }

    return EmitBiOp(MulHighUnsigned, Op1, Op2);
}
|
||||||
|
|
||||||
InstLoc IRBuilder::FoldAnd(InstLoc Op1, InstLoc Op2) {
|
InstLoc IRBuilder::FoldAnd(InstLoc Op1, InstLoc Op2) {
|
||||||
simplifyCommutative(And, Op1, Op2);
|
simplifyCommutative(And, Op1, Op2);
|
||||||
|
|
||||||
|
@ -1001,6 +1031,7 @@ InstLoc IRBuilder::FoldBiOp(unsigned Opcode, InstLoc Op1, InstLoc Op2, unsigned
|
||||||
case Add: return FoldAdd(Op1, Op2);
|
case Add: return FoldAdd(Op1, Op2);
|
||||||
case Sub: return FoldSub(Op1, Op2);
|
case Sub: return FoldSub(Op1, Op2);
|
||||||
case Mul: return FoldMul(Op1, Op2);
|
case Mul: return FoldMul(Op1, Op2);
|
||||||
|
case MulHighUnsigned: return FoldMulHighUnsigned(Op1, Op2);
|
||||||
case And: return FoldAnd(Op1, Op2);
|
case And: return FoldAnd(Op1, Op2);
|
||||||
case Or: return FoldOr(Op1, Op2);
|
case Or: return FoldOr(Op1, Op2);
|
||||||
case Xor: return FoldXor(Op1, Op2);
|
case Xor: return FoldXor(Op1, Op2);
|
||||||
|
|
|
@ -68,6 +68,7 @@ enum Opcode {
|
||||||
Or,
|
Or,
|
||||||
Xor,
|
Xor,
|
||||||
// Non-commutative integer operators
|
// Non-commutative integer operators
|
||||||
|
MulHighUnsigned,
|
||||||
Sub,
|
Sub,
|
||||||
Shl, // Note that shifts ignore bits above the bottom 5
|
Shl, // Note that shifts ignore bits above the bottom 5
|
||||||
Shrl,
|
Shrl,
|
||||||
|
@ -224,6 +225,7 @@ private:
|
||||||
InstLoc FoldAdd(InstLoc Op1, InstLoc Op2);
|
InstLoc FoldAdd(InstLoc Op1, InstLoc Op2);
|
||||||
InstLoc FoldSub(InstLoc Op1, InstLoc Op2);
|
InstLoc FoldSub(InstLoc Op1, InstLoc Op2);
|
||||||
InstLoc FoldMul(InstLoc Op1, InstLoc Op2);
|
InstLoc FoldMul(InstLoc Op1, InstLoc Op2);
|
||||||
|
InstLoc FoldMulHighUnsigned(InstLoc Op1, InstLoc Op2);
|
||||||
InstLoc FoldAnd(InstLoc Op1, InstLoc Op2);
|
InstLoc FoldAnd(InstLoc Op1, InstLoc Op2);
|
||||||
InstLoc FoldOr(InstLoc Op1, InstLoc Op2);
|
InstLoc FoldOr(InstLoc Op1, InstLoc Op2);
|
||||||
InstLoc FoldRol(InstLoc Op1, InstLoc Op2);
|
InstLoc FoldRol(InstLoc Op1, InstLoc Op2);
|
||||||
|
@ -306,6 +308,9 @@ public:
|
||||||
InstLoc EmitMul(InstLoc op1, InstLoc op2) {
|
InstLoc EmitMul(InstLoc op1, InstLoc op2) {
|
||||||
return FoldBiOp(Mul, op1, op2);
|
return FoldBiOp(Mul, op1, op2);
|
||||||
}
|
}
|
||||||
|
// Builds the upper 32 bits of the unsigned product op1 * op2. The request
// goes through FoldBiOp so the MulHighUnsigned folding rules can simplify it.
InstLoc EmitMulHighUnsigned(InstLoc op1, InstLoc op2) {
    InstLoc highWord = FoldBiOp(MulHighUnsigned, op1, op2);
    return highWord;
}
|
||||||
InstLoc EmitRol(InstLoc op1, InstLoc op2) {
|
InstLoc EmitRol(InstLoc op1, InstLoc op2) {
|
||||||
return FoldBiOp(Rol, op1, op2);
|
return FoldBiOp(Rol, op1, op2);
|
||||||
}
|
}
|
||||||
|
|
|
@ -803,6 +803,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, bool UseProfile, bool Mak
|
||||||
case Or:
|
case Or:
|
||||||
case Xor:
|
case Xor:
|
||||||
case Mul:
|
case Mul:
|
||||||
|
case MulHighUnsigned:
|
||||||
case Rol:
|
case Rol:
|
||||||
case Shl:
|
case Shl:
|
||||||
case Shrl:
|
case Shrl:
|
||||||
|
@ -1105,6 +1106,23 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, bool UseProfile, bool Mak
|
||||||
regNormalRegClear(RI, I);
|
regNormalRegClear(RI, I);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case MulHighUnsigned: {
|
||||||
|
if (!thisUsed) break;
|
||||||
|
regSpill(RI, EAX);
|
||||||
|
regSpill(RI, EDX);
|
||||||
|
X64Reg reg = regBinReg(RI, I);
|
||||||
|
if (isImm(*getOp2(I))) {
|
||||||
|
unsigned RHS = RI.Build->GetImmValue(getOp2(I));
|
||||||
|
Jit->MOV(32, R(EAX), Imm32(RHS));
|
||||||
|
} else {
|
||||||
|
Jit->MOV(32, R(EAX), regLocForInst(RI, getOp2(I)));
|
||||||
|
}
|
||||||
|
Jit->MUL(32, regLocForInst(RI, getOp1(I)));
|
||||||
|
Jit->MOV(32, R(reg), R(EDX));
|
||||||
|
RI.regs[reg] = I;
|
||||||
|
regNormalRegClear(RI, I);
|
||||||
|
break;
|
||||||
|
}
|
||||||
case Rol: {
|
case Rol: {
|
||||||
if (!thisUsed) break;
|
if (!thisUsed) break;
|
||||||
regEmitShiftInst(RI, I, &JitIL::ROL);
|
regEmitShiftInst(RI, I, &JitIL::ROL);
|
||||||
|
|
|
@ -299,42 +299,12 @@ void JitIL::mulhwux(UGeckoInstruction inst)
|
||||||
INSTRUCTION_START
|
INSTRUCTION_START
|
||||||
JITDISABLE(Integer)
|
JITDISABLE(Integer)
|
||||||
|
|
||||||
// Compute upper 32-bit of (a * b) using Karatsuba algorithm
|
|
||||||
// The Karatsuba algorithm reduces the number of multiplications from 4 to 3
|
|
||||||
// d = a * b
|
|
||||||
// = {a1 * (1 << 16) + a0} * {b1 * (1 << 16) + b0};
|
|
||||||
// = d2 * (1 << 32) + d1 * (1 << 16) + d0
|
|
||||||
// where
|
|
||||||
// d2 = a1 * b1
|
|
||||||
// d0 = a0 * b0
|
|
||||||
// d1 = (a1 + a0) * (b1 + b0) - d2 - d0
|
|
||||||
// since
|
|
||||||
// d1 = (a1 + a0) * (b1 + b0) - d2 - d0
|
|
||||||
// = a1 * b1 + a1 * b0 + a0 * b1 + a0 * b0 - a1 * b1 - a0 * b0
|
|
||||||
// = a1 * b0 + a0 * b1
|
|
||||||
// The result of mulhwux is
|
|
||||||
// d2' = (((d0 >> 16) + d1) >> 16) + d2
|
|
||||||
//
|
|
||||||
// Though it is not so fast...
|
|
||||||
IREmitter::InstLoc a = ibuild.EmitLoadGReg(inst.RA);
|
IREmitter::InstLoc a = ibuild.EmitLoadGReg(inst.RA);
|
||||||
IREmitter::InstLoc a0 = ibuild.EmitAnd(a, ibuild.EmitIntConst(0xFFFF));
|
|
||||||
IREmitter::InstLoc a1 = ibuild.EmitShrl(a, ibuild.EmitIntConst(16));
|
|
||||||
IREmitter::InstLoc b = ibuild.EmitLoadGReg(inst.RB);
|
IREmitter::InstLoc b = ibuild.EmitLoadGReg(inst.RB);
|
||||||
IREmitter::InstLoc b0 = ibuild.EmitAnd(b, ibuild.EmitIntConst(0xFFFF));
|
IREmitter::InstLoc d = ibuild.EmitMulHighUnsigned(a, b);
|
||||||
IREmitter::InstLoc b1 = ibuild.EmitShrl(b, ibuild.EmitIntConst(16));
|
ibuild.EmitStoreGReg(d, inst.RD);
|
||||||
|
|
||||||
IREmitter::InstLoc d2 = ibuild.EmitMul(a1, b1);
|
|
||||||
IREmitter::InstLoc d0 = ibuild.EmitMul(a0, b0);
|
|
||||||
IREmitter::InstLoc d1 = ibuild.EmitMul(ibuild.EmitAdd(a1, a0), ibuild.EmitAdd(b1, b0));
|
|
||||||
d1 = ibuild.EmitSub(d1, d2);
|
|
||||||
d1 = ibuild.EmitSub(d1, d0);
|
|
||||||
|
|
||||||
d1 = ibuild.EmitAdd(d1, ibuild.EmitShrl(d0, ibuild.EmitIntConst(16)));
|
|
||||||
d2 = ibuild.EmitAdd(d2, ibuild.EmitShrl(d1, ibuild.EmitIntConst(16)));
|
|
||||||
|
|
||||||
ibuild.EmitStoreGReg(d2, inst.RD);
|
|
||||||
if (inst.Rc)
|
if (inst.Rc)
|
||||||
ComputeRC(ibuild, d2);
|
ComputeRC(ibuild, d);
|
||||||
}
|
}
|
||||||
|
|
||||||
// skipped some of the special handling in here - if we get crashes, let the interpreter handle this op
|
// skipped some of the special handling in here - if we get crashes, let the interpreter handle this op
|
||||||
|
|
Loading…
Reference in New Issue