Fixing left and right shifts.

Ben Vanik 2014-09-10 23:07:03 -07:00
parent e50a45d33d
commit 9690525abc
2 changed files with 126 additions and 38 deletions


@@ -1776,8 +1776,64 @@ EMITTER_OPCODE_TABLE(
 // ============================================================================
 // OPCODE_MIN
 // ============================================================================
+EMITTER(MIN_I8, MATCH(I<OPCODE_MIN, I8<>, I8<>, I8<>>)) {
+  static void Emit(X64Emitter& e, const EmitArgType& i) {
+    EmitCommutativeBinaryOp(e, i,
+        [](X64Emitter& e, const Reg8& dest_src, const Reg8& src) {
+          e.cmp(dest_src, src);
+          e.cmovg(dest_src.cvt32(), src.cvt32());
+        },
+        [](X64Emitter& e, const Reg8& dest_src, int32_t constant) {
+          e.mov(e.al, constant);
+          e.cmp(dest_src, e.al);
+          e.cmovg(dest_src.cvt32(), e.eax);
+        });
+  }
+};
+EMITTER(MIN_I16, MATCH(I<OPCODE_MIN, I16<>, I16<>, I16<>>)) {
+  static void Emit(X64Emitter& e, const EmitArgType& i) {
+    EmitCommutativeBinaryOp(e, i,
+        [](X64Emitter& e, const Reg16& dest_src, const Reg16& src) {
+          e.cmp(dest_src, src);
+          e.cmovg(dest_src.cvt32(), src.cvt32());
+        },
+        [](X64Emitter& e, const Reg16& dest_src, int32_t constant) {
+          e.mov(e.ax, constant);
+          e.cmp(dest_src, e.ax);
+          e.cmovg(dest_src.cvt32(), e.eax);
+        });
+  }
+};
+EMITTER(MIN_I32, MATCH(I<OPCODE_MIN, I32<>, I32<>, I32<>>)) {
+  static void Emit(X64Emitter& e, const EmitArgType& i) {
+    EmitCommutativeBinaryOp(e, i,
+        [](X64Emitter& e, const Reg32& dest_src, const Reg32& src) {
+          e.cmp(dest_src, src);
+          e.cmovg(dest_src, src);
+        },
+        [](X64Emitter& e, const Reg32& dest_src, int32_t constant) {
+          e.mov(e.eax, constant);
+          e.cmp(dest_src, e.eax);
+          e.cmovg(dest_src, e.eax);
+        });
+  }
+};
+EMITTER(MIN_I64, MATCH(I<OPCODE_MIN, I64<>, I64<>, I64<>>)) {
+  static void Emit(X64Emitter& e, const EmitArgType& i) {
+    EmitCommutativeBinaryOp(e, i,
+        [](X64Emitter& e, const Reg64& dest_src, const Reg64& src) {
+          e.cmp(dest_src, src);
+          e.cmovg(dest_src, src);
+        },
+        [](X64Emitter& e, const Reg64& dest_src, int64_t constant) {
+          e.mov(e.rax, constant);
+          e.cmp(dest_src, e.rax);
+          e.cmovg(dest_src, e.rax);
+        });
+  }
+};
 EMITTER(MIN_F32, MATCH(I<OPCODE_MIN, F32<>, F32<>, F32<>>)) {
   static void Emit(X64Emitter& e, const EmitArgType& i) {
     EmitCommutativeBinaryXmmOp(e, i,
         [](X64Emitter& e, Xmm dest, Xmm src1, Xmm src2) {
           e.vminss(dest, src1, src2);
@@ -1802,6 +1858,10 @@ EMITTER(MIN_V128, MATCH(I<OPCODE_MIN, V128<>, V128<>, V128<>>)) {
 };
 EMITTER_OPCODE_TABLE(
     OPCODE_MIN,
+    MIN_I8,
+    MIN_I16,
+    MIN_I32,
+    MIN_I64,
     MIN_F32,
     MIN_F64,
     MIN_V128);
@@ -1862,29 +1922,57 @@ EMITTER_OPCODE_TABLE(
 // like SELECT(VECTOR_COMPARE_SGE(a, b), a, b)
 EMITTER(SELECT_I8, MATCH(I<OPCODE_SELECT, I8<>, I8<>, I8<>, I8<>>)) {
   static void Emit(X64Emitter& e, const EmitArgType& i) {
+    Reg8 src2;
+    if (i.src2.is_constant) {
+      src2 = e.al;
+      e.mov(src2, i.src2.constant());
+    } else {
+      src2 = i.src2;
+    }
     e.test(i.src1, i.src1);
-    e.cmovnz(i.dest.reg().cvt32(), i.src2.reg().cvt32());
+    e.cmovnz(i.dest.reg().cvt32(), src2.cvt32());
     e.cmovz(i.dest.reg().cvt32(), i.src3.reg().cvt32());
   }
 };
 EMITTER(SELECT_I16, MATCH(I<OPCODE_SELECT, I16<>, I8<>, I16<>, I16<>>)) {
   static void Emit(X64Emitter& e, const EmitArgType& i) {
+    Reg16 src2;
+    if (i.src2.is_constant) {
+      src2 = e.ax;
+      e.mov(src2, i.src2.constant());
+    } else {
+      src2 = i.src2;
+    }
     e.test(i.src1, i.src1);
-    e.cmovnz(i.dest.reg().cvt32(), i.src2.reg().cvt32());
+    e.cmovnz(i.dest.reg().cvt32(), src2.cvt32());
     e.cmovz(i.dest.reg().cvt32(), i.src3.reg().cvt32());
   }
 };
 EMITTER(SELECT_I32, MATCH(I<OPCODE_SELECT, I32<>, I8<>, I32<>, I32<>>)) {
   static void Emit(X64Emitter& e, const EmitArgType& i) {
+    Reg32 src2;
+    if (i.src2.is_constant) {
+      src2 = e.eax;
+      e.mov(src2, i.src2.constant());
+    } else {
+      src2 = i.src2;
+    }
     e.test(i.src1, i.src1);
-    e.cmovnz(i.dest, i.src2);
+    e.cmovnz(i.dest, src2);
     e.cmovz(i.dest, i.src3);
   }
 };
 EMITTER(SELECT_I64, MATCH(I<OPCODE_SELECT, I64<>, I8<>, I64<>, I64<>>)) {
   static void Emit(X64Emitter& e, const EmitArgType& i) {
+    Reg64 src2;
+    if (i.src2.is_constant) {
+      src2 = e.rax;
+      e.mov(src2, i.src2.constant());
+    } else {
+      src2 = i.src2;
+    }
     e.test(i.src1, i.src1);
-    e.cmovnz(i.dest, i.src2);
+    e.cmovnz(i.dest, src2);
     e.cmovz(i.dest, i.src3);
   }
 };
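A small reference sketch (not part of the diff; the function name is illustrative) of what the test/cmovnz/cmovz sequence computes once a constant src2 has been loaded into the scratch register:

#include <cstdint>

// dest = cond ? src2 : src3; cmovnz supplies src2 when src1 is non-zero,
// cmovz supplies src3 when it is zero.
int64_t Select(uint8_t cond, int64_t src2, int64_t src3) {
  return cond ? src2 : src3;
}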


@@ -989,14 +989,17 @@ XEEMITTER(rlwnmx, 0x5C000000, M)(PPCHIRBuilder& f, InstrData& i) {
 // Integer shift (A-7)
 XEEMITTER(sldx, 0x7C000036, X)(PPCHIRBuilder& f, InstrData& i) {
-  // n <- (RB)[59:63]
+  // n <- (RB)[58:63]
   // r <- ROTL64((RS), n)
-  // if (RB)[58] = 0 then
+  // if (RB)[57] = 0 then
   //   m <- MASK(0, 63-n)
   // else
   //   m <- i64.0
   // RA <- r & m
-  Value* v = f.Shl(f.LoadGPR(i.X.RT), f.LoadGPR(i.X.RB));
+  Value* sh = f.And(f.Truncate(f.LoadGPR(i.X.RB), INT8_TYPE),
+                    f.LoadConstant(int8_t(0x7F)));
+  Value* v = f.Select(f.IsTrue(f.Shr(sh, 6)), f.LoadConstant(int64_t(0)),
+                      f.Shl(f.LoadGPR(i.X.RT), sh));
   f.StoreGPR(i.X.RA, v);
   if (i.X.Rc) {
     f.UpdateCR(0, v);
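For sld, a rough C++ sketch (not part of the diff, assuming the low seven bits of RB carry the shift amount) of the behavior the Select-based lowering above targets:

#include <cstdint>

uint64_t Sld(uint64_t rs, uint64_t rb) {
  uint8_t sh = rb & 0x7F;  // six-bit shift amount plus the "zero result" bit
  if (sh & 0x40) {
    return 0;              // amounts of 64..127 shift everything out
  }
  return rs << sh;         // otherwise an ordinary 64-bit left shift
}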
@@ -1012,8 +1015,10 @@ XEEMITTER(slwx, 0x7C000030, X)(PPCHIRBuilder& f, InstrData& i) {
   // else
   //   m <- i64.0
   // RA <- r & m
-  Value* v =
-      f.Shl(f.Truncate(f.LoadGPR(i.X.RT), INT32_TYPE), f.LoadGPR(i.X.RB));
+  Value* sh = f.And(f.Truncate(f.LoadGPR(i.X.RB), INT8_TYPE),
+                    f.LoadConstant(int8_t(0x3F)));
+  Value* v = f.Select(f.IsTrue(f.Shr(sh, 5)), f.LoadConstant(int32_t(0)),
+                      f.Shl(f.Truncate(f.LoadGPR(i.X.RT), INT32_TYPE), sh));
   v = f.ZeroExtend(v, INT64_TYPE);
   f.StoreGPR(i.X.RA, v);
   if (i.X.Rc) {
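The slw case is the 32-bit analogue; a rough sketch (not part of the diff) under the same assumptions:

#include <cstdint>

uint64_t Slw(uint64_t rs, uint64_t rb) {
  uint8_t sh = rb & 0x3F;                      // low six bits of RB
  uint32_t lo = static_cast<uint32_t>(rs);
  uint32_t r = (sh & 0x20) ? 0u : (lo << sh);  // amounts 32..63 clear the result
  return static_cast<uint64_t>(r);             // zero-extended into RA
}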
@@ -1031,7 +1036,9 @@ XEEMITTER(srdx, 0x7C000436, X)(PPCHIRBuilder& f, InstrData& i) {
   //   m <- i64.0
   // RA <- r & m
-  // TODO(benvanik): if >3F, zero out the result.
-  Value* v = f.Shr(f.LoadGPR(i.X.RT), f.LoadGPR(i.X.RB));
+  Value* sh = f.Truncate(f.LoadGPR(i.X.RB), INT8_TYPE);
+  Value* v = f.Select(f.IsTrue(f.And(sh, f.LoadConstant(int8_t(0x40)))),
+                      f.LoadConstant(int64_t(0)), f.Shr(f.LoadGPR(i.X.RT), sh));
   f.StoreGPR(i.X.RA, v);
   if (i.X.Rc) {
     f.UpdateCR(0, v);
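A rough sketch (not part of the diff) of the srd behavior the new sequence targets, assuming the effective shift amount is the low seven bits of RB:

#include <cstdint>

uint64_t Srd(uint64_t rs, uint64_t rb) {
  uint8_t sh = rb & 0x7F;
  if (sh & 0x40) {
    return 0;       // shift amounts of 64 or more clear the result
  }
  return rs >> sh;  // logical right shift
}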
@@ -1048,8 +1055,10 @@ XEEMITTER(srwx, 0x7C000430, X)(PPCHIRBuilder& f, InstrData& i) {
   //   m <- i64.0
   // RA <- r & m
   // TODO(benvanik): if >1F, zero out the result.
-  Value* v =
-      f.Shr(f.Truncate(f.LoadGPR(i.X.RT), INT32_TYPE), f.LoadGPR(i.X.RB));
+  Value* sh = f.Truncate(f.LoadGPR(i.X.RB), INT8_TYPE);
+  Value* v = f.Select(f.IsTrue(f.And(sh, f.LoadConstant(int8_t(0x20)))),
+                      f.LoadConstant(int32_t(0)),
+                      f.Shr(f.Truncate(f.LoadGPR(i.X.RT), INT32_TYPE), sh));
   v = f.ZeroExtend(v, INT64_TYPE);
   f.StoreGPR(i.X.RA, v);
   if (i.X.Rc) {
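And the matching 32-bit sketch (not part of the diff) for srw:

#include <cstdint>

uint64_t Srw(uint64_t rs, uint64_t rb) {
  uint8_t sh = rb & 0x3F;
  uint32_t lo = static_cast<uint32_t>(rs);
  uint32_t r = (sh & 0x20) ? 0u : (lo >> sh);
  return static_cast<uint64_t>(r);  // zero-extended into RA
}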
@@ -1066,32 +1075,21 @@ XEEMITTER(sradx, 0x7C000634, X)(PPCHIRBuilder& f, InstrData& i) {
   //   S ← rS[0]
   // rA <- (r & m) | (((64)S) & ¬ m)
   // XER[CA] <- S & ((r & ¬ m) ≠ 0)
   // if n == 0: rA <- rS, XER[CA] = 0
   // if n >= 64: rA <- 64 sign bits of rS, XER[CA] = sign bit of rS
-  Value* v = f.LoadGPR(i.X.RT);
+  Value* rt = f.LoadGPR(i.X.RT);
   Value* sh = f.And(f.Truncate(f.LoadGPR(i.X.RB), INT8_TYPE),
-                    f.LoadConstant((int8_t)0x3F));
+                    f.LoadConstant(int8_t(0x7F)));
+  Value* clamp_sh = f.Min(sh, f.LoadConstant(int8_t(0x3F)));
+  Value* v = f.Sha(rt, clamp_sh);
   // CA is set if any bits are shifted out of the right and if the result
-  // is negative. Start tracking that here.
-  // TODO(benvanik): dynamically generate mask better than this.
-  Value* ca_sh = f.Sub(f.LoadConstant((int8_t)63), sh);
-  Value* ca = f.Shr(f.Shl(f.LoadConstant(0xFFFFFFFFFFFFFFFFull), ca_sh), ca_sh);
-  ca = f.CompareNE(f.And(ca, v), f.LoadZero(INT64_TYPE));
-  // Shift right.
-  v = f.Sha(v, sh);
-  // CA is set to 1 if the low-order 32 bits of (RS) contain a negative number
-  // and any 1-bits are shifted out of position 63; otherwise CA is set to 0.
-  // We already have ca set to indicate the pos 63 bit, now just and in sign.
-  ca = f.And(ca, f.Truncate(f.Shr(v, 63), INT8_TYPE));
+  // is negative.
+  Value* ca =
+      f.And(f.IsTrue(f.Shr(rt, 63)), f.CompareNE(f.Shl(v, clamp_sh), rt));
   f.StoreCA(ca);
   f.StoreGPR(i.X.RA, v);
   if (i.X.Rc) {
     f.UpdateCR(0, v);
   }
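A rough C++ sketch (not part of the diff; helper names are illustrative) mirroring the new srad lowering: clamp the shift to 63, shift arithmetically, and raise CA when the source is negative and shifting the result back left no longer reproduces it (i.e. set bits were shifted out). It assumes >> on a negative int64_t behaves as an arithmetic shift:

#include <algorithm>
#include <cstdint>

struct SradResult {
  int64_t ra;  // value stored to RA
  bool ca;     // carry bit stored to XER[CA]
};

SradResult Srad(int64_t rs, uint64_t rb) {
  uint8_t sh = rb & 0x7F;
  uint8_t clamp_sh = std::min<uint8_t>(sh, 0x3F);
  int64_t r = rs >> clamp_sh;  // corresponds to f.Sha(rt, clamp_sh)
  bool ca = (rs < 0) && ((static_cast<uint64_t>(r) << clamp_sh) !=
                         static_cast<uint64_t>(rs));
  return {r, ca};
}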
@@ -1139,16 +1137,18 @@ XEEMITTER(srawx, 0x7C000630, X)(PPCHIRBuilder& f, InstrData& i) {
   // CA <- s & ((r&¬m)[32:63]≠0)
   // if n == 0: rA <- sign_extend(rS), XER[CA] = 0
   // if n >= 32: rA <- 64 sign bits of rS, XER[CA] = sign bit of lo_32(rS)
-  Value* v = f.Truncate(f.LoadGPR(i.X.RT), INT32_TYPE);
-  Value* sh =
-      f.And(f.Truncate(f.LoadGPR(i.X.RB), INT32_TYPE), f.LoadConstant(0x1F));
+  Value* rt = f.Truncate(f.LoadGPR(i.X.RT), INT32_TYPE);
+  Value* sh = f.And(f.Truncate(f.LoadGPR(i.X.RB), INT8_TYPE),
+                    f.LoadConstant(int8_t(0x3F)));
+  Value* clamp_sh = f.Min(sh, f.LoadConstant(int8_t(0x1F)));
+  Value* v = f.Sha(rt, f.Min(sh, clamp_sh));
   // CA is set if any bits are shifted out of the right and if the result
   // is negative.
-  Value* mask = f.Not(f.Shl(f.LoadConstant(-1), sh));
   Value* ca =
-      f.And(f.Truncate(f.Shr(v, 31), INT8_TYPE), f.IsTrue(f.And(v, mask)));
+      f.And(f.IsTrue(f.Shr(rt, 31)), f.CompareNE(f.Shl(v, clamp_sh), rt));
   f.StoreCA(ca);
-  v = f.Sha(v, sh);
   v = f.SignExtend(v, INT64_TYPE);
   f.StoreGPR(i.X.RA, v);
   if (i.X.Rc) {
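The sraw variant is the same idea on the low 32 bits, clamping the shift to 31 and sign-extending the result; a rough sketch (not part of the diff) under the same assumptions:

#include <algorithm>
#include <cstdint>

struct SrawResult {
  int64_t ra;  // sign-extended value stored to RA
  bool ca;     // carry bit stored to XER[CA]
};

SrawResult Sraw(int64_t rs, uint64_t rb) {
  int32_t lo = static_cast<int32_t>(rs);
  uint8_t sh = rb & 0x3F;
  uint8_t clamp_sh = std::min<uint8_t>(sh, 0x1F);
  int32_t r = lo >> clamp_sh;
  bool ca = (lo < 0) && ((static_cast<uint32_t>(r) << clamp_sh) !=
                         static_cast<uint32_t>(lo));
  return {static_cast<int64_t>(r), ca};
}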