Fixing left and right shifts.

Ben Vanik 2014-09-10 23:07:03 -07:00
parent e50a45d33d
commit 9690525abc
2 changed files with 126 additions and 38 deletions


@@ -1776,8 +1776,64 @@ EMITTER_OPCODE_TABLE(
 // ============================================================================
 // OPCODE_MIN
 // ============================================================================
+EMITTER(MIN_I8, MATCH(I<OPCODE_MIN, I8<>, I8<>, I8<>>)) {
+  static void Emit(X64Emitter& e, const EmitArgType& i) {
+    EmitCommutativeBinaryOp(e, i,
+        [](X64Emitter& e, const Reg8& dest_src, const Reg8& src) {
+          e.cmp(dest_src, src);
+          e.cmovg(dest_src.cvt32(), src.cvt32());
+        },
+        [](X64Emitter& e, const Reg8& dest_src, int32_t constant) {
+          e.mov(e.al, constant);
+          e.cmp(dest_src, e.al);
+          e.cmovg(dest_src.cvt32(), e.eax);
+        });
+  }
+};
+EMITTER(MIN_I16, MATCH(I<OPCODE_MIN, I16<>, I16<>, I16<>>)) {
+  static void Emit(X64Emitter& e, const EmitArgType& i) {
+    EmitCommutativeBinaryOp(e, i,
+        [](X64Emitter& e, const Reg16& dest_src, const Reg16& src) {
+          e.cmp(dest_src, src);
+          e.cmovg(dest_src.cvt32(), src.cvt32());
+        },
+        [](X64Emitter& e, const Reg16& dest_src, int32_t constant) {
+          e.mov(e.ax, constant);
+          e.cmp(dest_src, e.ax);
+          e.cmovg(dest_src.cvt32(), e.eax);
+        });
+  }
+};
+EMITTER(MIN_I32, MATCH(I<OPCODE_MIN, I32<>, I32<>, I32<>>)) {
+  static void Emit(X64Emitter& e, const EmitArgType& i) {
+    EmitCommutativeBinaryOp(e, i,
+        [](X64Emitter& e, const Reg32& dest_src, const Reg32& src) {
+          e.cmp(dest_src, src);
+          e.cmovg(dest_src, src);
+        },
+        [](X64Emitter& e, const Reg32& dest_src, int32_t constant) {
+          e.mov(e.eax, constant);
+          e.cmp(dest_src, e.eax);
+          e.cmovg(dest_src, e.eax);
+        });
+  }
+};
+EMITTER(MIN_I64, MATCH(I<OPCODE_MIN, I64<>, I64<>, I64<>>)) {
+  static void Emit(X64Emitter& e, const EmitArgType& i) {
+    EmitCommutativeBinaryOp(e, i,
+        [](X64Emitter& e, const Reg64& dest_src, const Reg64& src) {
+          e.cmp(dest_src, src);
+          e.cmovg(dest_src, src);
+        },
+        [](X64Emitter& e, const Reg64& dest_src, int64_t constant) {
+          e.mov(e.rax, constant);
+          e.cmp(dest_src, e.rax);
+          e.cmovg(dest_src, e.rax);
+        });
+  }
+};
 EMITTER(MIN_F32, MATCH(I<OPCODE_MIN, F32<>, F32<>, F32<>>)) {
   static void Emit(X64Emitter& e, const EmitArgType& i) {
     EmitCommutativeBinaryXmmOp(e, i,
         [](X64Emitter& e, Xmm dest, Xmm src1, Xmm src2) {
           e.vminss(dest, src1, src2);
@@ -1802,6 +1858,10 @@ EMITTER(MIN_V128, MATCH(I<OPCODE_MIN, V128<>, V128<>, V128<>>)) {
 };
 EMITTER_OPCODE_TABLE(
     OPCODE_MIN,
+    MIN_I8,
+    MIN_I16,
+    MIN_I32,
+    MIN_I64,
     MIN_F32,
     MIN_F64,
     MIN_V128);
@@ -1862,29 +1922,57 @@ EMITTER_OPCODE_TABLE(
 // like SELECT(VECTOR_COMPARE_SGE(a, b), a, b)
 EMITTER(SELECT_I8, MATCH(I<OPCODE_SELECT, I8<>, I8<>, I8<>, I8<>>)) {
   static void Emit(X64Emitter& e, const EmitArgType& i) {
+    Reg8 src2;
+    if (i.src2.is_constant) {
+      src2 = e.al;
+      e.mov(src2, i.src2.constant());
+    } else {
+      src2 = i.src2;
+    }
     e.test(i.src1, i.src1);
-    e.cmovnz(i.dest.reg().cvt32(), i.src2.reg().cvt32());
+    e.cmovnz(i.dest.reg().cvt32(), src2.cvt32());
     e.cmovz(i.dest.reg().cvt32(), i.src3.reg().cvt32());
   }
 };
 EMITTER(SELECT_I16, MATCH(I<OPCODE_SELECT, I16<>, I8<>, I16<>, I16<>>)) {
   static void Emit(X64Emitter& e, const EmitArgType& i) {
+    Reg16 src2;
+    if (i.src2.is_constant) {
+      src2 = e.ax;
+      e.mov(src2, i.src2.constant());
+    } else {
+      src2 = i.src2;
+    }
     e.test(i.src1, i.src1);
-    e.cmovnz(i.dest.reg().cvt32(), i.src2.reg().cvt32());
+    e.cmovnz(i.dest.reg().cvt32(), src2.cvt32());
     e.cmovz(i.dest.reg().cvt32(), i.src3.reg().cvt32());
   }
 };
 EMITTER(SELECT_I32, MATCH(I<OPCODE_SELECT, I32<>, I8<>, I32<>, I32<>>)) {
   static void Emit(X64Emitter& e, const EmitArgType& i) {
+    Reg32 src2;
+    if (i.src2.is_constant) {
+      src2 = e.eax;
+      e.mov(src2, i.src2.constant());
+    } else {
+      src2 = i.src2;
+    }
     e.test(i.src1, i.src1);
-    e.cmovnz(i.dest, i.src2);
+    e.cmovnz(i.dest, src2);
     e.cmovz(i.dest, i.src3);
   }
 };
 EMITTER(SELECT_I64, MATCH(I<OPCODE_SELECT, I64<>, I8<>, I64<>, I64<>>)) {
   static void Emit(X64Emitter& e, const EmitArgType& i) {
+    Reg64 src2;
+    if (i.src2.is_constant) {
+      src2 = e.rax;
+      e.mov(src2, i.src2.constant());
+    } else {
+      src2 = i.src2;
+    }
     e.test(i.src1, i.src1);
-    e.cmovnz(i.dest, i.src2);
+    e.cmovnz(i.dest, src2);
     e.cmovz(i.dest, i.src3);
   }
 };
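A small reference sketch (not part of the diff; the function name is illustrative) of what the test/cmovnz/cmovz sequence computes once a constant src2 has been loaded into the scratch register:

#include <cstdint>

// dest = cond ? src2 : src3; cmovnz supplies src2 when src1 is non-zero,
// cmovz supplies src3 when it is zero.
int64_t Select(uint8_t cond, int64_t src2, int64_t src3) {
  return cond ? src2 : src3;
}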


@@ -989,14 +989,17 @@ XEEMITTER(rlwnmx, 0x5C000000, M)(PPCHIRBuilder& f, InstrData& i) {
 // Integer shift (A-7)
 XEEMITTER(sldx, 0x7C000036, X)(PPCHIRBuilder& f, InstrData& i) {
-  // n <- (RB)[59:63]
+  // n <- (RB)[58:63]
   // r <- ROTL64((RS), n)
-  // if (RB)[58] = 0 then
+  // if (RB)[57] = 0 then
   //   m <- MASK(0, 63-n)
   // else
   //   m <- i64.0
   // RA <- r & m
-  Value* v = f.Shl(f.LoadGPR(i.X.RT), f.LoadGPR(i.X.RB));
+  Value* sh = f.And(f.Truncate(f.LoadGPR(i.X.RB), INT8_TYPE),
+                    f.LoadConstant(int8_t(0x7F)));
+  Value* v = f.Select(f.IsTrue(f.Shr(sh, 6)), f.LoadConstant(int64_t(0)),
+                      f.Shl(f.LoadGPR(i.X.RT), sh));
   f.StoreGPR(i.X.RA, v);
   if (i.X.Rc) {
     f.UpdateCR(0, v);
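For sld, a rough C++ sketch (not part of the diff, assuming the low seven bits of RB carry the shift amount) of the behavior the Select-based lowering above targets:

#include <cstdint>

uint64_t Sld(uint64_t rs, uint64_t rb) {
  uint8_t sh = rb & 0x7F;  // six-bit shift amount plus the "zero result" bit
  if (sh & 0x40) {
    return 0;              // amounts of 64..127 shift everything out
  }
  return rs << sh;         // otherwise an ordinary 64-bit left shift
}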
@@ -1012,8 +1015,10 @@ XEEMITTER(slwx, 0x7C000030, X)(PPCHIRBuilder& f, InstrData& i) {
   // else
   //   m <- i64.0
   // RA <- r & m
-  Value* v =
-      f.Shl(f.Truncate(f.LoadGPR(i.X.RT), INT32_TYPE), f.LoadGPR(i.X.RB));
+  Value* sh = f.And(f.Truncate(f.LoadGPR(i.X.RB), INT8_TYPE),
+                    f.LoadConstant(int8_t(0x3F)));
+  Value* v = f.Select(f.IsTrue(f.Shr(sh, 5)), f.LoadConstant(int32_t(0)),
+                      f.Shl(f.Truncate(f.LoadGPR(i.X.RT), INT32_TYPE), sh));
   v = f.ZeroExtend(v, INT64_TYPE);
   f.StoreGPR(i.X.RA, v);
   if (i.X.Rc) {
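The slw case is the 32-bit analogue; a rough sketch (not part of the diff) under the same assumptions:

#include <cstdint>

uint64_t Slw(uint64_t rs, uint64_t rb) {
  uint8_t sh = rb & 0x3F;                      // low six bits of RB
  uint32_t lo = static_cast<uint32_t>(rs);
  uint32_t r = (sh & 0x20) ? 0u : (lo << sh);  // amounts 32..63 clear the result
  return static_cast<uint64_t>(r);             // zero-extended into RA
}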
@@ -1031,7 +1036,9 @@ XEEMITTER(srdx, 0x7C000436, X)(PPCHIRBuilder& f, InstrData& i) {
   //   m <- i64.0
   // RA <- r & m
-  // TODO(benvanik): if >3F, zero out the result.
-  Value* v = f.Shr(f.LoadGPR(i.X.RT), f.LoadGPR(i.X.RB));
+  Value* sh = f.Truncate(f.LoadGPR(i.X.RB), INT8_TYPE);
+  Value* v = f.Select(f.IsTrue(f.And(sh, f.LoadConstant(int8_t(0x40)))),
+                      f.LoadConstant(int64_t(0)), f.Shr(f.LoadGPR(i.X.RT), sh));
   f.StoreGPR(i.X.RA, v);
   if (i.X.Rc) {
     f.UpdateCR(0, v);
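A rough sketch (not part of the diff) of the srd behavior the new sequence targets, assuming the effective shift amount is the low seven bits of RB:

#include <cstdint>

uint64_t Srd(uint64_t rs, uint64_t rb) {
  uint8_t sh = rb & 0x7F;
  if (sh & 0x40) {
    return 0;       // shift amounts of 64 or more clear the result
  }
  return rs >> sh;  // logical right shift
}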
@@ -1048,8 +1055,10 @@ XEEMITTER(srwx, 0x7C000430, X)(PPCHIRBuilder& f, InstrData& i) {
   //   m <- i64.0
   // RA <- r & m
   // TODO(benvanik): if >1F, zero out the result.
-  Value* v =
-      f.Shr(f.Truncate(f.LoadGPR(i.X.RT), INT32_TYPE), f.LoadGPR(i.X.RB));
+  Value* sh = f.Truncate(f.LoadGPR(i.X.RB), INT8_TYPE);
+  Value* v = f.Select(f.IsTrue(f.And(sh, f.LoadConstant(int8_t(0x20)))),
+                      f.LoadConstant(int32_t(0)),
+                      f.Shr(f.Truncate(f.LoadGPR(i.X.RT), INT32_TYPE), sh));
   v = f.ZeroExtend(v, INT64_TYPE);
   f.StoreGPR(i.X.RA, v);
   if (i.X.Rc) {
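And the matching 32-bit sketch (not part of the diff) for srw:

#include <cstdint>

uint64_t Srw(uint64_t rs, uint64_t rb) {
  uint8_t sh = rb & 0x3F;
  uint32_t lo = static_cast<uint32_t>(rs);
  uint32_t r = (sh & 0x20) ? 0u : (lo >> sh);
  return static_cast<uint64_t>(r);  // zero-extended into RA
}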
@@ -1066,32 +1075,21 @@ XEEMITTER(sradx, 0x7C000634, X)(PPCHIRBuilder& f, InstrData& i) {
   //   S ← rS[0]
   // rA <- (r & m) | (((64)S) & ¬ m)
   // XER[CA] <- S & ((r & ¬ m) ≠ 0)
   // if n == 0: rA <- rS, XER[CA] = 0
   // if n >= 64: rA <- 64 sign bits of rS, XER[CA] = sign bit of rS
-  Value* v = f.LoadGPR(i.X.RT);
+  Value* rt = f.LoadGPR(i.X.RT);
   Value* sh = f.And(f.Truncate(f.LoadGPR(i.X.RB), INT8_TYPE),
-                    f.LoadConstant((int8_t)0x3F));
+                    f.LoadConstant(int8_t(0x7F)));
+  Value* clamp_sh = f.Min(sh, f.LoadConstant(int8_t(0x3F)));
+  Value* v = f.Sha(rt, clamp_sh);
   // CA is set if any bits are shifted out of the right and if the result
-  // is negative. Start tracking that here.
-  // TODO(benvanik): dynamically generate mask better than this.
-  Value* ca_sh = f.Sub(f.LoadConstant((int8_t)63), sh);
-  Value* ca = f.Shr(f.Shl(f.LoadConstant(0xFFFFFFFFFFFFFFFFull), ca_sh), ca_sh);
-  ca = f.CompareNE(f.And(ca, v), f.LoadZero(INT64_TYPE));
-  // Shift right.
-  v = f.Sha(v, sh);
-  // CA is set to 1 if the low-order 32 bits of (RS) contain a negative number
-  // and any 1-bits are shifted out of position 63; otherwise CA is set to 0.
-  // We already have ca set to indicate the pos 63 bit, now just and in sign.
-  ca = f.And(ca, f.Truncate(f.Shr(v, 63), INT8_TYPE));
+  // is negative.
+  Value* ca =
+      f.And(f.IsTrue(f.Shr(rt, 63)), f.CompareNE(f.Shl(v, clamp_sh), rt));
   f.StoreCA(ca);
   f.StoreGPR(i.X.RA, v);
   if (i.X.Rc) {
     f.UpdateCR(0, v);
   }
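A rough C++ sketch (not part of the diff; helper names are illustrative) mirroring the new srad lowering: clamp the shift to 63, shift arithmetically, and raise CA when the source is negative and shifting the result back left no longer reproduces it (i.e. set bits were shifted out). It assumes >> on a negative int64_t behaves as an arithmetic shift:

#include <algorithm>
#include <cstdint>

struct SradResult {
  int64_t ra;  // value stored to RA
  bool ca;     // carry bit stored to XER[CA]
};

SradResult Srad(int64_t rs, uint64_t rb) {
  uint8_t sh = rb & 0x7F;
  uint8_t clamp_sh = std::min<uint8_t>(sh, 0x3F);
  int64_t r = rs >> clamp_sh;  // corresponds to f.Sha(rt, clamp_sh)
  bool ca = (rs < 0) && ((static_cast<uint64_t>(r) << clamp_sh) !=
                         static_cast<uint64_t>(rs));
  return {r, ca};
}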
@@ -1139,16 +1137,18 @@ XEEMITTER(srawx, 0x7C000630, X)(PPCHIRBuilder& f, InstrData& i) {
   // CA <- s & ((r&¬m)[32:63]≠0)
   // if n == 0: rA <- sign_extend(rS), XER[CA] = 0
   // if n >= 32: rA <- 64 sign bits of rS, XER[CA] = sign bit of lo_32(rS)
-  Value* v = f.Truncate(f.LoadGPR(i.X.RT), INT32_TYPE);
-  Value* sh =
-      f.And(f.Truncate(f.LoadGPR(i.X.RB), INT32_TYPE), f.LoadConstant(0x1F));
+  Value* rt = f.Truncate(f.LoadGPR(i.X.RT), INT32_TYPE);
+  Value* sh = f.And(f.Truncate(f.LoadGPR(i.X.RB), INT8_TYPE),
+                    f.LoadConstant(int8_t(0x3F)));
+  Value* clamp_sh = f.Min(sh, f.LoadConstant(int8_t(0x1F)));
+  Value* v = f.Sha(rt, f.Min(sh, clamp_sh));
   // CA is set if any bits are shifted out of the right and if the result
   // is negative.
-  Value* mask = f.Not(f.Shl(f.LoadConstant(-1), sh));
   Value* ca =
-      f.And(f.Truncate(f.Shr(v, 31), INT8_TYPE), f.IsTrue(f.And(v, mask)));
+      f.And(f.IsTrue(f.Shr(rt, 31)), f.CompareNE(f.Shl(v, clamp_sh), rt));
   f.StoreCA(ca);
-  v = f.Sha(v, sh);
   v = f.SignExtend(v, INT64_TYPE);
   f.StoreGPR(i.X.RA, v);
   if (i.X.Rc) {
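The sraw variant is the same idea on the low 32 bits, clamping the shift to 31 and sign-extending the result; a rough sketch (not part of the diff) under the same assumptions:

#include <algorithm>
#include <cstdint>

struct SrawResult {
  int64_t ra;  // sign-extended value stored to RA
  bool ca;     // carry bit stored to XER[CA]
};

SrawResult Sraw(int64_t rs, uint64_t rb) {
  int32_t lo = static_cast<int32_t>(rs);
  uint8_t sh = rb & 0x3F;
  uint8_t clamp_sh = std::min<uint8_t>(sh, 0x1F);
  int32_t r = lo >> clamp_sh;
  bool ca = (lo < 0) && ((static_cast<uint32_t>(r) << clamp_sh) !=
                         static_cast<uint32_t>(lo));
  return {static_cast<int64_t>(r), ca};
}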