vctsxs, vctuxs.

2014-01-09 21:20:03 -08:00 · 2014-01-09 21:20:03 -08:00 · 2980a30f30
parent 478781e0dc
commit 2980a30f30
5 changed files with 90 additions and 19 deletions
--- a/src/alloy/backend/ivm/ivm_intcode.cc
+++ b/src/alloy/backend/ivm/ivm_intcode.cc
@ -1114,7 +1114,7 @@ int Translate_ROUND(TranslationContext& ctx, Instr* i) {
  }
 }

-uint32_t IntCode_VECTOR_CONVERT_I2F(IntCodeState& ics, const IntCode* i) {
+uint32_t IntCode_VECTOR_CONVERT_I2F_S(IntCodeState& ics, const IntCode* i) {
  const vec128_t& src1 = ics.rf[i->src1_reg].v128;
  vec128_t& dest = ics.rf[i->dest_reg].v128;
  dest.f4[0] = (float)(int32_t)src1.i4[0];
@ -1133,7 +1133,63 @@ uint32_t IntCode_VECTOR_CONVERT_I2F_U(IntCodeState& ics, const IntCode* i) {
  return IA_NEXT;
 }
 int Translate_VECTOR_CONVERT_I2F(TranslationContext& ctx, Instr* i) {
-  return DispatchToC(ctx, i, IntCode_VECTOR_CONVERT_I2F);
+  // Signed is the default; the unsigned handler is used only when the
+  // instruction carries ARITHMETIC_UNSIGNED.
+  if (!(i->flags & ARITHMETIC_UNSIGNED)) {
+    return DispatchToC(ctx, i, IntCode_VECTOR_CONVERT_I2F_S);
+  }
+  return DispatchToC(ctx, i, IntCode_VECTOR_CONVERT_I2F_U);
+}
+
+// Lane-wise float -> 32-bit integer conversion using plain C casts (no
+// saturation). ARITHMETIC_UNSIGNED selects a uint32_t cast, otherwise the
+// lanes are cast to int32_t.
+uint32_t IntCode_VECTOR_CONVERT_F2I(IntCodeState& ics, const IntCode* i) {
+  const vec128_t& in = ics.rf[i->src1_reg].v128;
+  vec128_t& out = ics.rf[i->dest_reg].v128;
+  const bool is_unsigned = (i->flags & ARITHMETIC_UNSIGNED) != 0;
+  // Each lane is read before it is written, lowest lane first.
+  for (int lane = 0; lane < 4; lane++) {
+    if (is_unsigned) {
+      out.i4[lane] = (uint32_t)in.f4[lane];
+    } else {
+      out.i4[lane] = (int32_t)in.f4[lane];
+    }
+  }
+  return IA_NEXT;
+}
+// Lane-wise float -> 32-bit integer conversion with saturation.
+// With ARITHMETIC_UNSIGNED the result is clamped to [0, UINT_MAX]; otherwise
+// it is clamped to [INT_MIN, INT_MAX]. NaN lanes produce 0 so that no lane
+// ever reaches an out-of-range float -> integer cast.
+uint32_t IntCode_VECTOR_CONVERT_F2I_SAT(IntCodeState& ics, const IntCode* i) {
+  const vec128_t& src1 = ics.rf[i->src1_reg].v128;
+  vec128_t& dest = ics.rf[i->dest_reg].v128;
+  // UINT_MAX and INT_MAX are not representable as float; converting them
+  // rounds up to 2^32 and 2^31. Compare against those exact powers of two
+  // with >= so the boundary values saturate instead of being cast.
+  const float kTwoPow32 = 4294967296.0f;
+  const float kTwoPow31 = 2147483648.0f;
+  if (i->flags & ARITHMETIC_UNSIGNED) {
+    for (int n = 0; n < 4; n++) {
+      float src = src1.f4[n];
+      if (!(src >= 0.0f)) {
+        // Negative or NaN (every comparison with NaN is false).
+        dest.i4[n] = 0;
+      } else if (src >= kTwoPow32) {
+        dest.i4[n] = UINT_MAX;
+      } else {
+        dest.i4[n] = (uint32_t)src;
+      }
+    }
+  } else {
+    for (int n = 0; n < 4; n++) {
+      float src = src1.f4[n];
+      if (src != src) {
+        // NaN.
+        dest.i4[n] = 0;
+      } else if (src < -kTwoPow31) {
+        dest.i4[n] = INT_MIN;
+      } else if (src >= kTwoPow31) {
+        dest.i4[n] = INT_MAX;
+      } else {
+        dest.i4[n] = (int32_t)src;
+      }
+    }
+  }
+  return IA_NEXT;
+}
+int Translate_VECTOR_CONVERT_F2I(TranslationContext& ctx, Instr* i) {
+  // The saturating handler is used only when ARITHMETIC_SATURATE is set.
+  if (!(i->flags & ARITHMETIC_SATURATE)) {
+    return DispatchToC(ctx, i, IntCode_VECTOR_CONVERT_F2I);
+  }
+  return DispatchToC(ctx, i, IntCode_VECTOR_CONVERT_F2I_SAT);
 }

 static uint8_t __lvsl_table[17][16] = {
@ -3583,7 +3639,7 @@ static const TranslateFn dispatch_table[] = {
  Translate_CONVERT,
  Translate_ROUND,
  Translate_VECTOR_CONVERT_I2F,
-  TranslateInvalid, //Translate_VECTOR_CONVERT_F2I,
+  Translate_VECTOR_CONVERT_F2I,

  Translate_LOAD_VECTOR_SHL,
  Translate_LOAD_VECTOR_SHR,
--- a/src/alloy/frontend/ppc/ppc_emit_altivec.cc
+++ b/src/alloy/frontend/ppc/ppc_emit_altivec.cc
@ -536,24 +536,38 @@ XEEMITTER(vcuxwfp128,     VX128_3(6, 752),  VX128_3)(PPCHIRBuilder& f, InstrData
  return InstrEmit_vcfux_(f, VX128_3_VD128, VX128_3_VB128, VX128_3_IMM);
 }

-XEEMITTER(vcfpsxws128,    VX128_3(6, 560),  VX128_3)(PPCHIRBuilder& f, InstrData& i) {
-  XEINSTRNOTIMPLEMENTED();
-  return 1;
+// Emits HIR that multiplies VB by 2^uimm, converts the product to integers
+// with ARITHMETIC_SATURATE and stores the result in VD. Always returns 0.
+int InstrEmit_vctsxs_(PPCHIRBuilder& f, uint32_t vd, uint32_t vb, uint32_t uimm) {
+  // (VD) <- int_sat(VB as signed * 2^uimm)
+  // Unsigned shift so that uimm == 31 yields 2^31 without overflowing a
+  // signed int.
+  const float scale = (float)(1u << uimm);
+  Value* v = f.Mul(
+      f.LoadVR(vb),
+      f.Splat(f.LoadConstant(scale), VEC128_TYPE));
+  v = f.VectorConvertF2I(v, ARITHMETIC_SATURATE);
+  f.StoreVR(vd, v);
+  return 0;
 }
-
-XEEMITTER(vcfpuxws128,    VX128_3(6, 624),  VX128_3)(PPCHIRBuilder& f, InstrData& i) {
-  XEINSTRNOTIMPLEMENTED();
-  return 1;
-}
-
 XEEMITTER(vctsxs,         0x100003CA, VX  )(PPCHIRBuilder& f, InstrData& i) {
-  XEINSTRNOTIMPLEMENTED();
-  return 1;
+  return InstrEmit_vctsxs_(f, i.VX.VD, i.VX.VB, i.VX.VA);  // VX form: the VA field is passed as uimm
+}
+XEEMITTER(vcfpsxws128,    VX128_3(6, 560),  VX128_3)(PPCHIRBuilder& f, InstrData& i) {
+  return InstrEmit_vctsxs_(f, VX128_3_VD128, VX128_3_VB128, VX128_3_IMM);  // same emitter as vctsxs, operands taken from the VX128_3 macros
 }

+// Emits HIR that multiplies VB by 2^uimm, converts the product to integers
+// with ARITHMETIC_UNSIGNED | ARITHMETIC_SATURATE and stores the result in VD.
+// Always returns 0.
+int InstrEmit_vctuxs_(PPCHIRBuilder& f, uint32_t vd, uint32_t vb, uint32_t uimm) {
+  // (VD) <- int_sat(VB as unsigned * 2^uimm)
+  // Unsigned shift so that uimm == 31 yields 2^31 without overflowing a
+  // signed int.
+  const float scale = (float)(1u << uimm);
+  Value* v = f.Mul(
+      f.LoadVR(vb),
+      f.Splat(f.LoadConstant(scale), VEC128_TYPE));
+  v = f.VectorConvertF2I(v, ARITHMETIC_UNSIGNED | ARITHMETIC_SATURATE);
+  f.StoreVR(vd, v);
+  return 0;
+}
 XEEMITTER(vctuxs,         0x1000038A, VX  )(PPCHIRBuilder& f, InstrData& i) {
-  XEINSTRNOTIMPLEMENTED();
-  return 1;
+  return InstrEmit_vctuxs_(f, i.VX.VD, i.VX.VB, i.VX.VA);  // VX form: the VA field is passed as uimm
+}
+XEEMITTER(vcfpuxws128,    VX128_3(6, 624),  VX128_3)(PPCHIRBuilder& f, InstrData& i) {
+  return InstrEmit_vctuxs_(f, VX128_3_VD128, VX128_3_VB128, VX128_3_IMM);  // same emitter as vctuxs, operands taken from the VX128_3 macros
 }

 int InstrEmit_vcmpbfp_(PPCHIRBuilder& f, InstrData& i, uint32_t vd, uint32_t va, uint32_t vb, uint32_t rc) {
--- a/src/alloy/hir/hir_builder.cc
+++ b/src/alloy/hir/hir_builder.cc
@ -744,11 +744,11 @@ Value* HIRBuilder::VectorConvertI2F(Value* value, uint32_t arithmetic_flags) {
  return i->dest;
 }

-Value* HIRBuilder::VectorConvertF2I(Value* value, RoundMode round_mode) {
+Value* HIRBuilder::VectorConvertF2I(Value* value, uint32_t arithmetic_flags) {
  ASSERT_VECTOR_TYPE(value);

  Instr* i = AppendInstr(
-      OPCODE_VECTOR_CONVERT_F2I_info, round_mode,
+      OPCODE_VECTOR_CONVERT_F2I_info, arithmetic_flags,
      AllocValue(value->type));
  i->set_src1(value);
  i->src2.value = i->src3.value = NULL;
--- a/src/alloy/hir/hir_builder.h
+++ b/src/alloy/hir/hir_builder.h
@ -99,7 +99,7 @@ public:
  //     It'd be nice if Convert() supported this, however then we'd need a
  //     VEC128_INT32_TYPE or something.
  Value* VectorConvertI2F(Value* value, uint32_t arithmetic_flags = 0);
-  Value* VectorConvertF2I(Value* value, RoundMode round_mode = ROUND_TO_ZERO);
+  Value* VectorConvertF2I(Value* value, uint32_t arithmetic_flags = 0);

  Value* LoadZero(TypeName type);
  Value* LoadConstant(int8_t value);
--- a/src/alloy/hir/opcodes.h
+++ b/src/alloy/hir/opcodes.h
@ -50,6 +50,7 @@ enum PrefetchFlags {
 enum ArithmeticFlags {
  ARITHMETIC_SET_CARRY = (1 << 1),
  ARITHMETIC_UNSIGNED = (1 << 2),  // tested by the vector convert translators/handlers to pick the unsigned path
+  ARITHMETIC_SATURATE = (1 << 3),  // tested by Translate_VECTOR_CONVERT_F2I to pick the saturating handler
 };
 enum Permutes {
  PERMUTE_XY_ZW = 0x00010405,