VECTOR_MIN and VECTOR_MAX instructions.

2014-08-04 18:54:06 -07:00 · 2014-08-04 18:54:06 -07:00 · f0e9fd92a0
parent e6275691cb
commit f0e9fd92a0
7 changed files with 348 additions and 33 deletions
--- a/src/alloy/backend/ivm/ivm_intcode.cc
+++ b/src/alloy/backend/ivm/ivm_intcode.cc
@ -9,6 +9,8 @@

 #include <alloy/backend/ivm/ivm_intcode.h>

+#include <algorithm>
+
 #include <poly/poly.h>
 #include <alloy/hir/label.h>
 #include <alloy/runtime/runtime.h>
@ -1636,6 +1638,77 @@ int Translate_MAX(TranslationContext& ctx, Instr* i) {
  return DispatchToC(ctx, i, fns[i->dest->type]);
 }

+// Interpreter handler: writes, for each of the 16 b16 lanes, the larger of the
+// two source lanes into the destination vector register. No cast is applied,
+// so ordering follows the lane's declared type (presumably uint8_t - confirm
+// against vec128_t).
+uint32_t IntCode_VECTOR_MAX_I8_UNSIGNED(IntCodeState& ics, const IntCode* i) {
+  const vec128_t& lhs = ics.rf[i->src1_reg].v128;
+  const vec128_t& rhs = ics.rf[i->src2_reg].v128;
+  vec128_t& out = ics.rf[i->dest_reg].v128;
+  for (int lane = 0; lane < 16; ++lane) {
+    out.b16[lane] = std::max(lhs.b16[lane], rhs.b16[lane]);
+  }
+  return IA_NEXT;
+}
+// Interpreter handler: writes, for each of the 8 s8 lanes, the larger of the
+// two source lanes into the destination vector register. No cast is applied,
+// so ordering follows the lane's declared type (presumably uint16_t - confirm
+// against vec128_t).
+uint32_t IntCode_VECTOR_MAX_I16_UNSIGNED(IntCodeState& ics, const IntCode* i) {
+  const vec128_t& lhs = ics.rf[i->src1_reg].v128;
+  const vec128_t& rhs = ics.rf[i->src2_reg].v128;
+  vec128_t& out = ics.rf[i->dest_reg].v128;
+  for (int lane = 0; lane < 8; ++lane) {
+    out.s8[lane] = std::max(lhs.s8[lane], rhs.s8[lane]);
+  }
+  return IA_NEXT;
+}
+// Interpreter handler: writes, for each of the 4 i4 lanes, the larger of the
+// two source lanes into the destination vector register. No cast is applied,
+// so ordering follows the lane's declared type (presumably uint32_t - confirm
+// against vec128_t).
+uint32_t IntCode_VECTOR_MAX_I32_UNSIGNED(IntCodeState& ics, const IntCode* i) {
+  const vec128_t& lhs = ics.rf[i->src1_reg].v128;
+  const vec128_t& rhs = ics.rf[i->src2_reg].v128;
+  vec128_t& out = ics.rf[i->dest_reg].v128;
+  for (int lane = 0; lane < 4; ++lane) {
+    out.i4[lane] = std::max(lhs.i4[lane], rhs.i4[lane]);
+  }
+  return IA_NEXT;
+}
+// Interpreter handler: per-lane signed maximum over the 16 b16 lanes. Each
+// lane is converted to int8_t before comparing; the winner is stored back
+// into the destination register's b16 lane.
+uint32_t IntCode_VECTOR_MAX_I8_SIGNED(IntCodeState& ics, const IntCode* i) {
+  const vec128_t& lhs = ics.rf[i->src1_reg].v128;
+  const vec128_t& rhs = ics.rf[i->src2_reg].v128;
+  vec128_t& out = ics.rf[i->dest_reg].v128;
+  for (int lane = 0; lane < 16; ++lane) {
+    // Read both operands before writing: out may be the same register.
+    const int8_t a = static_cast<int8_t>(lhs.b16[lane]);
+    const int8_t b = static_cast<int8_t>(rhs.b16[lane]);
+    out.b16[lane] = std::max(a, b);
+  }
+  return IA_NEXT;
+}
+// Interpreter handler: per-lane signed maximum over the 8 s8 lanes. Each lane
+// is converted to int16_t before comparing; the winner is stored back into
+// the destination register's s8 lane.
+uint32_t IntCode_VECTOR_MAX_I16_SIGNED(IntCodeState& ics, const IntCode* i) {
+  const vec128_t& lhs = ics.rf[i->src1_reg].v128;
+  const vec128_t& rhs = ics.rf[i->src2_reg].v128;
+  vec128_t& out = ics.rf[i->dest_reg].v128;
+  for (int lane = 0; lane < 8; ++lane) {
+    // Read both operands before writing: out may be the same register.
+    const int16_t a = static_cast<int16_t>(lhs.s8[lane]);
+    const int16_t b = static_cast<int16_t>(rhs.s8[lane]);
+    out.s8[lane] = std::max(a, b);
+  }
+  return IA_NEXT;
+}
+// Interpreter handler: per-lane signed maximum over the 4 i4 lanes. Each lane
+// is converted to int32_t before comparing; the winner is stored back into
+// the destination register's i4 lane.
+uint32_t IntCode_VECTOR_MAX_I32_SIGNED(IntCodeState& ics, const IntCode* i) {
+  const vec128_t& lhs = ics.rf[i->src1_reg].v128;
+  const vec128_t& rhs = ics.rf[i->src2_reg].v128;
+  vec128_t& out = ics.rf[i->dest_reg].v128;
+  for (int lane = 0; lane < 4; ++lane) {
+    // Read both operands before writing: out may be the same register.
+    const int32_t a = static_cast<int32_t>(lhs.i4[lane]);
+    const int32_t b = static_cast<int32_t>(rhs.i4[lane]);
+    out.i4[lane] = std::max(a, b);
+  }
+  return IA_NEXT;
+}
+// Selects the interpreter handler for a vector max instruction.
+// The per-lane element type is read from flags bits 8 and up and used as the
+// index into the handler tables (slot 0 = I8, 1 = I16, 2 = I32); the
+// ARITHMETIC_UNSIGNED bit picks the unsigned table, otherwise the signed one.
+// Returns whatever DispatchToC returns for the chosen handler.
+int Translate_VECTOR_MAX(TranslationContext& ctx, Instr* i) {
+  static IntCodeFn unsigned_fns[] = {
+      IntCode_VECTOR_MAX_I8_UNSIGNED, IntCode_VECTOR_MAX_I16_UNSIGNED,
+      IntCode_VECTOR_MAX_I32_UNSIGNED,
+  };
+  static IntCodeFn signed_fns[] = {
+      IntCode_VECTOR_MAX_I8_SIGNED, IntCode_VECTOR_MAX_I16_SIGNED,
+      IntCode_VECTOR_MAX_I32_SIGNED,
+  };
+  const uint32_t part_type = i->flags >> 8;
+  // Both tables have the same three entries. Any other lane type would read
+  // past the end of the table, so trap it instead of dispatching garbage.
+  assert_true(part_type < sizeof(signed_fns) / sizeof(signed_fns[0]));
+  IntCodeFn* fns =
+      (i->flags & ARITHMETIC_UNSIGNED) ? unsigned_fns : signed_fns;
+  return DispatchToC(ctx, i, fns[part_type]);
+}
+
 uint32_t IntCode_MIN_I8_I8(IntCodeState& ics, const IntCode* i) {
  int8_t a = ics.rf[i->src1_reg].i8;
  int8_t b = ics.rf[i->src2_reg].i8;
@ -1688,6 +1761,77 @@ int Translate_MIN(TranslationContext& ctx, Instr* i) {
  return DispatchToC(ctx, i, fns[i->dest->type]);
 }

+// Interpreter handler: writes, for each of the 16 b16 lanes, the smaller of
+// the two source lanes into the destination vector register. No cast is
+// applied, so ordering follows the lane's declared type (presumably uint8_t -
+// confirm against vec128_t).
+uint32_t IntCode_VECTOR_MIN_I8_UNSIGNED(IntCodeState& ics, const IntCode* i) {
+  const vec128_t& lhs = ics.rf[i->src1_reg].v128;
+  const vec128_t& rhs = ics.rf[i->src2_reg].v128;
+  vec128_t& out = ics.rf[i->dest_reg].v128;
+  for (int lane = 0; lane < 16; ++lane) {
+    out.b16[lane] = std::min(lhs.b16[lane], rhs.b16[lane]);
+  }
+  return IA_NEXT;
+}
+// Interpreter handler: writes, for each of the 8 s8 lanes, the smaller of the
+// two source lanes into the destination vector register. No cast is applied,
+// so ordering follows the lane's declared type (presumably uint16_t - confirm
+// against vec128_t).
+uint32_t IntCode_VECTOR_MIN_I16_UNSIGNED(IntCodeState& ics, const IntCode* i) {
+  const vec128_t& lhs = ics.rf[i->src1_reg].v128;
+  const vec128_t& rhs = ics.rf[i->src2_reg].v128;
+  vec128_t& out = ics.rf[i->dest_reg].v128;
+  for (int lane = 0; lane < 8; ++lane) {
+    out.s8[lane] = std::min(lhs.s8[lane], rhs.s8[lane]);
+  }
+  return IA_NEXT;
+}
+// Interpreter handler: writes, for each of the 4 i4 lanes, the smaller of the
+// two source lanes into the destination vector register. No cast is applied,
+// so ordering follows the lane's declared type (presumably uint32_t - confirm
+// against vec128_t).
+uint32_t IntCode_VECTOR_MIN_I32_UNSIGNED(IntCodeState& ics, const IntCode* i) {
+  const vec128_t& lhs = ics.rf[i->src1_reg].v128;
+  const vec128_t& rhs = ics.rf[i->src2_reg].v128;
+  vec128_t& out = ics.rf[i->dest_reg].v128;
+  for (int lane = 0; lane < 4; ++lane) {
+    out.i4[lane] = std::min(lhs.i4[lane], rhs.i4[lane]);
+  }
+  return IA_NEXT;
+}
+// Interpreter handler: per-lane signed minimum over the 16 b16 lanes. Each
+// lane is converted to int8_t before comparing; the winner is stored back
+// into the destination register's b16 lane.
+uint32_t IntCode_VECTOR_MIN_I8_SIGNED(IntCodeState& ics, const IntCode* i) {
+  const vec128_t& lhs = ics.rf[i->src1_reg].v128;
+  const vec128_t& rhs = ics.rf[i->src2_reg].v128;
+  vec128_t& out = ics.rf[i->dest_reg].v128;
+  for (int lane = 0; lane < 16; ++lane) {
+    // Read both operands before writing: out may be the same register.
+    const int8_t a = static_cast<int8_t>(lhs.b16[lane]);
+    const int8_t b = static_cast<int8_t>(rhs.b16[lane]);
+    out.b16[lane] = std::min(a, b);
+  }
+  return IA_NEXT;
+}
+// Interpreter handler: per-lane signed minimum over the 8 s8 lanes. Each lane
+// is converted to int16_t before comparing; the winner is stored back into
+// the destination register's s8 lane.
+uint32_t IntCode_VECTOR_MIN_I16_SIGNED(IntCodeState& ics, const IntCode* i) {
+  const vec128_t& lhs = ics.rf[i->src1_reg].v128;
+  const vec128_t& rhs = ics.rf[i->src2_reg].v128;
+  vec128_t& out = ics.rf[i->dest_reg].v128;
+  for (int lane = 0; lane < 8; ++lane) {
+    // Read both operands before writing: out may be the same register.
+    const int16_t a = static_cast<int16_t>(lhs.s8[lane]);
+    const int16_t b = static_cast<int16_t>(rhs.s8[lane]);
+    out.s8[lane] = std::min(a, b);
+  }
+  return IA_NEXT;
+}
+// Interpreter handler: per-lane signed minimum over the 4 i4 lanes. Each lane
+// is converted to int32_t before comparing; the winner is stored back into
+// the destination register's i4 lane.
+uint32_t IntCode_VECTOR_MIN_I32_SIGNED(IntCodeState& ics, const IntCode* i) {
+  const vec128_t& lhs = ics.rf[i->src1_reg].v128;
+  const vec128_t& rhs = ics.rf[i->src2_reg].v128;
+  vec128_t& out = ics.rf[i->dest_reg].v128;
+  for (int lane = 0; lane < 4; ++lane) {
+    // Read both operands before writing: out may be the same register.
+    const int32_t a = static_cast<int32_t>(lhs.i4[lane]);
+    const int32_t b = static_cast<int32_t>(rhs.i4[lane]);
+    out.i4[lane] = std::min(a, b);
+  }
+  return IA_NEXT;
+}
+// Selects the interpreter handler for a vector min instruction.
+// The per-lane element type is read from flags bits 8 and up and used as the
+// index into the handler tables (slot 0 = I8, 1 = I16, 2 = I32); the
+// ARITHMETIC_UNSIGNED bit picks the unsigned table, otherwise the signed one.
+// Returns whatever DispatchToC returns for the chosen handler.
+int Translate_VECTOR_MIN(TranslationContext& ctx, Instr* i) {
+  static IntCodeFn unsigned_fns[] = {
+      IntCode_VECTOR_MIN_I8_UNSIGNED, IntCode_VECTOR_MIN_I16_UNSIGNED,
+      IntCode_VECTOR_MIN_I32_UNSIGNED,
+  };
+  static IntCodeFn signed_fns[] = {
+      IntCode_VECTOR_MIN_I8_SIGNED, IntCode_VECTOR_MIN_I16_SIGNED,
+      IntCode_VECTOR_MIN_I32_SIGNED,
+  };
+  const uint32_t part_type = i->flags >> 8;
+  // Both tables have the same three entries. Any other lane type would read
+  // past the end of the table, so trap it instead of dispatching garbage.
+  assert_true(part_type < sizeof(signed_fns) / sizeof(signed_fns[0]));
+  IntCodeFn* fns =
+      (i->flags & ARITHMETIC_UNSIGNED) ? unsigned_fns : signed_fns;
+  return DispatchToC(ctx, i, fns[part_type]);
+}
+
 uint32_t IntCode_SELECT_I8(IntCodeState& ics, const IntCode* i) {
  ics.rf[i->dest_reg].i8 =
      ics.rf[i->src1_reg].i8 ? ics.rf[i->src2_reg].i8 : ics.rf[i->src3_reg].i8;
@ -2174,13 +2318,13 @@ int Translate_DID_SATURATE(TranslationContext& ctx, Instr* i) {
  }                                                                    \
  return IA_NEXT;

-uint32_t IntCode_VECTOR_COMPARE_EQ_I8(IntCodeState& ics, const IntCode* i) {
+uint32_t IntCode_VECTOR_COMPARE_EQ_I8(IntCodeState& ics, const IntCode* i){
    VECTOR_COMPARER(uint8_t, b16, b16, 16, == )};
-uint32_t IntCode_VECTOR_COMPARE_EQ_I16(IntCodeState& ics, const IntCode* i) {
+uint32_t IntCode_VECTOR_COMPARE_EQ_I16(IntCodeState& ics, const IntCode* i){
    VECTOR_COMPARER(uint16_t, s8, s8, 8, == )};
-uint32_t IntCode_VECTOR_COMPARE_EQ_I32(IntCodeState& ics, const IntCode* i) {
+uint32_t IntCode_VECTOR_COMPARE_EQ_I32(IntCodeState& ics, const IntCode* i){
    VECTOR_COMPARER(uint32_t, i4, i4, 4, == )};
-uint32_t IntCode_VECTOR_COMPARE_EQ_F32(IntCodeState& ics, const IntCode* i) {
+uint32_t IntCode_VECTOR_COMPARE_EQ_F32(IntCodeState& ics, const IntCode* i){
    VECTOR_COMPARER(float, f4, i4, 4, == )};
 int Translate_VECTOR_COMPARE_EQ(TranslationContext& ctx, Instr* i) {
  static IntCodeFn fns[] = {
@ -2192,13 +2336,13 @@ int Translate_VECTOR_COMPARE_EQ(TranslationContext& ctx, Instr* i) {
  return DispatchToC(ctx, i, fns[i->flags]);
 }

-uint32_t IntCode_VECTOR_COMPARE_SGT_I8(IntCodeState& ics, const IntCode* i) {
+uint32_t IntCode_VECTOR_COMPARE_SGT_I8(IntCodeState& ics, const IntCode* i){
    VECTOR_COMPARER(int8_t, b16, b16, 16, > )};
-uint32_t IntCode_VECTOR_COMPARE_SGT_I16(IntCodeState& ics, const IntCode* i) {
+uint32_t IntCode_VECTOR_COMPARE_SGT_I16(IntCodeState& ics, const IntCode* i){
    VECTOR_COMPARER(int16_t, s8, s8, 8, > )};
-uint32_t IntCode_VECTOR_COMPARE_SGT_I32(IntCodeState& ics, const IntCode* i) {
+uint32_t IntCode_VECTOR_COMPARE_SGT_I32(IntCodeState& ics, const IntCode* i){
    VECTOR_COMPARER(int32_t, i4, i4, 4, > )};
-uint32_t IntCode_VECTOR_COMPARE_SGT_F32(IntCodeState& ics, const IntCode* i) {
+uint32_t IntCode_VECTOR_COMPARE_SGT_F32(IntCodeState& ics, const IntCode* i){
    VECTOR_COMPARER(float, f4, i4, 4, > )};
 int Translate_VECTOR_COMPARE_SGT(TranslationContext& ctx, Instr* i) {
  static IntCodeFn fns[] = {
@ -4041,7 +4185,8 @@ static const TranslateFn dispatch_table[] = {
    Translate_LOAD_CONTEXT,       Translate_STORE_CONTEXT,
    Translate_LOAD,               Translate_STORE,
    Translate_PREFETCH,           Translate_MAX,
-    Translate_MIN,                Translate_SELECT,
+    Translate_VECTOR_MAX,         Translate_MIN,
+    Translate_VECTOR_MIN,         Translate_SELECT,
    Translate_IS_TRUE,            Translate_IS_FALSE,
    Translate_COMPARE_EQ,         Translate_COMPARE_NE,
    Translate_COMPARE_SLT,        Translate_COMPARE_SLE,
--- a/src/alloy/backend/x64/x64_sequences.cc
+++ b/src/alloy/backend/x64/x64_sequences.cc
@ -1739,6 +1739,53 @@ EMITTER_OPCODE_TABLE(
    MAX_V128);


+// ============================================================================
+// OPCODE_VECTOR_MAX
+// ============================================================================
+EMITTER(VECTOR_MAX, MATCH(I<OPCODE_VECTOR_MAX, V128<>, V128<>, V128<>>)) {
+  // Emits a single three-operand vpmax* instruction. The lane width comes
+  // from flags bits 8 and up; ARITHMETIC_UNSIGNED chooses the unsigned form
+  // (vpmaxu*) over the signed one (vpmaxs*). Any lane type other than
+  // INT8/INT16/INT32 hits assert_unhandled_case and emits nothing.
+  static void Emit(X64Emitter& e, const EmitArgType& i) {
+    EmitCommutativeBinaryXmmOp(e, i,
+        [&i](X64Emitter& e, Xmm dest, Xmm src1, Xmm src2) {
+          const uint32_t part_type = i.instr->flags >> 8;
+          const bool is_unsigned =
+              (i.instr->flags & ARITHMETIC_UNSIGNED) != 0;
+          switch (part_type) {
+          case INT8_TYPE:
+            if (is_unsigned) {
+              e.vpmaxub(dest, src1, src2);
+            } else {
+              e.vpmaxsb(dest, src1, src2);
+            }
+            break;
+          case INT16_TYPE:
+            if (is_unsigned) {
+              e.vpmaxuw(dest, src1, src2);
+            } else {
+              e.vpmaxsw(dest, src1, src2);
+            }
+            break;
+          case INT32_TYPE:
+            if (is_unsigned) {
+              e.vpmaxud(dest, src1, src2);
+            } else {
+              e.vpmaxsd(dest, src1, src2);
+            }
+            break;
+          default:
+            assert_unhandled_case(part_type);
+            break;
+          }
+        });
+  }
+};
+EMITTER_OPCODE_TABLE(
+    OPCODE_VECTOR_MAX,
+    VECTOR_MAX);
+
+
 // ============================================================================
 // OPCODE_MIN
 // ============================================================================
@ -1773,6 +1820,53 @@ EMITTER_OPCODE_TABLE(
    MIN_V128);


+// ============================================================================
+// OPCODE_VECTOR_MIN
+// ============================================================================
+EMITTER(VECTOR_MIN, MATCH(I<OPCODE_VECTOR_MIN, V128<>, V128<>, V128<>>)) {
+  // Emits a single three-operand vpmin* instruction. The lane width comes
+  // from flags bits 8 and up; ARITHMETIC_UNSIGNED chooses the unsigned form
+  // (vpminu*) over the signed one (vpmins*). Any lane type other than
+  // INT8/INT16/INT32 hits assert_unhandled_case and emits nothing.
+  static void Emit(X64Emitter& e, const EmitArgType& i) {
+    EmitCommutativeBinaryXmmOp(e, i,
+        [&i](X64Emitter& e, Xmm dest, Xmm src1, Xmm src2) {
+          const uint32_t part_type = i.instr->flags >> 8;
+          const bool is_unsigned =
+              (i.instr->flags & ARITHMETIC_UNSIGNED) != 0;
+          switch (part_type) {
+          case INT8_TYPE:
+            if (is_unsigned) {
+              e.vpminub(dest, src1, src2);
+            } else {
+              e.vpminsb(dest, src1, src2);
+            }
+            break;
+          case INT16_TYPE:
+            if (is_unsigned) {
+              e.vpminuw(dest, src1, src2);
+            } else {
+              e.vpminsw(dest, src1, src2);
+            }
+            break;
+          case INT32_TYPE:
+            if (is_unsigned) {
+              e.vpminud(dest, src1, src2);
+            } else {
+              e.vpminsd(dest, src1, src2);
+            }
+            break;
+          default:
+            assert_unhandled_case(part_type);
+            break;
+          }
+        });
+  }
+};
+EMITTER_OPCODE_TABLE(
+    OPCODE_VECTOR_MIN,
+    VECTOR_MIN);
+
+
 // ============================================================================
 // OPCODE_SELECT
 // ============================================================================
@ -5042,7 +5136,9 @@ void RegisterSequences() {
  REGISTER_EMITTER_OPCODE_TABLE(OPCODE_STORE);
  REGISTER_EMITTER_OPCODE_TABLE(OPCODE_PREFETCH);
  REGISTER_EMITTER_OPCODE_TABLE(OPCODE_MAX);
+  REGISTER_EMITTER_OPCODE_TABLE(OPCODE_VECTOR_MAX);
  REGISTER_EMITTER_OPCODE_TABLE(OPCODE_MIN);
+  REGISTER_EMITTER_OPCODE_TABLE(OPCODE_VECTOR_MIN);
  REGISTER_EMITTER_OPCODE_TABLE(OPCODE_SELECT);
  REGISTER_EMITTER_OPCODE_TABLE(OPCODE_IS_TRUE);
  REGISTER_EMITTER_OPCODE_TABLE(OPCODE_IS_FALSE);
--- a/src/alloy/frontend/ppc/ppc_emit_altivec.cc
+++ b/src/alloy/frontend/ppc/ppc_emit_altivec.cc
@ -803,33 +803,48 @@ XEEMITTER(vmaxfp128, VX128(6, 640), VX128)(PPCHIRBuilder& f, InstrData& i) {
 }

 XEEMITTER(vmaxsb, 0x10000102, VX)(PPCHIRBuilder& f, InstrData& i) {
-  XEINSTRNOTIMPLEMENTED();
-  return 1;
+  // vmaxsb: VD <- element-wise max of VA and VB over int8 lanes. No
+  // arithmetic flag is passed, which the backends treat as signed.
+  f.StoreVR(i.VX.VD,
+            f.VectorMax(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB), INT8_TYPE));
+  return 0;
 }

 XEEMITTER(vmaxsh, 0x10000142, VX)(PPCHIRBuilder& f, InstrData& i) {
-  XEINSTRNOTIMPLEMENTED();
-  return 1;
+  // vmaxsh: VD <- element-wise max of VA and VB over int16 lanes. No
+  // arithmetic flag is passed, which the backends treat as signed.
+  f.StoreVR(i.VX.VD,
+            f.VectorMax(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB), INT16_TYPE));
+  return 0;
 }

 XEEMITTER(vmaxsw, 0x10000182, VX)(PPCHIRBuilder& f, InstrData& i) {
-  XEINSTRNOTIMPLEMENTED();
-  return 1;
+  // vmaxsw: VD <- element-wise max of VA and VB over int32 lanes. No
+  // arithmetic flag is passed, which the backends treat as signed.
+  f.StoreVR(i.VX.VD,
+            f.VectorMax(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB), INT32_TYPE));
+  return 0;
 }

 XEEMITTER(vmaxub, 0x10000002, VX)(PPCHIRBuilder& f, InstrData& i) {
-  XEINSTRNOTIMPLEMENTED();
-  return 1;
+  // vmaxub: VD <- element-wise max of VA and VB over int8 lanes, compared
+  // as unsigned (ARITHMETIC_UNSIGNED).
+  f.StoreVR(i.VX.VD, f.VectorMax(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB),
+                                 INT8_TYPE, ARITHMETIC_UNSIGNED));
+  return 0;
 }

 XEEMITTER(vmaxuh, 0x10000042, VX)(PPCHIRBuilder& f, InstrData& i) {
-  XEINSTRNOTIMPLEMENTED();
-  return 1;
+  // vmaxuh: VD <- element-wise max of VA and VB over int16 lanes, compared
+  // as unsigned (ARITHMETIC_UNSIGNED).
+  f.StoreVR(i.VX.VD, f.VectorMax(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB),
+                                 INT16_TYPE, ARITHMETIC_UNSIGNED));
+  return 0;
 }

 XEEMITTER(vmaxuw, 0x10000082, VX)(PPCHIRBuilder& f, InstrData& i) {
-  XEINSTRNOTIMPLEMENTED();
-  return 1;
+  // vmaxuw: VD <- element-wise max of VA and VB over int32 lanes, compared
+  // as unsigned (ARITHMETIC_UNSIGNED).
+  f.StoreVR(i.VX.VD, f.VectorMax(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB),
+                                 INT32_TYPE, ARITHMETIC_UNSIGNED));
+  return 0;
 }

 XEEMITTER(vmhaddshs, 0x10000020, VXA)(PPCHIRBuilder& f, InstrData& i) {
@ -856,33 +871,48 @@ XEEMITTER(vminfp128, VX128(6, 704), VX128)(PPCHIRBuilder& f, InstrData& i) {
 }

 XEEMITTER(vminsb, 0x10000302, VX)(PPCHIRBuilder& f, InstrData& i) {
-  XEINSTRNOTIMPLEMENTED();
-  return 1;
+  // vminsb: VD <- element-wise min of VA and VB over int8 lanes. No
+  // arithmetic flag is passed, which the backends treat as signed.
+  f.StoreVR(i.VX.VD,
+            f.VectorMin(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB), INT8_TYPE));
+  return 0;
 }

 XEEMITTER(vminsh, 0x10000342, VX)(PPCHIRBuilder& f, InstrData& i) {
-  XEINSTRNOTIMPLEMENTED();
-  return 1;
+  // vminsh: VD <- element-wise min of VA and VB over int16 lanes. No
+  // arithmetic flag is passed, which the backends treat as signed.
+  f.StoreVR(i.VX.VD,
+            f.VectorMin(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB), INT16_TYPE));
+  return 0;
 }

 XEEMITTER(vminsw, 0x10000382, VX)(PPCHIRBuilder& f, InstrData& i) {
-  XEINSTRNOTIMPLEMENTED();
-  return 1;
+  // vminsw: VD <- element-wise min of VA and VB over int32 lanes. No
+  // arithmetic flag is passed, which the backends treat as signed.
+  f.StoreVR(i.VX.VD,
+            f.VectorMin(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB), INT32_TYPE));
+  return 0;
 }

 XEEMITTER(vminub, 0x10000202, VX)(PPCHIRBuilder& f, InstrData& i) {
-  XEINSTRNOTIMPLEMENTED();
-  return 1;
+  // vminub: VD <- element-wise min of VA and VB over int8 lanes, compared
+  // as unsigned (ARITHMETIC_UNSIGNED).
+  f.StoreVR(i.VX.VD, f.VectorMin(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB),
+                                 INT8_TYPE, ARITHMETIC_UNSIGNED));
+  return 0;
 }

 XEEMITTER(vminuh, 0x10000242, VX)(PPCHIRBuilder& f, InstrData& i) {
-  XEINSTRNOTIMPLEMENTED();
-  return 1;
+  // vminuh: VD <- element-wise min of VA and VB over int16 lanes, compared
+  // as unsigned (ARITHMETIC_UNSIGNED).
+  f.StoreVR(i.VX.VD, f.VectorMin(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB),
+                                 INT16_TYPE, ARITHMETIC_UNSIGNED));
+  return 0;
 }

 XEEMITTER(vminuw, 0x10000282, VX)(PPCHIRBuilder& f, InstrData& i) {
-  XEINSTRNOTIMPLEMENTED();
-  return 1;
+  // vminuw: VD <- element-wise min of VA and VB over int32 lanes, compared
+  // as unsigned (ARITHMETIC_UNSIGNED).
+  f.StoreVR(i.VX.VD, f.VectorMin(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB),
+                                 INT32_TYPE, ARITHMETIC_UNSIGNED));
+  return 0;
 }

 XEEMITTER(vmladduhm, 0x10000022, VXA)(PPCHIRBuilder& f, InstrData& i) {
--- a/src/alloy/hir/hir_builder.cc
+++ b/src/alloy/hir/hir_builder.cc
@ -1035,6 +1035,19 @@ Value* HIRBuilder::Max(Value* value1, Value* value2) {
  return i->dest;
 }

+// Appends an OPCODE_VECTOR_MAX instruction taking value1 and value2 and
+// returns its destination value, allocated with value1's type.
+//   part_type:        element type of each vector lane; stored in the
+//                     instruction flags from bit 8 upward.
+//   arithmetic_flags: ARITHMETIC_* modifier bits (e.g. ARITHMETIC_UNSIGNED);
+//                     stored in the low byte of the instruction flags.
+Value* HIRBuilder::VectorMax(Value* value1, Value* value2, TypeName part_type,
+                             uint32_t arithmetic_flags) {
+  ASSERT_TYPES_EQUAL(value1, value2);
+  // Both fields are packed into one 16-bit flags word. Modifier bits must
+  // stay below bit 8 and the lane type must fit in the upper byte; otherwise
+  // the narrowing below would silently change what `flags >> 8` decodes to.
+  assert_true((arithmetic_flags & ~0xFFu) == 0);
+  assert_true((static_cast<uint32_t>(part_type) & ~0xFFu) == 0);
+
+  uint16_t flags =
+      static_cast<uint16_t>(arithmetic_flags | (part_type << 8));
+  Instr* i =
+      AppendInstr(OPCODE_VECTOR_MAX_info, flags, AllocValue(value1->type));
+  i->set_src1(value1);
+  i->set_src2(value2);
+  i->src3.value = NULL;  // two-operand instruction; third source is unused
+  return i->dest;
+}
+
 Value* HIRBuilder::Min(Value* value1, Value* value2) {
  ASSERT_TYPES_EQUAL(value1, value2);

@ -1050,6 +1063,19 @@ Value* HIRBuilder::Min(Value* value1, Value* value2) {
  return i->dest;
 }

+// Appends an OPCODE_VECTOR_MIN instruction taking value1 and value2 and
+// returns its destination value, allocated with value1's type.
+//   part_type:        element type of each vector lane; stored in the
+//                     instruction flags from bit 8 upward.
+//   arithmetic_flags: ARITHMETIC_* modifier bits (e.g. ARITHMETIC_UNSIGNED);
+//                     stored in the low byte of the instruction flags.
+Value* HIRBuilder::VectorMin(Value* value1, Value* value2, TypeName part_type,
+                             uint32_t arithmetic_flags) {
+  ASSERT_TYPES_EQUAL(value1, value2);
+  // Both fields are packed into one 16-bit flags word. Modifier bits must
+  // stay below bit 8 and the lane type must fit in the upper byte; otherwise
+  // the narrowing below would silently change what `flags >> 8` decodes to.
+  assert_true((arithmetic_flags & ~0xFFu) == 0);
+  assert_true((static_cast<uint32_t>(part_type) & ~0xFFu) == 0);
+
+  uint16_t flags =
+      static_cast<uint16_t>(arithmetic_flags | (part_type << 8));
+  Instr* i =
+      AppendInstr(OPCODE_VECTOR_MIN_info, flags, AllocValue(value1->type));
+  i->set_src1(value1);
+  i->set_src2(value2);
+  i->src3.value = NULL;  // two-operand instruction; third source is unused
+  return i->dest;
+}
+
 Value* HIRBuilder::Select(Value* cond, Value* value1, Value* value2) {
  assert_true(cond->type == INT8_TYPE);  // for now
  ASSERT_TYPES_EQUAL(value1, value2);
--- a/src/alloy/hir/hir_builder.h
+++ b/src/alloy/hir/hir_builder.h
@ -136,7 +136,11 @@ class HIRBuilder {
  void Prefetch(Value* address, size_t length, uint32_t prefetch_flags = 0);

  Value* Max(Value* value1, Value* value2);
+  Value* VectorMax(Value* value1, Value* value2, TypeName part_type,
+                   uint32_t arithmetic_flags = 0);
  Value* Min(Value* value1, Value* value2);
+  Value* VectorMin(Value* value1, Value* value2, TypeName part_type,
+                   uint32_t arithmetic_flags = 0);
  Value* Select(Value* cond, Value* value1, Value* value2);
  Value* IsTrue(Value* value);
  Value* IsFalse(Value* value);
--- a/src/alloy/hir/opcodes.h
+++ b/src/alloy/hir/opcodes.h
@ -112,7 +112,9 @@ enum Opcode {
  OPCODE_STORE,
  OPCODE_PREFETCH,
  OPCODE_MAX,
+  OPCODE_VECTOR_MAX,
  OPCODE_MIN,
+  OPCODE_VECTOR_MIN,
  OPCODE_SELECT,
  OPCODE_IS_TRUE,
  OPCODE_IS_FALSE,
--- a/src/alloy/hir/opcodes.inl
+++ b/src/alloy/hir/opcodes.inl
@ -236,12 +236,24 @@ DEFINE_OPCODE(
    OPCODE_SIG_V_V_V,
    0)

+DEFINE_OPCODE(
+    OPCODE_VECTOR_MAX,
+    "vector_max",
+    OPCODE_SIG_V_V_V,
+    0)
+
 DEFINE_OPCODE(
    OPCODE_MIN,
    "min",
    OPCODE_SIG_V_V_V,
    0)

+DEFINE_OPCODE(
+    OPCODE_VECTOR_MIN,
+    "vector_min",
+    OPCODE_SIG_V_V_V,
+    0)
+
 DEFINE_OPCODE(
    OPCODE_SELECT,
    "select",