From f0e9fd92a0a9a3dfb8e7387a6c30d922f1697fcb Mon Sep 17 00:00:00 2001
From: Ben Vanik <ben.vanik@gmail.com>
Date: Mon, 4 Aug 2014 18:54:06 -0700
Subject: [PATCH] VECTOR_MIN and VECTOR_MAX instructions.

---
 src/alloy/backend/ivm/ivm_intcode.cc       | 163 +++++++++++++++++++--
 src/alloy/backend/x64/x64_sequences.cc     |  96 ++++++++++++
 src/alloy/frontend/ppc/ppc_emit_altivec.cc |  78 +++++++---
 src/alloy/hir/hir_builder.cc               |  26 ++++
 src/alloy/hir/hir_builder.h                |   4 +
 src/alloy/hir/opcodes.h                    |   2 +
 src/alloy/hir/opcodes.inl                  |  12 ++
 7 files changed, 348 insertions(+), 33 deletions(-)
diff --git a/src/alloy/backend/ivm/ivm_intcode.cc b/src/alloy/backend/ivm/ivm_intcode.cc
index fdc4ed8d7..04a306b69 100644
--- a/src/alloy/backend/ivm/ivm_intcode.cc
+++ b/src/alloy/backend/ivm/ivm_intcode.cc
@@ -9,6 +9,8 @@
 
 #include <alloy/backend/ivm/ivm_intcode.h>
 
+#include <algorithm>
+
 #include <poly/poly.h>
 #include <alloy/hir/label.h>
 #include <alloy/runtime/runtime.h>
@@ -1636,6 +1638,77 @@ int Translate_MAX(TranslationContext& ctx, Instr* i) {
   return DispatchToC(ctx, i, fns[i->dest->type]);
 }
 
+// Per-lane max over the 16 b16 elements, compared as stored (no cast).
+uint32_t IntCode_VECTOR_MAX_I8_UNSIGNED(IntCodeState& ics, const IntCode* i) {
+  const vec128_t& lhs = ics.rf[i->src1_reg].v128;
+  const vec128_t& rhs = ics.rf[i->src2_reg].v128;
+  vec128_t& out = ics.rf[i->dest_reg].v128;
+  for (int lane = 0; lane < 16; ++lane)
+    out.b16[lane] = std::max(lhs.b16[lane], rhs.b16[lane]);
+  return IA_NEXT;
+}
+// Per-lane max over the 8 s8 elements, compared as stored (no cast).
+uint32_t IntCode_VECTOR_MAX_I16_UNSIGNED(IntCodeState& ics, const IntCode* i) {
+  const vec128_t& lhs = ics.rf[i->src1_reg].v128;
+  const vec128_t& rhs = ics.rf[i->src2_reg].v128;
+  vec128_t& out = ics.rf[i->dest_reg].v128;
+  for (int lane = 0; lane < 8; ++lane)
+    out.s8[lane] = std::max(lhs.s8[lane], rhs.s8[lane]);
+  return IA_NEXT;
+}
+// Per-lane max over the 4 i4 elements, compared as stored (no cast).
+uint32_t IntCode_VECTOR_MAX_I32_UNSIGNED(IntCodeState& ics, const IntCode* i) {
+  const vec128_t& lhs = ics.rf[i->src1_reg].v128;
+  const vec128_t& rhs = ics.rf[i->src2_reg].v128;
+  vec128_t& out = ics.rf[i->dest_reg].v128;
+  for (int lane = 0; lane < 4; ++lane)
+    out.i4[lane] = std::max(lhs.i4[lane], rhs.i4[lane]);
+  return IA_NEXT;
+}
+// Per-lane max over the 16 b16 elements, each compared as int8_t.
+uint32_t IntCode_VECTOR_MAX_I8_SIGNED(IntCodeState& ics, const IntCode* i) {
+  const vec128_t& lhs = ics.rf[i->src1_reg].v128;
+  const vec128_t& rhs = ics.rf[i->src2_reg].v128;
+  vec128_t& out = ics.rf[i->dest_reg].v128;
+  for (int lane = 0; lane < 16; ++lane)
+    out.b16[lane] = std::max((int8_t)lhs.b16[lane], (int8_t)rhs.b16[lane]);
+  return IA_NEXT;
+}
+// Per-lane max over the 8 s8 elements, each compared as int16_t.
+uint32_t IntCode_VECTOR_MAX_I16_SIGNED(IntCodeState& ics, const IntCode* i) {
+  const vec128_t& lhs = ics.rf[i->src1_reg].v128;
+  const vec128_t& rhs = ics.rf[i->src2_reg].v128;
+  vec128_t& out = ics.rf[i->dest_reg].v128;
+  for (int lane = 0; lane < 8; ++lane)
+    out.s8[lane] = std::max((int16_t)lhs.s8[lane], (int16_t)rhs.s8[lane]);
+  return IA_NEXT;
+}
+// Per-lane max over the 4 i4 elements, each compared as int32_t.
+uint32_t IntCode_VECTOR_MAX_I32_SIGNED(IntCodeState& ics, const IntCode* i) {
+  const vec128_t& lhs = ics.rf[i->src1_reg].v128;
+  const vec128_t& rhs = ics.rf[i->src2_reg].v128;
+  vec128_t& out = ics.rf[i->dest_reg].v128;
+  for (int lane = 0; lane < 4; ++lane)
+    out.i4[lane] = std::max((int32_t)lhs.i4[lane], (int32_t)rhs.i4[lane]);
+  return IA_NEXT;
+}
+// Picks the VECTOR_MAX routine from the lane type/signedness held in flags.
+int Translate_VECTOR_MAX(TranslationContext& ctx, Instr* i) {
+  static IntCodeFn unsigned_fns[] = {
+      IntCode_VECTOR_MAX_I8_UNSIGNED, IntCode_VECTOR_MAX_I16_UNSIGNED,
+      IntCode_VECTOR_MAX_I32_UNSIGNED,
+  };
+  static IntCodeFn signed_fns[] = {
+      IntCode_VECTOR_MAX_I8_SIGNED, IntCode_VECTOR_MAX_I16_SIGNED,
+      IntCode_VECTOR_MAX_I32_SIGNED,
+  };
+  uint32_t part_type = i->flags >> 8;
+  // Both tables have the same length; any larger part_type would index past it.
+  assert_true(part_type < sizeof(signed_fns) / sizeof(signed_fns[0]));
+  IntCodeFn* fns = (i->flags & ARITHMETIC_UNSIGNED) ? unsigned_fns : signed_fns;
+  return DispatchToC(ctx, i, fns[part_type]);
+}
+
 uint32_t IntCode_MIN_I8_I8(IntCodeState& ics, const IntCode* i) {
   int8_t a = ics.rf[i->src1_reg].i8;
   int8_t b = ics.rf[i->src2_reg].i8;
@@ -1688,6 +1761,77 @@ int Translate_MIN(TranslationContext& ctx, Instr* i) {
   return DispatchToC(ctx, i, fns[i->dest->type]);
 }
 
+// Per-lane min over the 16 b16 elements, compared as stored (no cast).
+uint32_t IntCode_VECTOR_MIN_I8_UNSIGNED(IntCodeState& ics, const IntCode* i) {
+  const vec128_t& lhs = ics.rf[i->src1_reg].v128;
+  const vec128_t& rhs = ics.rf[i->src2_reg].v128;
+  vec128_t& out = ics.rf[i->dest_reg].v128;
+  for (int lane = 0; lane < 16; ++lane)
+    out.b16[lane] = std::min(lhs.b16[lane], rhs.b16[lane]);
+  return IA_NEXT;
+}
+// Per-lane min over the 8 s8 elements, compared as stored (no cast).
+uint32_t IntCode_VECTOR_MIN_I16_UNSIGNED(IntCodeState& ics, const IntCode* i) {
+  const vec128_t& lhs = ics.rf[i->src1_reg].v128;
+  const vec128_t& rhs = ics.rf[i->src2_reg].v128;
+  vec128_t& out = ics.rf[i->dest_reg].v128;
+  for (int lane = 0; lane < 8; ++lane)
+    out.s8[lane] = std::min(lhs.s8[lane], rhs.s8[lane]);
+  return IA_NEXT;
+}
+// Per-lane min over the 4 i4 elements, compared as stored (no cast).
+uint32_t IntCode_VECTOR_MIN_I32_UNSIGNED(IntCodeState& ics, const IntCode* i) {
+  const vec128_t& lhs = ics.rf[i->src1_reg].v128;
+  const vec128_t& rhs = ics.rf[i->src2_reg].v128;
+  vec128_t& out = ics.rf[i->dest_reg].v128;
+  for (int lane = 0; lane < 4; ++lane)
+    out.i4[lane] = std::min(lhs.i4[lane], rhs.i4[lane]);
+  return IA_NEXT;
+}
+// Per-lane min over the 16 b16 elements, each compared as int8_t.
+uint32_t IntCode_VECTOR_MIN_I8_SIGNED(IntCodeState& ics, const IntCode* i) {
+  const vec128_t& lhs = ics.rf[i->src1_reg].v128;
+  const vec128_t& rhs = ics.rf[i->src2_reg].v128;
+  vec128_t& out = ics.rf[i->dest_reg].v128;
+  for (int lane = 0; lane < 16; ++lane)
+    out.b16[lane] = std::min((int8_t)lhs.b16[lane], (int8_t)rhs.b16[lane]);
+  return IA_NEXT;
+}
+// Per-lane min over the 8 s8 elements, each compared as int16_t.
+uint32_t IntCode_VECTOR_MIN_I16_SIGNED(IntCodeState& ics, const IntCode* i) {
+  const vec128_t& lhs = ics.rf[i->src1_reg].v128;
+  const vec128_t& rhs = ics.rf[i->src2_reg].v128;
+  vec128_t& out = ics.rf[i->dest_reg].v128;
+  for (int lane = 0; lane < 8; ++lane)
+    out.s8[lane] = std::min((int16_t)lhs.s8[lane], (int16_t)rhs.s8[lane]);
+  return IA_NEXT;
+}
+// Per-lane min over the 4 i4 elements, each compared as int32_t.
+uint32_t IntCode_VECTOR_MIN_I32_SIGNED(IntCodeState& ics, const IntCode* i) {
+  const vec128_t& lhs = ics.rf[i->src1_reg].v128;
+  const vec128_t& rhs = ics.rf[i->src2_reg].v128;
+  vec128_t& out = ics.rf[i->dest_reg].v128;
+  for (int lane = 0; lane < 4; ++lane)
+    out.i4[lane] = std::min((int32_t)lhs.i4[lane], (int32_t)rhs.i4[lane]);
+  return IA_NEXT;
+}
+// Picks the VECTOR_MIN routine from the lane type/signedness held in flags.
+int Translate_VECTOR_MIN(TranslationContext& ctx, Instr* i) {
+  static IntCodeFn unsigned_fns[] = {
+      IntCode_VECTOR_MIN_I8_UNSIGNED, IntCode_VECTOR_MIN_I16_UNSIGNED,
+      IntCode_VECTOR_MIN_I32_UNSIGNED,
+  };
+  static IntCodeFn signed_fns[] = {
+      IntCode_VECTOR_MIN_I8_SIGNED, IntCode_VECTOR_MIN_I16_SIGNED,
+      IntCode_VECTOR_MIN_I32_SIGNED,
+  };
+  uint32_t part_type = i->flags >> 8;
+  // Both tables have the same length; any larger part_type would index past it.
+  assert_true(part_type < sizeof(signed_fns) / sizeof(signed_fns[0]));
+  IntCodeFn* fns = (i->flags & ARITHMETIC_UNSIGNED) ? unsigned_fns : signed_fns;
+  return DispatchToC(ctx, i, fns[part_type]);
+}
+
 uint32_t IntCode_SELECT_I8(IntCodeState& ics, const IntCode* i) {
   ics.rf[i->dest_reg].i8 =
       ics.rf[i->src1_reg].i8 ? ics.rf[i->src2_reg].i8 : ics.rf[i->src3_reg].i8;
@@ -2174,13 +2318,13 @@ int Translate_DID_SATURATE(TranslationContext& ctx, Instr* i) {
   }                                                                    \
   return IA_NEXT;
 
-uint32_t IntCode_VECTOR_COMPARE_EQ_I8(IntCodeState& ics, const IntCode* i) {
+uint32_t IntCode_VECTOR_COMPARE_EQ_I8(IntCodeState& ics, const IntCode* i){
     VECTOR_COMPARER(uint8_t, b16, b16, 16, == )};
-uint32_t IntCode_VECTOR_COMPARE_EQ_I16(IntCodeState& ics, const IntCode* i) {
+uint32_t IntCode_VECTOR_COMPARE_EQ_I16(IntCodeState& ics, const IntCode* i){
     VECTOR_COMPARER(uint16_t, s8, s8, 8, == )};
-uint32_t IntCode_VECTOR_COMPARE_EQ_I32(IntCodeState& ics, const IntCode* i) {
+uint32_t IntCode_VECTOR_COMPARE_EQ_I32(IntCodeState& ics, const IntCode* i){
     VECTOR_COMPARER(uint32_t, i4, i4, 4, == )};
-uint32_t IntCode_VECTOR_COMPARE_EQ_F32(IntCodeState& ics, const IntCode* i) {
+uint32_t IntCode_VECTOR_COMPARE_EQ_F32(IntCodeState& ics, const IntCode* i){
     VECTOR_COMPARER(float, f4, i4, 4, == )};
 int Translate_VECTOR_COMPARE_EQ(TranslationContext& ctx, Instr* i) {
   static IntCodeFn fns[] = {
@@ -2192,13 +2336,13 @@ int Translate_VECTOR_COMPARE_EQ(TranslationContext& ctx, Instr* i) {
   return DispatchToC(ctx, i, fns[i->flags]);
 }
 
-uint32_t IntCode_VECTOR_COMPARE_SGT_I8(IntCodeState& ics, const IntCode* i) {
+uint32_t IntCode_VECTOR_COMPARE_SGT_I8(IntCodeState& ics, const IntCode* i){
     VECTOR_COMPARER(int8_t, b16, b16, 16, > )};
-uint32_t IntCode_VECTOR_COMPARE_SGT_I16(IntCodeState& ics, const IntCode* i) {
+uint32_t IntCode_VECTOR_COMPARE_SGT_I16(IntCodeState& ics, const IntCode* i){
     VECTOR_COMPARER(int16_t, s8, s8, 8, > )};
-uint32_t IntCode_VECTOR_COMPARE_SGT_I32(IntCodeState& ics, const IntCode* i) {
+uint32_t IntCode_VECTOR_COMPARE_SGT_I32(IntCodeState& ics, const IntCode* i){
     VECTOR_COMPARER(int32_t, i4, i4, 4, > )};
-uint32_t IntCode_VECTOR_COMPARE_SGT_F32(IntCodeState& ics, const IntCode* i) {
+uint32_t IntCode_VECTOR_COMPARE_SGT_F32(IntCodeState& ics, const IntCode* i){
     VECTOR_COMPARER(float, f4, i4, 4, > )};
 int Translate_VECTOR_COMPARE_SGT(TranslationContext& ctx, Instr* i) {
   static IntCodeFn fns[] = {
@@ -4041,7 +4185,8 @@ static const TranslateFn dispatch_table[] = {
     Translate_LOAD_CONTEXT,       Translate_STORE_CONTEXT,
     Translate_LOAD,               Translate_STORE,
     Translate_PREFETCH,           Translate_MAX,
-    Translate_MIN,                Translate_SELECT,
+    Translate_VECTOR_MAX,         Translate_MIN,
+    Translate_VECTOR_MIN,         Translate_SELECT,
     Translate_IS_TRUE,            Translate_IS_FALSE,
     Translate_COMPARE_EQ,         Translate_COMPARE_NE,
     Translate_COMPARE_SLT,        Translate_COMPARE_SLE,
diff --git a/src/alloy/backend/x64/x64_sequences.cc b/src/alloy/backend/x64/x64_sequences.cc
index 80e481a33..52d4df79c 100644
--- a/src/alloy/backend/x64/x64_sequences.cc
+++ b/src/alloy/backend/x64/x64_sequences.cc
@@ -1739,6 +1739,53 @@ EMITTER_OPCODE_TABLE(
     MAX_V128);
 
 
+// ============================================================================
+// OPCODE_VECTOR_MAX
+// ============================================================================
+// Emits the packed-integer max for OPCODE_VECTOR_MAX: flags >> 8 gives the
+// lane type, and ARITHMETIC_UNSIGNED in flags picks the unsigned instruction
+// over the signed one for that lane width.
+EMITTER(VECTOR_MAX, MATCH(I<OPCODE_VECTOR_MAX, V128<>, V128<>, V128<>>)) {
+  static void Emit(X64Emitter& e, const EmitArgType& i) {
+    EmitCommutativeBinaryXmmOp(e, i,
+        [&i](X64Emitter& e, Xmm dest, Xmm src1, Xmm src2) {
+          const uint32_t part_type = i.instr->flags >> 8;
+          const bool is_unsigned = (i.instr->flags & ARITHMETIC_UNSIGNED) != 0;
+          switch (part_type) {
+          case INT8_TYPE:
+            if (is_unsigned) {
+              e.vpmaxub(dest, src1, src2);
+            } else {
+              e.vpmaxsb(dest, src1, src2);
+            }
+            break;
+          case INT16_TYPE:
+            if (is_unsigned) {
+              e.vpmaxuw(dest, src1, src2);
+            } else {
+              e.vpmaxsw(dest, src1, src2);
+            }
+            break;
+          case INT32_TYPE:
+            if (is_unsigned) {
+              e.vpmaxud(dest, src1, src2);
+            } else {
+              e.vpmaxsd(dest, src1, src2);
+            }
+            break;
+          default:
+            // Any other lane type is unhandled, for either signedness.
+            assert_unhandled_case(part_type);
+            break;
+          }
+        });
+  }
+};
+EMITTER_OPCODE_TABLE(
+    OPCODE_VECTOR_MAX,
+    VECTOR_MAX);
+
+
 // ============================================================================
 // OPCODE_MIN
 // ============================================================================
@@ -1773,6 +1820,53 @@ EMITTER_OPCODE_TABLE(
     MIN_V128);
 
 
+// ============================================================================
+// OPCODE_VECTOR_MIN
+// ============================================================================
+// Emits the packed-integer min for OPCODE_VECTOR_MIN: flags >> 8 gives the
+// lane type, and ARITHMETIC_UNSIGNED in flags picks the unsigned instruction
+// over the signed one for that lane width.
+EMITTER(VECTOR_MIN, MATCH(I<OPCODE_VECTOR_MIN, V128<>, V128<>, V128<>>)) {
+  static void Emit(X64Emitter& e, const EmitArgType& i) {
+    EmitCommutativeBinaryXmmOp(e, i,
+        [&i](X64Emitter& e, Xmm dest, Xmm src1, Xmm src2) {
+          const uint32_t part_type = i.instr->flags >> 8;
+          const bool is_unsigned = (i.instr->flags & ARITHMETIC_UNSIGNED) != 0;
+          switch (part_type) {
+          case INT8_TYPE:
+            if (is_unsigned) {
+              e.vpminub(dest, src1, src2);
+            } else {
+              e.vpminsb(dest, src1, src2);
+            }
+            break;
+          case INT16_TYPE:
+            if (is_unsigned) {
+              e.vpminuw(dest, src1, src2);
+            } else {
+              e.vpminsw(dest, src1, src2);
+            }
+            break;
+          case INT32_TYPE:
+            if (is_unsigned) {
+              e.vpminud(dest, src1, src2);
+            } else {
+              e.vpminsd(dest, src1, src2);
+            }
+            break;
+          default:
+            // Any other lane type is unhandled, for either signedness.
+            assert_unhandled_case(part_type);
+            break;
+          }
+        });
+  }
+};
+EMITTER_OPCODE_TABLE(
+    OPCODE_VECTOR_MIN,
+    VECTOR_MIN);
+
+
 // ============================================================================
 // OPCODE_SELECT
 // ============================================================================
@@ -5042,7 +5136,9 @@ void RegisterSequences() {
   REGISTER_EMITTER_OPCODE_TABLE(OPCODE_STORE);
   REGISTER_EMITTER_OPCODE_TABLE(OPCODE_PREFETCH);
   REGISTER_EMITTER_OPCODE_TABLE(OPCODE_MAX);
+  REGISTER_EMITTER_OPCODE_TABLE(OPCODE_VECTOR_MAX);
   REGISTER_EMITTER_OPCODE_TABLE(OPCODE_MIN);
+  REGISTER_EMITTER_OPCODE_TABLE(OPCODE_VECTOR_MIN);
   REGISTER_EMITTER_OPCODE_TABLE(OPCODE_SELECT);
   REGISTER_EMITTER_OPCODE_TABLE(OPCODE_IS_TRUE);
   REGISTER_EMITTER_OPCODE_TABLE(OPCODE_IS_FALSE);
diff --git a/src/alloy/frontend/ppc/ppc_emit_altivec.cc b/src/alloy/frontend/ppc/ppc_emit_altivec.cc
index b52102555..c3915c258 100644
--- a/src/alloy/frontend/ppc/ppc_emit_altivec.cc
+++ b/src/alloy/frontend/ppc/ppc_emit_altivec.cc
@@ -803,33 +803,48 @@ XEEMITTER(vmaxfp128, VX128(6, 640), VX128)(PPCHIRBuilder& f, InstrData& i) {
 }
 
 XEEMITTER(vmaxsb, 0x10000102, VX)(PPCHIRBuilder& f, InstrData& i) {
-  XEINSTRNOTIMPLEMENTED();
-  return 1;
+  // (VD) <- max((VA), (VB)) per signed int8 element (no ARITHMETIC_UNSIGNED).
+  Value* v = f.VectorMax(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB), INT8_TYPE);
+  f.StoreVR(i.VX.VD, v);
+  return 0;
 }
 
 XEEMITTER(vmaxsh, 0x10000142, VX)(PPCHIRBuilder& f, InstrData& i) {
-  XEINSTRNOTIMPLEMENTED();
-  return 1;
+  // (VD) <- max((VA), (VB)) per signed int16 element (no ARITHMETIC_UNSIGNED).
+  Value* v = f.VectorMax(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB), INT16_TYPE);
+  f.StoreVR(i.VX.VD, v);
+  return 0;
 }
 
 XEEMITTER(vmaxsw, 0x10000182, VX)(PPCHIRBuilder& f, InstrData& i) {
-  XEINSTRNOTIMPLEMENTED();
-  return 1;
+  // (VD) <- max((VA), (VB)) per signed int32 element (no ARITHMETIC_UNSIGNED).
+  Value* v = f.VectorMax(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB), INT32_TYPE);
+  f.StoreVR(i.VX.VD, v);
+  return 0;
 }
 
 XEEMITTER(vmaxub, 0x10000002, VX)(PPCHIRBuilder& f, InstrData& i) {
-  XEINSTRNOTIMPLEMENTED();
-  return 1;
+  // (VD) <- max((VA), (VB)) per unsigned int8 element (ARITHMETIC_UNSIGNED).
+  Value* v = f.VectorMax(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB), INT8_TYPE,
+                         ARITHMETIC_UNSIGNED);
+  f.StoreVR(i.VX.VD, v);
+  return 0;
 }
 
 XEEMITTER(vmaxuh, 0x10000042, VX)(PPCHIRBuilder& f, InstrData& i) {
-  XEINSTRNOTIMPLEMENTED();
-  return 1;
+  // (VD) <- max((VA), (VB)) per unsigned int16 element (ARITHMETIC_UNSIGNED).
+  Value* v = f.VectorMax(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB), INT16_TYPE,
+                         ARITHMETIC_UNSIGNED);
+  f.StoreVR(i.VX.VD, v);
+  return 0;
 }
 
 XEEMITTER(vmaxuw, 0x10000082, VX)(PPCHIRBuilder& f, InstrData& i) {
-  XEINSTRNOTIMPLEMENTED();
-  return 1;
+  // (VD) <- max((VA), (VB)) per unsigned int32 element (ARITHMETIC_UNSIGNED).
+  Value* v = f.VectorMax(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB), INT32_TYPE,
+                         ARITHMETIC_UNSIGNED);
+  f.StoreVR(i.VX.VD, v);
+  return 0;
 }
 
 XEEMITTER(vmhaddshs, 0x10000020, VXA)(PPCHIRBuilder& f, InstrData& i) {
@@ -856,33 +871,48 @@ XEEMITTER(vminfp128, VX128(6, 704), VX128)(PPCHIRBuilder& f, InstrData& i) {
 }
 
 XEEMITTER(vminsb, 0x10000302, VX)(PPCHIRBuilder& f, InstrData& i) {
-  XEINSTRNOTIMPLEMENTED();
-  return 1;
+  // (VD) <- min((VA), (VB)) per signed int8 element (no ARITHMETIC_UNSIGNED).
+  Value* v = f.VectorMin(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB), INT8_TYPE);
+  f.StoreVR(i.VX.VD, v);
+  return 0;
 }
 
 XEEMITTER(vminsh, 0x10000342, VX)(PPCHIRBuilder& f, InstrData& i) {
-  XEINSTRNOTIMPLEMENTED();
-  return 1;
+  // (VD) <- min((VA), (VB)) per signed int16 element (no ARITHMETIC_UNSIGNED).
+  Value* v = f.VectorMin(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB), INT16_TYPE);
+  f.StoreVR(i.VX.VD, v);
+  return 0;
 }
 
 XEEMITTER(vminsw, 0x10000382, VX)(PPCHIRBuilder& f, InstrData& i) {
-  XEINSTRNOTIMPLEMENTED();
-  return 1;
+  // (VD) <- min((VA), (VB)) per signed int32 element (no ARITHMETIC_UNSIGNED).
+  Value* v = f.VectorMin(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB), INT32_TYPE);
+  f.StoreVR(i.VX.VD, v);
+  return 0;
 }
 
 XEEMITTER(vminub, 0x10000202, VX)(PPCHIRBuilder& f, InstrData& i) {
-  XEINSTRNOTIMPLEMENTED();
-  return 1;
+  // (VD) <- min((VA), (VB)) per unsigned int8 element (ARITHMETIC_UNSIGNED).
+  Value* v = f.VectorMin(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB), INT8_TYPE,
+                         ARITHMETIC_UNSIGNED);
+  f.StoreVR(i.VX.VD, v);
+  return 0;
 }
 
 XEEMITTER(vminuh, 0x10000242, VX)(PPCHIRBuilder& f, InstrData& i) {
-  XEINSTRNOTIMPLEMENTED();
-  return 1;
+  // (VD) <- min((VA), (VB)) per unsigned int16 element (ARITHMETIC_UNSIGNED).
+  Value* v = f.VectorMin(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB), INT16_TYPE,
+                         ARITHMETIC_UNSIGNED);
+  f.StoreVR(i.VX.VD, v);
+  return 0;
 }
 
 XEEMITTER(vminuw, 0x10000282, VX)(PPCHIRBuilder& f, InstrData& i) {
-  XEINSTRNOTIMPLEMENTED();
-  return 1;
+  // (VD) <- min((VA), (VB)) per unsigned int32 element (ARITHMETIC_UNSIGNED).
+  Value* v = f.VectorMin(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB), INT32_TYPE,
+                         ARITHMETIC_UNSIGNED);
+  f.StoreVR(i.VX.VD, v);
+  return 0;
 }
 
 XEEMITTER(vmladduhm, 0x10000022, VXA)(PPCHIRBuilder& f, InstrData& i) {
diff --git a/src/alloy/hir/hir_builder.cc b/src/alloy/hir/hir_builder.cc
index 11d9b836a..acdb6b95a 100644
--- a/src/alloy/hir/hir_builder.cc
+++ b/src/alloy/hir/hir_builder.cc
@@ -1035,6 +1035,19 @@ Value* HIRBuilder::Max(Value* value1, Value* value2) {
   return i->dest;
 }
 
+Value* HIRBuilder::VectorMax(Value* value1, Value* value2, TypeName part_type,
+                             uint32_t arithmetic_flags) {
+  // Appends a vector_max instr; part_type rides in bits 8 and up of its flags.
+  ASSERT_TYPES_EQUAL(value1, value2);
+  const uint16_t packed_flags = arithmetic_flags | (part_type << 8);
+  Value* result = AllocValue(value1->type);
+  Instr* instr = AppendInstr(OPCODE_VECTOR_MAX_info, packed_flags, result);
+  instr->set_src1(value1);
+  instr->set_src2(value2);
+  instr->src3.value = NULL;
+  return instr->dest;
+}
+
 Value* HIRBuilder::Min(Value* value1, Value* value2) {
   ASSERT_TYPES_EQUAL(value1, value2);
 
@@ -1050,6 +1063,19 @@ Value* HIRBuilder::Min(Value* value1, Value* value2) {
   return i->dest;
 }
 
+Value* HIRBuilder::VectorMin(Value* value1, Value* value2, TypeName part_type,
+                             uint32_t arithmetic_flags) {
+  // Appends a vector_min instr; part_type rides in bits 8 and up of its flags.
+  ASSERT_TYPES_EQUAL(value1, value2);
+  const uint16_t packed_flags = arithmetic_flags | (part_type << 8);
+  Value* result = AllocValue(value1->type);
+  Instr* instr = AppendInstr(OPCODE_VECTOR_MIN_info, packed_flags, result);
+  instr->set_src1(value1);
+  instr->set_src2(value2);
+  instr->src3.value = NULL;
+  return instr->dest;
+}
+
 Value* HIRBuilder::Select(Value* cond, Value* value1, Value* value2) {
   assert_true(cond->type == INT8_TYPE);  // for now
   ASSERT_TYPES_EQUAL(value1, value2);
diff --git a/src/alloy/hir/hir_builder.h b/src/alloy/hir/hir_builder.h
index 4d6d53e7c..8bcd53c33 100644
--- a/src/alloy/hir/hir_builder.h
+++ b/src/alloy/hir/hir_builder.h
@@ -136,7 +136,11 @@ class HIRBuilder {
   void Prefetch(Value* address, size_t length, uint32_t prefetch_flags = 0);
 
   Value* Max(Value* value1, Value* value2);
+  Value* VectorMax(Value* value1, Value* value2, TypeName part_type,
+                   uint32_t arithmetic_flags = 0);
   Value* Min(Value* value1, Value* value2);
+  Value* VectorMin(Value* value1, Value* value2, TypeName part_type,
+                   uint32_t arithmetic_flags = 0);
   Value* Select(Value* cond, Value* value1, Value* value2);
   Value* IsTrue(Value* value);
   Value* IsFalse(Value* value);
diff --git a/src/alloy/hir/opcodes.h b/src/alloy/hir/opcodes.h
index 841d1f134..c163ca5d0 100644
--- a/src/alloy/hir/opcodes.h
+++ b/src/alloy/hir/opcodes.h
@@ -112,7 +112,9 @@ enum Opcode {
   OPCODE_STORE,
   OPCODE_PREFETCH,
   OPCODE_MAX,
+  OPCODE_VECTOR_MAX,
   OPCODE_MIN,
+  OPCODE_VECTOR_MIN,
   OPCODE_SELECT,
   OPCODE_IS_TRUE,
   OPCODE_IS_FALSE,
diff --git a/src/alloy/hir/opcodes.inl b/src/alloy/hir/opcodes.inl
index deb789675..b09ea29c7 100644
--- a/src/alloy/hir/opcodes.inl
+++ b/src/alloy/hir/opcodes.inl
@@ -236,12 +236,24 @@ DEFINE_OPCODE(
     OPCODE_SIG_V_V_V,
     0)
 
+DEFINE_OPCODE(
+    OPCODE_VECTOR_MAX,
+    "vector_max",
+    OPCODE_SIG_V_V_V,
+    0)
+
 DEFINE_OPCODE(
     OPCODE_MIN,
     "min",
     OPCODE_SIG_V_V_V,
     0)
 
+DEFINE_OPCODE(
+    OPCODE_VECTOR_MIN,
+    "vector_min",
+    OPCODE_SIG_V_V_V,
+    0)
+
 DEFINE_OPCODE(
     OPCODE_SELECT,
     "select",