From f0e9fd92a0a9a3dfb8e7387a6c30d922f1697fcb Mon Sep 17 00:00:00 2001 From: Ben Vanik Date: Mon, 4 Aug 2014 18:54:06 -0700 Subject: [PATCH] VECTOR_MIN and VECTOR_MAX instructions. --- src/alloy/backend/ivm/ivm_intcode.cc | 163 +++++++++++++++++++-- src/alloy/backend/x64/x64_sequences.cc | 96 ++++++++++++ src/alloy/frontend/ppc/ppc_emit_altivec.cc | 78 +++++++--- src/alloy/hir/hir_builder.cc | 26 ++++ src/alloy/hir/hir_builder.h | 4 + src/alloy/hir/opcodes.h | 2 + src/alloy/hir/opcodes.inl | 12 ++ 7 files changed, 348 insertions(+), 33 deletions(-) diff --git a/src/alloy/backend/ivm/ivm_intcode.cc b/src/alloy/backend/ivm/ivm_intcode.cc index fdc4ed8d7..04a306b69 100644 --- a/src/alloy/backend/ivm/ivm_intcode.cc +++ b/src/alloy/backend/ivm/ivm_intcode.cc @@ -9,6 +9,8 @@ #include +#include + #include #include #include @@ -1636,6 +1638,77 @@ int Translate_MAX(TranslationContext& ctx, Instr* i) { return DispatchToC(ctx, i, fns[i->dest->type]); } +uint32_t IntCode_VECTOR_MAX_I8_UNSIGNED(IntCodeState& ics, const IntCode* i) { + const vec128_t& src1 = ics.rf[i->src1_reg].v128; + const vec128_t& src2 = ics.rf[i->src2_reg].v128; + vec128_t& dest = ics.rf[i->dest_reg].v128; + for (int n = 0; n < 16; n++) { + dest.b16[n] = std::max(src1.b16[n], src2.b16[n]); + } + return IA_NEXT; +} +uint32_t IntCode_VECTOR_MAX_I16_UNSIGNED(IntCodeState& ics, const IntCode* i) { + const vec128_t& src1 = ics.rf[i->src1_reg].v128; + const vec128_t& src2 = ics.rf[i->src2_reg].v128; + vec128_t& dest = ics.rf[i->dest_reg].v128; + for (int n = 0; n < 8; n++) { + dest.s8[n] = std::max(src1.s8[n], src2.s8[n]); + } + return IA_NEXT; +} +uint32_t IntCode_VECTOR_MAX_I32_UNSIGNED(IntCodeState& ics, const IntCode* i) { + const vec128_t& src1 = ics.rf[i->src1_reg].v128; + const vec128_t& src2 = ics.rf[i->src2_reg].v128; + vec128_t& dest = ics.rf[i->dest_reg].v128; + for (int n = 0; n < 4; n++) { + dest.i4[n] = std::max(src1.i4[n], src2.i4[n]); + } + return IA_NEXT; +} +uint32_t IntCode_VECTOR_MAX_I8_SIGNED(IntCodeState& ics, const IntCode* i) { + const vec128_t& src1 = ics.rf[i->src1_reg].v128; + const vec128_t& src2 = ics.rf[i->src2_reg].v128; + vec128_t& dest = ics.rf[i->dest_reg].v128; + for (int n = 0; n < 16; n++) { + dest.b16[n] = std::max((int8_t)src1.b16[n], (int8_t)src2.b16[n]); + } + return IA_NEXT; +} +uint32_t IntCode_VECTOR_MAX_I16_SIGNED(IntCodeState& ics, const IntCode* i) { + const vec128_t& src1 = ics.rf[i->src1_reg].v128; + const vec128_t& src2 = ics.rf[i->src2_reg].v128; + vec128_t& dest = ics.rf[i->dest_reg].v128; + for (int n = 0; n < 8; n++) { + dest.s8[n] = std::max((int16_t)src1.s8[n], (int16_t)src2.s8[n]); + } + return IA_NEXT; +} +uint32_t IntCode_VECTOR_MAX_I32_SIGNED(IntCodeState& ics, const IntCode* i) { + const vec128_t& src1 = ics.rf[i->src1_reg].v128; + const vec128_t& src2 = ics.rf[i->src2_reg].v128; + vec128_t& dest = ics.rf[i->dest_reg].v128; + for (int n = 0; n < 4; n++) { + dest.i4[n] = std::max((int32_t)src1.i4[n], (int32_t)src2.i4[n]); + } + return IA_NEXT; +} +int Translate_VECTOR_MAX(TranslationContext& ctx, Instr* i) { + static IntCodeFn unsigned_fns[] = { + IntCode_VECTOR_MAX_I8_UNSIGNED, IntCode_VECTOR_MAX_I16_UNSIGNED, + IntCode_VECTOR_MAX_I32_UNSIGNED, + }; + static IntCodeFn signed_fns[] = { + IntCode_VECTOR_MAX_I8_SIGNED, IntCode_VECTOR_MAX_I16_SIGNED, + IntCode_VECTOR_MAX_I32_SIGNED, + }; + uint32_t part_type = i->flags >> 8; + if (i->flags & ARITHMETIC_UNSIGNED) { + return DispatchToC(ctx, i, unsigned_fns[part_type]); + } else { + return DispatchToC(ctx, i, signed_fns[part_type]); + } +} + uint32_t IntCode_MIN_I8_I8(IntCodeState& ics, const IntCode* i) { int8_t a = ics.rf[i->src1_reg].i8; int8_t b = ics.rf[i->src2_reg].i8; @@ -1688,6 +1761,77 @@ int Translate_MIN(TranslationContext& ctx, Instr* i) { return DispatchToC(ctx, i, fns[i->dest->type]); } +uint32_t IntCode_VECTOR_MIN_I8_UNSIGNED(IntCodeState& ics, const IntCode* i) { + const vec128_t& src1 = ics.rf[i->src1_reg].v128; + const vec128_t& src2 = ics.rf[i->src2_reg].v128; + vec128_t& dest = ics.rf[i->dest_reg].v128; + for (int n = 0; n < 16; n++) { + dest.b16[n] = std::min(src1.b16[n], src2.b16[n]); + } + return IA_NEXT; +} +uint32_t IntCode_VECTOR_MIN_I16_UNSIGNED(IntCodeState& ics, const IntCode* i) { + const vec128_t& src1 = ics.rf[i->src1_reg].v128; + const vec128_t& src2 = ics.rf[i->src2_reg].v128; + vec128_t& dest = ics.rf[i->dest_reg].v128; + for (int n = 0; n < 8; n++) { + dest.s8[n] = std::min(src1.s8[n], src2.s8[n]); + } + return IA_NEXT; +} +uint32_t IntCode_VECTOR_MIN_I32_UNSIGNED(IntCodeState& ics, const IntCode* i) { + const vec128_t& src1 = ics.rf[i->src1_reg].v128; + const vec128_t& src2 = ics.rf[i->src2_reg].v128; + vec128_t& dest = ics.rf[i->dest_reg].v128; + for (int n = 0; n < 4; n++) { + dest.i4[n] = std::min(src1.i4[n], src2.i4[n]); + } + return IA_NEXT; +} +uint32_t IntCode_VECTOR_MIN_I8_SIGNED(IntCodeState& ics, const IntCode* i) { + const vec128_t& src1 = ics.rf[i->src1_reg].v128; + const vec128_t& src2 = ics.rf[i->src2_reg].v128; + vec128_t& dest = ics.rf[i->dest_reg].v128; + for (int n = 0; n < 16; n++) { + dest.b16[n] = std::min((int8_t)src1.b16[n], (int8_t)src2.b16[n]); + } + return IA_NEXT; +} +uint32_t IntCode_VECTOR_MIN_I16_SIGNED(IntCodeState& ics, const IntCode* i) { + const vec128_t& src1 = ics.rf[i->src1_reg].v128; + const vec128_t& src2 = ics.rf[i->src2_reg].v128; + vec128_t& dest = ics.rf[i->dest_reg].v128; + for (int n = 0; n < 8; n++) { + dest.s8[n] = std::min((int16_t)src1.s8[n], (int16_t)src2.s8[n]); + } + return IA_NEXT; +} +uint32_t IntCode_VECTOR_MIN_I32_SIGNED(IntCodeState& ics, const IntCode* i) { + const vec128_t& src1 = ics.rf[i->src1_reg].v128; + const vec128_t& src2 = ics.rf[i->src2_reg].v128; + vec128_t& dest = ics.rf[i->dest_reg].v128; + for (int n = 0; n < 4; n++) { + dest.i4[n] = std::min((int32_t)src1.i4[n], (int32_t)src2.i4[n]); + } + return IA_NEXT; +} +int Translate_VECTOR_MIN(TranslationContext& ctx, Instr* i) { + static IntCodeFn unsigned_fns[] = { + IntCode_VECTOR_MIN_I8_UNSIGNED, IntCode_VECTOR_MIN_I16_UNSIGNED, + IntCode_VECTOR_MIN_I32_UNSIGNED, + }; + static IntCodeFn signed_fns[] = { + IntCode_VECTOR_MIN_I8_SIGNED, IntCode_VECTOR_MIN_I16_SIGNED, + IntCode_VECTOR_MIN_I32_SIGNED, + }; + uint32_t part_type = i->flags >> 8; + if (i->flags & ARITHMETIC_UNSIGNED) { + return DispatchToC(ctx, i, unsigned_fns[part_type]); + } else { + return DispatchToC(ctx, i, signed_fns[part_type]); + } +} + uint32_t IntCode_SELECT_I8(IntCodeState& ics, const IntCode* i) { ics.rf[i->dest_reg].i8 = ics.rf[i->src1_reg].i8 ? ics.rf[i->src2_reg].i8 : ics.rf[i->src3_reg].i8; @@ -2174,13 +2318,13 @@ int Translate_DID_SATURATE(TranslationContext& ctx, Instr* i) { } \ return IA_NEXT; -uint32_t IntCode_VECTOR_COMPARE_EQ_I8(IntCodeState& ics, const IntCode* i) { +uint32_t IntCode_VECTOR_COMPARE_EQ_I8(IntCodeState& ics, const IntCode* i){ VECTOR_COMPARER(uint8_t, b16, b16, 16, == )}; -uint32_t IntCode_VECTOR_COMPARE_EQ_I16(IntCodeState& ics, const IntCode* i) { +uint32_t IntCode_VECTOR_COMPARE_EQ_I16(IntCodeState& ics, const IntCode* i){ VECTOR_COMPARER(uint16_t, s8, s8, 8, == )}; -uint32_t IntCode_VECTOR_COMPARE_EQ_I32(IntCodeState& ics, const IntCode* i) { +uint32_t IntCode_VECTOR_COMPARE_EQ_I32(IntCodeState& ics, const IntCode* i){ VECTOR_COMPARER(uint32_t, i4, i4, 4, == )}; -uint32_t IntCode_VECTOR_COMPARE_EQ_F32(IntCodeState& ics, const IntCode* i) { +uint32_t IntCode_VECTOR_COMPARE_EQ_F32(IntCodeState& ics, const IntCode* i){ VECTOR_COMPARER(float, f4, i4, 4, == )}; int Translate_VECTOR_COMPARE_EQ(TranslationContext& ctx, Instr* i) { static IntCodeFn fns[] = { @@ -2192,13 +2336,13 @@ int Translate_VECTOR_COMPARE_EQ(TranslationContext& ctx, Instr* i) { return DispatchToC(ctx, i, fns[i->flags]); } -uint32_t IntCode_VECTOR_COMPARE_SGT_I8(IntCodeState& ics, const IntCode* i) { +uint32_t IntCode_VECTOR_COMPARE_SGT_I8(IntCodeState& ics, const IntCode* i){ VECTOR_COMPARER(int8_t, b16, b16, 16, > )}; -uint32_t IntCode_VECTOR_COMPARE_SGT_I16(IntCodeState& ics, const IntCode* i) { +uint32_t IntCode_VECTOR_COMPARE_SGT_I16(IntCodeState& ics, const IntCode* i){ VECTOR_COMPARER(int16_t, s8, s8, 8, > )}; -uint32_t IntCode_VECTOR_COMPARE_SGT_I32(IntCodeState& ics, const IntCode* i) { +uint32_t IntCode_VECTOR_COMPARE_SGT_I32(IntCodeState& ics, const IntCode* i){ VECTOR_COMPARER(int32_t, i4, i4, 4, > )}; -uint32_t IntCode_VECTOR_COMPARE_SGT_F32(IntCodeState& ics, const IntCode* i) { +uint32_t IntCode_VECTOR_COMPARE_SGT_F32(IntCodeState& ics, const IntCode* i){ VECTOR_COMPARER(float, f4, i4, 4, > )}; int Translate_VECTOR_COMPARE_SGT(TranslationContext& ctx, Instr* i) { static IntCodeFn fns[] = { @@ -4041,7 +4185,8 @@ static const TranslateFn dispatch_table[] = { Translate_LOAD_CONTEXT, Translate_STORE_CONTEXT, Translate_LOAD, Translate_STORE, Translate_PREFETCH, Translate_MAX, - Translate_MIN, Translate_SELECT, + Translate_VECTOR_MAX, Translate_MIN, + Translate_VECTOR_MIN, Translate_SELECT, Translate_IS_TRUE, Translate_IS_FALSE, Translate_COMPARE_EQ, Translate_COMPARE_NE, Translate_COMPARE_SLT, Translate_COMPARE_SLE, diff --git a/src/alloy/backend/x64/x64_sequences.cc b/src/alloy/backend/x64/x64_sequences.cc index 80e481a33..52d4df79c 100644 --- a/src/alloy/backend/x64/x64_sequences.cc +++ b/src/alloy/backend/x64/x64_sequences.cc @@ -1739,6 +1739,53 @@ EMITTER_OPCODE_TABLE( MAX_V128); +// ============================================================================ +// OPCODE_VECTOR_MAX +// ============================================================================ +EMITTER(VECTOR_MAX, MATCH(I, V128<>, V128<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + EmitCommutativeBinaryXmmOp(e, i, + [&i](X64Emitter& e, Xmm dest, Xmm src1, Xmm src2) { + uint32_t part_type = i.instr->flags >> 8; + if (i.instr->flags & ARITHMETIC_UNSIGNED) { + switch (part_type) { + case INT8_TYPE: + e.vpmaxub(dest, src1, src2); + break; + case INT16_TYPE: + e.vpmaxuw(dest, src1, src2); + break; + case INT32_TYPE: + e.vpmaxud(dest, src1, src2); + break; + default: + assert_unhandled_case(part_type); + break; + } + } else { + switch (part_type) { + case INT8_TYPE: + e.vpmaxsb(dest, src1, src2); + break; + case INT16_TYPE: + e.vpmaxsw(dest, src1, src2); + break; + case INT32_TYPE: + e.vpmaxsd(dest, src1, src2); + break; + default: + assert_unhandled_case(part_type); + break; + } + } + }); + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_VECTOR_MAX, + VECTOR_MAX); + + // ============================================================================ // OPCODE_MIN // ============================================================================ @@ -1773,6 +1820,53 @@ EMITTER_OPCODE_TABLE( MIN_V128); +// ============================================================================ +// OPCODE_VECTOR_MIN +// ============================================================================ +EMITTER(VECTOR_MIN, MATCH(I, V128<>, V128<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + EmitCommutativeBinaryXmmOp(e, i, + [&i](X64Emitter& e, Xmm dest, Xmm src1, Xmm src2) { + uint32_t part_type = i.instr->flags >> 8; + if (i.instr->flags & ARITHMETIC_UNSIGNED) { + switch (part_type) { + case INT8_TYPE: + e.vpminub(dest, src1, src2); + break; + case INT16_TYPE: + e.vpminuw(dest, src1, src2); + break; + case INT32_TYPE: + e.vpminud(dest, src1, src2); + break; + default: + assert_unhandled_case(part_type); + break; + } + } else { + switch (part_type) { + case INT8_TYPE: + e.vpminsb(dest, src1, src2); + break; + case INT16_TYPE: + e.vpminsw(dest, src1, src2); + break; + case INT32_TYPE: + e.vpminsd(dest, src1, src2); + break; + default: + assert_unhandled_case(part_type); + break; + } + } + }); + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_VECTOR_MIN, + VECTOR_MIN); + + // ============================================================================ // OPCODE_SELECT // ============================================================================ @@ -5042,7 +5136,9 @@ void RegisterSequences() { REGISTER_EMITTER_OPCODE_TABLE(OPCODE_STORE); REGISTER_EMITTER_OPCODE_TABLE(OPCODE_PREFETCH); REGISTER_EMITTER_OPCODE_TABLE(OPCODE_MAX); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_VECTOR_MAX); REGISTER_EMITTER_OPCODE_TABLE(OPCODE_MIN); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_VECTOR_MIN); REGISTER_EMITTER_OPCODE_TABLE(OPCODE_SELECT); REGISTER_EMITTER_OPCODE_TABLE(OPCODE_IS_TRUE); REGISTER_EMITTER_OPCODE_TABLE(OPCODE_IS_FALSE); diff --git a/src/alloy/frontend/ppc/ppc_emit_altivec.cc b/src/alloy/frontend/ppc/ppc_emit_altivec.cc index b52102555..c3915c258 100644 --- a/src/alloy/frontend/ppc/ppc_emit_altivec.cc +++ b/src/alloy/frontend/ppc/ppc_emit_altivec.cc @@ -803,33 +803,48 @@ XEEMITTER(vmaxfp128, VX128(6, 640), VX128)(PPCHIRBuilder& f, InstrData& i) { } XEEMITTER(vmaxsb, 0x10000102, VX)(PPCHIRBuilder& f, InstrData& i) { - XEINSTRNOTIMPLEMENTED(); - return 1; + // (VD) <- max((VA), (VB)) (signed int8) + Value* v = f.VectorMax(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB), INT8_TYPE); + f.StoreVR(i.VX.VD, v); + return 0; } XEEMITTER(vmaxsh, 0x10000142, VX)(PPCHIRBuilder& f, InstrData& i) { - XEINSTRNOTIMPLEMENTED(); - return 1; + // (VD) <- max((VA), (VB)) (signed int16) + Value* v = f.VectorMax(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB), INT16_TYPE); + f.StoreVR(i.VX.VD, v); + return 0; } XEEMITTER(vmaxsw, 0x10000182, VX)(PPCHIRBuilder& f, InstrData& i) { - XEINSTRNOTIMPLEMENTED(); - return 1; + // (VD) <- max((VA), (VB)) (signed int32) + Value* v = f.VectorMax(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB), INT32_TYPE); + f.StoreVR(i.VX.VD, v); + return 0; } XEEMITTER(vmaxub, 0x10000002, VX)(PPCHIRBuilder& f, InstrData& i) { - XEINSTRNOTIMPLEMENTED(); - return 1; + // (VD) <- max((VA), (VB)) (unsigned int8) + Value* v = f.VectorMax(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB), INT8_TYPE, + ARITHMETIC_UNSIGNED); + f.StoreVR(i.VX.VD, v); + return 0; } XEEMITTER(vmaxuh, 0x10000042, VX)(PPCHIRBuilder& f, InstrData& i) { - XEINSTRNOTIMPLEMENTED(); - return 1; + // (VD) <- max((VA), (VB)) (unsigned int16) + Value* v = f.VectorMax(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB), INT16_TYPE, + ARITHMETIC_UNSIGNED); + f.StoreVR(i.VX.VD, v); + return 0; } XEEMITTER(vmaxuw, 0x10000082, VX)(PPCHIRBuilder& f, InstrData& i) { - XEINSTRNOTIMPLEMENTED(); - return 1; + // (VD) <- max((VA), (VB)) (unsigned int32) + Value* v = f.VectorMax(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB), INT32_TYPE, + ARITHMETIC_UNSIGNED); + f.StoreVR(i.VX.VD, v); + return 0; } XEEMITTER(vmhaddshs, 0x10000020, VXA)(PPCHIRBuilder& f, InstrData& i) { @@ -856,33 +871,48 @@ XEEMITTER(vminfp128, VX128(6, 704), VX128)(PPCHIRBuilder& f, InstrData& i) { } XEEMITTER(vminsb, 0x10000302, VX)(PPCHIRBuilder& f, InstrData& i) { - XEINSTRNOTIMPLEMENTED(); - return 1; + // (VD) <- min((VA), (VB)) (signed int8) + Value* v = f.VectorMin(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB), INT8_TYPE); + f.StoreVR(i.VX.VD, v); + return 0; } XEEMITTER(vminsh, 0x10000342, VX)(PPCHIRBuilder& f, InstrData& i) { - XEINSTRNOTIMPLEMENTED(); - return 1; + // (VD) <- min((VA), (VB)) (signed int16) + Value* v = f.VectorMin(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB), INT16_TYPE); + f.StoreVR(i.VX.VD, v); + return 0; } XEEMITTER(vminsw, 0x10000382, VX)(PPCHIRBuilder& f, InstrData& i) { - XEINSTRNOTIMPLEMENTED(); - return 1; + // (VD) <- min((VA), (VB)) (signed int32) + Value* v = f.VectorMin(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB), INT32_TYPE); + f.StoreVR(i.VX.VD, v); + return 0; } XEEMITTER(vminub, 0x10000202, VX)(PPCHIRBuilder& f, InstrData& i) { - XEINSTRNOTIMPLEMENTED(); - return 1; + // (VD) <- min((VA), (VB)) (unsigned int8) + Value* v = f.VectorMin(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB), INT8_TYPE, + ARITHMETIC_UNSIGNED); + f.StoreVR(i.VX.VD, v); + return 0; } XEEMITTER(vminuh, 0x10000242, VX)(PPCHIRBuilder& f, InstrData& i) { - XEINSTRNOTIMPLEMENTED(); - return 1; + // (VD) <- min((VA), (VB)) (unsigned int16) + Value* v = f.VectorMin(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB), INT16_TYPE, + ARITHMETIC_UNSIGNED); + f.StoreVR(i.VX.VD, v); + return 0; } XEEMITTER(vminuw, 0x10000282, VX)(PPCHIRBuilder& f, InstrData& i) { - XEINSTRNOTIMPLEMENTED(); - return 1; + // (VD) <- min((VA), (VB)) (unsigned int32) + Value* v = f.VectorMin(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB), INT32_TYPE, + ARITHMETIC_UNSIGNED); + f.StoreVR(i.VX.VD, v); + return 0; } XEEMITTER(vmladduhm, 0x10000022, VXA)(PPCHIRBuilder& f, InstrData& i) { diff --git a/src/alloy/hir/hir_builder.cc b/src/alloy/hir/hir_builder.cc index 11d9b836a..acdb6b95a 100644 --- a/src/alloy/hir/hir_builder.cc +++ b/src/alloy/hir/hir_builder.cc @@ -1035,6 +1035,19 @@ Value* HIRBuilder::Max(Value* value1, Value* value2) { return i->dest; } +Value* HIRBuilder::VectorMax(Value* value1, Value* value2, TypeName part_type, + uint32_t arithmetic_flags) { + ASSERT_TYPES_EQUAL(value1, value2); + + uint16_t flags = arithmetic_flags | (part_type << 8); + Instr* i = + AppendInstr(OPCODE_VECTOR_MAX_info, flags, AllocValue(value1->type)); + i->set_src1(value1); + i->set_src2(value2); + i->src3.value = NULL; + return i->dest; +} + Value* HIRBuilder::Min(Value* value1, Value* value2) { ASSERT_TYPES_EQUAL(value1, value2); @@ -1050,6 +1063,19 @@ Value* HIRBuilder::Min(Value* value1, Value* value2) { return i->dest; } +Value* HIRBuilder::VectorMin(Value* value1, Value* value2, TypeName part_type, + uint32_t arithmetic_flags) { + ASSERT_TYPES_EQUAL(value1, value2); + + uint16_t flags = arithmetic_flags | (part_type << 8); + Instr* i = + AppendInstr(OPCODE_VECTOR_MIN_info, flags, AllocValue(value1->type)); + i->set_src1(value1); + i->set_src2(value2); + i->src3.value = NULL; + return i->dest; +} + Value* HIRBuilder::Select(Value* cond, Value* value1, Value* value2) { assert_true(cond->type == INT8_TYPE); // for now ASSERT_TYPES_EQUAL(value1, value2); diff --git a/src/alloy/hir/hir_builder.h b/src/alloy/hir/hir_builder.h index 4d6d53e7c..8bcd53c33 100644 --- a/src/alloy/hir/hir_builder.h +++ b/src/alloy/hir/hir_builder.h @@ -136,7 +136,11 @@ class HIRBuilder { void Prefetch(Value* address, size_t length, uint32_t prefetch_flags = 0); Value* Max(Value* value1, Value* value2); + Value* VectorMax(Value* value1, Value* value2, TypeName part_type, + uint32_t arithmetic_flags = 0); Value* Min(Value* value1, Value* value2); + Value* VectorMin(Value* value1, Value* value2, TypeName part_type, + uint32_t arithmetic_flags = 0); Value* Select(Value* cond, Value* value1, Value* value2); Value* IsTrue(Value* value); Value* IsFalse(Value* value); diff --git a/src/alloy/hir/opcodes.h b/src/alloy/hir/opcodes.h index 841d1f134..c163ca5d0 100644 --- a/src/alloy/hir/opcodes.h +++ b/src/alloy/hir/opcodes.h @@ -112,7 +112,9 @@ enum Opcode { OPCODE_STORE, OPCODE_PREFETCH, OPCODE_MAX, + OPCODE_VECTOR_MAX, OPCODE_MIN, + OPCODE_VECTOR_MIN, OPCODE_SELECT, OPCODE_IS_TRUE, OPCODE_IS_FALSE, diff --git a/src/alloy/hir/opcodes.inl b/src/alloy/hir/opcodes.inl index deb789675..b09ea29c7 100644 --- a/src/alloy/hir/opcodes.inl +++ b/src/alloy/hir/opcodes.inl @@ -236,12 +236,24 @@ DEFINE_OPCODE( OPCODE_SIG_V_V_V, 0) +DEFINE_OPCODE( + OPCODE_VECTOR_MAX, + "vector_max", + OPCODE_SIG_V_V_V, + 0) + DEFINE_OPCODE( OPCODE_MIN, "min", OPCODE_SIG_V_V_V, 0) +DEFINE_OPCODE( + OPCODE_VECTOR_MIN, + "vector_min", + OPCODE_SIG_V_V_V, + 0) + DEFINE_OPCODE( OPCODE_SELECT, "select",