diff --git a/src/alloy/backend/ivm/ivm_intcode.cc b/src/alloy/backend/ivm/ivm_intcode.cc index d69abd773..5e8d7c9c4 100644 --- a/src/alloy/backend/ivm/ivm_intcode.cc +++ b/src/alloy/backend/ivm/ivm_intcode.cc @@ -2735,6 +2735,12 @@ int Translate_SUB(TranslationContext& ctx, Instr* i) { return DispatchToC(ctx, i, fns[i->dest->type]); } +int Translate_VECTOR_SUB(TranslationContext& ctx, Instr* i) { + // TODO(benvanik): VECTOR_SUB in IVM. + assert_always(); + return 1; +} + uint32_t IntCode_MUL_I8_I8(IntCodeState& ics, const IntCode* i) { ics.rf[i->dest_reg].i8 = ics.rf[i->src1_reg].i8 * ics.rf[i->src2_reg].i8; return IA_NEXT; @@ -4200,23 +4206,23 @@ static const TranslateFn dispatch_table[] = { Translate_VECTOR_COMPARE_UGT, Translate_VECTOR_COMPARE_UGE, Translate_ADD, Translate_ADD_CARRY, Translate_VECTOR_ADD, Translate_SUB, - Translate_MUL, Translate_MUL_HI, - Translate_DIV, Translate_MUL_ADD, - Translate_MUL_SUB, Translate_NEG, - Translate_ABS, Translate_SQRT, - Translate_RSQRT, Translate_POW2, - Translate_LOG2, Translate_DOT_PRODUCT_3, - Translate_DOT_PRODUCT_4, Translate_AND, - Translate_OR, Translate_XOR, - Translate_NOT, Translate_SHL, - Translate_VECTOR_SHL, Translate_SHR, - Translate_VECTOR_SHR, Translate_SHA, - Translate_VECTOR_SHA, Translate_ROTATE_LEFT, - Translate_BYTE_SWAP, Translate_CNTLZ, - Translate_INSERT, Translate_EXTRACT, - Translate_SPLAT, Translate_PERMUTE, - Translate_SWIZZLE, Translate_PACK, - Translate_UNPACK, + Translate_VECTOR_SUB, Translate_MUL, + Translate_MUL_HI, Translate_DIV, + Translate_MUL_ADD, Translate_MUL_SUB, + Translate_NEG, Translate_ABS, + Translate_SQRT, Translate_RSQRT, + Translate_POW2, Translate_LOG2, + Translate_DOT_PRODUCT_3, Translate_DOT_PRODUCT_4, + Translate_AND, Translate_OR, + Translate_XOR, Translate_NOT, + Translate_SHL, Translate_VECTOR_SHL, + Translate_SHR, Translate_VECTOR_SHR, + Translate_SHA, Translate_VECTOR_SHA, + Translate_ROTATE_LEFT, Translate_BYTE_SWAP, + Translate_CNTLZ, Translate_INSERT, + 
Translate_EXTRACT, Translate_SPLAT, + Translate_PERMUTE, Translate_SWIZZLE, + Translate_PACK, Translate_UNPACK, TranslateInvalid, // Translate_COMPARE_EXCHANGE, Translate_ATOMIC_EXCHANGE, TranslateInvalid, // Translate_ATOMIC_ADD, diff --git a/src/alloy/backend/x64/x64_sequences.cc b/src/alloy/backend/x64/x64_sequences.cc index fb6e80d18..aaa34ff2c 100644 --- a/src/alloy/backend/x64/x64_sequences.cc +++ b/src/alloy/backend/x64/x64_sequences.cc @@ -2854,6 +2854,66 @@ EMITTER_OPCODE_TABLE( SUB_V128); +// ============================================================================ +// OPCODE_VECTOR_SUB +// ============================================================================ +EMITTER(VECTOR_SUB, MATCH(I<OPCODE_VECTOR_SUB, V128<>, V128<>, V128<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + EmitCommutativeBinaryXmmOp(e, i, + [&i](X64Emitter& e, const Xmm& dest, const Xmm& src1, const Xmm& src2) { + const TypeName part_type = static_cast<TypeName>(i.instr->flags & 0xFF); + const uint32_t arithmetic_flags = i.instr->flags >> 8; + bool is_unsigned = !!(arithmetic_flags & ARITHMETIC_UNSIGNED); + bool saturate = !!(arithmetic_flags & ARITHMETIC_SATURATE); + switch (part_type) { + case INT8_TYPE: + if (saturate) { + // TODO(benvanik): trace DID_SATURATE + if (is_unsigned) { + e.vpsubusb(dest, src1, src2); + } else { + e.vpsubsb(dest, src1, src2); + } + } else { + e.vpsubb(dest, src1, src2); + } + break; + case INT16_TYPE: + if (saturate) { + // TODO(benvanik): trace DID_SATURATE + if (is_unsigned) { + e.vpsubusw(dest, src1, src2); + } else { + e.vpsubsw(dest, src1, src2); + } + } else { + e.vpsubw(dest, src1, src2); + } + break; + case INT32_TYPE: + if (saturate) { + if (is_unsigned) { + assert_always(); + } else { + assert_always(); + } + } else { + e.vpsubd(dest, src1, src2); + } + break; + case FLOAT32_TYPE: + e.vsubps(dest, src1, src2); + break; + default: assert_unhandled_case(part_type); break; + } + }); + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_VECTOR_SUB, + VECTOR_SUB); + + // 
============================================================================ // OPCODE_MUL // ============================================================================ @@ -5202,6 +5262,7 @@ void RegisterSequences() { REGISTER_EMITTER_OPCODE_TABLE(OPCODE_ADD_CARRY); REGISTER_EMITTER_OPCODE_TABLE(OPCODE_VECTOR_ADD); REGISTER_EMITTER_OPCODE_TABLE(OPCODE_SUB); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_VECTOR_SUB); REGISTER_EMITTER_OPCODE_TABLE(OPCODE_MUL); REGISTER_EMITTER_OPCODE_TABLE(OPCODE_MUL_HI); REGISTER_EMITTER_OPCODE_TABLE(OPCODE_DIV); diff --git a/src/alloy/frontend/ppc/ppc_emit_altivec.cc b/src/alloy/frontend/ppc/ppc_emit_altivec.cc index c3915c258..46288ad80 100644 --- a/src/alloy/frontend/ppc/ppc_emit_altivec.cc +++ b/src/alloy/frontend/ppc/ppc_emit_altivec.cc @@ -1572,18 +1572,27 @@ XEEMITTER(vsubfp128, VX128(5, 80), VX128)(PPCHIRBuilder& f, InstrData& i) { } XEEMITTER(vsubsbs, 0x10000700, VX)(PPCHIRBuilder& f, InstrData& i) { - XEINSTRNOTIMPLEMENTED(); - return 1; + // (VD) <- clamp(EXTS(VA) + ¬EXTS(VB) + 1, -128, 127) + Value* v = f.VectorSub(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB), INT8_TYPE, + ARITHMETIC_SATURATE); + f.StoreVR(i.VX.VD, v); + return 0; } XEEMITTER(vsubshs, 0x10000740, VX)(PPCHIRBuilder& f, InstrData& i) { - XEINSTRNOTIMPLEMENTED(); - return 1; + // (VD) <- clamp(EXTS(VA) + ¬EXTS(VB) + 1, -2^15, 2^15-1) + Value* v = f.VectorSub(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB), INT16_TYPE, + ARITHMETIC_SATURATE); + f.StoreVR(i.VX.VD, v); + return 0; } XEEMITTER(vsubsws, 0x10000780, VX)(PPCHIRBuilder& f, InstrData& i) { - XEINSTRNOTIMPLEMENTED(); - return 1; + // (VD) <- clamp(EXTS(VA) + ¬EXTS(VB) + 1, -2^31, 2^31-1) + Value* v = f.VectorSub(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB), INT32_TYPE, + ARITHMETIC_SATURATE); + f.StoreVR(i.VX.VD, v); + return 0; } XEEMITTER(vsububm, 0x10000400, VX)(PPCHIRBuilder& f, InstrData& i) { @@ -1591,29 +1600,38 @@ XEEMITTER(vsububm, 0x10000400, VX)(PPCHIRBuilder& f, InstrData& i) { return 1; } -XEEMITTER(vsububs, 0x10000600, 
VX)(PPCHIRBuilder& f, InstrData& i) { - XEINSTRNOTIMPLEMENTED(); - return 1; -} - XEEMITTER(vsubuhm, 0x10000440, VX)(PPCHIRBuilder& f, InstrData& i) { XEINSTRNOTIMPLEMENTED(); return 1; } -XEEMITTER(vsubuhs, 0x10000640, VX)(PPCHIRBuilder& f, InstrData& i) { - XEINSTRNOTIMPLEMENTED(); - return 1; -} - XEEMITTER(vsubuwm, 0x10000480, VX)(PPCHIRBuilder& f, InstrData& i) { XEINSTRNOTIMPLEMENTED(); return 1; } +XEEMITTER(vsububs, 0x10000600, VX)(PPCHIRBuilder& f, InstrData& i) { + // (VD) <- clamp(EXTZ(VA) + ¬EXTZ(VB) + 1, 0, 2^8-1) + Value* v = f.VectorSub(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB), INT8_TYPE, + ARITHMETIC_SATURATE | ARITHMETIC_UNSIGNED); + f.StoreVR(i.VX.VD, v); + return 0; +} + +XEEMITTER(vsubuhs, 0x10000640, VX)(PPCHIRBuilder& f, InstrData& i) { + // (VD) <- clamp(EXTZ(VA) + ¬EXTZ(VB) + 1, 0, 2^16-1) + Value* v = f.VectorSub(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB), INT16_TYPE, + ARITHMETIC_SATURATE | ARITHMETIC_UNSIGNED); + f.StoreVR(i.VX.VD, v); + return 0; +} + XEEMITTER(vsubuws, 0x10000680, VX)(PPCHIRBuilder& f, InstrData& i) { - XEINSTRNOTIMPLEMENTED(); - return 1; + // (VD) <- clamp(EXTZ(VA) + ¬EXTZ(VB) + 1, 0, 2^32-1) + Value* v = f.VectorSub(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB), INT32_TYPE, + ARITHMETIC_SATURATE | ARITHMETIC_UNSIGNED); + f.StoreVR(i.VX.VD, v); + return 0; } XEEMITTER(vsumsws, 0x10000788, VX)(PPCHIRBuilder& f, InstrData& i) { @@ -2132,10 +2150,10 @@ void RegisterEmitCategoryAltivec() { XEREGISTERINSTR(vsubshs, 0x10000740); XEREGISTERINSTR(vsubsws, 0x10000780); XEREGISTERINSTR(vsububm, 0x10000400); - XEREGISTERINSTR(vsububs, 0x10000600); XEREGISTERINSTR(vsubuhm, 0x10000440); - XEREGISTERINSTR(vsubuhs, 0x10000640); XEREGISTERINSTR(vsubuwm, 0x10000480); + XEREGISTERINSTR(vsububs, 0x10000600); + XEREGISTERINSTR(vsubuhs, 0x10000640); XEREGISTERINSTR(vsubuws, 0x10000680); XEREGISTERINSTR(vsumsws, 0x10000788); XEREGISTERINSTR(vsum2sws, 0x10000688); diff --git a/src/alloy/hir/hir_builder.cc b/src/alloy/hir/hir_builder.cc index 
acdb6b95a..30ac78358 100644 --- a/src/alloy/hir/hir_builder.cc +++ b/src/alloy/hir/hir_builder.cc @@ -1299,6 +1299,23 @@ Value* HIRBuilder::Sub(Value* value1, Value* value2, return i->dest; } +Value* HIRBuilder::VectorSub(Value* value1, Value* value2, TypeName part_type, + uint32_t arithmetic_flags) { + ASSERT_VECTOR_TYPE(value1); + ASSERT_VECTOR_TYPE(value2); + + // This is shady. + uint32_t flags = part_type | (arithmetic_flags << 8); + assert_zero(flags >> 16); + + Instr* i = AppendInstr(OPCODE_VECTOR_SUB_info, (uint16_t)flags, + AllocValue(value1->type)); + i->set_src1(value1); + i->set_src2(value2); + i->src3.value = NULL; + return i->dest; +} + Value* HIRBuilder::Mul(Value* value1, Value* value2, uint32_t arithmetic_flags) { ASSERT_TYPES_EQUAL(value1, value2); diff --git a/src/alloy/hir/hir_builder.h b/src/alloy/hir/hir_builder.h index 8bcd53c33..44f149b8b 100644 --- a/src/alloy/hir/hir_builder.h +++ b/src/alloy/hir/hir_builder.h @@ -169,6 +169,8 @@ class HIRBuilder { Value* VectorAdd(Value* value1, Value* value2, TypeName part_type, uint32_t arithmetic_flags = 0); Value* Sub(Value* value1, Value* value2, uint32_t arithmetic_flags = 0); + Value* VectorSub(Value* value1, Value* value2, TypeName part_type, + uint32_t arithmetic_flags = 0); Value* Mul(Value* value1, Value* value2, uint32_t arithmetic_flags = 0); Value* MulHi(Value* value1, Value* value2, uint32_t arithmetic_flags = 0); Value* Div(Value* value1, Value* value2, uint32_t arithmetic_flags = 0); diff --git a/src/alloy/hir/opcodes.h b/src/alloy/hir/opcodes.h index c163ca5d0..c7b3c0e50 100644 --- a/src/alloy/hir/opcodes.h +++ b/src/alloy/hir/opcodes.h @@ -140,6 +140,7 @@ enum Opcode { OPCODE_ADD_CARRY, OPCODE_VECTOR_ADD, OPCODE_SUB, + OPCODE_VECTOR_SUB, OPCODE_MUL, OPCODE_MUL_HI, // TODO(benvanik): remove this and add INT128 type. 
OPCODE_DIV, diff --git a/src/alloy/hir/opcodes.inl b/src/alloy/hir/opcodes.inl index b09ea29c7..b1c153c71 100644 --- a/src/alloy/hir/opcodes.inl +++ b/src/alloy/hir/opcodes.inl @@ -389,6 +389,12 @@ DEFINE_OPCODE( OPCODE_SIG_V_V_V, 0) +DEFINE_OPCODE( + OPCODE_VECTOR_SUB, + "vector_sub", + OPCODE_SIG_V_V_V, + 0) + DEFINE_OPCODE( OPCODE_MUL, "mul",