Adding VECTOR_SUB for vsub*.
This commit is contained in:
parent
f149a23367
commit
75eb87f33d
|
@ -2735,6 +2735,12 @@ int Translate_SUB(TranslationContext& ctx, Instr* i) {
|
|||
return DispatchToC(ctx, i, fns[i->dest->type]);
|
||||
}
|
||||
|
||||
// IVM translation hook for OPCODE_VECTOR_SUB.
// Not implemented yet: it trips assert_always() and then returns 1.
// NOTE(review): 1 is presumably the translator's failure code -- confirm.
int Translate_VECTOR_SUB(TranslationContext& ctx, Instr* i) {
|
||||
// TODO(benvanik): VECTOR_SUB in IVM.
|
||||
assert_always();
|
||||
return 1;
|
||||
}
|
||||
|
||||
uint32_t IntCode_MUL_I8_I8(IntCodeState& ics, const IntCode* i) {
|
||||
ics.rf[i->dest_reg].i8 = ics.rf[i->src1_reg].i8 * ics.rf[i->src2_reg].i8;
|
||||
return IA_NEXT;
|
||||
|
@ -4200,23 +4206,23 @@ static const TranslateFn dispatch_table[] = {
|
|||
Translate_VECTOR_COMPARE_UGT, Translate_VECTOR_COMPARE_UGE,
|
||||
Translate_ADD, Translate_ADD_CARRY,
|
||||
Translate_VECTOR_ADD, Translate_SUB,
|
||||
Translate_MUL, Translate_MUL_HI,
|
||||
Translate_DIV, Translate_MUL_ADD,
|
||||
Translate_MUL_SUB, Translate_NEG,
|
||||
Translate_ABS, Translate_SQRT,
|
||||
Translate_RSQRT, Translate_POW2,
|
||||
Translate_LOG2, Translate_DOT_PRODUCT_3,
|
||||
Translate_DOT_PRODUCT_4, Translate_AND,
|
||||
Translate_OR, Translate_XOR,
|
||||
Translate_NOT, Translate_SHL,
|
||||
Translate_VECTOR_SHL, Translate_SHR,
|
||||
Translate_VECTOR_SHR, Translate_SHA,
|
||||
Translate_VECTOR_SHA, Translate_ROTATE_LEFT,
|
||||
Translate_BYTE_SWAP, Translate_CNTLZ,
|
||||
Translate_INSERT, Translate_EXTRACT,
|
||||
Translate_SPLAT, Translate_PERMUTE,
|
||||
Translate_SWIZZLE, Translate_PACK,
|
||||
Translate_UNPACK,
|
||||
Translate_VECTOR_SUB, Translate_MUL,
|
||||
Translate_MUL_HI, Translate_DIV,
|
||||
Translate_MUL_ADD, Translate_MUL_SUB,
|
||||
Translate_NEG, Translate_ABS,
|
||||
Translate_SQRT, Translate_RSQRT,
|
||||
Translate_POW2, Translate_LOG2,
|
||||
Translate_DOT_PRODUCT_3, Translate_DOT_PRODUCT_4,
|
||||
Translate_AND, Translate_OR,
|
||||
Translate_XOR, Translate_NOT,
|
||||
Translate_SHL, Translate_VECTOR_SHL,
|
||||
Translate_SHR, Translate_VECTOR_SHR,
|
||||
Translate_SHA, Translate_VECTOR_SHA,
|
||||
Translate_ROTATE_LEFT, Translate_BYTE_SWAP,
|
||||
Translate_CNTLZ, Translate_INSERT,
|
||||
Translate_EXTRACT, Translate_SPLAT,
|
||||
Translate_PERMUTE, Translate_SWIZZLE,
|
||||
Translate_PACK, Translate_UNPACK,
|
||||
TranslateInvalid, // Translate_COMPARE_EXCHANGE,
|
||||
Translate_ATOMIC_EXCHANGE,
|
||||
TranslateInvalid, // Translate_ATOMIC_ADD,
|
||||
|
|
|
@ -2854,6 +2854,66 @@ EMITTER_OPCODE_TABLE(
|
|||
SUB_V128);
|
||||
|
||||
|
||||
// ============================================================================
|
||||
// OPCODE_VECTOR_SUB
|
||||
// ============================================================================
|
||||
// x64 emitter for OPCODE_VECTOR_SUB on V128 operands.
// The lane type is read from the low 8 bits of the instruction flags and the
// arithmetic flags (ARITHMETIC_UNSIGNED / ARITHMETIC_SATURATE) from the bits
// above them; the matching vpsub*/vsubps instruction is then emitted.
// INT32 saturating subtraction is not implemented and hits assert_always().
// NOTE(review): subtraction is order-sensitive, yet the lambda is routed
// through EmitCommutativeBinaryXmmOp -- confirm that helper never swaps
// src1/src2.
EMITTER(VECTOR_SUB, MATCH(I<OPCODE_VECTOR_SUB, V128<>, V128<>, V128<>>)) {
|
||||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||
EmitCommutativeBinaryXmmOp(e, i,
|
||||
[&i](X64Emitter& e, const Xmm& dest, const Xmm& src1, const Xmm& src2) {
|
||||
// Low 8 bits of the flags: lane type. Remaining bits: arithmetic flags.
const TypeName part_type = static_cast<TypeName>(i.instr->flags & 0xFF);
|
||||
const uint32_t arithmetic_flags = i.instr->flags >> 8;
|
||||
bool is_unsigned = !!(arithmetic_flags & ARITHMETIC_UNSIGNED);
|
||||
bool saturate = !!(arithmetic_flags & ARITHMETIC_SATURATE);
|
||||
switch (part_type) {
|
||||
case INT8_TYPE:
|
||||
if (saturate) {
|
||||
// TODO(benvanik): trace DID_SATURATE
|
||||
if (is_unsigned) {
|
||||
e.vpsubusb(dest, src1, src2);
|
||||
} else {
|
||||
e.vpsubsb(dest, src1, src2);
|
||||
}
|
||||
} else {
|
||||
e.vpsubb(dest, src1, src2);
|
||||
}
|
||||
break;
|
||||
case INT16_TYPE:
|
||||
if (saturate) {
|
||||
// TODO(benvanik): trace DID_SATURATE
|
||||
if (is_unsigned) {
|
||||
e.vpsubusw(dest, src1, src2);
|
||||
} else {
|
||||
e.vpsubsw(dest, src1, src2);
|
||||
}
|
||||
} else {
|
||||
e.vpsubw(dest, src1, src2);
|
||||
}
|
||||
break;
|
||||
case INT32_TYPE:
|
||||
// Saturating 32-bit subtraction is unimplemented for both signednesses;
// each branch below just asserts.
if (saturate) {
|
||||
if (is_unsigned) {
|
||||
assert_always();
|
||||
} else {
|
||||
assert_always();
|
||||
}
|
||||
} else {
|
||||
e.vpsubd(dest, src1, src2);
|
||||
}
|
||||
break;
|
||||
case FLOAT32_TYPE:
|
||||
// The float path ignores the unsigned/saturate flags.
e.vsubps(dest, src1, src2);
|
||||
break;
|
||||
default: assert_unhandled_case(part_type); break;
|
||||
}
|
||||
});
|
||||
}
|
||||
};
|
||||
// Emitter table for OPCODE_VECTOR_SUB; VECTOR_SUB is the only emitter listed.
EMITTER_OPCODE_TABLE(
|
||||
OPCODE_VECTOR_SUB,
|
||||
VECTOR_SUB);
|
||||
|
||||
|
||||
// ============================================================================
|
||||
// OPCODE_MUL
|
||||
// ============================================================================
|
||||
|
@ -5202,6 +5262,7 @@ void RegisterSequences() {
|
|||
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_ADD_CARRY);
|
||||
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_VECTOR_ADD);
|
||||
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_SUB);
|
||||
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_VECTOR_SUB);
|
||||
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_MUL);
|
||||
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_MUL_HI);
|
||||
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_DIV);
|
||||
|
|
|
@ -1572,18 +1572,27 @@ XEEMITTER(vsubfp128, VX128(5, 80), VX128)(PPCHIRBuilder& f, InstrData& i) {
|
|||
}
|
||||
|
||||
// vsubsbs: signed saturating subtract on INT8 lanes (VD <- VA - VB), built as
// a VectorSub with ARITHMETIC_SATURATE and stored to VD.
// NOTE(review): this listing looks like a diff rendered without +/- markers;
// the XEINSTRNOTIMPLEMENTED()/return 1 pair below is presumably the removed
// stub. Read literally it would return before the VectorSub code -- confirm.
XEEMITTER(vsubsbs, 0x10000700, VX)(PPCHIRBuilder& f, InstrData& i) {
|
||||
XEINSTRNOTIMPLEMENTED();
|
||||
return 1;
|
||||
// (VD) <- clamp(EXTS(VA) + ¬EXTS(VB) + 1, -128, 127)
|
||||
Value* v = f.VectorSub(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB), INT8_TYPE,
|
||||
ARITHMETIC_SATURATE);
|
||||
f.StoreVR(i.VX.VD, v);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// vsubshs: signed saturating subtract on INT16 lanes (VD <- VA - VB), built as
// a VectorSub with ARITHMETIC_SATURATE and stored to VD.
// NOTE(review): this listing looks like a diff rendered without +/- markers;
// the XEINSTRNOTIMPLEMENTED()/return 1 pair below is presumably the removed
// stub. Read literally it would return before the VectorSub code -- confirm.
XEEMITTER(vsubshs, 0x10000740, VX)(PPCHIRBuilder& f, InstrData& i) {
|
||||
XEINSTRNOTIMPLEMENTED();
|
||||
return 1;
|
||||
// (VD) <- clamp(EXTS(VA) + ¬EXTS(VB) + 1, -2^15, 2^15-1)
|
||||
Value* v = f.VectorSub(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB), INT16_TYPE,
|
||||
ARITHMETIC_SATURATE);
|
||||
f.StoreVR(i.VX.VD, v);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// vsubsws: signed saturating subtract on INT32 lanes (VD <- VA - VB), built as
// a VectorSub with ARITHMETIC_SATURATE and stored to VD.
// NOTE(review): this listing looks like a diff rendered without +/- markers;
// the XEINSTRNOTIMPLEMENTED()/return 1 pair below is presumably the removed
// stub. Read literally it would return before the VectorSub code -- confirm.
// NOTE(review): the x64 VECTOR_SUB emitter shown in this change hits
// assert_always() for INT32_TYPE with saturation, so this instruction
// presumably asserts on that backend -- confirm.
XEEMITTER(vsubsws, 0x10000780, VX)(PPCHIRBuilder& f, InstrData& i) {
|
||||
XEINSTRNOTIMPLEMENTED();
|
||||
return 1;
|
||||
// (VD) <- clamp(EXTS(VA) + ¬EXTS(VB) + 1, -2^31, 2^31-1)
|
||||
Value* v = f.VectorSub(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB), INT32_TYPE,
|
||||
ARITHMETIC_SATURATE);
|
||||
f.StoreVR(i.VX.VD, v);
|
||||
return 0;
|
||||
}
|
||||
|
||||
XEEMITTER(vsububm, 0x10000400, VX)(PPCHIRBuilder& f, InstrData& i) {
|
||||
|
@ -1591,29 +1600,38 @@ XEEMITTER(vsububm, 0x10000400, VX)(PPCHIRBuilder& f, InstrData& i) {
|
|||
return 1;
|
||||
}
|
||||
|
||||
// vsububs stub: reports XEINSTRNOTIMPLEMENTED() and returns 1.
// NOTE(review): a second XEEMITTER(vsububs, 0x10000600, VX) with a real
// VectorSub body appears further down in this listing; this copy is presumably
// the pre-change version left over from the diff rendering -- confirm.
XEEMITTER(vsububs, 0x10000600, VX)(PPCHIRBuilder& f, InstrData& i) {
|
||||
XEINSTRNOTIMPLEMENTED();
|
||||
return 1;
|
||||
}
|
||||
|
||||
// vsubuhm: not implemented; reports XEINSTRNOTIMPLEMENTED() and returns 1.
XEEMITTER(vsubuhm, 0x10000440, VX)(PPCHIRBuilder& f, InstrData& i) {
|
||||
XEINSTRNOTIMPLEMENTED();
|
||||
return 1;
|
||||
}
|
||||
|
||||
// vsubuhs stub: reports XEINSTRNOTIMPLEMENTED() and returns 1.
// NOTE(review): a second XEEMITTER(vsubuhs, 0x10000640, VX) with a real
// VectorSub body appears further down in this listing; this copy is presumably
// the pre-change version left over from the diff rendering -- confirm.
XEEMITTER(vsubuhs, 0x10000640, VX)(PPCHIRBuilder& f, InstrData& i) {
|
||||
XEINSTRNOTIMPLEMENTED();
|
||||
return 1;
|
||||
}
|
||||
|
||||
// vsubuwm: not implemented; reports XEINSTRNOTIMPLEMENTED() and returns 1.
XEEMITTER(vsubuwm, 0x10000480, VX)(PPCHIRBuilder& f, InstrData& i) {
|
||||
XEINSTRNOTIMPLEMENTED();
|
||||
return 1;
|
||||
}
|
||||
|
||||
// vsububs: unsigned saturating subtract on INT8 lanes (VD <- VA - VB), built
// as a VectorSub with ARITHMETIC_SATURATE | ARITHMETIC_UNSIGNED and stored to
// VD. Returns 0.
XEEMITTER(vsububs, 0x10000600, VX)(PPCHIRBuilder& f, InstrData& i) {
|
||||
// (VD) <- clamp(EXTZ(VA) + ¬EXTZ(VB) + 1, 0, 2^8-1)
|
||||
Value* v = f.VectorSub(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB), INT8_TYPE,
|
||||
ARITHMETIC_SATURATE | ARITHMETIC_UNSIGNED);
|
||||
f.StoreVR(i.VX.VD, v);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// vsubuhs: unsigned saturating subtract on INT16 lanes (VD <- VA - VB), built
// as a VectorSub with ARITHMETIC_SATURATE | ARITHMETIC_UNSIGNED and stored to
// VD. Returns 0.
XEEMITTER(vsubuhs, 0x10000640, VX)(PPCHIRBuilder& f, InstrData& i) {
|
||||
// (VD) <- clamp(EXTZ(VA) + ¬EXTZ(VB) + 1, 0, 2^16-1)
|
||||
Value* v = f.VectorSub(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB), INT16_TYPE,
|
||||
ARITHMETIC_SATURATE | ARITHMETIC_UNSIGNED);
|
||||
f.StoreVR(i.VX.VD, v);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// vsubuws: unsigned saturating subtract on INT32 lanes (VD <- VA - VB), built
// as a VectorSub with ARITHMETIC_SATURATE | ARITHMETIC_UNSIGNED and stored to
// VD.
// NOTE(review): this listing looks like a diff rendered without +/- markers;
// the XEINSTRNOTIMPLEMENTED()/return 1 pair below is presumably the removed
// stub. Read literally it would return before the VectorSub code -- confirm.
// NOTE(review): the x64 VECTOR_SUB emitter shown in this change hits
// assert_always() for INT32_TYPE with saturation, so this instruction
// presumably asserts on that backend -- confirm.
XEEMITTER(vsubuws, 0x10000680, VX)(PPCHIRBuilder& f, InstrData& i) {
|
||||
XEINSTRNOTIMPLEMENTED();
|
||||
return 1;
|
||||
// (VD) <- clamp(EXTZ(VA) + ¬EXTZ(VB) + 1, 0, 2^32-1)
|
||||
Value* v = f.VectorSub(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB), INT32_TYPE,
|
||||
ARITHMETIC_SATURATE | ARITHMETIC_UNSIGNED);
|
||||
f.StoreVR(i.VX.VD, v);
|
||||
return 0;
|
||||
}
|
||||
|
||||
XEEMITTER(vsumsws, 0x10000788, VX)(PPCHIRBuilder& f, InstrData& i) {
|
||||
|
@ -2132,10 +2150,10 @@ void RegisterEmitCategoryAltivec() {
|
|||
XEREGISTERINSTR(vsubshs, 0x10000740);
|
||||
XEREGISTERINSTR(vsubsws, 0x10000780);
|
||||
XEREGISTERINSTR(vsububm, 0x10000400);
|
||||
XEREGISTERINSTR(vsububs, 0x10000600);
|
||||
XEREGISTERINSTR(vsubuhm, 0x10000440);
|
||||
XEREGISTERINSTR(vsubuhs, 0x10000640);
|
||||
XEREGISTERINSTR(vsubuwm, 0x10000480);
|
||||
XEREGISTERINSTR(vsububs, 0x10000600);
|
||||
XEREGISTERINSTR(vsubuhs, 0x10000640);
|
||||
XEREGISTERINSTR(vsubuws, 0x10000680);
|
||||
XEREGISTERINSTR(vsumsws, 0x10000788);
|
||||
XEREGISTERINSTR(vsum2sws, 0x10000688);
|
||||
|
|
|
@ -1299,6 +1299,23 @@ Value* HIRBuilder::Sub(Value* value1, Value* value2,
|
|||
return i->dest;
|
||||
}
|
||||
|
||||
// Appends an OPCODE_VECTOR_SUB instruction with value1 as src1 and value2 as
// src2, and returns the destination value (allocated with value1's type).
//   value1, value2    operands; both are checked with ASSERT_VECTOR_TYPE.
//   part_type         lane type, stored in the low bits of the instr flags.
//   arithmetic_flags  arithmetic flags, stored shifted left by 8.
// The packed flags must fit in 16 bits (asserted) because they are narrowed
// to uint16_t when handed to AppendInstr.
Value* HIRBuilder::VectorSub(Value* value1, Value* value2, TypeName part_type,
|
||||
uint32_t arithmetic_flags) {
|
||||
ASSERT_VECTOR_TYPE(value1);
|
||||
ASSERT_VECTOR_TYPE(value2);
|
||||
|
||||
// This is shady.
// part_type and arithmetic_flags share one flags word; nothing here checks
// that part_type stays below 256, so a larger value would overlap the
// arithmetic flag bits.
|
||||
uint32_t flags = part_type | (arithmetic_flags << 8);
|
||||
assert_zero(flags >> 16);
|
||||
|
||||
Instr* i = AppendInstr(OPCODE_VECTOR_SUB_info, (uint16_t)flags,
|
||||
AllocValue(value1->type));
|
||||
i->set_src1(value1);
|
||||
i->set_src2(value2);
|
||||
// No third operand for this opcode.
i->src3.value = NULL;
|
||||
return i->dest;
|
||||
}
|
||||
|
||||
Value* HIRBuilder::Mul(Value* value1, Value* value2,
|
||||
uint32_t arithmetic_flags) {
|
||||
ASSERT_TYPES_EQUAL(value1, value2);
|
||||
|
|
|
@ -169,6 +169,8 @@ class HIRBuilder {
|
|||
Value* VectorAdd(Value* value1, Value* value2, TypeName part_type,
|
||||
uint32_t arithmetic_flags = 0);
|
||||
Value* Sub(Value* value1, Value* value2, uint32_t arithmetic_flags = 0);
|
||||
Value* VectorSub(Value* value1, Value* value2, TypeName part_type,
|
||||
uint32_t arithmetic_flags = 0);
|
||||
Value* Mul(Value* value1, Value* value2, uint32_t arithmetic_flags = 0);
|
||||
Value* MulHi(Value* value1, Value* value2, uint32_t arithmetic_flags = 0);
|
||||
Value* Div(Value* value1, Value* value2, uint32_t arithmetic_flags = 0);
|
||||
|
|
|
@ -140,6 +140,7 @@ enum Opcode {
|
|||
OPCODE_ADD_CARRY,
|
||||
OPCODE_VECTOR_ADD,
|
||||
OPCODE_SUB,
|
||||
OPCODE_VECTOR_SUB,
|
||||
OPCODE_MUL,
|
||||
OPCODE_MUL_HI, // TODO(benvanik): remove this and add INT128 type.
|
||||
OPCODE_DIV,
|
||||
|
|
|
@ -389,6 +389,12 @@ DEFINE_OPCODE(
|
|||
OPCODE_SIG_V_V_V,
|
||||
0)
|
||||
|
||||
// Opcode definition for OPCODE_VECTOR_SUB: display name "vector_sub",
// signature OPCODE_SIG_V_V_V, and a trailing 0 argument.
// NOTE(review): the trailing 0 is presumably the opcode flags field -- confirm
// against the DEFINE_OPCODE macro.
DEFINE_OPCODE(
|
||||
OPCODE_VECTOR_SUB,
|
||||
"vector_sub",
|
||||
OPCODE_SIG_V_V_V,
|
||||
0)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_MUL,
|
||||
"mul",
|
||||
|
|
Loading…
Reference in New Issue