Some binary xmm ops.
parent da36baba8d
commit d67f786af8
@@ -1364,9 +1364,17 @@ table->AddSequence(OPCODE_ADD, [](X64Emitter& e, Instr*& i) {
       e.add(dest_src, src);
     });
   } else if (IsFloatType(i->dest->type)) {
-    UNIMPLEMENTED_SEQ();
+    XmmBinaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src, const Xmm& src) {
+      if (i.src1.value->type == FLOAT32_TYPE) {
+        e.addss(dest_src, src);
+      } else {
+        e.addsd(dest_src, src);
+      }
+    });
   } else if (IsVecType(i->dest->type)) {
-    UNIMPLEMENTED_SEQ();
+    XmmBinaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src, const Xmm& src) {
+      e.addps(dest_src, src);
+    });
   } else {
     ASSERT_INVALID_TYPE();
   }
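Note on the pattern above: XmmBinaryOp, added later in this commit, hands the lambda the SSE two-operand form, so dest_src is both the first input and the destination. A minimal intrinsics sketch of that shape, for illustration only (the function name is hypothetical, not part of the emitter):

#include <xmmintrin.h>  // SSE

// addss-style two-operand add: the first register is overwritten in place.
static float add_scalar(float a, float b) {
  __m128 dest_src = _mm_set_ss(a);
  __m128 src = _mm_set_ss(b);
  dest_src = _mm_add_ss(dest_src, src);  // dest_src[0] += src[0]
  return _mm_cvtss_f32(dest_src);
}
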
@@ -1438,9 +1446,17 @@ table->AddSequence(OPCODE_SUB, [](X64Emitter& e, Instr*& i) {
       e.sub(dest_src, src);
     });
   } else if (IsFloatType(i->dest->type)) {
-    UNIMPLEMENTED_SEQ();
+    XmmBinaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src, const Xmm& src) {
+      if (i.src1.value->type == FLOAT32_TYPE) {
+        e.subss(dest_src, src);
+      } else {
+        e.subsd(dest_src, src);
+      }
+    });
   } else if (IsVecType(i->dest->type)) {
-    UNIMPLEMENTED_SEQ();
+    XmmBinaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src, const Xmm& src) {
+      e.subps(dest_src, src);
+    });
   } else {
     ASSERT_INVALID_TYPE();
   }
@@ -1478,9 +1494,19 @@ table->AddSequence(OPCODE_MUL, [](X64Emitter& e, Instr*& i) {
       e.mov(dest_src, Nax);
     });
   } else if (IsFloatType(i->dest->type)) {
-    UNIMPLEMENTED_SEQ();
+    XmmBinaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src, const Xmm& src) {
+      if (i.flags & ARITHMETIC_UNSIGNED) { UNIMPLEMENTED_SEQ(); }
+      if (i.src1.value->type == FLOAT32_TYPE) {
+        e.mulss(dest_src, src);
+      } else {
+        e.mulsd(dest_src, src);
+      }
+    });
   } else if (IsVecType(i->dest->type)) {
-    UNIMPLEMENTED_SEQ();
+    XmmBinaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src, const Xmm& src) {
+      if (i.flags & ARITHMETIC_UNSIGNED) { UNIMPLEMENTED_SEQ(); }
+      e.mulps(dest_src, src);
+    });
   } else {
     ASSERT_INVALID_TYPE();
   }
@@ -1551,9 +1577,19 @@ table->AddSequence(OPCODE_DIV, [](X64Emitter& e, Instr*& i) {
      e.mov(dest_src, Nax);
     });
   } else if (IsFloatType(i->dest->type)) {
-    UNIMPLEMENTED_SEQ();
+    XmmBinaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src, const Xmm& src) {
+      if (i.flags & ARITHMETIC_UNSIGNED) { UNIMPLEMENTED_SEQ(); }
+      if (i.src1.value->type == FLOAT32_TYPE) {
+        e.divss(dest_src, src);
+      } else {
+        e.divsd(dest_src, src);
+      }
+    });
   } else if (IsVecType(i->dest->type)) {
-    UNIMPLEMENTED_SEQ();
+    XmmBinaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src, const Xmm& src) {
+      if (i.flags & ARITHMETIC_UNSIGNED) { UNIMPLEMENTED_SEQ(); }
+      e.divps(dest_src, src);
+    });
   } else {
     ASSERT_INVALID_TYPE();
   }
@ -1703,7 +1739,12 @@ table->AddSequence(OPCODE_LOG2, [](X64Emitter& e, Instr*& i) {
|
|||
|
||||
table->AddSequence(OPCODE_DOT_PRODUCT_3, [](X64Emitter& e, Instr*& i) {
|
||||
if (IsVecType(i->dest->type)) {
|
||||
UNIMPLEMENTED_SEQ();
|
||||
XmmBinaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src, const Xmm& src) {
|
||||
// http://msdn.microsoft.com/en-us/library/bb514054(v=vs.90).aspx
|
||||
// TODO(benvanik): verify ordering
|
||||
e.db(0xCC);
|
||||
e.dpps(dest_src, src, B01110001);
|
||||
});
|
||||
} else {
|
||||
ASSERT_INVALID_TYPE();
|
||||
}
|
||||
|
@ -1713,7 +1754,12 @@ table->AddSequence(OPCODE_DOT_PRODUCT_3, [](X64Emitter& e, Instr*& i) {
|
|||
|
||||
table->AddSequence(OPCODE_DOT_PRODUCT_4, [](X64Emitter& e, Instr*& i) {
|
||||
if (IsVecType(i->dest->type)) {
|
||||
UNIMPLEMENTED_SEQ();
|
||||
XmmBinaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src, const Xmm& src) {
|
||||
// http://msdn.microsoft.com/en-us/library/bb514054(v=vs.90).aspx
|
||||
// TODO(benvanik): verify ordering
|
||||
e.db(0xCC);
|
||||
e.dpps(dest_src, src, B11110001);
|
||||
});
|
||||
} else {
|
||||
ASSERT_INVALID_TYPE();
|
||||
}
|
||||
|
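Note on the dpps immediates in the two hunks above: in the imm8, the high nibble selects which lanes enter the dot product and the low nibble selects which destination lanes receive the sum, so B01110001 (0x71) is a 3-element dot stored in lane 0 and B11110001 (0xF1) is a 4-element dot stored in lane 0. The e.db(0xCC) emits an int3 breakpoint, matching the TODO about verifying lane ordering. An SSE4.1 intrinsics sketch of the same immediates, for illustration only:

#include <smmintrin.h>  // SSE4.1

static float dot3(__m128 a, __m128 b) {
  // 0x71 == B01110001: multiply lanes 0..2, write the result to lane 0.
  return _mm_cvtss_f32(_mm_dp_ps(a, b, 0x71));
}

static float dot4(__m128 a, __m128 b) {
  // 0xF1 == B11110001: multiply lanes 0..3, write the result to lane 0.
  return _mm_cvtss_f32(_mm_dp_ps(a, b, 0xF1));
}
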
@@ -1729,7 +1775,9 @@ table->AddSequence(OPCODE_AND, [](X64Emitter& e, Instr*& i) {
       e.and(dest_src, src);
     });
   } else if (IsVecType(i->dest->type)) {
-    UNIMPLEMENTED_SEQ();
+    XmmBinaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src, const Xmm& src) {
+      e.pand(dest_src, src);
+    });
   } else {
     ASSERT_INVALID_TYPE();
   }
@@ -1745,7 +1793,9 @@ table->AddSequence(OPCODE_OR, [](X64Emitter& e, Instr*& i) {
       e.or(dest_src, src);
     });
   } else if (IsVecType(i->dest->type)) {
-    UNIMPLEMENTED_SEQ();
+    XmmBinaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src, const Xmm& src) {
+      e.por(dest_src, src);
+    });
   } else {
     ASSERT_INVALID_TYPE();
   }
@@ -1761,7 +1811,9 @@ table->AddSequence(OPCODE_XOR, [](X64Emitter& e, Instr*& i) {
       e.xor(dest_src, src);
     });
   } else if (IsVecType(i->dest->type)) {
-    UNIMPLEMENTED_SEQ();
+    XmmBinaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src, const Xmm& src) {
+      e.pxor(dest_src, src);
+    });
   } else {
     ASSERT_INVALID_TYPE();
   }
@@ -715,17 +715,15 @@ void IntTernaryOp(X64Emitter& e, Instr*& i, vvv_fn vvv_fn, vvc_fn vvc_fn, vcv_fn
 // Since alot of SSE ops can take dest + src, just do that.
 // Worst case the callee can dedupe.
 typedef void(xmm_v_fn)(X64Emitter& e, Instr& i, const Xmm& dest, const Xmm& src);
-template<typename T>
 void XmmUnaryOpV(X64Emitter& e, Instr*& i, xmm_v_fn v_fn,
-                 T& dest, T& src1) {
+                 Xmm& dest, Xmm& src1) {
   e.BeginOp(i->dest, dest, REG_DEST,
             i->src1.value, src1, 0);
   v_fn(e, *i, dest, src1);
   e.EndOp(dest, src1);
 }
-template<typename T>
 void XmmUnaryOpC(X64Emitter& e, Instr*& i, xmm_v_fn v_fn,
-                 T& dest, Value* src1) {
+                 Xmm& dest, Value* src1) {
   e.BeginOp(i->dest, dest, REG_DEST);
   if (src1->type == FLOAT32_TYPE) {
     e.mov(e.eax, (uint32_t)src1->constant.i32);
@@ -771,6 +769,125 @@ void XmmUnaryOp(X64Emitter& e, Instr*& i, uint32_t flags, xmm_v_fn v_fn) {
   }
 };
 
+// TODO(benvanik): allow a vvv form for dest = src1 + src2 that new SSE
+// ops support.
+typedef void(xmm_vv_fn)(X64Emitter& e, Instr& i, const Xmm& dest_src, const Xmm& src);
+void XmmBinaryOpVV(X64Emitter& e, Instr*& i, xmm_vv_fn vv_fn,
+                   Xmm& dest, Xmm& src1, Xmm& src2) {
+  e.BeginOp(i->dest, dest, REG_DEST,
+            i->src1.value, src1, 0,
+            i->src2.value, src2, 0);
+  if (dest == src1) {
+    vv_fn(e, *i, dest, src2);
+  } else if (dest == src2) {
+    if (i->opcode->flags & OPCODE_FLAG_COMMUNATIVE) {
+      vv_fn(e, *i, dest, src1);
+    } else {
+      // Eww.
+      e.movaps(e.xmm0, src1);
+      vv_fn(e, *i, e.xmm0, src2);
+      e.movaps(dest, e.xmm0);
+    }
+  } else {
+    e.movaps(dest, src1);
+    vv_fn(e, *i, dest, src2);
+  }
+  e.EndOp(dest, src1, src2);
+}
+void XmmBinaryOpVC(X64Emitter& e, Instr*& i, xmm_vv_fn vv_fn,
+                   Xmm& dest, Xmm& src1, Value* src2) {
+  e.BeginOp(i->dest, dest, REG_DEST,
+            i->src1.value, src1, 0);
+  if (i->opcode->flags & OPCODE_FLAG_COMMUNATIVE) {
+    if (src2->type == FLOAT32_TYPE) {
+      e.mov(e.eax, (uint32_t)src2->constant.i32);
+      e.movss(dest, e.eax);
+    } else if (src2->type == FLOAT64_TYPE) {
+      e.mov(e.rax, (uint64_t)src2->constant.i64);
+      e.movsd(dest, e.rax);
+    } else {
+      UNIMPLEMENTED_SEQ();
+    }
+    vv_fn(e, *i, dest, src1);
+  } else {
+    if (dest != src1) {
+      e.movaps(dest, src1);
+    }
+    if (src2->type == FLOAT32_TYPE) {
+      e.mov(e.eax, (uint32_t)src2->constant.i32);
+      e.movss(e.xmm0, e.eax);
+    } else if (src2->type == FLOAT64_TYPE) {
+      e.mov(e.rax, (uint64_t)src2->constant.i64);
+      e.movsd(e.xmm0, e.rax);
+    } else {
+      UNIMPLEMENTED_SEQ();
+    }
+    vv_fn(e, *i, dest, e.xmm0);
+  }
+  e.EndOp(dest, src1);
+}
+void XmmBinaryOpCV(X64Emitter& e, Instr*& i, xmm_vv_fn vv_fn,
+                   Xmm& dest, Value* src1, Xmm& src2) {
+  e.BeginOp(i->dest, dest, REG_DEST,
+            i->src2.value, src2, 0);
+  if (i->opcode->flags & OPCODE_FLAG_COMMUNATIVE) {
+    if (src1->type == FLOAT32_TYPE) {
+      e.mov(e.eax, (uint32_t)src1->constant.i32);
+      e.movss(dest, e.eax);
+    } else if (src1->type == FLOAT64_TYPE) {
+      e.mov(e.rax, (uint64_t)src1->constant.i64);
+      e.movsd(dest, e.rax);
+    } else {
+      UNIMPLEMENTED_SEQ();
+    }
+    vv_fn(e, *i, dest, src2);
+  } else {
+    auto real_src2 = src2;
+    if (dest == src2) {
+      e.movaps(e.xmm0, src2);
+      real_src2 = e.xmm0;
+    }
+    if (src1->type == FLOAT32_TYPE) {
+      e.mov(e.eax, (uint32_t)src1->constant.i32);
+      e.movss(dest, e.eax);
+    } else if (src1->type == FLOAT64_TYPE) {
+      e.mov(e.rax, (uint64_t)src1->constant.i64);
+      e.movsd(dest, e.rax);
+    } else {
+      UNIMPLEMENTED_SEQ();
+    }
+    vv_fn(e, *i, dest, real_src2);
+  }
+  e.EndOp(dest, src2);
+}
+void XmmBinaryOp(X64Emitter& e, Instr*& i, uint32_t flags, xmm_vv_fn vv_fn) {
+  // TODO(benvanik): table lookup. This linear scan is slow.
+  XEASSERT(i->dest->type == i->src1.value->type);
+  if (i->Match(SIG_TYPE_IGNORE, SIG_TYPE_F32, SIG_TYPE_F32) ||
+      i->Match(SIG_TYPE_IGNORE, SIG_TYPE_F64, SIG_TYPE_F64) ||
+      i->Match(SIG_TYPE_IGNORE, SIG_TYPE_V128, SIG_TYPE_V128)) {
+    Xmm dest, src1, src2;
+    XmmBinaryOpVV(e, i, vv_fn, dest, src1, src2);
+  } else if (i->Match(SIG_TYPE_IGNORE, SIG_TYPE_F32, SIG_TYPE_F32C) ||
+             i->Match(SIG_TYPE_IGNORE, SIG_TYPE_F64, SIG_TYPE_F64C) ||
+             i->Match(SIG_TYPE_IGNORE, SIG_TYPE_V128, SIG_TYPE_V128C)) {
+    Xmm dest, src1;
+    XmmBinaryOpVC(e, i, vv_fn, dest, src1, i->src2.value);
+  } else if (i->Match(SIG_TYPE_IGNORE, SIG_TYPE_F32C, SIG_TYPE_F32) ||
+             i->Match(SIG_TYPE_IGNORE, SIG_TYPE_F64C, SIG_TYPE_F64) ||
+             i->Match(SIG_TYPE_IGNORE, SIG_TYPE_V128C, SIG_TYPE_V128)) {
+    Xmm dest, src2;
+    XmmBinaryOpCV(e, i, vv_fn, dest, i->src1.value, src2);
+  } else {
+    ASSERT_INVALID_TYPE();
+  }
+  if (flags & ARITHMETIC_SET_CARRY) {
+    // EFLAGS should have CA set?
+    // (so long as we don't fuck with it)
+    // UNIMPLEMENTED_SEQ();
+  }
+};
+
 } // namespace
 
 #endif // ALLOY_BACKEND_X64_X64_LOWERING_OP_UTILS_INL_
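
Note on XmmBinaryOpVV above: because the SSE two-operand form overwrites its first operand, the dest == src2 case of a non-commutative op cannot be applied in place, which is why it stages through xmm0. A scalar analogue of that case, for illustration only (the function name is hypothetical), using subtraction since it is not commutative:

// dest aliases src2; the result must be src1 - src2, but applying the
// two-operand op in place would compute src2 - src1 instead.
static float sub_dest_aliases_src2(float& dest /* == src2 */, float src1) {
  float tmp = src1;  // movaps xmm0, src1
  tmp -= dest;       // vv_fn: subss xmm0, src2
  dest = tmp;        // movaps dest, xmm0
  return dest;
}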
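
Note on XmmBinaryOp: it is the single entry point the opcode sequences call. Match routes register/register signatures to XmmBinaryOpVV and register/constant signatures to the VC/CV variants, which first materialize the constant's bit pattern through eax/rax. The calling pattern, taken from the OPCODE_ADD sequence in this commit:

XmmBinaryOp(e, i, i->flags,
            [](X64Emitter& e, Instr& i, const Xmm& dest_src, const Xmm& src) {
  e.addps(dest_src, src);  // dest_src = dest_src + src, packed single
});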