Some binary xmm ops.

This commit is contained in:
Ben Vanik 2014-01-27 22:18:44 -08:00
parent da36baba8d
commit d67f786af8
2 changed files with 186 additions and 17 deletions

View File

@ -1364,9 +1364,17 @@ table->AddSequence(OPCODE_ADD, [](X64Emitter& e, Instr*& i) {
e.add(dest_src, src); e.add(dest_src, src);
}); });
} else if (IsFloatType(i->dest->type)) { } else if (IsFloatType(i->dest->type)) {
UNIMPLEMENTED_SEQ(); XmmBinaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src, const Xmm& src) {
if (i.src1.value->type == FLOAT32_TYPE) {
e.addss(dest_src, src);
} else {
e.addsd(dest_src, src);
}
});
} else if (IsVecType(i->dest->type)) { } else if (IsVecType(i->dest->type)) {
UNIMPLEMENTED_SEQ(); XmmBinaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src, const Xmm& src) {
e.addps(dest_src, src);
});
} else { } else {
ASSERT_INVALID_TYPE(); ASSERT_INVALID_TYPE();
} }
@ -1438,9 +1446,17 @@ table->AddSequence(OPCODE_SUB, [](X64Emitter& e, Instr*& i) {
e.sub(dest_src, src); e.sub(dest_src, src);
}); });
} else if (IsFloatType(i->dest->type)) { } else if (IsFloatType(i->dest->type)) {
UNIMPLEMENTED_SEQ(); XmmBinaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src, const Xmm& src) {
if (i.src1.value->type == FLOAT32_TYPE) {
e.subss(dest_src, src);
} else {
e.subsd(dest_src, src);
}
});
} else if (IsVecType(i->dest->type)) { } else if (IsVecType(i->dest->type)) {
UNIMPLEMENTED_SEQ(); XmmBinaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src, const Xmm& src) {
e.subps(dest_src, src);
});
} else { } else {
ASSERT_INVALID_TYPE(); ASSERT_INVALID_TYPE();
} }
@ -1478,9 +1494,19 @@ table->AddSequence(OPCODE_MUL, [](X64Emitter& e, Instr*& i) {
e.mov(dest_src, Nax); e.mov(dest_src, Nax);
}); });
} else if (IsFloatType(i->dest->type)) { } else if (IsFloatType(i->dest->type)) {
UNIMPLEMENTED_SEQ(); XmmBinaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src, const Xmm& src) {
if (i.flags & ARITHMETIC_UNSIGNED) { UNIMPLEMENTED_SEQ(); }
if (i.src1.value->type == FLOAT32_TYPE) {
e.mulss(dest_src, src);
} else {
e.mulsd(dest_src, src);
}
});
} else if (IsVecType(i->dest->type)) { } else if (IsVecType(i->dest->type)) {
UNIMPLEMENTED_SEQ(); XmmBinaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src, const Xmm& src) {
if (i.flags & ARITHMETIC_UNSIGNED) { UNIMPLEMENTED_SEQ(); }
e.mulps(dest_src, src);
});
} else { } else {
ASSERT_INVALID_TYPE(); ASSERT_INVALID_TYPE();
} }
@ -1551,9 +1577,19 @@ table->AddSequence(OPCODE_DIV, [](X64Emitter& e, Instr*& i) {
e.mov(dest_src, Nax); e.mov(dest_src, Nax);
}); });
} else if (IsFloatType(i->dest->type)) { } else if (IsFloatType(i->dest->type)) {
UNIMPLEMENTED_SEQ(); XmmBinaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src, const Xmm& src) {
if (i.flags & ARITHMETIC_UNSIGNED) { UNIMPLEMENTED_SEQ(); }
if (i.src1.value->type == FLOAT32_TYPE) {
e.divss(dest_src, src);
} else {
e.divsd(dest_src, src);
}
});
} else if (IsVecType(i->dest->type)) { } else if (IsVecType(i->dest->type)) {
UNIMPLEMENTED_SEQ(); XmmBinaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src, const Xmm& src) {
if (i.flags & ARITHMETIC_UNSIGNED) { UNIMPLEMENTED_SEQ(); }
e.divps(dest_src, src);
});
} else { } else {
ASSERT_INVALID_TYPE(); ASSERT_INVALID_TYPE();
} }
@ -1703,7 +1739,12 @@ table->AddSequence(OPCODE_LOG2, [](X64Emitter& e, Instr*& i) {
table->AddSequence(OPCODE_DOT_PRODUCT_3, [](X64Emitter& e, Instr*& i) { table->AddSequence(OPCODE_DOT_PRODUCT_3, [](X64Emitter& e, Instr*& i) {
if (IsVecType(i->dest->type)) { if (IsVecType(i->dest->type)) {
UNIMPLEMENTED_SEQ(); XmmBinaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src, const Xmm& src) {
// http://msdn.microsoft.com/en-us/library/bb514054(v=vs.90).aspx
// TODO(benvanik): verify ordering
e.db(0xCC);
e.dpps(dest_src, src, B01110001);
});
} else { } else {
ASSERT_INVALID_TYPE(); ASSERT_INVALID_TYPE();
} }
@ -1713,7 +1754,12 @@ table->AddSequence(OPCODE_DOT_PRODUCT_3, [](X64Emitter& e, Instr*& i) {
table->AddSequence(OPCODE_DOT_PRODUCT_4, [](X64Emitter& e, Instr*& i) { table->AddSequence(OPCODE_DOT_PRODUCT_4, [](X64Emitter& e, Instr*& i) {
if (IsVecType(i->dest->type)) { if (IsVecType(i->dest->type)) {
UNIMPLEMENTED_SEQ(); XmmBinaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src, const Xmm& src) {
// http://msdn.microsoft.com/en-us/library/bb514054(v=vs.90).aspx
// TODO(benvanik): verify ordering
e.db(0xCC);
e.dpps(dest_src, src, B11110001);
});
} else { } else {
ASSERT_INVALID_TYPE(); ASSERT_INVALID_TYPE();
} }
@ -1729,7 +1775,9 @@ table->AddSequence(OPCODE_AND, [](X64Emitter& e, Instr*& i) {
e.and(dest_src, src); e.and(dest_src, src);
}); });
} else if (IsVecType(i->dest->type)) { } else if (IsVecType(i->dest->type)) {
UNIMPLEMENTED_SEQ(); XmmBinaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src, const Xmm& src) {
e.pand(dest_src, src);
});
} else { } else {
ASSERT_INVALID_TYPE(); ASSERT_INVALID_TYPE();
} }
@ -1745,7 +1793,9 @@ table->AddSequence(OPCODE_OR, [](X64Emitter& e, Instr*& i) {
e.or(dest_src, src); e.or(dest_src, src);
}); });
} else if (IsVecType(i->dest->type)) { } else if (IsVecType(i->dest->type)) {
UNIMPLEMENTED_SEQ(); XmmBinaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src, const Xmm& src) {
e.por(dest_src, src);
});
} else { } else {
ASSERT_INVALID_TYPE(); ASSERT_INVALID_TYPE();
} }
@ -1761,7 +1811,9 @@ table->AddSequence(OPCODE_XOR, [](X64Emitter& e, Instr*& i) {
e.xor(dest_src, src); e.xor(dest_src, src);
}); });
} else if (IsVecType(i->dest->type)) { } else if (IsVecType(i->dest->type)) {
UNIMPLEMENTED_SEQ(); XmmBinaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src, const Xmm& src) {
e.pxor(dest_src, src);
});
} else { } else {
ASSERT_INVALID_TYPE(); ASSERT_INVALID_TYPE();
} }

View File

@ -715,17 +715,15 @@ void IntTernaryOp(X64Emitter& e, Instr*& i, vvv_fn vvv_fn, vvc_fn vvc_fn, vcv_fn
// Since alot of SSE ops can take dest + src, just do that. // Since alot of SSE ops can take dest + src, just do that.
// Worst case the callee can dedupe. // Worst case the callee can dedupe.
typedef void(xmm_v_fn)(X64Emitter& e, Instr& i, const Xmm& dest, const Xmm& src); typedef void(xmm_v_fn)(X64Emitter& e, Instr& i, const Xmm& dest, const Xmm& src);
template<typename T>
void XmmUnaryOpV(X64Emitter& e, Instr*& i, xmm_v_fn v_fn, void XmmUnaryOpV(X64Emitter& e, Instr*& i, xmm_v_fn v_fn,
T& dest, T& src1) { Xmm& dest, Xmm& src1) {
e.BeginOp(i->dest, dest, REG_DEST, e.BeginOp(i->dest, dest, REG_DEST,
i->src1.value, src1, 0); i->src1.value, src1, 0);
v_fn(e, *i, dest, src1); v_fn(e, *i, dest, src1);
e.EndOp(dest, src1); e.EndOp(dest, src1);
} }
template<typename T>
void XmmUnaryOpC(X64Emitter& e, Instr*& i, xmm_v_fn v_fn, void XmmUnaryOpC(X64Emitter& e, Instr*& i, xmm_v_fn v_fn,
T& dest, Value* src1) { Xmm& dest, Value* src1) {
e.BeginOp(i->dest, dest, REG_DEST); e.BeginOp(i->dest, dest, REG_DEST);
if (src1->type == FLOAT32_TYPE) { if (src1->type == FLOAT32_TYPE) {
e.mov(e.eax, (uint32_t)src1->constant.i32); e.mov(e.eax, (uint32_t)src1->constant.i32);
@ -771,6 +769,125 @@ void XmmUnaryOp(X64Emitter& e, Instr*& i, uint32_t flags, xmm_v_fn v_fn) {
} }
}; };
// TODO(benvanik): allow a vvv form for dest = src1 + src2 that new SSE
//                 ops support.
// Callback that emits a two-operand XMM instruction of the form
// dest_src = dest_src <op> src.
typedef void(xmm_vv_fn)(X64Emitter& e, Instr& i, const Xmm& dest_src, const Xmm& src);

// Emits a binary XMM op where both sources are variables (no constants):
// dest = src1 <op> src2.
//
// vv_fn only supports the destructive dest_src/src form, so this routine
// arranges for src1 to end up in the register passed as dest_src, taking care
// of the cases where dest aliases one of the sources. xmm0 is used as scratch
// when dest aliases src2 and the opcode is not commutative.
void XmmBinaryOpVV(X64Emitter& e, Instr*& i, xmm_vv_fn vv_fn,
                   Xmm& dest, Xmm& src1, Xmm& src2) {
  e.BeginOp(i->dest, dest, REG_DEST,
            i->src1.value, src1, 0,
            i->src2.value, src2, 0);
  if (dest == src1) {
    // dest already holds the left operand; operate in place.
    vv_fn(e, *i, dest, src2);
  } else if (dest != src2) {
    // No aliasing at all: seed dest with src1, then apply src2.
    e.movaps(dest, src1);
    vv_fn(e, *i, dest, src2);
  } else if (i->opcode->flags & OPCODE_FLAG_COMMUNATIVE) {
    // dest aliases src2 but operand order does not matter, so swap operands.
    vv_fn(e, *i, dest, src1);
  } else {
    // dest aliases src2 and order matters: compute in the xmm0 scratch
    // register so src2 is not overwritten before it is read.
    e.movaps(e.xmm0, src1);
    vv_fn(e, *i, e.xmm0, src2);
    e.movaps(dest, e.xmm0);
  }
  e.EndOp(dest, src1, src2);
}
// Emits a binary XMM op whose first source is a variable and whose second
// source is a constant: dest = src1 <op> constant(src2).
//
// The constant's raw bits are staged through eax/rax and transferred into an
// XMM register. Only FLOAT32_TYPE and FLOAT64_TYPE constants are implemented;
// any other constant type hits UNIMPLEMENTED_SEQ().
//
// Register usage:
//  - Commutative opcode and dest does not alias src1: the constant is loaded
//    straight into dest and src1 is applied to it (no scratch needed).
//  - Otherwise: dest is seeded with src1 (if not already the same register),
//    the constant is loaded into the xmm0 scratch register and applied.
//    This path must also be taken for commutative opcodes when dest aliases
//    src1, because loading the constant into dest would destroy src1 before
//    it is read.
void XmmBinaryOpVC(X64Emitter& e, Instr*& i, xmm_vv_fn vv_fn,
                   Xmm& dest, Xmm& src1, Value* src2) {
  e.BeginOp(i->dest, dest, REG_DEST,
            i->src1.value, src1, 0);

  // Materializes the src2 constant into |target|.
  // movd/movq are used because movss/movsd have no encoding that takes a
  // general purpose register as the source operand.
  auto load_constant = [&](const Xmm& target) {
    if (src2->type == FLOAT32_TYPE) {
      e.mov(e.eax, (uint32_t)src2->constant.i32);
      e.movd(target, e.eax);
    } else if (src2->type == FLOAT64_TYPE) {
      e.mov(e.rax, (uint64_t)src2->constant.i64);
      e.movq(target, e.rax);
    } else {
      UNIMPLEMENTED_SEQ();
    }
  };

  const bool commutative =
      (i->opcode->flags & OPCODE_FLAG_COMMUNATIVE) != 0;
  if (commutative && dest != src1) {
    // Operand order does not matter and dest is free to be overwritten.
    load_constant(dest);
    vv_fn(e, *i, dest, src1);
  } else {
    if (dest != src1) {
      e.movaps(dest, src1);
    }
    load_constant(e.xmm0);
    vv_fn(e, *i, dest, e.xmm0);
  }
  e.EndOp(dest, src1);
}
// Emits a binary XMM op whose first source is a constant and whose second
// source is a variable: dest = constant(src1) <op> src2.
//
// The constant's raw bits are staged through eax/rax and transferred into an
// XMM register. Only FLOAT32_TYPE and FLOAT64_TYPE constants are implemented;
// any other constant type hits UNIMPLEMENTED_SEQ().
//
// Register usage:
//  - dest aliases src2 and the opcode is commutative: the constant goes into
//    the xmm0 scratch register and is applied to dest with the operands
//    swapped (valid because order does not matter).
//  - dest aliases src2 and the opcode is not commutative: src2 is first saved
//    to xmm0, then the constant is loaded into dest and the saved copy is
//    applied.
//  - No aliasing: the constant is loaded into dest and src2 is applied.
// Loading the constant into dest while dest still aliases src2 would destroy
// src2 before it is read, so both aliasing cases avoid doing that.
void XmmBinaryOpCV(X64Emitter& e, Instr*& i, xmm_vv_fn vv_fn,
                   Xmm& dest, Value* src1, Xmm& src2) {
  e.BeginOp(i->dest, dest, REG_DEST,
            i->src2.value, src2, 0);

  // Materializes the src1 constant into |target|.
  // movd/movq are used because movss/movsd have no encoding that takes a
  // general purpose register as the source operand.
  auto load_constant = [&](const Xmm& target) {
    if (src1->type == FLOAT32_TYPE) {
      e.mov(e.eax, (uint32_t)src1->constant.i32);
      e.movd(target, e.eax);
    } else if (src1->type == FLOAT64_TYPE) {
      e.mov(e.rax, (uint64_t)src1->constant.i64);
      e.movq(target, e.rax);
    } else {
      UNIMPLEMENTED_SEQ();
    }
  };

  const bool commutative =
      (i->opcode->flags & OPCODE_FLAG_COMMUNATIVE) != 0;
  if (commutative && dest == src2) {
    // dest already holds src2; apply the constant to it in place.
    load_constant(e.xmm0);
    vv_fn(e, *i, dest, e.xmm0);
  } else {
    Xmm real_src2 = src2;
    if (dest == src2) {
      // Preserve src2 before dest is overwritten with the constant.
      e.movaps(e.xmm0, src2);
      real_src2 = e.xmm0;
    }
    load_constant(dest);
    vv_fn(e, *i, dest, real_src2);
  }
  e.EndOp(dest, src2);
}
// Entry point for lowering a binary XMM instruction.
//
// Inspects the operand signature of |i| and forwards to the matching helper:
//  - variable, variable -> XmmBinaryOpVV
//  - variable, constant -> XmmBinaryOpVC
//  - constant, variable -> XmmBinaryOpCV
// Any other signature triggers ASSERT_INVALID_TYPE(). The destination type is
// asserted to equal the type of src1.
//
// |vv_fn| emits the actual dest_src/src instruction. |flags| is only checked
// for ARITHMETIC_SET_CARRY, which currently emits nothing.
void XmmBinaryOp(X64Emitter& e, Instr*& i, uint32_t flags, xmm_vv_fn vv_fn) {
  // TODO(benvanik): table lookup. This linear scan is slow.
  XEASSERT(i->dest->type == i->src1.value->type);

  if (i->Match(SIG_TYPE_IGNORE, SIG_TYPE_F32, SIG_TYPE_F32) ||
      i->Match(SIG_TYPE_IGNORE, SIG_TYPE_F64, SIG_TYPE_F64) ||
      i->Match(SIG_TYPE_IGNORE, SIG_TYPE_V128, SIG_TYPE_V128)) {
    // Both operands are variables.
    Xmm dst_reg, lhs_reg, rhs_reg;
    XmmBinaryOpVV(e, i, vv_fn, dst_reg, lhs_reg, rhs_reg);
  } else if (i->Match(SIG_TYPE_IGNORE, SIG_TYPE_F32, SIG_TYPE_F32C) ||
             i->Match(SIG_TYPE_IGNORE, SIG_TYPE_F64, SIG_TYPE_F64C) ||
             i->Match(SIG_TYPE_IGNORE, SIG_TYPE_V128, SIG_TYPE_V128C)) {
    // Right-hand operand is a constant.
    Xmm dst_reg, lhs_reg;
    XmmBinaryOpVC(e, i, vv_fn, dst_reg, lhs_reg, i->src2.value);
  } else if (i->Match(SIG_TYPE_IGNORE, SIG_TYPE_F32C, SIG_TYPE_F32) ||
             i->Match(SIG_TYPE_IGNORE, SIG_TYPE_F64C, SIG_TYPE_F64) ||
             i->Match(SIG_TYPE_IGNORE, SIG_TYPE_V128C, SIG_TYPE_V128)) {
    // Left-hand operand is a constant.
    Xmm dst_reg, rhs_reg;
    XmmBinaryOpCV(e, i, vv_fn, dst_reg, i->src1.value, rhs_reg);
  } else {
    ASSERT_INVALID_TYPE();
  }

  if (flags & ARITHMETIC_SET_CARRY) {
    // EFLAGS should have CA set?
    // (so long as nothing emitted afterwards disturbs it)
    // UNIMPLEMENTED_SEQ();
  }
}
} // namespace } // namespace
#endif // ALLOY_BACKEND_X64_X64_LOWERING_OP_UTILS_INL_ #endif // ALLOY_BACKEND_X64_X64_LOWERING_OP_UTILS_INL_