Fixing xmm unary ops.

This commit is contained in:
Ben Vanik 2014-01-26 22:57:39 -08:00
parent c828e5416e
commit 6c7e392088
2 changed files with 60 additions and 39 deletions

View File

@ -1458,7 +1458,7 @@ table->AddSequence(OPCODE_NEG, [](X64Emitter& e, Instr*& i) {
e.neg(dest_src); e.neg(dest_src);
}); });
} else if (IsFloatType(i->dest->type)) { } else if (IsFloatType(i->dest->type)) {
XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src) { XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest, const Xmm& src) {
if (i.src1.value->type == FLOAT32_TYPE) { if (i.src1.value->type == FLOAT32_TYPE) {
UNIMPLEMENTED_SEQ(); UNIMPLEMENTED_SEQ();
} else { } else {
@ -1466,7 +1466,7 @@ table->AddSequence(OPCODE_NEG, [](X64Emitter& e, Instr*& i) {
} }
}); });
} else if (IsVecType(i->dest->type)) { } else if (IsVecType(i->dest->type)) {
XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src) { XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest, const Xmm& src) {
UNIMPLEMENTED_SEQ(); UNIMPLEMENTED_SEQ();
}); });
} else { } else {
@ -1480,7 +1480,7 @@ table->AddSequence(OPCODE_ABS, [](X64Emitter& e, Instr*& i) {
if (IsIntType(i->dest->type)) { if (IsIntType(i->dest->type)) {
UNIMPLEMENTED_SEQ(); UNIMPLEMENTED_SEQ();
} else if (IsFloatType(i->dest->type)) { } else if (IsFloatType(i->dest->type)) {
XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src) { XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest, const Xmm& src) {
if (i.src1.value->type == FLOAT32_TYPE) { if (i.src1.value->type == FLOAT32_TYPE) {
UNIMPLEMENTED_SEQ(); UNIMPLEMENTED_SEQ();
} else { } else {
@ -1488,7 +1488,7 @@ table->AddSequence(OPCODE_ABS, [](X64Emitter& e, Instr*& i) {
} }
}); });
} else if (IsVecType(i->dest->type)) { } else if (IsVecType(i->dest->type)) {
XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src) { XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest, const Xmm& src) {
UNIMPLEMENTED_SEQ(); UNIMPLEMENTED_SEQ();
}); });
} else { } else {
@ -1500,16 +1500,16 @@ table->AddSequence(OPCODE_ABS, [](X64Emitter& e, Instr*& i) {
table->AddSequence(OPCODE_SQRT, [](X64Emitter& e, Instr*& i) { table->AddSequence(OPCODE_SQRT, [](X64Emitter& e, Instr*& i) {
if (IsFloatType(i->dest->type)) { if (IsFloatType(i->dest->type)) {
XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src) { XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest, const Xmm& src) {
if (i.dest->type == FLOAT32_TYPE) { if (i.dest->type == FLOAT32_TYPE) {
e.sqrtss(dest_src, dest_src); e.sqrtss(dest, src);
} else { } else {
e.sqrtsd(dest_src, dest_src); e.sqrtsd(dest, src);
} }
}); });
} else if (IsVecType(i->dest->type)) { } else if (IsVecType(i->dest->type)) {
XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src) { XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest, const Xmm& src) {
e.sqrtps(dest_src, dest_src); e.sqrtps(dest, src);
}); });
} else { } else {
ASSERT_INVALID_TYPE(); ASSERT_INVALID_TYPE();
@ -1520,18 +1520,18 @@ table->AddSequence(OPCODE_SQRT, [](X64Emitter& e, Instr*& i) {
table->AddSequence(OPCODE_RSQRT, [](X64Emitter& e, Instr*& i) { table->AddSequence(OPCODE_RSQRT, [](X64Emitter& e, Instr*& i) {
if (IsFloatType(i->dest->type)) { if (IsFloatType(i->dest->type)) {
XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src) { XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest, const Xmm& src) {
if (i.dest->type == FLOAT32_TYPE) { if (i.dest->type == FLOAT32_TYPE) {
e.rsqrtss(dest_src, dest_src); e.rsqrtss(dest, src);
} else { } else {
e.cvtsd2ss(dest_src, dest_src); e.cvtsd2ss(dest, src);
e.rsqrtss(dest_src, dest_src); e.rsqrtss(dest, dest);
e.cvtss2sd(dest_src, dest_src); e.cvtss2sd(dest, dest);
} }
}); });
} else if (IsVecType(i->dest->type)) { } else if (IsVecType(i->dest->type)) {
XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src) { XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest, const Xmm& src) {
e.rsqrtps(dest_src, dest_src); e.rsqrtps(dest, src);
}); });
} else { } else {
ASSERT_INVALID_TYPE(); ASSERT_INVALID_TYPE();
@ -1638,10 +1638,13 @@ table->AddSequence(OPCODE_NOT, [](X64Emitter& e, Instr*& i) {
e.not(dest_src); e.not(dest_src);
}); });
} else if (IsVecType(i->dest->type)) { } else if (IsVecType(i->dest->type)) {
XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src) { XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest, const Xmm& src) {
// dest_src ^= 0xFFFF... // dest_src ^= 0xFFFF...
e.cmpeqps(e.xmm0, e.xmm0); e.cmpeqps(e.xmm0, e.xmm0);
e.pxor(dest_src, e.xmm0); if (dest != src) {
e.movaps(dest, src);
}
e.pxor(dest, e.xmm0);
}); });
} else { } else {
ASSERT_INVALID_TYPE(); ASSERT_INVALID_TYPE();

View File

@ -697,42 +697,60 @@ void IntTernaryOp(X64Emitter& e, Instr*& i, vvv_fn vvv_fn, vvc_fn vvc_fn, vcv_fn
} }
} }
typedef void(xmm_v_fn)(X64Emitter& e, Instr& i, const Xmm& dest_src); // Since a lot of SSE ops can take dest + src, just do that.
// Worst case the callee can dedupe.
typedef void(xmm_v_fn)(X64Emitter& e, Instr& i, const Xmm& dest, const Xmm& src);
template<typename T> template<typename T>
void XmmUnaryOpV(X64Emitter& e, Instr*& i, xmm_v_fn v_fn, void XmmUnaryOpV(X64Emitter& e, Instr*& i, xmm_v_fn v_fn,
T& dest, T& src1) { T& dest, T& src1) {
e.BeginOp(i->dest, dest, REG_DEST, e.BeginOp(i->dest, dest, REG_DEST,
i->src1.value, src1, 0); i->src1.value, src1, 0);
if (dest == src1) { v_fn(e, *i, dest, src1);
v_fn(e, *i, dest);
} else {
e.movaps(dest, src1);
v_fn(e, *i, dest);
}
e.EndOp(dest, src1); e.EndOp(dest, src1);
} }
template<typename CT, typename T> template<typename T>
void XmmUnaryOpC(X64Emitter& e, Instr*& i, xmm_v_fn v_fn, void XmmUnaryOpC(X64Emitter& e, Instr*& i, xmm_v_fn v_fn,
T& dest, Value* src1) { T& dest, Value* src1) {
e.BeginOp(i->dest, dest, REG_DEST); e.BeginOp(i->dest, dest, REG_DEST);
//e.mov(dest, (uint64_t)src1->get_constant(CT())); if (src1->type == FLOAT32_TYPE) {
v_fn(e, *i, dest); e.mov(e.eax, (uint32_t)src1->constant.i32);
e.movd(dest, e.eax);
} else if (src1->type == FLOAT64_TYPE) {
e.mov(e.rax, (uint64_t)src1->constant.i64);
e.movq(dest, e.rax);
} else {
UNIMPLEMENTED_SEQ();
}
v_fn(e, *i, dest, dest);
e.EndOp(dest); e.EndOp(dest);
} }
void XmmUnaryOp(X64Emitter& e, Instr*& i, uint32_t flags, xmm_v_fn v_fn) { void XmmUnaryOp(X64Emitter& e, Instr*& i, uint32_t flags, xmm_v_fn v_fn) {
if (IsFloatType(i->src1.value->type)) { if (IsFloatType(i->src1.value->type)) {
// if (i->Match(SIG_TYPE_F32, SIG_TYPE_F32)) {
} else if (IsVecType(i->src1.value->type)) { Xmm dest, src1;
// XmmUnaryOpV(e, i, v_fn, dest, src1);
} else if (i->Match(SIG_TYPE_F32, SIG_TYPE_F32C)) {
Xmm dest;
XmmUnaryOpC(e, i, v_fn, dest, i->src1.value);
} else if (i->Match(SIG_TYPE_F64, SIG_TYPE_F64)) {
Xmm dest, src1;
XmmUnaryOpV(e, i, v_fn, dest, src1);
} else if (i->Match(SIG_TYPE_F64, SIG_TYPE_F64C)) {
Xmm dest;
XmmUnaryOpC(e, i, v_fn, dest, i->src1.value);
} else { } else {
ASSERT_INVALID_TYPE(); ASSERT_INVALID_TYPE();
} }
if (i->Match(SIG_TYPE_I8, SIG_TYPE_I8)) { } else if (IsVecType(i->src1.value->type)) {
if (i->Match(SIG_TYPE_V128, SIG_TYPE_V128)) {
Xmm dest, src1; Xmm dest, src1;
XmmUnaryOpV(e, i, v_fn, dest, src1); XmmUnaryOpV(e, i, v_fn, dest, src1);
} else if (i->Match(SIG_TYPE_I8, SIG_TYPE_I8C)) { } else if (i->Match(SIG_TYPE_V128, SIG_TYPE_V128C)) {
Xmm dest, src1; Xmm dest;
XmmUnaryOpC<int8_t>(e, i, v_fn, dest, i->src1.value); XmmUnaryOpC(e, i, v_fn, dest, i->src1.value);
} else {
ASSERT_INVALID_TYPE();
}
} else { } else {
ASSERT_INVALID_TYPE(); ASSERT_INVALID_TYPE();
} }