From 6c7e392088dd6322e55a4575487748c53b4160fc Mon Sep 17 00:00:00 2001
From: Ben Vanik
Date: Sun, 26 Jan 2014 22:57:39 -0800
Subject: [PATCH] Fixing xmm unary ops.

---
 .../x64/lowering/lowering_sequences.cc      | 39 ++++++------
 src/alloy/backend/x64/lowering/op_utils.inl | 60 ++++++++++++-------
 2 files changed, 60 insertions(+), 39 deletions(-)

diff --git a/src/alloy/backend/x64/lowering/lowering_sequences.cc b/src/alloy/backend/x64/lowering/lowering_sequences.cc
index ca65a8e7d..379e438c2 100644
--- a/src/alloy/backend/x64/lowering/lowering_sequences.cc
+++ b/src/alloy/backend/x64/lowering/lowering_sequences.cc
@@ -1458,7 +1458,7 @@ table->AddSequence(OPCODE_NEG, [](X64Emitter& e, Instr*& i) {
       e.neg(dest_src);
     });
   } else if (IsFloatType(i->dest->type)) {
-    XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src) {
+    XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest, const Xmm& src) {
       if (i.src1.value->type == FLOAT32_TYPE) {
         UNIMPLEMENTED_SEQ();
       } else {
@@ -1466,7 +1466,7 @@ table->AddSequence(OPCODE_NEG, [](X64Emitter& e, Instr*& i) {
       }
     });
   } else if (IsVecType(i->dest->type)) {
-    XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src) {
+    XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest, const Xmm& src) {
       UNIMPLEMENTED_SEQ();
     });
   } else {
@@ -1480,7 +1480,7 @@ table->AddSequence(OPCODE_ABS, [](X64Emitter& e, Instr*& i) {
   if (IsIntType(i->dest->type)) {
     UNIMPLEMENTED_SEQ();
   } else if (IsFloatType(i->dest->type)) {
-    XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src) {
+    XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest, const Xmm& src) {
      if (i.src1.value->type == FLOAT32_TYPE) {
         UNIMPLEMENTED_SEQ();
       } else {
@@ -1488,7 +1488,7 @@ table->AddSequence(OPCODE_ABS, [](X64Emitter& e, Instr*& i) {
       }
     });
   } else if (IsVecType(i->dest->type)) {
-    XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src) {
+    XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest, const Xmm& src) {
       UNIMPLEMENTED_SEQ();
     });
   } else {
@@ -1500,16 +1500,16 @@
 
 table->AddSequence(OPCODE_SQRT, [](X64Emitter& e, Instr*& i) {
   if (IsFloatType(i->dest->type)) {
-    XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src) {
+    XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest, const Xmm& src) {
       if (i.dest->type == FLOAT32_TYPE) {
-        e.sqrtss(dest_src, dest_src);
+        e.sqrtss(dest, src);
       } else {
-        e.sqrtsd(dest_src, dest_src);
+        e.sqrtsd(dest, src);
       }
     });
   } else if (IsVecType(i->dest->type)) {
-    XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src) {
-      e.sqrtps(dest_src, dest_src);
+    XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest, const Xmm& src) {
+      e.sqrtps(dest, src);
     });
   } else {
     ASSERT_INVALID_TYPE();
@@ -1520,18 +1520,18 @@ table->AddSequence(OPCODE_SQRT, [](X64Emitter& e, Instr*& i) {
 
 table->AddSequence(OPCODE_RSQRT, [](X64Emitter& e, Instr*& i) {
   if (IsFloatType(i->dest->type)) {
-    XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src) {
+    XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest, const Xmm& src) {
       if (i.dest->type == FLOAT32_TYPE) {
-        e.rsqrtss(dest_src, dest_src);
+        e.rsqrtss(dest, src);
       } else {
-        e.cvtsd2ss(dest_src, dest_src);
-        e.rsqrtss(dest_src, dest_src);
-        e.cvtss2sd(dest_src, dest_src);
+        e.cvtsd2ss(dest, src);
+        e.rsqrtss(dest, dest);
+        e.cvtss2sd(dest, dest);
       }
     });
   } else if (IsVecType(i->dest->type)) {
-    XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src) {
-      e.rsqrtps(dest_src, dest_src);
+    XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest, const Xmm& src) {
+      e.rsqrtps(dest, src);
     });
   } else {
     ASSERT_INVALID_TYPE();
@@ -1638,10 +1638,13 @@ table->AddSequence(OPCODE_NOT, [](X64Emitter& e, Instr*& i) {
       e.not(dest_src);
     });
   } else if (IsVecType(i->dest->type)) {
-    XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src) {
+    XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest, const Xmm& src) {
       // dest_src ^= 0xFFFF...
       e.cmpeqps(e.xmm0, e.xmm0);
-      e.pxor(dest_src, e.xmm0);
+      if (dest != src) {
+        e.movaps(dest, src);
+      }
+      e.pxor(dest, e.xmm0);
     });
   } else {
     ASSERT_INVALID_TYPE();
diff --git a/src/alloy/backend/x64/lowering/op_utils.inl b/src/alloy/backend/x64/lowering/op_utils.inl
index 689cd501b..424e8eeb2 100644
--- a/src/alloy/backend/x64/lowering/op_utils.inl
+++ b/src/alloy/backend/x64/lowering/op_utils.inl
@@ -697,42 +697,60 @@ void IntTernaryOp(X64Emitter& e, Instr*& i, vvv_fn vvv_fn, vvc_fn vvc_fn, vcv_fn
   }
 }
 
-typedef void(xmm_v_fn)(X64Emitter& e, Instr& i, const Xmm& dest_src);
+// Since a lot of SSE ops can take dest + src, just do that.
+// Worst case the callee can dedupe.
+typedef void(xmm_v_fn)(X64Emitter& e, Instr& i, const Xmm& dest, const Xmm& src);
 template<typename T>
 void XmmUnaryOpV(X64Emitter& e, Instr*& i, xmm_v_fn v_fn,
                  T& dest, T& src1) {
   e.BeginOp(i->dest, dest, REG_DEST,
             i->src1.value, src1, 0);
-  if (dest == src1) {
-    v_fn(e, *i, dest);
-  } else {
-    e.movaps(dest, src1);
-    v_fn(e, *i, dest);
-  }
+  v_fn(e, *i, dest, src1);
   e.EndOp(dest, src1);
 }
-template<typename CT, typename T>
+template<typename T>
 void XmmUnaryOpC(X64Emitter& e, Instr*& i, xmm_v_fn v_fn,
                  T& dest, Value* src1) {
   e.BeginOp(i->dest, dest, REG_DEST);
-  //e.mov(dest, (uint64_t)src1->get_constant(CT()));
-  v_fn(e, *i, dest);
+  if (src1->type == FLOAT32_TYPE) {
+    e.mov(e.eax, (uint32_t)src1->constant.i32);
+    e.movd(dest, e.eax);
+  } else if (src1->type == FLOAT64_TYPE) {
+    e.mov(e.rax, (uint64_t)src1->constant.i64);
+    e.movq(dest, e.rax);
+  } else {
+    UNIMPLEMENTED_SEQ();
+  }
+  v_fn(e, *i, dest, dest);
   e.EndOp(dest);
 }
 void XmmUnaryOp(X64Emitter& e, Instr*& i, uint32_t flags, xmm_v_fn v_fn) {
   if (IsFloatType(i->src1.value->type)) {
-
+    if (i->Match(SIG_TYPE_F32, SIG_TYPE_F32)) {
+      Xmm dest, src1;
+      XmmUnaryOpV(e, i, v_fn, dest, src1);
+    } else if (i->Match(SIG_TYPE_F32, SIG_TYPE_F32C)) {
+      Xmm dest;
+      XmmUnaryOpC(e, i, v_fn, dest, i->src1.value);
+    } else if (i->Match(SIG_TYPE_F64, SIG_TYPE_F64)) {
+      Xmm dest, src1;
+      XmmUnaryOpV(e, i, v_fn, dest, src1);
+    } else if (i->Match(SIG_TYPE_F64, SIG_TYPE_F64C)) {
+      Xmm dest;
+      XmmUnaryOpC(e, i, v_fn, dest, i->src1.value);
+    } else {
+      ASSERT_INVALID_TYPE();
+    }
   } else if (IsVecType(i->src1.value->type)) {
-
-  } else {
-    ASSERT_INVALID_TYPE();
-  }
-  if (i->Match(SIG_TYPE_I8, SIG_TYPE_I8)) {
-    Xmm dest, src1;
-    XmmUnaryOpV(e, i, v_fn, dest, src1);
-  } else if (i->Match(SIG_TYPE_I8, SIG_TYPE_I8C)) {
-    Xmm dest, src1;
-    XmmUnaryOpC(e, i, v_fn, dest, i->src1.value);
+    if (i->Match(SIG_TYPE_V128, SIG_TYPE_V128)) {
+      Xmm dest, src1;
+      XmmUnaryOpV(e, i, v_fn, dest, src1);
+    } else if (i->Match(SIG_TYPE_V128, SIG_TYPE_V128C)) {
+      Xmm dest;
+      XmmUnaryOpC(e, i, v_fn, dest, i->src1.value);
+    } else {
+      ASSERT_INVALID_TYPE();
+    }
   } else {
     ASSERT_INVALID_TYPE();
   }
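
For illustration (not part of the patch): a minimal, standalone C++ sketch of the dest + src callback contract that the new xmm_v_fn typedef establishes, including the callee-side movaps dedupe the OPCODE_NOT hunk performs when the register allocator hands the lambda two different registers. Emitter, EmitNot, and the printing bodies are hypothetical stand-ins for alloy's X64Emitter and the lowering lambdas; the real emitter encodes machine code instead of printing.

#include <cstdio>

// Stand-in for an XMM register reference.
struct Xmm {
  int index;
};

bool operator!=(const Xmm& a, const Xmm& b) { return a.index != b.index; }

// Stand-in emitter: prints each instruction instead of encoding it.
struct Emitter {
  void cmpeqps(const Xmm& d, const Xmm& s) { Print("cmpeqps", d, s); }
  void movaps(const Xmm& d, const Xmm& s) { Print("movaps", d, s); }
  void pxor(const Xmm& d, const Xmm& s) { Print("pxor", d, s); }
 private:
  void Print(const char* op, const Xmm& d, const Xmm& s) {
    std::printf("%s xmm%d, xmm%d\n", op, d.index, s.index);
  }
};

// Mirrors the OPCODE_NOT lambda: pxor is a two-operand op that works in
// place on dest, so when dest and src differ the callee copies src first.
void EmitNot(Emitter& e, const Xmm& dest, const Xmm& src, const Xmm& xmm0) {
  e.cmpeqps(xmm0, xmm0);    // synthesize the all-ones mask, as in the patch
  if (dest != src) {
    e.movaps(dest, src);    // dedupe: move src into dest for the in-place op
  }
  e.pxor(dest, xmm0);       // dest = src ^ 0xFFFF...
}

int main() {
  Emitter e;
  Xmm xmm0{0}, dest{1}, src{2};
  EmitNot(e, dest, src, xmm0);  // prints: cmpeqps, movaps, pxor
  return 0;
}

Passing dest and src separately also lets ops with a true two-register form (sqrtss dest, src; rsqrtps dest, src) skip the copy entirely, which is why XmmUnaryOpV no longer emits movaps itself.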