From c828e5416e498f6ea9490a4ecfc6941d83842f4d Mon Sep 17 00:00:00 2001 From: Ben Vanik Date: Sun, 26 Jan 2014 22:47:15 -0800 Subject: [PATCH] Starting on some unary xmm opcodes. --- .../x64/lowering/lowering_sequences.cc | 60 +++++++++++++++---- src/alloy/backend/x64/lowering/op_utils.inl | 57 +++++++++++++++--- 2 files changed, 99 insertions(+), 18 deletions(-) diff --git a/src/alloy/backend/x64/lowering/lowering_sequences.cc b/src/alloy/backend/x64/lowering/lowering_sequences.cc index 688c0aba9..ca65a8e7d 100644 --- a/src/alloy/backend/x64/lowering/lowering_sequences.cc +++ b/src/alloy/backend/x64/lowering/lowering_sequences.cc @@ -1454,11 +1454,21 @@ table->AddSequence(OPCODE_MUL_SUB, [](X64Emitter& e, Instr*& i) { table->AddSequence(OPCODE_NEG, [](X64Emitter& e, Instr*& i) { if (IsIntType(i->dest->type)) { - UNIMPLEMENTED_SEQ(); + IntUnaryOp(e, i, [](X64Emitter& e, Instr& i, const Reg& dest_src) { + e.neg(dest_src); + }); } else if (IsFloatType(i->dest->type)) { - UNIMPLEMENTED_SEQ(); + XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src) { + if (i.src1.value->type == FLOAT32_TYPE) { + UNIMPLEMENTED_SEQ(); + } else { + UNIMPLEMENTED_SEQ(); + } + }); } else if (IsVecType(i->dest->type)) { - UNIMPLEMENTED_SEQ(); + XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src) { + UNIMPLEMENTED_SEQ(); + }); } else { ASSERT_INVALID_TYPE(); } @@ -1470,9 +1480,17 @@ table->AddSequence(OPCODE_ABS, [](X64Emitter& e, Instr*& i) { if (IsIntType(i->dest->type)) { UNIMPLEMENTED_SEQ(); } else if (IsFloatType(i->dest->type)) { - UNIMPLEMENTED_SEQ(); + XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src) { + if (i.src1.value->type == FLOAT32_TYPE) { + UNIMPLEMENTED_SEQ(); + } else { + UNIMPLEMENTED_SEQ(); + } + }); } else if (IsVecType(i->dest->type)) { - UNIMPLEMENTED_SEQ(); + XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src) { + UNIMPLEMENTED_SEQ(); + }); } else { ASSERT_INVALID_TYPE(); } @@ -1482,9 +1500,17 @@ table->AddSequence(OPCODE_ABS, [](X64Emitter& e, Instr*& i) { table->AddSequence(OPCODE_SQRT, [](X64Emitter& e, Instr*& i) { if (IsFloatType(i->dest->type)) { - UNIMPLEMENTED_SEQ(); + XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src) { + if (i.dest->type == FLOAT32_TYPE) { + e.sqrtss(dest_src, dest_src); + } else { + e.sqrtsd(dest_src, dest_src); + } + }); } else if (IsVecType(i->dest->type)) { - UNIMPLEMENTED_SEQ(); + XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src) { + e.sqrtps(dest_src, dest_src); + }); } else { ASSERT_INVALID_TYPE(); } @@ -1494,9 +1520,19 @@ table->AddSequence(OPCODE_SQRT, [](X64Emitter& e, Instr*& i) { table->AddSequence(OPCODE_RSQRT, [](X64Emitter& e, Instr*& i) { if (IsFloatType(i->dest->type)) { - UNIMPLEMENTED_SEQ(); + XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src) { + if (i.dest->type == FLOAT32_TYPE) { + e.rsqrtss(dest_src, dest_src); + } else { + e.cvtsd2ss(dest_src, dest_src); + e.rsqrtss(dest_src, dest_src); + e.cvtss2sd(dest_src, dest_src); + } + }); } else if (IsVecType(i->dest->type)) { - UNIMPLEMENTED_SEQ(); + XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src) { + e.rsqrtps(dest_src, dest_src); + }); } else { ASSERT_INVALID_TYPE(); } @@ -1602,7 +1638,11 @@ table->AddSequence(OPCODE_NOT, [](X64Emitter& e, Instr*& i) { e.not(dest_src); }); } else if (IsVecType(i->dest->type)) { - UNIMPLEMENTED_SEQ(); + XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src) { + // dest_src ^= 0xFFFF... + e.cmpeqps(e.xmm0, e.xmm0); + e.pxor(dest_src, e.xmm0); + }); } else { ASSERT_INVALID_TYPE(); } diff --git a/src/alloy/backend/x64/lowering/op_utils.inl b/src/alloy/backend/x64/lowering/op_utils.inl index 736c432f0..689cd501b 100644 --- a/src/alloy/backend/x64/lowering/op_utils.inl +++ b/src/alloy/backend/x64/lowering/op_utils.inl @@ -291,7 +291,7 @@ void VectorCompareXX(X64Emitter& e, Instr*& i, VectoreCompareOp op, bool as_sign typedef void(v_fn)(X64Emitter& e, Instr& i, const Reg& dest_src); template void IntUnaryOpV(X64Emitter& e, Instr*& i, v_fn v_fn, - T& dest, T& src1) { + T& dest, T& src1) { e.BeginOp(i->dest, dest, REG_DEST, i->src1.value, src1, 0); if (dest == src1) { @@ -304,7 +304,7 @@ void IntUnaryOpV(X64Emitter& e, Instr*& i, v_fn v_fn, } template void IntUnaryOpC(X64Emitter& e, Instr*& i, v_fn v_fn, - T& dest, Value* src1) { + T& dest, Value* src1) { e.BeginOp(i->dest, dest, REG_DEST); e.mov(dest, (uint64_t)src1->get_constant(CT())); v_fn(e, *i, dest); @@ -349,7 +349,7 @@ typedef void(vv_fn)(X64Emitter& e, Instr& i, const Reg& dest_src, const Operand& typedef void(vc_fn)(X64Emitter& e, Instr& i, const Reg& dest_src, uint32_t src); template void IntBinaryOpVV(X64Emitter& e, Instr*& i, vv_fn vv_fn, - TD& dest, TS1& src1, TS2& src2) { + TD& dest, TS1& src1, TS2& src2) { e.BeginOp(i->dest, dest, REG_DEST, i->src1.value, src1, 0, i->src2.value, src2, 0); @@ -372,7 +372,7 @@ void IntBinaryOpVV(X64Emitter& e, Instr*& i, vv_fn vv_fn, } template void IntBinaryOpVC(X64Emitter& e, Instr*& i, vv_fn vv_fn, vc_fn vc_fn, - TD& dest, TS1& src1, Value* src2) { + TD& dest, TS1& src1, Value* src2) { e.BeginOp(i->dest, dest, REG_DEST, i->src1.value, src1, 0); if (dest.getBit() <= 32) { @@ -398,7 +398,7 @@ void IntBinaryOpVC(X64Emitter& e, Instr*& i, vv_fn vv_fn, vc_fn vc_fn, } template void IntBinaryOpCV(X64Emitter& e, Instr*& i, vv_fn vv_fn, vc_fn vc_fn, - TD& dest, Value* src1, TS2& src2) { + TD& dest, Value* src1, TS2& src2) { e.BeginOp(i->dest, dest, REG_DEST, i->src2.value, src2, 0); if (dest.getBit() <= 32) { @@ -525,7 +525,7 @@ typedef void(vvc_fn)(X64Emitter& e, Instr& i, const Reg& dest_src1, const Operan typedef void(vcv_fn)(X64Emitter& e, Instr& i, const Reg& dest_src1, uint32_t src2, const Operand& src3); template void IntTernaryOpVVV(X64Emitter& e, Instr*& i, vvv_fn vvv_fn, - TD& dest, TS1& src1, TS2& src2, TS3& src3) { + TD& dest, TS1& src1, TS2& src2, TS3& src3) { e.BeginOp(i->dest, dest, REG_DEST, i->src1.value, src1, 0, i->src2.value, src2, 0, @@ -546,7 +546,7 @@ void IntTernaryOpVVV(X64Emitter& e, Instr*& i, vvv_fn vvv_fn, } template void IntTernaryOpVVC(X64Emitter& e, Instr*& i, vvv_fn vvv_fn, vvc_fn vvc_fn, - TD& dest, TS1& src1, TS2& src2, Value* src3) { + TD& dest, TS1& src1, TS2& src2, Value* src3) { e.BeginOp(i->dest, dest, REG_DEST, i->src1.value, src1, 0, i->src2.value, src2, 0); @@ -594,7 +594,7 @@ void IntTernaryOpVVC(X64Emitter& e, Instr*& i, vvv_fn vvv_fn, vvc_fn vvc_fn, } template void IntTernaryOpVCV(X64Emitter& e, Instr*& i, vvv_fn vvv_fn, vcv_fn vcv_fn, - TD& dest, TS1& src1, Value* src2, TS3& src3) { + TD& dest, TS1& src1, Value* src2, TS3& src3) { e.BeginOp(i->dest, dest, REG_DEST, i->src1.value, src1, 0, i->src3.value, src3, 0); @@ -697,6 +697,47 @@ void IntTernaryOp(X64Emitter& e, Instr*& i, vvv_fn vvv_fn, vvc_fn vvc_fn, vcv_fn } } +typedef void(xmm_v_fn)(X64Emitter& e, Instr& i, const Xmm& dest_src); +template +void XmmUnaryOpV(X64Emitter& e, Instr*& i, xmm_v_fn v_fn, + T& dest, T& src1) { + e.BeginOp(i->dest, dest, REG_DEST, + i->src1.value, src1, 0); + if (dest == src1) { + v_fn(e, *i, dest); + } else { + e.movaps(dest, src1); + v_fn(e, *i, dest); + } + e.EndOp(dest, src1); +} +template +void XmmUnaryOpC(X64Emitter& e, Instr*& i, xmm_v_fn v_fn, + T& dest, Value* src1) { + e.BeginOp(i->dest, dest, REG_DEST); + //e.mov(dest, (uint64_t)src1->get_constant(CT())); + v_fn(e, *i, dest); + e.EndOp(dest); +} +void XmmUnaryOp(X64Emitter& e, Instr*& i, uint32_t flags, xmm_v_fn v_fn) { + if (IsFloatType(i->src1.value->type)) { + // + } else if (IsVecType(i->src1.value->type)) { + // + } else { + ASSERT_INVALID_TYPE(); + } + if (i->Match(SIG_TYPE_I8, SIG_TYPE_I8)) { + Xmm dest, src1; + XmmUnaryOpV(e, i, v_fn, dest, src1); + } else if (i->Match(SIG_TYPE_I8, SIG_TYPE_I8C)) { + Xmm dest, src1; + XmmUnaryOpC(e, i, v_fn, dest, i->src1.value); + } else { + ASSERT_INVALID_TYPE(); + } +}; + } // namespace #endif // ALLOY_BACKEND_X64_X64_LOWERING_OP_UTILS_INL_