Fixing xmm unary ops.
This commit is contained in:
parent
c828e5416e
commit
6c7e392088
|
@ -1458,7 +1458,7 @@ table->AddSequence(OPCODE_NEG, [](X64Emitter& e, Instr*& i) {
|
|||
e.neg(dest_src);
|
||||
});
|
||||
} else if (IsFloatType(i->dest->type)) {
|
||||
XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src) {
|
||||
XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest, const Xmm& src) {
|
||||
if (i.src1.value->type == FLOAT32_TYPE) {
|
||||
UNIMPLEMENTED_SEQ();
|
||||
} else {
|
||||
|
@ -1466,7 +1466,7 @@ table->AddSequence(OPCODE_NEG, [](X64Emitter& e, Instr*& i) {
|
|||
}
|
||||
});
|
||||
} else if (IsVecType(i->dest->type)) {
|
||||
XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src) {
|
||||
XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest, const Xmm& src) {
|
||||
UNIMPLEMENTED_SEQ();
|
||||
});
|
||||
} else {
|
||||
|
@ -1480,7 +1480,7 @@ table->AddSequence(OPCODE_ABS, [](X64Emitter& e, Instr*& i) {
|
|||
if (IsIntType(i->dest->type)) {
|
||||
UNIMPLEMENTED_SEQ();
|
||||
} else if (IsFloatType(i->dest->type)) {
|
||||
XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src) {
|
||||
XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest, const Xmm& src) {
|
||||
if (i.src1.value->type == FLOAT32_TYPE) {
|
||||
UNIMPLEMENTED_SEQ();
|
||||
} else {
|
||||
|
@ -1488,7 +1488,7 @@ table->AddSequence(OPCODE_ABS, [](X64Emitter& e, Instr*& i) {
|
|||
}
|
||||
});
|
||||
} else if (IsVecType(i->dest->type)) {
|
||||
XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src) {
|
||||
XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest, const Xmm& src) {
|
||||
UNIMPLEMENTED_SEQ();
|
||||
});
|
||||
} else {
|
||||
|
@ -1500,16 +1500,16 @@ table->AddSequence(OPCODE_ABS, [](X64Emitter& e, Instr*& i) {
|
|||
|
||||
table->AddSequence(OPCODE_SQRT, [](X64Emitter& e, Instr*& i) {
|
||||
if (IsFloatType(i->dest->type)) {
|
||||
XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src) {
|
||||
XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest, const Xmm& src) {
|
||||
if (i.dest->type == FLOAT32_TYPE) {
|
||||
e.sqrtss(dest_src, dest_src);
|
||||
e.sqrtss(dest, src);
|
||||
} else {
|
||||
e.sqrtsd(dest_src, dest_src);
|
||||
e.sqrtsd(dest, src);
|
||||
}
|
||||
});
|
||||
} else if (IsVecType(i->dest->type)) {
|
||||
XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src) {
|
||||
e.sqrtps(dest_src, dest_src);
|
||||
XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest, const Xmm& src) {
|
||||
e.sqrtps(dest, src);
|
||||
});
|
||||
} else {
|
||||
ASSERT_INVALID_TYPE();
|
||||
|
@ -1520,18 +1520,18 @@ table->AddSequence(OPCODE_SQRT, [](X64Emitter& e, Instr*& i) {
|
|||
|
||||
table->AddSequence(OPCODE_RSQRT, [](X64Emitter& e, Instr*& i) {
|
||||
if (IsFloatType(i->dest->type)) {
|
||||
XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src) {
|
||||
XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest, const Xmm& src) {
|
||||
if (i.dest->type == FLOAT32_TYPE) {
|
||||
e.rsqrtss(dest_src, dest_src);
|
||||
e.rsqrtss(dest, src);
|
||||
} else {
|
||||
e.cvtsd2ss(dest_src, dest_src);
|
||||
e.rsqrtss(dest_src, dest_src);
|
||||
e.cvtss2sd(dest_src, dest_src);
|
||||
e.cvtsd2ss(dest, src);
|
||||
e.rsqrtss(dest, dest);
|
||||
e.cvtss2sd(dest, dest);
|
||||
}
|
||||
});
|
||||
} else if (IsVecType(i->dest->type)) {
|
||||
XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src) {
|
||||
e.rsqrtps(dest_src, dest_src);
|
||||
XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest, const Xmm& src) {
|
||||
e.rsqrtps(dest, src);
|
||||
});
|
||||
} else {
|
||||
ASSERT_INVALID_TYPE();
|
||||
|
@ -1638,10 +1638,13 @@ table->AddSequence(OPCODE_NOT, [](X64Emitter& e, Instr*& i) {
|
|||
e.not(dest_src);
|
||||
});
|
||||
} else if (IsVecType(i->dest->type)) {
|
||||
XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src) {
|
||||
XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest, const Xmm& src) {
|
||||
// dest_src ^= 0xFFFF...
|
||||
e.cmpeqps(e.xmm0, e.xmm0);
|
||||
e.pxor(dest_src, e.xmm0);
|
||||
if (dest != src) {
|
||||
e.movaps(dest, src);
|
||||
}
|
||||
e.pxor(dest, e.xmm0);
|
||||
});
|
||||
} else {
|
||||
ASSERT_INVALID_TYPE();
|
||||
|
|
|
@ -697,42 +697,60 @@ void IntTernaryOp(X64Emitter& e, Instr*& i, vvv_fn vvv_fn, vvc_fn vvc_fn, vcv_fn
|
|||
}
|
||||
}
|
||||
|
||||
typedef void(xmm_v_fn)(X64Emitter& e, Instr& i, const Xmm& dest_src);
|
||||
// Since a lot of SSE ops can take dest + src, just do that.
|
||||
// Worst case the callee can dedupe.
|
||||
typedef void(xmm_v_fn)(X64Emitter& e, Instr& i, const Xmm& dest, const Xmm& src);
|
||||
template<typename T>
|
||||
void XmmUnaryOpV(X64Emitter& e, Instr*& i, xmm_v_fn v_fn,
|
||||
T& dest, T& src1) {
|
||||
e.BeginOp(i->dest, dest, REG_DEST,
|
||||
i->src1.value, src1, 0);
|
||||
if (dest == src1) {
|
||||
v_fn(e, *i, dest);
|
||||
} else {
|
||||
e.movaps(dest, src1);
|
||||
v_fn(e, *i, dest);
|
||||
}
|
||||
v_fn(e, *i, dest, src1);
|
||||
e.EndOp(dest, src1);
|
||||
}
|
||||
template<typename CT, typename T>
|
||||
template<typename T>
|
||||
void XmmUnaryOpC(X64Emitter& e, Instr*& i, xmm_v_fn v_fn,
|
||||
T& dest, Value* src1) {
|
||||
e.BeginOp(i->dest, dest, REG_DEST);
|
||||
//e.mov(dest, (uint64_t)src1->get_constant(CT()));
|
||||
v_fn(e, *i, dest);
|
||||
if (src1->type == FLOAT32_TYPE) {
|
||||
e.mov(e.eax, (uint32_t)src1->constant.i32);
|
||||
e.movd(dest, e.eax);
|
||||
} else if (src1->type == FLOAT64_TYPE) {
|
||||
e.mov(e.rax, (uint64_t)src1->constant.i64);
|
||||
e.movq(dest, e.rax);
|
||||
} else {
|
||||
UNIMPLEMENTED_SEQ();
|
||||
}
|
||||
v_fn(e, *i, dest, dest);
|
||||
e.EndOp(dest);
|
||||
}
|
||||
void XmmUnaryOp(X64Emitter& e, Instr*& i, uint32_t flags, xmm_v_fn v_fn) {
|
||||
if (IsFloatType(i->src1.value->type)) {
|
||||
//
|
||||
if (i->Match(SIG_TYPE_F32, SIG_TYPE_F32)) {
|
||||
Xmm dest, src1;
|
||||
XmmUnaryOpV(e, i, v_fn, dest, src1);
|
||||
} else if (i->Match(SIG_TYPE_F32, SIG_TYPE_F32C)) {
|
||||
Xmm dest;
|
||||
XmmUnaryOpC(e, i, v_fn, dest, i->src1.value);
|
||||
} else if (i->Match(SIG_TYPE_F64, SIG_TYPE_F64)) {
|
||||
Xmm dest, src1;
|
||||
XmmUnaryOpV(e, i, v_fn, dest, src1);
|
||||
} else if (i->Match(SIG_TYPE_F64, SIG_TYPE_F64C)) {
|
||||
Xmm dest;
|
||||
XmmUnaryOpC(e, i, v_fn, dest, i->src1.value);
|
||||
} else {
|
||||
ASSERT_INVALID_TYPE();
|
||||
}
|
||||
} else if (IsVecType(i->src1.value->type)) {
|
||||
//
|
||||
} else {
|
||||
ASSERT_INVALID_TYPE();
|
||||
}
|
||||
if (i->Match(SIG_TYPE_I8, SIG_TYPE_I8)) {
|
||||
Xmm dest, src1;
|
||||
XmmUnaryOpV(e, i, v_fn, dest, src1);
|
||||
} else if (i->Match(SIG_TYPE_I8, SIG_TYPE_I8C)) {
|
||||
Xmm dest, src1;
|
||||
XmmUnaryOpC<int8_t>(e, i, v_fn, dest, i->src1.value);
|
||||
if (i->Match(SIG_TYPE_V128, SIG_TYPE_V128)) {
|
||||
Xmm dest, src1;
|
||||
XmmUnaryOpV(e, i, v_fn, dest, src1);
|
||||
} else if (i->Match(SIG_TYPE_V128, SIG_TYPE_V128C)) {
|
||||
Xmm dest;
|
||||
XmmUnaryOpC(e, i, v_fn, dest, i->src1.value);
|
||||
} else {
|
||||
ASSERT_INVALID_TYPE();
|
||||
}
|
||||
} else {
|
||||
ASSERT_INVALID_TYPE();
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue