Fixing xmm unary ops.
This commit is contained in:
parent
c828e5416e
commit
6c7e392088
|
@ -1458,7 +1458,7 @@ table->AddSequence(OPCODE_NEG, [](X64Emitter& e, Instr*& i) {
|
||||||
e.neg(dest_src);
|
e.neg(dest_src);
|
||||||
});
|
});
|
||||||
} else if (IsFloatType(i->dest->type)) {
|
} else if (IsFloatType(i->dest->type)) {
|
||||||
XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src) {
|
XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest, const Xmm& src) {
|
||||||
if (i.src1.value->type == FLOAT32_TYPE) {
|
if (i.src1.value->type == FLOAT32_TYPE) {
|
||||||
UNIMPLEMENTED_SEQ();
|
UNIMPLEMENTED_SEQ();
|
||||||
} else {
|
} else {
|
||||||
|
@ -1466,7 +1466,7 @@ table->AddSequence(OPCODE_NEG, [](X64Emitter& e, Instr*& i) {
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
} else if (IsVecType(i->dest->type)) {
|
} else if (IsVecType(i->dest->type)) {
|
||||||
XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src) {
|
XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest, const Xmm& src) {
|
||||||
UNIMPLEMENTED_SEQ();
|
UNIMPLEMENTED_SEQ();
|
||||||
});
|
});
|
||||||
} else {
|
} else {
|
||||||
|
@ -1480,7 +1480,7 @@ table->AddSequence(OPCODE_ABS, [](X64Emitter& e, Instr*& i) {
|
||||||
if (IsIntType(i->dest->type)) {
|
if (IsIntType(i->dest->type)) {
|
||||||
UNIMPLEMENTED_SEQ();
|
UNIMPLEMENTED_SEQ();
|
||||||
} else if (IsFloatType(i->dest->type)) {
|
} else if (IsFloatType(i->dest->type)) {
|
||||||
XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src) {
|
XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest, const Xmm& src) {
|
||||||
if (i.src1.value->type == FLOAT32_TYPE) {
|
if (i.src1.value->type == FLOAT32_TYPE) {
|
||||||
UNIMPLEMENTED_SEQ();
|
UNIMPLEMENTED_SEQ();
|
||||||
} else {
|
} else {
|
||||||
|
@ -1488,7 +1488,7 @@ table->AddSequence(OPCODE_ABS, [](X64Emitter& e, Instr*& i) {
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
} else if (IsVecType(i->dest->type)) {
|
} else if (IsVecType(i->dest->type)) {
|
||||||
XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src) {
|
XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest, const Xmm& src) {
|
||||||
UNIMPLEMENTED_SEQ();
|
UNIMPLEMENTED_SEQ();
|
||||||
});
|
});
|
||||||
} else {
|
} else {
|
||||||
|
@ -1500,16 +1500,16 @@ table->AddSequence(OPCODE_ABS, [](X64Emitter& e, Instr*& i) {
|
||||||
|
|
||||||
table->AddSequence(OPCODE_SQRT, [](X64Emitter& e, Instr*& i) {
|
table->AddSequence(OPCODE_SQRT, [](X64Emitter& e, Instr*& i) {
|
||||||
if (IsFloatType(i->dest->type)) {
|
if (IsFloatType(i->dest->type)) {
|
||||||
XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src) {
|
XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest, const Xmm& src) {
|
||||||
if (i.dest->type == FLOAT32_TYPE) {
|
if (i.dest->type == FLOAT32_TYPE) {
|
||||||
e.sqrtss(dest_src, dest_src);
|
e.sqrtss(dest, src);
|
||||||
} else {
|
} else {
|
||||||
e.sqrtsd(dest_src, dest_src);
|
e.sqrtsd(dest, src);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
} else if (IsVecType(i->dest->type)) {
|
} else if (IsVecType(i->dest->type)) {
|
||||||
XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src) {
|
XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest, const Xmm& src) {
|
||||||
e.sqrtps(dest_src, dest_src);
|
e.sqrtps(dest, src);
|
||||||
});
|
});
|
||||||
} else {
|
} else {
|
||||||
ASSERT_INVALID_TYPE();
|
ASSERT_INVALID_TYPE();
|
||||||
|
@ -1520,18 +1520,18 @@ table->AddSequence(OPCODE_SQRT, [](X64Emitter& e, Instr*& i) {
|
||||||
|
|
||||||
table->AddSequence(OPCODE_RSQRT, [](X64Emitter& e, Instr*& i) {
|
table->AddSequence(OPCODE_RSQRT, [](X64Emitter& e, Instr*& i) {
|
||||||
if (IsFloatType(i->dest->type)) {
|
if (IsFloatType(i->dest->type)) {
|
||||||
XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src) {
|
XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest, const Xmm& src) {
|
||||||
if (i.dest->type == FLOAT32_TYPE) {
|
if (i.dest->type == FLOAT32_TYPE) {
|
||||||
e.rsqrtss(dest_src, dest_src);
|
e.rsqrtss(dest, src);
|
||||||
} else {
|
} else {
|
||||||
e.cvtsd2ss(dest_src, dest_src);
|
e.cvtsd2ss(dest, src);
|
||||||
e.rsqrtss(dest_src, dest_src);
|
e.rsqrtss(dest, dest);
|
||||||
e.cvtss2sd(dest_src, dest_src);
|
e.cvtss2sd(dest, dest);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
} else if (IsVecType(i->dest->type)) {
|
} else if (IsVecType(i->dest->type)) {
|
||||||
XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src) {
|
XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest, const Xmm& src) {
|
||||||
e.rsqrtps(dest_src, dest_src);
|
e.rsqrtps(dest, src);
|
||||||
});
|
});
|
||||||
} else {
|
} else {
|
||||||
ASSERT_INVALID_TYPE();
|
ASSERT_INVALID_TYPE();
|
||||||
|
@ -1638,10 +1638,13 @@ table->AddSequence(OPCODE_NOT, [](X64Emitter& e, Instr*& i) {
|
||||||
e.not(dest_src);
|
e.not(dest_src);
|
||||||
});
|
});
|
||||||
} else if (IsVecType(i->dest->type)) {
|
} else if (IsVecType(i->dest->type)) {
|
||||||
XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src) {
|
XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest, const Xmm& src) {
|
||||||
// dest_src ^= 0xFFFF...
|
// dest = src ^ 0xFFFF...
|
||||||
e.cmpeqps(e.xmm0, e.xmm0);
|
e.cmpeqps(e.xmm0, e.xmm0);
|
||||||
e.pxor(dest_src, e.xmm0);
|
if (dest != src) {
|
||||||
|
e.movaps(dest, src);
|
||||||
|
}
|
||||||
|
e.pxor(dest, e.xmm0);
|
||||||
});
|
});
|
||||||
} else {
|
} else {
|
||||||
ASSERT_INVALID_TYPE();
|
ASSERT_INVALID_TYPE();
|
||||||
|
|
|
@ -697,42 +697,60 @@ void IntTernaryOp(X64Emitter& e, Instr*& i, vvv_fn vvv_fn, vvc_fn vvc_fn, vcv_fn
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
typedef void(xmm_v_fn)(X64Emitter& e, Instr& i, const Xmm& dest_src);
|
// Since a lot of SSE ops can take dest + src, just do that.
|
||||||
|
// Worst case the callee can dedupe.
|
||||||
|
typedef void(xmm_v_fn)(X64Emitter& e, Instr& i, const Xmm& dest, const Xmm& src);
|
||||||
template<typename T>
|
template<typename T>
|
||||||
void XmmUnaryOpV(X64Emitter& e, Instr*& i, xmm_v_fn v_fn,
|
void XmmUnaryOpV(X64Emitter& e, Instr*& i, xmm_v_fn v_fn,
|
||||||
T& dest, T& src1) {
|
T& dest, T& src1) {
|
||||||
e.BeginOp(i->dest, dest, REG_DEST,
|
e.BeginOp(i->dest, dest, REG_DEST,
|
||||||
i->src1.value, src1, 0);
|
i->src1.value, src1, 0);
|
||||||
if (dest == src1) {
|
v_fn(e, *i, dest, src1);
|
||||||
v_fn(e, *i, dest);
|
|
||||||
} else {
|
|
||||||
e.movaps(dest, src1);
|
|
||||||
v_fn(e, *i, dest);
|
|
||||||
}
|
|
||||||
e.EndOp(dest, src1);
|
e.EndOp(dest, src1);
|
||||||
}
|
}
|
||||||
template<typename CT, typename T>
|
template<typename T>
|
||||||
void XmmUnaryOpC(X64Emitter& e, Instr*& i, xmm_v_fn v_fn,
|
void XmmUnaryOpC(X64Emitter& e, Instr*& i, xmm_v_fn v_fn,
|
||||||
T& dest, Value* src1) {
|
T& dest, Value* src1) {
|
||||||
e.BeginOp(i->dest, dest, REG_DEST);
|
e.BeginOp(i->dest, dest, REG_DEST);
|
||||||
//e.mov(dest, (uint64_t)src1->get_constant(CT()));
|
if (src1->type == FLOAT32_TYPE) {
|
||||||
v_fn(e, *i, dest);
|
e.mov(e.eax, (uint32_t)src1->constant.i32);
|
||||||
|
e.movd(dest, e.eax);
|
||||||
|
} else if (src1->type == FLOAT64_TYPE) {
|
||||||
|
e.mov(e.rax, (uint64_t)src1->constant.i64);
|
||||||
|
e.movq(dest, e.rax);
|
||||||
|
} else {
|
||||||
|
UNIMPLEMENTED_SEQ();
|
||||||
|
}
|
||||||
|
v_fn(e, *i, dest, dest);
|
||||||
e.EndOp(dest);
|
e.EndOp(dest);
|
||||||
}
|
}
|
||||||
void XmmUnaryOp(X64Emitter& e, Instr*& i, uint32_t flags, xmm_v_fn v_fn) {
|
void XmmUnaryOp(X64Emitter& e, Instr*& i, uint32_t flags, xmm_v_fn v_fn) {
|
||||||
if (IsFloatType(i->src1.value->type)) {
|
if (IsFloatType(i->src1.value->type)) {
|
||||||
//
|
if (i->Match(SIG_TYPE_F32, SIG_TYPE_F32)) {
|
||||||
} else if (IsVecType(i->src1.value->type)) {
|
Xmm dest, src1;
|
||||||
//
|
XmmUnaryOpV(e, i, v_fn, dest, src1);
|
||||||
|
} else if (i->Match(SIG_TYPE_F32, SIG_TYPE_F32C)) {
|
||||||
|
Xmm dest;
|
||||||
|
XmmUnaryOpC(e, i, v_fn, dest, i->src1.value);
|
||||||
|
} else if (i->Match(SIG_TYPE_F64, SIG_TYPE_F64)) {
|
||||||
|
Xmm dest, src1;
|
||||||
|
XmmUnaryOpV(e, i, v_fn, dest, src1);
|
||||||
|
} else if (i->Match(SIG_TYPE_F64, SIG_TYPE_F64C)) {
|
||||||
|
Xmm dest;
|
||||||
|
XmmUnaryOpC(e, i, v_fn, dest, i->src1.value);
|
||||||
} else {
|
} else {
|
||||||
ASSERT_INVALID_TYPE();
|
ASSERT_INVALID_TYPE();
|
||||||
}
|
}
|
||||||
if (i->Match(SIG_TYPE_I8, SIG_TYPE_I8)) {
|
} else if (IsVecType(i->src1.value->type)) {
|
||||||
|
if (i->Match(SIG_TYPE_V128, SIG_TYPE_V128)) {
|
||||||
Xmm dest, src1;
|
Xmm dest, src1;
|
||||||
XmmUnaryOpV(e, i, v_fn, dest, src1);
|
XmmUnaryOpV(e, i, v_fn, dest, src1);
|
||||||
} else if (i->Match(SIG_TYPE_I8, SIG_TYPE_I8C)) {
|
} else if (i->Match(SIG_TYPE_V128, SIG_TYPE_V128C)) {
|
||||||
Xmm dest, src1;
|
Xmm dest;
|
||||||
XmmUnaryOpC<int8_t>(e, i, v_fn, dest, i->src1.value);
|
XmmUnaryOpC(e, i, v_fn, dest, i->src1.value);
|
||||||
|
} else {
|
||||||
|
ASSERT_INVALID_TYPE();
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
ASSERT_INVALID_TYPE();
|
ASSERT_INVALID_TYPE();
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue