Fixing COMPARE and tweaking ABS/NEG.
This commit is contained in:
parent
328ece538a
commit
0c55309826
|
@ -489,6 +489,8 @@ Address X64Emitter::GetXmmConstPtr(XmmConst id) {
|
||||||
/* XMM3301 */ vec128f(3.0f, 3.0f, 0.0f, 1.0f),
|
/* XMM3301 */ vec128f(3.0f, 3.0f, 0.0f, 1.0f),
|
||||||
/* XMMSignMaskPS */ vec128i(0x80000000u, 0x80000000u, 0x80000000u, 0x80000000u),
|
/* XMMSignMaskPS */ vec128i(0x80000000u, 0x80000000u, 0x80000000u, 0x80000000u),
|
||||||
/* XMMSignMaskPD */ vec128i(0x00000000u, 0x80000000u, 0x00000000u, 0x80000000u),
|
/* XMMSignMaskPD */ vec128i(0x00000000u, 0x80000000u, 0x00000000u, 0x80000000u),
|
||||||
|
/* XMMAbsMaskPS */ vec128i(0x7FFFFFFFu, 0x7FFFFFFFu, 0x7FFFFFFFu, 0x7FFFFFFFu),
|
||||||
|
/* XMMAbsMaskPD */ vec128i(0xFFFFFFFFu, 0x7FFFFFFFu, 0xFFFFFFFFu, 0x7FFFFFFFu),
|
||||||
/* XMMByteSwapMask */ vec128i(0x00010203u, 0x04050607u, 0x08090A0Bu, 0x0C0D0E0Fu),
|
/* XMMByteSwapMask */ vec128i(0x00010203u, 0x04050607u, 0x08090A0Bu, 0x0C0D0E0Fu),
|
||||||
/* XMMPermuteControl15 */ vec128b(15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15),
|
/* XMMPermuteControl15 */ vec128b(15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15),
|
||||||
/* XMMPackD3DCOLOR */ vec128i(0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu, 0x0C000408u),
|
/* XMMPackD3DCOLOR */ vec128i(0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu, 0x0C000408u),
|
||||||
|
|
|
@ -37,24 +37,26 @@ enum RegisterFlags {
|
||||||
|
|
||||||
enum XmmConst {
|
enum XmmConst {
|
||||||
XMMZero = 0,
|
XMMZero = 0,
|
||||||
XMMOne = 1,
|
XMMOne,
|
||||||
XMMNegativeOne = 2,
|
XMMNegativeOne,
|
||||||
XMMMaskX16Y16 = 3,
|
XMMMaskX16Y16,
|
||||||
XMMFlipX16Y16 = 4,
|
XMMFlipX16Y16,
|
||||||
XMMFixX16Y16 = 5,
|
XMMFixX16Y16,
|
||||||
XMMNormalizeX16Y16 = 6,
|
XMMNormalizeX16Y16,
|
||||||
XMM3301 = 7,
|
XMM3301,
|
||||||
XMMSignMaskPS = 8,
|
XMMSignMaskPS,
|
||||||
XMMSignMaskPD = 9,
|
XMMSignMaskPD,
|
||||||
XMMByteSwapMask = 10,
|
// NOTE(review): enumerators are numbered implicitly, so position is the value.
// XMMAbsMaskPS/PD sit directly after XMMSignMaskPD, the same place their rows
// occupy in the constant table shown for GetXmmConstPtr - presumably the two
// orders must be kept in sync; confirm against the table lookup.
XMMAbsMaskPS,
|
||||||
XMMPermuteControl15 = 11,
|
XMMAbsMaskPD,
|
||||||
XMMPackD3DCOLOR = 12,
|
XMMByteSwapMask,
|
||||||
XMMUnpackD3DCOLOR = 13,
|
XMMPermuteControl15,
|
||||||
XMMOneOver255 = 14,
|
XMMPackD3DCOLOR,
|
||||||
XMMShiftMaskPS = 15,
|
XMMUnpackD3DCOLOR,
|
||||||
XMMShiftByteMask = 16,
|
XMMOneOver255,
|
||||||
XMMUnsignedDwordMax = 17,
|
XMMShiftMaskPS,
|
||||||
XMM255 = 18,
|
XMMShiftByteMask,
|
||||||
|
XMMUnsignedDwordMax,
|
||||||
|
XMM255,
|
||||||
};
|
};
|
||||||
|
|
||||||
// Unfortunately due to the design of xbyak we have to pass this to the ctor.
|
// Unfortunately due to the design of xbyak we have to pass this to the ctor.
|
||||||
|
|
|
@ -1946,6 +1946,8 @@ EMITTER(SELECT_F32, MATCH(I<OPCODE_SELECT, F32<>, I8<>, F32<>, F32<>>)) {
|
||||||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||||
e.test(i.src1, i.src1);
|
e.test(i.src1, i.src1);
|
||||||
// TODO(benvanik): find a way to do this without branches.
|
// TODO(benvanik): find a way to do this without branches.
|
||||||
|
// We may be able to load src1 into an xmm, cmp with zero, and use that
|
||||||
|
// as a selection mask to choose between src2 & src3.
|
||||||
Xbyak::Label skip;
|
Xbyak::Label skip;
|
||||||
e.vmovaps(i.dest, i.src3);
|
e.vmovaps(i.dest, i.src3);
|
||||||
e.jz(skip);
|
e.jz(skip);
|
||||||
|
@ -2243,6 +2245,23 @@ EMITTER_OPCODE_TABLE(
|
||||||
EMITTER_ASSOCIATIVE_COMPARE_INT(op, instr, inverse_instr, I16, Reg16); \
|
EMITTER_ASSOCIATIVE_COMPARE_INT(op, instr, inverse_instr, I16, Reg16); \
|
||||||
EMITTER_ASSOCIATIVE_COMPARE_INT(op, instr, inverse_instr, I32, Reg32); \
|
EMITTER_ASSOCIATIVE_COMPARE_INT(op, instr, inverse_instr, I32, Reg32); \
|
||||||
EMITTER_ASSOCIATIVE_COMPARE_INT(op, instr, inverse_instr, I64, Reg64); \
|
EMITTER_ASSOCIATIVE_COMPARE_INT(op, instr, inverse_instr, I64, Reg64); \
|
||||||
|
EMITTER_OPCODE_TABLE( \
|
||||||
|
/* Table name: paste only onto op. A trailing ## before the comma would try */ \
/* to form the token "OPCODE_COMPARE_<op>,", which only MSVC tolerates.     */ \
OPCODE_COMPARE_##op, \
|
||||||
|
COMPARE_##op##_I8, \
|
||||||
|
COMPARE_##op##_I16, \
|
||||||
|
COMPARE_##op##_I32, \
|
||||||
|
COMPARE_##op##_I64);
|
||||||
|
EMITTER_ASSOCIATIVE_COMPARE_XX(SLT, setl, setge);
|
||||||
|
EMITTER_ASSOCIATIVE_COMPARE_XX(SLE, setle, setg);
|
||||||
|
EMITTER_ASSOCIATIVE_COMPARE_XX(SGT, setg, setle);
|
||||||
|
EMITTER_ASSOCIATIVE_COMPARE_XX(SGE, setge, setl);
|
||||||
|
EMITTER_ASSOCIATIVE_COMPARE_XX(ULT, setb, setae);
|
||||||
|
EMITTER_ASSOCIATIVE_COMPARE_XX(ULE, setbe, seta);
|
||||||
|
EMITTER_ASSOCIATIVE_COMPARE_XX(UGT, seta, setbe);
|
||||||
|
EMITTER_ASSOCIATIVE_COMPARE_XX(UGE, setae, setb);
|
||||||
|
|
||||||
|
// http://x86.renejeschke.de/html/file_module_x86_id_288.html
|
||||||
|
#define EMITTER_ASSOCIATIVE_COMPARE_FLT_XX(op, instr) \
|
||||||
EMITTER(COMPARE_##op##_F32, MATCH(I<OPCODE_COMPARE_##op##, I8<>, F32<>, F32<>>)) { \
|
EMITTER(COMPARE_##op##_F32, MATCH(I<OPCODE_COMPARE_##op##, I8<>, F32<>, F32<>>)) { \
|
||||||
static void Emit(X64Emitter& e, const EmitArgType& i) { \
|
static void Emit(X64Emitter& e, const EmitArgType& i) { \
|
||||||
e.vcomiss(i.src1, i.src2); \
|
e.vcomiss(i.src1, i.src2); \
|
||||||
|
@ -2264,21 +2283,17 @@ EMITTER_OPCODE_TABLE(
|
||||||
} \
|
} \
|
||||||
}; \
|
}; \
|
||||||
EMITTER_OPCODE_TABLE( \
|
EMITTER_OPCODE_TABLE( \
|
||||||
OPCODE_COMPARE_##op##, \
|
OPCODE_COMPARE_##op##_FLT, \
|
||||||
COMPARE_##op##_I8, \
|
|
||||||
COMPARE_##op##_I16, \
|
|
||||||
COMPARE_##op##_I32, \
|
|
||||||
COMPARE_##op##_I64, \
|
|
||||||
COMPARE_##op##_F32, \
|
COMPARE_##op##_F32, \
|
||||||
COMPARE_##op##_F64);
|
COMPARE_##op##_F64);
|
||||||
EMITTER_ASSOCIATIVE_COMPARE_XX(SLT, setl, setge);
|
// The float emitters compare with vcomiss, which reports its result in CF/ZF
// the way an unsigned integer compare does. That is why both the signed and
// the unsigned variants below pass the below/above conditions (setb, setbe,
// seta, setae) rather than the less/greater ones used for integers.
// NOTE(review): for unordered (NaN) operands vcomiss sets ZF=PF=CF=1, so the
// setb/setbe variants would report true - confirm that is acceptable.
EMITTER_ASSOCIATIVE_COMPARE_FLT_XX(SLT, setb);
|
||||||
EMITTER_ASSOCIATIVE_COMPARE_XX(SLE, setle, setg);
|
EMITTER_ASSOCIATIVE_COMPARE_FLT_XX(SLE, setbe);
|
||||||
EMITTER_ASSOCIATIVE_COMPARE_XX(SGT, setg, setle);
|
EMITTER_ASSOCIATIVE_COMPARE_FLT_XX(SGT, seta);
|
||||||
EMITTER_ASSOCIATIVE_COMPARE_XX(SGE, setge, setl);
|
EMITTER_ASSOCIATIVE_COMPARE_FLT_XX(SGE, setae);
|
||||||
EMITTER_ASSOCIATIVE_COMPARE_XX(ULT, setb, setae);
|
EMITTER_ASSOCIATIVE_COMPARE_FLT_XX(ULT, setb);
|
||||||
EMITTER_ASSOCIATIVE_COMPARE_XX(ULE, setbe, seta);
|
EMITTER_ASSOCIATIVE_COMPARE_FLT_XX(ULE, setbe);
|
||||||
EMITTER_ASSOCIATIVE_COMPARE_XX(UGT, seta, setbe);
|
EMITTER_ASSOCIATIVE_COMPARE_FLT_XX(UGT, seta);
|
||||||
EMITTER_ASSOCIATIVE_COMPARE_XX(UGE, setae, setb);
|
EMITTER_ASSOCIATIVE_COMPARE_FLT_XX(UGE, setae);
|
||||||
|
|
||||||
|
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
|
@ -3356,18 +3371,18 @@ EMITTER(NEG_I64, MATCH(I<OPCODE_NEG, I64<>, I64<>>)) {
|
||||||
};
|
};
|
||||||
EMITTER(NEG_F32, MATCH(I<OPCODE_NEG, F32<>, F32<>>)) {
|
EMITTER(NEG_F32, MATCH(I<OPCODE_NEG, F32<>, F32<>>)) {
|
||||||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||||
e.vpxor(i.dest, i.src1, e.GetXmmConstPtr(XMMSignMaskPS));
|
// Negate by XORing with XMMSignMaskPS (0x80000000 in every dword): only the
// sign bit is flipped, every other bit of the value is left as-is.
e.vxorps(i.dest, i.src1, e.GetXmmConstPtr(XMMSignMaskPS));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
EMITTER(NEG_F64, MATCH(I<OPCODE_NEG, F64<>, F64<>>)) {
|
EMITTER(NEG_F64, MATCH(I<OPCODE_NEG, F64<>, F64<>>)) {
|
||||||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||||
e.vpxor(i.dest, i.src1, e.GetXmmConstPtr(XMMSignMaskPD));
|
// Negate by XORing with XMMSignMaskPD, which has 0x80000000 in every other
// dword (presumably bit 63 of each double), so only the sign bit is flipped.
e.vxorpd(i.dest, i.src1, e.GetXmmConstPtr(XMMSignMaskPD));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
EMITTER(NEG_V128, MATCH(I<OPCODE_NEG, V128<>, V128<>>)) {
|
EMITTER(NEG_V128, MATCH(I<OPCODE_NEG, V128<>, V128<>>)) {
|
||||||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||||
XEASSERT(!i.instr->flags);
|
XEASSERT(!i.instr->flags);
|
||||||
e.vpxor(i.dest, i.src1, e.GetXmmConstPtr(XMMSignMaskPS));
|
// Uses the per-dword sign mask (XMMSignMaskPS), so bit 31 of each of the four
// 32-bit lanes is flipped - i.e. the vector is negated as four floats.
e.vxorps(i.dest, i.src1, e.GetXmmConstPtr(XMMSignMaskPS));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
EMITTER_OPCODE_TABLE(
|
EMITTER_OPCODE_TABLE(
|
||||||
|
@ -3386,20 +3401,17 @@ EMITTER_OPCODE_TABLE(
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
EMITTER(ABS_F32, MATCH(I<OPCODE_ABS, F32<>, F32<>>)) {
|
EMITTER(ABS_F32, MATCH(I<OPCODE_ABS, F32<>, F32<>>)) {
|
||||||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||||
e.vmovaps(e.xmm0, e.GetXmmConstPtr(XMMSignMaskPS));
|
// Absolute value: AND with XMMAbsMaskPS (0x7FFFFFFF in every dword) clears the
// sign bit in a single instruction, with no scratch register needed.
// NOTE(review): NEG_F32 uses the float-domain vxorps; vandps would be the
// matching form here - confirm before changing.
e.vpand(i.dest, i.src1, e.GetXmmConstPtr(XMMAbsMaskPS));
|
||||||
e.vpandn(i.dest, e.xmm0, i.src1);
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
EMITTER(ABS_F64, MATCH(I<OPCODE_ABS, F64<>, F64<>>)) {
|
EMITTER(ABS_F64, MATCH(I<OPCODE_ABS, F64<>, F64<>>)) {
|
||||||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||||
e.vmovaps(e.xmm0, e.GetXmmConstPtr(XMMSignMaskPD));
|
// Absolute value: XMMAbsMaskPD is the bitwise complement of XMMSignMaskPD, so
// the AND clears exactly the bits that NEG_F64 flips and keeps the rest.
// NOTE(review): NEG_F64 uses the float-domain vxorpd; vandpd would be the
// matching form here - confirm before changing.
e.vpand(i.dest, i.src1, e.GetXmmConstPtr(XMMAbsMaskPD));
|
||||||
e.vpandn(i.dest, e.xmm0, i.src1);
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
EMITTER(ABS_V128, MATCH(I<OPCODE_ABS, V128<>, V128<>>)) {
|
EMITTER(ABS_V128, MATCH(I<OPCODE_ABS, V128<>, V128<>>)) {
|
||||||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||||
e.vmovaps(e.xmm0, e.GetXmmConstPtr(XMMSignMaskPS));
|
// Uses the per-dword mask (XMMAbsMaskPS), so bit 31 of each of the four 32-bit
// lanes is cleared - i.e. the absolute value is taken per float lane.
e.vpand(i.dest, i.src1, e.GetXmmConstPtr(XMMAbsMaskPS));
|
||||||
e.vpandn(i.dest, e.xmm0, i.src1);
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
EMITTER_OPCODE_TABLE(
|
EMITTER_OPCODE_TABLE(
|
||||||
|
@ -4980,6 +4992,14 @@ void alloy::backend::x64::RegisterSequences() {
|
||||||
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_COMPARE_ULE);
|
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_COMPARE_ULE);
|
||||||
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_COMPARE_UGT);
|
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_COMPARE_UGT);
|
||||||
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_COMPARE_UGE);
|
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_COMPARE_UGE);
|
||||||
|
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_COMPARE_SLT_FLT);
|
||||||
|
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_COMPARE_SLE_FLT);
|
||||||
|
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_COMPARE_SGT_FLT);
|
||||||
|
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_COMPARE_SGE_FLT);
|
||||||
|
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_COMPARE_ULT_FLT);
|
||||||
|
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_COMPARE_ULE_FLT);
|
||||||
|
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_COMPARE_UGT_FLT);
|
||||||
|
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_COMPARE_UGE_FLT);
|
||||||
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_DID_CARRY);
|
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_DID_CARRY);
|
||||||
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_DID_OVERFLOW);
|
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_DID_OVERFLOW);
|
||||||
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_DID_SATURATE);
|
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_DID_SATURATE);
|
||||||
|
|
|
@ -266,7 +266,7 @@ void Disasm_dcbz(InstrData& i, StringBuffer* str) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void Disasm_fcmp(InstrData& i, StringBuffer* str) {
|
void Disasm_fcmp(InstrData& i, StringBuffer* str) {
|
||||||
str->Append("%-8s cr%d, r%d, r%d", i.type->name,
|
str->Append("%-8s cr%d, f%d, f%d", i.type->name,
|
||||||
i.X.RT >> 2, i.X.RA, i.X.RB);
|
i.X.RT >> 2, i.X.RA, i.X.RB);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue