Fixing COMPARE and tweaking ABS/NEG.

This commit is contained in:
Ben Vanik 2014-05-29 23:11:00 -07:00
parent 328ece538a
commit 0c55309826
4 changed files with 65 additions and 41 deletions

View File

@ -489,6 +489,8 @@ Address X64Emitter::GetXmmConstPtr(XmmConst id) {
/* XMM3301 */ vec128f(3.0f, 3.0f, 0.0f, 1.0f),
/* XMMSignMaskPS */ vec128i(0x80000000u, 0x80000000u, 0x80000000u, 0x80000000u),
/* XMMSignMaskPD */ vec128i(0x00000000u, 0x80000000u, 0x00000000u, 0x80000000u),
/* XMMAbsMaskPS */ vec128i(0x7FFFFFFFu, 0x7FFFFFFFu, 0x7FFFFFFFu, 0x7FFFFFFFu),
/* XMMAbsMaskPD */ vec128i(0xFFFFFFFFu, 0x7FFFFFFFu, 0xFFFFFFFFu, 0x7FFFFFFFu),
/* XMMByteSwapMask */ vec128i(0x00010203u, 0x04050607u, 0x08090A0Bu, 0x0C0D0E0Fu),
/* XMMPermuteControl15 */ vec128b(15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15),
/* XMMPackD3DCOLOR */ vec128i(0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu, 0x0C000408u),

View File

@ -37,24 +37,26 @@ enum RegisterFlags {
// Identifiers for the constant vectors handed out by
// X64Emitter::GetXmmConstPtr(XmmConst id).
// NOTE(review): the enumerator order appears to mirror the order of the
// entries in that function's constant table (each entry there is labelled
// with the matching name) - keep the two lists in step when adding entries.
// This hunk lists the enumerators twice: once with explicit values and once
// with implicit numbering, the latter making room for XMMAbsMaskPS and
// XMMAbsMaskPD directly after XMMSignMaskPD.
enum XmmConst {
XMMZero = 0,
XMMOne = 1,
XMMNegativeOne = 2,
XMMMaskX16Y16 = 3,
XMMFlipX16Y16 = 4,
XMMFixX16Y16 = 5,
XMMNormalizeX16Y16 = 6,
XMM3301 = 7,
XMMSignMaskPS = 8,
XMMSignMaskPD = 9,
XMMByteSwapMask = 10,
XMMPermuteControl15 = 11,
XMMPackD3DCOLOR = 12,
XMMUnpackD3DCOLOR = 13,
XMMOneOver255 = 14,
XMMShiftMaskPS = 15,
XMMShiftByteMask = 16,
XMMUnsignedDwordMax = 17,
XMM255 = 18,
XMMOne,
XMMNegativeOne,
XMMMaskX16Y16,
XMMFlipX16Y16,
XMMFixX16Y16,
XMMNormalizeX16Y16,
XMM3301,
XMMSignMaskPS,
XMMSignMaskPD,
XMMAbsMaskPS,
XMMAbsMaskPD,
XMMByteSwapMask,
XMMPermuteControl15,
XMMPackD3DCOLOR,
XMMUnpackD3DCOLOR,
XMMOneOver255,
XMMShiftMaskPS,
XMMShiftByteMask,
XMMUnsignedDwordMax,
XMM255,
};
// Unfortunately due to the design of xbyak we have to pass this to the ctor.

View File

@ -1946,6 +1946,8 @@ EMITTER(SELECT_F32, MATCH(I<OPCODE_SELECT, F32<>, I8<>, F32<>, F32<>>)) {
static void Emit(X64Emitter& e, const EmitArgType& i) {
e.test(i.src1, i.src1);
// TODO(benvanik): find a way to do this without branches.
// We may be able to load src1 into an xmm, cmp with zero, and use that
// as a selection mask to choose between src2 & src3.
Xbyak::Label skip;
e.vmovaps(i.dest, i.src3);
e.jz(skip);
@ -2243,6 +2245,23 @@ EMITTER_OPCODE_TABLE(
EMITTER_ASSOCIATIVE_COMPARE_INT(op, instr, inverse_instr, I16, Reg16); \
EMITTER_ASSOCIATIVE_COMPARE_INT(op, instr, inverse_instr, I32, Reg32); \
EMITTER_ASSOCIATIVE_COMPARE_INT(op, instr, inverse_instr, I64, Reg64); \
EMITTER_OPCODE_TABLE( \
OPCODE_COMPARE_##op##, \
COMPARE_##op##_I8, \
COMPARE_##op##_I16, \
COMPARE_##op##_I32, \
COMPARE_##op##_I64);
// Instantiate the integer compare emitters (I8/I16/I32/I64) and their opcode
// table for each comparison. Arguments are (op, instr, inverse_instr): signed
// variants use the setl/setle/setg/setge family, unsigned variants use
// setb/setbe/seta/setae.
// NOTE(review): every inverse_instr here is the logical negation of instr
// (setl <-> setge). If the macro uses inverse_instr after swapping the two
// operands, the mirrored condition would be needed instead (setl <-> setg,
// setle <-> setge, setb <-> seta, setbe <-> setae) - confirm against the
// body of EMITTER_ASSOCIATIVE_COMPARE_INT.
EMITTER_ASSOCIATIVE_COMPARE_XX(SLT, setl, setge);
EMITTER_ASSOCIATIVE_COMPARE_XX(SLE, setle, setg);
EMITTER_ASSOCIATIVE_COMPARE_XX(SGT, setg, setle);
EMITTER_ASSOCIATIVE_COMPARE_XX(SGE, setge, setl);
EMITTER_ASSOCIATIVE_COMPARE_XX(ULT, setb, setae);
EMITTER_ASSOCIATIVE_COMPARE_XX(ULE, setbe, seta);
EMITTER_ASSOCIATIVE_COMPARE_XX(UGT, seta, setbe);
EMITTER_ASSOCIATIVE_COMPARE_XX(UGE, setae, setb);
// http://x86.renejeschke.de/html/file_module_x86_id_288.html
#define EMITTER_ASSOCIATIVE_COMPARE_FLT_XX(op, instr) \
EMITTER(COMPARE_##op##_F32, MATCH(I<OPCODE_COMPARE_##op##, I8<>, F32<>, F32<>>)) { \
static void Emit(X64Emitter& e, const EmitArgType& i) { \
e.vcomiss(i.src1, i.src2); \
@ -2264,21 +2283,17 @@ EMITTER_OPCODE_TABLE(
} \
}; \
EMITTER_OPCODE_TABLE( \
OPCODE_COMPARE_##op##, \
COMPARE_##op##_I8, \
COMPARE_##op##_I16, \
COMPARE_##op##_I32, \
COMPARE_##op##_I64, \
OPCODE_COMPARE_##op##_FLT, \
COMPARE_##op##_F32, \
COMPARE_##op##_F64);
EMITTER_ASSOCIATIVE_COMPARE_XX(SLT, setl, setge);
EMITTER_ASSOCIATIVE_COMPARE_XX(SLE, setle, setg);
EMITTER_ASSOCIATIVE_COMPARE_XX(SGT, setg, setle);
EMITTER_ASSOCIATIVE_COMPARE_XX(SGE, setge, setl);
EMITTER_ASSOCIATIVE_COMPARE_XX(ULT, setb, setae);
EMITTER_ASSOCIATIVE_COMPARE_XX(ULE, setbe, seta);
EMITTER_ASSOCIATIVE_COMPARE_XX(UGT, seta, setbe);
EMITTER_ASSOCIATIVE_COMPARE_XX(UGE, setae, setb);
// Instantiate the float compare emitters and their OPCODE_COMPARE_*_FLT
// tables. The F32 emitter compares with vcomiss, which reports its result in
// ZF/PF/CF (the way an unsigned integer compare does) rather than SF/OF, so
// the signed and unsigned opcode variants both use the setb/setbe/seta/setae
// condition codes.
// NOTE(review): an unordered (NaN) compare sets ZF, PF and CF together, so
// setb/setbe would report true for NaN operands - confirm that is the
// intended result for the LT/LE variants.
EMITTER_ASSOCIATIVE_COMPARE_FLT_XX(SLT, setb);
EMITTER_ASSOCIATIVE_COMPARE_FLT_XX(SLE, setbe);
EMITTER_ASSOCIATIVE_COMPARE_FLT_XX(SGT, seta);
EMITTER_ASSOCIATIVE_COMPARE_FLT_XX(SGE, setae);
EMITTER_ASSOCIATIVE_COMPARE_FLT_XX(ULT, setb);
EMITTER_ASSOCIATIVE_COMPARE_FLT_XX(ULE, setbe);
EMITTER_ASSOCIATIVE_COMPARE_FLT_XX(UGT, seta);
EMITTER_ASSOCIATIVE_COMPARE_FLT_XX(UGE, setae);
// ============================================================================
@ -3356,18 +3371,18 @@ EMITTER(NEG_I64, MATCH(I<OPCODE_NEG, I64<>, I64<>>)) {
};
// Emits OPCODE_NEG for an F32 value: dest = src1 XOR XMMSignMaskPS, where the
// mask has 0x80000000 in every 32-bit lane, so the sign bit is flipped.
// The hunk carries two encodings of the same XOR: vpxor (packed-integer form)
// and vxorps (packed-single form).
// NOTE(review): presumably vpxor is the line being replaced by vxorps -
// confirm against the commit.
EMITTER(NEG_F32, MATCH(I<OPCODE_NEG, F32<>, F32<>>)) {
static void Emit(X64Emitter& e, const EmitArgType& i) {
e.vpxor(i.dest, i.src1, e.GetXmmConstPtr(XMMSignMaskPS));
e.vxorps(i.dest, i.src1, e.GetXmmConstPtr(XMMSignMaskPS));
}
};
// Emits OPCODE_NEG for an F64 value: dest = src1 XOR XMMSignMaskPD.
// NOTE(review): the PD mask is listed as dwords (0, 0x80000000, 0,
// 0x80000000); assuming low-to-high dword order that is the top bit of each
// 64-bit lane, i.e. the double sign bit - confirm vec128i's element order.
// The hunk carries two encodings of the same XOR: vpxor (packed-integer form)
// and vxorpd (packed-double form); presumably the former is being replaced.
EMITTER(NEG_F64, MATCH(I<OPCODE_NEG, F64<>, F64<>>)) {
static void Emit(X64Emitter& e, const EmitArgType& i) {
e.vpxor(i.dest, i.src1, e.GetXmmConstPtr(XMMSignMaskPD));
e.vxorpd(i.dest, i.src1, e.GetXmmConstPtr(XMMSignMaskPD));
}
};
// Emits OPCODE_NEG for a V128 value. The instruction must carry no flags
// (asserted); the vector is then XORed with XMMSignMaskPS, flipping the top
// bit of each of the four 32-bit lanes.
// The hunk carries two encodings of the same XOR: vpxor (packed-integer form)
// and vxorps (packed-single form); presumably the former is being replaced.
EMITTER(NEG_V128, MATCH(I<OPCODE_NEG, V128<>, V128<>>)) {
static void Emit(X64Emitter& e, const EmitArgType& i) {
XEASSERT(!i.instr->flags);
e.vpxor(i.dest, i.src1, e.GetXmmConstPtr(XMMSignMaskPS));
e.vxorps(i.dest, i.src1, e.GetXmmConstPtr(XMMSignMaskPS));
}
};
EMITTER_OPCODE_TABLE(
@ -3386,20 +3401,17 @@ EMITTER_OPCODE_TABLE(
// ============================================================================
// Emits OPCODE_ABS for an F32 value by clearing the sign bit.
// The hunk carries two equivalent forms:
//   - load XMMSignMaskPS into xmm0, then vpandn (dest = ~xmm0 & src1);
//   - a single vpand against XMMAbsMaskPS (0x7FFFFFFF in every lane), which
//     needs no scratch register.
// NOTE(review): the NEG emitters in this file use the floating-point-domain
// vxorps/vxorpd, while this uses the integer-domain vpand; vandps would be
// the consistent choice - confirm whether that matters for this backend.
EMITTER(ABS_F32, MATCH(I<OPCODE_ABS, F32<>, F32<>>)) {
static void Emit(X64Emitter& e, const EmitArgType& i) {
e.vmovaps(e.xmm0, e.GetXmmConstPtr(XMMSignMaskPS));
e.vpandn(i.dest, e.xmm0, i.src1);
e.vpand(i.dest, i.src1, e.GetXmmConstPtr(XMMAbsMaskPS));
}
};
// Emits OPCODE_ABS for an F64 value by clearing the sign bit.
// The hunk carries two equivalent forms:
//   - load XMMSignMaskPD into xmm0, then vpandn (dest = ~xmm0 & src1);
//   - a single vpand against XMMAbsMaskPD, which needs no scratch register.
// NOTE(review): the NEG emitters in this file use the floating-point-domain
// vxorps/vxorpd, while this uses the integer-domain vpand; vandpd would be
// the consistent choice - confirm whether that matters for this backend.
EMITTER(ABS_F64, MATCH(I<OPCODE_ABS, F64<>, F64<>>)) {
static void Emit(X64Emitter& e, const EmitArgType& i) {
e.vmovaps(e.xmm0, e.GetXmmConstPtr(XMMSignMaskPD));
e.vpandn(i.dest, e.xmm0, i.src1);
e.vpand(i.dest, i.src1, e.GetXmmConstPtr(XMMAbsMaskPD));
}
};
// Emits OPCODE_ABS for a V128 value, clearing the top bit of each of the four
// 32-bit lanes (the PS masks are used, so the vector is treated as floats).
// The hunk carries two equivalent forms:
//   - load XMMSignMaskPS into xmm0, then vpandn (dest = ~xmm0 & src1);
//   - a single vpand against XMMAbsMaskPS, which needs no scratch register.
// NOTE(review): the NEG emitters in this file use the floating-point-domain
// vxorps, while this uses the integer-domain vpand; vandps would be the
// consistent choice - confirm whether that matters for this backend.
EMITTER(ABS_V128, MATCH(I<OPCODE_ABS, V128<>, V128<>>)) {
static void Emit(X64Emitter& e, const EmitArgType& i) {
e.vmovaps(e.xmm0, e.GetXmmConstPtr(XMMSignMaskPS));
e.vpandn(i.dest, e.xmm0, i.src1);
e.vpand(i.dest, i.src1, e.GetXmmConstPtr(XMMAbsMaskPS));
}
};
EMITTER_OPCODE_TABLE(
@ -4980,6 +4992,14 @@ void alloy::backend::x64::RegisterSequences() {
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_COMPARE_ULE);
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_COMPARE_UGT);
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_COMPARE_UGE);
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_COMPARE_SLT_FLT);
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_COMPARE_SLE_FLT);
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_COMPARE_SGT_FLT);
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_COMPARE_SGE_FLT);
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_COMPARE_ULT_FLT);
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_COMPARE_ULE_FLT);
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_COMPARE_UGT_FLT);
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_COMPARE_UGE_FLT);
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_DID_CARRY);
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_DID_OVERFLOW);
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_DID_SATURATE);

View File

@ -266,7 +266,7 @@ void Disasm_dcbz(InstrData& i, StringBuffer* str) {
}
// Appends the textual form of a floating-point compare instruction to str:
// the mnemonic (left-justified, padded to 8 columns), the condition register
// field number (X.RT >> 2) and the two source register numbers (X.RA, X.RB).
// The hunk carries two versions of the format string: one printing the
// sources with an "r" prefix and one with an "f" prefix.
// NOTE(review): presumably the "f" form is the corrected one, since the
// operands of a float compare are floating-point registers - confirm.
void Disasm_fcmp(InstrData& i, StringBuffer* str) {
str->Append("%-8s cr%d, r%d, r%d", i.type->name,
str->Append("%-8s cr%d, f%d, f%d", i.type->name,
i.X.RT >> 2, i.X.RA, i.X.RB);
}