Fixing COMPARE and tweaking ABS/NEG.

This commit is contained in:
Ben Vanik 2014-05-29 23:11:00 -07:00
parent 328ece538a
commit 0c55309826
4 changed files with 65 additions and 41 deletions

View File

@ -489,6 +489,8 @@ Address X64Emitter::GetXmmConstPtr(XmmConst id) {
/* XMM3301 */ vec128f(3.0f, 3.0f, 0.0f, 1.0f), /* XMM3301 */ vec128f(3.0f, 3.0f, 0.0f, 1.0f),
/* XMMSignMaskPS */ vec128i(0x80000000u, 0x80000000u, 0x80000000u, 0x80000000u), /* XMMSignMaskPS */ vec128i(0x80000000u, 0x80000000u, 0x80000000u, 0x80000000u),
/* XMMSignMaskPD */ vec128i(0x00000000u, 0x80000000u, 0x00000000u, 0x80000000u), /* XMMSignMaskPD */ vec128i(0x00000000u, 0x80000000u, 0x00000000u, 0x80000000u),
/* XMMAbsMaskPS */ vec128i(0x7FFFFFFFu, 0x7FFFFFFFu, 0x7FFFFFFFu, 0x7FFFFFFFu),
/* XMMAbsMaskPD */ vec128i(0xFFFFFFFFu, 0x7FFFFFFFu, 0xFFFFFFFFu, 0x7FFFFFFFu),
/* XMMByteSwapMask */ vec128i(0x00010203u, 0x04050607u, 0x08090A0Bu, 0x0C0D0E0Fu), /* XMMByteSwapMask */ vec128i(0x00010203u, 0x04050607u, 0x08090A0Bu, 0x0C0D0E0Fu),
/* XMMPermuteControl15 */ vec128b(15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15), /* XMMPermuteControl15 */ vec128b(15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15),
/* XMMPackD3DCOLOR */ vec128i(0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu, 0x0C000408u), /* XMMPackD3DCOLOR */ vec128i(0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu, 0x0C000408u),

View File

@ -37,24 +37,26 @@ enum RegisterFlags {
enum XmmConst { enum XmmConst {
XMMZero = 0, XMMZero = 0,
XMMOne = 1, XMMOne,
XMMNegativeOne = 2, XMMNegativeOne,
XMMMaskX16Y16 = 3, XMMMaskX16Y16,
XMMFlipX16Y16 = 4, XMMFlipX16Y16,
XMMFixX16Y16 = 5, XMMFixX16Y16,
XMMNormalizeX16Y16 = 6, XMMNormalizeX16Y16,
XMM3301 = 7, XMM3301,
XMMSignMaskPS = 8, XMMSignMaskPS,
XMMSignMaskPD = 9, XMMSignMaskPD,
XMMByteSwapMask = 10, XMMAbsMaskPS,
XMMPermuteControl15 = 11, XMMAbsMaskPD,
XMMPackD3DCOLOR = 12, XMMByteSwapMask,
XMMUnpackD3DCOLOR = 13, XMMPermuteControl15,
XMMOneOver255 = 14, XMMPackD3DCOLOR,
XMMShiftMaskPS = 15, XMMUnpackD3DCOLOR,
XMMShiftByteMask = 16, XMMOneOver255,
XMMUnsignedDwordMax = 17, XMMShiftMaskPS,
XMM255 = 18, XMMShiftByteMask,
XMMUnsignedDwordMax,
XMM255,
}; };
// Unfortunately due to the design of xbyak we have to pass this to the ctor. // Unfortunately due to the design of xbyak we have to pass this to the ctor.

View File

@ -1946,6 +1946,8 @@ EMITTER(SELECT_F32, MATCH(I<OPCODE_SELECT, F32<>, I8<>, F32<>, F32<>>)) {
static void Emit(X64Emitter& e, const EmitArgType& i) { static void Emit(X64Emitter& e, const EmitArgType& i) {
e.test(i.src1, i.src1); e.test(i.src1, i.src1);
// TODO(benvanik): find a way to do this without branches. // TODO(benvanik): find a way to do this without branches.
// We may be able to load src1 into an xmm, cmp with zero, and use that
// as a selection mask to choose between src2 & src3.
Xbyak::Label skip; Xbyak::Label skip;
e.vmovaps(i.dest, i.src3); e.vmovaps(i.dest, i.src3);
e.jz(skip); e.jz(skip);
@ -2243,6 +2245,23 @@ EMITTER_OPCODE_TABLE(
EMITTER_ASSOCIATIVE_COMPARE_INT(op, instr, inverse_instr, I16, Reg16); \ EMITTER_ASSOCIATIVE_COMPARE_INT(op, instr, inverse_instr, I16, Reg16); \
EMITTER_ASSOCIATIVE_COMPARE_INT(op, instr, inverse_instr, I32, Reg32); \ EMITTER_ASSOCIATIVE_COMPARE_INT(op, instr, inverse_instr, I32, Reg32); \
EMITTER_ASSOCIATIVE_COMPARE_INT(op, instr, inverse_instr, I64, Reg64); \ EMITTER_ASSOCIATIVE_COMPARE_INT(op, instr, inverse_instr, I64, Reg64); \
EMITTER_OPCODE_TABLE( \
OPCODE_COMPARE_##op##, \
COMPARE_##op##_I8, \
COMPARE_##op##_I16, \
COMPARE_##op##_I32, \
COMPARE_##op##_I64);
EMITTER_ASSOCIATIVE_COMPARE_XX(SLT, setl, setge);
EMITTER_ASSOCIATIVE_COMPARE_XX(SLE, setle, setg);
EMITTER_ASSOCIATIVE_COMPARE_XX(SGT, setg, setle);
EMITTER_ASSOCIATIVE_COMPARE_XX(SGE, setge, setl);
EMITTER_ASSOCIATIVE_COMPARE_XX(ULT, setb, setae);
EMITTER_ASSOCIATIVE_COMPARE_XX(ULE, setbe, seta);
EMITTER_ASSOCIATIVE_COMPARE_XX(UGT, seta, setbe);
EMITTER_ASSOCIATIVE_COMPARE_XX(UGE, setae, setb);
// http://x86.renejeschke.de/html/file_module_x86_id_288.html
#define EMITTER_ASSOCIATIVE_COMPARE_FLT_XX(op, instr) \
EMITTER(COMPARE_##op##_F32, MATCH(I<OPCODE_COMPARE_##op##, I8<>, F32<>, F32<>>)) { \ EMITTER(COMPARE_##op##_F32, MATCH(I<OPCODE_COMPARE_##op##, I8<>, F32<>, F32<>>)) { \
static void Emit(X64Emitter& e, const EmitArgType& i) { \ static void Emit(X64Emitter& e, const EmitArgType& i) { \
e.vcomiss(i.src1, i.src2); \ e.vcomiss(i.src1, i.src2); \
@ -2264,21 +2283,17 @@ EMITTER_OPCODE_TABLE(
} \ } \
}; \ }; \
EMITTER_OPCODE_TABLE( \ EMITTER_OPCODE_TABLE( \
OPCODE_COMPARE_##op##, \ OPCODE_COMPARE_##op##_FLT, \
COMPARE_##op##_I8, \
COMPARE_##op##_I16, \
COMPARE_##op##_I32, \
COMPARE_##op##_I64, \
COMPARE_##op##_F32, \ COMPARE_##op##_F32, \
COMPARE_##op##_F64); COMPARE_##op##_F64);
EMITTER_ASSOCIATIVE_COMPARE_XX(SLT, setl, setge); EMITTER_ASSOCIATIVE_COMPARE_FLT_XX(SLT, setb);
EMITTER_ASSOCIATIVE_COMPARE_XX(SLE, setle, setg); EMITTER_ASSOCIATIVE_COMPARE_FLT_XX(SLE, setbe);
EMITTER_ASSOCIATIVE_COMPARE_XX(SGT, setg, setle); EMITTER_ASSOCIATIVE_COMPARE_FLT_XX(SGT, seta);
EMITTER_ASSOCIATIVE_COMPARE_XX(SGE, setge, setl); EMITTER_ASSOCIATIVE_COMPARE_FLT_XX(SGE, setae);
EMITTER_ASSOCIATIVE_COMPARE_XX(ULT, setb, setae); EMITTER_ASSOCIATIVE_COMPARE_FLT_XX(ULT, setb);
EMITTER_ASSOCIATIVE_COMPARE_XX(ULE, setbe, seta); EMITTER_ASSOCIATIVE_COMPARE_FLT_XX(ULE, setbe);
EMITTER_ASSOCIATIVE_COMPARE_XX(UGT, seta, setbe); EMITTER_ASSOCIATIVE_COMPARE_FLT_XX(UGT, seta);
EMITTER_ASSOCIATIVE_COMPARE_XX(UGE, setae, setb); EMITTER_ASSOCIATIVE_COMPARE_FLT_XX(UGE, setae);
// ============================================================================ // ============================================================================
@ -3356,18 +3371,18 @@ EMITTER(NEG_I64, MATCH(I<OPCODE_NEG, I64<>, I64<>>)) {
}; };
EMITTER(NEG_F32, MATCH(I<OPCODE_NEG, F32<>, F32<>>)) { EMITTER(NEG_F32, MATCH(I<OPCODE_NEG, F32<>, F32<>>)) {
static void Emit(X64Emitter& e, const EmitArgType& i) { static void Emit(X64Emitter& e, const EmitArgType& i) {
e.vpxor(i.dest, i.src1, e.GetXmmConstPtr(XMMSignMaskPS)); e.vxorps(i.dest, i.src1, e.GetXmmConstPtr(XMMSignMaskPS));
} }
}; };
EMITTER(NEG_F64, MATCH(I<OPCODE_NEG, F64<>, F64<>>)) { EMITTER(NEG_F64, MATCH(I<OPCODE_NEG, F64<>, F64<>>)) {
static void Emit(X64Emitter& e, const EmitArgType& i) { static void Emit(X64Emitter& e, const EmitArgType& i) {
e.vpxor(i.dest, i.src1, e.GetXmmConstPtr(XMMSignMaskPD)); e.vxorpd(i.dest, i.src1, e.GetXmmConstPtr(XMMSignMaskPD));
} }
}; };
EMITTER(NEG_V128, MATCH(I<OPCODE_NEG, V128<>, V128<>>)) { EMITTER(NEG_V128, MATCH(I<OPCODE_NEG, V128<>, V128<>>)) {
static void Emit(X64Emitter& e, const EmitArgType& i) { static void Emit(X64Emitter& e, const EmitArgType& i) {
XEASSERT(!i.instr->flags); XEASSERT(!i.instr->flags);
e.vpxor(i.dest, i.src1, e.GetXmmConstPtr(XMMSignMaskPS)); e.vxorps(i.dest, i.src1, e.GetXmmConstPtr(XMMSignMaskPS));
} }
}; };
EMITTER_OPCODE_TABLE( EMITTER_OPCODE_TABLE(
@ -3386,20 +3401,17 @@ EMITTER_OPCODE_TABLE(
// ============================================================================ // ============================================================================
EMITTER(ABS_F32, MATCH(I<OPCODE_ABS, F32<>, F32<>>)) { EMITTER(ABS_F32, MATCH(I<OPCODE_ABS, F32<>, F32<>>)) {
static void Emit(X64Emitter& e, const EmitArgType& i) { static void Emit(X64Emitter& e, const EmitArgType& i) {
e.vmovaps(e.xmm0, e.GetXmmConstPtr(XMMSignMaskPS)); e.vpand(i.dest, i.src1, e.GetXmmConstPtr(XMMAbsMaskPS));
e.vpandn(i.dest, e.xmm0, i.src1);
} }
}; };
EMITTER(ABS_F64, MATCH(I<OPCODE_ABS, F64<>, F64<>>)) { EMITTER(ABS_F64, MATCH(I<OPCODE_ABS, F64<>, F64<>>)) {
static void Emit(X64Emitter& e, const EmitArgType& i) { static void Emit(X64Emitter& e, const EmitArgType& i) {
e.vmovaps(e.xmm0, e.GetXmmConstPtr(XMMSignMaskPD)); e.vpand(i.dest, i.src1, e.GetXmmConstPtr(XMMAbsMaskPD));
e.vpandn(i.dest, e.xmm0, i.src1);
} }
}; };
EMITTER(ABS_V128, MATCH(I<OPCODE_ABS, V128<>, V128<>>)) { EMITTER(ABS_V128, MATCH(I<OPCODE_ABS, V128<>, V128<>>)) {
static void Emit(X64Emitter& e, const EmitArgType& i) { static void Emit(X64Emitter& e, const EmitArgType& i) {
e.vmovaps(e.xmm0, e.GetXmmConstPtr(XMMSignMaskPS)); e.vpand(i.dest, i.src1, e.GetXmmConstPtr(XMMAbsMaskPS));
e.vpandn(i.dest, e.xmm0, i.src1);
} }
}; };
EMITTER_OPCODE_TABLE( EMITTER_OPCODE_TABLE(
@ -4980,6 +4992,14 @@ void alloy::backend::x64::RegisterSequences() {
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_COMPARE_ULE); REGISTER_EMITTER_OPCODE_TABLE(OPCODE_COMPARE_ULE);
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_COMPARE_UGT); REGISTER_EMITTER_OPCODE_TABLE(OPCODE_COMPARE_UGT);
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_COMPARE_UGE); REGISTER_EMITTER_OPCODE_TABLE(OPCODE_COMPARE_UGE);
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_COMPARE_SLT_FLT);
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_COMPARE_SLE_FLT);
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_COMPARE_SGT_FLT);
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_COMPARE_SGE_FLT);
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_COMPARE_ULT_FLT);
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_COMPARE_ULE_FLT);
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_COMPARE_UGT_FLT);
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_COMPARE_UGE_FLT);
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_DID_CARRY); REGISTER_EMITTER_OPCODE_TABLE(OPCODE_DID_CARRY);
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_DID_OVERFLOW); REGISTER_EMITTER_OPCODE_TABLE(OPCODE_DID_OVERFLOW);
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_DID_SATURATE); REGISTER_EMITTER_OPCODE_TABLE(OPCODE_DID_SATURATE);

View File

@ -266,7 +266,7 @@ void Disasm_dcbz(InstrData& i, StringBuffer* str) {
} }
void Disasm_fcmp(InstrData& i, StringBuffer* str) { void Disasm_fcmp(InstrData& i, StringBuffer* str) {
str->Append("%-8s cr%d, r%d, r%d", i.type->name, str->Append("%-8s cr%d, f%d, f%d", i.type->name,
i.X.RT >> 2, i.X.RA, i.X.RB); i.X.RT >> 2, i.X.RA, i.X.RB);
} }