From 0c553098267153e389242e6538df0082167b6469 Mon Sep 17 00:00:00 2001 From: Ben Vanik Date: Thu, 29 May 2014 23:11:00 -0700 Subject: [PATCH] Fixing COMPARE and tweaking ABS/NEG. --- src/alloy/backend/x64/x64_emitter.cc | 2 + src/alloy/backend/x64/x64_emitter.h | 38 +++++++-------- src/alloy/backend/x64/x64_sequences.cc | 64 +++++++++++++++++--------- src/alloy/frontend/ppc/ppc_disasm.cc | 2 +- 4 files changed, 65 insertions(+), 41 deletions(-) diff --git a/src/alloy/backend/x64/x64_emitter.cc b/src/alloy/backend/x64/x64_emitter.cc index e966a5103..0096a08fa 100644 --- a/src/alloy/backend/x64/x64_emitter.cc +++ b/src/alloy/backend/x64/x64_emitter.cc @@ -489,6 +489,8 @@ Address X64Emitter::GetXmmConstPtr(XmmConst id) { /* XMM3301 */ vec128f(3.0f, 3.0f, 0.0f, 1.0f), /* XMMSignMaskPS */ vec128i(0x80000000u, 0x80000000u, 0x80000000u, 0x80000000u), /* XMMSignMaskPD */ vec128i(0x00000000u, 0x80000000u, 0x00000000u, 0x80000000u), + /* XMMAbsMaskPS */ vec128i(0x7FFFFFFFu, 0x7FFFFFFFu, 0x7FFFFFFFu, 0x7FFFFFFFu), + /* XMMAbsMaskPD */ vec128i(0xFFFFFFFFu, 0x7FFFFFFFu, 0xFFFFFFFFu, 0x7FFFFFFFu), /* XMMByteSwapMask */ vec128i(0x00010203u, 0x04050607u, 0x08090A0Bu, 0x0C0D0E0Fu), /* XMMPermuteControl15 */ vec128b(15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15), /* XMMPackD3DCOLOR */ vec128i(0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu, 0x0C000408u), diff --git a/src/alloy/backend/x64/x64_emitter.h b/src/alloy/backend/x64/x64_emitter.h index 4b05e5134..a720e1970 100644 --- a/src/alloy/backend/x64/x64_emitter.h +++ b/src/alloy/backend/x64/x64_emitter.h @@ -37,24 +37,26 @@ enum RegisterFlags { enum XmmConst { XMMZero = 0, - XMMOne = 1, - XMMNegativeOne = 2, - XMMMaskX16Y16 = 3, - XMMFlipX16Y16 = 4, - XMMFixX16Y16 = 5, - XMMNormalizeX16Y16 = 6, - XMM3301 = 7, - XMMSignMaskPS = 8, - XMMSignMaskPD = 9, - XMMByteSwapMask = 10, - XMMPermuteControl15 = 11, - XMMPackD3DCOLOR = 12, - XMMUnpackD3DCOLOR = 13, - XMMOneOver255 = 14, - XMMShiftMaskPS = 15, - XMMShiftByteMask = 16, - XMMUnsignedDwordMax = 17, - XMM255 = 18, + XMMOne, + XMMNegativeOne, + XMMMaskX16Y16, + XMMFlipX16Y16, + XMMFixX16Y16, + XMMNormalizeX16Y16, + XMM3301, + XMMSignMaskPS, + XMMSignMaskPD, + XMMAbsMaskPS, + XMMAbsMaskPD, + XMMByteSwapMask, + XMMPermuteControl15, + XMMPackD3DCOLOR, + XMMUnpackD3DCOLOR, + XMMOneOver255, + XMMShiftMaskPS, + XMMShiftByteMask, + XMMUnsignedDwordMax, + XMM255, }; // Unfortunately due to the design of xbyak we have to pass this to the ctor. diff --git a/src/alloy/backend/x64/x64_sequences.cc b/src/alloy/backend/x64/x64_sequences.cc index 17502d137..865f93476 100644 --- a/src/alloy/backend/x64/x64_sequences.cc +++ b/src/alloy/backend/x64/x64_sequences.cc @@ -1946,6 +1946,8 @@ EMITTER(SELECT_F32, MATCH(I, I8<>, F32<>, F32<>>)) { static void Emit(X64Emitter& e, const EmitArgType& i) { e.test(i.src1, i.src1); // TODO(benvanik): find a way to do this without branches. + // We may be able to load src1 into an xmm, cmp with zero, and use that + // as a selection mask to choose between src2 & src3. Xbyak::Label skip; e.vmovaps(i.dest, i.src3); e.jz(skip); @@ -2243,6 +2245,23 @@ EMITTER_OPCODE_TABLE( EMITTER_ASSOCIATIVE_COMPARE_INT(op, instr, inverse_instr, I16, Reg16); \ EMITTER_ASSOCIATIVE_COMPARE_INT(op, instr, inverse_instr, I32, Reg32); \ EMITTER_ASSOCIATIVE_COMPARE_INT(op, instr, inverse_instr, I64, Reg64); \ + EMITTER_OPCODE_TABLE( \ + OPCODE_COMPARE_##op##, \ + COMPARE_##op##_I8, \ + COMPARE_##op##_I16, \ + COMPARE_##op##_I32, \ + COMPARE_##op##_I64); +EMITTER_ASSOCIATIVE_COMPARE_XX(SLT, setl, setge); +EMITTER_ASSOCIATIVE_COMPARE_XX(SLE, setle, setg); +EMITTER_ASSOCIATIVE_COMPARE_XX(SGT, setg, setle); +EMITTER_ASSOCIATIVE_COMPARE_XX(SGE, setge, setl); +EMITTER_ASSOCIATIVE_COMPARE_XX(ULT, setb, setae); +EMITTER_ASSOCIATIVE_COMPARE_XX(ULE, setbe, seta); +EMITTER_ASSOCIATIVE_COMPARE_XX(UGT, seta, setbe); +EMITTER_ASSOCIATIVE_COMPARE_XX(UGE, setae, setb); + +// http://x86.renejeschke.de/html/file_module_x86_id_288.html +#define EMITTER_ASSOCIATIVE_COMPARE_FLT_XX(op, instr) \ EMITTER(COMPARE_##op##_F32, MATCH(I, F32<>, F32<>>)) { \ static void Emit(X64Emitter& e, const EmitArgType& i) { \ e.vcomiss(i.src1, i.src2); \ @@ -2264,21 +2283,17 @@ EMITTER_OPCODE_TABLE( } \ }; \ EMITTER_OPCODE_TABLE( \ - OPCODE_COMPARE_##op##, \ - COMPARE_##op##_I8, \ - COMPARE_##op##_I16, \ - COMPARE_##op##_I32, \ - COMPARE_##op##_I64, \ + OPCODE_COMPARE_##op##_FLT, \ COMPARE_##op##_F32, \ COMPARE_##op##_F64); -EMITTER_ASSOCIATIVE_COMPARE_XX(SLT, setl, setge); -EMITTER_ASSOCIATIVE_COMPARE_XX(SLE, setle, setg); -EMITTER_ASSOCIATIVE_COMPARE_XX(SGT, setg, setle); -EMITTER_ASSOCIATIVE_COMPARE_XX(SGE, setge, setl); -EMITTER_ASSOCIATIVE_COMPARE_XX(ULT, setb, setae); -EMITTER_ASSOCIATIVE_COMPARE_XX(ULE, setbe, seta); -EMITTER_ASSOCIATIVE_COMPARE_XX(UGT, seta, setbe); -EMITTER_ASSOCIATIVE_COMPARE_XX(UGE, setae, setb); +EMITTER_ASSOCIATIVE_COMPARE_FLT_XX(SLT, setb); +EMITTER_ASSOCIATIVE_COMPARE_FLT_XX(SLE, setbe); +EMITTER_ASSOCIATIVE_COMPARE_FLT_XX(SGT, seta); +EMITTER_ASSOCIATIVE_COMPARE_FLT_XX(SGE, setae); +EMITTER_ASSOCIATIVE_COMPARE_FLT_XX(ULT, setb); +EMITTER_ASSOCIATIVE_COMPARE_FLT_XX(ULE, setbe); +EMITTER_ASSOCIATIVE_COMPARE_FLT_XX(UGT, seta); +EMITTER_ASSOCIATIVE_COMPARE_FLT_XX(UGE, setae); // ============================================================================ @@ -3356,18 +3371,18 @@ EMITTER(NEG_I64, MATCH(I, I64<>>)) { }; EMITTER(NEG_F32, MATCH(I, F32<>>)) { static void Emit(X64Emitter& e, const EmitArgType& i) { - e.vpxor(i.dest, i.src1, e.GetXmmConstPtr(XMMSignMaskPS)); + e.vxorps(i.dest, i.src1, e.GetXmmConstPtr(XMMSignMaskPS)); } }; EMITTER(NEG_F64, MATCH(I, F64<>>)) { static void Emit(X64Emitter& e, const EmitArgType& i) { - e.vpxor(i.dest, i.src1, e.GetXmmConstPtr(XMMSignMaskPD)); + e.vxorpd(i.dest, i.src1, e.GetXmmConstPtr(XMMSignMaskPD)); } }; EMITTER(NEG_V128, MATCH(I, V128<>>)) { static void Emit(X64Emitter& e, const EmitArgType& i) { XEASSERT(!i.instr->flags); - e.vpxor(i.dest, i.src1, e.GetXmmConstPtr(XMMSignMaskPS)); + e.vxorps(i.dest, i.src1, e.GetXmmConstPtr(XMMSignMaskPS)); } }; EMITTER_OPCODE_TABLE( @@ -3386,20 +3401,17 @@ EMITTER_OPCODE_TABLE( // ============================================================================ EMITTER(ABS_F32, MATCH(I, F32<>>)) { static void Emit(X64Emitter& e, const EmitArgType& i) { - e.vmovaps(e.xmm0, e.GetXmmConstPtr(XMMSignMaskPS)); - e.vpandn(i.dest, e.xmm0, i.src1); + e.vpand(i.dest, i.src1, e.GetXmmConstPtr(XMMAbsMaskPS)); } }; EMITTER(ABS_F64, MATCH(I, F64<>>)) { static void Emit(X64Emitter& e, const EmitArgType& i) { - e.vmovaps(e.xmm0, e.GetXmmConstPtr(XMMSignMaskPD)); - e.vpandn(i.dest, e.xmm0, i.src1); + e.vpand(i.dest, i.src1, e.GetXmmConstPtr(XMMAbsMaskPD)); } }; EMITTER(ABS_V128, MATCH(I, V128<>>)) { static void Emit(X64Emitter& e, const EmitArgType& i) { - e.vmovaps(e.xmm0, e.GetXmmConstPtr(XMMSignMaskPS)); - e.vpandn(i.dest, e.xmm0, i.src1); + e.vpand(i.dest, i.src1, e.GetXmmConstPtr(XMMAbsMaskPS)); } }; EMITTER_OPCODE_TABLE( @@ -4980,6 +4992,14 @@ void alloy::backend::x64::RegisterSequences() { REGISTER_EMITTER_OPCODE_TABLE(OPCODE_COMPARE_ULE); REGISTER_EMITTER_OPCODE_TABLE(OPCODE_COMPARE_UGT); REGISTER_EMITTER_OPCODE_TABLE(OPCODE_COMPARE_UGE); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_COMPARE_SLT_FLT); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_COMPARE_SLE_FLT); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_COMPARE_SGT_FLT); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_COMPARE_SGE_FLT); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_COMPARE_ULT_FLT); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_COMPARE_ULE_FLT); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_COMPARE_UGT_FLT); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_COMPARE_UGE_FLT); REGISTER_EMITTER_OPCODE_TABLE(OPCODE_DID_CARRY); REGISTER_EMITTER_OPCODE_TABLE(OPCODE_DID_OVERFLOW); REGISTER_EMITTER_OPCODE_TABLE(OPCODE_DID_SATURATE); diff --git a/src/alloy/frontend/ppc/ppc_disasm.cc b/src/alloy/frontend/ppc/ppc_disasm.cc index ee9f21522..aa823a972 100644 --- a/src/alloy/frontend/ppc/ppc_disasm.cc +++ b/src/alloy/frontend/ppc/ppc_disasm.cc @@ -266,7 +266,7 @@ void Disasm_dcbz(InstrData& i, StringBuffer* str) { } void Disasm_fcmp(InstrData& i, StringBuffer* str) { - str->Append("%-8s cr%d, r%d, r%d", i.type->name, + str->Append("%-8s cr%d, f%d, f%d", i.type->name, i.X.RT >> 2, i.X.RA, i.X.RB); }