From 16bac6d9c567f781cde8f8dacafe989928b94193 Mon Sep 17 00:00:00 2001
From: Ben Vanik
Date: Tue, 27 May 2014 11:56:55 -0700
Subject: [PATCH] Constant support for a lot of vector ops.

---
 src/alloy/backend/x64/x64_sequence.inl |  30 ++++
 src/alloy/backend/x64/x64_sequences.cc | 196 ++++++++++++++++---------
 2 files changed, 157 insertions(+), 69 deletions(-)

diff --git a/src/alloy/backend/x64/x64_sequence.inl b/src/alloy/backend/x64/x64_sequence.inl
index ce2b8e36e..eae1096eb 100644
--- a/src/alloy/backend/x64/x64_sequence.inl
+++ b/src/alloy/backend/x64/x64_sequence.inl
@@ -628,6 +628,36 @@ struct SingleSequence : public Sequence<SingleSequence<SEQ, T>, T> {
     }
   }
 
+  template <typename FN>
+  static void EmitCommutativeBinaryXmmOp(
+      X64Emitter& e, const EmitArgType& i, const FN& fn) {
+    if (i.src1.is_constant) {
+      XEASSERT(!i.src2.is_constant);
+      e.LoadConstantXmm(e.xmm0, i.src1.constant());
+      fn(e, i.dest, e.xmm0, i.src2);
+    } else if (i.src2.is_constant) {
+      e.LoadConstantXmm(e.xmm0, i.src2.constant());
+      fn(e, i.dest, i.src1, e.xmm0);
+    } else {
+      fn(e, i.dest, i.src1, i.src2);
+    }
+  }
+
+  template <typename FN>
+  static void EmitAssociativeBinaryXmmOp(
+      X64Emitter& e, const EmitArgType& i, const FN& fn) {
+    if (i.src1.is_constant) {
+      XEASSERT(!i.src2.is_constant);
+      e.LoadConstantXmm(e.xmm0, i.src1.constant());
+      fn(e, i.dest, e.xmm0, i.src2);
+    } else if (i.src2.is_constant) {
+      e.LoadConstantXmm(e.xmm0, i.src2.constant());
+      fn(e, i.dest, i.src1, e.xmm0);
+    } else {
+      fn(e, i.dest, i.src1, i.src2);
+    }
+  }
+
   template <typename REG_REG_FN, typename REG_CONST_FN>
   static void EmitCommutativeCompareOp(
       X64Emitter& e, const EmitArgType& i,
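Note on the two new helpers: their bodies are identical; the names encode the contract callers must honor. `EmitCommutativeBinaryXmmOp` is used where operand order does not matter, `EmitAssociativeBinaryXmmOp` (used below for SGT/SGE/SUB/DIV, so "non-commutative" would arguably be the more accurate name) where it does. Neither variant actually swaps operands: a constant is materialized into `xmm0` in its original position, so order is preserved either way. The `XEASSERT(!i.src2.is_constant)` encodes the invariant that HIR constant folding has already collapsed const-op-const before sequences run. A standalone model of the dispatch, with plain floats standing in for XMM registers (illustrative only, not emitter code):

```cpp
// Standalone model of the dispatch in the two helpers above. Names here
// are illustrative, not part of the patch.
#include <cassert>

struct OperandModel {
  bool is_constant;
  float value;  // stands in for either register contents or a literal
};

template <typename FN>
float BinaryXmmOpModel(const OperandModel& src1, const OperandModel& src2,
                       const FN& fn) {
  float xmm0;  // scratch; written only when one operand is constant
  if (src1.is_constant) {
    assert(!src2.is_constant);  // const-op-const was folded in the HIR
    xmm0 = src1.value;          // LoadConstantXmm(e.xmm0, src1.constant())
    return fn(xmm0, src2.value);
  } else if (src2.is_constant) {
    xmm0 = src2.value;
    return fn(src1.value, xmm0);
  }
  return fn(src1.value, src2.value);
}

// Operand order survives even for non-commutative ops: 4.0f - x, not x - 4.0f.
float SubConstLhs(float x) {
  return BinaryXmmOpModel({true, 4.0f}, {false, x},
                          [](float a, float b) { return a - b; });
}
```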
diff --git a/src/alloy/backend/x64/x64_sequences.cc b/src/alloy/backend/x64/x64_sequences.cc
index 2d02a2118..9e94888cb 100644
--- a/src/alloy/backend/x64/x64_sequences.cc
+++ b/src/alloy/backend/x64/x64_sequences.cc
@@ -2272,20 +2272,23 @@ EMITTER_OPCODE_TABLE(
 // ============================================================================
 EMITTER(VECTOR_COMPARE_EQ_V128, MATCH(I<OPCODE_VECTOR_COMPARE_EQ, V128<>, V128<>, V128<>>)) {
   static void Emit(X64Emitter& e, const EmitArgType& i) {
-    switch (i.instr->flags) {
-      case INT8_TYPE:
-        e.vpcmpeqb(i.dest, i.src1, i.src2);
-        break;
-      case INT16_TYPE:
-        e.vpcmpeqw(i.dest, i.src1, i.src2);
-        break;
-      case INT32_TYPE:
-        e.vpcmpeqd(i.dest, i.src1, i.src2);
-        break;
-      case FLOAT32_TYPE:
-        e.vcmpeqps(i.dest, i.src1, i.src2);
-        break;
-    }
+    EmitCommutativeBinaryXmmOp(e, i,
+        [&i](X64Emitter& e, Xmm dest, Xmm src1, Xmm src2) {
+          switch (i.instr->flags) {
+            case INT8_TYPE:
+              e.vpcmpeqb(dest, src1, src2);
+              break;
+            case INT16_TYPE:
+              e.vpcmpeqw(dest, src1, src2);
+              break;
+            case INT32_TYPE:
+              e.vpcmpeqd(dest, src1, src2);
+              break;
+            case FLOAT32_TYPE:
+              e.vcmpeqps(dest, src1, src2);
+              break;
+          }
+        });
   }
 };
 EMITTER_OPCODE_TABLE(
@@ -2298,20 +2301,23 @@ EMITTER_OPCODE_TABLE(
 // ============================================================================
 EMITTER(VECTOR_COMPARE_SGT_V128, MATCH(I<OPCODE_VECTOR_COMPARE_SGT, V128<>, V128<>, V128<>>)) {
   static void Emit(X64Emitter& e, const EmitArgType& i) {
-    switch (i.instr->flags) {
-      case INT8_TYPE:
-        e.vpcmpgtb(i.dest, i.src1, i.src2);
-        break;
-      case INT16_TYPE:
-        e.vpcmpgtw(i.dest, i.src1, i.src2);
-        break;
-      case INT32_TYPE:
-        e.vpcmpgtd(i.dest, i.src1, i.src2);
-        break;
-      case FLOAT32_TYPE:
-        e.vcmpgtps(i.dest, i.src1, i.src2);
-        break;
-    }
+    EmitAssociativeBinaryXmmOp(e, i,
+        [&i](X64Emitter& e, Xmm dest, Xmm src1, Xmm src2) {
+          switch (i.instr->flags) {
+            case INT8_TYPE:
+              e.vpcmpgtb(dest, src1, src2);
+              break;
+            case INT16_TYPE:
+              e.vpcmpgtw(dest, src1, src2);
+              break;
+            case INT32_TYPE:
+              e.vpcmpgtd(dest, src1, src2);
+              break;
+            case FLOAT32_TYPE:
+              e.vcmpgtps(dest, src1, src2);
+              break;
+          }
+        });
   }
 };
 EMITTER_OPCODE_TABLE(
@@ -2324,26 +2330,29 @@ EMITTER_OPCODE_TABLE(
 // ============================================================================
 EMITTER(VECTOR_COMPARE_SGE_V128, MATCH(I<OPCODE_VECTOR_COMPARE_SGE, V128<>, V128<>, V128<>>)) {
   static void Emit(X64Emitter& e, const EmitArgType& i) {
-    switch (i.instr->flags) {
-      case INT8_TYPE:
-        e.vpcmpgtb(i.dest, i.src1, i.src2);
-        e.vpcmpeqb(e.xmm0, i.src1, i.src2);
-        e.vpor(i.dest, e.xmm0);
-        break;
-      case INT16_TYPE:
-        e.vpcmpgtw(i.dest, i.src1, i.src2);
-        e.vpcmpeqw(e.xmm0, i.src1, i.src2);
-        e.vpor(i.dest, e.xmm0);
-        break;
-      case INT32_TYPE:
-        e.vpcmpgtd(i.dest, i.src1, i.src2);
-        e.vpcmpeqd(e.xmm0, i.src1, i.src2);
-        e.vpor(i.dest, e.xmm0);
-        break;
-      case FLOAT32_TYPE:
-        e.vcmpgeps(i.dest, i.src1, i.src2);
-        break;
-    }
+    EmitAssociativeBinaryXmmOp(e, i,
+        [&i](X64Emitter& e, Xmm dest, Xmm src1, Xmm src2) {
+          switch (i.instr->flags) {
+            case INT8_TYPE:
+              e.vpcmpgtb(dest, src1, src2);
+              e.vpcmpeqb(e.xmm0, src1, src2);
+              e.vpor(dest, e.xmm0);
+              break;
+            case INT16_TYPE:
+              e.vpcmpgtw(dest, src1, src2);
+              e.vpcmpeqw(e.xmm0, src1, src2);
+              e.vpor(dest, e.xmm0);
+              break;
+            case INT32_TYPE:
+              e.vpcmpgtd(dest, src1, src2);
+              e.vpcmpeqd(e.xmm0, src1, src2);
+              e.vpor(dest, e.xmm0);
+              break;
+            case FLOAT32_TYPE:
+              e.vcmpgeps(dest, src1, src2);
+              break;
+          }
+        });
   }
 };
 EMITTER_OPCODE_TABLE(
@@ -2412,17 +2421,26 @@ EMITTER(ADD_I64, MATCH(I<OPCODE_ADD, I64<>, I64<>, I64<>>)) {
 };
 EMITTER(ADD_F32, MATCH(I<OPCODE_ADD, F32<>, F32<>, F32<>>)) {
   static void Emit(X64Emitter& e, const EmitArgType& i) {
-    e.vaddss(i.dest, i.src1, i.src2);
+    EmitCommutativeBinaryXmmOp(e, i,
+        [](X64Emitter& e, Xmm dest, Xmm src1, Xmm src2) {
+          e.vaddss(dest, src1, src2);
+        });
   }
 };
 EMITTER(ADD_F64, MATCH(I<OPCODE_ADD, F64<>, F64<>, F64<>>)) {
   static void Emit(X64Emitter& e, const EmitArgType& i) {
-    e.vaddsd(i.dest, i.src1, i.src2);
+    EmitCommutativeBinaryXmmOp(e, i,
+        [](X64Emitter& e, Xmm dest, Xmm src1, Xmm src2) {
+          e.vaddsd(dest, src1, src2);
+        });
   }
 };
 EMITTER(ADD_V128, MATCH(I<OPCODE_ADD, V128<>, V128<>, V128<>>)) {
   static void Emit(X64Emitter& e, const EmitArgType& i) {
-    e.vaddps(i.dest, i.src1, i.src2);
+    EmitCommutativeBinaryXmmOp(e, i,
+        [](X64Emitter& e, Xmm dest, Xmm src1, Xmm src2) {
+          e.vaddps(dest, src1, src2);
+        });
   }
 };
 EMITTER_OPCODE_TABLE(
@@ -2569,19 +2587,28 @@ EMITTER(SUB_I64, MATCH(I<OPCODE_SUB, I64<>, I64<>, I64<>>)) {
 EMITTER(SUB_F32, MATCH(I<OPCODE_SUB, F32<>, F32<>, F32<>>)) {
   static void Emit(X64Emitter& e, const EmitArgType& i) {
     XEASSERT(!i.instr->flags);
-    e.vsubss(i.dest, i.src1, i.src2);
+    EmitAssociativeBinaryXmmOp(e, i,
+        [](X64Emitter& e, Xmm dest, Xmm src1, Xmm src2) {
+          e.vsubss(dest, src1, src2);
+        });
   }
 };
 EMITTER(SUB_F64, MATCH(I<OPCODE_SUB, F64<>, F64<>, F64<>>)) {
   static void Emit(X64Emitter& e, const EmitArgType& i) {
     XEASSERT(!i.instr->flags);
-    e.vsubsd(i.dest, i.src1, i.src2);
+    EmitAssociativeBinaryXmmOp(e, i,
+        [](X64Emitter& e, Xmm dest, Xmm src1, Xmm src2) {
+          e.vsubsd(dest, src1, src2);
+        });
   }
 };
 EMITTER(SUB_V128, MATCH(I<OPCODE_SUB, V128<>, V128<>, V128<>>)) {
   static void Emit(X64Emitter& e, const EmitArgType& i) {
     XEASSERT(!i.instr->flags);
-    e.vsubps(i.dest, i.src1, i.src2);
+    EmitAssociativeBinaryXmmOp(e, i,
+        [](X64Emitter& e, Xmm dest, Xmm src1, Xmm src2) {
+          e.vsubps(dest, src1, src2);
+        });
   }
 };
 EMITTER_OPCODE_TABLE(
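A note on the SGE lowering above: SSE/AVX provide signed greater-than (`vpcmpgt*`) and equality (`vpcmpeq*`) compares for integer lanes but no signed greater-or-equal, hence the GT/EQ/OR triple. Reusing `e.xmm0` for the EQ mask is safe even when the helper has parked a constant operand there, because the constant's last read and `xmm0`'s redefinition happen within the same `vpcmpeq*` instruction. The identity the integer paths rely on, checked standalone with SSE2 intrinsics (not part of the patch):

```cpp
// a >= b  <=>  (a > b) | (a == b), per 32-bit signed lane.
#include <emmintrin.h>
#include <cstdio>

int main() {
  __m128i a = _mm_setr_epi32(1, 5, -3, 7);
  __m128i b = _mm_setr_epi32(2, 5, -4, 9);
  // Build the >= mask exactly the way the emitter does: GT | EQ.
  __m128i ge = _mm_or_si128(_mm_cmpgt_epi32(a, b), _mm_cmpeq_epi32(a, b));
  // One sign bit per lane: lanes 1 (5 >= 5) and 2 (-3 >= -4) are set.
  int mask = _mm_movemask_ps(_mm_castsi128_ps(ge));
  printf("%d\n", mask);  // prints 6 (0b0110)
  return 0;
}
```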
@@ -2682,19 +2709,28 @@ EMITTER(MUL_I64, MATCH(I<OPCODE_MUL, I64<>, I64<>, I64<>>)) {
 EMITTER(MUL_F32, MATCH(I<OPCODE_MUL, F32<>, F32<>, F32<>>)) {
   static void Emit(X64Emitter& e, const EmitArgType& i) {
     XEASSERT(!i.instr->flags);
-    e.vmulss(i.dest, i.src1, i.src2);
+    EmitCommutativeBinaryXmmOp(e, i,
+        [](X64Emitter& e, Xmm dest, Xmm src1, Xmm src2) {
+          e.vmulss(dest, src1, src2);
+        });
   }
 };
 EMITTER(MUL_F64, MATCH(I<OPCODE_MUL, F64<>, F64<>, F64<>>)) {
   static void Emit(X64Emitter& e, const EmitArgType& i) {
     XEASSERT(!i.instr->flags);
-    e.vmulsd(i.dest, i.src1, i.src2);
+    EmitCommutativeBinaryXmmOp(e, i,
+        [](X64Emitter& e, Xmm dest, Xmm src1, Xmm src2) {
+          e.vmulsd(dest, src1, src2);
+        });
   }
 };
 EMITTER(MUL_V128, MATCH(I<OPCODE_MUL, V128<>, V128<>, V128<>>)) {
   static void Emit(X64Emitter& e, const EmitArgType& i) {
     XEASSERT(!i.instr->flags);
-    e.vmulps(i.dest, i.src1, i.src2);
+    EmitCommutativeBinaryXmmOp(e, i,
+        [](X64Emitter& e, Xmm dest, Xmm src1, Xmm src2) {
+          e.vmulps(dest, src1, src2);
+        });
   }
 };
 EMITTER_OPCODE_TABLE(
@@ -2969,19 +3005,28 @@ EMITTER(DIV_I64, MATCH(I<OPCODE_DIV, I64<>, I64<>, I64<>>)) {
 EMITTER(DIV_F32, MATCH(I<OPCODE_DIV, F32<>, F32<>, F32<>>)) {
   static void Emit(X64Emitter& e, const EmitArgType& i) {
     XEASSERT(!i.instr->flags);
-    e.vdivss(i.dest, i.src1, i.src2);
+    EmitAssociativeBinaryXmmOp(e, i,
+        [](X64Emitter& e, Xmm dest, Xmm src1, Xmm src2) {
+          e.vdivss(dest, src1, src2);
+        });
   }
 };
 EMITTER(DIV_F64, MATCH(I<OPCODE_DIV, F64<>, F64<>, F64<>>)) {
   static void Emit(X64Emitter& e, const EmitArgType& i) {
     XEASSERT(!i.instr->flags);
-    e.vdivsd(i.dest, i.src1, i.src2);
+    EmitAssociativeBinaryXmmOp(e, i,
+        [](X64Emitter& e, Xmm dest, Xmm src1, Xmm src2) {
+          e.vdivsd(dest, src1, src2);
+        });
   }
 };
 EMITTER(DIV_V128, MATCH(I<OPCODE_DIV, V128<>, V128<>, V128<>>)) {
   static void Emit(X64Emitter& e, const EmitArgType& i) {
     XEASSERT(!i.instr->flags);
-    e.vdivps(i.dest, i.src1, i.src2);
+    EmitAssociativeBinaryXmmOp(e, i,
+        [](X64Emitter& e, Xmm dest, Xmm src1, Xmm src2) {
+          e.vdivps(dest, src1, src2);
+        });
   }
 };
 EMITTER_OPCODE_TABLE(
@@ -3274,9 +3319,11 @@ EMITTER_OPCODE_TABLE(
 EMITTER(DOT_PRODUCT_3_V128, MATCH(I<OPCODE_DOT_PRODUCT_3, F32<>, V128<>, V128<>>)) {
   static void Emit(X64Emitter& e, const EmitArgType& i) {
     // http://msdn.microsoft.com/en-us/library/bb514054(v=vs.90).aspx
-    // TODO(benvanik): verify ordering
-    // TODO(benvanik): apparently this is very slow - find alternative?
-    e.vdpps(i.dest, i.src1, i.src2, B01110001);
+    EmitCommutativeBinaryXmmOp(e, i,
+        [](X64Emitter& e, Xmm dest, Xmm src1, Xmm src2) {
+          // TODO(benvanik): apparently this is very slow - find alternative?
+          e.vdpps(dest, src1, src2, B01110001);
+        });
   }
 };
 EMITTER_OPCODE_TABLE(
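On the `vdpps` immediates: the high nibble selects which lanes enter the summed products (xyz for the 3-element dot, xyzw for the 4-element one) and the low nibble selects which destination lanes receive the result (lane 0 only here), so `B01110001` is `0x71` and `B11110001` is `0xF1`. The same operation in intrinsics form, as a standalone check (not from the patch; build with SSE4.1, e.g. `-msse4.1`):

```cpp
// _mm_dp_ps immediate: bits 7..4 gate the input lane products,
// bits 3..0 gate which output lanes get the sum.
#include <smmintrin.h>  // SSE4.1 dpps
#include <cstdio>

int main() {
  __m128 a = _mm_setr_ps(1.0f, 2.0f, 3.0f, 4.0f);
  __m128 b = _mm_setr_ps(10.0f, 20.0f, 30.0f, 40.0f);
  float dot3 = _mm_cvtss_f32(_mm_dp_ps(a, b, 0x71));  // 1*10 + 2*20 + 3*30
  float dot4 = _mm_cvtss_f32(_mm_dp_ps(a, b, 0xF1));  // ... + 4*40
  printf("%.1f %.1f\n", dot3, dot4);  // 140.0 300.0
  return 0;
}
```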
@@ -3290,9 +3337,11 @@ EMITTER_OPCODE_TABLE(
 // ============================================================================
 EMITTER(DOT_PRODUCT_4_V128, MATCH(I<OPCODE_DOT_PRODUCT_4, F32<>, V128<>, V128<>>)) {
   static void Emit(X64Emitter& e, const EmitArgType& i) {
     // http://msdn.microsoft.com/en-us/library/bb514054(v=vs.90).aspx
-    // TODO(benvanik): verify ordering
-    // TODO(benvanik): apparently this is very slow - find alternative?
-    e.vdpps(i.dest, i.src1, i.src2, B11110001);
+    EmitCommutativeBinaryXmmOp(e, i,
+        [](X64Emitter& e, Xmm dest, Xmm src1, Xmm src2) {
+          // TODO(benvanik): apparently this is very slow - find alternative?
+          e.vdpps(dest, src1, src2, B11110001);
+        });
   }
 };
 EMITTER_OPCODE_TABLE(
@@ -3333,7 +3382,10 @@ EMITTER(AND_I64, MATCH(I<OPCODE_AND, I64<>, I64<>, I64<>>)) {
 };
 EMITTER(AND_V128, MATCH(I<OPCODE_AND, V128<>, V128<>, V128<>>)) {
   static void Emit(X64Emitter& e, const EmitArgType& i) {
-    e.vpand(i.dest, i.src1, i.src2);
+    EmitCommutativeBinaryXmmOp(e, i,
+        [](X64Emitter& e, Xmm dest, Xmm src1, Xmm src2) {
+          e.vpand(dest, src1, src2);
+        });
   }
 };
 EMITTER_OPCODE_TABLE(
@@ -3378,7 +3430,10 @@ EMITTER(OR_I64, MATCH(I<OPCODE_OR, I64<>, I64<>, I64<>>)) {
 };
 EMITTER(OR_V128, MATCH(I<OPCODE_OR, V128<>, V128<>, V128<>>)) {
   static void Emit(X64Emitter& e, const EmitArgType& i) {
-    e.vpor(i.dest, i.src1, i.src2);
+    EmitCommutativeBinaryXmmOp(e, i,
+        [](X64Emitter& e, Xmm dest, Xmm src1, Xmm src2) {
+          e.vpor(dest, src1, src2);
+        });
   }
 };
 EMITTER_OPCODE_TABLE(
@@ -3423,7 +3478,10 @@ EMITTER(XOR_I64, MATCH(I<OPCODE_XOR, I64<>, I64<>, I64<>>)) {
 };
 EMITTER(XOR_V128, MATCH(I<OPCODE_XOR, V128<>, V128<>, V128<>>)) {
   static void Emit(X64Emitter& e, const EmitArgType& i) {
-    e.vpxor(i.dest, i.src1, i.src2);
+    EmitCommutativeBinaryXmmOp(e, i,
+        [](X64Emitter& e, Xmm dest, Xmm src1, Xmm src2) {
+          e.vpxor(dest, src1, src2);
+        });
   }
 };
 EMITTER_OPCODE_TABLE(
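With the helpers in place, giving any remaining binary XMM emitter constant support is mechanical: wrap the instruction in a lambda and pick the helper that matches the op's ordering requirements. A sketch of the recipe applied to one more op; this emitter is not part of the patch and is shown only to illustrate the pattern:

```cpp
// Sketch only (not in this patch). vmaxps is commutative for ordinary
// values but returns src2 when a lane compares unordered (NaN), so the
// order-preserving helper is the conservative choice here.
EMITTER(MAX_V128, MATCH(I<OPCODE_MAX, V128<>, V128<>, V128<>>)) {
  static void Emit(X64Emitter& e, const EmitArgType& i) {
    EmitAssociativeBinaryXmmOp(e, i,
        [](X64Emitter& e, Xmm dest, Xmm src1, Xmm src2) {
          e.vmaxps(dest, src1, src2);
        });
  }
};
```

Since every lambda receives plain `Xmm` arguments, a constant operand costs one `LoadConstantXmm` into the shared `xmm0` scratch and nothing else; emitters that need `xmm0` internally (like SGE above) must consume the constant before redefining it.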