Constant support for a lot of vector ops.

This commit is contained in:
Ben Vanik 2014-05-27 11:56:55 -07:00
parent efa0565398
commit 16bac6d9c5
2 changed files with 157 additions and 69 deletions

View File

@ -628,6 +628,36 @@ struct SingleSequence : public Sequence<SingleSequence<SEQ, T>, T> {
} }
} }
// Emits a binary XMM operation whose operands may be swapped freely, allowing
// at most one of the two sources to be a constant.
//
// A constant source is first materialized into e.xmm0 via LoadConstantXmm and
// that register is handed to |fn| in the constant's original position.
// |fn| is invoked exactly once as fn(e, dest, src1, src2).
//
// Both sources being constant is not supported (asserted in the src1 path).
template <typename FN>
static void EmitCommutativeBinaryXmmOp(
    X64Emitter& e, const EmitArgType& i, const FN& fn) {
  // Fast path: both operands already live in registers.
  if (!i.src1.is_constant && !i.src2.is_constant) {
    fn(e, i.dest, i.src1, i.src2);
    return;
  }
  if (i.src1.is_constant) {
    // Only one scratch register (xmm0) is used here, so src2 must be a
    // register.
    XEASSERT(!i.src2.is_constant);
    e.LoadConstantXmm(e.xmm0, i.src1.constant());
    fn(e, i.dest, e.xmm0, i.src2);
    return;
  }
  // Remaining case: only src2 is constant.
  e.LoadConstantXmm(e.xmm0, i.src2.constant());
  fn(e, i.dest, i.src1, e.xmm0);
}
// Emits a binary XMM operation while preserving operand order, allowing at
// most one of the two sources to be a constant.
//
// NOTE(review): despite the "Associative" name, this helper is what the
// order-sensitive emitters (SUB, DIV, compare GT/GE) call; its contract is
// "never swap src1 and src2", i.e. it is the non-commutative variant.
//
// A constant source is materialized into e.xmm0 via LoadConstantXmm and passed
// to |fn| in the same position the constant occupied. |fn| is invoked exactly
// once as fn(e, dest, src1, src2).
template <typename FN>
static void EmitAssociativeBinaryXmmOp(
    X64Emitter& e, const EmitArgType& i, const FN& fn) {
  // Fast path: both operands already live in registers.
  if (!i.src1.is_constant && !i.src2.is_constant) {
    fn(e, i.dest, i.src1, i.src2);
    return;
  }
  if (i.src1.is_constant) {
    // Only one scratch register (xmm0) is used here, so src2 must be a
    // register.
    XEASSERT(!i.src2.is_constant);
    e.LoadConstantXmm(e.xmm0, i.src1.constant());
    fn(e, i.dest, e.xmm0, i.src2);
    return;
  }
  // Remaining case: only src2 is constant.
  e.LoadConstantXmm(e.xmm0, i.src2.constant());
  fn(e, i.dest, i.src1, e.xmm0);
}
template <typename REG_REG_FN, typename REG_CONST_FN> template <typename REG_REG_FN, typename REG_CONST_FN>
static void EmitCommutativeCompareOp( static void EmitCommutativeCompareOp(
X64Emitter& e, const EmitArgType& i, X64Emitter& e, const EmitArgType& i,

View File

@ -2272,20 +2272,23 @@ EMITTER_OPCODE_TABLE(
// ============================================================================ // ============================================================================
EMITTER(VECTOR_COMPARE_EQ_V128, MATCH(I<OPCODE_VECTOR_COMPARE_EQ, V128<>, V128<>, V128<>>)) {
  // Per-lane equality compare. The lane type is read from the instruction
  // flags; either source may be a constant (handled by the helper).
  static void Emit(X64Emitter& e, const EmitArgType& i) {
    EmitCommutativeBinaryXmmOp(e, i,
        [&i](X64Emitter& em, Xmm dst, Xmm lhs, Xmm rhs) {
          const auto lane_type = i.instr->flags;
          switch (lane_type) {
          case INT8_TYPE:
            em.vpcmpeqb(dst, lhs, rhs);
            break;
          case INT16_TYPE:
            em.vpcmpeqw(dst, lhs, rhs);
            break;
          case INT32_TYPE:
            em.vpcmpeqd(dst, lhs, rhs);
            break;
          case FLOAT32_TYPE:
            em.vcmpeqps(dst, lhs, rhs);
            break;
          }
        });
  }
};
EMITTER_OPCODE_TABLE( EMITTER_OPCODE_TABLE(
@ -2298,20 +2301,23 @@ EMITTER_OPCODE_TABLE(
// ============================================================================ // ============================================================================
EMITTER(VECTOR_COMPARE_SGT_V128, MATCH(I<OPCODE_VECTOR_COMPARE_SGT, V128<>, V128<>, V128<>>)) {
  // Per-lane signed greater-than compare (src1 > src2). Operand order matters,
  // so the order-preserving helper is used. The lane type is read from the
  // instruction flags.
  static void Emit(X64Emitter& e, const EmitArgType& i) {
    EmitAssociativeBinaryXmmOp(e, i,
        [&i](X64Emitter& em, Xmm dst, Xmm lhs, Xmm rhs) {
          const auto lane_type = i.instr->flags;
          switch (lane_type) {
          case INT8_TYPE:
            em.vpcmpgtb(dst, lhs, rhs);
            break;
          case INT16_TYPE:
            em.vpcmpgtw(dst, lhs, rhs);
            break;
          case INT32_TYPE:
            em.vpcmpgtd(dst, lhs, rhs);
            break;
          case FLOAT32_TYPE:
            em.vcmpgtps(dst, lhs, rhs);
            break;
          }
        });
  }
};
EMITTER_OPCODE_TABLE( EMITTER_OPCODE_TABLE(
@ -2324,26 +2330,29 @@ EMITTER_OPCODE_TABLE(
// ============================================================================ // ============================================================================
EMITTER(VECTOR_COMPARE_SGE_V128, MATCH(I<OPCODE_VECTOR_COMPARE_SGE, V128<>, V128<>, V128<>>)) {
  // Per-lane signed greater-than-or-equal compare (src1 >= src2). Operand
  // order matters, so the order-preserving helper is used. The lane type is
  // read from the instruction flags.
  //
  // Integer lanes are computed as (src1 > src2) | (src1 == src2), using xmm0
  // as scratch for the equality mask.
  //
  // Ordering constraint: when one source is a constant the helper has loaded
  // it into xmm0, so |src1| or |src2| may *be* xmm0. The greater-than compare
  // must therefore run first; the equality compare is the last reader of the
  // constant and may overwrite xmm0 in place.
  // NOTE(review): this sequence assumes |dest| does not alias |src1|/|src2|
  // (the first compare would clobber a source) - confirm against the register
  // allocator.
  static void Emit(X64Emitter& e, const EmitArgType& i) {
    EmitAssociativeBinaryXmmOp(e, i,
        [&i](X64Emitter& e, Xmm dest, Xmm src1, Xmm src2) {
          switch (i.instr->flags) {
          case INT8_TYPE:
            e.vpcmpgtb(dest, src1, src2);
            e.vpcmpeqb(e.xmm0, src1, src2);
            e.vpor(dest, e.xmm0);
            break;
          case INT16_TYPE:
            e.vpcmpgtw(dest, src1, src2);
            e.vpcmpeqw(e.xmm0, src1, src2);
            e.vpor(dest, e.xmm0);
            break;
          case INT32_TYPE:
            e.vpcmpgtd(dest, src1, src2);
            e.vpcmpeqd(e.xmm0, src1, src2);
            e.vpor(dest, e.xmm0);
            break;
          case FLOAT32_TYPE:
            // Use the operands supplied by the helper (not i.src1/i.src2) so
            // that a constant source loaded into xmm0 is actually honored.
            e.vcmpgeps(dest, src1, src2);
            break;
          }
        });
  }
};
EMITTER_OPCODE_TABLE( EMITTER_OPCODE_TABLE(
@ -2412,17 +2421,26 @@ EMITTER(ADD_I64, MATCH(I<OPCODE_ADD, I64<>, I64<>, I64<>>)) {
}; };
EMITTER(ADD_F32, MATCH(I<OPCODE_ADD, F32<>, F32<>, F32<>>)) {
  // Scalar single-precision add; either source may be a constant.
  static void Emit(X64Emitter& e, const EmitArgType& i) {
    auto emit_add = [](X64Emitter& em, Xmm dst, Xmm lhs, Xmm rhs) {
      em.vaddss(dst, lhs, rhs);
    };
    EmitCommutativeBinaryXmmOp(e, i, emit_add);
  }
};
EMITTER(ADD_F64, MATCH(I<OPCODE_ADD, F64<>, F64<>, F64<>>)) {
  // Scalar double-precision add; either source may be a constant.
  static void Emit(X64Emitter& e, const EmitArgType& i) {
    auto emit_add = [](X64Emitter& em, Xmm dst, Xmm lhs, Xmm rhs) {
      em.vaddsd(dst, lhs, rhs);
    };
    EmitCommutativeBinaryXmmOp(e, i, emit_add);
  }
};
EMITTER(ADD_V128, MATCH(I<OPCODE_ADD, V128<>, V128<>, V128<>>)) {
  // Packed single-precision add; either source may be a constant.
  static void Emit(X64Emitter& e, const EmitArgType& i) {
    auto emit_add = [](X64Emitter& em, Xmm dst, Xmm lhs, Xmm rhs) {
      em.vaddps(dst, lhs, rhs);
    };
    EmitCommutativeBinaryXmmOp(e, i, emit_add);
  }
};
EMITTER_OPCODE_TABLE( EMITTER_OPCODE_TABLE(
@ -2569,19 +2587,28 @@ EMITTER(SUB_I64, MATCH(I<OPCODE_SUB, I64<>, I64<>, I64<>>)) {
EMITTER(SUB_F32, MATCH(I<OPCODE_SUB, F32<>, F32<>, F32<>>)) {
  // Scalar single-precision subtract (src1 - src2). Operand order is
  // preserved; either source may be a constant. No instruction flags are
  // supported.
  static void Emit(X64Emitter& e, const EmitArgType& i) {
    XEASSERT(!i.instr->flags);
    auto emit_sub = [](X64Emitter& em, Xmm dst, Xmm lhs, Xmm rhs) {
      em.vsubss(dst, lhs, rhs);
    };
    EmitAssociativeBinaryXmmOp(e, i, emit_sub);
  }
};
EMITTER(SUB_F64, MATCH(I<OPCODE_SUB, F64<>, F64<>, F64<>>)) {
  // Scalar double-precision subtract (src1 - src2). Operand order is
  // preserved; either source may be a constant. No instruction flags are
  // supported.
  static void Emit(X64Emitter& e, const EmitArgType& i) {
    XEASSERT(!i.instr->flags);
    auto emit_sub = [](X64Emitter& em, Xmm dst, Xmm lhs, Xmm rhs) {
      em.vsubsd(dst, lhs, rhs);
    };
    EmitAssociativeBinaryXmmOp(e, i, emit_sub);
  }
};
EMITTER(SUB_V128, MATCH(I<OPCODE_SUB, V128<>, V128<>, V128<>>)) {
  // Packed single-precision subtract (src1 - src2). Operand order is
  // preserved; either source may be a constant. No instruction flags are
  // supported.
  static void Emit(X64Emitter& e, const EmitArgType& i) {
    XEASSERT(!i.instr->flags);
    auto emit_sub = [](X64Emitter& em, Xmm dst, Xmm lhs, Xmm rhs) {
      em.vsubps(dst, lhs, rhs);
    };
    EmitAssociativeBinaryXmmOp(e, i, emit_sub);
  }
};
EMITTER_OPCODE_TABLE( EMITTER_OPCODE_TABLE(
@ -2682,19 +2709,28 @@ EMITTER(MUL_I64, MATCH(I<OPCODE_MUL, I64<>, I64<>, I64<>>)) {
EMITTER(MUL_F32, MATCH(I<OPCODE_MUL, F32<>, F32<>, F32<>>)) {
  // Scalar single-precision multiply; either source may be a constant.
  // No instruction flags are supported.
  static void Emit(X64Emitter& e, const EmitArgType& i) {
    XEASSERT(!i.instr->flags);
    auto emit_mul = [](X64Emitter& em, Xmm dst, Xmm lhs, Xmm rhs) {
      em.vmulss(dst, lhs, rhs);
    };
    EmitCommutativeBinaryXmmOp(e, i, emit_mul);
  }
};
EMITTER(MUL_F64, MATCH(I<OPCODE_MUL, F64<>, F64<>, F64<>>)) {
  // Scalar double-precision multiply; either source may be a constant.
  // No instruction flags are supported.
  static void Emit(X64Emitter& e, const EmitArgType& i) {
    XEASSERT(!i.instr->flags);
    auto emit_mul = [](X64Emitter& em, Xmm dst, Xmm lhs, Xmm rhs) {
      em.vmulsd(dst, lhs, rhs);
    };
    EmitCommutativeBinaryXmmOp(e, i, emit_mul);
  }
};
EMITTER(MUL_V128, MATCH(I<OPCODE_MUL, V128<>, V128<>, V128<>>)) {
  // Packed single-precision multiply; either source may be a constant.
  // No instruction flags are supported.
  static void Emit(X64Emitter& e, const EmitArgType& i) {
    XEASSERT(!i.instr->flags);
    auto emit_mul = [](X64Emitter& em, Xmm dst, Xmm lhs, Xmm rhs) {
      em.vmulps(dst, lhs, rhs);
    };
    EmitCommutativeBinaryXmmOp(e, i, emit_mul);
  }
};
EMITTER_OPCODE_TABLE( EMITTER_OPCODE_TABLE(
@ -2969,19 +3005,28 @@ EMITTER(DIV_I64, MATCH(I<OPCODE_DIV, I64<>, I64<>, I64<>>)) {
EMITTER(DIV_F32, MATCH(I<OPCODE_DIV, F32<>, F32<>, F32<>>)) {
  // Scalar single-precision divide (src1 / src2). Operand order is preserved;
  // either source may be a constant. No instruction flags are supported.
  static void Emit(X64Emitter& e, const EmitArgType& i) {
    XEASSERT(!i.instr->flags);
    auto emit_div = [](X64Emitter& em, Xmm dst, Xmm lhs, Xmm rhs) {
      em.vdivss(dst, lhs, rhs);
    };
    EmitAssociativeBinaryXmmOp(e, i, emit_div);
  }
};
EMITTER(DIV_F64, MATCH(I<OPCODE_DIV, F64<>, F64<>, F64<>>)) {
  // Scalar double-precision divide (src1 / src2). Operand order is preserved;
  // either source may be a constant. No instruction flags are supported.
  static void Emit(X64Emitter& e, const EmitArgType& i) {
    XEASSERT(!i.instr->flags);
    auto emit_div = [](X64Emitter& em, Xmm dst, Xmm lhs, Xmm rhs) {
      em.vdivsd(dst, lhs, rhs);
    };
    EmitAssociativeBinaryXmmOp(e, i, emit_div);
  }
};
EMITTER(DIV_V128, MATCH(I<OPCODE_DIV, V128<>, V128<>, V128<>>)) {
  // Packed single-precision divide (src1 / src2). Operand order is preserved;
  // either source may be a constant. No instruction flags are supported.
  static void Emit(X64Emitter& e, const EmitArgType& i) {
    XEASSERT(!i.instr->flags);
    auto emit_div = [](X64Emitter& em, Xmm dst, Xmm lhs, Xmm rhs) {
      em.vdivps(dst, lhs, rhs);
    };
    EmitAssociativeBinaryXmmOp(e, i, emit_div);
  }
};
EMITTER_OPCODE_TABLE( EMITTER_OPCODE_TABLE(
@ -3274,9 +3319,11 @@ EMITTER_OPCODE_TABLE(
EMITTER(DOT_PRODUCT_3_V128, MATCH(I<OPCODE_DOT_PRODUCT_3, F32<>, V128<>, V128<>>)) {
  // Three-component dot product via DPPS; either source may be a constant.
  static void Emit(X64Emitter& e, const EmitArgType& i) {
    // http://msdn.microsoft.com/en-us/library/bb514054(v=vs.90).aspx
    auto emit_dot = [](X64Emitter& em, Xmm dst, Xmm lhs, Xmm rhs) {
      // TODO(benvanik): apparently this is very slow - find alternative?
      // Mask (presumably a binary-literal macro for 0x71): the high nibble
      // 0111 selects lanes 0-2 for the sum, the low nibble 0001 writes the
      // result to lane 0 only.
      em.vdpps(dst, lhs, rhs, B01110001);
    };
    EmitCommutativeBinaryXmmOp(e, i, emit_dot);
  }
};
EMITTER_OPCODE_TABLE( EMITTER_OPCODE_TABLE(
@ -3290,9 +3337,11 @@ EMITTER_OPCODE_TABLE(
EMITTER(DOT_PRODUCT_4_V128, MATCH(I<OPCODE_DOT_PRODUCT_4, F32<>, V128<>, V128<>>)) {
  // Four-component dot product via DPPS; either source may be a constant.
  static void Emit(X64Emitter& e, const EmitArgType& i) {
    // http://msdn.microsoft.com/en-us/library/bb514054(v=vs.90).aspx
    auto emit_dot = [](X64Emitter& em, Xmm dst, Xmm lhs, Xmm rhs) {
      // TODO(benvanik): apparently this is very slow - find alternative?
      // Mask (presumably a binary-literal macro for 0xF1): the high nibble
      // 1111 selects all four lanes for the sum, the low nibble 0001 writes
      // the result to lane 0 only.
      em.vdpps(dst, lhs, rhs, B11110001);
    };
    EmitCommutativeBinaryXmmOp(e, i, emit_dot);
  }
};
EMITTER_OPCODE_TABLE( EMITTER_OPCODE_TABLE(
@ -3333,7 +3382,10 @@ EMITTER(AND_I64, MATCH(I<OPCODE_AND, I64<>, I64<>, I64<>>)) {
}; };
EMITTER(AND_V128, MATCH(I<OPCODE_AND, V128<>, V128<>, V128<>>)) {
  // 128-bit bitwise AND; either source may be a constant.
  static void Emit(X64Emitter& e, const EmitArgType& i) {
    auto emit_and = [](X64Emitter& em, Xmm dst, Xmm lhs, Xmm rhs) {
      em.vpand(dst, lhs, rhs);
    };
    EmitCommutativeBinaryXmmOp(e, i, emit_and);
  }
};
EMITTER_OPCODE_TABLE( EMITTER_OPCODE_TABLE(
@ -3378,7 +3430,10 @@ EMITTER(OR_I64, MATCH(I<OPCODE_OR, I64<>, I64<>, I64<>>)) {
}; };
EMITTER(OR_V128, MATCH(I<OPCODE_OR, V128<>, V128<>, V128<>>)) {
  // 128-bit bitwise OR; either source may be a constant.
  static void Emit(X64Emitter& e, const EmitArgType& i) {
    auto emit_or = [](X64Emitter& em, Xmm dst, Xmm lhs, Xmm rhs) {
      em.vpor(dst, lhs, rhs);
    };
    EmitCommutativeBinaryXmmOp(e, i, emit_or);
  }
};
EMITTER_OPCODE_TABLE( EMITTER_OPCODE_TABLE(
@ -3423,7 +3478,10 @@ EMITTER(XOR_I64, MATCH(I<OPCODE_XOR, I64<>, I64<>, I64<>>)) {
}; };
EMITTER(XOR_V128, MATCH(I<OPCODE_XOR, V128<>, V128<>, V128<>>)) {
  // 128-bit bitwise XOR; either source may be a constant.
  static void Emit(X64Emitter& e, const EmitArgType& i) {
    auto emit_xor = [](X64Emitter& em, Xmm dst, Xmm lhs, Xmm rhs) {
      em.vpxor(dst, lhs, rhs);
    };
    EmitCommutativeBinaryXmmOp(e, i, emit_xor);
  }
};
EMITTER_OPCODE_TABLE( EMITTER_OPCODE_TABLE(