Rewrite FMA mul sub/add to avoid register shuffling

Also downgrade some AVX opcodes to SSE
This commit is contained in:
Dr. Chat 2015-05-16 13:31:42 -05:00
parent 6239ff9616
commit 8e35ec0620
1 changed file with 54 additions and 52 deletions

View File

@ -3742,27 +3742,30 @@ EMITTER_OPCODE_TABLE(
// TODO(benvanik): use other forms (132/213/etc) to avoid register shuffling.
// dest could be src2 or src3 - need to ensure it's not before overwriting dest
// perhaps use other 132/213/etc
// Forms:
// - 132 -> $1 = $1 * $3 + $2
// - 213 -> $1 = $2 * $1 + $3
// - 231 -> $1 = $2 * $3 + $1
EMITTER(MUL_ADD_F32, MATCH(I<OPCODE_MUL_ADD, F32<>, F32<>, F32<>, F32<>>)) {
static void Emit(X64Emitter& e, const EmitArgType& i) {
// FMA extension
if (e.IsFeatureEnabled(kX64EmitFMA)) {
if (i.dest == i.src1) {
e.vfmadd213ss(i.dest, i.src2, i.src3);
} else if (i.dest == i.src2) {
e.vfmadd213ss(i.dest, i.src1, i.src3);
} else if (i.dest == i.src3) {
e.vfmadd231ss(i.dest, i.src1, i.src2);
} else {
if (i.dest != i.src2 && i.dest != i.src3) {
e.vmovss(i.dest, i.src1);
e.vfmadd213ss(i.dest, i.src2, i.src3);
} else {
e.vmovss(e.xmm0, i.src1);
e.vfmadd213ss(e.xmm0, i.src2, i.src3);
e.vmovss(i.dest, e.xmm0);
}
// Dest not equal to anything
e.movss(i.dest, i.src1);
e.vfmadd213ss(i.dest, i.src2, i.src3);
}
} else {
// If i.dest == i.src3, back up i.src3 so we don't overwrite it.
Xmm src3 = i.src3;
if (i.dest == i.src3) {
e.vmovss(e.xmm0, i.src3);
e.movss(e.xmm0, i.src3);
src3 = e.xmm0;
}
@ -3777,21 +3780,20 @@ EMITTER(MUL_ADD_F64, MATCH(I<OPCODE_MUL_ADD, F64<>, F64<>, F64<>, F64<>>)) {
if (e.IsFeatureEnabled(kX64EmitFMA)) {
if (i.dest == i.src1) {
e.vfmadd213sd(i.dest, i.src2, i.src3);
} else if (i.dest == i.src2) {
e.vfmadd213sd(i.dest, i.src1, i.src3);
} else if (i.dest == i.src3) {
e.vfmadd231sd(i.dest, i.src1, i.src2);
} else {
if (i.dest != i.src2 && i.dest != i.src3) {
e.vmovsd(i.dest, i.src1);
e.vfmadd213sd(i.dest, i.src2, i.src3);
} else {
e.vmovsd(e.xmm0, i.src1);
e.vfmadd213sd(e.xmm0, i.src2, i.src3);
e.vmovsd(i.dest, e.xmm0);
}
// Dest not equal to anything
e.movsd(i.dest, i.src1);
e.vfmadd213sd(i.dest, i.src2, i.src3);
}
} else {
// If i.dest == i.src3, back up i.src3 so we don't overwrite it.
Xmm src3 = i.src3;
if (i.dest == i.src3) {
e.vmovsd(e.xmm0, i.src3);
e.movsd(e.xmm0, i.src3);
src3 = e.xmm0;
}
@ -3806,15 +3808,14 @@ EMITTER(MUL_ADD_V128, MATCH(I<OPCODE_MUL_ADD, V128<>, V128<>, V128<>, V128<>>))
if (e.IsFeatureEnabled(kX64EmitFMA)) {
if (i.dest == i.src1) {
e.vfmadd213ps(i.dest, i.src2, i.src3);
} else if (i.dest == i.src2) {
e.vfmadd213ps(i.dest, i.src1, i.src3);
} else if (i.dest == i.src3) {
e.vfmadd231ps(i.dest, i.src1, i.src2);
} else {
if (i.dest != i.src2 && i.dest != i.src3) {
e.vmovdqa(i.dest, i.src1);
e.vfmadd213ps(i.dest, i.src2, i.src3);
} else {
e.vmovdqa(e.xmm0, i.src1);
e.vfmadd213ps(e.xmm0, i.src2, i.src3);
e.vmovdqa(i.dest, e.xmm0);
}
// Dest not equal to anything
e.vmovdqa(i.dest, i.src1);
e.vfmadd213ps(i.dest, i.src2, i.src3);
}
} else {
// If i.dest == i.src3, back up i.src3 so we don't overwrite it.
@ -3844,27 +3845,30 @@ EMITTER_OPCODE_TABLE(
// TODO(benvanik): use other forms (132/213/etc) to avoid register shuffling.
// dest could be src2 or src3 - need to ensure it's not before overwriting dest
// perhaps use other 132/213/etc
// Forms:
// - 132 -> $1 = $1 * $3 - $2
// - 213 -> $1 = $2 * $1 - $3
// - 231 -> $1 = $2 * $3 - $1
EMITTER(MUL_SUB_F32, MATCH(I<OPCODE_MUL_SUB, F32<>, F32<>, F32<>, F32<>>)) {
static void Emit(X64Emitter& e, const EmitArgType& i) {
// FMA extension
if (e.IsFeatureEnabled(kX64EmitFMA)) {
if (i.dest == i.src1) {
e.vfmsub213ss(i.dest, i.src2, i.src3);
} else if (i.dest == i.src2) {
e.vfmsub213ss(i.dest, i.src1, i.src3);
} else if (i.dest == i.src3) {
e.vfmsub231ss(i.dest, i.src1, i.src2);
} else {
if (i.dest != i.src2 && i.dest != i.src3) {
e.vmovss(i.dest, i.src1);
e.vfmsub213ss(i.dest, i.src2, i.src3);
} else {
e.vmovss(e.xmm0, i.src1);
e.vfmsub213ss(e.xmm0, i.src2, i.src3);
e.vmovss(i.dest, e.xmm0);
}
// Dest not equal to anything
e.movss(i.dest, i.src1);
e.vfmsub213ss(i.dest, i.src2, i.src3);
}
} else {
// If i.dest == i.src3, back up i.src3 so we don't overwrite it.
Xmm src3 = i.src3;
if (i.dest == i.src3) {
e.vmovss(e.xmm0, i.src3);
e.movss(e.xmm0, i.src3);
src3 = e.xmm0;
}
@ -3879,21 +3883,20 @@ EMITTER(MUL_SUB_F64, MATCH(I<OPCODE_MUL_SUB, F64<>, F64<>, F64<>, F64<>>)) {
if (e.IsFeatureEnabled(kX64EmitFMA)) {
if (i.dest == i.src1) {
e.vfmsub213sd(i.dest, i.src2, i.src3);
} else if (i.dest == i.src2) {
e.vfmsub213sd(i.dest, i.src1, i.src3);
} else if (i.dest == i.src3) {
e.vfmsub231sd(i.dest, i.src1, i.src2);
} else {
if (i.dest != i.src2 && i.dest != i.src3) {
e.vmovsd(i.dest, i.src1);
e.vfmsub213sd(i.dest, i.src2, i.src3);
} else {
e.vmovsd(e.xmm0, i.src1);
e.vfmsub213sd(e.xmm0, i.src2, i.src3);
e.vmovsd(i.dest, e.xmm0);
}
// Dest not equal to anything
e.movsd(i.dest, i.src1);
e.vfmsub213sd(i.dest, i.src2, i.src3);
}
} else {
// If i.dest == i.src3, back up i.src3 so we don't overwrite it.
Xmm src3 = i.src3;
if (i.dest == i.src3) {
e.vmovsd(e.xmm0, i.src3);
e.movsd(e.xmm0, i.src3);
src3 = e.xmm0;
}
@ -3908,15 +3911,14 @@ EMITTER(MUL_SUB_V128, MATCH(I<OPCODE_MUL_SUB, V128<>, V128<>, V128<>, V128<>>))
if (e.IsFeatureEnabled(kX64EmitFMA)) {
if (i.dest == i.src1) {
e.vfmsub213ps(i.dest, i.src2, i.src3);
} else if (i.dest == i.src2) {
e.vfmsub213ps(i.dest, i.src1, i.src3);
} else if (i.dest == i.src3) {
e.vfmsub231ps(i.dest, i.src1, i.src2);
} else {
if (i.dest != i.src2 && i.dest != i.src3) {
e.vmovdqa(i.dest, i.src1);
e.vfmsub213ps(i.dest, i.src2, i.src3);
} else {
e.vmovdqa(e.xmm0, i.src1);
e.vfmsub213ps(e.xmm0, i.src2, i.src3);
e.vmovdqa(i.dest, e.xmm0);
}
// Dest not equal to anything
e.vmovdqa(i.dest, i.src1);
e.vfmsub213ps(i.dest, i.src2, i.src3);
}
} else {
// If i.dest == i.src3, back up i.src3 so we don't overwrite it.