Merge pull request #221 from DrChat/alt_pack

Vmaddfp test fix / Change some SSE instructions to AVX
This commit is contained in:
Rick Gibbed 2015-05-23 19:55:52 -05:00
commit 89a499bb47
2 changed files with 42 additions and 15 deletions

View File

@ -5551,15 +5551,14 @@ EMITTER(SPLAT_I8, MATCH(I<OPCODE_SPLAT, V128<>, I8<>>)) {
} else { } else {
if (i.src1.is_constant) { if (i.src1.is_constant) {
e.mov(e.eax, i.src1.constant()); e.mov(e.eax, i.src1.constant());
e.movd(e.xmm0, e.eax); e.vmovd(e.xmm0, e.eax);
} else { } else {
e.movd(e.xmm0, i.src1.reg().cvt32()); e.vmovd(e.xmm0, i.src1.reg().cvt32());
} }
// Credits: VC++ compiler (i love you so much) e.vpunpcklbw(e.xmm0, e.xmm0);
e.punpcklbw(e.xmm0, e.xmm0); e.vpunpcklwd(e.xmm0, e.xmm0);
e.punpcklwd(e.xmm0, e.xmm0); e.vpshufd(i.dest, e.xmm0, 0);
e.pshufd(i.dest, e.xmm0, 0);
} }
} }
}; };
@ -5578,14 +5577,13 @@ EMITTER(SPLAT_I16, MATCH(I<OPCODE_SPLAT, V128<>, I16<>>)) {
} else { } else {
if (i.src1.is_constant) { if (i.src1.is_constant) {
e.mov(e.eax, i.src1.constant()); e.mov(e.eax, i.src1.constant());
e.movd(e.xmm0, e.eax); e.vmovd(e.xmm0, e.eax);
} else { } else {
e.movd(e.xmm0, i.src1.reg().cvt32()); e.vmovd(e.xmm0, i.src1.reg().cvt32());
} }
// Credits: VC++ compiler (i love you so much) e.vpunpcklwd(e.xmm0, e.xmm0); // unpack low word data
e.punpcklwd(e.xmm0, e.xmm0); // unpack low word data e.vpshufd(i.dest, e.xmm0, 0);
e.pshufd(i.dest, e.xmm0, 0);
} }
} }
}; };
@ -5609,7 +5607,7 @@ EMITTER(SPLAT_I32, MATCH(I<OPCODE_SPLAT, V128<>, I32<>>)) {
e.vmovd(e.xmm0, i.src1.reg().cvt32()); e.vmovd(e.xmm0, i.src1.reg().cvt32());
} }
e.pshufd(i.dest, e.xmm0, 0); e.vpshufd(i.dest, e.xmm0, 0);
} }
} }
}; };
@ -5632,7 +5630,7 @@ EMITTER(SPLAT_F32, MATCH(I<OPCODE_SPLAT, V128<>, F32<>>)) {
e.vmovd(i.dest, i.src1.reg().cvt32()); e.vmovd(i.dest, i.src1.reg().cvt32());
} }
e.shufps(i.dest, i.dest, 0); e.vshufps(i.dest, i.dest, i.dest, 0);
} }
} }
}; };

View File

@ -3,8 +3,37 @@ test_vmaddfp_1:
# 1.0, 1.5, 1.1, 1.9 # 1.0, 1.5, 1.1, 1.9
vmaddfp v3, v4, v4, v4 vmaddfp v3, v4, v4, v4
blr blr
#_ REGISTER_OUT v3 [00000001, 00000001, 00000001, 00000001] #_ REGISTER_OUT v3 [40000000, 40700000, 4013d70a, 40b051eb]
#_ REGISTER_OUT v4 [40000000, 40700000, 4013d70a, 40b051eb] #_ REGISTER_OUT v4 [3f800000, 3fc00000, 3f8ccccd, 3ff33333]
# 2.0, 3.75, 2.31, 5.51 # 2.0, 3.75, 2.31, 5.51
# 40b051eb is actually 5.50999975, not 5.51? # 40b051eb is actually 5.50999975, not 5.51?
# 40b051ec is 5.51 # 40b051ec is 5.51
test_vmaddfp_2:
# Multiply-add with three distinct source registers, every lane set to 1.0.
# v4 = 1.0, 1.0, 1.0, 1.0
# v5 = 1.0, 1.0, 1.0, 1.0
# v6 = 1.0, 1.0, 1.0, 1.0
#_ REGISTER_IN v4 [3f800000, 3f800000, 3f800000, 3f800000]
#_ REGISTER_IN v5 [3f800000, 3f800000, 3f800000, 3f800000]
#_ REGISTER_IN v6 [3f800000, 3f800000, 3f800000, 3f800000]
# Per lane: 1.0 * 1.0 + 1.0 = 2.0
vmaddfp v3, v4, v5, v6
blr
# v3 = 2.0, 2.0, 2.0, 2.0; the source registers v4-v6 must come back unchanged.
#_ REGISTER_OUT v3 [40000000, 40000000, 40000000, 40000000]
#_ REGISTER_OUT v4 [3f800000, 3f800000, 3f800000, 3f800000]
#_ REGISTER_OUT v5 [3f800000, 3f800000, 3f800000, 3f800000]
#_ REGISTER_OUT v6 [3f800000, 3f800000, 3f800000, 3f800000]
test_vmaddfp_3:
# Multiply-add with lanes that differ, so a result taken from the wrong lane
# or the wrong operand shows up in the expected output.
# v4 = 5, 5, 1, 1
# v5 = 5, 5, 1, 1
# v6 = 1, 1, 1, 1
#_ REGISTER_IN v4 [40a00000, 40a00000, 3f800000, 3f800000]
#_ REGISTER_IN v5 [40a00000, 40a00000, 3f800000, 3f800000]
#_ REGISTER_IN v6 [3f800000, 3f800000, 3f800000, 3f800000]
# Per lane: v3 = v4 * v5 + v6 (5 * 5 + 1 = 26, 1 * 1 + 1 = 2)
vmaddfp v3, v4, v5, v6
blr
# v3 = 26.0, 26.0, 2.0, 2.0; the source registers v4-v6 must come back unchanged.
#_ REGISTER_OUT v3 [41d00000, 41d00000, 40000000, 40000000]
#_ REGISTER_OUT v4 [40a00000, 40a00000, 3f800000, 3f800000]
#_ REGISTER_OUT v5 [40a00000, 40a00000, 3f800000, 3f800000]
#_ REGISTER_OUT v6 [3f800000, 3f800000, 3f800000, 3f800000]