From f7a8dc37485d9627004215cc3e97828ed649fb1e Mon Sep 17 00:00:00 2001 From: "Dr. Chat" Date: Sat, 23 May 2015 17:27:26 -0500 Subject: [PATCH 1/2] Change OPCODE_SPLAT instructions from SSE->AVX --- src/xenia/cpu/backend/x64/x64_sequences.cc | 24 ++++++++++------------ 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/src/xenia/cpu/backend/x64/x64_sequences.cc b/src/xenia/cpu/backend/x64/x64_sequences.cc index 60410d248..194da6792 100644 --- a/src/xenia/cpu/backend/x64/x64_sequences.cc +++ b/src/xenia/cpu/backend/x64/x64_sequences.cc @@ -5551,15 +5551,14 @@ EMITTER(SPLAT_I8, MATCH(I, I8<>>)) { } else { if (i.src1.is_constant) { e.mov(e.eax, i.src1.constant()); - e.movd(e.xmm0, e.eax); + e.vmovd(e.xmm0, e.eax); } else { - e.movd(e.xmm0, i.src1.reg().cvt32()); + e.vmovd(e.xmm0, i.src1.reg().cvt32()); } - // Credits: VC++ compiler (i love you so much) - e.punpcklbw(e.xmm0, e.xmm0); - e.punpcklwd(e.xmm0, e.xmm0); - e.pshufd(i.dest, e.xmm0, 0); + e.vpunpcklbw(e.xmm0, e.xmm0); + e.vpunpcklwd(e.xmm0, e.xmm0); + e.vpshufd(i.dest, e.xmm0, 0); } } }; @@ -5578,14 +5577,13 @@ EMITTER(SPLAT_I16, MATCH(I, I16<>>)) { } else { if (i.src1.is_constant) { e.mov(e.eax, i.src1.constant()); - e.movd(e.xmm0, e.eax); + e.vmovd(e.xmm0, e.eax); } else { - e.movd(e.xmm0, i.src1.reg().cvt32()); + e.vmovd(e.xmm0, i.src1.reg().cvt32()); } - // Credits: VC++ compiler (i love you so much) - e.punpcklwd(e.xmm0, e.xmm0); // unpack low word data - e.pshufd(i.dest, e.xmm0, 0); + e.vpunpcklwd(e.xmm0, e.xmm0); // unpack low word data + e.vpshufd(i.dest, e.xmm0, 0); } } }; @@ -5609,7 +5607,7 @@ EMITTER(SPLAT_I32, MATCH(I, I32<>>)) { e.vmovd(e.xmm0, i.src1.reg().cvt32()); } - e.pshufd(i.dest, e.xmm0, 0); + e.vpshufd(i.dest, e.xmm0, 0); } } }; @@ -5632,7 +5630,7 @@ EMITTER(SPLAT_F32, MATCH(I, F32<>>)) { e.vmovd(i.dest, i.src1.reg().cvt32()); } - e.shufps(i.dest, i.dest, 0); + e.vshufps(i.dest, i.dest, i.dest, 0); } } }; From a4b950f1e981fe184a92051089d49ee3b5adc120 Mon Sep 17 00:00:00 2001 
From: "Dr. Chat" Date: Sat, 23 May 2015 18:14:07 -0500 Subject: [PATCH 2/2] Fix vmaddfp test --- src/xenia/cpu/frontend/test/instr_vmaddfp.s | 33 +++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/src/xenia/cpu/frontend/test/instr_vmaddfp.s b/src/xenia/cpu/frontend/test/instr_vmaddfp.s index f943abe7e..74005af4c 100644 --- a/src/xenia/cpu/frontend/test/instr_vmaddfp.s +++ b/src/xenia/cpu/frontend/test/instr_vmaddfp.s @@ -3,8 +3,37 @@ test_vmaddfp_1: # 1.0, 1.5, 1.1, 1.9 vmaddfp v3, v4, v4, v4 blr - #_ REGISTER_OUT v3 [00000001, 00000001, 00000001, 00000001] - #_ REGISTER_OUT v4 [40000000, 40700000, 4013d70a, 40b051eb] + #_ REGISTER_OUT v3 [40000000, 40700000, 4013d70a, 40b051eb] + #_ REGISTER_OUT v4 [3f800000, 3fc00000, 3f8ccccd, 3ff33333] # 2.0, 3.75, 2.31, 5.51 # 40b051eb is actually 5.50999975, not 5.51? # 40b051ec is 5.51 + +test_vmaddfp_2: + #_ REGISTER_IN v4 [3f800000, 3f800000, 3f800000, 3f800000] + #_ REGISTER_IN v5 [3f800000, 3f800000, 3f800000, 3f800000] + #_ REGISTER_IN v6 [3f800000, 3f800000, 3f800000, 3f800000] + # v4, v5 and v6 are 1.0 in every lane; v4 * v5 plus v6 gives 2.0 per lane + vmaddfp v3, v4, v5, v6 + blr + #_ REGISTER_OUT v3 [40000000, 40000000, 40000000, 40000000] + #_ REGISTER_OUT v4 [3f800000, 3f800000, 3f800000, 3f800000] + #_ REGISTER_OUT v5 [3f800000, 3f800000, 3f800000, 3f800000] + #_ REGISTER_OUT v6 [3f800000, 3f800000, 3f800000, 3f800000] + +test_vmaddfp_3: + # v4 = 5, 5, 1, 1 + # v5 = 5, 5, 1, 1 + # v6 = 1, 1, 1, 1 + #_ REGISTER_IN v4 [40a00000, 40a00000, 3f800000, 3f800000] + #_ REGISTER_IN v5 [40a00000, 40a00000, 3f800000, 3f800000] + #_ REGISTER_IN v6 [3f800000, 3f800000, 3f800000, 3f800000] + # per lane: v3 = v4 * v5 plus v6 + vmaddfp v3, v4, v5, v6 + blr + # v3 = 26.0, 26.0, 2.0, 2.0 + #_ REGISTER_OUT v3 [41d00000, 41d00000, 40000000, 40000000] + #_ REGISTER_OUT v4 [40a00000, 40a00000, 3f800000, 3f800000] + #_ REGISTER_OUT v5 [40a00000, 40a00000, 3f800000, 3f800000] + #_ REGISTER_OUT v6 [3f800000, 3f800000, 3f800000, 3f800000] + \ No newline at end of file