From c1f9952cbdf7349b5551b8655e1123b9117dedfe Mon Sep 17 00:00:00 2001 From: "Dr. Chat" Date: Tue, 5 May 2015 12:00:40 -0500 Subject: [PATCH 1/2] PERMUTE_I32 Alternative instructions --- src/xenia/cpu/backend/x64/x64_sequences.cc | 29 +++++++++++++++------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/src/xenia/cpu/backend/x64/x64_sequences.cc b/src/xenia/cpu/backend/x64/x64_sequences.cc index 8f4d4d4a6..2f1a39e8e 100644 --- a/src/xenia/cpu/backend/x64/x64_sequences.cc +++ b/src/xenia/cpu/backend/x64/x64_sequences.cc @@ -3793,9 +3793,6 @@ EMITTER(MUL_ADD_F32, MATCH(I, F32<>, F32<>, F32<>>)) { } } } else { - // TODO(justin): Test this - //e.DebugBreak(); - // If i.dest == i.src3, back up i.src3 so we don't overwrite it. if (i.dest == i.src3) { e.vmovss(e.xmm0, i.src3); @@ -5490,8 +5487,8 @@ EMITTER(SPLAT_I16, MATCH(I, I16<>>)) { e.vpbroadcastw(i.dest, e.xmm0); } } else { - // TODO(justin) - e.DebugBreak(); + // TODO + //e.DebugBreak(); } } }; @@ -5567,11 +5564,25 @@ EMITTER(PERMUTE_I32, MATCH(I, I32<>, V128<>, V128<>>)) { (((control >> 16) & 0x3) << 4) | (((control >> 8) & 0x3) << 2) | (((control >> 0) & 0x3) << 0); - uint32_t blend_control = - (((control >> 26) & 0x1) << 3) | - (((control >> 18) & 0x1) << 2) | - (((control >> 10) & 0x1) << 1) | + + uint32_t blend_control = 0; + if (e.cpu()->has(Xbyak::util::Cpu::tAVX2)) { + // Blender for vpblendd + blend_control = + (((control >> 26) & 0x1) << 3) | + (((control >> 18) & 0x1) << 2) | + (((control >> 10) & 0x1) << 1) | + (((control >> 2) & 0x1) << 0); + } else { + // Blender for pblendw + blend_control = + (((control >> 26) & 0x1) << 6) | + (((control >> 18) & 0x1) << 4) | + (((control >> 10) & 0x1) << 2) | (((control >> 2) & 0x1) << 0); + blend_control |= blend_control << 1; + } + // TODO(benvanik): if src2/src3 are constants, shuffle now! Xmm src2; if (i.src2.is_constant) { From c3f1e74814308289120fc6dccf1494eb8543351a Mon Sep 17 00:00:00 2001 From: "Dr. Chat" Date: Tue, 5 May 2015 12:09:45 -0500 Subject: [PATCH 2/2] And actually use vpblendw --- src/xenia/cpu/backend/x64/x64_sequences.cc | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/xenia/cpu/backend/x64/x64_sequences.cc b/src/xenia/cpu/backend/x64/x64_sequences.cc index 2f1a39e8e..a651b91f0 100644 --- a/src/xenia/cpu/backend/x64/x64_sequences.cc +++ b/src/xenia/cpu/backend/x64/x64_sequences.cc @@ -5601,12 +5601,16 @@ EMITTER(PERMUTE_I32, MATCH(I, I32<>, V128<>, V128<>>)) { if (i.dest != src3) { e.vpshufd(i.dest, src2, src_control); e.vpshufd(e.xmm0, src3, src_control); - e.vpblendd(i.dest, e.xmm0, blend_control); // $0 = $1 $2 } else { e.vmovaps(e.xmm0, src3); e.vpshufd(i.dest, src2, src_control); e.vpshufd(e.xmm0, e.xmm0, src_control); - e.vpblendd(i.dest, e.xmm0, blend_control); + } + + if (e.cpu()->has(Xbyak::util::Cpu::tAVX2)) { + e.vpblendd(i.dest, e.xmm0, blend_control); // $0 = $1 $2 + } else { + e.vpblendw(i.dest, e.xmm0, blend_control); // $0 = $1 $2 } } else { // Permute by non-constant.