SPU: improve SHUFB

This commit is contained in:
Nekotekina 2018-05-24 17:18:51 +03:00
parent c5709f71b3
commit 944e89058e
3 changed files with 33 additions and 28 deletions

View File

@ -972,6 +972,17 @@ public:
return a << (b & mask) | a >> ((0 - b) & mask);
}
// Select (c ? a : b)
template <typename T, typename T2>
auto select(T2 c, T a, T b)
{
static_assert(value_t<typename T2::type>::esize == 1, "select: expected bool type (first argument)");
static_assert(value_t<typename T2::type>::is_vector == value_t<typename T::type>::is_vector, "select: incompatible arguments (vectors)");
T result;
result.value = m_ir->CreateSelect(c.eval(m_ir), a.eval(m_ir), b.eval(m_ir));
return result;
}
template <typename T, typename E>
auto insert(T v, u64 i, E e)
{

View File

@ -4701,11 +4701,19 @@ void spu_recompiler::SHUFB(spu_opcode_t op)
const XmmLink& v5 = XmmAlloc();
c->movdqa(vm, XmmConst(_mm_set1_epi8(0xc0)));
// Test for (110xxxxx) and (11xxxxxx) bit values
if (utils::has_avx())
{
c->vpand(v5, vc, XmmConst(_mm_set1_epi8(0xe0)));
c->vpand(vt, vc, vm);
c->vpxor(vc, vc, XmmConst(_mm_set1_epi8(0xf)));
c->vpshufb(va, va, vc);
c->vpslld(vt, vc, 3);
c->vpcmpeqb(v5, v5, vm);
c->vpshufb(vb, vb, vc);
c->vpand(vc, vc, vm);
c->vpblendvb(vb, va, vb, vt);
c->vpcmpeqb(vt, vc, vm);
c->vpavgb(vt, vt, v5);
c->vpor(vt, vt, vb);
}
else
{
@ -4713,35 +4721,21 @@ void spu_recompiler::SHUFB(spu_opcode_t op)
c->pand(v5, XmmConst(_mm_set1_epi8(0xe0)));
c->movdqa(vt, vc);
c->pand(vt, vm);
}
c->pxor(vc, XmmConst(_mm_set1_epi8(0xf)));
c->pshufb(va, vc);
c->pshufb(vb, vc);
c->pand(vc, XmmConst(_mm_set1_epi8(0x10)));
c->pslld(vc, 3);
c->pcmpeqb(v5, vm); // If true, result should become 0xFF
c->pcmpeqb(vt, vm); // If true, result should become either 0xFF or 0x80
c->pavgb(vt, v5); // Generate result constant: AVG(0xff, 0x00) == 0x80
c->pxor(vm, vm);
c->pcmpeqb(vc, vm);
// Select result value from va or vb
if (utils::has_512())
{
c->vpternlogd(vc, va, vb, 0xca /* A?B:C */);
}
else if (utils::has_xop())
{
c->vpcmov(vc, va, vb, vc);
}
else
{
c->pcmpeqb(vm, vm);
c->pcmpgtb(vc, vm);
c->pand(va, vc);
c->pandn(vc, vb);
c->por(vc, va);
c->por(vc, va); // Select result value from va or vb
c->pavgb(vt, v5); // Generate result constant: AVG(0xff, 0x00) == 0x80
c->por(vt, vc);
}
c->por(vt, vc);
c->movdqa(SPU_OFF_128(gpr, op.rt4), vt);
}

View File

@ -2513,7 +2513,7 @@ public:
const auto cr = c ^ 0xf;
const auto a = pshufb(get_vr<u8[16]>(op.ra), cr);
const auto b = pshufb(get_vr<u8[16]>(op.rb), cr);
set_vr(op.rt4, merge(sext<u8[16]>((c & 0x10) == 0), a, b) | x);
set_vr(op.rt4, select(bitcast<s8[16]>(cr << 3) >= 0, a, b) | x);
}
void MPYA(spu_opcode_t op)