gsdx sw: factorize common draw scanline code

Ymm inherits from Xmm, so duplicating the code for both register types is unnecessary.

Add a parameter to alltrue so that it tests the correct register.
This commit is contained in:
Gregory Hainaut 2016-11-22 07:43:02 +01:00
parent 211c7745de
commit e3bfa2be88
6 changed files with 35 additions and 112 deletions

View File

@ -122,82 +122,7 @@ GSDrawScanlineCodeGenerator::GSDrawScanlineCodeGenerator(void* param, uint64 key
}
}
#if _M_SSE >= 0x501
// Emits the instructions that multiply the 16-bit words of `a` by `f`.
//
// a     - register operand passed as the first argument of every emitted instruction.
// f     - multiplier operand handed to the multiply instruction.
// shift - 0 selects a single vpmulhrsw; any other value first emits a
//         left shift of `a` by (shift + 1) bits and then vpmulhw.
void GSDrawScanlineCodeGenerator::modulate16(const Ymm& a, const Operand& f, int shift)
{
	if(shift != 0)
	{
		// Pre-shift path: shift left by one more than requested, then take the high product half.
		vpsllw(a, (uint8)(shift + 1));
		vpmulhw(a, f);
		return;
	}

	// No pre-shift requested: a single rounding high multiply is emitted.
	vpmulhrsw(a, f);
}
// Emits a subtract / modulate16 / add sequence on `a` using `b` and `f`.
// `shift` is forwarded unchanged to modulate16.
// NOTE(review): presumably this yields a = b + (a - b) * f per 16-bit word,
// assuming the two-operand helpers use their first operand as both source
// and destination -- confirm against the code generator's AVX wrappers.
void GSDrawScanlineCodeGenerator::lerp16(const Ymm& a, const Ymm& b, const Ymm& f, int shift)
{
// a - b
vpsubw(a, b);
// scale the difference by f
modulate16(a, f, shift);
// add b back
vpaddw(a, b);
}
// Emits a subtract / low multiply / arithmetic shift right by 4 / add
// sequence on `a` using `b` and `f`.
// NOTE(review): presumably a = b + (((a - b) * f) >> 4) per 16-bit word,
// i.e. `f` is treated as a factor with 4 fractional bits -- confirm with callers.
void GSDrawScanlineCodeGenerator::lerp16_4(const Ymm& a, const Ymm& b, const Ymm& f)
{
// a - b
vpsubw(a, b);
// low 16 bits of the product with f
vpmullw(a, f);
// signed shift right by 4 bits
vpsraw(a, 4);
// add b back
vpaddw(a, b);
}
// Emits a single vpblendw of `a` and `b` with the immediate mask 0xaa.
// `temp` is not used by this implementation.
// NOTE(review): 0xaa is 0b10101010, so presumably every odd-numbered 16-bit
// word is taken from `b` and every even-numbered one is kept from `a` -- confirm.
void GSDrawScanlineCodeGenerator::mix16(const Ymm& a, const Ymm& b, const Ymm& temp)
{
vpblendw(a, b, 0xaa);
}
// Emits a pack-to-bytes / quadword permute / zero-extend-to-words sequence on `a`.
// `temp` is not used by this implementation.
// NOTE(review): presumably the net effect is clamping each 16-bit word of `a`
// to the 0..255 range via the unsigned-saturating pack -- confirm.
void GSDrawScanlineCodeGenerator::clamp16(const Ymm& a, const Ymm& temp)
{
// pack words to bytes with unsigned saturation
vpackuswb(a, a);
// NOTE(review): the permute presumably gathers the packed bytes that the
// per-128-bit-lane pack left split across the register -- confirm.
vpermq(a, a, _MM_SHUFFLE(3, 1, 2, 0)); // this sucks
// widen the bytes back to 16-bit words with zero extension
vpmovzxbw(a, a);
}
// Emits a check on ymm7: the byte mask of ymm7 is moved into eax, compared
// against 0xffffffff, and a near jump to the "step" label is taken on equality.
// The tested register (ymm7) and the jump target are hard-coded here.
void GSDrawScanlineCodeGenerator::alltrue()
{
// one mask bit per byte of ymm7 -> eax
vpmovmskb(eax, ymm7);
// all 32 mask bits set?
cmp(eax, 0xffffffff);
je("step", T_NEAR);
}
// Emits an and / andn / or sequence that combines `a` and `b` under `mask`,
// with the final vpor writing to `a`.
// NOTE(review): presumably a = (b & mask) | (a & ~mask), with `b` and `mask`
// overwritten as scratch by the first two instructions -- confirm against the
// two-operand AVX wrappers.
void GSDrawScanlineCodeGenerator::blend(const Ymm& a, const Ymm& b, const Ymm& mask)
{
vpand(b, mask);
vpandn(mask, a);
// three-operand form: destination a, sources b and mask
vpor(a, b, mask);
}
// Emits an and / andn / or sequence that combines `a` and `b` under `mask`,
// with the final vpor targeting `b` (note the parameter order: b comes first).
// NOTE(review): presumably b = (b & mask) | (a & ~mask), with `mask`
// overwritten as scratch and `a` left untouched -- confirm against the
// two-operand AVX wrappers.
void GSDrawScanlineCodeGenerator::blendr(const Ymm& b, const Ymm& a, const Ymm& mask)
{
vpand(b, mask);
vpandn(mask, a);
vpor(b, mask);
}
// Emits a single vpblendvb with destination `a`, sources `a` and `b`, and
// xmm0 passed as the selector operand.
// NOTE(review): the selector is given as xmm0 although the other operands are
// Ymm -- confirm that the emitter accepts/encodes this as intended.
void GSDrawScanlineCodeGenerator::blend8(const Ymm& a, const Ymm& b)
{
vpblendvb(a, a, b, xmm0);
}
// Emits a single vpblendvb with destination `b`, sources `a` and `b`, and
// xmm0 passed as the selector operand (note the parameter order: b comes first).
// NOTE(review): the selector is given as xmm0 although the other operands are
// Ymm -- confirm that the emitter accepts/encodes this as intended.
void GSDrawScanlineCodeGenerator::blend8r(const Ymm& b, const Ymm& a)
{
vpblendvb(b, a, b, xmm0);
}
#else
void GSDrawScanlineCodeGenerator::modulate16(const Xmm& a, const Operand& f, int shift)
void GSDrawScanlineCodeGenerator::modulate16(const Xmm& a, const Operand& f, uint8 shift)
{
if(g_cpu.has(util::Cpu::tAVX))
{
@ -226,7 +151,7 @@ void GSDrawScanlineCodeGenerator::modulate16(const Xmm& a, const Operand& f, int
}
}
void GSDrawScanlineCodeGenerator::lerp16(const Xmm& a, const Xmm& b, const Xmm& f, int shift)
void GSDrawScanlineCodeGenerator::lerp16(const Xmm& a, const Xmm& b, const Xmm& f, uint8 shift)
{
if(g_cpu.has(util::Cpu::tAVX))
{
@ -288,6 +213,15 @@ void GSDrawScanlineCodeGenerator::clamp16(const Xmm& a, const Xmm& temp)
if(g_cpu.has(util::Cpu::tAVX))
{
vpackuswb(a, a);
#if _M_SSE >= 0x501
// Greg: why ?
if(g_cpu.has(util::Cpu::tAVX2)) {
ASSERT(a.isYMM());
vpermq(Ymm(a.getIdx()), Ymm(a.getIdx()), _MM_SHUFFLE(3, 1, 2, 0)); // this sucks
}
#endif
vpmovzxbw(a, a);
}
else
@ -306,18 +240,20 @@ void GSDrawScanlineCodeGenerator::clamp16(const Xmm& a, const Xmm& temp)
}
}
void GSDrawScanlineCodeGenerator::alltrue()
void GSDrawScanlineCodeGenerator::alltrue(const Xmm& test)
{
uint32 mask = test.isYMM() ? 0xffffffff : 0xffff;
if(g_cpu.has(util::Cpu::tAVX))
{
vpmovmskb(eax, xmm7);
cmp(eax, 0xffff);
vpmovmskb(eax, test);
cmp(eax, mask);
je("step", T_NEAR);
}
else
{
pmovmskb(eax, xmm7);
cmp(eax, 0xffff);
pmovmskb(eax, test);
cmp(eax, mask);
je("step", T_NEAR);
}
}
@ -416,5 +352,3 @@ void GSDrawScanlineCodeGenerator::split16_2x8(const Xmm& l, const Xmm& h, const
psrlw(h, 8);
}
}
#endif

View File

@ -71,17 +71,6 @@ class GSDrawScanlineCodeGenerator : public GSCodeGenerator
void ReadTexel(int pixels, int mip_offset = 0);
void ReadTexel(const Ymm& dst, const Ymm& addr, uint8 i);
void modulate16(const Ymm& a, const Operand& f, int shift);
void lerp16(const Ymm& a, const Ymm& b, const Ymm& f, int shift);
void lerp16_4(const Ymm& a, const Ymm& b, const Ymm& f);
void mix16(const Ymm& a, const Ymm& b, const Ymm& temp);
void clamp16(const Ymm& a, const Ymm& temp);
void alltrue();
void blend(const Ymm& a, const Ymm& b, const Ymm& mask);
void blendr(const Ymm& b, const Ymm& a, const Ymm& mask);
void blend8(const Ymm& a, const Ymm& b);
void blend8r(const Ymm& b, const Ymm& a);
#else
void Generate_SSE();
@ -138,20 +127,20 @@ class GSDrawScanlineCodeGenerator : public GSCodeGenerator
void ReadTexel_AVX(int pixels, int mip_offset = 0);
void ReadTexel_AVX(const Xmm& dst, const Xmm& addr, uint8 i);
void modulate16(const Xmm& a, const Operand& f, int shift);
void lerp16(const Xmm& a, const Xmm& b, const Xmm& f, int shift);
#endif
void modulate16(const Xmm& a, const Operand& f, uint8 shift);
void lerp16(const Xmm& a, const Xmm& b, const Xmm& f, uint8 shift);
void lerp16_4(const Xmm& a, const Xmm& b, const Xmm& f);
void mix16(const Xmm& a, const Xmm& b, const Xmm& temp);
void clamp16(const Xmm& a, const Xmm& temp);
void alltrue();
void alltrue(const Xmm& test);
void blend(const Xmm& a, const Xmm& b, const Xmm& mask);
void blendr(const Xmm& b, const Xmm& a, const Xmm& mask);
void blend8(const Xmm& a, const Xmm& b);
void blend8r(const Xmm& b, const Xmm& a);
void split16_2x8(const Xmm& l, const Xmm& h, const Xmm& src);
#endif
public:
GSDrawScanlineCodeGenerator(void* param, uint64 key, void* code, size_t maxsize);

View File

@ -727,7 +727,7 @@ void GSDrawScanlineCodeGenerator::TestZ_AVX(const Xmm& temp1, const Xmm& temp2)
break;
}
alltrue();
alltrue(_test);
}
}
@ -1337,7 +1337,7 @@ void GSDrawScanlineCodeGenerator::TestAlpha_AVX()
case AFAIL_KEEP:
// test |= t;
vpor(_test, xmm1);
alltrue();
alltrue(_test);
break;
case AFAIL_FB_ONLY:
@ -1509,7 +1509,7 @@ void GSDrawScanlineCodeGenerator::TestDestAlpha_AVX()
vpor(_test, xmm1);
alltrue();
alltrue(_test);
}
void GSDrawScanlineCodeGenerator::WriteMask_AVX()

View File

@ -689,7 +689,7 @@ void GSDrawScanlineCodeGenerator::TestZ_AVX(const Xmm& temp1, const Xmm& temp2)
break;
}
alltrue();
alltrue(xmm7);
}
}
@ -2130,7 +2130,7 @@ void GSDrawScanlineCodeGenerator::TestAlpha_AVX()
case AFAIL_KEEP:
// test |= t;
vpor(xmm7, xmm1);
alltrue();
alltrue(xmm7);
break;
case AFAIL_FB_ONLY:
@ -2313,7 +2313,7 @@ void GSDrawScanlineCodeGenerator::TestDestAlpha_AVX()
vpor(xmm7, xmm1);
alltrue();
alltrue(xmm7);
}
void GSDrawScanlineCodeGenerator::WriteMask_AVX()

View File

@ -691,7 +691,7 @@ void GSDrawScanlineCodeGenerator::TestZ(const Ymm& temp1, const Ymm& temp2)
break;
}
alltrue();
alltrue(ymm7);
}
}
@ -2118,7 +2118,7 @@ void GSDrawScanlineCodeGenerator::TestAlpha()
case AFAIL_KEEP:
// test |= t;
vpor(ymm7, ymm1);
alltrue();
alltrue(ymm7);
break;
case AFAIL_FB_ONLY:
@ -2309,7 +2309,7 @@ void GSDrawScanlineCodeGenerator::TestDestAlpha()
vpor(ymm7, ymm1);
alltrue();
alltrue(ymm7);
}
void GSDrawScanlineCodeGenerator::WriteMask()

View File

@ -694,7 +694,7 @@ void GSDrawScanlineCodeGenerator::TestZ_SSE(const Xmm& temp1, const Xmm& temp2)
break;
}
alltrue();
alltrue(xmm7);
}
}
@ -2162,7 +2162,7 @@ void GSDrawScanlineCodeGenerator::TestAlpha_SSE()
case AFAIL_KEEP:
// test |= t;
por(xmm7, xmm1);
alltrue();
alltrue(xmm7);
break;
case AFAIL_FB_ONLY:
@ -2344,7 +2344,7 @@ void GSDrawScanlineCodeGenerator::TestDestAlpha_SSE()
por(xmm7, xmm1);
alltrue();
alltrue(xmm7);
}
void GSDrawScanlineCodeGenerator::WriteMask_SSE()