mirror of https://github.com/PCSX2/pcsx2.git
gsdx sw: factorize common draw scanline code
Ymm inherits from Xmm, so duplicating the code is unnecessary. Add a parameter to alltrue to specify which register to test.
This commit is contained in:
parent
211c7745de
commit
e3bfa2be88
|
@ -122,82 +122,7 @@ GSDrawScanlineCodeGenerator::GSDrawScanlineCodeGenerator(void* param, uint64 key
|
|||
}
|
||||
}
|
||||
|
||||
#if _M_SSE >= 0x501
|
||||
|
||||
void GSDrawScanlineCodeGenerator::modulate16(const Ymm& a, const Operand& f, int shift)
|
||||
{
|
||||
if(shift == 0)
|
||||
{
|
||||
vpmulhrsw(a, f);
|
||||
}
|
||||
else
|
||||
{
|
||||
vpsllw(a, (uint8)(shift + 1));
|
||||
vpmulhw(a, f);
|
||||
}
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::lerp16(const Ymm& a, const Ymm& b, const Ymm& f, int shift)
|
||||
{
|
||||
vpsubw(a, b);
|
||||
modulate16(a, f, shift);
|
||||
vpaddw(a, b);
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::lerp16_4(const Ymm& a, const Ymm& b, const Ymm& f)
|
||||
{
|
||||
vpsubw(a, b);
|
||||
vpmullw(a, f);
|
||||
vpsraw(a, 4);
|
||||
vpaddw(a, b);
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::mix16(const Ymm& a, const Ymm& b, const Ymm& temp)
|
||||
{
|
||||
vpblendw(a, b, 0xaa);
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::clamp16(const Ymm& a, const Ymm& temp)
|
||||
{
|
||||
vpackuswb(a, a);
|
||||
vpermq(a, a, _MM_SHUFFLE(3, 1, 2, 0)); // this sucks
|
||||
vpmovzxbw(a, a);
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::alltrue()
|
||||
{
|
||||
vpmovmskb(eax, ymm7);
|
||||
cmp(eax, 0xffffffff);
|
||||
je("step", T_NEAR);
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::blend(const Ymm& a, const Ymm& b, const Ymm& mask)
|
||||
{
|
||||
vpand(b, mask);
|
||||
vpandn(mask, a);
|
||||
vpor(a, b, mask);
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::blendr(const Ymm& b, const Ymm& a, const Ymm& mask)
|
||||
{
|
||||
vpand(b, mask);
|
||||
vpandn(mask, a);
|
||||
vpor(b, mask);
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::blend8(const Ymm& a, const Ymm& b)
|
||||
{
|
||||
vpblendvb(a, a, b, xmm0);
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::blend8r(const Ymm& b, const Ymm& a)
|
||||
{
|
||||
vpblendvb(b, a, b, xmm0);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
void GSDrawScanlineCodeGenerator::modulate16(const Xmm& a, const Operand& f, int shift)
|
||||
void GSDrawScanlineCodeGenerator::modulate16(const Xmm& a, const Operand& f, uint8 shift)
|
||||
{
|
||||
if(g_cpu.has(util::Cpu::tAVX))
|
||||
{
|
||||
|
@ -226,7 +151,7 @@ void GSDrawScanlineCodeGenerator::modulate16(const Xmm& a, const Operand& f, int
|
|||
}
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::lerp16(const Xmm& a, const Xmm& b, const Xmm& f, int shift)
|
||||
void GSDrawScanlineCodeGenerator::lerp16(const Xmm& a, const Xmm& b, const Xmm& f, uint8 shift)
|
||||
{
|
||||
if(g_cpu.has(util::Cpu::tAVX))
|
||||
{
|
||||
|
@ -288,6 +213,15 @@ void GSDrawScanlineCodeGenerator::clamp16(const Xmm& a, const Xmm& temp)
|
|||
if(g_cpu.has(util::Cpu::tAVX))
|
||||
{
|
||||
vpackuswb(a, a);
|
||||
|
||||
#if _M_SSE >= 0x501
|
||||
// Greg: why ?
|
||||
if(g_cpu.has(util::Cpu::tAVX2)) {
|
||||
ASSERT(a.isYMM());
|
||||
vpermq(Ymm(a.getIdx()), Ymm(a.getIdx()), _MM_SHUFFLE(3, 1, 2, 0)); // this sucks
|
||||
}
|
||||
#endif
|
||||
|
||||
vpmovzxbw(a, a);
|
||||
}
|
||||
else
|
||||
|
@ -306,18 +240,20 @@ void GSDrawScanlineCodeGenerator::clamp16(const Xmm& a, const Xmm& temp)
|
|||
}
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::alltrue()
|
||||
void GSDrawScanlineCodeGenerator::alltrue(const Xmm& test)
|
||||
{
|
||||
uint32 mask = test.isYMM() ? 0xffffffff : 0xffff;
|
||||
|
||||
if(g_cpu.has(util::Cpu::tAVX))
|
||||
{
|
||||
vpmovmskb(eax, xmm7);
|
||||
cmp(eax, 0xffff);
|
||||
vpmovmskb(eax, test);
|
||||
cmp(eax, mask);
|
||||
je("step", T_NEAR);
|
||||
}
|
||||
else
|
||||
{
|
||||
pmovmskb(eax, xmm7);
|
||||
cmp(eax, 0xffff);
|
||||
pmovmskb(eax, test);
|
||||
cmp(eax, mask);
|
||||
je("step", T_NEAR);
|
||||
}
|
||||
}
|
||||
|
@ -416,5 +352,3 @@ void GSDrawScanlineCodeGenerator::split16_2x8(const Xmm& l, const Xmm& h, const
|
|||
psrlw(h, 8);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -71,17 +71,6 @@ class GSDrawScanlineCodeGenerator : public GSCodeGenerator
|
|||
void ReadTexel(int pixels, int mip_offset = 0);
|
||||
void ReadTexel(const Ymm& dst, const Ymm& addr, uint8 i);
|
||||
|
||||
void modulate16(const Ymm& a, const Operand& f, int shift);
|
||||
void lerp16(const Ymm& a, const Ymm& b, const Ymm& f, int shift);
|
||||
void lerp16_4(const Ymm& a, const Ymm& b, const Ymm& f);
|
||||
void mix16(const Ymm& a, const Ymm& b, const Ymm& temp);
|
||||
void clamp16(const Ymm& a, const Ymm& temp);
|
||||
void alltrue();
|
||||
void blend(const Ymm& a, const Ymm& b, const Ymm& mask);
|
||||
void blendr(const Ymm& b, const Ymm& a, const Ymm& mask);
|
||||
void blend8(const Ymm& a, const Ymm& b);
|
||||
void blend8r(const Ymm& b, const Ymm& a);
|
||||
|
||||
#else
|
||||
|
||||
void Generate_SSE();
|
||||
|
@ -138,20 +127,20 @@ class GSDrawScanlineCodeGenerator : public GSCodeGenerator
|
|||
void ReadTexel_AVX(int pixels, int mip_offset = 0);
|
||||
void ReadTexel_AVX(const Xmm& dst, const Xmm& addr, uint8 i);
|
||||
|
||||
void modulate16(const Xmm& a, const Operand& f, int shift);
|
||||
void lerp16(const Xmm& a, const Xmm& b, const Xmm& f, int shift);
|
||||
#endif
|
||||
|
||||
void modulate16(const Xmm& a, const Operand& f, uint8 shift);
|
||||
void lerp16(const Xmm& a, const Xmm& b, const Xmm& f, uint8 shift);
|
||||
void lerp16_4(const Xmm& a, const Xmm& b, const Xmm& f);
|
||||
void mix16(const Xmm& a, const Xmm& b, const Xmm& temp);
|
||||
void clamp16(const Xmm& a, const Xmm& temp);
|
||||
void alltrue();
|
||||
void alltrue(const Xmm& test);
|
||||
void blend(const Xmm& a, const Xmm& b, const Xmm& mask);
|
||||
void blendr(const Xmm& b, const Xmm& a, const Xmm& mask);
|
||||
void blend8(const Xmm& a, const Xmm& b);
|
||||
void blend8r(const Xmm& b, const Xmm& a);
|
||||
void split16_2x8(const Xmm& l, const Xmm& h, const Xmm& src);
|
||||
|
||||
#endif
|
||||
|
||||
public:
|
||||
GSDrawScanlineCodeGenerator(void* param, uint64 key, void* code, size_t maxsize);
|
||||
|
||||
|
|
|
@ -727,7 +727,7 @@ void GSDrawScanlineCodeGenerator::TestZ_AVX(const Xmm& temp1, const Xmm& temp2)
|
|||
break;
|
||||
}
|
||||
|
||||
alltrue();
|
||||
alltrue(_test);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1337,7 +1337,7 @@ void GSDrawScanlineCodeGenerator::TestAlpha_AVX()
|
|||
case AFAIL_KEEP:
|
||||
// test |= t;
|
||||
vpor(_test, xmm1);
|
||||
alltrue();
|
||||
alltrue(_test);
|
||||
break;
|
||||
|
||||
case AFAIL_FB_ONLY:
|
||||
|
@ -1509,7 +1509,7 @@ void GSDrawScanlineCodeGenerator::TestDestAlpha_AVX()
|
|||
|
||||
vpor(_test, xmm1);
|
||||
|
||||
alltrue();
|
||||
alltrue(_test);
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::WriteMask_AVX()
|
||||
|
|
|
@ -689,7 +689,7 @@ void GSDrawScanlineCodeGenerator::TestZ_AVX(const Xmm& temp1, const Xmm& temp2)
|
|||
break;
|
||||
}
|
||||
|
||||
alltrue();
|
||||
alltrue(xmm7);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2130,7 +2130,7 @@ void GSDrawScanlineCodeGenerator::TestAlpha_AVX()
|
|||
case AFAIL_KEEP:
|
||||
// test |= t;
|
||||
vpor(xmm7, xmm1);
|
||||
alltrue();
|
||||
alltrue(xmm7);
|
||||
break;
|
||||
|
||||
case AFAIL_FB_ONLY:
|
||||
|
@ -2313,7 +2313,7 @@ void GSDrawScanlineCodeGenerator::TestDestAlpha_AVX()
|
|||
|
||||
vpor(xmm7, xmm1);
|
||||
|
||||
alltrue();
|
||||
alltrue(xmm7);
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::WriteMask_AVX()
|
||||
|
|
|
@ -691,7 +691,7 @@ void GSDrawScanlineCodeGenerator::TestZ(const Ymm& temp1, const Ymm& temp2)
|
|||
break;
|
||||
}
|
||||
|
||||
alltrue();
|
||||
alltrue(ymm7);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2118,7 +2118,7 @@ void GSDrawScanlineCodeGenerator::TestAlpha()
|
|||
case AFAIL_KEEP:
|
||||
// test |= t;
|
||||
vpor(ymm7, ymm1);
|
||||
alltrue();
|
||||
alltrue(ymm7);
|
||||
break;
|
||||
|
||||
case AFAIL_FB_ONLY:
|
||||
|
@ -2309,7 +2309,7 @@ void GSDrawScanlineCodeGenerator::TestDestAlpha()
|
|||
|
||||
vpor(ymm7, ymm1);
|
||||
|
||||
alltrue();
|
||||
alltrue(ymm7);
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::WriteMask()
|
||||
|
|
|
@ -694,7 +694,7 @@ void GSDrawScanlineCodeGenerator::TestZ_SSE(const Xmm& temp1, const Xmm& temp2)
|
|||
break;
|
||||
}
|
||||
|
||||
alltrue();
|
||||
alltrue(xmm7);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2162,7 +2162,7 @@ void GSDrawScanlineCodeGenerator::TestAlpha_SSE()
|
|||
case AFAIL_KEEP:
|
||||
// test |= t;
|
||||
por(xmm7, xmm1);
|
||||
alltrue();
|
||||
alltrue(xmm7);
|
||||
break;
|
||||
|
||||
case AFAIL_FB_ONLY:
|
||||
|
@ -2344,7 +2344,7 @@ void GSDrawScanlineCodeGenerator::TestDestAlpha_SSE()
|
|||
|
||||
por(xmm7, xmm1);
|
||||
|
||||
alltrue();
|
||||
alltrue(xmm7);
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::WriteMask_SSE()
|
||||
|
|
Loading…
Reference in New Issue