diff --git a/plugins/GSdx/GSDrawScanlineCodeGenerator.cpp b/plugins/GSdx/GSDrawScanlineCodeGenerator.cpp index 67b8c8e93a..be479f4ec4 100644 --- a/plugins/GSdx/GSDrawScanlineCodeGenerator.cpp +++ b/plugins/GSdx/GSDrawScanlineCodeGenerator.cpp @@ -122,82 +122,7 @@ GSDrawScanlineCodeGenerator::GSDrawScanlineCodeGenerator(void* param, uint64 key } } -#if _M_SSE >= 0x501 - -void GSDrawScanlineCodeGenerator::modulate16(const Ymm& a, const Operand& f, int shift) -{ - if(shift == 0) - { - vpmulhrsw(a, f); - } - else - { - vpsllw(a, (uint8)(shift + 1)); - vpmulhw(a, f); - } -} - -void GSDrawScanlineCodeGenerator::lerp16(const Ymm& a, const Ymm& b, const Ymm& f, int shift) -{ - vpsubw(a, b); - modulate16(a, f, shift); - vpaddw(a, b); -} - -void GSDrawScanlineCodeGenerator::lerp16_4(const Ymm& a, const Ymm& b, const Ymm& f) -{ - vpsubw(a, b); - vpmullw(a, f); - vpsraw(a, 4); - vpaddw(a, b); -} - -void GSDrawScanlineCodeGenerator::mix16(const Ymm& a, const Ymm& b, const Ymm& temp) -{ - vpblendw(a, b, 0xaa); -} - -void GSDrawScanlineCodeGenerator::clamp16(const Ymm& a, const Ymm& temp) -{ - vpackuswb(a, a); - vpermq(a, a, _MM_SHUFFLE(3, 1, 2, 0)); // this sucks - vpmovzxbw(a, a); -} - -void GSDrawScanlineCodeGenerator::alltrue() -{ - vpmovmskb(eax, ymm7); - cmp(eax, 0xffffffff); - je("step", T_NEAR); -} - -void GSDrawScanlineCodeGenerator::blend(const Ymm& a, const Ymm& b, const Ymm& mask) -{ - vpand(b, mask); - vpandn(mask, a); - vpor(a, b, mask); -} - -void GSDrawScanlineCodeGenerator::blendr(const Ymm& b, const Ymm& a, const Ymm& mask) -{ - vpand(b, mask); - vpandn(mask, a); - vpor(b, mask); -} - -void GSDrawScanlineCodeGenerator::blend8(const Ymm& a, const Ymm& b) -{ - vpblendvb(a, a, b, xmm0); -} - -void GSDrawScanlineCodeGenerator::blend8r(const Ymm& b, const Ymm& a) -{ - vpblendvb(b, a, b, xmm0); -} - -#else - -void GSDrawScanlineCodeGenerator::modulate16(const Xmm& a, const Operand& f, int shift) +void GSDrawScanlineCodeGenerator::modulate16(const Xmm& a, const Operand& f, uint8 shift) { if(g_cpu.has(util::Cpu::tAVX)) { @@ -226,7 +151,7 @@ void GSDrawScanlineCodeGenerator::modulate16(const Xmm& a, const Operand& f, int } } -void GSDrawScanlineCodeGenerator::lerp16(const Xmm& a, const Xmm& b, const Xmm& f, int shift) +void GSDrawScanlineCodeGenerator::lerp16(const Xmm& a, const Xmm& b, const Xmm& f, uint8 shift) { if(g_cpu.has(util::Cpu::tAVX)) { @@ -288,6 +213,15 @@ void GSDrawScanlineCodeGenerator::clamp16(const Xmm& a, const Xmm& temp) if(g_cpu.has(util::Cpu::tAVX)) { vpackuswb(a, a); + +#if _M_SSE >= 0x501 + // Greg: why ? + if(g_cpu.has(util::Cpu::tAVX2)) { + ASSERT(a.isYMM()); + vpermq(Ymm(a.getIdx()), Ymm(a.getIdx()), _MM_SHUFFLE(3, 1, 2, 0)); // this sucks + } +#endif + vpmovzxbw(a, a); } else @@ -306,18 +240,20 @@ void GSDrawScanlineCodeGenerator::clamp16(const Xmm& a, const Xmm& temp) } } -void GSDrawScanlineCodeGenerator::alltrue() +void GSDrawScanlineCodeGenerator::alltrue(const Xmm& test) { + uint32 mask = test.isYMM() ? 0xffffffff : 0xffff; + if(g_cpu.has(util::Cpu::tAVX)) { - vpmovmskb(eax, xmm7); - cmp(eax, 0xffff); + vpmovmskb(eax, test); + cmp(eax, mask); je("step", T_NEAR); } else { - pmovmskb(eax, xmm7); - cmp(eax, 0xffff); + pmovmskb(eax, test); + cmp(eax, mask); je("step", T_NEAR); } } @@ -416,5 +352,3 @@ void GSDrawScanlineCodeGenerator::split16_2x8(const Xmm& l, const Xmm& h, const psrlw(h, 8); } } - -#endif diff --git a/plugins/GSdx/GSDrawScanlineCodeGenerator.h b/plugins/GSdx/GSDrawScanlineCodeGenerator.h index e125b95b58..2db5658da4 100644 --- a/plugins/GSdx/GSDrawScanlineCodeGenerator.h +++ b/plugins/GSdx/GSDrawScanlineCodeGenerator.h @@ -71,17 +71,6 @@ class GSDrawScanlineCodeGenerator : public GSCodeGenerator void ReadTexel(int pixels, int mip_offset = 0); void ReadTexel(const Ymm& dst, const Ymm& addr, uint8 i); - void modulate16(const Ymm& a, const Operand& f, int shift); - void lerp16(const Ymm& a, const Ymm& b, const Ymm& f, int shift); - void lerp16_4(const Ymm& a, const Ymm& b, const Ymm& f); - void mix16(const Ymm& a, const Ymm& b, const Ymm& temp); - void clamp16(const Ymm& a, const Ymm& temp); - void alltrue(); - void blend(const Ymm& a, const Ymm& b, const Ymm& mask); - void blendr(const Ymm& b, const Ymm& a, const Ymm& mask); - void blend8(const Ymm& a, const Ymm& b); - void blend8r(const Ymm& b, const Ymm& a); - #else void Generate_SSE(); @@ -138,20 +127,20 @@ class GSDrawScanlineCodeGenerator : public GSCodeGenerator void ReadTexel_AVX(int pixels, int mip_offset = 0); void ReadTexel_AVX(const Xmm& dst, const Xmm& addr, uint8 i); - void modulate16(const Xmm& a, const Operand& f, int shift); - void lerp16(const Xmm& a, const Xmm& b, const Xmm& f, int shift); + #endif + + void modulate16(const Xmm& a, const Operand& f, uint8 shift); + void lerp16(const Xmm& a, const Xmm& b, const Xmm& f, uint8 shift); void lerp16_4(const Xmm& a, const Xmm& b, const Xmm& f); void mix16(const Xmm& a, const Xmm& b, const Xmm& temp); void clamp16(const Xmm& a, const Xmm& temp); - void alltrue(); + void alltrue(const Xmm& test); void blend(const Xmm& a, const Xmm& b, const Xmm& mask); void blendr(const Xmm& b, const Xmm& a, const Xmm& mask); void blend8(const Xmm& a, const Xmm& b); void blend8r(const Xmm& b, const Xmm& a); void split16_2x8(const Xmm& l, const Xmm& h, const Xmm& src); - #endif - public: GSDrawScanlineCodeGenerator(void* param, uint64 key, void* code, size_t maxsize); diff --git a/plugins/GSdx/GSDrawScanlineCodeGenerator.x64.avx.cpp b/plugins/GSdx/GSDrawScanlineCodeGenerator.x64.avx.cpp index fa9c3d5493..b8d0d2823a 100644 --- a/plugins/GSdx/GSDrawScanlineCodeGenerator.x64.avx.cpp +++ b/plugins/GSdx/GSDrawScanlineCodeGenerator.x64.avx.cpp @@ -727,7 +727,7 @@ void GSDrawScanlineCodeGenerator::TestZ_AVX(const Xmm& temp1, const Xmm& temp2) break; } - alltrue(); + alltrue(_test); } } @@ -1337,7 +1337,7 @@ void GSDrawScanlineCodeGenerator::TestAlpha_AVX() case AFAIL_KEEP: // test |= t; vpor(_test, xmm1); - alltrue(); + alltrue(_test); break; case AFAIL_FB_ONLY: @@ -1509,7 +1509,7 @@ void GSDrawScanlineCodeGenerator::TestDestAlpha_AVX() vpor(_test, xmm1); - alltrue(); + alltrue(_test); } void GSDrawScanlineCodeGenerator::WriteMask_AVX() diff --git a/plugins/GSdx/GSDrawScanlineCodeGenerator.x86.avx.cpp b/plugins/GSdx/GSDrawScanlineCodeGenerator.x86.avx.cpp index a2278cbad0..aabfbd88c5 100644 --- a/plugins/GSdx/GSDrawScanlineCodeGenerator.x86.avx.cpp +++ b/plugins/GSdx/GSDrawScanlineCodeGenerator.x86.avx.cpp @@ -689,7 +689,7 @@ void GSDrawScanlineCodeGenerator::TestZ_AVX(const Xmm& temp1, const Xmm& temp2) break; } - alltrue(); + alltrue(xmm7); } } @@ -2130,7 +2130,7 @@ void GSDrawScanlineCodeGenerator::TestAlpha_AVX() case AFAIL_KEEP: // test |= t; vpor(xmm7, xmm1); - alltrue(); + alltrue(xmm7); break; case AFAIL_FB_ONLY: @@ -2313,7 +2313,7 @@ void GSDrawScanlineCodeGenerator::TestDestAlpha_AVX() vpor(xmm7, xmm1); - alltrue(); + alltrue(xmm7); } void GSDrawScanlineCodeGenerator::WriteMask_AVX() diff --git a/plugins/GSdx/GSDrawScanlineCodeGenerator.x86.avx2.cpp b/plugins/GSdx/GSDrawScanlineCodeGenerator.x86.avx2.cpp index 3c5ae67268..b2be17df92 100644 --- a/plugins/GSdx/GSDrawScanlineCodeGenerator.x86.avx2.cpp +++ b/plugins/GSdx/GSDrawScanlineCodeGenerator.x86.avx2.cpp @@ -691,7 +691,7 @@ void GSDrawScanlineCodeGenerator::TestZ(const Ymm& temp1, const Ymm& temp2) break; } - alltrue(); + alltrue(ymm7); } } @@ -2118,7 +2118,7 @@ void GSDrawScanlineCodeGenerator::TestAlpha() case AFAIL_KEEP: // test |= t; vpor(ymm7, ymm1); - alltrue(); + alltrue(ymm7); break; case AFAIL_FB_ONLY: @@ -2309,7 +2309,7 @@ void GSDrawScanlineCodeGenerator::TestDestAlpha() vpor(ymm7, ymm1); - alltrue(); + alltrue(ymm7); } void GSDrawScanlineCodeGenerator::WriteMask() diff --git a/plugins/GSdx/GSDrawScanlineCodeGenerator.x86.cpp b/plugins/GSdx/GSDrawScanlineCodeGenerator.x86.cpp index 60ac137655..3c9f926f16 100644 --- a/plugins/GSdx/GSDrawScanlineCodeGenerator.x86.cpp +++ b/plugins/GSdx/GSDrawScanlineCodeGenerator.x86.cpp @@ -694,7 +694,7 @@ void GSDrawScanlineCodeGenerator::TestZ_SSE(const Xmm& temp1, const Xmm& temp2) break; } - alltrue(); + alltrue(xmm7); } } @@ -2162,7 +2162,7 @@ void GSDrawScanlineCodeGenerator::TestAlpha_SSE() case AFAIL_KEEP: // test |= t; por(xmm7, xmm1); - alltrue(); + alltrue(xmm7); break; case AFAIL_FB_ONLY: @@ -2344,7 +2344,7 @@ void GSDrawScanlineCodeGenerator::TestDestAlpha_SSE() por(xmm7, xmm1); - alltrue(); + alltrue(xmm7); } void GSDrawScanlineCodeGenerator::WriteMask_SSE()