From b76305a0d21a522ab560185e95b7f4f578d212bb Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Sun, 20 Nov 2016 19:05:37 +0100 Subject: [PATCH] gsdx sw x64: keep the copy of top in the stack for dthe An extra stack access per pixel won't impact perf (it only affects 16-bit games with dithering). This allows saving 2 registers (a1/a3). --- .../GSdx/GSDrawScanlineCodeGenerator.x64.avx.cpp | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/plugins/GSdx/GSDrawScanlineCodeGenerator.x64.avx.cpp b/plugins/GSdx/GSDrawScanlineCodeGenerator.x64.avx.cpp index 8293599abe..cb35f22881 100644 --- a/plugins/GSdx/GSDrawScanlineCodeGenerator.x64.avx.cpp +++ b/plugins/GSdx/GSDrawScanlineCodeGenerator.x64.avx.cpp @@ -57,6 +57,7 @@ static const int _rz_r12 = -8 * 2; static const int _rz_r13 = -8 * 3; static const int _rz_r14 = -8 * 4; static const int _rz_r15 = -8 * 5; +static const int _rz_top = -8 * 6; static const int _rz_zs = -8 * 8; static const int _rz_zd = -8 * 10; static const int _rz_cov = -8 * 12; @@ -485,7 +486,8 @@ void GSDrawScanlineCodeGenerator::Init_AVX() if(m_sel.fwrite && m_sel.fpsm == 2 && m_sel.dthe) { // On linux, a2 is edx which will be used for fzm - mov(a1, a2); + // In all case, it will require a mov in dthe code, so let's keep the value on the stack + mov(ptr[rsp + _rz_top], a2); } } @@ -1816,19 +1818,20 @@ void GSDrawScanlineCodeGenerator::WriteFrame_AVX() if(m_sel.fpsm == 2 && m_sel.dthe) { - mov(a3, _rip_global(dimx)); - // y = (top & 3) << 5 - mov(eax, a1.cvt32()); + mov(eax, ptr[rsp + _rz_top]); and(eax, 3); shl(eax, 5); // rb = rb.add16(m_global.dimx[0 + y]); // ga = ga.add16(m_global.dimx[1 + y]); - vpaddw(xmm2, ptr[a3 + rax + sizeof(GSVector4i) * 0]); - vpaddw(xmm3, ptr[a3 + rax + sizeof(GSVector4i) * 1]); + add(rax, _rip_global(dimx)); + + vpaddw(xmm2, ptr[rax + sizeof(GSVector4i) * 0]); + vpaddw(xmm3, ptr[rax + sizeof(GSVector4i) * 1]); + } if(m_sel.colclamp == 0)