gsdx sw x64: keep the copy of top on the stack for dthe

One extra stack access per pixel won't impact performance (only 16-bit games with dithering are affected).
This frees up 2 registers (a1/a3).
This commit is contained in:
Gregory Hainaut 2016-11-20 19:05:37 +01:00
parent 994274623b
commit b76305a0d2
1 changed file with 9 additions and 6 deletions

View File

@ -57,6 +57,7 @@ static const int _rz_r12 = -8 * 2;
static const int _rz_r13 = -8 * 3;
static const int _rz_r14 = -8 * 4;
static const int _rz_r15 = -8 * 5;
static const int _rz_top = -8 * 6;
static const int _rz_zs = -8 * 8;
static const int _rz_zd = -8 * 10;
static const int _rz_cov = -8 * 12;
@ -485,7 +486,8 @@ void GSDrawScanlineCodeGenerator::Init_AVX()
if(m_sel.fwrite && m_sel.fpsm == 2 && m_sel.dthe)
{
// On linux, a2 is edx which will be used for fzm
mov(a1, a2);
// In all cases, the dthe code will need a mov anyway, so let's keep the value on the stack
mov(ptr[rsp + _rz_top], a2);
}
}
@ -1816,19 +1818,20 @@ void GSDrawScanlineCodeGenerator::WriteFrame_AVX()
if(m_sel.fpsm == 2 && m_sel.dthe)
{
mov(a3, _rip_global(dimx));
// y = (top & 3) << 5
mov(eax, a1.cvt32());
mov(eax, ptr[rsp + _rz_top]);
and(eax, 3);
shl(eax, 5);
// rb = rb.add16(m_global.dimx[0 + y]);
// ga = ga.add16(m_global.dimx[1 + y]);
vpaddw(xmm2, ptr[a3 + rax + sizeof(GSVector4i) * 0]);
vpaddw(xmm3, ptr[a3 + rax + sizeof(GSVector4i) * 1]);
add(rax, _rip_global(dimx));
vpaddw(xmm2, ptr[rax + sizeof(GSVector4i) * 0]);
vpaddw(xmm3, ptr[rax + sizeof(GSVector4i) * 1]);
}
if(m_sel.colclamp == 0)