mirror of https://github.com/PCSX2/pcsx2.git
gsdx sw x64: keep the copy of top on the stack for dthe
An extra stack access per pixel won't impact performance (it only affects 16-bit games with dithering). It saves 2 registers (a1/a3).
This commit is contained in:
parent
994274623b
commit
b76305a0d2
|
@ -57,6 +57,7 @@ static const int _rz_r12 = -8 * 2;
|
||||||
static const int _rz_r13 = -8 * 3;
|
static const int _rz_r13 = -8 * 3;
|
||||||
static const int _rz_r14 = -8 * 4;
|
static const int _rz_r14 = -8 * 4;
|
||||||
static const int _rz_r15 = -8 * 5;
|
static const int _rz_r15 = -8 * 5;
|
||||||
|
static const int _rz_top = -8 * 6;
|
||||||
static const int _rz_zs = -8 * 8;
|
static const int _rz_zs = -8 * 8;
|
||||||
static const int _rz_zd = -8 * 10;
|
static const int _rz_zd = -8 * 10;
|
||||||
static const int _rz_cov = -8 * 12;
|
static const int _rz_cov = -8 * 12;
|
||||||
|
@ -485,7 +486,8 @@ void GSDrawScanlineCodeGenerator::Init_AVX()
|
||||||
if(m_sel.fwrite && m_sel.fpsm == 2 && m_sel.dthe)
|
if(m_sel.fwrite && m_sel.fpsm == 2 && m_sel.dthe)
|
||||||
{
|
{
|
||||||
// On linux, a2 is edx which will be used for fzm
|
// On linux, a2 is edx which will be used for fzm
|
||||||
mov(a1, a2);
|
// In all case, it will require a mov in dthe code, so let's keep the value on the stack
|
||||||
|
mov(ptr[rsp + _rz_top], a2);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1816,19 +1818,20 @@ void GSDrawScanlineCodeGenerator::WriteFrame_AVX()
|
||||||
|
|
||||||
if(m_sel.fpsm == 2 && m_sel.dthe)
|
if(m_sel.fpsm == 2 && m_sel.dthe)
|
||||||
{
|
{
|
||||||
mov(a3, _rip_global(dimx));
|
|
||||||
|
|
||||||
// y = (top & 3) << 5
|
// y = (top & 3) << 5
|
||||||
|
|
||||||
mov(eax, a1.cvt32());
|
mov(eax, ptr[rsp + _rz_top]);
|
||||||
and(eax, 3);
|
and(eax, 3);
|
||||||
shl(eax, 5);
|
shl(eax, 5);
|
||||||
|
|
||||||
// rb = rb.add16(m_global.dimx[0 + y]);
|
// rb = rb.add16(m_global.dimx[0 + y]);
|
||||||
// ga = ga.add16(m_global.dimx[1 + y]);
|
// ga = ga.add16(m_global.dimx[1 + y]);
|
||||||
|
|
||||||
vpaddw(xmm2, ptr[a3 + rax + sizeof(GSVector4i) * 0]);
|
add(rax, _rip_global(dimx));
|
||||||
vpaddw(xmm3, ptr[a3 + rax + sizeof(GSVector4i) * 1]);
|
|
||||||
|
vpaddw(xmm2, ptr[rax + sizeof(GSVector4i) * 0]);
|
||||||
|
vpaddw(xmm3, ptr[rax + sizeof(GSVector4i) * 1]);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if(m_sel.colclamp == 0)
|
if(m_sel.colclamp == 0)
|
||||||
|
|
Loading…
Reference in New Issue