GSdx: commit, commit, commit; that is what happens when you code in assembly.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@4443 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
gabest11 2011-03-17 04:58:31 +00:00
parent 542a4a6747
commit bbd43d5fe6
2 changed files with 56 additions and 85 deletions

View File

@ -1375,23 +1375,7 @@ return;
// c10 = addr10.gather32_32((const uint32/uint8*)tex[, clut]);
// c11 = addr11.gather32_32((const uint32/uint8*)tex[, clut]);
for(int i = 0; i < 4; i++)
{
mov(ebx, ptr[&m_local.temp.lod.i.u32[i]]);
mov(ebx, ptr[edx + ebx * sizeof(void*)]);
ReadTexel(xmm6, xmm5, i);
ReadTexel(xmm4, xmm2, i);
}
for(int i = 0; i < 4; i++)
{
mov(ebx, ptr[&m_local.temp.lod.i.u32[i]]);
mov(ebx, ptr[edx + ebx * sizeof(void*)]);
ReadTexel(xmm1, xmm0, i);
ReadTexel(xmm5, xmm3, i);
}
ReadTexel(4, 0);
// xmm6 = c00
// xmm4 = c01
@ -1487,26 +1471,20 @@ return;
{
// GSVector4i addr00 = y0 + x0;
vpaddd(xmm2, xmm4);
vpaddd(xmm5, xmm2, xmm4);
// c00 = addr00.gather32_32((const uint32/uint8*)tex[, clut]);
for(int i = 0; i < 4; i++)
{
mov(ebx, ptr[&m_local.temp.lod.i.u32[i]]);
mov(ebx, ptr[edx + ebx * sizeof(void*)]);
ReadTexel(xmm5, xmm2, i);
}
ReadTexel(1, 0);
// GSVector4i mask = GSVector4i::x00ff();
// c[0] = c00 & mask;
// c[1] = (c00 >> 8) & mask;
vpsrlw(xmm6, xmm5, 8);
vpsllw(xmm5, 8);
vpsllw(xmm5, xmm6, 8);
vpsrlw(xmm5, 8);
vpsrlw(xmm6, 8);
}
if(m_sel.mmin == 1) return; // round-off mode
@ -1638,23 +1616,7 @@ return;
// c10 = addr10.gather32_32((const uint32/uint8*)tex[, clut]);
// c11 = addr11.gather32_32((const uint32/uint8*)tex[, clut]);
for(int i = 0; i < 4; i++)
{
mov(ebx, ptr[&m_local.temp.lod.i.u32[i]]);
mov(ebx, ptr[edx + ebx * sizeof(void*) + sizeof(void*)]);
ReadTexel(xmm6, xmm5, i);
ReadTexel(xmm4, xmm2, i);
}
for(int i = 0; i < 4; i++)
{
mov(ebx, ptr[&m_local.temp.lod.i.u32[i]]);
mov(ebx, ptr[edx + ebx * sizeof(void*) + sizeof(void*)]);
ReadTexel(xmm1, xmm0, i);
ReadTexel(xmm5, xmm3, i);
}
ReadTexel(4, 1);
// xmm6 = c00
// xmm4 = c01
@ -1750,26 +1712,20 @@ return;
{
// GSVector4i addr00 = y0 + x0;
vpaddd(xmm2, xmm4);
vpaddd(xmm5, xmm2, xmm4);
// c00 = addr00.gather32_32((const uint32/uint8*)tex[, clut]);
for(int i = 0; i < 4; i++)
{
mov(ebx, ptr[&m_local.temp.lod.i.u32[i]]);
mov(ebx, ptr[edx + ebx * sizeof(void*) + sizeof(void*)]);
ReadTexel(xmm5, xmm2, i);
}
ReadTexel(1, 1);
// GSVector4i mask = GSVector4i::x00ff();
// c[0] = c00 & mask;
// c[1] = (c00 >> 8) & mask;
vpsrlw(xmm6, xmm5, 8);
vpsllw(xmm5, 8);
vpsllw(xmm5, xmm6, 8);
vpsrlw(xmm5, 8);
vpsrlw(xmm6, 8);
}
vmovdqa(xmm0, ptr[&m_local.temp.lod.f]);
@ -2803,6 +2759,8 @@ void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset)
ASSERT(pixels == 1 || pixels == 4);
mip_offset *= sizeof(void*);
if(m_sel.mmin)
{
int r[] = {5, 6, 2, 4, 0, 1, 3, 7};
@ -2810,24 +2768,31 @@ void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset)
if(pixels == 4)
{
vmovdqa(ptr[&m_local.temp.test], xmm7);
}
for(int i = 0; i < pixels; i++)
{
mov(ebx, ptr[&m_local.temp.lod.i.u32[i]]);
mov(ebx, ptr[edx + ebx * sizeof(void*) + mip_offset]);
for(int j = 0; j < 4; j++)
{
ReadTexel(Xmm(r[i * 2 + 1]), Xmm(r[i * 2 + 0]), j);
}
}
mov(ebx, ptr[&m_local.temp.lod.i.u32[j]]);
mov(ebx, ptr[edx + ebx * sizeof(void*) + mip_offset]);
for(int i = 0; i < 4; i++)
{
ReadTexel(Xmm(r[i * 2 + 1]), Xmm(r[i * 2 + 0]), j);
}
}
if(pixels == 4)
{
vmovdqa(xmm5, xmm7);
vmovdqa(xmm7, ptr[&m_local.temp.test]);
}
else
{
for(int j = 0; j < 4; j++)
{
mov(ebx, ptr[&m_local.temp.lod.i.u32[j]]);
mov(ebx, ptr[edx + ebx * sizeof(void*) + mip_offset]);
ReadTexel(xmm6, xmm5, j);
}
}
}
else
{

View File

@ -945,7 +945,6 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
// GSVector4i addr00 = y0 + x0;
paddd(xmm2, xmm4);
movdqa(xmm5, xmm2);
// c00 = addr00.gather32_32((const uint32/uint8*)tex[, clut]);
@ -1534,7 +1533,6 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD()
// GSVector4i addr00 = y0 + x0;
paddd(xmm2, xmm4);
movdqa(xmm5, xmm2);
// c00 = addr00.gather32_32((const uint32/uint8*)tex[, clut]);
@ -1634,8 +1632,8 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD()
pxor(xmm0, xmm0);
movdqa(xmm4, xmm2);
punpcklwd(xmm2, xmm0);
punpckhwd(xmm4, xmm0);
punpckhwd(xmm2, xmm0);
punpcklwd(xmm4, xmm0);
pslld(xmm2, m_sel.tw + 3);
// xmm0 = 0
@ -1687,7 +1685,7 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD()
// c10 = addr10.gather32_32((const uint32/uint8*)tex[, clut]);
// c11 = addr11.gather32_32((const uint32/uint8*)tex[, clut]);
ReadTexel(4, 0);
ReadTexel(4, 1);
// xmm6 = c00
// xmm4 = c01
@ -1788,12 +1786,11 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD()
// GSVector4i addr00 = y0 + x0;
paddd(xmm2, xmm4);
movdqa(xmm5, xmm2);
// c00 = addr00.gather32_32((const uint32/uint8*)tex[, clut]);
ReadTexel(1, 0);
ReadTexel(1, 1);
// GSVector4i mask = GSVector4i::x00ff();
@ -2917,6 +2914,8 @@ void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset)
ASSERT(pixels == 1 || pixels == 4);
mip_offset *= sizeof(void*);
if(m_sel.mmin)
{
#if _M_SSE >= 0x401
@ -2925,24 +2924,31 @@ void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset)
if(pixels == 4)
{
movdqa(ptr[&m_local.temp.test], xmm7);
}
for(int i = 0; i < pixels; i++)
{
mov(ebx, ptr[&m_local.temp.lod.i.u32[i]]);
mov(ebx, ptr[edx + ebx * sizeof(void*) + mip_offset]);
vmovdqa(ptr[&m_local.temp.test], xmm7);
for(int j = 0; j < 4; j++)
{
ReadTexel(Xmm(r[i * 2 + 1]), Xmm(r[i * 2 + 0]), j);
}
}
mov(ebx, ptr[&m_local.temp.lod.i.u32[j]]);
mov(ebx, ptr[edx + ebx * sizeof(void*) + mip_offset]);
if(pixels == 4)
for(int i = 0; i < 4; i++)
{
ReadTexel(Xmm(r[i * 2 + 1]), Xmm(r[i * 2 + 0]), j);
}
}
vmovdqa(xmm5, xmm7);
vmovdqa(xmm7, ptr[&m_local.temp.test]);
}
else
{
movdqa(xmm5, xmm7);
movdqa(xmm7, ptr[&m_local.temp.test]);
for(int j = 0; j < 4; j++)
{
mov(ebx, ptr[&m_local.temp.lod.i.u32[j]]);
mov(ebx, ptr[edx + ebx * sizeof(void*) + mip_offset]);
ReadTexel(xmm6, xmm5, j);
}
}
#else