From bbd43d5fe6615e5f1a0f4f4ff9bf00f884bf2213 Mon Sep 17 00:00:00 2001 From: gabest11 Date: Thu, 17 Mar 2011 04:58:31 +0000 Subject: [PATCH] GSdx: commit, commit, commit, that happens if you code in assembly. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@4443 96395faa-99c1-11dd-bbfe-3dabce05a288 --- .../GSDrawScanlineCodeGenerator.x86.avx.cpp | 95 ++++++------------- .../GSdx/GSDrawScanlineCodeGenerator.x86.cpp | 46 +++++---- 2 files changed, 56 insertions(+), 85 deletions(-) diff --git a/plugins/GSdx/GSDrawScanlineCodeGenerator.x86.avx.cpp b/plugins/GSdx/GSDrawScanlineCodeGenerator.x86.avx.cpp index d6c354049b..4fcb0fa798 100644 --- a/plugins/GSdx/GSDrawScanlineCodeGenerator.x86.avx.cpp +++ b/plugins/GSdx/GSDrawScanlineCodeGenerator.x86.avx.cpp @@ -1375,23 +1375,7 @@ return; // c10 = addr10.gather32_32((const uint32/uint8*)tex[, clut]); // c11 = addr11.gather32_32((const uint32/uint8*)tex[, clut]); - for(int i = 0; i < 4; i++) - { - mov(ebx, ptr[&m_local.temp.lod.i.u32[i]]); - mov(ebx, ptr[edx + ebx * sizeof(void*)]); - - ReadTexel(xmm6, xmm5, i); - ReadTexel(xmm4, xmm2, i); - } - - for(int i = 0; i < 4; i++) - { - mov(ebx, ptr[&m_local.temp.lod.i.u32[i]]); - mov(ebx, ptr[edx + ebx * sizeof(void*)]); - - ReadTexel(xmm1, xmm0, i); - ReadTexel(xmm5, xmm3, i); - } + ReadTexel(4, 0); // xmm6 = c00 // xmm4 = c01 @@ -1487,26 +1471,20 @@ return; { // GSVector4i addr00 = y0 + x0; - vpaddd(xmm2, xmm4); + vpaddd(xmm5, xmm2, xmm4); // c00 = addr00.gather32_32((const uint32/uint8*)tex[, clut]); - for(int i = 0; i < 4; i++) - { - mov(ebx, ptr[&m_local.temp.lod.i.u32[i]]); - mov(ebx, ptr[edx + ebx * sizeof(void*)]); - - ReadTexel(xmm5, xmm2, i); - } + ReadTexel(1, 0); // GSVector4i mask = GSVector4i::x00ff(); // c[0] = c00 & mask; // c[1] = (c00 >> 8) & mask; - vpsrlw(xmm6, xmm5, 8); - vpsllw(xmm5, 8); + vpsllw(xmm5, xmm6, 8); vpsrlw(xmm5, 8); + vpsrlw(xmm6, 8); } if(m_sel.mmin == 1) return; // round-off mode @@ -1638,23 +1616,7 @@ return; // c10 = addr10.gather32_32((const uint32/uint8*)tex[, clut]); // c11 = addr11.gather32_32((const uint32/uint8*)tex[, clut]); - for(int i = 0; i < 4; i++) - { - mov(ebx, ptr[&m_local.temp.lod.i.u32[i]]); - mov(ebx, ptr[edx + ebx * sizeof(void*) + sizeof(void*)]); - - ReadTexel(xmm6, xmm5, i); - ReadTexel(xmm4, xmm2, i); - } - - for(int i = 0; i < 4; i++) - { - mov(ebx, ptr[&m_local.temp.lod.i.u32[i]]); - mov(ebx, ptr[edx + ebx * sizeof(void*) + sizeof(void*)]); - - ReadTexel(xmm1, xmm0, i); - ReadTexel(xmm5, xmm3, i); - } + ReadTexel(4, 1); // xmm6 = c00 // xmm4 = c01 @@ -1750,26 +1712,20 @@ return; { // GSVector4i addr00 = y0 + x0; - vpaddd(xmm2, xmm4); + vpaddd(xmm5, xmm2, xmm4); // c00 = addr00.gather32_32((const uint32/uint8*)tex[, clut]); - for(int i = 0; i < 4; i++) - { - mov(ebx, ptr[&m_local.temp.lod.i.u32[i]]); - mov(ebx, ptr[edx + ebx * sizeof(void*) + sizeof(void*)]); - - ReadTexel(xmm5, xmm2, i); - } + ReadTexel(1, 1); // GSVector4i mask = GSVector4i::x00ff(); // c[0] = c00 & mask; // c[1] = (c00 >> 8) & mask; - vpsrlw(xmm6, xmm5, 8); - vpsllw(xmm5, 8); + vpsllw(xmm5, xmm6, 8); vpsrlw(xmm5, 8); + vpsrlw(xmm6, 8); } vmovdqa(xmm0, ptr[&m_local.temp.lod.f]); @@ -2803,6 +2759,8 @@ void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset) ASSERT(pixels == 1 || pixels == 4); + mip_offset *= sizeof(void*); + if(m_sel.mmin) { int r[] = {5, 6, 2, 4, 0, 1, 3, 7}; @@ -2810,24 +2768,31 @@ void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset) if(pixels == 4) { vmovdqa(ptr[&m_local.temp.test], xmm7); - } - - for(int i = 0; i < pixels; i++) - { - mov(ebx, ptr[&m_local.temp.lod.i.u32[i]]); - mov(ebx, ptr[edx + ebx * sizeof(void*) + mip_offset]); for(int j = 0; j < 4; j++) { - ReadTexel(Xmm(r[i * 2 + 1]), Xmm(r[i * 2 + 0]), j); - } - } + mov(ebx, ptr[&m_local.temp.lod.i.u32[j]]); + mov(ebx, ptr[edx + ebx * sizeof(void*) + mip_offset]); + + for(int i = 0; i < 4; i++) + { + ReadTexel(Xmm(r[i * 2 + 1]), Xmm(r[i * 2 + 0]), j); + } + } - if(pixels == 4) - { vmovdqa(xmm5, xmm7); vmovdqa(xmm7, ptr[&m_local.temp.test]); } + else + { + for(int j = 0; j < 4; j++) + { + mov(ebx, ptr[&m_local.temp.lod.i.u32[j]]); + mov(ebx, ptr[edx + ebx * sizeof(void*) + mip_offset]); + + ReadTexel(xmm6, xmm5, j); + } + } } else { diff --git a/plugins/GSdx/GSDrawScanlineCodeGenerator.x86.cpp b/plugins/GSdx/GSDrawScanlineCodeGenerator.x86.cpp index 9bb96afacc..50ebaa0321 100644 --- a/plugins/GSdx/GSDrawScanlineCodeGenerator.x86.cpp +++ b/plugins/GSdx/GSDrawScanlineCodeGenerator.x86.cpp @@ -945,7 +945,6 @@ void GSDrawScanlineCodeGenerator::SampleTexture() // GSVector4i addr00 = y0 + x0; paddd(xmm2, xmm4); - movdqa(xmm5, xmm2); // c00 = addr00.gather32_32((const uint32/uint8*)tex[, clut]); @@ -1534,7 +1533,6 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD() // GSVector4i addr00 = y0 + x0; paddd(xmm2, xmm4); - movdqa(xmm5, xmm2); // c00 = addr00.gather32_32((const uint32/uint8*)tex[, clut]); @@ -1634,8 +1632,8 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD() pxor(xmm0, xmm0); movdqa(xmm4, xmm2); - punpcklwd(xmm2, xmm0); - punpckhwd(xmm4, xmm0); + punpckhwd(xmm2, xmm0); + punpcklwd(xmm4, xmm0); pslld(xmm2, m_sel.tw + 3); // xmm0 = 0 @@ -1687,7 +1685,7 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD() // c10 = addr10.gather32_32((const uint32/uint8*)tex[, clut]); // c11 = addr11.gather32_32((const uint32/uint8*)tex[, clut]); - ReadTexel(4, 0); + ReadTexel(4, 1); // xmm6 = c00 // xmm4 = c01 @@ -1788,12 +1786,11 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD() // GSVector4i addr00 = y0 + x0; paddd(xmm2, xmm4); - movdqa(xmm5, xmm2); // c00 = addr00.gather32_32((const uint32/uint8*)tex[, clut]); - ReadTexel(1, 0); + ReadTexel(1, 1); // GSVector4i mask = GSVector4i::x00ff(); @@ -2917,6 +2914,8 @@ void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset) ASSERT(pixels == 1 || pixels == 4); + mip_offset *= sizeof(void*); + if(m_sel.mmin) { #if _M_SSE >= 0x401 @@ -2925,24 +2924,31 @@ void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset) if(pixels == 4) { - movdqa(ptr[&m_local.temp.test], xmm7); - } - - for(int i = 0; i < pixels; i++) - { - mov(ebx, ptr[&m_local.temp.lod.i.u32[i]]); - mov(ebx, ptr[edx + ebx * sizeof(void*) + mip_offset]); + vmovdqa(ptr[&m_local.temp.test], xmm7); for(int j = 0; j < 4; j++) { - ReadTexel(Xmm(r[i * 2 + 1]), Xmm(r[i * 2 + 0]), j); - } - } + mov(ebx, ptr[&m_local.temp.lod.i.u32[j]]); + mov(ebx, ptr[edx + ebx * sizeof(void*) + mip_offset]); - if(pixels == 4) + for(int i = 0; i < 4; i++) + { + ReadTexel(Xmm(r[i * 2 + 1]), Xmm(r[i * 2 + 0]), j); + } + } + + vmovdqa(xmm5, xmm7); + vmovdqa(xmm7, ptr[&m_local.temp.test]); + } + else { - movdqa(xmm5, xmm7); - movdqa(xmm7, ptr[&m_local.temp.test]); + for(int j = 0; j < 4; j++) + { + mov(ebx, ptr[&m_local.temp.lod.i.u32[j]]); + mov(ebx, ptr[edx + ebx * sizeof(void*) + mip_offset]); + + ReadTexel(xmm6, xmm5, j); + } } #else