From 3143be1da1847601e4b2285e447fce0ef3990dc1 Mon Sep 17 00:00:00 2001
From: arcum42
Date: Sat, 17 Jul 2010 01:12:33 +0000
Subject: [PATCH] zzogl-pg: Converted the TransferLocalLocal defines into inlined functions.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@3508 96395faa-99c1-11dd-bbfe-3dabce05a288
---
Review notes (changes relative to the patch as originally committed):
 * The write/read helper arguments passed to _TransferLocalLocal were swapped
   compared to the macro being replaced: TRANSFERLOCALLOCAL(src, dst, n)
   expanded to writePixel<dst>_0 / readPixel<src>_0, so every mixed-format
   call now passes writePixel<dst>_0 first and readPixel<src>_0 second.
 * _TransferLocalLocal_4 now copies pixels j+4..j+7 in the second half of
   each group of eight instead of repeating j+2 / j+3 three more times.
 * Header comments were added to both helpers; the first hunk length, the
   later hunk offsets and the diffstat account for the 11 extra lines.
   The blob ids on the "index" line still refer to the original commit.

 plugins/zzogl-pg/opengl/HostMemory.cpp | 238 +++++++++++++------------
 plugins/zzogl-pg/opengl/NewRegs.cpp    |   3 +-
 2 files changed, 133 insertions(+), 108 deletions(-)

diff --git a/plugins/zzogl-pg/opengl/HostMemory.cpp b/plugins/zzogl-pg/opengl/HostMemory.cpp
index 4c098fb82b..caec95a8fa 100644
--- a/plugins/zzogl-pg/opengl/HostMemory.cpp
+++ b/plugins/zzogl-pg/opengl/HostMemory.cpp
@@ -371,6 +371,100 @@
 	}
 }
 
+// Copies the gs.imageWnew x gs.imageHnew rectangle at (trxpos.sx, trxpos.sy) in
+// the source buffer to (trxpos.dx, trxpos.dy) in the destination buffer. Each
+// pixel is fetched with rp (source format) and stored with wp (destination
+// format); coordinates wrap at 2048. widthlimit (at most 4) is the number of
+// pixels handled per inner-loop step.
+// NOTE(review): the early-out tests (imageWnew & widthlimit), not
+// imageWnew % widthlimit -- kept as in the old macro; confirm it is intended.
+__forceinline void _TransferLocalLocal(_writePixel_0 wp, _readPixel_0 rp, u32 widthlimit)
+{
+	u8* pSrcBuf = g_pbyGSMemory + gs.srcbuf.bp * 256;
+	u8* pDstBuf = g_pbyGSMemory + gs.dstbuf.bp * 256;
+
+	if ((gs.imageWnew & widthlimit) != 0) return;
+	assert(widthlimit <= 4);
+
+	for(int i = gs.trxpos.sy, i2 = gs.trxpos.dy; i < gs.trxpos.sy+gs.imageHnew; i++, i2++)
+	{
+		for(int j = gs.trxpos.sx, j2 = gs.trxpos.dx; j < gs.trxpos.sx+gs.imageWnew; j+=widthlimit, j2+=widthlimit)
+		{
+			wp(pDstBuf, j2%2048, i2%2048,
+				rp(pSrcBuf, j%2048, i%2048, gs.srcbuf.bw), gs.dstbuf.bw);
+
+			if (widthlimit > 1)
+			{
+				wp(pDstBuf, (j2+1)%2048, i2%2048,
+					rp(pSrcBuf, (j+1)%2048, i%2048, gs.srcbuf.bw), gs.dstbuf.bw);
+
+				if (widthlimit > 2)
+				{
+					wp(pDstBuf, (j2+2)%2048, i2%2048,
+						rp(pSrcBuf, (j+2)%2048, i%2048, gs.srcbuf.bw), gs.dstbuf.bw);
+
+					if (widthlimit > 3)
+					{
+						wp(pDstBuf, (j2+3)%2048, i2%2048,
+							rp(pSrcBuf, (j+3)%2048, i%2048, gs.srcbuf.bw), gs.dstbuf.bw);
+					}
+				}
+			}
+		}
+	}
+}
+
+// 4-bit variant: moves eight pixels per inner-loop step. gsp/gdp yield the
+// index into the source/destination buffer for a pixel; even pixels replace the
+// low nibble of the destination byte, odd pixels the high nibble.
+// imageWnew must be a multiple of 8.
+__forceinline void _TransferLocalLocal_4(_getPixelAddress_0 gsp, _getPixelAddress_0 gdp)
+{
+	u8* pSrcBuf = g_pbyGSMemory + gs.srcbuf.bp * 256;
+	u8* pDstBuf = g_pbyGSMemory + gs.dstbuf.bp * 256;
+
+	assert((gs.imageWnew % 8) == 0);
+
+	for(int i = gs.trxpos.sy, i2 = gs.trxpos.dy; i < gs.trxpos.sy + gs.imageHnew; ++i, ++i2)
+	{
+		for(int j = gs.trxpos.sx, j2 = gs.trxpos.dx; j < gs.trxpos.sx + gs.imageWnew; j += 8, j2 += 8)
+		{
+			/* NOTE: the 2 consecutive 4-bit values are NOT in the same byte */
+			u32 read = gsp(j%2048, i%2048, gs.srcbuf.bw);
+			u32 write = gdp(j2%2048, i2%2048, gs.dstbuf.bw);
+			pDstBuf[write] = (pDstBuf[write]&0xf0)|(pSrcBuf[read]&0x0f);
+
+			read = gsp((j+1)%2048, i%2048, gs.srcbuf.bw);
+			write = gdp((j2+1)%2048, i2%2048, gs.dstbuf.bw);
+			pDstBuf[write] = (pDstBuf[write]&0x0f)|(pSrcBuf[read]&0xf0);
+
+			read = gsp((j+2)%2048, i%2048, gs.srcbuf.bw);
+			write = gdp((j2+2)%2048, i2%2048, gs.dstbuf.bw);
+			pDstBuf[write] = (pDstBuf[write]&0xf0)|(pSrcBuf[read]&0x0f);
+
+			read = gsp((j+3)%2048, i%2048, gs.srcbuf.bw);
+			write = gdp((j2+3)%2048, i2%2048, gs.dstbuf.bw);
+			pDstBuf[write] = (pDstBuf[write]&0x0f)|(pSrcBuf[read]&0xf0);
+
+			read = gsp((j+4)%2048, i%2048, gs.srcbuf.bw);
+			write = gdp((j2+4)%2048, i2%2048, gs.dstbuf.bw);
+			pDstBuf[write] = (pDstBuf[write]&0xf0)|(pSrcBuf[read]&0x0f);
+
+			read = gsp((j+5)%2048, i%2048, gs.srcbuf.bw);
+			write = gdp((j2+5)%2048, i2%2048, gs.dstbuf.bw);
+			pDstBuf[write] = (pDstBuf[write]&0x0f)|(pSrcBuf[read]&0xf0);
+
+			read = gsp((j+6)%2048, i%2048, gs.srcbuf.bw);
+			write = gdp((j2+6)%2048, i2%2048, gs.dstbuf.bw);
+			pDstBuf[write] = (pDstBuf[write]&0xf0)|(pSrcBuf[read]&0x0f);
+
+			read = gsp((j+7)%2048, i%2048, gs.srcbuf.bw);
+			write = gdp((j2+7)%2048, i2%2048, gs.dstbuf.bw);
+			pDstBuf[write] = (pDstBuf[write]&0x0f)|(pSrcBuf[read]&0xf0);
+		}
+	}
+}
+
 // dir depends on trxpos.dirx & trxpos.diry
 void TransferLocalLocal()
 {
@@ -408,98 +502,28 @@
 			//(*it)->status |= CRenderTarget::TS_NeedUpdate;
 		}
 	}
-
-	u8* pSrcBuf = g_pbyGSMemory + gs.srcbuf.bp * 256;
-	u8* pDstBuf = g_pbyGSMemory + gs.dstbuf.bp * 256;
-
-	#define TRANSFERLOCALLOCAL(srcpsm, dstpsm, widthlimit) { \
-		if( (gs.imageWnew&widthlimit)!=0 ) break; \
-		assert( (gs.imageWnew&widthlimit)==0 && widthlimit <= 4); \
-		for(int i = gs.trxpos.sy, i2 = gs.trxpos.dy; i < gs.trxpos.sy+gs.imageHnew; i++, i2++) { \
-			for(int j = gs.trxpos.sx, j2 = gs.trxpos.dx; j < gs.trxpos.sx+gs.imageWnew; j+=widthlimit, j2+=widthlimit) { \
-				\
-				writePixel##dstpsm##_0(pDstBuf, j2%2048, i2%2048, \
-					readPixel##srcpsm##_0(pSrcBuf, j%2048, i%2048, gs.srcbuf.bw), gs.dstbuf.bw); \
-				\
-				if( widthlimit > 1 ) { \
-					writePixel##dstpsm##_0(pDstBuf, (j2+1)%2048, i2%2048, \
-						readPixel##srcpsm##_0(pSrcBuf, (j+1)%2048, i%2048, gs.srcbuf.bw), gs.dstbuf.bw); \
-					\
-					if( widthlimit > 2 ) { \
-						writePixel##dstpsm##_0(pDstBuf, (j2+2)%2048, i2%2048, \
-							readPixel##srcpsm##_0(pSrcBuf, (j+2)%2048, i%2048, gs.srcbuf.bw), gs.dstbuf.bw); \
-						\
-						if( widthlimit > 3 ) { \
-							writePixel##dstpsm##_0(pDstBuf, (j2+3)%2048, i2%2048, \
-								readPixel##srcpsm##_0(pSrcBuf, (j+3)%2048, i%2048, gs.srcbuf.bw), gs.dstbuf.bw); \
-						} \
-					} \
-				} \
-			} \
-		} \
-	} \
-	\
-	#define TRANSFERLOCALLOCAL_4(srcpsm, dstpsm) { \
-		assert( (gs.imageWnew%8) == 0 ); \
-		for(int i = gs.trxpos.sy, i2 = gs.trxpos.dy; i < gs.trxpos.sy+gs.imageHnew; ++i, ++i2) { \
-			for(int j = gs.trxpos.sx, j2 = gs.trxpos.dx; j < gs.trxpos.sx+gs.imageWnew; j+=8, j2+=8) { \
-				/* NOTE: the 2 conseq 4bit values are in NOT in the same byte */ \
-				u32 read = getPixelAddress##srcpsm##_0(j%2048, i%2048, gs.srcbuf.bw); \
-				u32 write = getPixelAddress##dstpsm##_0(j2%2048, i2%2048, gs.dstbuf.bw); \
-				pDstBuf[write] = (pDstBuf[write]&0xf0)|(pSrcBuf[read]&0x0f); \
-				\
-				read = getPixelAddress##srcpsm##_0((j+1)%2048, i%2048, gs.srcbuf.bw); \
-				write = getPixelAddress##dstpsm##_0((j2+1)%2048, i2%2048, gs.dstbuf.bw); \
-				pDstBuf[write] = (pDstBuf[write]&0x0f)|(pSrcBuf[read]&0xf0); \
-				\
-				read = getPixelAddress##srcpsm##_0((j+2)%2048, i%2048, gs.srcbuf.bw); \
-				write = getPixelAddress##dstpsm##_0((j2+2)%2048, i2%2048, gs.dstbuf.bw); \
-				pDstBuf[write] = (pDstBuf[write]&0xf0)|(pSrcBuf[read]&0x0f); \
-				\
-				read = getPixelAddress##srcpsm##_0((j+3)%2048, i%2048, gs.srcbuf.bw); \
-				write = getPixelAddress##dstpsm##_0((j2+3)%2048, i2%2048, gs.dstbuf.bw); \
-				pDstBuf[write] = (pDstBuf[write]&0x0f)|(pSrcBuf[read]&0xf0); \
-				\
-				read = getPixelAddress##srcpsm##_0((j+2)%2048, i%2048, gs.srcbuf.bw); \
-				write = getPixelAddress##dstpsm##_0((j2+2)%2048, i2%2048, gs.dstbuf.bw); \
-				pDstBuf[write] = (pDstBuf[write]&0xf0)|(pSrcBuf[read]&0x0f); \
-				\
-				read = getPixelAddress##srcpsm##_0((j+3)%2048, i%2048, gs.srcbuf.bw); \
-				write = getPixelAddress##dstpsm##_0((j2+3)%2048, i2%2048, gs.dstbuf.bw); \
-				pDstBuf[write] = (pDstBuf[write]&0x0f)|(pSrcBuf[read]&0xf0); \
-				\
-				read = getPixelAddress##srcpsm##_0((j+2)%2048, i%2048, gs.srcbuf.bw); \
-				write = getPixelAddress##dstpsm##_0((j2+2)%2048, i2%2048, gs.dstbuf.bw); \
-				pDstBuf[write] = (pDstBuf[write]&0xf0)|(pSrcBuf[read]&0x0f); \
-				\
-				read = getPixelAddress##srcpsm##_0((j+3)%2048, i%2048, gs.srcbuf.bw); \
-				write = getPixelAddress##dstpsm##_0((j2+3)%2048, i2%2048, gs.dstbuf.bw); \
-				pDstBuf[write] = (pDstBuf[write]&0x0f)|(pSrcBuf[read]&0xf0); \
-			} \
-		} \
-	} \
 
 	switch (gs.srcbuf.psm)
 	{
 		case PSMCT32:
 			if (gs.dstbuf.psm == PSMCT32)
 			{
-				TRANSFERLOCALLOCAL(32, 32, 2);
+				_TransferLocalLocal(writePixel32_0, readPixel32_0, 2);
 			}
 			else
 			{
-				TRANSFERLOCALLOCAL(32, 32Z, 2);
+				_TransferLocalLocal(writePixel32Z_0, readPixel32_0, 2);
 			}
 			break;
 
 		case PSMCT24:
 			if (gs.dstbuf.psm == PSMCT24)
 			{
-				TRANSFERLOCALLOCAL(24, 24, 4);
+				_TransferLocalLocal(writePixel24_0, readPixel24_0, 4);
 			}
 			else
 			{
-				TRANSFERLOCALLOCAL(24, 24Z, 4);
+				_TransferLocalLocal(writePixel24Z_0, readPixel24_0, 4);
 			}
 			break;
 
@@ -507,19 +531,19 @@
 			switch (gs.dstbuf.psm)
 			{
 				case PSMCT16:
-					TRANSFERLOCALLOCAL(16, 16, 4);
+					_TransferLocalLocal(writePixel16_0, readPixel16_0, 4);
 					break;
 
 				case PSMCT16S:
-					TRANSFERLOCALLOCAL(16, 16S, 4);
+					_TransferLocalLocal(writePixel16S_0, readPixel16_0, 4);
 					break;
 
 				case PSMT16Z:
-					TRANSFERLOCALLOCAL(16, 16Z, 4);
+					_TransferLocalLocal(writePixel16Z_0, readPixel16_0, 4);
 					break;
 
 				case PSMT16SZ:
-					TRANSFERLOCALLOCAL(16, 16SZ, 4);
+					_TransferLocalLocal(writePixel16SZ_0, readPixel16_0, 4);
 					break;
 			}
 			break;
@@ -528,19 +552,19 @@
 			switch (gs.dstbuf.psm)
 			{
 				case PSMCT16:
-					TRANSFERLOCALLOCAL(16S, 16, 4);
+					_TransferLocalLocal(writePixel16_0, readPixel16S_0, 4);
 					break;
 
 				case PSMCT16S:
-					TRANSFERLOCALLOCAL(16S, 16S, 4);
+					_TransferLocalLocal(writePixel16S_0, readPixel16S_0, 4);
 					break;
 
 				case PSMT16Z:
-					TRANSFERLOCALLOCAL(16S, 16Z, 4);
+					_TransferLocalLocal(writePixel16Z_0, readPixel16S_0, 4);
 					break;
 
 				case PSMT16SZ:
-					TRANSFERLOCALLOCAL(16S, 16SZ, 4);
+					_TransferLocalLocal(writePixel16SZ_0, readPixel16S_0, 4);
 					break;
 			}
 			break;
@@ -548,11 +572,11 @@
 		case PSMT8:
 			if (gs.dstbuf.psm == PSMT8)
 			{
-				TRANSFERLOCALLOCAL(8, 8, 4);
+				_TransferLocalLocal(writePixel8_0, readPixel8_0, 4);
 			}
 			else
 			{
-				TRANSFERLOCALLOCAL(8, 8H, 4);
+				_TransferLocalLocal(writePixel8H_0, readPixel8_0, 4);
 			}
 			break;
 
@@ -561,15 +585,15 @@
 			{
 
 				case PSMT4:
-					TRANSFERLOCALLOCAL_4(4, 4);
+					_TransferLocalLocal_4(getPixelAddress4_0, getPixelAddress4_0);
 					break;
 
 				case PSMT4HL:
-					TRANSFERLOCALLOCAL_4(4, 4HL);
+					_TransferLocalLocal_4(getPixelAddress4_0, getPixelAddress4HL_0);
 					break;
 
 				case PSMT4HH:
-					TRANSFERLOCALLOCAL_4(4, 4HH);
+					_TransferLocalLocal_4(getPixelAddress4_0, getPixelAddress4HH_0);
 					break;
 			}
 			break;
@@ -577,11 +601,11 @@
 		case PSMT8H:
 			if (gs.dstbuf.psm == PSMT8)
 			{
-				TRANSFERLOCALLOCAL(8H, 8, 4);
+				_TransferLocalLocal(writePixel8_0, readPixel8H_0, 4);
 			}
 			else
 			{
-				TRANSFERLOCALLOCAL(8H, 8H, 4);
+				_TransferLocalLocal(writePixel8H_0, readPixel8H_0, 4);
 			}
 			break;
 
@@ -589,15 +613,15 @@
 			switch (gs.dstbuf.psm)
 			{
 				case PSMT4:
-					TRANSFERLOCALLOCAL_4(4HL, 4);
+					_TransferLocalLocal_4(getPixelAddress4HL_0, getPixelAddress4_0);
 					break;
 
 				case PSMT4HL:
-					TRANSFERLOCALLOCAL_4(4HL, 4HL);
+					_TransferLocalLocal_4(getPixelAddress4HL_0, getPixelAddress4HL_0);
 					break;
 
 				case PSMT4HH:
-					TRANSFERLOCALLOCAL_4(4HL, 4HH);
+					_TransferLocalLocal_4(getPixelAddress4HL_0, getPixelAddress4HH_0);
 					break;
 			}
 			break;
@@ -606,15 +630,15 @@
 			switch (gs.dstbuf.psm)
 			{
 				case PSMT4:
-					TRANSFERLOCALLOCAL_4(4HH, 4);
+					_TransferLocalLocal_4(getPixelAddress4HH_0, getPixelAddress4_0);
 					break;
 
 				case PSMT4HL:
-					TRANSFERLOCALLOCAL_4(4HH, 4HL);
+					_TransferLocalLocal_4(getPixelAddress4HH_0, getPixelAddress4HL_0);
 					break;
 
 				case PSMT4HH:
-					TRANSFERLOCALLOCAL_4(4HH, 4HH);
+					_TransferLocalLocal_4(getPixelAddress4HH_0, getPixelAddress4HH_0);
 					break;
 			}
 			break;
@@ -622,22 +646,22 @@
 		case PSMT32Z:
 			if (gs.dstbuf.psm == PSMCT32)
 			{
-				TRANSFERLOCALLOCAL(32Z, 32, 2);
+				_TransferLocalLocal(writePixel32_0, readPixel32Z_0, 2);
 			}
 			else
 			{
-				TRANSFERLOCALLOCAL(32Z, 32Z, 2);
+				_TransferLocalLocal(writePixel32Z_0, readPixel32Z_0, 2);
 			}
 			break;
 
 		case PSMT24Z:
 			if (gs.dstbuf.psm == PSMCT24)
 			{
-				TRANSFERLOCALLOCAL(24Z, 24, 4);
+				_TransferLocalLocal(writePixel24_0, readPixel24Z_0, 4);
 			}
 			else
 			{
-				TRANSFERLOCALLOCAL(24Z, 24Z, 4);
+				_TransferLocalLocal(writePixel24Z_0, readPixel24Z_0, 4);
 			}
 			break;
 
@@ -645,19 +669,19 @@
 			switch (gs.dstbuf.psm)
 			{
 				case PSMCT16:
-					TRANSFERLOCALLOCAL(16Z, 16, 4);
+					_TransferLocalLocal(writePixel16_0, readPixel16Z_0, 4);
 					break;
 
 				case PSMCT16S:
-					TRANSFERLOCALLOCAL(16Z, 16S, 4);
+					_TransferLocalLocal(writePixel16S_0, readPixel16Z_0, 4);
 					break;
 
 				case PSMT16Z:
-					TRANSFERLOCALLOCAL(16Z, 16Z, 4);
+					_TransferLocalLocal(writePixel16Z_0, readPixel16Z_0, 4);
 					break;
 
 				case PSMT16SZ:
-					TRANSFERLOCALLOCAL(16Z, 16SZ, 4);
+					_TransferLocalLocal(writePixel16SZ_0, readPixel16Z_0, 4);
 					break;
 			}
 			break;
@@ -666,19 +690,19 @@
 			switch (gs.dstbuf.psm)
 			{
 				case PSMCT16:
-					TRANSFERLOCALLOCAL(16SZ, 16, 4);
+					_TransferLocalLocal(writePixel16_0, readPixel16SZ_0, 4);
 					break;
 
 				case PSMCT16S:
-					TRANSFERLOCALLOCAL(16SZ, 16S, 4);
+					_TransferLocalLocal(writePixel16S_0, readPixel16SZ_0, 4);
 					break;
 
 				case PSMT16Z:
-					TRANSFERLOCALLOCAL(16SZ, 16Z, 4);
+					_TransferLocalLocal(writePixel16Z_0, readPixel16SZ_0, 4);
 					break;
 
 				case PSMT16SZ:
-					TRANSFERLOCALLOCAL(16SZ, 16SZ, 4);
+					_TransferLocalLocal(writePixel16SZ_0, readPixel16SZ_0, 4);
 					break;
 			}
 			break;
diff --git a/plugins/zzogl-pg/opengl/NewRegs.cpp b/plugins/zzogl-pg/opengl/NewRegs.cpp
index b47d0ce938..2646ce066b 100644
--- a/plugins/zzogl-pg/opengl/NewRegs.cpp
+++ b/plugins/zzogl-pg/opengl/NewRegs.cpp
@@ -890,6 +890,7 @@ void __fastcall GIFRegHandlerTRXREG(u32* data)
 void __fastcall GIFRegHandlerTRXDIR(u32* data)
 {
 	FUNCLOG
+	GIFRegTRXDIR* r = (GIFRegTRXDIR*)(data);
 	// Oh dear...
 	// terminate any previous transfers
 
@@ -909,7 +910,7 @@ void __fastcall GIFRegHandlerTRXDIR(u32* data)
 	gs.dstbuf = gs.dstbufnew;
 	gs.trxpos = gs.trxposnew;
 
-	gs.imageTransfer = data[0] & 0x3;
+	gs.imageTransfer = r->XDIR;
 	gs.imageWnew = gs.imageWtemp;
 	gs.imageHnew = gs.imageHtemp;
 