Note: line structure and indentation of this patch were reconstructed from a
whitespace-collapsed copy; apply with whitespace-tolerant options if context fails to match.

diff --git a/plugins/zzogl-pg/opengl/HostMemory.cpp b/plugins/zzogl-pg/opengl/HostMemory.cpp
index caec95a8fa..a19402ddc4 100644
--- a/plugins/zzogl-pg/opengl/HostMemory.cpp
+++ b/plugins/zzogl-pg/opengl/HostMemory.cpp
@@ -371,7 +371,7 @@
 	}
 }
 
-__forceinline void _TransferLocalLocal(_writePixel_0 wp, _readPixel_0 rp, u32 widthlimit)
+/*__forceinline void _TransferLocalLocal(_writePixel_0 wp, _readPixel_0 rp, u32 widthlimit)
 {
 	u8* pSrcBuf = g_pbyGSMemory + gs.srcbuf.bp * 256;
 	u8* pDstBuf = g_pbyGSMemory + gs.dstbuf.bp * 256;
@@ -414,6 +414,106 @@ __forceinline void _TransferLocalLocal_4(_getPixelAddress_0 gsp, _getPixelAddres
 
 	assert((gs.imageWnew % 8) == 0);
 
+	for(int i = gs.trxpos.sy, i2 = gs.trxpos.dy; i < gs.trxpos.sy + gs.imageHnew; ++i, ++i2)
+	{
+		for(int j = gs.trxpos.sx, j2 = gs.trxpos.dx; j < gs.trxpos.sx + gs.imageWnew; j += 8, j2 += 8)
+		{
+			// NOTE: the 2 conseq 4bit values are in NOT in the same byte
+			u32 read = gsp(j%2048, i%2048, gs.srcbuf.bw);
+			u32 write = gdp(j2%2048, i2%2048, gs.dstbuf.bw);
+			pDstBuf[write] = (pDstBuf[write]&0xf0)|(pSrcBuf[read]&0x0f);
+
+			read = gsp((j+1)%2048, i%2048, gs.srcbuf.bw);
+			write = gdp((j2+1)%2048, i2%2048, gs.dstbuf.bw);
+			pDstBuf[write] = (pDstBuf[write]&0x0f)|(pSrcBuf[read]&0xf0);
+
+			read = gsp((j+2)%2048, i%2048, gs.srcbuf.bw);
+			write = gdp((j2+2)%2048, i2%2048, gs.dstbuf.bw);
+			pDstBuf[write] = (pDstBuf[write]&0xf0)|(pSrcBuf[read]&0x0f);
+
+			read = gsp((j+3)%2048, i%2048, gs.srcbuf.bw);
+			write = gdp((j2+3)%2048, i2%2048, gs.dstbuf.bw);
+			pDstBuf[write] = (pDstBuf[write]&0x0f)|(pSrcBuf[read]&0xf0);
+
+			read = gsp((j+2)%2048, i%2048, gs.srcbuf.bw);
+			write = gdp((j2+2)%2048, i2%2048, gs.dstbuf.bw);
+			pDstBuf[write] = (pDstBuf[write]&0xf0)|(pSrcBuf[read]&0x0f);
+
+			read = gsp((j+3)%2048, i%2048, gs.srcbuf.bw);
+			write = gdp((j2+3)%2048, i2%2048, gs.dstbuf.bw);
+			pDstBuf[write] = (pDstBuf[write]&0x0f)|(pSrcBuf[read]&0xf0);
+
+			read = gsp((j+2)%2048, i%2048, gs.srcbuf.bw);
+			write = gdp((j2+2)%2048, i2%2048, gs.dstbuf.bw);
+			pDstBuf[write] = (pDstBuf[write]&0xf0)|(pSrcBuf[read]&0x0f);
+
+			read = gsp((j+3)%2048, i%2048, gs.srcbuf.bw);
+			write = gdp((j2+3)%2048, i2%2048, gs.dstbuf.bw);
+			pDstBuf[write] = (pDstBuf[write]&0x0f)|(pSrcBuf[read]&0xf0);
+		}
+	}
+}*/
+
+// Local-to-local transfer used whenever PSMT_BITMODE(gs.srcbuf.psm) != 4.
+// Copies the gs.imageWnew x gs.imageHnew rectangle at (gs.trxpos.sx, gs.trxpos.sy)
+// of the source buffer to (gs.trxpos.dx, gs.trxpos.dy) of the destination buffer,
+// wrapping both coordinates at 2048. Returns without copying anything when the
+// width is not a multiple of the per-step pixel count.
+__forceinline void _TransferLocalLocal()
+{
+	//ZZLog::Error_Log("TransferLocalLocal(0x%x, 0x%x)", gs.srcbuf.psm, gs.dstbuf.psm);
+	// wp stores into pDstBuf and rp loads from pSrcBuf, so each accessor follows its own buffer's format.
+	_writePixel_0 wp = writePixelFunction_0(gs.dstbuf.psm);
+	_readPixel_0 rp = readPixelFunction_0(gs.srcbuf.psm);
+	u8* pSrcBuf = g_pbyGSMemory + gs.srcbuf.bp * 256;
+	u8* pDstBuf = g_pbyGSMemory + gs.dstbuf.bp * 256;
+	u32 widthlimit = 4;
+
+	if (PSMT_BITMODE(gs.srcbuf.psm) == 0) widthlimit = 2;
+	// Each inner-loop step copies widthlimit pixels; bail out unless the width is an exact multiple.
+	if ((gs.imageWnew % widthlimit) != 0) return;
+
+	for(int i = gs.trxpos.sy, i2 = gs.trxpos.dy; i < gs.trxpos.sy+gs.imageHnew; i++, i2++)
+	{
+		for(int j = gs.trxpos.sx, j2 = gs.trxpos.dx; j < gs.trxpos.sx+gs.imageWnew; j+=widthlimit, j2+=widthlimit)
+		{
+			wp(pDstBuf, j2%2048, i2%2048,
+				rp(pSrcBuf, j%2048, i%2048, gs.srcbuf.bw), gs.dstbuf.bw);
+
+			//if (widthlimit > 1)
+			{
+				wp(pDstBuf, (j2+1)%2048, i2%2048,
+					rp(pSrcBuf, (j+1)%2048, i%2048, gs.srcbuf.bw), gs.dstbuf.bw);
+
+				if (widthlimit > 2)
+				{
+					wp(pDstBuf, (j2+2)%2048, i2%2048,
+						rp(pSrcBuf, (j+2)%2048, i%2048, gs.srcbuf.bw), gs.dstbuf.bw);
+
+					//if (widthlimit > 3)
+					{
+						wp(pDstBuf, (j2+3)%2048, i2%2048,
+							rp(pSrcBuf, (j+3)%2048, i%2048, gs.srcbuf.bw), gs.dstbuf.bw);
+					}
+				}
+			}
+		}
+	}
+}
+
+// Local-to-local transfer used when PSMT_BITMODE(gs.srcbuf.psm) == 4.
+// Resolves byte addresses for source and destination and merges nibbles by mask,
+// stepping 8 pixels at a time (hence the assert on gs.imageWnew).
+__forceinline void _TransferLocalLocal_4()
+{
+	//ZZLog::Error_Log("TransferLocalLocal_4(0x%x, 0x%x)", gs.srcbuf.psm, gs.dstbuf.psm);
+	_getPixelAddress_0 gsp = getPixelFunction_0(gs.srcbuf.psm);
+	_getPixelAddress_0 gdp = getPixelFunction_0(gs.dstbuf.psm);
+	u8* pSrcBuf = g_pbyGSMemory + gs.srcbuf.bp * 256;
+	u8* pDstBuf = g_pbyGSMemory + gs.dstbuf.bp * 256;
+
+	assert((gs.imageWnew % 8) == 0);
+
 	for(int i = gs.trxpos.sy, i2 = gs.trxpos.dy; i < gs.trxpos.sy + gs.imageHnew; ++i, ++i2)
 	{
 		for(int j = gs.trxpos.sx, j2 = gs.trxpos.dx; j < gs.trxpos.sx + gs.imageWnew; j += 8, j2 += 8)
@@ -492,7 +592,15 @@ __forceinline void _TransferLocalLocal_4(_getPixelAddress_0 gsp, _getPixelAddres
 		}
 	}
 
-	switch (gs.srcbuf.psm)
+	if (PSMT_BITMODE(gs.srcbuf.psm) != 4)
+	{
+		_TransferLocalLocal();
+	}
+	else
+	{
+		_TransferLocalLocal_4();
+	}
+	/*switch (gs.srcbuf.psm)
 	{
 		case PSMCT32:
 			if (gs.dstbuf.psm == PSMCT32)
@@ -695,7 +803,7 @@ __forceinline void _TransferLocalLocal_4(_getPixelAddress_0 gsp, _getPixelAddres
 					break;
 			}
 			break;
-	}
+	}*/
 
 	g_MemTargs.ClearRange(dststart, dstend);
 
diff --git a/plugins/zzogl-pg/opengl/Mem.h b/plugins/zzogl-pg/opengl/Mem.h
index 2cc7627c58..9f7ef546c0 100644
--- a/plugins/zzogl-pg/opengl/Mem.h
+++ b/plugins/zzogl-pg/opengl/Mem.h
@@ -266,6 +266,28 @@ static __forceinline u32 getPixelAddress16SZ_0(int x, int y, u32 bw)
 	return word;
 }
 
+// Maps a pixel storage mode to its getPixelAddress*_0 routine; unlisted modes fall back to getPixelAddress32_0.
+static __forceinline _getPixelAddress_0 getPixelFunction_0(u32 psm)
+{
+	switch(psm)
+	{
+		case PSMCT32: return getPixelAddress32_0;
+		case PSMCT24: return getPixelAddress24_0;
+		case PSMCT16: return getPixelAddress16_0;
+		case PSMCT16S: return getPixelAddress16S_0;
+		case PSMT8: return getPixelAddress8_0;
+		case PSMT4: return getPixelAddress4_0;
+		case PSMT8H: return getPixelAddress8H_0;
+		case PSMT4HL: return getPixelAddress4HL_0;
+		case PSMT4HH: return getPixelAddress4HH_0;
+		case PSMT32Z: return getPixelAddress32Z_0;
+		case PSMT24Z: return getPixelAddress24Z_0;
+		case PSMT16Z: return getPixelAddress16Z_0;
+		case PSMT16SZ: return getPixelAddress16SZ_0;
+		default: return getPixelAddress32_0;
+	}
+}
+
 #define getPixelAddress_0(psm,x,y,bw) getPixelAddress##psm##_0(x,y,bw)
 #define getPixelAddress(psm,x,y,bp,bw) getPixelAddress##psm##(x,y,bp,bw)
 
@@ -511,6 +533,27 @@ static __forceinline void writePixel16SZ_0(void* pmem, int x, int y, u32 pixel,
 	((u16*)pmem)[getPixelAddress16SZ_0(x, y, bw)] = pixel;
 }
 
+// Maps a pixel storage mode to its writePixel*_0 routine; unlisted modes fall back to writePixel32_0.
+static __forceinline _writePixel_0 writePixelFunction_0(u32 psm)
+{
+	switch(psm)
+	{
+		case PSMCT32: return writePixel32_0;
+		case PSMCT24: return writePixel24_0;
+		case PSMCT16: return writePixel16_0;
+		case PSMCT16S: return writePixel16S_0;
+		case PSMT8: return writePixel8_0;
+		case PSMT4: return writePixel4_0;
+		case PSMT8H: return writePixel8H_0;
+		case PSMT4HL: return writePixel4HL_0;
+		case PSMT4HH: return writePixel4HH_0;
+		case PSMT32Z: return writePixel32Z_0;
+		case PSMT24Z: return writePixel24Z_0;
+		case PSMT16Z: return writePixel16Z_0;
+		case PSMT16SZ: return writePixel16SZ_0;
+		default: return writePixel32_0;
+	}
+}
 
 ///////////////
 
@@ -589,4 +632,25 @@ static __forceinline u32 readPixel16SZ_0(const void* pmem, int x, int y, u32 bw)
 	return ((const u16*)pmem)[getPixelAddress16SZ_0(x, y, bw)];
 }
 
+// Maps a pixel storage mode to its readPixel*_0 routine; unlisted modes fall back to readPixel32_0.
+static __forceinline _readPixel_0 readPixelFunction_0(u32 psm)
+{
+	switch(psm)
+	{
+		case PSMCT32: return readPixel32_0;
+		case PSMCT24: return readPixel24_0;
+		case PSMCT16: return readPixel16_0;
+		case PSMCT16S: return readPixel16S_0;
+		case PSMT8: return readPixel8_0;
+		case PSMT4: return readPixel4_0;
+		case PSMT8H: return readPixel8H_0;
+		case PSMT4HL: return readPixel4HL_0;
+		case PSMT4HH: return readPixel4HH_0;
+		case PSMT32Z: return readPixel32Z_0;
+		case PSMT24Z: return readPixel24Z_0;
+		case PSMT16Z: return readPixel16Z_0;
+		case PSMT16SZ: return readPixel16SZ_0;
+		default: return readPixel32_0;
+	}
+}
 #endif /* __MEM_H__ */