From 4cccb9e0347a5f73852468778a8af90a342d8eac Mon Sep 17 00:00:00 2001 From: arcum42 Date: Tue, 23 Mar 2010 12:21:46 +0000 Subject: [PATCH] zzogl-pg: Refactor a bunch of Mem.cpp stuff. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@2762 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2-codeblocks.workspace | 4 +- plugins/zzogl-pg/opengl/Mem.cpp | 1933 ++++++------------------- plugins/zzogl-pg/opengl/Mem.h | 34 + plugins/zzogl-pg/opengl/Mem_Swizzle.h | 8 +- plugins/zzogl-pg/opengl/x86.cpp | 6 +- plugins/zzogl-pg/opengl/x86.h | 24 +- 6 files changed, 468 insertions(+), 1541 deletions(-) diff --git a/pcsx2-codeblocks.workspace b/pcsx2-codeblocks.workspace index b1dfb8dc1c..16d92eb170 100644 --- a/pcsx2-codeblocks.workspace +++ b/pcsx2-codeblocks.workspace @@ -1,7 +1,7 @@ - + @@ -30,6 +30,6 @@ - + diff --git a/plugins/zzogl-pg/opengl/Mem.cpp b/plugins/zzogl-pg/opengl/Mem.cpp index 80836f6449..1f0fd71717 100644 --- a/plugins/zzogl-pg/opengl/Mem.cpp +++ b/plugins/zzogl-pg/opengl/Mem.cpp @@ -46,7 +46,7 @@ u8* pstart; // ------------------------ // | Y | // ------------------------ -#define DEFINE_TRANSFERLOCAL(psm, transfersize, T, widthlimit, blockbits, blockwidth, blockheight, TransSfx, SwizzleBlock) \ +#define DEFINE_TRANSFERLOCAL(psm, transfersize, T, widthlimit, blockbits, blockwidth, blockheight, TransSfx, SwizzleBlockFn) \ int TransferHostLocal##psm(const void* pbyMem, u32 nQWordSize) \ { \ assert( gs.imageTransfer == 0 ); \ @@ -55,6 +55,9 @@ int TransferHostLocal##psm(const void* pbyMem, u32 nQWordSize) \ /*const u8* pendbuf = (const u8*)pbyMem + nQWordSize*4;*/ \ tempY = gs.imageY; tempX = gs.imageX; \ const u32 TSize = sizeof(T); \ + _SwizzleBlock swizzle; \ + _SwizzleBlock Swizzle = (SwizzleBlockFn);\ + _SwizzleBlock Swizzle_u = (SwizzleBlockFn##u); \ _writePixel_0 wp = writePixel##psm##_0; \ \ const T* pbuf = (const T*)pbyMem; \ @@ -112,17 +115,19 @@ int TransferHostLocal##psm(const void* pbyMem, u32 nQWordSize) \ /* transfer aligning to blocks */ \ for(; tempY < alignedY && nSize >= area; tempY += blockheight, nSize -= area) { \ \ - if( bAligned || ((DSTPSM==PSMCT24) || (DSTPSM==PSMT8H) || (DSTPSM==PSMT4HH) || (DSTPSM==PSMT4HL)) ) { \ - for(int tempj = gs.trxpos.dx; tempj < alignedX; tempj += blockwidth, pbuf += TransPitch(blockwidth, transfersize)/TSize) { \ - SwizzleBlock(pstart + getPixelAddress_0(psm,tempj, tempY, gs.dstbuf.bw)*blockbits/8, \ - (u8*)pbuf, TransPitch(pitch, transfersize)); \ - } \ + if( bAligned || ((DSTPSM==PSMCT24) || (DSTPSM==PSMT8H) || (DSTPSM==PSMT4HH) || (DSTPSM==PSMT4HL)) ) \ + { \ + swizzle = Swizzle;\ } \ - else { \ - for(int tempj = gs.trxpos.dx; tempj < alignedX; tempj += blockwidth, pbuf += TransPitch(blockwidth, transfersize)/TSize) { \ - SwizzleBlock##u(pstart + getPixelAddress_0(psm,tempj, tempY, gs.dstbuf.bw)*blockbits/8, \ - (u8*)pbuf, TransPitch(pitch, transfersize)); \ - } \ + else \ + { \ + swizzle = Swizzle_u;\ + } \ + \ + for(int tempj = gs.trxpos.dx; tempj < alignedX; tempj += blockwidth, pbuf += TransPitch(blockwidth, transfersize)/TSize) \ + { \ + swizzle(pstart + getPixelAddress_0(psm,tempj, tempY, gs.dstbuf.bw)*blockbits/8, \ + (u8*)pbuf, TransPitch(pitch, transfersize), 0xffffffff); \ } \ \ /* transfer the rest */ \ @@ -159,44 +164,21 @@ End: \ #define NEW_TRANSFER #ifdef NEW_TRANSFER -//DEFINE_TRANSFERLOCAL(32, u32, 2, 32, 8, 8, _, SwizzleBlock32); -int TransferHostLocal32(const void* pbyMem, u32 nQWordSize) -{ - const u32 widthlimit = 2; - const u32 blockbits = 32; - const u32 blockwidth = 8; - const u32 blockheight = 8; - const u32 TSize = sizeof(u32); - const u32 transfersize = 32; - _SwizzleBlock swizzle; - _writePixel_0 wp = writePixel32_0; - - assert( gs.imageTransfer == 0 ); - pstart = g_pbyGSMemory + gs.dstbuf.bp*256; +// Get ready for the same function 3 times. *sigh* +template +static __forceinline bool AlignOnBlockBoundry_(TransferData data, TransferFuncts fun, Point alignedPt, int& endY, const T* pbuf) +{ + bool bCanAlign = ((MOD_POW2(gs.trxpos.dx, data.blockwidth) == 0) && (gs.imageX == gs.trxpos.dx) && + (alignedPt.y > endY) && (alignedPt.x > gs.trxpos.dx)); - /*const u8* pendbuf = (const u8*)pbyMem + nQWordSize*4;*/ - tempY = gs.imageY; tempX = gs.imageX; - - const u32* pbuf = (const u32*)pbyMem; - const int tp2 = TransPitch(2, transfersize); - int nLeftOver = (nQWordSize*4*2)%tp2; - nSize = (nQWordSize*4*2)/tp2; - nSize = min(nSize, gs.imageWnew * gs.imageHnew); - - int endY = ROUND_UPPOW2(gs.imageY, blockheight); - int alignedY = ROUND_DOWNPOW2(gs.imageEndY, blockheight); - int alignedX = ROUND_DOWNPOW2(gs.imageEndX, blockwidth); - bool bAligned; - bool bCanAlign = ((MOD_POW2(gs.trxpos.dx, blockwidth) == 0) && (gs.imageX == gs.trxpos.dx) && (alignedY > endY) && (alignedX > gs.trxpos.dx)); - - if ((gs.imageEndX - gs.trxpos.dx) % widthlimit) + if ((gs.imageEndX - gs.trxpos.dx) % data.widthlimit) { /* hack */ int testwidth = (int)nSize - (gs.imageEndY - gs.imageY) * (gs.imageEndX - gs.trxpos.dx) + (gs.imageX - gs.trxpos.dx); - if((testwidth <= widthlimit) && (testwidth >= -widthlimit)) + if ((testwidth <= data.widthlimit) && (testwidth >= -data.widthlimit)) { /* don't transfer */ /*DEBUG_LOG("bad texture %s: %d %d %d\n", #psm, gs.trxpos.dx, gs.imageEndX, nQWordSize);*/ @@ -204,75 +186,277 @@ int TransferHostLocal32(const void* pbyMem, u32 nQWordSize) } bCanAlign = false; } - /* first align on block boundary */ - if ( MOD_POW2(gs.imageY, blockheight) || !bCanAlign ) + if ( MOD_POW2(gs.imageY, data.blockheight) || !bCanAlign ) { + u32 transwidth; if( !bCanAlign ) endY = gs.imageEndY; /* transfer the whole image */ else assert( endY < gs.imageEndY); /* part of alignment condition */ - if (((gs.imageEndX - gs.trxpos.dx) % widthlimit) || ((gs.imageEndX - gs.imageX) % widthlimit)) + if (((gs.imageEndX - gs.trxpos.dx) % data.widthlimit) || ((gs.imageEndX - gs.imageX) % data.widthlimit)) { /* transmit with a width of 1 */ - if (!TransmitHostLocalY_(wp, (1 + (DSTPSM == 0x14)), endY, pbuf)) goto End; + transwidth = (1 + (DSTPSM == 0x14)); } else { - if (!TransmitHostLocalY_(wp, widthlimit, endY, pbuf)) goto End; + transwidth = data.widthlimit; } - if( nSize == 0 || tempY == gs.imageEndY ) goto End; + // The only line that's different in these 3 functions. + if (!TransmitHostLocalY_(fun.wp, transwidth, endY, pbuf)) return false; + + if( nSize == 0 || tempY == gs.imageEndY ) return false; } + return true; +} + + +template +static __forceinline bool AlignOnBlockBoundry_4(TransferData data, TransferFuncts fun, Point alignedPt, int& endY, const T* pbuf) +{ + bool bCanAlign = ((MOD_POW2(gs.trxpos.dx, data.blockwidth) == 0) && (gs.imageX == gs.trxpos.dx) && + (alignedPt.y > endY) && (alignedPt.x > gs.trxpos.dx)); - //assert( MOD_POW2(tempY, blockheight) == 0 && tempX == gs.trxpos.dx); + if ((gs.imageEndX - gs.trxpos.dx) % data.widthlimit) + { + /* hack */ + int testwidth = (int)nSize - + (gs.imageEndY - gs.imageY) * (gs.imageEndX - gs.trxpos.dx) + + (gs.imageX - gs.trxpos.dx); + + if ((testwidth <= data.widthlimit) && (testwidth >= -data.widthlimit)) + { + /* don't transfer */ + /*DEBUG_LOG("bad texture %s: %d %d %d\n", #psm, gs.trxpos.dx, gs.imageEndX, nQWordSize);*/ + gs.imageTransfer = -1; + } + bCanAlign = false; + } + /* first align on block boundary */ + if ( MOD_POW2(gs.imageY, data.blockheight) || !bCanAlign ) + { + u32 transwidth; + + if( !bCanAlign ) + endY = gs.imageEndY; /* transfer the whole image */ + else + assert( endY < gs.imageEndY); /* part of alignment condition */ + + if (((gs.imageEndX - gs.trxpos.dx) % data.widthlimit) || ((gs.imageEndX - gs.imageX) % data.widthlimit)) + { + /* transmit with a width of 1 */ + transwidth = (1 + (DSTPSM == 0x14)); + } + else + { + transwidth = data.widthlimit; + } + + // The only line that's different in these 3 functions. + if (!TransmitHostLocalY_4(fun.wp, transwidth, endY, pbuf)) return false; + + if( nSize == 0 || tempY == gs.imageEndY ) return false; + } + return true; +} + + +template +static __forceinline bool AlignOnBlockBoundry_24(TransferData data, TransferFuncts fun, Point alignedPt, int& endY, const T* pbuf) +{ + bool bCanAlign = ((MOD_POW2(gs.trxpos.dx, data.blockwidth) == 0) && (gs.imageX == gs.trxpos.dx) && + (alignedPt.y > endY) && (alignedPt.x > gs.trxpos.dx)); + + if ((gs.imageEndX - gs.trxpos.dx) % data.widthlimit) + { + /* hack */ + int testwidth = (int)nSize - + (gs.imageEndY - gs.imageY) * (gs.imageEndX - gs.trxpos.dx) + + (gs.imageX - gs.trxpos.dx); + + if ((testwidth <= data.widthlimit) && (testwidth >= -data.widthlimit)) + { + /* don't transfer */ + /*DEBUG_LOG("bad texture %s: %d %d %d\n", #psm, gs.trxpos.dx, gs.imageEndX, nQWordSize);*/ + gs.imageTransfer = -1; + } + bCanAlign = false; + } + /* first align on block boundary */ + if ( MOD_POW2(gs.imageY, data.blockheight) || !bCanAlign ) + { + u32 transwidth; + + if( !bCanAlign ) + endY = gs.imageEndY; /* transfer the whole image */ + else + assert( endY < gs.imageEndY); /* part of alignment condition */ + + if (((gs.imageEndX - gs.trxpos.dx) % data.widthlimit) || ((gs.imageEndX - gs.imageX) % data.widthlimit)) + { + /* transmit with a width of 1 */ + transwidth = (1 + (DSTPSM == 0x14)); + } + else + { + transwidth = data.widthlimit; + } + + // The only line that's different in these 3 functions. + if (!TransmitHostLocalY_24(fun.wp, transwidth, endY, pbuf)) return false; + + if( nSize == 0 || tempY == gs.imageEndY ) return false; + } + return true; +} + +// Here we go again. 3 nearly identical functions. +template +static __forceinline bool TransferAligningToBlocks_(TransferData data, TransferFuncts fun, Point alignedPt, const T* pbuf) +{ + bool bAligned; + const u32 TSize = sizeof(T); + _SwizzleBlock swizzle; /* can align! */ pitch = gs.imageEndX - gs.trxpos.dx; - area = pitch * blockheight; - fracX = gs.imageEndX - alignedX; + area = pitch * data.blockheight; + fracX = gs.imageEndX - alignedPt.x; /* on top of checking whether pbuf is aligned, make sure that the width is at least aligned to its limits (due to bugs in pcsx2) */ - bAligned = !((uptr)pbuf & 0xf) && (TransPitch(pitch, transfersize) & 0xf) == 0; + bAligned = !((uptr)pbuf & 0xf) && (TransPitch(pitch, data.transfersize) & 0xf) == 0; /* transfer aligning to blocks */ - for(; tempY < alignedY && nSize >= area; tempY += blockheight, nSize -= area) + for(; tempY < alignedPt.y && nSize >= area; tempY += data.blockheight, nSize -= area) { if ( bAligned || ((DSTPSM==PSMCT24) || (DSTPSM==PSMT8H) || (DSTPSM==PSMT4HH) || (DSTPSM==PSMT4HL))) - swizzle = SwizzleBlock32; + swizzle = (fun.Swizzle); else - swizzle = SwizzleBlock32u; + swizzle = (fun.Swizzle_u); - for(int tempj = gs.trxpos.dx; tempj < alignedX; tempj += blockwidth, pbuf += TransPitch(blockwidth, transfersize)/TSize) + for(int tempj = gs.trxpos.dx; tempj < alignedPt.x; tempj += data.blockwidth, pbuf += TransPitch(data.blockwidth, data.transfersize)/TSize) { - u8 *temp = pstart + getPixelAddress_0(32, tempj, tempY, gs.dstbuf.bw)*blockbits/8; - swizzle(temp, (u8*)pbuf, TransPitch(pitch, transfersize), 0xffffffff); + u8 *temp = pstart + fun.gp(tempj, tempY, gs.dstbuf.bw) * data.blockbits/8; + swizzle(temp, (u8*)pbuf, TransPitch(pitch, data.transfersize), 0xffffffff); } /* transfer the rest */ - if( alignedX < gs.imageEndX ) + if( alignedPt.x < gs.imageEndX ) { - if (!TransmitHostLocalX_(wp, widthlimit, blockheight, alignedX, pbuf)) goto End; - pbuf -= TransPitch((alignedX - gs.trxpos.dx), transfersize)/TSize; + // The only line that's different in these 3 functions. + if (!TransmitHostLocalX_(fun.wp, data.widthlimit, data.blockheight, alignedPt.x, pbuf)) return false; + pbuf -= TransPitch((alignedPt.x - gs.trxpos.dx), data.transfersize)/TSize; } else { - pbuf += (blockheight - 1)* TransPitch(pitch, transfersize)/TSize; + pbuf += (data.blockheight - 1)* TransPitch(pitch, data.transfersize)/TSize; } tempX = gs.trxpos.dx; } + return true; +} + +template +static __forceinline bool TransferAligningToBlocks_4(TransferData data, TransferFuncts fun, Point alignedPt, const T* pbuf) +{ + bool bAligned; + const u32 TSize = sizeof(T); + _SwizzleBlock swizzle; - if (TransPitch(nSize, transfersize)/4 > 0) + /* can align! */ + pitch = gs.imageEndX - gs.trxpos.dx; + area = pitch * data.blockheight; + fracX = gs.imageEndX - alignedPt.x; + + /* on top of checking whether pbuf is aligned, make sure that the width is at least aligned to its limits (due to bugs in pcsx2) */ + bAligned = !((uptr)pbuf & 0xf) && (TransPitch(pitch, data.transfersize) & 0xf) == 0; + + /* transfer aligning to blocks */ + for(; tempY < alignedPt.y && nSize >= area; tempY += data.blockheight, nSize -= area) { - if (!TransmitHostLocalY_(wp, widthlimit, gs.imageEndY, pbuf)) goto End; - /* sometimes wrong sizes are sent (tekken tag) */ - assert( gs.imageTransfer == -1 || TransPitch(nSize, transfersize)/4 <= 2 ); + if ( bAligned || ((DSTPSM==PSMCT24) || (DSTPSM==PSMT8H) || (DSTPSM==PSMT4HH) || (DSTPSM==PSMT4HL))) + swizzle = (fun.Swizzle); + else + swizzle = (fun.Swizzle_u); + + for(int tempj = gs.trxpos.dx; tempj < alignedPt.x; tempj += data.blockwidth, pbuf += TransPitch(data.blockwidth, data.transfersize)/TSize) + { + u8 *temp = pstart + fun.gp(tempj, tempY, gs.dstbuf.bw) * data.blockbits/8; + swizzle(temp, (u8*)pbuf, TransPitch(pitch, data.transfersize), 0xffffffff); + } + + /* transfer the rest */ + if( alignedPt.x < gs.imageEndX ) + { + // The only line that's different in these 3 functions. + if (!TransmitHostLocalX_4(fun.wp, data.widthlimit, data.blockheight, alignedPt.x, pbuf)) return false; + pbuf -= TransPitch((alignedPt.x - gs.trxpos.dx), data.transfersize)/TSize; + } + else + { + pbuf += (data.blockheight - 1)* TransPitch(pitch, data.transfersize)/TSize; + } + + tempX = gs.trxpos.dx; } + return true; +} + +template +static __forceinline bool TransferAligningToBlocks_24(TransferData data, TransferFuncts fun, Point alignedPt, const T* pbuf) +{ + bool bAligned; + const u32 TSize = sizeof(T); + _SwizzleBlock swizzle; -End: if( tempY >= gs.imageEndY ) + /* can align! */ + pitch = gs.imageEndX - gs.trxpos.dx; + area = pitch * data.blockheight; + fracX = gs.imageEndX - alignedPt.x; + + /* on top of checking whether pbuf is aligned, make sure that the width is at least aligned to its limits (due to bugs in pcsx2) */ + bAligned = !((uptr)pbuf & 0xf) && (TransPitch(pitch, data.transfersize) & 0xf) == 0; + + /* transfer aligning to blocks */ + for(; tempY < alignedPt.y && nSize >= area; tempY += data.blockheight, nSize -= area) + { + if ( bAligned || ((DSTPSM==PSMCT24) || (DSTPSM==PSMT8H) || (DSTPSM==PSMT4HH) || (DSTPSM==PSMT4HL))) + swizzle = (fun.Swizzle); + else + swizzle = (fun.Swizzle_u); + + for(int tempj = gs.trxpos.dx; tempj < alignedPt.x; tempj += data.blockwidth, pbuf += TransPitch(data.blockwidth, data.transfersize)/TSize) + { + u8 *temp = pstart + fun.gp(tempj, tempY, gs.dstbuf.bw) * data.blockbits/8; + swizzle(temp, (u8*)pbuf, TransPitch(pitch, data.transfersize), 0xffffffff); + } + + /* transfer the rest */ + if( alignedPt.x < gs.imageEndX ) + { + // The only line that's different in these 3 functions. + if (!TransmitHostLocalX_24(fun.wp, data.widthlimit, data.blockheight, alignedPt.x, pbuf)) return false; + pbuf -= TransPitch((alignedPt.x - gs.trxpos.dx), data.transfersize)/TSize; + } + else + { + pbuf += (data.blockheight - 1)* TransPitch(pitch, data.transfersize)/TSize; + } + + tempX = gs.trxpos.dx; + } + return true; +} + +// Only one of this function, since no TransmitHostLocalX_ or TransmitHostLocalY_'s were involved. +static __forceinline int FinishTransfer(TransferData data, int nLeftOver) +{ + if( tempY >= gs.imageEndY ) { assert( gs.imageTransfer == -1 || tempY == gs.imageEndY ); gs.imageTransfer = -1; @@ -287,1520 +471,229 @@ End: if( tempY >= gs.imageEndY ) gs.imageX = tempX; } - return (nSize * TransPitch(2, transfersize) + nLeftOver)/2; + return (nSize * TransPitch(2, data.transfersize) + nLeftOver)/2; +} + +// Oh, here we go again. 3 mostly identical functions. +template +static __forceinline int RealTransfer_(TransferData data, TransferFuncts fun, const void* pbyMem, u32 nQWordSize) +{ + assert( gs.imageTransfer == 0 ); + pstart = g_pbyGSMemory + gs.dstbuf.bp*256; + + /*const u8* pendbuf = (const u8*)pbyMem + nQWordSize*4;*/ + tempY = gs.imageY; tempX = gs.imageX; + + const T* pbuf = (const T*)pbyMem; + const int tp2 = TransPitch(2, data.transfersize); + int nLeftOver = (nQWordSize*4*2)%tp2; + Point alignedPt; + nSize = (nQWordSize*4*2)/tp2; + nSize = min(nSize, gs.imageWnew * gs.imageHnew); + + int endY = ROUND_UPPOW2(gs.imageY, data.blockheight); + alignedPt.y = ROUND_DOWNPOW2(gs.imageEndY, data.blockheight); + alignedPt.x = ROUND_DOWNPOW2(gs.imageEndX, data.blockwidth); + + if (!AlignOnBlockBoundry_(data, fun, alignedPt, endY, pbuf)) return FinishTransfer(data, nLeftOver); + + if (!TransferAligningToBlocks_(data, fun, alignedPt, pbuf)) return FinishTransfer(data, nLeftOver); + + if (TransPitch(nSize, data.transfersize)/4 > 0) + { + if (!TransmitHostLocalY_(fun.wp, data.widthlimit, gs.imageEndY, pbuf)) return FinishTransfer(data, nLeftOver); + /* sometimes wrong sizes are sent (tekken tag) */ + assert( gs.imageTransfer == -1 || TransPitch(nSize, data.transfersize)/4 <= 2 ); + } + + return FinishTransfer(data, nLeftOver); +} + +template +static __forceinline int RealTransfer_4(TransferData data, TransferFuncts fun, const void* pbyMem, u32 nQWordSize) +{ + assert( gs.imageTransfer == 0 ); + pstart = g_pbyGSMemory + gs.dstbuf.bp*256; + + /*const u8* pendbuf = (const u8*)pbyMem + nQWordSize*4;*/ + tempY = gs.imageY; tempX = gs.imageX; + + const T* pbuf = (const T*)pbyMem; + const int tp2 = TransPitch(2, data.transfersize); + int nLeftOver = (nQWordSize*4*2)%tp2; + Point alignedPt; + nSize = (nQWordSize*4*2)/tp2; + nSize = min(nSize, gs.imageWnew * gs.imageHnew); + + int endY = ROUND_UPPOW2(gs.imageY, data.blockheight); + alignedPt.y = ROUND_DOWNPOW2(gs.imageEndY, data.blockheight); + alignedPt.x = ROUND_DOWNPOW2(gs.imageEndX, data.blockwidth); + + if (!AlignOnBlockBoundry_4(data, fun, alignedPt, endY, pbuf)) return FinishTransfer(data, nLeftOver); + + if (!TransferAligningToBlocks_4(data, fun, alignedPt, pbuf)) return FinishTransfer(data, nLeftOver); + + if (TransPitch(nSize, data.transfersize)/4 > 0) + { + if (!TransmitHostLocalY_4(fun.wp, data.widthlimit, gs.imageEndY, pbuf)) return FinishTransfer(data, nLeftOver); + /* sometimes wrong sizes are sent (tekken tag) */ + assert( gs.imageTransfer == -1 || TransPitch(nSize, data.transfersize)/4 <= 2 ); + } + + return FinishTransfer(data, nLeftOver); +} + +template +static __forceinline int RealTransfer_24(TransferData data, TransferFuncts fun, const void* pbyMem, u32 nQWordSize) +{ + assert( gs.imageTransfer == 0 ); + pstart = g_pbyGSMemory + gs.dstbuf.bp*256; + + /*const u8* pendbuf = (const u8*)pbyMem + nQWordSize*4;*/ + tempY = gs.imageY; tempX = gs.imageX; + + const T* pbuf = (const T*)pbyMem; + const int tp2 = TransPitch(2, data.transfersize); + int nLeftOver = (nQWordSize*4*2)%tp2; + Point alignedPt; + nSize = (nQWordSize*4*2)/tp2; + nSize = min(nSize, gs.imageWnew * gs.imageHnew); + + int endY = ROUND_UPPOW2(gs.imageY, data.blockheight); + alignedPt.y = ROUND_DOWNPOW2(gs.imageEndY, data.blockheight); + alignedPt.x = ROUND_DOWNPOW2(gs.imageEndX, data.blockwidth); + + if (!AlignOnBlockBoundry_24(data, fun, alignedPt, endY, pbuf)) return FinishTransfer(data, nLeftOver); + + if (!TransferAligningToBlocks_24(data, fun, alignedPt, pbuf)) return FinishTransfer(data, nLeftOver); + + if (TransPitch(nSize, data.transfersize)/4 > 0) + { + if (!TransmitHostLocalY_24(fun.wp, data.widthlimit, gs.imageEndY, pbuf)) return FinishTransfer(data, nLeftOver); + /* sometimes wrong sizes are sent (tekken tag) */ + assert( gs.imageTransfer == -1 || TransPitch(nSize, data.transfersize)/4 <= 2 ); + } + + return FinishTransfer(data, nLeftOver); +} + +//DEFINE_TRANSFERLOCAL(32, u32, 2, 32, 8, 8, _, SwizzleBlock32); +int TransferHostLocal32(const void* pbyMem, u32 nQWordSize) +{ + TransferData data(2,32,8,8,32); + TransferFuncts fun(writePixel32_0, getPixelAddress32_0, SwizzleBlock32, SwizzleBlock32u); + + return RealTransfer_(data, fun, pbyMem, nQWordSize); } //DEFINE_TRANSFERLOCAL(32Z, u32, 2, 32, 8, 8, _, SwizzleBlock32); int TransferHostLocal32Z(const void* pbyMem, u32 nQWordSize) { - const u32 widthlimit = 2; - const u32 blockbits = 32; - const u32 blockwidth = 8; - const u32 blockheight = 8; - const u32 TSize = sizeof(u32); - const u32 transfersize = 32; - _writePixel_0 wp = writePixel32Z_0; - - assert( gs.imageTransfer == 0 ); - pstart = g_pbyGSMemory + gs.dstbuf.bp*256; - - /*const u8* pendbuf = (const u8*)pbyMem + nQWordSize*4;*/ - tempY = gs.imageY; tempX = gs.imageX; - - const u32* pbuf = (const u32*)pbyMem; - int nLeftOver = (nQWordSize*4*2)%(TransPitch(2, transfersize)); - nSize = nQWordSize*4*2/ TransPitch(2, transfersize); - nSize = min(nSize, gs.imageWnew * gs.imageHnew); - - int endY = ROUND_UPPOW2(tempY, blockheight); - int alignedY = ROUND_DOWNPOW2(gs.imageEndY, blockheight); - int alignedX = ROUND_DOWNPOW2(gs.imageEndX, blockwidth); - bool bAligned, bCanAlign = MOD_POW2(gs.trxpos.dx, blockwidth) == 0 && (tempX == gs.trxpos.dx) && (alignedY > endY) && alignedX > gs.trxpos.dx; - - if ((gs.imageEndX-gs.trxpos.dx)%widthlimit) - { - /* hack */ - int testwidth = (int)nSize - (gs.imageEndY-tempY)*(gs.imageEndX-gs.trxpos.dx)+(tempX-gs.trxpos.dx); - if ((testwidth <= widthlimit) && (testwidth >= -widthlimit)) - { - /* don't transfer */ - /*DEBUG_LOG("bad texture %s: %d %d %d\n", #psm, gs.trxpos.dx, gs.imageEndX, nQWordSize);*/ - gs.imageTransfer = -1; - } - bCanAlign = false; - } - - /* first align on block boundary */ - if ( MOD_POW2(tempY, blockheight) || !bCanAlign ) - { - - if ( !bCanAlign ) - endY = gs.imageEndY; /* transfer the whole image */ - else - assert( endY < gs.imageEndY); /* part of alignment condition */ - - if (((gs.imageEndX-gs.trxpos.dx)%widthlimit) || ((gs.imageEndX-tempX)%widthlimit)) - { - /* transmit with a width of 1 */ - if (!TransmitHostLocalY_(wp, (1+(DSTPSM == 0x14)), endY, pbuf)) goto End; - } - else - { - if (!TransmitHostLocalY_(wp, widthlimit, endY, pbuf)) goto End; - } - - if ( nSize == 0 || tempY == gs.imageEndY ) goto End; - } - - assert( MOD_POW2(tempY, blockheight) == 0 && tempX == gs.trxpos.dx); - - /* can align! */ - pitch = gs.imageEndX-gs.trxpos.dx; - area = pitch*blockheight; - fracX = gs.imageEndX-alignedX; - - /* on top of checking whether pbuf is aligned, make sure that the width is at least aligned to its limits (due to bugs in pcsx2) */ - bAligned = !((uptr)pbuf & 0xf) && (TransPitch(pitch, transfersize)&0xf) == 0; - - /* transfer aligning to blocks */ - for(; tempY < alignedY && nSize >= area; tempY += blockheight, nSize -= area) - { - if( bAligned || ((DSTPSM==PSMCT24) || (DSTPSM==PSMT8H) || (DSTPSM==PSMT4HH) || (DSTPSM==PSMT4HL)) ) - { - for(int tempj = gs.trxpos.dx; tempj < alignedX; tempj += blockwidth, pbuf += TransPitch(blockwidth, transfersize)/TSize) - { - SwizzleBlock32(pstart + getPixelAddress_0(32Z,tempj, tempY, gs.dstbuf.bw)*blockbits/8, (u8*)pbuf, TransPitch(pitch, transfersize)); - } - } - else - { - for(int tempj = gs.trxpos.dx; tempj < alignedX; tempj += blockwidth, pbuf += TransPitch(blockwidth, transfersize)/TSize) - { - SwizzleBlock32u(pstart + getPixelAddress_0(32Z,tempj, tempY, gs.dstbuf.bw)*blockbits/8, (u8*)pbuf, TransPitch(pitch, transfersize)); - } - } - - /* transfer the rest */ - if ( alignedX < gs.imageEndX ) - { - if (!TransmitHostLocalX_(wp, widthlimit, blockheight, alignedX, pbuf)) goto End; - pbuf -= TransPitch((alignedX - gs.trxpos.dx), transfersize)/TSize; - } - else - { - pbuf += (blockheight-1)*TransPitch(pitch, transfersize)/TSize; - } - tempX = gs.trxpos.dx; - } - - if (TransPitch(nSize, transfersize)/4 > 0 ) - { - if (!TransmitHostLocalY_(wp, widthlimit, gs.imageEndY, pbuf)) goto End; - /* sometimes wrong sizes are sent (tekken tag) */ - assert( gs.imageTransfer == -1 || TransPitch(nSize, transfersize)/4 <= 2 ); - } - - End: - if( tempY >= gs.imageEndY ) { - assert( gs.imageTransfer == -1 || tempY == gs.imageEndY ); - gs.imageTransfer = -1; - /*int start, end; - ZeroGS::GetRectMemAddress(start, end, gs.dstbuf.psm, gs.trxpos.dx, gs.trxpos.dy, gs.imageWnew, gs.imageHnew, gs.dstbuf.bp, gs.dstbuf.bw); - ZeroGS::g_MemTargs.ClearRange(start, end);*/ - } - else { - /* update new params */ - gs.imageY = tempY; - gs.imageX = tempX; - } - return (nSize * TransPitch(2, transfersize) + nLeftOver)/2; + TransferData data(2,32,8,8,32); + TransferFuncts fun(writePixel32Z_0, getPixelAddress32Z_0, SwizzleBlock32, SwizzleBlock32u); + + return RealTransfer_(data, fun, pbyMem, nQWordSize); } //DEFINE_TRANSFERLOCAL(24, u8, 8, 32, 8, 8, _24, SwizzleBlock24); int TransferHostLocal24(const void* pbyMem, u32 nQWordSize) { - const u32 widthlimit = 8; - const u32 blockbits = 32; - const u32 blockwidth = 8; - const u32 blockheight = 8; - const u32 TSize = sizeof(u8); - const u32 transfersize = 24; - _writePixel_0 wp = writePixel24_0; - - assert( gs.imageTransfer == 0 ); - pstart = g_pbyGSMemory + gs.dstbuf.bp*256; - - /*const u8* pendbuf = (const u8*)pbyMem + nQWordSize*4;*/ - tempY = gs.imageY; tempX = gs.imageX; - - const u8* pbuf = (const u8*)pbyMem; - int nLeftOver = (nQWordSize*4*2)%(TransPitch(2, transfersize)); - nSize = nQWordSize*4*2/ TransPitch(2, transfersize); - nSize = min(nSize, gs.imageWnew * gs.imageHnew); - - int endY = ROUND_UPPOW2(tempY, blockheight); - int alignedY = ROUND_DOWNPOW2(gs.imageEndY, blockheight); - int alignedX = ROUND_DOWNPOW2(gs.imageEndX, blockwidth); - bool bAligned, bCanAlign = MOD_POW2(gs.trxpos.dx, blockwidth) == 0 && (tempX == gs.trxpos.dx) && (alignedY > endY) && alignedX > gs.trxpos.dx; - - if ((gs.imageEndX-gs.trxpos.dx)%widthlimit) - { - /* hack */ - int testwidth = (int)nSize - (gs.imageEndY-tempY)*(gs.imageEndX-gs.trxpos.dx)+(tempX-gs.trxpos.dx); - if ((testwidth <= widthlimit) && (testwidth >= -widthlimit)) - { - /* don't transfer */ - /*DEBUG_LOG("bad texture %s: %d %d %d\n", #psm, gs.trxpos.dx, gs.imageEndX, nQWordSize);*/ - gs.imageTransfer = -1; - } - bCanAlign = false; - } - - /* first align on block boundary */ - if ( MOD_POW2(tempY, blockheight) || !bCanAlign ) - { - - if ( !bCanAlign ) - endY = gs.imageEndY; /* transfer the whole image */ - else - assert( endY < gs.imageEndY); /* part of alignment condition */ - - if (((gs.imageEndX-gs.trxpos.dx)%widthlimit) || ((gs.imageEndX-tempX)%widthlimit)) - { - /* transmit with a width of 1 */ - if (!TransmitHostLocalY_24(wp, (1+(DSTPSM == 0x14)), endY, pbuf)) goto End; - } - else - { - if (!TransmitHostLocalY_24(wp, widthlimit, endY, pbuf)) goto End; - } - - if( nSize == 0 || tempY == gs.imageEndY ) goto End; - } - - assert( MOD_POW2(tempY, blockheight) == 0 && tempX == gs.trxpos.dx); - - /* can align! */ - pitch = gs.imageEndX-gs.trxpos.dx; - area = pitch*blockheight; - fracX = gs.imageEndX-alignedX; - - /* on top of checking whether pbuf is aligned, make sure that the width is at least aligned to its limits (due to bugs in pcsx2) */ - bAligned = !((uptr)pbuf & 0xf) && (TransPitch(pitch, transfersize) & 0xf) == 0; - - /* transfer aligning to blocks */ - for(; tempY < alignedY && nSize >= area; tempY += blockheight, nSize -= area) - { - if( bAligned || ((DSTPSM==PSMCT24) || (DSTPSM==PSMT8H) || (DSTPSM==PSMT4HH) || (DSTPSM==PSMT4HL)) ) - { - for(int tempj = gs.trxpos.dx; tempj < alignedX; tempj += blockwidth, pbuf += TransPitch(blockwidth, transfersize)/TSize) - { - SwizzleBlock24(pstart + getPixelAddress_0(24,tempj, tempY, gs.dstbuf.bw)*blockbits/8, (u8*)pbuf, TransPitch(pitch, transfersize)); - } - } - else - { - for(int tempj = gs.trxpos.dx; tempj < alignedX; tempj += blockwidth, pbuf += TransPitch(blockwidth, transfersize)/TSize) - { - SwizzleBlock24u(pstart + getPixelAddress_0(24,tempj, tempY, gs.dstbuf.bw)*blockbits/8, (u8*)pbuf, TransPitch(pitch, transfersize)); - } - } - - /* transfer the rest */ - if ( alignedX < gs.imageEndX ) - { - if (!TransmitHostLocalX_24(wp, widthlimit, blockheight, alignedX, pbuf)) goto End; - pbuf -= TransPitch((alignedX-gs.trxpos.dx), transfersize)/TSize; - } - else - { - pbuf += (blockheight-1)*TransPitch(pitch, transfersize)/TSize; - } - tempX = gs.trxpos.dx; - } - - if (TransPitch(nSize, transfersize)/4 > 0 ) - { - if (!TransmitHostLocalY_24(wp, widthlimit, gs.imageEndY, pbuf)) goto End; - /* sometimes wrong sizes are sent (tekken tag) */ - assert( gs.imageTransfer == -1 || TransPitch(nSize, transfersize)/4 <= 2 ); - } - - End: - if( tempY >= gs.imageEndY ) { - assert( gs.imageTransfer == -1 || tempY == gs.imageEndY ); - gs.imageTransfer = -1; - /*int start, end; - ZeroGS::GetRectMemAddress(start, end, gs.dstbuf.psm, gs.trxpos.dx, gs.trxpos.dy, gs.imageWnew, gs.imageHnew, gs.dstbuf.bp, gs.dstbuf.bw); - ZeroGS::g_MemTargs.ClearRange(start, end);*/ - } - else { - /* update new params */ - gs.imageY = tempY; - gs.imageX = tempX; - } - return (nSize * TransPitch(2, transfersize) + nLeftOver)/2; + TransferData data(8,32,8,8,24); + TransferFuncts fun(writePixel24_0, getPixelAddress24_0, SwizzleBlock24, SwizzleBlock24u); + + return RealTransfer_24(data, fun, pbyMem, nQWordSize); } //DEFINE_TRANSFERLOCAL(24Z, u8, 8, 32, 8, 8, _24, SwizzleBlock24); int TransferHostLocal24Z(const void* pbyMem, u32 nQWordSize) { - const u32 widthlimit = 8; - const u32 blockbits = 32; - const u32 blockwidth = 8; - const u32 blockheight = 8; - const u32 TSize = sizeof(u8); - const u32 transfersize = 24; - _writePixel_0 wp = writePixel24Z_0; - - assert( gs.imageTransfer == 0 ); - pstart = g_pbyGSMemory + gs.dstbuf.bp*256; - - /*const u8* pendbuf = (const u8*)pbyMem + nQWordSize*4;*/ - tempY = gs.imageY; tempX = gs.imageX; - - const u8* pbuf = (const u8*)pbyMem; - int nLeftOver = (nQWordSize*4*2)%(TransPitch(2, transfersize)); - nSize = nQWordSize*4*2/ TransPitch(2, transfersize); - nSize = min(nSize, gs.imageWnew * gs.imageHnew); - - int endY = ROUND_UPPOW2(tempY, blockheight); - int alignedY = ROUND_DOWNPOW2(gs.imageEndY, blockheight); - int alignedX = ROUND_DOWNPOW2(gs.imageEndX, blockwidth); - bool bAligned, bCanAlign = MOD_POW2(gs.trxpos.dx, blockwidth) == 0 && (tempX == gs.trxpos.dx) && (alignedY > endY) && alignedX > gs.trxpos.dx; - - if ((gs.imageEndX-gs.trxpos.dx)%widthlimit) - { - /* hack */ - int testwidth = (int)nSize - (gs.imageEndY-tempY)*(gs.imageEndX-gs.trxpos.dx)+(tempX-gs.trxpos.dx); - if ((testwidth <= widthlimit) && (testwidth >= -widthlimit)) - { - /* don't transfer */ - /*DEBUG_LOG("bad texture %s: %d %d %d\n", #psm, gs.trxpos.dx, gs.imageEndX, nQWordSize);*/ - gs.imageTransfer = -1; - } - bCanAlign = false; - } - - /* first align on block boundary */ - if ( MOD_POW2(tempY, blockheight) || !bCanAlign ) - { - - if ( !bCanAlign ) - endY = gs.imageEndY; /* transfer the whole image */ - else - assert( endY < gs.imageEndY); /* part of alignment condition */ - - if (((gs.imageEndX-gs.trxpos.dx)%widthlimit) || ((gs.imageEndX-tempX)%widthlimit)) - { - /* transmit with a width of 1 */ - if (!TransmitHostLocalY_24(wp, (1+(DSTPSM == 0x14)), endY, pbuf)) goto End; - } - else - { - if (!TransmitHostLocalY_24(wp, widthlimit, endY, pbuf)) goto End; - } - - if( nSize == 0 || tempY == gs.imageEndY ) goto End; - } - - assert( MOD_POW2(tempY, blockheight) == 0 && tempX == gs.trxpos.dx); - - /* can align! */ - pitch = gs.imageEndX-gs.trxpos.dx; - area = pitch*blockheight; - fracX = gs.imageEndX-alignedX; - - /* on top of checking whether pbuf is aligned, make sure that the width is at least aligned to its limits (due to bugs in pcsx2) */ - bAligned = !((uptr)pbuf & 0xf) && (TransPitch(pitch, transfersize)&0xf) == 0; - - /* transfer aligning to blocks */ - for(; tempY < alignedY && nSize >= area; tempY += blockheight, nSize -= area) - { - if( bAligned || ((DSTPSM==PSMCT24) || (DSTPSM==PSMT8H) || (DSTPSM==PSMT4HH) || (DSTPSM==PSMT4HL)) ) - { - for(int tempj = gs.trxpos.dx; tempj < alignedX; tempj += blockwidth, pbuf += TransPitch(blockwidth, transfersize)/TSize) - { - SwizzleBlock24(pstart + getPixelAddress_0(16,tempj, tempY, gs.dstbuf.bw)*blockbits/8, (u8*)pbuf, TransPitch(pitch, transfersize)); - } - } - else - { - for(int tempj = gs.trxpos.dx; tempj < alignedX; tempj += blockwidth, pbuf += TransPitch(blockwidth, transfersize)/TSize) - { - SwizzleBlock24u(pstart + getPixelAddress_0(16,tempj, tempY, gs.dstbuf.bw)*blockbits/8, (u8*)pbuf, TransPitch(pitch, transfersize)); - } - } - - /* transfer the rest */ - if ( alignedX < gs.imageEndX ) - { - if (!TransmitHostLocalX_24(wp, widthlimit, blockheight, alignedX, pbuf)) goto End; - pbuf -= TransPitch((alignedX-gs.trxpos.dx), transfersize)/TSize; - } - else - { - pbuf += (blockheight-1)*TransPitch(pitch, transfersize)/TSize; - } - tempX = gs.trxpos.dx; - } - - if (TransPitch(nSize, transfersize)/4 > 0) - { - if (!TransmitHostLocalY_24(wp, widthlimit, gs.imageEndY, pbuf)) goto End; - /* sometimes wrong sizes are sent (tekken tag) */ - assert( gs.imageTransfer == -1 || TransPitch(nSize, transfersize)/4 <= 2 ); - } - - End: - if( tempY >= gs.imageEndY ) { - assert( gs.imageTransfer == -1 || tempY == gs.imageEndY ); - gs.imageTransfer = -1; - /*int start, end; - ZeroGS::GetRectMemAddress(start, end, gs.dstbuf.psm, gs.trxpos.dx, gs.trxpos.dy, gs.imageWnew, gs.imageHnew, gs.dstbuf.bp, gs.dstbuf.bw); - ZeroGS::g_MemTargs.ClearRange(start, end);*/ - } - else { - /* update new params */ - gs.imageY = tempY; - gs.imageX = tempX; - } - return (nSize * TransPitch(2, transfersize) + nLeftOver)/2; + TransferData data(8,32,8,8,24); + TransferFuncts fun(writePixel24Z_0, getPixelAddress24Z_0, SwizzleBlock24, SwizzleBlock24u); + + return RealTransfer_24(data, fun, pbyMem, nQWordSize); } //DEFINE_TRANSFERLOCAL(16, u16, 4, 16, 16, 8, _, SwizzleBlock16); int TransferHostLocal16(const void* pbyMem, u32 nQWordSize) { - const u32 widthlimit = 4; - const u32 blockbits = 16; - const u32 blockwidth = 16; - const u32 blockheight = 8; - const u32 TSize = sizeof(u16); - const u32 transfersize = 16; - _writePixel_0 wp = writePixel16_0; - - assert( gs.imageTransfer == 0 ); - pstart = g_pbyGSMemory + gs.dstbuf.bp*256; - - /*const u8* pendbuf = (const u8*)pbyMem + nQWordSize*4;*/ - tempY = gs.imageY; tempX = gs.imageX; - - const u16* pbuf = (const u16*)pbyMem; - int nLeftOver = (nQWordSize*4*2)%(TransPitch(2, transfersize)); - nSize = nQWordSize*4*2/ TransPitch(2, transfersize); - nSize = min(nSize, gs.imageWnew * gs.imageHnew); - - int endY = ROUND_UPPOW2(tempY, blockheight); - int alignedY = ROUND_DOWNPOW2(gs.imageEndY, blockheight); - int alignedX = ROUND_DOWNPOW2(gs.imageEndX, blockwidth); - bool bAligned, bCanAlign = MOD_POW2(gs.trxpos.dx, blockwidth) == 0 && (tempX == gs.trxpos.dx) && (alignedY > endY) && alignedX > gs.trxpos.dx; - - if ((gs.imageEndX-gs.trxpos.dx)%widthlimit) - { - /* hack */ - int testwidth = (int)nSize - (gs.imageEndY-tempY)*(gs.imageEndX-gs.trxpos.dx)+(tempX-gs.trxpos.dx); - if ((testwidth <= widthlimit) && (testwidth >= -widthlimit)) - { - /* don't transfer */ - /*DEBUG_LOG("bad texture %s: %d %d %d\n", #psm, gs.trxpos.dx, gs.imageEndX, nQWordSize);*/ - gs.imageTransfer = -1; - } - bCanAlign = false; - } - - /* first align on block boundary */ - if ( MOD_POW2(tempY, blockheight) || !bCanAlign ) - { - - if ( !bCanAlign ) - endY = gs.imageEndY; /* transfer the whole image */ - else - assert( endY < gs.imageEndY); /* part of alignment condition */ - - if (((gs.imageEndX-gs.trxpos.dx)%widthlimit) || ((gs.imageEndX-tempX)%widthlimit)) - { - /* transmit with a width of 1 */ - if (!TransmitHostLocalY_(wp, (1+(DSTPSM == 0x14)), endY, pbuf)) goto End; - } - else - { - if (!TransmitHostLocalY_(wp, widthlimit, endY, pbuf)) goto End; - } - - if( nSize == 0 || tempY == gs.imageEndY ) goto End; - } - - assert( MOD_POW2(tempY, blockheight) == 0 && tempX == gs.trxpos.dx); - - /* can align! */ - pitch = gs.imageEndX-gs.trxpos.dx; - area = pitch*blockheight; - fracX = gs.imageEndX-alignedX; - - /* on top of checking whether pbuf is aligned, make sure that the width is at least aligned to its limits (due to bugs in pcsx2) */ - bAligned = !((uptr)pbuf & 0xf) && (TransPitch(pitch, transfersize)&0xf) == 0; - - /* transfer aligning to blocks */ - for(; tempY < alignedY && nSize >= area; tempY += blockheight, nSize -= area) - { - if( bAligned || ((DSTPSM==PSMCT24) || (DSTPSM==PSMT8H) || (DSTPSM==PSMT4HH) || (DSTPSM==PSMT4HL)) ) - { - for(int tempj = gs.trxpos.dx; tempj < alignedX; tempj += blockwidth, pbuf += TransPitch(blockwidth, transfersize)/TSize) - { - SwizzleBlock16(pstart + getPixelAddress_0(16,tempj, tempY, gs.dstbuf.bw)*blockbits/8, (u8*)pbuf, TransPitch(pitch, transfersize)); - } - } - else - { - for(int tempj = gs.trxpos.dx; tempj < alignedX; tempj += blockwidth, pbuf += TransPitch(blockwidth, transfersize)/TSize) - { - SwizzleBlock16u(pstart + getPixelAddress_0(16,tempj, tempY, gs.dstbuf.bw)*blockbits/8, (u8*)pbuf, TransPitch(pitch, transfersize)); - } - } - - /* transfer the rest */ - if ( alignedX < gs.imageEndX ) - { - if (!TransmitHostLocalX_(wp, widthlimit, blockheight, alignedX, pbuf)) goto End; - pbuf -= TransPitch((alignedX-gs.trxpos.dx), transfersize)/TSize; - } - else - { - pbuf += (blockheight-1)* TransPitch(pitch, transfersize)/TSize; - } - tempX = gs.trxpos.dx; - } - - if (TransPitch(pitch, transfersize)/4 > 0 ) - { - if (!TransmitHostLocalY_(wp, widthlimit, gs.imageEndY, pbuf)) goto End; - /* sometimes wrong sizes are sent (tekken tag) */ - assert( gs.imageTransfer == -1 || TransPitch(nSize, transfersize)/4 <= 2 ); - } - - End: - if( tempY >= gs.imageEndY ) { - assert( gs.imageTransfer == -1 || tempY == gs.imageEndY ); - gs.imageTransfer = -1; - /*int start, end; - ZeroGS::GetRectMemAddress(start, end, gs.dstbuf.psm, gs.trxpos.dx, gs.trxpos.dy, gs.imageWnew, gs.imageHnew, gs.dstbuf.bp, gs.dstbuf.bw); - ZeroGS::g_MemTargs.ClearRange(start, end);*/ - } - else { - /* update new params */ - gs.imageY = tempY; - gs.imageX = tempX; - } - return (nSize * TransPitch(pitch, transfersize) + nLeftOver)/2; + TransferData data(4,16,16,8,16); + TransferFuncts fun(writePixel16_0, getPixelAddress16_0, SwizzleBlock16, SwizzleBlock16u); + + return RealTransfer_(data, fun, pbyMem, nQWordSize); } //DEFINE_TRANSFERLOCAL(16S, u16, 4, 16, 16, 8, _, SwizzleBlock16); int TransferHostLocal16S(const void* pbyMem, u32 nQWordSize) { - const u32 widthlimit = 4; - const u32 blockbits = 16; - const u32 blockwidth = 16; - const u32 blockheight = 8; - const u32 TSize = sizeof(u16); - const u32 transfersize = 16; - _writePixel_0 wp = writePixel16S_0; - - assert( gs.imageTransfer == 0 ); - pstart = g_pbyGSMemory + gs.dstbuf.bp*256; - - /*const u8* pendbuf = (const u8*)pbyMem + nQWordSize*4;*/ - tempY = gs.imageY; tempX = gs.imageX; - - const u16* pbuf = (const u16*)pbyMem; - int nLeftOver = (nQWordSize*4*2)%(TransPitch(2, transfersize)); - nSize = nQWordSize*4*2/ TransPitch(2, transfersize); - nSize = min(nSize, gs.imageWnew * gs.imageHnew); - - int endY = ROUND_UPPOW2(tempY, blockheight); - int alignedY = ROUND_DOWNPOW2(gs.imageEndY, blockheight); - int alignedX = ROUND_DOWNPOW2(gs.imageEndX, blockwidth); - bool bAligned, bCanAlign = MOD_POW2(gs.trxpos.dx, blockwidth) == 0 && (tempX == gs.trxpos.dx) && (alignedY > endY) && alignedX > gs.trxpos.dx; - - if ((gs.imageEndX-gs.trxpos.dx)%widthlimit) - { - /* hack */ - int testwidth = (int)nSize - (gs.imageEndY-tempY)*(gs.imageEndX-gs.trxpos.dx)+(tempX-gs.trxpos.dx); - if ((testwidth <= widthlimit) && (testwidth >= -widthlimit)) - { - /* don't transfer */ - /*DEBUG_LOG("bad texture %s: %d %d %d\n", #psm, gs.trxpos.dx, gs.imageEndX, nQWordSize);*/ - gs.imageTransfer = -1; - } - bCanAlign = false; - } - - /* first align on block boundary */ - if ( MOD_POW2(tempY, blockheight) || !bCanAlign ) - { - - if ( !bCanAlign ) - endY = gs.imageEndY; /* transfer the whole image */ - else - assert( endY < gs.imageEndY); /* part of alignment condition */ - - if (((gs.imageEndX-gs.trxpos.dx)%widthlimit) || ((gs.imageEndX-tempX)%widthlimit)) - { - /* transmit with a width of 1 */ - if (!TransmitHostLocalY_(wp, (1+(DSTPSM == 0x14)), endY, pbuf)) goto End; - } - else - { - if (!TransmitHostLocalY_(wp, widthlimit, endY, pbuf)) goto End; - } - - if( nSize == 0 || tempY == gs.imageEndY ) goto End; - } - - assert( MOD_POW2(tempY, blockheight) == 0 && tempX == gs.trxpos.dx); - - /* can align! */ - pitch = gs.imageEndX-gs.trxpos.dx; - area = pitch*blockheight; - fracX = gs.imageEndX-alignedX; - - /* on top of checking whether pbuf is aligned, make sure that the width is at least aligned to its limits (due to bugs in pcsx2) */ - bAligned = !((uptr)pbuf & 0xf) && (TransPitch(pitch, transfersize)&0xf) == 0; - - /* transfer aligning to blocks */ - for(; tempY < alignedY && nSize >= area; tempY += blockheight, nSize -= area) - { - if( bAligned || ((DSTPSM==PSMCT24) || (DSTPSM==PSMT8H) || (DSTPSM==PSMT4HH) || (DSTPSM==PSMT4HL)) ) - { - for(int tempj = gs.trxpos.dx; tempj < alignedX; tempj += blockwidth, pbuf += TransPitch(blockwidth, transfersize)/TSize) - { - SwizzleBlock16(pstart + getPixelAddress_0(16S,tempj, tempY, gs.dstbuf.bw)*blockbits/8, (u8*)pbuf, TransPitch(pitch, transfersize)); - } - } - else - { - for(int tempj = gs.trxpos.dx; tempj < alignedX; tempj += blockwidth, pbuf += TransPitch(blockwidth, transfersize)/TSize) - { - SwizzleBlock16u(pstart + getPixelAddress_0(16S,tempj, tempY, gs.dstbuf.bw)*blockbits/8, (u8*)pbuf, TransPitch(pitch, transfersize)); - } - } - - /* transfer the rest */ - if ( alignedX < gs.imageEndX ) - { - if (!TransmitHostLocalX_(wp, widthlimit, blockheight, alignedX, pbuf)) goto End; - pbuf -= TransPitch((alignedX-gs.trxpos.dx), transfersize)/TSize; - } - else - { - pbuf += (blockheight-1) * TransPitch(pitch, transfersize)/TSize; - } - tempX = gs.trxpos.dx; - } - - if (TransPitch(nSize, transfersize)/4 > 0 ) - { - if (!TransmitHostLocalY_(wp, widthlimit, gs.imageEndY, pbuf)) goto End; - /* sometimes wrong sizes are sent (tekken tag) */ - assert( gs.imageTransfer == -1 || TransPitch(nSize, transfersize)/4 <= 2 ); - } - - End: - if( tempY >= gs.imageEndY ) { - assert( gs.imageTransfer == -1 || tempY == gs.imageEndY ); - gs.imageTransfer = -1; - /*int start, end; - ZeroGS::GetRectMemAddress(start, end, gs.dstbuf.psm, gs.trxpos.dx, gs.trxpos.dy, gs.imageWnew, gs.imageHnew, gs.dstbuf.bp, gs.dstbuf.bw); - ZeroGS::g_MemTargs.ClearRange(start, end);*/ - } - else { - /* update new params */ - gs.imageY = tempY; - gs.imageX = tempX; - } - return (nSize * TransPitch(2, transfersize) + nLeftOver)/2; + TransferData data(4,16,16,8,16); + TransferFuncts fun(writePixel16S_0, getPixelAddress16S_0, SwizzleBlock16, SwizzleBlock16u); + + return RealTransfer_(data, fun, pbyMem, nQWordSize); } //DEFINE_TRANSFERLOCAL(16Z, u16, 4, 16, 16, 8, _, SwizzleBlock16); int TransferHostLocal16Z(const void* pbyMem, u32 nQWordSize) { - const u32 widthlimit = 4; - const u32 blockbits = 16; - const u32 blockwidth = 16; - const u32 blockheight = 8; - const u32 TSize = sizeof(u16); - const u32 transfersize = 16; - _writePixel_0 wp = writePixel16Z_0; - - assert( gs.imageTransfer == 0 ); - pstart = g_pbyGSMemory + gs.dstbuf.bp*256; - - /*const u8* pendbuf = (const u8*)pbyMem + nQWordSize*4;*/ - tempY = gs.imageY; tempX = gs.imageX; - - const u16* pbuf = (const u16*)pbyMem; - int nLeftOver = (nQWordSize*4*2)%(TransPitch(2, transfersize)); - nSize = nQWordSize*4*2/ TransPitch(2, transfersize); - nSize = min(nSize, gs.imageWnew * gs.imageHnew); - - int endY = ROUND_UPPOW2(tempY, blockheight); - int alignedY = ROUND_DOWNPOW2(gs.imageEndY, blockheight); - int alignedX = ROUND_DOWNPOW2(gs.imageEndX, blockwidth); - bool bAligned, bCanAlign = MOD_POW2(gs.trxpos.dx, blockwidth) == 0 && (tempX == gs.trxpos.dx) && (alignedY > endY) && alignedX > gs.trxpos.dx; - - if ((gs.imageEndX-gs.trxpos.dx)%widthlimit) - { - /* hack */ - int testwidth = (int)nSize - (gs.imageEndY-tempY)*(gs.imageEndX-gs.trxpos.dx)+(tempX-gs.trxpos.dx); - if ((testwidth <= widthlimit) && (testwidth >= -widthlimit)) - { - /* don't transfer */ - /*DEBUG_LOG("bad texture %s: %d %d %d\n", #psm, gs.trxpos.dx, gs.imageEndX, nQWordSize);*/ - gs.imageTransfer = -1; - } - bCanAlign = false; - } - - /* first align on block boundary */ - if ( MOD_POW2(tempY, blockheight) || !bCanAlign ) - { - - if ( !bCanAlign ) - endY = gs.imageEndY; /* transfer the whole image */ - else - assert( endY < gs.imageEndY); /* part of alignment condition */ - - if (((gs.imageEndX-gs.trxpos.dx)%widthlimit) || ((gs.imageEndX-tempX)%widthlimit)) - { - /* transmit with a width of 1 */ - if (!TransmitHostLocalY_(wp, (1+(DSTPSM == 0x14)), endY, pbuf)) goto End; - } - else - { - if (!TransmitHostLocalY_(wp, widthlimit, endY, pbuf)) goto End; - } - - if( nSize == 0 || tempY == gs.imageEndY ) goto End; - } - - assert( MOD_POW2(tempY, blockheight) == 0 && tempX == gs.trxpos.dx); - - /* can align! */ - pitch = gs.imageEndX-gs.trxpos.dx; - area = pitch*blockheight; - fracX = gs.imageEndX-alignedX; - - /* on top of checking whether pbuf is aligned, make sure that the width is at least aligned to its limits (due to bugs in pcsx2) */ - bAligned = !((uptr)pbuf & 0xf) && (TransPitch(pitch, transfersize)&0xf) == 0; - - /* transfer aligning to blocks */ - for(; tempY < alignedY && nSize >= area; tempY += blockheight, nSize -= area) - { - if( bAligned || ((DSTPSM==PSMCT24) || (DSTPSM==PSMT8H) || (DSTPSM==PSMT4HH) || (DSTPSM==PSMT4HL)) ) - { - for(int tempj = gs.trxpos.dx; tempj < alignedX; tempj += blockwidth, pbuf += TransPitch(blockwidth, transfersize)/TSize) - { - SwizzleBlock16(pstart + getPixelAddress_0(16Z,tempj, tempY, gs.dstbuf.bw)*blockbits/8, (u8*)pbuf, TransPitch(pitch, transfersize)); - } - } - else - { - for(int tempj = gs.trxpos.dx; tempj < alignedX; tempj += blockwidth, pbuf += TransPitch(blockwidth, transfersize)/TSize) - { - SwizzleBlock16u(pstart + getPixelAddress_0(16Z,tempj, tempY, gs.dstbuf.bw)*blockbits/8, (u8*)pbuf, TransPitch(pitch, transfersize)); - } - } - - /* transfer the rest */ - if ( alignedX < gs.imageEndX ) - { - if (!TransmitHostLocalX_(wp, widthlimit, blockheight, alignedX, pbuf)) goto End; - pbuf -= TransPitch((alignedX-gs.trxpos.dx), transfersize)/TSize; - } - else - { - pbuf += (blockheight-1)*TransPitch(pitch, transfersize)/TSize; - } - tempX = gs.trxpos.dx; - } - - if (TransPitch(nSize, transfersize)/4 > 0 ) - { - if (!TransmitHostLocalY_(wp, widthlimit, gs.imageEndY, pbuf)) goto End; - /* sometimes wrong sizes are sent (tekken tag) */ - assert( gs.imageTransfer == -1 || TransPitch(nSize, transfersize)/4 <= 2 ); - } - - End: - if( tempY >= gs.imageEndY ) { - assert( gs.imageTransfer == -1 || tempY == gs.imageEndY ); - gs.imageTransfer = -1; - /*int start, end; - ZeroGS::GetRectMemAddress(start, end, gs.dstbuf.psm, gs.trxpos.dx, gs.trxpos.dy, gs.imageWnew, gs.imageHnew, gs.dstbuf.bp, gs.dstbuf.bw); - ZeroGS::g_MemTargs.ClearRange(start, end);*/ - } - else { - /* update new params */ - gs.imageY = tempY; - gs.imageX = tempX; - } - return (nSize * TransPitch(2, transfersize) + nLeftOver)/2; + TransferData data(4,16,16,8,16); + TransferFuncts fun(writePixel16Z_0, getPixelAddress16Z_0, SwizzleBlock16, SwizzleBlock16u); + + return RealTransfer_(data, fun, pbyMem, nQWordSize); } //DEFINE_TRANSFERLOCAL(16SZ, u16, 4, 16, 16, 8, _, SwizzleBlock16); int TransferHostLocal16SZ(const void* pbyMem, u32 nQWordSize) { - const u32 widthlimit = 4; - const u32 blockbits = 16; - const u32 blockwidth = 16; - const u32 blockheight = 8; - const u32 TSize = sizeof(u16); - const u32 transfersize = 16; - _writePixel_0 wp = writePixel16SZ_0; - - assert( gs.imageTransfer == 0 ); - pstart = g_pbyGSMemory + gs.dstbuf.bp*256; - - /*const u8* pendbuf = (const u8*)pbyMem + nQWordSize*4;*/ - tempY = gs.imageY; tempX = gs.imageX; - - const u16* pbuf = (const u16*)pbyMem; - int nLeftOver = (nQWordSize*4*2)%(TransPitch(2, transfersize)); - nSize = nQWordSize*4*2/TransPitch(2, transfersize); - nSize = min(nSize, gs.imageWnew * gs.imageHnew); - - int endY = ROUND_UPPOW2(tempY, blockheight); - int alignedY = ROUND_DOWNPOW2(gs.imageEndY, blockheight); - int alignedX = ROUND_DOWNPOW2(gs.imageEndX, blockwidth); - bool bAligned, bCanAlign = MOD_POW2(gs.trxpos.dx, blockwidth) == 0 && (tempX == gs.trxpos.dx) && (alignedY > endY) && alignedX > gs.trxpos.dx; - - if ((gs.imageEndX-gs.trxpos.dx)%widthlimit) - { - /* hack */ - int testwidth = (int)nSize - (gs.imageEndY-tempY)*(gs.imageEndX-gs.trxpos.dx)+(tempX-gs.trxpos.dx); - if ((testwidth <= widthlimit) && (testwidth >= -widthlimit)) - { - /* don't transfer */ - /*DEBUG_LOG("bad texture %s: %d %d %d\n", #psm, gs.trxpos.dx, gs.imageEndX, nQWordSize);*/ - gs.imageTransfer = -1; - } - bCanAlign = false; - } - - /* first align on block boundary */ - if ( MOD_POW2(tempY, blockheight) || !bCanAlign ) - { - - if ( !bCanAlign ) - endY = gs.imageEndY; /* transfer the whole image */ - else - assert( endY < gs.imageEndY); /* part of alignment condition */ - - if (((gs.imageEndX-gs.trxpos.dx)%widthlimit) || ((gs.imageEndX-tempX)%widthlimit)) - { - /* transmit with a width of 1 */ - if (!TransmitHostLocalY_(wp, (1+(DSTPSM == 0x14)), endY, pbuf)) goto End; - } - else - { - if (!TransmitHostLocalY_(wp, widthlimit, endY, pbuf)) goto End; - } - - if( nSize == 0 || tempY == gs.imageEndY ) goto End; - } - - assert( MOD_POW2(tempY, blockheight) == 0 && tempX == gs.trxpos.dx); - - /* can align! */ - pitch = gs.imageEndX-gs.trxpos.dx; - area = pitch*blockheight; - fracX = gs.imageEndX-alignedX; - - /* on top of checking whether pbuf is aligned, make sure that the width is at least aligned to its limits (due to bugs in pcsx2) */ - bAligned = !((uptr)pbuf & 0xf) && (TransPitch(pitch, transfersize)&0xf) == 0; - - /* transfer aligning to blocks */ - for(; tempY < alignedY && nSize >= area; tempY += blockheight, nSize -= area) - { - if( bAligned || ((DSTPSM==PSMCT24) || (DSTPSM==PSMT8H) || (DSTPSM==PSMT4HH) || (DSTPSM==PSMT4HL)) ) - { - for(int tempj = gs.trxpos.dx; tempj < alignedX; tempj += blockwidth, pbuf += TransPitch(blockwidth, transfersize)/TSize) - { - SwizzleBlock16(pstart + getPixelAddress_0(16SZ,tempj, tempY, gs.dstbuf.bw)*blockbits/8, (u8*)pbuf, TransPitch(pitch, transfersize)); - } - } - else - { - for(int tempj = gs.trxpos.dx; tempj < alignedX; tempj += blockwidth, pbuf += TransPitch(blockwidth, transfersize)/TSize) - { - SwizzleBlock16u(pstart + getPixelAddress_0(16SZ,tempj, tempY, gs.dstbuf.bw)*blockbits/8, (u8*)pbuf, TransPitch(pitch, transfersize)); - } - } - - /* transfer the rest */ - if ( alignedX < gs.imageEndX ) - { - if (!TransmitHostLocalX_(wp, widthlimit, blockheight, alignedX, pbuf)) goto End; - pbuf -= TransPitch((alignedX-gs.trxpos.dx), transfersize)/TSize; - } - else - { - pbuf += (blockheight-1)*TransPitch(pitch, transfersize)/TSize; - } - tempX = gs.trxpos.dx; - } - - if (TransPitch(nSize, transfersize)/4 > 0 ) - { - if (!TransmitHostLocalY_(wp, widthlimit, gs.imageEndY, pbuf)) goto End; - /* sometimes wrong sizes are sent (tekken tag) */ - assert( gs.imageTransfer == -1 || TransPitch(nSize, transfersize)/4 <= 2 ); - } - - End: - if( tempY >= gs.imageEndY ) { - assert( gs.imageTransfer == -1 || tempY == gs.imageEndY ); - gs.imageTransfer = -1; - /*int start, end; - ZeroGS::GetRectMemAddress(start, end, gs.dstbuf.psm, gs.trxpos.dx, gs.trxpos.dy, gs.imageWnew, gs.imageHnew, gs.dstbuf.bp, gs.dstbuf.bw); - ZeroGS::g_MemTargs.ClearRange(start, end);*/ - } - else { - /* update new params */ - gs.imageY = tempY; - gs.imageX = tempX; - } - return (nSize * TransPitch(2, transfersize) + nLeftOver)/2; + TransferData data(4,16,16,8,16); + TransferFuncts fun(writePixel16SZ_0, getPixelAddress16SZ_0, SwizzleBlock16, SwizzleBlock16u); + + return RealTransfer_(data, fun, pbyMem, nQWordSize); } //DEFINE_TRANSFERLOCAL(8, u8, 4, 8, 16, 16, _, SwizzleBlock8); int TransferHostLocal8(const void* pbyMem, u32 nQWordSize) { - const u32 widthlimit = 4; - const u32 blockbits = 8; - const u32 blockwidth = 16; - const u32 blockheight = 16; - const u32 TSize = sizeof(u8); - const u32 transfersize = 8; - _writePixel_0 wp = writePixel8_0; - - assert( gs.imageTransfer == 0 ); - pstart = g_pbyGSMemory + gs.dstbuf.bp*256; - - /*const u8* pendbuf = (const u8*)pbyMem + nQWordSize*4;*/ - tempY = gs.imageY; tempX = gs.imageX; - - const u8* pbuf = (const u8*)pbyMem; - int nLeftOver = (nQWordSize*4*2)%(TransPitch(2, transfersize)); - nSize = nQWordSize*4*2/TransPitch(2, transfersize); - nSize = min(nSize, gs.imageWnew * gs.imageHnew); - - int endY = ROUND_UPPOW2(tempY, blockheight); - int alignedY = ROUND_DOWNPOW2(gs.imageEndY, blockheight); - int alignedX = ROUND_DOWNPOW2(gs.imageEndX, blockwidth); - bool bAligned, bCanAlign = MOD_POW2(gs.trxpos.dx, blockwidth) == 0 && (tempX == gs.trxpos.dx) && (alignedY > endY) && alignedX > gs.trxpos.dx; - - if ((gs.imageEndX-gs.trxpos.dx)%widthlimit) - { - /* hack */ - int testwidth = (int)nSize - (gs.imageEndY-tempY)*(gs.imageEndX-gs.trxpos.dx)+(tempX-gs.trxpos.dx); - if ((testwidth <= widthlimit) && (testwidth >= -widthlimit)) - { - /* don't transfer */ - /*DEBUG_LOG("bad texture %s: %d %d %d\n", #psm, gs.trxpos.dx, gs.imageEndX, nQWordSize);*/ - gs.imageTransfer = -1; - } - bCanAlign = false; - } - - /* first align on block boundary */ - if ( MOD_POW2(tempY, blockheight) || !bCanAlign ) - { - - if ( !bCanAlign ) - endY = gs.imageEndY; /* transfer the whole image */ - else - assert( endY < gs.imageEndY); /* part of alignment condition */ - - if (((gs.imageEndX-gs.trxpos.dx)%widthlimit) || ((gs.imageEndX-tempX)%widthlimit)) - { - /* transmit with a width of 1 */ - if (!TransmitHostLocalY_(wp, (1+(DSTPSM == 0x14)), endY, pbuf)) goto End; - } - else - { - if (!TransmitHostLocalY_(wp, widthlimit, endY, pbuf)) goto End; - } - - if( nSize == 0 || tempY == gs.imageEndY ) goto End; - } - - assert( MOD_POW2(tempY, blockheight) == 0 && tempX == gs.trxpos.dx); - - /* can align! */ - pitch = gs.imageEndX-gs.trxpos.dx; - area = pitch*blockheight; - fracX = gs.imageEndX-alignedX; - - /* on top of checking whether pbuf is aligned, make sure that the width is at least aligned to its limits (due to bugs in pcsx2) */ - bAligned = !((uptr)pbuf & 0xf) && (TransPitch(pitch, transfersize)&0xf) == 0; - - /* transfer aligning to blocks */ - for(; tempY < alignedY && nSize >= area; tempY += blockheight, nSize -= area) - { - if( bAligned || ((DSTPSM==PSMCT24) || (DSTPSM==PSMT8H) || (DSTPSM==PSMT4HH) || (DSTPSM==PSMT4HL)) ) - { - for(int tempj = gs.trxpos.dx; tempj < alignedX; tempj += blockwidth, pbuf +=TransPitch(blockwidth, transfersize)/TSize) - { - SwizzleBlock8(pstart + getPixelAddress_0(8,tempj, tempY, gs.dstbuf.bw)*blockbits/8, (u8*)pbuf, TransPitch(pitch, transfersize)); - } - } - else - { - for(int tempj = gs.trxpos.dx; tempj < alignedX; tempj += blockwidth, pbuf += TransPitch(blockwidth, transfersize)/TSize) - { - SwizzleBlock8u(pstart + getPixelAddress_0(8,tempj, tempY, gs.dstbuf.bw)*blockbits/8, (u8*)pbuf, TransPitch(pitch, transfersize)); - } - } - - /* transfer the rest */ - if ( alignedX < gs.imageEndX ) - { - if (!TransmitHostLocalX_(wp, widthlimit, blockheight, alignedX, pbuf)) goto End; - pbuf -= TransPitch(alignedX-gs.trxpos.dx, transfersize)/TSize; - } - else - { - pbuf += (blockheight-1)*TransPitch(pitch, transfersize)/TSize; - } - tempX = gs.trxpos.dx; - } - - if (TransPitch(nSize, transfersize)/4 > 0 ) - { - if (!TransmitHostLocalY_(wp, widthlimit, gs.imageEndY, pbuf)) goto End; - /* sometimes wrong sizes are sent (tekken tag) */ - assert( gs.imageTransfer == -1 || TransPitch(nSize, transfersize)/4 <= 2 ); - } - - End: - if( tempY >= gs.imageEndY ) { - assert( gs.imageTransfer == -1 || tempY == gs.imageEndY ); - gs.imageTransfer = -1; - /*int start, end; - ZeroGS::GetRectMemAddress(start, end, gs.dstbuf.psm, gs.trxpos.dx, gs.trxpos.dy, gs.imageWnew, gs.imageHnew, gs.dstbuf.bp, gs.dstbuf.bw); - ZeroGS::g_MemTargs.ClearRange(start, end);*/ - } - else { - /* update new params */ - gs.imageY = tempY; - gs.imageX = tempX; - } - return (nSize * TransPitch(2, transfersize) + nLeftOver)/2; + TransferData data(4,8,16,16,8); + TransferFuncts fun(writePixel8_0, getPixelAddress8_0, SwizzleBlock8, SwizzleBlock8u); + + return RealTransfer_(data, fun, pbyMem, nQWordSize); } //DEFINE_TRANSFERLOCAL(4, u8, 8, 4, 32, 16, _4, SwizzleBlock4); int TransferHostLocal4(const void* pbyMem, u32 nQWordSize) { - const u32 widthlimit = 8; - const u32 blockbits = 4; - const u32 blockwidth = 32; - const u32 blockheight = 16; - const u32 TSize = sizeof(u8); - const u32 transfersize = 4; - _writePixel_0 wp = writePixel4_0; - - assert( gs.imageTransfer == 0 ); - pstart = g_pbyGSMemory + gs.dstbuf.bp*256; - - /*const u8* pendbuf = (const u8*)pbyMem + nQWordSize*4;*/ - tempY = gs.imageY; tempX = gs.imageX; - - const u8* pbuf = (const u8*)pbyMem; - int nLeftOver = (nQWordSize*4*2)%(TransPitch(2, transfersize)); - nSize = nQWordSize*4*2/TransPitch(2, transfersize); - nSize = min(nSize, gs.imageWnew * gs.imageHnew); - - int endY = ROUND_UPPOW2(tempY, blockheight); - int alignedY = ROUND_DOWNPOW2(gs.imageEndY, blockheight); - int alignedX = ROUND_DOWNPOW2(gs.imageEndX, blockwidth); - bool bAligned, bCanAlign = MOD_POW2(gs.trxpos.dx, blockwidth) == 0 && (tempX == gs.trxpos.dx) && (alignedY > endY) && alignedX > gs.trxpos.dx; - - if ((gs.imageEndX-gs.trxpos.dx)%widthlimit) - { - /* hack */ - int testwidth = (int)nSize - (gs.imageEndY-tempY)*(gs.imageEndX-gs.trxpos.dx)+(tempX-gs.trxpos.dx); - if ((testwidth <= widthlimit) && (testwidth >= -widthlimit)) - { - /* don't transfer */ - /*DEBUG_LOG("bad texture %s: %d %d %d\n", #psm, gs.trxpos.dx, gs.imageEndX, nQWordSize);*/ - gs.imageTransfer = -1; - } - bCanAlign = false; - } - - /* first align on block boundary */ - if ( MOD_POW2(tempY, blockheight) || !bCanAlign ) - { - - if ( !bCanAlign ) - endY = gs.imageEndY; /* transfer the whole image */ - else - assert( endY < gs.imageEndY); /* part of alignment condition */ - - if (((gs.imageEndX-gs.trxpos.dx)%widthlimit) || ((gs.imageEndX-tempX)%widthlimit)) - { - /* transmit with a width of 1 */ - if (!TransmitHostLocalY_4(wp, (1+(DSTPSM == 0x14)), endY, pbuf)) goto End; - } - else - { - if (!TransmitHostLocalY_4(wp, widthlimit, endY, pbuf)) goto End; - } - - if( nSize == 0 || tempY == gs.imageEndY ) goto End; - } - - assert( MOD_POW2(tempY, blockheight) == 0 && tempX == gs.trxpos.dx); - - /* can align! */ - pitch = gs.imageEndX-gs.trxpos.dx; - area = pitch*blockheight; - fracX = gs.imageEndX-alignedX; - - /* on top of checking whether pbuf is aligned, make sure that the width is at least aligned to its limits (due to bugs in pcsx2) */ - bAligned = !((uptr)pbuf & 0xf) && (TransPitch(pitch, transfersize)&0xf) == 0; - - /* transfer aligning to blocks */ - for(; tempY < alignedY && nSize >= area; tempY += blockheight, nSize -= area) - { - if( bAligned || ((DSTPSM==PSMCT24) || (DSTPSM==PSMT8H) || (DSTPSM==PSMT4HH) || (DSTPSM==PSMT4HL)) ) - { - for(int tempj = gs.trxpos.dx; tempj < alignedX; tempj += blockwidth, pbuf += TransPitch(blockwidth, transfersize)/TSize) - { - SwizzleBlock4(pstart + getPixelAddress_0(4,tempj, tempY, gs.dstbuf.bw)*blockbits/8, (u8*)pbuf, TransPitch(pitch, transfersize)); - } - } - else - { - for(int tempj = gs.trxpos.dx; tempj < alignedX; tempj += blockwidth, pbuf += TransPitch(blockwidth, transfersize)/TSize) - { - SwizzleBlock4u(pstart + getPixelAddress_0(4,tempj, tempY, gs.dstbuf.bw)*blockbits/8, (u8*)pbuf, TransPitch(pitch, transfersize)); - } - } - - /* transfer the rest */ - if ( alignedX < gs.imageEndX ) - { - if (!TransmitHostLocalX_4(wp, widthlimit, blockheight, alignedX, pbuf)) goto End; - pbuf -= TransPitch((alignedX-gs.trxpos.dx), transfersize)/TSize; - } - else - { - pbuf += (blockheight-1)*TransPitch(pitch, transfersize)/TSize; - } - tempX = gs.trxpos.dx; - } - - if (TransPitch(nSize, transfersize)/4 > 0 ) - { - if (!TransmitHostLocalY_4(wp, widthlimit, gs.imageEndY, pbuf)) goto End; - /* sometimes wrong sizes are sent (tekken tag) */ - assert( gs.imageTransfer == -1 || TransPitch(nSize, transfersize)/4 <= 2 ); - } - - End: - if( tempY >= gs.imageEndY ) { - assert( gs.imageTransfer == -1 || tempY == gs.imageEndY ); - gs.imageTransfer = -1; - /*int start, end; - ZeroGS::GetRectMemAddress(start, end, gs.dstbuf.psm, gs.trxpos.dx, gs.trxpos.dy, gs.imageWnew, gs.imageHnew, gs.dstbuf.bp, gs.dstbuf.bw); - ZeroGS::g_MemTargs.ClearRange(start, end);*/ - } - else { - /* update new params */ - gs.imageY = tempY; - gs.imageX = tempX; - } - return (nSize * TransPitch(2, transfersize) + nLeftOver)/2; + TransferData data(8,4,32,16,4); + TransferFuncts fun(writePixel4_0, getPixelAddress4_0, SwizzleBlock4, SwizzleBlock4u); + + return RealTransfer_4(data, fun, pbyMem, nQWordSize); } //DEFINE_TRANSFERLOCAL(8H, u8, 4, 32, 8, 8, _, SwizzleBlock8H); int TransferHostLocal8H(const void* pbyMem, u32 nQWordSize) { - const u32 widthlimit = 4; - const u32 blockbits = 32; - const u32 blockwidth = 8; - const u32 blockheight = 8; - const u32 TSize = sizeof(u8); - const u32 transfersize = 8; - _writePixel_0 wp = writePixel8H_0; - - assert( gs.imageTransfer == 0 ); - pstart = g_pbyGSMemory + gs.dstbuf.bp*256; - - /*const u8* pendbuf = (const u8*)pbyMem + nQWordSize*4;*/ - tempY = gs.imageY; tempX = gs.imageX; - - const u8* pbuf = (const u8*)pbyMem; - int nLeftOver = (nQWordSize*4*2)%(TransPitch(2, transfersize)); - nSize = nQWordSize*4*2/TransPitch(2, transfersize); - nSize = min(nSize, gs.imageWnew * gs.imageHnew); - - int endY = ROUND_UPPOW2(tempY, blockheight); - int alignedY = ROUND_DOWNPOW2(gs.imageEndY, blockheight); - int alignedX = ROUND_DOWNPOW2(gs.imageEndX, blockwidth); - bool bAligned, bCanAlign = MOD_POW2(gs.trxpos.dx, blockwidth) == 0 && (tempX == gs.trxpos.dx) && (alignedY > endY) && alignedX > gs.trxpos.dx; - - if ((gs.imageEndX-gs.trxpos.dx)%widthlimit) - { - /* hack */ - int testwidth = (int)nSize - (gs.imageEndY-tempY)*(gs.imageEndX-gs.trxpos.dx)+(tempX-gs.trxpos.dx); - if ((testwidth <= widthlimit) && (testwidth >= -widthlimit)) - { - /* don't transfer */ - /*DEBUG_LOG("bad texture %s: %d %d %d\n", #psm, gs.trxpos.dx, gs.imageEndX, nQWordSize);*/ - gs.imageTransfer = -1; - } - bCanAlign = false; - } - - /* first align on block boundary */ - if ( MOD_POW2(tempY, blockheight) || !bCanAlign ) - { - - if ( !bCanAlign ) - endY = gs.imageEndY; /* transfer the whole image */ - else - assert( endY < gs.imageEndY); /* part of alignment condition */ - - if (((gs.imageEndX-gs.trxpos.dx)%widthlimit) || ((gs.imageEndX-tempX)%widthlimit)) - { - /* transmit with a width of 1 */ - if (!TransmitHostLocalY_(wp, (1+(DSTPSM == 0x14)), endY, pbuf)) goto End; - } - else - { - if (!TransmitHostLocalY_(wp, widthlimit, endY, pbuf)) goto End; - } - - if( nSize == 0 || tempY == gs.imageEndY ) goto End; - } - - assert( MOD_POW2(tempY, blockheight) == 0 && tempX == gs.trxpos.dx); - - /* can align! */ - pitch = gs.imageEndX-gs.trxpos.dx; - area = pitch*blockheight; - fracX = gs.imageEndX-alignedX; - - /* on top of checking whether pbuf is aligned, make sure that the width is at least aligned to its limits (due to bugs in pcsx2) */ - bAligned = !((uptr)pbuf & 0xf) && (TransPitch(pitch, transfersize)&0xf) == 0; - - /* transfer aligning to blocks */ - for(; tempY < alignedY && nSize >= area; tempY += blockheight, nSize -= area) - { - if( bAligned || ((DSTPSM==PSMCT24) || (DSTPSM==PSMT8H) || (DSTPSM==PSMT4HH) || (DSTPSM==PSMT4HL)) ) - { - for(int tempj = gs.trxpos.dx; tempj < alignedX; tempj += blockwidth, pbuf += TransPitch(blockwidth, transfersize)/TSize) - { - SwizzleBlock8H(pstart + getPixelAddress_0(8H,tempj, tempY, gs.dstbuf.bw)*blockbits/8, (u8*)pbuf, TransPitch(pitch, transfersize)); - } - } - else - { - for(int tempj = gs.trxpos.dx; tempj < alignedX; tempj += blockwidth, pbuf += TransPitch(blockwidth, transfersize)/TSize) - { - SwizzleBlock8Hu(pstart + getPixelAddress_0(8H,tempj, tempY, gs.dstbuf.bw)*blockbits/8, (u8*)pbuf, TransPitch(pitch, transfersize)); - } - } - - /* transfer the rest */ - if ( alignedX < gs.imageEndX ) - { - if (!TransmitHostLocalX_(wp, widthlimit, blockheight, alignedX, pbuf)) goto End; - pbuf -= TransPitch((alignedX-gs.trxpos.dx), transfersize)/TSize; - } - else - { - pbuf += (blockheight-1)*TransPitch(pitch, transfersize)/TSize; - } - tempX = gs.trxpos.dx; - } - - if (TransPitch(nSize, transfersize)/4 > 0 ) - { - if (!TransmitHostLocalY_(wp, widthlimit, gs.imageEndY, pbuf)) goto End; - /* sometimes wrong sizes are sent (tekken tag) */ - assert( gs.imageTransfer == -1 || TransPitch(nSize, transfersize)/4 <= 2 ); - } - - End: - if( tempY >= gs.imageEndY ) { - assert( gs.imageTransfer == -1 || tempY == gs.imageEndY ); - gs.imageTransfer = -1; - /*int start, end; - ZeroGS::GetRectMemAddress(start, end, gs.dstbuf.psm, gs.trxpos.dx, gs.trxpos.dy, gs.imageWnew, gs.imageHnew, gs.dstbuf.bp, gs.dstbuf.bw); - ZeroGS::g_MemTargs.ClearRange(start, end);*/ - } - else { - /* update new params */ - gs.imageY = tempY; - gs.imageX = tempX; - } - return (nSize * TransPitch(2, transfersize) + nLeftOver)/2; + TransferData data(4,32,8,8,8); + TransferFuncts fun(writePixel8H_0, getPixelAddress8H_0, SwizzleBlock8H, SwizzleBlock8Hu); + + return RealTransfer_(data, fun, pbyMem, nQWordSize); } //DEFINE_TRANSFERLOCAL(4HL, u8, 8, 32, 8, 8, _4, SwizzleBlock4HL); int TransferHostLocal4HL(const void* pbyMem, u32 nQWordSize) { - const u32 widthlimit = 8; - const u32 blockbits = 32; - const u32 blockwidth = 8; - const u32 blockheight = 8; - const u32 TSize = sizeof(u8); - const u32 transfersize = 4; - _writePixel_0 wp = writePixel4HL_0; - - assert( gs.imageTransfer == 0 ); - pstart = g_pbyGSMemory + gs.dstbuf.bp*256; - - /*const u8* pendbuf = (const u8*)pbyMem + nQWordSize*4;*/ - tempY = gs.imageY; tempX = gs.imageX; - - const u8* pbuf = (const u8*)pbyMem; - int nLeftOver = (nQWordSize*4*2)%(TransPitch(2, transfersize)); - nSize = nQWordSize*4*2/ TransPitch(2, transfersize); - nSize = min(nSize, gs.imageWnew * gs.imageHnew); - - int endY = ROUND_UPPOW2(tempY, blockheight); - int alignedY = ROUND_DOWNPOW2(gs.imageEndY, blockheight); - int alignedX = ROUND_DOWNPOW2(gs.imageEndX, blockwidth); - bool bAligned, bCanAlign = MOD_POW2(gs.trxpos.dx, blockwidth) == 0 && (tempX == gs.trxpos.dx) && (alignedY > endY) && alignedX > gs.trxpos.dx; - - if ((gs.imageEndX-gs.trxpos.dx)%widthlimit) - { - /* hack */ - int testwidth = (int)nSize - (gs.imageEndY-tempY)*(gs.imageEndX-gs.trxpos.dx)+(tempX-gs.trxpos.dx); - if ((testwidth <= widthlimit) && (testwidth >= -widthlimit)) - { - /* don't transfer */ - /*DEBUG_LOG("bad texture %s: %d %d %d\n", #psm, gs.trxpos.dx, gs.imageEndX, nQWordSize);*/ - gs.imageTransfer = -1; - } - bCanAlign = false; - } - - /* first align on block boundary */ - if ( MOD_POW2(tempY, blockheight) || !bCanAlign ) - { - - if ( !bCanAlign ) - endY = gs.imageEndY; /* transfer the whole image */ - else - assert( endY < gs.imageEndY); /* part of alignment condition */ - - if (((gs.imageEndX-gs.trxpos.dx)%widthlimit) || ((gs.imageEndX-tempX)%widthlimit)) - { - /* transmit with a width of 1 */ - if (!TransmitHostLocalY_4(wp, (1+(DSTPSM == 0x14)), endY, pbuf)) goto End; - } - else - { - if (!TransmitHostLocalY_4(wp, widthlimit, endY, pbuf)) goto End; - } - - if( nSize == 0 || tempY == gs.imageEndY ) goto End; - } - - assert( MOD_POW2(tempY, blockheight) == 0 && tempX == gs.trxpos.dx); - - /* can align! */ - pitch = gs.imageEndX-gs.trxpos.dx; - area = pitch*blockheight; - fracX = gs.imageEndX-alignedX; - - /* on top of checking whether pbuf is aligned, make sure that the width is at least aligned to its limits (due to bugs in pcsx2) */ - bAligned = !((uptr)pbuf & 0xf) && (TransPitch(pitch, transfersize)&0xf) == 0; - - /* transfer aligning to blocks */ - for(; tempY < alignedY && nSize >= area; tempY += blockheight, nSize -= area) - { - if( bAligned || ((DSTPSM==PSMCT24) || (DSTPSM==PSMT8H) || (DSTPSM==PSMT4HH) || (DSTPSM==PSMT4HL)) ) - { - for(int tempj = gs.trxpos.dx; tempj < alignedX; tempj += blockwidth, pbuf += TransPitch(blockwidth, transfersize)/TSize) - { - SwizzleBlock4HL(pstart + getPixelAddress_0(4HL,tempj, tempY, gs.dstbuf.bw)*blockbits/8, (u8*)pbuf, TransPitch(pitch, transfersize)); - } - } - else - { - for(int tempj = gs.trxpos.dx; tempj < alignedX; tempj += blockwidth, pbuf += TransPitch(blockwidth, transfersize)/TSize) - { - SwizzleBlock4HLu(pstart + getPixelAddress_0(4HL,tempj, tempY, gs.dstbuf.bw)*blockbits/8, (u8*)pbuf, TransPitch(pitch, transfersize)); - } - } - - /* transfer the rest */ - if ( alignedX < gs.imageEndX ) - { - if (!TransmitHostLocalX_4(wp, widthlimit, blockheight, alignedX, pbuf)) goto End; - pbuf -= TransPitch((alignedX-gs.trxpos.dx), transfersize)/TSize; - } - else - { - pbuf += (blockheight-1)*TransPitch(pitch, transfersize)/TSize; - } - tempX = gs.trxpos.dx; - } - - if (TransPitch(nSize, transfersize)/4 > 0 ) - { - if (!TransmitHostLocalY_4(wp, widthlimit, gs.imageEndY, pbuf)) goto End; - /* sometimes wrong sizes are sent (tekken tag) */ - assert( gs.imageTransfer == -1 || TransPitch(nSize, transfersize)/4 <= 2 ); - } - - End: - if( tempY >= gs.imageEndY ) { - assert( gs.imageTransfer == -1 || tempY == gs.imageEndY ); - gs.imageTransfer = -1; - /*int start, end; - ZeroGS::GetRectMemAddress(start, end, gs.dstbuf.psm, gs.trxpos.dx, gs.trxpos.dy, gs.imageWnew, gs.imageHnew, gs.dstbuf.bp, gs.dstbuf.bw); - ZeroGS::g_MemTargs.ClearRange(start, end);*/ - } - else { - /* update new params */ - gs.imageY = tempY; - gs.imageX = tempX; - } - return (nSize * TransPitch(2, transfersize) + nLeftOver)/2; + TransferData data(8,32,8,8,4); + TransferFuncts fun(writePixel4HL_0, getPixelAddress4HL_0, SwizzleBlock4HL, SwizzleBlock4HLu); + + return RealTransfer_4(data, fun, pbyMem, nQWordSize); } //DEFINE_TRANSFERLOCAL(4HH, u8, 8, 32, 8, 8, _4, SwizzleBlock4HH); int TransferHostLocal4HH(const void* pbyMem, u32 nQWordSize) { - const u32 widthlimit = 8; - const u32 blockbits = 32; - const u32 blockwidth = 8; - const u32 blockheight = 8; - const u32 TSize = sizeof(u8); - const u32 transfersize = 4; - _writePixel_0 wp = writePixel4HH_0; - - assert( gs.imageTransfer == 0 ); - pstart = g_pbyGSMemory + gs.dstbuf.bp*256; - - /*const u8* pendbuf = (const u8*)pbyMem + nQWordSize*4;*/ - tempY = gs.imageY; tempX = gs.imageX; - - const u8* pbuf = (const u8*)pbyMem; - int nLeftOver = (nQWordSize*4*2)%(TransPitch(2, transfersize)); - nSize = nQWordSize*4*2/TransPitch(2, transfersize); - nSize = min(nSize, gs.imageWnew * gs.imageHnew); - - int endY = ROUND_UPPOW2(tempY, blockheight); - int alignedY = ROUND_DOWNPOW2(gs.imageEndY, blockheight); - int alignedX = ROUND_DOWNPOW2(gs.imageEndX, blockwidth); - bool bAligned, bCanAlign = MOD_POW2(gs.trxpos.dx, blockwidth) == 0 && (tempX == gs.trxpos.dx) && (alignedY > endY) && alignedX > gs.trxpos.dx; - - if ((gs.imageEndX-gs.trxpos.dx)%widthlimit) - { - /* hack */ - int testwidth = (int)nSize - (gs.imageEndY-tempY)*(gs.imageEndX-gs.trxpos.dx)+(tempX-gs.trxpos.dx); - if ((testwidth <= widthlimit) && (testwidth >= -widthlimit)) - { - /* don't transfer */ - /*DEBUG_LOG("bad texture %s: %d %d %d\n", #psm, gs.trxpos.dx, gs.imageEndX, nQWordSize);*/ - gs.imageTransfer = -1; - } - bCanAlign = false; - } - - /* first align on block boundary */ - if ( MOD_POW2(tempY, blockheight) || !bCanAlign ) - { - - if ( !bCanAlign ) - endY = gs.imageEndY; /* transfer the whole image */ - else - assert( endY < gs.imageEndY); /* part of alignment condition */ - - if (((gs.imageEndX-gs.trxpos.dx)%widthlimit) || ((gs.imageEndX-tempX)%widthlimit)) - { - /* transmit with a width of 1 */ - if (!TransmitHostLocalY_4(wp, (1+(DSTPSM == 0x14)), endY, pbuf)) goto End; - } - else - { - if (!TransmitHostLocalY_4(wp, widthlimit, endY, pbuf)) goto End; - } - - if( nSize == 0 || tempY == gs.imageEndY ) goto End; - } - - assert( MOD_POW2(tempY, blockheight) == 0 && tempX == gs.trxpos.dx); - - /* can align! */ - pitch = gs.imageEndX-gs.trxpos.dx; - area = pitch*blockheight; - fracX = gs.imageEndX-alignedX; - - /* on top of checking whether pbuf is aligned, make sure that the width is at least aligned to its limits (due to bugs in pcsx2) */ - bAligned = !((uptr)pbuf & 0xf) && (TransPitch(pitch, transfersize)&0xf) == 0; - - /* transfer aligning to blocks */ - for(; tempY < alignedY && nSize >= area; tempY += blockheight, nSize -= area) - { - if( bAligned || ((DSTPSM==PSMCT24) || (DSTPSM==PSMT8H) || (DSTPSM==PSMT4HH) || (DSTPSM==PSMT4HL)) ) - { - for(int tempj = gs.trxpos.dx; tempj < alignedX; tempj += blockwidth, pbuf += TransPitch(blockwidth, transfersize)/TSize) - { - SwizzleBlock4HH(pstart + getPixelAddress_0(4HH,tempj, tempY, gs.dstbuf.bw)*blockbits/8, (u8*)pbuf, TransPitch(pitch, transfersize)); - } - } - else - { - for(int tempj = gs.trxpos.dx; tempj < alignedX; tempj += blockwidth, pbuf += TransPitch(blockwidth, transfersize)/TSize) - { - SwizzleBlock4HHu(pstart + getPixelAddress_0(4HH,tempj, tempY, gs.dstbuf.bw)*blockbits/8, (u8*)pbuf, TransPitch(pitch, transfersize)); - } - } - - /* transfer the rest */ - if ( alignedX < gs.imageEndX ) - { - if (!TransmitHostLocalX_4(wp, widthlimit, blockheight, alignedX, pbuf)) goto End; - pbuf -= TransPitch((alignedX-gs.trxpos.dx), transfersize)/TSize; - } - else - { - pbuf += (blockheight-1)*TransPitch(pitch, transfersize)/TSize; - } - tempX = gs.trxpos.dx; - } - - if (TransPitch(nSize, transfersize)/4 > 0 ) - { - if (!TransmitHostLocalY_4(wp, widthlimit, gs.imageEndY, pbuf)) goto End; - /* sometimes wrong sizes are sent (tekken tag) */ - assert( gs.imageTransfer == -1 || TransPitch(nSize, transfersize)/4 <= 2 ); - } - - End: - if( tempY >= gs.imageEndY ) { - assert( gs.imageTransfer == -1 || tempY == gs.imageEndY ); - gs.imageTransfer = -1; - /*int start, end; - ZeroGS::GetRectMemAddress(start, end, gs.dstbuf.psm, gs.trxpos.dx, gs.trxpos.dy, gs.imageWnew, gs.imageHnew, gs.dstbuf.bp, gs.dstbuf.bw); - ZeroGS::g_MemTargs.ClearRange(start, end);*/ - } - else { - /* update new params */ - gs.imageY = tempY; - gs.imageX = tempX; - } - return (nSize * TransPitch(2, transfersize) + nLeftOver)/2; + TransferData data(8,32,8,8,4); + TransferFuncts fun(writePixel4HH_0, getPixelAddress4HH_0, SwizzleBlock4HH, SwizzleBlock4HHu); + + return RealTransfer_4(data, fun, pbyMem, nQWordSize); } + #else DEFINE_TRANSFERLOCAL(32, 32, u32, 2, 32, 8, 8, _, SwizzleBlock32); // 32/8/4 = 1 diff --git a/plugins/zzogl-pg/opengl/Mem.h b/plugins/zzogl-pg/opengl/Mem.h index d29fa93ca0..0780cd8540 100644 --- a/plugins/zzogl-pg/opengl/Mem.h +++ b/plugins/zzogl-pg/opengl/Mem.h @@ -43,6 +43,40 @@ typedef u32 (*_readPixel_0)(const void* pmem, int x, int y, u32 bw); typedef int (*_TransferHostLocal)(const void* pbyMem, u32 nQWordSize); typedef void (*_TransferLocalHost)(void* pbyMem, u32 nQWordSize); typedef void (__fastcall *_SwizzleBlock)(u8 *dst, u8 *src, int pitch, u32 WriteMask); + +// Both of the following structs should probably be local class variables or in a namespace, +// but this works for the moment. +struct TransferData +{ + u32 widthlimit; + u32 blockbits; + u32 blockwidth; + u32 blockheight; + u32 transfersize; + __forceinline TransferData(u32 limit, u32 bits, u32 width, u32 height, u32 size) + { + widthlimit = limit; + blockbits = bits; + blockwidth = width; + blockheight = height; + transfersize = size; + } +}; + +struct TransferFuncts +{ + _writePixel_0 wp; + _getPixelAddress_0 gp; + _SwizzleBlock Swizzle, Swizzle_u; + __forceinline TransferFuncts(_writePixel_0 writePix, _getPixelAddress_0 readPix, _SwizzleBlock s, _SwizzleBlock su) + { + wp = writePix; + gp = readPix; + Swizzle = s; + Swizzle_u = su; + } +}; + // rest not visible externally struct BLOCK { diff --git a/plugins/zzogl-pg/opengl/Mem_Swizzle.h b/plugins/zzogl-pg/opengl/Mem_Swizzle.h index b40d6673ed..3dd648cd0a 100644 --- a/plugins/zzogl-pg/opengl/Mem_Swizzle.h +++ b/plugins/zzogl-pg/opengl/Mem_Swizzle.h @@ -24,7 +24,7 @@ // special swizzle macros - which I converted to functions. -static __forceinline void SwizzleBlock24(u8 *dst, u8 *src, int pitch, u32 WriteMask = 0xffffffff) +static __forceinline __fastcall void SwizzleBlock24(u8 *dst, u8 *src, int pitch, u32 WriteMask = 0xffffffff) { u8* pnewsrc = src; u32* pblock = tempblock; @@ -52,7 +52,7 @@ static __forceinline void SwizzleBlock24(u8 *dst, u8 *src, int pitch, u32 WriteM #define SwizzleBlock24u SwizzleBlock24 -static __forceinline void SwizzleBlock8H(u8 *dst, u8 *src, int pitch, u32 WriteMask = 0xffffffff) +static __forceinline __fastcall void SwizzleBlock8H(u8 *dst, u8 *src, int pitch, u32 WriteMask = 0xffffffff) { u8* pnewsrc = src; u32* pblock = tempblock; @@ -75,7 +75,7 @@ static __forceinline void SwizzleBlock8H(u8 *dst, u8 *src, int pitch, u32 WriteM #define SwizzleBlock8Hu SwizzleBlock8H -static __forceinline void SwizzleBlock4HH(u8 *dst, u8 *src, int pitch, u32 WriteMask = 0xffffffff) +static __forceinline __fastcall void SwizzleBlock4HH(u8 *dst, u8 *src, int pitch, u32 WriteMask = 0xffffffff) { u8* pnewsrc = src; u32* pblock = tempblock; @@ -97,7 +97,7 @@ static __forceinline void SwizzleBlock4HH(u8 *dst, u8 *src, int pitch, u32 Write #define SwizzleBlock4HHu SwizzleBlock4HH -static __forceinline void SwizzleBlock4HL(u8 *dst, u8 *src, int pitch, u32 WriteMask = 0xffffffff) +static __forceinline __fastcall void SwizzleBlock4HL(u8 *dst, u8 *src, int pitch, u32 WriteMask = 0xffffffff) { u8* pnewsrc = src; u32* pblock = tempblock; diff --git a/plugins/zzogl-pg/opengl/x86.cpp b/plugins/zzogl-pg/opengl/x86.cpp index 17ecda32f1..fd21b0734a 100644 --- a/plugins/zzogl-pg/opengl/x86.cpp +++ b/plugins/zzogl-pg/opengl/x86.cpp @@ -67,7 +67,7 @@ void __fastcall SwizzleBlock24_c(u8* dst, u8* src, int srcpitch, u32 WriteMask) } } -void __fastcall SwizzleBlock16_c(u8* dst, u8* src, int srcpitch) +void __fastcall SwizzleBlock16_c(u8* dst, u8* src, int srcpitch, u32 WriteMask) { u32* d = &g_columnTable16[0][0]; @@ -76,7 +76,7 @@ void __fastcall SwizzleBlock16_c(u8* dst, u8* src, int srcpitch) ((u16*)dst)[d[i]] = ((u16*)src)[i]; } -void __fastcall SwizzleBlock8_c(u8* dst, u8* src, int srcpitch) +void __fastcall SwizzleBlock8_c(u8* dst, u8* src, int srcpitch, u32 WriteMask) { u32* d = &g_columnTable8[0][0]; @@ -85,7 +85,7 @@ void __fastcall SwizzleBlock8_c(u8* dst, u8* src, int srcpitch) dst[d[i]] = src[i]; } -void __fastcall SwizzleBlock4_c(u8* dst, u8* src, int srcpitch) +void __fastcall SwizzleBlock4_c(u8* dst, u8* src, int srcpitch, u32 WriteMask) { u32* d = &g_columnTable4[0][0]; diff --git a/plugins/zzogl-pg/opengl/x86.h b/plugins/zzogl-pg/opengl/x86.h index e78fa47996..075eb491a5 100644 --- a/plugins/zzogl-pg/opengl/x86.h +++ b/plugins/zzogl-pg/opengl/x86.h @@ -25,13 +25,13 @@ #include "GS.h" extern "C" void __fastcall SwizzleBlock32_sse2(u8* dst, u8* src, int srcpitch, u32 WriteMask = 0xffffffff); -extern "C" void __fastcall SwizzleBlock16_sse2(u8* dst, u8* src, int srcpitch); -extern "C" void __fastcall SwizzleBlock8_sse2(u8* dst, u8* src, int srcpitch); -extern "C" void __fastcall SwizzleBlock4_sse2(u8* dst, u8* src, int srcpitch); +extern "C" void __fastcall SwizzleBlock16_sse2(u8* dst, u8* src, int srcpitch, u32 WriteMask = 0xffffffff); +extern "C" void __fastcall SwizzleBlock8_sse2(u8* dst, u8* src, int srcpitch, u32 WriteMask = 0xffffffff); +extern "C" void __fastcall SwizzleBlock4_sse2(u8* dst, u8* src, int srcpitch, u32 WriteMask = 0xffffffff); extern "C" void __fastcall SwizzleBlock32u_sse2(u8* dst, u8* src, int srcpitch, u32 WriteMask = 0xffffffff); -extern "C" void __fastcall SwizzleBlock16u_sse2(u8* dst, u8* src, int srcpitch); -extern "C" void __fastcall SwizzleBlock8u_sse2(u8* dst, u8* src, int srcpitch); -extern "C" void __fastcall SwizzleBlock4u_sse2(u8* dst, u8* src, int srcpitch); +extern "C" void __fastcall SwizzleBlock16u_sse2(u8* dst, u8* src, int srcpitch, u32 WriteMask = 0xffffffff); +extern "C" void __fastcall SwizzleBlock8u_sse2(u8* dst, u8* src, int srcpitch, u32 WriteMask = 0xffffffff); +extern "C" void __fastcall SwizzleBlock4u_sse2(u8* dst, u8* src, int srcpitch, u32 WriteMask = 0xffffffff); // frame swizzling @@ -60,9 +60,9 @@ extern "C" void __fastcall Frame16SwizzleBlock16A4_sse2(u16* dst, Vector_16F* sr extern "C" void __fastcall Frame16SwizzleBlock16ZA4_sse2(u16* dst, Vector_16F* src, int srcpitch, u32 WriteMask); extern void __fastcall SwizzleBlock32_c(u8* dst, u8* src, int srcpitch, u32 WriteMask = 0xffffffff); -extern void __fastcall SwizzleBlock16_c(u8* dst, u8* src, int srcpitch); -extern void __fastcall SwizzleBlock8_c(u8* dst, u8* src, int srcpitch); -extern void __fastcall SwizzleBlock4_c(u8* dst, u8* src, int srcpitch); +extern void __fastcall SwizzleBlock16_c(u8* dst, u8* src, int srcpitch, u32 WriteMask = 0xffffffff); +extern void __fastcall SwizzleBlock8_c(u8* dst, u8* src, int srcpitch, u32 WriteMask = 0xffffffff); +extern void __fastcall SwizzleBlock4_c(u8* dst, u8* src, int srcpitch, u32 WriteMask = 0xffffffff); // no AA extern void __fastcall FrameSwizzleBlock32_c(u32* dst, u32* src, int srcpitch, u32 WriteMask); @@ -92,9 +92,9 @@ extern void __fastcall Frame16SwizzleBlock16A4_c(u16* dst, Vector_16F* src, int extern void __fastcall Frame16SwizzleBlock16ZA4_c(u16* dst, Vector_16F* src, int srcpitch, u32 WriteMask); extern void __fastcall SwizzleColumn32_c(int y, u8* dst, u8* src, int srcpitch, u32 WriteMask = 0xffffffff); -extern void __fastcall SwizzleColumn16_c(int y, u8* dst, u8* src, int srcpitch); -extern void __fastcall SwizzleColumn8_c(int y, u8* dst, u8* src, int srcpitch); -extern void __fastcall SwizzleColumn4_c(int y, u8* dst, u8* src, int srcpitch); +extern void __fastcall SwizzleColumn16_c(int y, u8* dst, u8* src, int srcpitch, u32 WriteMask = 0xffffffff); +extern void __fastcall SwizzleColumn8_c(int y, u8* dst, u8* src, int srcpitch, u32 WriteMask = 0xffffffff); +extern void __fastcall SwizzleColumn4_c(int y, u8* dst, u8* src, int srcpitch, u32 WriteMask = 0xffffffff); extern "C" void __fastcall WriteCLUT_T16_I8_CSM1_sse2(u32* vm, u32* clut); extern "C" void __fastcall WriteCLUT_T32_I8_CSM1_sse2(u32* vm, u32* clut);