diff --git a/plugins/zzogl-pg/opengl/Mem.cpp b/plugins/zzogl-pg/opengl/Mem.cpp index 1f0fd71717..5a9adc0f15 100644 --- a/plugins/zzogl-pg/opengl/Mem.cpp +++ b/plugins/zzogl-pg/opengl/Mem.cpp @@ -92,10 +92,12 @@ int TransferHostLocal##psm(const void* pbyMem, u32 nQWordSize) \ \ if( ((gs.imageEndX-gs.trxpos.dx)%widthlimit) || ((gs.imageEndX-tempX)%widthlimit) ) { \ /* transmit with a width of 1 */ \ - if (!TransmitHostLocalY##TransSfx(wp, (1 + (DSTPSM == 0x14)), endY, pbuf)) goto End; \ + pbuf = TransmitHostLocalY##TransSfx(wp, (1 + (DSTPSM == 0x14)), endY, pbuf);\ + if (pbuf == NULL) goto End; \ } \ else { \ - if (!TransmitHostLocalY##TransSfx(wp, widthlimit, endY, pbuf)) goto End; \ + pbuf = TransmitHostLocalY##TransSfx(wp, widthlimit, endY, pbuf);\ + if (pbuf == NULL) goto End; \ } \ \ if( nSize == 0 || tempY == gs.imageEndY ) \ @@ -132,7 +134,8 @@ int TransferHostLocal##psm(const void* pbyMem, u32 nQWordSize) \ \ /* transfer the rest */ \ if( alignedX < gs.imageEndX ) { \ - if (!TransmitHostLocalX##TransSfx(wp, widthlimit, blockheight, alignedX, pbuf)) goto End; \ + pbuf = TransmitHostLocalX##TransSfx(wp, widthlimit, blockheight, alignedX, pbuf);\ + if (pbuf == NULL) goto End; \ pbuf -= TransPitch(alignedX-gs.trxpos.dx, transfersize)/TSize; \ } \ else pbuf += (blockheight-1)* TransPitch(pitch, transfersize)/TSize; \ @@ -140,7 +143,8 @@ int TransferHostLocal##psm(const void* pbyMem, u32 nQWordSize) \ } \ \ if( TransPitch(nSize, transfersize)/4 > 0 ) { \ - if (!TransmitHostLocalY##TransSfx(wp, widthlimit, gs.imageEndY, pbuf)) goto End; \ + pbuf = TransmitHostLocalY##TransSfx(wp, widthlimit, gs.imageEndY, pbuf);\ + if (pbuf == NULL) goto End; \ /* sometimes wrong sizes are sent (tekken tag) */ \ assert( gs.imageTransfer == -1 || TransPitch(nSize, transfersize)/4 <= 2 ); \ } \ @@ -166,7 +170,7 @@ End: \ // Get ready for the same function 3 times. *sigh* template -static __forceinline bool AlignOnBlockBoundry_(TransferData data, TransferFuncts fun, Point alignedPt, int& endY, const T* pbuf) +static __forceinline const T* AlignOnBlockBoundry_(TransferData data, TransferFuncts fun, Point alignedPt, int& endY, const T* pbuf) { bool bCanAlign = ((MOD_POW2(gs.trxpos.dx, data.blockwidth) == 0) && (gs.imageX == gs.trxpos.dx) && (alignedPt.y > endY) && (alignedPt.x > gs.trxpos.dx)); @@ -207,16 +211,17 @@ static __forceinline bool AlignOnBlockBoundry_(TransferData data, TransferFuncts } // The only line that's different in these 3 functions. - if (!TransmitHostLocalY_(fun.wp, transwidth, endY, pbuf)) return false; + pbuf = TransmitHostLocalY_(fun.wp, transwidth, endY, pbuf); + if (pbuf == NULL) return NULL; - if( nSize == 0 || tempY == gs.imageEndY ) return false; + if( nSize == 0 || tempY == gs.imageEndY ) return NULL; } - return true; + return pbuf; } template -static __forceinline bool AlignOnBlockBoundry_4(TransferData data, TransferFuncts fun, Point alignedPt, int& endY, const T* pbuf) +static __forceinline const T* AlignOnBlockBoundry_4(TransferData data, TransferFuncts fun, Point alignedPt, int& endY, const T* pbuf) { bool bCanAlign = ((MOD_POW2(gs.trxpos.dx, data.blockwidth) == 0) && (gs.imageX == gs.trxpos.dx) && (alignedPt.y > endY) && (alignedPt.x > gs.trxpos.dx)); @@ -257,16 +262,17 @@ static __forceinline bool AlignOnBlockBoundry_4(TransferData data, TransferFunct } // The only line that's different in these 3 functions. - if (!TransmitHostLocalY_4(fun.wp, transwidth, endY, pbuf)) return false; + pbuf = TransmitHostLocalY_4(fun.wp, transwidth, endY, pbuf); + if (pbuf == NULL) return NULL; - if( nSize == 0 || tempY == gs.imageEndY ) return false; + if( nSize == 0 || tempY == gs.imageEndY ) return NULL; } - return true; + return pbuf; } template -static __forceinline bool AlignOnBlockBoundry_24(TransferData data, TransferFuncts fun, Point alignedPt, int& endY, const T* pbuf) +static __forceinline const T* AlignOnBlockBoundry_24(TransferData data, TransferFuncts fun, Point alignedPt, int& endY, const T* pbuf) { bool bCanAlign = ((MOD_POW2(gs.trxpos.dx, data.blockwidth) == 0) && (gs.imageX == gs.trxpos.dx) && (alignedPt.y > endY) && (alignedPt.x > gs.trxpos.dx)); @@ -307,16 +313,17 @@ static __forceinline bool AlignOnBlockBoundry_24(TransferData data, TransferFunc } // The only line that's different in these 3 functions. - if (!TransmitHostLocalY_24(fun.wp, transwidth, endY, pbuf)) return false; + pbuf = TransmitHostLocalY_24(fun.wp, transwidth, endY, pbuf); + if (pbuf == NULL) return NULL; - if( nSize == 0 || tempY == gs.imageEndY ) return false; + if( nSize == 0 || tempY == gs.imageEndY ) return NULL; } - return true; + return pbuf; } // Here we go again. 3 nearly identical functions. template -static __forceinline bool TransferAligningToBlocks_(TransferData data, TransferFuncts fun, Point alignedPt, const T* pbuf) +static __forceinline const T* TransferAligningToBlocks_(TransferData data, TransferFuncts fun, Point alignedPt, const T* pbuf) { bool bAligned; const u32 TSize = sizeof(T); @@ -348,7 +355,8 @@ static __forceinline bool TransferAligningToBlocks_(TransferData data, TransferF if( alignedPt.x < gs.imageEndX ) { // The only line that's different in these 3 functions. - if (!TransmitHostLocalX_(fun.wp, data.widthlimit, data.blockheight, alignedPt.x, pbuf)) return false; + pbuf = TransmitHostLocalX_(fun.wp, data.widthlimit, data.blockheight, alignedPt.x, pbuf); + if (pbuf == NULL) return NULL; pbuf -= TransPitch((alignedPt.x - gs.trxpos.dx), data.transfersize)/TSize; } else @@ -358,11 +366,11 @@ static __forceinline bool TransferAligningToBlocks_(TransferData data, TransferF tempX = gs.trxpos.dx; } - return true; + return pbuf; } template -static __forceinline bool TransferAligningToBlocks_4(TransferData data, TransferFuncts fun, Point alignedPt, const T* pbuf) +static __forceinline const T* TransferAligningToBlocks_4(TransferData data, TransferFuncts fun, Point alignedPt, const T* pbuf) { bool bAligned; const u32 TSize = sizeof(T); @@ -394,7 +402,8 @@ static __forceinline bool TransferAligningToBlocks_4(TransferData data, Transfer if( alignedPt.x < gs.imageEndX ) { // The only line that's different in these 3 functions. - if (!TransmitHostLocalX_4(fun.wp, data.widthlimit, data.blockheight, alignedPt.x, pbuf)) return false; + pbuf = TransmitHostLocalX_4(fun.wp, data.widthlimit, data.blockheight, alignedPt.x, pbuf); + if (pbuf == NULL) return NULL; pbuf -= TransPitch((alignedPt.x - gs.trxpos.dx), data.transfersize)/TSize; } else @@ -404,11 +413,11 @@ static __forceinline bool TransferAligningToBlocks_4(TransferData data, Transfer tempX = gs.trxpos.dx; } - return true; + return pbuf; } template -static __forceinline bool TransferAligningToBlocks_24(TransferData data, TransferFuncts fun, Point alignedPt, const T* pbuf) +static __forceinline const T* TransferAligningToBlocks_24(TransferData data, TransferFuncts fun, Point alignedPt, const T* pbuf) { bool bAligned; const u32 TSize = sizeof(T); @@ -440,7 +449,8 @@ static __forceinline bool TransferAligningToBlocks_24(TransferData data, Transfe if( alignedPt.x < gs.imageEndX ) { // The only line that's different in these 3 functions. - if (!TransmitHostLocalX_24(fun.wp, data.widthlimit, data.blockheight, alignedPt.x, pbuf)) return false; + pbuf = TransmitHostLocalX_24(fun.wp, data.widthlimit, data.blockheight, alignedPt.x, pbuf); + if (pbuf == NULL) return NULL; pbuf -= TransPitch((alignedPt.x - gs.trxpos.dx), data.transfersize)/TSize; } else @@ -450,7 +460,7 @@ static __forceinline bool TransferAligningToBlocks_24(TransferData data, Transfe tempX = gs.trxpos.dx; } - return true; + return pbuf; } // Only one of this function, since no TransmitHostLocalX_ or TransmitHostLocalY_'s were involved. @@ -495,9 +505,11 @@ static __forceinline int RealTransfer_(TransferData data, TransferFuncts fun, co alignedPt.y = ROUND_DOWNPOW2(gs.imageEndY, data.blockheight); alignedPt.x = ROUND_DOWNPOW2(gs.imageEndX, data.blockwidth); - if (!AlignOnBlockBoundry_(data, fun, alignedPt, endY, pbuf)) return FinishTransfer(data, nLeftOver); + pbuf = AlignOnBlockBoundry_(data, fun, alignedPt, endY, pbuf); + if (pbuf == NULL) return FinishTransfer(data, nLeftOver); - if (!TransferAligningToBlocks_(data, fun, alignedPt, pbuf)) return FinishTransfer(data, nLeftOver); + pbuf = TransferAligningToBlocks_(data, fun, alignedPt, pbuf); + if (pbuf == NULL) return FinishTransfer(data, nLeftOver); if (TransPitch(nSize, data.transfersize)/4 > 0) { @@ -529,9 +541,11 @@ static __forceinline int RealTransfer_4(TransferData data, TransferFuncts fun, c alignedPt.y = ROUND_DOWNPOW2(gs.imageEndY, data.blockheight); alignedPt.x = ROUND_DOWNPOW2(gs.imageEndX, data.blockwidth); - if (!AlignOnBlockBoundry_4(data, fun, alignedPt, endY, pbuf)) return FinishTransfer(data, nLeftOver); + pbuf = AlignOnBlockBoundry_4(data, fun, alignedPt, endY, pbuf); + if (pbuf == NULL) return FinishTransfer(data, nLeftOver); - if (!TransferAligningToBlocks_4(data, fun, alignedPt, pbuf)) return FinishTransfer(data, nLeftOver); + pbuf = TransferAligningToBlocks_4(data, fun, alignedPt, pbuf); + if (pbuf == NULL) return FinishTransfer(data, nLeftOver); if (TransPitch(nSize, data.transfersize)/4 > 0) { @@ -563,9 +577,11 @@ static __forceinline int RealTransfer_24(TransferData data, TransferFuncts fun, alignedPt.y = ROUND_DOWNPOW2(gs.imageEndY, data.blockheight); alignedPt.x = ROUND_DOWNPOW2(gs.imageEndX, data.blockwidth); - if (!AlignOnBlockBoundry_24(data, fun, alignedPt, endY, pbuf)) return FinishTransfer(data, nLeftOver); + pbuf = AlignOnBlockBoundry_24(data, fun, alignedPt, endY, pbuf); + if (pbuf == NULL) return FinishTransfer(data, nLeftOver); - if (!TransferAligningToBlocks_24(data, fun, alignedPt, pbuf)) return FinishTransfer(data, nLeftOver); + pbuf = TransferAligningToBlocks_24(data, fun, alignedPt, pbuf); + if (pbuf == NULL) return FinishTransfer(data, nLeftOver); if (TransPitch(nSize, data.transfersize)/4 > 0) { diff --git a/plugins/zzogl-pg/opengl/Mem_Transmit.h b/plugins/zzogl-pg/opengl/Mem_Transmit.h index d9f6494fb8..487186efca 100644 --- a/plugins/zzogl-pg/opengl/Mem_Transmit.h +++ b/plugins/zzogl-pg/opengl/Mem_Transmit.h @@ -12,7 +12,7 @@ extern u8* pstart; // transfers whole rows template -static __forceinline bool TransmitHostLocalY_(_writePixel_0 wp, u32 widthlimit, u32 endY, const T *pbuf) +static __forceinline const T *TransmitHostLocalY_(_writePixel_0 wp, u32 widthlimit, u32 endY, const T *buf) { assert( (nSize%widthlimit) == 0 && widthlimit <= 4 ); if ((gs.imageEndX-gs.trxpos.dx) % widthlimit) @@ -21,34 +21,34 @@ static __forceinline bool TransmitHostLocalY_(_writePixel_0 wp, u32 widthlimit, for(; tempY < endY; ++tempY) { - for(; tempX < gs.imageEndX && nSize > 0; tempX += 1, nSize -= 1, pbuf += 1) + for(; tempX < gs.imageEndX && nSize > 0; tempX += 1, nSize -= 1, buf += 1) { /* write as many pixel at one time as possible */ - wp(pstart, tempX%2048, tempY%2048, pbuf[0], gs.dstbuf.bw); + wp(pstart, tempX%2048, tempY%2048, buf[0], gs.dstbuf.bw); } } } for(; tempY < endY; ++tempY) { - for(; tempX < gs.imageEndX && nSize > 0; tempX += widthlimit, nSize -= widthlimit, pbuf += widthlimit) + for(; tempX < gs.imageEndX && nSize > 0; tempX += widthlimit, nSize -= widthlimit, buf += widthlimit) { /* write as many pixel at one time as possible */ - if( nSize < widthlimit ) return false; + if( nSize < widthlimit ) return NULL; - wp(pstart, tempX%2048, tempY%2048, pbuf[0], gs.dstbuf.bw); + wp(pstart, tempX%2048, tempY%2048, buf[0], gs.dstbuf.bw); if( widthlimit > 1 ) { - wp(pstart, (tempX+1)%2048, tempY%2048, pbuf[1], gs.dstbuf.bw); + wp(pstart, (tempX+1)%2048, tempY%2048, buf[1], gs.dstbuf.bw); if( widthlimit > 2 ) { - wp(pstart, (tempX+2)%2048, tempY%2048, pbuf[2], gs.dstbuf.bw); + wp(pstart, (tempX+2)%2048, tempY%2048, buf[2], gs.dstbuf.bw); if( widthlimit > 3 ) { - wp(pstart, (tempX+3)%2048, tempY%2048, pbuf[3], gs.dstbuf.bw); + wp(pstart, (tempX+3)%2048, tempY%2048, buf[3], gs.dstbuf.bw); } } } @@ -62,38 +62,24 @@ static __forceinline bool TransmitHostLocalY_(_writePixel_0 wp, u32 widthlimit, else { assert( gs.imageTransfer == -1 || nSize*sizeof(T)/4 == 0 ); - return false; + return NULL; } } - return true; -} - -template -static __forceinline bool TransmitHostLocalX_(_writePixel_0 wp, u32 widthlimit, u32 blockheight, u32 startX, const T *pbuf) -{ - for(u32 tempi = 0; tempi < blockheight; ++tempi) - { - for(tempX = startX; tempX < gs.imageEndX; tempX++, pbuf++) - { - wp(pstart, tempX%2048, (tempY+tempi)%2048, pbuf[0], gs.dstbuf.bw); - } - pbuf += pitch - fracX; - } - return true; + return buf; } // transfers whole rows template -static __forceinline bool TransmitHostLocalY_24(_writePixel_0 wp, u32 widthlimit, u32 endY, const T *pbuf) +static __forceinline const T *TransmitHostLocalY_24(_writePixel_0 wp, u32 widthlimit, u32 endY, const T *buf) { if (widthlimit != 8 || ((gs.imageEndX-gs.trxpos.dx)%widthlimit)) { //GS_LOG("Bad Transmission! %d %d, psm: %d\n", gs.trxpos.dx, gs.imageEndX, DSTPSM); for(; tempY < endY; ++tempY) { - for(; tempX < gs.imageEndX && nSize > 0; tempX += 1, nSize -= 1, pbuf += 3) + for(; tempX < gs.imageEndX && nSize > 0; tempX += 1, nSize -= 1, buf += 3) { - wp(pstart, tempX%2048, tempY%2048, *(u32*)(pbuf), gs.dstbuf.bw); + wp(pstart, tempX%2048, tempY%2048, *(u32*)(buf), gs.dstbuf.bw); } if( tempX >= gs.imageEndX ) @@ -104,7 +90,7 @@ static __forceinline bool TransmitHostLocalY_24(_writePixel_0 wp, u32 widthlimit else { assert( gs.imageTransfer == -1 || nSize == 0 ); - return false; + return NULL; } } } @@ -113,20 +99,20 @@ static __forceinline bool TransmitHostLocalY_24(_writePixel_0 wp, u32 widthlimit assert( /*(nSize%widthlimit) == 0 &&*/ widthlimit == 8 ); for(; tempY < endY; ++tempY) { - for(; tempX < gs.imageEndX && nSize > 0; tempX += widthlimit, nSize -= widthlimit, pbuf += 3*widthlimit) + for(; tempX < gs.imageEndX && nSize > 0; tempX += widthlimit, nSize -= widthlimit, buf += 3*widthlimit) { - if (nSize < widthlimit) return false; + if (nSize < widthlimit) return NULL; /* write as many pixel at one time as possible */ - wp(pstart, tempX%2048, tempY%2048, *(u32*)(pbuf+0), gs.dstbuf.bw); - wp(pstart, (tempX+1)%2048, tempY%2048, *(u32*)(pbuf+3), gs.dstbuf.bw); - wp(pstart, (tempX+2)%2048, tempY%2048, *(u32*)(pbuf+6), gs.dstbuf.bw); - wp(pstart, (tempX+3)%2048, tempY%2048, *(u32*)(pbuf+9), gs.dstbuf.bw); - wp(pstart, (tempX+4)%2048, tempY%2048, *(u32*)(pbuf+12), gs.dstbuf.bw); - wp(pstart, (tempX+5)%2048, tempY%2048, *(u32*)(pbuf+15), gs.dstbuf.bw); - wp(pstart, (tempX+6)%2048, tempY%2048, *(u32*)(pbuf+18), gs.dstbuf.bw); - wp(pstart, (tempX+7)%2048, tempY%2048, *(u32*)(pbuf+21), gs.dstbuf.bw); + wp(pstart, tempX%2048, tempY%2048, *(u32*)(buf+0), gs.dstbuf.bw); + wp(pstart, (tempX+1)%2048, tempY%2048, *(u32*)(buf+3), gs.dstbuf.bw); + wp(pstart, (tempX+2)%2048, tempY%2048, *(u32*)(buf+6), gs.dstbuf.bw); + wp(pstart, (tempX+3)%2048, tempY%2048, *(u32*)(buf+9), gs.dstbuf.bw); + wp(pstart, (tempX+4)%2048, tempY%2048, *(u32*)(buf+12), gs.dstbuf.bw); + wp(pstart, (tempX+5)%2048, tempY%2048, *(u32*)(buf+15), gs.dstbuf.bw); + wp(pstart, (tempX+6)%2048, tempY%2048, *(u32*)(buf+18), gs.dstbuf.bw); + wp(pstart, (tempX+7)%2048, tempY%2048, *(u32*)(buf+21), gs.dstbuf.bw); } if (tempX >= gs.imageEndX) @@ -144,57 +130,42 @@ static __forceinline bool TransmitHostLocalY_24(_writePixel_0 wp, u32 widthlimit nSize = 0; } assert( gs.imageTransfer == -1 || nSize == 0 ); - return false; + return NULL; } } } - return true; + return buf; } -// transmit until endX, don't check size since it has already been prevalidated -template -static __forceinline bool TransmitHostLocalX_24(_writePixel_0 wp, u32 widthlimit, u32 blockheight, u32 startX, const T *pbuf) -{ - for(u32 tempi = 0; tempi < blockheight; ++tempi) - { - for(tempX = startX; tempX < gs.imageEndX; tempX++, pbuf += 3) - { - wp(pstart, tempX%2048, (tempY+tempi)%2048, *(u32*)pbuf, gs.dstbuf.bw); - } - pbuf += 3*(pitch-fracX); - } - return true; -} - // meant for 4bit transfers template -static __forceinline bool TransmitHostLocalY_4(_writePixel_0 wp, u32 widthlimit, u32 endY, const T *pbuf) +static __forceinline const T *TransmitHostLocalY_4(_writePixel_0 wp, u32 widthlimit, u32 endY, const T *buf) { for(; tempY < endY; ++tempY) { for(; tempX < gs.imageEndX && nSize > 0; tempX += widthlimit, nSize -= widthlimit) { /* write as many pixel at one time as possible */ - wp(pstart, tempX%2048, tempY%2048, *pbuf&0x0f, gs.dstbuf.bw); - wp(pstart, (tempX+1)%2048, tempY%2048, *pbuf>>4, gs.dstbuf.bw); - pbuf++; + wp(pstart, tempX%2048, tempY%2048, *buf&0x0f, gs.dstbuf.bw); + wp(pstart, (tempX+1)%2048, tempY%2048, *buf>>4, gs.dstbuf.bw); + buf++; if ( widthlimit > 2 ) { - wp(pstart, (tempX+2)%2048, tempY%2048, *pbuf&0x0f, gs.dstbuf.bw); - wp(pstart, (tempX+3)%2048, tempY%2048, *pbuf>>4, gs.dstbuf.bw); - pbuf++; + wp(pstart, (tempX+2)%2048, tempY%2048, *buf&0x0f, gs.dstbuf.bw); + wp(pstart, (tempX+3)%2048, tempY%2048, *buf>>4, gs.dstbuf.bw); + buf++; if( widthlimit > 4 ) { - wp(pstart, (tempX+4)%2048, tempY%2048, *pbuf&0x0f, gs.dstbuf.bw); - wp(pstart, (tempX+5)%2048, tempY%2048, *pbuf>>4, gs.dstbuf.bw); - pbuf++; + wp(pstart, (tempX+4)%2048, tempY%2048, *buf&0x0f, gs.dstbuf.bw); + wp(pstart, (tempX+5)%2048, tempY%2048, *buf>>4, gs.dstbuf.bw); + buf++; if( widthlimit > 6 ) { - wp(pstart, (tempX+6)%2048, tempY%2048, *pbuf&0x0f, gs.dstbuf.bw); - wp(pstart, (tempX+7)%2048, tempY%2048, *pbuf>>4, gs.dstbuf.bw); - pbuf++; + wp(pstart, (tempX+6)%2048, tempY%2048, *buf&0x0f, gs.dstbuf.bw); + wp(pstart, (tempX+7)%2048, tempY%2048, *buf>>4, gs.dstbuf.bw); + buf++; } } } @@ -207,26 +178,55 @@ static __forceinline bool TransmitHostLocalY_4(_writePixel_0 wp, u32 widthlimit, else { assert( gs.imageTransfer == -1 || (nSize/32) == 0 ); - return false; + return NULL; } } - return true; + return buf; +} + +template +static __forceinline const T *TransmitHostLocalX_(_writePixel_0 wp, u32 widthlimit, u32 blockheight, u32 startX, const T *buf) +{ + for(u32 tempi = 0; tempi < blockheight; ++tempi) + { + for(tempX = startX; tempX < gs.imageEndX; tempX++, buf++) + { + wp(pstart, tempX%2048, (tempY+tempi)%2048, buf[0], gs.dstbuf.bw); + } + buf += pitch - fracX; + } + return buf; } // transmit until endX, don't check size since it has already been prevalidated template -static __forceinline bool TransmitHostLocalX_4(_writePixel_0 wp, u32 widthlimit, u32 blockheight, u32 startX, const T *pbuf) +static __forceinline const T *TransmitHostLocalX_24(_writePixel_0 wp, u32 widthlimit, u32 blockheight, u32 startX, const T *buf) +{ + for(u32 tempi = 0; tempi < blockheight; ++tempi) + { + for(tempX = startX; tempX < gs.imageEndX; tempX++, buf += 3) + { + wp(pstart, tempX%2048, (tempY+tempi)%2048, *(u32*)buf, gs.dstbuf.bw); + } + buf += 3*(pitch-fracX); + } + return buf; +} + +// transmit until endX, don't check size since it has already been prevalidated +template +static __forceinline const T *TransmitHostLocalX_4(_writePixel_0 wp, u32 widthlimit, u32 blockheight, u32 startX, const T *buf) { for(u32 tempi = 0; tempi < blockheight; ++tempi) { - for(tempX = startX; tempX < gs.imageEndX; tempX+=2, pbuf++) + for(tempX = startX; tempX < gs.imageEndX; tempX+=2, buf++) { - wp(pstart, tempX%2048, (tempY+tempi)%2048, pbuf[0]&0x0f, gs.dstbuf.bw); - wp(pstart, (tempX+1)%2048, (tempY+tempi)%2048, pbuf[0]>>4, gs.dstbuf.bw); + wp(pstart, tempX%2048, (tempY+tempi)%2048, buf[0]&0x0f, gs.dstbuf.bw); + wp(pstart, (tempX+1)%2048, (tempY+tempi)%2048, buf[0]>>4, gs.dstbuf.bw); } - pbuf += (pitch-fracX)/2; + buf += (pitch-fracX)/2; } - return true; + return buf; } // calculate pitch in source buffer diff --git a/plugins/zzogl-pg/opengl/targets.cpp b/plugins/zzogl-pg/opengl/targets.cpp index b4f738c49e..8bd5ac42a4 100644 --- a/plugins/zzogl-pg/opengl/targets.cpp +++ b/plugins/zzogl-pg/opengl/targets.cpp @@ -2835,6 +2835,64 @@ void InitTransferLocalHost() ResolveInRange(start, end); } +template +void TransferLocalHost(void* pbyMem, u32 nQWordSize, int& x, int& y, u8 *pstart, _readPixel_0 rp) +{ + int i = x, j = y; + T* pbuf = (T*)pbyMem; + u32 nSize = nQWordSize*16/sizeof(T); + + for(; i < gs.imageEndY; ++i) + { + for(; j < gs.imageEndX && nSize > 0; ++j, --nSize) + { + *pbuf++ = rp(pstart, j%2048, i%2048, gs.srcbuf.bw); + } + + if( j >= gs.imageEndX ) + { + assert( j == gs.imageEndX); + j = gs.trxpos.sx; + } + else + { + assert( nSize == 0 ); + break; + } + } + +} + +void TransferLocalHost_24(void* pbyMem, u32 nQWordSize, int& x, int& y, u8 *pstart, _readPixel_0 rp) +{ + int i = x, j = y; + u8* pbuf = (u8*)pbyMem; + u32 nSize = nQWordSize*16/3; + + for(; i < gs.imageEndY; ++i) + { + for(; j < gs.imageEndX && nSize > 0; ++j, --nSize) + { + u32 p = rp(pstart, j%2048, i%2048, gs.srcbuf.bw); + pbuf[0] = (u8)p; + pbuf[1] = (u8)(p>>8); + pbuf[2] = (u8)(p>>16); + pbuf += 3; + } + + if( j >= gs.imageEndX ) + { + assert( j == gs.imageEndX); + j = gs.trxpos.sx; + } + else + { + assert( nSize == 0 ); + break; + } + } +} + // left/right, top/down void TransferLocalHost(void* pbyMem, u32 nQWordSize) { @@ -2844,47 +2902,17 @@ void TransferLocalHost(void* pbyMem, u32 nQWordSize) u8* pstart = g_pbyGSMemory + 256*gs.srcbuf.bp; int i = gs.imageY, j = gs.imageX; -#define TRANSFERLOCALHOST(psm, T) { \ - T* pbuf = (T*)pbyMem; \ - u32 nSize = nQWordSize*16/sizeof(T); \ - for(; i < gs.imageEndY; ++i) { \ - for(; j < gs.imageEndX && nSize > 0; ++j, --nSize) { \ - *pbuf++ = readPixel##psm##_0(pstart, j%2048, i%2048, gs.srcbuf.bw); \ - } \ - \ - if( j >= gs.imageEndX ) { assert( j == gs.imageEndX); j = gs.trxpos.sx; } \ - else { assert( nSize == 0 ); break; } \ - } \ -} \ - -#define TRANSFERLOCALHOST_24(psm) { \ - u8* pbuf = (u8*)pbyMem; \ - u32 nSize = nQWordSize*16/3; \ - for(; i < gs.imageEndY; ++i) { \ - for(; j < gs.imageEndX && nSize > 0; ++j, --nSize) { \ - u32 p = readPixel##psm##_0(pstart, j%2048, i%2048, gs.srcbuf.bw); \ - pbuf[0] = (u8)p; \ - pbuf[1] = (u8)(p>>8); \ - pbuf[2] = (u8)(p>>16); \ - pbuf += 3; \ - } \ - \ - if( j >= gs.imageEndX ) { assert( j == gs.imageEndX); j = gs.trxpos.sx; } \ - else { assert( nSize == 0 ); break; } \ - } \ -} \ - switch (gs.srcbuf.psm) { - case 0x0: TRANSFERLOCALHOST(32, u32); break; - case 0x1: TRANSFERLOCALHOST_24(24); break; - case 0x2: TRANSFERLOCALHOST(16, u16); break; - case 0xA: TRANSFERLOCALHOST(16S, u16); break; - case 0x13: TRANSFERLOCALHOST(8, u8); break; - case 0x1B: TRANSFERLOCALHOST(8H, u8); break; - case 0x30: TRANSFERLOCALHOST(32Z, u32); break; - case 0x31: TRANSFERLOCALHOST_24(24Z); break; - case 0x32: TRANSFERLOCALHOST(16Z, u16); break; - case 0x3A: TRANSFERLOCALHOST(16SZ, u16); break; + case PSMCT32: TransferLocalHost(pbyMem, nQWordSize, i, j, pstart, readPixel32_0); break; + case PSMCT24: TransferLocalHost_24(pbyMem, nQWordSize, i, j, pstart, readPixel24_0); break; + case PSMCT16: TransferLocalHost(pbyMem, nQWordSize, i, j, pstart, readPixel16_0); break; + case PSMCT16S: TransferLocalHost(pbyMem, nQWordSize, i, j, pstart, readPixel16S_0); break; + case PSMT8: TransferLocalHost(pbyMem, nQWordSize, i, j, pstart, readPixel8_0); break; + case PSMT8H: TransferLocalHost(pbyMem, nQWordSize, i, j, pstart, readPixel8H_0); break; + case PSMT32Z: TransferLocalHost(pbyMem, nQWordSize, i, j, pstart, readPixel32Z_0); break; + case PSMT24Z: TransferLocalHost_24(pbyMem, nQWordSize, i, j, pstart, readPixel24Z_0); break; + case PSMT16Z: TransferLocalHost(pbyMem, nQWordSize, i, j, pstart, readPixel16Z_0); break; + case PSMT16SZ: TransferLocalHost(pbyMem, nQWordSize, i, j, pstart, readPixel16SZ_0); break; default: assert(0); }