mirror of https://github.com/PCSX2/pcsx2.git
zzogl-pg: Fix an issue I noticed in the new Mem Transfer code.
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@2820 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
9c398f3d91
commit
6b37b43e96
|
@ -92,10 +92,12 @@ int TransferHostLocal##psm(const void* pbyMem, u32 nQWordSize) \
|
|||
\
|
||||
if( ((gs.imageEndX-gs.trxpos.dx)%widthlimit) || ((gs.imageEndX-tempX)%widthlimit) ) { \
|
||||
/* transmit with a width of 1 */ \
|
||||
if (!TransmitHostLocalY##TransSfx<T>(wp, (1 + (DSTPSM == 0x14)), endY, pbuf)) goto End; \
|
||||
pbuf = TransmitHostLocalY##TransSfx<T>(wp, (1 + (DSTPSM == 0x14)), endY, pbuf);\
|
||||
if (pbuf == NULL) goto End; \
|
||||
} \
|
||||
else { \
|
||||
if (!TransmitHostLocalY##TransSfx<T>(wp, widthlimit, endY, pbuf)) goto End; \
|
||||
pbuf = TransmitHostLocalY##TransSfx<T>(wp, widthlimit, endY, pbuf);\
|
||||
if (pbuf == NULL) goto End; \
|
||||
} \
|
||||
\
|
||||
if( nSize == 0 || tempY == gs.imageEndY ) \
|
||||
|
@ -132,7 +134,8 @@ int TransferHostLocal##psm(const void* pbyMem, u32 nQWordSize) \
|
|||
\
|
||||
/* transfer the rest */ \
|
||||
if( alignedX < gs.imageEndX ) { \
|
||||
if (!TransmitHostLocalX##TransSfx<T>(wp, widthlimit, blockheight, alignedX, pbuf)) goto End; \
|
||||
pbuf = TransmitHostLocalX##TransSfx<T>(wp, widthlimit, blockheight, alignedX, pbuf);\
|
||||
if (pbuf == NULL) goto End; \
|
||||
pbuf -= TransPitch(alignedX-gs.trxpos.dx, transfersize)/TSize; \
|
||||
} \
|
||||
else pbuf += (blockheight-1)* TransPitch(pitch, transfersize)/TSize; \
|
||||
|
@ -140,7 +143,8 @@ int TransferHostLocal##psm(const void* pbyMem, u32 nQWordSize) \
|
|||
} \
|
||||
\
|
||||
if( TransPitch(nSize, transfersize)/4 > 0 ) { \
|
||||
if (!TransmitHostLocalY##TransSfx<T>(wp, widthlimit, gs.imageEndY, pbuf)) goto End; \
|
||||
pbuf = TransmitHostLocalY##TransSfx<T>(wp, widthlimit, gs.imageEndY, pbuf);\
|
||||
if (pbuf == NULL) goto End; \
|
||||
/* sometimes wrong sizes are sent (tekken tag) */ \
|
||||
assert( gs.imageTransfer == -1 || TransPitch(nSize, transfersize)/4 <= 2 ); \
|
||||
} \
|
||||
|
@ -166,7 +170,7 @@ End: \
|
|||
|
||||
// Get ready for the same function 3 times. *sigh*
|
||||
template <class T>
|
||||
static __forceinline bool AlignOnBlockBoundry_(TransferData data, TransferFuncts fun, Point alignedPt, int& endY, const T* pbuf)
|
||||
static __forceinline const T* AlignOnBlockBoundry_(TransferData data, TransferFuncts fun, Point alignedPt, int& endY, const T* pbuf)
|
||||
{
|
||||
bool bCanAlign = ((MOD_POW2(gs.trxpos.dx, data.blockwidth) == 0) && (gs.imageX == gs.trxpos.dx) &&
|
||||
(alignedPt.y > endY) && (alignedPt.x > gs.trxpos.dx));
|
||||
|
@ -207,16 +211,17 @@ static __forceinline bool AlignOnBlockBoundry_(TransferData data, TransferFuncts
|
|||
}
|
||||
|
||||
// The only line that's different in these 3 functions.
|
||||
if (!TransmitHostLocalY_<T>(fun.wp, transwidth, endY, pbuf)) return false;
|
||||
pbuf = TransmitHostLocalY_<T>(fun.wp, transwidth, endY, pbuf);
|
||||
if (pbuf == NULL) return NULL;
|
||||
|
||||
if( nSize == 0 || tempY == gs.imageEndY ) return false;
|
||||
if( nSize == 0 || tempY == gs.imageEndY ) return NULL;
|
||||
}
|
||||
return true;
|
||||
return pbuf;
|
||||
}
|
||||
|
||||
|
||||
template <class T>
|
||||
static __forceinline bool AlignOnBlockBoundry_4(TransferData data, TransferFuncts fun, Point alignedPt, int& endY, const T* pbuf)
|
||||
static __forceinline const T* AlignOnBlockBoundry_4(TransferData data, TransferFuncts fun, Point alignedPt, int& endY, const T* pbuf)
|
||||
{
|
||||
bool bCanAlign = ((MOD_POW2(gs.trxpos.dx, data.blockwidth) == 0) && (gs.imageX == gs.trxpos.dx) &&
|
||||
(alignedPt.y > endY) && (alignedPt.x > gs.trxpos.dx));
|
||||
|
@ -257,16 +262,17 @@ static __forceinline bool AlignOnBlockBoundry_4(TransferData data, TransferFunct
|
|||
}
|
||||
|
||||
// The only line that's different in these 3 functions.
|
||||
if (!TransmitHostLocalY_4<T>(fun.wp, transwidth, endY, pbuf)) return false;
|
||||
pbuf = TransmitHostLocalY_4<T>(fun.wp, transwidth, endY, pbuf);
|
||||
if (pbuf == NULL) return NULL;
|
||||
|
||||
if( nSize == 0 || tempY == gs.imageEndY ) return false;
|
||||
if( nSize == 0 || tempY == gs.imageEndY ) return NULL;
|
||||
}
|
||||
return true;
|
||||
return pbuf;
|
||||
}
|
||||
|
||||
|
||||
template <class T>
|
||||
static __forceinline bool AlignOnBlockBoundry_24(TransferData data, TransferFuncts fun, Point alignedPt, int& endY, const T* pbuf)
|
||||
static __forceinline const T* AlignOnBlockBoundry_24(TransferData data, TransferFuncts fun, Point alignedPt, int& endY, const T* pbuf)
|
||||
{
|
||||
bool bCanAlign = ((MOD_POW2(gs.trxpos.dx, data.blockwidth) == 0) && (gs.imageX == gs.trxpos.dx) &&
|
||||
(alignedPt.y > endY) && (alignedPt.x > gs.trxpos.dx));
|
||||
|
@ -307,16 +313,17 @@ static __forceinline bool AlignOnBlockBoundry_24(TransferData data, TransferFunc
|
|||
}
|
||||
|
||||
// The only line that's different in these 3 functions.
|
||||
if (!TransmitHostLocalY_24<T>(fun.wp, transwidth, endY, pbuf)) return false;
|
||||
pbuf = TransmitHostLocalY_24<T>(fun.wp, transwidth, endY, pbuf);
|
||||
if (pbuf == NULL) return NULL;
|
||||
|
||||
if( nSize == 0 || tempY == gs.imageEndY ) return false;
|
||||
if( nSize == 0 || tempY == gs.imageEndY ) return NULL;
|
||||
}
|
||||
return true;
|
||||
return pbuf;
|
||||
}
|
||||
|
||||
// Here we go again. 3 nearly identical functions.
|
||||
template <class T>
|
||||
static __forceinline bool TransferAligningToBlocks_(TransferData data, TransferFuncts fun, Point alignedPt, const T* pbuf)
|
||||
static __forceinline const T* TransferAligningToBlocks_(TransferData data, TransferFuncts fun, Point alignedPt, const T* pbuf)
|
||||
{
|
||||
bool bAligned;
|
||||
const u32 TSize = sizeof(T);
|
||||
|
@ -348,7 +355,8 @@ static __forceinline bool TransferAligningToBlocks_(TransferData data, TransferF
|
|||
if( alignedPt.x < gs.imageEndX )
|
||||
{
|
||||
// The only line that's different in these 3 functions.
|
||||
if (!TransmitHostLocalX_<T>(fun.wp, data.widthlimit, data.blockheight, alignedPt.x, pbuf)) return false;
|
||||
pbuf = TransmitHostLocalX_<T>(fun.wp, data.widthlimit, data.blockheight, alignedPt.x, pbuf);
|
||||
if (pbuf == NULL) return NULL;
|
||||
pbuf -= TransPitch((alignedPt.x - gs.trxpos.dx), data.transfersize)/TSize;
|
||||
}
|
||||
else
|
||||
|
@ -358,11 +366,11 @@ static __forceinline bool TransferAligningToBlocks_(TransferData data, TransferF
|
|||
|
||||
tempX = gs.trxpos.dx;
|
||||
}
|
||||
return true;
|
||||
return pbuf;
|
||||
}
|
||||
|
||||
template <class T>
|
||||
static __forceinline bool TransferAligningToBlocks_4(TransferData data, TransferFuncts fun, Point alignedPt, const T* pbuf)
|
||||
static __forceinline const T* TransferAligningToBlocks_4(TransferData data, TransferFuncts fun, Point alignedPt, const T* pbuf)
|
||||
{
|
||||
bool bAligned;
|
||||
const u32 TSize = sizeof(T);
|
||||
|
@ -394,7 +402,8 @@ static __forceinline bool TransferAligningToBlocks_4(TransferData data, Transfer
|
|||
if( alignedPt.x < gs.imageEndX )
|
||||
{
|
||||
// The only line that's different in these 3 functions.
|
||||
if (!TransmitHostLocalX_4<T>(fun.wp, data.widthlimit, data.blockheight, alignedPt.x, pbuf)) return false;
|
||||
pbuf = TransmitHostLocalX_4<T>(fun.wp, data.widthlimit, data.blockheight, alignedPt.x, pbuf);
|
||||
if (pbuf == NULL) return NULL;
|
||||
pbuf -= TransPitch((alignedPt.x - gs.trxpos.dx), data.transfersize)/TSize;
|
||||
}
|
||||
else
|
||||
|
@ -404,11 +413,11 @@ static __forceinline bool TransferAligningToBlocks_4(TransferData data, Transfer
|
|||
|
||||
tempX = gs.trxpos.dx;
|
||||
}
|
||||
return true;
|
||||
return pbuf;
|
||||
}
|
||||
|
||||
template <class T>
|
||||
static __forceinline bool TransferAligningToBlocks_24(TransferData data, TransferFuncts fun, Point alignedPt, const T* pbuf)
|
||||
static __forceinline const T* TransferAligningToBlocks_24(TransferData data, TransferFuncts fun, Point alignedPt, const T* pbuf)
|
||||
{
|
||||
bool bAligned;
|
||||
const u32 TSize = sizeof(T);
|
||||
|
@ -440,7 +449,8 @@ static __forceinline bool TransferAligningToBlocks_24(TransferData data, Transfe
|
|||
if( alignedPt.x < gs.imageEndX )
|
||||
{
|
||||
// The only line that's different in these 3 functions.
|
||||
if (!TransmitHostLocalX_24<T>(fun.wp, data.widthlimit, data.blockheight, alignedPt.x, pbuf)) return false;
|
||||
pbuf = TransmitHostLocalX_24<T>(fun.wp, data.widthlimit, data.blockheight, alignedPt.x, pbuf);
|
||||
if (pbuf == NULL) return NULL;
|
||||
pbuf -= TransPitch((alignedPt.x - gs.trxpos.dx), data.transfersize)/TSize;
|
||||
}
|
||||
else
|
||||
|
@ -450,7 +460,7 @@ static __forceinline bool TransferAligningToBlocks_24(TransferData data, Transfe
|
|||
|
||||
tempX = gs.trxpos.dx;
|
||||
}
|
||||
return true;
|
||||
return pbuf;
|
||||
}
|
||||
|
||||
// Only one copy of this function is needed, since it does not call any of the TransmitHostLocalX_ or TransmitHostLocalY_ variants.
|
||||
|
@ -495,9 +505,11 @@ static __forceinline int RealTransfer_(TransferData data, TransferFuncts fun, co
|
|||
alignedPt.y = ROUND_DOWNPOW2(gs.imageEndY, data.blockheight);
|
||||
alignedPt.x = ROUND_DOWNPOW2(gs.imageEndX, data.blockwidth);
|
||||
|
||||
if (!AlignOnBlockBoundry_<T>(data, fun, alignedPt, endY, pbuf)) return FinishTransfer(data, nLeftOver);
|
||||
pbuf = AlignOnBlockBoundry_<T>(data, fun, alignedPt, endY, pbuf);
|
||||
if (pbuf == NULL) return FinishTransfer(data, nLeftOver);
|
||||
|
||||
if (!TransferAligningToBlocks_<T>(data, fun, alignedPt, pbuf)) return FinishTransfer(data, nLeftOver);
|
||||
pbuf = TransferAligningToBlocks_<T>(data, fun, alignedPt, pbuf);
|
||||
if (pbuf == NULL) return FinishTransfer(data, nLeftOver);
|
||||
|
||||
if (TransPitch(nSize, data.transfersize)/4 > 0)
|
||||
{
|
||||
|
@ -529,9 +541,11 @@ static __forceinline int RealTransfer_4(TransferData data, TransferFuncts fun, c
|
|||
alignedPt.y = ROUND_DOWNPOW2(gs.imageEndY, data.blockheight);
|
||||
alignedPt.x = ROUND_DOWNPOW2(gs.imageEndX, data.blockwidth);
|
||||
|
||||
if (!AlignOnBlockBoundry_4<T>(data, fun, alignedPt, endY, pbuf)) return FinishTransfer(data, nLeftOver);
|
||||
pbuf = AlignOnBlockBoundry_4<T>(data, fun, alignedPt, endY, pbuf);
|
||||
if (pbuf == NULL) return FinishTransfer(data, nLeftOver);
|
||||
|
||||
if (!TransferAligningToBlocks_4<T>(data, fun, alignedPt, pbuf)) return FinishTransfer(data, nLeftOver);
|
||||
pbuf = TransferAligningToBlocks_4<T>(data, fun, alignedPt, pbuf);
|
||||
if (pbuf == NULL) return FinishTransfer(data, nLeftOver);
|
||||
|
||||
if (TransPitch(nSize, data.transfersize)/4 > 0)
|
||||
{
|
||||
|
@ -563,9 +577,11 @@ static __forceinline int RealTransfer_24(TransferData data, TransferFuncts fun,
|
|||
alignedPt.y = ROUND_DOWNPOW2(gs.imageEndY, data.blockheight);
|
||||
alignedPt.x = ROUND_DOWNPOW2(gs.imageEndX, data.blockwidth);
|
||||
|
||||
if (!AlignOnBlockBoundry_24<T>(data, fun, alignedPt, endY, pbuf)) return FinishTransfer(data, nLeftOver);
|
||||
pbuf = AlignOnBlockBoundry_24<T>(data, fun, alignedPt, endY, pbuf);
|
||||
if (pbuf == NULL) return FinishTransfer(data, nLeftOver);
|
||||
|
||||
if (!TransferAligningToBlocks_24<T>(data, fun, alignedPt, pbuf)) return FinishTransfer(data, nLeftOver);
|
||||
pbuf = TransferAligningToBlocks_24<T>(data, fun, alignedPt, pbuf);
|
||||
if (pbuf == NULL) return FinishTransfer(data, nLeftOver);
|
||||
|
||||
if (TransPitch(nSize, data.transfersize)/4 > 0)
|
||||
{
|
||||
|
|
|
@ -12,7 +12,7 @@ extern u8* pstart;
|
|||
|
||||
// transfers whole rows
|
||||
template <class T>
|
||||
static __forceinline bool TransmitHostLocalY_(_writePixel_0 wp, u32 widthlimit, u32 endY, const T *pbuf)
|
||||
static __forceinline const T *TransmitHostLocalY_(_writePixel_0 wp, u32 widthlimit, u32 endY, const T *buf)
|
||||
{
|
||||
assert( (nSize%widthlimit) == 0 && widthlimit <= 4 );
|
||||
if ((gs.imageEndX-gs.trxpos.dx) % widthlimit)
|
||||
|
@ -21,34 +21,34 @@ static __forceinline bool TransmitHostLocalY_(_writePixel_0 wp, u32 widthlimit,
|
|||
|
||||
for(; tempY < endY; ++tempY)
|
||||
{
|
||||
for(; tempX < gs.imageEndX && nSize > 0; tempX += 1, nSize -= 1, pbuf += 1)
|
||||
for(; tempX < gs.imageEndX && nSize > 0; tempX += 1, nSize -= 1, buf += 1)
|
||||
{
|
||||
/* write as many pixel at one time as possible */
|
||||
wp(pstart, tempX%2048, tempY%2048, pbuf[0], gs.dstbuf.bw);
|
||||
wp(pstart, tempX%2048, tempY%2048, buf[0], gs.dstbuf.bw);
|
||||
}
|
||||
}
|
||||
}
|
||||
for(; tempY < endY; ++tempY)
|
||||
{
|
||||
for(; tempX < gs.imageEndX && nSize > 0; tempX += widthlimit, nSize -= widthlimit, pbuf += widthlimit)
|
||||
for(; tempX < gs.imageEndX && nSize > 0; tempX += widthlimit, nSize -= widthlimit, buf += widthlimit)
|
||||
{
|
||||
|
||||
/* write as many pixel at one time as possible */
|
||||
if( nSize < widthlimit ) return false;
|
||||
if( nSize < widthlimit ) return NULL;
|
||||
|
||||
wp(pstart, tempX%2048, tempY%2048, pbuf[0], gs.dstbuf.bw);
|
||||
wp(pstart, tempX%2048, tempY%2048, buf[0], gs.dstbuf.bw);
|
||||
|
||||
if( widthlimit > 1 )
|
||||
{
|
||||
wp(pstart, (tempX+1)%2048, tempY%2048, pbuf[1], gs.dstbuf.bw);
|
||||
wp(pstart, (tempX+1)%2048, tempY%2048, buf[1], gs.dstbuf.bw);
|
||||
|
||||
if( widthlimit > 2 )
|
||||
{
|
||||
wp(pstart, (tempX+2)%2048, tempY%2048, pbuf[2], gs.dstbuf.bw);
|
||||
wp(pstart, (tempX+2)%2048, tempY%2048, buf[2], gs.dstbuf.bw);
|
||||
|
||||
if( widthlimit > 3 )
|
||||
{
|
||||
wp(pstart, (tempX+3)%2048, tempY%2048, pbuf[3], gs.dstbuf.bw);
|
||||
wp(pstart, (tempX+3)%2048, tempY%2048, buf[3], gs.dstbuf.bw);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -62,38 +62,24 @@ static __forceinline bool TransmitHostLocalY_(_writePixel_0 wp, u32 widthlimit,
|
|||
else
|
||||
{
|
||||
assert( gs.imageTransfer == -1 || nSize*sizeof(T)/4 == 0 );
|
||||
return false;
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
template <class T>
|
||||
static __forceinline bool TransmitHostLocalX_(_writePixel_0 wp, u32 widthlimit, u32 blockheight, u32 startX, const T *pbuf)
|
||||
{
|
||||
for(u32 tempi = 0; tempi < blockheight; ++tempi)
|
||||
{
|
||||
for(tempX = startX; tempX < gs.imageEndX; tempX++, pbuf++)
|
||||
{
|
||||
wp(pstart, tempX%2048, (tempY+tempi)%2048, pbuf[0], gs.dstbuf.bw);
|
||||
}
|
||||
pbuf += pitch - fracX;
|
||||
}
|
||||
return true;
|
||||
return buf;
|
||||
}
|
||||
|
||||
// transfers whole rows
|
||||
template <class T>
|
||||
static __forceinline bool TransmitHostLocalY_24(_writePixel_0 wp, u32 widthlimit, u32 endY, const T *pbuf)
|
||||
static __forceinline const T *TransmitHostLocalY_24(_writePixel_0 wp, u32 widthlimit, u32 endY, const T *buf)
|
||||
{
|
||||
if (widthlimit != 8 || ((gs.imageEndX-gs.trxpos.dx)%widthlimit))
|
||||
{
|
||||
//GS_LOG("Bad Transmission! %d %d, psm: %d\n", gs.trxpos.dx, gs.imageEndX, DSTPSM);
|
||||
for(; tempY < endY; ++tempY)
|
||||
{
|
||||
for(; tempX < gs.imageEndX && nSize > 0; tempX += 1, nSize -= 1, pbuf += 3)
|
||||
for(; tempX < gs.imageEndX && nSize > 0; tempX += 1, nSize -= 1, buf += 3)
|
||||
{
|
||||
wp(pstart, tempX%2048, tempY%2048, *(u32*)(pbuf), gs.dstbuf.bw);
|
||||
wp(pstart, tempX%2048, tempY%2048, *(u32*)(buf), gs.dstbuf.bw);
|
||||
}
|
||||
|
||||
if( tempX >= gs.imageEndX )
|
||||
|
@ -104,7 +90,7 @@ static __forceinline bool TransmitHostLocalY_24(_writePixel_0 wp, u32 widthlimit
|
|||
else
|
||||
{
|
||||
assert( gs.imageTransfer == -1 || nSize == 0 );
|
||||
return false;
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -113,20 +99,20 @@ static __forceinline bool TransmitHostLocalY_24(_writePixel_0 wp, u32 widthlimit
|
|||
assert( /*(nSize%widthlimit) == 0 &&*/ widthlimit == 8 );
|
||||
for(; tempY < endY; ++tempY)
|
||||
{
|
||||
for(; tempX < gs.imageEndX && nSize > 0; tempX += widthlimit, nSize -= widthlimit, pbuf += 3*widthlimit)
|
||||
for(; tempX < gs.imageEndX && nSize > 0; tempX += widthlimit, nSize -= widthlimit, buf += 3*widthlimit)
|
||||
{
|
||||
if (nSize < widthlimit) return false;
|
||||
if (nSize < widthlimit) return NULL;
|
||||
|
||||
/* write as many pixel at one time as possible */
|
||||
|
||||
wp(pstart, tempX%2048, tempY%2048, *(u32*)(pbuf+0), gs.dstbuf.bw);
|
||||
wp(pstart, (tempX+1)%2048, tempY%2048, *(u32*)(pbuf+3), gs.dstbuf.bw);
|
||||
wp(pstart, (tempX+2)%2048, tempY%2048, *(u32*)(pbuf+6), gs.dstbuf.bw);
|
||||
wp(pstart, (tempX+3)%2048, tempY%2048, *(u32*)(pbuf+9), gs.dstbuf.bw);
|
||||
wp(pstart, (tempX+4)%2048, tempY%2048, *(u32*)(pbuf+12), gs.dstbuf.bw);
|
||||
wp(pstart, (tempX+5)%2048, tempY%2048, *(u32*)(pbuf+15), gs.dstbuf.bw);
|
||||
wp(pstart, (tempX+6)%2048, tempY%2048, *(u32*)(pbuf+18), gs.dstbuf.bw);
|
||||
wp(pstart, (tempX+7)%2048, tempY%2048, *(u32*)(pbuf+21), gs.dstbuf.bw);
|
||||
wp(pstart, tempX%2048, tempY%2048, *(u32*)(buf+0), gs.dstbuf.bw);
|
||||
wp(pstart, (tempX+1)%2048, tempY%2048, *(u32*)(buf+3), gs.dstbuf.bw);
|
||||
wp(pstart, (tempX+2)%2048, tempY%2048, *(u32*)(buf+6), gs.dstbuf.bw);
|
||||
wp(pstart, (tempX+3)%2048, tempY%2048, *(u32*)(buf+9), gs.dstbuf.bw);
|
||||
wp(pstart, (tempX+4)%2048, tempY%2048, *(u32*)(buf+12), gs.dstbuf.bw);
|
||||
wp(pstart, (tempX+5)%2048, tempY%2048, *(u32*)(buf+15), gs.dstbuf.bw);
|
||||
wp(pstart, (tempX+6)%2048, tempY%2048, *(u32*)(buf+18), gs.dstbuf.bw);
|
||||
wp(pstart, (tempX+7)%2048, tempY%2048, *(u32*)(buf+21), gs.dstbuf.bw);
|
||||
}
|
||||
|
||||
if (tempX >= gs.imageEndX)
|
||||
|
@ -144,57 +130,42 @@ static __forceinline bool TransmitHostLocalY_24(_writePixel_0 wp, u32 widthlimit
|
|||
nSize = 0;
|
||||
}
|
||||
assert( gs.imageTransfer == -1 || nSize == 0 );
|
||||
return false;
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// transmit until endX, don't check size since it has already been prevalidated
|
||||
template <class T>
|
||||
static __forceinline bool TransmitHostLocalX_24(_writePixel_0 wp, u32 widthlimit, u32 blockheight, u32 startX, const T *pbuf)
|
||||
{
|
||||
for(u32 tempi = 0; tempi < blockheight; ++tempi)
|
||||
{
|
||||
for(tempX = startX; tempX < gs.imageEndX; tempX++, pbuf += 3)
|
||||
{
|
||||
wp(pstart, tempX%2048, (tempY+tempi)%2048, *(u32*)pbuf, gs.dstbuf.bw);
|
||||
}
|
||||
pbuf += 3*(pitch-fracX);
|
||||
}
|
||||
return true;
|
||||
return buf;
|
||||
}
|
||||
|
||||
// meant for 4bit transfers
|
||||
template <class T>
|
||||
static __forceinline bool TransmitHostLocalY_4(_writePixel_0 wp, u32 widthlimit, u32 endY, const T *pbuf)
|
||||
static __forceinline const T *TransmitHostLocalY_4(_writePixel_0 wp, u32 widthlimit, u32 endY, const T *buf)
|
||||
{
|
||||
for(; tempY < endY; ++tempY)
|
||||
{
|
||||
for(; tempX < gs.imageEndX && nSize > 0; tempX += widthlimit, nSize -= widthlimit)
|
||||
{
|
||||
/* write as many pixel at one time as possible */
|
||||
wp(pstart, tempX%2048, tempY%2048, *pbuf&0x0f, gs.dstbuf.bw);
|
||||
wp(pstart, (tempX+1)%2048, tempY%2048, *pbuf>>4, gs.dstbuf.bw);
|
||||
pbuf++;
|
||||
wp(pstart, tempX%2048, tempY%2048, *buf&0x0f, gs.dstbuf.bw);
|
||||
wp(pstart, (tempX+1)%2048, tempY%2048, *buf>>4, gs.dstbuf.bw);
|
||||
buf++;
|
||||
if ( widthlimit > 2 )
|
||||
{
|
||||
wp(pstart, (tempX+2)%2048, tempY%2048, *pbuf&0x0f, gs.dstbuf.bw);
|
||||
wp(pstart, (tempX+3)%2048, tempY%2048, *pbuf>>4, gs.dstbuf.bw);
|
||||
pbuf++;
|
||||
wp(pstart, (tempX+2)%2048, tempY%2048, *buf&0x0f, gs.dstbuf.bw);
|
||||
wp(pstart, (tempX+3)%2048, tempY%2048, *buf>>4, gs.dstbuf.bw);
|
||||
buf++;
|
||||
|
||||
if( widthlimit > 4 )
|
||||
{
|
||||
wp(pstart, (tempX+4)%2048, tempY%2048, *pbuf&0x0f, gs.dstbuf.bw);
|
||||
wp(pstart, (tempX+5)%2048, tempY%2048, *pbuf>>4, gs.dstbuf.bw);
|
||||
pbuf++;
|
||||
wp(pstart, (tempX+4)%2048, tempY%2048, *buf&0x0f, gs.dstbuf.bw);
|
||||
wp(pstart, (tempX+5)%2048, tempY%2048, *buf>>4, gs.dstbuf.bw);
|
||||
buf++;
|
||||
|
||||
if( widthlimit > 6 )
|
||||
{
|
||||
wp(pstart, (tempX+6)%2048, tempY%2048, *pbuf&0x0f, gs.dstbuf.bw);
|
||||
wp(pstart, (tempX+7)%2048, tempY%2048, *pbuf>>4, gs.dstbuf.bw);
|
||||
pbuf++;
|
||||
wp(pstart, (tempX+6)%2048, tempY%2048, *buf&0x0f, gs.dstbuf.bw);
|
||||
wp(pstart, (tempX+7)%2048, tempY%2048, *buf>>4, gs.dstbuf.bw);
|
||||
buf++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -207,26 +178,55 @@ static __forceinline bool TransmitHostLocalY_4(_writePixel_0 wp, u32 widthlimit,
|
|||
else
|
||||
{
|
||||
assert( gs.imageTransfer == -1 || (nSize/32) == 0 );
|
||||
return false;
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
return buf;
|
||||
}
|
||||
|
||||
template <class T>
|
||||
static __forceinline const T *TransmitHostLocalX_(_writePixel_0 wp, u32 widthlimit, u32 blockheight, u32 startX, const T *buf)
|
||||
{
|
||||
for(u32 tempi = 0; tempi < blockheight; ++tempi)
|
||||
{
|
||||
for(tempX = startX; tempX < gs.imageEndX; tempX++, buf++)
|
||||
{
|
||||
wp(pstart, tempX%2048, (tempY+tempi)%2048, buf[0], gs.dstbuf.bw);
|
||||
}
|
||||
buf += pitch - fracX;
|
||||
}
|
||||
return buf;
|
||||
}
|
||||
|
||||
// transmit until endX, don't check size since it has already been prevalidated
|
||||
template <class T>
|
||||
static __forceinline bool TransmitHostLocalX_4(_writePixel_0 wp, u32 widthlimit, u32 blockheight, u32 startX, const T *pbuf)
|
||||
static __forceinline const T *TransmitHostLocalX_24(_writePixel_0 wp, u32 widthlimit, u32 blockheight, u32 startX, const T *buf)
|
||||
{
|
||||
for(u32 tempi = 0; tempi < blockheight; ++tempi)
|
||||
{
|
||||
for(tempX = startX; tempX < gs.imageEndX; tempX+=2, pbuf++)
|
||||
for(tempX = startX; tempX < gs.imageEndX; tempX++, buf += 3)
|
||||
{
|
||||
wp(pstart, tempX%2048, (tempY+tempi)%2048, pbuf[0]&0x0f, gs.dstbuf.bw);
|
||||
wp(pstart, (tempX+1)%2048, (tempY+tempi)%2048, pbuf[0]>>4, gs.dstbuf.bw);
|
||||
wp(pstart, tempX%2048, (tempY+tempi)%2048, *(u32*)buf, gs.dstbuf.bw);
|
||||
}
|
||||
pbuf += (pitch-fracX)/2;
|
||||
buf += 3*(pitch-fracX);
|
||||
}
|
||||
return true;
|
||||
return buf;
|
||||
}
|
||||
|
||||
// transmit until endX, don't check size since it has already been prevalidated
|
||||
template <class T>
|
||||
static __forceinline const T *TransmitHostLocalX_4(_writePixel_0 wp, u32 widthlimit, u32 blockheight, u32 startX, const T *buf)
|
||||
{
|
||||
for(u32 tempi = 0; tempi < blockheight; ++tempi)
|
||||
{
|
||||
for(tempX = startX; tempX < gs.imageEndX; tempX+=2, buf++)
|
||||
{
|
||||
wp(pstart, tempX%2048, (tempY+tempi)%2048, buf[0]&0x0f, gs.dstbuf.bw);
|
||||
wp(pstart, (tempX+1)%2048, (tempY+tempi)%2048, buf[0]>>4, gs.dstbuf.bw);
|
||||
}
|
||||
buf += (pitch-fracX)/2;
|
||||
}
|
||||
return buf;
|
||||
}
|
||||
|
||||
// calculate pitch in source buffer
|
||||
|
|
|
@ -2835,6 +2835,64 @@ void InitTransferLocalHost()
|
|||
ResolveInRange(start, end);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void TransferLocalHost(void* pbyMem, u32 nQWordSize, int& x, int& y, u8 *pstart, _readPixel_0 rp)
|
||||
{
|
||||
int i = x, j = y;
|
||||
T* pbuf = (T*)pbyMem;
|
||||
u32 nSize = nQWordSize*16/sizeof(T);
|
||||
|
||||
for(; i < gs.imageEndY; ++i)
|
||||
{
|
||||
for(; j < gs.imageEndX && nSize > 0; ++j, --nSize)
|
||||
{
|
||||
*pbuf++ = rp(pstart, j%2048, i%2048, gs.srcbuf.bw);
|
||||
}
|
||||
|
||||
if( j >= gs.imageEndX )
|
||||
{
|
||||
assert( j == gs.imageEndX);
|
||||
j = gs.trxpos.sx;
|
||||
}
|
||||
else
|
||||
{
|
||||
assert( nSize == 0 );
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void TransferLocalHost_24(void* pbyMem, u32 nQWordSize, int& x, int& y, u8 *pstart, _readPixel_0 rp)
|
||||
{
|
||||
int i = x, j = y;
|
||||
u8* pbuf = (u8*)pbyMem;
|
||||
u32 nSize = nQWordSize*16/3;
|
||||
|
||||
for(; i < gs.imageEndY; ++i)
|
||||
{
|
||||
for(; j < gs.imageEndX && nSize > 0; ++j, --nSize)
|
||||
{
|
||||
u32 p = rp(pstart, j%2048, i%2048, gs.srcbuf.bw);
|
||||
pbuf[0] = (u8)p;
|
||||
pbuf[1] = (u8)(p>>8);
|
||||
pbuf[2] = (u8)(p>>16);
|
||||
pbuf += 3;
|
||||
}
|
||||
|
||||
if( j >= gs.imageEndX )
|
||||
{
|
||||
assert( j == gs.imageEndX);
|
||||
j = gs.trxpos.sx;
|
||||
}
|
||||
else
|
||||
{
|
||||
assert( nSize == 0 );
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// left/right, top/down
|
||||
void TransferLocalHost(void* pbyMem, u32 nQWordSize)
|
||||
{
|
||||
|
@ -2844,47 +2902,17 @@ void TransferLocalHost(void* pbyMem, u32 nQWordSize)
|
|||
u8* pstart = g_pbyGSMemory + 256*gs.srcbuf.bp;
|
||||
int i = gs.imageY, j = gs.imageX;
|
||||
|
||||
#define TRANSFERLOCALHOST(psm, T) { \
|
||||
T* pbuf = (T*)pbyMem; \
|
||||
u32 nSize = nQWordSize*16/sizeof(T); \
|
||||
for(; i < gs.imageEndY; ++i) { \
|
||||
for(; j < gs.imageEndX && nSize > 0; ++j, --nSize) { \
|
||||
*pbuf++ = readPixel##psm##_0(pstart, j%2048, i%2048, gs.srcbuf.bw); \
|
||||
} \
|
||||
\
|
||||
if( j >= gs.imageEndX ) { assert( j == gs.imageEndX); j = gs.trxpos.sx; } \
|
||||
else { assert( nSize == 0 ); break; } \
|
||||
} \
|
||||
} \
|
||||
|
||||
#define TRANSFERLOCALHOST_24(psm) { \
|
||||
u8* pbuf = (u8*)pbyMem; \
|
||||
u32 nSize = nQWordSize*16/3; \
|
||||
for(; i < gs.imageEndY; ++i) { \
|
||||
for(; j < gs.imageEndX && nSize > 0; ++j, --nSize) { \
|
||||
u32 p = readPixel##psm##_0(pstart, j%2048, i%2048, gs.srcbuf.bw); \
|
||||
pbuf[0] = (u8)p; \
|
||||
pbuf[1] = (u8)(p>>8); \
|
||||
pbuf[2] = (u8)(p>>16); \
|
||||
pbuf += 3; \
|
||||
} \
|
||||
\
|
||||
if( j >= gs.imageEndX ) { assert( j == gs.imageEndX); j = gs.trxpos.sx; } \
|
||||
else { assert( nSize == 0 ); break; } \
|
||||
} \
|
||||
} \
|
||||
|
||||
switch (gs.srcbuf.psm) {
|
||||
case 0x0: TRANSFERLOCALHOST(32, u32); break;
|
||||
case 0x1: TRANSFERLOCALHOST_24(24); break;
|
||||
case 0x2: TRANSFERLOCALHOST(16, u16); break;
|
||||
case 0xA: TRANSFERLOCALHOST(16S, u16); break;
|
||||
case 0x13: TRANSFERLOCALHOST(8, u8); break;
|
||||
case 0x1B: TRANSFERLOCALHOST(8H, u8); break;
|
||||
case 0x30: TRANSFERLOCALHOST(32Z, u32); break;
|
||||
case 0x31: TRANSFERLOCALHOST_24(24Z); break;
|
||||
case 0x32: TRANSFERLOCALHOST(16Z, u16); break;
|
||||
case 0x3A: TRANSFERLOCALHOST(16SZ, u16); break;
|
||||
case PSMCT32: TransferLocalHost<u32>(pbyMem, nQWordSize, i, j, pstart, readPixel32_0); break;
|
||||
case PSMCT24: TransferLocalHost_24(pbyMem, nQWordSize, i, j, pstart, readPixel24_0); break;
|
||||
case PSMCT16: TransferLocalHost<u16>(pbyMem, nQWordSize, i, j, pstart, readPixel16_0); break;
|
||||
case PSMCT16S: TransferLocalHost<u16>(pbyMem, nQWordSize, i, j, pstart, readPixel16S_0); break;
|
||||
case PSMT8: TransferLocalHost<u8>(pbyMem, nQWordSize, i, j, pstart, readPixel8_0); break;
|
||||
case PSMT8H: TransferLocalHost<u8>(pbyMem, nQWordSize, i, j, pstart, readPixel8H_0); break;
|
||||
case PSMT32Z: TransferLocalHost<u32>(pbyMem, nQWordSize, i, j, pstart, readPixel32Z_0); break;
|
||||
case PSMT24Z: TransferLocalHost_24(pbyMem, nQWordSize, i, j, pstart, readPixel24Z_0); break;
|
||||
case PSMT16Z: TransferLocalHost<u16>(pbyMem, nQWordSize, i, j, pstart, readPixel16Z_0); break;
|
||||
case PSMT16SZ: TransferLocalHost<u16>(pbyMem, nQWordSize, i, j, pstart, readPixel16SZ_0); break;
|
||||
default: assert(0);
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue