zzogl-pg: More of what I was doing last commit.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@2759 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
arcum42 2010-03-20 10:49:50 +00:00
parent f135c1d9c7
commit bd94bc0483
2 changed files with 49 additions and 59 deletions

View File

@ -38,7 +38,7 @@ PCSX2_ALIGNED16(u32 tempblock[64]);
// ------------------------ // ------------------------
// | Y | // | Y |
// ------------------------ // ------------------------
#define DEFINE_TRANSFERLOCAL(psm, T, widthlimit, blockbits, blockwidth, blockheight, TransSfx, SwizzleBlock) \ #define DEFINE_TRANSFERLOCAL(psm, transfersize, T, widthlimit, blockbits, blockwidth, blockheight, TransSfx, SwizzleBlock) \
int TransferHostLocal##psm(const void* pbyMem, u32 nQWordSize) \ int TransferHostLocal##psm(const void* pbyMem, u32 nQWordSize) \
{ \ { \
assert( gs.imageTransfer == 0 ); \ assert( gs.imageTransfer == 0 ); \
@ -46,10 +46,12 @@ int TransferHostLocal##psm(const void* pbyMem, u32 nQWordSize) \
\ \
/*const u8* pendbuf = (const u8*)pbyMem + nQWordSize*4;*/ \ /*const u8* pendbuf = (const u8*)pbyMem + nQWordSize*4;*/ \
int i = gs.imageY, j = gs.imageX; \ int i = gs.imageY, j = gs.imageX; \
const u32 TSize = sizeof(T); \
\ \
const T* pbuf = (const T*)pbyMem; \ const T* pbuf = (const T*)pbyMem; \
int nLeftOver = (nQWordSize*4*2)%(TransmitPitch##TransSfx<T>(2)); \ const int tp = TransPitch(2, transfersize); \
int nSize = nQWordSize*4*2/TransmitPitch##TransSfx<T>(2); \ int nLeftOver = (nQWordSize*4*2)%tp; \
int nSize = nQWordSize*4*2/tp; \
nSize = min(nSize, gs.imageWnew * gs.imageHnew); \ nSize = min(nSize, gs.imageWnew * gs.imageHnew); \
\ \
int pitch, area, fracX; \ int pitch, area, fracX; \
@ -97,37 +99,37 @@ int TransferHostLocal##psm(const void* pbyMem, u32 nQWordSize) \
fracX = gs.imageEndX-alignedX; \ fracX = gs.imageEndX-alignedX; \
\ \
/* on top of checking whether pbuf is aligned, make sure that the width is at least aligned to its limits (due to bugs in pcsx2) */ \ /* on top of checking whether pbuf is aligned, make sure that the width is at least aligned to its limits (due to bugs in pcsx2) */ \
bAligned = !((uptr)pbuf & 0xf) && (TransmitPitch##TransSfx<T>(pitch) & 0xf) == 0; \ bAligned = !((uptr)pbuf & 0xf) && (TransPitch(pitch, transfersize) & 0xf) == 0; \
\ \
/* transfer aligning to blocks */ \ /* transfer aligning to blocks */ \
for(; i < alignedY && nSize >= area; i += blockheight, nSize -= area) { \ for(; i < alignedY && nSize >= area; i += blockheight, nSize -= area) { \
\ \
if( bAligned || ((DSTPSM==PSMCT24) || (DSTPSM==PSMT8H) || (DSTPSM==PSMT4HH) || (DSTPSM==PSMT4HL)) ) { \ if( bAligned || ((DSTPSM==PSMCT24) || (DSTPSM==PSMT8H) || (DSTPSM==PSMT4HH) || (DSTPSM==PSMT4HL)) ) { \
for(int tempj = gs.trxpos.dx; tempj < alignedX; tempj += blockwidth, pbuf += TransmitPitch##TransSfx<T>(blockwidth)/sizeof(T)) { \ for(int tempj = gs.trxpos.dx; tempj < alignedX; tempj += blockwidth, pbuf += TransPitch(blockwidth, transfersize)/TSize) { \
SwizzleBlock(pstart + getPixelAddress_0(psm,tempj, i, gs.dstbuf.bw)*blockbits/8, \ SwizzleBlock(pstart + getPixelAddress_0(psm,tempj, i, gs.dstbuf.bw)*blockbits/8, \
(u8*)pbuf, TransmitPitch##TransSfx<T>(pitch)); \ (u8*)pbuf, TransPitch(pitch, transfersize)); \
} \ } \
} \ } \
else { \ else { \
for(int tempj = gs.trxpos.dx; tempj < alignedX; tempj += blockwidth, pbuf += TransmitPitch##TransSfx<T>(blockwidth)/sizeof(T)) { \ for(int tempj = gs.trxpos.dx; tempj < alignedX; tempj += blockwidth, pbuf += TransPitch(blockwidth, transfersize)/TSize) { \
SwizzleBlock##u(pstart + getPixelAddress_0(psm,tempj, i, gs.dstbuf.bw)*blockbits/8, \ SwizzleBlock##u(pstart + getPixelAddress_0(psm,tempj, i, gs.dstbuf.bw)*blockbits/8, \
(u8*)pbuf, TransmitPitch##TransSfx<T>(pitch)); \ (u8*)pbuf, TransPitch(pitch, transfersize)); \
} \ } \
} \ } \
\ \
/* transfer the rest */ \ /* transfer the rest */ \
if( alignedX < gs.imageEndX ) { \ if( alignedX < gs.imageEndX ) { \
TRANSMIT_HOSTLOCAL_X(TransSfx,psm, T, widthlimit, blockheight, alignedX); \ TRANSMIT_HOSTLOCAL_X(TransSfx,psm, T, widthlimit, blockheight, alignedX); \
pbuf -= TransmitPitch##TransSfx<T>(alignedX-gs.trxpos.dx)/sizeof(T); \ pbuf -= TransPitch(alignedX-gs.trxpos.dx, transfersize)/TSize; \
} \ } \
else pbuf += (blockheight-1)*TransmitPitch##TransSfx<T>(pitch)/sizeof(T); \ else pbuf += (blockheight-1)*TransPitch(pitch, transfersize)/TSize; \
j = gs.trxpos.dx; \ j = gs.trxpos.dx; \
} \ } \
\ \
if( TransmitPitch##TransSfx<T>(nSize)/4 > 0 ) { \ if( TransPitch(nSize, transfersize)/4 > 0 ) { \
TRANSMIT_HOSTLOCAL_Y(TransSfx,psm, T, widthlimit, gs.imageEndY); \ TRANSMIT_HOSTLOCAL_Y(TransSfx,psm, T, widthlimit, gs.imageEndY); \
/* sometimes wrong sizes are sent (tekken tag) */ \ /* sometimes wrong sizes are sent (tekken tag) */ \
assert( gs.imageTransfer == -1 || TransmitPitch##TransSfx<T>(nSize)/4 <= 2 ); \ assert( gs.imageTransfer == -1 || TransPitch(nSize, transfersize)/4 <= 2 ); \
} \ } \
\ \
End: \ End: \
@ -143,17 +145,12 @@ End: \
gs.imageY = i; \ gs.imageY = i; \
gs.imageX = j; \ gs.imageX = j; \
} \ } \
return (nSize * TransmitPitch##TransSfx<T>(2) + nLeftOver)/2; \ return (nSize * TransPitch(2, transfersize) + nLeftOver)/2; \
} \ } \
#define NEW_TRANSFER #define NEW_TRANSFER
#ifdef NEW_TRANSFER #ifdef NEW_TRANSFER
u32 TransPitch(u32 pitch, u32 size)
{
return pitch * size / 8;
}
//DEFINE_TRANSFERLOCAL(32, u32, 2, 32, 8, 8, _, SwizzleBlock32); //DEFINE_TRANSFERLOCAL(32, u32, 2, 32, 8, 8, _, SwizzleBlock32);
int TransferHostLocal32(const void* pbyMem, u32 nQWordSize) int TransferHostLocal32(const void* pbyMem, u32 nQWordSize)
{ {
@ -362,14 +359,14 @@ int TransferHostLocal32Z(const void* pbyMem, u32 nQWordSize)
{ {
if( bAligned || ((DSTPSM==PSMCT24) || (DSTPSM==PSMT8H) || (DSTPSM==PSMT4HH) || (DSTPSM==PSMT4HL)) ) if( bAligned || ((DSTPSM==PSMCT24) || (DSTPSM==PSMT8H) || (DSTPSM==PSMT4HH) || (DSTPSM==PSMT4HL)) )
{ {
for(int tempj = gs.trxpos.dx; tempj < alignedX; tempj += blockwidth, pbuf += TransPitch(blockwidth, transfersize)/sizeof(u32)) for(int tempj = gs.trxpos.dx; tempj < alignedX; tempj += blockwidth, pbuf += TransPitch(blockwidth, transfersize)/TSize)
{ {
SwizzleBlock32(pstart + getPixelAddress_0(32Z,tempj, i, gs.dstbuf.bw)*blockbits/8, (u8*)pbuf, TransPitch(pitch, transfersize)); SwizzleBlock32(pstart + getPixelAddress_0(32Z,tempj, i, gs.dstbuf.bw)*blockbits/8, (u8*)pbuf, TransPitch(pitch, transfersize));
} }
} }
else else
{ {
for(int tempj = gs.trxpos.dx; tempj < alignedX; tempj += blockwidth, pbuf += TransPitch(blockwidth, transfersize)/sizeof(u32)) for(int tempj = gs.trxpos.dx; tempj < alignedX; tempj += blockwidth, pbuf += TransPitch(blockwidth, transfersize)/TSize)
{ {
SwizzleBlock32u(pstart + getPixelAddress_0(32Z,tempj, i, gs.dstbuf.bw)*blockbits/8, (u8*)pbuf, TransPitch(pitch, transfersize)); SwizzleBlock32u(pstart + getPixelAddress_0(32Z,tempj, i, gs.dstbuf.bw)*blockbits/8, (u8*)pbuf, TransPitch(pitch, transfersize));
} }
@ -379,11 +376,11 @@ int TransferHostLocal32Z(const void* pbyMem, u32 nQWordSize)
if ( alignedX < gs.imageEndX ) if ( alignedX < gs.imageEndX )
{ {
TRANSMIT_HOSTLOCAL_X_( 32Z, u32, widthlimit, blockheight, alignedX); TRANSMIT_HOSTLOCAL_X_( 32Z, u32, widthlimit, blockheight, alignedX);
pbuf -= TransPitch((alignedX - gs.trxpos.dx), transfersize)/sizeof(u32); pbuf -= TransPitch((alignedX - gs.trxpos.dx), transfersize)/TSize;
} }
else else
{ {
pbuf += (blockheight-1)*TransPitch(pitch, transfersize)/sizeof(u32); pbuf += (blockheight-1)*TransPitch(pitch, transfersize)/TSize;
} }
j = gs.trxpos.dx; j = gs.trxpos.dx;
} }
@ -488,14 +485,14 @@ int TransferHostLocal24(const void* pbyMem, u32 nQWordSize)
{ {
if( bAligned || ((DSTPSM==PSMCT24) || (DSTPSM==PSMT8H) || (DSTPSM==PSMT4HH) || (DSTPSM==PSMT4HL)) ) if( bAligned || ((DSTPSM==PSMCT24) || (DSTPSM==PSMT8H) || (DSTPSM==PSMT4HH) || (DSTPSM==PSMT4HL)) )
{ {
for(int tempj = gs.trxpos.dx; tempj < alignedX; tempj += blockwidth, pbuf += TransPitch(blockwidth, transfersize)/sizeof(u8)) for(int tempj = gs.trxpos.dx; tempj < alignedX; tempj += blockwidth, pbuf += TransPitch(blockwidth, transfersize)/TSize)
{ {
SwizzleBlock24(pstart + getPixelAddress_0(24,tempj, i, gs.dstbuf.bw)*blockbits/8, (u8*)pbuf, TransPitch(pitch, transfersize)); SwizzleBlock24(pstart + getPixelAddress_0(24,tempj, i, gs.dstbuf.bw)*blockbits/8, (u8*)pbuf, TransPitch(pitch, transfersize));
} }
} }
else else
{ {
for(int tempj = gs.trxpos.dx; tempj < alignedX; tempj += blockwidth, pbuf += TransPitch(blockwidth, transfersize)/sizeof(u8)) for(int tempj = gs.trxpos.dx; tempj < alignedX; tempj += blockwidth, pbuf += TransPitch(blockwidth, transfersize)/TSize)
{ {
SwizzleBlock24u(pstart + getPixelAddress_0(24,tempj, i, gs.dstbuf.bw)*blockbits/8, (u8*)pbuf, TransPitch(pitch, transfersize)); SwizzleBlock24u(pstart + getPixelAddress_0(24,tempj, i, gs.dstbuf.bw)*blockbits/8, (u8*)pbuf, TransPitch(pitch, transfersize));
} }
@ -505,11 +502,11 @@ int TransferHostLocal24(const void* pbyMem, u32 nQWordSize)
if ( alignedX < gs.imageEndX ) if ( alignedX < gs.imageEndX )
{ {
TRANSMIT_HOSTLOCAL_X_24(24, T, widthlimit, blockheight, alignedX); TRANSMIT_HOSTLOCAL_X_24(24, T, widthlimit, blockheight, alignedX);
pbuf -= TransPitch((alignedX-gs.trxpos.dx), transfersize)/sizeof(u8); pbuf -= TransPitch((alignedX-gs.trxpos.dx), transfersize)/TSize;
} }
else else
{ {
pbuf += (blockheight-1)*TransPitch(pitch, transfersize)/sizeof(u8); pbuf += (blockheight-1)*TransPitch(pitch, transfersize)/TSize;
} }
j = gs.trxpos.dx; j = gs.trxpos.dx;
} }
@ -614,14 +611,14 @@ int TransferHostLocal24Z(const void* pbyMem, u32 nQWordSize)
{ {
if( bAligned || ((DSTPSM==PSMCT24) || (DSTPSM==PSMT8H) || (DSTPSM==PSMT4HH) || (DSTPSM==PSMT4HL)) ) if( bAligned || ((DSTPSM==PSMCT24) || (DSTPSM==PSMT8H) || (DSTPSM==PSMT4HH) || (DSTPSM==PSMT4HL)) )
{ {
for(int tempj = gs.trxpos.dx; tempj < alignedX; tempj += blockwidth, pbuf += TransPitch(blockwidth, transfersize)/sizeof(u8)) for(int tempj = gs.trxpos.dx; tempj < alignedX; tempj += blockwidth, pbuf += TransPitch(blockwidth, transfersize)/TSize)
{ {
SwizzleBlock24(pstart + getPixelAddress_0(16,tempj, i, gs.dstbuf.bw)*blockbits/8, (u8*)pbuf, TransPitch(pitch, transfersize)); SwizzleBlock24(pstart + getPixelAddress_0(16,tempj, i, gs.dstbuf.bw)*blockbits/8, (u8*)pbuf, TransPitch(pitch, transfersize));
} }
} }
else else
{ {
for(int tempj = gs.trxpos.dx; tempj < alignedX; tempj += blockwidth, pbuf += TransPitch(blockwidth, transfersize)/sizeof(u8)) for(int tempj = gs.trxpos.dx; tempj < alignedX; tempj += blockwidth, pbuf += TransPitch(blockwidth, transfersize)/TSize)
{ {
SwizzleBlock24u(pstart + getPixelAddress_0(16,tempj, i, gs.dstbuf.bw)*blockbits/8, (u8*)pbuf, TransPitch(pitch, transfersize)); SwizzleBlock24u(pstart + getPixelAddress_0(16,tempj, i, gs.dstbuf.bw)*blockbits/8, (u8*)pbuf, TransPitch(pitch, transfersize));
} }
@ -631,11 +628,11 @@ int TransferHostLocal24Z(const void* pbyMem, u32 nQWordSize)
if ( alignedX < gs.imageEndX ) if ( alignedX < gs.imageEndX )
{ {
TRANSMIT_HOSTLOCAL_X_24(16, u8, widthlimit, blockheight, alignedX); TRANSMIT_HOSTLOCAL_X_24(16, u8, widthlimit, blockheight, alignedX);
pbuf -= TransPitch((alignedX-gs.trxpos.dx), transfersize)/sizeof(u8); pbuf -= TransPitch((alignedX-gs.trxpos.dx), transfersize)/TSize;
} }
else else
{ {
pbuf += (blockheight-1)*TransPitch(pitch, transfersize)/sizeof(u8); pbuf += (blockheight-1)*TransPitch(pitch, transfersize)/TSize;
} }
j = gs.trxpos.dx; j = gs.trxpos.dx;
} }
@ -740,14 +737,14 @@ int TransferHostLocal16(const void* pbyMem, u32 nQWordSize)
{ {
if( bAligned || ((DSTPSM==PSMCT24) || (DSTPSM==PSMT8H) || (DSTPSM==PSMT4HH) || (DSTPSM==PSMT4HL)) ) if( bAligned || ((DSTPSM==PSMCT24) || (DSTPSM==PSMT8H) || (DSTPSM==PSMT4HH) || (DSTPSM==PSMT4HL)) )
{ {
for(int tempj = gs.trxpos.dx; tempj < alignedX; tempj += blockwidth, pbuf += TransPitch(blockwidth, transfersize)/sizeof(u16)) for(int tempj = gs.trxpos.dx; tempj < alignedX; tempj += blockwidth, pbuf += TransPitch(blockwidth, transfersize)/TSize)
{ {
SwizzleBlock16(pstart + getPixelAddress_0(16,tempj, i, gs.dstbuf.bw)*blockbits/8, (u8*)pbuf, TransPitch(pitch, transfersize)); SwizzleBlock16(pstart + getPixelAddress_0(16,tempj, i, gs.dstbuf.bw)*blockbits/8, (u8*)pbuf, TransPitch(pitch, transfersize));
} }
} }
else else
{ {
for(int tempj = gs.trxpos.dx; tempj < alignedX; tempj += blockwidth, pbuf += TransPitch(blockwidth, transfersize)/sizeof(u16)) for(int tempj = gs.trxpos.dx; tempj < alignedX; tempj += blockwidth, pbuf += TransPitch(blockwidth, transfersize)/TSize)
{ {
SwizzleBlock16u(pstart + getPixelAddress_0(16,tempj, i, gs.dstbuf.bw)*blockbits/8, (u8*)pbuf, TransPitch(pitch, transfersize)); SwizzleBlock16u(pstart + getPixelAddress_0(16,tempj, i, gs.dstbuf.bw)*blockbits/8, (u8*)pbuf, TransPitch(pitch, transfersize));
} }
@ -757,11 +754,11 @@ int TransferHostLocal16(const void* pbyMem, u32 nQWordSize)
if ( alignedX < gs.imageEndX ) if ( alignedX < gs.imageEndX )
{ {
TRANSMIT_HOSTLOCAL_X_(16, T, widthlimit, blockheight, alignedX); TRANSMIT_HOSTLOCAL_X_(16, T, widthlimit, blockheight, alignedX);
pbuf -= TransPitch((alignedX-gs.trxpos.dx), transfersize)/sizeof(u16); pbuf -= TransPitch((alignedX-gs.trxpos.dx), transfersize)/TSize;
} }
else else
{ {
pbuf += (blockheight-1)* TransPitch(pitch, transfersize)/sizeof(u16); pbuf += (blockheight-1)* TransPitch(pitch, transfersize)/TSize;
} }
j = gs.trxpos.dx; j = gs.trxpos.dx;
} }
@ -1522,7 +1519,7 @@ int TransferHostLocal8H(const void* pbyMem, u32 nQWordSize)
j = gs.trxpos.dx; j = gs.trxpos.dx;
} }
if (TRANSMIT_PITCH_(nSize, u8)/4 > 0 ) if (TransPitch(nSize, transfersize)/4 > 0 )
{ {
TRANSMIT_HOSTLOCAL_Y_(8H, u8, widthlimit, gs.imageEndY); TRANSMIT_HOSTLOCAL_Y_(8H, u8, widthlimit, gs.imageEndY);
/* sometimes wrong sizes are sent (tekken tag) */ /* sometimes wrong sizes are sent (tekken tag) */
@ -1798,19 +1795,19 @@ int TransferHostLocal4HH(const void* pbyMem, u32 nQWordSize)
} }
#else #else
DEFINE_TRANSFERLOCAL(32, u32, 2, 32, 8, 8, _, SwizzleBlock32); DEFINE_TRANSFERLOCAL(32, 32, u32, 2, 32, 8, 8, _, SwizzleBlock32);
DEFINE_TRANSFERLOCAL(32Z, u32, 2, 32, 8, 8, _, SwizzleBlock32); DEFINE_TRANSFERLOCAL(32Z, 32, u32, 2, 32, 8, 8, _, SwizzleBlock32);
DEFINE_TRANSFERLOCAL(24, u8, 8, 32, 8, 8, _24, SwizzleBlock24); DEFINE_TRANSFERLOCAL(24, 24, u8, 8, 32, 8, 8, _24, SwizzleBlock24);
DEFINE_TRANSFERLOCAL(24Z, u8, 8, 32, 8, 8, _24, SwizzleBlock24); DEFINE_TRANSFERLOCAL(24Z, 24, u8, 8, 32, 8, 8, _24, SwizzleBlock24);
DEFINE_TRANSFERLOCAL(16, u16, 4, 16, 16, 8, _, SwizzleBlock16); DEFINE_TRANSFERLOCAL(16, 16, u16, 4, 16, 16, 8, _, SwizzleBlock16);
DEFINE_TRANSFERLOCAL(16S, u16, 4, 16, 16, 8, _, SwizzleBlock16); DEFINE_TRANSFERLOCAL(16S, 16, u16, 4, 16, 16, 8, _, SwizzleBlock16);
DEFINE_TRANSFERLOCAL(16Z, u16, 4, 16, 16, 8, _, SwizzleBlock16); DEFINE_TRANSFERLOCAL(16Z, 16, u16, 4, 16, 16, 8, _, SwizzleBlock16);
DEFINE_TRANSFERLOCAL(16SZ, u16, 4, 16, 16, 8, _, SwizzleBlock16); DEFINE_TRANSFERLOCAL(16SZ, 16, u16, 4, 16, 16, 8, _, SwizzleBlock16);
DEFINE_TRANSFERLOCAL(8, u8, 4, 8, 16, 16, _, SwizzleBlock8); DEFINE_TRANSFERLOCAL(8, 8, u8, 4, 8, 16, 16, _, SwizzleBlock8);
DEFINE_TRANSFERLOCAL(4, u8, 8, 4, 32, 16, _4, SwizzleBlock4); DEFINE_TRANSFERLOCAL(4, 4, u8, 8, 4, 32, 16, _4, SwizzleBlock4);
DEFINE_TRANSFERLOCAL(8H, u8, 4, 32, 8, 8, _, SwizzleBlock8H); DEFINE_TRANSFERLOCAL(8H, 8, u8, 4, 32, 8, 8, _, SwizzleBlock8H);
DEFINE_TRANSFERLOCAL(4HL, u8, 8, 32, 8, 8, _4, SwizzleBlock4HL); DEFINE_TRANSFERLOCAL(4HL, 4, u8, 8, 32, 8, 8, _4, SwizzleBlock4HL);
DEFINE_TRANSFERLOCAL(4HH, u8, 8, 32, 8, 8, _4, SwizzleBlock4HH); DEFINE_TRANSFERLOCAL(4HH, 4, u8, 8, 32, 8, 8, _4, SwizzleBlock4HH);
#endif #endif

View File

@ -169,16 +169,9 @@
TRANSMIT_HOSTLOCAL_Y##th(psm,T,widthlimit,endY) TRANSMIT_HOSTLOCAL_Y##th(psm,T,widthlimit,endY)
// calculate pitch in source buffer // calculate pitch in source buffer
static __forceinline u32 TransPitch(u32 pitch, u32 size)
template <class T> {
static __forceinline int TransmitPitch_(int pitch) { return (pitch * sizeof(T)); } return pitch * size / 8;
template <class T> }
static __forceinline int TransmitPitch_24(int pitch) { return (pitch * 3); }
template <class T>
static __forceinline int TransmitPitch_4(int pitch) { return (pitch/2); }
#define TRANSMIT_PITCH_(pitch, T) TransmitPitch_<T>(pitch)
#define TRANSMIT_PITCH_24(pitch, T) TransmitPitch_24<T>(pitch)
#define TRANSMIT_PITCH_4(pitch, T) TransmitPitch_4<T>(pitch)
#endif // MEM_TRANSMIT_H_INCLUDED #endif // MEM_TRANSMIT_H_INCLUDED