mirror of https://github.com/PCSX2/pcsx2.git
zzogl-pg: A few more changes.
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@2764 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
5b40df17cb
commit
adf43a86ab
|
@ -48,7 +48,8 @@ typedef void (__fastcall *_SwizzleBlock)(u8 *dst, u8 *src, int pitch, u32 WriteM
|
|||
// but this works for the moment.
|
||||
struct TransferData
|
||||
{
|
||||
u32 widthlimit;
|
||||
// Signed because Visual C++ is weird.
|
||||
s32 widthlimit;
|
||||
u32 blockbits;
|
||||
u32 blockwidth;
|
||||
u32 blockheight;
|
||||
|
|
|
@ -1,123 +1,123 @@
|
|||
/* ZeroGS KOSMOS
|
||||
* Copyright (C) 2005-2006 zerofrog@gmail.com
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*/
|
||||
|
||||
/* ZeroGS KOSMOS
|
||||
* Copyright (C) 2005-2006 zerofrog@gmail.com
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*/
|
||||
|
||||
#ifndef MEM_SWIZZLE_H_INCLUDED
|
||||
#define MEM_SWIZZLE_H_INCLUDED
|
||||
|
||||
#include "GS.h"
|
||||
|
||||
#include "GS.h"
|
||||
#include "Mem.h"
|
||||
|
||||
// special swizzle macros - which I converted to functions.
|
||||
|
||||
// Unpacks an 8x8 tile of packed 24-bit pixels from src (rows `pitch` bytes
// apart) into the 32-bit scratch buffer `tempblock`, then passes the scratch
// buffer to SwizzleBlock32 with a 0x00ffffff mask.
// The WriteMask argument is accepted but not consulted here.
//
// Note: the calling convention is written after the return type
// (`void __fastcall`); Visual C++ does not accept it before the return type.
static __forceinline void __fastcall SwizzleBlock24(u8 *dst, u8 *src, int pitch, u32 WriteMask = 0xffffffff)
{
	u8 *in = src;
	u32 *out = tempblock;

	// Rows 0..6: each pixel is fetched with a 4-byte load at a 3-byte stride.
	for (int row = 0; row < 7; ++row)
	{
		for (int col = 0; col < 8; ++col)
		{
			out[col] = *(u32*)in;
			in += 3;
		}
		out += 8;
		in += pitch - 24;
	}

	// Row 7: the first seven pixels use the same 4-byte load.
	for (int col = 0; col < 7; ++col)
	{
		/* might be 1 byte out of bounds of GS memory */
		out[col] = *(u32*)in;
		in += 3;
	}

	/* do 3 bytes for the last copy */
	u8 *lastPixel = (u8*)out + 28;
	lastPixel[0] = in[0];
	lastPixel[1] = in[1];
	lastPixel[2] = in[2];

	SwizzleBlock32((u8*)dst, (u8*)tempblock, 32, 0x00ffffff);
}
|
||||
|
||||
#define SwizzleBlock24u SwizzleBlock24
|
||||
|
||||
// Expands 8 rows of 8 source bytes (rows `pitch` bytes apart) into the 32-bit
// scratch buffer `tempblock`, shifting each source byte into bits 24..31 of
// its own word, then passes the scratch buffer to SwizzleBlock32 with a
// 0xff000000 mask. The WriteMask argument is accepted but not consulted here.
//
// Note: the calling convention is written after the return type
// (`void __fastcall`); Visual C++ does not accept it before the return type.
static __forceinline void __fastcall SwizzleBlock8H(u8 *dst, u8 *src, int pitch, u32 WriteMask = 0xffffffff)
{
	u8 *in = src;
	u32 *out = tempblock;

	for (int row = 0; row < 8; ++row)
	{
		// Each row is handled as two 32-bit loads of four source bytes each.
		for (int half = 0; half < 2; ++half)
		{
			const u32 word = *(u32*)(in + 4*half);
			u32 *quad = out + 4*half;

			quad[0] = word << 24;
			quad[1] = word << 16;
			quad[2] = word << 8;
			quad[3] = word;
		}
		out += 8;
		in += pitch;
	}

	SwizzleBlock32((u8*)dst, (u8*)tempblock, 32, 0xff000000);
}
|
||||
|
||||
#define SwizzleBlock8Hu SwizzleBlock8H
|
||||
|
||||
// Expands 8 rows of one 32-bit source word each (rows `pitch` bytes apart)
// into the 32-bit scratch buffer `tempblock`, shifting each successive nibble
// of the word into bits 28..31 of its own output word, then passes the
// scratch buffer to SwizzleBlock32 with a 0xf0000000 mask.
// The WriteMask argument is accepted but not consulted here.
//
// Note: the calling convention is written after the return type
// (`void __fastcall`); Visual C++ does not accept it before the return type.
static __forceinline void __fastcall SwizzleBlock4HH(u8 *dst, u8 *src, int pitch, u32 WriteMask = 0xffffffff)
{
	u8 *in = src;
	u32 *out = tempblock;

	for (int row = 0; row < 8; ++row)
	{
		const u32 word = *(u32*)in;

		// Shift amounts run 28, 24, ..., 4, 0.
		for (int k = 0; k < 8; ++k)
		{
			out[k] = word << (28 - 4*k);
		}
		out += 8;
		in += pitch;
	}

	SwizzleBlock32((u8*)dst, (u8*)tempblock, 32, 0xf0000000);
}
|
||||
|
||||
#define SwizzleBlock4HHu SwizzleBlock4HH
|
||||
|
||||
// Expands 8 rows of one 32-bit source word each (rows `pitch` bytes apart)
// into the 32-bit scratch buffer `tempblock`, shifting each successive nibble
// of the word into bits 24..27 of its own output word, then passes the
// scratch buffer to SwizzleBlock32 with a 0x0f000000 mask.
// The WriteMask argument is accepted but not consulted here.
//
// Note: the calling convention is written after the return type
// (`void __fastcall`); Visual C++ does not accept it before the return type.
static __forceinline void __fastcall SwizzleBlock4HL(u8 *dst, u8 *src, int pitch, u32 WriteMask = 0xffffffff)
{
	u8 *in = src;
	u32 *out = tempblock;

	for (int row = 0; row < 8; ++row)
	{
		const u32 word = *(u32*)in;

		// Left shifts of 24, 20, ..., 4, 0 for the first seven nibbles ...
		for (int k = 0; k < 7; ++k)
		{
			out[k] = word << (24 - 4*k);
		}
		// ... and a right shift for the topmost one.
		out[7] = word >> 4;

		out += 8;
		in += pitch;
	}

	SwizzleBlock32((u8*)dst, (u8*)tempblock, 32, 0x0f000000);
}
|
||||
|
||||
#define SwizzleBlock4HLu SwizzleBlock4HL
|
||||
|
||||
// special swizzle macros - which I converted to functions.
|
||||
|
||||
// Unpacks an 8x8 tile of packed 24-bit pixels from src (rows `pitch` bytes
// apart) into the 32-bit scratch buffer `tempblock`, then passes the scratch
// buffer to SwizzleBlock32 with a 0x00ffffff mask.
// The WriteMask argument is accepted but not consulted here.
static __forceinline void __fastcall SwizzleBlock24(u8 *dst, u8 *src, int pitch, u32 WriteMask = 0xffffffff)
{
	u8 *in = src;
	u32 *out = tempblock;

	// Rows 0..6: each pixel is fetched with a 4-byte load at a 3-byte stride.
	for (int row = 0; row < 7; ++row)
	{
		for (int col = 0; col < 8; ++col)
		{
			out[col] = *(u32*)in;
			in += 3;
		}
		out += 8;
		in += pitch - 24;
	}

	// Row 7: the first seven pixels use the same 4-byte load.
	for (int col = 0; col < 7; ++col)
	{
		/* might be 1 byte out of bounds of GS memory */
		out[col] = *(u32*)in;
		in += 3;
	}

	/* do 3 bytes for the last copy */
	u8 *lastPixel = (u8*)out + 28;
	lastPixel[0] = in[0];
	lastPixel[1] = in[1];
	lastPixel[2] = in[2];

	SwizzleBlock32((u8*)dst, (u8*)tempblock, 32, 0x00ffffff);
}
|
||||
|
||||
#define SwizzleBlock24u SwizzleBlock24
|
||||
|
||||
// Expands 8 rows of 8 source bytes (rows `pitch` bytes apart) into the 32-bit
// scratch buffer `tempblock`, shifting each source byte into bits 24..31 of
// its own word, then passes the scratch buffer to SwizzleBlock32 with a
// 0xff000000 mask. The WriteMask argument is accepted but not consulted here.
static __forceinline void __fastcall SwizzleBlock8H(u8 *dst, u8 *src, int pitch, u32 WriteMask = 0xffffffff)
{
	u8 *in = src;
	u32 *out = tempblock;

	for (int row = 0; row < 8; ++row)
	{
		// Each row is handled as two 32-bit loads of four source bytes each.
		for (int half = 0; half < 2; ++half)
		{
			const u32 word = *(u32*)(in + 4*half);
			u32 *quad = out + 4*half;

			quad[0] = word << 24;
			quad[1] = word << 16;
			quad[2] = word << 8;
			quad[3] = word;
		}
		out += 8;
		in += pitch;
	}

	SwizzleBlock32((u8*)dst, (u8*)tempblock, 32, 0xff000000);
}
|
||||
|
||||
#define SwizzleBlock8Hu SwizzleBlock8H
|
||||
|
||||
// Expands 8 rows of one 32-bit source word each (rows `pitch` bytes apart)
// into the 32-bit scratch buffer `tempblock`, shifting each successive nibble
// of the word into bits 28..31 of its own output word, then passes the
// scratch buffer to SwizzleBlock32 with a 0xf0000000 mask.
// The WriteMask argument is accepted but not consulted here.
static __forceinline void __fastcall SwizzleBlock4HH(u8 *dst, u8 *src, int pitch, u32 WriteMask = 0xffffffff)
{
	u8 *in = src;
	u32 *out = tempblock;

	for (int row = 0; row < 8; ++row)
	{
		const u32 word = *(u32*)in;

		// Shift amounts run 28, 24, ..., 4, 0.
		for (int k = 0; k < 8; ++k)
		{
			out[k] = word << (28 - 4*k);
		}
		out += 8;
		in += pitch;
	}

	SwizzleBlock32((u8*)dst, (u8*)tempblock, 32, 0xf0000000);
}
|
||||
|
||||
#define SwizzleBlock4HHu SwizzleBlock4HH
|
||||
|
||||
// Expands 8 rows of one 32-bit source word each (rows `pitch` bytes apart)
// into the 32-bit scratch buffer `tempblock`, shifting each successive nibble
// of the word into bits 24..27 of its own output word, then passes the
// scratch buffer to SwizzleBlock32 with a 0x0f000000 mask.
// The WriteMask argument is accepted but not consulted here.
static __forceinline void __fastcall SwizzleBlock4HL(u8 *dst, u8 *src, int pitch, u32 WriteMask = 0xffffffff)
{
	u8 *in = src;
	u32 *out = tempblock;

	for (int row = 0; row < 8; ++row)
	{
		const u32 word = *(u32*)in;

		// Left shifts of 24, 20, ..., 4, 0 for the first seven nibbles ...
		for (int k = 0; k < 7; ++k)
		{
			out[k] = word << (24 - 4*k);
		}
		// ... and a right shift for the topmost one.
		out[7] = word >> 4;

		out += 8;
		in += pitch;
	}

	SwizzleBlock32((u8*)dst, (u8*)tempblock, 32, 0x0f000000);
}
|
||||
|
||||
#define SwizzleBlock4HLu SwizzleBlock4HL
|
||||
|
||||
|
||||
#endif // MEM_SWIZZLE_H_INCLUDED
|
||||
|
|
|
@ -1,245 +1,245 @@
|
|||
#ifndef MEM_TRANSMIT_H_INCLUDED
|
||||
#define MEM_TRANSMIT_H_INCLUDED
|
||||
|
||||
|
||||
#include "GS.h"
|
||||
#include "Mem.h"
|
||||
|
||||
#define DSTPSM gs.dstbuf.psm
|
||||
extern int tempX, tempY;
|
||||
extern int pitch, area, fracX;
|
||||
extern int nSize;
|
||||
extern u8* pstart;
|
||||
|
||||
// transfers whole rows
|
||||
template <class T>
|
||||
static __forceinline bool TransmitHostLocalY_(_writePixel_0 wp, u32 widthlimit, u32 endY, const T *pbuf)
|
||||
{
|
||||
assert( (nSize%widthlimit) == 0 && widthlimit <= 4 );
|
||||
if ((gs.imageEndX-gs.trxpos.dx) % widthlimit)
|
||||
{
|
||||
// GS_LOG("Bad Transmission! %d %d, psm: %d\n", gs.trxpos.dx, gs.imageEndX, DSTPSM);
|
||||
|
||||
for(; tempY < endY; ++tempY)
|
||||
{
|
||||
for(; tempX < gs.imageEndX && nSize > 0; tempX += 1, nSize -= 1, pbuf += 1)
|
||||
{
|
||||
/* write as many pixel at one time as possible */
|
||||
wp(pstart, tempX%2048, tempY%2048, pbuf[0], gs.dstbuf.bw);
|
||||
}
|
||||
}
|
||||
}
|
||||
for(; tempY < endY; ++tempY)
|
||||
{
|
||||
for(; tempX < gs.imageEndX && nSize > 0; tempX += widthlimit, nSize -= widthlimit, pbuf += widthlimit)
|
||||
{
|
||||
|
||||
/* write as many pixel at one time as possible */
|
||||
if( nSize < widthlimit ) return false;
|
||||
|
||||
wp(pstart, tempX%2048, tempY%2048, pbuf[0], gs.dstbuf.bw);
|
||||
|
||||
if( widthlimit > 1 )
|
||||
{
|
||||
wp(pstart, (tempX+1)%2048, tempY%2048, pbuf[1], gs.dstbuf.bw);
|
||||
|
||||
if( widthlimit > 2 )
|
||||
{
|
||||
wp(pstart, (tempX+2)%2048, tempY%2048, pbuf[2], gs.dstbuf.bw);
|
||||
|
||||
if( widthlimit > 3 )
|
||||
{
|
||||
wp(pstart, (tempX+3)%2048, tempY%2048, pbuf[3], gs.dstbuf.bw);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if ( tempX >= gs.imageEndX )
|
||||
{
|
||||
assert(tempX == gs.imageEndX);
|
||||
tempX = gs.trxpos.dx;
|
||||
}
|
||||
else
|
||||
{
|
||||
assert( gs.imageTransfer == -1 || nSize*sizeof(T)/4 == 0 );
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
template <class T>
|
||||
static __forceinline bool TransmitHostLocalX_(_writePixel_0 wp, u32 widthlimit, u32 blockheight, u32 startX, const T *pbuf)
|
||||
{
|
||||
for(int tempi = 0; tempi < blockheight; ++tempi)
|
||||
{
|
||||
for(tempX = startX; tempX < gs.imageEndX; tempX++, pbuf++)
|
||||
{
|
||||
wp(pstart, tempX%2048, (tempY+tempi)%2048, pbuf[0], gs.dstbuf.bw);
|
||||
}
|
||||
pbuf += pitch - fracX;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// transfers whole rows
|
||||
template <class T>
|
||||
static __forceinline bool TransmitHostLocalY_24(_writePixel_0 wp, u32 widthlimit, u32 endY, const T *pbuf)
|
||||
{
|
||||
if (widthlimit != 8 || ((gs.imageEndX-gs.trxpos.dx)%widthlimit))
|
||||
{
|
||||
//GS_LOG("Bad Transmission! %d %d, psm: %d\n", gs.trxpos.dx, gs.imageEndX, DSTPSM);
|
||||
for(; tempY < endY; ++tempY)
|
||||
{
|
||||
for(; tempX < gs.imageEndX && nSize > 0; tempX += 1, nSize -= 1, pbuf += 3)
|
||||
{
|
||||
wp(pstart, tempX%2048, tempY%2048, *(u32*)(pbuf), gs.dstbuf.bw);
|
||||
}
|
||||
|
||||
if( tempX >= gs.imageEndX )
|
||||
{
|
||||
assert(gs.imageTransfer == -1 || tempX == gs.imageEndX);
|
||||
tempX = gs.trxpos.dx;
|
||||
}
|
||||
else
|
||||
{
|
||||
assert( gs.imageTransfer == -1 || nSize == 0 );
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
assert( /*(nSize%widthlimit) == 0 &&*/ widthlimit == 8 );
|
||||
for(; tempY < endY; ++tempY)
|
||||
{
|
||||
for(; tempX < gs.imageEndX && nSize > 0; tempX += widthlimit, nSize -= widthlimit, pbuf += 3*widthlimit)
|
||||
{
|
||||
if (nSize < widthlimit) return false;
|
||||
|
||||
/* write as many pixel at one time as possible */
|
||||
|
||||
wp(pstart, tempX%2048, tempY%2048, *(u32*)(pbuf+0), gs.dstbuf.bw);
|
||||
wp(pstart, (tempX+1)%2048, tempY%2048, *(u32*)(pbuf+3), gs.dstbuf.bw);
|
||||
wp(pstart, (tempX+2)%2048, tempY%2048, *(u32*)(pbuf+6), gs.dstbuf.bw);
|
||||
wp(pstart, (tempX+3)%2048, tempY%2048, *(u32*)(pbuf+9), gs.dstbuf.bw);
|
||||
wp(pstart, (tempX+4)%2048, tempY%2048, *(u32*)(pbuf+12), gs.dstbuf.bw);
|
||||
wp(pstart, (tempX+5)%2048, tempY%2048, *(u32*)(pbuf+15), gs.dstbuf.bw);
|
||||
wp(pstart, (tempX+6)%2048, tempY%2048, *(u32*)(pbuf+18), gs.dstbuf.bw);
|
||||
wp(pstart, (tempX+7)%2048, tempY%2048, *(u32*)(pbuf+21), gs.dstbuf.bw);
|
||||
}
|
||||
|
||||
if (tempX >= gs.imageEndX)
|
||||
{
|
||||
assert(gs.imageTransfer == -1 || tempX == gs.imageEndX);
|
||||
tempX = gs.trxpos.dx;
|
||||
}
|
||||
else
|
||||
{
|
||||
if ( nSize < 0 )
|
||||
{
|
||||
/* extracted too much */
|
||||
assert( (nSize%3)==0 && nSize > -24 );
|
||||
tempX += nSize/3;
|
||||
nSize = 0;
|
||||
}
|
||||
assert( gs.imageTransfer == -1 || nSize == 0 );
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// transmit until endX, don't check size since it has already been prevalidated
|
||||
template <class T>
|
||||
static __forceinline bool TransmitHostLocalX_24(_writePixel_0 wp, u32 widthlimit, u32 blockheight, u32 startX, const T *pbuf)
|
||||
{
|
||||
for(int tempi = 0; tempi < blockheight; ++tempi)
|
||||
{
|
||||
for(tempX = startX; tempX < gs.imageEndX; tempX++, pbuf += 3)
|
||||
{
|
||||
wp(pstart, tempX%2048, (tempY+tempi)%2048, *(u32*)pbuf, gs.dstbuf.bw);
|
||||
}
|
||||
pbuf += 3*(pitch-fracX);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// meant for 4bit transfers
|
||||
template <class T>
|
||||
static __forceinline bool TransmitHostLocalY_4(_writePixel_0 wp, u32 widthlimit, u32 endY, const T *pbuf)
|
||||
{
|
||||
for(; tempY < endY; ++tempY)
|
||||
{
|
||||
for(; tempX < gs.imageEndX && nSize > 0; tempX += widthlimit, nSize -= widthlimit)
|
||||
{
|
||||
/* write as many pixel at one time as possible */
|
||||
wp(pstart, tempX%2048, tempY%2048, *pbuf&0x0f, gs.dstbuf.bw);
|
||||
wp(pstart, (tempX+1)%2048, tempY%2048, *pbuf>>4, gs.dstbuf.bw);
|
||||
pbuf++;
|
||||
if ( widthlimit > 2 )
|
||||
{
|
||||
wp(pstart, (tempX+2)%2048, tempY%2048, *pbuf&0x0f, gs.dstbuf.bw);
|
||||
wp(pstart, (tempX+3)%2048, tempY%2048, *pbuf>>4, gs.dstbuf.bw);
|
||||
pbuf++;
|
||||
|
||||
if( widthlimit > 4 )
|
||||
{
|
||||
wp(pstart, (tempX+4)%2048, tempY%2048, *pbuf&0x0f, gs.dstbuf.bw);
|
||||
wp(pstart, (tempX+5)%2048, tempY%2048, *pbuf>>4, gs.dstbuf.bw);
|
||||
pbuf++;
|
||||
|
||||
if( widthlimit > 6 )
|
||||
{
|
||||
wp(pstart, (tempX+6)%2048, tempY%2048, *pbuf&0x0f, gs.dstbuf.bw);
|
||||
wp(pstart, (tempX+7)%2048, tempY%2048, *pbuf>>4, gs.dstbuf.bw);
|
||||
pbuf++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if ( tempX >= gs.imageEndX )
|
||||
{
|
||||
tempX = gs.trxpos.dx;
|
||||
}
|
||||
else
|
||||
{
|
||||
assert( gs.imageTransfer == -1 || (nSize/32) == 0 );
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// transmit until endX, don't check size since it has already been prevalidated
|
||||
template <class T>
|
||||
static __forceinline bool TransmitHostLocalX_4(_writePixel_0 wp, u32 widthlimit, u32 blockheight, u32 startX, const T *pbuf)
|
||||
{
|
||||
for(int tempi = 0; tempi < blockheight; ++tempi)
|
||||
{
|
||||
for(tempX = startX; tempX < gs.imageEndX; tempX+=2, pbuf++)
|
||||
{
|
||||
wp(pstart, tempX%2048, (tempY+tempi)%2048, pbuf[0]&0x0f, gs.dstbuf.bw);
|
||||
wp(pstart, (tempX+1)%2048, (tempY+tempi)%2048, pbuf[0]>>4, gs.dstbuf.bw);
|
||||
}
|
||||
pbuf += (pitch-fracX)/2;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// calculate pitch in source buffer
|
||||
static __forceinline u32 TransPitch(u32 pitch, u32 size)
|
||||
{
|
||||
return pitch * size / 8;
|
||||
}
|
||||
|
||||
// Scales pitch by a size-dependent factor: halved when size is 4, tripled
// when size is 24, returned unchanged for every other size.
static __forceinline u32 TransPitch2(u32 pitch, u32 size)
{
	switch (size)
	{
		case 4:
			return pitch / 2;
		case 24:
			return pitch * 3;
		default:
			return pitch;
	}
}
|
||||
#include "Mem.h"
|
||||
|
||||
#define DSTPSM gs.dstbuf.psm
|
||||
extern int tempX, tempY;
|
||||
extern int pitch, area, fracX;
|
||||
extern int nSize;
|
||||
extern u8* pstart;
|
||||
|
||||
// transfers whole rows
|
||||
template <class T>
|
||||
static __forceinline bool TransmitHostLocalY_(_writePixel_0 wp, u32 widthlimit, u32 endY, const T *pbuf)
|
||||
{
|
||||
assert( (nSize%widthlimit) == 0 && widthlimit <= 4 );
|
||||
if ((gs.imageEndX-gs.trxpos.dx) % widthlimit)
|
||||
{
|
||||
// GS_LOG("Bad Transmission! %d %d, psm: %d\n", gs.trxpos.dx, gs.imageEndX, DSTPSM);
|
||||
|
||||
for(; tempY < endY; ++tempY)
|
||||
{
|
||||
for(; tempX < gs.imageEndX && nSize > 0; tempX += 1, nSize -= 1, pbuf += 1)
|
||||
{
|
||||
/* write as many pixel at one time as possible */
|
||||
wp(pstart, tempX%2048, tempY%2048, pbuf[0], gs.dstbuf.bw);
|
||||
}
|
||||
}
|
||||
}
|
||||
for(; tempY < endY; ++tempY)
|
||||
{
|
||||
for(; tempX < gs.imageEndX && nSize > 0; tempX += widthlimit, nSize -= widthlimit, pbuf += widthlimit)
|
||||
{
|
||||
|
||||
/* write as many pixel at one time as possible */
|
||||
if( nSize < widthlimit ) return false;
|
||||
|
||||
wp(pstart, tempX%2048, tempY%2048, pbuf[0], gs.dstbuf.bw);
|
||||
|
||||
if( widthlimit > 1 )
|
||||
{
|
||||
wp(pstart, (tempX+1)%2048, tempY%2048, pbuf[1], gs.dstbuf.bw);
|
||||
|
||||
if( widthlimit > 2 )
|
||||
{
|
||||
wp(pstart, (tempX+2)%2048, tempY%2048, pbuf[2], gs.dstbuf.bw);
|
||||
|
||||
if( widthlimit > 3 )
|
||||
{
|
||||
wp(pstart, (tempX+3)%2048, tempY%2048, pbuf[3], gs.dstbuf.bw);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if ( tempX >= gs.imageEndX )
|
||||
{
|
||||
assert(tempX == gs.imageEndX);
|
||||
tempX = gs.trxpos.dx;
|
||||
}
|
||||
else
|
||||
{
|
||||
assert( gs.imageTransfer == -1 || nSize*sizeof(T)/4 == 0 );
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
template <class T>
|
||||
static __forceinline bool TransmitHostLocalX_(_writePixel_0 wp, u32 widthlimit, u32 blockheight, u32 startX, const T *pbuf)
|
||||
{
|
||||
for(u32 tempi = 0; tempi < blockheight; ++tempi)
|
||||
{
|
||||
for(tempX = startX; tempX < gs.imageEndX; tempX++, pbuf++)
|
||||
{
|
||||
wp(pstart, tempX%2048, (tempY+tempi)%2048, pbuf[0], gs.dstbuf.bw);
|
||||
}
|
||||
pbuf += pitch - fracX;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// transfers whole rows
|
||||
template <class T>
|
||||
static __forceinline bool TransmitHostLocalY_24(_writePixel_0 wp, u32 widthlimit, u32 endY, const T *pbuf)
|
||||
{
|
||||
if (widthlimit != 8 || ((gs.imageEndX-gs.trxpos.dx)%widthlimit))
|
||||
{
|
||||
//GS_LOG("Bad Transmission! %d %d, psm: %d\n", gs.trxpos.dx, gs.imageEndX, DSTPSM);
|
||||
for(; tempY < endY; ++tempY)
|
||||
{
|
||||
for(; tempX < gs.imageEndX && nSize > 0; tempX += 1, nSize -= 1, pbuf += 3)
|
||||
{
|
||||
wp(pstart, tempX%2048, tempY%2048, *(u32*)(pbuf), gs.dstbuf.bw);
|
||||
}
|
||||
|
||||
if( tempX >= gs.imageEndX )
|
||||
{
|
||||
assert(gs.imageTransfer == -1 || tempX == gs.imageEndX);
|
||||
tempX = gs.trxpos.dx;
|
||||
}
|
||||
else
|
||||
{
|
||||
assert( gs.imageTransfer == -1 || nSize == 0 );
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
assert( /*(nSize%widthlimit) == 0 &&*/ widthlimit == 8 );
|
||||
for(; tempY < endY; ++tempY)
|
||||
{
|
||||
for(; tempX < gs.imageEndX && nSize > 0; tempX += widthlimit, nSize -= widthlimit, pbuf += 3*widthlimit)
|
||||
{
|
||||
if (nSize < widthlimit) return false;
|
||||
|
||||
/* write as many pixel at one time as possible */
|
||||
|
||||
wp(pstart, tempX%2048, tempY%2048, *(u32*)(pbuf+0), gs.dstbuf.bw);
|
||||
wp(pstart, (tempX+1)%2048, tempY%2048, *(u32*)(pbuf+3), gs.dstbuf.bw);
|
||||
wp(pstart, (tempX+2)%2048, tempY%2048, *(u32*)(pbuf+6), gs.dstbuf.bw);
|
||||
wp(pstart, (tempX+3)%2048, tempY%2048, *(u32*)(pbuf+9), gs.dstbuf.bw);
|
||||
wp(pstart, (tempX+4)%2048, tempY%2048, *(u32*)(pbuf+12), gs.dstbuf.bw);
|
||||
wp(pstart, (tempX+5)%2048, tempY%2048, *(u32*)(pbuf+15), gs.dstbuf.bw);
|
||||
wp(pstart, (tempX+6)%2048, tempY%2048, *(u32*)(pbuf+18), gs.dstbuf.bw);
|
||||
wp(pstart, (tempX+7)%2048, tempY%2048, *(u32*)(pbuf+21), gs.dstbuf.bw);
|
||||
}
|
||||
|
||||
if (tempX >= gs.imageEndX)
|
||||
{
|
||||
assert(gs.imageTransfer == -1 || tempX == gs.imageEndX);
|
||||
tempX = gs.trxpos.dx;
|
||||
}
|
||||
else
|
||||
{
|
||||
if ( nSize < 0 )
|
||||
{
|
||||
/* extracted too much */
|
||||
assert( (nSize%3)==0 && nSize > -24 );
|
||||
tempX += nSize/3;
|
||||
nSize = 0;
|
||||
}
|
||||
assert( gs.imageTransfer == -1 || nSize == 0 );
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// transmit until endX, don't check size since it has already been prevalidated
|
||||
template <class T>
|
||||
static __forceinline bool TransmitHostLocalX_24(_writePixel_0 wp, u32 widthlimit, u32 blockheight, u32 startX, const T *pbuf)
|
||||
{
|
||||
for(u32 tempi = 0; tempi < blockheight; ++tempi)
|
||||
{
|
||||
for(tempX = startX; tempX < gs.imageEndX; tempX++, pbuf += 3)
|
||||
{
|
||||
wp(pstart, tempX%2048, (tempY+tempi)%2048, *(u32*)pbuf, gs.dstbuf.bw);
|
||||
}
|
||||
pbuf += 3*(pitch-fracX);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// meant for 4bit transfers
|
||||
template <class T>
|
||||
static __forceinline bool TransmitHostLocalY_4(_writePixel_0 wp, u32 widthlimit, u32 endY, const T *pbuf)
|
||||
{
|
||||
for(; tempY < endY; ++tempY)
|
||||
{
|
||||
for(; tempX < gs.imageEndX && nSize > 0; tempX += widthlimit, nSize -= widthlimit)
|
||||
{
|
||||
/* write as many pixel at one time as possible */
|
||||
wp(pstart, tempX%2048, tempY%2048, *pbuf&0x0f, gs.dstbuf.bw);
|
||||
wp(pstart, (tempX+1)%2048, tempY%2048, *pbuf>>4, gs.dstbuf.bw);
|
||||
pbuf++;
|
||||
if ( widthlimit > 2 )
|
||||
{
|
||||
wp(pstart, (tempX+2)%2048, tempY%2048, *pbuf&0x0f, gs.dstbuf.bw);
|
||||
wp(pstart, (tempX+3)%2048, tempY%2048, *pbuf>>4, gs.dstbuf.bw);
|
||||
pbuf++;
|
||||
|
||||
if( widthlimit > 4 )
|
||||
{
|
||||
wp(pstart, (tempX+4)%2048, tempY%2048, *pbuf&0x0f, gs.dstbuf.bw);
|
||||
wp(pstart, (tempX+5)%2048, tempY%2048, *pbuf>>4, gs.dstbuf.bw);
|
||||
pbuf++;
|
||||
|
||||
if( widthlimit > 6 )
|
||||
{
|
||||
wp(pstart, (tempX+6)%2048, tempY%2048, *pbuf&0x0f, gs.dstbuf.bw);
|
||||
wp(pstart, (tempX+7)%2048, tempY%2048, *pbuf>>4, gs.dstbuf.bw);
|
||||
pbuf++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if ( tempX >= gs.imageEndX )
|
||||
{
|
||||
tempX = gs.trxpos.dx;
|
||||
}
|
||||
else
|
||||
{
|
||||
assert( gs.imageTransfer == -1 || (nSize/32) == 0 );
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// transmit until endX, don't check size since it has already been prevalidated
|
||||
template <class T>
|
||||
static __forceinline bool TransmitHostLocalX_4(_writePixel_0 wp, u32 widthlimit, u32 blockheight, u32 startX, const T *pbuf)
|
||||
{
|
||||
for(u32 tempi = 0; tempi < blockheight; ++tempi)
|
||||
{
|
||||
for(tempX = startX; tempX < gs.imageEndX; tempX+=2, pbuf++)
|
||||
{
|
||||
wp(pstart, tempX%2048, (tempY+tempi)%2048, pbuf[0]&0x0f, gs.dstbuf.bw);
|
||||
wp(pstart, (tempX+1)%2048, (tempY+tempi)%2048, pbuf[0]>>4, gs.dstbuf.bw);
|
||||
}
|
||||
pbuf += (pitch-fracX)/2;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// calculate pitch in source buffer
|
||||
static __forceinline u32 TransPitch(u32 pitch, u32 size)
|
||||
{
|
||||
return pitch * size / 8;
|
||||
}
|
||||
|
||||
// Scales pitch by a size-dependent factor: halved when size is 4, tripled
// when size is 24, returned unchanged for every other size.
static __forceinline u32 TransPitch2(u32 pitch, u32 size)
{
	switch (size)
	{
		case 4:
			return pitch / 2;
		case 24:
			return pitch * 3;
		default:
			return pitch;
	}
}
|
||||
|
||||
#endif // MEM_TRANSMIT_H_INCLUDED
|
||||
|
|
|
@ -225,7 +225,7 @@
|
|||
RuntimeLibrary="0"
|
||||
BufferSecurityCheck="false"
|
||||
PrecompiledHeaderFile=""
|
||||
WarningLevel="3"
|
||||
WarningLevel="1"
|
||||
SuppressStartupBanner="true"
|
||||
DebugInformationFormat="3"
|
||||
/>
|
||||
|
@ -451,7 +451,6 @@
|
|||
RelativePath="..\Mem_Transmit.h"
|
||||
>
|
||||
</File>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\PS2Edefs.h"
|
||||
>
|
||||
|
|
|
@ -193,7 +193,7 @@ SwizzleBlock32_sse2@WM:
|
|||
; SwizzleBlock16
|
||||
;
|
||||
|
||||
@SwizzleBlock16_sse2@12 proc public
|
||||
@SwizzleBlock16_sse2@16 proc public
|
||||
|
||||
push ebx
|
||||
|
||||
|
@ -225,13 +225,13 @@ SwizzleBlock32_sse2@WM:
|
|||
|
||||
ret 4
|
||||
|
||||
@SwizzleBlock16_sse2@12 endp
|
||||
@SwizzleBlock16_sse2@16 endp
|
||||
|
||||
;
|
||||
; SwizzleBlock8
|
||||
;
|
||||
|
||||
@SwizzleBlock8_sse2@12 proc public
|
||||
@SwizzleBlock8_sse2@16 proc public
|
||||
|
||||
push ebx
|
||||
|
||||
|
@ -287,13 +287,13 @@ SwizzleBlock32_sse2@WM:
|
|||
|
||||
ret 4
|
||||
|
||||
@SwizzleBlock8_sse2@12 endp
|
||||
@SwizzleBlock8_sse2@16 endp
|
||||
|
||||
;
|
||||
; SwizzleBlock4
|
||||
;
|
||||
|
||||
@SwizzleBlock4_sse2@12 proc public
|
||||
@SwizzleBlock4_sse2@16 proc public
|
||||
|
||||
push ebx
|
||||
|
||||
|
@ -365,7 +365,7 @@ SwizzleBlock32_sse2@WM:
|
|||
|
||||
ret 4
|
||||
|
||||
@SwizzleBlock4_sse2@12 endp
|
||||
@SwizzleBlock4_sse2@16 endp
|
||||
|
||||
;
|
||||
; swizzling with unaligned reads
|
||||
|
@ -471,7 +471,7 @@ SwizzleBlock32u_sse2@WM:
|
|||
; SwizzleBlock16u
|
||||
;
|
||||
|
||||
@SwizzleBlock16u_sse2@12 proc public
|
||||
@SwizzleBlock16u_sse2@16 proc public
|
||||
|
||||
push ebx
|
||||
|
||||
|
@ -503,13 +503,13 @@ SwizzleBlock32u_sse2@WM:
|
|||
|
||||
ret 4
|
||||
|
||||
@SwizzleBlock16u_sse2@12 endp
|
||||
@SwizzleBlock16u_sse2@16 endp
|
||||
|
||||
;
|
||||
; SwizzleBlock8u
|
||||
;
|
||||
|
||||
@SwizzleBlock8u_sse2@12 proc public
|
||||
@SwizzleBlock8u_sse2@16 proc public
|
||||
|
||||
push ebx
|
||||
|
||||
|
@ -569,13 +569,13 @@ SwizzleBlock32u_sse2@WM:
|
|||
|
||||
ret 4
|
||||
|
||||
@SwizzleBlock8u_sse2@12 endp
|
||||
@SwizzleBlock8u_sse2@16 endp
|
||||
|
||||
;
|
||||
; SwizzleBlock4u
|
||||
;
|
||||
|
||||
@SwizzleBlock4u_sse2@12 proc public
|
||||
@SwizzleBlock4u_sse2@16 proc public
|
||||
|
||||
push ebx
|
||||
|
||||
|
@ -647,6 +647,6 @@ SwizzleBlock32u_sse2@WM:
|
|||
|
||||
ret 4
|
||||
|
||||
@SwizzleBlock4u_sse2@12 endp
|
||||
@SwizzleBlock4u_sse2@16 endp
|
||||
|
||||
end
|
Loading…
Reference in New Issue