zzogl-pg: Fighting with Mem.cpp. (This probably breaks things a bit. This is more of a 'save-my-progress' commit than anything. Basically, I'm expanding out a bunch of tangled macros right now, and creating a bunch of repetitive code that hopefully I'll be able to reduce afterwards.)

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@2757 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
arcum42 2010-03-20 07:24:19 +00:00
parent 3e6b6c86c1
commit 5cfca09385
8 changed files with 2426 additions and 709 deletions

View File

@ -78,6 +78,9 @@
<Unit filename="../zerogs.glade" />
<Unit filename="../../Mem.cpp" />
<Unit filename="../../Mem.h" />
<Unit filename="../../Mem_Swizzle.h" />
<Unit filename="../../Mem_Tables.cpp" />
<Unit filename="../../Mem_Transmit.h" />
<Unit filename="../../Regs.cpp" />
<Unit filename="../../Regs.h" />
<Unit filename="../../Win32/Conf.cpp">

View File

@ -26,7 +26,9 @@ libzzoglpg_LDADD=$(libzzoglpg_a_OBJECTS)
libzzoglpg_a_SOURCES = \
GSmain.cpp GifTransfer.cpp memcpy_amd.cpp Regs.cpp x86.cpp zpipe.cpp Mem.cpp \
rasterfont.cpp targets.cpp zerogs.cpp ZZoglVB.cpp ZZoglShoots.cpp ZZoglCreate.cpp \
ZZoglShaders.cpp ZZoglCRTC.cpp ZZoglSave.cpp ZZoglFlush.cpp
ZZoglShaders.cpp ZZoglCRTC.cpp ZZoglSave.cpp ZZoglFlush.cpp \
Mem_Swizzle.h Mem_Tables.cpp Mem_Transmit.h
libzzoglpg_a_SOURCES += x86-32.S

File diff suppressed because it is too large Load Diff

View File

@ -23,14 +23,26 @@
#include <vector>
// works only when base is a power of 2
#define ROUND_UPPOW2(val, base) (((val)+(base-1))&~(base-1))
#define ROUND_DOWNPOW2(val, base) ((val)&~(base-1))
#define MOD_POW2(val, base) ((val)&(base-1))
// Rounds val up to the nearest multiple of base.
// Only valid when base is a power of 2 (the low bits are cleared with a mask).
static __forceinline int ROUND_UPPOW2(int val, int base)
{
	const int lowBits = base - 1;
	return (val + lowBits) & ~lowBits;
}
// Rounds val down to the nearest multiple of base.
// Only valid when base is a power of 2 (the low bits are cleared with a mask).
static __forceinline int ROUND_DOWNPOW2(int val, int base)
{
	const int lowBits = base - 1;
	return val & ~lowBits;
}
// Returns the low bits of val selected by (base - 1), i.e. val modulo base
// when base is a power of 2. Only valid for power-of-2 bases.
static __forceinline int MOD_POW2(int val, int base)
{
	const int lowBits = base - 1;
	return val & lowBits;
}
// d3d texture dims
#define BLOCK_TEXWIDTH 128
#define BLOCK_TEXHEIGHT 512
const int BLOCK_TEXWIDTH = 128;
const int BLOCK_TEXHEIGHT = 512;
extern PCSX2_ALIGNED16(u32 tempblock[64]);
// Function-pointer types for the per-format pixel accessors and transfer
// routines. The first eight are the member types of struct BLOCK below.
// The "_0" variants take no base pointer (bp) argument.

// Computes a pixel's address from (x, y), base pointer bp and buffer width bw.
typedef u32 ( *_getPixelAddress)(int x, int y, u32 bp, u32 bw);
typedef u32 (*_getPixelAddress_0)(int x, int y, u32 bw);
// Stores `pixel` at (x, y) in the memory pointed to by pmem.
typedef void (*_writePixel)(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw);
typedef void (*_writePixel_0)(void* pmem, int x, int y, u32 pixel, u32 bw);
// Loads the pixel at (x, y) from the memory pointed to by pmem.
typedef u32 (*_readPixel)(const void* pmem, int x, int y, u32 bp, u32 bw);
typedef u32 (*_readPixel_0)(const void* pmem, int x, int y, u32 bw);
// Bulk transfer entry points; nQWordSize is presumably a size in quadwords — TODO confirm.
typedef int (*_TransferHostLocal)(const void* pbyMem, u32 nQWordSize);
typedef void (*_TransferLocalHost)(void* pbyMem, u32 nQWordSize);
// Same parameter list as the SwizzleBlock* helpers (without their default WriteMask).
typedef void (__fastcall *_SwizzleBlock)(u8 *dst, u8 *src, int pitch, u32 WriteMask);
// rest not visible externally
struct BLOCK
{
@ -46,14 +58,14 @@ struct BLOCK
u32* blockTable;
u32* columnTable;
u32 (*getPixelAddress)(int x, int y, u32 bp, u32 bw);
u32 (*getPixelAddress_0)(int x, int y, u32 bw);
void (*writePixel)(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw);
void (*writePixel_0)(void* pmem, int x, int y, u32 pixel, u32 bw);
u32 (*readPixel)(const void* pmem, int x, int y, u32 bp, u32 bw);
u32 (*readPixel_0)(const void* pmem, int x, int y, u32 bw);
int (*TransferHostLocal)(const void* pbyMem, u32 nQWordSize);
void (*TransferLocalHost)(void* pbyMem, u32 nQWordSize);
_getPixelAddress getPixelAddress;
_getPixelAddress_0 getPixelAddress_0;
_writePixel writePixel;
_writePixel_0 writePixel_0;
_readPixel readPixel;
_readPixel_0 readPixel_0;
_TransferHostLocal TransferHostLocal;
_TransferLocalHost TransferLocalHost;
// texture must be of dims BLOCK_TEXWIDTH and BLOCK_TEXHEIGHT
static void FillBlocks(std::vector<char>& vBlockData, std::vector<char>& vBilinearData, int floatfmt);
@ -84,19 +96,17 @@ extern u32 g_pageTable16SZ[64][64];
extern u32 g_pageTable8[64][128];
extern u32 g_pageTable4[128][128];
static __forceinline u32 getPixelAddress32(int x, int y, u32 bp, u32 bw) {
static __forceinline u32 getPixelAddress32(int x, int y, u32 bp, u32 bw)
{
u32 basepage = ((y>>5) * (bw>>6)) + (x>>6);
u32 word = bp * 64 + basepage * 2048 + g_pageTable32[y&31][x&63];
//assert (word < 0x100000);
//word = min(word, 0xfffff);
return word;
}
static __forceinline u32 getPixelAddress32_0(int x, int y, u32 bw) {
static __forceinline u32 getPixelAddress32_0(int x, int y, u32 bw)
{
u32 basepage = ((y>>5) * (bw>>6)) + (x>>6);
u32 word = basepage * 2048 + g_pageTable32[y&31][x&63];
//assert (word < 0x100000);
//word = min(word, 0xfffff);
return word;
}
@ -109,210 +119,221 @@ static __forceinline u32 getPixelAddress32_0(int x, int y, u32 bw) {
#define getPixelAddress4HH getPixelAddress32
#define getPixelAddress4HH_0 getPixelAddress32_0
static __forceinline u32 getPixelAddress16(int x, int y, u32 bp, u32 bw) {
static __forceinline u32 getPixelAddress16(int x, int y, u32 bp, u32 bw)
{
u32 basepage = ((y>>6) * (bw>>6)) + (x>>6);
u32 word = bp * 128 + basepage * 4096 + g_pageTable16[y&63][x&63];
//assert (word < 0x200000);
//word = min(word, 0x1fffff);
return word;
}
static __forceinline u32 getPixelAddress16_0(int x, int y, u32 bw) {
static __forceinline u32 getPixelAddress16_0(int x, int y, u32 bw)
{
u32 basepage = ((y>>6) * (bw>>6)) + (x>>6);
u32 word = basepage * 4096 + g_pageTable16[y&63][x&63];
//assert (word < 0x200000);
//word = min(word, 0x1fffff);
return word;
}
static __forceinline u32 getPixelAddress16S(int x, int y, u32 bp, u32 bw) {
static __forceinline u32 getPixelAddress16S(int x, int y, u32 bp, u32 bw)
{
u32 basepage = ((y>>6) * (bw>>6)) + (x>>6);
u32 word = bp * 128 + basepage * 4096 + g_pageTable16S[y&63][x&63];
//assert (word < 0x200000);
//word = min(word, 0x1fffff);
return word;
}
static __forceinline u32 getPixelAddress16S_0(int x, int y, u32 bw) {
static __forceinline u32 getPixelAddress16S_0(int x, int y, u32 bw)
{
u32 basepage = ((y>>6) * (bw>>6)) + (x>>6);
u32 word = basepage * 4096 + g_pageTable16S[y&63][x&63];
//assert (word < 0x200000);
//word = min(word, 0x1fffff);
return word;
}
static __forceinline u32 getPixelAddress8(int x, int y, u32 bp, u32 bw) {
static __forceinline u32 getPixelAddress8(int x, int y, u32 bp, u32 bw)
{
u32 basepage = ((y>>6) * ((bw+127)>>7)) + (x>>7);
u32 word = bp * 256 + basepage * 8192 + g_pageTable8[y&63][x&127];
//assert (word < 0x400000);
//word = min(word, 0x3fffff);
return word;
}
static __forceinline u32 getPixelAddress8_0(int x, int y, u32 bw) {
static __forceinline u32 getPixelAddress8_0(int x, int y, u32 bw)
{
u32 basepage = ((y>>6) * ((bw+127)>>7)) + (x>>7);
u32 word = basepage * 8192 + g_pageTable8[y&63][x&127];
//assert (word < 0x400000);
//word = min(word, 0x3fffff);
return word;
}
static __forceinline u32 getPixelAddress4(int x, int y, u32 bp, u32 bw) {
static __forceinline u32 getPixelAddress4(int x, int y, u32 bp, u32 bw)
{
u32 basepage = ((y>>7) * ((bw+127)>>7)) + (x>>7);
u32 word = bp * 512 + basepage * 16384 + g_pageTable4[y&127][x&127];
//assert (word < 0x800000);
//word = min(word, 0x7fffff);
return word;
}
static __forceinline u32 getPixelAddress4_0(int x, int y, u32 bw) {
static __forceinline u32 getPixelAddress4_0(int x, int y, u32 bw)
{
u32 basepage = ((y>>7) * ((bw+127)>>7)) + (x>>7);
u32 word = basepage * 16384 + g_pageTable4[y&127][x&127];
//assert (word < 0x800000);
//word = min(word, 0x7fffff);
return word;
}
static __forceinline u32 getPixelAddress32Z(int x, int y, u32 bp, u32 bw) {
static __forceinline u32 getPixelAddress32Z(int x, int y, u32 bp, u32 bw)
{
u32 basepage = ((y>>5) * (bw>>6)) + (x>>6);
u32 word = bp * 64 + basepage * 2048 + g_pageTable32Z[y&31][x&63];
//assert (word < 0x100000);
//word = min(word, 0xfffff);
return word;
}
static __forceinline u32 getPixelAddress32Z_0(int x, int y, u32 bw) {
static __forceinline u32 getPixelAddress32Z_0(int x, int y, u32 bw)
{
u32 basepage = ((y>>5) * (bw>>6)) + (x>>6);
u32 word = basepage * 2048 + g_pageTable32Z[y&31][x&63];
//assert (word < 0x100000);
//word = min(word, 0xfffff);
return word;
}
#define getPixelAddress24Z getPixelAddress32Z
#define getPixelAddress24Z_0 getPixelAddress32Z_0
static __forceinline u32 getPixelAddress16Z(int x, int y, u32 bp, u32 bw) {
static __forceinline u32 getPixelAddress16Z(int x, int y, u32 bp, u32 bw)
{
u32 basepage = ((y>>6) * (bw>>6)) + (x>>6);
u32 word = bp * 128 + basepage * 4096 + g_pageTable16Z[y&63][x&63];
//assert (word < 0x200000);
//word = min(word, 0x1fffff);
return word;
}
static __forceinline u32 getPixelAddress16Z_0(int x, int y, u32 bw) {
static __forceinline u32 getPixelAddress16Z_0(int x, int y, u32 bw)
{
u32 basepage = ((y>>6) * (bw>>6)) + (x>>6);
u32 word = basepage * 4096 + g_pageTable16Z[y&63][x&63];
//assert (word < 0x200000);
//word = min(word, 0x1fffff);
return word;
}
static __forceinline u32 getPixelAddress16SZ(int x, int y, u32 bp, u32 bw) {
static __forceinline u32 getPixelAddress16SZ(int x, int y, u32 bp, u32 bw)
{
u32 basepage = ((y>>6) * (bw>>6)) + (x>>6);
u32 word = bp * 128 + basepage * 4096 + g_pageTable16SZ[y&63][x&63];
//assert (word < 0x200000);
//word = min(word, 0x1fffff);
return word;
}
static __forceinline u32 getPixelAddress16SZ_0(int x, int y, u32 bw) {
static __forceinline u32 getPixelAddress16SZ_0(int x, int y, u32 bw)
{
u32 basepage = ((y>>6) * (bw>>6)) + (x>>6);
u32 word = basepage * 4096 + g_pageTable16SZ[y&63][x&63];
//assert (word < 0x200000);
//word = min(word, 0x1fffff);
return word;
}
static __forceinline void writePixel32(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw) {
// Dispatch-by-name helpers: the storage-format suffix (32, 16S, 8, 4HL, ...)
// is pasted onto the accessor name, so getPixelAddress(16S, x, y, bp, bw)
// expands to getPixelAddress16S(x, y, bp, bw) and
// getPixelAddress_0(16S, x, y, bw) expands to getPixelAddress16S_0(x, y, bw).
//
// The suffix has to be the last operand of ##. A trailing ## would paste the
// identifier with "(", which does not form a single valid preprocessing
// token and is rejected by GCC/Clang on every use of the macro.
#define getPixelAddress_0(psm,x,y,bw) getPixelAddress##psm##_0(x,y,bw)
#define getPixelAddress(psm,x,y,bp,bw) getPixelAddress##psm(x,y,bp,bw)
static __forceinline void writePixel32(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw)
{
((u32*)pmem)[getPixelAddress32(x, y, bp, bw)] = pixel;
}
static __forceinline void writePixel24(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw) {
static __forceinline void writePixel24(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw)
{
u8 *buf = (u8*)&((u32*)pmem)[getPixelAddress32(x, y, bp, bw)];
u8 *pix = (u8*)&pixel;
buf[0] = pix[0]; buf[1] = pix[1]; buf[2] = pix[2];
}
static __forceinline void writePixel16(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw) {
static __forceinline void writePixel16(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw)
{
((u16*)pmem)[getPixelAddress16(x, y, bp, bw)] = pixel;
}
static __forceinline void writePixel16S(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw) {
static __forceinline void writePixel16S(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw)
{
((u16*)pmem)[getPixelAddress16S(x, y, bp, bw)] = pixel;
}
static __forceinline void writePixel8(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw) {
static __forceinline void writePixel8(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw)
{
((u8*)pmem)[getPixelAddress8(x, y, bp, bw)] = pixel;
}
static __forceinline void writePixel8H(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw) {
static __forceinline void writePixel8H(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw)
{
((u8*)pmem)[4*getPixelAddress32(x, y, bp, bw)+3] = pixel;
}
static __forceinline void writePixel4(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw) {
static __forceinline void writePixel4(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw)
{
u32 addr = getPixelAddress4(x, y, bp, bw);
u8 pix = ((u8*)pmem)[addr/2];
if (addr & 0x1) ((u8*)pmem)[addr/2] = (pix & 0x0f) | (pixel << 4);
else ((u8*)pmem)[addr/2] = (pix & 0xf0) | (pixel);
}
static __forceinline void writePixel4HL(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw) {
static __forceinline void writePixel4HL(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw)
{
u8 *p = (u8*)pmem + 4*getPixelAddress4HL(x, y, bp, bw)+3;
*p = (*p & 0xf0) | pixel;
}
static __forceinline void writePixel4HH(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw) {
static __forceinline void writePixel4HH(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw)
{
u8 *p = (u8*)pmem + 4*getPixelAddress4HH(x, y, bp, bw)+3;
*p = (*p & 0x0f) | (pixel<<4);
}
static __forceinline void writePixel32Z(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw) {
static __forceinline void writePixel32Z(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw)
{
((u32*)pmem)[getPixelAddress32Z(x, y, bp, bw)] = pixel;
}
static __forceinline void writePixel24Z(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw) {
static __forceinline void writePixel24Z(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw)
{
u8 *buf = (u8*)pmem + 4*getPixelAddress32Z(x, y, bp, bw);
u8 *pix = (u8*)&pixel;
buf[0] = pix[0]; buf[1] = pix[1]; buf[2] = pix[2];
}
static __forceinline void writePixel16Z(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw) {
static __forceinline void writePixel16Z(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw)
{
((u16*)pmem)[getPixelAddress16Z(x, y, bp, bw)] = pixel;
}
static __forceinline void writePixel16SZ(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw) {
static __forceinline void writePixel16SZ(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw)
{
((u16*)pmem)[getPixelAddress16SZ(x, y, bp, bw)] = pixel;
}
///////////////
static __forceinline u32 readPixel32(const void* pmem, int x, int y, u32 bp, u32 bw) {
static __forceinline u32 readPixel32(const void* pmem, int x, int y, u32 bp, u32 bw)
{
return ((const u32*)pmem)[getPixelAddress32(x, y, bp, bw)];
}
static __forceinline u32 readPixel24(const void* pmem, int x, int y, u32 bp, u32 bw) {
static __forceinline u32 readPixel24(const void* pmem, int x, int y, u32 bp, u32 bw)
{
return ((const u32*)pmem)[getPixelAddress32(x, y, bp, bw)] & 0xffffff;
}
static __forceinline u32 readPixel16(const void* pmem, int x, int y, u32 bp, u32 bw) {
static __forceinline u32 readPixel16(const void* pmem, int x, int y, u32 bp, u32 bw)
{
return ((const u16*)pmem)[getPixelAddress16(x, y, bp, bw)];
}
static __forceinline u32 readPixel16S(const void* pmem, int x, int y, u32 bp, u32 bw) {
static __forceinline u32 readPixel16S(const void* pmem, int x, int y, u32 bp, u32 bw)
{
return ((const u16*)pmem)[getPixelAddress16S(x, y, bp, bw)];
}
static __forceinline u32 readPixel8(const void* pmem, int x, int y, u32 bp, u32 bw) {
static __forceinline u32 readPixel8(const void* pmem, int x, int y, u32 bp, u32 bw)
{
return ((const u8*)pmem)[getPixelAddress8(x, y, bp, bw)];
}
static __forceinline u32 readPixel8H(const void* pmem, int x, int y, u32 bp, u32 bw) {
static __forceinline u32 readPixel8H(const void* pmem, int x, int y, u32 bp, u32 bw)
{
return ((const u8*)pmem)[4*getPixelAddress32(x, y, bp, bw) + 3];
}
static __forceinline u32 readPixel4(const void* pmem, int x, int y, u32 bp, u32 bw) {
static __forceinline u32 readPixel4(const void* pmem, int x, int y, u32 bp, u32 bw)
{
u32 addr = getPixelAddress4(x, y, bp, bw);
u8 pix = ((const u8*)pmem)[addr/2];
if (addr & 0x1)
@ -320,31 +341,37 @@ static __forceinline u32 readPixel4(const void* pmem, int x, int y, u32 bp, u32
else return pix & 0xf;
}
static __forceinline u32 readPixel4HL(const void* pmem, int x, int y, u32 bp, u32 bw) {
static __forceinline u32 readPixel4HL(const void* pmem, int x, int y, u32 bp, u32 bw)
{
const u8 *p = (const u8*)pmem+4*getPixelAddress4HL(x, y, bp, bw)+3;
return *p & 0x0f;
}
static __forceinline u32 readPixel4HH(const void* pmem, int x, int y, u32 bp, u32 bw) {
static __forceinline u32 readPixel4HH(const void* pmem, int x, int y, u32 bp, u32 bw)
{
const u8 *p = (const u8*)pmem+4*getPixelAddress4HH(x, y, bp, bw) + 3;
return *p >> 4;
}
///////////////
static __forceinline u32 readPixel32Z(const void* pmem, int x, int y, u32 bp, u32 bw) {
static __forceinline u32 readPixel32Z(const void* pmem, int x, int y, u32 bp, u32 bw)
{
return ((const u32*)pmem)[getPixelAddress32Z(x, y, bp, bw)];
}
static __forceinline u32 readPixel24Z(const void* pmem, int x, int y, u32 bp, u32 bw) {
static __forceinline u32 readPixel24Z(const void* pmem, int x, int y, u32 bp, u32 bw)
{
return ((const u32*)pmem)[getPixelAddress32Z(x, y, bp, bw)] & 0xffffff;
}
static __forceinline u32 readPixel16Z(const void* pmem, int x, int y, u32 bp, u32 bw) {
static __forceinline u32 readPixel16Z(const void* pmem, int x, int y, u32 bp, u32 bw)
{
return ((const u16*)pmem)[getPixelAddress16Z(x, y, bp, bw)];
}
static __forceinline u32 readPixel16SZ(const void* pmem, int x, int y, u32 bp, u32 bw) {
static __forceinline u32 readPixel16SZ(const void* pmem, int x, int y, u32 bp, u32 bw)
{
return ((const u16*)pmem)[getPixelAddress16SZ(x, y, bp, bw)];
}
@ -352,135 +379,154 @@ static __forceinline u32 readPixel16SZ(const void* pmem, int x, int y, u32 bp,
// Functions that take 0 bps //
///////////////////////////////
static __forceinline void writePixel32_0(void* pmem, int x, int y, u32 pixel, u32 bw) {
static __forceinline void writePixel32_0(void* pmem, int x, int y, u32 pixel, u32 bw)
{
((u32*)pmem)[getPixelAddress32_0(x, y, bw)] = pixel;
}
static __forceinline void writePixel24_0(void* pmem, int x, int y, u32 pixel, u32 bw) {
static __forceinline void writePixel24_0(void* pmem, int x, int y, u32 pixel, u32 bw)
{
u8 *buf = (u8*)&((u32*)pmem)[getPixelAddress32_0(x, y, bw)];
u8 *pix = (u8*)&pixel;
#if defined(_MSC_VER) && defined(__x86_64__)
memcpy(buf, pix, 3);
#else
buf[0] = pix[0]; buf[1] = pix[1]; buf[2] = pix[2];
#endif
}
static __forceinline void writePixel16_0(void* pmem, int x, int y, u32 pixel, u32 bw) {
static __forceinline void writePixel16_0(void* pmem, int x, int y, u32 pixel, u32 bw)
{
((u16*)pmem)[getPixelAddress16_0(x, y, bw)] = pixel;
}
static __forceinline void writePixel16S_0(void* pmem, int x, int y, u32 pixel, u32 bw) {
static __forceinline void writePixel16S_0(void* pmem, int x, int y, u32 pixel, u32 bw)
{
((u16*)pmem)[getPixelAddress16S_0(x, y, bw)] = pixel;
}
static __forceinline void writePixel8_0(void* pmem, int x, int y, u32 pixel, u32 bw) {
static __forceinline void writePixel8_0(void* pmem, int x, int y, u32 pixel, u32 bw)
{
((u8*)pmem)[getPixelAddress8_0(x, y, bw)] = pixel;
}
static __forceinline void writePixel8H_0(void* pmem, int x, int y, u32 pixel, u32 bw) {
static __forceinline void writePixel8H_0(void* pmem, int x, int y, u32 pixel, u32 bw)
{
((u8*)pmem)[4*getPixelAddress32_0(x, y, bw)+3] = pixel;
}
static __forceinline void writePixel4_0(void* pmem, int x, int y, u32 pixel, u32 bw) {
static __forceinline void writePixel4_0(void* pmem, int x, int y, u32 pixel, u32 bw)
{
u32 addr = getPixelAddress4_0(x, y, bw);
u8 pix = ((u8*)pmem)[addr/2];
if (addr & 0x1) ((u8*)pmem)[addr/2] = (pix & 0x0f) | (pixel << 4);
else ((u8*)pmem)[addr/2] = (pix & 0xf0) | (pixel);
}
static __forceinline void writePixel4HL_0(void* pmem, int x, int y, u32 pixel, u32 bw) {
static __forceinline void writePixel4HL_0(void* pmem, int x, int y, u32 pixel, u32 bw)
{
u8 *p = (u8*)pmem + 4*getPixelAddress4HL_0(x, y, bw)+3;
*p = (*p & 0xf0) | pixel;
}
static __forceinline void writePixel4HH_0(void* pmem, int x, int y, u32 pixel, u32 bw) {
static __forceinline void writePixel4HH_0(void* pmem, int x, int y, u32 pixel, u32 bw)
{
u8 *p = (u8*)pmem + 4*getPixelAddress4HH_0(x, y, bw)+3;
*p = (*p & 0x0f) | (pixel<<4);
}
static __forceinline void writePixel32Z_0(void* pmem, int x, int y, u32 pixel, u32 bw) {
static __forceinline void writePixel32Z_0(void* pmem, int x, int y, u32 pixel, u32 bw)
{
((u32*)pmem)[getPixelAddress32Z_0(x, y, bw)] = pixel;
}
static __forceinline void writePixel24Z_0(void* pmem, int x, int y, u32 pixel, u32 bw) {
static __forceinline void writePixel24Z_0(void* pmem, int x, int y, u32 pixel, u32 bw)
{
u8 *buf = (u8*)pmem + 4*getPixelAddress32Z_0(x, y, bw);
u8 *pix = (u8*)&pixel;
#if defined(_MSC_VER) && defined(__x86_64__)
memcpy(buf, pix, 3);
#else
buf[0] = pix[0]; buf[1] = pix[1]; buf[2] = pix[2];
#endif
}
static __forceinline void writePixel16Z_0(void* pmem, int x, int y, u32 pixel, u32 bw) {
static __forceinline void writePixel16Z_0(void* pmem, int x, int y, u32 pixel, u32 bw)
{
((u16*)pmem)[getPixelAddress16Z_0(x, y, bw)] = pixel;
}
static __forceinline void writePixel16SZ_0(void* pmem, int x, int y, u32 pixel, u32 bw) {
static __forceinline void writePixel16SZ_0(void* pmem, int x, int y, u32 pixel, u32 bw)
{
((u16*)pmem)[getPixelAddress16SZ_0(x, y, bw)] = pixel;
}
///////////////
static __forceinline u32 readPixel32_0(const void* pmem, int x, int y, u32 bw) {
static __forceinline u32 readPixel32_0(const void* pmem, int x, int y, u32 bw)
{
return ((const u32*)pmem)[getPixelAddress32_0(x, y, bw)];
}
static __forceinline u32 readPixel24_0(const void* pmem, int x, int y, u32 bw) {
static __forceinline u32 readPixel24_0(const void* pmem, int x, int y, u32 bw)
{
return ((const u32*)pmem)[getPixelAddress32_0(x, y, bw)] & 0xffffff;
}
static __forceinline u32 readPixel16_0(const void* pmem, int x, int y, u32 bw) {
static __forceinline u32 readPixel16_0(const void* pmem, int x, int y, u32 bw)
{
return ((const u16*)pmem)[getPixelAddress16_0(x, y, bw)];
}
static __forceinline u32 readPixel16S_0(const void* pmem, int x, int y, u32 bw) {
static __forceinline u32 readPixel16S_0(const void* pmem, int x, int y, u32 bw)
{
return ((const u16*)pmem)[getPixelAddress16S_0(x, y, bw)];
}
static __forceinline u32 readPixel8_0(const void* pmem, int x, int y, u32 bw) {
static __forceinline u32 readPixel8_0(const void* pmem, int x, int y, u32 bw)
{
return ((const u8*)pmem)[getPixelAddress8_0(x, y, bw)];
}
static __forceinline u32 readPixel8H_0(const void* pmem, int x, int y, u32 bw) {
static __forceinline u32 readPixel8H_0(const void* pmem, int x, int y, u32 bw)
{
return ((const u8*)pmem)[4*getPixelAddress32_0(x, y, bw) + 3];
}
static __forceinline u32 readPixel4_0(const void* pmem, int x, int y, u32 bw) {
static __forceinline u32 readPixel4_0(const void* pmem, int x, int y, u32 bw)
{
u32 addr = getPixelAddress4_0(x, y, bw);
u8 pix = ((const u8*)pmem)[addr/2];
if (addr & 0x1)
return pix >> 4;
else return pix & 0xf;
return pix >> 4;
else
return pix & 0xf;
}
static __forceinline u32 readPixel4HL_0(const void* pmem, int x, int y, u32 bw) {
static __forceinline u32 readPixel4HL_0(const void* pmem, int x, int y, u32 bw)
{
const u8 *p = (const u8*)pmem+4*getPixelAddress4HL_0(x, y, bw)+3;
return *p & 0x0f;
}
static __forceinline u32 readPixel4HH_0(const void* pmem, int x, int y, u32 bw) {
static __forceinline u32 readPixel4HH_0(const void* pmem, int x, int y, u32 bw)
{
const u8 *p = (const u8*)pmem+4*getPixelAddress4HH_0(x, y, bw) + 3;
return *p >> 4;
}
///////////////
static __forceinline u32 readPixel32Z_0(const void* pmem, int x, int y, u32 bw) {
static __forceinline u32 readPixel32Z_0(const void* pmem, int x, int y, u32 bw)
{
return ((const u32*)pmem)[getPixelAddress32Z_0(x, y, bw)];
}
static __forceinline u32 readPixel24Z_0(const void* pmem, int x, int y, u32 bw) {
static __forceinline u32 readPixel24Z_0(const void* pmem, int x, int y, u32 bw)
{
return ((const u32*)pmem)[getPixelAddress32Z_0(x, y, bw)] & 0xffffff;
}
static __forceinline u32 readPixel16Z_0(const void* pmem, int x, int y, u32 bw) {
static __forceinline u32 readPixel16Z_0(const void* pmem, int x, int y, u32 bw)
{
return ((const u16*)pmem)[getPixelAddress16Z_0(x, y, bw)];
}
static __forceinline u32 readPixel16SZ_0(const void* pmem, int x, int y, u32 bw) {
static __forceinline u32 readPixel16SZ_0(const void* pmem, int x, int y, u32 bw)
{
return ((const u16*)pmem)[getPixelAddress16SZ_0(x, y, bw)];
}

View File

@ -0,0 +1,123 @@
/* ZeroGS KOSMOS
* Copyright (C) 2005-2006 zerofrog@gmail.com
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef MEM_SWIZZLE_H_INCLUDED
#define MEM_SWIZZLE_H_INCLUDED
#include "GS.h"
#include "Mem.h"
// special swizzle macros - which I converted to functions.
// Expands an 8x8 tile of 3-byte pixels into one 32-bit word per pixel in
// tempblock, then forwards the staged tile to SwizzleBlock32 with a source
// pitch of 32 bytes and a write mask of 0x00ffffff.
//
// dst       - destination passed through to SwizzleBlock32.
// src       - first source row; consecutive rows are `pitch` bytes apart.
// pitch     - source row stride in bytes.
// WriteMask - accepted so the signature matches the other swizzlers, but not
//             read here: the mask handed on is always 0x00ffffff.
static __forceinline void SwizzleBlock24(u8 *dst, u8 *src, int pitch, u32 WriteMask = 0xffffffff)
{
	const int lastRow = 7;
	const int lastCol = 7;

	// Rows 0..6: every pixel is fetched with a (possibly unaligned) 4-byte load.
	// The fourth byte belongs to the next pixel; presumably SwizzleBlock32
	// ignores it because of the 0x00ffffff mask.
	for (int row = 0; row < lastRow; ++row)
	{
		u8* rowSrc = src + row * pitch;
		u32* rowDst = tempblock + row * 8;

		for (int col = 0; col <= lastCol; ++col)
		{
			rowDst[col] = *(u32*)(rowSrc + col * 3);
		}
	}

	// Row 7: the first seven pixels are still fetched with 4-byte loads.
	u8* tailSrc = src + lastRow * pitch;
	u32* tailDst = tempblock + lastRow * 8;

	for (int col = 0; col < lastCol; ++col)
	{
		tailDst[col] = *(u32*)(tailSrc + col * 3);
	}

	// The very last pixel is copied byte by byte so that nothing beyond its
	// third byte is read; the top byte of the last staged word is left as is.
	u8* lastPixelSrc = tailSrc + lastCol * 3;
	u8* lastPixelDst = (u8*)tailDst + 28;
	lastPixelDst[0] = lastPixelSrc[0];
	lastPixelDst[1] = lastPixelSrc[1];
	lastPixelDst[2] = lastPixelSrc[2];

	SwizzleBlock32((u8*)dst, (u8*)tempblock, 32, 0x00ffffff);
}
#define SwizzleBlock24u SwizzleBlock24
// Expands an 8x8 tile of 1-byte pixels into 32-bit words in tempblock, with
// each source byte moved into bits 24..31 of its word, then forwards the
// staged tile to SwizzleBlock32 with write mask 0xff000000.
//
// dst       - destination passed through to SwizzleBlock32.
// src       - first source row (8 bytes are read per row); rows are `pitch` bytes apart.
// pitch     - source row stride in bytes.
// WriteMask - not read here; the mask handed on is always 0xff000000.
static __forceinline void SwizzleBlock8H(u8 *dst, u8 *src, int pitch, u32 WriteMask = 0xffffffff)
{
	for (int row = 0; row < 8; ++row)
	{
		u8* rowSrc = src + row * pitch;
		u32* rowDst = tempblock + row * 8;

		// Two 4-byte loads per row, four pixels each.
		for (int half = 0; half < 2; ++half)
		{
			const u32 packed = *(u32*)(rowSrc + half * 4);
			u32* out = rowDst + half * 4;

			// Pixel k sits in byte k of `packed`; shift it up into the top byte.
			// The bits below it are leftovers from neighbouring pixels and lie
			// outside the 0xff000000 mask.
			for (int k = 0; k < 4; ++k)
			{
				out[k] = packed << (24 - 8 * k);
			}
		}
	}

	SwizzleBlock32((u8*)dst, (u8*)tempblock, 32, 0xff000000);
}
#define SwizzleBlock8Hu SwizzleBlock8H
// Expands an 8x8 tile of 4-bit pixels into 32-bit words in tempblock, with
// each source nibble moved into bits 28..31 of its word, then forwards the
// staged tile to SwizzleBlock32 with write mask 0xf0000000.
//
// dst       - destination passed through to SwizzleBlock32.
// src       - first source row (4 bytes = 8 nibbles are read per row); rows are `pitch` bytes apart.
// pitch     - source row stride in bytes.
// WriteMask - not read here; the mask handed on is always 0xf0000000.
static __forceinline void SwizzleBlock4HH(u8 *dst, u8 *src, int pitch, u32 WriteMask = 0xffffffff)
{
	for (int row = 0; row < 8; ++row)
	{
		const u32 packed = *(u32*)(src + row * pitch);
		u32* out = tempblock + row * 8;

		// Nibble k of `packed` is shifted up so that it lands in the top nibble;
		// for k == 7 the shift is zero because it is already there.
		for (int k = 0; k < 8; ++k)
		{
			out[k] = packed << (28 - 4 * k);
		}
	}

	SwizzleBlock32((u8*)dst, (u8*)tempblock, 32, 0xf0000000);
}
#define SwizzleBlock4HHu SwizzleBlock4HH
// Expands an 8x8 tile of 4-bit pixels into 32-bit words in tempblock, with
// each source nibble moved into bits 24..27 of its word, then forwards the
// staged tile to SwizzleBlock32 with write mask 0x0f000000.
//
// dst       - destination passed through to SwizzleBlock32.
// src       - first source row (4 bytes = 8 nibbles are read per row); rows are `pitch` bytes apart.
// pitch     - source row stride in bytes.
// WriteMask - not read here; the mask handed on is always 0x0f000000.
static __forceinline void SwizzleBlock4HL(u8 *dst, u8 *src, int pitch, u32 WriteMask = 0xffffffff)
{
	for (int row = 0; row < 8; ++row)
	{
		const u32 packed = *(u32*)(src + row * pitch);
		u32* out = tempblock + row * 8;

		// Nibbles 0..6 are below or at bits 24..27 and are shifted left into place.
		for (int k = 0; k < 7; ++k)
		{
			out[k] = packed << (24 - 4 * k);
		}

		// Nibble 7 sits in bits 28..31 and has to come down by one nibble.
		out[7] = packed >> 4;
	}

	SwizzleBlock32((u8*)dst, (u8*)tempblock, 32, 0x0f000000);
}
#define SwizzleBlock4HLu SwizzleBlock4HL
#endif // MEM_SWIZZLE_H_INCLUDED

View File

@ -0,0 +1,236 @@
/* ZeroGS KOSMOS
* Copyright (C) 2005-2006 zerofrog@gmail.com
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include "GS.h"
// 4x8 lookup table; the entries are a permutation of 0..31.
// Presumably maps a block's (row, column) position inside a page to its block
// index for the 32-bit format — confirm against the users in Mem.cpp.
u32 g_blockTable32[4][8] = {
{ 0, 1, 4, 5, 16, 17, 20, 21},
{ 2, 3, 6, 7, 18, 19, 22, 23},
{ 8, 9, 12, 13, 24, 25, 28, 29},
{ 10, 11, 14, 15, 26, 27, 30, 31}
};
// 4x8 lookup table; the entries are a permutation of 0..31.
// Presumably the block layout for the 32-bit Z-buffer format — confirm
// against the users in Mem.cpp.
u32 g_blockTable32Z[4][8] = {
{ 24, 25, 28, 29, 8, 9, 12, 13},
{ 26, 27, 30, 31, 10, 11, 14, 15},
{ 16, 17, 20, 21, 0, 1, 4, 5},
{ 18, 19, 22, 23, 2, 3, 6, 7}
};
// 8x4 lookup table; the entries are a permutation of 0..31.
// Presumably the block layout for the 16-bit format — confirm against the
// users in Mem.cpp.
u32 g_blockTable16[8][4] = {
{ 0, 2, 8, 10 },
{ 1, 3, 9, 11 },
{ 4, 6, 12, 14 },
{ 5, 7, 13, 15 },
{ 16, 18, 24, 26 },
{ 17, 19, 25, 27 },
{ 20, 22, 28, 30 },
{ 21, 23, 29, 31 }
};
// 8x4 lookup table; the entries are a permutation of 0..31.
// Presumably the block layout for the 16S format — confirm against the users
// in Mem.cpp.
u32 g_blockTable16S[8][4] = {
{ 0, 2, 16, 18 },
{ 1, 3, 17, 19 },
{ 8, 10, 24, 26 },
{ 9, 11, 25, 27 },
{ 4, 6, 20, 22 },
{ 5, 7, 21, 23 },
{ 12, 14, 28, 30 },
{ 13, 15, 29, 31 }
};
// 8x4 lookup table; the entries are a permutation of 0..31.
// Presumably the block layout for the 16-bit Z-buffer format — confirm
// against the users in Mem.cpp.
u32 g_blockTable16Z[8][4] = {
{ 24, 26, 16, 18 },
{ 25, 27, 17, 19 },
{ 28, 30, 20, 22 },
{ 29, 31, 21, 23 },
{ 8, 10, 0, 2 },
{ 9, 11, 1, 3 },
{ 12, 14, 4, 6 },
{ 13, 15, 5, 7 }
};
// 8x4 lookup table; the entries are a permutation of 0..31.
// Presumably the block layout for the 16S Z-buffer format — confirm against
// the users in Mem.cpp.
u32 g_blockTable16SZ[8][4] = {
{ 24, 26, 8, 10 },
{ 25, 27, 9, 11 },
{ 16, 18, 0, 2 },
{ 17, 19, 1, 3 },
{ 28, 30, 12, 14 },
{ 29, 31, 13, 15 },
{ 20, 22, 4, 6 },
{ 21, 23, 5, 7 }
};
// 4x8 lookup table; the entries are a permutation of 0..31 and are identical
// to those of g_blockTable32.
// Presumably the block layout for the 8-bit format — confirm against the
// users in Mem.cpp.
u32 g_blockTable8[4][8] = {
{ 0, 1, 4, 5, 16, 17, 20, 21},
{ 2, 3, 6, 7, 18, 19, 22, 23},
{ 8, 9, 12, 13, 24, 25, 28, 29},
{ 10, 11, 14, 15, 26, 27, 30, 31}
};
// 8x4 lookup table; the entries are a permutation of 0..31 and are identical
// to those of g_blockTable16.
// Presumably the block layout for the 4-bit format — confirm against the
// users in Mem.cpp.
u32 g_blockTable4[8][4] = {
{ 0, 2, 8, 10 },
{ 1, 3, 9, 11 },
{ 4, 6, 12, 14 },
{ 5, 7, 13, 15 },
{ 16, 18, 24, 26 },
{ 17, 19, 25, 27 },
{ 20, 22, 28, 30 },
{ 21, 23, 29, 31 }
};
// 8x8 lookup table; the entries are a permutation of 0..63.
// Presumably maps a pixel's (row, column) position inside a block to its
// word offset for the 32-bit format — confirm against the users in Mem.cpp.
u32 g_columnTable32[8][8] = {
{ 0, 1, 4, 5, 8, 9, 12, 13 },
{ 2, 3, 6, 7, 10, 11, 14, 15 },
{ 16, 17, 20, 21, 24, 25, 28, 29 },
{ 18, 19, 22, 23, 26, 27, 30, 31 },
{ 32, 33, 36, 37, 40, 41, 44, 45 },
{ 34, 35, 38, 39, 42, 43, 46, 47 },
{ 48, 49, 52, 53, 56, 57, 60, 61 },
{ 50, 51, 54, 55, 58, 59, 62, 63 },
};
// 8x16 lookup table; the entries are a permutation of 0..127. Each pair of
// rows repeats the pattern of the first pair with 32 added.
// Presumably maps a pixel's (row, column) position inside a block to its
// 16-bit offset — confirm against the users in Mem.cpp.
u32 g_columnTable16[8][16] = {
{ 0, 2, 8, 10, 16, 18, 24, 26,
1, 3, 9, 11, 17, 19, 25, 27 },
{ 4, 6, 12, 14, 20, 22, 28, 30,
5, 7, 13, 15, 21, 23, 29, 31 },
{ 32, 34, 40, 42, 48, 50, 56, 58,
33, 35, 41, 43, 49, 51, 57, 59 },
{ 36, 38, 44, 46, 52, 54, 60, 62,
37, 39, 45, 47, 53, 55, 61, 63 },
{ 64, 66, 72, 74, 80, 82, 88, 90,
65, 67, 73, 75, 81, 83, 89, 91 },
{ 68, 70, 76, 78, 84, 86, 92, 94,
69, 71, 77, 79, 85, 87, 93, 95 },
{ 96, 98, 104, 106, 112, 114, 120, 122,
97, 99, 105, 107, 113, 115, 121, 123 },
{ 100, 102, 108, 110, 116, 118, 124, 126,
101, 103, 109, 111, 117, 119, 125, 127 },
};
// 16x16 lookup table with entries in the range 0..255. The rows are grouped
// in fours; each group is labelled "column N" below.
// Presumably maps a pixel's (row, column) position inside a block to its byte
// offset for the 8-bit format — confirm against the users in Mem.cpp.
u32 g_columnTable8[16][16] = {
{ 0, 4, 16, 20, 32, 36, 48, 52, // column 0
2, 6, 18, 22, 34, 38, 50, 54 },
{ 8, 12, 24, 28, 40, 44, 56, 60,
10, 14, 26, 30, 42, 46, 58, 62 },
{ 33, 37, 49, 53, 1, 5, 17, 21,
35, 39, 51, 55, 3, 7, 19, 23 },
{ 41, 45, 57, 61, 9, 13, 25, 29,
43, 47, 59, 63, 11, 15, 27, 31 },
{ 96, 100, 112, 116, 64, 68, 80, 84, // column 1
98, 102, 114, 118, 66, 70, 82, 86 },
{ 104, 108, 120, 124, 72, 76, 88, 92,
106, 110, 122, 126, 74, 78, 90, 94 },
{ 65, 69, 81, 85, 97, 101, 113, 117,
67, 71, 83, 87, 99, 103, 115, 119 },
{ 73, 77, 89, 93, 105, 109, 121, 125,
75, 79, 91, 95, 107, 111, 123, 127 },
{ 128, 132, 144, 148, 160, 164, 176, 180, // column 2
130, 134, 146, 150, 162, 166, 178, 182 },
{ 136, 140, 152, 156, 168, 172, 184, 188,
138, 142, 154, 158, 170, 174, 186, 190 },
{ 161, 165, 177, 181, 129, 133, 145, 149,
163, 167, 179, 183, 131, 135, 147, 151 },
{ 169, 173, 185, 189, 137, 141, 153, 157,
171, 175, 187, 191, 139, 143, 155, 159 },
{ 224, 228, 240, 244, 192, 196, 208, 212, // column 3
226, 230, 242, 246, 194, 198, 210, 214 },
{ 232, 236, 248, 252, 200, 204, 216, 220,
234, 238, 250, 254, 202, 206, 218, 222 },
{ 193, 197, 209, 213, 225, 229, 241, 245,
195, 199, 211, 215, 227, 231, 243, 247 },
{ 201, 205, 217, 221, 233, 237, 249, 253,
203, 207, 219, 223, 235, 239, 251, 255 },
};
// 16x32 lookup table with entries in the range 0..511. The rows are grouped
// in fours; each group is labelled "column N" below.
// Presumably maps a pixel's (row, column) position inside a block to its
// nibble offset for the 4-bit format — confirm against the users in Mem.cpp.
u32 g_columnTable4[16][32] = {
{ 0, 8, 32, 40, 64, 72, 96, 104, // column 0
2, 10, 34, 42, 66, 74, 98, 106,
4, 12, 36, 44, 68, 76, 100, 108,
6, 14, 38, 46, 70, 78, 102, 110 },
{ 16, 24, 48, 56, 80, 88, 112, 120,
18, 26, 50, 58, 82, 90, 114, 122,
20, 28, 52, 60, 84, 92, 116, 124,
22, 30, 54, 62, 86, 94, 118, 126 },
{ 65, 73, 97, 105, 1, 9, 33, 41,
67, 75, 99, 107, 3, 11, 35, 43,
69, 77, 101, 109, 5, 13, 37, 45,
71, 79, 103, 111, 7, 15, 39, 47 },
{ 81, 89, 113, 121, 17, 25, 49, 57,
83, 91, 115, 123, 19, 27, 51, 59,
85, 93, 117, 125, 21, 29, 53, 61,
87, 95, 119, 127, 23, 31, 55, 63 },
{ 192, 200, 224, 232, 128, 136, 160, 168, // column 1
194, 202, 226, 234, 130, 138, 162, 170,
196, 204, 228, 236, 132, 140, 164, 172,
198, 206, 230, 238, 134, 142, 166, 174 },
{ 208, 216, 240, 248, 144, 152, 176, 184,
210, 218, 242, 250, 146, 154, 178, 186,
212, 220, 244, 252, 148, 156, 180, 188,
214, 222, 246, 254, 150, 158, 182, 190 },
{ 129, 137, 161, 169, 193, 201, 225, 233,
131, 139, 163, 171, 195, 203, 227, 235,
133, 141, 165, 173, 197, 205, 229, 237,
135, 143, 167, 175, 199, 207, 231, 239 },
{ 145, 153, 177, 185, 209, 217, 241, 249,
147, 155, 179, 187, 211, 219, 243, 251,
149, 157, 181, 189, 213, 221, 245, 253,
151, 159, 183, 191, 215, 223, 247, 255 },
{ 256, 264, 288, 296, 320, 328, 352, 360, // column 2
258, 266, 290, 298, 322, 330, 354, 362,
260, 268, 292, 300, 324, 332, 356, 364,
262, 270, 294, 302, 326, 334, 358, 366 },
{ 272, 280, 304, 312, 336, 344, 368, 376,
274, 282, 306, 314, 338, 346, 370, 378,
276, 284, 308, 316, 340, 348, 372, 380,
278, 286, 310, 318, 342, 350, 374, 382 },
{ 321, 329, 353, 361, 257, 265, 289, 297,
323, 331, 355, 363, 259, 267, 291, 299,
325, 333, 357, 365, 261, 269, 293, 301,
327, 335, 359, 367, 263, 271, 295, 303 },
{ 337, 345, 369, 377, 273, 281, 305, 313,
339, 347, 371, 379, 275, 283, 307, 315,
341, 349, 373, 381, 277, 285, 309, 317,
343, 351, 375, 383, 279, 287, 311, 319 },
{ 448, 456, 480, 488, 384, 392, 416, 424, // column 3
450, 458, 482, 490, 386, 394, 418, 426,
452, 460, 484, 492, 388, 396, 420, 428,
454, 462, 486, 494, 390, 398, 422, 430 },
{ 464, 472, 496, 504, 400, 408, 432, 440,
466, 474, 498, 506, 402, 410, 434, 442,
468, 476, 500, 508, 404, 412, 436, 444,
470, 478, 502, 510, 406, 414, 438, 446 },
{ 385, 393, 417, 425, 449, 457, 481, 489,
387, 395, 419, 427, 451, 459, 483, 491,
389, 397, 421, 429, 453, 461, 485, 493,
391, 399, 423, 431, 455, 463, 487, 495 },
{ 401, 409, 433, 441, 465, 473, 497, 505,
403, 411, 435, 443, 467, 475, 499, 507,
405, 413, 437, 445, 469, 477, 501, 509,
407, 415, 439, 447, 471, 479, 503, 511 },
};
// Per-format page tables, indexed [y within page][x within page] by the
// getPixelAddress* helpers (e.g. g_pageTable32[y&31][x&63]).
// They carry no initializer here, so they start out zeroed; the code that
// fills them is not in this file — presumably done once at startup, confirm
// where (BLOCK::FillBlocks is a candidate).
u32 g_pageTable32[32][64];
u32 g_pageTable32Z[32][64];
u32 g_pageTable16[64][64];
u32 g_pageTable16S[64][64];
u32 g_pageTable16Z[64][64];
u32 g_pageTable16SZ[64][64];
u32 g_pageTable8[64][128];
u32 g_pageTable4[128][128];

View File

@ -0,0 +1,184 @@
#ifndef MEM_TRANSMIT_H_INCLUDED
#define MEM_TRANSMIT_H_INCLUDED
#include "GS.h"
#include "Mem.h"
// Shorthand for gs.dstbuf.psm. In the transmit macros below it is only referenced by the
// commented-out GS_LOG diagnostics.
#define DSTPSM gs.dstbuf.psm
// Transfers whole rows: one element of pbuf per pixel, written through writePixel<psm>_0.
//
// Expands to a block statement. It reads and updates names from the invoking scope:
//   i      - current row, advanced up to endY
//   j      - current column, reset to gs.trxpos.dx after each completed row
//   nSize  - elements still available in pbuf, decremented as pixels are written
//   pbuf   - source cursor, advanced as pixels are written
//   pstart - passed unchanged to the pixel writer
// and it jumps to a label named End, which the invoking function must provide, when the
// source data runs out before a row is finished.
//
// Parameters:
//   psm        - pasted into the writer name (writePixel<psm>_0)
//   T          - source element type; only used inside an assert
//   widthlimit - pixels written per inner iteration; asserted to be <= 4
//   endY       - first row that is NOT written
//
// The final line of the macro carries no line continuation, so the definition ends at its
// closing brace no matter what the next source line is.
//
// NOTE(review): in the fallback loop (row width not a multiple of widthlimit) j is never
// reset to gs.trxpos.dx, so only the remainder of the current row is written while i still
// runs up to endY, which also makes the main loop below a no-op. Confirm this is intended.
#define TRANSMIT_HOSTLOCAL_Y_(psm, T, widthlimit, endY) { \
    assert( (nSize%(widthlimit)) == 0 && (widthlimit) <= 4 ); \
    if( (gs.imageEndX-gs.trxpos.dx)%(widthlimit) ) { \
        /*GS_LOG("Bad Transmission! %d %d, psm: %d\n", gs.trxpos.dx, gs.imageEndX, DSTPSM);*/ \
        for(; i < (endY); ++i) { \
            for(; j < gs.imageEndX && nSize > 0; j += 1, nSize -= 1, pbuf += 1) { \
                /* row width is not a multiple of widthlimit: write one pixel at a time */ \
                writePixel##psm##_0(pstart, j%2048, i%2048, pbuf[0], gs.dstbuf.bw); \
            } \
        } \
    } \
    for(; i < (endY); ++i) { \
        for(; j < gs.imageEndX && nSize > 0; j += (widthlimit), nSize -= (widthlimit), pbuf += (widthlimit)) { \
            /* write as many pixel at one time as possible */ \
            if( nSize < (widthlimit) ) goto End; \
            writePixel##psm##_0(pstart, j%2048, i%2048, pbuf[0], gs.dstbuf.bw); \
            \
            if( (widthlimit) > 1 ) { \
                writePixel##psm##_0(pstart, (j+1)%2048, i%2048, pbuf[1], gs.dstbuf.bw); \
                \
                if( (widthlimit) > 2 ) { \
                    writePixel##psm##_0(pstart, (j+2)%2048, i%2048, pbuf[2], gs.dstbuf.bw); \
                    \
                    if( (widthlimit) > 3 ) { \
                        writePixel##psm##_0(pstart, (j+3)%2048, i%2048, pbuf[3], gs.dstbuf.bw); \
                    } \
                } \
            } \
        } \
        \
        /* row finished: rewind to the left edge; otherwise the data ran out mid-row */ \
        if( j >= gs.imageEndX ) { assert(j == gs.imageEndX); j = gs.trxpos.dx; } \
        else { assert( gs.imageTransfer == -1 || nSize*sizeof(T)/4 == 0 ); goto End; } \
    } \
}
// transmit until endX, don't check size since it has already been prevalidated
//
// Writes blockheight partial rows, each from column startX up to (not including)
// gs.imageEndX, one element of pbuf per pixel, through writePixel<psm>_0.
// Expands to a block statement that uses names from the invoking scope: i (top row of the
// strip, not modified), j (left at gs.imageEndX or startX afterwards), pbuf (advanced),
// pstart, pitch and fracX. After every row pbuf is additionally advanced by pitch-fracX.
// T and widthlimit are accepted for signature parity with the other variants and unused.
//
// The final line of the macro carries no line continuation, so the definition ends at its
// closing brace no matter what the next source line is.
#define TRANSMIT_HOSTLOCAL_X_(psm, T, widthlimit, blockheight, startX) { \
    for(int tempi = 0; tempi < (blockheight); ++tempi) { \
        for(j = (startX); j < gs.imageEndX; j++, pbuf++) { \
            writePixel##psm##_0(pstart, j%2048, (i+tempi)%2048, pbuf[0], gs.dstbuf.bw); \
        } \
        /* skip the part of the source row that lies outside this strip */ \
        pbuf += pitch-fracX; \
    } \
}
//template <class T>
//static __forceinline void TransmitHostLocalX_(_writePixel_0 wp, u32 widthlimit, u32 blockheight, u32 startX)
//{
// for(int tempi = 0; tempi < blockheight; ++tempi)
// {
// for(j = startX; j < gs.imageEndX; j++, pbuf++)
// {
// wp(pstart, j%2048, (i+tempi)%2048, pbuf[0], gs.dstbuf.bw);
// }
// pbuf += pitch - fracX;
// }
//}
// Transfers whole rows for sources that advance 3 units of pbuf per pixel; every pixel value
// is fetched as *(u32*)pbuf and handed to writePixel<psm>_0.
//
// Expands to a block statement. It reads and updates names from the invoking scope:
// i (current row, advanced up to endY), j (current column, reset to gs.trxpos.dx after each
// completed row), nSize (pixels still available, decremented as pixels are written),
// pbuf (source cursor) and pstart (passed to the writer). It jumps to a label named End,
// which the invoking function must provide, when the data runs out before a row is finished.
//
// Parameters:
//   psm        - pasted into the writer name (writePixel<psm>_0)
//   T          - unused here
//   widthlimit - 8 selects the unrolled path, provided the row width is a multiple of 8;
//                anything else takes the one-pixel-at-a-time path
//   endY       - first row that is NOT written
//
// The final line of the macro carries no line continuation, so the definition ends at its
// closing brace no matter what the next source line is.
//
// NOTE(review): assuming pbuf is a byte pointer, each 32-bit load reads one byte beyond the
// pixel it belongs to and is generally unaligned — confirm the source buffer is padded and
// that the writer discards the top byte.
#define TRANSMIT_HOSTLOCAL_Y_24(psm, T, widthlimit, endY) { \
    if( (widthlimit) != 8 || ((gs.imageEndX-gs.trxpos.dx)%(widthlimit)) ) { \
        /*GS_LOG("Bad Transmission! %d %d, psm: %d\n", gs.trxpos.dx, gs.imageEndX, DSTPSM);*/ \
        for(; i < (endY); ++i) { \
            for(; j < gs.imageEndX && nSize > 0; j += 1, nSize -= 1, pbuf += 3) { \
                writePixel##psm##_0(pstart, j%2048, i%2048, *(u32*)(pbuf), gs.dstbuf.bw); \
            } \
            \
            /* row finished: rewind to the left edge; otherwise the data ran out mid-row */ \
            if( j >= gs.imageEndX ) { assert(gs.imageTransfer == -1 || j == gs.imageEndX); j = gs.trxpos.dx; } \
            else { assert( gs.imageTransfer == -1 || nSize == 0 ); goto End; } \
        } \
    } \
    else { \
        assert( /*(nSize%widthlimit) == 0 &&*/ (widthlimit) == 8 ); \
        for(; i < (endY); ++i) { \
            for(; j < gs.imageEndX && nSize > 0; j += (widthlimit), nSize -= (widthlimit), pbuf += 3*(widthlimit)) { \
                if( nSize < (widthlimit) ) goto End; \
                /* write as many pixel at one time as possible */ \
                writePixel##psm##_0(pstart, j%2048, i%2048, *(u32*)(pbuf+0), gs.dstbuf.bw); \
                writePixel##psm##_0(pstart, (j+1)%2048, i%2048, *(u32*)(pbuf+3), gs.dstbuf.bw); \
                writePixel##psm##_0(pstart, (j+2)%2048, i%2048, *(u32*)(pbuf+6), gs.dstbuf.bw); \
                writePixel##psm##_0(pstart, (j+3)%2048, i%2048, *(u32*)(pbuf+9), gs.dstbuf.bw); \
                writePixel##psm##_0(pstart, (j+4)%2048, i%2048, *(u32*)(pbuf+12), gs.dstbuf.bw); \
                writePixel##psm##_0(pstart, (j+5)%2048, i%2048, *(u32*)(pbuf+15), gs.dstbuf.bw); \
                writePixel##psm##_0(pstart, (j+6)%2048, i%2048, *(u32*)(pbuf+18), gs.dstbuf.bw); \
                writePixel##psm##_0(pstart, (j+7)%2048, i%2048, *(u32*)(pbuf+21), gs.dstbuf.bw); \
            } \
            \
            if( j >= gs.imageEndX ) { assert(gs.imageTransfer == -1 || j == gs.imageEndX); j = gs.trxpos.dx; } \
            else { \
                if( nSize < 0 ) { \
                    /* extracted too much */ \
                    assert( (nSize%3)==0 && nSize > -24 ); \
                    j += nSize/3; \
                    nSize = 0; \
                } \
                assert( gs.imageTransfer == -1 || nSize == 0 ); \
                goto End; \
            } \
        } \
    } \
}
// transmit until endX, don't check size since it has already been prevalidated
//
// Writes blockheight partial rows, each from column startX up to (not including)
// gs.imageEndX, through writePixel<psm>_0. The source cursor pbuf advances 3 units per
// pixel and every pixel value is fetched as *(u32*)pbuf; after each row pbuf is additionally
// advanced by 3*(pitch-fracX).
// Expands to a block statement that uses names from the invoking scope: i (top row of the
// strip, not modified), j, pbuf, pstart, pitch and fracX. T and widthlimit are unused.
//
// The final line of the macro carries no line continuation, so the definition ends at its
// closing brace no matter what the next source line is.
#define TRANSMIT_HOSTLOCAL_X_24(psm, T, widthlimit, blockheight, startX) { \
    for(int tempi = 0; tempi < (blockheight); ++tempi) { \
        for(j = (startX); j < gs.imageEndX; j++, pbuf += 3) { \
            writePixel##psm##_0(pstart, j%2048, (i+tempi)%2048, *(u32*)pbuf, gs.dstbuf.bw); \
        } \
        /* skip the part of the source row that lies outside this strip */ \
        pbuf += 3*(pitch-fracX); \
    } \
}
// meant for 4bit transfers
//
// Transfers whole rows where every element of pbuf carries two pixels: the low nibble is
// written at the even column, the high nibble (*pbuf>>4) at the following column, both
// through writePixel<psm>_0.
//
// Expands to a block statement. It reads and updates names from the invoking scope:
// i (current row, advanced up to endY), j (current column, reset to gs.trxpos.dx after each
// completed row), nSize (pixels still available, decremented by widthlimit per step),
// pbuf (source cursor, advanced once per pixel pair) and pstart (passed to the writer).
// It jumps to a label named End, which the invoking function must provide, when the data
// runs out before a row is finished.
//
// Parameters:
//   psm        - pasted into the writer name (writePixel<psm>_0)
//   T          - unused here
//   widthlimit - pixels written per inner iteration; the code handles 2, 4, 6 or 8
//   endY       - first row that is NOT written
//
// The final line of the macro carries no line continuation, so the definition ends at its
// closing brace no matter what the next source line is.
//
// NOTE(review): *pbuf>>4 only yields a clean 4-bit value if pbuf points at an unsigned
// 8-bit type — confirm at the call sites.
#define TRANSMIT_HOSTLOCAL_Y_4(psm, T, widthlimit, endY) { \
    for(; i < (endY); ++i) { \
        for(; j < gs.imageEndX && nSize > 0; j += (widthlimit), nSize -= (widthlimit)) { \
            /* write as many pixel at one time as possible */ \
            writePixel##psm##_0(pstart, j%2048, i%2048, *pbuf&0x0f, gs.dstbuf.bw); \
            writePixel##psm##_0(pstart, (j+1)%2048, i%2048, *pbuf>>4, gs.dstbuf.bw); \
            pbuf++; \
            if( (widthlimit) > 2 ) { \
                writePixel##psm##_0(pstart, (j+2)%2048, i%2048, *pbuf&0x0f, gs.dstbuf.bw); \
                writePixel##psm##_0(pstart, (j+3)%2048, i%2048, *pbuf>>4, gs.dstbuf.bw); \
                pbuf++; \
                \
                if( (widthlimit) > 4 ) { \
                    writePixel##psm##_0(pstart, (j+4)%2048, i%2048, *pbuf&0x0f, gs.dstbuf.bw); \
                    writePixel##psm##_0(pstart, (j+5)%2048, i%2048, *pbuf>>4, gs.dstbuf.bw); \
                    pbuf++; \
                    \
                    if( (widthlimit) > 6 ) { \
                        writePixel##psm##_0(pstart, (j+6)%2048, i%2048, *pbuf&0x0f, gs.dstbuf.bw); \
                        writePixel##psm##_0(pstart, (j+7)%2048, i%2048, *pbuf>>4, gs.dstbuf.bw); \
                        pbuf++; \
                    } \
                } \
            } \
        } \
        \
        /* row finished: rewind to the left edge; otherwise the data ran out mid-row */ \
        if( j >= gs.imageEndX ) { j = gs.trxpos.dx; } \
        else { assert( gs.imageTransfer == -1 || (nSize/32) == 0 ); goto End; } \
    } \
}
// transmit until endX, don't check size since it has already been prevalidated
//
// Writes blockheight partial rows, each from column startX up to gs.imageEndX in steps of
// two pixels. Every element of pbuf carries two pixels: the low nibble goes to column j,
// the high nibble (pbuf[0]>>4) to column j+1, both through writePixel<psm>_0. After each
// row pbuf is additionally advanced by (pitch-fracX)/2.
// Expands to a block statement that uses names from the invoking scope: i (top row of the
// strip, not modified), j, pbuf, pstart, pitch and fracX. T and widthlimit are unused.
//
// The final line of the macro deliberately carries no line continuation: with one, the
// preprocessor would splice the next source line (another #define) into this macro's body.
#define TRANSMIT_HOSTLOCAL_X_4(psm, T, widthlimit, blockheight, startX) { \
    for(int tempi = 0; tempi < (blockheight); ++tempi) { \
        for(j = (startX); j < gs.imageEndX; j+=2, pbuf++) { \
            writePixel##psm##_0(pstart, j%2048, (i+tempi)%2048, pbuf[0]&0x0f, gs.dstbuf.bw); \
            writePixel##psm##_0(pstart, (j+1)%2048, (i+tempi)%2048, pbuf[0]>>4, gs.dstbuf.bw); \
        } \
        /* skip the part of the source row that lies outside this strip */ \
        pbuf += (pitch-fracX)/2; \
    } \
}
// Suffix dispatchers for the transmit macros.
// The first argument is pasted directly onto the macro name, so passing _, _24 or _4
// selects TRANSMIT_HOSTLOCAL_X_ / _X_24 / _X_4 (and likewise for the _Y variants); the
// remaining arguments are forwarded unchanged.
#define TRANSMIT_HOSTLOCAL_X(suffix, psm, T, widthlimit, blockheight, startX) \
    TRANSMIT_HOSTLOCAL_X##suffix(psm, T, widthlimit, blockheight, startX)

#define TRANSMIT_HOSTLOCAL_Y(suffix, psm, T, widthlimit, endY) \
    TRANSMIT_HOSTLOCAL_Y##suffix(psm, T, widthlimit, endY)
// calculate pitch in source buffer
//
// Three scalings of a pitch value, one per transmit flavour:
//   TransmitPitch_<T>   - pitch multiplied by sizeof(T)
//   TransmitPitch_24<T> - pitch multiplied by 3 (T is not used)
//   TransmitPitch_4<T>  - pitch divided by 2 with integer division (T is not used)
// The TRANSMIT_PITCH_* macros only reorder the arguments so the helpers can be selected by
// token pasting with the same suffixes as the TRANSMIT_HOSTLOCAL_* macros.
template <class T>
static __forceinline int TransmitPitch_(int pitch)
{
    // the product is formed in size_t and narrowed back to int, as the return type dictates
    return static_cast<int>(pitch * sizeof(T));
}

template <class T>
static __forceinline int TransmitPitch_24(int pitch)
{
    return (pitch * 3);
}

template <class T>
static __forceinline int TransmitPitch_4(int pitch)
{
    return (pitch / 2);
}

#define TRANSMIT_PITCH_(pitch, T)   TransmitPitch_<T>(pitch)
#define TRANSMIT_PITCH_24(pitch, T) TransmitPitch_24<T>(pitch)
#define TRANSMIT_PITCH_4(pitch, T)  TransmitPitch_4<T>(pitch)
#endif // MEM_TRANSMIT_H_INCLUDED

View File

@ -323,6 +323,10 @@
RelativePath="..\Mem.cpp"
>
</File>
<File
RelativePath="..\Mem_Tables.cpp"
>
</File>
<File
RelativePath="..\memcpy_amd.cpp"
>
@ -439,6 +443,15 @@
RelativePath="..\Mem.h"
>
</File>
<File
RelativePath="..\Mem_Swizzle.h"
>
</File>
<File
RelativePath="..\Mem_Transmit.h"
>
</File>
</File>
<File
RelativePath="..\PS2Edefs.h"
>