mirror of https://github.com/PCSX2/pcsx2.git
zzogl-pg: Fighting with Mem.cpp. (This probably breaks things a bit. This is more of a 'save-my-progress' commit then anything. Basically, I'm expanding out a bunch of tangled macros right now, and creating a bunch of repetitive code that hopefully I'll be able to reduce afterwards. )
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@2757 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
3e6b6c86c1
commit
5cfca09385
|
@ -78,6 +78,9 @@
|
|||
<Unit filename="../zerogs.glade" />
|
||||
<Unit filename="../../Mem.cpp" />
|
||||
<Unit filename="../../Mem.h" />
|
||||
<Unit filename="../../Mem_Swizzle.h" />
|
||||
<Unit filename="../../Mem_Tables.cpp" />
|
||||
<Unit filename="../../Mem_Transmit.h" />
|
||||
<Unit filename="../../Regs.cpp" />
|
||||
<Unit filename="../../Regs.h" />
|
||||
<Unit filename="../../Win32/Conf.cpp">
|
||||
|
|
|
@ -26,7 +26,9 @@ libzzoglpg_LDADD=$(libzzoglpg_a_OBJECTS)
|
|||
libzzoglpg_a_SOURCES = \
|
||||
GSmain.cpp GifTransfer.cpp memcpy_amd.cpp Regs.cpp x86.cpp zpipe.cpp Mem.cpp \
|
||||
rasterfont.cpp targets.cpp zerogs.cpp ZZoglVB.cpp ZZoglShoots.cpp ZZoglCreate.cpp \
|
||||
ZZoglShaders.cpp ZZoglCRTC.cpp ZZoglSave.cpp ZZoglFlush.cpp
|
||||
ZZoglShaders.cpp ZZoglCRTC.cpp ZZoglSave.cpp ZZoglFlush.cpp \
|
||||
Mem_Swizzle.h Mem_Tables.cpp Mem_Transmit.h
|
||||
|
||||
|
||||
libzzoglpg_a_SOURCES += x86-32.S
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -23,14 +23,26 @@
|
|||
#include <vector>
|
||||
|
||||
// works only when base is a power of 2
|
||||
#define ROUND_UPPOW2(val, base) (((val)+(base-1))&~(base-1))
|
||||
#define ROUND_DOWNPOW2(val, base) ((val)&~(base-1))
|
||||
#define MOD_POW2(val, base) ((val)&(base-1))
|
||||
static __forceinline int ROUND_UPPOW2(int val, int base) { return (((val)+(base-1))&~(base-1)); }
|
||||
static __forceinline int ROUND_DOWNPOW2(int val, int base) { return ((val)&~(base-1)); }
|
||||
static __forceinline int MOD_POW2(int val, int base) { return ((val)&(base-1)); }
|
||||
|
||||
// d3d texture dims
|
||||
#define BLOCK_TEXWIDTH 128
|
||||
#define BLOCK_TEXHEIGHT 512
|
||||
const int BLOCK_TEXWIDTH = 128;
|
||||
const int BLOCK_TEXHEIGHT = 512;
|
||||
|
||||
extern PCSX2_ALIGNED16(u32 tempblock[64]);
|
||||
|
||||
|
||||
typedef u32 ( *_getPixelAddress)(int x, int y, u32 bp, u32 bw);
|
||||
typedef u32 (*_getPixelAddress_0)(int x, int y, u32 bw);
|
||||
typedef void (*_writePixel)(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw);
|
||||
typedef void (*_writePixel_0)(void* pmem, int x, int y, u32 pixel, u32 bw);
|
||||
typedef u32 (*_readPixel)(const void* pmem, int x, int y, u32 bp, u32 bw);
|
||||
typedef u32 (*_readPixel_0)(const void* pmem, int x, int y, u32 bw);
|
||||
typedef int (*_TransferHostLocal)(const void* pbyMem, u32 nQWordSize);
|
||||
typedef void (*_TransferLocalHost)(void* pbyMem, u32 nQWordSize);
|
||||
typedef void (__fastcall *_SwizzleBlock)(u8 *dst, u8 *src, int pitch, u32 WriteMask);
|
||||
// rest not visible externally
|
||||
struct BLOCK
|
||||
{
|
||||
|
@ -46,14 +58,14 @@ struct BLOCK
|
|||
u32* blockTable;
|
||||
u32* columnTable;
|
||||
|
||||
u32 (*getPixelAddress)(int x, int y, u32 bp, u32 bw);
|
||||
u32 (*getPixelAddress_0)(int x, int y, u32 bw);
|
||||
void (*writePixel)(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw);
|
||||
void (*writePixel_0)(void* pmem, int x, int y, u32 pixel, u32 bw);
|
||||
u32 (*readPixel)(const void* pmem, int x, int y, u32 bp, u32 bw);
|
||||
u32 (*readPixel_0)(const void* pmem, int x, int y, u32 bw);
|
||||
int (*TransferHostLocal)(const void* pbyMem, u32 nQWordSize);
|
||||
void (*TransferLocalHost)(void* pbyMem, u32 nQWordSize);
|
||||
_getPixelAddress getPixelAddress;
|
||||
_getPixelAddress_0 getPixelAddress_0;
|
||||
_writePixel writePixel;
|
||||
_writePixel_0 writePixel_0;
|
||||
_readPixel readPixel;
|
||||
_readPixel_0 readPixel_0;
|
||||
_TransferHostLocal TransferHostLocal;
|
||||
_TransferLocalHost TransferLocalHost;
|
||||
|
||||
// texture must be of dims BLOCK_TEXWIDTH and BLOCK_TEXHEIGHT
|
||||
static void FillBlocks(std::vector<char>& vBlockData, std::vector<char>& vBilinearData, int floatfmt);
|
||||
|
@ -84,19 +96,17 @@ extern u32 g_pageTable16SZ[64][64];
|
|||
extern u32 g_pageTable8[64][128];
|
||||
extern u32 g_pageTable4[128][128];
|
||||
|
||||
static __forceinline u32 getPixelAddress32(int x, int y, u32 bp, u32 bw) {
|
||||
static __forceinline u32 getPixelAddress32(int x, int y, u32 bp, u32 bw)
|
||||
{
|
||||
u32 basepage = ((y>>5) * (bw>>6)) + (x>>6);
|
||||
u32 word = bp * 64 + basepage * 2048 + g_pageTable32[y&31][x&63];
|
||||
//assert (word < 0x100000);
|
||||
//word = min(word, 0xfffff);
|
||||
return word;
|
||||
}
|
||||
|
||||
static __forceinline u32 getPixelAddress32_0(int x, int y, u32 bw) {
|
||||
static __forceinline u32 getPixelAddress32_0(int x, int y, u32 bw)
|
||||
{
|
||||
u32 basepage = ((y>>5) * (bw>>6)) + (x>>6);
|
||||
u32 word = basepage * 2048 + g_pageTable32[y&31][x&63];
|
||||
//assert (word < 0x100000);
|
||||
//word = min(word, 0xfffff);
|
||||
return word;
|
||||
}
|
||||
|
||||
|
@ -109,210 +119,221 @@ static __forceinline u32 getPixelAddress32_0(int x, int y, u32 bw) {
|
|||
#define getPixelAddress4HH getPixelAddress32
|
||||
#define getPixelAddress4HH_0 getPixelAddress32_0
|
||||
|
||||
static __forceinline u32 getPixelAddress16(int x, int y, u32 bp, u32 bw) {
|
||||
static __forceinline u32 getPixelAddress16(int x, int y, u32 bp, u32 bw)
|
||||
{
|
||||
u32 basepage = ((y>>6) * (bw>>6)) + (x>>6);
|
||||
u32 word = bp * 128 + basepage * 4096 + g_pageTable16[y&63][x&63];
|
||||
//assert (word < 0x200000);
|
||||
//word = min(word, 0x1fffff);
|
||||
return word;
|
||||
}
|
||||
|
||||
static __forceinline u32 getPixelAddress16_0(int x, int y, u32 bw) {
|
||||
static __forceinline u32 getPixelAddress16_0(int x, int y, u32 bw)
|
||||
{
|
||||
u32 basepage = ((y>>6) * (bw>>6)) + (x>>6);
|
||||
u32 word = basepage * 4096 + g_pageTable16[y&63][x&63];
|
||||
//assert (word < 0x200000);
|
||||
//word = min(word, 0x1fffff);
|
||||
return word;
|
||||
}
|
||||
|
||||
static __forceinline u32 getPixelAddress16S(int x, int y, u32 bp, u32 bw) {
|
||||
static __forceinline u32 getPixelAddress16S(int x, int y, u32 bp, u32 bw)
|
||||
{
|
||||
u32 basepage = ((y>>6) * (bw>>6)) + (x>>6);
|
||||
u32 word = bp * 128 + basepage * 4096 + g_pageTable16S[y&63][x&63];
|
||||
//assert (word < 0x200000);
|
||||
//word = min(word, 0x1fffff);
|
||||
return word;
|
||||
}
|
||||
|
||||
static __forceinline u32 getPixelAddress16S_0(int x, int y, u32 bw) {
|
||||
static __forceinline u32 getPixelAddress16S_0(int x, int y, u32 bw)
|
||||
{
|
||||
u32 basepage = ((y>>6) * (bw>>6)) + (x>>6);
|
||||
u32 word = basepage * 4096 + g_pageTable16S[y&63][x&63];
|
||||
//assert (word < 0x200000);
|
||||
//word = min(word, 0x1fffff);
|
||||
return word;
|
||||
}
|
||||
|
||||
static __forceinline u32 getPixelAddress8(int x, int y, u32 bp, u32 bw) {
|
||||
static __forceinline u32 getPixelAddress8(int x, int y, u32 bp, u32 bw)
|
||||
{
|
||||
u32 basepage = ((y>>6) * ((bw+127)>>7)) + (x>>7);
|
||||
u32 word = bp * 256 + basepage * 8192 + g_pageTable8[y&63][x&127];
|
||||
//assert (word < 0x400000);
|
||||
//word = min(word, 0x3fffff);
|
||||
return word;
|
||||
}
|
||||
|
||||
static __forceinline u32 getPixelAddress8_0(int x, int y, u32 bw) {
|
||||
static __forceinline u32 getPixelAddress8_0(int x, int y, u32 bw)
|
||||
{
|
||||
u32 basepage = ((y>>6) * ((bw+127)>>7)) + (x>>7);
|
||||
u32 word = basepage * 8192 + g_pageTable8[y&63][x&127];
|
||||
//assert (word < 0x400000);
|
||||
//word = min(word, 0x3fffff);
|
||||
return word;
|
||||
}
|
||||
|
||||
static __forceinline u32 getPixelAddress4(int x, int y, u32 bp, u32 bw) {
|
||||
static __forceinline u32 getPixelAddress4(int x, int y, u32 bp, u32 bw)
|
||||
{
|
||||
u32 basepage = ((y>>7) * ((bw+127)>>7)) + (x>>7);
|
||||
u32 word = bp * 512 + basepage * 16384 + g_pageTable4[y&127][x&127];
|
||||
//assert (word < 0x800000);
|
||||
//word = min(word, 0x7fffff);
|
||||
return word;
|
||||
}
|
||||
|
||||
static __forceinline u32 getPixelAddress4_0(int x, int y, u32 bw) {
|
||||
static __forceinline u32 getPixelAddress4_0(int x, int y, u32 bw)
|
||||
{
|
||||
u32 basepage = ((y>>7) * ((bw+127)>>7)) + (x>>7);
|
||||
u32 word = basepage * 16384 + g_pageTable4[y&127][x&127];
|
||||
//assert (word < 0x800000);
|
||||
//word = min(word, 0x7fffff);
|
||||
return word;
|
||||
}
|
||||
|
||||
static __forceinline u32 getPixelAddress32Z(int x, int y, u32 bp, u32 bw) {
|
||||
static __forceinline u32 getPixelAddress32Z(int x, int y, u32 bp, u32 bw)
|
||||
{
|
||||
u32 basepage = ((y>>5) * (bw>>6)) + (x>>6);
|
||||
u32 word = bp * 64 + basepage * 2048 + g_pageTable32Z[y&31][x&63];
|
||||
//assert (word < 0x100000);
|
||||
//word = min(word, 0xfffff);
|
||||
return word;
|
||||
}
|
||||
|
||||
static __forceinline u32 getPixelAddress32Z_0(int x, int y, u32 bw) {
|
||||
static __forceinline u32 getPixelAddress32Z_0(int x, int y, u32 bw)
|
||||
{
|
||||
u32 basepage = ((y>>5) * (bw>>6)) + (x>>6);
|
||||
u32 word = basepage * 2048 + g_pageTable32Z[y&31][x&63];
|
||||
//assert (word < 0x100000);
|
||||
//word = min(word, 0xfffff);
|
||||
return word;
|
||||
}
|
||||
|
||||
#define getPixelAddress24Z getPixelAddress32Z
|
||||
#define getPixelAddress24Z_0 getPixelAddress32Z_0
|
||||
|
||||
static __forceinline u32 getPixelAddress16Z(int x, int y, u32 bp, u32 bw) {
|
||||
static __forceinline u32 getPixelAddress16Z(int x, int y, u32 bp, u32 bw)
|
||||
{
|
||||
u32 basepage = ((y>>6) * (bw>>6)) + (x>>6);
|
||||
u32 word = bp * 128 + basepage * 4096 + g_pageTable16Z[y&63][x&63];
|
||||
//assert (word < 0x200000);
|
||||
//word = min(word, 0x1fffff);
|
||||
return word;
|
||||
}
|
||||
|
||||
static __forceinline u32 getPixelAddress16Z_0(int x, int y, u32 bw) {
|
||||
static __forceinline u32 getPixelAddress16Z_0(int x, int y, u32 bw)
|
||||
{
|
||||
u32 basepage = ((y>>6) * (bw>>6)) + (x>>6);
|
||||
u32 word = basepage * 4096 + g_pageTable16Z[y&63][x&63];
|
||||
//assert (word < 0x200000);
|
||||
//word = min(word, 0x1fffff);
|
||||
return word;
|
||||
}
|
||||
|
||||
static __forceinline u32 getPixelAddress16SZ(int x, int y, u32 bp, u32 bw) {
|
||||
static __forceinline u32 getPixelAddress16SZ(int x, int y, u32 bp, u32 bw)
|
||||
{
|
||||
u32 basepage = ((y>>6) * (bw>>6)) + (x>>6);
|
||||
u32 word = bp * 128 + basepage * 4096 + g_pageTable16SZ[y&63][x&63];
|
||||
//assert (word < 0x200000);
|
||||
//word = min(word, 0x1fffff);
|
||||
return word;
|
||||
}
|
||||
|
||||
static __forceinline u32 getPixelAddress16SZ_0(int x, int y, u32 bw) {
|
||||
static __forceinline u32 getPixelAddress16SZ_0(int x, int y, u32 bw)
|
||||
{
|
||||
u32 basepage = ((y>>6) * (bw>>6)) + (x>>6);
|
||||
u32 word = basepage * 4096 + g_pageTable16SZ[y&63][x&63];
|
||||
//assert (word < 0x200000);
|
||||
//word = min(word, 0x1fffff);
|
||||
return word;
|
||||
}
|
||||
|
||||
static __forceinline void writePixel32(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw) {
|
||||
#define getPixelAddress_0(psm,x,y,bw) getPixelAddress##psm##_0(x,y,bw)
|
||||
#define getPixelAddress(psm,x,y,bp,bw) getPixelAddress##psm##(x,y,bp,bw)
|
||||
|
||||
|
||||
|
||||
static __forceinline void writePixel32(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw)
|
||||
{
|
||||
((u32*)pmem)[getPixelAddress32(x, y, bp, bw)] = pixel;
|
||||
}
|
||||
|
||||
static __forceinline void writePixel24(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw) {
|
||||
static __forceinline void writePixel24(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw)
|
||||
{
|
||||
u8 *buf = (u8*)&((u32*)pmem)[getPixelAddress32(x, y, bp, bw)];
|
||||
u8 *pix = (u8*)&pixel;
|
||||
buf[0] = pix[0]; buf[1] = pix[1]; buf[2] = pix[2];
|
||||
}
|
||||
|
||||
static __forceinline void writePixel16(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw) {
|
||||
static __forceinline void writePixel16(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw)
|
||||
{
|
||||
((u16*)pmem)[getPixelAddress16(x, y, bp, bw)] = pixel;
|
||||
}
|
||||
|
||||
static __forceinline void writePixel16S(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw) {
|
||||
static __forceinline void writePixel16S(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw)
|
||||
{
|
||||
((u16*)pmem)[getPixelAddress16S(x, y, bp, bw)] = pixel;
|
||||
}
|
||||
|
||||
static __forceinline void writePixel8(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw) {
|
||||
static __forceinline void writePixel8(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw)
|
||||
{
|
||||
((u8*)pmem)[getPixelAddress8(x, y, bp, bw)] = pixel;
|
||||
}
|
||||
|
||||
static __forceinline void writePixel8H(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw) {
|
||||
static __forceinline void writePixel8H(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw)
|
||||
{
|
||||
((u8*)pmem)[4*getPixelAddress32(x, y, bp, bw)+3] = pixel;
|
||||
}
|
||||
|
||||
static __forceinline void writePixel4(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw) {
|
||||
static __forceinline void writePixel4(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw)
|
||||
{
|
||||
u32 addr = getPixelAddress4(x, y, bp, bw);
|
||||
u8 pix = ((u8*)pmem)[addr/2];
|
||||
if (addr & 0x1) ((u8*)pmem)[addr/2] = (pix & 0x0f) | (pixel << 4);
|
||||
else ((u8*)pmem)[addr/2] = (pix & 0xf0) | (pixel);
|
||||
}
|
||||
|
||||
static __forceinline void writePixel4HL(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw) {
|
||||
static __forceinline void writePixel4HL(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw)
|
||||
{
|
||||
u8 *p = (u8*)pmem + 4*getPixelAddress4HL(x, y, bp, bw)+3;
|
||||
*p = (*p & 0xf0) | pixel;
|
||||
}
|
||||
|
||||
static __forceinline void writePixel4HH(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw) {
|
||||
static __forceinline void writePixel4HH(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw)
|
||||
{
|
||||
u8 *p = (u8*)pmem + 4*getPixelAddress4HH(x, y, bp, bw)+3;
|
||||
*p = (*p & 0x0f) | (pixel<<4);
|
||||
}
|
||||
|
||||
static __forceinline void writePixel32Z(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw) {
|
||||
static __forceinline void writePixel32Z(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw)
|
||||
{
|
||||
((u32*)pmem)[getPixelAddress32Z(x, y, bp, bw)] = pixel;
|
||||
}
|
||||
|
||||
static __forceinline void writePixel24Z(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw) {
|
||||
static __forceinline void writePixel24Z(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw)
|
||||
{
|
||||
u8 *buf = (u8*)pmem + 4*getPixelAddress32Z(x, y, bp, bw);
|
||||
u8 *pix = (u8*)&pixel;
|
||||
buf[0] = pix[0]; buf[1] = pix[1]; buf[2] = pix[2];
|
||||
}
|
||||
|
||||
static __forceinline void writePixel16Z(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw) {
|
||||
static __forceinline void writePixel16Z(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw)
|
||||
{
|
||||
((u16*)pmem)[getPixelAddress16Z(x, y, bp, bw)] = pixel;
|
||||
}
|
||||
|
||||
static __forceinline void writePixel16SZ(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw) {
|
||||
static __forceinline void writePixel16SZ(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw)
|
||||
{
|
||||
((u16*)pmem)[getPixelAddress16SZ(x, y, bp, bw)] = pixel;
|
||||
}
|
||||
|
||||
|
||||
///////////////
|
||||
|
||||
static __forceinline u32 readPixel32(const void* pmem, int x, int y, u32 bp, u32 bw) {
|
||||
static __forceinline u32 readPixel32(const void* pmem, int x, int y, u32 bp, u32 bw)
|
||||
{
|
||||
return ((const u32*)pmem)[getPixelAddress32(x, y, bp, bw)];
|
||||
}
|
||||
|
||||
static __forceinline u32 readPixel24(const void* pmem, int x, int y, u32 bp, u32 bw) {
|
||||
static __forceinline u32 readPixel24(const void* pmem, int x, int y, u32 bp, u32 bw)
|
||||
{
|
||||
return ((const u32*)pmem)[getPixelAddress32(x, y, bp, bw)] & 0xffffff;
|
||||
}
|
||||
|
||||
static __forceinline u32 readPixel16(const void* pmem, int x, int y, u32 bp, u32 bw) {
|
||||
static __forceinline u32 readPixel16(const void* pmem, int x, int y, u32 bp, u32 bw)
|
||||
{
|
||||
return ((const u16*)pmem)[getPixelAddress16(x, y, bp, bw)];
|
||||
}
|
||||
|
||||
static __forceinline u32 readPixel16S(const void* pmem, int x, int y, u32 bp, u32 bw) {
|
||||
static __forceinline u32 readPixel16S(const void* pmem, int x, int y, u32 bp, u32 bw)
|
||||
{
|
||||
return ((const u16*)pmem)[getPixelAddress16S(x, y, bp, bw)];
|
||||
}
|
||||
|
||||
static __forceinline u32 readPixel8(const void* pmem, int x, int y, u32 bp, u32 bw) {
|
||||
static __forceinline u32 readPixel8(const void* pmem, int x, int y, u32 bp, u32 bw)
|
||||
{
|
||||
return ((const u8*)pmem)[getPixelAddress8(x, y, bp, bw)];
|
||||
}
|
||||
|
||||
static __forceinline u32 readPixel8H(const void* pmem, int x, int y, u32 bp, u32 bw) {
|
||||
static __forceinline u32 readPixel8H(const void* pmem, int x, int y, u32 bp, u32 bw)
|
||||
{
|
||||
return ((const u8*)pmem)[4*getPixelAddress32(x, y, bp, bw) + 3];
|
||||
}
|
||||
|
||||
static __forceinline u32 readPixel4(const void* pmem, int x, int y, u32 bp, u32 bw) {
|
||||
static __forceinline u32 readPixel4(const void* pmem, int x, int y, u32 bp, u32 bw)
|
||||
{
|
||||
u32 addr = getPixelAddress4(x, y, bp, bw);
|
||||
u8 pix = ((const u8*)pmem)[addr/2];
|
||||
if (addr & 0x1)
|
||||
|
@ -320,31 +341,37 @@ static __forceinline u32 readPixel4(const void* pmem, int x, int y, u32 bp, u32
|
|||
else return pix & 0xf;
|
||||
}
|
||||
|
||||
static __forceinline u32 readPixel4HL(const void* pmem, int x, int y, u32 bp, u32 bw) {
|
||||
static __forceinline u32 readPixel4HL(const void* pmem, int x, int y, u32 bp, u32 bw)
|
||||
{
|
||||
const u8 *p = (const u8*)pmem+4*getPixelAddress4HL(x, y, bp, bw)+3;
|
||||
return *p & 0x0f;
|
||||
}
|
||||
|
||||
static __forceinline u32 readPixel4HH(const void* pmem, int x, int y, u32 bp, u32 bw) {
|
||||
static __forceinline u32 readPixel4HH(const void* pmem, int x, int y, u32 bp, u32 bw)
|
||||
{
|
||||
const u8 *p = (const u8*)pmem+4*getPixelAddress4HH(x, y, bp, bw) + 3;
|
||||
return *p >> 4;
|
||||
}
|
||||
|
||||
///////////////
|
||||
|
||||
static __forceinline u32 readPixel32Z(const void* pmem, int x, int y, u32 bp, u32 bw) {
|
||||
static __forceinline u32 readPixel32Z(const void* pmem, int x, int y, u32 bp, u32 bw)
|
||||
{
|
||||
return ((const u32*)pmem)[getPixelAddress32Z(x, y, bp, bw)];
|
||||
}
|
||||
|
||||
static __forceinline u32 readPixel24Z(const void* pmem, int x, int y, u32 bp, u32 bw) {
|
||||
static __forceinline u32 readPixel24Z(const void* pmem, int x, int y, u32 bp, u32 bw)
|
||||
{
|
||||
return ((const u32*)pmem)[getPixelAddress32Z(x, y, bp, bw)] & 0xffffff;
|
||||
}
|
||||
|
||||
static __forceinline u32 readPixel16Z(const void* pmem, int x, int y, u32 bp, u32 bw) {
|
||||
static __forceinline u32 readPixel16Z(const void* pmem, int x, int y, u32 bp, u32 bw)
|
||||
{
|
||||
return ((const u16*)pmem)[getPixelAddress16Z(x, y, bp, bw)];
|
||||
}
|
||||
|
||||
static __forceinline u32 readPixel16SZ(const void* pmem, int x, int y, u32 bp, u32 bw) {
|
||||
static __forceinline u32 readPixel16SZ(const void* pmem, int x, int y, u32 bp, u32 bw)
|
||||
{
|
||||
return ((const u16*)pmem)[getPixelAddress16SZ(x, y, bp, bw)];
|
||||
}
|
||||
|
||||
|
@ -352,135 +379,154 @@ static __forceinline u32 readPixel16SZ(const void* pmem, int x, int y, u32 bp,
|
|||
// Functions that take 0 bps //
|
||||
///////////////////////////////
|
||||
|
||||
static __forceinline void writePixel32_0(void* pmem, int x, int y, u32 pixel, u32 bw) {
|
||||
static __forceinline void writePixel32_0(void* pmem, int x, int y, u32 pixel, u32 bw)
|
||||
{
|
||||
((u32*)pmem)[getPixelAddress32_0(x, y, bw)] = pixel;
|
||||
}
|
||||
|
||||
static __forceinline void writePixel24_0(void* pmem, int x, int y, u32 pixel, u32 bw) {
|
||||
static __forceinline void writePixel24_0(void* pmem, int x, int y, u32 pixel, u32 bw)
|
||||
{
|
||||
u8 *buf = (u8*)&((u32*)pmem)[getPixelAddress32_0(x, y, bw)];
|
||||
u8 *pix = (u8*)&pixel;
|
||||
#if defined(_MSC_VER) && defined(__x86_64__)
|
||||
memcpy(buf, pix, 3);
|
||||
#else
|
||||
buf[0] = pix[0]; buf[1] = pix[1]; buf[2] = pix[2];
|
||||
#endif
|
||||
}
|
||||
|
||||
static __forceinline void writePixel16_0(void* pmem, int x, int y, u32 pixel, u32 bw) {
|
||||
static __forceinline void writePixel16_0(void* pmem, int x, int y, u32 pixel, u32 bw)
|
||||
{
|
||||
((u16*)pmem)[getPixelAddress16_0(x, y, bw)] = pixel;
|
||||
}
|
||||
|
||||
static __forceinline void writePixel16S_0(void* pmem, int x, int y, u32 pixel, u32 bw) {
|
||||
static __forceinline void writePixel16S_0(void* pmem, int x, int y, u32 pixel, u32 bw)
|
||||
{
|
||||
((u16*)pmem)[getPixelAddress16S_0(x, y, bw)] = pixel;
|
||||
}
|
||||
|
||||
static __forceinline void writePixel8_0(void* pmem, int x, int y, u32 pixel, u32 bw) {
|
||||
static __forceinline void writePixel8_0(void* pmem, int x, int y, u32 pixel, u32 bw)
|
||||
{
|
||||
((u8*)pmem)[getPixelAddress8_0(x, y, bw)] = pixel;
|
||||
}
|
||||
|
||||
static __forceinline void writePixel8H_0(void* pmem, int x, int y, u32 pixel, u32 bw) {
|
||||
static __forceinline void writePixel8H_0(void* pmem, int x, int y, u32 pixel, u32 bw)
|
||||
{
|
||||
((u8*)pmem)[4*getPixelAddress32_0(x, y, bw)+3] = pixel;
|
||||
}
|
||||
|
||||
static __forceinline void writePixel4_0(void* pmem, int x, int y, u32 pixel, u32 bw) {
|
||||
static __forceinline void writePixel4_0(void* pmem, int x, int y, u32 pixel, u32 bw)
|
||||
{
|
||||
u32 addr = getPixelAddress4_0(x, y, bw);
|
||||
u8 pix = ((u8*)pmem)[addr/2];
|
||||
if (addr & 0x1) ((u8*)pmem)[addr/2] = (pix & 0x0f) | (pixel << 4);
|
||||
else ((u8*)pmem)[addr/2] = (pix & 0xf0) | (pixel);
|
||||
}
|
||||
|
||||
static __forceinline void writePixel4HL_0(void* pmem, int x, int y, u32 pixel, u32 bw) {
|
||||
static __forceinline void writePixel4HL_0(void* pmem, int x, int y, u32 pixel, u32 bw)
|
||||
{
|
||||
u8 *p = (u8*)pmem + 4*getPixelAddress4HL_0(x, y, bw)+3;
|
||||
*p = (*p & 0xf0) | pixel;
|
||||
}
|
||||
|
||||
static __forceinline void writePixel4HH_0(void* pmem, int x, int y, u32 pixel, u32 bw) {
|
||||
static __forceinline void writePixel4HH_0(void* pmem, int x, int y, u32 pixel, u32 bw)
|
||||
{
|
||||
u8 *p = (u8*)pmem + 4*getPixelAddress4HH_0(x, y, bw)+3;
|
||||
*p = (*p & 0x0f) | (pixel<<4);
|
||||
}
|
||||
|
||||
static __forceinline void writePixel32Z_0(void* pmem, int x, int y, u32 pixel, u32 bw) {
|
||||
static __forceinline void writePixel32Z_0(void* pmem, int x, int y, u32 pixel, u32 bw)
|
||||
{
|
||||
((u32*)pmem)[getPixelAddress32Z_0(x, y, bw)] = pixel;
|
||||
}
|
||||
|
||||
static __forceinline void writePixel24Z_0(void* pmem, int x, int y, u32 pixel, u32 bw) {
|
||||
static __forceinline void writePixel24Z_0(void* pmem, int x, int y, u32 pixel, u32 bw)
|
||||
{
|
||||
u8 *buf = (u8*)pmem + 4*getPixelAddress32Z_0(x, y, bw);
|
||||
u8 *pix = (u8*)&pixel;
|
||||
#if defined(_MSC_VER) && defined(__x86_64__)
|
||||
memcpy(buf, pix, 3);
|
||||
#else
|
||||
buf[0] = pix[0]; buf[1] = pix[1]; buf[2] = pix[2];
|
||||
#endif
|
||||
}
|
||||
|
||||
static __forceinline void writePixel16Z_0(void* pmem, int x, int y, u32 pixel, u32 bw) {
|
||||
static __forceinline void writePixel16Z_0(void* pmem, int x, int y, u32 pixel, u32 bw)
|
||||
{
|
||||
((u16*)pmem)[getPixelAddress16Z_0(x, y, bw)] = pixel;
|
||||
}
|
||||
|
||||
static __forceinline void writePixel16SZ_0(void* pmem, int x, int y, u32 pixel, u32 bw) {
|
||||
static __forceinline void writePixel16SZ_0(void* pmem, int x, int y, u32 pixel, u32 bw)
|
||||
{
|
||||
((u16*)pmem)[getPixelAddress16SZ_0(x, y, bw)] = pixel;
|
||||
}
|
||||
|
||||
|
||||
///////////////
|
||||
|
||||
static __forceinline u32 readPixel32_0(const void* pmem, int x, int y, u32 bw) {
|
||||
static __forceinline u32 readPixel32_0(const void* pmem, int x, int y, u32 bw)
|
||||
{
|
||||
return ((const u32*)pmem)[getPixelAddress32_0(x, y, bw)];
|
||||
}
|
||||
|
||||
static __forceinline u32 readPixel24_0(const void* pmem, int x, int y, u32 bw) {
|
||||
static __forceinline u32 readPixel24_0(const void* pmem, int x, int y, u32 bw)
|
||||
{
|
||||
return ((const u32*)pmem)[getPixelAddress32_0(x, y, bw)] & 0xffffff;
|
||||
}
|
||||
|
||||
static __forceinline u32 readPixel16_0(const void* pmem, int x, int y, u32 bw) {
|
||||
static __forceinline u32 readPixel16_0(const void* pmem, int x, int y, u32 bw)
|
||||
{
|
||||
return ((const u16*)pmem)[getPixelAddress16_0(x, y, bw)];
|
||||
}
|
||||
|
||||
static __forceinline u32 readPixel16S_0(const void* pmem, int x, int y, u32 bw) {
|
||||
static __forceinline u32 readPixel16S_0(const void* pmem, int x, int y, u32 bw)
|
||||
{
|
||||
return ((const u16*)pmem)[getPixelAddress16S_0(x, y, bw)];
|
||||
}
|
||||
|
||||
static __forceinline u32 readPixel8_0(const void* pmem, int x, int y, u32 bw) {
|
||||
static __forceinline u32 readPixel8_0(const void* pmem, int x, int y, u32 bw)
|
||||
{
|
||||
return ((const u8*)pmem)[getPixelAddress8_0(x, y, bw)];
|
||||
}
|
||||
|
||||
static __forceinline u32 readPixel8H_0(const void* pmem, int x, int y, u32 bw) {
|
||||
static __forceinline u32 readPixel8H_0(const void* pmem, int x, int y, u32 bw)
|
||||
{
|
||||
return ((const u8*)pmem)[4*getPixelAddress32_0(x, y, bw) + 3];
|
||||
}
|
||||
|
||||
static __forceinline u32 readPixel4_0(const void* pmem, int x, int y, u32 bw) {
|
||||
static __forceinline u32 readPixel4_0(const void* pmem, int x, int y, u32 bw)
|
||||
{
|
||||
u32 addr = getPixelAddress4_0(x, y, bw);
|
||||
u8 pix = ((const u8*)pmem)[addr/2];
|
||||
if (addr & 0x1)
|
||||
return pix >> 4;
|
||||
else return pix & 0xf;
|
||||
else
|
||||
return pix & 0xf;
|
||||
}
|
||||
|
||||
static __forceinline u32 readPixel4HL_0(const void* pmem, int x, int y, u32 bw) {
|
||||
static __forceinline u32 readPixel4HL_0(const void* pmem, int x, int y, u32 bw)
|
||||
{
|
||||
const u8 *p = (const u8*)pmem+4*getPixelAddress4HL_0(x, y, bw)+3;
|
||||
return *p & 0x0f;
|
||||
}
|
||||
|
||||
static __forceinline u32 readPixel4HH_0(const void* pmem, int x, int y, u32 bw) {
|
||||
static __forceinline u32 readPixel4HH_0(const void* pmem, int x, int y, u32 bw)
|
||||
{
|
||||
const u8 *p = (const u8*)pmem+4*getPixelAddress4HH_0(x, y, bw) + 3;
|
||||
return *p >> 4;
|
||||
}
|
||||
|
||||
///////////////
|
||||
|
||||
static __forceinline u32 readPixel32Z_0(const void* pmem, int x, int y, u32 bw) {
|
||||
static __forceinline u32 readPixel32Z_0(const void* pmem, int x, int y, u32 bw)
|
||||
{
|
||||
return ((const u32*)pmem)[getPixelAddress32Z_0(x, y, bw)];
|
||||
}
|
||||
|
||||
static __forceinline u32 readPixel24Z_0(const void* pmem, int x, int y, u32 bw) {
|
||||
static __forceinline u32 readPixel24Z_0(const void* pmem, int x, int y, u32 bw)
|
||||
{
|
||||
return ((const u32*)pmem)[getPixelAddress32Z_0(x, y, bw)] & 0xffffff;
|
||||
}
|
||||
|
||||
static __forceinline u32 readPixel16Z_0(const void* pmem, int x, int y, u32 bw) {
|
||||
static __forceinline u32 readPixel16Z_0(const void* pmem, int x, int y, u32 bw)
|
||||
{
|
||||
return ((const u16*)pmem)[getPixelAddress16Z_0(x, y, bw)];
|
||||
}
|
||||
|
||||
static __forceinline u32 readPixel16SZ_0(const void* pmem, int x, int y, u32 bw) {
|
||||
static __forceinline u32 readPixel16SZ_0(const void* pmem, int x, int y, u32 bw)
|
||||
{
|
||||
return ((const u16*)pmem)[getPixelAddress16SZ_0(x, y, bw)];
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,123 @@
|
|||
/* ZeroGS KOSMOS
|
||||
* Copyright (C) 2005-2006 zerofrog@gmail.com
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*/
|
||||
|
||||
#ifndef MEM_SWIZZLE_H_INCLUDED
|
||||
#define MEM_SWIZZLE_H_INCLUDED
|
||||
|
||||
#include "GS.h"
|
||||
#include "Mem.h"
|
||||
|
||||
// special swizzle macros - which I converted to functions.
|
||||
|
||||
static __forceinline void SwizzleBlock24(u8 *dst, u8 *src, int pitch, u32 WriteMask = 0xffffffff)
|
||||
{
|
||||
u8* pnewsrc = src;
|
||||
u32* pblock = tempblock;
|
||||
|
||||
for(int by = 0; by < 7; ++by, pblock += 8, pnewsrc += pitch-24)
|
||||
{
|
||||
for(int bx = 0; bx < 8; ++bx, pnewsrc += 3)
|
||||
{
|
||||
pblock[bx] = *(u32*)pnewsrc;
|
||||
}
|
||||
}
|
||||
|
||||
for(int bx = 0; bx < 7; ++bx, pnewsrc += 3)
|
||||
{
|
||||
/* might be 1 byte out of bounds of GS memory */
|
||||
pblock[bx] = *(u32*)pnewsrc;
|
||||
}
|
||||
|
||||
/* do 3 bytes for the last copy */
|
||||
*((u8*)pblock+28) = pnewsrc[0];
|
||||
*((u8*)pblock+29) = pnewsrc[1];
|
||||
*((u8*)pblock+30) = pnewsrc[2];
|
||||
SwizzleBlock32((u8*)dst, (u8*)tempblock, 32, 0x00ffffff);
|
||||
}
|
||||
|
||||
#define SwizzleBlock24u SwizzleBlock24
|
||||
|
||||
static __forceinline void SwizzleBlock8H(u8 *dst, u8 *src, int pitch, u32 WriteMask = 0xffffffff)
|
||||
{
|
||||
u8* pnewsrc = src;
|
||||
u32* pblock = tempblock;
|
||||
|
||||
for(int by = 0; by < 8; ++by, pblock += 8, pnewsrc += pitch)
|
||||
{
|
||||
u32 u = *(u32*)pnewsrc;
|
||||
pblock[0] = u<<24;
|
||||
pblock[1] = u<<16;
|
||||
pblock[2] = u<<8;
|
||||
pblock[3] = u;
|
||||
u = *(u32*)(pnewsrc+4);
|
||||
pblock[4] = u<<24;
|
||||
pblock[5] = u<<16;
|
||||
pblock[6] = u<<8;
|
||||
pblock[7] = u;
|
||||
}
|
||||
SwizzleBlock32((u8*)dst, (u8*)tempblock, 32, 0xff000000);
|
||||
}
|
||||
|
||||
#define SwizzleBlock8Hu SwizzleBlock8H
|
||||
|
||||
static __forceinline void SwizzleBlock4HH(u8 *dst, u8 *src, int pitch, u32 WriteMask = 0xffffffff)
|
||||
{
|
||||
u8* pnewsrc = src;
|
||||
u32* pblock = tempblock;
|
||||
|
||||
for(int by = 0; by < 8; ++by, pblock += 8, pnewsrc += pitch)
|
||||
{
|
||||
u32 u = *(u32*)pnewsrc;
|
||||
pblock[0] = u<<28;
|
||||
pblock[1] = u<<24;
|
||||
pblock[2] = u<<20;
|
||||
pblock[3] = u<<16;
|
||||
pblock[4] = u<<12;
|
||||
pblock[5] = u<<8;
|
||||
pblock[6] = u<<4;
|
||||
pblock[7] = u;
|
||||
}
|
||||
SwizzleBlock32((u8*)dst, (u8*)tempblock, 32, 0xf0000000);
|
||||
}
|
||||
|
||||
#define SwizzleBlock4HHu SwizzleBlock4HH
|
||||
|
||||
static __forceinline void SwizzleBlock4HL(u8 *dst, u8 *src, int pitch, u32 WriteMask = 0xffffffff)
|
||||
{
|
||||
u8* pnewsrc = src;
|
||||
u32* pblock = tempblock;
|
||||
|
||||
for(int by = 0; by < 8; ++by, pblock += 8, pnewsrc += pitch)
|
||||
{
|
||||
u32 u = *(u32*)pnewsrc;
|
||||
pblock[0] = u<<24;
|
||||
pblock[1] = u<<20;
|
||||
pblock[2] = u<<16;
|
||||
pblock[3] = u<<12;
|
||||
pblock[4] = u<<8;
|
||||
pblock[5] = u<<4;
|
||||
pblock[6] = u;
|
||||
pblock[7] = u>>4;
|
||||
}
|
||||
SwizzleBlock32((u8*)dst, (u8*)tempblock, 32, 0x0f000000);
|
||||
}
|
||||
|
||||
#define SwizzleBlock4HLu SwizzleBlock4HL
|
||||
|
||||
|
||||
#endif // MEM_SWIZZLE_H_INCLUDED
|
|
@ -0,0 +1,236 @@
|
|||
/* ZeroGS KOSMOS
|
||||
* Copyright (C) 2005-2006 zerofrog@gmail.com
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*/
|
||||
|
||||
#include "GS.h"
|
||||
|
||||
u32 g_blockTable32[4][8] = {
|
||||
{ 0, 1, 4, 5, 16, 17, 20, 21},
|
||||
{ 2, 3, 6, 7, 18, 19, 22, 23},
|
||||
{ 8, 9, 12, 13, 24, 25, 28, 29},
|
||||
{ 10, 11, 14, 15, 26, 27, 30, 31}
|
||||
};
|
||||
|
||||
u32 g_blockTable32Z[4][8] = {
|
||||
{ 24, 25, 28, 29, 8, 9, 12, 13},
|
||||
{ 26, 27, 30, 31, 10, 11, 14, 15},
|
||||
{ 16, 17, 20, 21, 0, 1, 4, 5},
|
||||
{ 18, 19, 22, 23, 2, 3, 6, 7}
|
||||
};
|
||||
|
||||
u32 g_blockTable16[8][4] = {
|
||||
{ 0, 2, 8, 10 },
|
||||
{ 1, 3, 9, 11 },
|
||||
{ 4, 6, 12, 14 },
|
||||
{ 5, 7, 13, 15 },
|
||||
{ 16, 18, 24, 26 },
|
||||
{ 17, 19, 25, 27 },
|
||||
{ 20, 22, 28, 30 },
|
||||
{ 21, 23, 29, 31 }
|
||||
};
|
||||
|
||||
u32 g_blockTable16S[8][4] = {
|
||||
{ 0, 2, 16, 18 },
|
||||
{ 1, 3, 17, 19 },
|
||||
{ 8, 10, 24, 26 },
|
||||
{ 9, 11, 25, 27 },
|
||||
{ 4, 6, 20, 22 },
|
||||
{ 5, 7, 21, 23 },
|
||||
{ 12, 14, 28, 30 },
|
||||
{ 13, 15, 29, 31 }
|
||||
};
|
||||
|
||||
u32 g_blockTable16Z[8][4] = {
|
||||
{ 24, 26, 16, 18 },
|
||||
{ 25, 27, 17, 19 },
|
||||
{ 28, 30, 20, 22 },
|
||||
{ 29, 31, 21, 23 },
|
||||
{ 8, 10, 0, 2 },
|
||||
{ 9, 11, 1, 3 },
|
||||
{ 12, 14, 4, 6 },
|
||||
{ 13, 15, 5, 7 }
|
||||
};
|
||||
|
||||
u32 g_blockTable16SZ[8][4] = {
|
||||
{ 24, 26, 8, 10 },
|
||||
{ 25, 27, 9, 11 },
|
||||
{ 16, 18, 0, 2 },
|
||||
{ 17, 19, 1, 3 },
|
||||
{ 28, 30, 12, 14 },
|
||||
{ 29, 31, 13, 15 },
|
||||
{ 20, 22, 4, 6 },
|
||||
{ 21, 23, 5, 7 }
|
||||
};
|
||||
|
||||
u32 g_blockTable8[4][8] = {
|
||||
{ 0, 1, 4, 5, 16, 17, 20, 21},
|
||||
{ 2, 3, 6, 7, 18, 19, 22, 23},
|
||||
{ 8, 9, 12, 13, 24, 25, 28, 29},
|
||||
{ 10, 11, 14, 15, 26, 27, 30, 31}
|
||||
};
|
||||
|
||||
u32 g_blockTable4[8][4] = {
|
||||
{ 0, 2, 8, 10 },
|
||||
{ 1, 3, 9, 11 },
|
||||
{ 4, 6, 12, 14 },
|
||||
{ 5, 7, 13, 15 },
|
||||
{ 16, 18, 24, 26 },
|
||||
{ 17, 19, 25, 27 },
|
||||
{ 20, 22, 28, 30 },
|
||||
{ 21, 23, 29, 31 }
|
||||
};
|
||||
|
||||
u32 g_columnTable32[8][8] = {
|
||||
{ 0, 1, 4, 5, 8, 9, 12, 13 },
|
||||
{ 2, 3, 6, 7, 10, 11, 14, 15 },
|
||||
{ 16, 17, 20, 21, 24, 25, 28, 29 },
|
||||
{ 18, 19, 22, 23, 26, 27, 30, 31 },
|
||||
{ 32, 33, 36, 37, 40, 41, 44, 45 },
|
||||
{ 34, 35, 38, 39, 42, 43, 46, 47 },
|
||||
{ 48, 49, 52, 53, 56, 57, 60, 61 },
|
||||
{ 50, 51, 54, 55, 58, 59, 62, 63 },
|
||||
};
|
||||
|
||||
u32 g_columnTable16[8][16] = {
|
||||
{ 0, 2, 8, 10, 16, 18, 24, 26,
|
||||
1, 3, 9, 11, 17, 19, 25, 27 },
|
||||
{ 4, 6, 12, 14, 20, 22, 28, 30,
|
||||
5, 7, 13, 15, 21, 23, 29, 31 },
|
||||
{ 32, 34, 40, 42, 48, 50, 56, 58,
|
||||
33, 35, 41, 43, 49, 51, 57, 59 },
|
||||
{ 36, 38, 44, 46, 52, 54, 60, 62,
|
||||
37, 39, 45, 47, 53, 55, 61, 63 },
|
||||
{ 64, 66, 72, 74, 80, 82, 88, 90,
|
||||
65, 67, 73, 75, 81, 83, 89, 91 },
|
||||
{ 68, 70, 76, 78, 84, 86, 92, 94,
|
||||
69, 71, 77, 79, 85, 87, 93, 95 },
|
||||
{ 96, 98, 104, 106, 112, 114, 120, 122,
|
||||
97, 99, 105, 107, 113, 115, 121, 123 },
|
||||
{ 100, 102, 108, 110, 116, 118, 124, 126,
|
||||
101, 103, 109, 111, 117, 119, 125, 127 },
|
||||
};
|
||||
|
||||
u32 g_columnTable8[16][16] = {
|
||||
{ 0, 4, 16, 20, 32, 36, 48, 52, // column 0
|
||||
2, 6, 18, 22, 34, 38, 50, 54 },
|
||||
{ 8, 12, 24, 28, 40, 44, 56, 60,
|
||||
10, 14, 26, 30, 42, 46, 58, 62 },
|
||||
{ 33, 37, 49, 53, 1, 5, 17, 21,
|
||||
35, 39, 51, 55, 3, 7, 19, 23 },
|
||||
{ 41, 45, 57, 61, 9, 13, 25, 29,
|
||||
43, 47, 59, 63, 11, 15, 27, 31 },
|
||||
{ 96, 100, 112, 116, 64, 68, 80, 84, // column 1
|
||||
98, 102, 114, 118, 66, 70, 82, 86 },
|
||||
{ 104, 108, 120, 124, 72, 76, 88, 92,
|
||||
106, 110, 122, 126, 74, 78, 90, 94 },
|
||||
{ 65, 69, 81, 85, 97, 101, 113, 117,
|
||||
67, 71, 83, 87, 99, 103, 115, 119 },
|
||||
{ 73, 77, 89, 93, 105, 109, 121, 125,
|
||||
75, 79, 91, 95, 107, 111, 123, 127 },
|
||||
{ 128, 132, 144, 148, 160, 164, 176, 180, // column 2
|
||||
130, 134, 146, 150, 162, 166, 178, 182 },
|
||||
{ 136, 140, 152, 156, 168, 172, 184, 188,
|
||||
138, 142, 154, 158, 170, 174, 186, 190 },
|
||||
{ 161, 165, 177, 181, 129, 133, 145, 149,
|
||||
163, 167, 179, 183, 131, 135, 147, 151 },
|
||||
{ 169, 173, 185, 189, 137, 141, 153, 157,
|
||||
171, 175, 187, 191, 139, 143, 155, 159 },
|
||||
{ 224, 228, 240, 244, 192, 196, 208, 212, // column 3
|
||||
226, 230, 242, 246, 194, 198, 210, 214 },
|
||||
{ 232, 236, 248, 252, 200, 204, 216, 220,
|
||||
234, 238, 250, 254, 202, 206, 218, 222 },
|
||||
{ 193, 197, 209, 213, 225, 229, 241, 245,
|
||||
195, 199, 211, 215, 227, 231, 243, 247 },
|
||||
{ 201, 205, 217, 221, 233, 237, 249, 253,
|
||||
203, 207, 219, 223, 235, 239, 251, 255 },
|
||||
};
|
||||
|
||||
u32 g_columnTable4[16][32] = {
|
||||
{ 0, 8, 32, 40, 64, 72, 96, 104, // column 0
|
||||
2, 10, 34, 42, 66, 74, 98, 106,
|
||||
4, 12, 36, 44, 68, 76, 100, 108,
|
||||
6, 14, 38, 46, 70, 78, 102, 110 },
|
||||
{ 16, 24, 48, 56, 80, 88, 112, 120,
|
||||
18, 26, 50, 58, 82, 90, 114, 122,
|
||||
20, 28, 52, 60, 84, 92, 116, 124,
|
||||
22, 30, 54, 62, 86, 94, 118, 126 },
|
||||
{ 65, 73, 97, 105, 1, 9, 33, 41,
|
||||
67, 75, 99, 107, 3, 11, 35, 43,
|
||||
69, 77, 101, 109, 5, 13, 37, 45,
|
||||
71, 79, 103, 111, 7, 15, 39, 47 },
|
||||
{ 81, 89, 113, 121, 17, 25, 49, 57,
|
||||
83, 91, 115, 123, 19, 27, 51, 59,
|
||||
85, 93, 117, 125, 21, 29, 53, 61,
|
||||
87, 95, 119, 127, 23, 31, 55, 63 },
|
||||
{ 192, 200, 224, 232, 128, 136, 160, 168, // column 1
|
||||
194, 202, 226, 234, 130, 138, 162, 170,
|
||||
196, 204, 228, 236, 132, 140, 164, 172,
|
||||
198, 206, 230, 238, 134, 142, 166, 174 },
|
||||
{ 208, 216, 240, 248, 144, 152, 176, 184,
|
||||
210, 218, 242, 250, 146, 154, 178, 186,
|
||||
212, 220, 244, 252, 148, 156, 180, 188,
|
||||
214, 222, 246, 254, 150, 158, 182, 190 },
|
||||
{ 129, 137, 161, 169, 193, 201, 225, 233,
|
||||
131, 139, 163, 171, 195, 203, 227, 235,
|
||||
133, 141, 165, 173, 197, 205, 229, 237,
|
||||
135, 143, 167, 175, 199, 207, 231, 239 },
|
||||
{ 145, 153, 177, 185, 209, 217, 241, 249,
|
||||
147, 155, 179, 187, 211, 219, 243, 251,
|
||||
149, 157, 181, 189, 213, 221, 245, 253,
|
||||
151, 159, 183, 191, 215, 223, 247, 255 },
|
||||
{ 256, 264, 288, 296, 320, 328, 352, 360, // column 2
|
||||
258, 266, 290, 298, 322, 330, 354, 362,
|
||||
260, 268, 292, 300, 324, 332, 356, 364,
|
||||
262, 270, 294, 302, 326, 334, 358, 366 },
|
||||
{ 272, 280, 304, 312, 336, 344, 368, 376,
|
||||
274, 282, 306, 314, 338, 346, 370, 378,
|
||||
276, 284, 308, 316, 340, 348, 372, 380,
|
||||
278, 286, 310, 318, 342, 350, 374, 382 },
|
||||
{ 321, 329, 353, 361, 257, 265, 289, 297,
|
||||
323, 331, 355, 363, 259, 267, 291, 299,
|
||||
325, 333, 357, 365, 261, 269, 293, 301,
|
||||
327, 335, 359, 367, 263, 271, 295, 303 },
|
||||
{ 337, 345, 369, 377, 273, 281, 305, 313,
|
||||
339, 347, 371, 379, 275, 283, 307, 315,
|
||||
341, 349, 373, 381, 277, 285, 309, 317,
|
||||
343, 351, 375, 383, 279, 287, 311, 319 },
|
||||
{ 448, 456, 480, 488, 384, 392, 416, 424, // column 3
|
||||
450, 458, 482, 490, 386, 394, 418, 426,
|
||||
452, 460, 484, 492, 388, 396, 420, 428,
|
||||
454, 462, 486, 494, 390, 398, 422, 430 },
|
||||
{ 464, 472, 496, 504, 400, 408, 432, 440,
|
||||
466, 474, 498, 506, 402, 410, 434, 442,
|
||||
468, 476, 500, 508, 404, 412, 436, 444,
|
||||
470, 478, 502, 510, 406, 414, 438, 446 },
|
||||
{ 385, 393, 417, 425, 449, 457, 481, 489,
|
||||
387, 395, 419, 427, 451, 459, 483, 491,
|
||||
389, 397, 421, 429, 453, 461, 485, 493,
|
||||
391, 399, 423, 431, 455, 463, 487, 495 },
|
||||
{ 401, 409, 433, 441, 465, 473, 497, 505,
|
||||
403, 411, 435, 443, 467, 475, 499, 507,
|
||||
405, 413, 437, 445, 469, 477, 501, 509,
|
||||
407, 415, 439, 447, 471, 479, 503, 511 },
|
||||
};
|
||||
|
||||
u32 g_pageTable32[32][64];
|
||||
u32 g_pageTable32Z[32][64];
|
||||
u32 g_pageTable16[64][64];
|
||||
u32 g_pageTable16S[64][64];
|
||||
u32 g_pageTable16Z[64][64];
|
||||
u32 g_pageTable16SZ[64][64];
|
||||
u32 g_pageTable8[64][128];
|
||||
u32 g_pageTable4[128][128];
|
|
@ -0,0 +1,184 @@
|
|||
#ifndef MEM_TRANSMIT_H_INCLUDED
|
||||
#define MEM_TRANSMIT_H_INCLUDED
|
||||
|
||||
#include "GS.h"
|
||||
#include "Mem.h"
|
||||
|
||||
#define DSTPSM gs.dstbuf.psm
|
||||
|
||||
// transfers whole rows
|
||||
#define TRANSMIT_HOSTLOCAL_Y_(psm, T, widthlimit, endY) { \
|
||||
assert( (nSize%widthlimit) == 0 && widthlimit <= 4 ); \
|
||||
if( (gs.imageEndX-gs.trxpos.dx)%widthlimit ) { \
|
||||
/*GS_LOG("Bad Transmission! %d %d, psm: %d\n", gs.trxpos.dx, gs.imageEndX, DSTPSM);*/ \
|
||||
for(; i < endY; ++i) { \
|
||||
for(; j < gs.imageEndX && nSize > 0; j += 1, nSize -= 1, pbuf += 1) { \
|
||||
/* write as many pixel at one time as possible */ \
|
||||
writePixel##psm##_0(pstart, j%2048, i%2048, pbuf[0], gs.dstbuf.bw); \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
for(; i < endY; ++i) { \
|
||||
for(; j < gs.imageEndX && nSize > 0; j += widthlimit, nSize -= widthlimit, pbuf += widthlimit) { \
|
||||
/* write as many pixel at one time as possible */ \
|
||||
if( nSize < widthlimit ) goto End; \
|
||||
writePixel##psm##_0(pstart, j%2048, i%2048, pbuf[0], gs.dstbuf.bw); \
|
||||
\
|
||||
if( widthlimit > 1 ) { \
|
||||
writePixel##psm##_0(pstart, (j+1)%2048, i%2048, pbuf[1], gs.dstbuf.bw); \
|
||||
\
|
||||
if( widthlimit > 2 ) { \
|
||||
writePixel##psm##_0(pstart, (j+2)%2048, i%2048, pbuf[2], gs.dstbuf.bw); \
|
||||
\
|
||||
if( widthlimit > 3 ) { \
|
||||
writePixel##psm##_0(pstart, (j+3)%2048, i%2048, pbuf[3], gs.dstbuf.bw); \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
\
|
||||
if( j >= gs.imageEndX ) { assert(j == gs.imageEndX); j = gs.trxpos.dx; } \
|
||||
else { assert( gs.imageTransfer == -1 || nSize*sizeof(T)/4 == 0 ); goto End; } \
|
||||
} \
|
||||
} \
|
||||
|
||||
// transmit until endX, don't check size since it has already been prevalidated
|
||||
#define TRANSMIT_HOSTLOCAL_X_(psm, T, widthlimit, blockheight, startX) { \
|
||||
for(int tempi = 0; tempi < blockheight; ++tempi) { \
|
||||
for(j = startX; j < gs.imageEndX; j++, pbuf++) { \
|
||||
writePixel##psm##_0(pstart, j%2048, (i+tempi)%2048, pbuf[0], gs.dstbuf.bw); \
|
||||
} \
|
||||
pbuf += pitch-fracX; \
|
||||
} \
|
||||
} \
|
||||
|
||||
//template <class T>
|
||||
//static __forceinline void TransmitHostLocalX_(_writePixel_0 wp, u32 widthlimit, u32 blockheight, u32 startX)
|
||||
//{
|
||||
// for(int tempi = 0; tempi < blockheight; ++tempi)
|
||||
// {
|
||||
// for(j = startX; j < gs.imageEndX; j++, pbuf++)
|
||||
// {
|
||||
// wp(pstart, j%2048, (i+tempi)%2048, pbuf[0], gs.dstbuf.bw);
|
||||
// }
|
||||
// pbuf += pitch - fracX;
|
||||
// }
|
||||
//}
|
||||
|
||||
// transfers whole rows
|
||||
#define TRANSMIT_HOSTLOCAL_Y_24(psm, T, widthlimit, endY) { \
|
||||
if( widthlimit != 8 || ((gs.imageEndX-gs.trxpos.dx)%widthlimit) ) { \
|
||||
/*GS_LOG("Bad Transmission! %d %d, psm: %d\n", gs.trxpos.dx, gs.imageEndX, DSTPSM);*/ \
|
||||
for(; i < endY; ++i) { \
|
||||
for(; j < gs.imageEndX && nSize > 0; j += 1, nSize -= 1, pbuf += 3) { \
|
||||
writePixel##psm##_0(pstart, j%2048, i%2048, *(u32*)(pbuf), gs.dstbuf.bw); \
|
||||
} \
|
||||
\
|
||||
if( j >= gs.imageEndX ) { assert(gs.imageTransfer == -1 || j == gs.imageEndX); j = gs.trxpos.dx; } \
|
||||
else { assert( gs.imageTransfer == -1 || nSize == 0 ); goto End; } \
|
||||
} \
|
||||
} \
|
||||
else { \
|
||||
assert( /*(nSize%widthlimit) == 0 &&*/ widthlimit == 8 ); \
|
||||
for(; i < endY; ++i) { \
|
||||
for(; j < gs.imageEndX && nSize > 0; j += widthlimit, nSize -= widthlimit, pbuf += 3*widthlimit) { \
|
||||
if( nSize < widthlimit ) goto End; \
|
||||
/* write as many pixel at one time as possible */ \
|
||||
writePixel##psm##_0(pstart, j%2048, i%2048, *(u32*)(pbuf+0), gs.dstbuf.bw); \
|
||||
writePixel##psm##_0(pstart, (j+1)%2048, i%2048, *(u32*)(pbuf+3), gs.dstbuf.bw); \
|
||||
writePixel##psm##_0(pstart, (j+2)%2048, i%2048, *(u32*)(pbuf+6), gs.dstbuf.bw); \
|
||||
writePixel##psm##_0(pstart, (j+3)%2048, i%2048, *(u32*)(pbuf+9), gs.dstbuf.bw); \
|
||||
writePixel##psm##_0(pstart, (j+4)%2048, i%2048, *(u32*)(pbuf+12), gs.dstbuf.bw); \
|
||||
writePixel##psm##_0(pstart, (j+5)%2048, i%2048, *(u32*)(pbuf+15), gs.dstbuf.bw); \
|
||||
writePixel##psm##_0(pstart, (j+6)%2048, i%2048, *(u32*)(pbuf+18), gs.dstbuf.bw); \
|
||||
writePixel##psm##_0(pstart, (j+7)%2048, i%2048, *(u32*)(pbuf+21), gs.dstbuf.bw); \
|
||||
} \
|
||||
\
|
||||
if( j >= gs.imageEndX ) { assert(gs.imageTransfer == -1 || j == gs.imageEndX); j = gs.trxpos.dx; } \
|
||||
else { \
|
||||
if( nSize < 0 ) { \
|
||||
/* extracted too much */ \
|
||||
assert( (nSize%3)==0 && nSize > -24 ); \
|
||||
j += nSize/3; \
|
||||
nSize = 0; \
|
||||
} \
|
||||
assert( gs.imageTransfer == -1 || nSize == 0 ); \
|
||||
goto End; \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
|
||||
// transmit until endX, don't check size since it has already been prevalidated
|
||||
#define TRANSMIT_HOSTLOCAL_X_24(psm, T, widthlimit, blockheight, startX) { \
|
||||
for(int tempi = 0; tempi < blockheight; ++tempi) { \
|
||||
for(j = startX; j < gs.imageEndX; j++, pbuf += 3) { \
|
||||
writePixel##psm##_0(pstart, j%2048, (i+tempi)%2048, *(u32*)pbuf, gs.dstbuf.bw); \
|
||||
} \
|
||||
pbuf += 3*(pitch-fracX); \
|
||||
} \
|
||||
} \
|
||||
|
||||
|
||||
// meant for 4bit transfers
|
||||
#define TRANSMIT_HOSTLOCAL_Y_4(psm, T, widthlimit, endY) { \
|
||||
for(; i < endY; ++i) { \
|
||||
for(; j < gs.imageEndX && nSize > 0; j += widthlimit, nSize -= widthlimit) { \
|
||||
/* write as many pixel at one time as possible */ \
|
||||
writePixel##psm##_0(pstart, j%2048, i%2048, *pbuf&0x0f, gs.dstbuf.bw); \
|
||||
writePixel##psm##_0(pstart, (j+1)%2048, i%2048, *pbuf>>4, gs.dstbuf.bw); \
|
||||
pbuf++; \
|
||||
if( widthlimit > 2 ) { \
|
||||
writePixel##psm##_0(pstart, (j+2)%2048, i%2048, *pbuf&0x0f, gs.dstbuf.bw); \
|
||||
writePixel##psm##_0(pstart, (j+3)%2048, i%2048, *pbuf>>4, gs.dstbuf.bw); \
|
||||
pbuf++; \
|
||||
\
|
||||
if( widthlimit > 4 ) { \
|
||||
writePixel##psm##_0(pstart, (j+4)%2048, i%2048, *pbuf&0x0f, gs.dstbuf.bw); \
|
||||
writePixel##psm##_0(pstart, (j+5)%2048, i%2048, *pbuf>>4, gs.dstbuf.bw); \
|
||||
pbuf++; \
|
||||
\
|
||||
if( widthlimit > 6 ) { \
|
||||
writePixel##psm##_0(pstart, (j+6)%2048, i%2048, *pbuf&0x0f, gs.dstbuf.bw); \
|
||||
writePixel##psm##_0(pstart, (j+7)%2048, i%2048, *pbuf>>4, gs.dstbuf.bw); \
|
||||
pbuf++; \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
\
|
||||
if( j >= gs.imageEndX ) { j = gs.trxpos.dx; } \
|
||||
else { assert( gs.imageTransfer == -1 || (nSize/32) == 0 ); goto End; } \
|
||||
} \
|
||||
} \
|
||||
|
||||
// transmit until endX, don't check size since it has already been prevalidated
|
||||
#define TRANSMIT_HOSTLOCAL_X_4(psm, T, widthlimit, blockheight, startX) { \
|
||||
for(int tempi = 0; tempi < blockheight; ++tempi) { \
|
||||
for(j = startX; j < gs.imageEndX; j+=2, pbuf++) { \
|
||||
writePixel##psm##_0(pstart, j%2048, (i+tempi)%2048, pbuf[0]&0x0f, gs.dstbuf.bw); \
|
||||
writePixel##psm##_0(pstart, (j+1)%2048, (i+tempi)%2048, pbuf[0]>>4, gs.dstbuf.bw); \
|
||||
} \
|
||||
pbuf += (pitch-fracX)/2; \
|
||||
} \
|
||||
} \
|
||||
|
||||
#define TRANSMIT_HOSTLOCAL_X(th, psm, T, widthlimit, blockheight, startX) \
|
||||
TRANSMIT_HOSTLOCAL_X##th(psm, T, widthlimit, blockheight, startX)
|
||||
#define TRANSMIT_HOSTLOCAL_Y(th, psm, T, widthlimit, endY) \
|
||||
TRANSMIT_HOSTLOCAL_Y##th(psm,T,widthlimit,endY)
|
||||
// calculate pitch in source buffer
|
||||
|
||||
|
||||
template <class T>
|
||||
static __forceinline int TransmitPitch_(int pitch) { return (pitch * sizeof(T)); }
|
||||
template <class T>
|
||||
static __forceinline int TransmitPitch_24(int pitch) { return (pitch * 3); }
|
||||
template <class T>
|
||||
static __forceinline int TransmitPitch_4(int pitch) { return (pitch/2); }
|
||||
|
||||
#define TRANSMIT_PITCH_(pitch, T) TransmitPitch_<T>(pitch)
|
||||
#define TRANSMIT_PITCH_24(pitch, T) TransmitPitch_24<T>(pitch)
|
||||
#define TRANSMIT_PITCH_4(pitch, T) TransmitPitch_4<T>(pitch)
|
||||
|
||||
#endif // MEM_TRANSMIT_H_INCLUDED
|
|
@ -323,6 +323,10 @@
|
|||
RelativePath="..\Mem.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\Mem_Tables.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\memcpy_amd.cpp"
|
||||
>
|
||||
|
@ -439,6 +443,15 @@
|
|||
RelativePath="..\Mem.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\Mem_Swizzle.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\Mem_Transmit.h"
|
||||
>
|
||||
</File>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\PS2Edefs.h"
|
||||
>
|
||||
|
|
Loading…
Reference in New Issue