mirror of https://github.com/PCSX2/pcsx2.git
zzogl-pg: Turn BUILD_CLUT into a function.
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@2833 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
1361a872fd
commit
4a32e9b445
|
@ -297,13 +297,17 @@ enum PSM_value{
|
||||||
PSMT16SZ = 58, // 111010
|
PSMT16SZ = 58, // 111010
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Check target bit mode. PSMCT32 and 32Z return 0, 24 and 24Z - 1
|
||||||
|
// 16, 16S, 16Z, 16SZ -- 2, PSMT8 and 8H - 3, PSMT4, 4HL, 4HH -- 4.
|
||||||
|
// Extracts the bit-mode field of a pixel storage mode: the low three bits of psm.
inline int PSMT_BITMODE(int psm)
{
	const int kBitModeMask = 0x7;
	return psm & kBitModeMask;
}
|
||||||
|
|
||||||
// CLUT = Color look up table. Set proper color to table according CLUT table.
|
// CLUT = Color look up table. Set proper color to table according CLUT table.
|
||||||
// Used for PSMT8, PSMT8H, PSMT4, PSMT4HH, PSMT4HL textures
|
// Used for PSMT8, PSMT8H, PSMT4, PSMT4HH, PSMT4HL textures
|
||||||
inline bool PSMT_ISCLUT(int psm) { return ((psm & 0x7) > 2);}
|
// True when the bit mode of psm is above 2, i.e. the format is one of the
// CLUT-indexed modes rather than a 32/24/16-bit direct-colour mode.
inline bool PSMT_ISCLUT(int psm)
{
	return PSMT_BITMODE(psm) >= 3;
}
|
||||||
|
|
||||||
// PSMCT16, PSMCT16S, PSMT16Z, PSMT16SZ are 16-bit targets, and usually there are
|
// PSMCT16, PSMCT16S, PSMT16Z, PSMT16SZ are 16-bit targets, and usually there are
|
||||||
// two of them in each 32-bit word.
|
// two of them in each 32-bit word.
|
||||||
inline bool PSMT_IS16BIT(int psm) { return ((psm & 0x7) == 2);}
|
// True when the bit mode of psm is exactly 2 (the 16-bit storage modes).
inline bool PSMT_IS16BIT(int psm)
{
	const int mode = PSMT_BITMODE(psm);
	return mode == 2;
}
|
||||||
|
|
||||||
// PSMT32Z, PSMT24Z, PSMT16Z, PSMT16SZ are Z-buffer textures
|
// PSMT32Z, PSMT24Z, PSMT16Z, PSMT16SZ are Z-buffer textures
|
||||||
inline bool PSMT_ISZTEX(int psm) {return ((psm & 0x30) == 0x30);}
|
inline bool PSMT_ISZTEX(int psm) {return ((psm & 0x30) == 0x30);}
|
||||||
|
@ -318,9 +322,9 @@ inline bool PSMT_IS8CLUT(int psm) {return ((psm & 3) == 3);}
|
||||||
// PSMT16Z and PSMT16SZ use a -1 offset into the Z-buffer. This claim still needs to be verified.
|
// PSMT16Z and PSMT16SZ use a -1 offset into the Z-buffer. This claim still needs to be verified.
|
||||||
inline bool PSMT_IS16Z(int psm) {return ((psm & 0x32) == 0x32);}
|
inline bool PSMT_IS16Z(int psm) {return ((psm & 0x32) == 0x32);}
|
||||||
|
|
||||||
// Check target bit mode. PSMCT32 and 32Z return 0, 24 and 24Z - 1
|
// Check to see if it is 32 bits. According to code comments, anyways.
|
||||||
// 16, 16S, 16Z, 16SZ -- 2, PSMT8 and 8H - 3, PSMT4, 4HL, 4HH -- 4.
|
// I'll have to look closer at it, because it'd seem like it'd return true for 24 bits.
|
||||||
inline int PSMT_BITMODE(int psm) {return (psm & 0x7);}
|
// Returns true when psm is 1 or lower. Callers in this file pass the CLUT
// pixel format (cpsm) and take the 4-bytes-per-entry path when this is true.
// NOTE(review): presumably values 0 and 1 are PSMCT32 and PSMCT24, so a 24-bit
// format also passes this test -- confirm against the PSM_value enum.
inline bool PSMT_IS32BIT(int psm)
{
	return psm <= 1;
}
|
||||||
|
|
||||||
//----------------------- Data from registers -----------------------
|
//----------------------- Data from registers -----------------------
|
||||||
|
|
||||||
|
|
|
@ -638,7 +638,7 @@ inline void FlushDecodeClut(VB& curvb, GLuint& ptexclut) {
|
||||||
if (curvb.tex0.csm && curvb.tex0.csa )
|
if (curvb.tex0.csm && curvb.tex0.csa )
|
||||||
printf ("ERROR, csm1\n");
|
printf ("ERROR, csm1\n");
|
||||||
|
|
||||||
if (curvb.tex0.cpsm <= 1) { // 32 bit
|
if (PSMT_IS32BIT(curvb.tex0.cpsm)) { // 32 bit
|
||||||
nClutOffset = 64 * curvb.tex0.csa;
|
nClutOffset = 64 * curvb.tex0.csa;
|
||||||
clutsize = min(entries, 256 - curvb.tex0.csa * 16) * 4;
|
clutsize = min(entries, 256 - curvb.tex0.csa * 16) * 4;
|
||||||
}
|
}
|
||||||
|
@ -647,7 +647,7 @@ inline void FlushDecodeClut(VB& curvb, GLuint& ptexclut) {
|
||||||
clutsize = min(entries, 512 - curvb.tex0.csa * 16) * 2;
|
clutsize = min(entries, 512 - curvb.tex0.csa * 16) * 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
if( curvb.tex0.cpsm <= 1 ) { // 32 bit
|
if (PSMT_IS32BIT(curvb.tex0.cpsm)) { // 32 bit
|
||||||
memcpy_amd(&data[0], g_pbyGSClut+nClutOffset, clutsize);
|
memcpy_amd(&data[0], g_pbyGSClut+nClutOffset, clutsize);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
|
|
@ -362,10 +362,11 @@ ZeroGS::SaveTex(tex0Info* ptex, int usevid)
|
||||||
glGetTexImage(GL_TEXTURE_RECTANGLE_NV, 0, GL_RGBA, pmemtarg->fmt, &srcdata[0]);
|
glGetTexImage(GL_TEXTURE_RECTANGLE_NV, 0, GL_RGBA, pmemtarg->fmt, &srcdata[0]);
|
||||||
|
|
||||||
u32 offset = pmemtarg->realy * 4 * GPU_TEXWIDTH;
|
u32 offset = pmemtarg->realy * 4 * GPU_TEXWIDTH;
|
||||||
|
|
||||||
if (ptex->psm == PSMT8)
|
if (ptex->psm == PSMT8)
|
||||||
offset *= ptex->cpsm <= 1 ? 4 : 2;
|
offset *= PSMT_IS32BIT(ptex->cpsm) ? 4 : 2;
|
||||||
else if (ptex->psm == PSMT4)
|
else if (ptex->psm == PSMT4)
|
||||||
offset *= ptex->cpsm <= 1 ? 8 : 4;
|
offset *= PSMT_IS32BIT(ptex->cpsm) ? 8 : 4;
|
||||||
|
|
||||||
psrc = &srcdata[0] - offset;
|
psrc = &srcdata[0] - offset;
|
||||||
}
|
}
|
||||||
|
@ -410,8 +411,8 @@ ZeroGS::SaveTex(tex0Info* ptex, int usevid)
|
||||||
case PSMT8:
|
case PSMT8:
|
||||||
addr = getPixelAddress8(j, i, ptex->tbp0, ptex->tbw);
|
addr = getPixelAddress8(j, i, ptex->tbp0, ptex->tbw);
|
||||||
if (addr < 0x00400000) {
|
if (addr < 0x00400000) {
|
||||||
if (usevid) {
|
if (usevid) {
|
||||||
if (ptex->cpsm <= 1)
|
if (PSMT_IS32BIT(ptex->cpsm))
|
||||||
u = *(u32*)(psrc+4*addr);
|
u = *(u32*)(psrc+4*addr);
|
||||||
else
|
else
|
||||||
u = RGBA16to32(*(u16*)(psrc+2*addr));
|
u = RGBA16to32(*(u16*)(psrc+2*addr));
|
||||||
|
@ -428,8 +429,10 @@ ZeroGS::SaveTex(tex0Info* ptex, int usevid)
|
||||||
|
|
||||||
if( addr < 2*0x00400000 ) {
|
if( addr < 2*0x00400000 ) {
|
||||||
if( usevid ) {
|
if( usevid ) {
|
||||||
if( ptex->cpsm <= 1 ) u = *(u32*)(psrc+4*addr);
|
if (PSMT_IS32BIT(ptex->cpsm))
|
||||||
else u = RGBA16to32(*(u16*)(psrc+2*addr));
|
u = *(u32*)(psrc+4*addr);
|
||||||
|
else
|
||||||
|
u = RGBA16to32(*(u16*)(psrc+2*addr));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
u = readPixel4(psrc, j, i, ptex->tbp0, ptex->tbw);
|
u = readPixel4(psrc, j, i, ptex->tbp0, ptex->tbw);
|
||||||
|
@ -442,8 +445,10 @@ ZeroGS::SaveTex(tex0Info* ptex, int usevid)
|
||||||
|
|
||||||
if( 4*addr < 0x00400000 ) {
|
if( 4*addr < 0x00400000 ) {
|
||||||
if( usevid ) {
|
if( usevid ) {
|
||||||
if( ptex->cpsm <= 1 ) u = *(u32*)(psrc+4*addr);
|
if (PSMT_IS32BIT(ptex->cpsm))
|
||||||
else u = RGBA16to32(*(u16*)(psrc+2*addr));
|
u = *(u32*)(psrc+4*addr);
|
||||||
|
else
|
||||||
|
u = RGBA16to32(*(u16*)(psrc+2*addr));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
u = readPixel8H(psrc, j, i, ptex->tbp0, ptex->tbw);
|
u = readPixel8H(psrc, j, i, ptex->tbp0, ptex->tbw);
|
||||||
|
@ -457,8 +462,10 @@ ZeroGS::SaveTex(tex0Info* ptex, int usevid)
|
||||||
|
|
||||||
if( 4*addr < 0x00400000 ) {
|
if( 4*addr < 0x00400000 ) {
|
||||||
if( usevid ) {
|
if( usevid ) {
|
||||||
if( ptex->cpsm <= 1 ) u = *(u32*)(psrc+4*addr);
|
if (PSMT_IS32BIT(ptex->cpsm))
|
||||||
else u = RGBA16to32(*(u16*)(psrc+2*addr));
|
u = *(u32*)(psrc+4*addr);
|
||||||
|
else
|
||||||
|
u = RGBA16to32(*(u16*)(psrc+2*addr));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
u = readPixel4HL(psrc, j, i, ptex->tbp0, ptex->tbw);
|
u = readPixel4HL(psrc, j, i, ptex->tbp0, ptex->tbw);
|
||||||
|
@ -471,8 +478,10 @@ ZeroGS::SaveTex(tex0Info* ptex, int usevid)
|
||||||
|
|
||||||
if( 4*addr < 0x00400000 ) {
|
if( 4*addr < 0x00400000 ) {
|
||||||
if( usevid ) {
|
if( usevid ) {
|
||||||
if( ptex->cpsm <= 1 ) u = *(u32*)(psrc+4*addr);
|
if (PSMT_IS32BIT(ptex->cpsm))
|
||||||
else u = RGBA16to32(*(u16*)(psrc+2*addr));
|
u = *(u32*)(psrc+4*addr);
|
||||||
|
else
|
||||||
|
u = RGBA16to32(*(u16*)(psrc+2*addr));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
u = readPixel4HH(psrc, j, i, ptex->tbp0, ptex->tbw);
|
u = readPixel4HH(psrc, j, i, ptex->tbp0, ptex->tbw);
|
||||||
|
|
|
@ -1492,7 +1492,7 @@ bool ZeroGS::CMemoryTarget::ValidateClut(const tex0Info& tex0)
|
||||||
int clutsize = 0;
|
int clutsize = 0;
|
||||||
|
|
||||||
int entries = PSMT_IS8CLUT(tex0.psm) ? 256 : 16;
|
int entries = PSMT_IS8CLUT(tex0.psm) ? 256 : 16;
|
||||||
if( tex0.cpsm <= 1 ) { // 32 bit
|
if (PSMT_IS32BIT(tex0.cpsm)) { // 32 bit
|
||||||
nClutOffset = 64 * tex0.csa;
|
nClutOffset = 64 * tex0.csa;
|
||||||
clutsize = min(entries, 256-tex0.csa*16)*4;
|
clutsize = min(entries, 256-tex0.csa*16)*4;
|
||||||
}
|
}
|
||||||
|
@ -1503,7 +1503,7 @@ bool ZeroGS::CMemoryTarget::ValidateClut(const tex0Info& tex0)
|
||||||
|
|
||||||
assert( clutsize == clut.size() );
|
assert( clutsize == clut.size() );
|
||||||
|
|
||||||
if( cpsm <= 1 ) {
|
if( PSMT_IS32BIT(cpsm)) {
|
||||||
if( memcmp_mmx(&clut[0], g_pbyGSClut+nClutOffset, clutsize) )
|
if( memcmp_mmx(&clut[0], g_pbyGSClut+nClutOffset, clutsize) )
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -1570,93 +1570,111 @@ bool ZeroGS::CMemoryTarget::ValidateTex(const tex0Info& tex0, int starttex, int
|
||||||
}
|
}
|
||||||
|
|
||||||
// used to build clut textures (note that this is for both 16 and 32 bit cluts)
|
// used to build clut textures (note that this is for both 16 and 32 bit cluts)
|
||||||
#define BUILDCLUT() { \
|
template <class T>
|
||||||
switch(tex0.psm) { \
|
static __forceinline void BuildClut(u32 psm, u32 height, T* pclut, u8* psrc, T* pdst)
|
||||||
case PSMT8: \
|
{
|
||||||
for(int i = 0; i < targ->height; ++i) { \
|
switch(psm)
|
||||||
for(int j = 0; j < GPU_TEXWIDTH/2; ++j) { \
|
{
|
||||||
pdst[0] = pclut[psrc[0]]; \
|
case PSMT8:
|
||||||
pdst[1] = pclut[psrc[1]]; \
|
for(int i = 0; i < height; ++i)
|
||||||
pdst[2] = pclut[psrc[2]]; \
|
{
|
||||||
pdst[3] = pclut[psrc[3]]; \
|
for(int j = 0; j < GPU_TEXWIDTH/2; ++j)
|
||||||
pdst[4] = pclut[psrc[4]]; \
|
{
|
||||||
pdst[5] = pclut[psrc[5]]; \
|
pdst[0] = pclut[psrc[0]];
|
||||||
pdst[6] = pclut[psrc[6]]; \
|
pdst[1] = pclut[psrc[1]];
|
||||||
pdst[7] = pclut[psrc[7]]; \
|
pdst[2] = pclut[psrc[2]];
|
||||||
pdst += 8; \
|
pdst[3] = pclut[psrc[3]];
|
||||||
psrc += 8; \
|
pdst[4] = pclut[psrc[4]];
|
||||||
} \
|
pdst[5] = pclut[psrc[5]];
|
||||||
} \
|
pdst[6] = pclut[psrc[6]];
|
||||||
break; \
|
pdst[7] = pclut[psrc[7]];
|
||||||
case PSMT4: \
|
pdst += 8;
|
||||||
for(int i = 0; i < targ->height; ++i) { \
|
psrc += 8;
|
||||||
for(int j = 0; j < GPU_TEXWIDTH; ++j) { \
|
}
|
||||||
pdst[0] = pclut[psrc[0]&15]; \
|
}
|
||||||
pdst[1] = pclut[psrc[0]>>4]; \
|
break;
|
||||||
pdst[2] = pclut[psrc[1]&15]; \
|
|
||||||
pdst[3] = pclut[psrc[1]>>4]; \
|
case PSMT4:
|
||||||
pdst[4] = pclut[psrc[2]&15]; \
|
for(int i = 0; i < height; ++i)
|
||||||
pdst[5] = pclut[psrc[2]>>4]; \
|
{
|
||||||
pdst[6] = pclut[psrc[3]&15]; \
|
for(int j = 0; j < GPU_TEXWIDTH; ++j)
|
||||||
pdst[7] = pclut[psrc[3]>>4]; \
|
{
|
||||||
\
|
pdst[0] = pclut[psrc[0]&15];
|
||||||
pdst += 8; \
|
pdst[1] = pclut[psrc[0]>>4];
|
||||||
psrc += 4; \
|
pdst[2] = pclut[psrc[1]&15];
|
||||||
} \
|
pdst[3] = pclut[psrc[1]>>4];
|
||||||
} \
|
pdst[4] = pclut[psrc[2]&15];
|
||||||
break; \
|
pdst[5] = pclut[psrc[2]>>4];
|
||||||
case PSMT8H: \
|
pdst[6] = pclut[psrc[3]&15];
|
||||||
for(int i = 0; i < targ->height; ++i) { \
|
pdst[7] = pclut[psrc[3]>>4];
|
||||||
for(int j = 0; j < GPU_TEXWIDTH/8; ++j) { \
|
|
||||||
pdst[0] = pclut[psrc[3]]; \
|
pdst += 8;
|
||||||
pdst[1] = pclut[psrc[7]]; \
|
psrc += 4;
|
||||||
pdst[2] = pclut[psrc[11]]; \
|
}
|
||||||
pdst[3] = pclut[psrc[15]]; \
|
}
|
||||||
pdst[4] = pclut[psrc[19]]; \
|
break;
|
||||||
pdst[5] = pclut[psrc[23]]; \
|
|
||||||
pdst[6] = pclut[psrc[27]]; \
|
case PSMT8H:
|
||||||
pdst[7] = pclut[psrc[31]]; \
|
for(int i = 0; i < height; ++i)
|
||||||
pdst += 8; \
|
{
|
||||||
psrc += 32; \
|
for(int j = 0; j < GPU_TEXWIDTH/8; ++j)
|
||||||
} \
|
{
|
||||||
} \
|
pdst[0] = pclut[psrc[3]];
|
||||||
break; \
|
pdst[1] = pclut[psrc[7]];
|
||||||
case PSMT4HH: \
|
pdst[2] = pclut[psrc[11]];
|
||||||
for(int i = 0; i < targ->height; ++i) { \
|
pdst[3] = pclut[psrc[15]];
|
||||||
for(int j = 0; j < GPU_TEXWIDTH/8; ++j) { \
|
pdst[4] = pclut[psrc[19]];
|
||||||
pdst[0] = pclut[psrc[3]>>4]; \
|
pdst[5] = pclut[psrc[23]];
|
||||||
pdst[1] = pclut[psrc[7]>>4]; \
|
pdst[6] = pclut[psrc[27]];
|
||||||
pdst[2] = pclut[psrc[11]>>4]; \
|
pdst[7] = pclut[psrc[31]];
|
||||||
pdst[3] = pclut[psrc[15]>>4]; \
|
pdst += 8;
|
||||||
pdst[4] = pclut[psrc[19]>>4]; \
|
psrc += 32;
|
||||||
pdst[5] = pclut[psrc[23]>>4]; \
|
}
|
||||||
pdst[6] = pclut[psrc[27]>>4]; \
|
}
|
||||||
pdst[7] = pclut[psrc[31]>>4]; \
|
break;
|
||||||
pdst += 8; \
|
|
||||||
psrc += 32; \
|
case PSMT4HH:
|
||||||
} \
|
for(int i = 0; i < height; ++i)
|
||||||
} \
|
{
|
||||||
break; \
|
for(int j = 0; j < GPU_TEXWIDTH/8; ++j)
|
||||||
case PSMT4HL: \
|
{
|
||||||
for(int i = 0; i < targ->height; ++i) { \
|
pdst[0] = pclut[psrc[3]>>4];
|
||||||
for(int j = 0; j < GPU_TEXWIDTH/8; ++j) { \
|
pdst[1] = pclut[psrc[7]>>4];
|
||||||
pdst[0] = pclut[psrc[3]&15]; \
|
pdst[2] = pclut[psrc[11]>>4];
|
||||||
pdst[1] = pclut[psrc[7]&15]; \
|
pdst[3] = pclut[psrc[15]>>4];
|
||||||
pdst[2] = pclut[psrc[11]&15]; \
|
pdst[4] = pclut[psrc[19]>>4];
|
||||||
pdst[3] = pclut[psrc[15]&15]; \
|
pdst[5] = pclut[psrc[23]>>4];
|
||||||
pdst[4] = pclut[psrc[19]&15]; \
|
pdst[6] = pclut[psrc[27]>>4];
|
||||||
pdst[5] = pclut[psrc[23]&15]; \
|
pdst[7] = pclut[psrc[31]>>4];
|
||||||
pdst[6] = pclut[psrc[27]&15]; \
|
pdst += 8;
|
||||||
pdst[7] = pclut[psrc[31]&15]; \
|
psrc += 32;
|
||||||
pdst += 8; \
|
}
|
||||||
psrc += 32; \
|
}
|
||||||
} \
|
break;
|
||||||
} \
|
|
||||||
break; \
|
case PSMT4HL:
|
||||||
default: \
|
for(int i = 0; i < height; ++i)
|
||||||
assert(0); \
|
{
|
||||||
} \
|
for(int j = 0; j < GPU_TEXWIDTH/8; ++j)
|
||||||
} \
|
{
|
||||||
|
pdst[0] = pclut[psrc[3]&15];
|
||||||
|
pdst[1] = pclut[psrc[7]&15];
|
||||||
|
pdst[2] = pclut[psrc[11]&15];
|
||||||
|
pdst[3] = pclut[psrc[15]&15];
|
||||||
|
pdst[4] = pclut[psrc[19]&15];
|
||||||
|
pdst[5] = pclut[psrc[23]&15];
|
||||||
|
pdst[6] = pclut[psrc[27]&15];
|
||||||
|
pdst[7] = pclut[psrc[31]&15];
|
||||||
|
pdst += 8;
|
||||||
|
psrc += 32;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
assert(0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#define TARGET_THRESH 0x500
|
#define TARGET_THRESH 0x500
|
||||||
|
|
||||||
|
@ -1688,7 +1706,7 @@ int MemoryTarget_CompareTarget (list<CMemoryTarget>::iterator& it, const tex0Inf
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
if( tex0.cpsm <= 1 ) {
|
if (PSMT_IS32BIT(tex0.cpsm)) {
|
||||||
if (memcmp_mmx(&it->clut[0], g_pbyGSClut+nClutOffset, clutsize)) {
|
if (memcmp_mmx(&it->clut[0], g_pbyGSClut+nClutOffset, clutsize)) {
|
||||||
return 2;
|
return 2;
|
||||||
}
|
}
|
||||||
|
@ -1713,7 +1731,7 @@ void MemoryTarget_GetClutVariables (int& nClutOffset, int& clutsize, const tex0I
|
||||||
|
|
||||||
if( PSMT_ISCLUT(tex0.psm) ) {
|
if( PSMT_ISCLUT(tex0.psm) ) {
|
||||||
int entries = PSMT_IS8CLUT(tex0.psm) ? 256 : 16;
|
int entries = PSMT_IS8CLUT(tex0.psm) ? 256 : 16;
|
||||||
if( tex0.cpsm <= 1 ) {
|
if (PSMT_IS32BIT(tex0.cpsm)) {
|
||||||
nClutOffset = 64 * tex0.csa;
|
nClutOffset = 64 * tex0.csa;
|
||||||
clutsize = min(entries, 256-tex0.csa*16)*4;
|
clutsize = min(entries, 256-tex0.csa*16)*4;
|
||||||
}
|
}
|
||||||
|
@ -1788,46 +1806,68 @@ ZeroGS::CMemoryTarget* ZeroGS::CMemoryTargetMngr::MemoryTarget_SearchExistTarget
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns the channel multiplier for a target stored in pixel mode psm:
//   PSMT8 -> 4, PSMT4 -> 8, any other CLUT mode -> 1,
//   16-bit non-CLUT modes -> 2, everything else -> 1.
static __forceinline int NumberOfChannels(int psm)
{
	if (PSMT_ISCLUT(psm))
	{
		if (psm == PSMT8) return 4;
		if (psm == PSMT4) return 8;
		return 1;
	}

	// 16z needs to be a8r8g8b8
	return PSMT_IS16BIT(psm) ? 2 : 1;
}
|
||||||
|
|
||||||
ZeroGS::CMemoryTarget* ZeroGS::CMemoryTargetMngr::MemoryTarget_ClearedTargetsSearch(int fmt, int widthmult, int channels, int height) {
|
ZeroGS::CMemoryTarget* ZeroGS::CMemoryTargetMngr::MemoryTarget_ClearedTargetsSearch(int fmt, int widthmult, int channels, int height)
|
||||||
|
{
|
||||||
CMemoryTarget* targ = NULL;
|
CMemoryTarget* targ = NULL;
|
||||||
|
|
||||||
if( listClearedTargets.size() > 0 ) {
|
if (listClearedTargets.size() > 0)
|
||||||
|
{
|
||||||
list<CMemoryTarget>::iterator itbest = listClearedTargets.begin();
|
list<CMemoryTarget>::iterator itbest = listClearedTargets.begin();
|
||||||
while(itbest != listClearedTargets.end()) {
|
|
||||||
|
while(itbest != listClearedTargets.end())
|
||||||
if( height <= itbest->realheight && itbest->fmt == fmt && itbest->widthmult == widthmult && itbest->channels == channels ) {
|
{
|
||||||
|
if ((height <= itbest->realheight) && (itbest->fmt == fmt) && (itbest->widthmult == widthmult) && (itbest->channels == channels))
|
||||||
|
{
|
||||||
// check channels
|
// check channels
|
||||||
int targchannels = 1;
|
int targchannels = NumberOfChannels(itbest->psm);
|
||||||
if( PSMT_ISCLUT(itbest->psm) ) {
|
|
||||||
if( itbest->psm == PSMT8 ) targchannels = 4;
|
if (targchannels == channels) break;
|
||||||
else if( itbest->psm == PSMT4 ) targchannels = 8;
|
|
||||||
}
|
|
||||||
else if( PSMT_IS16BIT(itbest->psm) ) {
|
|
||||||
targchannels = 2;
|
|
||||||
}
|
|
||||||
if( targchannels == channels )
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
++itbest;
|
++itbest;
|
||||||
}
|
}
|
||||||
|
|
||||||
if( itbest != listClearedTargets.end()) {
|
if (itbest != listClearedTargets.end())
|
||||||
|
{
|
||||||
listTargets.splice(listTargets.end(), listClearedTargets, itbest);
|
listTargets.splice(listTargets.end(), listClearedTargets, itbest);
|
||||||
targ = &listTargets.back();
|
targ = &listTargets.back();
|
||||||
targ->validatecount = 0;
|
targ->validatecount = 0;
|
||||||
}
|
}
|
||||||
else {
|
else
|
||||||
|
{
|
||||||
// create a new
|
// create a new
|
||||||
listTargets.push_back(CMemoryTarget());
|
listTargets.push_back(CMemoryTarget());
|
||||||
targ = &listTargets.back();
|
targ = &listTargets.back();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else {
|
else
|
||||||
|
{
|
||||||
listTargets.push_back(CMemoryTarget());
|
listTargets.push_back(CMemoryTarget());
|
||||||
targ = &listTargets.back();
|
targ = &listTargets.back();
|
||||||
}
|
}
|
||||||
|
|
||||||
return targ;
|
return targ;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1835,6 +1875,7 @@ ZeroGS::CMemoryTarget* ZeroGS::CMemoryTargetMngr::GetMemoryTarget(const tex0Info
|
||||||
{
|
{
|
||||||
FUNCLOG
|
FUNCLOG
|
||||||
int start, end, nClutOffset, clutsize;
|
int start, end, nClutOffset, clutsize;
|
||||||
|
const int TexWidth = GPU_TEXWIDTH * 4;
|
||||||
|
|
||||||
MemoryTarget_GetClutVariables (nClutOffset, clutsize, tex0);
|
MemoryTarget_GetClutVariables (nClutOffset, clutsize, tex0);
|
||||||
MemoryTarget_GetMemAddress(start, end, tex0);
|
MemoryTarget_GetMemAddress(start, end, tex0);
|
||||||
|
@ -1847,66 +1888,65 @@ ZeroGS::CMemoryTarget* ZeroGS::CMemoryTargetMngr::GetMemoryTarget(const tex0Info
|
||||||
|
|
||||||
u32 fmt = GL_UNSIGNED_BYTE;
|
u32 fmt = GL_UNSIGNED_BYTE;
|
||||||
// if ((PSMT_ISCLUT(tex0.psm) && tex0.cpsm > 1) || tex0.psm == PSMCT16 || tex0.psm == PSMCT16S) {
|
// if ((PSMT_ISCLUT(tex0.psm) && tex0.cpsm > 1) || tex0.psm == PSMCT16 || tex0.psm == PSMCT16S) {
|
||||||
if (PSMT_ISHALF_STORAGE(tex0)) {
|
if (PSMT_ISHALF_STORAGE(tex0))
|
||||||
|
{
|
||||||
fmt = GL_UNSIGNED_SHORT_1_5_5_5_REV;
|
fmt = GL_UNSIGNED_SHORT_1_5_5_5_REV;
|
||||||
}
|
}
|
||||||
|
|
||||||
int widthmult = 1;
|
int widthmult = 1;
|
||||||
if ((g_MaxTexHeight < 4096) && (end-start > g_MaxTexHeight))
|
int channels = NumberOfChannels(tex0.psm);
|
||||||
widthmult = 2;
|
|
||||||
|
if ((g_MaxTexHeight < 4096) && (end-start > g_MaxTexHeight)) widthmult = 2;
|
||||||
int channels = 1;
|
|
||||||
if( PSMT_ISCLUT(tex0.psm) ) {
|
|
||||||
if( tex0.psm == PSMT8 ) channels = 4;
|
|
||||||
else if( tex0.psm == PSMT4 ) channels = 8;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
if( PSMT_IS16BIT(tex0.psm) ) {
|
|
||||||
// 16z needs to be a8r8g8b8
|
|
||||||
channels = 2;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
targ = MemoryTarget_ClearedTargetsSearch(fmt, widthmult, channels, end - start) ;
|
targ = MemoryTarget_ClearedTargetsSearch(fmt, widthmult, channels, end - start) ;
|
||||||
|
|
||||||
// fill local clut
|
// fill local clut
|
||||||
if( PSMT_ISCLUT(tex0.psm) ) {
|
if (PSMT_ISCLUT(tex0.psm))
|
||||||
|
{
|
||||||
assert( clutsize > 0 );
|
assert( clutsize > 0 );
|
||||||
targ->cpsm = tex0.cpsm;
|
targ->cpsm = tex0.cpsm;
|
||||||
targ->clut.reserve(256*4); // no matter what
|
targ->clut.reserve(256 * 4); // no matter what
|
||||||
targ->clut.resize(clutsize);
|
targ->clut.resize(clutsize);
|
||||||
|
|
||||||
if( tex0.cpsm <= 1 ) { // 32 bit
|
if (PSMT_IS32BIT(tex0.cpsm)) // 32 bit
|
||||||
memcpy_amd(&targ->clut[0], g_pbyGSClut+nClutOffset, clutsize);
|
{
|
||||||
|
memcpy_amd(&targ->clut[0], g_pbyGSClut + nClutOffset, clutsize);
|
||||||
}
|
}
|
||||||
else {
|
else
|
||||||
|
{
|
||||||
u16* pClutBuffer = (u16*)(g_pbyGSClut + nClutOffset);
|
u16* pClutBuffer = (u16*)(g_pbyGSClut + nClutOffset);
|
||||||
u16* pclut = (u16*)&targ->clut[0];
|
u16* pclut = (u16*)&targ->clut[0];
|
||||||
int left = ((u32)nClutOffset & 2) ? 0 : ((nClutOffset&0x3ff)/2)+clutsize-512;
|
|
||||||
if( left > 0 ) clutsize -= left;
|
int left = ((u32)nClutOffset & 2) ? 0 : ((nClutOffset & 0x3ff) / 2) + clutsize - 512;
|
||||||
|
if (left > 0) clutsize -= left;
|
||||||
|
|
||||||
while(clutsize > 0) {
|
while(clutsize > 0)
|
||||||
|
{
|
||||||
pclut[0] = pClutBuffer[0];
|
pclut[0] = pClutBuffer[0];
|
||||||
pclut++;
|
pclut++;
|
||||||
pClutBuffer+=2;
|
pClutBuffer += 2;
|
||||||
clutsize -= 2;
|
clutsize -= 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
if( left > 0) {
|
if (left > 0)
|
||||||
|
{
|
||||||
pClutBuffer = (u16*)(g_pbyGSClut + 2);
|
pClutBuffer = (u16*)(g_pbyGSClut + 2);
|
||||||
while(left > 0) {
|
|
||||||
|
while(left > 0)
|
||||||
|
{
|
||||||
pclut[0] = pClutBuffer[0];
|
pclut[0] = pClutBuffer[0];
|
||||||
left -= 2;
|
|
||||||
pClutBuffer += 2;
|
|
||||||
pclut++;
|
pclut++;
|
||||||
|
pClutBuffer += 2;
|
||||||
|
left -= 2;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if( targ->ptex != NULL ) {
|
if (targ->ptex != NULL)
|
||||||
|
{
|
||||||
assert( end-start <= targ->realheight && targ->fmt == fmt && targ->widthmult == widthmult );
|
assert( end-start <= targ->realheight && targ->fmt == fmt && targ->widthmult == widthmult );
|
||||||
|
|
||||||
// good enough, so init
|
// good enough, so init
|
||||||
targ->realy = targ->starty = start;
|
targ->realy = targ->starty = start;
|
||||||
targ->usedstamp = curstamp;
|
targ->usedstamp = curstamp;
|
||||||
|
@ -1915,8 +1955,8 @@ ZeroGS::CMemoryTarget* ZeroGS::CMemoryTargetMngr::GetMemoryTarget(const tex0Info
|
||||||
targ->height = end-start;
|
targ->height = end-start;
|
||||||
}
|
}
|
||||||
|
|
||||||
if( targ->ptex == NULL ) {
|
if (targ->ptex == NULL)
|
||||||
|
{
|
||||||
// not initialized yet
|
// not initialized yet
|
||||||
targ->fmt = fmt;
|
targ->fmt = fmt;
|
||||||
targ->realy = targ->starty = start;
|
targ->realy = targ->starty = start;
|
||||||
|
@ -1933,74 +1973,92 @@ ZeroGS::CMemoryTarget* ZeroGS::CMemoryTargetMngr::GetMemoryTarget(const tex0Info
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifndef RELEASE_TO_PUBLIC
|
#ifndef RELEASE_TO_PUBLIC
|
||||||
g_TransferredToGPU += GPU_TEXWIDTH * channels * 4 * targ->height;
|
g_TransferredToGPU += TexWidth * channels * targ->height;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// fill with data
|
// fill with data
|
||||||
if( targ->ptex->memptr == NULL ) {
|
if (targ->ptex->memptr == NULL)
|
||||||
targ->ptex->memptr = (u8*)_aligned_malloc(4 * GPU_TEXWIDTH * targ->realheight, 16);
|
{
|
||||||
|
targ->ptex->memptr = (u8*)_aligned_malloc(TexWidth * targ->realheight, 16);
|
||||||
assert(targ->ptex->ref > 0 );
|
assert(targ->ptex->ref > 0 );
|
||||||
}
|
}
|
||||||
|
|
||||||
memcpy_amd(targ->ptex->memptr, g_pbyGSMemory + 4 * GPU_TEXWIDTH * targ->realy, 4 * GPU_TEXWIDTH * targ->height);
|
memcpy_amd(targ->ptex->memptr, g_pbyGSMemory + TexWidth * targ->realy, TexWidth * targ->height);
|
||||||
vector<u8> texdata;
|
vector<u8> texdata;
|
||||||
u8* ptexdata = NULL;
|
u8* ptexdata = NULL;
|
||||||
|
|
||||||
if( PSMT_ISCLUT(tex0.psm) ) {
|
const int cur_width = GPU_TEXWIDTH * channels * widthmult;
|
||||||
|
|
||||||
texdata.resize( (tex0.cpsm <= 1?4:2) *GPU_TEXWIDTH*channels*widthmult*(targ->realheight+widthmult-1)/widthmult);
|
if (PSMT_ISCLUT(tex0.psm))
|
||||||
|
{
|
||||||
|
int new_size = cur_width * (targ->realheight + widthmult - 1)/widthmult;
|
||||||
|
|
||||||
|
if (PSMT_IS32BIT(tex0.cpsm))
|
||||||
|
new_size *= 4;
|
||||||
|
else
|
||||||
|
new_size *= 2;
|
||||||
|
|
||||||
|
texdata.resize(new_size);
|
||||||
ptexdata = &texdata[0];
|
ptexdata = &texdata[0];
|
||||||
|
|
||||||
u8* psrc = (u8*)(g_pbyGSMemory + 4 * GPU_TEXWIDTH * targ->realy);
|
u8* psrc = (u8*)(g_pbyGSMemory + 4 * GPU_TEXWIDTH * targ->realy);
|
||||||
|
|
||||||
if( tex0.cpsm <= 1 ) { // 32bit
|
if (PSMT_IS32BIT(tex0.cpsm)) // 32bit
|
||||||
|
{
|
||||||
u32* pclut = (u32*)&targ->clut[0];
|
u32* pclut = (u32*)&targ->clut[0];
|
||||||
u32* pdst = (u32*)ptexdata;
|
u32* pdst = (u32*)ptexdata;
|
||||||
|
|
||||||
BUILDCLUT();
|
BuildClut<u32>(tex0.psm, targ->height, pclut, psrc, pdst);
|
||||||
}
|
}
|
||||||
else {
|
else
|
||||||
|
{
|
||||||
u16* pclut = (u16*)&targ->clut[0];
|
u16* pclut = (u16*)&targ->clut[0];
|
||||||
u16* pdst = (u16*)ptexdata;
|
u16* pdst = (u16*)ptexdata;
|
||||||
|
|
||||||
BUILDCLUT();
|
BuildClut<u16>(tex0.psm, targ->height, pclut, psrc, pdst);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else {
|
else
|
||||||
if( tex0.psm == PSMT16Z || tex0.psm == PSMT16SZ ) {
|
{
|
||||||
|
if ((tex0.psm == PSMT16Z) || (tex0.psm == PSMT16SZ))
|
||||||
texdata.resize(4*GPU_TEXWIDTH*channels*widthmult*(targ->realheight+widthmult-1)/widthmult
|
{
|
||||||
|
// The unswizzle below expands every 16-bit source value to 4 bytes (value plus
// zero padding), so a row needs 4 * GPU_TEXWIDTH * channels bytes. cur_width has
// no factor of 4, so it is applied here; without it texdata is 4x too small and
// the copy writes past the end of the vector.
int new_size = 4 * cur_width * (targ->realheight + widthmult - 1)/widthmult;
|
||||||
#if defined(ZEROGS_SSE2)
|
#if defined(ZEROGS_SSE2)
|
||||||
+ 15 // reserve additional elements for alignment if SSE2 used.
|
// reserve additional elements for alignment if SSE2 used.
|
||||||
// better do it now, so less resizing would be needed
|
// better do it now, so less resizing would be needed
|
||||||
#endif
|
new_size += 15;
|
||||||
);
|
#endif
|
||||||
|
|
||||||
|
texdata.resize(new_size);
|
||||||
|
|
||||||
ptexdata = &texdata[0];
|
ptexdata = &texdata[0];
|
||||||
// needs to be 8 bit, use xmm for unpacking
|
// needs to be 8 bit, use xmm for unpacking
|
||||||
u16* dst = (u16*)ptexdata;
|
u16* dst = (u16*)ptexdata;
|
||||||
u16* src = (u16*)(g_pbyGSMemory + 4 * GPU_TEXWIDTH * targ->realy);
|
u16* src = (u16*)(g_pbyGSMemory + TexWidth * targ->realy);
|
||||||
|
|
||||||
#if defined(ZEROGS_SSE2)
|
#if defined(ZEROGS_SSE2)
|
||||||
if ( ((u32)(uptr)dst)%16 != 0 ) {
|
if (((u32)(uptr)dst)%16 != 0)
|
||||||
// This is not unusual situation, when vector<u8> does not 16bit alignment, that is destructive for SSE2
|
{
|
||||||
|
// This is not an unusual situation, when vector<u8> does not 16bit alignment, that is destructive for SSE2
|
||||||
// instruction movdqa [%eax], xmm0
|
// instruction movdqa [%eax], xmm0
|
||||||
// The idea would be resise vector to 15 elements, that set ptxedata to aligned position.
|
// The idea would be resise vector to 15 elements, that set ptxedata to aligned position.
|
||||||
// Later we would move eax by 16, so only we should verify is first element align
|
// Later we would move eax by 16, so only we should verify is first element align
|
||||||
// FIXME. As I see, texdata used only once here, it does not have any impact on other code.
|
// FIXME. As I see, texdata used only once here, it does not have any impact on other code.
|
||||||
// Probably, usage of _aligned_maloc() would be preferable.
|
// Probably, usage of _aligned_maloc() would be preferable.
|
||||||
int disalignment = 16 - ((u32)(uptr)dst)%16 ; // This is value of shift. It could be 0 < disalignment <= 15
|
int disalignment = 16 - ((u32)(uptr)dst) % 16 ; // This is value of shift. It could be 0 < disalignment <= 15
|
||||||
ptexdata = &texdata[disalignment]; // Set pointer to aligned element
|
ptexdata = &texdata[disalignment]; // Set pointer to aligned element
|
||||||
dst = (u16*)ptexdata;
|
dst = (u16*)ptexdata;
|
||||||
GS_LOG("Made alignment for texdata, 0x%x\n", dst );
|
GS_LOG("Made alignment for texdata, 0x%x\n", dst );
|
||||||
assert( ((u32)(uptr)dst)%16 == 0 ); // Assert, because at future could be vectors with uncontigious spaces
|
assert( ((u32)(uptr)dst) % 16 == 0 ); // Assert, because at future could be vectors with uncontigious spaces
|
||||||
}
|
}
|
||||||
|
|
||||||
int iters = targ->height*GPU_TEXWIDTH/16;
|
int iters = targ->height * GPU_TEXWIDTH / 16;
|
||||||
SSE2_UnswizzleZ16Target( dst, src, iters ) ;
|
SSE2_UnswizzleZ16Target( dst, src, iters );
|
||||||
#else // ZEROGS_SSE2
|
#else // ZEROGS_SSE2
|
||||||
for(int i = 0; i < targ->height; ++i) {
|
for(int i = 0; i < targ->height; ++i)
|
||||||
for(int j = 0; j < GPU_TEXWIDTH; ++j) {
|
{
|
||||||
|
for(int j = 0; j < GPU_TEXWIDTH; ++j)
|
||||||
|
{
|
||||||
dst[0] = src[0]; dst[1] = 0;
|
dst[0] = src[0]; dst[1] = 0;
|
||||||
dst[2] = src[1]; dst[3] = 0;
|
dst[2] = src[1]; dst[3] = 0;
|
||||||
dst += 4;
|
dst += 4;
|
||||||
|
@ -2009,7 +2067,8 @@ ZeroGS::CMemoryTarget* ZeroGS::CMemoryTargetMngr::GetMemoryTarget(const tex0Info
|
||||||
}
|
}
|
||||||
#endif // ZEROGS_SSE2
|
#endif // ZEROGS_SSE2
|
||||||
}
|
}
|
||||||
else {
|
else
|
||||||
|
{
|
||||||
ptexdata = targ->ptex->memptr;
|
ptexdata = targ->ptex->memptr;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -2017,36 +2076,40 @@ ZeroGS::CMemoryTarget* ZeroGS::CMemoryTargetMngr::GetMemoryTarget(const tex0Info
|
||||||
// create the texture
|
// create the texture
|
||||||
GL_REPORT_ERRORD();
|
GL_REPORT_ERRORD();
|
||||||
assert(ptexdata != NULL);
|
assert(ptexdata != NULL);
|
||||||
if( targ->ptex->tex == 0 )
|
|
||||||
glGenTextures(1, &targ->ptex->tex);
|
if (targ->ptex->tex == 0) glGenTextures(1, &targ->ptex->tex);
|
||||||
|
|
||||||
glBindTexture(GL_TEXTURE_RECTANGLE_NV, targ->ptex->tex);
|
glBindTexture(GL_TEXTURE_RECTANGLE_NV, targ->ptex->tex);
|
||||||
glTexImage2D(GL_TEXTURE_RECTANGLE_NV, 0, fmt==GL_UNSIGNED_BYTE?4:GL_RGB5_A1, GPU_TEXWIDTH*channels*widthmult,
|
glTexImage2D(GL_TEXTURE_RECTANGLE_NV, 0, (fmt == GL_UNSIGNED_BYTE) ? 4 : GL_RGB5_A1, GPU_TEXWIDTH * channels * widthmult,
|
||||||
(targ->realheight+widthmult-1)/widthmult, 0, GL_RGBA, fmt, ptexdata);
|
(targ->realheight + widthmult - 1)/widthmult, 0, GL_RGBA, fmt, ptexdata);
|
||||||
|
|
||||||
int realheight = targ->realheight;
|
int realheight = targ->realheight;
|
||||||
while( glGetError() != GL_NO_ERROR ) {
|
while(glGetError() != GL_NO_ERROR)
|
||||||
|
{
|
||||||
// release resources until can create
|
// release resources until can create
|
||||||
if( listClearedTargets.size() > 0 )
|
if (listClearedTargets.size() > 0)
|
||||||
|
{
|
||||||
listClearedTargets.pop_front();
|
listClearedTargets.pop_front();
|
||||||
else {
|
}
|
||||||
if( listTargets.size() == 0 ) {
|
else
|
||||||
ERROR_LOG("Failed to create %dx%x texture\n", GPU_TEXWIDTH*channels*widthmult, (realheight+widthmult-1)/widthmult);
|
{
|
||||||
channels = 1;
|
if (listTargets.size() == 0)
|
||||||
|
{
|
||||||
|
ERROR_LOG("Failed to create %dx%x texture\n", GPU_TEXWIDTH * channels * widthmult, (realheight + widthmult - 1)/widthmult);
|
||||||
|
//channels = 1;
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
DestroyOldest();
|
DestroyOldest();
|
||||||
}
|
}
|
||||||
|
|
||||||
glTexImage2D(GL_TEXTURE_RECTANGLE_NV, 0, 4, GPU_TEXWIDTH*channels*widthmult, (targ->realheight+widthmult-1)/widthmult, 0, GL_RGBA, fmt, ptexdata);
|
glTexImage2D(GL_TEXTURE_RECTANGLE_NV, 0, 4, GPU_TEXWIDTH * channels * widthmult, (targ->realheight + widthmult - 1) / widthmult, 0, GL_RGBA, fmt, ptexdata);
|
||||||
}
|
}
|
||||||
|
|
||||||
glTexParameteri(GL_TEXTURE_RECTANGLE_NV, GL_TEXTURE_WRAP_S, GL_CLAMP);
|
glTexParameteri(GL_TEXTURE_RECTANGLE_NV, GL_TEXTURE_WRAP_S, GL_CLAMP);
|
||||||
glTexParameteri(GL_TEXTURE_RECTANGLE_NV, GL_TEXTURE_WRAP_T, GL_CLAMP);
|
glTexParameteri(GL_TEXTURE_RECTANGLE_NV, GL_TEXTURE_WRAP_T, GL_CLAMP);
|
||||||
|
|
||||||
assert( tex0.psm != 0xd );
|
assert( tex0.psm != 0xd );
|
||||||
if( PSMT_ISCLUT(tex0.psm) )
|
if (PSMT_ISCLUT(tex0.psm)) assert( targ->clut.size() > 0 );
|
||||||
assert( targ->clut.size() > 0 );
|
|
||||||
|
|
||||||
return targ;
|
return targ;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue