mirror of https://github.com/PCSX2/pcsx2.git
GregMiscellaneous: zzogl-pg:
* Regroup the CLUT core functions into one big file. Note: the Code::Blocks project needs to be updated. And I hope the templates are MS-friendly :) git-svn-id: http://pcsx2.googlecode.com/svn/branches/GregMiscellaneous@3931 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
01c171e9e7
commit
97cd280684
|
@ -46,6 +46,7 @@ endif(CMAKE_BUILD_TYPE STREQUAL Release)
|
|||
|
||||
# zzogl sources
|
||||
set(zzoglSources
|
||||
Clut.cpp
|
||||
GifTransfer.cpp
|
||||
GLWin32.cpp
|
||||
GLWinX11.cpp
|
||||
|
@ -77,6 +78,7 @@ set(zzoglSources
|
|||
|
||||
# zzogl headers
|
||||
set(zzoglHeaders
|
||||
Clut.h
|
||||
common.h
|
||||
CRC.h
|
||||
GifTransfer.h
|
||||
|
|
|
@ -110,7 +110,7 @@ static bool SPAM_PASS;
|
|||
if( err != GL_NO_ERROR ) \
|
||||
{ \
|
||||
ZZLog::Error_Log("%s:%d: gl error %s (0x%x)", __FILE__, (int)__LINE__, error_name(err), err); \
|
||||
HandleGLError(); \
|
||||
/* HandleGLError();*/ \
|
||||
} \
|
||||
}
|
||||
#else
|
||||
|
|
|
@ -26,6 +26,7 @@
|
|||
#include "zerogs.h"
|
||||
#include "targets.h"
|
||||
#include "ZZoglShaders.h"
|
||||
#include "Clut.h"
|
||||
|
||||
#ifdef ZEROGS_SSE2
|
||||
#include <emmintrin.h>
|
||||
|
@ -1642,87 +1643,6 @@ void CMemoryTargetMngr::Destroy()
|
|||
listClearedTargets.clear();
|
||||
}
|
||||
|
||||
int memcmp_clut16(u16* pSavedBuffer, u16* pClutBuffer, int clutsize)
|
||||
{
|
||||
FUNCLOG
|
||||
assert((clutsize&31) == 0);
|
||||
|
||||
// left > 0 only when csa < 16
|
||||
int left = 0;
|
||||
if (((u32)(uptr)pClutBuffer & 2) == 0)
|
||||
{
|
||||
left = (((u32)(uptr)pClutBuffer & 0x3ff) / 2) + clutsize - 512;
|
||||
clutsize -= left;
|
||||
}
|
||||
|
||||
while (clutsize > 0)
|
||||
{
|
||||
for (int i = 0; i < 16; ++i)
|
||||
{
|
||||
if (pSavedBuffer[i] != pClutBuffer[2*i]) return 1;
|
||||
}
|
||||
|
||||
clutsize -= 32;
|
||||
pSavedBuffer += 16;
|
||||
pClutBuffer += 32;
|
||||
}
|
||||
|
||||
if (left > 0)
|
||||
{
|
||||
pClutBuffer = (u16*)(g_pbyGSClut + 2);
|
||||
|
||||
while (left > 0)
|
||||
{
|
||||
for (int i = 0; i < 16; ++i)
|
||||
{
|
||||
if (pSavedBuffer[i] != pClutBuffer[2*i]) return 1;
|
||||
}
|
||||
|
||||
left -= 32;
|
||||
|
||||
pSavedBuffer += 16;
|
||||
pClutBuffer += 32;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#if 0
|
||||
bool CMemoryTarget::ValidateClut(const tex0Info& tex0)
|
||||
{
|
||||
FUNCLOG
|
||||
assert(tex0.psm == psm && PSMT_ISCLUT(psm) && cpsm == tex0.cpsm);
|
||||
|
||||
int nClutOffset = 0, clutsize = 0;
|
||||
int entries = PSMT_IS8CLUT(tex0.psm) ? 256 : 16;
|
||||
|
||||
if (PSMT_IS32BIT(tex0.cpsm)) // 32 bit
|
||||
{
|
||||
nClutOffset = 64 * tex0.csa;
|
||||
clutsize = min(entries, 256 - tex0.csa * 16) * 4;
|
||||
}
|
||||
else
|
||||
{
|
||||
nClutOffset = 32 * (tex0.csa & 15) + (tex0.csa >= 16 ? 2 : 0);
|
||||
clutsize = min(entries, 512 - tex0.csa * 16) * 2;
|
||||
}
|
||||
|
||||
assert(clutsize == clut.size());
|
||||
|
||||
if (PSMT_IS32BIT(cpsm))
|
||||
{
|
||||
if (memcmp_mmx(&clut[0], g_pbyGSClut + nClutOffset, clutsize)) return false;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (memcmp_clut16((u16*)&clut[0], (u16*)(g_pbyGSClut + nClutOffset), clutsize)) return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
bool CMemoryTarget::ValidateTex(const tex0Info& tex0, int starttex, int endtex, bool bDeleteBadTex)
|
||||
{
|
||||
FUNCLOG
|
||||
|
@ -1783,113 +1703,6 @@ bool CMemoryTarget::ValidateTex(const tex0Info& tex0, int starttex, int endtex,
|
|||
return false;
|
||||
}
|
||||
|
||||
// used to build clut textures (note that this is for both 16 and 32 bit cluts)
|
||||
template <class T>
|
||||
static __forceinline void BuildClut(u32 psm, u32 height, T* pclut, u8* psrc, T* pdst)
|
||||
{
|
||||
switch (psm)
|
||||
{
|
||||
case PSMT8:
|
||||
for (u32 i = 0; i < height; ++i)
|
||||
{
|
||||
for (int j = 0; j < GPU_TEXWIDTH / 2; ++j)
|
||||
{
|
||||
pdst[0] = pclut[psrc[0]];
|
||||
pdst[1] = pclut[psrc[1]];
|
||||
pdst[2] = pclut[psrc[2]];
|
||||
pdst[3] = pclut[psrc[3]];
|
||||
pdst[4] = pclut[psrc[4]];
|
||||
pdst[5] = pclut[psrc[5]];
|
||||
pdst[6] = pclut[psrc[6]];
|
||||
pdst[7] = pclut[psrc[7]];
|
||||
pdst += 8;
|
||||
psrc += 8;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case PSMT4:
|
||||
for (u32 i = 0; i < height; ++i)
|
||||
{
|
||||
for (int j = 0; j < GPU_TEXWIDTH; ++j)
|
||||
{
|
||||
pdst[0] = pclut[psrc[0] & 15];
|
||||
pdst[1] = pclut[psrc[0] >> 4];
|
||||
pdst[2] = pclut[psrc[1] & 15];
|
||||
pdst[3] = pclut[psrc[1] >> 4];
|
||||
pdst[4] = pclut[psrc[2] & 15];
|
||||
pdst[5] = pclut[psrc[2] >> 4];
|
||||
pdst[6] = pclut[psrc[3] & 15];
|
||||
pdst[7] = pclut[psrc[3] >> 4];
|
||||
|
||||
pdst += 8;
|
||||
psrc += 4;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case PSMT8H:
|
||||
for (u32 i = 0; i < height; ++i)
|
||||
{
|
||||
for (int j = 0; j < GPU_TEXWIDTH / 8; ++j)
|
||||
{
|
||||
pdst[0] = pclut[psrc[3]];
|
||||
pdst[1] = pclut[psrc[7]];
|
||||
pdst[2] = pclut[psrc[11]];
|
||||
pdst[3] = pclut[psrc[15]];
|
||||
pdst[4] = pclut[psrc[19]];
|
||||
pdst[5] = pclut[psrc[23]];
|
||||
pdst[6] = pclut[psrc[27]];
|
||||
pdst[7] = pclut[psrc[31]];
|
||||
pdst += 8;
|
||||
psrc += 32;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case PSMT4HH:
|
||||
for (u32 i = 0; i < height; ++i)
|
||||
{
|
||||
for (int j = 0; j < GPU_TEXWIDTH / 8; ++j)
|
||||
{
|
||||
pdst[0] = pclut[psrc[3] >> 4];
|
||||
pdst[1] = pclut[psrc[7] >> 4];
|
||||
pdst[2] = pclut[psrc[11] >> 4];
|
||||
pdst[3] = pclut[psrc[15] >> 4];
|
||||
pdst[4] = pclut[psrc[19] >> 4];
|
||||
pdst[5] = pclut[psrc[23] >> 4];
|
||||
pdst[6] = pclut[psrc[27] >> 4];
|
||||
pdst[7] = pclut[psrc[31] >> 4];
|
||||
pdst += 8;
|
||||
psrc += 32;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case PSMT4HL:
|
||||
for (u32 i = 0; i < height; ++i)
|
||||
{
|
||||
for (int j = 0; j < GPU_TEXWIDTH / 8; ++j)
|
||||
{
|
||||
pdst[0] = pclut[psrc[3] & 15];
|
||||
pdst[1] = pclut[psrc[7] & 15];
|
||||
pdst[2] = pclut[psrc[11] & 15];
|
||||
pdst[3] = pclut[psrc[15] & 15];
|
||||
pdst[4] = pclut[psrc[19] & 15];
|
||||
pdst[5] = pclut[psrc[23] & 15];
|
||||
pdst[6] = pclut[psrc[27] & 15];
|
||||
pdst[7] = pclut[psrc[31] & 15];
|
||||
pdst += 8;
|
||||
psrc += 32;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
}
|
||||
|
||||
#define TARGET_THRESH 0x500
|
||||
|
||||
extern int g_MaxTexWidth, g_MaxTexHeight; // Maximum height & width of supported texture.
|
||||
|
@ -1926,10 +1739,10 @@ int CMemoryTargetMngr::CompareTarget(list<CMemoryTarget>::iterator& it, const te
|
|||
return 1;
|
||||
|
||||
if (PSMT_IS32BIT(tex0.cpsm)) {
|
||||
if (memcmp_mmx(&it->clut[0], g_pbyGSClut + nClutOffset, clutsize))
|
||||
if (Cmp_ClutBuffer_SavedClut<u32>((u32*)&it->clut[0], (u32*)(g_pbyGSClut + nClutOffset), clutsize))
|
||||
return 2;
|
||||
} else {
|
||||
if (memcmp_clut16((u16*)&it->clut[0], (u16*)(g_pbyGSClut + nClutOffset), clutsize))
|
||||
if (Cmp_ClutBuffer_SavedClut<u16>((u16*)&it->clut[0], (u16*)(g_pbyGSClut + nClutOffset), clutsize))
|
||||
return 2;
|
||||
}
|
||||
|
||||
|
@ -2136,38 +1949,9 @@ CMemoryTarget* CMemoryTargetMngr::GetMemoryTarget(const tex0Info& tex0, int forc
|
|||
targ->clut.resize(clutsize);
|
||||
|
||||
if (PSMT_IS32BIT(tex0.cpsm))
|
||||
{
|
||||
memcpy_amd(&targ->clut[0], g_pbyGSClut + nClutOffset, clutsize);
|
||||
}
|
||||
ClutBuffer_to_Array<u32>((u32*)&targ->clut[0], (u32*)(g_pbyGSClut + nClutOffset), clutsize);
|
||||
else
|
||||
{
|
||||
u16* pClutBuffer = (u16*)(g_pbyGSClut + nClutOffset);
|
||||
u16* pclut = (u16*) & targ->clut[0];
|
||||
int left = ((u32)nClutOffset & 2) ? 0 : ((nClutOffset & 0x3ff) / 2) + clutsize - 512;
|
||||
|
||||
if (left > 0) clutsize -= left;
|
||||
|
||||
while (clutsize > 0)
|
||||
{
|
||||
pclut[0] = pClutBuffer[0];
|
||||
pclut++;
|
||||
pClutBuffer += 2;
|
||||
clutsize -= 2;
|
||||
}
|
||||
|
||||
if (left > 0)
|
||||
{
|
||||
pClutBuffer = (u16*)(g_pbyGSClut + 2);
|
||||
|
||||
while (left > 0)
|
||||
{
|
||||
pclut[0] = pClutBuffer[0];
|
||||
left -= 2;
|
||||
pClutBuffer += 2;
|
||||
pclut++;
|
||||
}
|
||||
}
|
||||
}
|
||||
ClutBuffer_to_Array<u16>((u16*)&targ->clut[0], (u16*)(g_pbyGSClut + nClutOffset), clutsize);
|
||||
}
|
||||
|
||||
if (targ->ptex != NULL)
|
||||
|
@ -2226,14 +2010,14 @@ CMemoryTarget* CMemoryTargetMngr::GetMemoryTarget(const tex0Info& tex0, int forc
|
|||
u32* pclut = (u32*) & targ->clut[0];
|
||||
u32* pdst = (u32*)ptexdata;
|
||||
|
||||
BuildClut<u32>(tex0.psm, targ->height, pclut, psrc, pdst);
|
||||
Build_Clut_Texture<u32>(tex0.psm, targ->height, pclut, psrc, pdst);
|
||||
}
|
||||
else
|
||||
{
|
||||
u16* pclut = (u16*) & targ->clut[0];
|
||||
u16* pdst = (u16*)ptexdata;
|
||||
|
||||
BuildClut<u16>(tex0.psm, targ->height, pclut, psrc, pdst);
|
||||
Build_Clut_Texture<u16>(tex0.psm, targ->height, pclut, psrc, pdst);
|
||||
}
|
||||
}
|
||||
else
|
||||
|
|
|
@ -594,6 +594,7 @@ void __fastcall Frame16SwizzleBlock16ZA4_c(u16* dst, Vector_16F* src, int srcpit
|
|||
// }
|
||||
//}
|
||||
|
||||
#if 0
|
||||
extern "C" void __fastcall WriteCLUT_T32_I8_CSM1_sse2(u32* vm, u32* clut)
|
||||
{
|
||||
__m128i* src = (__m128i*)vm;
|
||||
|
@ -1137,9 +1138,11 @@ __forceinline void WriteCLUT_T16_I8_CSM1_sse2(u32* vm, u32 csa)
|
|||
vm += 16; // go down one column
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // ZEROGS_SSE2
|
||||
|
||||
#if 0
|
||||
void __fastcall WriteCLUT_T16_I8_CSM1_c(u32* _vm, u32* _clut)
|
||||
{
|
||||
const static u32 map[] =
|
||||
|
@ -1251,6 +1254,8 @@ void __fastcall WriteCLUT_T32_I4_CSM1_c(u32* vm, u32* clut)
|
|||
dst[7] = src[7];
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
void SSE2_UnswizzleZ16Target(u16* dst, u16* src, int iters)
|
||||
{
|
||||
|
||||
|
|
|
@ -33,10 +33,7 @@
|
|||
#include "GLWin.h"
|
||||
#include "ZZoglShaders.h"
|
||||
#include "ZZKick.h"
|
||||
|
||||
#ifdef ZEROGS_SSE2
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
#include "Clut.h"
|
||||
|
||||
//----------------------- Defines
|
||||
|
||||
|
@ -518,217 +515,6 @@ void ExtWrite()
|
|||
// case 7: ASSERT(0); return false;
|
||||
// default: __assume(0);
|
||||
|
||||
// Tells whether the palette stored in GS local memory differs from the copy
// currently held in the CLUT buffer.
//
// highdword: upper half of a TEX0-style register; cpsm, csm and csa are
//            decoded from it with the ZZOglGet_*_TexBits helpers.
// psm:       texture pixel format; 8-bit CLUT formats compare 256 entries,
//            everything else 16.
// cld:       unused by this function.
// cbp:       palette base pointer, in units of 256 bytes of GS memory.
//
// Returns true when at least one entry differs. Also returns true without
// comparing anything when cpsm > 1 or csm is non-zero, because only the
// 32-bit, CSM1 layout is handled here.
//
// The former MMX inline-assembly variant has been removed: TEST_THIS was
// defined unconditionally right in front of the #ifdef that selected it, so
// that branch could never be compiled.
bool IsDirty(u32 highdword, u32 psm, int cld, int cbp)
{
	int cpsm = ZZOglGet_cpsm_TexBits(highdword);
	int csm = ZZOglGet_csm_TexBits(highdword);

	if (cpsm > 1 || csm)
	{
		// Mana Khemia triggers this.
		//ZZLog::Error_Log("16 bit clut not supported.");
		return true;
	}

	int csa = ZZOglGet_csa_TexBits(highdword);

	int entries = PSMT_IS8CLUT(psm) ? 256 : 16;

	u64* src = (u64*)(g_pbyGSMemory + cbp * 256);
	u64* dst = (u64*)(g_pbyGSClut + 64 * csa);

// Still defined because this macro used to be introduced here and stays
// visible to whatever follows in the translation unit.
#define TEST_THIS

	// Each pass checks 16 entries (8 u64 of CLUT buffer).
	while (entries != 0)
	{
#ifdef ZEROGS_SSE2
		// Note: local memory data is swizzled
		__m128i src_0 = _mm_load_si128((__m128i*)src);   //  9  8  1  0
		__m128i src_1 = _mm_load_si128((__m128i*)src+1); // 11 10  3  2
		__m128i src_2 = _mm_load_si128((__m128i*)src+2); // 13 12  5  4
		__m128i src_3 = _mm_load_si128((__m128i*)src+3); // 15 14  7  6

		__m128i dst_0 = _mm_load_si128((__m128i*)dst);
		__m128i dst_1 = _mm_load_si128((__m128i*)dst+1);
		__m128i dst_2 = _mm_load_si128((__m128i*)dst+2);
		__m128i dst_3 = _mm_load_si128((__m128i*)dst+3);

		// Re-pair the 64-bit halves so they line up with the linear CLUT
		// buffer, then AND the four per-lane equality masks together.
		__m128i result = _mm_cmpeq_epi32(_mm_unpacklo_epi64(src_0, src_1), dst_0);

		__m128i result_tmp = _mm_cmpeq_epi32(_mm_unpacklo_epi64(src_2, src_3), dst_1);
		result = _mm_and_si128(result, result_tmp);

		result_tmp = _mm_cmpeq_epi32(_mm_unpackhi_epi64(src_0, src_1), dst_2);
		result = _mm_and_si128(result, result_tmp);

		result_tmp = _mm_cmpeq_epi32(_mm_unpackhi_epi64(src_2, src_3), dst_3);
		result = _mm_and_si128(result, result_tmp);

		// All 16 mask bits set <=> every lane compared equal.
		u32 result_int = _mm_movemask_epi8(result);
		if (result_int != 0xFFFF) return true;
#else
		// Slow portable C version, kept for reference/debug.
		// Note: local memory data is swizzled; entry k of the CLUT buffer
		// is found at src[swizzle[k]].
		static const int swizzle[8] = { 0, 2, 4, 6, 1, 3, 5, 7 };

		for (int k = 0; k < 8; ++k)
		{
			if (dst[k] != src[swizzle[k]]) return true;
		}
#endif

		// go to the next memory block
		src += 32;

		// go back to the previous memory block then down one memory column
		if (entries & 0x10) src -= (64-8);

		// In case previous operation (down one column) cross the block boundary
		// Go to the next block
		if (entries == 0x90) src += 32;

		dst += 8;
		entries -= 16;
	}

	return false;
}
|
||||
|
||||
// cld state:
|
||||
// 000 - clut data is not loaded; data in the temp buffer is stored
|
||||
// 001 - clut data is always loaded.
|
||||
|
@ -769,16 +555,29 @@ bool CheckChangeInClut(u32 highdword, u32 psm)
|
|||
if (gs.cbp[1] == cbp) return false;
|
||||
break;
|
||||
|
||||
//case 4: return gs.cbp[0] != cbp;
|
||||
//case 5: return gs.cbp[1] != cbp;
|
||||
|
||||
// default: load
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return IsDirty(highdword, psm, cld, cbp);
|
||||
// Compare the cache with current memory
|
||||
|
||||
// CSM2 is not supported
|
||||
if (ZZOglGet_csm_TexBits(highdword))
|
||||
return true;
|
||||
|
||||
int cpsm = ZZOglGet_cpsm_TexBits(highdword);
|
||||
int csa = ZZOglGet_csa_TexBits(highdword);
|
||||
int entries = PSMT_IS8CLUT(psm) ? 256 : 16;
|
||||
|
||||
u8* GSMem = g_pbyGSMemory + cbp * 256;
|
||||
|
||||
if (PSMT_IS32BIT(cpsm))
|
||||
return Cmp_ClutBuffer_GSMem<u32>((u32*)GSMem, csa, entries);
|
||||
else {
|
||||
// Mana Khemia triggers this.
|
||||
//ZZLog::Error_Log("16 bit clut not supported.");
|
||||
return Cmp_ClutBuffer_GSMem<u16>((u16*)GSMem, csa, entries);
|
||||
}
|
||||
}
|
||||
|
||||
void texClutWrite(int ctx)
|
||||
|
@ -823,118 +622,7 @@ void texClutWrite(int ctx)
|
|||
|
||||
Flush(!ctx);
|
||||
|
||||
int entries = PSMT_IS8CLUT(tex0.psm) ? 256 : 16;
|
||||
|
||||
if (tex0.csm)
|
||||
{
|
||||
switch (tex0.cpsm)
|
||||
{
|
||||
// 16bit psm
|
||||
// eggomania uses non16bit textures for csm2
|
||||
|
||||
case PSMCT16:
|
||||
{
|
||||
u16* src = (u16*)g_pbyGSMemory + tex0.cbp * 128;
|
||||
u16 *dst = (u16*)(g_pbyGSClut + 64 * (tex0.csa & 15) + (tex0.csa >= 16 ? 2 : 0));
|
||||
|
||||
for (int i = 0; i < entries; ++i)
|
||||
{
|
||||
*dst = src[getPixelAddress16_0(gs.clut.cou+i, gs.clut.cov, gs.clut.cbw)];
|
||||
dst += 2;
|
||||
|
||||
// check for wrapping
|
||||
|
||||
if (((u32)(uptr)dst & 0x3ff) == 0) dst = (u16*)(g_pbyGSClut + 2);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case PSMCT16S:
|
||||
{
|
||||
u16* src = (u16*)g_pbyGSMemory + tex0.cbp * 128;
|
||||
u16 *dst = (u16*)(g_pbyGSClut + 64 * (tex0.csa & 15) + (tex0.csa >= 16 ? 2 : 0));
|
||||
|
||||
for (int i = 0; i < entries; ++i)
|
||||
{
|
||||
*dst = src[getPixelAddress16S_0(gs.clut.cou+i, gs.clut.cov, gs.clut.cbw)];
|
||||
dst += 2;
|
||||
|
||||
// check for wrapping
|
||||
|
||||
if (((u32)(uptr)dst & 0x3ff) == 0) dst = (u16*)(g_pbyGSClut + 2);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case PSMCT32:
|
||||
case PSMCT24:
|
||||
{
|
||||
u32* src = (u32*)g_pbyGSMemory + tex0.cbp * 64;
|
||||
u32 *dst = (u32*)(g_pbyGSClut + 64 * tex0.csa);
|
||||
|
||||
// check if address exceeds src
|
||||
|
||||
if (src + getPixelAddress32_0(gs.clut.cou + entries - 1, gs.clut.cov, gs.clut.cbw) >= (u32*)g_pbyGSMemory + 0x00100000)
|
||||
ZZLog::Error_Log("texClutWrite out of bounds.");
|
||||
else
|
||||
for (int i = 0; i < entries; ++i)
|
||||
{
|
||||
*dst = src[getPixelAddress32_0(gs.clut.cou+i, gs.clut.cov, gs.clut.cbw)];
|
||||
dst++;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
{
|
||||
//ZZLog::Debug_Log("Unknown cpsm: %x (%x).", tex0.cpsm, tex0.psm);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
u32* src = (u32*)(g_pbyGSMemory + 256 * tex0.cbp);
|
||||
|
||||
if (entries == 16)
|
||||
{
|
||||
switch (tex0.cpsm)
|
||||
{
|
||||
case PSMCT24:
|
||||
case PSMCT32:
|
||||
WriteCLUT_T32_I4_CSM1(src, (u32*)(g_pbyGSClut + 64 * tex0.csa));
|
||||
break;
|
||||
|
||||
default:
|
||||
#ifdef ZEROGS_SSE2
|
||||
WriteCLUT_T16_I4_CSM1_sse2(src, tex0.csa);
|
||||
#else
|
||||
WriteCLUT_T16_I4_CSM1_c(src, (u32*)(g_pbyGSClut + 64*(tex0.csa & 15) + (tex0.csa >= 16 ? 2 : 0)));
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
switch (tex0.cpsm)
|
||||
{
|
||||
case PSMCT24:
|
||||
case PSMCT32:
|
||||
WriteCLUT_T32_I8_CSM1(src, (u32*)(g_pbyGSClut + 64 * tex0.csa));
|
||||
break;
|
||||
|
||||
default:
|
||||
// sse2 for 256 is more complicated, so use regular
|
||||
#ifdef ZEROGS_SSE2
|
||||
WriteCLUT_T16_I8_CSM1_sse2(src, tex0.csa);
|
||||
#else
|
||||
WriteCLUT_T16_I8_CSM1_c(src, (u32*)(g_pbyGSClut + 64*(tex0.csa & 15) + (tex0.csa >= 16 ? 2 : 0)));
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
// Write the memory to clut buffer
|
||||
GSMem_to_ClutBuffer(tex0);
|
||||
}
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue