mirror of https://github.com/PCSX2/pcsx2.git
GregMiscellaneous: zzogl-pg:
* make code more consistent
* use some SSE2 for 16-bit textures

git-svn-id: http://pcsx2.googlecode.com/svn/branches/GregMiscellaneous@3943 96395faa-99c1-11dd-bbfe-3dabce05a288

This commit is contained in:
parent e71401068e
commit b20c1021e8
@@ -25,6 +25,19 @@
 #include <emmintrin.h>
 #endif
 
+// Local Clut buffer:
+// It supports both 32 bits and 16 bits colors formats. The size of the buffer is 1KBytes.
+// The 16 bits entries are arranged in 2 columns. One row is a 32 bits colors.
+// 256   0
+// 271   1
+// ...  ..
+// 510 254
+// 511 255
+//
+// CSA -> clut buffer offset:
+// 16 bits format: CSA < 32 <=> 16 entries, 16 half-row of the buffer (for example 0 to 15)
+// 32 bits format: CSA < 16 <=> 16 entries, 16 full row of the buffer (for example 256|0 to 271|15)
+
 static const __aligned16 int s_clut_16bits_mask[4] = { 0x0000ffff, 0x0000ffff, 0x0000ffff, 0x0000ffff };
 
 template <class T>
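A rough sketch of the CSA-to-offset mapping described in the new comment block, based on the offset arithmetic this commit removes from GetClutVariables() further down. GetClutBufferAddress<T>() itself is not shown in this diff, so the helper below is only an illustration of the mapping, not its actual body:

#include <cstdint>

// Illustrative only -- not the committed GetClutBufferAddress implementation.
// Byte offset of a CSA entry group inside the 1KB local CLUT buffer.
static uint32_t ClutOffsetSketch(uint32_t csa, bool clut_is_32bit)
{
    if (clut_is_32bit)
        return 64u * csa;                              // CSA < 16: one full 64-byte row per CSA
    // 16-bit entries: right column for CSA < 16, left column (+2 bytes) when CSA >= 16
    return 64u * (csa & 15u) + (csa >= 16u ? 2u : 0u);
}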
@@ -472,48 +485,115 @@ __forceinline void GSMem_to_ClutBuffer(tex0Info &tex0)
  * Clut buffer -> local C array (linear)
  * *****************************************************************/
 template <class T>
-__forceinline void ClutBuffer_to_Array(T* dst, T* clut, u32 clutsize) {}
+__forceinline void ClutBuffer_to_Array(T* dst, u32 csa, u32 clutsize) {}
 
 template <>
-__forceinline void ClutBuffer_to_Array<u32>(u32* dst, u32* clut, u32 clutsize)
+__forceinline void ClutBuffer_to_Array<u32>(u32* dst, u32 csa, u32 clutsize)
 {
-    ZZLog::Error_Log("Fill 32b clut");
+    u8* clut = (u8*)GetClutBufferAddress<u32>(csa);
-    memcpy_amd((u8*)dst, (u8*)clut, clutsize);
+    memcpy_amd((u8*)dst, clut, clutsize);
 }
 
 template <>
-__forceinline void ClutBuffer_to_Array<u16>(u16* dst, u16* clut, u32 clutsize)
+__forceinline void ClutBuffer_to_Array<u16>(u16* dst, u32 csa, u32 clutsize)
 {
-    ZZLog::Error_Log("Fill 16b clut");
+    u16* clut = (u16*)GetClutBufferAddress<u32>(csa); // Keep aligned version for sse2
-    int left = ((u32)clut & 2) ? 0 : (((u32)clut & 0x3ff) / 2) + clutsize - 512;
 
-    if (left > 0) clutsize -= left;
+    // which side to copy
+    u32 clutsize_right;
-    while (clutsize > 0)
+    u32 clutsize_left;
-    {
+    if (csa < 16) {
-        dst[0] = clut[0];
+        clutsize_right = min(clutsize, (16-csa)*64);
-        dst++;
+        clutsize_left = clutsize - clutsize_right;
-        clut += 2;
+    } else {
-        clutsize -= 2;
+        clutsize_right = 0;
+        clutsize_left = clutsize;
     }
 
-    if (left > 0)
+    while (clutsize_right > 0)
     {
-        clut = GetClutBufferAddress<u16>(16);
+#ifdef ZEROGS_SSE4
+        // only lower 16 bits of dword are valid
+        __m128i clut_0 = _mm_load_si128((__m128i*)clut);
+        __m128i clut_1 = _mm_load_si128((__m128i*)clut+1);
+        __m128i clut_2 = _mm_load_si128((__m128i*)clut+2);
+        __m128i clut_3 = _mm_load_si128((__m128i*)clut+3);
 
-        while (left > 0)
+        clut_0 = _mm_shufflelo_epi16(clut_0, 0x32);
-        {
+        clut_1 = _mm_shufflelo_epi16(clut_1, 0x32);
-            dst[0] = clut[0];
+        clut_2 = _mm_shufflelo_epi16(clut_2, 0x32);
-            left -= 2;
+        clut_3 = _mm_shufflelo_epi16(clut_3, 0x32);
-            clut += 2;
-            dst++;
+        clut_0 = _mm_shufflehi_epi16(clut_0, 0xD8); // - - 3 2 1 0 - -
-        }
+        clut_1 = _mm_shufflehi_epi16(clut_1, 0xD8);
+        clut_2 = _mm_shufflehi_epi16(clut_2, 0xD8);
+        clut_3 = _mm_shufflehi_epi16(clut_3, 0xD8);
 
+        clut_0 = _mm_srli_si128(clut_0, 4);
+        clut_1 = _mm_srli_si128(clut_1, 4);
+        clut_2 = _mm_srli_si128(clut_2, 4);
+        clut_3 = _mm_srli_si128(clut_3, 4);
 
+        _mm_store_si128((__m128i*)dst, _mm_unpacklo_epi64(clut_0, clut_1));
+        _mm_store_si128((__m128i*)dst+1, _mm_unpacklo_epi64(clut_2, clut_3));
+#else
+        for(int i = 0; i < 16; ++i)
+            dst[i] = clut[2*i];
+#endif
 
+        dst += 16;
+        clut += 32;
+        clutsize_right -= 32;
+    }
 
+    if(csa < 16) {
+        // go back to the base before processing left clut column
+        clut = (u16*)GetClutBufferAddress<u32>(0); // Keep aligned version for sse2
+    }
 
+    while (clutsize_left > 0)
+    {
+#ifdef ZEROGS_SSE2
+        // only higher 16 bits of dword are valid
+        __m128i clut_0 = _mm_load_si128((__m128i*)clut);
+        __m128i clut_1 = _mm_load_si128((__m128i*)clut+1);
+        __m128i clut_2 = _mm_load_si128((__m128i*)clut+2);
+        __m128i clut_3 = _mm_load_si128((__m128i*)clut+3);
 
+        clut_0 = _mm_shufflelo_epi16(clut_0, 0xD8);
+        clut_1 = _mm_shufflelo_epi16(clut_1, 0xD8);
+        clut_2 = _mm_shufflelo_epi16(clut_2, 0xD8);
+        clut_3 = _mm_shufflelo_epi16(clut_3, 0xD8);
 
+        clut_0 = _mm_shufflehi_epi16(clut_0, 0x63); // - - 3 2 1 0 - -
+        clut_1 = _mm_shufflehi_epi16(clut_1, 0x63);
+        clut_2 = _mm_shufflehi_epi16(clut_2, 0x63);
+        clut_3 = _mm_shufflehi_epi16(clut_3, 0x63);
 
+        clut_0 = _mm_srli_si128(clut_0, 4);
+        clut_1 = _mm_srli_si128(clut_1, 4);
+        clut_2 = _mm_srli_si128(clut_2, 4);
+        clut_3 = _mm_srli_si128(clut_3, 4);
 
+        _mm_store_si128((__m128i*)dst, _mm_unpacklo_epi64(clut_0, clut_1));
+        _mm_store_si128((__m128i*)dst+1, _mm_unpacklo_epi64(clut_2, clut_3));
+#else
+        // Note +1 because we change higher 16 bits
+        for(int i = 0; i < 16; ++i)
+            dst[i] = clut[2*i];
+#endif
 
+        dst += 16;
+        clut += 32;
+        clutsize_left -= 32;
     }
 }
 
 /* *****************************************************************
  * Compare: Clut buffer <-> Local Memory
  * *****************************************************************/
+// false -> identical
+// true -> different
 template <class T>
 __forceinline bool Cmp_ClutBuffer_GSMem(T* GSmem, u32 csa, u32 clutsize);
 
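Both vectorized loops above perform the same gather: each 32-bit slot of the CLUT buffer carries one valid 16-bit entry (low half in the right column, high half in the left column), and the shuffle/shift/unpack sequence packs 16 such entries into 32 contiguous bytes of dst per iteration, as the #else fallbacks spell out. Two details visible in the diff itself: the right-column block is guarded by ZEROGS_SSE4 while the left-column block uses ZEROGS_SSE2, and the left-column fallback still reads clut[2*i] even though its comment says "+1" (the analogous compare code later in this commit reads clut[2*i+1]). A scalar reference for the intended gather, as a sketch only (the function name and signature below are not from the commit):

#include <cstdint>

// Sketch of the gather the SSE paths are meant to perform: copy the valid
// 16-bit half of each 32-bit CLUT slot into a contiguous array.
// 'upper_half' selects the left column (entry stored in the high 16 bits).
static void GatherClutColumnSketch(uint16_t* dst, const uint16_t* clut, int count16, bool upper_half)
{
    for (int i = 0; i < count16; ++i)
        dst[i] = clut[2 * i + (upper_half ? 1 : 0)];
}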
@@ -563,17 +643,17 @@ __forceinline bool Cmp_ClutBuffer_GSMem<u32>(u32* GSmem, u32 csa, u32 clutsize)
         _GSmem += 32;
 
         // go back to the previous memory block then down one memory column
-        if (clutsize & 0x10) {
+        if (clutsize & 0x40) {
             _GSmem -= (64-8);
         }
         // In case previous operation (down one column) cross the block boundary
         // Go to the next block
-        if (clutsize == 0x90) {
+        if (clutsize == 0x240) {
             _GSmem += 32;
         }
 
         clut += 8;
-        clutsize -= 16;
+        clutsize -= 64;
     }
 
     return false;
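The three constants changed in this hunk are the old values scaled by four: the u32 compare loop now counts clutsize in bytes rather than in 32-bit entries, which matches the CheckChangeInClut() hunk at the bottom of this diff where the call site now passes entries*4. A quick consistency check of that scaling (illustrative only, not code from the commit):

#include <cstdint>

// Old entry-count constants, scaled by 4 bytes per u32 entry, give the new byte-count constants.
static_assert(0x10 * sizeof(uint32_t) == 0x40,  "column step: entries -> bytes");
static_assert(0x90 * sizeof(uint32_t) == 0x240, "block boundary test: entries -> bytes");
static_assert(16   * sizeof(uint32_t) == 64,    "per-iteration decrement: entries -> bytes");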
@@ -589,59 +669,120 @@ __forceinline bool Cmp_ClutBuffer_GSMem<u16>(u16* GSmem, u32 csa, u32 clutsize)
 /* *****************************************************************
  * Compare: Clut buffer <-> local C array (linear)
  * *****************************************************************/
+// false -> identical
+// true -> different
 template <class T>
-__forceinline bool Cmp_ClutBuffer_SavedClut(T* saved_clut, T* clut, u32 clutsize);
+__forceinline bool Cmp_ClutBuffer_SavedClut(T* saved_clut, u32 csa, u32 clutsize);
 
 template <>
-__forceinline bool Cmp_ClutBuffer_SavedClut<u32>(u32* saved_clut, u32* clut, u32 clutsize)
+__forceinline bool Cmp_ClutBuffer_SavedClut<u32>(u32* saved_clut, u32 csa, u32 clutsize)
 {
+    u32* clut = GetClutBufferAddress<u32>(csa);
     return memcmp_mmx(saved_clut, clut, clutsize);
 }
 
 template <>
-__forceinline bool Cmp_ClutBuffer_SavedClut<u16>(u16* saved_clut, u16* clut, u32 clutsize)
+__forceinline bool Cmp_ClutBuffer_SavedClut<u16>(u16* saved_clut, u32 csa, u32 clutsize)
 {
     assert((clutsize&31) == 0);
 
-    // left > 0 only when csa < 16
+#ifdef ZEROGS_SSE2
-    int left = 0;
+    __m128i zero_128 = _mm_setzero_si128();
-    if (((u32)clut & 2) == 0)
+#endif
-    {
+    u16* clut = (u16*)GetClutBufferAddress<u32>(csa); // Keep aligned version for sse2
-        left = (((u32)clut & 0x3ff) / 2) + clutsize - 512;
-        clutsize -= left;
-    }
 
-    while (clutsize > 0)
+    // which side to cmp
-    {
+    u32 clutsize_right;
+    u32 clutsize_left;
+    if (csa < 16) {
+        clutsize_right = min(clutsize, (16-csa)*64);
+        clutsize_left = clutsize - clutsize_right;
+    } else {
+        clutsize_right = 0;
+        clutsize_left = clutsize;
+    }
 
+    while (clutsize_right > 0)
+    {
+#ifdef ZEROGS_SSE2
+        // only lower 16 bits of dword are valid
+        __m128i clut_0 = _mm_load_si128((__m128i*)clut);
+        __m128i clut_1 = _mm_load_si128((__m128i*)clut+1);
+        __m128i clut_2 = _mm_load_si128((__m128i*)clut+2);
+        __m128i clut_3 = _mm_load_si128((__m128i*)clut+3);
 
+        // value must converted to 32 bits
+        __m128i saved_clut_0 = _mm_load_si128((__m128i*)saved_clut);
+        __m128i saved_clut_1 = _mm_load_si128((__m128i*)saved_clut+1);
 
+        __m128i result = _mm_cmpeq_epi16(_mm_unpacklo_epi16(saved_clut_0, zero_128), clut_0);
+        __m128i result_tmp = _mm_cmpeq_epi16(_mm_unpackhi_epi16(saved_clut_0, zero_128), clut_1);
+        result = _mm_and_si128(result, result_tmp);
 
+        result_tmp = _mm_cmpeq_epi16(_mm_unpacklo_epi16(saved_clut_1, zero_128), clut_2);
+        result = _mm_and_si128(result, result_tmp);
 
+        result_tmp = _mm_cmpeq_epi16(_mm_unpackhi_epi16(saved_clut_1, zero_128), clut_3);
+        result = _mm_and_si128(result, result_tmp);
 
+        u32 result_int = _mm_movemask_epi8(result);
+        // only lower 16bits must be checked
+        if ((result_int&0x3333) != 0x3333)
+            return true;
+#else
         for (int i = 0; i < 16; ++i)
-        {
+            if (saved_clut[i] != clut[2*i]) return true;
-            if (saved_clut[i] != clut[2*i]) return 1;
+#endif
-        }
 
-        clutsize -= 32;
         saved_clut += 16;
         clut += 32;
+        clutsize_right -= 32;
     }
 
-    if (left > 0)
+    if(csa < 16) {
-    {
+        // go back to the base before processing left clut column
-        clut = (u16*)(g_pbyGSClut + 2);
+        clut = (u16*)GetClutBufferAddress<u32>(0); // Keep aligned version for sse2
+    }
 
-        while (left > 0)
+    while (clutsize_left > 0)
     {
-        for (int i = 0; i < 16; ++i)
+#ifdef ZEROGS_SSE2
-        {
+        // only higher 16 bits of dword are valid
-            if (saved_clut[i] != clut[2*i]) return 1;
+        __m128i clut_0 = _mm_load_si128((__m128i*)clut);
-        }
+        __m128i clut_1 = _mm_load_si128((__m128i*)clut+1);
+        __m128i clut_2 = _mm_load_si128((__m128i*)clut+2);
+        __m128i clut_3 = _mm_load_si128((__m128i*)clut+3);
 
-        left -= 32;
+        // value must converted to 32 bits (with 0 in lower 16 bits)
+        __m128i saved_clut_0 = _mm_load_si128((__m128i*)saved_clut);
+        __m128i saved_clut_1 = _mm_load_si128((__m128i*)saved_clut+1);
 
-        saved_clut += 16;
+        __m128i result = _mm_cmpeq_epi16(_mm_unpacklo_epi16(zero_128, saved_clut_0), clut_0);
-        clut += 32;
+        __m128i result_tmp = _mm_cmpeq_epi16(_mm_unpackhi_epi16(zero_128, saved_clut_0), clut_1);
-    }
+        result = _mm_and_si128(result, result_tmp);
-}
 
-    return 0;
+        result_tmp = _mm_cmpeq_epi16(_mm_unpacklo_epi16(zero_128, saved_clut_1), clut_2);
+        result = _mm_and_si128(result, result_tmp);
 
+        result_tmp = _mm_cmpeq_epi16(_mm_unpackhi_epi16(zero_128, saved_clut_1), clut_3);
+        result = _mm_and_si128(result, result_tmp);
 
+        u32 result_int = _mm_movemask_epi8(result);
+        // only higher 16bits must be checked
+        if ((result_int&0xCCCC) != 0xCCCC)
+            return true;
+#else
+        // Note +1 because we change higher 16 bits
+        for (int i = 0; i < 16; ++i)
+            if (saved_clut[i] != clut[2*i+1]) return true;
+#endif
 
+        saved_clut += 16;
+        clut += 32;
+        clutsize_left -= 32;
+    }
 
+    return false;
 }
 
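The SSE2 comparison above widens each saved 16-bit entry into a 32-bit lane (zero in the unused half), compares it lane by lane against the raw buffer dwords, ANDs the equality masks together, and then uses _mm_movemask_epi8 to test only the bytes belonging to the valid half of each slot (0x3333 for the right column, 0xCCCC for the left). A minimal self-contained sketch of that idea for one group of eight entries against the low halves (name and signature below are illustrative, not from the commit):

#include <emmintrin.h>
#include <cstdint>

// Returns true when any of the 8 packed entries differs from the low 16-bit
// half of the corresponding 32-bit CLUT slot -- same movemask trick as above.
static bool LowHalvesDifferSketch(const uint16_t* saved, const uint32_t* slots)
{
    __m128i packed = _mm_loadu_si128((const __m128i*)saved);        // 8 saved u16 entries
    __m128i lo     = _mm_loadu_si128((const __m128i*)slots);        // slots 0..3
    __m128i hi     = _mm_loadu_si128((const __m128i*)(slots + 4));  // slots 4..7
    __m128i zero   = _mm_setzero_si128();

    // Widen saved entries to 32-bit lanes (entry in the low half, zero above).
    __m128i eq_lo = _mm_cmpeq_epi16(_mm_unpacklo_epi16(packed, zero), lo);
    __m128i eq_hi = _mm_cmpeq_epi16(_mm_unpackhi_epi16(packed, zero), hi);
    __m128i eq    = _mm_and_si128(eq_lo, eq_hi);

    // Only the bytes covering the low 16 bits of each dword matter.
    return (_mm_movemask_epi8(eq) & 0x3333) != 0x3333;
}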
@@ -653,7 +794,6 @@ __forceinline bool Cmp_ClutBuffer_SavedClut<u16>(u16* saved_clut, u16* clut, u32
 template <class T>
 __forceinline void Build_Clut_Texture(u32 psm, u32 height, T* pclut, u8* psrc, T* pdst)
 {
-    ZZLog::Error_Log("Build clut texture");
     switch (psm)
     {
         case PSMT8:
@@ -21,10 +21,10 @@
 #define CLUT_H_INCLUDED
 
 extern void GSMem_to_ClutBuffer(tex0Info &tex0);
-template <class T> extern void ClutBuffer_to_Array(T* dst, T* clut, u32 clutsize);
+template <class T> extern void ClutBuffer_to_Array(T* dst, u32 csa, u32 clutsize);
 template <class T> extern void Build_Clut_Texture(u32 psm, u32 height, T* pclut, u8* psrc, T* pdst);
 
 template <class T> extern bool Cmp_ClutBuffer_GSMem(T* GSmem, u32 csa, u32 clutsize);
-template <class T> extern bool Cmp_ClutBuffer_SavedClut(T* saved_clut, T* clut, u32 clutsize);
+template <class T> extern bool Cmp_ClutBuffer_SavedClut(T* saved_clut, u32 csa, u32 clutsize);
 
 #endif // CLUT_H_INCLUDED
@@ -1731,7 +1731,7 @@ inline list<CMemoryTarget>::iterator CMemoryTargetMngr::DestroyTargetIter(list<C
 // Not same format -> 1
 // Same format, not same data (clut only) -> 2
 // identical -> 0
-int CMemoryTargetMngr::CompareTarget(list<CMemoryTarget>::iterator& it, const tex0Info& tex0, int clutsize, int nClutOffset)
+int CMemoryTargetMngr::CompareTarget(list<CMemoryTarget>::iterator& it, const tex0Info& tex0, int clutsize)
 {
     if (PSMT_ISCLUT(it->psm) != PSMT_ISCLUT(tex0.psm))
         return 1;
@@ -1743,10 +1743,10 @@ int CMemoryTargetMngr::CompareTarget(list<CMemoryTarget>::iterator& it, const te
         return 1;
 
     if (PSMT_IS32BIT(tex0.cpsm)) {
-        if (Cmp_ClutBuffer_SavedClut<u32>((u32*)&it->clut[0], (u32*)(g_pbyGSClut + nClutOffset), clutsize))
+        if (Cmp_ClutBuffer_SavedClut<u32>((u32*)&it->clut[0], tex0.csa, clutsize))
             return 2;
     } else {
-        if (Cmp_ClutBuffer_SavedClut<u16>((u16*)&it->clut[0], (u16*)(g_pbyGSClut + nClutOffset), clutsize))
+        if (Cmp_ClutBuffer_SavedClut<u16>((u16*)&it->clut[0], tex0.csa, clutsize))
             return 2;
     }
 
@@ -1758,9 +1758,8 @@ int CMemoryTargetMngr::CompareTarget(list<CMemoryTarget>::iterator& it, const te
     return 0;
 }
 
-void CMemoryTargetMngr::GetClutVariables(int& nClutOffset, int& clutsize, const tex0Info& tex0)
+void CMemoryTargetMngr::GetClutVariables(int& clutsize, const tex0Info& tex0)
 {
-    nClutOffset = 0;
     clutsize = 0;
 
     if (PSMT_ISCLUT(tex0.psm))
@@ -1768,15 +1767,9 @@ void CMemoryTargetMngr::GetClutVariables(int& nClutOffset, int& clutsize, const
         int entries = PSMT_IS8CLUT(tex0.psm) ? 256 : 16;
 
         if (PSMT_IS32BIT(tex0.cpsm))
-        {
-            nClutOffset = 64 * tex0.csa;
             clutsize = min(entries, 256 - tex0.csa * 16) * 4;
-        }
         else
-        {
-            nClutOffset = 64 * (tex0.csa & 15) + (tex0.csa >= 16 ? 2 : 0);
             clutsize = min(entries, 512 - tex0.csa * 16) * 2;
-        }
     }
 }
 
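With nClutOffset gone, GetClutVariables() only reports clutsize, the CLUT payload in bytes: min(entries, 256 - csa*16) * 4 for a 32-bit CLUT and min(entries, 512 - csa*16) * 2 for a 16-bit one. A small standalone restatement with two worked values (helper name and example inputs are illustrative, not from the commit):

#include <algorithm>
#include <cassert>

// Restatement of the clutsize formulas kept by this hunk; returns bytes.
static int ClutSizeBytes(int entries, int csa, bool clut_is_32bit)
{
    return clut_is_32bit ? std::min(entries, 256 - csa * 16) * 4
                         : std::min(entries, 512 - csa * 16) * 2;
}

int main()
{
    assert(ClutSizeBytes(256, 0, true)  == 1024); // PSMT8 with a 32-bit CLUT: the whole 1KB buffer
    assert(ClutSizeBytes(16, 19, false) == 32);   // PSMT4 with a 16-bit CLUT at csa 19: 16 entries * 2 bytes
    return 0;
}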
@@ -1793,7 +1786,7 @@ void CMemoryTargetMngr::GetMemAddress(int& start, int& end, const tex0Info& tex
 
 }
 
-CMemoryTarget* CMemoryTargetMngr::SearchExistTarget(int start, int end, int nClutOffset, int clutsize, const tex0Info& tex0, int forcevalidate)
+CMemoryTarget* CMemoryTargetMngr::SearchExistTarget(int start, int end, int clutsize, const tex0Info& tex0, int forcevalidate)
 {
     for (list<CMemoryTarget>::iterator it = listTargets.begin(); it != listTargets.end();)
     {
@@ -1801,7 +1794,7 @@ CMemoryTarget* CMemoryTargetMngr::SearchExistTarget(int start, int end, int nClu
         if (it->starty <= start && it->starty + it->height >= end)
         {
 
-            int res = CompareTarget(it, tex0, clutsize, nClutOffset);
+            int res = CompareTarget(it, tex0, clutsize);
 
             if (res == 1)
             {
@@ -1905,12 +1898,12 @@ CMemoryTarget* CMemoryTargetMngr::ClearedTargetsSearch(int fmt, int widthmult, i
 CMemoryTarget* CMemoryTargetMngr::GetMemoryTarget(const tex0Info& tex0, int forcevalidate)
 {
     FUNCLOG
-    int start, end, nClutOffset, clutsize;
+    int start, end, clutsize;
 
-    GetClutVariables(nClutOffset, clutsize, tex0);
+    GetClutVariables(clutsize, tex0);
     GetMemAddress(start, end, tex0);
 
-    CMemoryTarget* it = SearchExistTarget(start, end, nClutOffset, clutsize, tex0, forcevalidate);
+    CMemoryTarget* it = SearchExistTarget(start, end, clutsize, tex0, forcevalidate);
 
     if (it != NULL) return it;
 
@@ -2006,13 +1999,13 @@ CMemoryTarget* CMemoryTargetMngr::GetMemoryTarget(const tex0Info& tex0, int forc
     if (PSMT_IS32BIT(tex0.cpsm))
     {
         u32* pclut = (u32*) & targ->clut[0];
-        ClutBuffer_to_Array<u32>(pclut, (u32*)(g_pbyGSClut + nClutOffset), clutsize);
+        ClutBuffer_to_Array<u32>(pclut, tex0.csa, clutsize);
         Build_Clut_Texture<u32>(tex0.psm, targ->height, pclut, psrc, (u32*)ptexdata);
     }
     else
     {
         u16* pclut = (u16*) & targ->clut[0];
-        ClutBuffer_to_Array<u16>(pclut, (u16*)(g_pbyGSClut + nClutOffset), clutsize);
+        ClutBuffer_to_Array<u16>(pclut, tex0.csa, clutsize);
         Build_Clut_Texture<u16>(tex0.psm, targ->height, pclut, psrc, (u16*)ptexdata);
     }
 
@@ -440,9 +440,9 @@ class CMemoryTargetMngr
     CMemoryTargetMngr() : curstamp(0) {}
 
     CMemoryTarget* GetMemoryTarget(const tex0Info& tex0, int forcevalidate); // pcbp is pointer to start of clut
-    CMemoryTarget* SearchExistTarget(int start, int end, int nClutOffset, int clutsize, const tex0Info& tex0, int forcevalidate);
+    CMemoryTarget* SearchExistTarget(int start, int end, int clutsize, const tex0Info& tex0, int forcevalidate);
     CMemoryTarget* ClearedTargetsSearch(int fmt, int widthmult, int channels, int height);
-    int CompareTarget(list<CMemoryTarget>::iterator& it, const tex0Info& tex0, int clutsize, int nClutOffset);
+    int CompareTarget(list<CMemoryTarget>::iterator& it, const tex0Info& tex0, int clutsize);
 
     void Destroy(); // destroy all targs
 
@@ -455,7 +455,7 @@ class CMemoryTargetMngr
 
 private:
     list<CMemoryTarget>::iterator DestroyTargetIter(list<CMemoryTarget>::iterator& it);
-    void GetClutVariables(int& nClutOffset, int& clutsize, const tex0Info& tex0);
+    void GetClutVariables(int& clutsize, const tex0Info& tex0);
     void GetMemAddress(int& start, int& end, const tex0Info& tex0);
 };
 
@@ -526,11 +526,11 @@ bool CheckChangeInClut(u32 highdword, u32 psm)
     u8* GSMem = g_pbyGSMemory + cbp * 256;
 
     if (PSMT_IS32BIT(cpsm))
-        return Cmp_ClutBuffer_GSMem<u32>((u32*)GSMem, csa, entries);
+        return Cmp_ClutBuffer_GSMem<u32>((u32*)GSMem, csa, entries*4);
     else {
         // Mana Khemia triggers this.
         //ZZLog::Error_Log("16 bit clut not supported.");
-        return Cmp_ClutBuffer_GSMem<u16>((u16*)GSMem, csa, entries);
+        return Cmp_ClutBuffer_GSMem<u16>((u16*)GSMem, csa, entries*2);
     }
 }