GregMiscellaneous:zzogl-pg:

* Fix SSE2 code for 16-bit CLUTs.
* Fix debug build


git-svn-id: http://pcsx2.googlecode.com/svn/branches/GregMiscellaneous@3952 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
gregory.hainaut@gmail.com 2010-10-21 11:13:49 +00:00
parent 0fc2e87809
commit d886dbfadb
4 changed files with 793 additions and 789 deletions

View File

@ -136,7 +136,7 @@ void GLWindow::GetWindowSize()
// update the gl buffer size // update the gl buffer size
ChangeWindowSize(width, height); ChangeWindowSize(width, height);
ZZLog::Error_Log("Resolution %dx%d. Depth %d bpp. Position (%d,%d)", width, height, depth, conf.x, conf.y); ZZLog::Dev_Log("Resolution %dx%d. Depth %d bpp. Position (%d,%d)", width, height, depth, conf.x, conf.y);
} }
void GLWindow::GetGLXVersion() void GLWindow::GetGLXVersion()

File diff suppressed because it is too large Load Diff

View File

@ -173,10 +173,10 @@ __forceinline void GSMem_to_ClutBuffer__T16_I4_CSM1_core_sse2(u32* vm, u32* clut
} }
// Unsizzle the data // Unsizzle the data
__m128i row_0 = _mm_unpacklo_epi32(vm_0, vm_1); // 3 2 1 0 __m128i row_0 = _mm_unpacklo_epi64(vm_0, vm_1); // 3 2 1 0
__m128i row_1 = _mm_unpacklo_epi32(vm_2, vm_3); // 7 6 5 4 __m128i row_1 = _mm_unpacklo_epi64(vm_2, vm_3); // 7 6 5 4
__m128i row_2 = _mm_unpackhi_epi32(vm_0, vm_1); // 11 10 9 8 __m128i row_2 = _mm_unpackhi_epi64(vm_0, vm_1); // 11 10 9 8
__m128i row_3 = _mm_unpackhi_epi32(vm_2, vm_3); // 15 14 13 12 __m128i row_3 = _mm_unpackhi_epi64(vm_2, vm_3); // 15 14 13 12
// load old data & remove useless part // load old data & remove useless part
if(CSA_0_15) { if(CSA_0_15) {
@ -241,6 +241,8 @@ __forceinline void GSMem_to_ClutBuffer__T16_I8_CSM1_sse2(u32* vm, u32 csa)
clut = GetClutBufferAddress<u32>(0); // Keep aligned version for sse2 clut = GetClutBufferAddress<u32>(0); // Keep aligned version for sse2
GSMem_to_ClutBuffer__T16_I4_CSM1_core_sse2<false,true>(vm, clut); GSMem_to_ClutBuffer__T16_I4_CSM1_core_sse2<false,true>(vm, clut);
clut += 16;
vm += 16; // go down one column
} else if(csa_right != 0) { } else if(csa_right != 0) {
// go back to the base before processing left clut column // go back to the base before processing left clut column
clut = GetClutBufferAddress<u32>(0); // Keep aligned version for sse2 clut = GetClutBufferAddress<u32>(0); // Keep aligned version for sse2
@ -512,7 +514,7 @@ __forceinline void ClutBuffer_to_Array<u16>(u16* dst, u32 csa, u32 clutsize)
while (clutsize_right > 0) while (clutsize_right > 0)
{ {
#ifdef ZEROGS_SSE4 #ifdef ZEROGS_SSE2
// only lower 16 bits of dword are valid // only lower 16 bits of dword are valid
__m128i clut_0 = _mm_load_si128((__m128i*)clut); __m128i clut_0 = _mm_load_si128((__m128i*)clut);
__m128i clut_1 = _mm_load_si128((__m128i*)clut+1); __m128i clut_1 = _mm_load_si128((__m128i*)clut+1);

View File

@ -2014,7 +2014,7 @@ CMemoryTarget* CMemoryTargetMngr::GetMemoryTarget(const tex0Info& tex0, int forc
Build_Clut_Texture<u16>(tex0.psm, targ->height, (u16*)targ->clut, psrc, (u16*)ptexdata); Build_Clut_Texture<u16>(tex0.psm, targ->height, (u16*)targ->clut, psrc, (u16*)ptexdata);
} }
assert(targ->clut.size() > 0); assert(targ->clutsize > 0);
} }
else else
{ {
@ -2027,7 +2027,7 @@ CMemoryTarget* CMemoryTargetMngr::GetMemoryTarget(const tex0Info& tex0, int forc
u16* dst = (u16*)ptexdata; u16* dst = (u16*)ptexdata;
u16* src = (u16*)(MemoryAddress(targ->realy)); u16* src = (u16*)(MemoryAddress(targ->realy));
#if defined(ZEROGS_SSE2) #ifdef ZEROGS_SSE2
assert(((u32)(uptr)dst) % 16 == 0); assert(((u32)(uptr)dst) % 16 == 0);
// FIXME Uncomment to test intrinsic versions (instead of asm) // FIXME Uncomment to test intrinsic versions (instead of asm)
// perf improvement vs asm: // perf improvement vs asm:
@ -2830,6 +2830,7 @@ inline void Resolve_32_Bit(const void* psrc, int fbp, int fbw, int fbh, const in
static const __aligned16 unsigned int pixel_5b_mask[4] = {0x0000001F, 0x0000001F, 0x0000001F, 0x0000001F}; static const __aligned16 unsigned int pixel_5b_mask[4] = {0x0000001F, 0x0000001F, 0x0000001F, 0x0000001F};
#ifdef ZEROGS_SSE2
// The function process 2*2 pixels in 32bits. And 2*4 pixels in 16bits // The function process 2*2 pixels in 32bits. And 2*4 pixels in 16bits
template <u32 psm, u32 size, u32 pageTable[size][64], bool null_second_line, u32 INDEX> template <u32 psm, u32 size, u32 pageTable[size][64], bool null_second_line, u32 INDEX>
__forceinline void update_8pixels_sse2(u32* src, u32* basepage, u32 i_msk, u32 j, u32 pix_mask, u32 src_pitch) __forceinline void update_8pixels_sse2(u32* src, u32* basepage, u32 i_msk, u32 j, u32 pix_mask, u32 src_pitch)
@ -3141,6 +3142,7 @@ void Resolve_32_Bit_sse2(const void* psrc, int fbp, int fbw, int fbh, u32 fbm)
#endif #endif
#endif #endif
} }
#endif
void _Resolve(const void* psrc, int fbp, int fbw, int fbh, int psm, u32 fbm, bool mode = true) void _Resolve(const void* psrc, int fbp, int fbw, int fbh, int psm, u32 fbm, bool mode = true)
{ {