GregMiscellaneous:zzogl-pg:

* Fix the SSE2 code path for 16-bit CLUTs.
* Fix the debug build.


git-svn-id: http://pcsx2.googlecode.com/svn/branches/GregMiscellaneous@3952 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
gregory.hainaut@gmail.com 2010-10-21 11:13:49 +00:00
parent 0fc2e87809
commit d886dbfadb
4 changed files with 793 additions and 789 deletions

View File

@ -136,7 +136,7 @@ void GLWindow::GetWindowSize()
// update the gl buffer size
ChangeWindowSize(width, height);
ZZLog::Error_Log("Resolution %dx%d. Depth %d bpp. Position (%d,%d)", width, height, depth, conf.x, conf.y);
ZZLog::Dev_Log("Resolution %dx%d. Depth %d bpp. Position (%d,%d)", width, height, depth, conf.x, conf.y);
}
void GLWindow::GetGLXVersion()

File diff suppressed because it is too large Load Diff

View File

@ -173,10 +173,10 @@ __forceinline void GSMem_to_ClutBuffer__T16_I4_CSM1_core_sse2(u32* vm, u32* clut
}
// Unswizzle the data
__m128i row_0 = _mm_unpacklo_epi32(vm_0, vm_1); // 3 2 1 0
__m128i row_1 = _mm_unpacklo_epi32(vm_2, vm_3); // 7 6 5 4
__m128i row_2 = _mm_unpackhi_epi32(vm_0, vm_1); // 11 10 9 8
__m128i row_3 = _mm_unpackhi_epi32(vm_2, vm_3); // 15 14 13 12
__m128i row_0 = _mm_unpacklo_epi64(vm_0, vm_1); // 3 2 1 0
__m128i row_1 = _mm_unpacklo_epi64(vm_2, vm_3); // 7 6 5 4
__m128i row_2 = _mm_unpackhi_epi64(vm_0, vm_1); // 11 10 9 8
__m128i row_3 = _mm_unpackhi_epi64(vm_2, vm_3); // 15 14 13 12
// load old data & remove useless part
if(CSA_0_15) {
@ -241,6 +241,8 @@ __forceinline void GSMem_to_ClutBuffer__T16_I8_CSM1_sse2(u32* vm, u32 csa)
clut = GetClutBufferAddress<u32>(0); // Keep aligned version for sse2
GSMem_to_ClutBuffer__T16_I4_CSM1_core_sse2<false,true>(vm, clut);
clut += 16;
vm += 16; // go down one column
} else if(csa_right != 0) {
// go back to the base before processing left clut column
clut = GetClutBufferAddress<u32>(0); // Keep aligned version for sse2
@ -512,7 +514,7 @@ __forceinline void ClutBuffer_to_Array<u16>(u16* dst, u32 csa, u32 clutsize)
while (clutsize_right > 0)
{
#ifdef ZEROGS_SSE4
#ifdef ZEROGS_SSE2
// only lower 16 bits of dword are valid
__m128i clut_0 = _mm_load_si128((__m128i*)clut);
__m128i clut_1 = _mm_load_si128((__m128i*)clut+1);

View File

@ -2014,7 +2014,7 @@ CMemoryTarget* CMemoryTargetMngr::GetMemoryTarget(const tex0Info& tex0, int forc
Build_Clut_Texture<u16>(tex0.psm, targ->height, (u16*)targ->clut, psrc, (u16*)ptexdata);
}
assert(targ->clut.size() > 0);
assert(targ->clutsize > 0);
}
else
{
@ -2027,7 +2027,7 @@ CMemoryTarget* CMemoryTargetMngr::GetMemoryTarget(const tex0Info& tex0, int forc
u16* dst = (u16*)ptexdata;
u16* src = (u16*)(MemoryAddress(targ->realy));
#if defined(ZEROGS_SSE2)
#ifdef ZEROGS_SSE2
assert(((u32)(uptr)dst) % 16 == 0);
// FIXME Uncomment to test intrinsic versions (instead of asm)
// perf improvement vs asm:
@ -2830,6 +2830,7 @@ inline void Resolve_32_Bit(const void* psrc, int fbp, int fbw, int fbh, const in
static const __aligned16 unsigned int pixel_5b_mask[4] = {0x0000001F, 0x0000001F, 0x0000001F, 0x0000001F};
#ifdef ZEROGS_SSE2
// This function processes 2*2 pixels in 32-bit mode and 2*4 pixels in 16-bit mode.
template <u32 psm, u32 size, u32 pageTable[size][64], bool null_second_line, u32 INDEX>
__forceinline void update_8pixels_sse2(u32* src, u32* basepage, u32 i_msk, u32 j, u32 pix_mask, u32 src_pitch)
@ -3141,6 +3142,7 @@ void Resolve_32_Bit_sse2(const void* psrc, int fbp, int fbw, int fbh, u32 fbm)
#endif
#endif
}
#endif
void _Resolve(const void* psrc, int fbp, int fbw, int fbh, int psm, u32 fbm, bool mode = true)
{