mirror of https://github.com/PCSX2/pcsx2.git
GregMiscellaneous:zzogl-pg:
* Fix SSE2 code for 16-bit CLUTs. * Fix debug build. git-svn-id: http://pcsx2.googlecode.com/svn/branches/GregMiscellaneous@3952 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
0fc2e87809
commit
d886dbfadb
|
@ -136,7 +136,7 @@ void GLWindow::GetWindowSize()
|
|||
// update the gl buffer size
|
||||
ChangeWindowSize(width, height);
|
||||
|
||||
ZZLog::Error_Log("Resolution %dx%d. Depth %d bpp. Position (%d,%d)", width, height, depth, conf.x, conf.y);
|
||||
ZZLog::Dev_Log("Resolution %dx%d. Depth %d bpp. Position (%d,%d)", width, height, depth, conf.x, conf.y);
|
||||
}
|
||||
|
||||
void GLWindow::GetGLXVersion()
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -173,10 +173,10 @@ __forceinline void GSMem_to_ClutBuffer__T16_I4_CSM1_core_sse2(u32* vm, u32* clut
|
|||
}
|
||||
|
||||
// Unswizzle the data
|
||||
__m128i row_0 = _mm_unpacklo_epi32(vm_0, vm_1); // 3 2 1 0
|
||||
__m128i row_1 = _mm_unpacklo_epi32(vm_2, vm_3); // 7 6 5 4
|
||||
__m128i row_2 = _mm_unpackhi_epi32(vm_0, vm_1); // 11 10 9 8
|
||||
__m128i row_3 = _mm_unpackhi_epi32(vm_2, vm_3); // 15 14 13 12
|
||||
__m128i row_0 = _mm_unpacklo_epi64(vm_0, vm_1); // 3 2 1 0
|
||||
__m128i row_1 = _mm_unpacklo_epi64(vm_2, vm_3); // 7 6 5 4
|
||||
__m128i row_2 = _mm_unpackhi_epi64(vm_0, vm_1); // 11 10 9 8
|
||||
__m128i row_3 = _mm_unpackhi_epi64(vm_2, vm_3); // 15 14 13 12
|
||||
|
||||
// load old data & remove useless part
|
||||
if(CSA_0_15) {
|
||||
|
@ -241,6 +241,8 @@ __forceinline void GSMem_to_ClutBuffer__T16_I8_CSM1_sse2(u32* vm, u32 csa)
|
|||
clut = GetClutBufferAddress<u32>(0); // Keep aligned version for sse2
|
||||
|
||||
GSMem_to_ClutBuffer__T16_I4_CSM1_core_sse2<false,true>(vm, clut);
|
||||
clut += 16;
|
||||
vm += 16; // go down one column
|
||||
} else if(csa_right != 0) {
|
||||
// go back to the base before processing left clut column
|
||||
clut = GetClutBufferAddress<u32>(0); // Keep aligned version for sse2
|
||||
|
@ -512,7 +514,7 @@ __forceinline void ClutBuffer_to_Array<u16>(u16* dst, u32 csa, u32 clutsize)
|
|||
|
||||
while (clutsize_right > 0)
|
||||
{
|
||||
#ifdef ZEROGS_SSE4
|
||||
#ifdef ZEROGS_SSE2
|
||||
// only lower 16 bits of dword are valid
|
||||
__m128i clut_0 = _mm_load_si128((__m128i*)clut);
|
||||
__m128i clut_1 = _mm_load_si128((__m128i*)clut+1);
|
||||
|
|
|
@ -2014,7 +2014,7 @@ CMemoryTarget* CMemoryTargetMngr::GetMemoryTarget(const tex0Info& tex0, int forc
|
|||
Build_Clut_Texture<u16>(tex0.psm, targ->height, (u16*)targ->clut, psrc, (u16*)ptexdata);
|
||||
}
|
||||
|
||||
assert(targ->clut.size() > 0);
|
||||
assert(targ->clutsize > 0);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -2027,7 +2027,7 @@ CMemoryTarget* CMemoryTargetMngr::GetMemoryTarget(const tex0Info& tex0, int forc
|
|||
u16* dst = (u16*)ptexdata;
|
||||
u16* src = (u16*)(MemoryAddress(targ->realy));
|
||||
|
||||
#if defined(ZEROGS_SSE2)
|
||||
#ifdef ZEROGS_SSE2
|
||||
assert(((u32)(uptr)dst) % 16 == 0);
|
||||
// FIXME Uncomment to test intrinsic versions (instead of asm)
|
||||
// perf improvement vs asm:
|
||||
|
@ -2830,6 +2830,7 @@ inline void Resolve_32_Bit(const void* psrc, int fbp, int fbw, int fbh, const in
|
|||
|
||||
static const __aligned16 unsigned int pixel_5b_mask[4] = {0x0000001F, 0x0000001F, 0x0000001F, 0x0000001F};
|
||||
|
||||
#ifdef ZEROGS_SSE2
|
||||
// The function processes 2*2 pixels in 32 bits and 2*4 pixels in 16 bits.
|
||||
template <u32 psm, u32 size, u32 pageTable[size][64], bool null_second_line, u32 INDEX>
|
||||
__forceinline void update_8pixels_sse2(u32* src, u32* basepage, u32 i_msk, u32 j, u32 pix_mask, u32 src_pitch)
|
||||
|
@ -3141,6 +3142,7 @@ void Resolve_32_Bit_sse2(const void* psrc, int fbp, int fbw, int fbh, u32 fbm)
|
|||
#endif
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
void _Resolve(const void* psrc, int fbp, int fbw, int fbh, int psm, u32 fbm, bool mode = true)
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue