GregMiscellaneous: zzogl-pg:

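* Forgot to remove the misalignment: the SSE2 CLUT writer now takes the CSA index and builds the CLUT pointer itself, instead of receiving a pointer offset by 2 bytes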


git-svn-id: http://pcsx2.googlecode.com/svn/branches/GregMiscellaneous@3922 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
gregory.hainaut@gmail.com 2010-10-15 17:47:36 +00:00
parent e3a2569b53
commit fca1ca1e0a
3 changed files with 22 additions and 14 deletions

View File

@ -663,17 +663,19 @@ void __fastcall WriteCLUT_T16_I4_CSM1_core_sse2(u32* vm, u32* clut)
__m128i clut_mask = _mm_load_si128((__m128i*)s_clut_16bits_mask);
// Note:
// !HIGH_16BITS_VM
// CSA in 0-15 -> Replace lower 16 bits of clut0 with lower 16 bits of vm
// CSA in 16-31 -> Replace higher 16 bits of clut0 with lower 16 bits of vm
// CSA in 0-15
// Replace lower 16 bits of clut0 with lower 16 bits of vm
// CSA in 16-31
// Replace higher 16 bits of clut0 with lower 16 bits of vm
// HIGH_16BITS_VM
// CSA in 0-15 -> Replace lower 16 bits of clut0 with higher 16 bits of vm
// CSA in 16-31 -> Replace higher 16 bits of clut0 with higher 16 bits of vm
// CSA in 0-15
// Replace lower 16 bits of clut0 with higher 16 bits of vm
// CSA in 16-31
// Replace higher 16 bits of clut0 with higher 16 bits of vm
if(HIGH_16BITS_VM && CSA_0_15) {
// move high to low
// move up to low
vm_0 = _mm_load_si128((__m128i*)vm); // 9 8 1 0
vm_1 = _mm_load_si128((__m128i*)vm+1); // 11 10 3 2
vm_2 = _mm_load_si128((__m128i*)vm+2); // 13 12 5 4
@ -739,13 +741,13 @@ void __fastcall WriteCLUT_T16_I4_CSM1_core_sse2(u32* vm, u32* clut)
_mm_store_si128((__m128i*)clut+3, clut_3);
}
extern "C" void __fastcall WriteCLUT_T16_I4_CSM1_sse2(u32* vm, u32* clut)
extern "C" void __fastcall WriteCLUT_T16_I4_CSM1_sse2(u32* vm, u32 csa)
{
if ((u32)clut & 0x0F) {
// CSA 16-31 && low 16bits vm
u32* clut = (u32*)(g_pbyGSClut + 64*(csa & 15));
if (csa > 15) {
WriteCLUT_T16_I4_CSM1_core_sse2<false, false>(vm, clut);
} else {
// CSA 0-15 && low 16bits vm
WriteCLUT_T16_I4_CSM1_core_sse2<true, false>(vm, clut);
}
}

View File

@ -96,9 +96,11 @@ extern void __fastcall SwizzleColumn16_c(int y, u8* dst, u8* src, int srcpitch);
extern void __fastcall SwizzleColumn8_c(int y, u8* dst, u8* src, int srcpitch);
extern void __fastcall SwizzleColumn4_c(int y, u8* dst, u8* src, int srcpitch);
extern "C" void __fastcall WriteCLUT_T16_I8_CSM1_sse2(u32* vm, u32* clut);
// extern "C" void __fastcall WriteCLUT_T16_I8_CSM1_sse2(u32* vm, u32* clut);
extern "C" void __fastcall WriteCLUT_T16_I8_CSM1_sse2(u32* vm, u32 csa);
extern "C" void __fastcall WriteCLUT_T32_I8_CSM1_sse2(u32* vm, u32* clut);
extern "C" void __fastcall WriteCLUT_T16_I4_CSM1_sse2(u32* vm, u32* clut);
// extern "C" void __fastcall WriteCLUT_T16_I4_CSM1_sse2(u32* vm, u32* clut);
extern "C" void __fastcall WriteCLUT_T16_I4_CSM1_sse2(u32* vm, u32 csa);
extern "C" void __fastcall WriteCLUT_T32_I4_CSM1_sse2(u32* vm, u32* clut);
extern void __fastcall WriteCLUT_T16_I8_CSM1_c(u32* vm, u32* clut);
extern void __fastcall WriteCLUT_T32_I8_CSM1_c(u32* vm, u32* clut);

View File

@ -1221,7 +1221,11 @@ void ZeroGS::texClutWrite(int ctx)
break;
default:
WriteCLUT_T16_I4_CSM1(src, (u32*)(g_pbyGSClut + 64*(tex0.csa & 15) + (tex0.csa >= 16 ? 2 : 0)));
#ifdef ZEROGS_SSE2
WriteCLUT_T16_I4_CSM1_sse2(src, tex0.csa);
#else
WriteCLUT_T16_I4_CSM1_c(src, (u32*)(g_pbyGSClut + 64*(tex0.csa & 15) + (tex0.csa >= 16 ? 2 : 0)));
#endif
break;
}
}