zzogl-pg: Use the same version of some of the SSE2 WriteCLUT functions on both Linux and Windows.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@2800 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
arcum42 2010-03-31 12:27:36 +00:00
parent fe8250967a
commit 19536e2864
2 changed files with 66 additions and 69 deletions

View File

@ -932,71 +932,71 @@ WriteCLUT_T16_I4_CSM1_End:
ret ret
# WriteCLUT_T32_I8_CSM1_sse2
#
# Purpose: copy 1024 bytes from the buffer addressed by ecx to the buffer
# addressed by edx, regrouping the 64-bit halves of each 16-byte vector.
#
# Syntax:  operands are written destination-first with percent-prefixed
#          registers and XMMWORD PTR memory operands. Presumably the file
#          selects this mode with an .intel_syntax directive that is not
#          visible in this section - TODO confirm.
# Inputs:  ecx = read pointer, edx = write pointer. NOTE(review): this looks
#          like a fastcall-style (vm, clut) argument pair - confirm against
#          the C prototype.
#          Every access uses movdqa, so both pointers must be 16-byte aligned.
# Clobbers: eax, ecx, edx, xmm0-xmm4, flags. ebx is saved and restored.
#
# For four consecutive source vectors A, B, C, D the stores produce:
#   dst+0  = { A.lo, B.lo }     dst+16 = { C.lo, D.lo }
#   dst+32 = { A.hi, B.hi }     dst+48 = { C.hi, D.hi }
#
# The trailing # text on each row repeats the instruction as a comment.
.globl WriteCLUT_T32_I8_CSM1_sse2 #.globl WriteCLUT_T32_I8_CSM1_sse2
.type WriteCLUT_T32_I8_CSM1_sse2, @function # .type WriteCLUT_T32_I8_CSM1_sse2, @function
WriteCLUT_T32_I8_CSM1_sse2: #WriteCLUT_T32_I8_CSM1_sse2:
# ebx counts bytes handled by the outer loop; preserve the caller value.
push %ebx # push %ebx
xor %ebx, %ebx # xor %ebx, %ebx
# Outer loop: two passes (ebx = 0, 512), each covering 512 bytes.
.L231: #.L231:
# eax = source byte offset inside the current pass; the destination offset
# is eax*2.
xor %eax, %eax # xor %eax, %eax
.align 16 # .align 16
# Inner loop: four iterations (eax = 0, 64, 128, 192).
.L232: #.L232:
# First group: 64 source bytes at ecx+eax go to edx+eax*2+0..63.
movdqa %xmm3, XMMWORD PTR [%eax+16+%ecx] # movdqa %xmm3, XMMWORD PTR [%eax+16+%ecx]
movdqa %xmm4, XMMWORD PTR [%eax+48+%ecx] # movdqa %xmm4, XMMWORD PTR [%eax+48+%ecx]
movdqa %xmm1, XMMWORD PTR [%eax+%ecx] # movdqa %xmm1, XMMWORD PTR [%eax+%ecx]
movdqa %xmm2, XMMWORD PTR [%eax+32+%ecx] # movdqa %xmm2, XMMWORD PTR [%eax+32+%ecx]
# Keep a copy of A in xmm0 so both the high and the low pairing can be built.
movdqa %xmm0, %xmm1 # movdqa %xmm0, %xmm1
punpckhqdq %xmm1, %xmm3 # punpckhqdq %xmm1, %xmm3
punpcklqdq %xmm0, %xmm3 # punpcklqdq %xmm0, %xmm3
movdqa XMMWORD PTR [%edx+32+%eax*2], %xmm1 # movdqa XMMWORD PTR [%edx+32+%eax*2], %xmm1
movdqa XMMWORD PTR [%edx+%eax*2], %xmm0 # movdqa XMMWORD PTR [%edx+%eax*2], %xmm0
# Same pairing for C (xmm2) and D (xmm4).
movdqa %xmm0, %xmm2 # movdqa %xmm0, %xmm2
punpckhqdq %xmm2, %xmm4 # punpckhqdq %xmm2, %xmm4
punpcklqdq %xmm0, %xmm4 # punpcklqdq %xmm0, %xmm4
movdqa XMMWORD PTR [%edx+48+%eax*2], %xmm2 # movdqa XMMWORD PTR [%edx+48+%eax*2], %xmm2
movdqa XMMWORD PTR [%edx+16+%eax*2], %xmm0 # movdqa XMMWORD PTR [%edx+16+%eax*2], %xmm0
# Second group: 64 source bytes at ecx+eax+256 go to edx+eax*2+64..127.
movdqa %xmm1, XMMWORD PTR [%eax+256+%ecx] # movdqa %xmm1, XMMWORD PTR [%eax+256+%ecx]
movdqa %xmm3, XMMWORD PTR [%eax+272+%ecx] # movdqa %xmm3, XMMWORD PTR [%eax+272+%ecx]
movdqa %xmm2, XMMWORD PTR [%eax+288+%ecx] # movdqa %xmm2, XMMWORD PTR [%eax+288+%ecx]
movdqa %xmm4, XMMWORD PTR [%eax+304+%ecx] # movdqa %xmm4, XMMWORD PTR [%eax+304+%ecx]
movdqa %xmm0, %xmm1 # movdqa %xmm0, %xmm1
punpckhqdq %xmm1, %xmm3 # punpckhqdq %xmm1, %xmm3
punpcklqdq %xmm0, %xmm3 # punpcklqdq %xmm0, %xmm3
movdqa XMMWORD PTR [%edx+96+%eax*2], %xmm1 # movdqa XMMWORD PTR [%edx+96+%eax*2], %xmm1
movdqa XMMWORD PTR [%edx+64+%eax*2], %xmm0 # movdqa XMMWORD PTR [%edx+64+%eax*2], %xmm0
movdqa %xmm0, %xmm2 # movdqa %xmm0, %xmm2
punpckhqdq %xmm2, %xmm4 # punpckhqdq %xmm2, %xmm4
punpcklqdq %xmm0, %xmm4 # punpcklqdq %xmm0, %xmm4
movdqa XMMWORD PTR [%edx+112+%eax*2], %xmm2 # movdqa XMMWORD PTR [%edx+112+%eax*2], %xmm2
movdqa XMMWORD PTR [%edx+80+%eax*2], %xmm0 # movdqa XMMWORD PTR [%edx+80+%eax*2], %xmm0
# Advance 64 source bytes; the inner loop stops once eax reaches 256.
add %eax, 64 # add %eax, 64
cmp %eax, 256 # cmp %eax, 256
jne .L232 # jne .L232
# Move both pointers to the next 512-byte half; stop after 1024 bytes.
add %edx, 512 # add %edx, 512
add %ecx, 512 # add %ecx, 512
add %ebx, 512 # add %ebx, 512
cmp %ebx, 1024 # cmp %ebx, 1024
jne .L231 # jne .L231
pop %ebx # pop %ebx
ret # ret
# WriteCLUT_T32_I4_CSM1_sse2
#
# Purpose: copy 64 bytes from the buffer addressed by ecx to the buffer
# addressed by edx, regrouping the 64-bit halves of the four 16-byte vectors.
# Straight-line code: no loop and no stack use.
#
# Inputs:  ecx = read pointer, edx = write pointer. NOTE(review): this looks
#          like a fastcall-style (vm, clut) argument pair - confirm against
#          the C prototype.
#          Every access uses movdqa, so both pointers must be 16-byte aligned.
# Clobbers: xmm0-xmm4 only.
#
# With A, B, C, D the vectors at ecx+0, +16, +32, +48 the stores produce:
#   edx+0  = { A.lo, B.lo }     edx+16 = { C.lo, D.lo }
#   edx+32 = { A.hi, B.hi }     edx+48 = { C.hi, D.hi }
#
# The trailing # text on each row repeats the instruction as a comment.
.globl WriteCLUT_T32_I4_CSM1_sse2 #.globl WriteCLUT_T32_I4_CSM1_sse2
.type WriteCLUT_T32_I4_CSM1_sse2, @function # .type WriteCLUT_T32_I4_CSM1_sse2, @function
WriteCLUT_T32_I4_CSM1_sse2: #WriteCLUT_T32_I4_CSM1_sse2:
# Load A, B, C, D into xmm1, xmm3, xmm2, xmm4.
movdqa %xmm1, XMMWORD PTR [%ecx] # movdqa %xmm1, XMMWORD PTR [%ecx]
movdqa %xmm3, XMMWORD PTR [%ecx+16] # movdqa %xmm3, XMMWORD PTR [%ecx+16]
movdqa %xmm2, XMMWORD PTR [%ecx+32] # movdqa %xmm2, XMMWORD PTR [%ecx+32]
movdqa %xmm4, XMMWORD PTR [%ecx+48] # movdqa %xmm4, XMMWORD PTR [%ecx+48]
# Keep a copy of A in xmm0 so both the high and the low pairing can be built.
movdqa %xmm0, %xmm1 # movdqa %xmm0, %xmm1
punpckhqdq %xmm1, %xmm3 # punpckhqdq %xmm1, %xmm3
punpcklqdq %xmm0, %xmm3 # punpcklqdq %xmm0, %xmm3
movdqa XMMWORD PTR [%edx+32], %xmm1 # movdqa XMMWORD PTR [%edx+32], %xmm1
movdqa XMMWORD PTR [%edx], %xmm0 # movdqa XMMWORD PTR [%edx], %xmm0
# Same pairing for C (xmm2) and D (xmm4).
movdqa %xmm0, %xmm2 # movdqa %xmm0, %xmm2
punpckhqdq %xmm2, %xmm4 # punpckhqdq %xmm2, %xmm4
punpcklqdq %xmm0, %xmm4 # punpcklqdq %xmm0, %xmm4
movdqa XMMWORD PTR [%edx+48], %xmm2 # movdqa XMMWORD PTR [%edx+48], %xmm2
movdqa XMMWORD PTR [%edx+16], %xmm0 # movdqa XMMWORD PTR [%edx+16], %xmm0
ret # ret
#endif #endif

View File

@ -23,7 +23,7 @@
#include "Mem.h" #include "Mem.h"
#include "x86.h" #include "x86.h"
#if defined(ZEROGS_SSE2) && defined(_WIN32) #if defined(ZEROGS_SSE2)
#include <xmmintrin.h> #include <xmmintrin.h>
#include <emmintrin.h> #include <emmintrin.h>
#endif #endif
@ -261,8 +261,6 @@ _FrameSwizzleBlock(A4_, (src[2*j]+src[2*j+1]+src[2*j+srcpitch]+src[2*j+srcpitch+
// } // }
//} //}
#if defined(_WIN32)
extern "C" void __fastcall WriteCLUT_T32_I8_CSM1_sse2(u32* vm, u32* clut) extern "C" void __fastcall WriteCLUT_T32_I8_CSM1_sse2(u32* vm, u32* clut)
{ {
__m128i* src = (__m128i*)vm; __m128i* src = (__m128i*)vm;
@ -310,7 +308,6 @@ extern "C" void __fastcall WriteCLUT_T32_I4_CSM1_sse2(u32* vm, u32* clut)
_mm_store_si128(&dst[2], _mm_unpackhi_epi64(r0, r1)); _mm_store_si128(&dst[2], _mm_unpackhi_epi64(r0, r1));
_mm_store_si128(&dst[3], _mm_unpackhi_epi64(r2, r3)); _mm_store_si128(&dst[3], _mm_unpackhi_epi64(r2, r3));
} }
#endif
#if defined(_MSC_VER) #if defined(_MSC_VER)