From 19536e286449c1b05e2701c490b777d1c0a5bfab Mon Sep 17 00:00:00 2001 From: arcum42 Date: Wed, 31 Mar 2010 12:27:36 +0000 Subject: [PATCH] zzogl-pg: Use the same version of some of the SSE2 WriteClut functions in both Linux and Windows. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@2800 96395faa-99c1-11dd-bbfe-3dabce05a288 --- plugins/zzogl-pg/opengl/x86-32.S | 130 +++++++++++++++---------------- plugins/zzogl-pg/opengl/x86.cpp | 5 +- 2 files changed, 66 insertions(+), 69 deletions(-) diff --git a/plugins/zzogl-pg/opengl/x86-32.S b/plugins/zzogl-pg/opengl/x86-32.S index 8b3752d545..cb57d36c9a 100644 --- a/plugins/zzogl-pg/opengl/x86-32.S +++ b/plugins/zzogl-pg/opengl/x86-32.S @@ -932,71 +932,71 @@ WriteCLUT_T16_I4_CSM1_End: ret -.globl WriteCLUT_T32_I8_CSM1_sse2 - .type WriteCLUT_T32_I8_CSM1_sse2, @function -WriteCLUT_T32_I8_CSM1_sse2: - push %ebx - xor %ebx, %ebx -.L231: - xor %eax, %eax - .align 16 -.L232: - movdqa %xmm3, XMMWORD PTR [%eax+16+%ecx] - movdqa %xmm4, XMMWORD PTR [%eax+48+%ecx] - movdqa %xmm1, XMMWORD PTR [%eax+%ecx] - movdqa %xmm2, XMMWORD PTR [%eax+32+%ecx] - movdqa %xmm0, %xmm1 - punpckhqdq %xmm1, %xmm3 - punpcklqdq %xmm0, %xmm3 - movdqa XMMWORD PTR [%edx+32+%eax*2], %xmm1 - movdqa XMMWORD PTR [%edx+%eax*2], %xmm0 - movdqa %xmm0, %xmm2 - punpckhqdq %xmm2, %xmm4 - punpcklqdq %xmm0, %xmm4 - movdqa XMMWORD PTR [%edx+48+%eax*2], %xmm2 - movdqa XMMWORD PTR [%edx+16+%eax*2], %xmm0 - movdqa %xmm1, XMMWORD PTR [%eax+256+%ecx] - movdqa %xmm3, XMMWORD PTR [%eax+272+%ecx] - movdqa %xmm2, XMMWORD PTR [%eax+288+%ecx] - movdqa %xmm4, XMMWORD PTR [%eax+304+%ecx] - movdqa %xmm0, %xmm1 - punpckhqdq %xmm1, %xmm3 - punpcklqdq %xmm0, %xmm3 - movdqa XMMWORD PTR [%edx+96+%eax*2], %xmm1 - movdqa XMMWORD PTR [%edx+64+%eax*2], %xmm0 - movdqa %xmm0, %xmm2 - punpckhqdq %xmm2, %xmm4 - punpcklqdq %xmm0, %xmm4 - movdqa XMMWORD PTR [%edx+112+%eax*2], %xmm2 - movdqa XMMWORD PTR [%edx+80+%eax*2], %xmm0 - add %eax, 64 - cmp %eax, 256 - jne .L232 - add %edx, 512 - add %ecx, 
512 - add %ebx, 512 - cmp %ebx, 1024 - jne .L231 - pop %ebx - ret +#.globl WriteCLUT_T32_I8_CSM1_sse2 +# .type WriteCLUT_T32_I8_CSM1_sse2, @function +#WriteCLUT_T32_I8_CSM1_sse2: +# push %ebx +# xor %ebx, %ebx +#.L231: +# xor %eax, %eax +# .align 16 +#.L232: +# movdqa %xmm3, XMMWORD PTR [%eax+16+%ecx] +# movdqa %xmm4, XMMWORD PTR [%eax+48+%ecx] +# movdqa %xmm1, XMMWORD PTR [%eax+%ecx] +# movdqa %xmm2, XMMWORD PTR [%eax+32+%ecx] +# movdqa %xmm0, %xmm1 +# punpckhqdq %xmm1, %xmm3 +# punpcklqdq %xmm0, %xmm3 +# movdqa XMMWORD PTR [%edx+32+%eax*2], %xmm1 +# movdqa XMMWORD PTR [%edx+%eax*2], %xmm0 +# movdqa %xmm0, %xmm2 +# punpckhqdq %xmm2, %xmm4 +# punpcklqdq %xmm0, %xmm4 +# movdqa XMMWORD PTR [%edx+48+%eax*2], %xmm2 +# movdqa XMMWORD PTR [%edx+16+%eax*2], %xmm0 +# movdqa %xmm1, XMMWORD PTR [%eax+256+%ecx] +# movdqa %xmm3, XMMWORD PTR [%eax+272+%ecx] +# movdqa %xmm2, XMMWORD PTR [%eax+288+%ecx] +# movdqa %xmm4, XMMWORD PTR [%eax+304+%ecx] +# movdqa %xmm0, %xmm1 +# punpckhqdq %xmm1, %xmm3 +# punpcklqdq %xmm0, %xmm3 +# movdqa XMMWORD PTR [%edx+96+%eax*2], %xmm1 +# movdqa XMMWORD PTR [%edx+64+%eax*2], %xmm0 +# movdqa %xmm0, %xmm2 +# punpckhqdq %xmm2, %xmm4 +# punpcklqdq %xmm0, %xmm4 +# movdqa XMMWORD PTR [%edx+112+%eax*2], %xmm2 +# movdqa XMMWORD PTR [%edx+80+%eax*2], %xmm0 +# add %eax, 64 +# cmp %eax, 256 +# jne .L232 +# add %edx, 512 +# add %ecx, 512 +# add %ebx, 512 +# cmp %ebx, 1024 +# jne .L231 +# pop %ebx +# ret -.globl WriteCLUT_T32_I4_CSM1_sse2 - .type WriteCLUT_T32_I4_CSM1_sse2, @function -WriteCLUT_T32_I4_CSM1_sse2: - movdqa %xmm1, XMMWORD PTR [%ecx] - movdqa %xmm3, XMMWORD PTR [%ecx+16] - movdqa %xmm2, XMMWORD PTR [%ecx+32] - movdqa %xmm4, XMMWORD PTR [%ecx+48] - movdqa %xmm0, %xmm1 - punpckhqdq %xmm1, %xmm3 - punpcklqdq %xmm0, %xmm3 - movdqa XMMWORD PTR [%edx+32], %xmm1 - movdqa XMMWORD PTR [%edx], %xmm0 - movdqa %xmm0, %xmm2 - punpckhqdq %xmm2, %xmm4 - punpcklqdq %xmm0, %xmm4 - movdqa XMMWORD PTR [%edx+48], %xmm2 - movdqa XMMWORD PTR [%edx+16], %xmm0 - ret 
+#.globl WriteCLUT_T32_I4_CSM1_sse2 +# .type WriteCLUT_T32_I4_CSM1_sse2, @function +#WriteCLUT_T32_I4_CSM1_sse2: +# movdqa %xmm1, XMMWORD PTR [%ecx] +# movdqa %xmm3, XMMWORD PTR [%ecx+16] +# movdqa %xmm2, XMMWORD PTR [%ecx+32] +# movdqa %xmm4, XMMWORD PTR [%ecx+48] +# movdqa %xmm0, %xmm1 +# punpckhqdq %xmm1, %xmm3 +# punpcklqdq %xmm0, %xmm3 +# movdqa XMMWORD PTR [%edx+32], %xmm1 +# movdqa XMMWORD PTR [%edx], %xmm0 +# movdqa %xmm0, %xmm2 +# punpckhqdq %xmm2, %xmm4 +# punpcklqdq %xmm0, %xmm4 +# movdqa XMMWORD PTR [%edx+48], %xmm2 +# movdqa XMMWORD PTR [%edx+16], %xmm0 +# ret #endif diff --git a/plugins/zzogl-pg/opengl/x86.cpp b/plugins/zzogl-pg/opengl/x86.cpp index 6003966f91..330163deb3 100644 --- a/plugins/zzogl-pg/opengl/x86.cpp +++ b/plugins/zzogl-pg/opengl/x86.cpp @@ -23,7 +23,7 @@ #include "Mem.h" #include "x86.h" -#if defined(ZEROGS_SSE2) && defined(_WIN32) +#if defined(ZEROGS_SSE2) #include <xmmintrin.h> #include <emmintrin.h> #endif @@ -261,8 +261,6 @@ _FrameSwizzleBlock(A4_, (src[2*j]+src[2*j+1]+src[2*j+srcpitch]+src[2*j+srcpitch+ // } //} -#if defined(_WIN32) - extern "C" void __fastcall WriteCLUT_T32_I8_CSM1_sse2(u32* vm, u32* clut) { __m128i* src = (__m128i*)vm; @@ -310,7 +308,6 @@ extern "C" void __fastcall WriteCLUT_T32_I4_CSM1_sse2(u32* vm, u32* clut) _mm_store_si128(&dst[2], _mm_unpackhi_epi64(r0, r1)); _mm_store_si128(&dst[3], _mm_unpackhi_epi64(r2, r3)); } -#endif #if defined(_MSC_VER)