Remove lots of evil %'s.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@693 96395faa-99c1-11dd-bbfe-3dabce05a288
arcum42 committed on 2009-03-05 21:35:26 +00:00
parent 8614bbd0f8
commit 3ae6ff0856
13 changed files with 479 additions and 477 deletions
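
The change is essentially mechanical across the thirteen files: every ".intel_syntax" directive gains the "noprefix" argument, and the AT&T-style percent prefixes on register names are dropped to match. A minimal sketch of the pattern, using just the first two instructions of the FreezeMMXRegs_ hunk further down:

    // before: Intel operand order, but register names still carry the
    // AT&T prefix, which has to be written as %% inside an asm string
    __asm__(".intel_syntax\n"
            "movq [%0+0x00], %%mm0\n"
            "movq [%0+0x08], %%mm1\n"
            ".att_syntax\n" : : "r"(g_globalMMXData) );

    // after: noprefix mode, bare register names
    __asm__(".intel_syntax noprefix\n"
            "movq [%0+0x00], mm0\n"
            "movq [%0+0x08], mm1\n"
            ".att_syntax\n" : : "r"(g_globalMMXData) );

The single-percent operand placeholders such as %0 are untouched, since the compiler substitutes those before the assembler ever sees the string. The standalone .S files get the same two-part treatment, for example "mov %eax, dword ptr [%esp+4]" becoming "mov eax, dword ptr [esp+4]".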

View File

@@ -231,7 +231,9 @@ public:
#ifndef __LINUX__
__asm mov eax, SendSimplePacket
#else
__asm ("mov %eax, SendSimplePacket");
__asm__ (".intel_syntax noprefix\n"
"mov eax, SendSimplePacket\n"
".att_syntax\n");
#endif
//return (uptr)&SendSimplePacket;
}

View File

@@ -1,75 +1,75 @@
.intel_syntax
.intel_syntax noprefix
.extern g_pCurrentRoutine
.globl so_call
so_call:
mov %eax, dword ptr [%esp+4]
test dword ptr [%eax+24], 1
mov eax, dword ptr [esp+4]
test dword ptr [eax+24], 1
jnz RestoreRegs
mov [%eax+8], %ebx
mov [%eax+12], %esi
mov [%eax+16], %edi
mov [%eax+20], %ebp
mov dword ptr [%eax+24], 1
mov [eax+8], ebx
mov [eax+12], esi
mov [eax+16], edi
mov [eax+20], ebp
mov dword ptr [eax+24], 1
jmp CallFn
RestoreRegs:
// have to load and save at the same time
mov %ecx, [%eax+8]
mov %edx, [%eax+12]
mov [%eax+8], %ebx
mov [%eax+12], %esi
mov %ebx, %ecx
mov %esi, %edx
mov %ecx, [%eax+16]
mov %edx, [%eax+20]
mov [%eax+16], %edi
mov [%eax+20], %ebp
mov %edi, %ecx
mov %ebp, %edx
mov ecx, [eax+8]
mov edx, [eax+12]
mov [eax+8], ebx
mov [eax+12], esi
mov ebx, ecx
mov esi, edx
mov ecx, [eax+16]
mov edx, [eax+20]
mov [eax+16], edi
mov [eax+20], ebp
mov edi, ecx
mov ebp, edx
CallFn:
mov [g_pCurrentRoutine], %eax
mov %ecx, %esp
mov %esp, [%eax+4]
mov [%eax+4], %ecx
mov [g_pCurrentRoutine], eax
mov ecx, esp
mov esp, [eax+4]
mov [eax+4], ecx
jmp dword ptr [%eax]
jmp dword ptr [eax]
.globl so_resume
so_resume:
mov %eax, [g_pCurrentRoutine]
mov %ecx, [%eax+8]
mov %edx, [%eax+12]
mov [%eax+8], %ebx
mov [%eax+12], %esi
mov %ebx, %ecx
mov %esi, %edx
mov %ecx, [%eax+16]
mov %edx, [%eax+20]
mov [%eax+16], %edi
mov [%eax+20], %ebp
mov %edi, %ecx
mov %ebp, %edx
mov eax, [g_pCurrentRoutine]
mov ecx, [eax+8]
mov edx, [eax+12]
mov [eax+8], ebx
mov [eax+12], esi
mov ebx, ecx
mov esi, edx
mov ecx, [eax+16]
mov edx, [eax+20]
mov [eax+16], edi
mov [eax+20], ebp
mov edi, ecx
mov ebp, edx
// put the return address in pcalladdr
mov %ecx, [%esp]
mov [%eax], %ecx
add %esp, 4 // remove the return address
mov ecx, [esp]
mov [eax], ecx
add esp, 4 // remove the return address
// swap stack pointers
mov %ecx, [%eax+4]
mov [%eax+4], %esp
mov %esp, %ecx
mov ecx, [eax+4]
mov [eax+4], esp
mov esp, ecx
ret
.globl so_exit
so_exit:
mov %eax, [g_pCurrentRoutine]
mov %esp, [%eax+4]
mov %ebx, [%eax+8]
mov %esi, [%eax+12]
mov %edi, [%eax+16]
mov %ebp, [%eax+20]
mov eax, [g_pCurrentRoutine]
mov esp, [eax+4]
mov ebx, [eax+8]
mov esi, [eax+12]
mov edi, [eax+16]
mov ebp, [eax+20]
ret

View File

@@ -129,15 +129,15 @@ namespace Threading
if( true ) //isMultiCore )
{
__asm__ __volatile__(
".intel_syntax\n"
"lock xadd [%0], %%eax\n"
".intel_syntax noprefix\n"
"lock xadd [%0], eax\n"
".att_syntax\n" : : "r"(Addend), "a"(Value) : "memory");
}
else
{
__asm__ __volatile__(
".intel_syntax\n"
"xadd [%0], %%eax\n"
".intel_syntax noprefix\n"
"xadd [%0], eax\n"
".att_syntax\n" : : "r"(Addend), "a"(Value) : "memory");
}
}
@@ -175,8 +175,8 @@ namespace Threading
__forceinline void pcsx2_InterlockedExchange64(volatile s64* Target, s64 Value)
{
__asm__ __volatile__(
".intel_syntax\n"
"lock xchg [%0], %%rax\n"
".intel_syntax noprefix\n"
"lock xchg [%0], rax\n"
".att_syntax\n" : : "r"(Target), "a"(Value) : "memory"
);
return 0;

View File

@@ -107,10 +107,10 @@ static __forceinline void memset_8( void *dest )
case 3:
__asm__
(
".intel_syntax\n"
".intel_syntax noprefix\n"
"cld\n"
// "mov %edi, %0\n"
// "mov %eax, %1\n"
// "mov edi, %0\n"
// "mov eax, %1\n"
"stosd\n"
"stosd\n"
"stosd\n"
@@ -125,10 +125,10 @@ static __forceinline void memset_8( void *dest )
case 4:
__asm__
(
".intel_syntax\n"
".intel_syntax noprefix\n"
"cld\n"
// "mov %edi, %0\n"
// "mov %eax, %1\n"
// "mov edi, %0\n"
// "mov eax, %1\n"
"stosd\n"
"stosd\n"
"stosd\n"
@@ -144,10 +144,10 @@ static __forceinline void memset_8( void *dest )
case 5:
__asm__
(
".intel_syntax\n"
".intel_syntax noprefix\n"
"cld\n"
// "mov %edi, %0\n"
// "mov %eax, %1\n"
// "mov edi, %0\n"
// "mov eax, %1\n"
"stosd\n"
"stosd\n"
"stosd\n"
@@ -164,7 +164,7 @@ static __forceinline void memset_8( void *dest )
default:
__asm__
(
".intel_syntax\n"
".intel_syntax noprefix\n"
"cld\n"
// "mov ecx, %0\n"
// "mov edi, %1\n"

View File

@@ -504,15 +504,15 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdmanum
}
#else
if( VIFdmanum ) {
__asm__(".intel_syntax\n"
"movaps %%xmm6, xmmword ptr [%0]\n"
"movaps %%xmm7, xmmword ptr [%1]\n"
__asm__(".intel_syntax noprefix\n"
"movaps xmm6, xmmword ptr [%0]\n"
"movaps xmm7, xmmword ptr [%1]\n"
".att_syntax\n" : :"r"(g_vifRow1), "r"(g_vifCol1) );
}
else {
__asm__(".intel_syntax\n"
"movaps %%xmm6, xmmword ptr [%0]\n"
"movaps %%xmm7, xmmword ptr [%1]\n"
__asm__(".intel_syntax noprefix\n"
"movaps xmm6, xmmword ptr [%0]\n"
"movaps xmm7, xmmword ptr [%1]\n"
".att_syntax\n" : : "r"(g_vifRow0), "r"(g_vifCol0) );
}
#endif

View File

@@ -1,6 +1,6 @@
// microVU.cpp assembly routines
// arcum42(@gmail.com)
.intel_syntax
.intel_syntax noprefix
.extern mVUexecuteVU0
.extern mVUexecuteVU1
@@ -19,14 +19,14 @@ startVU0:
call mVUexecuteVU0
// backup cpu state
push %ebx
push %ebp
push %esi
push %edi
push ebx
push ebp
push esi
push edi
ldmxcsr g_sseVUMXCSR
// Should set xmmZ?
jmp %eax
jmp eax
// Runs VU1 for number of cycles
// void __fastcall startVU1(u32 startPC, u32 cycles)
@@ -35,14 +35,14 @@ startVU01:
call mVUexecuteVU1
// backup cpu state
push %ebx
push %ebp
push %esi
push %edi
push ebx
push ebp
push esi
push edi
ldmxcsr g_sseVUMXCSR
jmp %eax
jmp eax
// Exit point
// void __fastcall endVU0(u32 startPC, u32 cycles)
@@ -51,10 +51,10 @@ endVU0:
//call mVUcleanUpVU0
/*restore cpu state*/
pop %edi;
pop %esi;
pop %ebp;
pop %ebx;
pop edi;
pop esi;
pop ebp;
pop ebx;
ldmxcsr g_sseMXCSR

View File

@@ -1,6 +1,6 @@
// iVUzerorec.cpp assembly routines
// zerofrog(@gmail.com)
.intel_syntax
.intel_syntax noprefix
.extern svudispfntemp
.extern s_TotalVUCycles
@@ -17,35 +17,35 @@
// SuperVUExecuteProgram(u32 startpc, int vuindex)
.globl SuperVUExecuteProgram
SuperVUExecuteProgram:
mov %eax, [%esp]
mov eax, [esp]
mov dword ptr s_TotalVUCycles, 0
add %esp, 4
mov dword ptr [s_callstack], %eax
add esp, 4
mov dword ptr [s_callstack], eax
call SuperVUGetProgram
mov s_vu1ebp, %ebp
mov s_vu1esi, %esi
mov s_vuedi, %edi
mov s_vuebx, %ebx
mov s_vu1ebp, ebp
mov s_vu1esi, esi
mov s_vuedi, edi
mov s_vuebx, ebx
#ifdef _DEBUG
mov s_vu1esp, %esp
mov s_vu1esp, esp
#endif
ldmxcsr g_sseVUMXCSR
mov dword ptr s_writeQ, 0xffffffff
mov dword ptr s_writeP, 0xffffffff
jmp %eax
jmp eax
.globl SuperVUEndProgram
SuperVUEndProgram:
// restore cpu state
ldmxcsr g_sseMXCSR
mov %ebp, s_vu1ebp
mov %esi, s_vu1esi
mov %edi, s_vuedi
mov %ebx, s_vuebx
mov ebp, s_vu1ebp
mov esi, s_vu1esi
mov edi, s_vuedi
mov ebx, s_vuebx
#ifdef _DEBUG
sub s_vu1esp, %esp
sub s_vu1esp, esp
#endif
call SuperVUCleanupProgram
@@ -54,20 +54,20 @@ SuperVUEndProgram:
.globl svudispfn
svudispfn:
mov [g_curdebugvu], %eax
mov s_saveecx, %ecx
mov s_saveedx, %edx
mov s_saveebx, %ebx
mov s_saveesi, %esi
mov s_saveedi, %edi
mov s_saveebp, %ebp
mov [g_curdebugvu], eax
mov s_saveecx, ecx
mov s_saveedx, edx
mov s_saveebx, ebx
mov s_saveesi, esi
mov s_saveedi, edi
mov s_saveebp, ebp
call svudispfntemp
mov %ecx, s_saveecx
mov %edx, s_saveedx
mov %ebx, s_saveebx
mov %esi, s_saveesi
mov %edi, s_saveedi
mov %ebp, s_saveebp
mov ecx, s_saveecx
mov edx, s_saveedx
mov ebx, s_saveebx
mov esi, s_saveesi
mov edi, s_saveedi
mov ebp, s_saveebp
ret

View File

@@ -16,29 +16,29 @@
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
.intel_syntax
.intel_syntax noprefix
.extern _vifRegs
.extern _vifMaskRegs
.extern _vifRow
#define VIF_ESP %esp
#define VIF_SRC %esi
#define VIF_INC %ecx
#define VIF_DST %edi
#define VIF_SIZE %edx
#define VIF_TMPADDR %eax
#define VIF_SAVEEBX %ebx
#define VIF_SAVEEBXd %ebx
#define VIF_ESP esp
#define VIF_SRC esi
#define VIF_INC ecx
#define VIF_DST edi
#define VIF_SIZE edx
#define VIF_TMPADDR eax
#define VIF_SAVEEBX ebx
#define VIF_SAVEEBXd ebx
#define XMM_R0 %xmm0
#define XMM_R1 %xmm1
#define XMM_R2 %xmm2
#define XMM_WRITEMASK %xmm3
#define XMM_ROWMASK %xmm4
#define XMM_ROWCOLMASK %xmm5
#define XMM_ROW %xmm6
#define XMM_COL %xmm7
#define XMM_R0 xmm0
#define XMM_R1 xmm1
#define XMM_R2 xmm2
#define XMM_WRITEMASK xmm3
#define XMM_ROWMASK xmm4
#define XMM_ROWCOLMASK xmm5
#define XMM_ROW xmm6
#define XMM_COL xmm7
#define XMM_R3 XMM_COL
@@ -1189,35 +1189,35 @@
.extern s_TempDecompress
#define DECOMPRESS_RGBA(OFFSET) \
mov %bl, %al; \
shl %bl, 3; \
mov byte ptr [s_TempDecompress+OFFSET], %bl; \
mov bl, al; \
shl bl, 3; \
mov byte ptr [s_TempDecompress+OFFSET], bl; \
\
mov %bx, %ax; \
shr %bx, 2; \
and %bx, 0xf8; \
mov byte ptr [s_TempDecompress+OFFSET+1], %bl; \
mov bx, ax; \
shr bx, 2; \
and bx, 0xf8; \
mov byte ptr [s_TempDecompress+OFFSET+1], bl; \
\
mov %bx, %ax; \
shr %bx, 7; \
and %bx, 0xf8; \
mov byte ptr [s_TempDecompress+OFFSET+2], %bl; \
mov %bx, %ax; \
shr %bx, 8; \
and %bx, 0x80; \
mov byte ptr [s_TempDecompress+OFFSET+3], %bl; \
mov bx, ax; \
shr bx, 7; \
and bx, 0xf8; \
mov byte ptr [s_TempDecompress+OFFSET+2], bl; \
mov bx, ax; \
shr bx, 8; \
and bx, 0x80; \
mov byte ptr [s_TempDecompress+OFFSET+3], bl; \
#define UNPACK_V4_5SSE_4(CL, TOTALCL, MaskType, ModeType) \
mov %eax, dword ptr [VIF_SRC]; \
mov eax, dword ptr [VIF_SRC]; \
DECOMPRESS_RGBA(0); \
\
shr %eax, 16; \
shr eax, 16; \
DECOMPRESS_RGBA(4); \
\
mov %eax, dword ptr [VIF_SRC+4]; \
mov eax, dword ptr [VIF_SRC+4]; \
DECOMPRESS_RGBA(8); \
\
shr %eax, 16; \
shr eax, 16; \
DECOMPRESS_RGBA(12); \
\
movdqa XMM_R0, xmmword ptr [s_TempDecompress]; \
@@ -1242,13 +1242,13 @@
#define UNPACK_V4_5SSE_4A UNPACK_V4_5SSE_4
#define UNPACK_V4_5SSE_3(CL, TOTALCL, MaskType, ModeType) \
mov %eax, dword ptr [VIF_SRC]; \
mov eax, dword ptr [VIF_SRC]; \
DECOMPRESS_RGBA(0); \
\
shr %eax, 16; \
shr eax, 16; \
DECOMPRESS_RGBA(4); \
\
mov %eax, dword ptr [VIF_SRC]; \
mov eax, dword ptr [VIF_SRC]; \
DECOMPRESS_RGBA(8); \
\
movdqa XMM_R0, xmmword ptr [s_TempDecompress]; \
@@ -1271,10 +1271,10 @@
#define UNPACK_V4_5SSE_3A UNPACK_V4_5SSE_3
#define UNPACK_V4_5SSE_2(CL, TOTALCL, MaskType, ModeType) \
mov %eax, dword ptr [VIF_SRC]; \
mov eax, dword ptr [VIF_SRC]; \
DECOMPRESS_RGBA(0); \
\
shr %eax, 16; \
shr eax, 16; \
DECOMPRESS_RGBA(4); \
\
movq XMM_R0, qword ptr [s_TempDecompress]; \
@@ -1294,7 +1294,7 @@
#define UNPACK_V4_5SSE_2A UNPACK_V4_5SSE_2
#define UNPACK_V4_5SSE_1(CL, TOTALCL, MaskType, ModeType) \
mov %ax, word ptr [VIF_SRC]; \
mov ax, word ptr [VIF_SRC]; \
DECOMPRESS_RGBA(0) \
\
movd XMM_R0, dword ptr [s_TempDecompress]; \
@@ -1327,20 +1327,20 @@
// 32 bit versions have the args on the stack
#define INIT_ARGS() \
push %edi; \
push %esi; \
push %ebx; \
mov VIF_DST, dword ptr [%esp+4+12]; \
mov VIF_SRC, dword ptr [%esp+8+12]; \
mov VIF_SIZE, dword ptr [%esp+12+12]; \
push edi; \
push esi; \
push ebx; \
mov VIF_DST, dword ptr [esp+4+12]; \
mov VIF_SRC, dword ptr [esp+8+12]; \
mov VIF_SIZE, dword ptr [esp+12+12]; \
#define POP_REGS() \
pop %ebx; \
pop %esi; \
pop %edi; \
pop ebx; \
pop esi; \
pop edi; \
#define INC_STACK(reg) add %esp, 4;
#define INC_STACK(reg) add esp, 4;
// qsize - bytes of compressed size of 1 decompressed xmmword
// int UNPACK_SkippingWrite_##name##_##sign##_##MaskType##_##ModeType(u32* dest, u32* data, int dmasize)
@@ -1431,7 +1431,7 @@ name##_##sign##_##MaskType##_##ModeType##_C1_DoneWithDec: \
sub VIF_SIZE, qsize; \
name##_##sign##_##MaskType##_##ModeType##_C1_Done3: \
SAVE_ROW_REG; \
mov %eax, VIF_SIZE; \
mov eax, VIF_SIZE; \
POP_REGS(); \
ret; \
\
@@ -1460,7 +1460,7 @@ name##_##sign##_##MaskType##_##ModeType##_C2_Done3: \
name##_##sign##_##MaskType##_##ModeType##_C2_Done4: \
\
SAVE_ROW_REG; \
mov %eax, VIF_SIZE; \
mov eax, VIF_SIZE; \
POP_REGS(); \
ret; \
\
@@ -1497,7 +1497,7 @@ name##_##sign##_##MaskType##_##ModeType##_C3_Done3: \
UNPACK_##name##SSE_1(0, 0, MaskType, ModeType); \
name##_##sign##_##MaskType##_##ModeType##_C3_Done4: \
SAVE_ROW_REG; \
mov %eax, VIF_SIZE; \
mov eax, VIF_SIZE; \
POP_REGS(); \
ret; \
\
@@ -1552,7 +1552,7 @@ name##_##sign##_##MaskType##_##ModeType##_C4_Done: \
\
SAVE_ROW_REG; \
INC_STACK(); \
mov %eax, VIF_SIZE; \
mov eax, VIF_SIZE; \
POP_REGS(); \
ret; \

View File

@@ -26,7 +26,7 @@
// Fast assembly routines for x86-64
// zerofrog(@gmail.com)
// and added to by arcum42@gmail.com
.intel_syntax
.intel_syntax noprefix
.extern g_EEFreezeRegs
.extern FreezeMMXRegs_
.extern _mmx_backup
@@ -36,9 +36,9 @@
// ~10 times faster than standard memcmp
// (zerofrog)
// u8 memcmp_mmx(const void* src1, const void* src2, int cmpsize)
#define MEMCMP_SRC1 %edx
#define MEMCMP_SRC2 %esi
#define MEMCMP_SIZE %ecx
#define MEMCMP_SRC1 edx
#define MEMCMP_SRC2 esi
#define MEMCMP_SIZE ecx
.globl memcmp_mmx
memcmp_mmx:
@@ -48,82 +48,82 @@ memcmp_mmx:
je memcmp_mmx_begin
push 1
call FreezeMMXRegs_
add %esp, 4
add esp, 4
memcmp_mmx_begin:
push %esi
mov MEMCMP_SRC1, dword ptr [%esp+8]
mov MEMCMP_SRC2, dword ptr [%esp+12]
mov MEMCMP_SIZE, dword ptr [%esp+16]
push esi
mov MEMCMP_SRC1, dword ptr [esp+8]
mov MEMCMP_SRC2, dword ptr [esp+12]
mov MEMCMP_SIZE, dword ptr [esp+16]
cmp MEMCMP_SIZE, 32
jl memcmp_Done4
// custom test first 8 to make sure things are ok
movq %mm0, [MEMCMP_SRC2]
movq %mm1, [MEMCMP_SRC2+8]
pcmpeqd %mm0, [MEMCMP_SRC1]
pcmpeqd %mm1, [MEMCMP_SRC1+8]
pand %mm0, %mm1
movq %mm2, [MEMCMP_SRC2+16]
pmovmskb %eax, %mm0
movq %mm3, [MEMCMP_SRC2+24]
movq mm0, [MEMCMP_SRC2]
movq mm1, [MEMCMP_SRC2+8]
pcmpeqd mm0, [MEMCMP_SRC1]
pcmpeqd mm1, [MEMCMP_SRC1+8]
pand mm0, mm1
movq mm2, [MEMCMP_SRC2+16]
pmovmskb eax, mm0
movq mm3, [MEMCMP_SRC2+24]
// check if eq
cmp %eax, 0xff
cmp eax, 0xff
je memcmp_NextComp
mov %eax, 1
mov eax, 1
jmp memcmp_End
memcmp_NextComp:
pcmpeqd %mm2, [MEMCMP_SRC1+16]
pcmpeqd %mm3, [MEMCMP_SRC1+24]
pand %mm2, %mm3
pmovmskb %eax, %mm2
pcmpeqd mm2, [MEMCMP_SRC1+16]
pcmpeqd mm3, [MEMCMP_SRC1+24]
pand mm2, mm3
pmovmskb eax, mm2
sub MEMCMP_SIZE, 32
add MEMCMP_SRC2, 32
add MEMCMP_SRC1, 32
// check if eq
cmp %eax, 0xff
cmp eax, 0xff
je memcmp_ContinueTest
mov %eax, 1
mov eax, 1
jmp memcmp_End
cmp MEMCMP_SIZE, 64
jl memcmp_Done8
memcmp_Cmp8:
movq %mm0, [MEMCMP_SRC2]
movq %mm1, [MEMCMP_SRC2+8]
movq %mm2, [MEMCMP_SRC2+16]
movq %mm3, [MEMCMP_SRC2+24]
movq %mm4, [MEMCMP_SRC2+32]
movq %mm5, [MEMCMP_SRC2+40]
movq %mm6, [MEMCMP_SRC2+48]
movq %mm7, [MEMCMP_SRC2+56]
pcmpeqd %mm0, [MEMCMP_SRC1]
pcmpeqd %mm1, [MEMCMP_SRC1+8]
pcmpeqd %mm2, [MEMCMP_SRC1+16]
pcmpeqd %mm3, [MEMCMP_SRC1+24]
pand %mm0, %mm1
pcmpeqd %mm4, [MEMCMP_SRC1+32]
pand %mm0, %mm2
pcmpeqd %mm5, [MEMCMP_SRC1+40]
pand %mm0, %mm3
pcmpeqd %mm6, [MEMCMP_SRC1+48]
pand %mm0, %mm4
pcmpeqd %mm7, [MEMCMP_SRC1+56]
pand %mm0, %mm5
pand %mm0, %mm6
pand %mm0, %mm7
pmovmskb %eax, %mm0
movq mm0, [MEMCMP_SRC2]
movq mm1, [MEMCMP_SRC2+8]
movq mm2, [MEMCMP_SRC2+16]
movq mm3, [MEMCMP_SRC2+24]
movq mm4, [MEMCMP_SRC2+32]
movq mm5, [MEMCMP_SRC2+40]
movq mm6, [MEMCMP_SRC2+48]
movq mm7, [MEMCMP_SRC2+56]
pcmpeqd mm0, [MEMCMP_SRC1]
pcmpeqd mm1, [MEMCMP_SRC1+8]
pcmpeqd mm2, [MEMCMP_SRC1+16]
pcmpeqd mm3, [MEMCMP_SRC1+24]
pand mm0, mm1
pcmpeqd mm4, [MEMCMP_SRC1+32]
pand mm0, mm2
pcmpeqd mm5, [MEMCMP_SRC1+40]
pand mm0, mm3
pcmpeqd mm6, [MEMCMP_SRC1+48]
pand mm0, mm4
pcmpeqd mm7, [MEMCMP_SRC1+56]
pand mm0, mm5
pand mm0, mm6
pand mm0, mm7
pmovmskb eax, mm0
// check if eq
cmp %eax, 0xff
cmp eax, 0xff
je memcmp_Continue
mov %eax, 1
mov eax, 1
jmp memcmp_End
memcmp_Continue:
@@ -137,93 +137,93 @@ memcmp_ContinueTest:
memcmp_Done8:
test MEMCMP_SIZE, 0x20
jz memcmp_Done4
movq %mm0, [MEMCMP_SRC2]
movq %mm1, [MEMCMP_SRC2+8]
movq %mm2, [MEMCMP_SRC2+16]
movq %mm3, [MEMCMP_SRC2+24]
pcmpeqd %mm0, [MEMCMP_SRC1]
pcmpeqd %mm1, [MEMCMP_SRC1+8]
pcmpeqd %mm2, [MEMCMP_SRC1+16]
pcmpeqd %mm3, [MEMCMP_SRC1+24]
pand %mm0, %mm1
pand %mm0, %mm2
pand %mm0, %mm3
pmovmskb %eax, %mm0
movq mm0, [MEMCMP_SRC2]
movq mm1, [MEMCMP_SRC2+8]
movq mm2, [MEMCMP_SRC2+16]
movq mm3, [MEMCMP_SRC2+24]
pcmpeqd mm0, [MEMCMP_SRC1]
pcmpeqd mm1, [MEMCMP_SRC1+8]
pcmpeqd mm2, [MEMCMP_SRC1+16]
pcmpeqd mm3, [MEMCMP_SRC1+24]
pand mm0, mm1
pand mm0, mm2
pand mm0, mm3
pmovmskb eax, mm0
sub MEMCMP_SIZE, 32
add MEMCMP_SRC2, 32
add MEMCMP_SRC1, 32
// check if eq
cmp %eax, 0xff
cmp eax, 0xff
je memcmp_Done4
mov %eax, 1
mov eax, 1
jmp memcmp_End
memcmp_Done4:
cmp MEMCMP_SIZE, 24
jne memcmp_Done2
movq %mm0, [MEMCMP_SRC2]
movq %mm1, [MEMCMP_SRC2+8]
movq %mm2, [MEMCMP_SRC2+16]
pcmpeqd %mm0, [MEMCMP_SRC1]
pcmpeqd %mm1, [MEMCMP_SRC1+8]
pcmpeqd %mm2, [MEMCMP_SRC1+16]
pand %mm0, %mm1
pand %mm0, %mm2
pmovmskb %eax, %mm0
movq mm0, [MEMCMP_SRC2]
movq mm1, [MEMCMP_SRC2+8]
movq mm2, [MEMCMP_SRC2+16]
pcmpeqd mm0, [MEMCMP_SRC1]
pcmpeqd mm1, [MEMCMP_SRC1+8]
pcmpeqd mm2, [MEMCMP_SRC1+16]
pand mm0, mm1
pand mm0, mm2
pmovmskb eax, mm0
// check if eq
cmp %eax, 0xff
cmp eax, 0xff
je memcmp_Done
mov %eax, 1
mov eax, 1
jmp memcmp_End
memcmp_Done2:
cmp MEMCMP_SIZE, 16
jne memcmp_Done1
movq %mm0, [MEMCMP_SRC2]
movq %mm1, [MEMCMP_SRC2+8]
pcmpeqd %mm0, [MEMCMP_SRC1]
pcmpeqd %mm1, [MEMCMP_SRC1+8]
pand %mm0, %mm1
pmovmskb %eax, %mm0
movq mm0, [MEMCMP_SRC2]
movq mm1, [MEMCMP_SRC2+8]
pcmpeqd mm0, [MEMCMP_SRC1]
pcmpeqd mm1, [MEMCMP_SRC1+8]
pand mm0, mm1
pmovmskb eax, mm0
// check if eq
cmp %eax, 0xff
cmp eax, 0xff
je memcmp_Done
mov %eax, 1
mov eax, 1
jmp memcmp_End
memcmp_Done1:
cmp MEMCMP_SIZE, 8
jne memcmp_Done
mov %eax, [MEMCMP_SRC2]
mov eax, [MEMCMP_SRC2]
mov MEMCMP_SRC2, [MEMCMP_SRC2+4]
cmp %eax, [MEMCMP_SRC1]
cmp eax, [MEMCMP_SRC1]
je memcmp_Next
mov %eax, 1
mov eax, 1
jmp memcmp_End
memcmp_Next:
cmp MEMCMP_SRC2, [MEMCMP_SRC1+4]
je memcmp_Done
mov %eax, 1
mov eax, 1
jmp memcmp_End
memcmp_Done:
xor %eax, %eax
xor eax, eax
memcmp_End:
emms
pop %esi
pop esi
ret
// memxor_mmx
#define MEMXOR_SRC1 %edx
#define MEMXOR_SRC2 %esi
#define MEMXOR_SIZE %ecx
#define MEMXOR_SRC1 edx
#define MEMXOR_SRC2 esi
#define MEMXOR_SIZE ecx
.globl memxor_mmx
memxor_mmx:
@@ -233,38 +233,38 @@ memxor_mmx:
je memxor_mmx_begin
push 1
call FreezeMMXRegs_
add %esp, 4
add esp, 4
memxor_mmx_begin:
push %esi
mov MEMXOR_SRC1, dword ptr [%esp+8]
mov MEMXOR_SRC2, dword ptr [%esp+12]
mov MEMXOR_SIZE, dword ptr [%esp+16]
push esi
mov MEMXOR_SRC1, dword ptr [esp+8]
mov MEMXOR_SRC2, dword ptr [esp+12]
mov MEMXOR_SIZE, dword ptr [esp+16]
cmp MEMXOR_SIZE, 64
jl memxor_Setup4
movq %mm0, [MEMXOR_SRC2]
movq %mm1, [MEMXOR_SRC2+8]
movq %mm2, [MEMXOR_SRC2+16]
movq %mm3, [MEMXOR_SRC2+24]
movq %mm4, [MEMXOR_SRC2+32]
movq %mm5, [MEMXOR_SRC2+40]
movq %mm6, [MEMXOR_SRC2+48]
movq %mm7, [MEMXOR_SRC2+56]
movq mm0, [MEMXOR_SRC2]
movq mm1, [MEMXOR_SRC2+8]
movq mm2, [MEMXOR_SRC2+16]
movq mm3, [MEMXOR_SRC2+24]
movq mm4, [MEMXOR_SRC2+32]
movq mm5, [MEMXOR_SRC2+40]
movq mm6, [MEMXOR_SRC2+48]
movq mm7, [MEMXOR_SRC2+56]
sub MEMXOR_SIZE, 64
add MEMXOR_SRC2, 64
cmp MEMXOR_SIZE, 64
jl memxor_End8
memxor_Cmp8:
pxor %mm0, [MEMXOR_SRC2]
pxor %mm1, [MEMXOR_SRC2+8]
pxor %mm2, [MEMXOR_SRC2+16]
pxor %mm3, [MEMXOR_SRC2+24]
pxor %mm4, [MEMXOR_SRC2+32]
pxor %mm5, [MEMXOR_SRC2+40]
pxor %mm6, [MEMXOR_SRC2+48]
pxor %mm7, [MEMXOR_SRC2+56]
pxor mm0, [MEMXOR_SRC2]
pxor mm1, [MEMXOR_SRC2+8]
pxor mm2, [MEMXOR_SRC2+16]
pxor mm3, [MEMXOR_SRC2+24]
pxor mm4, [MEMXOR_SRC2+32]
pxor mm5, [MEMXOR_SRC2+40]
pxor mm6, [MEMXOR_SRC2+48]
pxor mm7, [MEMXOR_SRC2+56]
sub MEMXOR_SIZE, 64
add MEMXOR_SRC2, 64
@@ -272,17 +272,17 @@ memxor_Cmp8:
jge memxor_Cmp8
memxor_End8:
pxor %mm0, %mm4
pxor %mm1, %mm5
pxor %mm2, %mm6
pxor %mm3, %mm7
pxor mm0, mm4
pxor mm1, mm5
pxor mm2, mm6
pxor mm3, mm7
cmp MEMXOR_SIZE, 32
jl memxor_End4
pxor %mm0, [MEMXOR_SRC2]
pxor %mm1, [MEMXOR_SRC2+8]
pxor %mm2, [MEMXOR_SRC2+16]
pxor %mm3, [MEMXOR_SRC2+24]
pxor mm0, [MEMXOR_SRC2]
pxor mm1, [MEMXOR_SRC2+8]
pxor mm2, [MEMXOR_SRC2+16]
pxor mm3, [MEMXOR_SRC2+24]
sub MEMXOR_SIZE, 32
add MEMXOR_SRC2, 32
jmp memxor_End4
@@ -291,21 +291,21 @@ memxor_Setup4:
cmp MEMXOR_SIZE, 32
jl memxor_Setup2
movq %mm0, [MEMXOR_SRC2]
movq %mm1, [MEMXOR_SRC2+8]
movq %mm2, [MEMXOR_SRC2+16]
movq %mm3, [MEMXOR_SRC2+24]
movq mm0, [MEMXOR_SRC2]
movq mm1, [MEMXOR_SRC2+8]
movq mm2, [MEMXOR_SRC2+16]
movq mm3, [MEMXOR_SRC2+24]
sub MEMXOR_SIZE, 32
add MEMXOR_SRC2, 32
memxor_End4:
pxor %mm0, %mm2
pxor %mm1, %mm3
pxor mm0, mm2
pxor mm1, mm3
cmp MEMXOR_SIZE, 16
jl memxor_End2
pxor %mm0, [MEMXOR_SRC2]
pxor %mm1, [MEMXOR_SRC2+8]
pxor mm0, [MEMXOR_SRC2]
pxor mm1, [MEMXOR_SRC2+8]
sub MEMXOR_SIZE, 16
add MEMXOR_SRC2, 16
jmp memxor_End2
@@ -314,56 +314,56 @@ memxor_Setup2:
cmp MEMXOR_SIZE, 16
jl memxor_Setup1
movq %mm0, [MEMXOR_SRC2]
movq %mm1, [MEMXOR_SRC2+8]
movq mm0, [MEMXOR_SRC2]
movq mm1, [MEMXOR_SRC2+8]
sub MEMXOR_SIZE, 16
add MEMXOR_SRC2, 16
memxor_End2:
pxor %mm0, %mm1
pxor mm0, mm1
cmp MEMXOR_SIZE, 8
jl memxor_End1
pxor %mm0, [MEMXOR_SRC2]
pxor mm0, [MEMXOR_SRC2]
memxor_End1:
movq [MEMXOR_SRC1], %mm0
movq [MEMXOR_SRC1], mm0
jmp memxor_End
memxor_Setup1:
movq %mm0, [MEMXOR_SRC2]
movq [MEMXOR_SRC1], %mm0
movq mm0, [MEMXOR_SRC2]
movq [MEMXOR_SRC1], mm0
memxor_End:
emms
pop %esi
pop esi
ret
// void __fastcall memcpy_amd_(void *dest, const void *src, size_t n)
.globl memcpy_amd_
memcpy_amd_:
push %edi
push %esi
push edi
push esi
mov %edi, %ecx // destination
mov %esi, %edx // source
mov %ecx, [%esp+12] // number of bytes to copy
mov %eax, %ecx // keep a copy of count
mov edi, ecx // destination
mov esi, edx // source
mov ecx, [esp+12] // number of bytes to copy
mov eax, ecx // keep a copy of count
cld
cmp %eax, TINY_BLOCK_COPY
cmp eax, TINY_BLOCK_COPY
jb $memcpy_ic_3 // tiny? skip mmx copy
cmp %eax, 32*1024 // don't align between 32k-64k because
cmp eax, 32*1024 // don't align between 32k-64k because
jbe $memcpy_do_align // it appears to be slower
cmp %eax, 64*1024
cmp eax, 64*1024
jbe $memcpy_align_done
$memcpy_do_align:
mov %eax, 8 // a trick that's faster than rep movsb...
sub %eax, %edi // align destination to qword
andb %eax, 111 // get the low bits
sub %ecx, %eax // update copy count
neg %eax // set up to jump into the array
add %eax, offset $memcpy_align_done
jmp %eax // jump to array of movsb's
mov eax, 8 // a trick that's faster than rep movsb...
sub eax, edi // align destination to qword
andb eax, 111 // get the low bits
sub ecx, eax // update copy count
neg eax // set up to jump into the array
add eax, offset $memcpy_align_done
jmp eax // jump to array of movsb's
.align 4
movsb
@@ -376,17 +376,17 @@ $memcpy_do_align:
movsb
$memcpy_align_done: // destination is dword aligned
mov %eax, %ecx // number of bytes left to copy
shr %eax, 6 // get 64-byte block count
mov eax, ecx // number of bytes left to copy
shr eax, 6 // get 64-byte block count
jz $memcpy_ic_2 // finish the last few bytes
cmp %eax, IN_CACHE_COPY/64 // too big 4 cache? use uncached copy
cmp eax, IN_CACHE_COPY/64 // too big 4 cache? use uncached copy
jae $memcpy_uc_test
movq [_mmx_backup+0x00],%mm0
movq [_mmx_backup+0x08],%mm1
movq [_mmx_backup+0x10],%mm2
movq [_mmx_backup+0x18],%mm3
movq [_mmx_backup+0x00],mm0
movq [_mmx_backup+0x08],mm1
movq [_mmx_backup+0x10],mm2
movq [_mmx_backup+0x18],mm3
// This is small block copy that uses the MMX registers to copy 8 bytes
// at a time. It uses the "unrolled loop" optimization, and also uses
@@ -394,49 +394,49 @@ $memcpy_align_done: // destination is dword aligned
.align 16
$memcpy_ic_1: // 64-byte block copies, in-cache copy
prefetchnta [%esi + (200*64/34+192)] // start reading ahead
prefetchnta [esi + (200*64/34+192)] // start reading ahead
movq %mm0, [%esi+0] // read 64 bits
movq %mm1, [%esi+8]
movq [%edi+0], %mm0 //write 64 bits
movq [%edi+8], %mm1 // note: the normal movq writes the
movq %mm2, [%esi+16] // data to cache; a cache line will be
movq %mm3, [%esi+24] // allocated as needed, to store the data
movq [%edi+16], %mm2
movq [%edi+24], %mm3
movq %mm0, [%esi+32]
movq %mm1, [%esi+40]
movq [%edi+32], %mm0
movq [%edi+40], %mm1
movq %mm2, [%esi+48]
movq %mm3, [%esi+56]
movq [%edi+48], %mm2
movq [%edi+56], %mm3
movq mm0, [esi+0] // read 64 bits
movq mm1, [esi+8]
movq [edi+0], mm0 //write 64 bits
movq [edi+8], mm1 // note: the normal movq writes the
movq mm2, [esi+16] // data to cache; a cache line will be
movq mm3, [esi+24] // allocated as needed, to store the data
movq [edi+16], mm2
movq [edi+24], mm3
movq mm0, [esi+32]
movq mm1, [esi+40]
movq [edi+32], mm0
movq [edi+40], mm1
movq mm2, [esi+48]
movq mm3, [esi+56]
movq [edi+48], mm2
movq [edi+56], mm3
add %esi, 64 // update source pointer
add %edi, 64 // update destination pointer
dec %eax // count down
add esi, 64 // update source pointer
add edi, 64 // update destination pointer
dec eax // count down
jnz $memcpy_ic_1 // last 64-byte block?
movq %mm0,[_mmx_backup+0x00]
movq %mm1,[_mmx_backup+0x08]
movq %mm2,[_mmx_backup+0x10]
movq %mm3,[_mmx_backup+0x18]
movq mm0,[_mmx_backup+0x00]
movq mm1,[_mmx_backup+0x08]
movq mm2,[_mmx_backup+0x10]
movq mm3,[_mmx_backup+0x18]
$memcpy_ic_2:
mov %eax, %ecx // has valid low 6 bits of the byte count
mov eax, ecx // has valid low 6 bits of the byte count
$memcpy_ic_3:
shr %eax, 2 // dword count
andb %eax, 1111 // only look at the "remainder" bits
neg %eax // set up to jump into the array
add %eax, offset $memcpy_last_few
jmp %eax // jump to array of movsd's
shr eax, 2 // dword count
andb eax, 1111 // only look at the "remainder" bits
neg eax // set up to jump into the array
add eax, offset $memcpy_last_few
jmp eax // jump to array of movsd's
$memcpy_uc_test:
// cmp %ecx, UNCACHED_COPY/64 // big enough? use block prefetch copy
// cmp ecx, UNCACHED_COPY/64 // big enough? use block prefetch copy
// jae $memcpy_bp_1
//$memcpy_64_test:
or %eax, %eax // tail end of block prefetch will jump here
or eax, eax // tail end of block prefetch will jump here
jz $memcpy_ic_2 // no more 64-byte blocks left
// For larger blocks, which will spill beyond the cache, it's faster to
@@ -444,39 +444,39 @@ $memcpy_uc_test:
// bypasses the cache and writes straight to main memory. This code also
// uses the software prefetch instruction to pre-read the data.
movq [_mmx_backup+0x00],%mm0
movq [_mmx_backup+0x08],%mm1
movq [_mmx_backup+0x10],%mm2
movq [_mmx_backup+0x00],mm0
movq [_mmx_backup+0x08],mm1
movq [_mmx_backup+0x10],mm2
.align 16
$memcpy_uc_1: // 64-byte blocks, uncached copy
prefetchnta [%esi + (200*64/34+192)] // start reading ahead
prefetchnta [esi + (200*64/34+192)] // start reading ahead
movq %mm0,[%esi+0] // read 64 bits
add %edi,64 // update destination pointer
movq %mm1,[%esi+8]
add %esi,64 // update source pointer
movq %mm2,[%esi-48]
movntq [%edi-64], %mm0 // write 64 bits, bypassing the cache
movq %mm0,[%esi-40] // note: movntq also prevents the CPU
movntq [%edi-56], %mm1 // from READING the destination address
movq %mm1,[%esi-32] // into the cache, only to be over-written
movntq [%edi-48], %mm2 // so that also helps performance
movq %mm2,[%esi-24]
movntq [%edi-40], %mm0
movq %mm0,[%esi-16]
movntq [%edi-32], %mm1
movq %mm1,[%esi-8]
movntq [%edi-24], %mm2
movntq [%edi-16], %mm0
dec %eax
movntq [%edi-8], %mm1
movq mm0,[esi+0] // read 64 bits
add edi,64 // update destination pointer
movq mm1,[esi+8]
add esi,64 // update source pointer
movq mm2,[esi-48]
movntq [edi-64], mm0 // write 64 bits, bypassing the cache
movq mm0,[esi-40] // note: movntq also prevents the CPU
movntq [edi-56], mm1 // from READING the destination address
movq mm1,[esi-32] // into the cache, only to be over-written
movntq [edi-48], mm2 // so that also helps performance
movq mm2,[esi-24]
movntq [edi-40], mm0
movq mm0,[esi-16]
movntq [edi-32], mm1
movq mm1,[esi-8]
movntq [edi-24], mm2
movntq [edi-16],mm0
dec eax
movntq [edi-8], mm1
jnz $memcpy_uc_1 // last 64-byte block?
movq %mm0,[_mmx_backup+0x00]
movq %mm1,[_mmx_backup+0x08]
movq %mm2,[_mmx_backup+0x10]
movq mm0,[_mmx_backup+0x00]
movq mm1,[_mmx_backup+0x08]
movq mm2,[_mmx_backup+0x10]
jmp $memcpy_ic_2 // almost done (not needed because large copy below was removed)
@@ -511,17 +511,17 @@ $memcpy_uc_1: // 64-byte blocks, uncached copy
movsd
$memcpy_last_few: // dword aligned from before movsd's
mov %eax, %ecx // has valid low 2 bits of the byte count
andb %eax, 11 // the last few cows must come home
mov eax, ecx // has valid low 2 bits of the byte count
andb eax, 11 // the last few cows must come home
jz $memcpy_final // no more, let's leave
rep movsb // the last 1, 2, or 3 bytes
$memcpy_final:
emms // clean up the MMX state
sfence // flush the write buffer
//mov %eax, [dest] // ret value = destination pointer
//mov eax, [dest] // ret value = destination pointer
pop %esi
pop %edi
pop esi
pop edi
ret 4
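
A caller-side sketch of the interface this routine implements, matching the prototype comment above it (how __fastcall is spelled for gcc, for instance through an attribute or a project macro, is assumed here rather than shown in this hunk):

    // dest arrives in ecx and src in edx under __fastcall, so only the
    // byte count n travels on the stack; that is why the code reads it
    // at [esp+12] after pushing edi and esi, and returns with "ret 4"
    // to pop that single stack argument.
    void __fastcall memcpy_amd_(void *dest, const void *src, size_t n);

    // e.g. memcpy_amd_(dst_buf, src_buf, 64);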

View File

@@ -824,18 +824,18 @@ static s32 recExecuteBlock( s32 eeCycles )
#else
__asm__
(
".intel_syntax\n"
"push %ebx\n"
"push %esi\n"
"push %edi\n"
"push %ebp\n"
".intel_syntax noprefix\n"
"push ebx\n"
"push esi\n"
"push edi\n"
"push ebp\n"
"call iopDispatcherReg\n"
"pop %ebp\n"
"pop %edi\n"
"pop %esi\n"
"pop %ebx\n"
"pop ebp\n"
"pop edi\n"
"pop esi\n"
"pop ebx\n"
".att_syntax\n"
);
#endif

View File

@@ -96,7 +96,7 @@ void __fastcall SetNewMask(u32* vif1masks, u32* hasmask, u32 mask, u32 oldmask)
#else // gcc
// Is this really supposed to be assembly for gcc and C for Windows?
void __fastcall SetNewMask(u32* vif1masks, u32* hasmask, u32 mask, u32 oldmask)
{
u32 i;
@@ -112,23 +112,23 @@ void __fastcall SetNewMask(u32* vif1masks, u32* hasmask, u32 mask, u32 oldmask)
u8* p0 = (u8*)&s_maskarr[mask&15][0];
u8* p1 = (u8*)&s_maskarr[(mask>>4)&15][0];
__asm__(".intel_syntax\n"
"movaps %%xmm0, [%0]\n"
"movaps %%xmm1, [%1]\n"
"movaps %%xmm2, %%xmm0\n"
"punpcklwd %%xmm0, %%xmm0\n"
"punpckhwd %%xmm2, %%xmm2\n"
"movaps %%xmm3, %%xmm1\n"
"punpcklwd %%xmm1, %%xmm1\n"
"punpckhwd %%xmm3, %%xmm3\n"
"movq [%2], %%xmm0\n"
"movq [%2+8], %%xmm1\n"
"movhps [%2+16], %%xmm0\n"
"movhps [%2+24], %%xmm1\n"
"movq [%2+32], %%xmm2\n"
"movq [%2+40], %%xmm3\n"
"movhps [%2+48], %%xmm2\n"
"movhps [%2+56], %%xmm3\n"
__asm__(".intel_syntax noprefix\n"
"movaps xmm0, [%0]\n"
"movaps xmm1, [%1]\n"
"movaps xmm2, xmm0\n"
"punpcklwd xmm0, xmm0\n"
"punpckhwd xmm2, xmm2\n"
"movaps xmm3, xmm1\n"
"punpcklwd xmm1, xmm1\n"
"punpckhwd xmm3, xmm3\n"
"movq [%2], xmm0\n"
"movq [%2+8], xmm1\n"
"movhps [%2+16], xmm0\n"
"movhps [%2+24], xmm1\n"
"movq [%2+32], xmm2\n"
"movq [%2+40], xmm3\n"
"movhps [%2+48], xmm2\n"
"movhps [%2+56], xmm3\n"
".att_syntax\n" : : "r"(p0), "r"(p1), "r"(vif1masks) );
}
}

View File

@@ -800,18 +800,18 @@ __forceinline void recExecute()
g_EEFreezeRegs = true;
__asm__
(
".intel_syntax\n"
"push %ebx\n"
"push %esi\n"
"push %edi\n"
"push %ebp\n"
".intel_syntax noprefix\n"
"push ebx\n"
"push esi\n"
"push edi\n"
"push ebp\n"
"call DispatcherReg\n"
"pop %ebp\n"
"pop %edi\n"
"pop %esi\n"
"pop %ebx\n"
"pop ebp\n"
"pop edi\n"
"pop esi\n"
"pop ebx\n"
".att_syntax\n"
);
g_EEFreezeRegs = false;
@@ -824,18 +824,18 @@ static void recExecuteBlock()
g_EEFreezeRegs = true;
__asm__
(
".intel_syntax\n"
"push %ebx\n"
"push %esi\n"
"push %edi\n"
"push %ebp\n"
".intel_syntax noprefix\n"
"push ebx\n"
"push esi\n"
"push edi\n"
"push ebp\n"
"call DispatcherReg\n"
"pop %ebp\n"
"pop %edi\n"
"pop %esi\n"
"pop %ebx\n"
"pop ebp\n"
"pop edi\n"
"pop esi\n"
"pop ebx\n"
".att_syntax\n"
);
g_EEFreezeRegs = false;

View File

@@ -97,15 +97,15 @@ __forceinline void FreezeMMXRegs_(int save)
emms
}
#else
__asm__(".intel_syntax\n"
"movq [%0+0x00], %%mm0\n"
"movq [%0+0x08], %%mm1\n"
"movq [%0+0x10], %%mm2\n"
"movq [%0+0x18], %%mm3\n"
"movq [%0+0x20], %%mm4\n"
"movq [%0+0x28], %%mm5\n"
"movq [%0+0x30], %%mm6\n"
"movq [%0+0x38], %%mm7\n"
__asm__(".intel_syntax noprefix\n"
"movq [%0+0x00], mm0\n"
"movq [%0+0x08], mm1\n"
"movq [%0+0x10], mm2\n"
"movq [%0+0x18], mm3\n"
"movq [%0+0x20], mm4\n"
"movq [%0+0x28], mm5\n"
"movq [%0+0x30], mm6\n"
"movq [%0+0x38], mm7\n"
"emms\n"
".att_syntax\n" : : "r"(g_globalMMXData) );
#endif
@@ -134,15 +134,15 @@ __forceinline void FreezeMMXRegs_(int save)
emms
}
#else
__asm__(".intel_syntax\n"
"movq %%mm0, [%0+0x00]\n"
"movq %%mm1, [%0+0x08]\n"
"movq %%mm2, [%0+0x10]\n"
"movq %%mm3, [%0+0x18]\n"
"movq %%mm4, [%0+0x20]\n"
"movq %%mm5, [%0+0x28]\n"
"movq %%mm6, [%0+0x30]\n"
"movq %%mm7, [%0+0x38]\n"
__asm__(".intel_syntax noprefix\n"
"movq mm0, [%0+0x00]\n"
"movq mm1, [%0+0x08]\n"
"movq mm2, [%0+0x10]\n"
"movq mm3, [%0+0x18]\n"
"movq mm4, [%0+0x20]\n"
"movq mm5, [%0+0x28]\n"
"movq mm6, [%0+0x30]\n"
"movq mm7, [%0+0x38]\n"
"emms\n"
".att_syntax\n" : : "r"(g_globalMMXData) );
#endif
@@ -177,15 +177,15 @@ __forceinline void FreezeXMMRegs_(int save)
}
#else
__asm__(".intel_syntax\n"
"movaps [%0+0x00], %%xmm0\n"
"movaps [%0+0x10], %%xmm1\n"
"movaps [%0+0x20], %%xmm2\n"
"movaps [%0+0x30], %%xmm3\n"
"movaps [%0+0x40], %%xmm4\n"
"movaps [%0+0x50], %%xmm5\n"
"movaps [%0+0x60], %%xmm6\n"
"movaps [%0+0x70], %%xmm7\n"
__asm__(".intel_syntax noprefix\n"
"movaps [%0+0x00], xmm0\n"
"movaps [%0+0x10], xmm1\n"
"movaps [%0+0x20], xmm2\n"
"movaps [%0+0x30], xmm3\n"
"movaps [%0+0x40], xmm4\n"
"movaps [%0+0x50], xmm5\n"
"movaps [%0+0x60], xmm6\n"
"movaps [%0+0x70], xmm7\n"
".att_syntax\n" : : "r"(g_globalXMMData) );
#endif // _MSC_VER
@@ -214,15 +214,15 @@ __forceinline void FreezeXMMRegs_(int save)
}
#else
__asm__(".intel_syntax\n"
"movaps %%xmm0, [%0+0x00]\n"
"movaps %%xmm1, [%0+0x10]\n"
"movaps %%xmm2, [%0+0x20]\n"
"movaps %%xmm3, [%0+0x30]\n"
"movaps %%xmm4, [%0+0x40]\n"
"movaps %%xmm5, [%0+0x50]\n"
"movaps %%xmm6, [%0+0x60]\n"
"movaps %%xmm7, [%0+0x70]\n"
__asm__(".intel_syntax noprefix\n"
"movaps xmm0, [%0+0x00]\n"
"movaps xmm1, [%0+0x10]\n"
"movaps xmm2, [%0+0x20]\n"
"movaps xmm3, [%0+0x30]\n"
"movaps xmm4, [%0+0x40]\n"
"movaps xmm5, [%0+0x50]\n"
"movaps xmm6, [%0+0x60]\n"
"movaps xmm7, [%0+0x70]\n"
".att_syntax\n" : : "r"(g_globalXMMData) );
#endif // _MSC_VER