From b71b837efafbdf439ee624d3cb802d6e4e58e222 Mon Sep 17 00:00:00 2001 From: "Jake.Stine" Date: Mon, 5 Oct 2009 11:05:11 +0000 Subject: [PATCH] Linux: Added more correct __asm__ qualifiers and conditions; including __volatile__ on a lot of asm code (it should really be the default behavior and non-volatile the specifier, but whatever >_<), and added register clobber specifiers. Might help unbreak some of GCC 4.4's optimization problems, although VIFdma's uber-hack SSE optimization looks like a real problem. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1964 96395faa-99c1-11dd-bbfe-3dabce05a288 --- common/include/Utilities/lnx_memzero.h | 36 +++++----- common/src/Utilities/Linux/LnxThreads.cpp | 4 +- common/src/x86emitter/cpudetect.cpp | 11 ++-- common/src/x86emitter/tools.cpp | 18 ++--- pcsx2/IPU/yuv2rgb.cpp | 66 ++++++++++--------- pcsx2/VifDma.cpp | 9 +++ pcsx2/x86/iMisc.cpp | 6 +- pcsx2/x86/iR3000A.cpp | 31 +++++---- pcsx2/x86/iVif.cpp | 4 +- pcsx2/x86/ix86-32/iR5900-32.cpp | 49 +++++++------- .../spu2-x/src/3rdparty/liba52/bitstream.h | 4 +- plugins/spu2-x/src/Mixer.cpp | 6 +- plugins/zerospu2/voices.cpp | 44 ++++++------- 13 files changed, 150 insertions(+), 138 deletions(-) diff --git a/common/include/Utilities/lnx_memzero.h b/common/include/Utilities/lnx_memzero.h index e54d0a9660..fef0c4b495 100644 --- a/common/include/Utilities/lnx_memzero.h +++ b/common/include/Utilities/lnx_memzero.h @@ -1,6 +1,6 @@ /* PCSX2 - PS2 Emulator for PCs * Copyright (C) 2002-2009 PCSX2 Dev Team - * + * * PCSX2 is free software: you can redistribute it and/or modify it under the terms * of the GNU Lesser General Public License as published by the Free Software Found- * ation, either version 3 of the License, or (at your option) any later version. 
@@ -102,7 +102,7 @@ static __forceinline void memset_8( void *dest ) return; case 3: - __asm__ + __asm__ volatile ( ".intel_syntax noprefix\n" "cld\n" @@ -112,16 +112,16 @@ static __forceinline void memset_8( void *dest ) "stosd\n" "stosd\n" ".att_syntax\n" - : + : + // Input specifiers: D - edi, a -- eax, c ecx : [dest]"D"(dest), [data32]"a"(data32) -// D - edi, a -- eax, c ecx - : + : "memory" ); return; case 4: - __asm__ - ( + __asm__ volatile + ( ".intel_syntax noprefix\n" "cld\n" // "mov edi, %[dest]\n" @@ -131,15 +131,15 @@ static __forceinline void memset_8( void *dest ) "stosd\n" "stosd\n" ".att_syntax\n" - : + : : [dest]"D"(dest), [data32]"a"(data32) - : - + : "memory" + ); return; case 5: - __asm__ + __asm__ volatile ( ".intel_syntax noprefix\n" "cld\n" @@ -151,15 +151,15 @@ static __forceinline void memset_8( void *dest ) "stosd\n" "stosd\n" ".att_syntax\n" - : - : [dest]"D"(dest), [data32]"a"(data32) - : - + : + : [dest]"D"(dest), [data32]"a"(data32) + : "memory" + ); return; default: - __asm__ + __asm__ volatile ( ".intel_syntax noprefix\n" "cld\n" @@ -168,9 +168,9 @@ static __forceinline void memset_8( void *dest ) // "mov eax, %\[data32]n" "rep stosd\n" ".att_syntax\n" - : + : : [remdat]"c"(remdat), [dest]"D"(dest), [data32]"a"(data32) - : + : "memory" ); return; } diff --git a/common/src/Utilities/Linux/LnxThreads.cpp b/common/src/Utilities/Linux/LnxThreads.cpp index 3f653d39f2..1026cb7c01 100644 --- a/common/src/Utilities/Linux/LnxThreads.cpp +++ b/common/src/Utilities/Linux/LnxThreads.cpp @@ -1,6 +1,6 @@ /* PCSX2 - PS2 Emulator for PCs * Copyright (C) 2002-2009 PCSX2 Dev Team - * + * * PCSX2 is free software: you can redistribute it and/or modify it under the terms * of the GNU Lesser General Public License as published by the Free Software Found- * ation, either version 3 of the License, or (at your option) any later version. @@ -12,7 +12,7 @@ * You should have received a copy of the GNU General Public License along with PCSX2. * If not, see . 
*/ - + #include "../PrecompiledHeader.h" #include "Threading.h" diff --git a/common/src/x86emitter/cpudetect.cpp b/common/src/x86emitter/cpudetect.cpp index 4fdc612383..fc7c58c766 100644 --- a/common/src/x86emitter/cpudetect.cpp +++ b/common/src/x86emitter/cpudetect.cpp @@ -1,6 +1,6 @@ /* Cpudetection lib * Copyright (C) 2002-2009 PCSX2 Dev Team - * + * * PCSX2 is free software: you can redistribute it and/or modify it under the terms * of the GNU Lesser General Public License as published by the Free Software Found- * ation, either version 3 of the License, or (at your option) any later version. @@ -12,7 +12,7 @@ * You should have received a copy of the GNU General Public License along with PCSX2. * If not, see . */ - + #include "PrecompiledHeader.h" #include "internal.h" @@ -27,12 +27,9 @@ __aligned16 x86CPU_INFO x86caps; static s32 iCpuId( u32 cmd, u32 *regs ) { #ifdef _MSC_VER - __asm - { - xor ecx, ecx; // ecx should be zero for CPUID(4) - } + __asm xor ecx, ecx; // ecx should be zero for CPUID(4) #else - __asm__ ( "xor %ecx, %ecx" ); + __asm__ __volatile__ ( "xor %ecx, %ecx" ); #endif __cpuid( (int*)regs, cmd ); diff --git a/common/src/x86emitter/tools.cpp b/common/src/x86emitter/tools.cpp index 887f6bb30f..5378b0eb17 100644 --- a/common/src/x86emitter/tools.cpp +++ b/common/src/x86emitter/tools.cpp @@ -1,6 +1,6 @@ /* PCSX2 - PS2 Emulator for PCs * Copyright (C) 2002-2009 PCSX2 Dev Team - * + * * PCSX2 is free software: you can redistribute it and/or modify it under the terms * of the GNU Lesser General Public License as published by the Free Software Found- * ation, either version 3 of the License, or (at your option) any later version. 
@@ -65,7 +65,7 @@ __forceinline void FreezeMMXRegs(int save) emms } #else - __asm__( + __asm__ volatile( ".intel_syntax noprefix\n" "movq [%[g_globalMMXData]+0x00], mm0\n" "movq [%[g_globalMMXData]+0x08], mm1\n" @@ -76,7 +76,7 @@ __forceinline void FreezeMMXRegs(int save) "movq [%[g_globalMMXData]+0x30], mm6\n" "movq [%[g_globalMMXData]+0x38], mm7\n" "emms\n" - ".att_syntax\n" : : [g_globalMMXData]"r"(g_globalMMXData) + ".att_syntax\n" : : [g_globalMMXData]"r"(g_globalMMXData) : "memory" ); #endif @@ -105,7 +105,7 @@ __forceinline void FreezeMMXRegs(int save) emms } #else - __asm__( + __asm__ volatile( ".intel_syntax noprefix\n" "movq mm0, [%[g_globalMMXData]+0x00]\n" "movq mm1, [%[g_globalMMXData]+0x08]\n" @@ -116,7 +116,7 @@ __forceinline void FreezeMMXRegs(int save) "movq mm6, [%[g_globalMMXData]+0x30]\n" "movq mm7, [%[g_globalMMXData]+0x38]\n" "emms\n" - ".att_syntax\n" : : [g_globalMMXData]"r"(g_globalMMXData) + ".att_syntax\n" : : [g_globalMMXData]"r"(g_globalMMXData) : "memory" ); #endif } @@ -154,7 +154,7 @@ __forceinline void FreezeXMMRegs(int save) } #else - __asm__( + __asm__ volatile( ".intel_syntax noprefix\n" "movaps [%[g_globalXMMData]+0x00], xmm0\n" "movaps [%[g_globalXMMData]+0x10], xmm1\n" @@ -164,7 +164,7 @@ __forceinline void FreezeXMMRegs(int save) "movaps [%[g_globalXMMData]+0x50], xmm5\n" "movaps [%[g_globalXMMData]+0x60], xmm6\n" "movaps [%[g_globalXMMData]+0x70], xmm7\n" - ".att_syntax\n" : : [g_globalXMMData]"r"(g_globalXMMData) + ".att_syntax\n" : : [g_globalXMMData]"r"(g_globalXMMData) : "memory" ); #endif // _MSC_VER @@ -196,7 +196,7 @@ __forceinline void FreezeXMMRegs(int save) } #else - __asm__( + __asm__ volatile( ".intel_syntax noprefix\n" "movaps xmm0, [%[g_globalXMMData]+0x00]\n" "movaps xmm1, [%[g_globalXMMData]+0x10]\n" @@ -206,7 +206,7 @@ __forceinline void FreezeXMMRegs(int save) "movaps xmm5, [%[g_globalXMMData]+0x50]\n" "movaps xmm6, [%[g_globalXMMData]+0x60]\n" "movaps xmm7, [%[g_globalXMMData]+0x70]\n" - ".att_syntax\n" : 
: [g_globalXMMData]"r"(g_globalXMMData) + ".att_syntax\n" : : [g_globalXMMData]"r"(g_globalXMMData) : "memory" ); #endif // _MSC_VER diff --git a/pcsx2/IPU/yuv2rgb.cpp b/pcsx2/IPU/yuv2rgb.cpp index 368ceb562f..fc19877119 100644 --- a/pcsx2/IPU/yuv2rgb.cpp +++ b/pcsx2/IPU/yuv2rgb.cpp @@ -1,6 +1,6 @@ /* PCSX2 - PS2 Emulator for PCs * Copyright (C) 2002-2009 PCSX2 Dev Team - * + * * PCSX2 is free software: you can redistribute it and/or modify it under the terms * of the GNU Lesser General Public License as published by the Free Software Found- * ation, either version 3 of the License, or (at your option) any later version. @@ -33,7 +33,7 @@ struct SSE2_Tables u8 Y_bias[16]; // offset -48 u16 Y_mask[8]; // offset -32 u16 round_1bit[8]; // offset -16 - + u16 Y_coefficients[8]; // offset 0 u16 GCr_coefficients[8];// offset 16 u16 GCb_coefficients[8];// offset 32 @@ -55,16 +55,16 @@ enum BCb_COEFF = 0x40 }; -static volatile const __aligned16 SSE2_Tables sse2_tables = +static const __aligned16 SSE2_Tables sse2_tables = { {0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000}, // c_bias {16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16}, // y_bias {0xff00,0xff00,0xff00,0xff00,0xff00,0xff00,0xff00,0xff00}, // y_mask - + // Specifying round off instead of round down as everywhere else // implies that this is right {1,1,1,1,1,1,1,1}, // round_1bit - + SSE_COEFFICIENTS(0x95), // 1.1640625 [Y_coefficients] SSE_COEFFICIENTS(-0x68), // -0.8125 [GCr_coefficients] SSE_COEFFICIENTS(-0x32), // -0.390625 [GCb_coefficients] @@ -78,7 +78,7 @@ static __aligned16 u16 yuv2rgb_temp[3][8]; __releaseinline void yuv2rgb_sse2(void) { FreezeXMMRegs(1); - + #if defined(_MSC_VER) || defined(__INTEL_COMPILER) __asm { mov eax, 1 @@ -176,7 +176,7 @@ ihatemsvc: movhlps xmm3, xmm0 movhlps xmm4, xmm1 movhlps xmm5, xmm2 - punpcklbw xmm0, xmm3 // Red bytes, back in order + punpcklbw xmm0, xmm3 // Red bytes, back in order punpcklbw xmm1, xmm4 // Green "" punpcklbw xmm2, xmm5 // Blue "" movaps xmm3, xmm0 
@@ -211,8 +211,10 @@ ihatemsvc: cmp esi, 64 jne tworows } + #elif defined(__GNUC__) - __asm__( + + __asm__ __volatile__ ( ".intel_syntax noprefix\n" "mov eax, 1\n" "xor esi, esi\n" @@ -220,8 +222,8 @@ ihatemsvc: // Use ecx and edx as base pointers, to allow for Mod/RM form on memOps. // This saves 2-3 bytes per instruction where these are used. :) - "mov ecx, offset %c[yuv2rgb_temp]\n" - "mov edx, offset %c[sse2_tables]+64\n" + //"mov ecx, offset %c[yuv2rgb_temp]\n" + //"mov edx, offset %c[sse2_tables]+64\n" ".align 16\n" "tworows:\n" @@ -237,29 +239,29 @@ ihatemsvc: // unfortunately I don't think this will matter despite being // technically potentially a little faster, but this is // equivalent to an add or sub - "pxor xmm2, xmmword ptr [edx+%c[C_BIAS]]\n" // xmm2 <-- 8 x (Cb - 128) << 8 - "pxor xmm0, xmmword ptr [edx+%c[C_BIAS]]\n" // xmm0 <-- 8 x (Cr - 128) << 8 + "pxor xmm2, xmmword ptr [%[sse2_tables]+%c[C_BIAS]]\n" // xmm2 <-- 8 x (Cb - 128) << 8 + "pxor xmm0, xmmword ptr [%[sse2_tables]+%c[C_BIAS]]\n" // xmm0 <-- 8 x (Cr - 128) << 8 "movaps xmm1, xmm0\n" "movaps xmm3, xmm2\n" - "pmulhw xmm1, xmmword ptr [edx+%c[GCr_COEFF]]\n" - "pmulhw xmm3, xmmword ptr [edx+%c[GCb_COEFF]]\n" - "pmulhw xmm0, xmmword ptr [edx+%c[RCr_COEFF]]\n" - "pmulhw xmm2, xmmword ptr [edx+%c[BCb_COEFF]]\n" + "pmulhw xmm1, xmmword ptr [%[sse2_tables]+%c[GCr_COEFF]]\n" + "pmulhw xmm3, xmmword ptr [%[sse2_tables]+%c[GCb_COEFF]]\n" + "pmulhw xmm0, xmmword ptr [%[sse2_tables]+%c[RCr_COEFF]]\n" + "pmulhw xmm2, xmmword ptr [%[sse2_tables]+%c[BCb_COEFF]]\n" "paddsw xmm1, xmm3\n" // store for the next line; looking at the code above // compared to the code below, I have to wonder whether // this was worth the hassle - "movaps xmmword ptr [ecx], xmm0\n" - "movaps xmmword ptr [ecx+16], xmm1\n" - "movaps xmmword ptr [ecx+32], xmm2\n" + "movaps xmmword ptr [%[yuv2rgb_temp]], xmm0\n" + "movaps xmmword ptr [%[yuv2rgb_temp]+16], xmm1\n" + "movaps xmmword ptr [%[yuv2rgb_temp]+32], xmm2\n" "jmp 
ihategcctoo\n" ".align 16\n" "onerow:\n" - "movaps xmm0, xmmword ptr [ecx]\n" - "movaps xmm1, xmmword ptr [ecx+16]\n" - "movaps xmm2, xmmword ptr [ecx+32]\n" + "movaps xmm0, xmmword ptr [%[yuv2rgb_temp]]\n" + "movaps xmm1, xmmword ptr [%[yuv2rgb_temp]+16]\n" + "movaps xmm2, xmmword ptr [%[yuv2rgb_temp]+32]\n" "ihategcctoo:\n" "movaps xmm3, xmm0\n" @@ -267,13 +269,13 @@ ihatemsvc: "movaps xmm5, xmm2\n" "movaps xmm6, xmmword ptr [mb8+edi]\n" - "psubusb xmm6, xmmword ptr [edx+%c[Y_BIAS]]\n" + "psubusb xmm6, xmmword ptr [%[sse2_tables]+%c[Y_BIAS]]\n" "movaps xmm7, xmm6\n" "psllw xmm6, 8\n" // xmm6 <- Y << 8 for pixels 0,2,4,6,8,10,12,14 - "pand xmm7, xmmword ptr [edx+%c[Y_MASK]]\n" // xmm7 <- Y << 8 for pixels 1,3,5,7,9,11,13,15 + "pand xmm7, xmmword ptr [%[sse2_tables]+%c[Y_MASK]]\n" // xmm7 <- Y << 8 for pixels 1,3,5,7,9,11,13,15 - "pmulhuw xmm6, xmmword ptr [edx+%c[Y_COEFF]]\n" - "pmulhuw xmm7, xmmword ptr [edx+%c[Y_COEFF]]\n" + "pmulhuw xmm6, xmmword ptr [%[sse2_tables]+%c[Y_COEFF]]\n" + "pmulhuw xmm7, xmmword ptr [%[sse2_tables]+%c[Y_COEFF]]\n" "paddsw xmm0, xmm6\n" "paddsw xmm3, xmm7\n" @@ -283,7 +285,7 @@ ihatemsvc: "paddsw xmm5, xmm7\n" // round - "movaps xmm6, xmmword ptr [edx+%c[ROUND_1BIT]]\n" + "movaps xmm6, xmmword ptr [%[sse2_tables]+%c[ROUND_1BIT]]\n" "paddw xmm0, xmm6\n" "paddw xmm1, xmm6\n" "paddw xmm2, xmm6\n" @@ -340,14 +342,14 @@ ihatemsvc: "jne tworows\n" ".att_syntax\n" : - :[C_BIAS]"i"(C_BIAS), [Y_BIAS]"i"(Y_BIAS), [Y_MASK]"i"(Y_MASK), - [ROUND_1BIT]"i"(ROUND_1BIT), [Y_COEFF]"i"(Y_COEFF), [GCr_COEFF]"i"(GCr_COEFF), + :[C_BIAS]"i"(C_BIAS), [Y_BIAS]"i"(Y_BIAS), [Y_MASK]"i"(Y_MASK), + [ROUND_1BIT]"i"(ROUND_1BIT), [Y_COEFF]"i"(Y_COEFF), [GCr_COEFF]"i"(GCr_COEFF), [GCb_COEFF]"i"(GCb_COEFF), [RCr_COEFF]"i"(RCr_COEFF), [BCb_COEFF]"i"(BCb_COEFF), - [yuv2rgb_temp]"i"(yuv2rgb_temp), [sse2_tables]"i"(&sse2_tables) - : + [yuv2rgb_temp]"r"(yuv2rgb_temp), [sse2_tables]"r"(&sse2_tables) + : "eax", "ebx", "esi", "edi", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", 
"xmm5", "xmm6", "xmm7", "memory" ); #else -#error Unsupported compiler +# error Unsupported compiler #endif FreezeXMMRegs(0); diff --git a/pcsx2/VifDma.cpp b/pcsx2/VifDma.cpp index 13f47f3000..eb14fe587b 100644 --- a/pcsx2/VifDma.cpp +++ b/pcsx2/VifDma.cpp @@ -646,6 +646,11 @@ static void VIFunpack(u32 *data, vifCode *v, unsigned int size, const unsigned i int writemask; u32 oldcycle = -1; + // yay evil .. let's just set some XMM registers in the middle of C code + // and "hope" they get preserved, in spite of the fact that x86-32 ABI specifies + // these as "clobberable" registers (so any printf or something could decide to + // clobber them, and has every right to... >_<) --air + #ifdef _MSC_VER if (VIFdmanum) { @@ -658,6 +663,10 @@ static void VIFunpack(u32 *data, vifCode *v, unsigned int size, const unsigned i __asm movaps XMM_COL, xmmword ptr [g_vifmask.Col0] } #else + // I'd add volatile to these, but what's the point? This code already breaks + // like 5000 coveted rules of binary interfacing regardless, and is only working by + // the miracles and graces of a profound deity (or maybe it doesn't -- linux port + // *does* have stability issues, especially in GCC 4.4). --air if (VIFdmanum) { __asm__(".intel_syntax noprefix\n" diff --git a/pcsx2/x86/iMisc.cpp b/pcsx2/x86/iMisc.cpp index b671784ac2..fd594571a2 100644 --- a/pcsx2/x86/iMisc.cpp +++ b/pcsx2/x86/iMisc.cpp @@ -1,6 +1,6 @@ /* PCSX2 - PS2 Emulator for PCs * Copyright (C) 2002-2009 PCSX2 Dev Team - * + * * PCSX2 is free software: you can redistribute it and/or modify it under the terms * of the GNU Lesser General Public License as published by the Free Software Found- * ation, either version 3 of the License, or (at your option) any later version. 
@@ -16,7 +16,7 @@ #include "PrecompiledHeader.h" -u32 g_sseMXCSR = DEFAULT_sseMXCSR; +u32 g_sseMXCSR = DEFAULT_sseMXCSR; u32 g_sseVUMXCSR = DEFAULT_sseVUMXCSR; ////////////////////////////////////////////////////////////////////////////////////////// @@ -46,7 +46,7 @@ void SetCPUState(u32 sseMXCSR, u32 sseVUMXCSR) #ifdef _MSC_VER __asm ldmxcsr g_sseMXCSR; // set the new sse control #else - __asm__("ldmxcsr %[g_sseMXCSR]" : : [g_sseMXCSR]"m"(g_sseMXCSR) ); + __asm__ __volatile__("ldmxcsr %[g_sseMXCSR]" : : [g_sseMXCSR]"m"(g_sseMXCSR) ); #endif //g_sseVUMXCSR = g_sseMXCSR|0x6000; } diff --git a/pcsx2/x86/iR3000A.cpp b/pcsx2/x86/iR3000A.cpp index ae3ea7c94d..e6f84071a1 100644 --- a/pcsx2/x86/iR3000A.cpp +++ b/pcsx2/x86/iR3000A.cpp @@ -1,6 +1,6 @@ /* PCSX2 - PS2 Emulator for PCs * Copyright (C) 2002-2009 PCSX2 Dev Team - * + * * PCSX2 is free software: you can redistribute it and/or modify it under the terms * of the GNU Lesser General Public License as published by the Free Software Found- * ation, either version 3 of the License, or (at your option) any later version. @@ -12,7 +12,7 @@ * You should have received a copy of the GNU General Public License along with PCSX2. * If not, see . */ - + // recompiler reworked to add dynamic linking Jan06 // and added reg caching, const propagation, block analysis Jun06 // zerofrog(@gmail.com) @@ -657,22 +657,29 @@ static __forceinline s32 recExecuteBlock( s32 eeCycles ) pop ebx } #else - __asm__ + __asm__ __volatile__ ( + // We should be able to rely on GAS syntax (the register clobber list) as a + // replacement for manual push/pop of unpreserved registers. + // + // EBP note: As I feared, EBP is "required" for C++ excepion handling in Linux, and trying + // to issue a clobber specifier for it causes an error. We really need to find a way to + // disable EBP regalloc in iCore. 
--air + ".intel_syntax noprefix\n" - "push ebx\n" - "push esi\n" - "push edi\n" + //"push ebx\n" + //"push esi\n" + //"push edi\n" "push ebp\n" "call iopDispatcherReg\n" "pop ebp\n" - "pop edi\n" - "pop esi\n" - "pop ebx\n" + //"pop edi\n" + //"pop esi\n" + //"pop ebx\n" ".att_syntax\n" - ); + : : : "eax", "ebx", "ecx", "edx", "esi", "edi", "memory" ); #endif return psxBreak + psxCycleEE; @@ -759,7 +766,7 @@ void psxSetBranchReg(u32 reg) xOR( eax, eax ); #endif } - + #ifdef PCSX2_DEBUG xForwardJNZ8 skipAssert; xWrite8( 0xcc ); @@ -836,7 +843,7 @@ static void checkcodefn() #ifdef _MSC_VER __asm mov pctemp, eax; #else - __asm__("movl %%eax, %[pctemp]" : : [pctemp]"m"(pctemp) ); + __asm__ __volatile__("movl %%eax, %[pctemp]" : [pctemp]"m="(pctemp) ); #endif Console.WriteLn("iop code changed! %x", pctemp); } diff --git a/pcsx2/x86/iVif.cpp b/pcsx2/x86/iVif.cpp index 2b02d67577..e4e2f736c0 100644 --- a/pcsx2/x86/iVif.cpp +++ b/pcsx2/x86/iVif.cpp @@ -104,7 +104,7 @@ static void __forceinline UseOldMaskCode(u32* &vif1masks, u32 &mask) u8* p0 = (u8*)&s_maskarr[mask&15][0]; u8* p1 = (u8*)&s_maskarr[(mask>>4)&15][0]; - __asm__(".intel_syntax noprefix\n" + __asm__ __volatile__(".intel_syntax noprefix\n" "movaps xmm0, [%0]\n" "movaps xmm1, [%1]\n" "movaps xmm2, xmm0\n" @@ -121,6 +121,6 @@ static void __forceinline UseOldMaskCode(u32* &vif1masks, u32 &mask) "movq [%2+40], xmm3\n" "movhps [%2+48], xmm2\n" "movhps [%2+56], xmm3\n" - ".att_syntax\n" : : "r"(p0), "r"(p1), "r"(vif1masks) ); + ".att_syntax\n" : : "r"(p0), "r"(p1), "r"(vif1masks) : "memory" ); } #endif diff --git a/pcsx2/x86/ix86-32/iR5900-32.cpp b/pcsx2/x86/ix86-32/iR5900-32.cpp index 32cc19af39..c7292642e3 100644 --- a/pcsx2/x86/ix86-32/iR5900-32.cpp +++ b/pcsx2/x86/ix86-32/iR5900-32.cpp @@ -1,6 +1,6 @@ /* PCSX2 - PS2 Emulator for PCs * Copyright (C) 2002-2009 PCSX2 Dev Team - * + * * PCSX2 is free software: you can redistribute it and/or modify it under the terms * of the GNU Lesser General Public License as 
published by the Free Software Found- * ation, either version 3 of the License, or (at your option) any later version. @@ -12,7 +12,7 @@ * You should have received a copy of the GNU General Public License along with PCSX2. * If not, see . */ - + #include "PrecompiledHeader.h" #include "Common.h" @@ -286,7 +286,7 @@ u32* recGetImm64(u32 hi, u32 lo) { Console.Status( "EErec const buffer filled; Resetting..." ); throw Exception::ForceDispatcherReg(); - + /*for (u32 *p = recConstBuf; p < recConstBuf + RECCONSTBUF_SIZE; p += 2) { if (p[0] == lo && p[1] == hi) { @@ -601,22 +601,29 @@ static void recExecute() #else // _MSC_VER - __asm__ + __asm__ __volatile__ ( + // We should be able to rely on GAS syntax (the register clobber list) as a + // replacement for manual push/pop of unpreserved registers. + + // EBP note: As I feared, EBP is "required" for C++ excepion handling in Linux, and trying + // to issue a clobber specifier for it causes an error. We really need to find a way to + // disable EBP regalloc in iCore. --air + ".intel_syntax noprefix\n" - "push ebx\n" - "push esi\n" - "push edi\n" - "push ebp\n" + //"push ebx\n" + //"push esi\n" + //"push edi\n" + //"push ebp\n" "call DispatcherReg\n" - "pop ebp\n" - "pop edi\n" - "pop esi\n" - "pop ebx\n" + //"pop ebp\n" + //"pop edi\n" + //"pop esi\n" + //"pop ebx\n" ".att_syntax\n" - ); + : : : "eax", "ebx", "ecx", "edx", "esi", "edi", "memory" ); #endif } catch( Exception::ForceDispatcherReg& ) @@ -679,21 +686,11 @@ void recClear(u32 addr, u32 size) BASEBLOCKEX* pexblock; BASEBLOCK* pblock; - //why the hell? 
-#if 1 // necessary since recompiler doesn't call femms/emms -#ifdef __INTEL_COMPILER - __asm__("emms"); +#ifdef _MSC_VER + asm emms; #else - #ifdef _MSC_VER - if (x86caps.has3DNOWInstructionExtensions) __asm femms; - else __asm emms; - #else - if( x86caps.has3DNOWInstructionExtensions )__asm__("femms"); - else - __asm__("emms"); - #endif -#endif + __asm__ __volatile__("emms"); #endif if ((addr) >= maxrecmem || !(recLUT[(addr) >> 16] + (addr & ~0xFFFFUL))) diff --git a/plugins/spu2-x/src/3rdparty/liba52/bitstream.h b/plugins/spu2-x/src/3rdparty/liba52/bitstream.h index 7d7ea7679f..b4d1add513 100644 --- a/plugins/spu2-x/src/3rdparty/liba52/bitstream.h +++ b/plugins/spu2-x/src/3rdparty/liba52/bitstream.h @@ -53,7 +53,7 @@ int32_t a52_bitstream_get_bh_2 (a52_state_t * state, uint32_t num_bits); static inline uint32_t bitstream_get (a52_state_t * state, uint32_t num_bits) { uint32_t result; - + if (num_bits < state->bits_left) { result = (state->current_word << (32 - state->bits_left)) >> (32 - num_bits); state->bits_left -= num_bits; @@ -66,7 +66,7 @@ static inline uint32_t bitstream_get (a52_state_t * state, uint32_t num_bits) static inline int32_t bitstream_get_2 (a52_state_t * state, uint32_t num_bits) { int32_t result; - + if (num_bits < state->bits_left) { result = (((int32_t)state->current_word) << (32 - state->bits_left)) >> (32 - num_bits); state->bits_left -= num_bits; diff --git a/plugins/spu2-x/src/Mixer.cpp b/plugins/spu2-x/src/Mixer.cpp index 135289502e..244fa2a47a 100644 --- a/plugins/spu2-x/src/Mixer.cpp +++ b/plugins/spu2-x/src/Mixer.cpp @@ -299,7 +299,7 @@ static s32 __forceinline GetNoiseValues() #else __asm__ ( ".intel_syntax\n" - "MOV %%eax,%0\n" + "MOV %%eax,%1\n" "ROR %%eax,5\n" "XOR %%eax,0x9a\n" "MOV %%ebx,%%eax\n" @@ -308,7 +308,7 @@ static s32 __forceinline GetNoiseValues() "XOR %%eax,%%ebx\n" "ROR %%eax,3\n" "MOV %0,%%eax\n" - ".att_syntax\n" : :"r"(Seed)); + ".att_syntax\n" : "m="(Seed) : "m"(Seed)); #endif return retval; } @@ -651,7 +651,7 
@@ __forceinline void Mix() { // SPDIF is on Core 0: (PlayMode&4) ? StereoOut32::Empty : ApplyVolume( Cores[0].ReadInput(), Cores[0].InpVol ), - + // CDDA is on Core 1: (PlayMode&8) ? StereoOut32::Empty : ApplyVolume( Cores[1].ReadInput(), Cores[1].InpVol ) }; diff --git a/plugins/zerospu2/voices.cpp b/plugins/zerospu2/voices.cpp index bdf8e5e2f2..b2f3776f60 100644 --- a/plugins/zerospu2/voices.cpp +++ b/plugins/zerospu2/voices.cpp @@ -38,10 +38,10 @@ void VOICE_PROCESSED::SetVolume(s32 iProcessRight) if (vol&0x8000) // sweep not working { short sInc=1; // -> sweep up? - + if (vol&0x2000) sInc=-1; // -> or down? if (vol&0x1000) vol^=0xffff; // -> mmm... phase inverted? have to investigate this - + vol=((vol&0x7f)+1)/2; // -> sweep: 0..127 -> 0..64 vol+=vol/(2*sInc); // -> HACK: we don't sweep right now, so we just raise/lower the volume by the half! vol*=128; @@ -64,14 +64,14 @@ void VOICE_PROCESSED::StartSound() ADSRX.lVolume=1; // and init some adsr vars ADSRX.State=0; ADSRX.EnvelopeVol=0; - + if (bReverb && GetCtrl()->reverb) { // setup the reverb effects } pCurr=pStart; // set sample start - + s_1=0; // init mixing vars s_2=0; iSBPos=28; @@ -90,9 +90,9 @@ void VOICE_PROCESSED::VoiceChangeFrequency() { iUsedFreq=iActFreq; // -> take it and calc steps sinc=(u32)pvoice->pitch<<4; - + if (!sinc) sinc=1; - + // -> freq change in simle imterpolation mode: set flag SB[32]=1; } @@ -116,7 +116,7 @@ void VOICE_PROCESSED::InterpolateUp() else if (id2<(id1<<1)) SB[28]=(id1*sinc)/0x10000L; else - SB[28]=(id1*sinc)/0x20000L; + SB[28]=(id1*sinc)/0x20000L; } else // curr delta negative { @@ -128,7 +128,7 @@ void VOICE_PROCESSED::InterpolateUp() else if (id2>(id1<<1)) SB[28]=(id1*sinc)/0x10000L; else - SB[28]=(id1*sinc)/0x20000L; + SB[28]=(id1*sinc)/0x20000L; } } else if (SB[32]==2) // flag 1: calc step and set flag... 
and don't change the value in this pass @@ -181,15 +181,15 @@ void VOICE_PROCESSED::FModChangeFrequency(s32 ns) iFMod[ns]=0; } -static void __forceinline GetNoiseValues(s32& VD) +static void __forceinline GetNoiseValues(s32& VD) { static s32 Seed = 0x41595321; - if(Seed&0x100) + if(Seed&0x100) VD = (s32)((Seed&0xff)<<8); - else if (!(Seed&0xffff)) + else if (!(Seed&0xffff)) VD = (s32)0x8000; - else + else VD = (s32)0x7fff; #ifdef _WIN32 @@ -207,7 +207,7 @@ static void __forceinline GetNoiseValues(s32& VD) #else __asm__ ( ".intel_syntax\n" - "MOV %%eax,%0\n" + "MOV %%eax,%1\n" "ROR %%eax,5\n" "XOR %%eax,0x9a\n" "MOV %%ebx,%%eax\n" @@ -216,7 +216,7 @@ static void __forceinline GetNoiseValues(s32& VD) "XOR %%eax,%%ebx\n" "ROR %%eax,3\n" "MOV %0,%%eax\n" - ".att_syntax\n" : :"r"(Seed)); + ".att_syntax\n" : "r="(Seed) :"r"(Seed)); #endif } @@ -239,13 +239,13 @@ int VOICE_PROCESSED::iGetNoiseVal() // mmm... depending on the noise freq we allow bigger/smaller changes to the previous val fa=iOldNoise + ((fa - iOldNoise) / ((0x001f - (GetCtrl()->noiseFreq)) + 1)); - + clamp16(fa); iOldNoise=fa; SB[29] = fa; // -> store noise val in "current sample" slot return fa; -} +} void VOICE_PROCESSED::StoreInterpolationVal(s32 fa) { @@ -257,10 +257,10 @@ void VOICE_PROCESSED::StoreInterpolationVal(s32 fa) fa=0; // muted? else // else adjust { - clamp16(fa); + clamp16(fa); } - SB[28] = 0; + SB[28] = 0; SB[29] = SB[30]; // -> helpers for simple linear interpolation: delay real val for two slots, and calc the two deltas, for a 'look at the future behaviour' SB[30] = SB[31]; SB[31] = fa; @@ -271,18 +271,18 @@ void VOICE_PROCESSED::StoreInterpolationVal(s32 fa) s32 VOICE_PROCESSED::iGetInterpolationVal() { s32 fa; - + if (bFMod==2) return SB[29]; if (sinc<0x10000L) // -> upsampling? InterpolateUp(); // --> interpolate up - else + else InterpolateDown(); // --> else down - + fa=SB[29]; return fa; } void VOICE_PROCESSED::Stop() { -} \ No newline at end of file +}