From 6412271470eb386e56b081bbc03887e42d35e3a7 Mon Sep 17 00:00:00 2001 From: "Jake.Stine" Date: Mon, 5 Oct 2009 02:15:49 +0000 Subject: [PATCH] Renamed PCSX2_ALIGNED to __aligned and removed the need for excess parenthesis and oddball qualifiers. Left the old macros in Pcsx2defs.h for now, just in case. Redid some of the storage organization of microVU and iFPU consts and temporaries while I was at it, using structs instead of naked vars -- should improve cpu cache behavior a wee bit. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1959 96395faa-99c1-11dd-bbfe-3dabce05a288 --- common/include/Pcsx2Defs.h | 18 +- common/include/x86emitter/tools.h | 2 +- common/include/x86emitter/x86types.h | 2 +- common/src/Utilities/Linux/LnxMisc.cpp | 4 +- common/src/Utilities/x86/MemcpyFast.cpp | 4 +- common/src/x86emitter/cpudetect.cpp | 2 +- common/src/x86emitter/tools.cpp | 4 +- common/src/x86emitter/x86emitter.cpp | 3 +- pcsx2/FiFo.cpp | 2 +- pcsx2/GS.cpp | 2 +- pcsx2/GS.h | 8 +- pcsx2/IPU/IPU.cpp | 16 +- pcsx2/IPU/IPU.h | 4 +- pcsx2/IPU/yuv2rgb.cpp | 4 +- pcsx2/Linux/pcsx2.cbp | 939 +++++++++++---------- pcsx2/MTGS.cpp | 4 +- pcsx2/Memory.cpp | 2 +- pcsx2/R3000A.cpp | 2 +- pcsx2/R3000A.h | 2 +- pcsx2/R5900.cpp | 6 +- pcsx2/R5900.h | 6 +- pcsx2/R5900OpcodeImpl.cpp | 2 +- pcsx2/Sif.cpp | 2 +- pcsx2/VU.h | 2 +- pcsx2/VU0.cpp | 2 +- pcsx2/VUops.cpp | 2 +- pcsx2/Vif.cpp | 5 +- pcsx2/Vif.h | 26 +- pcsx2/VifDma.cpp | 67 +- pcsx2/gui/App.h | 2 - pcsx2/gui/AppCoreThread.cpp | 182 ++++ pcsx2/gui/AppMain.cpp | 167 +--- pcsx2/ps2/GIFpath.cpp | 2 +- pcsx2/vtlb.cpp | 2 +- pcsx2/vtlb.h | 2 +- pcsx2/windows/VCprojects/pcsx2_2008.vcproj | 4 + pcsx2/x86/iCore.cpp | 1 - pcsx2/x86/iFPU.cpp | 10 +- pcsx2/x86/iFPUd.cpp | 118 +-- pcsx2/x86/iMMI.cpp | 7 +- pcsx2/x86/iR5900.h | 3 +- pcsx2/x86/iVU1micro.cpp | 8 +- pcsx2/x86/iVif.cpp | 7 +- pcsx2/x86/ix86-32/iCore-32.cpp | 2 +- pcsx2/x86/ix86-32/iR5900-32.cpp | 6 +- pcsx2/x86/ix86-32/iR5900LoadStore.cpp | 4 +- pcsx2/x86/ix86-32/iR5900Shift.cpp | 2 +- 
pcsx2/x86/ix86-32/recVTLB.cpp | 2 +- pcsx2/x86/microVU.cpp | 35 +- pcsx2/x86/microVU.h | 10 +- pcsx2/x86/microVU_Lower.inl | 70 +- pcsx2/x86/microVU_Misc.h | 69 +- pcsx2/x86/microVU_Misc.inl | 52 +- pcsx2/x86/microVU_Upper.inl | 28 +- pcsx2/x86/sVU_Lower.cpp | 7 +- pcsx2/x86/sVU_Micro.cpp | 36 +- pcsx2/x86/sVU_Micro.h | 12 +- pcsx2/x86/sVU_Upper.cpp | 54 +- plugins/zerogs/dx/Mem.cpp | 2 +- plugins/zerogs/dx/x86.cpp | 6 +- plugins/zerospu2/zerospu2.cpp | 4 +- 61 files changed, 1067 insertions(+), 993 deletions(-) create mode 100644 pcsx2/gui/AppCoreThread.cpp diff --git a/common/include/Pcsx2Defs.h b/common/include/Pcsx2Defs.h index 323d76f3d8..b4da3fd487 100644 --- a/common/include/Pcsx2Defs.h +++ b/common/include/Pcsx2Defs.h @@ -162,21 +162,27 @@ #endif ////////////////////////////////////////////////////////////////////////////////////////// -// PCSX2_ALIGNED16 - helper macros for aligning variables in MSVC and GCC. +// __aligned / __aligned16 / __pagealigned // // GCC Warning! The GCC linker (LD) typically fails to assure alignment of class members. // If you want alignment to be assured, the variable must either be a member of a struct // or a static global. // +// __pagealigned is equivalent to __aligned(0x1000), and is used to align a dynarec code +// buffer to a page boundary (allows the use of execution-enabled mprotect). +// // General Performance Warning: Any function that specifies alignment on a local (stack) // variable will have to align the stack frame on enter, and restore it on exit (adds // overhead). Furthermore, compilers cannot inline functions that have aligned local // vars. So use local var alignment with much caution. // -// Note: building the 'extern' into PCSX2_ALIGNED16 fixes Visual Assist X's intellisense. -// #ifdef _MSC_VER +# define __aligned(alig) __declspec(align(alig)) +# define __aligned16 __declspec(align(16)) +# define __pagealigned __declspec(align(0x1000)) + + // Deprecated; use __align instead. 
# define PCSX2_ALIGNED(alig,x) __declspec(align(alig)) x # define PCSX2_ALIGNED_EXTERN(alig,x) extern __declspec(align(alig)) x # define PCSX2_ALIGNED16(x) __declspec(align(16)) x @@ -220,8 +226,10 @@ This theoretically unoptimizes. Not having much luck so far. */ -// fixme - is this needed for recent versions of GCC? Or can we just use the first two macros -// instead for both definitions (implementations) and declarations (includes)? -- air +# define __aligned(alig) __attribute__((aligned(alig))) +# define __aligned16 __attribute__((aligned(16))) +# define __pagealigned __attribute__((aligned(0x1000))) + // Deprecated; use __align instead. # define PCSX2_ALIGNED(alig,x) x __attribute((aligned(alig))) # define PCSX2_ALIGNED16(x) x __attribute((aligned(16))) # define PCSX2_ALIGNED_EXTERN(alig,x) extern x __attribute((aligned(alig))) diff --git a/common/include/x86emitter/tools.h b/common/include/x86emitter/tools.h index 42972df534..7240382819 100644 --- a/common/include/x86emitter/tools.h +++ b/common/include/x86emitter/tools.h @@ -93,7 +93,7 @@ struct x86CPU_INFO ////////////////////////////////////////////////////////////////////////////////////////// -PCSX2_ALIGNED16_EXTERN( x86CPU_INFO x86caps ); +extern __aligned16 x86CPU_INFO x86caps; extern u8 g_globalMMXSaved, g_globalXMMSaved; extern bool g_EEFreezeRegs; diff --git a/common/include/x86emitter/x86types.h b/common/include/x86emitter/x86types.h index 379d34f645..9484c5459a 100644 --- a/common/include/x86emitter/x86types.h +++ b/common/include/x86emitter/x86types.h @@ -408,7 +408,7 @@ __forceinline void xWrite( T val ) Displacement += imm; return *this; } - +__noinline __forceinline ModSibStrict operator+( const s32 imm ) const { return ModSibStrict( *this ).Add( imm ); } __forceinline ModSibStrict operator-( const s32 imm ) const { return ModSibStrict( *this ).Add( -imm ); } diff --git a/common/src/Utilities/Linux/LnxMisc.cpp b/common/src/Utilities/Linux/LnxMisc.cpp index 60cb8f6882..e2f7c576e8 100644 --- 
a/common/src/Utilities/Linux/LnxMisc.cpp +++ b/common/src/Utilities/Linux/LnxMisc.cpp @@ -19,8 +19,8 @@ #include #include -extern "C" PCSX2_ALIGNED16( u8 _xmm_backup[16*2] ); -extern "C" PCSX2_ALIGNED16( u8 _mmx_backup[8*4] ); +extern "C" __aligned16 u8 _xmm_backup[16*2]; +extern "C" __aligned16 u8 _mmx_backup[8*4]; u8 _xmm_backup[16*2]; u8 _mmx_backup[8*4]; diff --git a/common/src/Utilities/x86/MemcpyFast.cpp b/common/src/Utilities/x86/MemcpyFast.cpp index 078bab8ab7..949d735caa 100644 --- a/common/src/Utilities/x86/MemcpyFast.cpp +++ b/common/src/Utilities/x86/MemcpyFast.cpp @@ -82,8 +82,8 @@ extern u8 g_globalMMXSaved; #endif -PCSX2_ALIGNED16( static u8 _xmm_backup[16*2] ); -PCSX2_ALIGNED16( static u8 _mmx_backup[8*4] ); +static __aligned16 u8 _xmm_backup[16*2]; +static __aligned16 u8 _mmx_backup[8*4]; static __declspec(naked) void __fastcall _memcpy_raz_usrc(void *dest, const void *src, size_t bytes) { diff --git a/common/src/x86emitter/cpudetect.cpp b/common/src/x86emitter/cpudetect.cpp index cb2e528c6b..4fdc612383 100644 --- a/common/src/x86emitter/cpudetect.cpp +++ b/common/src/x86emitter/cpudetect.cpp @@ -22,7 +22,7 @@ using namespace x86Emitter; -PCSX2_ALIGNED16( x86CPU_INFO x86caps ); +__aligned16 x86CPU_INFO x86caps; static s32 iCpuId( u32 cmd, u32 *regs ) { diff --git a/common/src/x86emitter/tools.cpp b/common/src/x86emitter/tools.cpp index b7d119ebb5..887f6bb30f 100644 --- a/common/src/x86emitter/tools.cpp +++ b/common/src/x86emitter/tools.cpp @@ -23,8 +23,8 @@ u8 g_globalMMXSaved = 0; u8 g_globalXMMSaved = 0; -PCSX2_ALIGNED16( u64 g_globalMMXData[8] ); -PCSX2_ALIGNED16( u64 g_globalXMMData[2*iREGCNT_XMM] ); +__aligned16 u64 g_globalMMXData[8]; +__aligned16 u64 g_globalXMMData[2*iREGCNT_XMM]; ///////////////////////////////////////////////////////////////////// // MMX Register Freezing diff --git a/common/src/x86emitter/x86emitter.cpp b/common/src/x86emitter/x86emitter.cpp index 4e5ea975db..e316bce630 100644 --- a/common/src/x86emitter/x86emitter.cpp 
+++ b/common/src/x86emitter/x86emitter.cpp @@ -32,8 +32,7 @@ #include "internal.h" // defined in tools.cpp -PCSX2_ALIGNED16_EXTERN( u64 g_globalXMMData[2*iREGCNT_XMM] ); - +extern __aligned16 u64 g_globalXMMData[2*iREGCNT_XMM]; // ------------------------------------------------------------------------ // Notes on Thread Local Storage: diff --git a/pcsx2/FiFo.cpp b/pcsx2/FiFo.cpp index 3ebc2d18f7..38ab311757 100644 --- a/pcsx2/FiFo.cpp +++ b/pcsx2/FiFo.cpp @@ -153,7 +153,7 @@ void __fastcall WriteFIFO_page_5(u32 mem, const mem128_t *value) } // Dummy GIF-TAG Packet to Guarantee Count = 1 -PCSX2_ALIGNED16(u32 nloop0_packet[4]) = {0x8000, 0, 0, 0}; +__aligned16 u32 nloop0_packet[4] = {0x8000, 0, 0, 0}; void __fastcall WriteFIFO_page_6(u32 mem, const mem128_t *value) { diff --git a/pcsx2/GS.cpp b/pcsx2/GS.cpp index 1d630f0e7e..f87286d056 100644 --- a/pcsx2/GS.cpp +++ b/pcsx2/GS.cpp @@ -29,7 +29,7 @@ using namespace R5900; u32 CSRw; -PCSX2_ALIGNED16( u8 g_RealGSMem[0x2000] ); +__aligned16 u8 g_RealGSMem[0x2000]; extern int m_nCounters[]; // FrameSkipping Stuff diff --git a/pcsx2/GS.h b/pcsx2/GS.h index df80020088..83b31f613f 100644 --- a/pcsx2/GS.h +++ b/pcsx2/GS.h @@ -18,7 +18,7 @@ #include "Common.h" #include "System/SysThreads.h" -PCSX2_ALIGNED16( extern u8 g_RealGSMem[Ps2MemSize::GSregs] ); +extern __aligned16 u8 g_RealGSMem[Ps2MemSize::GSregs]; #define PS2MEM_GS g_RealGSMem #define PS2GS_BASE(mem) (g_RealGSMem+(mem&0x13ff)) @@ -170,7 +170,7 @@ protected: void ExecuteTask(); }; -PCSX2_ALIGNED16_EXTERN( mtgsThreadObject mtgsThread ); +extern __aligned16 mtgsThreadObject mtgsThread; void mtgsWaitGS(); @@ -195,8 +195,8 @@ extern void _gs_ChangeTimings( u32 framerate, u32 iTicks ); // used for resetting GIF fifo -void gsGIFReset(); -void gsCSRwrite(u32 value); +extern void gsGIFReset(); +extern void gsCSRwrite(u32 value); extern void gsWrite8(u32 mem, u8 value); extern void gsWrite16(u32 mem, u16 value); diff --git a/pcsx2/IPU/IPU.cpp b/pcsx2/IPU/IPU.cpp index 
ca443c399f..87231e73a8 100644 --- a/pcsx2/IPU/IPU.cpp +++ b/pcsx2/IPU/IPU.cpp @@ -57,8 +57,8 @@ int ipuCurCmd = 0xffffffff; int FOreadpos = 0, FOwritepos = 0; static int FIreadpos = 0, FIwritepos = 0; -PCSX2_ALIGNED16(u32 fifo_input[32]); -PCSX2_ALIGNED16(u32 fifo_output[32]); +__aligned16 u32 fifo_input[32]; +__aligned16 u32 fifo_output[32]; void ReorderBitstream(); @@ -81,13 +81,13 @@ char convert_data_buffer[0x1C]; // Quantization matrix static u8 niq[64]; //non-intraquant matrix static u8 iq[64]; //intraquant matrix -u16 vqclut[16]; //clut conversion table +u16 vqclut[16]; //clut conversion table static u8 s_thresh[2]; //thresholds for color conversions int coded_block_pattern = 0; -PCSX2_ALIGNED16(macroblock_8 mb8); -PCSX2_ALIGNED16(macroblock_16 mb16); -PCSX2_ALIGNED16(macroblock_rgb32 rgb32); -PCSX2_ALIGNED16(macroblock_rgb16 rgb16); +__aligned16 macroblock_8 mb8; +__aligned16 macroblock_16 mb16; +__aligned16 macroblock_rgb32 rgb32; +__aligned16 macroblock_rgb16 rgb16; u8 indx4[16*16/2]; bool mpeg2_inited = FALSE; //mpeg2_idct_init() must be called only once @@ -101,7 +101,7 @@ extern "C" extern u8 mpeg2_scan_alt[64]; } -PCSX2_ALIGNED16(u8 _readbits[80]); //local buffer (ring buffer) +__aligned16 u8 _readbits[80]; //local buffer (ring buffer) u8* readbits = _readbits; // always can decrement by one 1qw //#define SATURATE_4BITS(val) ((val)>15 ? 15 : (val)) diff --git a/pcsx2/IPU/IPU.h b/pcsx2/IPU/IPU.h index 10fb0cf1fa..9a47e5de9c 100644 --- a/pcsx2/IPU/IPU.h +++ b/pcsx2/IPU/IPU.h @@ -202,8 +202,8 @@ extern void FIFOfrom_readsingle(void *value); // The IPU can only do one task at once and never uses other buffers so these // should be made available to functions in other modules to save registers. 
-PCSX2_ALIGNED16(extern macroblock_rgb32 rgb32); -PCSX2_ALIGNED16(extern macroblock_8 mb8); +extern __aligned16 macroblock_rgb32 rgb32; +extern __aligned16 macroblock_8 mb8; extern int ipuInit(); extern void ipuReset(); diff --git a/pcsx2/IPU/yuv2rgb.cpp b/pcsx2/IPU/yuv2rgb.cpp index 8ed1d92707..368ceb562f 100644 --- a/pcsx2/IPU/yuv2rgb.cpp +++ b/pcsx2/IPU/yuv2rgb.cpp @@ -55,7 +55,7 @@ enum BCb_COEFF = 0x40 }; -static volatile PCSX2_ALIGNED16(const SSE2_Tables sse2_tables) = +static volatile const __aligned16 SSE2_Tables sse2_tables = { {0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000}, // c_bias {16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16}, // y_bias @@ -72,7 +72,7 @@ static volatile PCSX2_ALIGNED16(const SSE2_Tables sse2_tables) = SSE_COEFFICIENTS(0x102), // 2.015625 [BCb_coefficients] }; -static PCSX2_ALIGNED16(u16 yuv2rgb_temp[3][8]); +static __aligned16 u16 yuv2rgb_temp[3][8]; // This could potentially be improved for SSE4 __releaseinline void yuv2rgb_sse2(void) diff --git a/pcsx2/Linux/pcsx2.cbp b/pcsx2/Linux/pcsx2.cbp index 2c0004f51d..118067e048 100644 --- a/pcsx2/Linux/pcsx2.cbp +++ b/pcsx2/Linux/pcsx2.cbp @@ -1,469 +1,470 @@ - - - - - - + + + + + + diff --git a/pcsx2/MTGS.cpp b/pcsx2/MTGS.cpp index acfcbe5de9..7d941fc560 100644 --- a/pcsx2/MTGS.cpp +++ b/pcsx2/MTGS.cpp @@ -62,7 +62,7 @@ static const uint RingBufferSize = 1<> 12] ); +static __aligned16 vtlb_PageProtectionInfo m_PageProtectInfo[Ps2MemSize::Base >> 12]; // returns: diff --git a/pcsx2/R3000A.cpp b/pcsx2/R3000A.cpp index c13184ce6f..583adcdb06 100644 --- a/pcsx2/R3000A.cpp +++ b/pcsx2/R3000A.cpp @@ -47,7 +47,7 @@ bool iopBranchAction = false; bool iopEventTestIsActive = false; -PCSX2_ALIGNED16(psxRegisters psxRegs); +__aligned16 psxRegisters psxRegs; void psxReset() { diff --git a/pcsx2/R3000A.h b/pcsx2/R3000A.h index 46e5539058..d61ea275ed 100644 --- a/pcsx2/R3000A.h +++ b/pcsx2/R3000A.h @@ -115,7 +115,7 @@ struct psxRegisters { //u32 _smflag[32]; }; 
-PCSX2_ALIGNED16_EXTERN(psxRegisters psxRegs); +extern __aligned16 psxRegisters psxRegs; extern u32 g_psxNextBranchCycle; extern s32 psxBreak; // used when the IOP execution is broken and control returned to the EE diff --git a/pcsx2/R5900.cpp b/pcsx2/R5900.cpp index 2e0bfef4ad..2631b53c45 100644 --- a/pcsx2/R5900.cpp +++ b/pcsx2/R5900.cpp @@ -41,9 +41,9 @@ using namespace R5900; // for R5900 disasm tools s32 EEsCycle; // used to sync the IOP to the EE u32 EEoCycle; -PCSX2_ALIGNED16(cpuRegisters cpuRegs); -PCSX2_ALIGNED16(fpuRegisters fpuRegs); -PCSX2_ALIGNED16(tlbs tlb[48]); +__aligned16 cpuRegisters cpuRegs; +__aligned16 fpuRegisters fpuRegs; +__aligned16 tlbs tlb[48]; R5900cpu *Cpu = NULL; bool g_ExecBiosHack = false; // set if the BIOS has already been executed diff --git a/pcsx2/R5900.h b/pcsx2/R5900.h index 15ce461a9d..45dcf10835 100644 --- a/pcsx2/R5900.h +++ b/pcsx2/R5900.h @@ -247,9 +247,9 @@ struct tlbs #endif -PCSX2_ALIGNED16_EXTERN(cpuRegisters cpuRegs); -PCSX2_ALIGNED16_EXTERN(fpuRegisters fpuRegs); -PCSX2_ALIGNED16_EXTERN(tlbs tlb[48]); +extern __aligned16 cpuRegisters cpuRegs; +extern __aligned16 fpuRegisters fpuRegs; +extern __aligned16 tlbs tlb[48]; extern u32 g_nextBranchCycle; extern bool eeEventTestIsActive; diff --git a/pcsx2/R5900OpcodeImpl.cpp b/pcsx2/R5900OpcodeImpl.cpp index 68bc17ba7e..2b429de33a 100644 --- a/pcsx2/R5900OpcodeImpl.cpp +++ b/pcsx2/R5900OpcodeImpl.cpp @@ -511,7 +511,7 @@ void LWR() // dummy variable used as a destination address for writes to the zero register, so // that the zero register always stays zero. -PCSX2_ALIGNED16( static GPR_reg m_dummy_gpr_zero ); +static __aligned16 GPR_reg m_dummy_gpr_zero; // Returns the x86 address of the requested GPR, which is safe for writing. 
(includes // special handling for returning a dummy var for GPR0(zero), so that it's value is diff --git a/pcsx2/Sif.cpp b/pcsx2/Sif.cpp index b95ac85105..bad1077db6 100644 --- a/pcsx2/Sif.cpp +++ b/pcsx2/Sif.cpp @@ -232,7 +232,7 @@ __forceinline void SIF0Dma() } else if (sif0.fifoSize >= 4) // Read a tag { - static PCSX2_ALIGNED16(u32 tag[4]); + static __aligned16 u32 tag[4]; SIF0read((u32*)&tag[0], 4); // Tag SIF_LOG(" EE SIF read tag: %x %x %x %x", tag[0], tag[1], tag[2], tag[3]); diff --git a/pcsx2/VU.h b/pcsx2/VU.h index 23e9e23583..31b08842d1 100644 --- a/pcsx2/VU.h +++ b/pcsx2/VU.h @@ -181,7 +181,7 @@ struct _VURegsNum { }; extern VURegs* g_pVU1; -PCSX2_ALIGNED16_EXTERN(VURegs VU0); +extern __aligned16 VURegs VU0; #define VU1 (*g_pVU1) diff --git a/pcsx2/VU0.cpp b/pcsx2/VU0.cpp index 3ac31f2e8b..78321999bc 100644 --- a/pcsx2/VU0.cpp +++ b/pcsx2/VU0.cpp @@ -49,7 +49,7 @@ using namespace R5900; -PCSX2_ALIGNED16(VURegs VU0); +__aligned16 VURegs VU0; void COP2_BC2() { Int_COP2BC2PrintTable[_Rt_]();} void COP2_SPECIAL() { Int_COP2SPECIAL1PrintTable[_Funct_]();} diff --git a/pcsx2/VUops.cpp b/pcsx2/VUops.cpp index 573c014cc2..2df02b9fd1 100644 --- a/pcsx2/VUops.cpp +++ b/pcsx2/VUops.cpp @@ -45,7 +45,7 @@ #define _UImm11_ (s32)(VU->code & 0x7ff) -static PCSX2_ALIGNED16( VECTOR RDzero ); +static __aligned16 VECTOR RDzero; static __releaseinline void __fastcall _vuFMACflush(VURegs * VU) { int i; diff --git a/pcsx2/Vif.cpp b/pcsx2/Vif.cpp index c5033b85a8..c73abaafbf 100644 --- a/pcsx2/Vif.cpp +++ b/pcsx2/Vif.cpp @@ -29,10 +29,7 @@ u32* vifMaskRegs = NULL; vifStruct *vif; u16 vifqwc = 0; -PCSX2_ALIGNED16(u32 g_vifRow0[4]); -PCSX2_ALIGNED16(u32 g_vifCol0[4]); -PCSX2_ALIGNED16(u32 g_vifRow1[4]); -PCSX2_ALIGNED16(u32 g_vifCol1[4]); +__aligned16 VifMaskTypes g_vifmask; extern int g_vifCycles; diff --git a/pcsx2/Vif.h b/pcsx2/Vif.h index 9a1699e0f2..092cb23fa6 100644 --- a/pcsx2/Vif.h +++ b/pcsx2/Vif.h @@ -188,14 +188,26 @@ extern "C" #define vif0Regs 
((VIFregisters*)&PS2MEM_HW[0x3800]) #define vif1Regs ((VIFregisters*)&PS2MEM_HW[0x3c00]) -void dmaVIF0(); -void dmaVIF1(); -void mfifoVIF1transfer(int qwc); -int VIF0transfer(u32 *data, int size, int istag); -int VIF1transfer(u32 *data, int size, int istag); -void vifMFIFOInterrupt(); +extern void dmaVIF0(); +extern void dmaVIF1(); +extern void mfifoVIF1transfer(int qwc); +extern int VIF0transfer(u32 *data, int size, int istag); +extern int VIF1transfer(u32 *data, int size, int istag); +extern void vifMFIFOInterrupt(); -void __fastcall SetNewMask(u32* vif1masks, u32* hasmask, u32 mask, u32 oldmask); +// -------------------------------------------------------------------------------------- +// VIF SSE-optimized Masking Mess +// -------------------------------------------------------------------------------------- + +struct VifMaskTypes +{ + u32 Row0[4], Col0[4]; + u32 Row1[4], Col1[4]; +}; + +extern __aligned16 VifMaskTypes g_vifmask; + +extern void __fastcall SetNewMask(u32* vif1masks, u32* hasmask, u32 mask, u32 oldmask); #define XMM_R0 xmm0 #define XMM_R1 xmm1 diff --git a/pcsx2/VifDma.cpp b/pcsx2/VifDma.cpp index efa9e44e3e..d46aa836b8 100644 --- a/pcsx2/VifDma.cpp +++ b/pcsx2/VifDma.cpp @@ -37,17 +37,12 @@ extern "C" extern u32* vifRow; } -PCSX2_ALIGNED16_EXTERN(u32 g_vifRow0[4]); -PCSX2_ALIGNED16_EXTERN(u32 g_vifCol0[4]); -PCSX2_ALIGNED16_EXTERN(u32 g_vifRow1[4]); -PCSX2_ALIGNED16_EXTERN(u32 g_vifCol1[4]); - extern vifStruct *vif; vifStruct vif0, vif1; -static PCSX2_ALIGNED16(u32 g_vif1Masks[64]); -static PCSX2_ALIGNED16(u32 g_vif0Masks[64]); +static __aligned16 u32 g_vif1Masks[64]; +static __aligned16 u32 g_vif0Masks[64]; u32 g_vif1HasMask3[4] = {0}, g_vif0HasMask3[4] = {0}; // Generic constants @@ -59,7 +54,7 @@ static const unsigned int VIF1dmanum = 1; int g_vifCycles = 0; Path3Modes Path3progress = STOPPED_MODE; -static PCSX2_ALIGNED16( u32 splittransfer[4] ); +static __aligned16 u32 splittransfer[4]; u32 splitptr = 0; typedef void (__fastcall 
*UNPACKFUNCTYPE)(u32 *dest, u32 *data, int size); @@ -353,7 +348,7 @@ static int VIFalign(u32 *data, vifCode *v, unsigned int size, const unsigned int vifRegs = vif0Regs; vifMaskRegs = g_vif0Masks; vif = &vif0; - vifRow = g_vifRow0; + vifRow = g_vifmask.Row0; } else { @@ -361,7 +356,7 @@ static int VIFalign(u32 *data, vifCode *v, unsigned int size, const unsigned int vifRegs = vif1Regs; vifMaskRegs = g_vif1Masks; vif = &vif1; - vifRow = g_vifRow1; + vifRow = g_vifmask.Row1; } assert(v->addr < memsize); @@ -507,7 +502,7 @@ static int VIFalign(u32 *data, vifCode *v, unsigned int size, const unsigned int if(vifRegs->mode == 2) { //Update the reg rows for SSE - vifRow = VIFdmanum ? g_vifRow1 : g_vifRow0; + vifRow = VIFdmanum ? g_vifmask.Row1 : g_vifmask.Row0; vifRow[0] = vifRegs->r0; vifRow[1] = vifRegs->r1; vifRow[2] = vifRegs->r2; @@ -568,7 +563,7 @@ static void VIFunpack(u32 *data, vifCode *v, unsigned int size, const unsigned i vifRegs = vif0Regs; vifMaskRegs = g_vif0Masks; vif = &vif0; - vifRow = g_vifRow0; + vifRow = g_vifmask.Row0; assert(v->addr < memsize); } else @@ -578,7 +573,7 @@ static void VIFunpack(u32 *data, vifCode *v, unsigned int size, const unsigned i vifRegs = vif1Regs; vifMaskRegs = g_vif1Masks; vif = &vif1; - vifRow = g_vifRow1; + vifRow = g_vifmask.Row1; assert(v->addr < memsize); } @@ -654,28 +649,28 @@ static void VIFunpack(u32 *data, vifCode *v, unsigned int size, const unsigned i #ifdef _MSC_VER if (VIFdmanum) { - __asm movaps XMM_ROW, xmmword ptr [g_vifRow1] - __asm movaps XMM_COL, xmmword ptr [g_vifCol1] + __asm movaps XMM_ROW, xmmword ptr [g_vifmask.Row1] + __asm movaps XMM_COL, xmmword ptr [g_vifmask.Col1] } else { - __asm movaps XMM_ROW, xmmword ptr [g_vifRow0] - __asm movaps XMM_COL, xmmword ptr [g_vifCol0] + __asm movaps XMM_ROW, xmmword ptr [g_vifmask.Row0] + __asm movaps XMM_COL, xmmword ptr [g_vifmask.Col0] } #else if (VIFdmanum) { __asm__(".intel_syntax noprefix\n" - "movaps xmm6, xmmword ptr [%[g_vifRow1]]\n" - "movaps xmm7, 
xmmword ptr [%[g_vifCol1]]\n" - ".att_syntax\n" : : [g_vifRow1]"r"(g_vifRow1), [g_vifCol1]"r"(g_vifCol1)); + "movaps xmm6, xmmword ptr [%[g_vifRow1]]\n" + "movaps xmm7, xmmword ptr [%[g_vifCol1]]\n" + ".att_syntax\n" : : [g_vifRow1]"r"(g_vifmask.Row1), [g_vifCol1]"r"(g_vifmask.Col1)); } else { __asm__(".intel_syntax noprefix\n" - "movaps xmm6, xmmword ptr [%[g_vifRow0]]\n" - "movaps xmm7, xmmword ptr [%[g_vifCol0]]\n" - ".att_syntax\n" : : [g_vifRow0]"r"(g_vifRow0), [g_vifCol0]"r"(g_vifCol0)); + "movaps xmm6, xmmword ptr [%[g_vifRow0]]\n" + "movaps xmm7, xmmword ptr [%[g_vifCol0]]\n" + ".att_syntax\n" : : [g_vifRow0]"r"(g_vifmask.Row0), [g_vifCol0]"r"(g_vifmask.Col0)); } #endif @@ -1001,7 +996,7 @@ static int __fastcall Vif0TransSTRow(u32 *data) // STROW int ret; u32* pmem = &vif0Regs->r0 + (vif0.tag.addr << 2); - u32* pmem2 = g_vifRow0 + vif0.tag.addr; + u32* pmem2 = g_vifmask.Row0 + vif0.tag.addr; assert(vif0.tag.addr < 4); ret = min(4 - vif0.tag.addr, vif0.vifpacketsize); assert(ret > 0); @@ -1035,7 +1030,7 @@ static int __fastcall Vif0TransSTCol(u32 *data) // STCOL int ret; u32* pmem = &vif0Regs->c0 + (vif0.tag.addr << 2); - u32* pmem2 = g_vifCol0 + vif0.tag.addr; + u32* pmem2 = g_vifmask.Col0 + vif0.tag.addr; ret = min(4 - vif0.tag.addr, vif0.vifpacketsize); switch (ret) { @@ -1626,7 +1621,7 @@ void vif0Write32(u32 mem, u32 value) case VIF0_R2: case VIF0_R3: assert((mem&0xf) == 0); - g_vifRow0[(mem>>4) & 3] = value; + g_vifmask.Row0[(mem>>4) & 3] = value; break; case VIF0_C0: @@ -1634,7 +1629,7 @@ void vif0Write32(u32 mem, u32 value) case VIF0_C2: case VIF0_C3: assert((mem&0xf) == 0); - g_vifCol0[(mem>>4) & 3] = value; + g_vifmask.Col0[(mem>>4) & 3] = value; break; default: @@ -1665,12 +1660,12 @@ void SaveStateBase::vif0Freeze() // Dunno if this one is needed, but whatever, it's small. 
:) Freeze(g_vifCycles); - Freeze(vif0); + // mask settings for VIF0 and VIF1 + Freeze(g_vifmask); + Freeze(vif0); Freeze(g_vif0HasMask3); Freeze(g_vif0Masks); - Freeze(g_vifRow0); - Freeze(g_vifCol0); } ////////////////////////////////////////////////////////////////////////////// @@ -1774,7 +1769,7 @@ static int __fastcall Vif1TransSTRow(u32 *data) int ret; u32* pmem = &vif1Regs->r0 + (vif1.tag.addr << 2); - u32* pmem2 = g_vifRow1 + vif1.tag.addr; + u32* pmem2 = g_vifmask.Row1 + vif1.tag.addr; assert(vif1.tag.addr < 4); ret = min(4 - vif1.tag.addr, vif1.vifpacketsize); assert(ret > 0); @@ -1807,7 +1802,7 @@ static int __fastcall Vif1TransSTCol(u32 *data) int ret; u32* pmem = &vif1Regs->c0 + (vif1.tag.addr << 2); - u32* pmem2 = g_vifCol1 + vif1.tag.addr; + u32* pmem2 = g_vifmask.Col1 + vif1.tag.addr; ret = min(4 - vif1.tag.addr, vif1.vifpacketsize); switch (ret) { @@ -1855,7 +1850,7 @@ static int __fastcall Vif1TransMPG(u32 *data) } // Dummy GIF-TAG Packet to Guarantee Count = 1 -PCSX2_ALIGNED16_EXTERN(u32 nloop0_packet[4]); +extern __aligned16 u32 nloop0_packet[4]; static int __fastcall Vif1TransDirectHL(u32 *data) { @@ -2769,7 +2764,7 @@ void vif1Write32(u32 mem, u32 value) case VIF1_R2: case VIF1_R3: assert((mem&0xf) == 0); - g_vifRow1[(mem>>4) & 3] = value; + g_vifmask.Row1[(mem>>4) & 3] = value; break; case VIF1_C0: @@ -2777,7 +2772,7 @@ void vif1Write32(u32 mem, u32 value) case VIF1_C2: case VIF1_C3: assert((mem&0xf) == 0); - g_vifCol1[(mem>>4) & 3] = value; + g_vifmask.Col1[(mem>>4) & 3] = value; break; default: @@ -2809,6 +2804,4 @@ void SaveStateBase::vif1Freeze() Freeze(g_vif1HasMask3); Freeze(g_vif1Masks); - Freeze(g_vifRow1); - Freeze(g_vifCol1); } diff --git a/pcsx2/gui/App.h b/pcsx2/gui/App.h index 4718e482b0..0ccfc9ace4 100644 --- a/pcsx2/gui/App.h +++ b/pcsx2/gui/App.h @@ -500,8 +500,6 @@ extern void LoadPluginsPassive(); extern void LoadPluginsImmediate(); extern void UnloadPlugins(); -extern bool HandlePluginError( Exception::PluginError& ex ); - 
extern void AppLoadSettings(); extern void AppSaveSettings(); extern void AppApplySettings( const AppConfig* oldconf=NULL ); diff --git a/pcsx2/gui/AppCoreThread.cpp b/pcsx2/gui/AppCoreThread.cpp new file mode 100644 index 0000000000..e53b2fdd86 --- /dev/null +++ b/pcsx2/gui/AppCoreThread.cpp @@ -0,0 +1,182 @@ +/* PCSX2 - PS2 Emulator for PCs + * Copyright (C) 2002-2009 PCSX2 Dev Team + * + * PCSX2 is free software: you can redistribute it and/or modify it under the terms + * of the GNU Lesser General Public License as published by the Free Software Found- + * ation, either version 3 of the License, or (at your option) any later version. + * + * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along with PCSX2. + * If not, see . + */ + +#include "PrecompiledHeader.h" +#include "MainFrame.h" + +AppCoreThread::AppCoreThread( PluginManager& plugins ) : + SysCoreThread( plugins ) +, m_kevt() +{ +} + +AppCoreThread::~AppCoreThread() throw() +{ +} + +void AppCoreThread::Suspend( bool isBlocking ) +{ + _parent::Suspend( isBlocking ); + if( HasMainFrame() ) + GetMainFrame().ApplySettings(); + + // Clear the sticky key statuses, because hell knows what'll change while the PAD + // plugin is suspended. + + m_kevt.m_shiftDown = false; + m_kevt.m_controlDown = false; + m_kevt.m_altDown = false; +} + +void AppCoreThread::Resume() +{ + // Thread control (suspend / resume) should only be performed from the main/gui thread. + if( !AllowFromMainThreadOnly() ) return; + + if( sys_resume_lock ) + { + Console.WriteLn( "SysResume: State is locked, ignoring Resume request!" 
 ); + return; + } + _parent::Resume(); +} + +void AppCoreThread::OnResumeReady() +{ + if( m_shortSuspend ) return; + + ApplySettings( g_Conf->EmuOptions ); + + if( GSopen2 != NULL ) + wxGetApp().OpenGsFrame(); + + if( HasMainFrame() ) + GetMainFrame().ApplySettings(); +} + +// Called whenever the thread has terminated, for either regular or irregular reasons. +// Typically the thread handles all its own errors, so there's no need to have error +// handling here. However it's a good idea to update the status of the GUI to reflect +// the new (lack of) thread status, so this posts a message to the App to do so. +void AppCoreThread::OnThreadCleanup() +{ + wxCommandEvent evt( pxEVT_AppCoreThread_Terminated ); + wxGetApp().AddPendingEvent( evt ); + _parent::OnThreadCleanup(); +} + +#ifdef __WXGTK__ + extern int TranslateGDKtoWXK( u32 keysym ); +#endif + +void AppCoreThread::StateCheck( bool isCancelable ) +{ + _parent::StateCheck( isCancelable ); + + const keyEvent* ev = PADkeyEvent(); + if( ev == NULL || (ev->key == 0) ) return; + + m_plugins.KeyEvent( *ev ); + m_kevt.SetEventType( ( ev->evt == KEYPRESS ) ? wxEVT_KEY_DOWN : wxEVT_KEY_UP ); + const bool isDown = (ev->evt == KEYPRESS); + + #ifdef __WXMSW__ + const int vkey = wxCharCodeMSWToWX( ev->key ); + #elif defined( __WXGTK__ ) + const int vkey = TranslateGDKtoWXK( ev->key ); + #else + # error Unsupported Target Platform. + #endif + + switch (vkey) + { + case WXK_SHIFT: m_kevt.m_shiftDown = isDown; return; + case WXK_CONTROL: m_kevt.m_controlDown = isDown; return; + case WXK_MENU: m_kevt.m_altDown = isDown; return; + } + + m_kevt.m_keyCode = vkey; + wxGetApp().PostPadKey( m_kevt ); +} + +// To simplify settings application rules and re-entry conditions, the main App's implementation +// of ApplySettings requires that the caller manually ensure that the thread has been properly +// suspended. If the thread has not been suspended, this call will fail *silently*. 
+void AppCoreThread::ApplySettings( const Pcsx2Config& src ) +{ + if( !IsSuspended() ) return; + + // Re-entry guard protects against cases where code wants to manually set core settings + // which are not part of g_Conf. The subsequent call to apply g_Conf settings (which is + // usually the desired behavior) will be ignored. + + static int localc = 0; + RecursionGuard guard( localc ); + if(guard.IsReentrant()) return; + SysCoreThread::ApplySettings( src ); +} + +void AppCoreThread::ExecuteTask() +{ + try + { + SysCoreThread::ExecuteTask(); + } + // ---------------------------------------------------------------------------- + catch( Exception::FileNotFound& ex ) + { + m_plugins.Close(); + if( ex.StreamName == g_Conf->FullpathToBios() ) + { + m_plugins.Close(); + bool result = Msgbox::OkCancel( ex.FormatDisplayMessage() + + _("\n\nPress Ok to go to the BIOS Configuration Panel.") ); + + if( result ) + { + if( wxGetApp().ThreadedModalDialog( DialogId_BiosSelector ) == wxID_CANCEL ) + { + // fixme: handle case where user cancels the settings dialog. (should return FALSE). + } + else + { + // fixme: automatically re-try emu startup here... + } + } + } + } + // ---------------------------------------------------------------------------- + catch( Exception::PluginError& ex ) + { + m_plugins.Close(); + Console.Error( ex.FormatDiagnosticMessage() ); + Msgbox::Alert( ex.FormatDisplayMessage(), _("Plugin Open Error") ); + + /*if( HandlePluginError( ex ) ) + { + // fixme: automatically re-try emu startup here... + }*/ + } + // ---------------------------------------------------------------------------- + // [TODO] : Add exception handling here for debuggable PS2 exceptions that allows + // invocation of the PCSX2 debugger and such. + // + catch( Exception::BaseException& ex ) + { + // Send the exception back to the main gui thread? 
+ m_plugins.Close(); + Msgbox::Alert( ex.FormatDisplayMessage() ); + } +} diff --git a/pcsx2/gui/AppMain.cpp b/pcsx2/gui/AppMain.cpp index f37e604869..025266bc77 100644 --- a/pcsx2/gui/AppMain.cpp +++ b/pcsx2/gui/AppMain.cpp @@ -42,119 +42,7 @@ bool UseDefaultSettingsFolder = true; ScopedPtr g_Conf; ConfigOverrides OverrideOptions; -AppCoreThread::AppCoreThread( PluginManager& plugins ) : - SysCoreThread( plugins ) -, m_kevt() -{ -} - -AppCoreThread::~AppCoreThread() throw() -{ -} - -void AppCoreThread::Suspend( bool isBlocking ) -{ - _parent::Suspend( isBlocking ); - if( HasMainFrame() ) - GetMainFrame().ApplySettings(); - - // Clear the sticky key statuses, because hell knows what'll change while the PAD - // plugin is suspended. - - m_kevt.m_shiftDown = false; - m_kevt.m_controlDown = false; - m_kevt.m_altDown = false; -} - -void AppCoreThread::Resume() -{ - // Thread control (suspend / resume) should only be performed from the main/gui thread. - if( !AllowFromMainThreadOnly() ) return; - - if( sys_resume_lock ) - { - Console.WriteLn( "SysResume: State is locked, ignoring Resume request!" ); - return; - } - _parent::Resume(); -} - -void AppCoreThread::OnResumeReady() -{ - if( m_shortSuspend ) return; - - ApplySettings( g_Conf->EmuOptions ); - - if( GSopen2 != NULL ) - wxGetApp().OpenGsFrame(); - - if( HasMainFrame() ) - GetMainFrame().ApplySettings(); -} - -// Called whenever the thread has terminated, for either regular or irregular reasons. -// Typically the thread handles all its own errors, so there's no need to have error -// handling here. However it's a good idea to update the status of the GUI to reflect -// the new (lack of) thread status, so this posts a message to the App to do so. 
-void AppCoreThread::OnThreadCleanup() -{ - wxCommandEvent evt( pxEVT_AppCoreThread_Terminated ); - wxGetApp().AddPendingEvent( evt ); - _parent::OnThreadCleanup(); -} - -#ifdef __WXGTK__ - extern int TranslateGDKtoWXK( u32 keysym ); -#endif - -void AppCoreThread::StateCheck( bool isCancelable ) -{ - _parent::StateCheck( isCancelable ); - - const keyEvent* ev = PADkeyEvent(); - if( ev == NULL || (ev->key == 0) ) return; - - m_plugins.KeyEvent( *ev ); - m_kevt.SetEventType( ( ev->evt == KEYPRESS ) ? wxEVT_KEY_DOWN : wxEVT_KEY_UP ); - const bool isDown = (ev->evt == KEYPRESS); - - #ifdef __WXMSW__ - const int vkey = wxCharCodeMSWToWX( ev->key ); - #elif defined( __WXGTK__ ) - const int vkey = TranslateGDKtoWXK( ev->key ); - #else - # error Unsupported Target Platform. - #endif - - switch (vkey) - { - case WXK_SHIFT: m_kevt.m_shiftDown = isDown; return; - case WXK_CONTROL: m_kevt.m_controlDown = isDown; return; - case WXK_MENU: m_kevt.m_altDown = isDown; return; - } - - m_kevt.m_keyCode = vkey; - wxGetApp().PostPadKey( m_kevt ); -} - -// To simplify settings application rules and re-entry conditions, the main App's implementation -// of ApplySettings requires that the caller manually ensure that the thread has been properly -// suspended. If the thread has mot been suspended, this call will fail *silently*. -void AppCoreThread::ApplySettings( const Pcsx2Config& src ) -{ - if( !IsSuspended() ) return; - - // Re-entry guard protects against cases where code wants to manually set core settings - // which are not part of g_Conf. The subsequent call to apply g_Conf settings (which is - // usually the desired behavior) will be ignored. 
- - static int localc = 0; - RecursionGuard guard( localc ); - if(guard.IsReentrant()) return; - SysCoreThread::ApplySettings( src ); -} - -bool HandlePluginError( Exception::PluginError& ex ) +static bool HandlePluginError( Exception::PluginError& ex ) { if( pxDialogExists( DialogId_CoreSettings ) ) return true; @@ -173,59 +61,6 @@ bool HandlePluginError( Exception::PluginError& ex ) return result; } -void AppCoreThread::ExecuteTask() -{ - try - { - SysCoreThread::ExecuteTask(); - } - // ---------------------------------------------------------------------------- - catch( Exception::FileNotFound& ex ) - { - m_plugins.Close(); - if( ex.StreamName == g_Conf->FullpathToBios() ) - { - m_plugins.Close(); - bool result = Msgbox::OkCancel( ex.FormatDisplayMessage() + - _("\n\nPress Ok to go to the BIOS Configuration Panel.") ); - - if( result ) - { - if( wxGetApp().ThreadedModalDialog( DialogId_BiosSelector ) == wxID_CANCEL ) - { - // fixme: handle case where user cancels the settings dialog. (should return FALSE). - } - else - { - // fixme: automatically re-try emu startup here... - } - } - } - } - // ---------------------------------------------------------------------------- - catch( Exception::PluginError& ex ) - { - m_plugins.Close(); - Console.Error( ex.FormatDiagnosticMessage() ); - Msgbox::Alert( ex.FormatDisplayMessage(), _("Plugin Open Error") ); - - /*if( HandlePluginError( ex ) ) - { - // fixme: automatically re-try emu startup here... - }*/ - } - // ---------------------------------------------------------------------------- - // [TODO] : Add exception handling here for debuggable PS2 exceptions that allows - // invocation of the PCSX2 debugger and such. - // - catch( Exception::BaseException& ex ) - { - // Sent the exception back to the main gui thread? - m_plugins.Close(); - Msgbox::Alert( ex.FormatDisplayMessage() ); - } -} - // Allows for activating menu actions from anywhere in PCSX2. // And it's Thread Safe! 
void Pcsx2App::PostMenuAction( MenuIdentifiers menu_id ) const diff --git a/pcsx2/ps2/GIFpath.cpp b/pcsx2/ps2/GIFpath.cpp index 1eb3442868..394b42e02c 100644 --- a/pcsx2/ps2/GIFpath.cpp +++ b/pcsx2/ps2/GIFpath.cpp @@ -191,7 +191,7 @@ static void __fastcall RegHandlerUNMAPPED(const u32* data) #define INSERT_UNMAPPED_16 INSERT_UNMAPPED_4 INSERT_UNMAPPED_4 INSERT_UNMAPPED_4 INSERT_UNMAPPED_4 #define INSERT_UNMAPPED_64 INSERT_UNMAPPED_16 INSERT_UNMAPPED_16 INSERT_UNMAPPED_16 INSERT_UNMAPPED_16 -PCSX2_ALIGNED16( static GifPathStruct s_gifPath ) = +static __aligned16 GifPathStruct s_gifPath = { RegHandlerSIGNAL, RegHandlerFINISH, RegHandlerLABEL, RegHandlerUNMAPPED, diff --git a/pcsx2/vtlb.cpp b/pcsx2/vtlb.cpp index 00c2ba4521..7813022233 100644 --- a/pcsx2/vtlb.cpp +++ b/pcsx2/vtlb.cpp @@ -47,7 +47,7 @@ using namespace vtlb_private; namespace vtlb_private { - PCSX2_ALIGNED( 64, MapData vtlbdata ); + __aligned(64) MapData vtlbdata; } vtlbHandler vtlbHandlerCount=0; diff --git a/pcsx2/vtlb.h b/pcsx2/vtlb.h index bcb0489ef5..fc23621241 100644 --- a/pcsx2/vtlb.h +++ b/pcsx2/vtlb.h @@ -87,5 +87,5 @@ namespace vtlb_private void* RWFT[5][2][128]; }; - PCSX2_ALIGNED_EXTERN( 64, MapData vtlbdata ); + extern __aligned(64) MapData vtlbdata; } diff --git a/pcsx2/windows/VCprojects/pcsx2_2008.vcproj b/pcsx2/windows/VCprojects/pcsx2_2008.vcproj index 7c441412d0..c459b32707 100644 --- a/pcsx2/windows/VCprojects/pcsx2_2008.vcproj +++ b/pcsx2/windows/VCprojects/pcsx2_2008.vcproj @@ -1910,6 +1910,10 @@ RelativePath="..\..\gui\AppConfig.cpp" > + + diff --git a/pcsx2/x86/iCore.cpp b/pcsx2/x86/iCore.cpp index ab293e9ef0..94e4bb1bae 100644 --- a/pcsx2/x86/iCore.cpp +++ b/pcsx2/x86/iCore.cpp @@ -962,7 +962,6 @@ void _freeXMMregs() } } -PCSX2_ALIGNED16(u32 s_zeros[4]) = {0}; int _signExtendXMMtoM(u32 to, x86SSERegType from, int candestroy) { int t0reg; diff --git a/pcsx2/x86/iFPU.cpp b/pcsx2/x86/iFPU.cpp index 8beae7df2e..445a4f1dd2 100644 --- a/pcsx2/x86/iFPU.cpp +++ b/pcsx2/x86/iFPU.cpp @@ 
-75,8 +75,8 @@ void recRSQRT_S_xmm(int info); #define FPU_ADD_SUB_HACK 1 // Add/Sub opcodes produce more ps2-like results if set to 1 -static u32 PCSX2_ALIGNED16(s_neg[4]) = { 0x80000000, 0xffffffff, 0xffffffff, 0xffffffff }; -static u32 PCSX2_ALIGNED16(s_pos[4]) = { 0x7fffffff, 0xffffffff, 0xffffffff, 0xffffffff }; +static const __aligned16 u32 s_neg[4] = { 0x80000000, 0xffffffff, 0xffffffff, 0xffffffff }; +static const __aligned16 u32 s_pos[4] = { 0x7fffffff, 0xffffffff, 0xffffffff, 0xffffffff }; #define REC_FPUBRANCH(f) \ void f(); \ @@ -367,7 +367,7 @@ REC_FPUFUNC(RSQRT_S); // Clamp Functions (Converts NaN's and Infinities to Normal Numbers) //------------------------------------------------------------------ -PCSX2_ALIGNED16(u64 FPU_FLOAT_TEMP[2]); +static __aligned16 u64 FPU_FLOAT_TEMP[2]; __forceinline void fpuFloat4(int regd) { // +NaN -> +fMax, -NaN -> -fMax, +Inf -> +fMax, -Inf -> -fMax int t1reg = _allocTempXMMreg(XMMT_FPS, -1); if (t1reg >= 0) { @@ -1100,9 +1100,10 @@ void recDIVhelper2(int regd, int regt) // Doesn't sets flags ClampValues(regd); } +static __aligned16 u32 roundmode_temp[4]; + void recDIV_S_xmm(int info) { - static u32 PCSX2_ALIGNED16(roundmode_temp[4]) = { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }; int roundmodeFlag = 0; int t0reg = _allocTempXMMreg(XMMT_FPS, -1); //if (t0reg == -1) {Console.Error("FPU: DIV Allocation Error!");} @@ -1661,7 +1662,6 @@ FPURECOMPILE_CONSTCODE(SUBA_S, XMMINFO_WRITEACC|XMMINFO_READS|XMMINFO_READT); void recSQRT_S_xmm(int info) { u8* pjmp; - static u32 PCSX2_ALIGNED16(roundmode_temp[4]) = { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }; int roundmodeFlag = 0; //Console.WriteLn("FPU: SQRT"); diff --git a/pcsx2/x86/iFPUd.cpp b/pcsx2/x86/iFPUd.cpp index a0f544df2d..8752f7b5be 100644 --- a/pcsx2/x86/iFPUd.cpp +++ b/pcsx2/x86/iFPUd.cpp @@ -70,9 +70,6 @@ namespace DOUBLE { #define FPU_ADD_SUB_HACK 1 // Add/Sub opcodes produce more ps2-like results if set to 1 -static u32 PCSX2_ALIGNED16(s_neg[4]) = { 
0x80000000, 0xffffffff, 0xffffffff, 0xffffffff }; -static u32 PCSX2_ALIGNED16(s_pos[4]) = { 0x7fffffff, 0xffffffff, 0xffffffff, 0xffffffff }; - #define REC_FPUBRANCH(f) \ void f(); \ void rec##f() { \ @@ -100,21 +97,49 @@ static u32 PCSX2_ALIGNED16(s_pos[4]) = { 0x7fffffff, 0xffffffff, 0xffffffff, 0xf //------------------------------------------------------------------ // PS2 -> DOUBLE //------------------------------------------------------------------ - + #define SINGLE(sign, exp, mant) (((sign)<<31) | ((exp)<<23) | (mant)) #define DOUBLE(sign, exp, mant) (((sign ## ULL)<<63) | ((exp ## ULL)<<52) | (mant ## ULL)) - -static u32 PCSX2_ALIGNED16(pos_inf[4]) = {SINGLE(0,0xff,0), 0, 0, 0}; -static u32 PCSX2_ALIGNED16(neg_inf[4]) = {SINGLE(1,0xff,0), 0, 0, 0}; -static u32 PCSX2_ALIGNED16(one_exp[4]) = {SINGLE(0,1,0), 0, 0, 0}; -static u64 PCSX2_ALIGNED16(dbl_one_exp[2]) = {DOUBLE(0,1,0), 0}; - -static u64 PCSX2_ALIGNED16(dbl_cvt_overflow) = DOUBLE(0,1151,0); //needs special code if above or equal -static u64 PCSX2_ALIGNED16(dbl_ps2_overflow) = DOUBLE(0,1152,0); //overflow & clamp if above or equal -static u64 PCSX2_ALIGNED16(dbl_underflow) = DOUBLE(0,897,0); //underflow if below - -static u64 PCSX2_ALIGNED16(dbl_s_pos[2]) = {0x7fffffffffffffffULL, 0}; -//static u64 PCSX2_ALIGNED16(dbl_s_neg[2]) = {0x8000000000000000ULL, 0}; + +// Constants read by the emitted SSE code. Every member used as a 128-bit (M128) memory operand must sit at a 16-byte offset. +struct FPUd_Globals +{ + u32 neg[4], pos[4]; + + u32 pos_inf[4], neg_inf[4], + one_exp[4]; + + u64 dbl_one_exp[2]; + + u64 dbl_cvt_overflow, // needs special code if above or equal + dbl_ps2_overflow, // overflow & clamp if above or equal + dbl_underflow; // underflow if below + + u64 padding; // moves dbl_s_pos from offset 120 to 128 so it can be used as an M128 operand + u64 dbl_s_pos[2]; + //u64 dbl_s_neg[2]; +}; + +static const __aligned(32) FPUd_Globals s_const = +{ + { 0x80000000, 0xffffffff, 0xffffffff, 0xffffffff }, + { 0x7fffffff, 0xffffffff, 0xffffffff, 0xffffffff }, + + {SINGLE(0,0xff,0), 0, 0, 0}, + {SINGLE(1,0xff,0), 0, 0, 0}, + {SINGLE(0,1,0), 0, 0, 0}, + + {DOUBLE(0,1,0), 0}, + + DOUBLE(0,1151,0), + DOUBLE(0,1152,0), +
DOUBLE(0,897,0), + 0, // padding (keeps dbl_s_pos 16-byte aligned) + + {0x7fffffffffffffffULL, 0}, + //{0x8000000000000000ULL, 0}, +}; + // converts small normal numbers to double equivalent // converts large normal numbers (which represent NaN/inf in IEEE) to double equivalent @@ -122,9 +144,9 @@ static u64 PCSX2_ALIGNED16(dbl_s_pos[2]) = {0x7fffffffffffffffULL, 0}; //mustn't use EAX/ECX/EDX/x86regs (MUL) void ToDouble(int reg) { - SSE_UCOMISS_M32_to_XMM(reg, (uptr)&pos_inf); //sets ZF if equal or uncomparable + SSE_UCOMISS_M32_to_XMM(reg, (uptr)s_const.pos_inf); //sets ZF if equal or incomparable u8 *to_complex = JE8(0); //complex conversion if positive infinity or NaN - SSE_UCOMISS_M32_to_XMM(reg, (uptr)&neg_inf); + SSE_UCOMISS_M32_to_XMM(reg, (uptr)s_const.neg_inf); u8 *to_complex2 = JE8(0); //complex conversion if negative infinity SSE2_CVTSS2SD_XMM_to_XMM(reg, reg); //simply convert @@ -133,9 +155,9 @@ void ToDouble(int reg) x86SetJ8(to_complex); x86SetJ8(to_complex2); - SSE2_PSUBD_M128_to_XMM(reg, (uptr)&one_exp); //lower exponent + SSE2_PSUBD_M128_to_XMM(reg, (uptr)s_const.one_exp); //lower exponent SSE2_CVTSS2SD_XMM_to_XMM(reg, reg); - SSE2_PADDQ_M128_to_XMM(reg, (uptr)&dbl_one_exp); //raise exponent + SSE2_PADDQ_M128_to_XMM(reg, (uptr)s_const.dbl_one_exp); //raise exponent x86SetJ8(end); } @@ -166,29 +188,29 @@ void ToPS2FPU_Full(int reg, bool flags, int absreg, bool acc, bool addsub) AND32ItoM((uptr)&fpuRegs.ACCflag, ~1); SSE_MOVAPS_XMM_to_XMM(absreg, reg); - SSE2_ANDPD_M128_to_XMM(absreg, (uptr)&dbl_s_pos); + SSE2_ANDPD_M128_to_XMM(absreg, (uptr)&s_const.dbl_s_pos); - SSE2_UCOMISD_M64_to_XMM(absreg, (uptr)&dbl_cvt_overflow); + SSE2_UCOMISD_M64_to_XMM(absreg, (uptr)&s_const.dbl_cvt_overflow); u8 *to_complex = JAE8(0); - SSE2_UCOMISD_M64_to_XMM(absreg, (uptr)&dbl_underflow); + SSE2_UCOMISD_M64_to_XMM(absreg, (uptr)&s_const.dbl_underflow); u8 *to_underflow = JB8(0); SSE2_CVTSD2SS_XMM_to_XMM(reg, reg); //simply convert u8 *end = JMP8(0); x86SetJ8(to_complex); - SSE2_UCOMISD_M64_to_XMM(absreg, 
(uptr)&dbl_ps2_overflow); + SSE2_UCOMISD_M64_to_XMM(absreg, (uptr)&s_const.dbl_ps2_overflow); u8 *to_overflow = JAE8(0); - SSE2_PSUBQ_M128_to_XMM(reg, (uptr)&dbl_one_exp); //lower exponent + SSE2_PSUBQ_M128_to_XMM(reg, (uptr)&s_const.dbl_one_exp); //lower exponent SSE2_CVTSD2SS_XMM_to_XMM(reg, reg); //convert - SSE2_PADDD_M128_to_XMM(reg, (uptr)one_exp); //raise exponent + SSE2_PADDD_M128_to_XMM(reg, (uptr)s_const.one_exp); //raise exponent u8 *end2 = JMP8(0); x86SetJ8(to_overflow); SSE2_CVTSD2SS_XMM_to_XMM(reg, reg); - SSE_ORPS_M128_to_XMM(reg, (uptr)&s_pos); //clamp + SSE_ORPS_M128_to_XMM(reg, (uptr)&s_const.pos); //clamp if (flags && FPU_FLAGS_OVERFLOW) OR32ItoM((uptr)&fpuRegs.fprc[31], (FPUflagO | FPUflagSO)); if (flags && FPU_FLAGS_OVERFLOW && acc) @@ -221,7 +243,7 @@ void ToPS2FPU_Full(int reg, bool flags, int absreg, bool acc, bool addsub) x86SetJ8(is_zero); } SSE2_CVTSD2SS_XMM_to_XMM(reg, reg); - SSE_ANDPS_M128_to_XMM(reg, (uptr)&s_neg); //flush to zero + SSE_ANDPS_M128_to_XMM(reg, (uptr)s_const.neg); //flush to zero x86SetJ8(end); x86SetJ8(end2); @@ -247,10 +269,10 @@ void ToPS2FPU(int reg, bool flags, int absreg, bool acc, bool addsub = false) void SetMaxValue(int regd) { if (FPU_RESULT) - SSE_ORPS_M128_to_XMM(regd, (uptr)&s_pos[0]); // set regd to maximum + SSE_ORPS_M128_to_XMM(regd, (uptr)&s_const.pos[0]); // set regd to maximum else { - SSE_ANDPS_M128_to_XMM(regd, (uptr)&s_neg[0]); // Get the sign bit + SSE_ANDPS_M128_to_XMM(regd, (uptr)&s_const.neg[0]); // Get the sign bit SSE_ORPS_M128_to_XMM(regd, (uptr)&g_maxvals[0]); // regd = +/- Maximum (CLAMP)! } } @@ -285,7 +307,7 @@ void recABS_S_xmm(int info) CLEAR_OU_FLAGS; - SSE_ANDPS_M128_to_XMM(EEREC_D, (uptr)&s_pos[0]); + SSE_ANDPS_M128_to_XMM(EEREC_D, (uptr)s_const.pos); } FPURECOMPILE_CONSTCODE(ABS_S, XMMINFO_WRITED|XMMINFO_READS); @@ -341,7 +363,7 @@ void FPU_ADD_SUB(int tempd, int tempt) //tempd and tempt are overwritten, they a x86SetJ8(j8Ptr[0]); //diff = 25 .. 
255 , expt < expd - SSE_ANDPS_M128_to_XMM(tempt, (uptr)s_neg); + SSE_ANDPS_M128_to_XMM(tempt, (uptr)s_const.neg); j8Ptr[5] = JMP8(0); x86SetJ8(j8Ptr[1]); @@ -355,7 +377,7 @@ void FPU_ADD_SUB(int tempd, int tempt) //tempd and tempt are overwritten, they a x86SetJ8(j8Ptr[3]); //diff = -255 .. -25, expd < expt - SSE_ANDPS_M128_to_XMM(tempd, (uptr)s_neg); + SSE_ANDPS_M128_to_XMM(tempd, (uptr)s_const.neg); j8Ptr[7] = JMP8(0); x86SetJ8(j8Ptr[2]); @@ -606,10 +628,11 @@ void recDIVhelper2(int regd, int regt) // Doesn't sets flags ToPS2FPU(regd, false, regt, false); } - + +static __aligned16 u32 roundmode_temp[4]; + void recDIV_S_xmm(int info) { - static u32 PCSX2_ALIGNED16(roundmode_temp[4]) = { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }; int roundmodeFlag = 0; //if (t0reg == -1) {Console.Error("FPU: DIV Allocation Error!");} //Console.WriteLn("DIV"); @@ -678,7 +701,7 @@ void recMaddsub(int info, int regd, int op, bool acc) x86SetJ8(mulovf); if (op == 1) //sub - SSE_XORPS_M128_to_XMM(sreg, (uptr)&s_neg); + SSE_XORPS_M128_to_XMM(sreg, (uptr)s_const.neg); SSE_MOVAPS_XMM_to_XMM(treg, sreg); //fall through below x86SetJ8(accovf); @@ -726,8 +749,11 @@ FPURECOMPILE_CONSTCODE(MADDA_S, XMMINFO_WRITEACC|XMMINFO_READACC|XMMINFO_READS|X // MAX / MIN XMM //------------------------------------------------------------------ -static const u32 PCSX2_ALIGNED16(minmax_mask[4]) = {0xffffffff, 0x80000000, 0, 0}; -static const u32 PCSX2_ALIGNED16(minmax_mask2[4]) = {0, 0x40000000, 0, 0}; +static const __aligned16 u32 minmax_mask[8] = +{ + 0xffffffff, 0x80000000, 0, 0, + 0, 0x40000000, 0, 0 +}; // FPU's MAX/MIN work with all numbers (including "denormals"). Check VU's logical min max for more info. 
void recMINMAX(int info, bool ismin) { @@ -738,10 +764,10 @@ void recMINMAX(int info, bool ismin) SSE2_PSHUFD_XMM_to_XMM(sreg, sreg, 0x00); SSE2_PAND_M128_to_XMM(sreg, (uptr)minmax_mask); - SSE2_POR_M128_to_XMM(sreg, (uptr)minmax_mask2); + SSE2_POR_M128_to_XMM(sreg, (uptr)&minmax_mask[4]); SSE2_PSHUFD_XMM_to_XMM(treg, treg, 0x00); SSE2_PAND_M128_to_XMM(treg, (uptr)minmax_mask); - SSE2_POR_M128_to_XMM(treg, (uptr)minmax_mask2); + SSE2_POR_M128_to_XMM(treg, (uptr)&minmax_mask[4]); if (ismin) SSE2_MINSD_XMM_to_XMM(sreg, treg); else @@ -835,7 +861,7 @@ void recNEG_S_xmm(int info) { CLEAR_OU_FLAGS; - SSE_XORPS_M128_to_XMM(EEREC_D, (uptr)&s_neg[0]); + SSE_XORPS_M128_to_XMM(EEREC_D, (uptr)&s_const.neg[0]); } FPURECOMPILE_CONSTCODE(NEG_S, XMMINFO_WRITED|XMMINFO_READS); @@ -869,7 +895,6 @@ FPURECOMPILE_CONSTCODE(SUBA_S, XMMINFO_WRITEACC|XMMINFO_READS|XMMINFO_READT); void recSQRT_S_xmm(int info) { u8 *pjmp; - static u32 PCSX2_ALIGNED16(roundmode_temp[4]) = { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }; int roundmodeFlag = 0; int tempReg = _allocX86reg(-1, X86TYPE_TEMP, 0, 0); if (tempReg == -1) {Console.Error("FPU: SQRT Allocation Error!"); tempReg = EAX;} @@ -895,12 +920,12 @@ void recSQRT_S_xmm(int info) AND32ItoR(tempReg, 1); //Check sign pjmp = JZ8(0); //Skip if none are OR32ItoM((uptr)&fpuRegs.fprc[31], FPUflagI|FPUflagSI); // Set I and SI flags - SSE_ANDPS_M128_to_XMM(EEREC_D, (uptr)&s_pos[0]); // Make EEREC_D Positive + SSE_ANDPS_M128_to_XMM(EEREC_D, (uptr)&s_const.pos[0]); // Make EEREC_D Positive x86SetJ8(pjmp); } else { - SSE_ANDPS_M128_to_XMM(EEREC_D, (uptr)&s_pos[0]); // Make EEREC_D Positive + SSE_ANDPS_M128_to_XMM(EEREC_D, (uptr)&s_const.pos[0]); // Make EEREC_D Positive } @@ -941,7 +966,7 @@ void recRSQRThelper1(int regd, int regt) // Preforms the RSQRT function when reg AND32ItoR(tempReg, 1); //Check sign pjmp2 = JZ8(0); //Skip if not set OR32ItoM((uptr)&fpuRegs.fprc[31], FPUflagI|FPUflagSI); // Set I and SI flags - SSE_ANDPS_M128_to_XMM(regt, 
(uptr)&s_pos[0]); // Make regt Positive + SSE_ANDPS_M128_to_XMM(regt, (uptr)&s_const.pos[0]); // Make regt Positive x86SetJ8(pjmp2); //--- Check for zero --- @@ -981,7 +1006,7 @@ void recRSQRThelper1(int regd, int regt) // Preforms the RSQRT function when reg void recRSQRThelper2(int regd, int regt) // Preforms the RSQRT function when regd <- Fs and regt <- Ft (Doesn't set flags) { - SSE_ANDPS_M128_to_XMM(regt, (uptr)&s_pos[0]); // Make regt Positive + SSE_ANDPS_M128_to_XMM(regt, (uptr)&s_const.pos[0]); // Make regt Positive ToDouble(regt); ToDouble(regd); @@ -995,7 +1020,6 @@ void recRSQRT_S_xmm(int info) { int sreg, treg; - static u32 PCSX2_ALIGNED16(roundmode_temp[4]) = { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }; int roundmodeFlag = 0; if ((g_sseMXCSR & 0x00006000) != 0x00000000) { // Set roundmode to nearest if it isn't already //Console.WriteLn("rsqrt to nearest"); diff --git a/pcsx2/x86/iMMI.cpp b/pcsx2/x86/iMMI.cpp index 7f505ec27f..1d8fc2c629 100644 --- a/pcsx2/x86/iMMI.cpp +++ b/pcsx2/x86/iMMI.cpp @@ -174,8 +174,6 @@ void recPLZCW() GPR_DEL_CONST(_Rd_); } -//static u32 PCSX2_ALIGNED16(s_CmpMasks[]) = { 0x0000ffff, 0x0000ffff, 0x0000ffff, 0x0000ffff }; - void recPMFHL() { if ( ! 
_Rd_ ) return; @@ -1182,8 +1180,6 @@ REC_FUNC_DEL( QFSRV, _Rd_); #else //////////////////////////////////////////////////// -PCSX2_ALIGNED16(int s_MaskHighBitD[4]) = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 }; -PCSX2_ALIGNED16(int s_MaskHighBitW[4]) = { 0x80008000, 0x80008000, 0x80008000, 0x80008000 }; void recPABSW() //needs clamping { @@ -2023,7 +2019,6 @@ void recPDIVBW() } //////////////////////////////////////////////////// -PCSX2_ALIGNED16(int s_mask1[4]) = {~0, 0, ~0, 0}; //upper word of each doubleword in LO and HI is undocumented/undefined //contains the upper multiplication result (before the addition with the lower multiplication result) @@ -2492,7 +2487,7 @@ void recPSRAVW() //////////////////////////////////////////////////// -PCSX2_ALIGNED16(u32 s_tempPINTEH[4]) = {0x0000ffff, 0x0000ffff, 0x0000ffff, 0x0000ffff }; +static const __aligned16 u32 s_tempPINTEH[4] = {0x0000ffff, 0x0000ffff, 0x0000ffff, 0x0000ffff }; void recPINTEH() { diff --git a/pcsx2/x86/iR5900.h b/pcsx2/x86/iR5900.h index 78370d2b38..9a07ddefd0 100644 --- a/pcsx2/x86/iR5900.h +++ b/pcsx2/x86/iR5900.h @@ -116,8 +116,7 @@ extern void recDoBranchImm_Likely( u32* jmpSkip ); if( (reg) < 32 ) g_cpuHasConstReg &= ~(1<<(reg)); \ } -extern void (*recBSC_co[64])(); -PCSX2_ALIGNED16_EXTERN(GPR_reg64 g_cpuConstRegs[32]); +extern __aligned16 GPR_reg64 g_cpuConstRegs[32]; extern u32 g_cpuHasConstReg, g_cpuFlushedConstReg; // gets a memory pointer to the constant reg diff --git a/pcsx2/x86/iVU1micro.cpp b/pcsx2/x86/iVU1micro.cpp index 9375ec6533..99055bf186 100644 --- a/pcsx2/x86/iVU1micro.cpp +++ b/pcsx2/x86/iVU1micro.cpp @@ -88,10 +88,10 @@ extern u32 vudump; #include #endif -PCSX2_ALIGNED16(u8 backVUregs[sizeof(VURegs)]); -PCSX2_ALIGNED16(u8 cmpVUregs [sizeof(VURegs)]); -PCSX2_ALIGNED16(u8 backVUmem [0x4000]); -PCSX2_ALIGNED16(u8 cmpVUmem [0x4000]); +__aligned16 u8 backVUregs[sizeof(VURegs)]; +__aligned16 u8 cmpVUregs [sizeof(VURegs)]; +__aligned16 u8 backVUmem [0x4000]; +__aligned16 u8 
cmpVUmem [0x4000]; static u32 runCount = 0; #define VU3 ((VURegs)*((VURegs*)cmpVUregs)) #define fABS(aInt) (aInt & 0x7fffffff) diff --git a/pcsx2/x86/iVif.cpp b/pcsx2/x86/iVif.cpp index dfd8555e7f..2b02d67577 100644 --- a/pcsx2/x86/iVif.cpp +++ b/pcsx2/x86/iVif.cpp @@ -28,7 +28,7 @@ extern u32 g_vif1Masks[48], g_vif0Masks[48]; extern u32 g_vif1HasMask3[4], g_vif0HasMask3[4]; // arranged in writearr, rowarr, colarr, updatearr -static PCSX2_ALIGNED16(u32 s_maskarr[16][4]) = { +static const __aligned16 u32 s_maskarr[16][4] = { {0xffffffff, 0x00000000, 0x00000000, 0xffffffff}, {0xffff0000, 0x0000ffff, 0x00000000, 0xffffffff}, {0xffff0000, 0x00000000, 0x0000ffff, 0xffffffff}, @@ -49,9 +49,10 @@ static PCSX2_ALIGNED16(u32 s_maskarr[16][4]) = { extern u8 s_maskwrite[256]; -extern "C" PCSX2_ALIGNED16(u32 s_TempDecompress[4]); +// Dear C++: Please don't mangle this name, thanks! +extern "C" __aligned16 u32 s_TempDecompress[4]; -u32 s_TempDecompress[4] = {0}; +__aligned16 u32 s_TempDecompress[4] = {0}; #ifdef __LINUX__ static void __forceinline UseOldMaskCode(u32* &vif1masks, u32 &mask); diff --git a/pcsx2/x86/ix86-32/iCore-32.cpp b/pcsx2/x86/ix86-32/iCore-32.cpp index 82640ab25f..916707d07b 100644 --- a/pcsx2/x86/ix86-32/iCore-32.cpp +++ b/pcsx2/x86/ix86-32/iCore-32.cpp @@ -1108,7 +1108,7 @@ void _recMove128MtoRmOffset(u32 offset, u32 from) MOV32RtoRm(ECX, EDX, offset+12); } -static PCSX2_ALIGNED16(u32 s_ones[2]) = {0xffffffff, 0xffffffff}; +static const __aligned16 u32 s_ones[2] = {0xffffffff, 0xffffffff}; void LogicalOpRtoR(x86MMXRegType to, x86MMXRegType from, int op) { diff --git a/pcsx2/x86/ix86-32/iR5900-32.cpp b/pcsx2/x86/ix86-32/iR5900-32.cpp index 0c0e225776..32cc19af39 100644 --- a/pcsx2/x86/ix86-32/iR5900-32.cpp +++ b/pcsx2/x86/ix86-32/iR5900-32.cpp @@ -64,7 +64,7 @@ u32 s_nBlockCycles = 0; // cycles of current block recompiling u32 pc; // recompiler pc int branch; // set for branch -PCSX2_ALIGNED16(GPR_reg64 g_cpuConstRegs[32]) = {0}; +__aligned16 GPR_reg64 
g_cpuConstRegs[32] = {0}; u32 g_cpuHasConstReg = 0, g_cpuFlushedConstReg = 0; //////////////////////////////////////////////////////////////// @@ -387,8 +387,8 @@ struct ManualPageTracking u8 counter; }; -PCSX2_ALIGNED16( static u16 manual_page[Ps2MemSize::Base >> 12] ); -PCSX2_ALIGNED16( static u8 manual_counter[Ps2MemSize::Base >> 12] ); +static __aligned16 u16 manual_page[Ps2MemSize::Base >> 12]; +static __aligned16 u8 manual_counter[Ps2MemSize::Base >> 12]; volatile bool eeRecIsReset = false; diff --git a/pcsx2/x86/ix86-32/iR5900LoadStore.cpp b/pcsx2/x86/ix86-32/iR5900LoadStore.cpp index 29b997c910..4ccbab70f6 100644 --- a/pcsx2/x86/ix86-32/iR5900LoadStore.cpp +++ b/pcsx2/x86/ix86-32/iR5900LoadStore.cpp @@ -64,7 +64,7 @@ REC_FUNC(SQC2); #else -PCSX2_ALIGNED16(u64 retValues[2]); +__aligned16 u64 retValues[2]; void _eeOnLoadWrite(int reg) { @@ -2060,7 +2060,7 @@ void recSQC2( void ) using namespace Interpreter::OpcodeImpl; -PCSX2_ALIGNED16(u32 dummyValue[4]); +__aligned16 u32 dummyValue[4]; void SetFastMemory(int bSetFast) { diff --git a/pcsx2/x86/ix86-32/iR5900Shift.cpp b/pcsx2/x86/ix86-32/iR5900Shift.cpp index 02dbd909bd..0014b1ef60 100644 --- a/pcsx2/x86/ix86-32/iR5900Shift.cpp +++ b/pcsx2/x86/ix86-32/iR5900Shift.cpp @@ -578,7 +578,7 @@ EERECOMPILE_CODEX(eeRecompileCode2, DSRA32); * Format: OP rd, rt, rs * *********************************************************/ -PCSX2_ALIGNED16(u32 s_sa[4]) = {0x1f, 0, 0x3f, 0}; +__aligned16 u32 s_sa[4] = {0x1f, 0, 0x3f, 0}; int recSetShiftV(int info, int* rsreg, int* rtreg, int* rdreg, int* rstemp, int forcemmx, int shift64) { diff --git a/pcsx2/x86/ix86-32/recVTLB.cpp b/pcsx2/x86/ix86-32/recVTLB.cpp index 51253094a6..9151ab8977 100644 --- a/pcsx2/x86/ix86-32/recVTLB.cpp +++ b/pcsx2/x86/ix86-32/recVTLB.cpp @@ -243,7 +243,7 @@ namespace vtlb_private // If it were smaller than a page we'd end up allowing execution rights on some // other vars additionally (bad!). 
// -PCSX2_ALIGNED( 0x1000, static u8 m_IndirectDispatchers[0x1000] ); +static __aligned(0x1000) u8 m_IndirectDispatchers[0x1000]; // ------------------------------------------------------------------------ // mode - 0 for read, 1 for write! diff --git a/pcsx2/x86/microVU.cpp b/pcsx2/x86/microVU.cpp index 8dc6fb0319..5cc24d5a49 100644 --- a/pcsx2/x86/microVU.cpp +++ b/pcsx2/x86/microVU.cpp @@ -22,10 +22,39 @@ // Micro VU - Global Variables //------------------------------------------------------------------ -PCSX2_ALIGNED16(microVU microVU0); -PCSX2_ALIGNED16(microVU microVU1); +__aligned16 microVU microVU0; +__aligned16 microVU microVU1; -declareAllVariables // Declares All Global Variables :D +#define __four(val) { val, val, val, val } + +const __aligned(32) mVU_Globals mVUglob = +{ + // absclip / signbit + // minvals / maxvals + __four( 0x7fffffff ), __four( 0x80000000 ), + __four( 0xff7fffff ), __four( 0x7f7fffff ), + + __four( 0x3f800000 ), // ONE! + __four( 0x3f490fdb ), // PI4! + + // T1 T2 T3 T4 + // T5 T6 T7 T8 + __four( 0x3f7ffff5 ), __four( 0xbeaaa61c ), __four( 0x3e4c40a6 ), __four( 0xbe0e6c63 ), + __four( 0x3dc577df ), __four( 0xbd6501c4 ), __four( 0x3cb31652 ), __four( 0xbb84d7e7 ), + + // S2 S3 S4 S5 + __four( 0xbe2aaaa4 ), __four( 0x3c08873e ), __four( 0xb94fb21f ), __four( 0x362e9c14 ), + + // E1 E2 E3 + // E4 E5 E6 + __four( 0x3e7fffa8 ), __four( 0x3d0007f4 ), __four( 0x3b29d3ff ), + __four( 0x3933e553 ), __four( 0x36b63510 ), __four( 0x353961ac ), + + // FTOI_4 / 12 / 15 + // ITOF_4 / 12 / 15 + __four( 16.0 ), __four( 4096.0 ), __four( 32768.0 ), + __four( 0.0625f ), __four( 0.000244140625 ), __four( 0.000030517578125 ) +}; //------------------------------------------------------------------ // Micro VU - Main Functions diff --git a/pcsx2/x86/microVU.h b/pcsx2/x86/microVU.h index 1fcd4f4b97..1d60332b53 100644 --- a/pcsx2/x86/microVU.h +++ b/pcsx2/x86/microVU.h @@ -143,9 +143,9 @@ struct microProgManager { #define mVUcacheSize ((mMaxProg < 20) ? 
(_1mb * 10) : (mMaxProg * (_1mb * 0.5))) // 0.5mb per program struct microVU { - PCSX2_ALIGNED16(u32 macFlag[4]); // 4 instances of mac flag (used in execution) - PCSX2_ALIGNED16(u32 clipFlag[4]); // 4 instances of clip flag (used in execution) - PCSX2_ALIGNED16(u32 xmmPQb[4]); // Backup for xmmPQ + __aligned16 u32 macFlag[4]; // 4 instances of mac flag (used in execution) + __aligned16 u32 clipFlag[4]; // 4 instances of clip flag (used in execution) + __aligned16 u32 xmmPQb[4]; // Backup for xmmPQ u32 index; // VU Index (VU0 or VU1) u32 vuMemSize; // VU Main Memory Size (in bytes) @@ -175,8 +175,8 @@ struct microVU { }; // microVU rec structs -extern PCSX2_ALIGNED16(microVU microVU0); -extern PCSX2_ALIGNED16(microVU microVU1); +extern __aligned16 microVU microVU0; +extern __aligned16 microVU microVU1; // Debug Helper extern int mVUdebugNow; diff --git a/pcsx2/x86/microVU_Lower.inl b/pcsx2/x86/microVU_Lower.inl index beb4504479..5bbd2845bf 100644 --- a/pcsx2/x86/microVU_Lower.inl +++ b/pcsx2/x86/microVU_Lower.inl @@ -40,7 +40,7 @@ TEST32ItoR(gprTemp, 1); \ aJump = JZ8(0); \ MOV32ItoM((uptr)&mVU->divFlag, divI); \ - SSE_ANDPS_M128_to_XMM(xmmReg, (uptr)mVU_absclip); \ + SSE_ANDPS_M128_to_XMM(xmmReg, (uptr)mVUglob.absclip); \ x86SetJ8(aJump); \ } @@ -66,8 +66,8 @@ mVUop(mVU_DIV) { x86SetJ8(bjmp); SSE_XORPS_XMM_to_XMM (Fs, Ft); - SSE_ANDPS_M128_to_XMM(Fs, (uptr)mVU_signbit); - SSE_ORPS_M128_to_XMM (Fs, (uptr)mVU_maxvals); // If division by zero, then xmmFs = +/- fmax + SSE_ANDPS_M128_to_XMM(Fs, (uptr)mVUglob.signbit); + SSE_ORPS_M128_to_XMM (Fs, (uptr)mVUglob.maxvals); // If division by zero, then xmmFs = +/- fmax djmp = JMP8(0); x86SetJ8(cjmp); @@ -94,7 +94,7 @@ mVUop(mVU_SQRT) { MOV32ItoM((uptr)&mVU->divFlag, 0); // Clear I/D flags testNeg(Ft, gprT1, ajmp); // Check for negative sqrt - if (CHECK_VU_OVERFLOW) SSE_MINSS_M32_to_XMM(Ft, (uptr)mVU_maxvals); // Clamp infinities (only need to do positive clamp since xmmFt is positive) + if (CHECK_VU_OVERFLOW) 
SSE_MINSS_M32_to_XMM(Ft, (uptr)mVUglob.maxvals); // Clamp infinities (only need to do positive clamp since xmmFt is positive) SSE_SQRTSS_XMM_to_XMM(Ft, Ft); writeQreg(Ft, mVUinfo.writeQ); @@ -126,8 +126,8 @@ mVUop(mVU_RSQRT) { MOV32ItoM((uptr)&mVU->divFlag, divD); // Zero divide flag (only when not 0/0) x86SetJ8(cjmp); - SSE_ANDPS_M128_to_XMM(Fs, (uptr)mVU_signbit); - SSE_ORPS_M128_to_XMM (Fs, (uptr)mVU_maxvals); // xmmFs = +/-Max + SSE_ANDPS_M128_to_XMM(Fs, (uptr)mVUglob.signbit); + SSE_ORPS_M128_to_XMM (Fs, (uptr)mVUglob.maxvals); // xmmFs = +/-Max djmp = JMP8(0); x86SetJ8(ajmp); @@ -159,16 +159,16 @@ mVUop(mVU_RSQRT) { // ToDo: Can Be Optimized Further? (takes approximately (~115 cycles + mem access time) on a c2d) microVUt(void) mVU_EATAN_(mV, int PQ, int Fs, int t1, int t2) { SSE_MOVSS_XMM_to_XMM (PQ, Fs); - SSE_MULSS_M32_to_XMM (PQ, (uptr)mVU_T1); + SSE_MULSS_M32_to_XMM (PQ, (uptr)mVUglob.T1); SSE_MOVAPS_XMM_to_XMM(t2, Fs); - EATANhelper(mVU_T2); - EATANhelper(mVU_T3); - EATANhelper(mVU_T4); - EATANhelper(mVU_T5); - EATANhelper(mVU_T6); - EATANhelper(mVU_T7); - EATANhelper(mVU_T8); - SSE_ADDSS_M32_to_XMM (PQ, (uptr)mVU_Pi4); + EATANhelper(mVUglob.T2); + EATANhelper(mVUglob.T3); + EATANhelper(mVUglob.T4); + EATANhelper(mVUglob.T5); + EATANhelper(mVUglob.T6); + EATANhelper(mVUglob.T7); + EATANhelper(mVUglob.T8); + SSE_ADDSS_M32_to_XMM (PQ, (uptr)mVUglob.Pi4); SSE2_PSHUFD_XMM_to_XMM(PQ, PQ, mVUinfo.writeP ? 0x27 : 0xC6); } @@ -180,8 +180,8 @@ mVUop(mVU_EATAN) { int t2 = mVU->regAlloc->allocReg(); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 
0x27 : 0xC6); // Flip xmmPQ to get Valid P instance SSE_MOVSS_XMM_to_XMM (xmmPQ, Fs); - SSE_SUBSS_M32_to_XMM (Fs, (uptr)mVU_one); - SSE_ADDSS_M32_to_XMM (xmmPQ, (uptr)mVU_one); + SSE_SUBSS_M32_to_XMM (Fs, (uptr)mVUglob.one); + SSE_ADDSS_M32_to_XMM (xmmPQ, (uptr)mVUglob.one); SSE_DIVSS (mVU, Fs, xmmPQ); mVU_EATAN_(mVU, xmmPQ, Fs, t1, t2); mVU->regAlloc->clearNeeded(Fs); @@ -246,22 +246,22 @@ mVUop(mVU_EEXP) { int t2 = mVU->regAlloc->allocReg(); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance SSE_MOVSS_XMM_to_XMM (xmmPQ, Fs); - SSE_MULSS_M32_to_XMM (xmmPQ, (uptr)mVU_E1); - SSE_ADDSS_M32_to_XMM (xmmPQ, (uptr)mVU_one); + SSE_MULSS_M32_to_XMM (xmmPQ, (uptr)mVUglob.E1); + SSE_ADDSS_M32_to_XMM (xmmPQ, (uptr)mVUglob.one); SSE_MOVAPS_XMM_to_XMM (t1, Fs); SSE_MULSS (mVU, t1, Fs); SSE_MOVAPS_XMM_to_XMM (t2, t1); - SSE_MULSS_M32_to_XMM (t1, (uptr)mVU_E2); + SSE_MULSS_M32_to_XMM (t1, (uptr)mVUglob.E2); SSE_ADDSS (mVU, xmmPQ, t1); - eexpHelper(mVU_E3); - eexpHelper(mVU_E4); - eexpHelper(mVU_E5); + eexpHelper(mVUglob.E3); + eexpHelper(mVUglob.E4); + eexpHelper(mVUglob.E5); SSE_MULSS (mVU, t2, Fs); - SSE_MULSS_M32_to_XMM (t2, (uptr)mVU_E6); + SSE_MULSS_M32_to_XMM (t2, (uptr)mVUglob.E6); SSE_ADDSS (mVU, xmmPQ, t2); SSE_MULSS (mVU, xmmPQ, xmmPQ); SSE_MULSS (mVU, xmmPQ, xmmPQ); - SSE_MOVSS_M32_to_XMM (t2, (uptr)mVU_one); + SSE_MOVSS_M32_to_XMM (t2, (uptr)mVUglob.one); SSE_DIVSS (mVU, t2, xmmPQ); SSE_MOVSS_XMM_to_XMM (xmmPQ, t2); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back @@ -307,7 +307,7 @@ mVUop(mVU_ERCPR) { int Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_))); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 
0x27 : 0xC6); // Flip xmmPQ to get Valid P instance SSE_MOVSS_XMM_to_XMM (xmmPQ, Fs); - SSE_MOVSS_M32_to_XMM (Fs, (uptr)mVU_one); + SSE_MOVSS_M32_to_XMM (Fs, (uptr)mVUglob.one); SSE_DIVSS (mVU, Fs, xmmPQ); SSE_MOVSS_XMM_to_XMM (xmmPQ, Fs); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back @@ -323,7 +323,7 @@ mVUop(mVU_ERLENG) { SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance mVU_sumXYZ (mVU, xmmPQ, Fs); SSE_SQRTSS_XMM_to_XMM (xmmPQ, xmmPQ); - SSE_MOVSS_M32_to_XMM (Fs, (uptr)mVU_one); + SSE_MOVSS_M32_to_XMM (Fs, (uptr)mVUglob.one); SSE_DIVSS (mVU, Fs, xmmPQ); SSE_MOVSS_XMM_to_XMM (xmmPQ, Fs); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back @@ -338,7 +338,7 @@ mVUop(mVU_ERSADD) { int Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance mVU_sumXYZ (mVU, xmmPQ, Fs); - SSE_MOVSS_M32_to_XMM (Fs, (uptr)mVU_one); + SSE_MOVSS_M32_to_XMM (Fs, (uptr)mVUglob.one); SSE_DIVSS (mVU, Fs, xmmPQ); SSE_MOVSS_XMM_to_XMM (xmmPQ, Fs); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back @@ -352,9 +352,9 @@ mVUop(mVU_ERSQRT) { pass2 { int Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_))); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance - SSE_ANDPS_M128_to_XMM (Fs, (uptr)mVU_absclip); + SSE_ANDPS_M128_to_XMM (Fs, (uptr)mVUglob.absclip); SSE_SQRTSS_XMM_to_XMM (xmmPQ, Fs); - SSE_MOVSS_M32_to_XMM (Fs, (uptr)mVU_one); + SSE_MOVSS_M32_to_XMM (Fs, (uptr)mVUglob.one); SSE_DIVSS (mVU, Fs, xmmPQ); SSE_MOVSS_XMM_to_XMM (xmmPQ, Fs); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 
0x27 : 0xC6); // Flip back @@ -395,12 +395,12 @@ mVUop(mVU_ESIN) { SSE_MOVAPS_XMM_to_XMM (t2, Fs); SSE_MULSS (mVU, Fs, t1); SSE_MOVAPS_XMM_to_XMM (t1, Fs); - SSE_MULSS_M32_to_XMM (Fs, (uptr)mVU_S2); + SSE_MULSS_M32_to_XMM (Fs, (uptr)mVUglob.S2); SSE_ADDSS (mVU, xmmPQ, Fs); - esinHelper(mVU_S3); - esinHelper(mVU_S4); + esinHelper(mVUglob.S3); + esinHelper(mVUglob.S4); SSE_MULSS (mVU, t2, t1); - SSE_MULSS_M32_to_XMM (t2, (uptr)mVU_S5); + SSE_MULSS_M32_to_XMM (t2, (uptr)mVUglob.S5); SSE_ADDSS (mVU, xmmPQ, t2); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back mVU->regAlloc->clearNeeded(Fs); @@ -415,7 +415,7 @@ mVUop(mVU_ESQRT) { pass2 { int Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_))); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance - SSE_ANDPS_M128_to_XMM (Fs, (uptr)mVU_absclip); + SSE_ANDPS_M128_to_XMM (Fs, (uptr)mVUglob.absclip); SSE_SQRTSS_XMM_to_XMM (xmmPQ, Fs); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 
0x27 : 0xC6); // Flip back mVU->regAlloc->clearNeeded(Fs); diff --git a/pcsx2/x86/microVU_Misc.h b/pcsx2/x86/microVU_Misc.h index 43e776e9cf..aede444492 100644 --- a/pcsx2/x86/microVU_Misc.h +++ b/pcsx2/x86/microVU_Misc.h @@ -19,46 +19,31 @@ // Global Variables //------------------------------------------------------------------ -#define declareAllVariables \ -initVariable( _somePrefix_, u32, mVU_absclip, 0x7fffffff ); \ -initVariable( _somePrefix_, u32, mVU_signbit, 0x80000000 ); \ -initVariable( _somePrefix_, u32, mVU_minvals, 0xff7fffff ); \ -initVariable( _somePrefix_, u32, mVU_maxvals, 0x7f7fffff ); \ -initVariable( _somePrefix_, u32, mVU_one, 0x3f800000 ); \ -initVariable( _somePrefix_, u32, mVU_T1, 0x3f7ffff5 ); \ -initVariable( _somePrefix_, u32, mVU_T2, 0xbeaaa61c ); \ -initVariable( _somePrefix_, u32, mVU_T3, 0x3e4c40a6 ); \ -initVariable( _somePrefix_, u32, mVU_T4, 0xbe0e6c63 ); \ -initVariable( _somePrefix_, u32, mVU_T5, 0x3dc577df ); \ -initVariable( _somePrefix_, u32, mVU_T6, 0xbd6501c4 ); \ -initVariable( _somePrefix_, u32, mVU_T7, 0x3cb31652 ); \ -initVariable( _somePrefix_, u32, mVU_T8, 0xbb84d7e7 ); \ -initVariable( _somePrefix_, u32, mVU_Pi4, 0x3f490fdb ); \ -initVariable( _somePrefix_, u32, mVU_S2, 0xbe2aaaa4 ); \ -initVariable( _somePrefix_, u32, mVU_S3, 0x3c08873e ); \ -initVariable( _somePrefix_, u32, mVU_S4, 0xb94fb21f ); \ -initVariable( _somePrefix_, u32, mVU_S5, 0x362e9c14 ); \ -initVariable( _somePrefix_, u32, mVU_E1, 0x3e7fffa8 ); \ -initVariable( _somePrefix_, u32, mVU_E2, 0x3d0007f4 ); \ -initVariable( _somePrefix_, u32, mVU_E3, 0x3b29d3ff ); \ -initVariable( _somePrefix_, u32, mVU_E4, 0x3933e553 ); \ -initVariable( _somePrefix_, u32, mVU_E5, 0x36b63510 ); \ -initVariable( _somePrefix_, u32, mVU_E6, 0x353961ac ); \ -initVariable( _somePrefix_, float, mVU_FTOI_4, 16.0 ); \ -initVariable( _somePrefix_, float, mVU_FTOI_12, 4096.0 ); \ -initVariable( _somePrefix_, float, mVU_FTOI_15, 32768.0 ); \ -initVariable( _somePrefix_, float, 
mVU_ITOF_4, 0.0625f ); \ -initVariable( _somePrefix_, float, mVU_ITOF_12, 0.000244140625 ); \ -initVariable( _somePrefix_, float, mVU_ITOF_15, 0.000030517578125 ); +struct mVU_Globals +{ + u32 absclip[4], signbit[4], + minvals[4], maxvals[4]; + u32 one[4]; + u32 Pi4[4]; -#define _somePrefix_ PCSX2_ALIGNED16_EXTERN -#define initVariable(aprefix, atype, aname, avalue) aprefix (const atype aname [4]); -declareAllVariables -#undef _somePrefix_ -#undef initVariable + u32 T1[4], T2[4], T3[4], T4[4], + T5[4], T6[4], T7[4], T8[4]; -#define _somePrefix_ PCSX2_ALIGNED16 -#define initVariable(aprefix, atype, aname, avalue) aprefix (const atype aname [4]) = {avalue, avalue, avalue, avalue}; + u32 S2[4], S3[4], S4[4], S5[4]; + + u32 E1[4], E2[4], E3[4], + E4[4], E5[4], E6[4]; + + float FTOI_4[4], + FTOI_12[4], + FTOI_15[4]; + + float ITOF_4[4], + ITOF_12[4], + ITOF_15[4]; +}; + +extern const __aligned(32) mVU_Globals mVUglob; //------------------------------------------------------------------ // Helper Macros @@ -147,7 +132,7 @@ declareAllVariables // Function/Template Stuff #define mVUx (vuIndex ? µVU1 : µVU0) -#define mVUop(opName) void opName (mP) +#define mVUop(opName) static void opName (mP) #define microVUr(aType) __recInline aType #define microVUt(aType) __forceinline aType #define microVUx(aType) template aType @@ -166,8 +151,12 @@ declareAllVariables #define opCase4 if (opCase == 4) // Q Opcodes // Define mVUquickSearch + +// FIXME: I changed the below saerchXMM extern from __aligned16 to __pagealigned. +// This *probably* fixes the crashing bugs in linux when using the optimized memcmp. +// Needs testing... 
--air #ifndef __LINUX__ -PCSX2_ALIGNED16_EXTERN( u8 mVUsearchXMM[0x1000] ); +extern __pagealigned u8 mVUsearchXMM[0x1000]; typedef u32 (__fastcall *mVUCall)(void*, void*); #define mVUquickSearch(dest, src, size) ((((mVUCall)((void*)mVUsearchXMM))(dest, src)) == 0xf) #define mVUemitSearch() { mVUcustomSearch(); } diff --git a/pcsx2/x86/microVU_Misc.inl b/pcsx2/x86/microVU_Misc.inl index 0bc4067395..62a868d8e7 100644 --- a/pcsx2/x86/microVU_Misc.inl +++ b/pcsx2/x86/microVU_Misc.inl @@ -24,12 +24,12 @@ void mVUclamp1(int reg, int regT1, int xyzw, bool bClampE = 0) { if ((!clampE && CHECK_VU_OVERFLOW) || (clampE && bClampE)) { switch (xyzw) { case 1: case 2: case 4: case 8: - SSE_MINSS_M32_to_XMM(reg, (uptr)mVU_maxvals); - SSE_MAXSS_M32_to_XMM(reg, (uptr)mVU_minvals); + SSE_MINSS_M32_to_XMM(reg, (uptr)mVUglob.maxvals); + SSE_MAXSS_M32_to_XMM(reg, (uptr)mVUglob.minvals); break; default: - SSE_MINPS_M128_to_XMM(reg, (uptr)mVU_maxvals); - SSE_MAXPS_M128_to_XMM(reg, (uptr)mVU_minvals); + SSE_MINPS_M128_to_XMM(reg, (uptr)mVUglob.maxvals); + SSE_MAXPS_M128_to_XMM(reg, (uptr)mVUglob.minvals); break; } } @@ -43,16 +43,16 @@ void mVUclamp2(microVU* mVU, int reg, int regT1, int xyzw, bool bClampE = 0) { switch (xyzw) { case 1: case 2: case 4: case 8: SSE_MOVSS_XMM_to_XMM (regT1, reg); - SSE_ANDPS_M128_to_XMM(regT1, (uptr)mVU_signbit); - SSE_MINSS_M32_to_XMM (reg, (uptr)mVU_maxvals); - SSE_MAXSS_M32_to_XMM (reg, (uptr)mVU_minvals); + SSE_ANDPS_M128_to_XMM(regT1, (uptr)mVUglob.signbit); + SSE_MINSS_M32_to_XMM (reg, (uptr)mVUglob.maxvals); + SSE_MAXSS_M32_to_XMM (reg, (uptr)mVUglob.minvals); SSE_ORPS_XMM_to_XMM (reg, regT1); break; default: SSE_MOVAPS_XMM_to_XMM(regT1, reg); - SSE_ANDPS_M128_to_XMM(regT1, (uptr)mVU_signbit); - SSE_MINPS_M128_to_XMM(reg, (uptr)mVU_maxvals); - SSE_MAXPS_M128_to_XMM(reg, (uptr)mVU_minvals); + SSE_ANDPS_M128_to_XMM(regT1, (uptr)mVUglob.signbit); + SSE_MINPS_M128_to_XMM(reg, (uptr)mVUglob.maxvals); + SSE_MAXPS_M128_to_XMM(reg, (uptr)mVUglob.minvals); 
SSE_ORPS_XMM_to_XMM (reg, regT1); break; } @@ -328,8 +328,14 @@ microVUt(void) mVUrestoreRegs(microVU* mVU) { // Micro VU - Custom SSE Instructions //------------------------------------------------------------------ -static const u32 PCSX2_ALIGNED16(MIN_MAX_MASK1[4]) = {0xffffffff, 0x80000000, 0xffffffff, 0x80000000}; -static const u32 PCSX2_ALIGNED16(MIN_MAX_MASK2[4]) = {0x00000000, 0x40000000, 0x00000000, 0x40000000}; +struct SSEMaskPair { u32 mask1[4], mask2[4]; }; + +static const __aligned16 SSEMaskPair MIN_MAX = +{ + {0xffffffff, 0x80000000, 0xffffffff, 0x80000000}, + {0x00000000, 0x40000000, 0x00000000, 0x40000000} +}; + // Warning: Modifies t1 and t2 void MIN_MAX_PS(microVU* mVU, int to, int from, int t1, int t2, bool min) { @@ -339,21 +345,21 @@ void MIN_MAX_PS(microVU* mVU, int to, int from, int t1, int t2, bool min) { // ZW SSE2_PSHUFD_XMM_to_XMM(t1, to, 0xfa); - SSE2_PAND_M128_to_XMM (t1, (uptr)MIN_MAX_MASK1); - SSE2_POR_M128_to_XMM (t1, (uptr)MIN_MAX_MASK2); + SSE2_PAND_M128_to_XMM (t1, (uptr)MIN_MAX.mask1); + SSE2_POR_M128_to_XMM (t1, (uptr)MIN_MAX.mask2); SSE2_PSHUFD_XMM_to_XMM(t2, from, 0xfa); - SSE2_PAND_M128_to_XMM (t2, (uptr)MIN_MAX_MASK1); - SSE2_POR_M128_to_XMM (t2, (uptr)MIN_MAX_MASK2); + SSE2_PAND_M128_to_XMM (t2, (uptr)MIN_MAX.mask1); + SSE2_POR_M128_to_XMM (t2, (uptr)MIN_MAX.mask2); if (min) SSE2_MINPD_XMM_to_XMM(t1, t2); else SSE2_MAXPD_XMM_to_XMM(t1, t2); // XY SSE2_PSHUFD_XMM_to_XMM(t2, from, 0x50); - SSE2_PAND_M128_to_XMM (t2, (uptr)MIN_MAX_MASK1); - SSE2_POR_M128_to_XMM (t2, (uptr)MIN_MAX_MASK2); + SSE2_PAND_M128_to_XMM (t2, (uptr)MIN_MAX.mask1); + SSE2_POR_M128_to_XMM (t2, (uptr)MIN_MAX.mask2); SSE2_PSHUFD_XMM_to_XMM(to, to, 0x50); - SSE2_PAND_M128_to_XMM (to, (uptr)MIN_MAX_MASK1); - SSE2_POR_M128_to_XMM (to, (uptr)MIN_MAX_MASK2); + SSE2_PAND_M128_to_XMM (to, (uptr)MIN_MAX.mask1); + SSE2_POR_M128_to_XMM (to, (uptr)MIN_MAX.mask2); if (min) SSE2_MINPD_XMM_to_XMM(to, t2); else SSE2_MAXPD_XMM_to_XMM(to, t2); @@ -367,8 +373,8 @@ void 
MIN_MAX_SS(mV, int to, int from, int t1, bool min) { bool t1b = 0; if (t1 < 0) { t1 = mVU->regAlloc->allocReg(); t1b = 1; } SSE_SHUFPS_XMM_to_XMM (to, from, 0); - SSE2_PAND_M128_to_XMM (to, (uptr)MIN_MAX_MASK1); - SSE2_POR_M128_to_XMM (to, (uptr)MIN_MAX_MASK2); + SSE2_PAND_M128_to_XMM (to, (uptr)MIN_MAX.mask1); + SSE2_POR_M128_to_XMM (to, (uptr)MIN_MAX.mask2); SSE2_PSHUFD_XMM_to_XMM(t1, to, 0xee); if (min) SSE2_MINPD_XMM_to_XMM(to, t1); else SSE2_MAXPD_XMM_to_XMM(to, t1); @@ -509,7 +515,7 @@ void SSE_DIVSS(mV, int to, int from, int t1 = -1, int t2 = -1) { // Micro VU - Custom Quick Search //------------------------------------------------------------------ -PCSX2_ALIGNED(0x1000, static u8 mVUsearchXMM[0x1000]); +static __pagealigned u8 mVUsearchXMM[0x1000]; // Generates a custom optimized block-search function // Note: Structs must be 16-byte aligned! (GCC doesn't guarantee this) diff --git a/pcsx2/x86/microVU_Upper.inl b/pcsx2/x86/microVU_Upper.inl index e97c9c5809..c1cd20b526 100644 --- a/pcsx2/x86/microVU_Upper.inl +++ b/pcsx2/x86/microVU_Upper.inl @@ -25,7 +25,7 @@ // Note: If modXYZW is true, then it adjusts XYZW for Single Scalar operations microVUt(void) mVUupdateFlags(mV, int reg, int regT1 = -1, int regT2 = -1, bool modXYZW = 1) { - int sReg, mReg = gprT1, regT1b = 0, regT2b = 0; + int sReg, mReg = gprT1, regT1b = 0, regT2b = 0; //int xyzw = _X_Y_Z_W; // unused local, still needed? 
-- air static const u16 flipMask[16] = {0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15}; @@ -287,7 +287,7 @@ mVUop(mVU_ABS) { pass2 { if (!_Ft_) return; int Fs = mVU->regAlloc->allocReg(_Fs_, _Ft_, _X_Y_Z_W, !((_Fs_ == _Ft_) && (_X_Y_Z_W == 0xf))); - SSE_ANDPS_M128_to_XMM(Fs, (uptr)mVU_absclip); + SSE_ANDPS_M128_to_XMM(Fs, (uptr)mVUglob.absclip); mVU->regAlloc->clearNeeded(Fs); } pass3 { mVUlog("ABS"); mVUlogFtFs(); } @@ -334,7 +334,7 @@ mVUop(mVU_OPMSUB) { } // FTOI0/FTIO4/FTIO12/FTIO15 Opcodes -void mVU_FTOIx(mP, uptr addr, const char* opName) { +static void mVU_FTOIx(mP, uptr addr, const char* opName) { pass1 { mVUanalyzeFMAC2(mVU, _Fs_, _Ft_); } pass2 { if (!_Ft_) return; @@ -346,10 +346,10 @@ void mVU_FTOIx(mP, uptr addr, const char* opName) { SSE_MOVAPS_XMM_to_XMM(t1, Fs); if (addr) { SSE_MULPS_M128_to_XMM(Fs, addr); } SSE2_CVTTPS2DQ_XMM_to_XMM(Fs, Fs); - SSE2_PXOR_M128_to_XMM(t1, (uptr)mVU_signbit); + SSE2_PXOR_M128_to_XMM(t1, (uptr)mVUglob.signbit); SSE2_PSRAD_I8_to_XMM (t1, 31); SSE_MOVAPS_XMM_to_XMM(t2, Fs); - SSE2_PCMPEQD_M128_to_XMM(t2, (uptr)mVU_signbit); + SSE2_PCMPEQD_M128_to_XMM(t2, (uptr)mVUglob.signbit); SSE_ANDPS_XMM_to_XMM (t1, t2); SSE2_PADDD_XMM_to_XMM(Fs, t1); @@ -361,7 +361,7 @@ void mVU_FTOIx(mP, uptr addr, const char* opName) { } // ITOF0/ITOF4/ITOF12/ITOF15 Opcodes -void mVU_ITOFx(mP, uptr addr, const char* opName) { +static void mVU_ITOFx(mP, uptr addr, const char* opName) { pass1 { mVUanalyzeFMAC2(mVU, _Fs_, _Ft_); } pass2 { if (!_Ft_) return; @@ -388,9 +388,9 @@ mVUop(mVU_CLIP) { mVUallocCFLAGa(mVU, gprT1, cFLAG.lastWrite); SHL32ItoR(gprT1, 6); - SSE_ANDPS_M128_to_XMM(Ft, (uptr)mVU_absclip); + SSE_ANDPS_M128_to_XMM(Ft, (uptr)mVUglob.absclip); SSE_MOVAPS_XMM_to_XMM(t1, Ft); - SSE_ORPS_M128_to_XMM(t1, (uptr)mVU_signbit); + SSE_ORPS_M128_to_XMM(t1, (uptr)mVUglob.signbit); SSE_CMPNLEPS_XMM_to_XMM(t1, Fs); // -w, -z, -y, -x SSE_CMPLTPS_XMM_to_XMM(Ft, Fs); // +w, +z, +y, +x @@ -504,11 +504,11 @@ mVUop(mVU_MINIy) { mVU_FMACa(mVU, 
recPass, 2, 4, 0, "MINIy"); } mVUop(mVU_MINIz) { mVU_FMACa(mVU, recPass, 2, 4, 0, "MINIz"); } mVUop(mVU_MINIw) { mVU_FMACa(mVU, recPass, 2, 4, 0, "MINIw"); } mVUop(mVU_FTOI0) { mVU_FTOIx(mX, (uptr)0, "FTOI0"); } -mVUop(mVU_FTOI4) { mVU_FTOIx(mX, (uptr)mVU_FTOI_4, "FTOI4"); } -mVUop(mVU_FTOI12) { mVU_FTOIx(mX, (uptr)mVU_FTOI_12, "FTOI12"); } -mVUop(mVU_FTOI15) { mVU_FTOIx(mX, (uptr)mVU_FTOI_15, "FTOI15"); } +mVUop(mVU_FTOI4) { mVU_FTOIx(mX, (uptr)mVUglob.FTOI_4,"FTOI4"); } +mVUop(mVU_FTOI12) { mVU_FTOIx(mX, (uptr)mVUglob.FTOI_12,"FTOI12"); } +mVUop(mVU_FTOI15) { mVU_FTOIx(mX, (uptr)mVUglob.FTOI_15,"FTOI15"); } mVUop(mVU_ITOF0) { mVU_ITOFx(mX, (uptr)0, "ITOF0"); } -mVUop(mVU_ITOF4) { mVU_ITOFx(mX, (uptr)mVU_ITOF_4, "ITOF4"); } -mVUop(mVU_ITOF12) { mVU_ITOFx(mX, (uptr)mVU_ITOF_12, "ITOF12"); } -mVUop(mVU_ITOF15) { mVU_ITOFx(mX, (uptr)mVU_ITOF_15, "ITOF15"); } +mVUop(mVU_ITOF4) { mVU_ITOFx(mX, (uptr)mVUglob.ITOF_4,"ITOF4"); } +mVUop(mVU_ITOF12) { mVU_ITOFx(mX, (uptr)mVUglob.ITOF_12,"ITOF12"); } +mVUop(mVU_ITOF15) { mVU_ITOFx(mX, (uptr)mVUglob.ITOF_15,"ITOF15"); } mVUop(mVU_NOP) { pass3 { mVUlog("NOP"); } } diff --git a/pcsx2/x86/sVU_Lower.cpp b/pcsx2/x86/sVU_Lower.cpp index f4a567b14e..8f6620767a 100644 --- a/pcsx2/x86/sVU_Lower.cpp +++ b/pcsx2/x86/sVU_Lower.cpp @@ -70,7 +70,7 @@ #define _X_Y_Z_W ((( VU->code >> 21 ) & 0xF ) ) -static const PCSX2_ALIGNED16(u32 VU_ONE[4]) = {0x3f800000, 0xffffffff, 0xffffffff, 0xffffffff}; +static const __aligned16 u32 VU_ONE[4] = {0x3f800000, 0xffffffff, 0xffffffff, 0xffffffff}; //------------------------------------------------------------------ @@ -84,7 +84,6 @@ static const PCSX2_ALIGNED16(u32 VU_ONE[4]) = {0x3f800000, 0xffffffff, 0xfffffff //------------------------------------------------------------------ // DIV* //------------------------------------------------------------------ -PCSX2_ALIGNED16(u64 DIV_TEMP_XMM[2]); void recVUMI_DIV(VURegs *VU, int info) { u8 *pjmp, *pjmp1; @@ -176,7 +175,7 @@ void recVUMI_SQRT( VURegs *VU, 
int info ) //------------------------------------------------------------------ // RSQRT* //------------------------------------------------------------------ -PCSX2_ALIGNED16(u64 RSQRT_TEMP_XMM[2]); +__aligned16 u64 RSQRT_TEMP_XMM[2]; void recVUMI_RSQRT(VURegs *VU, int info) { u8 *ajmp8, *bjmp8; @@ -1538,7 +1537,7 @@ void recVUMI_MFP(VURegs *VU, int info) //------------------------------------------------------------------ // WAITP //------------------------------------------------------------------ -static PCSX2_ALIGNED16(float s_tempmem[4]); +static __aligned16 float s_tempmem[4]; void recVUMI_WAITP(VURegs *VU, int info) { //Console.WriteLn("recVUMI_WAITP"); diff --git a/pcsx2/x86/sVU_Micro.cpp b/pcsx2/x86/sVU_Micro.cpp index 0173bb61fe..3f5f6c4e71 100644 --- a/pcsx2/x86/sVU_Micro.cpp +++ b/pcsx2/x86/sVU_Micro.cpp @@ -85,15 +85,17 @@ //------------------------------------------------------------------ int vucycle; -PCSX2_ALIGNED16(const float s_fones[8]) = {1.0f, 1.0f, 1.0f, 1.0f, -1.0f, -1.0f, -1.0f, -1.0f}; -PCSX2_ALIGNED16(const u32 s_mask[4]) = {0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff}; -PCSX2_ALIGNED16(const u32 s_expmask[4]) = {0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000}; -PCSX2_ALIGNED16(const u32 g_minvals[4]) = {0xff7fffff, 0xff7fffff, 0xff7fffff, 0xff7fffff}; -PCSX2_ALIGNED16(const u32 g_maxvals[4]) = {0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff}; -PCSX2_ALIGNED16(const u32 const_clip[8]) = {0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, - 0x80000000, 0x80000000, 0x80000000, 0x80000000}; -PCSX2_ALIGNED(64, const u32 g_ones[4]) = {0x00000001, 0x00000001, 0x00000001, 0x00000001}; -PCSX2_ALIGNED16(const u32 g_minvals_XYZW[16][4]) = +const __aligned16 float s_fones[8] = {1.0f, 1.0f, 1.0f, 1.0f, -1.0f, -1.0f, -1.0f, -1.0f}; +const __aligned16 u32 s_mask[4] = {0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff}; +const __aligned16 u32 s_expmask[4] = {0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000}; +const __aligned16 u32 g_minvals[4] = 
{0xff7fffff, 0xff7fffff, 0xff7fffff, 0xff7fffff}; +const __aligned16 u32 g_maxvals[4] = {0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff}; +const __aligned16 u32 const_clip[8] = {0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, + 0x80000000, 0x80000000, 0x80000000, 0x80000000}; + +const __aligned(64) u32 g_ones[4] = {0x00000001, 0x00000001, 0x00000001, 0x00000001}; + +const __aligned16 u32 g_minvals_XYZW[16][4] = { { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }, //0000 { 0xffffffff, 0xffffffff, 0xffffffff, 0xff7fffff }, //0001 @@ -112,7 +114,7 @@ PCSX2_ALIGNED16(const u32 g_minvals_XYZW[16][4]) = { 0xff7fffff, 0xff7fffff, 0xff7fffff, 0xffffffff }, //1110 { 0xff7fffff, 0xff7fffff, 0xff7fffff, 0xff7fffff }, //1111 }; -PCSX2_ALIGNED16(const u32 g_maxvals_XYZW[16][4])= +const __aligned16 u32 g_maxvals_XYZW[16][4] = { { 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff }, //0000 { 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7f7fffff }, //0001 @@ -1697,14 +1699,14 @@ void vuFloat3(uptr x86ptr) { } } -PCSX2_ALIGNED16(u64 vuFloatData[2]); -PCSX2_ALIGNED16(u64 vuFloatData2[2]); +__aligned16 u64 vuFloatData[4]; + // Makes NaN == 0, Infinities stay the same; Very Slow - Use only for debugging void vuFloatExtra( int regd, int XYZW) { int t1reg = (regd == 0) ? (regd + 1) : (regd - 1); int t2reg = (regd <= 1) ? 
(regd + 2) : (regd - 2); - SSE_MOVAPS_XMM_to_M128( (uptr)vuFloatData, t1reg ); - SSE_MOVAPS_XMM_to_M128( (uptr)vuFloatData2, t2reg ); + SSE_MOVAPS_XMM_to_M128( (uptr)&vuFloatData[0], t1reg ); + SSE_MOVAPS_XMM_to_M128( (uptr)&vuFloatData[2], t2reg ); SSE_XORPS_XMM_to_XMM(t1reg, t1reg); SSE_CMPORDPS_XMM_to_XMM(t1reg, regd); @@ -1712,11 +1714,11 @@ void vuFloatExtra( int regd, int XYZW) { SSE_ANDPS_XMM_to_XMM(t2reg, t1reg); VU_MERGE_REGS_CUSTOM(regd, t2reg, XYZW); - SSE_MOVAPS_M128_to_XMM( t1reg, (uptr)vuFloatData ); - SSE_MOVAPS_M128_to_XMM( t2reg, (uptr)vuFloatData2 ); + SSE_MOVAPS_M128_to_XMM( t1reg, (uptr)&vuFloatData[0] ); + SSE_MOVAPS_M128_to_XMM( t2reg, (uptr)&vuFloatData[2] ); } -static PCSX2_ALIGNED16(u32 tempRegX[]) = {0x00000000, 0x00000000, 0x00000000, 0x00000000}; +static __aligned16 u32 tempRegX[] = {0x00000000, 0x00000000, 0x00000000, 0x00000000}; // Called by testWhenOverflow() function void testPrintOverflow() { diff --git a/pcsx2/x86/sVU_Micro.h b/pcsx2/x86/sVU_Micro.h index 60c8af525b..10e29b9894 100644 --- a/pcsx2/x86/sVU_Micro.h +++ b/pcsx2/x86/sVU_Micro.h @@ -70,12 +70,12 @@ extern vFloat vFloats1_useEAX[16]; extern vFloat vFloats2[16]; extern vFloat vFloats4[16]; extern vFloat vFloats4_useEAX[16]; -PCSX2_ALIGNED16_EXTERN(const float s_fones[8]); -PCSX2_ALIGNED16_EXTERN(const u32 s_mask[4]); -PCSX2_ALIGNED16_EXTERN(const u32 s_expmask[4]); -PCSX2_ALIGNED16_EXTERN(const u32 g_minvals[4]); -PCSX2_ALIGNED16_EXTERN(const u32 g_maxvals[4]); -PCSX2_ALIGNED16_EXTERN(const u32 const_clip[8]); +extern const __aligned16 float s_fones[8]; +extern const __aligned16 u32 s_mask[4]; +extern const __aligned16 u32 s_expmask[4]; +extern const __aligned16 u32 g_minvals[4]; +extern const __aligned16 u32 g_maxvals[4]; +extern const __aligned16 u32 const_clip[8]; u32 GetVIAddr(VURegs * VU, int reg, int read, int info); int _vuGetTempXMMreg(int info); diff --git a/pcsx2/x86/sVU_Upper.cpp b/pcsx2/x86/sVU_Upper.cpp index d11f9e2618..525d770596 100644 --- 
a/pcsx2/x86/sVU_Upper.cpp +++ b/pcsx2/x86/sVU_Upper.cpp @@ -72,7 +72,7 @@ //------------------------------------------------------------------ // Global Variables //------------------------------------------------------------------ -static const PCSX2_ALIGNED16(int SSEmovMask[ 16 ][ 4 ]) = +static const __aligned16 int SSEmovMask[ 16 ][ 4 ] = { { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, { 0x00000000, 0x00000000, 0x00000000, 0xFFFFFFFF }, @@ -92,7 +92,7 @@ static const PCSX2_ALIGNED16(int SSEmovMask[ 16 ][ 4 ]) = { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF } }; -static const PCSX2_ALIGNED16(u32 const_abs_table[16][4]) = +static const __aligned16 u32 const_abs_table[16][4] = { { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }, //0000 { 0xffffffff, 0xffffffff, 0xffffffff, 0x7fffffff }, //0001 @@ -112,21 +112,21 @@ static const PCSX2_ALIGNED16(u32 const_abs_table[16][4]) = { 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff }, //1111 }; -static const PCSX2_ALIGNED16(float recMult_float_to_int4[4]) = { 16.0, 16.0, 16.0, 16.0 }; -static const PCSX2_ALIGNED16(float recMult_float_to_int12[4]) = { 4096.0, 4096.0, 4096.0, 4096.0 }; -static const PCSX2_ALIGNED16(float recMult_float_to_int15[4]) = { 32768.0, 32768.0, 32768.0, 32768.0 }; +static const __aligned16 float recMult_float_to_int4[4] = { 16.0, 16.0, 16.0, 16.0 }; +static const __aligned16 float recMult_float_to_int12[4] = { 4096.0, 4096.0, 4096.0, 4096.0 }; +static const __aligned16 float recMult_float_to_int15[4] = { 32768.0, 32768.0, 32768.0, 32768.0 }; -static const PCSX2_ALIGNED16(float recMult_int_to_float4[4]) = { 0.0625f, 0.0625f, 0.0625f, 0.0625f }; -static const PCSX2_ALIGNED16(float recMult_int_to_float12[4]) = { 0.000244140625, 0.000244140625, 0.000244140625, 0.000244140625 }; -static const PCSX2_ALIGNED16(float recMult_int_to_float15[4]) = { 0.000030517578125, 0.000030517578125, 0.000030517578125, 0.000030517578125 }; +static const __aligned16 float recMult_int_to_float4[4] = { 0.0625f, 
0.0625f, 0.0625f, 0.0625f }; +static const __aligned16 float recMult_int_to_float12[4] = { 0.000244140625, 0.000244140625, 0.000244140625, 0.000244140625 }; +static const __aligned16 float recMult_int_to_float15[4] = { 0.000030517578125, 0.000030517578125, 0.000030517578125, 0.000030517578125 }; -static const PCSX2_ALIGNED16(u32 VU_Underflow_Mask1[4]) = {0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000}; -static const PCSX2_ALIGNED16(u32 VU_Underflow_Mask2[4]) = {0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff}; -static const PCSX2_ALIGNED16(u32 VU_Zero_Mask[4]) = {0x00000000, 0x00000000, 0x00000000, 0x00000000}; -static const PCSX2_ALIGNED16(u32 VU_Zero_Helper_Mask[4]) = {0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff}; -static const PCSX2_ALIGNED16(u32 VU_Signed_Zero_Mask[4]) = {0x80000000, 0x80000000, 0x80000000, 0x80000000}; -static const PCSX2_ALIGNED16(u32 VU_Pos_Infinity[4]) = {0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000}; -static const PCSX2_ALIGNED16(u32 VU_Neg_Infinity[4]) = {0xff800000, 0xff800000, 0xff800000, 0xff800000}; +static const __aligned16 u32 VU_Underflow_Mask1[4] = {0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000}; +static const __aligned16 u32 VU_Underflow_Mask2[4] = {0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff}; +static const __aligned16 u32 VU_Zero_Mask[4] = {0x00000000, 0x00000000, 0x00000000, 0x00000000}; +static const __aligned16 u32 VU_Zero_Helper_Mask[4] = {0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff}; +static const __aligned16 u32 VU_Signed_Zero_Mask[4] = {0x80000000, 0x80000000, 0x80000000, 0x80000000}; +static const __aligned16 u32 VU_Pos_Infinity[4] = {0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000}; +static const __aligned16 u32 VU_Neg_Infinity[4] = {0xff800000, 0xff800000, 0xff800000, 0xff800000}; //------------------------------------------------------------------ @@ -135,7 +135,7 @@ static const PCSX2_ALIGNED16(u32 VU_Neg_Infinity[4]) = {0xff800000, 0xff800000 // // Note: Computes under/overflow flags if 
CHECK_VU_EXTRA_FLAGS is 1 //------------------------------------------------------------------ -PCSX2_ALIGNED16(u64 TEMPXMMData[2]); +static __aligned16 u64 TEMPXMMData[2]; void recUpdateFlags(VURegs * VU, int reg, int info) { static u8 *pjmp, *pjmp2; @@ -327,8 +327,8 @@ void recUpdateFlags(VURegs * VU, int reg, int info) // // Note: See FPU_ADD_SUB() for more info on what this is doing. //------------------------------------------------------------------ -static PCSX2_ALIGNED16(u32 VU_addsuband[2][4]); -static PCSX2_ALIGNED16(u32 VU_addsub_reg[2][4]); +static __aligned16 u32 VU_addsuband[2][4]; +static __aligned16 u32 VU_addsub_reg[2][4]; static u32 tempECX; @@ -628,7 +628,7 @@ void recVUMI_ABS(VURegs *VU, int info) //------------------------------------------------------------------ // ADD*, ADD_iq*, ADD_xyzw* //------------------------------------------------------------------ -PCSX2_ALIGNED16(float s_two[4]) = {0,0,0,2}; +static const __aligned16 float s_two[4] = {0,0,0,2}; void recVUMI_ADD(VURegs *VU, int info) { //Console.WriteLn("recVUMI_ADD()"); @@ -2164,11 +2164,11 @@ void recVUMI_MSUBAw( VURegs *VU, int info ) //------------------------------------------------------------------ -static const u32 PCSX2_ALIGNED16(special_mask[4]) = {0xffffffff, 0x80000000, 0xffffffff, 0x80000000}; -static const u32 PCSX2_ALIGNED16(special_mask2[4]) = {0, 0x40000000, 0, 0x40000000}; +static const __aligned16 u32 special_mask[4] = {0xffffffff, 0x80000000, 0xffffffff, 0x80000000}; +static const __aligned16 u32 special_mask2[4] = {0, 0x40000000, 0, 0x40000000}; -u32 PCSX2_ALIGNED16(temp_loc[4]); -u32 PCSX2_ALIGNED16(temp_loc2[4]); +__aligned16 u32 temp_loc[4]; +__aligned16 u32 temp_loc2[4]; //MAX/MINI are non-arithmetic operations that implicitly support numbers with the EXP field being 0 ("denormals"). 
// @@ -2703,7 +2703,9 @@ void recVUMI_NOP( VURegs *VU, int info ) //------------------------------------------------------------------ // recVUMI_FTOI_Saturate() - Saturates result from FTOI Instructions //------------------------------------------------------------------ -static const PCSX2_ALIGNED16(int rec_const_0x8000000[4]) = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 }; + +// unused, but leaving here for possible reference.. +//static const __aligned16 int rec_const_0x8000000[4] = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 }; void recVUMI_FTOI_Saturate(int rec_s, int rec_t, int rec_tmp1, int rec_tmp2) { @@ -2734,8 +2736,8 @@ void recVUMI_FTOI_Saturate(int rec_s, int rec_t, int rec_tmp1, int rec_tmp2) //------------------------------------------------------------------ // FTOI 0/4/12/15 //------------------------------------------------------------------ -static PCSX2_ALIGNED16(float FTIO_Temp1[4]) = { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }; -static PCSX2_ALIGNED16(float FTIO_Temp2[4]) = { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }; +static __aligned16 float FTIO_Temp1[4] = { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }; +static __aligned16 float FTIO_Temp2[4] = { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }; void recVUMI_FTOI0(VURegs *VU, int info) { int t1reg, t2reg; // Temp XMM regs diff --git a/plugins/zerogs/dx/Mem.cpp b/plugins/zerogs/dx/Mem.cpp index 91554f437b..ad11615eff 100644 --- a/plugins/zerogs/dx/Mem.cpp +++ b/plugins/zerogs/dx/Mem.cpp @@ -242,7 +242,7 @@ u32 g_pageTable8[64][128]; u32 g_pageTable4[128][128]; BLOCK m_Blocks[0x40]; // do so blocks are indexable -static PCSX2_ALIGNED16(u32 tempblock[64]); +static __aligned16 u32 tempblock[64]; #define DSTPSM gs.dstbuf.psm diff --git a/plugins/zerogs/dx/x86.cpp b/plugins/zerogs/dx/x86.cpp index 469bd3ecf7..3f14a14c25 100644 --- a/plugins/zerogs/dx/x86.cpp +++ b/plugins/zerogs/dx/x86.cpp @@ -348,9 +348,9 @@ extern "C" void FASTCALL(WriteCLUT_T32_I4_CSM1_sse2(u32* 
vm, u32* clut)) #if defined(_MSC_VER) extern "C" { -PCSX2_ALIGNED16(int s_clut16mask2[4]) = { 0x0000ffff, 0x0000ffff, 0x0000ffff, 0x0000ffff }; -PCSX2_ALIGNED16(int s_clut16mask[8]) = { 0xffff0000, 0xffff0000, 0xffff0000, 0xffff0000, - 0x0000ffff, 0x0000ffff, 0x0000ffff, 0x0000ffff}; +__aligned16 int s_clut16mask2[4] = { 0x0000ffff, 0x0000ffff, 0x0000ffff, 0x0000ffff }; +__aligned16 int s_clut16mask[8] = { 0xffff0000, 0xffff0000, 0xffff0000, 0xffff0000, + 0x0000ffff, 0x0000ffff, 0x0000ffff, 0x0000ffff}; } #if !defined(__x86_64__) diff --git a/plugins/zerospu2/zerospu2.cpp b/plugins/zerospu2/zerospu2.cpp index e1953f8813..fcb46f22e9 100644 --- a/plugins/zerospu2/zerospu2.cpp +++ b/plugins/zerospu2/zerospu2.cpp @@ -856,10 +856,10 @@ void ResampleLinear(s16* pStereoSamples, s32 oldsamples, s16* pNewSamples, s32 n } } -static PCSX2_ALIGNED16(s16 s_ThreadBuffer[NSSIZE*NSFRAMES*2*5]); +static __aligned16 s16 s_ThreadBuffer[NSSIZE*NSFRAMES*2*5]; // SoundTouch's INTEGER system is broken these days, so we'll need this to do float conversions... -static PCSX2_ALIGNED16(float s_floatBuffer[NSSIZE*NSFRAMES*2*5]); +static __aligned16 float s_floatBuffer[NSSIZE*NSFRAMES*2*5]; // communicates with the audio hardware #ifdef _WIN32