Rewrote internal handling of SSE roundmodes and DAZ/FTZ (fixes major crash bugs from the previous revision).

 * Added SSE_MXCSR union/struct with bitfields and methods for doing common actions.
 * Converted all existing MXCSR code to use the new union.
 * Added a __pagesize macro for use in conjunction with __pagealigned and dynarec functions.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@2113 96395faa-99c1-11dd-bbfe-3dabce05a288
Jake.Stine 2009-11-02 07:00:59 +00:00
parent 629aad9944
commit 94222f4aaf
28 changed files with 395 additions and 156 deletions

View File

@ -176,11 +176,16 @@
// overhead). Furthermore, compilers cannot inline functions that have aligned local // overhead). Furthermore, compilers cannot inline functions that have aligned local
// vars. So use local var alignment with much caution. // vars. So use local var alignment with much caution.
// //
// Defines the memory page size for the target platform at compilation. All supported platforms
// (which means Intel only right now) have a 4k granularity.
#define __pagesize 0x1000
#ifdef _MSC_VER #ifdef _MSC_VER
# define __aligned(alig) __declspec(align(alig)) # define __aligned(alig) __declspec(align(alig))
# define __aligned16 __declspec(align(16)) # define __aligned16 __declspec(align(16))
# define __pagealigned __declspec(align(0x1000)) # define __pagealigned __declspec(align(__pagesize))
// Deprecated; use __align instead. // Deprecated; use __align instead.
# define PCSX2_ALIGNED(alig,x) __declspec(align(alig)) x # define PCSX2_ALIGNED(alig,x) __declspec(align(alig)) x
@ -227,7 +232,7 @@ This theoretically unoptimizes. Not having much luck so far.
# define __aligned(alig) __attribute__((aligned(alig))) # define __aligned(alig) __attribute__((aligned(alig)))
# define __aligned16 __attribute__((aligned(16))) # define __aligned16 __attribute__((aligned(16)))
# define __pagealigned __attribute__((aligned(0x1000))) # define __pagealigned __attribute__((aligned(__pagesize)))
// Deprecated; use __align instead. // Deprecated; use __align instead.
# define PCSX2_ALIGNED(alig,x) x __attribute((aligned(alig))) # define PCSX2_ALIGNED(alig,x) x __attribute((aligned(alig)))
# define PCSX2_ALIGNED16(x) x __attribute((aligned(16))) # define PCSX2_ALIGNED16(x) x __attribute((aligned(16)))

View File

@ -65,6 +65,12 @@ namespace HostSys
extern void MemProtect( void* baseaddr, size_t size, PageProtectionMode mode, bool allowExecution=false ); extern void MemProtect( void* baseaddr, size_t size, PageProtectionMode mode, bool allowExecution=false );
extern void Munmap( void* base, u32 size ); extern void Munmap( void* base, u32 size );
template< uint size >
void MemProtectStatic( u8 (&arr)[size], PageProtectionMode mode, bool allowExecution=false )
{
MemProtect( arr, size, mode, allowExecution );
}
} }
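
For reference, a minimal sketch (not part of the commit) of how the new __pagealigned/__pagesize pair and MemProtectStatic are intended to be combined; the dispatcher buffers converted later in this commit follow the same pattern, and the buffer/function names below are purely illustrative:

    static __pagealigned u8 myCodeBuf[__pagesize];

    void InitMyCodeBuf()
    {
        // Open the page for writing while code is generated into it...
        HostSys::MemProtectStatic( myCodeBuf, Protect_ReadWrite, false );

        // ... emit code into myCodeBuf here ...

        // ... then lock it down as read-only and executable.
        HostSys::MemProtectStatic( myCodeBuf, Protect_ReadOnly, true );
    }

The template form deduces the buffer size from the array type, so the explicit size argument (and the chance of it drifting out of sync with the declaration) goes away.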

View File

@ -338,8 +338,10 @@ namespace x86Emitter
// ------------------------------------------------------------------------ // ------------------------------------------------------------------------
extern void xEMMS(); extern void xEMMS();
extern void xSTMXCSR( u32* dest ); extern void xSTMXCSR( const ModSib32& dest );
extern void xLDMXCSR( const u32* src ); extern void xLDMXCSR( const ModSib32& src );
extern void xFXSAVE( const ModSib32& dest );
extern void xFXRSTOR( const ModSib32& src );
extern void xMOVDZX( const xRegisterSSE& to, const xRegister32& from ); extern void xMOVDZX( const xRegisterSSE& to, const xRegister32& from );
extern void xMOVDZX( const xRegisterSSE& to, const ModSibBase& src ); extern void xMOVDZX( const xRegisterSSE& to, const ModSibBase& src );

View File

@ -21,8 +21,9 @@
extern void cpudetectInit(); extern void cpudetectInit();
// -------------------------------------------------------------------------------------- // --------------------------------------------------------------------------------------
struct x86CPU_INFO // x86CPU_INFO
// -------------------------------------------------------------------------------------- // --------------------------------------------------------------------------------------
struct x86CPU_INFO
{ {
u32 FamilyID; // Processor Family u32 FamilyID; // Processor Family
u32 Model; // Processor Model u32 Model; // Processor Model
@ -91,6 +92,75 @@ struct x86CPU_INFO
u32 hasStreamingSIMD4ExtensionsA:1; u32 hasStreamingSIMD4ExtensionsA:1;
}; };
enum SSE_RoundMode
{
SSEround_Nearest = 0,
SSEround_NegInf,
SSEround_PosInf,
SSEround_Chop,
};
// --------------------------------------------------------------------------------------
// SSE_MXCSR - Control/Status Register (bitfield)
// --------------------------------------------------------------------------------------
// Bits 0-5 are exception flags; used only if SSE exceptions have been enabled.
// Bits in this field are "sticky" and, once an exception has occurred, must be manually
// cleared using LDMXCSR or FXRSTOR.
//
// Bits 7-12 are the masks for disabling the exceptions in bits 0-5. Cleared bits allow
// exceptions, set bits mask exceptions from being raised.
//
union SSE_MXCSR
{
u32 bitmask;
struct
{
u32
InvalidOpFlag :1,
DenormalFlag :1,
DivideByZeroFlag :1,
OverflowFlag :1,
UnderflowFlag :1,
PrecisionFlag :1,
// This bit is supported only on SSE2 or better CPUs. Setting it to 1 on
// SSE1 CPUs will result in a general protection fault when executing
// LDMXCSR (the bit is reserved on those CPUs).
DenormalsAreZero :1,
InvalidOpMask :1,
DenormalMask :1,
DivideByZeroMask :1,
OverflowMask :1,
UnderflowMask :1,
PrecisionMask :1,
RoundingControl :2,
FlushToZero :1;
};
SSE_RoundMode GetRoundMode() const;
SSE_MXCSR& SetRoundMode( SSE_RoundMode mode );
SSE_MXCSR& ClearExceptionFlags();
SSE_MXCSR& EnableExceptions();
SSE_MXCSR& DisableExceptions();
SSE_MXCSR& ApplyReserveMask();
bool operator ==( const SSE_MXCSR& right ) const
{
return bitmask == right.bitmask;
}
bool operator !=( const SSE_MXCSR& right ) const
{
return bitmask != right.bitmask;
}
operator x86Emitter::ModSib32() const;
};
extern SSE_MXCSR MXCSR_Mask;
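
As a quick sanity check of the layout above (illustrative only, not part of the commit): the x86 power-on default MXCSR value of 0x1F80 decodes through this union to all six exception masks set, round-to-nearest, and DAZ/FTZ cleared.

    SSE_MXCSR def;
    def.bitmask = 0x1F80;               // hardware default MXCSR
    // def.InvalidOpMask .. def.PrecisionMask == 1   (bits 7-12: all exceptions masked)
    // def.RoundingControl == SSEround_Nearest       (bits 13-14 == 0)
    // def.DenormalsAreZero == 0 && def.FlushToZero == 0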
////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////

View File

@ -64,7 +64,7 @@
# define CallAddress( ptr ) \ # define CallAddress( ptr ) \
__asm{ call offset ptr } __asm{ call offset ptr }
# define FastCallAddress( ptr, param ) \ # define FastCallAddress( ptr, param1 ) \
__asm{ __asm mov ecx, param1 __asm call offset ptr } __asm{ __asm mov ecx, param1 __asm call offset ptr }
# define FastCallAddress2( ptr, param1, param2 ) \ # define FastCallAddress2( ptr, param1, param2 ) \
@ -75,8 +75,8 @@
# define CallAddress( ptr ) \ # define CallAddress( ptr ) \
( (void (*)()) &(ptr)[0] )() ( (void (*)()) &(ptr)[0] )()
# define FastCallAddress( ptr, param ) \ # define FastCallAddress( ptr, param1 ) \
( (void (*)( int )) &(ptr)[0] )( param ) ( (void (*)( int )) &(ptr)[0] )( param1 )
# define FastCallAddress2( ptr, param1, param2 ) \ # define FastCallAddress2( ptr, param1, param2 ) \
( (void (*)( int, int )) &(ptr)[0] )( param1, param2 ) ( (void (*)( int, int )) &(ptr)[0] )( param1, param2 )

View File

@ -15,6 +15,8 @@
#pragma once #pragma once
#include "Utilities/Dependencies.h"
// Register counts for x86/32 mode: // Register counts for x86/32 mode:
static const uint iREGCNT_XMM = 8; static const uint iREGCNT_XMM = 8;
static const uint iREGCNT_GPR = 8; static const uint iREGCNT_GPR = 8;

View File

@ -39,9 +39,16 @@ namespace HostSys
void MemProtect( void* baseaddr, size_t size, PageProtectionMode mode, bool allowExecution ) void MemProtect( void* baseaddr, size_t size, PageProtectionMode mode, bool allowExecution )
{ {
pxAssertDev( ((size & (__pagesize-1)) == 0), wxsFormat(
L"Memory block size must be a multiple of the target platform's page size.\n"
L"\tPage Size: 0x%04x (%d), Block Size: 0x%04x (%d)",
__pagesize, __pagesize, size, size )
);
int lnxmode = 0; int lnxmode = 0;
// make sure size is aligned to the system page size: // make sure size is aligned to the system page size:
// Check is redundant against the assertion above, but might as well...
size = (size + m_pagemask) & ~m_pagemask; size = (size + m_pagemask) & ~m_pagemask;
switch( mode ) switch( mode )

View File

@ -33,6 +33,12 @@ namespace HostSys
void MemProtect( void* baseaddr, size_t size, PageProtectionMode mode, bool allowExecution ) void MemProtect( void* baseaddr, size_t size, PageProtectionMode mode, bool allowExecution )
{ {
pxAssertDev( ((size & (__pagesize-1)) == 0), wxsFormat(
L"Memory block size must be a multiple of the target platform's page size.\n"
L"\tPage Size: 0x%04x (%d), Block Size: 0x%04x (%d)",
__pagesize, __pagesize, size, size )
);
DWORD winmode = 0; DWORD winmode = 0;
switch( mode ) switch( mode )

View File

@ -21,7 +21,6 @@
#include "internal.h" #include "internal.h"
#include "tools.h" #include "tools.h"
using namespace x86Emitter; using namespace x86Emitter;
__aligned16 x86CPU_INFO x86caps; __aligned16 x86CPU_INFO x86caps;
@ -73,8 +72,8 @@ static char* bool_to_char( bool testcond )
#endif #endif
#ifdef _WINDOWS_ #ifdef _WINDOWS_
static HANDLE s_threadId = NULL; static HANDLE s_threadId = NULL;
static DWORD s_oldmask = ERROR_INVALID_PARAMETER; static DWORD s_oldmask = ERROR_INVALID_PARAMETER;
#endif #endif
static void SetSingleAffinity() static void SetSingleAffinity()
@ -148,17 +147,10 @@ static s64 CPUSpeedHz( u64 time )
} }
//////////////////////////////////////////////////// ////////////////////////////////////////////////////
int arr[] = {
0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865,
0x51203229,0x20646175,0x20555043,0x20202020 ,
0x20202020,0x20402020,0x36362e32,0x7a4847
};
void cpudetectInit() void cpudetectInit()
{ {
u32 regs[ 4 ]; u32 regs[ 4 ];
u32 cmds; u32 cmds;
int cputype=0; // Cpu type
//AMD 64 STUFF //AMD 64 STUFF
u32 x86_64_8BITBRANDID; u32 x86_64_8BITBRANDID;
u32 x86_64_12BITBRANDID; u32 x86_64_12BITBRANDID;
@ -180,7 +172,9 @@ void cpudetectInit()
((u32*)x86caps.VendorName)[ 1 ] = regs[ 3 ]; ((u32*)x86caps.VendorName)[ 1 ] = regs[ 3 ];
((u32*)x86caps.VendorName)[ 2 ] = regs[ 2 ]; ((u32*)x86caps.VendorName)[ 2 ] = regs[ 2 ];
// Hack - prevents reg[2] & reg[3] from being optimized out of existance! // Hack - prevents reg[2] & reg[3] from being optimized out of existence! (GCC only)
// FIXME: We use a better __cpuid now with proper inline asm constraints. This hack is
// probably obsolete. Linux devs please re-confirm. --air
num = sprintf(str, "\tx86Flags = %8.8x %8.8x\n", regs[3], regs[2]); num = sprintf(str, "\tx86Flags = %8.8x %8.8x\n", regs[3], regs[2]);
u32 LogicalCoresPerPhysicalCPU = 0; u32 LogicalCoresPerPhysicalCPU = 0;
@ -200,7 +194,9 @@ void cpudetectInit()
x86caps.Flags2 = regs[ 2 ]; x86caps.Flags2 = regs[ 2 ];
} }
} }
/* detect multicore for intel cpu */
// detect multicore for Intel cpu
if ((cmds >= 0x00000004) && !strcmp("GenuineIntel",x86caps.VendorName)) if ((cmds >= 0x00000004) && !strcmp("GenuineIntel",x86caps.VendorName))
{ {
if ( iCpuId( 0x00000004, regs ) != -1 ) if ( iCpuId( 0x00000004, regs ) != -1 )
@ -222,7 +218,9 @@ void cpudetectInit()
} }
} }
/* detect multicore for amd cpu */
// detect multicore for AMD cpu
if ((cmds >= 0x80000008) && !strcmp("AuthenticAMD",x86caps.VendorName)) if ((cmds >= 0x80000008) && !strcmp("AuthenticAMD",x86caps.VendorName))
{ {
if ( iCpuId( 0x80000008, regs ) != -1 ) if ( iCpuId( 0x80000008, regs ) != -1 )
@ -250,8 +248,22 @@ void cpudetectInit()
strcpy( x86caps.TypeName, "Unknown"); strcpy( x86caps.TypeName, "Unknown");
break; break;
} }
if ( x86caps.VendorName[ 0 ] == 'G' ){ cputype=0;}//trick lines but if you know a way better ;p
if ( x86caps.VendorName[ 0 ] == 'A' ){ cputype=1;} #if 0
// vendor identification, currently unneeded.
// It's really not recommended that we base much (if anything) on CPU vendor names.
// But the code is left in as an ifdef, for possible future reference.
int cputype=0; // Cpu type
static const char* Vendor_Intel = "GenuineIntel";
static const char* Vendor_AMD = "AuthenticAMD";
if( memcmp( x86caps.VendorName, Vendor_Intel, 12 ) == 0 ) { cputype = 0; } else
if( memcmp( x86caps.VendorName, Vendor_AMD, 12 ) == 0 ) { cputype = 1; }
if ( x86caps.VendorName[ 0 ] == 'G' ) { cputype = 0; }
if ( x86caps.VendorName[ 0 ] == 'A' ) { cputype = 1; }
#endif
memzero( x86caps.FamilyName ); memzero( x86caps.FamilyName );
iCpuId( 0x80000002, (u32*)x86caps.FamilyName); iCpuId( 0x80000002, (u32*)x86caps.FamilyName);
@ -311,6 +323,9 @@ void cpudetectInit()
x86caps.hasStreamingSIMD4Extensions = ( x86caps.Flags2 >> 19 ) & 1; //sse4.1 x86caps.hasStreamingSIMD4Extensions = ( x86caps.Flags2 >> 19 ) & 1; //sse4.1
x86caps.hasStreamingSIMD4Extensions2 = ( x86caps.Flags2 >> 20 ) & 1; //sse4.2 x86caps.hasStreamingSIMD4Extensions2 = ( x86caps.Flags2 >> 20 ) & 1; //sse4.2
static __pagealigned u8 recSSE[__pagesize];
HostSys::MemProtectStatic( recSSE, Protect_ReadWrite, true );
////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////
// SIMD Instruction Support Detection // SIMD Instruction Support Detection
// //
@ -325,7 +340,6 @@ void cpudetectInit()
// detection relies on the CPUID bits alone. // detection relies on the CPUID bits alone.
#ifdef _MSC_VER #ifdef _MSC_VER
u8* recSSE = (u8*)HostSys::Mmap( NULL, 0x1000 );
if( recSSE != NULL ) if( recSSE != NULL )
{ {
xSetPtr( recSSE ); xSetPtr( recSSE );
@ -384,6 +398,25 @@ void cpudetectInit()
); );
} }
#endif #endif
////////////////////////////////////////////////////////////////////////////////////////////
// Establish MXCSR Mask...
if( x86caps.hasStreamingSIMDExtensions )
{
xSetPtr( recSSE );
xFXSAVE( ptr32[ecx] );
xRET();
u32 _fxsave[512/4];
memzero( _fxsave );
((void (__fastcall *)(u32*))&recSSE[0])( _fxsave );
if( _fxsave[28/4] == 0 )
MXCSR_Mask.bitmask = 0xFFBF;
else
MXCSR_Mask.bitmask = _fxsave[28/4];
}
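
The fallback above follows Intel's documented layout for the FXSAVE image: the MXCSR_MASK field sits at byte offset 28, and a value of zero there indicates a CPU that predates the field, in which case 0xFFBF should be assumed. A hedged sketch of the same probe done with a compiler intrinsic instead of emitted code (this assumes an _fxsave intrinsic is available -- <intrin.h> on MSVC, <immintrin.h> plus -mfxsr on GCC/Clang -- and that SSE/FXSR support has already been confirmed):

    static __aligned16 u8 s_fxImage[512];       // FXSAVE area must be 16-byte aligned

    u32 ProbeMXCSRMask()
    {
        memzero( s_fxImage );
        _fxsave( s_fxImage );
        u32 mask = *(u32*)&s_fxImage[28];       // MXCSR_MASK field of the image
        return (mask != 0) ? mask : 0xFFBF;     // zero means assume the pre-SSE2 default
    }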
//////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////
// Core Counting! // Core Counting!

View File

@ -15,6 +15,55 @@
#include "PrecompiledHeader.h" #include "PrecompiledHeader.h"
#include "internal.h" #include "internal.h"
#include "tools.h"
// Mask of valid bit fields for the target CPU. Typically this is either 0xFFFF (SSE2
// or better) or 0xFFBF (SSE1 and earlier). Code can ensure a safe/valid MXCSR by
// AND'ing this mask against an MXCSR prior to LDMXCSR.
SSE_MXCSR MXCSR_Mask;
SSE_RoundMode SSE_MXCSR::GetRoundMode() const
{
return (SSE_RoundMode)RoundingControl;
}
SSE_MXCSR& SSE_MXCSR::SetRoundMode( SSE_RoundMode mode )
{
pxAssert( (uint)mode < 4 );
RoundingControl = (u32)mode;
return *this;
}
SSE_MXCSR& SSE_MXCSR::ClearExceptionFlags()
{
bitmask &= ~0x3f;
return *this;
}
SSE_MXCSR& SSE_MXCSR::EnableExceptions()
{
bitmask &= ~(0x3f << 7);
return *this;
}
SSE_MXCSR& SSE_MXCSR::DisableExceptions()
{
bitmask |= 0x3f << 7;
return *this;
}
// Applies the reserved-bits mask for the currently running CPU, as fetched from the CPU
// during CPU init/detection.
SSE_MXCSR& SSE_MXCSR::ApplyReserveMask()
{
bitmask &= MXCSR_Mask.bitmask;
return *this;
}
SSE_MXCSR::operator x86Emitter::ModSib32() const
{
return &bitmask;
}
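
Taken together, the intended call pattern looks something like the following (illustrative only; SetCPUState later in this commit does essentially the same job). The variable is static because xLDMXCSR emits code that reads it at runtime, so it must outlive the recompile pass:

    static SSE_MXCSR s_mxcsr;

    void ApplyExampleMXCSR()
    {
        s_mxcsr = EmuConfig.Cpu.sseMXCSR;
        s_mxcsr.ClearExceptionFlags()           // drop any sticky exception flags
               .DisableExceptions()             // mask all SIMD exceptions
               .SetRoundMode( SSEround_Chop )   // example round mode
               .ApplyReserveMask();             // strip bits the host CPU lacks (e.g. DAZ on SSE1)
        xLDMXCSR( s_mxcsr );                    // emits: ldmxcsr [&s_mxcsr], via the ModSib32 operator
    }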
namespace x86Emitter { namespace x86Emitter {
@ -451,19 +500,35 @@ __forceinline void xFEMMS() { xWrite16( 0x0E0F ); }
// Store Streaming SIMD Extension Control/Status to Mem32. // Store Streaming SIMD Extension Control/Status to Mem32.
__emitinline void xSTMXCSR( u32* dest ) __emitinline void xSTMXCSR( const ModSib32& dest )
{ {
SimdPrefix( 0, 0xae ); SimdPrefix( 0, 0xae );
EmitSibMagic( 3, dest ); EmitSibMagic( 3, dest );
} }
// Load Streaming SIMD Extension Control/Status from Mem32. // Load Streaming SIMD Extension Control/Status from Mem32.
__emitinline void xLDMXCSR( const u32* src ) __emitinline void xLDMXCSR( const ModSib32& src )
{ {
SimdPrefix( 0, 0xae ); SimdPrefix( 0, 0xae );
EmitSibMagic( 2, src ); EmitSibMagic( 2, src );
} }
// Save x87 FPU, MMX Technology, and SSE State to buffer
// Target buffer must be at least 512 bytes in length to hold the result.
__emitinline void xFXSAVE( const ModSib32& dest )
{
SimdPrefix( 0, 0xae );
EmitSibMagic( 0, dest );
}
// Restore x87 FPU, MMX, XMM, and MXCSR state.
// Source buffer should be 512 bytes in length.
__emitinline void xFXRSTOR( const ModSib32& src )
{
SimdPrefix( 0, 0xae );
EmitSibMagic( 1, src );
}
////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////
// MMX Mov Instructions (MOVD, MOVQ, MOVSS). // MMX Mov Instructions (MOVD, MOVQ, MOVSS).
// //

View File

@ -45,5 +45,5 @@ extern wxString ShiftJIS_ConvertString( const char* src, int maxlen );
// Some homeless externs. This is as good a spot as any for now... // Some homeless externs. This is as good a spot as any for now...
extern void SetCPUState(u32 sseMXCSR, u32 sseVUMXCSR); extern void SetCPUState(SSE_MXCSR sseMXCSR, SSE_MXCSR sseVUMXCSR);
extern u32 g_sseVUMXCSR, g_sseMXCSR; extern SSE_MXCSR g_sseVUMXCSR, g_sseMXCSR;

View File

@ -15,6 +15,8 @@
#pragma once #pragma once
#include "x86emitter/tools.h"
class IniInterface; class IniInterface;
enum PluginsEnum_t enum PluginsEnum_t
@ -142,12 +144,13 @@ public:
{ {
RecompilerOptions Recompiler; RecompilerOptions Recompiler;
u32 sseMXCSR; SSE_MXCSR sseMXCSR;
u32 sseVUMXCSR; SSE_MXCSR sseVUMXCSR;
CpuOptions(); CpuOptions();
void LoadSave( IniInterface& conf ); void LoadSave( IniInterface& conf );
void ApplySanityCheck();
bool operator ==( const CpuOptions& right ) const bool operator ==( const CpuOptions& right ) const
{ {
return OpEqu( sseMXCSR ) && OpEqu( sseVUMXCSR ) && OpEqu( Recompiler ); return OpEqu( sseMXCSR ) && OpEqu( sseVUMXCSR ) && OpEqu( Recompiler );

View File

@ -876,7 +876,7 @@ void mmap_MarkCountedRamPage( u32 paddr )
); );
m_PageProtectInfo[rampage].Mode = ProtMode_Write; m_PageProtectInfo[rampage].Mode = ProtMode_Write;
HostSys::MemProtect( &psM[rampage<<12], 1, Protect_ReadOnly ); HostSys::MemProtect( &psM[rampage<<12], __pagesize, Protect_ReadOnly );
} }
// offset - offset of address relative to psM. // offset - offset of address relative to psM.
@ -889,7 +889,7 @@ static __forceinline void mmap_ClearCpuBlock( uint offset )
pxAssertMsg( m_PageProtectInfo[rampage].Mode != ProtMode_Manual, pxAssertMsg( m_PageProtectInfo[rampage].Mode != ProtMode_Manual,
"Attempted to clear a block that is already under manual protection." ); "Attempted to clear a block that is already under manual protection." );
HostSys::MemProtect( &psM[rampage<<12], 1, Protect_ReadWrite ); HostSys::MemProtect( &psM[rampage<<12], __pagesize, Protect_ReadWrite );
m_PageProtectInfo[rampage].Mode = ProtMode_Manual; m_PageProtectInfo[rampage].Mode = ProtMode_Manual;
Cpu->Clear( m_PageProtectInfo[rampage].ReverseRamMap, 0x400 ); Cpu->Clear( m_PageProtectInfo[rampage].ReverseRamMap, 0x400 );
} }

View File

@ -703,25 +703,24 @@ void patchFunc_roundmode( char * cmd, char * param )
int index; int index;
char * pText; char * pText;
u32 eetype = (EmuConfig.Cpu.sseMXCSR & 0x6000); SSE_RoundMode eetype = EmuConfig.Cpu.sseMXCSR.GetRoundMode();
u32 vutype = (EmuConfig.Cpu.sseVUMXCSR & 0x6000); SSE_RoundMode vutype = EmuConfig.Cpu.sseVUMXCSR.GetRoundMode();
index = 0; index = 0;
pText = strtok( param, ", " ); pText = strtok( param, ", " );
while(pText != NULL) while(pText != NULL)
{ {
u32 type = 0xffff; SSE_RoundMode type;
if( stricmp(pText, "near") == 0 ) if( stricmp(pText, "near") == 0 )
type = 0x0000; type = SSEround_Nearest;
else if( stricmp(pText, "down") == 0 ) else if( stricmp(pText, "down") == 0 )
type = 0x2000; type = SSEround_NegInf;
else if( stricmp(pText, "up") == 0 ) else if( stricmp(pText, "up") == 0 )
type = 0x4000; type = SSEround_PosInf;
else if( stricmp(pText, "chop") == 0 ) else if( stricmp(pText, "chop") == 0 )
type = 0x6000; type = SSEround_Chop;
else
if( type == 0xffff )
{ {
Console.WriteLn("bad argument (%s) to round mode! skipping...\n", pText); Console.WriteLn("bad argument (%s) to round mode! skipping...\n", pText);
break; break;
@ -747,7 +746,10 @@ void patchFunc_zerogs(char* cmd, char* param)
sscanf(param, "%x", &g_ZeroGSOptions); sscanf(param, "%x", &g_ZeroGSOptions);
} }
void SetRoundMode(u32 ee, u32 vu) void SetRoundMode(SSE_RoundMode ee, SSE_RoundMode vu)
{ {
SetCPUState( (EmuConfig.Cpu.sseMXCSR & 0x9fff) | ee, (EmuConfig.Cpu.sseVUMXCSR & 0x9fff) | vu); SSE_MXCSR mxfpu = EmuConfig.Cpu.sseMXCSR;
SSE_MXCSR mxvu = EmuConfig.Cpu.sseVUMXCSR;
SetCPUState( mxfpu.SetRoundMode( ee ), mxvu.SetRoundMode( vu ) );
} }

View File

@ -107,10 +107,8 @@ extern void SetFastMemory(int); // iR5900LoadStore.c
//extern int g_VUGameFixes; //extern int g_VUGameFixes;
extern int g_ZeroGSOptions; extern int g_ZeroGSOptions;
extern u32 g_sseMXCSR;
extern u32 g_sseVUMXCSR;
extern void SetRoundMode(u32 ee, u32 vu); extern void SetRoundMode(SSE_RoundMode ee, SSE_RoundMode vu);
extern int LoadPatch(const wxString& patchfile); extern int LoadPatch(const wxString& patchfile);
#endif /* __PATCH_H__ */ #endif /* __PATCH_H__ */

View File

@ -79,9 +79,15 @@ Pcsx2Config::RecompilerOptions::RecompilerOptions() : bitset(0)
void Pcsx2Config::RecompilerOptions::ApplySanityCheck() void Pcsx2Config::RecompilerOptions::ApplySanityCheck()
{ {
int fpuCount = (int)fpuOverflow + (int)fpuExtraOverflow + (int)fpuFullMode; bool fpuIsRight = true;
if( fpuCount > 1 ) if( fpuExtraOverflow )
fpuIsRight = fpuOverflow;
if( fpuFullMode )
fpuIsRight = !fpuOverflow && !fpuExtraOverflow;
if( !fpuIsRight )
{ {
// Values are wonky; assume the defaults. // Values are wonky; assume the defaults.
fpuOverflow = RecompilerOptions().fpuOverflow; fpuOverflow = RecompilerOptions().fpuOverflow;
@ -89,14 +95,18 @@ void Pcsx2Config::RecompilerOptions::ApplySanityCheck()
fpuFullMode = RecompilerOptions().fpuFullMode; fpuFullMode = RecompilerOptions().fpuFullMode;
} }
int vuCount = (int)vuOverflow + (int)vuExtraOverflow + (int)vuSignOverflow; bool vuIsOk = true;
if( fpuCount > 1 ) if( vuExtraOverflow ) vuIsOk = vuIsOk && vuOverflow;
if( vuSignOverflow ) vuIsOk = vuIsOk && vuExtraOverflow;
if( !vuIsOk )
{ {
// Values are wonky; assume the defaults. // Values are wonky; assume the defaults.
vuOverflow = RecompilerOptions().vuOverflow; vuOverflow = RecompilerOptions().vuOverflow;
vuExtraOverflow = RecompilerOptions().vuExtraOverflow; vuExtraOverflow = RecompilerOptions().vuExtraOverflow;
vuSignOverflow = RecompilerOptions().vuSignOverflow; vuSignOverflow = RecompilerOptions().vuSignOverflow;
vuUnderflow = RecompilerOptions().vuUnderflow;
} }
} }
@ -123,10 +133,18 @@ void Pcsx2Config::RecompilerOptions::LoadSave( IniInterface& ini )
IniBitBool( fpuFullMode ); IniBitBool( fpuFullMode );
} }
Pcsx2Config::CpuOptions::CpuOptions() : Pcsx2Config::CpuOptions::CpuOptions()
sseMXCSR( DEFAULT_sseMXCSR )
, sseVUMXCSR( DEFAULT_sseVUMXCSR )
{ {
sseMXCSR.bitmask = DEFAULT_sseMXCSR;
sseVUMXCSR.bitmask = DEFAULT_sseVUMXCSR;
}
void Pcsx2Config::CpuOptions::ApplySanityCheck()
{
sseMXCSR.ClearExceptionFlags().DisableExceptions();
sseVUMXCSR.ClearExceptionFlags().DisableExceptions();
Recompiler.ApplySanityCheck();
} }
void Pcsx2Config::CpuOptions::LoadSave( IniInterface& ini ) void Pcsx2Config::CpuOptions::LoadSave( IniInterface& ini )
@ -134,8 +152,13 @@ void Pcsx2Config::CpuOptions::LoadSave( IniInterface& ini )
CpuOptions defaults; CpuOptions defaults;
IniScopedGroup path( ini, L"CPU" ); IniScopedGroup path( ini, L"CPU" );
IniEntry( sseMXCSR ); IniBitBoolEx( sseMXCSR.DenormalsAreZero, "FPU.DenormalsAreZero" );
IniEntry( sseVUMXCSR ); IniBitBoolEx( sseMXCSR.FlushToZero, "FPU.FlushToZero" );
IniBitfieldEx( sseMXCSR.RoundingControl, "FPU.Roundmode" );
IniBitBoolEx( sseVUMXCSR.DenormalsAreZero, "VU.DenormalsAreZero" );
IniBitBoolEx( sseVUMXCSR.FlushToZero, "VU.FlushToZero" );
IniBitfieldEx( sseVUMXCSR.RoundingControl, "VU.Roundmode" );
Recompiler.LoadSave( ini ); Recompiler.LoadSave( ini );
} }

View File

@ -163,3 +163,6 @@ protected:
#define IniEntry( varname ) ini.Entry( wxT(#varname), varname, defaults.varname ) #define IniEntry( varname ) ini.Entry( wxT(#varname), varname, defaults.varname )
#define IniBitfield( varname ) varname = ini.EntryBitfield( wxT(#varname), varname, defaults.varname ) #define IniBitfield( varname ) varname = ini.EntryBitfield( wxT(#varname), varname, defaults.varname )
#define IniBitBool( varname ) varname = ini.EntryBitBool( wxT(#varname), !!varname, defaults.varname ) #define IniBitBool( varname ) varname = ini.EntryBitBool( wxT(#varname), !!varname, defaults.varname )
#define IniBitfieldEx( varname, textname ) varname = ini.EntryBitfield( wxT(textname), varname, defaults.varname )
#define IniBitBoolEx( varname, textname ) varname = ini.EntryBitBool( wxT(textname), !!varname, defaults.varname )

View File

@ -250,7 +250,7 @@ namespace Panels
protected: protected:
void OnRestoreDefaults( wxCommandEvent& evt ); void OnRestoreDefaults( wxCommandEvent& evt );
void ApplyRoundmode( u32& mxcsr ); void ApplyRoundmode( SSE_MXCSR& mxcsr );
}; };
class AdvancedOptionsFPU : public BaseAdvancedCpuOptions class AdvancedOptionsFPU : public BaseAdvancedCpuOptions

View File

@ -16,9 +16,6 @@
#include "PrecompiledHeader.h" #include "PrecompiledHeader.h"
#include "ConfigurationPanels.h" #include "ConfigurationPanels.h"
static const u32 MXCSR_DAZ = 0x0040; // bit enable for Denormals Are Zero
static const u32 MXCSR_FTZ = 0x8000; // bit enable for Flush to Zero
using namespace wxHelpers; using namespace wxHelpers;
Panels::BaseAdvancedCpuOptions::BaseAdvancedCpuOptions( wxWindow& parent, int idealWidth ) : Panels::BaseAdvancedCpuOptions::BaseAdvancedCpuOptions( wxWindow& parent, int idealWidth ) :
@ -87,10 +84,10 @@ Panels::AdvancedOptionsFPU::AdvancedOptionsFPU( wxWindow& parent, int idealWidth
Pcsx2Config::CpuOptions& cpuOps( g_Conf->EmuOptions.Cpu ); Pcsx2Config::CpuOptions& cpuOps( g_Conf->EmuOptions.Cpu );
Pcsx2Config::RecompilerOptions& recOps( cpuOps.Recompiler ); Pcsx2Config::RecompilerOptions& recOps( cpuOps.Recompiler );
m_Option_FTZ->SetValue( !!(cpuOps.sseMXCSR & MXCSR_FTZ) ); m_Option_FTZ->SetValue( cpuOps.sseMXCSR.FlushToZero );
m_Option_DAZ->SetValue( !!(cpuOps.sseMXCSR & MXCSR_DAZ) ); m_Option_DAZ->SetValue( cpuOps.sseMXCSR.DenormalsAreZero );
m_Option_Round[(cpuOps.sseMXCSR >> 13) & 3]->SetValue( true ); m_Option_Round[cpuOps.sseMXCSR.RoundingControl]->SetValue( true );
m_Option_Normal->SetValue( recOps.fpuOverflow ); m_Option_Normal->SetValue( recOps.fpuOverflow );
m_Option_ExtraSign->SetValue( recOps.fpuExtraOverflow ); m_Option_ExtraSign->SetValue( recOps.fpuExtraOverflow );
@ -110,10 +107,10 @@ Panels::AdvancedOptionsVU::AdvancedOptionsVU( wxWindow& parent, int idealWidth )
Pcsx2Config::CpuOptions& cpuOps( g_Conf->EmuOptions.Cpu ); Pcsx2Config::CpuOptions& cpuOps( g_Conf->EmuOptions.Cpu );
Pcsx2Config::RecompilerOptions& recOps( cpuOps.Recompiler ); Pcsx2Config::RecompilerOptions& recOps( cpuOps.Recompiler );
m_Option_FTZ->SetValue( !!(cpuOps.sseVUMXCSR & MXCSR_FTZ) ); m_Option_FTZ->SetValue( cpuOps.sseVUMXCSR.FlushToZero );
m_Option_DAZ->SetValue( !!(cpuOps.sseVUMXCSR & MXCSR_DAZ) ); m_Option_DAZ->SetValue( cpuOps.sseVUMXCSR.DenormalsAreZero );
m_Option_Round[(cpuOps.sseVUMXCSR >> 13) & 3]->SetValue( true ); m_Option_Round[cpuOps.sseVUMXCSR.RoundingControl]->SetValue( true );
m_Option_Normal->SetValue( recOps.vuOverflow ); m_Option_Normal->SetValue( recOps.vuOverflow );
m_Option_Extra->SetValue( recOps.vuExtraOverflow ); m_Option_Extra->SetValue( recOps.vuExtraOverflow );
@ -223,21 +220,19 @@ void Panels::CpuPanelVU::Apply()
recOps.UseMicroVU1 = m_Option_mVU1->GetValue(); recOps.UseMicroVU1 = m_Option_mVU1->GetValue();
} }
void Panels::BaseAdvancedCpuOptions::ApplyRoundmode( u32& mxcsr ) void Panels::BaseAdvancedCpuOptions::ApplyRoundmode( SSE_MXCSR& mxcsr )
{ {
mxcsr = 0;
for( int i=0; i<4; ++i ) for( int i=0; i<4; ++i )
{ {
if( m_Option_Round[i]->GetValue() ) if( m_Option_Round[i]->GetValue() )
{ {
mxcsr |= (i << 13); mxcsr.RoundingControl = i;
break; break;
} }
} }
if( m_Option_DAZ->GetValue() ) mxcsr |= MXCSR_DAZ; mxcsr.DenormalsAreZero = m_Option_DAZ->GetValue();
if( m_Option_FTZ->GetValue() ) mxcsr |= MXCSR_FTZ; mxcsr.FlushToZero = m_Option_FTZ->GetValue();
} }
void Panels::AdvancedOptionsFPU::Apply() void Panels::AdvancedOptionsFPU::Apply()
@ -245,13 +240,14 @@ void Panels::AdvancedOptionsFPU::Apply()
Pcsx2Config::CpuOptions& cpuOps( g_Conf->EmuOptions.Cpu ); Pcsx2Config::CpuOptions& cpuOps( g_Conf->EmuOptions.Cpu );
Pcsx2Config::RecompilerOptions& recOps( cpuOps.Recompiler ); Pcsx2Config::RecompilerOptions& recOps( cpuOps.Recompiler );
cpuOps.sseMXCSR = Pcsx2Config::CpuOptions().sseMXCSR; // set default
ApplyRoundmode( cpuOps.sseMXCSR ); ApplyRoundmode( cpuOps.sseMXCSR );
recOps.fpuOverflow = m_Option_Normal->GetValue();
recOps.fpuExtraOverflow = m_Option_ExtraSign->GetValue(); recOps.fpuExtraOverflow = m_Option_ExtraSign->GetValue();
recOps.fpuOverflow = m_Option_Normal->GetValue() || recOps.fpuExtraOverflow;
recOps.fpuFullMode = m_Option_Full->GetValue(); recOps.fpuFullMode = m_Option_Full->GetValue();
recOps.ApplySanityCheck(); cpuOps.ApplySanityCheck();
} }
void Panels::AdvancedOptionsVU::Apply() void Panels::AdvancedOptionsVU::Apply()
@ -259,11 +255,12 @@ void Panels::AdvancedOptionsVU::Apply()
Pcsx2Config::CpuOptions& cpuOps( g_Conf->EmuOptions.Cpu ); Pcsx2Config::CpuOptions& cpuOps( g_Conf->EmuOptions.Cpu );
Pcsx2Config::RecompilerOptions& recOps( cpuOps.Recompiler ); Pcsx2Config::RecompilerOptions& recOps( cpuOps.Recompiler );
cpuOps.sseVUMXCSR = Pcsx2Config::CpuOptions().sseVUMXCSR; // set default
ApplyRoundmode( cpuOps.sseVUMXCSR ); ApplyRoundmode( cpuOps.sseVUMXCSR );
recOps.vuOverflow = m_Option_Normal->GetValue();
recOps.vuExtraOverflow = m_Option_Extra->GetValue();
recOps.vuSignOverflow = m_Option_ExtraSign->GetValue(); recOps.vuSignOverflow = m_Option_ExtraSign->GetValue();
recOps.vuExtraOverflow = m_Option_Extra->GetValue() || recOps.vuSignOverflow;
recOps.vuOverflow = m_Option_Normal->GetValue() || recOps.vuExtraOverflow;
recOps.ApplySanityCheck(); cpuOps.ApplySanityCheck();
} }

View File

@ -21,6 +21,8 @@
#include "iR5900.h" #include "iR5900.h"
#include "iFPU.h" #include "iFPU.h"
using namespace x86Emitter;
//------------------------------------------------------------------ //------------------------------------------------------------------
namespace R5900 { namespace R5900 {
namespace Dynarec { namespace Dynarec {
@ -1100,7 +1102,7 @@ void recDIVhelper2(int regd, int regt) // Doesn't sets flags
ClampValues(regd); ClampValues(regd);
} }
static __aligned16 u32 roundmode_temp[4]; static __aligned16 SSE_MXCSR roundmode_nearest, roundmode_neg;
void recDIV_S_xmm(int info) void recDIV_S_xmm(int info)
{ {
@ -1109,12 +1111,23 @@ void recDIV_S_xmm(int info)
//if (t0reg == -1) {Console.Error("FPU: DIV Allocation Error!");} //if (t0reg == -1) {Console.Error("FPU: DIV Allocation Error!");}
//Console.WriteLn("DIV"); //Console.WriteLn("DIV");
if ((g_sseMXCSR & 0x00006000) != 0x00000000) { // Set roundmode to nearest if it isn't already if (g_sseMXCSR.GetRoundMode() != SSEround_Nearest)
{
// Set roundmode to nearest since it isn't already
//Console.WriteLn("div to nearest"); //Console.WriteLn("div to nearest");
roundmode_temp[0] = (g_sseMXCSR & 0xFFFF9FFF); // Set new roundmode to nearest
roundmode_temp[1] = g_sseMXCSR; // Backup old Roundmode if( CHECK_FPUNEGDIVHACK )
if (CHECK_FPUNEGDIVHACK) roundmode_temp[0] |= 0x2000; // Negative Roundmode {
SSE_LDMXCSR ((uptr)&roundmode_temp[0]); // Recompile Roundmode Change roundmode_neg = g_sseMXCSR;
roundmode_neg.SetRoundMode( SSEround_NegInf );
xLDMXCSR( roundmode_neg );
}
else
{
roundmode_nearest = g_sseMXCSR;
roundmode_nearest.SetRoundMode( SSEround_Nearest );
xLDMXCSR( roundmode_nearest );
}
roundmodeFlag = 1; roundmodeFlag = 1;
} }
@ -1163,7 +1176,7 @@ void recDIV_S_xmm(int info)
break; break;
} }
if (roundmodeFlag == 1) { // Set roundmode back if it was changed if (roundmodeFlag == 1) { // Set roundmode back if it was changed
SSE_LDMXCSR ((uptr)&roundmode_temp[1]); xLDMXCSR (g_sseMXCSR);
} }
_freeXMMreg(t0reg); _freeXMMreg(t0reg);
} }
@ -1663,15 +1676,17 @@ FPURECOMPILE_CONSTCODE(SUBA_S, XMMINFO_WRITEACC|XMMINFO_READS|XMMINFO_READT);
void recSQRT_S_xmm(int info) void recSQRT_S_xmm(int info)
{ {
u8* pjmp; u8* pjmp;
int roundmodeFlag = 0; bool roundmodeFlag = false;
//Console.WriteLn("FPU: SQRT"); //Console.WriteLn("FPU: SQRT");
if ((g_sseMXCSR & 0x00006000) != 0x00000000) { // Set roundmode to nearest if it isn't already if (g_sseMXCSR.GetRoundMode() != SSEround_Nearest)
{
// Set roundmode to nearest if it isn't already
//Console.WriteLn("sqrt to nearest"); //Console.WriteLn("sqrt to nearest");
roundmode_temp[0] = (g_sseMXCSR & 0xFFFF9FFF); // Set new roundmode roundmode_nearest = g_sseMXCSR;
roundmode_temp[1] = g_sseMXCSR; // Backup old Roundmode roundmode_nearest.SetRoundMode( SSEround_Nearest );
SSE_LDMXCSR ((uptr)&roundmode_temp[0]); // Recompile Roundmode Change xLDMXCSR (roundmode_nearest);
roundmodeFlag = 1; roundmodeFlag = true;
} }
if( info & PROCESS_EE_T ) SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_T); if( info & PROCESS_EE_T ) SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_T);
@ -1699,9 +1714,7 @@ void recSQRT_S_xmm(int info)
SSE_SQRTSS_XMM_to_XMM(EEREC_D, EEREC_D); SSE_SQRTSS_XMM_to_XMM(EEREC_D, EEREC_D);
if (CHECK_FPU_EXTRA_OVERFLOW) ClampValues(EEREC_D); // Shouldn't need to clamp again since SQRT of a number will always be smaller than the original number, doing it just incase :/ if (CHECK_FPU_EXTRA_OVERFLOW) ClampValues(EEREC_D); // Shouldn't need to clamp again since SQRT of a number will always be smaller than the original number, doing it just incase :/
if (roundmodeFlag == 1) { // Set roundmode back if it was changed if (roundmodeFlag) xLDMXCSR (g_sseMXCSR);
SSE_LDMXCSR ((uptr)&roundmode_temp[1]);
}
} }
FPURECOMPILE_CONSTCODE(SQRT_S, XMMINFO_WRITED|XMMINFO_READT); FPURECOMPILE_CONSTCODE(SQRT_S, XMMINFO_WRITED|XMMINFO_READT);

View File

@ -26,6 +26,9 @@
/* Can be made faster by not converting stuff back and forth between instructions. */ /* Can be made faster by not converting stuff back and forth between instructions. */
using namespace x86Emitter;
//set overflow flag (set only if FPU_RESULT is 1) //set overflow flag (set only if FPU_RESULT is 1)
#define FPU_FLAGS_OVERFLOW 1 #define FPU_FLAGS_OVERFLOW 1
//set underflow flag (set only if FPU_RESULT is 1) //set underflow flag (set only if FPU_RESULT is 1)
@ -629,7 +632,7 @@ void recDIVhelper2(int regd, int regt) // Doesn't sets flags
ToPS2FPU(regd, false, regt, false); ToPS2FPU(regd, false, regt, false);
} }
static __aligned16 u32 roundmode_temp[4]; static __aligned16 SSE_MXCSR roundmode_nearest, roundmode_neg;
void recDIV_S_xmm(int info) void recDIV_S_xmm(int info)
{ {
@ -637,11 +640,23 @@ void recDIV_S_xmm(int info)
//if (t0reg == -1) {Console.Error("FPU: DIV Allocation Error!");} //if (t0reg == -1) {Console.Error("FPU: DIV Allocation Error!");}
//Console.WriteLn("DIV"); //Console.WriteLn("DIV");
if ((g_sseMXCSR & 0x00006000) != 0x00000000) { // Set roundmode to nearest if it isn't already if (g_sseMXCSR.GetRoundMode() != SSEround_Nearest)
{
// Set roundmode to nearest since it isn't already
//Console.WriteLn("div to nearest"); //Console.WriteLn("div to nearest");
roundmode_temp[0] = (g_sseMXCSR & 0xFFFF9FFF); // Set new roundmode
roundmode_temp[1] = g_sseMXCSR; // Backup old Roundmode if( CHECK_FPUNEGDIVHACK )
SSE_LDMXCSR ((uptr)&roundmode_temp[0]); // Recompile Roundmode Change {
roundmode_neg = g_sseMXCSR;
roundmode_neg.SetRoundMode( SSEround_NegInf );
xLDMXCSR( roundmode_neg );
}
else
{
roundmode_nearest = g_sseMXCSR;
roundmode_nearest.SetRoundMode( SSEround_Nearest );
xLDMXCSR( roundmode_nearest );
}
roundmodeFlag = 1; roundmodeFlag = 1;
} }
@ -657,7 +672,7 @@ void recDIV_S_xmm(int info)
SSE_MOVSS_XMM_to_XMM(EEREC_D, sreg); SSE_MOVSS_XMM_to_XMM(EEREC_D, sreg);
if (roundmodeFlag == 1) { // Set roundmode back if it was changed if (roundmodeFlag == 1) { // Set roundmode back if it was changed
SSE_LDMXCSR ((uptr)&roundmode_temp[1]); xLDMXCSR (g_sseMXCSR);
} }
_freeXMMreg(sreg); _freeXMMreg(treg); _freeXMMreg(sreg); _freeXMMreg(treg);
} }
@ -902,11 +917,13 @@ void recSQRT_S_xmm(int info)
if (t1reg == -1) {Console.Error("FPU: SQRT Allocation Error!");} if (t1reg == -1) {Console.Error("FPU: SQRT Allocation Error!");}
//Console.WriteLn("FPU: SQRT"); //Console.WriteLn("FPU: SQRT");
if ((g_sseMXCSR & 0x00006000) != 0x00000000) { // Set roundmode to nearest if it isn't already if (g_sseMXCSR.GetRoundMode() != SSEround_Nearest)
{
// Set roundmode to nearest if it isn't already
//Console.WriteLn("sqrt to nearest"); //Console.WriteLn("sqrt to nearest");
roundmode_temp[0] = (g_sseMXCSR & 0xFFFF9FFF); // Set new roundmode roundmode_nearest = g_sseMXCSR;
roundmode_temp[1] = g_sseMXCSR; // Backup old Roundmode roundmode_nearest.SetRoundMode( SSEround_Nearest );
SSE_LDMXCSR ((uptr)&roundmode_temp[0]); // Recompile Roundmode Change xLDMXCSR (roundmode_nearest);
roundmodeFlag = 1; roundmodeFlag = 1;
} }
@ -935,9 +952,10 @@ void recSQRT_S_xmm(int info)
ToPS2FPU(EEREC_D, false, t1reg, false); ToPS2FPU(EEREC_D, false, t1reg, false);
if (roundmodeFlag == 1) { // Set roundmode back if it was changed if (roundmodeFlag == 1) {
SSE_LDMXCSR ((uptr)&roundmode_temp[1]); xLDMXCSR (g_sseMXCSR);
} }
_freeX86reg(tempReg); _freeX86reg(tempReg);
_freeXMMreg(t1reg); _freeXMMreg(t1reg);
} }
@ -1020,13 +1038,15 @@ void recRSQRT_S_xmm(int info)
{ {
int sreg, treg; int sreg, treg;
int roundmodeFlag = 0; bool roundmodeFlag = false;
if ((g_sseMXCSR & 0x00006000) != 0x00000000) { // Set roundmode to nearest if it isn't already if (g_sseMXCSR.GetRoundMode() != SSEround_Nearest)
//Console.WriteLn("rsqrt to nearest"); {
roundmode_temp[0] = (g_sseMXCSR & 0xFFFF9FFF); // Set new roundmode // Set roundmode to nearest if it isn't already
roundmode_temp[1] = g_sseMXCSR; // Backup old Roundmode //Console.WriteLn("rsqrt to nearest");
SSE_LDMXCSR ((uptr)&roundmode_temp[0]); // Recompile Roundmode Change roundmode_nearest = g_sseMXCSR;
roundmodeFlag = 1; roundmode_nearest.SetRoundMode( SSEround_Nearest );
xLDMXCSR (roundmode_nearest);
roundmodeFlag = true;
} }
ALLOC_S(sreg); ALLOC_T(treg); ALLOC_S(sreg); ALLOC_T(treg);
@ -1040,9 +1060,7 @@ void recRSQRT_S_xmm(int info)
_freeXMMreg(treg); _freeXMMreg(sreg); _freeXMMreg(treg); _freeXMMreg(sreg);
if (roundmodeFlag == 1) { // Set roundmode back if it was changed if (roundmodeFlag) xLDMXCSR (g_sseMXCSR);
SSE_LDMXCSR ((uptr)&roundmode_temp[1]);
}
} }
FPURECOMPILE_CONSTCODE(RSQRT_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT); FPURECOMPILE_CONSTCODE(RSQRT_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT);

View File

@ -16,38 +16,23 @@
#include "PrecompiledHeader.h" #include "PrecompiledHeader.h"
u32 g_sseMXCSR = DEFAULT_sseMXCSR; SSE_MXCSR g_sseMXCSR = { DEFAULT_sseMXCSR };
u32 g_sseVUMXCSR = DEFAULT_sseVUMXCSR; SSE_MXCSR g_sseVUMXCSR = { DEFAULT_sseVUMXCSR };
////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////
// SetCPUState -- for assignment of SSE roundmodes and clampmodes. // SetCPUState -- for assignment of SSE roundmodes and clampmodes.
// //
void SetCPUState(u32 sseMXCSR, u32 sseVUMXCSR) void SetCPUState(SSE_MXCSR sseMXCSR, SSE_MXCSR sseVUMXCSR)
{ {
//Msgbox::Alert("SetCPUState: Config.sseMXCSR = %x; Config.sseVUMXCSR = %x \n", Config.sseMXCSR, Config.sseVUMXCSR); //Msgbox::Alert("SetCPUState: Config.sseMXCSR = %x; Config.sseVUMXCSR = %x \n", Config.sseMXCSR, Config.sseVUMXCSR);
// SSE STATE //
// WARNING: do not touch unless you know what you are doing
sseMXCSR &= 0xffff; // clear the upper 16 bits since they shouldn't be set g_sseMXCSR = sseMXCSR.ApplyReserveMask();
sseVUMXCSR &= 0xffff; g_sseVUMXCSR = sseVUMXCSR.ApplyReserveMask();
if( !x86caps.hasStreamingSIMD2Extensions )
{
// SSE1 cpus do not support Denormals Are Zero flag (throws an exception
// if we don't mask them off)
sseMXCSR &= ~0x0040;
sseVUMXCSR &= ~0x0040;
}
g_sseMXCSR = sseMXCSR;
g_sseVUMXCSR = sseVUMXCSR;
#ifdef _MSC_VER #ifdef _MSC_VER
__asm ldmxcsr g_sseMXCSR; // set the new sse control __asm ldmxcsr g_sseMXCSR; // set the new sse control
#else #else
__asm__ __volatile__("ldmxcsr %[g_sseMXCSR]" : : [g_sseMXCSR]"m"(g_sseMXCSR) ); __asm__ __volatile__("ldmxcsr %[g_sseMXCSR]" : : [g_sseMXCSR]"m"(g_sseMXCSR) );
#endif #endif
//g_sseVUMXCSR = g_sseMXCSR|0x6000;
} }

View File

@ -108,7 +108,7 @@ static void __fastcall iopRecRecompile( const u32 startpc );
static u32 s_store_ebp, s_store_esp; static u32 s_store_ebp, s_store_esp;
// Recompiled code buffer for EE recompiler dispatchers! // Recompiled code buffer for EE recompiler dispatchers!
static u8 __pagealigned iopRecDispatchers[0x1000]; static u8 __pagealigned iopRecDispatchers[__pagesize];
typedef void DynGenFunc(); typedef void DynGenFunc();
@ -134,7 +134,7 @@ static void __fastcall StackFrameCheckFailed( int espORebp, int regval )
{ {
pxFailDev( wxsFormat( L"(R3000A Recompiler Stackframe) Sanity check failed on %s\n\tCurrent=%d; Saved=%d", pxFailDev( wxsFormat( L"(R3000A Recompiler Stackframe) Sanity check failed on %s\n\tCurrent=%d; Saved=%d",
(espORebp==0) ? L"ESP" : L"EBP", regval, (espORebp==0) ? s_store_esp : s_store_ebp ) (espORebp==0) ? L"ESP" : L"EBP", regval, (espORebp==0) ? s_store_esp : s_store_ebp )
); );
// Note: The recompiler will attempt to recover ESP and EBP after returning from this function, // Note: The recompiler will attempt to recover ESP and EBP after returning from this function,
// so typically selecting Continue/Ignore/Cancel for this assertion should allow PCSX2 to con- // so typically selecting Continue/Ignore/Cancel for this assertion should allow PCSX2 to con-
@ -348,10 +348,10 @@ static DynGenFunc* _DynGen_EnterRecompiledCode()
static void _DynGen_Dispatchers() static void _DynGen_Dispatchers()
{ {
// In case init gets called multiple times: // In case init gets called multiple times:
HostSys::MemProtect( iopRecDispatchers, 0x1000, Protect_ReadWrite, false ); HostSys::MemProtectStatic( iopRecDispatchers, Protect_ReadWrite, false );
// clear the buffer to 0xcc (easier debugging). // clear the buffer to 0xcc (easier debugging).
memset_8<0xcc,0x1000>( iopRecDispatchers ); memset_8<0xcc,__pagesize>( iopRecDispatchers );
xSetPtr( iopRecDispatchers ); xSetPtr( iopRecDispatchers );
@ -365,7 +365,7 @@ static void _DynGen_Dispatchers()
iopJITCompileInBlock = _DynGen_JITCompileInBlock(); iopJITCompileInBlock = _DynGen_JITCompileInBlock();
iopEnterRecompiledCode = _DynGen_EnterRecompiledCode(); iopEnterRecompiledCode = _DynGen_EnterRecompiledCode();
HostSys::MemProtect( iopRecDispatchers, 0x1000, Protect_ReadOnly, true ); HostSys::MemProtectStatic( iopRecDispatchers, Protect_ReadOnly, true );
recBlocks.SetJITCompile( iopJITCompile ); recBlocks.SetJITCompile( iopJITCompile );
} }

View File

@ -299,7 +299,7 @@ static u32 g_lastpc = 0;
static u32 s_store_ebp, s_store_esp; static u32 s_store_ebp, s_store_esp;
// Recompiled code buffer for EE recompiler dispatchers! // Recompiled code buffer for EE recompiler dispatchers!
static u8 __pagealigned eeRecDispatchers[0x1000]; static u8 __pagealigned eeRecDispatchers[__pagesize];
typedef void DynGenFunc(); typedef void DynGenFunc();
@ -460,10 +460,10 @@ static DynGenFunc* _DynGen_EnterRecompiledCode()
static void _DynGen_Dispatchers() static void _DynGen_Dispatchers()
{ {
// In case init gets called multiple times: // In case init gets called multiple times:
HostSys::MemProtect( eeRecDispatchers, 0x1000, Protect_ReadWrite, false ); HostSys::MemProtectStatic( eeRecDispatchers, Protect_ReadWrite, false );
// clear the buffer to 0xcc (easier debugging). // clear the buffer to 0xcc (easier debugging).
memset_8<0xcc,0x1000>( eeRecDispatchers ); memset_8<0xcc,__pagesize>( eeRecDispatchers );
xSetPtr( eeRecDispatchers ); xSetPtr( eeRecDispatchers );
@ -477,7 +477,7 @@ static void _DynGen_Dispatchers()
JITCompileInBlock = _DynGen_JITCompileInBlock(); JITCompileInBlock = _DynGen_JITCompileInBlock();
EnterRecompiledCode = _DynGen_EnterRecompiledCode(); EnterRecompiledCode = _DynGen_EnterRecompiledCode();
HostSys::MemProtect( eeRecDispatchers, 0x1000, Protect_ReadOnly, true ); HostSys::MemProtectStatic( eeRecDispatchers, Protect_ReadOnly, true );
recBlocks.SetJITCompile( JITCompile ); recBlocks.SetJITCompile( JITCompile );
} }
@ -630,7 +630,6 @@ void recResetEE( void )
x86FpuState = FPU_STATE; x86FpuState = FPU_STATE;
branch = 0; branch = 0;
SetCPUState(EmuConfig.Cpu.sseMXCSR, EmuConfig.Cpu.sseVUMXCSR);
eeRecIsReset = true; eeRecIsReset = true;
} }
@ -689,6 +688,7 @@ static void recExecute()
{ {
eeRecIsReset = false; eeRecIsReset = false;
g_EEFreezeRegs = true; g_EEFreezeRegs = true;
SetCPUState(g_sseMXCSR, g_sseVUMXCSR);
try { try {
EnterRecompiledCode(); EnterRecompiledCode();
@ -719,6 +719,7 @@ static void recExecute()
pthread_setcancelstate( PTHREAD_CANCEL_DISABLE, &oldstate ); pthread_setcancelstate( PTHREAD_CANCEL_DISABLE, &oldstate );
eeRecIsReset = false; eeRecIsReset = false;
SetCPUState(g_sseMXCSR, g_sseVUMXCSR);
#ifdef _WIN32 #ifdef _WIN32
__try { __try {

View File

@ -243,7 +243,7 @@ namespace vtlb_private
// If it were smaller than a page we'd end up allowing execution rights on some // If it were smaller than a page we'd end up allowing execution rights on some
// other vars additionally (bad!). // other vars additionally (bad!).
// //
static __pagealigned u8 m_IndirectDispatchers[0x1000]; static __pagealigned u8 m_IndirectDispatchers[__pagesize];
// ------------------------------------------------------------------------ // ------------------------------------------------------------------------
// mode - 0 for read, 1 for write! // mode - 0 for read, 1 for write!
@ -286,7 +286,7 @@ static void DynGen_IndirectDispatch( int mode, int bits )
void vtlb_dynarec_init() void vtlb_dynarec_init()
{ {
// In case init gets called multiple times: // In case init gets called multiple times:
HostSys::MemProtect( m_IndirectDispatchers, 0x1000, Protect_ReadWrite, false ); HostSys::MemProtectStatic( m_IndirectDispatchers, Protect_ReadWrite, false );
// clear the buffer to 0xcc (easier debugging). // clear the buffer to 0xcc (easier debugging).
memset_8<0xcc,0x1000>( m_IndirectDispatchers ); memset_8<0xcc,0x1000>( m_IndirectDispatchers );
@ -310,7 +310,7 @@ void vtlb_dynarec_init()
} }
} }
HostSys::MemProtect( m_IndirectDispatchers, 0x1000, Protect_ReadOnly, true ); HostSys::MemProtectStatic( m_IndirectDispatchers, Protect_ReadOnly, true );
} }
////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////

View File

@ -39,7 +39,7 @@ void mVUdispatcherA(mV) {
else { xCALL(mVUexecuteVU1); } else { xCALL(mVUexecuteVU1); }
// Load VU's MXCSR state // Load VU's MXCSR state
xLDMXCSR(&g_sseVUMXCSR); xLDMXCSR(g_sseVUMXCSR);
// Load Regs // Load Regs
#ifdef CHECK_MACROVU0 #ifdef CHECK_MACROVU0
@ -72,7 +72,7 @@ void mVUdispatcherB(mV) {
mVU->exitFunct = x86Ptr; mVU->exitFunct = x86Ptr;
// Load EE's MXCSR state // Load EE's MXCSR state
xLDMXCSR(&g_sseMXCSR); xLDMXCSR(g_sseMXCSR);
// __fastcall = The first two DWORD or smaller arguments are passed in ECX and EDX registers; // __fastcall = The first two DWORD or smaller arguments are passed in ECX and EDX registers;
// all other arguments are passed right to left. // all other arguments are passed right to left.

View File

@ -156,7 +156,7 @@ extern const __aligned(32) mVU_Globals mVUglob;
// This *probably* fixes the crashing bugs in linux when using the optimized memcmp. // This *probably* fixes the crashing bugs in linux when using the optimized memcmp.
// Needs testing... --air // Needs testing... --air
#ifndef __LINUX__ #ifndef __LINUX__
extern __pagealigned u8 mVUsearchXMM[0x1000]; extern __pagealigned u8 mVUsearchXMM[__pagesize];
typedef u32 (__fastcall *mVUCall)(void*, void*); typedef u32 (__fastcall *mVUCall)(void*, void*);
#define mVUquickSearch(dest, src, size) ((((mVUCall)((void*)mVUsearchXMM))(dest, src)) == 0xf) #define mVUquickSearch(dest, src, size) ((((mVUCall)((void*)mVUsearchXMM))(dest, src)) == 0xf)
#define mVUemitSearch() { mVUcustomSearch(); } #define mVUemitSearch() { mVUcustomSearch(); }

View File

@ -515,13 +515,13 @@ void SSE_DIVSS(mV, int to, int from, int t1 = -1, int t2 = -1) {
// Micro VU - Custom Quick Search // Micro VU - Custom Quick Search
//------------------------------------------------------------------ //------------------------------------------------------------------
static __pagealigned u8 mVUsearchXMM[0x1000]; static __pagealigned u8 mVUsearchXMM[__pagesize];
// Generates a custom optimized block-search function // Generates a custom optimized block-search function
// Note: Structs must be 16-byte aligned! (GCC doesn't guarantee this) // Note: Structs must be 16-byte aligned! (GCC doesn't guarantee this)
void mVUcustomSearch() { void mVUcustomSearch() {
HostSys::MemProtect(mVUsearchXMM, 0x1000, Protect_ReadWrite, false); HostSys::MemProtectStatic(mVUsearchXMM, Protect_ReadWrite, false);
memset_8<0xcc,0x1000>(mVUsearchXMM); memset_8<0xcc,__pagesize>(mVUsearchXMM);
xSetPtr(mVUsearchXMM); xSetPtr(mVUsearchXMM);
xMOVAPS (xmm0, ptr32[ecx]); xMOVAPS (xmm0, ptr32[ecx]);
@ -565,5 +565,5 @@ void mVUcustomSearch() {
exitPoint.SetTarget(); exitPoint.SetTarget();
xRET(); xRET();
HostSys::MemProtect(mVUsearchXMM, 0x1000, Protect_ReadOnly, true); HostSys::MemProtectStatic(mVUsearchXMM, Protect_ReadOnly, true);
} }