Rewrote internal handling of SSE roundmodes and DAZ/FTZ (fixes major crash bugs from the previous revision).

* Added SSE_MXCSR union/struct with bitfields and methods for doing common actions.
* Converted all existing MXCSR code to use the new union (see the usage sketch below).
* Added a __pagesize macro for use in conjunction with __pagealigned and dynarec functions.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@2113 96395faa-99c1-11dd-bbfe-3dabce05a288
Jake.Stine 2009-11-02 07:00:59 +00:00
parent 629aad9944
commit 94222f4aaf
28 changed files with 395 additions and 156 deletions
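
For orientation, a minimal sketch of the pattern the new union enables. The identifiers come from the hunks below; the wrapping function is hypothetical and the details differ per call site:

    // Old code masked a raw u32 directly (0x6000 round bits, 0x0040 DAZ, 0x8000 FTZ);
    // the union names those bits and chains the common fixups.
    static __aligned16 SSE_MXCSR roundmode_nearest;     // the emitted LDMXCSR needs stable storage

    void ExampleApplyMxcsr()                            // hypothetical wrapper, illustration only
    {
        SSE_MXCSR mxcsr = EmuConfig.Cpu.sseMXCSR;       // user-configured EE/FPU control state
        mxcsr.DenormalsAreZero = 1;                     // bit 6  (was |= 0x0040)
        mxcsr.FlushToZero      = 1;                     // bit 15 (was |= 0x8000)
        mxcsr.ClearExceptionFlags().DisableExceptions().ApplyReserveMask();

        // Recompiler-side round mode switch, as in recDIV_S_xmm / recSQRT_S_xmm below:
        if( mxcsr.GetRoundMode() != SSEround_Nearest )
        {
            roundmode_nearest = mxcsr;
            roundmode_nearest.SetRoundMode( SSEround_Nearest );
            xLDMXCSR( roundmode_nearest );              // emitter overload takes the union via ModSib32
        }
    }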

View File

@ -176,11 +176,16 @@
// overhead). Furthermore, compilers cannot inline functions that have aligned local
// vars. So use local var alignment with much caution.
//
// Defines the memory page size for the target platform at compilation. All supported platforms
// (which means Intel only right now) have a 4k granularity.
#define __pagesize 0x1000
#ifdef _MSC_VER
# define __aligned(alig) __declspec(align(alig))
# define __aligned16 __declspec(align(16))
# define __pagealigned __declspec(align(0x1000))
# define __pagealigned __declspec(align(__pagesize))
// Deprecated; use __aligned instead.
# define PCSX2_ALIGNED(alig,x) __declspec(align(alig)) x
@ -227,7 +232,7 @@ This theoretically unoptimizes. Not having much luck so far.
# define __aligned(alig) __attribute__((aligned(alig)))
# define __aligned16 __attribute__((aligned(16)))
# define __pagealigned __attribute__((aligned(0x1000)))
# define __pagealigned __attribute__((aligned(__pagesize)))
// Deprecated; use __aligned instead.
# define PCSX2_ALIGNED(alig,x) x __attribute((aligned(alig)))
# define PCSX2_ALIGNED16(x) x __attribute((aligned(16)))

View File

@ -65,6 +65,12 @@ namespace HostSys
extern void MemProtect( void* baseaddr, size_t size, PageProtectionMode mode, bool allowExecution=false );
extern void Munmap( void* base, u32 size );
template< uint size >
void MemProtectStatic( u8 (&arr)[size], PageProtectionMode mode, bool allowExecution=false )
{
MemProtect( arr, size, mode, allowExecution );
}
}
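
A quick sketch of how the new helper is meant to be used. It mirrors the dispatcher-buffer hunks further down; the buffer and function names here are made up:

    static __pagealigned u8 myDispatchers[__pagesize];    // hypothetical buffer

    void RebuildDispatchers()                              // hypothetical, illustration only
    {
        // Array size is deduced from the template parameter, so the 0x1000 literal disappears.
        HostSys::MemProtectStatic( myDispatchers, Protect_ReadWrite, false );
        memset_8<0xcc, __pagesize>( myDispatchers );       // 0xcc = int3, easier debugging
        // ... emit code into the buffer with xSetPtr() and the emitter ...
        HostSys::MemProtectStatic( myDispatchers, Protect_ReadOnly, true );
    }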

View File

@ -338,8 +338,10 @@ namespace x86Emitter
// ------------------------------------------------------------------------
extern void xEMMS();
extern void xSTMXCSR( u32* dest );
extern void xLDMXCSR( const u32* src );
extern void xSTMXCSR( const ModSib32& dest );
extern void xLDMXCSR( const ModSib32& src );
extern void xFXSAVE( const ModSib32& dest );
extern void xFXRSTOR( const ModSib32& src );
extern void xMOVDZX( const xRegisterSSE& to, const xRegister32& from );
extern void xMOVDZX( const xRegisterSSE& to, const ModSibBase& src );
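
The ModSib32 overloads let these ops target arbitrary memory operands. A brief sketch based on the cpudetect and microVU hunks below; the wrapper functions are made up:

    static __pagealigned u8 recSSE[__pagesize];   // executable scratch, as in cpudetect.cpp

    void EmitFxsaveStub()                         // hypothetical wrapper, illustration only
    {
        xSetPtr( recSSE );
        xFXSAVE( ptr32[ecx] );                    // 0F AE /0 -- dump the 512-byte FXSAVE image to [ecx]
        xRET();
    }

    void EmitLoadVuMxcsr()                        // hypothetical wrapper
    {
        xLDMXCSR( g_sseVUMXCSR );                 // SSE_MXCSR converts to a ModSib32 over its bitmask
    }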

View File

@ -21,8 +21,9 @@
extern void cpudetectInit();
// --------------------------------------------------------------------------------------
struct x86CPU_INFO
// x86CPU_INFO
// --------------------------------------------------------------------------------------
struct x86CPU_INFO
{
u32 FamilyID; // Processor Family
u32 Model; // Processor Model
@ -91,6 +92,75 @@ struct x86CPU_INFO
u32 hasStreamingSIMD4ExtensionsA:1;
};
enum SSE_RoundMode
{
SSEround_Nearest = 0,
SSEround_NegInf,
SSEround_PosInf,
SSEround_Chop,
};
// --------------------------------------------------------------------------------------
// SSE_MXCSR - Control/Status Register (bitfield)
// --------------------------------------------------------------------------------------
// Bits 0-5 are exception flags; used only if SSE exceptions have been enabled.
// Bits in this field are "sticky" and, once an exception has occurred, must be manually
// cleared using LDMXCSR or FXRSTOR.
//
// Bits 7-12 are the masks for disabling the exceptions in bits 0-5. Cleared bits allow
// exceptions, set bits mask exceptions from being raised.
//
union SSE_MXCSR
{
u32 bitmask;
struct
{
u32
InvalidOpFlag :1,
DenormalFlag :1,
DivideByZeroFlag :1,
OverflowFlag :1,
UnderflowFlag :1,
PrecisionFlag :1,
// This bit is supported only on SSE2 or better CPUs. Setting it to 1 on
// SSE1 CPUs will cause a general protection fault when executing
// LDMXCSR.
DenormalsAreZero :1,
InvalidOpMask :1,
DenormalMask :1,
DivideByZeroMask :1,
OverflowMask :1,
UnderflowMask :1,
PrecisionMask :1,
RoundingControl :2,
FlushToZero :1;
};
SSE_RoundMode GetRoundMode() const;
SSE_MXCSR& SetRoundMode( SSE_RoundMode mode );
SSE_MXCSR& ClearExceptionFlags();
SSE_MXCSR& EnableExceptions();
SSE_MXCSR& DisableExceptions();
SSE_MXCSR& ApplyReserveMask();
bool operator ==( const SSE_MXCSR& right ) const
{
return bitmask == right.bitmask;
}
bool operator !=( const SSE_MXCSR& right ) const
{
return bitmask != right.bitmask;
}
operator x86Emitter::ModSib32() const;
};
extern SSE_MXCSR MXCSR_Mask;
//////////////////////////////////////////////////////////////////////////////////////////
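
To make the flag/mask split described in the comment above concrete, a small hedged example (the 0x1F80 literal is the x86 power-on MXCSR default, not something defined by this commit):

    SSE_MXCSR m = { 0x1F80 };        // all exceptions masked, round-to-nearest
    m.EnableExceptions();            // clears mask bits 7-12: faults are raised as they occur
    m.DisableExceptions();           // sets them again: exceptions only set the sticky bits 0-5
    m.ClearExceptionFlags();         // wipe sticky bits 0-5 before writing the value back
    m.ApplyReserveMask();            // AND against MXCSR_Mask so LDMXCSR won't fault on older CPUs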

View File

@ -64,7 +64,7 @@
# define CallAddress( ptr ) \
__asm{ call offset ptr }
# define FastCallAddress( ptr, param ) \
# define FastCallAddress( ptr, param1 ) \
__asm{ __asm mov ecx, param1 __asm call offset ptr }
# define FastCallAddress2( ptr, param1, param2 ) \
@ -75,8 +75,8 @@
# define CallAddress( ptr ) \
( (void (*)()) &(ptr)[0] )()
# define FastCallAddress( ptr, param ) \
( (void (*)( int )) &(ptr)[0] )( param )
# define FastCallAddress( ptr, param1 ) \
( (void (*)( int )) &(ptr)[0] )( param1 )
# define FastCallAddress2( ptr, param1, param2 ) \
( (void (*)( int, int )) &(ptr)[0] )( param1, param2 )

View File

@ -15,6 +15,8 @@
#pragma once
#include "Utilities/Dependencies.h"
// Register counts for x86/32 mode:
static const uint iREGCNT_XMM = 8;
static const uint iREGCNT_GPR = 8;

View File

@ -39,9 +39,16 @@ namespace HostSys
void MemProtect( void* baseaddr, size_t size, PageProtectionMode mode, bool allowExecution )
{
pxAssertDev( ((size & (__pagesize-1)) == 0), wxsFormat(
L"Memory block size must be a multiple of the target platform's page size.\n"
L"\tPage Size: 0x%04x (%d), Block Size: 0x%04x (%d)",
__pagesize, __pagesize, size, size )
);
int lnxmode = 0;
// make sure size is aligned to the system page size:
// Check is redundant against the assertion above, but might as well...
size = (size + m_pagemask) & ~m_pagemask;
switch( mode )

View File

@ -33,6 +33,12 @@ namespace HostSys
void MemProtect( void* baseaddr, size_t size, PageProtectionMode mode, bool allowExecution )
{
pxAssertDev( ((size & (__pagesize-1)) == 0), wxsFormat(
L"Memory block size must be a multiple of the target platform's page size.\n"
L"\tPage Size: 0x%04x (%d), Block Size: 0x%04x (%d)",
__pagesize, __pagesize, size, size )
);
DWORD winmode = 0;
switch( mode )

View File

@ -21,7 +21,6 @@
#include "internal.h"
#include "tools.h"
using namespace x86Emitter;
__aligned16 x86CPU_INFO x86caps;
@ -73,8 +72,8 @@ static char* bool_to_char( bool testcond )
#endif
#ifdef _WINDOWS_
static HANDLE s_threadId = NULL;
static DWORD s_oldmask = ERROR_INVALID_PARAMETER;
static HANDLE s_threadId = NULL;
static DWORD s_oldmask = ERROR_INVALID_PARAMETER;
#endif
static void SetSingleAffinity()
@ -148,17 +147,10 @@ static s64 CPUSpeedHz( u64 time )
}
////////////////////////////////////////////////////
int arr[] = {
0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865,
0x51203229,0x20646175,0x20555043,0x20202020 ,
0x20202020,0x20402020,0x36362e32,0x7a4847
};
void cpudetectInit()
{
u32 regs[ 4 ];
u32 cmds;
int cputype=0; // Cpu type
//AMD 64 STUFF
u32 x86_64_8BITBRANDID;
u32 x86_64_12BITBRANDID;
@ -180,7 +172,9 @@ void cpudetectInit()
((u32*)x86caps.VendorName)[ 1 ] = regs[ 3 ];
((u32*)x86caps.VendorName)[ 2 ] = regs[ 2 ];
// Hack - prevents reg[2] & reg[3] from being optimized out of existance!
// Hack - prevents reg[2] & reg[3] from being optimized out of existence! (GCC only)
// FIXME: We use a better __cpuid now with proper inline asm constraints. This hack is
// probably obsolete. Linux devs please re-confirm. --air
num = sprintf(str, "\tx86Flags = %8.8x %8.8x\n", regs[3], regs[2]);
u32 LogicalCoresPerPhysicalCPU = 0;
@ -200,7 +194,9 @@ void cpudetectInit()
x86caps.Flags2 = regs[ 2 ];
}
}
/* detect multicore for intel cpu */
// detect multicore for Intel cpu
if ((cmds >= 0x00000004) && !strcmp("GenuineIntel",x86caps.VendorName))
{
if ( iCpuId( 0x00000004, regs ) != -1 )
@ -222,7 +218,9 @@ void cpudetectInit()
}
}
/* detect multicore for amd cpu */
// detect multicore for AMD cpu
if ((cmds >= 0x80000008) && !strcmp("AuthenticAMD",x86caps.VendorName))
{
if ( iCpuId( 0x80000008, regs ) != -1 )
@ -250,8 +248,22 @@ void cpudetectInit()
strcpy( x86caps.TypeName, "Unknown");
break;
}
if ( x86caps.VendorName[ 0 ] == 'G' ){ cputype=0;}//trick lines but if you know a way better ;p
if ( x86caps.VendorName[ 0 ] == 'A' ){ cputype=1;}
#if 0
// vendor identification, currently unneeded.
// It's really not recommended that we base much (if anything) on CPU vendor names.
// But the code is left in as an ifdef, for possible future reference.
int cputype=0; // Cpu type
static const char* Vendor_Intel = "GenuineIntel";
static const char* Vendor_AMD = "AuthenticAMD";
if( memcmp( x86caps.VendorName, Vendor_Intel, 12 ) == 0 ) { cputype = 0; } else
if( memcmp( x86caps.VendorName, Vendor_AMD, 12 ) == 0 ) { cputype = 1; }
if ( x86caps.VendorName[ 0 ] == 'G' ) { cputype = 0; }
if ( x86caps.VendorName[ 0 ] == 'A' ) { cputype = 1; }
#endif
memzero( x86caps.FamilyName );
iCpuId( 0x80000002, (u32*)x86caps.FamilyName);
@ -311,6 +323,9 @@ void cpudetectInit()
x86caps.hasStreamingSIMD4Extensions = ( x86caps.Flags2 >> 19 ) & 1; //sse4.1
x86caps.hasStreamingSIMD4Extensions2 = ( x86caps.Flags2 >> 20 ) & 1; //sse4.2
static __pagealigned u8 recSSE[__pagesize];
HostSys::MemProtectStatic( recSSE, Protect_ReadWrite, true );
//////////////////////////////////////////////////////////////////////////////////////////
// SIMD Instruction Support Detection
//
@ -325,7 +340,6 @@ void cpudetectInit()
// detection relies on the CPUID bits alone.
#ifdef _MSC_VER
u8* recSSE = (u8*)HostSys::Mmap( NULL, 0x1000 );
if( recSSE != NULL )
{
xSetPtr( recSSE );
@ -384,6 +398,25 @@ void cpudetectInit()
);
}
#endif
////////////////////////////////////////////////////////////////////////////////////////////
// Establish MXCSR Mask...
if( x86caps.hasStreamingSIMDExtensions )
{
xSetPtr( recSSE );
xFXSAVE( ptr32[ecx] );
xRET();
__aligned16 u32 _fxsave[512/4];		// FXSAVE target must be 16-byte aligned
memzero( _fxsave );
((void (__fastcall *)(u32*))&recSSE[0])( _fxsave );
if( _fxsave[28/4] == 0 )
MXCSR_Mask.bitmask = 0xFFBF;
else
MXCSR_Mask.bitmask = _fxsave[28/4];
}
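
For reference only: the commit reads the mask through this runtime-emitted stub so the FXSAVE executes only behind the hasStreamingSIMDExtensions check. On a compiler that provides the _fxsave intrinsic (an assumption, not something this commit relies on), the equivalent read would look roughly like:

    #include <cstring>
    #include <immintrin.h>                  // _fxsave; may need -mfxsr on GCC/Clang

    static u32 ReadMxcsrMask()              // hypothetical helper, illustration only
    {
        __aligned16 u8 fxImage[512];        // FXSAVE target must be 16-byte aligned
        std::memset( fxImage, 0, sizeof(fxImage) );
        _fxsave( fxImage );
        u32 mask;
        std::memcpy( &mask, &fxImage[28], sizeof(mask) );   // MXCSR_MASK is at byte offset 28
        return (mask == 0) ? 0xFFBF : mask; // 0 means pre-SSE2 era: assume the legacy mask
    }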
////////////////////////////////////////////////////////////////////////////////////////////
// Core Counting!

View File

@ -15,6 +15,55 @@
#include "PrecompiledHeader.h"
#include "internal.h"
#include "tools.h"
// Mask of valid bit fields for the target CPU. Typically this is either 0xFFFF (SSE2
// or better) or 0xFFBF (SSE1 and earlier). Code can ensure a safe/valid MXCSR by
// AND'ing this mask against an MXCSR prior to LDMXCSR.
SSE_MXCSR MXCSR_Mask;
SSE_RoundMode SSE_MXCSR::GetRoundMode() const
{
return (SSE_RoundMode)RoundingControl;
}
SSE_MXCSR& SSE_MXCSR::SetRoundMode( SSE_RoundMode mode )
{
pxAssert( (uint)mode < 4 );
RoundingControl = (u32)mode;
return *this;
}
SSE_MXCSR& SSE_MXCSR::ClearExceptionFlags()
{
bitmask &= ~0x3f;
return *this;
}
SSE_MXCSR& SSE_MXCSR::EnableExceptions()
{
bitmask &= ~(0x3f << 7);
return *this;
}
SSE_MXCSR& SSE_MXCSR::DisableExceptions()
{
bitmask |= 0x3f << 7;
return *this;
}
// Applies the reserved-bits mask for the host CPU, as fetched from the CPU during
// init/detection (see cpudetectInit).
SSE_MXCSR& SSE_MXCSR::ApplyReserveMask()
{
bitmask &= MXCSR_Mask.bitmask;
return *this;
}
SSE_MXCSR::operator x86Emitter::ModSib32() const
{
return &bitmask;
}
namespace x86Emitter {
@ -451,19 +500,35 @@ __forceinline void xFEMMS() { xWrite16( 0x0E0F ); }
// Store Streaming SIMD Extension Control/Status to Mem32.
__emitinline void xSTMXCSR( u32* dest )
__emitinline void xSTMXCSR( const ModSib32& dest )
{
SimdPrefix( 0, 0xae );
EmitSibMagic( 3, dest );
}
// Load Streaming SIMD Extension Control/Status from Mem32.
__emitinline void xLDMXCSR( const u32* src )
__emitinline void xLDMXCSR( const ModSib32& src )
{
SimdPrefix( 0, 0xae );
EmitSibMagic( 2, src );
}
// Save x87 FPU, MMX Technology, and SSE state to buffer.
// Target buffer must be at least 512 bytes in length (and 16-byte aligned) to hold the result.
__emitinline void xFXSAVE( const ModSib32& dest )
{
SimdPrefix( 0, 0xae );
EmitSibMagic( 0, dest );
}
// Restore x87 FPU, MMX, XMM, and MXCSR state.
// Source buffer should be 512 bytes in length (and 16-byte aligned).
__emitinline void xFXRSTOR( const ModSib32& src )
{
SimdPrefix( 0, 0xae );
EmitSibMagic( 1, src );		// FXRSTOR is 0F AE /1 (FXSAVE is /0)
}
//////////////////////////////////////////////////////////////////////////////////////////
// MMX Mov Instructions (MOVD, MOVQ, MOVSS).
//

View File

@ -45,5 +45,5 @@ extern wxString ShiftJIS_ConvertString( const char* src, int maxlen );
// Some homeless externs. This is as good a spot as any for now...
extern void SetCPUState(u32 sseMXCSR, u32 sseVUMXCSR);
extern u32 g_sseVUMXCSR, g_sseMXCSR;
extern void SetCPUState(SSE_MXCSR sseMXCSR, SSE_MXCSR sseVUMXCSR);
extern SSE_MXCSR g_sseVUMXCSR, g_sseMXCSR;

View File

@ -15,6 +15,8 @@
#pragma once
#include "x86emitter/tools.h"
class IniInterface;
enum PluginsEnum_t
@ -142,12 +144,13 @@ public:
{
RecompilerOptions Recompiler;
u32 sseMXCSR;
u32 sseVUMXCSR;
SSE_MXCSR sseMXCSR;
SSE_MXCSR sseVUMXCSR;
CpuOptions();
void LoadSave( IniInterface& conf );
void ApplySanityCheck();
bool operator ==( const CpuOptions& right ) const
{
return OpEqu( sseMXCSR ) && OpEqu( sseVUMXCSR ) && OpEqu( Recompiler );

View File

@ -876,7 +876,7 @@ void mmap_MarkCountedRamPage( u32 paddr )
);
m_PageProtectInfo[rampage].Mode = ProtMode_Write;
HostSys::MemProtect( &psM[rampage<<12], 1, Protect_ReadOnly );
HostSys::MemProtect( &psM[rampage<<12], __pagesize, Protect_ReadOnly );
}
// offset - offset of address relative to psM.
@ -889,7 +889,7 @@ static __forceinline void mmap_ClearCpuBlock( uint offset )
pxAssertMsg( m_PageProtectInfo[rampage].Mode != ProtMode_Manual,
"Attempted to clear a block that is already under manual protection." );
HostSys::MemProtect( &psM[rampage<<12], 1, Protect_ReadWrite );
HostSys::MemProtect( &psM[rampage<<12], __pagesize, Protect_ReadWrite );
m_PageProtectInfo[rampage].Mode = ProtMode_Manual;
Cpu->Clear( m_PageProtectInfo[rampage].ReverseRamMap, 0x400 );
}

View File

@ -703,25 +703,24 @@ void patchFunc_roundmode( char * cmd, char * param )
int index;
char * pText;
u32 eetype = (EmuConfig.Cpu.sseMXCSR & 0x6000);
u32 vutype = (EmuConfig.Cpu.sseVUMXCSR & 0x6000);
SSE_RoundMode eetype = EmuConfig.Cpu.sseMXCSR.GetRoundMode();
SSE_RoundMode vutype = EmuConfig.Cpu.sseVUMXCSR.GetRoundMode();
index = 0;
pText = strtok( param, ", " );
while(pText != NULL)
{
u32 type = 0xffff;
SSE_RoundMode type;
if( stricmp(pText, "near") == 0 )
type = 0x0000;
type = SSEround_Nearest;
else if( stricmp(pText, "down") == 0 )
type = 0x2000;
type = SSEround_NegInf;
else if( stricmp(pText, "up") == 0 )
type = 0x4000;
type = SSEround_PosInf;
else if( stricmp(pText, "chop") == 0 )
type = 0x6000;
if( type == 0xffff )
type = SSEround_Chop;
else
{
Console.WriteLn("bad argument (%s) to round mode! skipping...\n", pText);
break;
@ -747,7 +746,10 @@ void patchFunc_zerogs(char* cmd, char* param)
sscanf(param, "%x", &g_ZeroGSOptions);
}
void SetRoundMode(u32 ee, u32 vu)
void SetRoundMode(SSE_RoundMode ee, SSE_RoundMode vu)
{
SetCPUState( (EmuConfig.Cpu.sseMXCSR & 0x9fff) | ee, (EmuConfig.Cpu.sseVUMXCSR & 0x9fff) | vu);
SSE_MXCSR mxfpu = EmuConfig.Cpu.sseMXCSR;
SSE_MXCSR mxvu = EmuConfig.Cpu.sseVUMXCSR;
SetCPUState( mxfpu.SetRoundMode( ee ), mxvu.SetRoundMode( vu ) );
}

View File

@ -107,10 +107,8 @@ extern void SetFastMemory(int); // iR5900LoadStore.c
//extern int g_VUGameFixes;
extern int g_ZeroGSOptions;
extern u32 g_sseMXCSR;
extern u32 g_sseVUMXCSR;
extern void SetRoundMode(u32 ee, u32 vu);
extern void SetRoundMode(SSE_RoundMode ee, SSE_RoundMode vu);
extern int LoadPatch(const wxString& patchfile);
#endif /* __PATCH_H__ */

View File

@ -79,9 +79,15 @@ Pcsx2Config::RecompilerOptions::RecompilerOptions() : bitset(0)
void Pcsx2Config::RecompilerOptions::ApplySanityCheck()
{
int fpuCount = (int)fpuOverflow + (int)fpuExtraOverflow + (int)fpuFullMode;
if( fpuCount > 1 )
bool fpuIsRight = true;
if( fpuExtraOverflow )
fpuIsRight = fpuOverflow;
if( fpuFullMode )
fpuIsRight = !fpuOverflow && !fpuExtraOverflow;
if( !fpuIsRight )
{
// Values are wonky; assume the defaults.
fpuOverflow = RecompilerOptions().fpuOverflow;
@ -89,14 +95,18 @@ void Pcsx2Config::RecompilerOptions::ApplySanityCheck()
fpuFullMode = RecompilerOptions().fpuFullMode;
}
int vuCount = (int)vuOverflow + (int)vuExtraOverflow + (int)vuSignOverflow;
bool vuIsOk = true;
if( fpuCount > 1 )
if( vuExtraOverflow ) vuIsOk = vuIsOk && vuOverflow;
if( vuSignOverflow ) vuIsOk = vuIsOk && vuExtraOverflow;
if( !vuIsOk )
{
// Values are wonky; assume the defaults.
vuOverflow = RecompilerOptions().vuOverflow;
vuExtraOverflow = RecompilerOptions().vuExtraOverflow;
vuSignOverflow = RecompilerOptions().vuSignOverflow;
vuUnderflow = RecompilerOptions().vuUnderflow;
}
}
@ -123,10 +133,18 @@ void Pcsx2Config::RecompilerOptions::LoadSave( IniInterface& ini )
IniBitBool( fpuFullMode );
}
Pcsx2Config::CpuOptions::CpuOptions() :
sseMXCSR( DEFAULT_sseMXCSR )
, sseVUMXCSR( DEFAULT_sseVUMXCSR )
Pcsx2Config::CpuOptions::CpuOptions()
{
sseMXCSR.bitmask = DEFAULT_sseMXCSR;
sseVUMXCSR.bitmask = DEFAULT_sseVUMXCSR;
}
void Pcsx2Config::CpuOptions::ApplySanityCheck()
{
sseMXCSR.ClearExceptionFlags().DisableExceptions();
sseVUMXCSR.ClearExceptionFlags().DisableExceptions();
Recompiler.ApplySanityCheck();
}
void Pcsx2Config::CpuOptions::LoadSave( IniInterface& ini )
@ -134,8 +152,13 @@ void Pcsx2Config::CpuOptions::LoadSave( IniInterface& ini )
CpuOptions defaults;
IniScopedGroup path( ini, L"CPU" );
IniEntry( sseMXCSR );
IniEntry( sseVUMXCSR );
IniBitBoolEx( sseMXCSR.DenormalsAreZero, "FPU.DenormalsAreZero" );
IniBitBoolEx( sseMXCSR.FlushToZero, "FPU.FlushToZero" );
IniBitfieldEx( sseMXCSR.RoundingControl, "FPU.Roundmode" );
IniBitBoolEx( sseVUMXCSR.DenormalsAreZero, "VU.DenormalsAreZero" );
IniBitBoolEx( sseVUMXCSR.FlushToZero, "VU.FlushToZero" );
IniBitfieldEx( sseVUMXCSR.RoundingControl, "VU.Roundmode" );
Recompiler.LoadSave( ini );
}

View File

@ -163,3 +163,6 @@ protected:
#define IniEntry( varname ) ini.Entry( wxT(#varname), varname, defaults.varname )
#define IniBitfield( varname ) varname = ini.EntryBitfield( wxT(#varname), varname, defaults.varname )
#define IniBitBool( varname ) varname = ini.EntryBitBool( wxT(#varname), !!varname, defaults.varname )
#define IniBitfieldEx( varname, textname ) varname = ini.EntryBitfield( wxT(textname), varname, defaults.varname )
#define IniBitBoolEx( varname, textname ) varname = ini.EntryBitBool( wxT(textname), !!varname, defaults.varname )

View File

@ -250,7 +250,7 @@ namespace Panels
protected:
void OnRestoreDefaults( wxCommandEvent& evt );
void ApplyRoundmode( u32& mxcsr );
void ApplyRoundmode( SSE_MXCSR& mxcsr );
};
class AdvancedOptionsFPU : public BaseAdvancedCpuOptions

View File

@ -16,9 +16,6 @@
#include "PrecompiledHeader.h"
#include "ConfigurationPanels.h"
static const u32 MXCSR_DAZ = 0x0040; // bit enable for Denormals Are Zero
static const u32 MXCSR_FTZ = 0x8000; // bit enable for Flush to Zero
using namespace wxHelpers;
Panels::BaseAdvancedCpuOptions::BaseAdvancedCpuOptions( wxWindow& parent, int idealWidth ) :
@ -87,10 +84,10 @@ Panels::AdvancedOptionsFPU::AdvancedOptionsFPU( wxWindow& parent, int idealWidth
Pcsx2Config::CpuOptions& cpuOps( g_Conf->EmuOptions.Cpu );
Pcsx2Config::RecompilerOptions& recOps( cpuOps.Recompiler );
m_Option_FTZ->SetValue( !!(cpuOps.sseMXCSR & MXCSR_FTZ) );
m_Option_DAZ->SetValue( !!(cpuOps.sseMXCSR & MXCSR_DAZ) );
m_Option_FTZ->SetValue( cpuOps.sseMXCSR.FlushToZero );
m_Option_DAZ->SetValue( cpuOps.sseMXCSR.DenormalsAreZero );
m_Option_Round[(cpuOps.sseMXCSR >> 13) & 3]->SetValue( true );
m_Option_Round[cpuOps.sseMXCSR.RoundingControl]->SetValue( true );
m_Option_Normal->SetValue( recOps.fpuOverflow );
m_Option_ExtraSign->SetValue( recOps.fpuExtraOverflow );
@ -110,10 +107,10 @@ Panels::AdvancedOptionsVU::AdvancedOptionsVU( wxWindow& parent, int idealWidth )
Pcsx2Config::CpuOptions& cpuOps( g_Conf->EmuOptions.Cpu );
Pcsx2Config::RecompilerOptions& recOps( cpuOps.Recompiler );
m_Option_FTZ->SetValue( !!(cpuOps.sseVUMXCSR & MXCSR_FTZ) );
m_Option_DAZ->SetValue( !!(cpuOps.sseVUMXCSR & MXCSR_DAZ) );
m_Option_FTZ->SetValue( cpuOps.sseVUMXCSR.FlushToZero );
m_Option_DAZ->SetValue( cpuOps.sseVUMXCSR.DenormalsAreZero );
m_Option_Round[(cpuOps.sseVUMXCSR >> 13) & 3]->SetValue( true );
m_Option_Round[cpuOps.sseVUMXCSR.RoundingControl]->SetValue( true );
m_Option_Normal->SetValue( recOps.vuOverflow );
m_Option_Extra->SetValue( recOps.vuExtraOverflow );
@ -223,21 +220,19 @@ void Panels::CpuPanelVU::Apply()
recOps.UseMicroVU1 = m_Option_mVU1->GetValue();
}
void Panels::BaseAdvancedCpuOptions::ApplyRoundmode( u32& mxcsr )
void Panels::BaseAdvancedCpuOptions::ApplyRoundmode( SSE_MXCSR& mxcsr )
{
mxcsr = 0;
for( int i=0; i<4; ++i )
{
if( m_Option_Round[i]->GetValue() )
{
mxcsr |= (i << 13);
mxcsr.RoundingControl = i;
break;
}
}
if( m_Option_DAZ->GetValue() ) mxcsr |= MXCSR_DAZ;
if( m_Option_FTZ->GetValue() ) mxcsr |= MXCSR_FTZ;
mxcsr.DenormalsAreZero = m_Option_DAZ->GetValue();
mxcsr.FlushToZero = m_Option_FTZ->GetValue();
}
void Panels::AdvancedOptionsFPU::Apply()
@ -245,13 +240,14 @@ void Panels::AdvancedOptionsFPU::Apply()
Pcsx2Config::CpuOptions& cpuOps( g_Conf->EmuOptions.Cpu );
Pcsx2Config::RecompilerOptions& recOps( cpuOps.Recompiler );
cpuOps.sseMXCSR = Pcsx2Config::CpuOptions().sseMXCSR; // set default
ApplyRoundmode( cpuOps.sseMXCSR );
recOps.fpuOverflow = m_Option_Normal->GetValue();
recOps.fpuExtraOverflow = m_Option_ExtraSign->GetValue();
recOps.fpuOverflow = m_Option_Normal->GetValue() || recOps.fpuExtraOverflow;
recOps.fpuFullMode = m_Option_Full->GetValue();
recOps.ApplySanityCheck();
cpuOps.ApplySanityCheck();
}
void Panels::AdvancedOptionsVU::Apply()
@ -259,11 +255,12 @@ void Panels::AdvancedOptionsVU::Apply()
Pcsx2Config::CpuOptions& cpuOps( g_Conf->EmuOptions.Cpu );
Pcsx2Config::RecompilerOptions& recOps( cpuOps.Recompiler );
cpuOps.sseVUMXCSR = Pcsx2Config::CpuOptions().sseVUMXCSR; // set default
ApplyRoundmode( cpuOps.sseVUMXCSR );
recOps.vuOverflow = m_Option_Normal->GetValue();
recOps.vuExtraOverflow = m_Option_Extra->GetValue();
recOps.vuSignOverflow = m_Option_ExtraSign->GetValue();
recOps.vuExtraOverflow = m_Option_Extra->GetValue() || recOps.vuSignOverflow;
recOps.vuOverflow = m_Option_Normal->GetValue() || recOps.vuExtraOverflow;
recOps.ApplySanityCheck();
cpuOps.ApplySanityCheck();
}

View File

@ -21,6 +21,8 @@
#include "iR5900.h"
#include "iFPU.h"
using namespace x86Emitter;
//------------------------------------------------------------------
namespace R5900 {
namespace Dynarec {
@ -1100,7 +1102,7 @@ void recDIVhelper2(int regd, int regt) // Doesn't sets flags
ClampValues(regd);
}
static __aligned16 u32 roundmode_temp[4];
static __aligned16 SSE_MXCSR roundmode_nearest, roundmode_neg;
void recDIV_S_xmm(int info)
{
@ -1109,12 +1111,23 @@ void recDIV_S_xmm(int info)
//if (t0reg == -1) {Console.Error("FPU: DIV Allocation Error!");}
//Console.WriteLn("DIV");
if ((g_sseMXCSR & 0x00006000) != 0x00000000) { // Set roundmode to nearest if it isn't already
if (g_sseMXCSR.GetRoundMode() != SSEround_Nearest)
{
// Set roundmode to nearest since it isn't already
//Console.WriteLn("div to nearest");
roundmode_temp[0] = (g_sseMXCSR & 0xFFFF9FFF); // Set new roundmode to nearest
roundmode_temp[1] = g_sseMXCSR; // Backup old Roundmode
if (CHECK_FPUNEGDIVHACK) roundmode_temp[0] |= 0x2000; // Negative Roundmode
SSE_LDMXCSR ((uptr)&roundmode_temp[0]); // Recompile Roundmode Change
if( CHECK_FPUNEGDIVHACK )
{
roundmode_neg = g_sseMXCSR;
roundmode_neg.SetRoundMode( SSEround_NegInf );
xLDMXCSR( roundmode_neg );
}
else
{
roundmode_nearest = g_sseMXCSR;
roundmode_nearest.SetRoundMode( SSEround_Nearest );
xLDMXCSR( roundmode_nearest );
}
roundmodeFlag = 1;
}
@ -1163,7 +1176,7 @@ void recDIV_S_xmm(int info)
break;
}
if (roundmodeFlag == 1) { // Set roundmode back if it was changed
SSE_LDMXCSR ((uptr)&roundmode_temp[1]);
xLDMXCSR (g_sseMXCSR);
}
_freeXMMreg(t0reg);
}
@ -1663,15 +1676,17 @@ FPURECOMPILE_CONSTCODE(SUBA_S, XMMINFO_WRITEACC|XMMINFO_READS|XMMINFO_READT);
void recSQRT_S_xmm(int info)
{
u8* pjmp;
int roundmodeFlag = 0;
bool roundmodeFlag = false;
//Console.WriteLn("FPU: SQRT");
if ((g_sseMXCSR & 0x00006000) != 0x00000000) { // Set roundmode to nearest if it isn't already
if (g_sseMXCSR.GetRoundMode() != SSEround_Nearest)
{
// Set roundmode to nearest if it isn't already
//Console.WriteLn("sqrt to nearest");
roundmode_temp[0] = (g_sseMXCSR & 0xFFFF9FFF); // Set new roundmode
roundmode_temp[1] = g_sseMXCSR; // Backup old Roundmode
SSE_LDMXCSR ((uptr)&roundmode_temp[0]); // Recompile Roundmode Change
roundmodeFlag = 1;
roundmode_nearest = g_sseMXCSR;
roundmode_nearest.SetRoundMode( SSEround_Nearest );
xLDMXCSR (roundmode_nearest);
roundmodeFlag = true;
}
if( info & PROCESS_EE_T ) SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_T);
@ -1699,9 +1714,7 @@ void recSQRT_S_xmm(int info)
SSE_SQRTSS_XMM_to_XMM(EEREC_D, EEREC_D);
if (CHECK_FPU_EXTRA_OVERFLOW) ClampValues(EEREC_D); // Shouldn't need to clamp again since SQRT of a number will always be smaller than the original number, doing it just incase :/
if (roundmodeFlag == 1) { // Set roundmode back if it was changed
SSE_LDMXCSR ((uptr)&roundmode_temp[1]);
}
if (roundmodeFlag) xLDMXCSR (g_sseMXCSR);
}
FPURECOMPILE_CONSTCODE(SQRT_S, XMMINFO_WRITED|XMMINFO_READT);

View File

@ -26,6 +26,9 @@
/* Can be made faster by not converting stuff back and forth between instructions. */
using namespace x86Emitter;
//set overflow flag (set only if FPU_RESULT is 1)
#define FPU_FLAGS_OVERFLOW 1
//set underflow flag (set only if FPU_RESULT is 1)
@ -629,7 +632,7 @@ void recDIVhelper2(int regd, int regt) // Doesn't sets flags
ToPS2FPU(regd, false, regt, false);
}
static __aligned16 u32 roundmode_temp[4];
static __aligned16 SSE_MXCSR roundmode_nearest, roundmode_neg;
void recDIV_S_xmm(int info)
{
@ -637,11 +640,23 @@ void recDIV_S_xmm(int info)
//if (t0reg == -1) {Console.Error("FPU: DIV Allocation Error!");}
//Console.WriteLn("DIV");
if ((g_sseMXCSR & 0x00006000) != 0x00000000) { // Set roundmode to nearest if it isn't already
if (g_sseMXCSR.GetRoundMode() != SSEround_Nearest)
{
// Set roundmode to nearest since it isn't already
//Console.WriteLn("div to nearest");
roundmode_temp[0] = (g_sseMXCSR & 0xFFFF9FFF); // Set new roundmode
roundmode_temp[1] = g_sseMXCSR; // Backup old Roundmode
SSE_LDMXCSR ((uptr)&roundmode_temp[0]); // Recompile Roundmode Change
if( CHECK_FPUNEGDIVHACK )
{
roundmode_neg = g_sseMXCSR;
roundmode_neg.SetRoundMode( SSEround_NegInf );
xLDMXCSR( roundmode_neg );
}
else
{
roundmode_nearest = g_sseMXCSR;
roundmode_nearest.SetRoundMode( SSEround_Nearest );
xLDMXCSR( roundmode_nearest );
}
roundmodeFlag = 1;
}
@ -657,7 +672,7 @@ void recDIV_S_xmm(int info)
SSE_MOVSS_XMM_to_XMM(EEREC_D, sreg);
if (roundmodeFlag == 1) { // Set roundmode back if it was changed
SSE_LDMXCSR ((uptr)&roundmode_temp[1]);
xLDMXCSR (g_sseMXCSR);
}
_freeXMMreg(sreg); _freeXMMreg(treg);
}
@ -902,11 +917,13 @@ void recSQRT_S_xmm(int info)
if (t1reg == -1) {Console.Error("FPU: SQRT Allocation Error!");}
//Console.WriteLn("FPU: SQRT");
if ((g_sseMXCSR & 0x00006000) != 0x00000000) { // Set roundmode to nearest if it isn't already
if (g_sseMXCSR.GetRoundMode() != SSEround_Nearest)
{
// Set roundmode to nearest if it isn't already
//Console.WriteLn("sqrt to nearest");
roundmode_temp[0] = (g_sseMXCSR & 0xFFFF9FFF); // Set new roundmode
roundmode_temp[1] = g_sseMXCSR; // Backup old Roundmode
SSE_LDMXCSR ((uptr)&roundmode_temp[0]); // Recompile Roundmode Change
roundmode_nearest = g_sseMXCSR;
roundmode_nearest.SetRoundMode( SSEround_Nearest );
xLDMXCSR (roundmode_nearest);
roundmodeFlag = 1;
}
@ -935,9 +952,10 @@ void recSQRT_S_xmm(int info)
ToPS2FPU(EEREC_D, false, t1reg, false);
if (roundmodeFlag == 1) { // Set roundmode back if it was changed
SSE_LDMXCSR ((uptr)&roundmode_temp[1]);
if (roundmodeFlag == 1) {
xLDMXCSR (g_sseMXCSR);
}
_freeX86reg(tempReg);
_freeXMMreg(t1reg);
}
@ -1020,13 +1038,15 @@ void recRSQRT_S_xmm(int info)
{
int sreg, treg;
int roundmodeFlag = 0;
if ((g_sseMXCSR & 0x00006000) != 0x00000000) { // Set roundmode to nearest if it isn't already
//Console.WriteLn("rsqrt to nearest");
roundmode_temp[0] = (g_sseMXCSR & 0xFFFF9FFF); // Set new roundmode
roundmode_temp[1] = g_sseMXCSR; // Backup old Roundmode
SSE_LDMXCSR ((uptr)&roundmode_temp[0]); // Recompile Roundmode Change
roundmodeFlag = 1;
bool roundmodeFlag = false;
if (g_sseMXCSR.GetRoundMode() != SSEround_Nearest)
{
// Set roundmode to nearest if it isn't already
//Console.WriteLn("rsqrt to nearest");
roundmode_nearest = g_sseMXCSR;
roundmode_nearest.SetRoundMode( SSEround_Nearest );
xLDMXCSR (roundmode_nearest);
roundmodeFlag = true;
}
ALLOC_S(sreg); ALLOC_T(treg);
@ -1040,9 +1060,7 @@ void recRSQRT_S_xmm(int info)
_freeXMMreg(treg); _freeXMMreg(sreg);
if (roundmodeFlag == 1) { // Set roundmode back if it was changed
SSE_LDMXCSR ((uptr)&roundmode_temp[1]);
}
if (roundmodeFlag) xLDMXCSR (g_sseMXCSR);
}
FPURECOMPILE_CONSTCODE(RSQRT_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT);

View File

@ -16,38 +16,23 @@
#include "PrecompiledHeader.h"
u32 g_sseMXCSR = DEFAULT_sseMXCSR;
u32 g_sseVUMXCSR = DEFAULT_sseVUMXCSR;
SSE_MXCSR g_sseMXCSR = { DEFAULT_sseMXCSR };
SSE_MXCSR g_sseVUMXCSR = { DEFAULT_sseVUMXCSR };
//////////////////////////////////////////////////////////////////////////////////////////
// SetCPUState -- for assignment of SSE roundmodes and clampmodes.
//
void SetCPUState(u32 sseMXCSR, u32 sseVUMXCSR)
void SetCPUState(SSE_MXCSR sseMXCSR, SSE_MXCSR sseVUMXCSR)
{
//Msgbox::Alert("SetCPUState: Config.sseMXCSR = %x; Config.sseVUMXCSR = %x \n", Config.sseMXCSR, Config.sseVUMXCSR);
// SSE STATE //
// WARNING: do not touch unless you know what you are doing
sseMXCSR &= 0xffff; // clear the upper 16 bits since they shouldn't be set
sseVUMXCSR &= 0xffff;
if( !x86caps.hasStreamingSIMD2Extensions )
{
// SSE1 cpus do not support Denormals Are Zero flag (throws an exception
// if we don't mask them off)
sseMXCSR &= ~0x0040;
sseVUMXCSR &= ~0x0040;
}
g_sseMXCSR = sseMXCSR;
g_sseVUMXCSR = sseVUMXCSR;
g_sseMXCSR = sseMXCSR.ApplyReserveMask();
g_sseVUMXCSR = sseVUMXCSR.ApplyReserveMask();
#ifdef _MSC_VER
__asm ldmxcsr g_sseMXCSR; // set the new sse control
#else
__asm__ __volatile__("ldmxcsr %[g_sseMXCSR]" : : [g_sseMXCSR]"m"(g_sseMXCSR) );
#endif
//g_sseVUMXCSR = g_sseMXCSR|0x6000;
}

View File

@ -108,7 +108,7 @@ static void __fastcall iopRecRecompile( const u32 startpc );
static u32 s_store_ebp, s_store_esp;
// Recompiled code buffer for EE recompiler dispatchers!
static u8 __pagealigned iopRecDispatchers[0x1000];
static u8 __pagealigned iopRecDispatchers[__pagesize];
typedef void DynGenFunc();
@ -134,7 +134,7 @@ static void __fastcall StackFrameCheckFailed( int espORebp, int regval )
{
pxFailDev( wxsFormat( L"(R3000A Recompiler Stackframe) Sanity check failed on %s\n\tCurrent=%d; Saved=%d",
(espORebp==0) ? L"ESP" : L"EBP", regval, (espORebp==0) ? s_store_esp : s_store_ebp )
);
);
// Note: The recompiler will attempt to recover ESP and EBP after returning from this function,
// so typically selecting Continue/Ignore/Cancel for this assertion should allow PCSX2 to con-
@ -348,10 +348,10 @@ static DynGenFunc* _DynGen_EnterRecompiledCode()
static void _DynGen_Dispatchers()
{
// In case init gets called multiple times:
HostSys::MemProtect( iopRecDispatchers, 0x1000, Protect_ReadWrite, false );
HostSys::MemProtectStatic( iopRecDispatchers, Protect_ReadWrite, false );
// clear the buffer to 0xcc (easier debugging).
memset_8<0xcc,0x1000>( iopRecDispatchers );
memset_8<0xcc,__pagesize>( iopRecDispatchers );
xSetPtr( iopRecDispatchers );
@ -365,7 +365,7 @@ static void _DynGen_Dispatchers()
iopJITCompileInBlock = _DynGen_JITCompileInBlock();
iopEnterRecompiledCode = _DynGen_EnterRecompiledCode();
HostSys::MemProtect( iopRecDispatchers, 0x1000, Protect_ReadOnly, true );
HostSys::MemProtectStatic( iopRecDispatchers, Protect_ReadOnly, true );
recBlocks.SetJITCompile( iopJITCompile );
}

View File

@ -299,7 +299,7 @@ static u32 g_lastpc = 0;
static u32 s_store_ebp, s_store_esp;
// Recompiled code buffer for EE recompiler dispatchers!
static u8 __pagealigned eeRecDispatchers[0x1000];
static u8 __pagealigned eeRecDispatchers[__pagesize];
typedef void DynGenFunc();
@ -460,10 +460,10 @@ static DynGenFunc* _DynGen_EnterRecompiledCode()
static void _DynGen_Dispatchers()
{
// In case init gets called multiple times:
HostSys::MemProtect( eeRecDispatchers, 0x1000, Protect_ReadWrite, false );
HostSys::MemProtectStatic( eeRecDispatchers, Protect_ReadWrite, false );
// clear the buffer to 0xcc (easier debugging).
memset_8<0xcc,0x1000>( eeRecDispatchers );
memset_8<0xcc,__pagesize>( eeRecDispatchers );
xSetPtr( eeRecDispatchers );
@ -477,7 +477,7 @@ static void _DynGen_Dispatchers()
JITCompileInBlock = _DynGen_JITCompileInBlock();
EnterRecompiledCode = _DynGen_EnterRecompiledCode();
HostSys::MemProtect( eeRecDispatchers, 0x1000, Protect_ReadOnly, true );
HostSys::MemProtectStatic( eeRecDispatchers, Protect_ReadOnly, true );
recBlocks.SetJITCompile( JITCompile );
}
@ -630,7 +630,6 @@ void recResetEE( void )
x86FpuState = FPU_STATE;
branch = 0;
SetCPUState(EmuConfig.Cpu.sseMXCSR, EmuConfig.Cpu.sseVUMXCSR);
eeRecIsReset = true;
}
@ -689,6 +688,7 @@ static void recExecute()
{
eeRecIsReset = false;
g_EEFreezeRegs = true;
SetCPUState(g_sseMXCSR, g_sseVUMXCSR);
try {
EnterRecompiledCode();
@ -719,6 +719,7 @@ static void recExecute()
pthread_setcancelstate( PTHREAD_CANCEL_DISABLE, &oldstate );
eeRecIsReset = false;
SetCPUState(g_sseMXCSR, g_sseVUMXCSR);
#ifdef _WIN32
__try {

View File

@ -243,7 +243,7 @@ namespace vtlb_private
// If it were smaller than a page we'd end up allowing execution rights on some
// other vars additionally (bad!).
//
static __pagealigned u8 m_IndirectDispatchers[0x1000];
static __pagealigned u8 m_IndirectDispatchers[__pagesize];
// ------------------------------------------------------------------------
// mode - 0 for read, 1 for write!
@ -286,7 +286,7 @@ static void DynGen_IndirectDispatch( int mode, int bits )
void vtlb_dynarec_init()
{
// In case init gets called multiple times:
HostSys::MemProtect( m_IndirectDispatchers, 0x1000, Protect_ReadWrite, false );
HostSys::MemProtectStatic( m_IndirectDispatchers, Protect_ReadWrite, false );
// clear the buffer to 0xcc (easier debugging).
memset_8<0xcc,0x1000>( m_IndirectDispatchers );
@ -310,7 +310,7 @@ void vtlb_dynarec_init()
}
}
HostSys::MemProtect( m_IndirectDispatchers, 0x1000, Protect_ReadOnly, true );
HostSys::MemProtectStatic( m_IndirectDispatchers, Protect_ReadOnly, true );
}
//////////////////////////////////////////////////////////////////////////////////////////

View File

@ -39,7 +39,7 @@ void mVUdispatcherA(mV) {
else { xCALL(mVUexecuteVU1); }
// Load VU's MXCSR state
xLDMXCSR(&g_sseVUMXCSR);
xLDMXCSR(g_sseVUMXCSR);
// Load Regs
#ifdef CHECK_MACROVU0
@ -72,7 +72,7 @@ void mVUdispatcherB(mV) {
mVU->exitFunct = x86Ptr;
// Load EE's MXCSR state
xLDMXCSR(&g_sseMXCSR);
xLDMXCSR(g_sseMXCSR);
// __fastcall = The first two DWORD or smaller arguments are passed in ECX and EDX registers;
// all other arguments are passed right to left.

View File

@ -156,7 +156,7 @@ extern const __aligned(32) mVU_Globals mVUglob;
// This *probably* fixes the crashing bugs in linux when using the optimized memcmp.
// Needs testing... --air
#ifndef __LINUX__
extern __pagealigned u8 mVUsearchXMM[0x1000];
extern __pagealigned u8 mVUsearchXMM[__pagesize];
typedef u32 (__fastcall *mVUCall)(void*, void*);
#define mVUquickSearch(dest, src, size) ((((mVUCall)((void*)mVUsearchXMM))(dest, src)) == 0xf)
#define mVUemitSearch() { mVUcustomSearch(); }

View File

@ -515,13 +515,13 @@ void SSE_DIVSS(mV, int to, int from, int t1 = -1, int t2 = -1) {
// Micro VU - Custom Quick Search
//------------------------------------------------------------------
static __pagealigned u8 mVUsearchXMM[0x1000];
static __pagealigned u8 mVUsearchXMM[__pagesize];
// Generates a custom optimized block-search function
// Note: Structs must be 16-byte aligned! (GCC doesn't guarantee this)
void mVUcustomSearch() {
HostSys::MemProtect(mVUsearchXMM, 0x1000, Protect_ReadWrite, false);
memset_8<0xcc,0x1000>(mVUsearchXMM);
HostSys::MemProtectStatic(mVUsearchXMM, Protect_ReadWrite, false);
memset_8<0xcc,__pagesize>(mVUsearchXMM);
xSetPtr(mVUsearchXMM);
xMOVAPS (xmm0, ptr32[ecx]);
@ -565,5 +565,5 @@ void mVUcustomSearch() {
exitPoint.SetTarget();
xRET();
HostSys::MemProtect(mVUsearchXMM, 0x1000, Protect_ReadOnly, true);
HostSys::MemProtectStatic(mVUsearchXMM, Protect_ReadOnly, true);
}