mirror of https://github.com/PCSX2/pcsx2.git
Rewrote internal handling of SSE roundmodes and DAZ/FTZ (fixes major crash bugs of the previous revision).

 * Added SSE_MXCSR union/struct with bitfields and methods for doing common actions.
 * Converted all existing MXCSR code to use the new union.
 * Added a __pagesize macro for use in conjunction with __pagealigned and dynarec functions.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@2113 96395faa-99c1-11dd-bbfe-3dabce05a288
Parent: 629aad9944
Commit: 94222f4aaf
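
A minimal sketch of how the new union is meant to be used, based on the declarations added in this commit (illustrative only; the local name "mx" is hypothetical and not part of the diff):

    SSE_MXCSR mx = EmuConfig.Cpu.sseMXCSR;          // raw 32-bit value with named bitfields
    mx.DenormalsAreZero = true;                     // DAZ/FTZ are now direct bitfield writes
    mx.FlushToZero      = true;
    mx.SetRoundMode( SSEround_Chop );               // round mode lives in the 2-bit RoundingControl field
    mx.ClearExceptionFlags().DisableExceptions();   // methods chain, each returns *this
    mx.ApplyReserveMask();                          // AND against MXCSR_Mask fetched during cpu detection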
@@ -176,11 +176,16 @@
// overhead). Furthermore, compilers cannot inline functions that have aligned local
// vars. So use local var alignment with much caution.
//

// Defines the memory page size for the target platform at compilation. All supported platforms
// (which means Intel only right now) have a 4k granularity.
#define __pagesize 0x1000

#ifdef _MSC_VER

# define __aligned(alig) __declspec(align(alig))
# define __aligned16 __declspec(align(16))
# define __pagealigned __declspec(align(0x1000))
# define __pagealigned __declspec(align(__pagesize))

// Deprecated; use __align instead.
# define PCSX2_ALIGNED(alig,x) __declspec(align(alig)) x

@@ -227,7 +232,7 @@ This theoretically unoptimizes. Not having much luck so far.

# define __aligned(alig) __attribute__((aligned(alig)))
# define __aligned16 __attribute__((aligned(16)))
# define __pagealigned __attribute__((aligned(0x1000)))
# define __pagealigned __attribute__((aligned(__pagesize)))
// Deprecated; use __align instead.
# define PCSX2_ALIGNED(alig,x) x __attribute((aligned(alig)))
# define PCSX2_ALIGNED16(x) x __attribute((aligned(16)))

@@ -65,6 +65,12 @@ namespace HostSys
extern void MemProtect( void* baseaddr, size_t size, PageProtectionMode mode, bool allowExecution=false );

extern void Munmap( void* base, u32 size );

template< uint size >
void MemProtectStatic( u8 (&arr)[size], PageProtectionMode mode, bool allowExecution=false )
{
MemProtect( arr, size, mode, allowExecution );
}
}
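
// Editor's sketch (not part of this commit's diff): MemProtectStatic deduces the buffer size from
// the array type, which pairs with the new __pagealigned/__pagesize macros. The buffer name below
// is hypothetical; the same pattern appears later in this commit (recSSE, the dispatcher buffers).
static __pagealigned u8 someCodeBuffer[__pagesize];
HostSys::MemProtectStatic( someCodeBuffer, Protect_ReadWrite, true );   // open for code generation
// ... emit code into someCodeBuffer ...
HostSys::MemProtectStatic( someCodeBuffer, Protect_ReadOnly, true );    // lock it down afterwards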
@@ -338,8 +338,10 @@ namespace x86Emitter
// ------------------------------------------------------------------------

extern void xEMMS();
extern void xSTMXCSR( u32* dest );
extern void xLDMXCSR( const u32* src );
extern void xSTMXCSR( const ModSib32& dest );
extern void xLDMXCSR( const ModSib32& src );
extern void xFXSAVE( const ModSib32& dest );
extern void xFXRSTOR( const ModSib32& src );

extern void xMOVDZX( const xRegisterSSE& to, const xRegister32& from );
extern void xMOVDZX( const xRegisterSSE& to, const ModSibBase& src );

@@ -21,8 +21,9 @@
extern void cpudetectInit();

// --------------------------------------------------------------------------------------
struct x86CPU_INFO
// x86CPU_INFO
// --------------------------------------------------------------------------------------
struct x86CPU_INFO
{
u32 FamilyID; // Processor Family
u32 Model; // Processor Model

@@ -91,6 +92,75 @@ struct x86CPU_INFO
u32 hasStreamingSIMD4ExtensionsA:1;
};

enum SSE_RoundMode
{
SSEround_Nearest = 0,
SSEround_NegInf,
SSEround_PosInf,
SSEround_Chop,
};

// --------------------------------------------------------------------------------------
// SSE_MXCSR - Control/Status Register (bitfield)
// --------------------------------------------------------------------------------------
// Bits 0-5 are exception flags; used only if SSE exceptions have been enabled.
// Bits in this field are "sticky" and, once an exception has occurred, must be manually
// cleared using LDMXCSR or FXRSTOR.
//
// Bits 7-12 are the masks for disabling the exceptions in bits 0-5. Cleared bits allow
// exceptions, set bits mask exceptions from being raised.
//
union SSE_MXCSR
{
u32 bitmask;
struct
{
u32
InvalidOpFlag :1,
DenormalFlag :1,
DivideByZeroFlag :1,
OverflowFlag :1,
UnderflowFlag :1,
PrecisionFlag :1,

// This bit is supported only on SSE2 or better CPUs. Setting it to 1 on
// SSE1 cpus will result in an invalid instruction exception when executing
// LDMXCSR.
DenormalsAreZero :1,

InvalidOpMask :1,
DenormalMask :1,
DivideByZeroMask :1,
OverflowMask :1,
UnderflowMask :1,
PrecisionMask :1,

RoundingControl :2,
FlushToZero :1;
};

SSE_RoundMode GetRoundMode() const;
SSE_MXCSR& SetRoundMode( SSE_RoundMode mode );
SSE_MXCSR& ClearExceptionFlags();
SSE_MXCSR& EnableExceptions();
SSE_MXCSR& DisableExceptions();

SSE_MXCSR& ApplyReserveMask();

bool operator ==( const SSE_MXCSR& right ) const
{
return bitmask == right.bitmask;
}

bool operator !=( const SSE_MXCSR& right ) const
{
return bitmask != right.bitmask;
}

operator x86Emitter::ModSib32() const;
};

extern SSE_MXCSR MXCSR_Mask;
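
// Editor's sketch (not part of this commit's diff): the union is a plain aggregate, so it can be
// initialized from a raw MXCSR constant and compared bitwise; "vumx" is a hypothetical name.
SSE_MXCSR vumx = { DEFAULT_sseVUMXCSR };        // same pattern used for g_sseVUMXCSR later in the commit
if( vumx.GetRoundMode() != SSEround_Nearest )   // decode the RoundingControl field as an enum
    vumx.SetRoundMode( SSEround_Nearest );
if( vumx != EmuConfig.Cpu.sseVUMXCSR ) { /* operator!= compares the raw bitmask */ }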

//////////////////////////////////////////////////////////////////////////////////////////

@@ -64,7 +64,7 @@
# define CallAddress( ptr ) \
__asm{ call offset ptr }

# define FastCallAddress( ptr, param ) \
# define FastCallAddress( ptr, param1 ) \
__asm{ __asm mov ecx, param1 __asm call offset ptr }

# define FastCallAddress2( ptr, param1, param2 ) \

@@ -75,8 +75,8 @@
# define CallAddress( ptr ) \
( (void (*)()) &(ptr)[0] )()

# define FastCallAddress( ptr, param ) \
( (void (*)( int )) &(ptr)[0] )( param )
# define FastCallAddress( ptr, param1 ) \
( (void (*)( int )) &(ptr)[0] )( param1 )

# define FastCallAddress2( ptr, param1, param2 ) \
( (void (*)( int, int )) &(ptr)[0] )( param1, param2 )

@@ -15,6 +15,8 @@

#pragma once

#include "Utilities/Dependencies.h"

// Register counts for x86/32 mode:
static const uint iREGCNT_XMM = 8;
static const uint iREGCNT_GPR = 8;

@@ -39,9 +39,16 @@ namespace HostSys

void MemProtect( void* baseaddr, size_t size, PageProtectionMode mode, bool allowExecution )
{
pxAssertDev( ((size & ~__pagesize) == 0), wxsFormat(
L"Memory block size must be a multiple of the target platform's page size.\n"
L"\tPage Size: 0x%04x (%d), Block Size: 0x%04x (%d)",
__pagesize, __pagesize, size, size )
);

int lnxmode = 0;

// make sure size is aligned to the system page size:
// Check is redundant against the assertion above, but might as well...
size = (size + m_pagemask) & ~m_pagemask;

switch( mode )

@@ -33,6 +33,12 @@ namespace HostSys

void MemProtect( void* baseaddr, size_t size, PageProtectionMode mode, bool allowExecution )
{
pxAssertDev( ((size & (__pagesize-1)) == 0), wxsFormat(
L"Memory block size must be a multiple of the target platform's page size.\n"
L"\tPage Size: 0x%04x (%d), Block Size: 0x%04x (%d)",
__pagesize, __pagesize, size, size )
);

DWORD winmode = 0;

switch( mode )

@@ -21,7 +21,6 @@
#include "internal.h"
#include "tools.h"


using namespace x86Emitter;

__aligned16 x86CPU_INFO x86caps;

@@ -73,8 +72,8 @@ static char* bool_to_char( bool testcond )
#endif

#ifdef _WINDOWS_
static HANDLE s_threadId = NULL;
static DWORD s_oldmask = ERROR_INVALID_PARAMETER;
static HANDLE s_threadId = NULL;
static DWORD s_oldmask = ERROR_INVALID_PARAMETER;
#endif

static void SetSingleAffinity()

@@ -148,17 +147,10 @@ static s64 CPUSpeedHz( u64 time )
}

////////////////////////////////////////////////////
int arr[] = {
0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865,
0x51203229,0x20646175,0x20555043,0x20202020 ,
0x20202020,0x20402020,0x36362e32,0x7a4847
};

void cpudetectInit()
{
u32 regs[ 4 ];
u32 cmds;
int cputype=0; // Cpu type
//AMD 64 STUFF
u32 x86_64_8BITBRANDID;
u32 x86_64_12BITBRANDID;

@@ -180,7 +172,9 @@ void cpudetectInit()
((u32*)x86caps.VendorName)[ 1 ] = regs[ 3 ];
((u32*)x86caps.VendorName)[ 2 ] = regs[ 2 ];

// Hack - prevents reg[2] & reg[3] from being optimized out of existance!
// Hack - prevents reg[2] & reg[3] from being optimized out of existence! (GCC only)
// FIXME: We use a better __cpuid now with proper inline asm constraints. This hack is
// probably obsolete. Linux devs please re-confirm. --air
num = sprintf(str, "\tx86Flags = %8.8x %8.8x\n", regs[3], regs[2]);

u32 LogicalCoresPerPhysicalCPU = 0;

@@ -200,7 +194,9 @@ void cpudetectInit()
x86caps.Flags2 = regs[ 2 ];
}
}
/* detect multicore for intel cpu */

// detect multicore for Intel cpu

if ((cmds >= 0x00000004) && !strcmp("GenuineIntel",x86caps.VendorName))
{
if ( iCpuId( 0x00000004, regs ) != -1 )

@@ -222,7 +218,9 @@ void cpudetectInit()

}
}
/* detect multicore for amd cpu */

// detect multicore for AMD cpu

if ((cmds >= 0x80000008) && !strcmp("AuthenticAMD",x86caps.VendorName))
{
if ( iCpuId( 0x80000008, regs ) != -1 )

@@ -250,8 +248,22 @@ void cpudetectInit()
strcpy( x86caps.TypeName, "Unknown");
break;
}
if ( x86caps.VendorName[ 0 ] == 'G' ){ cputype=0;}//trick lines but if you know a way better ;p
if ( x86caps.VendorName[ 0 ] == 'A' ){ cputype=1;}

#if 0
// vendor identification, currently unneeded.
// It's really not recommended that we base much (if anything) on CPU vendor names.
// But the code is left in as an ifdef, for possible future reference.

int cputype=0; // Cpu type
static const char* Vendor_Intel = "GenuineIntel";
static const char* Vendor_AMD = "AuthenticAMD";

if( memcmp( x86caps.VendorName, Vendor_Intel, 12 ) == 0 ) { cputype = 0; } else
if( memcmp( x86caps.VendorName, Vendor_AMD, 12 ) == 0 ) { cputype = 1; }

if ( x86caps.VendorName[ 0 ] == 'G' ) { cputype = 0; }
if ( x86caps.VendorName[ 0 ] == 'A' ) { cputype = 1; }
#endif

memzero( x86caps.FamilyName );
iCpuId( 0x80000002, (u32*)x86caps.FamilyName);

@@ -311,6 +323,9 @@ void cpudetectInit()
x86caps.hasStreamingSIMD4Extensions = ( x86caps.Flags2 >> 19 ) & 1; //sse4.1
x86caps.hasStreamingSIMD4Extensions2 = ( x86caps.Flags2 >> 20 ) & 1; //sse4.2

static __pagealigned u8 recSSE[__pagesize];
HostSys::MemProtectStatic( recSSE, Protect_ReadWrite, true );

//////////////////////////////////////////////////////////////////////////////////////////
// SIMD Instruction Support Detection
//

@@ -325,7 +340,6 @@ void cpudetectInit()
// detection relies on the CPUID bits alone.

#ifdef _MSC_VER
u8* recSSE = (u8*)HostSys::Mmap( NULL, 0x1000 );
if( recSSE != NULL )
{
xSetPtr( recSSE );

@@ -385,6 +399,25 @@ void cpudetectInit()
}
#endif

////////////////////////////////////////////////////////////////////////////////////////////
// Establish MXCSR Mask...

if( x86caps.hasStreamingSIMDExtensions )
{
xSetPtr( recSSE );
xFXSAVE( ptr32[ecx] );
xRET();

u32 _fxsave[512/4];
memzero( _fxsave );
((void (__fastcall *)(u32*))&recSSE[0])( _fxsave );

if( _fxsave[28/4] == 0 )
MXCSR_Mask.bitmask = 0xFFBF;
else
MXCSR_Mask.bitmask = _fxsave[28/4];
}
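
// Editor's note (not part of this commit's diff): bytes 28..31 of the FXSAVE image hold MXCSR_MASK,
// the set of MXCSR bits the processor actually implements. A stored value of zero means the CPU
// does not report a mask, in which case Intel documents 0xFFBF as the default to assume -- that is,
// everything except DenormalsAreZero, which is exactly what the fallback above does.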

////////////////////////////////////////////////////////////////////////////////////////////
// Core Counting!

@@ -15,6 +15,55 @@

#include "PrecompiledHeader.h"
#include "internal.h"
#include "tools.h"

// Mask of valid bit fields for the target CPU. Typically this is either 0xFFFF (SSE2
// or better) or 0xFFBF (SSE1 and earlier). Code can ensure a safe/valid MXCSR by
// AND'ing this mask against an MXCSR prior to LDMXCSR.
SSE_MXCSR MXCSR_Mask;

SSE_RoundMode SSE_MXCSR::GetRoundMode() const
{
return (SSE_RoundMode)RoundingControl;
}

SSE_MXCSR& SSE_MXCSR::SetRoundMode( SSE_RoundMode mode )
{
pxAssert( (uint)mode < 4 );
RoundingControl = (u32)mode;
return *this;
}

SSE_MXCSR& SSE_MXCSR::ClearExceptionFlags()
{
bitmask &= ~0x3f;
return *this;
}

SSE_MXCSR& SSE_MXCSR::EnableExceptions()
{
bitmask &= ~(0x3f << 7);
return *this;
}

SSE_MXCSR& SSE_MXCSR::DisableExceptions()
{
bitmask |= 0x3f << 7;
return *this;
}

// Applies the reserve bits mask for the current running cpu, as fetched from the CPU
// during CPU init/detection.
SSE_MXCSR& SSE_MXCSR::ApplyReserveMask()
{
bitmask &= MXCSR_Mask.bitmask;
return *this;
}

SSE_MXCSR::operator x86Emitter::ModSib32() const
{
return &bitmask;
}

namespace x86Emitter {

@@ -451,19 +500,35 @@ __forceinline void xFEMMS() { xWrite16( 0x0E0F ); }


// Store Streaming SIMD Extension Control/Status to Mem32.
__emitinline void xSTMXCSR( u32* dest )
__emitinline void xSTMXCSR( const ModSib32& dest )
{
SimdPrefix( 0, 0xae );
EmitSibMagic( 3, dest );
}

// Load Streaming SIMD Extension Control/Status from Mem32.
__emitinline void xLDMXCSR( const u32* src )
__emitinline void xLDMXCSR( const ModSib32& src )
{
SimdPrefix( 0, 0xae );
EmitSibMagic( 2, src );
}

// Save x87 FPU, MMX Technology, and SSE State to buffer
// Target buffer must be at least 512 bytes in length to hold the result.
__emitinline void xFXSAVE( const ModSib32& dest )
{
SimdPrefix( 0, 0xae );
EmitSibMagic( 0, dest );
}

// Restore x87 FPU, MMX , XMM, and MXCSR State.
// Source buffer should be 512 bytes in length.
__emitinline void xFXRSTOR( const ModSib32& src )
{
SimdPrefix( 0, 0xae );
EmitSibMagic( 0, src );
}
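
// Editor's sketch (not part of this commit's diff): typical use of the new overloads; "savedMXCSR"
// is a hypothetical static. An SSE_MXCSR converts to ModSib32 (see the union declaration above),
// so it can be used as the memory operand directly, just as the microVU dispatchers do with g_sseMXCSR.
static __aligned16 SSE_MXCSR savedMXCSR;
xSTMXCSR( savedMXCSR );          // store the hardware MXCSR into savedMXCSR.bitmask
savedMXCSR.ApplyReserveMask();   // sanitize against the detected MXCSR_Mask
xLDMXCSR( savedMXCSR );          // reload it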

//////////////////////////////////////////////////////////////////////////////////////////
// MMX Mov Instructions (MOVD, MOVQ, MOVSS).
//

@@ -45,5 +45,5 @@ extern wxString ShiftJIS_ConvertString( const char* src, int maxlen );

// Some homeless externs. This is as good a spot as any for now...

extern void SetCPUState(u32 sseMXCSR, u32 sseVUMXCSR);
extern u32 g_sseVUMXCSR, g_sseMXCSR;
extern void SetCPUState(SSE_MXCSR sseMXCSR, SSE_MXCSR sseVUMXCSR);
extern SSE_MXCSR g_sseVUMXCSR, g_sseMXCSR;

@@ -15,6 +15,8 @@

#pragma once

#include "x86emitter/tools.h"

class IniInterface;

enum PluginsEnum_t

@@ -142,11 +144,12 @@ public:
{
RecompilerOptions Recompiler;

u32 sseMXCSR;
u32 sseVUMXCSR;
SSE_MXCSR sseMXCSR;
SSE_MXCSR sseVUMXCSR;

CpuOptions();
void LoadSave( IniInterface& conf );
void ApplySanityCheck();

bool operator ==( const CpuOptions& right ) const
{

@@ -876,7 +876,7 @@ void mmap_MarkCountedRamPage( u32 paddr )
);

m_PageProtectInfo[rampage].Mode = ProtMode_Write;
HostSys::MemProtect( &psM[rampage<<12], 1, Protect_ReadOnly );
HostSys::MemProtect( &psM[rampage<<12], __pagesize, Protect_ReadOnly );
}

// offset - offset of address relative to psM.

@@ -889,7 +889,7 @@ static __forceinline void mmap_ClearCpuBlock( uint offset )
pxAssertMsg( m_PageProtectInfo[rampage].Mode != ProtMode_Manual,
"Attempted to clear a block that is already under manual protection." );

HostSys::MemProtect( &psM[rampage<<12], 1, Protect_ReadWrite );
HostSys::MemProtect( &psM[rampage<<12], __pagesize, Protect_ReadWrite );
m_PageProtectInfo[rampage].Mode = ProtMode_Manual;
Cpu->Clear( m_PageProtectInfo[rampage].ReverseRamMap, 0x400 );
}

@@ -703,25 +703,24 @@ void patchFunc_roundmode( char * cmd, char * param )
int index;
char * pText;

u32 eetype = (EmuConfig.Cpu.sseMXCSR & 0x6000);
u32 vutype = (EmuConfig.Cpu.sseVUMXCSR & 0x6000);
SSE_RoundMode eetype = EmuConfig.Cpu.sseMXCSR.GetRoundMode();
SSE_RoundMode vutype = EmuConfig.Cpu.sseVUMXCSR.GetRoundMode();

index = 0;
pText = strtok( param, ", " );
while(pText != NULL)
{
u32 type = 0xffff;
SSE_RoundMode type;

if( stricmp(pText, "near") == 0 )
type = 0x0000;
type = SSEround_Nearest;
else if( stricmp(pText, "down") == 0 )
type = 0x2000;
type = SSEround_NegInf;
else if( stricmp(pText, "up") == 0 )
type = 0x4000;
type = SSEround_PosInf;
else if( stricmp(pText, "chop") == 0 )
type = 0x6000;

if( type == 0xffff )
type = SSEround_Chop;
else
{
Console.WriteLn("bad argument (%s) to round mode! skipping...\n", pText);
break;

@@ -747,7 +746,10 @@ void patchFunc_zerogs(char* cmd, char* param)
sscanf(param, "%x", &g_ZeroGSOptions);
}

void SetRoundMode(u32 ee, u32 vu)
void SetRoundMode(SSE_RoundMode ee, SSE_RoundMode vu)
{
SetCPUState( (EmuConfig.Cpu.sseMXCSR & 0x9fff) | ee, (EmuConfig.Cpu.sseVUMXCSR & 0x9fff) | vu);
SSE_MXCSR mxfpu = EmuConfig.Cpu.sseMXCSR;
SSE_MXCSR mxvu = EmuConfig.Cpu.sseVUMXCSR;

SetCPUState( mxfpu.SetRoundMode( ee ), mxvu.SetRoundMode( vu ) );
}
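
// Editor's note (not part of this commit's diff): callers now pass the enum directly, e.g.
// SetRoundMode( SSEround_Chop, SSEround_Chop );  -- no more raw 0x6000-style MXCSR constants.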
|
|||
|
||||
//extern int g_VUGameFixes;
|
||||
extern int g_ZeroGSOptions;
|
||||
extern u32 g_sseMXCSR;
|
||||
extern u32 g_sseVUMXCSR;
|
||||
|
||||
extern void SetRoundMode(u32 ee, u32 vu);
|
||||
extern void SetRoundMode(SSE_RoundMode ee, SSE_RoundMode vu);
|
||||
extern int LoadPatch(const wxString& patchfile);
|
||||
|
||||
#endif /* __PATCH_H__ */
|
||||
|
|
|
@@ -79,9 +79,15 @@ Pcsx2Config::RecompilerOptions::RecompilerOptions() : bitset(0)

void Pcsx2Config::RecompilerOptions::ApplySanityCheck()
{
int fpuCount = (int)fpuOverflow + (int)fpuExtraOverflow + (int)fpuFullMode;
bool fpuIsRight = true;

if( fpuCount > 1 )
if( fpuExtraOverflow )
fpuIsRight = fpuOverflow;

if( fpuFullMode )
fpuIsRight = !fpuOverflow && !fpuExtraOverflow;

if( !fpuIsRight )
{
// Values are wonky; assume the defaults.
fpuOverflow = RecompilerOptions().fpuOverflow;

@@ -89,14 +95,18 @@ void Pcsx2Config::RecompilerOptions::ApplySanityCheck()
fpuFullMode = RecompilerOptions().fpuFullMode;
}

int vuCount = (int)vuOverflow + (int)vuExtraOverflow + (int)vuSignOverflow;
bool vuIsOk = true;

if( fpuCount > 1 )
if( vuExtraOverflow ) vuIsOk = vuIsOk && vuOverflow;
if( vuSignOverflow ) vuIsOk = vuIsOk && vuExtraOverflow;

if( !vuIsOk )
{
// Values are wonky; assume the defaults.
vuOverflow = RecompilerOptions().vuOverflow;
vuExtraOverflow = RecompilerOptions().vuExtraOverflow;
vuSignOverflow = RecompilerOptions().vuSignOverflow;
vuUnderflow = RecompilerOptions().vuUnderflow;
}
}

@@ -123,10 +133,18 @@ void Pcsx2Config::RecompilerOptions::LoadSave( IniInterface& ini )
IniBitBool( fpuFullMode );
}

Pcsx2Config::CpuOptions::CpuOptions() :
sseMXCSR( DEFAULT_sseMXCSR )
, sseVUMXCSR( DEFAULT_sseVUMXCSR )
Pcsx2Config::CpuOptions::CpuOptions()
{
sseMXCSR.bitmask = DEFAULT_sseMXCSR;
sseVUMXCSR.bitmask = DEFAULT_sseVUMXCSR;
}

void Pcsx2Config::CpuOptions::ApplySanityCheck()
{
sseMXCSR.ClearExceptionFlags().DisableExceptions();
sseVUMXCSR.ClearExceptionFlags().DisableExceptions();

Recompiler.ApplySanityCheck();
}

void Pcsx2Config::CpuOptions::LoadSave( IniInterface& ini )

@@ -134,8 +152,13 @@ void Pcsx2Config::CpuOptions::LoadSave( IniInterface& ini )
CpuOptions defaults;
IniScopedGroup path( ini, L"CPU" );

IniEntry( sseMXCSR );
IniEntry( sseVUMXCSR );
IniBitBoolEx( sseMXCSR.DenormalsAreZero, "FPU.DenormalsAreZero" );
IniBitBoolEx( sseMXCSR.FlushToZero, "FPU.FlushToZero" );
IniBitfieldEx( sseMXCSR.RoundingControl, "FPU.Roundmode" );

IniBitBoolEx( sseVUMXCSR.DenormalsAreZero, "VU.DenormalsAreZero" );
IniBitBoolEx( sseVUMXCSR.FlushToZero, "VU.FlushToZero" );
IniBitfieldEx( sseVUMXCSR.RoundingControl, "VU.Roundmode" );

Recompiler.LoadSave( ini );
}

@@ -163,3 +163,6 @@ protected:
#define IniEntry( varname ) ini.Entry( wxT(#varname), varname, defaults.varname )
#define IniBitfield( varname ) varname = ini.EntryBitfield( wxT(#varname), varname, defaults.varname )
#define IniBitBool( varname ) varname = ini.EntryBitBool( wxT(#varname), !!varname, defaults.varname )

#define IniBitfieldEx( varname, textname ) varname = ini.EntryBitfield( wxT(textname), varname, defaults.varname )
#define IniBitBoolEx( varname, textname ) varname = ini.EntryBitBool( wxT(textname), !!varname, defaults.varname )

@@ -250,7 +250,7 @@ namespace Panels

protected:
void OnRestoreDefaults( wxCommandEvent& evt );
void ApplyRoundmode( u32& mxcsr );
void ApplyRoundmode( SSE_MXCSR& mxcsr );
};

class AdvancedOptionsFPU : public BaseAdvancedCpuOptions

@@ -16,9 +16,6 @@
#include "PrecompiledHeader.h"
#include "ConfigurationPanels.h"

static const u32 MXCSR_DAZ = 0x0040; // bit enable for Denormals Are Zero
static const u32 MXCSR_FTZ = 0x8000; // bit enable for Flush to Zero

using namespace wxHelpers;

Panels::BaseAdvancedCpuOptions::BaseAdvancedCpuOptions( wxWindow& parent, int idealWidth ) :

@@ -87,10 +84,10 @@ Panels::AdvancedOptionsFPU::AdvancedOptionsFPU( wxWindow& parent, int idealWidth
Pcsx2Config::CpuOptions& cpuOps( g_Conf->EmuOptions.Cpu );
Pcsx2Config::RecompilerOptions& recOps( cpuOps.Recompiler );

m_Option_FTZ->SetValue( !!(cpuOps.sseMXCSR & MXCSR_FTZ) );
m_Option_DAZ->SetValue( !!(cpuOps.sseMXCSR & MXCSR_DAZ) );
m_Option_FTZ->SetValue( cpuOps.sseMXCSR.FlushToZero );
m_Option_DAZ->SetValue( cpuOps.sseMXCSR.DenormalsAreZero );

m_Option_Round[(cpuOps.sseMXCSR >> 13) & 3]->SetValue( true );
m_Option_Round[cpuOps.sseMXCSR.RoundingControl]->SetValue( true );

m_Option_Normal->SetValue( recOps.fpuOverflow );
m_Option_ExtraSign->SetValue( recOps.fpuExtraOverflow );

@@ -110,10 +107,10 @@ Panels::AdvancedOptionsVU::AdvancedOptionsVU( wxWindow& parent, int idealWidth )
Pcsx2Config::CpuOptions& cpuOps( g_Conf->EmuOptions.Cpu );
Pcsx2Config::RecompilerOptions& recOps( cpuOps.Recompiler );

m_Option_FTZ->SetValue( !!(cpuOps.sseVUMXCSR & MXCSR_FTZ) );
m_Option_DAZ->SetValue( !!(cpuOps.sseVUMXCSR & MXCSR_DAZ) );
m_Option_FTZ->SetValue( cpuOps.sseVUMXCSR.FlushToZero );
m_Option_DAZ->SetValue( cpuOps.sseVUMXCSR.DenormalsAreZero );

m_Option_Round[(cpuOps.sseVUMXCSR >> 13) & 3]->SetValue( true );
m_Option_Round[cpuOps.sseVUMXCSR.RoundingControl]->SetValue( true );

m_Option_Normal->SetValue( recOps.vuOverflow );
m_Option_Extra->SetValue( recOps.vuExtraOverflow );

@@ -223,21 +220,19 @@ void Panels::CpuPanelVU::Apply()
recOps.UseMicroVU1 = m_Option_mVU1->GetValue();
}

void Panels::BaseAdvancedCpuOptions::ApplyRoundmode( u32& mxcsr )
void Panels::BaseAdvancedCpuOptions::ApplyRoundmode( SSE_MXCSR& mxcsr )
{
mxcsr = 0;

for( int i=0; i<4; ++i )
{
if( m_Option_Round[i]->GetValue() )
{
mxcsr |= (i << 13);
mxcsr.RoundingControl = i;
break;
}
}

if( m_Option_DAZ->GetValue() ) mxcsr |= MXCSR_DAZ;
if( m_Option_FTZ->GetValue() ) mxcsr |= MXCSR_FTZ;
mxcsr.DenormalsAreZero = m_Option_DAZ->GetValue();
mxcsr.FlushToZero = m_Option_FTZ->GetValue();
}

void Panels::AdvancedOptionsFPU::Apply()

@@ -245,13 +240,14 @@ void Panels::AdvancedOptionsFPU::Apply()
Pcsx2Config::CpuOptions& cpuOps( g_Conf->EmuOptions.Cpu );
Pcsx2Config::RecompilerOptions& recOps( cpuOps.Recompiler );

cpuOps.sseMXCSR = Pcsx2Config::CpuOptions().sseMXCSR; // set default
ApplyRoundmode( cpuOps.sseMXCSR );

recOps.fpuOverflow = m_Option_Normal->GetValue();
recOps.fpuExtraOverflow = m_Option_ExtraSign->GetValue();
recOps.fpuOverflow = m_Option_Normal->GetValue() || recOps.fpuExtraOverflow;
recOps.fpuFullMode = m_Option_Full->GetValue();

recOps.ApplySanityCheck();
cpuOps.ApplySanityCheck();
}

void Panels::AdvancedOptionsVU::Apply()

@@ -259,11 +255,12 @@ void Panels::AdvancedOptionsVU::Apply()
Pcsx2Config::CpuOptions& cpuOps( g_Conf->EmuOptions.Cpu );
Pcsx2Config::RecompilerOptions& recOps( cpuOps.Recompiler );

cpuOps.sseVUMXCSR = Pcsx2Config::CpuOptions().sseVUMXCSR; // set default
ApplyRoundmode( cpuOps.sseVUMXCSR );

recOps.vuOverflow = m_Option_Normal->GetValue();
recOps.vuExtraOverflow = m_Option_Extra->GetValue();
recOps.vuSignOverflow = m_Option_ExtraSign->GetValue();
recOps.vuExtraOverflow = m_Option_Extra->GetValue() || recOps.vuSignOverflow;
recOps.vuOverflow = m_Option_Normal->GetValue() || recOps.vuExtraOverflow;

recOps.ApplySanityCheck();
cpuOps.ApplySanityCheck();
}

@@ -21,6 +21,8 @@
#include "iR5900.h"
#include "iFPU.h"

using namespace x86Emitter;

//------------------------------------------------------------------
namespace R5900 {
namespace Dynarec {

@@ -1100,7 +1102,7 @@ void recDIVhelper2(int regd, int regt) // Doesn't sets flags
ClampValues(regd);
}

static __aligned16 u32 roundmode_temp[4];
static __aligned16 SSE_MXCSR roundmode_nearest, roundmode_neg;

void recDIV_S_xmm(int info)
{

@@ -1109,12 +1111,23 @@ void recDIV_S_xmm(int info)
//if (t0reg == -1) {Console.Error("FPU: DIV Allocation Error!");}
//Console.WriteLn("DIV");

if ((g_sseMXCSR & 0x00006000) != 0x00000000) { // Set roundmode to nearest if it isn't already
if (g_sseMXCSR.GetRoundMode() != SSEround_Nearest)
{
// Set roundmode to nearest since it isn't already
//Console.WriteLn("div to nearest");
roundmode_temp[0] = (g_sseMXCSR & 0xFFFF9FFF); // Set new roundmode to nearest
roundmode_temp[1] = g_sseMXCSR; // Backup old Roundmode
if (CHECK_FPUNEGDIVHACK) roundmode_temp[0] |= 0x2000; // Negative Roundmode
SSE_LDMXCSR ((uptr)&roundmode_temp[0]); // Recompile Roundmode Change

if( CHECK_FPUNEGDIVHACK )
{
roundmode_neg = g_sseMXCSR;
roundmode_neg.SetRoundMode( SSEround_NegInf );
xLDMXCSR( roundmode_neg );
}
else
{
roundmode_nearest = g_sseMXCSR;
roundmode_nearest.SetRoundMode( SSEround_Nearest );
xLDMXCSR( roundmode_nearest );
}
roundmodeFlag = 1;
}

@@ -1163,7 +1176,7 @@ void recDIV_S_xmm(int info)
break;
}
if (roundmodeFlag == 1) { // Set roundmode back if it was changed
SSE_LDMXCSR ((uptr)&roundmode_temp[1]);
xLDMXCSR (g_sseMXCSR);
}
_freeXMMreg(t0reg);
}

@@ -1663,15 +1676,17 @@ FPURECOMPILE_CONSTCODE(SUBA_S, XMMINFO_WRITEACC|XMMINFO_READS|XMMINFO_READT);
void recSQRT_S_xmm(int info)
{
u8* pjmp;
int roundmodeFlag = 0;
bool roundmodeFlag = false;
//Console.WriteLn("FPU: SQRT");

if ((g_sseMXCSR & 0x00006000) != 0x00000000) { // Set roundmode to nearest if it isn't already
if (g_sseMXCSR.GetRoundMode() != SSEround_Nearest)
{
// Set roundmode to nearest if it isn't already
//Console.WriteLn("sqrt to nearest");
roundmode_temp[0] = (g_sseMXCSR & 0xFFFF9FFF); // Set new roundmode
roundmode_temp[1] = g_sseMXCSR; // Backup old Roundmode
SSE_LDMXCSR ((uptr)&roundmode_temp[0]); // Recompile Roundmode Change
roundmodeFlag = 1;
roundmode_nearest = g_sseMXCSR;
roundmode_nearest.SetRoundMode( SSEround_Nearest );
xLDMXCSR (roundmode_nearest);
roundmodeFlag = true;
}

if( info & PROCESS_EE_T ) SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_T);

@@ -1699,9 +1714,7 @@ void recSQRT_S_xmm(int info)
SSE_SQRTSS_XMM_to_XMM(EEREC_D, EEREC_D);
if (CHECK_FPU_EXTRA_OVERFLOW) ClampValues(EEREC_D); // Shouldn't need to clamp again since SQRT of a number will always be smaller than the original number, doing it just in case :/

if (roundmodeFlag == 1) { // Set roundmode back if it was changed
SSE_LDMXCSR ((uptr)&roundmode_temp[1]);
}
if (roundmodeFlag) xLDMXCSR (g_sseMXCSR);
}

FPURECOMPILE_CONSTCODE(SQRT_S, XMMINFO_WRITED|XMMINFO_READT);

@@ -26,6 +26,9 @@

/* Can be made faster by not converting stuff back and forth between instructions. */


using namespace x86Emitter;

//set overflow flag (set only if FPU_RESULT is 1)
#define FPU_FLAGS_OVERFLOW 1
//set underflow flag (set only if FPU_RESULT is 1)

@@ -629,7 +632,7 @@ void recDIVhelper2(int regd, int regt) // Doesn't sets flags
ToPS2FPU(regd, false, regt, false);
}

static __aligned16 u32 roundmode_temp[4];
static __aligned16 SSE_MXCSR roundmode_nearest, roundmode_neg;

void recDIV_S_xmm(int info)
{

@@ -637,11 +640,23 @@ void recDIV_S_xmm(int info)
//if (t0reg == -1) {Console.Error("FPU: DIV Allocation Error!");}
//Console.WriteLn("DIV");

if ((g_sseMXCSR & 0x00006000) != 0x00000000) { // Set roundmode to nearest if it isn't already
if (g_sseMXCSR.GetRoundMode() != SSEround_Nearest)
{
// Set roundmode to nearest since it isn't already
//Console.WriteLn("div to nearest");
roundmode_temp[0] = (g_sseMXCSR & 0xFFFF9FFF); // Set new roundmode
roundmode_temp[1] = g_sseMXCSR; // Backup old Roundmode
SSE_LDMXCSR ((uptr)&roundmode_temp[0]); // Recompile Roundmode Change

if( CHECK_FPUNEGDIVHACK )
{
roundmode_neg = g_sseMXCSR;
roundmode_neg.SetRoundMode( SSEround_NegInf );
xLDMXCSR( roundmode_neg );
}
else
{
roundmode_nearest = g_sseMXCSR;
roundmode_nearest.SetRoundMode( SSEround_Nearest );
xLDMXCSR( roundmode_nearest );
}
roundmodeFlag = 1;
}

@@ -657,7 +672,7 @@ void recDIV_S_xmm(int info)
SSE_MOVSS_XMM_to_XMM(EEREC_D, sreg);

if (roundmodeFlag == 1) { // Set roundmode back if it was changed
SSE_LDMXCSR ((uptr)&roundmode_temp[1]);
xLDMXCSR (g_sseMXCSR);
}
_freeXMMreg(sreg); _freeXMMreg(treg);
}

@@ -902,11 +917,13 @@ void recSQRT_S_xmm(int info)
if (t1reg == -1) {Console.Error("FPU: SQRT Allocation Error!");}
//Console.WriteLn("FPU: SQRT");

if ((g_sseMXCSR & 0x00006000) != 0x00000000) { // Set roundmode to nearest if it isn't already
if (g_sseMXCSR.GetRoundMode() != SSEround_Nearest)
{
// Set roundmode to nearest if it isn't already
//Console.WriteLn("sqrt to nearest");
roundmode_temp[0] = (g_sseMXCSR & 0xFFFF9FFF); // Set new roundmode
roundmode_temp[1] = g_sseMXCSR; // Backup old Roundmode
SSE_LDMXCSR ((uptr)&roundmode_temp[0]); // Recompile Roundmode Change
roundmode_nearest = g_sseMXCSR;
roundmode_nearest.SetRoundMode( SSEround_Nearest );
xLDMXCSR (roundmode_nearest);
roundmodeFlag = 1;
}

@@ -935,9 +952,10 @@ void recSQRT_S_xmm(int info)

ToPS2FPU(EEREC_D, false, t1reg, false);

if (roundmodeFlag == 1) { // Set roundmode back if it was changed
SSE_LDMXCSR ((uptr)&roundmode_temp[1]);
if (roundmodeFlag == 1) {
xLDMXCSR (g_sseMXCSR);
}

_freeX86reg(tempReg);
_freeXMMreg(t1reg);
}

@@ -1020,13 +1038,15 @@ void recRSQRT_S_xmm(int info)
{
int sreg, treg;

int roundmodeFlag = 0;
if ((g_sseMXCSR & 0x00006000) != 0x00000000) { // Set roundmode to nearest if it isn't already
//Console.WriteLn("rsqrt to nearest");
roundmode_temp[0] = (g_sseMXCSR & 0xFFFF9FFF); // Set new roundmode
roundmode_temp[1] = g_sseMXCSR; // Backup old Roundmode
SSE_LDMXCSR ((uptr)&roundmode_temp[0]); // Recompile Roundmode Change
roundmodeFlag = 1;
bool roundmodeFlag = false;
if (g_sseMXCSR.GetRoundMode() != SSEround_Nearest)
{
// Set roundmode to nearest if it isn't already
//Console.WriteLn("sqrt to nearest");
roundmode_nearest = g_sseMXCSR;
roundmode_nearest.SetRoundMode( SSEround_Nearest );
xLDMXCSR (roundmode_nearest);
roundmodeFlag = true;
}

ALLOC_S(sreg); ALLOC_T(treg);

@@ -1040,9 +1060,7 @@ void recRSQRT_S_xmm(int info)

_freeXMMreg(treg); _freeXMMreg(sreg);

if (roundmodeFlag == 1) { // Set roundmode back if it was changed
SSE_LDMXCSR ((uptr)&roundmode_temp[1]);
}
if (roundmodeFlag) xLDMXCSR (g_sseMXCSR);
}

FPURECOMPILE_CONSTCODE(RSQRT_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT);

@@ -16,38 +16,23 @@

#include "PrecompiledHeader.h"

u32 g_sseMXCSR = DEFAULT_sseMXCSR;
u32 g_sseVUMXCSR = DEFAULT_sseVUMXCSR;
SSE_MXCSR g_sseMXCSR = { DEFAULT_sseMXCSR };
SSE_MXCSR g_sseVUMXCSR = { DEFAULT_sseVUMXCSR };

//////////////////////////////////////////////////////////////////////////////////////////
// SetCPUState -- for assignment of SSE roundmodes and clampmodes.
//
void SetCPUState(u32 sseMXCSR, u32 sseVUMXCSR)
void SetCPUState(SSE_MXCSR sseMXCSR, SSE_MXCSR sseVUMXCSR)
{
//Msgbox::Alert("SetCPUState: Config.sseMXCSR = %x; Config.sseVUMXCSR = %x \n", Config.sseMXCSR, Config.sseVUMXCSR);
// SSE STATE //
// WARNING: do not touch unless you know what you are doing

sseMXCSR &= 0xffff; // clear the upper 16 bits since they shouldn't be set
sseVUMXCSR &= 0xffff;

if( !x86caps.hasStreamingSIMD2Extensions )
{
// SSE1 cpus do not support Denormals Are Zero flag (throws an exception
// if we don't mask them off)

sseMXCSR &= ~0x0040;
sseVUMXCSR &= ~0x0040;
}

g_sseMXCSR = sseMXCSR;
g_sseVUMXCSR = sseVUMXCSR;
g_sseMXCSR = sseMXCSR.ApplyReserveMask();
g_sseVUMXCSR = sseVUMXCSR.ApplyReserveMask();

#ifdef _MSC_VER
__asm ldmxcsr g_sseMXCSR; // set the new sse control
#else
__asm__ __volatile__("ldmxcsr %[g_sseMXCSR]" : : [g_sseMXCSR]"m"(g_sseMXCSR) );
#endif
//g_sseVUMXCSR = g_sseMXCSR|0x6000;
}
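
// Editor's note (not part of this commit's diff): ApplyReserveMask() is what replaces the old
// explicit SSE1 special-case. On hosts without SSE2, cpudetectInit() typically ends up with
// MXCSR_Mask = 0xFFBF, so ANDing against it clears DenormalsAreZero (bit 6, the old 0x0040 mask)
// automatically; on SSE2-capable hosts the mask read back via FXSAVE leaves that bit available.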
@@ -108,7 +108,7 @@ static void __fastcall iopRecRecompile( const u32 startpc );
static u32 s_store_ebp, s_store_esp;

// Recompiled code buffer for EE recompiler dispatchers!
static u8 __pagealigned iopRecDispatchers[0x1000];
static u8 __pagealigned iopRecDispatchers[__pagesize];

typedef void DynGenFunc();

@@ -348,10 +348,10 @@ static DynGenFunc* _DynGen_EnterRecompiledCode()
static void _DynGen_Dispatchers()
{
// In case init gets called multiple times:
HostSys::MemProtect( iopRecDispatchers, 0x1000, Protect_ReadWrite, false );
HostSys::MemProtectStatic( iopRecDispatchers, Protect_ReadWrite, false );

// clear the buffer to 0xcc (easier debugging).
memset_8<0xcc,0x1000>( iopRecDispatchers );
memset_8<0xcc,__pagesize>( iopRecDispatchers );

xSetPtr( iopRecDispatchers );

@@ -365,7 +365,7 @@ static void _DynGen_Dispatchers()
iopJITCompileInBlock = _DynGen_JITCompileInBlock();
iopEnterRecompiledCode = _DynGen_EnterRecompiledCode();

HostSys::MemProtect( iopRecDispatchers, 0x1000, Protect_ReadOnly, true );
HostSys::MemProtectStatic( iopRecDispatchers, Protect_ReadOnly, true );

recBlocks.SetJITCompile( iopJITCompile );
}

@@ -299,7 +299,7 @@ static u32 g_lastpc = 0;
static u32 s_store_ebp, s_store_esp;

// Recompiled code buffer for EE recompiler dispatchers!
static u8 __pagealigned eeRecDispatchers[0x1000];
static u8 __pagealigned eeRecDispatchers[__pagesize];

typedef void DynGenFunc();

@@ -460,10 +460,10 @@ static DynGenFunc* _DynGen_EnterRecompiledCode()
static void _DynGen_Dispatchers()
{
// In case init gets called multiple times:
HostSys::MemProtect( eeRecDispatchers, 0x1000, Protect_ReadWrite, false );
HostSys::MemProtectStatic( eeRecDispatchers, Protect_ReadWrite, false );

// clear the buffer to 0xcc (easier debugging).
memset_8<0xcc,0x1000>( eeRecDispatchers );
memset_8<0xcc,__pagesize>( eeRecDispatchers );

xSetPtr( eeRecDispatchers );

@@ -477,7 +477,7 @@ static void _DynGen_Dispatchers()
JITCompileInBlock = _DynGen_JITCompileInBlock();
EnterRecompiledCode = _DynGen_EnterRecompiledCode();

HostSys::MemProtect( eeRecDispatchers, 0x1000, Protect_ReadOnly, true );
HostSys::MemProtectStatic( eeRecDispatchers, Protect_ReadOnly, true );

recBlocks.SetJITCompile( JITCompile );
}

@@ -630,7 +630,6 @@ void recResetEE( void )
x86FpuState = FPU_STATE;

branch = 0;
SetCPUState(EmuConfig.Cpu.sseMXCSR, EmuConfig.Cpu.sseVUMXCSR);
eeRecIsReset = true;
}

@@ -689,6 +688,7 @@ static void recExecute()
{
eeRecIsReset = false;
g_EEFreezeRegs = true;
SetCPUState(g_sseMXCSR, g_sseVUMXCSR);

try {
EnterRecompiledCode();

@@ -719,6 +719,7 @@ static void recExecute()
pthread_setcancelstate( PTHREAD_CANCEL_DISABLE, &oldstate );

eeRecIsReset = false;
SetCPUState(g_sseMXCSR, g_sseVUMXCSR);

#ifdef _WIN32
__try {

@@ -243,7 +243,7 @@ namespace vtlb_private
// If it were smaller than a page we'd end up allowing execution rights on some
// other vars additionally (bad!).
//
static __pagealigned u8 m_IndirectDispatchers[0x1000];
static __pagealigned u8 m_IndirectDispatchers[__pagesize];

// ------------------------------------------------------------------------
// mode - 0 for read, 1 for write!

@@ -286,7 +286,7 @@ static void DynGen_IndirectDispatch( int mode, int bits )
void vtlb_dynarec_init()
{
// In case init gets called multiple times:
HostSys::MemProtect( m_IndirectDispatchers, 0x1000, Protect_ReadWrite, false );
HostSys::MemProtectStatic( m_IndirectDispatchers, Protect_ReadWrite, false );

// clear the buffer to 0xcc (easier debugging).
memset_8<0xcc,0x1000>( m_IndirectDispatchers );

@@ -310,7 +310,7 @@ void vtlb_dynarec_init()
}
}

HostSys::MemProtect( m_IndirectDispatchers, 0x1000, Protect_ReadOnly, true );
HostSys::MemProtectStatic( m_IndirectDispatchers, Protect_ReadOnly, true );
}

//////////////////////////////////////////////////////////////////////////////////////////

@@ -39,7 +39,7 @@ void mVUdispatcherA(mV) {
else { xCALL(mVUexecuteVU1); }

// Load VU's MXCSR state
xLDMXCSR(&g_sseVUMXCSR);
xLDMXCSR(g_sseVUMXCSR);

// Load Regs
#ifdef CHECK_MACROVU0

@@ -72,7 +72,7 @@ void mVUdispatcherB(mV) {
mVU->exitFunct = x86Ptr;

// Load EE's MXCSR state
xLDMXCSR(&g_sseMXCSR);
xLDMXCSR(g_sseMXCSR);

// __fastcall = The first two DWORD or smaller arguments are passed in ECX and EDX registers;
// all other arguments are passed right to left.

@@ -156,7 +156,7 @@ extern const __aligned(32) mVU_Globals mVUglob;
// This *probably* fixes the crashing bugs in linux when using the optimized memcmp.
// Needs testing... --air
#ifndef __LINUX__
extern __pagealigned u8 mVUsearchXMM[0x1000];
extern __pagealigned u8 mVUsearchXMM[__pagesize];
typedef u32 (__fastcall *mVUCall)(void*, void*);
#define mVUquickSearch(dest, src, size) ((((mVUCall)((void*)mVUsearchXMM))(dest, src)) == 0xf)
#define mVUemitSearch() { mVUcustomSearch(); }

@@ -515,13 +515,13 @@ void SSE_DIVSS(mV, int to, int from, int t1 = -1, int t2 = -1) {
// Micro VU - Custom Quick Search
//------------------------------------------------------------------

static __pagealigned u8 mVUsearchXMM[0x1000];
static __pagealigned u8 mVUsearchXMM[__pagesize];

// Generates a custom optimized block-search function
// Note: Structs must be 16-byte aligned! (GCC doesn't guarantee this)
void mVUcustomSearch() {
HostSys::MemProtect(mVUsearchXMM, 0x1000, Protect_ReadWrite, false);
memset_8<0xcc,0x1000>(mVUsearchXMM);
HostSys::MemProtectStatic(mVUsearchXMM, Protect_ReadWrite, false);
memset_8<0xcc,__pagesize>(mVUsearchXMM);
xSetPtr(mVUsearchXMM);

xMOVAPS (xmm0, ptr32[ecx]);

@@ -565,5 +565,5 @@ void mVUcustomSearch() {

exitPoint.SetTarget();
xRET();
HostSys::MemProtect(mVUsearchXMM, 0x1000, Protect_ReadOnly, true);
HostSys::MemProtectStatic(mVUsearchXMM, Protect_ReadOnly, true);
}